1 /*****************************************************************************
2 
3 Copyright (c) 2011, 2021, Oracle and/or its affiliates.
4 
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License, version 2.0,
7 as published by the Free Software Foundation.
8 
9 This program is also distributed with certain software (including
10 but not limited to OpenSSL) that is licensed under separate terms,
11 as designated in a particular file or component or in included license
12 documentation.  The authors of MySQL hereby grant you an additional
13 permission to link the program and your derivative works with the
14 separately licensed software that they have included with MySQL.
15 
16 This program is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19 GNU General Public License, version 2.0, for more details.
20 
21 You should have received a copy of the GNU General Public License along with
22 this program; if not, write to the Free Software Foundation, Inc.,
23 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
24 
25 *****************************************************************************/
26 
27 /**************************************************//**
28 @file fts/fts0fts.cc
29 Full Text Search interface
30 ***********************************************************************/
31 
32 #include "ha_prototypes.h"
33 
34 #include "trx0roll.h"
35 #include "row0mysql.h"
36 #include "row0upd.h"
37 #include "dict0types.h"
38 #include "dict0stats_bg.h"
39 #include "row0sel.h"
40 #include "fts0fts.h"
41 #include "fts0priv.h"
42 #include "fts0types.h"
43 #include "fts0types.ic"
44 #include "fts0vlc.ic"
45 #include "fts0plugin.h"
46 #include "dict0priv.h"
47 #include "dict0stats.h"
48 #include "btr0pcur.h"
49 #include "sync0sync.h"
50 #include "ut0new.h"
51 
52 #include "fil0crypt.h"
53 
/** NOTE(review): presumably the maximum length of an id rendered as text;
it is not referenced in the visible part of this file -- confirm against
its users. */
static const ulint FTS_MAX_ID_LEN = 32;

/** Name of the FTS config table entry that holds the cache size in MB.
It is inserted with the default value '256' by
fts_config_table_insert_values_sql. */
#define FTS_MAX_CACHE_SIZE_IN_MB	"cache_size_in_mb"

/** Check whether an auxiliary table name belongs to an obsolete table by
looking for one of the obsolete keywords ("DOC_ID", "ADDED", "STOPWORDS")
anywhere in the name. The argument is evaluated up to three times, so it
must not have side effects. */
#define FTS_IS_OBSOLETE_AUX_TABLE(table_name)			\
	(strstr((table_name), "DOC_ID") != NULL			\
	 || strstr((table_name), "ADDED") != NULL		\
	 || strstr((table_name), "STOPWORDS") != NULL)
65 
/** Maximum size of the FTS cache of a single table; meant to be a
configurable variable */
ulong	fts_max_cache_size;

/** Whether the total memory used for FTS cache is exhausted, and we will
need a sync to free some memory. Reset to false by fts_cache_clear(). */
bool	fts_need_sync = false;

/** Variable specifying the total memory allocated for FTS cache */
ulong	fts_max_total_cache_size;

/** FTS result cache limit for each query; meant to be a configurable
variable */
ulong	fts_result_cache_limit;

/** Variable specifying the maximum FTS token size */
ulong	fts_max_token_size;

/** Variable specifying the minimum FTS token size */
ulong	fts_min_token_size;


// FIXME: testing
// NOTE(review): neither variable is referenced in the visible part of this
// file; they look like leftover instrumentation -- confirm before removing.
ib_time_monotonic_t elapsed_time = 0;
ulint n_nodes = 0;
91 
#ifdef FTS_CACHE_SIZE_DEBUG
/** The cache size permissible lower limit, in MB (1 MB) */
static const ulint FTS_CACHE_SIZE_LOWER_LIMIT_IN_MB = 1;

/** The cache size permissible upper limit, in MB (1024 MB = 1 GB) */
static const ulint FTS_CACHE_SIZE_UPPER_LIMIT_IN_MB = 1024;
#endif

/** Time to sleep after DEADLOCK error before retrying operation.
NOTE(review): the unit is not visible here; presumably microseconds --
confirm against the sleep call that consumes it. */
static const ulint FTS_DEADLOCK_RETRY_WAIT = 100000;

/** Variable to record innodb_fts_internal_tbl_name for the information
schema tables INNODB_FTS_INSERTED etc. */
char* fts_internal_tbl_name		= NULL;
106 
/** InnoDB default stopword list, terminated by a NULL entry.
There are different versions of stopwords; the stop words listed
below come from the "Google Stopword" list. Reference:
http://meta.wikimedia.org/wiki/Stop_word_list/google_stop_word_list.
The final version of the InnoDB default stopword list is still pending
a decision.
Note that "the" appears twice in the list.
NOTE(review): the array has external linkage, so other modules may
depend on its exact contents -- confirm before removing the duplicate. */
const char *fts_default_stopword[] =
{
	"a",
	"about",
	"an",
	"are",
	"as",
	"at",
	"be",
	"by",
	"com",
	"de",
	"en",
	"for",
	"from",
	"how",
	"i",
	"in",
	"is",
	"it",
	"la",
	"of",
	"on",
	"or",
	"that",
	"the",
	"this",
	"to",
	"was",
	"what",
	"when",
	"where",
	"who",
	"will",
	"with",
	"und",
	"the",
	"www",
	NULL
};
153 
/** Information about one FTS auxiliary table, collected when checking
for orphaned tables. */
struct fts_aux_table_t {
	table_id_t	id;		/*!< Table id */
	table_id_t	parent_id;	/*!< Parent table id */
	table_id_t	index_id;	/*!< Table FT index id */
	char*		name;		/*!< Name of the table */
};
161 
#ifdef FTS_DOC_STATS_DEBUG
/** Template for creating the FTS auxiliary index specific tables. This is
mainly designed for the statistics work in the future. It is only compiled
when FTS_DOC_STATS_DEBUG is defined. $doc_id_table is a placeholder for the
table name; the template creates that table with the columns doc_id and
word_count plus a unique clustered index on doc_id. */
static const char* fts_create_index_tables_sql = {
	"BEGIN\n"
	""
	"CREATE TABLE $doc_id_table (\n"
	"   doc_id BIGINT UNSIGNED,\n"
	"   word_count INTEGER UNSIGNED NOT NULL\n"
	") COMPACT;\n"
	"CREATE UNIQUE CLUSTERED INDEX IND ON $doc_id_table(doc_id);\n"
};
#endif
175 
/** FTS auxiliary table suffixes that are common to all FT indexes.
The list is terminated by a NULL entry. */
const char* fts_common_tables[] = {
	"BEING_DELETED",
	"BEING_DELETED_CACHE",
	"CONFIG",
	"DELETED",
	"DELETED_CACHE",
	NULL
};
185 
/** FTS auxiliary INDEX split intervals. Each entry pairs a numeric value
with the suffix of one auxiliary index table; the list is terminated by
the { 0, NULL } entry.
NOTE(review): the meaning of the numeric value (presumably the boundary
used to pick the auxiliary table for a word) is not visible here --
confirm against the code that reads this table. */
const  fts_index_selector_t fts_index_selector[] = {
	{ 9, "INDEX_1" },
	{ 65, "INDEX_2" },
	{ 70, "INDEX_3" },
	{ 75, "INDEX_4" },
	{ 80, "INDEX_5" },
	{ 85, "INDEX_6" },
	{  0 , NULL	 }
};
196 
/** Default config values for FTS indexes on a table. $config_table is a
placeholder for the config table name. The statement inserts these
defaults: cache size in MB '256', optimize limit in seconds '180',
synced doc id '0', total deleted count '0' and table state '0'. */
static const char* fts_config_table_insert_values_sql =
	"BEGIN\n"
	"\n"
	"INSERT INTO $config_table VALUES('"
		FTS_MAX_CACHE_SIZE_IN_MB "', '256');\n"
	""
	"INSERT INTO $config_table VALUES('"
		FTS_OPTIMIZE_LIMIT_IN_SECS  "', '180');\n"
	""
	"INSERT INTO $config_table VALUES ('"
		FTS_SYNCED_DOC_ID "', '0');\n"
	""
	"INSERT INTO $config_table VALUES ('"
		FTS_TOTAL_DELETED_COUNT "', '0');\n"
	"" /* Note: 0 == FTS_TABLE_STATE_RUNNING */
	"INSERT INTO $config_table VALUES ('"
		FTS_TABLE_STATE "', '0');\n";
215 
/** FTS tokenize parameter for the plugin parser */
struct fts_tokenize_param_t {
	fts_doc_t*	result_doc;	/*!< Result doc for tokens */
	ulint		add_pos;	/*!< Added position for tokens */
};
221 
/** Run SYNC on the table, i.e., write out data from the cache to the
FTS auxiliary INDEX table and clear the cache at the end.
(Forward declaration; the definition is further down in this file.)
@param[in,out]	sync		sync state
@param[in]	unlock_cache	whether to release the cache lock while
				writing a node
@param[in]	wait		whether to wait when a sync is already in
				progress
@param[in]	has_dict_lock	whether the caller holds the dict operation
				lock
@return DB_SUCCESS if all OK */
static
dberr_t
fts_sync(
	fts_sync_t*	sync,
	bool		unlock_cache,
	bool		wait,
	bool		has_dict_lock);
236 
/****************************************************************//**
Release all resources held by the words rb tree e.g., the node ilist.
(Forward declaration; the definition is further down in this file.) */
static
void
fts_words_free(
/*===========*/
	ib_rbt_t*	words)		/*!< in: rb tree of words */
	MY_ATTRIBUTE((nonnull));
#ifdef FTS_CACHE_SIZE_DEBUG
/****************************************************************//**
Read the max cache size parameter from the config table.
Only declared when FTS_CACHE_SIZE_DEBUG is defined. */
static
void
fts_update_max_cache_size(
/*======================*/
	fts_sync_t*	sync);		/*!< in: sync state */
#endif
254 
/*********************************************************************//**
This function fetches the document just inserted right before
we commit the transaction, tokenizes the inserted text data
and inserts it into the FTS auxiliary table and its cache.
NOTE(review): the return type is ulint although the value is documented
as a boolean; the definition is not visible here -- confirm.
@return TRUE if successful */
static
ulint
fts_add_doc_by_id(
/*==============*/
	fts_trx_table_t*ftt,		/*!< in: FTS trx table */
	doc_id_t	doc_id,		/*!< in: doc id */
	ib_vector_t*	fts_indexes MY_ATTRIBUTE((unused)));
					/*!< in: affected fts indexes */
#ifdef FTS_DOC_STATS_DEBUG
/****************************************************************//**
Check whether a particular word (term) exists in the FTS index.
Only declared when FTS_DOC_STATS_DEBUG is defined.
@return DB_SUCCESS if all went fine */
static
dberr_t
fts_is_word_in_index(
/*=================*/
	trx_t*		trx,		/*!< in: FTS query state */
	que_t**		graph,		/*!< out: Query graph */
	fts_table_t*	fts_table,	/*!< in: table instance */
	const fts_string_t* word,	/*!< in: the word to check */
	ibool*		found)		/*!< out: TRUE if exists */
	MY_ATTRIBUTE((nonnull, warn_unused_result));
#endif /* FTS_DOC_STATS_DEBUG */
283 
/******************************************************************//**
Update the last document id. This function could create a new
transaction to update the last document id.
Only the table argument is declared nonnull; table_name and trx may be
NULL as noted on the parameters.
@return DB_SUCCESS if OK */
static
dberr_t
fts_update_sync_doc_id(
/*===================*/
	const dict_table_t*	table,		/*!< in: table */
	const char*		table_name,	/*!< in: table name, or NULL */
	doc_id_t		doc_id,		/*!< in: last document id */
	trx_t*			trx)		/*!< in: update trx, or NULL */
	MY_ATTRIBUTE((nonnull(1)));
297 
298 /** Get a character set based on precise type.
299 @param prtype precise type
300 @return the corresponding character set */
301 UNIV_INLINE
302 CHARSET_INFO*
fts_get_charset(ulint prtype)303 fts_get_charset(ulint prtype)
304 {
305 #ifdef UNIV_DEBUG
306 	switch (prtype & DATA_MYSQL_TYPE_MASK) {
307 	case MYSQL_TYPE_BIT:
308 	case MYSQL_TYPE_STRING:
309 	case MYSQL_TYPE_VAR_STRING:
310 	case MYSQL_TYPE_TINY_BLOB:
311 	case MYSQL_TYPE_MEDIUM_BLOB:
312 	case MYSQL_TYPE_BLOB:
313 	case MYSQL_TYPE_LONG_BLOB:
314 	case MYSQL_TYPE_VARCHAR:
315 		break;
316 	default:
317 		ut_error;
318 	}
319 #endif /* UNIV_DEBUG */
320 
321 	uint cs_num = (uint) dtype_get_charset_coll(prtype);
322 
323 	if (CHARSET_INFO* cs = get_charset(cs_num, MYF(MY_WME))) {
324 		return(cs);
325 	}
326 
327 	ib::fatal() << "Unable to find charset-collation " << cs_num;
328 	return(NULL);
329 }
330 
331 /****************************************************************//**
332 This function loads the default InnoDB stopword list */
333 static
334 void
fts_load_default_stopword(fts_stopword_t * stopword_info)335 fts_load_default_stopword(
336 /*======================*/
337 	fts_stopword_t*		stopword_info)	/*!< in: stopword info */
338 {
339 	fts_string_t		str;
340 	mem_heap_t*		heap;
341 	ib_alloc_t*		allocator;
342 	ib_rbt_t*		stop_words;
343 
344 	allocator = stopword_info->heap;
345 	heap = static_cast<mem_heap_t*>(allocator->arg);
346 
347 	if (!stopword_info->cached_stopword) {
348 		stopword_info->cached_stopword = rbt_create_arg_cmp(
349 			sizeof(fts_tokenizer_word_t), innobase_fts_text_cmp,
350 			&my_charset_latin1);
351 	}
352 
353 	stop_words = stopword_info->cached_stopword;
354 
355 	str.f_n_char = 0;
356 
357 	for (ulint i = 0; fts_default_stopword[i]; ++i) {
358 		char*			word;
359 		fts_tokenizer_word_t	new_word;
360 
361 		/* We are going to duplicate the value below. */
362 		word = const_cast<char*>(fts_default_stopword[i]);
363 
364 		new_word.nodes = ib_vector_create(
365 			allocator, sizeof(fts_node_t), 4);
366 
367 		str.f_len = ut_strlen(word);
368 		str.f_str = reinterpret_cast<byte*>(word);
369 
370 		fts_string_dup(&new_word.text, &str, heap);
371 
372 		rbt_insert(stop_words, &new_word, &new_word);
373 	}
374 
375 	stopword_info->status = STOPWORD_FROM_DEFAULT;
376 }
377 
378 /****************************************************************//**
379 Callback function to read a single stopword value.
380 @return Always return TRUE */
381 static
382 ibool
fts_read_stopword(void * row,void * user_arg)383 fts_read_stopword(
384 /*==============*/
385 	void*		row,		/*!< in: sel_node_t* */
386 	void*		user_arg)	/*!< in: pointer to ib_vector_t */
387 {
388 	ib_alloc_t*	allocator;
389 	fts_stopword_t*	stopword_info;
390 	sel_node_t*	sel_node;
391 	que_node_t*	exp;
392 	ib_rbt_t*	stop_words;
393 	dfield_t*	dfield;
394 	fts_string_t	str;
395 	mem_heap_t*	heap;
396 	ib_rbt_bound_t	parent;
397 
398 	sel_node = static_cast<sel_node_t*>(row);
399 	stopword_info = static_cast<fts_stopword_t*>(user_arg);
400 
401 	stop_words = stopword_info->cached_stopword;
402 	allocator =  static_cast<ib_alloc_t*>(stopword_info->heap);
403 	heap = static_cast<mem_heap_t*>(allocator->arg);
404 
405 	exp = sel_node->select_list;
406 
407 	/* We only need to read the first column */
408 	dfield = que_node_get_val(exp);
409 
410 	str.f_n_char = 0;
411 	str.f_str = static_cast<byte*>(dfield_get_data(dfield));
412 	str.f_len = dfield_get_len(dfield);
413 
414 	/* Only create new node if it is a value not already existed */
415 	if (str.f_len != UNIV_SQL_NULL
416 	    && rbt_search(stop_words, &parent, &str) != 0) {
417 
418 		fts_tokenizer_word_t	new_word;
419 
420 		new_word.nodes = ib_vector_create(
421 			allocator, sizeof(fts_node_t), 4);
422 
423 		new_word.text.f_str = static_cast<byte*>(
424 			 mem_heap_alloc(heap, str.f_len + 1));
425 
426 		memcpy(new_word.text.f_str, str.f_str, str.f_len);
427 
428 		new_word.text.f_n_char = 0;
429 		new_word.text.f_len = str.f_len;
430 		new_word.text.f_str[str.f_len] = 0;
431 
432 		rbt_insert(stop_words, &new_word, &new_word);
433 	}
434 
435 	return(TRUE);
436 }
437 
438 /******************************************************************//**
439 Load user defined stopword from designated user table
440 @return TRUE if load operation is successful */
441 static
442 ibool
fts_load_user_stopword(fts_t * fts,const char * stopword_table_name,fts_stopword_t * stopword_info)443 fts_load_user_stopword(
444 /*===================*/
445 	fts_t*		fts,			/*!< in: FTS struct */
446 	const char*	stopword_table_name,	/*!< in: Stopword table
447 						name */
448 	fts_stopword_t*	stopword_info)		/*!< in: Stopword info */
449 {
450 	pars_info_t*	info;
451 	que_t*		graph;
452 	dberr_t		error = DB_SUCCESS;
453 	ibool		ret = TRUE;
454 	trx_t*		trx;
455 	ibool		has_lock = fts->fts_status & TABLE_DICT_LOCKED;
456 
457 	trx = trx_allocate_for_background();
458 	trx->op_info = "Load user stopword table into FTS cache";
459 
460 	if (!has_lock) {
461 		mutex_enter(&dict_sys->mutex);
462 	}
463 
464 	/* Validate the user table existence and in the right
465 	format */
466 	stopword_info->charset = fts_valid_stopword_table(stopword_table_name);
467 	if (!stopword_info->charset) {
468 		ret = FALSE;
469 		goto cleanup;
470 	} else if (!stopword_info->cached_stopword) {
471 		/* Create the stopword RB tree with the stopword column
472 		charset. All comparison will use this charset */
473 		stopword_info->cached_stopword = rbt_create_arg_cmp(
474 			sizeof(fts_tokenizer_word_t), innobase_fts_text_cmp,
475 			stopword_info->charset);
476 
477 	}
478 
479 	info = pars_info_create();
480 
481 	pars_info_bind_id(info, TRUE, "table_stopword", stopword_table_name);
482 
483 	pars_info_bind_function(info, "my_func", fts_read_stopword,
484 				stopword_info);
485 
486 	graph = fts_parse_sql_no_dict_lock(
487 		NULL,
488 		info,
489 		"DECLARE FUNCTION my_func;\n"
490 		"DECLARE CURSOR c IS"
491 		" SELECT value"
492 		" FROM $table_stopword;\n"
493 		"BEGIN\n"
494 		"\n"
495 		"OPEN c;\n"
496 		"WHILE 1 = 1 LOOP\n"
497 		"  FETCH c INTO my_func();\n"
498 		"  IF c % NOTFOUND THEN\n"
499 		"    EXIT;\n"
500 		"  END IF;\n"
501 		"END LOOP;\n"
502 		"CLOSE c;");
503 
504 	for (;;) {
505 		error = fts_eval_sql(trx, graph);
506 
507 		if (error == DB_SUCCESS) {
508 			fts_sql_commit(trx);
509 			stopword_info->status = STOPWORD_USER_TABLE;
510 			break;
511 		} else {
512 
513 			fts_sql_rollback(trx);
514 
515 			if (error == DB_LOCK_WAIT_TIMEOUT) {
516 				ib::warn() << "Lock wait timeout reading user"
517 					" stopword table. Retrying!";
518 
519 				trx->error_state = DB_SUCCESS;
520 			} else {
521 				ib::error() << "Error '" << ut_strerr(error)
522 					<< "' while reading user stopword"
523 					" table.";
524 				ret = FALSE;
525 				break;
526 			}
527 		}
528 	}
529 
530 	que_graph_free(graph);
531 
532 cleanup:
533 	if (!has_lock) {
534 		mutex_exit(&dict_sys->mutex);
535 	}
536 
537 	trx_free_for_background(trx);
538 	return(ret);
539 }
540 
541 /******************************************************************//**
542 Initialize the index cache. */
543 static
544 void
fts_index_cache_init(ib_alloc_t * allocator,fts_index_cache_t * index_cache)545 fts_index_cache_init(
546 /*=================*/
547 	ib_alloc_t*		allocator,	/*!< in: the allocator to use */
548 	fts_index_cache_t*	index_cache)	/*!< in: index cache */
549 {
550 	ulint			i;
551 
552 	ut_a(index_cache->words == NULL);
553 
554 	index_cache->words = rbt_create_arg_cmp(
555 		sizeof(fts_tokenizer_word_t), innobase_fts_text_cmp,
556 		index_cache->charset);
557 
558 	ut_a(index_cache->doc_stats == NULL);
559 
560 	index_cache->doc_stats = ib_vector_create(
561 		allocator, sizeof(fts_doc_stats_t), 4);
562 
563 	for (i = 0; i < FTS_NUM_AUX_INDEX; ++i) {
564 		ut_a(index_cache->ins_graph[i] == NULL);
565 		ut_a(index_cache->sel_graph[i] == NULL);
566 	}
567 }
568 
569 /*********************************************************************//**
570 Initialize FTS cache. */
571 void
fts_cache_init(fts_cache_t * cache)572 fts_cache_init(
573 /*===========*/
574 	fts_cache_t*	cache)		/*!< in: cache to initialize */
575 {
576 	ulint		i;
577 
578 	/* Just to make sure */
579 	ut_a(cache->sync_heap->arg == NULL);
580 
581 	cache->sync_heap->arg = mem_heap_create(1024);
582 
583 	cache->total_size = 0;
584 	cache->total_size_before_sync = 0;
585 
586 	mutex_enter((ib_mutex_t*) &cache->deleted_lock);
587 	cache->deleted_doc_ids = ib_vector_create(
588 		cache->sync_heap, sizeof(fts_update_t), 4);
589 	mutex_exit((ib_mutex_t*) &cache->deleted_lock);
590 
591 	/* Reset the cache data for all the FTS indexes. */
592 	for (i = 0; i < ib_vector_size(cache->indexes); ++i) {
593 		fts_index_cache_t*	index_cache;
594 
595 		index_cache = static_cast<fts_index_cache_t*>(
596 			ib_vector_get(cache->indexes, i));
597 
598 		fts_index_cache_init(cache->sync_heap, index_cache);
599 	}
600 }
601 
602 /****************************************************************//**
603 Create a FTS cache. */
604 fts_cache_t*
fts_cache_create(dict_table_t * table)605 fts_cache_create(
606 /*=============*/
607 	dict_table_t*	table)	/*!< in: table owns the FTS cache */
608 {
609 	mem_heap_t*	heap;
610 	fts_cache_t*	cache;
611 
612 	heap = static_cast<mem_heap_t*>(mem_heap_create(512));
613 
614 	cache = static_cast<fts_cache_t*>(
615 		mem_heap_zalloc(heap, sizeof(*cache)));
616 
617 	cache->cache_heap = heap;
618 
619 	rw_lock_create(fts_cache_rw_lock_key, &cache->lock, SYNC_FTS_CACHE);
620 
621 	rw_lock_create(
622 		fts_cache_init_rw_lock_key, &cache->init_lock,
623 		SYNC_FTS_CACHE_INIT);
624 
625 	mutex_create(LATCH_ID_FTS_DELETE, &cache->deleted_lock);
626 
627 	mutex_create(LATCH_ID_FTS_OPTIMIZE, &cache->optimize_lock);
628 
629 	mutex_create(LATCH_ID_FTS_DOC_ID, &cache->doc_id_lock);
630 
631 	/* This is the heap used to create the cache itself. */
632 	cache->self_heap = ib_heap_allocator_create(heap);
633 
634 	/* This is a transient heap, used for storing sync data. */
635 	cache->sync_heap = ib_heap_allocator_create(heap);
636 	cache->sync_heap->arg = NULL;
637 
638 	cache->sync = static_cast<fts_sync_t*>(
639 		mem_heap_zalloc(heap, sizeof(fts_sync_t)));
640 
641 	cache->sync->table = table;
642 	cache->sync->event = os_event_create(0);
643 
644 	/* Create the index cache vector that will hold the inverted indexes. */
645 	cache->indexes = ib_vector_create(
646 		cache->self_heap, sizeof(fts_index_cache_t), 2);
647 
648 	fts_cache_init(cache);
649 
650 	cache->stopword_info.cached_stopword = NULL;
651 	cache->stopword_info.charset = NULL;
652 
653 	cache->stopword_info.heap = cache->self_heap;
654 
655 	cache->stopword_info.status = STOPWORD_NOT_INIT;
656 
657 	return(cache);
658 }
659 
660 /*******************************************************************//**
661 Add a newly create index into FTS cache */
662 void
fts_add_index(dict_index_t * index,dict_table_t * table)663 fts_add_index(
664 /*==========*/
665 	dict_index_t*	index,		/*!< FTS index to be added */
666 	dict_table_t*	table)		/*!< table */
667 {
668 	fts_t*			fts = table->fts;
669 	fts_cache_t*		cache;
670 	fts_index_cache_t*	index_cache;
671 
672 	ut_ad(fts);
673 	cache = table->fts->cache;
674 
675 	rw_lock_x_lock(&cache->init_lock);
676 
677 	ib_vector_push(fts->indexes, &index);
678 
679 	index_cache = fts_find_index_cache(cache, index);
680 
681 	if (!index_cache) {
682 		/* Add new index cache structure */
683 		index_cache = fts_cache_index_cache_create(table, index);
684 	}
685 
686 	rw_lock_x_unlock(&cache->init_lock);
687 }
688 
689 /*******************************************************************//**
690 recalibrate get_doc structure after index_cache in cache->indexes changed */
691 static
692 void
fts_reset_get_doc(fts_cache_t * cache)693 fts_reset_get_doc(
694 /*==============*/
695 	fts_cache_t*	cache)	/*!< in: FTS index cache */
696 {
697 	fts_get_doc_t*  get_doc;
698 	ulint		i;
699 
700 	ut_ad(rw_lock_own(&cache->init_lock, RW_LOCK_X));
701 
702 	ib_vector_reset(cache->get_docs);
703 
704 	for (i = 0; i < ib_vector_size(cache->indexes); i++) {
705 		fts_index_cache_t*	ind_cache;
706 
707 		ind_cache = static_cast<fts_index_cache_t*>(
708 			ib_vector_get(cache->indexes, i));
709 
710 		get_doc = static_cast<fts_get_doc_t*>(
711 			ib_vector_push(cache->get_docs, NULL));
712 
713 		memset(get_doc, 0x0, sizeof(*get_doc));
714 
715 		get_doc->index_cache = ind_cache;
716 	}
717 
718 	ut_ad(ib_vector_size(cache->get_docs)
719 	      == ib_vector_size(cache->indexes));
720 }
721 
722 /*******************************************************************//**
723 Check an index is in the table->indexes list
724 @return TRUE if it exists */
725 static
726 ibool
fts_in_dict_index(dict_table_t * table,dict_index_t * index_check)727 fts_in_dict_index(
728 /*==============*/
729 	dict_table_t*	table,		/*!< in: Table */
730 	dict_index_t*	index_check)	/*!< in: index to be checked */
731 {
732 	dict_index_t*	index;
733 
734 	for (index = dict_table_get_first_index(table);
735 	     index != NULL;
736 	     index = dict_table_get_next_index(index)) {
737 
738 		if (index == index_check) {
739 			return(TRUE);
740 		}
741 	}
742 
743 	return(FALSE);
744 }
745 
746 /*******************************************************************//**
747 Check an index is in the fts->cache->indexes list
748 @return TRUE if it exists */
749 static
750 ibool
fts_in_index_cache(dict_table_t * table,dict_index_t * index)751 fts_in_index_cache(
752 /*===============*/
753 	dict_table_t*	table,	/*!< in: Table */
754 	dict_index_t*	index)	/*!< in: index to be checked */
755 {
756 	ulint	i;
757 
758 	for (i = 0; i < ib_vector_size(table->fts->cache->indexes); i++) {
759 		fts_index_cache_t*      index_cache;
760 
761 		index_cache = static_cast<fts_index_cache_t*>(
762 			ib_vector_get(table->fts->cache->indexes, i));
763 
764 		if (index_cache->index == index) {
765 			return(TRUE);
766 		}
767 	}
768 
769 	return(FALSE);
770 }
771 
772 /*******************************************************************//**
773 Check indexes in the fts->indexes is also present in index cache and
774 table->indexes list
775 @return TRUE if all indexes match */
776 ibool
fts_check_cached_index(dict_table_t * table)777 fts_check_cached_index(
778 /*===================*/
779 	dict_table_t*	table)	/*!< in: Table where indexes are dropped */
780 {
781 	ulint	i;
782 
783 	if (!table->fts || !table->fts->cache) {
784 		return(TRUE);
785 	}
786 
787 	ut_a(ib_vector_size(table->fts->indexes)
788 	      == ib_vector_size(table->fts->cache->indexes));
789 
790 	for (i = 0; i < ib_vector_size(table->fts->indexes); i++) {
791 		dict_index_t*	index;
792 
793 		index = static_cast<dict_index_t*>(
794 			ib_vector_getp(table->fts->indexes, i));
795 
796 		if (!fts_in_index_cache(table, index)) {
797 			return(FALSE);
798 		}
799 
800 		if (!fts_in_dict_index(table, index)) {
801 			return(FALSE);
802 		}
803 	}
804 
805 	return(TRUE);
806 }
807 
808 /*******************************************************************//**
809 Drop auxiliary tables related to an FTS index
810 @return DB_SUCCESS or error number */
811 dberr_t
fts_drop_index(dict_table_t * table,dict_index_t * index,trx_t * trx)812 fts_drop_index(
813 /*===========*/
814 	dict_table_t*	table,	/*!< in: Table where indexes are dropped */
815 	dict_index_t*	index,	/*!< in: Index to be dropped */
816 	trx_t*		trx)	/*!< in: Transaction for the drop */
817 {
818 	ib_vector_t*	indexes = table->fts->indexes;
819 	dberr_t		err = DB_SUCCESS;
820 
821 	ut_a(indexes);
822 
823 	if ((ib_vector_size(indexes) == 1
824 	    && (index == static_cast<dict_index_t*>(
825 			ib_vector_getp(table->fts->indexes, 0))))
826 	   || ib_vector_is_empty(indexes)) {
827 		doc_id_t	current_doc_id;
828 		doc_id_t	first_doc_id;
829 
830 		/* If we are dropping the only FTS index of the table,
831 		remove it from optimize thread */
832 		fts_optimize_remove_table(table);
833 
834 		DICT_TF2_FLAG_UNSET(table, DICT_TF2_FTS);
835 
836 		/* If Doc ID column is not added internally by FTS index,
837 		we can drop all FTS auxiliary tables. Otherwise, we will
838 		need to keep some common table such as CONFIG table, so
839 		as to keep track of incrementing Doc IDs */
840 		if (!DICT_TF2_FLAG_IS_SET(
841 			table, DICT_TF2_FTS_HAS_DOC_ID)) {
842 
843 			err = fts_drop_tables(trx, table);
844 
845 			err = fts_drop_index_tables(trx, index);
846 
847 			while (index->index_fts_syncing
848 				&& !trx_is_interrupted(trx)) {
849 				DICT_BG_YIELD(trx);
850 			}
851 
852 			fts_free(table);
853 
854 			return(err);
855 		}
856 
857 		while (index->index_fts_syncing
858 			&& !trx_is_interrupted(trx)) {
859 			DICT_BG_YIELD(trx);
860 		}
861 
862 		current_doc_id = table->fts->cache->next_doc_id;
863 		first_doc_id = table->fts->cache->first_doc_id;
864 		fts_cache_clear(table->fts->cache);
865 		fts_cache_destroy(table->fts->cache);
866 		table->fts->cache = fts_cache_create(table);
867 		table->fts->cache->next_doc_id = current_doc_id;
868 		table->fts->cache->first_doc_id = first_doc_id;
869 
870 	} else {
871 		fts_cache_t*            cache = table->fts->cache;
872 		fts_index_cache_t*      index_cache;
873 
874 		rw_lock_x_lock(&cache->init_lock);
875 
876 		index_cache = fts_find_index_cache(cache, index);
877 
878 		if (index_cache != NULL) {
879 			while (index->index_fts_syncing
880 				&& !trx_is_interrupted(trx)) {
881 				DICT_BG_YIELD(trx);
882 			}
883 
884 			if (index_cache->words) {
885 				fts_words_free(index_cache->words);
886 				rbt_free(index_cache->words);
887 			}
888 
889 			ib_vector_remove(cache->indexes, *(void**) index_cache);
890 		}
891 
892 		if (cache->get_docs) {
893 			fts_reset_get_doc(cache);
894 		}
895 
896 		rw_lock_x_unlock(&cache->init_lock);
897 	}
898 
899 	err = fts_drop_index_tables(trx, index);
900 
901 	ib_vector_remove(indexes, (const void*) index);
902 
903 	return(err);
904 }
905 
906 /****************************************************************//**
907 Free the query graph but check whether dict_sys->mutex is already
908 held */
909 void
fts_que_graph_free_check_lock(fts_table_t * fts_table,const fts_index_cache_t * index_cache,que_t * graph)910 fts_que_graph_free_check_lock(
911 /*==========================*/
912 	fts_table_t*		fts_table,	/*!< in: FTS table */
913 	const fts_index_cache_t*index_cache,	/*!< in: FTS index cache */
914 	que_t*			graph)		/*!< in: query graph */
915 {
916 	ibool	has_dict = FALSE;
917 
918 	if (fts_table && fts_table->table) {
919 		ut_ad(fts_table->table->fts);
920 
921 		has_dict = fts_table->table->fts->fts_status
922 			 & TABLE_DICT_LOCKED;
923 	} else if (index_cache) {
924 		ut_ad(index_cache->index->table->fts);
925 
926 		has_dict = index_cache->index->table->fts->fts_status
927 			 & TABLE_DICT_LOCKED;
928 	}
929 
930 	if (!has_dict) {
931 		mutex_enter(&dict_sys->mutex);
932 	}
933 
934 	ut_ad(mutex_own(&dict_sys->mutex));
935 
936 	que_graph_free(graph);
937 
938 	if (!has_dict) {
939 		mutex_exit(&dict_sys->mutex);
940 	}
941 }
942 
943 /****************************************************************//**
944 Create an FTS index cache. */
945 CHARSET_INFO*
fts_index_get_charset(dict_index_t * index)946 fts_index_get_charset(
947 /*==================*/
948 	dict_index_t*		index)		/*!< in: FTS index */
949 {
950 	CHARSET_INFO*		charset = NULL;
951 	dict_field_t*		field;
952 	ulint			prtype;
953 
954 	field = dict_index_get_nth_field(index, 0);
955 	prtype = field->col->prtype;
956 
957 	charset = fts_get_charset(prtype);
958 
959 #ifdef FTS_DEBUG
960 	/* Set up charset info for this index. Please note all
961 	field of the FTS index should have the same charset */
962 	for (i = 1; i < index->n_fields; i++) {
963 		CHARSET_INFO*   fld_charset;
964 
965 		field = dict_index_get_nth_field(index, i);
966 		prtype = field->col->prtype;
967 
968 		fld_charset = fts_get_charset(prtype);
969 
970 		/* All FTS columns should have the same charset */
971 		if (charset) {
972 			ut_a(charset == fld_charset);
973 		} else {
974 			charset = fld_charset;
975 		}
976 	}
977 #endif
978 
979 	return(charset);
980 
981 }
982 /****************************************************************//**
983 Create an FTS index cache.
984 @return Index Cache */
985 fts_index_cache_t*
fts_cache_index_cache_create(dict_table_t * table,dict_index_t * index)986 fts_cache_index_cache_create(
987 /*=========================*/
988 	dict_table_t*		table,		/*!< in: table with FTS index */
989 	dict_index_t*		index)		/*!< in: FTS index */
990 {
991 	ulint			n_bytes;
992 	fts_index_cache_t*	index_cache;
993 	fts_cache_t*		cache = table->fts->cache;
994 
995 	ut_a(cache != NULL);
996 
997 	ut_ad(rw_lock_own(&cache->init_lock, RW_LOCK_X));
998 
999 	/* Must not already exist in the cache vector. */
1000 	ut_a(fts_find_index_cache(cache, index) == NULL);
1001 
1002 	index_cache = static_cast<fts_index_cache_t*>(
1003 		ib_vector_push(cache->indexes, NULL));
1004 
1005 	memset(index_cache, 0x0, sizeof(*index_cache));
1006 
1007 	index_cache->index = index;
1008 
1009 	index_cache->charset = fts_index_get_charset(index);
1010 
1011 	n_bytes = sizeof(que_t*) * FTS_NUM_AUX_INDEX;
1012 
1013 	index_cache->ins_graph = static_cast<que_t**>(
1014 		mem_heap_zalloc(static_cast<mem_heap_t*>(
1015 			cache->self_heap->arg), n_bytes));
1016 
1017 	index_cache->sel_graph = static_cast<que_t**>(
1018 		mem_heap_zalloc(static_cast<mem_heap_t*>(
1019 			cache->self_heap->arg), n_bytes));
1020 
1021 	fts_index_cache_init(cache->sync_heap, index_cache);
1022 
1023 	if (cache->get_docs) {
1024 		fts_reset_get_doc(cache);
1025 	}
1026 
1027 	return(index_cache);
1028 }
1029 
1030 /****************************************************************//**
1031 Release all resources help by the words rb tree e.g., the node ilist. */
1032 static
1033 void
fts_words_free(ib_rbt_t * words)1034 fts_words_free(
1035 /*===========*/
1036 	ib_rbt_t*	words)			/*!< in: rb tree of words */
1037 {
1038 	const ib_rbt_node_t*	rbt_node;
1039 
1040 	/* Free the resources held by a word. */
1041 	for (rbt_node = rbt_first(words);
1042 	     rbt_node != NULL;
1043 	     rbt_node = rbt_first(words)) {
1044 
1045 		ulint			i;
1046 		fts_tokenizer_word_t*	word;
1047 
1048 		word = rbt_value(fts_tokenizer_word_t, rbt_node);
1049 
1050 		/* Free the ilists of this word. */
1051 		for (i = 0; i < ib_vector_size(word->nodes); ++i) {
1052 
1053 			fts_node_t* fts_node = static_cast<fts_node_t*>(
1054 				ib_vector_get(word->nodes, i));
1055 
1056 			ut_free(fts_node->ilist);
1057 			fts_node->ilist = NULL;
1058 		}
1059 
1060 		/* NOTE: We are responsible for free'ing the node */
1061 		ut_free(rbt_remove_node(words, rbt_node));
1062 	}
1063 }
1064 
1065 /** Clear cache.
1066 @param[in,out]	cache	fts cache */
1067 void
fts_cache_clear(fts_cache_t * cache)1068 fts_cache_clear(
1069 	fts_cache_t*	cache)
1070 {
1071 	ulint		i;
1072 
1073 	for (i = 0; i < ib_vector_size(cache->indexes); ++i) {
1074 		ulint			j;
1075 		fts_index_cache_t*	index_cache;
1076 
1077 		index_cache = static_cast<fts_index_cache_t*>(
1078 			ib_vector_get(cache->indexes, i));
1079 
1080 		fts_words_free(index_cache->words);
1081 
1082 		rbt_free(index_cache->words);
1083 
1084 		index_cache->words = NULL;
1085 
1086 		for (j = 0; j < FTS_NUM_AUX_INDEX; ++j) {
1087 
1088 			if (index_cache->ins_graph[j] != NULL) {
1089 
1090 				fts_que_graph_free_check_lock(
1091 					NULL, index_cache,
1092 					index_cache->ins_graph[j]);
1093 
1094 				index_cache->ins_graph[j] = NULL;
1095 			}
1096 
1097 			if (index_cache->sel_graph[j] != NULL) {
1098 
1099 				fts_que_graph_free_check_lock(
1100 					NULL, index_cache,
1101 					index_cache->sel_graph[j]);
1102 
1103 				index_cache->sel_graph[j] = NULL;
1104 			}
1105 		}
1106 
1107 		index_cache->doc_stats = NULL;
1108 	}
1109 
1110 	mem_heap_free(static_cast<mem_heap_t*>(cache->sync_heap->arg));
1111 	cache->sync_heap->arg = NULL;
1112 
1113 	fts_need_sync = false;
1114 
1115 	cache->total_size = 0;
1116 
1117 	mutex_enter((ib_mutex_t*) &cache->deleted_lock);
1118 	cache->deleted_doc_ids = NULL;
1119 	mutex_exit((ib_mutex_t*) &cache->deleted_lock);
1120 }
1121 
1122 /*********************************************************************//**
1123 Search the index specific cache for a particular FTS index.
1124 @return the index cache else NULL */
1125 UNIV_INLINE
1126 fts_index_cache_t*
fts_get_index_cache(fts_cache_t * cache,const dict_index_t * index)1127 fts_get_index_cache(
1128 /*================*/
1129 	fts_cache_t*		cache,		/*!< in: cache to search */
1130 	const dict_index_t*	index)		/*!< in: index to search for */
1131 {
1132 	ulint			i;
1133 
1134 	ut_ad(rw_lock_own((rw_lock_t*) &cache->lock, RW_LOCK_X)
1135 	      || rw_lock_own((rw_lock_t*) &cache->init_lock, RW_LOCK_X));
1136 
1137 	for (i = 0; i < ib_vector_size(cache->indexes); ++i) {
1138 		fts_index_cache_t*	index_cache;
1139 
1140 		index_cache = static_cast<fts_index_cache_t*>(
1141 			ib_vector_get(cache->indexes, i));
1142 
1143 		if (index_cache->index == index) {
1144 
1145 			return(index_cache);
1146 		}
1147 	}
1148 
1149 	return(NULL);
1150 }
1151 
#ifdef FTS_DEBUG
/*********************************************************************//**
Search the cache's get_docs vector for the get_doc structure whose
index cache refers to the given index. Only compiled when FTS_DEBUG is
defined. Debug builds assert that the caller holds cache->init_lock in
X mode.
@return the fts_get_doc_t item else NULL */
static
fts_get_doc_t*
fts_get_index_get_doc(
/*==================*/
	fts_cache_t*		cache,		/*!< in: cache to search */
	const dict_index_t*	index)		/*!< in: index to search for */
{
	ut_ad(rw_lock_own((rw_lock_t*) &cache->init_lock, RW_LOCK_X));

	fts_get_doc_t*	match = NULL;

	/* Stop scanning as soon as a matching entry has been found. */
	for (ulint pos = 0;
	     match == NULL && pos < ib_vector_size(cache->get_docs);
	     ++pos) {

		fts_get_doc_t*	candidate = static_cast<fts_get_doc_t*>(
			ib_vector_get(cache->get_docs, pos));

		if (candidate->index_cache->index == index) {
			match = candidate;
		}
	}

	return(match);
}
#endif
1182 
/**********************************************************************//**
Free the FTS cache: destroy its rw-locks, mutexes and the sync event,
free the cached stopword tree and the sync heap memory when present,
and finally free the cache heap. */
void
fts_cache_destroy(
/*==============*/
	fts_cache_t*	cache)			/*!< in: cache to destroy */
{
	/* Tear down the synchronisation objects embedded in the cache. */
	rw_lock_free(&cache->lock);
	rw_lock_free(&cache->init_lock);
	mutex_free(&cache->optimize_lock);
	mutex_free(&cache->deleted_lock);
	mutex_free(&cache->doc_id_lock);
	os_event_destroy(cache->sync->event);

	/* The cached stopword tree is optional. */
	if (cache->stopword_info.cached_stopword) {
		rbt_free(cache->stopword_info.cached_stopword);
	}

	/* The sync heap memory may already have been released:
	fts_cache_clear() frees it and sets the pointer to NULL. */
	if (cache->sync_heap->arg) {
		mem_heap_free(static_cast<mem_heap_t*>(cache->sync_heap->arg));
	}

	/* NOTE(review): the cache object itself is presumably allocated
	from cache_heap, so "cache" must not be used after this call -
	confirm against the cache creation code. */
	mem_heap_free(cache->cache_heap);
}
1207 
1208 /**********************************************************************//**
1209 Find an existing word, or if not found, create one and return it.
1210 @return specified word token */
1211 static
1212 fts_tokenizer_word_t*
fts_tokenizer_word_get(fts_cache_t * cache,fts_index_cache_t * index_cache,fts_string_t * text)1213 fts_tokenizer_word_get(
1214 /*===================*/
1215 	fts_cache_t*	cache,			/*!< in: cache */
1216 	fts_index_cache_t*
1217 			index_cache,		/*!< in: index cache */
1218 	fts_string_t*	text)			/*!< in: node text */
1219 {
1220 	fts_tokenizer_word_t*	word;
1221 	ib_rbt_bound_t		parent;
1222 
1223 	ut_ad(rw_lock_own(&cache->lock, RW_LOCK_X));
1224 
1225 	ut_ad(current_thd != NULL);
1226 	/* If it is a stopword, do not index it */
1227 	if (!fts_check_token(text,
1228 		    cache->stopword_info.cached_stopword,
1229 		    index_cache->index->is_ngram,
1230 		    index_cache->charset,
1231 		    thd_has_ft_ignore_stopwords(current_thd))) {
1232 
1233 		return(NULL);
1234 	}
1235 
1236 	/* Check if we found a match, if not then add word to tree. */
1237 	if (rbt_search(index_cache->words, &parent, text) != 0) {
1238 		mem_heap_t*		heap;
1239 		fts_tokenizer_word_t	new_word;
1240 
1241 		heap = static_cast<mem_heap_t*>(cache->sync_heap->arg);
1242 
1243 		new_word.nodes = ib_vector_create(
1244 			cache->sync_heap, sizeof(fts_node_t), 4);
1245 
1246 		fts_string_dup(&new_word.text, text, heap);
1247 
1248 		parent.last = rbt_add_node(
1249 			index_cache->words, &parent, &new_word);
1250 
1251 		/* Take into account the RB tree memory use and the vector. */
1252 		cache->total_size += sizeof(new_word)
1253 			+ sizeof(ib_rbt_node_t)
1254 			+ text->f_len
1255 			+ (sizeof(fts_node_t) * 4)
1256 			+ sizeof(*new_word.nodes);
1257 
1258 		ut_ad(rbt_validate(index_cache->words));
1259 	}
1260 
1261 	word = rbt_value(fts_tokenizer_word_t, parent.last);
1262 
1263 	return(word);
1264 }
1265 
/**********************************************************************//**
Add the given doc_id/word positions to the given node's ilist.

The fragment appended to the ilist consists of the encoded difference
between doc_id and the node's last doc id, followed by the encoded
difference of each position from the previous one (the first position
is encoded relative to 0), followed by a single 0x00 byte.

If the fragment does not fit into the space already allocated for the
ilist, a larger buffer is allocated, the old contents are copied over
and the old buffer is freed. The node's ilist_size, first_doc_id,
last_doc_id and doc_count are updated. When a cache is passed, its
total_size is adjusted as well; cache may be NULL. */
void
fts_cache_node_add_positions(
/*=========================*/
	fts_cache_t*	cache,		/*!< in: cache, may be NULL */
	fts_node_t*	node,		/*!< in: word node */
	doc_id_t	doc_id,		/*!< in: doc id */
	ib_vector_t*	positions)	/*!< in: fts_token_t::positions */
{
	ulint		i;
	byte*		ptr;
	byte*		ilist;
	ulint		enc_len;
	ulint		last_pos;
	byte*		ptr_start;
	ulint		doc_id_delta;

#ifdef UNIV_DEBUG
	/* The lock can only be checked when a cache was passed in. */
	if (cache) {
		ut_ad(rw_lock_own(&cache->lock, RW_LOCK_X));
	}
#endif /* UNIV_DEBUG */

	/* Doc ids must not decrease within a node, otherwise the delta
	computed below would be meaningless. */
	ut_ad(doc_id >= node->last_doc_id);

	/* Calculate the space required to store the ilist. */
	doc_id_delta = (ulint)(doc_id - node->last_doc_id);
	enc_len = fts_get_encoded_len(doc_id_delta);

	/* First pass over the positions: only sum up the encoded length. */
	last_pos = 0;
	for (i = 0; i < ib_vector_size(positions); i++) {
		ulint	pos = *(static_cast<ulint*>(
			ib_vector_get(positions, i)));

		/* Positions are expected in strictly increasing order. */
		ut_ad(last_pos == 0 || pos > last_pos);

		enc_len += fts_get_encoded_len(pos - last_pos);
		last_pos = pos;
	}

	/* The 0x00 byte at the end of the token positions list. */
	enc_len++;

	if ((node->ilist_size_alloc - node->ilist_size) >= enc_len) {
		/* No need to allocate more space, we can fit in the new
		data at the end of the old one. */
		ilist = NULL;
		ptr = node->ilist + node->ilist_size;
	} else {
		ulint	new_size = node->ilist_size + enc_len;

		/* Over-reserve space by a fixed size for small lengths and
		by 20% for lengths >= 48 bytes. */
		if (new_size < 16) {
			new_size = 16;
		} else if (new_size < 32) {
			new_size = 32;
		} else if (new_size < 48) {
			new_size = 48;
		} else {
			new_size = (ulint)(1.2 * new_size);
		}

		/* The new fragment is written straight into the new buffer
		at the offset where the old data will end; the old data is
		copied in front of it further below. */
		ilist = static_cast<byte*>(ut_malloc_nokey(new_size));
		ptr = ilist + node->ilist_size;

		node->ilist_size_alloc = new_size;
		if (cache) {
			cache->total_size += new_size;
		}
	}

	ptr_start = ptr;

	/* Encode the new fragment. */
	ptr += fts_encode_int(doc_id_delta, ptr);

	/* Second pass over the positions: write the encoded deltas. */
	last_pos = 0;
	for (i = 0; i < ib_vector_size(positions); i++) {
		ulint	pos = *(static_cast<ulint*>(
			 ib_vector_get(positions, i)));

		ptr += fts_encode_int(pos - last_pos, ptr);
		last_pos = pos;
	}

	*ptr++ = 0;

	/* The number of bytes written must match the length computed in
	the first pass. */
	ut_a(enc_len == (ulint)(ptr - ptr_start));

	if (ilist) {
		/* Copy old ilist to the start of the new one and switch the
		new one into place in the node. */
		if (node->ilist_size > 0) {
			memcpy(ilist, node->ilist, node->ilist_size);
			ut_free(node->ilist);
			/* NOTE(review): the allocated size (new_size) was
			added to total_size above, but only the used size of
			the old buffer is subtracted here, not its allocated
			size - confirm whether this is intended. */
			if (cache) {
				cache->total_size -= node->ilist_size;
			}
		}

		node->ilist = ilist;
	}

	node->ilist_size += enc_len;

	/* The first doc id is only recorded once per node. */
	if (node->first_doc_id == FTS_NULL_DOC_ID) {
		node->first_doc_id = doc_id;
	}

	node->last_doc_id = doc_id;
	++node->doc_count;
}
1380 
1381 /**********************************************************************//**
1382 Add document to the cache. */
1383 static
1384 void
fts_cache_add_doc(fts_cache_t * cache,fts_index_cache_t * index_cache,doc_id_t doc_id,ib_rbt_t * tokens)1385 fts_cache_add_doc(
1386 /*==============*/
1387 	fts_cache_t*	cache,			/*!< in: cache */
1388 	fts_index_cache_t*
1389 			index_cache,		/*!< in: index cache */
1390 	doc_id_t	doc_id,			/*!< in: doc id to add */
1391 	ib_rbt_t*	tokens)			/*!< in: document tokens */
1392 {
1393 	const ib_rbt_node_t*	node;
1394 	ulint			n_words;
1395 	fts_doc_stats_t*	doc_stats;
1396 
1397 	if (!tokens) {
1398 		return;
1399 	}
1400 
1401 	ut_ad(rw_lock_own(&cache->lock, RW_LOCK_X));
1402 
1403 	n_words = rbt_size(tokens);
1404 
1405 	for (node = rbt_first(tokens); node; node = rbt_first(tokens)) {
1406 
1407 		fts_tokenizer_word_t*	word;
1408 		fts_node_t*		fts_node = NULL;
1409 		fts_token_t*		token = rbt_value(fts_token_t, node);
1410 
1411 		/* Find and/or add token to the cache. */
1412 		word = fts_tokenizer_word_get(
1413 			cache, index_cache, &token->text);
1414 
1415 		if (!word) {
1416 			ut_free(rbt_remove_node(tokens, node));
1417 			continue;
1418 		}
1419 
1420 		if (ib_vector_size(word->nodes) > 0) {
1421 			fts_node = static_cast<fts_node_t*>(
1422 				ib_vector_last(word->nodes));
1423 		}
1424 
1425 		if (fts_node == NULL || fts_node->synced
1426 		    || fts_node->ilist_size > FTS_ILIST_MAX_SIZE
1427 		    || doc_id < fts_node->last_doc_id) {
1428 
1429 			fts_node = static_cast<fts_node_t*>(
1430 				ib_vector_push(word->nodes, NULL));
1431 
1432 			memset(fts_node, 0x0, sizeof(*fts_node));
1433 
1434 			cache->total_size += sizeof(*fts_node);
1435 		}
1436 
1437 		fts_cache_node_add_positions(
1438 			cache, fts_node, doc_id, token->positions);
1439 
1440 		ut_free(rbt_remove_node(tokens, node));
1441 	}
1442 
1443 	ut_a(rbt_empty(tokens));
1444 
1445 	/* Add to doc ids processed so far. */
1446 	doc_stats = static_cast<fts_doc_stats_t*>(
1447 		ib_vector_push(index_cache->doc_stats, NULL));
1448 
1449 	doc_stats->doc_id = doc_id;
1450 	doc_stats->word_count = n_words;
1451 
1452 	/* Add the doc stats memory usage too. */
1453 	cache->total_size += sizeof(*doc_stats);
1454 
1455 	if (doc_id > cache->sync->max_doc_id) {
1456 		cache->sync->max_doc_id = doc_id;
1457 	}
1458 }
1459 
1460 /****************************************************************//**
1461 Drops a table. If the table can't be found we return a SUCCESS code.
1462 @return DB_SUCCESS or error code */
1463 static MY_ATTRIBUTE((nonnull, warn_unused_result))
1464 dberr_t
fts_drop_table(trx_t * trx,const char * table_name)1465 fts_drop_table(
1466 /*===========*/
1467 	trx_t*		trx,			/*!< in: transaction */
1468 	const char*	table_name)		/*!< in: table to drop */
1469 {
1470 	dict_table_t*	table;
1471 	dberr_t		error = DB_SUCCESS;
1472 
1473 	/* Check that the table exists in our data dictionary.
1474 	Similar to regular drop table case, we will open table with
1475 	DICT_ERR_IGNORE_INDEX_ROOT and DICT_ERR_IGNORE_CORRUPT option */
1476 	table = dict_table_open_on_name(
1477 		table_name, TRUE, FALSE,
1478 		static_cast<dict_err_ignore_t>(
1479                         DICT_ERR_IGNORE_INDEX_ROOT | DICT_ERR_IGNORE_CORRUPT));
1480 
1481 	if (table != 0) {
1482 
1483 		dict_table_close(table, TRUE, FALSE);
1484 
1485 		/* Pass nonatomic=false (dont allow data dict unlock),
1486 		because the transaction may hold locks on SYS_* tables from
1487 		previous calls to fts_drop_table(). */
1488 		error = row_drop_table_for_mysql(table_name, trx, true, false);
1489 
1490 		if (error != DB_SUCCESS) {
1491 			ib::error() << "Unable to drop FTS index aux table "
1492 				<< table_name << ": " << ut_strerr(error);
1493 		}
1494 	} else {
1495 		error = DB_FAIL;
1496 	}
1497 
1498 	return(error);
1499 }
1500 
1501 /****************************************************************//**
1502 Rename a single auxiliary table due to database name change.
1503 @return DB_SUCCESS or error code */
1504 static MY_ATTRIBUTE((nonnull, warn_unused_result))
1505 dberr_t
fts_rename_one_aux_table(const char * new_name,const char * fts_table_old_name,trx_t * trx)1506 fts_rename_one_aux_table(
1507 /*=====================*/
1508 	const char*	new_name,		/*!< in: new parent tbl name */
1509 	const char*	fts_table_old_name,	/*!< in: old aux tbl name */
1510 	trx_t*		trx)			/*!< in: transaction */
1511 {
1512 	char	fts_table_new_name[MAX_TABLE_NAME_LEN];
1513 	ulint	new_db_name_len = dict_get_db_name_len(new_name);
1514 	ulint	old_db_name_len = dict_get_db_name_len(fts_table_old_name);
1515 	ulint	table_new_name_len = strlen(fts_table_old_name)
1516 				     + new_db_name_len - old_db_name_len;
1517 
1518 	/* Check if the new and old database names are the same, if so,
1519 	nothing to do */
1520 	ut_ad((new_db_name_len != old_db_name_len)
1521 	      || strncmp(new_name, fts_table_old_name, old_db_name_len) != 0);
1522 
1523 	/* Get the database name from "new_name", and table name
1524 	from the fts_table_old_name */
1525 	strncpy(fts_table_new_name, new_name, new_db_name_len);
1526 	strncpy(fts_table_new_name + new_db_name_len,
1527 	       strchr(fts_table_old_name, '/'),
1528 	       table_new_name_len - new_db_name_len);
1529 	fts_table_new_name[table_new_name_len] = 0;
1530 
1531 	return(row_rename_table_for_mysql(
1532 		fts_table_old_name, fts_table_new_name, trx, false));
1533 }
1534 
1535 /****************************************************************//**
1536 Rename auxiliary tables for all fts index for a table. This(rename)
1537 is due to database name change
1538 @return DB_SUCCESS or error code */
1539 dberr_t
fts_rename_aux_tables(dict_table_t * table,const char * new_name,trx_t * trx)1540 fts_rename_aux_tables(
1541 /*==================*/
1542 	dict_table_t*	table,		/*!< in: user Table */
1543 	const char*     new_name,       /*!< in: new table name */
1544 	trx_t*		trx)		/*!< in: transaction */
1545 {
1546 	ulint		i;
1547 	fts_table_t	fts_table;
1548 
1549 	FTS_INIT_FTS_TABLE(&fts_table, NULL, FTS_COMMON_TABLE, table);
1550 
1551 	/* Rename common auxiliary tables */
1552 	for (i = 0; fts_common_tables[i] != NULL; ++i) {
1553 		char	old_table_name[MAX_FULL_NAME_LEN];
1554 		dberr_t	err = DB_SUCCESS;
1555 
1556 		fts_table.suffix = fts_common_tables[i];
1557 
1558 		fts_get_table_name(&fts_table, old_table_name);
1559 
1560 		err = fts_rename_one_aux_table(new_name, old_table_name, trx);
1561 
1562 		if (err != DB_SUCCESS) {
1563 			return(err);
1564 		}
1565 	}
1566 
1567 	fts_t*	fts = table->fts;
1568 
1569 	/* Rename index specific auxiliary tables */
1570 	for (i = 0; fts->indexes != 0 && i < ib_vector_size(fts->indexes);
1571 	     ++i) {
1572 		dict_index_t*	index;
1573 
1574 		index = static_cast<dict_index_t*>(
1575 			ib_vector_getp(fts->indexes, i));
1576 
1577 		FTS_INIT_INDEX_TABLE(&fts_table, NULL, FTS_INDEX_TABLE, index);
1578 
1579 		for (ulint j = 0; j < FTS_NUM_AUX_INDEX; ++j) {
1580 			dberr_t	err;
1581 			char	old_table_name[MAX_FULL_NAME_LEN];
1582 
1583 			fts_table.suffix = fts_get_suffix(j);
1584 
1585 			fts_get_table_name(&fts_table, old_table_name);
1586 
1587 			err = fts_rename_one_aux_table(
1588 				new_name, old_table_name, trx);
1589 
1590 			DBUG_EXECUTE_IF("fts_rename_failure",
1591 					err = DB_DEADLOCK;
1592 					fts_sql_rollback(trx););
1593 
1594 			if (err != DB_SUCCESS) {
1595 				return(err);
1596 			}
1597 		}
1598 	}
1599 
1600 	return(DB_SUCCESS);
1601 }
1602 
1603 /****************************************************************//**
1604 Drops the common ancillary tables needed for supporting an FTS index
1605 on the given table. row_mysql_lock_data_dictionary must have been called
1606 before this.
1607 @return DB_SUCCESS or error code */
1608 static MY_ATTRIBUTE((nonnull, warn_unused_result))
1609 dberr_t
fts_drop_common_tables(trx_t * trx,fts_table_t * fts_table)1610 fts_drop_common_tables(
1611 /*===================*/
1612 	trx_t*		trx,			/*!< in: transaction */
1613 	fts_table_t*	fts_table)		/*!< in: table with an FTS
1614 						index */
1615 {
1616 	ulint		i;
1617 	dberr_t		error = DB_SUCCESS;
1618 
1619 	for (i = 0; fts_common_tables[i] != NULL; ++i) {
1620 		dberr_t	err;
1621 		char	table_name[MAX_FULL_NAME_LEN];
1622 
1623 		fts_table->suffix = fts_common_tables[i];
1624 
1625 		fts_get_table_name(fts_table, table_name);
1626 
1627 		err = fts_drop_table(trx, table_name);
1628 
1629 		/* We only return the status of the last error. */
1630 		if (err != DB_SUCCESS && err != DB_FAIL) {
1631 			error = err;
1632 		}
1633 	}
1634 
1635 	return(error);
1636 }
1637 
1638 /****************************************************************//**
1639 Since we do a horizontal split on the index table, we need to drop
1640 all the split tables.
1641 @return DB_SUCCESS or error code */
1642 dberr_t
fts_drop_index_split_tables(trx_t * trx,dict_index_t * index)1643 fts_drop_index_split_tables(
1644 /*========================*/
1645 	trx_t*		trx,			/*!< in: transaction */
1646 	dict_index_t*	index)			/*!< in: fts instance */
1647 
1648 {
1649 	ulint		i;
1650 	fts_table_t	fts_table;
1651 	dberr_t		error = DB_SUCCESS;
1652 
1653 	FTS_INIT_INDEX_TABLE(&fts_table, NULL, FTS_INDEX_TABLE, index);
1654 
1655 	for (i = 0; i < FTS_NUM_AUX_INDEX; ++i) {
1656 		dberr_t	err;
1657 		char	table_name[MAX_FULL_NAME_LEN];
1658 
1659 		fts_table.suffix = fts_get_suffix(i);
1660 
1661 		fts_get_table_name(&fts_table, table_name);
1662 
1663 		err = fts_drop_table(trx, table_name);
1664 
1665 		/* We only return the status of the last error. */
1666 		if (err != DB_SUCCESS && err != DB_FAIL) {
1667 			error = err;
1668 		}
1669 	}
1670 
1671 	return(error);
1672 }
1673 
1674 /****************************************************************//**
1675 Drops FTS auxiliary tables for an FTS index
1676 @return DB_SUCCESS or error code */
1677 dberr_t
fts_drop_index_tables(trx_t * trx,dict_index_t * index)1678 fts_drop_index_tables(
1679 /*==================*/
1680 	trx_t*		trx,		/*!< in: transaction */
1681 	dict_index_t*	index)		/*!< in: Index to drop */
1682 {
1683 	dberr_t			error = DB_SUCCESS;
1684 
1685 #ifdef FTS_DOC_STATS_DEBUG
1686 	fts_table_t		fts_table;
1687 	static const char*	index_tables[] = {
1688 		"DOC_ID",
1689 		NULL
1690 	};
1691 #endif /* FTS_DOC_STATS_DEBUG */
1692 
1693 	dberr_t	err = fts_drop_index_split_tables(trx, index);
1694 
1695 	/* We only return the status of the last error. */
1696 	if (err != DB_SUCCESS) {
1697 		error = err;
1698 	}
1699 
1700 #ifdef FTS_DOC_STATS_DEBUG
1701 	FTS_INIT_INDEX_TABLE(&fts_table, NULL, FTS_INDEX_TABLE, index);
1702 
1703 	for (ulint i = 0; index_tables[i] != NULL; ++i) {
1704 		char	table_name[MAX_FULL_NAME_LEN];
1705 
1706 		fts_table.suffix = index_tables[i];
1707 
1708 		fts_get_table_name(&fts_table, table_name);
1709 
1710 		err = fts_drop_table(trx, table_name);
1711 
1712 		/* We only return the status of the last error. */
1713 		if (err != DB_SUCCESS && err != DB_FAIL) {
1714 			error = err;
1715 		}
1716 	}
1717 #endif /* FTS_DOC_STATS_DEBUG */
1718 
1719 	return(error);
1720 }
1721 
1722 /****************************************************************//**
1723 Drops FTS ancillary tables needed for supporting an FTS index
1724 on the given table. row_mysql_lock_data_dictionary must have been called
1725 before this.
1726 @return DB_SUCCESS or error code */
1727 static MY_ATTRIBUTE((nonnull, warn_unused_result))
1728 dberr_t
fts_drop_all_index_tables(trx_t * trx,fts_t * fts)1729 fts_drop_all_index_tables(
1730 /*======================*/
1731 	trx_t*		trx,			/*!< in: transaction */
1732 	fts_t*		fts)			/*!< in: fts instance */
1733 {
1734 	dberr_t		error = DB_SUCCESS;
1735 
1736 	for (ulint i = 0;
1737 	     fts->indexes != 0 && i < ib_vector_size(fts->indexes);
1738 	     ++i) {
1739 
1740 		dberr_t		err;
1741 		dict_index_t*	index;
1742 
1743 		index = static_cast<dict_index_t*>(
1744 			ib_vector_getp(fts->indexes, i));
1745 
1746 		err = fts_drop_index_tables(trx, index);
1747 
1748 		if (err != DB_SUCCESS) {
1749 			error = err;
1750 		}
1751 	}
1752 
1753 	return(error);
1754 }
1755 
1756 /*********************************************************************//**
1757 Drops the ancillary tables needed for supporting an FTS index on a
1758 given table. row_mysql_lock_data_dictionary must have been called before
1759 this.
1760 @return DB_SUCCESS or error code */
1761 dberr_t
fts_drop_tables(trx_t * trx,dict_table_t * table)1762 fts_drop_tables(
1763 /*============*/
1764 	trx_t*		trx,		/*!< in: transaction */
1765 	dict_table_t*	table)		/*!< in: table has the FTS index */
1766 {
1767 	dberr_t		error;
1768 	fts_table_t	fts_table;
1769 
1770 	FTS_INIT_FTS_TABLE(&fts_table, NULL, FTS_COMMON_TABLE, table);
1771 
1772 	/* TODO: This is not atomic and can cause problems during recovery. */
1773 
1774 	error = fts_drop_common_tables(trx, &fts_table);
1775 
1776 	if (error == DB_SUCCESS) {
1777 		error = fts_drop_all_index_tables(trx, table->fts);
1778 	}
1779 
1780 	return(error);
1781 }
1782 
1783 /** Extract only the required flags from table->flags2 for FTS Aux
1784 tables.
1785 @param[in]	in_flags2	Table flags2
1786 @return extracted flags2 for FTS aux tables */
1787 static inline
1788 ulint
fts_get_table_flags2_for_aux_tables(ulint flags2)1789 fts_get_table_flags2_for_aux_tables(
1790 	ulint	flags2)
1791 {
1792 	/* Extract the file_per_table flag, temporary file flag and
1793 	encryption flag from the main FTS table flags2 */
1794 	return((flags2 & DICT_TF2_USE_FILE_PER_TABLE) |
1795                (flags2 & DICT_TF2_ENCRYPTION) |
1796 	       (flags2 & DICT_TF2_TEMPORARY));
1797 }
1798 
1799 /** Create dict_table_t object for FTS Aux tables.
1800 @param[in]	aux_table_name	FTS Aux table name
1801 @param[in]	table		table object of FTS Index
1802 @param[in]	n_cols		number of columns for FTS Aux table
1803 @return table object for FTS Aux table */
1804 static
1805 dict_table_t*
fts_create_in_mem_aux_table(const char * aux_table_name,const dict_table_t * table,ulint n_cols)1806 fts_create_in_mem_aux_table(
1807 	const char*		aux_table_name,
1808 	const dict_table_t*	table,
1809 	ulint			n_cols)
1810 {
1811 	dict_table_t*	new_table = dict_mem_table_create(
1812 		aux_table_name, table->space, n_cols, 0, table->flags,
1813 		fts_get_table_flags2_for_aux_tables(table->flags2));
1814 
1815 	if (DICT_TF_HAS_SHARED_SPACE(table->flags)) {
1816 		ut_ad(table->space == fil_space_get_id_by_name(
1817 			table->tablespace()));
1818 		new_table->tablespace = mem_heap_strdup(
1819 			new_table->heap, table->tablespace);
1820 	}
1821 
1822 	if (DICT_TF_HAS_DATA_DIR(table->flags)) {
1823 		ut_ad(table->data_dir_path != NULL);
1824 		new_table->data_dir_path = mem_heap_strdup(
1825 			new_table->heap, table->data_dir_path);
1826 	}
1827 
1828 	return(new_table);
1829 }
1830 
1831 /** Function to create on FTS common table.
1832 @param[in,out]	trx		InnoDB transaction
1833 @param[in]	table		Table that has FTS Index
1834 @param[in]	fts_table_name	FTS AUX table name
1835 @param[in]	fts_suffix	FTS AUX table suffix
1836 @param[in]	heap		heap
1837 @return table object if created, else NULL */
1838 static
1839 dict_table_t*
fts_create_one_common_table(trx_t * trx,const dict_table_t * table,const char * fts_table_name,const char * fts_suffix,mem_heap_t * heap)1840 fts_create_one_common_table(
1841 	trx_t*			trx,
1842 	const dict_table_t*	table,
1843 	const char*		fts_table_name,
1844 	const char*		fts_suffix,
1845 	mem_heap_t*		heap)
1846 {
1847 	dict_table_t*		new_table = NULL;
1848 	dberr_t			error;
1849 	bool			is_config = strcmp(fts_suffix, "CONFIG") == 0;
1850 
1851 	if (!is_config) {
1852 
1853 		new_table = fts_create_in_mem_aux_table(
1854 			fts_table_name, table, FTS_DELETED_TABLE_NUM_COLS);
1855 
1856 		dict_mem_table_add_col(
1857 			new_table, heap, "doc_id", DATA_INT, DATA_UNSIGNED,
1858 			FTS_DELETED_TABLE_COL_LEN);
1859 	} else {
1860 		/* Config table has different schema. */
1861 		new_table = fts_create_in_mem_aux_table(
1862 			fts_table_name, table, FTS_CONFIG_TABLE_NUM_COLS);
1863 
1864 		dict_mem_table_add_col(
1865 			new_table, heap, "key", DATA_VARCHAR, 0,
1866 			FTS_CONFIG_TABLE_KEY_COL_LEN);
1867 
1868 		dict_mem_table_add_col(
1869 			new_table, heap, "value", DATA_VARCHAR, DATA_NOT_NULL,
1870 			FTS_CONFIG_TABLE_VALUE_COL_LEN);
1871 	}
1872 
1873 	error = row_create_table_for_mysql(new_table, NULL, trx, false,
1874 					   FIL_ENCRYPTION_DEFAULT, CreateInfoEncryptionKeyId());
1875 
1876 	if (error == DB_SUCCESS) {
1877 
1878 		dict_index_t*	index = dict_mem_index_create(
1879 			fts_table_name, "FTS_COMMON_TABLE_IND",
1880 			new_table->space, DICT_UNIQUE|DICT_CLUSTERED, 1);
1881 
1882 		if (!is_config) {
1883 			dict_mem_index_add_field(index, "doc_id", 0);
1884 		} else {
1885 			dict_mem_index_add_field(index, "key", 0);
1886 		}
1887 
1888 		/* We save and restore trx->dict_operation because
1889 		row_create_index_for_mysql() changes the operation to
1890 		TRX_DICT_OP_TABLE. */
1891 		trx_dict_op_t op = trx_get_dict_operation(trx);
1892 
1893 		error =	row_create_index_for_mysql(index, trx, NULL, NULL);
1894 
1895 		trx->dict_operation = op;
1896 	}
1897 
1898 	if (error != DB_SUCCESS) {
1899 		trx->error_state = error;
1900 		dict_mem_table_free(new_table);
1901 		new_table = NULL;
1902 		ib::warn() << "Failed to create FTS common table "
1903 			<< fts_table_name;
1904 	}
1905 	return(new_table);
1906 }
1907 
/** Creates the common auxiliary tables needed for supporting an FTS index
on the given table. row_mysql_lock_data_dictionary must have been called
before this.
The following tables are created.
CREATE TABLE $FTS_PREFIX_DELETED
	(doc_id BIGINT UNSIGNED, UNIQUE CLUSTERED INDEX on doc_id)
CREATE TABLE $FTS_PREFIX_DELETED_CACHE
	(doc_id BIGINT UNSIGNED, UNIQUE CLUSTERED INDEX on doc_id)
CREATE TABLE $FTS_PREFIX_BEING_DELETED
	(doc_id BIGINT UNSIGNED, UNIQUE CLUSTERED INDEX on doc_id)
CREATE TABLE $FTS_PREFIX_BEING_DELETED_CACHE
	(doc_id BIGINT UNSIGNED, UNIQUE CLUSTERED INDEX on doc_id)
CREATE TABLE $FTS_PREFIX_CONFIG
	(key CHAR(50), value CHAR(200), UNIQUE CLUSTERED INDEX on key)

Any existing common tables are dropped first. After the tables have been
created, the default settings are inserted into the CONFIG table and,
unless skip_doc_id_index is set, a unique index named
FTS_DOC_ID_INDEX_NAME on the FTS_DOC_ID_COL_NAME column is created on
the table called "name". If any step fails, the common tables created
by this call are dropped again.
@param[in,out]	trx			transaction
@param[in]	table			table with FTS index
@param[in]	name			table name normalized
@param[in]	skip_doc_id_index	Skip index on doc id
@return DB_SUCCESS on success, otherwise an error code */
dberr_t
fts_create_common_tables(
	trx_t*			trx,
	const dict_table_t*	table,
	const char*		name,
	bool			skip_doc_id_index)
{
	dberr_t		error;
	que_t*		graph;
	fts_table_t	fts_table;
	mem_heap_t*	heap = mem_heap_create(1024);
	pars_info_t*	info;
	char		fts_name[MAX_FULL_NAME_LEN];
	/* One name buffer per entry of fts_common_tables. */
	char		full_name[sizeof(fts_common_tables) / sizeof(char*)]
				[MAX_FULL_NAME_LEN];

	dict_index_t*					index = NULL;
	trx_dict_op_t					op;
	/* common_tables vector is used for dropping FTS common tables
	on error condition. */
	std::vector<dict_table_t*>			common_tables;
	std::vector<dict_table_t*>::const_iterator	it;

	FTS_INIT_FTS_TABLE(&fts_table, NULL, FTS_COMMON_TABLE, table);

	/* Start from a clean state: drop common tables that may be left
	over. Nothing has been created yet, so the cleanup at func_exit
	has nothing to drop if this fails. */
	error = fts_drop_common_tables(trx, &fts_table);

	if (error != DB_SUCCESS) {

		goto func_exit;
	}

	/* Create the FTS tables that are common to an FTS index. */
	for (ulint i = 0; fts_common_tables[i] != NULL; ++i) {

		fts_table.suffix = fts_common_tables[i];
		fts_get_table_name(&fts_table, full_name[i]);
		dict_table_t*	common_table = fts_create_one_common_table(
			trx, table, full_name[i], fts_table.suffix, heap);

		 if (common_table == NULL) {
			error = DB_ERROR;
			goto func_exit;
		} else {
			/* Remember the table so that it can be dropped if
			a later step fails. */
			common_tables.push_back(common_table);
		}

		DBUG_EXECUTE_IF("ib_fts_aux_table_error",
			/* Return error after creating FTS_AUX_CONFIG table. */
			if (i == 4) {
				error = DB_ERROR;
				goto func_exit;
			}
		);

	}

	/* Write the default settings to the config table. */
	info = pars_info_create();

	fts_table.suffix = "CONFIG";
	fts_get_table_name(&fts_table, fts_name);
	pars_info_bind_id(info, true, "config_table", fts_name);

	graph = fts_parse_sql_no_dict_lock(
		&fts_table, info, fts_config_table_insert_values_sql);

	error = fts_eval_sql(trx, graph);

	que_graph_free(graph);

	/* Stop here on failure, or when the caller does not want the
	doc id index to be created. */
	if (error != DB_SUCCESS || skip_doc_id_index) {

		goto func_exit;
	}

	/* Create the unique index on the doc id column of table "name". */
	index = dict_mem_index_create(
		name, FTS_DOC_ID_INDEX_NAME, table->space,
		DICT_UNIQUE, 1);
	dict_mem_index_add_field(index, FTS_DOC_ID_COL_NAME, 0);

	/* trx->dict_operation is saved and restored around the index
	creation. */
	op = trx_get_dict_operation(trx);

	error =	row_create_index_for_mysql(index, trx, NULL, NULL);

	trx->dict_operation = op;

func_exit:
	if (error != DB_SUCCESS) {

		/* Drop every common table created by this call.
		NOTE(review): the result of row_drop_table_for_mysql() is
		ignored here, so a failed cleanup goes unreported. */
		for (it = common_tables.begin(); it != common_tables.end();
		     ++it) {
			row_drop_table_for_mysql(
				(*it)->name.m_name, trx, FALSE);
		}
	}

	common_tables.clear();
	mem_heap_free(heap);

	return(error);
}
2029 /** Creates one FTS auxiliary index table for an FTS index.
2030 @param[in,out]	trx		transaction
2031 @param[in]	index		the index instance
2032 @param[in]	fts_table	fts_table structure
2033 @param[in]	heap		memory heap
2034 @return DB_SUCCESS or error code */
2035 static
2036 dict_table_t*
fts_create_one_index_table(trx_t * trx,const dict_index_t * index,fts_table_t * fts_table,mem_heap_t * heap)2037 fts_create_one_index_table(
2038 	trx_t*			trx,
2039 	const dict_index_t*	index,
2040 	fts_table_t*		fts_table,
2041 	mem_heap_t*		heap)
2042 {
2043 	dict_field_t*		field;
2044 	dict_table_t*		new_table = NULL;
2045 	char			table_name[MAX_FULL_NAME_LEN];
2046 	dberr_t			error;
2047 	CHARSET_INFO*		charset;
2048 
2049 	ut_ad(index->type & DICT_FTS);
2050 
2051 	fts_get_table_name(fts_table, table_name);
2052 
2053 	new_table = fts_create_in_mem_aux_table(
2054 			table_name, fts_table->table,
2055 			FTS_AUX_INDEX_TABLE_NUM_COLS);
2056 
2057 	field = dict_index_get_nth_field(index, 0);
2058 	charset = fts_get_charset(field->col->prtype);
2059 
2060 	dict_mem_table_add_col(new_table, heap, "word",
2061 			       charset == &my_charset_latin1
2062 			       ? DATA_VARCHAR : DATA_VARMYSQL,
2063 			       field->col->prtype,
2064 			       FTS_INDEX_WORD_LEN);
2065 
2066 	dict_mem_table_add_col(new_table, heap, "first_doc_id", DATA_INT,
2067 			       DATA_NOT_NULL | DATA_UNSIGNED,
2068 			       FTS_INDEX_FIRST_DOC_ID_LEN);
2069 
2070 	dict_mem_table_add_col(new_table, heap, "last_doc_id", DATA_INT,
2071 			       DATA_NOT_NULL | DATA_UNSIGNED,
2072 			       FTS_INDEX_LAST_DOC_ID_LEN);
2073 
2074 	dict_mem_table_add_col(new_table, heap, "doc_count", DATA_INT,
2075 			       DATA_NOT_NULL | DATA_UNSIGNED,
2076 			       FTS_INDEX_DOC_COUNT_LEN);
2077 
2078 	/* The precise type calculation is as follows:
2079 	least signficiant byte: MySQL type code (not applicable for sys cols)
2080 	second least : DATA_NOT_NULL | DATA_BINARY_TYPE
2081 	third least  : the MySQL charset-collation code (DATA_MTYPE_MAX) */
2082 
2083 	dict_mem_table_add_col(
2084 		new_table, heap, "ilist", DATA_BLOB,
2085 		(DATA_MTYPE_MAX << 16) | DATA_UNSIGNED | DATA_NOT_NULL,
2086 		FTS_INDEX_ILIST_LEN);
2087 
2088 	error = row_create_table_for_mysql(new_table, NULL, trx, false,
2089 					   FIL_ENCRYPTION_DEFAULT, CreateInfoEncryptionKeyId());
2090 
2091 	if (error == DB_SUCCESS) {
2092 		dict_index_t*	index = dict_mem_index_create(
2093 			table_name, "FTS_INDEX_TABLE_IND", new_table->space,
2094 			DICT_UNIQUE|DICT_CLUSTERED, 2);
2095 		dict_mem_index_add_field(index, "word", 0);
2096 		dict_mem_index_add_field(index, "first_doc_id", 0);
2097 
2098 		trx_dict_op_t op = trx_get_dict_operation(trx);
2099 
2100 		error =	row_create_index_for_mysql(index, trx, NULL, NULL);
2101 
2102 		trx->dict_operation = op;
2103 	}
2104 
2105 	if (error != DB_SUCCESS) {
2106 		trx->error_state = error;
2107 		dict_mem_table_free(new_table);
2108 		new_table = NULL;
2109 		ib::warn() << "Failed to create FTS index table "
2110 			<< table_name;
2111 	}
2112 
2113 	return(new_table);
2114 }
2115 
2116 /** Create auxiliary index tables for an FTS index.
2117 @param[in,out]	trx		transaction
2118 @param[in]	index		the index instance
2119 @param[in]	table_name	table name
2120 @param[in]	table_id	the table id
2121 @return DB_SUCCESS or error code */
2122 dberr_t
fts_create_index_tables_low(trx_t * trx,const dict_index_t * index,const char * table_name,table_id_t table_id)2123 fts_create_index_tables_low(
2124 	trx_t*			trx,
2125 	const dict_index_t*	index,
2126 	const char*		table_name,
2127 	table_id_t		table_id)
2128 {
2129 	ulint		i;
2130 	fts_table_t	fts_table;
2131 	dberr_t		error = DB_SUCCESS;
2132 	mem_heap_t*	heap = mem_heap_create(1024);
2133 
2134 	fts_table.type = FTS_INDEX_TABLE;
2135 	fts_table.index_id = index->id;
2136 	fts_table.table_id = table_id;
2137 	fts_table.parent = table_name;
2138 	fts_table.table = index->table;
2139 
2140 #ifdef FTS_DOC_STATS_DEBUG
2141 	/* Create the FTS auxiliary tables that are specific
2142 	to an FTS index. */
2143 	info = pars_info_create();
2144 
2145 	fts_table.suffix = "DOC_ID";
2146 	fts_get_table_name(&fts_table, fts_name);
2147 
2148 	pars_info_bind_id(info, true, "doc_id_table", fts_name);
2149 
2150 	graph = fts_parse_sql_no_dict_lock(NULL, info,
2151 					   fts_create_index_tables_sql);
2152 
2153 	error = fts_eval_sql(trx, graph);
2154 	que_graph_free(graph);
2155 #endif /* FTS_DOC_STATS_DEBUG */
2156 
2157 	/* aux_idx_tables vector is used for dropping FTS AUX INDEX
2158 	tables on error condition. */
2159 	std::vector<dict_table_t*>			aux_idx_tables;
2160 	std::vector<dict_table_t*>::const_iterator	it;
2161 
2162 	for (i = 0; i < FTS_NUM_AUX_INDEX && error == DB_SUCCESS; ++i) {
2163 		dict_table_t*	new_table;
2164 
2165 		/* Create the FTS auxiliary tables that are specific
2166 		to an FTS index. We need to preserve the table_id %s
2167 		which fts_parse_sql_no_dict_lock() will fill in for us. */
2168 		fts_table.suffix = fts_get_suffix(i);
2169 
2170 		new_table = fts_create_one_index_table(
2171 			trx, index, &fts_table, heap);
2172 
2173 		if (new_table == NULL) {
2174 			error = DB_FAIL;
2175 			break;
2176 		} else {
2177 			aux_idx_tables.push_back(new_table);
2178 		}
2179 
2180 		DBUG_EXECUTE_IF("ib_fts_index_table_error",
2181 			/* Return error after creating FTS_INDEX_5
2182 			aux table. */
2183 			if (i == 4) {
2184 				error = DB_FAIL;
2185 				break;
2186 			}
2187 		);
2188 	}
2189 
2190 	if (error != DB_SUCCESS) {
2191 
2192 		for (it = aux_idx_tables.begin(); it != aux_idx_tables.end();
2193 		     ++it) {
2194 			row_drop_table_for_mysql(
2195 				(*it)->name.m_name, trx, FALSE);
2196 		}
2197 	}
2198 
2199 	aux_idx_tables.clear();
2200 	mem_heap_free(heap);
2201 
2202 	return(error);
2203 }
2204 
2205 /** Creates the column specific ancillary tables needed for supporting an
2206 FTS index on the given table. row_mysql_lock_data_dictionary must have
2207 been called before this.
2208 
2209 All FTS AUX Index tables have the following schema.
2210 CREAT TABLE $FTS_PREFIX_INDEX_[1-6](
2211 	word		VARCHAR(FTS_MAX_WORD_LEN),
2212 	first_doc_id	INT NOT NULL,
2213 	last_doc_id	UNSIGNED NOT NULL,
2214 	doc_count	UNSIGNED INT NOT NULL,
2215 	ilist		VARBINARY NOT NULL,
2216 	UNIQUE CLUSTERED INDEX ON (word, first_doc_id))
2217 @param[in,out]	trx	transaction
2218 @param[in]	index	index instance
2219 @return DB_SUCCESS or error code */
2220 dberr_t
fts_create_index_tables(trx_t * trx,const dict_index_t * index)2221 fts_create_index_tables(
2222 	trx_t*			trx,
2223 	const dict_index_t*	index)
2224 {
2225 	dberr_t		err;
2226 	dict_table_t*	table;
2227 
2228 	table = dict_table_get_low(index->table_name);
2229 	ut_a(table != NULL);
2230 
2231 	err = fts_create_index_tables_low(
2232 		trx, index, table->name.m_name, table->id);
2233 
2234 	if (err == DB_SUCCESS) {
2235 		trx_commit(trx);
2236 	}
2237 
2238 	return(err);
2239 }
#if 0
/******************************************************************//**
Map a row state to a printable name (currently compiled out).
@return string representation of state */
static
const char*
fts_get_state_str(
/*==============*/
	fts_row_state	state)	/*!< in: state */
{
	const char*	str;

	switch (state) {
	case FTS_INSERT:
		str = "INSERT";
		break;

	case FTS_MODIFY:
		str = "MODIFY";
		break;

	case FTS_DELETE:
		str = "DELETE";
		break;

	case FTS_NOTHING:
		str = "NOTHING";
		break;

	case FTS_INVALID:
		str = "INVALID";
		break;

	default:
		str = "UNKNOWN";
		break;
	}

	return(str);
}
#endif
2271 
2272 /******************************************************************//**
2273 Calculate the new state of a row given the existing state and a new event.
2274 @return new state of row */
2275 static
2276 fts_row_state
fts_trx_row_get_new_state(fts_row_state old_state,fts_row_state event)2277 fts_trx_row_get_new_state(
2278 /*======================*/
2279 	fts_row_state	old_state,		/*!< in: existing state of row */
2280 	fts_row_state	event)			/*!< in: new event */
2281 {
2282 	/* The rules for transforming states:
2283 
2284 	I = inserted
2285 	M = modified
2286 	D = deleted
2287 	N = nothing
2288 
2289 	M+D -> D:
2290 
2291 	If the row existed before the transaction started and it is modified
2292 	during the transaction, followed by a deletion of the row, only the
2293 	deletion will be signaled.
2294 
2295 	M+ -> M:
2296 
2297 	If the row existed before the transaction started and it is modified
2298 	more than once during the transaction, only the last modification
2299 	will be signaled.
2300 
2301 	IM*D -> N:
2302 
2303 	If a new row is added during the transaction (and possibly modified
2304 	after its initial insertion) but it is deleted before the end of the
2305 	transaction, nothing will be signaled.
2306 
2307 	IM* -> I:
2308 
2309 	If a new row is added during the transaction and modified after its
2310 	initial insertion, only the addition will be signaled.
2311 
2312 	M*DI -> M:
2313 
2314 	If the row existed before the transaction started and it is deleted,
2315 	then re-inserted, only a modification will be signaled. Note that
2316 	this case is only possible if the table is using the row's primary
2317 	key for FTS row ids, since those can be re-inserted by the user,
2318 	which is not true for InnoDB generated row ids.
2319 
2320 	It is easily seen that the above rules decompose such that we do not
2321 	need to store the row's entire history of events. Instead, we can
2322 	store just one state for the row and update that when new events
2323 	arrive. Then we can implement the above rules as a two-dimensional
2324 	look-up table, and get checking of invalid combinations "for free"
2325 	in the process. */
2326 
2327 	/* The lookup table for transforming states. old_state is the
2328 	Y-axis, event is the X-axis. */
2329 	static const fts_row_state table[4][4] = {
2330 			/*    I            M            D            N */
2331 		/* I */	{ FTS_INVALID, FTS_INSERT,  FTS_NOTHING, FTS_INVALID },
2332 		/* M */	{ FTS_INVALID, FTS_MODIFY,  FTS_DELETE,  FTS_INVALID },
2333 		/* D */	{ FTS_MODIFY,  FTS_INVALID, FTS_INVALID, FTS_INVALID },
2334 		/* N */	{ FTS_INVALID, FTS_INVALID, FTS_INVALID, FTS_INVALID }
2335 	};
2336 
2337 	fts_row_state result;
2338 
2339 	ut_a(old_state < FTS_INVALID);
2340 	ut_a(event < FTS_INVALID);
2341 
2342 	result = table[(int) old_state][(int) event];
2343 	ut_a(result != FTS_INVALID);
2344 
2345 	return(result);
2346 }
2347 
2348 /******************************************************************//**
2349 Create a savepoint instance.
2350 @return savepoint instance */
2351 static
2352 fts_savepoint_t*
fts_savepoint_create(ib_vector_t * savepoints,const char * name,mem_heap_t * heap)2353 fts_savepoint_create(
2354 /*=================*/
2355 	ib_vector_t*	savepoints,		/*!< out: InnoDB transaction */
2356 	const char*	name,			/*!< in: savepoint name */
2357 	mem_heap_t*	heap)			/*!< in: heap */
2358 {
2359 	fts_savepoint_t*	savepoint;
2360 
2361 	savepoint = static_cast<fts_savepoint_t*>(
2362 		ib_vector_push(savepoints, NULL));
2363 
2364 	memset(savepoint, 0x0, sizeof(*savepoint));
2365 
2366 	if (name) {
2367 		savepoint->name = mem_heap_strdup(heap, name);
2368 	}
2369 
2370 	savepoint->tables = rbt_create(
2371 		sizeof(fts_trx_table_t*), fts_trx_table_cmp);
2372 
2373 	return(savepoint);
2374 }
2375 
2376 /******************************************************************//**
2377 Create an FTS trx.
2378 @return FTS trx */
2379 static
2380 fts_trx_t*
fts_trx_create(trx_t * trx)2381 fts_trx_create(
2382 /*===========*/
2383 	trx_t*	trx)				/*!< in/out: InnoDB
2384 						transaction */
2385 {
2386 	fts_trx_t*		ftt;
2387 	ib_alloc_t*		heap_alloc;
2388 	mem_heap_t*		heap = mem_heap_create(1024);
2389 	trx_named_savept_t*	savep;
2390 
2391 	ut_a(trx->fts_trx == NULL);
2392 
2393 	ftt = static_cast<fts_trx_t*>(mem_heap_alloc(heap, sizeof(fts_trx_t)));
2394 	ftt->trx = trx;
2395 	ftt->heap = heap;
2396 
2397 	heap_alloc = ib_heap_allocator_create(heap);
2398 
2399 	ftt->savepoints = static_cast<ib_vector_t*>(ib_vector_create(
2400 		heap_alloc, sizeof(fts_savepoint_t), 4));
2401 
2402 	ftt->last_stmt = static_cast<ib_vector_t*>(ib_vector_create(
2403 		heap_alloc, sizeof(fts_savepoint_t), 4));
2404 
2405 	/* Default instance has no name and no heap. */
2406 	fts_savepoint_create(ftt->savepoints, NULL, NULL);
2407 	fts_savepoint_create(ftt->last_stmt, NULL, NULL);
2408 
2409 	/* Copy savepoints that already set before. */
2410 	for (savep = UT_LIST_GET_FIRST(trx->trx_savepoints);
2411 	     savep != NULL;
2412 	     savep = UT_LIST_GET_NEXT(trx_savepoints, savep)) {
2413 
2414 		fts_savepoint_take(trx, ftt, savep->name);
2415 	}
2416 
2417 	return(ftt);
2418 }
2419 
2420 /******************************************************************//**
2421 Create an FTS trx table.
2422 @return FTS trx table */
2423 static
2424 fts_trx_table_t*
fts_trx_table_create(fts_trx_t * fts_trx,dict_table_t * table)2425 fts_trx_table_create(
2426 /*=================*/
2427 	fts_trx_t*	fts_trx,		/*!< in: FTS trx */
2428 	dict_table_t*	table)			/*!< in: table */
2429 {
2430 	fts_trx_table_t*	ftt;
2431 
2432 	ftt = static_cast<fts_trx_table_t*>(
2433 		mem_heap_alloc(fts_trx->heap, sizeof(*ftt)));
2434 
2435 	if (ftt != NULL) {
2436 		memset(ftt, 0x0, sizeof(*ftt));
2437 	}
2438 
2439 	ftt->table = table;
2440 	ftt->fts_trx = fts_trx;
2441 
2442 	ftt->rows = rbt_create(sizeof(fts_trx_row_t), fts_trx_row_doc_id_cmp);
2443 
2444 	return(ftt);
2445 }
2446 
2447 /******************************************************************//**
2448 Clone an FTS trx table.
2449 @return FTS trx table */
2450 static
2451 fts_trx_table_t*
fts_trx_table_clone(const fts_trx_table_t * ftt_src)2452 fts_trx_table_clone(
2453 /*=================*/
2454 	const fts_trx_table_t*	ftt_src)	/*!< in: FTS trx */
2455 {
2456 	fts_trx_table_t*	ftt;
2457 
2458 	ftt = static_cast<fts_trx_table_t*>(
2459 		mem_heap_alloc(ftt_src->fts_trx->heap, sizeof(*ftt)));
2460 
2461 	memset(ftt, 0x0, sizeof(*ftt));
2462 
2463 	ftt->table = ftt_src->table;
2464 	ftt->fts_trx = ftt_src->fts_trx;
2465 
2466 	ftt->rows = rbt_create(sizeof(fts_trx_row_t), fts_trx_row_doc_id_cmp);
2467 
2468 	/* Copy the rb tree values to the new savepoint. */
2469 	rbt_merge_uniq(ftt->rows, ftt_src->rows);
2470 
2471 	/* These are only added on commit. At this stage we only have
2472 	the updated row state. */
2473 	ut_a(ftt_src->added_doc_ids == NULL);
2474 
2475 	return(ftt);
2476 }
2477 
2478 /******************************************************************//**
2479 Initialize the FTS trx instance.
2480 @return FTS trx instance */
2481 static
2482 fts_trx_table_t*
fts_trx_init(trx_t * trx,dict_table_t * table,ib_vector_t * savepoints)2483 fts_trx_init(
2484 /*=========*/
2485 	trx_t*			trx,		/*!< in: transaction */
2486 	dict_table_t*		table,		/*!< in: FTS table instance */
2487 	ib_vector_t*		savepoints)	/*!< in: Savepoints */
2488 {
2489 	fts_trx_table_t*	ftt;
2490 	ib_rbt_bound_t		parent;
2491 	ib_rbt_t*		tables;
2492 	fts_savepoint_t*	savepoint;
2493 
2494 	savepoint = static_cast<fts_savepoint_t*>(ib_vector_last(savepoints));
2495 
2496 	tables = savepoint->tables;
2497 	rbt_search_cmp(tables, &parent, &table->id, fts_trx_table_id_cmp, NULL);
2498 
2499 	if (parent.result == 0) {
2500 		fts_trx_table_t**	fttp;
2501 
2502 		fttp = rbt_value(fts_trx_table_t*, parent.last);
2503 		ftt = *fttp;
2504 	} else {
2505 		ftt = fts_trx_table_create(trx->fts_trx, table);
2506 		rbt_add_node(tables, &parent, &ftt);
2507 	}
2508 
2509 	ut_a(ftt->table == table);
2510 
2511 	return(ftt);
2512 }
2513 
2514 /******************************************************************//**
2515 Notify the FTS system about an operation on an FTS-indexed table. */
2516 static
2517 void
fts_trx_table_add_op(fts_trx_table_t * ftt,doc_id_t doc_id,fts_row_state state,ib_vector_t * fts_indexes)2518 fts_trx_table_add_op(
2519 /*=================*/
2520 	fts_trx_table_t*ftt,			/*!< in: FTS trx table */
2521 	doc_id_t	doc_id,			/*!< in: doc id */
2522 	fts_row_state	state,			/*!< in: state of the row */
2523 	ib_vector_t*	fts_indexes)		/*!< in: FTS indexes affected */
2524 {
2525 	ib_rbt_t*	rows;
2526 	ib_rbt_bound_t	parent;
2527 
2528 	rows = ftt->rows;
2529 	rbt_search(rows, &parent, &doc_id);
2530 
2531 	/* Row id found, update state, and if new state is FTS_NOTHING,
2532 	we delete the row from our tree. */
2533 	if (parent.result == 0) {
2534 		fts_trx_row_t*	row = rbt_value(fts_trx_row_t, parent.last);
2535 
2536 		row->state = fts_trx_row_get_new_state(row->state, state);
2537 
2538 		if (row->state == FTS_NOTHING) {
2539 			if (row->fts_indexes) {
2540 				ib_vector_free(row->fts_indexes);
2541 			}
2542 
2543 			ut_free(rbt_remove_node(rows, parent.last));
2544 			row = NULL;
2545 		} else if (row->fts_indexes != NULL) {
2546 			ib_vector_free(row->fts_indexes);
2547 			row->fts_indexes = fts_indexes;
2548 		}
2549 
2550 	} else { /* Row-id not found, create a new one. */
2551 		fts_trx_row_t	row;
2552 
2553 		row.doc_id = doc_id;
2554 		row.state = state;
2555 		row.fts_indexes = fts_indexes;
2556 
2557 		rbt_add_node(rows, &parent, &row);
2558 	}
2559 }
2560 
2561 /******************************************************************//**
2562 Notify the FTS system about an operation on an FTS-indexed table. */
2563 void
fts_trx_add_op(trx_t * trx,dict_table_t * table,doc_id_t doc_id,fts_row_state state,ib_vector_t * fts_indexes)2564 fts_trx_add_op(
2565 /*===========*/
2566 	trx_t*		trx,			/*!< in: InnoDB transaction */
2567 	dict_table_t*	table,			/*!< in: table */
2568 	doc_id_t	doc_id,			/*!< in: new doc id */
2569 	fts_row_state	state,			/*!< in: state of the row */
2570 	ib_vector_t*	fts_indexes)		/*!< in: FTS indexes affected
2571 						(NULL=all) */
2572 {
2573 	fts_trx_table_t*	tran_ftt;
2574 	fts_trx_table_t*	stmt_ftt;
2575 
2576 	if (!trx->fts_trx) {
2577 		trx->fts_trx = fts_trx_create(trx);
2578 	}
2579 
2580 	tran_ftt = fts_trx_init(trx, table, trx->fts_trx->savepoints);
2581 	stmt_ftt = fts_trx_init(trx, table, trx->fts_trx->last_stmt);
2582 
2583 	fts_trx_table_add_op(tran_ftt, doc_id, state, fts_indexes);
2584 	fts_trx_table_add_op(stmt_ftt, doc_id, state, fts_indexes);
2585 }
2586 
2587 /******************************************************************//**
2588 Fetch callback that converts a textual document id to a binary value and
2589 stores it in the given place.
2590 @return always returns NULL */
2591 static
2592 ibool
fts_fetch_store_doc_id(void * row,void * user_arg)2593 fts_fetch_store_doc_id(
2594 /*===================*/
2595 	void*		row,			/*!< in: sel_node_t* */
2596 	void*		user_arg)		/*!< in: doc_id_t* to store
2597 						doc_id in */
2598 {
2599 	int		n_parsed;
2600 	sel_node_t*	node = static_cast<sel_node_t*>(row);
2601 	doc_id_t*	doc_id = static_cast<doc_id_t*>(user_arg);
2602 	dfield_t*	dfield = que_node_get_val(node->select_list);
2603 	dtype_t*	type = dfield_get_type(dfield);
2604 	ulint		len = dfield_get_len(dfield);
2605 
2606 	char		buf[32];
2607 
2608 	ut_a(dtype_get_mtype(type) == DATA_VARCHAR);
2609 	ut_a(len > 0 && len < sizeof(buf));
2610 
2611 	memcpy(buf, dfield_get_data(dfield), len);
2612 	buf[len] = '\0';
2613 
2614 	n_parsed = sscanf(buf, FTS_DOC_ID_FORMAT, doc_id);
2615 	ut_a(n_parsed == 1);
2616 
2617 	return(FALSE);
2618 }
2619 
#ifdef FTS_CACHE_SIZE_DEBUG
/******************************************************************//**
Get the max cache size in bytes. If there is an error reading the
value we simply print an error message here and return the default
value to the caller. A value outside the permitted range is replaced by
the nearest limit, with a warning.
@return max cache size in bytes */
static
ulint
fts_get_max_cache_size(
/*===================*/
	trx_t*		trx,			/*!< in: transaction */
	fts_table_t*	fts_table)		/*!< in: table instance */
{
	dberr_t		error;
	fts_string_t	value;
	ulint		cache_size_in_mb;

	/* Set to the default value. */
	cache_size_in_mb = FTS_CACHE_SIZE_LOWER_LIMIT_IN_MB;

	/* We set the length of value to the max bytes it can hold. This
	information is used by the callback that reads the value. */
	value.f_n_char = 0;
	value.f_len = FTS_MAX_CONFIG_VALUE_LEN;
	/* The explicit cast is required in C++ and matches
	fts_get_total_word_count(). */
	value.f_str = static_cast<byte*>(ut_malloc_nokey(value.f_len + 1));

	error = fts_config_get_value(
		trx, fts_table, FTS_MAX_CACHE_SIZE_IN_MB, &value);

	if (error == DB_SUCCESS) {

		/* Terminate the value before it is parsed as a number. */
		value.f_str[value.f_len] = 0;
		cache_size_in_mb = strtoul((char*) value.f_str, NULL, 10);

		if (cache_size_in_mb > FTS_CACHE_SIZE_UPPER_LIMIT_IN_MB) {

			ib::warn() << "FTS max cache size ("
				<< cache_size_in_mb << ") out of range."
				" Minimum value is "
				<< FTS_CACHE_SIZE_LOWER_LIMIT_IN_MB
				<< "MB and the maximum value is "
				<< FTS_CACHE_SIZE_UPPER_LIMIT_IN_MB
				<< "MB, setting cache size to upper limit";

			cache_size_in_mb = FTS_CACHE_SIZE_UPPER_LIMIT_IN_MB;

		} else if  (cache_size_in_mb
			    < FTS_CACHE_SIZE_LOWER_LIMIT_IN_MB) {

			/* Same wording as above, including the space in
			front of the upper limit. */
			ib::warn() << "FTS max cache size ("
				<< cache_size_in_mb << ") out of range."
				" Minimum value is "
				<< FTS_CACHE_SIZE_LOWER_LIMIT_IN_MB
				<< "MB and the maximum value is "
				<< FTS_CACHE_SIZE_UPPER_LIMIT_IN_MB
				<< "MB, setting cache size to lower limit";

			cache_size_in_mb = FTS_CACHE_SIZE_LOWER_LIMIT_IN_MB;
		}
	} else {
		ib::error() << "(" << ut_strerr(error) << ") reading max"
			" cache config value from config table";
	}

	ut_free(value.f_str);

	return(cache_size_in_mb * 1024 * 1024);
}
#endif
2689 
#ifdef FTS_DOC_STATS_DEBUG
/*********************************************************************//**
Read the total number of words of one FTS index from the config table.
*total is set to 0 when the value cannot be read.
@return DB_SUCCESS if all OK else error code */
dberr_t
fts_get_total_word_count(
/*=====================*/
	trx_t*		trx,			/*!< in: transaction */
	dict_index_t*	index,			/*!< in: for this index */
	ulint*		total)			/* out: total words */
{
	fts_string_t	value;

	*total = 0;

	/* The callback that reads the value uses f_len as the maximum
	number of bytes it may store, so set it to the buffer capacity. */
	value.f_n_char = 0;
	value.f_len = FTS_MAX_CONFIG_VALUE_LEN;
	value.f_str = static_cast<byte*>(ut_malloc_nokey(value.f_len + 1));

	const dberr_t	error = fts_config_get_index_value(
		trx, index, FTS_TOTAL_WORD_COUNT, &value);

	if (error != DB_SUCCESS) {
		ib::error() << "(" << ut_strerr(error) << ") reading total"
			" words value from config table";
	} else {
		/* Terminate the value before it is parsed as a number. */
		value.f_str[value.f_len] = 0;
		*total = strtoul(
			reinterpret_cast<char*>(value.f_str), NULL, 10);
	}

	ut_free(value.f_str);

	return(error);
}
#endif /* FTS_DOC_STATS_DEBUG */
2729 
2730 /*********************************************************************//**
2731 Update the next and last Doc ID in the CONFIG table to be the input
2732 "doc_id" value (+ 1). We would do so after each FTS index build or
2733 table truncate */
2734 void
fts_update_next_doc_id(trx_t * trx,const dict_table_t * table,const char * table_name,doc_id_t doc_id)2735 fts_update_next_doc_id(
2736 /*===================*/
2737 	trx_t*			trx,		/*!< in/out: transaction */
2738 	const dict_table_t*	table,		/*!< in: table */
2739 	const char*		table_name,	/*!< in: table name, or NULL */
2740 	doc_id_t		doc_id)		/*!< in: DOC ID to set */
2741 {
2742 	table->fts->cache->synced_doc_id = doc_id;
2743 	table->fts->cache->next_doc_id = doc_id + 1;
2744 
2745 	table->fts->cache->first_doc_id = table->fts->cache->next_doc_id;
2746 
2747 	fts_update_sync_doc_id(
2748 		table, table_name, table->fts->cache->synced_doc_id, trx);
2749 
2750 }
2751 
2752 /*********************************************************************//**
2753 Get the next available document id.
2754 @return DB_SUCCESS if OK */
2755 dberr_t
fts_get_next_doc_id(const dict_table_t * table,doc_id_t * doc_id)2756 fts_get_next_doc_id(
2757 /*================*/
2758 	const dict_table_t*	table,		/*!< in: table */
2759 	doc_id_t*		doc_id)		/*!< out: new document id */
2760 {
2761 	fts_cache_t*	cache = table->fts->cache;
2762 
2763 	/* If the Doc ID system has not yet been initialized, we
2764 	will consult the CONFIG table and user table to re-establish
2765 	the initial value of the Doc ID */
2766 	if (cache->first_doc_id == FTS_NULL_DOC_ID) {
2767 		fts_init_doc_id(table);
2768 	}
2769 
2770 	if (!DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID)) {
2771 		*doc_id = FTS_NULL_DOC_ID;
2772 		return(DB_SUCCESS);
2773 	}
2774 
2775 	mutex_enter(&cache->doc_id_lock);
2776 	*doc_id = ++cache->next_doc_id;
2777 	mutex_exit(&cache->doc_id_lock);
2778 
2779 	return(DB_SUCCESS);
2780 }
2781 
2782 /*********************************************************************//**
2783 This function fetch the Doc ID from CONFIG table, and compare with
2784 the Doc ID supplied. And store the larger one to the CONFIG table.
2785 @return DB_SUCCESS if OK */
2786 static MY_ATTRIBUTE((nonnull))
2787 dberr_t
fts_cmp_set_sync_doc_id(const dict_table_t * table,doc_id_t doc_id_cmp,ibool read_only,doc_id_t * doc_id)2788 fts_cmp_set_sync_doc_id(
2789 /*====================*/
2790 	const dict_table_t*	table,		/*!< in: table */
2791 	doc_id_t		doc_id_cmp,	/*!< in: Doc ID to compare */
2792 	ibool			read_only,	/*!< in: TRUE if read the
2793 						synced_doc_id only */
2794 	doc_id_t*		doc_id)		/*!< out: larger document id
2795 						after comparing "doc_id_cmp"
2796 						to the one stored in CONFIG
2797 						table */
2798 {
2799 	trx_t*		trx;
2800 	pars_info_t*	info;
2801 	dberr_t		error;
2802 	fts_table_t	fts_table;
2803 	que_t*		graph = NULL;
2804 	fts_cache_t*	cache = table->fts->cache;
2805 	char		table_name[MAX_FULL_NAME_LEN];
2806 retry:
2807 	ut_a(table->fts->doc_col != ULINT_UNDEFINED);
2808 
2809 	fts_table.suffix = "CONFIG";
2810 	fts_table.table_id = table->id;
2811 	fts_table.type = FTS_COMMON_TABLE;
2812 	fts_table.table = table;
2813 
2814 	fts_table.parent = table->name.m_name;
2815 
2816 	trx = trx_allocate_for_background();
2817 
2818 	trx->op_info = "update the next FTS document id";
2819 
2820 	info = pars_info_create();
2821 
2822 	pars_info_bind_function(
2823 		info, "my_func", fts_fetch_store_doc_id, doc_id);
2824 
2825 	fts_get_table_name(&fts_table, table_name);
2826 	pars_info_bind_id(info, true, "config_table", table_name);
2827 
2828 	graph = fts_parse_sql(
2829 		&fts_table, info,
2830 		"DECLARE FUNCTION my_func;\n"
2831 		"DECLARE CURSOR c IS SELECT value FROM $config_table"
2832 		" WHERE key = 'synced_doc_id' FOR UPDATE;\n"
2833 		"BEGIN\n"
2834 		""
2835 		"OPEN c;\n"
2836 		"WHILE 1 = 1 LOOP\n"
2837 		"  FETCH c INTO my_func();\n"
2838 		"  IF c % NOTFOUND THEN\n"
2839 		"    EXIT;\n"
2840 		"  END IF;\n"
2841 		"END LOOP;\n"
2842 		"CLOSE c;");
2843 
2844 	*doc_id = 0;
2845 
2846 	error = fts_eval_sql(trx, graph);
2847 
2848 	fts_que_graph_free_check_lock(&fts_table, NULL, graph);
2849 
2850 	// FIXME: We need to retry deadlock errors
2851 	if (error != DB_SUCCESS) {
2852 		goto func_exit;
2853 	}
2854 
2855 	if (read_only) {
2856 		goto func_exit;
2857 	}
2858 
2859 	if (doc_id_cmp == 0 && *doc_id) {
2860 		cache->synced_doc_id = *doc_id - 1;
2861 	} else {
2862 		cache->synced_doc_id = ut_max(doc_id_cmp, *doc_id);
2863 	}
2864 
2865 	mutex_enter(&cache->doc_id_lock);
2866 	/* For each sync operation, we will add next_doc_id by 1,
2867 	so to mark a sync operation */
2868 	if (cache->next_doc_id < cache->synced_doc_id + 1) {
2869 		cache->next_doc_id = cache->synced_doc_id + 1;
2870 	}
2871 	mutex_exit(&cache->doc_id_lock);
2872 
2873 	if (doc_id_cmp > *doc_id) {
2874 		error = fts_update_sync_doc_id(
2875 			table, table->name.m_name, cache->synced_doc_id, trx);
2876 	}
2877 
2878 	*doc_id = cache->next_doc_id;
2879 
2880 func_exit:
2881 
2882 	if (error == DB_SUCCESS) {
2883 		fts_sql_commit(trx);
2884 	} else {
2885 		*doc_id = 0;
2886 
2887 		ib::error() << "(" << ut_strerr(error) << ") while getting"
2888 			" next doc id.";
2889 		fts_sql_rollback(trx);
2890 
2891 		if (error == DB_DEADLOCK) {
2892 			os_thread_sleep(FTS_DEADLOCK_RETRY_WAIT);
2893 			goto retry;
2894 		}
2895 	}
2896 
2897 	trx_free_for_background(trx);
2898 
2899 	return(error);
2900 }
2901 
2902 /*********************************************************************//**
2903 Update the last document id. This function could create a new
2904 transaction to update the last document id.
2905 @return DB_SUCCESS if OK */
2906 static
2907 dberr_t
fts_update_sync_doc_id(const dict_table_t * table,const char * table_name,doc_id_t doc_id,trx_t * trx)2908 fts_update_sync_doc_id(
2909 /*===================*/
2910 	const dict_table_t*	table,		/*!< in: table */
2911 	const char*		table_name,	/*!< in: table name, or NULL */
2912 	doc_id_t		doc_id,		/*!< in: last document id */
2913 	trx_t*			trx)		/*!< in: update trx, or NULL */
2914 {
2915 	byte		id[FTS_MAX_ID_LEN];
2916 	pars_info_t*	info;
2917 	fts_table_t	fts_table;
2918 	ulint		id_len;
2919 	que_t*		graph = NULL;
2920 	dberr_t		error;
2921 	ibool		local_trx = FALSE;
2922 	fts_cache_t*	cache = table->fts->cache;
2923 	char		fts_name[MAX_FULL_NAME_LEN];
2924 
2925 	fts_table.suffix = "CONFIG";
2926 	fts_table.table_id = table->id;
2927 	fts_table.type = FTS_COMMON_TABLE;
2928 	fts_table.table = table;
2929 	if (table_name) {
2930 		fts_table.parent = table_name;
2931 	} else {
2932 		fts_table.parent = table->name.m_name;
2933 	}
2934 
2935 	if (!trx) {
2936 		trx = trx_allocate_for_background();
2937 
2938 		trx->op_info = "setting last FTS document id";
2939 		local_trx = TRUE;
2940 	}
2941 
2942 	info = pars_info_create();
2943 
2944 	id_len = ut_snprintf(
2945 		(char*) id, sizeof(id), FTS_DOC_ID_FORMAT, doc_id + 1);
2946 
2947 	pars_info_bind_varchar_literal(info, "doc_id", id, id_len);
2948 
2949 	fts_get_table_name(&fts_table, fts_name);
2950 	pars_info_bind_id(info, true, "table_name", fts_name);
2951 
2952 	graph = fts_parse_sql(
2953 		&fts_table, info,
2954 		"BEGIN"
2955 		" UPDATE $table_name SET value = :doc_id"
2956 		" WHERE key = 'synced_doc_id';");
2957 
2958 	error = fts_eval_sql(trx, graph);
2959 
2960 	fts_que_graph_free_check_lock(&fts_table, NULL, graph);
2961 
2962 	if (local_trx) {
2963 		if (error == DB_SUCCESS) {
2964 			fts_sql_commit(trx);
2965 			cache->synced_doc_id = doc_id;
2966 		} else {
2967 
2968 			ib::error() << "(" << ut_strerr(error) << ") while"
2969 				" updating last doc id.";
2970 
2971 			fts_sql_rollback(trx);
2972 		}
2973 		trx_free_for_background(trx);
2974 	}
2975 
2976 	return(error);
2977 }
2978 
2979 /*********************************************************************//**
2980 Create a new fts_doc_ids_t.
2981 @return new fts_doc_ids_t */
2982 fts_doc_ids_t*
fts_doc_ids_create(void)2983 fts_doc_ids_create(void)
2984 /*====================*/
2985 {
2986 	fts_doc_ids_t*	fts_doc_ids;
2987 	mem_heap_t*	heap = mem_heap_create(512);
2988 
2989 	fts_doc_ids = static_cast<fts_doc_ids_t*>(
2990 		mem_heap_alloc(heap, sizeof(*fts_doc_ids)));
2991 
2992 	fts_doc_ids->self_heap = ib_heap_allocator_create(heap);
2993 
2994 	fts_doc_ids->doc_ids = static_cast<ib_vector_t*>(ib_vector_create(
2995 		fts_doc_ids->self_heap, sizeof(fts_update_t), 32));
2996 
2997 	return(fts_doc_ids);
2998 }
2999 
3000 /*********************************************************************//**
3001 Free a fts_doc_ids_t. */
3002 void
fts_doc_ids_free(fts_doc_ids_t * fts_doc_ids)3003 fts_doc_ids_free(
3004 /*=============*/
3005 	fts_doc_ids_t*	fts_doc_ids)
3006 {
3007 	mem_heap_t*	heap = static_cast<mem_heap_t*>(
3008 		fts_doc_ids->self_heap->arg);
3009 
3010 	memset(fts_doc_ids, 0, sizeof(*fts_doc_ids));
3011 
3012 	mem_heap_free(heap);
3013 }
3014 
3015 /*********************************************************************//**
3016 Do commit-phase steps necessary for the insertion of a new row.
3017 @return DB_SUCCESS or error code */
3018 void
fts_add(fts_trx_table_t * ftt,fts_trx_row_t * row)3019 fts_add(
3020 /*====*/
3021 	fts_trx_table_t*ftt,			/*!< in: FTS trx table */
3022 	fts_trx_row_t*	row)			/*!< in: row */
3023 {
3024 	dict_table_t*	table = ftt->table;
3025 	doc_id_t	doc_id = row->doc_id;
3026 
3027 	ut_a(row->state == FTS_INSERT || row->state == FTS_MODIFY);
3028 
3029 	fts_add_doc_by_id(ftt, doc_id, row->fts_indexes);
3030 
3031 	mutex_enter(&table->fts->cache->deleted_lock);
3032 	++table->fts->cache->added;
3033 	mutex_exit(&table->fts->cache->deleted_lock);
3034 
3035 	if (!DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID)
3036 	    && doc_id >= table->fts->cache->next_doc_id) {
3037 		table->fts->cache->next_doc_id = doc_id + 1;
3038 	}
3039 }
3040 
3041 /*********************************************************************//**
3042 Do commit-phase steps necessary for the deletion of a row.
3043 @return DB_SUCCESS or error code */
3044 static MY_ATTRIBUTE((nonnull, warn_unused_result))
3045 dberr_t
fts_delete(fts_trx_table_t * ftt,fts_trx_row_t * row)3046 fts_delete(
3047 /*=======*/
3048 	fts_trx_table_t*ftt,			/*!< in: FTS trx table */
3049 	fts_trx_row_t*	row)			/*!< in: row */
3050 {
3051 	que_t*		graph;
3052 	fts_table_t	fts_table;
3053 	dberr_t		error = DB_SUCCESS;
3054 	doc_id_t	write_doc_id;
3055 	dict_table_t*	table = ftt->table;
3056 	doc_id_t	doc_id = row->doc_id;
3057 	trx_t*		trx = ftt->fts_trx->trx;
3058 	pars_info_t*	info = pars_info_create();
3059 	fts_cache_t*	cache = table->fts->cache;
3060 
3061 	/* we do not index Documents whose Doc ID value is 0 */
3062 	if (doc_id == FTS_NULL_DOC_ID) {
3063 		ut_ad(!DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID));
3064 		return(error);
3065 	}
3066 
3067 	ut_a(row->state == FTS_DELETE || row->state == FTS_MODIFY);
3068 
3069 	FTS_INIT_FTS_TABLE(&fts_table, "DELETED", FTS_COMMON_TABLE, table);
3070 
3071 	/* Convert to "storage" byte order. */
3072 	fts_write_doc_id((byte*) &write_doc_id, doc_id);
3073 	fts_bind_doc_id(info, "doc_id", &write_doc_id);
3074 
3075 	/* It is possible we update a record that has not yet been sync-ed
3076 	into cache from last crash (delete Doc will not initialize the
3077 	sync). Avoid any added counter accounting until the FTS cache
3078 	is re-established and sync-ed */
3079 	if (table->fts->fts_status & ADDED_TABLE_SYNCED
3080 	    && doc_id > cache->synced_doc_id) {
3081 		mutex_enter(&table->fts->cache->deleted_lock);
3082 
3083 		/* The Doc ID could belong to those left in
3084 		ADDED table from last crash. So need to check
3085 		if it is less than first_doc_id when we initialize
3086 		the Doc ID system after reboot */
3087 		if (doc_id >= table->fts->cache->first_doc_id
3088 		    && table->fts->cache->added > 0) {
3089 			--table->fts->cache->added;
3090 		}
3091 
3092 		mutex_exit(&table->fts->cache->deleted_lock);
3093 
3094 		/* Only if the row was really deleted. */
3095 		ut_a(row->state == FTS_DELETE || row->state == FTS_MODIFY);
3096 	}
3097 
3098 	/* Note the deleted document for OPTIMIZE to purge. */
3099 	if (error == DB_SUCCESS) {
3100 		char	table_name[MAX_FULL_NAME_LEN];
3101 
3102 		trx->op_info = "adding doc id to FTS DELETED";
3103 
3104 		info->graph_owns_us = TRUE;
3105 
3106 		fts_table.suffix = "DELETED";
3107 
3108 		fts_get_table_name(&fts_table, table_name);
3109 		pars_info_bind_id(info, true, "deleted", table_name);
3110 
3111 		graph = fts_parse_sql(
3112 			&fts_table,
3113 			info,
3114 			"BEGIN INSERT INTO $deleted VALUES (:doc_id);");
3115 
3116 		error = fts_eval_sql(trx, graph);
3117 
3118 		fts_que_graph_free(graph);
3119 	} else {
3120 		pars_info_free(info);
3121 	}
3122 
3123 	/* Increment the total deleted count, this is used to calculate the
3124 	number of documents indexed. */
3125 	if (error == DB_SUCCESS) {
3126 		mutex_enter(&table->fts->cache->deleted_lock);
3127 
3128 		++table->fts->cache->deleted;
3129 
3130 		mutex_exit(&table->fts->cache->deleted_lock);
3131 	}
3132 
3133 	return(error);
3134 }
3135 
3136 /*********************************************************************//**
3137 Do commit-phase steps necessary for the modification of a row.
3138 @return DB_SUCCESS or error code */
3139 static MY_ATTRIBUTE((nonnull, warn_unused_result))
3140 dberr_t
fts_modify(fts_trx_table_t * ftt,fts_trx_row_t * row)3141 fts_modify(
3142 /*=======*/
3143 	fts_trx_table_t*	ftt,		/*!< in: FTS trx table */
3144 	fts_trx_row_t*		row)		/*!< in: row */
3145 {
3146 	dberr_t	error;
3147 
3148 	ut_a(row->state == FTS_MODIFY);
3149 
3150 	error = fts_delete(ftt, row);
3151 
3152 	if (error == DB_SUCCESS) {
3153 		fts_add(ftt, row);
3154 	}
3155 
3156 	return(error);
3157 }
3158 
3159 /*********************************************************************//**
3160 Create a new document id.
3161 @return DB_SUCCESS if all went well else error */
3162 dberr_t
fts_create_doc_id(dict_table_t * table,dtuple_t * row,mem_heap_t * heap)3163 fts_create_doc_id(
3164 /*==============*/
3165 	dict_table_t*	table,		/*!< in: row is of this table. */
3166 	dtuple_t*	row,		/* in/out: add doc id value to this
3167 					row. This is the current row that is
3168 					being inserted. */
3169 	mem_heap_t*	heap)		/*!< in: heap */
3170 {
3171 	doc_id_t	doc_id;
3172 	dberr_t		error = DB_SUCCESS;
3173 
3174 	ut_a(table->fts->doc_col != ULINT_UNDEFINED);
3175 
3176 	if (!DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID)) {
3177 		if (table->fts->cache->first_doc_id == FTS_NULL_DOC_ID) {
3178 			error = fts_get_next_doc_id(table, &doc_id);
3179 		}
3180 		return(error);
3181 	}
3182 
3183 	error = fts_get_next_doc_id(table, &doc_id);
3184 
3185 	if (error == DB_SUCCESS) {
3186 		dfield_t*	dfield;
3187 		doc_id_t*	write_doc_id;
3188 
3189 		ut_a(doc_id > 0);
3190 
3191 		dfield = dtuple_get_nth_field(row, table->fts->doc_col);
3192 		write_doc_id = static_cast<doc_id_t*>(
3193 			mem_heap_alloc(heap, sizeof(*write_doc_id)));
3194 
3195 		ut_a(doc_id != FTS_NULL_DOC_ID);
3196 		ut_a(sizeof(doc_id) == dfield->type.len);
3197 		fts_write_doc_id((byte*) write_doc_id, doc_id);
3198 
3199 		dfield_set_data(dfield, write_doc_id, sizeof(*write_doc_id));
3200 	}
3201 
3202 	return(error);
3203 }
3204 
3205 /*********************************************************************//**
3206 The given transaction is about to be committed; do whatever is necessary
3207 from the FTS system's POV.
3208 @return DB_SUCCESS or error code */
3209 static MY_ATTRIBUTE((nonnull, warn_unused_result))
3210 dberr_t
fts_commit_table(fts_trx_table_t * ftt)3211 fts_commit_table(
3212 /*=============*/
3213 	fts_trx_table_t*	ftt)		/*!< in: FTS table to commit*/
3214 {
3215 	const ib_rbt_node_t*	node;
3216 	ib_rbt_t*		rows;
3217 	dberr_t			error = DB_SUCCESS;
3218 	fts_cache_t*		cache = ftt->table->fts->cache;
3219 	trx_t*			trx = trx_allocate_for_background();
3220 
3221 	rows = ftt->rows;
3222 
3223 	ftt->fts_trx->trx = trx;
3224 
3225 	if (cache->get_docs == NULL) {
3226 		rw_lock_x_lock(&cache->init_lock);
3227 		if (cache->get_docs == NULL) {
3228 			cache->get_docs = fts_get_docs_create(cache);
3229 		}
3230 		rw_lock_x_unlock(&cache->init_lock);
3231 	}
3232 
3233 	for (node = rbt_first(rows);
3234 	     node != NULL && error == DB_SUCCESS;
3235 	     node = rbt_next(rows, node)) {
3236 
3237 		fts_trx_row_t*	row = rbt_value(fts_trx_row_t, node);
3238 
3239 		switch (row->state) {
3240 		case FTS_INSERT:
3241 			fts_add(ftt, row);
3242 			break;
3243 
3244 		case FTS_MODIFY:
3245 			error = fts_modify(ftt, row);
3246 			break;
3247 
3248 		case FTS_DELETE:
3249 			error = fts_delete(ftt, row);
3250 			break;
3251 
3252 		default:
3253 			ut_error;
3254 		}
3255 	}
3256 
3257 	fts_sql_commit(trx);
3258 
3259 	trx_free_for_background(trx);
3260 
3261 	return(error);
3262 }
3263 
3264 /*********************************************************************//**
3265 The given transaction is about to be committed; do whatever is necessary
3266 from the FTS system's POV.
3267 @return DB_SUCCESS or error code */
3268 dberr_t
fts_commit(trx_t * trx)3269 fts_commit(
3270 /*=======*/
3271 	trx_t*	trx)				/*!< in: transaction */
3272 {
3273 	const ib_rbt_node_t*	node;
3274 	dberr_t			error;
3275 	ib_rbt_t*		tables;
3276 	fts_savepoint_t*	savepoint;
3277 
3278 	savepoint = static_cast<fts_savepoint_t*>(
3279 		ib_vector_last(trx->fts_trx->savepoints));
3280 	tables = savepoint->tables;
3281 
3282 	for (node = rbt_first(tables), error = DB_SUCCESS;
3283 	     node != NULL && error == DB_SUCCESS;
3284 	     node = rbt_next(tables, node)) {
3285 
3286 		fts_trx_table_t**	ftt;
3287 
3288 		ftt = rbt_value(fts_trx_table_t*, node);
3289 
3290 		error = fts_commit_table(*ftt);
3291 	}
3292 
3293 	return(error);
3294 }
3295 
3296 /*********************************************************************//**
3297 Initialize a document. */
3298 void
fts_doc_init(fts_doc_t * doc)3299 fts_doc_init(
3300 /*=========*/
3301 	fts_doc_t*	doc)			/*!< in: doc to initialize */
3302 {
3303 	mem_heap_t*	heap = mem_heap_create(32);
3304 
3305 	memset(doc, 0, sizeof(*doc));
3306 
3307 	doc->self_heap = ib_heap_allocator_create(heap);
3308 }
3309 
3310 /*********************************************************************//**
3311 Free document. */
3312 void
fts_doc_free(fts_doc_t * doc)3313 fts_doc_free(
3314 /*=========*/
3315 	fts_doc_t*	doc)			/*!< in: document */
3316 {
3317 	mem_heap_t*	heap = static_cast<mem_heap_t*>(doc->self_heap->arg);
3318 
3319 	if (doc->tokens) {
3320 		rbt_free(doc->tokens);
3321 	}
3322 
3323 	ut_d(memset(doc, 0, sizeof(*doc)));
3324 
3325 	mem_heap_free(heap);
3326 }
3327 
3328 /*********************************************************************//**
3329 Callback function for fetch that stores a row id to the location pointed.
3330 The column's type must be DATA_FIXBINARY, DATA_BINARY_TYPE, length = 8.
3331 @return always returns NULL */
3332 void*
fts_fetch_row_id(void * row,void * user_arg)3333 fts_fetch_row_id(
3334 /*=============*/
3335 	void*	row,				/*!< in: sel_node_t* */
3336 	void*	user_arg)			/*!< in: data pointer */
3337 {
3338 	sel_node_t*	node = static_cast<sel_node_t*>(row);
3339 
3340 	dfield_t*	dfield = que_node_get_val(node->select_list);
3341 	dtype_t*	type = dfield_get_type(dfield);
3342 	ulint		len = dfield_get_len(dfield);
3343 
3344 	ut_a(dtype_get_mtype(type) == DATA_FIXBINARY);
3345 	ut_a(dtype_get_prtype(type) & DATA_BINARY_TYPE);
3346 	ut_a(len == 8);
3347 
3348 	memcpy(user_arg, dfield_get_data(dfield), 8);
3349 
3350 	return(NULL);
3351 }
3352 
3353 /*********************************************************************//**
3354 Callback function for fetch that stores the text of an FTS document,
3355 converting each column to UTF-16.
3356 @return always FALSE */
3357 ibool
fts_query_expansion_fetch_doc(void * row,void * user_arg)3358 fts_query_expansion_fetch_doc(
3359 /*==========================*/
3360 	void*		row,			/*!< in: sel_node_t* */
3361 	void*		user_arg)		/*!< in: fts_doc_t* */
3362 {
3363 	que_node_t*	exp;
3364 	sel_node_t*	node = static_cast<sel_node_t*>(row);
3365 	fts_doc_t*	result_doc = static_cast<fts_doc_t*>(user_arg);
3366 	dfield_t*	dfield;
3367 	ulint		len;
3368 	ulint		doc_len;
3369 	fts_doc_t	doc;
3370 	CHARSET_INFO*	doc_charset = NULL;
3371 	ulint		field_no = 0;
3372 
3373 	len = 0;
3374 
3375 	fts_doc_init(&doc);
3376 	doc.found = TRUE;
3377 
3378 	exp = node->select_list;
3379 	doc_len = 0;
3380 
3381 	doc_charset  = result_doc->charset;
3382 
3383 	/* Copy each indexed column content into doc->text.f_str */
3384 	while (exp) {
3385 		dfield = que_node_get_val(exp);
3386 		len = dfield_get_len(dfield);
3387 
3388 		/* NULL column */
3389 		if (len == UNIV_SQL_NULL) {
3390 			exp = que_node_get_next(exp);
3391 			continue;
3392 		}
3393 
3394 		if (!doc_charset) {
3395 			doc_charset = fts_get_charset(dfield->type.prtype);
3396 		}
3397 
3398 		doc.charset = doc_charset;
3399 		doc.is_ngram = result_doc->is_ngram;
3400 
3401 		if (dfield_is_ext(dfield)) {
3402 			/* We ignore columns that are stored externally, this
3403 			could result in too many words to search */
3404 			exp = que_node_get_next(exp);
3405 			continue;
3406 		} else {
3407 			doc.text.f_n_char = 0;
3408 
3409 			doc.text.f_str = static_cast<byte*>(
3410 				dfield_get_data(dfield));
3411 
3412 			doc.text.f_len = len;
3413 		}
3414 
3415 		if (field_no == 0) {
3416 			fts_tokenize_document(&doc, result_doc,
3417 					      result_doc->parser);
3418 		} else {
3419 			fts_tokenize_document_next(&doc, doc_len, result_doc,
3420 						   result_doc->parser);
3421 		}
3422 
3423 		exp = que_node_get_next(exp);
3424 
3425 		doc_len += (exp) ? len + 1 : len;
3426 
3427 		field_no++;
3428 	}
3429 
3430 	ut_ad(doc_charset);
3431 
3432 	if (!result_doc->charset) {
3433 		result_doc->charset = doc_charset;
3434 	}
3435 
3436 	fts_doc_free(&doc);
3437 
3438 	return(FALSE);
3439 }
3440 
3441 /*********************************************************************//**
3442 fetch and tokenize the document. */
3443 static
3444 void
fts_fetch_doc_from_rec(fts_get_doc_t * get_doc,dict_index_t * clust_index,btr_pcur_t * pcur,ulint * offsets,fts_doc_t * doc)3445 fts_fetch_doc_from_rec(
3446 /*===================*/
3447 	fts_get_doc_t*  get_doc,	/*!< in: FTS index's get_doc struct */
3448 	dict_index_t*	clust_index,	/*!< in: cluster index */
3449 	btr_pcur_t*	pcur,		/*!< in: cursor whose position
3450 					has been stored */
3451 	ulint*		offsets,	/*!< in: offsets */
3452 	fts_doc_t*	doc)		/*!< out: fts doc to hold parsed
3453 					documents */
3454 {
3455 	dict_index_t*		index;
3456 	dict_table_t*		table;
3457 	const rec_t*		clust_rec;
3458 	ulint			num_field;
3459 	const dict_field_t*	ifield;
3460 	const dict_col_t*	col;
3461 	ulint			clust_pos;
3462 	ulint			i;
3463 	ulint			doc_len = 0;
3464 	ulint			processed_doc = 0;
3465 	st_mysql_ftparser*	parser;
3466 
3467 	if (!get_doc) {
3468 		return;
3469 	}
3470 
3471 	index = get_doc->index_cache->index;
3472 	table = get_doc->index_cache->index->table;
3473 	parser = get_doc->index_cache->index->parser;
3474 
3475 	clust_rec = btr_pcur_get_rec(pcur);
3476 
3477 	num_field = dict_index_get_n_fields(index);
3478 
3479 	for (i = 0; i < num_field; i++) {
3480 		ifield = dict_index_get_nth_field(index, i);
3481 		col = dict_field_get_col(ifield);
3482 		clust_pos = dict_col_get_clust_pos(col, clust_index);
3483 
3484 		if (!get_doc->index_cache->charset) {
3485 			get_doc->index_cache->charset = fts_get_charset(
3486 				ifield->col->prtype);
3487 		}
3488 
3489 		if (rec_offs_nth_extern(offsets, clust_pos)) {
3490 			doc->text.f_str =
3491 				btr_rec_copy_externally_stored_field(
3492 					clust_rec, offsets,
3493 					dict_table_page_size(table),
3494 					clust_pos, &doc->text.f_len,
3495 					static_cast<mem_heap_t*>(
3496 						doc->self_heap->arg));
3497 		} else {
3498 			doc->text.f_str = (byte*) rec_get_nth_field(
3499 				clust_rec, offsets, clust_pos,
3500 				&doc->text.f_len);
3501 		}
3502 
3503 		doc->found = TRUE;
3504 		doc->charset = get_doc->index_cache->charset;
3505 		doc->is_ngram = index->is_ngram;
3506 
3507 		/* Null Field */
3508 		if (doc->text.f_len == UNIV_SQL_NULL || doc->text.f_len == 0) {
3509 			continue;
3510 		}
3511 
3512 		if (processed_doc == 0) {
3513 			fts_tokenize_document(doc, NULL, parser);
3514 		} else {
3515 			fts_tokenize_document_next(doc, doc_len, NULL, parser);
3516 		}
3517 
3518 		processed_doc++;
3519 		doc_len += doc->text.f_len + 1;
3520 	}
3521 }
3522 
/*********************************************************************//**
This function fetches the document inserted during the committing
transaction, and tokenize the inserted text data and insert into
FTS auxiliary table and its cache.
The record is looked up by Doc ID through table->fts_doc_id_index; when
that index is not the clustered index, the clustered record is located
through a second cursor. The record is then tokenized once per entry in
cache->get_docs and the tokens are added to the FTS cache.
@return always TRUE, also when no matching record is found */
static
ulint
fts_add_doc_by_id(
/*==============*/
	fts_trx_table_t*ftt,		/*!< in: FTS trx table */
	doc_id_t	doc_id,		/*!< in: doc id */
	ib_vector_t*	fts_indexes MY_ATTRIBUTE((unused)))
					/*!< in: affected fts indexes */
{
	mtr_t		mtr;
	mem_heap_t*	heap;
	btr_pcur_t	pcur;
	dict_table_t*	table;
	dtuple_t*	tuple;
	dfield_t*       dfield;
	fts_get_doc_t*	get_doc;
	doc_id_t        temp_doc_id;
	dict_index_t*   clust_index;
	dict_index_t*	fts_id_index;
	ibool		is_id_cluster;
	fts_cache_t*   	cache = ftt->table->fts->cache;

	ut_ad(cache->get_docs);

	/* If Doc ID has been supplied by the user, then the table
	might not yet be sync-ed */

	if (!(ftt->table->fts->fts_status & ADDED_TABLE_SYNCED)) {
		fts_init_index(ftt->table, FALSE);
	}

	/* Get the first FTS index's get_doc */
	get_doc = static_cast<fts_get_doc_t*>(
		ib_vector_get(cache->get_docs, 0));
	ut_ad(get_doc);

	table = get_doc->index_cache->index->table;

	/* Scratch heap for the search tuples and record offsets built
	below; freed at func_exit. */
	heap = mem_heap_create(512);

	clust_index = dict_table_get_first_index(table);
	fts_id_index = table->fts_doc_id_index;

	/* Check whether the index on FTS_DOC_ID is cluster index */
	is_id_cluster = (clust_index == fts_id_index);

	mtr_start(&mtr);
	btr_pcur_init(&pcur);

	/* Search based on Doc ID. Here, we'll need to consider the case
	when there is no primary index on Doc ID */
	tuple = dtuple_create(heap, 1);
	dfield = dtuple_get_nth_field(tuple, 0);
	dfield->type.mtype = DATA_INT;
	dfield->type.prtype = DATA_NOT_NULL | DATA_UNSIGNED | DATA_BINARY_TYPE;

	/* The search key is the Doc ID written with mach_write_to_8(). */
	mach_write_to_8((byte*) &temp_doc_id, doc_id);
	dfield_set_data(dfield, &temp_doc_id, sizeof(temp_doc_id));

	btr_pcur_open_with_no_init(
		fts_id_index, tuple, PAGE_CUR_LE, BTR_SEARCH_LEAF,
		&pcur, 0, &mtr);

	/* If we have a match, add the data to doc structure */
	if (btr_pcur_get_low_match(&pcur) == 1) {
		const rec_t*	rec;
		btr_pcur_t*	doc_pcur;
		const rec_t*	clust_rec;
		btr_pcur_t	clust_pcur;
		ulint*		offsets = NULL;
		ulint		num_idx = ib_vector_size(cache->get_docs);

		rec = btr_pcur_get_rec(&pcur);

		/* Doc could be deleted */
		if (page_rec_is_infimum(rec)
		    || rec_get_deleted_flag(rec, dict_table_is_comp(table))) {

			goto func_exit;
		}

		if (is_id_cluster) {
			/* The Doc ID index record is the clustered record. */
			clust_rec = rec;
			doc_pcur = &pcur;
		} else {
			dtuple_t*	clust_ref;
			ulint		n_fields;

			/* Build a row reference from the Doc ID index record
			and position a second cursor on the clustered index. */
			btr_pcur_init(&clust_pcur);
			n_fields = dict_index_get_n_unique(clust_index);

			clust_ref = dtuple_create(heap, n_fields);
			dict_index_copy_types(clust_ref, clust_index, n_fields);

			row_build_row_ref_in_tuple(
				clust_ref, rec, fts_id_index, NULL, NULL);

			btr_pcur_open_with_no_init(
				clust_index, clust_ref, PAGE_CUR_LE,
				BTR_SEARCH_LEAF, &clust_pcur, 0, &mtr);

			doc_pcur = &clust_pcur;
			clust_rec = btr_pcur_get_rec(&clust_pcur);

		}

		offsets = rec_get_offsets(clust_rec, clust_index,
					  NULL, ULINT_UNDEFINED, &heap);

		/* Tokenize the record once for every entry in get_docs. */
		 for (ulint i = 0; i < num_idx; ++i) {
			fts_doc_t       doc;
			/* These two locals shadow the function-level
			variables of the same name for the loop body. */
			dict_table_t*   table;
			fts_get_doc_t*  get_doc;

			get_doc = static_cast<fts_get_doc_t*>(
				ib_vector_get(cache->get_docs, i));

			table = get_doc->index_cache->index->table;

			fts_doc_init(&doc);

			fts_fetch_doc_from_rec(
				get_doc, clust_index, doc_pcur, offsets, &doc);

			if (doc.found) {
				ibool	success MY_ATTRIBUTE((unused));

				/* Remember the cursor position and commit the
				mini-transaction before taking the cache lock;
				it is started again further below. */
				btr_pcur_store_position(doc_pcur, &mtr);
				mtr_commit(&mtr);

				DEBUG_SYNC_C("fts_instrument_sync_cache_wait");
				rw_lock_x_lock(&table->fts->cache->lock);

				if (table->fts->cache->stopword_info.status
				    & STOPWORD_NOT_INIT) {
					fts_load_stopword(table, NULL, NULL,
							  NULL, TRUE, TRUE);
				}

				fts_cache_add_doc(
					table->fts->cache,
					get_doc->index_cache,
					doc_id, doc.tokens);

				/* Ask for a sync when the cache has grown by
				more than a tenth of fts_max_cache_size since
				total_size_before_sync was last recorded, or
				when fts_need_sync is set, provided no sync is
				already in progress. */
				bool	need_sync = false;
				if ((cache->total_size -
				    cache->total_size_before_sync >
				    fts_max_cache_size / 10 || fts_need_sync)
				    && !cache->sync->in_progress) {
					need_sync = true;
					cache->total_size_before_sync =
					    cache->total_size;
				}

				rw_lock_x_unlock(&table->fts->cache->lock);

				DBUG_EXECUTE_IF(
                                        "fts_instrument_sync_cache_wait",
					srv_fatal_semaphore_wait_threshold = 25;
					fts_max_cache_size = 100;
                                        fts_sync(cache->sync, true, true, false);
                                );

				DBUG_EXECUTE_IF(
					"fts_instrument_sync",
					fts_optimize_request_sync_table(table);
					os_event_wait(cache->sync->event);
				);

				DBUG_EXECUTE_IF(
					"fts_instrument_sync_debug",
					fts_sync(cache->sync, true, true, false);
				);

				DEBUG_SYNC_C("fts_instrument_sync_request");
				DBUG_EXECUTE_IF(
					"fts_instrument_sync_request",
					fts_optimize_request_sync_table(table);
				);

				/* The request is issued after the cache lock
				has been released. */
				if (need_sync) {
					fts_optimize_request_sync_table(table);
				}

				/* Restart the mini-transaction: func_exit
				commits mtr unconditionally. */
				mtr_start(&mtr);

				/* The cursor is repositioned only when at
				least one more get_docs entry remains. */
				if (i < num_idx - 1) {

					success = btr_pcur_restore_position(
						BTR_SEARCH_LEAF, doc_pcur,
						&mtr);

					ut_ad(success);
				}
			}

			fts_doc_free(&doc);
		}

		/* The secondary cursor on the clustered index is closed
		here; pcur itself is closed at func_exit. */
		if (!is_id_cluster) {
			btr_pcur_close(doc_pcur);
		}
	}
func_exit:
	mtr_commit(&mtr);

	btr_pcur_close(&pcur);

	mem_heap_free(heap);
	return(TRUE);
}
3739 
3740 
3741 /*********************************************************************//**
3742 Callback function to read a single ulint column.
3743 return always returns TRUE */
3744 static
3745 ibool
fts_read_ulint(void * row,void * user_arg)3746 fts_read_ulint(
3747 /*===========*/
3748 	void*		row,		/*!< in: sel_node_t* */
3749 	void*		user_arg)	/*!< in: pointer to ulint */
3750 {
3751 	sel_node_t*	sel_node = static_cast<sel_node_t*>(row);
3752 	ulint*		value = static_cast<ulint*>(user_arg);
3753 	que_node_t*	exp = sel_node->select_list;
3754 	dfield_t*	dfield = que_node_get_val(exp);
3755 	void*		data = dfield_get_data(dfield);
3756 
3757 	*value = static_cast<ulint>(mach_read_from_4(
3758 		static_cast<const byte*>(data)));
3759 
3760 	return(TRUE);
3761 }
3762 
3763 /*********************************************************************//**
3764 Get maximum Doc ID in a table if index "FTS_DOC_ID_INDEX" exists
3765 @return max Doc ID or 0 if index "FTS_DOC_ID_INDEX" does not exist */
3766 doc_id_t
fts_get_max_doc_id(dict_table_t * table)3767 fts_get_max_doc_id(
3768 /*===============*/
3769 	dict_table_t*	table)		/*!< in: user table */
3770 {
3771 	dict_index_t*	index;
3772 	dict_field_t*	dfield MY_ATTRIBUTE((unused)) = NULL;
3773 	doc_id_t	doc_id = 0;
3774 	mtr_t		mtr;
3775 	btr_pcur_t	pcur;
3776 
3777 	index = table->fts_doc_id_index;
3778 
3779 	if (!index) {
3780 		return(0);
3781 	}
3782 
3783 	dfield = dict_index_get_nth_field(index, 0);
3784 
3785 #if 0 /* This can fail when renaming a column to FTS_DOC_ID_COL_NAME. */
3786 	ut_ad(innobase_strcasecmp(FTS_DOC_ID_COL_NAME, dfield->name) == 0);
3787 #endif
3788 
3789 	mtr_start(&mtr);
3790 
3791 	/* fetch the largest indexes value */
3792 	btr_pcur_open_at_index_side(
3793 		false, index, BTR_SEARCH_LEAF, &pcur, true, 0, &mtr);
3794 
3795 	if (!page_is_empty(btr_pcur_get_page(&pcur))) {
3796 		const rec_t*    rec = NULL;
3797 		ulint		offsets_[REC_OFFS_NORMAL_SIZE];
3798 		ulint*		offsets = offsets_;
3799 		mem_heap_t*	heap = NULL;
3800 		ulint		len;
3801 		const void*	data;
3802 
3803 		rec_offs_init(offsets_);
3804 
3805 		do {
3806 			rec = btr_pcur_get_rec(&pcur);
3807 
3808 			if (page_rec_is_user_rec(rec)) {
3809 				break;
3810 			}
3811 		} while (btr_pcur_move_to_prev(&pcur, &mtr));
3812 
3813 		if (!rec) {
3814 			goto func_exit;
3815 		}
3816 
3817 		offsets = rec_get_offsets(
3818 			rec, index, offsets, ULINT_UNDEFINED, &heap);
3819 
3820 		data = rec_get_nth_field(rec, offsets, 0, &len);
3821 
3822 		doc_id = static_cast<doc_id_t>(fts_read_doc_id(
3823 			static_cast<const byte*>(data)));
3824 	}
3825 
3826 func_exit:
3827 	btr_pcur_close(&pcur);
3828 	mtr_commit(&mtr);
3829 	return(doc_id);
3830 }
3831 
3832 /*********************************************************************//**
3833 Fetch document with the given document id.
3834 @return DB_SUCCESS if OK else error */
3835 dberr_t
fts_doc_fetch_by_doc_id(fts_get_doc_t * get_doc,doc_id_t doc_id,dict_index_t * index_to_use,ulint option,fts_sql_callback callback,void * arg)3836 fts_doc_fetch_by_doc_id(
3837 /*====================*/
3838 	fts_get_doc_t*	get_doc,	/*!< in: state */
3839 	doc_id_t	doc_id,		/*!< in: id of document to
3840 					fetch */
3841 	dict_index_t*	index_to_use,	/*!< in: caller supplied FTS index,
3842 					or NULL */
3843 	ulint		option,		/*!< in: search option, if it is
3844 					greater than doc_id or equal */
3845 	fts_sql_callback
3846 			callback,	/*!< in: callback to read */
3847 	void*		arg)		/*!< in: callback arg */
3848 {
3849 	pars_info_t*	info;
3850 	dberr_t		error;
3851 	const char*	select_str;
3852 	doc_id_t	write_doc_id;
3853 	dict_index_t*	index;
3854 	trx_t*		trx = trx_allocate_for_background();
3855 	que_t*          graph;
3856 
3857 	trx->op_info = "fetching indexed FTS document";
3858 
3859 	/* The FTS index can be supplied by caller directly with
3860 	"index_to_use", otherwise, get it from "get_doc" */
3861 	index = (index_to_use) ? index_to_use : get_doc->index_cache->index;
3862 
3863 	if (get_doc && get_doc->get_document_graph) {
3864 		info = get_doc->get_document_graph->info;
3865 	} else {
3866 		info = pars_info_create();
3867 	}
3868 
3869 	/* Convert to "storage" byte order. */
3870 	fts_write_doc_id((byte*) &write_doc_id, doc_id);
3871 	fts_bind_doc_id(info, "doc_id", &write_doc_id);
3872 	pars_info_bind_function(info, "my_func", callback, arg);
3873 
3874 	select_str = fts_get_select_columns_str(index, info, info->heap);
3875 	pars_info_bind_id(info, TRUE, "table_name", index->table_name);
3876 
3877 	if (!get_doc || !get_doc->get_document_graph) {
3878 		if (option == FTS_FETCH_DOC_BY_ID_EQUAL) {
3879 			graph = fts_parse_sql(
3880 				NULL,
3881 				info,
3882 				mem_heap_printf(info->heap,
3883 					"DECLARE FUNCTION my_func;\n"
3884 					"DECLARE CURSOR c IS"
3885 					" SELECT %s FROM $table_name"
3886 					" WHERE %s = :doc_id;\n"
3887 					"BEGIN\n"
3888 					""
3889 					"OPEN c;\n"
3890 					"WHILE 1 = 1 LOOP\n"
3891 					"  FETCH c INTO my_func();\n"
3892 					"  IF c %% NOTFOUND THEN\n"
3893 					"    EXIT;\n"
3894 					"  END IF;\n"
3895 					"END LOOP;\n"
3896 					"CLOSE c;",
3897 					select_str, FTS_DOC_ID_COL_NAME));
3898 		} else {
3899 			ut_ad(option == FTS_FETCH_DOC_BY_ID_LARGE);
3900 
3901 			/* This is used for crash recovery of table with
3902 			hidden DOC ID or FTS indexes. We will scan the table
3903 			to re-processing user table rows whose DOC ID or
3904 			FTS indexed documents have not been sync-ed to disc
3905 			during recent crash.
3906 			In the case that all fulltext indexes are dropped
3907 			for a table, we will keep the "hidden" FTS_DOC_ID
3908 			column, and this scan is to retreive the largest
3909 			DOC ID being used in the table to determine the
3910 			appropriate next DOC ID.
3911 			In the case of there exists fulltext index(es), this
3912 			operation will re-tokenize any docs that have not
3913 			been sync-ed to the disk, and re-prime the FTS
3914 			cached */
3915 			graph = fts_parse_sql(
3916 				NULL,
3917 				info,
3918 				mem_heap_printf(info->heap,
3919 					"DECLARE FUNCTION my_func;\n"
3920 					"DECLARE CURSOR c IS"
3921 					" SELECT %s, %s FROM $table_name"
3922 					" WHERE %s > :doc_id;\n"
3923 					"BEGIN\n"
3924 					""
3925 					"OPEN c;\n"
3926 					"WHILE 1 = 1 LOOP\n"
3927 					"  FETCH c INTO my_func();\n"
3928 					"  IF c %% NOTFOUND THEN\n"
3929 					"    EXIT;\n"
3930 					"  END IF;\n"
3931 					"END LOOP;\n"
3932 					"CLOSE c;",
3933 					FTS_DOC_ID_COL_NAME,
3934 					select_str, FTS_DOC_ID_COL_NAME));
3935 		}
3936 		if (get_doc) {
3937 			get_doc->get_document_graph = graph;
3938 		}
3939 	} else {
3940 		graph = get_doc->get_document_graph;
3941 	}
3942 
3943 	error = fts_eval_sql(trx, graph);
3944 
3945 	if (error == DB_SUCCESS) {
3946 		fts_sql_commit(trx);
3947 	} else {
3948 		fts_sql_rollback(trx);
3949 	}
3950 
3951 	trx_free_for_background(trx);
3952 
3953 	if (!get_doc) {
3954 		fts_que_graph_free(graph);
3955 	}
3956 
3957 	return(error);
3958 }
3959 
3960 /*********************************************************************//**
3961 Write out a single word's data as new entry/entries in the INDEX table.
3962 @return DB_SUCCESS if all OK. */
3963 dberr_t
fts_write_node(trx_t * trx,que_t ** graph,fts_table_t * fts_table,fts_string_t * word,fts_node_t * node)3964 fts_write_node(
3965 /*===========*/
3966 	trx_t*		trx,			/*!< in: transaction */
3967 	que_t**		graph,			/*!< in: query graph */
3968 	fts_table_t*	fts_table,		/*!< in: aux table */
3969 	fts_string_t*	word,			/*!< in: word in UTF-8 */
3970 	fts_node_t*	node)			/*!< in: node columns */
3971 {
3972 	pars_info_t*	info;
3973 	dberr_t		error;
3974 	ib_uint32_t	doc_count;
3975 	ib_time_monotonic_t	start_time;
3976 	doc_id_t	last_doc_id;
3977 	doc_id_t	first_doc_id;
3978 	char		table_name[MAX_FULL_NAME_LEN];
3979 
3980 	ut_a(node->ilist != NULL);
3981 
3982 	if (*graph) {
3983 		info = (*graph)->info;
3984 	} else {
3985 		info = pars_info_create();
3986 
3987 		fts_get_table_name(fts_table, table_name);
3988 		pars_info_bind_id(info, true, "index_table_name", table_name);
3989 	}
3990 
3991 	pars_info_bind_varchar_literal(info, "token", word->f_str, word->f_len);
3992 
3993 	/* Convert to "storage" byte order. */
3994 	fts_write_doc_id((byte*) &first_doc_id, node->first_doc_id);
3995 	fts_bind_doc_id(info, "first_doc_id", &first_doc_id);
3996 
3997 	/* Convert to "storage" byte order. */
3998 	fts_write_doc_id((byte*) &last_doc_id, node->last_doc_id);
3999 	fts_bind_doc_id(info, "last_doc_id", &last_doc_id);
4000 
4001 	ut_a(node->last_doc_id >= node->first_doc_id);
4002 
4003 	/* Convert to "storage" byte order. */
4004 	mach_write_to_4((byte*) &doc_count, node->doc_count);
4005 	pars_info_bind_int4_literal(
4006 		info, "doc_count", (const ib_uint32_t*) &doc_count);
4007 
4008 	/* Set copy_name to FALSE since it's a static. */
4009 	pars_info_bind_literal(
4010 		info, "ilist", node->ilist, node->ilist_size,
4011 		DATA_BLOB, DATA_BINARY_TYPE);
4012 
4013 	if (!*graph) {
4014 
4015 		*graph = fts_parse_sql(
4016 			fts_table,
4017 			info,
4018 			"BEGIN\n"
4019 			"INSERT INTO $index_table_name VALUES"
4020 			" (:token, :first_doc_id,"
4021 			"  :last_doc_id, :doc_count, :ilist);");
4022 	}
4023 
4024 	start_time = ut_time_monotonic();
4025 	error = fts_eval_sql(trx, *graph);
4026 	elapsed_time += ut_time_monotonic() - start_time;
4027 	++n_nodes;
4028 
4029 	return(error);
4030 }
4031 
4032 /*********************************************************************//**
4033 Add rows to the DELETED_CACHE table.
4034 @return DB_SUCCESS if all went well else error code*/
4035 static MY_ATTRIBUTE((nonnull, warn_unused_result))
4036 dberr_t
fts_sync_add_deleted_cache(fts_sync_t * sync,ib_vector_t * doc_ids)4037 fts_sync_add_deleted_cache(
4038 /*=======================*/
4039 	fts_sync_t*	sync,			/*!< in: sync state */
4040 	ib_vector_t*	doc_ids)		/*!< in: doc ids to add */
4041 {
4042 	ulint		i;
4043 	pars_info_t*	info;
4044 	que_t*		graph;
4045 	fts_table_t	fts_table;
4046 	char		table_name[MAX_FULL_NAME_LEN];
4047 	doc_id_t	dummy = 0;
4048 	dberr_t		error = DB_SUCCESS;
4049 	ulint		n_elems = ib_vector_size(doc_ids);
4050 
4051 	ut_a(ib_vector_size(doc_ids) > 0);
4052 
4053 	ib_vector_sort(doc_ids, fts_update_doc_id_cmp);
4054 
4055 	info = pars_info_create();
4056 
4057 	fts_bind_doc_id(info, "doc_id", &dummy);
4058 
4059 	FTS_INIT_FTS_TABLE(
4060 		&fts_table, "DELETED_CACHE", FTS_COMMON_TABLE, sync->table);
4061 
4062 	fts_get_table_name(&fts_table, table_name);
4063 	pars_info_bind_id(info, true, "table_name", table_name);
4064 
4065 	graph = fts_parse_sql(
4066 		&fts_table,
4067 		info,
4068 		"BEGIN INSERT INTO $table_name VALUES (:doc_id);");
4069 
4070 	for (i = 0; i < n_elems && error == DB_SUCCESS; ++i) {
4071 		fts_update_t*	update;
4072 		doc_id_t	write_doc_id;
4073 
4074 		update = static_cast<fts_update_t*>(ib_vector_get(doc_ids, i));
4075 
4076 		/* Convert to "storage" byte order. */
4077 		fts_write_doc_id((byte*) &write_doc_id, update->doc_id);
4078 		fts_bind_doc_id(info, "doc_id", &write_doc_id);
4079 
4080 		error = fts_eval_sql(sync->trx, graph);
4081 	}
4082 
4083 	fts_que_graph_free(graph);
4084 
4085 	return(error);
4086 }
4087 
4088 /** Write the words and ilist to disk.
4089 @param[in,out]	trx		transaction
4090 @param[in]	index_cache	index cache
4091 @param[in]	unlock_cache	whether unlock cache when write node
4092 				Also set this to true if sync takes
4093 				very long
4094 @param[in]	sync_start_time	Holds the timestamp of start of sync
4095 				for deducing the length of sync time
4096 @return DB_SUCCESS if all went well else error code */
4097 static MY_ATTRIBUTE((nonnull, warn_unused_result))
4098 dberr_t
fts_sync_write_words(trx_t * trx,fts_index_cache_t * index_cache,bool unlock_cache,ib_time_t sync_start_time)4099 fts_sync_write_words(
4100 	trx_t*			trx,
4101 	fts_index_cache_t*	index_cache,
4102 	bool			unlock_cache,
4103 	ib_time_t		sync_start_time)
4104 {
4105 	fts_table_t	fts_table;
4106 	ulint		n_nodes = 0;
4107 	ulint		n_words = 0;
4108 	const ib_rbt_node_t* rbt_node;
4109 	dberr_t		error = DB_SUCCESS;
4110 	ibool		print_error = FALSE;
4111 	dict_table_t*	table = index_cache->index->table;
4112 	/* We use this to deduce threshold value of time
4113 	that we can let sync to go on holding cache lock */
4114 	const float cutoff = 0.98;
4115 	ulint		lock_threshold =
4116 			(srv_fatal_semaphore_wait_threshold % SRV_SEMAPHORE_WAIT_EXTENSION)
4117 			* cutoff;
4118 	bool		timeout_extended = false;
4119 #ifdef FTS_DOC_STATS_DEBUG
4120 	ulint		n_new_words = 0;
4121 #endif /* FTS_DOC_STATS_DEBUG */
4122 
4123 	FTS_INIT_INDEX_TABLE(
4124 		&fts_table, NULL, FTS_INDEX_TABLE, index_cache->index);
4125 
4126 	n_words = rbt_size(index_cache->words);
4127 
4128 	/* We iterate over the entire tree, even if there is an error,
4129 	since we want to free the memory used during caching. */
4130 	for (rbt_node = rbt_first(index_cache->words);
4131 	     rbt_node;
4132 	     rbt_node = rbt_next(index_cache->words, rbt_node)) {
4133 
4134 		ulint			i;
4135 		ulint			selected;
4136 		fts_tokenizer_word_t*	word;
4137 
4138 		word = rbt_value(fts_tokenizer_word_t, rbt_node);
4139 
4140 		DBUG_EXECUTE_IF("fts_instrument_write_words_before_select_index",
4141 				os_thread_sleep(300000););
4142 
4143 		selected = fts_select_index(
4144 			index_cache->charset, word->text.f_str,
4145 			word->text.f_len);
4146 
4147 		fts_table.suffix = fts_get_suffix(selected);
4148 
4149 #ifdef FTS_DOC_STATS_DEBUG
4150 		/* Check if the word exists in the FTS index and if not
4151 		then we need to increment the total word count stats. */
4152 		if (error == DB_SUCCESS && fts_enable_diag_print) {
4153 			ibool	found = FALSE;
4154 
4155 			error = fts_is_word_in_index(
4156 				trx,
4157 				&index_cache->sel_graph[selected],
4158 				&fts_table,
4159 				&word->text, &found);
4160 
4161 			if (error == DB_SUCCESS && !found) {
4162 
4163 				++n_new_words;
4164 			}
4165 		}
4166 #endif /* FTS_DOC_STATS_DEBUG */
4167 
4168 		/* We iterate over all the nodes even if there was an error */
4169 		for (i = 0; i < ib_vector_size(word->nodes); ++i) {
4170 
4171 			fts_node_t* fts_node = static_cast<fts_node_t*>(
4172 				ib_vector_get(word->nodes, i));
4173 
4174 			if (fts_node->synced) {
4175 				continue;
4176 			} else {
4177 				fts_node->synced = true;
4178 			}
4179 
4180 			/*FIXME: we need to handle the error properly. */
4181 			if (error == DB_SUCCESS) {
4182 				DEBUG_SYNC_C("fts_instrument_sync");
4183 				DBUG_EXECUTE_IF("fts_instrument_sync",
4184 			                        os_thread_sleep(10000000););
4185 				if (!unlock_cache) {
4186 					ulint cache_lock_time = ut_time_monotonic() - sync_start_time;
4187 					if (cache_lock_time > lock_threshold) {
4188 						if (!timeout_extended) {
4189 							os_atomic_increment_ulint(
4190 							&srv_fatal_semaphore_wait_threshold,
4191 							SRV_SEMAPHORE_WAIT_EXTENSION);
4192 							timeout_extended = true;
4193 							lock_threshold +=
4194 							SRV_SEMAPHORE_WAIT_EXTENSION;
4195 						} else {
4196 							unlock_cache = true;
4197 							os_atomic_decrement_ulint(
4198 							&srv_fatal_semaphore_wait_threshold,
4199 							SRV_SEMAPHORE_WAIT_EXTENSION);
4200 							timeout_extended = false;
4201 
4202 						}
4203 					}
4204 				}
4205 
4206 				if (unlock_cache) {
4207 					rw_lock_x_unlock(
4208 						&table->fts->cache->lock);
4209 				}
4210 
4211 				error = fts_write_node(
4212 					trx,
4213 					&index_cache->ins_graph[selected],
4214 					&fts_table, &word->text, fts_node);
4215 				DBUG_EXECUTE_IF("fts_instrument_sync",
4216                                                 os_thread_sleep(15000000););
4217 
4218 				DEBUG_SYNC_C("fts_write_node");
4219 				DBUG_EXECUTE_IF("fts_write_node_crash",
4220 					DBUG_SUICIDE(););
4221 
4222 				DBUG_EXECUTE_IF("fts_instrument_sync_sleep",
4223 					os_thread_sleep(1000000);
4224 				);
4225 
4226 				if (unlock_cache) {
4227 					rw_lock_x_lock(
4228 						&table->fts->cache->lock);
4229 				}
4230 			}
4231 		}
4232 
4233 		n_nodes += ib_vector_size(word->nodes);
4234 
4235 		if (error != DB_SUCCESS && !print_error) {
4236 			ib::error() << "(" << ut_strerr(error) << ") writing"
4237 				" word node to FTS auxiliary index table.";
4238 			print_error = TRUE;
4239 		}
4240 	}
4241 
4242 #ifdef FTS_DOC_STATS_DEBUG
4243 	if (error == DB_SUCCESS && n_new_words > 0 && fts_enable_diag_print) {
4244 		fts_table_t	fts_table;
4245 
4246 		FTS_INIT_FTS_TABLE(&fts_table, NULL, FTS_COMMON_TABLE, table);
4247 
4248 		/* Increment the total number of words in the FTS index */
4249 		error = fts_config_increment_index_value(
4250 			trx, index_cache->index, FTS_TOTAL_WORD_COUNT,
4251 			n_new_words);
4252 	}
4253 #endif /* FTS_DOC_STATS_DEBUG */
4254 
4255 	if (fts_enable_diag_print) {
4256 		printf("Avg number of nodes: %lf\n",
4257 		       (double) n_nodes / (double) (n_words > 1 ? n_words : 1));
4258 	}
4259 
4260 	return(error);
4261 }
4262 
4263 #ifdef FTS_DOC_STATS_DEBUG
4264 /*********************************************************************//**
4265 Write a single documents statistics to disk.
4266 @return DB_SUCCESS if all went well else error code */
4267 static MY_ATTRIBUTE((nonnull, warn_unused_result))
4268 dberr_t
fts_sync_write_doc_stat(trx_t * trx,dict_index_t * index,que_t ** graph,const fts_doc_stats_t * doc_stat)4269 fts_sync_write_doc_stat(
4270 /*====================*/
4271 	trx_t*			trx,		/*!< in: transaction */
4272 	dict_index_t*		index,		/*!< in: index */
4273 	que_t**			graph,		/* out: query graph */
4274 	const fts_doc_stats_t*	doc_stat)	/*!< in: doc stats to write */
4275 {
4276 	pars_info_t*	info;
4277 	doc_id_t	doc_id;
4278 	dberr_t		error = DB_SUCCESS;
4279 	ib_uint32_t	word_count;
4280 	char		table_name[MAX_FULL_NAME_LEN];
4281 
4282 	if (*graph) {
4283 		info = (*graph)->info;
4284 	} else {
4285 		info = pars_info_create();
4286 	}
4287 
4288 	/* Convert to "storage" byte order. */
4289 	mach_write_to_4((byte*) &word_count, doc_stat->word_count);
4290 	pars_info_bind_int4_literal(
4291 		info, "count", (const ib_uint32_t*) &word_count);
4292 
4293 	/* Convert to "storage" byte order. */
4294 	fts_write_doc_id((byte*) &doc_id, doc_stat->doc_id);
4295 	fts_bind_doc_id(info, "doc_id", &doc_id);
4296 
4297 	if (!*graph) {
4298 		fts_table_t	fts_table;
4299 
4300 		FTS_INIT_INDEX_TABLE(
4301 			&fts_table, "DOC_ID", FTS_INDEX_TABLE, index);
4302 
4303 		fts_get_table_name(&fts_table, table_name);
4304 
4305 		pars_info_bind_id(info, true, "doc_id_table", table_name);
4306 
4307 		*graph = fts_parse_sql(
4308 			&fts_table,
4309 			info,
4310 			"BEGIN"
4311 			" INSERT INTO $doc_id_table VALUES (:doc_id, :count);");
4312 	}
4313 
4314 	for (;;) {
4315 		error = fts_eval_sql(trx, *graph);
4316 
4317 		if (error == DB_SUCCESS) {
4318 
4319 			break;				/* Exit the loop. */
4320 		} else {
4321 
4322 			if (error == DB_LOCK_WAIT_TIMEOUT) {
4323 				ib::warn() << "Lock wait timeout writing to"
4324 					" FTS doc_id. Retrying!";
4325 
4326 				trx->error_state = DB_SUCCESS;
4327 			} else {
4328 				ib::error() << "(" << ut_strerr(error)
4329 					<< ") while writing to FTS doc_id.";
4330 
4331 				break;			/* Exit the loop. */
4332 			}
4333 		}
4334 	}
4335 
4336 	return(error);
4337 }
4338 
4339 /*********************************************************************//**
4340 Write document statistics to disk.
4341 @return DB_SUCCESS if all OK */
4342 static
4343 ulint
fts_sync_write_doc_stats(trx_t * trx,const fts_index_cache_t * index_cache)4344 fts_sync_write_doc_stats(
4345 /*=====================*/
4346 	trx_t*			trx,		/*!< in: transaction */
4347 	const fts_index_cache_t*index_cache)	/*!< in: index cache */
4348 {
4349 	dberr_t		error = DB_SUCCESS;
4350 	que_t*		graph = NULL;
4351 	fts_doc_stats_t*  doc_stat;
4352 
4353 	if (ib_vector_is_empty(index_cache->doc_stats)) {
4354 		return(DB_SUCCESS);
4355 	}
4356 
4357 	doc_stat = static_cast<ts_doc_stats_t*>(
4358 		ib_vector_pop(index_cache->doc_stats));
4359 
4360 	while (doc_stat) {
4361 		error = fts_sync_write_doc_stat(
4362 			trx, index_cache->index, &graph, doc_stat);
4363 
4364 		if (error != DB_SUCCESS) {
4365 			break;
4366 		}
4367 
4368 		if (ib_vector_is_empty(index_cache->doc_stats)) {
4369 			break;
4370 		}
4371 
4372 		doc_stat = static_cast<ts_doc_stats_t*>(
4373 			ib_vector_pop(index_cache->doc_stats));
4374 	}
4375 
4376 	if (graph != NULL) {
4377 		fts_que_graph_free_check_lock(NULL, index_cache, graph);
4378 	}
4379 
4380 	return(error);
4381 }
4382 
4383 /*********************************************************************//**
4384 Callback to check the existince of a word.
4385 @return always return NULL */
4386 static
4387 ibool
fts_lookup_word(void * row,void * user_arg)4388 fts_lookup_word(
4389 /*============*/
4390 	void*	row,				/*!< in:  sel_node_t* */
4391 	void*	user_arg)			/*!< in:  fts_doc_t* */
4392 {
4393 
4394 	que_node_t*	exp;
4395 	sel_node_t*	node = static_cast<sel_node_t*>(row);
4396 	ibool*		found = static_cast<ibool*>(user_arg);
4397 
4398 	exp = node->select_list;
4399 
4400 	while (exp) {
4401 		dfield_t*	dfield = que_node_get_val(exp);
4402 		ulint		len = dfield_get_len(dfield);
4403 
4404 		if (len != UNIV_SQL_NULL && len != 0) {
4405 			*found = TRUE;
4406 		}
4407 
4408 		exp = que_node_get_next(exp);
4409 	}
4410 
4411 	return(FALSE);
4412 }
4413 
4414 /*********************************************************************//**
4415 Check whether a particular word (term) exists in the FTS index.
4416 @return DB_SUCCESS if all went well else error code */
4417 static
4418 dberr_t
fts_is_word_in_index(trx_t * trx,que_t ** graph,fts_table_t * fts_table,const fts_string_t * word,ibool * found)4419 fts_is_word_in_index(
4420 /*=================*/
4421 	trx_t*		trx,			/*!< in: FTS query state */
4422 	que_t**		graph,			/* out: Query graph */
4423 	fts_table_t*	fts_table,		/*!< in: table instance */
4424 	const fts_string_t*
4425 			word,			/*!< in: the word to check */
4426 	ibool*		found)			/* out: TRUE if exists */
4427 {
4428 	pars_info_t*	info;
4429 	dberr_t		error;
4430 	char		table_name[MAX_FULL_NAME_LEN];
4431 
4432 	trx->op_info = "looking up word in FTS index";
4433 
4434 	if (*graph) {
4435 		info = (*graph)->info;
4436 	} else {
4437 		info = pars_info_create();
4438 	}
4439 
4440 	fts_get_table_name(fts_table, table_name);
4441 	pars_info_bind_id(info, true, "table_name", table_name);
4442 	pars_info_bind_function(info, "my_func", fts_lookup_word, found);
4443 	pars_info_bind_varchar_literal(info, "word", word->f_str, word->f_len);
4444 
4445 	if (*graph == NULL) {
4446 		*graph = fts_parse_sql(
4447 			fts_table,
4448 			info,
4449 			"DECLARE FUNCTION my_func;\n"
4450 			"DECLARE CURSOR c IS"
4451 			" SELECT doc_count\n"
4452 			" FROM $table_name\n"
4453 			" WHERE word = :word"
4454 			" ORDER BY first_doc_id;\n"
4455 			"BEGIN\n"
4456 			"\n"
4457 			"OPEN c;\n"
4458 			"WHILE 1 = 1 LOOP\n"
4459 			"  FETCH c INTO my_func();\n"
4460 			"  IF c % NOTFOUND THEN\n"
4461 			"    EXIT;\n"
4462 			"  END IF;\n"
4463 			"END LOOP;\n"
4464 			"CLOSE c;");
4465 	}
4466 
4467 	for (;;) {
4468 		error = fts_eval_sql(trx, *graph);
4469 
4470 		if (error == DB_SUCCESS) {
4471 
4472 			break;				/* Exit the loop. */
4473 		} else {
4474 
4475 			if (error == DB_LOCK_WAIT_TIMEOUT) {
4476 				ib::warn() << "Lock wait timeout reading"
4477 					" FTS index. Retrying!";
4478 
4479 				trx->error_state = DB_SUCCESS;
4480 			} else {
4481 				ib::error() << "(" << ut_strerr(error)
4482 					<< ") while reading FTS index.";
4483 
4484 				break;			/* Exit the loop. */
4485 			}
4486 		}
4487 	}
4488 
4489 	return(error);
4490 }
4491 #endif /* FTS_DOC_STATS_DEBUG */
4492 
4493 /*********************************************************************//**
4494 Begin Sync, create transaction, acquire locks, etc. */
4495 static
4496 void
fts_sync_begin(fts_sync_t * sync)4497 fts_sync_begin(
4498 /*===========*/
4499 	fts_sync_t*	sync)			/*!< in: sync state */
4500 {
4501 	fts_cache_t*	cache = sync->table->fts->cache;
4502 
4503 	n_nodes = 0;
4504 	elapsed_time = 0;
4505 
4506 	sync->start_time = ut_time_monotonic();
4507 
4508 	sync->trx = trx_allocate_for_background();
4509 
4510 	if (fts_enable_diag_print) {
4511 		ib::info() << "FTS SYNC for table " << sync->table->name
4512 			<< ", deleted count: "
4513 			<< ib_vector_size(cache->deleted_doc_ids)
4514 			<< " size: " << cache->total_size << " bytes";
4515 	}
4516 }
4517 
4518 /*********************************************************************//**
4519 Run SYNC on the table, i.e., write out data from the index specific
4520 cache to the FTS aux INDEX table and FTS aux doc id stats table.
4521 @return DB_SUCCESS if all OK */
4522 static MY_ATTRIBUTE((nonnull, warn_unused_result))
4523 dberr_t
fts_sync_index(fts_sync_t * sync,fts_index_cache_t * index_cache)4524 fts_sync_index(
4525 /*===========*/
4526 	fts_sync_t*		sync,		/*!< in: sync state */
4527 	fts_index_cache_t*	index_cache)	/*!< in: index cache */
4528 {
4529 	trx_t*		trx = sync->trx;
4530 	dberr_t		error = DB_SUCCESS;
4531 
4532 	trx->op_info = "doing SYNC index";
4533 
4534 	if (fts_enable_diag_print) {
4535 		ib::info() << "SYNC words: " << rbt_size(index_cache->words);
4536 	}
4537 
4538 	ut_ad(rbt_validate(index_cache->words));
4539 
4540 	error = fts_sync_write_words(trx, index_cache, sync->unlock_cache,
4541 				     sync->start_time);
4542 
4543 #ifdef FTS_DOC_STATS_DEBUG
4544 	/* FTS_RESOLVE: the word counter info in auxiliary table "DOC_ID"
4545 	is not used currently for ranking. We disable fts_sync_write_doc_stats()
4546 	for now */
4547 	/* Write the per doc statistics that will be used for ranking. */
4548 	if (error == DB_SUCCESS) {
4549 
4550 		error = fts_sync_write_doc_stats(trx, index_cache);
4551 	}
4552 #endif /* FTS_DOC_STATS_DEBUG */
4553 
4554 	return(error);
4555 }
4556 
4557 /** Check if index cache has been synced completely
4558 @param[in,out]	index_cache	index cache
4559 @return true if index is synced, otherwise false. */
4560 static
4561 bool
fts_sync_index_check(fts_index_cache_t * index_cache)4562 fts_sync_index_check(
4563 	fts_index_cache_t*	index_cache)
4564 {
4565 	const ib_rbt_node_t*	rbt_node;
4566 
4567 	for (rbt_node = rbt_first(index_cache->words);
4568 	     rbt_node != NULL;
4569 	     rbt_node = rbt_next(index_cache->words, rbt_node)) {
4570 
4571 		fts_tokenizer_word_t*	word;
4572 		word = rbt_value(fts_tokenizer_word_t, rbt_node);
4573 
4574 		fts_node_t*	fts_node;
4575 		fts_node = static_cast<fts_node_t*>(ib_vector_last(word->nodes));
4576 
4577 		if (!fts_node->synced) {
4578 			return(false);
4579 		}
4580 	}
4581 
4582 	return(true);
4583 }
4584 
4585 /** Reset synced flag in index cache when rollback
4586 @param[in,out]	index_cache	index cache */
4587 static
4588 void
fts_sync_index_reset(fts_index_cache_t * index_cache)4589 fts_sync_index_reset(
4590 	fts_index_cache_t*	index_cache)
4591 {
4592 	const ib_rbt_node_t*	rbt_node;
4593 
4594 	for (rbt_node = rbt_first(index_cache->words);
4595 	     rbt_node != NULL;
4596 	     rbt_node = rbt_next(index_cache->words, rbt_node)) {
4597 
4598 		fts_tokenizer_word_t*	word;
4599 		word = rbt_value(fts_tokenizer_word_t, rbt_node);
4600 
4601 		fts_node_t*	fts_node;
4602 		fts_node = static_cast<fts_node_t*>(ib_vector_last(word->nodes));
4603 
4604 		fts_node->synced = false;
4605 	}
4606 }
4607 
4608 /** Commit the SYNC, change state of processed doc ids etc.
4609 @param[in,out]	sync	sync state
4610 @return DB_SUCCESS if all OK */
4611 static  MY_ATTRIBUTE((nonnull, warn_unused_result))
4612 dberr_t
fts_sync_commit(fts_sync_t * sync)4613 fts_sync_commit(
4614 	fts_sync_t*	sync)
4615 {
4616 	dberr_t		error;
4617 	trx_t*		trx = sync->trx;
4618 	fts_cache_t*	cache = sync->table->fts->cache;
4619 	doc_id_t	last_doc_id;
4620 
4621 	trx->op_info = "doing SYNC commit";
4622 
4623 	/* After each Sync, update the CONFIG table about the max doc id
4624 	we just sync-ed to index table */
4625 	error = fts_cmp_set_sync_doc_id(sync->table, sync->max_doc_id, FALSE,
4626 					&last_doc_id);
4627 
4628 	/* Get the list of deleted documents that are either in the
4629 	cache or were headed there but were deleted before the add
4630 	thread got to them. */
4631 
4632 	if (error == DB_SUCCESS && ib_vector_size(cache->deleted_doc_ids) > 0) {
4633 
4634 		error = fts_sync_add_deleted_cache(
4635 			sync, cache->deleted_doc_ids);
4636 	}
4637 
4638 	/* We need to do this within the deleted lock since fts_delete() can
4639 	attempt to add a deleted doc id to the cache deleted id array. */
4640 	fts_cache_clear(cache);
4641 	DEBUG_SYNC_C("fts_deleted_doc_ids_clear");
4642 	fts_cache_init(cache);
4643 	rw_lock_x_unlock(&cache->lock);
4644 
4645 	if (error == DB_SUCCESS) {
4646 
4647 		fts_sql_commit(trx);
4648 
4649 	} else if (error != DB_SUCCESS) {
4650 
4651 		fts_sql_rollback(trx);
4652 
4653 		ib::error() << "(" << ut_strerr(error) << ") during SYNC.";
4654 	}
4655 
4656 	if (fts_enable_diag_print && elapsed_time) {
4657 		ib::info() << "SYNC for table " << sync->table->name
4658 			<< ": SYNC time: "
4659 			<< (ut_time_monotonic() - sync->start_time)
4660 			<< " secs: elapsed "
4661 			<< (double) n_nodes / elapsed_time
4662 			<< " ins/sec";
4663 	}
4664 
4665 	/* Avoid assertion in trx_free(). */
4666 	trx->dict_operation_lock_mode = 0;
4667 	trx_free_for_background(trx);
4668 
4669 	return(error);
4670 }
4671 
4672 /*********************************************************************//**
4673 Rollback a sync operation */
4674 static
4675 void
fts_sync_rollback(fts_sync_t * sync)4676 fts_sync_rollback(
4677 /*==============*/
4678 	fts_sync_t*	sync)			/*!< in: sync state */
4679 {
4680 	trx_t*		trx = sync->trx;
4681 	fts_cache_t*	cache = sync->table->fts->cache;
4682 
4683 	for (ulint i = 0; i < ib_vector_size(cache->indexes); ++i) {
4684 		ulint			j;
4685 		fts_index_cache_t*	index_cache;
4686 
4687 		index_cache = static_cast<fts_index_cache_t*>(
4688 			ib_vector_get(cache->indexes, i));
4689 
4690 		/* Reset synced flag so nodes will not be skipped
4691 		in the next sync, see fts_sync_write_words(). */
4692 		fts_sync_index_reset(index_cache);
4693 
4694 		for (j = 0; fts_index_selector[j].value; ++j) {
4695 
4696 			if (index_cache->ins_graph[j] != NULL) {
4697 
4698 				fts_que_graph_free_check_lock(
4699 					NULL, index_cache,
4700 					index_cache->ins_graph[j]);
4701 
4702 				index_cache->ins_graph[j] = NULL;
4703 			}
4704 
4705 			if (index_cache->sel_graph[j] != NULL) {
4706 
4707 				fts_que_graph_free_check_lock(
4708 					NULL, index_cache,
4709 					index_cache->sel_graph[j]);
4710 
4711 				index_cache->sel_graph[j] = NULL;
4712 			}
4713 		}
4714 	}
4715 
4716 	rw_lock_x_unlock(&cache->lock);
4717 
4718 	fts_sql_rollback(trx);
4719 
4720 	/* Avoid assertion in trx_free(). */
4721 	trx->dict_operation_lock_mode = 0;
4722 	trx_free_for_background(trx);
4723 }
4724 
4725 /** Check that all indexes are synced.
4726 @param[in,out]	sync		sync state
4727 @return true if all indexes are synced, false otherwise. */
4728 static
4729 bool
fts_check_all_indexes_synced(fts_sync_t * sync)4730 fts_check_all_indexes_synced(
4731 	fts_sync_t*	sync)
4732 {
4733 	ulint i;
4734 	fts_cache_t*	cache = sync->table->fts->cache;
4735 
4736 	/* Make sure all the caches are synced. */
4737 	for (i = 0; i < ib_vector_size(cache->indexes); ++i) {
4738 		fts_index_cache_t*	index_cache;
4739 
4740 		index_cache = static_cast<fts_index_cache_t*>(
4741 			ib_vector_get(cache->indexes, i));
4742 
4743 		if (index_cache->index->to_be_dropped
4744 		    || index_cache->index->table->to_be_dropped
4745 		    || fts_sync_index_check(index_cache)) {
4746 			continue;
4747 		}
4748 
4749 		return false;
4750 	}
4751 
4752 	return true;
4753 }
4754 
4755 /** Run SYNC on the table, i.e., write out data from the cache to the
4756 FTS auxiliary INDEX table and clear the cache at the end.
4757 @param[in,out]	sync		sync state
4758 @param[in]	unlock_cache	whether unlock cache lock when write node
4759 @param[in]	wait		whether wait when a sync is in progress
4760 @param[in]	has_dict_lock		whether has dict operation lock
4761 @return DB_SUCCESS if all OK */
4762 static
4763 dberr_t
fts_sync(fts_sync_t * sync,bool unlock_cache,bool wait,bool has_dict_lock)4764 fts_sync(
4765 	fts_sync_t*	sync,
4766 	bool		unlock_cache,
4767 	bool		wait,
4768 	bool		has_dict_lock)
4769 {
4770 	ulint		i;
4771 	dberr_t		error = DB_SUCCESS;
4772 	fts_cache_t*	cache = sync->table->fts->cache;
4773 
4774 	rw_lock_x_lock(&cache->lock);
4775 
4776 	/* Check if cache is being synced.
4777 	Note: we release cache lock in fts_sync_write_words() to
4778 	avoid long wait for the lock by other threads. */
4779 	while (sync->in_progress) {
4780 		rw_lock_x_unlock(&cache->lock);
4781 
4782 		if (wait) {
4783 			os_event_wait(sync->event);
4784 		} else {
4785 			return(DB_SUCCESS);
4786 		}
4787 
4788 		rw_lock_x_lock(&cache->lock);
4789 	}
4790 
4791 	sync->unlock_cache = unlock_cache;
4792 	sync->in_progress = true;
4793 
4794 	DEBUG_SYNC_C("fts_sync_begin");
4795 	fts_sync_begin(sync);
4796 
4797 	if (has_dict_lock) {
4798 		/* If lock is already taken mark that in transaction
4799 		 * so rollback will not try to take it again.
4800 		 */
4801 		sync->trx->dict_operation_lock_mode = RW_S_LATCH;
4802 	}
4803 
4804 	do {
4805 		if (cache->total_size > fts_max_cache_size) {
4806 			/* Avoid the case: sync never finish when
4807 			insert/update keeps comming. */
4808 			ut_ad(sync->unlock_cache);
4809 			sync->unlock_cache = false;
4810 		}
4811 
4812 		for (i = 0; i < ib_vector_size(cache->indexes); ++i) {
4813 			fts_index_cache_t*	index_cache;
4814 
4815 			index_cache = static_cast<fts_index_cache_t*>(
4816 				ib_vector_get(cache->indexes, i));
4817 
4818 			if (index_cache->index->to_be_dropped
4819 			    || index_cache->index->table->to_be_dropped) {
4820 				continue;
4821 			}
4822 
4823 			DBUG_EXECUTE_IF("fts_instrument_sync_before_syncing",
4824 				os_thread_sleep(300000););
4825 
4826 			index_cache->index->index_fts_syncing = true;
4827 
4828 			error = fts_sync_index(sync, index_cache);
4829 
4830 			if (error != DB_SUCCESS) {
4831 				break;
4832 			}
4833 		}
4834 
4835 		DBUG_EXECUTE_IF("fts_instrument_sync_interrupted",
4836 				sync->interrupted = true;
4837 				error = DB_INTERRUPTED;
4838 		);
4839 
4840 		if (error != DB_SUCCESS) {
4841 			break;
4842 		}
4843 	} while (!fts_check_all_indexes_synced(sync));
4844 
4845 	if (error == DB_SUCCESS && !sync->interrupted) {
4846 		error = fts_sync_commit(sync);
4847 	} else {
4848 		fts_sync_rollback(sync);
4849 	}
4850 
4851 	rw_lock_x_lock(&cache->lock);
4852 	/* Clear fts syncing flags of any indexes in case sync is
4853 	interrupted */
4854 	DEBUG_SYNC_C("fts_instrument_sync");
4855 	for (i = 0; i < ib_vector_size(cache->indexes); ++i) {
4856 		fts_index_cache_t*      index_cache;
4857 		index_cache = static_cast<fts_index_cache_t*>(
4858 			ib_vector_get(cache->indexes, i));
4859 			if (index_cache->index->index_fts_syncing == true) {
4860 				index_cache->index->index_fts_syncing = false;
4861 			}
4862 		}
4863 	sync->interrupted = false;
4864 	sync->in_progress = false;
4865 	os_event_set(sync->event);
4866 	rw_lock_x_unlock(&cache->lock);
4867 
4868 	/* We need to check whether an optimize is required, for that
4869 	we make copies of the two variables that control the trigger. These
4870 	variables can change behind our back and we don't want to hold the
4871 	lock for longer than is needed. */
4872 	mutex_enter(&cache->deleted_lock);
4873 
4874 	cache->added = 0;
4875 	cache->deleted = 0;
4876 
4877 	mutex_exit(&cache->deleted_lock);
4878 
4879 	return(error);
4880 }
4881 
4882 /** Run SYNC on the table, i.e., write out data from the cache to the
4883 FTS auxiliary INDEX table and clear the cache at the end.
4884 @param[in,out]	table		fts table
4885 @param[in]	unlock_cache	whether unlock cache when write node
4886 @param[in]	wait		whether wait for existing sync to finish
4887 @param[in]	has_dict	whether has dict operation lock
4888 @return DB_SUCCESS on success, error code on failure. */
4889 dberr_t
fts_sync_table(dict_table_t * table,bool unlock_cache,bool wait,bool has_dict)4890 fts_sync_table(
4891 	dict_table_t*	table,
4892 	bool		unlock_cache,
4893 	bool		wait,
4894 	bool		has_dict)
4895 {
4896 	dberr_t	err = DB_SUCCESS;
4897 
4898 	ut_ad(table->fts);
4899 
4900 	if (!dict_table_is_discarded(table) && table->fts->cache
4901 	    && !dict_table_is_corrupted(table)) {
4902 		err = fts_sync(table->fts->cache->sync,
4903 			       unlock_cache, wait, has_dict);
4904 	}
4905 
4906 	return(err);
4907 }
4908 
4909 /** Check fts token
4910 1. for ngram token, check whether the token contains any words in stopwords
4911 2. for non-ngram token, check if it's stopword or less than fts_min_token_size
4912 or greater than fts_max_token_size.
4913 @param[in]	token		token string
4914 @param[in]	stopwords	stopwords rb tree
4915 @param[in]	is_ngram	is ngram parser
4916 @param[in]	cs		token charset
4917 @param[in]	skip		true if the check should be skipped
4918 @retval	true	if it is not stopword and length in range
4919 @retval	false	if it is stopword or lenght not in range */
4920 bool
fts_check_token(const fts_string_t * token,const ib_rbt_t * stopwords,bool is_ngram,const CHARSET_INFO * cs,bool skip)4921 fts_check_token(
4922 	const fts_string_t*		token,
4923 	const ib_rbt_t*			stopwords,
4924 	bool				is_ngram,
4925 	const CHARSET_INFO*		cs,
4926 	bool				skip)
4927 {
4928 	ut_ad(cs != NULL || stopwords == NULL);
4929 
4930 	if (skip) {
4931 		return(true);
4932 	}
4933 
4934 	if (!is_ngram) {
4935 		ib_rbt_bound_t  parent;
4936 
4937 		if (token->f_n_char < fts_min_token_size
4938 		    || token->f_n_char > fts_max_token_size
4939 		    || (stopwords != NULL
4940 			&& rbt_search(stopwords, &parent, token) == 0)) {
4941 			return(false);
4942 		} else {
4943 			return(true);
4944 		}
4945 	}
4946 
4947 	/* Check token for ngram. */
4948 	DBUG_EXECUTE_IF(
4949 		"fts_instrument_ignore_ngram_check",
4950 		return(true);
4951 	);
4952 
4953 	/* We ignore fts_min_token_size when ngram */
4954 	ut_ad(token->f_n_char > 0
4955 	      && token->f_n_char <= fts_max_token_size);
4956 
4957 	if (stopwords == NULL) {
4958 		return(true);
4959 	}
4960 
4961 	/*Ngram checks whether the token contains any words in stopwords.
4962 	We can't simply use CONTAIN to search in stopwords, because it's
4963 	built on COMPARE. So we need to tokenize the token into words
4964 	from unigram to f_n_char, and check them separately. */
4965 	for (ulint ngram_token_size = 1; ngram_token_size <= token->f_n_char;
4966 	     ngram_token_size ++) {
4967 		const char*	start;
4968 		const char*	next;
4969 		const char*	end;
4970 		ulint		char_len;
4971 		ulint		n_chars;
4972 
4973 		start = reinterpret_cast<char*>(token->f_str);
4974 		next = start;
4975 		end = start + token->f_len;
4976 		n_chars = 0;
4977 
4978 		while (next < end) {
4979 			char_len = my_mbcharlen_ptr(cs, next, end);
4980 
4981 			if (next + char_len > end || char_len == 0) {
4982 				break;
4983 			} else {
4984 				/* Skip SPACE */
4985 				if (char_len == 1 && *next == ' ') {
4986 					start = next + 1;
4987 					next = start;
4988 					n_chars = 0;
4989 
4990 					continue;
4991 				}
4992 
4993 				next += char_len;
4994 				n_chars++;
4995 			}
4996 
4997 			if (n_chars == ngram_token_size) {
4998 				fts_string_t	ngram_token;
4999 				ngram_token.f_str =
5000 					reinterpret_cast<byte*>(
5001 					const_cast<char*>(start));
5002 				ngram_token.f_len = next - start;
5003 				ngram_token.f_n_char = ngram_token_size;
5004 
5005 				ib_rbt_bound_t  parent;
5006 				if (rbt_search(stopwords, &parent,
5007 					       &ngram_token) == 0) {
5008 					return(false);
5009 				}
5010 
5011 				/* Move a char forward */
5012 				start += my_mbcharlen_ptr(cs, start, end);
5013 				n_chars = ngram_token_size - 1;
5014 			}
5015 		}
5016 	}
5017 
5018 	return(true);
5019 }
5020 
5021 /** Add the token and its start position to the token's list of positions.
5022 @param[in,out]	result_doc	result doc rb tree
5023 @param[in]	str		token string
5024 @param[in]	position	token position */
5025 static
5026 void
fts_add_token(fts_doc_t * result_doc,fts_string_t str,ulint position)5027 fts_add_token(
5028 	fts_doc_t*	result_doc,
5029 	fts_string_t	str,
5030 	ulint		position)
5031 {
5032 	/* Ignore string whose character number is less than
5033 	"fts_min_token_size" or more than "fts_max_token_size" */
5034 
5035 	ut_ad(current_thd != NULL);
5036 	if (fts_check_token(&str, NULL, result_doc->is_ngram,
5037 			    result_doc->charset,
5038 			    thd_has_ft_ignore_stopwords(current_thd))) {
5039 
5040 		mem_heap_t*	heap;
5041 		fts_string_t	t_str;
5042 		fts_token_t*	token;
5043 		ib_rbt_bound_t	parent;
5044 		ulint		newlen;
5045 
5046 		heap = static_cast<mem_heap_t*>(result_doc->self_heap->arg);
5047 
5048 		t_str.f_n_char = str.f_n_char;
5049 
5050 		t_str.f_len = str.f_len * result_doc->charset->casedn_multiply + 1;
5051 
5052 		t_str.f_str = static_cast<byte*>(
5053 			mem_heap_alloc(heap, t_str.f_len));
5054 
5055 		/* For binary collations, a case sensitive search is
5056 		performed. Hence don't convert to lower case. */
5057 		if (my_binary_compare(result_doc->charset)) {
5058 			memcpy(t_str.f_str, str.f_str, str.f_len);
5059 			t_str.f_str[str.f_len]= 0;
5060 			newlen= str.f_len;
5061 		} else {
5062 			newlen = innobase_fts_casedn_str(
5063 				result_doc->charset, (char*) str.f_str, str.f_len,
5064 				(char*) t_str.f_str, t_str.f_len);
5065 		}
5066 
5067 		t_str.f_len = newlen;
5068 		t_str.f_str[newlen] = 0;
5069 
5070 		/* Add the word to the document statistics. If the word
5071 		hasn't been seen before we create a new entry for it. */
5072 		if (rbt_search(result_doc->tokens, &parent, &t_str) != 0) {
5073 			fts_token_t	new_token;
5074 
5075 			new_token.text.f_len = newlen;
5076 			new_token.text.f_str = t_str.f_str;
5077 			new_token.text.f_n_char = t_str.f_n_char;
5078 
5079 			new_token.positions = ib_vector_create(
5080 				result_doc->self_heap, sizeof(ulint), 32);
5081 
5082 			parent.last = rbt_add_node(
5083 				result_doc->tokens, &parent, &new_token);
5084 
5085 			ut_ad(rbt_validate(result_doc->tokens));
5086 		}
5087 
5088 		token = rbt_value(fts_token_t, parent.last);
5089 		ib_vector_push(token->positions, &position);
5090 	}
5091 }
5092 
5093 /********************************************************************
5094 Process next token from document starting at the given position, i.e., add
5095 the token's start position to the token's list of positions.
5096 @return number of characters handled in this call */
5097 static
5098 ulint
fts_process_token(fts_doc_t * doc,fts_doc_t * result,ulint start_pos,ulint add_pos)5099 fts_process_token(
5100 /*==============*/
5101 	fts_doc_t*	doc,		/* in/out: document to
5102 					tokenize */
5103 	fts_doc_t*	result,		/* out: if provided, save
5104 					result here */
5105 	ulint		start_pos,	/*!< in: start position in text */
5106 	ulint		add_pos)	/*!< in: add this position to all
5107 					tokens from this tokenization */
5108 {
5109 	ulint		ret;
5110 	fts_string_t	str;
5111 	ulint		position;
5112 	fts_doc_t*	result_doc;
5113 	byte		buf[FTS_MAX_WORD_LEN + 1];
5114 
5115 	str.f_str = buf;
5116 
5117 	/* Determine where to save the result. */
5118 	result_doc = (result != NULL) ? result : doc;
5119 
5120 	/* The length of a string in characters is set here only. */
5121 
5122 	ret = innobase_mysql_fts_get_token(
5123 		doc->charset, doc->text.f_str + start_pos,
5124 		doc->text.f_str + doc->text.f_len, false, &str);
5125 
5126 	position = start_pos + ret - str.f_len + add_pos;
5127 
5128 	fts_add_token(result_doc, str, position);
5129 
5130 	return(ret);
5131 }
5132 
5133 /*************************************************************//**
5134 Get token char size by charset
5135 @return token size */
5136 ulint
fts_get_token_size(const CHARSET_INFO * cs,const char * token,ulint len)5137 fts_get_token_size(
5138 /*===============*/
5139 	const CHARSET_INFO*	cs,	/*!< in: Character set */
5140 	const char*		token,	/*!< in: token */
5141 	ulint			len)	/*!< in: token length */
5142 {
5143 	char*	start;
5144 	char*	end;
5145 	ulint	size = 0;
5146 
5147 	/* const_cast is for reinterpret_cast below, or it will fail. */
5148 	start = const_cast<char*>(token);
5149 	end = start + len;
5150 	while (start < end) {
5151 		int	ctype;
5152 		int	mbl;
5153 
5154 		mbl = cs->cset->ctype(
5155 			cs, &ctype,
5156 			reinterpret_cast<uchar*>(start),
5157 			reinterpret_cast<uchar*>(end));
5158 
5159 		size++;
5160 
5161 		start += mbl > 0 ? mbl : (mbl < 0 ? -mbl : 1);
5162 	}
5163 
5164 	return(size);
5165 }
5166 
5167 /*************************************************************//**
5168 FTS plugin parser 'myql_parser' callback function for document tokenize.
5169 Refer to 'st_mysql_ftparser_param' for more detail.
5170 @return always returns 0 */
5171 int
fts_tokenize_document_internal(MYSQL_FTPARSER_PARAM * param,char * doc,int len)5172 fts_tokenize_document_internal(
5173 /*===========================*/
5174 	MYSQL_FTPARSER_PARAM*	param,	/*!< in: parser parameter */
5175 	char*			doc,	/*!< in/out: document */
5176 	int			len)	/*!< in: document length */
5177 {
5178 	fts_string_t	str;
5179 	byte		buf[FTS_MAX_WORD_LEN + 1];
5180 	MYSQL_FTPARSER_BOOLEAN_INFO bool_info =
5181 		{ FT_TOKEN_WORD, 0, 0, 0, 0, 0, ' ', 0 };
5182 
5183 	ut_ad(len >= 0);
5184 
5185 	str.f_str = buf;
5186 
5187 	for (ulint i = 0, inc = 0; i < static_cast<ulint>(len); i += inc) {
5188 		inc = innobase_mysql_fts_get_token(
5189 			const_cast<CHARSET_INFO*>(param->cs),
5190 			reinterpret_cast<byte*>(doc) + i,
5191 			reinterpret_cast<byte*>(doc) + len, false,
5192 			&str);
5193 
5194 		if (str.f_len > 0) {
5195 			bool_info.position =
5196 				static_cast<int>(i + inc - str.f_len);
5197 			ut_ad(bool_info.position >= 0);
5198 
5199 			/* Stop when add word fails */
5200 			if (param->mysql_add_word(
5201 				param,
5202 				reinterpret_cast<char*>(str.f_str),
5203 				static_cast<int>(str.f_len),
5204 				&bool_info)) {
5205 				break;
5206 			}
5207 		}
5208 	}
5209 
5210 	return(0);
5211 }
5212 
5213 /******************************************************************//**
5214 FTS plugin parser 'myql_add_word' callback function for document tokenize.
5215 Refer to 'st_mysql_ftparser_param' for more detail.
5216 @return always returns 0 */
5217 static
5218 int
fts_tokenize_add_word_for_parser(MYSQL_FTPARSER_PARAM * param,char * word,int word_len,MYSQL_FTPARSER_BOOLEAN_INFO * boolean_info)5219 fts_tokenize_add_word_for_parser(
5220 /*=============================*/
5221 	MYSQL_FTPARSER_PARAM*	param,		/* in: parser paramter */
5222 	char*			word,		/* in: token word */
5223 	int			word_len,	/* in: word len */
5224 	MYSQL_FTPARSER_BOOLEAN_INFO* boolean_info) /* in: word boolean info */
5225 {
5226 	fts_string_t	str;
5227 	fts_tokenize_param_t*	fts_param;
5228 	fts_doc_t*	result_doc;
5229 	ulint		position;
5230 
5231 	fts_param = static_cast<fts_tokenize_param_t*>(param->mysql_ftparam);
5232 	result_doc = fts_param->result_doc;
5233 	ut_ad(result_doc != NULL);
5234 
5235 	str.f_str = reinterpret_cast<byte*>(word);
5236 	str.f_len = word_len;
5237 	str.f_n_char = fts_get_token_size(
5238 		const_cast<CHARSET_INFO*>(param->cs), word, word_len);
5239 
5240 	ut_ad(boolean_info->position >= 0);
5241 	position = boolean_info->position + fts_param->add_pos;
5242 
5243 	fts_add_token(result_doc, str, position);
5244 
5245 	return(0);
5246 }
5247 
5248 /******************************************************************//**
5249 Parse a document using an external / user supplied parser */
5250 static
5251 void
fts_tokenize_by_parser(fts_doc_t * doc,st_mysql_ftparser * parser,fts_tokenize_param_t * fts_param)5252 fts_tokenize_by_parser(
5253 /*===================*/
5254 	fts_doc_t*		doc,	/* in/out: document to tokenize */
5255 	st_mysql_ftparser*	parser, /* in: plugin fts parser */
5256 	fts_tokenize_param_t*	fts_param) /* in: fts tokenize param */
5257 {
5258 	MYSQL_FTPARSER_PARAM	param;
5259 
5260 	ut_a(parser);
5261 
5262 	/* Set paramters for param */
5263 	param.mysql_parse = fts_tokenize_document_internal;
5264 	param.mysql_add_word = fts_tokenize_add_word_for_parser;
5265 	param.mysql_ftparam = fts_param;
5266 	param.cs = doc->charset;
5267 	param.doc = reinterpret_cast<char*>(doc->text.f_str);
5268 	param.length = static_cast<int>(doc->text.f_len);
5269 	param.mode= MYSQL_FTPARSER_SIMPLE_MODE;
5270 
5271 	PARSER_INIT(parser, &param);
5272 	parser->parse(&param);
5273 	PARSER_DEINIT(parser, &param);
5274 }
5275 
5276 /******************************************************************//**
5277 Tokenize a document. */
5278 void
fts_tokenize_document(fts_doc_t * doc,fts_doc_t * result,st_mysql_ftparser * parser)5279 fts_tokenize_document(
5280 /*==================*/
5281 	fts_doc_t*	doc,		/* in/out: document to
5282 					tokenize */
5283 	fts_doc_t*	result,		/* out: if provided, save
5284 					the result token here */
5285 	st_mysql_ftparser*	parser) /* in: plugin fts parser */
5286 {
5287 	ut_a(!doc->tokens);
5288 	ut_a(doc->charset);
5289 
5290 	doc->tokens = rbt_create_arg_cmp(
5291 		sizeof(fts_token_t), innobase_fts_text_cmp, doc->charset);
5292 
5293 	if (parser != NULL) {
5294 		fts_tokenize_param_t	fts_param;
5295 
5296 		fts_param.result_doc = (result != NULL) ? result : doc;
5297 		fts_param.add_pos = 0;
5298 
5299 		fts_tokenize_by_parser(doc, parser, &fts_param);
5300 	} else {
5301 		ulint		inc;
5302 
5303 		for (ulint i = 0; i < doc->text.f_len; i += inc) {
5304 			inc = fts_process_token(doc, result, i, 0);
5305 			ut_a(inc > 0);
5306 		}
5307 	}
5308 }
5309 
5310 /******************************************************************//**
5311 Continue to tokenize a document. */
5312 void
fts_tokenize_document_next(fts_doc_t * doc,ulint add_pos,fts_doc_t * result,st_mysql_ftparser * parser)5313 fts_tokenize_document_next(
5314 /*=======================*/
5315 	fts_doc_t*	doc,		/*!< in/out: document to
5316 					tokenize */
5317 	ulint		add_pos,	/*!< in: add this position to all
5318 					tokens from this tokenization */
5319 	fts_doc_t*	result,		/*!< out: if provided, save
5320 					the result token here */
5321 	st_mysql_ftparser*	parser) /* in: plugin fts parser */
5322 {
5323 	ut_a(doc->tokens);
5324 
5325 	if (parser) {
5326 		fts_tokenize_param_t	fts_param;
5327 
5328 		fts_param.result_doc = (result != NULL) ? result : doc;
5329 		fts_param.add_pos = add_pos;
5330 
5331 		fts_tokenize_by_parser(doc, parser, &fts_param);
5332 	} else {
5333 		ulint		inc;
5334 
5335 		for (ulint i = 0; i < doc->text.f_len; i += inc) {
5336 			inc = fts_process_token(doc, result, i, add_pos);
5337 			ut_a(inc > 0);
5338 		}
5339 	}
5340 }
5341 
5342 /********************************************************************
5343 Create the vector of fts_get_doc_t instances. */
5344 ib_vector_t*
fts_get_docs_create(fts_cache_t * cache)5345 fts_get_docs_create(
5346 /*================*/
5347 						/* out: vector of
5348 						fts_get_doc_t instances */
5349 	fts_cache_t*	cache)			/*!< in: fts cache */
5350 {
5351 	ib_vector_t*	get_docs;
5352 
5353 	ut_ad(rw_lock_own(&cache->init_lock, RW_LOCK_X));
5354 
5355 	/* We need one instance of fts_get_doc_t per index. */
5356 	get_docs = ib_vector_create(cache->self_heap, sizeof(fts_get_doc_t), 4);
5357 
5358 	/* Create the get_doc instance, we need one of these
5359 	per FTS index. */
5360 	for (ulint i = 0; i < ib_vector_size(cache->indexes); ++i) {
5361 
5362 		dict_index_t**	index;
5363 		fts_get_doc_t*	get_doc;
5364 
5365 		index = static_cast<dict_index_t**>(
5366 			ib_vector_get(cache->indexes, i));
5367 
5368 		get_doc = static_cast<fts_get_doc_t*>(
5369 			ib_vector_push(get_docs, NULL));
5370 
5371 		memset(get_doc, 0x0, sizeof(*get_doc));
5372 
5373 		get_doc->index_cache = fts_get_index_cache(cache, *index);
5374 		get_doc->cache = cache;
5375 
5376 		/* Must find the index cache. */
5377 		ut_a(get_doc->index_cache != NULL);
5378 	}
5379 
5380 	return(get_docs);
5381 }
5382 
5383 /********************************************************************
5384 Release any resources held by the fts_get_doc_t instances. */
5385 static
5386 void
fts_get_docs_clear(ib_vector_t * get_docs)5387 fts_get_docs_clear(
5388 /*===============*/
5389 	ib_vector_t*	get_docs)		/*!< in: Doc retrieval vector */
5390 {
5391 	ulint		i;
5392 
5393 	/* Release the get doc graphs if any. */
5394 	for (i = 0; i < ib_vector_size(get_docs); ++i) {
5395 
5396 		fts_get_doc_t*	get_doc = static_cast<fts_get_doc_t*>(
5397 			ib_vector_get(get_docs, i));
5398 
5399 		if (get_doc->get_document_graph != NULL) {
5400 
5401 			ut_a(get_doc->index_cache);
5402 
5403 			fts_que_graph_free(get_doc->get_document_graph);
5404 			get_doc->get_document_graph = NULL;
5405 		}
5406 	}
5407 }
5408 
5409 /*********************************************************************//**
5410 Get the initial Doc ID by consulting the CONFIG table
5411 @return initial Doc ID */
5412 doc_id_t
fts_init_doc_id(const dict_table_t * table)5413 fts_init_doc_id(
5414 /*============*/
5415 	const dict_table_t*	table)		/*!< in: table */
5416 {
5417 	doc_id_t	max_doc_id = 0;
5418 
5419 	rw_lock_x_lock(&table->fts->cache->lock);
5420 
5421 	/* Return if the table is already initialized for DOC ID */
5422 	if (table->fts->cache->first_doc_id != FTS_NULL_DOC_ID) {
5423 		rw_lock_x_unlock(&table->fts->cache->lock);
5424 		return(0);
5425 	}
5426 
5427 	DEBUG_SYNC_C("fts_initialize_doc_id");
5428 
5429 	/* Then compare this value with the ID value stored in the CONFIG
5430 	table. The larger one will be our new initial Doc ID */
5431 	fts_cmp_set_sync_doc_id(table, 0, FALSE, &max_doc_id);
5432 
5433 	/* If DICT_TF2_FTS_ADD_DOC_ID is set, we are in the process of
5434 	creating index (and add doc id column. No need to recovery
5435 	documents */
5436 	if (!DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_ADD_DOC_ID)) {
5437 		fts_init_index((dict_table_t*) table, TRUE);
5438 	}
5439 
5440 	table->fts->fts_status |= ADDED_TABLE_SYNCED;
5441 
5442 	table->fts->cache->first_doc_id = max_doc_id;
5443 
5444 	rw_lock_x_unlock(&table->fts->cache->lock);
5445 
5446 	ut_ad(max_doc_id > 0);
5447 
5448 	return(max_doc_id);
5449 }
5450 
#ifdef FTS_MULT_INDEX
/** Check if the index of get_doc is in the affected set.
@param[in]	fts_indexes	affected FTS indexes
@param[in]	get_doc		info for reading document
@return TRUE if index is updated */
static
ibool
fts_is_index_updated(
	const ib_vector_t*	fts_indexes,
	const fts_get_doc_t*	get_doc)
{
	const dict_index_t* const	index = get_doc->index_cache->index;
	const ulint			n_indexes = ib_vector_size(fts_indexes);

	for (ulint i = 0; i < n_indexes; ++i) {
		const dict_index_t*	updated_fts_index
			= static_cast<const dict_index_t*>(
				ib_vector_getp_const(fts_indexes, i));

		ut_a(updated_fts_index != NULL);

		if (updated_fts_index == index) {
			return(TRUE);
		}
	}

	return(FALSE);
}
#endif
5482 
5483 /*********************************************************************//**
5484 Fetch COUNT(*) from specified table.
5485 @return the number of rows in the table */
5486 ulint
fts_get_rows_count(fts_table_t * fts_table)5487 fts_get_rows_count(
5488 /*===============*/
5489 	fts_table_t*	fts_table)	/*!< in: fts table to read */
5490 {
5491 	trx_t*		trx;
5492 	pars_info_t*	info;
5493 	que_t*		graph;
5494 	dberr_t		error;
5495 	ulint		count = 0;
5496 	char		table_name[MAX_FULL_NAME_LEN];
5497 
5498 	trx = trx_allocate_for_background();
5499 
5500 	trx->op_info = "fetching FT table rows count";
5501 
5502 	info = pars_info_create();
5503 
5504 	pars_info_bind_function(info, "my_func", fts_read_ulint, &count);
5505 
5506 	fts_get_table_name(fts_table, table_name);
5507 	pars_info_bind_id(info, true, "table_name", table_name);
5508 
5509 	graph = fts_parse_sql(
5510 		fts_table,
5511 		info,
5512 		"DECLARE FUNCTION my_func;\n"
5513 		"DECLARE CURSOR c IS"
5514 		" SELECT COUNT(*)"
5515 		" FROM $table_name;\n"
5516 		"BEGIN\n"
5517 		"\n"
5518 		"OPEN c;\n"
5519 		"WHILE 1 = 1 LOOP\n"
5520 		"  FETCH c INTO my_func();\n"
5521 		"  IF c % NOTFOUND THEN\n"
5522 		"    EXIT;\n"
5523 		"  END IF;\n"
5524 		"END LOOP;\n"
5525 		"CLOSE c;");
5526 
5527 	for (;;) {
5528 		error = fts_eval_sql(trx, graph);
5529 
5530 		if (error == DB_SUCCESS) {
5531 			fts_sql_commit(trx);
5532 
5533 			break;				/* Exit the loop. */
5534 		} else {
5535 			fts_sql_rollback(trx);
5536 
5537 			if (error == DB_LOCK_WAIT_TIMEOUT) {
5538 				ib::warn() << "lock wait timeout reading"
5539 					" FTS table. Retrying!";
5540 
5541 				trx->error_state = DB_SUCCESS;
5542 			} else {
5543 				ib::error() << "(" << ut_strerr(error)
5544 					<< ") while reading FTS table.";
5545 
5546 				break;			/* Exit the loop. */
5547 			}
5548 		}
5549 	}
5550 
5551 	fts_que_graph_free(graph);
5552 
5553 	trx_free_for_background(trx);
5554 
5555 	return(count);
5556 }
5557 
#ifdef FTS_CACHE_SIZE_DEBUG
/** Read the max cache size parameter from the config table and store
it in the sync state.
@param[in,out]	sync	sync state */
static
void
fts_update_max_cache_size(
	fts_sync_t*	sync)
{
	fts_table_t	fts_table;

	FTS_INIT_FTS_TABLE(&fts_table, "CONFIG", FTS_COMMON_TABLE, sync->table);

	trx_t*	trx = trx_allocate_for_background();

	/* The size returned is in bytes. */
	sync->max_cache_size = fts_get_max_cache_size(trx, &fts_table);

	fts_sql_commit(trx);

	trx_free_for_background(trx);
}
#endif /* FTS_CACHE_SIZE_DEBUG */
5582 
5583 /*********************************************************************//**
5584 Free the modified rows of a table. */
5585 UNIV_INLINE
5586 void
fts_trx_table_rows_free(ib_rbt_t * rows)5587 fts_trx_table_rows_free(
5588 /*====================*/
5589 	ib_rbt_t*	rows)			/*!< in: rbt of rows to free */
5590 {
5591 	const ib_rbt_node_t*	node;
5592 
5593 	for (node = rbt_first(rows); node; node = rbt_first(rows)) {
5594 		fts_trx_row_t*	row;
5595 
5596 		row = rbt_value(fts_trx_row_t, node);
5597 
5598 		if (row->fts_indexes != NULL) {
5599 			/* This vector shouldn't be using the
5600 			heap allocator.  */
5601 			ut_a(row->fts_indexes->allocator->arg == NULL);
5602 
5603 			ib_vector_free(row->fts_indexes);
5604 			row->fts_indexes = NULL;
5605 		}
5606 
5607 		ut_free(rbt_remove_node(rows, node));
5608 	}
5609 
5610 	ut_a(rbt_empty(rows));
5611 	rbt_free(rows);
5612 }
5613 
5614 /*********************************************************************//**
5615 Free an FTS savepoint instance. */
5616 UNIV_INLINE
5617 void
fts_savepoint_free(fts_savepoint_t * savepoint)5618 fts_savepoint_free(
5619 /*===============*/
5620 	fts_savepoint_t*	savepoint)	/*!< in: savepoint instance */
5621 {
5622 	const ib_rbt_node_t*	node;
5623 	ib_rbt_t*		tables = savepoint->tables;
5624 
5625 	/* Nothing to free! */
5626 	if (tables == NULL) {
5627 		return;
5628 	}
5629 
5630 	for (node = rbt_first(tables); node; node = rbt_first(tables)) {
5631 		fts_trx_table_t*	ftt;
5632 		fts_trx_table_t**	fttp;
5633 
5634 		fttp = rbt_value(fts_trx_table_t*, node);
5635 		ftt = *fttp;
5636 
5637 		/* This can be NULL if a savepoint was released. */
5638 		if (ftt->rows != NULL) {
5639 			fts_trx_table_rows_free(ftt->rows);
5640 			ftt->rows = NULL;
5641 		}
5642 
5643 		/* This can be NULL if a savepoint was released. */
5644 		if (ftt->added_doc_ids != NULL) {
5645 			fts_doc_ids_free(ftt->added_doc_ids);
5646 			ftt->added_doc_ids = NULL;
5647 		}
5648 
5649 		/* The default savepoint name must be NULL. */
5650 		if (ftt->docs_added_graph) {
5651 			fts_que_graph_free(ftt->docs_added_graph);
5652 		}
5653 
5654 		/* NOTE: We are responsible for free'ing the node */
5655 		ut_free(rbt_remove_node(tables, node));
5656 	}
5657 
5658 	ut_a(rbt_empty(tables));
5659 	rbt_free(tables);
5660 	savepoint->tables = NULL;
5661 }
5662 
5663 /*********************************************************************//**
5664 Free an FTS trx. */
5665 void
fts_trx_free(fts_trx_t * fts_trx)5666 fts_trx_free(
5667 /*=========*/
5668 	fts_trx_t*	fts_trx)		/* in, own: FTS trx */
5669 {
5670 	ulint		i;
5671 
5672 	for (i = 0; i < ib_vector_size(fts_trx->savepoints); ++i) {
5673 		fts_savepoint_t*	savepoint;
5674 
5675 		savepoint = static_cast<fts_savepoint_t*>(
5676 			ib_vector_get(fts_trx->savepoints, i));
5677 
5678 		/* The default savepoint name must be NULL. */
5679 		if (i == 0) {
5680 			ut_a(savepoint->name == NULL);
5681 		}
5682 
5683 		fts_savepoint_free(savepoint);
5684 	}
5685 
5686 	for (i = 0; i < ib_vector_size(fts_trx->last_stmt); ++i) {
5687 		fts_savepoint_t*	savepoint;
5688 
5689 		savepoint = static_cast<fts_savepoint_t*>(
5690 			ib_vector_get(fts_trx->last_stmt, i));
5691 
5692 		/* The default savepoint name must be NULL. */
5693 		if (i == 0) {
5694 			ut_a(savepoint->name == NULL);
5695 		}
5696 
5697 		fts_savepoint_free(savepoint);
5698 	}
5699 
5700 	if (fts_trx->heap) {
5701 		mem_heap_free(fts_trx->heap);
5702 	}
5703 }
5704 
5705 /*********************************************************************//**
5706 Extract the doc id from the FTS hidden column.
5707 @return doc id that was extracted from rec */
5708 doc_id_t
fts_get_doc_id_from_row(dict_table_t * table,dtuple_t * row)5709 fts_get_doc_id_from_row(
5710 /*====================*/
5711 	dict_table_t*	table,			/*!< in: table */
5712 	dtuple_t*	row)			/*!< in: row whose FTS doc id we
5713 						want to extract.*/
5714 {
5715 	dfield_t*	field;
5716 	doc_id_t	doc_id = 0;
5717 
5718 	ut_a(table->fts->doc_col != ULINT_UNDEFINED);
5719 
5720 	field = dtuple_get_nth_field(row, table->fts->doc_col);
5721 
5722 	ut_a(dfield_get_len(field) == sizeof(doc_id));
5723 	ut_a(dfield_get_type(field)->mtype == DATA_INT);
5724 
5725 	doc_id = fts_read_doc_id(
5726 		static_cast<const byte*>(dfield_get_data(field)));
5727 
5728 	return(doc_id);
5729 }
5730 
5731 /** Extract the doc id from the record that belongs to index.
5732 @param[in]	table	table
5733 @param[in]	rec	record contains FTS_DOC_ID
5734 @param[in]	index	index of rec
5735 @param[in]	heap	heap memory
5736 @return doc id that was extracted from rec */
5737 doc_id_t
fts_get_doc_id_from_rec(dict_table_t * table,const rec_t * rec,const dict_index_t * index,mem_heap_t * heap)5738 fts_get_doc_id_from_rec(
5739 	dict_table_t*		table,
5740 	const rec_t*		rec,
5741 	const dict_index_t*	index,
5742 	mem_heap_t*		heap)
5743 {
5744 	ulint		len;
5745 	const byte*	data;
5746 	ulint		col_no;
5747 	doc_id_t	doc_id = 0;
5748 	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
5749 	ulint*		offsets = offsets_;
5750 	mem_heap_t*	my_heap = heap;
5751 
5752 	ut_a(table->fts->doc_col != ULINT_UNDEFINED);
5753 
5754 	rec_offs_init(offsets_);
5755 
5756 	offsets = rec_get_offsets(
5757 		rec, index, offsets, ULINT_UNDEFINED, &my_heap);
5758 
5759 	col_no = dict_col_get_index_pos(
5760 		&table->cols[table->fts->doc_col], index);
5761 
5762 	ut_ad(col_no != ULINT_UNDEFINED);
5763 
5764 	data = rec_get_nth_field(rec, offsets, col_no, &len);
5765 
5766 	ut_a(len == 8);
5767 	ut_ad(8 == sizeof(doc_id));
5768 	doc_id = static_cast<doc_id_t>(mach_read_from_8(data));
5769 
5770 	if (my_heap && !heap) {
5771 		mem_heap_free(my_heap);
5772 	}
5773 
5774 	return(doc_id);
5775 }
5776 
5777 /*********************************************************************//**
5778 Search the index specific cache for a particular FTS index.
5779 @return the index specific cache else NULL */
5780 fts_index_cache_t*
fts_find_index_cache(const fts_cache_t * cache,const dict_index_t * index)5781 fts_find_index_cache(
5782 /*=================*/
5783 	const fts_cache_t*	cache,		/*!< in: cache to search */
5784 	const dict_index_t*	index)		/*!< in: index to search for */
5785 {
5786 	/* We cast away the const because our internal function, takes
5787 	non-const cache arg and returns a non-const pointer. */
5788 	return(static_cast<fts_index_cache_t*>(
5789 		fts_get_index_cache((fts_cache_t*) cache, index)));
5790 }
5791 
5792 /*********************************************************************//**
5793 Search cache for word.
5794 @return the word node vector if found else NULL */
5795 const ib_vector_t*
fts_cache_find_word(const fts_index_cache_t * index_cache,const fts_string_t * text)5796 fts_cache_find_word(
5797 /*================*/
5798 	const fts_index_cache_t*index_cache,	/*!< in: cache to search */
5799 	const fts_string_t*	text)		/*!< in: word to search for */
5800 {
5801 	ib_rbt_bound_t		parent;
5802 	const ib_vector_t*	nodes = NULL;
5803 #ifdef UNIV_DEBUG
5804 	dict_table_t*		table = index_cache->index->table;
5805 	fts_cache_t*		cache = table->fts->cache;
5806 
5807 	ut_ad(rw_lock_own(&cache->lock, RW_LOCK_X));
5808 #endif /* UNIV_DEBUG */
5809 
5810 	/* Lookup the word in the rb tree */
5811 	if (rbt_search(index_cache->words, &parent, text) == 0) {
5812 		const fts_tokenizer_word_t*	word;
5813 
5814 		word = rbt_value(fts_tokenizer_word_t, parent.last);
5815 
5816 		nodes = word->nodes;
5817 	}
5818 
5819 	return(nodes);
5820 }
5821 
5822 /*********************************************************************//**
5823 Check cache for deleted doc id.
5824 @return TRUE if deleted */
5825 ibool
fts_cache_is_deleted_doc_id(const fts_cache_t * cache,doc_id_t doc_id)5826 fts_cache_is_deleted_doc_id(
5827 /*========================*/
5828 	const fts_cache_t*	cache,		/*!< in: cache ito search */
5829 	doc_id_t		doc_id)		/*!< in: doc id to search for */
5830 {
5831 	ut_ad(mutex_own(&cache->deleted_lock));
5832 
5833 	for (ulint i = 0; i < ib_vector_size(cache->deleted_doc_ids); ++i) {
5834 		const fts_update_t*	update;
5835 
5836 		update = static_cast<const fts_update_t*>(
5837 			ib_vector_get_const(cache->deleted_doc_ids, i));
5838 
5839 		if (doc_id == update->doc_id) {
5840 
5841 			return(TRUE);
5842 		}
5843 	}
5844 
5845 	return(FALSE);
5846 }
5847 
5848 /*********************************************************************//**
5849 Append deleted doc ids to vector. */
5850 void
fts_cache_append_deleted_doc_ids(const fts_cache_t * cache,ib_vector_t * vector)5851 fts_cache_append_deleted_doc_ids(
5852 /*=============================*/
5853 	const fts_cache_t*	cache,		/*!< in: cache to use */
5854 	ib_vector_t*		vector)		/*!< in: append to this vector */
5855 {
5856 	mutex_enter(const_cast<ib_mutex_t*>(&cache->deleted_lock));
5857 
5858 	if (cache->deleted_doc_ids == NULL) {
5859 		mutex_exit((ib_mutex_t*) &cache->deleted_lock);
5860 		return;
5861 	}
5862 
5863 
5864 	for (ulint i = 0; i < ib_vector_size(cache->deleted_doc_ids); ++i) {
5865 		fts_update_t*	update;
5866 
5867 		update = static_cast<fts_update_t*>(
5868 			ib_vector_get(cache->deleted_doc_ids, i));
5869 
5870 		ib_vector_push(vector, &update->doc_id);
5871 	}
5872 
5873 	mutex_exit((ib_mutex_t*) &cache->deleted_lock);
5874 }
5875 
5876 /*********************************************************************//**
5877 Wait for the background thread to start. We poll to detect change
5878 of state, which is acceptable, since the wait should happen only
5879 once during startup.
5880 @return true if the thread started else FALSE (i.e timed out) */
5881 ibool
fts_wait_for_background_thread_to_start(dict_table_t * table,ulint max_wait)5882 fts_wait_for_background_thread_to_start(
5883 /*====================================*/
5884 	dict_table_t*		table,		/*!< in: table to which the thread
5885 						is attached */
5886 	ulint			max_wait)	/*!< in: time in microseconds, if
5887 						set to 0 then it disables
5888 						timeout checking */
5889 {
5890 	ulint			count = 0;
5891 	ibool			done = FALSE;
5892 
5893 	ut_a(max_wait == 0 || max_wait >= FTS_MAX_BACKGROUND_THREAD_WAIT);
5894 
5895 	for (;;) {
5896 		fts_t*		fts = table->fts;
5897 
5898 		mutex_enter(&fts->bg_threads_mutex);
5899 
5900 		if (fts->fts_status & BG_THREAD_READY) {
5901 
5902 			done = TRUE;
5903 		}
5904 
5905 		mutex_exit(&fts->bg_threads_mutex);
5906 
5907 		if (!done) {
5908 			os_thread_sleep(FTS_MAX_BACKGROUND_THREAD_WAIT);
5909 
5910 			if (max_wait > 0) {
5911 
5912 				max_wait -= FTS_MAX_BACKGROUND_THREAD_WAIT;
5913 
5914 				/* We ignore the residual value. */
5915 				if (max_wait < FTS_MAX_BACKGROUND_THREAD_WAIT) {
5916 					break;
5917 				}
5918 			}
5919 
5920 			++count;
5921 		} else {
5922 			break;
5923 		}
5924 
5925 		if (count >= FTS_BACKGROUND_THREAD_WAIT_COUNT) {
5926 			ib::error() << "The background thread for the FTS"
5927 				" table " << table->name
5928 				<< " refuses to start";
5929 
5930 			count = 0;
5931 		}
5932 	}
5933 
5934 	return(done);
5935 }
5936 
5937 /*********************************************************************//**
5938 Add the FTS document id hidden column. */
5939 void
fts_add_doc_id_column(dict_table_t * table,mem_heap_t * heap)5940 fts_add_doc_id_column(
5941 /*==================*/
5942 	dict_table_t*	table,	/*!< in/out: Table with FTS index */
5943 	mem_heap_t*	heap)	/*!< in: temporary memory heap, or NULL */
5944 {
5945 	dict_mem_table_add_col(
5946 		table, heap,
5947 		FTS_DOC_ID_COL_NAME,
5948 		DATA_INT,
5949 		dtype_form_prtype(
5950 			DATA_NOT_NULL | DATA_UNSIGNED
5951 			| DATA_BINARY_TYPE | DATA_FTS_DOC_ID, 0),
5952 		sizeof(doc_id_t));
5953 	DICT_TF2_FLAG_SET(table, DICT_TF2_FTS_HAS_DOC_ID);
5954 }
5955 
5956 /** Add new fts doc id to the update vector.
5957 @param[in]	table		the table that contains the FTS index.
5958 @param[in,out]	ufield		the fts doc id field in the update vector.
5959 				No new memory is allocated for this in this
5960 				function.
5961 @param[in,out]	next_doc_id	the fts doc id that has been added to the
5962 				update vector.  If 0, a new fts doc id is
5963 				automatically generated.  The memory provided
5964 				for this argument will be used by the update
5965 				vector. Ensure that the life time of this
5966 				memory matches that of the update vector.
5967 @return the fts doc id used in the update vector */
5968 doc_id_t
fts_update_doc_id(dict_table_t * table,upd_field_t * ufield,doc_id_t * next_doc_id)5969 fts_update_doc_id(
5970 	dict_table_t*	table,
5971 	upd_field_t*	ufield,
5972 	doc_id_t*	next_doc_id)
5973 {
5974 	doc_id_t	doc_id;
5975 	dberr_t		error = DB_SUCCESS;
5976 
5977 	if (*next_doc_id) {
5978 		doc_id = *next_doc_id;
5979 	} else {
5980 		/* Get the new document id that will be added. */
5981 		error = fts_get_next_doc_id(table, &doc_id);
5982 	}
5983 
5984 	if (error == DB_SUCCESS) {
5985 		dict_index_t*	clust_index;
5986 		dict_col_t*	col = dict_table_get_nth_col(
5987 			table, table->fts->doc_col);
5988 
5989 		ufield->exp = NULL;
5990 
5991 		ufield->new_val.len = sizeof(doc_id);
5992 
5993 		clust_index = dict_table_get_first_index(table);
5994 
5995 		ufield->field_no = dict_col_get_clust_pos(col, clust_index);
5996 		dict_col_copy_type(col, dfield_get_type(&ufield->new_val));
5997 
5998 		/* It is possible we update record that has
5999 		not yet be sync-ed from last crash. */
6000 
6001 		/* Convert to storage byte order. */
6002 		ut_a(doc_id != FTS_NULL_DOC_ID);
6003 		fts_write_doc_id((byte*) next_doc_id, doc_id);
6004 
6005 		ufield->new_val.data = next_doc_id;
6006                 ufield->new_val.ext = 0;
6007 	}
6008 
6009 	return(doc_id);
6010 }
6011 
6012 /*********************************************************************//**
6013 Check if the table has an FTS index. This is the non-inline version
6014 of dict_table_has_fts_index().
6015 @return TRUE if table has an FTS index */
6016 ibool
fts_dict_table_has_fts_index(dict_table_t * table)6017 fts_dict_table_has_fts_index(
6018 /*=========================*/
6019 	dict_table_t*	table)		/*!< in: table */
6020 {
6021 	return(dict_table_has_fts_index(table));
6022 }
6023 
6024 /** fts_t constructor.
6025 @param[in]	table	table with FTS indexes
6026 @param[in,out]	heap	memory heap where 'this' is stored */
fts_t(const dict_table_t * table,mem_heap_t * heap)6027 fts_t::fts_t(
6028 	const dict_table_t*	table,
6029 	mem_heap_t*		heap)
6030 	:
6031 	bg_threads(0),
6032 	fts_status(0),
6033 	add_wq(NULL),
6034 	cache(NULL),
6035 	doc_col(ULINT_UNDEFINED),
6036 	fts_heap(heap)
6037 {
6038 	ut_a(table->fts == NULL);
6039 
6040 	mutex_create(LATCH_ID_FTS_BG_THREADS, &bg_threads_mutex);
6041 
6042 	ib_alloc_t*	heap_alloc = ib_heap_allocator_create(fts_heap);
6043 
6044 	indexes = ib_vector_create(heap_alloc, sizeof(dict_index_t*), 4);
6045 
6046 	dict_table_get_all_fts_indexes(table, indexes);
6047 }
6048 
6049 /** fts_t destructor. */
~fts_t()6050 fts_t::~fts_t()
6051 {
6052 	mutex_free(&bg_threads_mutex);
6053 
6054 	ut_ad(add_wq == NULL);
6055 
6056 	if (cache != NULL) {
6057 		fts_cache_clear(cache);
6058 		fts_cache_destroy(cache);
6059 		cache = NULL;
6060 	}
6061 
6062 	/* There is no need to call ib_vector_free() on this->indexes
6063 	because it is stored in this->fts_heap. */
6064 }
6065 
6066 /*********************************************************************//**
6067 Create an instance of fts_t.
6068 @return instance of fts_t */
6069 fts_t*
fts_create(dict_table_t * table)6070 fts_create(
6071 /*=======*/
6072 	dict_table_t*	table)		/*!< in/out: table with FTS indexes */
6073 {
6074 	fts_t*		fts;
6075 	mem_heap_t*	heap;
6076 
6077 	heap = mem_heap_create(512);
6078 
6079 	fts = static_cast<fts_t*>(mem_heap_alloc(heap, sizeof(*fts)));
6080 
6081 	new(fts) fts_t(table, heap);
6082 
6083 	return(fts);
6084 }
6085 
6086 /*********************************************************************//**
6087 Free the FTS resources. */
6088 void
fts_free(dict_table_t * table)6089 fts_free(
6090 /*=====*/
6091 	dict_table_t*	table)	/*!< in/out: table with FTS indexes */
6092 {
6093 	fts_t*	fts = table->fts;
6094 
6095 	fts->~fts_t();
6096 
6097 	mem_heap_free(fts->fts_heap);
6098 
6099 	table->fts = NULL;
6100 }
6101 
6102 /*********************************************************************//**
6103 Signal FTS threads to initiate shutdown. */
6104 void
fts_start_shutdown(dict_table_t * table,fts_t * fts)6105 fts_start_shutdown(
6106 /*===============*/
6107 	dict_table_t*	table,		/*!< in: table with FTS indexes */
6108 	fts_t*		fts)		/*!< in: fts instance that needs
6109 					to be informed about shutdown */
6110 {
6111 	mutex_enter(&fts->bg_threads_mutex);
6112 
6113 	fts->fts_status |= BG_THREAD_STOP;
6114 
6115 	mutex_exit(&fts->bg_threads_mutex);
6116 
6117 }
6118 
6119 /*********************************************************************//**
6120 Wait for FTS threads to shutdown. */
6121 void
fts_shutdown(dict_table_t * table,fts_t * fts)6122 fts_shutdown(
6123 /*=========*/
6124 	dict_table_t*	table,		/*!< in: table with FTS indexes */
6125 	fts_t*		fts)		/*!< in: fts instance to shutdown */
6126 {
6127 	mutex_enter(&fts->bg_threads_mutex);
6128 
6129 	ut_a(fts->fts_status & BG_THREAD_STOP);
6130 
6131 	dict_table_wait_for_bg_threads_to_exit(table, 20000);
6132 
6133 	mutex_exit(&fts->bg_threads_mutex);
6134 }
6135 
6136 /*********************************************************************//**
6137 Take a FTS savepoint. */
6138 UNIV_INLINE
6139 void
fts_savepoint_copy(const fts_savepoint_t * src,fts_savepoint_t * dst)6140 fts_savepoint_copy(
6141 /*===============*/
6142 	const fts_savepoint_t*	src,	/*!< in: source savepoint */
6143 	fts_savepoint_t*	dst)	/*!< out: destination savepoint */
6144 {
6145 	const ib_rbt_node_t*	node;
6146 	const ib_rbt_t*		tables;
6147 
6148 	tables = src->tables;
6149 
6150 	for (node = rbt_first(tables); node; node = rbt_next(tables, node)) {
6151 
6152 		fts_trx_table_t*	ftt_dst;
6153 		const fts_trx_table_t**	ftt_src;
6154 
6155 		ftt_src = rbt_value(const fts_trx_table_t*, node);
6156 
6157 		ftt_dst = fts_trx_table_clone(*ftt_src);
6158 
6159 		rbt_insert(dst->tables, &ftt_dst, &ftt_dst);
6160 	}
6161 }
6162 
6163 /*********************************************************************//**
6164 Take a FTS savepoint. */
6165 void
fts_savepoint_take(trx_t * trx,fts_trx_t * fts_trx,const char * name)6166 fts_savepoint_take(
6167 /*===============*/
6168 	trx_t*		trx,		/*!< in: transaction */
6169 	fts_trx_t*	fts_trx,	/*!< in: fts transaction */
6170 	const char*	name)		/*!< in: savepoint name */
6171 {
6172 	mem_heap_t*		heap;
6173 	fts_savepoint_t*	savepoint;
6174 	fts_savepoint_t*	last_savepoint;
6175 
6176 	ut_a(name != NULL);
6177 
6178 	heap = fts_trx->heap;
6179 
6180 	/* The implied savepoint must exist. */
6181 	ut_a(ib_vector_size(fts_trx->savepoints) > 0);
6182 
6183 	last_savepoint = static_cast<fts_savepoint_t*>(
6184 		ib_vector_last(fts_trx->savepoints));
6185 	savepoint = fts_savepoint_create(fts_trx->savepoints, name, heap);
6186 
6187 	if (last_savepoint->tables != NULL) {
6188 		fts_savepoint_copy(last_savepoint, savepoint);
6189 	}
6190 }
6191 
6192 /*********************************************************************//**
6193 Lookup a savepoint instance by name.
6194 @return ULINT_UNDEFINED if not found */
6195 UNIV_INLINE
6196 ulint
fts_savepoint_lookup(ib_vector_t * savepoints,const char * name)6197 fts_savepoint_lookup(
6198 /*==================*/
6199 	ib_vector_t*	savepoints,	/*!< in: savepoints */
6200 	const char*	name)		/*!< in: savepoint name */
6201 {
6202 	ulint			i;
6203 
6204 	ut_a(ib_vector_size(savepoints) > 0);
6205 
6206 	for (i = 1; i < ib_vector_size(savepoints); ++i) {
6207 		fts_savepoint_t*	savepoint;
6208 
6209 		savepoint = static_cast<fts_savepoint_t*>(
6210 			ib_vector_get(savepoints, i));
6211 
6212 		if (strcmp(name, savepoint->name) == 0) {
6213 			return(i);
6214 		}
6215 	}
6216 
6217 	return(ULINT_UNDEFINED);
6218 }
6219 
6220 /*********************************************************************//**
6221 Release the savepoint data identified by  name. All savepoints created
6222 after the named savepoint are kept.
6223 @return DB_SUCCESS or error code */
6224 void
fts_savepoint_release(trx_t * trx,const char * name)6225 fts_savepoint_release(
6226 /*==================*/
6227 	trx_t*		trx,		/*!< in: transaction */
6228 	const char*	name)		/*!< in: savepoint name */
6229 {
6230 	ut_a(name != NULL);
6231 
6232 	ib_vector_t*	savepoints = trx->fts_trx->savepoints;
6233 
6234 	ut_a(ib_vector_size(savepoints) > 0);
6235 
6236 	ulint   i = fts_savepoint_lookup(savepoints, name);
6237 	if (i != ULINT_UNDEFINED) {
6238 		ut_a(i >= 1);
6239 
6240 		fts_savepoint_t*        savepoint;
6241 		savepoint = static_cast<fts_savepoint_t*>(
6242 			ib_vector_get(savepoints, i));
6243 
6244 		if (i == ib_vector_size(savepoints) - 1) {
6245 			/* If the savepoint is the last, we save its
6246 			tables to the  previous savepoint. */
6247 			fts_savepoint_t*	prev_savepoint;
6248 			prev_savepoint = static_cast<fts_savepoint_t*>(
6249 				ib_vector_get(savepoints, i - 1));
6250 
6251 			ib_rbt_t*	tables = savepoint->tables;
6252 			savepoint->tables = prev_savepoint->tables;
6253 			prev_savepoint->tables = tables;
6254 		}
6255 
6256 		fts_savepoint_free(savepoint);
6257 		ib_vector_remove(savepoints, *(void**)savepoint);
6258 
6259 		/* Make sure we don't delete the implied savepoint. */
6260 		ut_a(ib_vector_size(savepoints) > 0);
6261 	}
6262 }
6263 
6264 /**********************************************************************//**
6265 Refresh last statement savepoint. */
6266 void
fts_savepoint_laststmt_refresh(trx_t * trx)6267 fts_savepoint_laststmt_refresh(
6268 /*===========================*/
6269 	trx_t*			trx)	/*!< in: transaction */
6270 {
6271 
6272 	fts_trx_t*              fts_trx;
6273 	fts_savepoint_t*        savepoint;
6274 
6275 	fts_trx = trx->fts_trx;
6276 
6277 	savepoint = static_cast<fts_savepoint_t*>(
6278 		ib_vector_pop(fts_trx->last_stmt));
6279 	fts_savepoint_free(savepoint);
6280 
6281 	ut_ad(ib_vector_is_empty(fts_trx->last_stmt));
6282 	savepoint = fts_savepoint_create(fts_trx->last_stmt, NULL, NULL);
6283 }
6284 
6285 /********************************************************************
6286 Undo the Doc ID add/delete operations in last stmt */
6287 static
6288 void
fts_undo_last_stmt(fts_trx_table_t * s_ftt,fts_trx_table_t * l_ftt)6289 fts_undo_last_stmt(
6290 /*===============*/
6291 	fts_trx_table_t*	s_ftt,	/*!< in: Transaction FTS table */
6292 	fts_trx_table_t*	l_ftt)	/*!< in: last stmt FTS table */
6293 {
6294 	ib_rbt_t*		s_rows;
6295 	ib_rbt_t*		l_rows;
6296 	const ib_rbt_node_t*	node;
6297 
6298 	l_rows = l_ftt->rows;
6299 	s_rows = s_ftt->rows;
6300 
6301 	for (node = rbt_first(l_rows);
6302 	     node;
6303 	     node = rbt_next(l_rows, node)) {
6304 		fts_trx_row_t*	l_row = rbt_value(fts_trx_row_t, node);
6305 		ib_rbt_bound_t	parent;
6306 
6307 		rbt_search(s_rows, &parent, &(l_row->doc_id));
6308 
6309 		if (parent.result == 0) {
6310 			fts_trx_row_t*	s_row = rbt_value(
6311 				fts_trx_row_t, parent.last);
6312 
6313 			switch (l_row->state) {
6314 			case FTS_INSERT:
6315 				ut_free(rbt_remove_node(s_rows, parent.last));
6316 				break;
6317 
6318 			case FTS_DELETE:
6319 				if (s_row->state == FTS_NOTHING) {
6320 					s_row->state = FTS_INSERT;
6321 				} else if (s_row->state == FTS_DELETE) {
6322 					ut_free(rbt_remove_node(
6323 						s_rows, parent.last));
6324 				}
6325 				break;
6326 
6327 			/* FIXME: Check if FTS_MODIFY need to be addressed */
6328 			case FTS_MODIFY:
6329 			case FTS_NOTHING:
6330 				break;
6331 			default:
6332 				ut_error;
6333 			}
6334 		}
6335 	}
6336 }
6337 
6338 /**********************************************************************//**
6339 Rollback to savepoint indentified by name.
6340 @return DB_SUCCESS or error code */
6341 void
fts_savepoint_rollback_last_stmt(trx_t * trx)6342 fts_savepoint_rollback_last_stmt(
6343 /*=============================*/
6344 	trx_t*		trx)		/*!< in: transaction */
6345 {
6346 	ib_vector_t*		savepoints;
6347 	fts_savepoint_t*	savepoint;
6348 	fts_savepoint_t*	last_stmt;
6349 	fts_trx_t*		fts_trx;
6350 	ib_rbt_bound_t		parent;
6351 	const ib_rbt_node_t*    node;
6352 	ib_rbt_t*		l_tables;
6353 	ib_rbt_t*		s_tables;
6354 
6355 	fts_trx = trx->fts_trx;
6356 	savepoints = fts_trx->savepoints;
6357 
6358 	savepoint = static_cast<fts_savepoint_t*>(ib_vector_last(savepoints));
6359 	last_stmt = static_cast<fts_savepoint_t*>(
6360 		ib_vector_last(fts_trx->last_stmt));
6361 
6362 	l_tables = last_stmt->tables;
6363 	s_tables = savepoint->tables;
6364 
6365 	for (node = rbt_first(l_tables);
6366 	     node;
6367 	     node = rbt_next(l_tables, node)) {
6368 
6369 		fts_trx_table_t**	l_ftt;
6370 
6371 		l_ftt = rbt_value(fts_trx_table_t*, node);
6372 
6373 		rbt_search_cmp(
6374 			s_tables, &parent, &(*l_ftt)->table->id,
6375 			fts_trx_table_id_cmp, NULL);
6376 
6377 		if (parent.result == 0) {
6378 			fts_trx_table_t**	s_ftt;
6379 
6380 			s_ftt = rbt_value(fts_trx_table_t*, parent.last);
6381 
6382 			fts_undo_last_stmt(*s_ftt, *l_ftt);
6383 		}
6384 	}
6385 }
6386 
6387 /**********************************************************************//**
6388 Rollback to savepoint indentified by name.
6389 @return DB_SUCCESS or error code */
6390 void
fts_savepoint_rollback(trx_t * trx,const char * name)6391 fts_savepoint_rollback(
6392 /*===================*/
6393 	trx_t*		trx,		/*!< in: transaction */
6394 	const char*	name)		/*!< in: savepoint name */
6395 {
6396 	ulint		i;
6397 	ib_vector_t*	savepoints;
6398 
6399 	ut_a(name != NULL);
6400 
6401 	savepoints = trx->fts_trx->savepoints;
6402 
6403 	/* We pop all savepoints from the the top of the stack up to
6404 	and including the instance that was found. */
6405 	i = fts_savepoint_lookup(savepoints, name);
6406 
6407 	if (i != ULINT_UNDEFINED) {
6408 		fts_savepoint_t*	savepoint;
6409 
6410 		ut_a(i > 0);
6411 
6412 		while (ib_vector_size(savepoints) > i) {
6413 			fts_savepoint_t*	savepoint;
6414 
6415 			savepoint = static_cast<fts_savepoint_t*>(
6416 				ib_vector_pop(savepoints));
6417 
6418 			if (savepoint->name != NULL) {
6419 				/* Since name was allocated on the heap, the
6420 				memory will be released when the transaction
6421 				completes. */
6422 				savepoint->name = NULL;
6423 
6424 				fts_savepoint_free(savepoint);
6425 			}
6426 		}
6427 
6428 		/* Pop all a elements from the top of the stack that may
6429 		have been released. We have to be careful that we don't
6430 		delete the implied savepoint. */
6431 
6432 		for (savepoint = static_cast<fts_savepoint_t*>(
6433 				ib_vector_last(savepoints));
6434 		     ib_vector_size(savepoints) > 1
6435 		     && savepoint->name == NULL;
6436 		     savepoint = static_cast<fts_savepoint_t*>(
6437 				ib_vector_last(savepoints))) {
6438 
6439 			ib_vector_pop(savepoints);
6440 		}
6441 
6442 		/* Make sure we don't delete the implied savepoint. */
6443 		ut_a(ib_vector_size(savepoints) > 0);
6444 
6445 		/* Restore the savepoint. */
6446 		fts_savepoint_take(trx, trx->fts_trx, name);
6447 	}
6448 }
6449 
6450 /** Check if a table is an FTS auxiliary table name.
6451 @param[out]	table	FTS table info
6452 @param[in]	name	Table name
6453 @param[in]	len	Length of table name
6454 @return true if the name matches an auxiliary table name pattern */
6455 static
6456 bool
fts_is_aux_table_name(fts_aux_table_t * table,const char * name,ulint len)6457 fts_is_aux_table_name(
6458 	fts_aux_table_t*	table,
6459 	const char*		name,
6460 	ulint			len)
6461 {
6462 	const char*	ptr;
6463 	char*		end;
6464 	char		my_name[MAX_FULL_NAME_LEN + 1];
6465 
6466 	ut_ad(len <= MAX_FULL_NAME_LEN);
6467 	ut_memcpy(my_name, name, len);
6468 	my_name[len] = 0;
6469 	end = my_name + len;
6470 
6471 	ptr = static_cast<const char*>(memchr(my_name, '/', len));
6472 
6473 	if (ptr != NULL) {
6474 		/* We will start the match after the '/' */
6475 		++ptr;
6476 		len = end - ptr;
6477 	}
6478 
6479 	/* All auxiliary tables are prefixed with "FTS_" and the name
6480 	length will be at the very least greater than 20 bytes. */
6481 	if (ptr != NULL && len > 20 && strncmp(ptr, "FTS_", 4) == 0) {
6482 		ulint		i;
6483 
6484 		/* Skip the prefix. */
6485 		ptr += 4;
6486 		len -= 4;
6487 
6488 		/* Try and read the table id. */
6489 		if (!fts_read_object_id(&table->parent_id, ptr)) {
6490 			return(false);
6491 		}
6492 
6493 		/* Skip the table id. */
6494 		ptr = static_cast<const char*>(memchr(ptr, '_', len));
6495 
6496 		if (ptr == NULL) {
6497 			return(false);
6498 		}
6499 
6500 		/* Skip the underscore. */
6501 		++ptr;
6502 		ut_a(end > ptr);
6503 		len = end - ptr;
6504 
6505 		/* First search the common table suffix array. */
6506 		for (i = 0; fts_common_tables[i] != NULL; ++i) {
6507 
6508 			if (strncmp(ptr, fts_common_tables[i], len) == 0) {
6509 				return(true);
6510 			}
6511 		}
6512 
6513 		/* Could be obsolete common tables. */
6514 		if (strncmp(ptr, "ADDED", len) == 0
6515 		    || strncmp(ptr, "STOPWORDS", len) == 0) {
6516 			return(true);
6517 		}
6518 
6519 		/* Try and read the index id. */
6520 		if (!fts_read_object_id(&table->index_id, ptr)) {
6521 			return(false);
6522 		}
6523 
6524 		/* Skip the table id. */
6525 		ptr = static_cast<const char*>(memchr(ptr, '_', len));
6526 
6527 		if (ptr == NULL) {
6528 			return(false);
6529 		}
6530 
6531 		/* Skip the underscore. */
6532 		++ptr;
6533 		ut_a(end > ptr);
6534 		len = end - ptr;
6535 
6536 		/* Search the FT index specific array. */
6537 		for (i = 0; i < FTS_NUM_AUX_INDEX; ++i) {
6538 
6539 			if (strncmp(ptr, fts_get_suffix(i), len) == 0) {
6540 				return(true);
6541 			}
6542 		}
6543 
6544 		/* Other FT index specific table(s). */
6545 		if (strncmp(ptr, "DOC_ID", len) == 0) {
6546 			return(true);
6547 		}
6548 	}
6549 
6550 	return(false);
6551 }
6552 
6553 /**********************************************************************//**
6554 Callback function to read a single table ID column.
6555 @return Always return TRUE */
6556 static
6557 ibool
fts_read_tables(void * row,void * user_arg)6558 fts_read_tables(
6559 /*============*/
6560 	void*		row,		/*!< in: sel_node_t* */
6561 	void*		user_arg)	/*!< in: pointer to ib_vector_t */
6562 {
6563 	int		i;
6564 	fts_aux_table_t*table;
6565 	mem_heap_t*	heap;
6566 	ibool		done = FALSE;
6567 	ib_vector_t*	tables = static_cast<ib_vector_t*>(user_arg);
6568 	sel_node_t*	sel_node = static_cast<sel_node_t*>(row);
6569 	que_node_t*	exp = sel_node->select_list;
6570 
6571 	/* Must be a heap allocated vector. */
6572 	ut_a(tables->allocator->arg != NULL);
6573 
6574 	/* We will use this heap for allocating strings. */
6575 	heap = static_cast<mem_heap_t*>(tables->allocator->arg);
6576 	table = static_cast<fts_aux_table_t*>(ib_vector_push(tables, NULL));
6577 
6578 	memset(table, 0x0, sizeof(*table));
6579 
6580 	/* Iterate over the columns and read the values. */
6581 	for (i = 0; exp && !done; exp = que_node_get_next(exp), ++i) {
6582 
6583 		dfield_t*	dfield = que_node_get_val(exp);
6584 		void*		data = dfield_get_data(dfield);
6585 		ulint		len = dfield_get_len(dfield);
6586 
6587 		ut_a(len != UNIV_SQL_NULL);
6588 
6589 		/* Note: The column numbers below must match the SELECT */
6590 		switch (i) {
6591 		case 0: /* NAME */
6592 
6593 			if (!fts_is_aux_table_name(
6594 				table, static_cast<const char*>(data), len)) {
6595 				ib_vector_pop(tables);
6596 				done = TRUE;
6597 				break;
6598 			}
6599 
6600 			table->name = static_cast<char*>(
6601 				mem_heap_alloc(heap, len + 1));
6602 			memcpy(table->name, data, len);
6603 			table->name[len] = 0;
6604 			break;
6605 
6606 		case 1: /* ID */
6607 			ut_a(len == 8);
6608 			table->id = mach_read_from_8(
6609 				static_cast<const byte*>(data));
6610 			break;
6611 
6612 		default:
6613 			ut_error;
6614 		}
6615 	}
6616 
6617 	return(TRUE);
6618 }
6619 
6620 /******************************************************************//**
6621 Callback that sets a hex formatted FTS table's flags2 in
6622 SYS_TABLES. The flags is stored in MIX_LEN column.
6623 @return FALSE if all OK */
6624 static
6625 ibool
fts_set_hex_format(void * row,void * user_arg)6626 fts_set_hex_format(
6627 /*===============*/
6628 	void*		row,		/*!< in: sel_node_t* */
6629 	void*		user_arg)	/*!< in: bool set/unset flag */
6630 {
6631 	sel_node_t*	node = static_cast<sel_node_t*>(row);
6632 	dfield_t*	dfield = que_node_get_val(node->select_list);
6633 
6634 	ut_ad(dtype_get_mtype(dfield_get_type(dfield)) == DATA_INT);
6635 	ut_ad(dfield_get_len(dfield) == sizeof(ib_uint32_t));
6636 	/* There should be at most one matching record. So the value
6637 	must be the default value. */
6638 	ut_ad(mach_read_from_4(static_cast<byte*>(user_arg))
6639 	      == ULINT32_UNDEFINED);
6640 
6641 	ulint		flags2 = mach_read_from_4(
6642 			static_cast<byte*>(dfield_get_data(dfield)));
6643 
6644 	flags2 |= DICT_TF2_FTS_AUX_HEX_NAME;
6645 
6646 	mach_write_to_4(static_cast<byte*>(user_arg), flags2);
6647 
6648 	return(FALSE);
6649 }
6650 
6651 /*****************************************************************//**
6652 Update the DICT_TF2_FTS_AUX_HEX_NAME flag in SYS_TABLES.
6653 @return DB_SUCCESS or error code. */
6654 dberr_t
fts_update_hex_format_flag(trx_t * trx,table_id_t table_id,bool dict_locked)6655 fts_update_hex_format_flag(
6656 /*=======================*/
6657 	trx_t*		trx,		/*!< in/out: transaction that
6658 					covers the update */
6659 	table_id_t	table_id,	/*!< in: Table for which we want
6660 					to set the root table->flags2 */
6661 	bool		dict_locked)	/*!< in: set to true if the
6662 					caller already owns the
6663 					dict_sys_t::mutex. */
6664 {
6665 	pars_info_t*		info;
6666 	ib_uint32_t		flags2;
6667 
6668 	static const char	sql[] =
6669 		"PROCEDURE UPDATE_HEX_FORMAT_FLAG() IS\n"
6670 		"DECLARE FUNCTION my_func;\n"
6671 		"DECLARE CURSOR c IS\n"
6672 		" SELECT MIX_LEN"
6673 		" FROM SYS_TABLES"
6674 		" WHERE ID = :table_id FOR UPDATE;"
6675 		"\n"
6676 		"BEGIN\n"
6677 		"OPEN c;\n"
6678 		"WHILE 1 = 1 LOOP\n"
6679 		"  FETCH c INTO my_func();\n"
6680 		"  IF c % NOTFOUND THEN\n"
6681 		"    EXIT;\n"
6682 		"  END IF;\n"
6683 		"END LOOP;\n"
6684 		"UPDATE SYS_TABLES"
6685 		" SET MIX_LEN = :flags2"
6686 		" WHERE ID = :table_id;\n"
6687 		"CLOSE c;\n"
6688 		"END;\n";
6689 
6690 	flags2 = ULINT32_UNDEFINED;
6691 
6692 	info = pars_info_create();
6693 
6694 	pars_info_add_ull_literal(info, "table_id", table_id);
6695 	pars_info_bind_int4_literal(info, "flags2", &flags2);
6696 
6697 	pars_info_bind_function(
6698 		info, "my_func", fts_set_hex_format, &flags2);
6699 
6700 	if (trx_get_dict_operation(trx) == TRX_DICT_OP_NONE) {
6701 		trx_set_dict_operation(trx, TRX_DICT_OP_INDEX);
6702 	}
6703 
6704 	dberr_t err = que_eval_sql(info, sql, !dict_locked, trx);
6705 
6706 	ut_a(flags2 != ULINT32_UNDEFINED);
6707 
6708 	return(err);
6709 }
6710 
6711 /*********************************************************************//**
6712 Rename an aux table to HEX format. It's called when "%016llu" is used
6713 to format an object id in table name, which only happens in Windows. */
6714 static MY_ATTRIBUTE((nonnull, warn_unused_result))
6715 dberr_t
fts_rename_one_aux_table_to_hex_format(trx_t * trx,const fts_aux_table_t * aux_table,const dict_table_t * parent_table)6716 fts_rename_one_aux_table_to_hex_format(
6717 /*===================================*/
6718 	trx_t*			trx,		/*!< in: transaction */
6719 	const fts_aux_table_t*	aux_table,	/*!< in: table info */
6720 	const dict_table_t*	parent_table)	/*!< in: parent table name */
6721 {
6722 	const char*     ptr;
6723 	fts_table_t	fts_table;
6724 	char		new_name[MAX_FULL_NAME_LEN];
6725 	dberr_t		error;
6726 
6727 	ptr = strchr(aux_table->name, '/');
6728 	ut_a(ptr != NULL);
6729 	++ptr;
6730 	/* Skip "FTS_", table id and underscore */
6731 	for (ulint i = 0; i < 2; ++i) {
6732 		ptr = strchr(ptr, '_');
6733 		ut_a(ptr != NULL);
6734 		++ptr;
6735 	}
6736 
6737 	fts_table.suffix = NULL;
6738 	if (aux_table->index_id == 0) {
6739 		fts_table.type = FTS_COMMON_TABLE;
6740 
6741 		for (ulint i = 0; fts_common_tables[i] != NULL; ++i) {
6742 			if (strcmp(ptr, fts_common_tables[i]) == 0) {
6743 				fts_table.suffix = fts_common_tables[i];
6744 				break;
6745 			}
6746 		}
6747 	} else {
6748 		fts_table.type = FTS_INDEX_TABLE;
6749 
6750 		/* Skip index id and underscore */
6751 		ptr = strchr(ptr, '_');
6752 		ut_a(ptr != NULL);
6753 		++ptr;
6754 
6755 		for (ulint i = 0; fts_index_selector[i].value; ++i) {
6756 			if (strcmp(ptr, fts_get_suffix(i)) == 0) {
6757 				fts_table.suffix = fts_get_suffix(i);
6758 				break;
6759 			}
6760 		}
6761 	}
6762 
6763 	ut_a(fts_table.suffix != NULL);
6764 
6765 	fts_table.parent = parent_table->name.m_name;
6766 	fts_table.table_id = aux_table->parent_id;
6767 	fts_table.index_id = aux_table->index_id;
6768 	fts_table.table = parent_table;
6769 
6770 	fts_get_table_name(&fts_table, new_name);
6771 	ut_ad(strcmp(new_name, aux_table->name) != 0);
6772 
6773 	if (trx_get_dict_operation(trx) == TRX_DICT_OP_NONE) {
6774 		trx_set_dict_operation(trx, TRX_DICT_OP_INDEX);
6775 	}
6776 
6777 	error = row_rename_table_for_mysql(aux_table->name, new_name, trx,
6778 					   FALSE);
6779 
6780 	if (error != DB_SUCCESS) {
6781 		ib::warn() << "Failed to rename aux table '"
6782 			<< aux_table->name << "' to new format '"
6783 			<< new_name << "'.";
6784 	} else {
6785 		ib::info() << "Renamed aux table '" << aux_table->name
6786 			<< "' to '" << new_name << "'.";
6787 	}
6788 
6789 	return(error);
6790 }
6791 
/**********************************************************************//**
Rename all aux tables of a parent table to HEX format. Also set aux tables'
flags2 and parent table's flags2 with DICT_TF2_FTS_AUX_HEX_NAME.
It's called when "%016llu" is used to format an object id in table name,
which only happens in Windows.
Note the ids in tables are correct but the names are old ambiguous ones.

This function should make sure that either all the parent table and aux tables
are set DICT_TF2_FTS_AUX_HEX_NAME with flags2 or none of them are set.

Outline:
1. The flag is set for the parent table, first in SYS_TABLES (through
fts_update_hex_format_flag()) and then in the in-memory table object.
2. Each aux table is flagged in memory, renamed, and then flagged in
SYS_TABLES. The first failure rolls back trx and ends the loop.
3. After such a failure the aux tables visited so far are renamed back,
each in a background transaction of its own, and the in-memory flag of
the parent table is cleared again.
@return DB_SUCCESS, or the error code of the step that failed */
static MY_ATTRIBUTE((nonnull, warn_unused_result))
dberr_t
fts_rename_aux_tables_to_hex_format_low(
/*====================================*/
	trx_t*		trx,		/*!< in: transaction */
	dict_table_t*	parent_table,	/*!< in: parent table */
	ib_vector_t*	tables)		/*!< in: aux tables to rename. */
{
	dberr_t		error;
	ulint		count;

	ut_ad(!DICT_TF2_FLAG_IS_SET(parent_table, DICT_TF2_FTS_AUX_HEX_NAME));
	ut_ad(!ib_vector_is_empty(tables));

	/* Step 1: persist the flag for the parent table. */
	error = fts_update_hex_format_flag(trx, parent_table->id, true);

	if (error != DB_SUCCESS) {
		/* Nothing has been renamed yet, so rolling back the
		transaction is all that is needed. */
		ib::warn() << "Setting parent table " << parent_table->name
			<< " to hex format failed.";
		fts_sql_rollback(trx);
		return(error);
	}

	DICT_TF2_FLAG_SET(parent_table, DICT_TF2_FTS_AUX_HEX_NAME);

	/* Step 2: rename and flag the aux tables one by one. When the loop
	is left through a break, count is the position of the table that
	failed. */
	for (count = 0; count < ib_vector_size(tables); ++count) {
		dict_table_t*		table;
		fts_aux_table_t*	aux_table;

		aux_table = static_cast<fts_aux_table_t*>(
			ib_vector_get(tables, count));

		table = dict_table_open_on_id(aux_table->id, TRUE,
					      DICT_TABLE_OP_NORMAL);

		ut_ad(table != NULL);
		ut_ad(!DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_AUX_HEX_NAME));

		/* Set HEX_NAME flag here to make sure we can get correct
		new table name in following function */
		DICT_TF2_FLAG_SET(table, DICT_TF2_FTS_AUX_HEX_NAME);
		error = fts_rename_one_aux_table_to_hex_format(trx,
				aux_table, parent_table);
		/* We will rollback the trx if the error != DB_SUCCESS,
		so setting the flag here is the same with setting it in
		row_rename_table_for_mysql */
		DBUG_EXECUTE_IF("rename_aux_table_fail", error = DB_ERROR;);

		if (error != DB_SUCCESS) {
			dict_table_close(table, TRUE, FALSE);

			ib::warn() << "Failed to rename one aux table "
				<< aux_table->name << ". Will revert"
				" all successful rename operations.";

			fts_sql_rollback(trx);
			break;
		}

		/* The rename succeeded; now persist the flag of the aux
		table itself. */
		error = fts_update_hex_format_flag(trx, aux_table->id, true);
		dict_table_close(table, TRUE, FALSE);

		if (error != DB_SUCCESS) {
			ib::warn() << "Setting aux table " << aux_table->name
				<< " to hex format failed.";

			fts_sql_rollback(trx);
			break;
		}
	}

	/* Step 3: undo the work done so far if anything failed. */
	if (error != DB_SUCCESS) {
		ut_ad(count != ib_vector_size(tables));

		/* If rename fails, the trx would be rolled back, we can't
		use it any more, we'll start a new background trx to do
		the reverting. */

		ut_ad(!trx_is_started(trx));

		/* Becomes true once a revert has failed; from then on the
		remaining tables only get their in-memory flag cleared. */
		bool not_rename = false;

		/* Try to revert those successful rename operations
		in order to revert the ibd file rename. The range includes
		position count, i.e. the table at which step 2 stopped. */
		for (ulint i = 0; i <= count; ++i) {
			dict_table_t*		table;
			fts_aux_table_t*	aux_table;
			trx_t*			trx_bg;
			dberr_t			err;

			aux_table = static_cast<fts_aux_table_t*>(
				ib_vector_get(tables, i));

			table = dict_table_open_on_id(aux_table->id, TRUE,
						      DICT_TABLE_OP_NORMAL);
			ut_ad(table != NULL);

			if (not_rename) {
				DICT_TF2_FLAG_UNSET(table,
						    DICT_TF2_FTS_AUX_HEX_NAME);
			}

			/* A table without the in-memory flag is not renamed
			back. This is also the path taken by every table
			whose flag was cleared just above. */
			if (!DICT_TF2_FLAG_IS_SET(table,
						  DICT_TF2_FTS_AUX_HEX_NAME)) {
				dict_table_close(table, TRUE, FALSE);
				continue;
			}

			trx_bg = trx_allocate_for_background();
			trx_bg->op_info = "Revert half done rename";
			trx_bg->dict_operation_lock_mode = RW_X_LATCH;
			trx_start_for_ddl(trx_bg, TRX_DICT_OP_TABLE);

			/* Clear the flag, then rename from the current name
			of the table object back to the name recorded in
			aux_table. */
			DICT_TF2_FLAG_UNSET(table, DICT_TF2_FTS_AUX_HEX_NAME);
			err = row_rename_table_for_mysql(table->name.m_name,
							 aux_table->name,
							 trx_bg, FALSE);

			trx_bg->dict_operation_lock_mode = 0;
			dict_table_close(table, TRUE, FALSE);

			if (err != DB_SUCCESS) {
				ib::warn() << "Failed to revert table "
					<< table->name << ". Please revert"
					" manually.";
				fts_sql_rollback(trx_bg);
				trx_free_for_background(trx_bg);
				/* Continue to clear aux tables' flags2 */
				not_rename = true;
				continue;
			}

			fts_sql_commit(trx_bg);
			trx_free_for_background(trx_bg);
		}

		DICT_TF2_FLAG_UNSET(parent_table, DICT_TF2_FTS_AUX_HEX_NAME);
	}

	return(error);
}
6942 
6943 /**********************************************************************//**
6944 Convert an id, which is actually a decimal number but was regard as a HEX
6945 from a string, to its real value. */
6946 static
6947 ib_id_t
fts_fake_hex_to_dec(ib_id_t id)6948 fts_fake_hex_to_dec(
6949 /*================*/
6950 	ib_id_t		id)			/*!< in: number to convert */
6951 {
6952 	ib_id_t		dec_id = 0;
6953 	char		tmp_id[FTS_AUX_MIN_TABLE_ID_LENGTH];
6954 
6955 #ifdef UNIV_DEBUG
6956 	int		ret =
6957 #endif /* UNIV_DEBUG */
6958 	sprintf(tmp_id, UINT64PFx, id);
6959 	ut_ad(ret == 16);
6960 #ifdef UNIV_DEBUG
6961 	ret =
6962 #endif /* UNIV_DEBUG */
6963 #ifdef _WIN32
6964 	sscanf(tmp_id, "%016llu", &dec_id);
6965 #else
6966 	sscanf(tmp_id, "%016" PRIu64, &dec_id);
6967 #endif /* _WIN32 */
6968 	ut_ad(ret == 1);
6969 
6970 	return dec_id;
6971 }
6972 
6973 /*********************************************************************//**
6974 Compare two fts_aux_table_t parent_ids.
6975 @return < 0 if n1 < n2, 0 if n1 == n2, > 0 if n1 > n2 */
6976 UNIV_INLINE
6977 int
fts_check_aux_table_parent_id_cmp(const void * p1,const void * p2)6978 fts_check_aux_table_parent_id_cmp(
6979 /*==============================*/
6980 	const void*	p1,		/*!< in: id1 */
6981 	const void*	p2)		/*!< in: id2 */
6982 {
6983 	const fts_aux_table_t*	fa1 = static_cast<const fts_aux_table_t*>(p1);
6984 	const fts_aux_table_t*	fa2 = static_cast<const fts_aux_table_t*>(p2);
6985 
6986 	return static_cast<int>(fa1->parent_id - fa2->parent_id);
6987 }
6988 
6989 /** Mark all the fts index associated with the parent table as corrupted.
6990 @param[in]	trx		transaction
6991 @param[in, out] parent_table	fts index associated with this parent table
6992 				will be marked as corrupted. */
6993 static
6994 void
fts_parent_all_index_set_corrupt(trx_t * trx,dict_table_t * parent_table)6995 fts_parent_all_index_set_corrupt(
6996 	trx_t*		trx,
6997 	dict_table_t*	parent_table)
6998 {
6999 	fts_t*	fts = parent_table->fts;
7000 
7001 	if (trx_get_dict_operation(trx) == TRX_DICT_OP_NONE) {
7002 		trx_set_dict_operation(trx, TRX_DICT_OP_INDEX);
7003 	}
7004 
7005 	for (ulint j = 0; j < ib_vector_size(fts->indexes); j++) {
7006 		dict_index_t*	index = static_cast<dict_index_t*>(
7007 			ib_vector_getp_const(fts->indexes, j));
7008 		dict_set_corrupted(index,
7009 				   trx, "DROP ORPHANED TABLE");
7010 	}
7011 }
7012 
7013 /** Mark the fts index which index id matches the id as corrupted.
7014 @param[in]	trx		transaction
7015 @param[in]	id		index id to search
7016 @param[in, out]	parent_table	parent table to check with all
7017 				the index. */
7018 static
7019 void
fts_set_index_corrupt(trx_t * trx,index_id_t id,dict_table_t * table)7020 fts_set_index_corrupt(
7021 	trx_t*		trx,
7022 	index_id_t	id,
7023 	dict_table_t*	table)
7024 {
7025 	fts_t*	fts = table->fts;
7026 
7027 	if (trx_get_dict_operation(trx) == TRX_DICT_OP_NONE) {
7028 		trx_set_dict_operation(trx, TRX_DICT_OP_INDEX);
7029 	}
7030 
7031 	for (ulint j = 0; j < ib_vector_size(fts->indexes); j++) {
7032 		dict_index_t*	index = static_cast<dict_index_t*>(
7033 			ib_vector_getp_const(fts->indexes, j));
7034 		if (index->id == id) {
7035 			dict_set_corrupted(index, trx,
7036 					   "DROP ORPHANED TABLE");
7037 			break;
7038 		}
7039 	}
7040 }
7041 
7042 /** Check the index for the aux table is corrupted.
7043 @param[in]	aux_table	auxiliary table
7044 @retval nonzero if index is corrupted, zero for valid index */
7045 static
7046 ulint
fts_check_corrupt_index(fts_aux_table_t * aux_table)7047 fts_check_corrupt_index(
7048 	fts_aux_table_t*	aux_table)
7049 {
7050 	dict_table_t*	table;
7051 	dict_index_t*	index;
7052 	table = dict_table_open_on_id(
7053 		aux_table->parent_id, TRUE, DICT_TABLE_OP_NORMAL);
7054 
7055 	if (table == NULL) {
7056 		return(0);
7057 	}
7058 
7059 	for (index = UT_LIST_GET_FIRST(table->indexes);
7060 	     index;
7061 	     index = UT_LIST_GET_NEXT(indexes, index)) {
7062 		if (index->id == aux_table->index_id) {
7063 			ut_ad(index->type & DICT_FTS);
7064 			dict_table_close(table, true, false);
7065 			return(dict_index_is_corrupted(index));
7066 		}
7067 	}
7068 
7069 	dict_table_close(table, true, false);
7070 	return(0);
7071 }
7072 
7073 /* Get parent table name if it's a fts aux table
7074 @param[in]	aux_table_name	aux table name
7075 @param[in]	aux_table_len	aux table length
7076 @return parent table name, or NULL */
7077 char*
fts_get_parent_table_name(const char * aux_table_name,ulint aux_table_len)7078 fts_get_parent_table_name(
7079 	const char*	aux_table_name,
7080 	ulint		aux_table_len)
7081 {
7082 	fts_aux_table_t	aux_table;
7083 	char*		parent_table_name = NULL;
7084 
7085 	if (fts_is_aux_table_name(&aux_table, aux_table_name, aux_table_len)) {
7086 		dict_table_t*	parent_table;
7087 
7088 		parent_table = dict_table_open_on_id(
7089 			aux_table.parent_id, TRUE, DICT_TABLE_OP_NORMAL);
7090 
7091 		if (parent_table != NULL) {
7092 			parent_table_name = mem_strdupl(
7093 				parent_table->name.m_name,
7094 				strlen(parent_table->name.m_name));
7095 
7096 			dict_table_close(parent_table, TRUE, FALSE);
7097 		}
7098 	}
7099 
7100 	return(parent_table_name);
7101 }
7102 
7103 /** Check the validity of the parent table.
7104 @param[in]	aux_table	auxiliary table
7105 @return true if it is a valid table or false if it is not */
7106 static
7107 bool
fts_valid_parent_table(const fts_aux_table_t * aux_table)7108 fts_valid_parent_table(
7109 	const fts_aux_table_t*	aux_table)
7110 {
7111 	dict_table_t*	parent_table;
7112 	bool		valid = false;
7113 
7114 	parent_table = dict_table_open_on_id(
7115 		aux_table->parent_id, TRUE, DICT_TABLE_OP_NORMAL);
7116 
7117 	if (parent_table != NULL && parent_table->fts != NULL) {
7118 		if (aux_table->index_id == 0) {
7119 			valid = true;
7120 		} else {
7121 			index_id_t	id = aux_table->index_id;
7122 			dict_index_t*	index;
7123 
7124 			/* Search for the FT index in the table's list. */
7125 			for (index = UT_LIST_GET_FIRST(parent_table->indexes);
7126 			     index;
7127 			     index = UT_LIST_GET_NEXT(indexes, index)) {
7128 				if (index->id == id) {
7129 					valid = true;
7130 					break;
7131 				}
7132 
7133 			}
7134 		}
7135 	}
7136 
7137 	if (parent_table) {
7138 		dict_table_close(parent_table, TRUE, FALSE);
7139 	}
7140 
7141 	return(valid);
7142 }
7143 
7144 /** Try to rename all aux tables of the specified parent table.
7145 @param[in]	aux_tables	aux_tables to be renamed
7146 @param[in]	parent_table	parent table of all aux
7147 				tables stored in tables. */
7148 static
7149 void
fts_rename_aux_tables_to_hex_format(ib_vector_t * aux_tables,dict_table_t * parent_table)7150 fts_rename_aux_tables_to_hex_format(
7151 	ib_vector_t*	aux_tables,
7152 	dict_table_t*	parent_table)
7153 {
7154 	dberr_t err;
7155 	trx_t*	trx_rename = trx_allocate_for_background();
7156 	trx_rename->op_info = "Rename aux tables to hex format";
7157 	trx_rename->dict_operation_lock_mode = RW_X_LATCH;
7158 	trx_start_for_ddl(trx_rename, TRX_DICT_OP_TABLE);
7159 
7160 	err = fts_rename_aux_tables_to_hex_format_low(trx_rename,
7161 						      parent_table, aux_tables);
7162 
7163 	trx_rename->dict_operation_lock_mode = 0;
7164 
7165 	if (err != DB_SUCCESS) {
7166 
7167 		ib::warn() << "Rollback operations on all aux tables of "
7168 			"table "<< parent_table->name << ". All the fts index "
7169 			"associated with the table are marked as corrupted. "
7170 			"Please rebuild the index again.";
7171 
7172 		/* Corrupting the fts index related to parent table. */
7173 		trx_t*	trx_corrupt;
7174 		trx_corrupt = trx_allocate_for_background();
7175 		trx_corrupt->dict_operation_lock_mode = RW_X_LATCH;
7176 		trx_start_for_ddl(trx_corrupt, TRX_DICT_OP_TABLE);
7177 		fts_parent_all_index_set_corrupt(trx_corrupt, parent_table);
7178 		trx_corrupt->dict_operation_lock_mode = 0;
7179 		fts_sql_commit(trx_corrupt);
7180 		trx_free_for_background(trx_corrupt);
7181 	} else {
7182 		fts_sql_commit(trx_rename);
7183 	}
7184 
7185 	trx_free_for_background(trx_rename);
7186 	ib_vector_reset(aux_tables);
7187 }
7188 
7189 /** Set the hex format flag for the parent table.
7190 @param[in, out]	parent_table	parent table
7191 @param[in]	trx		transaction */
7192 static
7193 void
fts_set_parent_hex_format_flag(dict_table_t * parent_table,trx_t * trx)7194 fts_set_parent_hex_format_flag(
7195 	dict_table_t*	parent_table,
7196 	trx_t*		trx)
7197 {
7198 	if (!DICT_TF2_FLAG_IS_SET(parent_table,
7199 				  DICT_TF2_FTS_AUX_HEX_NAME)) {
7200 		DBUG_EXECUTE_IF("parent_table_flag_fail", DBUG_SUICIDE(););
7201 
7202 		dberr_t	err = fts_update_hex_format_flag(
7203 				trx, parent_table->id, true);
7204 
7205 		if (err != DB_SUCCESS) {
7206 			ib::fatal() << "Setting parent table "
7207 				<< parent_table->name
7208 				<< "to hex format failed. Please try "
7209 				<< "to restart the server again, if it "
7210 				<< "doesn't work, the system tables "
7211 				<< "might be corrupted.";
7212 		} else {
7213 			DICT_TF2_FLAG_SET(
7214 				parent_table, DICT_TF2_FTS_AUX_HEX_NAME);
7215 		}
7216 	}
7217 }
7218 
7219 /** Drop the obsolete auxilary table.
7220 @param[in]	tables	tables to be dropped. */
7221 static
7222 void
fts_drop_obsolete_aux_table_from_vector(ib_vector_t * tables)7223 fts_drop_obsolete_aux_table_from_vector(
7224 	ib_vector_t*	tables)
7225 {
7226 	dberr_t		err;
7227 
7228 	for (ulint count = 0; count < ib_vector_size(tables);
7229 	     ++count) {
7230 
7231 		fts_aux_table_t*	aux_drop_table;
7232 		aux_drop_table = static_cast<fts_aux_table_t*>(
7233 			ib_vector_get(tables, count));
7234 		trx_t*	trx_drop = trx_allocate_for_background();
7235 		trx_drop->op_info = "Drop obsolete aux tables";
7236 		trx_drop->dict_operation_lock_mode = RW_X_LATCH;
7237 		trx_start_for_ddl(trx_drop, TRX_DICT_OP_TABLE);
7238 
7239 		err = row_drop_table_for_mysql(
7240 			aux_drop_table->name, trx_drop, false, true);
7241 
7242 		trx_drop->dict_operation_lock_mode = 0;
7243 
7244 		if (err != DB_SUCCESS) {
7245 			/* We don't need to worry about the
7246 			failure, since server would try to
7247 			drop it on next restart, even if
7248 			the table was broken. */
7249 			ib::warn() << "Failed to drop obsolete aux table "
7250 				<< aux_drop_table->name << ", which is "
7251 				<< "harmless. will try to drop it on next "
7252 				<< "restart.";
7253 
7254 			fts_sql_rollback(trx_drop);
7255 		} else {
7256 			ib::info() << "Dropped obsolete aux"
7257 				" table '" << aux_drop_table->name
7258 				<< "'.";
7259 
7260 			fts_sql_commit(trx_drop);
7261 		}
7262 
7263 		trx_free_for_background(trx_drop);
7264 	}
7265 }
7266 
7267 /** Drop all the auxiliary table present in the vector.
7268 @param[in]	trx	transaction
7269 @param[in]	tables	tables to be dropped */
7270 static
7271 void
fts_drop_aux_table_from_vector(trx_t * trx,ib_vector_t * tables)7272 fts_drop_aux_table_from_vector(
7273 	trx_t*		trx,
7274 	ib_vector_t*	tables)
7275 {
7276 	for (ulint count = 0; count < ib_vector_size(tables);
7277 	    ++count) {
7278 		fts_aux_table_t*	aux_drop_table;
7279 		aux_drop_table = static_cast<fts_aux_table_t*>(
7280 				ib_vector_get(tables, count));
7281 
7282 		/* Check for the validity of the parent table */
7283 		if (!fts_valid_parent_table(aux_drop_table)) {
7284 
7285 			ib::warn() << "Parent table of FTS auxiliary table "
7286 				<< aux_drop_table->name << " not found.";
7287 
7288 			dberr_t err = fts_drop_table(trx, aux_drop_table->name);
7289 			if (err == DB_FAIL) {
7290 
7291 				char*	path = fil_make_filepath(
7292 					NULL, aux_drop_table->name, IBD, false);
7293 
7294 				if (path != NULL) {
7295 					os_file_delete_if_exists(
7296 							innodb_data_file_key,
7297 							path , NULL);
7298 					ut_free(path);
7299 				}
7300 			}
7301 		}
7302 	}
7303 }
7304 
7305 /**********************************************************************//**
7306 Check and drop all orphaned FTS auxiliary tables, those that don't have
7307 a parent table or FTS index defined on them.
7308 @return DB_SUCCESS or error code */
7309 static MY_ATTRIBUTE((nonnull))
7310 void
fts_check_and_drop_orphaned_tables(trx_t * trx,ib_vector_t * tables)7311 fts_check_and_drop_orphaned_tables(
7312 /*===============================*/
7313 	trx_t*		trx,			/*!< in: transaction */
7314 	ib_vector_t*	tables)			/*!< in: tables to check */
7315 {
7316 	mem_heap_t*	heap;
7317 	ib_vector_t*	aux_tables_to_rename;
7318 	ib_vector_t*	invalid_aux_tables;
7319 	ib_vector_t*	valid_aux_tables;
7320 	ib_vector_t*	drop_aux_tables;
7321 	ib_vector_t*	obsolete_aux_tables;
7322 	ib_alloc_t*	heap_alloc;
7323 
7324 	heap = mem_heap_create(1024);
7325 	heap_alloc = ib_heap_allocator_create(heap);
7326 
7327 	/* We store all aux tables belonging to the same parent table here,
7328 	and rename all these tables in a batch mode. */
7329 	aux_tables_to_rename = ib_vector_create(heap_alloc,
7330 						sizeof(fts_aux_table_t), 128);
7331 
7332 	/* We store all fake auxiliary table and orphaned table here. */
7333 	invalid_aux_tables = ib_vector_create(heap_alloc,
7334 					      sizeof(fts_aux_table_t), 128);
7335 
7336 	/* We store all valid aux tables. We use this to filter the
7337 	fake auxiliary table from invalid auxiliary tables. */
7338 	valid_aux_tables = ib_vector_create(heap_alloc,
7339 					    sizeof(fts_aux_table_t), 128);
7340 
7341 	/* We store all auxiliary tables to be dropped. */
7342 	drop_aux_tables = ib_vector_create(heap_alloc,
7343 					   sizeof(fts_aux_table_t), 128);
7344 
7345 	/* We store all obsolete auxiliary tables to be dropped. */
7346 	obsolete_aux_tables = ib_vector_create(heap_alloc,
7347 					       sizeof(fts_aux_table_t), 128);
7348 
7349 	/* Sort by parent_id first, in case rename will fail */
7350 	ib_vector_sort(tables, fts_check_aux_table_parent_id_cmp);
7351 
7352 	for (ulint i = 0; i < ib_vector_size(tables); ++i) {
7353 		dict_table_t*		parent_table;
7354 		fts_aux_table_t*	aux_table;
7355 		bool			drop = false;
7356 		dict_table_t*		table;
7357 		fts_aux_table_t*	next_aux_table = NULL;
7358 		ib_id_t			orig_parent_id = 0;
7359 		ib_id_t			orig_index_id = 0;
7360 		bool			rename = false;
7361 
7362 		aux_table = static_cast<fts_aux_table_t*>(
7363 			ib_vector_get(tables, i));
7364 
7365 		table = dict_table_open_on_id(
7366 			aux_table->id, TRUE, DICT_TABLE_OP_NORMAL);
7367 		orig_parent_id = aux_table->parent_id;
7368 		orig_index_id = aux_table->index_id;
7369 
7370 		if (table == NULL
7371 		    || strcmp(table->name.m_name, aux_table->name)) {
7372 
7373 			bool	fake_aux = false;
7374 
7375 			if (table != NULL) {
7376 				dict_table_close(table, TRUE, FALSE);
7377 			}
7378 
7379 			if (i + 1 < ib_vector_size(tables)) {
7380 				next_aux_table = static_cast<fts_aux_table_t*>(
7381 						ib_vector_get(tables, i + 1));
7382 			}
7383 
7384 			/* To know whether aux table is fake fts or
7385 			orphan fts table. */
7386 			for (ulint count = 0;
7387 			     count < ib_vector_size(valid_aux_tables);
7388 			     count++) {
7389 				fts_aux_table_t*	valid_aux;
7390 				valid_aux = static_cast<fts_aux_table_t*>(
7391 					ib_vector_get(valid_aux_tables, count));
7392 				if (strcmp(valid_aux->name,
7393 					   aux_table->name) == 0) {
7394 					fake_aux = true;
7395 					break;
7396 				}
7397 			}
7398 
7399 			/* All aux tables of parent table, whose id is
7400 			last_parent_id, have been checked, try to rename
7401 			them if necessary. */
7402 			if ((next_aux_table == NULL
7403 			     || orig_parent_id != next_aux_table->parent_id)
7404 			    && (!ib_vector_is_empty(aux_tables_to_rename))) {
7405 
7406 					ib_id_t	parent_id = fts_fake_hex_to_dec(
7407 							aux_table->parent_id);
7408 
7409 					parent_table = dict_table_open_on_id(
7410 						parent_id, TRUE,
7411 						DICT_TABLE_OP_NORMAL);
7412 
7413 					fts_rename_aux_tables_to_hex_format(
7414 						aux_tables_to_rename, parent_table);
7415 
7416 					dict_table_close(parent_table, TRUE,
7417 							 FALSE);
7418 			}
7419 
7420 			/* If the aux table is fake aux table. Skip it. */
7421 			if (!fake_aux) {
7422 				ib_vector_push(invalid_aux_tables, aux_table);
7423 			}
7424 
7425 			continue;
7426 		} else if (!DICT_TF2_FLAG_IS_SET(table,
7427 						 DICT_TF2_FTS_AUX_HEX_NAME)) {
7428 
7429 			aux_table->parent_id = fts_fake_hex_to_dec(
7430 						aux_table->parent_id);
7431 
7432 			if (aux_table->index_id != 0) {
7433 				aux_table->index_id = fts_fake_hex_to_dec(
7434 							aux_table->index_id);
7435 			}
7436 
7437 			ut_ad(aux_table->id > aux_table->parent_id);
7438 
7439 			/* Check whether parent table id and index id
7440 			are stored as decimal format. */
7441 			if (fts_valid_parent_table(aux_table)) {
7442 
7443 				parent_table = dict_table_open_on_id(
7444 					aux_table->parent_id, true,
7445 					DICT_TABLE_OP_NORMAL);
7446 
7447 				ut_ad(parent_table != NULL);
7448 				ut_ad(parent_table->fts != NULL);
7449 
7450 				if (!DICT_TF2_FLAG_IS_SET(
7451 					parent_table,
7452 					DICT_TF2_FTS_AUX_HEX_NAME)) {
7453 					rename = true;
7454 				}
7455 
7456 				dict_table_close(parent_table, TRUE, FALSE);
7457 			}
7458 
7459 			if (!rename) {
7460 				/* Reassign the original value of
7461 				aux table if it is not in decimal format */
7462 				aux_table->parent_id = orig_parent_id;
7463 				aux_table->index_id = orig_index_id;
7464 			}
7465 		}
7466 
7467 		if (table != NULL) {
7468 			dict_table_close(table, TRUE, FALSE);
7469 		}
7470 
7471 		if (!rename) {
7472 			/* Check the validity of the parent table. */
7473 			if (!fts_valid_parent_table(aux_table)) {
7474 				drop = true;
7475 			}
7476 		}
7477 
7478 		/* Filter out the fake aux table by comparing with the
7479 		current valid auxiliary table name. */
7480 		for (ulint count = 0;
7481 		     count < ib_vector_size(invalid_aux_tables); count++) {
7482 			fts_aux_table_t*	invalid_aux;
7483 			invalid_aux = static_cast<fts_aux_table_t*>(
7484 				ib_vector_get(invalid_aux_tables, count));
7485 			if (strcmp(invalid_aux->name, aux_table->name) == 0) {
7486 				ib_vector_remove(
7487 					invalid_aux_tables,
7488 					*reinterpret_cast<void**>(invalid_aux));
7489 				break;
7490 			}
7491 		}
7492 
7493 		ib_vector_push(valid_aux_tables, aux_table);
7494 
7495 		/* If the index associated with aux table is corrupted,
7496 		skip it. */
7497 		if (fts_check_corrupt_index(aux_table) > 0) {
7498 
7499 			if (i + 1 < ib_vector_size(tables)) {
7500 				next_aux_table = static_cast<fts_aux_table_t*>(
7501 					ib_vector_get(tables, i + 1));
7502 			}
7503 
7504 			if (next_aux_table == NULL
7505 			    || orig_parent_id != next_aux_table->parent_id) {
7506 
7507 				parent_table = dict_table_open_on_id(
7508 					aux_table->parent_id, TRUE,
7509 					DICT_TABLE_OP_NORMAL);
7510 
7511 				if (!ib_vector_is_empty(aux_tables_to_rename)) {
7512 					fts_rename_aux_tables_to_hex_format(
7513 						aux_tables_to_rename, parent_table);
7514 				} else {
7515 					fts_set_parent_hex_format_flag(
7516 						parent_table, trx);
7517 				}
7518 
7519 				dict_table_close(parent_table, TRUE, FALSE);
7520 			}
7521 
7522 			continue;
7523 		}
7524 
7525 		parent_table = dict_table_open_on_id(
7526 			aux_table->parent_id, TRUE, DICT_TABLE_OP_NORMAL);
7527 
7528 		if (drop) {
7529 			ib_vector_push(drop_aux_tables, aux_table);
7530 		} else {
7531 			if (FTS_IS_OBSOLETE_AUX_TABLE(aux_table->name)) {
7532 				ib_vector_push(obsolete_aux_tables, aux_table);
7533 				continue;
7534 			}
7535 		}
7536 
7537 		/* If the aux table is in decimal format, we should
7538 		rename it, so push it to aux_tables_to_rename */
7539 		if (!drop && rename) {
7540 			bool	rename_table = true;
7541 			for (ulint count = 0;
7542 			     count < ib_vector_size(aux_tables_to_rename);
7543 			     count++) {
7544 				fts_aux_table_t*	rename_aux =
7545 					static_cast<fts_aux_table_t*>(
7546 					ib_vector_get(aux_tables_to_rename,
7547 						      count));
7548 					if (strcmp(rename_aux->name,
7549 						   aux_table->name) == 0) {
7550 						rename_table = false;
7551 						break;
7552 					}
7553 			}
7554 
7555 			if (rename_table) {
7556 				ib_vector_push(aux_tables_to_rename,
7557 					       aux_table);
7558 			}
7559 		}
7560 
7561 		if (i + 1 < ib_vector_size(tables)) {
7562 			next_aux_table = static_cast<fts_aux_table_t*>(
7563 				ib_vector_get(tables, i + 1));
7564 		}
7565 
7566 		if ((next_aux_table == NULL
7567 		     || orig_parent_id != next_aux_table->parent_id)
7568 		    && !ib_vector_is_empty(aux_tables_to_rename)) {
7569 
7570 			ut_ad(rename);
7571 			ut_ad(!DICT_TF2_FLAG_IS_SET(
7572 				parent_table, DICT_TF2_FTS_AUX_HEX_NAME));
7573 
7574 			fts_rename_aux_tables_to_hex_format(
7575 				aux_tables_to_rename,parent_table);
7576 		}
7577 
7578 		/* The IDs are already in correct hex format. */
7579 		if (!drop && !rename) {
7580 			dict_table_t*	table;
7581 
7582 			table = dict_table_open_on_id(
7583 				aux_table->id, TRUE, DICT_TABLE_OP_NORMAL);
7584 
7585 			if (table != NULL
7586 			    && strcmp(table->name.m_name, aux_table->name)) {
7587 				dict_table_close(table, TRUE, FALSE);
7588 				table = NULL;
7589 			}
7590 
7591 			if (table != NULL
7592 			    && !DICT_TF2_FLAG_IS_SET(
7593 					table,
7594 					DICT_TF2_FTS_AUX_HEX_NAME)) {
7595 
7596 				DBUG_EXECUTE_IF("aux_table_flag_fail",
7597 					ib::warn() << "Setting aux table "
7598 						<< table->name << " to hex "
7599 						"format failed.";
7600 					fts_set_index_corrupt(
7601 						trx, aux_table->index_id,
7602 						parent_table);
7603 						goto table_exit;);
7604 
7605 				dberr_t err = fts_update_hex_format_flag(
7606 						trx, table->id, true);
7607 
7608 				if (err != DB_SUCCESS) {
7609 					ib::warn() << "Setting aux table "
7610 						<< table->name << " to hex "
7611 						"format failed.";
7612 
7613 					fts_set_index_corrupt(
7614 						trx, aux_table->index_id,
7615 						parent_table);
7616 				} else {
7617 					DICT_TF2_FLAG_SET(table,
7618 						DICT_TF2_FTS_AUX_HEX_NAME);
7619 				}
7620 			}
7621 #ifndef NDEBUG
7622 table_exit:
7623 #endif	/* !NDEBUG */
7624 
7625 			if (table != NULL) {
7626 				dict_table_close(table, TRUE, FALSE);
7627 			}
7628 
7629 			ut_ad(parent_table != NULL);
7630 
7631 			fts_set_parent_hex_format_flag(
7632 				parent_table, trx);
7633 		}
7634 
7635 		if (parent_table != NULL) {
7636 			dict_table_close(parent_table, TRUE, FALSE);
7637 		}
7638 	}
7639 
7640 	fts_drop_aux_table_from_vector(trx, invalid_aux_tables);
7641 	fts_drop_aux_table_from_vector(trx, drop_aux_tables);
7642 	fts_sql_commit(trx);
7643 
7644 	fts_drop_obsolete_aux_table_from_vector(obsolete_aux_tables);
7645 
7646 	/* Free the memory allocated at the beginning */
7647 	if (heap != NULL) {
7648 		mem_heap_free(heap);
7649 	}
7650 }
7651 
7652 /**********************************************************************//**
7653 Drop all orphaned FTS auxiliary tables, those that don't have a parent
7654 table or FTS index defined on them. */
7655 void
fts_drop_orphaned_tables(void)7656 fts_drop_orphaned_tables(void)
7657 /*==========================*/
7658 {
7659 	trx_t*			trx;
7660 	pars_info_t*		info;
7661 	mem_heap_t*		heap;
7662 	que_t*			graph;
7663 	ib_vector_t*		tables;
7664 	ib_alloc_t*		heap_alloc;
7665 	space_name_list_t	space_name_list;
7666 	dberr_t			error = DB_SUCCESS;
7667 
7668 	/* Note: We have to free the memory after we are done with the list. */
7669 	error = fil_get_space_names(space_name_list);
7670 
7671 	if (error == DB_OUT_OF_MEMORY) {
7672 		ib::fatal() << "Out of memory";
7673 	}
7674 
7675 	heap = mem_heap_create(1024);
7676 	heap_alloc = ib_heap_allocator_create(heap);
7677 
7678 	/* We store the table ids of all the FTS indexes that were found. */
7679 	tables = ib_vector_create(heap_alloc, sizeof(fts_aux_table_t), 128);
7680 
7681 	/* Get the list of all known .ibd files and check for orphaned
7682 	FTS auxiliary files in that list. We need to remove them because
7683 	users can't map them back to table names and this will create
7684 	unnecessary clutter. */
7685 
7686 	for (space_name_list_t::iterator it = space_name_list.begin();
7687 	     it != space_name_list.end();
7688 	     ++it) {
7689 
7690 		fts_aux_table_t*	fts_aux_table;
7691 
7692 		fts_aux_table = static_cast<fts_aux_table_t*>(
7693 			ib_vector_push(tables, NULL));
7694 
7695 		memset(fts_aux_table, 0x0, sizeof(*fts_aux_table));
7696 
7697 		if (!fts_is_aux_table_name(fts_aux_table, *it, strlen(*it))) {
7698 			ib_vector_pop(tables);
7699 		} else {
7700 			ulint	len = strlen(*it);
7701 
7702 			fts_aux_table->id = fil_space_get_id_by_name(*it);
7703 
7704 			/* We got this list from fil0fil.cc. The tablespace
7705 			with this name must exist. */
7706 			ut_a(fts_aux_table->id != ULINT_UNDEFINED);
7707 
7708 			fts_aux_table->name = static_cast<char*>(
7709 				mem_heap_dup(heap, *it, len + 1));
7710 
7711 			fts_aux_table->name[len] = 0;
7712 		}
7713 	}
7714 
7715 	trx = trx_allocate_for_background();
7716 	trx->op_info = "dropping orphaned FTS tables";
7717 	row_mysql_lock_data_dictionary(trx);
7718 
7719 	info = pars_info_create();
7720 
7721 	pars_info_bind_function(info, "my_func", fts_read_tables, tables);
7722 
7723 	graph = fts_parse_sql_no_dict_lock(
7724 		NULL,
7725 		info,
7726 		"DECLARE FUNCTION my_func;\n"
7727 		"DECLARE CURSOR c IS"
7728 		" SELECT NAME, ID"
7729 		" FROM SYS_TABLES;\n"
7730 		"BEGIN\n"
7731 		"\n"
7732 		"OPEN c;\n"
7733 		"WHILE 1 = 1 LOOP\n"
7734 		"  FETCH c INTO my_func();\n"
7735 		"  IF c % NOTFOUND THEN\n"
7736 		"    EXIT;\n"
7737 		"  END IF;\n"
7738 		"END LOOP;\n"
7739 		"CLOSE c;");
7740 
7741 	for (;;) {
7742 		error = fts_eval_sql(trx, graph);
7743 
7744 		if (error == DB_SUCCESS) {
7745 			fts_check_and_drop_orphaned_tables(trx, tables);
7746 			break;				/* Exit the loop. */
7747 		} else {
7748 			ib_vector_reset(tables);
7749 
7750 			fts_sql_rollback(trx);
7751 
7752 			if (error == DB_LOCK_WAIT_TIMEOUT) {
7753 				ib::warn() << "lock wait timeout reading"
7754 					" SYS_TABLES. Retrying!";
7755 
7756 				trx->error_state = DB_SUCCESS;
7757 			} else {
7758 				ib::error() << "(" << ut_strerr(error)
7759 					<< ") while reading SYS_TABLES.";
7760 
7761 				break;			/* Exit the loop. */
7762 			}
7763 		}
7764 	}
7765 
7766 	que_graph_free(graph);
7767 
7768 	row_mysql_unlock_data_dictionary(trx);
7769 
7770 	trx_free_for_background(trx);
7771 
7772 	if (heap != NULL) {
7773 		mem_heap_free(heap);
7774 	}
7775 
7776 	/** Free the memory allocated to store the .ibd names. */
7777 	for (space_name_list_t::iterator it = space_name_list.begin();
7778 	     it != space_name_list.end();
7779 	     ++it) {
7780 
7781 		UT_DELETE_ARRAY(*it);
7782 	}
7783 }
7784 
7785 /**********************************************************************//**
7786 Check whether user supplied stopword table is of the right format.
7787 Caller is responsible to hold dictionary locks.
7788 @return the stopword column charset if qualifies */
7789 CHARSET_INFO*
fts_valid_stopword_table(const char * stopword_table_name)7790 fts_valid_stopword_table(
7791 /*=====================*/
7792 	const char*	stopword_table_name)	/*!< in: Stopword table
7793 						name */
7794 {
7795 	dict_table_t*	table;
7796 	dict_col_t*     col = NULL;
7797 
7798 	if (!stopword_table_name) {
7799 		return(NULL);
7800 	}
7801 
7802 	table = dict_table_get_low(stopword_table_name);
7803 
7804 	if (!table) {
7805 		ib::error() << "User stopword table " << stopword_table_name
7806 			<< " does not exist.";
7807 
7808 		return(NULL);
7809 	} else {
7810 		const char*     col_name;
7811 
7812 		col_name = dict_table_get_col_name(table, 0);
7813 
7814 		if (ut_strcmp(col_name, "value")) {
7815 			ib::error() << "Invalid column name for stopword"
7816 				" table " << stopword_table_name << ". Its"
7817 				" first column must be named as 'value'.";
7818 
7819 			return(NULL);
7820 		}
7821 
7822 		col = dict_table_get_nth_col(table, 0);
7823 
7824 		if (col->mtype != DATA_VARCHAR
7825 		    && col->mtype != DATA_VARMYSQL) {
7826 			ib::error() << "Invalid column type for stopword"
7827 				" table " << stopword_table_name << ". Its"
7828 				" first column must be of varchar type";
7829 
7830 			return(NULL);
7831 		}
7832 	}
7833 
7834 	ut_ad(col);
7835 
7836 	return(fts_get_charset(col->prtype));
7837 }
7838 
7839 /**********************************************************************//**
7840 This function loads the stopword into the FTS cache. It also
7841 records/fetches stopword configuration to/from FTS configure
7842 table, depending on whether we are creating or reloading the
7843 FTS.
7844 @return TRUE if load operation is successful */
7845 ibool
fts_load_stopword(const dict_table_t * table,trx_t * trx,const char * global_stopword_table,const char * session_stopword_table,ibool stopword_is_on,ibool reload)7846 fts_load_stopword(
7847 /*==============*/
7848 	const dict_table_t*
7849 			table,			/*!< in: Table with FTS */
7850 	trx_t*		trx,			/*!< in: Transactions */
7851 	const char*	global_stopword_table,	/*!< in: Global stopword table
7852 						name */
7853 	const char*	session_stopword_table,	/*!< in: Session stopword table
7854 						name */
7855 	ibool		stopword_is_on,		/*!< in: Whether stopword
7856 						option is turned on/off */
7857 	ibool		reload)			/*!< in: Whether it is
7858 						for reloading FTS table */
7859 {
7860 	fts_table_t	fts_table;
7861 	fts_string_t	str;
7862 	dberr_t		error = DB_SUCCESS;
7863 	ulint		use_stopword;
7864 	fts_cache_t*	cache;
7865 	const char*	stopword_to_use = NULL;
7866 	ibool		new_trx = FALSE;
7867 	byte		str_buffer[MAX_FULL_NAME_LEN + 1];
7868 
7869 	FTS_INIT_FTS_TABLE(&fts_table, "CONFIG", FTS_COMMON_TABLE, table);
7870 
7871 	cache = table->fts->cache;
7872 
7873 	if (!reload && !(cache->stopword_info.status
7874 			 & STOPWORD_NOT_INIT)) {
7875 		return(TRUE);
7876 	}
7877 
7878 	if (!trx) {
7879 		trx = trx_allocate_for_background();
7880 		trx->op_info = "upload FTS stopword";
7881 		new_trx = TRUE;
7882 	}
7883 
7884 	/* First check whether stopword filtering is turned off */
7885 	if (reload) {
7886 		error = fts_config_get_ulint(
7887 			trx, &fts_table, FTS_USE_STOPWORD, &use_stopword);
7888 	} else {
7889 		use_stopword = (ulint) stopword_is_on;
7890 
7891 		error = fts_config_set_ulint(
7892 			trx, &fts_table, FTS_USE_STOPWORD, use_stopword);
7893 	}
7894 
7895 	if (error != DB_SUCCESS) {
7896 		goto cleanup;
7897 	}
7898 
7899 	/* If stopword is turned off, no need to continue to load the
7900 	stopword into cache, but still need to do initialization */
7901 	if (!use_stopword) {
7902 		cache->stopword_info.status = STOPWORD_OFF;
7903 		goto cleanup;
7904 	}
7905 
7906 	if (reload) {
7907 		/* Fetch the stopword table name from FTS config
7908 		table */
7909 		str.f_n_char = 0;
7910 		str.f_str = str_buffer;
7911 		str.f_len = sizeof(str_buffer) - 1;
7912 
7913 		error = fts_config_get_value(
7914 			trx, &fts_table, FTS_STOPWORD_TABLE_NAME, &str);
7915 
7916 		if (error != DB_SUCCESS) {
7917 			goto cleanup;
7918 		}
7919 
7920 		if (strlen((char*) str.f_str) > 0) {
7921 			stopword_to_use = (const char*) str.f_str;
7922 		}
7923 	} else {
7924 		stopword_to_use = (session_stopword_table)
7925 			? session_stopword_table : global_stopword_table;
7926 	}
7927 
7928 	if (stopword_to_use
7929 	    && fts_load_user_stopword(table->fts, stopword_to_use,
7930 				      &cache->stopword_info)) {
7931 		/* Save the stopword table name to the configure
7932 		table */
7933 		if (!reload) {
7934 			str.f_n_char = 0;
7935 			str.f_str = (byte*) stopword_to_use;
7936 			str.f_len = ut_strlen(stopword_to_use);
7937 
7938 			error = fts_config_set_value(
7939 				trx, &fts_table, FTS_STOPWORD_TABLE_NAME, &str);
7940 		}
7941 	} else {
7942 		/* Load system default stopword list */
7943 		fts_load_default_stopword(&cache->stopword_info);
7944 	}
7945 
7946 cleanup:
7947 	if (new_trx) {
7948 		if (error == DB_SUCCESS) {
7949 			fts_sql_commit(trx);
7950 		} else {
7951 			fts_sql_rollback(trx);
7952 		}
7953 
7954 		trx_free_for_background(trx);
7955 	}
7956 
7957 	if (!cache->stopword_info.cached_stopword) {
7958 		cache->stopword_info.cached_stopword = rbt_create_arg_cmp(
7959 			sizeof(fts_tokenizer_word_t), innobase_fts_text_cmp,
7960 			&my_charset_latin1);
7961 	}
7962 
7963 	return(error == DB_SUCCESS);
7964 }
7965 
7966 /**********************************************************************//**
7967 Callback function when we initialize the FTS at the start up
7968 time. It recovers the maximum Doc IDs presented in the current table.
7969 @return: always returns TRUE */
7970 static
7971 ibool
fts_init_get_doc_id(void * row,void * user_arg)7972 fts_init_get_doc_id(
7973 /*================*/
7974 	void*	row,			/*!< in: sel_node_t* */
7975 	void*	user_arg)		/*!< in: fts cache */
7976 {
7977 	doc_id_t	doc_id = FTS_NULL_DOC_ID;
7978 	sel_node_t*	node = static_cast<sel_node_t*>(row);
7979 	que_node_t*	exp = node->select_list;
7980 	fts_cache_t*    cache = static_cast<fts_cache_t*>(user_arg);
7981 
7982 	ut_ad(ib_vector_is_empty(cache->get_docs));
7983 
7984 	/* Copy each indexed column content into doc->text.f_str */
7985 	if (exp) {
7986 		dfield_t*	dfield = que_node_get_val(exp);
7987 		dtype_t*        type = dfield_get_type(dfield);
7988 		void*           data = dfield_get_data(dfield);
7989 
7990 		ut_a(dtype_get_mtype(type) == DATA_INT);
7991 
7992 		doc_id = static_cast<doc_id_t>(mach_read_from_8(
7993 			static_cast<const byte*>(data)));
7994 
7995 		if (doc_id >= cache->next_doc_id) {
7996 			cache->next_doc_id = doc_id + 1;
7997 		}
7998 	}
7999 
8000 	return(TRUE);
8001 }
8002 
8003 /**********************************************************************//**
8004 Callback function when we initialize the FTS at the start up
8005 time. It recovers Doc IDs that have not sync-ed to the auxiliary
8006 table, and require to bring them back into FTS index.
8007 @return: always returns TRUE */
8008 static
8009 ibool
fts_init_recover_doc(void * row,void * user_arg)8010 fts_init_recover_doc(
8011 /*=================*/
8012 	void*	row,			/*!< in: sel_node_t* */
8013 	void*	user_arg)		/*!< in: fts cache */
8014 {
8015 
8016 	fts_doc_t       doc;
8017 	ulint		doc_len = 0;
8018 	ulint		field_no = 0;
8019 	fts_get_doc_t*  get_doc = static_cast<fts_get_doc_t*>(user_arg);
8020 	doc_id_t	doc_id = FTS_NULL_DOC_ID;
8021 	sel_node_t*	node = static_cast<sel_node_t*>(row);
8022 	que_node_t*	exp = node->select_list;
8023 	fts_cache_t*	cache = get_doc->cache;
8024 	st_mysql_ftparser*	parser = get_doc->index_cache->index->parser;
8025 
8026 	fts_doc_init(&doc);
8027 	doc.found = TRUE;
8028 
8029 	ut_ad(cache);
8030 
8031 	/* Copy each indexed column content into doc->text.f_str */
8032 	while (exp) {
8033 		dfield_t*	dfield = que_node_get_val(exp);
8034 		ulint		len = dfield_get_len(dfield);
8035 
8036 		if (field_no == 0) {
8037 			dtype_t*        type = dfield_get_type(dfield);
8038 			void*           data = dfield_get_data(dfield);
8039 
8040 			ut_a(dtype_get_mtype(type) == DATA_INT);
8041 
8042 			doc_id = static_cast<doc_id_t>(mach_read_from_8(
8043 				static_cast<const byte*>(data)));
8044 
8045 			field_no++;
8046 			exp = que_node_get_next(exp);
8047 			continue;
8048 		}
8049 
8050 		if (len == UNIV_SQL_NULL) {
8051 			exp = que_node_get_next(exp);
8052 			continue;
8053 		}
8054 
8055 		ut_ad(get_doc);
8056 
8057 		if (!get_doc->index_cache->charset) {
8058 			get_doc->index_cache->charset = fts_get_charset(
8059 				dfield->type.prtype);
8060 		}
8061 
8062 		doc.charset = get_doc->index_cache->charset;
8063 		doc.is_ngram = get_doc->index_cache->index->is_ngram;
8064 
8065 		if (dfield_is_ext(dfield)) {
8066 			dict_table_t*	table = cache->sync->table;
8067 
8068 			doc.text.f_str = btr_copy_externally_stored_field(
8069 				&doc.text.f_len,
8070 				static_cast<byte*>(dfield_get_data(dfield)),
8071 				dict_table_page_size(table), len,
8072 				static_cast<mem_heap_t*>(doc.self_heap->arg));
8073 		} else {
8074 			doc.text.f_str = static_cast<byte*>(
8075 				dfield_get_data(dfield));
8076 
8077 			doc.text.f_len = len;
8078 		}
8079 
8080 		if (field_no == 1) {
8081 			fts_tokenize_document(&doc, NULL, parser);
8082 		} else {
8083 			fts_tokenize_document_next(&doc, doc_len, NULL, parser);
8084 		}
8085 
8086 		exp = que_node_get_next(exp);
8087 
8088 		doc_len += (exp) ? len + 1 : len;
8089 
8090 		field_no++;
8091 	}
8092 
8093 	fts_cache_add_doc(cache, get_doc->index_cache, doc_id, doc.tokens);
8094 
8095 	fts_doc_free(&doc);
8096 
8097 	cache->added++;
8098 
8099 	if (doc_id >= cache->next_doc_id) {
8100 		cache->next_doc_id = doc_id + 1;
8101 	}
8102 
8103 	return(TRUE);
8104 }
8105 
8106 /**********************************************************************//**
8107 This function brings FTS index in sync when FTS index is first
8108 used. There are documents that have not yet sync-ed to auxiliary
8109 tables from last server abnormally shutdown, we will need to bring
8110 such document into FTS cache before any further operations
8111 @return TRUE if all OK */
8112 ibool
fts_init_index(dict_table_t * table,ibool has_cache_lock)8113 fts_init_index(
8114 /*===========*/
8115 	dict_table_t*	table,		/*!< in: Table with FTS */
8116 	ibool		has_cache_lock)	/*!< in: Whether we already have
8117 					cache lock */
8118 {
8119 	dict_index_t*   index;
8120 	doc_id_t        start_doc;
8121 	fts_get_doc_t*  get_doc = NULL;
8122 	fts_cache_t*    cache = table->fts->cache;
8123 	bool		need_init = false;
8124 
8125 	ut_ad(!mutex_own(&dict_sys->mutex));
8126 
8127 	/* First check cache->get_docs is initialized */
8128 	if (!has_cache_lock) {
8129 		rw_lock_x_lock(&cache->lock);
8130 	}
8131 
8132 	rw_lock_x_lock(&cache->init_lock);
8133 	if (cache->get_docs == NULL) {
8134 		cache->get_docs = fts_get_docs_create(cache);
8135 	}
8136 	rw_lock_x_unlock(&cache->init_lock);
8137 
8138 	if (table->fts->fts_status & ADDED_TABLE_SYNCED) {
8139 		goto func_exit;
8140 	}
8141 
8142 	need_init = true;
8143 
8144 	start_doc = cache->synced_doc_id;
8145 
8146 	if (!start_doc) {
8147 		fts_cmp_set_sync_doc_id(table, 0, TRUE, &start_doc);
8148 		cache->synced_doc_id = start_doc;
8149 	}
8150 
8151 	/* No FTS index, this is the case when previous FTS index
8152 	dropped, and we re-initialize the Doc ID system for subsequent
8153 	insertion */
8154 	if (ib_vector_is_empty(cache->get_docs)) {
8155 		index = table->fts_doc_id_index;
8156 
8157 		ut_a(index);
8158 
8159 		fts_doc_fetch_by_doc_id(NULL, start_doc, index,
8160 					FTS_FETCH_DOC_BY_ID_LARGE,
8161 					fts_init_get_doc_id, cache);
8162 	} else {
8163 		if (table->fts->cache->stopword_info.status
8164 		    & STOPWORD_NOT_INIT) {
8165 			fts_load_stopword(table, NULL, NULL, NULL, TRUE, TRUE);
8166 		}
8167 
8168 		for (ulint i = 0; i < ib_vector_size(cache->get_docs); ++i) {
8169 			get_doc = static_cast<fts_get_doc_t*>(
8170 				ib_vector_get(cache->get_docs, i));
8171 
8172 			index = get_doc->index_cache->index;
8173 
8174 			fts_doc_fetch_by_doc_id(NULL, start_doc, index,
8175 						FTS_FETCH_DOC_BY_ID_LARGE,
8176 						fts_init_recover_doc, get_doc);
8177 		}
8178 	}
8179 
8180 	table->fts->fts_status |= ADDED_TABLE_SYNCED;
8181 
8182 	fts_get_docs_clear(cache->get_docs);
8183 
8184 func_exit:
8185 	if (!has_cache_lock) {
8186 		rw_lock_x_unlock(&cache->lock);
8187 	}
8188 
8189 	if (need_init) {
8190 		mutex_enter(&dict_sys->mutex);
8191 		/* Register the table with the optimize thread. */
8192 		fts_optimize_add_table(table);
8193 		mutex_exit(&dict_sys->mutex);
8194 	}
8195 
8196 	return(TRUE);
8197 }
8198 
8199 /** Check if the all the auxillary tables associated with FTS index are in
8200 consistent state. For now consistency is check only by ensuring
8201 index->page_no != FIL_NULL
8202 @param[out]	base_table	table has host fts index
8203 @param[in,out]	trx		trx handler */
8204 void
fts_check_corrupt(dict_table_t * base_table,trx_t * trx)8205 fts_check_corrupt(
8206 	dict_table_t*	base_table,
8207 	trx_t*		trx)
8208 {
8209 	bool		sane = true;
8210 	fts_table_t	fts_table;
8211 
8212 	/* Iterate over the common table and check for their sanity. */
8213 	FTS_INIT_FTS_TABLE(&fts_table, NULL, FTS_COMMON_TABLE, base_table);
8214 
8215 	for (ulint i = 0; fts_common_tables[i] != NULL && sane; ++i) {
8216 
8217 		char	table_name[MAX_FULL_NAME_LEN];
8218 
8219 		fts_table.suffix = fts_common_tables[i];
8220 		fts_get_table_name(&fts_table, table_name);
8221 
8222 		dict_table_t*	aux_table = dict_table_open_on_name(
8223 			table_name, true, FALSE, DICT_ERR_IGNORE_NONE);
8224 
8225 		if (aux_table == NULL) {
8226 			dict_set_corrupted(
8227 				dict_table_get_first_index(base_table),
8228 				trx, "FTS_SANITY_CHECK");
8229 			ut_ad(base_table->corrupted == TRUE);
8230 			sane = false;
8231 			continue;
8232 		}
8233 
8234 		for (dict_index_t*	aux_table_index =
8235 			UT_LIST_GET_FIRST(aux_table->indexes);
8236 		     aux_table_index != NULL;
8237 		     aux_table_index =
8238 			UT_LIST_GET_NEXT(indexes, aux_table_index)) {
8239 
8240 			/* Check if auxillary table needed for FTS is sane. */
8241 			if (aux_table_index->page == FIL_NULL) {
8242 				dict_set_corrupted(
8243 					dict_table_get_first_index(base_table),
8244 					trx, "FTS_SANITY_CHECK");
8245 				ut_ad(base_table->corrupted == TRUE);
8246 				sane = false;
8247 			}
8248 		}
8249 
8250 		dict_table_close(aux_table, FALSE, FALSE);
8251 	}
8252 }
8253