1 /*****************************************************************************
2 
3 Copyright (c) 2011, 2020, Oracle and/or its affiliates. All Rights Reserved.
4 
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License, version 2.0,
7 as published by the Free Software Foundation.
8 
9 This program is also distributed with certain software (including
10 but not limited to OpenSSL) that is licensed under separate terms,
11 as designated in a particular file or component or in included license
12 documentation.  The authors of MySQL hereby grant you an additional
13 permission to link the program and your derivative works with the
14 separately licensed software that they have included with MySQL.
15 
16 This program is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19 GNU General Public License, version 2.0, for more details.
20 
21 You should have received a copy of the GNU General Public License along with
22 this program; if not, write to the Free Software Foundation, Inc.,
23 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
24 
25 *****************************************************************************/
26 
27 /**************************************************//**
28 @file fts/fts0fts.cc
29 Full Text Search interface
30 ***********************************************************************/
31 
32 #include "ha_prototypes.h"
33 
34 #include "trx0roll.h"
35 #include "row0mysql.h"
36 #include "row0upd.h"
37 #include "dict0types.h"
38 #include "dict0stats_bg.h"
39 #include "row0sel.h"
40 #include "fts0fts.h"
41 #include "fts0priv.h"
42 #include "fts0types.h"
43 #include "fts0types.ic"
44 #include "fts0vlc.ic"
45 #include "fts0plugin.h"
46 #include "dict0priv.h"
47 #include "dict0stats.h"
48 #include "btr0pcur.h"
49 #include "sync0sync.h"
50 #include "ut0new.h"
51 
52 static const ulint FTS_MAX_ID_LEN = 32;
53 
54 /** Column name from the FTS config table */
55 #define FTS_MAX_CACHE_SIZE_IN_MB	"cache_size_in_mb"
56 
/** Check whether an auxiliary table name denotes an obsolete table, by
looking for one of the obsolete table-name keywords inside it.
The argument is expanded (and may be evaluated) more than once.
@param table_name	NUL-terminated auxiliary table name
@return non-zero if the name contains "DOC_ID", "ADDED" or "STOPWORDS" */
#define FTS_IS_OBSOLETE_AUX_TABLE(table_name)			\
	(!(strstr((table_name), "DOC_ID") == NULL		\
	   && strstr((table_name), "ADDED") == NULL		\
	   && strstr((table_name), "STOPWORDS") == NULL))
63 
64 /** This is maximum FTS cache for each table and would be
65 a configurable variable */
66 ulong	fts_max_cache_size;
67 
68 /** Whether the total memory used for FTS cache is exhausted, and we will
69 need a sync to free some memory */
70 bool	fts_need_sync = false;
71 
72 /** Variable specifying the total memory allocated for FTS cache */
73 ulong	fts_max_total_cache_size;
74 
75 /** This is FTS result cache limit for each query and would be
76 a configurable variable */
77 ulong	fts_result_cache_limit;
78 
79 /** Variable specifying the maximum FTS max token size */
80 ulong	fts_max_token_size;
81 
82 /** Variable specifying the minimum FTS max token size */
83 ulong	fts_min_token_size;
84 
85 
86 // FIXME: testing
87 ib_time_monotonic_t elapsed_time = 0;
88 ulint n_nodes = 0;
89 
90 #ifdef FTS_CACHE_SIZE_DEBUG
91 /** The cache size permissible lower limit (1K) */
92 static const ulint FTS_CACHE_SIZE_LOWER_LIMIT_IN_MB = 1;
93 
94 /** The cache size permissible upper limit (1G) */
95 static const ulint FTS_CACHE_SIZE_UPPER_LIMIT_IN_MB = 1024;
96 #endif
97 
98 /** Time to sleep after DEADLOCK error before retrying operation. */
99 static const ulint FTS_DEADLOCK_RETRY_WAIT = 100000;
100 
101 /** variable to record innodb_fts_internal_tbl_name for information
102 schema table INNODB_FTS_INSERTED etc. */
103 char* fts_internal_tbl_name		= NULL;
104 
/** InnoDB default stopword list, terminated by a NULL entry.
Several versions of stopword lists exist; the words below come from the
"Google Stopword" list. Reference:
http://meta.wikimedia.org/wiki/Stop_word_list/google_stop_word_list.
The final version of the InnoDB default stopword list is still pending
a decision.
Note that "the" appears twice in the list; the order and the contents
are kept exactly as they are. */
const char *fts_default_stopword[] =
{
	"a", "about", "an", "are", "as", "at",
	"be", "by",
	"com",
	"de",
	"en",
	"for", "from",
	"how",
	"i", "in", "is", "it",
	"la",
	"of", "on", "or",
	"that", "the", "this", "to",
	"was", "what", "when", "where", "who", "will", "with",
	"und", "the", "www",
	NULL
};
151 
152 /** For storing table info when checking for orphaned tables. */
153 struct fts_aux_table_t {
154 	table_id_t	id;		/*!< Table id */
155 	table_id_t	parent_id;	/*!< Parent table id */
156 	table_id_t	index_id;	/*!< Table FT index id */
157 	char*		name;		/*!< Name of the table */
158 };
159 
#ifdef FTS_DOC_STATS_DEBUG
/** SQL template for creating the FTS auxiliary index specific tables.
It is mainly designed for future statistics work; it creates the
$doc_id_table with a unique clustered index on doc_id. */
static const char* fts_create_index_tables_sql =
	"BEGIN\n"
	"CREATE TABLE $doc_id_table (\n"
	"   doc_id BIGINT UNSIGNED,\n"
	"   word_count INTEGER UNSIGNED NOT NULL\n"
	") COMPACT;\n"
	"CREATE UNIQUE CLUSTERED INDEX IND ON $doc_id_table(doc_id);\n";
#endif /* FTS_DOC_STATS_DEBUG */
173 
/** Suffixes of the FTS auxiliary tables that are common to all FT
indexes of a table. The list is terminated by a NULL entry. */
const char* fts_common_tables[] = {
	"BEING_DELETED", "BEING_DELETED_CACHE",
	"CONFIG",
	"DELETED", "DELETED_CACHE",
	NULL
};
183 
184 /** FTS auxiliary INDEX split intervals. */
185 const  fts_index_selector_t fts_index_selector[] = {
186 	{ 9, "INDEX_1" },
187 	{ 65, "INDEX_2" },
188 	{ 70, "INDEX_3" },
189 	{ 75, "INDEX_4" },
190 	{ 80, "INDEX_5" },
191 	{ 85, "INDEX_6" },
192 	{  0 , NULL	 }
193 };
194 
195 /** Default config values for FTS indexes on a table. */
196 static const char* fts_config_table_insert_values_sql =
197 	"BEGIN\n"
198 	"\n"
199 	"INSERT INTO $config_table VALUES('"
200 		FTS_MAX_CACHE_SIZE_IN_MB "', '256');\n"
201 	""
202 	"INSERT INTO $config_table VALUES('"
203 		FTS_OPTIMIZE_LIMIT_IN_SECS  "', '180');\n"
204 	""
205 	"INSERT INTO $config_table VALUES ('"
206 		FTS_SYNCED_DOC_ID "', '0');\n"
207 	""
208 	"INSERT INTO $config_table VALUES ('"
209 		FTS_TOTAL_DELETED_COUNT "', '0');\n"
210 	"" /* Note: 0 == FTS_TABLE_STATE_RUNNING */
211 	"INSERT INTO $config_table VALUES ('"
212 		FTS_TABLE_STATE "', '0');\n";
213 
214 /** FTS tokenize parmameter for plugin parser */
215 struct fts_tokenize_param_t {
216 	fts_doc_t*	result_doc;	/*!< Result doc for tokens */
217 	ulint		add_pos;	/*!< Added position for tokens */
218 };
219 
220 /** Run SYNC on the table, i.e., write out data from the cache to the
221 FTS auxiliary INDEX table and clear the cache at the end.
222 @param[in,out]	sync		sync state
223 @param[in]	unlock_cache	whether unlock cache lock when write node
224 @param[in]	wait		whether wait when a sync is in progress
225 @param[in]	has_dict_lock	whether has dict operation lock
226 @return DB_SUCCESS if all OK */
227 static
228 dberr_t
229 fts_sync(
230 	fts_sync_t*	sync,
231 	bool		unlock_cache,
232 	bool		wait,
233 	bool		has_dict_lock);
234 
235 /****************************************************************//**
236 Release all resources help by the words rb tree e.g., the node ilist. */
237 static
238 void
239 fts_words_free(
240 /*===========*/
241 	ib_rbt_t*	words)		/*!< in: rb tree of words */
242 	MY_ATTRIBUTE((nonnull));
243 #ifdef FTS_CACHE_SIZE_DEBUG
244 /****************************************************************//**
245 Read the max cache size parameter from the config table. */
246 static
247 void
248 fts_update_max_cache_size(
249 /*======================*/
250 	fts_sync_t*	sync);		/*!< in: sync state */
251 #endif
252 
253 /*********************************************************************//**
254 This function fetches the document just inserted right before
255 we commit the transaction, and tokenize the inserted text data
256 and insert into FTS auxiliary table and its cache.
257 @return TRUE if successful */
258 static
259 ulint
260 fts_add_doc_by_id(
261 /*==============*/
262 	fts_trx_table_t*ftt,		/*!< in: FTS trx table */
263 	doc_id_t	doc_id,		/*!< in: doc id */
264 	ib_vector_t*	fts_indexes MY_ATTRIBUTE((unused)));
265 					/*!< in: affected fts indexes */
266 #ifdef FTS_DOC_STATS_DEBUG
267 /****************************************************************//**
268 Check whether a particular word (term) exists in the FTS index.
269 @return DB_SUCCESS if all went fine */
270 static
271 dberr_t
272 fts_is_word_in_index(
273 /*=================*/
274 	trx_t*		trx,		/*!< in: FTS query state */
275 	que_t**		graph,		/*!< out: Query graph */
276 	fts_table_t*	fts_table,	/*!< in: table instance */
277 	const fts_string_t* word,	/*!< in: the word to check */
278 	ibool*		found)		/*!< out: TRUE if exists */
279 	MY_ATTRIBUTE((nonnull, warn_unused_result));
280 #endif /* FTS_DOC_STATS_DEBUG */
281 
282 /******************************************************************//**
283 Update the last document id. This function could create a new
284 transaction to update the last document id.
285 @return DB_SUCCESS if OK */
286 static
287 dberr_t
288 fts_update_sync_doc_id(
289 /*===================*/
290 	const dict_table_t*	table,		/*!< in: table */
291 	const char*		table_name,	/*!< in: table name, or NULL */
292 	doc_id_t		doc_id,		/*!< in: last document id */
293 	trx_t*			trx)		/*!< in: update trx, or NULL */
294 	MY_ATTRIBUTE((nonnull(1)));
295 
296 /** Get a character set based on precise type.
297 @param prtype precise type
298 @return the corresponding character set */
299 UNIV_INLINE
300 CHARSET_INFO*
fts_get_charset(ulint prtype)301 fts_get_charset(ulint prtype)
302 {
303 #ifdef UNIV_DEBUG
304 	switch (prtype & DATA_MYSQL_TYPE_MASK) {
305 	case MYSQL_TYPE_BIT:
306 	case MYSQL_TYPE_STRING:
307 	case MYSQL_TYPE_VAR_STRING:
308 	case MYSQL_TYPE_TINY_BLOB:
309 	case MYSQL_TYPE_MEDIUM_BLOB:
310 	case MYSQL_TYPE_BLOB:
311 	case MYSQL_TYPE_LONG_BLOB:
312 	case MYSQL_TYPE_VARCHAR:
313 		break;
314 	default:
315 		ut_error;
316 	}
317 #endif /* UNIV_DEBUG */
318 
319 	uint cs_num = (uint) dtype_get_charset_coll(prtype);
320 
321 	if (CHARSET_INFO* cs = get_charset(cs_num, MYF(MY_WME))) {
322 		return(cs);
323 	}
324 
325 	ib::fatal() << "Unable to find charset-collation " << cs_num;
326 	return(NULL);
327 }
328 
329 /****************************************************************//**
330 This function loads the default InnoDB stopword list */
331 static
332 void
fts_load_default_stopword(fts_stopword_t * stopword_info)333 fts_load_default_stopword(
334 /*======================*/
335 	fts_stopword_t*		stopword_info)	/*!< in: stopword info */
336 {
337 	fts_string_t		str;
338 	mem_heap_t*		heap;
339 	ib_alloc_t*		allocator;
340 	ib_rbt_t*		stop_words;
341 
342 	allocator = stopword_info->heap;
343 	heap = static_cast<mem_heap_t*>(allocator->arg);
344 
345 	if (!stopword_info->cached_stopword) {
346 		stopword_info->cached_stopword = rbt_create_arg_cmp(
347 			sizeof(fts_tokenizer_word_t), innobase_fts_text_cmp,
348 			&my_charset_latin1);
349 	}
350 
351 	stop_words = stopword_info->cached_stopword;
352 
353 	str.f_n_char = 0;
354 
355 	for (ulint i = 0; fts_default_stopword[i]; ++i) {
356 		char*			word;
357 		fts_tokenizer_word_t	new_word;
358 
359 		/* We are going to duplicate the value below. */
360 		word = const_cast<char*>(fts_default_stopword[i]);
361 
362 		new_word.nodes = ib_vector_create(
363 			allocator, sizeof(fts_node_t), 4);
364 
365 		str.f_len = ut_strlen(word);
366 		str.f_str = reinterpret_cast<byte*>(word);
367 
368 		fts_string_dup(&new_word.text, &str, heap);
369 
370 		rbt_insert(stop_words, &new_word, &new_word);
371 	}
372 
373 	stopword_info->status = STOPWORD_FROM_DEFAULT;
374 }
375 
376 /****************************************************************//**
377 Callback function to read a single stopword value.
378 @return Always return TRUE */
379 static
380 ibool
fts_read_stopword(void * row,void * user_arg)381 fts_read_stopword(
382 /*==============*/
383 	void*		row,		/*!< in: sel_node_t* */
384 	void*		user_arg)	/*!< in: pointer to ib_vector_t */
385 {
386 	ib_alloc_t*	allocator;
387 	fts_stopword_t*	stopword_info;
388 	sel_node_t*	sel_node;
389 	que_node_t*	exp;
390 	ib_rbt_t*	stop_words;
391 	dfield_t*	dfield;
392 	fts_string_t	str;
393 	mem_heap_t*	heap;
394 	ib_rbt_bound_t	parent;
395 
396 	sel_node = static_cast<sel_node_t*>(row);
397 	stopword_info = static_cast<fts_stopword_t*>(user_arg);
398 
399 	stop_words = stopword_info->cached_stopword;
400 	allocator =  static_cast<ib_alloc_t*>(stopword_info->heap);
401 	heap = static_cast<mem_heap_t*>(allocator->arg);
402 
403 	exp = sel_node->select_list;
404 
405 	/* We only need to read the first column */
406 	dfield = que_node_get_val(exp);
407 
408 	str.f_n_char = 0;
409 	str.f_str = static_cast<byte*>(dfield_get_data(dfield));
410 	str.f_len = dfield_get_len(dfield);
411 
412 	/* Only create new node if it is a value not already existed */
413 	if (str.f_len != UNIV_SQL_NULL
414 	    && rbt_search(stop_words, &parent, &str) != 0) {
415 
416 		fts_tokenizer_word_t	new_word;
417 
418 		new_word.nodes = ib_vector_create(
419 			allocator, sizeof(fts_node_t), 4);
420 
421 		new_word.text.f_str = static_cast<byte*>(
422 			 mem_heap_alloc(heap, str.f_len + 1));
423 
424 		memcpy(new_word.text.f_str, str.f_str, str.f_len);
425 
426 		new_word.text.f_n_char = 0;
427 		new_word.text.f_len = str.f_len;
428 		new_word.text.f_str[str.f_len] = 0;
429 
430 		rbt_insert(stop_words, &new_word, &new_word);
431 	}
432 
433 	return(TRUE);
434 }
435 
436 /******************************************************************//**
437 Load user defined stopword from designated user table
438 @return TRUE if load operation is successful */
439 static
440 ibool
fts_load_user_stopword(fts_t * fts,const char * stopword_table_name,fts_stopword_t * stopword_info)441 fts_load_user_stopword(
442 /*===================*/
443 	fts_t*		fts,			/*!< in: FTS struct */
444 	const char*	stopword_table_name,	/*!< in: Stopword table
445 						name */
446 	fts_stopword_t*	stopword_info)		/*!< in: Stopword info */
447 {
448 	pars_info_t*	info;
449 	que_t*		graph;
450 	dberr_t		error = DB_SUCCESS;
451 	ibool		ret = TRUE;
452 	trx_t*		trx;
453 	ibool		has_lock = fts->fts_status & TABLE_DICT_LOCKED;
454 
455 	trx = trx_allocate_for_background();
456 	trx->op_info = "Load user stopword table into FTS cache";
457 
458 	if (!has_lock) {
459 		mutex_enter(&dict_sys->mutex);
460 	}
461 
462 	/* Validate the user table existence and in the right
463 	format */
464 	stopword_info->charset = fts_valid_stopword_table(stopword_table_name);
465 	if (!stopword_info->charset) {
466 		ret = FALSE;
467 		goto cleanup;
468 	} else if (!stopword_info->cached_stopword) {
469 		/* Create the stopword RB tree with the stopword column
470 		charset. All comparison will use this charset */
471 		stopword_info->cached_stopword = rbt_create_arg_cmp(
472 			sizeof(fts_tokenizer_word_t), innobase_fts_text_cmp,
473 			stopword_info->charset);
474 
475 	}
476 
477 	info = pars_info_create();
478 
479 	pars_info_bind_id(info, TRUE, "table_stopword", stopword_table_name);
480 
481 	pars_info_bind_function(info, "my_func", fts_read_stopword,
482 				stopword_info);
483 
484 	graph = fts_parse_sql_no_dict_lock(
485 		NULL,
486 		info,
487 		"DECLARE FUNCTION my_func;\n"
488 		"DECLARE CURSOR c IS"
489 		" SELECT value"
490 		" FROM $table_stopword;\n"
491 		"BEGIN\n"
492 		"\n"
493 		"OPEN c;\n"
494 		"WHILE 1 = 1 LOOP\n"
495 		"  FETCH c INTO my_func();\n"
496 		"  IF c % NOTFOUND THEN\n"
497 		"    EXIT;\n"
498 		"  END IF;\n"
499 		"END LOOP;\n"
500 		"CLOSE c;");
501 
502 	for (;;) {
503 		error = fts_eval_sql(trx, graph);
504 
505 		if (error == DB_SUCCESS) {
506 			fts_sql_commit(trx);
507 			stopword_info->status = STOPWORD_USER_TABLE;
508 			break;
509 		} else {
510 
511 			fts_sql_rollback(trx);
512 
513 			if (error == DB_LOCK_WAIT_TIMEOUT) {
514 				ib::warn() << "Lock wait timeout reading user"
515 					" stopword table. Retrying!";
516 
517 				trx->error_state = DB_SUCCESS;
518 			} else {
519 				ib::error() << "Error '" << ut_strerr(error)
520 					<< "' while reading user stopword"
521 					" table.";
522 				ret = FALSE;
523 				break;
524 			}
525 		}
526 	}
527 
528 	que_graph_free(graph);
529 
530 cleanup:
531 	if (!has_lock) {
532 		mutex_exit(&dict_sys->mutex);
533 	}
534 
535 	trx_free_for_background(trx);
536 	return(ret);
537 }
538 
539 /******************************************************************//**
540 Initialize the index cache. */
541 static
542 void
fts_index_cache_init(ib_alloc_t * allocator,fts_index_cache_t * index_cache)543 fts_index_cache_init(
544 /*=================*/
545 	ib_alloc_t*		allocator,	/*!< in: the allocator to use */
546 	fts_index_cache_t*	index_cache)	/*!< in: index cache */
547 {
548 	ulint			i;
549 
550 	ut_a(index_cache->words == NULL);
551 
552 	index_cache->words = rbt_create_arg_cmp(
553 		sizeof(fts_tokenizer_word_t), innobase_fts_text_cmp,
554 		index_cache->charset);
555 
556 	ut_a(index_cache->doc_stats == NULL);
557 
558 	index_cache->doc_stats = ib_vector_create(
559 		allocator, sizeof(fts_doc_stats_t), 4);
560 
561 	for (i = 0; i < FTS_NUM_AUX_INDEX; ++i) {
562 		ut_a(index_cache->ins_graph[i] == NULL);
563 		ut_a(index_cache->sel_graph[i] == NULL);
564 	}
565 }
566 
567 /*********************************************************************//**
568 Initialize FTS cache. */
569 void
fts_cache_init(fts_cache_t * cache)570 fts_cache_init(
571 /*===========*/
572 	fts_cache_t*	cache)		/*!< in: cache to initialize */
573 {
574 	ulint		i;
575 
576 	/* Just to make sure */
577 	ut_a(cache->sync_heap->arg == NULL);
578 
579 	cache->sync_heap->arg = mem_heap_create(1024);
580 
581 	cache->total_size = 0;
582 
583 	mutex_enter((ib_mutex_t*) &cache->deleted_lock);
584 	cache->deleted_doc_ids = ib_vector_create(
585 		cache->sync_heap, sizeof(fts_update_t), 4);
586 	mutex_exit((ib_mutex_t*) &cache->deleted_lock);
587 
588 	/* Reset the cache data for all the FTS indexes. */
589 	for (i = 0; i < ib_vector_size(cache->indexes); ++i) {
590 		fts_index_cache_t*	index_cache;
591 
592 		index_cache = static_cast<fts_index_cache_t*>(
593 			ib_vector_get(cache->indexes, i));
594 
595 		fts_index_cache_init(cache->sync_heap, index_cache);
596 	}
597 }
598 
599 /****************************************************************//**
600 Create a FTS cache. */
601 fts_cache_t*
fts_cache_create(dict_table_t * table)602 fts_cache_create(
603 /*=============*/
604 	dict_table_t*	table)	/*!< in: table owns the FTS cache */
605 {
606 	mem_heap_t*	heap;
607 	fts_cache_t*	cache;
608 
609 	heap = static_cast<mem_heap_t*>(mem_heap_create(512));
610 
611 	cache = static_cast<fts_cache_t*>(
612 		mem_heap_zalloc(heap, sizeof(*cache)));
613 
614 	cache->cache_heap = heap;
615 
616 	rw_lock_create(fts_cache_rw_lock_key, &cache->lock, SYNC_FTS_CACHE);
617 
618 	rw_lock_create(
619 		fts_cache_init_rw_lock_key, &cache->init_lock,
620 		SYNC_FTS_CACHE_INIT);
621 
622 	mutex_create(LATCH_ID_FTS_DELETE, &cache->deleted_lock);
623 
624 	mutex_create(LATCH_ID_FTS_OPTIMIZE, &cache->optimize_lock);
625 
626 	mutex_create(LATCH_ID_FTS_DOC_ID, &cache->doc_id_lock);
627 
628 	/* This is the heap used to create the cache itself. */
629 	cache->self_heap = ib_heap_allocator_create(heap);
630 
631 	/* This is a transient heap, used for storing sync data. */
632 	cache->sync_heap = ib_heap_allocator_create(heap);
633 	cache->sync_heap->arg = NULL;
634 
635 	cache->sync = static_cast<fts_sync_t*>(
636 		mem_heap_zalloc(heap, sizeof(fts_sync_t)));
637 
638 	cache->sync->table = table;
639 	cache->sync->event = os_event_create(0);
640 
641 	/* Create the index cache vector that will hold the inverted indexes. */
642 	cache->indexes = ib_vector_create(
643 		cache->self_heap, sizeof(fts_index_cache_t), 2);
644 
645 	fts_cache_init(cache);
646 
647 	cache->stopword_info.cached_stopword = NULL;
648 	cache->stopword_info.charset = NULL;
649 
650 	cache->stopword_info.heap = cache->self_heap;
651 
652 	cache->stopword_info.status = STOPWORD_NOT_INIT;
653 
654 	return(cache);
655 }
656 
657 /*******************************************************************//**
658 Add a newly create index into FTS cache */
659 void
fts_add_index(dict_index_t * index,dict_table_t * table)660 fts_add_index(
661 /*==========*/
662 	dict_index_t*	index,		/*!< FTS index to be added */
663 	dict_table_t*	table)		/*!< table */
664 {
665 	fts_t*			fts = table->fts;
666 	fts_cache_t*		cache;
667 	fts_index_cache_t*	index_cache;
668 
669 	ut_ad(fts);
670 	cache = table->fts->cache;
671 
672 	rw_lock_x_lock(&cache->init_lock);
673 
674 	ib_vector_push(fts->indexes, &index);
675 
676 	index_cache = fts_find_index_cache(cache, index);
677 
678 	if (!index_cache) {
679 		/* Add new index cache structure */
680 		index_cache = fts_cache_index_cache_create(table, index);
681 	}
682 
683 	rw_lock_x_unlock(&cache->init_lock);
684 }
685 
686 /*******************************************************************//**
687 recalibrate get_doc structure after index_cache in cache->indexes changed */
688 static
689 void
fts_reset_get_doc(fts_cache_t * cache)690 fts_reset_get_doc(
691 /*==============*/
692 	fts_cache_t*	cache)	/*!< in: FTS index cache */
693 {
694 	fts_get_doc_t*  get_doc;
695 	ulint		i;
696 
697 	ut_ad(rw_lock_own(&cache->init_lock, RW_LOCK_X));
698 
699 	ib_vector_reset(cache->get_docs);
700 
701 	for (i = 0; i < ib_vector_size(cache->indexes); i++) {
702 		fts_index_cache_t*	ind_cache;
703 
704 		ind_cache = static_cast<fts_index_cache_t*>(
705 			ib_vector_get(cache->indexes, i));
706 
707 		get_doc = static_cast<fts_get_doc_t*>(
708 			ib_vector_push(cache->get_docs, NULL));
709 
710 		memset(get_doc, 0x0, sizeof(*get_doc));
711 
712 		get_doc->index_cache = ind_cache;
713 	}
714 
715 	ut_ad(ib_vector_size(cache->get_docs)
716 	      == ib_vector_size(cache->indexes));
717 }
718 
719 /*******************************************************************//**
720 Check an index is in the table->indexes list
721 @return TRUE if it exists */
722 static
723 ibool
fts_in_dict_index(dict_table_t * table,dict_index_t * index_check)724 fts_in_dict_index(
725 /*==============*/
726 	dict_table_t*	table,		/*!< in: Table */
727 	dict_index_t*	index_check)	/*!< in: index to be checked */
728 {
729 	dict_index_t*	index;
730 
731 	for (index = dict_table_get_first_index(table);
732 	     index != NULL;
733 	     index = dict_table_get_next_index(index)) {
734 
735 		if (index == index_check) {
736 			return(TRUE);
737 		}
738 	}
739 
740 	return(FALSE);
741 }
742 
743 /*******************************************************************//**
744 Check an index is in the fts->cache->indexes list
745 @return TRUE if it exists */
746 static
747 ibool
fts_in_index_cache(dict_table_t * table,dict_index_t * index)748 fts_in_index_cache(
749 /*===============*/
750 	dict_table_t*	table,	/*!< in: Table */
751 	dict_index_t*	index)	/*!< in: index to be checked */
752 {
753 	ulint	i;
754 
755 	for (i = 0; i < ib_vector_size(table->fts->cache->indexes); i++) {
756 		fts_index_cache_t*      index_cache;
757 
758 		index_cache = static_cast<fts_index_cache_t*>(
759 			ib_vector_get(table->fts->cache->indexes, i));
760 
761 		if (index_cache->index == index) {
762 			return(TRUE);
763 		}
764 	}
765 
766 	return(FALSE);
767 }
768 
769 /*******************************************************************//**
770 Check indexes in the fts->indexes is also present in index cache and
771 table->indexes list
772 @return TRUE if all indexes match */
773 ibool
fts_check_cached_index(dict_table_t * table)774 fts_check_cached_index(
775 /*===================*/
776 	dict_table_t*	table)	/*!< in: Table where indexes are dropped */
777 {
778 	ulint	i;
779 
780 	if (!table->fts || !table->fts->cache) {
781 		return(TRUE);
782 	}
783 
784 	ut_a(ib_vector_size(table->fts->indexes)
785 	      == ib_vector_size(table->fts->cache->indexes));
786 
787 	for (i = 0; i < ib_vector_size(table->fts->indexes); i++) {
788 		dict_index_t*	index;
789 
790 		index = static_cast<dict_index_t*>(
791 			ib_vector_getp(table->fts->indexes, i));
792 
793 		if (!fts_in_index_cache(table, index)) {
794 			return(FALSE);
795 		}
796 
797 		if (!fts_in_dict_index(table, index)) {
798 			return(FALSE);
799 		}
800 	}
801 
802 	return(TRUE);
803 }
804 
805 /*******************************************************************//**
806 Drop auxiliary tables related to an FTS index
807 @return DB_SUCCESS or error number */
808 dberr_t
fts_drop_index(dict_table_t * table,dict_index_t * index,trx_t * trx)809 fts_drop_index(
810 /*===========*/
811 	dict_table_t*	table,	/*!< in: Table where indexes are dropped */
812 	dict_index_t*	index,	/*!< in: Index to be dropped */
813 	trx_t*		trx)	/*!< in: Transaction for the drop */
814 {
815 	ib_vector_t*	indexes = table->fts->indexes;
816 	dberr_t		err = DB_SUCCESS;
817 
818 	ut_a(indexes);
819 
820 	if ((ib_vector_size(indexes) == 1
821 	    && (index == static_cast<dict_index_t*>(
822 			ib_vector_getp(table->fts->indexes, 0))))
823 	   || ib_vector_is_empty(indexes)) {
824 		doc_id_t	current_doc_id;
825 		doc_id_t	first_doc_id;
826 
827 		/* If we are dropping the only FTS index of the table,
828 		remove it from optimize thread */
829 		fts_optimize_remove_table(table);
830 
831 		DICT_TF2_FLAG_UNSET(table, DICT_TF2_FTS);
832 
833 		/* If Doc ID column is not added internally by FTS index,
834 		we can drop all FTS auxiliary tables. Otherwise, we will
835 		need to keep some common table such as CONFIG table, so
836 		as to keep track of incrementing Doc IDs */
837 		if (!DICT_TF2_FLAG_IS_SET(
838 			table, DICT_TF2_FTS_HAS_DOC_ID)) {
839 
840 			err = fts_drop_tables(trx, table);
841 
842 			err = fts_drop_index_tables(trx, index);
843 
844 			while (index->index_fts_syncing
845 				&& !trx_is_interrupted(trx)) {
846 				DICT_BG_YIELD(trx);
847 			}
848 
849 			fts_free(table);
850 
851 			return(err);
852 		}
853 
854 		while (index->index_fts_syncing
855 			&& !trx_is_interrupted(trx)) {
856 			DICT_BG_YIELD(trx);
857 		}
858 
859 		current_doc_id = table->fts->cache->next_doc_id;
860 		first_doc_id = table->fts->cache->first_doc_id;
861 		fts_cache_clear(table->fts->cache);
862 		fts_cache_destroy(table->fts->cache);
863 		table->fts->cache = fts_cache_create(table);
864 		table->fts->cache->next_doc_id = current_doc_id;
865 		table->fts->cache->first_doc_id = first_doc_id;
866 
867 	} else {
868 		fts_cache_t*            cache = table->fts->cache;
869 		fts_index_cache_t*      index_cache;
870 
871 		rw_lock_x_lock(&cache->init_lock);
872 
873 		index_cache = fts_find_index_cache(cache, index);
874 
875 		if (index_cache != NULL) {
876 			while (index->index_fts_syncing
877 				&& !trx_is_interrupted(trx)) {
878 				DICT_BG_YIELD(trx);
879 			}
880 
881 			if (index_cache->words) {
882 				fts_words_free(index_cache->words);
883 				rbt_free(index_cache->words);
884 			}
885 
886 			ib_vector_remove(cache->indexes, *(void**) index_cache);
887 		}
888 
889 		if (cache->get_docs) {
890 			fts_reset_get_doc(cache);
891 		}
892 
893 		rw_lock_x_unlock(&cache->init_lock);
894 	}
895 
896 	err = fts_drop_index_tables(trx, index);
897 
898 	ib_vector_remove(indexes, (const void*) index);
899 
900 	return(err);
901 }
902 
903 /****************************************************************//**
904 Free the query graph but check whether dict_sys->mutex is already
905 held */
906 void
fts_que_graph_free_check_lock(fts_table_t * fts_table,const fts_index_cache_t * index_cache,que_t * graph)907 fts_que_graph_free_check_lock(
908 /*==========================*/
909 	fts_table_t*		fts_table,	/*!< in: FTS table */
910 	const fts_index_cache_t*index_cache,	/*!< in: FTS index cache */
911 	que_t*			graph)		/*!< in: query graph */
912 {
913 	ibool	has_dict = FALSE;
914 
915 	if (fts_table && fts_table->table) {
916 		ut_ad(fts_table->table->fts);
917 
918 		has_dict = fts_table->table->fts->fts_status
919 			 & TABLE_DICT_LOCKED;
920 	} else if (index_cache) {
921 		ut_ad(index_cache->index->table->fts);
922 
923 		has_dict = index_cache->index->table->fts->fts_status
924 			 & TABLE_DICT_LOCKED;
925 	}
926 
927 	if (!has_dict) {
928 		mutex_enter(&dict_sys->mutex);
929 	}
930 
931 	ut_ad(mutex_own(&dict_sys->mutex));
932 
933 	que_graph_free(graph);
934 
935 	if (!has_dict) {
936 		mutex_exit(&dict_sys->mutex);
937 	}
938 }
939 
940 /****************************************************************//**
941 Create an FTS index cache. */
942 CHARSET_INFO*
fts_index_get_charset(dict_index_t * index)943 fts_index_get_charset(
944 /*==================*/
945 	dict_index_t*		index)		/*!< in: FTS index */
946 {
947 	CHARSET_INFO*		charset = NULL;
948 	dict_field_t*		field;
949 	ulint			prtype;
950 
951 	field = dict_index_get_nth_field(index, 0);
952 	prtype = field->col->prtype;
953 
954 	charset = fts_get_charset(prtype);
955 
956 #ifdef FTS_DEBUG
957 	/* Set up charset info for this index. Please note all
958 	field of the FTS index should have the same charset */
959 	for (i = 1; i < index->n_fields; i++) {
960 		CHARSET_INFO*   fld_charset;
961 
962 		field = dict_index_get_nth_field(index, i);
963 		prtype = field->col->prtype;
964 
965 		fld_charset = fts_get_charset(prtype);
966 
967 		/* All FTS columns should have the same charset */
968 		if (charset) {
969 			ut_a(charset == fld_charset);
970 		} else {
971 			charset = fld_charset;
972 		}
973 	}
974 #endif
975 
976 	return(charset);
977 
978 }
979 /****************************************************************//**
980 Create an FTS index cache.
981 @return Index Cache */
982 fts_index_cache_t*
fts_cache_index_cache_create(dict_table_t * table,dict_index_t * index)983 fts_cache_index_cache_create(
984 /*=========================*/
985 	dict_table_t*		table,		/*!< in: table with FTS index */
986 	dict_index_t*		index)		/*!< in: FTS index */
987 {
988 	ulint			n_bytes;
989 	fts_index_cache_t*	index_cache;
990 	fts_cache_t*		cache = table->fts->cache;
991 
992 	ut_a(cache != NULL);
993 
994 	ut_ad(rw_lock_own(&cache->init_lock, RW_LOCK_X));
995 
996 	/* Must not already exist in the cache vector. */
997 	ut_a(fts_find_index_cache(cache, index) == NULL);
998 
999 	index_cache = static_cast<fts_index_cache_t*>(
1000 		ib_vector_push(cache->indexes, NULL));
1001 
1002 	memset(index_cache, 0x0, sizeof(*index_cache));
1003 
1004 	index_cache->index = index;
1005 
1006 	index_cache->charset = fts_index_get_charset(index);
1007 
1008 	n_bytes = sizeof(que_t*) * FTS_NUM_AUX_INDEX;
1009 
1010 	index_cache->ins_graph = static_cast<que_t**>(
1011 		mem_heap_zalloc(static_cast<mem_heap_t*>(
1012 			cache->self_heap->arg), n_bytes));
1013 
1014 	index_cache->sel_graph = static_cast<que_t**>(
1015 		mem_heap_zalloc(static_cast<mem_heap_t*>(
1016 			cache->self_heap->arg), n_bytes));
1017 
1018 	fts_index_cache_init(cache->sync_heap, index_cache);
1019 
1020 	if (cache->get_docs) {
1021 		fts_reset_get_doc(cache);
1022 	}
1023 
1024 	return(index_cache);
1025 }
1026 
1027 /****************************************************************//**
1028 Release all resources help by the words rb tree e.g., the node ilist. */
1029 static
1030 void
fts_words_free(ib_rbt_t * words)1031 fts_words_free(
1032 /*===========*/
1033 	ib_rbt_t*	words)			/*!< in: rb tree of words */
1034 {
1035 	const ib_rbt_node_t*	rbt_node;
1036 
1037 	/* Free the resources held by a word. */
1038 	for (rbt_node = rbt_first(words);
1039 	     rbt_node != NULL;
1040 	     rbt_node = rbt_first(words)) {
1041 
1042 		ulint			i;
1043 		fts_tokenizer_word_t*	word;
1044 
1045 		word = rbt_value(fts_tokenizer_word_t, rbt_node);
1046 
1047 		/* Free the ilists of this word. */
1048 		for (i = 0; i < ib_vector_size(word->nodes); ++i) {
1049 
1050 			fts_node_t* fts_node = static_cast<fts_node_t*>(
1051 				ib_vector_get(word->nodes, i));
1052 
1053 			ut_free(fts_node->ilist);
1054 			fts_node->ilist = NULL;
1055 		}
1056 
1057 		/* NOTE: We are responsible for free'ing the node */
1058 		ut_free(rbt_remove_node(words, rbt_node));
1059 	}
1060 }
1061 
1062 /** Clear cache.
1063 @param[in,out]	cache	fts cache */
1064 void
fts_cache_clear(fts_cache_t * cache)1065 fts_cache_clear(
1066 	fts_cache_t*	cache)
1067 {
1068 	ulint		i;
1069 
1070 	for (i = 0; i < ib_vector_size(cache->indexes); ++i) {
1071 		ulint			j;
1072 		fts_index_cache_t*	index_cache;
1073 
1074 		index_cache = static_cast<fts_index_cache_t*>(
1075 			ib_vector_get(cache->indexes, i));
1076 
1077 		fts_words_free(index_cache->words);
1078 
1079 		rbt_free(index_cache->words);
1080 
1081 		index_cache->words = NULL;
1082 
1083 		for (j = 0; j < FTS_NUM_AUX_INDEX; ++j) {
1084 
1085 			if (index_cache->ins_graph[j] != NULL) {
1086 
1087 				fts_que_graph_free_check_lock(
1088 					NULL, index_cache,
1089 					index_cache->ins_graph[j]);
1090 
1091 				index_cache->ins_graph[j] = NULL;
1092 			}
1093 
1094 			if (index_cache->sel_graph[j] != NULL) {
1095 
1096 				fts_que_graph_free_check_lock(
1097 					NULL, index_cache,
1098 					index_cache->sel_graph[j]);
1099 
1100 				index_cache->sel_graph[j] = NULL;
1101 			}
1102 		}
1103 
1104 		index_cache->doc_stats = NULL;
1105 	}
1106 
1107 	mem_heap_free(static_cast<mem_heap_t*>(cache->sync_heap->arg));
1108 	cache->sync_heap->arg = NULL;
1109 
1110 	fts_need_sync = false;
1111 
1112 	cache->total_size = 0;
1113 
1114 	mutex_enter((ib_mutex_t*) &cache->deleted_lock);
1115 	cache->deleted_doc_ids = NULL;
1116 	mutex_exit((ib_mutex_t*) &cache->deleted_lock);
1117 }
1118 
1119 /*********************************************************************//**
1120 Search the index specific cache for a particular FTS index.
1121 @return the index cache else NULL */
1122 UNIV_INLINE
1123 fts_index_cache_t*
fts_get_index_cache(fts_cache_t * cache,const dict_index_t * index)1124 fts_get_index_cache(
1125 /*================*/
1126 	fts_cache_t*		cache,		/*!< in: cache to search */
1127 	const dict_index_t*	index)		/*!< in: index to search for */
1128 {
1129 	ulint			i;
1130 
1131 	ut_ad(rw_lock_own((rw_lock_t*) &cache->lock, RW_LOCK_X)
1132 	      || rw_lock_own((rw_lock_t*) &cache->init_lock, RW_LOCK_X));
1133 
1134 	for (i = 0; i < ib_vector_size(cache->indexes); ++i) {
1135 		fts_index_cache_t*	index_cache;
1136 
1137 		index_cache = static_cast<fts_index_cache_t*>(
1138 			ib_vector_get(cache->indexes, i));
1139 
1140 		if (index_cache->index == index) {
1141 
1142 			return(index_cache);
1143 		}
1144 	}
1145 
1146 	return(NULL);
1147 }
1148 
#ifdef FTS_DEBUG
/** Search the cache's get_docs vector for the entry of a given index.
The caller must hold cache->init_lock in X mode (debug-asserted).
@param[in]	cache	cache to search
@param[in]	index	index to search for
@return the fts_get_doc_t item else NULL */
static
fts_get_doc_t*
fts_get_index_get_doc(
	fts_cache_t*		cache,
	const dict_index_t*	index)
{
	ut_ad(rw_lock_own((rw_lock_t*) &cache->init_lock, RW_LOCK_X));

	ulint	pos = 0;

	while (pos < ib_vector_size(cache->get_docs)) {

		fts_get_doc_t*	candidate = static_cast<fts_get_doc_t*>(
			ib_vector_get(cache->get_docs, pos));

		if (candidate->index_cache->index == index) {
			return(candidate);
		}

		++pos;
	}

	return(NULL);
}
#endif /* FTS_DEBUG */
1179 
1180 /**********************************************************************//**
1181 Free the FTS cache. */
1182 void
fts_cache_destroy(fts_cache_t * cache)1183 fts_cache_destroy(
1184 /*==============*/
1185 	fts_cache_t*	cache)			/*!< in: cache*/
1186 {
1187 	rw_lock_free(&cache->lock);
1188 	rw_lock_free(&cache->init_lock);
1189 	mutex_free(&cache->optimize_lock);
1190 	mutex_free(&cache->deleted_lock);
1191 	mutex_free(&cache->doc_id_lock);
1192 	os_event_destroy(cache->sync->event);
1193 
1194 	if (cache->stopword_info.cached_stopword) {
1195 		rbt_free(cache->stopword_info.cached_stopword);
1196 	}
1197 
1198 	if (cache->sync_heap->arg) {
1199 		mem_heap_free(static_cast<mem_heap_t*>(cache->sync_heap->arg));
1200 	}
1201 
1202 	mem_heap_free(cache->cache_heap);
1203 }
1204 
1205 /**********************************************************************//**
1206 Find an existing word, or if not found, create one and return it.
1207 @return specified word token */
1208 static
1209 fts_tokenizer_word_t*
fts_tokenizer_word_get(fts_cache_t * cache,fts_index_cache_t * index_cache,fts_string_t * text)1210 fts_tokenizer_word_get(
1211 /*===================*/
1212 	fts_cache_t*	cache,			/*!< in: cache */
1213 	fts_index_cache_t*
1214 			index_cache,		/*!< in: index cache */
1215 	fts_string_t*	text)			/*!< in: node text */
1216 {
1217 	fts_tokenizer_word_t*	word;
1218 	ib_rbt_bound_t		parent;
1219 
1220 	ut_ad(rw_lock_own(&cache->lock, RW_LOCK_X));
1221 
1222 	/* If it is a stopword, do not index it */
1223 	if (!fts_check_token(text,
1224 		    cache->stopword_info.cached_stopword,
1225 		    index_cache->index->is_ngram,
1226 		    index_cache->charset)) {
1227 
1228 		return(NULL);
1229 	}
1230 
1231 	/* Check if we found a match, if not then add word to tree. */
1232 	if (rbt_search(index_cache->words, &parent, text) != 0) {
1233 		mem_heap_t*		heap;
1234 		fts_tokenizer_word_t	new_word;
1235 
1236 		heap = static_cast<mem_heap_t*>(cache->sync_heap->arg);
1237 
1238 		new_word.nodes = ib_vector_create(
1239 			cache->sync_heap, sizeof(fts_node_t), 4);
1240 
1241 		fts_string_dup(&new_word.text, text, heap);
1242 
1243 		parent.last = rbt_add_node(
1244 			index_cache->words, &parent, &new_word);
1245 
1246 		/* Take into account the RB tree memory use and the vector. */
1247 		cache->total_size += sizeof(new_word)
1248 			+ sizeof(ib_rbt_node_t)
1249 			+ text->f_len
1250 			+ (sizeof(fts_node_t) * 4)
1251 			+ sizeof(*new_word.nodes);
1252 
1253 		ut_ad(rbt_validate(index_cache->words));
1254 	}
1255 
1256 	word = rbt_value(fts_tokenizer_word_t, parent.last);
1257 
1258 	return(word);
1259 }
1260 
1261 /**********************************************************************//**
1262 Add the given doc_id/word positions to the given node's ilist. */
1263 void
fts_cache_node_add_positions(fts_cache_t * cache,fts_node_t * node,doc_id_t doc_id,ib_vector_t * positions)1264 fts_cache_node_add_positions(
1265 /*=========================*/
1266 	fts_cache_t*	cache,		/*!< in: cache */
1267 	fts_node_t*	node,		/*!< in: word node */
1268 	doc_id_t	doc_id,		/*!< in: doc id */
1269 	ib_vector_t*	positions)	/*!< in: fts_token_t::positions */
1270 {
1271 	ulint		i;
1272 	byte*		ptr;
1273 	byte*		ilist;
1274 	ulint		enc_len;
1275 	ulint		last_pos;
1276 	byte*		ptr_start;
1277 	ulint		doc_id_delta;
1278 
1279 #ifdef UNIV_DEBUG
1280 	if (cache) {
1281 		ut_ad(rw_lock_own(&cache->lock, RW_LOCK_X));
1282 	}
1283 #endif /* UNIV_DEBUG */
1284 
1285 	ut_ad(doc_id >= node->last_doc_id);
1286 
1287 	/* Calculate the space required to store the ilist. */
1288 	doc_id_delta = (ulint)(doc_id - node->last_doc_id);
1289 	enc_len = fts_get_encoded_len(doc_id_delta);
1290 
1291 	last_pos = 0;
1292 	for (i = 0; i < ib_vector_size(positions); i++) {
1293 		ulint	pos = *(static_cast<ulint*>(
1294 			ib_vector_get(positions, i)));
1295 
1296 		ut_ad(last_pos == 0 || pos > last_pos);
1297 
1298 		enc_len += fts_get_encoded_len(pos - last_pos);
1299 		last_pos = pos;
1300 	}
1301 
1302 	/* The 0x00 byte at the end of the token positions list. */
1303 	enc_len++;
1304 
1305 	if ((node->ilist_size_alloc - node->ilist_size) >= enc_len) {
1306 		/* No need to allocate more space, we can fit in the new
1307 		data at the end of the old one. */
1308 		ilist = NULL;
1309 		ptr = node->ilist + node->ilist_size;
1310 	} else {
1311 		ulint	new_size = node->ilist_size + enc_len;
1312 
1313 		/* Over-reserve space by a fixed size for small lengths and
1314 		by 20% for lengths >= 48 bytes. */
1315 		if (new_size < 16) {
1316 			new_size = 16;
1317 		} else if (new_size < 32) {
1318 			new_size = 32;
1319 		} else if (new_size < 48) {
1320 			new_size = 48;
1321 		} else {
1322 			new_size = (ulint)(1.2 * new_size);
1323 		}
1324 
1325 		ilist = static_cast<byte*>(ut_malloc_nokey(new_size));
1326 		ptr = ilist + node->ilist_size;
1327 
1328 		node->ilist_size_alloc = new_size;
1329 	}
1330 
1331 	ptr_start = ptr;
1332 
1333 	/* Encode the new fragment. */
1334 	ptr += fts_encode_int(doc_id_delta, ptr);
1335 
1336 	last_pos = 0;
1337 	for (i = 0; i < ib_vector_size(positions); i++) {
1338 		ulint	pos = *(static_cast<ulint*>(
1339 			 ib_vector_get(positions, i)));
1340 
1341 		ptr += fts_encode_int(pos - last_pos, ptr);
1342 		last_pos = pos;
1343 	}
1344 
1345 	*ptr++ = 0;
1346 
1347 	ut_a(enc_len == (ulint)(ptr - ptr_start));
1348 
1349 	if (ilist) {
1350 		/* Copy old ilist to the start of the new one and switch the
1351 		new one into place in the node. */
1352 		if (node->ilist_size > 0) {
1353 			memcpy(ilist, node->ilist, node->ilist_size);
1354 			ut_free(node->ilist);
1355 		}
1356 
1357 		node->ilist = ilist;
1358 	}
1359 
1360 	node->ilist_size += enc_len;
1361 
1362 	if (cache) {
1363 		cache->total_size += enc_len;
1364 	}
1365 
1366 	if (node->first_doc_id == FTS_NULL_DOC_ID) {
1367 		node->first_doc_id = doc_id;
1368 	}
1369 
1370 	node->last_doc_id = doc_id;
1371 	++node->doc_count;
1372 }
1373 
1374 /**********************************************************************//**
1375 Add document to the cache. */
1376 static
1377 void
fts_cache_add_doc(fts_cache_t * cache,fts_index_cache_t * index_cache,doc_id_t doc_id,ib_rbt_t * tokens)1378 fts_cache_add_doc(
1379 /*==============*/
1380 	fts_cache_t*	cache,			/*!< in: cache */
1381 	fts_index_cache_t*
1382 			index_cache,		/*!< in: index cache */
1383 	doc_id_t	doc_id,			/*!< in: doc id to add */
1384 	ib_rbt_t*	tokens)			/*!< in: document tokens */
1385 {
1386 	const ib_rbt_node_t*	node;
1387 	ulint			n_words;
1388 	fts_doc_stats_t*	doc_stats;
1389 
1390 	if (!tokens) {
1391 		return;
1392 	}
1393 
1394 	ut_ad(rw_lock_own(&cache->lock, RW_LOCK_X));
1395 
1396 	n_words = rbt_size(tokens);
1397 
1398 	for (node = rbt_first(tokens); node; node = rbt_first(tokens)) {
1399 
1400 		fts_tokenizer_word_t*	word;
1401 		fts_node_t*		fts_node = NULL;
1402 		fts_token_t*		token = rbt_value(fts_token_t, node);
1403 
1404 		/* Find and/or add token to the cache. */
1405 		word = fts_tokenizer_word_get(
1406 			cache, index_cache, &token->text);
1407 
1408 		if (!word) {
1409 			ut_free(rbt_remove_node(tokens, node));
1410 			continue;
1411 		}
1412 
1413 		if (ib_vector_size(word->nodes) > 0) {
1414 			fts_node = static_cast<fts_node_t*>(
1415 				ib_vector_last(word->nodes));
1416 		}
1417 
1418 		if (fts_node == NULL || fts_node->synced
1419 		    || fts_node->ilist_size > FTS_ILIST_MAX_SIZE
1420 		    || doc_id < fts_node->last_doc_id) {
1421 
1422 			fts_node = static_cast<fts_node_t*>(
1423 				ib_vector_push(word->nodes, NULL));
1424 
1425 			memset(fts_node, 0x0, sizeof(*fts_node));
1426 
1427 			cache->total_size += sizeof(*fts_node);
1428 		}
1429 
1430 		fts_cache_node_add_positions(
1431 			cache, fts_node, doc_id, token->positions);
1432 
1433 		ut_free(rbt_remove_node(tokens, node));
1434 	}
1435 
1436 	ut_a(rbt_empty(tokens));
1437 
1438 	/* Add to doc ids processed so far. */
1439 	doc_stats = static_cast<fts_doc_stats_t*>(
1440 		ib_vector_push(index_cache->doc_stats, NULL));
1441 
1442 	doc_stats->doc_id = doc_id;
1443 	doc_stats->word_count = n_words;
1444 
1445 	/* Add the doc stats memory usage too. */
1446 	cache->total_size += sizeof(*doc_stats);
1447 
1448 	if (doc_id > cache->sync->max_doc_id) {
1449 		cache->sync->max_doc_id = doc_id;
1450 	}
1451 }
1452 
1453 /****************************************************************//**
1454 Drops a table. If the table can't be found we return a SUCCESS code.
1455 @return DB_SUCCESS or error code */
1456 static MY_ATTRIBUTE((nonnull, warn_unused_result))
1457 dberr_t
fts_drop_table(trx_t * trx,const char * table_name)1458 fts_drop_table(
1459 /*===========*/
1460 	trx_t*		trx,			/*!< in: transaction */
1461 	const char*	table_name)		/*!< in: table to drop */
1462 {
1463 	dict_table_t*	table;
1464 	dberr_t		error = DB_SUCCESS;
1465 
1466 	/* Check that the table exists in our data dictionary.
1467 	Similar to regular drop table case, we will open table with
1468 	DICT_ERR_IGNORE_INDEX_ROOT and DICT_ERR_IGNORE_CORRUPT option */
1469 	table = dict_table_open_on_name(
1470 		table_name, TRUE, FALSE,
1471 		static_cast<dict_err_ignore_t>(
1472                         DICT_ERR_IGNORE_INDEX_ROOT | DICT_ERR_IGNORE_CORRUPT));
1473 
1474 	if (table != 0) {
1475 
1476 		dict_table_close(table, TRUE, FALSE);
1477 
1478 		/* Pass nonatomic=false (dont allow data dict unlock),
1479 		because the transaction may hold locks on SYS_* tables from
1480 		previous calls to fts_drop_table(). */
1481 		error = row_drop_table_for_mysql(table_name, trx, true, false);
1482 
1483 		if (error != DB_SUCCESS) {
1484 			ib::error() << "Unable to drop FTS index aux table "
1485 				<< table_name << ": " << ut_strerr(error);
1486 		}
1487 	} else {
1488 		error = DB_FAIL;
1489 	}
1490 
1491 	return(error);
1492 }
1493 
1494 /****************************************************************//**
1495 Rename a single auxiliary table due to database name change.
1496 @return DB_SUCCESS or error code */
1497 static MY_ATTRIBUTE((nonnull, warn_unused_result))
1498 dberr_t
fts_rename_one_aux_table(const char * new_name,const char * fts_table_old_name,trx_t * trx)1499 fts_rename_one_aux_table(
1500 /*=====================*/
1501 	const char*	new_name,		/*!< in: new parent tbl name */
1502 	const char*	fts_table_old_name,	/*!< in: old aux tbl name */
1503 	trx_t*		trx)			/*!< in: transaction */
1504 {
1505 	char	fts_table_new_name[MAX_TABLE_NAME_LEN];
1506 	ulint	new_db_name_len = dict_get_db_name_len(new_name);
1507 	ulint	old_db_name_len = dict_get_db_name_len(fts_table_old_name);
1508 	ulint	table_new_name_len = strlen(fts_table_old_name)
1509 				     + new_db_name_len - old_db_name_len;
1510 
1511 	/* Check if the new and old database names are the same, if so,
1512 	nothing to do */
1513 	ut_ad((new_db_name_len != old_db_name_len)
1514 	      || strncmp(new_name, fts_table_old_name, old_db_name_len) != 0);
1515 
1516 	/* Get the database name from "new_name", and table name
1517 	from the fts_table_old_name */
1518 	strncpy(fts_table_new_name, new_name, new_db_name_len);
1519 	strncpy(fts_table_new_name + new_db_name_len,
1520 	       strchr(fts_table_old_name, '/'),
1521 	       table_new_name_len - new_db_name_len);
1522 	fts_table_new_name[table_new_name_len] = 0;
1523 
1524 	return(row_rename_table_for_mysql(
1525 		fts_table_old_name, fts_table_new_name, trx, false));
1526 }
1527 
1528 /****************************************************************//**
1529 Rename auxiliary tables for all fts index for a table. This(rename)
1530 is due to database name change
1531 @return DB_SUCCESS or error code */
1532 dberr_t
fts_rename_aux_tables(dict_table_t * table,const char * new_name,trx_t * trx)1533 fts_rename_aux_tables(
1534 /*==================*/
1535 	dict_table_t*	table,		/*!< in: user Table */
1536 	const char*     new_name,       /*!< in: new table name */
1537 	trx_t*		trx)		/*!< in: transaction */
1538 {
1539 	ulint		i;
1540 	fts_table_t	fts_table;
1541 
1542 	FTS_INIT_FTS_TABLE(&fts_table, NULL, FTS_COMMON_TABLE, table);
1543 
1544 	/* Rename common auxiliary tables */
1545 	for (i = 0; fts_common_tables[i] != NULL; ++i) {
1546 		char	old_table_name[MAX_FULL_NAME_LEN];
1547 		dberr_t	err = DB_SUCCESS;
1548 
1549 		fts_table.suffix = fts_common_tables[i];
1550 
1551 		fts_get_table_name(&fts_table, old_table_name);
1552 
1553 		err = fts_rename_one_aux_table(new_name, old_table_name, trx);
1554 
1555 		if (err != DB_SUCCESS) {
1556 			return(err);
1557 		}
1558 	}
1559 
1560 	fts_t*	fts = table->fts;
1561 
1562 	/* Rename index specific auxiliary tables */
1563 	for (i = 0; fts->indexes != 0 && i < ib_vector_size(fts->indexes);
1564 	     ++i) {
1565 		dict_index_t*	index;
1566 
1567 		index = static_cast<dict_index_t*>(
1568 			ib_vector_getp(fts->indexes, i));
1569 
1570 		FTS_INIT_INDEX_TABLE(&fts_table, NULL, FTS_INDEX_TABLE, index);
1571 
1572 		for (ulint j = 0; j < FTS_NUM_AUX_INDEX; ++j) {
1573 			dberr_t	err;
1574 			char	old_table_name[MAX_FULL_NAME_LEN];
1575 
1576 			fts_table.suffix = fts_get_suffix(j);
1577 
1578 			fts_get_table_name(&fts_table, old_table_name);
1579 
1580 			err = fts_rename_one_aux_table(
1581 				new_name, old_table_name, trx);
1582 
1583 			DBUG_EXECUTE_IF("fts_rename_failure",
1584 					err = DB_DEADLOCK;
1585 					fts_sql_rollback(trx););
1586 
1587 			if (err != DB_SUCCESS) {
1588 				return(err);
1589 			}
1590 		}
1591 	}
1592 
1593 	return(DB_SUCCESS);
1594 }
1595 
1596 /****************************************************************//**
1597 Drops the common ancillary tables needed for supporting an FTS index
1598 on the given table. row_mysql_lock_data_dictionary must have been called
1599 before this.
1600 @return DB_SUCCESS or error code */
1601 static MY_ATTRIBUTE((nonnull, warn_unused_result))
1602 dberr_t
fts_drop_common_tables(trx_t * trx,fts_table_t * fts_table)1603 fts_drop_common_tables(
1604 /*===================*/
1605 	trx_t*		trx,			/*!< in: transaction */
1606 	fts_table_t*	fts_table)		/*!< in: table with an FTS
1607 						index */
1608 {
1609 	ulint		i;
1610 	dberr_t		error = DB_SUCCESS;
1611 
1612 	for (i = 0; fts_common_tables[i] != NULL; ++i) {
1613 		dberr_t	err;
1614 		char	table_name[MAX_FULL_NAME_LEN];
1615 
1616 		fts_table->suffix = fts_common_tables[i];
1617 
1618 		fts_get_table_name(fts_table, table_name);
1619 
1620 		err = fts_drop_table(trx, table_name);
1621 
1622 		/* We only return the status of the last error. */
1623 		if (err != DB_SUCCESS && err != DB_FAIL) {
1624 			error = err;
1625 		}
1626 	}
1627 
1628 	return(error);
1629 }
1630 
1631 /****************************************************************//**
1632 Since we do a horizontal split on the index table, we need to drop
1633 all the split tables.
1634 @return DB_SUCCESS or error code */
1635 dberr_t
fts_drop_index_split_tables(trx_t * trx,dict_index_t * index)1636 fts_drop_index_split_tables(
1637 /*========================*/
1638 	trx_t*		trx,			/*!< in: transaction */
1639 	dict_index_t*	index)			/*!< in: fts instance */
1640 
1641 {
1642 	ulint		i;
1643 	fts_table_t	fts_table;
1644 	dberr_t		error = DB_SUCCESS;
1645 
1646 	FTS_INIT_INDEX_TABLE(&fts_table, NULL, FTS_INDEX_TABLE, index);
1647 
1648 	for (i = 0; i < FTS_NUM_AUX_INDEX; ++i) {
1649 		dberr_t	err;
1650 		char	table_name[MAX_FULL_NAME_LEN];
1651 
1652 		fts_table.suffix = fts_get_suffix(i);
1653 
1654 		fts_get_table_name(&fts_table, table_name);
1655 
1656 		err = fts_drop_table(trx, table_name);
1657 
1658 		/* We only return the status of the last error. */
1659 		if (err != DB_SUCCESS && err != DB_FAIL) {
1660 			error = err;
1661 		}
1662 	}
1663 
1664 	return(error);
1665 }
1666 
1667 /****************************************************************//**
1668 Drops FTS auxiliary tables for an FTS index
1669 @return DB_SUCCESS or error code */
1670 dberr_t
fts_drop_index_tables(trx_t * trx,dict_index_t * index)1671 fts_drop_index_tables(
1672 /*==================*/
1673 	trx_t*		trx,		/*!< in: transaction */
1674 	dict_index_t*	index)		/*!< in: Index to drop */
1675 {
1676 	dberr_t			error = DB_SUCCESS;
1677 
1678 #ifdef FTS_DOC_STATS_DEBUG
1679 	fts_table_t		fts_table;
1680 	static const char*	index_tables[] = {
1681 		"DOC_ID",
1682 		NULL
1683 	};
1684 #endif /* FTS_DOC_STATS_DEBUG */
1685 
1686 	dberr_t	err = fts_drop_index_split_tables(trx, index);
1687 
1688 	/* We only return the status of the last error. */
1689 	if (err != DB_SUCCESS) {
1690 		error = err;
1691 	}
1692 
1693 #ifdef FTS_DOC_STATS_DEBUG
1694 	FTS_INIT_INDEX_TABLE(&fts_table, NULL, FTS_INDEX_TABLE, index);
1695 
1696 	for (ulint i = 0; index_tables[i] != NULL; ++i) {
1697 		char	table_name[MAX_FULL_NAME_LEN];
1698 
1699 		fts_table.suffix = index_tables[i];
1700 
1701 		fts_get_table_name(&fts_table, table_name);
1702 
1703 		err = fts_drop_table(trx, table_name);
1704 
1705 		/* We only return the status of the last error. */
1706 		if (err != DB_SUCCESS && err != DB_FAIL) {
1707 			error = err;
1708 		}
1709 	}
1710 #endif /* FTS_DOC_STATS_DEBUG */
1711 
1712 	return(error);
1713 }
1714 
1715 /****************************************************************//**
1716 Drops FTS ancillary tables needed for supporting an FTS index
1717 on the given table. row_mysql_lock_data_dictionary must have been called
1718 before this.
1719 @return DB_SUCCESS or error code */
1720 static MY_ATTRIBUTE((nonnull, warn_unused_result))
1721 dberr_t
fts_drop_all_index_tables(trx_t * trx,fts_t * fts)1722 fts_drop_all_index_tables(
1723 /*======================*/
1724 	trx_t*		trx,			/*!< in: transaction */
1725 	fts_t*		fts)			/*!< in: fts instance */
1726 {
1727 	dberr_t		error = DB_SUCCESS;
1728 
1729 	for (ulint i = 0;
1730 	     fts->indexes != 0 && i < ib_vector_size(fts->indexes);
1731 	     ++i) {
1732 
1733 		dberr_t		err;
1734 		dict_index_t*	index;
1735 
1736 		index = static_cast<dict_index_t*>(
1737 			ib_vector_getp(fts->indexes, i));
1738 
1739 		err = fts_drop_index_tables(trx, index);
1740 
1741 		if (err != DB_SUCCESS) {
1742 			error = err;
1743 		}
1744 	}
1745 
1746 	return(error);
1747 }
1748 
1749 /*********************************************************************//**
1750 Drops the ancillary tables needed for supporting an FTS index on a
1751 given table. row_mysql_lock_data_dictionary must have been called before
1752 this.
1753 @return DB_SUCCESS or error code */
1754 dberr_t
fts_drop_tables(trx_t * trx,dict_table_t * table)1755 fts_drop_tables(
1756 /*============*/
1757 	trx_t*		trx,		/*!< in: transaction */
1758 	dict_table_t*	table)		/*!< in: table has the FTS index */
1759 {
1760 	dberr_t		error;
1761 	fts_table_t	fts_table;
1762 
1763 	FTS_INIT_FTS_TABLE(&fts_table, NULL, FTS_COMMON_TABLE, table);
1764 
1765 	/* TODO: This is not atomic and can cause problems during recovery. */
1766 
1767 	error = fts_drop_common_tables(trx, &fts_table);
1768 
1769 	if (error == DB_SUCCESS) {
1770 		error = fts_drop_all_index_tables(trx, table->fts);
1771 	}
1772 
1773 	return(error);
1774 }
1775 
1776 /** Extract only the required flags from table->flags2 for FTS Aux
1777 tables.
1778 @param[in]	in_flags2	Table flags2
1779 @return extracted flags2 for FTS aux tables */
1780 static inline
1781 ulint
fts_get_table_flags2_for_aux_tables(ulint flags2)1782 fts_get_table_flags2_for_aux_tables(
1783 	ulint	flags2)
1784 {
1785 	/* Extract the file_per_table flag, temporary file flag and
1786 	encryption flag from the main FTS table flags2 */
1787 	return((flags2 & DICT_TF2_USE_FILE_PER_TABLE) |
1788                (flags2 & DICT_TF2_ENCRYPTION) |
1789 	       (flags2 & DICT_TF2_TEMPORARY));
1790 }
1791 
1792 /** Create dict_table_t object for FTS Aux tables.
1793 @param[in]	aux_table_name	FTS Aux table name
1794 @param[in]	table		table object of FTS Index
1795 @param[in]	n_cols		number of columns for FTS Aux table
1796 @return table object for FTS Aux table */
1797 static
1798 dict_table_t*
fts_create_in_mem_aux_table(const char * aux_table_name,const dict_table_t * table,ulint n_cols)1799 fts_create_in_mem_aux_table(
1800 	const char*		aux_table_name,
1801 	const dict_table_t*	table,
1802 	ulint			n_cols)
1803 {
1804 	dict_table_t*	new_table = dict_mem_table_create(
1805 		aux_table_name, table->space, n_cols, 0, table->flags,
1806 		fts_get_table_flags2_for_aux_tables(table->flags2));
1807 
1808 	if (DICT_TF_HAS_SHARED_SPACE(table->flags)) {
1809 		ut_ad(table->space == fil_space_get_id_by_name(
1810 			table->tablespace()));
1811 		new_table->tablespace = mem_heap_strdup(
1812 			new_table->heap, table->tablespace);
1813 	}
1814 
1815 	if (DICT_TF_HAS_DATA_DIR(table->flags)) {
1816 		ut_ad(table->data_dir_path != NULL);
1817 		new_table->data_dir_path = mem_heap_strdup(
1818 			new_table->heap, table->data_dir_path);
1819 	}
1820 
1821 	return(new_table);
1822 }
1823 
1824 /** Function to create on FTS common table.
1825 @param[in,out]	trx		InnoDB transaction
1826 @param[in]	table		Table that has FTS Index
1827 @param[in]	fts_table_name	FTS AUX table name
1828 @param[in]	fts_suffix	FTS AUX table suffix
1829 @param[in]	heap		heap
1830 @return table object if created, else NULL */
1831 static
1832 dict_table_t*
fts_create_one_common_table(trx_t * trx,const dict_table_t * table,const char * fts_table_name,const char * fts_suffix,mem_heap_t * heap)1833 fts_create_one_common_table(
1834 	trx_t*			trx,
1835 	const dict_table_t*	table,
1836 	const char*		fts_table_name,
1837 	const char*		fts_suffix,
1838 	mem_heap_t*		heap)
1839 {
1840 	dict_table_t*		new_table = NULL;
1841 	dberr_t			error;
1842 	bool			is_config = strcmp(fts_suffix, "CONFIG") == 0;
1843 
1844 	if (!is_config) {
1845 
1846 		new_table = fts_create_in_mem_aux_table(
1847 			fts_table_name, table, FTS_DELETED_TABLE_NUM_COLS);
1848 
1849 		dict_mem_table_add_col(
1850 			new_table, heap, "doc_id", DATA_INT, DATA_UNSIGNED,
1851 			FTS_DELETED_TABLE_COL_LEN);
1852 	} else {
1853 		/* Config table has different schema. */
1854 		new_table = fts_create_in_mem_aux_table(
1855 			fts_table_name, table, FTS_CONFIG_TABLE_NUM_COLS);
1856 
1857 		dict_mem_table_add_col(
1858 			new_table, heap, "key", DATA_VARCHAR, 0,
1859 			FTS_CONFIG_TABLE_KEY_COL_LEN);
1860 
1861 		dict_mem_table_add_col(
1862 			new_table, heap, "value", DATA_VARCHAR, DATA_NOT_NULL,
1863 			FTS_CONFIG_TABLE_VALUE_COL_LEN);
1864 	}
1865 
1866 	error = row_create_table_for_mysql(new_table, NULL, trx, false);
1867 
1868 	if (error == DB_SUCCESS) {
1869 
1870 		dict_index_t*	index = dict_mem_index_create(
1871 			fts_table_name, "FTS_COMMON_TABLE_IND",
1872 			new_table->space, DICT_UNIQUE|DICT_CLUSTERED, 1);
1873 
1874 		if (!is_config) {
1875 			dict_mem_index_add_field(index, "doc_id", 0);
1876 		} else {
1877 			dict_mem_index_add_field(index, "key", 0);
1878 		}
1879 
1880 		/* We save and restore trx->dict_operation because
1881 		row_create_index_for_mysql() changes the operation to
1882 		TRX_DICT_OP_TABLE. */
1883 		trx_dict_op_t op = trx_get_dict_operation(trx);
1884 
1885 		error =	row_create_index_for_mysql(index, trx, NULL, NULL);
1886 
1887 		trx->dict_operation = op;
1888 	}
1889 
1890 	if (error != DB_SUCCESS) {
1891 		trx->error_state = error;
1892 		dict_mem_table_free(new_table);
1893 		new_table = NULL;
1894 		ib::warn() << "Failed to create FTS common table "
1895 			<< fts_table_name;
1896 	}
1897 	return(new_table);
1898 }
1899 
1900 /** Creates the common auxiliary tables needed for supporting an FTS index
1901 on the given table. row_mysql_lock_data_dictionary must have been called
1902 before this.
1903 The following tables are created.
1904 CREATE TABLE $FTS_PREFIX_DELETED
1905 	(doc_id BIGINT UNSIGNED, UNIQUE CLUSTERED INDEX on doc_id)
1906 CREATE TABLE $FTS_PREFIX_DELETED_CACHE
1907 	(doc_id BIGINT UNSIGNED, UNIQUE CLUSTERED INDEX on doc_id)
1908 CREATE TABLE $FTS_PREFIX_BEING_DELETED
1909 	(doc_id BIGINT UNSIGNED, UNIQUE CLUSTERED INDEX on doc_id)
1910 CREATE TABLE $FTS_PREFIX_BEING_DELETED_CACHE
1911 	(doc_id BIGINT UNSIGNED, UNIQUE CLUSTERED INDEX on doc_id)
1912 CREATE TABLE $FTS_PREFIX_CONFIG
1913 	(key CHAR(50), value CHAR(200), UNIQUE CLUSTERED INDEX on key)
1914 @param[in,out]	trx			transaction
1915 @param[in]	table			table with FTS index
1916 @param[in]	name			table name normalized
1917 @param[in]	skip_doc_id_index	Skip index on doc id
1918 @return DB_SUCCESS if succeed */
1919 dberr_t
fts_create_common_tables(trx_t * trx,const dict_table_t * table,const char * name,bool skip_doc_id_index)1920 fts_create_common_tables(
1921 	trx_t*			trx,
1922 	const dict_table_t*	table,
1923 	const char*		name,
1924 	bool			skip_doc_id_index)
1925 {
1926 	dberr_t		error;
1927 	que_t*		graph;
1928 	fts_table_t	fts_table;
1929 	mem_heap_t*	heap = mem_heap_create(1024);
1930 	pars_info_t*	info;
1931 	char		fts_name[MAX_FULL_NAME_LEN];
1932 	char		full_name[sizeof(fts_common_tables) / sizeof(char*)]
1933 				[MAX_FULL_NAME_LEN];
1934 
1935 	dict_index_t*					index = NULL;
1936 	trx_dict_op_t					op;
1937 	/* common_tables vector is used for dropping FTS common tables
1938 	on error condition. */
1939 	std::vector<dict_table_t*>			common_tables;
1940 	std::vector<dict_table_t*>::const_iterator	it;
1941 
1942 	FTS_INIT_FTS_TABLE(&fts_table, NULL, FTS_COMMON_TABLE, table);
1943 
1944 	error = fts_drop_common_tables(trx, &fts_table);
1945 
1946 	if (error != DB_SUCCESS) {
1947 
1948 		goto func_exit;
1949 	}
1950 
1951 	/* Create the FTS tables that are common to an FTS index. */
1952 	for (ulint i = 0; fts_common_tables[i] != NULL; ++i) {
1953 
1954 		fts_table.suffix = fts_common_tables[i];
1955 		fts_get_table_name(&fts_table, full_name[i]);
1956 		dict_table_t*	common_table = fts_create_one_common_table(
1957 			trx, table, full_name[i], fts_table.suffix, heap);
1958 
1959 		 if (common_table == NULL) {
1960 			error = DB_ERROR;
1961 			goto func_exit;
1962 		} else {
1963 			common_tables.push_back(common_table);
1964 		}
1965 
1966 		DBUG_EXECUTE_IF("ib_fts_aux_table_error",
1967 			/* Return error after creating FTS_AUX_CONFIG table. */
1968 			if (i == 4) {
1969 				error = DB_ERROR;
1970 				goto func_exit;
1971 			}
1972 		);
1973 
1974 	}
1975 
1976 	/* Write the default settings to the config table. */
1977 	info = pars_info_create();
1978 
1979 	fts_table.suffix = "CONFIG";
1980 	fts_get_table_name(&fts_table, fts_name);
1981 	pars_info_bind_id(info, true, "config_table", fts_name);
1982 
1983 	graph = fts_parse_sql_no_dict_lock(
1984 		&fts_table, info, fts_config_table_insert_values_sql);
1985 
1986 	error = fts_eval_sql(trx, graph);
1987 
1988 	que_graph_free(graph);
1989 
1990 	if (error != DB_SUCCESS || skip_doc_id_index) {
1991 
1992 		goto func_exit;
1993 	}
1994 
1995 	index = dict_mem_index_create(
1996 		name, FTS_DOC_ID_INDEX_NAME, table->space,
1997 		DICT_UNIQUE, 1);
1998 	dict_mem_index_add_field(index, FTS_DOC_ID_COL_NAME, 0);
1999 
2000 	op = trx_get_dict_operation(trx);
2001 
2002 	error =	row_create_index_for_mysql(index, trx, NULL, NULL);
2003 
2004 	trx->dict_operation = op;
2005 
2006 func_exit:
2007 	if (error != DB_SUCCESS) {
2008 
2009 		for (it = common_tables.begin(); it != common_tables.end();
2010 		     ++it) {
2011 			row_drop_table_for_mysql(
2012 				(*it)->name.m_name, trx, FALSE);
2013 		}
2014 	}
2015 
2016 	common_tables.clear();
2017 	mem_heap_free(heap);
2018 
2019 	return(error);
2020 }
2021 /** Creates one FTS auxiliary index table for an FTS index.
2022 @param[in,out]	trx		transaction
2023 @param[in]	index		the index instance
2024 @param[in]	fts_table	fts_table structure
2025 @param[in]	heap		memory heap
2026 @return DB_SUCCESS or error code */
2027 static
2028 dict_table_t*
fts_create_one_index_table(trx_t * trx,const dict_index_t * index,fts_table_t * fts_table,mem_heap_t * heap)2029 fts_create_one_index_table(
2030 	trx_t*			trx,
2031 	const dict_index_t*	index,
2032 	fts_table_t*		fts_table,
2033 	mem_heap_t*		heap)
2034 {
2035 	dict_field_t*		field;
2036 	dict_table_t*		new_table = NULL;
2037 	char			table_name[MAX_FULL_NAME_LEN];
2038 	dberr_t			error;
2039 	CHARSET_INFO*		charset;
2040 
2041 	ut_ad(index->type & DICT_FTS);
2042 
2043 	fts_get_table_name(fts_table, table_name);
2044 
2045 	new_table = fts_create_in_mem_aux_table(
2046 			table_name, fts_table->table,
2047 			FTS_AUX_INDEX_TABLE_NUM_COLS);
2048 
2049 	field = dict_index_get_nth_field(index, 0);
2050 	charset = fts_get_charset(field->col->prtype);
2051 
2052 	dict_mem_table_add_col(new_table, heap, "word",
2053 			       charset == &my_charset_latin1
2054 			       ? DATA_VARCHAR : DATA_VARMYSQL,
2055 			       field->col->prtype,
2056 			       FTS_INDEX_WORD_LEN);
2057 
2058 	dict_mem_table_add_col(new_table, heap, "first_doc_id", DATA_INT,
2059 			       DATA_NOT_NULL | DATA_UNSIGNED,
2060 			       FTS_INDEX_FIRST_DOC_ID_LEN);
2061 
2062 	dict_mem_table_add_col(new_table, heap, "last_doc_id", DATA_INT,
2063 			       DATA_NOT_NULL | DATA_UNSIGNED,
2064 			       FTS_INDEX_LAST_DOC_ID_LEN);
2065 
2066 	dict_mem_table_add_col(new_table, heap, "doc_count", DATA_INT,
2067 			       DATA_NOT_NULL | DATA_UNSIGNED,
2068 			       FTS_INDEX_DOC_COUNT_LEN);
2069 
2070 	/* The precise type calculation is as follows:
2071 	least signficiant byte: MySQL type code (not applicable for sys cols)
2072 	second least : DATA_NOT_NULL | DATA_BINARY_TYPE
2073 	third least  : the MySQL charset-collation code (DATA_MTYPE_MAX) */
2074 
2075 	dict_mem_table_add_col(
2076 		new_table, heap, "ilist", DATA_BLOB,
2077 		(DATA_MTYPE_MAX << 16) | DATA_UNSIGNED | DATA_NOT_NULL,
2078 		FTS_INDEX_ILIST_LEN);
2079 
2080 	error = row_create_table_for_mysql(new_table, NULL, trx, false);
2081 
2082 	if (error == DB_SUCCESS) {
2083 		dict_index_t*	index = dict_mem_index_create(
2084 			table_name, "FTS_INDEX_TABLE_IND", new_table->space,
2085 			DICT_UNIQUE|DICT_CLUSTERED, 2);
2086 		dict_mem_index_add_field(index, "word", 0);
2087 		dict_mem_index_add_field(index, "first_doc_id", 0);
2088 
2089 		trx_dict_op_t op = trx_get_dict_operation(trx);
2090 
2091 		error =	row_create_index_for_mysql(index, trx, NULL, NULL);
2092 
2093 		trx->dict_operation = op;
2094 	}
2095 
2096 	if (error != DB_SUCCESS) {
2097 		trx->error_state = error;
2098 		dict_mem_table_free(new_table);
2099 		new_table = NULL;
2100 		ib::warn() << "Failed to create FTS index table "
2101 			<< table_name;
2102 	}
2103 
2104 	return(new_table);
2105 }
2106 
2107 /** Create auxiliary index tables for an FTS index.
2108 @param[in,out]	trx		transaction
2109 @param[in]	index		the index instance
2110 @param[in]	table_name	table name
2111 @param[in]	table_id	the table id
2112 @return DB_SUCCESS or error code */
2113 dberr_t
fts_create_index_tables_low(trx_t * trx,const dict_index_t * index,const char * table_name,table_id_t table_id)2114 fts_create_index_tables_low(
2115 	trx_t*			trx,
2116 	const dict_index_t*	index,
2117 	const char*		table_name,
2118 	table_id_t		table_id)
2119 {
2120 	ulint		i;
2121 	fts_table_t	fts_table;
2122 	dberr_t		error = DB_SUCCESS;
2123 	mem_heap_t*	heap = mem_heap_create(1024);
2124 
2125 	fts_table.type = FTS_INDEX_TABLE;
2126 	fts_table.index_id = index->id;
2127 	fts_table.table_id = table_id;
2128 	fts_table.parent = table_name;
2129 	fts_table.table = index->table;
2130 
2131 #ifdef FTS_DOC_STATS_DEBUG
2132 	/* Create the FTS auxiliary tables that are specific
2133 	to an FTS index. */
2134 	info = pars_info_create();
2135 
2136 	fts_table.suffix = "DOC_ID";
2137 	fts_get_table_name(&fts_table, fts_name);
2138 
2139 	pars_info_bind_id(info, true, "doc_id_table", fts_name);
2140 
2141 	graph = fts_parse_sql_no_dict_lock(NULL, info,
2142 					   fts_create_index_tables_sql);
2143 
2144 	error = fts_eval_sql(trx, graph);
2145 	que_graph_free(graph);
2146 #endif /* FTS_DOC_STATS_DEBUG */
2147 
2148 	/* aux_idx_tables vector is used for dropping FTS AUX INDEX
2149 	tables on error condition. */
2150 	std::vector<dict_table_t*>			aux_idx_tables;
2151 	std::vector<dict_table_t*>::const_iterator	it;
2152 
2153 	for (i = 0; i < FTS_NUM_AUX_INDEX && error == DB_SUCCESS; ++i) {
2154 		dict_table_t*	new_table;
2155 
2156 		/* Create the FTS auxiliary tables that are specific
2157 		to an FTS index. We need to preserve the table_id %s
2158 		which fts_parse_sql_no_dict_lock() will fill in for us. */
2159 		fts_table.suffix = fts_get_suffix(i);
2160 
2161 		new_table = fts_create_one_index_table(
2162 			trx, index, &fts_table, heap);
2163 
2164 		if (new_table == NULL) {
2165 			error = DB_FAIL;
2166 			break;
2167 		} else {
2168 			aux_idx_tables.push_back(new_table);
2169 		}
2170 
2171 		DBUG_EXECUTE_IF("ib_fts_index_table_error",
2172 			/* Return error after creating FTS_INDEX_5
2173 			aux table. */
2174 			if (i == 4) {
2175 				error = DB_FAIL;
2176 				break;
2177 			}
2178 		);
2179 	}
2180 
2181 	if (error != DB_SUCCESS) {
2182 
2183 		for (it = aux_idx_tables.begin(); it != aux_idx_tables.end();
2184 		     ++it) {
2185 			row_drop_table_for_mysql(
2186 				(*it)->name.m_name, trx, FALSE);
2187 		}
2188 	}
2189 
2190 	aux_idx_tables.clear();
2191 	mem_heap_free(heap);
2192 
2193 	return(error);
2194 }
2195 
2196 /** Creates the column specific ancillary tables needed for supporting an
2197 FTS index on the given table. row_mysql_lock_data_dictionary must have
2198 been called before this.
2199 
2200 All FTS AUX Index tables have the following schema.
2201 CREAT TABLE $FTS_PREFIX_INDEX_[1-6](
2202 	word		VARCHAR(FTS_MAX_WORD_LEN),
2203 	first_doc_id	INT NOT NULL,
2204 	last_doc_id	UNSIGNED NOT NULL,
2205 	doc_count	UNSIGNED INT NOT NULL,
2206 	ilist		VARBINARY NOT NULL,
2207 	UNIQUE CLUSTERED INDEX ON (word, first_doc_id))
2208 @param[in,out]	trx	transaction
2209 @param[in]	index	index instance
2210 @return DB_SUCCESS or error code */
2211 dberr_t
fts_create_index_tables(trx_t * trx,const dict_index_t * index)2212 fts_create_index_tables(
2213 	trx_t*			trx,
2214 	const dict_index_t*	index)
2215 {
2216 	dberr_t		err;
2217 	dict_table_t*	table;
2218 
2219 	table = dict_table_get_low(index->table_name);
2220 	ut_a(table != NULL);
2221 
2222 	err = fts_create_index_tables_low(
2223 		trx, index, table->name.m_name, table->id);
2224 
2225 	if (err == DB_SUCCESS) {
2226 		trx_commit(trx);
2227 	}
2228 
2229 	return(err);
2230 }
#if 0
/** Return string representation of state (currently compiled out).
@param[in]	state	row state
@return string representation of state, "UNKNOWN" for any value that
is not one of the named states */
static
const char*
fts_get_state_str(
	fts_row_state	state)
{
	const char*	str = "UNKNOWN";

	switch (state) {
	case FTS_INSERT:
		str = "INSERT";
		break;

	case FTS_MODIFY:
		str = "MODIFY";
		break;

	case FTS_DELETE:
		str = "DELETE";
		break;

	case FTS_NOTHING:
		str = "NOTHING";
		break;

	case FTS_INVALID:
		str = "INVALID";
		break;

	default:
		break;
	}

	return(str);
}
#endif
2262 
2263 /******************************************************************//**
2264 Calculate the new state of a row given the existing state and a new event.
2265 @return new state of row */
2266 static
2267 fts_row_state
fts_trx_row_get_new_state(fts_row_state old_state,fts_row_state event)2268 fts_trx_row_get_new_state(
2269 /*======================*/
2270 	fts_row_state	old_state,		/*!< in: existing state of row */
2271 	fts_row_state	event)			/*!< in: new event */
2272 {
2273 	/* The rules for transforming states:
2274 
2275 	I = inserted
2276 	M = modified
2277 	D = deleted
2278 	N = nothing
2279 
2280 	M+D -> D:
2281 
2282 	If the row existed before the transaction started and it is modified
2283 	during the transaction, followed by a deletion of the row, only the
2284 	deletion will be signaled.
2285 
2286 	M+ -> M:
2287 
2288 	If the row existed before the transaction started and it is modified
2289 	more than once during the transaction, only the last modification
2290 	will be signaled.
2291 
2292 	IM*D -> N:
2293 
2294 	If a new row is added during the transaction (and possibly modified
2295 	after its initial insertion) but it is deleted before the end of the
2296 	transaction, nothing will be signaled.
2297 
2298 	IM* -> I:
2299 
2300 	If a new row is added during the transaction and modified after its
2301 	initial insertion, only the addition will be signaled.
2302 
2303 	M*DI -> M:
2304 
2305 	If the row existed before the transaction started and it is deleted,
2306 	then re-inserted, only a modification will be signaled. Note that
2307 	this case is only possible if the table is using the row's primary
2308 	key for FTS row ids, since those can be re-inserted by the user,
2309 	which is not true for InnoDB generated row ids.
2310 
2311 	It is easily seen that the above rules decompose such that we do not
2312 	need to store the row's entire history of events. Instead, we can
2313 	store just one state for the row and update that when new events
2314 	arrive. Then we can implement the above rules as a two-dimensional
2315 	look-up table, and get checking of invalid combinations "for free"
2316 	in the process. */
2317 
2318 	/* The lookup table for transforming states. old_state is the
2319 	Y-axis, event is the X-axis. */
2320 	static const fts_row_state table[4][4] = {
2321 			/*    I            M            D            N */
2322 		/* I */	{ FTS_INVALID, FTS_INSERT,  FTS_NOTHING, FTS_INVALID },
2323 		/* M */	{ FTS_INVALID, FTS_MODIFY,  FTS_DELETE,  FTS_INVALID },
2324 		/* D */	{ FTS_MODIFY,  FTS_INVALID, FTS_INVALID, FTS_INVALID },
2325 		/* N */	{ FTS_INVALID, FTS_INVALID, FTS_INVALID, FTS_INVALID }
2326 	};
2327 
2328 	fts_row_state result;
2329 
2330 	ut_a(old_state < FTS_INVALID);
2331 	ut_a(event < FTS_INVALID);
2332 
2333 	result = table[(int) old_state][(int) event];
2334 	ut_a(result != FTS_INVALID);
2335 
2336 	return(result);
2337 }
2338 
2339 /******************************************************************//**
2340 Create a savepoint instance.
2341 @return savepoint instance */
2342 static
2343 fts_savepoint_t*
fts_savepoint_create(ib_vector_t * savepoints,const char * name,mem_heap_t * heap)2344 fts_savepoint_create(
2345 /*=================*/
2346 	ib_vector_t*	savepoints,		/*!< out: InnoDB transaction */
2347 	const char*	name,			/*!< in: savepoint name */
2348 	mem_heap_t*	heap)			/*!< in: heap */
2349 {
2350 	fts_savepoint_t*	savepoint;
2351 
2352 	savepoint = static_cast<fts_savepoint_t*>(
2353 		ib_vector_push(savepoints, NULL));
2354 
2355 	memset(savepoint, 0x0, sizeof(*savepoint));
2356 
2357 	if (name) {
2358 		savepoint->name = mem_heap_strdup(heap, name);
2359 	}
2360 
2361 	savepoint->tables = rbt_create(
2362 		sizeof(fts_trx_table_t*), fts_trx_table_cmp);
2363 
2364 	return(savepoint);
2365 }
2366 
2367 /******************************************************************//**
2368 Create an FTS trx.
2369 @return FTS trx */
2370 static
2371 fts_trx_t*
fts_trx_create(trx_t * trx)2372 fts_trx_create(
2373 /*===========*/
2374 	trx_t*	trx)				/*!< in/out: InnoDB
2375 						transaction */
2376 {
2377 	fts_trx_t*		ftt;
2378 	ib_alloc_t*		heap_alloc;
2379 	mem_heap_t*		heap = mem_heap_create(1024);
2380 	trx_named_savept_t*	savep;
2381 
2382 	ut_a(trx->fts_trx == NULL);
2383 
2384 	ftt = static_cast<fts_trx_t*>(mem_heap_alloc(heap, sizeof(fts_trx_t)));
2385 	ftt->trx = trx;
2386 	ftt->heap = heap;
2387 
2388 	heap_alloc = ib_heap_allocator_create(heap);
2389 
2390 	ftt->savepoints = static_cast<ib_vector_t*>(ib_vector_create(
2391 		heap_alloc, sizeof(fts_savepoint_t), 4));
2392 
2393 	ftt->last_stmt = static_cast<ib_vector_t*>(ib_vector_create(
2394 		heap_alloc, sizeof(fts_savepoint_t), 4));
2395 
2396 	/* Default instance has no name and no heap. */
2397 	fts_savepoint_create(ftt->savepoints, NULL, NULL);
2398 	fts_savepoint_create(ftt->last_stmt, NULL, NULL);
2399 
2400 	/* Copy savepoints that already set before. */
2401 	for (savep = UT_LIST_GET_FIRST(trx->trx_savepoints);
2402 	     savep != NULL;
2403 	     savep = UT_LIST_GET_NEXT(trx_savepoints, savep)) {
2404 
2405 		fts_savepoint_take(trx, ftt, savep->name);
2406 	}
2407 
2408 	return(ftt);
2409 }
2410 
2411 /******************************************************************//**
2412 Create an FTS trx table.
2413 @return FTS trx table */
2414 static
2415 fts_trx_table_t*
fts_trx_table_create(fts_trx_t * fts_trx,dict_table_t * table)2416 fts_trx_table_create(
2417 /*=================*/
2418 	fts_trx_t*	fts_trx,		/*!< in: FTS trx */
2419 	dict_table_t*	table)			/*!< in: table */
2420 {
2421 	fts_trx_table_t*	ftt;
2422 
2423 	ftt = static_cast<fts_trx_table_t*>(
2424 		mem_heap_alloc(fts_trx->heap, sizeof(*ftt)));
2425 
2426 	memset(ftt, 0x0, sizeof(*ftt));
2427 
2428 	ftt->table = table;
2429 	ftt->fts_trx = fts_trx;
2430 
2431 	ftt->rows = rbt_create(sizeof(fts_trx_row_t), fts_trx_row_doc_id_cmp);
2432 
2433 	return(ftt);
2434 }
2435 
2436 /******************************************************************//**
2437 Clone an FTS trx table.
2438 @return FTS trx table */
2439 static
2440 fts_trx_table_t*
fts_trx_table_clone(const fts_trx_table_t * ftt_src)2441 fts_trx_table_clone(
2442 /*=================*/
2443 	const fts_trx_table_t*	ftt_src)	/*!< in: FTS trx */
2444 {
2445 	fts_trx_table_t*	ftt;
2446 
2447 	ftt = static_cast<fts_trx_table_t*>(
2448 		mem_heap_alloc(ftt_src->fts_trx->heap, sizeof(*ftt)));
2449 
2450 	memset(ftt, 0x0, sizeof(*ftt));
2451 
2452 	ftt->table = ftt_src->table;
2453 	ftt->fts_trx = ftt_src->fts_trx;
2454 
2455 	ftt->rows = rbt_create(sizeof(fts_trx_row_t), fts_trx_row_doc_id_cmp);
2456 
2457 	/* Copy the rb tree values to the new savepoint. */
2458 	rbt_merge_uniq(ftt->rows, ftt_src->rows);
2459 
2460 	/* These are only added on commit. At this stage we only have
2461 	the updated row state. */
2462 	ut_a(ftt_src->added_doc_ids == NULL);
2463 
2464 	return(ftt);
2465 }
2466 
2467 /******************************************************************//**
2468 Initialize the FTS trx instance.
2469 @return FTS trx instance */
2470 static
2471 fts_trx_table_t*
fts_trx_init(trx_t * trx,dict_table_t * table,ib_vector_t * savepoints)2472 fts_trx_init(
2473 /*=========*/
2474 	trx_t*			trx,		/*!< in: transaction */
2475 	dict_table_t*		table,		/*!< in: FTS table instance */
2476 	ib_vector_t*		savepoints)	/*!< in: Savepoints */
2477 {
2478 	fts_trx_table_t*	ftt;
2479 	ib_rbt_bound_t		parent;
2480 	ib_rbt_t*		tables;
2481 	fts_savepoint_t*	savepoint;
2482 
2483 	savepoint = static_cast<fts_savepoint_t*>(ib_vector_last(savepoints));
2484 
2485 	tables = savepoint->tables;
2486 	rbt_search_cmp(tables, &parent, &table->id, fts_trx_table_id_cmp, NULL);
2487 
2488 	if (parent.result == 0) {
2489 		fts_trx_table_t**	fttp;
2490 
2491 		fttp = rbt_value(fts_trx_table_t*, parent.last);
2492 		ftt = *fttp;
2493 	} else {
2494 		ftt = fts_trx_table_create(trx->fts_trx, table);
2495 		rbt_add_node(tables, &parent, &ftt);
2496 	}
2497 
2498 	ut_a(ftt->table == table);
2499 
2500 	return(ftt);
2501 }
2502 
2503 /******************************************************************//**
2504 Notify the FTS system about an operation on an FTS-indexed table. */
2505 static
2506 void
fts_trx_table_add_op(fts_trx_table_t * ftt,doc_id_t doc_id,fts_row_state state,ib_vector_t * fts_indexes)2507 fts_trx_table_add_op(
2508 /*=================*/
2509 	fts_trx_table_t*ftt,			/*!< in: FTS trx table */
2510 	doc_id_t	doc_id,			/*!< in: doc id */
2511 	fts_row_state	state,			/*!< in: state of the row */
2512 	ib_vector_t*	fts_indexes)		/*!< in: FTS indexes affected */
2513 {
2514 	ib_rbt_t*	rows;
2515 	ib_rbt_bound_t	parent;
2516 
2517 	rows = ftt->rows;
2518 	rbt_search(rows, &parent, &doc_id);
2519 
2520 	/* Row id found, update state, and if new state is FTS_NOTHING,
2521 	we delete the row from our tree. */
2522 	if (parent.result == 0) {
2523 		fts_trx_row_t*	row = rbt_value(fts_trx_row_t, parent.last);
2524 
2525 		row->state = fts_trx_row_get_new_state(row->state, state);
2526 
2527 		if (row->state == FTS_NOTHING) {
2528 			if (row->fts_indexes) {
2529 				ib_vector_free(row->fts_indexes);
2530 			}
2531 
2532 			ut_free(rbt_remove_node(rows, parent.last));
2533 			row = NULL;
2534 		} else if (row->fts_indexes != NULL) {
2535 			ib_vector_free(row->fts_indexes);
2536 			row->fts_indexes = fts_indexes;
2537 		}
2538 
2539 	} else { /* Row-id not found, create a new one. */
2540 		fts_trx_row_t	row;
2541 
2542 		row.doc_id = doc_id;
2543 		row.state = state;
2544 		row.fts_indexes = fts_indexes;
2545 
2546 		rbt_add_node(rows, &parent, &row);
2547 	}
2548 }
2549 
2550 /******************************************************************//**
2551 Notify the FTS system about an operation on an FTS-indexed table. */
2552 void
fts_trx_add_op(trx_t * trx,dict_table_t * table,doc_id_t doc_id,fts_row_state state,ib_vector_t * fts_indexes)2553 fts_trx_add_op(
2554 /*===========*/
2555 	trx_t*		trx,			/*!< in: InnoDB transaction */
2556 	dict_table_t*	table,			/*!< in: table */
2557 	doc_id_t	doc_id,			/*!< in: new doc id */
2558 	fts_row_state	state,			/*!< in: state of the row */
2559 	ib_vector_t*	fts_indexes)		/*!< in: FTS indexes affected
2560 						(NULL=all) */
2561 {
2562 	fts_trx_table_t*	tran_ftt;
2563 	fts_trx_table_t*	stmt_ftt;
2564 
2565 	if (!trx->fts_trx) {
2566 		trx->fts_trx = fts_trx_create(trx);
2567 	}
2568 
2569 	tran_ftt = fts_trx_init(trx, table, trx->fts_trx->savepoints);
2570 	stmt_ftt = fts_trx_init(trx, table, trx->fts_trx->last_stmt);
2571 
2572 	fts_trx_table_add_op(tran_ftt, doc_id, state, fts_indexes);
2573 	fts_trx_table_add_op(stmt_ftt, doc_id, state, fts_indexes);
2574 }
2575 
2576 /******************************************************************//**
2577 Fetch callback that converts a textual document id to a binary value and
2578 stores it in the given place.
2579 @return always returns NULL */
2580 static
2581 ibool
fts_fetch_store_doc_id(void * row,void * user_arg)2582 fts_fetch_store_doc_id(
2583 /*===================*/
2584 	void*		row,			/*!< in: sel_node_t* */
2585 	void*		user_arg)		/*!< in: doc_id_t* to store
2586 						doc_id in */
2587 {
2588 	int		n_parsed;
2589 	sel_node_t*	node = static_cast<sel_node_t*>(row);
2590 	doc_id_t*	doc_id = static_cast<doc_id_t*>(user_arg);
2591 	dfield_t*	dfield = que_node_get_val(node->select_list);
2592 	dtype_t*	type = dfield_get_type(dfield);
2593 	ulint		len = dfield_get_len(dfield);
2594 
2595 	char		buf[32];
2596 
2597 	ut_a(dtype_get_mtype(type) == DATA_VARCHAR);
2598 	ut_a(len > 0 && len < sizeof(buf));
2599 
2600 	memcpy(buf, dfield_get_data(dfield), len);
2601 	buf[len] = '\0';
2602 
2603 	n_parsed = sscanf(buf, FTS_DOC_ID_FORMAT, doc_id);
2604 	ut_a(n_parsed == 1);
2605 
2606 	return(FALSE);
2607 }
2608 
#ifdef FTS_CACHE_SIZE_DEBUG
/** Get the max cache size in bytes. If there is an error reading the
value we simply print an error message here and return the default
value to the caller. A value outside the permitted range is clamped to
the nearest limit and a warning is printed.
@param[in]	trx		transaction
@param[in]	fts_table	table instance
@return max cache size in bytes */
static
ulint
fts_get_max_cache_size(
	trx_t*		trx,
	fts_table_t*	fts_table)
{
	dberr_t		error;
	fts_string_t	value;
	ulint		cache_size_in_mb;

	/* Set to the default value. */
	cache_size_in_mb = FTS_CACHE_SIZE_LOWER_LIMIT_IN_MB;

	/* We set the length of value to the max bytes it can hold. This
	information is used by the callback that reads the value. One
	extra byte is allocated for the terminating NUL written below. */
	value.f_n_char = 0;
	value.f_len = FTS_MAX_CONFIG_VALUE_LEN;
	value.f_str = static_cast<byte*>(ut_malloc_nokey(value.f_len + 1));

	error = fts_config_get_value(
		trx, fts_table, FTS_MAX_CACHE_SIZE_IN_MB, &value);

	if (error == DB_SUCCESS) {

		value.f_str[value.f_len] = 0;
		cache_size_in_mb = strtoul((char*) value.f_str, NULL, 10);

		if (cache_size_in_mb > FTS_CACHE_SIZE_UPPER_LIMIT_IN_MB) {

			ib::warn() << "FTS max cache size ("
				<< cache_size_in_mb << ") out of range."
				" Minimum value is "
				<< FTS_CACHE_SIZE_LOWER_LIMIT_IN_MB
				<< "MB and the maximum value is "
				<< FTS_CACHE_SIZE_UPPER_LIMIT_IN_MB
				<< "MB, setting cache size to upper limit";

			cache_size_in_mb = FTS_CACHE_SIZE_UPPER_LIMIT_IN_MB;

		} else if  (cache_size_in_mb
			    < FTS_CACHE_SIZE_LOWER_LIMIT_IN_MB) {

			ib::warn() << "FTS max cache size ("
				<< cache_size_in_mb << ") out of range."
				" Minimum value is "
				<< FTS_CACHE_SIZE_LOWER_LIMIT_IN_MB
				<< "MB and the maximum value is "
				<< FTS_CACHE_SIZE_UPPER_LIMIT_IN_MB
				<< "MB, setting cache size to lower limit";

			cache_size_in_mb = FTS_CACHE_SIZE_LOWER_LIMIT_IN_MB;
		}
	} else {
		ib::error() << "(" << ut_strerr(error) << ") reading max"
			" cache config value from config table";
	}

	ut_free(value.f_str);

	/* Convert megabytes to bytes. */
	return(cache_size_in_mb * 1024 * 1024);
}
#endif
2678 
#ifdef FTS_DOC_STATS_DEBUG
/** Get the total number of words in the FTS for a particular FTS index.
On failure an error is printed and *total is left at 0.
@param[in]	trx	transaction
@param[in]	index	for this index
@param[out]	total	total words
@return DB_SUCCESS if all OK else error code */
dberr_t
fts_get_total_word_count(
	trx_t*		trx,
	dict_index_t*	index,
	ulint*		total)
{
	fts_string_t	config_value;

	*total = 0;

	/* We set the length of value to the max bytes it can hold. This
	information is used by the callback that reads the value. The
	extra byte holds the terminating NUL written below. */
	config_value.f_n_char = 0;
	config_value.f_len = FTS_MAX_CONFIG_VALUE_LEN;
	config_value.f_str = static_cast<byte*>(
		ut_malloc_nokey(config_value.f_len + 1));

	const dberr_t	err = fts_config_get_index_value(
		trx, index, FTS_TOTAL_WORD_COUNT, &config_value);

	if (err != DB_SUCCESS) {
		ib::error() << "(" << ut_strerr(err) << ") reading total"
			" words value from config table";
	} else {
		config_value.f_str[config_value.f_len] = 0;
		*total = strtoul((char*) config_value.f_str, NULL, 10);
	}

	ut_free(config_value.f_str);

	return(err);
}
#endif /* FTS_DOC_STATS_DEBUG */
2718 
2719 /*********************************************************************//**
2720 Update the next and last Doc ID in the CONFIG table to be the input
2721 "doc_id" value (+ 1). We would do so after each FTS index build or
2722 table truncate */
2723 void
fts_update_next_doc_id(trx_t * trx,const dict_table_t * table,const char * table_name,doc_id_t doc_id)2724 fts_update_next_doc_id(
2725 /*===================*/
2726 	trx_t*			trx,		/*!< in/out: transaction */
2727 	const dict_table_t*	table,		/*!< in: table */
2728 	const char*		table_name,	/*!< in: table name, or NULL */
2729 	doc_id_t		doc_id)		/*!< in: DOC ID to set */
2730 {
2731 	table->fts->cache->synced_doc_id = doc_id;
2732 	table->fts->cache->next_doc_id = doc_id + 1;
2733 
2734 	table->fts->cache->first_doc_id = table->fts->cache->next_doc_id;
2735 
2736 	fts_update_sync_doc_id(
2737 		table, table_name, table->fts->cache->synced_doc_id, trx);
2738 
2739 }
2740 
2741 /*********************************************************************//**
2742 Get the next available document id.
2743 @return DB_SUCCESS if OK */
2744 dberr_t
fts_get_next_doc_id(const dict_table_t * table,doc_id_t * doc_id)2745 fts_get_next_doc_id(
2746 /*================*/
2747 	const dict_table_t*	table,		/*!< in: table */
2748 	doc_id_t*		doc_id)		/*!< out: new document id */
2749 {
2750 	fts_cache_t*	cache = table->fts->cache;
2751 
2752 	/* If the Doc ID system has not yet been initialized, we
2753 	will consult the CONFIG table and user table to re-establish
2754 	the initial value of the Doc ID */
2755 	if (cache->first_doc_id == FTS_NULL_DOC_ID) {
2756 		fts_init_doc_id(table);
2757 	}
2758 
2759 	if (!DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID)) {
2760 		*doc_id = FTS_NULL_DOC_ID;
2761 		return(DB_SUCCESS);
2762 	}
2763 
2764 	mutex_enter(&cache->doc_id_lock);
2765 	*doc_id = ++cache->next_doc_id;
2766 	mutex_exit(&cache->doc_id_lock);
2767 
2768 	return(DB_SUCCESS);
2769 }
2770 
2771 /*********************************************************************//**
2772 This function fetch the Doc ID from CONFIG table, and compare with
2773 the Doc ID supplied. And store the larger one to the CONFIG table.
2774 @return DB_SUCCESS if OK */
2775 static MY_ATTRIBUTE((nonnull))
2776 dberr_t
fts_cmp_set_sync_doc_id(const dict_table_t * table,doc_id_t doc_id_cmp,ibool read_only,doc_id_t * doc_id)2777 fts_cmp_set_sync_doc_id(
2778 /*====================*/
2779 	const dict_table_t*	table,		/*!< in: table */
2780 	doc_id_t		doc_id_cmp,	/*!< in: Doc ID to compare */
2781 	ibool			read_only,	/*!< in: TRUE if read the
2782 						synced_doc_id only */
2783 	doc_id_t*		doc_id)		/*!< out: larger document id
2784 						after comparing "doc_id_cmp"
2785 						to the one stored in CONFIG
2786 						table */
2787 {
2788 	trx_t*		trx;
2789 	pars_info_t*	info;
2790 	dberr_t		error;
2791 	fts_table_t	fts_table;
2792 	que_t*		graph = NULL;
2793 	fts_cache_t*	cache = table->fts->cache;
2794 	char		table_name[MAX_FULL_NAME_LEN];
2795 retry:
2796 	ut_a(table->fts->doc_col != ULINT_UNDEFINED);
2797 
2798 	fts_table.suffix = "CONFIG";
2799 	fts_table.table_id = table->id;
2800 	fts_table.type = FTS_COMMON_TABLE;
2801 	fts_table.table = table;
2802 
2803 	fts_table.parent = table->name.m_name;
2804 
2805 	trx = trx_allocate_for_background();
2806 
2807 	trx->op_info = "update the next FTS document id";
2808 
2809 	info = pars_info_create();
2810 
2811 	pars_info_bind_function(
2812 		info, "my_func", fts_fetch_store_doc_id, doc_id);
2813 
2814 	fts_get_table_name(&fts_table, table_name);
2815 	pars_info_bind_id(info, true, "config_table", table_name);
2816 
2817 	graph = fts_parse_sql(
2818 		&fts_table, info,
2819 		"DECLARE FUNCTION my_func;\n"
2820 		"DECLARE CURSOR c IS SELECT value FROM $config_table"
2821 		" WHERE key = 'synced_doc_id' FOR UPDATE;\n"
2822 		"BEGIN\n"
2823 		""
2824 		"OPEN c;\n"
2825 		"WHILE 1 = 1 LOOP\n"
2826 		"  FETCH c INTO my_func();\n"
2827 		"  IF c % NOTFOUND THEN\n"
2828 		"    EXIT;\n"
2829 		"  END IF;\n"
2830 		"END LOOP;\n"
2831 		"CLOSE c;");
2832 
2833 	*doc_id = 0;
2834 
2835 	error = fts_eval_sql(trx, graph);
2836 
2837 	fts_que_graph_free_check_lock(&fts_table, NULL, graph);
2838 
2839 	// FIXME: We need to retry deadlock errors
2840 	if (error != DB_SUCCESS) {
2841 		goto func_exit;
2842 	}
2843 
2844 	if (read_only) {
2845 		goto func_exit;
2846 	}
2847 
2848 	if (doc_id_cmp == 0 && *doc_id) {
2849 		cache->synced_doc_id = *doc_id - 1;
2850 	} else {
2851 		cache->synced_doc_id = ut_max(doc_id_cmp, *doc_id);
2852 	}
2853 
2854 	mutex_enter(&cache->doc_id_lock);
2855 	/* For each sync operation, we will add next_doc_id by 1,
2856 	so to mark a sync operation */
2857 	if (cache->next_doc_id < cache->synced_doc_id + 1) {
2858 		cache->next_doc_id = cache->synced_doc_id + 1;
2859 	}
2860 	mutex_exit(&cache->doc_id_lock);
2861 
2862 	if (doc_id_cmp > *doc_id) {
2863 		error = fts_update_sync_doc_id(
2864 			table, table->name.m_name, cache->synced_doc_id, trx);
2865 	}
2866 
2867 	*doc_id = cache->next_doc_id;
2868 
2869 func_exit:
2870 
2871 	if (error == DB_SUCCESS) {
2872 		fts_sql_commit(trx);
2873 	} else {
2874 		*doc_id = 0;
2875 
2876 		ib::error() << "(" << ut_strerr(error) << ") while getting"
2877 			" next doc id.";
2878 		fts_sql_rollback(trx);
2879 
2880 		if (error == DB_DEADLOCK) {
2881 			os_thread_sleep(FTS_DEADLOCK_RETRY_WAIT);
2882 			goto retry;
2883 		}
2884 	}
2885 
2886 	trx_free_for_background(trx);
2887 
2888 	return(error);
2889 }
2890 
2891 /*********************************************************************//**
2892 Update the last document id. This function could create a new
2893 transaction to update the last document id.
2894 @return DB_SUCCESS if OK */
2895 static
2896 dberr_t
fts_update_sync_doc_id(const dict_table_t * table,const char * table_name,doc_id_t doc_id,trx_t * trx)2897 fts_update_sync_doc_id(
2898 /*===================*/
2899 	const dict_table_t*	table,		/*!< in: table */
2900 	const char*		table_name,	/*!< in: table name, or NULL */
2901 	doc_id_t		doc_id,		/*!< in: last document id */
2902 	trx_t*			trx)		/*!< in: update trx, or NULL */
2903 {
2904 	byte		id[FTS_MAX_ID_LEN];
2905 	pars_info_t*	info;
2906 	fts_table_t	fts_table;
2907 	ulint		id_len;
2908 	que_t*		graph = NULL;
2909 	dberr_t		error;
2910 	ibool		local_trx = FALSE;
2911 	fts_cache_t*	cache = table->fts->cache;
2912 	char		fts_name[MAX_FULL_NAME_LEN];
2913 
2914 	fts_table.suffix = "CONFIG";
2915 	fts_table.table_id = table->id;
2916 	fts_table.type = FTS_COMMON_TABLE;
2917 	fts_table.table = table;
2918 	if (table_name) {
2919 		fts_table.parent = table_name;
2920 	} else {
2921 		fts_table.parent = table->name.m_name;
2922 	}
2923 
2924 	if (!trx) {
2925 		trx = trx_allocate_for_background();
2926 
2927 		trx->op_info = "setting last FTS document id";
2928 		local_trx = TRUE;
2929 	}
2930 
2931 	info = pars_info_create();
2932 
2933 	id_len = ut_snprintf(
2934 		(char*) id, sizeof(id), FTS_DOC_ID_FORMAT, doc_id + 1);
2935 
2936 	pars_info_bind_varchar_literal(info, "doc_id", id, id_len);
2937 
2938 	fts_get_table_name(&fts_table, fts_name);
2939 	pars_info_bind_id(info, true, "table_name", fts_name);
2940 
2941 	graph = fts_parse_sql(
2942 		&fts_table, info,
2943 		"BEGIN"
2944 		" UPDATE $table_name SET value = :doc_id"
2945 		" WHERE key = 'synced_doc_id';");
2946 
2947 	error = fts_eval_sql(trx, graph);
2948 
2949 	fts_que_graph_free_check_lock(&fts_table, NULL, graph);
2950 
2951 	if (local_trx) {
2952 		if (error == DB_SUCCESS) {
2953 			fts_sql_commit(trx);
2954 			cache->synced_doc_id = doc_id;
2955 		} else {
2956 
2957 			ib::error() << "(" << ut_strerr(error) << ") while"
2958 				" updating last doc id.";
2959 
2960 			fts_sql_rollback(trx);
2961 		}
2962 		trx_free_for_background(trx);
2963 	}
2964 
2965 	return(error);
2966 }
2967 
2968 /*********************************************************************//**
2969 Create a new fts_doc_ids_t.
2970 @return new fts_doc_ids_t */
2971 fts_doc_ids_t*
fts_doc_ids_create(void)2972 fts_doc_ids_create(void)
2973 /*====================*/
2974 {
2975 	fts_doc_ids_t*	fts_doc_ids;
2976 	mem_heap_t*	heap = mem_heap_create(512);
2977 
2978 	fts_doc_ids = static_cast<fts_doc_ids_t*>(
2979 		mem_heap_alloc(heap, sizeof(*fts_doc_ids)));
2980 
2981 	fts_doc_ids->self_heap = ib_heap_allocator_create(heap);
2982 
2983 	fts_doc_ids->doc_ids = static_cast<ib_vector_t*>(ib_vector_create(
2984 		fts_doc_ids->self_heap, sizeof(fts_update_t), 32));
2985 
2986 	return(fts_doc_ids);
2987 }
2988 
2989 /*********************************************************************//**
2990 Free a fts_doc_ids_t. */
2991 void
fts_doc_ids_free(fts_doc_ids_t * fts_doc_ids)2992 fts_doc_ids_free(
2993 /*=============*/
2994 	fts_doc_ids_t*	fts_doc_ids)
2995 {
2996 	mem_heap_t*	heap = static_cast<mem_heap_t*>(
2997 		fts_doc_ids->self_heap->arg);
2998 
2999 	memset(fts_doc_ids, 0, sizeof(*fts_doc_ids));
3000 
3001 	mem_heap_free(heap);
3002 }
3003 
3004 /*********************************************************************//**
3005 Do commit-phase steps necessary for the insertion of a new row.
3006 @return DB_SUCCESS or error code */
3007 void
fts_add(fts_trx_table_t * ftt,fts_trx_row_t * row)3008 fts_add(
3009 /*====*/
3010 	fts_trx_table_t*ftt,			/*!< in: FTS trx table */
3011 	fts_trx_row_t*	row)			/*!< in: row */
3012 {
3013 	dict_table_t*	table = ftt->table;
3014 	doc_id_t	doc_id = row->doc_id;
3015 
3016 	ut_a(row->state == FTS_INSERT || row->state == FTS_MODIFY);
3017 
3018 	fts_add_doc_by_id(ftt, doc_id, row->fts_indexes);
3019 
3020 	mutex_enter(&table->fts->cache->deleted_lock);
3021 	++table->fts->cache->added;
3022 	mutex_exit(&table->fts->cache->deleted_lock);
3023 
3024 	if (!DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID)
3025 	    && doc_id >= table->fts->cache->next_doc_id) {
3026 		table->fts->cache->next_doc_id = doc_id + 1;
3027 	}
3028 }
3029 
3030 /*********************************************************************//**
3031 Do commit-phase steps necessary for the deletion of a row.
3032 @return DB_SUCCESS or error code */
3033 static MY_ATTRIBUTE((nonnull, warn_unused_result))
3034 dberr_t
fts_delete(fts_trx_table_t * ftt,fts_trx_row_t * row)3035 fts_delete(
3036 /*=======*/
3037 	fts_trx_table_t*ftt,			/*!< in: FTS trx table */
3038 	fts_trx_row_t*	row)			/*!< in: row */
3039 {
3040 	que_t*		graph;
3041 	fts_table_t	fts_table;
3042 	dberr_t		error = DB_SUCCESS;
3043 	doc_id_t	write_doc_id;
3044 	dict_table_t*	table = ftt->table;
3045 	doc_id_t	doc_id = row->doc_id;
3046 	trx_t*		trx = ftt->fts_trx->trx;
3047 	pars_info_t*	info = pars_info_create();
3048 	fts_cache_t*	cache = table->fts->cache;
3049 
3050 	/* we do not index Documents whose Doc ID value is 0 */
3051 	if (doc_id == FTS_NULL_DOC_ID) {
3052 		ut_ad(!DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID));
3053 		return(error);
3054 	}
3055 
3056 	ut_a(row->state == FTS_DELETE || row->state == FTS_MODIFY);
3057 
3058 	FTS_INIT_FTS_TABLE(&fts_table, "DELETED", FTS_COMMON_TABLE, table);
3059 
3060 	/* Convert to "storage" byte order. */
3061 	fts_write_doc_id((byte*) &write_doc_id, doc_id);
3062 	fts_bind_doc_id(info, "doc_id", &write_doc_id);
3063 
3064 	/* It is possible we update a record that has not yet been sync-ed
3065 	into cache from last crash (delete Doc will not initialize the
3066 	sync). Avoid any added counter accounting until the FTS cache
3067 	is re-established and sync-ed */
3068 	if (table->fts->fts_status & ADDED_TABLE_SYNCED
3069 	    && doc_id > cache->synced_doc_id) {
3070 		mutex_enter(&table->fts->cache->deleted_lock);
3071 
3072 		/* The Doc ID could belong to those left in
3073 		ADDED table from last crash. So need to check
3074 		if it is less than first_doc_id when we initialize
3075 		the Doc ID system after reboot */
3076 		if (doc_id >= table->fts->cache->first_doc_id
3077 		    && table->fts->cache->added > 0) {
3078 			--table->fts->cache->added;
3079 		}
3080 
3081 		mutex_exit(&table->fts->cache->deleted_lock);
3082 
3083 		/* Only if the row was really deleted. */
3084 		ut_a(row->state == FTS_DELETE || row->state == FTS_MODIFY);
3085 	}
3086 
3087 	/* Note the deleted document for OPTIMIZE to purge. */
3088 	if (error == DB_SUCCESS) {
3089 		char	table_name[MAX_FULL_NAME_LEN];
3090 
3091 		trx->op_info = "adding doc id to FTS DELETED";
3092 
3093 		info->graph_owns_us = TRUE;
3094 
3095 		fts_table.suffix = "DELETED";
3096 
3097 		fts_get_table_name(&fts_table, table_name);
3098 		pars_info_bind_id(info, true, "deleted", table_name);
3099 
3100 		graph = fts_parse_sql(
3101 			&fts_table,
3102 			info,
3103 			"BEGIN INSERT INTO $deleted VALUES (:doc_id);");
3104 
3105 		error = fts_eval_sql(trx, graph);
3106 
3107 		fts_que_graph_free(graph);
3108 	} else {
3109 		pars_info_free(info);
3110 	}
3111 
3112 	/* Increment the total deleted count, this is used to calculate the
3113 	number of documents indexed. */
3114 	if (error == DB_SUCCESS) {
3115 		mutex_enter(&table->fts->cache->deleted_lock);
3116 
3117 		++table->fts->cache->deleted;
3118 
3119 		mutex_exit(&table->fts->cache->deleted_lock);
3120 	}
3121 
3122 	return(error);
3123 }
3124 
3125 /*********************************************************************//**
3126 Do commit-phase steps necessary for the modification of a row.
3127 @return DB_SUCCESS or error code */
3128 static MY_ATTRIBUTE((nonnull, warn_unused_result))
3129 dberr_t
fts_modify(fts_trx_table_t * ftt,fts_trx_row_t * row)3130 fts_modify(
3131 /*=======*/
3132 	fts_trx_table_t*	ftt,		/*!< in: FTS trx table */
3133 	fts_trx_row_t*		row)		/*!< in: row */
3134 {
3135 	dberr_t	error;
3136 
3137 	ut_a(row->state == FTS_MODIFY);
3138 
3139 	error = fts_delete(ftt, row);
3140 
3141 	if (error == DB_SUCCESS) {
3142 		fts_add(ftt, row);
3143 	}
3144 
3145 	return(error);
3146 }
3147 
3148 /*********************************************************************//**
3149 Create a new document id.
3150 @return DB_SUCCESS if all went well else error */
3151 dberr_t
fts_create_doc_id(dict_table_t * table,dtuple_t * row,mem_heap_t * heap)3152 fts_create_doc_id(
3153 /*==============*/
3154 	dict_table_t*	table,		/*!< in: row is of this table. */
3155 	dtuple_t*	row,		/* in/out: add doc id value to this
3156 					row. This is the current row that is
3157 					being inserted. */
3158 	mem_heap_t*	heap)		/*!< in: heap */
3159 {
3160 	doc_id_t	doc_id;
3161 	dberr_t		error = DB_SUCCESS;
3162 
3163 	ut_a(table->fts->doc_col != ULINT_UNDEFINED);
3164 
3165 	if (!DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID)) {
3166 		if (table->fts->cache->first_doc_id == FTS_NULL_DOC_ID) {
3167 			error = fts_get_next_doc_id(table, &doc_id);
3168 		}
3169 		return(error);
3170 	}
3171 
3172 	error = fts_get_next_doc_id(table, &doc_id);
3173 
3174 	if (error == DB_SUCCESS) {
3175 		dfield_t*	dfield;
3176 		doc_id_t*	write_doc_id;
3177 
3178 		ut_a(doc_id > 0);
3179 
3180 		dfield = dtuple_get_nth_field(row, table->fts->doc_col);
3181 		write_doc_id = static_cast<doc_id_t*>(
3182 			mem_heap_alloc(heap, sizeof(*write_doc_id)));
3183 
3184 		ut_a(doc_id != FTS_NULL_DOC_ID);
3185 		ut_a(sizeof(doc_id) == dfield->type.len);
3186 		fts_write_doc_id((byte*) write_doc_id, doc_id);
3187 
3188 		dfield_set_data(dfield, write_doc_id, sizeof(*write_doc_id));
3189 	}
3190 
3191 	return(error);
3192 }
3193 
3194 /*********************************************************************//**
3195 The given transaction is about to be committed; do whatever is necessary
3196 from the FTS system's POV.
3197 @return DB_SUCCESS or error code */
3198 static MY_ATTRIBUTE((nonnull, warn_unused_result))
3199 dberr_t
fts_commit_table(fts_trx_table_t * ftt)3200 fts_commit_table(
3201 /*=============*/
3202 	fts_trx_table_t*	ftt)		/*!< in: FTS table to commit*/
3203 {
3204 	const ib_rbt_node_t*	node;
3205 	ib_rbt_t*		rows;
3206 	dberr_t			error = DB_SUCCESS;
3207 	fts_cache_t*		cache = ftt->table->fts->cache;
3208 	trx_t*			trx = trx_allocate_for_background();
3209 
3210 	rows = ftt->rows;
3211 
3212 	ftt->fts_trx->trx = trx;
3213 
3214 	if (cache->get_docs == NULL) {
3215 		rw_lock_x_lock(&cache->init_lock);
3216 		if (cache->get_docs == NULL) {
3217 			cache->get_docs = fts_get_docs_create(cache);
3218 		}
3219 		rw_lock_x_unlock(&cache->init_lock);
3220 	}
3221 
3222 	for (node = rbt_first(rows);
3223 	     node != NULL && error == DB_SUCCESS;
3224 	     node = rbt_next(rows, node)) {
3225 
3226 		fts_trx_row_t*	row = rbt_value(fts_trx_row_t, node);
3227 
3228 		switch (row->state) {
3229 		case FTS_INSERT:
3230 			fts_add(ftt, row);
3231 			break;
3232 
3233 		case FTS_MODIFY:
3234 			error = fts_modify(ftt, row);
3235 			break;
3236 
3237 		case FTS_DELETE:
3238 			error = fts_delete(ftt, row);
3239 			break;
3240 
3241 		default:
3242 			ut_error;
3243 		}
3244 	}
3245 
3246 	fts_sql_commit(trx);
3247 
3248 	trx_free_for_background(trx);
3249 
3250 	return(error);
3251 }
3252 
3253 /*********************************************************************//**
3254 The given transaction is about to be committed; do whatever is necessary
3255 from the FTS system's POV.
3256 @return DB_SUCCESS or error code */
3257 dberr_t
fts_commit(trx_t * trx)3258 fts_commit(
3259 /*=======*/
3260 	trx_t*	trx)				/*!< in: transaction */
3261 {
3262 	const ib_rbt_node_t*	node;
3263 	dberr_t			error;
3264 	ib_rbt_t*		tables;
3265 	fts_savepoint_t*	savepoint;
3266 
3267 	savepoint = static_cast<fts_savepoint_t*>(
3268 		ib_vector_last(trx->fts_trx->savepoints));
3269 	tables = savepoint->tables;
3270 
3271 	for (node = rbt_first(tables), error = DB_SUCCESS;
3272 	     node != NULL && error == DB_SUCCESS;
3273 	     node = rbt_next(tables, node)) {
3274 
3275 		fts_trx_table_t**	ftt;
3276 
3277 		ftt = rbt_value(fts_trx_table_t*, node);
3278 
3279 		error = fts_commit_table(*ftt);
3280 	}
3281 
3282 	return(error);
3283 }
3284 
3285 /*********************************************************************//**
3286 Initialize a document. */
3287 void
fts_doc_init(fts_doc_t * doc)3288 fts_doc_init(
3289 /*=========*/
3290 	fts_doc_t*	doc)			/*!< in: doc to initialize */
3291 {
3292 	mem_heap_t*	heap = mem_heap_create(32);
3293 
3294 	memset(doc, 0, sizeof(*doc));
3295 
3296 	doc->self_heap = ib_heap_allocator_create(heap);
3297 }
3298 
3299 /*********************************************************************//**
3300 Free document. */
3301 void
fts_doc_free(fts_doc_t * doc)3302 fts_doc_free(
3303 /*=========*/
3304 	fts_doc_t*	doc)			/*!< in: document */
3305 {
3306 	mem_heap_t*	heap = static_cast<mem_heap_t*>(doc->self_heap->arg);
3307 
3308 	if (doc->tokens) {
3309 		rbt_free(doc->tokens);
3310 	}
3311 
3312 	ut_d(memset(doc, 0, sizeof(*doc)));
3313 
3314 	mem_heap_free(heap);
3315 }
3316 
3317 /*********************************************************************//**
3318 Callback function for fetch that stores a row id to the location pointed.
3319 The column's type must be DATA_FIXBINARY, DATA_BINARY_TYPE, length = 8.
3320 @return always returns NULL */
3321 void*
fts_fetch_row_id(void * row,void * user_arg)3322 fts_fetch_row_id(
3323 /*=============*/
3324 	void*	row,				/*!< in: sel_node_t* */
3325 	void*	user_arg)			/*!< in: data pointer */
3326 {
3327 	sel_node_t*	node = static_cast<sel_node_t*>(row);
3328 
3329 	dfield_t*	dfield = que_node_get_val(node->select_list);
3330 	dtype_t*	type = dfield_get_type(dfield);
3331 	ulint		len = dfield_get_len(dfield);
3332 
3333 	ut_a(dtype_get_mtype(type) == DATA_FIXBINARY);
3334 	ut_a(dtype_get_prtype(type) & DATA_BINARY_TYPE);
3335 	ut_a(len == 8);
3336 
3337 	memcpy(user_arg, dfield_get_data(dfield), 8);
3338 
3339 	return(NULL);
3340 }
3341 
3342 /*********************************************************************//**
3343 Callback function for fetch that stores the text of an FTS document,
3344 converting each column to UTF-16.
3345 @return always FALSE */
3346 ibool
fts_query_expansion_fetch_doc(void * row,void * user_arg)3347 fts_query_expansion_fetch_doc(
3348 /*==========================*/
3349 	void*		row,			/*!< in: sel_node_t* */
3350 	void*		user_arg)		/*!< in: fts_doc_t* */
3351 {
3352 	que_node_t*	exp;
3353 	sel_node_t*	node = static_cast<sel_node_t*>(row);
3354 	fts_doc_t*	result_doc = static_cast<fts_doc_t*>(user_arg);
3355 	dfield_t*	dfield;
3356 	ulint		len;
3357 	ulint		doc_len;
3358 	fts_doc_t	doc;
3359 	CHARSET_INFO*	doc_charset = NULL;
3360 	ulint		field_no = 0;
3361 
3362 	len = 0;
3363 
3364 	fts_doc_init(&doc);
3365 	doc.found = TRUE;
3366 
3367 	exp = node->select_list;
3368 	doc_len = 0;
3369 
3370 	doc_charset  = result_doc->charset;
3371 
3372 	/* Copy each indexed column content into doc->text.f_str */
3373 	while (exp) {
3374 		dfield = que_node_get_val(exp);
3375 		len = dfield_get_len(dfield);
3376 
3377 		/* NULL column */
3378 		if (len == UNIV_SQL_NULL) {
3379 			exp = que_node_get_next(exp);
3380 			continue;
3381 		}
3382 
3383 		if (!doc_charset) {
3384 			doc_charset = fts_get_charset(dfield->type.prtype);
3385 		}
3386 
3387 		doc.charset = doc_charset;
3388 		doc.is_ngram = result_doc->is_ngram;
3389 
3390 		if (dfield_is_ext(dfield)) {
3391 			/* We ignore columns that are stored externally, this
3392 			could result in too many words to search */
3393 			exp = que_node_get_next(exp);
3394 			continue;
3395 		} else {
3396 			doc.text.f_n_char = 0;
3397 
3398 			doc.text.f_str = static_cast<byte*>(
3399 				dfield_get_data(dfield));
3400 
3401 			doc.text.f_len = len;
3402 		}
3403 
3404 		if (field_no == 0) {
3405 			fts_tokenize_document(&doc, result_doc,
3406 					      result_doc->parser);
3407 		} else {
3408 			fts_tokenize_document_next(&doc, doc_len, result_doc,
3409 						   result_doc->parser);
3410 		}
3411 
3412 		exp = que_node_get_next(exp);
3413 
3414 		doc_len += (exp) ? len + 1 : len;
3415 
3416 		field_no++;
3417 	}
3418 
3419 	ut_ad(doc_charset);
3420 
3421 	if (!result_doc->charset) {
3422 		result_doc->charset = doc_charset;
3423 	}
3424 
3425 	fts_doc_free(&doc);
3426 
3427 	return(FALSE);
3428 }
3429 
3430 /*********************************************************************//**
3431 fetch and tokenize the document. */
3432 static
3433 void
fts_fetch_doc_from_rec(fts_get_doc_t * get_doc,dict_index_t * clust_index,btr_pcur_t * pcur,ulint * offsets,fts_doc_t * doc)3434 fts_fetch_doc_from_rec(
3435 /*===================*/
3436 	fts_get_doc_t*  get_doc,	/*!< in: FTS index's get_doc struct */
3437 	dict_index_t*	clust_index,	/*!< in: cluster index */
3438 	btr_pcur_t*	pcur,		/*!< in: cursor whose position
3439 					has been stored */
3440 	ulint*		offsets,	/*!< in: offsets */
3441 	fts_doc_t*	doc)		/*!< out: fts doc to hold parsed
3442 					documents */
3443 {
3444 	dict_index_t*		index;
3445 	dict_table_t*		table;
3446 	const rec_t*		clust_rec;
3447 	ulint			num_field;
3448 	const dict_field_t*	ifield;
3449 	const dict_col_t*	col;
3450 	ulint			clust_pos;
3451 	ulint			i;
3452 	ulint			doc_len = 0;
3453 	ulint			processed_doc = 0;
3454 	st_mysql_ftparser*	parser;
3455 
3456 	if (!get_doc) {
3457 		return;
3458 	}
3459 
3460 	index = get_doc->index_cache->index;
3461 	table = get_doc->index_cache->index->table;
3462 	parser = get_doc->index_cache->index->parser;
3463 
3464 	clust_rec = btr_pcur_get_rec(pcur);
3465 
3466 	num_field = dict_index_get_n_fields(index);
3467 
3468 	for (i = 0; i < num_field; i++) {
3469 		ifield = dict_index_get_nth_field(index, i);
3470 		col = dict_field_get_col(ifield);
3471 		clust_pos = dict_col_get_clust_pos(col, clust_index);
3472 
3473 		if (!get_doc->index_cache->charset) {
3474 			get_doc->index_cache->charset = fts_get_charset(
3475 				ifield->col->prtype);
3476 		}
3477 
3478 		if (rec_offs_nth_extern(offsets, clust_pos)) {
3479 			doc->text.f_str =
3480 				btr_rec_copy_externally_stored_field(
3481 					clust_rec, offsets,
3482 					dict_table_page_size(table),
3483 					clust_pos, &doc->text.f_len,
3484 					static_cast<mem_heap_t*>(
3485 						doc->self_heap->arg));
3486 		} else {
3487 			doc->text.f_str = (byte*) rec_get_nth_field(
3488 				clust_rec, offsets, clust_pos,
3489 				&doc->text.f_len);
3490 		}
3491 
3492 		doc->found = TRUE;
3493 		doc->charset = get_doc->index_cache->charset;
3494 		doc->is_ngram = index->is_ngram;
3495 
3496 		/* Null Field */
3497 		if (doc->text.f_len == UNIV_SQL_NULL || doc->text.f_len == 0) {
3498 			continue;
3499 		}
3500 
3501 		if (processed_doc == 0) {
3502 			fts_tokenize_document(doc, NULL, parser);
3503 		} else {
3504 			fts_tokenize_document_next(doc, doc_len, NULL, parser);
3505 		}
3506 
3507 		processed_doc++;
3508 		doc_len += doc->text.f_len + 1;
3509 	}
3510 }
3511 
3512 /*********************************************************************//**
3513 This function fetches the document inserted during the committing
3514 transaction, and tokenize the inserted text data and insert into
3515 FTS auxiliary table and its cache.
3516 @return TRUE if successful */
3517 static
3518 ulint
fts_add_doc_by_id(fts_trx_table_t * ftt,doc_id_t doc_id,ib_vector_t * fts_indexes MY_ATTRIBUTE ((unused)))3519 fts_add_doc_by_id(
3520 /*==============*/
3521 	fts_trx_table_t*ftt,		/*!< in: FTS trx table */
3522 	doc_id_t	doc_id,		/*!< in: doc id */
3523 	ib_vector_t*	fts_indexes MY_ATTRIBUTE((unused)))
3524 					/*!< in: affected fts indexes */
3525 {
3526 	mtr_t		mtr;
3527 	mem_heap_t*	heap;
3528 	btr_pcur_t	pcur;
3529 	dict_table_t*	table;
3530 	dtuple_t*	tuple;
3531 	dfield_t*       dfield;
3532 	fts_get_doc_t*	get_doc;
3533 	doc_id_t        temp_doc_id;
3534 	dict_index_t*   clust_index;
3535 	dict_index_t*	fts_id_index;
3536 	ibool		is_id_cluster;
3537 	fts_cache_t*   	cache = ftt->table->fts->cache;
3538 
3539 	ut_ad(cache->get_docs);
3540 
3541 	/* If Doc ID has been supplied by the user, then the table
3542 	might not yet be sync-ed */
3543 
3544 	if (!(ftt->table->fts->fts_status & ADDED_TABLE_SYNCED)) {
3545 		fts_init_index(ftt->table, FALSE);
3546 	}
3547 
3548 	/* Get the first FTS index's get_doc */
3549 	get_doc = static_cast<fts_get_doc_t*>(
3550 		ib_vector_get(cache->get_docs, 0));
3551 	ut_ad(get_doc);
3552 
3553 	table = get_doc->index_cache->index->table;
3554 
3555 	heap = mem_heap_create(512);
3556 
3557 	clust_index = dict_table_get_first_index(table);
3558 	fts_id_index = table->fts_doc_id_index;
3559 
3560 	/* Check whether the index on FTS_DOC_ID is cluster index */
3561 	is_id_cluster = (clust_index == fts_id_index);
3562 
3563 	mtr_start(&mtr);
3564 	btr_pcur_init(&pcur);
3565 
3566 	/* Search based on Doc ID. Here, we'll need to consider the case
3567 	when there is no primary index on Doc ID */
3568 	tuple = dtuple_create(heap, 1);
3569 	dfield = dtuple_get_nth_field(tuple, 0);
3570 	dfield->type.mtype = DATA_INT;
3571 	dfield->type.prtype = DATA_NOT_NULL | DATA_UNSIGNED | DATA_BINARY_TYPE;
3572 
3573 	mach_write_to_8((byte*) &temp_doc_id, doc_id);
3574 	dfield_set_data(dfield, &temp_doc_id, sizeof(temp_doc_id));
3575 
3576 	btr_pcur_open_with_no_init(
3577 		fts_id_index, tuple, PAGE_CUR_LE, BTR_SEARCH_LEAF,
3578 		&pcur, 0, &mtr);
3579 
3580 	/* If we have a match, add the data to doc structure */
3581 	if (btr_pcur_get_low_match(&pcur) == 1) {
3582 		const rec_t*	rec;
3583 		btr_pcur_t*	doc_pcur;
3584 		const rec_t*	clust_rec;
3585 		btr_pcur_t	clust_pcur;
3586 		ulint*		offsets = NULL;
3587 		ulint		num_idx = ib_vector_size(cache->get_docs);
3588 
3589 		rec = btr_pcur_get_rec(&pcur);
3590 
3591 		/* Doc could be deleted */
3592 		if (page_rec_is_infimum(rec)
3593 		    || rec_get_deleted_flag(rec, dict_table_is_comp(table))) {
3594 
3595 			goto func_exit;
3596 		}
3597 
3598 		if (is_id_cluster) {
3599 			clust_rec = rec;
3600 			doc_pcur = &pcur;
3601 		} else {
3602 			dtuple_t*	clust_ref;
3603 			ulint		n_fields;
3604 
3605 			btr_pcur_init(&clust_pcur);
3606 			n_fields = dict_index_get_n_unique(clust_index);
3607 
3608 			clust_ref = dtuple_create(heap, n_fields);
3609 			dict_index_copy_types(clust_ref, clust_index, n_fields);
3610 
3611 			row_build_row_ref_in_tuple(
3612 				clust_ref, rec, fts_id_index, NULL, NULL);
3613 
3614 			btr_pcur_open_with_no_init(
3615 				clust_index, clust_ref, PAGE_CUR_LE,
3616 				BTR_SEARCH_LEAF, &clust_pcur, 0, &mtr);
3617 
3618 			doc_pcur = &clust_pcur;
3619 			clust_rec = btr_pcur_get_rec(&clust_pcur);
3620 
3621 		}
3622 
3623 		offsets = rec_get_offsets(clust_rec, clust_index,
3624 					  NULL, ULINT_UNDEFINED, &heap);
3625 
3626 		 for (ulint i = 0; i < num_idx; ++i) {
3627 			fts_doc_t       doc;
3628 			dict_table_t*   table;
3629 			fts_get_doc_t*  get_doc;
3630 
3631 			get_doc = static_cast<fts_get_doc_t*>(
3632 				ib_vector_get(cache->get_docs, i));
3633 
3634 			table = get_doc->index_cache->index->table;
3635 
3636 			fts_doc_init(&doc);
3637 
3638 			fts_fetch_doc_from_rec(
3639 				get_doc, clust_index, doc_pcur, offsets, &doc);
3640 
3641 			if (doc.found) {
3642 				ibool	success MY_ATTRIBUTE((unused));
3643 
3644 				btr_pcur_store_position(doc_pcur, &mtr);
3645 				mtr_commit(&mtr);
3646 
3647 				DEBUG_SYNC_C("fts_instrument_sync_cache_wait");
3648 				rw_lock_x_lock(&table->fts->cache->lock);
3649 
3650 				if (table->fts->cache->stopword_info.status
3651 				    & STOPWORD_NOT_INIT) {
3652 					fts_load_stopword(table, NULL, NULL,
3653 							  NULL, TRUE, TRUE);
3654 				}
3655 
3656 				fts_cache_add_doc(
3657 					table->fts->cache,
3658 					get_doc->index_cache,
3659 					doc_id, doc.tokens);
3660 
3661 				bool	need_sync = false;
3662 				if ((cache->total_size > fts_max_cache_size / 10
3663 				     || fts_need_sync)
3664 				    && !cache->sync->in_progress) {
3665 					need_sync = true;
3666 				}
3667 
3668 				rw_lock_x_unlock(&table->fts->cache->lock);
3669 
3670 				DBUG_EXECUTE_IF(
3671                                         "fts_instrument_sync_cache_wait",
3672 					srv_fatal_semaphore_wait_threshold = 25;
3673 					fts_max_cache_size = 100;
3674                                         fts_sync(cache->sync, true, true, false);
3675                                 );
3676 
3677 				DBUG_EXECUTE_IF(
3678 					"fts_instrument_sync",
3679 					fts_optimize_request_sync_table(table);
3680 					os_event_wait(cache->sync->event);
3681 				);
3682 
3683 				DBUG_EXECUTE_IF(
3684 					"fts_instrument_sync_debug",
3685 					fts_sync(cache->sync, true, true, false);
3686 				);
3687 
3688 				DEBUG_SYNC_C("fts_instrument_sync_request");
3689 				DBUG_EXECUTE_IF(
3690 					"fts_instrument_sync_request",
3691 					fts_optimize_request_sync_table(table);
3692 				);
3693 
3694 				if (need_sync) {
3695 					fts_optimize_request_sync_table(table);
3696 				}
3697 
3698 				mtr_start(&mtr);
3699 
3700 				if (i < num_idx - 1) {
3701 
3702 					success = btr_pcur_restore_position(
3703 						BTR_SEARCH_LEAF, doc_pcur,
3704 						&mtr);
3705 
3706 					ut_ad(success);
3707 				}
3708 			}
3709 
3710 			fts_doc_free(&doc);
3711 		}
3712 
3713 		if (!is_id_cluster) {
3714 			btr_pcur_close(doc_pcur);
3715 		}
3716 	}
3717 func_exit:
3718 	mtr_commit(&mtr);
3719 
3720 	btr_pcur_close(&pcur);
3721 
3722 	mem_heap_free(heap);
3723 	return(TRUE);
3724 }
3725 
3726 
3727 /*********************************************************************//**
3728 Callback function to read a single ulint column.
3729 return always returns TRUE */
3730 static
3731 ibool
fts_read_ulint(void * row,void * user_arg)3732 fts_read_ulint(
3733 /*===========*/
3734 	void*		row,		/*!< in: sel_node_t* */
3735 	void*		user_arg)	/*!< in: pointer to ulint */
3736 {
3737 	sel_node_t*	sel_node = static_cast<sel_node_t*>(row);
3738 	ulint*		value = static_cast<ulint*>(user_arg);
3739 	que_node_t*	exp = sel_node->select_list;
3740 	dfield_t*	dfield = que_node_get_val(exp);
3741 	void*		data = dfield_get_data(dfield);
3742 
3743 	*value = static_cast<ulint>(mach_read_from_4(
3744 		static_cast<const byte*>(data)));
3745 
3746 	return(TRUE);
3747 }
3748 
3749 /*********************************************************************//**
3750 Get maximum Doc ID in a table if index "FTS_DOC_ID_INDEX" exists
3751 @return max Doc ID or 0 if index "FTS_DOC_ID_INDEX" does not exist */
3752 doc_id_t
fts_get_max_doc_id(dict_table_t * table)3753 fts_get_max_doc_id(
3754 /*===============*/
3755 	dict_table_t*	table)		/*!< in: user table */
3756 {
3757 	dict_index_t*	index;
3758 	dict_field_t*	dfield MY_ATTRIBUTE((unused)) = NULL;
3759 	doc_id_t	doc_id = 0;
3760 	mtr_t		mtr;
3761 	btr_pcur_t	pcur;
3762 
3763 	index = table->fts_doc_id_index;
3764 
3765 	if (!index) {
3766 		return(0);
3767 	}
3768 
3769 	dfield = dict_index_get_nth_field(index, 0);
3770 
3771 #if 0 /* This can fail when renaming a column to FTS_DOC_ID_COL_NAME. */
3772 	ut_ad(innobase_strcasecmp(FTS_DOC_ID_COL_NAME, dfield->name) == 0);
3773 #endif
3774 
3775 	mtr_start(&mtr);
3776 
3777 	/* fetch the largest indexes value */
3778 	btr_pcur_open_at_index_side(
3779 		false, index, BTR_SEARCH_LEAF, &pcur, true, 0, &mtr);
3780 
3781 	if (!page_is_empty(btr_pcur_get_page(&pcur))) {
3782 		const rec_t*    rec = NULL;
3783 		ulint		offsets_[REC_OFFS_NORMAL_SIZE];
3784 		ulint*		offsets = offsets_;
3785 		mem_heap_t*	heap = NULL;
3786 		ulint		len;
3787 		const void*	data;
3788 
3789 		rec_offs_init(offsets_);
3790 
3791 		do {
3792 			rec = btr_pcur_get_rec(&pcur);
3793 
3794 			if (page_rec_is_user_rec(rec)) {
3795 				break;
3796 			}
3797 		} while (btr_pcur_move_to_prev(&pcur, &mtr));
3798 
3799 		if (!rec) {
3800 			goto func_exit;
3801 		}
3802 
3803 		offsets = rec_get_offsets(
3804 			rec, index, offsets, ULINT_UNDEFINED, &heap);
3805 
3806 		data = rec_get_nth_field(rec, offsets, 0, &len);
3807 
3808 		doc_id = static_cast<doc_id_t>(fts_read_doc_id(
3809 			static_cast<const byte*>(data)));
3810 	}
3811 
3812 func_exit:
3813 	btr_pcur_close(&pcur);
3814 	mtr_commit(&mtr);
3815 	return(doc_id);
3816 }
3817 
3818 /*********************************************************************//**
3819 Fetch document with the given document id.
3820 @return DB_SUCCESS if OK else error */
3821 dberr_t
fts_doc_fetch_by_doc_id(fts_get_doc_t * get_doc,doc_id_t doc_id,dict_index_t * index_to_use,ulint option,fts_sql_callback callback,void * arg)3822 fts_doc_fetch_by_doc_id(
3823 /*====================*/
3824 	fts_get_doc_t*	get_doc,	/*!< in: state */
3825 	doc_id_t	doc_id,		/*!< in: id of document to
3826 					fetch */
3827 	dict_index_t*	index_to_use,	/*!< in: caller supplied FTS index,
3828 					or NULL */
3829 	ulint		option,		/*!< in: search option, if it is
3830 					greater than doc_id or equal */
3831 	fts_sql_callback
3832 			callback,	/*!< in: callback to read */
3833 	void*		arg)		/*!< in: callback arg */
3834 {
3835 	pars_info_t*	info;
3836 	dberr_t		error;
3837 	const char*	select_str;
3838 	doc_id_t	write_doc_id;
3839 	dict_index_t*	index;
3840 	trx_t*		trx = trx_allocate_for_background();
3841 	que_t*          graph;
3842 
3843 	trx->op_info = "fetching indexed FTS document";
3844 
3845 	/* The FTS index can be supplied by caller directly with
3846 	"index_to_use", otherwise, get it from "get_doc" */
3847 	index = (index_to_use) ? index_to_use : get_doc->index_cache->index;
3848 
3849 	if (get_doc && get_doc->get_document_graph) {
3850 		info = get_doc->get_document_graph->info;
3851 	} else {
3852 		info = pars_info_create();
3853 	}
3854 
3855 	/* Convert to "storage" byte order. */
3856 	fts_write_doc_id((byte*) &write_doc_id, doc_id);
3857 	fts_bind_doc_id(info, "doc_id", &write_doc_id);
3858 	pars_info_bind_function(info, "my_func", callback, arg);
3859 
3860 	select_str = fts_get_select_columns_str(index, info, info->heap);
3861 	pars_info_bind_id(info, TRUE, "table_name", index->table_name);
3862 
3863 	if (!get_doc || !get_doc->get_document_graph) {
3864 		if (option == FTS_FETCH_DOC_BY_ID_EQUAL) {
3865 			graph = fts_parse_sql(
3866 				NULL,
3867 				info,
3868 				mem_heap_printf(info->heap,
3869 					"DECLARE FUNCTION my_func;\n"
3870 					"DECLARE CURSOR c IS"
3871 					" SELECT %s FROM $table_name"
3872 					" WHERE %s = :doc_id;\n"
3873 					"BEGIN\n"
3874 					""
3875 					"OPEN c;\n"
3876 					"WHILE 1 = 1 LOOP\n"
3877 					"  FETCH c INTO my_func();\n"
3878 					"  IF c %% NOTFOUND THEN\n"
3879 					"    EXIT;\n"
3880 					"  END IF;\n"
3881 					"END LOOP;\n"
3882 					"CLOSE c;",
3883 					select_str, FTS_DOC_ID_COL_NAME));
3884 		} else {
3885 			ut_ad(option == FTS_FETCH_DOC_BY_ID_LARGE);
3886 
3887 			/* This is used for crash recovery of table with
3888 			hidden DOC ID or FTS indexes. We will scan the table
3889 			to re-processing user table rows whose DOC ID or
3890 			FTS indexed documents have not been sync-ed to disc
3891 			during recent crash.
3892 			In the case that all fulltext indexes are dropped
3893 			for a table, we will keep the "hidden" FTS_DOC_ID
3894 			column, and this scan is to retreive the largest
3895 			DOC ID being used in the table to determine the
3896 			appropriate next DOC ID.
3897 			In the case of there exists fulltext index(es), this
3898 			operation will re-tokenize any docs that have not
3899 			been sync-ed to the disk, and re-prime the FTS
3900 			cached */
3901 			graph = fts_parse_sql(
3902 				NULL,
3903 				info,
3904 				mem_heap_printf(info->heap,
3905 					"DECLARE FUNCTION my_func;\n"
3906 					"DECLARE CURSOR c IS"
3907 					" SELECT %s, %s FROM $table_name"
3908 					" WHERE %s > :doc_id;\n"
3909 					"BEGIN\n"
3910 					""
3911 					"OPEN c;\n"
3912 					"WHILE 1 = 1 LOOP\n"
3913 					"  FETCH c INTO my_func();\n"
3914 					"  IF c %% NOTFOUND THEN\n"
3915 					"    EXIT;\n"
3916 					"  END IF;\n"
3917 					"END LOOP;\n"
3918 					"CLOSE c;",
3919 					FTS_DOC_ID_COL_NAME,
3920 					select_str, FTS_DOC_ID_COL_NAME));
3921 		}
3922 		if (get_doc) {
3923 			get_doc->get_document_graph = graph;
3924 		}
3925 	} else {
3926 		graph = get_doc->get_document_graph;
3927 	}
3928 
3929 	error = fts_eval_sql(trx, graph);
3930 
3931 	if (error == DB_SUCCESS) {
3932 		fts_sql_commit(trx);
3933 	} else {
3934 		fts_sql_rollback(trx);
3935 	}
3936 
3937 	trx_free_for_background(trx);
3938 
3939 	if (!get_doc) {
3940 		fts_que_graph_free(graph);
3941 	}
3942 
3943 	return(error);
3944 }
3945 
3946 /*********************************************************************//**
3947 Write out a single word's data as new entry/entries in the INDEX table.
3948 @return DB_SUCCESS if all OK. */
3949 dberr_t
fts_write_node(trx_t * trx,que_t ** graph,fts_table_t * fts_table,fts_string_t * word,fts_node_t * node)3950 fts_write_node(
3951 /*===========*/
3952 	trx_t*		trx,			/*!< in: transaction */
3953 	que_t**		graph,			/*!< in: query graph */
3954 	fts_table_t*	fts_table,		/*!< in: aux table */
3955 	fts_string_t*	word,			/*!< in: word in UTF-8 */
3956 	fts_node_t*	node)			/*!< in: node columns */
3957 {
3958 	pars_info_t*	info;
3959 	dberr_t		error;
3960 	ib_uint32_t	doc_count;
3961 	ib_time_monotonic_t	start_time;
3962 	doc_id_t	last_doc_id;
3963 	doc_id_t	first_doc_id;
3964 	char		table_name[MAX_FULL_NAME_LEN];
3965 
3966 	ut_a(node->ilist != NULL);
3967 
3968 	if (*graph) {
3969 		info = (*graph)->info;
3970 	} else {
3971 		info = pars_info_create();
3972 
3973 		fts_get_table_name(fts_table, table_name);
3974 		pars_info_bind_id(info, true, "index_table_name", table_name);
3975 	}
3976 
3977 	pars_info_bind_varchar_literal(info, "token", word->f_str, word->f_len);
3978 
3979 	/* Convert to "storage" byte order. */
3980 	fts_write_doc_id((byte*) &first_doc_id, node->first_doc_id);
3981 	fts_bind_doc_id(info, "first_doc_id", &first_doc_id);
3982 
3983 	/* Convert to "storage" byte order. */
3984 	fts_write_doc_id((byte*) &last_doc_id, node->last_doc_id);
3985 	fts_bind_doc_id(info, "last_doc_id", &last_doc_id);
3986 
3987 	ut_a(node->last_doc_id >= node->first_doc_id);
3988 
3989 	/* Convert to "storage" byte order. */
3990 	mach_write_to_4((byte*) &doc_count, node->doc_count);
3991 	pars_info_bind_int4_literal(
3992 		info, "doc_count", (const ib_uint32_t*) &doc_count);
3993 
3994 	/* Set copy_name to FALSE since it's a static. */
3995 	pars_info_bind_literal(
3996 		info, "ilist", node->ilist, node->ilist_size,
3997 		DATA_BLOB, DATA_BINARY_TYPE);
3998 
3999 	if (!*graph) {
4000 
4001 		*graph = fts_parse_sql(
4002 			fts_table,
4003 			info,
4004 			"BEGIN\n"
4005 			"INSERT INTO $index_table_name VALUES"
4006 			" (:token, :first_doc_id,"
4007 			"  :last_doc_id, :doc_count, :ilist);");
4008 	}
4009 
4010 	start_time = ut_time_monotonic();
4011 	error = fts_eval_sql(trx, *graph);
4012 	elapsed_time += ut_time_monotonic() - start_time;
4013 	++n_nodes;
4014 
4015 	return(error);
4016 }
4017 
4018 /*********************************************************************//**
4019 Add rows to the DELETED_CACHE table.
4020 @return DB_SUCCESS if all went well else error code*/
4021 static MY_ATTRIBUTE((nonnull, warn_unused_result))
4022 dberr_t
fts_sync_add_deleted_cache(fts_sync_t * sync,ib_vector_t * doc_ids)4023 fts_sync_add_deleted_cache(
4024 /*=======================*/
4025 	fts_sync_t*	sync,			/*!< in: sync state */
4026 	ib_vector_t*	doc_ids)		/*!< in: doc ids to add */
4027 {
4028 	ulint		i;
4029 	pars_info_t*	info;
4030 	que_t*		graph;
4031 	fts_table_t	fts_table;
4032 	char		table_name[MAX_FULL_NAME_LEN];
4033 	doc_id_t	dummy = 0;
4034 	dberr_t		error = DB_SUCCESS;
4035 	ulint		n_elems = ib_vector_size(doc_ids);
4036 
4037 	ut_a(ib_vector_size(doc_ids) > 0);
4038 
4039 	ib_vector_sort(doc_ids, fts_update_doc_id_cmp);
4040 
4041 	info = pars_info_create();
4042 
4043 	fts_bind_doc_id(info, "doc_id", &dummy);
4044 
4045 	FTS_INIT_FTS_TABLE(
4046 		&fts_table, "DELETED_CACHE", FTS_COMMON_TABLE, sync->table);
4047 
4048 	fts_get_table_name(&fts_table, table_name);
4049 	pars_info_bind_id(info, true, "table_name", table_name);
4050 
4051 	graph = fts_parse_sql(
4052 		&fts_table,
4053 		info,
4054 		"BEGIN INSERT INTO $table_name VALUES (:doc_id);");
4055 
4056 	for (i = 0; i < n_elems && error == DB_SUCCESS; ++i) {
4057 		fts_update_t*	update;
4058 		doc_id_t	write_doc_id;
4059 
4060 		update = static_cast<fts_update_t*>(ib_vector_get(doc_ids, i));
4061 
4062 		/* Convert to "storage" byte order. */
4063 		fts_write_doc_id((byte*) &write_doc_id, update->doc_id);
4064 		fts_bind_doc_id(info, "doc_id", &write_doc_id);
4065 
4066 		error = fts_eval_sql(sync->trx, graph);
4067 	}
4068 
4069 	fts_que_graph_free(graph);
4070 
4071 	return(error);
4072 }
4073 
4074 /** Write the words and ilist to disk.
4075 @param[in,out]	trx		transaction
4076 @param[in]	index_cache	index cache
4077 @param[in]	unlock_cache	whether unlock cache when write node
4078 				Also set this to true if sync takes
4079 				very long
4080 @param[in]	sync_start_time	Holds the timestamp of start of sync
4081 				for deducing the length of sync time
4082 @return DB_SUCCESS if all went well else error code */
4083 static MY_ATTRIBUTE((nonnull, warn_unused_result))
4084 dberr_t
fts_sync_write_words(trx_t * trx,fts_index_cache_t * index_cache,bool unlock_cache,ib_time_t sync_start_time)4085 fts_sync_write_words(
4086 	trx_t*			trx,
4087 	fts_index_cache_t*	index_cache,
4088 	bool			unlock_cache,
4089 	ib_time_t		sync_start_time)
4090 {
4091 	fts_table_t	fts_table;
4092 	ulint		n_nodes = 0;
4093 	ulint		n_words = 0;
4094 	const ib_rbt_node_t* rbt_node;
4095 	dberr_t		error = DB_SUCCESS;
4096 	ibool		print_error = FALSE;
4097 	dict_table_t*	table = index_cache->index->table;
4098 	/* We use this to deduce threshold value of time
4099 	that we can let sync to go on holding cache lock */
4100 	const float cutoff = 0.98;
4101 	ulint		lock_threshold =
4102 			(srv_fatal_semaphore_wait_threshold % SRV_SEMAPHORE_WAIT_EXTENSION)
4103 			* cutoff;
4104 	bool		timeout_extended = false;
4105 #ifdef FTS_DOC_STATS_DEBUG
4106 	ulint		n_new_words = 0;
4107 #endif /* FTS_DOC_STATS_DEBUG */
4108 
4109 	FTS_INIT_INDEX_TABLE(
4110 		&fts_table, NULL, FTS_INDEX_TABLE, index_cache->index);
4111 
4112 	n_words = rbt_size(index_cache->words);
4113 
4114 	/* We iterate over the entire tree, even if there is an error,
4115 	since we want to free the memory used during caching. */
4116 	for (rbt_node = rbt_first(index_cache->words);
4117 	     rbt_node;
4118 	     rbt_node = rbt_next(index_cache->words, rbt_node)) {
4119 
4120 		ulint			i;
4121 		ulint			selected;
4122 		fts_tokenizer_word_t*	word;
4123 
4124 		word = rbt_value(fts_tokenizer_word_t, rbt_node);
4125 
4126 		DBUG_EXECUTE_IF("fts_instrument_write_words_before_select_index",
4127 				os_thread_sleep(300000););
4128 
4129 		selected = fts_select_index(
4130 			index_cache->charset, word->text.f_str,
4131 			word->text.f_len);
4132 
4133 		fts_table.suffix = fts_get_suffix(selected);
4134 
4135 #ifdef FTS_DOC_STATS_DEBUG
4136 		/* Check if the word exists in the FTS index and if not
4137 		then we need to increment the total word count stats. */
4138 		if (error == DB_SUCCESS && fts_enable_diag_print) {
4139 			ibool	found = FALSE;
4140 
4141 			error = fts_is_word_in_index(
4142 				trx,
4143 				&index_cache->sel_graph[selected],
4144 				&fts_table,
4145 				&word->text, &found);
4146 
4147 			if (error == DB_SUCCESS && !found) {
4148 
4149 				++n_new_words;
4150 			}
4151 		}
4152 #endif /* FTS_DOC_STATS_DEBUG */
4153 
4154 		/* We iterate over all the nodes even if there was an error */
4155 		for (i = 0; i < ib_vector_size(word->nodes); ++i) {
4156 
4157 			fts_node_t* fts_node = static_cast<fts_node_t*>(
4158 				ib_vector_get(word->nodes, i));
4159 
4160 			if (fts_node->synced) {
4161 				continue;
4162 			} else {
4163 				fts_node->synced = true;
4164 			}
4165 
4166 			/*FIXME: we need to handle the error properly. */
4167 			if (error == DB_SUCCESS) {
4168 				DEBUG_SYNC_C("fts_instrument_sync");
4169 				DBUG_EXECUTE_IF("fts_instrument_sync",
4170 			                        os_thread_sleep(10000000););
4171 				if (!unlock_cache) {
4172 					ulint cache_lock_time = ut_time_monotonic() - sync_start_time;
4173 					if (cache_lock_time > lock_threshold) {
4174 						if (!timeout_extended) {
4175 							os_atomic_increment_ulint(
4176 							&srv_fatal_semaphore_wait_threshold,
4177 							SRV_SEMAPHORE_WAIT_EXTENSION);
4178 							timeout_extended = true;
4179 							lock_threshold +=
4180 							SRV_SEMAPHORE_WAIT_EXTENSION;
4181 						} else {
4182 							unlock_cache = true;
4183 							os_atomic_decrement_ulint(
4184 							&srv_fatal_semaphore_wait_threshold,
4185 							SRV_SEMAPHORE_WAIT_EXTENSION);
4186 							timeout_extended = false;
4187 
4188 						}
4189 					}
4190 				}
4191 
4192 				if (unlock_cache) {
4193 					rw_lock_x_unlock(
4194 						&table->fts->cache->lock);
4195 				}
4196 
4197 				error = fts_write_node(
4198 					trx,
4199 					&index_cache->ins_graph[selected],
4200 					&fts_table, &word->text, fts_node);
4201 				DBUG_EXECUTE_IF("fts_instrument_sync",
4202                                                 os_thread_sleep(15000000););
4203 
4204 				DEBUG_SYNC_C("fts_write_node");
4205 				DBUG_EXECUTE_IF("fts_write_node_crash",
4206 					DBUG_SUICIDE(););
4207 
4208 				DBUG_EXECUTE_IF("fts_instrument_sync_sleep",
4209 					os_thread_sleep(1000000);
4210 				);
4211 
4212 				if (unlock_cache) {
4213 					rw_lock_x_lock(
4214 						&table->fts->cache->lock);
4215 				}
4216 			}
4217 		}
4218 
4219 		n_nodes += ib_vector_size(word->nodes);
4220 
4221 		if (error != DB_SUCCESS && !print_error) {
4222 			ib::error() << "(" << ut_strerr(error) << ") writing"
4223 				" word node to FTS auxiliary index table.";
4224 			print_error = TRUE;
4225 		}
4226 	}
4227 
4228 #ifdef FTS_DOC_STATS_DEBUG
4229 	if (error == DB_SUCCESS && n_new_words > 0 && fts_enable_diag_print) {
4230 		fts_table_t	fts_table;
4231 
4232 		FTS_INIT_FTS_TABLE(&fts_table, NULL, FTS_COMMON_TABLE, table);
4233 
4234 		/* Increment the total number of words in the FTS index */
4235 		error = fts_config_increment_index_value(
4236 			trx, index_cache->index, FTS_TOTAL_WORD_COUNT,
4237 			n_new_words);
4238 	}
4239 #endif /* FTS_DOC_STATS_DEBUG */
4240 
4241 	if (fts_enable_diag_print) {
4242 		printf("Avg number of nodes: %lf\n",
4243 		       (double) n_nodes / (double) (n_words > 1 ? n_words : 1));
4244 	}
4245 
4246 	return(error);
4247 }
4248 
4249 #ifdef FTS_DOC_STATS_DEBUG
4250 /*********************************************************************//**
4251 Write a single documents statistics to disk.
4252 @return DB_SUCCESS if all went well else error code */
4253 static MY_ATTRIBUTE((nonnull, warn_unused_result))
4254 dberr_t
fts_sync_write_doc_stat(trx_t * trx,dict_index_t * index,que_t ** graph,const fts_doc_stats_t * doc_stat)4255 fts_sync_write_doc_stat(
4256 /*====================*/
4257 	trx_t*			trx,		/*!< in: transaction */
4258 	dict_index_t*		index,		/*!< in: index */
4259 	que_t**			graph,		/* out: query graph */
4260 	const fts_doc_stats_t*	doc_stat)	/*!< in: doc stats to write */
4261 {
4262 	pars_info_t*	info;
4263 	doc_id_t	doc_id;
4264 	dberr_t		error = DB_SUCCESS;
4265 	ib_uint32_t	word_count;
4266 	char		table_name[MAX_FULL_NAME_LEN];
4267 
4268 	if (*graph) {
4269 		info = (*graph)->info;
4270 	} else {
4271 		info = pars_info_create();
4272 	}
4273 
4274 	/* Convert to "storage" byte order. */
4275 	mach_write_to_4((byte*) &word_count, doc_stat->word_count);
4276 	pars_info_bind_int4_literal(
4277 		info, "count", (const ib_uint32_t*) &word_count);
4278 
4279 	/* Convert to "storage" byte order. */
4280 	fts_write_doc_id((byte*) &doc_id, doc_stat->doc_id);
4281 	fts_bind_doc_id(info, "doc_id", &doc_id);
4282 
4283 	if (!*graph) {
4284 		fts_table_t	fts_table;
4285 
4286 		FTS_INIT_INDEX_TABLE(
4287 			&fts_table, "DOC_ID", FTS_INDEX_TABLE, index);
4288 
4289 		fts_get_table_name(&fts_table, table_name);
4290 
4291 		pars_info_bind_id(info, true, "doc_id_table", table_name);
4292 
4293 		*graph = fts_parse_sql(
4294 			&fts_table,
4295 			info,
4296 			"BEGIN"
4297 			" INSERT INTO $doc_id_table VALUES (:doc_id, :count);");
4298 	}
4299 
4300 	for (;;) {
4301 		error = fts_eval_sql(trx, *graph);
4302 
4303 		if (error == DB_SUCCESS) {
4304 
4305 			break;				/* Exit the loop. */
4306 		} else {
4307 
4308 			if (error == DB_LOCK_WAIT_TIMEOUT) {
4309 				ib::warn() << "Lock wait timeout writing to"
4310 					" FTS doc_id. Retrying!";
4311 
4312 				trx->error_state = DB_SUCCESS;
4313 			} else {
4314 				ib::error() << "(" << ut_strerr(error)
4315 					<< ") while writing to FTS doc_id.";
4316 
4317 				break;			/* Exit the loop. */
4318 			}
4319 		}
4320 	}
4321 
4322 	return(error);
4323 }
4324 
4325 /*********************************************************************//**
4326 Write document statistics to disk.
4327 @return DB_SUCCESS if all OK */
4328 static
4329 ulint
fts_sync_write_doc_stats(trx_t * trx,const fts_index_cache_t * index_cache)4330 fts_sync_write_doc_stats(
4331 /*=====================*/
4332 	trx_t*			trx,		/*!< in: transaction */
4333 	const fts_index_cache_t*index_cache)	/*!< in: index cache */
4334 {
4335 	dberr_t		error = DB_SUCCESS;
4336 	que_t*		graph = NULL;
4337 	fts_doc_stats_t*  doc_stat;
4338 
4339 	if (ib_vector_is_empty(index_cache->doc_stats)) {
4340 		return(DB_SUCCESS);
4341 	}
4342 
4343 	doc_stat = static_cast<ts_doc_stats_t*>(
4344 		ib_vector_pop(index_cache->doc_stats));
4345 
4346 	while (doc_stat) {
4347 		error = fts_sync_write_doc_stat(
4348 			trx, index_cache->index, &graph, doc_stat);
4349 
4350 		if (error != DB_SUCCESS) {
4351 			break;
4352 		}
4353 
4354 		if (ib_vector_is_empty(index_cache->doc_stats)) {
4355 			break;
4356 		}
4357 
4358 		doc_stat = static_cast<ts_doc_stats_t*>(
4359 			ib_vector_pop(index_cache->doc_stats));
4360 	}
4361 
4362 	if (graph != NULL) {
4363 		fts_que_graph_free_check_lock(NULL, index_cache, graph);
4364 	}
4365 
4366 	return(error);
4367 }
4368 
4369 /*********************************************************************//**
4370 Callback to check the existince of a word.
4371 @return always return NULL */
4372 static
4373 ibool
fts_lookup_word(void * row,void * user_arg)4374 fts_lookup_word(
4375 /*============*/
4376 	void*	row,				/*!< in:  sel_node_t* */
4377 	void*	user_arg)			/*!< in:  fts_doc_t* */
4378 {
4379 
4380 	que_node_t*	exp;
4381 	sel_node_t*	node = static_cast<sel_node_t*>(row);
4382 	ibool*		found = static_cast<ibool*>(user_arg);
4383 
4384 	exp = node->select_list;
4385 
4386 	while (exp) {
4387 		dfield_t*	dfield = que_node_get_val(exp);
4388 		ulint		len = dfield_get_len(dfield);
4389 
4390 		if (len != UNIV_SQL_NULL && len != 0) {
4391 			*found = TRUE;
4392 		}
4393 
4394 		exp = que_node_get_next(exp);
4395 	}
4396 
4397 	return(FALSE);
4398 }
4399 
4400 /*********************************************************************//**
4401 Check whether a particular word (term) exists in the FTS index.
4402 @return DB_SUCCESS if all went well else error code */
4403 static
4404 dberr_t
fts_is_word_in_index(trx_t * trx,que_t ** graph,fts_table_t * fts_table,const fts_string_t * word,ibool * found)4405 fts_is_word_in_index(
4406 /*=================*/
4407 	trx_t*		trx,			/*!< in: FTS query state */
4408 	que_t**		graph,			/* out: Query graph */
4409 	fts_table_t*	fts_table,		/*!< in: table instance */
4410 	const fts_string_t*
4411 			word,			/*!< in: the word to check */
4412 	ibool*		found)			/* out: TRUE if exists */
4413 {
4414 	pars_info_t*	info;
4415 	dberr_t		error;
4416 	char		table_name[MAX_FULL_NAME_LEN];
4417 
4418 	trx->op_info = "looking up word in FTS index";
4419 
4420 	if (*graph) {
4421 		info = (*graph)->info;
4422 	} else {
4423 		info = pars_info_create();
4424 	}
4425 
4426 	fts_get_table_name(fts_table, table_name);
4427 	pars_info_bind_id(info, true, "table_name", table_name);
4428 	pars_info_bind_function(info, "my_func", fts_lookup_word, found);
4429 	pars_info_bind_varchar_literal(info, "word", word->f_str, word->f_len);
4430 
4431 	if (*graph == NULL) {
4432 		*graph = fts_parse_sql(
4433 			fts_table,
4434 			info,
4435 			"DECLARE FUNCTION my_func;\n"
4436 			"DECLARE CURSOR c IS"
4437 			" SELECT doc_count\n"
4438 			" FROM $table_name\n"
4439 			" WHERE word = :word"
4440 			" ORDER BY first_doc_id;\n"
4441 			"BEGIN\n"
4442 			"\n"
4443 			"OPEN c;\n"
4444 			"WHILE 1 = 1 LOOP\n"
4445 			"  FETCH c INTO my_func();\n"
4446 			"  IF c % NOTFOUND THEN\n"
4447 			"    EXIT;\n"
4448 			"  END IF;\n"
4449 			"END LOOP;\n"
4450 			"CLOSE c;");
4451 	}
4452 
4453 	for (;;) {
4454 		error = fts_eval_sql(trx, *graph);
4455 
4456 		if (error == DB_SUCCESS) {
4457 
4458 			break;				/* Exit the loop. */
4459 		} else {
4460 
4461 			if (error == DB_LOCK_WAIT_TIMEOUT) {
4462 				ib::warn() << "Lock wait timeout reading"
4463 					" FTS index. Retrying!";
4464 
4465 				trx->error_state = DB_SUCCESS;
4466 			} else {
4467 				ib::error() << "(" << ut_strerr(error)
4468 					<< ") while reading FTS index.";
4469 
4470 				break;			/* Exit the loop. */
4471 			}
4472 		}
4473 	}
4474 
4475 	return(error);
4476 }
4477 #endif /* FTS_DOC_STATS_DEBUG */
4478 
4479 /*********************************************************************//**
4480 Begin Sync, create transaction, acquire locks, etc. */
4481 static
4482 void
fts_sync_begin(fts_sync_t * sync)4483 fts_sync_begin(
4484 /*===========*/
4485 	fts_sync_t*	sync)			/*!< in: sync state */
4486 {
4487 	fts_cache_t*	cache = sync->table->fts->cache;
4488 
4489 	n_nodes = 0;
4490 	elapsed_time = 0;
4491 
4492 	sync->start_time = ut_time_monotonic();
4493 
4494 	sync->trx = trx_allocate_for_background();
4495 
4496 	if (fts_enable_diag_print) {
4497 		ib::info() << "FTS SYNC for table " << sync->table->name
4498 			<< ", deleted count: "
4499 			<< ib_vector_size(cache->deleted_doc_ids)
4500 			<< " size: " << cache->total_size << " bytes";
4501 	}
4502 }
4503 
4504 /*********************************************************************//**
4505 Run SYNC on the table, i.e., write out data from the index specific
4506 cache to the FTS aux INDEX table and FTS aux doc id stats table.
4507 @return DB_SUCCESS if all OK */
4508 static MY_ATTRIBUTE((nonnull, warn_unused_result))
4509 dberr_t
fts_sync_index(fts_sync_t * sync,fts_index_cache_t * index_cache)4510 fts_sync_index(
4511 /*===========*/
4512 	fts_sync_t*		sync,		/*!< in: sync state */
4513 	fts_index_cache_t*	index_cache)	/*!< in: index cache */
4514 {
4515 	trx_t*		trx = sync->trx;
4516 	dberr_t		error = DB_SUCCESS;
4517 
4518 	trx->op_info = "doing SYNC index";
4519 
4520 	if (fts_enable_diag_print) {
4521 		ib::info() << "SYNC words: " << rbt_size(index_cache->words);
4522 	}
4523 
4524 	ut_ad(rbt_validate(index_cache->words));
4525 
4526 	error = fts_sync_write_words(trx, index_cache, sync->unlock_cache,
4527 				     sync->start_time);
4528 
4529 #ifdef FTS_DOC_STATS_DEBUG
4530 	/* FTS_RESOLVE: the word counter info in auxiliary table "DOC_ID"
4531 	is not used currently for ranking. We disable fts_sync_write_doc_stats()
4532 	for now */
4533 	/* Write the per doc statistics that will be used for ranking. */
4534 	if (error == DB_SUCCESS) {
4535 
4536 		error = fts_sync_write_doc_stats(trx, index_cache);
4537 	}
4538 #endif /* FTS_DOC_STATS_DEBUG */
4539 
4540 	return(error);
4541 }
4542 
4543 /** Check if index cache has been synced completely
4544 @param[in,out]	index_cache	index cache
4545 @return true if index is synced, otherwise false. */
4546 static
4547 bool
fts_sync_index_check(fts_index_cache_t * index_cache)4548 fts_sync_index_check(
4549 	fts_index_cache_t*	index_cache)
4550 {
4551 	const ib_rbt_node_t*	rbt_node;
4552 
4553 	for (rbt_node = rbt_first(index_cache->words);
4554 	     rbt_node != NULL;
4555 	     rbt_node = rbt_next(index_cache->words, rbt_node)) {
4556 
4557 		fts_tokenizer_word_t*	word;
4558 		word = rbt_value(fts_tokenizer_word_t, rbt_node);
4559 
4560 		fts_node_t*	fts_node;
4561 		fts_node = static_cast<fts_node_t*>(ib_vector_last(word->nodes));
4562 
4563 		if (!fts_node->synced) {
4564 			return(false);
4565 		}
4566 	}
4567 
4568 	return(true);
4569 }
4570 
4571 /** Reset synced flag in index cache when rollback
4572 @param[in,out]	index_cache	index cache */
4573 static
4574 void
fts_sync_index_reset(fts_index_cache_t * index_cache)4575 fts_sync_index_reset(
4576 	fts_index_cache_t*	index_cache)
4577 {
4578 	const ib_rbt_node_t*	rbt_node;
4579 
4580 	for (rbt_node = rbt_first(index_cache->words);
4581 	     rbt_node != NULL;
4582 	     rbt_node = rbt_next(index_cache->words, rbt_node)) {
4583 
4584 		fts_tokenizer_word_t*	word;
4585 		word = rbt_value(fts_tokenizer_word_t, rbt_node);
4586 
4587 		fts_node_t*	fts_node;
4588 		fts_node = static_cast<fts_node_t*>(ib_vector_last(word->nodes));
4589 
4590 		fts_node->synced = false;
4591 	}
4592 }
4593 
4594 /** Commit the SYNC, change state of processed doc ids etc.
4595 @param[in,out]	sync	sync state
4596 @return DB_SUCCESS if all OK */
4597 static  MY_ATTRIBUTE((nonnull, warn_unused_result))
4598 dberr_t
fts_sync_commit(fts_sync_t * sync)4599 fts_sync_commit(
4600 	fts_sync_t*	sync)
4601 {
4602 	dberr_t		error;
4603 	trx_t*		trx = sync->trx;
4604 	fts_cache_t*	cache = sync->table->fts->cache;
4605 	doc_id_t	last_doc_id;
4606 
4607 	trx->op_info = "doing SYNC commit";
4608 
4609 	/* After each Sync, update the CONFIG table about the max doc id
4610 	we just sync-ed to index table */
4611 	error = fts_cmp_set_sync_doc_id(sync->table, sync->max_doc_id, FALSE,
4612 					&last_doc_id);
4613 
4614 	/* Get the list of deleted documents that are either in the
4615 	cache or were headed there but were deleted before the add
4616 	thread got to them. */
4617 
4618 	if (error == DB_SUCCESS && ib_vector_size(cache->deleted_doc_ids) > 0) {
4619 
4620 		error = fts_sync_add_deleted_cache(
4621 			sync, cache->deleted_doc_ids);
4622 	}
4623 
4624 	/* We need to do this within the deleted lock since fts_delete() can
4625 	attempt to add a deleted doc id to the cache deleted id array. */
4626 	fts_cache_clear(cache);
4627 	DEBUG_SYNC_C("fts_deleted_doc_ids_clear");
4628 	fts_cache_init(cache);
4629 	rw_lock_x_unlock(&cache->lock);
4630 
4631 	if (error == DB_SUCCESS) {
4632 
4633 		fts_sql_commit(trx);
4634 
4635 	} else if (error != DB_SUCCESS) {
4636 
4637 		fts_sql_rollback(trx);
4638 
4639 		ib::error() << "(" << ut_strerr(error) << ") during SYNC.";
4640 	}
4641 
4642 	if (fts_enable_diag_print && elapsed_time) {
4643 		ib::info() << "SYNC for table " << sync->table->name
4644 			<< ": SYNC time: "
4645 			<< (ut_time_monotonic() - sync->start_time)
4646 			<< " secs: elapsed "
4647 			<< (double) n_nodes / elapsed_time
4648 			<< " ins/sec";
4649 	}
4650 
4651 	/* Avoid assertion in trx_free(). */
4652 	trx->dict_operation_lock_mode = 0;
4653 	trx_free_for_background(trx);
4654 
4655 	return(error);
4656 }
4657 
4658 /*********************************************************************//**
4659 Rollback a sync operation */
4660 static
4661 void
fts_sync_rollback(fts_sync_t * sync)4662 fts_sync_rollback(
4663 /*==============*/
4664 	fts_sync_t*	sync)			/*!< in: sync state */
4665 {
4666 	trx_t*		trx = sync->trx;
4667 	fts_cache_t*	cache = sync->table->fts->cache;
4668 
4669 	for (ulint i = 0; i < ib_vector_size(cache->indexes); ++i) {
4670 		ulint			j;
4671 		fts_index_cache_t*	index_cache;
4672 
4673 		index_cache = static_cast<fts_index_cache_t*>(
4674 			ib_vector_get(cache->indexes, i));
4675 
4676 		/* Reset synced flag so nodes will not be skipped
4677 		in the next sync, see fts_sync_write_words(). */
4678 		fts_sync_index_reset(index_cache);
4679 
4680 		for (j = 0; fts_index_selector[j].value; ++j) {
4681 
4682 			if (index_cache->ins_graph[j] != NULL) {
4683 
4684 				fts_que_graph_free_check_lock(
4685 					NULL, index_cache,
4686 					index_cache->ins_graph[j]);
4687 
4688 				index_cache->ins_graph[j] = NULL;
4689 			}
4690 
4691 			if (index_cache->sel_graph[j] != NULL) {
4692 
4693 				fts_que_graph_free_check_lock(
4694 					NULL, index_cache,
4695 					index_cache->sel_graph[j]);
4696 
4697 				index_cache->sel_graph[j] = NULL;
4698 			}
4699 		}
4700 	}
4701 
4702 	rw_lock_x_unlock(&cache->lock);
4703 
4704 	fts_sql_rollback(trx);
4705 
4706 	/* Avoid assertion in trx_free(). */
4707 	trx->dict_operation_lock_mode = 0;
4708 	trx_free_for_background(trx);
4709 }
4710 
4711 /** Check that all indexes are synced.
4712 @param[in,out]	sync		sync state
4713 @return true if all indexes are synced, false otherwise. */
4714 static
4715 bool
fts_check_all_indexes_synced(fts_sync_t * sync)4716 fts_check_all_indexes_synced(
4717 	fts_sync_t*	sync)
4718 {
4719 	ulint i;
4720 	fts_cache_t*	cache = sync->table->fts->cache;
4721 
4722 	/* Make sure all the caches are synced. */
4723 	for (i = 0; i < ib_vector_size(cache->indexes); ++i) {
4724 		fts_index_cache_t*	index_cache;
4725 
4726 		index_cache = static_cast<fts_index_cache_t*>(
4727 			ib_vector_get(cache->indexes, i));
4728 
4729 		if (index_cache->index->to_be_dropped
4730 		    || index_cache->index->table->to_be_dropped
4731 		    || fts_sync_index_check(index_cache)) {
4732 			continue;
4733 		}
4734 
4735 		return false;
4736 	}
4737 
4738 	return true;
4739 }
4740 
4741 /** Run SYNC on the table, i.e., write out data from the cache to the
4742 FTS auxiliary INDEX table and clear the cache at the end.
4743 @param[in,out]	sync		sync state
4744 @param[in]	unlock_cache	whether unlock cache lock when write node
4745 @param[in]	wait		whether wait when a sync is in progress
4746 @param[in]	has_dict_lock		whether has dict operation lock
4747 @return DB_SUCCESS if all OK */
4748 static
4749 dberr_t
fts_sync(fts_sync_t * sync,bool unlock_cache,bool wait,bool has_dict_lock)4750 fts_sync(
4751 	fts_sync_t*	sync,
4752 	bool		unlock_cache,
4753 	bool		wait,
4754 	bool		has_dict_lock)
4755 {
4756 	ulint		i;
4757 	dberr_t		error = DB_SUCCESS;
4758 	fts_cache_t*	cache = sync->table->fts->cache;
4759 
4760 	rw_lock_x_lock(&cache->lock);
4761 
4762 	/* Check if cache is being synced.
4763 	Note: we release cache lock in fts_sync_write_words() to
4764 	avoid long wait for the lock by other threads. */
4765 	while (sync->in_progress) {
4766 		rw_lock_x_unlock(&cache->lock);
4767 
4768 		if (wait) {
4769 			os_event_wait(sync->event);
4770 		} else {
4771 			return(DB_SUCCESS);
4772 		}
4773 
4774 		rw_lock_x_lock(&cache->lock);
4775 	}
4776 
4777 	sync->unlock_cache = unlock_cache;
4778 	sync->in_progress = true;
4779 
4780 	DEBUG_SYNC_C("fts_sync_begin");
4781 	fts_sync_begin(sync);
4782 
4783 	if (has_dict_lock) {
4784 		/* If lock is already taken mark that in transaction
4785 		 * so rollback will not try to take it again.
4786 		 */
4787 		sync->trx->dict_operation_lock_mode = RW_S_LATCH;
4788 	}
4789 
4790 	do {
4791 		if (cache->total_size > fts_max_cache_size) {
4792 			/* Avoid the case: sync never finish when
4793 			insert/update keeps comming. */
4794 			ut_ad(sync->unlock_cache);
4795 			sync->unlock_cache = false;
4796 		}
4797 
4798 		for (i = 0; i < ib_vector_size(cache->indexes); ++i) {
4799 			fts_index_cache_t*	index_cache;
4800 
4801 			index_cache = static_cast<fts_index_cache_t*>(
4802 				ib_vector_get(cache->indexes, i));
4803 
4804 			if (index_cache->index->to_be_dropped
4805 			    || index_cache->index->table->to_be_dropped) {
4806 				continue;
4807 			}
4808 
4809 			DBUG_EXECUTE_IF("fts_instrument_sync_before_syncing",
4810 				os_thread_sleep(300000););
4811 
4812 			index_cache->index->index_fts_syncing = true;
4813 
4814 			error = fts_sync_index(sync, index_cache);
4815 
4816 			if (error != DB_SUCCESS) {
4817 				break;
4818 			}
4819 		}
4820 
4821 		DBUG_EXECUTE_IF("fts_instrument_sync_interrupted",
4822 				sync->interrupted = true;
4823 				error = DB_INTERRUPTED;
4824 		);
4825 
4826 		if (error != DB_SUCCESS) {
4827 			break;
4828 		}
4829 	} while (!fts_check_all_indexes_synced(sync));
4830 
4831 	if (error == DB_SUCCESS && !sync->interrupted) {
4832 		error = fts_sync_commit(sync);
4833 	} else {
4834 		fts_sync_rollback(sync);
4835 	}
4836 
4837 	rw_lock_x_lock(&cache->lock);
4838 	/* Clear fts syncing flags of any indexes in case sync is
4839 	interrupted */
4840 	DEBUG_SYNC_C("fts_instrument_sync");
4841 	for (i = 0; i < ib_vector_size(cache->indexes); ++i) {
4842 		fts_index_cache_t*      index_cache;
4843 		index_cache = static_cast<fts_index_cache_t*>(
4844 			ib_vector_get(cache->indexes, i));
4845 			if (index_cache->index->index_fts_syncing == true) {
4846 				index_cache->index->index_fts_syncing = false;
4847 			}
4848 		}
4849 	sync->interrupted = false;
4850 	sync->in_progress = false;
4851 	os_event_set(sync->event);
4852 	rw_lock_x_unlock(&cache->lock);
4853 
4854 	/* We need to check whether an optimize is required, for that
4855 	we make copies of the two variables that control the trigger. These
4856 	variables can change behind our back and we don't want to hold the
4857 	lock for longer than is needed. */
4858 	mutex_enter(&cache->deleted_lock);
4859 
4860 	cache->added = 0;
4861 	cache->deleted = 0;
4862 
4863 	mutex_exit(&cache->deleted_lock);
4864 
4865 	return(error);
4866 }
4867 
4868 /** Run SYNC on the table, i.e., write out data from the cache to the
4869 FTS auxiliary INDEX table and clear the cache at the end.
4870 @param[in,out]	table		fts table
4871 @param[in]	unlock_cache	whether unlock cache when write node
4872 @param[in]	wait		whether wait for existing sync to finish
4873 @param[in]	has_dict	whether has dict operation lock
4874 @return DB_SUCCESS on success, error code on failure. */
4875 dberr_t
fts_sync_table(dict_table_t * table,bool unlock_cache,bool wait,bool has_dict)4876 fts_sync_table(
4877 	dict_table_t*	table,
4878 	bool		unlock_cache,
4879 	bool		wait,
4880 	bool		has_dict)
4881 {
4882 	dberr_t	err = DB_SUCCESS;
4883 
4884 	ut_ad(table->fts);
4885 
4886 	if (!dict_table_is_discarded(table) && table->fts->cache
4887 	    && !dict_table_is_corrupted(table)) {
4888 		err = fts_sync(table->fts->cache->sync,
4889 			       unlock_cache, wait, has_dict);
4890 	}
4891 
4892 	return(err);
4893 }
4894 
4895 /** Check fts token
4896 1. for ngram token, check whether the token contains any words in stopwords
4897 2. for non-ngram token, check if it's stopword or less than fts_min_token_size
4898 or greater than fts_max_token_size.
4899 @param[in]	token		token string
4900 @param[in]	stopwords	stopwords rb tree
4901 @param[in]	is_ngram	is ngram parser
4902 @param[in]	cs		token charset
4903 @retval	true	if it is not stopword and length in range
4904 @retval	false	if it is stopword or lenght not in range */
4905 bool
fts_check_token(const fts_string_t * token,const ib_rbt_t * stopwords,bool is_ngram,const CHARSET_INFO * cs)4906 fts_check_token(
4907 	const fts_string_t*		token,
4908 	const ib_rbt_t*			stopwords,
4909 	bool				is_ngram,
4910 	const CHARSET_INFO*		cs)
4911 {
4912 	ut_ad(cs != NULL || stopwords == NULL);
4913 
4914 	if (!is_ngram) {
4915 		ib_rbt_bound_t  parent;
4916 
4917 		if (token->f_n_char < fts_min_token_size
4918 		    || token->f_n_char > fts_max_token_size
4919 		    || (stopwords != NULL
4920 			&& rbt_search(stopwords, &parent, token) == 0)) {
4921 			return(false);
4922 		} else {
4923 			return(true);
4924 		}
4925 	}
4926 
4927 	/* Check token for ngram. */
4928 	DBUG_EXECUTE_IF(
4929 		"fts_instrument_ignore_ngram_check",
4930 		return(true);
4931 	);
4932 
4933 	/* We ignore fts_min_token_size when ngram */
4934 	ut_ad(token->f_n_char > 0
4935 	      && token->f_n_char <= fts_max_token_size);
4936 
4937 	if (stopwords == NULL) {
4938 		return(true);
4939 	}
4940 
4941 	/*Ngram checks whether the token contains any words in stopwords.
4942 	We can't simply use CONTAIN to search in stopwords, because it's
4943 	built on COMPARE. So we need to tokenize the token into words
4944 	from unigram to f_n_char, and check them separately. */
4945 	for (ulint ngram_token_size = 1; ngram_token_size <= token->f_n_char;
4946 	     ngram_token_size ++) {
4947 		const char*	start;
4948 		const char*	next;
4949 		const char*	end;
4950 		ulint		char_len;
4951 		ulint		n_chars;
4952 
4953 		start = reinterpret_cast<char*>(token->f_str);
4954 		next = start;
4955 		end = start + token->f_len;
4956 		n_chars = 0;
4957 
4958 		while (next < end) {
4959 			char_len = my_mbcharlen_ptr(cs, next, end);
4960 
4961 			if (next + char_len > end || char_len == 0) {
4962 				break;
4963 			} else {
4964 				/* Skip SPACE */
4965 				if (char_len == 1 && *next == ' ') {
4966 					start = next + 1;
4967 					next = start;
4968 					n_chars = 0;
4969 
4970 					continue;
4971 				}
4972 
4973 				next += char_len;
4974 				n_chars++;
4975 			}
4976 
4977 			if (n_chars == ngram_token_size) {
4978 				fts_string_t	ngram_token;
4979 				ngram_token.f_str =
4980 					reinterpret_cast<byte*>(
4981 					const_cast<char*>(start));
4982 				ngram_token.f_len = next - start;
4983 				ngram_token.f_n_char = ngram_token_size;
4984 
4985 				ib_rbt_bound_t  parent;
4986 				if (rbt_search(stopwords, &parent,
4987 					       &ngram_token) == 0) {
4988 					return(false);
4989 				}
4990 
4991 				/* Move a char forward */
4992 				start += my_mbcharlen_ptr(cs, start, end);
4993 				n_chars = ngram_token_size - 1;
4994 			}
4995 		}
4996 	}
4997 
4998 	return(true);
4999 }
5000 
5001 /** Add the token and its start position to the token's list of positions.
5002 @param[in,out]	result_doc	result doc rb tree
5003 @param[in]	str		token string
5004 @param[in]	position	token position */
5005 static
5006 void
fts_add_token(fts_doc_t * result_doc,fts_string_t str,ulint position)5007 fts_add_token(
5008 	fts_doc_t*	result_doc,
5009 	fts_string_t	str,
5010 	ulint		position)
5011 {
5012 	/* Ignore string whose character number is less than
5013 	"fts_min_token_size" or more than "fts_max_token_size" */
5014 
5015 	if (fts_check_token(&str, NULL, result_doc->is_ngram,
5016 			    result_doc->charset)) {
5017 
5018 		mem_heap_t*	heap;
5019 		fts_string_t	t_str;
5020 		fts_token_t*	token;
5021 		ib_rbt_bound_t	parent;
5022 		ulint		newlen;
5023 
5024 		heap = static_cast<mem_heap_t*>(result_doc->self_heap->arg);
5025 
5026 		t_str.f_n_char = str.f_n_char;
5027 
5028 		t_str.f_len = str.f_len * result_doc->charset->casedn_multiply + 1;
5029 
5030 		t_str.f_str = static_cast<byte*>(
5031 			mem_heap_alloc(heap, t_str.f_len));
5032 
5033 		/* For binary collations, a case sensitive search is
5034 		performed. Hence don't convert to lower case. */
5035 		if (my_binary_compare(result_doc->charset)) {
5036 			memcpy(t_str.f_str, str.f_str, str.f_len);
5037 			t_str.f_str[str.f_len]= 0;
5038 			newlen= str.f_len;
5039 		} else {
5040 			newlen = innobase_fts_casedn_str(
5041 				result_doc->charset, (char*) str.f_str, str.f_len,
5042 				(char*) t_str.f_str, t_str.f_len);
5043 		}
5044 
5045 		t_str.f_len = newlen;
5046 		t_str.f_str[newlen] = 0;
5047 
5048 		/* Add the word to the document statistics. If the word
5049 		hasn't been seen before we create a new entry for it. */
5050 		if (rbt_search(result_doc->tokens, &parent, &t_str) != 0) {
5051 			fts_token_t	new_token;
5052 
5053 			new_token.text.f_len = newlen;
5054 			new_token.text.f_str = t_str.f_str;
5055 			new_token.text.f_n_char = t_str.f_n_char;
5056 
5057 			new_token.positions = ib_vector_create(
5058 				result_doc->self_heap, sizeof(ulint), 32);
5059 
5060 			parent.last = rbt_add_node(
5061 				result_doc->tokens, &parent, &new_token);
5062 
5063 			ut_ad(rbt_validate(result_doc->tokens));
5064 		}
5065 
5066 		token = rbt_value(fts_token_t, parent.last);
5067 		ib_vector_push(token->positions, &position);
5068 	}
5069 }
5070 
5071 /********************************************************************
5072 Process next token from document starting at the given position, i.e., add
5073 the token's start position to the token's list of positions.
5074 @return number of characters handled in this call */
5075 static
5076 ulint
fts_process_token(fts_doc_t * doc,fts_doc_t * result,ulint start_pos,ulint add_pos)5077 fts_process_token(
5078 /*==============*/
5079 	fts_doc_t*	doc,		/* in/out: document to
5080 					tokenize */
5081 	fts_doc_t*	result,		/* out: if provided, save
5082 					result here */
5083 	ulint		start_pos,	/*!< in: start position in text */
5084 	ulint		add_pos)	/*!< in: add this position to all
5085 					tokens from this tokenization */
5086 {
5087 	ulint		ret;
5088 	fts_string_t	str;
5089 	ulint		position;
5090 	fts_doc_t*	result_doc;
5091 	byte		buf[FTS_MAX_WORD_LEN + 1];
5092 
5093 	str.f_str = buf;
5094 
5095 	/* Determine where to save the result. */
5096 	result_doc = (result != NULL) ? result : doc;
5097 
5098 	/* The length of a string in characters is set here only. */
5099 
5100 	ret = innobase_mysql_fts_get_token(
5101 		doc->charset, doc->text.f_str + start_pos,
5102 		doc->text.f_str + doc->text.f_len, &str);
5103 
5104 	position = start_pos + ret - str.f_len + add_pos;
5105 
5106 	fts_add_token(result_doc, str, position);
5107 
5108 	return(ret);
5109 }
5110 
5111 /*************************************************************//**
5112 Get token char size by charset
5113 @return token size */
5114 ulint
fts_get_token_size(const CHARSET_INFO * cs,const char * token,ulint len)5115 fts_get_token_size(
5116 /*===============*/
5117 	const CHARSET_INFO*	cs,	/*!< in: Character set */
5118 	const char*		token,	/*!< in: token */
5119 	ulint			len)	/*!< in: token length */
5120 {
5121 	char*	start;
5122 	char*	end;
5123 	ulint	size = 0;
5124 
5125 	/* const_cast is for reinterpret_cast below, or it will fail. */
5126 	start = const_cast<char*>(token);
5127 	end = start + len;
5128 	while (start < end) {
5129 		int	ctype;
5130 		int	mbl;
5131 
5132 		mbl = cs->cset->ctype(
5133 			cs, &ctype,
5134 			reinterpret_cast<uchar*>(start),
5135 			reinterpret_cast<uchar*>(end));
5136 
5137 		size++;
5138 
5139 		start += mbl > 0 ? mbl : (mbl < 0 ? -mbl : 1);
5140 	}
5141 
5142 	return(size);
5143 }
5144 
5145 /*************************************************************//**
5146 FTS plugin parser 'myql_parser' callback function for document tokenize.
5147 Refer to 'st_mysql_ftparser_param' for more detail.
5148 @return always returns 0 */
5149 int
fts_tokenize_document_internal(MYSQL_FTPARSER_PARAM * param,char * doc,int len)5150 fts_tokenize_document_internal(
5151 /*===========================*/
5152 	MYSQL_FTPARSER_PARAM*	param,	/*!< in: parser parameter */
5153 	char*			doc,	/*!< in/out: document */
5154 	int			len)	/*!< in: document length */
5155 {
5156 	fts_string_t	str;
5157 	byte		buf[FTS_MAX_WORD_LEN + 1];
5158 	MYSQL_FTPARSER_BOOLEAN_INFO bool_info =
5159 		{ FT_TOKEN_WORD, 0, 0, 0, 0, 0, ' ', 0 };
5160 
5161 	ut_ad(len >= 0);
5162 
5163 	str.f_str = buf;
5164 
5165 	for (ulint i = 0, inc = 0; i < static_cast<ulint>(len); i += inc) {
5166 		inc = innobase_mysql_fts_get_token(
5167 			const_cast<CHARSET_INFO*>(param->cs),
5168 			reinterpret_cast<byte*>(doc) + i,
5169 			reinterpret_cast<byte*>(doc) + len,
5170 			&str);
5171 
5172 		if (str.f_len > 0) {
5173 			bool_info.position =
5174 				static_cast<int>(i + inc - str.f_len);
5175 			ut_ad(bool_info.position >= 0);
5176 
5177 			/* Stop when add word fails */
5178 			if (param->mysql_add_word(
5179 				param,
5180 				reinterpret_cast<char*>(str.f_str),
5181 				static_cast<int>(str.f_len),
5182 				&bool_info)) {
5183 				break;
5184 			}
5185 		}
5186 	}
5187 
5188 	return(0);
5189 }
5190 
5191 /******************************************************************//**
5192 FTS plugin parser 'myql_add_word' callback function for document tokenize.
5193 Refer to 'st_mysql_ftparser_param' for more detail.
5194 @return always returns 0 */
5195 static
5196 int
fts_tokenize_add_word_for_parser(MYSQL_FTPARSER_PARAM * param,char * word,int word_len,MYSQL_FTPARSER_BOOLEAN_INFO * boolean_info)5197 fts_tokenize_add_word_for_parser(
5198 /*=============================*/
5199 	MYSQL_FTPARSER_PARAM*	param,		/* in: parser paramter */
5200 	char*			word,		/* in: token word */
5201 	int			word_len,	/* in: word len */
5202 	MYSQL_FTPARSER_BOOLEAN_INFO* boolean_info) /* in: word boolean info */
5203 {
5204 	fts_string_t	str;
5205 	fts_tokenize_param_t*	fts_param;
5206 	fts_doc_t*	result_doc;
5207 	ulint		position;
5208 
5209 	fts_param = static_cast<fts_tokenize_param_t*>(param->mysql_ftparam);
5210 	result_doc = fts_param->result_doc;
5211 	ut_ad(result_doc != NULL);
5212 
5213 	str.f_str = reinterpret_cast<byte*>(word);
5214 	str.f_len = word_len;
5215 	str.f_n_char = fts_get_token_size(
5216 		const_cast<CHARSET_INFO*>(param->cs), word, word_len);
5217 
5218 	ut_ad(boolean_info->position >= 0);
5219 	position = boolean_info->position + fts_param->add_pos;
5220 
5221 	fts_add_token(result_doc, str, position);
5222 
5223 	return(0);
5224 }
5225 
5226 /******************************************************************//**
5227 Parse a document using an external / user supplied parser */
5228 static
5229 void
fts_tokenize_by_parser(fts_doc_t * doc,st_mysql_ftparser * parser,fts_tokenize_param_t * fts_param)5230 fts_tokenize_by_parser(
5231 /*===================*/
5232 	fts_doc_t*		doc,	/* in/out: document to tokenize */
5233 	st_mysql_ftparser*	parser, /* in: plugin fts parser */
5234 	fts_tokenize_param_t*	fts_param) /* in: fts tokenize param */
5235 {
5236 	MYSQL_FTPARSER_PARAM	param;
5237 
5238 	ut_a(parser);
5239 
5240 	/* Set paramters for param */
5241 	param.mysql_parse = fts_tokenize_document_internal;
5242 	param.mysql_add_word = fts_tokenize_add_word_for_parser;
5243 	param.mysql_ftparam = fts_param;
5244 	param.cs = doc->charset;
5245 	param.doc = reinterpret_cast<char*>(doc->text.f_str);
5246 	param.length = static_cast<int>(doc->text.f_len);
5247 	param.mode= MYSQL_FTPARSER_SIMPLE_MODE;
5248 
5249 	PARSER_INIT(parser, &param);
5250 	parser->parse(&param);
5251 	PARSER_DEINIT(parser, &param);
5252 }
5253 
5254 /******************************************************************//**
5255 Tokenize a document. */
5256 void
fts_tokenize_document(fts_doc_t * doc,fts_doc_t * result,st_mysql_ftparser * parser)5257 fts_tokenize_document(
5258 /*==================*/
5259 	fts_doc_t*	doc,		/* in/out: document to
5260 					tokenize */
5261 	fts_doc_t*	result,		/* out: if provided, save
5262 					the result token here */
5263 	st_mysql_ftparser*	parser) /* in: plugin fts parser */
5264 {
5265 	ut_a(!doc->tokens);
5266 	ut_a(doc->charset);
5267 
5268 	doc->tokens = rbt_create_arg_cmp(
5269 		sizeof(fts_token_t), innobase_fts_text_cmp, doc->charset);
5270 
5271 	if (parser != NULL) {
5272 		fts_tokenize_param_t	fts_param;
5273 
5274 		fts_param.result_doc = (result != NULL) ? result : doc;
5275 		fts_param.add_pos = 0;
5276 
5277 		fts_tokenize_by_parser(doc, parser, &fts_param);
5278 	} else {
5279 		ulint		inc;
5280 
5281 		for (ulint i = 0; i < doc->text.f_len; i += inc) {
5282 			inc = fts_process_token(doc, result, i, 0);
5283 			ut_a(inc > 0);
5284 		}
5285 	}
5286 }
5287 
5288 /******************************************************************//**
5289 Continue to tokenize a document. */
5290 void
fts_tokenize_document_next(fts_doc_t * doc,ulint add_pos,fts_doc_t * result,st_mysql_ftparser * parser)5291 fts_tokenize_document_next(
5292 /*=======================*/
5293 	fts_doc_t*	doc,		/*!< in/out: document to
5294 					tokenize */
5295 	ulint		add_pos,	/*!< in: add this position to all
5296 					tokens from this tokenization */
5297 	fts_doc_t*	result,		/*!< out: if provided, save
5298 					the result token here */
5299 	st_mysql_ftparser*	parser) /* in: plugin fts parser */
5300 {
5301 	ut_a(doc->tokens);
5302 
5303 	if (parser) {
5304 		fts_tokenize_param_t	fts_param;
5305 
5306 		fts_param.result_doc = (result != NULL) ? result : doc;
5307 		fts_param.add_pos = add_pos;
5308 
5309 		fts_tokenize_by_parser(doc, parser, &fts_param);
5310 	} else {
5311 		ulint		inc;
5312 
5313 		for (ulint i = 0; i < doc->text.f_len; i += inc) {
5314 			inc = fts_process_token(doc, result, i, add_pos);
5315 			ut_a(inc > 0);
5316 		}
5317 	}
5318 }
5319 
5320 /********************************************************************
5321 Create the vector of fts_get_doc_t instances. */
5322 ib_vector_t*
fts_get_docs_create(fts_cache_t * cache)5323 fts_get_docs_create(
5324 /*================*/
5325 						/* out: vector of
5326 						fts_get_doc_t instances */
5327 	fts_cache_t*	cache)			/*!< in: fts cache */
5328 {
5329 	ib_vector_t*	get_docs;
5330 
5331 	ut_ad(rw_lock_own(&cache->init_lock, RW_LOCK_X));
5332 
5333 	/* We need one instance of fts_get_doc_t per index. */
5334 	get_docs = ib_vector_create(cache->self_heap, sizeof(fts_get_doc_t), 4);
5335 
5336 	/* Create the get_doc instance, we need one of these
5337 	per FTS index. */
5338 	for (ulint i = 0; i < ib_vector_size(cache->indexes); ++i) {
5339 
5340 		dict_index_t**	index;
5341 		fts_get_doc_t*	get_doc;
5342 
5343 		index = static_cast<dict_index_t**>(
5344 			ib_vector_get(cache->indexes, i));
5345 
5346 		get_doc = static_cast<fts_get_doc_t*>(
5347 			ib_vector_push(get_docs, NULL));
5348 
5349 		memset(get_doc, 0x0, sizeof(*get_doc));
5350 
5351 		get_doc->index_cache = fts_get_index_cache(cache, *index);
5352 		get_doc->cache = cache;
5353 
5354 		/* Must find the index cache. */
5355 		ut_a(get_doc->index_cache != NULL);
5356 	}
5357 
5358 	return(get_docs);
5359 }
5360 
5361 /********************************************************************
5362 Release any resources held by the fts_get_doc_t instances. */
5363 static
5364 void
fts_get_docs_clear(ib_vector_t * get_docs)5365 fts_get_docs_clear(
5366 /*===============*/
5367 	ib_vector_t*	get_docs)		/*!< in: Doc retrieval vector */
5368 {
5369 	ulint		i;
5370 
5371 	/* Release the get doc graphs if any. */
5372 	for (i = 0; i < ib_vector_size(get_docs); ++i) {
5373 
5374 		fts_get_doc_t*	get_doc = static_cast<fts_get_doc_t*>(
5375 			ib_vector_get(get_docs, i));
5376 
5377 		if (get_doc->get_document_graph != NULL) {
5378 
5379 			ut_a(get_doc->index_cache);
5380 
5381 			fts_que_graph_free(get_doc->get_document_graph);
5382 			get_doc->get_document_graph = NULL;
5383 		}
5384 	}
5385 }
5386 
5387 /*********************************************************************//**
5388 Get the initial Doc ID by consulting the CONFIG table
5389 @return initial Doc ID */
5390 doc_id_t
fts_init_doc_id(const dict_table_t * table)5391 fts_init_doc_id(
5392 /*============*/
5393 	const dict_table_t*	table)		/*!< in: table */
5394 {
5395 	doc_id_t	max_doc_id = 0;
5396 
5397 	rw_lock_x_lock(&table->fts->cache->lock);
5398 
5399 	/* Return if the table is already initialized for DOC ID */
5400 	if (table->fts->cache->first_doc_id != FTS_NULL_DOC_ID) {
5401 		rw_lock_x_unlock(&table->fts->cache->lock);
5402 		return(0);
5403 	}
5404 
5405 	DEBUG_SYNC_C("fts_initialize_doc_id");
5406 
5407 	/* Then compare this value with the ID value stored in the CONFIG
5408 	table. The larger one will be our new initial Doc ID */
5409 	fts_cmp_set_sync_doc_id(table, 0, FALSE, &max_doc_id);
5410 
5411 	/* If DICT_TF2_FTS_ADD_DOC_ID is set, we are in the process of
5412 	creating index (and add doc id column. No need to recovery
5413 	documents */
5414 	if (!DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_ADD_DOC_ID)) {
5415 		fts_init_index((dict_table_t*) table, TRUE);
5416 	}
5417 
5418 	table->fts->fts_status |= ADDED_TABLE_SYNCED;
5419 
5420 	table->fts->cache->first_doc_id = max_doc_id;
5421 
5422 	rw_lock_x_unlock(&table->fts->cache->lock);
5423 
5424 	ut_ad(max_doc_id > 0);
5425 
5426 	return(max_doc_id);
5427 }
5428 
#ifdef FTS_MULT_INDEX
/** Check if the index is in the affected set, i.e. whether the index of
get_doc->index_cache is one of the pointers stored in fts_indexes.
@param[in]	fts_indexes	affected FTS indexes
@param[in]	get_doc		info for reading document
@return TRUE if index is updated */
static
ibool
fts_is_index_updated(
	const ib_vector_t*	fts_indexes,
	const fts_get_doc_t*	get_doc)
{
	dict_index_t*	index = get_doc->index_cache->index;

	for (ulint i = 0; i < ib_vector_size(fts_indexes); ++i) {
		const dict_index_t*	updated_fts_index =
			static_cast<const dict_index_t*>(
				ib_vector_getp_const(fts_indexes, i));

		ut_a(updated_fts_index != NULL);

		/* Indexes are compared by address. */
		if (updated_fts_index == index) {
			return(TRUE);
		}
	}

	return(FALSE);
}
#endif /* FTS_MULT_INDEX */
5460 
5461 /*********************************************************************//**
5462 Fetch COUNT(*) from specified table.
5463 @return the number of rows in the table */
5464 ulint
fts_get_rows_count(fts_table_t * fts_table)5465 fts_get_rows_count(
5466 /*===============*/
5467 	fts_table_t*	fts_table)	/*!< in: fts table to read */
5468 {
5469 	trx_t*		trx;
5470 	pars_info_t*	info;
5471 	que_t*		graph;
5472 	dberr_t		error;
5473 	ulint		count = 0;
5474 	char		table_name[MAX_FULL_NAME_LEN];
5475 
5476 	trx = trx_allocate_for_background();
5477 
5478 	trx->op_info = "fetching FT table rows count";
5479 
5480 	info = pars_info_create();
5481 
5482 	pars_info_bind_function(info, "my_func", fts_read_ulint, &count);
5483 
5484 	fts_get_table_name(fts_table, table_name);
5485 	pars_info_bind_id(info, true, "table_name", table_name);
5486 
5487 	graph = fts_parse_sql(
5488 		fts_table,
5489 		info,
5490 		"DECLARE FUNCTION my_func;\n"
5491 		"DECLARE CURSOR c IS"
5492 		" SELECT COUNT(*)"
5493 		" FROM $table_name;\n"
5494 		"BEGIN\n"
5495 		"\n"
5496 		"OPEN c;\n"
5497 		"WHILE 1 = 1 LOOP\n"
5498 		"  FETCH c INTO my_func();\n"
5499 		"  IF c % NOTFOUND THEN\n"
5500 		"    EXIT;\n"
5501 		"  END IF;\n"
5502 		"END LOOP;\n"
5503 		"CLOSE c;");
5504 
5505 	for (;;) {
5506 		error = fts_eval_sql(trx, graph);
5507 
5508 		if (error == DB_SUCCESS) {
5509 			fts_sql_commit(trx);
5510 
5511 			break;				/* Exit the loop. */
5512 		} else {
5513 			fts_sql_rollback(trx);
5514 
5515 			if (error == DB_LOCK_WAIT_TIMEOUT) {
5516 				ib::warn() << "lock wait timeout reading"
5517 					" FTS table. Retrying!";
5518 
5519 				trx->error_state = DB_SUCCESS;
5520 			} else {
5521 				ib::error() << "(" << ut_strerr(error)
5522 					<< ") while reading FTS table.";
5523 
5524 				break;			/* Exit the loop. */
5525 			}
5526 		}
5527 	}
5528 
5529 	fts_que_graph_free(graph);
5530 
5531 	trx_free_for_background(trx);
5532 
5533 	return(count);
5534 }
5535 
#ifdef FTS_CACHE_SIZE_DEBUG
/** Read the max cache size parameter from the config table and store it
in sync->max_cache_size. A background transaction is allocated for the
read and freed again before returning.
@param[in,out]	sync	sync state */
static
void
fts_update_max_cache_size(
	fts_sync_t*	sync)
{
	fts_table_t	fts_table;
	trx_t*		trx = trx_allocate_for_background();

	FTS_INIT_FTS_TABLE(&fts_table, "CONFIG", FTS_COMMON_TABLE, sync->table);

	/* The size returned is in bytes. */
	sync->max_cache_size = fts_get_max_cache_size(trx, &fts_table);

	fts_sql_commit(trx);

	trx_free_for_background(trx);
}
#endif /* FTS_CACHE_SIZE_DEBUG */
5560 
5561 /*********************************************************************//**
5562 Free the modified rows of a table. */
5563 UNIV_INLINE
5564 void
fts_trx_table_rows_free(ib_rbt_t * rows)5565 fts_trx_table_rows_free(
5566 /*====================*/
5567 	ib_rbt_t*	rows)			/*!< in: rbt of rows to free */
5568 {
5569 	const ib_rbt_node_t*	node;
5570 
5571 	for (node = rbt_first(rows); node; node = rbt_first(rows)) {
5572 		fts_trx_row_t*	row;
5573 
5574 		row = rbt_value(fts_trx_row_t, node);
5575 
5576 		if (row->fts_indexes != NULL) {
5577 			/* This vector shouldn't be using the
5578 			heap allocator.  */
5579 			ut_a(row->fts_indexes->allocator->arg == NULL);
5580 
5581 			ib_vector_free(row->fts_indexes);
5582 			row->fts_indexes = NULL;
5583 		}
5584 
5585 		ut_free(rbt_remove_node(rows, node));
5586 	}
5587 
5588 	ut_a(rbt_empty(rows));
5589 	rbt_free(rows);
5590 }
5591 
5592 /*********************************************************************//**
5593 Free an FTS savepoint instance. */
5594 UNIV_INLINE
5595 void
fts_savepoint_free(fts_savepoint_t * savepoint)5596 fts_savepoint_free(
5597 /*===============*/
5598 	fts_savepoint_t*	savepoint)	/*!< in: savepoint instance */
5599 {
5600 	const ib_rbt_node_t*	node;
5601 	ib_rbt_t*		tables = savepoint->tables;
5602 
5603 	/* Nothing to free! */
5604 	if (tables == NULL) {
5605 		return;
5606 	}
5607 
5608 	for (node = rbt_first(tables); node; node = rbt_first(tables)) {
5609 		fts_trx_table_t*	ftt;
5610 		fts_trx_table_t**	fttp;
5611 
5612 		fttp = rbt_value(fts_trx_table_t*, node);
5613 		ftt = *fttp;
5614 
5615 		/* This can be NULL if a savepoint was released. */
5616 		if (ftt->rows != NULL) {
5617 			fts_trx_table_rows_free(ftt->rows);
5618 			ftt->rows = NULL;
5619 		}
5620 
5621 		/* This can be NULL if a savepoint was released. */
5622 		if (ftt->added_doc_ids != NULL) {
5623 			fts_doc_ids_free(ftt->added_doc_ids);
5624 			ftt->added_doc_ids = NULL;
5625 		}
5626 
5627 		/* The default savepoint name must be NULL. */
5628 		if (ftt->docs_added_graph) {
5629 			fts_que_graph_free(ftt->docs_added_graph);
5630 		}
5631 
5632 		/* NOTE: We are responsible for free'ing the node */
5633 		ut_free(rbt_remove_node(tables, node));
5634 	}
5635 
5636 	ut_a(rbt_empty(tables));
5637 	rbt_free(tables);
5638 	savepoint->tables = NULL;
5639 }
5640 
5641 /*********************************************************************//**
5642 Free an FTS trx. */
5643 void
fts_trx_free(fts_trx_t * fts_trx)5644 fts_trx_free(
5645 /*=========*/
5646 	fts_trx_t*	fts_trx)		/* in, own: FTS trx */
5647 {
5648 	ulint		i;
5649 
5650 	for (i = 0; i < ib_vector_size(fts_trx->savepoints); ++i) {
5651 		fts_savepoint_t*	savepoint;
5652 
5653 		savepoint = static_cast<fts_savepoint_t*>(
5654 			ib_vector_get(fts_trx->savepoints, i));
5655 
5656 		/* The default savepoint name must be NULL. */
5657 		if (i == 0) {
5658 			ut_a(savepoint->name == NULL);
5659 		}
5660 
5661 		fts_savepoint_free(savepoint);
5662 	}
5663 
5664 	for (i = 0; i < ib_vector_size(fts_trx->last_stmt); ++i) {
5665 		fts_savepoint_t*	savepoint;
5666 
5667 		savepoint = static_cast<fts_savepoint_t*>(
5668 			ib_vector_get(fts_trx->last_stmt, i));
5669 
5670 		/* The default savepoint name must be NULL. */
5671 		if (i == 0) {
5672 			ut_a(savepoint->name == NULL);
5673 		}
5674 
5675 		fts_savepoint_free(savepoint);
5676 	}
5677 
5678 	if (fts_trx->heap) {
5679 		mem_heap_free(fts_trx->heap);
5680 	}
5681 }
5682 
5683 /*********************************************************************//**
5684 Extract the doc id from the FTS hidden column.
5685 @return doc id that was extracted from rec */
5686 doc_id_t
fts_get_doc_id_from_row(dict_table_t * table,dtuple_t * row)5687 fts_get_doc_id_from_row(
5688 /*====================*/
5689 	dict_table_t*	table,			/*!< in: table */
5690 	dtuple_t*	row)			/*!< in: row whose FTS doc id we
5691 						want to extract.*/
5692 {
5693 	dfield_t*	field;
5694 	doc_id_t	doc_id = 0;
5695 
5696 	ut_a(table->fts->doc_col != ULINT_UNDEFINED);
5697 
5698 	field = dtuple_get_nth_field(row, table->fts->doc_col);
5699 
5700 	ut_a(dfield_get_len(field) == sizeof(doc_id));
5701 	ut_a(dfield_get_type(field)->mtype == DATA_INT);
5702 
5703 	doc_id = fts_read_doc_id(
5704 		static_cast<const byte*>(dfield_get_data(field)));
5705 
5706 	return(doc_id);
5707 }
5708 
5709 /** Extract the doc id from the record that belongs to index.
5710 @param[in]	table	table
5711 @param[in]	rec	record contains FTS_DOC_ID
5712 @param[in]	index	index of rec
5713 @param[in]	heap	heap memory
5714 @return doc id that was extracted from rec */
5715 doc_id_t
fts_get_doc_id_from_rec(dict_table_t * table,const rec_t * rec,const dict_index_t * index,mem_heap_t * heap)5716 fts_get_doc_id_from_rec(
5717 	dict_table_t*		table,
5718 	const rec_t*		rec,
5719 	const dict_index_t*	index,
5720 	mem_heap_t*		heap)
5721 {
5722 	ulint		len;
5723 	const byte*	data;
5724 	ulint		col_no;
5725 	doc_id_t	doc_id = 0;
5726 	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
5727 	ulint*		offsets = offsets_;
5728 	mem_heap_t*	my_heap = heap;
5729 
5730 	ut_a(table->fts->doc_col != ULINT_UNDEFINED);
5731 
5732 	rec_offs_init(offsets_);
5733 
5734 	offsets = rec_get_offsets(
5735 		rec, index, offsets, ULINT_UNDEFINED, &my_heap);
5736 
5737 	col_no = dict_col_get_index_pos(
5738 		&table->cols[table->fts->doc_col], index);
5739 
5740 	ut_ad(col_no != ULINT_UNDEFINED);
5741 
5742 	data = rec_get_nth_field(rec, offsets, col_no, &len);
5743 
5744 	ut_a(len == 8);
5745 	ut_ad(8 == sizeof(doc_id));
5746 	doc_id = static_cast<doc_id_t>(mach_read_from_8(data));
5747 
5748 	if (my_heap && !heap) {
5749 		mem_heap_free(my_heap);
5750 	}
5751 
5752 	return(doc_id);
5753 }
5754 
5755 /*********************************************************************//**
5756 Search the index specific cache for a particular FTS index.
5757 @return the index specific cache else NULL */
5758 fts_index_cache_t*
fts_find_index_cache(const fts_cache_t * cache,const dict_index_t * index)5759 fts_find_index_cache(
5760 /*=================*/
5761 	const fts_cache_t*	cache,		/*!< in: cache to search */
5762 	const dict_index_t*	index)		/*!< in: index to search for */
5763 {
5764 	/* We cast away the const because our internal function, takes
5765 	non-const cache arg and returns a non-const pointer. */
5766 	return(static_cast<fts_index_cache_t*>(
5767 		fts_get_index_cache((fts_cache_t*) cache, index)));
5768 }
5769 
5770 /*********************************************************************//**
5771 Search cache for word.
5772 @return the word node vector if found else NULL */
5773 const ib_vector_t*
fts_cache_find_word(const fts_index_cache_t * index_cache,const fts_string_t * text)5774 fts_cache_find_word(
5775 /*================*/
5776 	const fts_index_cache_t*index_cache,	/*!< in: cache to search */
5777 	const fts_string_t*	text)		/*!< in: word to search for */
5778 {
5779 	ib_rbt_bound_t		parent;
5780 	const ib_vector_t*	nodes = NULL;
5781 #ifdef UNIV_DEBUG
5782 	dict_table_t*		table = index_cache->index->table;
5783 	fts_cache_t*		cache = table->fts->cache;
5784 
5785 	ut_ad(rw_lock_own(&cache->lock, RW_LOCK_X));
5786 #endif /* UNIV_DEBUG */
5787 
5788 	/* Lookup the word in the rb tree */
5789 	if (rbt_search(index_cache->words, &parent, text) == 0) {
5790 		const fts_tokenizer_word_t*	word;
5791 
5792 		word = rbt_value(fts_tokenizer_word_t, parent.last);
5793 
5794 		nodes = word->nodes;
5795 	}
5796 
5797 	return(nodes);
5798 }
5799 
5800 /*********************************************************************//**
5801 Check cache for deleted doc id.
5802 @return TRUE if deleted */
5803 ibool
fts_cache_is_deleted_doc_id(const fts_cache_t * cache,doc_id_t doc_id)5804 fts_cache_is_deleted_doc_id(
5805 /*========================*/
5806 	const fts_cache_t*	cache,		/*!< in: cache ito search */
5807 	doc_id_t		doc_id)		/*!< in: doc id to search for */
5808 {
5809 	ut_ad(mutex_own(&cache->deleted_lock));
5810 
5811 	for (ulint i = 0; i < ib_vector_size(cache->deleted_doc_ids); ++i) {
5812 		const fts_update_t*	update;
5813 
5814 		update = static_cast<const fts_update_t*>(
5815 			ib_vector_get_const(cache->deleted_doc_ids, i));
5816 
5817 		if (doc_id == update->doc_id) {
5818 
5819 			return(TRUE);
5820 		}
5821 	}
5822 
5823 	return(FALSE);
5824 }
5825 
5826 /*********************************************************************//**
5827 Append deleted doc ids to vector. */
5828 void
fts_cache_append_deleted_doc_ids(const fts_cache_t * cache,ib_vector_t * vector)5829 fts_cache_append_deleted_doc_ids(
5830 /*=============================*/
5831 	const fts_cache_t*	cache,		/*!< in: cache to use */
5832 	ib_vector_t*		vector)		/*!< in: append to this vector */
5833 {
5834 	mutex_enter(const_cast<ib_mutex_t*>(&cache->deleted_lock));
5835 
5836 	if (cache->deleted_doc_ids == NULL) {
5837 		mutex_exit((ib_mutex_t*) &cache->deleted_lock);
5838 		return;
5839 	}
5840 
5841 
5842 	for (ulint i = 0; i < ib_vector_size(cache->deleted_doc_ids); ++i) {
5843 		fts_update_t*	update;
5844 
5845 		update = static_cast<fts_update_t*>(
5846 			ib_vector_get(cache->deleted_doc_ids, i));
5847 
5848 		ib_vector_push(vector, &update->doc_id);
5849 	}
5850 
5851 	mutex_exit((ib_mutex_t*) &cache->deleted_lock);
5852 }
5853 
5854 /*********************************************************************//**
5855 Wait for the background thread to start. We poll to detect change
5856 of state, which is acceptable, since the wait should happen only
5857 once during startup.
5858 @return true if the thread started else FALSE (i.e timed out) */
5859 ibool
fts_wait_for_background_thread_to_start(dict_table_t * table,ulint max_wait)5860 fts_wait_for_background_thread_to_start(
5861 /*====================================*/
5862 	dict_table_t*		table,		/*!< in: table to which the thread
5863 						is attached */
5864 	ulint			max_wait)	/*!< in: time in microseconds, if
5865 						set to 0 then it disables
5866 						timeout checking */
5867 {
5868 	ulint			count = 0;
5869 	ibool			done = FALSE;
5870 
5871 	ut_a(max_wait == 0 || max_wait >= FTS_MAX_BACKGROUND_THREAD_WAIT);
5872 
5873 	for (;;) {
5874 		fts_t*		fts = table->fts;
5875 
5876 		mutex_enter(&fts->bg_threads_mutex);
5877 
5878 		if (fts->fts_status & BG_THREAD_READY) {
5879 
5880 			done = TRUE;
5881 		}
5882 
5883 		mutex_exit(&fts->bg_threads_mutex);
5884 
5885 		if (!done) {
5886 			os_thread_sleep(FTS_MAX_BACKGROUND_THREAD_WAIT);
5887 
5888 			if (max_wait > 0) {
5889 
5890 				max_wait -= FTS_MAX_BACKGROUND_THREAD_WAIT;
5891 
5892 				/* We ignore the residual value. */
5893 				if (max_wait < FTS_MAX_BACKGROUND_THREAD_WAIT) {
5894 					break;
5895 				}
5896 			}
5897 
5898 			++count;
5899 		} else {
5900 			break;
5901 		}
5902 
5903 		if (count >= FTS_BACKGROUND_THREAD_WAIT_COUNT) {
5904 			ib::error() << "The background thread for the FTS"
5905 				" table " << table->name
5906 				<< " refuses to start";
5907 
5908 			count = 0;
5909 		}
5910 	}
5911 
5912 	return(done);
5913 }
5914 
5915 /*********************************************************************//**
5916 Add the FTS document id hidden column. */
5917 void
fts_add_doc_id_column(dict_table_t * table,mem_heap_t * heap)5918 fts_add_doc_id_column(
5919 /*==================*/
5920 	dict_table_t*	table,	/*!< in/out: Table with FTS index */
5921 	mem_heap_t*	heap)	/*!< in: temporary memory heap, or NULL */
5922 {
5923 	dict_mem_table_add_col(
5924 		table, heap,
5925 		FTS_DOC_ID_COL_NAME,
5926 		DATA_INT,
5927 		dtype_form_prtype(
5928 			DATA_NOT_NULL | DATA_UNSIGNED
5929 			| DATA_BINARY_TYPE | DATA_FTS_DOC_ID, 0),
5930 		sizeof(doc_id_t));
5931 	DICT_TF2_FLAG_SET(table, DICT_TF2_FTS_HAS_DOC_ID);
5932 }
5933 
5934 /** Add new fts doc id to the update vector.
5935 @param[in]	table		the table that contains the FTS index.
5936 @param[in,out]	ufield		the fts doc id field in the update vector.
5937 				No new memory is allocated for this in this
5938 				function.
5939 @param[in,out]	next_doc_id	the fts doc id that has been added to the
5940 				update vector.  If 0, a new fts doc id is
5941 				automatically generated.  The memory provided
5942 				for this argument will be used by the update
5943 				vector. Ensure that the life time of this
5944 				memory matches that of the update vector.
5945 @return the fts doc id used in the update vector */
5946 doc_id_t
fts_update_doc_id(dict_table_t * table,upd_field_t * ufield,doc_id_t * next_doc_id)5947 fts_update_doc_id(
5948 	dict_table_t*	table,
5949 	upd_field_t*	ufield,
5950 	doc_id_t*	next_doc_id)
5951 {
5952 	doc_id_t	doc_id;
5953 	dberr_t		error = DB_SUCCESS;
5954 
5955 	if (*next_doc_id) {
5956 		doc_id = *next_doc_id;
5957 	} else {
5958 		/* Get the new document id that will be added. */
5959 		error = fts_get_next_doc_id(table, &doc_id);
5960 	}
5961 
5962 	if (error == DB_SUCCESS) {
5963 		dict_index_t*	clust_index;
5964 		dict_col_t*	col = dict_table_get_nth_col(
5965 			table, table->fts->doc_col);
5966 
5967 		ufield->exp = NULL;
5968 
5969 		ufield->new_val.len = sizeof(doc_id);
5970 
5971 		clust_index = dict_table_get_first_index(table);
5972 
5973 		ufield->field_no = dict_col_get_clust_pos(col, clust_index);
5974 		dict_col_copy_type(col, dfield_get_type(&ufield->new_val));
5975 
5976 		/* It is possible we update record that has
5977 		not yet be sync-ed from last crash. */
5978 
5979 		/* Convert to storage byte order. */
5980 		ut_a(doc_id != FTS_NULL_DOC_ID);
5981 		fts_write_doc_id((byte*) next_doc_id, doc_id);
5982 
5983 		ufield->new_val.data = next_doc_id;
5984                 ufield->new_val.ext = 0;
5985 	}
5986 
5987 	return(doc_id);
5988 }
5989 
5990 /*********************************************************************//**
5991 Check if the table has an FTS index. This is the non-inline version
5992 of dict_table_has_fts_index().
5993 @return TRUE if table has an FTS index */
5994 ibool
fts_dict_table_has_fts_index(dict_table_t * table)5995 fts_dict_table_has_fts_index(
5996 /*=========================*/
5997 	dict_table_t*	table)		/*!< in: table */
5998 {
5999 	return(dict_table_has_fts_index(table));
6000 }
6001 
6002 /** fts_t constructor.
6003 @param[in]	table	table with FTS indexes
6004 @param[in,out]	heap	memory heap where 'this' is stored */
fts_t(const dict_table_t * table,mem_heap_t * heap)6005 fts_t::fts_t(
6006 	const dict_table_t*	table,
6007 	mem_heap_t*		heap)
6008 	:
6009 	bg_threads(0),
6010 	fts_status(0),
6011 	add_wq(NULL),
6012 	cache(NULL),
6013 	doc_col(ULINT_UNDEFINED),
6014 	fts_heap(heap)
6015 {
6016 	ut_a(table->fts == NULL);
6017 
6018 	mutex_create(LATCH_ID_FTS_BG_THREADS, &bg_threads_mutex);
6019 
6020 	ib_alloc_t*	heap_alloc = ib_heap_allocator_create(fts_heap);
6021 
6022 	indexes = ib_vector_create(heap_alloc, sizeof(dict_index_t*), 4);
6023 
6024 	dict_table_get_all_fts_indexes(table, indexes);
6025 }
6026 
6027 /** fts_t destructor. */
~fts_t()6028 fts_t::~fts_t()
6029 {
6030 	mutex_free(&bg_threads_mutex);
6031 
6032 	ut_ad(add_wq == NULL);
6033 
6034 	if (cache != NULL) {
6035 		fts_cache_clear(cache);
6036 		fts_cache_destroy(cache);
6037 		cache = NULL;
6038 	}
6039 
6040 	/* There is no need to call ib_vector_free() on this->indexes
6041 	because it is stored in this->fts_heap. */
6042 }
6043 
6044 /*********************************************************************//**
6045 Create an instance of fts_t.
6046 @return instance of fts_t */
6047 fts_t*
fts_create(dict_table_t * table)6048 fts_create(
6049 /*=======*/
6050 	dict_table_t*	table)		/*!< in/out: table with FTS indexes */
6051 {
6052 	fts_t*		fts;
6053 	mem_heap_t*	heap;
6054 
6055 	heap = mem_heap_create(512);
6056 
6057 	fts = static_cast<fts_t*>(mem_heap_alloc(heap, sizeof(*fts)));
6058 
6059 	new(fts) fts_t(table, heap);
6060 
6061 	return(fts);
6062 }
6063 
6064 /*********************************************************************//**
6065 Free the FTS resources. */
6066 void
fts_free(dict_table_t * table)6067 fts_free(
6068 /*=====*/
6069 	dict_table_t*	table)	/*!< in/out: table with FTS indexes */
6070 {
6071 	fts_t*	fts = table->fts;
6072 
6073 	fts->~fts_t();
6074 
6075 	mem_heap_free(fts->fts_heap);
6076 
6077 	table->fts = NULL;
6078 }
6079 
6080 /*********************************************************************//**
6081 Signal FTS threads to initiate shutdown. */
6082 void
fts_start_shutdown(dict_table_t * table,fts_t * fts)6083 fts_start_shutdown(
6084 /*===============*/
6085 	dict_table_t*	table,		/*!< in: table with FTS indexes */
6086 	fts_t*		fts)		/*!< in: fts instance that needs
6087 					to be informed about shutdown */
6088 {
6089 	mutex_enter(&fts->bg_threads_mutex);
6090 
6091 	fts->fts_status |= BG_THREAD_STOP;
6092 
6093 	mutex_exit(&fts->bg_threads_mutex);
6094 
6095 }
6096 
6097 /*********************************************************************//**
6098 Wait for FTS threads to shutdown. */
6099 void
fts_shutdown(dict_table_t * table,fts_t * fts)6100 fts_shutdown(
6101 /*=========*/
6102 	dict_table_t*	table,		/*!< in: table with FTS indexes */
6103 	fts_t*		fts)		/*!< in: fts instance to shutdown */
6104 {
6105 	mutex_enter(&fts->bg_threads_mutex);
6106 
6107 	ut_a(fts->fts_status & BG_THREAD_STOP);
6108 
6109 	dict_table_wait_for_bg_threads_to_exit(table, 20000);
6110 
6111 	mutex_exit(&fts->bg_threads_mutex);
6112 }
6113 
6114 /*********************************************************************//**
6115 Take a FTS savepoint. */
6116 UNIV_INLINE
6117 void
fts_savepoint_copy(const fts_savepoint_t * src,fts_savepoint_t * dst)6118 fts_savepoint_copy(
6119 /*===============*/
6120 	const fts_savepoint_t*	src,	/*!< in: source savepoint */
6121 	fts_savepoint_t*	dst)	/*!< out: destination savepoint */
6122 {
6123 	const ib_rbt_node_t*	node;
6124 	const ib_rbt_t*		tables;
6125 
6126 	tables = src->tables;
6127 
6128 	for (node = rbt_first(tables); node; node = rbt_next(tables, node)) {
6129 
6130 		fts_trx_table_t*	ftt_dst;
6131 		const fts_trx_table_t**	ftt_src;
6132 
6133 		ftt_src = rbt_value(const fts_trx_table_t*, node);
6134 
6135 		ftt_dst = fts_trx_table_clone(*ftt_src);
6136 
6137 		rbt_insert(dst->tables, &ftt_dst, &ftt_dst);
6138 	}
6139 }
6140 
6141 /*********************************************************************//**
6142 Take a FTS savepoint. */
6143 void
fts_savepoint_take(trx_t * trx,fts_trx_t * fts_trx,const char * name)6144 fts_savepoint_take(
6145 /*===============*/
6146 	trx_t*		trx,		/*!< in: transaction */
6147 	fts_trx_t*	fts_trx,	/*!< in: fts transaction */
6148 	const char*	name)		/*!< in: savepoint name */
6149 {
6150 	mem_heap_t*		heap;
6151 	fts_savepoint_t*	savepoint;
6152 	fts_savepoint_t*	last_savepoint;
6153 
6154 	ut_a(name != NULL);
6155 
6156 	heap = fts_trx->heap;
6157 
6158 	/* The implied savepoint must exist. */
6159 	ut_a(ib_vector_size(fts_trx->savepoints) > 0);
6160 
6161 	last_savepoint = static_cast<fts_savepoint_t*>(
6162 		ib_vector_last(fts_trx->savepoints));
6163 	savepoint = fts_savepoint_create(fts_trx->savepoints, name, heap);
6164 
6165 	if (last_savepoint->tables != NULL) {
6166 		fts_savepoint_copy(last_savepoint, savepoint);
6167 	}
6168 }
6169 
6170 /*********************************************************************//**
6171 Lookup a savepoint instance by name.
6172 @return ULINT_UNDEFINED if not found */
6173 UNIV_INLINE
6174 ulint
fts_savepoint_lookup(ib_vector_t * savepoints,const char * name)6175 fts_savepoint_lookup(
6176 /*==================*/
6177 	ib_vector_t*	savepoints,	/*!< in: savepoints */
6178 	const char*	name)		/*!< in: savepoint name */
6179 {
6180 	ulint			i;
6181 
6182 	ut_a(ib_vector_size(savepoints) > 0);
6183 
6184 	for (i = 1; i < ib_vector_size(savepoints); ++i) {
6185 		fts_savepoint_t*	savepoint;
6186 
6187 		savepoint = static_cast<fts_savepoint_t*>(
6188 			ib_vector_get(savepoints, i));
6189 
6190 		if (strcmp(name, savepoint->name) == 0) {
6191 			return(i);
6192 		}
6193 	}
6194 
6195 	return(ULINT_UNDEFINED);
6196 }
6197 
6198 /*********************************************************************//**
6199 Release the savepoint data identified by  name. All savepoints created
6200 after the named savepoint are kept.
6201 @return DB_SUCCESS or error code */
6202 void
fts_savepoint_release(trx_t * trx,const char * name)6203 fts_savepoint_release(
6204 /*==================*/
6205 	trx_t*		trx,		/*!< in: transaction */
6206 	const char*	name)		/*!< in: savepoint name */
6207 {
6208 	ut_a(name != NULL);
6209 
6210 	ib_vector_t*	savepoints = trx->fts_trx->savepoints;
6211 
6212 	ut_a(ib_vector_size(savepoints) > 0);
6213 
6214 	ulint   i = fts_savepoint_lookup(savepoints, name);
6215 	if (i != ULINT_UNDEFINED) {
6216 		ut_a(i >= 1);
6217 
6218 		fts_savepoint_t*        savepoint;
6219 		savepoint = static_cast<fts_savepoint_t*>(
6220 			ib_vector_get(savepoints, i));
6221 
6222 		if (i == ib_vector_size(savepoints) - 1) {
6223 			/* If the savepoint is the last, we save its
6224 			tables to the  previous savepoint. */
6225 			fts_savepoint_t*	prev_savepoint;
6226 			prev_savepoint = static_cast<fts_savepoint_t*>(
6227 				ib_vector_get(savepoints, i - 1));
6228 
6229 			ib_rbt_t*	tables = savepoint->tables;
6230 			savepoint->tables = prev_savepoint->tables;
6231 			prev_savepoint->tables = tables;
6232 		}
6233 
6234 		fts_savepoint_free(savepoint);
6235 		ib_vector_remove(savepoints, *(void**)savepoint);
6236 
6237 		/* Make sure we don't delete the implied savepoint. */
6238 		ut_a(ib_vector_size(savepoints) > 0);
6239 	}
6240 }
6241 
6242 /**********************************************************************//**
6243 Refresh last statement savepoint. */
6244 void
fts_savepoint_laststmt_refresh(trx_t * trx)6245 fts_savepoint_laststmt_refresh(
6246 /*===========================*/
6247 	trx_t*			trx)	/*!< in: transaction */
6248 {
6249 
6250 	fts_trx_t*              fts_trx;
6251 	fts_savepoint_t*        savepoint;
6252 
6253 	fts_trx = trx->fts_trx;
6254 
6255 	savepoint = static_cast<fts_savepoint_t*>(
6256 		ib_vector_pop(fts_trx->last_stmt));
6257 	fts_savepoint_free(savepoint);
6258 
6259 	ut_ad(ib_vector_is_empty(fts_trx->last_stmt));
6260 	savepoint = fts_savepoint_create(fts_trx->last_stmt, NULL, NULL);
6261 }
6262 
6263 /********************************************************************
6264 Undo the Doc ID add/delete operations in last stmt */
6265 static
6266 void
fts_undo_last_stmt(fts_trx_table_t * s_ftt,fts_trx_table_t * l_ftt)6267 fts_undo_last_stmt(
6268 /*===============*/
6269 	fts_trx_table_t*	s_ftt,	/*!< in: Transaction FTS table */
6270 	fts_trx_table_t*	l_ftt)	/*!< in: last stmt FTS table */
6271 {
6272 	ib_rbt_t*		s_rows;
6273 	ib_rbt_t*		l_rows;
6274 	const ib_rbt_node_t*	node;
6275 
6276 	l_rows = l_ftt->rows;
6277 	s_rows = s_ftt->rows;
6278 
6279 	for (node = rbt_first(l_rows);
6280 	     node;
6281 	     node = rbt_next(l_rows, node)) {
6282 		fts_trx_row_t*	l_row = rbt_value(fts_trx_row_t, node);
6283 		ib_rbt_bound_t	parent;
6284 
6285 		rbt_search(s_rows, &parent, &(l_row->doc_id));
6286 
6287 		if (parent.result == 0) {
6288 			fts_trx_row_t*	s_row = rbt_value(
6289 				fts_trx_row_t, parent.last);
6290 
6291 			switch (l_row->state) {
6292 			case FTS_INSERT:
6293 				ut_free(rbt_remove_node(s_rows, parent.last));
6294 				break;
6295 
6296 			case FTS_DELETE:
6297 				if (s_row->state == FTS_NOTHING) {
6298 					s_row->state = FTS_INSERT;
6299 				} else if (s_row->state == FTS_DELETE) {
6300 					ut_free(rbt_remove_node(
6301 						s_rows, parent.last));
6302 				}
6303 				break;
6304 
6305 			/* FIXME: Check if FTS_MODIFY need to be addressed */
6306 			case FTS_MODIFY:
6307 			case FTS_NOTHING:
6308 				break;
6309 			default:
6310 				ut_error;
6311 			}
6312 		}
6313 	}
6314 }
6315 
6316 /**********************************************************************//**
6317 Rollback to savepoint indentified by name.
6318 @return DB_SUCCESS or error code */
6319 void
fts_savepoint_rollback_last_stmt(trx_t * trx)6320 fts_savepoint_rollback_last_stmt(
6321 /*=============================*/
6322 	trx_t*		trx)		/*!< in: transaction */
6323 {
6324 	ib_vector_t*		savepoints;
6325 	fts_savepoint_t*	savepoint;
6326 	fts_savepoint_t*	last_stmt;
6327 	fts_trx_t*		fts_trx;
6328 	ib_rbt_bound_t		parent;
6329 	const ib_rbt_node_t*    node;
6330 	ib_rbt_t*		l_tables;
6331 	ib_rbt_t*		s_tables;
6332 
6333 	fts_trx = trx->fts_trx;
6334 	savepoints = fts_trx->savepoints;
6335 
6336 	savepoint = static_cast<fts_savepoint_t*>(ib_vector_last(savepoints));
6337 	last_stmt = static_cast<fts_savepoint_t*>(
6338 		ib_vector_last(fts_trx->last_stmt));
6339 
6340 	l_tables = last_stmt->tables;
6341 	s_tables = savepoint->tables;
6342 
6343 	for (node = rbt_first(l_tables);
6344 	     node;
6345 	     node = rbt_next(l_tables, node)) {
6346 
6347 		fts_trx_table_t**	l_ftt;
6348 
6349 		l_ftt = rbt_value(fts_trx_table_t*, node);
6350 
6351 		rbt_search_cmp(
6352 			s_tables, &parent, &(*l_ftt)->table->id,
6353 			fts_trx_table_id_cmp, NULL);
6354 
6355 		if (parent.result == 0) {
6356 			fts_trx_table_t**	s_ftt;
6357 
6358 			s_ftt = rbt_value(fts_trx_table_t*, parent.last);
6359 
6360 			fts_undo_last_stmt(*s_ftt, *l_ftt);
6361 		}
6362 	}
6363 }
6364 
6365 /**********************************************************************//**
6366 Rollback to savepoint indentified by name.
6367 @return DB_SUCCESS or error code */
6368 void
fts_savepoint_rollback(trx_t * trx,const char * name)6369 fts_savepoint_rollback(
6370 /*===================*/
6371 	trx_t*		trx,		/*!< in: transaction */
6372 	const char*	name)		/*!< in: savepoint name */
6373 {
6374 	ulint		i;
6375 	ib_vector_t*	savepoints;
6376 
6377 	ut_a(name != NULL);
6378 
6379 	savepoints = trx->fts_trx->savepoints;
6380 
6381 	/* We pop all savepoints from the the top of the stack up to
6382 	and including the instance that was found. */
6383 	i = fts_savepoint_lookup(savepoints, name);
6384 
6385 	if (i != ULINT_UNDEFINED) {
6386 		fts_savepoint_t*	savepoint;
6387 
6388 		ut_a(i > 0);
6389 
6390 		while (ib_vector_size(savepoints) > i) {
6391 			fts_savepoint_t*	savepoint;
6392 
6393 			savepoint = static_cast<fts_savepoint_t*>(
6394 				ib_vector_pop(savepoints));
6395 
6396 			if (savepoint->name != NULL) {
6397 				/* Since name was allocated on the heap, the
6398 				memory will be released when the transaction
6399 				completes. */
6400 				savepoint->name = NULL;
6401 
6402 				fts_savepoint_free(savepoint);
6403 			}
6404 		}
6405 
6406 		/* Pop all a elements from the top of the stack that may
6407 		have been released. We have to be careful that we don't
6408 		delete the implied savepoint. */
6409 
6410 		for (savepoint = static_cast<fts_savepoint_t*>(
6411 				ib_vector_last(savepoints));
6412 		     ib_vector_size(savepoints) > 1
6413 		     && savepoint->name == NULL;
6414 		     savepoint = static_cast<fts_savepoint_t*>(
6415 				ib_vector_last(savepoints))) {
6416 
6417 			ib_vector_pop(savepoints);
6418 		}
6419 
6420 		/* Make sure we don't delete the implied savepoint. */
6421 		ut_a(ib_vector_size(savepoints) > 0);
6422 
6423 		/* Restore the savepoint. */
6424 		fts_savepoint_take(trx, trx->fts_trx, name);
6425 	}
6426 }
6427 
6428 /** Check if a table is an FTS auxiliary table name.
6429 @param[out]	table	FTS table info
6430 @param[in]	name	Table name
6431 @param[in]	len	Length of table name
6432 @return true if the name matches an auxiliary table name pattern */
6433 static
6434 bool
fts_is_aux_table_name(fts_aux_table_t * table,const char * name,ulint len)6435 fts_is_aux_table_name(
6436 	fts_aux_table_t*	table,
6437 	const char*		name,
6438 	ulint			len)
6439 {
6440 	const char*	ptr;
6441 	char*		end;
6442 	char		my_name[MAX_FULL_NAME_LEN + 1];
6443 
6444 	ut_ad(len <= MAX_FULL_NAME_LEN);
6445 	ut_memcpy(my_name, name, len);
6446 	my_name[len] = 0;
6447 	end = my_name + len;
6448 
6449 	ptr = static_cast<const char*>(memchr(my_name, '/', len));
6450 
6451 	if (ptr != NULL) {
6452 		/* We will start the match after the '/' */
6453 		++ptr;
6454 		len = end - ptr;
6455 	}
6456 
6457 	/* All auxiliary tables are prefixed with "FTS_" and the name
6458 	length will be at the very least greater than 20 bytes. */
6459 	if (ptr != NULL && len > 20 && strncmp(ptr, "FTS_", 4) == 0) {
6460 		ulint		i;
6461 
6462 		/* Skip the prefix. */
6463 		ptr += 4;
6464 		len -= 4;
6465 
6466 		/* Try and read the table id. */
6467 		if (!fts_read_object_id(&table->parent_id, ptr)) {
6468 			return(false);
6469 		}
6470 
6471 		/* Skip the table id. */
6472 		ptr = static_cast<const char*>(memchr(ptr, '_', len));
6473 
6474 		if (ptr == NULL) {
6475 			return(false);
6476 		}
6477 
6478 		/* Skip the underscore. */
6479 		++ptr;
6480 		ut_a(end > ptr);
6481 		len = end - ptr;
6482 
6483 		/* First search the common table suffix array. */
6484 		for (i = 0; fts_common_tables[i] != NULL; ++i) {
6485 
6486 			if (strncmp(ptr, fts_common_tables[i], len) == 0) {
6487 				return(true);
6488 			}
6489 		}
6490 
6491 		/* Could be obsolete common tables. */
6492 		if (strncmp(ptr, "ADDED", len) == 0
6493 		    || strncmp(ptr, "STOPWORDS", len) == 0) {
6494 			return(true);
6495 		}
6496 
6497 		/* Try and read the index id. */
6498 		if (!fts_read_object_id(&table->index_id, ptr)) {
6499 			return(false);
6500 		}
6501 
6502 		/* Skip the table id. */
6503 		ptr = static_cast<const char*>(memchr(ptr, '_', len));
6504 
6505 		if (ptr == NULL) {
6506 			return(false);
6507 		}
6508 
6509 		/* Skip the underscore. */
6510 		++ptr;
6511 		ut_a(end > ptr);
6512 		len = end - ptr;
6513 
6514 		/* Search the FT index specific array. */
6515 		for (i = 0; i < FTS_NUM_AUX_INDEX; ++i) {
6516 
6517 			if (strncmp(ptr, fts_get_suffix(i), len) == 0) {
6518 				return(true);
6519 			}
6520 		}
6521 
6522 		/* Other FT index specific table(s). */
6523 		if (strncmp(ptr, "DOC_ID", len) == 0) {
6524 			return(true);
6525 		}
6526 	}
6527 
6528 	return(false);
6529 }
6530 
6531 /**********************************************************************//**
6532 Callback function to read a single table ID column.
6533 @return Always return TRUE */
6534 static
6535 ibool
fts_read_tables(void * row,void * user_arg)6536 fts_read_tables(
6537 /*============*/
6538 	void*		row,		/*!< in: sel_node_t* */
6539 	void*		user_arg)	/*!< in: pointer to ib_vector_t */
6540 {
6541 	int		i;
6542 	fts_aux_table_t*table;
6543 	mem_heap_t*	heap;
6544 	ibool		done = FALSE;
6545 	ib_vector_t*	tables = static_cast<ib_vector_t*>(user_arg);
6546 	sel_node_t*	sel_node = static_cast<sel_node_t*>(row);
6547 	que_node_t*	exp = sel_node->select_list;
6548 
6549 	/* Must be a heap allocated vector. */
6550 	ut_a(tables->allocator->arg != NULL);
6551 
6552 	/* We will use this heap for allocating strings. */
6553 	heap = static_cast<mem_heap_t*>(tables->allocator->arg);
6554 	table = static_cast<fts_aux_table_t*>(ib_vector_push(tables, NULL));
6555 
6556 	memset(table, 0x0, sizeof(*table));
6557 
6558 	/* Iterate over the columns and read the values. */
6559 	for (i = 0; exp && !done; exp = que_node_get_next(exp), ++i) {
6560 
6561 		dfield_t*	dfield = que_node_get_val(exp);
6562 		void*		data = dfield_get_data(dfield);
6563 		ulint		len = dfield_get_len(dfield);
6564 
6565 		ut_a(len != UNIV_SQL_NULL);
6566 
6567 		/* Note: The column numbers below must match the SELECT */
6568 		switch (i) {
6569 		case 0: /* NAME */
6570 
6571 			if (!fts_is_aux_table_name(
6572 				table, static_cast<const char*>(data), len)) {
6573 				ib_vector_pop(tables);
6574 				done = TRUE;
6575 				break;
6576 			}
6577 
6578 			table->name = static_cast<char*>(
6579 				mem_heap_alloc(heap, len + 1));
6580 			memcpy(table->name, data, len);
6581 			table->name[len] = 0;
6582 			break;
6583 
6584 		case 1: /* ID */
6585 			ut_a(len == 8);
6586 			table->id = mach_read_from_8(
6587 				static_cast<const byte*>(data));
6588 			break;
6589 
6590 		default:
6591 			ut_error;
6592 		}
6593 	}
6594 
6595 	return(TRUE);
6596 }
6597 
6598 /******************************************************************//**
6599 Callback that sets a hex formatted FTS table's flags2 in
6600 SYS_TABLES. The flags is stored in MIX_LEN column.
6601 @return FALSE if all OK */
6602 static
6603 ibool
fts_set_hex_format(void * row,void * user_arg)6604 fts_set_hex_format(
6605 /*===============*/
6606 	void*		row,		/*!< in: sel_node_t* */
6607 	void*		user_arg)	/*!< in: bool set/unset flag */
6608 {
6609 	sel_node_t*	node = static_cast<sel_node_t*>(row);
6610 	dfield_t*	dfield = que_node_get_val(node->select_list);
6611 
6612 	ut_ad(dtype_get_mtype(dfield_get_type(dfield)) == DATA_INT);
6613 	ut_ad(dfield_get_len(dfield) == sizeof(ib_uint32_t));
6614 	/* There should be at most one matching record. So the value
6615 	must be the default value. */
6616 	ut_ad(mach_read_from_4(static_cast<byte*>(user_arg))
6617 	      == ULINT32_UNDEFINED);
6618 
6619 	ulint		flags2 = mach_read_from_4(
6620 			static_cast<byte*>(dfield_get_data(dfield)));
6621 
6622 	flags2 |= DICT_TF2_FTS_AUX_HEX_NAME;
6623 
6624 	mach_write_to_4(static_cast<byte*>(user_arg), flags2);
6625 
6626 	return(FALSE);
6627 }
6628 
6629 /*****************************************************************//**
6630 Update the DICT_TF2_FTS_AUX_HEX_NAME flag in SYS_TABLES.
6631 @return DB_SUCCESS or error code. */
6632 dberr_t
fts_update_hex_format_flag(trx_t * trx,table_id_t table_id,bool dict_locked)6633 fts_update_hex_format_flag(
6634 /*=======================*/
6635 	trx_t*		trx,		/*!< in/out: transaction that
6636 					covers the update */
6637 	table_id_t	table_id,	/*!< in: Table for which we want
6638 					to set the root table->flags2 */
6639 	bool		dict_locked)	/*!< in: set to true if the
6640 					caller already owns the
6641 					dict_sys_t::mutex. */
6642 {
6643 	pars_info_t*		info;
6644 	ib_uint32_t		flags2;
6645 
6646 	static const char	sql[] =
6647 		"PROCEDURE UPDATE_HEX_FORMAT_FLAG() IS\n"
6648 		"DECLARE FUNCTION my_func;\n"
6649 		"DECLARE CURSOR c IS\n"
6650 		" SELECT MIX_LEN"
6651 		" FROM SYS_TABLES"
6652 		" WHERE ID = :table_id FOR UPDATE;"
6653 		"\n"
6654 		"BEGIN\n"
6655 		"OPEN c;\n"
6656 		"WHILE 1 = 1 LOOP\n"
6657 		"  FETCH c INTO my_func();\n"
6658 		"  IF c % NOTFOUND THEN\n"
6659 		"    EXIT;\n"
6660 		"  END IF;\n"
6661 		"END LOOP;\n"
6662 		"UPDATE SYS_TABLES"
6663 		" SET MIX_LEN = :flags2"
6664 		" WHERE ID = :table_id;\n"
6665 		"CLOSE c;\n"
6666 		"END;\n";
6667 
6668 	flags2 = ULINT32_UNDEFINED;
6669 
6670 	info = pars_info_create();
6671 
6672 	pars_info_add_ull_literal(info, "table_id", table_id);
6673 	pars_info_bind_int4_literal(info, "flags2", &flags2);
6674 
6675 	pars_info_bind_function(
6676 		info, "my_func", fts_set_hex_format, &flags2);
6677 
6678 	if (trx_get_dict_operation(trx) == TRX_DICT_OP_NONE) {
6679 		trx_set_dict_operation(trx, TRX_DICT_OP_INDEX);
6680 	}
6681 
6682 	dberr_t err = que_eval_sql(info, sql, !dict_locked, trx);
6683 
6684 	ut_a(flags2 != ULINT32_UNDEFINED);
6685 
6686 	return(err);
6687 }
6688 
6689 /*********************************************************************//**
6690 Rename an aux table to HEX format. It's called when "%016llu" is used
6691 to format an object id in table name, which only happens in Windows. */
6692 static MY_ATTRIBUTE((nonnull, warn_unused_result))
6693 dberr_t
fts_rename_one_aux_table_to_hex_format(trx_t * trx,const fts_aux_table_t * aux_table,const dict_table_t * parent_table)6694 fts_rename_one_aux_table_to_hex_format(
6695 /*===================================*/
6696 	trx_t*			trx,		/*!< in: transaction */
6697 	const fts_aux_table_t*	aux_table,	/*!< in: table info */
6698 	const dict_table_t*	parent_table)	/*!< in: parent table name */
6699 {
6700 	const char*     ptr;
6701 	fts_table_t	fts_table;
6702 	char		new_name[MAX_FULL_NAME_LEN];
6703 	dberr_t		error;
6704 
6705 	ptr = strchr(aux_table->name, '/');
6706 	ut_a(ptr != NULL);
6707 	++ptr;
6708 	/* Skip "FTS_", table id and underscore */
6709 	for (ulint i = 0; i < 2; ++i) {
6710 		ptr = strchr(ptr, '_');
6711 		ut_a(ptr != NULL);
6712 		++ptr;
6713 	}
6714 
6715 	fts_table.suffix = NULL;
6716 	if (aux_table->index_id == 0) {
6717 		fts_table.type = FTS_COMMON_TABLE;
6718 
6719 		for (ulint i = 0; fts_common_tables[i] != NULL; ++i) {
6720 			if (strcmp(ptr, fts_common_tables[i]) == 0) {
6721 				fts_table.suffix = fts_common_tables[i];
6722 				break;
6723 			}
6724 		}
6725 	} else {
6726 		fts_table.type = FTS_INDEX_TABLE;
6727 
6728 		/* Skip index id and underscore */
6729 		ptr = strchr(ptr, '_');
6730 		ut_a(ptr != NULL);
6731 		++ptr;
6732 
6733 		for (ulint i = 0; fts_index_selector[i].value; ++i) {
6734 			if (strcmp(ptr, fts_get_suffix(i)) == 0) {
6735 				fts_table.suffix = fts_get_suffix(i);
6736 				break;
6737 			}
6738 		}
6739 	}
6740 
6741 	ut_a(fts_table.suffix != NULL);
6742 
6743 	fts_table.parent = parent_table->name.m_name;
6744 	fts_table.table_id = aux_table->parent_id;
6745 	fts_table.index_id = aux_table->index_id;
6746 	fts_table.table = parent_table;
6747 
6748 	fts_get_table_name(&fts_table, new_name);
6749 	ut_ad(strcmp(new_name, aux_table->name) != 0);
6750 
6751 	if (trx_get_dict_operation(trx) == TRX_DICT_OP_NONE) {
6752 		trx_set_dict_operation(trx, TRX_DICT_OP_INDEX);
6753 	}
6754 
6755 	error = row_rename_table_for_mysql(aux_table->name, new_name, trx,
6756 					   FALSE);
6757 
6758 	if (error != DB_SUCCESS) {
6759 		ib::warn() << "Failed to rename aux table '"
6760 			<< aux_table->name << "' to new format '"
6761 			<< new_name << "'.";
6762 	} else {
6763 		ib::info() << "Renamed aux table '" << aux_table->name
6764 			<< "' to '" << new_name << "'.";
6765 	}
6766 
6767 	return(error);
6768 }
6769 
6770 /**********************************************************************//**
6771 Rename all aux tables of a parent table to HEX format. Also set aux tables'
6772 flags2 and parent table's flags2 with DICT_TF2_FTS_AUX_HEX_NAME.
6773 It's called when "%016llu" is used to format an object id in table name,
6774 which only happens in Windows.
6775 Note the ids in tables are correct but the names are old ambiguous ones.
6776 
6777 This function should make sure that either all the parent table and aux tables
6778 are set DICT_TF2_FTS_AUX_HEX_NAME with flags2 or none of them are set */
6779 static MY_ATTRIBUTE((nonnull, warn_unused_result))
6780 dberr_t
fts_rename_aux_tables_to_hex_format_low(trx_t * trx,dict_table_t * parent_table,ib_vector_t * tables)6781 fts_rename_aux_tables_to_hex_format_low(
6782 /*====================================*/
6783 	trx_t*		trx,		/*!< in: transaction */
6784 	dict_table_t*	parent_table,	/*!< in: parent table */
6785 	ib_vector_t*	tables)		/*!< in: aux tables to rename. */
6786 {
6787 	dberr_t		error;
6788 	ulint		count;
6789 
6790 	ut_ad(!DICT_TF2_FLAG_IS_SET(parent_table, DICT_TF2_FTS_AUX_HEX_NAME));
6791 	ut_ad(!ib_vector_is_empty(tables));
6792 
6793 	error = fts_update_hex_format_flag(trx, parent_table->id, true);
6794 
6795 	if (error != DB_SUCCESS) {
6796 		ib::warn() << "Setting parent table " << parent_table->name
6797 			<< " to hex format failed.";
6798 		fts_sql_rollback(trx);
6799 		return(error);
6800 	}
6801 
6802 	DICT_TF2_FLAG_SET(parent_table, DICT_TF2_FTS_AUX_HEX_NAME);
6803 
6804 	for (count = 0; count < ib_vector_size(tables); ++count) {
6805 		dict_table_t*		table;
6806 		fts_aux_table_t*	aux_table;
6807 
6808 		aux_table = static_cast<fts_aux_table_t*>(
6809 			ib_vector_get(tables, count));
6810 
6811 		table = dict_table_open_on_id(aux_table->id, TRUE,
6812 					      DICT_TABLE_OP_NORMAL);
6813 
6814 		ut_ad(table != NULL);
6815 		ut_ad(!DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_AUX_HEX_NAME));
6816 
6817 		/* Set HEX_NAME flag here to make sure we can get correct
6818 		new table name in following function */
6819 		DICT_TF2_FLAG_SET(table, DICT_TF2_FTS_AUX_HEX_NAME);
6820 		error = fts_rename_one_aux_table_to_hex_format(trx,
6821 				aux_table, parent_table);
6822 		/* We will rollback the trx if the error != DB_SUCCESS,
6823 		so setting the flag here is the same with setting it in
6824 		row_rename_table_for_mysql */
6825 		DBUG_EXECUTE_IF("rename_aux_table_fail", error = DB_ERROR;);
6826 
6827 		if (error != DB_SUCCESS) {
6828 			dict_table_close(table, TRUE, FALSE);
6829 
6830 			ib::warn() << "Failed to rename one aux table "
6831 				<< aux_table->name << ". Will revert"
6832 				" all successful rename operations.";
6833 
6834 			fts_sql_rollback(trx);
6835 			break;
6836 		}
6837 
6838 		error = fts_update_hex_format_flag(trx, aux_table->id, true);
6839 		dict_table_close(table, TRUE, FALSE);
6840 
6841 		if (error != DB_SUCCESS) {
6842 			ib::warn() << "Setting aux table " << aux_table->name
6843 				<< " to hex format failed.";
6844 
6845 			fts_sql_rollback(trx);
6846 			break;
6847 		}
6848 	}
6849 
6850 	if (error != DB_SUCCESS) {
6851 		ut_ad(count != ib_vector_size(tables));
6852 
6853 		/* If rename fails, thr trx would be rolled back, we can't
6854 		use it any more, we'll start a new background trx to do
6855 		the reverting. */
6856 
6857 		ut_ad(!trx_is_started(trx));
6858 
6859 		bool not_rename = false;
6860 
6861 		/* Try to revert those succesful rename operations
6862 		in order to revert the ibd file rename. */
6863 		for (ulint i = 0; i <= count; ++i) {
6864 			dict_table_t*		table;
6865 			fts_aux_table_t*	aux_table;
6866 			trx_t*			trx_bg;
6867 			dberr_t			err;
6868 
6869 			aux_table = static_cast<fts_aux_table_t*>(
6870 				ib_vector_get(tables, i));
6871 
6872 			table = dict_table_open_on_id(aux_table->id, TRUE,
6873 						      DICT_TABLE_OP_NORMAL);
6874 			ut_ad(table != NULL);
6875 
6876 			if (not_rename) {
6877 				DICT_TF2_FLAG_UNSET(table,
6878 						    DICT_TF2_FTS_AUX_HEX_NAME);
6879 			}
6880 
6881 			if (!DICT_TF2_FLAG_IS_SET(table,
6882 						  DICT_TF2_FTS_AUX_HEX_NAME)) {
6883 				dict_table_close(table, TRUE, FALSE);
6884 				continue;
6885 			}
6886 
6887 			trx_bg = trx_allocate_for_background();
6888 			trx_bg->op_info = "Revert half done rename";
6889 			trx_bg->dict_operation_lock_mode = RW_X_LATCH;
6890 			trx_start_for_ddl(trx_bg, TRX_DICT_OP_TABLE);
6891 
6892 			DICT_TF2_FLAG_UNSET(table, DICT_TF2_FTS_AUX_HEX_NAME);
6893 			err = row_rename_table_for_mysql(table->name.m_name,
6894 							 aux_table->name,
6895 							 trx_bg, FALSE);
6896 
6897 			trx_bg->dict_operation_lock_mode = 0;
6898 			dict_table_close(table, TRUE, FALSE);
6899 
6900 			if (err != DB_SUCCESS) {
6901 				ib::warn() << "Failed to revert table "
6902 					<< table->name << ". Please revert"
6903 					" manually.";
6904 				fts_sql_rollback(trx_bg);
6905 				trx_free_for_background(trx_bg);
6906 				/* Continue to clear aux tables' flags2 */
6907 				not_rename = true;
6908 				continue;
6909 			}
6910 
6911 			fts_sql_commit(trx_bg);
6912 			trx_free_for_background(trx_bg);
6913 		}
6914 
6915 		DICT_TF2_FLAG_UNSET(parent_table, DICT_TF2_FTS_AUX_HEX_NAME);
6916 	}
6917 
6918 	return(error);
6919 }
6920 
6921 /**********************************************************************//**
6922 Convert an id, which is actually a decimal number but was regard as a HEX
6923 from a string, to its real value. */
6924 static
6925 ib_id_t
fts_fake_hex_to_dec(ib_id_t id)6926 fts_fake_hex_to_dec(
6927 /*================*/
6928 	ib_id_t		id)			/*!< in: number to convert */
6929 {
6930 	ib_id_t		dec_id = 0;
6931 	char		tmp_id[FTS_AUX_MIN_TABLE_ID_LENGTH];
6932 
6933 #ifdef UNIV_DEBUG
6934 	int		ret =
6935 #endif /* UNIV_DEBUG */
6936 	sprintf(tmp_id, UINT64PFx, id);
6937 	ut_ad(ret == 16);
6938 #ifdef UNIV_DEBUG
6939 	ret =
6940 #endif /* UNIV_DEBUG */
6941 #ifdef _WIN32
6942 	sscanf(tmp_id, "%016llu", &dec_id);
6943 #else
6944 	sscanf(tmp_id, "%016" PRIu64, &dec_id);
6945 #endif /* _WIN32 */
6946 	ut_ad(ret == 1);
6947 
6948 	return dec_id;
6949 }
6950 
6951 /*********************************************************************//**
6952 Compare two fts_aux_table_t parent_ids.
6953 @return < 0 if n1 < n2, 0 if n1 == n2, > 0 if n1 > n2 */
6954 UNIV_INLINE
6955 int
fts_check_aux_table_parent_id_cmp(const void * p1,const void * p2)6956 fts_check_aux_table_parent_id_cmp(
6957 /*==============================*/
6958 	const void*	p1,		/*!< in: id1 */
6959 	const void*	p2)		/*!< in: id2 */
6960 {
6961 	const fts_aux_table_t*	fa1 = static_cast<const fts_aux_table_t*>(p1);
6962 	const fts_aux_table_t*	fa2 = static_cast<const fts_aux_table_t*>(p2);
6963 
6964 	return static_cast<int>(fa1->parent_id - fa2->parent_id);
6965 }
6966 
6967 /** Mark all the fts index associated with the parent table as corrupted.
6968 @param[in]	trx		transaction
6969 @param[in, out] parent_table	fts index associated with this parent table
6970 				will be marked as corrupted. */
6971 static
6972 void
fts_parent_all_index_set_corrupt(trx_t * trx,dict_table_t * parent_table)6973 fts_parent_all_index_set_corrupt(
6974 	trx_t*		trx,
6975 	dict_table_t*	parent_table)
6976 {
6977 	fts_t*	fts = parent_table->fts;
6978 
6979 	if (trx_get_dict_operation(trx) == TRX_DICT_OP_NONE) {
6980 		trx_set_dict_operation(trx, TRX_DICT_OP_INDEX);
6981 	}
6982 
6983 	for (ulint j = 0; j < ib_vector_size(fts->indexes); j++) {
6984 		dict_index_t*	index = static_cast<dict_index_t*>(
6985 			ib_vector_getp_const(fts->indexes, j));
6986 		dict_set_corrupted(index,
6987 				   trx, "DROP ORPHANED TABLE");
6988 	}
6989 }
6990 
6991 /** Mark the fts index which index id matches the id as corrupted.
6992 @param[in]	trx		transaction
6993 @param[in]	id		index id to search
6994 @param[in, out]	parent_table	parent table to check with all
6995 				the index. */
6996 static
6997 void
fts_set_index_corrupt(trx_t * trx,index_id_t id,dict_table_t * table)6998 fts_set_index_corrupt(
6999 	trx_t*		trx,
7000 	index_id_t	id,
7001 	dict_table_t*	table)
7002 {
7003 	fts_t*	fts = table->fts;
7004 
7005 	if (trx_get_dict_operation(trx) == TRX_DICT_OP_NONE) {
7006 		trx_set_dict_operation(trx, TRX_DICT_OP_INDEX);
7007 	}
7008 
7009 	for (ulint j = 0; j < ib_vector_size(fts->indexes); j++) {
7010 		dict_index_t*	index = static_cast<dict_index_t*>(
7011 			ib_vector_getp_const(fts->indexes, j));
7012 		if (index->id == id) {
7013 			dict_set_corrupted(index, trx,
7014 					   "DROP ORPHANED TABLE");
7015 			break;
7016 		}
7017 	}
7018 }
7019 
7020 /** Check the index for the aux table is corrupted.
7021 @param[in]	aux_table	auxiliary table
7022 @retval nonzero if index is corrupted, zero for valid index */
7023 static
7024 ulint
fts_check_corrupt_index(fts_aux_table_t * aux_table)7025 fts_check_corrupt_index(
7026 	fts_aux_table_t*	aux_table)
7027 {
7028 	dict_table_t*	table;
7029 	dict_index_t*	index;
7030 	table = dict_table_open_on_id(
7031 		aux_table->parent_id, TRUE, DICT_TABLE_OP_NORMAL);
7032 
7033 	if (table == NULL) {
7034 		return(0);
7035 	}
7036 
7037 	for (index = UT_LIST_GET_FIRST(table->indexes);
7038 	     index;
7039 	     index = UT_LIST_GET_NEXT(indexes, index)) {
7040 		if (index->id == aux_table->index_id) {
7041 			ut_ad(index->type & DICT_FTS);
7042 			dict_table_close(table, true, false);
7043 			return(dict_index_is_corrupted(index));
7044 		}
7045 	}
7046 
7047 	dict_table_close(table, true, false);
7048 	return(0);
7049 }
7050 
7051 /* Get parent table name if it's a fts aux table
7052 @param[in]	aux_table_name	aux table name
7053 @param[in]	aux_table_len	aux table length
7054 @return parent table name, or NULL */
7055 char*
fts_get_parent_table_name(const char * aux_table_name,ulint aux_table_len)7056 fts_get_parent_table_name(
7057 	const char*	aux_table_name,
7058 	ulint		aux_table_len)
7059 {
7060 	fts_aux_table_t	aux_table;
7061 	char*		parent_table_name = NULL;
7062 
7063 	if (fts_is_aux_table_name(&aux_table, aux_table_name, aux_table_len)) {
7064 		dict_table_t*	parent_table;
7065 
7066 		parent_table = dict_table_open_on_id(
7067 			aux_table.parent_id, TRUE, DICT_TABLE_OP_NORMAL);
7068 
7069 		if (parent_table != NULL) {
7070 			parent_table_name = mem_strdupl(
7071 				parent_table->name.m_name,
7072 				strlen(parent_table->name.m_name));
7073 
7074 			dict_table_close(parent_table, TRUE, FALSE);
7075 		}
7076 	}
7077 
7078 	return(parent_table_name);
7079 }
7080 
7081 /** Check the validity of the parent table.
7082 @param[in]	aux_table	auxiliary table
7083 @return true if it is a valid table or false if it is not */
7084 static
7085 bool
fts_valid_parent_table(const fts_aux_table_t * aux_table)7086 fts_valid_parent_table(
7087 	const fts_aux_table_t*	aux_table)
7088 {
7089 	dict_table_t*	parent_table;
7090 	bool		valid = false;
7091 
7092 	parent_table = dict_table_open_on_id(
7093 		aux_table->parent_id, TRUE, DICT_TABLE_OP_NORMAL);
7094 
7095 	if (parent_table != NULL && parent_table->fts != NULL) {
7096 		if (aux_table->index_id == 0) {
7097 			valid = true;
7098 		} else {
7099 			index_id_t	id = aux_table->index_id;
7100 			dict_index_t*	index;
7101 
7102 			/* Search for the FT index in the table's list. */
7103 			for (index = UT_LIST_GET_FIRST(parent_table->indexes);
7104 			     index;
7105 			     index = UT_LIST_GET_NEXT(indexes, index)) {
7106 				if (index->id == id) {
7107 					valid = true;
7108 					break;
7109 				}
7110 
7111 			}
7112 		}
7113 	}
7114 
7115 	if (parent_table) {
7116 		dict_table_close(parent_table, TRUE, FALSE);
7117 	}
7118 
7119 	return(valid);
7120 }
7121 
7122 /** Try to rename all aux tables of the specified parent table.
7123 @param[in]	aux_tables	aux_tables to be renamed
7124 @param[in]	parent_table	parent table of all aux
7125 				tables stored in tables. */
7126 static
7127 void
fts_rename_aux_tables_to_hex_format(ib_vector_t * aux_tables,dict_table_t * parent_table)7128 fts_rename_aux_tables_to_hex_format(
7129 	ib_vector_t*	aux_tables,
7130 	dict_table_t*	parent_table)
7131 {
7132 	dberr_t err;
7133 	trx_t*	trx_rename = trx_allocate_for_background();
7134 	trx_rename->op_info = "Rename aux tables to hex format";
7135 	trx_rename->dict_operation_lock_mode = RW_X_LATCH;
7136 	trx_start_for_ddl(trx_rename, TRX_DICT_OP_TABLE);
7137 
7138 	err = fts_rename_aux_tables_to_hex_format_low(trx_rename,
7139 						      parent_table, aux_tables);
7140 
7141 	trx_rename->dict_operation_lock_mode = 0;
7142 
7143 	if (err != DB_SUCCESS) {
7144 
7145 		ib::warn() << "Rollback operations on all aux tables of "
7146 			"table "<< parent_table->name << ". All the fts index "
7147 			"associated with the table are marked as corrupted. "
7148 			"Please rebuild the index again.";
7149 
7150 		/* Corrupting the fts index related to parent table. */
7151 		trx_t*	trx_corrupt;
7152 		trx_corrupt = trx_allocate_for_background();
7153 		trx_corrupt->dict_operation_lock_mode = RW_X_LATCH;
7154 		trx_start_for_ddl(trx_corrupt, TRX_DICT_OP_TABLE);
7155 		fts_parent_all_index_set_corrupt(trx_corrupt, parent_table);
7156 		trx_corrupt->dict_operation_lock_mode = 0;
7157 		fts_sql_commit(trx_corrupt);
7158 		trx_free_for_background(trx_corrupt);
7159 	} else {
7160 		fts_sql_commit(trx_rename);
7161 	}
7162 
7163 	trx_free_for_background(trx_rename);
7164 	ib_vector_reset(aux_tables);
7165 }
7166 
7167 /** Set the hex format flag for the parent table.
7168 @param[in, out]	parent_table	parent table
7169 @param[in]	trx		transaction */
7170 static
7171 void
fts_set_parent_hex_format_flag(dict_table_t * parent_table,trx_t * trx)7172 fts_set_parent_hex_format_flag(
7173 	dict_table_t*	parent_table,
7174 	trx_t*		trx)
7175 {
7176 	if (!DICT_TF2_FLAG_IS_SET(parent_table,
7177 				  DICT_TF2_FTS_AUX_HEX_NAME)) {
7178 		DBUG_EXECUTE_IF("parent_table_flag_fail", DBUG_SUICIDE(););
7179 
7180 		dberr_t	err = fts_update_hex_format_flag(
7181 				trx, parent_table->id, true);
7182 
7183 		if (err != DB_SUCCESS) {
7184 			ib::fatal() << "Setting parent table "
7185 				<< parent_table->name
7186 				<< "to hex format failed. Please try "
7187 				<< "to restart the server again, if it "
7188 				<< "doesn't work, the system tables "
7189 				<< "might be corrupted.";
7190 		} else {
7191 			DICT_TF2_FLAG_SET(
7192 				parent_table, DICT_TF2_FTS_AUX_HEX_NAME);
7193 		}
7194 	}
7195 }
7196 
7197 /** Drop the obsolete auxilary table.
7198 @param[in]	tables	tables to be dropped. */
7199 static
7200 void
fts_drop_obsolete_aux_table_from_vector(ib_vector_t * tables)7201 fts_drop_obsolete_aux_table_from_vector(
7202 	ib_vector_t*	tables)
7203 {
7204 	dberr_t		err;
7205 
7206 	for (ulint count = 0; count < ib_vector_size(tables);
7207 	     ++count) {
7208 
7209 		fts_aux_table_t*	aux_drop_table;
7210 		aux_drop_table = static_cast<fts_aux_table_t*>(
7211 			ib_vector_get(tables, count));
7212 		trx_t*	trx_drop = trx_allocate_for_background();
7213 		trx_drop->op_info = "Drop obsolete aux tables";
7214 		trx_drop->dict_operation_lock_mode = RW_X_LATCH;
7215 		trx_start_for_ddl(trx_drop, TRX_DICT_OP_TABLE);
7216 
7217 		err = row_drop_table_for_mysql(
7218 			aux_drop_table->name, trx_drop, false, true);
7219 
7220 		trx_drop->dict_operation_lock_mode = 0;
7221 
7222 		if (err != DB_SUCCESS) {
7223 			/* We don't need to worry about the
7224 			failure, since server would try to
7225 			drop it on next restart, even if
7226 			the table was broken. */
7227 			ib::warn() << "Failed to drop obsolete aux table "
7228 				<< aux_drop_table->name << ", which is "
7229 				<< "harmless. will try to drop it on next "
7230 				<< "restart.";
7231 
7232 			fts_sql_rollback(trx_drop);
7233 		} else {
7234 			ib::info() << "Dropped obsolete aux"
7235 				" table '" << aux_drop_table->name
7236 				<< "'.";
7237 
7238 			fts_sql_commit(trx_drop);
7239 		}
7240 
7241 		trx_free_for_background(trx_drop);
7242 	}
7243 }
7244 
7245 /** Drop all the auxiliary table present in the vector.
7246 @param[in]	trx	transaction
7247 @param[in]	tables	tables to be dropped */
7248 static
7249 void
fts_drop_aux_table_from_vector(trx_t * trx,ib_vector_t * tables)7250 fts_drop_aux_table_from_vector(
7251 	trx_t*		trx,
7252 	ib_vector_t*	tables)
7253 {
7254 	for (ulint count = 0; count < ib_vector_size(tables);
7255 	    ++count) {
7256 		fts_aux_table_t*	aux_drop_table;
7257 		aux_drop_table = static_cast<fts_aux_table_t*>(
7258 				ib_vector_get(tables, count));
7259 
7260 		/* Check for the validity of the parent table */
7261 		if (!fts_valid_parent_table(aux_drop_table)) {
7262 
7263 			ib::warn() << "Parent table of FTS auxiliary table "
7264 				<< aux_drop_table->name << " not found.";
7265 
7266 			dberr_t err = fts_drop_table(trx, aux_drop_table->name);
7267 			if (err == DB_FAIL) {
7268 
7269 				char*	path = fil_make_filepath(
7270 					NULL, aux_drop_table->name, IBD, false);
7271 
7272 				if (path != NULL) {
7273 					os_file_delete_if_exists(
7274 							innodb_data_file_key,
7275 							path , NULL);
7276 					ut_free(path);
7277 				}
7278 			}
7279 		}
7280 	}
7281 }
7282 
7283 /**********************************************************************//**
7284 Check and drop all orphaned FTS auxiliary tables, those that don't have
7285 a parent table or FTS index defined on them.
7286 @return DB_SUCCESS or error code */
7287 static MY_ATTRIBUTE((nonnull))
7288 void
fts_check_and_drop_orphaned_tables(trx_t * trx,ib_vector_t * tables)7289 fts_check_and_drop_orphaned_tables(
7290 /*===============================*/
7291 	trx_t*		trx,			/*!< in: transaction */
7292 	ib_vector_t*	tables)			/*!< in: tables to check */
7293 {
7294 	mem_heap_t*	heap;
7295 	ib_vector_t*	aux_tables_to_rename;
7296 	ib_vector_t*	invalid_aux_tables;
7297 	ib_vector_t*	valid_aux_tables;
7298 	ib_vector_t*	drop_aux_tables;
7299 	ib_vector_t*	obsolete_aux_tables;
7300 	ib_alloc_t*	heap_alloc;
7301 
7302 	heap = mem_heap_create(1024);
7303 	heap_alloc = ib_heap_allocator_create(heap);
7304 
7305 	/* We store all aux tables belonging to the same parent table here,
7306 	and rename all these tables in a batch mode. */
7307 	aux_tables_to_rename = ib_vector_create(heap_alloc,
7308 						sizeof(fts_aux_table_t), 128);
7309 
7310 	/* We store all fake auxiliary table and orphaned table here. */
7311 	invalid_aux_tables = ib_vector_create(heap_alloc,
7312 					      sizeof(fts_aux_table_t), 128);
7313 
7314 	/* We store all valid aux tables. We use this to filter the
7315 	fake auxiliary table from invalid auxiliary tables. */
7316 	valid_aux_tables = ib_vector_create(heap_alloc,
7317 					    sizeof(fts_aux_table_t), 128);
7318 
7319 	/* We store all auxiliary tables to be dropped. */
7320 	drop_aux_tables = ib_vector_create(heap_alloc,
7321 					   sizeof(fts_aux_table_t), 128);
7322 
7323 	/* We store all obsolete auxiliary tables to be dropped. */
7324 	obsolete_aux_tables = ib_vector_create(heap_alloc,
7325 					       sizeof(fts_aux_table_t), 128);
7326 
7327 	/* Sort by parent_id first, in case rename will fail */
7328 	ib_vector_sort(tables, fts_check_aux_table_parent_id_cmp);
7329 
7330 	for (ulint i = 0; i < ib_vector_size(tables); ++i) {
7331 		dict_table_t*		parent_table;
7332 		fts_aux_table_t*	aux_table;
7333 		bool			drop = false;
7334 		dict_table_t*		table;
7335 		fts_aux_table_t*	next_aux_table = NULL;
7336 		ib_id_t			orig_parent_id = 0;
7337 		ib_id_t			orig_index_id = 0;
7338 		bool			rename = false;
7339 
7340 		aux_table = static_cast<fts_aux_table_t*>(
7341 			ib_vector_get(tables, i));
7342 
7343 		table = dict_table_open_on_id(
7344 			aux_table->id, TRUE, DICT_TABLE_OP_NORMAL);
7345 		orig_parent_id = aux_table->parent_id;
7346 		orig_index_id = aux_table->index_id;
7347 
7348 		if (table == NULL
7349 		    || strcmp(table->name.m_name, aux_table->name)) {
7350 
7351 			bool	fake_aux = false;
7352 
7353 			if (table != NULL) {
7354 				dict_table_close(table, TRUE, FALSE);
7355 			}
7356 
7357 			if (i + 1 < ib_vector_size(tables)) {
7358 				next_aux_table = static_cast<fts_aux_table_t*>(
7359 						ib_vector_get(tables, i + 1));
7360 			}
7361 
7362 			/* To know whether aux table is fake fts or
7363 			orphan fts table. */
7364 			for (ulint count = 0;
7365 			     count < ib_vector_size(valid_aux_tables);
7366 			     count++) {
7367 				fts_aux_table_t*	valid_aux;
7368 				valid_aux = static_cast<fts_aux_table_t*>(
7369 					ib_vector_get(valid_aux_tables, count));
7370 				if (strcmp(valid_aux->name,
7371 					   aux_table->name) == 0) {
7372 					fake_aux = true;
7373 					break;
7374 				}
7375 			}
7376 
7377 			/* All aux tables of parent table, whose id is
7378 			last_parent_id, have been checked, try to rename
7379 			them if necessary. */
7380 			if ((next_aux_table == NULL
7381 			     || orig_parent_id != next_aux_table->parent_id)
7382 			    && (!ib_vector_is_empty(aux_tables_to_rename))) {
7383 
7384 					ib_id_t	parent_id = fts_fake_hex_to_dec(
7385 							aux_table->parent_id);
7386 
7387 					parent_table = dict_table_open_on_id(
7388 						parent_id, TRUE,
7389 						DICT_TABLE_OP_NORMAL);
7390 
7391 					fts_rename_aux_tables_to_hex_format(
7392 						aux_tables_to_rename, parent_table);
7393 
7394 					dict_table_close(parent_table, TRUE,
7395 							 FALSE);
7396 			}
7397 
7398 			/* If the aux table is fake aux table. Skip it. */
7399 			if (!fake_aux) {
7400 				ib_vector_push(invalid_aux_tables, aux_table);
7401 			}
7402 
7403 			continue;
7404 		} else if (!DICT_TF2_FLAG_IS_SET(table,
7405 						 DICT_TF2_FTS_AUX_HEX_NAME)) {
7406 
7407 			aux_table->parent_id = fts_fake_hex_to_dec(
7408 						aux_table->parent_id);
7409 
7410 			if (aux_table->index_id != 0) {
7411 				aux_table->index_id = fts_fake_hex_to_dec(
7412 							aux_table->index_id);
7413 			}
7414 
7415 			ut_ad(aux_table->id > aux_table->parent_id);
7416 
7417 			/* Check whether parent table id and index id
7418 			are stored as decimal format. */
7419 			if (fts_valid_parent_table(aux_table)) {
7420 
7421 				parent_table = dict_table_open_on_id(
7422 					aux_table->parent_id, true,
7423 					DICT_TABLE_OP_NORMAL);
7424 
7425 				ut_ad(parent_table != NULL);
7426 				ut_ad(parent_table->fts != NULL);
7427 
7428 				if (!DICT_TF2_FLAG_IS_SET(
7429 					parent_table,
7430 					DICT_TF2_FTS_AUX_HEX_NAME)) {
7431 					rename = true;
7432 				}
7433 
7434 				dict_table_close(parent_table, TRUE, FALSE);
7435 			}
7436 
7437 			if (!rename) {
7438 				/* Reassign the original value of
7439 				aux table if it is not in decimal format */
7440 				aux_table->parent_id = orig_parent_id;
7441 				aux_table->index_id = orig_index_id;
7442 			}
7443 		}
7444 
7445 		if (table != NULL) {
7446 			dict_table_close(table, TRUE, FALSE);
7447 		}
7448 
7449 		if (!rename) {
7450 			/* Check the validity of the parent table. */
7451 			if (!fts_valid_parent_table(aux_table)) {
7452 				drop = true;
7453 			}
7454 		}
7455 
7456 		/* Filter out the fake aux table by comparing with the
7457 		current valid auxiliary table name. */
7458 		for (ulint count = 0;
7459 		     count < ib_vector_size(invalid_aux_tables); count++) {
7460 			fts_aux_table_t*	invalid_aux;
7461 			invalid_aux = static_cast<fts_aux_table_t*>(
7462 				ib_vector_get(invalid_aux_tables, count));
7463 			if (strcmp(invalid_aux->name, aux_table->name) == 0) {
7464 				ib_vector_remove(
7465 					invalid_aux_tables,
7466 					*reinterpret_cast<void**>(invalid_aux));
7467 				break;
7468 			}
7469 		}
7470 
7471 		ib_vector_push(valid_aux_tables, aux_table);
7472 
7473 		/* If the index associated with aux table is corrupted,
7474 		skip it. */
7475 		if (fts_check_corrupt_index(aux_table) > 0) {
7476 
7477 			if (i + 1 < ib_vector_size(tables)) {
7478 				next_aux_table = static_cast<fts_aux_table_t*>(
7479 					ib_vector_get(tables, i + 1));
7480 			}
7481 
7482 			if (next_aux_table == NULL
7483 			    || orig_parent_id != next_aux_table->parent_id) {
7484 
7485 				parent_table = dict_table_open_on_id(
7486 					aux_table->parent_id, TRUE,
7487 					DICT_TABLE_OP_NORMAL);
7488 
7489 				if (!ib_vector_is_empty(aux_tables_to_rename)) {
7490 					fts_rename_aux_tables_to_hex_format(
7491 						aux_tables_to_rename, parent_table);
7492 				} else {
7493 					fts_set_parent_hex_format_flag(
7494 						parent_table, trx);
7495 				}
7496 
7497 				dict_table_close(parent_table, TRUE, FALSE);
7498 			}
7499 
7500 			continue;
7501 		}
7502 
7503 		parent_table = dict_table_open_on_id(
7504 			aux_table->parent_id, TRUE, DICT_TABLE_OP_NORMAL);
7505 
7506 		if (drop) {
7507 			ib_vector_push(drop_aux_tables, aux_table);
7508 		} else {
7509 			if (FTS_IS_OBSOLETE_AUX_TABLE(aux_table->name)) {
7510 				ib_vector_push(obsolete_aux_tables, aux_table);
7511 				continue;
7512 			}
7513 		}
7514 
7515 		/* If the aux table is in decimal format, we should
7516 		rename it, so push it to aux_tables_to_rename */
7517 		if (!drop && rename) {
7518 			bool	rename_table = true;
7519 			for (ulint count = 0;
7520 			     count < ib_vector_size(aux_tables_to_rename);
7521 			     count++) {
7522 				fts_aux_table_t*	rename_aux =
7523 					static_cast<fts_aux_table_t*>(
7524 					ib_vector_get(aux_tables_to_rename,
7525 						      count));
7526 					if (strcmp(rename_aux->name,
7527 						   aux_table->name) == 0) {
7528 						rename_table = false;
7529 						break;
7530 					}
7531 			}
7532 
7533 			if (rename_table) {
7534 				ib_vector_push(aux_tables_to_rename,
7535 					       aux_table);
7536 			}
7537 		}
7538 
7539 		if (i + 1 < ib_vector_size(tables)) {
7540 			next_aux_table = static_cast<fts_aux_table_t*>(
7541 				ib_vector_get(tables, i + 1));
7542 		}
7543 
7544 		if ((next_aux_table == NULL
7545 		     || orig_parent_id != next_aux_table->parent_id)
7546 		    && !ib_vector_is_empty(aux_tables_to_rename)) {
7547 
7548 			ut_ad(rename);
7549 			ut_ad(!DICT_TF2_FLAG_IS_SET(
7550 				parent_table, DICT_TF2_FTS_AUX_HEX_NAME));
7551 
7552 			fts_rename_aux_tables_to_hex_format(
7553 				aux_tables_to_rename,parent_table);
7554 		}
7555 
7556 		/* The IDs are already in correct hex format. */
7557 		if (!drop && !rename) {
7558 			dict_table_t*	table;
7559 
7560 			table = dict_table_open_on_id(
7561 				aux_table->id, TRUE, DICT_TABLE_OP_NORMAL);
7562 
7563 			if (table != NULL
7564 			    && strcmp(table->name.m_name, aux_table->name)) {
7565 				dict_table_close(table, TRUE, FALSE);
7566 				table = NULL;
7567 			}
7568 
7569 			if (table != NULL
7570 			    && !DICT_TF2_FLAG_IS_SET(
7571 					table,
7572 					DICT_TF2_FTS_AUX_HEX_NAME)) {
7573 
7574 				DBUG_EXECUTE_IF("aux_table_flag_fail",
7575 					ib::warn() << "Setting aux table "
7576 						<< table->name << " to hex "
7577 						"format failed.";
7578 					fts_set_index_corrupt(
7579 						trx, aux_table->index_id,
7580 						parent_table);
7581 						goto table_exit;);
7582 
7583 				dberr_t err = fts_update_hex_format_flag(
7584 						trx, table->id, true);
7585 
7586 				if (err != DB_SUCCESS) {
7587 					ib::warn() << "Setting aux table "
7588 						<< table->name << " to hex "
7589 						"format failed.";
7590 
7591 					fts_set_index_corrupt(
7592 						trx, aux_table->index_id,
7593 						parent_table);
7594 				} else {
7595 					DICT_TF2_FLAG_SET(table,
7596 						DICT_TF2_FTS_AUX_HEX_NAME);
7597 				}
7598 			}
7599 #ifndef DBUG_OFF
7600 table_exit:
7601 #endif	/* !DBUG_OFF */
7602 
7603 			if (table != NULL) {
7604 				dict_table_close(table, TRUE, FALSE);
7605 			}
7606 
7607 			ut_ad(parent_table != NULL);
7608 
7609 			fts_set_parent_hex_format_flag(
7610 				parent_table, trx);
7611 		}
7612 
7613 		if (parent_table != NULL) {
7614 			dict_table_close(parent_table, TRUE, FALSE);
7615 		}
7616 	}
7617 
7618 	fts_drop_aux_table_from_vector(trx, invalid_aux_tables);
7619 	fts_drop_aux_table_from_vector(trx, drop_aux_tables);
7620 	fts_sql_commit(trx);
7621 
7622 	fts_drop_obsolete_aux_table_from_vector(obsolete_aux_tables);
7623 
7624 	/* Free the memory allocated at the beginning */
7625 	if (heap != NULL) {
7626 		mem_heap_free(heap);
7627 	}
7628 }
7629 
7630 /**********************************************************************//**
7631 Drop all orphaned FTS auxiliary tables, those that don't have a parent
7632 table or FTS index defined on them. */
7633 void
fts_drop_orphaned_tables(void)7634 fts_drop_orphaned_tables(void)
7635 /*==========================*/
7636 {
7637 	trx_t*			trx;
7638 	pars_info_t*		info;
7639 	mem_heap_t*		heap;
7640 	que_t*			graph;
7641 	ib_vector_t*		tables;
7642 	ib_alloc_t*		heap_alloc;
7643 	space_name_list_t	space_name_list;
7644 	dberr_t			error = DB_SUCCESS;
7645 
7646 	/* Note: We have to free the memory after we are done with the list. */
7647 	error = fil_get_space_names(space_name_list);
7648 
7649 	if (error == DB_OUT_OF_MEMORY) {
7650 		ib::fatal() << "Out of memory";
7651 	}
7652 
7653 	heap = mem_heap_create(1024);
7654 	heap_alloc = ib_heap_allocator_create(heap);
7655 
7656 	/* We store the table ids of all the FTS indexes that were found. */
7657 	tables = ib_vector_create(heap_alloc, sizeof(fts_aux_table_t), 128);
7658 
7659 	/* Get the list of all known .ibd files and check for orphaned
7660 	FTS auxiliary files in that list. We need to remove them because
7661 	users can't map them back to table names and this will create
7662 	unnecessary clutter. */
7663 
7664 	for (space_name_list_t::iterator it = space_name_list.begin();
7665 	     it != space_name_list.end();
7666 	     ++it) {
7667 
7668 		fts_aux_table_t*	fts_aux_table;
7669 
7670 		fts_aux_table = static_cast<fts_aux_table_t*>(
7671 			ib_vector_push(tables, NULL));
7672 
7673 		memset(fts_aux_table, 0x0, sizeof(*fts_aux_table));
7674 
7675 		if (!fts_is_aux_table_name(fts_aux_table, *it, strlen(*it))) {
7676 			ib_vector_pop(tables);
7677 		} else {
7678 			ulint	len = strlen(*it);
7679 
7680 			fts_aux_table->id = fil_space_get_id_by_name(*it);
7681 
7682 			/* We got this list from fil0fil.cc. The tablespace
7683 			with this name must exist. */
7684 			ut_a(fts_aux_table->id != ULINT_UNDEFINED);
7685 
7686 			fts_aux_table->name = static_cast<char*>(
7687 				mem_heap_dup(heap, *it, len + 1));
7688 
7689 			fts_aux_table->name[len] = 0;
7690 		}
7691 	}
7692 
7693 	trx = trx_allocate_for_background();
7694 	trx->op_info = "dropping orphaned FTS tables";
7695 	row_mysql_lock_data_dictionary(trx);
7696 
7697 	info = pars_info_create();
7698 
7699 	pars_info_bind_function(info, "my_func", fts_read_tables, tables);
7700 
7701 	graph = fts_parse_sql_no_dict_lock(
7702 		NULL,
7703 		info,
7704 		"DECLARE FUNCTION my_func;\n"
7705 		"DECLARE CURSOR c IS"
7706 		" SELECT NAME, ID"
7707 		" FROM SYS_TABLES;\n"
7708 		"BEGIN\n"
7709 		"\n"
7710 		"OPEN c;\n"
7711 		"WHILE 1 = 1 LOOP\n"
7712 		"  FETCH c INTO my_func();\n"
7713 		"  IF c % NOTFOUND THEN\n"
7714 		"    EXIT;\n"
7715 		"  END IF;\n"
7716 		"END LOOP;\n"
7717 		"CLOSE c;");
7718 
7719 	for (;;) {
7720 		error = fts_eval_sql(trx, graph);
7721 
7722 		if (error == DB_SUCCESS) {
7723 			fts_check_and_drop_orphaned_tables(trx, tables);
7724 			break;				/* Exit the loop. */
7725 		} else {
7726 			ib_vector_reset(tables);
7727 
7728 			fts_sql_rollback(trx);
7729 
7730 			if (error == DB_LOCK_WAIT_TIMEOUT) {
7731 				ib::warn() << "lock wait timeout reading"
7732 					" SYS_TABLES. Retrying!";
7733 
7734 				trx->error_state = DB_SUCCESS;
7735 			} else {
7736 				ib::error() << "(" << ut_strerr(error)
7737 					<< ") while reading SYS_TABLES.";
7738 
7739 				break;			/* Exit the loop. */
7740 			}
7741 		}
7742 	}
7743 
7744 	que_graph_free(graph);
7745 
7746 	row_mysql_unlock_data_dictionary(trx);
7747 
7748 	trx_free_for_background(trx);
7749 
7750 	if (heap != NULL) {
7751 		mem_heap_free(heap);
7752 	}
7753 
7754 	/** Free the memory allocated to store the .ibd names. */
7755 	for (space_name_list_t::iterator it = space_name_list.begin();
7756 	     it != space_name_list.end();
7757 	     ++it) {
7758 
7759 		UT_DELETE_ARRAY(*it);
7760 	}
7761 }
7762 
7763 /**********************************************************************//**
7764 Check whether user supplied stopword table is of the right format.
7765 Caller is responsible to hold dictionary locks.
7766 @return the stopword column charset if qualifies */
7767 CHARSET_INFO*
fts_valid_stopword_table(const char * stopword_table_name)7768 fts_valid_stopword_table(
7769 /*=====================*/
7770 	const char*	stopword_table_name)	/*!< in: Stopword table
7771 						name */
7772 {
7773 	dict_table_t*	table;
7774 	dict_col_t*     col = NULL;
7775 
7776 	if (!stopword_table_name) {
7777 		return(NULL);
7778 	}
7779 
7780 	table = dict_table_get_low(stopword_table_name);
7781 
7782 	if (!table) {
7783 		ib::error() << "User stopword table " << stopword_table_name
7784 			<< " does not exist.";
7785 
7786 		return(NULL);
7787 	} else {
7788 		const char*     col_name;
7789 
7790 		col_name = dict_table_get_col_name(table, 0);
7791 
7792 		if (ut_strcmp(col_name, "value")) {
7793 			ib::error() << "Invalid column name for stopword"
7794 				" table " << stopword_table_name << ". Its"
7795 				" first column must be named as 'value'.";
7796 
7797 			return(NULL);
7798 		}
7799 
7800 		col = dict_table_get_nth_col(table, 0);
7801 
7802 		if (col->mtype != DATA_VARCHAR
7803 		    && col->mtype != DATA_VARMYSQL) {
7804 			ib::error() << "Invalid column type for stopword"
7805 				" table " << stopword_table_name << ". Its"
7806 				" first column must be of varchar type";
7807 
7808 			return(NULL);
7809 		}
7810 	}
7811 
7812 	ut_ad(col);
7813 
7814 	return(fts_get_charset(col->prtype));
7815 }
7816 
7817 /**********************************************************************//**
7818 This function loads the stopword into the FTS cache. It also
7819 records/fetches stopword configuration to/from FTS configure
7820 table, depending on whether we are creating or reloading the
7821 FTS.
7822 @return TRUE if load operation is successful */
7823 ibool
fts_load_stopword(const dict_table_t * table,trx_t * trx,const char * global_stopword_table,const char * session_stopword_table,ibool stopword_is_on,ibool reload)7824 fts_load_stopword(
7825 /*==============*/
7826 	const dict_table_t*
7827 			table,			/*!< in: Table with FTS */
7828 	trx_t*		trx,			/*!< in: Transactions */
7829 	const char*	global_stopword_table,	/*!< in: Global stopword table
7830 						name */
7831 	const char*	session_stopword_table,	/*!< in: Session stopword table
7832 						name */
7833 	ibool		stopword_is_on,		/*!< in: Whether stopword
7834 						option is turned on/off */
7835 	ibool		reload)			/*!< in: Whether it is
7836 						for reloading FTS table */
7837 {
7838 	fts_table_t	fts_table;
7839 	fts_string_t	str;
7840 	dberr_t		error = DB_SUCCESS;
7841 	ulint		use_stopword;
7842 	fts_cache_t*	cache;
7843 	const char*	stopword_to_use = NULL;
7844 	ibool		new_trx = FALSE;
7845 	byte		str_buffer[MAX_FULL_NAME_LEN + 1];
7846 
7847 	FTS_INIT_FTS_TABLE(&fts_table, "CONFIG", FTS_COMMON_TABLE, table);
7848 
7849 	cache = table->fts->cache;
7850 
7851 	if (!reload && !(cache->stopword_info.status
7852 			 & STOPWORD_NOT_INIT)) {
7853 		return(TRUE);
7854 	}
7855 
7856 	if (!trx) {
7857 		trx = trx_allocate_for_background();
7858 		trx->op_info = "upload FTS stopword";
7859 		new_trx = TRUE;
7860 	}
7861 
7862 	/* First check whether stopword filtering is turned off */
7863 	if (reload) {
7864 		error = fts_config_get_ulint(
7865 			trx, &fts_table, FTS_USE_STOPWORD, &use_stopword);
7866 	} else {
7867 		use_stopword = (ulint) stopword_is_on;
7868 
7869 		error = fts_config_set_ulint(
7870 			trx, &fts_table, FTS_USE_STOPWORD, use_stopword);
7871 	}
7872 
7873 	if (error != DB_SUCCESS) {
7874 		goto cleanup;
7875 	}
7876 
7877 	/* If stopword is turned off, no need to continue to load the
7878 	stopword into cache, but still need to do initialization */
7879 	if (!use_stopword) {
7880 		cache->stopword_info.status = STOPWORD_OFF;
7881 		goto cleanup;
7882 	}
7883 
7884 	if (reload) {
7885 		/* Fetch the stopword table name from FTS config
7886 		table */
7887 		str.f_n_char = 0;
7888 		str.f_str = str_buffer;
7889 		str.f_len = sizeof(str_buffer) - 1;
7890 
7891 		error = fts_config_get_value(
7892 			trx, &fts_table, FTS_STOPWORD_TABLE_NAME, &str);
7893 
7894 		if (error != DB_SUCCESS) {
7895 			goto cleanup;
7896 		}
7897 
7898 		if (strlen((char*) str.f_str) > 0) {
7899 			stopword_to_use = (const char*) str.f_str;
7900 		}
7901 	} else {
7902 		stopword_to_use = (session_stopword_table)
7903 			? session_stopword_table : global_stopword_table;
7904 	}
7905 
7906 	if (stopword_to_use
7907 	    && fts_load_user_stopword(table->fts, stopword_to_use,
7908 				      &cache->stopword_info)) {
7909 		/* Save the stopword table name to the configure
7910 		table */
7911 		if (!reload) {
7912 			str.f_n_char = 0;
7913 			str.f_str = (byte*) stopword_to_use;
7914 			str.f_len = ut_strlen(stopword_to_use);
7915 
7916 			error = fts_config_set_value(
7917 				trx, &fts_table, FTS_STOPWORD_TABLE_NAME, &str);
7918 		}
7919 	} else {
7920 		/* Load system default stopword list */
7921 		fts_load_default_stopword(&cache->stopword_info);
7922 	}
7923 
7924 cleanup:
7925 	if (new_trx) {
7926 		if (error == DB_SUCCESS) {
7927 			fts_sql_commit(trx);
7928 		} else {
7929 			fts_sql_rollback(trx);
7930 		}
7931 
7932 		trx_free_for_background(trx);
7933 	}
7934 
7935 	if (!cache->stopword_info.cached_stopword) {
7936 		cache->stopword_info.cached_stopword = rbt_create_arg_cmp(
7937 			sizeof(fts_tokenizer_word_t), innobase_fts_text_cmp,
7938 			&my_charset_latin1);
7939 	}
7940 
7941 	return(error == DB_SUCCESS);
7942 }
7943 
7944 /**********************************************************************//**
7945 Callback function when we initialize the FTS at the start up
7946 time. It recovers the maximum Doc IDs presented in the current table.
7947 @return: always returns TRUE */
7948 static
7949 ibool
fts_init_get_doc_id(void * row,void * user_arg)7950 fts_init_get_doc_id(
7951 /*================*/
7952 	void*	row,			/*!< in: sel_node_t* */
7953 	void*	user_arg)		/*!< in: fts cache */
7954 {
7955 	doc_id_t	doc_id = FTS_NULL_DOC_ID;
7956 	sel_node_t*	node = static_cast<sel_node_t*>(row);
7957 	que_node_t*	exp = node->select_list;
7958 	fts_cache_t*    cache = static_cast<fts_cache_t*>(user_arg);
7959 
7960 	ut_ad(ib_vector_is_empty(cache->get_docs));
7961 
7962 	/* Copy each indexed column content into doc->text.f_str */
7963 	if (exp) {
7964 		dfield_t*	dfield = que_node_get_val(exp);
7965 		dtype_t*        type = dfield_get_type(dfield);
7966 		void*           data = dfield_get_data(dfield);
7967 
7968 		ut_a(dtype_get_mtype(type) == DATA_INT);
7969 
7970 		doc_id = static_cast<doc_id_t>(mach_read_from_8(
7971 			static_cast<const byte*>(data)));
7972 
7973 		if (doc_id >= cache->next_doc_id) {
7974 			cache->next_doc_id = doc_id + 1;
7975 		}
7976 	}
7977 
7978 	return(TRUE);
7979 }
7980 
7981 /**********************************************************************//**
7982 Callback function when we initialize the FTS at the start up
7983 time. It recovers Doc IDs that have not sync-ed to the auxiliary
7984 table, and require to bring them back into FTS index.
7985 @return: always returns TRUE */
7986 static
7987 ibool
fts_init_recover_doc(void * row,void * user_arg)7988 fts_init_recover_doc(
7989 /*=================*/
7990 	void*	row,			/*!< in: sel_node_t* */
7991 	void*	user_arg)		/*!< in: fts cache */
7992 {
7993 
7994 	fts_doc_t       doc;
7995 	ulint		doc_len = 0;
7996 	ulint		field_no = 0;
7997 	fts_get_doc_t*  get_doc = static_cast<fts_get_doc_t*>(user_arg);
7998 	doc_id_t	doc_id = FTS_NULL_DOC_ID;
7999 	sel_node_t*	node = static_cast<sel_node_t*>(row);
8000 	que_node_t*	exp = node->select_list;
8001 	fts_cache_t*	cache = get_doc->cache;
8002 	st_mysql_ftparser*	parser = get_doc->index_cache->index->parser;
8003 
8004 	fts_doc_init(&doc);
8005 	doc.found = TRUE;
8006 
8007 	ut_ad(cache);
8008 
8009 	/* Copy each indexed column content into doc->text.f_str */
8010 	while (exp) {
8011 		dfield_t*	dfield = que_node_get_val(exp);
8012 		ulint		len = dfield_get_len(dfield);
8013 
8014 		if (field_no == 0) {
8015 			dtype_t*        type = dfield_get_type(dfield);
8016 			void*           data = dfield_get_data(dfield);
8017 
8018 			ut_a(dtype_get_mtype(type) == DATA_INT);
8019 
8020 			doc_id = static_cast<doc_id_t>(mach_read_from_8(
8021 				static_cast<const byte*>(data)));
8022 
8023 			field_no++;
8024 			exp = que_node_get_next(exp);
8025 			continue;
8026 		}
8027 
8028 		if (len == UNIV_SQL_NULL) {
8029 			exp = que_node_get_next(exp);
8030 			continue;
8031 		}
8032 
8033 		ut_ad(get_doc);
8034 
8035 		if (!get_doc->index_cache->charset) {
8036 			get_doc->index_cache->charset = fts_get_charset(
8037 				dfield->type.prtype);
8038 		}
8039 
8040 		doc.charset = get_doc->index_cache->charset;
8041 		doc.is_ngram = get_doc->index_cache->index->is_ngram;
8042 
8043 		if (dfield_is_ext(dfield)) {
8044 			dict_table_t*	table = cache->sync->table;
8045 
8046 			doc.text.f_str = btr_copy_externally_stored_field(
8047 				&doc.text.f_len,
8048 				static_cast<byte*>(dfield_get_data(dfield)),
8049 				dict_table_page_size(table), len,
8050 				static_cast<mem_heap_t*>(doc.self_heap->arg));
8051 		} else {
8052 			doc.text.f_str = static_cast<byte*>(
8053 				dfield_get_data(dfield));
8054 
8055 			doc.text.f_len = len;
8056 		}
8057 
8058 		if (field_no == 1) {
8059 			fts_tokenize_document(&doc, NULL, parser);
8060 		} else {
8061 			fts_tokenize_document_next(&doc, doc_len, NULL, parser);
8062 		}
8063 
8064 		exp = que_node_get_next(exp);
8065 
8066 		doc_len += (exp) ? len + 1 : len;
8067 
8068 		field_no++;
8069 	}
8070 
8071 	fts_cache_add_doc(cache, get_doc->index_cache, doc_id, doc.tokens);
8072 
8073 	fts_doc_free(&doc);
8074 
8075 	cache->added++;
8076 
8077 	if (doc_id >= cache->next_doc_id) {
8078 		cache->next_doc_id = doc_id + 1;
8079 	}
8080 
8081 	return(TRUE);
8082 }
8083 
8084 /**********************************************************************//**
8085 This function brings FTS index in sync when FTS index is first
8086 used. There are documents that have not yet sync-ed to auxiliary
8087 tables from last server abnormally shutdown, we will need to bring
8088 such document into FTS cache before any further operations
8089 @return TRUE if all OK */
8090 ibool
fts_init_index(dict_table_t * table,ibool has_cache_lock)8091 fts_init_index(
8092 /*===========*/
8093 	dict_table_t*	table,		/*!< in: Table with FTS */
8094 	ibool		has_cache_lock)	/*!< in: Whether we already have
8095 					cache lock */
8096 {
8097 	dict_index_t*   index;
8098 	doc_id_t        start_doc;
8099 	fts_get_doc_t*  get_doc = NULL;
8100 	fts_cache_t*    cache = table->fts->cache;
8101 	bool		need_init = false;
8102 
8103 	ut_ad(!mutex_own(&dict_sys->mutex));
8104 
8105 	/* First check cache->get_docs is initialized */
8106 	if (!has_cache_lock) {
8107 		rw_lock_x_lock(&cache->lock);
8108 	}
8109 
8110 	rw_lock_x_lock(&cache->init_lock);
8111 	if (cache->get_docs == NULL) {
8112 		cache->get_docs = fts_get_docs_create(cache);
8113 	}
8114 	rw_lock_x_unlock(&cache->init_lock);
8115 
8116 	if (table->fts->fts_status & ADDED_TABLE_SYNCED) {
8117 		goto func_exit;
8118 	}
8119 
8120 	need_init = true;
8121 
8122 	start_doc = cache->synced_doc_id;
8123 
8124 	if (!start_doc) {
8125 		fts_cmp_set_sync_doc_id(table, 0, TRUE, &start_doc);
8126 		cache->synced_doc_id = start_doc;
8127 	}
8128 
8129 	/* No FTS index, this is the case when previous FTS index
8130 	dropped, and we re-initialize the Doc ID system for subsequent
8131 	insertion */
8132 	if (ib_vector_is_empty(cache->get_docs)) {
8133 		index = table->fts_doc_id_index;
8134 
8135 		ut_a(index);
8136 
8137 		fts_doc_fetch_by_doc_id(NULL, start_doc, index,
8138 					FTS_FETCH_DOC_BY_ID_LARGE,
8139 					fts_init_get_doc_id, cache);
8140 	} else {
8141 		if (table->fts->cache->stopword_info.status
8142 		    & STOPWORD_NOT_INIT) {
8143 			fts_load_stopword(table, NULL, NULL, NULL, TRUE, TRUE);
8144 		}
8145 
8146 		for (ulint i = 0; i < ib_vector_size(cache->get_docs); ++i) {
8147 			get_doc = static_cast<fts_get_doc_t*>(
8148 				ib_vector_get(cache->get_docs, i));
8149 
8150 			index = get_doc->index_cache->index;
8151 
8152 			fts_doc_fetch_by_doc_id(NULL, start_doc, index,
8153 						FTS_FETCH_DOC_BY_ID_LARGE,
8154 						fts_init_recover_doc, get_doc);
8155 		}
8156 	}
8157 
8158 	table->fts->fts_status |= ADDED_TABLE_SYNCED;
8159 
8160 	fts_get_docs_clear(cache->get_docs);
8161 
8162 func_exit:
8163 	if (!has_cache_lock) {
8164 		rw_lock_x_unlock(&cache->lock);
8165 	}
8166 
8167 	if (need_init) {
8168 		mutex_enter(&dict_sys->mutex);
8169 		/* Register the table with the optimize thread. */
8170 		fts_optimize_add_table(table);
8171 		mutex_exit(&dict_sys->mutex);
8172 	}
8173 
8174 	return(TRUE);
8175 }
8176 
8177 /** Check if the all the auxillary tables associated with FTS index are in
8178 consistent state. For now consistency is check only by ensuring
8179 index->page_no != FIL_NULL
8180 @param[out]	base_table	table has host fts index
8181 @param[in,out]	trx		trx handler */
8182 void
fts_check_corrupt(dict_table_t * base_table,trx_t * trx)8183 fts_check_corrupt(
8184 	dict_table_t*	base_table,
8185 	trx_t*		trx)
8186 {
8187 	bool		sane = true;
8188 	fts_table_t	fts_table;
8189 
8190 	/* Iterate over the common table and check for their sanity. */
8191 	FTS_INIT_FTS_TABLE(&fts_table, NULL, FTS_COMMON_TABLE, base_table);
8192 
8193 	for (ulint i = 0; fts_common_tables[i] != NULL && sane; ++i) {
8194 
8195 		char	table_name[MAX_FULL_NAME_LEN];
8196 
8197 		fts_table.suffix = fts_common_tables[i];
8198 		fts_get_table_name(&fts_table, table_name);
8199 
8200 		dict_table_t*	aux_table = dict_table_open_on_name(
8201 			table_name, true, FALSE, DICT_ERR_IGNORE_NONE);
8202 
8203 		if (aux_table == NULL) {
8204 			dict_set_corrupted(
8205 				dict_table_get_first_index(base_table),
8206 				trx, "FTS_SANITY_CHECK");
8207 			ut_ad(base_table->corrupted == TRUE);
8208 			sane = false;
8209 			continue;
8210 		}
8211 
8212 		for (dict_index_t*	aux_table_index =
8213 			UT_LIST_GET_FIRST(aux_table->indexes);
8214 		     aux_table_index != NULL;
8215 		     aux_table_index =
8216 			UT_LIST_GET_NEXT(indexes, aux_table_index)) {
8217 
8218 			/* Check if auxillary table needed for FTS is sane. */
8219 			if (aux_table_index->page == FIL_NULL) {
8220 				dict_set_corrupted(
8221 					dict_table_get_first_index(base_table),
8222 					trx, "FTS_SANITY_CHECK");
8223 				ut_ad(base_table->corrupted == TRUE);
8224 				sane = false;
8225 			}
8226 		}
8227 
8228 		dict_table_close(aux_table, FALSE, FALSE);
8229 	}
8230 }
8231