1 /*****************************************************************************
2 
3 Copyright (c) 2011, 2020, Oracle and/or its affiliates. All Rights Reserved.
4 
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License, version 2.0,
7 as published by the Free Software Foundation.
8 
9 This program is also distributed with certain software (including
10 but not limited to OpenSSL) that is licensed under separate terms,
11 as designated in a particular file or component or in included license
12 documentation.  The authors of MySQL hereby grant you an additional
13 permission to link the program and your derivative works with the
14 separately licensed software that they have included with MySQL.
15 
16 This program is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19 GNU General Public License, version 2.0, for more details.
20 
21 You should have received a copy of the GNU General Public License along with
22 this program; if not, write to the Free Software Foundation, Inc.,
23 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
24 
25 *****************************************************************************/
26 
27 /**************************************************//**
28 @file fts/fts0fts.cc
29 Full Text Search interface
30 ***********************************************************************/
31 
32 #include "trx0roll.h"
33 #include "row0mysql.h"
34 #include "row0upd.h"
35 #include "dict0types.h"
36 #include "dict0stats_bg.h"
37 #include "row0sel.h"
38 
39 #include "fts0fts.h"
40 #include "fts0priv.h"
41 #include "fts0types.h"
42 
43 #include "fts0types.ic"
44 #include "fts0vlc.ic"
45 #include "dict0priv.h"
46 #include "dict0stats.h"
47 #include "btr0pcur.h"
48 #include <vector>
49 
50 #include "ha_prototypes.h"
51 
/** NOTE(review): not referenced in the visible part of this file;
presumably the maximum length of a printed FTS id - confirm at the users */
#define FTS_MAX_ID_LEN	32

/** Key of the cache-size row in the FTS CONFIG table (inserted as the
`key` column value by fts_config_table_insert_values_sql) */
#define FTS_MAX_CACHE_SIZE_IN_MB	"cache_size_in_mb"

/** Check whether an auxiliary table name denotes an obsolete table, by
looking for one of the obsolete keywords ("DOC_ID", "ADDED", "STOPWORDS")
anywhere in the name. The argument is evaluated up to three times, so it
must not have side effects. */
#define FTS_IS_OBSOLETE_AUX_TABLE(table_name)			\
	(strstr((table_name), "DOC_ID") != NULL			\
	 || strstr((table_name), "ADDED") != NULL		\
	 || strstr((table_name), "STOPWORDS") != NULL)
63 
/** Maximum FTS cache size for each table; intended to be a
configurable variable */
UNIV_INTERN ulong	fts_max_cache_size;

/** Whether the total memory used for the FTS cache is exhausted, so that
a sync is needed to free some memory */
UNIV_INTERN bool       fts_need_sync = false;

/** Variable specifying the total memory allocated for the FTS cache */
UNIV_INTERN ulong      fts_max_total_cache_size;

/** FTS result cache limit for each query; intended to be a
configurable variable */
UNIV_INTERN ulong	fts_result_cache_limit;

/** Variable specifying the maximum FTS token size */
UNIV_INTERN ulong	fts_max_token_size;

/** Variable specifying the minimum FTS token size */
UNIV_INTERN ulong	fts_min_token_size;


/* NOTE(review): the two globals below are marked as test-only by the
original FIXME; they have external linkage and no visible users here. */
// FIXME: testing
ib_time_t elapsed_time = 0;
ulint n_nodes = 0;

/** Error condition reported by fts_utf8_decode() */
const ulint UTF8_ERROR = 0xFFFFFFFF;

#ifdef FTS_CACHE_SIZE_DEBUG
/** The cache size permissible lower limit, in megabytes (1 MB) */
static const ulint FTS_CACHE_SIZE_LOWER_LIMIT_IN_MB = 1;

/** The cache size permissible upper limit, in megabytes (1024 MB = 1 GB) */
static const ulint FTS_CACHE_SIZE_UPPER_LIMIT_IN_MB = 1024;
#endif /* FTS_CACHE_SIZE_DEBUG */

/** Time to sleep after a DEADLOCK error before retrying the operation.
NOTE(review): the unit is not visible here - presumably microseconds;
confirm at the sleep call site. */
static const ulint FTS_DEADLOCK_RETRY_WAIT = 100000;

#ifdef UNIV_PFS_RWLOCK
/* Performance schema keys for the FTS cache rw-locks (see the
rw_lock_create() calls in fts_cache_create()) */
UNIV_INTERN mysql_pfs_key_t	fts_cache_rw_lock_key;
UNIV_INTERN mysql_pfs_key_t	fts_cache_init_rw_lock_key;
#endif /* UNIV_PFS_RWLOCK */

#ifdef UNIV_PFS_MUTEX
/* Performance schema keys for the FTS mutexes */
UNIV_INTERN mysql_pfs_key_t	fts_delete_mutex_key;
UNIV_INTERN mysql_pfs_key_t	fts_optimize_mutex_key;
UNIV_INTERN mysql_pfs_key_t	fts_bg_threads_mutex_key;
UNIV_INTERN mysql_pfs_key_t	fts_doc_id_mutex_key;
UNIV_INTERN mysql_pfs_key_t	fts_pll_tokenize_mutex_key;
#endif /* UNIV_PFS_MUTEX */

/** Variable recording innodb_fts_internal_tbl_name, used by the
information schema tables INNODB_FTS_INSERTED etc. */
UNIV_INTERN char* fts_internal_tbl_name		= NULL;
120 
/** InnoDB default stopword list:
There are different versions of stopword lists; the stopwords listed
below come from the "Google Stopword" list. Reference:
http://meta.wikimedia.org/wiki/Stop_word_list/google_stop_word_list.
The final version of the InnoDB default stopword list is still pending
a decision.
The array is terminated by a NULL entry. Note that "the" is listed twice
(once in alphabetical position and once after "und"). */
const char *fts_default_stopword[] =
{
	"a",
	"about",
	"an",
	"are",
	"as",
	"at",
	"be",
	"by",
	"com",
	"de",
	"en",
	"for",
	"from",
	"how",
	"i",
	"in",
	"is",
	"it",
	"la",
	"of",
	"on",
	"or",
	"that",
	"the",
	"this",
	"to",
	"was",
	"what",
	"when",
	"where",
	"who",
	"will",
	"with",
	"und",
	"the",
	"www",
	NULL
};
167 
/** Describes one FTS auxiliary table; used for storing table info when
checking for orphaned tables.
NOTE(review): ownership of `name` is not visible here - confirm who
allocates and frees it. */
struct fts_aux_table_t {
	table_id_t	id;		/*!< Table id */
	table_id_t	parent_id;	/*!< Parent table id */
	table_id_t	index_id;	/*!< Table FT index id */
	char*		name;		/*!< Name of the table */
};
175 
/** SQL statements for creating the ancillary common FTS tables
(DELETED, DELETED_CACHE, BEING_DELETED, BEING_DELETED_CACHE and CONFIG),
each with a unique clustered index.
This is a printf-style template with ten "%s" placeholders, two per
table (one in CREATE TABLE, one in CREATE INDEX).
NOTE(review): presumably every placeholder receives the same auxiliary
table name prefix - confirm at the formatting call site. */
static const char* fts_create_common_tables_sql = {
	"BEGIN\n"
	""
	"CREATE TABLE \"%s_DELETED\" (\n"
	"  doc_id BIGINT UNSIGNED\n"
	") COMPACT;\n"
	"CREATE UNIQUE CLUSTERED INDEX IND ON \"%s_DELETED\"(doc_id);\n"
	""
	"CREATE TABLE \"%s_DELETED_CACHE\" (\n"
	"  doc_id BIGINT UNSIGNED\n"
	") COMPACT;\n"
	"CREATE UNIQUE CLUSTERED INDEX IND "
		"ON \"%s_DELETED_CACHE\"(doc_id);\n"
	""
	"CREATE TABLE \"%s_BEING_DELETED\" (\n"
	"  doc_id BIGINT UNSIGNED\n"
	") COMPACT;\n"
	"CREATE UNIQUE CLUSTERED INDEX IND "
		"ON \"%s_BEING_DELETED\"(doc_id);\n"
	""
	"CREATE TABLE \"%s_BEING_DELETED_CACHE\" (\n"
	"  doc_id BIGINT UNSIGNED\n"
	") COMPACT;\n"
	"CREATE UNIQUE CLUSTERED INDEX IND "
		"ON \"%s_BEING_DELETED_CACHE\"(doc_id);\n"
	""
	"CREATE TABLE \"%s_CONFIG\" (\n"
	"  key CHAR(50),\n"
	"  value CHAR(200) NOT NULL\n"
	") COMPACT;\n"
	"CREATE UNIQUE CLUSTERED INDEX IND ON \"%s_CONFIG\"(key);\n"
};
209 
#ifdef FTS_DOC_STATS_DEBUG
/** Template for creating the FTS auxiliary index specific tables. This is
mainly designed for the statistics work in the future.
printf-style template with two "%s" placeholders (table name prefix of
the DOC_ID table); only compiled when FTS_DOC_STATS_DEBUG is defined. */
static const char* fts_create_index_tables_sql = {
	"BEGIN\n"
	""
	"CREATE TABLE \"%s_DOC_ID\" (\n"
	"   doc_id BIGINT UNSIGNED,\n"
	"   word_count INTEGER UNSIGNED NOT NULL\n"
	") COMPACT;\n"
	"CREATE UNIQUE CLUSTERED INDEX IND ON \"%s_DOC_ID\"(doc_id);\n"
};
#endif

/** Template for creating the clustered index on an ancillary FTS word
index table. printf-style template with one "%s" placeholder (the table
name); the index is built on (word, first_doc_id). */
static const char* fts_create_index_sql = {
	"BEGIN\n"
	""
	"CREATE UNIQUE CLUSTERED INDEX FTS_INDEX_TABLE_IND "
		"ON \"%s\"(word, first_doc_id);\n"
};
231 
/** FTS auxiliary table suffixes that are common to all FT indexes.
The array is terminated by a NULL entry. */
static const char* fts_common_tables[] = {
	"BEING_DELETED",
	"BEING_DELETED_CACHE",
	"CONFIG",
	"DELETED",
	"DELETED_CACHE",
	NULL
};
241 
/** FTS auxiliary INDEX split intervals: each entry pairs a split value
with the suffix of an auxiliary INDEX table. The array is terminated by
an entry whose value is 0 (and whose suffix is NULL); loops in this file
iterate until `.value` is 0.
NOTE(review): how the split values are compared against a word is not
visible here - confirm in the selector lookup code. */
const  fts_index_selector_t fts_index_selector[] = {
	{ 9, "INDEX_1" },
	{ 65, "INDEX_2" },
	{ 70, "INDEX_3" },
	{ 75, "INDEX_4" },
	{ 80, "INDEX_5" },
	{ 85, "INDEX_6" },
	{  0 , NULL	 }
};
252 
/** Default config values for FTS indexes on a table.
printf-style template with five "%s" placeholders, one per INSERT
statement (the name of the table inserted into). It inserts the default
rows: cache size 256, optimize limit 180, synced doc id 0, total deleted
count 0 and table state 0. */
static const char* fts_config_table_insert_values_sql =
	"BEGIN\n"
	"\n"
	"INSERT INTO \"%s\" VALUES('"
		FTS_MAX_CACHE_SIZE_IN_MB "', '256');\n"
	""
	"INSERT INTO \"%s\" VALUES('"
		FTS_OPTIMIZE_LIMIT_IN_SECS  "', '180');\n"
	""
	"INSERT INTO \"%s\" VALUES ('"
		FTS_SYNCED_DOC_ID "', '0');\n"
	""
	"INSERT INTO \"%s\" VALUES ('"
		FTS_TOTAL_DELETED_COUNT "', '0');\n"
	"" /* Note: 0 == FTS_TABLE_STATE_RUNNING */
	"INSERT INTO \"%s\" VALUES ('"
		FTS_TABLE_STATE "', '0');\n";
271 
/** Run SYNC on the table, i.e., write out data from the cache to the
FTS auxiliary INDEX table and clear the cache at the end.
@param[in,out]	sync		sync state
@param[in]	unlock_cache	whether to unlock the cache lock when
				writing a node
@param[in]	wait		whether to wait when a sync is in progress
@param[in]	has_dict	whether the caller has the dict operation lock
@return DB_SUCCESS if all OK */
static
dberr_t
fts_sync(
	fts_sync_t*	sync,
	bool		unlock_cache,
	bool		wait,
	bool		has_dict);

/****************************************************************//**
Release all resources held by the words rb tree e.g., the node ilist. */
static
void
fts_words_free(
/*===========*/
	ib_rbt_t*	words)		/*!< in: rb tree of words */
	MY_ATTRIBUTE((nonnull));
#ifdef FTS_CACHE_SIZE_DEBUG
/****************************************************************//**
Read the max cache size parameter from the config table. */
static
void
fts_update_max_cache_size(
/*======================*/
	fts_sync_t*	sync);		/*!< in: sync state */
#endif

/*********************************************************************//**
This function fetches the document just inserted right before
we commit the transaction, tokenizes the inserted text data
and inserts it into the FTS auxiliary table and its cache.
NOTE(review): the return type is ulint although the result is
documented as a boolean - confirm against the definition.
@return TRUE if successful */
static
ulint
fts_add_doc_by_id(
/*==============*/
	fts_trx_table_t*ftt,		/*!< in: FTS trx table */
	doc_id_t	doc_id,		/*!< in: doc id */
	ib_vector_t*	fts_indexes MY_ATTRIBUTE((unused)));
					/*!< in: affected fts indexes */
#ifdef FTS_DOC_STATS_DEBUG
/****************************************************************//**
Check whether a particular word (term) exists in the FTS index.
@return DB_SUCCESS if all went fine */
static
dberr_t
fts_is_word_in_index(
/*=================*/
	trx_t*		trx,		/*!< in: FTS query state */
	que_t**		graph,		/*!< out: Query graph */
	fts_table_t*	fts_table,	/*!< in: table instance */
	const fts_string_t* word,	/*!< in: the word to check */
	ibool*		found)		/*!< out: TRUE if exists */
	MY_ATTRIBUTE((nonnull, warn_unused_result));
#endif /* FTS_DOC_STATS_DEBUG */

/******************************************************************//**
Update the last document id. This function could create a new
transaction to update the last document id.
Only `table` is declared nonnull; `table_name` and `trx` may be NULL.
@return DB_SUCCESS if OK */
static
dberr_t
fts_update_sync_doc_id(
/*===================*/
	const dict_table_t*	table,		/*!< in: table */
	const char*		table_name,	/*!< in: table name, or NULL */
	doc_id_t		doc_id,		/*!< in: last document id */
	trx_t*			trx)		/*!< in: update trx, or NULL */
	MY_ATTRIBUTE((nonnull(1)));
347 
348 /****************************************************************//**
349 This function loads the default InnoDB stopword list */
350 static
351 void
fts_load_default_stopword(fts_stopword_t * stopword_info)352 fts_load_default_stopword(
353 /*======================*/
354 	fts_stopword_t*		stopword_info)	/*!< in: stopword info */
355 {
356 	fts_string_t		str;
357 	mem_heap_t*		heap;
358 	ib_alloc_t*		allocator;
359 	ib_rbt_t*		stop_words;
360 
361 	allocator = stopword_info->heap;
362 	heap = static_cast<mem_heap_t*>(allocator->arg);
363 
364 	if (!stopword_info->cached_stopword) {
365 		/* For default stopword, we always use fts_utf8_string_cmp() */
366 		stopword_info->cached_stopword = rbt_create(
367 			sizeof(fts_tokenizer_word_t), fts_utf8_string_cmp);
368 	}
369 
370 	stop_words = stopword_info->cached_stopword;
371 
372 	str.f_n_char = 0;
373 
374 	for (ulint i = 0; fts_default_stopword[i]; ++i) {
375 		char*			word;
376 		fts_tokenizer_word_t	new_word;
377 
378 		/* We are going to duplicate the value below. */
379 		word = const_cast<char*>(fts_default_stopword[i]);
380 
381 		new_word.nodes = ib_vector_create(
382 			allocator, sizeof(fts_node_t), 4);
383 
384 		str.f_len = ut_strlen(word);
385 		str.f_str = reinterpret_cast<byte*>(word);
386 
387 		fts_utf8_string_dup(&new_word.text, &str, heap);
388 
389 		rbt_insert(stop_words, &new_word, &new_word);
390 	}
391 
392 	stopword_info->status = STOPWORD_FROM_DEFAULT;
393 }
394 
395 /****************************************************************//**
396 Callback function to read a single stopword value.
397 @return Always return TRUE */
398 static
399 ibool
fts_read_stopword(void * row,void * user_arg)400 fts_read_stopword(
401 /*==============*/
402 	void*		row,		/*!< in: sel_node_t* */
403 	void*		user_arg)	/*!< in: pointer to ib_vector_t */
404 {
405 	ib_alloc_t*	allocator;
406 	fts_stopword_t*	stopword_info;
407 	sel_node_t*	sel_node;
408 	que_node_t*	exp;
409 	ib_rbt_t*	stop_words;
410 	dfield_t*	dfield;
411 	fts_string_t	str;
412 	mem_heap_t*	heap;
413 	ib_rbt_bound_t	parent;
414 
415 	sel_node = static_cast<sel_node_t*>(row);
416 	stopword_info = static_cast<fts_stopword_t*>(user_arg);
417 
418 	stop_words = stopword_info->cached_stopword;
419 	allocator =  static_cast<ib_alloc_t*>(stopword_info->heap);
420 	heap = static_cast<mem_heap_t*>(allocator->arg);
421 
422 	exp = sel_node->select_list;
423 
424 	/* We only need to read the first column */
425 	dfield = que_node_get_val(exp);
426 
427 	str.f_n_char = 0;
428 	str.f_str = static_cast<byte*>(dfield_get_data(dfield));
429 	str.f_len = dfield_get_len(dfield);
430 
431 	/* Only create new node if it is a value not already existed */
432 	if (str.f_len != UNIV_SQL_NULL
433 	    && rbt_search(stop_words, &parent, &str) != 0) {
434 
435 		fts_tokenizer_word_t	new_word;
436 
437 		new_word.nodes = ib_vector_create(
438 			allocator, sizeof(fts_node_t), 4);
439 
440 		new_word.text.f_str = static_cast<byte*>(
441 			 mem_heap_alloc(heap, str.f_len + 1));
442 
443 		memcpy(new_word.text.f_str, str.f_str, str.f_len);
444 
445 		new_word.text.f_n_char = 0;
446 		new_word.text.f_len = str.f_len;
447 		new_word.text.f_str[str.f_len] = 0;
448 
449 		rbt_insert(stop_words, &new_word, &new_word);
450 	}
451 
452 	return(TRUE);
453 }
454 
455 /******************************************************************//**
456 Load user defined stopword from designated user table
457 @return TRUE if load operation is successful */
458 static
459 ibool
fts_load_user_stopword(fts_t * fts,const char * stopword_table_name,fts_stopword_t * stopword_info)460 fts_load_user_stopword(
461 /*===================*/
462 	fts_t*		fts,			/*!< in: FTS struct */
463 	const char*	stopword_table_name,	/*!< in: Stopword table
464 						name */
465 	fts_stopword_t*	stopword_info)		/*!< in: Stopword info */
466 {
467 	pars_info_t*	info;
468 	que_t*		graph;
469 	dberr_t		error = DB_SUCCESS;
470 	ibool		ret = TRUE;
471 	trx_t*		trx;
472 	ibool		has_lock = fts->fts_status & TABLE_DICT_LOCKED;
473 
474 	trx = trx_allocate_for_background();
475 	trx->op_info = "Load user stopword table into FTS cache";
476 
477 	if (!has_lock) {
478 		mutex_enter(&dict_sys->mutex);
479 	}
480 
481 	/* Validate the user table existence and in the right
482 	format */
483 	stopword_info->charset = fts_valid_stopword_table(stopword_table_name);
484 	if (!stopword_info->charset) {
485 		ret = FALSE;
486 		goto cleanup;
487 	} else if (!stopword_info->cached_stopword) {
488 		/* Create the stopword RB tree with the stopword column
489 		charset. All comparison will use this charset */
490 		stopword_info->cached_stopword = rbt_create_arg_cmp(
491 			sizeof(fts_tokenizer_word_t), innobase_fts_text_cmp,
492 			stopword_info->charset);
493 
494 	}
495 
496 	info = pars_info_create();
497 
498 	pars_info_bind_id(info, TRUE, "table_stopword", stopword_table_name);
499 
500 	pars_info_bind_function(info, "my_func", fts_read_stopword,
501 				stopword_info);
502 
503 	graph = fts_parse_sql_no_dict_lock(
504 		NULL,
505 		info,
506 		"DECLARE FUNCTION my_func;\n"
507 		"DECLARE CURSOR c IS"
508 		" SELECT value "
509 		" FROM $table_stopword;\n"
510 		"BEGIN\n"
511 		"\n"
512 		"OPEN c;\n"
513 		"WHILE 1 = 1 LOOP\n"
514 		"  FETCH c INTO my_func();\n"
515 		"  IF c % NOTFOUND THEN\n"
516 		"    EXIT;\n"
517 		"  END IF;\n"
518 		"END LOOP;\n"
519 		"CLOSE c;");
520 
521 	for (;;) {
522 		error = fts_eval_sql(trx, graph);
523 
524 		if (error == DB_SUCCESS) {
525 			fts_sql_commit(trx);
526 			stopword_info->status = STOPWORD_USER_TABLE;
527 			break;
528 		} else {
529 
530 			fts_sql_rollback(trx);
531 
532 			ut_print_timestamp(stderr);
533 
534 			if (error == DB_LOCK_WAIT_TIMEOUT) {
535 				fprintf(stderr, "  InnoDB: Warning: lock wait "
536 					"timeout reading user stopword table. "
537 					"Retrying!\n");
538 
539 				trx->error_state = DB_SUCCESS;
540 			} else {
541 				fprintf(stderr, "  InnoDB: Error '%s' "
542 					"while reading user stopword table.\n",
543 					ut_strerr(error));
544 				ret = FALSE;
545 				break;
546 			}
547 		}
548 	}
549 
550 	que_graph_free(graph);
551 
552 cleanup:
553 	if (!has_lock) {
554 		mutex_exit(&dict_sys->mutex);
555 	}
556 
557 	trx_free_for_background(trx);
558 	return(ret);
559 }
560 
561 /******************************************************************//**
562 Initialize the index cache. */
563 static
564 void
fts_index_cache_init(ib_alloc_t * allocator,fts_index_cache_t * index_cache)565 fts_index_cache_init(
566 /*=================*/
567 	ib_alloc_t*		allocator,	/*!< in: the allocator to use */
568 	fts_index_cache_t*	index_cache)	/*!< in: index cache */
569 {
570 	ulint			i;
571 
572 	ut_a(index_cache->words == NULL);
573 
574 	index_cache->words = rbt_create_arg_cmp(
575 		sizeof(fts_tokenizer_word_t), innobase_fts_text_cmp,
576 		index_cache->charset);
577 
578 	ut_a(index_cache->doc_stats == NULL);
579 
580 	index_cache->doc_stats = ib_vector_create(
581 		allocator, sizeof(fts_doc_stats_t), 4);
582 
583 	for (i = 0; fts_index_selector[i].value; ++i) {
584 		ut_a(index_cache->ins_graph[i] == NULL);
585 		ut_a(index_cache->sel_graph[i] == NULL);
586 	}
587 }
588 
589 /*********************************************************************//**
590 Initialize FTS cache. */
591 UNIV_INTERN
592 void
fts_cache_init(fts_cache_t * cache)593 fts_cache_init(
594 /*===========*/
595 	fts_cache_t*	cache)		/*!< in: cache to initialize */
596 {
597 	ulint		i;
598 
599 	/* Just to make sure */
600 	ut_a(cache->sync_heap->arg == NULL);
601 
602 	cache->sync_heap->arg = mem_heap_create(1024);
603 
604 	cache->total_size = 0;
605 
606 	mutex_enter((ib_mutex_t*) &cache->deleted_lock);
607 	cache->deleted_doc_ids = ib_vector_create(
608 		cache->sync_heap, sizeof(fts_update_t), 4);
609 	mutex_exit((ib_mutex_t*) &cache->deleted_lock);
610 
611 	/* Reset the cache data for all the FTS indexes. */
612 	for (i = 0; i < ib_vector_size(cache->indexes); ++i) {
613 		fts_index_cache_t*	index_cache;
614 
615 		index_cache = static_cast<fts_index_cache_t*>(
616 			ib_vector_get(cache->indexes, i));
617 
618 		fts_index_cache_init(cache->sync_heap, index_cache);
619 	}
620 }
621 
622 /****************************************************************//**
623 Create a FTS cache. */
624 UNIV_INTERN
625 fts_cache_t*
fts_cache_create(dict_table_t * table)626 fts_cache_create(
627 /*=============*/
628 	dict_table_t*	table)	/*!< in: table owns the FTS cache */
629 {
630 	mem_heap_t*	heap;
631 	fts_cache_t*	cache;
632 
633 	heap = static_cast<mem_heap_t*>(mem_heap_create(512));
634 
635 	cache = static_cast<fts_cache_t*>(
636 		mem_heap_zalloc(heap, sizeof(*cache)));
637 
638 	cache->cache_heap = heap;
639 
640 	rw_lock_create(fts_cache_rw_lock_key, &cache->lock, SYNC_FTS_CACHE);
641 
642 	rw_lock_create(
643 		fts_cache_init_rw_lock_key, &cache->init_lock,
644 		SYNC_FTS_CACHE_INIT);
645 
646 	mutex_create(
647 		fts_delete_mutex_key, &cache->deleted_lock, SYNC_FTS_OPTIMIZE);
648 
649 	mutex_create(
650 		fts_optimize_mutex_key, &cache->optimize_lock,
651 		SYNC_FTS_OPTIMIZE);
652 
653 	mutex_create(
654 		fts_doc_id_mutex_key, &cache->doc_id_lock, SYNC_FTS_OPTIMIZE);
655 
656 	/* This is the heap used to create the cache itself. */
657 	cache->self_heap = ib_heap_allocator_create(heap);
658 
659 	/* This is a transient heap, used for storing sync data. */
660 	cache->sync_heap = ib_heap_allocator_create(heap);
661 	cache->sync_heap->arg = NULL;
662 
663 	fts_need_sync = false;
664 
665 	cache->sync = static_cast<fts_sync_t*>(
666 		mem_heap_zalloc(heap, sizeof(fts_sync_t)));
667 
668 	cache->sync->table = table;
669 	cache->sync->event = os_event_create();
670 
671 	/* Create the index cache vector that will hold the inverted indexes. */
672 	cache->indexes = ib_vector_create(
673 		cache->self_heap, sizeof(fts_index_cache_t), 2);
674 
675 	fts_cache_init(cache);
676 
677 	cache->stopword_info.cached_stopword = NULL;
678 	cache->stopword_info.charset = NULL;
679 
680 	cache->stopword_info.heap = cache->self_heap;
681 
682 	cache->stopword_info.status = STOPWORD_NOT_INIT;
683 
684 	return(cache);
685 }
686 
687 /*******************************************************************//**
688 Add a newly create index into FTS cache */
689 UNIV_INTERN
690 void
fts_add_index(dict_index_t * index,dict_table_t * table)691 fts_add_index(
692 /*==========*/
693 	dict_index_t*	index,		/*!< FTS index to be added */
694 	dict_table_t*	table)		/*!< table */
695 {
696 	fts_t*			fts = table->fts;
697 	fts_cache_t*		cache;
698 	fts_index_cache_t*	index_cache;
699 
700 	ut_ad(fts);
701 	cache = table->fts->cache;
702 
703 	rw_lock_x_lock(&cache->init_lock);
704 
705 	ib_vector_push(fts->indexes, &index);
706 
707 	index_cache = fts_find_index_cache(cache, index);
708 
709 	if (!index_cache) {
710 		/* Add new index cache structure */
711 		index_cache = fts_cache_index_cache_create(table, index);
712 	}
713 
714 	rw_lock_x_unlock(&cache->init_lock);
715 }
716 
717 /*******************************************************************//**
718 recalibrate get_doc structure after index_cache in cache->indexes changed */
719 static
720 void
fts_reset_get_doc(fts_cache_t * cache)721 fts_reset_get_doc(
722 /*==============*/
723 	fts_cache_t*	cache)	/*!< in: FTS index cache */
724 {
725 	fts_get_doc_t*  get_doc;
726 	ulint		i;
727 
728 #ifdef UNIV_SYNC_DEBUG
729 	ut_ad(rw_lock_own(&cache->init_lock, RW_LOCK_EX));
730 #endif
731 	ib_vector_reset(cache->get_docs);
732 
733 	for (i = 0; i < ib_vector_size(cache->indexes); i++) {
734 		fts_index_cache_t*	ind_cache;
735 
736 		ind_cache = static_cast<fts_index_cache_t*>(
737 			ib_vector_get(cache->indexes, i));
738 
739 		get_doc = static_cast<fts_get_doc_t*>(
740 			ib_vector_push(cache->get_docs, NULL));
741 
742 		memset(get_doc, 0x0, sizeof(*get_doc));
743 
744 		get_doc->index_cache = ind_cache;
745 	}
746 
747 	ut_ad(ib_vector_size(cache->get_docs)
748 	      == ib_vector_size(cache->indexes));
749 }
750 
751 /*******************************************************************//**
752 Check an index is in the table->indexes list
753 @return TRUE if it exists */
754 static
755 ibool
fts_in_dict_index(dict_table_t * table,dict_index_t * index_check)756 fts_in_dict_index(
757 /*==============*/
758 	dict_table_t*	table,		/*!< in: Table */
759 	dict_index_t*	index_check)	/*!< in: index to be checked */
760 {
761 	dict_index_t*	index;
762 
763 	for (index = dict_table_get_first_index(table);
764 	     index != NULL;
765 	     index = dict_table_get_next_index(index)) {
766 
767 		if (index == index_check) {
768 			return(TRUE);
769 		}
770 	}
771 
772 	return(FALSE);
773 }
774 
775 /*******************************************************************//**
776 Check an index is in the fts->cache->indexes list
777 @return TRUE if it exists */
778 static
779 ibool
fts_in_index_cache(dict_table_t * table,dict_index_t * index)780 fts_in_index_cache(
781 /*===============*/
782 	dict_table_t*	table,	/*!< in: Table */
783 	dict_index_t*	index)	/*!< in: index to be checked */
784 {
785 	ulint	i;
786 
787 	for (i = 0; i < ib_vector_size(table->fts->cache->indexes); i++) {
788 		fts_index_cache_t*      index_cache;
789 
790 		index_cache = static_cast<fts_index_cache_t*>(
791 			ib_vector_get(table->fts->cache->indexes, i));
792 
793 		if (index_cache->index == index) {
794 			return(TRUE);
795 		}
796 	}
797 
798 	return(FALSE);
799 }
800 
801 /*******************************************************************//**
802 Check indexes in the fts->indexes is also present in index cache and
803 table->indexes list
804 @return TRUE if all indexes match */
805 UNIV_INTERN
806 ibool
fts_check_cached_index(dict_table_t * table)807 fts_check_cached_index(
808 /*===================*/
809 	dict_table_t*	table)	/*!< in: Table where indexes are dropped */
810 {
811 	ulint	i;
812 
813 	if (!table->fts || !table->fts->cache) {
814 		return(TRUE);
815 	}
816 
817 	ut_a(ib_vector_size(table->fts->indexes)
818 	      == ib_vector_size(table->fts->cache->indexes));
819 
820 	for (i = 0; i < ib_vector_size(table->fts->indexes); i++) {
821 		dict_index_t*	index;
822 
823 		index = static_cast<dict_index_t*>(
824 			ib_vector_getp(table->fts->indexes, i));
825 
826 		if (!fts_in_index_cache(table, index)) {
827 			return(FALSE);
828 		}
829 
830 		if (!fts_in_dict_index(table, index)) {
831 			return(FALSE);
832 		}
833 	}
834 
835 	return(TRUE);
836 }
837 
838 /*******************************************************************//**
839 Drop auxiliary tables related to an FTS index
840 @return DB_SUCCESS or error number */
841 UNIV_INTERN
842 dberr_t
fts_drop_index(dict_table_t * table,dict_index_t * index,trx_t * trx)843 fts_drop_index(
844 /*===========*/
845 	dict_table_t*	table,	/*!< in: Table where indexes are dropped */
846 	dict_index_t*	index,	/*!< in: Index to be dropped */
847 	trx_t*		trx)	/*!< in: Transaction for the drop */
848 {
849 	ib_vector_t*	indexes = table->fts->indexes;
850 	dberr_t		err = DB_SUCCESS;
851 
852 	ut_a(indexes);
853 
854 	if ((ib_vector_size(indexes) == 1
855 	    && (index == static_cast<dict_index_t*>(
856 			ib_vector_getp(table->fts->indexes, 0))))
857 	   || ib_vector_is_empty(indexes)) {
858 		doc_id_t	current_doc_id;
859 		doc_id_t	first_doc_id;
860 
861 		/* If we are dropping the only FTS index of the table,
862 		remove it from optimize thread */
863 		fts_optimize_remove_table(table);
864 
865 		DICT_TF2_FLAG_UNSET(table, DICT_TF2_FTS);
866 
867 		/* If Doc ID column is not added internally by FTS index,
868 		we can drop all FTS auxiliary tables. Otherwise, we will
869 		need to keep some common table such as CONFIG table, so
870 		as to keep track of incrementing Doc IDs */
871 		if (!DICT_TF2_FLAG_IS_SET(
872 			table, DICT_TF2_FTS_HAS_DOC_ID)) {
873 
874 			err = fts_drop_tables(trx, table);
875 
876 			err = fts_drop_index_tables(trx, index);
877 
878 			while (index->index_fts_syncing
879                                 && !trx_is_interrupted(trx)) {
880                                 DICT_BG_YIELD(trx);
881                         }
882 
883                         fts_free(table);
884 
885 			return(err);
886 		}
887 
888 		while (index->index_fts_syncing
889                         && !trx_is_interrupted(trx)) {
890                         DICT_BG_YIELD(trx);
891                 }
892 
893 		current_doc_id = table->fts->cache->next_doc_id;
894 		first_doc_id = table->fts->cache->first_doc_id;
895 		fts_cache_clear(table->fts->cache);
896 		fts_cache_destroy(table->fts->cache);
897 		table->fts->cache = fts_cache_create(table);
898 		table->fts->cache->next_doc_id = current_doc_id;
899 		table->fts->cache->first_doc_id = first_doc_id;
900 	} else {
901 		fts_cache_t*            cache = table->fts->cache;
902 		fts_index_cache_t*      index_cache;
903 
904 		rw_lock_x_lock(&cache->init_lock);
905 
906 		index_cache = fts_find_index_cache(cache, index);
907 
908 		if (index_cache != NULL) {
909 			while (index->index_fts_syncing
910                                 && !trx_is_interrupted(trx)) {
911                                 DICT_BG_YIELD(trx);
912                         }
913 			if (index_cache->words) {
914 				fts_words_free(index_cache->words);
915 				rbt_free(index_cache->words);
916 			}
917 
918 			ib_vector_remove(cache->indexes, *(void**) index_cache);
919 		}
920 
921 		if (cache->get_docs) {
922 			fts_reset_get_doc(cache);
923 		}
924 
925 		rw_lock_x_unlock(&cache->init_lock);
926 	}
927 
928 	err = fts_drop_index_tables(trx, index);
929 
930 	ib_vector_remove(indexes, (const void*) index);
931 
932 	return(err);
933 }
934 
935 /****************************************************************//**
936 Free the query graph but check whether dict_sys->mutex is already
937 held */
938 UNIV_INTERN
939 void
fts_que_graph_free_check_lock(fts_table_t * fts_table,const fts_index_cache_t * index_cache,que_t * graph)940 fts_que_graph_free_check_lock(
941 /*==========================*/
942 	fts_table_t*		fts_table,	/*!< in: FTS table */
943 	const fts_index_cache_t*index_cache,	/*!< in: FTS index cache */
944 	que_t*			graph)		/*!< in: query graph */
945 {
946 	ibool	has_dict = FALSE;
947 
948 	if (fts_table && fts_table->table) {
949 		ut_ad(fts_table->table->fts);
950 
951 		has_dict = fts_table->table->fts->fts_status
952 			 & TABLE_DICT_LOCKED;
953 	} else if (index_cache) {
954 		ut_ad(index_cache->index->table->fts);
955 
956 		has_dict = index_cache->index->table->fts->fts_status
957 			 & TABLE_DICT_LOCKED;
958 	}
959 
960 	if (!has_dict) {
961 		mutex_enter(&dict_sys->mutex);
962 	}
963 
964 	ut_ad(mutex_own(&dict_sys->mutex));
965 
966 	que_graph_free(graph);
967 
968 	if (!has_dict) {
969 		mutex_exit(&dict_sys->mutex);
970 	}
971 }
972 
973 /****************************************************************//**
974 Create an FTS index cache. */
975 UNIV_INTERN
976 CHARSET_INFO*
fts_index_get_charset(dict_index_t * index)977 fts_index_get_charset(
978 /*==================*/
979 	dict_index_t*		index)		/*!< in: FTS index */
980 {
981 	CHARSET_INFO*		charset = NULL;
982 	dict_field_t*		field;
983 	ulint			prtype;
984 
985 	field = dict_index_get_nth_field(index, 0);
986 	prtype = field->col->prtype;
987 
988 	charset = innobase_get_fts_charset(
989 		(int) (prtype & DATA_MYSQL_TYPE_MASK),
990 		(uint) dtype_get_charset_coll(prtype));
991 
992 #ifdef FTS_DEBUG
993 	/* Set up charset info for this index. Please note all
994 	field of the FTS index should have the same charset */
995 	for (i = 1; i < index->n_fields; i++) {
996 		CHARSET_INFO*   fld_charset;
997 
998 		field = dict_index_get_nth_field(index, i);
999 		prtype = field->col->prtype;
1000 
1001 		fld_charset = innobase_get_fts_charset(
1002 			(int)(prtype & DATA_MYSQL_TYPE_MASK),
1003 			(uint) dtype_get_charset_coll(prtype));
1004 
1005 		/* All FTS columns should have the same charset */
1006 		if (charset) {
1007 			ut_a(charset == fld_charset);
1008 		} else {
1009 			charset = fld_charset;
1010 		}
1011 	}
1012 #endif
1013 
1014 	return(charset);
1015 
1016 }
1017 /****************************************************************//**
1018 Create an FTS index cache.
1019 @return Index Cache */
1020 UNIV_INTERN
1021 fts_index_cache_t*
fts_cache_index_cache_create(dict_table_t * table,dict_index_t * index)1022 fts_cache_index_cache_create(
1023 /*=========================*/
1024 	dict_table_t*		table,		/*!< in: table with FTS index */
1025 	dict_index_t*		index)		/*!< in: FTS index */
1026 {
1027 	ulint			n_bytes;
1028 	fts_index_cache_t*	index_cache;
1029 	fts_cache_t*		cache = table->fts->cache;
1030 
1031 	ut_a(cache != NULL);
1032 
1033 #ifdef UNIV_SYNC_DEBUG
1034 	ut_ad(rw_lock_own(&cache->init_lock, RW_LOCK_EX));
1035 #endif
1036 
1037 	/* Must not already exist in the cache vector. */
1038 	ut_a(fts_find_index_cache(cache, index) == NULL);
1039 
1040 	index_cache = static_cast<fts_index_cache_t*>(
1041 		ib_vector_push(cache->indexes, NULL));
1042 
1043 	memset(index_cache, 0x0, sizeof(*index_cache));
1044 
1045 	index_cache->index = index;
1046 
1047 	index_cache->charset = fts_index_get_charset(index);
1048 
1049 	n_bytes = sizeof(que_t*) * sizeof(fts_index_selector);
1050 
1051 	index_cache->ins_graph = static_cast<que_t**>(
1052 		mem_heap_zalloc(static_cast<mem_heap_t*>(
1053 			cache->self_heap->arg), n_bytes));
1054 
1055 	index_cache->sel_graph = static_cast<que_t**>(
1056 		mem_heap_zalloc(static_cast<mem_heap_t*>(
1057 			cache->self_heap->arg), n_bytes));
1058 
1059 	fts_index_cache_init(cache->sync_heap, index_cache);
1060 
1061 	if (cache->get_docs) {
1062 		fts_reset_get_doc(cache);
1063 	}
1064 
1065 	return(index_cache);
1066 }
1067 
1068 /****************************************************************//**
1069 Release all resources help by the words rb tree e.g., the node ilist. */
1070 static
1071 void
fts_words_free(ib_rbt_t * words)1072 fts_words_free(
1073 /*===========*/
1074 	ib_rbt_t*	words)			/*!< in: rb tree of words */
1075 {
1076 	const ib_rbt_node_t*	rbt_node;
1077 
1078 	/* Free the resources held by a word. */
1079 	for (rbt_node = rbt_first(words);
1080 	     rbt_node != NULL;
1081 	     rbt_node = rbt_first(words)) {
1082 
1083 		ulint			i;
1084 		fts_tokenizer_word_t*	word;
1085 
1086 		word = rbt_value(fts_tokenizer_word_t, rbt_node);
1087 
1088 		/* Free the ilists of this word. */
1089 		for (i = 0; i < ib_vector_size(word->nodes); ++i) {
1090 
1091 			fts_node_t* fts_node = static_cast<fts_node_t*>(
1092 				ib_vector_get(word->nodes, i));
1093 
1094 			ut_free(fts_node->ilist);
1095 			fts_node->ilist = NULL;
1096 		}
1097 
1098 		/* NOTE: We are responsible for free'ing the node */
1099 		ut_free(rbt_remove_node(words, rbt_node));
1100 	}
1101 }
1102 
1103 /** Clear cache.
1104 @param[in,out]	cache	fts cache */
1105 UNIV_INTERN
1106 void
fts_cache_clear(fts_cache_t * cache)1107 fts_cache_clear(
1108 	fts_cache_t*	cache)
1109 {
1110 	ulint		i;
1111 
1112 	for (i = 0; i < ib_vector_size(cache->indexes); ++i) {
1113 		ulint			j;
1114 		fts_index_cache_t*	index_cache;
1115 
1116 		index_cache = static_cast<fts_index_cache_t*>(
1117 			ib_vector_get(cache->indexes, i));
1118 
1119 		fts_words_free(index_cache->words);
1120 
1121 		rbt_free(index_cache->words);
1122 
1123 		index_cache->words = NULL;
1124 
1125 		for (j = 0; fts_index_selector[j].value; ++j) {
1126 
1127 			if (index_cache->ins_graph[j] != NULL) {
1128 
1129 				fts_que_graph_free_check_lock(
1130 					NULL, index_cache,
1131 					index_cache->ins_graph[j]);
1132 
1133 				index_cache->ins_graph[j] = NULL;
1134 			}
1135 
1136 			if (index_cache->sel_graph[j] != NULL) {
1137 
1138 				fts_que_graph_free_check_lock(
1139 					NULL, index_cache,
1140 					index_cache->sel_graph[j]);
1141 
1142 				index_cache->sel_graph[j] = NULL;
1143 			}
1144 		}
1145 
1146 		index_cache->doc_stats = NULL;
1147 	}
1148 
1149 	mem_heap_free(static_cast<mem_heap_t*>(cache->sync_heap->arg));
1150 	cache->sync_heap->arg = NULL;
1151 
1152 	cache->total_size = 0;
1153 
1154 	mutex_enter((ib_mutex_t*) &cache->deleted_lock);
1155 	cache->deleted_doc_ids = NULL;
1156 	mutex_exit((ib_mutex_t*) &cache->deleted_lock);
1157 }
1158 
1159 /*********************************************************************//**
1160 Search the index specific cache for a particular FTS index.
1161 @return the index cache else NULL */
1162 UNIV_INLINE
1163 fts_index_cache_t*
fts_get_index_cache(fts_cache_t * cache,const dict_index_t * index)1164 fts_get_index_cache(
1165 /*================*/
1166 	fts_cache_t*		cache,		/*!< in: cache to search */
1167 	const dict_index_t*	index)		/*!< in: index to search for */
1168 {
1169 	ulint			i;
1170 
1171 #ifdef UNIV_SYNC_DEBUG
1172 	ut_ad(rw_lock_own((rw_lock_t*) &cache->lock, RW_LOCK_EX)
1173 	      || rw_lock_own((rw_lock_t*) &cache->init_lock, RW_LOCK_EX));
1174 #endif
1175 
1176 	for (i = 0; i < ib_vector_size(cache->indexes); ++i) {
1177 		fts_index_cache_t*	index_cache;
1178 
1179 		index_cache = static_cast<fts_index_cache_t*>(
1180 			ib_vector_get(cache->indexes, i));
1181 
1182 		if (index_cache->index == index) {
1183 
1184 			return(index_cache);
1185 		}
1186 	}
1187 
1188 	return(NULL);
1189 }
1190 
#ifdef FTS_DEBUG
/*********************************************************************//**
Look up the get_doc structure whose index cache belongs to the given
index by a linear scan of cache->get_docs.
@return the fts_get_doc_t item else NULL */
static
fts_get_doc_t*
fts_get_index_get_doc(
/*==================*/
	fts_cache_t*		cache,		/*!< in: cache to search */
	const dict_index_t*	index)		/*!< in: index to search for */
{
#ifdef UNIV_SYNC_DEBUG
	ut_ad(rw_lock_own((rw_lock_t*) &cache->init_lock, RW_LOCK_EX));
#endif

	const ulint	n_docs = ib_vector_size(cache->get_docs);

	for (ulint pos = 0; pos < n_docs; ++pos) {

		fts_get_doc_t*	candidate = static_cast<fts_get_doc_t*>(
			ib_vector_get(cache->get_docs, pos));

		/* Entries are matched by index pointer identity. */
		if (candidate->index_cache->index == index) {

			return(candidate);
		}
	}

	return(NULL);
}
#endif
1223 
1224 /**********************************************************************//**
1225 Free the FTS cache. */
1226 UNIV_INTERN
1227 void
fts_cache_destroy(fts_cache_t * cache)1228 fts_cache_destroy(
1229 /*==============*/
1230 	fts_cache_t*	cache)			/*!< in: cache*/
1231 {
1232 	rw_lock_free(&cache->lock);
1233 	rw_lock_free(&cache->init_lock);
1234 	mutex_free(&cache->optimize_lock);
1235 	mutex_free(&cache->deleted_lock);
1236 	mutex_free(&cache->doc_id_lock);
1237 	os_event_free(cache->sync->event);
1238 
1239 	if (cache->stopword_info.cached_stopword) {
1240 		rbt_free(cache->stopword_info.cached_stopword);
1241 	}
1242 
1243 	if (cache->sync_heap->arg) {
1244 		mem_heap_free(static_cast<mem_heap_t*>(cache->sync_heap->arg));
1245 	}
1246 
1247 	mem_heap_free(cache->cache_heap);
1248 }
1249 
1250 /**********************************************************************//**
1251 Find an existing word, or if not found, create one and return it.
1252 @return specified word token */
1253 static
1254 fts_tokenizer_word_t*
fts_tokenizer_word_get(fts_cache_t * cache,fts_index_cache_t * index_cache,fts_string_t * text)1255 fts_tokenizer_word_get(
1256 /*===================*/
1257 	fts_cache_t*	cache,			/*!< in: cache */
1258 	fts_index_cache_t*
1259 			index_cache,		/*!< in: index cache */
1260 	fts_string_t*	text)			/*!< in: node text */
1261 {
1262 	fts_tokenizer_word_t*	word;
1263 	ib_rbt_bound_t		parent;
1264 
1265 #ifdef UNIV_SYNC_DEBUG
1266 	ut_ad(rw_lock_own(&cache->lock, RW_LOCK_EX));
1267 #endif
1268 
1269 	/* If it is a stopword, do not index it */
1270 	if (cache->stopword_info.cached_stopword != NULL
1271 	    && rbt_search(cache->stopword_info.cached_stopword,
1272 		       &parent, text) == 0) {
1273 
1274 		return(NULL);
1275 	}
1276 
1277 	/* Check if we found a match, if not then add word to tree. */
1278 	if (rbt_search(index_cache->words, &parent, text) != 0) {
1279 		mem_heap_t*		heap;
1280 		fts_tokenizer_word_t	new_word;
1281 
1282 		heap = static_cast<mem_heap_t*>(cache->sync_heap->arg);
1283 
1284 		new_word.nodes = ib_vector_create(
1285 			cache->sync_heap, sizeof(fts_node_t), 4);
1286 
1287 		fts_utf8_string_dup(&new_word.text, text, heap);
1288 
1289 		parent.last = rbt_add_node(
1290 			index_cache->words, &parent, &new_word);
1291 
1292 		/* Take into account the RB tree memory use and the vector. */
1293 		cache->total_size += sizeof(new_word)
1294 			+ sizeof(ib_rbt_node_t)
1295 			+ text->f_len
1296 			+ (sizeof(fts_node_t) * 4)
1297 			+ sizeof(*new_word.nodes);
1298 
1299 		ut_ad(rbt_validate(index_cache->words));
1300 	}
1301 
1302 	word = rbt_value(fts_tokenizer_word_t, parent.last);
1303 
1304 	return(word);
1305 }
1306 
1307 /**********************************************************************//**
1308 Add the given doc_id/word positions to the given node's ilist. */
1309 UNIV_INTERN
1310 void
fts_cache_node_add_positions(fts_cache_t * cache,fts_node_t * node,doc_id_t doc_id,ib_vector_t * positions)1311 fts_cache_node_add_positions(
1312 /*=========================*/
1313 	fts_cache_t*	cache,		/*!< in: cache */
1314 	fts_node_t*	node,		/*!< in: word node */
1315 	doc_id_t	doc_id,		/*!< in: doc id */
1316 	ib_vector_t*	positions)	/*!< in: fts_token_t::positions */
1317 {
1318 	ulint		i;
1319 	byte*		ptr;
1320 	byte*		ilist;
1321 	ulint		enc_len;
1322 	ulint		last_pos;
1323 	byte*		ptr_start;
1324 	ulint		doc_id_delta;
1325 
1326 #ifdef UNIV_SYNC_DEBUG
1327 	if (cache) {
1328 		ut_ad(rw_lock_own(&cache->lock, RW_LOCK_EX));
1329 	}
1330 #endif
1331 	ut_ad(doc_id >= node->last_doc_id);
1332 
1333 	/* Calculate the space required to store the ilist. */
1334 	doc_id_delta = (ulint)(doc_id - node->last_doc_id);
1335 	enc_len = fts_get_encoded_len(doc_id_delta);
1336 
1337 	last_pos = 0;
1338 	for (i = 0; i < ib_vector_size(positions); i++) {
1339 		ulint	pos = *(static_cast<ulint*>(
1340 			ib_vector_get(positions, i)));
1341 
1342 		ut_ad(last_pos == 0 || pos > last_pos);
1343 
1344 		enc_len += fts_get_encoded_len(pos - last_pos);
1345 		last_pos = pos;
1346 	}
1347 
1348 	/* The 0x00 byte at the end of the token positions list. */
1349 	enc_len++;
1350 
1351 	if ((node->ilist_size_alloc - node->ilist_size) >= enc_len) {
1352 		/* No need to allocate more space, we can fit in the new
1353 		data at the end of the old one. */
1354 		ilist = NULL;
1355 		ptr = node->ilist + node->ilist_size;
1356 	} else {
1357 		ulint	new_size = node->ilist_size + enc_len;
1358 
1359 		/* Over-reserve space by a fixed size for small lengths and
1360 		by 20% for lengths >= 48 bytes. */
1361 		if (new_size < 16) {
1362 			new_size = 16;
1363 		} else if (new_size < 32) {
1364 			new_size = 32;
1365 		} else if (new_size < 48) {
1366 			new_size = 48;
1367 		} else {
1368 			new_size = (ulint)(1.2 * new_size);
1369 		}
1370 
1371 		ilist = static_cast<byte*>(ut_malloc(new_size));
1372 		ptr = ilist + node->ilist_size;
1373 
1374 		node->ilist_size_alloc = new_size;
1375 	}
1376 
1377 	ptr_start = ptr;
1378 
1379 	/* Encode the new fragment. */
1380 	ptr += fts_encode_int(doc_id_delta, ptr);
1381 
1382 	last_pos = 0;
1383 	for (i = 0; i < ib_vector_size(positions); i++) {
1384 		ulint	pos = *(static_cast<ulint*>(
1385 			 ib_vector_get(positions, i)));
1386 
1387 		ptr += fts_encode_int(pos - last_pos, ptr);
1388 		last_pos = pos;
1389 	}
1390 
1391 	*ptr++ = 0;
1392 
1393 	ut_a(enc_len == (ulint)(ptr - ptr_start));
1394 
1395 	if (ilist) {
1396 		/* Copy old ilist to the start of the new one and switch the
1397 		new one into place in the node. */
1398 		if (node->ilist_size > 0) {
1399 			memcpy(ilist, node->ilist, node->ilist_size);
1400 			ut_free(node->ilist);
1401 		}
1402 
1403 		node->ilist = ilist;
1404 	}
1405 
1406 	node->ilist_size += enc_len;
1407 
1408 	if (cache) {
1409 		cache->total_size += enc_len;
1410 	}
1411 
1412 	if (node->first_doc_id == FTS_NULL_DOC_ID) {
1413 		node->first_doc_id = doc_id;
1414 	}
1415 
1416 	node->last_doc_id = doc_id;
1417 	++node->doc_count;
1418 }
1419 
1420 /**********************************************************************//**
1421 Add document to the cache. */
1422 static
1423 void
fts_cache_add_doc(fts_cache_t * cache,fts_index_cache_t * index_cache,doc_id_t doc_id,ib_rbt_t * tokens)1424 fts_cache_add_doc(
1425 /*==============*/
1426 	fts_cache_t*	cache,			/*!< in: cache */
1427 	fts_index_cache_t*
1428 			index_cache,		/*!< in: index cache */
1429 	doc_id_t	doc_id,			/*!< in: doc id to add */
1430 	ib_rbt_t*	tokens)			/*!< in: document tokens */
1431 {
1432 	const ib_rbt_node_t*	node;
1433 	ulint			n_words;
1434 	fts_doc_stats_t*	doc_stats;
1435 
1436 	if (!tokens) {
1437 		return;
1438 	}
1439 
1440 #ifdef UNIV_SYNC_DEBUG
1441 	ut_ad(rw_lock_own(&cache->lock, RW_LOCK_EX));
1442 #endif
1443 
1444 	n_words = rbt_size(tokens);
1445 
1446 	for (node = rbt_first(tokens); node; node = rbt_first(tokens)) {
1447 
1448 		fts_tokenizer_word_t*	word;
1449 		fts_node_t*		fts_node = NULL;
1450 		fts_token_t*		token = rbt_value(fts_token_t, node);
1451 
1452 		/* Find and/or add token to the cache. */
1453 		word = fts_tokenizer_word_get(
1454 			cache, index_cache, &token->text);
1455 
1456 		if (!word) {
1457 			ut_free(rbt_remove_node(tokens, node));
1458 			continue;
1459 		}
1460 
1461 		if (ib_vector_size(word->nodes) > 0) {
1462 			fts_node = static_cast<fts_node_t*>(
1463 				ib_vector_last(word->nodes));
1464 		}
1465 
1466 		if (fts_node == NULL || fts_node->synced
1467 		    || fts_node->ilist_size > FTS_ILIST_MAX_SIZE
1468 		    || doc_id < fts_node->last_doc_id) {
1469 
1470 			fts_node = static_cast<fts_node_t*>(
1471 				ib_vector_push(word->nodes, NULL));
1472 
1473 			memset(fts_node, 0x0, sizeof(*fts_node));
1474 
1475 			cache->total_size += sizeof(*fts_node);
1476 		}
1477 
1478 		fts_cache_node_add_positions(
1479 			cache, fts_node, doc_id, token->positions);
1480 
1481 		ut_free(rbt_remove_node(tokens, node));
1482 	}
1483 
1484 	ut_a(rbt_empty(tokens));
1485 
1486 	/* Add to doc ids processed so far. */
1487 	doc_stats = static_cast<fts_doc_stats_t*>(
1488 		ib_vector_push(index_cache->doc_stats, NULL));
1489 
1490 	doc_stats->doc_id = doc_id;
1491 	doc_stats->word_count = n_words;
1492 
1493 	/* Add the doc stats memory usage too. */
1494 	cache->total_size += sizeof(*doc_stats);
1495 
1496 	if (doc_id > cache->sync->max_doc_id) {
1497 		cache->sync->max_doc_id = doc_id;
1498 	}
1499 }
1500 
1501 /****************************************************************//**
1502 Drops a table. If the table can't be found we return a SUCCESS code.
1503 @return DB_SUCCESS or error code */
1504 static MY_ATTRIBUTE((nonnull, warn_unused_result))
1505 dberr_t
fts_drop_table(trx_t * trx,const char * table_name)1506 fts_drop_table(
1507 /*===========*/
1508 	trx_t*		trx,			/*!< in: transaction */
1509 	const char*	table_name)		/*!< in: table to drop */
1510 {
1511 	dict_table_t*	table;
1512 	dberr_t		error = DB_SUCCESS;
1513 
1514 	/* Check that the table exists in our data dictionary.
1515 	Similar to regular drop table case, we will open table with
1516 	DICT_ERR_IGNORE_INDEX_ROOT and DICT_ERR_IGNORE_CORRUPT option */
1517 	table = dict_table_open_on_name(
1518 		table_name, TRUE, FALSE,
1519 		static_cast<dict_err_ignore_t>(
1520                         DICT_ERR_IGNORE_INDEX_ROOT | DICT_ERR_IGNORE_CORRUPT));
1521 
1522 	if (table != 0) {
1523 
1524 		dict_table_close(table, TRUE, FALSE);
1525 
1526 		/* Pass nonatomic=false (dont allow data dict unlock),
1527 		because the transaction may hold locks on SYS_* tables from
1528 		previous calls to fts_drop_table(). */
1529 		error = row_drop_table_for_mysql(table_name, trx, true, false);
1530 
1531 		if (error != DB_SUCCESS) {
1532 			ib_logf(IB_LOG_LEVEL_ERROR,
1533 				"Unable to drop FTS index aux table %s: %s",
1534 				table_name, ut_strerr(error));
1535 		}
1536 	} else {
1537 		error = DB_FAIL;
1538 	}
1539 
1540 	return(error);
1541 }
1542 
1543 /****************************************************************//**
1544 Rename a single auxiliary table due to database name change.
1545 @return DB_SUCCESS or error code */
1546 static MY_ATTRIBUTE((nonnull, warn_unused_result))
1547 dberr_t
fts_rename_one_aux_table(const char * new_name,const char * fts_table_old_name,trx_t * trx)1548 fts_rename_one_aux_table(
1549 /*=====================*/
1550 	const char*	new_name,		/*!< in: new parent tbl name */
1551 	const char*	fts_table_old_name,	/*!< in: old aux tbl name */
1552 	trx_t*		trx)			/*!< in: transaction */
1553 {
1554 	char	fts_table_new_name[MAX_TABLE_NAME_LEN];
1555 	ulint	new_db_name_len = dict_get_db_name_len(new_name);
1556 	ulint	old_db_name_len = dict_get_db_name_len(fts_table_old_name);
1557 	ulint	table_new_name_len = strlen(fts_table_old_name)
1558 				     + new_db_name_len - old_db_name_len;
1559 
1560 	/* Check if the new and old database names are the same, if so,
1561 	nothing to do */
1562 	ut_ad((new_db_name_len != old_db_name_len)
1563 	      || strncmp(new_name, fts_table_old_name, old_db_name_len) != 0);
1564 
1565 	/* Get the database name from "new_name", and table name
1566 	from the fts_table_old_name */
1567 	strncpy(fts_table_new_name, new_name, new_db_name_len);
1568 	strncpy(fts_table_new_name + new_db_name_len,
1569 	       strchr(fts_table_old_name, '/'),
1570 	       table_new_name_len - new_db_name_len);
1571 	fts_table_new_name[table_new_name_len] = 0;
1572 
1573 	return(row_rename_table_for_mysql(
1574 		fts_table_old_name, fts_table_new_name, trx, false));
1575 }
1576 
1577 /****************************************************************//**
1578 Rename auxiliary tables for all fts index for a table. This(rename)
1579 is due to database name change
1580 @return DB_SUCCESS or error code */
1581 
1582 dberr_t
fts_rename_aux_tables(dict_table_t * table,const char * new_name,trx_t * trx)1583 fts_rename_aux_tables(
1584 /*==================*/
1585 	dict_table_t*	table,		/*!< in: user Table */
1586 	const char*     new_name,       /*!< in: new table name */
1587 	trx_t*		trx)		/*!< in: transaction */
1588 {
1589 	ulint		i;
1590 	fts_table_t	fts_table;
1591 
1592 	FTS_INIT_FTS_TABLE(&fts_table, NULL, FTS_COMMON_TABLE, table);
1593 
1594 	/* Rename common auxiliary tables */
1595 	for (i = 0; fts_common_tables[i] != NULL; ++i) {
1596 		char*	old_table_name;
1597 		dberr_t	err = DB_SUCCESS;
1598 
1599 		fts_table.suffix = fts_common_tables[i];
1600 
1601 		old_table_name = fts_get_table_name(&fts_table);
1602 
1603 		err = fts_rename_one_aux_table(new_name, old_table_name, trx);
1604 
1605 		mem_free(old_table_name);
1606 
1607 		if (err != DB_SUCCESS) {
1608 			return(err);
1609 		}
1610 	}
1611 
1612 	fts_t*	fts = table->fts;
1613 
1614 	/* Rename index specific auxiliary tables */
1615 	for (i = 0; fts->indexes != 0 && i < ib_vector_size(fts->indexes);
1616 	     ++i) {
1617 		dict_index_t*	index;
1618 
1619 		index = static_cast<dict_index_t*>(
1620 			ib_vector_getp(fts->indexes, i));
1621 
1622 		FTS_INIT_INDEX_TABLE(&fts_table, NULL, FTS_INDEX_TABLE, index);
1623 
1624 		for (ulint j = 0; fts_index_selector[j].value; ++j) {
1625 			dberr_t	err;
1626 			char*	old_table_name;
1627 
1628 			fts_table.suffix = fts_get_suffix(j);
1629 
1630 			old_table_name = fts_get_table_name(&fts_table);
1631 
1632 			err = fts_rename_one_aux_table(
1633 				new_name, old_table_name, trx);
1634 
1635 			DBUG_EXECUTE_IF("fts_rename_failure",
1636 					err = DB_DEADLOCK;
1637 					fts_sql_rollback(trx););
1638 
1639 			mem_free(old_table_name);
1640 
1641 			if (err != DB_SUCCESS) {
1642 				return(err);
1643 			}
1644 		}
1645 	}
1646 
1647 	return(DB_SUCCESS);
1648 }
1649 
1650 /****************************************************************//**
1651 Drops the common ancillary tables needed for supporting an FTS index
1652 on the given table. row_mysql_lock_data_dictionary must have been called
1653 before this.
1654 @return DB_SUCCESS or error code */
1655 static MY_ATTRIBUTE((nonnull, warn_unused_result))
1656 dberr_t
fts_drop_common_tables(trx_t * trx,fts_table_t * fts_table)1657 fts_drop_common_tables(
1658 /*===================*/
1659 	trx_t*		trx,			/*!< in: transaction */
1660 	fts_table_t*	fts_table)		/*!< in: table with an FTS
1661 						index */
1662 {
1663 	ulint		i;
1664 	dberr_t		error = DB_SUCCESS;
1665 
1666 	for (i = 0; fts_common_tables[i] != NULL; ++i) {
1667 		dberr_t	err;
1668 		char*	table_name;
1669 
1670 		fts_table->suffix = fts_common_tables[i];
1671 
1672 		table_name = fts_get_table_name(fts_table);
1673 
1674 		err = fts_drop_table(trx, table_name);
1675 
1676 		/* We only return the status of the last error. */
1677 		if (err != DB_SUCCESS && err != DB_FAIL) {
1678 			error = err;
1679 		}
1680 
1681 		mem_free(table_name);
1682 	}
1683 
1684 	return(error);
1685 }
1686 
1687 /****************************************************************//**
1688 Since we do a horizontal split on the index table, we need to drop
1689 all the split tables.
1690 @return DB_SUCCESS or error code */
1691 UNIV_INTERN
1692 dberr_t
fts_drop_index_split_tables(trx_t * trx,dict_index_t * index)1693 fts_drop_index_split_tables(
1694 /*========================*/
1695 	trx_t*		trx,			/*!< in: transaction */
1696 	dict_index_t*	index)			/*!< in: fts instance */
1697 
1698 {
1699 	ulint		i;
1700 	fts_table_t	fts_table;
1701 	dberr_t		error = DB_SUCCESS;
1702 
1703 	FTS_INIT_INDEX_TABLE(&fts_table, NULL, FTS_INDEX_TABLE, index);
1704 
1705 	for (i = 0; fts_index_selector[i].value; ++i) {
1706 		dberr_t	err;
1707 		char*	table_name;
1708 
1709 		fts_table.suffix = fts_get_suffix(i);
1710 
1711 		table_name = fts_get_table_name(&fts_table);
1712 
1713 		err = fts_drop_table(trx, table_name);
1714 
1715 		/* We only return the status of the last error. */
1716 		if (err != DB_SUCCESS && err != DB_FAIL) {
1717 			error = err;
1718 		}
1719 
1720 		mem_free(table_name);
1721 	}
1722 
1723 	return(error);
1724 }
1725 
1726 /****************************************************************//**
1727 Drops FTS auxiliary tables for an FTS index
1728 @return DB_SUCCESS or error code */
1729 UNIV_INTERN
1730 dberr_t
fts_drop_index_tables(trx_t * trx,dict_index_t * index)1731 fts_drop_index_tables(
1732 /*==================*/
1733 	trx_t*		trx,		/*!< in: transaction */
1734 	dict_index_t*	index)		/*!< in: Index to drop */
1735 {
1736 	dberr_t			error = DB_SUCCESS;
1737 
1738 #ifdef FTS_DOC_STATS_DEBUG
1739 	fts_table_t		fts_table;
1740 	static const char*	index_tables[] = {
1741 		"DOC_ID",
1742 		NULL
1743 	};
1744 #endif /* FTS_DOC_STATS_DEBUG */
1745 
1746 	dberr_t	err = fts_drop_index_split_tables(trx, index);
1747 
1748 	/* We only return the status of the last error. */
1749 	if (err != DB_SUCCESS) {
1750 		error = err;
1751 	}
1752 
1753 #ifdef FTS_DOC_STATS_DEBUG
1754 	FTS_INIT_INDEX_TABLE(&fts_table, NULL, FTS_INDEX_TABLE, index);
1755 
1756 	for (ulint i = 0; index_tables[i] != NULL; ++i) {
1757 		char*	table_name;
1758 
1759 		fts_table.suffix = index_tables[i];
1760 
1761 		table_name = fts_get_table_name(&fts_table);
1762 
1763 		err = fts_drop_table(trx, table_name);
1764 
1765 		/* We only return the status of the last error. */
1766 		if (err != DB_SUCCESS && err != DB_FAIL) {
1767 			error = err;
1768 		}
1769 
1770 		mem_free(table_name);
1771 	}
1772 #endif /* FTS_DOC_STATS_DEBUG */
1773 
1774 	return(error);
1775 }
1776 
1777 /****************************************************************//**
1778 Drops FTS ancillary tables needed for supporting an FTS index
1779 on the given table. row_mysql_lock_data_dictionary must have been called
1780 before this.
1781 @return DB_SUCCESS or error code */
1782 static MY_ATTRIBUTE((nonnull, warn_unused_result))
1783 dberr_t
fts_drop_all_index_tables(trx_t * trx,fts_t * fts)1784 fts_drop_all_index_tables(
1785 /*======================*/
1786 	trx_t*		trx,			/*!< in: transaction */
1787 	fts_t*		fts)			/*!< in: fts instance */
1788 {
1789 	dberr_t		error = DB_SUCCESS;
1790 
1791 	for (ulint i = 0;
1792 	     fts->indexes != 0 && i < ib_vector_size(fts->indexes);
1793 	     ++i) {
1794 
1795 		dberr_t		err;
1796 		dict_index_t*	index;
1797 
1798 		index = static_cast<dict_index_t*>(
1799 			ib_vector_getp(fts->indexes, i));
1800 
1801 		err = fts_drop_index_tables(trx, index);
1802 
1803 		if (err != DB_SUCCESS) {
1804 			error = err;
1805 		}
1806 	}
1807 
1808 	return(error);
1809 }
1810 
1811 /*********************************************************************//**
1812 Drops the ancillary tables needed for supporting an FTS index on a
1813 given table. row_mysql_lock_data_dictionary must have been called before
1814 this.
1815 @return DB_SUCCESS or error code */
1816 UNIV_INTERN
1817 dberr_t
fts_drop_tables(trx_t * trx,dict_table_t * table)1818 fts_drop_tables(
1819 /*============*/
1820 	trx_t*		trx,		/*!< in: transaction */
1821 	dict_table_t*	table)		/*!< in: table has the FTS index */
1822 {
1823 	dberr_t		error;
1824 	fts_table_t	fts_table;
1825 
1826 	FTS_INIT_FTS_TABLE(&fts_table, NULL, FTS_COMMON_TABLE, table);
1827 
1828 	/* TODO: This is not atomic and can cause problems during recovery. */
1829 
1830 	error = fts_drop_common_tables(trx, &fts_table);
1831 
1832 	if (error == DB_SUCCESS) {
1833 		error = fts_drop_all_index_tables(trx, table->fts);
1834 	}
1835 
1836 	return(error);
1837 }
1838 
1839 /*********************************************************************//**
1840 Prepare the SQL, so that all '%s' are replaced by the common prefix.
1841 @return sql string, use mem_free() to free the memory */
1842 static
1843 char*
fts_prepare_sql(fts_table_t * fts_table,const char * my_template)1844 fts_prepare_sql(
1845 /*============*/
1846 	fts_table_t*	fts_table,	/*!< in: table name info */
1847 	const char*	my_template)	/*!< in: sql template */
1848 {
1849 	char*		sql;
1850 	char*		name_prefix;
1851 
1852 	name_prefix = fts_get_table_name_prefix(fts_table);
1853 	sql = ut_strreplace(my_template, "%s", name_prefix);
1854 	mem_free(name_prefix);
1855 
1856 	return(sql);
1857 }
1858 
1859 /*********************************************************************//**
1860 Creates the common ancillary tables needed for supporting an FTS index
1861 on the given table. row_mysql_lock_data_dictionary must have been called
1862 before this.
1863 @return DB_SUCCESS if succeed */
1864 UNIV_INTERN
1865 dberr_t
fts_create_common_tables(trx_t * trx,const dict_table_t * table,const char * name,bool skip_doc_id_index)1866 fts_create_common_tables(
1867 /*=====================*/
1868 	trx_t*		trx,		/*!< in: transaction */
1869 	const dict_table_t* table,	/*!< in: table with FTS index */
1870 	const char*	name,		/*!< in: table name normalized.*/
1871 	bool		skip_doc_id_index)/*!< in: Skip index on doc id */
1872 {
1873 	char*		sql;
1874 	dberr_t		error;
1875 	que_t*		graph;
1876 	fts_table_t	fts_table;
1877 	mem_heap_t*	heap = mem_heap_create(1024);
1878 	pars_info_t*	info;
1879 
1880 	FTS_INIT_FTS_TABLE(&fts_table, NULL, FTS_COMMON_TABLE, table);
1881 
1882 	error = fts_drop_common_tables(trx, &fts_table);
1883 
1884 	if (error != DB_SUCCESS) {
1885 
1886 		goto func_exit;
1887 	}
1888 
1889 	/* Create the FTS tables that are common to an FTS index. */
1890 	sql = fts_prepare_sql(&fts_table, fts_create_common_tables_sql);
1891 	graph = fts_parse_sql_no_dict_lock(NULL, NULL, sql);
1892 	mem_free(sql);
1893 
1894 	error = fts_eval_sql(trx, graph);
1895 
1896 	que_graph_free(graph);
1897 
1898 	if (error != DB_SUCCESS) {
1899 
1900 		goto func_exit;
1901 	}
1902 
1903 	/* Write the default settings to the config table. */
1904 	fts_table.suffix = "CONFIG";
1905 	graph = fts_parse_sql_no_dict_lock(
1906 		&fts_table, NULL, fts_config_table_insert_values_sql);
1907 
1908 	error = fts_eval_sql(trx, graph);
1909 
1910 	que_graph_free(graph);
1911 
1912 	if (error != DB_SUCCESS || skip_doc_id_index) {
1913 
1914 		goto func_exit;
1915 	}
1916 
1917 	info = pars_info_create();
1918 
1919 	pars_info_bind_id(info, TRUE, "table_name", name);
1920 	pars_info_bind_id(info, TRUE, "index_name", FTS_DOC_ID_INDEX_NAME);
1921 	pars_info_bind_id(info, TRUE, "doc_id_col_name", FTS_DOC_ID_COL_NAME);
1922 
1923 	/* Create the FTS DOC_ID index on the hidden column. Currently this
1924 	is common for any FT index created on the table. */
1925 	graph = fts_parse_sql_no_dict_lock(
1926 		NULL,
1927 		info,
1928 		mem_heap_printf(
1929 			heap,
1930 			"BEGIN\n"
1931 			""
1932 			"CREATE UNIQUE INDEX $index_name ON $table_name("
1933 			"$doc_id_col_name);\n"));
1934 
1935 	error = fts_eval_sql(trx, graph);
1936 	que_graph_free(graph);
1937 
1938 func_exit:
1939 	if (error != DB_SUCCESS) {
1940 		/* We have special error handling here */
1941 
1942 		trx->error_state = DB_SUCCESS;
1943 
1944 		trx_rollback_to_savepoint(trx, NULL);
1945 
1946 		row_drop_table_for_mysql(table->name, trx, FALSE);
1947 
1948 		trx->error_state = DB_SUCCESS;
1949 	}
1950 
1951 	mem_heap_free(heap);
1952 
1953 	return(error);
1954 }
1955 
1956 /*************************************************************//**
1957 Wrapper function of fts_create_index_tables_low(), create auxiliary
1958 tables for an FTS index
1959 @return: DB_SUCCESS or error code */
1960 static
1961 dict_table_t*
fts_create_one_index_table(trx_t * trx,const dict_index_t * index,fts_table_t * fts_table,mem_heap_t * heap)1962 fts_create_one_index_table(
1963 /*=======================*/
1964 	trx_t*		trx,		/*!< in: transaction */
1965 	const dict_index_t*
1966 			index,		/*!< in: the index instance */
1967 	fts_table_t*	fts_table,	/*!< in: fts_table structure */
1968 	mem_heap_t*	heap)		/*!< in: heap */
1969 {
1970 	dict_field_t*		field;
1971 	dict_table_t*		new_table = NULL;
1972 	char*			table_name = fts_get_table_name(fts_table);
1973 	dberr_t			error;
1974 	CHARSET_INFO*		charset;
1975 	ulint			flags2 = 0;
1976 
1977 	ut_ad(index->type & DICT_FTS);
1978 
1979 	if (srv_file_per_table) {
1980 		flags2 = DICT_TF2_USE_TABLESPACE;
1981 	}
1982 
1983 	new_table = dict_mem_table_create(table_name, 0, 5, 1, flags2);
1984 
1985 	field = dict_index_get_nth_field(index, 0);
1986 	charset = innobase_get_fts_charset(
1987 		(int)(field->col->prtype & DATA_MYSQL_TYPE_MASK),
1988 		(uint) dtype_get_charset_coll(field->col->prtype));
1989 
1990 	if (strcmp(charset->name, "latin1_swedish_ci") == 0) {
1991 		dict_mem_table_add_col(new_table, heap, "word", DATA_VARCHAR,
1992 				       field->col->prtype, FTS_MAX_WORD_LEN);
1993 	} else {
1994 		dict_mem_table_add_col(new_table, heap, "word", DATA_VARMYSQL,
1995 				       field->col->prtype, FTS_MAX_WORD_LEN);
1996 	}
1997 
1998 	dict_mem_table_add_col(new_table, heap, "first_doc_id", DATA_INT,
1999 			       DATA_NOT_NULL | DATA_UNSIGNED,
2000 			       sizeof(doc_id_t));
2001 
2002 	dict_mem_table_add_col(new_table, heap, "last_doc_id", DATA_INT,
2003 			       DATA_NOT_NULL | DATA_UNSIGNED,
2004 			       sizeof(doc_id_t));
2005 
2006 	dict_mem_table_add_col(new_table, heap, "doc_count", DATA_INT,
2007 			       DATA_NOT_NULL | DATA_UNSIGNED, 4);
2008 
2009 	dict_mem_table_add_col(new_table, heap, "ilist", DATA_BLOB,
2010 			       4130048,	0);
2011 
2012 	error = row_create_table_for_mysql(new_table, trx, false);
2013 
2014 	if (error != DB_SUCCESS) {
2015 		trx->error_state = error;
2016 		dict_mem_table_free(new_table);
2017 		new_table = NULL;
2018 		ib_logf(IB_LOG_LEVEL_WARN,
2019 			"Fail to create FTS index table %s", table_name);
2020 	}
2021 
2022 	mem_free(table_name);
2023 
2024 	return(new_table);
2025 }
2026 
2027 /*************************************************************//**
2028 Wrapper function of fts_create_index_tables_low(), create auxiliary
2029 tables for an FTS index
2030 @return: DB_SUCCESS or error code */
2031 UNIV_INTERN
2032 dberr_t
fts_create_index_tables_low(trx_t * trx,const dict_index_t * index,const char * table_name,table_id_t table_id)2033 fts_create_index_tables_low(
2034 /*========================*/
2035 	trx_t*		trx,		/*!< in: transaction */
2036 	const dict_index_t*
2037 			index,		/*!< in: the index instance */
2038 	const char*	table_name,	/*!< in: the table name */
2039 	table_id_t	table_id)	/*!< in: the table id */
2040 
2041 {
2042 	ulint		i;
2043 	que_t*		graph;
2044 	fts_table_t	fts_table;
2045 	dberr_t		error = DB_SUCCESS;
2046 	mem_heap_t*	heap = mem_heap_create(1024);
2047 
2048 	fts_table.type = FTS_INDEX_TABLE;
2049 	fts_table.index_id = index->id;
2050 	fts_table.table_id = table_id;
2051 	fts_table.parent = table_name;
2052 	fts_table.table = index->table;
2053 
2054 #ifdef FTS_DOC_STATS_DEBUG
2055 	char*		sql;
2056 
2057 	/* Create the FTS auxiliary tables that are specific
2058 	to an FTS index. */
2059 	sql = fts_prepare_sql(&fts_table, fts_create_index_tables_sql);
2060 
2061 	graph = fts_parse_sql_no_dict_lock(NULL, NULL, sql);
2062 	mem_free(sql);
2063 
2064 	error = fts_eval_sql(trx, graph);
2065 	que_graph_free(graph);
2066 #endif /* FTS_DOC_STATS_DEBUG */
2067 
2068 	for (i = 0; fts_index_selector[i].value && error == DB_SUCCESS; ++i) {
2069 		dict_table_t*	new_table;
2070 
2071 		/* Create the FTS auxiliary tables that are specific
2072 		to an FTS index. We need to preserve the table_id %s
2073 		which fts_parse_sql_no_dict_lock() will fill in for us. */
2074 		fts_table.suffix = fts_get_suffix(i);
2075 
2076 		new_table = fts_create_one_index_table(
2077 			trx, index, &fts_table, heap);
2078 
2079 		if (!new_table) {
2080 			error = DB_FAIL;
2081 			break;
2082 		}
2083 
2084 		graph = fts_parse_sql_no_dict_lock(
2085 			&fts_table, NULL, fts_create_index_sql);
2086 
2087 		error = fts_eval_sql(trx, graph);
2088 		que_graph_free(graph);
2089 	}
2090 
2091 	if (error != DB_SUCCESS) {
2092 		/* We have special error handling here */
2093 
2094 		trx->error_state = DB_SUCCESS;
2095 
2096 		trx_rollback_to_savepoint(trx, NULL);
2097 
2098 		row_drop_table_for_mysql(table_name, trx, FALSE);
2099 
2100 		trx->error_state = DB_SUCCESS;
2101 	}
2102 
2103 	mem_heap_free(heap);
2104 
2105 	return(error);
2106 }
2107 
2108 /******************************************************************//**
2109 Creates the column specific ancillary tables needed for supporting an
2110 FTS index on the given table. row_mysql_lock_data_dictionary must have
2111 been called before this.
2112 @return DB_SUCCESS or error code */
2113 UNIV_INTERN
2114 dberr_t
fts_create_index_tables(trx_t * trx,const dict_index_t * index)2115 fts_create_index_tables(
2116 /*====================*/
2117 	trx_t*			trx,	/*!< in: transaction */
2118 	const dict_index_t*	index)	/*!< in: the index instance */
2119 {
2120 	dberr_t		err;
2121 	dict_table_t*	table;
2122 
2123 	table = dict_table_get_low(index->table_name);
2124 	ut_a(table != NULL);
2125 
2126 	err = fts_create_index_tables_low(trx, index, table->name, table->id);
2127 
2128 	if (err == DB_SUCCESS) {
2129 		trx_commit(trx);
2130 	}
2131 
2132 	return(err);
2133 }
#if 0
/******************************************************************//**
Map a row state to a printable name. This code is compiled out.
@return string representation of state */
static
const char*
fts_get_state_str(
/*==============*/
	fts_row_state	state)	/*!< in: state */
{
	/* Anything that is not one of the known states is reported
	as "UNKNOWN". */
	const char*	label = "UNKNOWN";

	switch (state) {
	case FTS_INSERT:
		label = "INSERT";
		break;

	case FTS_MODIFY:
		label = "MODIFY";
		break;

	case FTS_DELETE:
		label = "DELETE";
		break;

	case FTS_NOTHING:
		label = "NOTHING";
		break;

	case FTS_INVALID:
		label = "INVALID";
		break;

	default:
		break;
	}

	return(label);
}
#endif
2165 
2166 /******************************************************************//**
2167 Calculate the new state of a row given the existing state and a new event.
2168 @return new state of row */
2169 static
2170 fts_row_state
fts_trx_row_get_new_state(fts_row_state old_state,fts_row_state event)2171 fts_trx_row_get_new_state(
2172 /*======================*/
2173 	fts_row_state	old_state,		/*!< in: existing state of row */
2174 	fts_row_state	event)			/*!< in: new event */
2175 {
2176 	/* The rules for transforming states:
2177 
2178 	I = inserted
2179 	M = modified
2180 	D = deleted
2181 	N = nothing
2182 
2183 	M+D -> D:
2184 
2185 	If the row existed before the transaction started and it is modified
2186 	during the transaction, followed by a deletion of the row, only the
2187 	deletion will be signaled.
2188 
2189 	M+ -> M:
2190 
2191 	If the row existed before the transaction started and it is modified
2192 	more than once during the transaction, only the last modification
2193 	will be signaled.
2194 
2195 	IM*D -> N:
2196 
2197 	If a new row is added during the transaction (and possibly modified
2198 	after its initial insertion) but it is deleted before the end of the
2199 	transaction, nothing will be signaled.
2200 
2201 	IM* -> I:
2202 
2203 	If a new row is added during the transaction and modified after its
2204 	initial insertion, only the addition will be signaled.
2205 
2206 	M*DI -> M:
2207 
2208 	If the row existed before the transaction started and it is deleted,
2209 	then re-inserted, only a modification will be signaled. Note that
2210 	this case is only possible if the table is using the row's primary
2211 	key for FTS row ids, since those can be re-inserted by the user,
2212 	which is not true for InnoDB generated row ids.
2213 
2214 	It is easily seen that the above rules decompose such that we do not
2215 	need to store the row's entire history of events. Instead, we can
2216 	store just one state for the row and update that when new events
2217 	arrive. Then we can implement the above rules as a two-dimensional
2218 	look-up table, and get checking of invalid combinations "for free"
2219 	in the process. */
2220 
2221 	/* The lookup table for transforming states. old_state is the
2222 	Y-axis, event is the X-axis. */
2223 	static const fts_row_state table[4][4] = {
2224 			/*    I            M            D            N */
2225 		/* I */	{ FTS_INVALID, FTS_INSERT,  FTS_NOTHING, FTS_INVALID },
2226 		/* M */	{ FTS_INVALID, FTS_MODIFY,  FTS_DELETE,  FTS_INVALID },
2227 		/* D */	{ FTS_MODIFY,  FTS_INVALID, FTS_INVALID, FTS_INVALID },
2228 		/* N */	{ FTS_INVALID, FTS_INVALID, FTS_INVALID, FTS_INVALID }
2229 	};
2230 
2231 	fts_row_state result;
2232 
2233 	ut_a(old_state < FTS_INVALID);
2234 	ut_a(event < FTS_INVALID);
2235 
2236 	result = table[(int) old_state][(int) event];
2237 	ut_a(result != FTS_INVALID);
2238 
2239 	return(result);
2240 }
2241 
2242 /******************************************************************//**
2243 Create a savepoint instance.
2244 @return savepoint instance */
2245 static
2246 fts_savepoint_t*
fts_savepoint_create(ib_vector_t * savepoints,const char * name,mem_heap_t * heap)2247 fts_savepoint_create(
2248 /*=================*/
2249 	ib_vector_t*	savepoints,		/*!< out: InnoDB transaction */
2250 	const char*	name,			/*!< in: savepoint name */
2251 	mem_heap_t*	heap)			/*!< in: heap */
2252 {
2253 	fts_savepoint_t*	savepoint;
2254 
2255 	savepoint = static_cast<fts_savepoint_t*>(
2256 		ib_vector_push(savepoints, NULL));
2257 
2258 	memset(savepoint, 0x0, sizeof(*savepoint));
2259 
2260 	if (name) {
2261 		savepoint->name = mem_heap_strdup(heap, name);
2262 	}
2263 
2264 	savepoint->tables = rbt_create(
2265 		sizeof(fts_trx_table_t*), fts_trx_table_cmp);
2266 
2267 	return(savepoint);
2268 }
2269 
2270 /******************************************************************//**
2271 Create an FTS trx.
2272 @return FTS trx  */
2273 static
2274 fts_trx_t*
fts_trx_create(trx_t * trx)2275 fts_trx_create(
2276 /*===========*/
2277 	trx_t*	trx)				/*!< in/out: InnoDB
2278 						transaction */
2279 {
2280 	fts_trx_t*		ftt;
2281 	ib_alloc_t*		heap_alloc;
2282 	mem_heap_t*		heap = mem_heap_create(1024);
2283 	trx_named_savept_t*	savep;
2284 
2285 	ut_a(trx->fts_trx == NULL);
2286 
2287 	ftt = static_cast<fts_trx_t*>(mem_heap_alloc(heap, sizeof(fts_trx_t)));
2288 	ftt->trx = trx;
2289 	ftt->heap = heap;
2290 
2291 	heap_alloc = ib_heap_allocator_create(heap);
2292 
2293 	ftt->savepoints = static_cast<ib_vector_t*>(ib_vector_create(
2294 		heap_alloc, sizeof(fts_savepoint_t), 4));
2295 
2296 	ftt->last_stmt = static_cast<ib_vector_t*>(ib_vector_create(
2297 		heap_alloc, sizeof(fts_savepoint_t), 4));
2298 
2299 	/* Default instance has no name and no heap. */
2300 	fts_savepoint_create(ftt->savepoints, NULL, NULL);
2301 	fts_savepoint_create(ftt->last_stmt, NULL, NULL);
2302 
2303 	/* Copy savepoints that already set before. */
2304 	for (savep = UT_LIST_GET_FIRST(trx->trx_savepoints);
2305 	     savep != NULL;
2306 	     savep = UT_LIST_GET_NEXT(trx_savepoints, savep)) {
2307 
2308 		fts_savepoint_take(trx, ftt, savep->name);
2309 	}
2310 
2311 	return(ftt);
2312 }
2313 
2314 /******************************************************************//**
2315 Create an FTS trx table.
2316 @return FTS trx table */
2317 static
2318 fts_trx_table_t*
fts_trx_table_create(fts_trx_t * fts_trx,dict_table_t * table)2319 fts_trx_table_create(
2320 /*=================*/
2321 	fts_trx_t*	fts_trx,		/*!< in: FTS trx */
2322 	dict_table_t*	table)			/*!< in: table */
2323 {
2324 	fts_trx_table_t*	ftt;
2325 
2326 	ftt = static_cast<fts_trx_table_t*>(
2327 		mem_heap_alloc(fts_trx->heap, sizeof(*ftt)));
2328 
2329 	memset(ftt, 0x0, sizeof(*ftt));
2330 
2331 	ftt->table = table;
2332 	ftt->fts_trx = fts_trx;
2333 
2334 	ftt->rows = rbt_create(sizeof(fts_trx_row_t), fts_trx_row_doc_id_cmp);
2335 
2336 	return(ftt);
2337 }
2338 
2339 /******************************************************************//**
2340 Clone an FTS trx table.
2341 @return FTS trx table */
2342 static
2343 fts_trx_table_t*
fts_trx_table_clone(const fts_trx_table_t * ftt_src)2344 fts_trx_table_clone(
2345 /*=================*/
2346 	const fts_trx_table_t*	ftt_src)	/*!< in: FTS trx */
2347 {
2348 	fts_trx_table_t*	ftt;
2349 
2350 	ftt = static_cast<fts_trx_table_t*>(
2351 		mem_heap_alloc(ftt_src->fts_trx->heap, sizeof(*ftt)));
2352 
2353 	memset(ftt, 0x0, sizeof(*ftt));
2354 
2355 	ftt->table = ftt_src->table;
2356 	ftt->fts_trx = ftt_src->fts_trx;
2357 
2358 	ftt->rows = rbt_create(sizeof(fts_trx_row_t), fts_trx_row_doc_id_cmp);
2359 
2360 	/* Copy the rb tree values to the new savepoint. */
2361 	rbt_merge_uniq(ftt->rows, ftt_src->rows);
2362 
2363 	/* These are only added on commit. At this stage we only have
2364 	the updated row state. */
2365 	ut_a(ftt_src->added_doc_ids == NULL);
2366 
2367 	return(ftt);
2368 }
2369 
2370 /******************************************************************//**
2371 Initialize the FTS trx instance.
2372 @return FTS trx instance */
2373 static
2374 fts_trx_table_t*
fts_trx_init(trx_t * trx,dict_table_t * table,ib_vector_t * savepoints)2375 fts_trx_init(
2376 /*=========*/
2377 	trx_t*			trx,		/*!< in: transaction */
2378 	dict_table_t*		table,		/*!< in: FTS table instance */
2379 	ib_vector_t*		savepoints)	/*!< in: Savepoints */
2380 {
2381 	fts_trx_table_t*	ftt;
2382 	ib_rbt_bound_t		parent;
2383 	ib_rbt_t*		tables;
2384 	fts_savepoint_t*	savepoint;
2385 
2386 	savepoint = static_cast<fts_savepoint_t*>(ib_vector_last(savepoints));
2387 
2388 	tables = savepoint->tables;
2389 	rbt_search_cmp(tables, &parent, &table->id, fts_trx_table_id_cmp, NULL);
2390 
2391 	if (parent.result == 0) {
2392 		fts_trx_table_t**	fttp;
2393 
2394 		fttp = rbt_value(fts_trx_table_t*, parent.last);
2395 		ftt = *fttp;
2396 	} else {
2397 		ftt = fts_trx_table_create(trx->fts_trx, table);
2398 		rbt_add_node(tables, &parent, &ftt);
2399 	}
2400 
2401 	ut_a(ftt->table == table);
2402 
2403 	return(ftt);
2404 }
2405 
2406 /******************************************************************//**
2407 Notify the FTS system about an operation on an FTS-indexed table. */
2408 static
2409 void
fts_trx_table_add_op(fts_trx_table_t * ftt,doc_id_t doc_id,fts_row_state state,ib_vector_t * fts_indexes)2410 fts_trx_table_add_op(
2411 /*=================*/
2412 	fts_trx_table_t*ftt,			/*!< in: FTS trx table */
2413 	doc_id_t	doc_id,			/*!< in: doc id */
2414 	fts_row_state	state,			/*!< in: state of the row */
2415 	ib_vector_t*	fts_indexes)		/*!< in: FTS indexes affected */
2416 {
2417 	ib_rbt_t*	rows;
2418 	ib_rbt_bound_t	parent;
2419 
2420 	rows = ftt->rows;
2421 	rbt_search(rows, &parent, &doc_id);
2422 
2423 	/* Row id found, update state, and if new state is FTS_NOTHING,
2424 	we delete the row from our tree. */
2425 	if (parent.result == 0) {
2426 		fts_trx_row_t*	row = rbt_value(fts_trx_row_t, parent.last);
2427 
2428 		row->state = fts_trx_row_get_new_state(row->state, state);
2429 
2430 		if (row->state == FTS_NOTHING) {
2431 			if (row->fts_indexes) {
2432 				ib_vector_free(row->fts_indexes);
2433 			}
2434 
2435 			ut_free(rbt_remove_node(rows, parent.last));
2436 			row = NULL;
2437 		} else if (row->fts_indexes != NULL) {
2438 			ib_vector_free(row->fts_indexes);
2439 			row->fts_indexes = fts_indexes;
2440 		}
2441 
2442 	} else { /* Row-id not found, create a new one. */
2443 		fts_trx_row_t	row;
2444 
2445 		row.doc_id = doc_id;
2446 		row.state = state;
2447 		row.fts_indexes = fts_indexes;
2448 
2449 		rbt_add_node(rows, &parent, &row);
2450 	}
2451 }
2452 
2453 /******************************************************************//**
2454 Notify the FTS system about an operation on an FTS-indexed table. */
2455 UNIV_INTERN
2456 void
fts_trx_add_op(trx_t * trx,dict_table_t * table,doc_id_t doc_id,fts_row_state state,ib_vector_t * fts_indexes)2457 fts_trx_add_op(
2458 /*===========*/
2459 	trx_t*		trx,			/*!< in: InnoDB transaction */
2460 	dict_table_t*	table,			/*!< in: table */
2461 	doc_id_t	doc_id,			/*!< in: new doc id */
2462 	fts_row_state	state,			/*!< in: state of the row */
2463 	ib_vector_t*	fts_indexes)		/*!< in: FTS indexes affected
2464 						(NULL=all) */
2465 {
2466 	fts_trx_table_t*	tran_ftt;
2467 	fts_trx_table_t*	stmt_ftt;
2468 
2469 	if (!trx->fts_trx) {
2470 		trx->fts_trx = fts_trx_create(trx);
2471 	}
2472 
2473 	tran_ftt = fts_trx_init(trx, table, trx->fts_trx->savepoints);
2474 	stmt_ftt = fts_trx_init(trx, table, trx->fts_trx->last_stmt);
2475 
2476 	fts_trx_table_add_op(tran_ftt, doc_id, state, fts_indexes);
2477 	fts_trx_table_add_op(stmt_ftt, doc_id, state, fts_indexes);
2478 }
2479 
2480 /******************************************************************//**
2481 Fetch callback that converts a textual document id to a binary value and
2482 stores it in the given place.
2483 @return always returns NULL */
2484 static
2485 ibool
fts_fetch_store_doc_id(void * row,void * user_arg)2486 fts_fetch_store_doc_id(
2487 /*===================*/
2488 	void*		row,			/*!< in: sel_node_t* */
2489 	void*		user_arg)		/*!< in: doc_id_t* to store
2490 						doc_id in */
2491 {
2492 	int		n_parsed;
2493 	sel_node_t*	node = static_cast<sel_node_t*>(row);
2494 	doc_id_t*	doc_id = static_cast<doc_id_t*>(user_arg);
2495 	dfield_t*	dfield = que_node_get_val(node->select_list);
2496 	dtype_t*	type = dfield_get_type(dfield);
2497 	ulint		len = dfield_get_len(dfield);
2498 
2499 	char		buf[32];
2500 
2501 	ut_a(dtype_get_mtype(type) == DATA_VARCHAR);
2502 	ut_a(len > 0 && len < sizeof(buf));
2503 
2504 	memcpy(buf, dfield_get_data(dfield), len);
2505 	buf[len] = '\0';
2506 
2507 	n_parsed = sscanf(buf, FTS_DOC_ID_FORMAT, doc_id);
2508 	ut_a(n_parsed == 1);
2509 
2510 	return(FALSE);
2511 }
2512 
#ifdef FTS_CACHE_SIZE_DEBUG
/******************************************************************//**
Get the max cache size in bytes. The value is read from the config
table under the key FTS_MAX_CACHE_SIZE_IN_MB and clamped to the range
[FTS_CACHE_SIZE_LOWER_LIMIT_IN_MB, FTS_CACHE_SIZE_UPPER_LIMIT_IN_MB],
with a warning on stderr when clamping happens. If there is an error
reading the value we simply print an error message here and return the
default value (the lower limit) to the caller.
@return max cache size in bytes */
static
ulint
fts_get_max_cache_size(
/*===================*/
	trx_t*		trx,			/*!< in: transaction */
	fts_table_t*	fts_table)		/*!< in: table instance */
{
	dberr_t		error;
	fts_string_t	value;
	ulint		cache_size_in_mb;

	/* Set to the default value. */
	cache_size_in_mb = FTS_CACHE_SIZE_LOWER_LIMIT_IN_MB;

	/* We set the length of value to the max bytes it can hold. This
	information is used by the callback that reads the value. One
	extra byte is allocated for the terminator written below. */
	value.f_n_char = 0;
	value.f_len = FTS_MAX_CONFIG_VALUE_LEN;
	/* ut_malloc() yields an untyped pointer, which C++ does not
	convert implicitly; cast it like fts_get_total_word_count()
	does. */
	value.f_str = static_cast<byte*>(ut_malloc(value.f_len + 1));

	error = fts_config_get_value(
		trx, fts_table, FTS_MAX_CACHE_SIZE_IN_MB, &value);

	if (error == DB_SUCCESS) {

		/* Terminate the text before handing it to strtoul(). */
		value.f_str[value.f_len] = 0;
		cache_size_in_mb = strtoul((char*) value.f_str, NULL, 10);

		if (cache_size_in_mb > FTS_CACHE_SIZE_UPPER_LIMIT_IN_MB) {

			ut_print_timestamp(stderr);
			fprintf(stderr, "  InnoDB: Warning: FTS max cache size "
				" (%lu) out of range. Minimum value is "
				"%luMB and the maximum values is %luMB, "
				"setting cache size to upper limit\n",
				cache_size_in_mb,
				FTS_CACHE_SIZE_LOWER_LIMIT_IN_MB,
				FTS_CACHE_SIZE_UPPER_LIMIT_IN_MB);

			cache_size_in_mb = FTS_CACHE_SIZE_UPPER_LIMIT_IN_MB;

		} else if  (cache_size_in_mb
			    < FTS_CACHE_SIZE_LOWER_LIMIT_IN_MB) {

			ut_print_timestamp(stderr);
			fprintf(stderr, "  InnoDB: Warning: FTS max cache size "
				" (%lu) out of range. Minimum value is "
				"%luMB and the maximum values is %luMB, "
				"setting cache size to lower limit\n",
				cache_size_in_mb,
				FTS_CACHE_SIZE_LOWER_LIMIT_IN_MB,
				FTS_CACHE_SIZE_UPPER_LIMIT_IN_MB);

			cache_size_in_mb = FTS_CACHE_SIZE_LOWER_LIMIT_IN_MB;
		}
	} else {
		/* dberr_t is not an unsigned long, so print the error
		through ut_strerr() with %s, as the other error messages
		in this file do. */
		ut_print_timestamp(stderr);
		fprintf(stderr, "InnoDB: Error: (%s) reading max cache "
			"config value from config table\n", ut_strerr(error));
	}

	ut_free(value.f_str);

	return(cache_size_in_mb * 1024 * 1024);
}
#endif
2585 
#ifdef FTS_DOC_STATS_DEBUG
/*********************************************************************//**
Read the total word count stored for an FTS index in the config table
(key FTS_TOTAL_WORD_COUNT). *total is 0 when the value cannot be read;
the failure is also reported on stderr.
@return DB_SUCCESS if all OK else error code */
UNIV_INTERN
dberr_t
fts_get_total_word_count(
/*=====================*/
	trx_t*		trx,			/*!< in: transaction */
	dict_index_t*	index,			/*!< in: for this index */
	ulint*		total)			/*!< out: total words */
{
	fts_string_t	text;

	*total = 0;

	/* f_len tells the reading callback how many bytes the buffer
	can take; one more byte is allocated for the terminator. */
	text.f_n_char = 0;
	text.f_len = FTS_MAX_CONFIG_VALUE_LEN;
	text.f_str = static_cast<byte*>(ut_malloc(text.f_len + 1));

	const dberr_t	status = fts_config_get_index_value(
		trx, index, FTS_TOTAL_WORD_COUNT, &text);

	if (status != DB_SUCCESS) {
		ut_print_timestamp(stderr);
		fprintf(stderr, "  InnoDB: Error: (%s) reading total words "
			"value from config table\n", ut_strerr(status));
	} else {
		/* Terminate the text, then parse it as a decimal
		number. */
		text.f_str[text.f_len] = 0;
		*total = strtoul((char*) text.f_str, NULL, 10);
	}

	ut_free(text.f_str);

	return(status);
}
#endif /* FTS_DOC_STATS_DEBUG */
2627 
2628 /*********************************************************************//**
2629 Update the next and last Doc ID in the CONFIG table to be the input
2630 "doc_id" value (+ 1). We would do so after each FTS index build or
2631 table truncate */
2632 UNIV_INTERN
2633 void
fts_update_next_doc_id(trx_t * trx,const dict_table_t * table,const char * table_name,doc_id_t doc_id)2634 fts_update_next_doc_id(
2635 /*===================*/
2636 	trx_t*			trx,		/*!< in/out: transaction */
2637 	const dict_table_t*	table,		/*!< in: table */
2638 	const char*		table_name,	/*!< in: table name, or NULL */
2639 	doc_id_t		doc_id)		/*!< in: DOC ID to set */
2640 {
2641 	table->fts->cache->synced_doc_id = doc_id;
2642 	table->fts->cache->next_doc_id = doc_id + 1;
2643 
2644 	table->fts->cache->first_doc_id = table->fts->cache->next_doc_id;
2645 
2646 	fts_update_sync_doc_id(
2647 		table, table_name, table->fts->cache->synced_doc_id, trx);
2648 
2649 }
2650 
2651 /*********************************************************************//**
2652 Get the next available document id.
2653 @return DB_SUCCESS if OK */
2654 UNIV_INTERN
2655 dberr_t
fts_get_next_doc_id(const dict_table_t * table,doc_id_t * doc_id)2656 fts_get_next_doc_id(
2657 /*================*/
2658 	const dict_table_t*	table,		/*!< in: table */
2659 	doc_id_t*		doc_id)		/*!< out: new document id */
2660 {
2661 	fts_cache_t*	cache = table->fts->cache;
2662 
2663 	/* If the Doc ID system has not yet been initialized, we
2664 	will consult the CONFIG table and user table to re-establish
2665 	the initial value of the Doc ID */
2666 
2667 	if (cache->first_doc_id != 0 || !fts_init_doc_id(table)) {
2668 		if (!DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID)) {
2669 			*doc_id = FTS_NULL_DOC_ID;
2670 			return(DB_SUCCESS);
2671 		}
2672 
2673 		/* Otherwise, simply increment the value in cache */
2674 		mutex_enter(&cache->doc_id_lock);
2675 		*doc_id = ++cache->next_doc_id;
2676 		mutex_exit(&cache->doc_id_lock);
2677 	} else {
2678 		mutex_enter(&cache->doc_id_lock);
2679 		*doc_id = cache->next_doc_id;
2680 		mutex_exit(&cache->doc_id_lock);
2681 	}
2682 
2683 	return(DB_SUCCESS);
2684 }
2685 
2686 /*********************************************************************//**
2687 This function fetch the Doc ID from CONFIG table, and compare with
2688 the Doc ID supplied. And store the larger one to the CONFIG table.
2689 @return DB_SUCCESS if OK */
2690 static MY_ATTRIBUTE((nonnull))
2691 dberr_t
fts_cmp_set_sync_doc_id(const dict_table_t * table,doc_id_t doc_id_cmp,ibool read_only,doc_id_t * doc_id)2692 fts_cmp_set_sync_doc_id(
2693 /*====================*/
2694 	const dict_table_t*	table,		/*!< in: table */
2695 	doc_id_t		doc_id_cmp,	/*!< in: Doc ID to compare */
2696 	ibool			read_only,	/*!< in: TRUE if read the
2697 						synced_doc_id only */
2698 	doc_id_t*		doc_id)		/*!< out: larger document id
2699 						after comparing "doc_id_cmp"
2700 						to the one stored in CONFIG
2701 						table */
2702 {
2703 	trx_t*		trx;
2704 	pars_info_t*	info;
2705 	dberr_t		error;
2706 	fts_table_t	fts_table;
2707 	que_t*		graph = NULL;
2708 	fts_cache_t*	cache = table->fts->cache;
2709 retry:
2710 	ut_a(table->fts->doc_col != ULINT_UNDEFINED);
2711 
2712 	fts_table.suffix = "CONFIG";
2713 	fts_table.table_id = table->id;
2714 	fts_table.type = FTS_COMMON_TABLE;
2715 	fts_table.table = table;
2716 
2717 	fts_table.parent = table->name;
2718 
2719 	trx = trx_allocate_for_background();
2720 
2721 	trx->op_info = "update the next FTS document id";
2722 
2723 	info = pars_info_create();
2724 
2725 	pars_info_bind_function(
2726 		info, "my_func", fts_fetch_store_doc_id, doc_id);
2727 
2728 	graph = fts_parse_sql(
2729 		&fts_table, info,
2730 		"DECLARE FUNCTION my_func;\n"
2731 		"DECLARE CURSOR c IS SELECT value FROM \"%s\""
2732 		" WHERE key = 'synced_doc_id' FOR UPDATE;\n"
2733 		"BEGIN\n"
2734 		""
2735 		"OPEN c;\n"
2736 		"WHILE 1 = 1 LOOP\n"
2737 		"  FETCH c INTO my_func();\n"
2738 		"  IF c % NOTFOUND THEN\n"
2739 		"    EXIT;\n"
2740 		"  END IF;\n"
2741 		"END LOOP;\n"
2742 		"CLOSE c;");
2743 
2744 	*doc_id = 0;
2745 
2746 	error = fts_eval_sql(trx, graph);
2747 
2748 	fts_que_graph_free_check_lock(&fts_table, NULL, graph);
2749 
2750 	// FIXME: We need to retry deadlock errors
2751 	if (error != DB_SUCCESS) {
2752 		goto func_exit;
2753 	}
2754 
2755 	if (read_only) {
2756 		goto func_exit;
2757 	}
2758 
2759 	if (doc_id_cmp == 0 && *doc_id) {
2760 		cache->synced_doc_id = *doc_id - 1;
2761 	} else {
2762 		cache->synced_doc_id = ut_max(doc_id_cmp, *doc_id);
2763 	}
2764 
2765 	mutex_enter(&cache->doc_id_lock);
2766 	/* For each sync operation, we will add next_doc_id by 1,
2767 	so to mark a sync operation */
2768 	if (cache->next_doc_id < cache->synced_doc_id + 1) {
2769 		cache->next_doc_id = cache->synced_doc_id + 1;
2770 	}
2771 	mutex_exit(&cache->doc_id_lock);
2772 
2773 	if (doc_id_cmp > *doc_id) {
2774 		error = fts_update_sync_doc_id(
2775 			table, table->name, cache->synced_doc_id, trx);
2776 	}
2777 
2778 	*doc_id = cache->next_doc_id;
2779 
2780 func_exit:
2781 
2782 	if (error == DB_SUCCESS) {
2783 		fts_sql_commit(trx);
2784 	} else {
2785 		*doc_id = 0;
2786 
2787 		ut_print_timestamp(stderr);
2788 		fprintf(stderr, "  InnoDB: Error: (%s) "
2789 			"while getting next doc id.\n", ut_strerr(error));
2790 
2791 		fts_sql_rollback(trx);
2792 
2793 		if (error == DB_DEADLOCK) {
2794 			os_thread_sleep(FTS_DEADLOCK_RETRY_WAIT);
2795 			goto retry;
2796 		}
2797 	}
2798 
2799 	trx_free_for_background(trx);
2800 
2801 	return(error);
2802 }
2803 
2804 /*********************************************************************//**
2805 Update the last document id. This function could create a new
2806 transaction to update the last document id.
2807 @return DB_SUCCESS if OK */
2808 static
2809 dberr_t
fts_update_sync_doc_id(const dict_table_t * table,const char * table_name,doc_id_t doc_id,trx_t * trx)2810 fts_update_sync_doc_id(
2811 /*===================*/
2812 	const dict_table_t*	table,		/*!< in: table */
2813 	const char*		table_name,	/*!< in: table name, or NULL */
2814 	doc_id_t		doc_id,		/*!< in: last document id */
2815 	trx_t*			trx)		/*!< in: update trx, or NULL */
2816 {
2817 	byte		id[FTS_MAX_ID_LEN];
2818 	pars_info_t*	info;
2819 	fts_table_t	fts_table;
2820 	ulint		id_len;
2821 	que_t*		graph = NULL;
2822 	dberr_t		error;
2823 	ibool		local_trx = FALSE;
2824 	fts_cache_t*	cache = table->fts->cache;
2825 
2826 	fts_table.suffix = "CONFIG";
2827 	fts_table.table_id = table->id;
2828 	fts_table.type = FTS_COMMON_TABLE;
2829 	fts_table.table = table;
2830 	if (table_name) {
2831 		fts_table.parent = table_name;
2832 	} else {
2833 		fts_table.parent = table->name;
2834 	}
2835 
2836 	if (!trx) {
2837 		trx = trx_allocate_for_background();
2838 
2839 		trx->op_info = "setting last FTS document id";
2840 		local_trx = TRUE;
2841 	}
2842 
2843 	info = pars_info_create();
2844 
2845 	id_len = ut_snprintf(
2846 		(char*) id, sizeof(id), FTS_DOC_ID_FORMAT, doc_id + 1);
2847 
2848 	pars_info_bind_varchar_literal(info, "doc_id", id, id_len);
2849 
2850 	graph = fts_parse_sql(
2851 		&fts_table, info,
2852 		"BEGIN "
2853 		"UPDATE \"%s\" SET value = :doc_id"
2854 		" WHERE key = 'synced_doc_id';");
2855 
2856 	error = fts_eval_sql(trx, graph);
2857 
2858 	fts_que_graph_free_check_lock(&fts_table, NULL, graph);
2859 
2860 	if (local_trx) {
2861 		if (error == DB_SUCCESS) {
2862 			fts_sql_commit(trx);
2863 			cache->synced_doc_id = doc_id;
2864 		} else {
2865 
2866 			ib_logf(IB_LOG_LEVEL_ERROR,
2867 				"(%s) while updating last doc id.",
2868 				ut_strerr(error));
2869 
2870 			fts_sql_rollback(trx);
2871 		}
2872 		trx_free_for_background(trx);
2873 	}
2874 
2875 	return(error);
2876 }
2877 
2878 /*********************************************************************//**
2879 Create a new fts_doc_ids_t.
2880 @return new fts_doc_ids_t */
2881 UNIV_INTERN
2882 fts_doc_ids_t*
fts_doc_ids_create(void)2883 fts_doc_ids_create(void)
2884 /*====================*/
2885 {
2886 	fts_doc_ids_t*	fts_doc_ids;
2887 	mem_heap_t*	heap = mem_heap_create(512);
2888 
2889 	fts_doc_ids = static_cast<fts_doc_ids_t*>(
2890 		mem_heap_alloc(heap, sizeof(*fts_doc_ids)));
2891 
2892 	fts_doc_ids->self_heap = ib_heap_allocator_create(heap);
2893 
2894 	fts_doc_ids->doc_ids = static_cast<ib_vector_t*>(ib_vector_create(
2895 		fts_doc_ids->self_heap, sizeof(fts_update_t), 32));
2896 
2897 	return(fts_doc_ids);
2898 }
2899 
2900 /*********************************************************************//**
2901 Free a fts_doc_ids_t. */
2902 
2903 void
fts_doc_ids_free(fts_doc_ids_t * fts_doc_ids)2904 fts_doc_ids_free(
2905 /*=============*/
2906 	fts_doc_ids_t*	fts_doc_ids)
2907 {
2908 	mem_heap_t*	heap = static_cast<mem_heap_t*>(
2909 		fts_doc_ids->self_heap->arg);
2910 
2911 	memset(fts_doc_ids, 0, sizeof(*fts_doc_ids));
2912 
2913 	mem_heap_free(heap);
2914 }
2915 
2916 /*********************************************************************//**
2917 Do commit-phase steps necessary for the insertion of a new row. */
2918 void
fts_add(fts_trx_table_t * ftt,fts_trx_row_t * row)2919 fts_add(
2920 /*====*/
2921 	fts_trx_table_t*ftt,			/*!< in: FTS trx table */
2922 	fts_trx_row_t*	row)			/*!< in: row */
2923 {
2924 	dict_table_t*	table = ftt->table;
2925 	doc_id_t	doc_id = row->doc_id;
2926 
2927 	ut_a(row->state == FTS_INSERT || row->state == FTS_MODIFY);
2928 
2929 	fts_add_doc_by_id(ftt, doc_id, row->fts_indexes);
2930 
2931 	mutex_enter(&table->fts->cache->deleted_lock);
2932 	++table->fts->cache->added;
2933 	mutex_exit(&table->fts->cache->deleted_lock);
2934 
2935 	if (!DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID)
2936 	    && doc_id >= table->fts->cache->next_doc_id) {
2937 		table->fts->cache->next_doc_id = doc_id + 1;
2938 	}
2939 }
2940 
2941 /*********************************************************************//**
2942 Do commit-phase steps necessary for the deletion of a row.
2943 @return DB_SUCCESS or error code */
2944 static MY_ATTRIBUTE((nonnull, warn_unused_result))
2945 dberr_t
fts_delete(fts_trx_table_t * ftt,fts_trx_row_t * row)2946 fts_delete(
2947 /*=======*/
2948 	fts_trx_table_t*ftt,			/*!< in: FTS trx table */
2949 	fts_trx_row_t*	row)			/*!< in: row */
2950 {
2951 	que_t*		graph;
2952 	fts_table_t	fts_table;
2953 	dberr_t		error = DB_SUCCESS;
2954 	doc_id_t	write_doc_id;
2955 	dict_table_t*	table = ftt->table;
2956 	doc_id_t	doc_id = row->doc_id;
2957 	trx_t*		trx = ftt->fts_trx->trx;
2958 	pars_info_t*	info = pars_info_create();
2959 	fts_cache_t*	cache = table->fts->cache;
2960 
2961 	/* we do not index Documents whose Doc ID value is 0 */
2962 	if (doc_id == FTS_NULL_DOC_ID) {
2963 		ut_ad(!DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID));
2964 		return(error);
2965 	}
2966 
2967 	ut_a(row->state == FTS_DELETE || row->state == FTS_MODIFY);
2968 
2969 	FTS_INIT_FTS_TABLE(&fts_table, "DELETED", FTS_COMMON_TABLE, table);
2970 
2971 	/* Convert to "storage" byte order. */
2972 	fts_write_doc_id((byte*) &write_doc_id, doc_id);
2973 	fts_bind_doc_id(info, "doc_id", &write_doc_id);
2974 
2975 	/* It is possible we update a record that has not yet been sync-ed
2976 	into cache from last crash (delete Doc will not initialize the
2977 	sync). Avoid any added counter accounting until the FTS cache
2978 	is re-established and sync-ed */
2979 	if (table->fts->fts_status & ADDED_TABLE_SYNCED
2980 	    && doc_id > cache->synced_doc_id) {
2981 		mutex_enter(&table->fts->cache->deleted_lock);
2982 
2983 		/* The Doc ID could belong to those left in
2984 		ADDED table from last crash. So need to check
2985 		if it is less than first_doc_id when we initialize
2986 		the Doc ID system after reboot */
2987 		if (doc_id >= table->fts->cache->first_doc_id
2988 		    && table->fts->cache->added > 0) {
2989 			--table->fts->cache->added;
2990 		}
2991 
2992 		mutex_exit(&table->fts->cache->deleted_lock);
2993 
2994 		/* Only if the row was really deleted. */
2995 		ut_a(row->state == FTS_DELETE || row->state == FTS_MODIFY);
2996 	}
2997 
2998 	/* Note the deleted document for OPTIMIZE to purge. */
2999 	if (error == DB_SUCCESS) {
3000 
3001 		trx->op_info = "adding doc id to FTS DELETED";
3002 
3003 		info->graph_owns_us = TRUE;
3004 
3005 		fts_table.suffix = "DELETED";
3006 
3007 		graph = fts_parse_sql(
3008 			&fts_table,
3009 			info,
3010 			"BEGIN INSERT INTO \"%s\" VALUES (:doc_id);");
3011 
3012 		error = fts_eval_sql(trx, graph);
3013 
3014 		fts_que_graph_free(graph);
3015 	} else {
3016 		pars_info_free(info);
3017 	}
3018 
3019 	/* Increment the total deleted count, this is used to calculate the
3020 	number of documents indexed. */
3021 	if (error == DB_SUCCESS) {
3022 		mutex_enter(&table->fts->cache->deleted_lock);
3023 
3024 		++table->fts->cache->deleted;
3025 
3026 		mutex_exit(&table->fts->cache->deleted_lock);
3027 	}
3028 
3029 	return(error);
3030 }
3031 
3032 /*********************************************************************//**
3033 Do commit-phase steps necessary for the modification of a row.
3034 @return DB_SUCCESS or error code */
3035 static MY_ATTRIBUTE((nonnull, warn_unused_result))
3036 dberr_t
fts_modify(fts_trx_table_t * ftt,fts_trx_row_t * row)3037 fts_modify(
3038 /*=======*/
3039 	fts_trx_table_t*	ftt,		/*!< in: FTS trx table */
3040 	fts_trx_row_t*		row)		/*!< in: row */
3041 {
3042 	dberr_t	error;
3043 
3044 	ut_a(row->state == FTS_MODIFY);
3045 
3046 	error = fts_delete(ftt, row);
3047 
3048 	if (error == DB_SUCCESS) {
3049 		fts_add(ftt, row);
3050 	}
3051 
3052 	return(error);
3053 }
3054 
3055 /*********************************************************************//**
3056 Create a new document id.
3057 @return DB_SUCCESS if all went well else error */
3058 UNIV_INTERN
3059 dberr_t
fts_create_doc_id(dict_table_t * table,dtuple_t * row,mem_heap_t * heap)3060 fts_create_doc_id(
3061 /*==============*/
3062 	dict_table_t*	table,		/*!< in: row is of this table. */
3063 	dtuple_t*	row,		/* in/out: add doc id value to this
3064 					row. This is the current row that is
3065 					being inserted. */
3066 	mem_heap_t*	heap)		/*!< in: heap */
3067 {
3068 	doc_id_t	doc_id;
3069 	dberr_t		error = DB_SUCCESS;
3070 
3071 	ut_a(table->fts->doc_col != ULINT_UNDEFINED);
3072 
3073 	if (!DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID)) {
3074 		if (table->fts->cache->first_doc_id == FTS_NULL_DOC_ID) {
3075 			error = fts_get_next_doc_id(table, &doc_id);
3076 		}
3077 		return(error);
3078 	}
3079 
3080 	error = fts_get_next_doc_id(table, &doc_id);
3081 
3082 	if (error == DB_SUCCESS) {
3083 		dfield_t*	dfield;
3084 		doc_id_t*	write_doc_id;
3085 
3086 		ut_a(doc_id > 0);
3087 
3088 		dfield = dtuple_get_nth_field(row, table->fts->doc_col);
3089 		write_doc_id = static_cast<doc_id_t*>(
3090 			mem_heap_alloc(heap, sizeof(*write_doc_id)));
3091 
3092 		ut_a(doc_id != FTS_NULL_DOC_ID);
3093 		ut_a(sizeof(doc_id) == dfield->type.len);
3094 		fts_write_doc_id((byte*) write_doc_id, doc_id);
3095 
3096 		dfield_set_data(dfield, write_doc_id, sizeof(*write_doc_id));
3097 	}
3098 
3099 	return(error);
3100 }
3101 
3102 /*********************************************************************//**
3103 The given transaction is about to be committed; do whatever is necessary
3104 from the FTS system's POV.
3105 @return DB_SUCCESS or error code */
3106 static MY_ATTRIBUTE((nonnull, warn_unused_result))
3107 dberr_t
fts_commit_table(fts_trx_table_t * ftt)3108 fts_commit_table(
3109 /*=============*/
3110 	fts_trx_table_t*	ftt)		/*!< in: FTS table to commit*/
3111 {
3112 	const ib_rbt_node_t*	node;
3113 	ib_rbt_t*		rows;
3114 	dberr_t			error = DB_SUCCESS;
3115 	fts_cache_t*		cache = ftt->table->fts->cache;
3116 	trx_t*			trx = trx_allocate_for_background();
3117 
3118 	rows = ftt->rows;
3119 
3120 	ftt->fts_trx->trx = trx;
3121 
3122 	if (cache->get_docs == NULL) {
3123 		rw_lock_x_lock(&cache->init_lock);
3124 		if (cache->get_docs == NULL) {
3125 			cache->get_docs = fts_get_docs_create(cache);
3126 		}
3127 		rw_lock_x_unlock(&cache->init_lock);
3128 	}
3129 
3130 	for (node = rbt_first(rows);
3131 	     node != NULL && error == DB_SUCCESS;
3132 	     node = rbt_next(rows, node)) {
3133 
3134 		fts_trx_row_t*	row = rbt_value(fts_trx_row_t, node);
3135 
3136 		switch (row->state) {
3137 		case FTS_INSERT:
3138 			fts_add(ftt, row);
3139 			break;
3140 
3141 		case FTS_MODIFY:
3142 			error = fts_modify(ftt, row);
3143 			break;
3144 
3145 		case FTS_DELETE:
3146 			error = fts_delete(ftt, row);
3147 			break;
3148 
3149 		default:
3150 			ut_error;
3151 		}
3152 	}
3153 
3154 	fts_sql_commit(trx);
3155 
3156 	trx_free_for_background(trx);
3157 
3158 	return(error);
3159 }
3160 
3161 /*********************************************************************//**
3162 The given transaction is about to be committed; do whatever is necessary
3163 from the FTS system's POV.
3164 @return DB_SUCCESS or error code */
3165 UNIV_INTERN
3166 dberr_t
fts_commit(trx_t * trx)3167 fts_commit(
3168 /*=======*/
3169 	trx_t*	trx)				/*!< in: transaction */
3170 {
3171 	const ib_rbt_node_t*	node;
3172 	dberr_t			error;
3173 	ib_rbt_t*		tables;
3174 	fts_savepoint_t*	savepoint;
3175 
3176 	savepoint = static_cast<fts_savepoint_t*>(
3177 		ib_vector_last(trx->fts_trx->savepoints));
3178 	tables = savepoint->tables;
3179 
3180 	for (node = rbt_first(tables), error = DB_SUCCESS;
3181 	     node != NULL && error == DB_SUCCESS;
3182 	     node = rbt_next(tables, node)) {
3183 
3184 		fts_trx_table_t**	ftt;
3185 
3186 		ftt = rbt_value(fts_trx_table_t*, node);
3187 
3188 		error = fts_commit_table(*ftt);
3189 	}
3190 
3191 	return(error);
3192 }
3193 
3194 /*********************************************************************//**
3195 Initialize a document. */
3196 UNIV_INTERN
3197 void
fts_doc_init(fts_doc_t * doc)3198 fts_doc_init(
3199 /*=========*/
3200 	fts_doc_t*	doc)			/*!< in: doc to initialize */
3201 {
3202 	mem_heap_t*	heap = mem_heap_create(32);
3203 
3204 	memset(doc, 0, sizeof(*doc));
3205 
3206 	doc->self_heap = ib_heap_allocator_create(heap);
3207 }
3208 
3209 /*********************************************************************//**
3210 Free document. */
3211 UNIV_INTERN
3212 void
fts_doc_free(fts_doc_t * doc)3213 fts_doc_free(
3214 /*=========*/
3215 	fts_doc_t*	doc)			/*!< in: document */
3216 {
3217 	mem_heap_t*	heap = static_cast<mem_heap_t*>(doc->self_heap->arg);
3218 
3219 	if (doc->tokens) {
3220 		rbt_free(doc->tokens);
3221 	}
3222 
3223 #ifdef UNIV_DEBUG
3224 	memset(doc, 0, sizeof(*doc));
3225 #endif /* UNIV_DEBUG */
3226 
3227 	mem_heap_free(heap);
3228 }
3229 
3230 /*********************************************************************//**
3231 Callback function for fetch that stores a row id to the location pointed.
3232 The column's type must be DATA_FIXBINARY, DATA_BINARY_TYPE, length = 8.
3233 @return always returns NULL */
3234 UNIV_INTERN
3235 void*
fts_fetch_row_id(void * row,void * user_arg)3236 fts_fetch_row_id(
3237 /*=============*/
3238 	void*	row,				/*!< in: sel_node_t* */
3239 	void*	user_arg)			/*!< in: data pointer */
3240 {
3241 	sel_node_t*	node = static_cast<sel_node_t*>(row);
3242 
3243 	dfield_t*	dfield = que_node_get_val(node->select_list);
3244 	dtype_t*	type = dfield_get_type(dfield);
3245 	ulint		len = dfield_get_len(dfield);
3246 
3247 	ut_a(dtype_get_mtype(type) == DATA_FIXBINARY);
3248 	ut_a(dtype_get_prtype(type) & DATA_BINARY_TYPE);
3249 	ut_a(len == 8);
3250 
3251 	memcpy(user_arg, dfield_get_data(dfield), 8);
3252 
3253 	return(NULL);
3254 }
3255 
3256 /*********************************************************************//**
3257 Callback function for fetch that stores the text of an FTS document,
3258 converting each column to UTF-16.
3259 @return always FALSE */
3260 UNIV_INTERN
3261 ibool
fts_query_expansion_fetch_doc(void * row,void * user_arg)3262 fts_query_expansion_fetch_doc(
3263 /*==========================*/
3264 	void*		row,			/*!< in: sel_node_t* */
3265 	void*		user_arg)		/*!< in: fts_doc_t* */
3266 {
3267 	que_node_t*	exp;
3268 	sel_node_t*	node = static_cast<sel_node_t*>(row);
3269 	fts_doc_t*	result_doc = static_cast<fts_doc_t*>(user_arg);
3270 	dfield_t*	dfield;
3271 	ulint		len;
3272 	ulint		doc_len;
3273 	fts_doc_t	doc;
3274 	CHARSET_INFO*	doc_charset = NULL;
3275 	ulint		field_no = 0;
3276 
3277 	len = 0;
3278 
3279 	fts_doc_init(&doc);
3280 	doc.found = TRUE;
3281 
3282 	exp = node->select_list;
3283 	doc_len = 0;
3284 
3285 	doc_charset  = result_doc->charset;
3286 
3287 	/* Copy each indexed column content into doc->text.f_str */
3288 	while (exp) {
3289 		dfield = que_node_get_val(exp);
3290 		len = dfield_get_len(dfield);
3291 
3292 		/* NULL column */
3293 		if (len == UNIV_SQL_NULL) {
3294 			exp = que_node_get_next(exp);
3295 			continue;
3296 		}
3297 
3298 		if (!doc_charset) {
3299 			ulint   prtype = dfield->type.prtype;
3300 			doc_charset = innobase_get_fts_charset(
3301 					(int)(prtype & DATA_MYSQL_TYPE_MASK),
3302 					(uint) dtype_get_charset_coll(prtype));
3303 		}
3304 
3305 		doc.charset = doc_charset;
3306 
3307 		if (dfield_is_ext(dfield)) {
3308 			/* We ignore columns that are stored externally, this
3309 			could result in too many words to search */
3310 			exp = que_node_get_next(exp);
3311 			continue;
3312 		} else {
3313 			doc.text.f_n_char = 0;
3314 
3315 			doc.text.f_str = static_cast<byte*>(
3316 				dfield_get_data(dfield));
3317 
3318 			doc.text.f_len = len;
3319 		}
3320 
3321 		if (field_no == 0) {
3322 			fts_tokenize_document(&doc, result_doc);
3323 		} else {
3324 			fts_tokenize_document_next(&doc, doc_len, result_doc);
3325 		}
3326 
3327 		exp = que_node_get_next(exp);
3328 
3329 		doc_len += (exp) ? len + 1 : len;
3330 
3331 		field_no++;
3332 	}
3333 
3334 	ut_ad(doc_charset);
3335 
3336 	if (!result_doc->charset) {
3337 		result_doc->charset = doc_charset;
3338 	}
3339 
3340 	fts_doc_free(&doc);
3341 
3342 	return(FALSE);
3343 }
3344 
3345 /*********************************************************************//**
3346 fetch and tokenize the document. */
3347 static
3348 void
fts_fetch_doc_from_rec(fts_get_doc_t * get_doc,dict_index_t * clust_index,btr_pcur_t * pcur,ulint * offsets,fts_doc_t * doc)3349 fts_fetch_doc_from_rec(
3350 /*===================*/
3351 	fts_get_doc_t*  get_doc,	/*!< in: FTS index's get_doc struct */
3352 	dict_index_t*	clust_index,	/*!< in: cluster index */
3353 	btr_pcur_t*	pcur,		/*!< in: cursor whose position
3354 					has been stored */
3355 	ulint*		offsets,	/*!< in: offsets */
3356 	fts_doc_t*	doc)		/*!< out: fts doc to hold parsed
3357 					documents */
3358 {
3359 	dict_index_t*		index;
3360 	dict_table_t*		table;
3361 	const rec_t*		clust_rec;
3362 	ulint			num_field;
3363 	const dict_field_t*	ifield;
3364 	const dict_col_t*	col;
3365 	ulint			clust_pos;
3366 	ulint			i;
3367 	ulint			doc_len = 0;
3368 	ulint			processed_doc = 0;
3369 
3370 	if (!get_doc) {
3371 		return;
3372 	}
3373 
3374 	index = get_doc->index_cache->index;
3375 	table = get_doc->index_cache->index->table;
3376 
3377 	clust_rec = btr_pcur_get_rec(pcur);
3378 
3379 	num_field = dict_index_get_n_fields(index);
3380 
3381 	for (i = 0; i < num_field; i++) {
3382 		ifield = dict_index_get_nth_field(index, i);
3383 		col = dict_field_get_col(ifield);
3384 		clust_pos = dict_col_get_clust_pos(col, clust_index);
3385 
3386 		if (!get_doc->index_cache->charset) {
3387 			ulint   prtype = ifield->col->prtype;
3388 
3389 			get_doc->index_cache->charset =
3390 				innobase_get_fts_charset(
3391 					(int) (prtype & DATA_MYSQL_TYPE_MASK),
3392 					(uint) dtype_get_charset_coll(prtype));
3393 		}
3394 
3395 		if (rec_offs_nth_extern(offsets, clust_pos)) {
3396 			doc->text.f_str =
3397 				btr_rec_copy_externally_stored_field(
3398 					clust_rec, offsets,
3399 					dict_table_zip_size(table),
3400 					clust_pos, &doc->text.f_len,
3401 					static_cast<mem_heap_t*>(
3402 						doc->self_heap->arg));
3403 		} else {
3404 			doc->text.f_str = (byte*) rec_get_nth_field(
3405 				clust_rec, offsets, clust_pos,
3406 				&doc->text.f_len);
3407 		}
3408 
3409 		doc->found = TRUE;
3410 		doc->charset = get_doc->index_cache->charset;
3411 
3412 		/* Null Field */
3413 		if (doc->text.f_len == UNIV_SQL_NULL || doc->text.f_len == 0) {
3414 			continue;
3415 		}
3416 
3417 		if (processed_doc == 0) {
3418 			fts_tokenize_document(doc, NULL);
3419 		} else {
3420 			fts_tokenize_document_next(doc, doc_len, NULL);
3421 		}
3422 
3423 		processed_doc++;
3424 		doc_len += doc->text.f_len + 1;
3425 	}
3426 }
3427 
/*********************************************************************//**
This function fetches the document inserted during the committing
transaction, looking it up by Doc ID through the FTS_DOC_ID index (and,
when that index is not the clustered index, through the clustered index
as well). The document is tokenized once for every entry of
cache->get_docs and the tokens are added to the FTS cache. A sync of the
table is requested when the cache has grown past a tenth of
fts_max_cache_size, or fts_need_sync is set, and no sync is in progress.
@return TRUE; there is no failure return path. Nothing is added when no
matching record is found or the record is delete-marked */
static
ulint
fts_add_doc_by_id(
/*==============*/
	fts_trx_table_t*ftt,		/*!< in: FTS trx table */
	doc_id_t	doc_id,		/*!< in: doc id */
	ib_vector_t*	fts_indexes MY_ATTRIBUTE((unused)))
					/*!< in: affected fts indexes */
{
	mtr_t		mtr;
	mem_heap_t*	heap;
	btr_pcur_t	pcur;
	dict_table_t*	table;
	dtuple_t*	tuple;
	dfield_t*       dfield;
	fts_get_doc_t*	get_doc;
	doc_id_t        temp_doc_id;
	dict_index_t*   clust_index;
	dict_index_t*	fts_id_index;
	ibool		is_id_cluster;
	fts_cache_t*   	cache = ftt->table->fts->cache;

	ut_ad(cache->get_docs);

	/* If Doc ID has been supplied by the user, then the table
	might not yet be sync-ed */

	if (!(ftt->table->fts->fts_status & ADDED_TABLE_SYNCED)) {
		fts_init_index(ftt->table, FALSE);
	}

	/* Get the first FTS index's get_doc */
	get_doc = static_cast<fts_get_doc_t*>(
		ib_vector_get(cache->get_docs, 0));
	ut_ad(get_doc);

	table = get_doc->index_cache->index->table;

	/* Scratch heap for the search tuples and record offsets; it is
	freed at func_exit. */
	heap = mem_heap_create(512);

	clust_index = dict_table_get_first_index(table);
	fts_id_index = dict_table_get_index_on_name(
				table, FTS_DOC_ID_INDEX_NAME);

	/* Check whether the index on FTS_DOC_ID is cluster index */
	is_id_cluster = (clust_index == fts_id_index);

	mtr_start(&mtr);
	btr_pcur_init(&pcur);

	/* Search based on Doc ID. Here, we'll need to consider the case
	when there is no primary index on Doc ID */
	tuple = dtuple_create(heap, 1);
	dfield = dtuple_get_nth_field(tuple, 0);
	dfield->type.mtype = DATA_INT;
	dfield->type.prtype = DATA_NOT_NULL | DATA_UNSIGNED | DATA_BINARY_TYPE;

	/* The search key is the Doc ID written with mach_write_to_8()
	into a local buffer. */
	mach_write_to_8((byte*) &temp_doc_id, doc_id);
	dfield_set_data(dfield, &temp_doc_id, sizeof(temp_doc_id));

	btr_pcur_open_with_no_init(
		fts_id_index, tuple, PAGE_CUR_LE, BTR_SEARCH_LEAF,
		&pcur, 0, &mtr);

	/* If we have a match, add the data to doc structure */
	/* NOTE(review): a low match of 1 presumably means the single
	field of the search tuple matched completely - confirm against
	the btr_pcur API. */
	if (btr_pcur_get_low_match(&pcur) == 1) {
		const rec_t*	rec;
		btr_pcur_t*	doc_pcur;
		const rec_t*	clust_rec;
		btr_pcur_t	clust_pcur;
		ulint*		offsets = NULL;
		ulint		num_idx = ib_vector_size(cache->get_docs);

		rec = btr_pcur_get_rec(&pcur);

		/* Doc could be deleted */
		if (page_rec_is_infimum(rec)
		    || rec_get_deleted_flag(rec, dict_table_is_comp(table))) {

			goto func_exit;
		}

		if (is_id_cluster) {
			/* The FTS_DOC_ID index is the clustered index:
			the record just found is the clustered record. */
			clust_rec = rec;
			doc_pcur = &pcur;
		} else {
			dtuple_t*	clust_ref;
			ulint		n_fields;

			/* Build a row reference from the FTS_DOC_ID index
			record and use it to position a second cursor on
			the clustered index. */
			btr_pcur_init(&clust_pcur);
			n_fields = dict_index_get_n_unique(clust_index);

			clust_ref = dtuple_create(heap, n_fields);
			dict_index_copy_types(clust_ref, clust_index, n_fields);

			row_build_row_ref_in_tuple(
				clust_ref, rec, fts_id_index, NULL, NULL);

			btr_pcur_open_with_no_init(
				clust_index, clust_ref, PAGE_CUR_LE,
				BTR_SEARCH_LEAF, &clust_pcur, 0, &mtr);

			doc_pcur = &clust_pcur;
			clust_rec = btr_pcur_get_rec(&clust_pcur);

		}

		offsets = rec_get_offsets(clust_rec, clust_index,
					  NULL, ULINT_UNDEFINED, &heap);

		/* Tokenize the document once per entry of cache->get_docs.
		Note that "table" and "get_doc" declared in the loop body
		shadow the function-level variables of the same name. */
		 for (ulint i = 0; i < num_idx; ++i) {
			fts_doc_t       doc;
			dict_table_t*   table;
			fts_get_doc_t*  get_doc;

			get_doc = static_cast<fts_get_doc_t*>(
				ib_vector_get(cache->get_docs, i));

			table = get_doc->index_cache->index->table;

			fts_doc_init(&doc);

			fts_fetch_doc_from_rec(
				get_doc, clust_index, doc_pcur, offsets, &doc);

			if (doc.found) {
				ibool	success MY_ATTRIBUTE((unused));

				/* Remember the cursor position and commit
				the mini-transaction before the cache lock
				is taken below. */
				btr_pcur_store_position(doc_pcur, &mtr);
				mtr_commit(&mtr);

				DEBUG_SYNC_C("fts_instrument_sync_cache_wait");
				rw_lock_x_lock(&table->fts->cache->lock);

				/* Load the stopword list if that has not
				been done yet. */
				if (table->fts->cache->stopword_info.status
				    & STOPWORD_NOT_INIT) {
					fts_load_stopword(table, NULL, NULL,
							  NULL, TRUE, TRUE);
				}

				fts_cache_add_doc(
					table->fts->cache,
					get_doc->index_cache,
					doc_id, doc.tokens);

				/* Decide under the cache lock whether a sync
				is needed; the request itself is issued after
				the lock has been released. */
				bool	need_sync = false;
				if ((cache->total_size > fts_max_cache_size / 10
				     || fts_need_sync)
				    && !cache->sync->in_progress) {
					need_sync = true;
				}

				rw_lock_x_unlock(&table->fts->cache->lock);

				/* Debug instrumentation hooks */
				DBUG_EXECUTE_IF(
                                        "fts_instrument_sync_cache_wait",
					srv_fatal_semaphore_wait_threshold = 25;
					fts_max_cache_size = 100;
					fts_sync(cache->sync, true, true, false);
                                );

				DBUG_EXECUTE_IF(
					"fts_instrument_sync",
					fts_optimize_request_sync_table(table);
					os_event_wait(cache->sync->event);
				);

				DBUG_EXECUTE_IF(
					"fts_instrument_sync_debug",
					fts_sync(cache->sync, true, true, false);
				);

				DEBUG_SYNC_C("fts_instrument_sync_request");
				DBUG_EXECUTE_IF(
					"fts_instrument_sync_request",
					fts_optimize_request_sync_table(table);
				);

				if (need_sync) {
					fts_optimize_request_sync_table(table);
				}

				/* Restart the mini-transaction; it is
				committed again either in the next iteration
				or at func_exit. */
				mtr_start(&mtr);

				/* The cursor position is only restored when
				another entry remains to be processed. */
				if (i < num_idx - 1) {

					success = btr_pcur_restore_position(
						BTR_SEARCH_LEAF, doc_pcur,
						&mtr);

					ut_ad(success);
				}
			}

			fts_doc_free(&doc);
		}

		/* In the clustered case doc_pcur is &pcur, which is closed
		at func_exit. */
		if (!is_id_cluster) {
			btr_pcur_close(doc_pcur);
		}
	}
func_exit:
	mtr_commit(&mtr);

	btr_pcur_close(&pcur);

	mem_heap_free(heap);
	return(TRUE);
}
3642 
3643 
3644 /*********************************************************************//**
3645 Callback function to read a single ulint column.
3646 return always returns TRUE */
3647 static
3648 ibool
fts_read_ulint(void * row,void * user_arg)3649 fts_read_ulint(
3650 /*===========*/
3651 	void*		row,		/*!< in: sel_node_t* */
3652 	void*		user_arg)	/*!< in: pointer to ulint */
3653 {
3654 	sel_node_t*	sel_node = static_cast<sel_node_t*>(row);
3655 	ulint*		value = static_cast<ulint*>(user_arg);
3656 	que_node_t*	exp = sel_node->select_list;
3657 	dfield_t*	dfield = que_node_get_val(exp);
3658 	void*		data = dfield_get_data(dfield);
3659 
3660 	*value = static_cast<ulint>(mach_read_from_4(
3661 		static_cast<const byte*>(data)));
3662 
3663 	return(TRUE);
3664 }
3665 
3666 /*********************************************************************//**
3667 Get maximum Doc ID in a table if index "FTS_DOC_ID_INDEX" exists
3668 @return max Doc ID or 0 if index "FTS_DOC_ID_INDEX" does not exist */
3669 UNIV_INTERN
3670 doc_id_t
fts_get_max_doc_id(dict_table_t * table)3671 fts_get_max_doc_id(
3672 /*===============*/
3673 	dict_table_t*	table)		/*!< in: user table */
3674 {
3675 	dict_index_t*	index;
3676 	dict_field_t*	dfield MY_ATTRIBUTE((unused)) = NULL;
3677 	doc_id_t	doc_id = 0;
3678 	mtr_t		mtr;
3679 	btr_pcur_t	pcur;
3680 
3681 	index = dict_table_get_index_on_name(table, FTS_DOC_ID_INDEX_NAME);
3682 
3683 	if (!index) {
3684 		return(0);
3685 	}
3686 
3687 	dfield = dict_index_get_nth_field(index, 0);
3688 
3689 #if 0 /* This can fail when renaming a column to FTS_DOC_ID_COL_NAME. */
3690 	ut_ad(innobase_strcasecmp(FTS_DOC_ID_COL_NAME, dfield->name) == 0);
3691 #endif
3692 
3693 	mtr_start(&mtr);
3694 
3695 	/* fetch the largest indexes value */
3696 	btr_pcur_open_at_index_side(
3697 		false, index, BTR_SEARCH_LEAF, &pcur, true, 0, &mtr);
3698 
3699 	if (!page_is_empty(btr_pcur_get_page(&pcur))) {
3700 		const rec_t*    rec = NULL;
3701 		ulint		offsets_[REC_OFFS_NORMAL_SIZE];
3702 		ulint*		offsets = offsets_;
3703 		mem_heap_t*	heap = NULL;
3704 		ulint		len;
3705 		const void*	data;
3706 
3707 		rec_offs_init(offsets_);
3708 
3709 		do {
3710 			rec = btr_pcur_get_rec(&pcur);
3711 
3712 			if (page_rec_is_user_rec(rec)) {
3713 				break;
3714 			}
3715 		} while (btr_pcur_move_to_prev(&pcur, &mtr));
3716 
3717 		if (!rec) {
3718 			goto func_exit;
3719 		}
3720 
3721 		offsets = rec_get_offsets(
3722 			rec, index, offsets, ULINT_UNDEFINED, &heap);
3723 
3724 		data = rec_get_nth_field(rec, offsets, 0, &len);
3725 
3726 		doc_id = static_cast<doc_id_t>(fts_read_doc_id(
3727 			static_cast<const byte*>(data)));
3728 	}
3729 
3730 func_exit:
3731 	btr_pcur_close(&pcur);
3732 	mtr_commit(&mtr);
3733 	return(doc_id);
3734 }
3735 
3736 /*********************************************************************//**
3737 Fetch document with the given document id.
3738 @return DB_SUCCESS if OK else error */
3739 UNIV_INTERN
3740 dberr_t
fts_doc_fetch_by_doc_id(fts_get_doc_t * get_doc,doc_id_t doc_id,dict_index_t * index_to_use,ulint option,fts_sql_callback callback,void * arg)3741 fts_doc_fetch_by_doc_id(
3742 /*====================*/
3743 	fts_get_doc_t*	get_doc,	/*!< in: state */
3744 	doc_id_t	doc_id,		/*!< in: id of document to
3745 					fetch */
3746 	dict_index_t*	index_to_use,	/*!< in: caller supplied FTS index,
3747 					or NULL */
3748 	ulint		option,		/*!< in: search option, if it is
3749 					greater than doc_id or equal */
3750 	fts_sql_callback
3751 			callback,	/*!< in: callback to read */
3752 	void*		arg)		/*!< in: callback arg */
3753 {
3754 	pars_info_t*	info;
3755 	dberr_t		error;
3756 	const char*	select_str;
3757 	doc_id_t	write_doc_id;
3758 	dict_index_t*	index;
3759 	trx_t*		trx = trx_allocate_for_background();
3760 	que_t*          graph;
3761 
3762 	trx->op_info = "fetching indexed FTS document";
3763 
3764 	/* The FTS index can be supplied by caller directly with
3765 	"index_to_use", otherwise, get it from "get_doc" */
3766 	index = (index_to_use) ? index_to_use : get_doc->index_cache->index;
3767 
3768 	if (get_doc && get_doc->get_document_graph) {
3769 		info = get_doc->get_document_graph->info;
3770 	} else {
3771 		info = pars_info_create();
3772 	}
3773 
3774 	/* Convert to "storage" byte order. */
3775 	fts_write_doc_id((byte*) &write_doc_id, doc_id);
3776 	fts_bind_doc_id(info, "doc_id", &write_doc_id);
3777 	pars_info_bind_function(info, "my_func", callback, arg);
3778 
3779 	select_str = fts_get_select_columns_str(index, info, info->heap);
3780 	pars_info_bind_id(info, TRUE, "table_name", index->table_name);
3781 
3782 	if (!get_doc || !get_doc->get_document_graph) {
3783 		if (option == FTS_FETCH_DOC_BY_ID_EQUAL) {
3784 			graph = fts_parse_sql(
3785 				NULL,
3786 				info,
3787 				mem_heap_printf(info->heap,
3788 					"DECLARE FUNCTION my_func;\n"
3789 					"DECLARE CURSOR c IS"
3790 					" SELECT %s FROM $table_name"
3791 					" WHERE %s = :doc_id;\n"
3792 					"BEGIN\n"
3793 					""
3794 					"OPEN c;\n"
3795 					"WHILE 1 = 1 LOOP\n"
3796 					"  FETCH c INTO my_func();\n"
3797 					"  IF c %% NOTFOUND THEN\n"
3798 					"    EXIT;\n"
3799 					"  END IF;\n"
3800 					"END LOOP;\n"
3801 					"CLOSE c;",
3802 					select_str, FTS_DOC_ID_COL_NAME));
3803 		} else {
3804 			ut_ad(option == FTS_FETCH_DOC_BY_ID_LARGE);
3805 
3806 			/* This is used for crash recovery of table with
3807 			hidden DOC ID or FTS indexes. We will scan the table
3808 			to re-processing user table rows whose DOC ID or
3809 			FTS indexed documents have not been sync-ed to disc
3810 			during recent crash.
3811 			In the case that all fulltext indexes are dropped
3812 			for a table, we will keep the "hidden" FTS_DOC_ID
3813 			column, and this scan is to retreive the largest
3814 			DOC ID being used in the table to determine the
3815 			appropriate next DOC ID.
3816 			In the case of there exists fulltext index(es), this
3817 			operation will re-tokenize any docs that have not
3818 			been sync-ed to the disk, and re-prime the FTS
3819 			cached */
3820 			graph = fts_parse_sql(
3821 				NULL,
3822 				info,
3823 				mem_heap_printf(info->heap,
3824 					"DECLARE FUNCTION my_func;\n"
3825 					"DECLARE CURSOR c IS"
3826 					" SELECT %s, %s FROM $table_name"
3827 					" WHERE %s > :doc_id;\n"
3828 					"BEGIN\n"
3829 					""
3830 					"OPEN c;\n"
3831 					"WHILE 1 = 1 LOOP\n"
3832 					"  FETCH c INTO my_func();\n"
3833 					"  IF c %% NOTFOUND THEN\n"
3834 					"    EXIT;\n"
3835 					"  END IF;\n"
3836 					"END LOOP;\n"
3837 					"CLOSE c;",
3838 					FTS_DOC_ID_COL_NAME,
3839 					select_str, FTS_DOC_ID_COL_NAME));
3840 		}
3841 		if (get_doc) {
3842 			get_doc->get_document_graph = graph;
3843 		}
3844 	} else {
3845 		graph = get_doc->get_document_graph;
3846 	}
3847 
3848 	error = fts_eval_sql(trx, graph);
3849 
3850 	if (error == DB_SUCCESS) {
3851 		fts_sql_commit(trx);
3852 	} else {
3853 		fts_sql_rollback(trx);
3854 	}
3855 
3856 	trx_free_for_background(trx);
3857 
3858 	if (!get_doc) {
3859 		fts_que_graph_free(graph);
3860 	}
3861 
3862 	return(error);
3863 }
3864 
3865 /*********************************************************************//**
3866 Write out a single word's data as new entry/entries in the INDEX table.
3867 @return DB_SUCCESS if all OK. */
3868 UNIV_INTERN
3869 dberr_t
fts_write_node(trx_t * trx,que_t ** graph,fts_table_t * fts_table,fts_string_t * word,fts_node_t * node)3870 fts_write_node(
3871 /*===========*/
3872 	trx_t*		trx,			/*!< in: transaction */
3873 	que_t**		graph,			/*!< in: query graph */
3874 	fts_table_t*	fts_table,		/*!< in: aux table */
3875 	fts_string_t*	word,			/*!< in: word in UTF-8 */
3876 	fts_node_t*	node)			/*!< in: node columns */
3877 {
3878 	pars_info_t*	info;
3879 	dberr_t		error;
3880 	ib_uint32_t	doc_count;
3881 	ib_time_t	start_time;
3882 	doc_id_t	last_doc_id;
3883 	doc_id_t	first_doc_id;
3884 
3885 	if (*graph) {
3886 		info = (*graph)->info;
3887 	} else {
3888 		info = pars_info_create();
3889 	}
3890 
3891 	pars_info_bind_varchar_literal(info, "token", word->f_str, word->f_len);
3892 
3893 	/* Convert to "storage" byte order. */
3894 	fts_write_doc_id((byte*) &first_doc_id, node->first_doc_id);
3895 	fts_bind_doc_id(info, "first_doc_id", &first_doc_id);
3896 
3897 	/* Convert to "storage" byte order. */
3898 	fts_write_doc_id((byte*) &last_doc_id, node->last_doc_id);
3899 	fts_bind_doc_id(info, "last_doc_id", &last_doc_id);
3900 
3901 	ut_a(node->last_doc_id >= node->first_doc_id);
3902 
3903 	/* Convert to "storage" byte order. */
3904 	mach_write_to_4((byte*) &doc_count, node->doc_count);
3905 	pars_info_bind_int4_literal(
3906 		info, "doc_count", (const ib_uint32_t*) &doc_count);
3907 
3908 	/* Set copy_name to FALSE since it's a static. */
3909 	pars_info_bind_literal(
3910 		info, "ilist", node->ilist, node->ilist_size,
3911 		DATA_BLOB, DATA_BINARY_TYPE);
3912 
3913 	if (!*graph) {
3914 		*graph = fts_parse_sql(
3915 			fts_table,
3916 			info,
3917 			"BEGIN\n"
3918 			"INSERT INTO \"%s\" VALUES "
3919 			"(:token, :first_doc_id,"
3920 			" :last_doc_id, :doc_count, :ilist);");
3921 	}
3922 
3923 	start_time = ut_time();
3924 	error = fts_eval_sql(trx, *graph);
3925 	elapsed_time += ut_time() - start_time;
3926 	++n_nodes;
3927 
3928 	return(error);
3929 }
3930 
3931 /*********************************************************************//**
3932 Add rows to the DELETED_CACHE table.
3933 @return DB_SUCCESS if all went well else error code*/
3934 static MY_ATTRIBUTE((nonnull, warn_unused_result))
3935 dberr_t
fts_sync_add_deleted_cache(fts_sync_t * sync,ib_vector_t * doc_ids)3936 fts_sync_add_deleted_cache(
3937 /*=======================*/
3938 	fts_sync_t*	sync,			/*!< in: sync state */
3939 	ib_vector_t*	doc_ids)		/*!< in: doc ids to add */
3940 {
3941 	ulint		i;
3942 	pars_info_t*	info;
3943 	que_t*		graph;
3944 	fts_table_t	fts_table;
3945 	doc_id_t	dummy = 0;
3946 	dberr_t		error = DB_SUCCESS;
3947 	ulint		n_elems = ib_vector_size(doc_ids);
3948 
3949 	ut_a(ib_vector_size(doc_ids) > 0);
3950 
3951 	ib_vector_sort(doc_ids, fts_update_doc_id_cmp);
3952 
3953 	info = pars_info_create();
3954 
3955 	fts_bind_doc_id(info, "doc_id", &dummy);
3956 
3957 	FTS_INIT_FTS_TABLE(
3958 		&fts_table, "DELETED_CACHE", FTS_COMMON_TABLE, sync->table);
3959 
3960 	graph = fts_parse_sql(
3961 		&fts_table,
3962 		info,
3963 		"BEGIN INSERT INTO \"%s\" VALUES (:doc_id);");
3964 
3965 	for (i = 0; i < n_elems && error == DB_SUCCESS; ++i) {
3966 		fts_update_t*	update;
3967 		doc_id_t	write_doc_id;
3968 
3969 		update = static_cast<fts_update_t*>(ib_vector_get(doc_ids, i));
3970 
3971 		/* Convert to "storage" byte order. */
3972 		fts_write_doc_id((byte*) &write_doc_id, update->doc_id);
3973 		fts_bind_doc_id(info, "doc_id", &write_doc_id);
3974 
3975 		error = fts_eval_sql(sync->trx, graph);
3976 	}
3977 
3978 	fts_que_graph_free(graph);
3979 
3980 	return(error);
3981 }
3982 
3983 /** Write the words and ilist to disk.
3984 @param[in,out]	trx		transaction
3985 @param[in]	index_cache	index cache
3986 @param[in]	unlock_cache	whether unlock cache when write node
3987 @param[in]      sync_start_time Holds the timestamp of start of sync
3988                                 for deducing the length of sync time
3989 @return DB_SUCCESS if all went well else error code */
3990 static MY_ATTRIBUTE((nonnull, warn_unused_result))
3991 dberr_t
fts_sync_write_words(trx_t * trx,fts_index_cache_t * index_cache,bool unlock_cache,ib_time_t sync_start_time)3992 fts_sync_write_words(
3993 	trx_t*			trx,
3994 	fts_index_cache_t*	index_cache,
3995 	bool			unlock_cache,
3996 	ib_time_t               sync_start_time)
3997 {
3998 	fts_table_t	fts_table;
3999 	ulint		n_nodes = 0;
4000 	ulint		n_words = 0;
4001 	const ib_rbt_node_t* rbt_node;
4002 	dberr_t		error = DB_SUCCESS;
4003 	ibool		print_error = FALSE;
4004 	dict_table_t*	table = index_cache->index->table;
4005 	/* We use this to deduce threshold value of time
4006 	that we can let sync to go on holding cache lock */
4007         const float cutoff = 0.98;
4008         ulint           lock_threshold =
4009                         (srv_fatal_semaphore_wait_threshold % SRV_SEMAPHORE_WAIT_EXTENSION)
4010                         * cutoff;
4011 	bool            timeout_extended = false;
4012 #ifdef FTS_DOC_STATS_DEBUG
4013 	ulint		n_new_words = 0;
4014 #endif /* FTS_DOC_STATS_DEBUG */
4015 
4016 	FTS_INIT_INDEX_TABLE(
4017 		&fts_table, NULL, FTS_INDEX_TABLE, index_cache->index);
4018 
4019 	n_words = rbt_size(index_cache->words);
4020 
4021 	/* We iterate over the entire tree, even if there is an error,
4022 	since we want to free the memory used during caching. */
4023 	for (rbt_node = rbt_first(index_cache->words);
4024 	     rbt_node;
4025 	     rbt_node = rbt_next(index_cache->words, rbt_node)) {
4026 
4027 		ulint			i;
4028 		ulint			selected;
4029 		fts_tokenizer_word_t*	word;
4030 
4031 		word = rbt_value(fts_tokenizer_word_t, rbt_node);
4032 
4033 		selected = fts_select_index(
4034 			index_cache->charset, word->text.f_str,
4035 			word->text.f_len);
4036 
4037 		fts_table.suffix = fts_get_suffix(selected);
4038 
4039 #ifdef FTS_DOC_STATS_DEBUG
4040 		/* Check if the word exists in the FTS index and if not
4041 		then we need to increment the total word count stats. */
4042 		if (error == DB_SUCCESS && fts_enable_diag_print) {
4043 			ibool	found = FALSE;
4044 
4045 			error = fts_is_word_in_index(
4046 				trx,
4047 				&index_cache->sel_graph[selected],
4048 				&fts_table,
4049 				&word->text, &found);
4050 
4051 			if (error == DB_SUCCESS && !found) {
4052 
4053 				++n_new_words;
4054 			}
4055 		}
4056 #endif /* FTS_DOC_STATS_DEBUG */
4057 
4058 		/* We iterate over all the nodes even if there was an error */
4059 		for (i = 0; i < ib_vector_size(word->nodes); ++i) {
4060 
4061 			fts_node_t* fts_node = static_cast<fts_node_t*>(
4062 				ib_vector_get(word->nodes, i));
4063 
4064 			if (fts_node->synced) {
4065 				continue;
4066 			} else {
4067 				fts_node->synced = true;
4068 			}
4069 
4070 			/*FIXME: we need to handle the error properly. */
4071 			if (error == DB_SUCCESS) {
4072 				DBUG_EXECUTE_IF("fts_instrument_sync_write",
4073                                                os_thread_sleep(10000000););
4074 
4075 				if (!unlock_cache) {
4076                                         ulint cache_lock_time = ut_time() - sync_start_time;
4077                                         if (cache_lock_time > lock_threshold) {
4078                                                 if (!timeout_extended) {
4079                                                         os_atomic_increment_ulint(
4080                                                         &srv_fatal_semaphore_wait_threshold,
4081                                                         SRV_SEMAPHORE_WAIT_EXTENSION);
4082                                                         timeout_extended = true;
4083                                                         lock_threshold +=
4084                                                         SRV_SEMAPHORE_WAIT_EXTENSION;
4085                                                 } else {
4086                                                         unlock_cache = true;
4087                                                         os_atomic_decrement_ulint(
4088                                                         &srv_fatal_semaphore_wait_threshold,
4089                                                         SRV_SEMAPHORE_WAIT_EXTENSION);
4090                                                         timeout_extended = false;
4091 
4092                                                 }
4093                                         }
4094                                 }
4095 
4096 				if (unlock_cache) {
4097 					rw_lock_x_unlock(
4098 						&table->fts->cache->lock);
4099 				}
4100 
4101 				error = fts_write_node(
4102 					trx,
4103 					&index_cache->ins_graph[selected],
4104 					&fts_table, &word->text, fts_node);
4105 
4106 				DBUG_EXECUTE_IF("fts_instrument_sync_write",
4107                                                 os_thread_sleep(15000000););
4108 
4109 				DEBUG_SYNC_C("fts_write_node");
4110 				DBUG_EXECUTE_IF("fts_write_node_crash",
4111 					DBUG_SUICIDE(););
4112 
4113 				DBUG_EXECUTE_IF("fts_instrument_sync_sleep",
4114 					os_thread_sleep(1000000);
4115 				);
4116 
4117 				if (unlock_cache) {
4118 					rw_lock_x_lock(
4119 						&table->fts->cache->lock);
4120 				}
4121 			}
4122 		}
4123 
4124 		n_nodes += ib_vector_size(word->nodes);
4125 
4126 		if (error != DB_SUCCESS && !print_error) {
4127 			ut_print_timestamp(stderr);
4128 			fprintf(stderr, "  InnoDB: Error (%s) writing "
4129 				"word node to FTS auxiliary index "
4130 				"table.\n", ut_strerr(error));
4131 
4132 			print_error = TRUE;
4133 		}
4134 	}
4135 
4136 #ifdef FTS_DOC_STATS_DEBUG
4137 	if (error == DB_SUCCESS && n_new_words > 0 && fts_enable_diag_print) {
4138 		fts_table_t	fts_table;
4139 
4140 		FTS_INIT_FTS_TABLE(&fts_table, NULL, FTS_COMMON_TABLE, table);
4141 
4142 		/* Increment the total number of words in the FTS index */
4143 		error = fts_config_increment_index_value(
4144 			trx, index_cache->index, FTS_TOTAL_WORD_COUNT,
4145 			n_new_words);
4146 	}
4147 #endif /* FTS_DOC_STATS_DEBUG */
4148 
4149 	if (fts_enable_diag_print) {
4150 		printf("Avg number of nodes: %lf\n",
4151 		       (double) n_nodes / (double) (n_words > 1 ? n_words : 1));
4152 	}
4153 
4154 	return(error);
4155 }
4156 
4157 #ifdef FTS_DOC_STATS_DEBUG
4158 /*********************************************************************//**
4159 Write a single documents statistics to disk.
4160 @return DB_SUCCESS if all went well else error code */
4161 static MY_ATTRIBUTE((nonnull, warn_unused_result))
4162 dberr_t
fts_sync_write_doc_stat(trx_t * trx,dict_index_t * index,que_t ** graph,const fts_doc_stats_t * doc_stat)4163 fts_sync_write_doc_stat(
4164 /*====================*/
4165 	trx_t*			trx,		/*!< in: transaction */
4166 	dict_index_t*		index,		/*!< in: index */
4167 	que_t**			graph,		/* out: query graph */
4168 	const fts_doc_stats_t*	doc_stat)	/*!< in: doc stats to write */
4169 {
4170 	pars_info_t*	info;
4171 	doc_id_t	doc_id;
4172 	dberr_t		error = DB_SUCCESS;
4173 	ib_uint32_t	word_count;
4174 
4175 	if (*graph) {
4176 		info = (*graph)->info;
4177 	} else {
4178 		info = pars_info_create();
4179 	}
4180 
4181 	/* Convert to "storage" byte order. */
4182 	mach_write_to_4((byte*) &word_count, doc_stat->word_count);
4183 	pars_info_bind_int4_literal(
4184 		info, "count", (const ib_uint32_t*) &word_count);
4185 
4186 	/* Convert to "storage" byte order. */
4187 	fts_write_doc_id((byte*) &doc_id, doc_stat->doc_id);
4188 	fts_bind_doc_id(info, "doc_id", &doc_id);
4189 
4190 	if (!*graph) {
4191 		fts_table_t	fts_table;
4192 
4193 		FTS_INIT_INDEX_TABLE(
4194 			&fts_table, "DOC_ID", FTS_INDEX_TABLE, index);
4195 
4196 		*graph = fts_parse_sql(
4197 			&fts_table,
4198 			info,
4199 			"BEGIN INSERT INTO \"%s\" VALUES (:doc_id, :count);");
4200 	}
4201 
4202 	for (;;) {
4203 		error = fts_eval_sql(trx, *graph);
4204 
4205 		if (error == DB_SUCCESS) {
4206 
4207 			break;				/* Exit the loop. */
4208 		} else {
4209 			ut_print_timestamp(stderr);
4210 
4211 			if (error == DB_LOCK_WAIT_TIMEOUT) {
4212 				fprintf(stderr, "  InnoDB: Warning: lock wait "
4213 					"timeout writing to FTS doc_id. "
4214 					"Retrying!\n");
4215 
4216 				trx->error_state = DB_SUCCESS;
4217 			} else {
4218 				fprintf(stderr, "  InnoDB: Error: (%s) "
4219 					"while writing to FTS doc_id.\n",
4220 					ut_strerr(error));
4221 
4222 				break;			/* Exit the loop. */
4223 			}
4224 		}
4225 	}
4226 
4227 	return(error);
4228 }
4229 
4230 /*********************************************************************//**
4231 Write document statistics to disk.
4232 @return DB_SUCCESS if all OK */
4233 static
4234 ulint
fts_sync_write_doc_stats(trx_t * trx,const fts_index_cache_t * index_cache)4235 fts_sync_write_doc_stats(
4236 /*=====================*/
4237 	trx_t*			trx,		/*!< in: transaction */
4238 	const fts_index_cache_t*index_cache)	/*!< in: index cache */
4239 {
4240 	dberr_t		error = DB_SUCCESS;
4241 	que_t*		graph = NULL;
4242 	fts_doc_stats_t*  doc_stat;
4243 
4244 	if (ib_vector_is_empty(index_cache->doc_stats)) {
4245 		return(DB_SUCCESS);
4246 	}
4247 
4248 	doc_stat = static_cast<ts_doc_stats_t*>(
4249 		ib_vector_pop(index_cache->doc_stats));
4250 
4251 	while (doc_stat) {
4252 		error = fts_sync_write_doc_stat(
4253 			trx, index_cache->index, &graph, doc_stat);
4254 
4255 		if (error != DB_SUCCESS) {
4256 			break;
4257 		}
4258 
4259 		if (ib_vector_is_empty(index_cache->doc_stats)) {
4260 			break;
4261 		}
4262 
4263 		doc_stat = static_cast<ts_doc_stats_t*>(
4264 			ib_vector_pop(index_cache->doc_stats));
4265 	}
4266 
4267 	if (graph != NULL) {
4268 		fts_que_graph_free_check_lock(NULL, index_cache, graph);
4269 	}
4270 
4271 	return(error);
4272 }
4273 
4274 /*********************************************************************//**
4275 Callback to check the existince of a word.
4276 @return always return NULL */
4277 static
4278 ibool
fts_lookup_word(void * row,void * user_arg)4279 fts_lookup_word(
4280 /*============*/
4281 	void*	row,				/*!< in:  sel_node_t* */
4282 	void*	user_arg)			/*!< in:  fts_doc_t* */
4283 {
4284 
4285 	que_node_t*	exp;
4286 	sel_node_t*	node = static_cast<sel_node_t*>(row);
4287 	ibool*		found = static_cast<ibool*>(user_arg);
4288 
4289 	exp = node->select_list;
4290 
4291 	while (exp) {
4292 		dfield_t*	dfield = que_node_get_val(exp);
4293 		ulint		len = dfield_get_len(dfield);
4294 
4295 		if (len != UNIV_SQL_NULL && len != 0) {
4296 			*found = TRUE;
4297 		}
4298 
4299 		exp = que_node_get_next(exp);
4300 	}
4301 
4302 	return(FALSE);
4303 }
4304 
4305 /*********************************************************************//**
4306 Check whether a particular word (term) exists in the FTS index.
4307 @return DB_SUCCESS if all went well else error code */
4308 static
4309 dberr_t
fts_is_word_in_index(trx_t * trx,que_t ** graph,fts_table_t * fts_table,const fts_string_t * word,ibool * found)4310 fts_is_word_in_index(
4311 /*=================*/
4312 	trx_t*		trx,			/*!< in: FTS query state */
4313 	que_t**		graph,			/* out: Query graph */
4314 	fts_table_t*	fts_table,		/*!< in: table instance */
4315 	const fts_string_t*
4316 			word,			/*!< in: the word to check */
4317 	ibool*		found)			/* out: TRUE if exists */
4318 {
4319 	pars_info_t*	info;
4320 	dberr_t		error;
4321 
4322 	trx->op_info = "looking up word in FTS index";
4323 
4324 	if (*graph) {
4325 		info = (*graph)->info;
4326 	} else {
4327 		info = pars_info_create();
4328 	}
4329 
4330 	pars_info_bind_function(info, "my_func", fts_lookup_word, found);
4331 	pars_info_bind_varchar_literal(info, "word", word->f_str, word->f_len);
4332 
4333 	if (*graph == NULL) {
4334 		*graph = fts_parse_sql(
4335 			fts_table,
4336 			info,
4337 			"DECLARE FUNCTION my_func;\n"
4338 			"DECLARE CURSOR c IS"
4339 			" SELECT doc_count\n"
4340 			" FROM \"%s\"\n"
4341 			" WHERE word = :word "
4342 			" ORDER BY first_doc_id;\n"
4343 			"BEGIN\n"
4344 			"\n"
4345 			"OPEN c;\n"
4346 			"WHILE 1 = 1 LOOP\n"
4347 			"  FETCH c INTO my_func();\n"
4348 			"  IF c % NOTFOUND THEN\n"
4349 			"    EXIT;\n"
4350 			"  END IF;\n"
4351 			"END LOOP;\n"
4352 			"CLOSE c;");
4353 	}
4354 
4355 	for (;;) {
4356 		error = fts_eval_sql(trx, *graph);
4357 
4358 		if (error == DB_SUCCESS) {
4359 
4360 			break;				/* Exit the loop. */
4361 		} else {
4362 			ut_print_timestamp(stderr);
4363 
4364 			if (error == DB_LOCK_WAIT_TIMEOUT) {
4365 				fprintf(stderr, "  InnoDB: Warning: lock wait "
4366 					"timeout reading FTS index. "
4367 					"Retrying!\n");
4368 
4369 				trx->error_state = DB_SUCCESS;
4370 			} else {
4371 				fprintf(stderr, "  InnoDB: Error: (%s) "
4372 					"while reading FTS index.\n",
4373 					ut_strerr(error));
4374 
4375 				break;			/* Exit the loop. */
4376 			}
4377 		}
4378 	}
4379 
4380 	return(error);
4381 }
4382 #endif /* FTS_DOC_STATS_DEBUG */
4383 
4384 /*********************************************************************//**
4385 Begin Sync, create transaction, acquire locks, etc. */
4386 static
4387 void
fts_sync_begin(fts_sync_t * sync)4388 fts_sync_begin(
4389 /*===========*/
4390 	fts_sync_t*	sync)			/*!< in: sync state */
4391 {
4392 	fts_cache_t*	cache = sync->table->fts->cache;
4393 
4394 	n_nodes = 0;
4395 	elapsed_time = 0;
4396 
4397 	sync->start_time = ut_time();
4398 
4399 	sync->trx = trx_allocate_for_background();
4400 
4401 	if (fts_enable_diag_print) {
4402 		ib_logf(IB_LOG_LEVEL_INFO,
4403 			"FTS SYNC for table %s, deleted count: %ld size: "
4404 			"%lu bytes",
4405 			sync->table->name,
4406 			ib_vector_size(cache->deleted_doc_ids),
4407 			cache->total_size);
4408 	}
4409 }
4410 
4411 /*********************************************************************//**
4412 Run SYNC on the table, i.e., write out data from the index specific
4413 cache to the FTS aux INDEX table and FTS aux doc id stats table.
4414 @return DB_SUCCESS if all OK */
4415 static MY_ATTRIBUTE((nonnull, warn_unused_result))
4416 dberr_t
fts_sync_index(fts_sync_t * sync,fts_index_cache_t * index_cache)4417 fts_sync_index(
4418 /*===========*/
4419 	fts_sync_t*		sync,		/*!< in: sync state */
4420 	fts_index_cache_t*	index_cache)	/*!< in: index cache */
4421 {
4422 	trx_t*		trx = sync->trx;
4423 	dberr_t		error = DB_SUCCESS;
4424 
4425 	trx->op_info = "doing SYNC index";
4426 
4427 	if (fts_enable_diag_print) {
4428 		ib_logf(IB_LOG_LEVEL_INFO,
4429 			"SYNC words: %ld", rbt_size(index_cache->words));
4430 	}
4431 
4432 	ut_ad(rbt_validate(index_cache->words));
4433 
4434 	error = fts_sync_write_words(sync->trx, index_cache, sync->unlock_cache,
4435 				    sync->start_time);
4436 
4437 #ifdef FTS_DOC_STATS_DEBUG
4438 	/* FTS_RESOLVE: the word counter info in auxiliary table "DOC_ID"
4439 	is not used currently for ranking. We disable fts_sync_write_doc_stats()
4440 	for now */
4441 	/* Write the per doc statistics that will be used for ranking. */
4442 	if (error == DB_SUCCESS) {
4443 
4444 		error = fts_sync_write_doc_stats(trx, index_cache);
4445 	}
4446 #endif /* FTS_DOC_STATS_DEBUG */
4447 
4448 	return(error);
4449 }
4450 
4451 /** Check if index cache has been synced completely
4452 @param[in,out]	index_cache	index cache
4453 @return true if index is synced, otherwise false. */
4454 static
4455 bool
fts_sync_index_check(fts_index_cache_t * index_cache)4456 fts_sync_index_check(
4457 	fts_index_cache_t*	index_cache)
4458 {
4459 	const ib_rbt_node_t*	rbt_node;
4460 
4461 	for (rbt_node = rbt_first(index_cache->words);
4462 	     rbt_node != NULL;
4463 	     rbt_node = rbt_next(index_cache->words, rbt_node)) {
4464 
4465 		fts_tokenizer_word_t*	word;
4466 		word = rbt_value(fts_tokenizer_word_t, rbt_node);
4467 
4468 		fts_node_t*	fts_node;
4469 		fts_node = static_cast<fts_node_t*>(ib_vector_last(word->nodes));
4470 
4471 		if (!fts_node->synced) {
4472 			return(false);
4473 		}
4474 	}
4475 
4476 	return(true);
4477 }
4478 
4479 /** Reset synced flag in index cache when rollback
4480 @param[in,out]	index_cache	index cache */
4481 static
4482 void
fts_sync_index_reset(fts_index_cache_t * index_cache)4483 fts_sync_index_reset(
4484 	fts_index_cache_t*	index_cache)
4485 {
4486 	const ib_rbt_node_t*	rbt_node;
4487 
4488 	for (rbt_node = rbt_first(index_cache->words);
4489 	     rbt_node != NULL;
4490 	     rbt_node = rbt_next(index_cache->words, rbt_node)) {
4491 
4492 		fts_tokenizer_word_t*	word;
4493 		word = rbt_value(fts_tokenizer_word_t, rbt_node);
4494 
4495 		fts_node_t*	fts_node;
4496 		fts_node = static_cast<fts_node_t*>(ib_vector_last(word->nodes));
4497 
4498 		fts_node->synced = false;
4499 	}
4500 }
4501 
4502 /** Commit the SYNC, change state of processed doc ids etc.
4503 @param[in,out]	sync	sync state
4504 @return DB_SUCCESS if all OK */
4505 static  MY_ATTRIBUTE((nonnull, warn_unused_result))
4506 dberr_t
fts_sync_commit(fts_sync_t * sync)4507 fts_sync_commit(
4508 	fts_sync_t*	sync)
4509 {
4510 	dberr_t		error;
4511 	trx_t*		trx = sync->trx;
4512 	fts_cache_t*	cache = sync->table->fts->cache;
4513 	doc_id_t	last_doc_id;
4514 
4515 	trx->op_info = "doing SYNC commit";
4516 
4517 	/* After each Sync, update the CONFIG table about the max doc id
4518 	we just sync-ed to index table */
4519 	error = fts_cmp_set_sync_doc_id(sync->table, sync->max_doc_id, FALSE,
4520 					&last_doc_id);
4521 
4522 	/* Get the list of deleted documents that are either in the
4523 	cache or were headed there but were deleted before the add
4524 	thread got to them. */
4525 
4526 	if (error == DB_SUCCESS && ib_vector_size(cache->deleted_doc_ids) > 0) {
4527 
4528 		error = fts_sync_add_deleted_cache(
4529 			sync, cache->deleted_doc_ids);
4530 	}
4531 
4532 	/* We need to do this within the deleted lock since fts_delete() can
4533 	attempt to add a deleted doc id to the cache deleted id array. */
4534 	fts_cache_clear(cache);
4535 	DEBUG_SYNC_C("fts_deleted_doc_ids_clear");
4536 	fts_cache_init(cache);
4537 	rw_lock_x_unlock(&cache->lock);
4538 
4539 	if (error == DB_SUCCESS) {
4540 
4541 		fts_sql_commit(trx);
4542 
4543 	} else if (error != DB_SUCCESS) {
4544 
4545 		fts_sql_rollback(trx);
4546 
4547 		ut_print_timestamp(stderr);
4548 		fprintf(stderr, "  InnoDB: Error: (%s) during SYNC.\n",
4549 			ut_strerr(error));
4550 	}
4551 
4552 	if (fts_enable_diag_print && elapsed_time) {
4553 		ib_logf(IB_LOG_LEVEL_INFO,
4554 			"SYNC for table %s: SYNC time : %lu secs: "
4555 			"elapsed %lf ins/sec",
4556 			sync->table->name,
4557 			(ulong) (ut_time() - sync->start_time),
4558 			(double) n_nodes/ (double) elapsed_time);
4559 	}
4560 
4561 	/* Avoid assertion in trx_free(). */
4562 	trx->dict_operation_lock_mode = 0;
4563 	trx_free_for_background(trx);
4564 
4565 	return(error);
4566 }
4567 
4568 /** Rollback a sync operation
4569 @param[in,out]	sync	sync state */
4570 static
4571 void
fts_sync_rollback(fts_sync_t * sync)4572 fts_sync_rollback(
4573 	fts_sync_t*	sync)
4574 {
4575 	trx_t*		trx = sync->trx;
4576 	fts_cache_t*	cache = sync->table->fts->cache;
4577 
4578 	for (ulint i = 0; i < ib_vector_size(cache->indexes); ++i) {
4579 		ulint			j;
4580 		fts_index_cache_t*	index_cache;
4581 
4582 		index_cache = static_cast<fts_index_cache_t*>(
4583 			ib_vector_get(cache->indexes, i));
4584 
4585 		/* Reset synced flag so nodes will not be skipped
4586 		in the next sync, see fts_sync_write_words(). */
4587 		fts_sync_index_reset(index_cache);
4588 
4589 		for (j = 0; fts_index_selector[j].value; ++j) {
4590 
4591 			if (index_cache->ins_graph[j] != NULL) {
4592 
4593 				fts_que_graph_free_check_lock(
4594 					NULL, index_cache,
4595 					index_cache->ins_graph[j]);
4596 
4597 				index_cache->ins_graph[j] = NULL;
4598 			}
4599 
4600 			if (index_cache->sel_graph[j] != NULL) {
4601 
4602 				fts_que_graph_free_check_lock(
4603 					NULL, index_cache,
4604 					index_cache->sel_graph[j]);
4605 
4606 				index_cache->sel_graph[j] = NULL;
4607 			}
4608 		}
4609 	}
4610 
4611 	rw_lock_x_unlock(&cache->lock);
4612 
4613 	fts_sql_rollback(trx);
4614 
4615 	/* Avoid assertion in trx_free(). */
4616 	trx->dict_operation_lock_mode = 0;
4617 	trx_free_for_background(trx);
4618 }
4619 
4620 /** Run SYNC on the table, i.e., write out data from the cache to the
4621 FTS auxiliary INDEX table and clear the cache at the end.
4622 @param[in,out]	sync		sync state
4623 @param[in]	unlock_cache	whether unlock cache lock when write node
4624 @param[in]	wait		whether wait when a sync is in progress
4625 @param[in]      has_dict        whether has dict operation lock
4626 @return DB_SUCCESS if all OK */
4627 static
4628 dberr_t
fts_sync(fts_sync_t * sync,bool unlock_cache,bool wait,bool has_dict)4629 fts_sync(
4630 	fts_sync_t*	sync,
4631 	bool		unlock_cache,
4632 	bool		wait,
4633 	bool		has_dict)
4634 {
4635 	ulint		i;
4636 	dberr_t		error = DB_SUCCESS;
4637 	fts_cache_t*	cache = sync->table->fts->cache;
4638 
4639 	rw_lock_x_lock(&cache->lock);
4640 
4641 	/* Check if cache is being synced.
4642 	Note: we release cache lock in fts_sync_write_words() to
4643 	avoid long wait for the lock by other threads. */
4644 	while (sync->in_progress) {
4645 		rw_lock_x_unlock(&cache->lock);
4646 
4647 		if (wait) {
4648 			os_event_wait(sync->event);
4649 		} else {
4650 			return(DB_SUCCESS);
4651 		}
4652 
4653 		rw_lock_x_lock(&cache->lock);
4654 	}
4655 
4656 	sync->unlock_cache = unlock_cache;
4657 	sync->in_progress = true;
4658 
4659 	DEBUG_SYNC_C("fts_sync_begin");
4660 	fts_sync_begin(sync);
4661 
4662 	/* When sync in background, we hold dict operation lock
4663 	to prevent DDL like DROP INDEX, etc. */
4664 	if (has_dict) {
4665 		sync->trx->dict_operation_lock_mode = RW_S_LATCH;
4666 	}
4667 
4668 begin_sync:
4669 	if (cache->total_size > fts_max_cache_size) {
4670 		/* Avoid the case: sync never finish when
4671 		insert/update keeps comming. */
4672 		ut_ad(sync->unlock_cache);
4673 		sync->unlock_cache = false;
4674 	}
4675 	DEBUG_SYNC_C("fts_instrument_sync");
4676 	for (i = 0; i < ib_vector_size(cache->indexes); ++i) {
4677 		fts_index_cache_t*	index_cache;
4678 
4679 		index_cache = static_cast<fts_index_cache_t*>(
4680 			ib_vector_get(cache->indexes, i));
4681 
4682 		if (index_cache->index->to_be_dropped
4683 		   || index_cache->index->table->to_be_dropped) {
4684 			continue;
4685 		}
4686 
4687 		index_cache->index->index_fts_syncing = true;
4688 		DBUG_EXECUTE_IF("fts_instrument_sync_sleep_drop_waits",
4689 				os_thread_sleep(10000000);
4690 				);
4691 
4692 		error = fts_sync_index(sync, index_cache);
4693 
4694 		if (error != DB_SUCCESS && !sync->interrupted) {
4695 
4696 			goto end_sync;
4697 		}
4698 	}
4699 
4700 	DBUG_EXECUTE_IF("fts_instrument_sync_interrupted",
4701 			sync->interrupted = true;
4702 			error = DB_INTERRUPTED;
4703 			goto end_sync;
4704 	);
4705 
4706 	/* Make sure all the caches are synced. */
4707 	for (i = 0; i < ib_vector_size(cache->indexes); ++i) {
4708 		fts_index_cache_t*	index_cache;
4709 
4710 		index_cache = static_cast<fts_index_cache_t*>(
4711 			ib_vector_get(cache->indexes, i));
4712 
4713 		if (index_cache->index->to_be_dropped
4714 		    || fts_sync_index_check(index_cache)) {
4715 			continue;
4716 		}
4717 
4718 		goto begin_sync;
4719 	}
4720 
4721 end_sync:
4722 	if (error == DB_SUCCESS && !sync->interrupted) {
4723 		error = fts_sync_commit(sync);
4724 		if (error == DB_SUCCESS) {
4725 			for (i = 0; i < ib_vector_size(cache->indexes); ++i) {
4726 				fts_index_cache_t*      index_cache;
4727 				index_cache = static_cast<fts_index_cache_t*>(
4728 					ib_vector_get(cache->indexes, i));
4729 				if (index_cache->index->index_fts_syncing) {
4730 					index_cache->index->index_fts_syncing
4731 								= false;
4732 				}
4733 			}
4734 		}
4735 	}  else {
4736 		fts_sync_rollback(sync);
4737 	}
4738 
4739 	rw_lock_x_lock(&cache->lock);
4740 	/* Clear fts syncing flags of any indexes incase sync is
4741 	interrupeted */
4742 	for (i = 0; i < ib_vector_size(cache->indexes); ++i) {
4743 		fts_index_cache_t*      index_cache;
4744 		index_cache = static_cast<fts_index_cache_t*>(
4745                       ib_vector_get(cache->indexes, i));
4746 		if (index_cache->index->index_fts_syncing == true) {
4747 			index_cache->index->index_fts_syncing = false;
4748                   }
4749 	}
4750 
4751 	sync->interrupted = false;
4752 	sync->in_progress = false;
4753 	os_event_set(sync->event);
4754 	rw_lock_x_unlock(&cache->lock);
4755 
4756 	/* We need to check whether an optimize is required, for that
4757 	we make copies of the two variables that control the trigger. These
4758 	variables can change behind our back and we don't want to hold the
4759 	lock for longer than is needed. */
4760 	mutex_enter(&cache->deleted_lock);
4761 
4762 	cache->added = 0;
4763 	cache->deleted = 0;
4764 
4765 	mutex_exit(&cache->deleted_lock);
4766 
4767 	return(error);
4768 }
4769 
4770 /** Run SYNC on the table, i.e., write out data from the cache to the
4771 FTS auxiliary INDEX table and clear the cache at the end.
4772 @param[in,out]	table		fts table
4773 @param[in]	unlock_cache	whether unlock cache when write node
4774 @param[in]	wait		whether wait for existing sync to finish
4775 @param[in]	has_dict	whether has dict operation lock
4776 @return DB_SUCCESS on success, error code on failure. */
4777 UNIV_INTERN
4778 dberr_t
fts_sync_table(dict_table_t * table,bool unlock_cache,bool wait,bool has_dict)4779 fts_sync_table(
4780 	dict_table_t*	table,
4781 	bool		unlock_cache,
4782 	bool		wait,
4783 	bool		has_dict)
4784 {
4785 	dberr_t	err = DB_SUCCESS;
4786 
4787 	ut_ad(table->fts);
4788 
4789 	if (!dict_table_is_discarded(table) && table->fts->cache) {
4790 		err = fts_sync(table->fts->cache->sync,
4791 			       unlock_cache, wait, has_dict);
4792 	}
4793 
4794 	return(err);
4795 }
4796 
4797 /********************************************************************
4798 Process next token from document starting at the given position, i.e., add
4799 the token's start position to the token's list of positions.
4800 @return number of characters handled in this call */
4801 static
4802 ulint
fts_process_token(fts_doc_t * doc,fts_doc_t * result,ulint start_pos,ulint add_pos)4803 fts_process_token(
4804 /*==============*/
4805 	fts_doc_t*	doc,		/* in/out: document to
4806 					tokenize */
4807 	fts_doc_t*	result,		/* out: if provided, save
4808 					result here */
4809 	ulint		start_pos,	/*!< in: start position in text */
4810 	ulint		add_pos)	/*!< in: add this position to all
4811 					tokens from this tokenization */
4812 {
4813 	ulint		ret;
4814 	fts_string_t	str;
4815 	ulint		offset = 0;
4816 	fts_doc_t*	result_doc;
4817 
4818 	/* Determine where to save the result. */
4819 	result_doc = (result) ? result : doc;
4820 
4821 	/* The length of a string in characters is set here only. */
4822 	ret = innobase_mysql_fts_get_token(
4823 		doc->charset, doc->text.f_str + start_pos,
4824 		doc->text.f_str + doc->text.f_len, &str, &offset);
4825 
4826 	/* Ignore string whose character number is less than
4827 	"fts_min_token_size" or more than "fts_max_token_size" */
4828 
4829 	if (str.f_n_char >= fts_min_token_size
4830 	    && str.f_n_char <= fts_max_token_size) {
4831 
4832 		mem_heap_t*	heap;
4833 		fts_string_t	t_str;
4834 		fts_token_t*	token;
4835 		ib_rbt_bound_t	parent;
4836 		ulint		newlen;
4837 
4838 		heap = static_cast<mem_heap_t*>(result_doc->self_heap->arg);
4839 
4840 		t_str.f_n_char = str.f_n_char;
4841 
4842 		t_str.f_len = str.f_len * doc->charset->casedn_multiply + 1;
4843 
4844 		t_str.f_str = static_cast<byte*>(
4845 			mem_heap_alloc(heap, t_str.f_len));
4846 
4847 		/* For binary collations, a case sensitive search is
4848 		performed. Hence don't convert to lower case. */
4849 		if (my_binary_compare(result_doc->charset)) {
4850 			memcpy(t_str.f_str, str.f_str, str.f_len);
4851 			t_str.f_str[str.f_len]= 0;
4852 			newlen= str.f_len;
4853 		} else {
4854 			newlen = innobase_fts_casedn_str(
4855 				doc->charset, (char*) str.f_str, str.f_len,
4856 				(char*) t_str.f_str, t_str.f_len);
4857 		}
4858 
4859 		t_str.f_len = newlen;
4860 		t_str.f_str[newlen] = 0;
4861 
4862 		/* Add the word to the document statistics. If the word
4863 		hasn't been seen before we create a new entry for it. */
4864 		if (rbt_search(result_doc->tokens, &parent, &t_str) != 0) {
4865 			fts_token_t	new_token;
4866 
4867 			new_token.text.f_len = newlen;
4868 			new_token.text.f_str = t_str.f_str;
4869 			new_token.text.f_n_char = t_str.f_n_char;
4870 
4871 			new_token.positions = ib_vector_create(
4872 				result_doc->self_heap, sizeof(ulint), 32);
4873 
4874 			ut_a(new_token.text.f_n_char >= fts_min_token_size);
4875 			ut_a(new_token.text.f_n_char <= fts_max_token_size);
4876 
4877 			parent.last = rbt_add_node(
4878 				result_doc->tokens, &parent, &new_token);
4879 
4880 			ut_ad(rbt_validate(result_doc->tokens));
4881 		}
4882 
4883 #ifdef	FTS_CHARSET_DEBUG
4884 		offset += start_pos + add_pos;
4885 #endif /* FTS_CHARSET_DEBUG */
4886 
4887 		offset += start_pos + ret - str.f_len + add_pos;
4888 
4889 		token = rbt_value(fts_token_t, parent.last);
4890 		ib_vector_push(token->positions, &offset);
4891 	}
4892 
4893 	return(ret);
4894 }
4895 
4896 /******************************************************************//**
4897 Tokenize a document. */
4898 UNIV_INTERN
4899 void
fts_tokenize_document(fts_doc_t * doc,fts_doc_t * result)4900 fts_tokenize_document(
4901 /*==================*/
4902 	fts_doc_t*	doc,		/* in/out: document to
4903 					tokenize */
4904 	fts_doc_t*	result)		/* out: if provided, save
4905 					the result token here */
4906 {
4907 	ulint		inc;
4908 
4909 	ut_a(!doc->tokens);
4910 	ut_a(doc->charset);
4911 
4912 	doc->tokens = rbt_create_arg_cmp(
4913 		sizeof(fts_token_t), innobase_fts_text_cmp, doc->charset);
4914 
4915 	for (ulint i = 0; i < doc->text.f_len; i += inc) {
4916 		inc = fts_process_token(doc, result, i, 0);
4917 		ut_a(inc > 0);
4918 	}
4919 }
4920 
4921 /******************************************************************//**
4922 Continue to tokenize a document. */
4923 UNIV_INTERN
4924 void
fts_tokenize_document_next(fts_doc_t * doc,ulint add_pos,fts_doc_t * result)4925 fts_tokenize_document_next(
4926 /*=======================*/
4927 	fts_doc_t*	doc,		/*!< in/out: document to
4928 					tokenize */
4929 	ulint		add_pos,	/*!< in: add this position to all
4930 					tokens from this tokenization */
4931 	fts_doc_t*	result)		/*!< out: if provided, save
4932 					the result token here */
4933 {
4934 	ulint		inc;
4935 
4936 	ut_a(doc->tokens);
4937 
4938 	for (ulint i = 0; i < doc->text.f_len; i += inc) {
4939 		inc = fts_process_token(doc, result, i, add_pos);
4940 		ut_a(inc > 0);
4941 	}
4942 }
4943 
4944 /********************************************************************
4945 Create the vector of fts_get_doc_t instances. */
4946 UNIV_INTERN
4947 ib_vector_t*
fts_get_docs_create(fts_cache_t * cache)4948 fts_get_docs_create(
4949 /*================*/
4950 						/* out: vector of
4951 						fts_get_doc_t instances */
4952 	fts_cache_t*	cache)			/*!< in: fts cache */
4953 {
4954 	ulint		i;
4955 	ib_vector_t*	get_docs;
4956 
4957 #ifdef UNIV_SYNC_DEBUG
4958 	ut_ad(rw_lock_own(&cache->init_lock, RW_LOCK_EX));
4959 #endif
4960 	/* We need one instance of fts_get_doc_t per index. */
4961 	get_docs = ib_vector_create(
4962 		cache->self_heap, sizeof(fts_get_doc_t), 4);
4963 
4964 	/* Create the get_doc instance, we need one of these
4965 	per FTS index. */
4966 	for (i = 0; i < ib_vector_size(cache->indexes); ++i) {
4967 
4968 		dict_index_t**	index;
4969 		fts_get_doc_t*	get_doc;
4970 
4971 		index = static_cast<dict_index_t**>(
4972 			ib_vector_get(cache->indexes, i));
4973 
4974 		get_doc = static_cast<fts_get_doc_t*>(
4975 			ib_vector_push(get_docs, NULL));
4976 
4977 		memset(get_doc, 0x0, sizeof(*get_doc));
4978 
4979 		get_doc->index_cache = fts_get_index_cache(cache, *index);
4980 		get_doc->cache = cache;
4981 
4982 		/* Must find the index cache. */
4983 		ut_a(get_doc->index_cache != NULL);
4984 	}
4985 
4986 	return(get_docs);
4987 }
4988 
4989 /********************************************************************
4990 Release any resources held by the fts_get_doc_t instances. */
4991 static
4992 void
fts_get_docs_clear(ib_vector_t * get_docs)4993 fts_get_docs_clear(
4994 /*===============*/
4995 	ib_vector_t*	get_docs)		/*!< in: Doc retrieval vector */
4996 {
4997 	ulint		i;
4998 
4999 	/* Release the get doc graphs if any. */
5000 	for (i = 0; i < ib_vector_size(get_docs); ++i) {
5001 
5002 		fts_get_doc_t*	get_doc = static_cast<fts_get_doc_t*>(
5003 			ib_vector_get(get_docs, i));
5004 
5005 		if (get_doc->get_document_graph != NULL) {
5006 
5007 			ut_a(get_doc->index_cache);
5008 
5009 			fts_que_graph_free(get_doc->get_document_graph);
5010 			get_doc->get_document_graph = NULL;
5011 		}
5012 	}
5013 }
5014 
5015 /*********************************************************************//**
5016 Get the initial Doc ID by consulting the CONFIG table
5017 @return initial Doc ID */
5018 UNIV_INTERN
5019 doc_id_t
fts_init_doc_id(const dict_table_t * table)5020 fts_init_doc_id(
5021 /*============*/
5022 	const dict_table_t*	table)		/*!< in: table */
5023 {
5024 	doc_id_t	max_doc_id = 0;
5025 
5026 	rw_lock_x_lock(&table->fts->cache->lock);
5027 
5028 	/* Return if the table is already initialized for DOC ID */
5029 	if (table->fts->cache->first_doc_id != FTS_NULL_DOC_ID) {
5030 		rw_lock_x_unlock(&table->fts->cache->lock);
5031 		return(0);
5032 	}
5033 
5034 	DEBUG_SYNC_C("fts_initialize_doc_id");
5035 
5036 	/* Then compare this value with the ID value stored in the CONFIG
5037 	table. The larger one will be our new initial Doc ID */
5038 	fts_cmp_set_sync_doc_id(table, 0, FALSE, &max_doc_id);
5039 
5040 	/* If DICT_TF2_FTS_ADD_DOC_ID is set, we are in the process of
5041 	creating index (and add doc id column. No need to recovery
5042 	documents */
5043 	if (!DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_ADD_DOC_ID)) {
5044 		fts_init_index((dict_table_t*) table, TRUE);
5045 	}
5046 
5047 	table->fts->fts_status |= ADDED_TABLE_SYNCED;
5048 
5049 	table->fts->cache->first_doc_id = max_doc_id;
5050 
5051 	rw_lock_x_unlock(&table->fts->cache->lock);
5052 
5053 	ut_ad(max_doc_id > 0);
5054 
5055 	return(max_doc_id);
5056 }
5057 
#ifdef FTS_MULT_INDEX
/*********************************************************************//**
Check whether the index that get_doc refers to (through its index cache)
is one of the entries of the affected FTS index vector.
@return TRUE if index is updated */
static
ibool
fts_is_index_updated(
/*=================*/
	const ib_vector_t*	fts_indexes,	/*!< in: affected FTS indexes */
	const fts_get_doc_t*	get_doc)	/*!< in: info for reading
						document */
{
	dict_index_t*	wanted = get_doc->index_cache->index;
	ulint		pos = 0;

	while (pos < ib_vector_size(fts_indexes)) {
		const dict_index_t*	candidate;

		candidate = static_cast<const dict_index_t*>(
			ib_vector_getp_const(fts_indexes, pos));

		/* The vector is not expected to hold NULL entries. */
		ut_a(candidate != NULL);

		if (candidate == wanted) {
			return(TRUE);
		}

		++pos;
	}

	return(FALSE);
}
#endif
5089 
5090 /*********************************************************************//**
5091 Fetch COUNT(*) from specified table.
5092 @return the number of rows in the table */
5093 UNIV_INTERN
5094 ulint
fts_get_rows_count(fts_table_t * fts_table)5095 fts_get_rows_count(
5096 /*===============*/
5097 	fts_table_t*	fts_table)	/*!< in: fts table to read */
5098 {
5099 	trx_t*		trx;
5100 	pars_info_t*	info;
5101 	que_t*		graph;
5102 	dberr_t		error;
5103 	ulint		count = 0;
5104 
5105 	trx = trx_allocate_for_background();
5106 
5107 	trx->op_info = "fetching FT table rows count";
5108 
5109 	info = pars_info_create();
5110 
5111 	pars_info_bind_function(info, "my_func", fts_read_ulint, &count);
5112 
5113 	graph = fts_parse_sql(
5114 		fts_table,
5115 		info,
5116 		"DECLARE FUNCTION my_func;\n"
5117 		"DECLARE CURSOR c IS"
5118 		" SELECT COUNT(*) "
5119 		" FROM \"%s\";\n"
5120 		"BEGIN\n"
5121 		"\n"
5122 		"OPEN c;\n"
5123 		"WHILE 1 = 1 LOOP\n"
5124 		"  FETCH c INTO my_func();\n"
5125 		"  IF c % NOTFOUND THEN\n"
5126 		"    EXIT;\n"
5127 		"  END IF;\n"
5128 		"END LOOP;\n"
5129 		"CLOSE c;");
5130 
5131 	for (;;) {
5132 		error = fts_eval_sql(trx, graph);
5133 
5134 		if (error == DB_SUCCESS) {
5135 			fts_sql_commit(trx);
5136 
5137 			break;				/* Exit the loop. */
5138 		} else {
5139 			fts_sql_rollback(trx);
5140 
5141 			ut_print_timestamp(stderr);
5142 
5143 			if (error == DB_LOCK_WAIT_TIMEOUT) {
5144 				fprintf(stderr, "  InnoDB: Warning: lock wait "
5145 					"timeout reading FTS table. "
5146 					"Retrying!\n");
5147 
5148 				trx->error_state = DB_SUCCESS;
5149 			} else {
5150 				fprintf(stderr, "  InnoDB: Error: (%s) "
5151 					"while reading FTS table.\n",
5152 					ut_strerr(error));
5153 
5154 				break;			/* Exit the loop. */
5155 			}
5156 		}
5157 	}
5158 
5159 	fts_que_graph_free(graph);
5160 
5161 	trx_free_for_background(trx);
5162 
5163 	return(count);
5164 }
5165 
#ifdef FTS_CACHE_SIZE_DEBUG
/*********************************************************************//**
Refresh sync->max_cache_size with the value read from the CONFIG table
of sync->table, using a short-lived background transaction. */
static
void
fts_update_max_cache_size(
/*======================*/
	fts_sync_t*	sync)			/*!< in: sync state */
{
	fts_table_t	config_table;
	trx_t*		bg_trx = trx_allocate_for_background();

	FTS_INIT_FTS_TABLE(
		&config_table, "CONFIG", FTS_COMMON_TABLE, sync->table);

	/* The size returned is in bytes. */
	sync->max_cache_size = fts_get_max_cache_size(bg_trx, &config_table);

	fts_sql_commit(bg_trx);

	trx_free_for_background(bg_trx);
}
#endif /* FTS_CACHE_SIZE_DEBUG */
5190 
5191 /*********************************************************************//**
5192 Free the modified rows of a table. */
5193 UNIV_INLINE
5194 void
fts_trx_table_rows_free(ib_rbt_t * rows)5195 fts_trx_table_rows_free(
5196 /*====================*/
5197 	ib_rbt_t*	rows)			/*!< in: rbt of rows to free */
5198 {
5199 	const ib_rbt_node_t*	node;
5200 
5201 	for (node = rbt_first(rows); node; node = rbt_first(rows)) {
5202 		fts_trx_row_t*	row;
5203 
5204 		row = rbt_value(fts_trx_row_t, node);
5205 
5206 		if (row->fts_indexes != NULL) {
5207 			/* This vector shouldn't be using the
5208 			heap allocator.  */
5209 			ut_a(row->fts_indexes->allocator->arg == NULL);
5210 
5211 			ib_vector_free(row->fts_indexes);
5212 			row->fts_indexes = NULL;
5213 		}
5214 
5215 		ut_free(rbt_remove_node(rows, node));
5216 	}
5217 
5218 	ut_a(rbt_empty(rows));
5219 	rbt_free(rows);
5220 }
5221 
5222 /*********************************************************************//**
5223 Free an FTS savepoint instance. */
5224 UNIV_INLINE
5225 void
fts_savepoint_free(fts_savepoint_t * savepoint)5226 fts_savepoint_free(
5227 /*===============*/
5228 	fts_savepoint_t*	savepoint)	/*!< in: savepoint instance */
5229 {
5230 	const ib_rbt_node_t*	node;
5231 	ib_rbt_t*		tables = savepoint->tables;
5232 
5233 	/* Nothing to free! */
5234 	if (tables == NULL) {
5235 		return;
5236 	}
5237 
5238 	for (node = rbt_first(tables); node; node = rbt_first(tables)) {
5239 		fts_trx_table_t*	ftt;
5240 		fts_trx_table_t**	fttp;
5241 
5242 		fttp = rbt_value(fts_trx_table_t*, node);
5243 		ftt = *fttp;
5244 
5245 		/* This can be NULL if a savepoint was released. */
5246 		if (ftt->rows != NULL) {
5247 			fts_trx_table_rows_free(ftt->rows);
5248 			ftt->rows = NULL;
5249 		}
5250 
5251 		/* This can be NULL if a savepoint was released. */
5252 		if (ftt->added_doc_ids != NULL) {
5253 			fts_doc_ids_free(ftt->added_doc_ids);
5254 			ftt->added_doc_ids = NULL;
5255 		}
5256 
5257 		/* The default savepoint name must be NULL. */
5258 		if (ftt->docs_added_graph) {
5259 			fts_que_graph_free(ftt->docs_added_graph);
5260 		}
5261 
5262 		/* NOTE: We are responsible for free'ing the node */
5263 		ut_free(rbt_remove_node(tables, node));
5264 	}
5265 
5266 	ut_a(rbt_empty(tables));
5267 	rbt_free(tables);
5268 	savepoint->tables = NULL;
5269 }
5270 
5271 /*********************************************************************//**
5272 Free an FTS trx. */
5273 UNIV_INTERN
5274 void
fts_trx_free(fts_trx_t * fts_trx)5275 fts_trx_free(
5276 /*=========*/
5277 	fts_trx_t*	fts_trx)		/* in, own: FTS trx */
5278 {
5279 	ulint		i;
5280 
5281 	for (i = 0; i < ib_vector_size(fts_trx->savepoints); ++i) {
5282 		fts_savepoint_t*	savepoint;
5283 
5284 		savepoint = static_cast<fts_savepoint_t*>(
5285 			ib_vector_get(fts_trx->savepoints, i));
5286 
5287 		/* The default savepoint name must be NULL. */
5288 		if (i == 0) {
5289 			ut_a(savepoint->name == NULL);
5290 		}
5291 
5292 		fts_savepoint_free(savepoint);
5293 	}
5294 
5295 	for (i = 0; i < ib_vector_size(fts_trx->last_stmt); ++i) {
5296 		fts_savepoint_t*	savepoint;
5297 
5298 		savepoint = static_cast<fts_savepoint_t*>(
5299 			ib_vector_get(fts_trx->last_stmt, i));
5300 
5301 		/* The default savepoint name must be NULL. */
5302 		if (i == 0) {
5303 			ut_a(savepoint->name == NULL);
5304 		}
5305 
5306 		fts_savepoint_free(savepoint);
5307 	}
5308 
5309 	if (fts_trx->heap) {
5310 		mem_heap_free(fts_trx->heap);
5311 	}
5312 }
5313 
5314 /*********************************************************************//**
5315 Extract the doc id from the FTS hidden column.
5316 @return doc id that was extracted from rec */
5317 UNIV_INTERN
5318 doc_id_t
fts_get_doc_id_from_row(dict_table_t * table,dtuple_t * row)5319 fts_get_doc_id_from_row(
5320 /*====================*/
5321 	dict_table_t*	table,			/*!< in: table */
5322 	dtuple_t*	row)			/*!< in: row whose FTS doc id we
5323 						want to extract.*/
5324 {
5325 	dfield_t*	field;
5326 	doc_id_t	doc_id = 0;
5327 
5328 	ut_a(table->fts->doc_col != ULINT_UNDEFINED);
5329 
5330 	field = dtuple_get_nth_field(row, table->fts->doc_col);
5331 
5332 	ut_a(dfield_get_len(field) == sizeof(doc_id));
5333 	ut_a(dfield_get_type(field)->mtype == DATA_INT);
5334 
5335 	doc_id = fts_read_doc_id(
5336 		static_cast<const byte*>(dfield_get_data(field)));
5337 
5338 	return(doc_id);
5339 }
5340 
5341 /*********************************************************************//**
5342 Extract the doc id from the FTS hidden column.
5343 @return doc id that was extracted from rec */
5344 UNIV_INTERN
5345 doc_id_t
fts_get_doc_id_from_rec(dict_table_t * table,const rec_t * rec,mem_heap_t * heap)5346 fts_get_doc_id_from_rec(
5347 /*====================*/
5348 	dict_table_t*	table,			/*!< in: table */
5349 	const rec_t*	rec,			/*!< in: rec */
5350 	mem_heap_t*	heap)			/*!< in: heap */
5351 {
5352 	ulint		len;
5353 	const byte*	data;
5354 	ulint		col_no;
5355 	doc_id_t	doc_id = 0;
5356 	dict_index_t*	clust_index;
5357 	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
5358 	ulint*		offsets = offsets_;
5359 	mem_heap_t*	my_heap = heap;
5360 
5361 	ut_a(table->fts->doc_col != ULINT_UNDEFINED);
5362 
5363 	clust_index = dict_table_get_first_index(table);
5364 
5365 	rec_offs_init(offsets_);
5366 
5367 	offsets = rec_get_offsets(
5368 		rec, clust_index, offsets, ULINT_UNDEFINED, &my_heap);
5369 
5370 	col_no = dict_col_get_clust_pos(
5371 		&table->cols[table->fts->doc_col], clust_index);
5372 	ut_ad(col_no != ULINT_UNDEFINED);
5373 
5374 	data = rec_get_nth_field(rec, offsets, col_no, &len);
5375 
5376 	ut_a(len == 8);
5377 	ut_ad(8 == sizeof(doc_id));
5378 	doc_id = static_cast<doc_id_t>(mach_read_from_8(data));
5379 
5380 	if (my_heap && !heap) {
5381 		mem_heap_free(my_heap);
5382 	}
5383 
5384 	return(doc_id);
5385 }
5386 
5387 /*********************************************************************//**
5388 Search the index specific cache for a particular FTS index.
5389 @return the index specific cache else NULL */
5390 UNIV_INTERN
5391 fts_index_cache_t*
fts_find_index_cache(const fts_cache_t * cache,const dict_index_t * index)5392 fts_find_index_cache(
5393 /*=================*/
5394 	const fts_cache_t*	cache,		/*!< in: cache to search */
5395 	const dict_index_t*	index)		/*!< in: index to search for */
5396 {
5397 	/* We cast away the const because our internal function, takes
5398 	non-const cache arg and returns a non-const pointer. */
5399 	return(static_cast<fts_index_cache_t*>(
5400 		fts_get_index_cache((fts_cache_t*) cache, index)));
5401 }
5402 
5403 /*********************************************************************//**
5404 Search cache for word.
5405 @return the word node vector if found else NULL */
5406 UNIV_INTERN
5407 const ib_vector_t*
fts_cache_find_word(const fts_index_cache_t * index_cache,const fts_string_t * text)5408 fts_cache_find_word(
5409 /*================*/
5410 	const fts_index_cache_t*index_cache,	/*!< in: cache to search */
5411 	const fts_string_t*	text)		/*!< in: word to search for */
5412 {
5413 	ib_rbt_bound_t		parent;
5414 	const ib_vector_t*	nodes = NULL;
5415 #ifdef UNIV_SYNC_DEBUG
5416 	dict_table_t*		table = index_cache->index->table;
5417 	fts_cache_t*		cache = table->fts->cache;
5418 
5419 	ut_ad(rw_lock_own((rw_lock_t*) &cache->lock, RW_LOCK_EX));
5420 #endif
5421 
5422 	/* Lookup the word in the rb tree */
5423 	if (rbt_search(index_cache->words, &parent, text) == 0) {
5424 		const fts_tokenizer_word_t*	word;
5425 
5426 		word = rbt_value(fts_tokenizer_word_t, parent.last);
5427 
5428 		nodes = word->nodes;
5429 	}
5430 
5431 	return(nodes);
5432 }
5433 
5434 /*********************************************************************//**
5435 Check cache for deleted doc id.
5436 @return TRUE if deleted */
5437 UNIV_INTERN
5438 ibool
fts_cache_is_deleted_doc_id(const fts_cache_t * cache,doc_id_t doc_id)5439 fts_cache_is_deleted_doc_id(
5440 /*========================*/
5441 	const fts_cache_t*	cache,		/*!< in: cache ito search */
5442 	doc_id_t		doc_id)		/*!< in: doc id to search for */
5443 {
5444 	ulint			i;
5445 
5446 #ifdef UNIV_SYNC_DEBUG
5447 	ut_ad(mutex_own(&cache->deleted_lock));
5448 #endif
5449 
5450 	for (i = 0; i < ib_vector_size(cache->deleted_doc_ids); ++i) {
5451 		const fts_update_t*	update;
5452 
5453 		update = static_cast<const fts_update_t*>(
5454 			ib_vector_get_const(cache->deleted_doc_ids, i));
5455 
5456 		if (doc_id == update->doc_id) {
5457 
5458 			return(TRUE);
5459 		}
5460 	}
5461 
5462 	return(FALSE);
5463 }
5464 
5465 /*********************************************************************//**
5466 Append deleted doc ids to vector. */
5467 UNIV_INTERN
5468 void
fts_cache_append_deleted_doc_ids(const fts_cache_t * cache,ib_vector_t * vector)5469 fts_cache_append_deleted_doc_ids(
5470 /*=============================*/
5471 	const fts_cache_t*	cache,		/*!< in: cache to use */
5472 	ib_vector_t*		vector)		/*!< in: append to this vector */
5473 {
5474 	ulint			i;
5475 
5476 	mutex_enter((ib_mutex_t*) &cache->deleted_lock);
5477 
5478 	if (cache->deleted_doc_ids == NULL) {
5479 		mutex_exit((ib_mutex_t*) &cache->deleted_lock);
5480 		return;
5481 	}
5482 
5483 
5484 	for (i = 0; i < ib_vector_size(cache->deleted_doc_ids); ++i) {
5485 		fts_update_t*	update;
5486 
5487 		update = static_cast<fts_update_t*>(
5488 			ib_vector_get(cache->deleted_doc_ids, i));
5489 
5490 		ib_vector_push(vector, &update->doc_id);
5491 	}
5492 
5493 	mutex_exit((ib_mutex_t*) &cache->deleted_lock);
5494 }
5495 
5496 /*********************************************************************//**
5497 Wait for the background thread to start. We poll to detect change
5498 of state, which is acceptable, since the wait should happen only
5499 once during startup.
5500 @return true if the thread started else FALSE (i.e timed out) */
5501 UNIV_INTERN
5502 ibool
fts_wait_for_background_thread_to_start(dict_table_t * table,ulint max_wait)5503 fts_wait_for_background_thread_to_start(
5504 /*====================================*/
5505 	dict_table_t*		table,		/*!< in: table to which the thread
5506 						is attached */
5507 	ulint			max_wait)	/*!< in: time in microseconds, if
5508 						set to 0 then it disables
5509 						timeout checking */
5510 {
5511 	ulint			count = 0;
5512 	ibool			done = FALSE;
5513 
5514 	ut_a(max_wait == 0 || max_wait >= FTS_MAX_BACKGROUND_THREAD_WAIT);
5515 
5516 	for (;;) {
5517 		fts_t*		fts = table->fts;
5518 
5519 		mutex_enter(&fts->bg_threads_mutex);
5520 
5521 		if (fts->fts_status & BG_THREAD_READY) {
5522 
5523 			done = TRUE;
5524 		}
5525 
5526 		mutex_exit(&fts->bg_threads_mutex);
5527 
5528 		if (!done) {
5529 			os_thread_sleep(FTS_MAX_BACKGROUND_THREAD_WAIT);
5530 
5531 			if (max_wait > 0) {
5532 
5533 				max_wait -= FTS_MAX_BACKGROUND_THREAD_WAIT;
5534 
5535 				/* We ignore the residual value. */
5536 				if (max_wait < FTS_MAX_BACKGROUND_THREAD_WAIT) {
5537 					break;
5538 				}
5539 			}
5540 
5541 			++count;
5542 		} else {
5543 			break;
5544 		}
5545 
5546 		if (count >= FTS_BACKGROUND_THREAD_WAIT_COUNT) {
5547 			ut_print_timestamp(stderr);
5548 			fprintf(stderr, " InnoDB: Error the background thread "
5549 				"for the FTS table %s refuses to start\n",
5550 				table->name);
5551 
5552 			count = 0;
5553 		}
5554 	}
5555 
5556 	return(done);
5557 }
5558 
5559 /*********************************************************************//**
5560 Add the FTS document id hidden column. */
5561 UNIV_INTERN
5562 void
fts_add_doc_id_column(dict_table_t * table,mem_heap_t * heap)5563 fts_add_doc_id_column(
5564 /*==================*/
5565 	dict_table_t*	table,	/*!< in/out: Table with FTS index */
5566 	mem_heap_t*	heap)	/*!< in: temporary memory heap, or NULL */
5567 {
5568 	dict_mem_table_add_col(
5569 		table, heap,
5570 		FTS_DOC_ID_COL_NAME,
5571 		DATA_INT,
5572 		dtype_form_prtype(
5573 			DATA_NOT_NULL | DATA_UNSIGNED
5574 			| DATA_BINARY_TYPE | DATA_FTS_DOC_ID, 0),
5575 		sizeof(doc_id_t));
5576 	DICT_TF2_FLAG_SET(table, DICT_TF2_FTS_HAS_DOC_ID);
5577 }
5578 
5579 /*********************************************************************//**
5580 Update the query graph with a new document id.
5581 @return Doc ID used */
5582 UNIV_INTERN
5583 doc_id_t
fts_update_doc_id(dict_table_t * table,upd_field_t * ufield,doc_id_t * next_doc_id)5584 fts_update_doc_id(
5585 /*==============*/
5586 	dict_table_t*	table,		/*!< in: table */
5587 	upd_field_t*	ufield,		/*!< out: update node */
5588 	doc_id_t*	next_doc_id)	/*!< in/out: buffer for writing */
5589 {
5590 	doc_id_t	doc_id;
5591 	dberr_t		error = DB_SUCCESS;
5592 
5593 	if (*next_doc_id) {
5594 		doc_id = *next_doc_id;
5595 	} else {
5596 		/* Get the new document id that will be added. */
5597 		error = fts_get_next_doc_id(table, &doc_id);
5598 	}
5599 
5600 	if (error == DB_SUCCESS) {
5601 		dict_index_t*	clust_index;
5602 
5603 		ufield->exp = NULL;
5604 
5605 		ufield->new_val.len = sizeof(doc_id);
5606 
5607 		clust_index = dict_table_get_first_index(table);
5608 
5609 		ufield->field_no = dict_col_get_clust_pos(
5610 			&table->cols[table->fts->doc_col], clust_index);
5611 
5612 		/* It is possible we update record that has
5613 		not yet be sync-ed from last crash. */
5614 
5615 		/* Convert to storage byte order. */
5616 		ut_a(doc_id != FTS_NULL_DOC_ID);
5617 		fts_write_doc_id((byte*) next_doc_id, doc_id);
5618 
5619 		ufield->new_val.data = next_doc_id;
5620 	}
5621 
5622 	return(doc_id);
5623 }
5624 
5625 /*********************************************************************//**
5626 Check if the table has an FTS index. This is the non-inline version
5627 of dict_table_has_fts_index().
5628 @return TRUE if table has an FTS index */
5629 UNIV_INTERN
5630 ibool
fts_dict_table_has_fts_index(dict_table_t * table)5631 fts_dict_table_has_fts_index(
5632 /*=========================*/
5633 	dict_table_t*	table)		/*!< in: table */
5634 {
5635 	return(dict_table_has_fts_index(table));
5636 }
5637 
5638 /*********************************************************************//**
5639 Create an instance of fts_t.
5640 @return instance of fts_t */
5641 UNIV_INTERN
5642 fts_t*
fts_create(dict_table_t * table)5643 fts_create(
5644 /*=======*/
5645 	dict_table_t*	table)		/*!< in/out: table with FTS indexes */
5646 {
5647 	fts_t*		fts;
5648 	ib_alloc_t*	heap_alloc;
5649 	mem_heap_t*	heap;
5650 
5651 	ut_a(!table->fts);
5652 
5653 	heap = mem_heap_create(512);
5654 
5655 	fts = static_cast<fts_t*>(mem_heap_alloc(heap, sizeof(*fts)));
5656 
5657 	memset(fts, 0x0, sizeof(*fts));
5658 
5659 	fts->fts_heap = heap;
5660 
5661 	fts->doc_col = ULINT_UNDEFINED;
5662 
5663 	mutex_create(
5664 		fts_bg_threads_mutex_key, &fts->bg_threads_mutex,
5665 		SYNC_FTS_BG_THREADS);
5666 
5667 	heap_alloc = ib_heap_allocator_create(heap);
5668 	fts->indexes = ib_vector_create(heap_alloc, sizeof(dict_index_t*), 4);
5669 	dict_table_get_all_fts_indexes(table, fts->indexes);
5670 
5671 	return(fts);
5672 }
5673 
5674 /*********************************************************************//**
5675 Free the FTS resources. */
5676 UNIV_INTERN
5677 void
fts_free(dict_table_t * table)5678 fts_free(
5679 /*=====*/
5680 	dict_table_t*	table)	/*!< in/out: table with FTS indexes */
5681 {
5682 	fts_t*		fts = table->fts;
5683 
5684 	mutex_free(&fts->bg_threads_mutex);
5685 
5686 	ut_ad(!fts->add_wq);
5687 
5688 	if (fts->cache) {
5689 		fts_cache_clear(fts->cache);
5690 		fts_cache_destroy(fts->cache);
5691 		fts->cache = NULL;
5692 	}
5693 
5694 	mem_heap_free(fts->fts_heap);
5695 
5696 	table->fts = NULL;
5697 }
5698 
5699 /*********************************************************************//**
5700 Signal FTS threads to initiate shutdown. */
5701 UNIV_INTERN
5702 void
fts_start_shutdown(dict_table_t * table,fts_t * fts)5703 fts_start_shutdown(
5704 /*===============*/
5705 	dict_table_t*	table,		/*!< in: table with FTS indexes */
5706 	fts_t*		fts)		/*!< in: fts instance that needs
5707 					to be informed about shutdown */
5708 {
5709 	mutex_enter(&fts->bg_threads_mutex);
5710 
5711 	fts->fts_status |= BG_THREAD_STOP;
5712 
5713 	mutex_exit(&fts->bg_threads_mutex);
5714 
5715 }
5716 
5717 /*********************************************************************//**
5718 Wait for FTS threads to shutdown. */
5719 UNIV_INTERN
5720 void
fts_shutdown(dict_table_t * table,fts_t * fts)5721 fts_shutdown(
5722 /*=========*/
5723 	dict_table_t*	table,		/*!< in: table with FTS indexes */
5724 	fts_t*		fts)		/*!< in: fts instance to shutdown */
5725 {
5726 	mutex_enter(&fts->bg_threads_mutex);
5727 
5728 	ut_a(fts->fts_status & BG_THREAD_STOP);
5729 
5730 	dict_table_wait_for_bg_threads_to_exit(table, 20000);
5731 
5732 	mutex_exit(&fts->bg_threads_mutex);
5733 }
5734 
5735 /*********************************************************************//**
5736 Take a FTS savepoint. */
5737 UNIV_INLINE
5738 void
fts_savepoint_copy(const fts_savepoint_t * src,fts_savepoint_t * dst)5739 fts_savepoint_copy(
5740 /*===============*/
5741 	const fts_savepoint_t*	src,	/*!< in: source savepoint */
5742 	fts_savepoint_t*	dst)	/*!< out: destination savepoint */
5743 {
5744 	const ib_rbt_node_t*	node;
5745 	const ib_rbt_t*		tables;
5746 
5747 	tables = src->tables;
5748 
5749 	for (node = rbt_first(tables); node; node = rbt_next(tables, node)) {
5750 
5751 		fts_trx_table_t*	ftt_dst;
5752 		const fts_trx_table_t**	ftt_src;
5753 
5754 		ftt_src = rbt_value(const fts_trx_table_t*, node);
5755 
5756 		ftt_dst = fts_trx_table_clone(*ftt_src);
5757 
5758 		rbt_insert(dst->tables, &ftt_dst, &ftt_dst);
5759 	}
5760 }
5761 
5762 /*********************************************************************//**
5763 Take a FTS savepoint. */
5764 UNIV_INTERN
5765 void
fts_savepoint_take(trx_t * trx,fts_trx_t * fts_trx,const char * name)5766 fts_savepoint_take(
5767 /*===============*/
5768 	trx_t*		trx,		/*!< in: transaction */
5769 	fts_trx_t*	fts_trx,	/*!< in: fts transaction */
5770 	const char*	name)		/*!< in: savepoint name */
5771 {
5772 	mem_heap_t*		heap;
5773 	fts_savepoint_t*	savepoint;
5774 	fts_savepoint_t*	last_savepoint;
5775 
5776 	ut_a(name != NULL);
5777 
5778 	heap = fts_trx->heap;
5779 
5780 	/* The implied savepoint must exist. */
5781 	ut_a(ib_vector_size(fts_trx->savepoints) > 0);
5782 
5783 	last_savepoint = static_cast<fts_savepoint_t*>(
5784 		ib_vector_last(fts_trx->savepoints));
5785 	savepoint = fts_savepoint_create(fts_trx->savepoints, name, heap);
5786 
5787 	if (last_savepoint->tables != NULL) {
5788 		fts_savepoint_copy(last_savepoint, savepoint);
5789 	}
5790 }
5791 
5792 /*********************************************************************//**
5793 Lookup a savepoint instance by name.
5794 @return ULINT_UNDEFINED if not found */
5795 UNIV_INLINE
5796 ulint
fts_savepoint_lookup(ib_vector_t * savepoints,const char * name)5797 fts_savepoint_lookup(
5798 /*==================*/
5799 	ib_vector_t*	savepoints,	/*!< in: savepoints */
5800 	const char*	name)		/*!< in: savepoint name */
5801 {
5802 	ulint			i;
5803 
5804 	ut_a(ib_vector_size(savepoints) > 0);
5805 
5806 	for (i = 1; i < ib_vector_size(savepoints); ++i) {
5807 		fts_savepoint_t*	savepoint;
5808 
5809 		savepoint = static_cast<fts_savepoint_t*>(
5810 			ib_vector_get(savepoints, i));
5811 
5812 		if (strcmp(name, savepoint->name) == 0) {
5813 			return(i);
5814 		}
5815 	}
5816 
5817 	return(ULINT_UNDEFINED);
5818 }
5819 
5820 /*********************************************************************//**
5821 Release the savepoint data identified by  name. All savepoints created
5822 after the named savepoint are kept.
5823 @return DB_SUCCESS or error code */
5824 UNIV_INTERN
5825 void
fts_savepoint_release(trx_t * trx,const char * name)5826 fts_savepoint_release(
5827 /*==================*/
5828 	trx_t*		trx,		/*!< in: transaction */
5829 	const char*	name)		/*!< in: savepoint name */
5830 {
5831 	ut_a(name != NULL);
5832 
5833 	ib_vector_t*	savepoints = trx->fts_trx->savepoints;
5834 
5835 	ut_a(ib_vector_size(savepoints) > 0);
5836 
5837 	ulint   i = fts_savepoint_lookup(savepoints, name);
5838 	if (i != ULINT_UNDEFINED) {
5839 		ut_a(i >= 1);
5840 
5841 		fts_savepoint_t*        savepoint;
5842 		savepoint = static_cast<fts_savepoint_t*>(
5843 			ib_vector_get(savepoints, i));
5844 
5845 		if (i == ib_vector_size(savepoints) - 1) {
5846 			/* If the savepoint is the last, we save its
5847 			tables to the  previous savepoint. */
5848 			fts_savepoint_t*	prev_savepoint;
5849 			prev_savepoint = static_cast<fts_savepoint_t*>(
5850 				ib_vector_get(savepoints, i - 1));
5851 
5852 			ib_rbt_t*	tables = savepoint->tables;
5853 			savepoint->tables = prev_savepoint->tables;
5854 			prev_savepoint->tables = tables;
5855 		}
5856 
5857 		fts_savepoint_free(savepoint);
5858 		ib_vector_remove(savepoints, *(void**)savepoint);
5859 
5860 		/* Make sure we don't delete the implied savepoint. */
5861 		ut_a(ib_vector_size(savepoints) > 0);
5862 	}
5863 }
5864 
5865 /**********************************************************************//**
5866 Refresh last statement savepoint. */
5867 UNIV_INTERN
5868 void
fts_savepoint_laststmt_refresh(trx_t * trx)5869 fts_savepoint_laststmt_refresh(
5870 /*===========================*/
5871 	trx_t*			trx)	/*!< in: transaction */
5872 {
5873 
5874 	fts_trx_t*              fts_trx;
5875 	fts_savepoint_t*        savepoint;
5876 
5877 	fts_trx = trx->fts_trx;
5878 
5879 	savepoint = static_cast<fts_savepoint_t*>(
5880 		ib_vector_pop(fts_trx->last_stmt));
5881 	fts_savepoint_free(savepoint);
5882 
5883 	ut_ad(ib_vector_is_empty(fts_trx->last_stmt));
5884 	savepoint = fts_savepoint_create(fts_trx->last_stmt, NULL, NULL);
5885 }
5886 
5887 /********************************************************************
5888 Undo the Doc ID add/delete operations in last stmt */
5889 static
5890 void
fts_undo_last_stmt(fts_trx_table_t * s_ftt,fts_trx_table_t * l_ftt)5891 fts_undo_last_stmt(
5892 /*===============*/
5893 	fts_trx_table_t*	s_ftt,	/*!< in: Transaction FTS table */
5894 	fts_trx_table_t*	l_ftt)	/*!< in: last stmt FTS table */
5895 {
5896 	ib_rbt_t*		s_rows;
5897 	ib_rbt_t*		l_rows;
5898 	const ib_rbt_node_t*	node;
5899 
5900 	l_rows = l_ftt->rows;
5901 	s_rows = s_ftt->rows;
5902 
5903 	for (node = rbt_first(l_rows);
5904 	     node;
5905 	     node = rbt_next(l_rows, node)) {
5906 		fts_trx_row_t*	l_row = rbt_value(fts_trx_row_t, node);
5907 		ib_rbt_bound_t	parent;
5908 
5909 		rbt_search(s_rows, &parent, &(l_row->doc_id));
5910 
5911 		if (parent.result == 0) {
5912 			fts_trx_row_t*	s_row = rbt_value(
5913 				fts_trx_row_t, parent.last);
5914 
5915 			switch (l_row->state) {
5916 			case FTS_INSERT:
5917 				ut_free(rbt_remove_node(s_rows, parent.last));
5918 				break;
5919 
5920 			case FTS_DELETE:
5921 				if (s_row->state == FTS_NOTHING) {
5922 					s_row->state = FTS_INSERT;
5923 				} else if (s_row->state == FTS_DELETE) {
5924 					ut_free(rbt_remove_node(
5925 						s_rows, parent.last));
5926 				}
5927 				break;
5928 
5929 			/* FIXME: Check if FTS_MODIFY need to be addressed */
5930 			case FTS_MODIFY:
5931 			case FTS_NOTHING:
5932 				break;
5933 			default:
5934 				ut_error;
5935 			}
5936 		}
5937 	}
5938 }
5939 
5940 /**********************************************************************//**
5941 Rollback to savepoint indentified by name.
5942 @return DB_SUCCESS or error code */
5943 UNIV_INTERN
5944 void
fts_savepoint_rollback_last_stmt(trx_t * trx)5945 fts_savepoint_rollback_last_stmt(
5946 /*=============================*/
5947 	trx_t*		trx)		/*!< in: transaction */
5948 {
5949 	ib_vector_t*		savepoints;
5950 	fts_savepoint_t*	savepoint;
5951 	fts_savepoint_t*	last_stmt;
5952 	fts_trx_t*		fts_trx;
5953 	ib_rbt_bound_t		parent;
5954 	const ib_rbt_node_t*    node;
5955 	ib_rbt_t*		l_tables;
5956 	ib_rbt_t*		s_tables;
5957 
5958 	fts_trx = trx->fts_trx;
5959 	savepoints = fts_trx->savepoints;
5960 
5961 	savepoint = static_cast<fts_savepoint_t*>(ib_vector_last(savepoints));
5962 	last_stmt = static_cast<fts_savepoint_t*>(
5963 		ib_vector_last(fts_trx->last_stmt));
5964 
5965 	l_tables = last_stmt->tables;
5966 	s_tables = savepoint->tables;
5967 
5968 	for (node = rbt_first(l_tables);
5969 	     node;
5970 	     node = rbt_next(l_tables, node)) {
5971 
5972 		fts_trx_table_t**	l_ftt;
5973 
5974 		l_ftt = rbt_value(fts_trx_table_t*, node);
5975 
5976 		rbt_search_cmp(
5977 			s_tables, &parent, &(*l_ftt)->table->id,
5978 			fts_trx_table_id_cmp, NULL);
5979 
5980 		if (parent.result == 0) {
5981 			fts_trx_table_t**	s_ftt;
5982 
5983 			s_ftt = rbt_value(fts_trx_table_t*, parent.last);
5984 
5985 			fts_undo_last_stmt(*s_ftt, *l_ftt);
5986 		}
5987 	}
5988 }
5989 
5990 /**********************************************************************//**
5991 Rollback to savepoint indentified by name.
5992 @return DB_SUCCESS or error code */
5993 UNIV_INTERN
5994 void
fts_savepoint_rollback(trx_t * trx,const char * name)5995 fts_savepoint_rollback(
5996 /*===================*/
5997 	trx_t*		trx,		/*!< in: transaction */
5998 	const char*	name)		/*!< in: savepoint name */
5999 {
6000 	ulint		i;
6001 	ib_vector_t*	savepoints;
6002 
6003 	ut_a(name != NULL);
6004 
6005 	savepoints = trx->fts_trx->savepoints;
6006 
6007 	/* We pop all savepoints from the the top of the stack up to
6008 	and including the instance that was found. */
6009 	i = fts_savepoint_lookup(savepoints, name);
6010 
6011 	if (i != ULINT_UNDEFINED) {
6012 		fts_savepoint_t*	savepoint;
6013 
6014 		ut_a(i > 0);
6015 
6016 		while (ib_vector_size(savepoints) > i) {
6017 			fts_savepoint_t*	savepoint;
6018 
6019 			savepoint = static_cast<fts_savepoint_t*>(
6020 				ib_vector_pop(savepoints));
6021 
6022 			if (savepoint->name != NULL) {
6023 				/* Since name was allocated on the heap, the
6024 				memory will be released when the transaction
6025 				completes. */
6026 				savepoint->name = NULL;
6027 
6028 				fts_savepoint_free(savepoint);
6029 			}
6030 		}
6031 
6032 		/* Pop all a elements from the top of the stack that may
6033 		have been released. We have to be careful that we don't
6034 		delete the implied savepoint. */
6035 
6036 		for (savepoint = static_cast<fts_savepoint_t*>(
6037 				ib_vector_last(savepoints));
6038 		     ib_vector_size(savepoints) > 1
6039 		     && savepoint->name == NULL;
6040 		     savepoint = static_cast<fts_savepoint_t*>(
6041 				ib_vector_last(savepoints))) {
6042 
6043 			ib_vector_pop(savepoints);
6044 		}
6045 
6046 		/* Make sure we don't delete the implied savepoint. */
6047 		ut_a(ib_vector_size(savepoints) > 0);
6048 
6049 		/* Restore the savepoint. */
6050 		fts_savepoint_take(trx, trx->fts_trx, name);
6051 	}
6052 }
6053 
6054 /**********************************************************************//**
6055 Check if a table is an FTS auxiliary table name.
6056 @return TRUE if the name matches an auxiliary table name pattern */
6057 static
6058 ibool
fts_is_aux_table_name(fts_aux_table_t * table,const char * name,ulint len)6059 fts_is_aux_table_name(
6060 /*==================*/
6061 	fts_aux_table_t*table,		/*!< out: table info */
6062 	const char*	name,		/*!< in: table name */
6063 	ulint		len)		/*!< in: length of table name */
6064 {
6065 	const char*	ptr;
6066 	char*		end;
6067 	char		my_name[MAX_FULL_NAME_LEN + 1];
6068 
6069 	ut_ad(len <= MAX_FULL_NAME_LEN);
6070 	ut_memcpy(my_name, name, len);
6071 	my_name[len] = 0;
6072 	end = my_name + len;
6073 
6074 	ptr = static_cast<const char*>(memchr(my_name, '/', len));
6075 
6076 	if (ptr != NULL) {
6077 		/* We will start the match after the '/' */
6078 		++ptr;
6079 		len = end - ptr;
6080 	}
6081 
6082 	/* All auxiliary tables are prefixed with "FTS_" and the name
6083 	length will be at the very least greater than 20 bytes. */
6084 	if (ptr != NULL && len > 20 && strncmp(ptr, "FTS_", 4) == 0) {
6085 		ulint		i;
6086 
6087 		/* Skip the prefix. */
6088 		ptr += 4;
6089 		len -= 4;
6090 
6091 		/* Try and read the table id. */
6092 		if (!fts_read_object_id(&table->parent_id, ptr)) {
6093 			return(FALSE);
6094 		}
6095 
6096 		/* Skip the table id. */
6097 		ptr = static_cast<const char*>(memchr(ptr, '_', len));
6098 
6099 		if (ptr == NULL) {
6100 			return(FALSE);
6101 		}
6102 
6103 		/* Skip the underscore. */
6104 		++ptr;
6105 		ut_a(end > ptr);
6106 		len = end - ptr;
6107 
6108 		/* First search the common table suffix array. */
6109 		for (i = 0; fts_common_tables[i] != NULL; ++i) {
6110 
6111 			if (strncmp(ptr, fts_common_tables[i], len) == 0) {
6112 				return(TRUE);
6113 			}
6114 		}
6115 
6116 		/* Could be obsolete common tables. */
6117 		if (strncmp(ptr, "ADDED", len) == 0
6118 		    || strncmp(ptr, "STOPWORDS", len) == 0) {
6119 			return(true);
6120 		}
6121 
6122 		/* Try and read the index id. */
6123 		if (!fts_read_object_id(&table->index_id, ptr)) {
6124 			return(FALSE);
6125 		}
6126 
6127 		/* Skip the table id. */
6128 		ptr = static_cast<const char*>(memchr(ptr, '_', len));
6129 
6130 		if (ptr == NULL) {
6131 			return(FALSE);
6132 		}
6133 
6134 		/* Skip the underscore. */
6135 		++ptr;
6136 		ut_a(end > ptr);
6137 		len = end - ptr;
6138 
6139 		/* Search the FT index specific array. */
6140 		for (i = 0; fts_index_selector[i].value; ++i) {
6141 
6142 			if (strncmp(ptr, fts_get_suffix(i), len) == 0) {
6143 				return(TRUE);
6144 			}
6145 		}
6146 
6147 		/* Other FT index specific table(s). */
6148 		if (strncmp(ptr, "DOC_ID", len) == 0) {
6149 			return(TRUE);
6150 		}
6151 	}
6152 
6153 	return(FALSE);
6154 }
6155 
6156 /**********************************************************************//**
6157 Callback function to read a single table ID column.
6158 @return Always return TRUE */
6159 static
6160 ibool
fts_read_tables(void * row,void * user_arg)6161 fts_read_tables(
6162 /*============*/
6163 	void*		row,		/*!< in: sel_node_t* */
6164 	void*		user_arg)	/*!< in: pointer to ib_vector_t */
6165 {
6166 	int		i;
6167 	fts_aux_table_t*table;
6168 	mem_heap_t*	heap;
6169 	ibool		done = FALSE;
6170 	ib_vector_t*	tables = static_cast<ib_vector_t*>(user_arg);
6171 	sel_node_t*	sel_node = static_cast<sel_node_t*>(row);
6172 	que_node_t*	exp = sel_node->select_list;
6173 
6174 	/* Must be a heap allocated vector. */
6175 	ut_a(tables->allocator->arg != NULL);
6176 
6177 	/* We will use this heap for allocating strings. */
6178 	heap = static_cast<mem_heap_t*>(tables->allocator->arg);
6179 	table = static_cast<fts_aux_table_t*>(ib_vector_push(tables, NULL));
6180 
6181 	memset(table, 0x0, sizeof(*table));
6182 
6183 	/* Iterate over the columns and read the values. */
6184 	for (i = 0; exp && !done; exp = que_node_get_next(exp), ++i) {
6185 
6186 		dfield_t*	dfield = que_node_get_val(exp);
6187 		void*		data = dfield_get_data(dfield);
6188 		ulint		len = dfield_get_len(dfield);
6189 
6190 		ut_a(len != UNIV_SQL_NULL);
6191 
6192 		/* Note: The column numbers below must match the SELECT */
6193 		switch (i) {
6194 		case 0: /* NAME */
6195 
6196 			if (!fts_is_aux_table_name(
6197 				table, static_cast<const char*>(data), len)) {
6198 				ib_vector_pop(tables);
6199 				done = TRUE;
6200 				break;
6201 			}
6202 
6203 			table->name = static_cast<char*>(
6204 				mem_heap_alloc(heap, len + 1));
6205 			memcpy(table->name, data, len);
6206 			table->name[len] = 0;
6207 			break;
6208 
6209 		case 1: /* ID */
6210 			ut_a(len == 8);
6211 			table->id = mach_read_from_8(
6212 				static_cast<const byte*>(data));
6213 			break;
6214 
6215 		default:
6216 			ut_error;
6217 		}
6218 	}
6219 
6220 	return(TRUE);
6221 }
6222 
6223 /******************************************************************//**
6224 Callback that sets a hex formatted FTS table's flags2 in
6225 SYS_TABLES. The flags is stored in MIX_LEN column.
6226 @return FALSE if all OK */
6227 static
6228 ibool
fts_set_hex_format(void * row,void * user_arg)6229 fts_set_hex_format(
6230 /*===============*/
6231 	void*		row,		/*!< in: sel_node_t* */
6232 	void*		user_arg)	/*!< in: bool set/unset flag */
6233 {
6234 	sel_node_t*	node = static_cast<sel_node_t*>(row);
6235 	dfield_t*	dfield = que_node_get_val(node->select_list);
6236 
6237 	ut_ad(dtype_get_mtype(dfield_get_type(dfield)) == DATA_INT);
6238 	ut_ad(dfield_get_len(dfield) == sizeof(ib_uint32_t));
6239 	/* There should be at most one matching record. So the value
6240 	must be the default value. */
6241 	ut_ad(mach_read_from_4(static_cast<byte*>(user_arg))
6242 	      == ULINT32_UNDEFINED);
6243 
6244 	ulint		flags2 = mach_read_from_4(
6245 			static_cast<byte*>(dfield_get_data(dfield)));
6246 
6247 	flags2 |= DICT_TF2_FTS_AUX_HEX_NAME;
6248 
6249 	mach_write_to_4(static_cast<byte*>(user_arg), flags2);
6250 
6251 	return(FALSE);
6252 }
6253 
6254 /*****************************************************************//**
6255 Update the DICT_TF2_FTS_AUX_HEX_NAME flag in SYS_TABLES.
6256 @return DB_SUCCESS or error code. */
6257 UNIV_INTERN
6258 dberr_t
fts_update_hex_format_flag(trx_t * trx,table_id_t table_id,bool dict_locked)6259 fts_update_hex_format_flag(
6260 /*=======================*/
6261 	trx_t*		trx,		/*!< in/out: transaction that
6262 					covers the update */
6263 	table_id_t	table_id,	/*!< in: Table for which we want
6264 					to set the root table->flags2 */
6265 	bool		dict_locked)	/*!< in: set to true if the
6266 					caller already owns the
6267 					dict_sys_t::mutex. */
6268 {
6269 	pars_info_t*		info;
6270 	ib_uint32_t		flags2;
6271 
6272 	static const char	sql[] =
6273 		"PROCEDURE UPDATE_HEX_FORMAT_FLAG() IS\n"
6274 		"DECLARE FUNCTION my_func;\n"
6275 		"DECLARE CURSOR c IS\n"
6276 		" SELECT MIX_LEN "
6277 		" FROM SYS_TABLES "
6278 		" WHERE ID = :table_id FOR UPDATE;"
6279 		"\n"
6280 		"BEGIN\n"
6281 		"OPEN c;\n"
6282 		"WHILE 1 = 1 LOOP\n"
6283 		"  FETCH c INTO my_func();\n"
6284 		"  IF c % NOTFOUND THEN\n"
6285 		"    EXIT;\n"
6286 		"  END IF;\n"
6287 		"END LOOP;\n"
6288 		"UPDATE SYS_TABLES"
6289 		" SET MIX_LEN = :flags2"
6290 		" WHERE ID = :table_id;\n"
6291 		"CLOSE c;\n"
6292 		"END;\n";
6293 
6294 	flags2 = ULINT32_UNDEFINED;
6295 
6296 	info = pars_info_create();
6297 
6298 	pars_info_add_ull_literal(info, "table_id", table_id);
6299 	pars_info_bind_int4_literal(info, "flags2", &flags2);
6300 
6301 	pars_info_bind_function(
6302 		info, "my_func", fts_set_hex_format, &flags2);
6303 
6304 	if (trx_get_dict_operation(trx) == TRX_DICT_OP_NONE) {
6305 		trx_set_dict_operation(trx, TRX_DICT_OP_INDEX);
6306 	}
6307 
6308 	dberr_t err = que_eval_sql(info, sql, !dict_locked, trx);
6309 
6310 	ut_a(flags2 != ULINT32_UNDEFINED);
6311 
6312 	return (err);
6313 }
6314 
6315 /*********************************************************************//**
6316 Rename an aux table to HEX format. It's called when "%016llu" is used
6317 to format an object id in table name, which only happens in Windows. */
6318 static MY_ATTRIBUTE((nonnull, warn_unused_result))
6319 dberr_t
fts_rename_one_aux_table_to_hex_format(trx_t * trx,const fts_aux_table_t * aux_table,const dict_table_t * parent_table)6320 fts_rename_one_aux_table_to_hex_format(
6321 /*===================================*/
6322 	trx_t*			trx,		/*!< in: transaction */
6323 	const fts_aux_table_t*	aux_table,	/*!< in: table info */
6324 	const dict_table_t*	parent_table)	/*!< in: parent table name */
6325 {
6326 	const char*     ptr;
6327 	fts_table_t	fts_table;
6328 	char*		new_name;
6329 	dberr_t		error;
6330 
6331 	ptr = strchr(aux_table->name, '/');
6332 	ut_a(ptr != NULL);
6333 	++ptr;
6334 	/* Skip "FTS_", table id and underscore */
6335 	for (ulint i = 0; i < 2; ++i) {
6336 		ptr = strchr(ptr, '_');
6337 		ut_a(ptr != NULL);
6338 		++ptr;
6339 	}
6340 
6341 	fts_table.suffix = NULL;
6342 	if (aux_table->index_id == 0) {
6343 		fts_table.type = FTS_COMMON_TABLE;
6344 
6345 		for (ulint i = 0; fts_common_tables[i] != NULL; ++i) {
6346 			if (strcmp(ptr, fts_common_tables[i]) == 0) {
6347 				fts_table.suffix = fts_common_tables[i];
6348 				break;
6349 			}
6350 		}
6351 	} else {
6352 		fts_table.type = FTS_INDEX_TABLE;
6353 
6354 		/* Skip index id and underscore */
6355 		ptr = strchr(ptr, '_');
6356 		ut_a(ptr != NULL);
6357 		++ptr;
6358 
6359 		for (ulint i = 0; fts_index_selector[i].value; ++i) {
6360 			if (strcmp(ptr, fts_get_suffix(i)) == 0) {
6361 				fts_table.suffix = fts_get_suffix(i);
6362 				break;
6363 			}
6364 		}
6365 	}
6366 
6367 	ut_a(fts_table.suffix != NULL);
6368 
6369 	fts_table.parent = parent_table->name;
6370 	fts_table.table_id = aux_table->parent_id;
6371 	fts_table.index_id = aux_table->index_id;
6372 	fts_table.table = parent_table;
6373 
6374 	new_name = fts_get_table_name(&fts_table);
6375 	ut_ad(strcmp(new_name, aux_table->name) != 0);
6376 
6377 	if (trx_get_dict_operation(trx) == TRX_DICT_OP_NONE) {
6378 		trx_set_dict_operation(trx, TRX_DICT_OP_INDEX);
6379 	}
6380 
6381 	error = row_rename_table_for_mysql(aux_table->name, new_name, trx,
6382 					   FALSE);
6383 
6384 	if (error != DB_SUCCESS) {
6385 		ib_logf(IB_LOG_LEVEL_WARN,
6386 			"Failed to rename aux table \'%s\' to "
6387 			"new format \'%s\'. ",
6388 			aux_table->name, new_name);
6389 	} else {
6390 		ib_logf(IB_LOG_LEVEL_INFO,
6391 			"Renamed aux table \'%s\' to \'%s\'.",
6392 			aux_table->name, new_name);
6393 	}
6394 
6395 	mem_free(new_name);
6396 
6397 	return (error);
6398 }
6399 
6400 /**********************************************************************//**
6401 Rename all aux tables of a parent table to HEX format. Also set aux tables'
6402 flags2 and parent table's flags2 with DICT_TF2_FTS_AUX_HEX_NAME.
6403 It's called when "%016llu" is used to format an object id in table name,
6404 which only happens in Windows.
6405 Note the ids in tables are correct but the names are old ambiguous ones.
6406 
6407 This function should make sure that either all the parent table and aux tables
6408 are set DICT_TF2_FTS_AUX_HEX_NAME with flags2 or none of them are set */
6409 static MY_ATTRIBUTE((nonnull, warn_unused_result))
6410 dberr_t
fts_rename_aux_tables_to_hex_format_low(trx_t * trx,dict_table_t * parent_table,ib_vector_t * tables)6411 fts_rename_aux_tables_to_hex_format_low(
6412 /*====================================*/
6413 	trx_t*		trx,		/*!< in: transaction */
6414 	dict_table_t*	parent_table,	/*!< in: parent table */
6415 	ib_vector_t*	tables)		/*!< in: aux tables to rename. */
6416 {
6417 	dberr_t		error;
6418 	ulint		count;
6419 
6420 	ut_ad(!DICT_TF2_FLAG_IS_SET(parent_table, DICT_TF2_FTS_AUX_HEX_NAME));
6421 	ut_ad(!ib_vector_is_empty(tables));
6422 
6423 	error = fts_update_hex_format_flag(trx, parent_table->id, true);
6424 
6425 	if (error != DB_SUCCESS) {
6426 		ib_logf(IB_LOG_LEVEL_WARN,
6427 			"Setting parent table %s to hex format failed.",
6428 			parent_table->name);
6429 
6430 		fts_sql_rollback(trx);
6431 		return (error);
6432 	}
6433 
6434 	DICT_TF2_FLAG_SET(parent_table, DICT_TF2_FTS_AUX_HEX_NAME);
6435 
6436 	for (count = 0; count < ib_vector_size(tables); ++count) {
6437 		dict_table_t*		table;
6438 		fts_aux_table_t*	aux_table;
6439 
6440 		aux_table = static_cast<fts_aux_table_t*>(
6441 			ib_vector_get(tables, count));
6442 
6443 		table = dict_table_open_on_id(aux_table->id, TRUE,
6444 					      DICT_TABLE_OP_NORMAL);
6445 
6446 		ut_ad(table != NULL);
6447 		ut_ad(!DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_AUX_HEX_NAME));
6448 
6449 		/* Set HEX_NAME flag here to make sure we can get correct
6450 		new table name in following function */
6451 		DICT_TF2_FLAG_SET(table, DICT_TF2_FTS_AUX_HEX_NAME);
6452 		error = fts_rename_one_aux_table_to_hex_format(trx,
6453 				aux_table, parent_table);
6454 		/* We will rollback the trx if the error != DB_SUCCESS,
6455 		so setting the flag here is the same with setting it in
6456 		row_rename_table_for_mysql */
6457 		DBUG_EXECUTE_IF("rename_aux_table_fail", error = DB_ERROR;);
6458 
6459 		if (error != DB_SUCCESS) {
6460 			dict_table_close(table, TRUE, FALSE);
6461 
6462 			ib_logf(IB_LOG_LEVEL_WARN,
6463 				"Failed to rename one aux table %s "
6464 				"Will revert all successful rename "
6465 				"operations.", aux_table->name);
6466 
6467 			fts_sql_rollback(trx);
6468 			break;
6469 		}
6470 
6471 		error = fts_update_hex_format_flag(trx, aux_table->id, true);
6472 		dict_table_close(table, TRUE, FALSE);
6473 
6474 		if (error != DB_SUCCESS) {
6475 			ib_logf(IB_LOG_LEVEL_WARN,
6476 				"Setting aux table %s to hex format failed.",
6477 				aux_table->name);
6478 
6479 			fts_sql_rollback(trx);
6480 			break;
6481 		}
6482 	}
6483 
6484 	if (error != DB_SUCCESS) {
6485 		ut_ad(count != ib_vector_size(tables));
6486 		/* If rename fails, thr trx would be rolled back, we can't
6487 		use it any more, we'll start a new background trx to do
6488 		the reverting. */
6489 		ut_a(trx->state == TRX_STATE_NOT_STARTED);
6490 		bool not_rename = false;
6491 
6492 		/* Try to revert those succesful rename operations
6493 		in order to revert the ibd file rename. */
6494 		for (ulint i = 0; i <= count; ++i) {
6495 			dict_table_t*		table;
6496 			fts_aux_table_t*	aux_table;
6497 			trx_t*			trx_bg;
6498 			dberr_t			err;
6499 
6500 			aux_table = static_cast<fts_aux_table_t*>(
6501 				ib_vector_get(tables, i));
6502 
6503 			table = dict_table_open_on_id(aux_table->id, TRUE,
6504 						      DICT_TABLE_OP_NORMAL);
6505 			ut_ad(table != NULL);
6506 
6507 			if (not_rename) {
6508 				DICT_TF2_FLAG_UNSET(table,
6509 						    DICT_TF2_FTS_AUX_HEX_NAME);
6510 			}
6511 
6512 			if (!DICT_TF2_FLAG_IS_SET(table,
6513 						  DICT_TF2_FTS_AUX_HEX_NAME)) {
6514 				dict_table_close(table, TRUE, FALSE);
6515 				continue;
6516 			}
6517 
6518 			trx_bg = trx_allocate_for_background();
6519 			trx_bg->op_info = "Revert half done rename";
6520 			trx_bg->dict_operation_lock_mode = RW_X_LATCH;
6521 			trx_start_for_ddl(trx_bg, TRX_DICT_OP_TABLE);
6522 
6523 			DICT_TF2_FLAG_UNSET(table, DICT_TF2_FTS_AUX_HEX_NAME);
6524 			err = row_rename_table_for_mysql(table->name,
6525 							 aux_table->name,
6526 							 trx_bg, FALSE);
6527 
6528 			trx_bg->dict_operation_lock_mode = 0;
6529 			dict_table_close(table, TRUE, FALSE);
6530 
6531 			if (err != DB_SUCCESS) {
6532 				ib_logf(IB_LOG_LEVEL_WARN, "Failed to revert "
6533 					"table %s. Please revert manually.",
6534 					table->name);
6535 				fts_sql_rollback(trx_bg);
6536 				trx_free_for_background(trx_bg);
6537 				/* Continue to clear aux tables' flags2 */
6538 				not_rename = true;
6539 				continue;
6540 			}
6541 
6542 			fts_sql_commit(trx_bg);
6543 			trx_free_for_background(trx_bg);
6544 		}
6545 
6546 		DICT_TF2_FLAG_UNSET(parent_table, DICT_TF2_FTS_AUX_HEX_NAME);
6547 	}
6548 
6549 	return (error);
6550 }
6551 
6552 /**********************************************************************//**
6553 Convert an id, which is actually a decimal number but was regard as a HEX
6554 from a string, to its real value. */
6555 static
6556 ib_id_t
fts_fake_hex_to_dec(ib_id_t id)6557 fts_fake_hex_to_dec(
6558 /*================*/
6559 	ib_id_t		id)			/*!< in: number to convert */
6560 {
6561 	ib_id_t		dec_id = 0;
6562 	char		tmp_id[FTS_AUX_MIN_TABLE_ID_LENGTH];
6563 	int		ret MY_ATTRIBUTE((unused));
6564 
6565 	ret = sprintf(tmp_id, UINT64PFx, id);
6566 	ut_ad(ret == 16);
6567 #ifdef _WIN32
6568 	ret = sscanf(tmp_id, "%016llu", &dec_id);
6569 #else
6570 	ret = sscanf(tmp_id, "%016" PRIu64, &dec_id);
6571 #endif /* _WIN32 */
6572 	ut_ad(ret == 1);
6573 
6574 	return dec_id;
6575 }
6576 
6577 /*********************************************************************//**
6578 Compare two fts_aux_table_t parent_ids.
6579 @return < 0 if n1 < n2, 0 if n1 == n2, > 0 if n1 > n2 */
6580 UNIV_INLINE
6581 int
fts_check_aux_table_parent_id_cmp(const void * p1,const void * p2)6582 fts_check_aux_table_parent_id_cmp(
6583 /*==============================*/
6584 	const void*	p1,		/*!< in: id1 */
6585 	const void*	p2)		/*!< in: id2 */
6586 {
6587 	const fts_aux_table_t*	fa1 = static_cast<const fts_aux_table_t*>(p1);
6588 	const fts_aux_table_t*	fa2 = static_cast<const fts_aux_table_t*>(p2);
6589 
6590 	return static_cast<int>(fa1->parent_id - fa2->parent_id);
6591 }
6592 
6593 /** Mark all the fts index associated with the parent table as corrupted.
6594 @param[in]	trx		transaction
6595 @param[in, out] parent_table	fts index associated with this parent table
6596 				will be marked as corrupted. */
6597 static
6598 void
fts_parent_all_index_set_corrupt(trx_t * trx,dict_table_t * parent_table)6599 fts_parent_all_index_set_corrupt(
6600 	trx_t*		trx,
6601 	dict_table_t*	parent_table)
6602 {
6603 	fts_t*	fts = parent_table->fts;
6604 
6605 	if (trx_get_dict_operation(trx) == TRX_DICT_OP_NONE) {
6606 		trx_set_dict_operation(trx, TRX_DICT_OP_INDEX);
6607 	}
6608 
6609 	for (ulint j = 0; j < ib_vector_size(fts->indexes); j++) {
6610 		dict_index_t*	index = static_cast<dict_index_t*>(
6611 			ib_vector_getp_const(fts->indexes, j));
6612 		dict_set_corrupted(index,
6613 				   trx, "DROP ORPHANED TABLE");
6614 	}
6615 }
6616 
6617 /** Mark the fts index which index id matches the id as corrupted.
6618 @param[in]	trx		transaction
6619 @param[in]	id		index id to search
6620 @param[in, out]	parent_table	parent table to check with all
6621 				the index. */
6622 static
6623 void
fts_set_index_corrupt(trx_t * trx,index_id_t id,dict_table_t * table)6624 fts_set_index_corrupt(
6625 	trx_t*		trx,
6626 	index_id_t	id,
6627 	dict_table_t*	table)
6628 {
6629 	fts_t*	fts = table->fts;
6630 
6631 	if (trx_get_dict_operation(trx) == TRX_DICT_OP_NONE) {
6632 		trx_set_dict_operation(trx, TRX_DICT_OP_INDEX);
6633 	}
6634 
6635 	for (ulint j = 0; j < ib_vector_size(fts->indexes); j++) {
6636 		dict_index_t*   index = static_cast<dict_index_t*>(
6637 			ib_vector_getp_const(fts->indexes, j));
6638 		if (index->id == id) {
6639 			dict_set_corrupted(index, trx,
6640 					   "DROP ORPHANED TABLE");
6641 			break;
6642 		}
6643 	}
6644 }
6645 
6646 /** Check the index for the aux table is corrupted.
6647 @param[in]	aux_table	auxiliary table
6648 @retval nonzero if index is corrupted, zero for valid index */
6649 static
6650 ulint
fts_check_corrupt_index(fts_aux_table_t * aux_table)6651 fts_check_corrupt_index(
6652 	fts_aux_table_t*	aux_table)
6653 {
6654 	dict_table_t*	table;
6655 	dict_index_t*	index;
6656 	table = dict_table_open_on_id(
6657 		aux_table->parent_id, TRUE, DICT_TABLE_OP_NORMAL);
6658 
6659 	if (table == NULL) {
6660 		return(0);
6661 	}
6662 
6663 	for (index = UT_LIST_GET_FIRST(table->indexes);
6664 	     index;
6665 	     index = UT_LIST_GET_NEXT(indexes, index)) {
6666 		if (index->id == aux_table->index_id) {
6667 			ut_ad(index->type & DICT_FTS);
6668 			dict_table_close(table, true, false);
6669 			return(dict_index_is_corrupted(index));
6670 		}
6671 	}
6672 
6673 	dict_table_close(table, true, false);
6674 	return(0);
6675 }
6676 
6677 /* Get parent table name if it's a fts aux table
6678 @param[in]	aux_table_name	aux table name
6679 @param[in]	aux_table_len	aux table length
6680 @return parent table name, or NULL */
6681 char*
fts_get_parent_table_name(const char * aux_table_name,ulint aux_table_len)6682 fts_get_parent_table_name(
6683 	const char*	aux_table_name,
6684 	ulint		aux_table_len)
6685 {
6686 	fts_aux_table_t	aux_table;
6687 	char*		parent_table_name = NULL;
6688 
6689 	if (fts_is_aux_table_name(&aux_table, aux_table_name, aux_table_len)) {
6690 		dict_table_t*	parent_table;
6691 
6692 		parent_table = dict_table_open_on_id(
6693 			aux_table.parent_id, TRUE, DICT_TABLE_OP_NORMAL);
6694 
6695 		if (parent_table != NULL) {
6696 			parent_table_name = mem_strdupl(
6697 				parent_table->name,
6698 				strlen(parent_table->name));
6699 
6700 			dict_table_close(parent_table, TRUE, FALSE);
6701 		}
6702 	}
6703 
6704 	return(parent_table_name);
6705 }
6706 
6707 /** Check the validity of the parent table.
6708 @param[in]	aux_table	auxiliary table
6709 @return true if it is a valid table or false if it is not */
6710 static
6711 bool
fts_valid_parent_table(const fts_aux_table_t * aux_table)6712 fts_valid_parent_table(
6713 	const fts_aux_table_t*	aux_table)
6714 {
6715 	dict_table_t*	parent_table;
6716 	bool		valid = false;
6717 
6718 	parent_table = dict_table_open_on_id(
6719 		aux_table->parent_id, TRUE, DICT_TABLE_OP_NORMAL);
6720 
6721 	if (parent_table != NULL && parent_table->fts != NULL) {
6722 		if (aux_table->index_id == 0) {
6723 			valid = true;
6724 		} else {
6725 			index_id_t	id = aux_table->index_id;
6726 			dict_index_t*	index;
6727 
6728 			/* Search for the FT index in the table's list. */
6729 			for (index = UT_LIST_GET_FIRST(parent_table->indexes);
6730 			     index;
6731 			     index = UT_LIST_GET_NEXT(indexes, index)) {
6732 				if (index->id == id) {
6733 					valid = true;
6734 					break;
6735 				}
6736 
6737 			}
6738 		}
6739 	}
6740 
6741 	if (parent_table) {
6742 		dict_table_close(parent_table, TRUE, FALSE);
6743 	}
6744 
6745 	return(valid);
6746 }
6747 
6748 /** Try to rename all aux tables of the specified parent table.
6749 @param[in]	aux_tables	aux_tables to be renamed
6750 @param[in]	parent_table	parent table of all aux
6751 				tables stored in tables. */
6752 static
6753 void
fts_rename_aux_tables_to_hex_format(ib_vector_t * aux_tables,dict_table_t * parent_table)6754 fts_rename_aux_tables_to_hex_format(
6755 	ib_vector_t*	aux_tables,
6756 	dict_table_t*	parent_table)
6757 {
6758 	dberr_t err;
6759 	trx_t*	trx_rename = trx_allocate_for_background();
6760 	trx_rename->op_info = "Rename aux tables to hex format";
6761 	trx_rename->dict_operation_lock_mode = RW_X_LATCH;
6762 	trx_start_for_ddl(trx_rename, TRX_DICT_OP_TABLE);
6763 
6764 	err = fts_rename_aux_tables_to_hex_format_low(trx_rename,
6765 						      parent_table, aux_tables);
6766 
6767 	trx_rename->dict_operation_lock_mode = 0;
6768 
6769 	if (err != DB_SUCCESS) {
6770 
6771 		ib_logf(IB_LOG_LEVEL_WARN,
6772 			"Rollback operations on all aux tables of table %s. "
6773 			"All the fts index associated with the table are "
6774 			"marked as corrupted. Please rebuild the "
6775 			"index again.", parent_table->name);
6776 		fts_sql_rollback(trx_rename);
6777 
6778 		/* Corrupting the fts index related to parent table. */
6779 		trx_t*	trx_corrupt;
6780 		trx_corrupt = trx_allocate_for_background();
6781 		trx_corrupt->dict_operation_lock_mode = RW_X_LATCH;
6782 		trx_start_for_ddl(trx_corrupt, TRX_DICT_OP_TABLE);
6783 		fts_parent_all_index_set_corrupt(trx_corrupt, parent_table);
6784 		trx_corrupt->dict_operation_lock_mode = 0;
6785 		fts_sql_commit(trx_corrupt);
6786 		trx_free_for_background(trx_corrupt);
6787 	} else {
6788 		fts_sql_commit(trx_rename);
6789 	}
6790 
6791 	trx_free_for_background(trx_rename);
6792 	ib_vector_reset(aux_tables);
6793 }
6794 
6795 /** Set the hex format flag for the parent table.
6796 @param[in, out]	parent_table	parent table
6797 @param[in]	trx		transaction */
6798 static
6799 void
fts_set_parent_hex_format_flag(dict_table_t * parent_table,trx_t * trx)6800 fts_set_parent_hex_format_flag(
6801 	dict_table_t*	parent_table,
6802 	trx_t*		trx)
6803 {
6804 	if (!DICT_TF2_FLAG_IS_SET(parent_table,
6805 				  DICT_TF2_FTS_AUX_HEX_NAME)) {
6806 		DBUG_EXECUTE_IF("parent_table_flag_fail",
6807 			ib_logf(IB_LOG_LEVEL_FATAL,
6808 				"Setting parent table %s  to hex format "
6809 				"failed. Please try to restart the server "
6810 				"again, if it doesn't work, the system "
6811 				"tables might be corrupted.",
6812 				parent_table->name);
6813 			return;);
6814 
6815 		dberr_t	err = fts_update_hex_format_flag(
6816 				trx, parent_table->id, true);
6817 
6818 		if (err != DB_SUCCESS) {
6819 			ib_logf(IB_LOG_LEVEL_FATAL,
6820 				"Setting parent table %s  to hex format "
6821 				"failed. Please try to restart the server "
6822 				"again, if it doesn't work, the system "
6823 				"tables might be corrupted.",
6824 				parent_table->name);
6825 		} else {
6826 			DICT_TF2_FLAG_SET(
6827 				parent_table, DICT_TF2_FTS_AUX_HEX_NAME);
6828 		}
6829 	}
6830 }
6831 
6832 /** Drop the obsolete auxilary table.
6833 @param[in]	tables	tables to be dropped. */
6834 static
6835 void
fts_drop_obsolete_aux_table_from_vector(ib_vector_t * tables)6836 fts_drop_obsolete_aux_table_from_vector(
6837 	ib_vector_t*	tables)
6838 {
6839 	dberr_t		err;
6840 
6841 	for (ulint count = 0; count < ib_vector_size(tables);
6842 	     ++count) {
6843 
6844 		fts_aux_table_t*	aux_drop_table;
6845 		aux_drop_table = static_cast<fts_aux_table_t*>(
6846 			ib_vector_get(tables, count));
6847 		trx_t*	trx_drop = trx_allocate_for_background();
6848 		trx_drop->op_info = "Drop obsolete aux tables";
6849 		trx_drop->dict_operation_lock_mode = RW_X_LATCH;
6850 		trx_start_for_ddl(trx_drop, TRX_DICT_OP_TABLE);
6851 
6852 		err = row_drop_table_for_mysql(
6853 			aux_drop_table->name, trx_drop, false, true);
6854 
6855 		trx_drop->dict_operation_lock_mode = 0;
6856 
6857 		if (err != DB_SUCCESS) {
6858 			/* We don't need to worry about the
6859 			failure, since server would try to
6860 			drop it on next restart, even if
6861 			the table was broken. */
6862 			ib_logf(IB_LOG_LEVEL_WARN,
6863 				"Fail to drop obsolete aux table '%s', which "
6864 				"is harmless. will try to drop it on next "
6865 				"restart.", aux_drop_table->name);
6866 			fts_sql_rollback(trx_drop);
6867 		} else {
6868 			ib_logf(IB_LOG_LEVEL_INFO,
6869 				"Dropped obsolete aux table '%s'.",
6870 				aux_drop_table->name);
6871 
6872 			fts_sql_commit(trx_drop);
6873 		}
6874 
6875 		trx_free_for_background(trx_drop);
6876 	}
6877 }
6878 
6879 /** Drop all the auxiliary table present in the vector.
6880 @param[in]	trx	transaction
6881 @param[in]	tables	tables to be dropped */
6882 static
6883 void
fts_drop_aux_table_from_vector(trx_t * trx,ib_vector_t * tables)6884 fts_drop_aux_table_from_vector(
6885 	trx_t*		trx,
6886 	ib_vector_t*	tables)
6887 {
6888 	for (ulint count = 0; count < ib_vector_size(tables);
6889 	    ++count) {
6890 		fts_aux_table_t*	aux_drop_table;
6891 		aux_drop_table = static_cast<fts_aux_table_t*>(
6892 				ib_vector_get(tables, count));
6893 
6894 		/* Check for the validity of the parent table */
6895 		if (!fts_valid_parent_table(aux_drop_table)) {
6896 			ib_logf(IB_LOG_LEVEL_WARN,
6897 				"Parent table of FTS auxiliary table %s not "
6898 				"found.", aux_drop_table->name);
6899 			dberr_t err = fts_drop_table(trx, aux_drop_table->name);
6900 			if (err == DB_FAIL) {
6901 				char*	path = fil_make_ibd_name(
6902 					aux_drop_table->name, false);
6903 				os_file_delete_if_exists(innodb_file_data_key,
6904 							 path);
6905 				mem_free(path);
6906 			}
6907 		}
6908 	}
6909 }
6910 
/**********************************************************************//**
Check and drop all orphaned FTS auxiliary tables, those that don't have
a parent table or FTS index defined on them.
The function is declared void, so no status is returned to the caller. */
6915 static MY_ATTRIBUTE((nonnull))
6916 void
fts_check_and_drop_orphaned_tables(trx_t * trx,ib_vector_t * tables)6917 fts_check_and_drop_orphaned_tables(
6918 /*===============================*/
6919 	trx_t*		trx,			/*!< in: transaction */
6920 	ib_vector_t*	tables)			/*!< in: tables to check */
6921 {
6922 	mem_heap_t*	heap;
6923 	ib_vector_t*	aux_tables_to_rename;
6924 	ib_vector_t*	invalid_aux_tables;
6925 	ib_vector_t*	valid_aux_tables;
6926 	ib_vector_t*	drop_aux_tables;
6927 	ib_vector_t*	obsolete_aux_tables;
6928 	ib_alloc_t*	heap_alloc;
6929 
6930 	heap = mem_heap_create(1024);
6931 	heap_alloc = ib_heap_allocator_create(heap);
6932 
6933 	/* We store all aux tables belonging to the same parent table here,
6934 	and rename all these tables in a batch mode. */
6935 	aux_tables_to_rename = ib_vector_create(heap_alloc,
6936 						sizeof(fts_aux_table_t), 128);
6937 
6938 	/* We store all fake auxiliary table and orphaned table here. */
6939 	invalid_aux_tables = ib_vector_create(heap_alloc,
6940 					      sizeof(fts_aux_table_t), 128);
6941 
6942 	/* We store all valid aux tables. We use this to filter the
6943 	fake auxiliary table from invalid auxiliary tables. */
6944 	valid_aux_tables = ib_vector_create(heap_alloc,
6945 					    sizeof(fts_aux_table_t), 128);
6946 
6947 	/* We store all auxiliary tables to be dropped. */
6948 	drop_aux_tables = ib_vector_create(heap_alloc,
6949 					   sizeof(fts_aux_table_t), 128);
6950 
6951 	/* We store all obsolete auxiliary tables to be dropped. */
6952 	obsolete_aux_tables = ib_vector_create(heap_alloc,
6953 					       sizeof(fts_aux_table_t), 128);
6954 
6955 	/* Sort by parent_id first, in case rename will fail */
6956 	ib_vector_sort(tables, fts_check_aux_table_parent_id_cmp);
6957 
6958 	for (ulint i = 0; i < ib_vector_size(tables); ++i) {
6959 		dict_table_t*		parent_table;
6960 		fts_aux_table_t*	aux_table;
6961 		bool			drop = false;
6962 		dict_table_t*		table;
6963 		fts_aux_table_t*	next_aux_table = NULL;
6964 		ib_id_t			orig_parent_id = 0;
6965 		ib_id_t			orig_index_id = 0;
6966 		bool			rename = false;
6967 
6968 		aux_table = static_cast<fts_aux_table_t*>(
6969 			ib_vector_get(tables, i));
6970 
6971 		table = dict_table_open_on_id(
6972 			aux_table->id, TRUE, DICT_TABLE_OP_NORMAL);
6973 		orig_parent_id = aux_table->parent_id;
6974 		orig_index_id = aux_table->index_id;
6975 
6976 		if (table == NULL || strcmp(table->name, aux_table->name)) {
6977 
6978 			bool	fake_aux = false;
6979 
6980 			if (table != NULL) {
6981 				dict_table_close(table, TRUE, FALSE);
6982 			}
6983 
6984 			if (i + 1 < ib_vector_size(tables)) {
6985 				next_aux_table = static_cast<fts_aux_table_t*>(
6986 						ib_vector_get(tables, i + 1));
6987 			}
6988 
6989 			/* To know whether aux table is fake fts or
6990 			orphan fts table. */
6991 			for (ulint count = 0;
6992 			     count < ib_vector_size(valid_aux_tables);
6993 			     count++) {
6994 				fts_aux_table_t*	valid_aux;
6995 				valid_aux = static_cast<fts_aux_table_t*>(
6996 					ib_vector_get(valid_aux_tables, count));
6997 				if (strcmp(valid_aux->name,
6998 					   aux_table->name) == 0) {
6999 					fake_aux = true;
7000 					break;
7001 				}
7002 			}
7003 
7004 			/* All aux tables of parent table, whose id is
7005 			last_parent_id, have been checked, try to rename
7006 			them if necessary. */
7007 			if ((next_aux_table == NULL
7008 			     || orig_parent_id != next_aux_table->parent_id)
7009 			    && (!ib_vector_is_empty(aux_tables_to_rename))) {
7010 
7011 					ulint	parent_id = fts_fake_hex_to_dec(
7012 							aux_table->parent_id);
7013 
7014 					parent_table = dict_table_open_on_id(
7015 						parent_id, TRUE,
7016 						DICT_TABLE_OP_NORMAL);
7017 
7018 					fts_rename_aux_tables_to_hex_format(
7019 						aux_tables_to_rename, parent_table);
7020 
7021 					dict_table_close(parent_table, TRUE,
7022 							 FALSE);
7023 			}
7024 
7025 			/* If the aux table is fake aux table. Skip it. */
7026 			if (!fake_aux) {
7027 				ib_vector_push(invalid_aux_tables, aux_table);
7028 			}
7029 
7030 			continue;
7031 		} else if (!DICT_TF2_FLAG_IS_SET(table,
7032 						 DICT_TF2_FTS_AUX_HEX_NAME)) {
7033 
7034 			aux_table->parent_id = fts_fake_hex_to_dec(
7035 						aux_table->parent_id);
7036 
7037 			if (aux_table->index_id != 0) {
7038 				aux_table->index_id = fts_fake_hex_to_dec(
7039 							aux_table->index_id);
7040 			}
7041 
7042 			ut_ad(aux_table->id > aux_table->parent_id);
7043 
7044 			/* Check whether parent table id and index id
7045 			are stored as decimal format. */
7046 			if (fts_valid_parent_table(aux_table)) {
7047 
7048 				parent_table = dict_table_open_on_id(
7049 					aux_table->parent_id, true,
7050 					DICT_TABLE_OP_NORMAL);
7051 
7052 				ut_ad(parent_table != NULL);
7053 				ut_ad(parent_table->fts != NULL);
7054 
7055 				if (!DICT_TF2_FLAG_IS_SET(
7056 					parent_table,
7057 					DICT_TF2_FTS_AUX_HEX_NAME)) {
7058 					rename = true;
7059 				}
7060 
7061 				dict_table_close(parent_table, TRUE, FALSE);
7062 			}
7063 
7064 			if (!rename) {
7065 				/* Reassign the original value of
7066 				aux table if it is not in decimal format */
7067 				aux_table->parent_id = orig_parent_id;
7068 				aux_table->index_id = orig_index_id;
7069 			}
7070 		}
7071 
7072 		if (table != NULL) {
7073 			dict_table_close(table, true, false);
7074 		}
7075 
7076 		if (!rename) {
7077 			/* Check the validity of the parent table. */
7078 			if (!fts_valid_parent_table(aux_table)) {
7079 				drop = true;
7080 			}
7081 		}
7082 
7083 		/* Filter out the fake aux table by comparing with the
7084 		current valid auxiliary table name . */
7085 		for (ulint count = 0;
7086 		     count < ib_vector_size(invalid_aux_tables); count++) {
7087 			fts_aux_table_t*	invalid_aux;
7088 			invalid_aux = static_cast<fts_aux_table_t*>(
7089 				ib_vector_get(invalid_aux_tables, count));
7090 			if (strcmp(invalid_aux->name, aux_table->name) == 0) {
7091 				ib_vector_remove(
7092 					invalid_aux_tables,
7093 					*reinterpret_cast<void**>(invalid_aux));
7094 				break;
7095 			}
7096 		}
7097 
7098 		ib_vector_push(valid_aux_tables, aux_table);
7099 
7100 		/* If the index associated with aux table is corrupted,
7101 		skip it. */
7102 		if (fts_check_corrupt_index(aux_table) > 0) {
7103 
7104 			if (i + 1 < ib_vector_size(tables)) {
7105 				next_aux_table = static_cast<fts_aux_table_t*>(
7106 						ib_vector_get(tables, i + 1));
7107 			}
7108 
7109 			if (next_aux_table == NULL
7110 			    || orig_parent_id != next_aux_table->parent_id) {
7111 
7112 				parent_table = dict_table_open_on_id(
7113 					aux_table->parent_id, TRUE,
7114 					DICT_TABLE_OP_NORMAL);
7115 
7116 				if (!ib_vector_is_empty(aux_tables_to_rename)) {
7117 					fts_rename_aux_tables_to_hex_format(
7118 						aux_tables_to_rename, parent_table);
7119 
7120 				} else {
7121 					fts_set_parent_hex_format_flag(
7122 						parent_table, trx);
7123 				}
7124 
7125 				dict_table_close(parent_table, TRUE, FALSE);
7126 			}
7127 
7128 			continue;
7129 		}
7130 
7131 		parent_table = dict_table_open_on_id(
7132 			aux_table->parent_id, TRUE, DICT_TABLE_OP_NORMAL);
7133 
7134 		if (drop) {
7135 			 ib_vector_push(drop_aux_tables, aux_table);
7136 		} else {
7137 			if (FTS_IS_OBSOLETE_AUX_TABLE(aux_table->name)) {
7138 
7139 				/* Current table could be one of the three
7140 				obsolete tables, in this case, we should
7141 				always try to drop it but not rename it.
7142 				This could happen when we try to upgrade
7143 				from older server to later one, which doesn't
7144 				contain these obsolete tables. */
7145 				ib_vector_push(obsolete_aux_tables, aux_table);
7146 				continue;
7147 			}
7148 		}
7149 
7150 		/* If the aux table is in decimal format, we should
7151 		rename it, so push it to aux_tables_to_rename */
7152 		if (!drop && rename) {
7153 			ib_vector_push(aux_tables_to_rename, aux_table);
7154 		}
7155 
7156 		if (i + 1 < ib_vector_size(tables)) {
7157 			next_aux_table = static_cast<fts_aux_table_t*>(
7158 					ib_vector_get(tables, i + 1));
7159 		}
7160 
7161 		if ((next_aux_table == NULL
7162 		     || orig_parent_id != next_aux_table->parent_id)
7163 		    && !ib_vector_is_empty(aux_tables_to_rename)) {
7164 			/* All aux tables of parent table, whose id is
7165 			last_parent_id, have been checked, try to rename
7166 			them if necessary. We had better use a new background
7167 			trx to rename rather than the original trx, in case
7168 			any failure would cause a complete rollback. */
7169 			ut_ad(rename);
7170 			ut_ad(!DICT_TF2_FLAG_IS_SET(
7171 				parent_table, DICT_TF2_FTS_AUX_HEX_NAME));
7172 
7173 			fts_rename_aux_tables_to_hex_format(
7174 				aux_tables_to_rename,parent_table);
7175 		}
7176 
7177 		/* The IDs are already in correct hex format. */
7178 		if (!drop && !rename) {
7179 			dict_table_t*	table;
7180 
7181 			table = dict_table_open_on_id(
7182 				aux_table->id, TRUE, DICT_TABLE_OP_NORMAL);
7183 			if (table != NULL
7184 			    && strcmp(table->name, aux_table->name)) {
7185 				dict_table_close(table, TRUE, FALSE);
7186 				table = NULL;
7187 			}
7188 
7189 			if (table != NULL
7190 			    && !DICT_TF2_FLAG_IS_SET(
7191 						table,
7192 						DICT_TF2_FTS_AUX_HEX_NAME)) {
7193 
7194 				DBUG_EXECUTE_IF("aux_table_flag_fail",
7195 					ib_logf(IB_LOG_LEVEL_WARN,
7196 						"Setting aux table %s to hex "
7197 						"format failed.", table->name);
7198 					fts_set_index_corrupt(
7199 						trx, aux_table->index_id,
7200 						parent_table);
7201 						goto table_exit;);
7202 
7203 				dberr_t err = fts_update_hex_format_flag(
7204 						trx, table->id, true);
7205 
7206 				if (err != DB_SUCCESS) {
7207 					ib_logf(IB_LOG_LEVEL_WARN,
7208 						"Setting aux table %s to hex "
7209 						"format failed.", table->name);
7210 
7211 					fts_set_index_corrupt(
7212 						trx, aux_table->index_id,
7213 						parent_table);
7214 				} else {
7215 					DICT_TF2_FLAG_SET(table,
7216 						DICT_TF2_FTS_AUX_HEX_NAME);
7217 				}
7218 			}
7219 #ifndef DBUG_OFF
7220 table_exit:
7221 #endif	/* !DBUG_OFF */
7222 
7223 			if (table != NULL) {
7224 				dict_table_close(table, TRUE, FALSE);
7225 			}
7226 
7227 			ut_ad(parent_table != NULL);
7228 
7229 			fts_set_parent_hex_format_flag(
7230 					parent_table, trx);
7231 		}
7232 
7233 		if (parent_table != NULL) {
7234 			dict_table_close(parent_table, TRUE, FALSE);
7235 		}
7236 	}
7237 
7238 	fts_drop_aux_table_from_vector(trx, invalid_aux_tables);
7239 	fts_drop_aux_table_from_vector(trx, drop_aux_tables);
7240 	fts_sql_commit(trx);
7241 
7242 	fts_drop_obsolete_aux_table_from_vector(obsolete_aux_tables);
7243 
7244 	/* Free the memory allocated at the beginning */
7245 	if (heap != NULL) {
7246 		mem_heap_free(heap);
7247 	}
7248 }
7249 
7250 /**********************************************************************//**
7251 Drop all orphaned FTS auxiliary tables, those that don't have a parent
7252 table or FTS index defined on them. */
7253 UNIV_INTERN
7254 void
fts_drop_orphaned_tables(void)7255 fts_drop_orphaned_tables(void)
7256 /*==========================*/
7257 {
7258 	trx_t*			trx;
7259 	pars_info_t*		info;
7260 	mem_heap_t*		heap;
7261 	que_t*			graph;
7262 	ib_vector_t*		tables;
7263 	ib_alloc_t*		heap_alloc;
7264 	space_name_list_t	space_name_list;
7265 	dberr_t			error = DB_SUCCESS;
7266 
7267 	/* Note: We have to free the memory after we are done with the list. */
7268 	error = fil_get_space_names(space_name_list);
7269 
7270 	if (error == DB_OUT_OF_MEMORY) {
7271 		ib_logf(IB_LOG_LEVEL_ERROR, "Out of memory");
7272 		ut_error;
7273 	}
7274 
7275 	heap = mem_heap_create(1024);
7276 	heap_alloc = ib_heap_allocator_create(heap);
7277 
7278 	/* We store the table ids of all the FTS indexes that were found. */
7279 	tables = ib_vector_create(heap_alloc, sizeof(fts_aux_table_t), 128);
7280 
7281 	/* Get the list of all known .ibd files and check for orphaned
7282 	FTS auxiliary files in that list. We need to remove them because
7283 	users can't map them back to table names and this will create
7284 	unnecessary clutter. */
7285 
7286 	for (space_name_list_t::iterator it = space_name_list.begin();
7287 	     it != space_name_list.end();
7288 	     ++it) {
7289 
7290 		fts_aux_table_t*	fts_aux_table;
7291 
7292 		fts_aux_table = static_cast<fts_aux_table_t*>(
7293 			ib_vector_push(tables, NULL));
7294 
7295 		memset(fts_aux_table, 0x0, sizeof(*fts_aux_table));
7296 
7297 		if (!fts_is_aux_table_name(fts_aux_table, *it, strlen(*it))) {
7298 			ib_vector_pop(tables);
7299 		} else {
7300 			ulint	len = strlen(*it);
7301 
7302 			fts_aux_table->id = fil_get_space_id_for_table(*it);
7303 
7304 			/* We got this list from fil0fil.cc. The tablespace
7305 			with this name must exist. */
7306 			ut_a(fts_aux_table->id != ULINT_UNDEFINED);
7307 
7308 			fts_aux_table->name = static_cast<char*>(
7309 				mem_heap_dup(heap, *it, len + 1));
7310 
7311 			fts_aux_table->name[len] = 0;
7312 		}
7313 	}
7314 
7315 	trx = trx_allocate_for_background();
7316 	trx->op_info = "dropping orphaned FTS tables";
7317 	row_mysql_lock_data_dictionary(trx);
7318 
7319 	info = pars_info_create();
7320 
7321 	pars_info_bind_function(info, "my_func", fts_read_tables, tables);
7322 
7323 	graph = fts_parse_sql_no_dict_lock(
7324 		NULL,
7325 		info,
7326 		"DECLARE FUNCTION my_func;\n"
7327 		"DECLARE CURSOR c IS"
7328 		" SELECT NAME, ID "
7329 		" FROM SYS_TABLES;\n"
7330 		"BEGIN\n"
7331 		"\n"
7332 		"OPEN c;\n"
7333 		"WHILE 1 = 1 LOOP\n"
7334 		"  FETCH c INTO my_func();\n"
7335 		"  IF c % NOTFOUND THEN\n"
7336 		"    EXIT;\n"
7337 		"  END IF;\n"
7338 		"END LOOP;\n"
7339 		"CLOSE c;");
7340 
7341 	for (;;) {
7342 		error = fts_eval_sql(trx, graph);
7343 
7344 		if (error == DB_SUCCESS) {
7345 			fts_check_and_drop_orphaned_tables(trx, tables);
7346 			break;				/* Exit the loop. */
7347 		} else {
7348 			ib_vector_reset(tables);
7349 
7350 			fts_sql_rollback(trx);
7351 
7352 			ut_print_timestamp(stderr);
7353 
7354 			if (error == DB_LOCK_WAIT_TIMEOUT) {
7355 				ib_logf(IB_LOG_LEVEL_WARN,
7356 					"lock wait timeout reading SYS_TABLES. "
7357 					"Retrying!");
7358 
7359 				trx->error_state = DB_SUCCESS;
7360 			} else {
7361 				ib_logf(IB_LOG_LEVEL_ERROR,
7362 					"(%s) while reading SYS_TABLES.",
7363 					ut_strerr(error));
7364 
7365 				break;			/* Exit the loop. */
7366 			}
7367 		}
7368 	}
7369 
7370 	que_graph_free(graph);
7371 
7372 	row_mysql_unlock_data_dictionary(trx);
7373 
7374 	trx_free_for_background(trx);
7375 
7376 	if (heap != NULL) {
7377 		mem_heap_free(heap);
7378 	}
7379 
7380 	/** Free the memory allocated to store the .ibd names. */
7381 	for (space_name_list_t::iterator it = space_name_list.begin();
7382 	     it != space_name_list.end();
7383 	     ++it) {
7384 
7385 		delete[] *it;
7386 	}
7387 }
7388 
7389 /**********************************************************************//**
7390 Check whether user supplied stopword table is of the right format.
7391 Caller is responsible to hold dictionary locks.
7392 @return the stopword column charset if qualifies */
7393 UNIV_INTERN
7394 CHARSET_INFO*
fts_valid_stopword_table(const char * stopword_table_name)7395 fts_valid_stopword_table(
7396 /*=====================*/
7397 	 const char*	stopword_table_name)	/*!< in: Stopword table
7398 						name */
7399 {
7400 	dict_table_t*	table;
7401 	dict_col_t*     col = NULL;
7402 
7403 	if (!stopword_table_name) {
7404 		return(NULL);
7405 	}
7406 
7407 	table = dict_table_get_low(stopword_table_name);
7408 
7409 	if (!table) {
7410 		fprintf(stderr,
7411 			"InnoDB: user stopword table %s does not exist.\n",
7412 			stopword_table_name);
7413 
7414 		return(NULL);
7415 	} else {
7416 		const char*     col_name;
7417 
7418 		col_name = dict_table_get_col_name(table, 0);
7419 
7420 		if (ut_strcmp(col_name, "value")) {
7421 			fprintf(stderr,
7422 				"InnoDB: invalid column name for stopword "
7423 				"table %s. Its first column must be named as "
7424 				"'value'.\n", stopword_table_name);
7425 
7426 			return(NULL);
7427 		}
7428 
7429 		col = dict_table_get_nth_col(table, 0);
7430 
7431 		if (col->mtype != DATA_VARCHAR
7432 		    && col->mtype != DATA_VARMYSQL) {
7433 			fprintf(stderr,
7434 				"InnoDB: invalid column type for stopword "
7435 				"table %s. Its first column must be of "
7436 				"varchar type\n", stopword_table_name);
7437 
7438 			return(NULL);
7439 		}
7440 	}
7441 
7442 	ut_ad(col);
7443 
7444 	return(innobase_get_fts_charset(
7445 		static_cast<int>(col->prtype & DATA_MYSQL_TYPE_MASK),
7446 		static_cast<uint>(dtype_get_charset_coll(col->prtype))));
7447 }
7448 
7449 /**********************************************************************//**
7450 This function loads the stopword into the FTS cache. It also
7451 records/fetches stopword configuration to/from FTS configure
7452 table, depending on whether we are creating or reloading the
7453 FTS.
7454 @return TRUE if load operation is successful */
7455 UNIV_INTERN
7456 ibool
fts_load_stopword(const dict_table_t * table,trx_t * trx,const char * global_stopword_table,const char * session_stopword_table,ibool stopword_is_on,ibool reload)7457 fts_load_stopword(
7458 /*==============*/
7459 	const dict_table_t*
7460 			table,			/*!< in: Table with FTS */
7461 	trx_t*		trx,			/*!< in: Transactions */
7462 	const char*	global_stopword_table,	/*!< in: Global stopword table
7463 						name */
7464 	const char*	session_stopword_table,	/*!< in: Session stopword table
7465 						name */
7466 	ibool		stopword_is_on,		/*!< in: Whether stopword
7467 						option is turned on/off */
7468 	ibool		reload)			/*!< in: Whether it is
7469 						for reloading FTS table */
7470 {
7471 	fts_table_t	fts_table;
7472 	fts_string_t	str;
7473 	dberr_t		error = DB_SUCCESS;
7474 	ulint		use_stopword;
7475 	fts_cache_t*	cache;
7476 	const char*	stopword_to_use = NULL;
7477 	ibool		new_trx = FALSE;
7478 	byte		str_buffer[MAX_FULL_NAME_LEN + 1];
7479 
7480 	FTS_INIT_FTS_TABLE(&fts_table, "CONFIG", FTS_COMMON_TABLE, table);
7481 
7482 	cache = table->fts->cache;
7483 
7484 	if (!reload && !(cache->stopword_info.status
7485 			 & STOPWORD_NOT_INIT)) {
7486 		return(TRUE);
7487 	}
7488 
7489 	if (!trx) {
7490 		trx = trx_allocate_for_background();
7491 		trx->op_info = "upload FTS stopword";
7492 		new_trx = TRUE;
7493 	}
7494 
7495 	/* First check whether stopword filtering is turned off */
7496 	if (reload) {
7497 		error = fts_config_get_ulint(
7498 			trx, &fts_table, FTS_USE_STOPWORD, &use_stopword);
7499 	} else {
7500 		use_stopword = (ulint) stopword_is_on;
7501 
7502 		error = fts_config_set_ulint(
7503 			trx, &fts_table, FTS_USE_STOPWORD, use_stopword);
7504 	}
7505 
7506 	if (error != DB_SUCCESS) {
7507 		goto cleanup;
7508 	}
7509 
7510 	/* If stopword is turned off, no need to continue to load the
7511 	stopword into cache, but still need to do initialization */
7512 	if (!use_stopword) {
7513 		cache->stopword_info.status = STOPWORD_OFF;
7514 		goto cleanup;
7515 	}
7516 
7517 	if (reload) {
7518 		/* Fetch the stopword table name from FTS config
7519 		table */
7520 		str.f_n_char = 0;
7521 		str.f_str = str_buffer;
7522 		str.f_len = sizeof(str_buffer) - 1;
7523 
7524 		error = fts_config_get_value(
7525 			trx, &fts_table, FTS_STOPWORD_TABLE_NAME, &str);
7526 
7527 		if (error != DB_SUCCESS) {
7528 			goto cleanup;
7529 		}
7530 
7531 		if (strlen((char*) str.f_str) > 0) {
7532 			stopword_to_use = (const char*) str.f_str;
7533 		}
7534 	} else {
7535 		stopword_to_use = (session_stopword_table)
7536 			? session_stopword_table : global_stopword_table;
7537 	}
7538 
7539 	if (stopword_to_use
7540 	    && fts_load_user_stopword(table->fts, stopword_to_use,
7541 				      &cache->stopword_info)) {
7542 		/* Save the stopword table name to the configure
7543 		table */
7544 		if (!reload) {
7545 			str.f_n_char = 0;
7546 			str.f_str = (byte*) stopword_to_use;
7547 			str.f_len = ut_strlen(stopword_to_use);
7548 
7549 			error = fts_config_set_value(
7550 				trx, &fts_table, FTS_STOPWORD_TABLE_NAME, &str);
7551 		}
7552 	} else {
7553 		/* Load system default stopword list */
7554 		fts_load_default_stopword(&cache->stopword_info);
7555 	}
7556 
7557 cleanup:
7558 	if (new_trx) {
7559 		if (error == DB_SUCCESS) {
7560 			fts_sql_commit(trx);
7561 		} else {
7562 			fts_sql_rollback(trx);
7563 		}
7564 
7565 		trx_free_for_background(trx);
7566 	}
7567 
7568 	if (!cache->stopword_info.cached_stopword) {
7569 		cache->stopword_info.cached_stopword = rbt_create(
7570 			sizeof(fts_tokenizer_word_t), fts_utf8_string_cmp);
7571 	}
7572 
7573 	return(error == DB_SUCCESS);
7574 }
7575 
7576 /**********************************************************************//**
7577 Callback function when we initialize the FTS at the start up
7578 time. It recovers the maximum Doc IDs presented in the current table.
7579 @return: always returns TRUE */
7580 static
7581 ibool
fts_init_get_doc_id(void * row,void * user_arg)7582 fts_init_get_doc_id(
7583 /*================*/
7584 	void*	row,			/*!< in: sel_node_t* */
7585 	void*	user_arg)		/*!< in: fts cache */
7586 {
7587 	doc_id_t	doc_id = FTS_NULL_DOC_ID;
7588 	sel_node_t*	node = static_cast<sel_node_t*>(row);
7589 	que_node_t*	exp = node->select_list;
7590 	fts_cache_t*    cache = static_cast<fts_cache_t*>(user_arg);
7591 
7592 	ut_ad(ib_vector_is_empty(cache->get_docs));
7593 
7594 	/* Copy each indexed column content into doc->text.f_str */
7595 	if (exp) {
7596 		dfield_t*	dfield = que_node_get_val(exp);
7597 		dtype_t*        type = dfield_get_type(dfield);
7598 		void*           data = dfield_get_data(dfield);
7599 
7600 		ut_a(dtype_get_mtype(type) == DATA_INT);
7601 
7602 		doc_id = static_cast<doc_id_t>(mach_read_from_8(
7603 			static_cast<const byte*>(data)));
7604 
7605 		if (doc_id >= cache->next_doc_id) {
7606 			cache->next_doc_id = doc_id + 1;
7607 		}
7608 	}
7609 
7610 	return(TRUE);
7611 }
7612 
7613 /**********************************************************************//**
7614 Callback function when we initialize the FTS at the start up
7615 time. It recovers Doc IDs that have not sync-ed to the auxiliary
7616 table, and require to bring them back into FTS index.
7617 @return: always returns TRUE */
7618 static
7619 ibool
fts_init_recover_doc(void * row,void * user_arg)7620 fts_init_recover_doc(
7621 /*=================*/
7622 	void*	row,			/*!< in: sel_node_t* */
7623 	void*	user_arg)		/*!< in: fts cache */
7624 {
7625 
7626 	fts_doc_t       doc;
7627 	ulint		doc_len = 0;
7628 	ulint		field_no = 0;
7629 	fts_get_doc_t*  get_doc = static_cast<fts_get_doc_t*>(user_arg);
7630 	doc_id_t	doc_id = FTS_NULL_DOC_ID;
7631 	sel_node_t*	node = static_cast<sel_node_t*>(row);
7632 	que_node_t*	exp = node->select_list;
7633 	fts_cache_t*	cache = get_doc->cache;
7634 
7635 	fts_doc_init(&doc);
7636 	doc.found = TRUE;
7637 
7638 	ut_ad(cache);
7639 
7640 	/* Copy each indexed column content into doc->text.f_str */
7641 	while (exp) {
7642 		dfield_t*	dfield = que_node_get_val(exp);
7643 		ulint		len = dfield_get_len(dfield);
7644 
7645 		if (field_no == 0) {
7646 			dtype_t*        type = dfield_get_type(dfield);
7647 			void*           data = dfield_get_data(dfield);
7648 
7649 			ut_a(dtype_get_mtype(type) == DATA_INT);
7650 
7651 			doc_id = static_cast<doc_id_t>(mach_read_from_8(
7652 				static_cast<const byte*>(data)));
7653 
7654 			field_no++;
7655 			exp = que_node_get_next(exp);
7656 			continue;
7657 		}
7658 
7659 		if (len == UNIV_SQL_NULL) {
7660 			exp = que_node_get_next(exp);
7661 			continue;
7662 		}
7663 
7664 		ut_ad(get_doc);
7665 
7666 		if (!get_doc->index_cache->charset) {
7667 			ulint   prtype = dfield->type.prtype;
7668 
7669 			get_doc->index_cache->charset =
7670 				innobase_get_fts_charset(
7671 				(int)(prtype & DATA_MYSQL_TYPE_MASK),
7672 				(uint) dtype_get_charset_coll(prtype));
7673 		}
7674 
7675 		doc.charset = get_doc->index_cache->charset;
7676 
7677 		if (dfield_is_ext(dfield)) {
7678 			dict_table_t*	table = cache->sync->table;
7679 			ulint		zip_size = dict_table_zip_size(table);
7680 
7681 			doc.text.f_str = btr_copy_externally_stored_field(
7682 				&doc.text.f_len,
7683 				static_cast<byte*>(dfield_get_data(dfield)),
7684 				zip_size, len,
7685 				static_cast<mem_heap_t*>(doc.self_heap->arg));
7686 		} else {
7687 			doc.text.f_str = static_cast<byte*>(
7688 				dfield_get_data(dfield));
7689 
7690 			doc.text.f_len = len;
7691 		}
7692 
7693 		if (field_no == 1) {
7694 			fts_tokenize_document(&doc, NULL);
7695 		} else {
7696 			fts_tokenize_document_next(&doc, doc_len, NULL);
7697 		}
7698 
7699 		exp = que_node_get_next(exp);
7700 
7701 		doc_len += (exp) ? len + 1 : len;
7702 
7703 		field_no++;
7704 	}
7705 
7706 	fts_cache_add_doc(cache, get_doc->index_cache, doc_id, doc.tokens);
7707 
7708 	fts_doc_free(&doc);
7709 
7710 	cache->added++;
7711 
7712 	if (doc_id >= cache->next_doc_id) {
7713 		cache->next_doc_id = doc_id + 1;
7714 	}
7715 
7716 	return(TRUE);
7717 }
7718 
7719 /**********************************************************************//**
7720 This function brings FTS index in sync when FTS index is first
7721 used. There are documents that have not yet sync-ed to auxiliary
7722 tables from last server abnormally shutdown, we will need to bring
7723 such document into FTS cache before any further operations
7724 @return TRUE if all OK */
7725 UNIV_INTERN
7726 ibool
fts_init_index(dict_table_t * table,ibool has_cache_lock)7727 fts_init_index(
7728 /*===========*/
7729 	dict_table_t*	table,		/*!< in: Table with FTS */
7730 	ibool		has_cache_lock)	/*!< in: Whether we already have
7731 					cache lock */
7732 {
7733 	dict_index_t*   index;
7734 	doc_id_t        start_doc;
7735 	fts_get_doc_t*  get_doc = NULL;
7736 	fts_cache_t*    cache = table->fts->cache;
7737 	bool		need_init = false;
7738 
7739 	ut_ad(!mutex_own(&dict_sys->mutex));
7740 
7741 	/* First check cache->get_docs is initialized */
7742 	if (!has_cache_lock) {
7743 		rw_lock_x_lock(&cache->lock);
7744 	}
7745 
7746 	rw_lock_x_lock(&cache->init_lock);
7747 	if (cache->get_docs == NULL) {
7748 		cache->get_docs = fts_get_docs_create(cache);
7749 	}
7750 	rw_lock_x_unlock(&cache->init_lock);
7751 
7752 	if (table->fts->fts_status & ADDED_TABLE_SYNCED) {
7753 		goto func_exit;
7754 	}
7755 
7756 	need_init = true;
7757 
7758 	start_doc = cache->synced_doc_id;
7759 
7760 	if (!start_doc) {
7761 		fts_cmp_set_sync_doc_id(table, 0, TRUE, &start_doc);
7762 		cache->synced_doc_id = start_doc;
7763 	}
7764 
7765 	/* No FTS index, this is the case when previous FTS index
7766 	dropped, and we re-initialize the Doc ID system for subsequent
7767 	insertion */
7768 	if (ib_vector_is_empty(cache->get_docs)) {
7769 		index = dict_table_get_index_on_name(table, FTS_DOC_ID_INDEX_NAME);
7770 
7771 		ut_a(index);
7772 
7773 		fts_doc_fetch_by_doc_id(NULL, start_doc, index,
7774 					FTS_FETCH_DOC_BY_ID_LARGE,
7775 					fts_init_get_doc_id, cache);
7776 	} else {
7777 		if (table->fts->cache->stopword_info.status
7778 		    & STOPWORD_NOT_INIT) {
7779 			fts_load_stopword(table, NULL, NULL, NULL, TRUE, TRUE);
7780 		}
7781 
7782 		for (ulint i = 0; i < ib_vector_size(cache->get_docs); ++i) {
7783 			get_doc = static_cast<fts_get_doc_t*>(
7784 				ib_vector_get(cache->get_docs, i));
7785 
7786 			index = get_doc->index_cache->index;
7787 
7788 			fts_doc_fetch_by_doc_id(NULL, start_doc, index,
7789 						FTS_FETCH_DOC_BY_ID_LARGE,
7790 						fts_init_recover_doc, get_doc);
7791 		}
7792 	}
7793 
7794 	table->fts->fts_status |= ADDED_TABLE_SYNCED;
7795 
7796 	fts_get_docs_clear(cache->get_docs);
7797 
7798 func_exit:
7799 	if (!has_cache_lock) {
7800 		rw_lock_x_unlock(&cache->lock);
7801 	}
7802 
7803 	if (need_init) {
7804 		mutex_enter(&dict_sys->mutex);
7805 		/* Register the table with the optimize thread. */
7806 		fts_optimize_add_table(table);
7807 		mutex_exit(&dict_sys->mutex);
7808 	}
7809 
7810 	return(TRUE);
7811 }
7812