1 /*****************************************************************************
2 
3 Copyright (c) 2011, 2021, Oracle and/or its affiliates.
4 Copyright (c) 2016, 2022, MariaDB Corporation.
5 
6 This program is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free Software
8 Foundation; version 2 of the License.
9 
10 This program is distributed in the hope that it will be useful, but WITHOUT
11 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12 FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
13 
14 You should have received a copy of the GNU General Public License along with
15 this program; if not, write to the Free Software Foundation, Inc.,
16 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
17 
18 *****************************************************************************/
19 
20 /**************************************************//**
21 @file fts/fts0fts.cc
22 Full Text Search interface
23 ***********************************************************************/
24 
25 #include "trx0roll.h"
26 #include "row0mysql.h"
27 #include "row0upd.h"
28 #include "dict0types.h"
29 #include "dict0stats_bg.h"
30 #include "row0sel.h"
31 #include "fts0fts.h"
32 #include "fts0priv.h"
33 #include "fts0types.h"
34 #include "fts0types.inl"
35 #include "fts0vlc.h"
36 #include "fts0plugin.h"
37 #include "dict0priv.h"
38 #include "dict0stats.h"
39 #include "btr0pcur.h"
40 #include "sync0sync.h"
41 
42 static const ulint FTS_MAX_ID_LEN = 32;
43 
44 /** Column name from the FTS config table */
45 #define FTS_MAX_CACHE_SIZE_IN_MB	"cache_size_in_mb"
46 
/** Check whether an auxiliary table name belongs to an obsolete table
by looking for the obsolete keywords in the table name */
49 #define FTS_IS_OBSOLETE_AUX_TABLE(table_name)			\
50 	(strstr((table_name), "DOC_ID") != NULL			\
51 	 || strstr((table_name), "ADDED") != NULL		\
52 	 || strstr((table_name), "STOPWORDS") != NULL)
53 
54 /** This is maximum FTS cache for each table and would be
55 a configurable variable */
56 ulong	fts_max_cache_size;
57 
58 /** Whether the total memory used for FTS cache is exhausted, and we will
59 need a sync to free some memory */
60 bool	fts_need_sync = false;
61 
62 /** Variable specifying the total memory allocated for FTS cache */
63 ulong	fts_max_total_cache_size;
64 
65 /** This is FTS result cache limit for each query and would be
66 a configurable variable */
67 size_t	fts_result_cache_limit;
68 
/** Variable specifying the maximum FTS token size */
70 ulong	fts_max_token_size;
71 
/** Variable specifying the minimum FTS token size */
73 ulong	fts_min_token_size;
74 
75 
76 // FIXME: testing
77 static time_t elapsed_time;
78 static ulint n_nodes;
79 
80 #ifdef FTS_CACHE_SIZE_DEBUG
/** The cache size permissible lower limit (1M) */
82 static const ulint FTS_CACHE_SIZE_LOWER_LIMIT_IN_MB = 1;
83 
84 /** The cache size permissible upper limit (1G) */
85 static const ulint FTS_CACHE_SIZE_UPPER_LIMIT_IN_MB = 1024;
86 #endif
87 
88 /** Time to sleep after DEADLOCK error before retrying operation. */
89 static const ulint FTS_DEADLOCK_RETRY_WAIT = 100000;
90 
/** InnoDB default stopword list:
There are different versions of stopword lists; the stopwords listed
below come from the "Google Stopword" list. Reference:
http://meta.wikimedia.org/wiki/Stop_word_list/google_stop_word_list.
The final version of the InnoDB default stopword list is still pending
a decision */
97 const char *fts_default_stopword[] =
98 {
99 	"a",
100 	"about",
101 	"an",
102 	"are",
103 	"as",
104 	"at",
105 	"be",
106 	"by",
107 	"com",
108 	"de",
109 	"en",
110 	"for",
111 	"from",
112 	"how",
113 	"i",
114 	"in",
115 	"is",
116 	"it",
117 	"la",
118 	"of",
119 	"on",
120 	"or",
121 	"that",
122 	"the",
123 	"this",
124 	"to",
125 	"was",
126 	"what",
127 	"when",
128 	"where",
129 	"who",
130 	"will",
131 	"with",
132 	"und",
133 	"the",
134 	"www",
135 	NULL
136 };
137 
138 /** For storing table info when checking for orphaned tables. */
139 struct fts_aux_table_t {
140 	table_id_t	id;		/*!< Table id */
141 	table_id_t	parent_id;	/*!< Parent table id */
142 	table_id_t	index_id;	/*!< Table FT index id */
143 	char*		name;		/*!< Name of the table */
144 };
145 
146 /** FTS auxiliary table suffixes that are common to all FT indexes. */
147 const char* fts_common_tables[] = {
148 	"BEING_DELETED",
149 	"BEING_DELETED_CACHE",
150 	"CONFIG",
151 	"DELETED",
152 	"DELETED_CACHE",
153 	NULL
154 };
155 
156 /** FTS auxiliary INDEX split intervals. */
157 const  fts_index_selector_t fts_index_selector[] = {
158 	{ 9, "INDEX_1" },
159 	{ 65, "INDEX_2" },
160 	{ 70, "INDEX_3" },
161 	{ 75, "INDEX_4" },
162 	{ 80, "INDEX_5" },
163 	{ 85, "INDEX_6" },
164 	{  0 , NULL	 }
165 };
166 
167 /** Default config values for FTS indexes on a table. */
168 static const char* fts_config_table_insert_values_sql =
169 	"BEGIN\n"
170 	"\n"
171 	"INSERT INTO $config_table VALUES('"
172 		FTS_MAX_CACHE_SIZE_IN_MB "', '256');\n"
173 	""
174 	"INSERT INTO $config_table VALUES('"
175 		FTS_OPTIMIZE_LIMIT_IN_SECS  "', '180');\n"
176 	""
177 	"INSERT INTO $config_table VALUES ('"
178 		FTS_SYNCED_DOC_ID "', '0');\n"
179 	""
180 	"INSERT INTO $config_table VALUES ('"
181 		FTS_TOTAL_DELETED_COUNT "', '0');\n"
182 	"" /* Note: 0 == FTS_TABLE_STATE_RUNNING */
183 	"INSERT INTO $config_table VALUES ('"
184 		FTS_TABLE_STATE "', '0');\n";
185 
/** FTS tokenize parameter for plugin parser */
187 struct fts_tokenize_param_t {
188 	fts_doc_t*	result_doc;	/*!< Result doc for tokens */
189 	ulint		add_pos;	/*!< Added position for tokens */
190 };
191 
192 /** Run SYNC on the table, i.e., write out data from the cache to the
193 FTS auxiliary INDEX table and clear the cache at the end.
194 @param[in,out]	sync		sync state
195 @param[in]	unlock_cache	whether unlock cache lock when write node
196 @param[in]	wait		whether wait when a sync is in progress
197 @return DB_SUCCESS if all OK */
198 static
199 dberr_t
200 fts_sync(
201 	fts_sync_t*	sync,
202 	bool		unlock_cache,
203 	bool		wait);
204 
205 /****************************************************************//**
Release all resources held by the words rb tree e.g., the node ilist. */
207 static
208 void
209 fts_words_free(
210 /*===========*/
211 	ib_rbt_t*	words)		/*!< in: rb tree of words */
212 	MY_ATTRIBUTE((nonnull));
213 #ifdef FTS_CACHE_SIZE_DEBUG
214 /****************************************************************//**
215 Read the max cache size parameter from the config table. */
216 static
217 void
218 fts_update_max_cache_size(
219 /*======================*/
220 	fts_sync_t*	sync);		/*!< in: sync state */
221 #endif
222 
223 /*********************************************************************//**
This function fetches the document just inserted right before
we commit the transaction, tokenizes the inserted text data
and inserts it into the FTS auxiliary table and its cache.
227 @return TRUE if successful */
228 static
229 ulint
230 fts_add_doc_by_id(
231 /*==============*/
232 	fts_trx_table_t*ftt,		/*!< in: FTS trx table */
233 	doc_id_t	doc_id);	/*!< in: doc id */
234 /******************************************************************//**
235 Update the last document id. This function could create a new
236 transaction to update the last document id.
237 @return DB_SUCCESS if OK */
238 static
239 dberr_t
240 fts_update_sync_doc_id(
241 /*===================*/
242 	const dict_table_t*	table,		/*!< in: table */
243 	doc_id_t		doc_id,		/*!< in: last document id */
244 	trx_t*			trx)		/*!< in: update trx, or NULL */
245 	MY_ATTRIBUTE((nonnull(1)));
246 
247 /** Tokenize a document.
248 @param[in,out]	doc	document to tokenize
249 @param[out]	result	tokenization result
250 @param[in]	parser	pluggable parser */
251 static
252 void
253 fts_tokenize_document(
254 	fts_doc_t*		doc,
255 	fts_doc_t*		result,
256 	st_mysql_ftparser*	parser);
257 
258 /** Continue to tokenize a document.
259 @param[in,out]	doc	document to tokenize
260 @param[in]	add_pos	add this position to all tokens from this tokenization
261 @param[out]	result	tokenization result
262 @param[in]	parser	pluggable parser */
263 static
264 void
265 fts_tokenize_document_next(
266 	fts_doc_t*		doc,
267 	ulint			add_pos,
268 	fts_doc_t*		result,
269 	st_mysql_ftparser*	parser);
270 
271 /** Create the vector of fts_get_doc_t instances.
272 @param[in,out]	cache	fts cache
273 @return	vector of fts_get_doc_t instances */
274 static
275 ib_vector_t*
276 fts_get_docs_create(
277 	fts_cache_t*	cache);
278 
279 /** Free the FTS cache.
280 @param[in,out]	cache to be freed */
281 static
282 void
fts_cache_destroy(fts_cache_t * cache)283 fts_cache_destroy(fts_cache_t* cache)
284 {
285 	rw_lock_free(&cache->lock);
286 	rw_lock_free(&cache->init_lock);
287 	mutex_free(&cache->deleted_lock);
288 	mutex_free(&cache->doc_id_lock);
289 	os_event_destroy(cache->sync->event);
290 
291 	if (cache->stopword_info.cached_stopword) {
292 		rbt_free(cache->stopword_info.cached_stopword);
293 	}
294 
295 	if (cache->sync_heap->arg) {
296 		mem_heap_free(static_cast<mem_heap_t*>(cache->sync_heap->arg));
297 	}
298 
299 	mem_heap_free(cache->cache_heap);
300 }
301 
302 /** Get a character set based on precise type.
303 @param prtype precise type
304 @return the corresponding character set */
305 UNIV_INLINE
306 CHARSET_INFO*
fts_get_charset(ulint prtype)307 fts_get_charset(ulint prtype)
308 {
309 #ifdef UNIV_DEBUG
310 	switch (prtype & DATA_MYSQL_TYPE_MASK) {
311 	case MYSQL_TYPE_BIT:
312 	case MYSQL_TYPE_STRING:
313 	case MYSQL_TYPE_VAR_STRING:
314 	case MYSQL_TYPE_TINY_BLOB:
315 	case MYSQL_TYPE_MEDIUM_BLOB:
316 	case MYSQL_TYPE_BLOB:
317 	case MYSQL_TYPE_LONG_BLOB:
318 	case MYSQL_TYPE_VARCHAR:
319 		break;
320 	default:
321 		ut_error;
322 	}
323 #endif /* UNIV_DEBUG */
324 
325 	uint cs_num = (uint) dtype_get_charset_coll(prtype);
326 
327 	if (CHARSET_INFO* cs = get_charset(cs_num, MYF(MY_WME))) {
328 		return(cs);
329 	}
330 
331 	ib::fatal() << "Unable to find charset-collation " << cs_num;
332 	return(NULL);
333 }
334 
335 /****************************************************************//**
336 This function loads the default InnoDB stopword list */
337 static
338 void
fts_load_default_stopword(fts_stopword_t * stopword_info)339 fts_load_default_stopword(
340 /*======================*/
341 	fts_stopword_t*		stopword_info)	/*!< in: stopword info */
342 {
343 	fts_string_t		str;
344 	mem_heap_t*		heap;
345 	ib_alloc_t*		allocator;
346 	ib_rbt_t*		stop_words;
347 
348 	allocator = stopword_info->heap;
349 	heap = static_cast<mem_heap_t*>(allocator->arg);
350 
351 	if (!stopword_info->cached_stopword) {
352 		stopword_info->cached_stopword = rbt_create_arg_cmp(
353 			sizeof(fts_tokenizer_word_t), innobase_fts_text_cmp,
354 			&my_charset_latin1);
355 	}
356 
357 	stop_words = stopword_info->cached_stopword;
358 
359 	str.f_n_char = 0;
360 
361 	for (ulint i = 0; fts_default_stopword[i]; ++i) {
362 		char*			word;
363 		fts_tokenizer_word_t	new_word;
364 
365 		/* We are going to duplicate the value below. */
366 		word = const_cast<char*>(fts_default_stopword[i]);
367 
368 		new_word.nodes = ib_vector_create(
369 			allocator, sizeof(fts_node_t), 4);
370 
371 		str.f_len = ut_strlen(word);
372 		str.f_str = reinterpret_cast<byte*>(word);
373 
374 		fts_string_dup(&new_word.text, &str, heap);
375 
376 		rbt_insert(stop_words, &new_word, &new_word);
377 	}
378 
379 	stopword_info->status = STOPWORD_FROM_DEFAULT;
380 }
381 
382 /****************************************************************//**
383 Callback function to read a single stopword value.
384 @return Always return TRUE */
385 static
386 ibool
fts_read_stopword(void * row,void * user_arg)387 fts_read_stopword(
388 /*==============*/
389 	void*		row,		/*!< in: sel_node_t* */
390 	void*		user_arg)	/*!< in: pointer to ib_vector_t */
391 {
392 	ib_alloc_t*	allocator;
393 	fts_stopword_t*	stopword_info;
394 	sel_node_t*	sel_node;
395 	que_node_t*	exp;
396 	ib_rbt_t*	stop_words;
397 	dfield_t*	dfield;
398 	fts_string_t	str;
399 	mem_heap_t*	heap;
400 	ib_rbt_bound_t	parent;
401 
402 	sel_node = static_cast<sel_node_t*>(row);
403 	stopword_info = static_cast<fts_stopword_t*>(user_arg);
404 
405 	stop_words = stopword_info->cached_stopword;
406 	allocator =  static_cast<ib_alloc_t*>(stopword_info->heap);
407 	heap = static_cast<mem_heap_t*>(allocator->arg);
408 
409 	exp = sel_node->select_list;
410 
411 	/* We only need to read the first column */
412 	dfield = que_node_get_val(exp);
413 
414 	str.f_n_char = 0;
415 	str.f_str = static_cast<byte*>(dfield_get_data(dfield));
416 	str.f_len = dfield_get_len(dfield);
417 
418 	/* Only create new node if it is a value not already existed */
419 	if (str.f_len != UNIV_SQL_NULL
420 	    && rbt_search(stop_words, &parent, &str) != 0) {
421 
422 		fts_tokenizer_word_t	new_word;
423 
424 		new_word.nodes = ib_vector_create(
425 			allocator, sizeof(fts_node_t), 4);
426 
427 		new_word.text.f_str = static_cast<byte*>(
428 			 mem_heap_alloc(heap, str.f_len + 1));
429 
430 		memcpy(new_word.text.f_str, str.f_str, str.f_len);
431 
432 		new_word.text.f_n_char = 0;
433 		new_word.text.f_len = str.f_len;
434 		new_word.text.f_str[str.f_len] = 0;
435 
436 		rbt_insert(stop_words, &new_word, &new_word);
437 	}
438 
439 	return(TRUE);
440 }
441 
442 /******************************************************************//**
443 Load user defined stopword from designated user table
444 @return whether the operation is successful */
445 static
446 bool
fts_load_user_stopword(fts_t * fts,const char * stopword_table_name,fts_stopword_t * stopword_info)447 fts_load_user_stopword(
448 /*===================*/
449 	fts_t*		fts,			/*!< in: FTS struct */
450 	const char*	stopword_table_name,	/*!< in: Stopword table
451 						name */
452 	fts_stopword_t*	stopword_info)		/*!< in: Stopword info */
453 {
454 	if (!fts->dict_locked) {
455 		mutex_enter(&dict_sys->mutex);
456 	}
457 
458 	/* Validate the user table existence in the right format */
459 	bool ret= false;
460 	stopword_info->charset = fts_valid_stopword_table(stopword_table_name);
461 	if (!stopword_info->charset) {
462 cleanup:
463 		if (!fts->dict_locked) {
464 			mutex_exit(&dict_sys->mutex);
465 		}
466 
467 		return ret;
468 	}
469 
470 	trx_t* trx = trx_create();
471 	trx->op_info = "Load user stopword table into FTS cache";
472 
473 	if (!stopword_info->cached_stopword) {
474 		/* Create the stopword RB tree with the stopword column
475 		charset. All comparison will use this charset */
476 		stopword_info->cached_stopword = rbt_create_arg_cmp(
477 			sizeof(fts_tokenizer_word_t), innobase_fts_text_cmp,
478 			(void*)stopword_info->charset);
479 
480 	}
481 
482 	pars_info_t* info = pars_info_create();
483 
484 	pars_info_bind_id(info, "table_stopword", stopword_table_name);
485 
486 	pars_info_bind_function(info, "my_func", fts_read_stopword,
487 				stopword_info);
488 
489 	que_t* graph = fts_parse_sql_no_dict_lock(
490 		info,
491 		"DECLARE FUNCTION my_func;\n"
492 		"DECLARE CURSOR c IS"
493 		" SELECT value"
494 		" FROM $table_stopword;\n"
495 		"BEGIN\n"
496 		"\n"
497 		"OPEN c;\n"
498 		"WHILE 1 = 1 LOOP\n"
499 		"  FETCH c INTO my_func();\n"
500 		"  IF c % NOTFOUND THEN\n"
501 		"    EXIT;\n"
502 		"  END IF;\n"
503 		"END LOOP;\n"
504 		"CLOSE c;");
505 
506 	for (;;) {
507 		dberr_t error = fts_eval_sql(trx, graph);
508 
509 		if (UNIV_LIKELY(error == DB_SUCCESS)) {
510 			fts_sql_commit(trx);
511 			stopword_info->status = STOPWORD_USER_TABLE;
512 			break;
513 		} else {
514 			fts_sql_rollback(trx);
515 
516 			if (error == DB_LOCK_WAIT_TIMEOUT) {
517 				ib::warn() << "Lock wait timeout reading user"
518 					" stopword table. Retrying!";
519 
520 				trx->error_state = DB_SUCCESS;
521 			} else {
522 				ib::error() << "Error '" << error
523 					<< "' while reading user stopword"
524 					" table.";
525 				ret = FALSE;
526 				break;
527 			}
528 		}
529 	}
530 
531 	que_graph_free(graph);
532 	trx->free();
533 	ret = true;
534 	goto cleanup;
535 }
536 
537 /******************************************************************//**
538 Initialize the index cache. */
539 static
540 void
fts_index_cache_init(ib_alloc_t * allocator,fts_index_cache_t * index_cache)541 fts_index_cache_init(
542 /*=================*/
543 	ib_alloc_t*		allocator,	/*!< in: the allocator to use */
544 	fts_index_cache_t*	index_cache)	/*!< in: index cache */
545 {
546 	ulint			i;
547 
548 	ut_a(index_cache->words == NULL);
549 
550 	index_cache->words = rbt_create_arg_cmp(
551 		sizeof(fts_tokenizer_word_t), innobase_fts_text_cmp,
552 		(void*) index_cache->charset);
553 
554 	ut_a(index_cache->doc_stats == NULL);
555 
556 	index_cache->doc_stats = ib_vector_create(
557 		allocator, sizeof(fts_doc_stats_t), 4);
558 
559 	for (i = 0; i < FTS_NUM_AUX_INDEX; ++i) {
560 		ut_a(index_cache->ins_graph[i] == NULL);
561 		ut_a(index_cache->sel_graph[i] == NULL);
562 	}
563 }
564 
565 /*********************************************************************//**
566 Initialize FTS cache. */
567 void
fts_cache_init(fts_cache_t * cache)568 fts_cache_init(
569 /*===========*/
570 	fts_cache_t*	cache)		/*!< in: cache to initialize */
571 {
572 	ulint		i;
573 
574 	/* Just to make sure */
575 	ut_a(cache->sync_heap->arg == NULL);
576 
577 	cache->sync_heap->arg = mem_heap_create(1024);
578 
579 	cache->total_size = 0;
580 	cache->total_size_at_sync = 0;
581 
582 	mutex_enter((ib_mutex_t*) &cache->deleted_lock);
583 	cache->deleted_doc_ids = ib_vector_create(
584 		cache->sync_heap, sizeof(doc_id_t), 4);
585 	mutex_exit((ib_mutex_t*) &cache->deleted_lock);
586 
587 	/* Reset the cache data for all the FTS indexes. */
588 	for (i = 0; i < ib_vector_size(cache->indexes); ++i) {
589 		fts_index_cache_t*	index_cache;
590 
591 		index_cache = static_cast<fts_index_cache_t*>(
592 			ib_vector_get(cache->indexes, i));
593 
594 		fts_index_cache_init(cache->sync_heap, index_cache);
595 	}
596 }
597 
598 /****************************************************************//**
599 Create a FTS cache. */
600 fts_cache_t*
fts_cache_create(dict_table_t * table)601 fts_cache_create(
602 /*=============*/
603 	dict_table_t*	table)	/*!< in: table owns the FTS cache */
604 {
605 	mem_heap_t*	heap;
606 	fts_cache_t*	cache;
607 
608 	heap = static_cast<mem_heap_t*>(mem_heap_create(512));
609 
610 	cache = static_cast<fts_cache_t*>(
611 		mem_heap_zalloc(heap, sizeof(*cache)));
612 
613 	cache->cache_heap = heap;
614 
615 	rw_lock_create(fts_cache_rw_lock_key, &cache->lock, SYNC_FTS_CACHE);
616 
617 	rw_lock_create(
618 		fts_cache_init_rw_lock_key, &cache->init_lock,
619 		SYNC_FTS_CACHE_INIT);
620 
621 	mutex_create(LATCH_ID_FTS_DELETE, &cache->deleted_lock);
622 
623 	mutex_create(LATCH_ID_FTS_DOC_ID, &cache->doc_id_lock);
624 
625 	/* This is the heap used to create the cache itself. */
626 	cache->self_heap = ib_heap_allocator_create(heap);
627 
628 	/* This is a transient heap, used for storing sync data. */
629 	cache->sync_heap = ib_heap_allocator_create(heap);
630 	cache->sync_heap->arg = NULL;
631 
632 	cache->sync = static_cast<fts_sync_t*>(
633 		mem_heap_zalloc(heap, sizeof(fts_sync_t)));
634 
635 	cache->sync->table = table;
636 	cache->sync->event = os_event_create(0);
637 
638 	/* Create the index cache vector that will hold the inverted indexes. */
639 	cache->indexes = ib_vector_create(
640 		cache->self_heap, sizeof(fts_index_cache_t), 2);
641 
642 	fts_cache_init(cache);
643 
644 	cache->stopword_info.cached_stopword = NULL;
645 	cache->stopword_info.charset = NULL;
646 
647 	cache->stopword_info.heap = cache->self_heap;
648 
649 	cache->stopword_info.status = STOPWORD_NOT_INIT;
650 
651 	return(cache);
652 }
653 
654 /*******************************************************************//**
655 Add a newly create index into FTS cache */
656 void
fts_add_index(dict_index_t * index,dict_table_t * table)657 fts_add_index(
658 /*==========*/
659 	dict_index_t*	index,		/*!< FTS index to be added */
660 	dict_table_t*	table)		/*!< table */
661 {
662 	fts_t*			fts = table->fts;
663 	fts_cache_t*		cache;
664 	fts_index_cache_t*	index_cache;
665 
666 	ut_ad(fts);
667 	cache = table->fts->cache;
668 
669 	rw_lock_x_lock(&cache->init_lock);
670 
671 	ib_vector_push(fts->indexes, &index);
672 
673 	index_cache = fts_find_index_cache(cache, index);
674 
675 	if (!index_cache) {
676 		/* Add new index cache structure */
677 		index_cache = fts_cache_index_cache_create(table, index);
678 	}
679 
680 	rw_lock_x_unlock(&cache->init_lock);
681 }
682 
683 /*******************************************************************//**
684 recalibrate get_doc structure after index_cache in cache->indexes changed */
685 static
686 void
fts_reset_get_doc(fts_cache_t * cache)687 fts_reset_get_doc(
688 /*==============*/
689 	fts_cache_t*	cache)	/*!< in: FTS index cache */
690 {
691 	fts_get_doc_t*  get_doc;
692 	ulint		i;
693 
694 	ut_ad(rw_lock_own(&cache->init_lock, RW_LOCK_X));
695 
696 	ib_vector_reset(cache->get_docs);
697 
698 	for (i = 0; i < ib_vector_size(cache->indexes); i++) {
699 		fts_index_cache_t*	ind_cache;
700 
701 		ind_cache = static_cast<fts_index_cache_t*>(
702 			ib_vector_get(cache->indexes, i));
703 
704 		get_doc = static_cast<fts_get_doc_t*>(
705 			ib_vector_push(cache->get_docs, NULL));
706 
707 		memset(get_doc, 0x0, sizeof(*get_doc));
708 
709 		get_doc->index_cache = ind_cache;
710 		get_doc->cache = cache;
711 	}
712 
713 	ut_ad(ib_vector_size(cache->get_docs)
714 	      == ib_vector_size(cache->indexes));
715 }
716 
717 /*******************************************************************//**
718 Check an index is in the table->indexes list
719 @return TRUE if it exists */
720 static
721 ibool
fts_in_dict_index(dict_table_t * table,dict_index_t * index_check)722 fts_in_dict_index(
723 /*==============*/
724 	dict_table_t*	table,		/*!< in: Table */
725 	dict_index_t*	index_check)	/*!< in: index to be checked */
726 {
727 	dict_index_t*	index;
728 
729 	for (index = dict_table_get_first_index(table);
730 	     index != NULL;
731 	     index = dict_table_get_next_index(index)) {
732 
733 		if (index == index_check) {
734 			return(TRUE);
735 		}
736 	}
737 
738 	return(FALSE);
739 }
740 
741 /*******************************************************************//**
742 Check an index is in the fts->cache->indexes list
743 @return TRUE if it exists */
744 static
745 ibool
fts_in_index_cache(dict_table_t * table,dict_index_t * index)746 fts_in_index_cache(
747 /*===============*/
748 	dict_table_t*	table,	/*!< in: Table */
749 	dict_index_t*	index)	/*!< in: index to be checked */
750 {
751 	ulint	i;
752 
753 	for (i = 0; i < ib_vector_size(table->fts->cache->indexes); i++) {
754 		fts_index_cache_t*      index_cache;
755 
756 		index_cache = static_cast<fts_index_cache_t*>(
757 			ib_vector_get(table->fts->cache->indexes, i));
758 
759 		if (index_cache->index == index) {
760 			return(TRUE);
761 		}
762 	}
763 
764 	return(FALSE);
765 }
766 
767 /*******************************************************************//**
768 Check indexes in the fts->indexes is also present in index cache and
769 table->indexes list
770 @return TRUE if all indexes match */
771 ibool
fts_check_cached_index(dict_table_t * table)772 fts_check_cached_index(
773 /*===================*/
774 	dict_table_t*	table)	/*!< in: Table where indexes are dropped */
775 {
776 	ulint	i;
777 
778 	if (!table->fts || !table->fts->cache) {
779 		return(TRUE);
780 	}
781 
782 	ut_a(ib_vector_size(table->fts->indexes)
783 	      == ib_vector_size(table->fts->cache->indexes));
784 
785 	for (i = 0; i < ib_vector_size(table->fts->indexes); i++) {
786 		dict_index_t*	index;
787 
788 		index = static_cast<dict_index_t*>(
789 			ib_vector_getp(table->fts->indexes, i));
790 
791 		if (!fts_in_index_cache(table, index)) {
792 			return(FALSE);
793 		}
794 
795 		if (!fts_in_dict_index(table, index)) {
796 			return(FALSE);
797 		}
798 	}
799 
800 	return(TRUE);
801 }
802 
803 /** Clear all fts resources when there is no internal DOC_ID
804 and there are no new fts index to add.
805 @param[in,out]	table	table  where fts is to be freed
806 @param[in]	trx	transaction to drop all fts tables */
fts_clear_all(dict_table_t * table,trx_t * trx)807 void fts_clear_all(dict_table_t *table, trx_t *trx)
808 {
809   if (DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID) ||
810       !table->fts ||
811       !ib_vector_is_empty(table->fts->indexes))
812     return;
813 
814   for (const dict_index_t *index= dict_table_get_first_index(table);
815        index; index= dict_table_get_next_index(index))
816     if (index->type & DICT_FTS)
817       return;
818 
819   fts_optimize_remove_table(table);
820 
821   fts_drop_tables(trx, table);
822   fts_free(table);
823   DICT_TF2_FLAG_UNSET(table, DICT_TF2_FTS);
824 }
825 
/*******************************************************************//**
Drop auxiliary tables related to an FTS index.
Two cases are distinguished:
(1) the index is the only entry of fts->indexes and the table has
DICT_TF2_FTS_HAS_DOC_ID set, or fts->indexes is already empty: the table
is removed from the optimize thread, DICT_TF2_FTS is cleared and the
whole FTS cache is replaced by a fresh one that keeps the doc id counters;
(2) otherwise only the index cache of this index is released, under the
X-latch on cache->init_lock.
In both cases the auxiliary index tables are then dropped and the index
is removed from fts->indexes.
@return DB_SUCCESS or error number */
dberr_t
fts_drop_index(
/*===========*/
	dict_table_t*	table,	/*!< in: Table where indexes are dropped */
	dict_index_t*	index,	/*!< in: Index to be dropped */
	trx_t*		trx)	/*!< in: Transaction for the drop */
{
	/* NOTE(review): table->fts is dereferenced without a check;
	callers presumably guarantee that it is non-NULL. */
	ib_vector_t*	indexes = table->fts->indexes;
	dberr_t		err = DB_SUCCESS;

	ut_a(indexes);

	if ((ib_vector_size(indexes) == 1
	     && (index == static_cast<dict_index_t*>(
			ib_vector_getp(table->fts->indexes, 0)))
	     && DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID))
	    || ib_vector_is_empty(indexes)) {
		doc_id_t	current_doc_id;
		doc_id_t	first_doc_id;

		/* If we are dropping the only FTS index of the table,
		remove it from optimize thread */
		fts_optimize_remove_table(table);

		DICT_TF2_FLAG_UNSET(table, DICT_TF2_FTS);

		/* Wait until the index is no longer flagged as being
		synced, or until the transaction is interrupted. */
		while (index->index_fts_syncing
		       && !trx_is_interrupted(trx)) {
			DICT_BG_YIELD(trx);
		}

		/* Rebuild the cache from scratch, carrying over only
		the doc id counters. */
		current_doc_id = table->fts->cache->next_doc_id;
		first_doc_id = table->fts->cache->first_doc_id;
		fts_cache_clear(table->fts->cache);
		fts_cache_destroy(table->fts->cache);
		table->fts->cache = fts_cache_create(table);
		table->fts->cache->next_doc_id = current_doc_id;
		table->fts->cache->first_doc_id = first_doc_id;
	} else {
		fts_cache_t*            cache = table->fts->cache;
		fts_index_cache_t*      index_cache;

		rw_lock_x_lock(&cache->init_lock);

		index_cache = fts_find_index_cache(cache, index);

		if (index_cache != NULL) {
			/* Same wait as above before the cached words
			of this index are released. */
			while (index->index_fts_syncing
			       && !trx_is_interrupted(trx)) {
				DICT_BG_YIELD(trx);
			}
			if (index_cache->words) {
				fts_words_free(index_cache->words);
				rbt_free(index_cache->words);
			}

			/* NOTE(review): the key passed here is the first
			pointer-sized word of the index_cache element,
			presumably its index pointer, which
			ib_vector_remove() is assumed to match against
			the start of each element; confirm against
			ib_vector_remove() and fts_index_cache_t. */
			ib_vector_remove(cache->indexes, *(void**) index_cache);
		}

		/* cache->indexes may have changed, so the get_docs
		vector (if it exists) has to be rebuilt to match. */
		if (cache->get_docs) {
			fts_reset_get_doc(cache);
		}

		rw_lock_x_unlock(&cache->init_lock);
	}

	err = fts_drop_index_tables(trx, index);

	/* The index is removed from fts->indexes regardless of the
	outcome of dropping the auxiliary tables. */
	ib_vector_remove(indexes, (const void*) index);

	return(err);
}
901 
902 /****************************************************************//**
903 Free the query graph but check whether dict_sys->mutex is already
904 held */
905 void
fts_que_graph_free_check_lock(fts_table_t * fts_table,const fts_index_cache_t * index_cache,que_t * graph)906 fts_que_graph_free_check_lock(
907 /*==========================*/
908 	fts_table_t*		fts_table,	/*!< in: FTS table */
909 	const fts_index_cache_t*index_cache,	/*!< in: FTS index cache */
910 	que_t*			graph)		/*!< in: query graph */
911 {
912 	bool	has_dict = FALSE;
913 
914 	if (fts_table && fts_table->table) {
915 		ut_ad(fts_table->table->fts);
916 
917 		has_dict = fts_table->table->fts->dict_locked;
918 	} else if (index_cache) {
919 		ut_ad(index_cache->index->table->fts);
920 
921 		has_dict = index_cache->index->table->fts->dict_locked;
922 	}
923 
924 	if (!has_dict) {
925 		mutex_enter(&dict_sys->mutex);
926 	}
927 
928 	ut_ad(mutex_own(&dict_sys->mutex));
929 
930 	que_graph_free(graph);
931 
932 	if (!has_dict) {
933 		mutex_exit(&dict_sys->mutex);
934 	}
935 }
936 
937 /****************************************************************//**
938 Create an FTS index cache. */
939 CHARSET_INFO*
fts_index_get_charset(dict_index_t * index)940 fts_index_get_charset(
941 /*==================*/
942 	dict_index_t*		index)		/*!< in: FTS index */
943 {
944 	CHARSET_INFO*		charset = NULL;
945 	dict_field_t*		field;
946 	ulint			prtype;
947 
948 	field = dict_index_get_nth_field(index, 0);
949 	prtype = field->col->prtype;
950 
951 	charset = fts_get_charset(prtype);
952 
953 #ifdef FTS_DEBUG
954 	/* Set up charset info for this index. Please note all
955 	field of the FTS index should have the same charset */
956 	for (i = 1; i < index->n_fields; i++) {
957 		CHARSET_INFO*   fld_charset;
958 
959 		field = dict_index_get_nth_field(index, i);
960 		prtype = field->col->prtype;
961 
962 		fld_charset = fts_get_charset(prtype);
963 
964 		/* All FTS columns should have the same charset */
965 		if (charset) {
966 			ut_a(charset == fld_charset);
967 		} else {
968 			charset = fld_charset;
969 		}
970 	}
971 #endif
972 
973 	return(charset);
974 
975 }
976 /****************************************************************//**
977 Create an FTS index cache.
978 @return Index Cache */
979 fts_index_cache_t*
fts_cache_index_cache_create(dict_table_t * table,dict_index_t * index)980 fts_cache_index_cache_create(
981 /*=========================*/
982 	dict_table_t*		table,		/*!< in: table with FTS index */
983 	dict_index_t*		index)		/*!< in: FTS index */
984 {
985 	ulint			n_bytes;
986 	fts_index_cache_t*	index_cache;
987 	fts_cache_t*		cache = table->fts->cache;
988 
989 	ut_a(cache != NULL);
990 
991 	ut_ad(rw_lock_own(&cache->init_lock, RW_LOCK_X));
992 
993 	/* Must not already exist in the cache vector. */
994 	ut_a(fts_find_index_cache(cache, index) == NULL);
995 
996 	index_cache = static_cast<fts_index_cache_t*>(
997 		ib_vector_push(cache->indexes, NULL));
998 
999 	memset(index_cache, 0x0, sizeof(*index_cache));
1000 
1001 	index_cache->index = index;
1002 
1003 	index_cache->charset = fts_index_get_charset(index);
1004 
1005 	n_bytes = sizeof(que_t*) * FTS_NUM_AUX_INDEX;
1006 
1007 	index_cache->ins_graph = static_cast<que_t**>(
1008 		mem_heap_zalloc(static_cast<mem_heap_t*>(
1009 			cache->self_heap->arg), n_bytes));
1010 
1011 	index_cache->sel_graph = static_cast<que_t**>(
1012 		mem_heap_zalloc(static_cast<mem_heap_t*>(
1013 			cache->self_heap->arg), n_bytes));
1014 
1015 	fts_index_cache_init(cache->sync_heap, index_cache);
1016 
1017 	if (cache->get_docs) {
1018 		fts_reset_get_doc(cache);
1019 	}
1020 
1021 	return(index_cache);
1022 }
1023 
1024 /****************************************************************//**
1025 Release all resources help by the words rb tree e.g., the node ilist. */
1026 static
1027 void
fts_words_free(ib_rbt_t * words)1028 fts_words_free(
1029 /*===========*/
1030 	ib_rbt_t*	words)			/*!< in: rb tree of words */
1031 {
1032 	const ib_rbt_node_t*	rbt_node;
1033 
1034 	/* Free the resources held by a word. */
1035 	for (rbt_node = rbt_first(words);
1036 	     rbt_node != NULL;
1037 	     rbt_node = rbt_first(words)) {
1038 
1039 		ulint			i;
1040 		fts_tokenizer_word_t*	word;
1041 
1042 		word = rbt_value(fts_tokenizer_word_t, rbt_node);
1043 
1044 		/* Free the ilists of this word. */
1045 		for (i = 0; i < ib_vector_size(word->nodes); ++i) {
1046 
1047 			fts_node_t* fts_node = static_cast<fts_node_t*>(
1048 				ib_vector_get(word->nodes, i));
1049 
1050 			ut_free(fts_node->ilist);
1051 			fts_node->ilist = NULL;
1052 		}
1053 
1054 		/* NOTE: We are responsible for free'ing the node */
1055 		ut_free(rbt_remove_node(words, rbt_node));
1056 	}
1057 }
1058 
1059 /** Clear cache.
1060 @param[in,out]	cache	fts cache */
1061 void
fts_cache_clear(fts_cache_t * cache)1062 fts_cache_clear(
1063 	fts_cache_t*	cache)
1064 {
1065 	ulint		i;
1066 
1067 	for (i = 0; i < ib_vector_size(cache->indexes); ++i) {
1068 		ulint			j;
1069 		fts_index_cache_t*	index_cache;
1070 
1071 		index_cache = static_cast<fts_index_cache_t*>(
1072 			ib_vector_get(cache->indexes, i));
1073 
1074 		fts_words_free(index_cache->words);
1075 
1076 		rbt_free(index_cache->words);
1077 
1078 		index_cache->words = NULL;
1079 
1080 		for (j = 0; j < FTS_NUM_AUX_INDEX; ++j) {
1081 
1082 			if (index_cache->ins_graph[j] != NULL) {
1083 
1084 				fts_que_graph_free_check_lock(
1085 					NULL, index_cache,
1086 					index_cache->ins_graph[j]);
1087 
1088 				index_cache->ins_graph[j] = NULL;
1089 			}
1090 
1091 			if (index_cache->sel_graph[j] != NULL) {
1092 
1093 				fts_que_graph_free_check_lock(
1094 					NULL, index_cache,
1095 					index_cache->sel_graph[j]);
1096 
1097 				index_cache->sel_graph[j] = NULL;
1098 			}
1099 		}
1100 
1101 		index_cache->doc_stats = NULL;
1102 	}
1103 
1104 	fts_need_sync = false;
1105 
1106 	cache->total_size = 0;
1107 
1108 	mutex_enter((ib_mutex_t*) &cache->deleted_lock);
1109 	cache->deleted_doc_ids = NULL;
1110 	mutex_exit((ib_mutex_t*) &cache->deleted_lock);
1111 
1112 	mem_heap_free(static_cast<mem_heap_t*>(cache->sync_heap->arg));
1113 	cache->sync_heap->arg = NULL;
1114 }
1115 
1116 /*********************************************************************//**
1117 Search the index specific cache for a particular FTS index.
1118 @return the index cache else NULL */
1119 UNIV_INLINE
1120 fts_index_cache_t*
fts_get_index_cache(fts_cache_t * cache,const dict_index_t * index)1121 fts_get_index_cache(
1122 /*================*/
1123 	fts_cache_t*		cache,		/*!< in: cache to search */
1124 	const dict_index_t*	index)		/*!< in: index to search for */
1125 {
1126 	ulint			i;
1127 
1128 	ut_ad(rw_lock_own((rw_lock_t*) &cache->lock, RW_LOCK_X)
1129 	      || rw_lock_own((rw_lock_t*) &cache->init_lock, RW_LOCK_X));
1130 
1131 	for (i = 0; i < ib_vector_size(cache->indexes); ++i) {
1132 		fts_index_cache_t*	index_cache;
1133 
1134 		index_cache = static_cast<fts_index_cache_t*>(
1135 			ib_vector_get(cache->indexes, i));
1136 
1137 		if (index_cache->index == index) {
1138 
1139 			return(index_cache);
1140 		}
1141 	}
1142 
1143 	return(NULL);
1144 }
1145 
#ifdef FTS_DEBUG
/** Search the cache for the get_doc structure that belongs to an index.
In debug builds this asserts that the caller owns cache->init_lock in X mode.
@param[in]	cache	cache to search
@param[in]	index	index to search for
@return the fts_get_doc_t item, or NULL if none refers to the index */
static
fts_get_doc_t*
fts_get_index_get_doc(
	fts_cache_t*		cache,
	const dict_index_t*	index)
{
	ut_ad(rw_lock_own((rw_lock_t*) &cache->init_lock, RW_LOCK_X));

	const ulint	n_get_docs = ib_vector_size(cache->get_docs);

	for (ulint slot = 0; slot != n_get_docs; ++slot) {

		fts_get_doc_t*	candidate = static_cast<fts_get_doc_t*>(
			ib_vector_get(cache->get_docs, slot));

		if (candidate->index_cache->index == index) {
			return(candidate);
		}
	}

	return(NULL);
}
#endif
1176 
1177 /**********************************************************************//**
1178 Find an existing word, or if not found, create one and return it.
1179 @return specified word token */
1180 static
1181 fts_tokenizer_word_t*
fts_tokenizer_word_get(fts_cache_t * cache,fts_index_cache_t * index_cache,fts_string_t * text)1182 fts_tokenizer_word_get(
1183 /*===================*/
1184 	fts_cache_t*	cache,			/*!< in: cache */
1185 	fts_index_cache_t*
1186 			index_cache,		/*!< in: index cache */
1187 	fts_string_t*	text)			/*!< in: node text */
1188 {
1189 	fts_tokenizer_word_t*	word;
1190 	ib_rbt_bound_t		parent;
1191 
1192 	ut_ad(rw_lock_own(&cache->lock, RW_LOCK_X));
1193 
1194 	/* If it is a stopword, do not index it */
1195 	if (!fts_check_token(text,
1196 		    cache->stopword_info.cached_stopword,
1197 		    index_cache->charset)) {
1198 
1199 		return(NULL);
1200 	}
1201 
1202 	/* Check if we found a match, if not then add word to tree. */
1203 	if (rbt_search(index_cache->words, &parent, text) != 0) {
1204 		mem_heap_t*		heap;
1205 		fts_tokenizer_word_t	new_word;
1206 
1207 		heap = static_cast<mem_heap_t*>(cache->sync_heap->arg);
1208 
1209 		new_word.nodes = ib_vector_create(
1210 			cache->sync_heap, sizeof(fts_node_t), 4);
1211 
1212 		fts_string_dup(&new_word.text, text, heap);
1213 
1214 		parent.last = rbt_add_node(
1215 			index_cache->words, &parent, &new_word);
1216 
1217 		/* Take into account the RB tree memory use and the vector. */
1218 		cache->total_size += sizeof(new_word)
1219 			+ sizeof(ib_rbt_node_t)
1220 			+ text->f_len
1221 			+ (sizeof(fts_node_t) * 4)
1222 			+ sizeof(*new_word.nodes);
1223 
1224 		ut_ad(rbt_validate(index_cache->words));
1225 	}
1226 
1227 	word = rbt_value(fts_tokenizer_word_t, parent.last);
1228 
1229 	return(word);
1230 }
1231 
1232 /**********************************************************************//**
1233 Add the given doc_id/word positions to the given node's ilist. */
1234 void
fts_cache_node_add_positions(fts_cache_t * cache,fts_node_t * node,doc_id_t doc_id,ib_vector_t * positions)1235 fts_cache_node_add_positions(
1236 /*=========================*/
1237 	fts_cache_t*	cache,		/*!< in: cache */
1238 	fts_node_t*	node,		/*!< in: word node */
1239 	doc_id_t	doc_id,		/*!< in: doc id */
1240 	ib_vector_t*	positions)	/*!< in: fts_token_t::positions */
1241 {
1242 	ulint		i;
1243 	byte*		ptr;
1244 	byte*		ilist;
1245 	ulint		enc_len;
1246 	ulint		last_pos;
1247 	byte*		ptr_start;
1248 	doc_id_t	doc_id_delta;
1249 
1250 #ifdef UNIV_DEBUG
1251 	if (cache) {
1252 		ut_ad(rw_lock_own(&cache->lock, RW_LOCK_X));
1253 	}
1254 #endif /* UNIV_DEBUG */
1255 
1256 	ut_ad(doc_id >= node->last_doc_id);
1257 
1258 	/* Calculate the space required to store the ilist. */
1259 	doc_id_delta = doc_id - node->last_doc_id;
1260 	enc_len = fts_get_encoded_len(doc_id_delta);
1261 
1262 	last_pos = 0;
1263 	for (i = 0; i < ib_vector_size(positions); i++) {
1264 		ulint	pos = *(static_cast<ulint*>(
1265 			ib_vector_get(positions, i)));
1266 
1267 		ut_ad(last_pos == 0 || pos > last_pos);
1268 
1269 		enc_len += fts_get_encoded_len(pos - last_pos);
1270 		last_pos = pos;
1271 	}
1272 
1273 	/* The 0x00 byte at the end of the token positions list. */
1274 	enc_len++;
1275 
1276 	if ((node->ilist_size_alloc - node->ilist_size) >= enc_len) {
1277 		/* No need to allocate more space, we can fit in the new
1278 		data at the end of the old one. */
1279 		ilist = NULL;
1280 		ptr = node->ilist + node->ilist_size;
1281 	} else {
1282 		ulint	new_size = node->ilist_size + enc_len;
1283 
1284 		/* Over-reserve space by a fixed size for small lengths and
1285 		by 20% for lengths >= 48 bytes. */
1286 		if (new_size < 16) {
1287 			new_size = 16;
1288 		} else if (new_size < 32) {
1289 			new_size = 32;
1290 		} else if (new_size < 48) {
1291 			new_size = 48;
1292 		} else {
1293 			new_size = (ulint)(1.2 * new_size);
1294 		}
1295 
1296 		ilist = static_cast<byte*>(ut_malloc_nokey(new_size));
1297 		ptr = ilist + node->ilist_size;
1298 
1299 		node->ilist_size_alloc = new_size;
1300 		if (cache) {
1301 			cache->total_size += new_size;
1302 		}
1303 	}
1304 
1305 	ptr_start = ptr;
1306 
1307 	/* Encode the new fragment. */
1308 	ptr = fts_encode_int(doc_id_delta, ptr);
1309 
1310 	last_pos = 0;
1311 	for (i = 0; i < ib_vector_size(positions); i++) {
1312 		ulint	pos = *(static_cast<ulint*>(
1313 			 ib_vector_get(positions, i)));
1314 
1315 		ptr = fts_encode_int(pos - last_pos, ptr);
1316 		last_pos = pos;
1317 	}
1318 
1319 	*ptr++ = 0;
1320 
1321 	ut_a(enc_len == (ulint)(ptr - ptr_start));
1322 
1323 	if (ilist) {
1324 		/* Copy old ilist to the start of the new one and switch the
1325 		new one into place in the node. */
1326 		if (node->ilist_size > 0) {
1327 			memcpy(ilist, node->ilist, node->ilist_size);
1328 			ut_free(node->ilist);
1329 			if (cache) {
1330 				cache->total_size -= node->ilist_size;
1331 			}
1332 		}
1333 
1334 		node->ilist = ilist;
1335 	}
1336 
1337 	node->ilist_size += enc_len;
1338 
1339 	if (node->first_doc_id == FTS_NULL_DOC_ID) {
1340 		node->first_doc_id = doc_id;
1341 	}
1342 
1343 	node->last_doc_id = doc_id;
1344 	++node->doc_count;
1345 }
1346 
1347 /**********************************************************************//**
1348 Add document to the cache. */
1349 static
1350 void
fts_cache_add_doc(fts_cache_t * cache,fts_index_cache_t * index_cache,doc_id_t doc_id,ib_rbt_t * tokens)1351 fts_cache_add_doc(
1352 /*==============*/
1353 	fts_cache_t*	cache,			/*!< in: cache */
1354 	fts_index_cache_t*
1355 			index_cache,		/*!< in: index cache */
1356 	doc_id_t	doc_id,			/*!< in: doc id to add */
1357 	ib_rbt_t*	tokens)			/*!< in: document tokens */
1358 {
1359 	const ib_rbt_node_t*	node;
1360 	ulint			n_words;
1361 	fts_doc_stats_t*	doc_stats;
1362 
1363 	if (!tokens) {
1364 		return;
1365 	}
1366 
1367 	ut_ad(rw_lock_own(&cache->lock, RW_LOCK_X));
1368 
1369 	n_words = rbt_size(tokens);
1370 
1371 	for (node = rbt_first(tokens); node; node = rbt_first(tokens)) {
1372 
1373 		fts_tokenizer_word_t*	word;
1374 		fts_node_t*		fts_node = NULL;
1375 		fts_token_t*		token = rbt_value(fts_token_t, node);
1376 
1377 		/* Find and/or add token to the cache. */
1378 		word = fts_tokenizer_word_get(
1379 			cache, index_cache, &token->text);
1380 
1381 		if (!word) {
1382 			ut_free(rbt_remove_node(tokens, node));
1383 			continue;
1384 		}
1385 
1386 		if (ib_vector_size(word->nodes) > 0) {
1387 			fts_node = static_cast<fts_node_t*>(
1388 				ib_vector_last(word->nodes));
1389 		}
1390 
1391 		if (fts_node == NULL || fts_node->synced
1392 		    || fts_node->ilist_size > FTS_ILIST_MAX_SIZE
1393 		    || doc_id < fts_node->last_doc_id) {
1394 
1395 			fts_node = static_cast<fts_node_t*>(
1396 				ib_vector_push(word->nodes, NULL));
1397 
1398 			memset(fts_node, 0x0, sizeof(*fts_node));
1399 
1400 			cache->total_size += sizeof(*fts_node);
1401 		}
1402 
1403 		fts_cache_node_add_positions(
1404 			cache, fts_node, doc_id, token->positions);
1405 
1406 		ut_free(rbt_remove_node(tokens, node));
1407 	}
1408 
1409 	ut_a(rbt_empty(tokens));
1410 
1411 	/* Add to doc ids processed so far. */
1412 	doc_stats = static_cast<fts_doc_stats_t*>(
1413 		ib_vector_push(index_cache->doc_stats, NULL));
1414 
1415 	doc_stats->doc_id = doc_id;
1416 	doc_stats->word_count = n_words;
1417 
1418 	/* Add the doc stats memory usage too. */
1419 	cache->total_size += sizeof(*doc_stats);
1420 
1421 	if (doc_id > cache->sync->max_doc_id) {
1422 		cache->sync->max_doc_id = doc_id;
1423 	}
1424 }
1425 
1426 /****************************************************************//**
1427 Drops a table. If the table can't be found we return a SUCCESS code.
1428 @return DB_SUCCESS or error code */
1429 static MY_ATTRIBUTE((nonnull, warn_unused_result))
1430 dberr_t
fts_drop_table(trx_t * trx,const char * table_name)1431 fts_drop_table(
1432 /*===========*/
1433 	trx_t*		trx,			/*!< in: transaction */
1434 	const char*	table_name)		/*!< in: table to drop */
1435 {
1436 	dict_table_t*	table;
1437 	dberr_t		error = DB_SUCCESS;
1438 
1439 	/* Check that the table exists in our data dictionary.
1440 	Similar to regular drop table case, we will open table with
1441 	DICT_ERR_IGNORE_INDEX_ROOT and DICT_ERR_IGNORE_CORRUPT option */
1442 	table = dict_table_open_on_name(
1443 		table_name, TRUE, FALSE,
1444 		static_cast<dict_err_ignore_t>(
1445                         DICT_ERR_IGNORE_INDEX_ROOT | DICT_ERR_IGNORE_CORRUPT));
1446 
1447 	if (table != 0) {
1448 
1449 		dict_table_close(table, TRUE, FALSE);
1450 
1451 		/* Pass nonatomic=false (dont allow data dict unlock),
1452 		because the transaction may hold locks on SYS_* tables from
1453 		previous calls to fts_drop_table(). */
1454 		error = row_drop_table_for_mysql(table_name, trx,
1455 						 SQLCOM_DROP_DB, false, false);
1456 
1457 		if (UNIV_UNLIKELY(error != DB_SUCCESS)) {
1458 			ib::error() << "Unable to drop FTS index aux table "
1459 				<< table_name << ": " << error;
1460 		}
1461 	} else {
1462 		error = DB_FAIL;
1463 	}
1464 
1465 	return(error);
1466 }
1467 
1468 /****************************************************************//**
1469 Rename a single auxiliary table due to database name change.
1470 @return DB_SUCCESS or error code */
1471 static MY_ATTRIBUTE((nonnull, warn_unused_result))
1472 dberr_t
fts_rename_one_aux_table(const char * new_name,const char * fts_table_old_name,trx_t * trx)1473 fts_rename_one_aux_table(
1474 /*=====================*/
1475 	const char*	new_name,		/*!< in: new parent tbl name */
1476 	const char*	fts_table_old_name,	/*!< in: old aux tbl name */
1477 	trx_t*		trx)			/*!< in: transaction */
1478 {
1479 	char	fts_table_new_name[MAX_TABLE_NAME_LEN];
1480 	ulint	new_db_name_len = dict_get_db_name_len(new_name);
1481 	ulint	old_db_name_len = dict_get_db_name_len(fts_table_old_name);
1482 	ulint	table_new_name_len = strlen(fts_table_old_name)
1483 				     + new_db_name_len - old_db_name_len;
1484 
1485 	/* Check if the new and old database names are the same, if so,
1486 	nothing to do */
1487 	ut_ad((new_db_name_len != old_db_name_len)
1488 	      || strncmp(new_name, fts_table_old_name, old_db_name_len) != 0);
1489 
1490 	/* Get the database name from "new_name", and table name
1491 	from the fts_table_old_name */
1492 	strncpy(fts_table_new_name, new_name, new_db_name_len);
1493 	strncpy(fts_table_new_name + new_db_name_len,
1494 	       strchr(fts_table_old_name, '/'),
1495 	       table_new_name_len - new_db_name_len);
1496 	fts_table_new_name[table_new_name_len] = 0;
1497 
1498 	return row_rename_table_for_mysql(
1499 		fts_table_old_name, fts_table_new_name, trx, false, false);
1500 }
1501 
1502 /****************************************************************//**
1503 Rename auxiliary tables for all fts index for a table. This(rename)
1504 is due to database name change
1505 @return DB_SUCCESS or error code */
1506 dberr_t
fts_rename_aux_tables(dict_table_t * table,const char * new_name,trx_t * trx)1507 fts_rename_aux_tables(
1508 /*==================*/
1509 	dict_table_t*	table,		/*!< in: user Table */
1510 	const char*     new_name,       /*!< in: new table name */
1511 	trx_t*		trx)		/*!< in: transaction */
1512 {
1513 	ulint		i;
1514 	fts_table_t	fts_table;
1515 
1516 	FTS_INIT_FTS_TABLE(&fts_table, NULL, FTS_COMMON_TABLE, table);
1517 
1518 	dberr_t err = DB_SUCCESS;
1519 	char old_table_name[MAX_FULL_NAME_LEN];
1520 
1521 	/* Rename common auxiliary tables */
1522 	for (i = 0; fts_common_tables[i] != NULL; ++i) {
1523 		fts_table.suffix = fts_common_tables[i];
1524 		fts_get_table_name(&fts_table, old_table_name, true);
1525 
1526 		err = fts_rename_one_aux_table(new_name, old_table_name, trx);
1527 
1528 		if (err != DB_SUCCESS) {
1529 			return(err);
1530 		}
1531 	}
1532 
1533 	fts_t*	fts = table->fts;
1534 
1535 	/* Rename index specific auxiliary tables */
1536 	for (i = 0; fts->indexes != 0 && i < ib_vector_size(fts->indexes);
1537 	     ++i) {
1538 		dict_index_t*	index;
1539 
1540 		index = static_cast<dict_index_t*>(
1541 			ib_vector_getp(fts->indexes, i));
1542 
1543 		FTS_INIT_INDEX_TABLE(&fts_table, NULL, FTS_INDEX_TABLE, index);
1544 
1545 		for (ulint j = 0; j < FTS_NUM_AUX_INDEX; ++j) {
1546 			fts_table.suffix = fts_get_suffix(j);
1547 			fts_get_table_name(&fts_table, old_table_name, true);
1548 
1549 			err = fts_rename_one_aux_table(
1550 				new_name, old_table_name, trx);
1551 
1552 			DBUG_EXECUTE_IF("fts_rename_failure",
1553 					err = DB_DEADLOCK;
1554 					fts_sql_rollback(trx););
1555 
1556 			if (err != DB_SUCCESS) {
1557 				return(err);
1558 			}
1559 		}
1560 	}
1561 
1562 	return(DB_SUCCESS);
1563 }
1564 
1565 /** Drops the common ancillary tables needed for supporting an FTS index
1566 on the given table. row_mysql_lock_data_dictionary must have been called
1567 before this.
1568 @param[in]	trx		transaction to drop fts common table
1569 @param[in]	fts_table	table with an FTS index
1570 @param[in]	drop_orphan	True if the function is used to drop
1571 				orphaned table
1572 @return DB_SUCCESS or error code */
1573 static dberr_t
fts_drop_common_tables(trx_t * trx,fts_table_t * fts_table,bool drop_orphan=false)1574 fts_drop_common_tables(
1575 	trx_t*		trx,
1576 	fts_table_t*	fts_table,
1577 	bool		drop_orphan=false)
1578 {
1579 	ulint		i;
1580 	dberr_t		error = DB_SUCCESS;
1581 
1582 	for (i = 0; fts_common_tables[i] != NULL; ++i) {
1583 		dberr_t	err;
1584 		char	table_name[MAX_FULL_NAME_LEN];
1585 
1586 		fts_table->suffix = fts_common_tables[i];
1587 		fts_get_table_name(fts_table, table_name, true);
1588 
1589 		err = fts_drop_table(trx, table_name);
1590 
1591 		/* We only return the status of the last error. */
1592 		if (err != DB_SUCCESS && err != DB_FAIL) {
1593 			error = err;
1594 		}
1595 
1596 		if (drop_orphan && err == DB_FAIL) {
1597 			char* path = fil_make_filepath(
1598 					NULL, table_name, IBD, false);
1599 			if (path != NULL) {
1600 				os_file_delete_if_exists(
1601 					innodb_data_file_key, path, NULL);
1602 				ut_free(path);
1603 			}
1604 		}
1605 	}
1606 
1607 	return(error);
1608 }
1609 
1610 /****************************************************************//**
1611 Since we do a horizontal split on the index table, we need to drop
1612 all the split tables.
1613 @return DB_SUCCESS or error code */
1614 static
1615 dberr_t
fts_drop_index_split_tables(trx_t * trx,dict_index_t * index)1616 fts_drop_index_split_tables(
1617 /*========================*/
1618 	trx_t*		trx,			/*!< in: transaction */
1619 	dict_index_t*	index)			/*!< in: fts instance */
1620 
1621 {
1622 	ulint		i;
1623 	fts_table_t	fts_table;
1624 	dberr_t		error = DB_SUCCESS;
1625 
1626 	FTS_INIT_INDEX_TABLE(&fts_table, NULL, FTS_INDEX_TABLE, index);
1627 
1628 	for (i = 0; i < FTS_NUM_AUX_INDEX; ++i) {
1629 		dberr_t	err;
1630 		char	table_name[MAX_FULL_NAME_LEN];
1631 
1632 		fts_table.suffix = fts_get_suffix(i);
1633 		fts_get_table_name(&fts_table, table_name, true);
1634 
1635 		err = fts_drop_table(trx, table_name);
1636 
1637 		/* We only return the status of the last error. */
1638 		if (err != DB_SUCCESS && err != DB_FAIL) {
1639 			error = err;
1640 		}
1641 	}
1642 
1643 	return(error);
1644 }
1645 
1646 /****************************************************************//**
1647 Drops FTS auxiliary tables for an FTS index
1648 @return DB_SUCCESS or error code */
1649 dberr_t
fts_drop_index_tables(trx_t * trx,dict_index_t * index)1650 fts_drop_index_tables(
1651 /*==================*/
1652 	trx_t*		trx,		/*!< in: transaction */
1653 	dict_index_t*	index)		/*!< in: Index to drop */
1654 {
1655 	return(fts_drop_index_split_tables(trx, index));
1656 }
1657 
1658 /****************************************************************//**
1659 Drops FTS ancillary tables needed for supporting an FTS index
1660 on the given table. row_mysql_lock_data_dictionary must have been called
1661 before this.
1662 @return DB_SUCCESS or error code */
1663 static MY_ATTRIBUTE((nonnull, warn_unused_result))
1664 dberr_t
fts_drop_all_index_tables(trx_t * trx,fts_t * fts)1665 fts_drop_all_index_tables(
1666 /*======================*/
1667 	trx_t*		trx,			/*!< in: transaction */
1668 	fts_t*		fts)			/*!< in: fts instance */
1669 {
1670 	dberr_t		error = DB_SUCCESS;
1671 
1672 	for (ulint i = 0;
1673 	     fts->indexes != 0 && i < ib_vector_size(fts->indexes);
1674 	     ++i) {
1675 
1676 		dberr_t		err;
1677 		dict_index_t*	index;
1678 
1679 		index = static_cast<dict_index_t*>(
1680 			ib_vector_getp(fts->indexes, i));
1681 
1682 		err = fts_drop_index_tables(trx, index);
1683 
1684 		if (err != DB_SUCCESS) {
1685 			error = err;
1686 		}
1687 	}
1688 
1689 	return(error);
1690 }
1691 
1692 /*********************************************************************//**
1693 Drops the ancillary tables needed for supporting an FTS index on a
1694 given table. row_mysql_lock_data_dictionary must have been called before
1695 this.
1696 @return DB_SUCCESS or error code */
1697 dberr_t
fts_drop_tables(trx_t * trx,dict_table_t * table)1698 fts_drop_tables(
1699 /*============*/
1700 	trx_t*		trx,		/*!< in: transaction */
1701 	dict_table_t*	table)		/*!< in: table has the FTS index */
1702 {
1703 	dberr_t		error;
1704 	fts_table_t	fts_table;
1705 
1706 	FTS_INIT_FTS_TABLE(&fts_table, NULL, FTS_COMMON_TABLE, table);
1707 
1708 	/* TODO: This is not atomic and can cause problems during recovery. */
1709 
1710 	error = fts_drop_common_tables(trx, &fts_table);
1711 
1712 	if (error == DB_SUCCESS && table->fts) {
1713 		error = fts_drop_all_index_tables(trx, table->fts);
1714 	}
1715 
1716 	return(error);
1717 }
1718 
1719 /** Create dict_table_t object for FTS Aux tables.
1720 @param[in]	aux_table_name	FTS Aux table name
1721 @param[in]	table		table object of FTS Index
1722 @param[in]	n_cols		number of columns for FTS Aux table
1723 @return table object for FTS Aux table */
1724 static
1725 dict_table_t*
fts_create_in_mem_aux_table(const char * aux_table_name,const dict_table_t * table,ulint n_cols)1726 fts_create_in_mem_aux_table(
1727 	const char*		aux_table_name,
1728 	const dict_table_t*	table,
1729 	ulint			n_cols)
1730 {
1731 	dict_table_t*	new_table = dict_mem_table_create(
1732 		aux_table_name, NULL, n_cols, 0, table->flags,
1733 		table->space_id == TRX_SYS_SPACE
1734 		? 0 : table->space_id == SRV_TMP_SPACE_ID
1735 		? DICT_TF2_TEMPORARY : DICT_TF2_USE_FILE_PER_TABLE);
1736 
1737 	if (DICT_TF_HAS_DATA_DIR(table->flags)) {
1738 		ut_ad(table->data_dir_path != NULL);
1739 		new_table->data_dir_path = mem_heap_strdup(
1740 			new_table->heap, table->data_dir_path);
1741 	}
1742 
1743 	return(new_table);
1744 }
1745 
1746 /** Function to create on FTS common table.
1747 @param[in,out]	trx		InnoDB transaction
1748 @param[in]	table		Table that has FTS Index
1749 @param[in]	fts_table_name	FTS AUX table name
1750 @param[in]	fts_suffix	FTS AUX table suffix
1751 @param[in,out]	heap		temporary memory heap
1752 @return table object if created, else NULL */
1753 static
1754 dict_table_t*
fts_create_one_common_table(trx_t * trx,const dict_table_t * table,const char * fts_table_name,const char * fts_suffix,mem_heap_t * heap)1755 fts_create_one_common_table(
1756 	trx_t*			trx,
1757 	const dict_table_t*	table,
1758 	const char*		fts_table_name,
1759 	const char*		fts_suffix,
1760 	mem_heap_t*		heap)
1761 {
1762 	dict_table_t*		new_table;
1763 	dberr_t			error;
1764 	bool			is_config = strcmp(fts_suffix, "CONFIG") == 0;
1765 
1766 	if (!is_config) {
1767 
1768 		new_table = fts_create_in_mem_aux_table(
1769 			fts_table_name, table, FTS_DELETED_TABLE_NUM_COLS);
1770 
1771 		dict_mem_table_add_col(
1772 			new_table, heap, "doc_id", DATA_INT, DATA_UNSIGNED,
1773 			FTS_DELETED_TABLE_COL_LEN);
1774 	} else {
1775 		/* Config table has different schema. */
1776 		new_table = fts_create_in_mem_aux_table(
1777 			fts_table_name, table, FTS_CONFIG_TABLE_NUM_COLS);
1778 
1779 		dict_mem_table_add_col(
1780 			new_table, heap, "key", DATA_VARCHAR, 0,
1781 			FTS_CONFIG_TABLE_KEY_COL_LEN);
1782 
1783 		dict_mem_table_add_col(
1784 			new_table, heap, "value", DATA_VARCHAR, DATA_NOT_NULL,
1785 			FTS_CONFIG_TABLE_VALUE_COL_LEN);
1786 	}
1787 
1788 	dict_table_add_system_columns(new_table, heap);
1789 	error = row_create_table_for_mysql(new_table, trx,
1790 		FIL_ENCRYPTION_DEFAULT, FIL_DEFAULT_ENCRYPTION_KEY);
1791 	if (error == DB_SUCCESS) {
1792 
1793 		dict_index_t*	index = dict_mem_index_create(
1794 			new_table, "FTS_COMMON_TABLE_IND",
1795 			DICT_UNIQUE|DICT_CLUSTERED, 1);
1796 
1797 		if (!is_config) {
1798 			dict_mem_index_add_field(index, "doc_id", 0);
1799 		} else {
1800 			dict_mem_index_add_field(index, "key", 0);
1801 		}
1802 
1803 		/* We save and restore trx->dict_operation because
1804 		row_create_index_for_mysql() changes the operation to
1805 		TRX_DICT_OP_TABLE. */
1806 		trx_dict_op_t op = trx_get_dict_operation(trx);
1807 
1808 		error =	row_create_index_for_mysql(index, trx, NULL);
1809 
1810 		trx->dict_operation = op;
1811 	} else {
1812 err_exit:
1813 		new_table = NULL;
1814 		ib::warn() << "Failed to create FTS common table "
1815 			<< fts_table_name;
1816 		trx->error_state = error;
1817 		return NULL;
1818 	}
1819 
1820 	if (error != DB_SUCCESS) {
1821 		dict_mem_table_free(new_table);
1822 		trx->error_state = DB_SUCCESS;
1823 		row_drop_table_for_mysql(fts_table_name, trx, SQLCOM_DROP_DB);
1824 		goto err_exit;
1825 	}
1826 
1827 	return(new_table);
1828 }
1829 
1830 /** Creates the common auxiliary tables needed for supporting an FTS index
1831 on the given table. row_mysql_lock_data_dictionary must have been called
1832 before this.
1833 The following tables are created.
1834 CREATE TABLE $FTS_PREFIX_DELETED
1835 	(doc_id BIGINT UNSIGNED, UNIQUE CLUSTERED INDEX on doc_id)
1836 CREATE TABLE $FTS_PREFIX_DELETED_CACHE
1837 	(doc_id BIGINT UNSIGNED, UNIQUE CLUSTERED INDEX on doc_id)
1838 CREATE TABLE $FTS_PREFIX_BEING_DELETED
1839 	(doc_id BIGINT UNSIGNED, UNIQUE CLUSTERED INDEX on doc_id)
1840 CREATE TABLE $FTS_PREFIX_BEING_DELETED_CACHE
1841 	(doc_id BIGINT UNSIGNED, UNIQUE CLUSTERED INDEX on doc_id)
1842 CREATE TABLE $FTS_PREFIX_CONFIG
1843 	(key CHAR(50), value CHAR(200), UNIQUE CLUSTERED INDEX on key)
1844 @param[in,out]	trx			transaction
1845 @param[in,out]	table			table with FTS index
1846 @param[in]	skip_doc_id_index	Skip index on doc id
1847 @return DB_SUCCESS if succeed */
1848 dberr_t
fts_create_common_tables(trx_t * trx,dict_table_t * table,bool skip_doc_id_index)1849 fts_create_common_tables(
1850 	trx_t*		trx,
1851 	dict_table_t*	table,
1852 	bool		skip_doc_id_index)
1853 {
1854 	dberr_t		error;
1855 	que_t*		graph;
1856 	fts_table_t	fts_table;
1857 	mem_heap_t*	heap = mem_heap_create(1024);
1858 	pars_info_t*	info;
1859 	char		fts_name[MAX_FULL_NAME_LEN];
1860 	char		full_name[sizeof(fts_common_tables) / sizeof(char*)]
1861 				[MAX_FULL_NAME_LEN];
1862 
1863 	dict_index_t*					index = NULL;
1864 	trx_dict_op_t					op;
1865 	/* common_tables vector is used for dropping FTS common tables
1866 	on error condition. */
1867 	std::vector<dict_table_t*>			common_tables;
1868 	std::vector<dict_table_t*>::const_iterator	it;
1869 
1870 	FTS_INIT_FTS_TABLE(&fts_table, NULL, FTS_COMMON_TABLE, table);
1871 
1872 	op = trx_get_dict_operation(trx);
1873 
1874 	error = fts_drop_common_tables(trx, &fts_table);
1875 
1876 	if (error != DB_SUCCESS) {
1877 
1878 		goto func_exit;
1879 	}
1880 
1881 	/* Create the FTS tables that are common to an FTS index. */
1882 	for (ulint i = 0; fts_common_tables[i] != NULL; ++i) {
1883 
1884 		fts_table.suffix = fts_common_tables[i];
1885 		fts_get_table_name(&fts_table, full_name[i], true);
1886 		dict_table_t*	common_table = fts_create_one_common_table(
1887 			trx, table, full_name[i], fts_table.suffix, heap);
1888 
1889 		if (!common_table) {
1890 			trx->error_state = DB_SUCCESS;
1891 			error = DB_ERROR;
1892 			goto func_exit;
1893 		} else {
1894 			common_tables.push_back(common_table);
1895 		}
1896 
1897 		mem_heap_empty(heap);
1898 
1899 		DBUG_EXECUTE_IF("ib_fts_aux_table_error",
1900 			/* Return error after creating FTS_AUX_CONFIG table. */
1901 			if (i == 4) {
1902 				error = DB_ERROR;
1903 				goto func_exit;
1904 			}
1905 		);
1906 
1907 	}
1908 
1909 	/* Write the default settings to the config table. */
1910 	info = pars_info_create();
1911 
1912 	fts_table.suffix = "CONFIG";
1913 	fts_get_table_name(&fts_table, fts_name, true);
1914 	pars_info_bind_id(info, "config_table", fts_name);
1915 
1916 	graph = fts_parse_sql_no_dict_lock(
1917 		info, fts_config_table_insert_values_sql);
1918 
1919 	error = fts_eval_sql(trx, graph);
1920 
1921 	que_graph_free(graph);
1922 
1923 	if (error != DB_SUCCESS || skip_doc_id_index) {
1924 
1925 		goto func_exit;
1926 	}
1927 
1928 	index = dict_mem_index_create(table, FTS_DOC_ID_INDEX_NAME,
1929 				      DICT_UNIQUE, 1);
1930 	dict_mem_index_add_field(index, FTS_DOC_ID_COL_NAME, 0);
1931 
1932 	op = trx_get_dict_operation(trx);
1933 
1934 	error =	row_create_index_for_mysql(index, trx, NULL);
1935 
1936 func_exit:
1937 	if (error != DB_SUCCESS) {
1938 		for (it = common_tables.begin(); it != common_tables.end();
1939 		     ++it) {
1940 			row_drop_table_for_mysql((*it)->name.m_name, trx,
1941 						 SQLCOM_DROP_DB);
1942 		}
1943 	}
1944 
1945 	trx->dict_operation = op;
1946 
1947 	common_tables.clear();
1948 	mem_heap_free(heap);
1949 
1950 	return(error);
1951 }
1952 
1953 /** Create one FTS auxiliary index table for an FTS index.
1954 @param[in,out]	trx		transaction
1955 @param[in]	index		the index instance
1956 @param[in]	fts_table	fts_table structure
1957 @param[in,out]	heap		temporary memory heap
1958 @see row_merge_create_fts_sort_index()
1959 @return DB_SUCCESS or error code */
1960 static
1961 dict_table_t*
fts_create_one_index_table(trx_t * trx,const dict_index_t * index,const fts_table_t * fts_table,mem_heap_t * heap)1962 fts_create_one_index_table(
1963 	trx_t*			trx,
1964 	const dict_index_t*	index,
1965 	const fts_table_t*	fts_table,
1966 	mem_heap_t*		heap)
1967 {
1968 	dict_field_t*		field;
1969 	dict_table_t*		new_table;
1970 	char			table_name[MAX_FULL_NAME_LEN];
1971 	dberr_t			error;
1972 	CHARSET_INFO*		charset;
1973 
1974 	ut_ad(index->type & DICT_FTS);
1975 
1976 	fts_get_table_name(fts_table, table_name, true);
1977 
1978 	new_table = fts_create_in_mem_aux_table(
1979 			table_name, fts_table->table,
1980 			FTS_AUX_INDEX_TABLE_NUM_COLS);
1981 
1982 	field = dict_index_get_nth_field(index, 0);
1983 	charset = fts_get_charset(field->col->prtype);
1984 
1985 	dict_mem_table_add_col(new_table, heap, "word",
1986 			       charset == &my_charset_latin1
1987 			       ? DATA_VARCHAR : DATA_VARMYSQL,
1988 			       field->col->prtype,
1989 			       FTS_MAX_WORD_LEN_IN_CHAR
1990 			       * unsigned(field->col->mbmaxlen));
1991 
1992 	dict_mem_table_add_col(new_table, heap, "first_doc_id", DATA_INT,
1993 			       DATA_NOT_NULL | DATA_UNSIGNED,
1994 			       FTS_INDEX_FIRST_DOC_ID_LEN);
1995 
1996 	dict_mem_table_add_col(new_table, heap, "last_doc_id", DATA_INT,
1997 			       DATA_NOT_NULL | DATA_UNSIGNED,
1998 			       FTS_INDEX_LAST_DOC_ID_LEN);
1999 
2000 	dict_mem_table_add_col(new_table, heap, "doc_count", DATA_INT,
2001 			       DATA_NOT_NULL | DATA_UNSIGNED,
2002 			       FTS_INDEX_DOC_COUNT_LEN);
2003 
2004 	/* The precise type calculation is as follows:
2005 	least signficiant byte: MySQL type code (not applicable for sys cols)
2006 	second least : DATA_NOT_NULL | DATA_BINARY_TYPE
2007 	third least  : the MySQL charset-collation code (DATA_MTYPE_MAX) */
2008 
2009 	dict_mem_table_add_col(
2010 		new_table, heap, "ilist", DATA_BLOB,
2011 		(DATA_MTYPE_MAX << 16) | DATA_UNSIGNED | DATA_NOT_NULL,
2012 		FTS_INDEX_ILIST_LEN);
2013 
2014 	dict_table_add_system_columns(new_table, heap);
2015 	error = row_create_table_for_mysql(new_table, trx,
2016 		FIL_ENCRYPTION_DEFAULT, FIL_DEFAULT_ENCRYPTION_KEY);
2017 
2018 	if (error == DB_SUCCESS) {
2019 		dict_index_t*	index = dict_mem_index_create(
2020 			new_table, "FTS_INDEX_TABLE_IND",
2021 			DICT_UNIQUE|DICT_CLUSTERED, 2);
2022 		dict_mem_index_add_field(index, "word", 0);
2023 		dict_mem_index_add_field(index, "first_doc_id", 0);
2024 
2025 		trx_dict_op_t op = trx_get_dict_operation(trx);
2026 
2027 		error =	row_create_index_for_mysql(index, trx, NULL);
2028 
2029 		trx->dict_operation = op;
2030 	} else {
2031 err_exit:
2032 		new_table = NULL;
2033 		ib::warn() << "Failed to create FTS index table "
2034 			<< table_name;
2035 		trx->error_state = error;
2036 		return NULL;
2037 	}
2038 
2039 	if (error != DB_SUCCESS) {
2040 		dict_mem_table_free(new_table);
2041 		trx->error_state = DB_SUCCESS;
2042 		row_drop_table_for_mysql(table_name, trx, SQLCOM_DROP_DB);
2043 		goto err_exit;
2044 	}
2045 
2046 	return(new_table);
2047 }
2048 
2049 /** Creates the column specific ancillary tables needed for supporting an
2050 FTS index on the given table. row_mysql_lock_data_dictionary must have
2051 been called before this.
2052 
2053 All FTS AUX Index tables have the following schema.
2054 CREAT TABLE $FTS_PREFIX_INDEX_[1-6](
2055 	word		VARCHAR(FTS_MAX_WORD_LEN),
2056 	first_doc_id	INT NOT NULL,
2057 	last_doc_id	UNSIGNED NOT NULL,
2058 	doc_count	UNSIGNED INT NOT NULL,
2059 	ilist		VARBINARY NOT NULL,
2060 	UNIQUE CLUSTERED INDEX ON (word, first_doc_id))
2061 @param[in,out]	trx	dictionary transaction
2062 @param[in]	index	fulltext index
2063 @param[in]	id	table id
2064 @return DB_SUCCESS or error code */
2065 dberr_t
fts_create_index_tables(trx_t * trx,const dict_index_t * index,table_id_t id)2066 fts_create_index_tables(trx_t* trx, const dict_index_t* index, table_id_t id)
2067 {
2068 	ulint		i;
2069 	fts_table_t	fts_table;
2070 	dberr_t		error = DB_SUCCESS;
2071 	mem_heap_t*	heap = mem_heap_create(1024);
2072 
2073 	fts_table.type = FTS_INDEX_TABLE;
2074 	fts_table.index_id = index->id;
2075 	fts_table.table_id = id;
2076 	fts_table.table = index->table;
2077 
2078 	/* aux_idx_tables vector is used for dropping FTS AUX INDEX
2079 	tables on error condition. */
2080 	std::vector<dict_table_t*>			aux_idx_tables;
2081 	std::vector<dict_table_t*>::const_iterator	it;
2082 
2083 	for (i = 0; i < FTS_NUM_AUX_INDEX && error == DB_SUCCESS; ++i) {
2084 		dict_table_t*	new_table;
2085 
2086 		/* Create the FTS auxiliary tables that are specific
2087 		to an FTS index. We need to preserve the table_id %s
2088 		which fts_parse_sql_no_dict_lock() will fill in for us. */
2089 		fts_table.suffix = fts_get_suffix(i);
2090 
2091 		new_table = fts_create_one_index_table(
2092 			trx, index, &fts_table, heap);
2093 
2094 		if (new_table == NULL) {
2095 			error = DB_FAIL;
2096 			break;
2097 		} else {
2098 			aux_idx_tables.push_back(new_table);
2099 		}
2100 
2101 		mem_heap_empty(heap);
2102 
2103 		DBUG_EXECUTE_IF("ib_fts_index_table_error",
2104 			/* Return error after creating FTS_INDEX_5
2105 			aux table. */
2106 			if (i == 4) {
2107 				error = DB_FAIL;
2108 				break;
2109 			}
2110 		);
2111 	}
2112 
2113 	if (error != DB_SUCCESS) {
2114 
2115 		for (it = aux_idx_tables.begin(); it != aux_idx_tables.end();
2116 		     ++it) {
2117 			row_drop_table_for_mysql((*it)->name.m_name, trx,
2118 						 SQLCOM_DROP_DB);
2119 		}
2120 	}
2121 
2122 	aux_idx_tables.clear();
2123 	mem_heap_free(heap);
2124 
2125 	return(error);
2126 }
2127 
2128 /******************************************************************//**
2129 Calculate the new state of a row given the existing state and a new event.
2130 @return new state of row */
2131 static
2132 fts_row_state
fts_trx_row_get_new_state(fts_row_state old_state,fts_row_state event)2133 fts_trx_row_get_new_state(
2134 /*======================*/
2135 	fts_row_state	old_state,		/*!< in: existing state of row */
2136 	fts_row_state	event)			/*!< in: new event */
2137 {
2138 	/* The rules for transforming states:
2139 
2140 	I = inserted
2141 	M = modified
2142 	D = deleted
2143 	N = nothing
2144 
2145 	M+D -> D:
2146 
2147 	If the row existed before the transaction started and it is modified
2148 	during the transaction, followed by a deletion of the row, only the
2149 	deletion will be signaled.
2150 
2151 	M+ -> M:
2152 
2153 	If the row existed before the transaction started and it is modified
2154 	more than once during the transaction, only the last modification
2155 	will be signaled.
2156 
2157 	IM*D -> N:
2158 
2159 	If a new row is added during the transaction (and possibly modified
2160 	after its initial insertion) but it is deleted before the end of the
2161 	transaction, nothing will be signaled.
2162 
2163 	IM* -> I:
2164 
2165 	If a new row is added during the transaction and modified after its
2166 	initial insertion, only the addition will be signaled.
2167 
2168 	M*DI -> M:
2169 
2170 	If the row existed before the transaction started and it is deleted,
2171 	then re-inserted, only a modification will be signaled. Note that
2172 	this case is only possible if the table is using the row's primary
2173 	key for FTS row ids, since those can be re-inserted by the user,
2174 	which is not true for InnoDB generated row ids.
2175 
2176 	It is easily seen that the above rules decompose such that we do not
2177 	need to store the row's entire history of events. Instead, we can
2178 	store just one state for the row and update that when new events
2179 	arrive. Then we can implement the above rules as a two-dimensional
2180 	look-up table, and get checking of invalid combinations "for free"
2181 	in the process. */
2182 
2183 	/* The lookup table for transforming states. old_state is the
2184 	Y-axis, event is the X-axis. */
2185 	static const fts_row_state table[4][4] = {
2186 			/*    I            M            D            N */
2187 		/* I */	{ FTS_INVALID, FTS_INSERT,  FTS_NOTHING, FTS_INVALID },
2188 		/* M */	{ FTS_INVALID, FTS_MODIFY,  FTS_DELETE,  FTS_INVALID },
2189 		/* D */	{ FTS_MODIFY,  FTS_INVALID, FTS_INVALID, FTS_INVALID },
2190 		/* N */	{ FTS_INVALID, FTS_INVALID, FTS_INVALID, FTS_INVALID }
2191 	};
2192 
2193 	fts_row_state result;
2194 
2195 	ut_a(old_state < FTS_INVALID);
2196 	ut_a(event < FTS_INVALID);
2197 
2198 	result = table[(int) old_state][(int) event];
2199 	ut_a(result != FTS_INVALID);
2200 
2201 	return(result);
2202 }
2203 
2204 /******************************************************************//**
2205 Create a savepoint instance.
2206 @return savepoint instance */
2207 static
2208 fts_savepoint_t*
fts_savepoint_create(ib_vector_t * savepoints,const char * name,mem_heap_t * heap)2209 fts_savepoint_create(
2210 /*=================*/
2211 	ib_vector_t*	savepoints,		/*!< out: InnoDB transaction */
2212 	const char*	name,			/*!< in: savepoint name */
2213 	mem_heap_t*	heap)			/*!< in: heap */
2214 {
2215 	fts_savepoint_t*	savepoint;
2216 
2217 	savepoint = static_cast<fts_savepoint_t*>(
2218 		ib_vector_push(savepoints, NULL));
2219 
2220 	memset(savepoint, 0x0, sizeof(*savepoint));
2221 
2222 	if (name) {
2223 		savepoint->name = mem_heap_strdup(heap, name);
2224 	}
2225 
2226 	savepoint->tables = rbt_create(
2227 		sizeof(fts_trx_table_t*), fts_trx_table_cmp);
2228 
2229 	return(savepoint);
2230 }
2231 
2232 /******************************************************************//**
2233 Create an FTS trx.
2234 @return FTS trx */
2235 fts_trx_t*
fts_trx_create(trx_t * trx)2236 fts_trx_create(
2237 /*===========*/
2238 	trx_t*	trx)				/*!< in/out: InnoDB
2239 						transaction */
2240 {
2241 	fts_trx_t*		ftt;
2242 	ib_alloc_t*		heap_alloc;
2243 	mem_heap_t*		heap = mem_heap_create(1024);
2244 	trx_named_savept_t*	savep;
2245 
2246 	ut_a(trx->fts_trx == NULL);
2247 
2248 	ftt = static_cast<fts_trx_t*>(mem_heap_alloc(heap, sizeof(fts_trx_t)));
2249 	ftt->trx = trx;
2250 	ftt->heap = heap;
2251 
2252 	heap_alloc = ib_heap_allocator_create(heap);
2253 
2254 	ftt->savepoints = static_cast<ib_vector_t*>(ib_vector_create(
2255 		heap_alloc, sizeof(fts_savepoint_t), 4));
2256 
2257 	ftt->last_stmt = static_cast<ib_vector_t*>(ib_vector_create(
2258 		heap_alloc, sizeof(fts_savepoint_t), 4));
2259 
2260 	/* Default instance has no name and no heap. */
2261 	fts_savepoint_create(ftt->savepoints, NULL, NULL);
2262 	fts_savepoint_create(ftt->last_stmt, NULL, NULL);
2263 
2264 	/* Copy savepoints that already set before. */
2265 	for (savep = UT_LIST_GET_FIRST(trx->trx_savepoints);
2266 	     savep != NULL;
2267 	     savep = UT_LIST_GET_NEXT(trx_savepoints, savep)) {
2268 
2269 		fts_savepoint_take(ftt, savep->name);
2270 	}
2271 
2272 	return(ftt);
2273 }
2274 
2275 /******************************************************************//**
2276 Create an FTS trx table.
2277 @return FTS trx table */
2278 static
2279 fts_trx_table_t*
fts_trx_table_create(fts_trx_t * fts_trx,dict_table_t * table)2280 fts_trx_table_create(
2281 /*=================*/
2282 	fts_trx_t*	fts_trx,		/*!< in: FTS trx */
2283 	dict_table_t*	table)			/*!< in: table */
2284 {
2285 	fts_trx_table_t*	ftt;
2286 
2287 	ftt = static_cast<fts_trx_table_t*>(
2288 		mem_heap_alloc(fts_trx->heap, sizeof(*ftt)));
2289 
2290 	memset(ftt, 0x0, sizeof(*ftt));
2291 
2292 	ftt->table = table;
2293 	ftt->fts_trx = fts_trx;
2294 
2295 	ftt->rows = rbt_create(sizeof(fts_trx_row_t), fts_trx_row_doc_id_cmp);
2296 
2297 	return(ftt);
2298 }
2299 
2300 /******************************************************************//**
2301 Clone an FTS trx table.
2302 @return FTS trx table */
2303 static
2304 fts_trx_table_t*
fts_trx_table_clone(const fts_trx_table_t * ftt_src)2305 fts_trx_table_clone(
2306 /*=================*/
2307 	const fts_trx_table_t*	ftt_src)	/*!< in: FTS trx */
2308 {
2309 	fts_trx_table_t*	ftt;
2310 
2311 	ftt = static_cast<fts_trx_table_t*>(
2312 		mem_heap_alloc(ftt_src->fts_trx->heap, sizeof(*ftt)));
2313 
2314 	memset(ftt, 0x0, sizeof(*ftt));
2315 
2316 	ftt->table = ftt_src->table;
2317 	ftt->fts_trx = ftt_src->fts_trx;
2318 
2319 	ftt->rows = rbt_create(sizeof(fts_trx_row_t), fts_trx_row_doc_id_cmp);
2320 
2321 	/* Copy the rb tree values to the new savepoint. */
2322 	rbt_merge_uniq(ftt->rows, ftt_src->rows);
2323 
2324 	/* These are only added on commit. At this stage we only have
2325 	the updated row state. */
2326 	ut_a(ftt_src->added_doc_ids == NULL);
2327 
2328 	return(ftt);
2329 }
2330 
2331 /******************************************************************//**
2332 Initialize the FTS trx instance.
2333 @return FTS trx instance */
2334 static
2335 fts_trx_table_t*
fts_trx_init(trx_t * trx,dict_table_t * table,ib_vector_t * savepoints)2336 fts_trx_init(
2337 /*=========*/
2338 	trx_t*			trx,		/*!< in: transaction */
2339 	dict_table_t*		table,		/*!< in: FTS table instance */
2340 	ib_vector_t*		savepoints)	/*!< in: Savepoints */
2341 {
2342 	fts_trx_table_t*	ftt;
2343 	ib_rbt_bound_t		parent;
2344 	ib_rbt_t*		tables;
2345 	fts_savepoint_t*	savepoint;
2346 
2347 	savepoint = static_cast<fts_savepoint_t*>(ib_vector_last(savepoints));
2348 
2349 	tables = savepoint->tables;
2350 	rbt_search_cmp(tables, &parent, &table->id, fts_trx_table_id_cmp, NULL);
2351 
2352 	if (parent.result == 0) {
2353 		fts_trx_table_t**	fttp;
2354 
2355 		fttp = rbt_value(fts_trx_table_t*, parent.last);
2356 		ftt = *fttp;
2357 	} else {
2358 		ftt = fts_trx_table_create(trx->fts_trx, table);
2359 		rbt_add_node(tables, &parent, &ftt);
2360 	}
2361 
2362 	ut_a(ftt->table == table);
2363 
2364 	return(ftt);
2365 }
2366 
2367 /******************************************************************//**
2368 Notify the FTS system about an operation on an FTS-indexed table. */
2369 static
2370 void
fts_trx_table_add_op(fts_trx_table_t * ftt,doc_id_t doc_id,fts_row_state state,ib_vector_t * fts_indexes)2371 fts_trx_table_add_op(
2372 /*=================*/
2373 	fts_trx_table_t*ftt,			/*!< in: FTS trx table */
2374 	doc_id_t	doc_id,			/*!< in: doc id */
2375 	fts_row_state	state,			/*!< in: state of the row */
2376 	ib_vector_t*	fts_indexes)		/*!< in: FTS indexes affected */
2377 {
2378 	ib_rbt_t*	rows;
2379 	ib_rbt_bound_t	parent;
2380 
2381 	rows = ftt->rows;
2382 	rbt_search(rows, &parent, &doc_id);
2383 
2384 	/* Row id found, update state, and if new state is FTS_NOTHING,
2385 	we delete the row from our tree. */
2386 	if (parent.result == 0) {
2387 		fts_trx_row_t*	row = rbt_value(fts_trx_row_t, parent.last);
2388 
2389 		row->state = fts_trx_row_get_new_state(row->state, state);
2390 
2391 		if (row->state == FTS_NOTHING) {
2392 			if (row->fts_indexes) {
2393 				ib_vector_free(row->fts_indexes);
2394 			}
2395 
2396 			ut_free(rbt_remove_node(rows, parent.last));
2397 			row = NULL;
2398 		} else if (row->fts_indexes != NULL) {
2399 			ib_vector_free(row->fts_indexes);
2400 			row->fts_indexes = fts_indexes;
2401 		}
2402 
2403 	} else { /* Row-id not found, create a new one. */
2404 		fts_trx_row_t	row;
2405 
2406 		row.doc_id = doc_id;
2407 		row.state = state;
2408 		row.fts_indexes = fts_indexes;
2409 
2410 		rbt_add_node(rows, &parent, &row);
2411 	}
2412 }
2413 
2414 /******************************************************************//**
2415 Notify the FTS system about an operation on an FTS-indexed table. */
2416 void
fts_trx_add_op(trx_t * trx,dict_table_t * table,doc_id_t doc_id,fts_row_state state,ib_vector_t * fts_indexes)2417 fts_trx_add_op(
2418 /*===========*/
2419 	trx_t*		trx,			/*!< in: InnoDB transaction */
2420 	dict_table_t*	table,			/*!< in: table */
2421 	doc_id_t	doc_id,			/*!< in: new doc id */
2422 	fts_row_state	state,			/*!< in: state of the row */
2423 	ib_vector_t*	fts_indexes)		/*!< in: FTS indexes affected
2424 						(NULL=all) */
2425 {
2426 	fts_trx_table_t*	tran_ftt;
2427 	fts_trx_table_t*	stmt_ftt;
2428 
2429 	if (!trx->fts_trx) {
2430 		trx->fts_trx = fts_trx_create(trx);
2431 	}
2432 
2433 	tran_ftt = fts_trx_init(trx, table, trx->fts_trx->savepoints);
2434 	stmt_ftt = fts_trx_init(trx, table, trx->fts_trx->last_stmt);
2435 
2436 	fts_trx_table_add_op(tran_ftt, doc_id, state, fts_indexes);
2437 	fts_trx_table_add_op(stmt_ftt, doc_id, state, fts_indexes);
2438 }
2439 
2440 /******************************************************************//**
2441 Fetch callback that converts a textual document id to a binary value and
2442 stores it in the given place.
2443 @return always returns NULL */
2444 static
2445 ibool
fts_fetch_store_doc_id(void * row,void * user_arg)2446 fts_fetch_store_doc_id(
2447 /*===================*/
2448 	void*		row,			/*!< in: sel_node_t* */
2449 	void*		user_arg)		/*!< in: doc_id_t* to store
2450 						doc_id in */
2451 {
2452 	int		n_parsed;
2453 	sel_node_t*	node = static_cast<sel_node_t*>(row);
2454 	doc_id_t*	doc_id = static_cast<doc_id_t*>(user_arg);
2455 	dfield_t*	dfield = que_node_get_val(node->select_list);
2456 	dtype_t*	type = dfield_get_type(dfield);
2457 	ulint		len = dfield_get_len(dfield);
2458 
2459 	char		buf[32];
2460 
2461 	ut_a(dtype_get_mtype(type) == DATA_VARCHAR);
2462 	ut_a(len > 0 && len < sizeof(buf));
2463 
2464 	memcpy(buf, dfield_get_data(dfield), len);
2465 	buf[len] = '\0';
2466 
2467 	n_parsed = sscanf(buf, FTS_DOC_ID_FORMAT, doc_id);
2468 	ut_a(n_parsed == 1);
2469 
2470 	return(FALSE);
2471 }
2472 
#ifdef FTS_CACHE_SIZE_DEBUG
/******************************************************************//**
Get the max cache size in bytes. If there is an error reading the
value we simply print an error message here and return the default
value to the caller. A value read successfully but outside
[FTS_CACHE_SIZE_LOWER_LIMIT_IN_MB, FTS_CACHE_SIZE_UPPER_LIMIT_IN_MB]
is clamped to the nearest limit and a warning is logged.
@return max cache size in bytes */
static
ulint
fts_get_max_cache_size(
/*===================*/
	trx_t*		trx,			/*!< in: transaction */
	fts_table_t*	fts_table)		/*!< in: table instance */
{
	dberr_t		error;
	fts_string_t	value;
	ulong		cache_size_in_mb;

	/* Set to the default value. */
	cache_size_in_mb = FTS_CACHE_SIZE_LOWER_LIMIT_IN_MB;

	/* We set the length of value to the max bytes it can hold. This
	information is used by the callback that reads the value. */
	value.f_n_char = 0;
	value.f_len = FTS_MAX_CONFIG_VALUE_LEN;
	/* The allocator result needs an explicit cast in C++.
	NOTE(review): assumes f_str is a byte*; confirm against the
	fts_string_t declaration. */
	value.f_str = static_cast<byte*>(ut_malloc_nokey(value.f_len + 1));

	error = fts_config_get_value(
		trx, fts_table, FTS_MAX_CACHE_SIZE_IN_MB, &value);

	if (UNIV_LIKELY(error == DB_SUCCESS)) {
		/* Terminate the value so that it can be parsed as a
		C string. */
		value.f_str[value.f_len] = 0;
		cache_size_in_mb = strtoul((char*) value.f_str, NULL, 10);

		if (cache_size_in_mb > FTS_CACHE_SIZE_UPPER_LIMIT_IN_MB) {

			ib::warn() << "FTS max cache size ("
				<< cache_size_in_mb << ") out of range."
				" Minimum value is "
				<< FTS_CACHE_SIZE_LOWER_LIMIT_IN_MB
				<< "MB and the maximum value is "
				<< FTS_CACHE_SIZE_UPPER_LIMIT_IN_MB
				<< "MB, setting cache size to upper limit";

			cache_size_in_mb = FTS_CACHE_SIZE_UPPER_LIMIT_IN_MB;

		} else if  (cache_size_in_mb
			    < FTS_CACHE_SIZE_LOWER_LIMIT_IN_MB) {

			/* Same wording as the upper-limit warning; the
			separating space before the number was missing. */
			ib::warn() << "FTS max cache size ("
				<< cache_size_in_mb << ") out of range."
				" Minimum value is "
				<< FTS_CACHE_SIZE_LOWER_LIMIT_IN_MB
				<< "MB and the maximum value is "
				<< FTS_CACHE_SIZE_UPPER_LIMIT_IN_MB
				<< "MB, setting cache size to lower limit";

			cache_size_in_mb = FTS_CACHE_SIZE_LOWER_LIMIT_IN_MB;
		}
	} else {
		ib::error() << "(" << error << ") reading max"
			" cache config value from config table "
			<< fts_table->table->name;
	}

	ut_free(value.f_str);

	return(cache_size_in_mb * 1024 * 1024);
}
#endif
2542 
2543 /*********************************************************************//**
2544 Update the next and last Doc ID in the CONFIG table to be the input
2545 "doc_id" value (+ 1). We would do so after each FTS index build or
2546 table truncate */
2547 void
fts_update_next_doc_id(trx_t * trx,const dict_table_t * table,doc_id_t doc_id)2548 fts_update_next_doc_id(
2549 /*===================*/
2550 	trx_t*			trx,		/*!< in/out: transaction */
2551 	const dict_table_t*	table,		/*!< in: table */
2552 	doc_id_t		doc_id)		/*!< in: DOC ID to set */
2553 {
2554 	table->fts->cache->synced_doc_id = doc_id;
2555 	table->fts->cache->next_doc_id = doc_id + 1;
2556 
2557 	table->fts->cache->first_doc_id = table->fts->cache->next_doc_id;
2558 
2559 	fts_update_sync_doc_id(
2560 		table, table->fts->cache->synced_doc_id, trx);
2561 
2562 }
2563 
2564 /*********************************************************************//**
2565 Get the next available document id.
2566 @return DB_SUCCESS if OK */
2567 dberr_t
fts_get_next_doc_id(const dict_table_t * table,doc_id_t * doc_id)2568 fts_get_next_doc_id(
2569 /*================*/
2570 	const dict_table_t*	table,		/*!< in: table */
2571 	doc_id_t*		doc_id)		/*!< out: new document id */
2572 {
2573 	fts_cache_t*	cache = table->fts->cache;
2574 
2575 	/* If the Doc ID system has not yet been initialized, we
2576 	will consult the CONFIG table and user table to re-establish
2577 	the initial value of the Doc ID */
2578 	if (cache->first_doc_id == FTS_NULL_DOC_ID) {
2579 		fts_init_doc_id(table);
2580 	}
2581 
2582 	if (!DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID)) {
2583 		*doc_id = FTS_NULL_DOC_ID;
2584 		return(DB_SUCCESS);
2585 	}
2586 
2587 	DEBUG_SYNC_C("get_next_FTS_DOC_ID");
2588 	mutex_enter(&cache->doc_id_lock);
2589 	*doc_id = cache->next_doc_id++;
2590 	mutex_exit(&cache->doc_id_lock);
2591 
2592 	return(DB_SUCCESS);
2593 }
2594 
2595 /*********************************************************************//**
2596 This function fetch the Doc ID from CONFIG table, and compare with
2597 the Doc ID supplied. And store the larger one to the CONFIG table.
2598 @return DB_SUCCESS if OK */
2599 static MY_ATTRIBUTE((nonnull))
2600 dberr_t
fts_cmp_set_sync_doc_id(const dict_table_t * table,doc_id_t cmp_doc_id,ibool read_only,doc_id_t * doc_id)2601 fts_cmp_set_sync_doc_id(
2602 /*====================*/
2603 	const dict_table_t*	table,		/*!< in: table */
2604 	doc_id_t		cmp_doc_id,	/*!< in: Doc ID to compare */
2605 	ibool			read_only,	/*!< in: TRUE if read the
2606 						synced_doc_id only */
2607 	doc_id_t*		doc_id)		/*!< out: larger document id
2608 						after comparing "cmp_doc_id"
2609 						to the one stored in CONFIG
2610 						table */
2611 {
2612 	trx_t*		trx;
2613 	pars_info_t*	info;
2614 	dberr_t		error;
2615 	fts_table_t	fts_table;
2616 	que_t*		graph = NULL;
2617 	fts_cache_t*	cache = table->fts->cache;
2618 	char		table_name[MAX_FULL_NAME_LEN];
2619 retry:
2620 	ut_a(table->fts->doc_col != ULINT_UNDEFINED);
2621 
2622 	fts_table.suffix = "CONFIG";
2623 	fts_table.table_id = table->id;
2624 	fts_table.type = FTS_COMMON_TABLE;
2625 	fts_table.table = table;
2626 
2627 	trx = trx_create();
2628 	if (srv_read_only_mode) {
2629 		trx_start_internal_read_only(trx);
2630 	} else {
2631 		trx_start_internal(trx);
2632 	}
2633 
2634 	trx->op_info = "update the next FTS document id";
2635 
2636 	info = pars_info_create();
2637 
2638 	pars_info_bind_function(
2639 		info, "my_func", fts_fetch_store_doc_id, doc_id);
2640 
2641 	fts_get_table_name(&fts_table, table_name);
2642 	pars_info_bind_id(info, "config_table", table_name);
2643 
2644 	graph = fts_parse_sql(
2645 		&fts_table, info,
2646 		"DECLARE FUNCTION my_func;\n"
2647 		"DECLARE CURSOR c IS SELECT value FROM $config_table"
2648 		" WHERE key = 'synced_doc_id' FOR UPDATE;\n"
2649 		"BEGIN\n"
2650 		""
2651 		"OPEN c;\n"
2652 		"WHILE 1 = 1 LOOP\n"
2653 		"  FETCH c INTO my_func();\n"
2654 		"  IF c % NOTFOUND THEN\n"
2655 		"    EXIT;\n"
2656 		"  END IF;\n"
2657 		"END LOOP;\n"
2658 		"CLOSE c;");
2659 
2660 	*doc_id = 0;
2661 
2662 	error = fts_eval_sql(trx, graph);
2663 
2664 	fts_que_graph_free_check_lock(&fts_table, NULL, graph);
2665 
2666 	// FIXME: We need to retry deadlock errors
2667 	if (error != DB_SUCCESS) {
2668 		goto func_exit;
2669 	}
2670 
2671 	if (read_only) {
2672 		/* InnoDB stores actual synced_doc_id value + 1 in
2673 		FTS_CONFIG table. Reduce the value by 1 while reading
2674 		after startup. */
2675 		if (*doc_id) *doc_id -= 1;
2676 		goto func_exit;
2677 	}
2678 
2679 	if (cmp_doc_id == 0 && *doc_id) {
2680 		cache->synced_doc_id = *doc_id - 1;
2681 	} else {
2682 		cache->synced_doc_id = ut_max(cmp_doc_id, *doc_id);
2683 	}
2684 
2685 	mutex_enter(&cache->doc_id_lock);
2686 	/* For each sync operation, we will add next_doc_id by 1,
2687 	so to mark a sync operation */
2688 	if (cache->next_doc_id < cache->synced_doc_id + 1) {
2689 		cache->next_doc_id = cache->synced_doc_id + 1;
2690 	}
2691 	mutex_exit(&cache->doc_id_lock);
2692 
2693 	if (cmp_doc_id > *doc_id) {
2694 		error = fts_update_sync_doc_id(
2695 			table, cache->synced_doc_id, trx);
2696 	}
2697 
2698 	*doc_id = cache->next_doc_id;
2699 
2700 func_exit:
2701 
2702 	if (UNIV_LIKELY(error == DB_SUCCESS)) {
2703 		fts_sql_commit(trx);
2704 	} else {
2705 		*doc_id = 0;
2706 
2707 		ib::error() << "(" << error << ") while getting next doc id "
2708 			"for table " << table->name;
2709 		fts_sql_rollback(trx);
2710 
2711 		if (error == DB_DEADLOCK) {
2712 			os_thread_sleep(FTS_DEADLOCK_RETRY_WAIT);
2713 			goto retry;
2714 		}
2715 	}
2716 
2717 	trx->free();
2718 
2719 	return(error);
2720 }
2721 
2722 /*********************************************************************//**
2723 Update the last document id. This function could create a new
2724 transaction to update the last document id.
2725 @return DB_SUCCESS if OK */
2726 static
2727 dberr_t
fts_update_sync_doc_id(const dict_table_t * table,doc_id_t doc_id,trx_t * trx)2728 fts_update_sync_doc_id(
2729 /*===================*/
2730 	const dict_table_t*	table,		/*!< in: table */
2731 	doc_id_t		doc_id,		/*!< in: last document id */
2732 	trx_t*			trx)		/*!< in: update trx, or NULL */
2733 {
2734 	byte		id[FTS_MAX_ID_LEN];
2735 	pars_info_t*	info;
2736 	fts_table_t	fts_table;
2737 	ulint		id_len;
2738 	que_t*		graph = NULL;
2739 	dberr_t		error;
2740 	ibool		local_trx = FALSE;
2741 	fts_cache_t*	cache = table->fts->cache;
2742 	char		fts_name[MAX_FULL_NAME_LEN];
2743 
2744 	if (srv_read_only_mode) {
2745 		return DB_READ_ONLY;
2746 	}
2747 
2748 	fts_table.suffix = "CONFIG";
2749 	fts_table.table_id = table->id;
2750 	fts_table.type = FTS_COMMON_TABLE;
2751 	fts_table.table = table;
2752 
2753 	if (!trx) {
2754 		trx = trx_create();
2755 		trx_start_internal(trx);
2756 
2757 		trx->op_info = "setting last FTS document id";
2758 		local_trx = TRUE;
2759 	}
2760 
2761 	info = pars_info_create();
2762 
2763 	id_len = (ulint) snprintf(
2764 		(char*) id, sizeof(id), FTS_DOC_ID_FORMAT, doc_id + 1);
2765 
2766 	pars_info_bind_varchar_literal(info, "doc_id", id, id_len);
2767 
2768 	fts_get_table_name(&fts_table, fts_name,
2769 			   table->fts->dict_locked);
2770 	pars_info_bind_id(info, "table_name", fts_name);
2771 
2772 	graph = fts_parse_sql(
2773 		&fts_table, info,
2774 		"BEGIN"
2775 		" UPDATE $table_name SET value = :doc_id"
2776 		" WHERE key = 'synced_doc_id';");
2777 
2778 	error = fts_eval_sql(trx, graph);
2779 
2780 	fts_que_graph_free_check_lock(&fts_table, NULL, graph);
2781 
2782 	if (local_trx) {
2783 		if (UNIV_LIKELY(error == DB_SUCCESS)) {
2784 			fts_sql_commit(trx);
2785 			cache->synced_doc_id = doc_id;
2786 		} else {
2787 			ib::error() << "(" << error << ") while"
2788 				" updating last doc id for table"
2789 				<< table->name;
2790 
2791 			fts_sql_rollback(trx);
2792 		}
2793 		trx->free();
2794 	}
2795 
2796 	return(error);
2797 }
2798 
2799 /*********************************************************************//**
2800 Create a new fts_doc_ids_t.
2801 @return new fts_doc_ids_t */
2802 fts_doc_ids_t*
fts_doc_ids_create(void)2803 fts_doc_ids_create(void)
2804 /*====================*/
2805 {
2806 	fts_doc_ids_t*	fts_doc_ids;
2807 	mem_heap_t*	heap = mem_heap_create(512);
2808 
2809 	fts_doc_ids = static_cast<fts_doc_ids_t*>(
2810 		mem_heap_alloc(heap, sizeof(*fts_doc_ids)));
2811 
2812 	fts_doc_ids->self_heap = ib_heap_allocator_create(heap);
2813 
2814 	fts_doc_ids->doc_ids = static_cast<ib_vector_t*>(ib_vector_create(
2815 		fts_doc_ids->self_heap, sizeof(doc_id_t), 32));
2816 
2817 	return(fts_doc_ids);
2818 }
2819 
2820 /*********************************************************************//**
2821 Do commit-phase steps necessary for the insertion of a new row. */
2822 void
fts_add(fts_trx_table_t * ftt,fts_trx_row_t * row)2823 fts_add(
2824 /*====*/
2825 	fts_trx_table_t*ftt,			/*!< in: FTS trx table */
2826 	fts_trx_row_t*	row)			/*!< in: row */
2827 {
2828 	dict_table_t*	table = ftt->table;
2829 	doc_id_t	doc_id = row->doc_id;
2830 
2831 	ut_a(row->state == FTS_INSERT || row->state == FTS_MODIFY);
2832 
2833 	fts_add_doc_by_id(ftt, doc_id);
2834 
2835 	mutex_enter(&table->fts->cache->deleted_lock);
2836 	++table->fts->cache->added;
2837 	mutex_exit(&table->fts->cache->deleted_lock);
2838 
2839 	if (!DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID)
2840 	    && doc_id >= table->fts->cache->next_doc_id) {
2841 		table->fts->cache->next_doc_id = doc_id + 1;
2842 	}
2843 }
2844 
2845 /*********************************************************************//**
2846 Do commit-phase steps necessary for the deletion of a row.
2847 @return DB_SUCCESS or error code */
2848 static MY_ATTRIBUTE((nonnull, warn_unused_result))
2849 dberr_t
fts_delete(fts_trx_table_t * ftt,fts_trx_row_t * row)2850 fts_delete(
2851 /*=======*/
2852 	fts_trx_table_t*ftt,			/*!< in: FTS trx table */
2853 	fts_trx_row_t*	row)			/*!< in: row */
2854 {
2855 	que_t*		graph;
2856 	fts_table_t	fts_table;
2857 	dberr_t		error = DB_SUCCESS;
2858 	doc_id_t	write_doc_id;
2859 	dict_table_t*	table = ftt->table;
2860 	doc_id_t	doc_id = row->doc_id;
2861 	trx_t*		trx = ftt->fts_trx->trx;
2862 	pars_info_t*	info = pars_info_create();
2863 	fts_cache_t*	cache = table->fts->cache;
2864 
2865 	/* we do not index Documents whose Doc ID value is 0 */
2866 	if (doc_id == FTS_NULL_DOC_ID) {
2867 		ut_ad(!DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID));
2868 		return(error);
2869 	}
2870 
2871 	ut_a(row->state == FTS_DELETE || row->state == FTS_MODIFY);
2872 
2873 	FTS_INIT_FTS_TABLE(&fts_table, "DELETED", FTS_COMMON_TABLE, table);
2874 
2875 	/* Convert to "storage" byte order. */
2876 	fts_write_doc_id((byte*) &write_doc_id, doc_id);
2877 	fts_bind_doc_id(info, "doc_id", &write_doc_id);
2878 
2879 	/* It is possible we update a record that has not yet been sync-ed
2880 	into cache from last crash (delete Doc will not initialize the
2881 	sync). Avoid any added counter accounting until the FTS cache
2882 	is re-established and sync-ed */
2883 	if (table->fts->added_synced
2884 	    && doc_id > cache->synced_doc_id) {
2885 		mutex_enter(&table->fts->cache->deleted_lock);
2886 
2887 		/* The Doc ID could belong to those left in
2888 		ADDED table from last crash. So need to check
2889 		if it is less than first_doc_id when we initialize
2890 		the Doc ID system after reboot */
2891 		if (doc_id >= table->fts->cache->first_doc_id
2892 		    && table->fts->cache->added > 0) {
2893 			--table->fts->cache->added;
2894 		}
2895 
2896 		mutex_exit(&table->fts->cache->deleted_lock);
2897 
2898 		/* Only if the row was really deleted. */
2899 		ut_a(row->state == FTS_DELETE || row->state == FTS_MODIFY);
2900 	}
2901 
2902 	/* Note the deleted document for OPTIMIZE to purge. */
2903 	if (error == DB_SUCCESS) {
2904 		char	table_name[MAX_FULL_NAME_LEN];
2905 
2906 		trx->op_info = "adding doc id to FTS DELETED";
2907 
2908 		info->graph_owns_us = TRUE;
2909 
2910 		fts_table.suffix = "DELETED";
2911 
2912 		fts_get_table_name(&fts_table, table_name);
2913 		pars_info_bind_id(info, "deleted", table_name);
2914 
2915 		graph = fts_parse_sql(
2916 			&fts_table,
2917 			info,
2918 			"BEGIN INSERT INTO $deleted VALUES (:doc_id);");
2919 
2920 		error = fts_eval_sql(trx, graph);
2921 
2922 		fts_que_graph_free(graph);
2923 	} else {
2924 		pars_info_free(info);
2925 	}
2926 
2927 	/* Increment the total deleted count, this is used to calculate the
2928 	number of documents indexed. */
2929 	if (error == DB_SUCCESS) {
2930 		mutex_enter(&table->fts->cache->deleted_lock);
2931 
2932 		++table->fts->cache->deleted;
2933 
2934 		mutex_exit(&table->fts->cache->deleted_lock);
2935 	}
2936 
2937 	return(error);
2938 }
2939 
2940 /*********************************************************************//**
2941 Do commit-phase steps necessary for the modification of a row.
2942 @return DB_SUCCESS or error code */
2943 static MY_ATTRIBUTE((nonnull, warn_unused_result))
2944 dberr_t
fts_modify(fts_trx_table_t * ftt,fts_trx_row_t * row)2945 fts_modify(
2946 /*=======*/
2947 	fts_trx_table_t*	ftt,		/*!< in: FTS trx table */
2948 	fts_trx_row_t*		row)		/*!< in: row */
2949 {
2950 	dberr_t	error;
2951 
2952 	ut_a(row->state == FTS_MODIFY);
2953 
2954 	error = fts_delete(ftt, row);
2955 
2956 	if (error == DB_SUCCESS) {
2957 		fts_add(ftt, row);
2958 	}
2959 
2960 	return(error);
2961 }
2962 
2963 /*********************************************************************//**
2964 The given transaction is about to be committed; do whatever is necessary
2965 from the FTS system's POV.
2966 @return DB_SUCCESS or error code */
2967 static MY_ATTRIBUTE((nonnull, warn_unused_result))
2968 dberr_t
fts_commit_table(fts_trx_table_t * ftt)2969 fts_commit_table(
2970 /*=============*/
2971 	fts_trx_table_t*	ftt)		/*!< in: FTS table to commit*/
2972 {
2973 	if (srv_read_only_mode) {
2974 		return DB_READ_ONLY;
2975 	}
2976 
2977 	const ib_rbt_node_t*	node;
2978 	ib_rbt_t*		rows;
2979 	dberr_t			error = DB_SUCCESS;
2980 	fts_cache_t*		cache = ftt->table->fts->cache;
2981 	trx_t*			trx = trx_create();
2982 
2983 	trx_start_internal(trx);
2984 
2985 	rows = ftt->rows;
2986 
2987 	ftt->fts_trx->trx = trx;
2988 
2989 	if (cache->get_docs == NULL) {
2990 		rw_lock_x_lock(&cache->init_lock);
2991 		if (cache->get_docs == NULL) {
2992 			cache->get_docs = fts_get_docs_create(cache);
2993 		}
2994 		rw_lock_x_unlock(&cache->init_lock);
2995 	}
2996 
2997 	for (node = rbt_first(rows);
2998 	     node != NULL && error == DB_SUCCESS;
2999 	     node = rbt_next(rows, node)) {
3000 
3001 		fts_trx_row_t*	row = rbt_value(fts_trx_row_t, node);
3002 
3003 		switch (row->state) {
3004 		case FTS_INSERT:
3005 			fts_add(ftt, row);
3006 			break;
3007 
3008 		case FTS_MODIFY:
3009 			error = fts_modify(ftt, row);
3010 			break;
3011 
3012 		case FTS_DELETE:
3013 			error = fts_delete(ftt, row);
3014 			break;
3015 
3016 		default:
3017 			ut_error;
3018 		}
3019 	}
3020 
3021 	fts_sql_commit(trx);
3022 
3023 	trx->free();
3024 
3025 	return(error);
3026 }
3027 
3028 /*********************************************************************//**
3029 The given transaction is about to be committed; do whatever is necessary
3030 from the FTS system's POV.
3031 @return DB_SUCCESS or error code */
3032 dberr_t
fts_commit(trx_t * trx)3033 fts_commit(
3034 /*=======*/
3035 	trx_t*	trx)				/*!< in: transaction */
3036 {
3037 	const ib_rbt_node_t*	node;
3038 	dberr_t			error;
3039 	ib_rbt_t*		tables;
3040 	fts_savepoint_t*	savepoint;
3041 
3042 	savepoint = static_cast<fts_savepoint_t*>(
3043 		ib_vector_last(trx->fts_trx->savepoints));
3044 	tables = savepoint->tables;
3045 
3046 	for (node = rbt_first(tables), error = DB_SUCCESS;
3047 	     node != NULL && error == DB_SUCCESS;
3048 	     node = rbt_next(tables, node)) {
3049 
3050 		fts_trx_table_t**	ftt;
3051 
3052 		ftt = rbt_value(fts_trx_table_t*, node);
3053 
3054 		error = fts_commit_table(*ftt);
3055 	}
3056 
3057 	return(error);
3058 }
3059 
3060 /*********************************************************************//**
3061 Initialize a document. */
3062 void
fts_doc_init(fts_doc_t * doc)3063 fts_doc_init(
3064 /*=========*/
3065 	fts_doc_t*	doc)			/*!< in: doc to initialize */
3066 {
3067 	mem_heap_t*	heap = mem_heap_create(32);
3068 
3069 	memset(doc, 0, sizeof(*doc));
3070 
3071 	doc->self_heap = ib_heap_allocator_create(heap);
3072 }
3073 
3074 /*********************************************************************//**
3075 Free document. */
3076 void
fts_doc_free(fts_doc_t * doc)3077 fts_doc_free(
3078 /*=========*/
3079 	fts_doc_t*	doc)			/*!< in: document */
3080 {
3081 	mem_heap_t*	heap = static_cast<mem_heap_t*>(doc->self_heap->arg);
3082 
3083 	if (doc->tokens) {
3084 		rbt_free(doc->tokens);
3085 	}
3086 
3087 	ut_d(memset(doc, 0, sizeof(*doc)));
3088 
3089 	mem_heap_free(heap);
3090 }
3091 
3092 /*********************************************************************//**
3093 Callback function for fetch that stores the text of an FTS document,
3094 converting each column to UTF-16.
3095 @return always FALSE */
3096 ibool
fts_query_expansion_fetch_doc(void * row,void * user_arg)3097 fts_query_expansion_fetch_doc(
3098 /*==========================*/
3099 	void*		row,			/*!< in: sel_node_t* */
3100 	void*		user_arg)		/*!< in: fts_doc_t* */
3101 {
3102 	que_node_t*	exp;
3103 	sel_node_t*	node = static_cast<sel_node_t*>(row);
3104 	fts_doc_t*	result_doc = static_cast<fts_doc_t*>(user_arg);
3105 	dfield_t*	dfield;
3106 	ulint		len;
3107 	ulint		doc_len;
3108 	fts_doc_t	doc;
3109 	CHARSET_INFO*	doc_charset = NULL;
3110 	ulint		field_no = 0;
3111 
3112 	len = 0;
3113 
3114 	fts_doc_init(&doc);
3115 	doc.found = TRUE;
3116 
3117 	exp = node->select_list;
3118 	doc_len = 0;
3119 
3120 	doc_charset  = result_doc->charset;
3121 
3122 	/* Copy each indexed column content into doc->text.f_str */
3123 	while (exp) {
3124 		dfield = que_node_get_val(exp);
3125 		len = dfield_get_len(dfield);
3126 
3127 		/* NULL column */
3128 		if (len == UNIV_SQL_NULL) {
3129 			exp = que_node_get_next(exp);
3130 			continue;
3131 		}
3132 
3133 		if (!doc_charset) {
3134 			doc_charset = fts_get_charset(dfield->type.prtype);
3135 		}
3136 
3137 		doc.charset = doc_charset;
3138 
3139 		if (dfield_is_ext(dfield)) {
3140 			/* We ignore columns that are stored externally, this
3141 			could result in too many words to search */
3142 			exp = que_node_get_next(exp);
3143 			continue;
3144 		} else {
3145 			doc.text.f_n_char = 0;
3146 
3147 			doc.text.f_str = static_cast<byte*>(
3148 				dfield_get_data(dfield));
3149 
3150 			doc.text.f_len = len;
3151 		}
3152 
3153 		if (field_no == 0) {
3154 			fts_tokenize_document(&doc, result_doc,
3155 					      result_doc->parser);
3156 		} else {
3157 			fts_tokenize_document_next(&doc, doc_len, result_doc,
3158 						   result_doc->parser);
3159 		}
3160 
3161 		exp = que_node_get_next(exp);
3162 
3163 		doc_len += (exp) ? len + 1 : len;
3164 
3165 		field_no++;
3166 	}
3167 
3168 	ut_ad(doc_charset);
3169 
3170 	if (!result_doc->charset) {
3171 		result_doc->charset = doc_charset;
3172 	}
3173 
3174 	fts_doc_free(&doc);
3175 
3176 	return(FALSE);
3177 }
3178 
3179 /*********************************************************************//**
3180 fetch and tokenize the document. */
3181 static
3182 void
fts_fetch_doc_from_rec(fts_get_doc_t * get_doc,dict_index_t * clust_index,btr_pcur_t * pcur,rec_offs * offsets,fts_doc_t * doc)3183 fts_fetch_doc_from_rec(
3184 /*===================*/
3185 	fts_get_doc_t*  get_doc,	/*!< in: FTS index's get_doc struct */
3186 	dict_index_t*	clust_index,	/*!< in: cluster index */
3187 	btr_pcur_t*	pcur,		/*!< in: cursor whose position
3188 					has been stored */
3189 	rec_offs*	offsets,	/*!< in: offsets */
3190 	fts_doc_t*	doc)		/*!< out: fts doc to hold parsed
3191 					documents */
3192 {
3193 	dict_index_t*		index;
3194 	dict_table_t*		table;
3195 	const rec_t*		clust_rec;
3196 	ulint			num_field;
3197 	const dict_field_t*	ifield;
3198 	const dict_col_t*	col;
3199 	ulint			clust_pos;
3200 	ulint			i;
3201 	ulint			doc_len = 0;
3202 	ulint			processed_doc = 0;
3203 	st_mysql_ftparser*	parser;
3204 
3205 	if (!get_doc) {
3206 		return;
3207 	}
3208 
3209 	index = get_doc->index_cache->index;
3210 	table = get_doc->index_cache->index->table;
3211 	parser = get_doc->index_cache->index->parser;
3212 
3213 	clust_rec = btr_pcur_get_rec(pcur);
3214 	ut_ad(!page_rec_is_comp(clust_rec)
3215 	      || rec_get_status(clust_rec) == REC_STATUS_ORDINARY);
3216 
3217 	num_field = dict_index_get_n_fields(index);
3218 
3219 	for (i = 0; i < num_field; i++) {
3220 		ifield = dict_index_get_nth_field(index, i);
3221 		col = dict_field_get_col(ifield);
3222 		clust_pos = dict_col_get_clust_pos(col, clust_index);
3223 
3224 		if (!get_doc->index_cache->charset) {
3225 			get_doc->index_cache->charset = fts_get_charset(
3226 				ifield->col->prtype);
3227 		}
3228 
3229 		if (rec_offs_nth_extern(offsets, clust_pos)) {
3230 			doc->text.f_str =
3231 				btr_rec_copy_externally_stored_field(
3232 					clust_rec, offsets,
3233 					dict_table_page_size(table),
3234 					clust_pos, &doc->text.f_len,
3235 					static_cast<mem_heap_t*>(
3236 						doc->self_heap->arg));
3237 		} else {
3238 			doc->text.f_str = (byte*) rec_get_nth_field(
3239 				clust_rec, offsets, clust_pos,
3240 				&doc->text.f_len);
3241 		}
3242 
3243 		doc->found = TRUE;
3244 		doc->charset = get_doc->index_cache->charset;
3245 
3246 		/* Null Field */
3247 		if (doc->text.f_len == UNIV_SQL_NULL || doc->text.f_len == 0) {
3248 			continue;
3249 		}
3250 
3251 		if (processed_doc == 0) {
3252 			fts_tokenize_document(doc, NULL, parser);
3253 		} else {
3254 			fts_tokenize_document_next(doc, doc_len, NULL, parser);
3255 		}
3256 
3257 		processed_doc++;
3258 		doc_len += doc->text.f_len + 1;
3259 	}
3260 }
3261 
3262 /** Fetch the data from tuple and tokenize the document.
3263 @param[in]     get_doc FTS index's get_doc struct
3264 @param[in]     tuple   tuple should be arranged in table schema order
3265 @param[out]    doc     fts doc to hold parsed documents. */
3266 static
3267 void
fts_fetch_doc_from_tuple(fts_get_doc_t * get_doc,const dtuple_t * tuple,fts_doc_t * doc)3268 fts_fetch_doc_from_tuple(
3269        fts_get_doc_t*  get_doc,
3270        const dtuple_t* tuple,
3271        fts_doc_t*      doc)
3272 {
3273        dict_index_t*           index;
3274        st_mysql_ftparser*      parser;
3275        ulint                   doc_len = 0;
3276        ulint                   processed_doc = 0;
3277        ulint                   num_field;
3278 
3279        if (get_doc == NULL) {
3280                return;
3281        }
3282 
3283        index = get_doc->index_cache->index;
3284        parser = get_doc->index_cache->index->parser;
3285        num_field = dict_index_get_n_fields(index);
3286 
3287        for (ulint i = 0; i < num_field; i++) {
3288                const dict_field_t*     ifield;
3289                const dict_col_t*       col;
3290                ulint                   pos;
3291 
3292                ifield = dict_index_get_nth_field(index, i);
3293                col = dict_field_get_col(ifield);
3294                pos = dict_col_get_no(col);
3295 		const dfield_t* field = dtuple_get_nth_field(tuple, pos);
3296 
3297                if (!get_doc->index_cache->charset) {
3298                        get_doc->index_cache->charset = fts_get_charset(
3299                                ifield->col->prtype);
3300                }
3301 
3302                ut_ad(!dfield_is_ext(field));
3303 
3304                doc->text.f_str = (byte*) dfield_get_data(field);
3305                doc->text.f_len = dfield_get_len(field);
3306                doc->found = TRUE;
3307                doc->charset = get_doc->index_cache->charset;
3308 
3309                /* field data is NULL. */
3310                if (doc->text.f_len == UNIV_SQL_NULL || doc->text.f_len == 0) {
3311                        continue;
3312                }
3313 
3314                if (processed_doc == 0) {
3315                        fts_tokenize_document(doc, NULL, parser);
3316                } else {
3317                        fts_tokenize_document_next(doc, doc_len, NULL, parser);
3318                }
3319 
3320                processed_doc++;
3321                doc_len += doc->text.f_len + 1;
3322        }
3323 }
3324 
3325 /** Fetch the document from tuple, tokenize the text data and
3326 insert the text data into fts auxiliary table and
3327 its cache. Moreover this tuple fields doesn't contain any information
3328 about externally stored field. This tuple contains data directly
3329 converted from mysql.
3330 @param[in]     ftt     FTS transaction table
3331 @param[in]     doc_id  doc id
3332 @param[in]     tuple   tuple from where data can be retrieved
3333                        and tuple should be arranged in table
3334                        schema order. */
3335 void
fts_add_doc_from_tuple(fts_trx_table_t * ftt,doc_id_t doc_id,const dtuple_t * tuple)3336 fts_add_doc_from_tuple(
3337        fts_trx_table_t*ftt,
3338        doc_id_t        doc_id,
3339        const dtuple_t* tuple)
3340 {
3341        mtr_t           mtr;
3342        fts_cache_t*    cache = ftt->table->fts->cache;
3343 
3344        ut_ad(cache->get_docs);
3345 
3346        if (!ftt->table->fts->added_synced) {
3347                fts_init_index(ftt->table, FALSE);
3348        }
3349 
3350        mtr_start(&mtr);
3351 
3352        ulint   num_idx = ib_vector_size(cache->get_docs);
3353 
3354        for (ulint i = 0; i < num_idx; ++i) {
3355                fts_doc_t       doc;
3356                dict_table_t*   table;
3357                fts_get_doc_t*  get_doc;
3358 
3359                get_doc = static_cast<fts_get_doc_t*>(
3360                        ib_vector_get(cache->get_docs, i));
3361                table = get_doc->index_cache->index->table;
3362 
3363                fts_doc_init(&doc);
3364                fts_fetch_doc_from_tuple(
3365                        get_doc, tuple, &doc);
3366 
3367                if (doc.found) {
3368                        mtr_commit(&mtr);
3369                        rw_lock_x_lock(&table->fts->cache->lock);
3370 
3371                        if (table->fts->cache->stopword_info.status
3372                            & STOPWORD_NOT_INIT) {
3373                                fts_load_stopword(table, NULL, NULL,
3374                                                  true, true);
3375                        }
3376 
3377                        fts_cache_add_doc(
3378                                table->fts->cache,
3379                                get_doc->index_cache,
3380                                doc_id, doc.tokens);
3381 
3382                        rw_lock_x_unlock(&table->fts->cache->lock);
3383 
3384                        if (cache->total_size > fts_max_cache_size / 5
3385                            || fts_need_sync) {
3386                                fts_sync(cache->sync, true, false);
3387                        }
3388 
3389                        mtr_start(&mtr);
3390 
3391                }
3392 
3393                fts_doc_free(&doc);
3394        }
3395 
3396        mtr_commit(&mtr);
3397 }
3398 
3399 /*********************************************************************//**
3400 This function fetches the document inserted during the committing
3401 transaction, and tokenize the inserted text data and insert into
3402 FTS auxiliary table and its cache.
3403 @return TRUE if successful */
3404 static
3405 ulint
fts_add_doc_by_id(fts_trx_table_t * ftt,doc_id_t doc_id)3406 fts_add_doc_by_id(
3407 /*==============*/
3408 	fts_trx_table_t*ftt,		/*!< in: FTS trx table */
3409 	doc_id_t	doc_id)		/*!< in: doc id */
3410 {
3411 	mtr_t		mtr;
3412 	mem_heap_t*	heap;
3413 	btr_pcur_t	pcur;
3414 	dict_table_t*	table;
3415 	dtuple_t*	tuple;
3416 	dfield_t*       dfield;
3417 	fts_get_doc_t*	get_doc;
3418 	doc_id_t        temp_doc_id;
3419 	dict_index_t*   clust_index;
3420 	dict_index_t*	fts_id_index;
3421 	ibool		is_id_cluster;
3422 	fts_cache_t*   	cache = ftt->table->fts->cache;
3423 
3424 	ut_ad(cache->get_docs);
3425 
3426 	/* If Doc ID has been supplied by the user, then the table
3427 	might not yet be sync-ed */
3428 
3429 	if (!ftt->table->fts->added_synced) {
3430 		fts_init_index(ftt->table, FALSE);
3431 	}
3432 
3433 	/* Get the first FTS index's get_doc */
3434 	get_doc = static_cast<fts_get_doc_t*>(
3435 		ib_vector_get(cache->get_docs, 0));
3436 	ut_ad(get_doc);
3437 
3438 	table = get_doc->index_cache->index->table;
3439 
3440 	heap = mem_heap_create(512);
3441 
3442 	clust_index = dict_table_get_first_index(table);
3443 	fts_id_index = table->fts_doc_id_index;
3444 
3445 	/* Check whether the index on FTS_DOC_ID is cluster index */
3446 	is_id_cluster = (clust_index == fts_id_index);
3447 
3448 	mtr_start(&mtr);
3449 	btr_pcur_init(&pcur);
3450 
3451 	/* Search based on Doc ID. Here, we'll need to consider the case
3452 	when there is no primary index on Doc ID */
3453 	tuple = dtuple_create(heap, 1);
3454 	dfield = dtuple_get_nth_field(tuple, 0);
3455 	dfield->type.mtype = DATA_INT;
3456 	dfield->type.prtype = DATA_NOT_NULL | DATA_UNSIGNED | DATA_BINARY_TYPE;
3457 
3458 	mach_write_to_8((byte*) &temp_doc_id, doc_id);
3459 	dfield_set_data(dfield, &temp_doc_id, sizeof(temp_doc_id));
3460 
3461 	btr_pcur_open_with_no_init(
3462 		fts_id_index, tuple, PAGE_CUR_LE, BTR_SEARCH_LEAF,
3463 		&pcur, 0, &mtr);
3464 
3465 	/* If we have a match, add the data to doc structure */
3466 	if (btr_pcur_get_low_match(&pcur) == 1) {
3467 		const rec_t*	rec;
3468 		btr_pcur_t*	doc_pcur;
3469 		const rec_t*	clust_rec;
3470 		btr_pcur_t	clust_pcur;
3471 		rec_offs*	offsets = NULL;
3472 		ulint		num_idx = ib_vector_size(cache->get_docs);
3473 
3474 		rec = btr_pcur_get_rec(&pcur);
3475 
3476 		/* Doc could be deleted */
3477 		if (page_rec_is_infimum(rec)
3478 		    || rec_get_deleted_flag(rec, dict_table_is_comp(table))) {
3479 
3480 			goto func_exit;
3481 		}
3482 
3483 		if (is_id_cluster) {
3484 			clust_rec = rec;
3485 			doc_pcur = &pcur;
3486 		} else {
3487 			dtuple_t*	clust_ref;
3488 			ulint		n_fields;
3489 
3490 			btr_pcur_init(&clust_pcur);
3491 			n_fields = dict_index_get_n_unique(clust_index);
3492 
3493 			clust_ref = dtuple_create(heap, n_fields);
3494 			dict_index_copy_types(clust_ref, clust_index, n_fields);
3495 
3496 			row_build_row_ref_in_tuple(
3497 				clust_ref, rec, fts_id_index, NULL);
3498 
3499 			btr_pcur_open_with_no_init(
3500 				clust_index, clust_ref, PAGE_CUR_LE,
3501 				BTR_SEARCH_LEAF, &clust_pcur, 0, &mtr);
3502 
3503 			doc_pcur = &clust_pcur;
3504 			clust_rec = btr_pcur_get_rec(&clust_pcur);
3505 
3506 		}
3507 
3508 		offsets = rec_get_offsets(clust_rec, clust_index, NULL,
3509 					  clust_index->n_core_fields,
3510 					  ULINT_UNDEFINED, &heap);
3511 
3512 		for (ulint i = 0; i < num_idx; ++i) {
3513 			fts_doc_t       doc;
3514 			dict_table_t*   table;
3515 			fts_get_doc_t*  get_doc;
3516 
3517 			get_doc = static_cast<fts_get_doc_t*>(
3518 				ib_vector_get(cache->get_docs, i));
3519 
3520 			table = get_doc->index_cache->index->table;
3521 
3522 			fts_doc_init(&doc);
3523 
3524 			fts_fetch_doc_from_rec(
3525 				get_doc, clust_index, doc_pcur, offsets, &doc);
3526 
3527 			if (doc.found) {
3528 				ibool	success MY_ATTRIBUTE((unused));
3529 
3530 				btr_pcur_store_position(doc_pcur, &mtr);
3531 				mtr_commit(&mtr);
3532 
3533 				rw_lock_x_lock(&table->fts->cache->lock);
3534 
3535 				if (table->fts->cache->stopword_info.status
3536 				    & STOPWORD_NOT_INIT) {
3537 					fts_load_stopword(table, NULL,
3538 							  NULL, true, true);
3539 				}
3540 
3541 				fts_cache_add_doc(
3542 					table->fts->cache,
3543 					get_doc->index_cache,
3544 					doc_id, doc.tokens);
3545 
3546 				bool	need_sync = !cache->sync->in_progress
3547 					&& (fts_need_sync
3548 					    || (cache->total_size
3549 						- cache->total_size_at_sync)
3550 					    > fts_max_cache_size / 10);
3551 				if (need_sync) {
3552 					cache->total_size_at_sync =
3553 						cache->total_size;
3554 				}
3555 
3556 				rw_lock_x_unlock(&table->fts->cache->lock);
3557 
3558 				DBUG_EXECUTE_IF(
3559 					"fts_instrument_sync",
3560 					fts_optimize_request_sync_table(table);
3561 					os_event_wait(cache->sync->event);
3562 				);
3563 
3564 				DBUG_EXECUTE_IF(
3565 					"fts_instrument_sync_debug",
3566 					fts_sync(cache->sync, true, true);
3567 				);
3568 
3569 				DEBUG_SYNC_C("fts_instrument_sync_request");
3570 				DBUG_EXECUTE_IF(
3571 					"fts_instrument_sync_request",
3572 					fts_optimize_request_sync_table(table);
3573 				);
3574 
3575 				if (need_sync) {
3576 					fts_optimize_request_sync_table(table);
3577 				}
3578 
3579 				mtr_start(&mtr);
3580 
3581 				if (i < num_idx - 1) {
3582 
3583 					success = btr_pcur_restore_position(
3584 						BTR_SEARCH_LEAF, doc_pcur,
3585 						&mtr);
3586 
3587 					ut_ad(success);
3588 				}
3589 			}
3590 
3591 			fts_doc_free(&doc);
3592 		}
3593 
3594 		if (!is_id_cluster) {
3595 			btr_pcur_close(doc_pcur);
3596 		}
3597 	}
3598 func_exit:
3599 	mtr_commit(&mtr);
3600 
3601 	btr_pcur_close(&pcur);
3602 
3603 	mem_heap_free(heap);
3604 	return(TRUE);
3605 }
3606 
3607 
3608 /*********************************************************************//**
3609 Callback function to read a single ulint column.
3610 return always returns TRUE */
3611 static
3612 ibool
fts_read_ulint(void * row,void * user_arg)3613 fts_read_ulint(
3614 /*===========*/
3615 	void*		row,		/*!< in: sel_node_t* */
3616 	void*		user_arg)	/*!< in: pointer to ulint */
3617 {
3618 	sel_node_t*	sel_node = static_cast<sel_node_t*>(row);
3619 	ulint*		value = static_cast<ulint*>(user_arg);
3620 	que_node_t*	exp = sel_node->select_list;
3621 	dfield_t*	dfield = que_node_get_val(exp);
3622 	void*		data = dfield_get_data(dfield);
3623 
3624 	*value = static_cast<ulint>(mach_read_from_4(
3625 		static_cast<const byte*>(data)));
3626 
3627 	return(TRUE);
3628 }
3629 
3630 /*********************************************************************//**
3631 Get maximum Doc ID in a table if index "FTS_DOC_ID_INDEX" exists
3632 @return max Doc ID or 0 if index "FTS_DOC_ID_INDEX" does not exist */
3633 doc_id_t
fts_get_max_doc_id(dict_table_t * table)3634 fts_get_max_doc_id(
3635 /*===============*/
3636 	dict_table_t*	table)		/*!< in: user table */
3637 {
3638 	dict_index_t*	index;
3639 	dict_field_t*	dfield MY_ATTRIBUTE((unused)) = NULL;
3640 	doc_id_t	doc_id = 0;
3641 	mtr_t		mtr;
3642 	btr_pcur_t	pcur;
3643 
3644 	index = table->fts_doc_id_index;
3645 
3646 	if (!index) {
3647 		return(0);
3648 	}
3649 
3650 	ut_ad(!index->is_instant());
3651 
3652 	dfield = dict_index_get_nth_field(index, 0);
3653 
3654 #if 0 /* This can fail when renaming a column to FTS_DOC_ID_COL_NAME. */
3655 	ut_ad(innobase_strcasecmp(FTS_DOC_ID_COL_NAME, dfield->name) == 0);
3656 #endif
3657 
3658 	mtr_start(&mtr);
3659 
3660 	/* fetch the largest indexes value */
3661 	btr_pcur_open_at_index_side(
3662 		false, index, BTR_SEARCH_LEAF, &pcur, true, 0, &mtr);
3663 
3664 	if (!page_is_empty(btr_pcur_get_page(&pcur))) {
3665 		const rec_t*    rec = NULL;
3666 		rec_offs	offsets_[REC_OFFS_NORMAL_SIZE];
3667 		rec_offs*	offsets = offsets_;
3668 		mem_heap_t*	heap = NULL;
3669 		ulint		len;
3670 		const void*	data;
3671 
3672 		rec_offs_init(offsets_);
3673 
3674 		do {
3675 			rec = btr_pcur_get_rec(&pcur);
3676 
3677 			if (page_rec_is_user_rec(rec)) {
3678 				break;
3679 			}
3680 		} while (btr_pcur_move_to_prev(&pcur, &mtr));
3681 
3682 		if (!rec) {
3683 			goto func_exit;
3684 		}
3685 
3686 		ut_ad(!rec_is_metadata(rec, index));
3687 		offsets = rec_get_offsets(
3688 			rec, index, offsets, index->n_core_fields,
3689 			ULINT_UNDEFINED, &heap);
3690 
3691 		data = rec_get_nth_field(rec, offsets, 0, &len);
3692 
3693 		doc_id = static_cast<doc_id_t>(fts_read_doc_id(
3694 			static_cast<const byte*>(data)));
3695 	}
3696 
3697 func_exit:
3698 	btr_pcur_close(&pcur);
3699 	mtr_commit(&mtr);
3700 	return(doc_id);
3701 }
3702 
3703 /*********************************************************************//**
3704 Fetch document with the given document id.
3705 @return DB_SUCCESS if OK else error */
3706 dberr_t
fts_doc_fetch_by_doc_id(fts_get_doc_t * get_doc,doc_id_t doc_id,dict_index_t * index_to_use,ulint option,fts_sql_callback callback,void * arg)3707 fts_doc_fetch_by_doc_id(
3708 /*====================*/
3709 	fts_get_doc_t*	get_doc,	/*!< in: state */
3710 	doc_id_t	doc_id,		/*!< in: id of document to
3711 					fetch */
3712 	dict_index_t*	index_to_use,	/*!< in: caller supplied FTS index,
3713 					or NULL */
3714 	ulint		option,		/*!< in: search option, if it is
3715 					greater than doc_id or equal */
3716 	fts_sql_callback
3717 			callback,	/*!< in: callback to read */
3718 	void*		arg)		/*!< in: callback arg */
3719 {
3720 	pars_info_t*	info;
3721 	dberr_t		error;
3722 	const char*	select_str;
3723 	doc_id_t	write_doc_id;
3724 	dict_index_t*	index;
3725 	trx_t*		trx = trx_create();
3726 	que_t*          graph;
3727 
3728 	trx->op_info = "fetching indexed FTS document";
3729 
3730 	/* The FTS index can be supplied by caller directly with
3731 	"index_to_use", otherwise, get it from "get_doc" */
3732 	index = (index_to_use) ? index_to_use : get_doc->index_cache->index;
3733 
3734 	if (get_doc && get_doc->get_document_graph) {
3735 		info = get_doc->get_document_graph->info;
3736 	} else {
3737 		info = pars_info_create();
3738 	}
3739 
3740 	/* Convert to "storage" byte order. */
3741 	fts_write_doc_id((byte*) &write_doc_id, doc_id);
3742 	fts_bind_doc_id(info, "doc_id", &write_doc_id);
3743 	pars_info_bind_function(info, "my_func", callback, arg);
3744 
3745 	select_str = fts_get_select_columns_str(index, info, info->heap);
3746 	pars_info_bind_id(info, "table_name", index->table->name.m_name);
3747 
3748 	if (!get_doc || !get_doc->get_document_graph) {
3749 		if (option == FTS_FETCH_DOC_BY_ID_EQUAL) {
3750 			graph = fts_parse_sql(
3751 				NULL,
3752 				info,
3753 				mem_heap_printf(info->heap,
3754 					"DECLARE FUNCTION my_func;\n"
3755 					"DECLARE CURSOR c IS"
3756 					" SELECT %s FROM $table_name"
3757 					" WHERE %s = :doc_id;\n"
3758 					"BEGIN\n"
3759 					""
3760 					"OPEN c;\n"
3761 					"WHILE 1 = 1 LOOP\n"
3762 					"  FETCH c INTO my_func();\n"
3763 					"  IF c %% NOTFOUND THEN\n"
3764 					"    EXIT;\n"
3765 					"  END IF;\n"
3766 					"END LOOP;\n"
3767 					"CLOSE c;",
3768 					select_str, FTS_DOC_ID_COL_NAME));
3769 		} else {
3770 			ut_ad(option == FTS_FETCH_DOC_BY_ID_LARGE);
3771 
3772 			/* This is used for crash recovery of table with
3773 			hidden DOC ID or FTS indexes. We will scan the table
3774 			to re-processing user table rows whose DOC ID or
3775 			FTS indexed documents have not been sync-ed to disc
3776 			during recent crash.
3777 			In the case that all fulltext indexes are dropped
3778 			for a table, we will keep the "hidden" FTS_DOC_ID
3779 			column, and this scan is to retreive the largest
3780 			DOC ID being used in the table to determine the
3781 			appropriate next DOC ID.
3782 			In the case of there exists fulltext index(es), this
3783 			operation will re-tokenize any docs that have not
3784 			been sync-ed to the disk, and re-prime the FTS
3785 			cached */
3786 			graph = fts_parse_sql(
3787 				NULL,
3788 				info,
3789 				mem_heap_printf(info->heap,
3790 					"DECLARE FUNCTION my_func;\n"
3791 					"DECLARE CURSOR c IS"
3792 					" SELECT %s, %s FROM $table_name"
3793 					" WHERE %s > :doc_id;\n"
3794 					"BEGIN\n"
3795 					""
3796 					"OPEN c;\n"
3797 					"WHILE 1 = 1 LOOP\n"
3798 					"  FETCH c INTO my_func();\n"
3799 					"  IF c %% NOTFOUND THEN\n"
3800 					"    EXIT;\n"
3801 					"  END IF;\n"
3802 					"END LOOP;\n"
3803 					"CLOSE c;",
3804 					FTS_DOC_ID_COL_NAME,
3805 					select_str, FTS_DOC_ID_COL_NAME));
3806 		}
3807 		if (get_doc) {
3808 			get_doc->get_document_graph = graph;
3809 		}
3810 	} else {
3811 		graph = get_doc->get_document_graph;
3812 	}
3813 
3814 	error = fts_eval_sql(trx, graph);
3815 	fts_sql_commit(trx);
3816 	trx->free();
3817 
3818 	if (!get_doc) {
3819 		fts_que_graph_free(graph);
3820 	}
3821 
3822 	return(error);
3823 }
3824 
3825 /*********************************************************************//**
3826 Write out a single word's data as new entry/entries in the INDEX table.
3827 @return DB_SUCCESS if all OK. */
3828 dberr_t
fts_write_node(trx_t * trx,que_t ** graph,fts_table_t * fts_table,fts_string_t * word,fts_node_t * node)3829 fts_write_node(
3830 /*===========*/
3831 	trx_t*		trx,			/*!< in: transaction */
3832 	que_t**		graph,			/*!< in: query graph */
3833 	fts_table_t*	fts_table,		/*!< in: aux table */
3834 	fts_string_t*	word,			/*!< in: word in UTF-8 */
3835 	fts_node_t*	node)			/*!< in: node columns */
3836 {
3837 	pars_info_t*	info;
3838 	dberr_t		error;
3839 	ib_uint32_t	doc_count;
3840 	time_t		start_time;
3841 	doc_id_t	last_doc_id;
3842 	doc_id_t	first_doc_id;
3843 	char		table_name[MAX_FULL_NAME_LEN];
3844 
3845 	ut_a(node->ilist != NULL);
3846 
3847 	if (*graph) {
3848 		info = (*graph)->info;
3849 	} else {
3850 		info = pars_info_create();
3851 
3852 		fts_get_table_name(fts_table, table_name);
3853 		pars_info_bind_id(info, "index_table_name", table_name);
3854 	}
3855 
3856 	pars_info_bind_varchar_literal(info, "token", word->f_str, word->f_len);
3857 
3858 	/* Convert to "storage" byte order. */
3859 	fts_write_doc_id((byte*) &first_doc_id, node->first_doc_id);
3860 	fts_bind_doc_id(info, "first_doc_id", &first_doc_id);
3861 
3862 	/* Convert to "storage" byte order. */
3863 	fts_write_doc_id((byte*) &last_doc_id, node->last_doc_id);
3864 	fts_bind_doc_id(info, "last_doc_id", &last_doc_id);
3865 
3866 	ut_a(node->last_doc_id >= node->first_doc_id);
3867 
3868 	/* Convert to "storage" byte order. */
3869 	mach_write_to_4((byte*) &doc_count, node->doc_count);
3870 	pars_info_bind_int4_literal(
3871 		info, "doc_count", (const ib_uint32_t*) &doc_count);
3872 
3873 	/* Set copy_name to FALSE since it's a static. */
3874 	pars_info_bind_literal(
3875 		info, "ilist", node->ilist, node->ilist_size,
3876 		DATA_BLOB, DATA_BINARY_TYPE);
3877 
3878 	if (!*graph) {
3879 
3880 		*graph = fts_parse_sql(
3881 			fts_table,
3882 			info,
3883 			"BEGIN\n"
3884 			"INSERT INTO $index_table_name VALUES"
3885 			" (:token, :first_doc_id,"
3886 			"  :last_doc_id, :doc_count, :ilist);");
3887 	}
3888 
3889 	start_time = time(NULL);
3890 	error = fts_eval_sql(trx, *graph);
3891 	elapsed_time += time(NULL) - start_time;
3892 	++n_nodes;
3893 
3894 	return(error);
3895 }
3896 
3897 /*********************************************************************//**
3898 Add rows to the DELETED_CACHE table.
3899 @return DB_SUCCESS if all went well else error code*/
3900 static MY_ATTRIBUTE((nonnull, warn_unused_result))
3901 dberr_t
fts_sync_add_deleted_cache(fts_sync_t * sync,ib_vector_t * doc_ids)3902 fts_sync_add_deleted_cache(
3903 /*=======================*/
3904 	fts_sync_t*	sync,			/*!< in: sync state */
3905 	ib_vector_t*	doc_ids)		/*!< in: doc ids to add */
3906 {
3907 	ulint		i;
3908 	pars_info_t*	info;
3909 	que_t*		graph;
3910 	fts_table_t	fts_table;
3911 	char		table_name[MAX_FULL_NAME_LEN];
3912 	doc_id_t	dummy = 0;
3913 	dberr_t		error = DB_SUCCESS;
3914 	ulint		n_elems = ib_vector_size(doc_ids);
3915 
3916 	ut_a(ib_vector_size(doc_ids) > 0);
3917 
3918 	ib_vector_sort(doc_ids, fts_doc_id_cmp);
3919 
3920 	info = pars_info_create();
3921 
3922 	fts_bind_doc_id(info, "doc_id", &dummy);
3923 
3924 	FTS_INIT_FTS_TABLE(
3925 		&fts_table, "DELETED_CACHE", FTS_COMMON_TABLE, sync->table);
3926 
3927 	fts_get_table_name(&fts_table, table_name);
3928 	pars_info_bind_id(info, "table_name", table_name);
3929 
3930 	graph = fts_parse_sql(
3931 		&fts_table,
3932 		info,
3933 		"BEGIN INSERT INTO $table_name VALUES (:doc_id);");
3934 
3935 	for (i = 0; i < n_elems && error == DB_SUCCESS; ++i) {
3936 		doc_id_t*	update;
3937 		doc_id_t	write_doc_id;
3938 
3939 		update = static_cast<doc_id_t*>(ib_vector_get(doc_ids, i));
3940 
3941 		/* Convert to "storage" byte order. */
3942 		fts_write_doc_id((byte*) &write_doc_id, *update);
3943 		fts_bind_doc_id(info, "doc_id", &write_doc_id);
3944 
3945 		error = fts_eval_sql(sync->trx, graph);
3946 	}
3947 
3948 	fts_que_graph_free(graph);
3949 
3950 	return(error);
3951 }
3952 
3953 /** Write the words and ilist to disk.
3954 @param[in,out]	trx		transaction
3955 @param[in]	index_cache	index cache
3956 @param[in]	unlock_cache	whether unlock cache when write node
3957 @return DB_SUCCESS if all went well else error code */
3958 static MY_ATTRIBUTE((nonnull, warn_unused_result))
3959 dberr_t
fts_sync_write_words(trx_t * trx,fts_index_cache_t * index_cache,bool unlock_cache)3960 fts_sync_write_words(
3961 	trx_t*			trx,
3962 	fts_index_cache_t*	index_cache,
3963 	bool			unlock_cache)
3964 {
3965 	fts_table_t	fts_table;
3966 	ulint		n_nodes = 0;
3967 	ulint		n_words = 0;
3968 	const ib_rbt_node_t* rbt_node;
3969 	dberr_t		error = DB_SUCCESS;
3970 	ibool		print_error = FALSE;
3971 	dict_table_t*	table = index_cache->index->table;
3972 
3973 	FTS_INIT_INDEX_TABLE(
3974 		&fts_table, NULL, FTS_INDEX_TABLE, index_cache->index);
3975 
3976 	n_words = rbt_size(index_cache->words);
3977 
3978 	/* We iterate over the entire tree, even if there is an error,
3979 	since we want to free the memory used during caching. */
3980 	for (rbt_node = rbt_first(index_cache->words);
3981 	     rbt_node;
3982 	     rbt_node = rbt_next(index_cache->words, rbt_node)) {
3983 
3984 		ulint			i;
3985 		ulint			selected;
3986 		fts_tokenizer_word_t*	word;
3987 
3988 		word = rbt_value(fts_tokenizer_word_t, rbt_node);
3989 
3990 		DBUG_EXECUTE_IF("fts_instrument_write_words_before_select_index",
3991 				os_thread_sleep(300000););
3992 
3993 		selected = fts_select_index(
3994 			index_cache->charset, word->text.f_str,
3995 			word->text.f_len);
3996 
3997 		fts_table.suffix = fts_get_suffix(selected);
3998 
3999 		/* We iterate over all the nodes even if there was an error */
4000 		for (i = 0; i < ib_vector_size(word->nodes); ++i) {
4001 
4002 			fts_node_t* fts_node = static_cast<fts_node_t*>(
4003 				ib_vector_get(word->nodes, i));
4004 
4005 			if (fts_node->synced) {
4006 				continue;
4007 			} else {
4008 				fts_node->synced = true;
4009 			}
4010 
4011 			/*FIXME: we need to handle the error properly. */
4012 			if (error == DB_SUCCESS) {
4013 				if (unlock_cache) {
4014 					rw_lock_x_unlock(
4015 						&table->fts->cache->lock);
4016 				}
4017 
4018 				error = fts_write_node(
4019 					trx,
4020 					&index_cache->ins_graph[selected],
4021 					&fts_table, &word->text, fts_node);
4022 
4023 				DEBUG_SYNC_C("fts_write_node");
4024 				DBUG_EXECUTE_IF("fts_write_node_crash",
4025 					DBUG_SUICIDE(););
4026 
4027 				DBUG_EXECUTE_IF("fts_instrument_sync_sleep",
4028 					os_thread_sleep(1000000);
4029 				);
4030 
4031 				if (unlock_cache) {
4032 					rw_lock_x_lock(
4033 						&table->fts->cache->lock);
4034 				}
4035 			}
4036 		}
4037 
4038 		n_nodes += ib_vector_size(word->nodes);
4039 
4040 		if (UNIV_UNLIKELY(error != DB_SUCCESS) && !print_error) {
4041 			ib::error() << "(" << error << ") writing"
4042 				" word node to FTS auxiliary index table "
4043 				<< table->name;
4044 			print_error = TRUE;
4045 		}
4046 	}
4047 
4048 	if (UNIV_UNLIKELY(fts_enable_diag_print)) {
4049 		printf("Avg number of nodes: %lf\n",
4050 		       (double) n_nodes / (double) (n_words > 1 ? n_words : 1));
4051 	}
4052 
4053 	return(error);
4054 }
4055 
4056 /*********************************************************************//**
4057 Begin Sync, create transaction, acquire locks, etc. */
4058 static
4059 void
fts_sync_begin(fts_sync_t * sync)4060 fts_sync_begin(
4061 /*===========*/
4062 	fts_sync_t*	sync)			/*!< in: sync state */
4063 {
4064 	fts_cache_t*	cache = sync->table->fts->cache;
4065 
4066 	n_nodes = 0;
4067 	elapsed_time = 0;
4068 
4069 	sync->start_time = time(NULL);
4070 
4071 	sync->trx = trx_create();
4072 	trx_start_internal(sync->trx);
4073 
4074 	if (UNIV_UNLIKELY(fts_enable_diag_print)) {
4075 		ib::info() << "FTS SYNC for table " << sync->table->name
4076 			<< ", deleted count: "
4077 			<< ib_vector_size(cache->deleted_doc_ids)
4078 			<< " size: " << cache->total_size << " bytes";
4079 	}
4080 }
4081 
4082 /*********************************************************************//**
4083 Run SYNC on the table, i.e., write out data from the index specific
4084 cache to the FTS aux INDEX table and FTS aux doc id stats table.
4085 @return DB_SUCCESS if all OK */
4086 static MY_ATTRIBUTE((nonnull, warn_unused_result))
4087 dberr_t
fts_sync_index(fts_sync_t * sync,fts_index_cache_t * index_cache)4088 fts_sync_index(
4089 /*===========*/
4090 	fts_sync_t*		sync,		/*!< in: sync state */
4091 	fts_index_cache_t*	index_cache)	/*!< in: index cache */
4092 {
4093 	trx_t*		trx = sync->trx;
4094 
4095 	trx->op_info = "doing SYNC index";
4096 
4097 	if (UNIV_UNLIKELY(fts_enable_diag_print)) {
4098 		ib::info() << "SYNC words: " << rbt_size(index_cache->words);
4099 	}
4100 
4101 	ut_ad(rbt_validate(index_cache->words));
4102 
4103 	return(fts_sync_write_words(trx, index_cache, sync->unlock_cache));
4104 }
4105 
4106 /** Check if index cache has been synced completely
4107 @param[in,out]	index_cache	index cache
4108 @return true if index is synced, otherwise false. */
4109 static
4110 bool
fts_sync_index_check(fts_index_cache_t * index_cache)4111 fts_sync_index_check(
4112 	fts_index_cache_t*	index_cache)
4113 {
4114 	const ib_rbt_node_t*	rbt_node;
4115 
4116 	for (rbt_node = rbt_first(index_cache->words);
4117 	     rbt_node != NULL;
4118 	     rbt_node = rbt_next(index_cache->words, rbt_node)) {
4119 
4120 		fts_tokenizer_word_t*	word;
4121 		word = rbt_value(fts_tokenizer_word_t, rbt_node);
4122 
4123 		fts_node_t*	fts_node;
4124 		fts_node = static_cast<fts_node_t*>(ib_vector_last(word->nodes));
4125 
4126 		if (!fts_node->synced) {
4127 			return(false);
4128 		}
4129 	}
4130 
4131 	return(true);
4132 }
4133 
4134 /** Reset synced flag in index cache when rollback
4135 @param[in,out]	index_cache	index cache */
4136 static
4137 void
fts_sync_index_reset(fts_index_cache_t * index_cache)4138 fts_sync_index_reset(
4139 	fts_index_cache_t*	index_cache)
4140 {
4141 	const ib_rbt_node_t*	rbt_node;
4142 
4143 	for (rbt_node = rbt_first(index_cache->words);
4144 	     rbt_node != NULL;
4145 	     rbt_node = rbt_next(index_cache->words, rbt_node)) {
4146 
4147 		fts_tokenizer_word_t*	word;
4148 		word = rbt_value(fts_tokenizer_word_t, rbt_node);
4149 
4150 		fts_node_t*	fts_node;
4151 		fts_node = static_cast<fts_node_t*>(ib_vector_last(word->nodes));
4152 
4153 		fts_node->synced = false;
4154 	}
4155 }
4156 
4157 /** Commit the SYNC, change state of processed doc ids etc.
4158 @param[in,out]	sync	sync state
4159 @return DB_SUCCESS if all OK */
4160 static  MY_ATTRIBUTE((nonnull, warn_unused_result))
4161 dberr_t
fts_sync_commit(fts_sync_t * sync)4162 fts_sync_commit(
4163 	fts_sync_t*	sync)
4164 {
4165 	dberr_t		error;
4166 	trx_t*		trx = sync->trx;
4167 	fts_cache_t*	cache = sync->table->fts->cache;
4168 	doc_id_t	last_doc_id;
4169 
4170 	trx->op_info = "doing SYNC commit";
4171 
4172 	/* After each Sync, update the CONFIG table about the max doc id
4173 	we just sync-ed to index table */
4174 	error = fts_cmp_set_sync_doc_id(sync->table, sync->max_doc_id, FALSE,
4175 					&last_doc_id);
4176 
4177 	/* Get the list of deleted documents that are either in the
4178 	cache or were headed there but were deleted before the add
4179 	thread got to them. */
4180 
4181 	if (error == DB_SUCCESS && ib_vector_size(cache->deleted_doc_ids) > 0) {
4182 
4183 		error = fts_sync_add_deleted_cache(
4184 			sync, cache->deleted_doc_ids);
4185 	}
4186 
4187 	/* We need to do this within the deleted lock since fts_delete() can
4188 	attempt to add a deleted doc id to the cache deleted id array. */
4189 	fts_cache_clear(cache);
4190 	DEBUG_SYNC_C("fts_deleted_doc_ids_clear");
4191 	fts_cache_init(cache);
4192 	rw_lock_x_unlock(&cache->lock);
4193 
4194 	if (UNIV_LIKELY(error == DB_SUCCESS)) {
4195 		fts_sql_commit(trx);
4196 	} else {
4197 		fts_sql_rollback(trx);
4198 		ib::error() << "(" << error << ") during SYNC of "
4199 			"table " << sync->table->name;
4200 	}
4201 
4202 	if (UNIV_UNLIKELY(fts_enable_diag_print) && elapsed_time) {
4203 		ib::info() << "SYNC for table " << sync->table->name
4204 			<< ": SYNC time: "
4205 			<< (time(NULL) - sync->start_time)
4206 			<< " secs: elapsed "
4207 			<< (double) n_nodes / elapsed_time
4208 			<< " ins/sec";
4209 	}
4210 
4211 	/* Avoid assertion in trx_t::free(). */
4212 	trx->dict_operation_lock_mode = 0;
4213 	trx->free();
4214 
4215 	return(error);
4216 }
4217 
4218 /** Rollback a sync operation
4219 @param[in,out]	sync	sync state */
4220 static
4221 void
fts_sync_rollback(fts_sync_t * sync)4222 fts_sync_rollback(
4223 	fts_sync_t*	sync)
4224 {
4225 	trx_t*		trx = sync->trx;
4226 	fts_cache_t*	cache = sync->table->fts->cache;
4227 
4228 	for (ulint i = 0; i < ib_vector_size(cache->indexes); ++i) {
4229 		ulint			j;
4230 		fts_index_cache_t*	index_cache;
4231 
4232 		index_cache = static_cast<fts_index_cache_t*>(
4233 			ib_vector_get(cache->indexes, i));
4234 
4235 		/* Reset synced flag so nodes will not be skipped
4236 		in the next sync, see fts_sync_write_words(). */
4237 		fts_sync_index_reset(index_cache);
4238 
4239 		for (j = 0; fts_index_selector[j].value; ++j) {
4240 
4241 			if (index_cache->ins_graph[j] != NULL) {
4242 
4243 				fts_que_graph_free_check_lock(
4244 					NULL, index_cache,
4245 					index_cache->ins_graph[j]);
4246 
4247 				index_cache->ins_graph[j] = NULL;
4248 			}
4249 
4250 			if (index_cache->sel_graph[j] != NULL) {
4251 
4252 				fts_que_graph_free_check_lock(
4253 					NULL, index_cache,
4254 					index_cache->sel_graph[j]);
4255 
4256 				index_cache->sel_graph[j] = NULL;
4257 			}
4258 		}
4259 	}
4260 
4261 	rw_lock_x_unlock(&cache->lock);
4262 
4263 	fts_sql_rollback(trx);
4264 
4265 	/* Avoid assertion in trx_t::free(). */
4266 	trx->dict_operation_lock_mode = 0;
4267 	trx->free();
4268 }
4269 
4270 /** Run SYNC on the table, i.e., write out data from the cache to the
4271 FTS auxiliary INDEX table and clear the cache at the end.
4272 @param[in,out]	sync		sync state
4273 @param[in]	unlock_cache	whether unlock cache lock when write node
4274 @param[in]	wait		whether wait when a sync is in progress
4275 @return DB_SUCCESS if all OK */
4276 static
4277 dberr_t
fts_sync(fts_sync_t * sync,bool unlock_cache,bool wait)4278 fts_sync(
4279 	fts_sync_t*	sync,
4280 	bool		unlock_cache,
4281 	bool		wait)
4282 {
4283 	if (srv_read_only_mode) {
4284 		return DB_READ_ONLY;
4285 	}
4286 
4287 	ulint		i;
4288 	dberr_t		error = DB_SUCCESS;
4289 	fts_cache_t*	cache = sync->table->fts->cache;
4290 
4291 	rw_lock_x_lock(&cache->lock);
4292 
4293 	/* Check if cache is being synced.
4294 	Note: we release cache lock in fts_sync_write_words() to
4295 	avoid long wait for the lock by other threads. */
4296 	while (sync->in_progress) {
4297 		rw_lock_x_unlock(&cache->lock);
4298 
4299 		if (wait) {
4300 			os_event_wait(sync->event);
4301 		} else {
4302 			return(DB_SUCCESS);
4303 		}
4304 
4305 		rw_lock_x_lock(&cache->lock);
4306 	}
4307 
4308 	sync->unlock_cache = unlock_cache;
4309 	sync->in_progress = true;
4310 
4311 	DEBUG_SYNC_C("fts_sync_begin");
4312 	fts_sync_begin(sync);
4313 
4314 begin_sync:
4315 	if (cache->total_size > fts_max_cache_size) {
4316 		/* Avoid the case: sync never finish when
4317 		insert/update keeps comming. */
4318 		ut_ad(sync->unlock_cache);
4319 		sync->unlock_cache = false;
4320 	}
4321 
4322 	for (i = 0; i < ib_vector_size(cache->indexes); ++i) {
4323 		fts_index_cache_t*	index_cache;
4324 
4325 		index_cache = static_cast<fts_index_cache_t*>(
4326 			ib_vector_get(cache->indexes, i));
4327 
4328 		if (index_cache->index->to_be_dropped
4329 		   || index_cache->index->table->to_be_dropped) {
4330 			continue;
4331 		}
4332 
4333 		DBUG_EXECUTE_IF("fts_instrument_sync_before_syncing",
4334 				os_thread_sleep(300000););
4335 		index_cache->index->index_fts_syncing = true;
4336 
4337 		error = fts_sync_index(sync, index_cache);
4338 
4339 		if (error != DB_SUCCESS) {
4340 			goto end_sync;
4341 		}
4342 	}
4343 
4344 	DBUG_EXECUTE_IF("fts_instrument_sync_interrupted",
4345 			sync->interrupted = true;
4346 			error = DB_INTERRUPTED;
4347 			goto end_sync;
4348 	);
4349 
4350 	/* Make sure all the caches are synced. */
4351 	for (i = 0; i < ib_vector_size(cache->indexes); ++i) {
4352 		fts_index_cache_t*	index_cache;
4353 
4354 		index_cache = static_cast<fts_index_cache_t*>(
4355 			ib_vector_get(cache->indexes, i));
4356 
4357 		if (index_cache->index->to_be_dropped
4358 		    || index_cache->index->table->to_be_dropped
4359 		    || fts_sync_index_check(index_cache)) {
4360 			continue;
4361 		}
4362 
4363 		goto begin_sync;
4364 	}
4365 
4366 end_sync:
4367 	if (error == DB_SUCCESS && !sync->interrupted) {
4368 		error = fts_sync_commit(sync);
4369 	} else {
4370 		fts_sync_rollback(sync);
4371 	}
4372 
4373 	rw_lock_x_lock(&cache->lock);
4374 	/* Clear fts syncing flags of any indexes in case sync is
4375 	interrupted */
4376 	for (i = 0; i < ib_vector_size(cache->indexes); ++i) {
4377 		static_cast<fts_index_cache_t*>(
4378 			ib_vector_get(cache->indexes, i))
4379 			->index->index_fts_syncing = false;
4380 	}
4381 
4382 	sync->interrupted = false;
4383 	sync->in_progress = false;
4384 	os_event_set(sync->event);
4385 	rw_lock_x_unlock(&cache->lock);
4386 
4387 	/* We need to check whether an optimize is required, for that
4388 	we make copies of the two variables that control the trigger. These
4389 	variables can change behind our back and we don't want to hold the
4390 	lock for longer than is needed. */
4391 	mutex_enter(&cache->deleted_lock);
4392 
4393 	cache->added = 0;
4394 	cache->deleted = 0;
4395 
4396 	mutex_exit(&cache->deleted_lock);
4397 
4398 	return(error);
4399 }
4400 
4401 /** Run SYNC on the table, i.e., write out data from the cache to the
4402 FTS auxiliary INDEX table and clear the cache at the end.
4403 @param[in,out]	table		fts table
4404 @param[in]	wait		whether wait for existing sync to finish
4405 @return DB_SUCCESS on success, error code on failure. */
fts_sync_table(dict_table_t * table,bool wait)4406 dberr_t fts_sync_table(dict_table_t* table, bool wait)
4407 {
4408 	dberr_t	err = DB_SUCCESS;
4409 
4410 	ut_ad(table->fts);
4411 
4412 	if (table->space && table->fts->cache
4413 	    && !dict_table_is_corrupted(table)) {
4414 		err = fts_sync(table->fts->cache->sync, !wait, wait);
4415 	}
4416 
4417 	return(err);
4418 }
4419 
4420 /** Check if a fts token is a stopword or less than fts_min_token_size
4421 or greater than fts_max_token_size.
4422 @param[in]	token		token string
4423 @param[in]	stopwords	stopwords rb tree
4424 @param[in]	cs		token charset
4425 @retval	true	if it is not stopword and length in range
4426 @retval	false	if it is stopword or lenght not in range */
4427 bool
fts_check_token(const fts_string_t * token,const ib_rbt_t * stopwords,const CHARSET_INFO * cs)4428 fts_check_token(
4429 	const fts_string_t*		token,
4430 	const ib_rbt_t*			stopwords,
4431 	const CHARSET_INFO*		cs)
4432 {
4433 	ut_ad(cs != NULL || stopwords == NULL);
4434 
4435 	ib_rbt_bound_t  parent;
4436 
4437 	return(token->f_n_char >= fts_min_token_size
4438 	       && token->f_n_char <= fts_max_token_size
4439 	       && (stopwords == NULL
4440 		   || rbt_search(stopwords, &parent, token) != 0));
4441 }
4442 
4443 /** Add the token and its start position to the token's list of positions.
4444 @param[in,out]	result_doc	result doc rb tree
4445 @param[in]	str		token string
4446 @param[in]	position	token position */
4447 static
4448 void
fts_add_token(fts_doc_t * result_doc,fts_string_t str,ulint position)4449 fts_add_token(
4450 	fts_doc_t*	result_doc,
4451 	fts_string_t	str,
4452 	ulint		position)
4453 {
4454 	/* Ignore string whose character number is less than
4455 	"fts_min_token_size" or more than "fts_max_token_size" */
4456 
4457 	if (fts_check_token(&str, NULL, result_doc->charset)) {
4458 
4459 		mem_heap_t*	heap;
4460 		fts_string_t	t_str;
4461 		fts_token_t*	token;
4462 		ib_rbt_bound_t	parent;
4463 		ulint		newlen;
4464 
4465 		heap = static_cast<mem_heap_t*>(result_doc->self_heap->arg);
4466 
4467 		t_str.f_n_char = str.f_n_char;
4468 
4469 		t_str.f_len = str.f_len * result_doc->charset->casedn_multiply + 1;
4470 
4471 		t_str.f_str = static_cast<byte*>(
4472 			mem_heap_alloc(heap, t_str.f_len));
4473 
4474 		/* For binary collations, a case sensitive search is
4475 		performed. Hence don't convert to lower case. */
4476 		if (my_binary_compare(result_doc->charset)) {
4477 			memcpy(t_str.f_str, str.f_str, str.f_len);
4478 			t_str.f_str[str.f_len]= 0;
4479 			newlen= str.f_len;
4480 		} else {
4481 			newlen = innobase_fts_casedn_str(
4482 				result_doc->charset, (char*) str.f_str, str.f_len,
4483 				(char*) t_str.f_str, t_str.f_len);
4484 		}
4485 
4486 		t_str.f_len = newlen;
4487 		t_str.f_str[newlen] = 0;
4488 
4489 		/* Add the word to the document statistics. If the word
4490 		hasn't been seen before we create a new entry for it. */
4491 		if (rbt_search(result_doc->tokens, &parent, &t_str) != 0) {
4492 			fts_token_t	new_token;
4493 
4494 			new_token.text.f_len = newlen;
4495 			new_token.text.f_str = t_str.f_str;
4496 			new_token.text.f_n_char = t_str.f_n_char;
4497 
4498 			new_token.positions = ib_vector_create(
4499 				result_doc->self_heap, sizeof(ulint), 32);
4500 
4501 			parent.last = rbt_add_node(
4502 				result_doc->tokens, &parent, &new_token);
4503 
4504 			ut_ad(rbt_validate(result_doc->tokens));
4505 		}
4506 
4507 		token = rbt_value(fts_token_t, parent.last);
4508 		ib_vector_push(token->positions, &position);
4509 	}
4510 }
4511 
4512 /********************************************************************
4513 Process next token from document starting at the given position, i.e., add
4514 the token's start position to the token's list of positions.
4515 @return number of characters handled in this call */
4516 static
4517 ulint
fts_process_token(fts_doc_t * doc,fts_doc_t * result,ulint start_pos,ulint add_pos)4518 fts_process_token(
4519 /*==============*/
4520 	fts_doc_t*	doc,		/* in/out: document to
4521 					tokenize */
4522 	fts_doc_t*	result,		/* out: if provided, save
4523 					result here */
4524 	ulint		start_pos,	/*!< in: start position in text */
4525 	ulint		add_pos)	/*!< in: add this position to all
4526 					tokens from this tokenization */
4527 {
4528 	ulint		ret;
4529 	fts_string_t	str;
4530 	ulint		position;
4531 	fts_doc_t*	result_doc;
4532 	byte		buf[FTS_MAX_WORD_LEN + 1];
4533 
4534 	str.f_str = buf;
4535 
4536 	/* Determine where to save the result. */
4537 	result_doc = (result != NULL) ? result : doc;
4538 
4539 	/* The length of a string in characters is set here only. */
4540 
4541 	ret = innobase_mysql_fts_get_token(
4542 		doc->charset, doc->text.f_str + start_pos,
4543 		doc->text.f_str + doc->text.f_len, &str);
4544 
4545 	position = start_pos + ret - str.f_len + add_pos;
4546 
4547 	fts_add_token(result_doc, str, position);
4548 
4549 	return(ret);
4550 }
4551 
4552 /*************************************************************//**
4553 Get token char size by charset
4554 @return token size */
4555 ulint
fts_get_token_size(const CHARSET_INFO * cs,const char * token,ulint len)4556 fts_get_token_size(
4557 /*===============*/
4558 	const CHARSET_INFO*	cs,	/*!< in: Character set */
4559 	const char*		token,	/*!< in: token */
4560 	ulint			len)	/*!< in: token length */
4561 {
4562 	char*	start;
4563 	char*	end;
4564 	ulint	size = 0;
4565 
4566 	/* const_cast is for reinterpret_cast below, or it will fail. */
4567 	start = const_cast<char*>(token);
4568 	end = start + len;
4569 	while (start < end) {
4570 		int	ctype;
4571 		int	mbl;
4572 
4573 		mbl = cs->cset->ctype(
4574 			cs, &ctype,
4575 			reinterpret_cast<uchar*>(start),
4576 			reinterpret_cast<uchar*>(end));
4577 
4578 		size++;
4579 
4580 		start += mbl > 0 ? mbl : (mbl < 0 ? -mbl : 1);
4581 	}
4582 
4583 	return(size);
4584 }
4585 
4586 /*************************************************************//**
4587 FTS plugin parser 'myql_parser' callback function for document tokenize.
4588 Refer to 'st_mysql_ftparser_param' for more detail.
4589 @return always returns 0 */
4590 int
fts_tokenize_document_internal(MYSQL_FTPARSER_PARAM * param,const char * doc,int len)4591 fts_tokenize_document_internal(
4592 /*===========================*/
4593 	MYSQL_FTPARSER_PARAM*	param,	/*!< in: parser parameter */
4594 	const char*		doc,/*!< in/out: document */
4595 	int			len)	/*!< in: document length */
4596 {
4597 	fts_string_t	str;
4598 	byte		buf[FTS_MAX_WORD_LEN + 1];
4599 	/* JAN: TODO: MySQL 5.7
4600 	MYSQL_FTPARSER_BOOLEAN_INFO bool_info =
4601 		{ FT_TOKEN_WORD, 0, 0, 0, 0, 0, ' ', 0 };
4602 	*/
4603 	MYSQL_FTPARSER_BOOLEAN_INFO bool_info =
4604 		{ FT_TOKEN_WORD, 0, 0, 0, 0, ' ', 0};
4605 
4606 	ut_ad(len >= 0);
4607 
4608 	str.f_str = buf;
4609 
4610 	for (ulint i = 0, inc = 0; i < static_cast<ulint>(len); i += inc) {
4611 		inc = innobase_mysql_fts_get_token(
4612 			const_cast<CHARSET_INFO*>(param->cs),
4613 			(uchar*)(doc) + i,
4614 			(uchar*)(doc) + len,
4615 			&str);
4616 
4617 		if (str.f_len > 0) {
4618 			/* JAN: TODO: MySQL 5.7
4619 			bool_info.position =
4620 				static_cast<int>(i + inc - str.f_len);
4621 			ut_ad(bool_info.position >= 0);
4622 			*/
4623 
4624 			/* Stop when add word fails */
4625 			if (param->mysql_add_word(
4626 				param,
4627 				reinterpret_cast<char*>(str.f_str),
4628 				static_cast<int>(str.f_len),
4629 				&bool_info)) {
4630 				break;
4631 			}
4632 		}
4633 	}
4634 
4635 	return(0);
4636 }
4637 
4638 /******************************************************************//**
4639 FTS plugin parser 'myql_add_word' callback function for document tokenize.
4640 Refer to 'st_mysql_ftparser_param' for more detail.
4641 @return always returns 0 */
4642 static
4643 int
fts_tokenize_add_word_for_parser(MYSQL_FTPARSER_PARAM * param,const char * word,int word_len,MYSQL_FTPARSER_BOOLEAN_INFO *)4644 fts_tokenize_add_word_for_parser(
4645 /*=============================*/
4646 	MYSQL_FTPARSER_PARAM*	param,		/* in: parser paramter */
4647 	const char*			word,		/* in: token word */
4648 	int			word_len,	/* in: word len */
4649 	MYSQL_FTPARSER_BOOLEAN_INFO*)
4650 {
4651 	fts_string_t	str;
4652 	fts_tokenize_param_t*	fts_param;
4653 	fts_doc_t*	result_doc;
4654 	ulint		position;
4655 
4656 	fts_param = static_cast<fts_tokenize_param_t*>(param->mysql_ftparam);
4657 	result_doc = fts_param->result_doc;
4658 	ut_ad(result_doc != NULL);
4659 
4660 	str.f_str = (byte*)(word);
4661 	str.f_len = ulint(word_len);
4662 	str.f_n_char = fts_get_token_size(
4663 		const_cast<CHARSET_INFO*>(param->cs), word, str.f_len);
4664 
4665 	/* JAN: TODO: MySQL 5.7 FTS
4666 	ut_ad(boolean_info->position >= 0);
4667 	position = boolean_info->position + fts_param->add_pos;
4668 	*/
4669 	position = fts_param->add_pos++;
4670 
4671 	fts_add_token(result_doc, str, position);
4672 
4673 	return(0);
4674 }
4675 
4676 /******************************************************************//**
4677 Parse a document using an external / user supplied parser */
4678 static
4679 void
fts_tokenize_by_parser(fts_doc_t * doc,st_mysql_ftparser * parser,fts_tokenize_param_t * fts_param)4680 fts_tokenize_by_parser(
4681 /*===================*/
4682 	fts_doc_t*		doc,	/* in/out: document to tokenize */
4683 	st_mysql_ftparser*	parser, /* in: plugin fts parser */
4684 	fts_tokenize_param_t*	fts_param) /* in: fts tokenize param */
4685 {
4686 	MYSQL_FTPARSER_PARAM	param;
4687 
4688 	ut_a(parser);
4689 
4690 	/* Set paramters for param */
4691 	param.mysql_parse = fts_tokenize_document_internal;
4692 	param.mysql_add_word = fts_tokenize_add_word_for_parser;
4693 	param.mysql_ftparam = fts_param;
4694 	param.cs = doc->charset;
4695 	param.doc = reinterpret_cast<char*>(doc->text.f_str);
4696 	param.length = static_cast<int>(doc->text.f_len);
4697 	param.mode= MYSQL_FTPARSER_SIMPLE_MODE;
4698 
4699 	PARSER_INIT(parser, &param);
4700 	parser->parse(&param);
4701 	PARSER_DEINIT(parser, &param);
4702 }
4703 
4704 /** Tokenize a document.
4705 @param[in,out]	doc	document to tokenize
4706 @param[out]	result	tokenization result
4707 @param[in]	parser	pluggable parser */
4708 static
4709 void
fts_tokenize_document(fts_doc_t * doc,fts_doc_t * result,st_mysql_ftparser * parser)4710 fts_tokenize_document(
4711 	fts_doc_t*		doc,
4712 	fts_doc_t*		result,
4713 	st_mysql_ftparser*	parser)
4714 {
4715 	ut_a(!doc->tokens);
4716 	ut_a(doc->charset);
4717 
4718 	doc->tokens = rbt_create_arg_cmp(sizeof(fts_token_t),
4719 					 innobase_fts_text_cmp,
4720 					 (void*) doc->charset);
4721 
4722 	if (parser != NULL) {
4723 		fts_tokenize_param_t	fts_param;
4724 		fts_param.result_doc = (result != NULL) ? result : doc;
4725 		fts_param.add_pos = 0;
4726 
4727 		fts_tokenize_by_parser(doc, parser, &fts_param);
4728 	} else {
4729 		ulint		inc;
4730 
4731 		for (ulint i = 0; i < doc->text.f_len; i += inc) {
4732 			inc = fts_process_token(doc, result, i, 0);
4733 			ut_a(inc > 0);
4734 		}
4735 	}
4736 }
4737 
4738 /** Continue to tokenize a document.
4739 @param[in,out]	doc	document to tokenize
4740 @param[in]	add_pos	add this position to all tokens from this tokenization
4741 @param[out]	result	tokenization result
4742 @param[in]	parser	pluggable parser */
4743 static
4744 void
fts_tokenize_document_next(fts_doc_t * doc,ulint add_pos,fts_doc_t * result,st_mysql_ftparser * parser)4745 fts_tokenize_document_next(
4746 	fts_doc_t*		doc,
4747 	ulint			add_pos,
4748 	fts_doc_t*		result,
4749 	st_mysql_ftparser*	parser)
4750 {
4751 	ut_a(doc->tokens);
4752 
4753 	if (parser) {
4754 		fts_tokenize_param_t	fts_param;
4755 
4756 		fts_param.result_doc = (result != NULL) ? result : doc;
4757 		fts_param.add_pos = add_pos;
4758 
4759 		fts_tokenize_by_parser(doc, parser, &fts_param);
4760 	} else {
4761 		ulint		inc;
4762 
4763 		for (ulint i = 0; i < doc->text.f_len; i += inc) {
4764 			inc = fts_process_token(doc, result, i, add_pos);
4765 			ut_a(inc > 0);
4766 		}
4767 	}
4768 }
4769 
4770 /** Create the vector of fts_get_doc_t instances.
4771 @param[in,out]	cache	fts cache
4772 @return	vector of fts_get_doc_t instances */
4773 static
4774 ib_vector_t*
fts_get_docs_create(fts_cache_t * cache)4775 fts_get_docs_create(
4776 	fts_cache_t*	cache)
4777 {
4778 	ib_vector_t*	get_docs;
4779 
4780 	ut_ad(rw_lock_own(&cache->init_lock, RW_LOCK_X));
4781 
4782 	/* We need one instance of fts_get_doc_t per index. */
4783 	get_docs = ib_vector_create(cache->self_heap, sizeof(fts_get_doc_t), 4);
4784 
4785 	/* Create the get_doc instance, we need one of these
4786 	per FTS index. */
4787 	for (ulint i = 0; i < ib_vector_size(cache->indexes); ++i) {
4788 
4789 		dict_index_t**	index;
4790 		fts_get_doc_t*	get_doc;
4791 
4792 		index = static_cast<dict_index_t**>(
4793 			ib_vector_get(cache->indexes, i));
4794 
4795 		get_doc = static_cast<fts_get_doc_t*>(
4796 			ib_vector_push(get_docs, NULL));
4797 
4798 		memset(get_doc, 0x0, sizeof(*get_doc));
4799 
4800 		get_doc->index_cache = fts_get_index_cache(cache, *index);
4801 		get_doc->cache = cache;
4802 
4803 		/* Must find the index cache. */
4804 		ut_a(get_doc->index_cache != NULL);
4805 	}
4806 
4807 	return(get_docs);
4808 }
4809 
4810 /********************************************************************
4811 Release any resources held by the fts_get_doc_t instances. */
4812 static
4813 void
fts_get_docs_clear(ib_vector_t * get_docs)4814 fts_get_docs_clear(
4815 /*===============*/
4816 	ib_vector_t*	get_docs)		/*!< in: Doc retrieval vector */
4817 {
4818 	ulint		i;
4819 
4820 	/* Release the get doc graphs if any. */
4821 	for (i = 0; i < ib_vector_size(get_docs); ++i) {
4822 
4823 		fts_get_doc_t*	get_doc = static_cast<fts_get_doc_t*>(
4824 			ib_vector_get(get_docs, i));
4825 
4826 		if (get_doc->get_document_graph != NULL) {
4827 
4828 			ut_a(get_doc->index_cache);
4829 
4830 			fts_que_graph_free(get_doc->get_document_graph);
4831 			get_doc->get_document_graph = NULL;
4832 		}
4833 	}
4834 }
4835 
4836 /*********************************************************************//**
4837 Get the initial Doc ID by consulting the CONFIG table
4838 @return initial Doc ID */
4839 doc_id_t
fts_init_doc_id(const dict_table_t * table)4840 fts_init_doc_id(
4841 /*============*/
4842 	const dict_table_t*	table)		/*!< in: table */
4843 {
4844 	doc_id_t	max_doc_id = 0;
4845 
4846 	rw_lock_x_lock(&table->fts->cache->lock);
4847 
4848 	/* Return if the table is already initialized for DOC ID */
4849 	if (table->fts->cache->first_doc_id != FTS_NULL_DOC_ID) {
4850 		rw_lock_x_unlock(&table->fts->cache->lock);
4851 		return(0);
4852 	}
4853 
4854 	DEBUG_SYNC_C("fts_initialize_doc_id");
4855 
4856 	/* Then compare this value with the ID value stored in the CONFIG
4857 	table. The larger one will be our new initial Doc ID */
4858 	fts_cmp_set_sync_doc_id(table, 0, FALSE, &max_doc_id);
4859 
4860 	/* If DICT_TF2_FTS_ADD_DOC_ID is set, we are in the process of
4861 	creating index (and add doc id column. No need to recovery
4862 	documents */
4863 	if (!DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_ADD_DOC_ID)) {
4864 		fts_init_index((dict_table_t*) table, TRUE);
4865 	}
4866 
4867 	table->fts->added_synced = true;
4868 
4869 	table->fts->cache->first_doc_id = max_doc_id;
4870 
4871 	rw_lock_x_unlock(&table->fts->cache->lock);
4872 
4873 	ut_ad(max_doc_id > 0);
4874 
4875 	return(max_doc_id);
4876 }
4877 
#ifdef FTS_MULT_INDEX
/** Check if the index of a get_doc instance is in the affected set.
@param[in]	fts_indexes	affected FTS indexes
@param[in]	get_doc		info for reading document
@return TRUE if the index is among fts_indexes, else FALSE */
static
ibool
fts_is_index_updated(
	const ib_vector_t*	fts_indexes,
	const fts_get_doc_t*	get_doc)
{
	const dict_index_t*	wanted = get_doc->index_cache->index;

	for (ulint pos = 0; pos < ib_vector_size(fts_indexes); ++pos) {

		const dict_index_t*	candidate
			= static_cast<const dict_index_t*>(
				ib_vector_getp_const(fts_indexes, pos));

		ut_a(candidate != NULL);

		/* Indexes are compared by pointer identity. */
		if (candidate == wanted) {
			return(TRUE);
		}
	}

	return(FALSE);
}
#endif
4909 
4910 /*********************************************************************//**
4911 Fetch COUNT(*) from specified table.
4912 @return the number of rows in the table */
4913 ulint
fts_get_rows_count(fts_table_t * fts_table)4914 fts_get_rows_count(
4915 /*===============*/
4916 	fts_table_t*	fts_table)	/*!< in: fts table to read */
4917 {
4918 	trx_t*		trx;
4919 	pars_info_t*	info;
4920 	que_t*		graph;
4921 	dberr_t		error;
4922 	ulint		count = 0;
4923 	char		table_name[MAX_FULL_NAME_LEN];
4924 
4925 	trx = trx_create();
4926 	trx->op_info = "fetching FT table rows count";
4927 
4928 	info = pars_info_create();
4929 
4930 	pars_info_bind_function(info, "my_func", fts_read_ulint, &count);
4931 
4932 	fts_get_table_name(fts_table, table_name);
4933 	pars_info_bind_id(info, "table_name", table_name);
4934 
4935 	graph = fts_parse_sql(
4936 		fts_table,
4937 		info,
4938 		"DECLARE FUNCTION my_func;\n"
4939 		"DECLARE CURSOR c IS"
4940 		" SELECT COUNT(*)"
4941 		" FROM $table_name;\n"
4942 		"BEGIN\n"
4943 		"\n"
4944 		"OPEN c;\n"
4945 		"WHILE 1 = 1 LOOP\n"
4946 		"  FETCH c INTO my_func();\n"
4947 		"  IF c % NOTFOUND THEN\n"
4948 		"    EXIT;\n"
4949 		"  END IF;\n"
4950 		"END LOOP;\n"
4951 		"CLOSE c;");
4952 
4953 	for (;;) {
4954 		error = fts_eval_sql(trx, graph);
4955 
4956 		if (UNIV_LIKELY(error == DB_SUCCESS)) {
4957 			fts_sql_commit(trx);
4958 
4959 			break;				/* Exit the loop. */
4960 		} else {
4961 			fts_sql_rollback(trx);
4962 
4963 			if (error == DB_LOCK_WAIT_TIMEOUT) {
4964 				ib::warn() << "lock wait timeout reading"
4965 					" FTS table. Retrying!";
4966 
4967 				trx->error_state = DB_SUCCESS;
4968 			} else {
4969 				ib::error() << "(" << error
4970 					<< ") while reading FTS table "
4971 					<< table_name;
4972 
4973 				break;			/* Exit the loop. */
4974 			}
4975 		}
4976 	}
4977 
4978 	fts_que_graph_free(graph);
4979 
4980 	trx->free();
4981 
4982 	return(count);
4983 }
4984 
#ifdef FTS_CACHE_SIZE_DEBUG
/** Read the max cache size parameter from the config table and store it
in the sync state.
@param[in,out]	sync	sync state */
static
void
fts_update_max_cache_size(
	fts_sync_t*	sync)
{
	fts_table_t	fts_table;
	trx_t*		trx = trx_create();

	FTS_INIT_FTS_TABLE(&fts_table, "CONFIG", FTS_COMMON_TABLE, sync->table);

	/* The size returned is in bytes. */
	const ulint	max_size = fts_get_max_cache_size(trx, &fts_table);

	sync->max_cache_size = max_size;

	fts_sql_commit(trx);

	trx->free();
}
#endif /* FTS_CACHE_SIZE_DEBUG */
5009 
5010 /*********************************************************************//**
5011 Free the modified rows of a table. */
5012 UNIV_INLINE
5013 void
fts_trx_table_rows_free(ib_rbt_t * rows)5014 fts_trx_table_rows_free(
5015 /*====================*/
5016 	ib_rbt_t*	rows)			/*!< in: rbt of rows to free */
5017 {
5018 	const ib_rbt_node_t*	node;
5019 
5020 	for (node = rbt_first(rows); node; node = rbt_first(rows)) {
5021 		fts_trx_row_t*	row;
5022 
5023 		row = rbt_value(fts_trx_row_t, node);
5024 
5025 		if (row->fts_indexes != NULL) {
5026 			/* This vector shouldn't be using the
5027 			heap allocator.  */
5028 			ut_a(row->fts_indexes->allocator->arg == NULL);
5029 
5030 			ib_vector_free(row->fts_indexes);
5031 			row->fts_indexes = NULL;
5032 		}
5033 
5034 		ut_free(rbt_remove_node(rows, node));
5035 	}
5036 
5037 	ut_a(rbt_empty(rows));
5038 	rbt_free(rows);
5039 }
5040 
5041 /*********************************************************************//**
5042 Free an FTS savepoint instance. */
5043 UNIV_INLINE
5044 void
fts_savepoint_free(fts_savepoint_t * savepoint)5045 fts_savepoint_free(
5046 /*===============*/
5047 	fts_savepoint_t*	savepoint)	/*!< in: savepoint instance */
5048 {
5049 	const ib_rbt_node_t*	node;
5050 	ib_rbt_t*		tables = savepoint->tables;
5051 
5052 	/* Nothing to free! */
5053 	if (tables == NULL) {
5054 		return;
5055 	}
5056 
5057 	for (node = rbt_first(tables); node; node = rbt_first(tables)) {
5058 		fts_trx_table_t*	ftt;
5059 		fts_trx_table_t**	fttp;
5060 
5061 		fttp = rbt_value(fts_trx_table_t*, node);
5062 		ftt = *fttp;
5063 
5064 		/* This can be NULL if a savepoint was released. */
5065 		if (ftt->rows != NULL) {
5066 			fts_trx_table_rows_free(ftt->rows);
5067 			ftt->rows = NULL;
5068 		}
5069 
5070 		/* This can be NULL if a savepoint was released. */
5071 		if (ftt->added_doc_ids != NULL) {
5072 			fts_doc_ids_free(ftt->added_doc_ids);
5073 			ftt->added_doc_ids = NULL;
5074 		}
5075 
5076 		/* The default savepoint name must be NULL. */
5077 		if (ftt->docs_added_graph) {
5078 			fts_que_graph_free(ftt->docs_added_graph);
5079 		}
5080 
5081 		/* NOTE: We are responsible for free'ing the node */
5082 		ut_free(rbt_remove_node(tables, node));
5083 	}
5084 
5085 	ut_a(rbt_empty(tables));
5086 	rbt_free(tables);
5087 	savepoint->tables = NULL;
5088 }
5089 
5090 /*********************************************************************//**
5091 Free an FTS trx. */
5092 void
fts_trx_free(fts_trx_t * fts_trx)5093 fts_trx_free(
5094 /*=========*/
5095 	fts_trx_t*	fts_trx)		/* in, own: FTS trx */
5096 {
5097 	ulint		i;
5098 
5099 	for (i = 0; i < ib_vector_size(fts_trx->savepoints); ++i) {
5100 		fts_savepoint_t*	savepoint;
5101 
5102 		savepoint = static_cast<fts_savepoint_t*>(
5103 			ib_vector_get(fts_trx->savepoints, i));
5104 
5105 		/* The default savepoint name must be NULL. */
5106 		if (i == 0) {
5107 			ut_a(savepoint->name == NULL);
5108 		}
5109 
5110 		fts_savepoint_free(savepoint);
5111 	}
5112 
5113 	for (i = 0; i < ib_vector_size(fts_trx->last_stmt); ++i) {
5114 		fts_savepoint_t*	savepoint;
5115 
5116 		savepoint = static_cast<fts_savepoint_t*>(
5117 			ib_vector_get(fts_trx->last_stmt, i));
5118 
5119 		/* The default savepoint name must be NULL. */
5120 		if (i == 0) {
5121 			ut_a(savepoint->name == NULL);
5122 		}
5123 
5124 		fts_savepoint_free(savepoint);
5125 	}
5126 
5127 	if (fts_trx->heap) {
5128 		mem_heap_free(fts_trx->heap);
5129 	}
5130 }
5131 
5132 /*********************************************************************//**
5133 Extract the doc id from the FTS hidden column.
5134 @return doc id that was extracted from rec */
5135 doc_id_t
fts_get_doc_id_from_row(dict_table_t * table,dtuple_t * row)5136 fts_get_doc_id_from_row(
5137 /*====================*/
5138 	dict_table_t*	table,			/*!< in: table */
5139 	dtuple_t*	row)			/*!< in: row whose FTS doc id we
5140 						want to extract.*/
5141 {
5142 	dfield_t*	field;
5143 	doc_id_t	doc_id = 0;
5144 
5145 	ut_a(table->fts->doc_col != ULINT_UNDEFINED);
5146 
5147 	field = dtuple_get_nth_field(row, table->fts->doc_col);
5148 
5149 	ut_a(dfield_get_len(field) == sizeof(doc_id));
5150 	ut_a(dfield_get_type(field)->mtype == DATA_INT);
5151 
5152 	doc_id = fts_read_doc_id(
5153 		static_cast<const byte*>(dfield_get_data(field)));
5154 
5155 	return(doc_id);
5156 }
5157 
5158 /** Extract the doc id from the record that belongs to index.
5159 @param[in]	table	table
5160 @param[in]	rec	record contains FTS_DOC_ID
5161 @param[in]	index	index of rec
5162 @param[in]	heap	heap memory
5163 @return doc id that was extracted from rec */
5164 doc_id_t
fts_get_doc_id_from_rec(dict_table_t * table,const rec_t * rec,const dict_index_t * index,mem_heap_t * heap)5165 fts_get_doc_id_from_rec(
5166 	dict_table_t*		table,
5167 	const rec_t*		rec,
5168 	const dict_index_t*	index,
5169 	mem_heap_t*		heap)
5170 {
5171 	ulint		len;
5172 	const byte*	data;
5173 	ulint		col_no;
5174 	doc_id_t	doc_id = 0;
5175 	rec_offs	offsets_[REC_OFFS_NORMAL_SIZE];
5176 	rec_offs*	offsets = offsets_;
5177 	mem_heap_t*	my_heap = heap;
5178 
5179 	ut_a(table->fts->doc_col != ULINT_UNDEFINED);
5180 
5181 	rec_offs_init(offsets_);
5182 
5183 	offsets = rec_get_offsets(
5184 		rec, index, offsets, index->n_core_fields,
5185 		ULINT_UNDEFINED, &my_heap);
5186 
5187 	col_no = dict_col_get_index_pos(
5188 		&table->cols[table->fts->doc_col], index);
5189 
5190 	ut_ad(col_no != ULINT_UNDEFINED);
5191 
5192 	data = rec_get_nth_field(rec, offsets, col_no, &len);
5193 
5194 	ut_a(len == 8);
5195 	ut_ad(8 == sizeof(doc_id));
5196 	doc_id = static_cast<doc_id_t>(mach_read_from_8(data));
5197 
5198 	if (my_heap && !heap) {
5199 		mem_heap_free(my_heap);
5200 	}
5201 
5202 	return(doc_id);
5203 }
5204 
5205 /*********************************************************************//**
5206 Search the index specific cache for a particular FTS index.
5207 @return the index specific cache else NULL */
5208 fts_index_cache_t*
fts_find_index_cache(const fts_cache_t * cache,const dict_index_t * index)5209 fts_find_index_cache(
5210 /*=================*/
5211 	const fts_cache_t*	cache,		/*!< in: cache to search */
5212 	const dict_index_t*	index)		/*!< in: index to search for */
5213 {
5214 	/* We cast away the const because our internal function, takes
5215 	non-const cache arg and returns a non-const pointer. */
5216 	return(static_cast<fts_index_cache_t*>(
5217 		fts_get_index_cache((fts_cache_t*) cache, index)));
5218 }
5219 
5220 /*********************************************************************//**
5221 Search cache for word.
5222 @return the word node vector if found else NULL */
5223 const ib_vector_t*
fts_cache_find_word(const fts_index_cache_t * index_cache,const fts_string_t * text)5224 fts_cache_find_word(
5225 /*================*/
5226 	const fts_index_cache_t*index_cache,	/*!< in: cache to search */
5227 	const fts_string_t*	text)		/*!< in: word to search for */
5228 {
5229 	ib_rbt_bound_t		parent;
5230 	const ib_vector_t*	nodes = NULL;
5231 #ifdef UNIV_DEBUG
5232 	dict_table_t*		table = index_cache->index->table;
5233 	fts_cache_t*		cache = table->fts->cache;
5234 
5235 	ut_ad(rw_lock_own(&cache->lock, RW_LOCK_X));
5236 #endif /* UNIV_DEBUG */
5237 
5238 	/* Lookup the word in the rb tree */
5239 	if (rbt_search(index_cache->words, &parent, text) == 0) {
5240 		const fts_tokenizer_word_t*	word;
5241 
5242 		word = rbt_value(fts_tokenizer_word_t, parent.last);
5243 
5244 		nodes = word->nodes;
5245 	}
5246 
5247 	return(nodes);
5248 }
5249 
5250 /*********************************************************************//**
5251 Append deleted doc ids to vector. */
5252 void
fts_cache_append_deleted_doc_ids(const fts_cache_t * cache,ib_vector_t * vector)5253 fts_cache_append_deleted_doc_ids(
5254 /*=============================*/
5255 	const fts_cache_t*	cache,		/*!< in: cache to use */
5256 	ib_vector_t*		vector)		/*!< in: append to this vector */
5257 {
5258 	mutex_enter(const_cast<ib_mutex_t*>(&cache->deleted_lock));
5259 
5260 	if (cache->deleted_doc_ids == NULL) {
5261 		mutex_exit((ib_mutex_t*) &cache->deleted_lock);
5262 		return;
5263 	}
5264 
5265 
5266 	for (ulint i = 0; i < ib_vector_size(cache->deleted_doc_ids); ++i) {
5267 		doc_id_t*	update;
5268 
5269 		update = static_cast<doc_id_t*>(
5270 			ib_vector_get(cache->deleted_doc_ids, i));
5271 
5272 		ib_vector_push(vector, &update);
5273 	}
5274 
5275 	mutex_exit((ib_mutex_t*) &cache->deleted_lock);
5276 }
5277 
5278 /*********************************************************************//**
5279 Add the FTS document id hidden column. */
5280 void
fts_add_doc_id_column(dict_table_t * table,mem_heap_t * heap)5281 fts_add_doc_id_column(
5282 /*==================*/
5283 	dict_table_t*	table,	/*!< in/out: Table with FTS index */
5284 	mem_heap_t*	heap)	/*!< in: temporary memory heap, or NULL */
5285 {
5286 	dict_mem_table_add_col(
5287 		table, heap,
5288 		FTS_DOC_ID_COL_NAME,
5289 		DATA_INT,
5290 		dtype_form_prtype(
5291 			DATA_NOT_NULL | DATA_UNSIGNED
5292 			| DATA_BINARY_TYPE | DATA_FTS_DOC_ID, 0),
5293 		sizeof(doc_id_t));
5294 	DICT_TF2_FLAG_SET(table, DICT_TF2_FTS_HAS_DOC_ID);
5295 }
5296 
5297 /** Add new fts doc id to the update vector.
5298 @param[in]	table		the table that contains the FTS index.
5299 @param[in,out]	ufield		the fts doc id field in the update vector.
5300 				No new memory is allocated for this in this
5301 				function.
5302 @param[in,out]	next_doc_id	the fts doc id that has been added to the
5303 				update vector.  If 0, a new fts doc id is
5304 				automatically generated.  The memory provided
5305 				for this argument will be used by the update
5306 				vector. Ensure that the life time of this
5307 				memory matches that of the update vector.
5308 @return the fts doc id used in the update vector */
5309 doc_id_t
fts_update_doc_id(dict_table_t * table,upd_field_t * ufield,doc_id_t * next_doc_id)5310 fts_update_doc_id(
5311 	dict_table_t*	table,
5312 	upd_field_t*	ufield,
5313 	doc_id_t*	next_doc_id)
5314 {
5315 	doc_id_t	doc_id;
5316 	dberr_t		error = DB_SUCCESS;
5317 
5318 	if (*next_doc_id) {
5319 		doc_id = *next_doc_id;
5320 	} else {
5321 		/* Get the new document id that will be added. */
5322 		error = fts_get_next_doc_id(table, &doc_id);
5323 	}
5324 
5325 	if (error == DB_SUCCESS) {
5326 		dict_index_t*	clust_index;
5327 		dict_col_t*	col = dict_table_get_nth_col(
5328 			table, table->fts->doc_col);
5329 
5330 		ufield->exp = NULL;
5331 
5332 		ufield->new_val.len = sizeof(doc_id);
5333 
5334 		clust_index = dict_table_get_first_index(table);
5335 
5336 		ufield->field_no = dict_col_get_clust_pos(col, clust_index);
5337 		dict_col_copy_type(col, dfield_get_type(&ufield->new_val));
5338 
5339 		/* It is possible we update record that has
5340 		not yet be sync-ed from last crash. */
5341 
5342 		/* Convert to storage byte order. */
5343 		ut_a(doc_id != FTS_NULL_DOC_ID);
5344 		fts_write_doc_id((byte*) next_doc_id, doc_id);
5345 
5346 		ufield->new_val.data = next_doc_id;
5347                 ufield->new_val.ext = 0;
5348 	}
5349 
5350 	return(doc_id);
5351 }
5352 
5353 /** fts_t constructor.
5354 @param[in]	table	table with FTS indexes
5355 @param[in,out]	heap	memory heap where 'this' is stored */
fts_t(const dict_table_t * table,mem_heap_t * heap)5356 fts_t::fts_t(
5357 	const dict_table_t*	table,
5358 	mem_heap_t*		heap)
5359 	:
5360 	added_synced(0), dict_locked(0),
5361 	add_wq(NULL),
5362 	cache(NULL),
5363 	doc_col(ULINT_UNDEFINED), in_queue(false),
5364 	fts_heap(heap)
5365 {
5366 	ut_a(table->fts == NULL);
5367 
5368 	ib_alloc_t*	heap_alloc = ib_heap_allocator_create(fts_heap);
5369 
5370 	indexes = ib_vector_create(heap_alloc, sizeof(dict_index_t*), 4);
5371 
5372 	dict_table_get_all_fts_indexes(table, indexes);
5373 }
5374 
5375 /** fts_t destructor. */
~fts_t()5376 fts_t::~fts_t()
5377 {
5378 	ut_ad(add_wq == NULL);
5379 
5380 	if (cache != NULL) {
5381 		fts_cache_clear(cache);
5382 		fts_cache_destroy(cache);
5383 		cache = NULL;
5384 	}
5385 
5386 	/* There is no need to call ib_vector_free() on this->indexes
5387 	because it is stored in this->fts_heap. */
5388 }
5389 
5390 /*********************************************************************//**
5391 Create an instance of fts_t.
5392 @return instance of fts_t */
5393 fts_t*
fts_create(dict_table_t * table)5394 fts_create(
5395 /*=======*/
5396 	dict_table_t*	table)		/*!< in/out: table with FTS indexes */
5397 {
5398 	fts_t*		fts;
5399 	mem_heap_t*	heap;
5400 
5401 	heap = mem_heap_create(512);
5402 
5403 	fts = static_cast<fts_t*>(mem_heap_alloc(heap, sizeof(*fts)));
5404 
5405 	new(fts) fts_t(table, heap);
5406 
5407 	return(fts);
5408 }
5409 
5410 /*********************************************************************//**
5411 Free the FTS resources. */
5412 void
fts_free(dict_table_t * table)5413 fts_free(
5414 /*=====*/
5415 	dict_table_t*	table)	/*!< in/out: table with FTS indexes */
5416 {
5417 	fts_t*	fts = table->fts;
5418 
5419 	fts->~fts_t();
5420 
5421 	mem_heap_free(fts->fts_heap);
5422 
5423 	table->fts = NULL;
5424 }
5425 
5426 /*********************************************************************//**
5427 Take a FTS savepoint. */
5428 UNIV_INLINE
5429 void
fts_savepoint_copy(const fts_savepoint_t * src,fts_savepoint_t * dst)5430 fts_savepoint_copy(
5431 /*===============*/
5432 	const fts_savepoint_t*	src,	/*!< in: source savepoint */
5433 	fts_savepoint_t*	dst)	/*!< out: destination savepoint */
5434 {
5435 	const ib_rbt_node_t*	node;
5436 	const ib_rbt_t*		tables;
5437 
5438 	tables = src->tables;
5439 
5440 	for (node = rbt_first(tables); node; node = rbt_next(tables, node)) {
5441 
5442 		fts_trx_table_t*	ftt_dst;
5443 		const fts_trx_table_t**	ftt_src;
5444 
5445 		ftt_src = rbt_value(const fts_trx_table_t*, node);
5446 
5447 		ftt_dst = fts_trx_table_clone(*ftt_src);
5448 
5449 		rbt_insert(dst->tables, &ftt_dst, &ftt_dst);
5450 	}
5451 }
5452 
5453 /*********************************************************************//**
5454 Take a FTS savepoint. */
5455 void
fts_savepoint_take(fts_trx_t * fts_trx,const char * name)5456 fts_savepoint_take(
5457 /*===============*/
5458 	fts_trx_t*	fts_trx,	/*!< in: fts transaction */
5459 	const char*	name)		/*!< in: savepoint name */
5460 {
5461 	mem_heap_t*		heap;
5462 	fts_savepoint_t*	savepoint;
5463 	fts_savepoint_t*	last_savepoint;
5464 
5465 	ut_a(name != NULL);
5466 
5467 	heap = fts_trx->heap;
5468 
5469 	/* The implied savepoint must exist. */
5470 	ut_a(ib_vector_size(fts_trx->savepoints) > 0);
5471 
5472 	last_savepoint = static_cast<fts_savepoint_t*>(
5473 		ib_vector_last(fts_trx->savepoints));
5474 	savepoint = fts_savepoint_create(fts_trx->savepoints, name, heap);
5475 
5476 	if (last_savepoint->tables != NULL) {
5477 		fts_savepoint_copy(last_savepoint, savepoint);
5478 	}
5479 }
5480 
5481 /*********************************************************************//**
5482 Lookup a savepoint instance by name.
5483 @return ULINT_UNDEFINED if not found */
5484 UNIV_INLINE
5485 ulint
fts_savepoint_lookup(ib_vector_t * savepoints,const char * name)5486 fts_savepoint_lookup(
5487 /*==================*/
5488 	ib_vector_t*	savepoints,	/*!< in: savepoints */
5489 	const char*	name)		/*!< in: savepoint name */
5490 {
5491 	ulint			i;
5492 
5493 	ut_a(ib_vector_size(savepoints) > 0);
5494 
5495 	for (i = 1; i < ib_vector_size(savepoints); ++i) {
5496 		fts_savepoint_t*	savepoint;
5497 
5498 		savepoint = static_cast<fts_savepoint_t*>(
5499 			ib_vector_get(savepoints, i));
5500 
5501 		if (strcmp(name, savepoint->name) == 0) {
5502 			return(i);
5503 		}
5504 	}
5505 
5506 	return(ULINT_UNDEFINED);
5507 }
5508 
5509 /*********************************************************************//**
5510 Release the savepoint data identified by  name. All savepoints created
5511 after the named savepoint are kept.
5512 @return DB_SUCCESS or error code */
5513 void
fts_savepoint_release(trx_t * trx,const char * name)5514 fts_savepoint_release(
5515 /*==================*/
5516 	trx_t*		trx,		/*!< in: transaction */
5517 	const char*	name)		/*!< in: savepoint name */
5518 {
5519 	ut_a(name != NULL);
5520 
5521 	ib_vector_t*	savepoints = trx->fts_trx->savepoints;
5522 
5523 	ut_a(ib_vector_size(savepoints) > 0);
5524 
5525 	ulint   i = fts_savepoint_lookup(savepoints, name);
5526 	if (i != ULINT_UNDEFINED) {
5527 		ut_a(i >= 1);
5528 
5529 		fts_savepoint_t*        savepoint;
5530 		savepoint = static_cast<fts_savepoint_t*>(
5531 			ib_vector_get(savepoints, i));
5532 
5533 		if (i == ib_vector_size(savepoints) - 1) {
5534 			/* If the savepoint is the last, we save its
5535 			tables to the  previous savepoint. */
5536 			fts_savepoint_t*	prev_savepoint;
5537 			prev_savepoint = static_cast<fts_savepoint_t*>(
5538 				ib_vector_get(savepoints, i - 1));
5539 
5540 			ib_rbt_t*	tables = savepoint->tables;
5541 			savepoint->tables = prev_savepoint->tables;
5542 			prev_savepoint->tables = tables;
5543 		}
5544 
5545 		fts_savepoint_free(savepoint);
5546 		ib_vector_remove(savepoints, *(void**)savepoint);
5547 
5548 		/* Make sure we don't delete the implied savepoint. */
5549 		ut_a(ib_vector_size(savepoints) > 0);
5550 	}
5551 }
5552 
5553 /**********************************************************************//**
5554 Refresh last statement savepoint. */
5555 void
fts_savepoint_laststmt_refresh(trx_t * trx)5556 fts_savepoint_laststmt_refresh(
5557 /*===========================*/
5558 	trx_t*			trx)	/*!< in: transaction */
5559 {
5560 
5561 	fts_trx_t*              fts_trx;
5562 	fts_savepoint_t*        savepoint;
5563 
5564 	fts_trx = trx->fts_trx;
5565 
5566 	savepoint = static_cast<fts_savepoint_t*>(
5567 		ib_vector_pop(fts_trx->last_stmt));
5568 	fts_savepoint_free(savepoint);
5569 
5570 	ut_ad(ib_vector_is_empty(fts_trx->last_stmt));
5571 	savepoint = fts_savepoint_create(fts_trx->last_stmt, NULL, NULL);
5572 }
5573 
5574 /********************************************************************
5575 Undo the Doc ID add/delete operations in last stmt */
5576 static
5577 void
fts_undo_last_stmt(fts_trx_table_t * s_ftt,fts_trx_table_t * l_ftt)5578 fts_undo_last_stmt(
5579 /*===============*/
5580 	fts_trx_table_t*	s_ftt,	/*!< in: Transaction FTS table */
5581 	fts_trx_table_t*	l_ftt)	/*!< in: last stmt FTS table */
5582 {
5583 	ib_rbt_t*		s_rows;
5584 	ib_rbt_t*		l_rows;
5585 	const ib_rbt_node_t*	node;
5586 
5587 	l_rows = l_ftt->rows;
5588 	s_rows = s_ftt->rows;
5589 
5590 	for (node = rbt_first(l_rows);
5591 	     node;
5592 	     node = rbt_next(l_rows, node)) {
5593 		fts_trx_row_t*	l_row = rbt_value(fts_trx_row_t, node);
5594 		ib_rbt_bound_t	parent;
5595 
5596 		rbt_search(s_rows, &parent, &(l_row->doc_id));
5597 
5598 		if (parent.result == 0) {
5599 			fts_trx_row_t*	s_row = rbt_value(
5600 				fts_trx_row_t, parent.last);
5601 
5602 			switch (l_row->state) {
5603 			case FTS_INSERT:
5604 				ut_free(rbt_remove_node(s_rows, parent.last));
5605 				break;
5606 
5607 			case FTS_DELETE:
5608 				if (s_row->state == FTS_NOTHING) {
5609 					s_row->state = FTS_INSERT;
5610 				} else if (s_row->state == FTS_DELETE) {
5611 					ut_free(rbt_remove_node(
5612 						s_rows, parent.last));
5613 				}
5614 				break;
5615 
5616 			/* FIXME: Check if FTS_MODIFY need to be addressed */
5617 			case FTS_MODIFY:
5618 			case FTS_NOTHING:
5619 				break;
5620 			default:
5621 				ut_error;
5622 			}
5623 		}
5624 	}
5625 }
5626 
5627 /**********************************************************************//**
5628 Rollback to savepoint indentified by name.
5629 @return DB_SUCCESS or error code */
5630 void
fts_savepoint_rollback_last_stmt(trx_t * trx)5631 fts_savepoint_rollback_last_stmt(
5632 /*=============================*/
5633 	trx_t*		trx)		/*!< in: transaction */
5634 {
5635 	ib_vector_t*		savepoints;
5636 	fts_savepoint_t*	savepoint;
5637 	fts_savepoint_t*	last_stmt;
5638 	fts_trx_t*		fts_trx;
5639 	ib_rbt_bound_t		parent;
5640 	const ib_rbt_node_t*    node;
5641 	ib_rbt_t*		l_tables;
5642 	ib_rbt_t*		s_tables;
5643 
5644 	fts_trx = trx->fts_trx;
5645 	savepoints = fts_trx->savepoints;
5646 
5647 	savepoint = static_cast<fts_savepoint_t*>(ib_vector_last(savepoints));
5648 	last_stmt = static_cast<fts_savepoint_t*>(
5649 		ib_vector_last(fts_trx->last_stmt));
5650 
5651 	l_tables = last_stmt->tables;
5652 	s_tables = savepoint->tables;
5653 
5654 	for (node = rbt_first(l_tables);
5655 	     node;
5656 	     node = rbt_next(l_tables, node)) {
5657 
5658 		fts_trx_table_t**	l_ftt;
5659 
5660 		l_ftt = rbt_value(fts_trx_table_t*, node);
5661 
5662 		rbt_search_cmp(
5663 			s_tables, &parent, &(*l_ftt)->table->id,
5664 			fts_trx_table_id_cmp, NULL);
5665 
5666 		if (parent.result == 0) {
5667 			fts_trx_table_t**	s_ftt;
5668 
5669 			s_ftt = rbt_value(fts_trx_table_t*, parent.last);
5670 
5671 			fts_undo_last_stmt(*s_ftt, *l_ftt);
5672 		}
5673 	}
5674 }
5675 
5676 /**********************************************************************//**
5677 Rollback to savepoint indentified by name.
5678 @return DB_SUCCESS or error code */
5679 void
fts_savepoint_rollback(trx_t * trx,const char * name)5680 fts_savepoint_rollback(
5681 /*===================*/
5682 	trx_t*		trx,		/*!< in: transaction */
5683 	const char*	name)		/*!< in: savepoint name */
5684 {
5685 	ulint		i;
5686 	ib_vector_t*	savepoints;
5687 
5688 	ut_a(name != NULL);
5689 
5690 	savepoints = trx->fts_trx->savepoints;
5691 
5692 	/* We pop all savepoints from the the top of the stack up to
5693 	and including the instance that was found. */
5694 	i = fts_savepoint_lookup(savepoints, name);
5695 
5696 	if (i != ULINT_UNDEFINED) {
5697 		fts_savepoint_t*	savepoint;
5698 
5699 		ut_a(i > 0);
5700 
5701 		while (ib_vector_size(savepoints) > i) {
5702 			fts_savepoint_t*	savepoint;
5703 
5704 			savepoint = static_cast<fts_savepoint_t*>(
5705 				ib_vector_pop(savepoints));
5706 
5707 			if (savepoint->name != NULL) {
5708 				/* Since name was allocated on the heap, the
5709 				memory will be released when the transaction
5710 				completes. */
5711 				savepoint->name = NULL;
5712 
5713 				fts_savepoint_free(savepoint);
5714 			}
5715 		}
5716 
5717 		/* Pop all a elements from the top of the stack that may
5718 		have been released. We have to be careful that we don't
5719 		delete the implied savepoint. */
5720 
5721 		for (savepoint = static_cast<fts_savepoint_t*>(
5722 				ib_vector_last(savepoints));
5723 		     ib_vector_size(savepoints) > 1
5724 		     && savepoint->name == NULL;
5725 		     savepoint = static_cast<fts_savepoint_t*>(
5726 				ib_vector_last(savepoints))) {
5727 
5728 			ib_vector_pop(savepoints);
5729 		}
5730 
5731 		/* Make sure we don't delete the implied savepoint. */
5732 		ut_a(ib_vector_size(savepoints) > 0);
5733 
5734 		/* Restore the savepoint. */
5735 		fts_savepoint_take(trx->fts_trx, name);
5736 	}
5737 }
5738 
fts_check_aux_table(const char * name,table_id_t * table_id,index_id_t * index_id)5739 bool fts_check_aux_table(const char *name,
5740                          table_id_t *table_id,
5741                          index_id_t *index_id)
5742 {
5743   ulint len= strlen(name);
5744   const char* ptr;
5745   const char* end= name + len;
5746 
5747   ut_ad(len <= MAX_FULL_NAME_LEN);
5748   ptr= static_cast<const char*>(memchr(name, '/', len));
5749 
5750   if (ptr != NULL)
5751   {
5752     /* We will start the match after the '/' */
5753     ++ptr;
5754     len = end - ptr;
5755   }
5756 
5757   /* All auxiliary tables are prefixed with "FTS_" and the name
5758   length will be at the very least greater than 20 bytes. */
5759   if (ptr && len > 20 && !memcmp(ptr, "FTS_", 4))
5760   {
5761     /* Skip the prefix. */
5762     ptr+= 4;
5763     len-= 4;
5764 
5765     const char *table_id_ptr= ptr;
5766     /* Skip the table id. */
5767     ptr= static_cast<const char*>(memchr(ptr, '_', len));
5768 
5769     if (!ptr)
5770       return false;
5771 
5772     /* Skip the underscore. */
5773     ++ptr;
5774     ut_ad(end > ptr);
5775     len= end - ptr;
5776 
5777     sscanf(table_id_ptr, UINT64PFx, table_id);
5778     /* First search the common table suffix array. */
5779     for (ulint i = 0; fts_common_tables[i]; ++i)
5780     {
5781       if (!strncmp(ptr, fts_common_tables[i], len))
5782         return true;
5783     }
5784 
5785     /* Could be obsolete common tables. */
5786     if ((len == 5 && !memcmp(ptr, "ADDED", len)) ||
5787         (len == 9 && !memcmp(ptr, "STOPWORDS", len)))
5788       return true;
5789 
5790     const char* index_id_ptr= ptr;
5791     /* Skip the index id. */
5792     ptr= static_cast<const char*>(memchr(ptr, '_', len));
5793     if (!ptr)
5794       return false;
5795 
5796     sscanf(index_id_ptr, UINT64PFx, index_id);
5797 
5798     /* Skip the underscore. */
5799     ++ptr;
5800     ut_a(end > ptr);
5801     len= end - ptr;
5802 
5803     if (len > 7)
5804       return false;
5805 
5806     /* Search the FT index specific array. */
5807     for (ulint i = 0; i < FTS_NUM_AUX_INDEX; ++i)
5808     {
5809       if (!memcmp(ptr, "INDEX_", len - 1))
5810         return true;
5811     }
5812 
5813     /* Other FT index specific table(s). */
5814     if (len == 6 && !memcmp(ptr, "DOC_ID", len))
5815       return true;
5816   }
5817 
5818   return false;
5819 }
5820 
5821 typedef std::pair<table_id_t,index_id_t> fts_aux_id;
5822 typedef std::set<fts_aux_id> fts_space_set_t;
5823 
5824 /** Iterate over all the spaces in the space list and fetch the
5825 fts parent table id and index id.
5826 @param[in,out]	fts_space_set	store the list of tablespace id and
5827 				index id */
fil_get_fts_spaces(fts_space_set_t & fts_space_set)5828 static void fil_get_fts_spaces(fts_space_set_t& fts_space_set)
5829 {
5830   mutex_enter(&fil_system.mutex);
5831 
5832   for (fil_space_t *space= UT_LIST_GET_FIRST(fil_system.space_list);
5833        space;
5834        space= UT_LIST_GET_NEXT(space_list, space))
5835   {
5836     index_id_t index_id= 0;
5837     table_id_t table_id= 0;
5838 
5839     if (space->purpose == FIL_TYPE_TABLESPACE
5840         && fts_check_aux_table(space->name, &table_id, &index_id))
5841       fts_space_set.insert(std::make_pair(table_id, index_id));
5842   }
5843 
5844   mutex_exit(&fil_system.mutex);
5845 }
5846 
5847 /** Check whether the parent table id and index id of fts auxilary
5848 tables with SYS_INDEXES. If it exists then we can safely ignore the
5849 fts table from orphaned tables.
5850 @param[in,out]	fts_space_set	fts space set contains set of auxiliary
5851 				table ids */
fts_check_orphaned_tables(fts_space_set_t & fts_space_set)5852 static void fts_check_orphaned_tables(fts_space_set_t& fts_space_set)
5853 {
5854   btr_pcur_t pcur;
5855   mtr_t	     mtr;
5856   trx_t*     trx = trx_create();
5857   trx->op_info = "checking fts orphaned tables";
5858 
5859   row_mysql_lock_data_dictionary(trx);
5860 
5861   mtr.start();
5862   btr_pcur_open_at_index_side(
5863     true, dict_table_get_first_index(dict_sys->sys_indexes),
5864     BTR_SEARCH_LEAF, &pcur, true, 0, &mtr);
5865 
5866   do
5867   {
5868     const rec_t *rec;
5869     const byte *tbl_field;
5870     const byte *index_field;
5871     ulint len;
5872 
5873     btr_pcur_move_to_next_user_rec(&pcur, &mtr);
5874     if (!btr_pcur_is_on_user_rec(&pcur))
5875       break;
5876 
5877     rec= btr_pcur_get_rec(&pcur);
5878     if (rec_get_deleted_flag(rec, 0))
5879       continue;
5880 
5881     tbl_field= rec_get_nth_field_old(rec, 0, &len);
5882     if (len != 8)
5883       continue;
5884 
5885     index_field= rec_get_nth_field_old(rec, 1, &len);
5886     if (len != 8)
5887       continue;
5888 
5889     table_id_t table_id = mach_read_from_8(tbl_field);
5890     index_id_t index_id = mach_read_from_8(index_field);
5891 
5892     fts_space_set_t::iterator it = fts_space_set.find(
5893 	fts_aux_id(table_id, index_id));
5894 
5895     if (it != fts_space_set.end())
5896       fts_space_set.erase(*it);
5897     else
5898     {
5899       it= fts_space_set.find(fts_aux_id(table_id, 0));
5900       if (it != fts_space_set.end())
5901         fts_space_set.erase(*it);
5902     }
5903   } while(!fts_space_set.empty());
5904 
5905   btr_pcur_close(&pcur);
5906   mtr.commit();
5907   row_mysql_unlock_data_dictionary(trx);
5908   trx->free();
5909 }
5910 
5911 /** Drop all fts auxilary table for the respective fts_id
5912 @param[in]	fts_id	fts auxilary table ids */
fts_drop_all_aux_tables(trx_t * trx,fts_table_t * fts_table)5913 static void fts_drop_all_aux_tables(trx_t *trx, fts_table_t *fts_table)
5914 {
5915   char fts_table_name[MAX_FULL_NAME_LEN];
5916   for (ulint i= 0;i < FTS_NUM_AUX_INDEX; i++)
5917   {
5918     fts_table->suffix= fts_get_suffix(i);
5919     fts_get_table_name(fts_table, fts_table_name, true);
5920 
5921     /* Drop all fts aux and common table */
5922     dberr_t err= fts_drop_table(trx, fts_table_name);
5923 
5924     if (err == DB_FAIL)
5925     {
5926       char *path= fil_make_filepath(NULL, fts_table_name, IBD, false);
5927 
5928       if (path != NULL)
5929       {
5930         os_file_delete_if_exists(innodb_data_file_key, path , NULL);
5931         ut_free(path);
5932       }
5933     }
5934   }
5935 }
5936 
5937 /** Drop all orphaned FTS auxiliary tables, those that don't have
5938 a parent table or FTS index defined on them. */
fts_drop_orphaned_tables()5939 void fts_drop_orphaned_tables()
5940 {
5941   fts_space_set_t fts_space_set;
5942   fil_get_fts_spaces(fts_space_set);
5943 
5944   if (fts_space_set.empty())
5945     return;
5946 
5947   fts_check_orphaned_tables(fts_space_set);
5948 
5949   if (fts_space_set.empty())
5950     return;
5951 
5952   trx_t* trx= trx_create();
5953   trx->op_info= "Drop orphaned aux FTS tables";
5954   row_mysql_lock_data_dictionary(trx);
5955 
5956   for (fts_space_set_t::iterator it = fts_space_set.begin();
5957        it != fts_space_set.end(); it++)
5958   {
5959     fts_table_t fts_table;
5960     dict_table_t *table= dict_table_open_on_id(it->first, TRUE,
5961                                                DICT_TABLE_OP_NORMAL);
5962     if (!table)
5963       continue;
5964 
5965     FTS_INIT_FTS_TABLE(&fts_table, NULL, FTS_COMMON_TABLE, table);
5966     fts_drop_common_tables(trx, &fts_table, true);
5967 
5968     fts_table.type= FTS_INDEX_TABLE;
5969     fts_table.index_id= it->second;
5970     fts_drop_all_aux_tables(trx, &fts_table);
5971 
5972     dict_table_close(table, true, false);
5973   }
5974   trx_commit_for_mysql(trx);
5975   row_mysql_unlock_data_dictionary(trx);
5976   trx->dict_operation_lock_mode= 0;
5977   trx->free();
5978 }
5979 
5980 /**********************************************************************//**
5981 Check whether user supplied stopword table is of the right format.
5982 Caller is responsible to hold dictionary locks.
5983 @return the stopword column charset if qualifies */
5984 CHARSET_INFO*
fts_valid_stopword_table(const char * stopword_table_name)5985 fts_valid_stopword_table(
5986 /*=====================*/
5987 	const char*	stopword_table_name)	/*!< in: Stopword table
5988 						name */
5989 {
5990 	dict_table_t*	table;
5991 	dict_col_t*     col = NULL;
5992 
5993 	if (!stopword_table_name) {
5994 		return(NULL);
5995 	}
5996 
5997 	table = dict_table_get_low(stopword_table_name);
5998 
5999 	if (!table) {
6000 		ib::error() << "User stopword table " << stopword_table_name
6001 			<< " does not exist.";
6002 
6003 		return(NULL);
6004 	} else {
6005 		const char*     col_name;
6006 
6007 		col_name = dict_table_get_col_name(table, 0);
6008 
6009 		if (ut_strcmp(col_name, "value")) {
6010 			ib::error() << "Invalid column name for stopword"
6011 				" table " << stopword_table_name << ". Its"
6012 				" first column must be named as 'value'.";
6013 
6014 			return(NULL);
6015 		}
6016 
6017 		col = dict_table_get_nth_col(table, 0);
6018 
6019 		if (col->mtype != DATA_VARCHAR
6020 		    && col->mtype != DATA_VARMYSQL) {
6021 			ib::error() << "Invalid column type for stopword"
6022 				" table " << stopword_table_name << ". Its"
6023 				" first column must be of varchar type";
6024 
6025 			return(NULL);
6026 		}
6027 	}
6028 
6029 	ut_ad(col);
6030 
6031 	return(fts_get_charset(col->prtype));
6032 }
6033 
6034 /**********************************************************************//**
6035 This function loads the stopword into the FTS cache. It also
6036 records/fetches stopword configuration to/from FTS configure
6037 table, depending on whether we are creating or reloading the
6038 FTS.
6039 @return true if load operation is successful */
6040 bool
fts_load_stopword(const dict_table_t * table,trx_t * trx,const char * session_stopword_table,bool stopword_is_on,bool reload)6041 fts_load_stopword(
6042 /*==============*/
6043 	const dict_table_t*
6044 			table,			/*!< in: Table with FTS */
6045 	trx_t*		trx,			/*!< in: Transactions */
6046 	const char*	session_stopword_table,	/*!< in: Session stopword table
6047 						name */
6048 	bool		stopword_is_on,		/*!< in: Whether stopword
6049 						option is turned on/off */
6050 	bool		reload)			/*!< in: Whether it is
6051 						for reloading FTS table */
6052 {
6053 	fts_table_t	fts_table;
6054 	fts_string_t	str;
6055 	dberr_t		error = DB_SUCCESS;
6056 	ulint		use_stopword;
6057 	fts_cache_t*	cache;
6058 	const char*	stopword_to_use = NULL;
6059 	ibool		new_trx = FALSE;
6060 	byte		str_buffer[MAX_FULL_NAME_LEN + 1];
6061 
6062 	FTS_INIT_FTS_TABLE(&fts_table, "CONFIG", FTS_COMMON_TABLE, table);
6063 
6064 	cache = table->fts->cache;
6065 
6066 	if (!reload && !(cache->stopword_info.status & STOPWORD_NOT_INIT)) {
6067 		return true;
6068 	}
6069 
6070 	if (!trx) {
6071 		trx = trx_create();
6072 		if (srv_read_only_mode) {
6073 			trx_start_internal_read_only(trx);
6074 		} else {
6075 			trx_start_internal(trx);
6076 		}
6077 		trx->op_info = "upload FTS stopword";
6078 		new_trx = TRUE;
6079 	}
6080 
6081 	/* First check whether stopword filtering is turned off */
6082 	if (reload) {
6083 		error = fts_config_get_ulint(
6084 			trx, &fts_table, FTS_USE_STOPWORD, &use_stopword);
6085 	} else {
6086 		use_stopword = (ulint) stopword_is_on;
6087 
6088 		error = fts_config_set_ulint(
6089 			trx, &fts_table, FTS_USE_STOPWORD, use_stopword);
6090 	}
6091 
6092 	if (error != DB_SUCCESS) {
6093 		goto cleanup;
6094 	}
6095 
6096 	/* If stopword is turned off, no need to continue to load the
6097 	stopword into cache, but still need to do initialization */
6098 	if (!use_stopword) {
6099 		cache->stopword_info.status = STOPWORD_OFF;
6100 		goto cleanup;
6101 	}
6102 
6103 	if (reload) {
6104 		/* Fetch the stopword table name from FTS config
6105 		table */
6106 		str.f_n_char = 0;
6107 		str.f_str = str_buffer;
6108 		str.f_len = sizeof(str_buffer) - 1;
6109 
6110 		error = fts_config_get_value(
6111 			trx, &fts_table, FTS_STOPWORD_TABLE_NAME, &str);
6112 
6113 		if (error != DB_SUCCESS) {
6114 			goto cleanup;
6115 		}
6116 
6117 		if (*str.f_str) {
6118 			stopword_to_use = (const char*) str.f_str;
6119 		}
6120 	} else {
6121 		stopword_to_use = session_stopword_table;
6122 	}
6123 
6124 	if (stopword_to_use
6125 	    && fts_load_user_stopword(table->fts, stopword_to_use,
6126 				      &cache->stopword_info)) {
6127 		/* Save the stopword table name to the configure
6128 		table */
6129 		if (!reload) {
6130 			str.f_n_char = 0;
6131 			str.f_str = (byte*) stopword_to_use;
6132 			str.f_len = ut_strlen(stopword_to_use);
6133 
6134 			error = fts_config_set_value(
6135 				trx, &fts_table, FTS_STOPWORD_TABLE_NAME, &str);
6136 		}
6137 	} else {
6138 		/* Load system default stopword list */
6139 		fts_load_default_stopword(&cache->stopword_info);
6140 	}
6141 
6142 cleanup:
6143 	if (new_trx) {
6144 		if (error == DB_SUCCESS) {
6145 			fts_sql_commit(trx);
6146 		} else {
6147 			fts_sql_rollback(trx);
6148 		}
6149 
6150 		trx->free();
6151 	}
6152 
6153 	if (!cache->stopword_info.cached_stopword) {
6154 		cache->stopword_info.cached_stopword = rbt_create_arg_cmp(
6155 			sizeof(fts_tokenizer_word_t), innobase_fts_text_cmp,
6156 			&my_charset_latin1);
6157 	}
6158 
6159 	return error == DB_SUCCESS;
6160 }
6161 
6162 /**********************************************************************//**
6163 Callback function when we initialize the FTS at the start up
6164 time. It recovers the maximum Doc IDs presented in the current table.
6165 @return: always returns TRUE */
6166 static
6167 ibool
fts_init_get_doc_id(void * row,void * user_arg)6168 fts_init_get_doc_id(
6169 /*================*/
6170 	void*	row,			/*!< in: sel_node_t* */
6171 	void*	user_arg)		/*!< in: fts cache */
6172 {
6173 	doc_id_t	doc_id = FTS_NULL_DOC_ID;
6174 	sel_node_t*	node = static_cast<sel_node_t*>(row);
6175 	que_node_t*	exp = node->select_list;
6176 	fts_cache_t*    cache = static_cast<fts_cache_t*>(user_arg);
6177 
6178 	ut_ad(ib_vector_is_empty(cache->get_docs));
6179 
6180 	/* Copy each indexed column content into doc->text.f_str */
6181 	if (exp) {
6182 		dfield_t*	dfield = que_node_get_val(exp);
6183 		dtype_t*        type = dfield_get_type(dfield);
6184 		void*           data = dfield_get_data(dfield);
6185 
6186 		ut_a(dtype_get_mtype(type) == DATA_INT);
6187 
6188 		doc_id = static_cast<doc_id_t>(mach_read_from_8(
6189 			static_cast<const byte*>(data)));
6190 
6191 		if (doc_id >= cache->next_doc_id) {
6192 			cache->next_doc_id = doc_id + 1;
6193 		}
6194 	}
6195 
6196 	return(TRUE);
6197 }
6198 
6199 /**********************************************************************//**
6200 Callback function when we initialize the FTS at the start up
6201 time. It recovers Doc IDs that have not sync-ed to the auxiliary
6202 table, and require to bring them back into FTS index.
6203 @return: always returns TRUE */
6204 static
6205 ibool
fts_init_recover_doc(void * row,void * user_arg)6206 fts_init_recover_doc(
6207 /*=================*/
6208 	void*	row,			/*!< in: sel_node_t* */
6209 	void*	user_arg)		/*!< in: fts cache */
6210 {
6211 
6212 	fts_doc_t       doc;
6213 	ulint		doc_len = 0;
6214 	ulint		field_no = 0;
6215 	fts_get_doc_t*  get_doc = static_cast<fts_get_doc_t*>(user_arg);
6216 	doc_id_t	doc_id = FTS_NULL_DOC_ID;
6217 	sel_node_t*	node = static_cast<sel_node_t*>(row);
6218 	que_node_t*	exp = node->select_list;
6219 	fts_cache_t*	cache = get_doc->cache;
6220 	st_mysql_ftparser*	parser = get_doc->index_cache->index->parser;
6221 
6222 	fts_doc_init(&doc);
6223 	doc.found = TRUE;
6224 
6225 	ut_ad(cache);
6226 
6227 	/* Copy each indexed column content into doc->text.f_str */
6228 	while (exp) {
6229 		dfield_t*	dfield = que_node_get_val(exp);
6230 		ulint		len = dfield_get_len(dfield);
6231 
6232 		if (field_no == 0) {
6233 			dtype_t*        type = dfield_get_type(dfield);
6234 			void*           data = dfield_get_data(dfield);
6235 
6236 			ut_a(dtype_get_mtype(type) == DATA_INT);
6237 
6238 			doc_id = static_cast<doc_id_t>(mach_read_from_8(
6239 				static_cast<const byte*>(data)));
6240 
6241 			field_no++;
6242 			exp = que_node_get_next(exp);
6243 			continue;
6244 		}
6245 
6246 		if (len == UNIV_SQL_NULL) {
6247 			exp = que_node_get_next(exp);
6248 			continue;
6249 		}
6250 
6251 		ut_ad(get_doc);
6252 
6253 		if (!get_doc->index_cache->charset) {
6254 			get_doc->index_cache->charset = fts_get_charset(
6255 				dfield->type.prtype);
6256 		}
6257 
6258 		doc.charset = get_doc->index_cache->charset;
6259 
6260 		if (dfield_is_ext(dfield)) {
6261 			dict_table_t*	table = cache->sync->table;
6262 
6263 			doc.text.f_str = btr_copy_externally_stored_field(
6264 				&doc.text.f_len,
6265 				static_cast<byte*>(dfield_get_data(dfield)),
6266 				dict_table_page_size(table), len,
6267 				static_cast<mem_heap_t*>(doc.self_heap->arg));
6268 		} else {
6269 			doc.text.f_str = static_cast<byte*>(
6270 				dfield_get_data(dfield));
6271 
6272 			doc.text.f_len = len;
6273 		}
6274 
6275 		if (field_no == 1) {
6276 			fts_tokenize_document(&doc, NULL, parser);
6277 		} else {
6278 			fts_tokenize_document_next(&doc, doc_len, NULL, parser);
6279 		}
6280 
6281 		exp = que_node_get_next(exp);
6282 
6283 		doc_len += (exp) ? len + 1 : len;
6284 
6285 		field_no++;
6286 	}
6287 
6288 	fts_cache_add_doc(cache, get_doc->index_cache, doc_id, doc.tokens);
6289 
6290 	fts_doc_free(&doc);
6291 
6292 	cache->added++;
6293 
6294 	if (doc_id >= cache->next_doc_id) {
6295 		cache->next_doc_id = doc_id + 1;
6296 	}
6297 
6298 	return(TRUE);
6299 }
6300 
6301 /**********************************************************************//**
6302 This function brings FTS index in sync when FTS index is first
6303 used. There are documents that have not yet sync-ed to auxiliary
6304 tables from last server abnormally shutdown, we will need to bring
6305 such document into FTS cache before any further operations
6306 @return TRUE if all OK */
6307 ibool
fts_init_index(dict_table_t * table,ibool has_cache_lock)6308 fts_init_index(
6309 /*===========*/
6310 	dict_table_t*	table,		/*!< in: Table with FTS */
6311 	ibool		has_cache_lock)	/*!< in: Whether we already have
6312 					cache lock */
6313 {
6314 	dict_index_t*   index;
6315 	doc_id_t        start_doc;
6316 	fts_get_doc_t*  get_doc = NULL;
6317 	fts_cache_t*    cache = table->fts->cache;
6318 	bool		need_init = false;
6319 
6320 	ut_ad(!mutex_own(&dict_sys->mutex));
6321 
6322 	/* First check cache->get_docs is initialized */
6323 	if (!has_cache_lock) {
6324 		rw_lock_x_lock(&cache->lock);
6325 	}
6326 
6327 	rw_lock_x_lock(&cache->init_lock);
6328 	if (cache->get_docs == NULL) {
6329 		cache->get_docs = fts_get_docs_create(cache);
6330 	}
6331 	rw_lock_x_unlock(&cache->init_lock);
6332 
6333 	if (table->fts->added_synced) {
6334 		goto func_exit;
6335 	}
6336 
6337 	need_init = true;
6338 
6339 	start_doc = cache->synced_doc_id;
6340 
6341 	if (!start_doc) {
6342 		fts_cmp_set_sync_doc_id(table, 0, TRUE, &start_doc);
6343 		cache->synced_doc_id = start_doc;
6344 	}
6345 
6346 	/* No FTS index, this is the case when previous FTS index
6347 	dropped, and we re-initialize the Doc ID system for subsequent
6348 	insertion */
6349 	if (ib_vector_is_empty(cache->get_docs)) {
6350 		index = table->fts_doc_id_index;
6351 
6352 		ut_a(index);
6353 
6354 		fts_doc_fetch_by_doc_id(NULL, start_doc, index,
6355 					FTS_FETCH_DOC_BY_ID_LARGE,
6356 					fts_init_get_doc_id, cache);
6357 	} else {
6358 		if (table->fts->cache->stopword_info.status
6359 		    & STOPWORD_NOT_INIT) {
6360 			fts_load_stopword(table, NULL, NULL, true, true);
6361 		}
6362 
6363 		for (ulint i = 0; i < ib_vector_size(cache->get_docs); ++i) {
6364 			get_doc = static_cast<fts_get_doc_t*>(
6365 				ib_vector_get(cache->get_docs, i));
6366 
6367 			index = get_doc->index_cache->index;
6368 
6369 			fts_doc_fetch_by_doc_id(NULL, start_doc, index,
6370 						FTS_FETCH_DOC_BY_ID_LARGE,
6371 						fts_init_recover_doc, get_doc);
6372 		}
6373 	}
6374 
6375 	table->fts->added_synced = true;
6376 
6377 	fts_get_docs_clear(cache->get_docs);
6378 
6379 func_exit:
6380 	if (!has_cache_lock) {
6381 		rw_lock_x_unlock(&cache->lock);
6382 	}
6383 
6384 	if (need_init) {
6385 		mutex_enter(&dict_sys->mutex);
6386 		/* Register the table with the optimize thread. */
6387 		fts_optimize_add_table(table);
6388 		mutex_exit(&dict_sys->mutex);
6389 	}
6390 
6391 	return(TRUE);
6392 }
6393 
6394 /** Check if the all the auxillary tables associated with FTS index are in
6395 consistent state. For now consistency is check only by ensuring
6396 index->page_no != FIL_NULL
6397 @param[out]	base_table	table has host fts index
6398 @param[in,out]	trx		trx handler */
6399 void
fts_check_corrupt(dict_table_t * base_table,trx_t * trx)6400 fts_check_corrupt(
6401 	dict_table_t*	base_table,
6402 	trx_t*		trx)
6403 {
6404 	bool		sane = true;
6405 	fts_table_t	fts_table;
6406 
6407 	/* Iterate over the common table and check for their sanity. */
6408 	FTS_INIT_FTS_TABLE(&fts_table, NULL, FTS_COMMON_TABLE, base_table);
6409 
6410 	for (ulint i = 0; fts_common_tables[i] != NULL && sane; ++i) {
6411 
6412 		char	table_name[MAX_FULL_NAME_LEN];
6413 
6414 		fts_table.suffix = fts_common_tables[i];
6415 		fts_get_table_name(&fts_table, table_name);
6416 
6417 		dict_table_t*	aux_table = dict_table_open_on_name(
6418 			table_name, true, FALSE, DICT_ERR_IGNORE_NONE);
6419 
6420 		if (aux_table == NULL) {
6421 			dict_set_corrupted(
6422 				dict_table_get_first_index(base_table),
6423 				trx, "FTS_SANITY_CHECK");
6424 			ut_ad(base_table->corrupted == TRUE);
6425 			sane = false;
6426 			continue;
6427 		}
6428 
6429 		for (dict_index_t*	aux_table_index =
6430 			UT_LIST_GET_FIRST(aux_table->indexes);
6431 		     aux_table_index != NULL;
6432 		     aux_table_index =
6433 			UT_LIST_GET_NEXT(indexes, aux_table_index)) {
6434 
6435 			/* Check if auxillary table needed for FTS is sane. */
6436 			if (aux_table_index->page == FIL_NULL) {
6437 				dict_set_corrupted(
6438 					dict_table_get_first_index(base_table),
6439 					trx, "FTS_SANITY_CHECK");
6440 				ut_ad(base_table->corrupted == TRUE);
6441 				sane = false;
6442 			}
6443 		}
6444 
6445 		dict_table_close(aux_table, FALSE, FALSE);
6446 	}
6447 }
6448