1 /*****************************************************************************
2 
3 Copyright (c) 2011, 2021, Oracle and/or its affiliates.
4 Copyright (c) 2016, 2022, MariaDB Corporation.
5 
6 This program is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free Software
8 Foundation; version 2 of the License.
9 
10 This program is distributed in the hope that it will be useful, but WITHOUT
11 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12 FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
13 
14 You should have received a copy of the GNU General Public License along with
15 this program; if not, write to the Free Software Foundation, Inc.,
16 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
17 
18 *****************************************************************************/
19 
20 /**************************************************//**
21 @file fts/fts0fts.cc
22 Full Text Search interface
23 ***********************************************************************/
24 
25 #include "trx0roll.h"
26 #include "row0mysql.h"
27 #include "row0upd.h"
28 #include "dict0types.h"
29 #include "dict0stats_bg.h"
30 #include "row0sel.h"
31 #include "fts0fts.h"
32 #include "fts0priv.h"
33 #include "fts0types.h"
34 #include "fts0types.inl"
35 #include "fts0vlc.h"
36 #include "fts0plugin.h"
37 #include "dict0priv.h"
38 #include "dict0stats.h"
39 #include "btr0pcur.h"
40 #include "sync0sync.h"
41 
/** NOTE(review): presumably the maximum length of a formatted FTS id;
not referenced in this part of the file - confirm against its users. */
static const ulint FTS_MAX_ID_LEN = 32;

/** Column name from the FTS config table */
#define FTS_MAX_CACHE_SIZE_IN_MB	"cache_size_in_mb"

/** Check whether an auxiliary table name belongs to an obsolete table,
by looking for one of the obsolete keywords ("DOC_ID", "ADDED",
"STOPWORDS") anywhere in the name. */
#define FTS_IS_OBSOLETE_AUX_TABLE(table_name)			\
	(strstr((table_name), "DOC_ID") != NULL			\
	 || strstr((table_name), "ADDED") != NULL		\
	 || strstr((table_name), "STOPWORDS") != NULL)

/** This is the maximum FTS cache size for each table; it is meant to be
a configurable variable */
ulong	fts_max_cache_size;

/** Whether the total memory used for FTS cache is exhausted, and we will
need a sync to free some memory */
bool	fts_need_sync = false;

/** Variable specifying the total memory allocated for FTS cache */
ulong	fts_max_total_cache_size;

/** This is the FTS result cache limit for each query; it is meant to be
a configurable variable */
size_t	fts_result_cache_limit;

/** Variable specifying the maximum FTS token size */
ulong	fts_max_token_size;

/** Variable specifying the minimum FTS token size */
ulong	fts_min_token_size;


// FIXME: testing
static time_t elapsed_time;
static ulint n_nodes;

#ifdef FTS_CACHE_SIZE_DEBUG
/** The cache size permissible lower limit, in megabytes (1 MB) */
static const ulint FTS_CACHE_SIZE_LOWER_LIMIT_IN_MB = 1;

/** The cache size permissible upper limit, in megabytes (1024 MB) */
static const ulint FTS_CACHE_SIZE_UPPER_LIMIT_IN_MB = 1024;
#endif

/** Time to sleep after DEADLOCK error before retrying operation.
NOTE(review): the unit is not stated here; presumably microseconds -
confirm against the code that sleeps on this value. */
static const ulint FTS_DEADLOCK_RETRY_WAIT = 100000;
90 
/** InnoDB default stopword list:
There are different versions of stopwords; the stop words listed
below come from the "Google Stopword" list. Reference:
http://meta.wikimedia.org/wiki/Stop_word_list/google_stop_word_list.
The final version of the InnoDB default stopword list is still pending
a decision.

The array is terminated by a NULL entry. Note that "the" is listed
twice; the entries are kept exactly as they are. */
const char *fts_default_stopword[] =
{
	"a",
	"about",
	"an",
	"are",
	"as",
	"at",
	"be",
	"by",
	"com",
	"de",
	"en",
	"for",
	"from",
	"how",
	"i",
	"in",
	"is",
	"it",
	"la",
	"of",
	"on",
	"or",
	"that",
	"the",
	"this",
	"to",
	"was",
	"what",
	"when",
	"where",
	"who",
	"will",
	"with",
	"und",
	"the",
	"www",
	NULL
};
137 
/** Describes one FTS auxiliary table; used for storing table info
when checking for orphaned tables. */
struct fts_aux_table_t {
	table_id_t	id;		/*!< Table id */
	table_id_t	parent_id;	/*!< Parent table id */
	table_id_t	index_id;	/*!< Table FT index id */
	char*		name;		/*!< Name of the table */
};
145 
/** FTS auxiliary table suffixes that are common to all FT indexes.
The list is terminated by a NULL entry. */
const char* fts_common_tables[] = {
	"BEING_DELETED",
	"BEING_DELETED_CACHE",
	"CONFIG",
	"DELETED",
	"DELETED_CACHE",
	NULL
};
155 
/** FTS auxiliary INDEX split intervals.
Each entry pairs a numeric value with an auxiliary table suffix; the
list is terminated by an entry whose suffix is NULL.
NOTE(review): the numeric value is presumably the boundary used to pick
the INDEX_n table for a word - confirm against fts_index_selector_t. */
const  fts_index_selector_t fts_index_selector[] = {
	{ 9, "INDEX_1" },
	{ 65, "INDEX_2" },
	{ 70, "INDEX_3" },
	{ 75, "INDEX_4" },
	{ 80, "INDEX_5" },
	{ 85, "INDEX_6" },
	{  0 , NULL	 }
};
166 
/** Default config values for FTS indexes on a table.
SQL text that inserts one row per configuration key into $config_table:
cache size '256', optimize limit '180', synced doc id '0', total deleted
count '0' and table state '0'. */
static const char* fts_config_table_insert_values_sql =
	"BEGIN\n"
	"\n"
	"INSERT INTO $config_table VALUES('"
		FTS_MAX_CACHE_SIZE_IN_MB "', '256');\n"
	""
	"INSERT INTO $config_table VALUES('"
		FTS_OPTIMIZE_LIMIT_IN_SECS  "', '180');\n"
	""
	"INSERT INTO $config_table VALUES ('"
		FTS_SYNCED_DOC_ID "', '0');\n"
	""
	"INSERT INTO $config_table VALUES ('"
		FTS_TOTAL_DELETED_COUNT "', '0');\n"
	"" /* Note: 0 == FTS_TABLE_STATE_RUNNING */
	"INSERT INTO $config_table VALUES ('"
		FTS_TABLE_STATE "', '0');\n";
185 
/** FTS tokenize parameter for the plugin parser */
struct fts_tokenize_param_t {
	fts_doc_t*	result_doc;	/*!< Result doc for tokens */
	ulint		add_pos;	/*!< Added position for tokens */
};
191 
/** Run SYNC on the table, i.e., write out data from the cache to the
FTS auxiliary INDEX table and clear the cache at the end.
@param[in,out]	sync		sync state
@param[in]	unlock_cache	whether to unlock the cache lock when
				writing a node
@param[in]	wait		whether to wait when a sync is in progress
@return DB_SUCCESS if all OK */
static
dberr_t
fts_sync(
	fts_sync_t*	sync,
	bool		unlock_cache,
	bool		wait);

/****************************************************************//**
Release all resources held by the words rb tree e.g., the node ilist. */
static
void
fts_words_free(
/*===========*/
	ib_rbt_t*	words)		/*!< in: rb tree of words */
	MY_ATTRIBUTE((nonnull));
#ifdef FTS_CACHE_SIZE_DEBUG
/****************************************************************//**
Read the max cache size parameter from the config table. */
static
void
fts_update_max_cache_size(
/*======================*/
	fts_sync_t*	sync);		/*!< in: sync state */
#endif

/*********************************************************************//**
This function fetches the document just inserted right before
we commit the transaction, tokenizes the inserted text data
and inserts it into the FTS auxiliary table and its cache.
@return TRUE if successful */
static
ulint
fts_add_doc_by_id(
/*==============*/
	fts_trx_table_t*ftt,		/*!< in: FTS trx table */
	doc_id_t	doc_id);	/*!< in: doc id */
/******************************************************************//**
Update the last document id. This function could create a new
transaction to update the last document id.
@return DB_SUCCESS if OK */
static
dberr_t
fts_update_sync_doc_id(
/*===================*/
	const dict_table_t*	table,		/*!< in: table */
	doc_id_t		doc_id,		/*!< in: last document id */
	trx_t*			trx)		/*!< in: update trx, or NULL */
	MY_ATTRIBUTE((nonnull(1)));

/** Tokenize a document.
@param[in,out]	doc	document to tokenize
@param[out]	result	tokenization result
@param[in]	parser	pluggable parser */
static
void
fts_tokenize_document(
	fts_doc_t*		doc,
	fts_doc_t*		result,
	st_mysql_ftparser*	parser);

/** Continue to tokenize a document.
@param[in,out]	doc	document to tokenize
@param[in]	add_pos	add this position to all tokens from this tokenization
@param[out]	result	tokenization result
@param[in]	parser	pluggable parser */
static
void
fts_tokenize_document_next(
	fts_doc_t*		doc,
	ulint			add_pos,
	fts_doc_t*		result,
	st_mysql_ftparser*	parser);

/** Create the vector of fts_get_doc_t instances.
@param[in,out]	cache	fts cache
@return	vector of fts_get_doc_t instances */
static
ib_vector_t*
fts_get_docs_create(
	fts_cache_t*	cache);
278 
279 /** Free the FTS cache.
280 @param[in,out]	cache to be freed */
281 static
282 void
fts_cache_destroy(fts_cache_t * cache)283 fts_cache_destroy(fts_cache_t* cache)
284 {
285 	rw_lock_free(&cache->lock);
286 	rw_lock_free(&cache->init_lock);
287 	mutex_free(&cache->deleted_lock);
288 	mutex_free(&cache->doc_id_lock);
289 	os_event_destroy(cache->sync->event);
290 
291 	if (cache->stopword_info.cached_stopword) {
292 		rbt_free(cache->stopword_info.cached_stopword);
293 	}
294 
295 	if (cache->sync_heap->arg) {
296 		mem_heap_free(static_cast<mem_heap_t*>(cache->sync_heap->arg));
297 	}
298 
299 	mem_heap_free(cache->cache_heap);
300 }
301 
302 /** Get a character set based on precise type.
303 @param prtype precise type
304 @return the corresponding character set */
305 UNIV_INLINE
306 CHARSET_INFO*
fts_get_charset(ulint prtype)307 fts_get_charset(ulint prtype)
308 {
309 #ifdef UNIV_DEBUG
310 	switch (prtype & DATA_MYSQL_TYPE_MASK) {
311 	case MYSQL_TYPE_BIT:
312 	case MYSQL_TYPE_STRING:
313 	case MYSQL_TYPE_VAR_STRING:
314 	case MYSQL_TYPE_TINY_BLOB:
315 	case MYSQL_TYPE_MEDIUM_BLOB:
316 	case MYSQL_TYPE_BLOB:
317 	case MYSQL_TYPE_LONG_BLOB:
318 	case MYSQL_TYPE_VARCHAR:
319 		break;
320 	default:
321 		ut_error;
322 	}
323 #endif /* UNIV_DEBUG */
324 
325 	uint cs_num = (uint) dtype_get_charset_coll(prtype);
326 
327 	if (CHARSET_INFO* cs = get_charset(cs_num, MYF(MY_WME))) {
328 		return(cs);
329 	}
330 
331 	ib::fatal() << "Unable to find charset-collation " << cs_num;
332 	return(NULL);
333 }
334 
335 /****************************************************************//**
336 This function loads the default InnoDB stopword list */
337 static
338 void
fts_load_default_stopword(fts_stopword_t * stopword_info)339 fts_load_default_stopword(
340 /*======================*/
341 	fts_stopword_t*		stopword_info)	/*!< in: stopword info */
342 {
343 	fts_string_t		str;
344 	mem_heap_t*		heap;
345 	ib_alloc_t*		allocator;
346 	ib_rbt_t*		stop_words;
347 
348 	allocator = stopword_info->heap;
349 	heap = static_cast<mem_heap_t*>(allocator->arg);
350 
351 	if (!stopword_info->cached_stopword) {
352 		stopword_info->cached_stopword = rbt_create_arg_cmp(
353 			sizeof(fts_tokenizer_word_t), innobase_fts_text_cmp,
354 			&my_charset_latin1);
355 	}
356 
357 	stop_words = stopword_info->cached_stopword;
358 
359 	str.f_n_char = 0;
360 
361 	for (ulint i = 0; fts_default_stopword[i]; ++i) {
362 		char*			word;
363 		fts_tokenizer_word_t	new_word;
364 
365 		/* We are going to duplicate the value below. */
366 		word = const_cast<char*>(fts_default_stopword[i]);
367 
368 		new_word.nodes = ib_vector_create(
369 			allocator, sizeof(fts_node_t), 4);
370 
371 		str.f_len = ut_strlen(word);
372 		str.f_str = reinterpret_cast<byte*>(word);
373 
374 		fts_string_dup(&new_word.text, &str, heap);
375 
376 		rbt_insert(stop_words, &new_word, &new_word);
377 	}
378 
379 	stopword_info->status = STOPWORD_FROM_DEFAULT;
380 }
381 
382 /****************************************************************//**
383 Callback function to read a single stopword value.
384 @return Always return TRUE */
385 static
386 ibool
fts_read_stopword(void * row,void * user_arg)387 fts_read_stopword(
388 /*==============*/
389 	void*		row,		/*!< in: sel_node_t* */
390 	void*		user_arg)	/*!< in: pointer to ib_vector_t */
391 {
392 	ib_alloc_t*	allocator;
393 	fts_stopword_t*	stopword_info;
394 	sel_node_t*	sel_node;
395 	que_node_t*	exp;
396 	ib_rbt_t*	stop_words;
397 	dfield_t*	dfield;
398 	fts_string_t	str;
399 	mem_heap_t*	heap;
400 	ib_rbt_bound_t	parent;
401 
402 	sel_node = static_cast<sel_node_t*>(row);
403 	stopword_info = static_cast<fts_stopword_t*>(user_arg);
404 
405 	stop_words = stopword_info->cached_stopword;
406 	allocator =  static_cast<ib_alloc_t*>(stopword_info->heap);
407 	heap = static_cast<mem_heap_t*>(allocator->arg);
408 
409 	exp = sel_node->select_list;
410 
411 	/* We only need to read the first column */
412 	dfield = que_node_get_val(exp);
413 
414 	str.f_n_char = 0;
415 	str.f_str = static_cast<byte*>(dfield_get_data(dfield));
416 	str.f_len = dfield_get_len(dfield);
417 
418 	/* Only create new node if it is a value not already existed */
419 	if (str.f_len != UNIV_SQL_NULL
420 	    && rbt_search(stop_words, &parent, &str) != 0) {
421 
422 		fts_tokenizer_word_t	new_word;
423 
424 		new_word.nodes = ib_vector_create(
425 			allocator, sizeof(fts_node_t), 4);
426 
427 		new_word.text.f_str = static_cast<byte*>(
428 			 mem_heap_alloc(heap, str.f_len + 1));
429 
430 		memcpy(new_word.text.f_str, str.f_str, str.f_len);
431 
432 		new_word.text.f_n_char = 0;
433 		new_word.text.f_len = str.f_len;
434 		new_word.text.f_str[str.f_len] = 0;
435 
436 		rbt_insert(stop_words, &new_word, &new_word);
437 	}
438 
439 	return(TRUE);
440 }
441 
442 /******************************************************************//**
443 Load user defined stopword from designated user table
444 @return whether the operation is successful */
445 static
446 bool
fts_load_user_stopword(fts_t * fts,const char * stopword_table_name,fts_stopword_t * stopword_info)447 fts_load_user_stopword(
448 /*===================*/
449 	fts_t*		fts,			/*!< in: FTS struct */
450 	const char*	stopword_table_name,	/*!< in: Stopword table
451 						name */
452 	fts_stopword_t*	stopword_info)		/*!< in: Stopword info */
453 {
454 	if (!fts->dict_locked) {
455 		mutex_enter(&dict_sys.mutex);
456 	}
457 
458 	/* Validate the user table existence in the right format */
459 	bool ret= false;
460 	stopword_info->charset = fts_valid_stopword_table(stopword_table_name);
461 	if (!stopword_info->charset) {
462 cleanup:
463 		if (!fts->dict_locked) {
464 			mutex_exit(&dict_sys.mutex);
465 		}
466 
467 		return ret;
468 	}
469 
470 	trx_t* trx = trx_create();
471 	trx->op_info = "Load user stopword table into FTS cache";
472 
473 	if (!stopword_info->cached_stopword) {
474 		/* Create the stopword RB tree with the stopword column
475 		charset. All comparison will use this charset */
476 		stopword_info->cached_stopword = rbt_create_arg_cmp(
477 			sizeof(fts_tokenizer_word_t), innobase_fts_text_cmp,
478 			(void*)stopword_info->charset);
479 
480 	}
481 
482 	pars_info_t* info = pars_info_create();
483 
484 	pars_info_bind_id(info, "table_stopword", stopword_table_name);
485 
486 	pars_info_bind_function(info, "my_func", fts_read_stopword,
487 				stopword_info);
488 
489 	que_t* graph = fts_parse_sql_no_dict_lock(
490 		info,
491 		"DECLARE FUNCTION my_func;\n"
492 		"DECLARE CURSOR c IS"
493 		" SELECT value"
494 		" FROM $table_stopword;\n"
495 		"BEGIN\n"
496 		"\n"
497 		"OPEN c;\n"
498 		"WHILE 1 = 1 LOOP\n"
499 		"  FETCH c INTO my_func();\n"
500 		"  IF c % NOTFOUND THEN\n"
501 		"    EXIT;\n"
502 		"  END IF;\n"
503 		"END LOOP;\n"
504 		"CLOSE c;");
505 
506 	for (;;) {
507 		dberr_t error = fts_eval_sql(trx, graph);
508 
509 		if (UNIV_LIKELY(error == DB_SUCCESS)) {
510 			fts_sql_commit(trx);
511 			stopword_info->status = STOPWORD_USER_TABLE;
512 			break;
513 		} else {
514 			fts_sql_rollback(trx);
515 
516 			if (error == DB_LOCK_WAIT_TIMEOUT) {
517 				ib::warn() << "Lock wait timeout reading user"
518 					" stopword table. Retrying!";
519 
520 				trx->error_state = DB_SUCCESS;
521 			} else {
522 				ib::error() << "Error '" << error
523 					<< "' while reading user stopword"
524 					" table.";
525 				ret = FALSE;
526 				break;
527 			}
528 		}
529 	}
530 
531 	que_graph_free(graph);
532 	trx->free();
533 	ret = true;
534 	goto cleanup;
535 }
536 
537 /******************************************************************//**
538 Initialize the index cache. */
539 static
540 void
fts_index_cache_init(ib_alloc_t * allocator,fts_index_cache_t * index_cache)541 fts_index_cache_init(
542 /*=================*/
543 	ib_alloc_t*		allocator,	/*!< in: the allocator to use */
544 	fts_index_cache_t*	index_cache)	/*!< in: index cache */
545 {
546 	ulint			i;
547 
548 	ut_a(index_cache->words == NULL);
549 
550 	index_cache->words = rbt_create_arg_cmp(
551 		sizeof(fts_tokenizer_word_t), innobase_fts_text_cmp,
552 		(void*) index_cache->charset);
553 
554 	ut_a(index_cache->doc_stats == NULL);
555 
556 	index_cache->doc_stats = ib_vector_create(
557 		allocator, sizeof(fts_doc_stats_t), 4);
558 
559 	for (i = 0; i < FTS_NUM_AUX_INDEX; ++i) {
560 		ut_a(index_cache->ins_graph[i] == NULL);
561 		ut_a(index_cache->sel_graph[i] == NULL);
562 	}
563 }
564 
565 /*********************************************************************//**
566 Initialize FTS cache. */
567 void
fts_cache_init(fts_cache_t * cache)568 fts_cache_init(
569 /*===========*/
570 	fts_cache_t*	cache)		/*!< in: cache to initialize */
571 {
572 	ulint		i;
573 
574 	/* Just to make sure */
575 	ut_a(cache->sync_heap->arg == NULL);
576 
577 	cache->sync_heap->arg = mem_heap_create(1024);
578 
579 	cache->total_size = 0;
580 	cache->total_size_at_sync = 0;
581 
582 	mutex_enter((ib_mutex_t*) &cache->deleted_lock);
583 	cache->deleted_doc_ids = ib_vector_create(
584 		cache->sync_heap, sizeof(doc_id_t), 4);
585 	mutex_exit((ib_mutex_t*) &cache->deleted_lock);
586 
587 	/* Reset the cache data for all the FTS indexes. */
588 	for (i = 0; i < ib_vector_size(cache->indexes); ++i) {
589 		fts_index_cache_t*	index_cache;
590 
591 		index_cache = static_cast<fts_index_cache_t*>(
592 			ib_vector_get(cache->indexes, i));
593 
594 		fts_index_cache_init(cache->sync_heap, index_cache);
595 	}
596 }
597 
598 /****************************************************************//**
599 Create a FTS cache. */
600 fts_cache_t*
fts_cache_create(dict_table_t * table)601 fts_cache_create(
602 /*=============*/
603 	dict_table_t*	table)	/*!< in: table owns the FTS cache */
604 {
605 	mem_heap_t*	heap;
606 	fts_cache_t*	cache;
607 
608 	heap = static_cast<mem_heap_t*>(mem_heap_create(512));
609 
610 	cache = static_cast<fts_cache_t*>(
611 		mem_heap_zalloc(heap, sizeof(*cache)));
612 
613 	cache->cache_heap = heap;
614 
615 	rw_lock_create(fts_cache_rw_lock_key, &cache->lock, SYNC_FTS_CACHE);
616 
617 	rw_lock_create(
618 		fts_cache_init_rw_lock_key, &cache->init_lock,
619 		SYNC_FTS_CACHE_INIT);
620 
621 	mutex_create(LATCH_ID_FTS_DELETE, &cache->deleted_lock);
622 
623 	mutex_create(LATCH_ID_FTS_DOC_ID, &cache->doc_id_lock);
624 
625 	/* This is the heap used to create the cache itself. */
626 	cache->self_heap = ib_heap_allocator_create(heap);
627 
628 	/* This is a transient heap, used for storing sync data. */
629 	cache->sync_heap = ib_heap_allocator_create(heap);
630 	cache->sync_heap->arg = NULL;
631 
632 	cache->sync = static_cast<fts_sync_t*>(
633 		mem_heap_zalloc(heap, sizeof(fts_sync_t)));
634 
635 	cache->sync->table = table;
636 	cache->sync->event = os_event_create(0);
637 
638 	/* Create the index cache vector that will hold the inverted indexes. */
639 	cache->indexes = ib_vector_create(
640 		cache->self_heap, sizeof(fts_index_cache_t), 2);
641 
642 	fts_cache_init(cache);
643 
644 	cache->stopword_info.cached_stopword = NULL;
645 	cache->stopword_info.charset = NULL;
646 
647 	cache->stopword_info.heap = cache->self_heap;
648 
649 	cache->stopword_info.status = STOPWORD_NOT_INIT;
650 
651 	return(cache);
652 }
653 
654 /*******************************************************************//**
655 Add a newly create index into FTS cache */
656 void
fts_add_index(dict_index_t * index,dict_table_t * table)657 fts_add_index(
658 /*==========*/
659 	dict_index_t*	index,		/*!< FTS index to be added */
660 	dict_table_t*	table)		/*!< table */
661 {
662 	fts_t*			fts = table->fts;
663 	fts_cache_t*		cache;
664 	fts_index_cache_t*	index_cache;
665 
666 	ut_ad(fts);
667 	cache = table->fts->cache;
668 
669 	rw_lock_x_lock(&cache->init_lock);
670 
671 	ib_vector_push(fts->indexes, &index);
672 
673 	index_cache = fts_find_index_cache(cache, index);
674 
675 	if (!index_cache) {
676 		/* Add new index cache structure */
677 		index_cache = fts_cache_index_cache_create(table, index);
678 	}
679 
680 	rw_lock_x_unlock(&cache->init_lock);
681 }
682 
683 /*******************************************************************//**
684 recalibrate get_doc structure after index_cache in cache->indexes changed */
685 static
686 void
fts_reset_get_doc(fts_cache_t * cache)687 fts_reset_get_doc(
688 /*==============*/
689 	fts_cache_t*	cache)	/*!< in: FTS index cache */
690 {
691 	fts_get_doc_t*  get_doc;
692 	ulint		i;
693 
694 	ut_ad(rw_lock_own(&cache->init_lock, RW_LOCK_X));
695 
696 	ib_vector_reset(cache->get_docs);
697 
698 	for (i = 0; i < ib_vector_size(cache->indexes); i++) {
699 		fts_index_cache_t*	ind_cache;
700 
701 		ind_cache = static_cast<fts_index_cache_t*>(
702 			ib_vector_get(cache->indexes, i));
703 
704 		get_doc = static_cast<fts_get_doc_t*>(
705 			ib_vector_push(cache->get_docs, NULL));
706 
707 		memset(get_doc, 0x0, sizeof(*get_doc));
708 
709 		get_doc->index_cache = ind_cache;
710 		get_doc->cache = cache;
711 	}
712 
713 	ut_ad(ib_vector_size(cache->get_docs)
714 	      == ib_vector_size(cache->indexes));
715 }
716 
717 /*******************************************************************//**
718 Check an index is in the table->indexes list
719 @return TRUE if it exists */
720 static
721 ibool
fts_in_dict_index(dict_table_t * table,dict_index_t * index_check)722 fts_in_dict_index(
723 /*==============*/
724 	dict_table_t*	table,		/*!< in: Table */
725 	dict_index_t*	index_check)	/*!< in: index to be checked */
726 {
727 	dict_index_t*	index;
728 
729 	for (index = dict_table_get_first_index(table);
730 	     index != NULL;
731 	     index = dict_table_get_next_index(index)) {
732 
733 		if (index == index_check) {
734 			return(TRUE);
735 		}
736 	}
737 
738 	return(FALSE);
739 }
740 
741 /*******************************************************************//**
742 Check an index is in the fts->cache->indexes list
743 @return TRUE if it exists */
744 static
745 ibool
fts_in_index_cache(dict_table_t * table,dict_index_t * index)746 fts_in_index_cache(
747 /*===============*/
748 	dict_table_t*	table,	/*!< in: Table */
749 	dict_index_t*	index)	/*!< in: index to be checked */
750 {
751 	ulint	i;
752 
753 	for (i = 0; i < ib_vector_size(table->fts->cache->indexes); i++) {
754 		fts_index_cache_t*      index_cache;
755 
756 		index_cache = static_cast<fts_index_cache_t*>(
757 			ib_vector_get(table->fts->cache->indexes, i));
758 
759 		if (index_cache->index == index) {
760 			return(TRUE);
761 		}
762 	}
763 
764 	return(FALSE);
765 }
766 
767 /*******************************************************************//**
768 Check indexes in the fts->indexes is also present in index cache and
769 table->indexes list
770 @return TRUE if all indexes match */
771 ibool
fts_check_cached_index(dict_table_t * table)772 fts_check_cached_index(
773 /*===================*/
774 	dict_table_t*	table)	/*!< in: Table where indexes are dropped */
775 {
776 	ulint	i;
777 
778 	if (!table->fts || !table->fts->cache) {
779 		return(TRUE);
780 	}
781 
782 	ut_a(ib_vector_size(table->fts->indexes)
783 	      == ib_vector_size(table->fts->cache->indexes));
784 
785 	for (i = 0; i < ib_vector_size(table->fts->indexes); i++) {
786 		dict_index_t*	index;
787 
788 		index = static_cast<dict_index_t*>(
789 			ib_vector_getp(table->fts->indexes, i));
790 
791 		if (!fts_in_index_cache(table, index)) {
792 			return(FALSE);
793 		}
794 
795 		if (!fts_in_dict_index(table, index)) {
796 			return(FALSE);
797 		}
798 	}
799 
800 	return(TRUE);
801 }
802 
803 /** Clear all fts resources when there is no internal DOC_ID
804 and there are no new fts index to add.
805 @param[in,out]	table	table  where fts is to be freed
806 @param[in]	trx	transaction to drop all fts tables */
fts_clear_all(dict_table_t * table,trx_t * trx)807 void fts_clear_all(dict_table_t *table, trx_t *trx)
808 {
809   if (DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID) ||
810       !table->fts ||
811       !ib_vector_is_empty(table->fts->indexes))
812     return;
813 
814   for (const dict_index_t *index= dict_table_get_first_index(table);
815        index; index= dict_table_get_next_index(index))
816     if (index->type & DICT_FTS)
817       return;
818 
819   fts_optimize_remove_table(table);
820 
821   fts_drop_tables(trx, table);
822   fts_free(table);
823   DICT_TF2_FLAG_UNSET(table, DICT_TF2_FTS);
824 }
825 
/*******************************************************************//**
Drop auxiliary tables related to an FTS index.

Two cases are handled before the auxiliary tables are dropped:
- the index is the only entry in fts->indexes and the table has
  DICT_TF2_FTS_HAS_DOC_ID set, or fts->indexes is empty: the table is
  removed from the optimize thread, DICT_TF2_FTS is unset and the whole
  FTS cache is replaced by a fresh one that keeps the doc id counters;
- otherwise only this index's entry is removed from the cache, under
  cache->init_lock in X mode.
In both cases the index is finally removed from fts->indexes.
@return DB_SUCCESS or error number */
dberr_t
fts_drop_index(
/*===========*/
	dict_table_t*	table,	/*!< in: Table where indexes are dropped */
	dict_index_t*	index,	/*!< in: Index to be dropped */
	trx_t*		trx)	/*!< in: Transaction for the drop */
{
	ib_vector_t*	indexes = table->fts->indexes;
	dberr_t		err = DB_SUCCESS;

	ut_a(indexes);

	if ((ib_vector_size(indexes) == 1
	     && (index == static_cast<dict_index_t*>(
			ib_vector_getp(table->fts->indexes, 0)))
	     && DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID))
	    || ib_vector_is_empty(indexes)) {
		doc_id_t	current_doc_id;
		doc_id_t	first_doc_id;

		/* If we are dropping the only FTS index of the table,
		remove it from optimize thread */
		fts_optimize_remove_table(table);

		DICT_TF2_FLAG_UNSET(table, DICT_TF2_FTS);

		/* Wait until the index is no longer being synced, or
		until the transaction is interrupted. */
		while (index->index_fts_syncing
		       && !trx_is_interrupted(trx)) {
			DICT_BG_YIELD(trx);
		}

		/* Remember the doc id counters so that they survive
		the re-creation of the cache below. */
		current_doc_id = table->fts->cache->next_doc_id;
		first_doc_id = table->fts->cache->first_doc_id;
		fts_cache_clear(table->fts->cache);
		fts_cache_destroy(table->fts->cache);
		table->fts->cache = fts_cache_create(table);
		table->fts->cache->next_doc_id = current_doc_id;
		table->fts->cache->first_doc_id = first_doc_id;
	} else {
		fts_cache_t*            cache = table->fts->cache;
		fts_index_cache_t*      index_cache;

		rw_lock_x_lock(&cache->init_lock);

		index_cache = fts_find_index_cache(cache, index);

		if (index_cache != NULL) {
			/* Same wait as above before the cached words
			of this index are released. */
			while (index->index_fts_syncing
			       && !trx_is_interrupted(trx)) {
				DICT_BG_YIELD(trx);
			}
			if (index_cache->words) {
				fts_words_free(index_cache->words);
				rbt_free(index_cache->words);
			}

			/* The element to remove is identified by the first
			pointer-sized word of the index cache entry.
			NOTE(review): presumably that is the `index` member
			of fts_index_cache_t and ib_vector_remove() matches
			on it - confirm against their definitions. */
			ib_vector_remove(cache->indexes, *(void**) index_cache);
		}

		/* cache->indexes may have changed, so rebuild get_docs. */
		if (cache->get_docs) {
			fts_reset_get_doc(cache);
		}

		rw_lock_x_unlock(&cache->init_lock);
	}

	err = fts_drop_index_tables(trx, index);

	/* The index is removed from fts->indexes regardless of err. */
	ib_vector_remove(indexes, (const void*) index);

	return(err);
}
901 
902 /****************************************************************//**
903 Free the query graph but check whether dict_sys.mutex is already
904 held */
905 void
fts_que_graph_free_check_lock(fts_table_t * fts_table,const fts_index_cache_t * index_cache,que_t * graph)906 fts_que_graph_free_check_lock(
907 /*==========================*/
908 	fts_table_t*		fts_table,	/*!< in: FTS table */
909 	const fts_index_cache_t*index_cache,	/*!< in: FTS index cache */
910 	que_t*			graph)		/*!< in: query graph */
911 {
912 	bool	has_dict = FALSE;
913 
914 	if (fts_table && fts_table->table) {
915 		ut_ad(fts_table->table->fts);
916 
917 		has_dict = fts_table->table->fts->dict_locked;
918 	} else if (index_cache) {
919 		ut_ad(index_cache->index->table->fts);
920 
921 		has_dict = index_cache->index->table->fts->dict_locked;
922 	}
923 
924 	if (!has_dict) {
925 		mutex_enter(&dict_sys.mutex);
926 	}
927 
928 	ut_ad(mutex_own(&dict_sys.mutex));
929 
930 	que_graph_free(graph);
931 
932 	if (!has_dict) {
933 		mutex_exit(&dict_sys.mutex);
934 	}
935 }
936 
937 /****************************************************************//**
938 Create an FTS index cache. */
939 CHARSET_INFO*
fts_index_get_charset(dict_index_t * index)940 fts_index_get_charset(
941 /*==================*/
942 	dict_index_t*		index)		/*!< in: FTS index */
943 {
944 	CHARSET_INFO*		charset = NULL;
945 	dict_field_t*		field;
946 	ulint			prtype;
947 
948 	field = dict_index_get_nth_field(index, 0);
949 	prtype = field->col->prtype;
950 
951 	charset = fts_get_charset(prtype);
952 
953 #ifdef FTS_DEBUG
954 	/* Set up charset info for this index. Please note all
955 	field of the FTS index should have the same charset */
956 	for (i = 1; i < index->n_fields; i++) {
957 		CHARSET_INFO*   fld_charset;
958 
959 		field = dict_index_get_nth_field(index, i);
960 		prtype = field->col->prtype;
961 
962 		fld_charset = fts_get_charset(prtype);
963 
964 		/* All FTS columns should have the same charset */
965 		if (charset) {
966 			ut_a(charset == fld_charset);
967 		} else {
968 			charset = fld_charset;
969 		}
970 	}
971 #endif
972 
973 	return(charset);
974 
975 }
976 /****************************************************************//**
977 Create an FTS index cache.
978 @return Index Cache */
979 fts_index_cache_t*
fts_cache_index_cache_create(dict_table_t * table,dict_index_t * index)980 fts_cache_index_cache_create(
981 /*=========================*/
982 	dict_table_t*		table,		/*!< in: table with FTS index */
983 	dict_index_t*		index)		/*!< in: FTS index */
984 {
985 	ulint			n_bytes;
986 	fts_index_cache_t*	index_cache;
987 	fts_cache_t*		cache = table->fts->cache;
988 
989 	ut_a(cache != NULL);
990 
991 	ut_ad(rw_lock_own(&cache->init_lock, RW_LOCK_X));
992 
993 	/* Must not already exist in the cache vector. */
994 	ut_a(fts_find_index_cache(cache, index) == NULL);
995 
996 	index_cache = static_cast<fts_index_cache_t*>(
997 		ib_vector_push(cache->indexes, NULL));
998 
999 	memset(index_cache, 0x0, sizeof(*index_cache));
1000 
1001 	index_cache->index = index;
1002 
1003 	index_cache->charset = fts_index_get_charset(index);
1004 
1005 	n_bytes = sizeof(que_t*) * FTS_NUM_AUX_INDEX;
1006 
1007 	index_cache->ins_graph = static_cast<que_t**>(
1008 		mem_heap_zalloc(static_cast<mem_heap_t*>(
1009 			cache->self_heap->arg), n_bytes));
1010 
1011 	index_cache->sel_graph = static_cast<que_t**>(
1012 		mem_heap_zalloc(static_cast<mem_heap_t*>(
1013 			cache->self_heap->arg), n_bytes));
1014 
1015 	fts_index_cache_init(cache->sync_heap, index_cache);
1016 
1017 	if (cache->get_docs) {
1018 		fts_reset_get_doc(cache);
1019 	}
1020 
1021 	return(index_cache);
1022 }
1023 
1024 /****************************************************************//**
1025 Release all resources help by the words rb tree e.g., the node ilist. */
1026 static
1027 void
fts_words_free(ib_rbt_t * words)1028 fts_words_free(
1029 /*===========*/
1030 	ib_rbt_t*	words)			/*!< in: rb tree of words */
1031 {
1032 	const ib_rbt_node_t*	rbt_node;
1033 
1034 	/* Free the resources held by a word. */
1035 	for (rbt_node = rbt_first(words);
1036 	     rbt_node != NULL;
1037 	     rbt_node = rbt_first(words)) {
1038 
1039 		ulint			i;
1040 		fts_tokenizer_word_t*	word;
1041 
1042 		word = rbt_value(fts_tokenizer_word_t, rbt_node);
1043 
1044 		/* Free the ilists of this word. */
1045 		for (i = 0; i < ib_vector_size(word->nodes); ++i) {
1046 
1047 			fts_node_t* fts_node = static_cast<fts_node_t*>(
1048 				ib_vector_get(word->nodes, i));
1049 
1050 			ut_free(fts_node->ilist);
1051 			fts_node->ilist = NULL;
1052 		}
1053 
1054 		/* NOTE: We are responsible for free'ing the node */
1055 		ut_free(rbt_remove_node(words, rbt_node));
1056 	}
1057 }
1058 
1059 /** Clear cache.
1060 @param[in,out]	cache	fts cache */
1061 void
fts_cache_clear(fts_cache_t * cache)1062 fts_cache_clear(
1063 	fts_cache_t*	cache)
1064 {
1065 	ulint		i;
1066 
1067 	for (i = 0; i < ib_vector_size(cache->indexes); ++i) {
1068 		ulint			j;
1069 		fts_index_cache_t*	index_cache;
1070 
1071 		index_cache = static_cast<fts_index_cache_t*>(
1072 			ib_vector_get(cache->indexes, i));
1073 
1074 		fts_words_free(index_cache->words);
1075 
1076 		rbt_free(index_cache->words);
1077 
1078 		index_cache->words = NULL;
1079 
1080 		for (j = 0; j < FTS_NUM_AUX_INDEX; ++j) {
1081 
1082 			if (index_cache->ins_graph[j] != NULL) {
1083 
1084 				fts_que_graph_free_check_lock(
1085 					NULL, index_cache,
1086 					index_cache->ins_graph[j]);
1087 
1088 				index_cache->ins_graph[j] = NULL;
1089 			}
1090 
1091 			if (index_cache->sel_graph[j] != NULL) {
1092 
1093 				fts_que_graph_free_check_lock(
1094 					NULL, index_cache,
1095 					index_cache->sel_graph[j]);
1096 
1097 				index_cache->sel_graph[j] = NULL;
1098 			}
1099 		}
1100 
1101 		index_cache->doc_stats = NULL;
1102 	}
1103 
1104 	fts_need_sync = false;
1105 
1106 	cache->total_size = 0;
1107 
1108 	mutex_enter((ib_mutex_t*) &cache->deleted_lock);
1109 	cache->deleted_doc_ids = NULL;
1110 	mutex_exit((ib_mutex_t*) &cache->deleted_lock);
1111 
1112 	mem_heap_free(static_cast<mem_heap_t*>(cache->sync_heap->arg));
1113 	cache->sync_heap->arg = NULL;
1114 }
1115 
1116 /*********************************************************************//**
1117 Search the index specific cache for a particular FTS index.
1118 @return the index cache else NULL */
1119 UNIV_INLINE
1120 fts_index_cache_t*
fts_get_index_cache(fts_cache_t * cache,const dict_index_t * index)1121 fts_get_index_cache(
1122 /*================*/
1123 	fts_cache_t*		cache,		/*!< in: cache to search */
1124 	const dict_index_t*	index)		/*!< in: index to search for */
1125 {
1126 	ulint			i;
1127 
1128 	ut_ad(rw_lock_own((rw_lock_t*) &cache->lock, RW_LOCK_X)
1129 	      || rw_lock_own((rw_lock_t*) &cache->init_lock, RW_LOCK_X));
1130 
1131 	for (i = 0; i < ib_vector_size(cache->indexes); ++i) {
1132 		fts_index_cache_t*	index_cache;
1133 
1134 		index_cache = static_cast<fts_index_cache_t*>(
1135 			ib_vector_get(cache->indexes, i));
1136 
1137 		if (index_cache->index == index) {
1138 
1139 			return(index_cache);
1140 		}
1141 	}
1142 
1143 	return(NULL);
1144 }
1145 
#ifdef FTS_DEBUG
/*********************************************************************//**
Look up the get_doc structure whose index cache refers to the given
index by scanning cache->get_docs. The caller must hold cache->init_lock
in X mode (checked in debug builds).
@return the fts_get_doc_t item else NULL */
static
fts_get_doc_t*
fts_get_index_get_doc(
/*==================*/
	fts_cache_t*		cache,		/*!< in: cache to search */
	const dict_index_t*	index)		/*!< in: index to search for */
{
	ut_ad(rw_lock_own((rw_lock_t*) &cache->init_lock, RW_LOCK_X));

	for (ulint pos = 0; pos < ib_vector_size(cache->get_docs); ++pos) {

		fts_get_doc_t*	candidate = static_cast<fts_get_doc_t*>(
			ib_vector_get(cache->get_docs, pos));

		if (candidate->index_cache->index != index) {
			continue;
		}

		return(candidate);
	}

	/* No get_doc entry is bound to this index. */
	return(NULL);
}
#endif
1176 
1177 /**********************************************************************//**
1178 Find an existing word, or if not found, create one and return it.
1179 @return specified word token */
1180 static
1181 fts_tokenizer_word_t*
fts_tokenizer_word_get(fts_cache_t * cache,fts_index_cache_t * index_cache,fts_string_t * text)1182 fts_tokenizer_word_get(
1183 /*===================*/
1184 	fts_cache_t*	cache,			/*!< in: cache */
1185 	fts_index_cache_t*
1186 			index_cache,		/*!< in: index cache */
1187 	fts_string_t*	text)			/*!< in: node text */
1188 {
1189 	fts_tokenizer_word_t*	word;
1190 	ib_rbt_bound_t		parent;
1191 
1192 	ut_ad(rw_lock_own(&cache->lock, RW_LOCK_X));
1193 
1194 	/* If it is a stopword, do not index it */
1195 	if (!fts_check_token(text,
1196 		    cache->stopword_info.cached_stopword,
1197 		    index_cache->charset)) {
1198 
1199 		return(NULL);
1200 	}
1201 
1202 	/* Check if we found a match, if not then add word to tree. */
1203 	if (rbt_search(index_cache->words, &parent, text) != 0) {
1204 		mem_heap_t*		heap;
1205 		fts_tokenizer_word_t	new_word;
1206 
1207 		heap = static_cast<mem_heap_t*>(cache->sync_heap->arg);
1208 
1209 		new_word.nodes = ib_vector_create(
1210 			cache->sync_heap, sizeof(fts_node_t), 4);
1211 
1212 		fts_string_dup(&new_word.text, text, heap);
1213 
1214 		parent.last = rbt_add_node(
1215 			index_cache->words, &parent, &new_word);
1216 
1217 		/* Take into account the RB tree memory use and the vector. */
1218 		cache->total_size += sizeof(new_word)
1219 			+ sizeof(ib_rbt_node_t)
1220 			+ text->f_len
1221 			+ (sizeof(fts_node_t) * 4)
1222 			+ sizeof(*new_word.nodes);
1223 
1224 		ut_ad(rbt_validate(index_cache->words));
1225 	}
1226 
1227 	word = rbt_value(fts_tokenizer_word_t, parent.last);
1228 
1229 	return(word);
1230 }
1231 
1232 /**********************************************************************//**
1233 Add the given doc_id/word positions to the given node's ilist. */
1234 void
fts_cache_node_add_positions(fts_cache_t * cache,fts_node_t * node,doc_id_t doc_id,ib_vector_t * positions)1235 fts_cache_node_add_positions(
1236 /*=========================*/
1237 	fts_cache_t*	cache,		/*!< in: cache */
1238 	fts_node_t*	node,		/*!< in: word node */
1239 	doc_id_t	doc_id,		/*!< in: doc id */
1240 	ib_vector_t*	positions)	/*!< in: fts_token_t::positions */
1241 {
1242 	ulint		i;
1243 	byte*		ptr;
1244 	byte*		ilist;
1245 	ulint		enc_len;
1246 	ulint		last_pos;
1247 	byte*		ptr_start;
1248 	doc_id_t	doc_id_delta;
1249 
1250 #ifdef UNIV_DEBUG
1251 	if (cache) {
1252 		ut_ad(rw_lock_own(&cache->lock, RW_LOCK_X));
1253 	}
1254 #endif /* UNIV_DEBUG */
1255 
1256 	ut_ad(doc_id >= node->last_doc_id);
1257 
1258 	/* Calculate the space required to store the ilist. */
1259 	doc_id_delta = doc_id - node->last_doc_id;
1260 	enc_len = fts_get_encoded_len(doc_id_delta);
1261 
1262 	last_pos = 0;
1263 	for (i = 0; i < ib_vector_size(positions); i++) {
1264 		ulint	pos = *(static_cast<ulint*>(
1265 			ib_vector_get(positions, i)));
1266 
1267 		ut_ad(last_pos == 0 || pos > last_pos);
1268 
1269 		enc_len += fts_get_encoded_len(pos - last_pos);
1270 		last_pos = pos;
1271 	}
1272 
1273 	/* The 0x00 byte at the end of the token positions list. */
1274 	enc_len++;
1275 
1276 	if ((node->ilist_size_alloc - node->ilist_size) >= enc_len) {
1277 		/* No need to allocate more space, we can fit in the new
1278 		data at the end of the old one. */
1279 		ilist = NULL;
1280 		ptr = node->ilist + node->ilist_size;
1281 	} else {
1282 		ulint	new_size = node->ilist_size + enc_len;
1283 
1284 		/* Over-reserve space by a fixed size for small lengths and
1285 		by 20% for lengths >= 48 bytes. */
1286 		if (new_size < 16) {
1287 			new_size = 16;
1288 		} else if (new_size < 32) {
1289 			new_size = 32;
1290 		} else if (new_size < 48) {
1291 			new_size = 48;
1292 		} else {
1293 			new_size = (ulint)(1.2 * new_size);
1294 		}
1295 
1296 		ilist = static_cast<byte*>(ut_malloc_nokey(new_size));
1297 		ptr = ilist + node->ilist_size;
1298 
1299 		node->ilist_size_alloc = new_size;
1300 		if (cache) {
1301 			cache->total_size += new_size;
1302 		}
1303 	}
1304 
1305 	ptr_start = ptr;
1306 
1307 	/* Encode the new fragment. */
1308 	ptr = fts_encode_int(doc_id_delta, ptr);
1309 
1310 	last_pos = 0;
1311 	for (i = 0; i < ib_vector_size(positions); i++) {
1312 		ulint	pos = *(static_cast<ulint*>(
1313 			 ib_vector_get(positions, i)));
1314 
1315 		ptr = fts_encode_int(pos - last_pos, ptr);
1316 		last_pos = pos;
1317 	}
1318 
1319 	*ptr++ = 0;
1320 
1321 	ut_a(enc_len == (ulint)(ptr - ptr_start));
1322 
1323 	if (ilist) {
1324 		/* Copy old ilist to the start of the new one and switch the
1325 		new one into place in the node. */
1326 		if (node->ilist_size > 0) {
1327 			memcpy(ilist, node->ilist, node->ilist_size);
1328 			ut_free(node->ilist);
1329 			if (cache) {
1330 				cache->total_size -= node->ilist_size;
1331 			}
1332 		}
1333 
1334 		node->ilist = ilist;
1335 	}
1336 
1337 	node->ilist_size += enc_len;
1338 
1339 	if (node->first_doc_id == FTS_NULL_DOC_ID) {
1340 		node->first_doc_id = doc_id;
1341 	}
1342 
1343 	node->last_doc_id = doc_id;
1344 	++node->doc_count;
1345 }
1346 
1347 /**********************************************************************//**
1348 Add document to the cache. */
1349 static
1350 void
fts_cache_add_doc(fts_cache_t * cache,fts_index_cache_t * index_cache,doc_id_t doc_id,ib_rbt_t * tokens)1351 fts_cache_add_doc(
1352 /*==============*/
1353 	fts_cache_t*	cache,			/*!< in: cache */
1354 	fts_index_cache_t*
1355 			index_cache,		/*!< in: index cache */
1356 	doc_id_t	doc_id,			/*!< in: doc id to add */
1357 	ib_rbt_t*	tokens)			/*!< in: document tokens */
1358 {
1359 	const ib_rbt_node_t*	node;
1360 	ulint			n_words;
1361 	fts_doc_stats_t*	doc_stats;
1362 
1363 	if (!tokens) {
1364 		return;
1365 	}
1366 
1367 	ut_ad(rw_lock_own(&cache->lock, RW_LOCK_X));
1368 
1369 	n_words = rbt_size(tokens);
1370 
1371 	for (node = rbt_first(tokens); node; node = rbt_first(tokens)) {
1372 
1373 		fts_tokenizer_word_t*	word;
1374 		fts_node_t*		fts_node = NULL;
1375 		fts_token_t*		token = rbt_value(fts_token_t, node);
1376 
1377 		/* Find and/or add token to the cache. */
1378 		word = fts_tokenizer_word_get(
1379 			cache, index_cache, &token->text);
1380 
1381 		if (!word) {
1382 			ut_free(rbt_remove_node(tokens, node));
1383 			continue;
1384 		}
1385 
1386 		if (ib_vector_size(word->nodes) > 0) {
1387 			fts_node = static_cast<fts_node_t*>(
1388 				ib_vector_last(word->nodes));
1389 		}
1390 
1391 		if (fts_node == NULL || fts_node->synced
1392 		    || fts_node->ilist_size > FTS_ILIST_MAX_SIZE
1393 		    || doc_id < fts_node->last_doc_id) {
1394 
1395 			fts_node = static_cast<fts_node_t*>(
1396 				ib_vector_push(word->nodes, NULL));
1397 
1398 			memset(fts_node, 0x0, sizeof(*fts_node));
1399 
1400 			cache->total_size += sizeof(*fts_node);
1401 		}
1402 
1403 		fts_cache_node_add_positions(
1404 			cache, fts_node, doc_id, token->positions);
1405 
1406 		ut_free(rbt_remove_node(tokens, node));
1407 	}
1408 
1409 	ut_a(rbt_empty(tokens));
1410 
1411 	/* Add to doc ids processed so far. */
1412 	doc_stats = static_cast<fts_doc_stats_t*>(
1413 		ib_vector_push(index_cache->doc_stats, NULL));
1414 
1415 	doc_stats->doc_id = doc_id;
1416 	doc_stats->word_count = n_words;
1417 
1418 	/* Add the doc stats memory usage too. */
1419 	cache->total_size += sizeof(*doc_stats);
1420 
1421 	if (doc_id > cache->sync->max_doc_id) {
1422 		cache->sync->max_doc_id = doc_id;
1423 	}
1424 }
1425 
1426 /****************************************************************//**
1427 Drops a table. If the table can't be found we return a SUCCESS code.
1428 @return DB_SUCCESS or error code */
1429 static MY_ATTRIBUTE((nonnull, warn_unused_result))
1430 dberr_t
fts_drop_table(trx_t * trx,const char * table_name)1431 fts_drop_table(
1432 /*===========*/
1433 	trx_t*		trx,			/*!< in: transaction */
1434 	const char*	table_name)		/*!< in: table to drop */
1435 {
1436 	dict_table_t*	table;
1437 	dberr_t		error = DB_SUCCESS;
1438 
1439 	/* Check that the table exists in our data dictionary.
1440 	Similar to regular drop table case, we will open table with
1441 	DICT_ERR_IGNORE_INDEX_ROOT and DICT_ERR_IGNORE_CORRUPT option */
1442 	table = dict_table_open_on_name(
1443 		table_name, TRUE, FALSE,
1444 		static_cast<dict_err_ignore_t>(
1445                         DICT_ERR_IGNORE_INDEX_ROOT | DICT_ERR_IGNORE_CORRUPT));
1446 
1447 	if (table != 0) {
1448 
1449 		dict_table_close(table, TRUE, FALSE);
1450 
1451 		/* Pass nonatomic=false (dont allow data dict unlock),
1452 		because the transaction may hold locks on SYS_* tables from
1453 		previous calls to fts_drop_table(). */
1454 		error = row_drop_table_for_mysql(table_name, trx,
1455 						 SQLCOM_DROP_DB, false, false);
1456 
1457 		if (UNIV_UNLIKELY(error != DB_SUCCESS)) {
1458 			ib::error() << "Unable to drop FTS index aux table "
1459 				<< table_name << ": " << error;
1460 		}
1461 	} else {
1462 		error = DB_FAIL;
1463 	}
1464 
1465 	return(error);
1466 }
1467 
1468 /****************************************************************//**
1469 Rename a single auxiliary table due to database name change.
1470 @return DB_SUCCESS or error code */
1471 static MY_ATTRIBUTE((nonnull, warn_unused_result))
1472 dberr_t
fts_rename_one_aux_table(const char * new_name,const char * fts_table_old_name,trx_t * trx)1473 fts_rename_one_aux_table(
1474 /*=====================*/
1475 	const char*	new_name,		/*!< in: new parent tbl name */
1476 	const char*	fts_table_old_name,	/*!< in: old aux tbl name */
1477 	trx_t*		trx)			/*!< in: transaction */
1478 {
1479 	char	fts_table_new_name[MAX_TABLE_NAME_LEN];
1480 	ulint	new_db_name_len = dict_get_db_name_len(new_name);
1481 	ulint	old_db_name_len = dict_get_db_name_len(fts_table_old_name);
1482 	ulint	table_new_name_len = strlen(fts_table_old_name)
1483 				     + new_db_name_len - old_db_name_len;
1484 
1485 	/* Check if the new and old database names are the same, if so,
1486 	nothing to do */
1487 	ut_ad((new_db_name_len != old_db_name_len)
1488 	      || strncmp(new_name, fts_table_old_name, old_db_name_len) != 0);
1489 
1490 	/* Get the database name from "new_name", and table name
1491 	from the fts_table_old_name */
1492 	strncpy(fts_table_new_name, new_name, new_db_name_len);
1493 	strncpy(fts_table_new_name + new_db_name_len,
1494 	       strchr(fts_table_old_name, '/'),
1495 	       table_new_name_len - new_db_name_len);
1496 	fts_table_new_name[table_new_name_len] = 0;
1497 
1498 	return row_rename_table_for_mysql(
1499 		fts_table_old_name, fts_table_new_name, trx, false, false);
1500 }
1501 
1502 /****************************************************************//**
1503 Rename auxiliary tables for all fts index for a table. This(rename)
1504 is due to database name change
1505 @return DB_SUCCESS or error code */
1506 dberr_t
fts_rename_aux_tables(dict_table_t * table,const char * new_name,trx_t * trx)1507 fts_rename_aux_tables(
1508 /*==================*/
1509 	dict_table_t*	table,		/*!< in: user Table */
1510 	const char*     new_name,       /*!< in: new table name */
1511 	trx_t*		trx)		/*!< in: transaction */
1512 {
1513 	ulint		i;
1514 	fts_table_t	fts_table;
1515 
1516 	FTS_INIT_FTS_TABLE(&fts_table, NULL, FTS_COMMON_TABLE, table);
1517 
1518 	dberr_t err = DB_SUCCESS;
1519 	char old_table_name[MAX_FULL_NAME_LEN];
1520 
1521 	/* Rename common auxiliary tables */
1522 	for (i = 0; fts_common_tables[i] != NULL; ++i) {
1523 		fts_table.suffix = fts_common_tables[i];
1524 		fts_get_table_name(&fts_table, old_table_name, true);
1525 
1526 		err = fts_rename_one_aux_table(new_name, old_table_name, trx);
1527 
1528 		if (err != DB_SUCCESS) {
1529 			return(err);
1530 		}
1531 	}
1532 
1533 	fts_t*	fts = table->fts;
1534 
1535 	/* Rename index specific auxiliary tables */
1536 	for (i = 0; fts->indexes != 0 && i < ib_vector_size(fts->indexes);
1537 	     ++i) {
1538 		dict_index_t*	index;
1539 
1540 		index = static_cast<dict_index_t*>(
1541 			ib_vector_getp(fts->indexes, i));
1542 
1543 		FTS_INIT_INDEX_TABLE(&fts_table, NULL, FTS_INDEX_TABLE, index);
1544 
1545 		for (ulint j = 0; j < FTS_NUM_AUX_INDEX; ++j) {
1546 			fts_table.suffix = fts_get_suffix(j);
1547 			fts_get_table_name(&fts_table, old_table_name, true);
1548 
1549 			err = fts_rename_one_aux_table(
1550 				new_name, old_table_name, trx);
1551 
1552 			DBUG_EXECUTE_IF("fts_rename_failure",
1553 					err = DB_DEADLOCK;
1554 					fts_sql_rollback(trx););
1555 
1556 			if (err != DB_SUCCESS) {
1557 				return(err);
1558 			}
1559 		}
1560 	}
1561 
1562 	return(DB_SUCCESS);
1563 }
1564 
1565 /** Drops the common ancillary tables needed for supporting an FTS index
1566 on the given table. row_mysql_lock_data_dictionary must have been called
1567 before this.
1568 @param[in]	trx		transaction to drop fts common table
1569 @param[in]	fts_table	table with an FTS index
1570 @param[in]	drop_orphan	True if the function is used to drop
1571 				orphaned table
1572 @return DB_SUCCESS or error code */
1573 static dberr_t
fts_drop_common_tables(trx_t * trx,fts_table_t * fts_table,bool drop_orphan=false)1574 fts_drop_common_tables(
1575 	trx_t*		trx,
1576 	fts_table_t*	fts_table,
1577 	bool		drop_orphan=false)
1578 {
1579 	ulint		i;
1580 	dberr_t		error = DB_SUCCESS;
1581 
1582 	for (i = 0; fts_common_tables[i] != NULL; ++i) {
1583 		dberr_t	err;
1584 		char	table_name[MAX_FULL_NAME_LEN];
1585 
1586 		fts_table->suffix = fts_common_tables[i];
1587 		fts_get_table_name(fts_table, table_name, true);
1588 
1589 		err = fts_drop_table(trx, table_name);
1590 
1591 		/* We only return the status of the last error. */
1592 		if (err != DB_SUCCESS && err != DB_FAIL) {
1593 			error = err;
1594 		}
1595 
1596 		if (drop_orphan && err == DB_FAIL) {
1597 			char* path = fil_make_filepath(
1598 					NULL, table_name, IBD, false);
1599 			if (path != NULL) {
1600 				os_file_delete_if_exists(
1601 					innodb_data_file_key, path, NULL);
1602 				ut_free(path);
1603 			}
1604 		}
1605 	}
1606 
1607 	return(error);
1608 }
1609 
1610 /****************************************************************//**
1611 Since we do a horizontal split on the index table, we need to drop
1612 all the split tables.
1613 @return DB_SUCCESS or error code */
1614 static
1615 dberr_t
fts_drop_index_split_tables(trx_t * trx,dict_index_t * index)1616 fts_drop_index_split_tables(
1617 /*========================*/
1618 	trx_t*		trx,			/*!< in: transaction */
1619 	dict_index_t*	index)			/*!< in: fts instance */
1620 
1621 {
1622 	ulint		i;
1623 	fts_table_t	fts_table;
1624 	dberr_t		error = DB_SUCCESS;
1625 
1626 	FTS_INIT_INDEX_TABLE(&fts_table, NULL, FTS_INDEX_TABLE, index);
1627 
1628 	for (i = 0; i < FTS_NUM_AUX_INDEX; ++i) {
1629 		dberr_t	err;
1630 		char	table_name[MAX_FULL_NAME_LEN];
1631 
1632 		fts_table.suffix = fts_get_suffix(i);
1633 		fts_get_table_name(&fts_table, table_name, true);
1634 
1635 		err = fts_drop_table(trx, table_name);
1636 
1637 		/* We only return the status of the last error. */
1638 		if (err != DB_SUCCESS && err != DB_FAIL) {
1639 			error = err;
1640 		}
1641 	}
1642 
1643 	return(error);
1644 }
1645 
1646 /****************************************************************//**
1647 Drops FTS auxiliary tables for an FTS index
1648 @return DB_SUCCESS or error code */
1649 dberr_t
fts_drop_index_tables(trx_t * trx,dict_index_t * index)1650 fts_drop_index_tables(
1651 /*==================*/
1652 	trx_t*		trx,		/*!< in: transaction */
1653 	dict_index_t*	index)		/*!< in: Index to drop */
1654 {
1655 	return(fts_drop_index_split_tables(trx, index));
1656 }
1657 
1658 /****************************************************************//**
1659 Drops FTS ancillary tables needed for supporting an FTS index
1660 on the given table. row_mysql_lock_data_dictionary must have been called
1661 before this.
1662 @return DB_SUCCESS or error code */
1663 static MY_ATTRIBUTE((nonnull, warn_unused_result))
1664 dberr_t
fts_drop_all_index_tables(trx_t * trx,fts_t * fts)1665 fts_drop_all_index_tables(
1666 /*======================*/
1667 	trx_t*		trx,			/*!< in: transaction */
1668 	fts_t*		fts)			/*!< in: fts instance */
1669 {
1670 	dberr_t		error = DB_SUCCESS;
1671 
1672 	for (ulint i = 0;
1673 	     fts->indexes != 0 && i < ib_vector_size(fts->indexes);
1674 	     ++i) {
1675 
1676 		dberr_t		err;
1677 		dict_index_t*	index;
1678 
1679 		index = static_cast<dict_index_t*>(
1680 			ib_vector_getp(fts->indexes, i));
1681 
1682 		err = fts_drop_index_tables(trx, index);
1683 
1684 		if (err != DB_SUCCESS) {
1685 			error = err;
1686 		}
1687 	}
1688 
1689 	return(error);
1690 }
1691 
1692 /*********************************************************************//**
1693 Drops the ancillary tables needed for supporting an FTS index on a
1694 given table. row_mysql_lock_data_dictionary must have been called before
1695 this.
1696 @return DB_SUCCESS or error code */
1697 dberr_t
fts_drop_tables(trx_t * trx,dict_table_t * table)1698 fts_drop_tables(
1699 /*============*/
1700 	trx_t*		trx,		/*!< in: transaction */
1701 	dict_table_t*	table)		/*!< in: table has the FTS index */
1702 {
1703 	dberr_t		error;
1704 	fts_table_t	fts_table;
1705 
1706 	FTS_INIT_FTS_TABLE(&fts_table, NULL, FTS_COMMON_TABLE, table);
1707 
1708 	/* TODO: This is not atomic and can cause problems during recovery. */
1709 
1710 	error = fts_drop_common_tables(trx, &fts_table);
1711 
1712 	if (error == DB_SUCCESS && table->fts) {
1713 		error = fts_drop_all_index_tables(trx, table->fts);
1714 	}
1715 
1716 	return(error);
1717 }
1718 
1719 /** Create dict_table_t object for FTS Aux tables.
1720 @param[in]	aux_table_name	FTS Aux table name
1721 @param[in]	table		table object of FTS Index
1722 @param[in]	n_cols		number of columns for FTS Aux table
1723 @return table object for FTS Aux table */
1724 static
1725 dict_table_t*
fts_create_in_mem_aux_table(const char * aux_table_name,const dict_table_t * table,ulint n_cols)1726 fts_create_in_mem_aux_table(
1727 	const char*		aux_table_name,
1728 	const dict_table_t*	table,
1729 	ulint			n_cols)
1730 {
1731 	dict_table_t*	new_table = dict_mem_table_create(
1732 		aux_table_name, NULL, n_cols, 0, table->flags,
1733 		table->space_id == TRX_SYS_SPACE
1734 		? 0 : table->space_id == SRV_TMP_SPACE_ID
1735 		? DICT_TF2_TEMPORARY : DICT_TF2_USE_FILE_PER_TABLE);
1736 
1737 	if (DICT_TF_HAS_DATA_DIR(table->flags)) {
1738 		ut_ad(table->data_dir_path != NULL);
1739 		new_table->data_dir_path = mem_heap_strdup(
1740 			new_table->heap, table->data_dir_path);
1741 	}
1742 
1743 	return(new_table);
1744 }
1745 
1746 /** Function to create on FTS common table.
1747 @param[in,out]	trx		InnoDB transaction
1748 @param[in]	table		Table that has FTS Index
1749 @param[in]	fts_table_name	FTS AUX table name
1750 @param[in]	fts_suffix	FTS AUX table suffix
1751 @param[in,out]	heap		temporary memory heap
1752 @return table object if created, else NULL */
1753 static
1754 dict_table_t*
fts_create_one_common_table(trx_t * trx,const dict_table_t * table,const char * fts_table_name,const char * fts_suffix,mem_heap_t * heap)1755 fts_create_one_common_table(
1756 	trx_t*			trx,
1757 	const dict_table_t*	table,
1758 	const char*		fts_table_name,
1759 	const char*		fts_suffix,
1760 	mem_heap_t*		heap)
1761 {
1762 	dict_table_t*		new_table;
1763 	dberr_t			error;
1764 	bool			is_config = strcmp(fts_suffix, "CONFIG") == 0;
1765 
1766 	if (!is_config) {
1767 
1768 		new_table = fts_create_in_mem_aux_table(
1769 			fts_table_name, table, FTS_DELETED_TABLE_NUM_COLS);
1770 
1771 		dict_mem_table_add_col(
1772 			new_table, heap, "doc_id", DATA_INT, DATA_UNSIGNED,
1773 			FTS_DELETED_TABLE_COL_LEN);
1774 	} else {
1775 		/* Config table has different schema. */
1776 		new_table = fts_create_in_mem_aux_table(
1777 			fts_table_name, table, FTS_CONFIG_TABLE_NUM_COLS);
1778 
1779 		dict_mem_table_add_col(
1780 			new_table, heap, "key", DATA_VARCHAR, 0,
1781 			FTS_CONFIG_TABLE_KEY_COL_LEN);
1782 
1783 		dict_mem_table_add_col(
1784 			new_table, heap, "value", DATA_VARCHAR, DATA_NOT_NULL,
1785 			FTS_CONFIG_TABLE_VALUE_COL_LEN);
1786 	}
1787 
1788 	dict_table_add_system_columns(new_table, heap);
1789 	error = row_create_table_for_mysql(new_table, trx,
1790 		FIL_ENCRYPTION_DEFAULT, FIL_DEFAULT_ENCRYPTION_KEY);
1791 	if (error == DB_SUCCESS) {
1792 
1793 		dict_index_t*	index = dict_mem_index_create(
1794 			new_table, "FTS_COMMON_TABLE_IND",
1795 			DICT_UNIQUE|DICT_CLUSTERED, 1);
1796 
1797 		if (!is_config) {
1798 			dict_mem_index_add_field(index, "doc_id", 0);
1799 		} else {
1800 			dict_mem_index_add_field(index, "key", 0);
1801 		}
1802 
1803 		/* We save and restore trx->dict_operation because
1804 		row_create_index_for_mysql() changes the operation to
1805 		TRX_DICT_OP_TABLE. */
1806 		trx_dict_op_t op = trx_get_dict_operation(trx);
1807 
1808 		error =	row_create_index_for_mysql(index, trx, NULL);
1809 
1810 		trx->dict_operation = op;
1811 	} else {
1812 err_exit:
1813 		new_table = NULL;
1814 		ib::warn() << "Failed to create FTS common table "
1815 			<< fts_table_name;
1816 		trx->error_state = error;
1817 		return NULL;
1818 	}
1819 
1820 	if (error != DB_SUCCESS) {
1821 		dict_mem_table_free(new_table);
1822 		trx->error_state = DB_SUCCESS;
1823 		row_drop_table_for_mysql(fts_table_name, trx, SQLCOM_DROP_DB);
1824 		goto err_exit;
1825 	}
1826 
1827 	return(new_table);
1828 }
1829 
/** Creates the common auxiliary tables needed for supporting an FTS index
on the given table. row_mysql_lock_data_dictionary must have been called
before this.
Any existing common tables are dropped first. After the tables are
created, the default settings are inserted into the CONFIG table and,
unless skip_doc_id_index is set, a unique index on the doc id column of
the user table is created. If any step after the initial drop fails, the
common tables created by this call are dropped again.
The following tables are created.
CREATE TABLE $FTS_PREFIX_DELETED
	(doc_id BIGINT UNSIGNED, UNIQUE CLUSTERED INDEX on doc_id)
CREATE TABLE $FTS_PREFIX_DELETED_CACHE
	(doc_id BIGINT UNSIGNED, UNIQUE CLUSTERED INDEX on doc_id)
CREATE TABLE $FTS_PREFIX_BEING_DELETED
	(doc_id BIGINT UNSIGNED, UNIQUE CLUSTERED INDEX on doc_id)
CREATE TABLE $FTS_PREFIX_BEING_DELETED_CACHE
	(doc_id BIGINT UNSIGNED, UNIQUE CLUSTERED INDEX on doc_id)
CREATE TABLE $FTS_PREFIX_CONFIG
	(key CHAR(50), value CHAR(200), UNIQUE CLUSTERED INDEX on key)
@param[in,out]	trx			transaction
@param[in,out]	table			table with FTS index
@param[in]	skip_doc_id_index	Skip index on doc id
@return DB_SUCCESS if succeed */
dberr_t
fts_create_common_tables(
	trx_t*		trx,
	dict_table_t*	table,
	bool		skip_doc_id_index)
{
	dberr_t		error;
	que_t*		graph;
	fts_table_t	fts_table;
	mem_heap_t*	heap = mem_heap_create(1024);
	pars_info_t*	info;
	char		fts_name[MAX_FULL_NAME_LEN];
	/* One name buffer per entry of fts_common_tables. */
	char		full_name[sizeof(fts_common_tables) / sizeof(char*)]
				[MAX_FULL_NAME_LEN];

	dict_index_t*					index = NULL;
	trx_dict_op_t					op;
	/* common_tables vector is used for dropping FTS common tables
	on error condition. */
	std::vector<dict_table_t*>			common_tables;
	std::vector<dict_table_t*>::const_iterator	it;

	FTS_INIT_FTS_TABLE(&fts_table, NULL, FTS_COMMON_TABLE, table);

	/* Remember the dictionary operation of the transaction; it is
	written back at func_exit. */
	op = trx_get_dict_operation(trx);

	/* Start from a clean slate: drop common tables that may exist. */
	error = fts_drop_common_tables(trx, &fts_table);

	if (error != DB_SUCCESS) {

		goto func_exit;
	}

	/* Create the FTS tables that are common to an FTS index. */
	for (ulint i = 0; fts_common_tables[i] != NULL; ++i) {

		fts_table.suffix = fts_common_tables[i];
		fts_get_table_name(&fts_table, full_name[i], true);
		dict_table_t*	common_table = fts_create_one_common_table(
			trx, table, full_name[i], fts_table.suffix, heap);

		if (!common_table) {
			/* The failed creation left its error in
			trx->error_state; reset it and report a generic
			error. NOTE(review): presumably the reset is needed
			so the cleanup drops at func_exit are not affected
			- confirm. */
			trx->error_state = DB_SUCCESS;
			error = DB_ERROR;
			goto func_exit;
		} else {
			common_tables.push_back(common_table);
		}

		/* The heap is only scratch space for one table. */
		mem_heap_empty(heap);

		DBUG_EXECUTE_IF("ib_fts_aux_table_error",
			/* Return error after creating FTS_AUX_CONFIG table. */
			if (i == 4) {
				error = DB_ERROR;
				goto func_exit;
			}
		);

	}

	/* Write the default settings to the config table. */
	info = pars_info_create();

	fts_table.suffix = "CONFIG";
	fts_get_table_name(&fts_table, fts_name, true);
	pars_info_bind_id(info, "config_table", fts_name);

	graph = fts_parse_sql_no_dict_lock(
		info, fts_config_table_insert_values_sql);

	error = fts_eval_sql(trx, graph);

	que_graph_free(graph);

	/* Stop here on failure, or when the caller does not want the
	doc id index to be created. */
	if (error != DB_SUCCESS || skip_doc_id_index) {

		goto func_exit;
	}

	/* Create the unique index on the doc id column of the user table. */
	index = dict_mem_index_create(table, FTS_DOC_ID_INDEX_NAME,
				      DICT_UNIQUE, 1);
	dict_mem_index_add_field(index, FTS_DOC_ID_COL_NAME, 0);

	/* Re-read the dictionary operation right before the index
	creation; this is the value restored at func_exit on this path. */
	op = trx_get_dict_operation(trx);

	error =	row_create_index_for_mysql(index, trx, NULL);

func_exit:
	if (error != DB_SUCCESS) {
		/* Undo: drop every common table created by this call. */
		for (it = common_tables.begin(); it != common_tables.end();
		     ++it) {
			row_drop_table_for_mysql((*it)->name.m_name, trx,
						 SQLCOM_DROP_DB);
		}
	}

	/* Restore the saved dictionary operation. */
	trx->dict_operation = op;

	common_tables.clear();
	mem_heap_free(heap);

	return(error);
}
1952 
1953 /** Create one FTS auxiliary index table for an FTS index.
1954 @param[in,out]	trx		transaction
1955 @param[in]	index		the index instance
1956 @param[in]	fts_table	fts_table structure
1957 @param[in,out]	heap		temporary memory heap
1958 @see row_merge_create_fts_sort_index()
1959 @return DB_SUCCESS or error code */
1960 static
1961 dict_table_t*
fts_create_one_index_table(trx_t * trx,const dict_index_t * index,const fts_table_t * fts_table,mem_heap_t * heap)1962 fts_create_one_index_table(
1963 	trx_t*			trx,
1964 	const dict_index_t*	index,
1965 	const fts_table_t*	fts_table,
1966 	mem_heap_t*		heap)
1967 {
1968 	dict_field_t*		field;
1969 	dict_table_t*		new_table;
1970 	char			table_name[MAX_FULL_NAME_LEN];
1971 	dberr_t			error;
1972 	CHARSET_INFO*		charset;
1973 
1974 	ut_ad(index->type & DICT_FTS);
1975 
1976 	fts_get_table_name(fts_table, table_name, true);
1977 
1978 	new_table = fts_create_in_mem_aux_table(
1979 			table_name, fts_table->table,
1980 			FTS_AUX_INDEX_TABLE_NUM_COLS);
1981 
1982 	field = dict_index_get_nth_field(index, 0);
1983 	charset = fts_get_charset(field->col->prtype);
1984 
1985 	dict_mem_table_add_col(new_table, heap, "word",
1986 			       charset == &my_charset_latin1
1987 			       ? DATA_VARCHAR : DATA_VARMYSQL,
1988 			       field->col->prtype,
1989 			       FTS_MAX_WORD_LEN_IN_CHAR
1990 			       * unsigned(field->col->mbmaxlen));
1991 
1992 	dict_mem_table_add_col(new_table, heap, "first_doc_id", DATA_INT,
1993 			       DATA_NOT_NULL | DATA_UNSIGNED,
1994 			       FTS_INDEX_FIRST_DOC_ID_LEN);
1995 
1996 	dict_mem_table_add_col(new_table, heap, "last_doc_id", DATA_INT,
1997 			       DATA_NOT_NULL | DATA_UNSIGNED,
1998 			       FTS_INDEX_LAST_DOC_ID_LEN);
1999 
2000 	dict_mem_table_add_col(new_table, heap, "doc_count", DATA_INT,
2001 			       DATA_NOT_NULL | DATA_UNSIGNED,
2002 			       FTS_INDEX_DOC_COUNT_LEN);
2003 
2004 	/* The precise type calculation is as follows:
2005 	least signficiant byte: MySQL type code (not applicable for sys cols)
2006 	second least : DATA_NOT_NULL | DATA_BINARY_TYPE
2007 	third least  : the MySQL charset-collation code (DATA_MTYPE_MAX) */
2008 
2009 	dict_mem_table_add_col(
2010 		new_table, heap, "ilist", DATA_BLOB,
2011 		(DATA_MTYPE_MAX << 16) | DATA_UNSIGNED | DATA_NOT_NULL,
2012 		FTS_INDEX_ILIST_LEN);
2013 
2014 	dict_table_add_system_columns(new_table, heap);
2015 	error = row_create_table_for_mysql(new_table, trx,
2016 		FIL_ENCRYPTION_DEFAULT, FIL_DEFAULT_ENCRYPTION_KEY);
2017 
2018 	if (error == DB_SUCCESS) {
2019 		dict_index_t*	index = dict_mem_index_create(
2020 			new_table, "FTS_INDEX_TABLE_IND",
2021 			DICT_UNIQUE|DICT_CLUSTERED, 2);
2022 		dict_mem_index_add_field(index, "word", 0);
2023 		dict_mem_index_add_field(index, "first_doc_id", 0);
2024 
2025 		trx_dict_op_t op = trx_get_dict_operation(trx);
2026 
2027 		error =	row_create_index_for_mysql(index, trx, NULL);
2028 
2029 		trx->dict_operation = op;
2030 	} else {
2031 err_exit:
2032 		new_table = NULL;
2033 		ib::warn() << "Failed to create FTS index table "
2034 			<< table_name;
2035 		trx->error_state = error;
2036 		return NULL;
2037 	}
2038 
2039 	if (error != DB_SUCCESS) {
2040 		dict_mem_table_free(new_table);
2041 		trx->error_state = DB_SUCCESS;
2042 		row_drop_table_for_mysql(table_name, trx, SQLCOM_DROP_DB);
2043 		goto err_exit;
2044 	}
2045 
2046 	return(new_table);
2047 }
2048 
2049 /** Creates the column specific ancillary tables needed for supporting an
2050 FTS index on the given table. row_mysql_lock_data_dictionary must have
2051 been called before this.
2052 
2053 All FTS AUX Index tables have the following schema.
2054 CREAT TABLE $FTS_PREFIX_INDEX_[1-6](
2055 	word		VARCHAR(FTS_MAX_WORD_LEN),
2056 	first_doc_id	INT NOT NULL,
2057 	last_doc_id	UNSIGNED NOT NULL,
2058 	doc_count	UNSIGNED INT NOT NULL,
2059 	ilist		VARBINARY NOT NULL,
2060 	UNIQUE CLUSTERED INDEX ON (word, first_doc_id))
2061 @param[in,out]	trx	dictionary transaction
2062 @param[in]	index	fulltext index
2063 @param[in]	id	table id
2064 @return DB_SUCCESS or error code */
2065 dberr_t
fts_create_index_tables(trx_t * trx,const dict_index_t * index,table_id_t id)2066 fts_create_index_tables(trx_t* trx, const dict_index_t* index, table_id_t id)
2067 {
2068 	ulint		i;
2069 	fts_table_t	fts_table;
2070 	dberr_t		error = DB_SUCCESS;
2071 	mem_heap_t*	heap = mem_heap_create(1024);
2072 
2073 	fts_table.type = FTS_INDEX_TABLE;
2074 	fts_table.index_id = index->id;
2075 	fts_table.table_id = id;
2076 	fts_table.table = index->table;
2077 
2078 	/* aux_idx_tables vector is used for dropping FTS AUX INDEX
2079 	tables on error condition. */
2080 	std::vector<dict_table_t*>			aux_idx_tables;
2081 	std::vector<dict_table_t*>::const_iterator	it;
2082 
2083 	for (i = 0; i < FTS_NUM_AUX_INDEX && error == DB_SUCCESS; ++i) {
2084 		dict_table_t*	new_table;
2085 
2086 		/* Create the FTS auxiliary tables that are specific
2087 		to an FTS index. We need to preserve the table_id %s
2088 		which fts_parse_sql_no_dict_lock() will fill in for us. */
2089 		fts_table.suffix = fts_get_suffix(i);
2090 
2091 		new_table = fts_create_one_index_table(
2092 			trx, index, &fts_table, heap);
2093 
2094 		if (new_table == NULL) {
2095 			error = DB_FAIL;
2096 			break;
2097 		} else {
2098 			aux_idx_tables.push_back(new_table);
2099 		}
2100 
2101 		mem_heap_empty(heap);
2102 
2103 		DBUG_EXECUTE_IF("ib_fts_index_table_error",
2104 			/* Return error after creating FTS_INDEX_5
2105 			aux table. */
2106 			if (i == 4) {
2107 				error = DB_FAIL;
2108 				break;
2109 			}
2110 		);
2111 	}
2112 
2113 	if (error != DB_SUCCESS) {
2114 
2115 		for (it = aux_idx_tables.begin(); it != aux_idx_tables.end();
2116 		     ++it) {
2117 			row_drop_table_for_mysql((*it)->name.m_name, trx,
2118 						 SQLCOM_DROP_DB);
2119 		}
2120 	}
2121 
2122 	aux_idx_tables.clear();
2123 	mem_heap_free(heap);
2124 
2125 	return(error);
2126 }
2127 
2128 /******************************************************************//**
2129 Calculate the new state of a row given the existing state and a new event.
2130 @return new state of row */
2131 static
2132 fts_row_state
fts_trx_row_get_new_state(fts_row_state old_state,fts_row_state event)2133 fts_trx_row_get_new_state(
2134 /*======================*/
2135 	fts_row_state	old_state,		/*!< in: existing state of row */
2136 	fts_row_state	event)			/*!< in: new event */
2137 {
2138 	/* The rules for transforming states:
2139 
2140 	I = inserted
2141 	M = modified
2142 	D = deleted
2143 	N = nothing
2144 
2145 	M+D -> D:
2146 
2147 	If the row existed before the transaction started and it is modified
2148 	during the transaction, followed by a deletion of the row, only the
2149 	deletion will be signaled.
2150 
2151 	M+ -> M:
2152 
2153 	If the row existed before the transaction started and it is modified
2154 	more than once during the transaction, only the last modification
2155 	will be signaled.
2156 
2157 	IM*D -> N:
2158 
2159 	If a new row is added during the transaction (and possibly modified
2160 	after its initial insertion) but it is deleted before the end of the
2161 	transaction, nothing will be signaled.
2162 
2163 	IM* -> I:
2164 
2165 	If a new row is added during the transaction and modified after its
2166 	initial insertion, only the addition will be signaled.
2167 
2168 	M*DI -> M:
2169 
2170 	If the row existed before the transaction started and it is deleted,
2171 	then re-inserted, only a modification will be signaled. Note that
2172 	this case is only possible if the table is using the row's primary
2173 	key for FTS row ids, since those can be re-inserted by the user,
2174 	which is not true for InnoDB generated row ids.
2175 
2176 	It is easily seen that the above rules decompose such that we do not
2177 	need to store the row's entire history of events. Instead, we can
2178 	store just one state for the row and update that when new events
2179 	arrive. Then we can implement the above rules as a two-dimensional
2180 	look-up table, and get checking of invalid combinations "for free"
2181 	in the process. */
2182 
2183 	/* The lookup table for transforming states. old_state is the
2184 	Y-axis, event is the X-axis. */
2185 	static const fts_row_state table[4][4] = {
2186 			/*    I            M            D            N */
2187 		/* I */	{ FTS_INVALID, FTS_INSERT,  FTS_NOTHING, FTS_INVALID },
2188 		/* M */	{ FTS_INVALID, FTS_MODIFY,  FTS_DELETE,  FTS_INVALID },
2189 		/* D */	{ FTS_MODIFY,  FTS_INVALID, FTS_INVALID, FTS_INVALID },
2190 		/* N */	{ FTS_INVALID, FTS_INVALID, FTS_INVALID, FTS_INVALID }
2191 	};
2192 
2193 	fts_row_state result;
2194 
2195 	ut_a(old_state < FTS_INVALID);
2196 	ut_a(event < FTS_INVALID);
2197 
2198 	result = table[(int) old_state][(int) event];
2199 	ut_a(result != FTS_INVALID);
2200 
2201 	return(result);
2202 }
2203 
2204 /******************************************************************//**
2205 Create a savepoint instance.
2206 @return savepoint instance */
2207 static
2208 fts_savepoint_t*
fts_savepoint_create(ib_vector_t * savepoints,const char * name,mem_heap_t * heap)2209 fts_savepoint_create(
2210 /*=================*/
2211 	ib_vector_t*	savepoints,		/*!< out: InnoDB transaction */
2212 	const char*	name,			/*!< in: savepoint name */
2213 	mem_heap_t*	heap)			/*!< in: heap */
2214 {
2215 	fts_savepoint_t*	savepoint;
2216 
2217 	savepoint = static_cast<fts_savepoint_t*>(
2218 		ib_vector_push(savepoints, NULL));
2219 
2220 	memset(savepoint, 0x0, sizeof(*savepoint));
2221 
2222 	if (name) {
2223 		savepoint->name = mem_heap_strdup(heap, name);
2224 	}
2225 
2226 	savepoint->tables = rbt_create(
2227 		sizeof(fts_trx_table_t*), fts_trx_table_cmp);
2228 
2229 	return(savepoint);
2230 }
2231 
2232 /******************************************************************//**
2233 Create an FTS trx.
2234 @return FTS trx */
2235 fts_trx_t*
fts_trx_create(trx_t * trx)2236 fts_trx_create(
2237 /*===========*/
2238 	trx_t*	trx)				/*!< in/out: InnoDB
2239 						transaction */
2240 {
2241 	fts_trx_t*		ftt;
2242 	ib_alloc_t*		heap_alloc;
2243 	mem_heap_t*		heap = mem_heap_create(1024);
2244 	trx_named_savept_t*	savep;
2245 
2246 	ut_a(trx->fts_trx == NULL);
2247 
2248 	ftt = static_cast<fts_trx_t*>(mem_heap_alloc(heap, sizeof(fts_trx_t)));
2249 	ftt->trx = trx;
2250 	ftt->heap = heap;
2251 
2252 	heap_alloc = ib_heap_allocator_create(heap);
2253 
2254 	ftt->savepoints = static_cast<ib_vector_t*>(ib_vector_create(
2255 		heap_alloc, sizeof(fts_savepoint_t), 4));
2256 
2257 	ftt->last_stmt = static_cast<ib_vector_t*>(ib_vector_create(
2258 		heap_alloc, sizeof(fts_savepoint_t), 4));
2259 
2260 	/* Default instance has no name and no heap. */
2261 	fts_savepoint_create(ftt->savepoints, NULL, NULL);
2262 	fts_savepoint_create(ftt->last_stmt, NULL, NULL);
2263 
2264 	/* Copy savepoints that already set before. */
2265 	for (savep = UT_LIST_GET_FIRST(trx->trx_savepoints);
2266 	     savep != NULL;
2267 	     savep = UT_LIST_GET_NEXT(trx_savepoints, savep)) {
2268 
2269 		fts_savepoint_take(ftt, savep->name);
2270 	}
2271 
2272 	return(ftt);
2273 }
2274 
2275 /******************************************************************//**
2276 Create an FTS trx table.
2277 @return FTS trx table */
2278 static
2279 fts_trx_table_t*
fts_trx_table_create(fts_trx_t * fts_trx,dict_table_t * table)2280 fts_trx_table_create(
2281 /*=================*/
2282 	fts_trx_t*	fts_trx,		/*!< in: FTS trx */
2283 	dict_table_t*	table)			/*!< in: table */
2284 {
2285 	fts_trx_table_t*	ftt;
2286 
2287 	ftt = static_cast<fts_trx_table_t*>(
2288 		mem_heap_alloc(fts_trx->heap, sizeof(*ftt)));
2289 
2290 	memset(ftt, 0x0, sizeof(*ftt));
2291 
2292 	ftt->table = table;
2293 	ftt->fts_trx = fts_trx;
2294 
2295 	ftt->rows = rbt_create(sizeof(fts_trx_row_t), fts_trx_row_doc_id_cmp);
2296 
2297 	return(ftt);
2298 }
2299 
2300 /******************************************************************//**
2301 Clone an FTS trx table.
2302 @return FTS trx table */
2303 static
2304 fts_trx_table_t*
fts_trx_table_clone(const fts_trx_table_t * ftt_src)2305 fts_trx_table_clone(
2306 /*=================*/
2307 	const fts_trx_table_t*	ftt_src)	/*!< in: FTS trx */
2308 {
2309 	fts_trx_table_t*	ftt;
2310 
2311 	ftt = static_cast<fts_trx_table_t*>(
2312 		mem_heap_alloc(ftt_src->fts_trx->heap, sizeof(*ftt)));
2313 
2314 	memset(ftt, 0x0, sizeof(*ftt));
2315 
2316 	ftt->table = ftt_src->table;
2317 	ftt->fts_trx = ftt_src->fts_trx;
2318 
2319 	ftt->rows = rbt_create(sizeof(fts_trx_row_t), fts_trx_row_doc_id_cmp);
2320 
2321 	/* Copy the rb tree values to the new savepoint. */
2322 	rbt_merge_uniq(ftt->rows, ftt_src->rows);
2323 
2324 	/* These are only added on commit. At this stage we only have
2325 	the updated row state. */
2326 	ut_a(ftt_src->added_doc_ids == NULL);
2327 
2328 	return(ftt);
2329 }
2330 
2331 /******************************************************************//**
2332 Initialize the FTS trx instance.
2333 @return FTS trx instance */
2334 static
2335 fts_trx_table_t*
fts_trx_init(trx_t * trx,dict_table_t * table,ib_vector_t * savepoints)2336 fts_trx_init(
2337 /*=========*/
2338 	trx_t*			trx,		/*!< in: transaction */
2339 	dict_table_t*		table,		/*!< in: FTS table instance */
2340 	ib_vector_t*		savepoints)	/*!< in: Savepoints */
2341 {
2342 	fts_trx_table_t*	ftt;
2343 	ib_rbt_bound_t		parent;
2344 	ib_rbt_t*		tables;
2345 	fts_savepoint_t*	savepoint;
2346 
2347 	savepoint = static_cast<fts_savepoint_t*>(ib_vector_last(savepoints));
2348 
2349 	tables = savepoint->tables;
2350 	rbt_search_cmp(tables, &parent, &table->id, fts_trx_table_id_cmp, NULL);
2351 
2352 	if (parent.result == 0) {
2353 		fts_trx_table_t**	fttp;
2354 
2355 		fttp = rbt_value(fts_trx_table_t*, parent.last);
2356 		ftt = *fttp;
2357 	} else {
2358 		ftt = fts_trx_table_create(trx->fts_trx, table);
2359 		rbt_add_node(tables, &parent, &ftt);
2360 	}
2361 
2362 	ut_a(ftt->table == table);
2363 
2364 	return(ftt);
2365 }
2366 
2367 /******************************************************************//**
2368 Notify the FTS system about an operation on an FTS-indexed table. */
2369 static
2370 void
fts_trx_table_add_op(fts_trx_table_t * ftt,doc_id_t doc_id,fts_row_state state,ib_vector_t * fts_indexes)2371 fts_trx_table_add_op(
2372 /*=================*/
2373 	fts_trx_table_t*ftt,			/*!< in: FTS trx table */
2374 	doc_id_t	doc_id,			/*!< in: doc id */
2375 	fts_row_state	state,			/*!< in: state of the row */
2376 	ib_vector_t*	fts_indexes)		/*!< in: FTS indexes affected */
2377 {
2378 	ib_rbt_t*	rows;
2379 	ib_rbt_bound_t	parent;
2380 
2381 	rows = ftt->rows;
2382 	rbt_search(rows, &parent, &doc_id);
2383 
2384 	/* Row id found, update state, and if new state is FTS_NOTHING,
2385 	we delete the row from our tree. */
2386 	if (parent.result == 0) {
2387 		fts_trx_row_t*	row = rbt_value(fts_trx_row_t, parent.last);
2388 
2389 		row->state = fts_trx_row_get_new_state(row->state, state);
2390 
2391 		if (row->state == FTS_NOTHING) {
2392 			if (row->fts_indexes) {
2393 				ib_vector_free(row->fts_indexes);
2394 			}
2395 
2396 			ut_free(rbt_remove_node(rows, parent.last));
2397 			row = NULL;
2398 		} else if (row->fts_indexes != NULL) {
2399 			ib_vector_free(row->fts_indexes);
2400 			row->fts_indexes = fts_indexes;
2401 		}
2402 
2403 	} else { /* Row-id not found, create a new one. */
2404 		fts_trx_row_t	row;
2405 
2406 		row.doc_id = doc_id;
2407 		row.state = state;
2408 		row.fts_indexes = fts_indexes;
2409 
2410 		rbt_add_node(rows, &parent, &row);
2411 	}
2412 }
2413 
2414 /******************************************************************//**
2415 Notify the FTS system about an operation on an FTS-indexed table. */
2416 void
fts_trx_add_op(trx_t * trx,dict_table_t * table,doc_id_t doc_id,fts_row_state state,ib_vector_t * fts_indexes)2417 fts_trx_add_op(
2418 /*===========*/
2419 	trx_t*		trx,			/*!< in: InnoDB transaction */
2420 	dict_table_t*	table,			/*!< in: table */
2421 	doc_id_t	doc_id,			/*!< in: new doc id */
2422 	fts_row_state	state,			/*!< in: state of the row */
2423 	ib_vector_t*	fts_indexes)		/*!< in: FTS indexes affected
2424 						(NULL=all) */
2425 {
2426 	fts_trx_table_t*	tran_ftt;
2427 	fts_trx_table_t*	stmt_ftt;
2428 
2429 	if (!trx->fts_trx) {
2430 		trx->fts_trx = fts_trx_create(trx);
2431 	}
2432 
2433 	tran_ftt = fts_trx_init(trx, table, trx->fts_trx->savepoints);
2434 	stmt_ftt = fts_trx_init(trx, table, trx->fts_trx->last_stmt);
2435 
2436 	fts_trx_table_add_op(tran_ftt, doc_id, state, fts_indexes);
2437 	fts_trx_table_add_op(stmt_ftt, doc_id, state, fts_indexes);
2438 }
2439 
2440 /******************************************************************//**
2441 Fetch callback that converts a textual document id to a binary value and
2442 stores it in the given place.
2443 @return always returns NULL */
2444 static
2445 ibool
fts_fetch_store_doc_id(void * row,void * user_arg)2446 fts_fetch_store_doc_id(
2447 /*===================*/
2448 	void*		row,			/*!< in: sel_node_t* */
2449 	void*		user_arg)		/*!< in: doc_id_t* to store
2450 						doc_id in */
2451 {
2452 	int		n_parsed;
2453 	sel_node_t*	node = static_cast<sel_node_t*>(row);
2454 	doc_id_t*	doc_id = static_cast<doc_id_t*>(user_arg);
2455 	dfield_t*	dfield = que_node_get_val(node->select_list);
2456 	dtype_t*	type = dfield_get_type(dfield);
2457 	ulint		len = dfield_get_len(dfield);
2458 
2459 	char		buf[32];
2460 
2461 	ut_a(dtype_get_mtype(type) == DATA_VARCHAR);
2462 	ut_a(len > 0 && len < sizeof(buf));
2463 
2464 	memcpy(buf, dfield_get_data(dfield), len);
2465 	buf[len] = '\0';
2466 
2467 	n_parsed = sscanf(buf, FTS_DOC_ID_FORMAT, doc_id);
2468 	ut_a(n_parsed == 1);
2469 
2470 	return(FALSE);
2471 }
2472 
#ifdef FTS_CACHE_SIZE_DEBUG
/** Get the maximum FTS cache size from the FTS config table.
The configured value is clamped to the range
[FTS_CACHE_SIZE_LOWER_LIMIT_IN_MB, FTS_CACHE_SIZE_UPPER_LIMIT_IN_MB],
with a warning when clamping happens. If the value cannot be read, an
error is logged and the lower limit is used.
@param[in]	trx		transaction
@param[in]	fts_table	table instance
@return max cache size in bytes */
static
ulint
fts_get_max_cache_size(
	trx_t*		trx,
	fts_table_t*	fts_table)
{
	dberr_t		error;
	fts_string_t	value;
	ulong		cache_size_in_mb;

	/* Set to the default value. */
	cache_size_in_mb = FTS_CACHE_SIZE_LOWER_LIMIT_IN_MB;

	/* We set the length of value to the max bytes it can hold. This
	information is used by the callback that reads the value. */
	value.f_n_char = 0;
	value.f_len = FTS_MAX_CONFIG_VALUE_LEN;
	/* The allocator result is untyped; C++ needs an explicit cast
	to store it in the string buffer pointer. */
	value.f_str = static_cast<byte*>(ut_malloc_nokey(value.f_len + 1));

	error = fts_config_get_value(
		trx, fts_table, FTS_MAX_CACHE_SIZE_IN_MB, &value);

	if (UNIV_LIKELY(error == DB_SUCCESS)) {
		/* Terminate the value so that it can be parsed as a
		C string. */
		value.f_str[value.f_len] = 0;
		cache_size_in_mb = strtoul((char*) value.f_str, NULL, 10);

		if (cache_size_in_mb > FTS_CACHE_SIZE_UPPER_LIMIT_IN_MB) {

			ib::warn() << "FTS max cache size ("
				<< cache_size_in_mb << ") out of range."
				" Minimum value is "
				<< FTS_CACHE_SIZE_LOWER_LIMIT_IN_MB
				<< "MB and the maximum value is "
				<< FTS_CACHE_SIZE_UPPER_LIMIT_IN_MB
				<< "MB, setting cache size to upper limit";

			cache_size_in_mb = FTS_CACHE_SIZE_UPPER_LIMIT_IN_MB;

		} else if  (cache_size_in_mb
			    < FTS_CACHE_SIZE_LOWER_LIMIT_IN_MB) {

			ib::warn() << "FTS max cache size ("
				<< cache_size_in_mb << ") out of range."
				" Minimum value is "
				<< FTS_CACHE_SIZE_LOWER_LIMIT_IN_MB
				<< "MB and the maximum value is "
				<< FTS_CACHE_SIZE_UPPER_LIMIT_IN_MB
				<< "MB, setting cache size to lower limit";

			cache_size_in_mb = FTS_CACHE_SIZE_LOWER_LIMIT_IN_MB;
		}
	} else {
		ib::error() << "(" << error << ") reading max"
			" cache config value from config table "
			<< fts_table->table->name;
	}

	ut_free(value.f_str);

	return(cache_size_in_mb * 1024 * 1024);
}
#endif
2542 
2543 /*********************************************************************//**
2544 Update the next and last Doc ID in the CONFIG table to be the input
2545 "doc_id" value (+ 1). We would do so after each FTS index build or
2546 table truncate */
2547 void
fts_update_next_doc_id(trx_t * trx,const dict_table_t * table,doc_id_t doc_id)2548 fts_update_next_doc_id(
2549 /*===================*/
2550 	trx_t*			trx,		/*!< in/out: transaction */
2551 	const dict_table_t*	table,		/*!< in: table */
2552 	doc_id_t		doc_id)		/*!< in: DOC ID to set */
2553 {
2554 	table->fts->cache->synced_doc_id = doc_id;
2555 	table->fts->cache->next_doc_id = doc_id + 1;
2556 
2557 	table->fts->cache->first_doc_id = table->fts->cache->next_doc_id;
2558 
2559 	fts_update_sync_doc_id(
2560 		table, table->fts->cache->synced_doc_id, trx);
2561 
2562 }
2563 
2564 /*********************************************************************//**
2565 Get the next available document id.
2566 @return DB_SUCCESS if OK */
2567 dberr_t
fts_get_next_doc_id(const dict_table_t * table,doc_id_t * doc_id)2568 fts_get_next_doc_id(
2569 /*================*/
2570 	const dict_table_t*	table,		/*!< in: table */
2571 	doc_id_t*		doc_id)		/*!< out: new document id */
2572 {
2573 	fts_cache_t*	cache = table->fts->cache;
2574 
2575 	/* If the Doc ID system has not yet been initialized, we
2576 	will consult the CONFIG table and user table to re-establish
2577 	the initial value of the Doc ID */
2578 	if (cache->first_doc_id == FTS_NULL_DOC_ID) {
2579 		fts_init_doc_id(table);
2580 	}
2581 
2582 	if (!DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID)) {
2583 		*doc_id = FTS_NULL_DOC_ID;
2584 		return(DB_SUCCESS);
2585 	}
2586 
2587 	DEBUG_SYNC_C("get_next_FTS_DOC_ID");
2588 	mutex_enter(&cache->doc_id_lock);
2589 	*doc_id = cache->next_doc_id++;
2590 	mutex_exit(&cache->doc_id_lock);
2591 
2592 	return(DB_SUCCESS);
2593 }
2594 
2595 /*********************************************************************//**
2596 This function fetch the Doc ID from CONFIG table, and compare with
2597 the Doc ID supplied. And store the larger one to the CONFIG table.
2598 @return DB_SUCCESS if OK */
2599 static MY_ATTRIBUTE((nonnull))
2600 dberr_t
fts_cmp_set_sync_doc_id(const dict_table_t * table,doc_id_t cmp_doc_id,ibool read_only,doc_id_t * doc_id)2601 fts_cmp_set_sync_doc_id(
2602 /*====================*/
2603 	const dict_table_t*	table,		/*!< in: table */
2604 	doc_id_t		cmp_doc_id,	/*!< in: Doc ID to compare */
2605 	ibool			read_only,	/*!< in: TRUE if read the
2606 						synced_doc_id only */
2607 	doc_id_t*		doc_id)		/*!< out: larger document id
2608 						after comparing "cmp_doc_id"
2609 						to the one stored in CONFIG
2610 						table */
2611 {
2612 	trx_t*		trx;
2613 	pars_info_t*	info;
2614 	dberr_t		error;
2615 	fts_table_t	fts_table;
2616 	que_t*		graph = NULL;
2617 	fts_cache_t*	cache = table->fts->cache;
2618 	char		table_name[MAX_FULL_NAME_LEN];
2619 retry:
2620 	ut_a(table->fts->doc_col != ULINT_UNDEFINED);
2621 
2622 	fts_table.suffix = "CONFIG";
2623 	fts_table.table_id = table->id;
2624 	fts_table.type = FTS_COMMON_TABLE;
2625 	fts_table.table = table;
2626 
2627 	trx = trx_create();
2628 	if (srv_read_only_mode) {
2629 		trx_start_internal_read_only(trx);
2630 	} else {
2631 		trx_start_internal(trx);
2632 	}
2633 
2634 	trx->op_info = "update the next FTS document id";
2635 
2636 	info = pars_info_create();
2637 
2638 	pars_info_bind_function(
2639 		info, "my_func", fts_fetch_store_doc_id, doc_id);
2640 
2641 	fts_get_table_name(&fts_table, table_name);
2642 	pars_info_bind_id(info, "config_table", table_name);
2643 
2644 	graph = fts_parse_sql(
2645 		&fts_table, info,
2646 		"DECLARE FUNCTION my_func;\n"
2647 		"DECLARE CURSOR c IS SELECT value FROM $config_table"
2648 		" WHERE key = 'synced_doc_id' FOR UPDATE;\n"
2649 		"BEGIN\n"
2650 		""
2651 		"OPEN c;\n"
2652 		"WHILE 1 = 1 LOOP\n"
2653 		"  FETCH c INTO my_func();\n"
2654 		"  IF c % NOTFOUND THEN\n"
2655 		"    EXIT;\n"
2656 		"  END IF;\n"
2657 		"END LOOP;\n"
2658 		"CLOSE c;");
2659 
2660 	*doc_id = 0;
2661 
2662 	error = fts_eval_sql(trx, graph);
2663 
2664 	fts_que_graph_free_check_lock(&fts_table, NULL, graph);
2665 
2666 	// FIXME: We need to retry deadlock errors
2667 	if (error != DB_SUCCESS) {
2668 		goto func_exit;
2669 	}
2670 
2671 	if (read_only) {
2672 		/* InnoDB stores actual synced_doc_id value + 1 in
2673 		FTS_CONFIG table. Reduce the value by 1 while reading
2674 		after startup. */
2675 		if (*doc_id) *doc_id -= 1;
2676 		goto func_exit;
2677 	}
2678 
2679 	if (cmp_doc_id == 0 && *doc_id) {
2680 		cache->synced_doc_id = *doc_id - 1;
2681 	} else {
2682 		cache->synced_doc_id = ut_max(cmp_doc_id, *doc_id);
2683 	}
2684 
2685 	mutex_enter(&cache->doc_id_lock);
2686 	/* For each sync operation, we will add next_doc_id by 1,
2687 	so to mark a sync operation */
2688 	if (cache->next_doc_id < cache->synced_doc_id + 1) {
2689 		cache->next_doc_id = cache->synced_doc_id + 1;
2690 	}
2691 	mutex_exit(&cache->doc_id_lock);
2692 
2693 	if (cmp_doc_id > *doc_id) {
2694 		error = fts_update_sync_doc_id(
2695 			table, cache->synced_doc_id, trx);
2696 	}
2697 
2698 	*doc_id = cache->next_doc_id;
2699 
2700 func_exit:
2701 
2702 	if (UNIV_LIKELY(error == DB_SUCCESS)) {
2703 		fts_sql_commit(trx);
2704 	} else {
2705 		*doc_id = 0;
2706 
2707 		ib::error() << "(" << error << ") while getting next doc id "
2708 			"for table " << table->name;
2709 		fts_sql_rollback(trx);
2710 
2711 		if (error == DB_DEADLOCK) {
2712 			os_thread_sleep(FTS_DEADLOCK_RETRY_WAIT);
2713 			goto retry;
2714 		}
2715 	}
2716 
2717 	trx->free();
2718 
2719 	return(error);
2720 }
2721 
2722 /*********************************************************************//**
2723 Update the last document id. This function could create a new
2724 transaction to update the last document id.
2725 @return DB_SUCCESS if OK */
2726 static
2727 dberr_t
fts_update_sync_doc_id(const dict_table_t * table,doc_id_t doc_id,trx_t * trx)2728 fts_update_sync_doc_id(
2729 /*===================*/
2730 	const dict_table_t*	table,		/*!< in: table */
2731 	doc_id_t		doc_id,		/*!< in: last document id */
2732 	trx_t*			trx)		/*!< in: update trx, or NULL */
2733 {
2734 	byte		id[FTS_MAX_ID_LEN];
2735 	pars_info_t*	info;
2736 	fts_table_t	fts_table;
2737 	ulint		id_len;
2738 	que_t*		graph = NULL;
2739 	dberr_t		error;
2740 	ibool		local_trx = FALSE;
2741 	fts_cache_t*	cache = table->fts->cache;
2742 	char		fts_name[MAX_FULL_NAME_LEN];
2743 
2744 	if (srv_read_only_mode) {
2745 		return DB_READ_ONLY;
2746 	}
2747 
2748 	fts_table.suffix = "CONFIG";
2749 	fts_table.table_id = table->id;
2750 	fts_table.type = FTS_COMMON_TABLE;
2751 	fts_table.table = table;
2752 
2753 	if (!trx) {
2754 		trx = trx_create();
2755 		trx_start_internal(trx);
2756 
2757 		trx->op_info = "setting last FTS document id";
2758 		local_trx = TRUE;
2759 	}
2760 
2761 	info = pars_info_create();
2762 
2763 	id_len = (ulint) snprintf(
2764 		(char*) id, sizeof(id), FTS_DOC_ID_FORMAT, doc_id + 1);
2765 
2766 	pars_info_bind_varchar_literal(info, "doc_id", id, id_len);
2767 
2768 	fts_get_table_name(&fts_table, fts_name,
2769 			   table->fts->dict_locked);
2770 	pars_info_bind_id(info, "table_name", fts_name);
2771 
2772 	graph = fts_parse_sql(
2773 		&fts_table, info,
2774 		"BEGIN"
2775 		" UPDATE $table_name SET value = :doc_id"
2776 		" WHERE key = 'synced_doc_id';");
2777 
2778 	error = fts_eval_sql(trx, graph);
2779 
2780 	fts_que_graph_free_check_lock(&fts_table, NULL, graph);
2781 
2782 	if (local_trx) {
2783 		if (UNIV_LIKELY(error == DB_SUCCESS)) {
2784 			fts_sql_commit(trx);
2785 			cache->synced_doc_id = doc_id;
2786 		} else {
2787 			ib::error() << "(" << error << ") while"
2788 				" updating last doc id for table"
2789 				<< table->name;
2790 
2791 			fts_sql_rollback(trx);
2792 		}
2793 		trx->free();
2794 	}
2795 
2796 	return(error);
2797 }
2798 
2799 /*********************************************************************//**
2800 Create a new fts_doc_ids_t.
2801 @return new fts_doc_ids_t */
2802 fts_doc_ids_t*
fts_doc_ids_create(void)2803 fts_doc_ids_create(void)
2804 /*====================*/
2805 {
2806 	fts_doc_ids_t*	fts_doc_ids;
2807 	mem_heap_t*	heap = mem_heap_create(512);
2808 
2809 	fts_doc_ids = static_cast<fts_doc_ids_t*>(
2810 		mem_heap_alloc(heap, sizeof(*fts_doc_ids)));
2811 
2812 	fts_doc_ids->self_heap = ib_heap_allocator_create(heap);
2813 
2814 	fts_doc_ids->doc_ids = static_cast<ib_vector_t*>(ib_vector_create(
2815 		fts_doc_ids->self_heap, sizeof(doc_id_t), 32));
2816 
2817 	return(fts_doc_ids);
2818 }
2819 
2820 /*********************************************************************//**
2821 Do commit-phase steps necessary for the insertion of a new row. */
2822 void
fts_add(fts_trx_table_t * ftt,fts_trx_row_t * row)2823 fts_add(
2824 /*====*/
2825 	fts_trx_table_t*ftt,			/*!< in: FTS trx table */
2826 	fts_trx_row_t*	row)			/*!< in: row */
2827 {
2828 	dict_table_t*	table = ftt->table;
2829 	doc_id_t	doc_id = row->doc_id;
2830 
2831 	ut_a(row->state == FTS_INSERT || row->state == FTS_MODIFY);
2832 
2833 	fts_add_doc_by_id(ftt, doc_id);
2834 
2835 	mutex_enter(&table->fts->cache->deleted_lock);
2836 	++table->fts->cache->added;
2837 	mutex_exit(&table->fts->cache->deleted_lock);
2838 
2839 	if (!DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID)
2840 	    && doc_id >= table->fts->cache->next_doc_id) {
2841 		table->fts->cache->next_doc_id = doc_id + 1;
2842 	}
2843 }
2844 
2845 /*********************************************************************//**
2846 Do commit-phase steps necessary for the deletion of a row.
2847 @return DB_SUCCESS or error code */
2848 static MY_ATTRIBUTE((nonnull, warn_unused_result))
2849 dberr_t
fts_delete(fts_trx_table_t * ftt,fts_trx_row_t * row)2850 fts_delete(
2851 /*=======*/
2852 	fts_trx_table_t*ftt,			/*!< in: FTS trx table */
2853 	fts_trx_row_t*	row)			/*!< in: row */
2854 {
2855 	que_t*		graph;
2856 	fts_table_t	fts_table;
2857 	dberr_t		error = DB_SUCCESS;
2858 	doc_id_t	write_doc_id;
2859 	dict_table_t*	table = ftt->table;
2860 	doc_id_t	doc_id = row->doc_id;
2861 	trx_t*		trx = ftt->fts_trx->trx;
2862 	pars_info_t*	info = pars_info_create();
2863 	fts_cache_t*	cache = table->fts->cache;
2864 
2865 	/* we do not index Documents whose Doc ID value is 0 */
2866 	if (doc_id == FTS_NULL_DOC_ID) {
2867 		ut_ad(!DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_HAS_DOC_ID));
2868 		return(error);
2869 	}
2870 
2871 	ut_a(row->state == FTS_DELETE || row->state == FTS_MODIFY);
2872 
2873 	FTS_INIT_FTS_TABLE(&fts_table, "DELETED", FTS_COMMON_TABLE, table);
2874 
2875 	/* Convert to "storage" byte order. */
2876 	fts_write_doc_id((byte*) &write_doc_id, doc_id);
2877 	fts_bind_doc_id(info, "doc_id", &write_doc_id);
2878 
2879 	/* It is possible we update a record that has not yet been sync-ed
2880 	into cache from last crash (delete Doc will not initialize the
2881 	sync). Avoid any added counter accounting until the FTS cache
2882 	is re-established and sync-ed */
2883 	if (table->fts->added_synced
2884 	    && doc_id > cache->synced_doc_id) {
2885 		mutex_enter(&table->fts->cache->deleted_lock);
2886 
2887 		/* The Doc ID could belong to those left in
2888 		ADDED table from last crash. So need to check
2889 		if it is less than first_doc_id when we initialize
2890 		the Doc ID system after reboot */
2891 		if (doc_id >= table->fts->cache->first_doc_id
2892 		    && table->fts->cache->added > 0) {
2893 			--table->fts->cache->added;
2894 		}
2895 
2896 		mutex_exit(&table->fts->cache->deleted_lock);
2897 
2898 		/* Only if the row was really deleted. */
2899 		ut_a(row->state == FTS_DELETE || row->state == FTS_MODIFY);
2900 	}
2901 
2902 	/* Note the deleted document for OPTIMIZE to purge. */
2903 	if (error == DB_SUCCESS) {
2904 		char	table_name[MAX_FULL_NAME_LEN];
2905 
2906 		trx->op_info = "adding doc id to FTS DELETED";
2907 
2908 		info->graph_owns_us = TRUE;
2909 
2910 		fts_table.suffix = "DELETED";
2911 
2912 		fts_get_table_name(&fts_table, table_name);
2913 		pars_info_bind_id(info, "deleted", table_name);
2914 
2915 		graph = fts_parse_sql(
2916 			&fts_table,
2917 			info,
2918 			"BEGIN INSERT INTO $deleted VALUES (:doc_id);");
2919 
2920 		error = fts_eval_sql(trx, graph);
2921 
2922 		fts_que_graph_free(graph);
2923 	} else {
2924 		pars_info_free(info);
2925 	}
2926 
2927 	/* Increment the total deleted count, this is used to calculate the
2928 	number of documents indexed. */
2929 	if (error == DB_SUCCESS) {
2930 		mutex_enter(&table->fts->cache->deleted_lock);
2931 
2932 		++table->fts->cache->deleted;
2933 
2934 		mutex_exit(&table->fts->cache->deleted_lock);
2935 	}
2936 
2937 	return(error);
2938 }
2939 
2940 /*********************************************************************//**
2941 Do commit-phase steps necessary for the modification of a row.
2942 @return DB_SUCCESS or error code */
2943 static MY_ATTRIBUTE((nonnull, warn_unused_result))
2944 dberr_t
fts_modify(fts_trx_table_t * ftt,fts_trx_row_t * row)2945 fts_modify(
2946 /*=======*/
2947 	fts_trx_table_t*	ftt,		/*!< in: FTS trx table */
2948 	fts_trx_row_t*		row)		/*!< in: row */
2949 {
2950 	dberr_t	error;
2951 
2952 	ut_a(row->state == FTS_MODIFY);
2953 
2954 	error = fts_delete(ftt, row);
2955 
2956 	if (error == DB_SUCCESS) {
2957 		fts_add(ftt, row);
2958 	}
2959 
2960 	return(error);
2961 }
2962 
2963 /*********************************************************************//**
2964 The given transaction is about to be committed; do whatever is necessary
2965 from the FTS system's POV.
2966 @return DB_SUCCESS or error code */
2967 static MY_ATTRIBUTE((nonnull, warn_unused_result))
2968 dberr_t
fts_commit_table(fts_trx_table_t * ftt)2969 fts_commit_table(
2970 /*=============*/
2971 	fts_trx_table_t*	ftt)		/*!< in: FTS table to commit*/
2972 {
2973 	if (srv_read_only_mode) {
2974 		return DB_READ_ONLY;
2975 	}
2976 
2977 	const ib_rbt_node_t*	node;
2978 	ib_rbt_t*		rows;
2979 	dberr_t			error = DB_SUCCESS;
2980 	fts_cache_t*		cache = ftt->table->fts->cache;
2981 	trx_t*			trx = trx_create();
2982 
2983 	trx_start_internal(trx);
2984 
2985 	rows = ftt->rows;
2986 
2987 	ftt->fts_trx->trx = trx;
2988 
2989 	if (cache->get_docs == NULL) {
2990 		rw_lock_x_lock(&cache->init_lock);
2991 		if (cache->get_docs == NULL) {
2992 			cache->get_docs = fts_get_docs_create(cache);
2993 		}
2994 		rw_lock_x_unlock(&cache->init_lock);
2995 	}
2996 
2997 	for (node = rbt_first(rows);
2998 	     node != NULL && error == DB_SUCCESS;
2999 	     node = rbt_next(rows, node)) {
3000 
3001 		fts_trx_row_t*	row = rbt_value(fts_trx_row_t, node);
3002 
3003 		switch (row->state) {
3004 		case FTS_INSERT:
3005 			fts_add(ftt, row);
3006 			break;
3007 
3008 		case FTS_MODIFY:
3009 			error = fts_modify(ftt, row);
3010 			break;
3011 
3012 		case FTS_DELETE:
3013 			error = fts_delete(ftt, row);
3014 			break;
3015 
3016 		default:
3017 			ut_error;
3018 		}
3019 	}
3020 
3021 	fts_sql_commit(trx);
3022 
3023 	trx->free();
3024 
3025 	return(error);
3026 }
3027 
3028 /*********************************************************************//**
3029 The given transaction is about to be committed; do whatever is necessary
3030 from the FTS system's POV.
3031 @return DB_SUCCESS or error code */
3032 dberr_t
fts_commit(trx_t * trx)3033 fts_commit(
3034 /*=======*/
3035 	trx_t*	trx)				/*!< in: transaction */
3036 {
3037 	const ib_rbt_node_t*	node;
3038 	dberr_t			error;
3039 	ib_rbt_t*		tables;
3040 	fts_savepoint_t*	savepoint;
3041 
3042 	savepoint = static_cast<fts_savepoint_t*>(
3043 		ib_vector_last(trx->fts_trx->savepoints));
3044 	tables = savepoint->tables;
3045 
3046 	for (node = rbt_first(tables), error = DB_SUCCESS;
3047 	     node != NULL && error == DB_SUCCESS;
3048 	     node = rbt_next(tables, node)) {
3049 
3050 		fts_trx_table_t**	ftt;
3051 
3052 		ftt = rbt_value(fts_trx_table_t*, node);
3053 
3054 		error = fts_commit_table(*ftt);
3055 	}
3056 
3057 	return(error);
3058 }
3059 
3060 /*********************************************************************//**
3061 Initialize a document. */
3062 void
fts_doc_init(fts_doc_t * doc)3063 fts_doc_init(
3064 /*=========*/
3065 	fts_doc_t*	doc)			/*!< in: doc to initialize */
3066 {
3067 	mem_heap_t*	heap = mem_heap_create(32);
3068 
3069 	memset(doc, 0, sizeof(*doc));
3070 
3071 	doc->self_heap = ib_heap_allocator_create(heap);
3072 }
3073 
3074 /*********************************************************************//**
3075 Free document. */
3076 void
fts_doc_free(fts_doc_t * doc)3077 fts_doc_free(
3078 /*=========*/
3079 	fts_doc_t*	doc)			/*!< in: document */
3080 {
3081 	mem_heap_t*	heap = static_cast<mem_heap_t*>(doc->self_heap->arg);
3082 
3083 	if (doc->tokens) {
3084 		rbt_free(doc->tokens);
3085 	}
3086 
3087 	ut_d(memset(doc, 0, sizeof(*doc)));
3088 
3089 	mem_heap_free(heap);
3090 }
3091 
3092 /*********************************************************************//**
3093 Callback function for fetch that stores the text of an FTS document,
3094 converting each column to UTF-16.
3095 @return always FALSE */
3096 ibool
fts_query_expansion_fetch_doc(void * row,void * user_arg)3097 fts_query_expansion_fetch_doc(
3098 /*==========================*/
3099 	void*		row,			/*!< in: sel_node_t* */
3100 	void*		user_arg)		/*!< in: fts_doc_t* */
3101 {
3102 	que_node_t*	exp;
3103 	sel_node_t*	node = static_cast<sel_node_t*>(row);
3104 	fts_doc_t*	result_doc = static_cast<fts_doc_t*>(user_arg);
3105 	dfield_t*	dfield;
3106 	ulint		len;
3107 	ulint		doc_len;
3108 	fts_doc_t	doc;
3109 	CHARSET_INFO*	doc_charset = NULL;
3110 	ulint		field_no = 0;
3111 
3112 	len = 0;
3113 
3114 	fts_doc_init(&doc);
3115 	doc.found = TRUE;
3116 
3117 	exp = node->select_list;
3118 	doc_len = 0;
3119 
3120 	doc_charset  = result_doc->charset;
3121 
3122 	/* Copy each indexed column content into doc->text.f_str */
3123 	while (exp) {
3124 		dfield = que_node_get_val(exp);
3125 		len = dfield_get_len(dfield);
3126 
3127 		/* NULL column */
3128 		if (len == UNIV_SQL_NULL) {
3129 			exp = que_node_get_next(exp);
3130 			continue;
3131 		}
3132 
3133 		if (!doc_charset) {
3134 			doc_charset = fts_get_charset(dfield->type.prtype);
3135 		}
3136 
3137 		doc.charset = doc_charset;
3138 
3139 		if (dfield_is_ext(dfield)) {
3140 			/* We ignore columns that are stored externally, this
3141 			could result in too many words to search */
3142 			exp = que_node_get_next(exp);
3143 			continue;
3144 		} else {
3145 			doc.text.f_n_char = 0;
3146 
3147 			doc.text.f_str = static_cast<byte*>(
3148 				dfield_get_data(dfield));
3149 
3150 			doc.text.f_len = len;
3151 		}
3152 
3153 		if (field_no == 0) {
3154 			fts_tokenize_document(&doc, result_doc,
3155 					      result_doc->parser);
3156 		} else {
3157 			fts_tokenize_document_next(&doc, doc_len, result_doc,
3158 						   result_doc->parser);
3159 		}
3160 
3161 		exp = que_node_get_next(exp);
3162 
3163 		doc_len += (exp) ? len + 1 : len;
3164 
3165 		field_no++;
3166 	}
3167 
3168 	ut_ad(doc_charset);
3169 
3170 	if (!result_doc->charset) {
3171 		result_doc->charset = doc_charset;
3172 	}
3173 
3174 	fts_doc_free(&doc);
3175 
3176 	return(FALSE);
3177 }
3178 
3179 /*********************************************************************//**
3180 fetch and tokenize the document. */
3181 static
3182 void
fts_fetch_doc_from_rec(fts_get_doc_t * get_doc,dict_index_t * clust_index,btr_pcur_t * pcur,rec_offs * offsets,fts_doc_t * doc)3183 fts_fetch_doc_from_rec(
3184 /*===================*/
3185 	fts_get_doc_t*  get_doc,	/*!< in: FTS index's get_doc struct */
3186 	dict_index_t*	clust_index,	/*!< in: cluster index */
3187 	btr_pcur_t*	pcur,		/*!< in: cursor whose position
3188 					has been stored */
3189 	rec_offs*	offsets,	/*!< in: offsets */
3190 	fts_doc_t*	doc)		/*!< out: fts doc to hold parsed
3191 					documents */
3192 {
3193 	dict_index_t*		index;
3194 	const rec_t*		clust_rec;
3195 	const dict_field_t*	ifield;
3196 	ulint			clust_pos;
3197 	ulint			doc_len = 0;
3198 	st_mysql_ftparser*	parser;
3199 
3200 	if (!get_doc) {
3201 		return;
3202 	}
3203 
3204 	index = get_doc->index_cache->index;
3205 	parser = get_doc->index_cache->index->parser;
3206 
3207 	clust_rec = btr_pcur_get_rec(pcur);
3208 	ut_ad(!page_rec_is_comp(clust_rec)
3209 	      || rec_get_status(clust_rec) == REC_STATUS_ORDINARY);
3210 
3211 	for (ulint i = 0; i < index->n_fields; i++) {
3212 		ifield = dict_index_get_nth_field(index, i);
3213 		clust_pos = dict_col_get_clust_pos(ifield->col, clust_index);
3214 
3215 		if (!get_doc->index_cache->charset) {
3216 			get_doc->index_cache->charset = fts_get_charset(
3217 				ifield->col->prtype);
3218 		}
3219 
3220 		if (rec_offs_nth_extern(offsets, clust_pos)) {
3221 			doc->text.f_str =
3222 				btr_rec_copy_externally_stored_field(
3223 					clust_rec, offsets,
3224 					btr_pcur_get_block(pcur)->zip_size(),
3225 					clust_pos, &doc->text.f_len,
3226 					static_cast<mem_heap_t*>(
3227 						doc->self_heap->arg));
3228 		} else {
3229 			doc->text.f_str = (byte*) rec_get_nth_field(
3230 				clust_rec, offsets, clust_pos,
3231 				&doc->text.f_len);
3232 		}
3233 
3234 		doc->found = TRUE;
3235 		doc->charset = get_doc->index_cache->charset;
3236 
3237 		/* Null Field */
3238 		if (doc->text.f_len == UNIV_SQL_NULL || doc->text.f_len == 0) {
3239 			continue;
3240 		}
3241 
3242 		if (!doc_len) {
3243 			fts_tokenize_document(doc, NULL, parser);
3244 		} else {
3245 			fts_tokenize_document_next(doc, doc_len, NULL, parser);
3246 		}
3247 
3248 		doc_len += doc->text.f_len + 1;
3249 	}
3250 }
3251 
3252 /** Fetch the data from tuple and tokenize the document.
3253 @param[in]     get_doc FTS index's get_doc struct
3254 @param[in]     tuple   tuple should be arranged in table schema order
3255 @param[out]    doc     fts doc to hold parsed documents. */
3256 static
3257 void
fts_fetch_doc_from_tuple(fts_get_doc_t * get_doc,const dtuple_t * tuple,fts_doc_t * doc)3258 fts_fetch_doc_from_tuple(
3259        fts_get_doc_t*  get_doc,
3260        const dtuple_t* tuple,
3261        fts_doc_t*      doc)
3262 {
3263        dict_index_t*           index;
3264        st_mysql_ftparser*      parser;
3265        ulint                   doc_len = 0;
3266        ulint                   processed_doc = 0;
3267        ulint                   num_field;
3268 
3269        if (get_doc == NULL) {
3270                return;
3271        }
3272 
3273        index = get_doc->index_cache->index;
3274        parser = get_doc->index_cache->index->parser;
3275        num_field = dict_index_get_n_fields(index);
3276 
3277        for (ulint i = 0; i < num_field; i++) {
3278                const dict_field_t*     ifield;
3279                const dict_col_t*       col;
3280                ulint                   pos;
3281 
3282                ifield = dict_index_get_nth_field(index, i);
3283                col = dict_field_get_col(ifield);
3284                pos = dict_col_get_no(col);
3285 		const dfield_t* field = dtuple_get_nth_field(tuple, pos);
3286 
3287                if (!get_doc->index_cache->charset) {
3288                        get_doc->index_cache->charset = fts_get_charset(
3289                                ifield->col->prtype);
3290                }
3291 
3292                ut_ad(!dfield_is_ext(field));
3293 
3294                doc->text.f_str = (byte*) dfield_get_data(field);
3295                doc->text.f_len = dfield_get_len(field);
3296                doc->found = TRUE;
3297                doc->charset = get_doc->index_cache->charset;
3298 
3299                /* field data is NULL. */
3300                if (doc->text.f_len == UNIV_SQL_NULL || doc->text.f_len == 0) {
3301                        continue;
3302                }
3303 
3304                if (processed_doc == 0) {
3305                        fts_tokenize_document(doc, NULL, parser);
3306                } else {
3307                        fts_tokenize_document_next(doc, doc_len, NULL, parser);
3308                }
3309 
3310                processed_doc++;
3311                doc_len += doc->text.f_len + 1;
3312        }
3313 }
3314 
3315 /** Fetch the document from tuple, tokenize the text data and
3316 insert the text data into fts auxiliary table and
3317 its cache. Moreover this tuple fields doesn't contain any information
3318 about externally stored field. This tuple contains data directly
3319 converted from mysql.
3320 @param[in]     ftt     FTS transaction table
3321 @param[in]     doc_id  doc id
3322 @param[in]     tuple   tuple from where data can be retrieved
3323                        and tuple should be arranged in table
3324                        schema order. */
3325 void
fts_add_doc_from_tuple(fts_trx_table_t * ftt,doc_id_t doc_id,const dtuple_t * tuple)3326 fts_add_doc_from_tuple(
3327        fts_trx_table_t*ftt,
3328        doc_id_t        doc_id,
3329        const dtuple_t* tuple)
3330 {
3331        mtr_t           mtr;
3332        fts_cache_t*    cache = ftt->table->fts->cache;
3333 
3334        ut_ad(cache->get_docs);
3335 
3336        if (!ftt->table->fts->added_synced) {
3337                fts_init_index(ftt->table, FALSE);
3338        }
3339 
3340        mtr_start(&mtr);
3341 
3342        ulint   num_idx = ib_vector_size(cache->get_docs);
3343 
3344        for (ulint i = 0; i < num_idx; ++i) {
3345                fts_doc_t       doc;
3346                dict_table_t*   table;
3347                fts_get_doc_t*  get_doc;
3348 
3349                get_doc = static_cast<fts_get_doc_t*>(
3350                        ib_vector_get(cache->get_docs, i));
3351                table = get_doc->index_cache->index->table;
3352 
3353                fts_doc_init(&doc);
3354                fts_fetch_doc_from_tuple(
3355                        get_doc, tuple, &doc);
3356 
3357                if (doc.found) {
3358                        mtr_commit(&mtr);
3359                        rw_lock_x_lock(&table->fts->cache->lock);
3360 
3361                        if (table->fts->cache->stopword_info.status
3362                            & STOPWORD_NOT_INIT) {
3363                                fts_load_stopword(table, NULL, NULL,
3364                                                  true, true);
3365                        }
3366 
3367                        fts_cache_add_doc(
3368                                table->fts->cache,
3369                                get_doc->index_cache,
3370                                doc_id, doc.tokens);
3371 
3372                        rw_lock_x_unlock(&table->fts->cache->lock);
3373 
3374                        if (cache->total_size > fts_max_cache_size / 5
3375                            || fts_need_sync) {
3376                                fts_sync(cache->sync, true, false);
3377                        }
3378 
3379                        mtr_start(&mtr);
3380 
3381                }
3382 
3383                fts_doc_free(&doc);
3384        }
3385 
3386        mtr_commit(&mtr);
3387 }
3388 
/*********************************************************************//**
Look up the row with the given Doc ID through table->fts_doc_id_index,
tokenize its indexed columns once per entry of cache->get_docs, and add
the tokens to the FTS cache. A sync of the table is requested through
fts_optimize_request_sync_table() when the cache has grown enough since
the size recorded in cache->total_size_at_sync.
If no matching record is found, or the record found is the infimum or
is delete-marked, nothing is added.
@return always TRUE */
static
ulint
fts_add_doc_by_id(
/*==============*/
	fts_trx_table_t*ftt,		/*!< in: FTS trx table */
	doc_id_t	doc_id)		/*!< in: doc id */
{
	mtr_t		mtr;
	mem_heap_t*	heap;
	btr_pcur_t	pcur;
	dict_table_t*	table;
	dtuple_t*	tuple;
	dfield_t*       dfield;
	fts_get_doc_t*	get_doc;
	doc_id_t        temp_doc_id;
	dict_index_t*   clust_index;
	dict_index_t*	fts_id_index;
	ibool		is_id_cluster;
	fts_cache_t*   	cache = ftt->table->fts->cache;

	ut_ad(cache->get_docs);

	/* If Doc ID has been supplied by the user, then the table
	might not yet be sync-ed */

	if (!ftt->table->fts->added_synced) {
		fts_init_index(ftt->table, FALSE);
	}

	/* Get the first FTS index's get_doc */
	get_doc = static_cast<fts_get_doc_t*>(
		ib_vector_get(cache->get_docs, 0));
	ut_ad(get_doc);

	table = get_doc->index_cache->index->table;

	/* This heap holds the search tuples and the record offsets; it is
	freed at the end of the function. */
	heap = mem_heap_create(512);

	clust_index = dict_table_get_first_index(table);
	fts_id_index = table->fts_doc_id_index;

	/* Check whether the index on FTS_DOC_ID is cluster index */
	is_id_cluster = (clust_index == fts_id_index);

	mtr_start(&mtr);
	btr_pcur_init(&pcur);

	/* Search based on Doc ID. Here, we'll need to consider the case
	when there is no primary index on Doc ID */
	tuple = dtuple_create(heap, 1);
	dfield = dtuple_get_nth_field(tuple, 0);
	dfield->type.mtype = DATA_INT;
	dfield->type.prtype = DATA_NOT_NULL | DATA_UNSIGNED | DATA_BINARY_TYPE;

	/* The search key is doc_id as written by mach_write_to_8(). */
	mach_write_to_8((byte*) &temp_doc_id, doc_id);
	dfield_set_data(dfield, &temp_doc_id, sizeof(temp_doc_id));

	btr_pcur_open_with_no_init(
		fts_id_index, tuple, PAGE_CUR_LE, BTR_SEARCH_LEAF,
		&pcur, 0, &mtr);

	/* If we have a match, add the data to doc structure */
	if (btr_pcur_get_low_match(&pcur) == 1) {
		const rec_t*	rec;
		btr_pcur_t*	doc_pcur;
		const rec_t*	clust_rec;
		btr_pcur_t	clust_pcur;
		rec_offs*	offsets = NULL;
		ulint		num_idx = ib_vector_size(cache->get_docs);

		rec = btr_pcur_get_rec(&pcur);

		/* Doc could be deleted */
		if (page_rec_is_infimum(rec)
		    || rec_get_deleted_flag(rec, dict_table_is_comp(table))) {

			goto func_exit;
		}

		if (is_id_cluster) {
			/* The Doc ID index is the clustered index, so the
			record just found is the clustered index record. */
			clust_rec = rec;
			doc_pcur = &pcur;
		} else {
			dtuple_t*	clust_ref;
			ulint		n_fields;

			/* Build a clustered index reference from the
			record found in the Doc ID index and position a
			second cursor on the clustered index with it. */
			btr_pcur_init(&clust_pcur);
			n_fields = dict_index_get_n_unique(clust_index);

			clust_ref = dtuple_create(heap, n_fields);
			dict_index_copy_types(clust_ref, clust_index, n_fields);

			row_build_row_ref_in_tuple(
				clust_ref, rec, fts_id_index, NULL);

			btr_pcur_open_with_no_init(
				clust_index, clust_ref, PAGE_CUR_LE,
				BTR_SEARCH_LEAF, &clust_pcur, 0, &mtr);

			doc_pcur = &clust_pcur;
			clust_rec = btr_pcur_get_rec(&clust_pcur);

		}

		offsets = rec_get_offsets(clust_rec, clust_index, NULL,
					  clust_index->n_core_fields,
					  ULINT_UNDEFINED, &heap);

		/* Tokenize the record once for every entry of
		cache->get_docs. Note that table and get_doc declared in
		the loop shadow the function-level variables of the same
		name. */
		for (ulint i = 0; i < num_idx; ++i) {
			fts_doc_t       doc;
			dict_table_t*   table;
			fts_get_doc_t*  get_doc;

			get_doc = static_cast<fts_get_doc_t*>(
				ib_vector_get(cache->get_docs, i));

			table = get_doc->index_cache->index->table;

			fts_doc_init(&doc);

			fts_fetch_doc_from_rec(
				get_doc, clust_index, doc_pcur, offsets, &doc);

			if (doc.found) {
				ibool	success MY_ATTRIBUTE((unused));

				/* Remember the cursor position and commit
				the mtr before the cache lock is taken. */
				btr_pcur_store_position(doc_pcur, &mtr);
				mtr_commit(&mtr);

				rw_lock_x_lock(&table->fts->cache->lock);

				if (table->fts->cache->stopword_info.status
				    & STOPWORD_NOT_INIT) {
					fts_load_stopword(table, NULL,
							  NULL, true, true);
				}

				fts_cache_add_doc(
					table->fts->cache,
					get_doc->index_cache,
					doc_id, doc.tokens);

				/* Decided while the cache lock is held: a
				sync is wanted if none is in progress and
				either fts_need_sync is set or the cache grew
				by more than a tenth of fts_max_cache_size
				since total_size_at_sync was recorded. */
				bool	need_sync = !cache->sync->in_progress
					&& (fts_need_sync
					    || (cache->total_size
						- cache->total_size_at_sync)
					    > fts_max_cache_size / 10);
				if (need_sync) {
					cache->total_size_at_sync =
						cache->total_size;
				}

				rw_lock_x_unlock(&table->fts->cache->lock);

				DBUG_EXECUTE_IF(
					"fts_instrument_sync",
					fts_optimize_request_sync_table(table);
					os_event_wait(cache->sync->event);
				);

				DBUG_EXECUTE_IF(
					"fts_instrument_sync_debug",
					fts_sync(cache->sync, true, true);
				);

				DEBUG_SYNC_C("fts_instrument_sync_request");
				DBUG_EXECUTE_IF(
					"fts_instrument_sync_request",
					fts_optimize_request_sync_table(table);
				);

				/* The request is issued only after the
				cache lock has been released. */
				if (need_sync) {
					fts_optimize_request_sync_table(table);
				}

				mtr_start(&mtr);

				/* The cursor position is restored only if
				another entry remains to be processed. */
				if (i < num_idx - 1) {

					success = btr_pcur_restore_position(
						BTR_SEARCH_LEAF, doc_pcur,
						&mtr);

					ut_ad(success);
				}
			}

			fts_doc_free(&doc);
		}

		/* In this case doc_pcur is clust_pcur; pcur itself is
		closed at func_exit. */
		if (!is_id_cluster) {
			btr_pcur_close(doc_pcur);
		}
	}
func_exit:
	mtr_commit(&mtr);

	btr_pcur_close(&pcur);

	mem_heap_free(heap);
	return(TRUE);
}
3596 
3597 
3598 /*********************************************************************//**
3599 Callback function to read a single ulint column.
3600 return always returns TRUE */
3601 static
3602 ibool
fts_read_ulint(void * row,void * user_arg)3603 fts_read_ulint(
3604 /*===========*/
3605 	void*		row,		/*!< in: sel_node_t* */
3606 	void*		user_arg)	/*!< in: pointer to ulint */
3607 {
3608 	sel_node_t*	sel_node = static_cast<sel_node_t*>(row);
3609 	ulint*		value = static_cast<ulint*>(user_arg);
3610 	que_node_t*	exp = sel_node->select_list;
3611 	dfield_t*	dfield = que_node_get_val(exp);
3612 	void*		data = dfield_get_data(dfield);
3613 
3614 	*value = mach_read_from_4(static_cast<const byte*>(data));
3615 
3616 	return(TRUE);
3617 }
3618 
3619 /*********************************************************************//**
3620 Get maximum Doc ID in a table if index "FTS_DOC_ID_INDEX" exists
3621 @return max Doc ID or 0 if index "FTS_DOC_ID_INDEX" does not exist */
3622 doc_id_t
fts_get_max_doc_id(dict_table_t * table)3623 fts_get_max_doc_id(
3624 /*===============*/
3625 	dict_table_t*	table)		/*!< in: user table */
3626 {
3627 	dict_index_t*	index;
3628 	dict_field_t*	dfield MY_ATTRIBUTE((unused)) = NULL;
3629 	doc_id_t	doc_id = 0;
3630 	mtr_t		mtr;
3631 	btr_pcur_t	pcur;
3632 
3633 	index = table->fts_doc_id_index;
3634 
3635 	if (!index) {
3636 		return(0);
3637 	}
3638 
3639 	ut_ad(!index->is_instant());
3640 
3641 	dfield = dict_index_get_nth_field(index, 0);
3642 
3643 #if 0 /* This can fail when renaming a column to FTS_DOC_ID_COL_NAME. */
3644 	ut_ad(innobase_strcasecmp(FTS_DOC_ID_COL_NAME, dfield->name) == 0);
3645 #endif
3646 
3647 	mtr_start(&mtr);
3648 
3649 	/* fetch the largest indexes value */
3650 	btr_pcur_open_at_index_side(
3651 		false, index, BTR_SEARCH_LEAF, &pcur, true, 0, &mtr);
3652 
3653 	if (!page_is_empty(btr_pcur_get_page(&pcur))) {
3654 		const rec_t*    rec = NULL;
3655 
3656 		do {
3657 			rec = btr_pcur_get_rec(&pcur);
3658 
3659 			if (page_rec_is_user_rec(rec)) {
3660 				break;
3661 			}
3662 		} while (btr_pcur_move_to_prev(&pcur, &mtr));
3663 
3664 		if (!rec || rec_is_metadata(rec, *index)) {
3665 			goto func_exit;
3666 		}
3667 
3668 		doc_id = fts_read_doc_id(rec);
3669 	}
3670 
3671 func_exit:
3672 	btr_pcur_close(&pcur);
3673 	mtr_commit(&mtr);
3674 	return(doc_id);
3675 }
3676 
/*********************************************************************//**
Fetch document(s) by document id: run a cursor query on index->table
and hand every fetched row to the callback. The query is evaluated in a
transaction that this function creates, commits and frees.
When get_doc is non-NULL, the parsed query graph is stored in
get_doc->get_document_graph and reused by later calls; note that a
stored graph is reused as is, without looking at "option" again. When
get_doc is NULL the graph is freed before returning.
At least one of get_doc and index_to_use must be non-NULL, because
get_doc is dereferenced when index_to_use is NULL.
@return DB_SUCCESS if OK else error */
dberr_t
fts_doc_fetch_by_doc_id(
/*====================*/
	fts_get_doc_t*	get_doc,	/*!< in: state */
	doc_id_t	doc_id,		/*!< in: id of document to
					fetch */
	dict_index_t*	index_to_use,	/*!< in: caller supplied FTS index,
					or NULL */
	ulint		option,		/*!< in: search option:
					FTS_FETCH_DOC_BY_ID_EQUAL selects the
					row whose Doc ID equals doc_id,
					FTS_FETCH_DOC_BY_ID_LARGE the rows
					whose Doc ID is greater than doc_id */
	fts_sql_callback
			callback,	/*!< in: callback to read */
	void*		arg)		/*!< in: callback arg */
{
	pars_info_t*	info;
	dberr_t		error;
	const char*	select_str;
	doc_id_t	write_doc_id;
	dict_index_t*	index;
	trx_t*		trx = trx_create();
	que_t*          graph;

	trx->op_info = "fetching indexed FTS document";

	/* The FTS index can be supplied by caller directly with
	"index_to_use", otherwise, get it from "get_doc" */
	index = (index_to_use) ? index_to_use : get_doc->index_cache->index;

	/* Reuse the info of a previously stored graph if there is one. */
	if (get_doc && get_doc->get_document_graph) {
		info = get_doc->get_document_graph->info;
	} else {
		info = pars_info_create();
	}

	/* Convert to "storage" byte order. */
	fts_write_doc_id((byte*) &write_doc_id, doc_id);
	fts_bind_doc_id(info, "doc_id", &write_doc_id);
	pars_info_bind_function(info, "my_func", callback, arg);

	select_str = fts_get_select_columns_str(index, info, info->heap);
	pars_info_bind_id(info, "table_name", index->table->name.m_name);

	/* Parse the query only if no stored graph is available. */
	if (!get_doc || !get_doc->get_document_graph) {
		if (option == FTS_FETCH_DOC_BY_ID_EQUAL) {
			graph = fts_parse_sql(
				NULL,
				info,
				mem_heap_printf(info->heap,
					"DECLARE FUNCTION my_func;\n"
					"DECLARE CURSOR c IS"
					" SELECT %s FROM $table_name"
					" WHERE %s = :doc_id;\n"
					"BEGIN\n"
					""
					"OPEN c;\n"
					"WHILE 1 = 1 LOOP\n"
					"  FETCH c INTO my_func();\n"
					"  IF c %% NOTFOUND THEN\n"
					"    EXIT;\n"
					"  END IF;\n"
					"END LOOP;\n"
					"CLOSE c;",
					select_str, FTS_DOC_ID_COL_NAME));
		} else {
			ut_ad(option == FTS_FETCH_DOC_BY_ID_LARGE);

			/* This is used for crash recovery of table with
			hidden DOC ID or FTS indexes. We will scan the table
			to re-process user table rows whose DOC ID or
			FTS indexed documents have not been sync-ed to disk
			during a recent crash.
			In the case that all fulltext indexes are dropped
			for a table, we will keep the "hidden" FTS_DOC_ID
			column, and this scan is to retrieve the largest
			DOC ID being used in the table to determine the
			appropriate next DOC ID.
			In the case that fulltext index(es) exist, this
			operation will re-tokenize any docs that have not
			been sync-ed to the disk, and re-prime the FTS
			cache.
			Unlike the query above, this one selects the
			FTS_DOC_ID column ahead of the indexed columns. */
			graph = fts_parse_sql(
				NULL,
				info,
				mem_heap_printf(info->heap,
					"DECLARE FUNCTION my_func;\n"
					"DECLARE CURSOR c IS"
					" SELECT %s, %s FROM $table_name"
					" WHERE %s > :doc_id;\n"
					"BEGIN\n"
					""
					"OPEN c;\n"
					"WHILE 1 = 1 LOOP\n"
					"  FETCH c INTO my_func();\n"
					"  IF c %% NOTFOUND THEN\n"
					"    EXIT;\n"
					"  END IF;\n"
					"END LOOP;\n"
					"CLOSE c;",
					FTS_DOC_ID_COL_NAME,
					select_str, FTS_DOC_ID_COL_NAME));
		}
		/* Keep the graph for later calls on the same get_doc. */
		if (get_doc) {
			get_doc->get_document_graph = graph;
		}
	} else {
		graph = get_doc->get_document_graph;
	}

	/* The transaction is committed and freed whatever the result of
	the evaluation. */
	error = fts_eval_sql(trx, graph);
	fts_sql_commit(trx);
	trx->free();

	/* Without a get_doc nothing holds on to the graph. */
	if (!get_doc) {
		fts_que_graph_free(graph);
	}

	return(error);
}
3798 
3799 /*********************************************************************//**
3800 Write out a single word's data as new entry/entries in the INDEX table.
3801 @return DB_SUCCESS if all OK. */
3802 dberr_t
fts_write_node(trx_t * trx,que_t ** graph,fts_table_t * fts_table,fts_string_t * word,fts_node_t * node)3803 fts_write_node(
3804 /*===========*/
3805 	trx_t*		trx,			/*!< in: transaction */
3806 	que_t**		graph,			/*!< in: query graph */
3807 	fts_table_t*	fts_table,		/*!< in: aux table */
3808 	fts_string_t*	word,			/*!< in: word in UTF-8 */
3809 	fts_node_t*	node)			/*!< in: node columns */
3810 {
3811 	pars_info_t*	info;
3812 	dberr_t		error;
3813 	ib_uint32_t	doc_count;
3814 	time_t		start_time;
3815 	doc_id_t	last_doc_id;
3816 	doc_id_t	first_doc_id;
3817 	char		table_name[MAX_FULL_NAME_LEN];
3818 
3819 	ut_a(node->ilist != NULL);
3820 
3821 	if (*graph) {
3822 		info = (*graph)->info;
3823 	} else {
3824 		info = pars_info_create();
3825 
3826 		fts_get_table_name(fts_table, table_name);
3827 		pars_info_bind_id(info, "index_table_name", table_name);
3828 	}
3829 
3830 	pars_info_bind_varchar_literal(info, "token", word->f_str, word->f_len);
3831 
3832 	/* Convert to "storage" byte order. */
3833 	fts_write_doc_id((byte*) &first_doc_id, node->first_doc_id);
3834 	fts_bind_doc_id(info, "first_doc_id", &first_doc_id);
3835 
3836 	/* Convert to "storage" byte order. */
3837 	fts_write_doc_id((byte*) &last_doc_id, node->last_doc_id);
3838 	fts_bind_doc_id(info, "last_doc_id", &last_doc_id);
3839 
3840 	ut_a(node->last_doc_id >= node->first_doc_id);
3841 
3842 	/* Convert to "storage" byte order. */
3843 	mach_write_to_4((byte*) &doc_count, node->doc_count);
3844 	pars_info_bind_int4_literal(
3845 		info, "doc_count", (const ib_uint32_t*) &doc_count);
3846 
3847 	/* Set copy_name to FALSE since it's a static. */
3848 	pars_info_bind_literal(
3849 		info, "ilist", node->ilist, node->ilist_size,
3850 		DATA_BLOB, DATA_BINARY_TYPE);
3851 
3852 	if (!*graph) {
3853 
3854 		*graph = fts_parse_sql(
3855 			fts_table,
3856 			info,
3857 			"BEGIN\n"
3858 			"INSERT INTO $index_table_name VALUES"
3859 			" (:token, :first_doc_id,"
3860 			"  :last_doc_id, :doc_count, :ilist);");
3861 	}
3862 
3863 	start_time = time(NULL);
3864 	error = fts_eval_sql(trx, *graph);
3865 	elapsed_time += time(NULL) - start_time;
3866 	++n_nodes;
3867 
3868 	return(error);
3869 }
3870 
3871 /*********************************************************************//**
3872 Add rows to the DELETED_CACHE table.
3873 @return DB_SUCCESS if all went well else error code*/
3874 static MY_ATTRIBUTE((nonnull, warn_unused_result))
3875 dberr_t
fts_sync_add_deleted_cache(fts_sync_t * sync,ib_vector_t * doc_ids)3876 fts_sync_add_deleted_cache(
3877 /*=======================*/
3878 	fts_sync_t*	sync,			/*!< in: sync state */
3879 	ib_vector_t*	doc_ids)		/*!< in: doc ids to add */
3880 {
3881 	ulint		i;
3882 	pars_info_t*	info;
3883 	que_t*		graph;
3884 	fts_table_t	fts_table;
3885 	char		table_name[MAX_FULL_NAME_LEN];
3886 	doc_id_t	dummy = 0;
3887 	dberr_t		error = DB_SUCCESS;
3888 	ulint		n_elems = ib_vector_size(doc_ids);
3889 
3890 	ut_a(ib_vector_size(doc_ids) > 0);
3891 
3892 	ib_vector_sort(doc_ids, fts_doc_id_cmp);
3893 
3894 	info = pars_info_create();
3895 
3896 	fts_bind_doc_id(info, "doc_id", &dummy);
3897 
3898 	FTS_INIT_FTS_TABLE(
3899 		&fts_table, "DELETED_CACHE", FTS_COMMON_TABLE, sync->table);
3900 
3901 	fts_get_table_name(&fts_table, table_name);
3902 	pars_info_bind_id(info, "table_name", table_name);
3903 
3904 	graph = fts_parse_sql(
3905 		&fts_table,
3906 		info,
3907 		"BEGIN INSERT INTO $table_name VALUES (:doc_id);");
3908 
3909 	for (i = 0; i < n_elems && error == DB_SUCCESS; ++i) {
3910 		doc_id_t*	update;
3911 		doc_id_t	write_doc_id;
3912 
3913 		update = static_cast<doc_id_t*>(ib_vector_get(doc_ids, i));
3914 
3915 		/* Convert to "storage" byte order. */
3916 		fts_write_doc_id((byte*) &write_doc_id, *update);
3917 		fts_bind_doc_id(info, "doc_id", &write_doc_id);
3918 
3919 		error = fts_eval_sql(sync->trx, graph);
3920 	}
3921 
3922 	fts_que_graph_free(graph);
3923 
3924 	return(error);
3925 }
3926 
3927 /** Write the words and ilist to disk.
3928 @param[in,out]	trx		transaction
3929 @param[in]	index_cache	index cache
3930 @param[in]	unlock_cache	whether unlock cache when write node
3931 @return DB_SUCCESS if all went well else error code */
3932 static MY_ATTRIBUTE((nonnull, warn_unused_result))
3933 dberr_t
fts_sync_write_words(trx_t * trx,fts_index_cache_t * index_cache,bool unlock_cache)3934 fts_sync_write_words(
3935 	trx_t*			trx,
3936 	fts_index_cache_t*	index_cache,
3937 	bool			unlock_cache)
3938 {
3939 	fts_table_t	fts_table;
3940 	ulint		n_nodes = 0;
3941 	ulint		n_words = 0;
3942 	const ib_rbt_node_t* rbt_node;
3943 	dberr_t		error = DB_SUCCESS;
3944 	ibool		print_error = FALSE;
3945 	dict_table_t*	table = index_cache->index->table;
3946 
3947 	FTS_INIT_INDEX_TABLE(
3948 		&fts_table, NULL, FTS_INDEX_TABLE, index_cache->index);
3949 
3950 	n_words = rbt_size(index_cache->words);
3951 
3952 	/* We iterate over the entire tree, even if there is an error,
3953 	since we want to free the memory used during caching. */
3954 	for (rbt_node = rbt_first(index_cache->words);
3955 	     rbt_node;
3956 	     rbt_node = rbt_next(index_cache->words, rbt_node)) {
3957 
3958 		ulint			i;
3959 		ulint			selected;
3960 		fts_tokenizer_word_t*	word;
3961 
3962 		word = rbt_value(fts_tokenizer_word_t, rbt_node);
3963 
3964 		DBUG_EXECUTE_IF("fts_instrument_write_words_before_select_index",
3965 				os_thread_sleep(300000););
3966 
3967 		selected = fts_select_index(
3968 			index_cache->charset, word->text.f_str,
3969 			word->text.f_len);
3970 
3971 		fts_table.suffix = fts_get_suffix(selected);
3972 
3973 		/* We iterate over all the nodes even if there was an error */
3974 		for (i = 0; i < ib_vector_size(word->nodes); ++i) {
3975 
3976 			fts_node_t* fts_node = static_cast<fts_node_t*>(
3977 				ib_vector_get(word->nodes, i));
3978 
3979 			if (fts_node->synced) {
3980 				continue;
3981 			} else {
3982 				fts_node->synced = true;
3983 			}
3984 
3985 			/*FIXME: we need to handle the error properly. */
3986 			if (error == DB_SUCCESS) {
3987 				if (unlock_cache) {
3988 					rw_lock_x_unlock(
3989 						&table->fts->cache->lock);
3990 				}
3991 
3992 				error = fts_write_node(
3993 					trx,
3994 					&index_cache->ins_graph[selected],
3995 					&fts_table, &word->text, fts_node);
3996 
3997 				DEBUG_SYNC_C("fts_write_node");
3998 				DBUG_EXECUTE_IF("fts_write_node_crash",
3999 					DBUG_SUICIDE(););
4000 
4001 				DBUG_EXECUTE_IF("fts_instrument_sync_sleep",
4002 					os_thread_sleep(1000000);
4003 				);
4004 
4005 				if (unlock_cache) {
4006 					rw_lock_x_lock(
4007 						&table->fts->cache->lock);
4008 				}
4009 			}
4010 		}
4011 
4012 		n_nodes += ib_vector_size(word->nodes);
4013 
4014 		if (UNIV_UNLIKELY(error != DB_SUCCESS) && !print_error) {
4015 			ib::error() << "(" << error << ") writing"
4016 				" word node to FTS auxiliary index table "
4017 				<< table->name;
4018 			print_error = TRUE;
4019 		}
4020 	}
4021 
4022 	if (UNIV_UNLIKELY(fts_enable_diag_print)) {
4023 		printf("Avg number of nodes: %lf\n",
4024 		       (double) n_nodes / (double) (n_words > 1 ? n_words : 1));
4025 	}
4026 
4027 	return(error);
4028 }
4029 
4030 /*********************************************************************//**
4031 Begin Sync, create transaction, acquire locks, etc. */
4032 static
4033 void
fts_sync_begin(fts_sync_t * sync)4034 fts_sync_begin(
4035 /*===========*/
4036 	fts_sync_t*	sync)			/*!< in: sync state */
4037 {
4038 	fts_cache_t*	cache = sync->table->fts->cache;
4039 
4040 	n_nodes = 0;
4041 	elapsed_time = 0;
4042 
4043 	sync->start_time = time(NULL);
4044 
4045 	sync->trx = trx_create();
4046 	trx_start_internal(sync->trx);
4047 
4048 	if (UNIV_UNLIKELY(fts_enable_diag_print)) {
4049 		ib::info() << "FTS SYNC for table " << sync->table->name
4050 			<< ", deleted count: "
4051 			<< ib_vector_size(cache->deleted_doc_ids)
4052 			<< " size: " << cache->total_size << " bytes";
4053 	}
4054 }
4055 
4056 /*********************************************************************//**
4057 Run SYNC on the table, i.e., write out data from the index specific
4058 cache to the FTS aux INDEX table and FTS aux doc id stats table.
4059 @return DB_SUCCESS if all OK */
4060 static MY_ATTRIBUTE((nonnull, warn_unused_result))
4061 dberr_t
fts_sync_index(fts_sync_t * sync,fts_index_cache_t * index_cache)4062 fts_sync_index(
4063 /*===========*/
4064 	fts_sync_t*		sync,		/*!< in: sync state */
4065 	fts_index_cache_t*	index_cache)	/*!< in: index cache */
4066 {
4067 	trx_t*		trx = sync->trx;
4068 
4069 	trx->op_info = "doing SYNC index";
4070 
4071 	if (UNIV_UNLIKELY(fts_enable_diag_print)) {
4072 		ib::info() << "SYNC words: " << rbt_size(index_cache->words);
4073 	}
4074 
4075 	ut_ad(rbt_validate(index_cache->words));
4076 
4077 	return(fts_sync_write_words(trx, index_cache, sync->unlock_cache));
4078 }
4079 
4080 /** Check if index cache has been synced completely
4081 @param[in,out]	index_cache	index cache
4082 @return true if index is synced, otherwise false. */
4083 static
4084 bool
fts_sync_index_check(fts_index_cache_t * index_cache)4085 fts_sync_index_check(
4086 	fts_index_cache_t*	index_cache)
4087 {
4088 	const ib_rbt_node_t*	rbt_node;
4089 
4090 	for (rbt_node = rbt_first(index_cache->words);
4091 	     rbt_node != NULL;
4092 	     rbt_node = rbt_next(index_cache->words, rbt_node)) {
4093 
4094 		fts_tokenizer_word_t*	word;
4095 		word = rbt_value(fts_tokenizer_word_t, rbt_node);
4096 
4097 		fts_node_t*	fts_node;
4098 		fts_node = static_cast<fts_node_t*>(ib_vector_last(word->nodes));
4099 
4100 		if (!fts_node->synced) {
4101 			return(false);
4102 		}
4103 	}
4104 
4105 	return(true);
4106 }
4107 
4108 /** Reset synced flag in index cache when rollback
4109 @param[in,out]	index_cache	index cache */
4110 static
4111 void
fts_sync_index_reset(fts_index_cache_t * index_cache)4112 fts_sync_index_reset(
4113 	fts_index_cache_t*	index_cache)
4114 {
4115 	const ib_rbt_node_t*	rbt_node;
4116 
4117 	for (rbt_node = rbt_first(index_cache->words);
4118 	     rbt_node != NULL;
4119 	     rbt_node = rbt_next(index_cache->words, rbt_node)) {
4120 
4121 		fts_tokenizer_word_t*	word;
4122 		word = rbt_value(fts_tokenizer_word_t, rbt_node);
4123 
4124 		fts_node_t*	fts_node;
4125 		fts_node = static_cast<fts_node_t*>(ib_vector_last(word->nodes));
4126 
4127 		fts_node->synced = false;
4128 	}
4129 }
4130 
4131 /** Commit the SYNC, change state of processed doc ids etc.
4132 @param[in,out]	sync	sync state
4133 @return DB_SUCCESS if all OK */
4134 static  MY_ATTRIBUTE((nonnull, warn_unused_result))
4135 dberr_t
fts_sync_commit(fts_sync_t * sync)4136 fts_sync_commit(
4137 	fts_sync_t*	sync)
4138 {
4139 	dberr_t		error;
4140 	trx_t*		trx = sync->trx;
4141 	fts_cache_t*	cache = sync->table->fts->cache;
4142 	doc_id_t	last_doc_id;
4143 
4144 	trx->op_info = "doing SYNC commit";
4145 
4146 	/* After each Sync, update the CONFIG table about the max doc id
4147 	we just sync-ed to index table */
4148 	error = fts_cmp_set_sync_doc_id(sync->table, sync->max_doc_id, FALSE,
4149 					&last_doc_id);
4150 
4151 	/* Get the list of deleted documents that are either in the
4152 	cache or were headed there but were deleted before the add
4153 	thread got to them. */
4154 
4155 	if (error == DB_SUCCESS && ib_vector_size(cache->deleted_doc_ids) > 0) {
4156 
4157 		error = fts_sync_add_deleted_cache(
4158 			sync, cache->deleted_doc_ids);
4159 	}
4160 
4161 	/* We need to do this within the deleted lock since fts_delete() can
4162 	attempt to add a deleted doc id to the cache deleted id array. */
4163 	fts_cache_clear(cache);
4164 	DEBUG_SYNC_C("fts_deleted_doc_ids_clear");
4165 	fts_cache_init(cache);
4166 	rw_lock_x_unlock(&cache->lock);
4167 
4168 	if (UNIV_LIKELY(error == DB_SUCCESS)) {
4169 		fts_sql_commit(trx);
4170 	} else {
4171 		fts_sql_rollback(trx);
4172 		ib::error() << "(" << error << ") during SYNC of "
4173 			"table " << sync->table->name;
4174 	}
4175 
4176 	if (UNIV_UNLIKELY(fts_enable_diag_print) && elapsed_time) {
4177 		ib::info() << "SYNC for table " << sync->table->name
4178 			<< ": SYNC time: "
4179 			<< (time(NULL) - sync->start_time)
4180 			<< " secs: elapsed "
4181 			<< (double) n_nodes / elapsed_time
4182 			<< " ins/sec";
4183 	}
4184 
4185 	/* Avoid assertion in trx_t::free(). */
4186 	trx->dict_operation_lock_mode = 0;
4187 	trx->free();
4188 
4189 	return(error);
4190 }
4191 
4192 /** Rollback a sync operation
4193 @param[in,out]	sync	sync state */
4194 static
4195 void
fts_sync_rollback(fts_sync_t * sync)4196 fts_sync_rollback(
4197 	fts_sync_t*	sync)
4198 {
4199 	trx_t*		trx = sync->trx;
4200 	fts_cache_t*	cache = sync->table->fts->cache;
4201 
4202 	for (ulint i = 0; i < ib_vector_size(cache->indexes); ++i) {
4203 		ulint			j;
4204 		fts_index_cache_t*	index_cache;
4205 
4206 		index_cache = static_cast<fts_index_cache_t*>(
4207 			ib_vector_get(cache->indexes, i));
4208 
4209 		/* Reset synced flag so nodes will not be skipped
4210 		in the next sync, see fts_sync_write_words(). */
4211 		fts_sync_index_reset(index_cache);
4212 
4213 		for (j = 0; fts_index_selector[j].value; ++j) {
4214 
4215 			if (index_cache->ins_graph[j] != NULL) {
4216 
4217 				fts_que_graph_free_check_lock(
4218 					NULL, index_cache,
4219 					index_cache->ins_graph[j]);
4220 
4221 				index_cache->ins_graph[j] = NULL;
4222 			}
4223 
4224 			if (index_cache->sel_graph[j] != NULL) {
4225 
4226 				fts_que_graph_free_check_lock(
4227 					NULL, index_cache,
4228 					index_cache->sel_graph[j]);
4229 
4230 				index_cache->sel_graph[j] = NULL;
4231 			}
4232 		}
4233 	}
4234 
4235 	rw_lock_x_unlock(&cache->lock);
4236 
4237 	fts_sql_rollback(trx);
4238 
4239 	/* Avoid assertion in trx_t::free(). */
4240 	trx->dict_operation_lock_mode = 0;
4241 	trx->free();
4242 }
4243 
4244 /** Run SYNC on the table, i.e., write out data from the cache to the
4245 FTS auxiliary INDEX table and clear the cache at the end.
4246 @param[in,out]	sync		sync state
4247 @param[in]	unlock_cache	whether unlock cache lock when write node
4248 @param[in]	wait		whether wait when a sync is in progress
4249 @return DB_SUCCESS if all OK */
4250 static
4251 dberr_t
fts_sync(fts_sync_t * sync,bool unlock_cache,bool wait)4252 fts_sync(
4253 	fts_sync_t*	sync,
4254 	bool		unlock_cache,
4255 	bool		wait)
4256 {
4257 	if (srv_read_only_mode) {
4258 		return DB_READ_ONLY;
4259 	}
4260 
4261 	ulint		i;
4262 	dberr_t		error = DB_SUCCESS;
4263 	fts_cache_t*	cache = sync->table->fts->cache;
4264 
4265 	rw_lock_x_lock(&cache->lock);
4266 
4267 	/* Check if cache is being synced.
4268 	Note: we release cache lock in fts_sync_write_words() to
4269 	avoid long wait for the lock by other threads. */
4270 	while (sync->in_progress) {
4271 		rw_lock_x_unlock(&cache->lock);
4272 
4273 		if (wait) {
4274 			os_event_wait(sync->event);
4275 		} else {
4276 			return(DB_SUCCESS);
4277 		}
4278 
4279 		rw_lock_x_lock(&cache->lock);
4280 	}
4281 
4282 	sync->unlock_cache = unlock_cache;
4283 	sync->in_progress = true;
4284 
4285 	DEBUG_SYNC_C("fts_sync_begin");
4286 	fts_sync_begin(sync);
4287 
4288 begin_sync:
4289 	if (cache->total_size > fts_max_cache_size) {
4290 		/* Avoid the case: sync never finish when
4291 		insert/update keeps comming. */
4292 		ut_ad(sync->unlock_cache);
4293 		sync->unlock_cache = false;
4294 	}
4295 
4296 	for (i = 0; i < ib_vector_size(cache->indexes); ++i) {
4297 		fts_index_cache_t*	index_cache;
4298 
4299 		index_cache = static_cast<fts_index_cache_t*>(
4300 			ib_vector_get(cache->indexes, i));
4301 
4302 		if (index_cache->index->to_be_dropped
4303 		   || index_cache->index->table->to_be_dropped) {
4304 			continue;
4305 		}
4306 
4307 		DBUG_EXECUTE_IF("fts_instrument_sync_before_syncing",
4308 				os_thread_sleep(300000););
4309 		index_cache->index->index_fts_syncing = true;
4310 
4311 		error = fts_sync_index(sync, index_cache);
4312 
4313 		if (error != DB_SUCCESS) {
4314 			goto end_sync;
4315 		}
4316 	}
4317 
4318 	DBUG_EXECUTE_IF("fts_instrument_sync_interrupted",
4319 			sync->interrupted = true;
4320 			error = DB_INTERRUPTED;
4321 			goto end_sync;
4322 	);
4323 
4324 	/* Make sure all the caches are synced. */
4325 	for (i = 0; i < ib_vector_size(cache->indexes); ++i) {
4326 		fts_index_cache_t*	index_cache;
4327 
4328 		index_cache = static_cast<fts_index_cache_t*>(
4329 			ib_vector_get(cache->indexes, i));
4330 
4331 		if (index_cache->index->to_be_dropped
4332 		    || index_cache->index->table->to_be_dropped
4333 		    || fts_sync_index_check(index_cache)) {
4334 			continue;
4335 		}
4336 
4337 		goto begin_sync;
4338 	}
4339 
4340 end_sync:
4341 	if (error == DB_SUCCESS && !sync->interrupted) {
4342 		error = fts_sync_commit(sync);
4343 	} else {
4344 		fts_sync_rollback(sync);
4345 	}
4346 
4347 	rw_lock_x_lock(&cache->lock);
4348 	/* Clear fts syncing flags of any indexes in case sync is
4349 	interrupted */
4350 	for (i = 0; i < ib_vector_size(cache->indexes); ++i) {
4351 		static_cast<fts_index_cache_t*>(
4352 			ib_vector_get(cache->indexes, i))
4353 			->index->index_fts_syncing = false;
4354 	}
4355 
4356 	sync->interrupted = false;
4357 	sync->in_progress = false;
4358 	os_event_set(sync->event);
4359 	rw_lock_x_unlock(&cache->lock);
4360 
4361 	/* We need to check whether an optimize is required, for that
4362 	we make copies of the two variables that control the trigger. These
4363 	variables can change behind our back and we don't want to hold the
4364 	lock for longer than is needed. */
4365 	mutex_enter(&cache->deleted_lock);
4366 
4367 	cache->added = 0;
4368 	cache->deleted = 0;
4369 
4370 	mutex_exit(&cache->deleted_lock);
4371 
4372 	return(error);
4373 }
4374 
4375 /** Run SYNC on the table, i.e., write out data from the cache to the
4376 FTS auxiliary INDEX table and clear the cache at the end.
4377 @param[in,out]	table		fts table
4378 @param[in]	wait		whether wait for existing sync to finish
4379 @return DB_SUCCESS on success, error code on failure. */
fts_sync_table(dict_table_t * table,bool wait)4380 dberr_t fts_sync_table(dict_table_t* table, bool wait)
4381 {
4382 	dberr_t	err = DB_SUCCESS;
4383 
4384 	ut_ad(table->fts);
4385 
4386 	if (table->space && table->fts->cache
4387 	    && !dict_table_is_corrupted(table)) {
4388 		err = fts_sync(table->fts->cache->sync, !wait, wait);
4389 	}
4390 
4391 	return(err);
4392 }
4393 
4394 /** Check if a fts token is a stopword or less than fts_min_token_size
4395 or greater than fts_max_token_size.
4396 @param[in]	token		token string
4397 @param[in]	stopwords	stopwords rb tree
4398 @param[in]	cs		token charset
4399 @retval	true	if it is not stopword and length in range
4400 @retval	false	if it is stopword or lenght not in range */
4401 bool
fts_check_token(const fts_string_t * token,const ib_rbt_t * stopwords,const CHARSET_INFO * cs)4402 fts_check_token(
4403 	const fts_string_t*		token,
4404 	const ib_rbt_t*			stopwords,
4405 	const CHARSET_INFO*		cs)
4406 {
4407 	ut_ad(cs != NULL || stopwords == NULL);
4408 
4409 	ib_rbt_bound_t  parent;
4410 
4411 	return(token->f_n_char >= fts_min_token_size
4412 	       && token->f_n_char <= fts_max_token_size
4413 	       && (stopwords == NULL
4414 		   || rbt_search(stopwords, &parent, token) != 0));
4415 }
4416 
4417 /** Add the token and its start position to the token's list of positions.
4418 @param[in,out]	result_doc	result doc rb tree
4419 @param[in]	str		token string
4420 @param[in]	position	token position */
4421 static
4422 void
fts_add_token(fts_doc_t * result_doc,fts_string_t str,ulint position)4423 fts_add_token(
4424 	fts_doc_t*	result_doc,
4425 	fts_string_t	str,
4426 	ulint		position)
4427 {
4428 	/* Ignore string whose character number is less than
4429 	"fts_min_token_size" or more than "fts_max_token_size" */
4430 
4431 	if (fts_check_token(&str, NULL, result_doc->charset)) {
4432 
4433 		mem_heap_t*	heap;
4434 		fts_string_t	t_str;
4435 		fts_token_t*	token;
4436 		ib_rbt_bound_t	parent;
4437 		ulint		newlen;
4438 
4439 		heap = static_cast<mem_heap_t*>(result_doc->self_heap->arg);
4440 
4441 		t_str.f_n_char = str.f_n_char;
4442 
4443 		t_str.f_len = str.f_len * result_doc->charset->casedn_multiply + 1;
4444 
4445 		t_str.f_str = static_cast<byte*>(
4446 			mem_heap_alloc(heap, t_str.f_len));
4447 
4448 		/* For binary collations, a case sensitive search is
4449 		performed. Hence don't convert to lower case. */
4450 		if (my_binary_compare(result_doc->charset)) {
4451 			memcpy(t_str.f_str, str.f_str, str.f_len);
4452 			t_str.f_str[str.f_len]= 0;
4453 			newlen= str.f_len;
4454 		} else {
4455 			newlen = innobase_fts_casedn_str(
4456 				result_doc->charset, (char*) str.f_str, str.f_len,
4457 				(char*) t_str.f_str, t_str.f_len);
4458 		}
4459 
4460 		t_str.f_len = newlen;
4461 		t_str.f_str[newlen] = 0;
4462 
4463 		/* Add the word to the document statistics. If the word
4464 		hasn't been seen before we create a new entry for it. */
4465 		if (rbt_search(result_doc->tokens, &parent, &t_str) != 0) {
4466 			fts_token_t	new_token;
4467 
4468 			new_token.text.f_len = newlen;
4469 			new_token.text.f_str = t_str.f_str;
4470 			new_token.text.f_n_char = t_str.f_n_char;
4471 
4472 			new_token.positions = ib_vector_create(
4473 				result_doc->self_heap, sizeof(ulint), 32);
4474 
4475 			parent.last = rbt_add_node(
4476 				result_doc->tokens, &parent, &new_token);
4477 
4478 			ut_ad(rbt_validate(result_doc->tokens));
4479 		}
4480 
4481 		token = rbt_value(fts_token_t, parent.last);
4482 		ib_vector_push(token->positions, &position);
4483 	}
4484 }
4485 
4486 /********************************************************************
4487 Process next token from document starting at the given position, i.e., add
4488 the token's start position to the token's list of positions.
4489 @return number of characters handled in this call */
4490 static
4491 ulint
fts_process_token(fts_doc_t * doc,fts_doc_t * result,ulint start_pos,ulint add_pos)4492 fts_process_token(
4493 /*==============*/
4494 	fts_doc_t*	doc,		/* in/out: document to
4495 					tokenize */
4496 	fts_doc_t*	result,		/* out: if provided, save
4497 					result here */
4498 	ulint		start_pos,	/*!< in: start position in text */
4499 	ulint		add_pos)	/*!< in: add this position to all
4500 					tokens from this tokenization */
4501 {
4502 	ulint		ret;
4503 	fts_string_t	str;
4504 	ulint		position;
4505 	fts_doc_t*	result_doc;
4506 	byte		buf[FTS_MAX_WORD_LEN + 1];
4507 
4508 	str.f_str = buf;
4509 
4510 	/* Determine where to save the result. */
4511 	result_doc = (result != NULL) ? result : doc;
4512 
4513 	/* The length of a string in characters is set here only. */
4514 
4515 	ret = innobase_mysql_fts_get_token(
4516 		doc->charset, doc->text.f_str + start_pos,
4517 		doc->text.f_str + doc->text.f_len, &str);
4518 
4519 	position = start_pos + ret - str.f_len + add_pos;
4520 
4521 	fts_add_token(result_doc, str, position);
4522 
4523 	return(ret);
4524 }
4525 
4526 /*************************************************************//**
4527 Get token char size by charset
4528 @return token size */
4529 ulint
fts_get_token_size(const CHARSET_INFO * cs,const char * token,ulint len)4530 fts_get_token_size(
4531 /*===============*/
4532 	const CHARSET_INFO*	cs,	/*!< in: Character set */
4533 	const char*		token,	/*!< in: token */
4534 	ulint			len)	/*!< in: token length */
4535 {
4536 	char*	start;
4537 	char*	end;
4538 	ulint	size = 0;
4539 
4540 	/* const_cast is for reinterpret_cast below, or it will fail. */
4541 	start = const_cast<char*>(token);
4542 	end = start + len;
4543 	while (start < end) {
4544 		int	ctype;
4545 		int	mbl;
4546 
4547 		mbl = cs->cset->ctype(
4548 			cs, &ctype,
4549 			reinterpret_cast<uchar*>(start),
4550 			reinterpret_cast<uchar*>(end));
4551 
4552 		size++;
4553 
4554 		start += mbl > 0 ? mbl : (mbl < 0 ? -mbl : 1);
4555 	}
4556 
4557 	return(size);
4558 }
4559 
4560 /*************************************************************//**
4561 FTS plugin parser 'myql_parser' callback function for document tokenize.
4562 Refer to 'st_mysql_ftparser_param' for more detail.
4563 @return always returns 0 */
4564 int
fts_tokenize_document_internal(MYSQL_FTPARSER_PARAM * param,const char * doc,int len)4565 fts_tokenize_document_internal(
4566 /*===========================*/
4567 	MYSQL_FTPARSER_PARAM*	param,	/*!< in: parser parameter */
4568 	const char*		doc,/*!< in/out: document */
4569 	int			len)	/*!< in: document length */
4570 {
4571 	fts_string_t	str;
4572 	byte		buf[FTS_MAX_WORD_LEN + 1];
4573 	/* JAN: TODO: MySQL 5.7
4574 	MYSQL_FTPARSER_BOOLEAN_INFO bool_info =
4575 		{ FT_TOKEN_WORD, 0, 0, 0, 0, 0, ' ', 0 };
4576 	*/
4577 	MYSQL_FTPARSER_BOOLEAN_INFO bool_info =
4578 		{ FT_TOKEN_WORD, 0, 0, 0, 0, ' ', 0};
4579 
4580 	ut_ad(len >= 0);
4581 
4582 	str.f_str = buf;
4583 
4584 	for (ulint i = 0, inc = 0; i < static_cast<ulint>(len); i += inc) {
4585 		inc = innobase_mysql_fts_get_token(
4586 			const_cast<CHARSET_INFO*>(param->cs),
4587 			(uchar*)(doc) + i,
4588 			(uchar*)(doc) + len,
4589 			&str);
4590 
4591 		if (str.f_len > 0) {
4592 			/* JAN: TODO: MySQL 5.7
4593 			bool_info.position =
4594 				static_cast<int>(i + inc - str.f_len);
4595 			ut_ad(bool_info.position >= 0);
4596 			*/
4597 
4598 			/* Stop when add word fails */
4599 			if (param->mysql_add_word(
4600 				param,
4601 				reinterpret_cast<char*>(str.f_str),
4602 				static_cast<int>(str.f_len),
4603 				&bool_info)) {
4604 				break;
4605 			}
4606 		}
4607 	}
4608 
4609 	return(0);
4610 }
4611 
4612 /******************************************************************//**
4613 FTS plugin parser 'myql_add_word' callback function for document tokenize.
4614 Refer to 'st_mysql_ftparser_param' for more detail.
4615 @return always returns 0 */
4616 static
4617 int
fts_tokenize_add_word_for_parser(MYSQL_FTPARSER_PARAM * param,const char * word,int word_len,MYSQL_FTPARSER_BOOLEAN_INFO *)4618 fts_tokenize_add_word_for_parser(
4619 /*=============================*/
4620 	MYSQL_FTPARSER_PARAM*	param,		/* in: parser paramter */
4621 	const char*			word,		/* in: token word */
4622 	int			word_len,	/* in: word len */
4623 	MYSQL_FTPARSER_BOOLEAN_INFO*)
4624 {
4625 	fts_string_t	str;
4626 	fts_tokenize_param_t*	fts_param;
4627 	fts_doc_t*	result_doc;
4628 	ulint		position;
4629 
4630 	fts_param = static_cast<fts_tokenize_param_t*>(param->mysql_ftparam);
4631 	result_doc = fts_param->result_doc;
4632 	ut_ad(result_doc != NULL);
4633 
4634 	str.f_str = (byte*)(word);
4635 	str.f_len = ulint(word_len);
4636 	str.f_n_char = fts_get_token_size(
4637 		const_cast<CHARSET_INFO*>(param->cs), word, str.f_len);
4638 
4639 	/* JAN: TODO: MySQL 5.7 FTS
4640 	ut_ad(boolean_info->position >= 0);
4641 	position = boolean_info->position + fts_param->add_pos;
4642 	*/
4643 	position = fts_param->add_pos++;
4644 
4645 	fts_add_token(result_doc, str, position);
4646 
4647 	return(0);
4648 }
4649 
4650 /******************************************************************//**
4651 Parse a document using an external / user supplied parser */
4652 static
4653 void
fts_tokenize_by_parser(fts_doc_t * doc,st_mysql_ftparser * parser,fts_tokenize_param_t * fts_param)4654 fts_tokenize_by_parser(
4655 /*===================*/
4656 	fts_doc_t*		doc,	/* in/out: document to tokenize */
4657 	st_mysql_ftparser*	parser, /* in: plugin fts parser */
4658 	fts_tokenize_param_t*	fts_param) /* in: fts tokenize param */
4659 {
4660 	MYSQL_FTPARSER_PARAM	param;
4661 
4662 	ut_a(parser);
4663 
4664 	/* Set paramters for param */
4665 	param.mysql_parse = fts_tokenize_document_internal;
4666 	param.mysql_add_word = fts_tokenize_add_word_for_parser;
4667 	param.mysql_ftparam = fts_param;
4668 	param.cs = doc->charset;
4669 	param.doc = reinterpret_cast<char*>(doc->text.f_str);
4670 	param.length = static_cast<int>(doc->text.f_len);
4671 	param.mode= MYSQL_FTPARSER_SIMPLE_MODE;
4672 
4673 	PARSER_INIT(parser, &param);
4674 	parser->parse(&param);
4675 	PARSER_DEINIT(parser, &param);
4676 }
4677 
4678 /** Tokenize a document.
4679 @param[in,out]	doc	document to tokenize
4680 @param[out]	result	tokenization result
4681 @param[in]	parser	pluggable parser */
4682 static
4683 void
fts_tokenize_document(fts_doc_t * doc,fts_doc_t * result,st_mysql_ftparser * parser)4684 fts_tokenize_document(
4685 	fts_doc_t*		doc,
4686 	fts_doc_t*		result,
4687 	st_mysql_ftparser*	parser)
4688 {
4689 	ut_a(!doc->tokens);
4690 	ut_a(doc->charset);
4691 
4692 	doc->tokens = rbt_create_arg_cmp(sizeof(fts_token_t),
4693 					 innobase_fts_text_cmp,
4694 					 (void*) doc->charset);
4695 
4696 	if (parser != NULL) {
4697 		fts_tokenize_param_t	fts_param;
4698 		fts_param.result_doc = (result != NULL) ? result : doc;
4699 		fts_param.add_pos = 0;
4700 
4701 		fts_tokenize_by_parser(doc, parser, &fts_param);
4702 	} else {
4703 		ulint		inc;
4704 
4705 		for (ulint i = 0; i < doc->text.f_len; i += inc) {
4706 			inc = fts_process_token(doc, result, i, 0);
4707 			ut_a(inc > 0);
4708 		}
4709 	}
4710 }
4711 
4712 /** Continue to tokenize a document.
4713 @param[in,out]	doc	document to tokenize
4714 @param[in]	add_pos	add this position to all tokens from this tokenization
4715 @param[out]	result	tokenization result
4716 @param[in]	parser	pluggable parser */
4717 static
4718 void
fts_tokenize_document_next(fts_doc_t * doc,ulint add_pos,fts_doc_t * result,st_mysql_ftparser * parser)4719 fts_tokenize_document_next(
4720 	fts_doc_t*		doc,
4721 	ulint			add_pos,
4722 	fts_doc_t*		result,
4723 	st_mysql_ftparser*	parser)
4724 {
4725 	ut_a(doc->tokens);
4726 
4727 	if (parser) {
4728 		fts_tokenize_param_t	fts_param;
4729 
4730 		fts_param.result_doc = (result != NULL) ? result : doc;
4731 		fts_param.add_pos = add_pos;
4732 
4733 		fts_tokenize_by_parser(doc, parser, &fts_param);
4734 	} else {
4735 		ulint		inc;
4736 
4737 		for (ulint i = 0; i < doc->text.f_len; i += inc) {
4738 			inc = fts_process_token(doc, result, i, add_pos);
4739 			ut_a(inc > 0);
4740 		}
4741 	}
4742 }
4743 
4744 /** Create the vector of fts_get_doc_t instances.
4745 @param[in,out]	cache	fts cache
4746 @return	vector of fts_get_doc_t instances */
4747 static
4748 ib_vector_t*
fts_get_docs_create(fts_cache_t * cache)4749 fts_get_docs_create(
4750 	fts_cache_t*	cache)
4751 {
4752 	ib_vector_t*	get_docs;
4753 
4754 	ut_ad(rw_lock_own(&cache->init_lock, RW_LOCK_X));
4755 
4756 	/* We need one instance of fts_get_doc_t per index. */
4757 	get_docs = ib_vector_create(cache->self_heap, sizeof(fts_get_doc_t), 4);
4758 
4759 	/* Create the get_doc instance, we need one of these
4760 	per FTS index. */
4761 	for (ulint i = 0; i < ib_vector_size(cache->indexes); ++i) {
4762 
4763 		dict_index_t**	index;
4764 		fts_get_doc_t*	get_doc;
4765 
4766 		index = static_cast<dict_index_t**>(
4767 			ib_vector_get(cache->indexes, i));
4768 
4769 		get_doc = static_cast<fts_get_doc_t*>(
4770 			ib_vector_push(get_docs, NULL));
4771 
4772 		memset(get_doc, 0x0, sizeof(*get_doc));
4773 
4774 		get_doc->index_cache = fts_get_index_cache(cache, *index);
4775 		get_doc->cache = cache;
4776 
4777 		/* Must find the index cache. */
4778 		ut_a(get_doc->index_cache != NULL);
4779 	}
4780 
4781 	return(get_docs);
4782 }
4783 
4784 /********************************************************************
4785 Release any resources held by the fts_get_doc_t instances. */
4786 static
4787 void
fts_get_docs_clear(ib_vector_t * get_docs)4788 fts_get_docs_clear(
4789 /*===============*/
4790 	ib_vector_t*	get_docs)		/*!< in: Doc retrieval vector */
4791 {
4792 	ulint		i;
4793 
4794 	/* Release the get doc graphs if any. */
4795 	for (i = 0; i < ib_vector_size(get_docs); ++i) {
4796 
4797 		fts_get_doc_t*	get_doc = static_cast<fts_get_doc_t*>(
4798 			ib_vector_get(get_docs, i));
4799 
4800 		if (get_doc->get_document_graph != NULL) {
4801 
4802 			ut_a(get_doc->index_cache);
4803 
4804 			fts_que_graph_free(get_doc->get_document_graph);
4805 			get_doc->get_document_graph = NULL;
4806 		}
4807 	}
4808 }
4809 
4810 /*********************************************************************//**
4811 Get the initial Doc ID by consulting the CONFIG table
4812 @return initial Doc ID */
4813 doc_id_t
fts_init_doc_id(const dict_table_t * table)4814 fts_init_doc_id(
4815 /*============*/
4816 	const dict_table_t*	table)		/*!< in: table */
4817 {
4818 	doc_id_t	max_doc_id = 0;
4819 
4820 	rw_lock_x_lock(&table->fts->cache->lock);
4821 
4822 	/* Return if the table is already initialized for DOC ID */
4823 	if (table->fts->cache->first_doc_id != FTS_NULL_DOC_ID) {
4824 		rw_lock_x_unlock(&table->fts->cache->lock);
4825 		return(0);
4826 	}
4827 
4828 	DEBUG_SYNC_C("fts_initialize_doc_id");
4829 
4830 	/* Then compare this value with the ID value stored in the CONFIG
4831 	table. The larger one will be our new initial Doc ID */
4832 	fts_cmp_set_sync_doc_id(table, 0, FALSE, &max_doc_id);
4833 
4834 	/* If DICT_TF2_FTS_ADD_DOC_ID is set, we are in the process of
4835 	creating index (and add doc id column. No need to recovery
4836 	documents */
4837 	if (!DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_ADD_DOC_ID)) {
4838 		fts_init_index((dict_table_t*) table, TRUE);
4839 	}
4840 
4841 	table->fts->added_synced = true;
4842 
4843 	table->fts->cache->first_doc_id = max_doc_id;
4844 
4845 	rw_lock_x_unlock(&table->fts->cache->lock);
4846 
4847 	ut_ad(max_doc_id > 0);
4848 
4849 	return(max_doc_id);
4850 }
4851 
#ifdef FTS_MULT_INDEX
/** Check if the index of a get_doc instance is in the affected set.
@param[in]	fts_indexes	affected FTS indexes
@param[in]	get_doc		info for reading document
@return TRUE if the index is in fts_indexes, otherwise FALSE */
static
ibool
fts_is_index_updated(
	const ib_vector_t*	fts_indexes,
	const fts_get_doc_t*	get_doc)
{
	dict_index_t*	wanted = get_doc->index_cache->index;

	for (ulint n = 0; n < ib_vector_size(fts_indexes); ++n) {

		const dict_index_t*	candidate
			= static_cast<const dict_index_t*>(
				ib_vector_getp_const(fts_indexes, n));

		ut_a(candidate != NULL);

		/* Indexes are compared by address. */
		if (candidate == wanted) {
			return(TRUE);
		}
	}

	return(FALSE);
}
#endif
4883 
4884 /*********************************************************************//**
4885 Fetch COUNT(*) from specified table.
4886 @return the number of rows in the table */
4887 ulint
fts_get_rows_count(fts_table_t * fts_table)4888 fts_get_rows_count(
4889 /*===============*/
4890 	fts_table_t*	fts_table)	/*!< in: fts table to read */
4891 {
4892 	trx_t*		trx;
4893 	pars_info_t*	info;
4894 	que_t*		graph;
4895 	dberr_t		error;
4896 	ulint		count = 0;
4897 	char		table_name[MAX_FULL_NAME_LEN];
4898 
4899 	trx = trx_create();
4900 	trx->op_info = "fetching FT table rows count";
4901 
4902 	info = pars_info_create();
4903 
4904 	pars_info_bind_function(info, "my_func", fts_read_ulint, &count);
4905 
4906 	fts_get_table_name(fts_table, table_name);
4907 	pars_info_bind_id(info, "table_name", table_name);
4908 
4909 	graph = fts_parse_sql(
4910 		fts_table,
4911 		info,
4912 		"DECLARE FUNCTION my_func;\n"
4913 		"DECLARE CURSOR c IS"
4914 		" SELECT COUNT(*)"
4915 		" FROM $table_name;\n"
4916 		"BEGIN\n"
4917 		"\n"
4918 		"OPEN c;\n"
4919 		"WHILE 1 = 1 LOOP\n"
4920 		"  FETCH c INTO my_func();\n"
4921 		"  IF c % NOTFOUND THEN\n"
4922 		"    EXIT;\n"
4923 		"  END IF;\n"
4924 		"END LOOP;\n"
4925 		"CLOSE c;");
4926 
4927 	for (;;) {
4928 		error = fts_eval_sql(trx, graph);
4929 
4930 		if (UNIV_LIKELY(error == DB_SUCCESS)) {
4931 			fts_sql_commit(trx);
4932 
4933 			break;				/* Exit the loop. */
4934 		} else {
4935 			fts_sql_rollback(trx);
4936 
4937 			if (error == DB_LOCK_WAIT_TIMEOUT) {
4938 				ib::warn() << "lock wait timeout reading"
4939 					" FTS table. Retrying!";
4940 
4941 				trx->error_state = DB_SUCCESS;
4942 			} else {
4943 				ib::error() << "(" << error
4944 					<< ") while reading FTS table "
4945 					<< table_name;
4946 
4947 				break;			/* Exit the loop. */
4948 			}
4949 		}
4950 	}
4951 
4952 	fts_que_graph_free(graph);
4953 
4954 	trx->free();
4955 
4956 	return(count);
4957 }
4958 
#ifdef FTS_CACHE_SIZE_DEBUG
/** Read the max cache size parameter from the config table and store it
in sync->max_cache_size.
@param[in,out]	sync	sync state */
static
void
fts_update_max_cache_size(
	fts_sync_t*	sync)
{
	fts_table_t	config_table;

	FTS_INIT_FTS_TABLE(&config_table, "CONFIG", FTS_COMMON_TABLE,
			   sync->table);

	trx_t*		trx = trx_create();

	/* The size returned is in bytes. */
	sync->max_cache_size = fts_get_max_cache_size(trx, &config_table);

	fts_sql_commit(trx);

	trx->free();
}
#endif /* FTS_CACHE_SIZE_DEBUG */
4983 
4984 /*********************************************************************//**
4985 Free the modified rows of a table. */
4986 UNIV_INLINE
4987 void
fts_trx_table_rows_free(ib_rbt_t * rows)4988 fts_trx_table_rows_free(
4989 /*====================*/
4990 	ib_rbt_t*	rows)			/*!< in: rbt of rows to free */
4991 {
4992 	const ib_rbt_node_t*	node;
4993 
4994 	for (node = rbt_first(rows); node; node = rbt_first(rows)) {
4995 		fts_trx_row_t*	row;
4996 
4997 		row = rbt_value(fts_trx_row_t, node);
4998 
4999 		if (row->fts_indexes != NULL) {
5000 			/* This vector shouldn't be using the
5001 			heap allocator.  */
5002 			ut_a(row->fts_indexes->allocator->arg == NULL);
5003 
5004 			ib_vector_free(row->fts_indexes);
5005 			row->fts_indexes = NULL;
5006 		}
5007 
5008 		ut_free(rbt_remove_node(rows, node));
5009 	}
5010 
5011 	ut_a(rbt_empty(rows));
5012 	rbt_free(rows);
5013 }
5014 
5015 /*********************************************************************//**
5016 Free an FTS savepoint instance. */
5017 UNIV_INLINE
5018 void
fts_savepoint_free(fts_savepoint_t * savepoint)5019 fts_savepoint_free(
5020 /*===============*/
5021 	fts_savepoint_t*	savepoint)	/*!< in: savepoint instance */
5022 {
5023 	const ib_rbt_node_t*	node;
5024 	ib_rbt_t*		tables = savepoint->tables;
5025 
5026 	/* Nothing to free! */
5027 	if (tables == NULL) {
5028 		return;
5029 	}
5030 
5031 	for (node = rbt_first(tables); node; node = rbt_first(tables)) {
5032 		fts_trx_table_t*	ftt;
5033 		fts_trx_table_t**	fttp;
5034 
5035 		fttp = rbt_value(fts_trx_table_t*, node);
5036 		ftt = *fttp;
5037 
5038 		/* This can be NULL if a savepoint was released. */
5039 		if (ftt->rows != NULL) {
5040 			fts_trx_table_rows_free(ftt->rows);
5041 			ftt->rows = NULL;
5042 		}
5043 
5044 		/* This can be NULL if a savepoint was released. */
5045 		if (ftt->added_doc_ids != NULL) {
5046 			fts_doc_ids_free(ftt->added_doc_ids);
5047 			ftt->added_doc_ids = NULL;
5048 		}
5049 
5050 		/* The default savepoint name must be NULL. */
5051 		if (ftt->docs_added_graph) {
5052 			fts_que_graph_free(ftt->docs_added_graph);
5053 		}
5054 
5055 		/* NOTE: We are responsible for free'ing the node */
5056 		ut_free(rbt_remove_node(tables, node));
5057 	}
5058 
5059 	ut_a(rbt_empty(tables));
5060 	rbt_free(tables);
5061 	savepoint->tables = NULL;
5062 }
5063 
5064 /*********************************************************************//**
5065 Free an FTS trx. */
5066 void
fts_trx_free(fts_trx_t * fts_trx)5067 fts_trx_free(
5068 /*=========*/
5069 	fts_trx_t*	fts_trx)		/* in, own: FTS trx */
5070 {
5071 	ulint		i;
5072 
5073 	for (i = 0; i < ib_vector_size(fts_trx->savepoints); ++i) {
5074 		fts_savepoint_t*	savepoint;
5075 
5076 		savepoint = static_cast<fts_savepoint_t*>(
5077 			ib_vector_get(fts_trx->savepoints, i));
5078 
5079 		/* The default savepoint name must be NULL. */
5080 		if (i == 0) {
5081 			ut_a(savepoint->name == NULL);
5082 		}
5083 
5084 		fts_savepoint_free(savepoint);
5085 	}
5086 
5087 	for (i = 0; i < ib_vector_size(fts_trx->last_stmt); ++i) {
5088 		fts_savepoint_t*	savepoint;
5089 
5090 		savepoint = static_cast<fts_savepoint_t*>(
5091 			ib_vector_get(fts_trx->last_stmt, i));
5092 
5093 		/* The default savepoint name must be NULL. */
5094 		if (i == 0) {
5095 			ut_a(savepoint->name == NULL);
5096 		}
5097 
5098 		fts_savepoint_free(savepoint);
5099 	}
5100 
5101 	if (fts_trx->heap) {
5102 		mem_heap_free(fts_trx->heap);
5103 	}
5104 }
5105 
5106 /*********************************************************************//**
5107 Extract the doc id from the FTS hidden column.
5108 @return doc id that was extracted from rec */
5109 doc_id_t
fts_get_doc_id_from_row(dict_table_t * table,dtuple_t * row)5110 fts_get_doc_id_from_row(
5111 /*====================*/
5112 	dict_table_t*	table,			/*!< in: table */
5113 	dtuple_t*	row)			/*!< in: row whose FTS doc id we
5114 						want to extract.*/
5115 {
5116 	dfield_t*	field;
5117 	doc_id_t	doc_id = 0;
5118 
5119 	ut_a(table->fts->doc_col != ULINT_UNDEFINED);
5120 
5121 	field = dtuple_get_nth_field(row, table->fts->doc_col);
5122 
5123 	ut_a(dfield_get_len(field) == sizeof(doc_id));
5124 	ut_a(dfield_get_type(field)->mtype == DATA_INT);
5125 
5126 	doc_id = fts_read_doc_id(
5127 		static_cast<const byte*>(dfield_get_data(field)));
5128 
5129 	return(doc_id);
5130 }
5131 
5132 /** Extract the doc id from the record that belongs to index.
5133 @param[in]	rec	record containing FTS_DOC_ID
5134 @param[in]	index	index of rec
5135 @param[in]	offsets	rec_get_offsets(rec,index)
5136 @return doc id that was extracted from rec */
5137 doc_id_t
fts_get_doc_id_from_rec(const rec_t * rec,const dict_index_t * index,const rec_offs * offsets)5138 fts_get_doc_id_from_rec(
5139 	const rec_t*		rec,
5140 	const dict_index_t*	index,
5141 	const rec_offs*		offsets)
5142 {
5143 	ulint f = dict_col_get_index_pos(
5144 		&index->table->cols[index->table->fts->doc_col], index);
5145 	ulint len;
5146 	doc_id_t doc_id = mach_read_from_8(
5147 		rec_get_nth_field(rec, offsets, f, &len));
5148 	ut_ad(len == 8);
5149 	return doc_id;
5150 }
5151 
5152 /*********************************************************************//**
5153 Search the index specific cache for a particular FTS index.
5154 @return the index specific cache else NULL */
5155 fts_index_cache_t*
fts_find_index_cache(const fts_cache_t * cache,const dict_index_t * index)5156 fts_find_index_cache(
5157 /*=================*/
5158 	const fts_cache_t*	cache,		/*!< in: cache to search */
5159 	const dict_index_t*	index)		/*!< in: index to search for */
5160 {
5161 	/* We cast away the const because our internal function, takes
5162 	non-const cache arg and returns a non-const pointer. */
5163 	return(static_cast<fts_index_cache_t*>(
5164 		fts_get_index_cache((fts_cache_t*) cache, index)));
5165 }
5166 
5167 /*********************************************************************//**
5168 Search cache for word.
5169 @return the word node vector if found else NULL */
5170 const ib_vector_t*
fts_cache_find_word(const fts_index_cache_t * index_cache,const fts_string_t * text)5171 fts_cache_find_word(
5172 /*================*/
5173 	const fts_index_cache_t*index_cache,	/*!< in: cache to search */
5174 	const fts_string_t*	text)		/*!< in: word to search for */
5175 {
5176 	ib_rbt_bound_t		parent;
5177 	const ib_vector_t*	nodes = NULL;
5178 #ifdef UNIV_DEBUG
5179 	dict_table_t*		table = index_cache->index->table;
5180 	fts_cache_t*		cache = table->fts->cache;
5181 
5182 	ut_ad(rw_lock_own(&cache->lock, RW_LOCK_X));
5183 #endif /* UNIV_DEBUG */
5184 
5185 	/* Lookup the word in the rb tree */
5186 	if (rbt_search(index_cache->words, &parent, text) == 0) {
5187 		const fts_tokenizer_word_t*	word;
5188 
5189 		word = rbt_value(fts_tokenizer_word_t, parent.last);
5190 
5191 		nodes = word->nodes;
5192 	}
5193 
5194 	return(nodes);
5195 }
5196 
5197 /*********************************************************************//**
5198 Append deleted doc ids to vector. */
5199 void
fts_cache_append_deleted_doc_ids(const fts_cache_t * cache,ib_vector_t * vector)5200 fts_cache_append_deleted_doc_ids(
5201 /*=============================*/
5202 	const fts_cache_t*	cache,		/*!< in: cache to use */
5203 	ib_vector_t*		vector)		/*!< in: append to this vector */
5204 {
5205 	mutex_enter(const_cast<ib_mutex_t*>(&cache->deleted_lock));
5206 
5207 	if (cache->deleted_doc_ids == NULL) {
5208 		mutex_exit((ib_mutex_t*) &cache->deleted_lock);
5209 		return;
5210 	}
5211 
5212 
5213 	for (ulint i = 0; i < ib_vector_size(cache->deleted_doc_ids); ++i) {
5214 		doc_id_t*	update;
5215 
5216 		update = static_cast<doc_id_t*>(
5217 			ib_vector_get(cache->deleted_doc_ids, i));
5218 
5219 		ib_vector_push(vector, &update);
5220 	}
5221 
5222 	mutex_exit((ib_mutex_t*) &cache->deleted_lock);
5223 }
5224 
5225 /*********************************************************************//**
5226 Add the FTS document id hidden column. */
5227 void
fts_add_doc_id_column(dict_table_t * table,mem_heap_t * heap)5228 fts_add_doc_id_column(
5229 /*==================*/
5230 	dict_table_t*	table,	/*!< in/out: Table with FTS index */
5231 	mem_heap_t*	heap)	/*!< in: temporary memory heap, or NULL */
5232 {
5233 	dict_mem_table_add_col(
5234 		table, heap,
5235 		FTS_DOC_ID_COL_NAME,
5236 		DATA_INT,
5237 		dtype_form_prtype(
5238 			DATA_NOT_NULL | DATA_UNSIGNED
5239 			| DATA_BINARY_TYPE | DATA_FTS_DOC_ID, 0),
5240 		sizeof(doc_id_t));
5241 	DICT_TF2_FLAG_SET(table, DICT_TF2_FTS_HAS_DOC_ID);
5242 }
5243 
5244 /** Add new fts doc id to the update vector.
5245 @param[in]	table		the table that contains the FTS index.
5246 @param[in,out]	ufield		the fts doc id field in the update vector.
5247 				No new memory is allocated for this in this
5248 				function.
5249 @param[in,out]	next_doc_id	the fts doc id that has been added to the
5250 				update vector.  If 0, a new fts doc id is
5251 				automatically generated.  The memory provided
5252 				for this argument will be used by the update
5253 				vector. Ensure that the life time of this
5254 				memory matches that of the update vector.
5255 @return the fts doc id used in the update vector */
5256 doc_id_t
fts_update_doc_id(dict_table_t * table,upd_field_t * ufield,doc_id_t * next_doc_id)5257 fts_update_doc_id(
5258 	dict_table_t*	table,
5259 	upd_field_t*	ufield,
5260 	doc_id_t*	next_doc_id)
5261 {
5262 	doc_id_t	doc_id;
5263 	dberr_t		error = DB_SUCCESS;
5264 
5265 	if (*next_doc_id) {
5266 		doc_id = *next_doc_id;
5267 	} else {
5268 		/* Get the new document id that will be added. */
5269 		error = fts_get_next_doc_id(table, &doc_id);
5270 	}
5271 
5272 	if (error == DB_SUCCESS) {
5273 		dict_index_t*	clust_index;
5274 		dict_col_t*	col = dict_table_get_nth_col(
5275 			table, table->fts->doc_col);
5276 
5277 		ufield->exp = NULL;
5278 
5279 		ufield->new_val.len = sizeof(doc_id);
5280 
5281 		clust_index = dict_table_get_first_index(table);
5282 
5283 		ufield->field_no = dict_col_get_clust_pos(col, clust_index);
5284 		dict_col_copy_type(col, dfield_get_type(&ufield->new_val));
5285 
5286 		/* It is possible we update record that has
5287 		not yet be sync-ed from last crash. */
5288 
5289 		/* Convert to storage byte order. */
5290 		ut_a(doc_id != FTS_NULL_DOC_ID);
5291 		fts_write_doc_id((byte*) next_doc_id, doc_id);
5292 
5293 		ufield->new_val.data = next_doc_id;
5294                 ufield->new_val.ext = 0;
5295 	}
5296 
5297 	return(doc_id);
5298 }
5299 
5300 /** fts_t constructor.
5301 @param[in]	table	table with FTS indexes
5302 @param[in,out]	heap	memory heap where 'this' is stored */
fts_t(const dict_table_t * table,mem_heap_t * heap)5303 fts_t::fts_t(
5304 	const dict_table_t*	table,
5305 	mem_heap_t*		heap)
5306 	:
5307 	added_synced(0), dict_locked(0),
5308 	add_wq(NULL),
5309 	cache(NULL),
5310 	doc_col(ULINT_UNDEFINED), in_queue(false),
5311 	fts_heap(heap)
5312 {
5313 	ut_a(table->fts == NULL);
5314 
5315 	ib_alloc_t*	heap_alloc = ib_heap_allocator_create(fts_heap);
5316 
5317 	indexes = ib_vector_create(heap_alloc, sizeof(dict_index_t*), 4);
5318 
5319 	dict_table_get_all_fts_indexes(table, indexes);
5320 }
5321 
5322 /** fts_t destructor. */
~fts_t()5323 fts_t::~fts_t()
5324 {
5325 	ut_ad(add_wq == NULL);
5326 
5327 	if (cache != NULL) {
5328 		fts_cache_clear(cache);
5329 		fts_cache_destroy(cache);
5330 		cache = NULL;
5331 	}
5332 
5333 	/* There is no need to call ib_vector_free() on this->indexes
5334 	because it is stored in this->fts_heap. */
5335 }
5336 
5337 /*********************************************************************//**
5338 Create an instance of fts_t.
5339 @return instance of fts_t */
5340 fts_t*
fts_create(dict_table_t * table)5341 fts_create(
5342 /*=======*/
5343 	dict_table_t*	table)		/*!< in/out: table with FTS indexes */
5344 {
5345 	fts_t*		fts;
5346 	mem_heap_t*	heap;
5347 
5348 	heap = mem_heap_create(512);
5349 
5350 	fts = static_cast<fts_t*>(mem_heap_alloc(heap, sizeof(*fts)));
5351 
5352 	new(fts) fts_t(table, heap);
5353 
5354 	return(fts);
5355 }
5356 
5357 /*********************************************************************//**
5358 Free the FTS resources. */
5359 void
fts_free(dict_table_t * table)5360 fts_free(
5361 /*=====*/
5362 	dict_table_t*	table)	/*!< in/out: table with FTS indexes */
5363 {
5364 	fts_t*	fts = table->fts;
5365 
5366 	fts->~fts_t();
5367 
5368 	mem_heap_free(fts->fts_heap);
5369 
5370 	table->fts = NULL;
5371 }
5372 
5373 /*********************************************************************//**
5374 Take a FTS savepoint. */
5375 UNIV_INLINE
5376 void
fts_savepoint_copy(const fts_savepoint_t * src,fts_savepoint_t * dst)5377 fts_savepoint_copy(
5378 /*===============*/
5379 	const fts_savepoint_t*	src,	/*!< in: source savepoint */
5380 	fts_savepoint_t*	dst)	/*!< out: destination savepoint */
5381 {
5382 	const ib_rbt_node_t*	node;
5383 	const ib_rbt_t*		tables;
5384 
5385 	tables = src->tables;
5386 
5387 	for (node = rbt_first(tables); node; node = rbt_next(tables, node)) {
5388 
5389 		fts_trx_table_t*	ftt_dst;
5390 		const fts_trx_table_t**	ftt_src;
5391 
5392 		ftt_src = rbt_value(const fts_trx_table_t*, node);
5393 
5394 		ftt_dst = fts_trx_table_clone(*ftt_src);
5395 
5396 		rbt_insert(dst->tables, &ftt_dst, &ftt_dst);
5397 	}
5398 }
5399 
5400 /*********************************************************************//**
5401 Take a FTS savepoint. */
5402 void
fts_savepoint_take(fts_trx_t * fts_trx,const char * name)5403 fts_savepoint_take(
5404 /*===============*/
5405 	fts_trx_t*	fts_trx,	/*!< in: fts transaction */
5406 	const char*	name)		/*!< in: savepoint name */
5407 {
5408 	mem_heap_t*		heap;
5409 	fts_savepoint_t*	savepoint;
5410 	fts_savepoint_t*	last_savepoint;
5411 
5412 	ut_a(name != NULL);
5413 
5414 	heap = fts_trx->heap;
5415 
5416 	/* The implied savepoint must exist. */
5417 	ut_a(ib_vector_size(fts_trx->savepoints) > 0);
5418 
5419 	last_savepoint = static_cast<fts_savepoint_t*>(
5420 		ib_vector_last(fts_trx->savepoints));
5421 	savepoint = fts_savepoint_create(fts_trx->savepoints, name, heap);
5422 
5423 	if (last_savepoint->tables != NULL) {
5424 		fts_savepoint_copy(last_savepoint, savepoint);
5425 	}
5426 }
5427 
5428 /*********************************************************************//**
5429 Lookup a savepoint instance by name.
5430 @return ULINT_UNDEFINED if not found */
5431 UNIV_INLINE
5432 ulint
fts_savepoint_lookup(ib_vector_t * savepoints,const char * name)5433 fts_savepoint_lookup(
5434 /*==================*/
5435 	ib_vector_t*	savepoints,	/*!< in: savepoints */
5436 	const char*	name)		/*!< in: savepoint name */
5437 {
5438 	ulint			i;
5439 
5440 	ut_a(ib_vector_size(savepoints) > 0);
5441 
5442 	for (i = 1; i < ib_vector_size(savepoints); ++i) {
5443 		fts_savepoint_t*	savepoint;
5444 
5445 		savepoint = static_cast<fts_savepoint_t*>(
5446 			ib_vector_get(savepoints, i));
5447 
5448 		if (strcmp(name, savepoint->name) == 0) {
5449 			return(i);
5450 		}
5451 	}
5452 
5453 	return(ULINT_UNDEFINED);
5454 }
5455 
5456 /*********************************************************************//**
5457 Release the savepoint data identified by  name. All savepoints created
5458 after the named savepoint are kept.
5459 @return DB_SUCCESS or error code */
5460 void
fts_savepoint_release(trx_t * trx,const char * name)5461 fts_savepoint_release(
5462 /*==================*/
5463 	trx_t*		trx,		/*!< in: transaction */
5464 	const char*	name)		/*!< in: savepoint name */
5465 {
5466 	ut_a(name != NULL);
5467 
5468 	ib_vector_t*	savepoints = trx->fts_trx->savepoints;
5469 
5470 	ut_a(ib_vector_size(savepoints) > 0);
5471 
5472 	ulint   i = fts_savepoint_lookup(savepoints, name);
5473 	if (i != ULINT_UNDEFINED) {
5474 		ut_a(i >= 1);
5475 
5476 		fts_savepoint_t*        savepoint;
5477 		savepoint = static_cast<fts_savepoint_t*>(
5478 			ib_vector_get(savepoints, i));
5479 
5480 		if (i == ib_vector_size(savepoints) - 1) {
5481 			/* If the savepoint is the last, we save its
5482 			tables to the  previous savepoint. */
5483 			fts_savepoint_t*	prev_savepoint;
5484 			prev_savepoint = static_cast<fts_savepoint_t*>(
5485 				ib_vector_get(savepoints, i - 1));
5486 
5487 			ib_rbt_t*	tables = savepoint->tables;
5488 			savepoint->tables = prev_savepoint->tables;
5489 			prev_savepoint->tables = tables;
5490 		}
5491 
5492 		fts_savepoint_free(savepoint);
5493 		ib_vector_remove(savepoints, *(void**)savepoint);
5494 
5495 		/* Make sure we don't delete the implied savepoint. */
5496 		ut_a(ib_vector_size(savepoints) > 0);
5497 	}
5498 }
5499 
5500 /**********************************************************************//**
5501 Refresh last statement savepoint. */
5502 void
fts_savepoint_laststmt_refresh(trx_t * trx)5503 fts_savepoint_laststmt_refresh(
5504 /*===========================*/
5505 	trx_t*			trx)	/*!< in: transaction */
5506 {
5507 
5508 	fts_trx_t*              fts_trx;
5509 	fts_savepoint_t*        savepoint;
5510 
5511 	fts_trx = trx->fts_trx;
5512 
5513 	savepoint = static_cast<fts_savepoint_t*>(
5514 		ib_vector_pop(fts_trx->last_stmt));
5515 	fts_savepoint_free(savepoint);
5516 
5517 	ut_ad(ib_vector_is_empty(fts_trx->last_stmt));
5518 	savepoint = fts_savepoint_create(fts_trx->last_stmt, NULL, NULL);
5519 }
5520 
5521 /********************************************************************
5522 Undo the Doc ID add/delete operations in last stmt */
5523 static
5524 void
fts_undo_last_stmt(fts_trx_table_t * s_ftt,fts_trx_table_t * l_ftt)5525 fts_undo_last_stmt(
5526 /*===============*/
5527 	fts_trx_table_t*	s_ftt,	/*!< in: Transaction FTS table */
5528 	fts_trx_table_t*	l_ftt)	/*!< in: last stmt FTS table */
5529 {
5530 	ib_rbt_t*		s_rows;
5531 	ib_rbt_t*		l_rows;
5532 	const ib_rbt_node_t*	node;
5533 
5534 	l_rows = l_ftt->rows;
5535 	s_rows = s_ftt->rows;
5536 
5537 	for (node = rbt_first(l_rows);
5538 	     node;
5539 	     node = rbt_next(l_rows, node)) {
5540 		fts_trx_row_t*	l_row = rbt_value(fts_trx_row_t, node);
5541 		ib_rbt_bound_t	parent;
5542 
5543 		rbt_search(s_rows, &parent, &(l_row->doc_id));
5544 
5545 		if (parent.result == 0) {
5546 			fts_trx_row_t*	s_row = rbt_value(
5547 				fts_trx_row_t, parent.last);
5548 
5549 			switch (l_row->state) {
5550 			case FTS_INSERT:
5551 				ut_free(rbt_remove_node(s_rows, parent.last));
5552 				break;
5553 
5554 			case FTS_DELETE:
5555 				if (s_row->state == FTS_NOTHING) {
5556 					s_row->state = FTS_INSERT;
5557 				} else if (s_row->state == FTS_DELETE) {
5558 					ut_free(rbt_remove_node(
5559 						s_rows, parent.last));
5560 				}
5561 				break;
5562 
5563 			/* FIXME: Check if FTS_MODIFY need to be addressed */
5564 			case FTS_MODIFY:
5565 			case FTS_NOTHING:
5566 				break;
5567 			default:
5568 				ut_error;
5569 			}
5570 		}
5571 	}
5572 }
5573 
5574 /**********************************************************************//**
5575 Rollback to savepoint indentified by name.
5576 @return DB_SUCCESS or error code */
5577 void
fts_savepoint_rollback_last_stmt(trx_t * trx)5578 fts_savepoint_rollback_last_stmt(
5579 /*=============================*/
5580 	trx_t*		trx)		/*!< in: transaction */
5581 {
5582 	ib_vector_t*		savepoints;
5583 	fts_savepoint_t*	savepoint;
5584 	fts_savepoint_t*	last_stmt;
5585 	fts_trx_t*		fts_trx;
5586 	ib_rbt_bound_t		parent;
5587 	const ib_rbt_node_t*    node;
5588 	ib_rbt_t*		l_tables;
5589 	ib_rbt_t*		s_tables;
5590 
5591 	fts_trx = trx->fts_trx;
5592 	savepoints = fts_trx->savepoints;
5593 
5594 	savepoint = static_cast<fts_savepoint_t*>(ib_vector_last(savepoints));
5595 	last_stmt = static_cast<fts_savepoint_t*>(
5596 		ib_vector_last(fts_trx->last_stmt));
5597 
5598 	l_tables = last_stmt->tables;
5599 	s_tables = savepoint->tables;
5600 
5601 	for (node = rbt_first(l_tables);
5602 	     node;
5603 	     node = rbt_next(l_tables, node)) {
5604 
5605 		fts_trx_table_t**	l_ftt;
5606 
5607 		l_ftt = rbt_value(fts_trx_table_t*, node);
5608 
5609 		rbt_search_cmp(
5610 			s_tables, &parent, &(*l_ftt)->table->id,
5611 			fts_trx_table_id_cmp, NULL);
5612 
5613 		if (parent.result == 0) {
5614 			fts_trx_table_t**	s_ftt;
5615 
5616 			s_ftt = rbt_value(fts_trx_table_t*, parent.last);
5617 
5618 			fts_undo_last_stmt(*s_ftt, *l_ftt);
5619 		}
5620 	}
5621 }
5622 
5623 /**********************************************************************//**
5624 Rollback to savepoint indentified by name.
5625 @return DB_SUCCESS or error code */
5626 void
fts_savepoint_rollback(trx_t * trx,const char * name)5627 fts_savepoint_rollback(
5628 /*===================*/
5629 	trx_t*		trx,		/*!< in: transaction */
5630 	const char*	name)		/*!< in: savepoint name */
5631 {
5632 	ulint		i;
5633 	ib_vector_t*	savepoints;
5634 
5635 	ut_a(name != NULL);
5636 
5637 	savepoints = trx->fts_trx->savepoints;
5638 
5639 	/* We pop all savepoints from the the top of the stack up to
5640 	and including the instance that was found. */
5641 	i = fts_savepoint_lookup(savepoints, name);
5642 
5643 	if (i != ULINT_UNDEFINED) {
5644 		fts_savepoint_t*	savepoint;
5645 
5646 		ut_a(i > 0);
5647 
5648 		while (ib_vector_size(savepoints) > i) {
5649 			fts_savepoint_t*	savepoint;
5650 
5651 			savepoint = static_cast<fts_savepoint_t*>(
5652 				ib_vector_pop(savepoints));
5653 
5654 			if (savepoint->name != NULL) {
5655 				/* Since name was allocated on the heap, the
5656 				memory will be released when the transaction
5657 				completes. */
5658 				savepoint->name = NULL;
5659 
5660 				fts_savepoint_free(savepoint);
5661 			}
5662 		}
5663 
5664 		/* Pop all a elements from the top of the stack that may
5665 		have been released. We have to be careful that we don't
5666 		delete the implied savepoint. */
5667 
5668 		for (savepoint = static_cast<fts_savepoint_t*>(
5669 				ib_vector_last(savepoints));
5670 		     ib_vector_size(savepoints) > 1
5671 		     && savepoint->name == NULL;
5672 		     savepoint = static_cast<fts_savepoint_t*>(
5673 				ib_vector_last(savepoints))) {
5674 
5675 			ib_vector_pop(savepoints);
5676 		}
5677 
5678 		/* Make sure we don't delete the implied savepoint. */
5679 		ut_a(ib_vector_size(savepoints) > 0);
5680 
5681 		/* Restore the savepoint. */
5682 		fts_savepoint_take(trx->fts_trx, name);
5683 	}
5684 }
5685 
fts_check_aux_table(const char * name,table_id_t * table_id,index_id_t * index_id)5686 bool fts_check_aux_table(const char *name,
5687                          table_id_t *table_id,
5688                          index_id_t *index_id)
5689 {
5690   ulint len= strlen(name);
5691   const char* ptr;
5692   const char* end= name + len;
5693 
5694   ut_ad(len <= MAX_FULL_NAME_LEN);
5695   ptr= static_cast<const char*>(memchr(name, '/', len));
5696 
5697   if (ptr != NULL)
5698   {
5699     /* We will start the match after the '/' */
5700     ++ptr;
5701     len = end - ptr;
5702   }
5703 
5704   /* All auxiliary tables are prefixed with "FTS_" and the name
5705   length will be at the very least greater than 20 bytes. */
5706   if (ptr && len > 20 && !memcmp(ptr, "FTS_", 4))
5707   {
5708     /* Skip the prefix. */
5709     ptr+= 4;
5710     len-= 4;
5711 
5712     const char *table_id_ptr= ptr;
5713     /* Skip the table id. */
5714     ptr= static_cast<const char*>(memchr(ptr, '_', len));
5715 
5716     if (!ptr)
5717       return false;
5718 
5719     /* Skip the underscore. */
5720     ++ptr;
5721     ut_ad(end > ptr);
5722     len= end - ptr;
5723 
5724     sscanf(table_id_ptr, UINT64PFx, table_id);
5725     /* First search the common table suffix array. */
5726     for (ulint i = 0; fts_common_tables[i]; ++i)
5727     {
5728       if (!strncmp(ptr, fts_common_tables[i], len))
5729         return true;
5730     }
5731 
5732     /* Could be obsolete common tables. */
5733     if ((len == 5 && !memcmp(ptr, "ADDED", len)) ||
5734         (len == 9 && !memcmp(ptr, "STOPWORDS", len)))
5735       return true;
5736 
5737     const char* index_id_ptr= ptr;
5738     /* Skip the index id. */
5739     ptr= static_cast<const char*>(memchr(ptr, '_', len));
5740     if (!ptr)
5741       return false;
5742 
5743     sscanf(index_id_ptr, UINT64PFx, index_id);
5744 
5745     /* Skip the underscore. */
5746     ++ptr;
5747     ut_a(end > ptr);
5748     len= end - ptr;
5749 
5750     if (len > 7)
5751       return false;
5752 
5753     /* Search the FT index specific array. */
5754     for (ulint i = 0; i < FTS_NUM_AUX_INDEX; ++i)
5755     {
5756       if (!memcmp(ptr, "INDEX_", len - 1))
5757         return true;
5758     }
5759 
5760     /* Other FT index specific table(s). */
5761     if (len == 6 && !memcmp(ptr, "DOC_ID", len))
5762       return true;
5763   }
5764 
5765   return false;
5766 }
5767 
/** (table id, index id) pair identifying FTS auxiliary tables */
typedef std::pair<table_id_t,index_id_t> fts_aux_id;
/** Ordered set of auxiliary table id pairs */
typedef std::set<fts_aux_id> fts_space_set_t;
5770 
5771 /** Iterate over all the spaces in the space list and fetch the
5772 fts parent table id and index id.
5773 @param[in,out]	fts_space_set	store the list of tablespace id and
5774 				index id */
fil_get_fts_spaces(fts_space_set_t & fts_space_set)5775 static void fil_get_fts_spaces(fts_space_set_t& fts_space_set)
5776 {
5777   mutex_enter(&fil_system.mutex);
5778 
5779   for (fil_space_t *space= UT_LIST_GET_FIRST(fil_system.space_list);
5780        space;
5781        space= UT_LIST_GET_NEXT(space_list, space))
5782   {
5783     index_id_t index_id= 0;
5784     table_id_t table_id= 0;
5785 
5786     if (space->purpose == FIL_TYPE_TABLESPACE
5787         && fts_check_aux_table(space->name, &table_id, &index_id))
5788       fts_space_set.insert(std::make_pair(table_id, index_id));
5789   }
5790 
5791   mutex_exit(&fil_system.mutex);
5792 }
5793 
5794 /** Check whether the parent table id and index id of fts auxilary
5795 tables with SYS_INDEXES. If it exists then we can safely ignore the
5796 fts table from orphaned tables.
5797 @param[in,out]	fts_space_set	fts space set contains set of auxiliary
5798 				table ids */
fts_check_orphaned_tables(fts_space_set_t & fts_space_set)5799 static void fts_check_orphaned_tables(fts_space_set_t& fts_space_set)
5800 {
5801   btr_pcur_t pcur;
5802   mtr_t	     mtr;
5803   trx_t*     trx = trx_create();
5804   trx->op_info = "checking fts orphaned tables";
5805 
5806   row_mysql_lock_data_dictionary(trx);
5807 
5808   mtr.start();
5809   btr_pcur_open_at_index_side(
5810     true, dict_table_get_first_index(dict_sys.sys_indexes),
5811     BTR_SEARCH_LEAF, &pcur, true, 0, &mtr);
5812 
5813   do
5814   {
5815     const rec_t *rec;
5816     const byte *tbl_field;
5817     const byte *index_field;
5818     ulint len;
5819 
5820     btr_pcur_move_to_next_user_rec(&pcur, &mtr);
5821     if (!btr_pcur_is_on_user_rec(&pcur))
5822       break;
5823 
5824     rec= btr_pcur_get_rec(&pcur);
5825     if (rec_get_deleted_flag(rec, 0))
5826       continue;
5827 
5828     tbl_field= rec_get_nth_field_old(rec, 0, &len);
5829     if (len != 8)
5830       continue;
5831 
5832     index_field= rec_get_nth_field_old(rec, 1, &len);
5833     if (len != 8)
5834       continue;
5835 
5836     table_id_t table_id = mach_read_from_8(tbl_field);
5837     index_id_t index_id = mach_read_from_8(index_field);
5838 
5839     fts_space_set_t::iterator it = fts_space_set.find(
5840 	fts_aux_id(table_id, index_id));
5841 
5842     if (it != fts_space_set.end())
5843       fts_space_set.erase(*it);
5844     else
5845     {
5846       it= fts_space_set.find(fts_aux_id(table_id, 0));
5847       if (it != fts_space_set.end())
5848         fts_space_set.erase(*it);
5849     }
5850   } while(!fts_space_set.empty());
5851 
5852   btr_pcur_close(&pcur);
5853   mtr.commit();
5854   row_mysql_unlock_data_dictionary(trx);
5855   trx->free();
5856 }
5857 
5858 /** Drop all fts auxilary table for the respective fts_id
5859 @param[in]	fts_id	fts auxilary table ids */
fts_drop_all_aux_tables(trx_t * trx,fts_table_t * fts_table)5860 static void fts_drop_all_aux_tables(trx_t *trx, fts_table_t *fts_table)
5861 {
5862   char fts_table_name[MAX_FULL_NAME_LEN];
5863   for (ulint i= 0;i < FTS_NUM_AUX_INDEX; i++)
5864   {
5865     fts_table->suffix= fts_get_suffix(i);
5866     fts_get_table_name(fts_table, fts_table_name, true);
5867 
5868     /* Drop all fts aux and common table */
5869     dberr_t err= fts_drop_table(trx, fts_table_name);
5870 
5871     if (err == DB_FAIL)
5872     {
5873       char *path= fil_make_filepath(NULL, fts_table_name, IBD, false);
5874 
5875       if (path != NULL)
5876       {
5877         os_file_delete_if_exists(innodb_data_file_key, path , NULL);
5878         ut_free(path);
5879       }
5880     }
5881   }
5882 }
5883 
5884 /** Drop all orphaned FTS auxiliary tables, those that don't have
5885 a parent table or FTS index defined on them. */
fts_drop_orphaned_tables()5886 void fts_drop_orphaned_tables()
5887 {
5888   fts_space_set_t fts_space_set;
5889   fil_get_fts_spaces(fts_space_set);
5890 
5891   if (fts_space_set.empty())
5892     return;
5893 
5894   fts_check_orphaned_tables(fts_space_set);
5895 
5896   if (fts_space_set.empty())
5897     return;
5898 
5899   trx_t* trx= trx_create();
5900   trx->op_info= "Drop orphaned aux FTS tables";
5901   row_mysql_lock_data_dictionary(trx);
5902 
5903   for (fts_space_set_t::iterator it = fts_space_set.begin();
5904        it != fts_space_set.end(); it++)
5905   {
5906     fts_table_t fts_table;
5907     dict_table_t *table= dict_table_open_on_id(it->first, TRUE,
5908                                                DICT_TABLE_OP_NORMAL);
5909     if (!table)
5910       continue;
5911 
5912     FTS_INIT_FTS_TABLE(&fts_table, NULL, FTS_COMMON_TABLE, table);
5913     fts_drop_common_tables(trx, &fts_table, true);
5914 
5915     fts_table.type= FTS_INDEX_TABLE;
5916     fts_table.index_id= it->second;
5917     fts_drop_all_aux_tables(trx, &fts_table);
5918 
5919     dict_table_close(table, true, false);
5920   }
5921   trx_commit_for_mysql(trx);
5922   row_mysql_unlock_data_dictionary(trx);
5923   trx->dict_operation_lock_mode= 0;
5924   trx->free();
5925 }
5926 
5927 /**********************************************************************//**
5928 Check whether user supplied stopword table is of the right format.
5929 Caller is responsible to hold dictionary locks.
5930 @return the stopword column charset if qualifies */
5931 CHARSET_INFO*
fts_valid_stopword_table(const char * stopword_table_name)5932 fts_valid_stopword_table(
5933 /*=====================*/
5934 	const char*	stopword_table_name)	/*!< in: Stopword table
5935 						name */
5936 {
5937 	dict_table_t*	table;
5938 	dict_col_t*     col = NULL;
5939 
5940 	if (!stopword_table_name) {
5941 		return(NULL);
5942 	}
5943 
5944 	table = dict_table_get_low(stopword_table_name);
5945 
5946 	if (!table) {
5947 		ib::error() << "User stopword table " << stopword_table_name
5948 			<< " does not exist.";
5949 
5950 		return(NULL);
5951 	} else {
5952 		const char*     col_name;
5953 
5954 		col_name = dict_table_get_col_name(table, 0);
5955 
5956 		if (ut_strcmp(col_name, "value")) {
5957 			ib::error() << "Invalid column name for stopword"
5958 				" table " << stopword_table_name << ". Its"
5959 				" first column must be named as 'value'.";
5960 
5961 			return(NULL);
5962 		}
5963 
5964 		col = dict_table_get_nth_col(table, 0);
5965 
5966 		if (col->mtype != DATA_VARCHAR
5967 		    && col->mtype != DATA_VARMYSQL) {
5968 			ib::error() << "Invalid column type for stopword"
5969 				" table " << stopword_table_name << ". Its"
5970 				" first column must be of varchar type";
5971 
5972 			return(NULL);
5973 		}
5974 	}
5975 
5976 	ut_ad(col);
5977 
5978 	return(fts_get_charset(col->prtype));
5979 }
5980 
5981 /**********************************************************************//**
5982 This function loads the stopword into the FTS cache. It also
5983 records/fetches stopword configuration to/from FTS configure
5984 table, depending on whether we are creating or reloading the
5985 FTS.
5986 @return true if load operation is successful */
5987 bool
fts_load_stopword(const dict_table_t * table,trx_t * trx,const char * session_stopword_table,bool stopword_is_on,bool reload)5988 fts_load_stopword(
5989 /*==============*/
5990 	const dict_table_t*
5991 			table,			/*!< in: Table with FTS */
5992 	trx_t*		trx,			/*!< in: Transactions */
5993 	const char*	session_stopword_table,	/*!< in: Session stopword table
5994 						name */
5995 	bool		stopword_is_on,		/*!< in: Whether stopword
5996 						option is turned on/off */
5997 	bool		reload)			/*!< in: Whether it is
5998 						for reloading FTS table */
5999 {
6000 	fts_table_t	fts_table;
6001 	fts_string_t	str;
6002 	dberr_t		error = DB_SUCCESS;
6003 	ulint		use_stopword;
6004 	fts_cache_t*	cache;
6005 	const char*	stopword_to_use = NULL;
6006 	ibool		new_trx = FALSE;
6007 	byte		str_buffer[MAX_FULL_NAME_LEN + 1];
6008 
6009 	FTS_INIT_FTS_TABLE(&fts_table, "CONFIG", FTS_COMMON_TABLE, table);
6010 
6011 	cache = table->fts->cache;
6012 
6013 	if (!reload && !(cache->stopword_info.status & STOPWORD_NOT_INIT)) {
6014 		return true;
6015 	}
6016 
6017 	if (!trx) {
6018 		trx = trx_create();
6019 		if (srv_read_only_mode) {
6020 			trx_start_internal_read_only(trx);
6021 		} else {
6022 			trx_start_internal(trx);
6023 		}
6024 		trx->op_info = "upload FTS stopword";
6025 		new_trx = TRUE;
6026 	}
6027 
6028 	/* First check whether stopword filtering is turned off */
6029 	if (reload) {
6030 		error = fts_config_get_ulint(
6031 			trx, &fts_table, FTS_USE_STOPWORD, &use_stopword);
6032 	} else {
6033 		use_stopword = (ulint) stopword_is_on;
6034 
6035 		error = fts_config_set_ulint(
6036 			trx, &fts_table, FTS_USE_STOPWORD, use_stopword);
6037 	}
6038 
6039 	if (error != DB_SUCCESS) {
6040 		goto cleanup;
6041 	}
6042 
6043 	/* If stopword is turned off, no need to continue to load the
6044 	stopword into cache, but still need to do initialization */
6045 	if (!use_stopword) {
6046 		cache->stopword_info.status = STOPWORD_OFF;
6047 		goto cleanup;
6048 	}
6049 
6050 	if (reload) {
6051 		/* Fetch the stopword table name from FTS config
6052 		table */
6053 		str.f_n_char = 0;
6054 		str.f_str = str_buffer;
6055 		str.f_len = sizeof(str_buffer) - 1;
6056 
6057 		error = fts_config_get_value(
6058 			trx, &fts_table, FTS_STOPWORD_TABLE_NAME, &str);
6059 
6060 		if (error != DB_SUCCESS) {
6061 			goto cleanup;
6062 		}
6063 
6064 		if (*str.f_str) {
6065 			stopword_to_use = (const char*) str.f_str;
6066 		}
6067 	} else {
6068 		stopword_to_use = session_stopword_table;
6069 	}
6070 
6071 	if (stopword_to_use
6072 	    && fts_load_user_stopword(table->fts, stopword_to_use,
6073 				      &cache->stopword_info)) {
6074 		/* Save the stopword table name to the configure
6075 		table */
6076 		if (!reload) {
6077 			str.f_n_char = 0;
6078 			str.f_str = (byte*) stopword_to_use;
6079 			str.f_len = ut_strlen(stopword_to_use);
6080 
6081 			error = fts_config_set_value(
6082 				trx, &fts_table, FTS_STOPWORD_TABLE_NAME, &str);
6083 		}
6084 	} else {
6085 		/* Load system default stopword list */
6086 		fts_load_default_stopword(&cache->stopword_info);
6087 	}
6088 
6089 cleanup:
6090 	if (new_trx) {
6091 		if (error == DB_SUCCESS) {
6092 			fts_sql_commit(trx);
6093 		} else {
6094 			fts_sql_rollback(trx);
6095 		}
6096 
6097 		trx->free();
6098 	}
6099 
6100 	if (!cache->stopword_info.cached_stopword) {
6101 		cache->stopword_info.cached_stopword = rbt_create_arg_cmp(
6102 			sizeof(fts_tokenizer_word_t), innobase_fts_text_cmp,
6103 			&my_charset_latin1);
6104 	}
6105 
6106 	return error == DB_SUCCESS;
6107 }
6108 
6109 /**********************************************************************//**
6110 Callback function when we initialize the FTS at the start up
6111 time. It recovers the maximum Doc IDs presented in the current table.
6112 @return: always returns TRUE */
6113 static
6114 ibool
fts_init_get_doc_id(void * row,void * user_arg)6115 fts_init_get_doc_id(
6116 /*================*/
6117 	void*	row,			/*!< in: sel_node_t* */
6118 	void*	user_arg)		/*!< in: fts cache */
6119 {
6120 	doc_id_t	doc_id = FTS_NULL_DOC_ID;
6121 	sel_node_t*	node = static_cast<sel_node_t*>(row);
6122 	que_node_t*	exp = node->select_list;
6123 	fts_cache_t*    cache = static_cast<fts_cache_t*>(user_arg);
6124 
6125 	ut_ad(ib_vector_is_empty(cache->get_docs));
6126 
6127 	/* Copy each indexed column content into doc->text.f_str */
6128 	if (exp) {
6129 		dfield_t*	dfield = que_node_get_val(exp);
6130 		dtype_t*        type = dfield_get_type(dfield);
6131 		void*           data = dfield_get_data(dfield);
6132 
6133 		ut_a(dtype_get_mtype(type) == DATA_INT);
6134 
6135 		doc_id = static_cast<doc_id_t>(mach_read_from_8(
6136 			static_cast<const byte*>(data)));
6137 
6138 		if (doc_id >= cache->next_doc_id) {
6139 			cache->next_doc_id = doc_id + 1;
6140 		}
6141 	}
6142 
6143 	return(TRUE);
6144 }
6145 
6146 /**********************************************************************//**
6147 Callback function when we initialize the FTS at the start up
6148 time. It recovers Doc IDs that have not sync-ed to the auxiliary
6149 table, and require to bring them back into FTS index.
6150 @return: always returns TRUE */
6151 static
6152 ibool
fts_init_recover_doc(void * row,void * user_arg)6153 fts_init_recover_doc(
6154 /*=================*/
6155 	void*	row,			/*!< in: sel_node_t* */
6156 	void*	user_arg)		/*!< in: fts cache */
6157 {
6158 
6159 	fts_doc_t       doc;
6160 	ulint		doc_len = 0;
6161 	ulint		field_no = 0;
6162 	fts_get_doc_t*  get_doc = static_cast<fts_get_doc_t*>(user_arg);
6163 	doc_id_t	doc_id = FTS_NULL_DOC_ID;
6164 	sel_node_t*	node = static_cast<sel_node_t*>(row);
6165 	que_node_t*	exp = node->select_list;
6166 	fts_cache_t*	cache = get_doc->cache;
6167 	st_mysql_ftparser*	parser = get_doc->index_cache->index->parser;
6168 
6169 	fts_doc_init(&doc);
6170 	doc.found = TRUE;
6171 
6172 	ut_ad(cache);
6173 
6174 	/* Copy each indexed column content into doc->text.f_str */
6175 	while (exp) {
6176 		dfield_t*	dfield = que_node_get_val(exp);
6177 		ulint		len = dfield_get_len(dfield);
6178 
6179 		if (field_no == 0) {
6180 			dtype_t*        type = dfield_get_type(dfield);
6181 			void*           data = dfield_get_data(dfield);
6182 
6183 			ut_a(dtype_get_mtype(type) == DATA_INT);
6184 
6185 			doc_id = static_cast<doc_id_t>(mach_read_from_8(
6186 				static_cast<const byte*>(data)));
6187 
6188 			field_no++;
6189 			exp = que_node_get_next(exp);
6190 			continue;
6191 		}
6192 
6193 		if (len == UNIV_SQL_NULL) {
6194 			exp = que_node_get_next(exp);
6195 			continue;
6196 		}
6197 
6198 		ut_ad(get_doc);
6199 
6200 		if (!get_doc->index_cache->charset) {
6201 			get_doc->index_cache->charset = fts_get_charset(
6202 				dfield->type.prtype);
6203 		}
6204 
6205 		doc.charset = get_doc->index_cache->charset;
6206 
6207 		if (dfield_is_ext(dfield)) {
6208 			dict_table_t*	table = cache->sync->table;
6209 
6210 			doc.text.f_str = btr_copy_externally_stored_field(
6211 				&doc.text.f_len,
6212 				static_cast<byte*>(dfield_get_data(dfield)),
6213 				table->space->zip_size(), len,
6214 				static_cast<mem_heap_t*>(doc.self_heap->arg));
6215 		} else {
6216 			doc.text.f_str = static_cast<byte*>(
6217 				dfield_get_data(dfield));
6218 
6219 			doc.text.f_len = len;
6220 		}
6221 
6222 		if (field_no == 1) {
6223 			fts_tokenize_document(&doc, NULL, parser);
6224 		} else {
6225 			fts_tokenize_document_next(&doc, doc_len, NULL, parser);
6226 		}
6227 
6228 		exp = que_node_get_next(exp);
6229 
6230 		doc_len += (exp) ? len + 1 : len;
6231 
6232 		field_no++;
6233 	}
6234 
6235 	fts_cache_add_doc(cache, get_doc->index_cache, doc_id, doc.tokens);
6236 
6237 	fts_doc_free(&doc);
6238 
6239 	cache->added++;
6240 
6241 	if (doc_id >= cache->next_doc_id) {
6242 		cache->next_doc_id = doc_id + 1;
6243 	}
6244 
6245 	return(TRUE);
6246 }
6247 
6248 /**********************************************************************//**
6249 This function brings FTS index in sync when FTS index is first
6250 used. There are documents that have not yet sync-ed to auxiliary
6251 tables from last server abnormally shutdown, we will need to bring
6252 such document into FTS cache before any further operations
6253 @return TRUE if all OK */
6254 ibool
fts_init_index(dict_table_t * table,ibool has_cache_lock)6255 fts_init_index(
6256 /*===========*/
6257 	dict_table_t*	table,		/*!< in: Table with FTS */
6258 	ibool		has_cache_lock)	/*!< in: Whether we already have
6259 					cache lock */
6260 {
6261 	dict_index_t*   index;
6262 	doc_id_t        start_doc;
6263 	fts_get_doc_t*  get_doc = NULL;
6264 	fts_cache_t*    cache = table->fts->cache;
6265 	bool		need_init = false;
6266 
6267 	ut_ad(!mutex_own(&dict_sys.mutex));
6268 
6269 	/* First check cache->get_docs is initialized */
6270 	if (!has_cache_lock) {
6271 		rw_lock_x_lock(&cache->lock);
6272 	}
6273 
6274 	rw_lock_x_lock(&cache->init_lock);
6275 	if (cache->get_docs == NULL) {
6276 		cache->get_docs = fts_get_docs_create(cache);
6277 	}
6278 	rw_lock_x_unlock(&cache->init_lock);
6279 
6280 	if (table->fts->added_synced) {
6281 		goto func_exit;
6282 	}
6283 
6284 	need_init = true;
6285 
6286 	start_doc = cache->synced_doc_id;
6287 
6288 	if (!start_doc) {
6289 		fts_cmp_set_sync_doc_id(table, 0, TRUE, &start_doc);
6290 		cache->synced_doc_id = start_doc;
6291 	}
6292 
6293 	/* No FTS index, this is the case when previous FTS index
6294 	dropped, and we re-initialize the Doc ID system for subsequent
6295 	insertion */
6296 	if (ib_vector_is_empty(cache->get_docs)) {
6297 		index = table->fts_doc_id_index;
6298 
6299 		ut_a(index);
6300 
6301 		fts_doc_fetch_by_doc_id(NULL, start_doc, index,
6302 					FTS_FETCH_DOC_BY_ID_LARGE,
6303 					fts_init_get_doc_id, cache);
6304 	} else {
6305 		if (table->fts->cache->stopword_info.status
6306 		    & STOPWORD_NOT_INIT) {
6307 			fts_load_stopword(table, NULL, NULL, true, true);
6308 		}
6309 
6310 		for (ulint i = 0; i < ib_vector_size(cache->get_docs); ++i) {
6311 			get_doc = static_cast<fts_get_doc_t*>(
6312 				ib_vector_get(cache->get_docs, i));
6313 
6314 			index = get_doc->index_cache->index;
6315 
6316 			fts_doc_fetch_by_doc_id(NULL, start_doc, index,
6317 						FTS_FETCH_DOC_BY_ID_LARGE,
6318 						fts_init_recover_doc, get_doc);
6319 		}
6320 	}
6321 
6322 	table->fts->added_synced = true;
6323 
6324 	fts_get_docs_clear(cache->get_docs);
6325 
6326 func_exit:
6327 	if (!has_cache_lock) {
6328 		rw_lock_x_unlock(&cache->lock);
6329 	}
6330 
6331 	if (need_init) {
6332 		mutex_enter(&dict_sys.mutex);
6333 		/* Register the table with the optimize thread. */
6334 		fts_optimize_add_table(table);
6335 		mutex_exit(&dict_sys.mutex);
6336 	}
6337 
6338 	return(TRUE);
6339 }
6340