1 /***************************************************************************** 2 3 Copyright (c) 2006, 2020, Oracle and/or its affiliates. All rights reserved. 4 5 This program is free software; you can redistribute it and/or modify it under 6 the terms of the GNU General Public License, version 2.0, as published by the 7 Free Software Foundation. 8 9 This program is also distributed with certain software (including but not 10 limited to OpenSSL) that is licensed under separate terms, as designated in a 11 particular file or component or in included license documentation. The authors 12 of MySQL hereby grant you an additional permission to link the program and 13 your derivative works with the separately licensed software that they have 14 included with MySQL. 15 16 This program is distributed in the hope that it will be useful, but WITHOUT 17 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 18 FOR A PARTICULAR PURPOSE. See the GNU General Public License, version 2.0, 19 for more details. 20 21 You should have received a copy of the GNU General Public License along with 22 this program; if not, write to the Free Software Foundation, Inc., 23 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 24 25 *****************************************************************************/ 26 27 /** @file include/fts0fts.h 28 Full text search header file 29 30 Created 2011/09/02 Sunny Bains 31 ***********************************************************************/ 32 33 #ifndef fts0fts_h 34 #define fts0fts_h 35 36 #include "ha_prototypes.h" 37 38 #include "data0type.h" 39 #include "data0types.h" 40 #include "dict0types.h" 41 #include "ft_global.h" 42 #include "hash0hash.h" 43 #include "mem0mem.h" 44 #include "mysql/plugin_ftparser.h" 45 #include "que0types.h" 46 #include "rem0types.h" 47 #include "row0types.h" 48 #include "trx0types.h" 49 #include "ut0rbt.h" 50 #include "ut0vec.h" 51 #include "ut0wqueue.h" 52 53 /** "NULL" value of a document id. */ 54 #define FTS_NULL_DOC_ID 0 55 56 /** FTS hidden column that is used to map to and from the row */ 57 #define FTS_DOC_ID_COL_NAME "FTS_DOC_ID" 58 59 /** The name of the index created by FTS */ 60 #define FTS_DOC_ID_INDEX_NAME "FTS_DOC_ID_INDEX" 61 62 #define FTS_DOC_ID_INDEX_NAME_LEN 16 63 64 /** Doc ID is a 8 byte value */ 65 #define FTS_DOC_ID_LEN 8 66 67 /** The number of fields to sort when we build FT index with 68 FIC. Three fields are sort: (word, doc_id, position) */ 69 #define FTS_NUM_FIELDS_SORT 3 70 71 /** Maximum number of rows in a table, smaller than which, we will 72 optimize using a 4 byte Doc ID for FIC merge sort to reduce sort size */ 73 #define MAX_DOC_ID_OPT_VAL 1073741824 74 75 /** Document id type. */ 76 typedef ib_uint64_t doc_id_t; 77 78 /** doc_id_t printf format */ 79 #define FTS_DOC_ID_FORMAT IB_ID_FMT 80 81 /** Convert document id to the InnoDB (BIG ENDIAN) storage format. */ 82 #define fts_write_doc_id(d, s) mach_write_to_8(d, s) 83 84 /** Read a document id to internal format. */ 85 #define fts_read_doc_id(s) mach_read_from_8(s) 86 87 /** Bind the doc id to a variable */ 88 #define fts_bind_doc_id(i, n, v) pars_info_bind_int8_literal(i, n, v) 89 90 /** Defines for FTS query mode, they have the same values as 91 those defined in mysql file ft_global.h */ 92 #define FTS_NL 0 93 #define FTS_BOOL 1 94 #define FTS_SORTED 2 95 #define FTS_EXPAND 4 96 #define FTS_NO_RANKING 8 97 #define FTS_PROXIMITY 16 98 #define FTS_PHRASE 32 99 #define FTS_OPT_RANKING 64 100 101 #define FTS_INDEX_TABLE_IND_NAME "FTS_INDEX_TABLE_IND" 102 #define FTS_COMMON_TABLE_IND_NAME "FTS_COMMON_TABLE_IND" 103 104 /** The number of FTS index partitions for a fulltext idnex */ 105 #define FTS_NUM_AUX_INDEX 6 106 107 /** The number of FTS AUX common table for a fulltext idnex */ 108 #define FTS_NUM_AUX_COMMON 5 109 110 /** Threshold where our optimize thread automatically kicks in */ 111 #define FTS_OPTIMIZE_THRESHOLD 10000000 112 113 /** Threshold to avoid exhausting of doc ids. Consecutive doc id difference 114 should not exceed FTS_DOC_ID_MAX_STEP */ 115 #define FTS_DOC_ID_MAX_STEP 65535 116 117 /** Maximum possible Fulltext word length */ 118 #define FTS_MAX_WORD_LEN HA_FT_MAXBYTELEN 119 120 /** Maximum possible Fulltext word length (in characters) */ 121 #define FTS_MAX_WORD_LEN_IN_CHAR HA_FT_MAXCHARLEN 122 123 /** Number of columns in FTS AUX Tables */ 124 #define FTS_DELETED_TABLE_NUM_COLS 1 125 #define FTS_CONFIG_TABLE_NUM_COLS 2 126 #define FTS_AUX_INDEX_TABLE_NUM_COLS 5 127 128 /** DELETED_TABLE(doc_id BIGINT UNSIGNED) */ 129 #define FTS_DELETED_TABLE_COL_LEN 8 130 /** CONFIG_TABLE(key CHAR(50), value CHAR(200)) */ 131 #define FTS_CONFIG_TABLE_KEY_COL_LEN 50 132 #define FTS_CONFIG_TABLE_VALUE_COL_LEN 200 133 134 #define FTS_INDEX_WORD_LEN FTS_MAX_WORD_LEN 135 #define FTS_INDEX_FIRST_DOC_ID_LEN 8 136 #define FTS_INDEX_LAST_DOC_ID_LEN 8 137 #define FTS_INDEX_DOC_COUNT_LEN 4 138 /* BLOB COLUMN, 0 means VARIABLE SIZE */ 139 #define FTS_INDEX_ILIST_LEN 0 140 141 extern const char *FTS_PREFIX; 142 extern const char *FTS_SUFFIX_BEING_DELETED; 143 extern const char *FTS_SUFFIX_BEING_DELETED_CACHE; 144 extern const char *FTS_SUFFIX_CONFIG; 145 extern const char *FTS_SUFFIX_DELETED; 146 extern const char *FTS_SUFFIX_DELETED_CACHE; 147 148 extern const char *FTS_PREFIX_5_7; 149 extern const char *FTS_SUFFIX_CONFIG_5_7; 150 151 /** Variable specifying the FTS parallel sort degree */ 152 extern ulong fts_sort_pll_degree; 153 154 /** Variable specifying the number of word to optimize for each optimize table 155 call */ 156 extern ulong fts_num_word_optimize; 157 158 /** Variable specifying whether we do additional FTS diagnostic printout 159 in the log */ 160 extern bool fts_enable_diag_print; 161 162 /** FTS rank type, which will be between 0 .. 1 inclusive */ 163 typedef float fts_rank_t; 164 165 /** Structure to manage FTS AUX table name and MDL during its drop */ 166 struct aux_name_vec_t { 167 /** AUX table name */ 168 std::vector<char *> aux_name; 169 }; 170 171 /** Type of a row during a transaction. FTS_NOTHING means the row can be 172 forgotten from the FTS system's POV, FTS_INVALID is an internal value used 173 to mark invalid states. 174 175 NOTE: Do not change the order or value of these, fts_trx_row_get_new_state 176 depends on them being exactly as they are. */ 177 enum fts_row_state { 178 FTS_INSERT = 0, 179 FTS_MODIFY, 180 FTS_DELETE, 181 FTS_NOTHING, 182 FTS_INVALID 183 }; 184 185 /** The FTS table types. */ 186 enum fts_table_type_t { 187 FTS_INDEX_TABLE, /*!< FTS auxiliary table that is 188 specific to a particular FTS index 189 on a table */ 190 191 FTS_COMMON_TABLE, /*!< FTS auxiliary table that is common 192 for all FTS index on a table */ 193 194 FTS_OBSOLETED_TABLE /*!< FTS obsoleted tables like DOC_ID, 195 ADDED, STOPWORDS */ 196 }; 197 198 struct fts_doc_t; 199 struct fts_cache_t; 200 struct fts_token_t; 201 struct fts_doc_ids_t; 202 struct fts_index_cache_t; 203 204 /** Initialize the "fts_table" for internal query into FTS auxiliary 205 tables */ 206 #define FTS_INIT_FTS_TABLE(fts_table, m_suffix, m_type, m_table) \ 207 do { \ 208 (fts_table)->suffix = m_suffix; \ 209 (fts_table)->type = m_type; \ 210 (fts_table)->table_id = m_table->id; \ 211 (fts_table)->parent = m_table->name.m_name; \ 212 (fts_table)->table = m_table; \ 213 } while (0); 214 215 #define FTS_INIT_INDEX_TABLE(fts_table, m_suffix, m_type, m_index) \ 216 do { \ 217 (fts_table)->suffix = m_suffix; \ 218 (fts_table)->type = m_type; \ 219 (fts_table)->table_id = m_index->table->id; \ 220 (fts_table)->parent = m_index->table->name.m_name; \ 221 (fts_table)->table = m_index->table; \ 222 (fts_table)->index_id = m_index->id; \ 223 } while (0); 224 225 /** Information about changes in a single transaction affecting 226 the FTS system. */ 227 struct fts_trx_t { 228 trx_t *trx; /*!< InnoDB transaction */ 229 230 ib_vector_t *savepoints; /*!< Active savepoints, must have at 231 least one element, the implied 232 savepoint */ 233 ib_vector_t *last_stmt; /*!< last_stmt */ 234 235 mem_heap_t *heap; /*!< heap */ 236 }; 237 238 /** Information required for transaction savepoint handling. */ 239 struct fts_savepoint_t { 240 char *name; /*!< First entry is always NULL, the 241 default instance. Otherwise the name 242 of the savepoint */ 243 244 ib_rbt_t *tables; /*!< Modified FTS tables */ 245 }; 246 247 /** Information about changed rows in a transaction for a single table. */ 248 struct fts_trx_table_t { 249 dict_table_t *table; /*!< table */ 250 251 fts_trx_t *fts_trx; /*!< link to parent */ 252 253 ib_rbt_t *rows; /*!< rows changed; indexed by doc-id, 254 cells are fts_trx_row_t* */ 255 256 fts_doc_ids_t *added_doc_ids; /*!< list of added doc ids (NULL until 257 the first addition) */ 258 259 /*!< for adding doc ids */ 260 que_t *docs_added_graph; 261 }; 262 263 /** Information about one changed row in a transaction. */ 264 struct fts_trx_row_t { 265 doc_id_t doc_id; /*!< Id of the ins/upd/del document */ 266 267 fts_row_state state; /*!< state of the row */ 268 269 ib_vector_t *fts_indexes; /*!< The indexes that are affected */ 270 }; 271 272 /** List of document ids that were added during a transaction. This 273 list is passed on to a background 'Add' thread and OPTIMIZE, so it 274 needs its own memory heap. */ 275 struct fts_doc_ids_t { 276 ib_vector_t *doc_ids; /*!< document ids (each element is 277 of type doc_id_t). */ 278 279 ib_alloc_t *self_heap; /*!< Allocator used to create an 280 instance of this type and the 281 doc_ids vector */ 282 }; 283 284 // FIXME: Get rid of this if possible. 285 /** Since MySQL's character set support for Unicode is woefully inadequate 286 (it supports basic operations like isalpha etc. only for 8-bit characters), 287 we have to implement our own. We use UTF-16 without surrogate processing 288 as our in-memory format. This typedef is a single such character. */ 289 typedef unsigned short ib_uc_t; 290 291 /** An UTF-16 ro UTF-8 string. */ 292 struct fts_string_t { 293 byte *f_str; /*!< string, not necessary terminated in 294 any way */ 295 ulint f_len; /*!< Length of the string in bytes */ 296 ulint f_n_char; /*!< Number of characters */ 297 }; 298 299 /** Query ranked doc ids. */ 300 struct fts_ranking_t { 301 doc_id_t doc_id; /*!< Document id */ 302 303 fts_rank_t rank; /*!< Rank is between 0 .. 1 */ 304 305 byte *words; /*!< this contains the words 306 that were queried 307 and found in this document */ 308 ulint words_len; /*!< words len */ 309 }; 310 311 /** Query result. */ 312 struct fts_result_t { 313 ib_rbt_node_t *current; /*!< Current element */ 314 315 ib_rbt_t *rankings_by_id; /*!< RB tree of type fts_ranking_t 316 indexed by doc id */ 317 ib_rbt_t *rankings_by_rank; /*!< RB tree of type fts_ranking_t 318 indexed by rank */ 319 }; 320 321 /** This is used to generate the FTS auxiliary table name, we need the 322 table id and the index id to generate the column specific FTS auxiliary 323 table name. */ 324 struct fts_table_t { 325 const char *parent; /*!< Parent table name, this is 326 required only for the database 327 name */ 328 329 fts_table_type_t type; /*!< The auxiliary table type */ 330 331 table_id_t table_id; /*!< The table id */ 332 333 space_index_t index_id; /*!< The index id */ 334 335 const char *suffix; /*!< The suffix of the fts auxiliary 336 table name, can be NULL, not used 337 everywhere (yet) */ 338 const dict_table_t *table; /*!< Parent table */ 339 CHARSET_INFO *charset; /*!< charset info if it is for FTS 340 index auxiliary table */ 341 }; 342 343 enum fts_status { 344 BG_THREAD_STOP = 1, /*!< TRUE if the FTS background thread 345 has finished reading the ADDED table, 346 meaning more items can be added to 347 the table. */ 348 349 BG_THREAD_READY = 2, /*!< TRUE if the FTS background thread 350 is ready */ 351 352 ADD_THREAD_STARTED = 4, /*!< TRUE if the FTS add thread 353 has started */ 354 355 ADDED_TABLE_SYNCED = 8, /*!< TRUE if the ADDED table record is 356 sync-ed after crash recovery */ 357 }; 358 359 typedef enum fts_status fts_status_t; 360 361 /** The state of the FTS sub system. */ 362 class fts_t { 363 public: 364 /** fts_t constructor. 365 @param[in] table table with FTS indexes 366 @param[in,out] heap memory heap where 'this' is stored */ 367 fts_t(dict_table_t *table, mem_heap_t *heap); 368 369 /** fts_t destructor. */ 370 ~fts_t(); 371 372 /** Mutex protecting bg_threads* and fts_add_wq. */ 373 ib_mutex_t bg_threads_mutex; 374 375 /** Number of background threads accessing this table. */ 376 ulint bg_threads; 377 378 /** Status bit regarding fts running state. TRUE if background 379 threads running should stop themselves. */ 380 ulint fts_status; 381 382 /** Work queue for scheduling jobs for the FTS 'Add' thread, or NULL 383 if the thread has not yet been created. Each work item is a 384 fts_trx_doc_ids_t*. */ 385 ib_wqueue_t *add_wq; 386 387 /** FTS memory buffer for this table, or NULL if the table has no FTS 388 index. */ 389 fts_cache_t *cache; 390 391 /** FTS doc id hidden column number in the CLUSTERED index. */ 392 ulint doc_col; 393 394 /** Vector of FTS indexes, this is mainly for caching purposes. */ 395 ib_vector_t *indexes; 396 397 /** Heap for fts_t allocation. */ 398 mem_heap_t *fts_heap; 399 }; 400 401 struct fts_stopword_t; 402 403 /** status bits for fts_stopword_t status field. */ 404 #define STOPWORD_NOT_INIT 0x1 405 #define STOPWORD_OFF 0x2 406 #define STOPWORD_FROM_DEFAULT 0x4 407 #define STOPWORD_USER_TABLE 0x8 408 409 extern const char *fts_default_stopword[]; 410 411 /** Variable specifying the maximum FTS cache size for each table */ 412 extern ulong fts_max_cache_size; 413 414 /** Variable specifying the total memory allocated for FTS cache */ 415 extern ulong fts_max_total_cache_size; 416 417 /** Variable specifying the FTS result cache limit for each query */ 418 extern ulong fts_result_cache_limit; 419 420 /** Variable specifying the maximum FTS max token size */ 421 extern ulong fts_max_token_size; 422 423 /** Variable specifying the minimum FTS max token size */ 424 extern ulong fts_min_token_size; 425 426 /** Whether the total memory used for FTS cache is exhausted, and we will 427 need a sync to free some memory */ 428 extern bool fts_need_sync; 429 430 /** Variable specifying the table that has Fulltext index to display its 431 content through information schema table */ 432 extern char *fts_internal_tbl_name; 433 434 #define fts_que_graph_free(graph) \ 435 do { \ 436 que_graph_free(graph); \ 437 } while (0) 438 439 /** Create a FTS cache. */ 440 fts_cache_t *fts_cache_create( 441 dict_table_t *table); /*!< table owns the FTS cache */ 442 443 /** Create a FTS index cache. 444 @return Index Cache */ 445 fts_index_cache_t *fts_cache_index_cache_create( 446 dict_table_t *table, /*!< in: table with FTS index */ 447 dict_index_t *index); /*!< in: FTS index */ 448 449 /** Remove a FTS index cache 450 @param[in] table table with FTS index 451 @param[in] index FTS index */ 452 void fts_cache_index_cache_remove(dict_table_t *table, dict_index_t *index); 453 454 /** Get the next available document id. This function creates a new 455 transaction to generate the document id. 456 @return DB_SUCCESS if OK */ 457 dberr_t fts_get_next_doc_id(const dict_table_t *table, /*!< in: table */ 458 doc_id_t *doc_id); /*!< out: new document id */ 459 /** Update the next and last Doc ID in the CONFIG table to be the input 460 "doc_id" value (+ 1). We would do so after each FTS index build or 461 table truncate */ 462 void fts_update_next_doc_id( 463 trx_t *trx, /*!< in/out: transaction */ 464 const dict_table_t *table, /*!< in: table */ 465 const char *table_name, /*!< in: table name, or NULL */ 466 doc_id_t doc_id); /*!< in: DOC ID to set */ 467 468 /** Create a new document id . 469 @return DB_SUCCESS if all went well else error */ 470 dberr_t fts_create_doc_id(dict_table_t *table, /*!< in: row is of this 471 table. */ 472 dtuple_t *row, /*!< in/out: add doc id 473 value to this row. This is the 474 current row that is being 475 inserted. */ 476 mem_heap_t *heap); /*!< in: heap */ 477 478 /** Create a new fts_doc_ids_t. 479 @return new fts_doc_ids_t. */ 480 fts_doc_ids_t *fts_doc_ids_create(void); 481 482 /** Free a fts_doc_ids_t. */ 483 void fts_doc_ids_free(fts_doc_ids_t *doc_ids); /*!< in: doc_ids to free */ 484 485 /** Notify the FTS system about an operation on an FTS-indexed table. */ 486 void fts_trx_add_op(trx_t *trx, /*!< in: InnoDB transaction */ 487 dict_table_t *table, /*!< in: table */ 488 doc_id_t doc_id, /*!< in: doc id */ 489 fts_row_state state, /*!< in: state of the row */ 490 ib_vector_t *fts_indexes); /*!< in: FTS indexes affected 491 (NULL=all) */ 492 493 /** Free an FTS trx. */ 494 void fts_trx_free(fts_trx_t *fts_trx); /*!< in, own: FTS trx */ 495 496 /** Check if common tables already exist 497 @param[in] table table with fts index 498 @return true on success, false on failure */ 499 bool fts_check_common_tables_exist(const dict_table_t *table); 500 501 /** Creates the common auxiliary tables needed for supporting an FTS index 502 on the given table. row_mysql_lock_data_dictionary must have been called 503 before this. 504 The following tables are created. 505 CREATE TABLE $FTS_PREFIX_DELETED 506 (doc_id BIGINT UNSIGNED, UNIQUE CLUSTERED INDEX on doc_id) 507 CREATE TABLE $FTS_PREFIX_DELETED_CACHE 508 (doc_id BIGINT UNSIGNED, UNIQUE CLUSTERED INDEX on doc_id) 509 CREATE TABLE $FTS_PREFIX_BEING_DELETED 510 (doc_id BIGINT UNSIGNED, UNIQUE CLUSTERED INDEX on doc_id) 511 CREATE TABLE $FTS_PREFIX_BEING_DELETED_CACHE 512 (doc_id BIGINT UNSIGNED, UNIQUE CLUSTERED INDEX on doc_id) 513 CREATE TABLE $FTS_PREFIX_CONFIG 514 (key CHAR(50), value CHAR(200), UNIQUE CLUSTERED INDEX on key) 515 @param[in,out] trx transaction 516 @param[in] table table with FTS index 517 @param[in] name table name normalized 518 @param[in] skip_doc_id_index Skip index on doc id 519 @return DB_SUCCESS if succeed */ 520 dberr_t fts_create_common_tables(trx_t *trx, const dict_table_t *table, 521 const char *name, bool skip_doc_id_index) 522 MY_ATTRIBUTE((warn_unused_result)); 523 524 /** Creates the column specific ancillary tables needed for supporting an 525 FTS index on the given table. row_mysql_lock_data_dictionary must have 526 been called before this. 527 528 All FTS AUX Index tables have the following schema. 529 CREAT TABLE $FTS_PREFIX_INDEX_[1-6]( 530 word VARCHAR(FTS_MAX_WORD_LEN), 531 first_doc_id INT NOT NULL, 532 last_doc_id UNSIGNED NOT NULL, 533 doc_count UNSIGNED INT NOT NULL, 534 ilist VARBINARY NOT NULL, 535 UNIQUE CLUSTERED INDEX ON (word, first_doc_id)) 536 @param[in,out] trx transaction 537 @param[in] index index instance 538 @return DB_SUCCESS or error code */ 539 dberr_t fts_create_index_tables(trx_t *trx, dict_index_t *index) 540 MY_ATTRIBUTE((warn_unused_result)); 541 542 /** Create auxiliary index tables for an FTS index. 543 @param[in,out] trx transaction 544 @param[in] index the index instance 545 @param[in] table_name table name 546 @param[in] table_id the table id 547 @return DB_SUCCESS or error code */ 548 dberr_t fts_create_index_tables_low(trx_t *trx, dict_index_t *index, 549 const char *table_name, table_id_t table_id) 550 MY_ATTRIBUTE((warn_unused_result)); 551 552 /** Add the FTS document id hidden column. */ 553 void fts_add_doc_id_column( 554 dict_table_t *table, /*!< in/out: Table with FTS index */ 555 mem_heap_t *heap); /*!< in: temporary memory heap, or NULL */ 556 557 /** Drops the ancillary tables needed for supporting an FTS index on a 558 given table. row_mysql_lock_data_dictionary must have been called before 559 this. 560 @param[in,out] trx transaction 561 @param[in] table table has the fts index 562 @param[in,out] aux_vec fts aux table name vector 563 @return DB_SUCCESS or error code */ 564 dberr_t fts_drop_tables(trx_t *trx, dict_table_t *table, 565 aux_name_vec_t *aux_vec); 566 567 /** Lock all FTS AUX tables (for dropping table) 568 @param[in] thd thread locking the AUX table 569 @param[in] table table has the fts index 570 @return DB_SUCCESS or error code */ 571 dberr_t fts_lock_all_aux_tables(THD *thd, dict_table_t *table); 572 573 /** Drop FTS AUX table DD table objects in vector 574 @param[in] aux_vec aux table name vector 575 @param[in] file_per_table whether file per table 576 @return true on success, false on failure. */ 577 bool fts_drop_dd_tables(const aux_name_vec_t *aux_vec, bool file_per_table); 578 579 /** Free FTS AUX table names in vector 580 @param[in] aux_vec aux table name vector 581 */ 582 void fts_free_aux_names(aux_name_vec_t *aux_vec); 583 584 /** The given transaction is about to be committed; do whatever is necessary 585 from the FTS system's POV. 586 @return DB_SUCCESS or error code */ 587 dberr_t fts_commit(trx_t *trx) /*!< in: transaction */ 588 MY_ATTRIBUTE((warn_unused_result)); 589 590 /** FTS Query entry point. 591 @param[in] trx transaction 592 @param[in] index fts index to search 593 @param[in] flags FTS search mode 594 @param[in] query_str FTS query 595 @param[in] query_len FTS query string len in bytes 596 @param[in,out] result result doc ids 597 @param[in] limit limit value 598 @return DB_SUCCESS if successful otherwise error code */ 599 dberr_t fts_query(trx_t *trx, dict_index_t *index, uint flags, 600 const byte *query_str, ulint query_len, fts_result_t **result, 601 ulonglong limit) MY_ATTRIBUTE((warn_unused_result)); 602 603 /** Retrieve the FTS Relevance Ranking result for doc with doc_id 604 @return the relevance ranking value. */ 605 float fts_retrieve_ranking( 606 fts_result_t *result, /*!< in: FTS result structure */ 607 doc_id_t doc_id); /*!< in: the interested document 608 doc_id */ 609 610 /** FTS Query sort result, returned by fts_query() on fts_ranking_t::rank. */ 611 void fts_query_sort_result_on_rank(fts_result_t *result); /*!< out: result 612 instance to sort.*/ 613 614 /** FTS Query free result, returned by fts_query(). */ 615 void fts_query_free_result(fts_result_t *result); /*!< in: result instance 616 to free.*/ 617 618 /** Extract the doc id from the FTS hidden column. */ 619 doc_id_t fts_get_doc_id_from_row(dict_table_t *table, /*!< in: table */ 620 dtuple_t *row); /*!< in: row whose FTS doc id 621 we want to extract.*/ 622 623 /** Extract the doc id from the record that belongs to index. 624 @param[in] table table 625 @param[in] rec record contains FTS_DOC_ID 626 @param[in] index index of rec 627 @param[in] heap heap memory 628 @return doc id that was extracted from rec */ 629 doc_id_t fts_get_doc_id_from_rec(dict_table_t *table, const rec_t *rec, 630 const dict_index_t *index, mem_heap_t *heap); 631 632 /** Add new fts doc id to the update vector. 633 @param[in] table the table that contains the FTS index. 634 @param[in,out] ufield the fts doc id field in the update vector. 635 No new memory is allocated for this in this 636 function. 637 @param[in,out] next_doc_id the fts doc id that has been added to the 638 update vector. If 0, a new fts doc id is 639 automatically generated. The memory provided 640 for this argument will be used by the update 641 vector. Ensure that the life time of this 642 memory matches that of the update vector. 643 @return the fts doc id used in the update vector */ 644 doc_id_t fts_update_doc_id(dict_table_t *table, upd_field_t *ufield, 645 doc_id_t *next_doc_id); 646 647 /** FTS initialize. */ 648 void fts_startup(void); 649 650 #if 0 // TODO: Enable this in WL#6608 651 /******************************************************************//** 652 Signal FTS threads to initiate shutdown. */ 653 void 654 fts_start_shutdown( 655 dict_table_t* table, /*!< in: table with FTS 656 indexes */ 657 fts_t* fts); /*!< in: fts instance to 658 shutdown */ 659 660 /******************************************************************//** 661 Wait for FTS threads to shutdown. */ 662 void 663 fts_shutdown( 664 dict_table_t* table, /*!< in: table with FTS 665 indexes */ 666 fts_t* fts); /*!< in: fts instance to 667 shutdown */ 668 #endif 669 670 /** Create an instance of fts_t. 671 @return instance of fts_t */ 672 fts_t *fts_create(dict_table_t *table); /*!< out: table with FTS 673 indexes */ 674 675 /** Free the FTS resources. */ 676 void fts_free(dict_table_t *table); /*!< in/out: table with 677 FTS indexes */ 678 679 /** Run OPTIMIZE on the given table. 680 @return DB_SUCCESS if all OK */ 681 dberr_t fts_optimize_table(dict_table_t *table); /*!< in: table to optimiza */ 682 683 /** Startup the optimize thread and create the work queue. */ 684 void fts_optimize_init(void); 685 686 /** Since we do a horizontal split on the index table, we need to drop 687 all the split tables. 688 @param[in] trx transaction 689 @param[in] index fts index 690 @param[out] aux_vec dropped table name vector 691 @return DB_SUCCESS or error code */ 692 dberr_t fts_drop_index_tables(trx_t *trx, dict_index_t *index, 693 aux_name_vec_t *aux_vec); 694 695 /** Empty all common talbes. 696 @param[in,out] trx transaction 697 @param[in] table dict table 698 @return DB_SUCCESS or error code. */ 699 dberr_t fts_empty_common_tables(trx_t *trx, dict_table_t *table); 700 701 /** Remove the table from the OPTIMIZER's list. We do wait for 702 acknowledgement from the consumer of the message. */ 703 void fts_optimize_remove_table(dict_table_t *table); /*!< in: table to remove */ 704 705 /** Shutdown fts optimize thread. */ 706 void fts_optimize_shutdown(); 707 708 /** Send sync fts cache for the table. 709 @param[in] table table to sync */ 710 void fts_optimize_request_sync_table(dict_table_t *table); 711 712 /** Take a FTS savepoint. */ 713 void fts_savepoint_take(trx_t *trx, /*!< in: transaction */ 714 fts_trx_t *fts_trx, /*!< in: fts transaction */ 715 const char *name); /*!< in: savepoint name */ 716 717 /** Refresh last statement savepoint. */ 718 void fts_savepoint_laststmt_refresh(trx_t *trx); /*!< in: transaction */ 719 720 /** Release the savepoint data identified by name. */ 721 void fts_savepoint_release(trx_t *trx, /*!< in: transaction */ 722 const char *name); /*!< in: savepoint name */ 723 724 /** Clear cache. 725 @param[in,out] cache fts cache */ 726 void fts_cache_clear(fts_cache_t *cache); 727 728 /** Initialize things in cache. */ 729 void fts_cache_init(fts_cache_t *cache); /*!< in: cache */ 730 731 /** Rollback to and including savepoint indentified by name. */ 732 void fts_savepoint_rollback(trx_t *trx, /*!< in: transaction */ 733 const char *name); /*!< in: savepoint name */ 734 735 /** Rollback to and including savepoint indentified by name. */ 736 void fts_savepoint_rollback_last_stmt(trx_t *trx); /*!< in: transaction */ 737 738 /* Get parent table name if it's a fts aux table 739 @param[in] aux_table_name aux table name 740 @param[in] aux_table_len aux table length 741 @return parent table name, or NULL */ 742 char *fts_get_parent_table_name(const char *aux_table_name, 743 ulint aux_table_len); 744 745 /** Run SYNC on the table, i.e., write out data from the cache to the 746 FTS auxiliary INDEX table and clear the cache at the end. 747 @param[in,out] table fts table 748 @param[in] unlock_cache whether unlock cache when write node 749 @param[in] wait whether wait for existing sync to finish 750 @param[in] has_dict whether has dict operation lock 751 @return DB_SUCCESS on success, error code on failure. */ 752 dberr_t fts_sync_table(dict_table_t *table, bool unlock_cache, bool wait, 753 bool has_dict); 754 755 /** Create an FTS index cache. */ 756 CHARSET_INFO *fts_index_get_charset(dict_index_t *index); /*!< in: FTS index */ 757 758 /** Get the initial Doc ID by consulting the CONFIG table 759 @return initial Doc ID */ 760 doc_id_t fts_init_doc_id(const dict_table_t *table); /*!< in: table */ 761 762 /** compare two character string according to their charset. */ 763 extern int innobase_fts_text_cmp(const void *cs, /*!< in: Character set */ 764 const void *p1, /*!< in: key */ 765 const void *p2); /*!< in: node */ 766 767 /** Makes all characters in a string lower case. */ 768 extern size_t innobase_fts_casedn_str( 769 CHARSET_INFO *cs, /*!< in: Character set */ 770 char *src, /*!< in: string to put in 771 lower case */ 772 size_t src_len, /*!< in: input string length */ 773 char *dst, /*!< in: buffer for result 774 string */ 775 size_t dst_len); /*!< in: buffer size */ 776 777 /** compare two character string according to their charset. */ 778 extern int innobase_fts_text_cmp_prefix( 779 const void *cs, /*!< in: Character set */ 780 const void *p1, /*!< in: key */ 781 const void *p2); /*!< in: node */ 782 783 /** Get the next token from the given string and store it in *token. */ 784 extern ulint innobase_mysql_fts_get_token( 785 CHARSET_INFO *charset, /*!< in: Character set */ 786 const byte *start, /*!< in: start of text */ 787 const byte *end, /*!< in: one character past 788 end of text */ 789 fts_string_t *token); /*!< out: token's text */ 790 791 /** Drop dd table & tablespace for fts aux table 792 @param[in] name table name 793 @param[in] file_per_table flag whether use file per table 794 @return true on success, false on failure. */ 795 bool innobase_fts_drop_dd_table(const char *name, bool file_per_table); 796 797 /** Get token char size by charset 798 @return the number of token char size */ 799 ulint fts_get_token_size(const CHARSET_INFO *cs, /*!< in: Character set */ 800 const char *token, /*!< in: token */ 801 ulint len); /*!< in: token length */ 802 803 /** FULLTEXT tokenizer internal in MYSQL_FTPARSER_SIMPLE_MODE 804 @return 0 if tokenize sucessfully */ 805 int fts_tokenize_document_internal( 806 MYSQL_FTPARSER_PARAM *param, /*!< in: parser parameter */ 807 char *doc, /*!< in: document to tokenize */ 808 int len); /*!< in: document length */ 809 810 /** Fetch COUNT(*) from specified table. 811 @return the number of rows in the table */ 812 ulint fts_get_rows_count(fts_table_t *fts_table); /*!< in: fts table to read */ 813 814 /** Get maximum Doc ID in a table if index "FTS_DOC_ID_INDEX" exists 815 @return max Doc ID or 0 if index "FTS_DOC_ID_INDEX" does not exist */ 816 doc_id_t fts_get_max_doc_id(dict_table_t *table); /*!< in: user table */ 817 818 /** Check whether user supplied stopword table exists and is of 819 the right format. 820 @return the stopword column charset if qualifies */ 821 CHARSET_INFO *fts_valid_stopword_table( 822 const char *stopword_table_name); /*!< in: Stopword table 823 name */ 824 /** This function loads specified stopword into FTS cache 825 @return true if success */ 826 ibool fts_load_stopword( 827 const dict_table_t *table, /*!< in: Table with FTS */ 828 trx_t *trx, /*!< in: Transaction */ 829 const char *global_stopword_table, /*!< in: Global stopword table 830 name */ 831 const char *session_stopword_table, /*!< in: Session stopword table 832 name */ 833 ibool stopword_is_on, /*!< in: Whether stopword 834 option is turned on/off */ 835 ibool reload); /*!< in: Whether it is during 836 reload of FTS table */ 837 838 /** Read the rows from the FTS index 839 @return DB_SUCCESS if OK */ 840 dberr_t fts_table_fetch_doc_ids(trx_t *trx, /*!< in: transaction */ 841 fts_table_t *fts_table, /*!< in: aux table */ 842 fts_doc_ids_t *doc_ids); /*!< in: For collecting 843 doc ids */ 844 /** This function brings FTS index in sync when FTS index is first 845 used. There are documents that have not yet sync-ed to auxiliary 846 tables from last server abnormally shutdown, we will need to bring 847 such document into FTS cache before any further operations 848 @return true if all OK */ 849 ibool fts_init_index(dict_table_t *table, /*!< in: Table with FTS */ 850 ibool has_cache_lock); /*!< in: Whether we already 851 have cache lock */ 852 /** Add a newly create index in FTS cache */ 853 void fts_add_index(dict_index_t *index, /*!< FTS index to be added */ 854 dict_table_t *table); /*!< table */ 855 856 /** Drop auxiliary tables related to an FTS index 857 @param[in] table Table where indexes are dropped 858 @param[in] index Index to be dropped 859 @param[in] trx Transaction for the drop 860 @param[in,out] aux_vec Aux table name vector 861 @return DB_SUCCESS or error number */ 862 dberr_t fts_drop_index(dict_table_t *table, dict_index_t *index, trx_t *trx, 863 aux_name_vec_t *aux_vec); 864 865 /** Rename auxiliary tables for all fts index for a table 866 @return DB_SUCCESS or error code */ 867 dberr_t fts_rename_aux_tables(dict_table_t *table, /*!< in: user Table */ 868 const char *new_name, /*!< in: new table name */ 869 trx_t *trx, /*!< in: transaction */ 870 bool replay); /*!< Whether in replay 871 stage */ 872 873 /** Check indexes in the fts->indexes is also present in index cache and 874 table->indexes list 875 @return true if all indexes match */ 876 ibool fts_check_cached_index( 877 dict_table_t *table); /*!< in: Table where indexes are dropped */ 878 879 /** Fetch the document from tuple, tokenize the text data and 880 insert the text data into fts auxiliary table and 881 its cache. Moreover this tuple fields doesn't contain any information 882 about externally stored field. This tuple contains data directly 883 converted from mysql. 884 @param[in] ftt FTS transaction table 885 @param[in] doc_id doc id 886 @param[in] tuple tuple from where data can be retrieved 887 and tuple should be arranged in table 888 schema order. */ 889 void fts_add_doc_from_tuple(fts_trx_table_t *ftt, doc_id_t doc_id, 890 const dtuple_t *tuple); 891 892 /** Create an FTS trx. 893 @param[in,out] trx InnoDB Transaction 894 @return FTS transaction. */ 895 fts_trx_t *fts_trx_create(trx_t *trx); 896 897 /** For storing table info when checking for orphaned tables. */ 898 struct fts_aux_table_t { 899 /** Table id */ 900 table_id_t id; 901 902 /** Parent table id */ 903 table_id_t parent_id; 904 905 /** Table FT index id */ 906 table_id_t index_id; 907 908 /** Name of the table */ 909 char *name; 910 911 /** FTS table type */ 912 fts_table_type_t type; 913 }; 914 915 /** Check if a table is an FTS auxiliary table name. 916 @param[out] table FTS table info 917 @param[in] name Table name 918 @param[in] len Length of table name 919 @return true if the name matches an auxiliary table name pattern */ 920 bool fts_is_aux_table_name(fts_aux_table_t *table, const char *name, ulint len); 921 922 /** Freeze all auiliary tables to be not evictable if exist, with dict_mutex 923 held 924 @param[in] table InnoDB table object */ 925 void fts_freeze_aux_tables(const dict_table_t *table); 926 927 /** Allow all the auxiliary tables of specified base table to be evictable 928 if they exist, if not exist just ignore 929 @param[in] table InnoDB table object 930 @param[in] dict_locked True if we have dict_sys mutex */ 931 void fts_detach_aux_tables(const dict_table_t *table, bool dict_locked); 932 933 /** Update DD system table for auxiliary common tables for an FTS index. 934 @param[in] table dict table instance 935 @return true on success, false on failure */ 936 bool fts_create_common_dd_tables(const dict_table_t *table); 937 938 /** Check if a table has FTS index needs to have its auxiliary index 939 tables' metadata updated in DD 940 @param[in,out] table table to check 941 @return DB_SUCCESS or error code */ 942 dberr_t fts_create_index_dd_tables(dict_table_t *table); 943 944 /** Upgrade FTS AUX Tables. The FTS common and aux tables are 945 renamed because they have table_id in their name. We move table_ids 946 by DICT_MAX_DD_TABLES offset. Aux tables are registered into DD 947 afer rename. 948 @param[in] table InnoDB table object 949 @return DB_SUCCESS or error code */ 950 dberr_t fts_upgrade_aux_tables(dict_table_t *table); 951 952 /** Rename FTS AUX tablespace name from 8.0 format to 5.7 format. 953 This will be done on upgrade failure 954 @param[in] table parent table 955 @param[in] rollback rollback the rename from 8.0 to 5.7 956 if true, rename to 5.7 format 957 if false, mark the table as evictable 958 @return DB_SUCCESS on success, DB_ERROR on error */ 959 dberr_t fts_upgrade_rename(const dict_table_t *table, bool rollback); 960 961 #endif /*!< fts0fts.h */ 962