1 /***************************************************************************** 2 3 Copyright (c) 2011, 2018, Oracle and/or its affiliates. All Rights Reserved. 4 5 This program is free software; you can redistribute it and/or modify it under 6 the terms of the GNU General Public License, version 2.0, as published by the 7 Free Software Foundation. 8 9 This program is also distributed with certain software (including but not 10 limited to OpenSSL) that is licensed under separate terms, as designated in a 11 particular file or component or in included license documentation. The authors 12 of MySQL hereby grant you an additional permission to link the program and 13 your derivative works with the separately licensed software that they have 14 included with MySQL. 15 16 This program is distributed in the hope that it will be useful, but WITHOUT 17 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 18 FOR A PARTICULAR PURPOSE. See the GNU General Public License, version 2.0, 19 for more details. 20 21 You should have received a copy of the GNU General Public License along with 22 this program; if not, write to the Free Software Foundation, Inc., 23 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 24 25 *****************************************************************************/ 26 27 /** @file include/fts0priv.h 28 Full text search internal header file 29 30 Created 2011/09/02 Sunny Bains 31 ***********************************************************************/ 32 33 #ifndef INNOBASE_FTS0PRIV_H 34 #define INNOBASE_FTS0PRIV_H 35 36 #include "dict0dict.h" 37 #include "fts0types.h" 38 #include "pars0pars.h" 39 #include "que0que.h" 40 #include "que0types.h" 41 #include "univ.i" 42 43 /* The various states of the FTS sub system pertaining to a table with 44 FTS indexes defined on it. */ 45 enum fts_table_state_enum { 46 /* !<This must be 0 since we insert 47 a hard coded '0' at create time 48 to the config table */ 49 50 FTS_TABLE_STATE_RUNNING = 0, /*!< Auxiliary tables created OK */ 51 52 FTS_TABLE_STATE_OPTIMIZING, /*!< This is a substate of RUNNING */ 53 54 FTS_TABLE_STATE_DELETED /*!< All aux tables to be dropped when 55 it's safe to do so */ 56 }; 57 58 typedef enum fts_table_state_enum fts_table_state_t; 59 60 /** The default time to wait for the background thread (in microsecnds). */ 61 #define FTS_MAX_BACKGROUND_THREAD_WAIT 10000 62 63 /** Maximum number of iterations to wait before we complain */ 64 #define FTS_BACKGROUND_THREAD_WAIT_COUNT 1000 65 66 /** The maximum length of the config table's value column in bytes */ 67 #define FTS_MAX_CONFIG_NAME_LEN 64 68 69 /** The maximum length of the config table's value column in bytes */ 70 #define FTS_MAX_CONFIG_VALUE_LEN 1024 71 72 /** Approx. upper limit of ilist length in bytes. */ 73 #define FTS_ILIST_MAX_SIZE (64 * 1024) 74 75 /** FTS config table name parameters */ 76 77 /** The number of seconds after which an OPTIMIZE run will stop */ 78 #define FTS_OPTIMIZE_LIMIT_IN_SECS "optimize_checkpoint_limit" 79 80 /** The next doc id */ 81 #define FTS_SYNCED_DOC_ID "synced_doc_id" 82 83 /** The last word that was OPTIMIZED */ 84 #define FTS_LAST_OPTIMIZED_WORD "last_optimized_word" 85 86 /** Total number of documents that have been deleted. The next_doc_id 87 minus this count gives us the total number of documents. */ 88 #define FTS_TOTAL_DELETED_COUNT "deleted_doc_count" 89 90 /** Total number of words parsed from all documents */ 91 #define FTS_TOTAL_WORD_COUNT "total_word_count" 92 93 /** Start of optimize of an FTS index */ 94 #define FTS_OPTIMIZE_START_TIME "optimize_start_time" 95 96 /** End of optimize for an FTS index */ 97 #define FTS_OPTIMIZE_END_TIME "optimize_end_time" 98 99 /** User specified stopword table name */ 100 #define FTS_STOPWORD_TABLE_NAME "stopword_table_name" 101 102 /** Whether to use (turn on/off) stopword */ 103 #define FTS_USE_STOPWORD "use_stopword" 104 105 /** State of the FTS system for this table. It can be one of 106 RUNNING, OPTIMIZING, DELETED. */ 107 #define FTS_TABLE_STATE "table_state" 108 109 /** The minimum length of an FTS auxiliary table names's id component 110 e.g., For an auxiliary table name 111 112 "FTS_@<TABLE_ID@>_SUFFIX" 113 114 This constant is for the minimum length required to store the @<TABLE_ID@> 115 component. 116 */ 117 #define FTS_AUX_MIN_TABLE_ID_LENGTH 48 118 119 /** Maximum length of an integer stored in the config table value column. */ 120 #define FTS_MAX_INT_LEN 32 121 122 /** Parse an SQL string. %s is replaced with the table's id. 123 @return query graph */ 124 que_t *fts_parse_sql(fts_table_t *fts_table, /*!< in: FTS aux table */ 125 pars_info_t *info, /*!< in: info struct, or NULL */ 126 const char *sql) /*!< in: SQL string to evaluate */ 127 MY_ATTRIBUTE((warn_unused_result)); 128 129 /** Evaluate a parsed SQL statement 130 @return DB_SUCCESS or error code */ 131 dberr_t fts_eval_sql(trx_t *trx, /*!< in: transaction */ 132 que_t *graph) /*!< in: Parsed statement */ 133 MY_ATTRIBUTE((warn_unused_result)); 134 135 /** Construct the name of an ancillary FTS table for the given table. 136 Caller must allocate enough memory(usually size of MAX_FULL_NAME_LEN) 137 for param 'table_name'. */ 138 void fts_get_table_name( 139 const fts_table_t *fts_table, /*!< in: FTS aux table info */ 140 char *table_name); /*!< in/out: aux table name */ 141 142 /** Construct the name of an ancillary FTS table for the given table in 143 5.7 compatible format. Caller must allocate enough memory(usually size 144 of MAX_FULL_NAME_LEN) for param 'table_name' 145 @param[in] fts_table Auxiliary table object 146 @param[in,out] table_name aux table name */ 147 void fts_get_table_name_5_7(const fts_table_t *fts_table, char *table_name); 148 149 /** Construct the column specification part of the SQL string for selecting the 150 indexed FTS columns for the given table. Adds the necessary bound 151 ids to the given 'info' and returns the SQL string. Examples: 152 153 One indexed column named "text": 154 155 "$sel0", 156 info/ids: sel0 -> "text" 157 158 Two indexed columns named "subject" and "content": 159 160 "$sel0, $sel1", 161 info/ids: sel0 -> "subject", sel1 -> "content", 162 @return heap-allocated WHERE string */ 163 const char *fts_get_select_columns_str( 164 dict_index_t *index, /*!< in: FTS index */ 165 pars_info_t *info, /*!< in/out: parser info */ 166 mem_heap_t *heap) /*!< in: memory heap */ 167 MY_ATTRIBUTE((warn_unused_result)); 168 169 /** define for fts_doc_fetch_by_doc_id() "option" value, defines whether 170 we want to get Doc whose ID is equal to or greater or smaller than supplied 171 ID */ 172 #define FTS_FETCH_DOC_BY_ID_EQUAL 1 173 #define FTS_FETCH_DOC_BY_ID_LARGE 2 174 175 /** Fetch document (= a single row's indexed text) with the given 176 document id. 177 @return: DB_SUCCESS if fetch is successful, else error */ 178 dberr_t fts_doc_fetch_by_doc_id( 179 fts_get_doc_t *get_doc, /*!< in: state */ 180 doc_id_t doc_id, /*!< in: id of document to fetch */ 181 dict_index_t *index_to_use, /*!< in: caller supplied FTS index, 182 or NULL */ 183 ulint option, /*!< in: search option, if it is 184 greater than doc_id or equal */ 185 fts_sql_callback callback, /*!< in: callback to read 186 records */ 187 void *arg); /*!< in: callback arg */ 188 189 /** Callback function for fetch that stores the text of an FTS document, 190 converting each column to UTF-16. 191 @return always false */ 192 ibool fts_query_expansion_fetch_doc(void *row, /*!< in: sel_node_t* */ 193 void *user_arg); /*!< in: fts_doc_t* */ 194 195 /******************************************************************** 196 Write out a single word's data as new entry/entries in the INDEX table. 197 @return DB_SUCCESS if all OK. */ 198 dberr_t fts_write_node(trx_t *trx, /*!< in: transaction */ 199 que_t **graph, /*!< in: query graph */ 200 fts_table_t *fts_table, /*!< in: the FTS aux index */ 201 fts_string_t *word, /*!< in: word in UTF-8 */ 202 fts_node_t *node) /*!< in: node columns */ 203 MY_ATTRIBUTE((warn_unused_result)); 204 205 /** Check fts token 206 1. for ngram token, check whether the token contains any words in stopwords 207 2. for non-ngram token, check if it's stopword or less than fts_min_token_size 208 or greater than fts_max_token_size. 209 @param[in] token token string 210 @param[in] stopwords stopwords rb tree 211 @param[in] is_ngram is ngram parser 212 @param[in] cs token charset 213 @retval true if it is not stopword and length in range 214 @retval false if it is stopword or length not in range */ 215 bool fts_check_token(const fts_string_t *token, const ib_rbt_t *stopwords, 216 bool is_ngram, const CHARSET_INFO *cs); 217 218 /** Initialize a document. */ 219 void fts_doc_init(fts_doc_t *doc); /*!< in: doc to initialize */ 220 221 /** Do a binary search for a doc id in the array 222 @return +ve index if found -ve index where it should be 223 inserted if not found */ 224 int fts_bsearch(fts_update_t *array, /*!< in: array to sort */ 225 int lower, /*!< in: lower bound of array*/ 226 int upper, /*!< in: upper bound of array*/ 227 doc_id_t doc_id) /*!< in: doc id to lookup */ 228 MY_ATTRIBUTE((warn_unused_result)); 229 /** Free document. */ 230 void fts_doc_free(fts_doc_t *doc); /*!< in: document */ 231 232 /** Free fts_optimizer_word_t instanace.*/ 233 void fts_word_free(fts_word_t *word); /*!< in: instance to free.*/ 234 235 /** Read the rows from the FTS inde 236 @return DB_SUCCESS or error code */ 237 dberr_t fts_index_fetch_nodes( 238 trx_t *trx, /*!< in: transaction */ 239 que_t **graph, /*!< in: prepared statement */ 240 fts_table_t *fts_table, /*!< in: FTS aux table */ 241 const fts_string_t *word, /*!< in: the word to fetch */ 242 fts_fetch_t *fetch); /*!< in: fetch callback.*/ 243 244 /** Compare two fts_trx_table_t instances, we actually compare the 245 table id's here. 246 @param[in] v1 id1 247 @param[in] v2 id2 248 @return < 0 if n1 < n2, 0 if n1 == n2, > 0 if n1 > n2 */ 249 UNIV_INLINE 250 int fts_trx_table_cmp(const void *v1, const void *v2); 251 252 /** Compare a table id with a trx_table_t table id. 253 @param[in] p1 id1 254 @param[in] p2 id2 255 @return < 0 if n1 < n2, 0 if n1 == n2, > 0 if n1 > n2 */ 256 UNIV_INLINE 257 int fts_trx_table_id_cmp(const void *p1, const void *p2); 258 259 /** Commit a transaction. 260 @return DB_SUCCESS if all OK */ 261 dberr_t fts_sql_commit(trx_t *trx); /*!< in: transaction */ 262 263 /** Rollback a transaction. 264 @return DB_SUCCESS if all OK */ 265 dberr_t fts_sql_rollback(trx_t *trx); /*!< in: transaction */ 266 267 /** Get value from config table. The caller must ensure that enough 268 space is allocated for value to hold the column contents 269 @return DB_SUCCESS or error code */ 270 dberr_t fts_config_get_value( 271 trx_t *trx, /* transaction */ 272 fts_table_t *fts_table, /*!< in: the indexed FTS table */ 273 const char *name, /*!< in: get config value for 274 this parameter name */ 275 fts_string_t *value); /*!< out: value read from 276 config table */ 277 /** Get value specific to an FTS index from the config table. The caller 278 must ensure that enough space is allocated for value to hold the 279 column contents. 280 @return DB_SUCCESS or error code */ 281 dberr_t fts_config_get_index_value(trx_t *trx, /*!< transaction */ 282 dict_index_t *index, /*!< in: index */ 283 const char *param, /*!< in: get config value 284 for this parameter name */ 285 fts_string_t *value) /*!< out: value read 286 from config table */ 287 MY_ATTRIBUTE((warn_unused_result)); 288 289 /** Set the value in the config table for name. 290 @return DB_SUCCESS or error code */ 291 dberr_t fts_config_set_value( 292 trx_t *trx, /*!< transaction */ 293 fts_table_t *fts_table, /*!< in: the indexed FTS table */ 294 const char *name, /*!< in: get config value for 295 this parameter name */ 296 const fts_string_t *value); /*!< in: value to update */ 297 298 /** Set an ulint value in the config table. 299 @return DB_SUCCESS if all OK else error code */ 300 dberr_t fts_config_set_ulint( 301 trx_t *trx, /*!< in: transaction */ 302 fts_table_t *fts_table, /*!< in: the indexed FTS table */ 303 const char *name, /*!< in: param name */ 304 ulint int_value) /*!< in: value */ 305 MY_ATTRIBUTE((warn_unused_result)); 306 307 /** Set the value specific to an FTS index in the config table. 308 @return DB_SUCCESS or error code */ 309 dberr_t fts_config_set_index_value(trx_t *trx, /*!< transaction */ 310 dict_index_t *index, /*!< in: index */ 311 const char *param, /*!< in: get config value 312 for this parameter name */ 313 fts_string_t *value) /*!< out: value read 314 from config table */ 315 MY_ATTRIBUTE((warn_unused_result)); 316 317 #ifdef FTS_OPTIMIZE_DEBUG 318 /** Get an ulint value from the config table. 319 @return DB_SUCCESS or error code */ 320 dberr_t fts_config_get_index_ulint(trx_t *trx, /*!< in: transaction */ 321 dict_index_t *index, /*!< in: FTS index */ 322 const char *name, /*!< in: param name */ 323 ulint *int_value) /*!< out: value */ 324 MY_ATTRIBUTE((warn_unused_result)); 325 326 /** Set an ulint value int the config table. 327 @return DB_SUCCESS or error code */ 328 dberr_t fts_config_set_index_ulint(trx_t *trx, /*!< in: transaction */ 329 dict_index_t *index, /*!< in: FTS index */ 330 const char *name, /*!< in: param name */ 331 ulint int_value) /*!< in: value */ 332 MY_ATTRIBUTE((warn_unused_result)); 333 #endif /* FTS_OPTIMIZE_DEBUG */ 334 335 /** Get an ulint value from the config table. 336 @return DB_SUCCESS or error code */ 337 dberr_t fts_config_get_ulint( 338 trx_t *trx, /*!< in: transaction */ 339 fts_table_t *fts_table, /*!< in: the indexed FTS table */ 340 const char *name, /*!< in: param name */ 341 ulint *int_value); /*!< out: value */ 342 343 /** Search cache for word. 344 @return the word node vector if found else NULL */ 345 const ib_vector_t *fts_cache_find_word( 346 const fts_index_cache_t *index_cache, /*!< in: cache to search */ 347 const fts_string_t *text) /*!< in: word to search for */ 348 MY_ATTRIBUTE((warn_unused_result)); 349 350 /** Append deleted doc ids to vector and sort the vector. */ 351 void fts_cache_append_deleted_doc_ids( 352 const fts_cache_t *cache, /*!< in: cache to use */ 353 ib_vector_t *vector); /*!< in: append to this vector */ 354 /** Wait for the background thread to start. We poll to detect change 355 of state, which is acceptable, since the wait should happen only 356 once during startup. 357 @return true if the thread started else false (i.e timed out) */ 358 ibool fts_wait_for_background_thread_to_start( 359 dict_table_t *table, /*!< in: table to which the thread 360 is attached */ 361 ulint max_wait); /*!< in: time in microseconds, if set 362 to 0 then it disables timeout 363 checking */ 364 /** Search the index specific cache for a particular FTS index. 365 @return the index specific cache else NULL */ 366 fts_index_cache_t *fts_find_index_cache( 367 const fts_cache_t *cache, /*!< in: cache to search */ 368 const dict_index_t *index) /*!< in: index to search for */ 369 MY_ATTRIBUTE((warn_unused_result)); 370 371 /** Write the table id to the given buffer (including final NUL). Buffer must 372 be at least FTS_AUX_MIN_TABLE_ID_LENGTH bytes long. 373 @param[in] id a table/index id 374 @param[in] str buffer to write the id to 375 @return number of bytes written */ 376 UNIV_INLINE 377 int fts_write_object_id(ib_id_t id, char *str); 378 379 /** Read the table id from the string generated by fts_write_object_id(). 380 @return true if parse successful */ 381 UNIV_INLINE 382 ibool fts_read_object_id(ib_id_t *id, /*!< out: a table id */ 383 const char *str) /*!< in: buffer to read from */ 384 MY_ATTRIBUTE((warn_unused_result)); 385 386 /** Get the table id. 387 @return number of bytes written */ 388 int fts_get_table_id( 389 const fts_table_t *fts_table, /*!< in: FTS Auxiliary table */ 390 char *table_id) /*!< out: table id, must be at least 391 FTS_AUX_MIN_TABLE_ID_LENGTH bytes 392 long */ 393 MY_ATTRIBUTE((warn_unused_result)); 394 395 /** Add the table to add to the OPTIMIZER's list. */ 396 void fts_optimize_add_table(dict_table_t *table); /*!< in: table to add */ 397 398 /** Construct the prefix name of an FTS table. 399 @return own: table name, must be freed with ut_free() */ 400 char *fts_get_table_name_prefix( 401 const fts_table_t *fts_table) /*!< in: Auxiliary table type */ 402 MY_ATTRIBUTE((warn_unused_result)); 403 404 /** Add node positions. */ 405 void fts_cache_node_add_positions( 406 fts_cache_t *cache, /*!< in: cache */ 407 fts_node_t *node, /*!< in: word node */ 408 doc_id_t doc_id, /*!< in: doc id */ 409 ib_vector_t *positions); /*!< in: fts_token_t::positions */ 410 411 /** Create the config table name for retrieving index specific value. 412 @return index config parameter name */ 413 char *fts_config_create_index_param_name( 414 const char *param, /*!< in: base name of param */ 415 const dict_index_t *index) /*!< in: index for config */ 416 MY_ATTRIBUTE((warn_unused_result)); 417 418 #include "fts0priv.ic" 419 420 #endif /* INNOBASE_FTS0PRIV_H */ 421