1 /*****************************************************************************
2 
3 Copyright (c) 2011, 2018, Oracle and/or its affiliates. All Rights Reserved.
4 
5 This program is free software; you can redistribute it and/or modify it under
6 the terms of the GNU General Public License, version 2.0, as published by the
7 Free Software Foundation.
8 
9 This program is also distributed with certain software (including but not
10 limited to OpenSSL) that is licensed under separate terms, as designated in a
11 particular file or component or in included license documentation. The authors
12 of MySQL hereby grant you an additional permission to link the program and
13 your derivative works with the separately licensed software that they have
14 included with MySQL.
15 
16 This program is distributed in the hope that it will be useful, but WITHOUT
17 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
18 FOR A PARTICULAR PURPOSE. See the GNU General Public License, version 2.0,
19 for more details.
20 
21 You should have received a copy of the GNU General Public License along with
22 this program; if not, write to the Free Software Foundation, Inc.,
23 51 Franklin St, Fifth Floor, Boston, MA 02110-1301  USA
24 
25 *****************************************************************************/
26 
27 /** @file include/fts0priv.h
28  Full text search internal header file
29 
30  Created 2011/09/02 Sunny Bains
31  ***********************************************************************/
32 
33 #ifndef INNOBASE_FTS0PRIV_H
34 #define INNOBASE_FTS0PRIV_H
35 
36 #include "dict0dict.h"
37 #include "fts0types.h"
38 #include "pars0pars.h"
39 #include "que0que.h"
40 #include "que0types.h"
41 #include "univ.i"
42 
43 /** The various states of the FTS subsystem pertaining to a table with
44 FTS indexes defined on it. */
45 enum fts_table_state_enum {
46   /* FTS_TABLE_STATE_RUNNING must be 0 since we insert
47   a hard coded '0' at create time
48   to the config table */
49 
50   FTS_TABLE_STATE_RUNNING = 0, /*!< Auxiliary tables created OK */
51 
52   FTS_TABLE_STATE_OPTIMIZING, /*!< This is a substate of RUNNING */
53 
54   FTS_TABLE_STATE_DELETED /*!< All aux tables to be dropped when
55                           it's safe to do so */
56 };
57 
58 typedef enum fts_table_state_enum fts_table_state_t;
59 
60 /** The default time to wait for the background thread (in microseconds). */
61 #define FTS_MAX_BACKGROUND_THREAD_WAIT 10000
62 
63 /** Maximum number of iterations to wait before we complain */
64 #define FTS_BACKGROUND_THREAD_WAIT_COUNT 1000
65 
66 /** The maximum length of a config table parameter name in bytes */
67 #define FTS_MAX_CONFIG_NAME_LEN 64
68 
69 /** The maximum length of the config table's value column in bytes */
70 #define FTS_MAX_CONFIG_VALUE_LEN 1024
71 
72 /** Approx. upper limit of ilist length in bytes. */
73 #define FTS_ILIST_MAX_SIZE (64 * 1024)
74 
75 /** FTS config table parameter names */
76 
77 /** The number of seconds after which an OPTIMIZE run will stop */
78 #define FTS_OPTIMIZE_LIMIT_IN_SECS "optimize_checkpoint_limit"
79 
80 /** The next doc id */
81 #define FTS_SYNCED_DOC_ID "synced_doc_id"
82 
83 /** The last word that was OPTIMIZED */
84 #define FTS_LAST_OPTIMIZED_WORD "last_optimized_word"
85 
86 /** Total number of documents that have been deleted. The next_doc_id
87 minus this count gives us the total number of documents. */
88 #define FTS_TOTAL_DELETED_COUNT "deleted_doc_count"
89 
90 /** Total number of words parsed from all documents */
91 #define FTS_TOTAL_WORD_COUNT "total_word_count"
92 
93 /** Start time of optimize of an FTS index */
94 #define FTS_OPTIMIZE_START_TIME "optimize_start_time"
95 
96 /** End time of optimize for an FTS index */
97 #define FTS_OPTIMIZE_END_TIME "optimize_end_time"
98 
99 /** User specified stopword table name */
100 #define FTS_STOPWORD_TABLE_NAME "stopword_table_name"
101 
102 /** Whether to use (turn on/off) stopword */
103 #define FTS_USE_STOPWORD "use_stopword"
104 
105 /** State of the FTS system for this table. It can be one of
106  RUNNING, OPTIMIZING, DELETED (see fts_table_state_enum). */
107 #define FTS_TABLE_STATE "table_state"
108 
109 /** The minimum length of an FTS auxiliary table name's id component
110 e.g., For an auxiliary table name
111 
112         "FTS_@<TABLE_ID@>_SUFFIX"
113 
114 This constant is the minimum buffer length required to store the
115 @<TABLE_ID@> component.
116 */
117 #define FTS_AUX_MIN_TABLE_ID_LENGTH 48
118 
119 /** Maximum length of an integer stored in the config table value column. */
120 #define FTS_MAX_INT_LEN 32
121 
122 /** Parse an SQL string. %s is replaced with the table's id.
123  @return query graph; the result must not be ignored */
124 que_t *fts_parse_sql(fts_table_t *fts_table, /*!< in: FTS aux table */
125                      pars_info_t *info,      /*!< in: info struct, or NULL */
126                      const char *sql)        /*!< in: SQL string to evaluate */
127     MY_ATTRIBUTE((warn_unused_result));
128 
129 /** Evaluate a parsed SQL statement.
130  @return DB_SUCCESS or error code */
131 dberr_t fts_eval_sql(trx_t *trx,   /*!< in: transaction */
132                      que_t *graph) /*!< in: parsed statement (query graph) */
133     MY_ATTRIBUTE((warn_unused_result));
134 
135 /** Construct the name of an ancillary FTS table for the given table.
136  Caller must allocate enough memory (usually size of MAX_FULL_NAME_LEN)
137  for param 'table_name'. */
138 void fts_get_table_name(
139     const fts_table_t *fts_table, /*!< in: FTS aux table info */
140     char *table_name);            /*!< in/out: aux table name */
141 
142 /** Construct the name of an ancillary FTS table for the given table in
143 5.7 compatible format. Caller must allocate enough memory (usually size
144 of MAX_FULL_NAME_LEN) for param 'table_name'.
145 @param[in]	fts_table	Auxiliary table object
146 @param[in,out]	table_name	aux table name */
147 void fts_get_table_name_5_7(const fts_table_t *fts_table, char *table_name);
148 
149 /** Construct the column specification part of the SQL string for selecting the
150  indexed FTS columns for the given table. Adds the necessary bound
151  ids to the given 'info' and returns the SQL string. Examples:
152 
153  One indexed column named "text":
154 
155   "$sel0",
156   info/ids: sel0 -> "text"
157 
158  Two indexed columns named "subject" and "content":
159 
160   "$sel0, $sel1",
161   info/ids: sel0 -> "subject", sel1 -> "content",
162  @return heap-allocated column specification string */
163 const char *fts_get_select_columns_str(
164     dict_index_t *index, /*!< in: FTS index */
165     pars_info_t *info,   /*!< in/out: parser info */
166     mem_heap_t *heap)    /*!< in: memory heap */
167     MY_ATTRIBUTE((warn_unused_result));
168 
169 /** Values for the "option" argument of fts_doc_fetch_by_doc_id(): they
170 select whether we want the Doc whose ID is equal to, or greater than, the
171 supplied ID */
172 #define FTS_FETCH_DOC_BY_ID_EQUAL 1
173 #define FTS_FETCH_DOC_BY_ID_LARGE 2
174 
175 /** Fetch document (= a single row's indexed text) with the given
176  document id.
177  @return DB_SUCCESS if fetch is successful, else error */
178 dberr_t fts_doc_fetch_by_doc_id(
179     fts_get_doc_t *get_doc,     /*!< in: state */
180     doc_id_t doc_id,            /*!< in: id of document to fetch */
181     dict_index_t *index_to_use, /*!< in: caller supplied FTS index,
182                                 or NULL */
183     ulint option,               /*!< in: search option: fetch the doc whose
184                                 id is equal to or greater than doc_id */
185     fts_sql_callback callback,  /*!< in: callback to read
186                                 records */
187     void *arg);                 /*!< in: callback arg */
188 
189 /** Callback function for fetch that stores the text of an FTS document,
190  converting each column to UTF-16 (NOTE(review): confirm the encoding).
191  @return always false */
192 ibool fts_query_expansion_fetch_doc(void *row,       /*!< in: sel_node_t* */
193                                     void *user_arg); /*!< in: fts_doc_t* */
194 
195 /** Write out a single word's data as new entry/entries in the INDEX
196  table.
197  @return DB_SUCCESS if all OK. */
198 dberr_t fts_write_node(trx_t *trx,             /*!< in: transaction */
199                        que_t **graph,          /*!< in: query graph */
200                        fts_table_t *fts_table, /*!< in: the FTS aux index */
201                        fts_string_t *word,     /*!< in: word in UTF-8 */
202                        fts_node_t *node)       /*!< in: node columns */
203     MY_ATTRIBUTE((warn_unused_result));
204 
205 /** Check whether an FTS token is acceptable:
206 1. for ngram token, check whether the token contains any words in stopwords
207 2. for non-ngram token, check if it's a stopword or shorter than
208 fts_min_token_size or longer than fts_max_token_size.
209 @param[in]	token		token string
210 @param[in]	stopwords	stopwords rb tree
211 @param[in]	is_ngram	is ngram parser
212 @param[in]	cs		token charset
213 @retval true	if it is not a stopword and its length is in range
214 @retval false	if it is a stopword or its length is not in range */
215 bool fts_check_token(const fts_string_t *token, const ib_rbt_t *stopwords,
216                      bool is_ngram, const CHARSET_INFO *cs);
217 
218 /** Initialize an FTS document. */
219 void fts_doc_init(fts_doc_t *doc); /*!< in: doc to initialize */
220 
221 /** Do a binary search for a doc id in the array
222  @return +ve index if found, -ve index where it should be
223          inserted if not found */
224 int fts_bsearch(fts_update_t *array, /*!< in: array to search */
225                 int lower,           /*!< in: lower bound of array */
226                 int upper,           /*!< in: upper bound of array */
227                 doc_id_t doc_id)     /*!< in: doc id to lookup */
228     MY_ATTRIBUTE((warn_unused_result));
229 /** Free an FTS document. */
230 void fts_doc_free(fts_doc_t *doc); /*!< in: document */
231 
232 /** Free fts_word_t instance. */
233 void fts_word_free(fts_word_t *word); /*!< in: instance to free. */
234 
235 /** Read the rows from the FTS index.
236  @return DB_SUCCESS or error code */
237 dberr_t fts_index_fetch_nodes(
238     trx_t *trx,               /*!< in: transaction */
239     que_t **graph,            /*!< in: prepared statement */
240     fts_table_t *fts_table,   /*!< in: FTS aux table */
241     const fts_string_t *word, /*!< in: the word to fetch */
242     fts_fetch_t *fetch);      /*!< in: fetch callback. */
243 
244 /** Compare two fts_trx_table_t instances, we actually compare the
245 table id's here.
246 @param[in]	v1	id1
247 @param[in]	v2	id2
248 @return < 0 if id1 < id2, 0 if id1 == id2, > 0 if id1 > id2 */
249 UNIV_INLINE
250 int fts_trx_table_cmp(const void *v1, const void *v2);
251 
252 /** Compare a table id with a trx_table_t table id.
253 @param[in]	p1	id1
254 @param[in]	p2	id2
255 @return < 0 if id1 < id2, 0 if id1 == id2, > 0 if id1 > id2 */
256 UNIV_INLINE
257 int fts_trx_table_id_cmp(const void *p1, const void *p2);
258 
259 /** Commit a transaction.
260  @return DB_SUCCESS if all OK */
261 dberr_t fts_sql_commit(trx_t *trx); /*!< in: transaction to commit */
262 
263 /** Roll back a transaction.
264  @return DB_SUCCESS if all OK */
265 dberr_t fts_sql_rollback(trx_t *trx); /*!< in: transaction to roll back */
266 
267 /** Get value from config table. The caller must ensure that enough
268  space is allocated for value to hold the column contents.
269  @return DB_SUCCESS or error code */
270 dberr_t fts_config_get_value(
271     trx_t *trx,             /*!< in: transaction */
272     fts_table_t *fts_table, /*!< in: the indexed FTS table */
273     const char *name,       /*!< in: get config value for
274                             this parameter name */
275     fts_string_t *value);   /*!< out: value read from
276                             config table */
277 /** Get value specific to an FTS index from the config table. The caller
278  must ensure that enough space is allocated for value to hold the
279  column contents.
280  @return DB_SUCCESS or error code */
281 dberr_t fts_config_get_index_value(trx_t *trx,          /*!< in: transaction */
282                                    dict_index_t *index, /*!< in: index */
283                                    const char *param, /*!< in: get config value
284                                                       for this parameter name */
285                                    fts_string_t *value) /*!< out: value read
286                                                         from config table */
287     MY_ATTRIBUTE((warn_unused_result));
288 
289 /** Set the value in the config table for name.
290  @return DB_SUCCESS or error code */
291 dberr_t fts_config_set_value(
292     trx_t *trx,                 /*!< in: transaction */
293     fts_table_t *fts_table,     /*!< in: the indexed FTS table */
294     const char *name,           /*!< in: set config value for
295                                 this parameter name */
296     const fts_string_t *value); /*!< in: value to update */
297 
298 /** Set an ulint value in the config table.
299  @return DB_SUCCESS if all OK else error code */
300 dberr_t fts_config_set_ulint(
301     trx_t *trx,             /*!< in: transaction */
302     fts_table_t *fts_table, /*!< in: the indexed FTS table */
303     const char *name,       /*!< in: param name */
304     ulint int_value)        /*!< in: value to set */
305     MY_ATTRIBUTE((warn_unused_result));
306 
307 /** Set the value specific to an FTS index in the config table.
308  @return DB_SUCCESS or error code */
309 dberr_t fts_config_set_index_value(trx_t *trx,          /*!< in: transaction */
310                                    dict_index_t *index, /*!< in: index */
311                                    const char *param, /*!< in: set config value
312                                                       for this parameter name */
313                                    fts_string_t *value) /*!< in: value to
314                                                         update */
315     MY_ATTRIBUTE((warn_unused_result));
316 
317 #ifdef FTS_OPTIMIZE_DEBUG
318 /** Get an ulint value specific to an FTS index from the config table.
319  @return DB_SUCCESS or error code */
320 dberr_t fts_config_get_index_ulint(trx_t *trx,          /*!< in: transaction */
321                                    dict_index_t *index, /*!< in: FTS index */
322                                    const char *name,    /*!< in: param name */
323                                    ulint *int_value)    /*!< out: value */
324     MY_ATTRIBUTE((warn_unused_result));
325 
326 /** Set an ulint value specific to an FTS index in the config table.
327  @return DB_SUCCESS or error code */
328 dberr_t fts_config_set_index_ulint(trx_t *trx,          /*!< in: transaction */
329                                    dict_index_t *index, /*!< in: FTS index */
330                                    const char *name,    /*!< in: param name */
331                                    ulint int_value)     /*!< in: value */
332     MY_ATTRIBUTE((warn_unused_result));
333 #endif /* FTS_OPTIMIZE_DEBUG */
334 
335 /** Get an ulint value from the config table.
336  @return DB_SUCCESS or error code */
337 dberr_t fts_config_get_ulint(
338     trx_t *trx,             /*!< in: transaction */
339     fts_table_t *fts_table, /*!< in: the indexed FTS table */
340     const char *name,       /*!< in: param name */
341     ulint *int_value);      /*!< out: value read */
342 
343 /** Search the index cache for a word.
344  @return the word node vector if found else NULL */
345 const ib_vector_t *fts_cache_find_word(
346     const fts_index_cache_t *index_cache, /*!< in: cache to search */
347     const fts_string_t *text)             /*!< in: word to search for */
348     MY_ATTRIBUTE((warn_unused_result));
349 
350 /** Append deleted doc ids to vector and sort the vector. */
351 void fts_cache_append_deleted_doc_ids(
352     const fts_cache_t *cache, /*!< in: cache to use */
353     ib_vector_t *vector);     /*!< in/out: append to this vector */
354 /** Wait for the background thread to start. We poll to detect change
355  of state, which is acceptable, since the wait should happen only
356  once during startup.
357  @return true if the thread started else false (i.e. timed out) */
358 ibool fts_wait_for_background_thread_to_start(
359     dict_table_t *table, /*!< in: table to which the thread
360                          is attached */
361     ulint max_wait);     /*!< in: time in microseconds, if set
362                          to 0 then it disables timeout
363                          checking */
364 /** Search the cache for the index specific cache of a particular FTS index.
365  @return the index specific cache else NULL */
366 fts_index_cache_t *fts_find_index_cache(
367     const fts_cache_t *cache,  /*!< in: cache to search */
368     const dict_index_t *index) /*!< in: index to search for */
369     MY_ATTRIBUTE((warn_unused_result));
370 
371 /** Write the table id to the given buffer (including final NUL). Buffer must
372 be at least FTS_AUX_MIN_TABLE_ID_LENGTH bytes long.
373 @param[in]	id		a table/index id
374 @param[out]	str		buffer to write the id to
375 @return number of bytes written */
376 UNIV_INLINE
377 int fts_write_object_id(ib_id_t id, char *str);
378 
379 /** Read the table id from the string generated by fts_write_object_id().
380  @return true if the parse was successful */
381 UNIV_INLINE
382 ibool fts_read_object_id(ib_id_t *id,     /*!< out: a table id */
383                          const char *str) /*!< in: buffer to read from */
384     MY_ATTRIBUTE((warn_unused_result));
385 
386 /** Get the table id, written to the supplied buffer.
387  @return number of bytes written */
388 int fts_get_table_id(
389     const fts_table_t *fts_table, /*!< in: FTS Auxiliary table */
390     char *table_id)               /*!< out: table id, must be at least
391                                   FTS_AUX_MIN_TABLE_ID_LENGTH bytes
392                                   long */
393     MY_ATTRIBUTE((warn_unused_result));
394 
395 /** Add the table to the OPTIMIZER's list. */
396 void fts_optimize_add_table(dict_table_t *table); /*!< in: table to add */
397 
398 /** Construct the prefix name of an FTS table.
399  @return own: table name prefix, must be freed with ut_free() */
400 char *fts_get_table_name_prefix(
401     const fts_table_t *fts_table) /*!< in: Auxiliary table type */
402     MY_ATTRIBUTE((warn_unused_result));
403 
404 /** Add positions to a word node. */
405 void fts_cache_node_add_positions(
406     fts_cache_t *cache,      /*!< in: cache */
407     fts_node_t *node,        /*!< in: word node */
408     doc_id_t doc_id,         /*!< in: doc id */
409     ib_vector_t *positions); /*!< in: fts_token_t::positions */
410 
411 /** Create the config parameter name used for an index specific value.
412  @return index config parameter name */
413 char *fts_config_create_index_param_name(
414     const char *param,         /*!< in: base name of param */
415     const dict_index_t *index) /*!< in: index for config */
416     MY_ATTRIBUTE((warn_unused_result));
417 
418 #include "fts0priv.ic"
419 
420 #endif /* INNOBASE_FTS0PRIV_H */
421