1 /*****************************************************************************
2 
3 Copyright (c) 2010, 2020, Oracle and/or its affiliates. All Rights Reserved.
4 
5 This program is free software; you can redistribute it and/or modify it under
6 the terms of the GNU General Public License, version 2.0, as published by the
7 Free Software Foundation.
8 
9 This program is also distributed with certain software (including but not
10 limited to OpenSSL) that is licensed under separate terms, as designated in a
11 particular file or component or in included license documentation. The authors
12 of MySQL hereby grant you an additional permission to link the program and
13 your derivative works with the separately licensed software that they have
14 included with MySQL.
15 
16 This program is distributed in the hope that it will be useful, but WITHOUT
17 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
18 FOR A PARTICULAR PURPOSE. See the GNU General Public License, version 2.0,
19 for more details.
20 
21 You should have received a copy of the GNU General Public License along with
22 this program; if not, write to the Free Software Foundation, Inc.,
23 51 Franklin St, Fifth Floor, Boston, MA 02110-1301  USA
24 
25 *****************************************************************************/
26 
27 /** @file include/row0ftsort.h
28  Create Full Text Index with (parallel) merge sort
29 
30  Created 10/13/2010 Jimmy Yang
31  *******************************************************/
32 
33 #ifndef row0ftsort_h
34 #define row0ftsort_h
35 
36 #include "btr0bulk.h"
37 #include "data0data.h"
38 #include "dict0types.h"
39 #include "fts0fts.h"
40 #include "fts0priv.h"
41 #include "fts0types.h"
42 #include "row0merge.h"
43 #include "row0mysql.h"
44 #include "univ.i"
45 
46 /** This structure defineds information the scan thread will fetch
47 and put to the linked list for parallel tokenization/sort threads
48 to process */
49 typedef struct fts_doc_item fts_doc_item_t;
50 
51 /** Information about temporary files used in merge sort */
52 struct fts_doc_item {
53   dfield_t *field; /*!< field contains document string */
54   doc_id_t doc_id; /*!< document ID */
55   UT_LIST_NODE_T(fts_doc_item_t) doc_list;
56   /*!< list of doc items */
57 };
58 
59 /** This defines the list type that scan thread would feed the parallel
60 tokenization threads and sort threads. */
61 typedef UT_LIST_BASE_NODE_T(fts_doc_item_t) fts_doc_list_t;
62 
63 #define FTS_PLL_MERGE 1
64 
65 /** Sort information passed to each individual parallel sort thread */
66 struct fts_psort_t;
67 
68 /** Common info passed to each parallel sort thread */
69 struct fts_psort_common_t {
70   row_merge_dup_t *dup;    /*!< descriptor of FTS index */
71   dict_table_t *old_table; /*!< Needed to fetch LOB from
72                            old table. */
73   dict_table_t *new_table; /*!< source table */
74   trx_t *trx;              /*!< transaction */
75   fts_psort_t *all_info;   /*!< all parallel sort info */
76   os_event_t sort_event;   /*!< sort event */
77   os_event_t merge_event;  /*!< merge event */
78   ibool opt_doc_id_size;   /*!< whether to use 4 bytes
79                            instead of 8 bytes integer to
80                            store Doc ID during sort, if
81                            Doc ID will not be big enough
82                            to use 8 bytes value */
83 };
84 
85 struct fts_psort_t {
86   ulint psort_id; /*!< Parallel sort ID */
87   row_merge_buf_t *merge_buf[FTS_NUM_AUX_INDEX];
88   /*!< sort buffer */
89   merge_file_t *merge_file[FTS_NUM_AUX_INDEX];
90   /*!< sort file */
91   row_merge_block_t *merge_block[FTS_NUM_AUX_INDEX];
92   /*!< buffer to write to file */
93   row_merge_block_t *block_alloc[FTS_NUM_AUX_INDEX];
94   /*!< buffer to allocated */
95   ulint child_status;               /*!< child thread status */
96   ulint state;                      /*!< parent thread state */
97   fts_doc_list_t fts_doc_list;      /*!< doc list to process */
98   fts_psort_common_t *psort_common; /*!< ptr to all psort info */
99   dberr_t error;                    /*!< db error during psort */
100   ulint memory_used;                /*!< memory used by fts_doc_list */
101   ib_mutex_t mutex;                 /*!< mutex for fts_doc_list */
102 };
103 
104 /** Row fts token for plugin parser */
105 struct row_fts_token_t {
106   fts_string_t *text; /*!< token */
107   ulint position;     /*!< token position in the document */
108   UT_LIST_NODE_T(row_fts_token_t)
109   token_list; /*!< next token link */
110 };
111 
112 typedef UT_LIST_BASE_NODE_T(row_fts_token_t) fts_token_list_t;
113 
114 /** Structure stores information from string tokenization operation */
115 struct fts_tokenize_ctx {
116   ulint processed_len{0}; /*!< processed string length */
117   ulint init_pos{0};      /*!< doc start position */
118   ulint buf_used{0};      /*!< the sort buffer (ID) when
119                                tokenization stops, which
120                                could due to sort buffer full */
121   ulint rows_added[FTS_NUM_AUX_INDEX]{0};
122   /*!< number of rows added for
123   each FTS index partition */
124   ib_rbt_t *cached_stopword{nullptr}; /*!< in: stopword list */
125   dfield_t sort_field[FTS_NUM_FIELDS_SORT];
126   /*!< in: sort field */
127   fts_token_list_t fts_token_list;
128 };
129 
130 typedef struct fts_tokenize_ctx fts_tokenize_ctx_t;
131 
132 /** Structure stores information needed for the insertion phase of FTS
133 parallel sort. */
134 struct fts_psort_insert {
135   CHARSET_INFO *charset; /*!< charset info */
136   mem_heap_t *heap;      /*!< heap */
137   ibool opt_doc_id_size; /*!< Whether to use smaller (4 bytes)
138                          integer for Doc ID */
139   BtrBulk *btr_bulk;     /*!< Bulk load instance */
140   dtuple_t *tuple;       /*!< Tuple to insert */
141 
142 #ifdef UNIV_DEBUG
143   ulint aux_index_id; /*!< Auxiliary index id */
144 #endif
145 };
146 
147 typedef struct fts_psort_insert fts_psort_insert_t;
148 
149 /** status bit used for communication between parent and child thread */
150 #define FTS_PARENT_COMPLETE 1
151 #define FTS_PARENT_EXITING 2
152 #define FTS_CHILD_COMPLETE 1
153 #define FTS_CHILD_EXITING 2
154 
155 /** Print some debug information */
156 #define FTSORT_PRINT
157 
158 #ifdef FTSORT_PRINT
159 #define DEBUG_FTS_SORT_PRINT(str) \
160   do {                            \
161     ut_print_timestamp(stderr);   \
162     fprintf(stderr, str);         \
163   } while (0)
164 #else
165 #define DEBUG_FTS_SORT_PRINT(str)
166 #endif /* FTSORT_PRINT */
167 
168 /** Create a temporary "fts sort index" used to merge sort the
169  tokenized doc string. The index has three "fields":
170 
171  1. Tokenized word,
172  2. Doc ID
173  3. Word's position in original 'doc'.
174 
175  @return dict_index_t structure for the fts sort index */
176 dict_index_t *row_merge_create_fts_sort_index(
177     dict_index_t *index,       /*!< in: Original FTS index
178                                based on which this sort index
179                                is created */
180     const dict_table_t *table, /*!< in: table that FTS index
181                                is being created on */
182     ibool *opt_doc_id_size);
183 /*!< out: whether to use 4 bytes
184 instead of 8 bytes integer to
185 store Doc ID during sort */
186 
187 /** Initialize FTS parallel sort structures.
188  @return true if all successful */
189 ibool row_fts_psort_info_init(
190     trx_t *trx,           /*!< in: transaction */
191     row_merge_dup_t *dup, /*!< in,own: descriptor of
192                           FTS index being created */
193     const dict_table_t *old_table,
194     /*!< in: Needed to fetch LOB from old
195     table */
196     const dict_table_t *new_table, /*!< in: table where indexes are
197                                  created */
198     ibool opt_doc_id_size,
199     /*!< in: whether to use 4 bytes
200     instead of 8 bytes integer to
201     store Doc ID during sort */
202     fts_psort_t **psort,  /*!< out: parallel sort info to be
203                           instantiated */
204     fts_psort_t **merge); /*!< out: parallel merge info
205                           to be instantiated */
206 /** Clean up and deallocate FTS parallel sort structures, and close
207  temparary merge sort files */
208 void row_fts_psort_info_destroy(
209     fts_psort_t *psort_info,  /*!< parallel sort info */
210     fts_psort_t *merge_info); /*!< parallel merge info */
211 /** Free up merge buffers when merge sort is done */
212 void row_fts_free_pll_merge_buf(
213     fts_psort_t *psort_info); /*!< in: parallel sort info */
214 
215 /** Start the parallel tokenization and parallel merge sort */
216 void row_fts_start_psort(
217     fts_psort_t *psort_info); /*!< in: parallel sort info */
218 /** Kick off the parallel merge and insert thread */
219 void row_fts_start_parallel_merge(
220     fts_psort_t *merge_info); /*!< in: parallel sort info */
221 /** Propagate a newly added record up one level in the selection tree
222  @return parent where this value propagated to */
223 int row_merge_fts_sel_propagate(
224     int propogated,      /*!< [in] tree node propagated */
225     int *sel_tree,       /*!< [in] selection tree */
226     ulint level,         /*!< [in] selection tree level */
227     const mrec_t **mrec, /*!< [in] sort record */
228     ulint **offsets,     /*!< [in] record offsets */
229     dict_index_t *index  /*!< [in] FTS index */
230 );
231 
232 /** Read sorted file containing index data tuples and insert these data
233 tuples to the index
234 @param[in]	index		index
235 @param[in]	table		new table
236 @param[in]	psort_info	parallel sort info
237 @param[in]	id		which auxiliary table's data to insert to
238 @return DB_SUCCESS or error number */
239 dberr_t row_fts_merge_insert(dict_index_t *index, dict_table_t *table,
240                              fts_psort_t *psort_info, ulint id);
241 #endif /* row0ftsort_h */
242