1 /***************************************************************************** 2 3 Copyright (c) 2010, 2018, Oracle and/or its affiliates. All Rights Reserved. 4 5 This program is free software; you can redistribute it and/or modify 6 it under the terms of the GNU General Public License, version 2.0, 7 as published by the Free Software Foundation. 8 9 This program is also distributed with certain software (including 10 but not limited to OpenSSL) that is licensed under separate terms, 11 as designated in a particular file or component or in included license 12 documentation. The authors of MySQL hereby grant you an additional 13 permission to link the program and your derivative works with the 14 separately licensed software that they have included with MySQL. 15 16 This program is distributed in the hope that it will be useful, 17 but WITHOUT ANY WARRANTY; without even the implied warranty of 18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 19 GNU General Public License, version 2.0, for more details. 20 21 You should have received a copy of the GNU General Public License along with 22 this program; if not, write to the Free Software Foundation, Inc., 23 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA 24 25 *****************************************************************************/ 26 27 /**************************************************//** 28 @file include/row0ftsort.h 29 Create Full Text Index with (parallel) merge sort 30 31 Created 10/13/2010 Jimmy Yang 32 *******************************************************/ 33 34 #ifndef row0ftsort_h 35 #define row0ftsort_h 36 37 #include "univ.i" 38 #include "data0data.h" 39 #include "dict0types.h" 40 #include "row0mysql.h" 41 #include "fts0fts.h" 42 #include "fts0types.h" 43 #include "fts0priv.h" 44 #include "row0merge.h" 45 #include "btr0bulk.h" 46 47 /** This structure defineds information the scan thread will fetch 48 and put to the linked list for parallel tokenization/sort threads 49 to process */ 50 typedef struct fts_doc_item fts_doc_item_t; 51 52 /** Information about temporary files used in merge sort */ 53 struct fts_doc_item { 54 dfield_t* field; /*!< field contains document string */ 55 doc_id_t doc_id; /*!< document ID */ 56 UT_LIST_NODE_T(fts_doc_item_t) doc_list; 57 /*!< list of doc items */ 58 }; 59 60 /** This defines the list type that scan thread would feed the parallel 61 tokenization threads and sort threads. */ 62 typedef UT_LIST_BASE_NODE_T(fts_doc_item_t) fts_doc_list_t; 63 64 #define FTS_PLL_MERGE 1 65 66 /** Sort information passed to each individual parallel sort thread */ 67 struct fts_psort_t; 68 69 /** Common info passed to each parallel sort thread */ 70 struct fts_psort_common_t { 71 row_merge_dup_t* dup; /*!< descriptor of FTS index */ 72 dict_table_t* new_table; /*!< source table */ 73 trx_t* trx; /*!< transaction */ 74 fts_psort_t* all_info; /*!< all parallel sort info */ 75 os_event_t sort_event; /*!< sort event */ 76 os_event_t merge_event; /*!< merge event */ 77 ibool opt_doc_id_size;/*!< whether to use 4 bytes 78 instead of 8 bytes integer to 79 store Doc ID during sort, if 80 Doc ID will not be big enough 81 to use 8 bytes value */ 82 }; 83 84 struct fts_psort_t { 85 ulint psort_id; /*!< Parallel sort ID */ 86 row_merge_buf_t* merge_buf[FTS_NUM_AUX_INDEX]; 87 /*!< sort buffer */ 88 merge_file_t* merge_file[FTS_NUM_AUX_INDEX]; 89 /*!< sort file */ 90 row_merge_block_t* merge_block[FTS_NUM_AUX_INDEX]; 91 /*!< buffer to write to file */ 92 row_merge_block_t* block_alloc[FTS_NUM_AUX_INDEX]; 93 /*!< buffer to allocated */ 94 ulint child_status; /*!< child thread status */ 95 ulint state; /*!< parent thread state */ 96 fts_doc_list_t fts_doc_list; /*!< doc list to process */ 97 fts_psort_common_t* psort_common; /*!< ptr to all psort info */ 98 os_thread_id_t thread_hdl; /*!< thread handle */ 99 dberr_t error; /*!< db error during psort */ 100 ulint memory_used; /*!< memory used by fts_doc_list */ 101 ib_mutex_t mutex; /*!< mutex for fts_doc_list */ 102 }; 103 104 /** Row fts token for plugin parser */ 105 struct row_fts_token_t { 106 fts_string_t* text; /*!< token */ 107 ulint position; /*!< token position in the document */ 108 UT_LIST_NODE_T(row_fts_token_t) 109 token_list; /*!< next token link */ 110 }; 111 112 typedef UT_LIST_BASE_NODE_T(row_fts_token_t) fts_token_list_t; 113 114 /** Structure stores information from string tokenization operation */ 115 struct fts_tokenize_ctx { fts_tokenize_ctxfts_tokenize_ctx116 fts_tokenize_ctx() { memset(this, 0, sizeof(*this)); } 117 118 ulint processed_len; /*!< processed string length */ 119 ulint init_pos; /*!< doc start position */ 120 ulint buf_used; /*!< the sort buffer (ID) when 121 tokenization stops, which 122 could due to sort buffer full */ 123 ulint rows_added[FTS_NUM_AUX_INDEX]; 124 /*!< number of rows added for 125 each FTS index partition */ 126 ib_rbt_t* cached_stopword;/*!< in: stopword list */ 127 dfield_t sort_field[FTS_NUM_FIELDS_SORT]; 128 /*!< in: sort field */ 129 fts_token_list_t fts_token_list; 130 }; 131 132 typedef struct fts_tokenize_ctx fts_tokenize_ctx_t; 133 134 /** Structure stores information needed for the insertion phase of FTS 135 parallel sort. */ 136 struct fts_psort_insert { 137 CHARSET_INFO* charset; /*!< charset info */ 138 mem_heap_t* heap; /*!< heap */ 139 ibool opt_doc_id_size;/*!< Whether to use smaller (4 bytes) 140 integer for Doc ID */ 141 BtrBulk* btr_bulk; /*!< Bulk load instance */ 142 dtuple_t* tuple; /*!< Tuple to insert */ 143 144 #ifdef UNIV_DEBUG 145 ulint aux_index_id; /*!< Auxiliary index id */ 146 #endif 147 }; 148 149 typedef struct fts_psort_insert fts_psort_insert_t; 150 151 152 /** status bit used for communication between parent and child thread */ 153 #define FTS_PARENT_COMPLETE 1 154 #define FTS_PARENT_EXITING 2 155 #define FTS_CHILD_COMPLETE 1 156 #define FTS_CHILD_EXITING 2 157 158 /** Print some debug information */ 159 #define FTSORT_PRINT 160 161 #ifdef FTSORT_PRINT 162 #define DEBUG_FTS_SORT_PRINT(str) \ 163 do { \ 164 ut_print_timestamp(stderr); \ 165 fprintf(stderr, str); \ 166 } while (0) 167 #else 168 #define DEBUG_FTS_SORT_PRINT(str) 169 #endif /* FTSORT_PRINT */ 170 171 /*************************************************************//** 172 Create a temporary "fts sort index" used to merge sort the 173 tokenized doc string. The index has three "fields": 174 175 1) Tokenized word, 176 2) Doc ID 177 3) Word's position in original 'doc'. 178 179 @return dict_index_t structure for the fts sort index */ 180 dict_index_t* 181 row_merge_create_fts_sort_index( 182 /*============================*/ 183 dict_index_t* index, /*!< in: Original FTS index 184 based on which this sort index 185 is created */ 186 const dict_table_t* table, /*!< in: table that FTS index 187 is being created on */ 188 ibool* opt_doc_id_size); 189 /*!< out: whether to use 4 bytes 190 instead of 8 bytes integer to 191 store Doc ID during sort */ 192 193 /********************************************************************//** 194 Initialize FTS parallel sort structures. 195 @return TRUE if all successful */ 196 ibool 197 row_fts_psort_info_init( 198 /*====================*/ 199 trx_t* trx, /*!< in: transaction */ 200 row_merge_dup_t* dup, /*!< in,own: descriptor of 201 FTS index being created */ 202 const dict_table_t* new_table,/*!< in: table where indexes are 203 created */ 204 ibool opt_doc_id_size, 205 /*!< in: whether to use 4 bytes 206 instead of 8 bytes integer to 207 store Doc ID during sort */ 208 fts_psort_t** psort, /*!< out: parallel sort info to be 209 instantiated */ 210 fts_psort_t** merge) /*!< out: parallel merge info 211 to be instantiated */ 212 MY_ATTRIBUTE((nonnull)); 213 /********************************************************************//** 214 Clean up and deallocate FTS parallel sort structures, and close 215 temparary merge sort files */ 216 void 217 row_fts_psort_info_destroy( 218 /*=======================*/ 219 fts_psort_t* psort_info, /*!< parallel sort info */ 220 fts_psort_t* merge_info); /*!< parallel merge info */ 221 /********************************************************************//** 222 Free up merge buffers when merge sort is done */ 223 void 224 row_fts_free_pll_merge_buf( 225 /*=======================*/ 226 fts_psort_t* psort_info); /*!< in: parallel sort info */ 227 228 /*********************************************************************//** 229 Function performs parallel tokenization of the incoming doc strings. 230 @return OS_THREAD_DUMMY_RETURN */ 231 os_thread_ret_t 232 fts_parallel_tokenization( 233 /*======================*/ 234 void* arg); /*!< in: psort_info for the thread */ 235 /*********************************************************************//** 236 Start the parallel tokenization and parallel merge sort */ 237 void 238 row_fts_start_psort( 239 /*================*/ 240 fts_psort_t* psort_info); /*!< in: parallel sort info */ 241 /*********************************************************************//** 242 Function performs the merge and insertion of the sorted records. 243 @return OS_THREAD_DUMMY_RETURN */ 244 os_thread_ret_t 245 fts_parallel_merge( 246 /*===============*/ 247 void* arg); /*!< in: parallel merge info */ 248 /*********************************************************************//** 249 Kick off the parallel merge and insert thread */ 250 void 251 row_fts_start_parallel_merge( 252 /*=========================*/ 253 fts_psort_t* merge_info); /*!< in: parallel sort info */ 254 /********************************************************************//** 255 Read sorted FTS data files and insert data tuples to auxillary tables. 256 @return DB_SUCCESS or error number */ 257 void 258 row_fts_insert_tuple( 259 /*=================*/ 260 fts_psort_insert_t* 261 ins_ctx, /*!< in: insert context */ 262 fts_tokenizer_word_t* word, /*!< in: last processed 263 tokenized word */ 264 ib_vector_t* positions, /*!< in: word position */ 265 doc_id_t* in_doc_id, /*!< in: last item doc id */ 266 dtuple_t* dtuple); /*!< in: entry to insert */ 267 /********************************************************************//** 268 Propagate a newly added record up one level in the selection tree 269 @return parent where this value propagated to */ 270 int 271 row_merge_fts_sel_propagate( 272 /*========================*/ 273 int propogated, /*<! in: tree node propagated */ 274 int* sel_tree, /*<! in: selection tree */ 275 ulint level, /*<! in: selection tree level */ 276 const mrec_t** mrec, /*<! in: sort record */ 277 ulint** offsets, /*<! in: record offsets */ 278 dict_index_t* index); /*<! in: FTS index */ 279 /********************************************************************//** 280 Read sorted file containing index data tuples and insert these data 281 tuples to the index 282 @return DB_SUCCESS or error number */ 283 dberr_t 284 row_fts_merge_insert( 285 /*=================*/ 286 dict_index_t* index, /*!< in: index */ 287 dict_table_t* table, /*!< in: new table */ 288 fts_psort_t* psort_info, /*!< parallel sort info */ 289 ulint id); /* !< in: which auxiliary table's data 290 to insert to */ 291 #endif /* row0ftsort_h */ 292