1 /***************************************************************************** 2 3 Copyright (c) 2010, 2016, Oracle and/or its affiliates. All Rights Reserved. 4 5 This program is free software; you can redistribute it and/or modify 6 it under the terms of the GNU General Public License, version 2.0, 7 as published by the Free Software Foundation. 8 9 This program is also distributed with certain software (including 10 but not limited to OpenSSL) that is licensed under separate terms, 11 as designated in a particular file or component or in included license 12 documentation. The authors of MySQL hereby grant you an additional 13 permission to link the program and your derivative works with the 14 separately licensed software that they have included with MySQL. 15 16 This program is distributed in the hope that it will be useful, 17 but WITHOUT ANY WARRANTY; without even the implied warranty of 18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 19 GNU General Public License, version 2.0, for more details. 20 21 You should have received a copy of the GNU General Public License along with 22 this program; if not, write to the Free Software Foundation, Inc., 23 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA 24 25 *****************************************************************************/ 26 27 /**************************************************//** 28 @file include/row0ftsort.h 29 Create Full Text Index with (parallel) merge sort 30 31 Created 10/13/2010 Jimmy Yang 32 *******************************************************/ 33 34 #ifndef row0ftsort_h 35 #define row0ftsort_h 36 37 #include "univ.i" 38 #include "data0data.h" 39 #include "dict0types.h" 40 #include "row0mysql.h" 41 #include "fts0fts.h" 42 #include "fts0types.h" 43 #include "fts0priv.h" 44 #include "row0merge.h" 45 46 /** This structure defineds information the scan thread will fetch 47 and put to the linked list for parallel tokenization/sort threads 48 to process */ 49 typedef struct fts_doc_item fts_doc_item_t; 50 51 /** Information about temporary files used in merge sort */ 52 struct fts_doc_item { 53 dfield_t* field; /*!< field contains document string */ 54 doc_id_t doc_id; /*!< document ID */ 55 UT_LIST_NODE_T(fts_doc_item_t) doc_list; 56 /*!< list of doc items */ 57 }; 58 59 /** This defines the list type that scan thread would feed the parallel 60 tokenization threads and sort threads. */ 61 typedef UT_LIST_BASE_NODE_T(fts_doc_item_t) fts_doc_list_t; 62 63 #define FTS_NUM_AUX_INDEX 6 64 #define FTS_PLL_MERGE 1 65 66 /** Sort information passed to each individual parallel sort thread */ 67 struct fts_psort_t; 68 69 /** Common info passed to each parallel sort thread */ 70 struct fts_psort_common_t { 71 row_merge_dup_t* dup; /*!< descriptor of FTS index */ 72 dict_table_t* new_table; /*!< source table */ 73 trx_t* trx; /*!< transaction */ 74 fts_psort_t* all_info; /*!< all parallel sort info */ 75 os_event_t sort_event; /*!< sort event */ 76 os_event_t merge_event; /*!< merge event */ 77 ibool opt_doc_id_size;/*!< whether to use 4 bytes 78 instead of 8 bytes integer to 79 store Doc ID during sort, if 80 Doc ID will not be big enough 81 to use 8 bytes value */ 82 }; 83 84 struct fts_psort_t { 85 ulint psort_id; /*!< Parallel sort ID */ 86 row_merge_buf_t* merge_buf[FTS_NUM_AUX_INDEX]; 87 /*!< sort buffer */ 88 merge_file_t* merge_file[FTS_NUM_AUX_INDEX]; 89 /*!< sort file */ 90 row_merge_block_t* merge_block[FTS_NUM_AUX_INDEX]; 91 /*!< buffer to write to file */ 92 row_merge_block_t* block_alloc[FTS_NUM_AUX_INDEX]; 93 /*!< buffer to allocated */ 94 ulint child_status; /*!< child thread status */ 95 ulint state; /*!< parent thread state */ 96 fts_doc_list_t fts_doc_list; /*!< doc list to process */ 97 fts_psort_common_t* psort_common; /*!< ptr to all psort info */ 98 os_thread_t thread_hdl; /*!< thread handler */ 99 dberr_t error; /*!< db error during psort */ 100 ulint memory_used; /*!< memory used by fts_doc_list */ 101 ib_mutex_t mutex; /*!< mutex for fts_doc_list */ 102 }; 103 104 /** Structure stores information from string tokenization operation */ 105 struct fts_tokenize_ctx { 106 ulint processed_len; /*!< processed string length */ 107 ulint init_pos; /*!< doc start position */ 108 ulint buf_used; /*!< the sort buffer (ID) when 109 tokenization stops, which 110 could due to sort buffer full */ 111 ulint rows_added[FTS_NUM_AUX_INDEX]; 112 /*!< number of rows added for 113 each FTS index partition */ 114 ib_rbt_t* cached_stopword;/*!< in: stopword list */ 115 dfield_t sort_field[FTS_NUM_FIELDS_SORT]; 116 /*!< in: sort field */ 117 }; 118 119 typedef struct fts_tokenize_ctx fts_tokenize_ctx_t; 120 121 /** Structure stores information needed for the insertion phase of FTS 122 parallel sort. */ 123 struct fts_psort_insert { 124 trx_t* trx; /*!< Transaction used for insertion */ 125 que_t** ins_graph; /*!< insert graph */ 126 fts_table_t fts_table; /*!< auxiliary table */ 127 CHARSET_INFO* charset; /*!< charset info */ 128 mem_heap_t* heap; /*!< heap */ 129 ibool opt_doc_id_size;/*!< Whether to use smaller (4 bytes) 130 integer for Doc ID */ 131 }; 132 133 typedef struct fts_psort_insert fts_psort_insert_t; 134 135 136 /** status bit used for communication between parent and child thread */ 137 #define FTS_PARENT_COMPLETE 1 138 #define FTS_PARENT_EXITING 2 139 #define FTS_CHILD_COMPLETE 1 140 #define FTS_CHILD_EXITING 2 141 142 /** Print some debug information */ 143 #define FTSORT_PRINT 144 145 #ifdef FTSORT_PRINT 146 #define DEBUG_FTS_SORT_PRINT(str) \ 147 do { \ 148 ut_print_timestamp(stderr); \ 149 fprintf(stderr, str); \ 150 } while (0) 151 #else 152 #define DEBUG_FTS_SORT_PRINT(str) 153 #endif /* FTSORT_PRINT */ 154 155 /*************************************************************//** 156 Create a temporary "fts sort index" used to merge sort the 157 tokenized doc string. The index has three "fields": 158 159 1) Tokenized word, 160 2) Doc ID 161 3) Word's position in original 'doc'. 162 163 @return dict_index_t structure for the fts sort index */ 164 UNIV_INTERN 165 dict_index_t* 166 row_merge_create_fts_sort_index( 167 /*============================*/ 168 dict_index_t* index, /*!< in: Original FTS index 169 based on which this sort index 170 is created */ 171 const dict_table_t* table, /*!< in: table that FTS index 172 is being created on */ 173 ibool* opt_doc_id_size); 174 /*!< out: whether to use 4 bytes 175 instead of 8 bytes integer to 176 store Doc ID during sort */ 177 178 /********************************************************************//** 179 Initialize FTS parallel sort structures. 180 @return TRUE if all successful */ 181 UNIV_INTERN 182 ibool 183 row_fts_psort_info_init( 184 /*====================*/ 185 trx_t* trx, /*!< in: transaction */ 186 row_merge_dup_t* dup, /*!< in,own: descriptor of 187 FTS index being created */ 188 const dict_table_t* new_table,/*!< in: table where indexes are 189 created */ 190 ibool opt_doc_id_size, 191 /*!< in: whether to use 4 bytes 192 instead of 8 bytes integer to 193 store Doc ID during sort */ 194 fts_psort_t** psort, /*!< out: parallel sort info to be 195 instantiated */ 196 fts_psort_t** merge) /*!< out: parallel merge info 197 to be instantiated */ 198 MY_ATTRIBUTE((nonnull)); 199 /********************************************************************//** 200 Clean up and deallocate FTS parallel sort structures, and close 201 temparary merge sort files */ 202 UNIV_INTERN 203 void 204 row_fts_psort_info_destroy( 205 /*=======================*/ 206 fts_psort_t* psort_info, /*!< parallel sort info */ 207 fts_psort_t* merge_info); /*!< parallel merge info */ 208 /********************************************************************//** 209 Free up merge buffers when merge sort is done */ 210 UNIV_INTERN 211 void 212 row_fts_free_pll_merge_buf( 213 /*=======================*/ 214 fts_psort_t* psort_info); /*!< in: parallel sort info */ 215 216 /*********************************************************************//** 217 Function performs parallel tokenization of the incoming doc strings. 218 @return OS_THREAD_DUMMY_RETURN */ 219 UNIV_INTERN 220 os_thread_ret_t 221 fts_parallel_tokenization( 222 /*======================*/ 223 void* arg); /*!< in: psort_info for the thread */ 224 /*********************************************************************//** 225 Start the parallel tokenization and parallel merge sort */ 226 UNIV_INTERN 227 void 228 row_fts_start_psort( 229 /*================*/ 230 fts_psort_t* psort_info); /*!< in: parallel sort info */ 231 /*********************************************************************//** 232 Function performs the merge and insertion of the sorted records. 233 @return OS_THREAD_DUMMY_RETURN */ 234 UNIV_INTERN 235 os_thread_ret_t 236 fts_parallel_merge( 237 /*===============*/ 238 void* arg); /*!< in: parallel merge info */ 239 /*********************************************************************//** 240 Kick off the parallel merge and insert thread */ 241 UNIV_INTERN 242 void 243 row_fts_start_parallel_merge( 244 /*=========================*/ 245 fts_psort_t* merge_info); /*!< in: parallel sort info */ 246 /********************************************************************//** 247 Read sorted FTS data files and insert data tuples to auxillary tables. 248 @return DB_SUCCESS or error number */ 249 UNIV_INTERN 250 void 251 row_fts_insert_tuple( 252 /*=================*/ 253 fts_psort_insert_t* 254 ins_ctx, /*!< in: insert context */ 255 fts_tokenizer_word_t* word, /*!< in: last processed 256 tokenized word */ 257 ib_vector_t* positions, /*!< in: word position */ 258 doc_id_t* in_doc_id, /*!< in: last item doc id */ 259 dtuple_t* dtuple); /*!< in: entry to insert */ 260 /********************************************************************//** 261 Propagate a newly added record up one level in the selection tree 262 @return parent where this value propagated to */ 263 UNIV_INTERN 264 int 265 row_merge_fts_sel_propagate( 266 /*========================*/ 267 int propogated, /*<! in: tree node propagated */ 268 int* sel_tree, /*<! in: selection tree */ 269 ulint level, /*<! in: selection tree level */ 270 const mrec_t** mrec, /*<! in: sort record */ 271 ulint** offsets, /*<! in: record offsets */ 272 dict_index_t* index); /*<! in: FTS index */ 273 /********************************************************************//** 274 Read sorted file containing index data tuples and insert these data 275 tuples to the index 276 @return DB_SUCCESS or error number */ 277 UNIV_INTERN 278 dberr_t 279 row_fts_merge_insert( 280 /*=================*/ 281 dict_index_t* index, /*!< in: index */ 282 dict_table_t* table, /*!< in: new table */ 283 fts_psort_t* psort_info, /*!< parallel sort info */ 284 ulint id) /* !< in: which auxiliary table's data 285 to insert to */ 286 MY_ATTRIBUTE((nonnull)); 287 #endif /* row0ftsort_h */ 288