1 /***************************************************************************** 2 3 Copyright (c) 2005, 2021, Oracle and/or its affiliates. 4 5 This program is free software; you can redistribute it and/or modify 6 it under the terms of the GNU General Public License, version 2.0, 7 as published by the Free Software Foundation. 8 9 This program is also distributed with certain software (including 10 but not limited to OpenSSL) that is licensed under separate terms, 11 as designated in a particular file or component or in included license 12 documentation. The authors of MySQL hereby grant you an additional 13 permission to link the program and your derivative works with the 14 separately licensed software that they have included with MySQL. 15 16 This program is distributed in the hope that it will be useful, 17 but WITHOUT ANY WARRANTY; without even the implied warranty of 18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 19 GNU General Public License, version 2.0, for more details. 20 21 You should have received a copy of the GNU General Public License along with 22 this program; if not, write to the Free Software Foundation, Inc., 23 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA 24 25 *****************************************************************************/ 26 27 /**************************************************//** 28 @file include/row0merge.h 29 Index build routines using a merge sort 30 31 Created 13/06/2005 Jan Lindstrom 32 *******************************************************/ 33 34 #ifndef row0merge_h 35 #define row0merge_h 36 37 #include "univ.i" 38 #include "data0data.h" 39 #include "dict0types.h" 40 #include "trx0types.h" 41 #include "que0types.h" 42 #include "mtr0mtr.h" 43 #include "rem0types.h" 44 #include "rem0rec.h" 45 #include "btr0types.h" 46 #include "row0mysql.h" 47 #include "lock0types.h" 48 #include "srv0srv.h" 49 #include "ut0stage.h" 50 51 // Forward declaration 52 struct ib_sequence_t; 53 54 /** @brief Block size for I/O operations in merge sort. 55 56 The minimum is UNIV_PAGE_SIZE, or page_get_free_space_of_empty() 57 rounded to a power of 2. 58 59 When not creating a PRIMARY KEY that contains column prefixes, this 60 can be set as small as UNIV_PAGE_SIZE / 2. */ 61 typedef byte row_merge_block_t; 62 63 /** @brief Secondary buffer for I/O operations of merge records. 64 65 This buffer is used for writing or reading a record that spans two 66 row_merge_block_t. Thus, it must be able to hold one merge record, 67 whose maximum size is the same as the minimum size of 68 row_merge_block_t. */ 69 typedef byte mrec_buf_t[UNIV_PAGE_SIZE_MAX]; 70 71 /** @brief Merge record in row_merge_block_t. 72 73 The format is the same as a record in ROW_FORMAT=COMPACT with the 74 exception that the REC_N_NEW_EXTRA_BYTES are omitted. */ 75 typedef byte mrec_t; 76 77 /** Merge record in row_merge_buf_t */ 78 struct mtuple_t { 79 dfield_t* fields; /*!< data fields */ 80 }; 81 82 /** Buffer for sorting in main memory. */ 83 struct row_merge_buf_t { 84 mem_heap_t* heap; /*!< memory heap where allocated */ 85 dict_index_t* index; /*!< the index the tuples belong to */ 86 ulint total_size; /*!< total amount of data bytes */ 87 ulint n_tuples; /*!< number of data tuples */ 88 ulint max_tuples; /*!< maximum number of data tuples */ 89 mtuple_t* tuples; /*!< array of data tuples */ 90 mtuple_t* tmp_tuples; /*!< temporary copy of tuples, 91 for sorting */ 92 }; 93 94 /** Information about temporary files used in merge sort */ 95 struct merge_file_t { 96 int fd; /*!< file descriptor */ 97 ulint offset; /*!< file offset (end of file) */ 98 ib_uint64_t n_rec; /*!< number of records in the file */ 99 }; 100 101 /** Index field definition */ 102 struct index_field_t { 103 ulint col_no; /*!< column offset */ 104 ulint prefix_len; /*!< column prefix length, or 0 105 if indexing the whole column */ 106 bool is_v_col; /*!< whether this is a virtual column */ 107 }; 108 109 /** Definition of an index being created */ 110 struct index_def_t { 111 const char* name; /*!< index name */ 112 bool rebuild; /*!< whether the table is rebuilt */ 113 ulint ind_type; /*!< 0, DICT_UNIQUE, 114 or DICT_CLUSTERED */ 115 ulint key_number; /*!< MySQL key number, 116 or ULINT_UNDEFINED if none */ 117 ulint n_fields; /*!< number of fields in index */ 118 index_field_t* fields; /*!< field definitions */ 119 st_mysql_ftparser* 120 parser; /*!< fulltext parser plugin */ 121 bool is_ngram; /*!< true if it's ngram parser */ 122 }; 123 124 /** Structure for reporting duplicate records. */ 125 struct row_merge_dup_t { 126 dict_index_t* index; /*!< index being sorted */ 127 struct TABLE* table; /*!< MySQL table object */ 128 const ulint* col_map;/*!< mapping of column numbers 129 in table to the rebuilt table 130 (index->table), or NULL if not 131 rebuilding table */ 132 ulint n_dup; /*!< number of duplicates */ 133 }; 134 135 /*************************************************************//** 136 Report a duplicate key. */ 137 void 138 row_merge_dup_report( 139 /*=================*/ 140 row_merge_dup_t* dup, /*!< in/out: for reporting duplicates */ 141 const dfield_t* entry); /*!< in: duplicate index entry */ 142 /*********************************************************************//** 143 Sets an exclusive lock on a table, for the duration of creating indexes. 144 @return error code or DB_SUCCESS */ 145 dberr_t 146 row_merge_lock_table( 147 /*=================*/ 148 trx_t* trx, /*!< in/out: transaction */ 149 dict_table_t* table, /*!< in: table to lock */ 150 enum lock_mode mode) /*!< in: LOCK_X or LOCK_S */ 151 MY_ATTRIBUTE((warn_unused_result)); 152 /*********************************************************************//** 153 Drop indexes that were created before an error occurred. 154 The data dictionary must have been locked exclusively by the caller, 155 because the transaction will not be committed. */ 156 void 157 row_merge_drop_indexes_dict( 158 /*========================*/ 159 trx_t* trx, /*!< in/out: dictionary transaction */ 160 table_id_t table_id);/*!< in: table identifier */ 161 /*********************************************************************//** 162 Drop those indexes which were created before an error occurred. 163 The data dictionary must have been locked exclusively by the caller, 164 because the transaction will not be committed. */ 165 void 166 row_merge_drop_indexes( 167 /*===================*/ 168 trx_t* trx, /*!< in/out: transaction */ 169 dict_table_t* table, /*!< in/out: table containing the indexes */ 170 ibool locked); /*!< in: TRUE=table locked, 171 FALSE=may need to do a lazy drop */ 172 /*********************************************************************//** 173 Drop all partially created indexes during crash recovery. */ 174 void 175 row_merge_drop_temp_indexes(void); 176 /*=============================*/ 177 178 /** Create temporary merge files in the given paramater path, and if 179 UNIV_PFS_IO defined, register the file descriptor with Performance Schema. 180 @param[in] path location for creating temporary merge files. 181 @return File descriptor */ 182 int 183 row_merge_file_create_low( 184 const char* path) 185 MY_ATTRIBUTE((warn_unused_result)); 186 /*********************************************************************//** 187 Destroy a merge file. And de-register the file from Performance Schema 188 if UNIV_PFS_IO is defined. */ 189 void 190 row_merge_file_destroy_low( 191 /*=======================*/ 192 int fd); /*!< in: merge file descriptor */ 193 194 /*********************************************************************//** 195 Provide a new pathname for a table that is being renamed if it belongs to 196 a file-per-table tablespace. The caller is responsible for freeing the 197 memory allocated for the return value. 198 @return new pathname of tablespace file, or NULL if space = 0 */ 199 char* 200 row_make_new_pathname( 201 /*==================*/ 202 dict_table_t* table, /*!< in: table to be renamed */ 203 const char* new_name); /*!< in: new name */ 204 /*********************************************************************//** 205 Rename the tables in the data dictionary. The data dictionary must 206 have been locked exclusively by the caller, because the transaction 207 will not be committed. 208 @return error code or DB_SUCCESS */ 209 dberr_t 210 row_merge_rename_tables_dict( 211 /*=========================*/ 212 dict_table_t* old_table, /*!< in/out: old table, renamed to 213 tmp_name */ 214 dict_table_t* new_table, /*!< in/out: new table, renamed to 215 old_table->name */ 216 const char* tmp_name, /*!< in: new name for old_table */ 217 trx_t* trx) /*!< in/out: dictionary transaction */ 218 MY_ATTRIBUTE((warn_unused_result)); 219 220 /*********************************************************************//** 221 Rename an index in the dictionary that was created. The data 222 dictionary must have been locked exclusively by the caller, because 223 the transaction will not be committed. 224 @return DB_SUCCESS if all OK */ 225 dberr_t 226 row_merge_rename_index_to_add( 227 /*==========================*/ 228 trx_t* trx, /*!< in/out: transaction */ 229 table_id_t table_id, /*!< in: table identifier */ 230 index_id_t index_id) /*!< in: index identifier */ 231 MY_ATTRIBUTE((warn_unused_result)); 232 /*********************************************************************//** 233 Rename an index in the dictionary that is to be dropped. The data 234 dictionary must have been locked exclusively by the caller, because 235 the transaction will not be committed. 236 @return DB_SUCCESS if all OK */ 237 dberr_t 238 row_merge_rename_index_to_drop( 239 /*===========================*/ 240 trx_t* trx, /*!< in/out: transaction */ 241 table_id_t table_id, /*!< in: table identifier */ 242 index_id_t index_id) /*!< in: index identifier */ 243 MY_ATTRIBUTE((warn_unused_result)); 244 /** Create the index and load in to the dictionary. 245 @param[in,out] trx trx (sets error_state) 246 @param[in,out] table the index is on this table 247 @param[in] index_def the index definition 248 @param[in] add_v new virtual columns added along with add 249 index call 250 @return index, or NULL on error */ 251 dict_index_t* 252 row_merge_create_index( 253 trx_t* trx, 254 dict_table_t* table, 255 const index_def_t* index_def, 256 const dict_add_v_col_t* add_v); 257 /*********************************************************************//** 258 Check if a transaction can use an index. 259 @return TRUE if index can be used by the transaction else FALSE */ 260 ibool 261 row_merge_is_index_usable( 262 /*======================*/ 263 const trx_t* trx, /*!< in: transaction */ 264 const dict_index_t* index); /*!< in: index to check */ 265 /*********************************************************************//** 266 Drop a table. The caller must have ensured that the background stats 267 thread is not processing the table. This can be done by calling 268 dict_stats_wait_bg_to_stop_using_table() after locking the dictionary and 269 before calling this function. 270 @return DB_SUCCESS or error code */ 271 dberr_t 272 row_merge_drop_table( 273 /*=================*/ 274 trx_t* trx, /*!< in: transaction */ 275 dict_table_t* table); /*!< in: table instance to drop */ 276 277 /** Build indexes on a table by reading a clustered index, creating a temporary 278 file containing index entries, merge sorting these index entries and inserting 279 sorted index entries to indexes. 280 @param[in] trx transaction 281 @param[in] old_table table where rows are read from 282 @param[in] new_table table where indexes are created; identical to 283 old_table unless creating a PRIMARY KEY 284 @param[in] online true if creating indexes online 285 @param[in] indexes indexes to be created 286 @param[in] key_numbers MySQL key numbers 287 @param[in] n_indexes size of indexes[] 288 @param[in,out] table MySQL table, for reporting erroneous key value 289 if applicable 290 @param[in] add_cols default values of added columns, or NULL 291 @param[in] col_map mapping of old column numbers to new ones, or 292 NULL if old_table == new_table 293 @param[in] add_autoinc number of added AUTO_INCREMENT columns, or 294 ULINT_UNDEFINED if none is added 295 @param[in,out] sequence autoinc sequence 296 @param[in] skip_pk_sort whether the new PRIMARY KEY will follow 297 existing order 298 @param[in,out] stage performance schema accounting object, used by 299 ALTER TABLE. stage->begin_phase_read_pk() will be called at the beginning of 300 this function and it will be passed to other functions for further accounting. 301 @param[in] add_v new virtual columns added along with indexes 302 @param[in] eval_table mysql table used to evaluate virtual column 303 value, see innobase_get_computed_value(). 304 @param[in] prebuilt compress_heap must be taken from here 305 @return DB_SUCCESS or error code */ 306 dberr_t 307 row_merge_build_indexes( 308 trx_t* trx, 309 dict_table_t* old_table, 310 dict_table_t* new_table, 311 bool online, 312 dict_index_t** indexes, 313 const ulint* key_numbers, 314 ulint n_indexes, 315 struct TABLE* table, 316 const dtuple_t* add_cols, 317 const ulint* col_map, 318 ulint add_autoinc, 319 ib_sequence_t& sequence, 320 bool skip_pk_sort, 321 ut_stage_alter_t* stage, 322 const dict_add_v_col_t* add_v, 323 struct TABLE* eval_table, 324 row_prebuilt_t* prebuilt) 325 MY_ATTRIBUTE((warn_unused_result)); 326 327 /********************************************************************//** 328 Write a buffer to a block. */ 329 void 330 row_merge_buf_write( 331 /*================*/ 332 const row_merge_buf_t* buf, /*!< in: sorted buffer */ 333 const merge_file_t* of, /*!< in: output file */ 334 row_merge_block_t* block); /*!< out: buffer for writing to file */ 335 336 /********************************************************************//** 337 Sort a buffer. */ 338 void 339 row_merge_buf_sort( 340 /*===============*/ 341 row_merge_buf_t* buf, /*!< in/out: sort buffer */ 342 row_merge_dup_t* dup); /*!< in/out: reporter of duplicates 343 (NULL if non-unique index) */ 344 /********************************************************************//** 345 Write a merge block to the file system. 346 @return TRUE if request was successful, FALSE if fail */ 347 ibool 348 row_merge_write( 349 /*============*/ 350 int fd, /*!< in: file descriptor */ 351 ulint offset, /*!< in: offset where to write, 352 in number of row_merge_block_t 353 elements */ 354 void* buf, /*!< in: data */ 355 void* crypt_buf, /*!< in: crypt buf or NULL */ 356 ulint space_id); /*!< in: tablespace id */ 357 /********************************************************************//** 358 Empty a sort buffer. 359 @return sort buffer */ 360 row_merge_buf_t* 361 row_merge_buf_empty( 362 /*================*/ 363 row_merge_buf_t* buf) /*!< in,own: sort buffer */ 364 MY_ATTRIBUTE((warn_unused_result)); 365 366 /** Create a merge file in the given location. 367 @param[out] merge_file merge file structure 368 @param[in] path location for creating temporary file 369 @return file descriptor, or -1 on failure */ 370 int 371 row_merge_file_create( 372 merge_file_t* merge_file, 373 const char* path); 374 375 /** Merge disk files. 376 @param[in] trx transaction 377 @param[in] dup descriptor of index being created 378 @param[in,out] file file containing index entries 379 @param[in,out] block 3 buffers 380 @param[in,out] crypt_block encrypted file buffer 381 @param[in] space_id tablespace id 382 @param[in,out] tmpfd temporary file handle 383 @param[in,out] stage performance schema accounting object, used by 384 ALTER TABLE. If not NULL, stage->begin_phase_sort() will be called initially 385 and then stage->inc() will be called for each record processed. 386 @return DB_SUCCESS or error code */ 387 dberr_t 388 row_merge_sort( 389 trx_t* trx, 390 const row_merge_dup_t* dup, 391 merge_file_t* file, 392 row_merge_block_t* block, 393 row_merge_block_t* crypt_block, 394 ulint space_id, 395 int* tmpfd, 396 ut_stage_alter_t* stage = NULL); 397 398 /*********************************************************************//** 399 Allocate a sort buffer. 400 @return own: sort buffer */ 401 row_merge_buf_t* 402 row_merge_buf_create( 403 /*=================*/ 404 dict_index_t* index) /*!< in: secondary index */ 405 MY_ATTRIBUTE((warn_unused_result, malloc)); 406 /*********************************************************************//** 407 Deallocate a sort buffer. */ 408 void 409 row_merge_buf_free( 410 /*===============*/ 411 row_merge_buf_t* buf); /*!< in,own: sort buffer to be freed */ 412 /*********************************************************************//** 413 Destroy a merge file. */ 414 void 415 row_merge_file_destroy( 416 /*===================*/ 417 merge_file_t* merge_file); /*!< in/out: merge file structure */ 418 /********************************************************************//** 419 Read a merge block from the file system. 420 @return TRUE if request was successful, FALSE if fail */ 421 ibool 422 row_merge_read( 423 /*===========*/ 424 int fd, /*!< in: file descriptor */ 425 ulint offset, /*!< in: offset where to read 426 in number of row_merge_block_t 427 elements */ 428 row_merge_block_t* buf, /*!< out: data */ 429 row_merge_block_t* crypt_buf, /*!< in: crypt buf or NULL */ 430 ulint space_id); /*!< in: tablespace id */ 431 /********************************************************************//** 432 Read a merge record. 433 @return pointer to next record, or NULL on I/O error or end of list */ 434 const byte* 435 row_merge_read_rec( 436 /*===============*/ 437 row_merge_block_t* block, /*!< in/out: file buffer */ 438 row_merge_block_t* crypt_block, /*!< in: crypt buf or NULL */ 439 ulint space_id, /*!< in: tablespace id */ 440 mrec_buf_t* buf, /*!< in/out: secondary buffer */ 441 const byte* b, /*!< in: pointer to record */ 442 const dict_index_t* index, /*!< in: index of the record */ 443 int fd, /*!< in: file descriptor */ 444 ulint* foffs, /*!< in/out: file offset */ 445 const mrec_t** mrec, /*!< out: pointer to merge 446 record, or NULL on end of list 447 (non-NULL on I/O error) */ 448 ulint* offsets) /*!< out: offsets of mrec */ 449 MY_ATTRIBUTE((warn_unused_result)); 450 #endif /* row0merge.h */ 451