1 /***************************************************************************** 2 3 Copyright (c) 2005, 2016, Oracle and/or its affiliates. All Rights Reserved. 4 Copyright (c) 2015, 2020, MariaDB Corporation. 5 6 This program is free software; you can redistribute it and/or modify it under 7 the terms of the GNU General Public License as published by the Free Software 8 Foundation; version 2 of the License. 9 10 This program is distributed in the hope that it will be useful, but WITHOUT 11 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 12 FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 13 14 You should have received a copy of the GNU General Public License along with 15 this program; if not, write to the Free Software Foundation, Inc., 16 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA 17 18 *****************************************************************************/ 19 20 /**************************************************//** 21 @file include/row0merge.h 22 Index build routines using a merge sort 23 24 Created 13/06/2005 Jan Lindstrom 25 *******************************************************/ 26 27 #ifndef row0merge_h 28 #define row0merge_h 29 30 #include "que0types.h" 31 #include "trx0types.h" 32 #include "mtr0mtr.h" 33 #include "rem0types.h" 34 #include "rem0rec.h" 35 #include "btr0types.h" 36 #include "row0mysql.h" 37 #include "lock0types.h" 38 #include "srv0srv.h" 39 #include "ut0stage.h" 40 41 /* Reserve free space from every block for key_version */ 42 #define ROW_MERGE_RESERVE_SIZE 4 43 44 /* Cluster index read task is mandatory */ 45 #define COST_READ_CLUSTERED_INDEX 1.0 46 47 /* Basic fixed cost to build all type of index */ 48 #define COST_BUILD_INDEX_STATIC 0.5 49 /* Dynamic cost to build all type of index, dynamic cost will be re-distributed based on page count ratio of each index */ 50 #define COST_BUILD_INDEX_DYNAMIC 0.5 51 52 /* Sum of below two must be 1.0 */ 53 #define PCT_COST_MERGESORT_INDEX 0.4 54 #define PCT_COST_INSERT_INDEX 0.6 55 56 // Forward declaration 57 struct ib_sequence_t; 58 59 /** @brief Block size for I/O operations in merge sort. 60 61 The minimum is srv_page_size, or page_get_free_space_of_empty() 62 rounded to a power of 2. 63 64 When not creating a PRIMARY KEY that contains column prefixes, this 65 can be set as small as srv_page_size / 2. */ 66 typedef byte row_merge_block_t; 67 68 /** @brief Secondary buffer for I/O operations of merge records. 69 70 This buffer is used for writing or reading a record that spans two 71 row_merge_block_t. Thus, it must be able to hold one merge record, 72 whose maximum size is the same as the minimum size of 73 row_merge_block_t. */ 74 typedef byte mrec_buf_t[UNIV_PAGE_SIZE_MAX]; 75 76 /** @brief Merge record in row_merge_block_t. 77 78 The format is the same as a record in ROW_FORMAT=COMPACT with the 79 exception that the REC_N_NEW_EXTRA_BYTES are omitted. */ 80 typedef byte mrec_t; 81 82 /** Merge record in row_merge_buf_t */ 83 struct mtuple_t { 84 dfield_t* fields; /*!< data fields */ 85 }; 86 87 /** Buffer for sorting in main memory. */ 88 struct row_merge_buf_t { 89 mem_heap_t* heap; /*!< memory heap where allocated */ 90 dict_index_t* index; /*!< the index the tuples belong to */ 91 ulint total_size; /*!< total amount of data bytes */ 92 ulint n_tuples; /*!< number of data tuples */ 93 ulint max_tuples; /*!< maximum number of data tuples */ 94 mtuple_t* tuples; /*!< array of data tuples */ 95 mtuple_t* tmp_tuples; /*!< temporary copy of tuples, 96 for sorting */ 97 }; 98 99 /** Information about temporary files used in merge sort */ 100 struct merge_file_t { 101 pfs_os_file_t fd; /*!< file descriptor */ 102 ulint offset; /*!< file offset (end of file) */ 103 ib_uint64_t n_rec; /*!< number of records in the file */ 104 }; 105 106 /** Index field definition */ 107 struct index_field_t { 108 ulint col_no; /*!< column offset */ 109 ulint prefix_len; /*!< column prefix length, or 0 110 if indexing the whole column */ 111 bool is_v_col; /*!< whether this is a virtual column */ 112 }; 113 114 /** Definition of an index being created */ 115 struct index_def_t { 116 const char* name; /*!< index name */ 117 bool rebuild; /*!< whether the table is rebuilt */ 118 ulint ind_type; /*!< 0, DICT_UNIQUE, 119 or DICT_CLUSTERED */ 120 ulint key_number; /*!< MySQL key number, 121 or ULINT_UNDEFINED if none */ 122 ulint n_fields; /*!< number of fields in index */ 123 index_field_t* fields; /*!< field definitions */ 124 st_mysql_ftparser* 125 parser; /*!< fulltext parser plugin */ 126 }; 127 128 /** Structure for reporting duplicate records. */ 129 struct row_merge_dup_t { 130 dict_index_t* index; /*!< index being sorted */ 131 struct TABLE* table; /*!< MySQL table object */ 132 const ulint* col_map;/*!< mapping of column numbers 133 in table to the rebuilt table 134 (index->table), or NULL if not 135 rebuilding table */ 136 ulint n_dup; /*!< number of duplicates */ 137 }; 138 139 /*************************************************************//** 140 Report a duplicate key. */ 141 void 142 row_merge_dup_report( 143 /*=================*/ 144 row_merge_dup_t* dup, /*!< in/out: for reporting duplicates */ 145 const dfield_t* entry) /*!< in: duplicate index entry */ 146 MY_ATTRIBUTE((nonnull)); 147 148 /*********************************************************************//** 149 Sets an exclusive lock on a table, for the duration of creating indexes. 150 @return error code or DB_SUCCESS */ 151 dberr_t 152 row_merge_lock_table( 153 /*=================*/ 154 trx_t* trx, /*!< in/out: transaction */ 155 dict_table_t* table, /*!< in: table to lock */ 156 enum lock_mode mode) /*!< in: LOCK_X or LOCK_S */ 157 MY_ATTRIBUTE((nonnull(1,2), warn_unused_result)); 158 159 /*********************************************************************//** 160 Drop indexes that were created before an error occurred. 161 The data dictionary must have been locked exclusively by the caller, 162 because the transaction will not be committed. */ 163 void 164 row_merge_drop_indexes_dict( 165 /*========================*/ 166 trx_t* trx, /*!< in/out: dictionary transaction */ 167 table_id_t table_id)/*!< in: table identifier */ 168 MY_ATTRIBUTE((nonnull)); 169 170 /** Drop indexes that were created before an error occurred. 171 The data dictionary must have been locked exclusively by the caller, 172 because the transaction will not be committed. 173 @param trx dictionary transaction 174 @param table table containing the indexes 175 @param locked True if table is locked, 176 false - may need to do lazy drop 177 @param alter_trx Alter table transaction */ 178 void 179 row_merge_drop_indexes( 180 trx_t* trx, 181 dict_table_t* table, 182 bool locked, 183 const trx_t* alter_trx=NULL); 184 185 /*********************************************************************//** 186 Drop all partially created indexes during crash recovery. */ 187 void 188 row_merge_drop_temp_indexes(void); 189 /*=============================*/ 190 191 /** Create temporary merge files in the given paramater path, and if 192 UNIV_PFS_IO defined, register the file descriptor with Performance Schema. 193 @param[in] path location for creating temporary merge files, or NULL 194 @return File descriptor */ 195 pfs_os_file_t 196 row_merge_file_create_low( 197 const char* path) 198 MY_ATTRIBUTE((warn_unused_result)); 199 /*********************************************************************//** 200 Destroy a merge file. And de-register the file from Performance Schema 201 if UNIV_PFS_IO is defined. */ 202 void 203 row_merge_file_destroy_low( 204 /*=======================*/ 205 const pfs_os_file_t& fd); /*!< in: merge file descriptor */ 206 207 /*********************************************************************//** 208 Rename the tables in the data dictionary. The data dictionary must 209 have been locked exclusively by the caller, because the transaction 210 will not be committed. 211 @return error code or DB_SUCCESS */ 212 dberr_t 213 row_merge_rename_tables_dict( 214 /*=========================*/ 215 dict_table_t* old_table, /*!< in/out: old table, renamed to 216 tmp_name */ 217 dict_table_t* new_table, /*!< in/out: new table, renamed to 218 old_table->name */ 219 const char* tmp_name, /*!< in: new name for old_table */ 220 trx_t* trx) /*!< in/out: dictionary transaction */ 221 MY_ATTRIBUTE((nonnull, warn_unused_result)); 222 223 /*********************************************************************//** 224 Rename an index in the dictionary that was created. The data 225 dictionary must have been locked exclusively by the caller, because 226 the transaction will not be committed. 227 @return DB_SUCCESS if all OK */ 228 dberr_t 229 row_merge_rename_index_to_add( 230 /*==========================*/ 231 trx_t* trx, /*!< in/out: transaction */ 232 table_id_t table_id, /*!< in: table identifier */ 233 index_id_t index_id) /*!< in: index identifier */ 234 MY_ATTRIBUTE((nonnull(1), warn_unused_result)); 235 236 /*********************************************************************//** 237 Rename an index in the dictionary that is to be dropped. The data 238 dictionary must have been locked exclusively by the caller, because 239 the transaction will not be committed. 240 @return DB_SUCCESS if all OK */ 241 dberr_t 242 row_merge_rename_index_to_drop( 243 /*===========================*/ 244 trx_t* trx, /*!< in/out: transaction */ 245 table_id_t table_id, /*!< in: table identifier */ 246 index_id_t index_id) /*!< in: index identifier */ 247 MY_ATTRIBUTE((nonnull(1), warn_unused_result)); 248 249 /** Create the index and load in to the dictionary. 250 @param[in,out] table the index is on this table 251 @param[in] index_def the index definition 252 @param[in] add_v new virtual columns added along with add 253 index call 254 @return index, or NULL on error */ 255 dict_index_t* 256 row_merge_create_index( 257 dict_table_t* table, 258 const index_def_t* index_def, 259 const dict_add_v_col_t* add_v) 260 MY_ATTRIBUTE((warn_unused_result)); 261 262 /*********************************************************************//** 263 Check if a transaction can use an index. 264 @return whether the index can be used by the transaction */ 265 bool 266 row_merge_is_index_usable( 267 /*======================*/ 268 const trx_t* trx, /*!< in: transaction */ 269 const dict_index_t* index) /*!< in: index to check */ 270 MY_ATTRIBUTE((nonnull, warn_unused_result)); 271 272 /*********************************************************************//** 273 Drop a table. The caller must have ensured that the background stats 274 thread is not processing the table. This can be done by calling 275 dict_stats_wait_bg_to_stop_using_table() after locking the dictionary and 276 before calling this function. 277 @return DB_SUCCESS or error code */ 278 dberr_t 279 row_merge_drop_table( 280 /*=================*/ 281 trx_t* trx, /*!< in: transaction */ 282 dict_table_t* table) /*!< in: table instance to drop */ 283 MY_ATTRIBUTE((nonnull, warn_unused_result)); 284 285 /** Write an MLOG_INDEX_LOAD record to indicate in the redo-log 286 that redo-logging of individual index pages was disabled, and 287 the flushing of such pages to the data files was completed. 288 @param[in] index an index tree on which redo logging was disabled */ 289 void row_merge_write_redo(const dict_index_t* index); 290 291 /** Build indexes on a table by reading a clustered index, creating a temporary 292 file containing index entries, merge sorting these index entries and inserting 293 sorted index entries to indexes. 294 @param[in] trx transaction 295 @param[in] old_table table where rows are read from 296 @param[in] new_table table where indexes are created; identical to 297 old_table unless creating a PRIMARY KEY 298 @param[in] online true if creating indexes online 299 @param[in] indexes indexes to be created 300 @param[in] key_numbers MySQL key numbers 301 @param[in] n_indexes size of indexes[] 302 @param[in,out] table MySQL table, for reporting erroneous key value 303 if applicable 304 @param[in] defaults default values of added, changed columns, or NULL 305 @param[in] col_map mapping of old column numbers to new ones, or 306 NULL if old_table == new_table 307 @param[in] add_autoinc number of added AUTO_INCREMENT columns, or 308 ULINT_UNDEFINED if none is added 309 @param[in,out] sequence autoinc sequence 310 @param[in] skip_pk_sort whether the new PRIMARY KEY will follow 311 existing order 312 @param[in,out] stage performance schema accounting object, used by 313 ALTER TABLE. stage->begin_phase_read_pk() will be called at the beginning of 314 this function and it will be passed to other functions for further accounting. 315 @param[in] add_v new virtual columns added along with indexes 316 @param[in] eval_table mysql table used to evaluate virtual column 317 value, see innobase_get_computed_value(). 318 @param[in] allow_non_null allow the conversion from null to not-null 319 @return DB_SUCCESS or error code */ 320 dberr_t 321 row_merge_build_indexes( 322 trx_t* trx, 323 dict_table_t* old_table, 324 dict_table_t* new_table, 325 bool online, 326 dict_index_t** indexes, 327 const ulint* key_numbers, 328 ulint n_indexes, 329 struct TABLE* table, 330 const dtuple_t* defaults, 331 const ulint* col_map, 332 ulint add_autoinc, 333 ib_sequence_t& sequence, 334 bool skip_pk_sort, 335 ut_stage_alter_t* stage, 336 const dict_add_v_col_t* add_v, 337 struct TABLE* eval_table, 338 bool allow_non_null) 339 MY_ATTRIBUTE((warn_unused_result)); 340 341 /********************************************************************//** 342 Write a buffer to a block. */ 343 void 344 row_merge_buf_write( 345 /*================*/ 346 const row_merge_buf_t* buf, /*!< in: sorted buffer */ 347 const merge_file_t* of, /*!< in: output file */ 348 row_merge_block_t* block) /*!< out: buffer for writing to file */ 349 MY_ATTRIBUTE((nonnull)); 350 351 /********************************************************************//** 352 Sort a buffer. */ 353 void 354 row_merge_buf_sort( 355 /*===============*/ 356 row_merge_buf_t* buf, /*!< in/out: sort buffer */ 357 row_merge_dup_t* dup) /*!< in/out: reporter of duplicates 358 (NULL if non-unique index) */ 359 MY_ATTRIBUTE((nonnull(1))); 360 361 /********************************************************************//** 362 Write a merge block to the file system. 363 @return whether the request was completed successfully 364 @retval false on error 365 @retval true on success */ 366 UNIV_INTERN 367 bool 368 row_merge_write( 369 /*============*/ 370 const pfs_os_file_t& fd, /*!< in: file descriptor */ 371 ulint offset, /*!< in: offset where to write, 372 in number of row_merge_block_t elements */ 373 const void* buf, /*!< in: data */ 374 void* crypt_buf, /*!< in: crypt buf or NULL */ 375 ulint space) /*!< in: space id */ 376 MY_ATTRIBUTE((warn_unused_result)); 377 378 /********************************************************************//** 379 Empty a sort buffer. 380 @return sort buffer */ 381 row_merge_buf_t* 382 row_merge_buf_empty( 383 /*================*/ 384 row_merge_buf_t* buf) /*!< in,own: sort buffer */ 385 MY_ATTRIBUTE((warn_unused_result, nonnull)); 386 387 /** Create a merge file in the given location. 388 @param[out] merge_file merge file structure 389 @param[in] path location for creating temporary file, or NULL 390 @return file descriptor, or -1 on failure */ 391 pfs_os_file_t 392 row_merge_file_create( 393 merge_file_t* merge_file, 394 const char* path) 395 MY_ATTRIBUTE((warn_unused_result, nonnull(1))); 396 397 /** Merge disk files. 398 @param[in] trx transaction 399 @param[in] dup descriptor of index being created 400 @param[in,out] file file containing index entries 401 @param[in,out] block 3 buffers 402 @param[in,out] tmpfd temporary file handle 403 @param[in] update_progress true, if we should update progress status 404 @param[in] pct_progress total progress percent until now 405 @param[in] pct_ocst current progress percent 406 @param[in] crypt_block crypt buf or NULL 407 @param[in] space space_id 408 @param[in,out] stage performance schema accounting object, used by 409 ALTER TABLE. If not NULL, stage->begin_phase_sort() will be called initially 410 and then stage->inc() will be called for each record processed. 411 @return DB_SUCCESS or error code */ 412 dberr_t 413 row_merge_sort( 414 /*===========*/ 415 trx_t* trx, 416 const row_merge_dup_t* dup, 417 merge_file_t* file, 418 row_merge_block_t* block, 419 pfs_os_file_t* tmpfd, 420 const bool update_progress, 421 const double pct_progress, 422 const double pct_cost, 423 row_merge_block_t* crypt_block, 424 ulint space, 425 ut_stage_alter_t* stage = NULL) 426 MY_ATTRIBUTE((warn_unused_result)); 427 428 /*********************************************************************//** 429 Allocate a sort buffer. 430 @return own: sort buffer */ 431 row_merge_buf_t* 432 row_merge_buf_create( 433 /*=================*/ 434 dict_index_t* index) /*!< in: secondary index */ 435 MY_ATTRIBUTE((warn_unused_result, nonnull, malloc)); 436 437 /*********************************************************************//** 438 Deallocate a sort buffer. */ 439 void 440 row_merge_buf_free( 441 /*===============*/ 442 row_merge_buf_t* buf) /*!< in,own: sort buffer to be freed */ 443 MY_ATTRIBUTE((nonnull)); 444 445 /*********************************************************************//** 446 Destroy a merge file. */ 447 void 448 row_merge_file_destroy( 449 /*===================*/ 450 merge_file_t* merge_file) /*!< in/out: merge file structure */ 451 MY_ATTRIBUTE((nonnull)); 452 453 /** Read a merge block from the file system. 454 @return whether the request was completed successfully */ 455 bool 456 row_merge_read( 457 /*===========*/ 458 const pfs_os_file_t& fd, /*!< in: file descriptor */ 459 ulint offset, /*!< in: offset where to read 460 in number of row_merge_block_t 461 elements */ 462 row_merge_block_t* buf, /*!< out: data */ 463 row_merge_block_t* crypt_buf, /*!< in: crypt buf or NULL */ 464 ulint space) /*!< in: space id */ 465 MY_ATTRIBUTE((warn_unused_result)); 466 467 /********************************************************************//** 468 Read a merge record. 469 @return pointer to next record, or NULL on I/O error or end of list */ 470 const byte* 471 row_merge_read_rec( 472 /*===============*/ 473 row_merge_block_t* block, /*!< in/out: file buffer */ 474 mrec_buf_t* buf, /*!< in/out: secondary buffer */ 475 const byte* b, /*!< in: pointer to record */ 476 const dict_index_t* index, /*!< in: index of the record */ 477 const pfs_os_file_t& fd, /*!< in: file descriptor */ 478 ulint* foffs, /*!< in/out: file offset */ 479 const mrec_t** mrec, /*!< out: pointer to merge record, 480 or NULL on end of list 481 (non-NULL on I/O error) */ 482 rec_offs* offsets,/*!< out: offsets of mrec */ 483 row_merge_block_t* crypt_block, /*!< in: crypt buf or NULL */ 484 ulint space) /*!< in: space id */ 485 MY_ATTRIBUTE((warn_unused_result)); 486 #endif /* row0merge.h */ 487