1 /***************************************************************************** 2 3 Copyright (c) 2005, 2016, Oracle and/or its affiliates. All Rights Reserved. 4 Copyright (c) 2015, 2020, MariaDB Corporation. 5 6 This program is free software; you can redistribute it and/or modify it under 7 the terms of the GNU General Public License as published by the Free Software 8 Foundation; version 2 of the License. 9 10 This program is distributed in the hope that it will be useful, but WITHOUT 11 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 12 FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 13 14 You should have received a copy of the GNU General Public License along with 15 this program; if not, write to the Free Software Foundation, Inc., 16 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA 17 18 *****************************************************************************/ 19 20 /**************************************************//** 21 @file include/row0merge.h 22 Index build routines using a merge sort 23 24 Created 13/06/2005 Jan Lindstrom 25 *******************************************************/ 26 27 #ifndef row0merge_h 28 #define row0merge_h 29 30 #include "que0types.h" 31 #include "trx0types.h" 32 #include "mtr0mtr.h" 33 #include "rem0types.h" 34 #include "rem0rec.h" 35 #include "btr0types.h" 36 #include "row0mysql.h" 37 #include "lock0types.h" 38 #include "srv0srv.h" 39 #include "ut0stage.h" 40 41 /* Reserve free space from every block for key_version */ 42 #define ROW_MERGE_RESERVE_SIZE 4 43 44 /* Cluster index read task is mandatory */ 45 #define COST_READ_CLUSTERED_INDEX 1.0 46 47 /* Basic fixed cost to build all type of index */ 48 #define COST_BUILD_INDEX_STATIC 0.5 49 /* Dynamic cost to build all type of index, dynamic cost will be re-distributed based on page count ratio of each index */ 50 #define COST_BUILD_INDEX_DYNAMIC 0.5 51 52 /* Sum of below two must be 1.0 */ 53 #define PCT_COST_MERGESORT_INDEX 0.4 54 #define PCT_COST_INSERT_INDEX 0.6 55 56 // Forward declaration 57 struct ib_sequence_t; 58 59 /** @brief Block size for I/O operations in merge sort. 60 61 The minimum is srv_page_size, or page_get_free_space_of_empty() 62 rounded to a power of 2. 63 64 When not creating a PRIMARY KEY that contains column prefixes, this 65 can be set as small as srv_page_size / 2. */ 66 typedef byte row_merge_block_t; 67 68 /** @brief Secondary buffer for I/O operations of merge records. 69 70 This buffer is used for writing or reading a record that spans two 71 row_merge_block_t. Thus, it must be able to hold one merge record, 72 whose maximum size is the same as the minimum size of 73 row_merge_block_t. */ 74 typedef byte mrec_buf_t[UNIV_PAGE_SIZE_MAX]; 75 76 /** @brief Merge record in row_merge_block_t. 77 78 The format is the same as a record in ROW_FORMAT=COMPACT with the 79 exception that the REC_N_NEW_EXTRA_BYTES are omitted. */ 80 typedef byte mrec_t; 81 82 /** Merge record in row_merge_buf_t */ 83 struct mtuple_t { 84 dfield_t* fields; /*!< data fields */ 85 }; 86 87 /** Buffer for sorting in main memory. */ 88 struct row_merge_buf_t { 89 mem_heap_t* heap; /*!< memory heap where allocated */ 90 dict_index_t* index; /*!< the index the tuples belong to */ 91 ulint total_size; /*!< total amount of data bytes */ 92 ulint n_tuples; /*!< number of data tuples */ 93 ulint max_tuples; /*!< maximum number of data tuples */ 94 mtuple_t* tuples; /*!< array of data tuples */ 95 mtuple_t* tmp_tuples; /*!< temporary copy of tuples, 96 for sorting */ 97 }; 98 99 /** Information about temporary files used in merge sort */ 100 struct merge_file_t { 101 pfs_os_file_t fd; /*!< file descriptor */ 102 ulint offset; /*!< file offset (end of file) */ 103 ib_uint64_t n_rec; /*!< number of records in the file */ 104 }; 105 106 /** Index field definition */ 107 struct index_field_t { 108 ulint col_no; /*!< column offset */ 109 ulint prefix_len; /*!< column prefix length, or 0 110 if indexing the whole column */ 111 bool is_v_col; /*!< whether this is a virtual column */ 112 }; 113 114 /** Definition of an index being created */ 115 struct index_def_t { 116 const char* name; /*!< index name */ 117 bool rebuild; /*!< whether the table is rebuilt */ 118 ulint ind_type; /*!< 0, DICT_UNIQUE, 119 or DICT_CLUSTERED */ 120 ulint key_number; /*!< MySQL key number, 121 or ULINT_UNDEFINED if none */ 122 ulint n_fields; /*!< number of fields in index */ 123 index_field_t* fields; /*!< field definitions */ 124 st_mysql_ftparser* 125 parser; /*!< fulltext parser plugin */ 126 }; 127 128 /** Structure for reporting duplicate records. */ 129 struct row_merge_dup_t { 130 dict_index_t* index; /*!< index being sorted */ 131 struct TABLE* table; /*!< MySQL table object */ 132 const ulint* col_map;/*!< mapping of column numbers 133 in table to the rebuilt table 134 (index->table), or NULL if not 135 rebuilding table */ 136 ulint n_dup; /*!< number of duplicates */ 137 }; 138 139 /*************************************************************//** 140 Report a duplicate key. */ 141 void 142 row_merge_dup_report( 143 /*=================*/ 144 row_merge_dup_t* dup, /*!< in/out: for reporting duplicates */ 145 const dfield_t* entry) /*!< in: duplicate index entry */ 146 MY_ATTRIBUTE((nonnull)); 147 148 /*********************************************************************//** 149 Sets an exclusive lock on a table, for the duration of creating indexes. 150 @return error code or DB_SUCCESS */ 151 dberr_t 152 row_merge_lock_table( 153 /*=================*/ 154 trx_t* trx, /*!< in/out: transaction */ 155 dict_table_t* table, /*!< in: table to lock */ 156 enum lock_mode mode) /*!< in: LOCK_X or LOCK_S */ 157 MY_ATTRIBUTE((nonnull(1,2), warn_unused_result)); 158 159 /*********************************************************************//** 160 Drop indexes that were created before an error occurred. 161 The data dictionary must have been locked exclusively by the caller, 162 because the transaction will not be committed. */ 163 void 164 row_merge_drop_indexes_dict( 165 /*========================*/ 166 trx_t* trx, /*!< in/out: dictionary transaction */ 167 table_id_t table_id)/*!< in: table identifier */ 168 MY_ATTRIBUTE((nonnull)); 169 170 /** Drop indexes that were created before an error occurred. 171 The data dictionary must have been locked exclusively by the caller, 172 because the transaction will not be committed. 173 @param trx dictionary transaction 174 @param table table containing the indexes 175 @param locked True if table is locked, 176 false - may need to do lazy drop 177 @param alter_trx Alter table transaction */ 178 void 179 row_merge_drop_indexes( 180 trx_t* trx, 181 dict_table_t* table, 182 bool locked, 183 const trx_t* alter_trx=NULL); 184 185 /*********************************************************************//** 186 Drop all partially created indexes during crash recovery. */ 187 void 188 row_merge_drop_temp_indexes(void); 189 /*=============================*/ 190 191 /** Create temporary merge files in the given paramater path, and if 192 UNIV_PFS_IO defined, register the file descriptor with Performance Schema. 193 @param[in] path location for creating temporary merge files, or NULL 194 @return File descriptor */ 195 pfs_os_file_t 196 row_merge_file_create_low( 197 const char* path) 198 MY_ATTRIBUTE((warn_unused_result)); 199 /*********************************************************************//** 200 Destroy a merge file. And de-register the file from Performance Schema 201 if UNIV_PFS_IO is defined. */ 202 void 203 row_merge_file_destroy_low( 204 /*=======================*/ 205 const pfs_os_file_t& fd); /*!< in: merge file descriptor */ 206 207 /*********************************************************************//** 208 Rename an index in the dictionary that was created. The data 209 dictionary must have been locked exclusively by the caller, because 210 the transaction will not be committed. 211 @return DB_SUCCESS if all OK */ 212 dberr_t 213 row_merge_rename_index_to_add( 214 /*==========================*/ 215 trx_t* trx, /*!< in/out: transaction */ 216 table_id_t table_id, /*!< in: table identifier */ 217 index_id_t index_id) /*!< in: index identifier */ 218 MY_ATTRIBUTE((nonnull(1), warn_unused_result)); 219 220 /*********************************************************************//** 221 Rename an index in the dictionary that is to be dropped. The data 222 dictionary must have been locked exclusively by the caller, because 223 the transaction will not be committed. 224 @return DB_SUCCESS if all OK */ 225 dberr_t 226 row_merge_rename_index_to_drop( 227 /*===========================*/ 228 trx_t* trx, /*!< in/out: transaction */ 229 table_id_t table_id, /*!< in: table identifier */ 230 index_id_t index_id) /*!< in: index identifier */ 231 MY_ATTRIBUTE((nonnull(1), warn_unused_result)); 232 233 /** Create the index and load in to the dictionary. 234 @param[in,out] table the index is on this table 235 @param[in] index_def the index definition 236 @param[in] add_v new virtual columns added along with add 237 index call 238 @return index, or NULL on error */ 239 dict_index_t* 240 row_merge_create_index( 241 dict_table_t* table, 242 const index_def_t* index_def, 243 const dict_add_v_col_t* add_v) 244 MY_ATTRIBUTE((warn_unused_result)); 245 246 /*********************************************************************//** 247 Check if a transaction can use an index. 248 @return whether the index can be used by the transaction */ 249 bool 250 row_merge_is_index_usable( 251 /*======================*/ 252 const trx_t* trx, /*!< in: transaction */ 253 const dict_index_t* index) /*!< in: index to check */ 254 MY_ATTRIBUTE((nonnull, warn_unused_result)); 255 256 /*********************************************************************//** 257 Drop a table. The caller must have ensured that the background stats 258 thread is not processing the table. This can be done by calling 259 dict_stats_wait_bg_to_stop_using_table() after locking the dictionary and 260 before calling this function. 261 @return DB_SUCCESS or error code */ 262 dberr_t 263 row_merge_drop_table( 264 /*=================*/ 265 trx_t* trx, /*!< in: transaction */ 266 dict_table_t* table) /*!< in: table instance to drop */ 267 MY_ATTRIBUTE((nonnull, warn_unused_result)); 268 269 /** Build indexes on a table by reading a clustered index, creating a temporary 270 file containing index entries, merge sorting these index entries and inserting 271 sorted index entries to indexes. 272 @param[in] trx transaction 273 @param[in] old_table table where rows are read from 274 @param[in] new_table table where indexes are created; identical to 275 old_table unless creating a PRIMARY KEY 276 @param[in] online true if creating indexes online 277 @param[in] indexes indexes to be created 278 @param[in] key_numbers MySQL key numbers 279 @param[in] n_indexes size of indexes[] 280 @param[in,out] table MySQL table, for reporting erroneous key value 281 if applicable 282 @param[in] defaults default values of added, changed columns, or NULL 283 @param[in] col_map mapping of old column numbers to new ones, or 284 NULL if old_table == new_table 285 @param[in] add_autoinc number of added AUTO_INCREMENT columns, or 286 ULINT_UNDEFINED if none is added 287 @param[in,out] sequence autoinc sequence 288 @param[in] skip_pk_sort whether the new PRIMARY KEY will follow 289 existing order 290 @param[in,out] stage performance schema accounting object, used by 291 ALTER TABLE. stage->begin_phase_read_pk() will be called at the beginning of 292 this function and it will be passed to other functions for further accounting. 293 @param[in] add_v new virtual columns added along with indexes 294 @param[in] eval_table mysql table used to evaluate virtual column 295 value, see innobase_get_computed_value(). 296 @param[in] allow_non_null allow the conversion from null to not-null 297 @return DB_SUCCESS or error code */ 298 dberr_t 299 row_merge_build_indexes( 300 trx_t* trx, 301 dict_table_t* old_table, 302 dict_table_t* new_table, 303 bool online, 304 dict_index_t** indexes, 305 const ulint* key_numbers, 306 ulint n_indexes, 307 struct TABLE* table, 308 const dtuple_t* defaults, 309 const ulint* col_map, 310 ulint add_autoinc, 311 ib_sequence_t& sequence, 312 bool skip_pk_sort, 313 ut_stage_alter_t* stage, 314 const dict_add_v_col_t* add_v, 315 struct TABLE* eval_table, 316 bool allow_non_null) 317 MY_ATTRIBUTE((warn_unused_result)); 318 319 /********************************************************************//** 320 Write a buffer to a block. */ 321 void 322 row_merge_buf_write( 323 /*================*/ 324 const row_merge_buf_t* buf, /*!< in: sorted buffer */ 325 const merge_file_t* of, /*!< in: output file */ 326 row_merge_block_t* block) /*!< out: buffer for writing to file */ 327 MY_ATTRIBUTE((nonnull)); 328 329 /********************************************************************//** 330 Sort a buffer. */ 331 void 332 row_merge_buf_sort( 333 /*===============*/ 334 row_merge_buf_t* buf, /*!< in/out: sort buffer */ 335 row_merge_dup_t* dup) /*!< in/out: reporter of duplicates 336 (NULL if non-unique index) */ 337 MY_ATTRIBUTE((nonnull(1))); 338 339 /********************************************************************//** 340 Write a merge block to the file system. 341 @return whether the request was completed successfully 342 @retval false on error 343 @retval true on success */ 344 UNIV_INTERN 345 bool 346 row_merge_write( 347 /*============*/ 348 const pfs_os_file_t& fd, /*!< in: file descriptor */ 349 ulint offset, /*!< in: offset where to write, 350 in number of row_merge_block_t elements */ 351 const void* buf, /*!< in: data */ 352 void* crypt_buf, /*!< in: crypt buf or NULL */ 353 ulint space) /*!< in: space id */ 354 MY_ATTRIBUTE((warn_unused_result)); 355 356 /********************************************************************//** 357 Empty a sort buffer. 358 @return sort buffer */ 359 row_merge_buf_t* 360 row_merge_buf_empty( 361 /*================*/ 362 row_merge_buf_t* buf) /*!< in,own: sort buffer */ 363 MY_ATTRIBUTE((warn_unused_result, nonnull)); 364 365 /** Create a merge file in the given location. 366 @param[out] merge_file merge file structure 367 @param[in] path location for creating temporary file, or NULL 368 @return file descriptor, or -1 on failure */ 369 pfs_os_file_t 370 row_merge_file_create( 371 merge_file_t* merge_file, 372 const char* path) 373 MY_ATTRIBUTE((warn_unused_result, nonnull(1))); 374 375 /** Merge disk files. 376 @param[in] trx transaction 377 @param[in] dup descriptor of index being created 378 @param[in,out] file file containing index entries 379 @param[in,out] block 3 buffers 380 @param[in,out] tmpfd temporary file handle 381 @param[in] update_progress true, if we should update progress status 382 @param[in] pct_progress total progress percent until now 383 @param[in] pct_ocst current progress percent 384 @param[in] crypt_block crypt buf or NULL 385 @param[in] space space_id 386 @param[in,out] stage performance schema accounting object, used by 387 ALTER TABLE. If not NULL, stage->begin_phase_sort() will be called initially 388 and then stage->inc() will be called for each record processed. 389 @return DB_SUCCESS or error code */ 390 dberr_t 391 row_merge_sort( 392 /*===========*/ 393 trx_t* trx, 394 const row_merge_dup_t* dup, 395 merge_file_t* file, 396 row_merge_block_t* block, 397 pfs_os_file_t* tmpfd, 398 const bool update_progress, 399 const double pct_progress, 400 const double pct_cost, 401 row_merge_block_t* crypt_block, 402 ulint space, 403 ut_stage_alter_t* stage = NULL) 404 MY_ATTRIBUTE((warn_unused_result)); 405 406 /*********************************************************************//** 407 Allocate a sort buffer. 408 @return own: sort buffer */ 409 row_merge_buf_t* 410 row_merge_buf_create( 411 /*=================*/ 412 dict_index_t* index) /*!< in: secondary index */ 413 MY_ATTRIBUTE((warn_unused_result, nonnull, malloc)); 414 415 /*********************************************************************//** 416 Deallocate a sort buffer. */ 417 void 418 row_merge_buf_free( 419 /*===============*/ 420 row_merge_buf_t* buf) /*!< in,own: sort buffer to be freed */ 421 MY_ATTRIBUTE((nonnull)); 422 423 /*********************************************************************//** 424 Destroy a merge file. */ 425 void 426 row_merge_file_destroy( 427 /*===================*/ 428 merge_file_t* merge_file) /*!< in/out: merge file structure */ 429 MY_ATTRIBUTE((nonnull)); 430 431 /** Read a merge block from the file system. 432 @return whether the request was completed successfully */ 433 bool 434 row_merge_read( 435 /*===========*/ 436 const pfs_os_file_t& fd, /*!< in: file descriptor */ 437 ulint offset, /*!< in: offset where to read 438 in number of row_merge_block_t 439 elements */ 440 row_merge_block_t* buf, /*!< out: data */ 441 row_merge_block_t* crypt_buf, /*!< in: crypt buf or NULL */ 442 ulint space) /*!< in: space id */ 443 MY_ATTRIBUTE((warn_unused_result)); 444 445 /********************************************************************//** 446 Read a merge record. 447 @return pointer to next record, or NULL on I/O error or end of list */ 448 const byte* 449 row_merge_read_rec( 450 /*===============*/ 451 row_merge_block_t* block, /*!< in/out: file buffer */ 452 mrec_buf_t* buf, /*!< in/out: secondary buffer */ 453 const byte* b, /*!< in: pointer to record */ 454 const dict_index_t* index, /*!< in: index of the record */ 455 const pfs_os_file_t& fd, /*!< in: file descriptor */ 456 ulint* foffs, /*!< in/out: file offset */ 457 const mrec_t** mrec, /*!< out: pointer to merge record, 458 or NULL on end of list 459 (non-NULL on I/O error) */ 460 rec_offs* offsets,/*!< out: offsets of mrec */ 461 row_merge_block_t* crypt_block, /*!< in: crypt buf or NULL */ 462 ulint space) /*!< in: space id */ 463 MY_ATTRIBUTE((warn_unused_result)); 464 #endif /* row0merge.h */ 465