1 /***************************************************************************** 2 3 Copyright (c) 2017, 2020, Oracle and/or its affiliates. All Rights Reserved. 4 5 This program is free software; you can redistribute it and/or modify it under 6 the terms of the GNU General Public License, version 2.0, as published by the 7 Free Software Foundation. 8 9 This program is also distributed with certain software (including but not 10 limited to OpenSSL) that is licensed under separate terms, as designated in a 11 particular file or component or in included license documentation. The authors 12 of MySQL hereby grant you an additional permission to link the program and 13 your derivative works with the separately licensed software that they have 14 included with MySQL. 15 16 This program is distributed in the hope that it will be useful, but WITHOUT 17 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 18 FOR A PARTICULAR PURPOSE. See the GNU General Public License, version 2.0, 19 for more details. 20 21 You should have received a copy of the GNU General Public License along with 22 this program; if not, write to the Free Software Foundation, Inc., 23 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 24 25 *****************************************************************************/ 26 27 /** @file include/clone0snapshot.h 28 Database Physical Snapshot 29 30 *******************************************************/ 31 32 #ifndef CLONE_SNAPSHOT_INCLUDE 33 #define CLONE_SNAPSHOT_INCLUDE 34 35 #include "univ.i" 36 37 #include "arch0log.h" 38 #include "arch0page.h" 39 #include "clone0desc.h" 40 #include "clone0monitor.h" 41 #include "fil0fil.h" 42 #include "sql/handler.h" 43 44 #include <map> 45 #include <vector> 46 47 /** Vector type for storing clone files */ 48 using Clone_File_Vec = std::vector<Clone_File_Meta *>; 49 50 /** Map type for mapping space ID to clone file index */ 51 using Clone_File_Map = std::map<space_id_t, uint>; 52 53 /** Page identified by space and page number */ 54 struct Clone_Page { 55 /** Tablespace ID */ 56 ib_uint32_t m_space_id; 57 58 /** Page number within tablespace */ 59 ib_uint32_t m_page_no; 60 }; 61 62 /** Comparator for storing sorted page ID. */ 63 struct Less_Clone_Page { 64 /** Less than operator for page ID. 65 @param[in] page1 first page 66 @param[in] page2 second page 67 @return true, if page1 is less than page2 */ operatorLess_Clone_Page68 inline bool operator()(const Clone_Page &page1, 69 const Clone_Page &page2) const { 70 if (page1.m_space_id < page2.m_space_id) { 71 return (true); 72 } 73 74 if (page1.m_space_id == page2.m_space_id && 75 page1.m_page_no < page2.m_page_no) { 76 return (true); 77 } 78 return (false); 79 } 80 }; 81 82 /** Vector type for storing clone page IDs */ 83 using Clone_Page_Vec = std::vector<Clone_Page>; 84 85 /** Set for storing unique page IDs. */ 86 using Clone_Page_Set = std::set<Clone_Page, Less_Clone_Page>; 87 88 /** Clone handle type */ 89 enum Clone_Handle_Type { 90 /** Clone Handle for COPY */ 91 CLONE_HDL_COPY = 1, 92 93 /** Clone Handle for APPLY */ 94 CLONE_HDL_APPLY 95 }; 96 97 /** Default chunk size in power of 2 in unit of pages. 98 Chunks are reserved by each thread for multi-threaded clone. For 16k page 99 size, chunk size is 64M. */ 100 const uint SNAPSHOT_DEF_CHUNK_SIZE_POW2 = 12; 101 102 /** Default block size in power of 2 in unit of pages. 103 Data transfer callback is invoked once for each block. This is also 104 the maximum size of data that would be re-send if clone is stopped 105 and resumed. For 16k page size, block size is 1M. */ 106 const uint SNAPSHOT_DEF_BLOCK_SIZE_POW2 = 6; 107 108 /** Maximum block size in power of 2 in unit of pages. 109 For 16k page size, maximum block size is 64M. */ 110 const uint SNAPSHOT_MAX_BLOCK_SIZE_POW2 = 12; 111 112 /** Sleep time in microseconds while waiting for other clone/task */ 113 const uint SNAPSHOT_STATE_CHANGE_SLEEP = 100 * 1000; 114 115 /** Dynamic database snapshot: Holds metadata and handle to data */ 116 class Clone_Snapshot { 117 public: 118 /** Construct snapshot 119 @param[in] hdl_type copy, apply 120 @param[in] clone_type clone type 121 @param[in] arr_idx index in global array 122 @param[in] snap_id unique snapshot ID */ 123 Clone_Snapshot(Clone_Handle_Type hdl_type, Ha_clone_type clone_type, 124 uint arr_idx, ib_uint64_t snap_id); 125 126 /** Release contexts and free heap */ 127 ~Clone_Snapshot(); 128 129 /** @return estimated bytes on disk */ get_disk_estimate()130 uint64_t get_disk_estimate() const { return (m_data_bytes_disk); } 131 132 /** Get unique snapshot identifier 133 @return snapshot ID */ get_id()134 ib_uint64_t get_id() { return (m_snapshot_id); } 135 136 /** Get snapshot index in global array 137 @return array index */ get_index()138 uint get_index() { return (m_snapshot_arr_idx); } 139 140 /** Get performance schema accounting object used to monitor stage 141 progress. 142 @return PFS stage object */ get_clone_monitor()143 Clone_Monitor &get_clone_monitor() { return (m_monitor); } 144 145 /** Get snapshot heap used for allocation during clone. 146 @return heap */ lock_heap()147 mem_heap_t *lock_heap() { 148 mutex_enter(&m_snapshot_mutex); 149 return (m_snapshot_heap); 150 } 151 152 /* Release snapshot heap */ release_heap(mem_heap_t * & heap)153 void release_heap(mem_heap_t *&heap) { 154 heap = nullptr; 155 mutex_exit(&m_snapshot_mutex); 156 } 157 158 /** Get snapshot state 159 @return state */ get_state()160 Snapshot_State get_state() { return (m_snapshot_state); } 161 162 /** Get the redo file size for the snapshot 163 @return redo file size */ get_redo_file_size()164 ib_uint64_t get_redo_file_size() { return (m_redo_file_size); } 165 166 /** Get total number of chunks for current state 167 @return number of data chunks */ get_num_chunks()168 uint get_num_chunks() { return (m_num_current_chunks); } 169 170 /** Get maximum file length seen till now 171 @return file name length */ get_max_file_name_length()172 size_t get_max_file_name_length() { return (m_max_file_name_len); } 173 174 /** Get maximum buffer size required for clone 175 @return maximum dynamic buffer */ get_dyn_buffer_length()176 uint get_dyn_buffer_length() { 177 uint ret_len = 0; 178 179 if (is_copy() && m_snapshot_type != HA_CLONE_BLOCKING) { 180 ret_len = static_cast<uint>(2 * UNIV_PAGE_SIZE); 181 } 182 183 return (ret_len); 184 } 185 186 using File_Cbk_Func = std::function<int(Clone_File_Meta *)>; 187 188 /** Iterate through all files in current state 189 @param[in] func callback function 190 @return error code */ 191 int iterate_files(File_Cbk_Func &&func); 192 193 /** Fill state descriptor from snapshot 194 @param[in] do_estimate estimate data bytes to transfer 195 @param[out] state_desc snapshot state descriptor */ 196 void get_state_info(bool do_estimate, Clone_Desc_State *state_desc); 197 198 /** Set state information during apply 199 @param[in] state_desc snapshot state descriptor */ 200 void set_state_info(Clone_Desc_State *state_desc); 201 202 /** Get next state based on snapshot type 203 @return next state */ 204 Snapshot_State get_next_state(); 205 206 /** Try to attach to snapshot 207 @param[in] hdl_type copy, apply 208 @param[in] pfs_monitor enable PFS monitoring 209 @return true if successfully attached */ 210 bool attach(Clone_Handle_Type hdl_type, bool pfs_monitor); 211 212 /** Detach from snapshot 213 @return number of clones attached */ 214 uint detach(); 215 216 /** Start transition to new state 217 @param[in] state_desc descriptor for next state 218 @param[in] new_state state to move for apply 219 @param[in] temp_buffer buffer used for collecting page IDs 220 @param[in] temp_buffer_len buffer length 221 @param[in] cbk alter callback for long wait 222 @param[out] pending_clones clones yet to transit to next state 223 @return error code */ 224 int change_state(Clone_Desc_State *state_desc, Snapshot_State new_state, 225 byte *temp_buffer, uint temp_buffer_len, 226 Clone_Alert_Func cbk, uint &pending_clones); 227 228 /** Check if transition is complete 229 @param[in] new_state new state after transition 230 @param[in] exit_on_wait exit from transition if needs to wait 231 @return number of clones yet to transit to next state */ 232 uint check_state(Snapshot_State new_state, bool exit_on_wait); 233 234 /* Don't allow to attach new clone - Not supported 235 void stop_attach_new_clone() 236 { 237 m_allow_new_clone = false; 238 } 239 */ 240 241 /** Add file metadata entry at destination 242 @param[in,out] file_desc if there, set to current descriptor 243 @param[in] data_dir destination data directory 244 @param[in] desc_create create if doesn't exist 245 @param[out] desc_exists descriptor already exists 246 @return error code */ 247 int get_file_from_desc(Clone_File_Meta *&file_desc, const char *data_dir, 248 bool desc_create, bool &desc_exists); 249 250 /** Add file descriptor to file list 251 @param[in,out] file_desc current file descriptor 252 @return true, if it is the last file. */ 253 bool add_file_from_desc(Clone_File_Meta *&file_desc); 254 255 /** Extract file information from node and add to snapshot 256 @param[in] node file node 257 @return error code */ 258 dberr_t add_node(fil_node_t *node); 259 260 /** Add page ID to to the set of pages in snapshot 261 @param[in] space_id page tablespace 262 @param[in] page_num page number within tablespace 263 @return error code */ 264 int add_page(ib_uint32_t space_id, ib_uint32_t page_num); 265 266 /** Add redo file to snapshot 267 @param[in] file_name file name 268 @param[in] file_size file size in bytes 269 @param[in] file_offset start offset 270 @return error code. */ 271 int add_redo_file(char *file_name, ib_uint64_t file_size, 272 ib_uint64_t file_offset); 273 274 /** Get file metadata by index for current state 275 @param[in] index file index 276 @return file metadata entry */ 277 Clone_File_Meta *get_file_by_index(uint index); 278 279 /** Get next block of data to transfer 280 @param[in] chunk_num current chunk 281 @param[in,out] block_num current/next block 282 @param[in,out] file_meta current/next block file metadata 283 @param[out] data_offset block offset in file 284 @param[out] data_buf data buffer or NULL if transfer from file 285 @param[out] data_size size of data in bytes 286 @return error code */ 287 int get_next_block(uint chunk_num, uint &block_num, 288 Clone_File_Meta *file_meta, ib_uint64_t &data_offset, 289 byte *&data_buf, uint &data_size); 290 291 /** Update snapshot block size based on caller's buffer size 292 @param[in] buff_size buffer size for clone transfer */ 293 void update_block_size(uint buff_size); 294 295 /** Check if copy snapshot 296 @return true if snapshot is for copy */ is_copy()297 bool is_copy() const { return (m_snapshot_handle_type == CLONE_HDL_COPY); } 298 299 /** Update file size when file is extended during page copy 300 @param[in] file_index current file index 301 @param[in] file_size new file size */ 302 void update_file_size(uint32_t file_index, uint64_t file_size); 303 304 /** Encrypt tablespace key in header page with master key. 305 @param[in] page_size page size descriptor 306 @param[in,out] page_data page data to update 307 @return true, if successful. */ 308 bool encrypt_key_in_header(const page_size_t &page_size, byte *page_data); 309 310 /** Encrypt tablespace key in header page with master key. 311 @param[in,out] log_header page data to update 312 @param[in] header_len length of log header 313 @return true, if successful. */ 314 bool encrypt_key_in_log_header(byte *log_header, uint32_t header_len); 315 316 /** Decrypt tablespace key in header page with master key. 317 @param[in] space tablespace 318 @param[in] page_size page size descriptor 319 @param[in,out] page_data page data to update */ 320 void decrypt_key_in_header(fil_space_t *space, const page_size_t &page_size, 321 byte *&page_data); 322 323 private: 324 /** Synchronize snapshot with binary log and GTID. 325 @param[in] cbk alert callback for long wait 326 @return error code. */ 327 int synchronize_binlog_gtid(Clone_Alert_Func cbk); 328 329 /** Make sure that the trx sys page binary log position correctly reflects 330 all transactions committed to innodb. It updates binary log position 331 in transaction sys page, if required. The caller must ensure that any new 332 transaction is committed in order of binary log. 333 @return error code. */ 334 int update_binlog_position(); 335 336 /** Wait for already prepared binlog transactions to end. 337 @return error code. */ 338 int wait_for_binlog_prepared_trx(); 339 340 /** Wait for a transaction to end. 341 @param[in] thd current THD 342 @param[in] trx_id transaction to wait for 343 @return error code. */ 344 int wait_trx_end(THD *thd, trx_id_t trx_id); 345 346 /** Check if state transition is in progress 347 @return true during state transition */ in_transit_state()348 bool in_transit_state() { 349 mutex_own(&m_snapshot_mutex); 350 return (m_snapshot_next_state != CLONE_SNAPSHOT_NONE); 351 } 352 353 /** Initialize current state 354 @param[in] state_desc descriptor for the state 355 @param[in] temp_buffer buffer used during page copy initialize 356 @param[in] temp_buffer_len buffer length 357 @param[in] cbk alert callback for long wait 358 @return error code */ 359 int init_state(Clone_Desc_State *state_desc, byte *temp_buffer, 360 uint temp_buffer_len, Clone_Alert_Func cbk); 361 362 /** Initialize snapshot state for file copy 363 @return error code */ 364 int init_file_copy(); 365 366 /** Initialize disk byte estimate. */ init_disk_estimate()367 void init_disk_estimate() { 368 /* Initial size is set to the redo file size on disk. */ 369 m_data_bytes_disk = log_get_file_capacity(*log_sys); 370 } 371 372 /** Initialize snapshot state for page copy 373 @param[in] page_buffer temporary buffer to copy page IDs 374 @param[in] page_buffer_len buffer length 375 @return error code */ 376 int init_page_copy(byte *page_buffer, uint page_buffer_len); 377 378 /** Initialize snapshot state for redo copy 379 @param[in] cbk alert callback for long wait 380 @return error code */ 381 int init_redo_copy(Clone_Alert_Func cbk); 382 383 /** Initialize state while applying cloned data 384 @param[in] state_desc snapshot state descriptor 385 @return error code */ 386 int init_apply_state(Clone_Desc_State *state_desc); 387 388 /** Extend and flush files after copying data 389 @param[in] is_redo if true flush redo, otherwise data 390 @return error code */ 391 int extend_and_flush_files(bool is_redo); 392 393 /** Create file descriptor and add to current file list 394 @param[in] data_dir destination data directory 395 @param[in,out] file_desc file descriptor 396 @return error code */ 397 int create_desc(const char *data_dir, Clone_File_Meta *&file_desc); 398 399 /** Get file metadata for current chunk 400 @param[in] file_vector clone file vector 401 @param[in] num_files total number of files 402 @param[in] chunk_num current chunk number 403 @param[in] start_index index for starting the search 404 @return file metadata */ 405 Clone_File_Meta *get_file(Clone_File_Vec &file_vector, uint num_files, 406 uint chunk_num, uint start_index); 407 408 /** Get next page from buffer pool 409 @param[in] chunk_num current chunk 410 @param[in,out] block_num current, next block 411 @param[in] file_meta file metadata for page 412 @param[out] data_offset offset in file 413 @param[out] data_buf page data 414 @param[out] data_size page data size 415 @return error code */ 416 int get_next_page(uint chunk_num, uint &block_num, Clone_File_Meta *file_meta, 417 ib_uint64_t &data_offset, byte *&data_buf, uint &data_size); 418 419 /** Get page from buffer pool and make ready for write 420 @param[in] page_id page ID chunk 421 @param[in] page_size page size descriptor 422 @param[in] file_meta file metadata for page 423 @param[out] page_data data page 424 @param[out] data_size page size in bytes 425 @return error code */ 426 int get_page_for_write(const page_id_t &page_id, const page_size_t &page_size, 427 Clone_File_Meta *file_meta, byte *&page_data, 428 uint &data_size); 429 430 /* Make page ready for flush by updating LSN anc checksum 431 @param[in] page_size page size descriptor 432 @param[in] page_lsn LSN to update the page with 433 @param[in,out] page_data data page */ 434 void page_update_for_flush(const page_size_t &page_size, lsn_t page_lsn, 435 byte *&page_data); 436 437 /** Build file metadata entry 438 @param[in] file_name name of the file 439 @param[in] file_size file size in bytes 440 @param[in] file_offset start offset 441 @param[in] num_chunks total number of chunks in the file 442 @param[in] copy_file_name copy the file name or use reference 443 @return file metadata entry */ 444 Clone_File_Meta *build_file(const char *file_name, uint64_t file_size, 445 uint64_t file_offset, uint &num_chunks, 446 bool copy_file_name); 447 448 /** Add buffer pool dump file to the file list 449 @return error code */ 450 int add_buf_pool_file(); 451 452 /** Add file to snapshot 453 @param[in] name file name 454 @param[in] size_bytes file size in bytes 455 @param[in] alloc_bytes allocation size on disk for sparse file 456 @param[in] node file node 457 @param[in] copy_name copy the file name or use reference 458 @return error code. */ 459 int add_file(const char *name, uint64_t size_bytes, uint64_t alloc_bytes, 460 fil_node_t *node, bool copy_name); 461 462 /** Get chunk size 463 @return chunk size in pages */ chunk_size()464 uint chunk_size() { 465 uint size; 466 467 size = static_cast<uint>(ut_2_exp(m_chunk_size_pow2)); 468 return (size); 469 } 470 471 /** Get block size 472 @return block size in pages */ block_size()473 uint block_size() { 474 uint size; 475 476 ut_a(m_block_size_pow2 <= SNAPSHOT_MAX_BLOCK_SIZE_POW2); 477 size = static_cast<uint>(ut_2_exp(m_block_size_pow2)); 478 479 return (size); 480 } 481 482 /** Get number of blocks per chunk 483 @return blocks per chunk */ blocks_per_chunk()484 uint blocks_per_chunk() { 485 ut_a(m_block_size_pow2 <= m_chunk_size_pow2); 486 return (1 << (m_chunk_size_pow2 - m_block_size_pow2)); 487 } 488 489 /** Update file name in descriptor from configuration. 490 @param[in] data_dir clone data directory 491 @param[in,out] file_desc file descriptor 492 @param[in,out] path buffer for updated path 493 @param[in] path_len path buffer length 494 @return error code */ 495 int update_file_name(const char *data_dir, Clone_File_Meta *file_desc, 496 char *path, size_t path_len); 497 498 /** Build file name along with path for cloned data files. 499 @param[in] data_dir clone data directory 500 @param[in] alloc_size new file size to be allocated 501 @param[in,out] file_desc file descriptor 502 @return error code */ 503 int build_file_path(const char *data_dir, ulint alloc_size, 504 Clone_File_Meta *&file_desc); 505 506 /** Check for existing file and add clone extension. 507 @param[in] replace if data directory is replaced 508 @param[in,out] file_desc file descriptor 509 @return error code */ 510 int handle_existing_file(bool replace, Clone_File_Meta *file_desc); 511 512 /** Compute total length of cloned data file name and path. 513 @param[in] data_dir clone data directory 514 @param[in] file_desc file descriptor 515 @return total size in bytes */ 516 size_t compute_path_length(const char *data_dir, 517 const Clone_File_Meta *file_desc); 518 519 private: 520 /** @name Snapshot type and ID */ 521 522 /** Snapshot handle type */ 523 Clone_Handle_Type m_snapshot_handle_type; 524 525 /** Clone type */ 526 Ha_clone_type m_snapshot_type; 527 528 /** Unique snapshot ID */ 529 ib_uint64_t m_snapshot_id; 530 531 /** Index in global snapshot array */ 532 uint m_snapshot_arr_idx; 533 534 /** @name Snapshot State */ 535 536 /** Mutex to handle access by concurrent clones */ 537 ib_mutex_t m_snapshot_mutex; 538 539 /** Allow new clones to get attached to this snapshot */ 540 bool m_allow_new_clone; 541 542 /** Number of clones attached to this snapshot */ 543 uint m_num_clones; 544 545 /** Number of clones in current state */ 546 uint m_num_clones_current; 547 548 /** Number of clones moved over to next state */ 549 uint m_num_clones_next; 550 551 /** Current state */ 552 Snapshot_State m_snapshot_state; 553 554 /** Next state to move to. Set only during state transfer. */ 555 Snapshot_State m_snapshot_next_state; 556 557 /** @name Snapshot data block */ 558 559 /** Memory allocation heap */ 560 mem_heap_t *m_snapshot_heap; 561 562 /** Chunk size in power of 2 */ 563 uint m_chunk_size_pow2; 564 565 /** Block size in power of 2 */ 566 uint m_block_size_pow2; 567 568 /** Number of chunks in current state */ 569 uint m_num_current_chunks; 570 571 /** Maximum file name length observed till now. */ 572 size_t m_max_file_name_len; 573 574 /** @name Snapshot file data */ 575 576 /** All data files for transfer */ 577 Clone_File_Vec m_data_file_vector; 578 579 /** Map space ID to file vector index */ 580 Clone_File_Map m_data_file_map; 581 582 /** Number of data files to transfer */ 583 uint m_num_data_files; 584 585 /** Total number of data chunks */ 586 uint m_num_data_chunks; 587 588 /** Number of bytes on disk. */ 589 uint64_t m_data_bytes_disk; 590 591 /** Index into m_data_file_vector for all undo files. */ 592 std::vector<int> m_undo_file_indexes; 593 594 /** @name Snapshot page data */ 595 596 /** Page archiver client */ 597 Page_Arch_Client_Ctx m_page_ctx; 598 599 /** Set of unique page IDs */ 600 Clone_Page_Set m_page_set; 601 602 /** Sorted page IDs to transfer */ 603 Clone_Page_Vec m_page_vector; 604 605 /** Number of pages to transfer */ 606 uint m_num_pages; 607 608 /** Number of duplicate pages found */ 609 uint m_num_duplicate_pages; 610 611 /** @name Snapshot redo data */ 612 613 /** redo log archiver client */ 614 Log_Arch_Client_Ctx m_redo_ctx; 615 616 /** All archived redo files to transfer */ 617 Clone_File_Vec m_redo_file_vector; 618 619 /** Start offset in first redo file */ 620 ib_uint64_t m_redo_start_offset; 621 622 /** Redo header block */ 623 byte *m_redo_header; 624 625 /** Redo header size */ 626 uint m_redo_header_size; 627 628 /** Redo trailer block */ 629 byte *m_redo_trailer; 630 631 /** Redo trailer size */ 632 uint m_redo_trailer_size; 633 634 /** Redo trailer block offset */ 635 ib_uint64_t m_redo_trailer_offset; 636 637 /** Archived redo file size */ 638 ib_uint64_t m_redo_file_size; 639 640 /** Number of archived redo files to transfer */ 641 uint m_num_redo_files; 642 643 /** Total number of redo data chunks */ 644 uint m_num_redo_chunks; 645 646 /** Enable PFS monitoring */ 647 bool m_enable_pfs; 648 649 /** Performance Schema accounting object to monitor stage progess */ 650 Clone_Monitor m_monitor; 651 }; 652 653 #endif /* CLONE_SNAPSHOT_INCLUDE */ 654