1 /***************************************************************************** 2 3 Copyright (c) 1997, 2020, Oracle and/or its affiliates. All Rights Reserved. 4 5 This program is free software; you can redistribute it and/or modify it under 6 the terms of the GNU General Public License, version 2.0, as published by the 7 Free Software Foundation. 8 9 This program is also distributed with certain software (including but not 10 limited to OpenSSL) that is licensed under separate terms, as designated in a 11 particular file or component or in included license documentation. The authors 12 of MySQL hereby grant you an additional permission to link the program and 13 your derivative works with the separately licensed software that they have 14 included with MySQL. 15 16 This program is distributed in the hope that it will be useful, but WITHOUT 17 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 18 FOR A PARTICULAR PURPOSE. See the GNU General Public License, version 2.0, 19 for more details. 20 21 You should have received a copy of the GNU General Public License along with 22 this program; if not, write to the Free Software Foundation, Inc., 23 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 24 25 *****************************************************************************/ 26 27 /** @file include/log0recv.h 28 Recovery 29 30 Created 9/20/1997 Heikki Tuuri 31 *******************************************************/ 32 33 #ifndef log0recv_h 34 #define log0recv_h 35 36 #include "buf0types.h" 37 #include "dict0types.h" 38 #include "hash0hash.h" 39 #include "log0types.h" 40 #include "mtr0types.h" 41 #include "os0file.h" /* OS_FILE_LOG_BLOCK_SIZE */ 42 #include "univ.i" 43 #include "ut0byte.h" 44 #include "ut0new.h" 45 46 #include <list> 47 #include <set> 48 #include <unordered_map> 49 50 class MetadataRecover; 51 class PersistentTableMetadata; 52 53 struct recv_addr_t; 54 55 /** list of tablespaces, that experienced an inplace DDL during a backup op */ 56 extern std::list<std::pair<space_id_t, lsn_t>> index_load_list; 57 /** the last redo log flush len as seen by MEB */ 58 extern volatile lsn_t backup_redo_log_flushed_lsn; 59 /** TRUE when the redo log is being backed up */ 60 extern bool recv_is_making_a_backup; 61 62 #ifdef UNIV_HOTBACKUP 63 64 /** Scans the log segment and n_bytes_scanned is set to the length of valid 65 log scanned. 66 @param[in] buf buffer containing log data 67 @param[in] buf_len data length in that buffer 68 @param[in,out] scanned_lsn lsn of buffer start, we return scanned 69 lsn 70 @param[in,out] scanned_checkpoint_no 4 lowest bytes of the highest scanned 71 @param[out] block_no highest block no in scanned buffer. 72 checkpoint number so far 73 @param[out] n_bytes_scanned how much we were able to scan, smaller 74 than buf_len if log data ended here 75 @param[out] has_encrypted_log set true, if buffer contains encrypted 76 redo log, set false otherwise */ 77 void meb_scan_log_seg(byte *buf, ulint buf_len, lsn_t *scanned_lsn, 78 uint32_t *scanned_checkpoint_no, uint32_t *block_no, 79 ulint *n_bytes_scanned, bool *has_encrypted_log); 80 81 /** Applies the hashed log records to the page, if the page lsn is less than the 82 lsn of a log record. This can be called when a buffer page has just been 83 read in, or also for a page already in the buffer pool. 84 85 TODO(Bug#31173032): Remove SUPPRESS_UBSAN_CLANG10. 86 87 @param[in,out] block buffer block */ 88 void recv_recover_page_func(buf_block_t *block) SUPPRESS_UBSAN_CLANG10; 89 90 /** Wrapper for recv_recover_page_func(). 91 Applies the hashed log records to the page, if the page lsn is less than the 92 lsn of a log record. This can be called when a buffer page has just been 93 read in, or also for a page already in the buffer pool. 94 @param jri in: TRUE if just read in (the i/o handler calls this for 95 a freshly read page) 96 @param block in,out: the buffer block 97 */ 98 #define recv_recover_page(jri, block) recv_recover_page_func(block) 99 100 /** Applies log records in the hash table to a backup. */ 101 void meb_apply_log_recs(void); 102 103 /** Applies log records in the hash table to a backup using a callback 104 functions. 105 @param[in] apply_log_record_function function for apply 106 @param[in] wait_till_done_function function for wait */ 107 void meb_apply_log_recs_via_callback( 108 void (*apply_log_record_function)(recv_addr_t *), 109 void (*wait_till_done_function)()); 110 111 /** Applies a log record in the hash table to a backup. 112 @param[in] recv_addr chain of log records 113 @param[in,out] block buffer block to apply the records to */ 114 void meb_apply_log_record(recv_addr_t *recv_addr, buf_block_t *block); 115 116 /** Process a file name passed as an input 117 @param[in] name absolute path of tablespace file 118 @param[in] space_id the tablespace ID 119 @retval true if able to process file successfully. 120 @retval false if unable to process the file */ 121 void meb_fil_name_process(const char *name, space_id_t space_id); 122 123 /** Scans log from a buffer and stores new log data to the parsing buffer. 124 Parses and hashes the log records if new data found. Unless 125 UNIV_HOTBACKUP is defined, this function will apply log records 126 automatically when the hash table becomes full. 127 @param[in] available_memory we let the hash table of recs 128 to grow to this size, at the maximum 129 @param[in] buf buffer containing a log 130 segment or garbage 131 @param[in] len buffer length 132 @param[in] checkpoint_lsn latest checkpoint LSN 133 @param[in] start_lsn buffer start lsn 134 @param[in] contiguous_lsn it is known that all log 135 groups contain contiguous log data up to this lsn 136 @param[out] group_scanned_lsn scanning succeeded up to this lsn 137 @retval true if limit_lsn has been reached, or not able to scan any 138 more in this log group 139 @retval false otherwise */ 140 bool meb_scan_log_recs(ulint available_memory, const byte *buf, ulint len, 141 lsn_t checkpoint_lsn, lsn_t start_lsn, 142 lsn_t *contiguous_lsn, lsn_t *group_scanned_lsn); 143 144 /** Creates an IORequest object for decrypting redo log with 145 Encryption::decrypt_log() method. If the encryption_info parameter is 146 a null pointer, then encryption information is read from 147 "ib_logfile0". If the encryption_info parameter is not null, then it 148 should contain a copy of the encryption info stored in the header of 149 "ib_logfile0". 150 @param[in,out] encryption_request an IORequest object 151 @param[in] encryption_info a copy of the encryption info in 152 the header of "ib_logfile0", or a null pointer 153 @retval true if the call succeeded 154 @retval false otherwise */ 155 bool meb_read_log_encryption(IORequest &encryption_request, 156 byte *encryption_info = nullptr); 157 158 bool recv_check_log_header_checksum(const byte *buf); 159 /** Check the 4-byte checksum to the trailer checksum field of a log 160 block. 161 @param[in] block pointer to a log block 162 @return whether the checksum matches */ 163 bool log_block_checksum_is_ok(const byte *block); 164 #else /* UNIV_HOTBACKUP */ 165 166 /** Applies the hashed log records to the page, if the page lsn is less than the 167 lsn of a log record. This can be called when a buffer page has just been 168 read in, or also for a page already in the buffer pool. 169 170 TODO(fix Bug#31173032): Remove SUPPRESS_UBSAN_CLANG10. 171 172 @param[in] just_read_in true if the IO handler calls this for a freshly 173 read page 174 @param[in,out] block buffer block */ 175 void recv_recover_page_func(bool just_read_in, 176 buf_block_t *block) SUPPRESS_UBSAN_CLANG10; 177 178 /** Wrapper for recv_recover_page_func(). 179 Applies the hashed log records to the page, if the page lsn is less than the 180 lsn of a log record. This can be called when a buffer page has just been 181 read in, or also for a page already in the buffer pool. 182 @param jri in: TRUE if just read in (the i/o handler calls this for 183 a freshly read page) 184 @param[in,out] block buffer block */ 185 #define recv_recover_page(jri, block) recv_recover_page_func(jri, block) 186 187 /** Frees the recovery system. */ 188 void recv_sys_free(); 189 190 /** Reset the state of the recovery system variables. */ 191 void recv_sys_var_init(); 192 193 #endif /* UNIV_HOTBACKUP */ 194 195 #ifdef UNIV_HOTBACKUP 196 /** Get the number of bytes used by all the heaps 197 @return number of bytes used */ 198 size_t meb_heap_used(); 199 #endif /* UNIV_HOTBACKUP */ 200 201 /** Returns true if recovery is currently running. 202 @return recv_recovery_on */ 203 UNIV_INLINE 204 bool recv_recovery_is_on() MY_ATTRIBUTE((warn_unused_result)); 205 206 /** Start recovering from a redo log checkpoint. 207 @see recv_recovery_from_checkpoint_finish 208 @param[in,out] log redo log 209 @param[in] flush_lsn FIL_PAGE_FILE_FLUSH_LSN 210 of first system tablespace page 211 @param[in] to_lsn LSN to store recovery at 212 @return error code or DB_SUCCESS */ 213 dberr_t recv_recovery_from_checkpoint_start(log_t &log, lsn_t flush_lsn, 214 lsn_t to_lsn) 215 MY_ATTRIBUTE((warn_unused_result)); 216 217 /** Complete the recovery from the latest checkpoint. 218 @param[in,out] log redo log 219 @param[in] aborting true if the server has to abort due to an error 220 @return recovered persistent metadata or nullptr if aborting*/ 221 MetadataRecover *recv_recovery_from_checkpoint_finish(log_t &log, bool aborting) 222 MY_ATTRIBUTE((warn_unused_result)); 223 224 /** Creates the recovery system. */ 225 void recv_sys_create(); 226 227 /** Release recovery system mutexes. */ 228 void recv_sys_close(); 229 230 /** Inits the recovery system for a recovery operation. 231 @param[in] max_mem Available memory in bytes */ 232 void recv_sys_init(ulint max_mem); 233 234 /** Calculates the new value for lsn when more data is added to the log. 235 @param[in] lsn Old LSN 236 @param[in] len This many bytes of data is added, log block 237 headers not included 238 @return LSN after data addition */ 239 lsn_t recv_calc_lsn_on_data_add(lsn_t lsn, uint64_t len); 240 241 /** Empties the hash table of stored log records, applying them to appropriate 242 pages. 243 @param[in,out] log redo log 244 @param[in] allow_ibuf if true, ibuf operations are allowed during 245 the application; if false, no ibuf operations 246 are allowed, and after the application all 247 file pages are flushed to disk and invalidated 248 in buffer pool: this alternative means that 249 no new log records can be generated during 250 the application; the caller must in this case 251 own the log mutex */ 252 void recv_apply_hashed_log_recs(log_t &log, bool allow_ibuf); 253 254 #if defined(UNIV_DEBUG) || defined(UNIV_HOTBACKUP) 255 /** Return string name of the redo log record type. 256 @param[in] type record log record enum 257 @return string name of record log record */ 258 const char *get_mlog_string(mlog_id_t type); 259 #endif /* UNIV_DEBUG || UNIV_HOTBACKUP */ 260 261 /** Block of log record data */ 262 struct recv_data_t { 263 /** pointer to the next block or NULL. The log record data 264 is stored physically immediately after this struct, max amount 265 RECV_DATA_BLOCK_SIZE bytes of it */ 266 267 recv_data_t *next; 268 }; 269 270 /** Stored log record struct */ 271 struct recv_t { 272 using Node = UT_LIST_NODE_T(recv_t); 273 274 /** Log record type */ 275 mlog_id_t type; 276 277 /** Log record body length in bytes */ 278 ulint len; 279 280 /** Chain of blocks containing the log record body */ 281 recv_data_t *data; 282 283 /** Start lsn of the log segment written by the mtr which generated 284 this log record: NOTE that this is not necessarily the start lsn of 285 this log record */ 286 lsn_t start_lsn; 287 288 /** End lsn of the log segment written by the mtr which generated 289 this log record: NOTE that this is not necessarily the end LSN of 290 this log record */ 291 lsn_t end_lsn; 292 293 /** List node, list anchored in recv_addr_t */ 294 Node rec_list; 295 }; 296 297 /** States of recv_addr_t */ 298 enum recv_addr_state { 299 300 /** not yet processed */ 301 RECV_NOT_PROCESSED, 302 303 /** page is being read */ 304 RECV_BEING_READ, 305 306 /** log records are being applied on the page */ 307 RECV_BEING_PROCESSED, 308 309 /** log records have been applied on the page */ 310 RECV_PROCESSED, 311 312 /** log records have been discarded because the tablespace 313 does not exist */ 314 RECV_DISCARDED 315 }; 316 317 /** Hashed page file address struct */ 318 struct recv_addr_t { 319 using List = UT_LIST_BASE_NODE_T(recv_t); 320 321 /** recovery state of the page */ 322 recv_addr_state state; 323 324 /** Space ID */ 325 space_id_t space; 326 327 /** Page number */ 328 page_no_t page_no; 329 330 /** List of log records for this page */ 331 List rec_list; 332 }; 333 334 // Forward declaration 335 namespace dblwr { 336 namespace recv { 337 class DBLWR; 338 } 339 } // namespace dblwr 340 341 /** Class to parse persistent dynamic metadata redo log, store and 342 merge them and apply them to in-memory table objects finally */ 343 class MetadataRecover { 344 using PersistentTables = std::map< 345 table_id_t, PersistentTableMetadata *, std::less<table_id_t>, 346 ut_allocator<std::pair<const table_id_t, PersistentTableMetadata *>>>; 347 348 public: 349 /** Default constructor */ MetadataRecover()350 MetadataRecover() UNIV_NOTHROW {} 351 352 /** Destructor */ 353 ~MetadataRecover(); 354 355 /** Parse a dynamic metadata redo log of a table and store 356 the metadata locally 357 @param[in] id table id 358 @param[in] version table dynamic metadata version 359 @param[in] ptr redo log start 360 @param[in] end end of redo log 361 @retval ptr to next redo log record, NULL if this log record 362 was truncated */ 363 byte *parseMetadataLog(table_id_t id, uint64_t version, byte *ptr, byte *end); 364 365 /** Apply the collected persistent dynamic metadata to in-memory 366 table objects */ 367 void apply(); 368 369 /** Store the collected persistent dynamic metadata to 370 mysql.innodb_dynamic_metadata */ 371 void store(); 372 373 /** If there is any metadata to be applied 374 @return true if any metadata to be applied, otherwise false */ empty()375 bool empty() const { return (m_tables.empty()); } 376 377 private: 378 /** Get the dynamic metadata of a specified table, 379 create a new one if not exist 380 @param[in] id table id 381 @return the metadata of the specified table */ 382 PersistentTableMetadata *getMetadata(table_id_t id); 383 384 private: 385 /** Map used to store and merge persistent dynamic metadata */ 386 PersistentTables m_tables; 387 }; 388 389 /** Recovery system data structure */ 390 struct recv_sys_t { 391 using Pages = 392 std::unordered_map<page_no_t, recv_addr_t *, std::hash<page_no_t>, 393 std::equal_to<page_no_t>>; 394 395 /** Every space has its own heap and pages that belong to it. */ 396 struct Space { 397 /** Constructor 398 @param[in,out] heap Heap to use for the log records. */ Spacerecv_sys_t::Space399 explicit Space(mem_heap_t *heap) : m_heap(heap), m_pages() {} 400 401 /** Default constructor */ Spacerecv_sys_t::Space402 Space() : m_heap(), m_pages() {} 403 404 /** Memory heap of log records and file addresses */ 405 mem_heap_t *m_heap; 406 407 /** Pages that need to be recovered */ 408 Pages m_pages; 409 }; 410 411 using Missing_Ids = std::set<space_id_t>; 412 413 using Spaces = std::unordered_map<space_id_t, Space, std::hash<space_id_t>, 414 std::equal_to<space_id_t>>; 415 416 /* Recovery encryption information */ 417 struct Encryption_Key { 418 /** Tablespace ID */ 419 space_id_t space_id; 420 421 /** Encryption key */ 422 byte *ptr; 423 424 /** Encryption IV */ 425 byte *iv; 426 }; 427 428 using Encryption_Keys = std::vector<Encryption_Key>; 429 430 #ifndef UNIV_HOTBACKUP 431 432 /*!< mutex protecting the fields apply_log_recs, n_addrs, and the 433 state field in each recv_addr struct */ 434 ib_mutex_t mutex; 435 436 /** mutex coordinating flushing between recv_writer_thread and 437 the recovery thread. */ 438 ib_mutex_t writer_mutex; 439 440 /** event to activate page cleaner threads */ 441 os_event_t flush_start; 442 443 /** event to signal that the page cleaner has finished the request */ 444 os_event_t flush_end; 445 446 /** type of the flush request. BUF_FLUSH_LRU: flush end of LRU, 447 keeping free blocks. BUF_FLUSH_LIST: flush all of blocks. */ 448 buf_flush_t flush_type; 449 450 #else /* !UNIV_HOTBACKUP */ 451 bool apply_file_operations; 452 #endif /* !UNIV_HOTBACKUP */ 453 454 /** This is true when log rec application to pages is allowed; 455 this flag tells the i/o-handler if it should do log record 456 application */ 457 bool apply_log_recs; 458 459 /** This is true when a log rec application batch is running */ 460 bool apply_batch_on; 461 462 /** Possible incomplete last recovered log block */ 463 byte *last_block; 464 465 /** The nonaligned start address of the preceding buffer */ 466 byte *last_block_buf_start; 467 468 /** Buffer for parsing log records */ 469 byte *buf; 470 471 /** Size of the parsing buffer */ 472 size_t buf_len; 473 474 /** Amount of data in buf */ 475 ulint len; 476 477 /** This is the lsn from which we were able to start parsing 478 log records and adding them to the hash table; zero if a suitable 479 start point not found yet */ 480 lsn_t parse_start_lsn; 481 482 /** Checkpoint lsn that was used during recovery (read from file). */ 483 lsn_t checkpoint_lsn; 484 485 /** Number of data bytes to ignore until we reach checkpoint_lsn. */ 486 ulint bytes_to_ignore_before_checkpoint; 487 488 /** The log data has been scanned up to this lsn */ 489 lsn_t scanned_lsn; 490 491 /** The log data has been scanned up to this checkpoint 492 number (lowest 4 bytes) */ 493 ulint scanned_checkpoint_no; 494 495 /** Start offset of non-parsed log records in buf */ 496 ulint recovered_offset; 497 498 /** The log records have been parsed up to this lsn */ 499 lsn_t recovered_lsn; 500 501 /** The previous value of recovered_lsn - before we parsed the last mtr. 502 It is equal to recovered_lsn before we parsed any mtr. This is used to 503 find moments in which recovered_lsn moves to the next block in which case 504 we should update the last_block_first_rec_group (described below). */ 505 lsn_t previous_recovered_lsn; 506 507 /** Tracks what should be the proper value of first_rec_group field in the 508 header of the block to which recovered_lsn belongs. It might be also zero, 509 in which case it means we do not know. */ 510 uint32_t last_block_first_rec_group; 511 512 /** Set when finding a corrupt log block or record, or there 513 is a log parsing buffer overflow */ 514 bool found_corrupt_log; 515 516 /** Set when an inconsistency with the file system contents 517 is detected during log scan or apply */ 518 bool found_corrupt_fs; 519 520 /** If the recovery is from a cloned database. */ 521 bool is_cloned_db; 522 523 /** Recovering from MEB. */ 524 bool is_meb_recovery; 525 526 /** Doublewrite buffer state before MEB recovery starts. We restore to this 527 state after MEB recovery completes and disable the doublewrite buffer during 528 MEB recovery. */ 529 bool dblwr_state; 530 531 /** Hash table of pages, indexed by SpaceID. */ 532 Spaces *spaces; 533 534 /** Number of not processed hashed file addresses in the hash table */ 535 ulint n_addrs; 536 537 /** Doublewrite buffer pages, destroyed after recovery completes */ 538 dblwr::recv::DBLWR *dblwr; 539 540 /** We store and merge all table persistent data here during 541 scanning redo logs */ 542 MetadataRecover *metadata_recover; 543 544 /** Encryption Key information per tablespace ID */ 545 Encryption_Keys *keys; 546 547 /** Tablespace IDs that were ignored during redo log apply. */ 548 Missing_Ids missing_ids; 549 550 /** Tablespace IDs that were explicitly deleted. */ 551 Missing_Ids deleted; 552 }; 553 554 /** The recovery system */ 555 extern recv_sys_t *recv_sys; 556 557 /** TRUE when applying redo log records during crash recovery; FALSE 558 otherwise. Note that this is FALSE while a background thread is 559 rolling back incomplete transactions. */ 560 extern volatile bool recv_recovery_on; 561 562 /** If the following is TRUE, the buffer pool file pages must be invalidated 563 after recovery and no ibuf operations are allowed; this becomes TRUE if 564 the log record hash table becomes too full, and log records must be merged 565 to file pages already before the recovery is finished: in this case no 566 ibuf operations are allowed, as they could modify the pages read in the 567 buffer pool before the pages have been recovered to the up-to-date state. 568 569 TRUE means that recovery is running and no operations on the log files 570 are allowed yet: the variable name is misleading. */ 571 extern bool recv_no_ibuf_operations; 572 573 /** TRUE when recv_init_crash_recovery() has been called. */ 574 extern bool recv_needed_recovery; 575 576 /** TRUE if buf_page_is_corrupted() should check if the log sequence 577 number (FIL_PAGE_LSN) is in the future. Initially FALSE, and set by 578 recv_recovery_from_checkpoint_start(). */ 579 extern bool recv_lsn_checks_on; 580 581 /** Size of the parsing buffer; it must accommodate RECV_SCAN_SIZE many 582 times! */ 583 #define RECV_PARSING_BUF_SIZE (2 * 1024 * 1024) 584 585 /** Size of block reads when the log groups are scanned forward to do a 586 roll-forward */ 587 #define RECV_SCAN_SIZE (4 * UNIV_PAGE_SIZE) 588 589 /** This many frames must be left free in the buffer pool when we scan 590 the log and store the scanned log records in the buffer pool: we will 591 use these free frames to read in pages when we start applying the 592 log records to the database. */ 593 extern ulint recv_n_pool_free_frames; 594 595 /** A list of tablespaces for which (un)encryption process was not 596 completed before crash. */ 597 extern std::list<space_id_t> recv_encr_ts_list; 598 599 /** Check the 4-byte checksum to the trailer checksum field of a log 600 block. 601 @param[in] block pointer to a log block 602 @return whether the checksum matches */ 603 bool log_block_checksum_is_ok(const byte *block); 604 605 /** Find the latest checkpoint in the log header. 606 @param[in,out] log redo log 607 @param[out] max_field LOG_CHECKPOINT_1 or LOG_CHECKPOINT_2 608 @return error code or DB_SUCCESS */ 609 MY_ATTRIBUTE((warn_unused_result)) 610 dberr_t recv_find_max_checkpoint(log_t &log, ulint *max_field); 611 612 /** Reads a specified log segment to a buffer. 613 @param[in,out] log redo log 614 @param[in,out] buf buffer where to read 615 @param[in] start_lsn read area start 616 @param[in] end_lsn read area end */ 617 void recv_read_log_seg(log_t &log, byte *buf, lsn_t start_lsn, lsn_t end_lsn); 618 619 /** Adds data from a new log block to the parsing buffer of recv_sys if 620 recv_sys->parse_start_lsn is non-zero. 621 @param[in] log_block log block 622 @param[in] scanned_lsn lsn of how far we were able 623 to find data in this log block 624 @param[in] len 0 if full block or length of the data to add 625 @return true if more data added */ 626 bool recv_sys_add_to_parsing_buf(const byte *log_block, lsn_t scanned_lsn, 627 ulint len); 628 629 /** Moves the parsing buffer data left to the buffer start. */ 630 void recv_reset_buffer(); 631 632 /** Resize the recovery parsing buffer upto log_buffer_size */ 633 bool recv_sys_resize_buf(); 634 635 /** Parse log records from a buffer and optionally store them to a 636 hash table to wait merging to file pages. 637 @param[in] checkpoint_lsn the LSN of the latest checkpoint */ 638 void recv_parse_log_recs(lsn_t checkpoint_lsn); 639 640 #include "log0recv.ic" 641 642 #endif 643