1 /*****************************************************************************
2 
3 Copyright (c) 1997, 2020, Oracle and/or its affiliates. All Rights Reserved.
4 
5 This program is free software; you can redistribute it and/or modify it under
6 the terms of the GNU General Public License, version 2.0, as published by the
7 Free Software Foundation.
8 
9 This program is also distributed with certain software (including but not
10 limited to OpenSSL) that is licensed under separate terms, as designated in a
11 particular file or component or in included license documentation. The authors
12 of MySQL hereby grant you an additional permission to link the program and
13 your derivative works with the separately licensed software that they have
14 included with MySQL.
15 
16 This program is distributed in the hope that it will be useful, but WITHOUT
17 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
18 FOR A PARTICULAR PURPOSE. See the GNU General Public License, version 2.0,
19 for more details.
20 
21 You should have received a copy of the GNU General Public License along with
22 this program; if not, write to the Free Software Foundation, Inc.,
23 51 Franklin St, Fifth Floor, Boston, MA 02110-1301  USA
24 
25 *****************************************************************************/
26 
27 /** @file include/log0recv.h
28  Recovery
29 
30  Created 9/20/1997 Heikki Tuuri
31  *******************************************************/
32 
33 #ifndef log0recv_h
34 #define log0recv_h
35 
#include "buf0types.h"
#include "dict0types.h"
#include "hash0hash.h"
#include "log0types.h"
#include "mtr0types.h"
#include "os0file.h" /* OS_FILE_LOG_BLOCK_SIZE */
#include "univ.i"
#include "ut0byte.h"
#include "ut0new.h"

#include <functional>
#include <list>
#include <map>
#include <set>
#include <unordered_map>
#include <utility>
#include <vector>
49 
/* Forward declarations for types that this header names before, or without,
defining them. */
class MetadataRecover;
class PersistentTableMetadata;

struct recv_addr_t;
54 
55 /** list of tablespaces, that experienced an inplace DDL during a backup op */
56 extern std::list<std::pair<space_id_t, lsn_t>> index_load_list;
57 /** the last redo log flush len as seen by MEB */
58 extern volatile lsn_t backup_redo_log_flushed_lsn;
59 /** TRUE when the redo log is being backed up */
60 extern bool recv_is_making_a_backup;
61 
62 #ifdef UNIV_HOTBACKUP
63 
64 /** Scans the log segment and n_bytes_scanned is set to the length of valid
65 log scanned.
66 @param[in]	buf			buffer containing log data
67 @param[in]	buf_len			data length in that buffer
68 @param[in,out]	scanned_lsn		lsn of buffer start, we return scanned
69 lsn
70 @param[in,out]	scanned_checkpoint_no	4 lowest bytes of the highest scanned
71 @param[out]	block_no	highest block no in scanned buffer.
72 checkpoint number so far
73 @param[out]	n_bytes_scanned		how much we were able to scan, smaller
74 than buf_len if log data ended here
75 @param[out]	has_encrypted_log	set true, if buffer contains encrypted
76 redo log, set false otherwise */
77 void meb_scan_log_seg(byte *buf, ulint buf_len, lsn_t *scanned_lsn,
78                       uint32_t *scanned_checkpoint_no, uint32_t *block_no,
79                       ulint *n_bytes_scanned, bool *has_encrypted_log);
80 
81 /** Applies the hashed log records to the page, if the page lsn is less than the
82 lsn of a log record. This can be called when a buffer page has just been
83 read in, or also for a page already in the buffer pool.
84 
85 TODO(Bug#31173032): Remove SUPPRESS_UBSAN_CLANG10.
86 
87 @param[in,out]	block		buffer block */
88 void recv_recover_page_func(buf_block_t *block) SUPPRESS_UBSAN_CLANG10;
89 
/** Wrapper for recv_recover_page_func().
Applies the hashed log records to the page, if the page lsn is less than the
lsn of a log record. This can be called when a buffer page has just been
read in, or also for a page already in the buffer pool.
@param[in]      jri    true if just read in (the i/o handler calls this for
                       a freshly read page); this variant of the macro
                       accepts the argument but does not forward it to
                       recv_recover_page_func()
@param[in,out]  block  the buffer block */
#define recv_recover_page(jri, block) recv_recover_page_func(block)
99 
/** Applies log records in the hash table to a backup. */
void meb_apply_log_recs();
102 
103 /** Applies log records in the hash table to a backup using a callback
104 functions.
105 @param[in]	apply_log_record_function  function for apply
106 @param[in]	wait_till_done_function    function for wait */
107 void meb_apply_log_recs_via_callback(
108     void (*apply_log_record_function)(recv_addr_t *),
109     void (*wait_till_done_function)());
110 
111 /** Applies a log record in the hash table to a backup.
112 @param[in]	recv_addr	chain of log records
113 @param[in,out]	block		buffer block to apply the records to */
114 void meb_apply_log_record(recv_addr_t *recv_addr, buf_block_t *block);
115 
116 /** Process a file name passed as an input
117 @param[in]	name		absolute path of tablespace file
118 @param[in]	space_id	the tablespace ID
119 @retval		true		if able to process file successfully.
120 @retval		false		if unable to process the file */
121 void meb_fil_name_process(const char *name, space_id_t space_id);
122 
123 /** Scans log from a buffer and stores new log data to the parsing buffer.
124 Parses and hashes the log records if new data found.  Unless
125 UNIV_HOTBACKUP is defined, this function will apply log records
126 automatically when the hash table becomes full.
127 @param[in]	available_memory	we let the hash table of recs
128 to grow to this size, at the maximum
129 @param[in]	buf			buffer containing a log
130 segment or garbage
131 @param[in]	len			buffer length
132 @param[in]	checkpoint_lsn		latest checkpoint LSN
133 @param[in]	start_lsn		buffer start lsn
134 @param[in]	contiguous_lsn		it is known that all log
135 groups contain contiguous log data up to this lsn
136 @param[out]	group_scanned_lsn	scanning succeeded up to this lsn
137 @retval	true	if limit_lsn has been reached, or not able to scan any
138 more in this log group
139 @retval	false	otherwise */
140 bool meb_scan_log_recs(ulint available_memory, const byte *buf, ulint len,
141                        lsn_t checkpoint_lsn, lsn_t start_lsn,
142                        lsn_t *contiguous_lsn, lsn_t *group_scanned_lsn);
143 
144 /** Creates an IORequest object for decrypting redo log with
145 Encryption::decrypt_log() method. If the encryption_info parameter is
146 a null pointer, then encryption information is read from
147 "ib_logfile0". If the encryption_info parameter is not null, then it
148 should contain a copy of the encryption info stored in the header of
149 "ib_logfile0".
150 @param[in,out]	encryption_request      an IORequest object
151 @param[in]	encryption_info         a copy of the encryption info in
152 the header of "ib_logfile0", or a null pointer
153 @retval	true	if the call succeeded
154 @retval	false	otherwise */
155 bool meb_read_log_encryption(IORequest &encryption_request,
156                              byte *encryption_info = nullptr);
157 
158 bool recv_check_log_header_checksum(const byte *buf);
159 /** Check the 4-byte checksum to the trailer checksum field of a log
160 block.
161 @param[in]	block	pointer to a log block
162 @return whether the checksum matches */
163 bool log_block_checksum_is_ok(const byte *block);
164 #else /* UNIV_HOTBACKUP */
165 
166 /** Applies the hashed log records to the page, if the page lsn is less than the
167 lsn of a log record. This can be called when a buffer page has just been
168 read in, or also for a page already in the buffer pool.
169 
170 TODO(fix Bug#31173032): Remove SUPPRESS_UBSAN_CLANG10.
171 
172 @param[in]	just_read_in	true if the IO handler calls this for a freshly
173                                 read page
174 @param[in,out]	block		buffer block */
175 void recv_recover_page_func(bool just_read_in,
176                             buf_block_t *block) SUPPRESS_UBSAN_CLANG10;
177 
/** Wrapper for recv_recover_page_func().
Applies the hashed log records to the page, if the page lsn is less than the
lsn of a log record. This can be called when a buffer page has just been
read in, or also for a page already in the buffer pool.
@param[in]      jri    true if just read in (the i/o handler calls this for
                       a freshly read page); forwarded as the first argument
                       of recv_recover_page_func()
@param[in,out]  block  buffer block */
#define recv_recover_page(jri, block) recv_recover_page_func(jri, block)
186 
/** Frees the recovery system. */
void recv_sys_free();

/** Reset the state of the recovery system variables. */
void recv_sys_var_init();
192 
193 #endif /* UNIV_HOTBACKUP */
194 
195 #ifdef UNIV_HOTBACKUP
/** Get the number of bytes used by all the heaps.
@return number of bytes used */
size_t meb_heap_used();
199 #endif /* UNIV_HOTBACKUP */
200 
201 /** Returns true if recovery is currently running.
202 @return recv_recovery_on */
203 UNIV_INLINE
204 bool recv_recovery_is_on() MY_ATTRIBUTE((warn_unused_result));
205 
206 /** Start recovering from a redo log checkpoint.
207 @see recv_recovery_from_checkpoint_finish
208 @param[in,out]  log   redo log
209 @param[in]  flush_lsn FIL_PAGE_FILE_FLUSH_LSN
210                                 of first system tablespace page
211 @param[in]  to_lsn    LSN to store recovery at
212 @return error code or DB_SUCCESS */
213 dberr_t recv_recovery_from_checkpoint_start(log_t &log, lsn_t flush_lsn,
214                                             lsn_t to_lsn)
215     MY_ATTRIBUTE((warn_unused_result));
216 
217 /** Complete the recovery from the latest checkpoint.
218 @param[in,out]	log		redo log
219 @param[in]	aborting	true if the server has to abort due to an error
220 @return recovered persistent metadata or nullptr if aborting*/
221 MetadataRecover *recv_recovery_from_checkpoint_finish(log_t &log, bool aborting)
222     MY_ATTRIBUTE((warn_unused_result));
223 
224 /** Creates the recovery system. */
225 void recv_sys_create();
226 
227 /** Release recovery system mutexes. */
228 void recv_sys_close();
229 
230 /** Inits the recovery system for a recovery operation.
231 @param[in]	max_mem		Available memory in bytes */
232 void recv_sys_init(ulint max_mem);
233 
234 /** Calculates the new value for lsn when more data is added to the log.
235 @param[in]	lsn		Old LSN
236 @param[in]	len		This many bytes of data is added, log block
237                                 headers not included
238 @return LSN after data addition */
239 lsn_t recv_calc_lsn_on_data_add(lsn_t lsn, uint64_t len);
240 
241 /** Empties the hash table of stored log records, applying them to appropriate
242 pages.
243 @param[in,out]	log		redo log
244 @param[in]	allow_ibuf	if true, ibuf operations are allowed during
245                                 the application; if false, no ibuf operations
246                                 are allowed, and after the application all
247                                 file pages are flushed to disk and invalidated
248                                 in buffer pool: this alternative means that
249                                 no new log records can be generated during
250                                 the application; the caller must in this case
251                                 own the log mutex */
252 void recv_apply_hashed_log_recs(log_t &log, bool allow_ibuf);
253 
#if defined(UNIV_DEBUG) || defined(UNIV_HOTBACKUP)
/** Return string name of the redo log record type.
@param[in]  type  redo log record type
@return string name of the redo log record type */
const char *get_mlog_string(mlog_id_t type);
#endif /* UNIV_DEBUG || UNIV_HOTBACKUP */
260 
/** Block of log record data */
struct recv_data_t {
  /** Pointer to the next block or nullptr.  The log record data
  is stored physically immediately after this struct, max amount
  RECV_DATA_BLOCK_SIZE bytes of it */
  recv_data_t *next;
};
269 
270 /** Stored log record struct */
271 struct recv_t {
272   using Node = UT_LIST_NODE_T(recv_t);
273 
274   /** Log record type */
275   mlog_id_t type;
276 
277   /** Log record body length in bytes */
278   ulint len;
279 
280   /** Chain of blocks containing the log record body */
281   recv_data_t *data;
282 
283   /** Start lsn of the log segment written by the mtr which generated
284   this log record: NOTE that this is not necessarily the start lsn of
285   this log record */
286   lsn_t start_lsn;
287 
288   /** End lsn of the log segment written by the mtr which generated
289   this log record: NOTE that this is not necessarily the end LSN of
290   this log record */
291   lsn_t end_lsn;
292 
293   /** List node, list anchored in recv_addr_t */
294   Node rec_list;
295 };
296 
/** States of recv_addr_t */
enum recv_addr_state {

  /** Not yet processed */
  RECV_NOT_PROCESSED,

  /** Page is being read */
  RECV_BEING_READ,

  /** Log records are being applied on the page */
  RECV_BEING_PROCESSED,

  /** Log records have been applied on the page */
  RECV_PROCESSED,

  /** Log records have been discarded because the tablespace
  does not exist */
  RECV_DISCARDED
};
316 
317 /** Hashed page file address struct */
318 struct recv_addr_t {
319   using List = UT_LIST_BASE_NODE_T(recv_t);
320 
321   /** recovery state of the page */
322   recv_addr_state state;
323 
324   /** Space ID */
325   space_id_t space;
326 
327   /** Page number */
328   page_no_t page_no;
329 
330   /** List of log records for this page */
331   List rec_list;
332 };
333 
// Forward declaration of dblwr::recv::DBLWR, which this header only refers
// to through a pointer.
namespace dblwr {
namespace recv {
class DBLWR;
}
}  // namespace dblwr
340 
341 /** Class to parse persistent dynamic metadata redo log, store and
342 merge them and apply them to in-memory table objects finally */
343 class MetadataRecover {
344   using PersistentTables = std::map<
345       table_id_t, PersistentTableMetadata *, std::less<table_id_t>,
346       ut_allocator<std::pair<const table_id_t, PersistentTableMetadata *>>>;
347 
348  public:
349   /** Default constructor */
MetadataRecover()350   MetadataRecover() UNIV_NOTHROW {}
351 
352   /** Destructor */
353   ~MetadataRecover();
354 
355   /** Parse a dynamic metadata redo log of a table and store
356   the metadata locally
357   @param[in]	id		table id
358   @param[in]	version		table dynamic metadata version
359   @param[in]	ptr		redo log start
360   @param[in]	end		end of redo log
361   @retval ptr to next redo log record, NULL if this log record
362   was truncated */
363   byte *parseMetadataLog(table_id_t id, uint64_t version, byte *ptr, byte *end);
364 
365   /** Apply the collected persistent dynamic metadata to in-memory
366   table objects */
367   void apply();
368 
369   /** Store the collected persistent dynamic metadata to
370   mysql.innodb_dynamic_metadata */
371   void store();
372 
373   /** If there is any metadata to be applied
374   @return	true if any metadata to be applied, otherwise false */
empty()375   bool empty() const { return (m_tables.empty()); }
376 
377  private:
378   /** Get the dynamic metadata of a specified table,
379   create a new one if not exist
380   @param[in]	id	table id
381   @return the metadata of the specified table */
382   PersistentTableMetadata *getMetadata(table_id_t id);
383 
384  private:
385   /** Map used to store and merge persistent dynamic metadata */
386   PersistentTables m_tables;
387 };
388 
389 /** Recovery system data structure */
390 struct recv_sys_t {
391   using Pages =
392       std::unordered_map<page_no_t, recv_addr_t *, std::hash<page_no_t>,
393                          std::equal_to<page_no_t>>;
394 
395   /** Every space has its own heap and pages that belong to it. */
396   struct Space {
397     /** Constructor
398     @param[in,out]	heap	Heap to use for the log records. */
Spacerecv_sys_t::Space399     explicit Space(mem_heap_t *heap) : m_heap(heap), m_pages() {}
400 
401     /** Default constructor */
Spacerecv_sys_t::Space402     Space() : m_heap(), m_pages() {}
403 
404     /** Memory heap of log records and file addresses */
405     mem_heap_t *m_heap;
406 
407     /** Pages that need to be recovered */
408     Pages m_pages;
409   };
410 
411   using Missing_Ids = std::set<space_id_t>;
412 
413   using Spaces = std::unordered_map<space_id_t, Space, std::hash<space_id_t>,
414                                     std::equal_to<space_id_t>>;
415 
416   /* Recovery encryption information */
417   struct Encryption_Key {
418     /** Tablespace ID */
419     space_id_t space_id;
420 
421     /** Encryption key */
422     byte *ptr;
423 
424     /** Encryption IV */
425     byte *iv;
426   };
427 
428   using Encryption_Keys = std::vector<Encryption_Key>;
429 
430 #ifndef UNIV_HOTBACKUP
431 
432   /*!< mutex protecting the fields apply_log_recs, n_addrs, and the
433   state field in each recv_addr struct */
434   ib_mutex_t mutex;
435 
436   /** mutex coordinating flushing between recv_writer_thread and
437   the recovery thread. */
438   ib_mutex_t writer_mutex;
439 
440   /** event to activate page cleaner threads */
441   os_event_t flush_start;
442 
443   /** event to signal that the page cleaner has finished the request */
444   os_event_t flush_end;
445 
446   /** type of the flush request. BUF_FLUSH_LRU: flush end of LRU,
447   keeping free blocks.  BUF_FLUSH_LIST: flush all of blocks. */
448   buf_flush_t flush_type;
449 
450 #else  /* !UNIV_HOTBACKUP */
451   bool apply_file_operations;
452 #endif /* !UNIV_HOTBACKUP */
453 
454   /** This is true when log rec application to pages is allowed;
455   this flag tells the i/o-handler if it should do log record
456   application */
457   bool apply_log_recs;
458 
459   /** This is true when a log rec application batch is running */
460   bool apply_batch_on;
461 
462   /** Possible incomplete last recovered log block */
463   byte *last_block;
464 
465   /** The nonaligned start address of the preceding buffer */
466   byte *last_block_buf_start;
467 
468   /** Buffer for parsing log records */
469   byte *buf;
470 
471   /** Size of the parsing buffer */
472   size_t buf_len;
473 
474   /** Amount of data in buf */
475   ulint len;
476 
477   /** This is the lsn from which we were able to start parsing
478   log records and adding them to the hash table; zero if a suitable
479   start point not found yet */
480   lsn_t parse_start_lsn;
481 
482   /** Checkpoint lsn that was used during recovery (read from file). */
483   lsn_t checkpoint_lsn;
484 
485   /** Number of data bytes to ignore until we reach checkpoint_lsn. */
486   ulint bytes_to_ignore_before_checkpoint;
487 
488   /** The log data has been scanned up to this lsn */
489   lsn_t scanned_lsn;
490 
491   /** The log data has been scanned up to this checkpoint
492   number (lowest 4 bytes) */
493   ulint scanned_checkpoint_no;
494 
495   /** Start offset of non-parsed log records in buf */
496   ulint recovered_offset;
497 
498   /** The log records have been parsed up to this lsn */
499   lsn_t recovered_lsn;
500 
501   /** The previous value of recovered_lsn - before we parsed the last mtr.
502   It is equal to recovered_lsn before we parsed any mtr. This is used to
503   find moments in which recovered_lsn moves to the next block in which case
504   we should update the last_block_first_rec_group (described below). */
505   lsn_t previous_recovered_lsn;
506 
507   /** Tracks what should be the proper value of first_rec_group field in the
508   header of the block to which recovered_lsn belongs. It might be also zero,
509   in which case it means we do not know. */
510   uint32_t last_block_first_rec_group;
511 
512   /** Set when finding a corrupt log block or record, or there
513   is a log parsing buffer overflow */
514   bool found_corrupt_log;
515 
516   /** Set when an inconsistency with the file system contents
517   is detected during log scan or apply */
518   bool found_corrupt_fs;
519 
520   /** If the recovery is from a cloned database. */
521   bool is_cloned_db;
522 
523   /** Recovering from MEB. */
524   bool is_meb_recovery;
525 
526   /** Doublewrite buffer state before MEB recovery starts. We restore to this
527   state after MEB recovery completes and disable the doublewrite buffer during
528   MEB recovery. */
529   bool dblwr_state;
530 
531   /** Hash table of pages, indexed by SpaceID. */
532   Spaces *spaces;
533 
534   /** Number of not processed hashed file addresses in the hash table */
535   ulint n_addrs;
536 
537   /** Doublewrite buffer pages, destroyed after recovery completes */
538   dblwr::recv::DBLWR *dblwr;
539 
540   /** We store and merge all table persistent data here during
541   scanning redo logs */
542   MetadataRecover *metadata_recover;
543 
544   /** Encryption Key information per tablespace ID */
545   Encryption_Keys *keys;
546 
547   /** Tablespace IDs that were ignored during redo log apply. */
548   Missing_Ids missing_ids;
549 
550   /** Tablespace IDs that were explicitly deleted. */
551   Missing_Ids deleted;
552 };
553 
554 /** The recovery system */
555 extern recv_sys_t *recv_sys;
556 
/** true when applying redo log records during crash recovery; false
otherwise.  Note that this is false while a background thread is
rolling back incomplete transactions. */
extern volatile bool recv_recovery_on;

/** If the following is true, the buffer pool file pages must be invalidated
after recovery and no ibuf operations are allowed; this becomes true if
the log record hash table becomes too full, and log records must be merged
to file pages already before the recovery is finished: in this case no
ibuf operations are allowed, as they could modify the pages read in the
buffer pool before the pages have been recovered to the up-to-date state.

true means that recovery is running and no operations on the log files
are allowed yet: the variable name is misleading. */
extern bool recv_no_ibuf_operations;

/** true when recv_init_crash_recovery() has been called. */
extern bool recv_needed_recovery;

/** true if buf_page_is_corrupted() should check if the log sequence
number (FIL_PAGE_LSN) is in the future.  Initially false, and set by
recv_recovery_from_checkpoint_start(). */
extern bool recv_lsn_checks_on;
580 
/** Size of the parsing buffer (2 MiB); it must accommodate RECV_SCAN_SIZE
many times! */
#define RECV_PARSING_BUF_SIZE (2 * 1024 * 1024)

/** Size of block reads when the log groups are scanned forward to do a
roll-forward: four times UNIV_PAGE_SIZE */
#define RECV_SCAN_SIZE (4 * UNIV_PAGE_SIZE)
588 
589 /** This many frames must be left free in the buffer pool when we scan
590 the log and store the scanned log records in the buffer pool: we will
591 use these free frames to read in pages when we start applying the
592 log records to the database. */
593 extern ulint recv_n_pool_free_frames;
594 
595 /** A list of tablespaces for which (un)encryption process was not
596 completed before crash. */
597 extern std::list<space_id_t> recv_encr_ts_list;
598 
599 /** Check the 4-byte checksum to the trailer checksum field of a log
600 block.
601 @param[in]  block pointer to a log block
602 @return whether the checksum matches */
603 bool log_block_checksum_is_ok(const byte *block);
604 
605 /** Find the latest checkpoint in the log header.
606 @param[in,out]  log   redo log
607 @param[out] max_field LOG_CHECKPOINT_1 or LOG_CHECKPOINT_2
608 @return error code or DB_SUCCESS */
609 MY_ATTRIBUTE((warn_unused_result))
610 dberr_t recv_find_max_checkpoint(log_t &log, ulint *max_field);
611 
612 /** Reads a specified log segment to a buffer.
613 @param[in,out]  log   redo log
614 @param[in,out]  buf   buffer where to read
615 @param[in]  start_lsn read area start
616 @param[in]  end_lsn   read area end */
617 void recv_read_log_seg(log_t &log, byte *buf, lsn_t start_lsn, lsn_t end_lsn);
618 
619 /** Adds data from a new log block to the parsing buffer of recv_sys if
620 recv_sys->parse_start_lsn is non-zero.
621 @param[in]  log_block   log block
622 @param[in]  scanned_lsn  lsn of how far we were able
623                          to find data in this log block
624 @param[in]  len          0 if full block or length of the data to add
625 @return true if more data added */
626 bool recv_sys_add_to_parsing_buf(const byte *log_block, lsn_t scanned_lsn,
627                                  ulint len);
628 
/** Moves the parsing buffer data left to the buffer start. */
void recv_reset_buffer();

/** Resize the recovery parsing buffer up to log_buffer_size.
@return NOTE(review): the meaning of the bool result is not documented in
this header; presumably true when the buffer was resized; confirm against
the definition */
bool recv_sys_resize_buf();
634 
635 /** Parse log records from a buffer and optionally store them to a
636 hash table to wait merging to file pages.
637 @param[in]  checkpoint_lsn  the LSN of the latest checkpoint */
638 void recv_parse_log_recs(lsn_t checkpoint_lsn);
639 
640 #include "log0recv.ic"
641 
642 #endif
643