1 /*****************************************************************************
2
3 Copyright (c) 1997, 2016, Oracle and/or its affiliates. All Rights Reserved.
4 Copyright (c) 2017, 2020, MariaDB Corporation.
5
6 This program is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free Software
8 Foundation; version 2 of the License.
9
10 This program is distributed in the hope that it will be useful, but WITHOUT
11 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12 FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License along with
15 this program; if not, write to the Free Software Foundation, Inc.,
16 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
17
18 *****************************************************************************/
19
20 /**************************************************//**
21 @file include/log0recv.h
22 Recovery
23
24 Created 9/20/1997 Heikki Tuuri
25 *******************************************************/
26
27 #ifndef log0recv_h
28 #define log0recv_h
29
30 #include "ut0byte.h"
31 #include "buf0types.h"
32 #include "hash0hash.h"
33 #include "log0log.h"
34 #include "mtr0types.h"
35
36 #include <deque>
37
/** Whether recv_writer_thread is active */
extern bool	recv_writer_thread_active;
40
/** @return whether recovery is currently running
(the value of recv_recovery_on, wrapped in UNIV_UNLIKELY) */
#define recv_recovery_is_on() UNIV_UNLIKELY(recv_recovery_on)
43
44 /** Find the latest checkpoint in the log header.
45 @param[out] max_field LOG_CHECKPOINT_1 or LOG_CHECKPOINT_2
46 @return error code or DB_SUCCESS */
47 dberr_t
48 recv_find_max_checkpoint(ulint* max_field)
49 MY_ATTRIBUTE((nonnull, warn_unused_result));
50
51 /** Reduces recv_sys.n_addrs for the corrupted page.
52 This function should called when srv_force_recovery > 0.
53 @param[in] page_id page id of the corrupted page */
54 void recv_recover_corrupt_page(page_id_t page_id);
55
56 /** Apply any buffered redo log to a page that was just read from a data file.
57 @param[in,out] bpage buffer pool page */
58 ATTRIBUTE_COLD void recv_recover_page(buf_page_t* bpage);
59
60 /** Start recovering from a redo log checkpoint.
61 @see recv_recovery_from_checkpoint_finish
62 @param[in] flush_lsn FIL_PAGE_FILE_FLUSH_LSN
63 of first system tablespace page
64 @return error code or DB_SUCCESS */
65 dberr_t
66 recv_recovery_from_checkpoint_start(
67 lsn_t flush_lsn);
/** Complete recovery from a checkpoint.
@see recv_recovery_from_checkpoint_start */
void
recv_recovery_from_checkpoint_finish(void);
/** Initiate the rollback of active transactions. */
void
recv_recovery_rollback_active(void);
76
/** Reset the state of the recovery system variables. */
void
recv_sys_var_init(void);
82
/** Apply the hash table of stored log records to persistent data pages.
@param[in]	last_batch	whether the change buffer merge will be
				performed as part of the operation */
void
recv_apply_hashed_log_recs(bool last_batch);
88
/** Whether to store redo log records to the hash table */
enum store_t {
	/** Do not store redo log records. */
	STORE_NO,
	/** Store redo log records. */
	STORE_YES,
	/** Store redo log records if the tablespace exists. */
	STORE_IF_EXISTS
};
98
99
100 /** Adds data from a new log block to the parsing buffer of recv_sys if
101 recv_sys.parse_start_lsn is non-zero.
102 @param[in] log_block log block to add
103 @param[in] scanned_lsn lsn of how far we were able to find
104 data in this log block
105 @return true if more data added */
106 bool recv_sys_add_to_parsing_buf(const byte* log_block, lsn_t scanned_lsn);
107
108 /** Parse log records from a buffer and optionally store them to a
109 hash table to wait merging to file pages.
110 @param[in] checkpoint_lsn the LSN of the latest checkpoint
111 @param[in] store whether to store page operations
112 @param[in] available_memory memory to read the redo logs
113 @param[in] apply whether to apply the records
114 @return whether MLOG_CHECKPOINT record was seen the first time,
115 or corruption was noticed */
116 bool recv_parse_log_recs(
117 lsn_t checkpoint_lsn,
118 store_t* store,
119 ulint available_memory,
120 bool apply);
121
/** Move the parsing buffer data left to the buffer start. */
void recv_sys_justify_left_parsing_buf();
124
125 /** Report optimized DDL operation (without redo log),
126 corresponding to MLOG_INDEX_LOAD.
127 @param[in] space_id tablespace identifier
128 */
129 extern void (*log_optimized_ddl_op)(ulint space_id);
130
131 /** Report an operation to create, delete, or rename a file during backup.
132 @param[in] space_id tablespace identifier
133 @param[in] flags tablespace flags (NULL if not create)
134 @param[in] name file name (not NUL-terminated)
135 @param[in] len length of name, in bytes
136 @param[in] new_name new file name (NULL if not rename)
137 @param[in] new_len length of new_name, in bytes (0 if NULL) */
138 extern void (*log_file_op)(ulint space_id, const byte* flags,
139 const byte* name, ulint len,
140 const byte* new_name, ulint new_len);
141
/** Block of log record data */
struct recv_data_t{
	/** pointer to the next block, or NULL */
	recv_data_t*	next;
	/* The log record data is stored physically immediately after
	this struct, at most RECV_DATA_BLOCK_SIZE bytes of it. */
};
149
150 /** Stored log record struct */
151 struct recv_t{
152 mlog_id_t type; /*!< log record type */
153 ulint len; /*!< log record body length in bytes */
154 recv_data_t* data; /*!< chain of blocks containing the log record
155 body */
156 lsn_t start_lsn;/*!< start lsn of the log segment written by
157 the mtr which generated this log record: NOTE
158 that this is not necessarily the start lsn of
159 this log record */
160 lsn_t end_lsn;/*!< end lsn of the log segment written by
161 the mtr which generated this log record: NOTE
162 that this is not necessarily the end lsn of
163 this log record */
164 UT_LIST_NODE_T(recv_t)
165 rec_list;/*!< list of log records for this page */
166 };
167
168 struct recv_dblwr_t
169 {
170 /** Add a page frame to the doublewrite recovery buffer. */
addrecv_dblwr_t171 void add(byte *page) { pages.push_front(page); }
172
173 /** Validate the page.
174 @param page_id page identifier
175 @param page page contents
176 @param space the tablespace of the page (not available for page 0)
177 @param tmp_buf 2*srv_page_size for decrypting and decompressing any
178 page_compressed or encrypted pages
179 @return whether the page is valid */
180 bool validate_page(const page_id_t page_id, const byte *page,
181 const fil_space_t *space, byte *tmp_buf);
182
183 /** Find a doublewrite copy of a page.
184 @param page_id page identifier
185 @param space tablespace (not available for page_id.page_no()==0)
186 @param tmp_buf 2*srv_page_size for decrypting and decompressing any
187 page_compressed or encrypted pages
188 @return page frame
189 @retval NULL if no valid page for page_id was found */
190 byte* find_page(const page_id_t page_id, const fil_space_t *space= NULL,
191 byte *tmp_buf= NULL);
192
193 typedef std::deque<byte*, ut_allocator<byte*> > list;
194
195 /** Recovered doublewrite buffer page frames */
196 list pages;
197 };
198
199 /** Recovery system data structure */
200 struct recv_sys_t{
201 ib_mutex_t mutex; /*!< mutex protecting the fields apply_log_recs,
202 n_addrs, and the state field in each recv_addr
203 struct */
204 ib_mutex_t writer_mutex;/*!< mutex coordinating
205 flushing between recv_writer_thread and
206 the recovery thread. */
207 os_event_t flush_start;/*!< event to activate
208 page cleaner threads */
209 os_event_t flush_end;/*!< event to signal that the page
210 cleaner has finished the request */
211 buf_flush_t flush_type;/*!< type of the flush request.
212 BUF_FLUSH_LRU: flush end of LRU, keeping free blocks.
213 BUF_FLUSH_LIST: flush all of blocks. */
214 /** whether recv_recover_page(), invoked from buf_page_io_complete(),
215 should apply log records*/
216 bool apply_log_recs;
217 /** whether recv_apply_hashed_log_recs() is running */
218 bool apply_batch_on;
219 byte* buf; /*!< buffer for parsing log records */
220 size_t buf_size; /*!< size of buf */
221 ulint len; /*!< amount of data in buf */
222 lsn_t parse_start_lsn;
223 /*!< this is the lsn from which we were able to
224 start parsing log records and adding them to
225 the hash table; zero if a suitable
226 start point not found yet */
227 lsn_t scanned_lsn;
228 /*!< the log data has been scanned up to this
229 lsn */
230 ulint scanned_checkpoint_no;
231 /*!< the log data has been scanned up to this
232 checkpoint number (lowest 4 bytes) */
233 ulint recovered_offset;
234 /*!< start offset of non-parsed log records in
235 buf */
236 lsn_t recovered_lsn;
237 /*!< the log records have been parsed up to
238 this lsn */
239 bool found_corrupt_log;
240 /*!< set when finding a corrupt log
241 block or record, or there is a log
242 parsing buffer overflow */
243 bool found_corrupt_fs;
244 /*!< set when an inconsistency with
245 the file system contents is detected
246 during log scan or apply */
247 lsn_t mlog_checkpoint_lsn;
248 /*!< the LSN of a MLOG_CHECKPOINT
249 record, or 0 if none was parsed */
250 /** the time when progress was last reported */
251 time_t progress_time;
252 mem_heap_t* heap; /*!< memory heap of log records and file
253 addresses*/
254 hash_table_t* addr_hash;/*!< hash table of file addresses of pages */
255 ulint n_addrs;/*!< number of not processed hashed file
256 addresses in the hash table */
257
258 /** Undo tablespaces for which truncate has been logged
259 (indexed by id - srv_undo_space_id_start) */
260 struct trunc {
261 /** log sequence number of MLOG_FILE_CREATE2, or 0 if none */
262 lsn_t lsn;
263 /** truncated size of the tablespace, or 0 if not truncated */
264 unsigned pages;
265 } truncated_undo_spaces[127];
266
267 recv_dblwr_t dblwr;
268
269 /** Lastly added LSN to the hash table of log records. */
270 lsn_t last_stored_lsn;
271
272 /** Initialize the redo log recovery subsystem. */
273 void create();
274
275 /** Free most recovery data structures. */
276 void debug_free();
277
278 /** Clean up after create() */
279 void close();
280
is_initialisedrecv_sys_t281 bool is_initialised() const { return buf_size != 0; }
282
283 /** Store a redo log record for applying.
284 @param type record type
285 @param space tablespace identifier
286 @param page_no page number
287 @param body record body
288 @param rec_end end of record
289 @param lsn start LSN of the mini-transaction
290 @param end_lsn end LSN of the mini-transaction */
291 inline void add(mlog_id_t type, ulint space, ulint page_no,
292 byte* body, byte* rec_end, lsn_t lsn,
293 lsn_t end_lsn);
294
295 /** Empty a fully processed set of stored redo log records. */
296 inline void empty();
297
298 /** Determine whether redo log recovery progress should be reported.
299 @param[in] time the current time
300 @return whether progress should be reported
301 (the last report was at least 15 seconds ago) */
reportrecv_sys_t302 bool report(time_t time)
303 {
304 if (time - progress_time < 15) {
305 return false;
306 }
307
308 progress_time = time;
309 return true;
310 }
311 };
312
313 /** The recovery system */
314 extern recv_sys_t recv_sys;
315
/** TRUE when applying redo log records during crash recovery; FALSE
otherwise. Note that this is FALSE while a background thread is
rolling back incomplete transactions. */
extern volatile bool	recv_recovery_on;
/** If the following is TRUE, the buffer pool file pages must be invalidated
after recovery and no ibuf operations are allowed; this becomes TRUE if
the log record hash table becomes too full, and log records must be merged
to file pages already before the recovery is finished: in this case no
ibuf operations are allowed, as they could modify the pages read in the
buffer pool before the pages have been recovered to the up-to-date state.

TRUE means that recovery is running and no operations on the log files
are allowed yet: the variable name is misleading. */
extern bool	recv_no_ibuf_operations;
/** TRUE when recv_init_crash_recovery() has been called. */
extern bool	recv_needed_recovery;
#ifdef UNIV_DEBUG
/** TRUE if writing to the redo log (mtr_commit) is forbidden.
Protected by log_sys.mutex. Only declared in UNIV_DEBUG builds. */
extern bool	recv_no_log_write;
#endif /* UNIV_DEBUG */
337
/** TRUE if buf_page_is_corrupted() should check if the log sequence
number (FIL_PAGE_LSN) is in the future. Initially FALSE, and set by
recv_recovery_from_checkpoint_start(). */
extern bool	recv_lsn_checks_on;
342
/** Size of the parsing buffer in bytes (2 MiB); it must accommodate
RECV_SCAN_SIZE many times! */
#define RECV_PARSING_BUF_SIZE	(2U << 20)
346
/** Size of block reads when the log groups are scanned forward to do a
roll-forward: 4 shifted left by srv_page_size_shift, evaluated at the
point of use */
#define RECV_SCAN_SIZE		(4U << srv_page_size_shift)
350
351 /** This is a low level function for the recovery system
352 to create a page which has buffered intialized redo log records.
353 @param[in] page_id page to be created using redo logs
354 @return whether the page creation successfully */
355 buf_block_t* recv_recovery_create_page_low(const page_id_t page_id);
356
357 /** Recovery system creates a page which has buffered intialized
358 redo log records.
359 @param[in] page_id page to be created using redo logs
360 @return block which contains page was initialized */
recv_recovery_create_page(const page_id_t page_id)361 inline buf_block_t* recv_recovery_create_page(const page_id_t page_id)
362 {
363 if (UNIV_LIKELY(!recv_recovery_on))
364 return NULL;
365
366 return recv_recovery_create_page_low(page_id);
367 }
368
369 #endif
370