1 /*****************************************************************************
2
3 Copyright (c) 1997, 2016, Oracle and/or its affiliates. All Rights Reserved.
4 Copyright (c) 2017, 2020, MariaDB Corporation.
5
6 This program is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free Software
8 Foundation; version 2 of the License.
9
10 This program is distributed in the hope that it will be useful, but WITHOUT
11 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12 FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License along with
15 this program; if not, write to the Free Software Foundation, Inc.,
16 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
17
18 *****************************************************************************/
19
20 /**************************************************//**
21 @file include/log0recv.h
22 Recovery
23
24 Created 9/20/1997 Heikki Tuuri
25 *******************************************************/
26
27 #ifndef log0recv_h
28 #define log0recv_h
29
30 #include "ut0byte.h"
31 #include "buf0types.h"
32 #include "hash0hash.h"
33 #include "log0log.h"
34 #include "mtr0types.h"
35
36 #include <list>
37 #include <vector>
38
/** Is recv_writer_thread active? */
extern bool	recv_writer_thread_active;

/** Test the recv_recovery_on flag; the result is hinted to the compiler
as unlikely to be true.
@return whether recovery is currently running. */
#define recv_recovery_is_on() UNIV_UNLIKELY(recv_recovery_on)
44
45 /** Find the latest checkpoint in the log header.
46 @param[out] max_field LOG_CHECKPOINT_1 or LOG_CHECKPOINT_2
47 @return error code or DB_SUCCESS */
48 dberr_t
49 recv_find_max_checkpoint(ulint* max_field)
50 MY_ATTRIBUTE((nonnull, warn_unused_result));
51
52 /** Reduces recv_sys->n_addrs for the corrupted page.
53 This function should called when srv_force_recovery > 0.
54 @param[in] page_id page id of the corrupted page */
55 void recv_recover_corrupt_page(page_id_t page_id);
56
57 /** Apply any buffered redo log to a page that was just read from a data file.
58 @param[in,out] bpage buffer pool page */
59 ATTRIBUTE_COLD void recv_recover_page(buf_page_t* bpage);
60
61 /** Start recovering from a redo log checkpoint.
62 @see recv_recovery_from_checkpoint_finish
63 @param[in] flush_lsn FIL_PAGE_FILE_FLUSH_LSN
64 of first system tablespace page
65 @return error code or DB_SUCCESS */
66 dberr_t
67 recv_recovery_from_checkpoint_start(
68 lsn_t flush_lsn);
69 /** Complete recovery from a checkpoint. */
70 void
71 recv_recovery_from_checkpoint_finish(void);
72 /********************************************************//**
73 Initiates the rollback of active transactions. */
74 void
75 recv_recovery_rollback_active(void);
76 /*===============================*/
77 /** Clean up after recv_sys_init() */
78 void
79 recv_sys_close();
80 /** Initialize the redo log recovery subsystem. */
81 void
82 recv_sys_init();
83 /********************************************************//**
84 Frees the recovery system. */
85 void
86 recv_sys_debug_free(void);
87 /*=====================*/
88
89 /********************************************************//**
90 Reset the state of the recovery system variables. */
91 void
92 recv_sys_var_init(void);
93 /*===================*/
94
95 /** Apply the hash table of stored log records to persistent data pages.
96 @param[in] last_batch whether the change buffer merge will be
97 performed as part of the operation */
98 void
99 recv_apply_hashed_log_recs(bool last_batch);
100
/** Whether to store redo log records to the hash table */
enum store_t {
	/** Do not store redo log records. */
	STORE_NO,
	/** Store redo log records. */
	STORE_YES,
	/** Store redo log records if the tablespace exists. */
	STORE_IF_EXISTS
};
110
111
112 /** Adds data from a new log block to the parsing buffer of recv_sys if
113 recv_sys->parse_start_lsn is non-zero.
114 @param[in] log_block log block to add
115 @param[in] scanned_lsn lsn of how far we were able to find
116 data in this log block
117 @return true if more data added */
118 bool recv_sys_add_to_parsing_buf(const byte* log_block, lsn_t scanned_lsn);
119
120 /** Parse log records from a buffer and optionally store them to a
121 hash table to wait merging to file pages.
122 @param[in] checkpoint_lsn the LSN of the latest checkpoint
123 @param[in] store whether to store page operations
124 @param[in] available_memory memory to read the redo logs
125 @param[in] apply whether to apply the records
126 @return whether MLOG_CHECKPOINT record was seen the first time,
127 or corruption was noticed */
128 bool recv_parse_log_recs(
129 lsn_t checkpoint_lsn,
130 store_t* store,
131 ulint available_memory,
132 bool apply);
133
134 /** Moves the parsing buffer data left to the buffer start */
135 void recv_sys_justify_left_parsing_buf();
136
137 /** Report optimized DDL operation (without redo log),
138 corresponding to MLOG_INDEX_LOAD.
139 @param[in] space_id tablespace identifier
140 */
141 extern void (*log_optimized_ddl_op)(ulint space_id);
142
143 /** Report backup-unfriendly TRUNCATE operation (with separate log file),
144 corresponding to MLOG_TRUNCATE. */
145 extern void (*log_truncate)();
146
147 /** Report an operation to create, delete, or rename a file during backup.
148 @param[in] space_id tablespace identifier
149 @param[in] flags tablespace flags (NULL if not create)
150 @param[in] name file name (not NUL-terminated)
151 @param[in] len length of name, in bytes
152 @param[in] new_name new file name (NULL if not rename)
153 @param[in] new_len length of new_name, in bytes (0 if NULL) */
154 extern void (*log_file_op)(ulint space_id, const byte* flags,
155 const byte* name, ulint len,
156 const byte* new_name, ulint new_len);
157
/** Block of log record data.
The log record data is stored physically immediately after this struct,
max amount RECV_DATA_BLOCK_SIZE bytes of it. */
struct recv_data_t{
	/** pointer to the next block or NULL */
	recv_data_t*	next;
};
165
166 /** Stored log record struct */
167 struct recv_t{
168 mlog_id_t type; /*!< log record type */
169 ulint len; /*!< log record body length in bytes */
170 recv_data_t* data; /*!< chain of blocks containing the log record
171 body */
172 lsn_t start_lsn;/*!< start lsn of the log segment written by
173 the mtr which generated this log record: NOTE
174 that this is not necessarily the start lsn of
175 this log record */
176 lsn_t end_lsn;/*!< end lsn of the log segment written by
177 the mtr which generated this log record: NOTE
178 that this is not necessarily the end lsn of
179 this log record */
180 UT_LIST_NODE_T(recv_t)
181 rec_list;/*!< list of log records for this page */
182 };
183
184 struct recv_dblwr_t
185 {
186 /** Add a page frame to the doublewrite recovery buffer. */
addrecv_dblwr_t187 void add(byte *page) { pages.push_back(page); }
188
189 /** Validate the page.
190 @param page_id page identifier
191 @param page page contents
192 @param space the tablespace of the page (not available for page 0)
193 @param tmp_buf 2*srv_page_size for decrypting and decompressing any
194 page_compressed or encrypted pages
195 @return whether the page is valid */
196 bool validate_page(const page_id_t page_id, const byte *page,
197 const fil_space_t *space, byte *tmp_buf);
198
199 /** Find a doublewrite copy of a page.
200 @param page_id page identifier
201 @param space tablespace (not available for page_id.page_no()==0)
202 @param tmp_buf 2*srv_page_size for decrypting and decompressing any
203 page_compressed or encrypted pages
204 @return page frame
205 @retval NULL if no valid page for page_id was found */
206 byte* find_page(const page_id_t page_id, const fil_space_t *space= NULL,
207 byte *tmp_buf= NULL);
208
209 typedef std::list<byte*, ut_allocator<byte*> > list;
210
211 /** Recovered doublewrite buffer page frames */
212 list pages;
213 };
214
215 /** Recovery system data structure */
216 struct recv_sys_t{
217 ib_mutex_t mutex; /*!< mutex protecting the fields apply_log_recs,
218 n_addrs, and the state field in each recv_addr
219 struct */
220 ib_mutex_t writer_mutex;/*!< mutex coordinating
221 flushing between recv_writer_thread and
222 the recovery thread. */
223 os_event_t flush_start;/*!< event to activate
224 page cleaner threads */
225 os_event_t flush_end;/*!< event to signal that the page
226 cleaner has finished the request */
227 buf_flush_t flush_type;/*!< type of the flush request.
228 BUF_FLUSH_LRU: flush end of LRU, keeping free blocks.
229 BUF_FLUSH_LIST: flush all of blocks. */
230 ibool apply_log_recs;
231 /*!< this is TRUE when log rec application to
232 pages is allowed; this flag tells the
233 i/o-handler if it should do log record
234 application */
235 ibool apply_batch_on;
236 /*!< this is TRUE when a log rec application
237 batch is running */
238 byte* buf; /*!< buffer for parsing log records */
239 size_t buf_size; /*!< size of buf */
240 ulint len; /*!< amount of data in buf */
241 lsn_t parse_start_lsn;
242 /*!< this is the lsn from which we were able to
243 start parsing log records and adding them to
244 the hash table; zero if a suitable
245 start point not found yet */
246 lsn_t scanned_lsn;
247 /*!< the log data has been scanned up to this
248 lsn */
249 ulint scanned_checkpoint_no;
250 /*!< the log data has been scanned up to this
251 checkpoint number (lowest 4 bytes) */
252 ulint recovered_offset;
253 /*!< start offset of non-parsed log records in
254 buf */
255 lsn_t recovered_lsn;
256 /*!< the log records have been parsed up to
257 this lsn */
258 bool found_corrupt_log;
259 /*!< set when finding a corrupt log
260 block or record, or there is a log
261 parsing buffer overflow */
262 bool found_corrupt_fs;
263 /*!< set when an inconsistency with
264 the file system contents is detected
265 during log scan or apply */
266 lsn_t mlog_checkpoint_lsn;
267 /*!< the LSN of a MLOG_CHECKPOINT
268 record, or 0 if none was parsed */
269 /** the time when progress was last reported */
270 time_t progress_time;
271 mem_heap_t* heap; /*!< memory heap of log records and file
272 addresses*/
273 hash_table_t* addr_hash;/*!< hash table of file addresses of pages */
274 ulint n_addrs;/*!< number of not processed hashed file
275 addresses in the hash table */
276
277 /** Undo tablespaces for which truncate has been logged
278 (indexed by id - srv_undo_space_id_start) */
279 struct trunc {
280 /** log sequence number of MLOG_FILE_CREATE2, or 0 if none */
281 lsn_t lsn;
282 /** truncated size of the tablespace, or 0 if not truncated */
283 unsigned pages;
284 } truncated_undo_spaces[127];
285
286 recv_dblwr_t dblwr;
287
288 /** Lastly added LSN to the hash table of log records. */
289 lsn_t last_stored_lsn;
290
291 /** Determine whether redo log recovery progress should be reported.
292 @param[in] time the current time
293 @return whether progress should be reported
294 (the last report was at least 15 seconds ago) */
reportrecv_sys_t295 bool report(time_t time)
296 {
297 if (time - progress_time < 15) {
298 return false;
299 }
300
301 progress_time = time;
302 return true;
303 }
304 };
305
306 /** The recovery system */
307 extern recv_sys_t* recv_sys;
308
309 /** TRUE when applying redo log records during crash recovery; FALSE
310 otherwise. Note that this is FALSE while a background thread is
311 rolling back incomplete transactions. */
312 extern volatile bool recv_recovery_on;
313 /** If the following is TRUE, the buffer pool file pages must be invalidated
314 after recovery and no ibuf operations are allowed; this becomes TRUE if
315 the log record hash table becomes too full, and log records must be merged
316 to file pages already before the recovery is finished: in this case no
317 ibuf operations are allowed, as they could modify the pages read in the
318 buffer pool before the pages have been recovered to the up-to-date state.
319
320 TRUE means that recovery is running and no operations on the log files
321 are allowed yet: the variable name is misleading. */
322 extern bool recv_no_ibuf_operations;
323 /** TRUE when recv_init_crash_recovery() has been called. */
324 extern bool recv_needed_recovery;
325 #ifdef UNIV_DEBUG
326 /** TRUE if writing to the redo log (mtr_commit) is forbidden.
327 Protected by log_sys.mutex. */
328 extern bool recv_no_log_write;
329 #endif /* UNIV_DEBUG */
330
331 /** TRUE if buf_page_is_corrupted() should check if the log sequence
332 number (FIL_PAGE_LSN) is in the future. Initially FALSE, and set by
333 recv_recovery_from_checkpoint_start(). */
334 extern bool recv_lsn_checks_on;
335
/** Size of the parsing buffer (2 MiB); it must accommodate RECV_SCAN_SIZE
many times! */
#define RECV_PARSING_BUF_SIZE	(2U << 20)

/** Size of block reads when the log groups are scanned forward to do a
roll-forward (4 << srv_page_size_shift bytes) */
#define RECV_SCAN_SIZE		(4U << srv_page_size_shift)
343
344 /** This is a low level function for the recovery system
345 to create a page which has buffered intialized redo log records.
346 @param[in] page_id page to be created using redo logs
347 @return whether the page creation successfully */
348 buf_block_t* recv_recovery_create_page_low(const page_id_t page_id);
349
350 /** Recovery system creates a page which has buffered intialized
351 redo log records.
352 @param[in] page_id page to be created using redo logs
353 @return block which contains page was initialized */
recv_recovery_create_page(const page_id_t page_id)354 inline buf_block_t* recv_recovery_create_page(const page_id_t page_id)
355 {
356 if (UNIV_LIKELY(!recv_recovery_on))
357 return NULL;
358
359 return recv_recovery_create_page_low(page_id);
360 }
361
362 #endif
363