1 /*****************************************************************************
2
3 Copyright (c) 1995, 2017, Oracle and/or its affiliates. All rights reserved.
4 Copyright (c) 2009, Google Inc.
5 Copyright (c) 2017, 2021, MariaDB Corporation.
6
7 Portions of this file contain modifications contributed and copyrighted by
8 Google, Inc. Those modifications are gratefully acknowledged and are described
9 briefly in the InnoDB documentation. The contributions by Google are
10 incorporated with their permission, and subject to the conditions contained in
11 the file COPYING.Google.
12
13 This program is free software; you can redistribute it and/or modify it under
14 the terms of the GNU General Public License as published by the Free Software
15 Foundation; version 2 of the License.
16
17 This program is distributed in the hope that it will be useful, but WITHOUT
18 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
19 FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
20
21 You should have received a copy of the GNU General Public License along with
22 this program; if not, write to the Free Software Foundation, Inc.,
23 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
24
25 *****************************************************************************/
26
27 /**************************************************//**
28 @file include/log0log.h
29 Database log
30
31 Created 12/9/1995 Heikki Tuuri
32 *******************************************************/
33
34 #ifndef log0log_h
35 #define log0log_h
36
37 #include "dyn0buf.h"
38 #include "sync0rw.h"
39 #include "log0types.h"
40 #include "os0event.h"
41 #include "os0file.h"
42
43 #ifndef UINT32_MAX
44 #define UINT32_MAX (4294967295U)
45 #endif
46
47 /** Maximum number of srv_n_log_files, or innodb_log_files_in_group */
48 #define SRV_N_LOG_FILES_MAX 100
49
50 /** Magic value to use instead of log checksums when they are disabled */
51 #define LOG_NO_CHECKSUM_MAGIC 0xDEADBEEFUL
52
53 /* Margin for the free space in the smallest log group, before a new query
54 step which modifies the database, is started */
55
56 #define LOG_CHECKPOINT_FREE_PER_THREAD (4U << srv_page_size_shift)
57 #define LOG_CHECKPOINT_EXTRA_FREE (8U << srv_page_size_shift)
58
59 /** Append a string to the log.
60 @param[in] str string
61 @param[in] len string length
62 @param[out] start_lsn start LSN of the log record
63 @return end lsn of the log record, zero if did not succeed */
64 UNIV_INLINE
65 lsn_t
66 log_reserve_and_write_fast(
67 const void* str,
68 ulint len,
69 lsn_t* start_lsn);
70 /***********************************************************************//**
71 Checks if there is need for a log buffer flush or a new checkpoint, and does
72 this if yes. Any database operation should call this when it has modified
73 more than about 4 pages. NOTE that this function may only be called when the
74 OS thread owns no synchronization objects except the dictionary mutex. */
75 UNIV_INLINE
76 void
77 log_free_check(void);
78 /*================*/
79
80 /** Extends the log buffer.
81 @param[in] len requested minimum size in bytes */
82 void log_buffer_extend(ulong len);
83
/** Check the margin so that the transaction log is not overwritten past
the last checkpoint. If the log write is estimated to exceed
log_group_capacity, wait until the checkpoint has progressed far enough.
@param[in]	len	length of the data to be written */
88
89 void
90 log_margin_checkpoint_age(
91 ulint len);
92
93 /** Open the log for log_write_low. The log must be closed with log_close.
94 @param[in] len length of the data to be written
95 @return start lsn of the log record */
96 lsn_t
97 log_reserve_and_open(
98 ulint len);
99 /************************************************************//**
100 Writes to the log the string given. It is assumed that the caller holds the
101 log mutex. */
102 void
103 log_write_low(
104 /*==========*/
105 const byte* str, /*!< in: string */
106 ulint str_len); /*!< in: string length */
107 /************************************************************//**
108 Closes the log.
109 @return lsn */
110 lsn_t
111 log_close(void);
112 /*===========*/
113 /************************************************************//**
114 Gets the current lsn.
115 @return current lsn */
116 UNIV_INLINE
117 lsn_t
118 log_get_lsn(void);
119 /*=============*/
120 /************************************************************//**
121 Gets the current lsn.
122 @return current lsn */
123 UNIV_INLINE
124 lsn_t
125 log_get_lsn_nowait(void);
126 /*=============*/
127 /************************************************************//**
128 Gets the last lsn that is fully flushed to disk.
129 @return last flushed lsn */
130 UNIV_INLINE
131 ib_uint64_t
132 log_get_flush_lsn(void);
133 /*=============*/
134 /****************************************************************
135 Gets the log group capacity. It is OK to read the value without
136 holding log_sys.mutex because it is constant.
137 @return log group capacity */
138 UNIV_INLINE
139 lsn_t
140 log_get_capacity(void);
141 /*==================*/
142 /****************************************************************
143 Get log_sys::max_modified_age_async. It is OK to read the value without
144 holding log_sys::mutex because it is constant.
145 @return max_modified_age_async */
146 UNIV_INLINE
147 lsn_t
148 log_get_max_modified_age_async(void);
149 /*================================*/
150
151 /** Calculate the recommended highest values for lsn - last_checkpoint_lsn
152 and lsn - buf_get_oldest_modification().
153 @param[in] file_size requested innodb_log_file_size
154 @retval true on success
155 @retval false if the smallest log group is too small to
156 accommodate the number of OS threads in the database server */
157 bool
158 log_set_capacity(ulonglong file_size)
159 MY_ATTRIBUTE((warn_unused_result));
160
161 /** Ensure that the log has been written to the log file up to a given
162 log entry (such as that of a transaction commit). Start a new write, or
163 wait and check if an already running write is covering the request.
164 @param[in] lsn log sequence number that should be
165 included in the redo log file write
166 @param[in] flush_to_disk whether the written log should also
167 be flushed to the file system
168 @param[in] rotate_key whether to rotate the encryption key */
169 void log_write_up_to(lsn_t lsn, bool flush_to_disk, bool rotate_key = false);
170
171 /** write to the log file up to the last log entry.
172 @param[in] sync whether we want the written log
173 also to be flushed to disk. */
174 void log_buffer_flush_to_disk(bool sync= true);
175
176
177 /** Prepare to invoke log_write_and_flush(), before acquiring log_sys.mutex. */
178 #define log_write_and_flush_prepare() log_write_mutex_enter()
179
180 /** Durably write the log up to log_sys.lsn and release log_sys.mutex. */
181 ATTRIBUTE_COLD void log_write_and_flush();
182
/****************************************************************//**
This function writes the log buffer to the log file and if 'flush'
is set it forces a flush of the log file as well. This is meant to be
called from background master thread only as it does not wait for
the write (+ possible flush) to finish. */
void
log_buffer_sync_in_background(
/*==========================*/
	bool	flush);	/*!< in: flush the logs to disk */
192 /** Make a checkpoint. Note that this function does not flush dirty
193 blocks from the buffer pool: it only checks what is lsn of the oldest
194 modification in the pool, and writes information about the lsn in
195 log files. Use log_make_checkpoint() to flush also the pool.
196 @param[in] sync whether to wait for the write to complete
197 @return true if success, false if a checkpoint write was already running */
198 bool log_checkpoint(bool sync);
199
200 /** Make a checkpoint */
201 void log_make_checkpoint();
202
203 /****************************************************************//**
204 Makes a checkpoint at the latest lsn and writes it to first page of each
205 data file in the database, so that we know that the file spaces contain
206 all modifications up to that lsn. This can only be called at database
207 shutdown. This function also writes all log in log files to the log archive. */
208 void
209 logs_empty_and_mark_files_at_shutdown(void);
210 /*=======================================*/
/** Read a log group header page to log_sys.checkpoint_buf.
@param[in]	header	0 or LOG_CHECKPOINT_1 or LOG_CHECKPOINT_2 */
213 void log_header_read(ulint header);
214 /** Write checkpoint info to the log header and invoke log_mutex_exit().
215 @param[in] sync whether to wait for the write to complete
216 @param[in] end_lsn start LSN of the MLOG_CHECKPOINT mini-transaction */
217 void
218 log_write_checkpoint_info(bool sync, lsn_t end_lsn);
219
220 /** Set extra data to be written to the redo log during checkpoint.
221 @param[in] buf data to be appended on checkpoint, or NULL
222 @return pointer to previous data to be appended on checkpoint */
223 mtr_buf_t*
224 log_append_on_checkpoint(
225 mtr_buf_t* buf);
226 /**
227 Checks that there is enough free space in the log to start a new query step.
228 Flushes the log buffer or makes a new checkpoint if necessary. NOTE: this
229 function may only be called if the calling thread owns no synchronization
230 objects! */
231 void
232 log_check_margins(void);
233
234 /************************************************************//**
235 Gets a log block flush bit.
236 @return TRUE if this block was the first to be written in a log flush */
237 UNIV_INLINE
238 ibool
239 log_block_get_flush_bit(
240 /*====================*/
241 const byte* log_block); /*!< in: log block */
242 /************************************************************//**
243 Gets a log block number stored in the header.
244 @return log block number stored in the block header */
245 UNIV_INLINE
246 ulint
247 log_block_get_hdr_no(
248 /*=================*/
249 const byte* log_block); /*!< in: log block */
250 /************************************************************//**
251 Gets a log block data length.
252 @return log block data length measured as a byte offset from the block start */
253 UNIV_INLINE
254 ulint
255 log_block_get_data_len(
256 /*===================*/
257 const byte* log_block); /*!< in: log block */
258 /************************************************************//**
259 Sets the log block data length. */
260 UNIV_INLINE
261 void
262 log_block_set_data_len(
263 /*===================*/
264 byte* log_block, /*!< in/out: log block */
265 ulint len); /*!< in: data length */
266
267 /** Calculates the checksum for a log block using the CRC32 algorithm.
268 @param[in] block log block
269 @return checksum */
270 UNIV_INLINE
271 ulint
272 log_block_calc_checksum_crc32(
273 const byte* block);
274
275 /************************************************************//**
276 Gets a log block checksum field value.
277 @return checksum */
278 UNIV_INLINE
279 ulint
280 log_block_get_checksum(
281 /*===================*/
282 const byte* log_block); /*!< in: log block */
283 /************************************************************//**
284 Sets a log block checksum field value. */
285 UNIV_INLINE
286 void
287 log_block_set_checksum(
288 /*===================*/
289 byte* log_block, /*!< in/out: log block */
290 ulint checksum); /*!< in: checksum */
291 /************************************************************//**
292 Gets a log block first mtr log record group offset.
293 @return first mtr log record group byte offset from the block start, 0
294 if none */
295 UNIV_INLINE
296 ulint
297 log_block_get_first_rec_group(
298 /*==========================*/
299 const byte* log_block); /*!< in: log block */
300 /************************************************************//**
301 Sets the log block first mtr log record group offset. */
302 UNIV_INLINE
303 void
304 log_block_set_first_rec_group(
305 /*==========================*/
306 byte* log_block, /*!< in/out: log block */
307 ulint offset); /*!< in: offset, 0 if none */
308 /************************************************************//**
309 Gets a log block checkpoint number field (4 lowest bytes).
310 @return checkpoint no (4 lowest bytes) */
311 UNIV_INLINE
312 ulint
313 log_block_get_checkpoint_no(
314 /*========================*/
315 const byte* log_block); /*!< in: log block */
316 /************************************************************//**
317 Initializes a log block in the log buffer. */
318 UNIV_INLINE
319 void
320 log_block_init(
321 /*===========*/
322 byte* log_block, /*!< in: pointer to the log buffer */
323 lsn_t lsn); /*!< in: lsn within the log block */
324 /************************************************************//**
325 Converts a lsn to a log block number.
326 @return log block number, it is > 0 and <= 1G */
327 UNIV_INLINE
328 ulint
329 log_block_convert_lsn_to_no(
330 /*========================*/
331 lsn_t lsn); /*!< in: lsn of a byte within the block */
332 /******************************************************//**
333 Prints info of the log. */
334 void
335 log_print(
336 /*======*/
337 FILE* file); /*!< in: file where to print */
338 /******************************************************//**
339 Peeks the current lsn.
340 @return TRUE if success, FALSE if could not get the log system mutex */
341 ibool
342 log_peek_lsn(
343 /*=========*/
344 lsn_t* lsn); /*!< out: if returns TRUE, current lsn is here */
345 /**********************************************************************//**
346 Refreshes the statistics used to print per-second averages. */
347 void
348 log_refresh_stats(void);
349 /*===================*/
350
351 /** Whether to require checksums on the redo log pages */
352 extern my_bool innodb_log_checksums;
353
354 /* Values used as flags */
355 #define LOG_FLUSH 7652559
356 #define LOG_CHECKPOINT 78656949
357
358 /* The counting of lsn's starts from this value: this must be non-zero */
359 #define LOG_START_LSN ((lsn_t) (16 * OS_FILE_LOG_BLOCK_SIZE))
360
361 /* Offsets of a log block header */
362 #define LOG_BLOCK_HDR_NO 0 /* block number which must be > 0 and
363 is allowed to wrap around at 2G; the
364 highest bit is set to 1 if this is the
365 first log block in a log flush write
366 segment */
367 #define LOG_BLOCK_FLUSH_BIT_MASK 0x80000000UL
368 /* mask used to get the highest bit in
369 the preceding field */
370 #define LOG_BLOCK_HDR_DATA_LEN 4 /* number of bytes of log written to
371 this block */
372 #define LOG_BLOCK_FIRST_REC_GROUP 6 /* offset of the first start of an
373 mtr log record group in this log block,
374 0 if none; if the value is the same
375 as LOG_BLOCK_HDR_DATA_LEN, it means
376 that the first rec group has not yet
377 been catenated to this log block, but
378 if it will, it will start at this
379 offset; an archive recovery can
380 start parsing the log records starting
381 from this offset in this log block,
382 if value not 0 */
383 #define LOG_BLOCK_CHECKPOINT_NO 8 /* 4 lower bytes of the value of
384 log_sys.next_checkpoint_no when the
385 log block was last written to: if the
386 block has not yet been written full,
387 this value is only updated before a
388 log buffer flush */
389 #define LOG_BLOCK_HDR_SIZE 12 /* size of the log block header in
390 bytes */
391
392 #define LOG_BLOCK_KEY 4 /* encryption key version
393 before LOG_BLOCK_CHECKSUM;
394 in log_t::FORMAT_ENC_10_4 only */
395 #define LOG_BLOCK_CHECKSUM 4 /* 4 byte checksum of the log block
396 contents; in InnoDB versions
397 < 3.23.52 this did not contain the
398 checksum but the same value as
399 LOG_BLOCK_HDR_NO */
400
401 /** Offsets inside the checkpoint pages (redo log format version 1) @{ */
402 /** Checkpoint number */
403 #define LOG_CHECKPOINT_NO 0
404 /** Log sequence number up to which all changes have been flushed */
405 #define LOG_CHECKPOINT_LSN 8
406 /** Byte offset of the log record corresponding to LOG_CHECKPOINT_LSN */
407 #define LOG_CHECKPOINT_OFFSET 16
408 /** srv_log_buffer_size at the time of the checkpoint (not used) */
409 #define LOG_CHECKPOINT_LOG_BUF_SIZE 24
410 /** MariaDB 10.2.5 encrypted redo log encryption key version (32 bits)*/
411 #define LOG_CHECKPOINT_CRYPT_KEY 32
412 /** MariaDB 10.2.5 encrypted redo log random nonce (32 bits) */
413 #define LOG_CHECKPOINT_CRYPT_NONCE 36
414 /** MariaDB 10.2.5 encrypted redo log random message (MY_AES_BLOCK_SIZE) */
415 #define LOG_CHECKPOINT_CRYPT_MESSAGE 40
/** start LSN of the MLOG_CHECKPOINT mini-transaction corresponding
to this checkpoint, or 0 if the information has not been written.
The expansion is parenthesized so that the macro remains a single operand
when it appears inside a larger arithmetic expression. */
#define LOG_CHECKPOINT_END_LSN		(OS_FILE_LOG_BLOCK_SIZE - 16)
419
420 /* @} */
421
422 /** Offsets of a log file header */
423 /* @{ */
424 /** Log file header format identifier (32-bit unsigned big-endian integer).
425 This used to be called LOG_GROUP_ID and always written as 0,
426 because InnoDB never supported more than one copy of the redo log. */
427 #define LOG_HEADER_FORMAT 0
428 /** Redo log subformat (originally 0). In format version 0, the
429 LOG_FILE_START_LSN started here, 4 bytes earlier than LOG_HEADER_START_LSN,
430 which the LOG_FILE_START_LSN was renamed to.
431 Subformat 1 is for the fully redo-logged TRUNCATE
432 (no MLOG_TRUNCATE records or extra log checkpoints or log files) */
433 #define LOG_HEADER_SUBFORMAT 4
434 /** LSN of the start of data in this log file (with format version 1;
435 in format version 0, it was called LOG_FILE_START_LSN and at offset 4). */
436 #define LOG_HEADER_START_LSN 8
437 /** A null-terminated string which will contain either the string 'ibbackup'
438 and the creation time if the log file was created by mysqlbackup --restore,
439 or the MySQL version that created the redo log file. */
440 #define LOG_HEADER_CREATOR 16
441 /** End of the log file creator field. */
442 #define LOG_HEADER_CREATOR_END (LOG_HEADER_CREATOR + 32)
443 /** Contents of the LOG_HEADER_CREATOR field */
444 #define LOG_HEADER_CREATOR_CURRENT \
445 "MariaDB " \
446 IB_TO_STR(MYSQL_VERSION_MAJOR) "." \
447 IB_TO_STR(MYSQL_VERSION_MINOR) "." \
448 IB_TO_STR(MYSQL_VERSION_PATCH)
449
450 /* @} */
451
452 #define LOG_CHECKPOINT_1 OS_FILE_LOG_BLOCK_SIZE
453 /* first checkpoint field in the log
454 header; we write alternately to the
455 checkpoint fields when we make new
456 checkpoints; this field is only defined
457 in the first log file of a log group */
458 #define LOG_CHECKPOINT_2 (3 * OS_FILE_LOG_BLOCK_SIZE)
459 /* second checkpoint field in the log
460 header */
461 #define LOG_FILE_HDR_SIZE (4 * OS_FILE_LOG_BLOCK_SIZE)
462
463 /* As long as fil_io() is used to handle log io, log group max size is limited
464 by (maximum page number) * (minimum page size). Page number type is uint32_t.
465 Remove this limitation if page number is no longer used for log file io. */
466 static const ulonglong log_group_max_size =
467 ((ulonglong(UINT32_MAX) + 1) * UNIV_PAGE_SIZE_MIN - 1);
468
469 typedef ib_mutex_t LogSysMutex;
470 typedef ib_mutex_t FlushOrderMutex;
471
472 /** Redo log buffer */
473 struct log_t{
474 /** The original (not version-tagged) InnoDB redo log format */
475 static constexpr uint32_t FORMAT_3_23 = 0;
476 /** The MySQL 5.7.9/MariaDB 10.2.2 log format */
477 static constexpr uint32_t FORMAT_10_2 = 1;
478 /** The MariaDB 10.3.2 log format.
479 To prevent crash-downgrade to earlier 10.2 due to the inability to
480 roll back a retroactively introduced TRX_UNDO_RENAME_TABLE undo log record,
481 MariaDB 10.2.18 and later will use the 10.3 format, but LOG_HEADER_SUBFORMAT
482 1 instead of 0. MariaDB 10.3 will use subformat 0 (5.7-style TRUNCATE) or 2
483 (MDEV-13564 backup-friendly TRUNCATE). */
484 static constexpr uint32_t FORMAT_10_3 = 103;
485 /** The MariaDB 10.4.0 log format. */
486 static constexpr uint32_t FORMAT_10_4 = 104;
487 /** Encrypted MariaDB redo log */
488 static constexpr uint32_t FORMAT_ENCRYPTED = 1U << 31;
489 /** The MariaDB 10.4.0 log format (only with innodb_encrypt_log=ON) */
490 static constexpr uint32_t FORMAT_ENC_10_4 = FORMAT_10_4 | FORMAT_ENCRYPTED;
491
492 MY_ALIGNED(CACHE_LINE_SIZE)
493 lsn_t lsn; /*!< log sequence number */
494 ulong buf_free; /*!< first free offset within the log
495 buffer in use */
496
497 MY_ALIGNED(CACHE_LINE_SIZE)
498 LogSysMutex mutex; /*!< mutex protecting the log */
499 MY_ALIGNED(CACHE_LINE_SIZE)
500 LogSysMutex write_mutex; /*!< mutex protecting writing to log */
501 MY_ALIGNED(CACHE_LINE_SIZE)
502 FlushOrderMutex log_flush_order_mutex;/*!< mutex to serialize access to
503 the flush list when we are putting
504 dirty blocks in the list. The idea
505 behind this mutex is to be able
506 to release log_sys.mutex during
507 mtr_commit and still ensure that
508 insertions in the flush_list happen
509 in the LSN order. */
510 /** log_buffer, append data here */
511 byte* buf;
512 /** log_buffer, writing data to file from this buffer.
513 Before flushing write_buf is swapped with flush_buf */
514 byte* flush_buf;
515 ulong max_buf_free; /*!< recommended maximum value of
516 buf_free for the buffer in use, after
517 which the buffer is flushed */
518 bool check_flush_or_checkpoint;
519 /*!< this is set when there may
520 be need to flush the log buffer, or
521 preflush buffer pool pages, or make
522 a checkpoint; this MUST be TRUE when
523 lsn - last_checkpoint_lsn >
524 max_checkpoint_age; this flag is
525 peeked at by log_free_check(), which
526 does not reserve the log mutex */
527
528 /** Log files. Protected by mutex or write_mutex. */
529 struct files {
530 /** number of files */
531 ulint n_files;
532 /** format of the redo log: e.g., FORMAT_10_4 */
533 uint32_t format;
534 /** redo log subformat: 0 with separately logged TRUNCATE,
535 2 with fully redo-logged TRUNCATE (1 in MariaDB 10.2) */
536 uint32_t subformat;
537 /** individual log file size in bytes, including the header */
538 lsn_t file_size;
539 private:
540 /** lsn used to fix coordinates within the log group */
541 lsn_t lsn;
542 /** the byte offset of the above lsn */
543 lsn_t lsn_offset;
544 public:
545 /** used only in recovery: recovery scan succeeded up to this
546 lsn in this log group */
547 lsn_t scanned_lsn;
548
549 /** @return whether the redo log is encrypted */
is_encryptedlog_t::files550 bool is_encrypted() const { return format & FORMAT_ENCRYPTED; }
551 /** @return capacity in bytes */
capacitylog_t::files552 lsn_t capacity() const{ return (file_size - LOG_FILE_HDR_SIZE) * n_files; }
553 /** Calculate the offset of a log sequence number.
554 @param[in] lsn log sequence number
555 @return offset within the log */
556 inline lsn_t calc_lsn_offset(lsn_t lsn) const;
557
558 /** Set the field values to correspond to a given lsn. */
set_fieldslog_t::files559 void set_fields(lsn_t lsn)
560 {
561 lsn_t c_lsn_offset = calc_lsn_offset(lsn);
562 set_lsn(lsn);
563 set_lsn_offset(c_lsn_offset);
564 }
565
566 /** Read a log segment to log_sys.buf.
567 @param[in,out] start_lsn in: read area start,
568 out: the last read valid lsn
569 @param[in] end_lsn read area end
570 @return whether no invalid blocks (e.g checksum mismatch) were found */
571 bool read_log_seg(lsn_t* start_lsn, lsn_t end_lsn);
572
573 /** Initialize the redo log buffer.
574 @param[in] n_files number of files */
575 void create(ulint n_files);
576
577 /** Close the redo log buffer. */
closelog_t::files578 void close()
579 {
580 n_files = 0;
581 }
582 void set_lsn(lsn_t a_lsn);
get_lsnlog_t::files583 lsn_t get_lsn() const { return lsn; }
584 void set_lsn_offset(lsn_t a_lsn);
get_lsn_offsetlog_t::files585 lsn_t get_lsn_offset() const { return lsn_offset; }
586 } log;
587
588 /** The fields involved in the log buffer flush @{ */
589
590 ulong buf_next_to_write;/*!< first offset in the log buffer
591 where the byte content may not exist
592 written to file, e.g., the start
593 offset of a log record catenated
594 later; this is advanced when a flush
595 operation is completed to all the log
596 groups */
597 lsn_t write_lsn; /*!< last written lsn */
598 lsn_t current_flush_lsn;/*!< end lsn for the current running
599 write + flush operation */
600 lsn_t flushed_to_disk_lsn;
601 /*!< how far we have written the log
602 AND flushed to disk */
603 ulint n_pending_flushes;/*!< number of currently
604 pending flushes; protected by
605 log_sys.mutex */
606 os_event_t flush_event; /*!< this event is in the reset state
607 when a flush is running;
608 os_event_set() and os_event_reset()
609 are protected by log_sys.mutex */
610 ulint n_log_ios; /*!< number of log i/os initiated thus
611 far */
612 ulint n_log_ios_old; /*!< number of log i/o's at the
613 previous printout */
614 time_t last_printout_time;/*!< when log_print was last time
615 called */
616 /* @} */
617
618 /** Fields involved in checkpoints @{ */
619 lsn_t log_group_capacity; /*!< capacity of the log group; if
620 the checkpoint age exceeds this, it is
621 a serious error because it is possible
622 we will then overwrite log and spoil
623 crash recovery */
624 lsn_t max_modified_age_async;
625 /*!< when this recommended
626 value for lsn -
627 buf_pool_get_oldest_modification()
628 is exceeded, we start an
629 asynchronous preflush of pool pages */
630 lsn_t max_modified_age_sync;
631 /*!< when this recommended
632 value for lsn -
633 buf_pool_get_oldest_modification()
634 is exceeded, we start a
635 synchronous preflush of pool pages */
636 lsn_t max_checkpoint_age_async;
637 /*!< when this checkpoint age
638 is exceeded we start an
639 asynchronous writing of a new
640 checkpoint */
641 lsn_t max_checkpoint_age;
642 /*!< this is the maximum allowed value
643 for lsn - last_checkpoint_lsn when a
644 new query step is started */
645 ib_uint64_t next_checkpoint_no;
646 /*!< next checkpoint number */
647 lsn_t last_checkpoint_lsn;
648 /*!< latest checkpoint lsn */
649 lsn_t next_checkpoint_lsn;
650 /*!< next checkpoint lsn */
651 mtr_buf_t* append_on_checkpoint;
652 /*!< extra redo log records to write
653 during a checkpoint, or NULL if none.
654 The pointer is protected by
655 log_sys.mutex, and the data must
656 remain constant as long as this
657 pointer is not NULL. */
658 ulint n_pending_checkpoint_writes;
659 /*!< number of currently pending
660 checkpoint writes */
661 rw_lock_t checkpoint_lock;/*!< this latch is x-locked when a
662 checkpoint write is running; a thread
663 should wait for this without owning
664 the log mutex */
665
666 /** buffer for checkpoint header */
667 MY_ALIGNED(OS_FILE_LOG_BLOCK_SIZE)
668 byte checkpoint_buf[OS_FILE_LOG_BLOCK_SIZE];
669 /* @} */
670
671 private:
672 bool m_initialised;
673 public:
674 /**
675 Constructor.
676
677 Some members may require late initialisation, thus we just mark object as
678 uninitialised. Real initialisation happens in create().
679 */
log_tlog_t680 log_t(): m_initialised(false) {}
681
682 /** @return whether the redo log is encrypted */
is_encryptedlog_t683 bool is_encrypted() const { return(log.is_encrypted()); }
684
is_initialisedlog_t685 bool is_initialised() const { return m_initialised; }
686
687 /** Complete an asynchronous checkpoint write. */
688 void complete_checkpoint();
689
690 /** @return the log block header + trailer size */
framing_sizelog_t691 unsigned framing_size() const
692 {
693 return log.format == FORMAT_ENC_10_4
694 ? LOG_BLOCK_HDR_SIZE + LOG_BLOCK_KEY + LOG_BLOCK_CHECKSUM
695 : LOG_BLOCK_HDR_SIZE + LOG_BLOCK_CHECKSUM;
696 }
697 /** @return the log block payload size */
payload_sizelog_t698 unsigned payload_size() const
699 {
700 return log.format == FORMAT_ENC_10_4
701 ? OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_HDR_SIZE - LOG_BLOCK_CHECKSUM -
702 LOG_BLOCK_KEY
703 : OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_HDR_SIZE - LOG_BLOCK_CHECKSUM;
704 }
705 /** @return the log block trailer offset */
trailer_offsetlog_t706 unsigned trailer_offset() const
707 {
708 return log.format == FORMAT_ENC_10_4
709 ? OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_CHECKSUM - LOG_BLOCK_KEY
710 : OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_CHECKSUM;
711 }
712
713 /** Initialise the redo log subsystem. */
714 void create();
715
716 /** Shut down the redo log subsystem. */
717 void close();
718 };
719
720 /** Redo log system */
721 extern log_t log_sys;
722
723 /** Calculate the offset of a log sequence number.
724 @param[in] lsn log sequence number
725 @return offset within the log */
calc_lsn_offset(lsn_t lsn)726 inline lsn_t log_t::files::calc_lsn_offset(lsn_t lsn) const
727 {
728 ut_ad(this == &log_sys.log);
729 /* The lsn parameters are updated while holding both the mutexes
730 and it is ok to have either of them while reading */
731 ut_ad(log_sys.mutex.is_owned() || log_sys.write_mutex.is_owned());
732 const lsn_t group_size= capacity();
733 lsn_t l= lsn - this->lsn;
734 if (longlong(l) < 0) {
735 l= lsn_t(-longlong(l)) % group_size;
736 l= group_size - l;
737 }
738
739 l+= lsn_offset - LOG_FILE_HDR_SIZE * (1 + lsn_offset / file_size);
740 l%= group_size;
741 return l + LOG_FILE_HDR_SIZE * (1 + l / (file_size - LOG_FILE_HDR_SIZE));
742 }
743
set_lsn(lsn_t a_lsn)744 inline void log_t::files::set_lsn(lsn_t a_lsn) {
745 ut_ad(log_sys.mutex.is_owned() || log_sys.write_mutex.is_owned());
746 lsn = a_lsn;
747 }
748
set_lsn_offset(lsn_t a_lsn)749 inline void log_t::files::set_lsn_offset(lsn_t a_lsn) {
750 ut_ad(log_sys.mutex.is_owned() || log_sys.write_mutex.is_owned());
751 ut_ad((lsn % OS_FILE_LOG_BLOCK_SIZE) == (a_lsn % OS_FILE_LOG_BLOCK_SIZE));
752 lsn_offset = a_lsn;
753 }
754
755 /** Test if flush order mutex is owned. */
756 #define log_flush_order_mutex_own() \
757 mutex_own(&log_sys.log_flush_order_mutex)
758
759 /** Acquire the flush order mutex. */
760 #define log_flush_order_mutex_enter() do { \
761 mutex_enter(&log_sys.log_flush_order_mutex); \
762 } while (0)
763 /** Release the flush order mutex. */
764 # define log_flush_order_mutex_exit() do { \
765 mutex_exit(&log_sys.log_flush_order_mutex); \
766 } while (0)
767
768 /** Test if log sys mutex is owned. */
769 #define log_mutex_own() mutex_own(&log_sys.mutex)
770
771 /** Test if log sys write mutex is owned. */
772 #define log_write_mutex_own() mutex_own(&log_sys.write_mutex)
773
774 /** Acquire the log sys mutex. */
775 #define log_mutex_enter() mutex_enter(&log_sys.mutex)
776
777 /** Acquire the log sys write mutex. */
778 #define log_write_mutex_enter() mutex_enter(&log_sys.write_mutex)
779
780 /** Acquire all the log sys mutexes. */
781 #define log_mutex_enter_all() do { \
782 mutex_enter(&log_sys.write_mutex); \
783 mutex_enter(&log_sys.mutex); \
784 } while (0)
785
786 /** Release the log sys mutex. */
787 #define log_mutex_exit() mutex_exit(&log_sys.mutex)
788
789 /** Release the log sys write mutex.*/
790 #define log_write_mutex_exit() mutex_exit(&log_sys.write_mutex)
791
792 /** Release all the log sys mutexes. */
793 #define log_mutex_exit_all() do { \
794 mutex_exit(&log_sys.mutex); \
795 mutex_exit(&log_sys.write_mutex); \
796 } while (0)
797
798 /* log scrubbing speed, in bytes/sec */
799 extern ulonglong innodb_scrub_log_speed;
800
801 /** Event to wake up log_scrub_thread */
802 extern os_event_t log_scrub_event;
803 /** Whether log_scrub_thread is active */
804 extern bool log_scrub_thread_active;
805
806 #include "log0log.inl"
807
808 #endif
809