1 /*****************************************************************************
2 
3 Copyright (c) 1995, 2017, Oracle and/or its affiliates. All rights reserved.
4 Copyright (c) 2009, Google Inc.
5 Copyright (c) 2017, 2021, MariaDB Corporation.
6 
7 Portions of this file contain modifications contributed and copyrighted by
8 Google, Inc. Those modifications are gratefully acknowledged and are described
9 briefly in the InnoDB documentation. The contributions by Google are
10 incorporated with their permission, and subject to the conditions contained in
11 the file COPYING.Google.
12 
13 This program is free software; you can redistribute it and/or modify it under
14 the terms of the GNU General Public License as published by the Free Software
15 Foundation; version 2 of the License.
16 
17 This program is distributed in the hope that it will be useful, but WITHOUT
18 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
19 FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
20 
21 You should have received a copy of the GNU General Public License along with
22 this program; if not, write to the Free Software Foundation, Inc.,
23 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
24 
25 *****************************************************************************/
26 
27 /**************************************************//**
28 @file include/log0log.h
29 Database log
30 
31 Created 12/9/1995 Heikki Tuuri
32 *******************************************************/
33 
34 #ifndef log0log_h
35 #define log0log_h
36 
37 #include "dyn0buf.h"
38 #include "sync0rw.h"
39 #include "log0types.h"
40 #include "os0event.h"
41 #include "os0file.h"
42 
43 #ifndef UINT32_MAX
44 #define UINT32_MAX             (4294967295U)
45 #endif
46 
47 /** Maximum number of srv_n_log_files, or innodb_log_files_in_group */
48 #define SRV_N_LOG_FILES_MAX 100
49 
50 /** Magic value to use instead of log checksums when they are disabled */
51 #define LOG_NO_CHECKSUM_MAGIC 0xDEADBEEFUL
52 
53 /* Margin for the free space in the smallest log group, before a new query
54 step which modifies the database, is started */
55 
56 #define LOG_CHECKPOINT_FREE_PER_THREAD	(4U << srv_page_size_shift)
57 #define LOG_CHECKPOINT_EXTRA_FREE	(8U << srv_page_size_shift)
58 
59 /** Append a string to the log.
60 @param[in]	str		string
61 @param[in]	len		string length
62 @param[out]	start_lsn	start LSN of the log record
63 @return end lsn of the log record, zero if did not succeed */
64 UNIV_INLINE
65 lsn_t
66 log_reserve_and_write_fast(
67 	const void*	str,
68 	ulint		len,
69 	lsn_t*		start_lsn);
70 /***********************************************************************//**
71 Checks if there is need for a log buffer flush or a new checkpoint, and does
72 this if yes. Any database operation should call this when it has modified
73 more than about 4 pages. NOTE that this function may only be called when the
74 OS thread owns no synchronization objects except the dictionary mutex. */
75 UNIV_INLINE
76 void
77 log_free_check(void);
78 /*================*/
79 
80 /** Extends the log buffer.
81 @param[in]	len	requested minimum size in bytes */
82 void log_buffer_extend(ulong len);
83 
/** Check the margin so as not to overwrite the transaction log from the last
checkpoint. If the log write is estimated to exceed log_group_capacity,
wait until enough checkpointing has been done.
@param[in]	len	length of the data to be written */
88 
89 void
90 log_margin_checkpoint_age(
91 	ulint	len);
92 
93 /** Open the log for log_write_low. The log must be closed with log_close.
94 @param[in]	len	length of the data to be written
95 @return start lsn of the log record */
96 lsn_t
97 log_reserve_and_open(
98 	ulint	len);
99 /************************************************************//**
100 Writes to the log the string given. It is assumed that the caller holds the
101 log mutex. */
102 void
103 log_write_low(
104 /*==========*/
105 	const byte*	str,		/*!< in: string */
106 	ulint		str_len);	/*!< in: string length */
107 /************************************************************//**
108 Closes the log.
109 @return lsn */
110 lsn_t
111 log_close(void);
112 /*===========*/
113 /************************************************************//**
114 Gets the current lsn.
115 @return current lsn */
116 UNIV_INLINE
117 lsn_t
118 log_get_lsn(void);
119 /*=============*/
120 /************************************************************//**
121 Gets the current lsn.
122 @return	current lsn */
123 UNIV_INLINE
124 lsn_t
125 log_get_lsn_nowait(void);
126 /*=============*/
127 /************************************************************//**
128 Gets the last lsn that is fully flushed to disk.
129 @return	last flushed lsn */
130 UNIV_INLINE
131 ib_uint64_t
132 log_get_flush_lsn(void);
133 /*=============*/
134 /****************************************************************
135 Gets the log group capacity. It is OK to read the value without
136 holding log_sys.mutex because it is constant.
137 @return log group capacity */
138 UNIV_INLINE
139 lsn_t
140 log_get_capacity(void);
141 /*==================*/
142 /****************************************************************
143 Get log_sys::max_modified_age_async. It is OK to read the value without
144 holding log_sys::mutex because it is constant.
145 @return max_modified_age_async */
146 UNIV_INLINE
147 lsn_t
148 log_get_max_modified_age_async(void);
149 /*================================*/
150 
151 /** Calculate the recommended highest values for lsn - last_checkpoint_lsn
152 and lsn - buf_get_oldest_modification().
153 @param[in]	file_size	requested innodb_log_file_size
154 @retval true on success
155 @retval false if the smallest log group is too small to
156 accommodate the number of OS threads in the database server */
157 bool
158 log_set_capacity(ulonglong file_size)
159 	MY_ATTRIBUTE((warn_unused_result));
160 
161 /** Ensure that the log has been written to the log file up to a given
162 log entry (such as that of a transaction commit). Start a new write, or
163 wait and check if an already running write is covering the request.
164 @param[in]	lsn		log sequence number that should be
165 included in the redo log file write
166 @param[in]	flush_to_disk	whether the written log should also
167 be flushed to the file system
168 @param[in]	rotate_key	whether to rotate the encryption key */
169 void log_write_up_to(lsn_t lsn, bool flush_to_disk, bool rotate_key = false);
170 
/** Write to the log file up to the last log entry.
@param[in]	sync	whether the written log should
also be flushed to disk. */
174 void log_buffer_flush_to_disk(bool sync= true);
175 
176 
177 /** Prepare to invoke log_write_and_flush(), before acquiring log_sys.mutex. */
178 #define log_write_and_flush_prepare() log_write_mutex_enter()
179 
180 /** Durably write the log up to log_sys.lsn and release log_sys.mutex. */
181 ATTRIBUTE_COLD void log_write_and_flush();
182 
/****************************************************************//**
Writes the log buffer to the log file and, if 'flush' is set, also forces
a flush of the log file. This is meant to be called from the background
master thread only, as it does not wait for the write (+ possible flush)
to finish. */
void
log_buffer_sync_in_background(
/*==========================*/
	bool	flush);	/*!< in: flush the logs to disk */
/** Make a checkpoint. Note that this function does not flush dirty
blocks from the buffer pool: it only determines the lsn of the oldest
modification in the pool, and writes information about that lsn to the
log files. Use log_make_checkpoint() to also flush the pool.
@param[in]	sync		whether to wait for the write to complete
@return true if success, false if a checkpoint write was already running */
198 bool log_checkpoint(bool sync);
199 
200 /** Make a checkpoint */
201 void log_make_checkpoint();
202 
203 /****************************************************************//**
204 Makes a checkpoint at the latest lsn and writes it to first page of each
205 data file in the database, so that we know that the file spaces contain
206 all modifications up to that lsn. This can only be called at database
207 shutdown. This function also writes all log in log files to the log archive. */
208 void
209 logs_empty_and_mark_files_at_shutdown(void);
210 /*=======================================*/
/** Read a log group header page to log_sys.checkpoint_buf.
@param[in]	header	0 or LOG_CHECKPOINT_1 or LOG_CHECKPOINT_2 */
213 void log_header_read(ulint header);
214 /** Write checkpoint info to the log header and invoke log_mutex_exit().
215 @param[in]	sync	whether to wait for the write to complete
216 @param[in]	end_lsn	start LSN of the MLOG_CHECKPOINT mini-transaction */
217 void
218 log_write_checkpoint_info(bool sync, lsn_t end_lsn);
219 
220 /** Set extra data to be written to the redo log during checkpoint.
221 @param[in]	buf	data to be appended on checkpoint, or NULL
222 @return pointer to previous data to be appended on checkpoint */
223 mtr_buf_t*
224 log_append_on_checkpoint(
225 	mtr_buf_t*	buf);
226 /**
227 Checks that there is enough free space in the log to start a new query step.
228 Flushes the log buffer or makes a new checkpoint if necessary. NOTE: this
229 function may only be called if the calling thread owns no synchronization
230 objects! */
231 void
232 log_check_margins(void);
233 
234 /************************************************************//**
235 Gets a log block flush bit.
236 @return TRUE if this block was the first to be written in a log flush */
237 UNIV_INLINE
238 ibool
239 log_block_get_flush_bit(
240 /*====================*/
241 	const byte*	log_block);	/*!< in: log block */
242 /************************************************************//**
243 Gets a log block number stored in the header.
244 @return log block number stored in the block header */
245 UNIV_INLINE
246 ulint
247 log_block_get_hdr_no(
248 /*=================*/
249 	const byte*	log_block);	/*!< in: log block */
250 /************************************************************//**
251 Gets a log block data length.
252 @return log block data length measured as a byte offset from the block start */
253 UNIV_INLINE
254 ulint
255 log_block_get_data_len(
256 /*===================*/
257 	const byte*	log_block);	/*!< in: log block */
258 /************************************************************//**
259 Sets the log block data length. */
260 UNIV_INLINE
261 void
262 log_block_set_data_len(
263 /*===================*/
264 	byte*	log_block,	/*!< in/out: log block */
265 	ulint	len);		/*!< in: data length */
266 
267 /** Calculates the checksum for a log block using the CRC32 algorithm.
268 @param[in]	block	log block
269 @return checksum */
270 UNIV_INLINE
271 ulint
272 log_block_calc_checksum_crc32(
273 	const byte*	block);
274 
275 /************************************************************//**
276 Gets a log block checksum field value.
277 @return checksum */
278 UNIV_INLINE
279 ulint
280 log_block_get_checksum(
281 /*===================*/
282 	const byte*	log_block);	/*!< in: log block */
283 /************************************************************//**
284 Sets a log block checksum field value. */
285 UNIV_INLINE
286 void
287 log_block_set_checksum(
288 /*===================*/
289 	byte*	log_block,	/*!< in/out: log block */
290 	ulint	checksum);	/*!< in: checksum */
291 /************************************************************//**
292 Gets a log block first mtr log record group offset.
293 @return first mtr log record group byte offset from the block start, 0
294 if none */
295 UNIV_INLINE
296 ulint
297 log_block_get_first_rec_group(
298 /*==========================*/
299 	const byte*	log_block);	/*!< in: log block */
300 /************************************************************//**
301 Sets the log block first mtr log record group offset. */
302 UNIV_INLINE
303 void
304 log_block_set_first_rec_group(
305 /*==========================*/
306 	byte*	log_block,	/*!< in/out: log block */
307 	ulint	offset);	/*!< in: offset, 0 if none */
308 /************************************************************//**
309 Gets a log block checkpoint number field (4 lowest bytes).
310 @return checkpoint no (4 lowest bytes) */
311 UNIV_INLINE
312 ulint
313 log_block_get_checkpoint_no(
314 /*========================*/
315 	const byte*	log_block);	/*!< in: log block */
316 /************************************************************//**
317 Initializes a log block in the log buffer. */
318 UNIV_INLINE
319 void
320 log_block_init(
321 /*===========*/
322 	byte*	log_block,	/*!< in: pointer to the log buffer */
323 	lsn_t	lsn);		/*!< in: lsn within the log block */
324 /************************************************************//**
325 Converts a lsn to a log block number.
326 @return log block number, it is > 0 and <= 1G */
327 UNIV_INLINE
328 ulint
329 log_block_convert_lsn_to_no(
330 /*========================*/
331 	lsn_t	lsn);	/*!< in: lsn of a byte within the block */
332 /******************************************************//**
333 Prints info of the log. */
334 void
335 log_print(
336 /*======*/
337 	FILE*	file);	/*!< in: file where to print */
338 /******************************************************//**
339 Peeks the current lsn.
340 @return TRUE if success, FALSE if could not get the log system mutex */
341 ibool
342 log_peek_lsn(
343 /*=========*/
344 	lsn_t*	lsn);	/*!< out: if returns TRUE, current lsn is here */
345 /**********************************************************************//**
346 Refreshes the statistics used to print per-second averages. */
347 void
348 log_refresh_stats(void);
349 /*===================*/
350 
351 /** Whether to require checksums on the redo log pages */
352 extern my_bool	innodb_log_checksums;
353 
354 /* Values used as flags */
355 #define LOG_FLUSH	7652559
356 #define LOG_CHECKPOINT	78656949
357 
358 /* The counting of lsn's starts from this value: this must be non-zero */
359 #define LOG_START_LSN		((lsn_t) (16 * OS_FILE_LOG_BLOCK_SIZE))
360 
361 /* Offsets of a log block header */
362 #define	LOG_BLOCK_HDR_NO	0	/* block number which must be > 0 and
363 					is allowed to wrap around at 2G; the
364 					highest bit is set to 1 if this is the
365 					first log block in a log flush write
366 					segment */
367 #define LOG_BLOCK_FLUSH_BIT_MASK 0x80000000UL
368 					/* mask used to get the highest bit in
369 					the preceding field */
370 #define	LOG_BLOCK_HDR_DATA_LEN	4	/* number of bytes of log written to
371 					this block */
372 #define	LOG_BLOCK_FIRST_REC_GROUP 6	/* offset of the first start of an
373 					mtr log record group in this log block,
374 					0 if none; if the value is the same
375 					as LOG_BLOCK_HDR_DATA_LEN, it means
376 					that the first rec group has not yet
377 					been catenated to this log block, but
378 					if it will, it will start at this
379 					offset; an archive recovery can
380 					start parsing the log records starting
381 					from this offset in this log block,
382 					if value not 0 */
383 #define LOG_BLOCK_CHECKPOINT_NO	8	/* 4 lower bytes of the value of
384 					log_sys.next_checkpoint_no when the
385 					log block was last written to: if the
386 					block has not yet been written full,
387 					this value is only updated before a
388 					log buffer flush */
389 #define LOG_BLOCK_HDR_SIZE	12	/* size of the log block header in
390 					bytes */
391 
392 #define	LOG_BLOCK_KEY		4	/* encryption key version
393 					before LOG_BLOCK_CHECKSUM;
394 					in log_t::FORMAT_ENC_10_4 only */
395 #define	LOG_BLOCK_CHECKSUM	4	/* 4 byte checksum of the log block
396 					contents; in InnoDB versions
397 					< 3.23.52 this did not contain the
398 					checksum but the same value as
399 					LOG_BLOCK_HDR_NO */
400 
401 /** Offsets inside the checkpoint pages (redo log format version 1) @{ */
402 /** Checkpoint number */
403 #define LOG_CHECKPOINT_NO		0
404 /** Log sequence number up to which all changes have been flushed */
405 #define LOG_CHECKPOINT_LSN		8
406 /** Byte offset of the log record corresponding to LOG_CHECKPOINT_LSN */
407 #define LOG_CHECKPOINT_OFFSET		16
408 /** srv_log_buffer_size at the time of the checkpoint (not used) */
409 #define LOG_CHECKPOINT_LOG_BUF_SIZE	24
410 /** MariaDB 10.2.5 encrypted redo log encryption key version (32 bits)*/
411 #define LOG_CHECKPOINT_CRYPT_KEY	32
412 /** MariaDB 10.2.5 encrypted redo log random nonce (32 bits) */
413 #define LOG_CHECKPOINT_CRYPT_NONCE	36
414 /** MariaDB 10.2.5 encrypted redo log random message (MY_AES_BLOCK_SIZE) */
415 #define LOG_CHECKPOINT_CRYPT_MESSAGE	40
/** start LSN of the MLOG_CHECKPOINT mini-transaction corresponding
to this checkpoint, or 0 if the information has not been written.
The expansion is parenthesized so that the offset remains a single
operand inside any surrounding expression. */
#define LOG_CHECKPOINT_END_LSN		(OS_FILE_LOG_BLOCK_SIZE - 16)
419 
420 /* @} */
421 
422 /** Offsets of a log file header */
423 /* @{ */
424 /** Log file header format identifier (32-bit unsigned big-endian integer).
425 This used to be called LOG_GROUP_ID and always written as 0,
426 because InnoDB never supported more than one copy of the redo log. */
427 #define LOG_HEADER_FORMAT	0
428 /** Redo log subformat (originally 0). In format version 0, the
429 LOG_FILE_START_LSN started here, 4 bytes earlier than LOG_HEADER_START_LSN,
430 which the LOG_FILE_START_LSN was renamed to.
431 Subformat 1 is for the fully redo-logged TRUNCATE
432 (no MLOG_TRUNCATE records or extra log checkpoints or log files) */
433 #define LOG_HEADER_SUBFORMAT	4
434 /** LSN of the start of data in this log file (with format version 1;
435 in format version 0, it was called LOG_FILE_START_LSN and at offset 4). */
436 #define LOG_HEADER_START_LSN	8
/** A null-terminated string which will contain either the string 'ibbackup'
and the creation time if the log file was created by mysqlbackup --restore,
or the version of the server that created the redo log file
(see LOG_HEADER_CREATOR_CURRENT). */
440 #define LOG_HEADER_CREATOR	16
441 /** End of the log file creator field. */
442 #define LOG_HEADER_CREATOR_END	(LOG_HEADER_CREATOR + 32)
443 /** Contents of the LOG_HEADER_CREATOR field */
444 #define LOG_HEADER_CREATOR_CURRENT		\
445 	"MariaDB "				\
446 	IB_TO_STR(MYSQL_VERSION_MAJOR) "."	\
447 	IB_TO_STR(MYSQL_VERSION_MINOR) "."	\
448 	IB_TO_STR(MYSQL_VERSION_PATCH)
449 
450 /* @} */
451 
452 #define LOG_CHECKPOINT_1	OS_FILE_LOG_BLOCK_SIZE
453 					/* first checkpoint field in the log
454 					header; we write alternately to the
455 					checkpoint fields when we make new
456 					checkpoints; this field is only defined
457 					in the first log file of a log group */
458 #define LOG_CHECKPOINT_2	(3 * OS_FILE_LOG_BLOCK_SIZE)
459 					/* second checkpoint field in the log
460 					header */
461 #define LOG_FILE_HDR_SIZE	(4 * OS_FILE_LOG_BLOCK_SIZE)
462 
463 /* As long as fil_io() is used to handle log io, log group max size is limited
464 by (maximum page number) * (minimum page size). Page number type is uint32_t.
465 Remove this limitation if page number is no longer used for log file io. */
466 static const ulonglong log_group_max_size =
467 	((ulonglong(UINT32_MAX) + 1) * UNIV_PAGE_SIZE_MIN - 1);
468 
469 typedef ib_mutex_t	LogSysMutex;
470 typedef ib_mutex_t	FlushOrderMutex;
471 
472 /** Redo log buffer */
473 struct log_t{
474   /** The original (not version-tagged) InnoDB redo log format */
475   static constexpr uint32_t FORMAT_3_23 = 0;
476   /** The MySQL 5.7.9/MariaDB 10.2.2 log format */
477   static constexpr uint32_t FORMAT_10_2 = 1;
478   /** The MariaDB 10.3.2 log format.
479   To prevent crash-downgrade to earlier 10.2 due to the inability to
480   roll back a retroactively introduced TRX_UNDO_RENAME_TABLE undo log record,
481   MariaDB 10.2.18 and later will use the 10.3 format, but LOG_HEADER_SUBFORMAT
482   1 instead of 0. MariaDB 10.3 will use subformat 0 (5.7-style TRUNCATE) or 2
483   (MDEV-13564 backup-friendly TRUNCATE). */
484   static constexpr uint32_t FORMAT_10_3 = 103;
485   /** The MariaDB 10.4.0 log format. */
486   static constexpr uint32_t FORMAT_10_4 = 104;
487   /** Encrypted MariaDB redo log */
488   static constexpr uint32_t FORMAT_ENCRYPTED = 1U << 31;
489   /** The MariaDB 10.4.0 log format (only with innodb_encrypt_log=ON) */
490   static constexpr uint32_t FORMAT_ENC_10_4 = FORMAT_10_4 | FORMAT_ENCRYPTED;
491 
492 	MY_ALIGNED(CACHE_LINE_SIZE)
493 	lsn_t		lsn;		/*!< log sequence number */
494 	ulong		buf_free;	/*!< first free offset within the log
495 					buffer in use */
496 
497 	MY_ALIGNED(CACHE_LINE_SIZE)
498 	LogSysMutex	mutex;		/*!< mutex protecting the log */
499 	MY_ALIGNED(CACHE_LINE_SIZE)
500 	LogSysMutex	write_mutex;	/*!< mutex protecting writing to log */
501 	MY_ALIGNED(CACHE_LINE_SIZE)
502 	FlushOrderMutex	log_flush_order_mutex;/*!< mutex to serialize access to
503 					the flush list when we are putting
504 					dirty blocks in the list. The idea
505 					behind this mutex is to be able
506 					to release log_sys.mutex during
507 					mtr_commit and still ensure that
508 					insertions in the flush_list happen
509 					in the LSN order. */
510 	/** log_buffer, append data here */
511 	byte*		buf;
512 	/** log_buffer, writing data to file from this buffer.
513 	Before flushing write_buf is swapped with flush_buf */
514 	byte*		flush_buf;
515 	ulong		max_buf_free;	/*!< recommended maximum value of
516 					buf_free for the buffer in use, after
517 					which the buffer is flushed */
518 	bool		check_flush_or_checkpoint;
519 					/*!< this is set when there may
520 					be need to flush the log buffer, or
521 					preflush buffer pool pages, or make
522 					a checkpoint; this MUST be TRUE when
523 					lsn - last_checkpoint_lsn >
524 					max_checkpoint_age; this flag is
525 					peeked at by log_free_check(), which
526 					does not reserve the log mutex */
527 
528   /** Log files. Protected by mutex or write_mutex. */
529   struct files {
530     /** number of files */
531     ulint				n_files;
532     /** format of the redo log: e.g., FORMAT_10_4 */
533     uint32_t				format;
534     /** redo log subformat: 0 with separately logged TRUNCATE,
535     2 with fully redo-logged TRUNCATE (1 in MariaDB 10.2) */
536     uint32_t				subformat;
537     /** individual log file size in bytes, including the header */
538     lsn_t				file_size;
539   private:
540     /** lsn used to fix coordinates within the log group */
541     lsn_t				lsn;
542     /** the byte offset of the above lsn */
543     lsn_t				lsn_offset;
544   public:
545     /** used only in recovery: recovery scan succeeded up to this
546     lsn in this log group */
547     lsn_t				scanned_lsn;
548 
549     /** @return whether the redo log is encrypted */
is_encryptedlog_t::files550     bool is_encrypted() const { return format & FORMAT_ENCRYPTED; }
551     /** @return capacity in bytes */
capacitylog_t::files552     lsn_t capacity() const{ return (file_size - LOG_FILE_HDR_SIZE) * n_files; }
553     /** Calculate the offset of a log sequence number.
554     @param[in]	lsn	log sequence number
555     @return offset within the log */
556     inline lsn_t calc_lsn_offset(lsn_t lsn) const;
557 
558     /** Set the field values to correspond to a given lsn. */
set_fieldslog_t::files559     void set_fields(lsn_t lsn)
560     {
561       lsn_t c_lsn_offset = calc_lsn_offset(lsn);
562       set_lsn(lsn);
563       set_lsn_offset(c_lsn_offset);
564     }
565 
566     /** Read a log segment to log_sys.buf.
567     @param[in,out]	start_lsn	in: read area start,
568 					out: the last read valid lsn
569     @param[in]		end_lsn		read area end
570     @return	whether no invalid blocks (e.g checksum mismatch) were found */
571     bool read_log_seg(lsn_t* start_lsn, lsn_t end_lsn);
572 
573     /** Initialize the redo log buffer.
574     @param[in]	n_files		number of files */
575     void create(ulint n_files);
576 
577     /** Close the redo log buffer. */
closelog_t::files578     void close()
579     {
580       n_files = 0;
581     }
582     void set_lsn(lsn_t a_lsn);
get_lsnlog_t::files583     lsn_t get_lsn() const { return lsn; }
584     void set_lsn_offset(lsn_t a_lsn);
get_lsn_offsetlog_t::files585     lsn_t get_lsn_offset() const { return lsn_offset; }
586   } log;
587 
588 	/** The fields involved in the log buffer flush @{ */
589 
590 	ulong		buf_next_to_write;/*!< first offset in the log buffer
591 					where the byte content may not exist
592 					written to file, e.g., the start
593 					offset of a log record catenated
594 					later; this is advanced when a flush
595 					operation is completed to all the log
596 					groups */
597 	lsn_t		write_lsn;	/*!< last written lsn */
598 	lsn_t		current_flush_lsn;/*!< end lsn for the current running
599 					write + flush operation */
600 	lsn_t		flushed_to_disk_lsn;
601 					/*!< how far we have written the log
602 					AND flushed to disk */
603 	ulint		n_pending_flushes;/*!< number of currently
604 					pending flushes; protected by
605 					log_sys.mutex */
606 	os_event_t	flush_event;	/*!< this event is in the reset state
607 					when a flush is running;
608 					os_event_set() and os_event_reset()
609 					are protected by log_sys.mutex */
610 	ulint		n_log_ios;	/*!< number of log i/os initiated thus
611 					far */
612 	ulint		n_log_ios_old;	/*!< number of log i/o's at the
613 					previous printout */
614 	time_t		last_printout_time;/*!< when log_print was last time
615 					called */
616 	/* @} */
617 
618 	/** Fields involved in checkpoints @{ */
619 	lsn_t		log_group_capacity; /*!< capacity of the log group; if
620 					the checkpoint age exceeds this, it is
621 					a serious error because it is possible
622 					we will then overwrite log and spoil
623 					crash recovery */
624 	lsn_t		max_modified_age_async;
625 					/*!< when this recommended
626 					value for lsn -
627 					buf_pool_get_oldest_modification()
628 					is exceeded, we start an
629 					asynchronous preflush of pool pages */
630 	lsn_t		max_modified_age_sync;
631 					/*!< when this recommended
632 					value for lsn -
633 					buf_pool_get_oldest_modification()
634 					is exceeded, we start a
635 					synchronous preflush of pool pages */
636 	lsn_t		max_checkpoint_age_async;
637 					/*!< when this checkpoint age
638 					is exceeded we start an
639 					asynchronous writing of a new
640 					checkpoint */
641 	lsn_t		max_checkpoint_age;
642 					/*!< this is the maximum allowed value
643 					for lsn - last_checkpoint_lsn when a
644 					new query step is started */
645 	ib_uint64_t	next_checkpoint_no;
646 					/*!< next checkpoint number */
647 	lsn_t		last_checkpoint_lsn;
648 					/*!< latest checkpoint lsn */
649 	lsn_t		next_checkpoint_lsn;
650 					/*!< next checkpoint lsn */
651 	mtr_buf_t*	append_on_checkpoint;
652 					/*!< extra redo log records to write
653 					during a checkpoint, or NULL if none.
654 					The pointer is protected by
655 					log_sys.mutex, and the data must
656 					remain constant as long as this
657 					pointer is not NULL. */
658 	ulint		n_pending_checkpoint_writes;
659 					/*!< number of currently pending
660 					checkpoint writes */
661 	rw_lock_t	checkpoint_lock;/*!< this latch is x-locked when a
662 					checkpoint write is running; a thread
663 					should wait for this without owning
664 					the log mutex */
665 
666 	/** buffer for checkpoint header */
667 	MY_ALIGNED(OS_FILE_LOG_BLOCK_SIZE)
668 	byte		checkpoint_buf[OS_FILE_LOG_BLOCK_SIZE];
669 	/* @} */
670 
671 private:
672   bool m_initialised;
673 public:
674   /**
675     Constructor.
676 
677     Some members may require late initialisation, thus we just mark object as
678     uninitialised. Real initialisation happens in create().
679   */
log_tlog_t680   log_t(): m_initialised(false) {}
681 
682   /** @return whether the redo log is encrypted */
is_encryptedlog_t683   bool is_encrypted() const { return(log.is_encrypted()); }
684 
is_initialisedlog_t685   bool is_initialised() const { return m_initialised; }
686 
687   /** Complete an asynchronous checkpoint write. */
688   void complete_checkpoint();
689 
690   /** @return the log block header + trailer size */
framing_sizelog_t691   unsigned framing_size() const
692   {
693     return log.format == FORMAT_ENC_10_4
694       ? LOG_BLOCK_HDR_SIZE + LOG_BLOCK_KEY + LOG_BLOCK_CHECKSUM
695       : LOG_BLOCK_HDR_SIZE + LOG_BLOCK_CHECKSUM;
696   }
697   /** @return the log block payload size */
payload_sizelog_t698   unsigned payload_size() const
699   {
700     return log.format == FORMAT_ENC_10_4
701       ? OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_HDR_SIZE - LOG_BLOCK_CHECKSUM -
702       LOG_BLOCK_KEY
703       : OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_HDR_SIZE - LOG_BLOCK_CHECKSUM;
704   }
705   /** @return the log block trailer offset */
trailer_offsetlog_t706   unsigned trailer_offset() const
707   {
708     return log.format == FORMAT_ENC_10_4
709       ? OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_CHECKSUM - LOG_BLOCK_KEY
710       : OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_CHECKSUM;
711   }
712 
713   /** Initialise the redo log subsystem. */
714   void create();
715 
716   /** Shut down the redo log subsystem. */
717   void close();
718 };
719 
720 /** Redo log system */
721 extern log_t	log_sys;
722 
723 /** Calculate the offset of a log sequence number.
724 @param[in]     lsn     log sequence number
725 @return offset within the log */
calc_lsn_offset(lsn_t lsn)726 inline lsn_t log_t::files::calc_lsn_offset(lsn_t lsn) const
727 {
728   ut_ad(this == &log_sys.log);
729   /* The lsn parameters are updated while holding both the mutexes
730   and it is ok to have either of them while reading */
731   ut_ad(log_sys.mutex.is_owned() || log_sys.write_mutex.is_owned());
732   const lsn_t group_size= capacity();
733   lsn_t l= lsn - this->lsn;
734   if (longlong(l) < 0) {
735     l= lsn_t(-longlong(l)) % group_size;
736     l= group_size - l;
737   }
738 
739   l+= lsn_offset - LOG_FILE_HDR_SIZE * (1 + lsn_offset / file_size);
740   l%= group_size;
741   return l + LOG_FILE_HDR_SIZE * (1 + l / (file_size - LOG_FILE_HDR_SIZE));
742 }
743 
set_lsn(lsn_t a_lsn)744 inline void log_t::files::set_lsn(lsn_t a_lsn) {
745       ut_ad(log_sys.mutex.is_owned() || log_sys.write_mutex.is_owned());
746       lsn = a_lsn;
747 }
748 
set_lsn_offset(lsn_t a_lsn)749 inline void log_t::files::set_lsn_offset(lsn_t a_lsn) {
750       ut_ad(log_sys.mutex.is_owned() || log_sys.write_mutex.is_owned());
751       ut_ad((lsn % OS_FILE_LOG_BLOCK_SIZE) == (a_lsn % OS_FILE_LOG_BLOCK_SIZE));
752       lsn_offset = a_lsn;
753 }
754 
/** Test if flush order mutex is owned.
Expands to mutex_own() on log_sys.log_flush_order_mutex. */
#define log_flush_order_mutex_own()			\
	mutex_own(&log_sys.log_flush_order_mutex)

/** Acquire the flush order mutex (log_sys.log_flush_order_mutex).
Wrapped in do { } while (0) so that it behaves as a single statement. */
#define log_flush_order_mutex_enter() do {		\
	mutex_enter(&log_sys.log_flush_order_mutex);	\
} while (0)
/** Release the flush order mutex (log_sys.log_flush_order_mutex). */
# define log_flush_order_mutex_exit() do {		\
	mutex_exit(&log_sys.log_flush_order_mutex);	\
} while (0)

/** Test if log sys mutex (log_sys.mutex) is owned. */
#define log_mutex_own() mutex_own(&log_sys.mutex)

/** Test if log sys write mutex (log_sys.write_mutex) is owned. */
#define log_write_mutex_own() mutex_own(&log_sys.write_mutex)

/** Acquire the log sys mutex (log_sys.mutex). */
#define log_mutex_enter() mutex_enter(&log_sys.mutex)

/** Acquire the log sys write mutex (log_sys.write_mutex). */
#define log_write_mutex_enter() mutex_enter(&log_sys.write_mutex)

/** Acquire all the log sys mutexes: log_sys.write_mutex first,
then log_sys.mutex. The flush order mutex is not acquired here. */
#define log_mutex_enter_all() do {		\
	mutex_enter(&log_sys.write_mutex);	\
	mutex_enter(&log_sys.mutex);		\
} while (0)

/** Release the log sys mutex (log_sys.mutex). */
#define log_mutex_exit() mutex_exit(&log_sys.mutex)

/** Release the log sys write mutex (log_sys.write_mutex). */
#define log_write_mutex_exit() mutex_exit(&log_sys.write_mutex)

/** Release all the log sys mutexes in the reverse order of
log_mutex_enter_all(): log_sys.mutex first, then log_sys.write_mutex. */
#define log_mutex_exit_all() do {		\
	mutex_exit(&log_sys.mutex);		\
	mutex_exit(&log_sys.write_mutex);	\
} while (0)
797 
798 /* log scrubbing speed, in bytes/sec */
799 extern ulonglong innodb_scrub_log_speed;
800 
801 /** Event to wake up log_scrub_thread */
802 extern os_event_t	log_scrub_event;
803 /** Whether log_scrub_thread is active */
804 extern bool		log_scrub_thread_active;
805 
806 #include "log0log.inl"
807 
808 #endif
809