1 /*****************************************************************************
2 
3 Copyright (c) 1995, 2017, Oracle and/or its affiliates. All Rights Reserved.
4 Copyright (c) 2009, Google Inc.
5 
6 Portions of this file contain modifications contributed and copyrighted by
7 Google, Inc. Those modifications are gratefully acknowledged and are described
8 briefly in the InnoDB documentation. The contributions by Google are
9 incorporated with their permission, and subject to the conditions contained in
10 the file COPYING.Google.
11 
12 This program is free software; you can redistribute it and/or modify
13 it under the terms of the GNU General Public License, version 2.0,
14 as published by the Free Software Foundation.
15 
16 This program is also distributed with certain software (including
17 but not limited to OpenSSL) that is licensed under separate terms,
18 as designated in a particular file or component or in included license
19 documentation.  The authors of MySQL hereby grant you an additional
20 permission to link the program and your derivative works with the
21 separately licensed software that they have included with MySQL.
22 
23 This program is distributed in the hope that it will be useful,
24 but WITHOUT ANY WARRANTY; without even the implied warranty of
25 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
26 GNU General Public License, version 2.0, for more details.
27 
28 You should have received a copy of the GNU General Public License along with
29 this program; if not, write to the Free Software Foundation, Inc.,
30 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
31 
32 *****************************************************************************/
33 
34 /**************************************************//**
35 @file log/log0log.cc
36 Database log
37 
38 Created 12/9/1995 Heikki Tuuri
39 *******************************************************/
40 
41 #include "ha_prototypes.h"
42 #include <debug_sync.h>
43 
44 #include "log0log.h"
45 
46 #ifdef UNIV_NONINL
47 #include "log0log.ic"
48 #endif
49 
50 #include "mem0mem.h"
51 #include "buf0buf.h"
52 #ifndef UNIV_HOTBACKUP
53 #include "buf0flu.h"
54 #include "srv0srv.h"
55 #include "log0recv.h"
56 #include "fil0fil.h"
57 #include "dict0boot.h"
58 #include "dict0stats_bg.h"
59 #include "srv0srv.h"
60 #include "srv0start.h"
61 #include "trx0sys.h"
62 #include "trx0trx.h"
63 #include "trx0roll.h"
64 #include "srv0mon.h"
65 #include "sync0sync.h"
66 #endif /* !UNIV_HOTBACKUP */
67 #include "xb0xb.h"
68 
69 /*
70 General philosophy of InnoDB redo-logs:
71 
72 1) Every change to a contents of a data page must be done
73 through mtr, which in mtr_commit() writes log records
74 to the InnoDB redo log.
75 
76 2) Normally these changes are performed using a mlog_write_ulint()
77 or similar function.
78 
79 3) In some page level operations only a code number of a
80 c-function and its parameters are written to the log to
81 reduce the size of the log.
82 
83   3a) You should not add parameters to these kind of functions
84   (e.g. trx_undo_header_create(), trx_undo_insert_header_reuse())
85 
  3b) You should not add functionality which either changes the
  behavior compared with the old version or depends on data
  outside of the page. These kinds of functions should implement a
  self-contained page transformation and should remain unchanged
  unless you have very essential reasons to change the log
  semantics or format.
92 
93 */
94 
95 /** Redo log system */
96 log_t*	log_sys	= NULL;
97 
98 /** Whether to generate and require checksums on the redo log pages */
99 my_bool	innodb_log_checksums;
100 
101 /** Pointer to the log checksum calculation function */
102 log_checksum_func_t log_checksum_algorithm_ptr;
103 
104 /* These control how often we print warnings if the last checkpoint is too
105 old */
106 bool	log_has_printed_chkp_warning = false;
107 time_t	log_last_warning_time;
108 
109 bool	log_has_printed_chkp_margine_warning = false;
110 time_t	log_last_margine_warning_time;
111 
112 /* A margin for free space in the log buffer before a log entry is catenated */
113 #define LOG_BUF_WRITE_MARGIN	(4 * OS_FILE_LOG_BLOCK_SIZE)
114 
115 /* Margins for free space in the log buffer after a log entry is catenated */
116 #define LOG_BUF_FLUSH_RATIO	2
117 #define LOG_BUF_FLUSH_MARGIN	(LOG_BUF_WRITE_MARGIN + 4 * UNIV_PAGE_SIZE)
118 
119 /* This parameter controls asynchronous making of a new checkpoint; the value
120 should be bigger than LOG_POOL_PREFLUSH_RATIO_SYNC */
121 
122 #define LOG_POOL_CHECKPOINT_RATIO_ASYNC	32
123 
124 /* This parameter controls synchronous preflushing of modified buffer pages */
125 #define LOG_POOL_PREFLUSH_RATIO_SYNC	16
126 
127 /* The same ratio for asynchronous preflushing; this value should be less than
128 the previous */
129 #define LOG_POOL_PREFLUSH_RATIO_ASYNC	8
130 
131 /* Codes used in unlocking flush latches */
132 #define LOG_UNLOCK_NONE_FLUSHED_LOCK	1
133 #define LOG_UNLOCK_FLUSH_LOCK		2
134 
135 /******************************************************//**
136 Completes a checkpoint write i/o to a log file. */
137 static
138 void
139 log_io_complete_checkpoint(void);
140 /*============================*/
141 
142 #ifndef UNIV_HOTBACKUP
143 /****************************************************************//**
144 Returns the oldest modified block lsn in the pool, or log_sys->lsn if none
145 exists.
146 @return LSN of oldest modification */
147 static
148 lsn_t
log_buf_pool_get_oldest_modification(void)149 log_buf_pool_get_oldest_modification(void)
150 /*======================================*/
151 {
152 	lsn_t	lsn;
153 
154 	ut_ad(log_mutex_own());
155 
156 	lsn = buf_pool_get_oldest_modification();
157 
158 	if (!lsn) {
159 
160 		lsn = log_sys->lsn;
161 	}
162 
163 	return(lsn);
164 }
165 #endif  /* !UNIV_HOTBACKUP */
166 
/** Extends the log buffer.
Waits out any concurrent extension, drains any fully written blocks,
then reallocates the buffer and carries over the last partial block.
@param[in]	len	requested minimum size in bytes */
void
log_buffer_extend(
	ulint	len)
{
	ulint	move_start;
	ulint	move_end;
	/* Scratch copy of the last, partially filled log block; it is
	restored into the freshly allocated buffer below. */
	byte	*tmp_buf = static_cast<byte *>(alloca(OS_FILE_LOG_BLOCK_SIZE));

	log_mutex_enter_all();

	while (log_sys->is_extending) {
		/* Another thread is trying to extend already.
		Needs to wait for. */
		log_mutex_exit_all();

		log_buffer_flush_to_disk();

		log_mutex_enter_all();

		if (srv_log_buffer_size > len / UNIV_PAGE_SIZE) {
			/* Already extended enough by the others */
			log_mutex_exit_all();
			return;
		}
	}

	if (len >= log_sys->buf_size / 2) {
		DBUG_EXECUTE_IF("ib_log_buffer_is_short_crash",
				DBUG_SUICIDE(););

		/* log_buffer is too small. try to extend instead of crash. */
		ib::warn() << "The transaction log size is too large"
			" for innodb_log_buffer_size (" << len << " >= "
			<< LOG_BUFFER_SIZE << " / 2). Trying to extend it.";
	}

	/* Claim the extension; other threads now spin in the loop above. */
	log_sys->is_extending = true;

	while (ut_calc_align_down(log_sys->buf_free,
				  OS_FILE_LOG_BLOCK_SIZE)
	       != ut_calc_align_down(log_sys->buf_next_to_write,
				     OS_FILE_LOG_BLOCK_SIZE)) {
		/* Buffer might have >1 blocks to write still. */
		log_mutex_exit_all();

		log_buffer_flush_to_disk();

		log_mutex_enter_all();
	}

	/* Only the current (incomplete) block remains unwritten. */
	move_start = ut_calc_align_down(
		log_sys->buf_free,
		OS_FILE_LOG_BLOCK_SIZE);
	move_end = log_sys->buf_free;

	/* store the last log block in buffer */
	ut_memcpy(tmp_buf, log_sys->buf + move_start,
		  move_end - move_start);

	/* Rebase the in-buffer positions to the start of that block. */
	log_sys->buf_free -= move_start;
	log_sys->buf_next_to_write -= move_start;

	/* reallocate log buffer */
	srv_log_buffer_size = len / UNIV_PAGE_SIZE + 1;
	ut_free(log_sys->buf_ptr);

	log_sys->buf_size = LOG_BUFFER_SIZE;

	/* Allocate twice the size plus alignment slack: the buffer is
	used in two halves (see first_in_use) and must be aligned to
	OS_FILE_LOG_BLOCK_SIZE. */
	log_sys->buf_ptr = static_cast<byte*>(
		ut_zalloc_nokey(log_sys->buf_size * 2 + OS_FILE_LOG_BLOCK_SIZE));
	log_sys->buf = static_cast<byte*>(
		ut_align(log_sys->buf_ptr, OS_FILE_LOG_BLOCK_SIZE));

	log_sys->first_in_use = true;

	log_sys->max_buf_free = log_sys->buf_size / LOG_BUF_FLUSH_RATIO
		- LOG_BUF_FLUSH_MARGIN;

	/* restore the last log block */
	ut_memcpy(log_sys->buf, tmp_buf, move_end - move_start);

	ut_ad(log_sys->is_extending);
	log_sys->is_extending = false;

	log_mutex_exit_all();

	ib::info() << "innodb_log_buffer_size was extended to "
		<< LOG_BUFFER_SIZE << ".";
}
258 
259 #ifndef UNIV_HOTBACKUP
260 /** Calculate actual length in redo buffer and file including
261 block header and trailer.
262 @param[in]	len	length to write
263 @return actual length to write including header and trailer. */
264 static inline
265 ulint
log_calculate_actual_len(ulint len)266 log_calculate_actual_len(
267 	ulint len)
268 {
269 	ut_ad(log_mutex_own());
270 
271 	/* actual length stored per block */
272 	const ulint	len_per_blk = OS_FILE_LOG_BLOCK_SIZE
273 		- (LOG_BLOCK_HDR_SIZE + LOG_BLOCK_TRL_SIZE);
274 
275 	/* actual data length in last block already written */
276 	ulint	extra_len = (log_sys->buf_free % OS_FILE_LOG_BLOCK_SIZE);
277 
278 	ut_ad(extra_len >= LOG_BLOCK_HDR_SIZE);
279 	extra_len -= LOG_BLOCK_HDR_SIZE;
280 
281 	/* total extra length for block header and trailer */
282 	extra_len = ((len + extra_len) / len_per_blk)
283 		* (LOG_BLOCK_HDR_SIZE + LOG_BLOCK_TRL_SIZE);
284 
285 	return(len + extra_len);
286 }
287 
/** Check margin not to overwrite transaction log from the last checkpoint.
If would estimate the log write to exceed the log_group_capacity,
waits for the checkpoint is done enough.
@param[in]	len	length of the data to be written */

void
log_margin_checkpoint_age(
	ulint	len)
{
	/* Total on-disk size of the write, including the block headers
	and trailers it will straddle. */
	ulint	margin = log_calculate_actual_len(len);

	ut_ad(log_mutex_own());

	if (margin > log_sys->log_group_capacity) {
		/* return with warning output to avoid deadlock */
		/* Warning is rate-limited to once per 15 seconds. */
		if (!log_has_printed_chkp_margine_warning
		    || difftime(time(NULL),
				log_last_margine_warning_time) > 15) {
			log_has_printed_chkp_margine_warning = true;
			log_last_margine_warning_time = time(NULL);

			ib::error() << "The transaction log files are too"
				" small for the single transaction log (size="
				<< len << "). So, the last checkpoint age"
				" might exceed the log group capacity "
				<< log_sys->log_group_capacity << ".";
		}

		return;
	}

	/* Our margin check should ensure that we never reach this condition.
	Try to do checkpoint once. We cannot keep waiting here as it might
	result in hang in case the current mtr has latch on oldest lsn */
	if (log_sys->lsn - log_sys->last_checkpoint_lsn + margin
	    > log_sys->log_group_capacity) {
		/* The log write of 'len' might overwrite the transaction log
		after the last checkpoint. Makes checkpoint. */

		bool	flushed_enough = false;

		/* If the oldest dirty page is already recent enough, the
		checkpoint can advance without waiting for page flushes. */
		if (log_sys->lsn - log_buf_pool_get_oldest_modification()
		    + margin
		    <= log_sys->log_group_capacity) {
			flushed_enough = true;
		}

		log_sys->check_flush_or_checkpoint = true;
		/* Release the log mutex while checkpointing; it is
		reacquired before returning to the caller. */
		log_mutex_exit();

		DEBUG_SYNC_C("margin_checkpoint_age_rescue");

		if (!flushed_enough) {
			/* Give the page cleaner a moment to flush pages
			before forcing the checkpoint. */
			os_thread_sleep(100000);
		}
		log_checkpoint(true, false);

		log_mutex_enter();
	}

	return;
}
350 #endif /* !UNIV_HOTBACKUP */
351 /** Open the log for log_write_low. The log must be closed with log_close.
352 @param[in]	len	length of the data to be written
353 @return start lsn of the log record */
354 lsn_t
log_reserve_and_open(ulint len)355 log_reserve_and_open(
356 	ulint	len)
357 {
358 	ulint	len_upper_limit;
359 #ifdef UNIV_DEBUG
360 	ulint	count			= 0;
361 #endif /* UNIV_DEBUG */
362 
363 loop:
364 	ut_ad(log_mutex_own());
365 	ut_ad(!recv_no_log_write);
366 
367 	if (log_sys->is_extending) {
368 		log_mutex_exit();
369 
370 		/* Log buffer size is extending. Writing up to the next block
371 		should wait for the extending finished. */
372 
373 		os_thread_sleep(100000);
374 
375 		ut_ad(++count < 50);
376 
377 		log_mutex_enter();
378 		goto loop;
379 	}
380 
381 	/* Calculate an upper limit for the space the string may take in the
382 	log buffer */
383 
384 	len_upper_limit = LOG_BUF_WRITE_MARGIN + srv_log_write_ahead_size
385 			  + (5 * len) / 4;
386 
387 	if (log_sys->buf_free + len_upper_limit > log_sys->buf_size) {
388 		log_mutex_exit();
389 
390 		DEBUG_SYNC_C("log_buf_size_exceeded");
391 
392 		/* Not enough free space, do a write of the log buffer */
393 
394 		log_buffer_sync_in_background(false);
395 
396 		srv_stats.log_waits.inc();
397 
398 		ut_ad(++count < 50);
399 
400 		log_mutex_enter();
401 		goto loop;
402 	}
403 
404 	return(log_sys->lsn);
405 }
406 
407 /************************************************************//**
408 Writes to the log the string given. It is assumed that the caller holds the
409 log mutex. */
410 void
log_write_low(const byte * str,ulint str_len)411 log_write_low(
412 /*==========*/
413 	const byte*	str,		/*!< in: string */
414 	ulint		str_len)	/*!< in: string length */
415 {
416 	log_t*	log	= log_sys;
417 	ulint	len;
418 	ulint	data_len;
419 	byte*	log_block;
420 
421 	ut_ad(log_mutex_own());
422 part_loop:
423 	ut_ad(!recv_no_log_write);
424 	/* Calculate a part length */
425 
426 	data_len = (log->buf_free % OS_FILE_LOG_BLOCK_SIZE) + str_len;
427 
428 	if (data_len <= OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_SIZE) {
429 
430 		/* The string fits within the current log block */
431 
432 		len = str_len;
433 	} else {
434 		data_len = OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_SIZE;
435 
436 		len = OS_FILE_LOG_BLOCK_SIZE
437 			- (log->buf_free % OS_FILE_LOG_BLOCK_SIZE)
438 			- LOG_BLOCK_TRL_SIZE;
439 	}
440 
441 	ut_memcpy(log->buf + log->buf_free, str, len);
442 
443 	str_len -= len;
444 	str = str + len;
445 
446 	log_block = static_cast<byte*>(
447 		ut_align_down(
448 			log->buf + log->buf_free, OS_FILE_LOG_BLOCK_SIZE));
449 
450 	log_block_set_data_len(log_block, data_len);
451 
452 	if (data_len == OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_SIZE) {
453 		/* This block became full */
454 		log_block_set_data_len(log_block, OS_FILE_LOG_BLOCK_SIZE);
455 		log_block_set_checkpoint_no(log_block,
456 					    log_sys->next_checkpoint_no);
457 		len += LOG_BLOCK_HDR_SIZE + LOG_BLOCK_TRL_SIZE;
458 
459 		log->lsn += len;
460 
461 		/* Initialize the next block header */
462 		log_block_init(log_block + OS_FILE_LOG_BLOCK_SIZE, log->lsn);
463 	} else {
464 		log->lsn += len;
465 	}
466 
467 	log->buf_free += len;
468 
469 	ut_ad(log->buf_free <= log->buf_size);
470 
471 	if (str_len > 0) {
472 		goto part_loop;
473 	}
474 
475 	srv_stats.log_write_requests.inc();
476 }
477 
478 /************************************************************//**
479 Closes the log.
480 @return lsn */
481 lsn_t
log_close(void)482 log_close(void)
483 /*===========*/
484 {
485 	byte*		log_block;
486 	ulint		first_rec_group;
487 	lsn_t		oldest_lsn;
488 	lsn_t		lsn;
489 	log_t*		log	= log_sys;
490 	lsn_t		checkpoint_age;
491 
492 	ut_ad(log_mutex_own());
493 	ut_ad(!recv_no_log_write);
494 
495 	lsn = log->lsn;
496 
497 	log_block = static_cast<byte*>(
498 		ut_align_down(
499 			log->buf + log->buf_free, OS_FILE_LOG_BLOCK_SIZE));
500 
501 	first_rec_group = log_block_get_first_rec_group(log_block);
502 
503 	if (first_rec_group == 0) {
504 		/* We initialized a new log block which was not written
505 		full by the current mtr: the next mtr log record group
506 		will start within this block at the offset data_len */
507 
508 		log_block_set_first_rec_group(
509 			log_block, log_block_get_data_len(log_block));
510 	}
511 
512 	if (log->buf_free > log->max_buf_free) {
513 
514 		log->check_flush_or_checkpoint = true;
515 	}
516 
517 	checkpoint_age = lsn - log->last_checkpoint_lsn;
518 
519 	if (checkpoint_age >= log->log_group_capacity) {
520 		DBUG_EXECUTE_IF(
521 			"print_all_chkp_warnings",
522 			log_has_printed_chkp_warning = false;);
523 
524 		if (!log_has_printed_chkp_warning
525 		    || difftime(time(NULL), log_last_warning_time) > 15) {
526 
527 			log_has_printed_chkp_warning = true;
528 			log_last_warning_time = time(NULL);
529 
530 			ib::error() << "The age of the last checkpoint is "
531 				<< checkpoint_age << ", which exceeds the log"
532 				" group capacity " << log->log_group_capacity
533 				<< ".";
534 		}
535 	}
536 
537 	if (checkpoint_age <= log->max_modified_age_sync) {
538 
539 		goto function_exit;
540 	}
541 
542 	oldest_lsn = buf_pool_get_oldest_modification();
543 
544 	if (!oldest_lsn
545 	    || lsn - oldest_lsn > log->max_modified_age_sync
546 	    || checkpoint_age > log->max_checkpoint_age_async) {
547 
548 		log->check_flush_or_checkpoint = true;
549 	}
550 function_exit:
551 
552 	return(lsn);
553 }
554 
555 /******************************************************//**
556 Calculates the data capacity of a log group, when the log file headers are not
557 included.
558 @return capacity in bytes */
559 lsn_t
log_group_get_capacity(const log_group_t * group)560 log_group_get_capacity(
561 /*===================*/
562 	const log_group_t*	group)	/*!< in: log group */
563 {
564 	/* The lsn parameters are updated while holding both the mutexes
565 	and it is ok to have either of them while reading */
566 	ut_ad(log_mutex_own() || log_write_mutex_own());
567 
568 	return((group->file_size - LOG_FILE_HDR_SIZE) * group->n_files);
569 }
570 
571 /******************************************************//**
572 Calculates the offset within a log group, when the log file headers are not
573 included.
574 @return size offset (<= offset) */
575 UNIV_INLINE
576 lsn_t
log_group_calc_size_offset(lsn_t offset,const log_group_t * group)577 log_group_calc_size_offset(
578 /*=======================*/
579 	lsn_t			offset,	/*!< in: real offset within the
580 					log group */
581 	const log_group_t*	group)	/*!< in: log group */
582 {
583 	/* The lsn parameters are updated while holding both the mutexes
584 	and it is ok to have either of them while reading */
585 	ut_ad(log_mutex_own() || log_write_mutex_own());
586 
587 	return(offset - LOG_FILE_HDR_SIZE * (1 + offset / group->file_size));
588 }
589 
590 /******************************************************//**
591 Calculates the offset within a log group, when the log file headers are
592 included.
593 @return real offset (>= offset) */
594 UNIV_INLINE
595 lsn_t
log_group_calc_real_offset(lsn_t offset,const log_group_t * group)596 log_group_calc_real_offset(
597 /*=======================*/
598 	lsn_t			offset,	/*!< in: size offset within the
599 					log group */
600 	const log_group_t*	group)	/*!< in: log group */
601 {
602 	/* The lsn parameters are updated while holding both the mutexes
603 	and it is ok to have either of them while reading */
604 	ut_ad(log_mutex_own() || log_write_mutex_own());
605 
606 	return(offset + LOG_FILE_HDR_SIZE
607 	       * (1 + offset / (group->file_size - LOG_FILE_HDR_SIZE)));
608 }
609 
/** Calculate the offset of an lsn within a log group.
Works modulo the group capacity, relative to the known reference pair
(group->lsn, group->lsn_offset), so it is valid for lsns both before
and after the group's current lsn.
@param[in]	lsn	log sequence number
@param[in]	group	log group
@return offset within the log group */
lsn_t
log_group_calc_lsn_offset(
	lsn_t			lsn,
	const log_group_t*	group)
{
	lsn_t	gr_lsn;
	lsn_t	gr_lsn_size_offset;
	lsn_t	difference;
	lsn_t	group_size;
	lsn_t	offset;

	/* The lsn parameters are updated while holding both the mutexes
	and it is ok to have either of them while reading */
	ut_ad(log_mutex_own() || log_write_mutex_own());

	gr_lsn = group->lsn;

	/* Reference offset with file headers removed, so that the
	modular arithmetic below runs over data bytes only. */
	gr_lsn_size_offset = log_group_calc_size_offset(
		group->lsn_offset, group);

	group_size = log_group_get_capacity(group);

	if (lsn >= gr_lsn) {

		difference = lsn - gr_lsn;
	} else {
		/* lsn is behind the reference: express the (negative)
		distance as a positive forward distance modulo the
		group capacity, keeping all arithmetic unsigned. */
		difference = gr_lsn - lsn;

		difference = difference % group_size;

		difference = group_size - difference;
	}

	offset = (gr_lsn_size_offset + difference) % group_size;

	/* fprintf(stderr,
	"Offset is " LSN_PF " gr_lsn_offset is " LSN_PF
	" difference is " LSN_PF "\n",
	offset, gr_lsn_size_offset, difference);
	*/

	/* Re-insert the file headers into the data-only offset. */
	return(log_group_calc_real_offset(offset, group));
}
657 
658 /*******************************************************************//**
659 Calculates where in log files we find a specified lsn.
660 @return log file number */
661 ulint
log_calc_where_lsn_is(int64_t * log_file_offset,ib_uint64_t first_header_lsn,ib_uint64_t lsn,ulint n_log_files,int64_t log_file_size)662 log_calc_where_lsn_is(
663 /*==================*/
664 	int64_t*	log_file_offset,	/*!< out: offset in that file
665 						(including the header) */
666 	ib_uint64_t	first_header_lsn,	/*!< in: first log file start
667 						lsn */
668 	ib_uint64_t	lsn,			/*!< in: lsn whose position to
669 						determine */
670 	ulint		n_log_files,		/*!< in: total number of log
671 						files */
672 	int64_t		log_file_size)		/*!< in: log file size
673 						(including the header) */
674 {
675 	int64_t		capacity	= log_file_size - LOG_FILE_HDR_SIZE;
676 	ulint		file_no;
677 	int64_t		add_this_many;
678 
679 	if (lsn < first_header_lsn) {
680 		add_this_many = 1 + (first_header_lsn - lsn)
681 			/ (capacity * static_cast<int64_t>(n_log_files));
682 		lsn += add_this_many
683 			* capacity * static_cast<int64_t>(n_log_files);
684 	}
685 
686 	ut_a(lsn >= first_header_lsn);
687 
688 	file_no = ((ulint)((lsn - first_header_lsn) / capacity))
689 		% n_log_files;
690 	*log_file_offset = (lsn - first_header_lsn) % capacity;
691 
692 	*log_file_offset = *log_file_offset + LOG_FILE_HDR_SIZE;
693 
694 	return(file_no);
695 }
696 
697 
/********************************************************//**
Sets the field values in group to correspond to a given lsn. For this function
to work, the values must already be correctly initialized to correspond to
some lsn, for instance, a checkpoint lsn. */
void
log_group_set_fields(
/*=================*/
	log_group_t*	group,	/*!< in/out: group */
	lsn_t		lsn)	/*!< in: lsn for which the values should be
				set */
{
	/* Order matters: log_group_calc_lsn_offset() reads the group's
	current (lsn, lsn_offset) reference pair, so the new offset must
	be computed before group->lsn is overwritten. */
	group->lsn_offset = log_group_calc_lsn_offset(lsn, group);
	group->lsn = lsn;
}
712 #ifndef UNIV_HOTBACKUP
/*****************************************************************//**
Calculates the recommended highest values for lsn - last_checkpoint_lsn
and lsn - buf_get_oldest_modification().
@retval true on success
@retval false if the smallest log group is too small to
accommodate the number of OS threads in the database server */
static MY_ATTRIBUTE((warn_unused_result))
bool
log_calc_max_ages(void)
/*===================*/
{
	log_group_t*	group;
	lsn_t		margin;
	ulint		free;
	bool		success	= true;
	lsn_t		smallest_capacity;

	log_mutex_enter();

	group = UT_LIST_GET_FIRST(log_sys->log_groups);

	ut_ad(group);

	smallest_capacity = LSN_MAX;

	/* The limits must be derived from the smallest group, since a
	checkpoint must fit into every group. */
	while (group) {
		if (log_group_get_capacity(group) < smallest_capacity) {

			smallest_capacity = log_group_get_capacity(group);
		}

		group = UT_LIST_GET_NEXT(log_groups, group);
	}

	/* Add extra safety */
	smallest_capacity = smallest_capacity - smallest_capacity / 10;

	/* For each OS thread we must reserve so much free space in the
	smallest log group that it can accommodate the log entries produced
	by single query steps: running out of free log space is a serious
	system error which requires rebooting the database. */

	free = LOG_CHECKPOINT_FREE_PER_THREAD * (10 + srv_thread_concurrency)
		+ LOG_CHECKPOINT_EXTRA_FREE;
	if (free >= smallest_capacity / 2) {
		/* The reserved space would consume at least half the
		group: refuse and report below, outside the mutex. */
		success = false;

		goto failure;
	} else {
		margin = smallest_capacity - free;
	}

	margin = margin - margin / 10;	/* Add still some extra safety */

	log_sys->log_group_capacity = smallest_capacity;

	/* Async limits trigger background action earlier than the
	corresponding sync limits (the ratios divide more off margin). */
	log_sys->max_modified_age_async = margin
		- margin / LOG_POOL_PREFLUSH_RATIO_ASYNC;
	log_sys->max_modified_age_sync = margin
		- margin / LOG_POOL_PREFLUSH_RATIO_SYNC;

	log_sys->max_checkpoint_age_async = margin - margin
		/ LOG_POOL_CHECKPOINT_RATIO_ASYNC;
	log_sys->max_checkpoint_age = margin;

failure:
	log_mutex_exit();

	if (!success) {
		ib::error() << "Cannot continue operation. ib_logfiles are too"
			" small for innodb_thread_concurrency "
			<< srv_thread_concurrency << ". The combined size of"
			" ib_logfiles should be bigger than"
			" 200 kB * innodb_thread_concurrency. To get mysqld"
			" to start up, set innodb_thread_concurrency in"
			" my.cnf to a lower value, for example, to 8. After"
			" an ERROR-FREE shutdown of mysqld you can adjust"
			" the size of ib_logfiles. " << INNODB_PARAMETERS_MSG;
	}

	return(success);
}
795 
/******************************************************//**
Initializes the log.
Allocates log_sys, its mutexes, the (aligned) log buffer and the
checkpoint buffer, and primes the first log block at LOG_START_LSN. */
void
log_init(void)
/*==========*/
{
	log_sys = static_cast<log_t*>(ut_zalloc_nokey(sizeof(log_t)));

	mutex_create(LATCH_ID_LOG_SYS, &log_sys->mutex);
	mutex_create(LATCH_ID_LOG_WRITE, &log_sys->write_mutex);

	mutex_create(LATCH_ID_LOG_FLUSH_ORDER, &log_sys->log_flush_order_mutex);

	/* Start the lsn from one log block from zero: this way every
	log record has a start lsn != zero, a fact which we will use */

	log_sys->lsn = LOG_START_LSN;

	ut_a(LOG_BUFFER_SIZE >= 16 * OS_FILE_LOG_BLOCK_SIZE);
	ut_a(LOG_BUFFER_SIZE >= 4 * UNIV_PAGE_SIZE);

	log_sys->buf_size = LOG_BUFFER_SIZE;

	/* Allocate double the buffer size plus alignment slack: the
	buffer is used in two halves (see first_in_use) and must be
	aligned to OS_FILE_LOG_BLOCK_SIZE. */
	log_sys->buf_ptr = static_cast<byte*>(
		ut_zalloc_nokey(log_sys->buf_size * 2 + OS_FILE_LOG_BLOCK_SIZE));
	log_sys->buf = static_cast<byte*>(
		ut_align(log_sys->buf_ptr, OS_FILE_LOG_BLOCK_SIZE));

	log_sys->first_in_use = true;

	log_sys->max_buf_free = log_sys->buf_size / LOG_BUF_FLUSH_RATIO
		- LOG_BUF_FLUSH_MARGIN;
	log_sys->check_flush_or_checkpoint = true;
	UT_LIST_INIT(log_sys->log_groups, &log_group_t::log_groups);

	log_sys->n_log_ios_old = log_sys->n_log_ios;
	log_sys->last_printout_time = time(NULL);
	/*----------------------------*/

	log_sys->write_lsn = log_sys->lsn;

	log_sys->flush_event = os_event_create(0);

	/* Signalled: no flush is in progress initially. */
	os_event_set(log_sys->flush_event);

	/*----------------------------*/

	log_sys->last_checkpoint_lsn = log_sys->lsn;

	rw_lock_create(
		checkpoint_lock_key, &log_sys->checkpoint_lock,
		SYNC_NO_ORDER_CHECK);

	/* Checkpoint I/O buffer, over-allocated so it can be aligned. */
	log_sys->checkpoint_buf_ptr = static_cast<byte*>(
		ut_zalloc_nokey(2 * OS_FILE_LOG_BLOCK_SIZE));

	log_sys->checkpoint_buf = static_cast<byte*>(
		ut_align(log_sys->checkpoint_buf_ptr, OS_FILE_LOG_BLOCK_SIZE));

	/*----------------------------*/

	/* Prime the first log block; the usable lsn starts right after
	its header. */
	log_block_init(log_sys->buf, log_sys->lsn);
	log_block_set_first_rec_group(log_sys->buf, LOG_BLOCK_HDR_SIZE);

	log_sys->buf_free = LOG_BLOCK_HDR_SIZE;
	log_sys->lsn = LOG_START_LSN + LOG_BLOCK_HDR_SIZE;

	MONITOR_SET(MONITOR_LSN_CHECKPOINT_AGE,
		    log_sys->lsn - log_sys->last_checkpoint_lsn);
}
866 
867 /******************************************************************//**
868 Inits a log group to the log system.
869 @return true if success, false if not */
870 MY_ATTRIBUTE((warn_unused_result))
871 bool
log_group_init(ulint id,ulint n_files,lsn_t file_size,ulint space_id)872 log_group_init(
873 /*===========*/
874 	ulint	id,			/*!< in: group id */
875 	ulint	n_files,		/*!< in: number of log files */
876 	lsn_t	file_size,		/*!< in: log file size in bytes */
877 	ulint	space_id)		/*!< in: space id of the file space
878 					which contains the log files of this
879 					group */
880 {
881 	ulint	i;
882 	log_group_t*	group;
883 
884 	group = static_cast<log_group_t*>(ut_malloc_nokey(sizeof(log_group_t)));
885 
886 	group->id = id;
887 	group->n_files = n_files;
888 	group->format = LOG_HEADER_FORMAT_CURRENT;
889 	group->file_size = file_size;
890 	group->space_id = space_id;
891 	group->state = LOG_GROUP_OK;
892 	group->lsn = LOG_START_LSN;
893 	group->lsn_offset = LOG_FILE_HDR_SIZE;
894 	group->lsn_offset_ps55 = LOG_FILE_HDR_SIZE;
895 
896 	group->file_header_bufs_ptr = static_cast<byte**>(
897 		ut_zalloc_nokey(sizeof(byte*) * n_files));
898 
899 	group->file_header_bufs = static_cast<byte**>(
900 		ut_zalloc_nokey(sizeof(byte**) * n_files));
901 
902 	for (i = 0; i < n_files; i++) {
903 		group->file_header_bufs_ptr[i] = static_cast<byte*>(
904 			ut_zalloc_nokey(LOG_FILE_HDR_SIZE
905 					+ OS_FILE_LOG_BLOCK_SIZE));
906 
907 		group->file_header_bufs[i] = static_cast<byte*>(
908 			ut_align(group->file_header_bufs_ptr[i],
909 				 OS_FILE_LOG_BLOCK_SIZE));
910 	}
911 
912 	group->checkpoint_buf_ptr = static_cast<byte*>(
913 		ut_zalloc_nokey(2 * OS_FILE_LOG_BLOCK_SIZE));
914 
915 	group->checkpoint_buf = static_cast<byte*>(
916 		ut_align(group->checkpoint_buf_ptr,OS_FILE_LOG_BLOCK_SIZE));
917 
918 	UT_LIST_ADD_LAST(log_sys->log_groups, group);
919 
920 	return(log_calc_max_ages());
921 }
922 #endif /* !UNIV_HOTBACKUP */
/******************************************************//**
Completes an i/o to a log file.
Note: checkpoint writes tag the group pointer by setting its lowest
bit; a plain (untagged) group pointer is not expected here because
ordinary log writes are synchronous. */
void
log_io_complete(
/*============*/
	log_group_t*	group)	/*!< in: log group or a dummy pointer */
{
	if ((ulint) group & 0x1UL) {
		/* It was a checkpoint write */
		/* Strip the tag bit to recover the real pointer. */
		group = (log_group_t*)((ulint) group - 1);

#ifdef _WIN32
		fil_flush(group->space_id);
#else
		/* Only flush methods that rely on an explicit fsync
		need a flush call here; the others are durable (or
		deliberately not) without it. */
		switch (srv_unix_file_flush_method) {
		case SRV_UNIX_O_DSYNC:
		case SRV_UNIX_NOSYNC:
		case SRV_UNIX_ALL_O_DIRECT:
			break;
		case SRV_UNIX_FSYNC:
		case SRV_UNIX_LITTLESYNC:
		case SRV_UNIX_O_DIRECT:
		case SRV_UNIX_O_DIRECT_NO_FSYNC:
			fil_flush(group->space_id);
		}
#endif /* _WIN32 */

		DBUG_PRINT("ib_log", ("checkpoint info written to group %u",
				      unsigned(group->id)));
		log_io_complete_checkpoint();

		return;
	}

	ut_error;	/*!< We currently use synchronous writing of the
			logs and cannot end up here! */
}
960 
/******************************************************//**
Writes a log file header to a log file space.
This variant writes the legacy (pre-current, "v0") header layout. */
static
void
log_group_file_header_flush_0(
/*========================*/
	log_group_t*	group,		/*!< in: log group */
	ulint		nth_file,	/*!< in: header to the nth file in the
					log file space */
	lsn_t		start_lsn)	/*!< in: log file data starts at this
					lsn */
{
	byte*	buf;
	lsn_t	dest_offset;

	/* Byte offsets of fields within the legacy header layout. */
	/* log group number */
	static const uint GROUP_ID = 16;
	/* lsn of the start of data in this log file */
	static const uint FILE_START_LSN = 4;

	ut_ad(log_write_mutex_own());
	ut_ad(!recv_no_log_write);
	ut_a(nth_file < group->n_files);

	buf = *(group->file_header_bufs + nth_file);

	mach_write_to_4(buf + GROUP_ID, group->id);
	mach_write_to_8(buf + FILE_START_LSN, start_lsn);

	/* Wipe over possible label of mysqlbackup --restore */
	memcpy(buf + LOG_HEADER_CREATOR, "    ", 4);

	/* Record a non-default log block size so recovery can detect
	it; 512 is the default and is left implicit. */
	if (srv_log_block_size > 512) {
		mach_write_to_4(buf + LOG_FILE_OS_FILE_LOG_BLOCK_SIZE,
				srv_log_block_size);
	}

	/* The header sits at the very start of the nth file. */
	dest_offset = nth_file * group->file_size;

	DBUG_PRINT("ib_log", ("write " LSN_PF
			      " group " ULINTPF
			      " file " ULINTPF " header",
			      start_lsn, group->id, nth_file));

	log_sys->n_log_ios++;

	MONITOR_INC(MONITOR_LOG_IO);

	srv_stats.os_log_pending_writes.inc();

	/* Convert the byte offset into a (page_no, in-page offset)
	pair for fil_io(). */
	const ulint	page_no
		= (ulint) (dest_offset / univ_page_size.physical());

	fil_io(IORequestLogWrite, true,
	       page_id_t(group->space_id, page_no),
	       univ_page_size,
	       (ulint) (dest_offset % univ_page_size.physical()),
	       OS_FILE_LOG_BLOCK_SIZE, buf, group);

	srv_stats.os_log_pending_writes.dec();
}
1022 
1023 /******************************************************//**
1024 Writes a log file header to a log file space. */
1025 static
1026 void
log_group_file_header_flush(log_group_t * group,ulint nth_file,lsn_t start_lsn)1027 log_group_file_header_flush(
1028 /*========================*/
1029 	log_group_t*	group,		/*!< in: log group */
1030 	ulint		nth_file,	/*!< in: header to the nth file in the
1031 					log file space */
1032 	lsn_t		start_lsn)	/*!< in: log file data starts at this
1033 					lsn */
1034 {
1035 	byte*	buf;
1036 	lsn_t	dest_offset;
1037 
1038 	ut_ad(log_write_mutex_own());
1039 	ut_ad(!recv_no_log_write);
1040 	ut_ad(group->id == 0);
1041 	ut_a(nth_file < group->n_files);
1042 
1043 	if (redo_log_version == REDO_LOG_V0) {
1044 		log_group_file_header_flush_0(group, nth_file, start_lsn);
1045 		return;
1046 	}
1047 
1048 	buf = *(group->file_header_bufs + nth_file);
1049 
1050 	memset(buf, 0, OS_FILE_LOG_BLOCK_SIZE);
1051 	mach_write_to_4(buf + LOG_HEADER_FORMAT, LOG_HEADER_FORMAT_CURRENT);
1052 	mach_write_to_8(buf + LOG_HEADER_START_LSN, start_lsn);
1053 	strcpy(reinterpret_cast<char*>(buf) + LOG_HEADER_CREATOR,
1054 	       LOG_HEADER_CREATOR_CURRENT);
1055 	ut_ad(LOG_HEADER_CREATOR_END - LOG_HEADER_CREATOR
1056 	      >= sizeof LOG_HEADER_CREATOR_CURRENT);
1057 	log_block_set_checksum(buf, log_block_calc_checksum_crc32(buf));
1058 
1059 	if (srv_log_block_size > 512) {
1060 		mach_write_to_4(buf + LOG_FILE_OS_FILE_LOG_BLOCK_SIZE,
1061 				srv_log_block_size);
1062 	}
1063 
1064 	dest_offset = nth_file * group->file_size;
1065 
1066 	DBUG_PRINT("ib_log", ("write " LSN_PF
1067 			      " group " ULINTPF
1068 			      " file " ULINTPF " header",
1069 			      start_lsn, group->id, nth_file));
1070 
1071 	log_sys->n_log_ios++;
1072 
1073 	MONITOR_INC(MONITOR_LOG_IO);
1074 
1075 	srv_stats.os_log_pending_writes.inc();
1076 
1077 	const ulint	page_no
1078 		= (ulint) (dest_offset / univ_page_size.physical());
1079 
1080 	fil_io(IORequestLogWrite, true,
1081 	       page_id_t(group->space_id, page_no),
1082 	       univ_page_size,
1083 	       (ulint) (dest_offset % univ_page_size.physical()),
1084 	       OS_FILE_LOG_BLOCK_SIZE, buf, group);
1085 
1086 	srv_stats.os_log_pending_writes.dec();
1087 }
1088 
1089 /******************************************************//**
1090 Stores a 4-byte checksum to the trailer checksum field of a log block
1091 before writing it to a log file. This checksum is used in recovery to
1092 check the consistency of a log block. */
1093 static
1094 void
log_block_store_checksum(byte * block)1095 log_block_store_checksum(
1096 /*=====================*/
1097 	byte*	block)	/*!< in/out: pointer to a log block */
1098 {
1099 	log_block_set_checksum(block, log_block_calc_checksum(block));
1100 }
1101 
1102 /******************************************************//**
1103 Writes a buffer to a log file group. */
1104 static
1105 void
log_group_write_buf(log_group_t * group,byte * buf,ulint len,ulint pad_len,lsn_t start_lsn,ulint new_data_offset)1106 log_group_write_buf(
1107 /*================*/
1108 	log_group_t*	group,		/*!< in: log group */
1109 	byte*		buf,		/*!< in: buffer */
1110 	ulint		len,		/*!< in: buffer len; must be divisible
1111 					by OS_FILE_LOG_BLOCK_SIZE */
1112 #ifdef UNIV_DEBUG
1113 	ulint		pad_len,	/*!< in: pad len in the buffer len */
1114 #endif /* UNIV_DEBUG */
1115 	lsn_t		start_lsn,	/*!< in: start lsn of the buffer; must
1116 					be divisible by
1117 					OS_FILE_LOG_BLOCK_SIZE */
1118 	ulint		new_data_offset)/*!< in: start offset of new data in
1119 					buf: this parameter is used to decide
1120 					if we have to write a new log file
1121 					header */
1122 {
1123 	ulint		write_len;
1124 	bool		write_header	= new_data_offset == 0;
1125 	lsn_t		next_offset;
1126 	ulint		i;
1127 
1128 	ut_ad(log_write_mutex_own());
1129 	ut_ad(!recv_no_log_write);
1130 	ut_a(len % OS_FILE_LOG_BLOCK_SIZE == 0);
1131 	ut_a(start_lsn % OS_FILE_LOG_BLOCK_SIZE == 0);
1132 
1133 loop:
1134 	if (len == 0) {
1135 
1136 		return;
1137 	}
1138 
1139 	next_offset = log_group_calc_lsn_offset(start_lsn, group);
1140 
1141 	if (write_header
1142 	    && next_offset % group->file_size == LOG_FILE_HDR_SIZE) {
1143 		/* We start to write a new log file instance in the group */
1144 
1145 		ut_a(next_offset / group->file_size <= ULINT_MAX);
1146 
1147 		log_group_file_header_flush(group, (ulint)
1148 					    (next_offset / group->file_size),
1149 					    start_lsn);
1150 		srv_stats.os_log_written.add(OS_FILE_LOG_BLOCK_SIZE);
1151 
1152 		srv_stats.log_writes.inc();
1153 	}
1154 
1155 	if ((next_offset % group->file_size) + len > group->file_size) {
1156 
1157 		/* if the above condition holds, then the below expression
1158 		is < len which is ulint, so the typecast is ok */
1159 		write_len = (ulint)
1160 			(group->file_size - (next_offset % group->file_size));
1161 	} else {
1162 		write_len = len;
1163 	}
1164 
1165 	DBUG_PRINT("ib_log",
1166 		   ("write " LSN_PF " to " LSN_PF
1167 		    ": group " ULINTPF " len " ULINTPF
1168 		    " blocks " ULINTPF ".." ULINTPF,
1169 		    start_lsn, next_offset,
1170 		    group->id, write_len,
1171 		    log_block_get_hdr_no(buf),
1172 		    log_block_get_hdr_no(
1173 			    buf + write_len
1174 			    - OS_FILE_LOG_BLOCK_SIZE)));
1175 
1176 	ut_ad(pad_len >= len
1177 	      || log_block_get_hdr_no(buf)
1178 		 == log_block_convert_lsn_to_no(start_lsn));
1179 
1180 	/* Calculate the checksums for each log block and write them to
1181 	the trailer fields of the log blocks */
1182 
1183 	for (i = 0; i < write_len / OS_FILE_LOG_BLOCK_SIZE; i++) {
1184 		ut_ad(pad_len >= len
1185 		      || i * OS_FILE_LOG_BLOCK_SIZE >= len - pad_len
1186 		      || log_block_get_hdr_no(
1187 			      buf + i * OS_FILE_LOG_BLOCK_SIZE)
1188 			 == log_block_get_hdr_no(buf) + i);
1189 		log_block_store_checksum(buf + i * OS_FILE_LOG_BLOCK_SIZE);
1190 	}
1191 
1192 	log_sys->n_log_ios++;
1193 
1194 	MONITOR_INC(MONITOR_LOG_IO);
1195 
1196 	srv_stats.os_log_pending_writes.inc();
1197 
1198 	ut_a(next_offset / UNIV_PAGE_SIZE <= ULINT_MAX);
1199 
1200 	const ulint	page_no
1201 		= (ulint) (next_offset / univ_page_size.physical());
1202 
1203 	fil_io(IORequestLogWrite, true,
1204 	       page_id_t(group->space_id, page_no),
1205 	       univ_page_size,
1206 	       (ulint) (next_offset % UNIV_PAGE_SIZE), write_len, buf,
1207 	       group);
1208 
1209 	srv_stats.os_log_pending_writes.dec();
1210 
1211 	srv_stats.os_log_written.add(write_len);
1212 	srv_stats.log_writes.inc();
1213 
1214 	if (write_len < len) {
1215 		start_lsn += write_len;
1216 		len -= write_len;
1217 		buf += write_len;
1218 
1219 		write_header = true;
1220 
1221 		goto loop;
1222 	}
1223 }
1224 
1225 /** Flush the log has been written to the log file. */
1226 static
1227 void
log_write_flush_to_disk_low()1228 log_write_flush_to_disk_low()
1229 {
1230 	ut_a(log_sys->n_pending_flushes == 1); /* No other threads here */
1231 
1232 #ifndef _WIN32
1233 	bool	do_flush = srv_unix_file_flush_method != SRV_UNIX_O_DSYNC;
1234 #else
1235 	bool	do_flush = true;
1236 #endif
1237 	if (do_flush) {
1238 		log_group_t*	group = UT_LIST_GET_FIRST(log_sys->log_groups);
1239 		fil_flush(group->space_id);
1240 		log_sys->flushed_to_disk_lsn = log_sys->current_flush_lsn;
1241 	}
1242 
1243 	log_sys->n_pending_flushes--;
1244 	MONITOR_DEC(MONITOR_PENDING_LOG_FLUSH);
1245 
1246 	os_event_set(log_sys->flush_event);
1247 }
1248 
1249 /** Switch the log buffer in use, and copy the content of last block
1250 from old log buffer to the head of the to be used one. Thus, buf_free and
1251 buf_next_to_write would be changed accordingly */
1252 static inline
1253 void
log_buffer_switch()1254 log_buffer_switch()
1255 {
1256 	ut_ad(log_mutex_own());
1257 	ut_ad(log_write_mutex_own());
1258 
1259 	const byte*	old_buf = log_sys->buf;
1260 	ulint		area_end = ut_calc_align(log_sys->buf_free,
1261 						 OS_FILE_LOG_BLOCK_SIZE);
1262 
1263 	if (log_sys->first_in_use) {
1264 		ut_ad(log_sys->buf == ut_align(log_sys->buf_ptr,
1265 					       OS_FILE_LOG_BLOCK_SIZE));
1266 		log_sys->buf += log_sys->buf_size;
1267 	} else {
1268 		log_sys->buf -= log_sys->buf_size;
1269 		ut_ad(log_sys->buf == ut_align(log_sys->buf_ptr,
1270 					       OS_FILE_LOG_BLOCK_SIZE));
1271 	}
1272 
1273 	log_sys->first_in_use = !log_sys->first_in_use;
1274 
1275 	/* Copy the last block to new buf */
1276 	ut_memcpy(log_sys->buf,
1277 		  old_buf + area_end - OS_FILE_LOG_BLOCK_SIZE,
1278 		  OS_FILE_LOG_BLOCK_SIZE);
1279 
1280 	log_sys->buf_free %= OS_FILE_LOG_BLOCK_SIZE;
1281 	log_sys->buf_next_to_write = log_sys->buf_free;
1282 }
1283 
1284 /** Ensure that the log has been written to the log file up to a given
1285 log entry (such as that of a transaction commit). Start a new write, or
1286 wait and check if an already running write is covering the request.
1287 @param[in]	lsn		log sequence number that should be
1288 included in the redo log file write
1289 @param[in]	flush_to_disk	whether the written log should also
1290 be flushed to the file system */
1291 void
log_write_up_to(lsn_t lsn,bool flush_to_disk)1292 log_write_up_to(
1293 	lsn_t	lsn,
1294 	bool	flush_to_disk)
1295 {
1296 #ifdef UNIV_DEBUG
1297 	ulint		loop_count	= 0;
1298 #endif /* UNIV_DEBUG */
1299 	byte*           write_buf;
1300 	lsn_t           write_lsn;
1301 
1302 	ut_ad(!srv_read_only_mode);
1303 
1304 	if (recv_no_ibuf_operations) {
1305 		/* Recovery is running and no operations on the log files are
1306 		allowed yet (the variable name .._no_ibuf_.. is misleading) */
1307 
1308 		return;
1309 	}
1310 
1311 loop:
1312 	ut_ad(++loop_count < 128);
1313 
1314 #if UNIV_WORD_SIZE > 7
1315 	/* We can do a dirty read of LSN. */
1316 	/* NOTE: Currently doesn't do dirty read for
1317 	(flush_to_disk == true) case, because the log_mutex
1318 	contention also works as the arbitrator for write-IO
1319 	(fsync) bandwidth between log files and data files. */
1320 	os_rmb;
1321 	if (!flush_to_disk && log_sys->write_lsn >= lsn) {
1322 		return;
1323 	}
1324 #endif
1325 
1326 	log_write_mutex_enter();
1327 	ut_ad(!recv_no_log_write);
1328 
1329 	lsn_t	limit_lsn = flush_to_disk
1330 		? log_sys->flushed_to_disk_lsn
1331 		: log_sys->write_lsn;
1332 
1333 	if (limit_lsn >= lsn) {
1334 		log_write_mutex_exit();
1335 		return;
1336 	}
1337 
1338 #ifdef _WIN32
1339 # ifndef UNIV_HOTBACKUP
1340 	/* write requests during fil_flush() might not be good for Windows */
1341 	if (log_sys->n_pending_flushes > 0
1342 	    || !os_event_is_set(log_sys->flush_event)) {
1343 		log_write_mutex_exit();
1344 		os_event_wait(log_sys->flush_event);
1345 		goto loop;
1346 	}
1347 # else
1348 	if (log_sys->n_pending_flushes > 0) {
1349 		goto loop;
1350 	}
1351 # endif  /* !UNIV_HOTBACKUP */
1352 #endif /* _WIN32 */
1353 
1354 	/* If it is a write call we should just go ahead and do it
1355 	as we checked that write_lsn is not where we'd like it to
1356 	be. If we have to flush as well then we check if there is a
1357 	pending flush and based on that we wait for it to finish
1358 	before proceeding further. */
1359 	if (flush_to_disk
1360 	    && (log_sys->n_pending_flushes > 0
1361 		|| !os_event_is_set(log_sys->flush_event))) {
1362 
1363 		/* Figure out if the current flush will do the job
1364 		for us. */
1365 		bool work_done = log_sys->current_flush_lsn >= lsn;
1366 
1367 		log_write_mutex_exit();
1368 
1369 		os_event_wait(log_sys->flush_event);
1370 
1371 		if (work_done) {
1372 			return;
1373 		} else {
1374 			goto loop;
1375 		}
1376 	}
1377 
1378 	log_mutex_enter();
1379 	if (!flush_to_disk
1380 	    && log_sys->buf_free == log_sys->buf_next_to_write) {
1381 		/* Nothing to write and no flush to disk requested */
1382 		log_mutex_exit_all();
1383 		return;
1384 	}
1385 
1386 	log_group_t*	group;
1387 	ulint		start_offset;
1388 	ulint		end_offset;
1389 	ulint		area_start;
1390 	ulint		area_end;
1391 	ulong		write_ahead_size = srv_log_write_ahead_size;
1392 	ulint		pad_size;
1393 
1394 	DBUG_PRINT("ib_log", ("write " LSN_PF " to " LSN_PF,
1395 			      log_sys->write_lsn,
1396 			      log_sys->lsn));
1397 
1398 	if (flush_to_disk) {
1399 		log_sys->n_pending_flushes++;
1400 		log_sys->current_flush_lsn = log_sys->lsn;
1401 		MONITOR_INC(MONITOR_PENDING_LOG_FLUSH);
1402 		os_event_reset(log_sys->flush_event);
1403 
1404 		if (log_sys->buf_free == log_sys->buf_next_to_write) {
1405 			/* Nothing to write, flush only */
1406 			log_mutex_exit_all();
1407 			log_write_flush_to_disk_low();
1408 			return;
1409 		}
1410 	}
1411 
1412 	start_offset = log_sys->buf_next_to_write;
1413 	end_offset = log_sys->buf_free;
1414 
1415 	area_start = ut_calc_align_down(start_offset, OS_FILE_LOG_BLOCK_SIZE);
1416 	area_end = ut_calc_align(end_offset, OS_FILE_LOG_BLOCK_SIZE);
1417 
1418 	ut_ad(area_end - area_start > 0);
1419 
1420 	log_block_set_flush_bit(log_sys->buf + area_start, TRUE);
1421 	log_block_set_checkpoint_no(
1422 		log_sys->buf + area_end - OS_FILE_LOG_BLOCK_SIZE,
1423 		log_sys->next_checkpoint_no);
1424 
1425 	write_lsn = log_sys->lsn;
1426 	write_buf = log_sys->buf;
1427 
1428 	log_buffer_switch();
1429 
1430 	group = UT_LIST_GET_FIRST(log_sys->log_groups);
1431 
1432 	log_group_set_fields(group, log_sys->write_lsn);
1433 
1434 	log_mutex_exit();
1435 
1436 	/* Calculate pad_size if needed. */
1437 	pad_size = 0;
1438 	if (write_ahead_size > OS_FILE_LOG_BLOCK_SIZE) {
1439 		lsn_t	end_offset;
1440 		ulint	end_offset_in_unit;
1441 
1442 		end_offset = log_group_calc_lsn_offset(
1443 			ut_uint64_align_up(write_lsn,
1444 					   OS_FILE_LOG_BLOCK_SIZE),
1445 			group);
1446 		end_offset_in_unit = (ulint) (end_offset % write_ahead_size);
1447 
1448 		if (end_offset_in_unit > 0
1449 		    && (area_end - area_start) > end_offset_in_unit) {
1450 			/* The first block in the unit was initialized
1451 			after the last writing.
1452 			Needs to be written padded data once. */
1453 			pad_size = write_ahead_size - end_offset_in_unit;
1454 
1455 			if (area_end + pad_size > log_sys->buf_size) {
1456 				pad_size = log_sys->buf_size - area_end;
1457 			}
1458 
1459 			::memset(write_buf + area_end, 0, pad_size);
1460 		}
1461 	}
1462 
1463 	/* Do the write to the log files */
1464 	log_group_write_buf(
1465 		group, write_buf + area_start,
1466 		area_end - area_start + pad_size,
1467 #ifdef UNIV_DEBUG
1468 		pad_size,
1469 #endif /* UNIV_DEBUG */
1470 		ut_uint64_align_down(log_sys->write_lsn,
1471 				     OS_FILE_LOG_BLOCK_SIZE),
1472 		start_offset - area_start);
1473 
1474 	srv_stats.log_padded.add(pad_size);
1475 
1476 	log_sys->write_lsn = write_lsn;
1477 
1478 #ifndef _WIN32
1479 	if (srv_unix_file_flush_method == SRV_UNIX_O_DSYNC ||
1480 	    srv_unix_file_flush_method != SRV_UNIX_ALL_O_DIRECT) {
1481 		/* O_DSYNC or SRV_UNIX_ALL_O_DIRECT means the OS did not buffer
1482 		the log file at all: so we have also flushed to disk what we
1483 		have written */
1484 		log_sys->flushed_to_disk_lsn = log_sys->write_lsn;
1485 	}
1486 #endif /* !_WIN32 */
1487 
1488 	log_write_mutex_exit();
1489 
1490 	if (flush_to_disk) {
1491 		log_write_flush_to_disk_low();
1492 	}
1493 }
1494 
1495 /** write to the log file up to the last log entry.
1496 @param[in]	sync	whether we want the written log
1497 also to be flushed to disk. */
1498 void
log_buffer_flush_to_disk(bool sync)1499 log_buffer_flush_to_disk(
1500 	bool sync)
1501 {
1502 	ut_ad(!srv_read_only_mode);
1503 	log_write_up_to(log_get_lsn(), sync);
1504 }
1505 
1506 /****************************************************************//**
1507 This functions writes the log buffer to the log file and if 'flush'
1508 is set it forces a flush of the log file as well. This is meant to be
1509 called from background master thread only as it does not wait for
1510 the write (+ possible flush) to finish. */
1511 void
log_buffer_sync_in_background(bool flush)1512 log_buffer_sync_in_background(
1513 /*==========================*/
1514 	bool	flush)	/*!< in: flush the logs to disk */
1515 {
1516 	lsn_t	lsn;
1517 
1518 	log_mutex_enter();
1519 
1520 	lsn = log_sys->lsn;
1521 
1522 	if (flush
1523 	    && log_sys->n_pending_flushes > 0
1524 	    && log_sys->current_flush_lsn >= lsn) {
1525 		/* The write + flush will write enough */
1526 		log_mutex_exit();
1527 		return;
1528 	}
1529 
1530 	log_mutex_exit();
1531 
1532 	log_write_up_to(lsn, flush);
1533 }
1534 
1535 /********************************************************************
1536 
1537 Tries to establish a big enough margin of free space in the log buffer, such
1538 that a new log entry can be catenated without an immediate need for a flush. */
1539 static
1540 void
log_flush_margin(void)1541 log_flush_margin(void)
1542 /*==================*/
1543 {
1544 	log_t*	log	= log_sys;
1545 	lsn_t	lsn	= 0;
1546 
1547 	log_mutex_enter();
1548 
1549 	if (log->buf_free > log->max_buf_free) {
1550 		/* We can write during flush */
1551 		lsn = log->lsn;
1552 	}
1553 
1554 	log_mutex_exit();
1555 
1556 	if (lsn) {
1557 		log_write_up_to(lsn, false);
1558 	}
1559 }
1560 #ifndef UNIV_HOTBACKUP
1561 /** Advances the smallest lsn for which there are unflushed dirty blocks in the
1562 buffer pool.
1563 NOTE: this function may only be called if the calling thread owns no
1564 synchronization objects!
1565 @param[in]	new_oldest	try to advance oldest_modified_lsn at least to
1566 this lsn
1567 @return false if there was a flush batch of the same type running,
1568 which means that we could not start this flush batch */
1569 static
1570 bool
log_preflush_pool_modified_pages(lsn_t new_oldest)1571 log_preflush_pool_modified_pages(
1572 	lsn_t			new_oldest)
1573 {
1574 	bool	success;
1575 
1576 	if (recv_recovery_on) {
1577 		/* If the recovery is running, we must first apply all
1578 		log records to their respective file pages to get the
1579 		right modify lsn values to these pages: otherwise, there
1580 		might be pages on disk which are not yet recovered to the
1581 		current lsn, and even after calling this function, we could
1582 		not know how up-to-date the disk version of the database is,
1583 		and we could not make a new checkpoint on the basis of the
1584 		info on the buffer pool only. */
1585 
1586 		recv_apply_hashed_log_recs(TRUE);
1587 	}
1588 
1589 	if (new_oldest == LSN_MAX
1590 	    || !buf_page_cleaner_is_active
1591 	    || srv_is_being_started) {
1592 
1593 		ulint	n_pages;
1594 
1595 		success = buf_flush_lists(ULINT_MAX, new_oldest, &n_pages);
1596 
1597 		buf_flush_wait_batch_end(NULL, BUF_FLUSH_LIST);
1598 
1599 		if (!success) {
1600 			MONITOR_INC(MONITOR_FLUSH_SYNC_WAITS);
1601 		}
1602 
1603 		MONITOR_INC_VALUE_CUMULATIVE(
1604 			MONITOR_FLUSH_SYNC_TOTAL_PAGE,
1605 			MONITOR_FLUSH_SYNC_COUNT,
1606 			MONITOR_FLUSH_SYNC_PAGES,
1607 			n_pages);
1608 	} else {
1609 		/* better to wait for flushed by page cleaner */
1610 
1611 		if (srv_flush_sync) {
1612 			/* wake page cleaner for IO burst */
1613 			buf_flush_request_force(new_oldest);
1614 		}
1615 
1616 		buf_flush_wait_flushed(new_oldest);
1617 
1618 		success = true;
1619 	}
1620 
1621 	return(success);
1622 }
1623 #endif /* !UNIV_HOTBACKUP */
1624 /******************************************************//**
1625 Completes a checkpoint. */
1626 static
1627 void
log_complete_checkpoint(void)1628 log_complete_checkpoint(void)
1629 /*=========================*/
1630 {
1631 	ut_ad(log_mutex_own());
1632 	ut_ad(log_sys->n_pending_checkpoint_writes == 0);
1633 
1634 	log_sys->next_checkpoint_no++;
1635 
1636 	log_sys->last_checkpoint_lsn = log_sys->next_checkpoint_lsn;
1637 	MONITOR_SET(MONITOR_LSN_CHECKPOINT_AGE,
1638 		    log_sys->lsn - log_sys->last_checkpoint_lsn);
1639 
1640 	DBUG_PRINT("ib_log", ("checkpoint ended at " LSN_PF
1641 			      ", flushed to " LSN_PF,
1642 			      log_sys->last_checkpoint_lsn,
1643 			      log_sys->flushed_to_disk_lsn));
1644 
1645 	rw_lock_x_unlock_gen(&(log_sys->checkpoint_lock), LOG_CHECKPOINT);
1646 }
1647 
1648 /******************************************************//**
1649 Completes an asynchronous checkpoint info write i/o to a log file. */
1650 static
1651 void
log_io_complete_checkpoint(void)1652 log_io_complete_checkpoint(void)
1653 /*============================*/
1654 {
1655 	MONITOR_DEC(MONITOR_PENDING_CHECKPOINT_WRITE);
1656 
1657 	log_mutex_enter();
1658 
1659 	ut_ad(log_sys->n_pending_checkpoint_writes > 0);
1660 
1661 	if (--log_sys->n_pending_checkpoint_writes == 0) {
1662 		log_complete_checkpoint();
1663 	}
1664 
1665 	log_mutex_exit();
1666 }
1667 
/******************************************************//**
Writes checkpoint info in the legacy (REDO_LOG_V0) on-disk format to a
log group header. The i/o is posted asynchronously; the tagged message
pointer (group address + 1) lets the i/o completion code tell a
checkpoint write apart from a normal log write. */
static
void
log_group_checkpoint_0(
/*===================*/
	log_group_t*	group)	/*!< in: log group */
{
	ulint		fold;
	byte*		buf;
	lsn_t		lsn_offset;

	/** Offset of the first checkpoint checksum */
	static const uint CHECKSUM_1 = 288;
	/** Offset of the second checkpoint checksum */
	static const uint CHECKSUM_2 = CHECKSUM_1 + 4;
	/** Most significant bits of the checkpoint offset */
	static const uint OFFSET_HIGH32 = CHECKSUM_2 + 12;
	/** Least significant bits of the checkpoint offset */
	static const uint OFFSET_LOW32 = 16;
	/** Checkpoint offset read by PS 5.5 */
	static const uint ARCHIVED_LSN = 24;

	buf = group->checkpoint_buf;

	mach_write_to_8(buf + LOG_CHECKPOINT_NO, log_sys->next_checkpoint_no);
	mach_write_to_8(buf + LOG_CHECKPOINT_LSN, log_sys->next_checkpoint_lsn);

	/* The 64-bit checkpoint offset is split into two 32-bit fields. */
	lsn_offset = log_group_calc_lsn_offset(log_sys->next_checkpoint_lsn,
					       group);
	mach_write_to_4(buf + OFFSET_LOW32, lsn_offset & 0xFFFFFFFFUL);
	mach_write_to_4(buf + OFFSET_HIGH32, lsn_offset >> 32);

	mach_write_to_4(buf + LOG_CHECKPOINT_LOG_BUF_SIZE, log_sys->buf_size);

	/* NOTE(review): the checkpoint offset is duplicated into the old
	archived-lsn slot, apparently for Percona Server 5.5 readers (see
	the ARCHIVED_LSN constant above) — confirm before changing. */
	mach_write_to_8(buf + ARCHIVED_LSN, lsn_offset);

	/* Legacy format uses two folded checksums instead of CRC-32. */
	fold = ut_fold_binary(buf, CHECKSUM_1);
	mach_write_to_4(buf + CHECKSUM_1, fold);

	fold = ut_fold_binary(buf + LOG_CHECKPOINT_LSN,
			      CHECKSUM_2 - LOG_CHECKPOINT_LSN);
	mach_write_to_4(buf + CHECKSUM_2, fold);

	MONITOR_INC(MONITOR_PENDING_CHECKPOINT_WRITE);

	log_sys->n_log_ios++;

	MONITOR_INC(MONITOR_LOG_IO);

	/* The first pending write x-latches the checkpoint lock; it is
	released by log_complete_checkpoint() when all writes finish. */
	if (log_sys->n_pending_checkpoint_writes++ == 0) {
		rw_lock_x_lock_gen(&log_sys->checkpoint_lock,
				   LOG_CHECKPOINT);
	}

	/* Note: We alternate the physical place of the checkpoint info.
	See the (next_checkpoint_no & 1) below. */

	/* We send as the last parameter the group machine address
	added with 1, as we want to distinguish between a normal log
	file write and a checkpoint field write */

	fil_io(IORequestLogWrite, false,
	       page_id_t(group->space_id, 0),
	       univ_page_size,
	       (log_sys->next_checkpoint_no & 1)
	       ? LOG_CHECKPOINT_2 : LOG_CHECKPOINT_1,
	       OS_FILE_LOG_BLOCK_SIZE,
	       buf, (byte*) group + 1);

	/* The tagging trick above requires the low bit to be free. */
	ut_ad(((ulint) group & 0x1UL) == 0);
}
1738 
1739 /******************************************************//**
1740 Writes the checkpoint info to a log group header. */
1741 static
1742 void
log_group_checkpoint(log_group_t * group)1743 log_group_checkpoint(
1744 /*=================*/
1745 	log_group_t*	group)	/*!< in: log group */
1746 {
1747 	lsn_t		lsn_offset;
1748 	byte*		buf;
1749 
1750 	ut_ad(!srv_read_only_mode);
1751 	ut_ad(LOG_CHECKPOINT_SIZE <= OS_FILE_LOG_BLOCK_SIZE);
1752 	ut_ad(log_mutex_own());
1753 
1754 	DBUG_PRINT("ib_log", ("checkpoint " UINT64PF " at " LSN_PF
1755 			      " written to group " ULINTPF,
1756 			      log_sys->next_checkpoint_no,
1757 			      log_sys->next_checkpoint_lsn,
1758 			      group->id));
1759 
1760 	if (redo_log_version == REDO_LOG_V0) {
1761 		log_group_checkpoint_0(group);
1762 		return;
1763 	}
1764 
1765 	buf = group->checkpoint_buf;
1766 	memset(buf, 0, OS_FILE_LOG_BLOCK_SIZE);
1767 
1768 	mach_write_to_8(buf + LOG_CHECKPOINT_NO, log_sys->next_checkpoint_no);
1769 	mach_write_to_8(buf + LOG_CHECKPOINT_LSN, log_sys->next_checkpoint_lsn);
1770 
1771 	lsn_offset = log_group_calc_lsn_offset(log_sys->next_checkpoint_lsn,
1772 					       group);
1773 	mach_write_to_8(buf + LOG_CHECKPOINT_OFFSET, lsn_offset);
1774 	mach_write_to_8(buf + LOG_CHECKPOINT_LOG_BUF_SIZE, log_sys->buf_size);
1775 
1776 	log_block_set_checksum(buf, log_block_calc_checksum_crc32(buf));
1777 
1778 	MONITOR_INC(MONITOR_PENDING_CHECKPOINT_WRITE);
1779 
1780 	log_sys->n_log_ios++;
1781 
1782 	MONITOR_INC(MONITOR_LOG_IO);
1783 
1784 	ut_ad(LOG_CHECKPOINT_1 < univ_page_size.physical());
1785 	ut_ad(LOG_CHECKPOINT_2 < univ_page_size.physical());
1786 
1787 	if (log_sys->n_pending_checkpoint_writes++ == 0) {
1788 		rw_lock_x_lock_gen(&log_sys->checkpoint_lock,
1789 				   LOG_CHECKPOINT);
1790 	}
1791 
1792 	/* Note: We alternate the physical place of the checkpoint info.
1793 	See the (next_checkpoint_no & 1) below. */
1794 
1795 	/* We send as the last parameter the group machine address
1796 	added with 1, as we want to distinguish between a normal log
1797 	file write and a checkpoint field write */
1798 
1799 	fil_io(IORequestLogWrite, false,
1800 	       page_id_t(group->space_id, 0),
1801 	       univ_page_size,
1802 	       (log_sys->next_checkpoint_no & 1)
1803 	       ? LOG_CHECKPOINT_2 : LOG_CHECKPOINT_1,
1804 	       OS_FILE_LOG_BLOCK_SIZE,
1805 	       buf, (byte*) group + 1);
1806 
1807 	ut_ad(((ulint) group & 0x1UL) == 0);
1808 }
1809 
1810 #ifdef UNIV_HOTBACKUP
1811 /******************************************************//**
1812 Writes info to a buffer of a log group when log files are created in
1813 backup restoration. */
1814 void
log_reset_first_header_and_checkpoint(byte * hdr_buf,ib_uint64_t start)1815 log_reset_first_header_and_checkpoint(
1816 /*==================================*/
1817 	byte*		hdr_buf,/*!< in: buffer which will be written to the
1818 				start of the first log file */
1819 	ib_uint64_t	start)	/*!< in: lsn of the start of the first log file;
1820 				we pretend that there is a checkpoint at
1821 				start + LOG_BLOCK_HDR_SIZE */
1822 {
1823 	byte*		buf;
1824 	ib_uint64_t	lsn;
1825 
1826 	mach_write_to_4(hdr_buf + LOG_HEADER_FORMAT,
1827 			LOG_HEADER_FORMAT_CURRENT);
1828 	mach_write_to_8(hdr_buf + LOG_HEADER_START_LSN, start);
1829 
1830 	lsn = start + LOG_BLOCK_HDR_SIZE;
1831 
1832 	/* Write the label of mysqlbackup --restore */
1833 	strcpy((char*)hdr_buf + LOG_HEADER_CREATOR, LOG_HEADER_CREATOR_CURRENT);
1834 	ut_sprintf_timestamp((char*) hdr_buf
1835 			     + (LOG_HEADER_CREATOR
1836 			     + (sizeof LOG_HEADER_CREATOR_CURRENT) - 1));
1837 	buf = hdr_buf + LOG_CHECKPOINT_1;
1838 	memset(buf, 0, OS_FILE_LOG_BLOCK_SIZE);
1839 
1840 	/*mach_write_to_8(buf + LOG_CHECKPOINT_NO, 0);*/
1841 	mach_write_to_8(buf + LOG_CHECKPOINT_LSN, lsn);
1842 
1843 	mach_write_to_8(buf + LOG_CHECKPOINT_OFFSET,
1844 			LOG_FILE_HDR_SIZE + LOG_BLOCK_HDR_SIZE);
1845 	mach_write_to_8(buf + LOG_CHECKPOINT_LOG_BUF_SIZE, 2 * 1024 * 1024);
1846 
1847 	log_block_set_checksum(buf, log_block_calc_checksum_crc32(buf));
1848 }
1849 #endif /* UNIV_HOTBACKUP */
1850 
1851 #ifndef UNIV_HOTBACKUP
1852 /** Read a log group header page to log_sys->checkpoint_buf.
1853 @param[in]	group	log group
1854 @param[in]	header	0 or LOG_CHEKCPOINT_1 or LOG_CHECKPOINT2 */
1855 void
log_group_header_read(const log_group_t * group,ulint header)1856 log_group_header_read(
1857 	const log_group_t*	group,
1858 	ulint			header)
1859 {
1860 	ut_ad(log_mutex_own());
1861 
1862 	log_sys->n_log_ios++;
1863 
1864 	MONITOR_INC(MONITOR_LOG_IO);
1865 
1866 	fil_io(IORequestLogRead, true,
1867 	       page_id_t(group->space_id, header / univ_page_size.physical()),
1868 	       univ_page_size, header % univ_page_size.physical(),
1869 	       OS_FILE_LOG_BLOCK_SIZE, log_sys->checkpoint_buf, NULL);
1870 }
1871 
1872 /** Write checkpoint info to the log header and invoke log_mutex_exit().
1873 @param[in]	sync	whether to wait for the write to complete */
1874 void
log_write_checkpoint_info(bool sync)1875 log_write_checkpoint_info(
1876 	bool	sync)
1877 {
1878 	log_group_t*	group;
1879 
1880 	ut_ad(log_mutex_own());
1881 
1882 	if (!srv_read_only_mode) {
1883 		for (group = UT_LIST_GET_FIRST(log_sys->log_groups);
1884 		     group;
1885 		     group = UT_LIST_GET_NEXT(log_groups, group)) {
1886 
1887 			log_group_checkpoint(group);
1888 		}
1889 	}
1890 
1891 	log_mutex_exit();
1892 
1893 	MONITOR_INC(MONITOR_NUM_CHECKPOINT);
1894 
1895 	if (sync) {
1896 		/* Wait for the checkpoint write to complete */
1897 		rw_lock_s_lock(&log_sys->checkpoint_lock);
1898 		rw_lock_s_unlock(&log_sys->checkpoint_lock);
1899 
1900 		DEBUG_SYNC_C("checkpoint_completed");
1901 
1902 		DBUG_EXECUTE_IF(
1903 			"crash_after_checkpoint",
1904 			DBUG_SUICIDE(););
1905 	}
1906 }
1907 
1908 /** Set extra data to be written to the redo log during checkpoint.
1909 @param[in]	buf	data to be appended on checkpoint, or NULL
1910 @return pointer to previous data to be appended on checkpoint */
1911 mtr_buf_t*
log_append_on_checkpoint(mtr_buf_t * buf)1912 log_append_on_checkpoint(
1913 	mtr_buf_t*	buf)
1914 {
1915 	log_mutex_enter();
1916 	mtr_buf_t*	old = log_sys->append_on_checkpoint;
1917 	log_sys->append_on_checkpoint = buf;
1918 	log_mutex_exit();
1919 	return(old);
1920 }
1921 
/** Make a checkpoint. Note that this function does not flush dirty
blocks from the buffer pool: it only checks what is lsn of the oldest
modification in the pool, and writes information about the lsn in
log files. Use log_make_checkpoint_at() to flush also the pool.
@param[in]	sync		whether to wait for the write to complete
@param[in]	write_always	force a write even if no log
has been generated since the latest checkpoint
@return true if success, false if a checkpoint write was already running */
bool
log_checkpoint(
	bool	sync,
	bool	write_always)
{
	lsn_t	oldest_lsn;

	ut_ad(!srv_read_only_mode);

	/* During recovery, all buffered redo must be applied before a
	checkpoint may be taken; otherwise the checkpoint could make
	still-needed log records unreachable. */
	if (recv_recovery_is_on()) {
		recv_apply_hashed_log_recs(TRUE);
	}

#ifndef _WIN32
	switch (srv_unix_file_flush_method) {
	case SRV_UNIX_NOSYNC:
	case SRV_UNIX_ALL_O_DIRECT:
		break;
	case SRV_UNIX_O_DSYNC:
	case SRV_UNIX_FSYNC:
	case SRV_UNIX_LITTLESYNC:
	case SRV_UNIX_O_DIRECT:
	case SRV_UNIX_O_DIRECT_NO_FSYNC:
		/* Ensure previously written tablespace pages are durable
		on disk before the checkpoint may advance past them.
		(Intentional fall-through of all fsync-like methods.) */
		fil_flush_file_spaces(FIL_TYPE_TABLESPACE);
	}
#endif /* !_WIN32 */

	log_mutex_enter();

	ut_ad(!recv_no_log_write);
	oldest_lsn = log_buf_pool_get_oldest_modification();

	/* Because log also contains headers and dummy log records,
	log_buf_pool_get_oldest_modification() will return log_sys->lsn
	if the buffer pool contains no dirty buffers.
	We must make sure that the log is flushed up to that lsn.
	If there are dirty buffers in the buffer pool, then our
	write-ahead-logging algorithm ensures that the log has been
	flushed up to oldest_lsn. */

	ut_ad(oldest_lsn >= log_sys->last_checkpoint_lsn);
	if (!write_always
	    && oldest_lsn
	    <= log_sys->last_checkpoint_lsn + SIZE_OF_MLOG_CHECKPOINT) {
		/* Do nothing, because nothing was logged (other than
		a MLOG_CHECKPOINT marker) since the previous checkpoint. */
		log_mutex_exit();
		return(true);
	}

	/* Repeat the MLOG_FILE_NAME records after the checkpoint, in
	case some log records between the checkpoint and log_sys->lsn
	need them. Finally, write a MLOG_CHECKPOINT marker. Redo log
	apply expects to see a MLOG_CHECKPOINT after the checkpoint,
	except on clean shutdown, where the log will be empty after
	the checkpoint.

	It is important that we write out the redo log before any
	further dirty pages are flushed to the tablespace files.  At
	this point, because log_mutex_own(), mtr_commit() in other
	threads will be blocked, and no pages can be added to the
	flush lists. */
	lsn_t		flush_lsn	= oldest_lsn;
	const bool	do_write
		= srv_shutdown_state == SRV_SHUTDOWN_NONE
		|| flush_lsn != log_sys->lsn;

	if (fil_names_clear(flush_lsn, do_write)) {
		/* MLOG_FILE_NAME and MLOG_CHECKPOINT records were
		appended; the log must now be flushed up to the new
		end of the log. */
		ut_ad(log_sys->lsn >= flush_lsn + SIZE_OF_MLOG_CHECKPOINT);
		flush_lsn = log_sys->lsn;
	}

	log_mutex_exit();

	log_write_up_to(flush_lsn, true);

	/* Debug-only hook: optionally pause mid-checkpoint and disable
	background activity so tests can examine this intermediate state. */
	DBUG_EXECUTE_IF(
		"using_wa_checkpoint_middle",
		if (write_always) {
			DEBUG_SYNC_C("wa_checkpoint_middle");

			const my_bool b = TRUE;
			buf_flush_page_cleaner_disabled_debug_update(
				NULL, NULL, NULL, &b);
			dict_stats_disabled_debug_update(
				NULL, NULL, NULL, &b);
			srv_master_thread_disabled_debug_update(
				NULL, NULL, NULL, &b);
		});

	log_mutex_enter();

	ut_ad(log_sys->flushed_to_disk_lsn >= flush_lsn);
	ut_ad(flush_lsn >= oldest_lsn);

	if (log_sys->last_checkpoint_lsn >= oldest_lsn) {
		/* Some other thread already completed an equal or newer
		checkpoint while we were not holding the mutex. */
		log_mutex_exit();
		return(true);
	}

	if (log_sys->n_pending_checkpoint_writes > 0) {
		/* A checkpoint write is running */
		log_mutex_exit();

		if (sync) {
			/* Wait for the checkpoint write to complete by
			passing through the checkpoint rw-lock. */
			rw_lock_s_lock(&log_sys->checkpoint_lock);
			rw_lock_s_unlock(&log_sys->checkpoint_lock);
		}

		return(false);
	}

	log_sys->next_checkpoint_lsn = oldest_lsn;
	/* log_write_checkpoint_info() releases the log mutex. */
	log_write_checkpoint_info(sync);
	ut_ad(!log_mutex_own());

	return(true);
}
2049 
2050 /** Make a checkpoint at or after a specified LSN.
2051 @param[in]	lsn		the log sequence number, or LSN_MAX
2052 for the latest LSN
2053 @param[in]	write_always	force a write even if no log
2054 has been generated since the latest checkpoint */
2055 void
log_make_checkpoint_at(lsn_t lsn,bool write_always)2056 log_make_checkpoint_at(
2057 	lsn_t			lsn,
2058 	bool			write_always)
2059 {
2060 	/* Preflush pages synchronously */
2061 
2062 	while (!log_preflush_pool_modified_pages(lsn)) {
2063 		/* Flush as much as we can */
2064 	}
2065 
2066 	while (!log_checkpoint(true, write_always)) {
2067 		/* Force a checkpoint */
2068 	}
2069 }
2070 
2071 /****************************************************************//**
2072 Tries to establish a big enough margin of free space in the log groups, such
2073 that a new log entry can be catenated without an immediate need for a
2074 checkpoint. NOTE: this function may only be called if the calling thread
2075 owns no synchronization objects! */
2076 static
2077 void
log_checkpoint_margin(void)2078 log_checkpoint_margin(void)
2079 /*=======================*/
2080 {
2081 	log_t*		log		= log_sys;
2082 	lsn_t		age;
2083 	lsn_t		checkpoint_age;
2084 	ib_uint64_t	advance;
2085 	lsn_t		oldest_lsn;
2086 	bool		success;
2087 loop:
2088 	advance = 0;
2089 
2090 	log_mutex_enter();
2091 	ut_ad(!recv_no_log_write);
2092 
2093 	if (!log->check_flush_or_checkpoint) {
2094 		log_mutex_exit();
2095 		return;
2096 	}
2097 
2098 	oldest_lsn = log_buf_pool_get_oldest_modification();
2099 
2100 	age = log->lsn - oldest_lsn;
2101 
2102 	if (age > log->max_modified_age_sync) {
2103 
2104 		/* A flush is urgent: we have to do a synchronous preflush */
2105 		advance = age - log->max_modified_age_sync;
2106 	}
2107 
2108 	checkpoint_age = log->lsn - log->last_checkpoint_lsn;
2109 
2110 	bool	checkpoint_sync;
2111 	bool	do_checkpoint;
2112 
2113 	if (checkpoint_age > log->max_checkpoint_age) {
2114 		/* A checkpoint is urgent: we do it synchronously */
2115 		checkpoint_sync = true;
2116 		do_checkpoint = true;
2117 	} else if (checkpoint_age > log->max_checkpoint_age_async) {
2118 		/* A checkpoint is not urgent: do it asynchronously */
2119 		do_checkpoint = true;
2120 		checkpoint_sync = false;
2121 		log->check_flush_or_checkpoint = false;
2122 	} else {
2123 		do_checkpoint = false;
2124 		checkpoint_sync = false;
2125 		log->check_flush_or_checkpoint = false;
2126 	}
2127 
2128 	log_mutex_exit();
2129 
2130 	if (advance) {
2131 		lsn_t	new_oldest = oldest_lsn + advance;
2132 
2133 		success = log_preflush_pool_modified_pages(new_oldest);
2134 
2135 		/* If the flush succeeded, this thread has done its part
2136 		and can proceed. If it did not succeed, there was another
2137 		thread doing a flush at the same time. */
2138 		if (!success) {
2139 			log_mutex_enter();
2140 
2141 			log->check_flush_or_checkpoint = true;
2142 
2143 			log_mutex_exit();
2144 			goto loop;
2145 		}
2146 	}
2147 
2148 	if (do_checkpoint) {
2149 		log_checkpoint(checkpoint_sync, FALSE);
2150 
2151 		if (checkpoint_sync) {
2152 
2153 			goto loop;
2154 		}
2155 	}
2156 }
2157 
2158 /******************************************************//**
2159 Reads a specified log segment to a buffer. */
2160 void
log_group_read_log_seg(byte * buf,log_group_t * group,lsn_t start_lsn,lsn_t end_lsn)2161 log_group_read_log_seg(
2162 /*===================*/
2163 	byte*		buf,		/*!< in: buffer where to read */
2164 	log_group_t*	group,		/*!< in: log group */
2165 	lsn_t		start_lsn,	/*!< in: read area start */
2166 	lsn_t		end_lsn)	/*!< in: read area end */
2167 {
2168 	ulint	len;
2169 	lsn_t	source_offset;
2170 
2171 	ut_ad(log_mutex_own());
2172 
2173 loop:
2174 	source_offset = log_group_calc_lsn_offset(start_lsn, group);
2175 
2176 	ut_a(end_lsn - start_lsn <= ULINT_MAX);
2177 	len = (ulint) (end_lsn - start_lsn);
2178 
2179 	ut_ad(len != 0);
2180 
2181 	if ((source_offset % group->file_size) + len > group->file_size) {
2182 
2183 		/* If the above condition is true then len (which is ulint)
2184 		is > the expression below, so the typecast is ok */
2185 		len = (ulint) (group->file_size -
2186 			(source_offset % group->file_size));
2187 	}
2188 
2189 	log_sys->n_log_ios++;
2190 
2191 	MONITOR_INC(MONITOR_LOG_IO);
2192 
2193 	ut_a(source_offset / UNIV_PAGE_SIZE <= ULINT_MAX);
2194 
2195 	const ulint	page_no
2196 		= (ulint) (source_offset / univ_page_size.physical());
2197 
2198 	fil_io(IORequestLogRead, true,
2199 	       page_id_t(group->space_id, page_no),
2200 	       univ_page_size,
2201 	       (ulint) (source_offset % univ_page_size.physical()),
2202 	       len, buf, NULL);
2203 
2204 	start_lsn += len;
2205 	buf += len;
2206 
2207 	if (start_lsn != end_lsn) {
2208 
2209 		goto loop;
2210 	}
2211 }
2212 
2213 /**
2214 Checks that there is enough free space in the log to start a new query step.
2215 Flushes the log buffer or makes a new checkpoint if necessary. NOTE: this
2216 function may only be called if the calling thread owns no synchronization
2217 objects! */
2218 void
log_check_margins(void)2219 log_check_margins(void)
2220 {
2221 	bool	check;
2222 
2223 	do {
2224 		log_flush_margin();
2225 		log_checkpoint_margin();
2226 		log_mutex_enter();
2227 		ut_ad(!recv_no_log_write);
2228 		check = log_sys->check_flush_or_checkpoint;
2229 		log_mutex_exit();
2230 	} while (check);
2231 }
2232 
/****************************************************************//**
Makes a checkpoint at the latest lsn and writes it to first page of each
data file in the database, so that we know that the file spaces contain
all modifications up to that lsn. This can only be called at database
shutdown. This function also writes all log in log files to the log archive. */
void
logs_empty_and_mark_files_at_shutdown(void)
/*=======================================*/
{
	lsn_t			lsn;
	ulint			count = 0;	/* 100ms ticks since last progress message */
	ulint			total_trx;
	ulint			pending_io;
	enum srv_thread_type	active_thd;
	const char*		thread_name;

	ib::info() << "Starting shutdown...";

	while (srv_fast_shutdown == 0 && trx_rollback_or_clean_is_active) {
		/* we should wait until rollback after recovery end
		for slow shutdown */
		os_thread_sleep(100000);
	}

	/* Wait until the master thread and all other operations are idle: our
	algorithm only works if the server is idle at shutdown */

	srv_shutdown_state = SRV_SHUTDOWN_CLEANUP;
loop:
	os_thread_sleep(100000);

	count++;

	/* We need the monitor threads to stop before we proceed with
	a shutdown. */

	thread_name = srv_any_background_threads_are_active();

	if (thread_name != NULL) {
		/* Print a message every 60 seconds if we are waiting
		for the monitor thread to exit. Master and worker
		threads check will be done later. */

		if (srv_print_verbose_log && count > 600) {
			ib::info() << "Waiting for " << thread_name
				<< " to exit";
			count = 0;
		}

		goto loop;
	}

	/* Check that there are no longer transactions, except for
	PREPARED ones. We need this wait even for the 'very fast'
	shutdown, because the InnoDB layer may have committed or
	prepared transactions and we don't want to lose them. */

	total_trx = trx_sys_any_active_transactions();

	if (total_trx > 0) {

		if (srv_print_verbose_log && count > 600) {
			ib::info() << "Waiting for " << total_trx << " active"
				<< " transactions to finish";

			count = 0;
		}

		goto loop;
	}

	/* Check that the background threads are suspended */

	active_thd = srv_get_active_thread_type();

	if (active_thd != SRV_NONE
		|| (srv_fast_shutdown != 2
			&& trx_rollback_or_clean_is_active)) {

		if (active_thd == SRV_PURGE) {
			/* Wake the purge threads so they can notice the
			shutdown state and suspend themselves. */
			srv_purge_wakeup();
		}

		/* The srv_lock_timeout_thread, srv_error_monitor_thread
		and srv_monitor_thread should already exit by now. The
		only threads to be suspended are the master threads
		and worker threads (purge threads). Print the thread
		type if any of such threads not in suspended mode */
		if (srv_print_verbose_log && count > 600) {
			const char*	thread_type = "<null>";

			switch (active_thd) {
			case SRV_NONE:
				/* SRV_NONE here means we are still waiting
				for the rollback-or-clean thread. */
				thread_type = "rollback";
				break;
			case SRV_WORKER:
				thread_type = "worker threads";
				break;
			case SRV_MASTER:
				thread_type = "master thread";
				break;
			case SRV_PURGE:
				thread_type = "purge thread";
				break;
			}

			ib::info() << "Waiting for " << thread_type
				<< " to be suspended";

			count = 0;
		}

		goto loop;
	}

	/* At this point only page_cleaner should be active. We wait
	here to let it complete the flushing of the buffer pools
	before proceeding further. */
	srv_shutdown_state = SRV_SHUTDOWN_FLUSH_PHASE;
	count = 0;
	while (buf_page_cleaner_is_active) {
		++count;
		os_thread_sleep(100000);
		if (srv_print_verbose_log && count > 600) {
			ib::info() << "Waiting for page_cleaner to"
				" finish flushing of buffer pool";
			count = 0;
		}
	}

	/* Snapshot the pending log I/O counters under the log mutex. */
	log_mutex_enter();
	const ulint	n_write	= log_sys->n_pending_checkpoint_writes;
	const ulint	n_flush	= log_sys->n_pending_flushes;
	log_mutex_exit();

	if (n_write != 0 || n_flush != 0) {
		if (srv_print_verbose_log && count > 600) {
			ib::info() << "Pending checkpoint_writes: " << n_write
				<< ". Pending log flush writes: " << n_flush;
			count = 0;
		}
		goto loop;
	}

	pending_io = buf_pool_check_no_pending_io();

	if (pending_io) {
		if (srv_print_verbose_log && count > 600) {
			ib::info() << "Waiting for " << pending_io << " buffer"
				" page I/Os to complete";
			count = 0;
		}

		goto loop;
	}

	if (srv_fast_shutdown == 2) {
		if (!srv_read_only_mode) {
			ib::info() << "MySQL has requested a very fast"
				" shutdown without flushing the InnoDB buffer"
				" pool to data files. At the next mysqld"
				" startup InnoDB will do a crash recovery!";

			/* In this fastest shutdown we do not flush the
			buffer pool:

			it is essentially a 'crash' of the InnoDB server.
			Make sure that the log is all flushed to disk, so
			that we can recover all committed transactions in
			a crash recovery. We must not write the lsn stamps
			to the data files, since at a startup InnoDB deduces
			from the stamps if the previous shutdown was clean. */

			log_buffer_flush_to_disk();

			/* Check that the background threads stay suspended */
			thread_name = srv_any_background_threads_are_active();

			if (thread_name != NULL) {
				ib::warn() << "Background thread "
					<< thread_name << " woke up during"
					" shutdown";
				goto loop;
			}
		}

		srv_shutdown_state = SRV_SHUTDOWN_LAST_PHASE;

		fil_close_all_files();

		thread_name = srv_any_background_threads_are_active();

		ut_a(!thread_name);

		return;
	}

	if (!srv_read_only_mode) {
		/* Write the final checkpoint at the current lsn; this
		empties the log of needed records for crash recovery. */
		log_make_checkpoint_at(LSN_MAX, TRUE);
	}

	log_mutex_enter();

	lsn = log_sys->lsn;

	ut_ad(lsn >= log_sys->last_checkpoint_lsn);

	log_mutex_exit();

	/** If innodb_force_recovery is set to 6 then log_sys doesn't
	have recent checkpoint information. So last checkpoint lsn
	will never be equal to current lsn. */
	const bool	is_last = ((srv_force_recovery == SRV_FORCE_NO_LOG_REDO
				    && lsn == log_sys->last_checkpoint_lsn
						+ LOG_BLOCK_HDR_SIZE)
				   || lsn == log_sys->last_checkpoint_lsn);

	if (!is_last) {
		/* New log was generated after the checkpoint: go back
		and wait for the server to quiesce again. */
		goto loop;
	}

	/* Check that the background threads stay suspended */
	thread_name = srv_any_background_threads_are_active();
	if (thread_name != NULL) {
		ib::warn() << "Background thread " << thread_name << " woke up"
			" during shutdown";

		goto loop;
	}

	if (!srv_read_only_mode) {
		fil_flush_file_spaces(FIL_TYPE_TABLESPACE);
		fil_flush_file_spaces(FIL_TYPE_LOG);
	}

	/* The call fil_write_flushed_lsn() will bypass the buffer
	pool: therefore it is essential that the buffer pool has been
	completely flushed to disk! (We do not call fil_write... if the
	'very fast' shutdown is enabled.) */

	if (!buf_all_freed()) {

		if (srv_print_verbose_log && count > 600) {
			ib::info() << "Waiting for dirty buffer pages to be"
				" flushed";
			count = 0;
		}

		goto loop;
	}

	srv_shutdown_state = SRV_SHUTDOWN_LAST_PHASE;

	/* Make some checks that the server really is quiet */
	srv_thread_type	type = srv_get_active_thread_type();
	ut_a(type == SRV_NONE);

	bool	freed = buf_all_freed();
	ut_a(freed);

	ut_a(lsn == log_sys->lsn);

	if (lsn < srv_start_lsn) {
		ib::error() << "Log sequence number at shutdown " << lsn
			<< " is lower than at startup " << srv_start_lsn
			<< "!";
	}

	srv_shutdown_lsn = lsn;

	if (!srv_read_only_mode) {
		/* Stamp the flushed lsn into the data file headers so
		the next startup can detect a clean shutdown. */
		fil_write_flushed_lsn_to_data_files(lsn);
	}

	fil_close_all_files();

	/* Make some checks that the server really is quiet */
	type = srv_get_active_thread_type();
	ut_a(type == SRV_NONE);

	freed = buf_all_freed();
	ut_a(freed);

	ut_a(lsn == log_sys->lsn);
}
2518 
2519 /******************************************************//**
2520 Peeks the current lsn.
2521 @return TRUE if success, FALSE if could not get the log system mutex */
2522 ibool
log_peek_lsn(lsn_t * lsn)2523 log_peek_lsn(
2524 /*=========*/
2525 	lsn_t*	lsn)	/*!< out: if returns TRUE, current lsn is here */
2526 {
2527 	if (0 == mutex_enter_nowait(&(log_sys->mutex))) {
2528 		*lsn = log_sys->lsn;
2529 
2530 		log_mutex_exit();
2531 
2532 		return(TRUE);
2533 	}
2534 
2535 	return(FALSE);
2536 }
2537 
2538 /******************************************************//**
2539 Prints info of the log. */
2540 void
log_print(FILE * file)2541 log_print(
2542 /*======*/
2543 	FILE*	file)	/*!< in: file where to print */
2544 {
2545 	double	time_elapsed;
2546 	time_t	current_time;
2547 
2548 	log_mutex_enter();
2549 
2550 	fprintf(file,
2551 		"Log sequence number " LSN_PF "\n"
2552 		"Log flushed up to   " LSN_PF "\n"
2553 		"Pages flushed up to " LSN_PF "\n"
2554 		"Last checkpoint at  " LSN_PF "\n",
2555 		log_sys->lsn,
2556 		log_sys->flushed_to_disk_lsn,
2557 		log_buf_pool_get_oldest_modification(),
2558 		log_sys->last_checkpoint_lsn);
2559 
2560 	current_time = time(NULL);
2561 
2562 	time_elapsed = difftime(current_time,
2563 				log_sys->last_printout_time);
2564 
2565 	if (time_elapsed <= 0) {
2566 		time_elapsed = 1;
2567 	}
2568 
2569 	fprintf(file,
2570 		ULINTPF " pending log flushes, "
2571 		ULINTPF " pending chkp writes\n"
2572 		ULINTPF " log i/o's done, %.2f log i/o's/second\n",
2573 		log_sys->n_pending_flushes,
2574 		log_sys->n_pending_checkpoint_writes,
2575 		log_sys->n_log_ios,
2576 		static_cast<double>(
2577 			log_sys->n_log_ios - log_sys->n_log_ios_old)
2578 		/ time_elapsed);
2579 
2580 	log_sys->n_log_ios_old = log_sys->n_log_ios;
2581 	log_sys->last_printout_time = current_time;
2582 
2583 	log_mutex_exit();
2584 }
2585 
2586 /**********************************************************************//**
2587 Refreshes the statistics used to print per-second averages. */
2588 void
log_refresh_stats(void)2589 log_refresh_stats(void)
2590 /*===================*/
2591 {
2592 	log_sys->n_log_ios_old = log_sys->n_log_ios;
2593 	log_sys->last_printout_time = time(NULL);
2594 }
2595 
2596 /********************************************************//**
2597 Closes a log group. */
2598 static
2599 void
log_group_close(log_group_t * group)2600 log_group_close(
2601 /*===========*/
2602 	log_group_t*	group)		/* in,own: log group to close */
2603 {
2604 	ulint	i;
2605 
2606 	for (i = 0; i < group->n_files; i++) {
2607 		ut_free(group->file_header_bufs_ptr[i]);
2608 	}
2609 
2610 	ut_free(group->file_header_bufs_ptr);
2611 	ut_free(group->file_header_bufs);
2612 	ut_free(group->checkpoint_buf_ptr);
2613 	ut_free(group);
2614 }
2615 
2616 /********************************************************//**
2617 Closes all log groups. */
2618 void
log_group_close_all(void)2619 log_group_close_all(void)
2620 /*=====================*/
2621 {
2622 	log_group_t*	group;
2623 
2624 	group = UT_LIST_GET_FIRST(log_sys->log_groups);
2625 
2626 	while (UT_LIST_GET_LEN(log_sys->log_groups) > 0) {
2627 		log_group_t*	prev_group = group;
2628 
2629 		group = UT_LIST_GET_NEXT(log_groups, group);
2630 
2631 		UT_LIST_REMOVE(log_sys->log_groups, prev_group);
2632 
2633 		log_group_close(prev_group);
2634 	}
2635 }
2636 
/********************************************************//**
Shutdown the log system but do not release all the memory. */
void
log_shutdown(void)
/*==============*/
{
	log_group_close_all();

	/* Free the log buffer and checkpoint buffer, clearing the
	pointers so a later log_mem_free() does not double-free. */
	ut_free(log_sys->buf_ptr);
	log_sys->buf_ptr = NULL;
	log_sys->buf = NULL;
	ut_free(log_sys->checkpoint_buf_ptr);
	log_sys->checkpoint_buf_ptr = NULL;
	log_sys->checkpoint_buf = NULL;

	os_event_destroy(log_sys->flush_event);

	rw_lock_free(&log_sys->checkpoint_lock);

	/* Destroy the synchronization primitives last, after no code
	path can still use them. */
	mutex_free(&log_sys->mutex);
	mutex_free(&log_sys->write_mutex);
	mutex_free(&log_sys->log_flush_order_mutex);

	recv_sys_close();
}
2662 
2663 /********************************************************//**
2664 Free the log system data structures. */
2665 void
log_mem_free(void)2666 log_mem_free(void)
2667 /*==============*/
2668 {
2669 	if (log_sys != NULL) {
2670 		recv_sys_mem_free();
2671 		ut_free(log_sys);
2672 
2673 		log_sys = NULL;
2674 	}
2675 }
2676 #endif /* !UNIV_HOTBACKUP */
2677