1 /*****************************************************************************
2 
3 Copyright (c) 1995, 2017, Oracle and/or its affiliates. All Rights Reserved.
4 Copyright (c) 2009, Google Inc.
5 Copyright (c) 2014, 2021, MariaDB Corporation.
6 
7 Portions of this file contain modifications contributed and copyrighted by
8 Google, Inc. Those modifications are gratefully acknowledged and are described
9 briefly in the InnoDB documentation. The contributions by Google are
10 incorporated with their permission, and subject to the conditions contained in
11 the file COPYING.Google.
12 
13 This program is free software; you can redistribute it and/or modify it under
14 the terms of the GNU General Public License as published by the Free Software
15 Foundation; version 2 of the License.
16 
17 This program is distributed in the hope that it will be useful, but WITHOUT
18 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
19 FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
20 
21 You should have received a copy of the GNU General Public License along with
22 this program; if not, write to the Free Software Foundation, Inc.,
23 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
24 
25 *****************************************************************************/
26 
27 /**************************************************//**
28 @file log/log0log.cc
29 Database log
30 
31 Created 12/9/1995 Heikki Tuuri
32 *******************************************************/
33 
34 #include "univ.i"
35 #include <debug_sync.h>
36 #include <my_service_manager.h>
37 
38 #include "log0log.h"
39 #include "log0crypt.h"
40 #include "buf0buf.h"
41 #include "buf0flu.h"
42 #include "lock0lock.h"
43 #include "log0recv.h"
44 #include "fil0fil.h"
45 #include "dict0stats_bg.h"
46 #include "btr0defragment.h"
47 #include "srv0srv.h"
48 #include "srv0start.h"
49 #include "trx0sys.h"
50 #include "trx0trx.h"
51 #include "trx0roll.h"
52 #include "srv0mon.h"
53 #include "sync0sync.h"
54 #include "buf0dump.h"
55 #include "log0sync.h"
56 
/*
General philosophy of InnoDB redo-logs:

Every change to the contents of a data page must be done
through mtr_t, and mtr_t::commit() will write log records
to the InnoDB redo log. */
63 
/** Redo log system: the single global log_t instance used throughout
this file */
log_t	log_sys;

/* A margin for free space in the log buffer before a log entry is catenated.
It is also the first term of LOG_BUF_FLUSH_MARGIN below. */
#define LOG_BUF_WRITE_MARGIN	(4 * OS_FILE_LOG_BLOCK_SIZE)

/* Margins for free space in the log buffer after a log entry is catenated.
log_sys.max_buf_free is computed as
buffer size / LOG_BUF_FLUSH_RATIO - LOG_BUF_FLUSH_MARGIN. */
#define LOG_BUF_FLUSH_RATIO	2
#define LOG_BUF_FLUSH_MARGIN	(LOG_BUF_WRITE_MARGIN		\
				 + (4U << srv_page_size_shift))
74 
75 /** Extends the log buffer.
76 @param[in]	len	requested minimum size in bytes */
log_buffer_extend(ulong len)77 void log_buffer_extend(ulong len)
78 {
79 	const size_t new_buf_size = ut_calc_align(len, srv_page_size);
80 	byte* new_buf = static_cast<byte*>
81 		(ut_malloc_dontdump(new_buf_size, PSI_INSTRUMENT_ME));
82 	byte* new_flush_buf = static_cast<byte*>
83 		(ut_malloc_dontdump(new_buf_size, PSI_INSTRUMENT_ME));
84 
85 	mysql_mutex_lock(&log_sys.mutex);
86 
87 	if (len <= srv_log_buffer_size) {
88 		/* Already extended enough by the others */
89 		mysql_mutex_unlock(&log_sys.mutex);
90 		ut_free_dodump(new_buf, new_buf_size);
91 		ut_free_dodump(new_flush_buf, new_buf_size);
92 		return;
93 	}
94 
95 	ib::warn() << "The redo log transaction size " << len <<
96 		" exceeds innodb_log_buffer_size="
97 		<< srv_log_buffer_size << " / 2). Trying to extend it.";
98 
99 	byte* old_buf = log_sys.buf;
100 	byte* old_flush_buf = log_sys.flush_buf;
101 	const ulong old_buf_size = srv_log_buffer_size;
102 	srv_log_buffer_size = static_cast<ulong>(new_buf_size);
103 	log_sys.buf = new_buf;
104 	log_sys.flush_buf = new_flush_buf;
105 	memcpy_aligned<OS_FILE_LOG_BLOCK_SIZE>(new_buf, old_buf,
106 					       log_sys.buf_free);
107 
108 	log_sys.max_buf_free = new_buf_size / LOG_BUF_FLUSH_RATIO
109 		- LOG_BUF_FLUSH_MARGIN;
110 
111 	mysql_mutex_unlock(&log_sys.mutex);
112 
113 	ut_free_dodump(old_buf, old_buf_size);
114 	ut_free_dodump(old_flush_buf, old_buf_size);
115 
116 	ib::info() << "innodb_log_buffer_size was extended to "
117 		<< new_buf_size << ".";
118 }
119 
120 /** Calculate the recommended highest values for lsn - last_checkpoint_lsn
121 and lsn - buf_pool.get_oldest_modification().
122 @param[in]	file_size	requested innodb_log_file_size
123 @retval true on success
124 @retval false if the smallest log group is too small to
125 accommodate the number of OS threads in the database server */
126 bool
log_set_capacity(ulonglong file_size)127 log_set_capacity(ulonglong file_size)
128 {
129 	/* Margin for the free space in the smallest log, before a new query
130 	step which modifies the database, is started */
131 	const size_t LOG_CHECKPOINT_FREE_PER_THREAD = 4U
132 						      << srv_page_size_shift;
133 	const size_t LOG_CHECKPOINT_EXTRA_FREE = 8U << srv_page_size_shift;
134 
135 	lsn_t		margin;
136 	ulint		free;
137 
138 	lsn_t smallest_capacity = file_size - LOG_FILE_HDR_SIZE;
139 	/* Add extra safety */
140 	smallest_capacity -= smallest_capacity / 10;
141 
142 	/* For each OS thread we must reserve so much free space in the
143 	smallest log group that it can accommodate the log entries produced
144 	by single query steps: running out of free log space is a serious
145 	system error which requires rebooting the database. */
146 
147 	free = LOG_CHECKPOINT_FREE_PER_THREAD * 10
148 		+ LOG_CHECKPOINT_EXTRA_FREE;
149 	if (free >= smallest_capacity / 2) {
150 		ib::error() << "Cannot continue operation because log file is "
151 			       "too small. Increase innodb_log_file_size "
152 			       "or decrease innodb_thread_concurrency. "
153 			    << INNODB_PARAMETERS_MSG;
154 		return false;
155 	}
156 
157 	margin = smallest_capacity - free;
158 	margin = margin - margin / 10;	/* Add still some extra safety */
159 
160 	mysql_mutex_lock(&log_sys.mutex);
161 
162 	log_sys.log_capacity = smallest_capacity;
163 
164 	log_sys.max_modified_age_async = margin - margin / 8;
165 	log_sys.max_checkpoint_age = margin;
166 
167 	mysql_mutex_unlock(&log_sys.mutex);
168 
169 	return(true);
170 }
171 
172 /** Initialize the redo log subsystem. */
create()173 void log_t::create()
174 {
175   ut_ad(this == &log_sys);
176   ut_ad(!is_initialised());
177   m_initialised= true;
178 
179   mysql_mutex_init(log_sys_mutex_key, &mutex, nullptr);
180   mysql_mutex_init(log_flush_order_mutex_key, &flush_order_mutex, nullptr);
181 
182   /* Start the lsn from one log block from zero: this way every
183   log record has a non-zero start lsn, a fact which we will use */
184 
185   set_lsn(LOG_START_LSN + LOG_BLOCK_HDR_SIZE);
186   set_flushed_lsn(LOG_START_LSN + LOG_BLOCK_HDR_SIZE);
187 
188   ut_ad(srv_log_buffer_size >= 16 * OS_FILE_LOG_BLOCK_SIZE);
189   ut_ad(srv_log_buffer_size >= 4U << srv_page_size_shift);
190 
191   buf= static_cast<byte*>(ut_malloc_dontdump(srv_log_buffer_size,
192                                              PSI_INSTRUMENT_ME));
193   TRASH_ALLOC(buf, srv_log_buffer_size);
194   flush_buf= static_cast<byte*>(ut_malloc_dontdump(srv_log_buffer_size,
195                                                    PSI_INSTRUMENT_ME));
196   TRASH_ALLOC(flush_buf, srv_log_buffer_size);
197 
198   max_buf_free= srv_log_buffer_size / LOG_BUF_FLUSH_RATIO -
199     LOG_BUF_FLUSH_MARGIN;
200   set_check_flush_or_checkpoint();
201 
202   n_log_ios_old= n_log_ios;
203   last_printout_time= time(NULL);
204 
205   buf_next_to_write= 0;
206   last_checkpoint_lsn= write_lsn= LOG_START_LSN;
207   n_log_ios= 0;
208   n_log_ios_old= 0;
209   log_capacity= 0;
210   max_modified_age_async= 0;
211   max_checkpoint_age= 0;
212   next_checkpoint_no= 0;
213   next_checkpoint_lsn= 0;
214   n_pending_checkpoint_writes= 0;
215 
216   log_block_init(buf, LOG_START_LSN);
217   log_block_set_first_rec_group(buf, LOG_BLOCK_HDR_SIZE);
218 
219   buf_free= LOG_BLOCK_HDR_SIZE;
220   checkpoint_buf= static_cast<byte*>
221     (aligned_malloc(OS_FILE_LOG_BLOCK_SIZE, OS_FILE_LOG_BLOCK_SIZE));
222 }
223 
file_os_io(file_os_io && rhs)224 file_os_io::file_os_io(file_os_io &&rhs) : m_fd(rhs.m_fd)
225 {
226   rhs.m_fd= OS_FILE_CLOSED;
227 }
228 
operator =(file_os_io && rhs)229 file_os_io &file_os_io::operator=(file_os_io &&rhs)
230 {
231   std::swap(m_fd, rhs.m_fd);
232   return *this;
233 }
234 
~file_os_io()235 file_os_io::~file_os_io() noexcept
236 {
237   if (is_opened())
238     close();
239 }
240 
open(const char * path,bool read_only)241 dberr_t file_os_io::open(const char *path, bool read_only) noexcept
242 {
243   ut_ad(!is_opened());
244 
245   bool success;
246   auto tmp_fd= os_file_create(
247       innodb_log_file_key, path, OS_FILE_OPEN | OS_FILE_ON_ERROR_NO_EXIT,
248       OS_FILE_NORMAL, OS_LOG_FILE, read_only, &success);
249   if (!success)
250     return DB_ERROR;
251 
252   m_durable_writes= srv_file_flush_method == SRV_O_DSYNC;
253   m_fd= tmp_fd;
254   return success ? DB_SUCCESS : DB_ERROR;
255 }
256 
rename(const char * old_path,const char * new_path)257 dberr_t file_os_io::rename(const char *old_path, const char *new_path) noexcept
258 {
259   return os_file_rename(innodb_log_file_key, old_path, new_path) ? DB_SUCCESS
260                                                                  : DB_ERROR;
261 }
262 
close()263 dberr_t file_os_io::close() noexcept
264 {
265   if (!os_file_close(m_fd))
266     return DB_ERROR;
267 
268   m_fd= OS_FILE_CLOSED;
269   return DB_SUCCESS;
270 }
271 
read(os_offset_t offset,span<byte> buf)272 dberr_t file_os_io::read(os_offset_t offset, span<byte> buf) noexcept
273 {
274   return os_file_read(IORequestRead, m_fd, buf.data(), offset, buf.size());
275 }
276 
write(const char * path,os_offset_t offset,span<const byte> buf)277 dberr_t file_os_io::write(const char *path, os_offset_t offset,
278                           span<const byte> buf) noexcept
279 {
280   return os_file_write(IORequestWrite, path, m_fd, buf.data(), offset,
281                        buf.size());
282 }
283 
flush()284 dberr_t file_os_io::flush() noexcept
285 {
286   return os_file_flush(m_fd) ? DB_SUCCESS : DB_ERROR;
287 }
288 
289 #ifdef HAVE_PMEM
290 
291 #include <libpmem.h>
292 
293 /** Memory mapped file */
294 class mapped_file_t
295 {
296 public:
297   mapped_file_t()= default;
298   mapped_file_t(const mapped_file_t &)= delete;
299   mapped_file_t &operator=(const mapped_file_t &)= delete;
300   mapped_file_t(mapped_file_t &&)= delete;
301   mapped_file_t &operator=(mapped_file_t &&)= delete;
302   ~mapped_file_t() noexcept;
303 
304   dberr_t map(const char *path, bool read_only= false,
305               bool nvme= false) noexcept;
306   dberr_t unmap() noexcept;
data()307   byte *data() noexcept { return m_area.data(); }
308 
309 private:
310   span<byte> m_area;
311 };
312 
~mapped_file_t()313 mapped_file_t::~mapped_file_t() noexcept
314 {
315   if (!m_area.empty())
316     unmap();
317 }
318 
map(const char * path,bool read_only,bool nvme)319 dberr_t mapped_file_t::map(const char *path, bool read_only,
320                            bool nvme) noexcept
321 {
322   auto fd= mysql_file_open(innodb_log_file_key, path,
323                            read_only ? O_RDONLY : O_RDWR, MYF(MY_WME));
324   if (fd == -1)
325     return DB_ERROR;
326 
327   const auto file_size= size_t{os_file_get_size(path).m_total_size};
328 
329   const int nvme_flag= nvme ? MAP_SYNC : 0;
330   void *ptr=
331       my_mmap(0, file_size, read_only ? PROT_READ : PROT_READ | PROT_WRITE,
332               MAP_SHARED_VALIDATE | nvme_flag, fd, 0);
333   mysql_file_close(fd, MYF(MY_WME));
334 
335   if (ptr == MAP_FAILED)
336     return DB_ERROR;
337 
338   m_area= {static_cast<byte *>(ptr), file_size};
339   return DB_SUCCESS;
340 }
341 
unmap()342 dberr_t mapped_file_t::unmap() noexcept
343 {
344   ut_ad(!m_area.empty());
345 
346   if (my_munmap(m_area.data(), m_area.size()))
347     return DB_ERROR;
348 
349   m_area= {};
350   return DB_SUCCESS;
351 }
352 
is_pmem(const char * path)353 static bool is_pmem(const char *path) noexcept
354 {
355   mapped_file_t mf;
356   return mf.map(path, true, true) == DB_SUCCESS ? true : false;
357 }
358 
359 class file_pmem_io final : public file_io
360 {
361 public:
file_pmem_io()362   file_pmem_io() noexcept : file_io(true) {}
363 
open(const char * path,bool read_only)364   dberr_t open(const char *path, bool read_only) noexcept final
365   {
366     return m_file.map(path, read_only, true);
367   }
rename(const char * old_path,const char * new_path)368   dberr_t rename(const char *old_path, const char *new_path) noexcept final
369   {
370     return os_file_rename(innodb_log_file_key, old_path, new_path) ? DB_SUCCESS
371                                                                    : DB_ERROR;
372   }
close()373   dberr_t close() noexcept final { return m_file.unmap(); }
read(os_offset_t offset,span<byte> buf)374   dberr_t read(os_offset_t offset, span<byte> buf) noexcept final
375   {
376     memcpy(buf.data(), m_file.data() + offset, buf.size());
377     return DB_SUCCESS;
378   }
write(const char *,os_offset_t offset,span<const byte> buf)379   dberr_t write(const char *, os_offset_t offset,
380                 span<const byte> buf) noexcept final
381   {
382     pmem_memcpy_persist(m_file.data() + offset, buf.data(), buf.size());
383     return DB_SUCCESS;
384   }
flush()385   dberr_t flush() noexcept final
386   {
387     ut_ad(0);
388     return DB_SUCCESS;
389   }
390 
391 private:
392   mapped_file_t m_file;
393 };
394 #endif
395 
open(bool read_only)396 dberr_t log_file_t::open(bool read_only) noexcept
397 {
398   ut_a(!is_opened());
399 
400 #ifdef HAVE_PMEM
401   auto ptr= is_pmem(m_path.c_str())
402                 ? std::unique_ptr<file_io>(new file_pmem_io)
403                 : std::unique_ptr<file_io>(new file_os_io);
404 #else
405   auto ptr= std::unique_ptr<file_io>(new file_os_io);
406 #endif
407 
408   if (dberr_t err= ptr->open(m_path.c_str(), read_only))
409     return err;
410 
411   m_file= std::move(ptr);
412   return DB_SUCCESS;
413 }
414 
is_opened() const415 bool log_file_t::is_opened() const noexcept
416 {
417   return static_cast<bool>(m_file);
418 }
419 
rename(std::string new_path)420 dberr_t log_file_t::rename(std::string new_path) noexcept
421 {
422   if (dberr_t err= m_file->rename(m_path.c_str(), new_path.c_str()))
423     return err;
424 
425   m_path = std::move(new_path);
426   return DB_SUCCESS;
427 }
428 
close()429 dberr_t log_file_t::close() noexcept
430 {
431   ut_a(is_opened());
432 
433   if (dberr_t err= m_file->close())
434     return err;
435 
436   m_file.reset();
437   return DB_SUCCESS;
438 }
439 
read(os_offset_t offset,span<byte> buf)440 dberr_t log_file_t::read(os_offset_t offset, span<byte> buf) noexcept
441 {
442   ut_ad(is_opened());
443   return m_file->read(offset, buf);
444 }
445 
writes_are_durable() const446 bool log_file_t::writes_are_durable() const noexcept
447 {
448   return m_file->writes_are_durable();
449 }
450 
write(os_offset_t offset,span<const byte> buf)451 dberr_t log_file_t::write(os_offset_t offset, span<const byte> buf) noexcept
452 {
453   ut_ad(is_opened());
454   return m_file->write(m_path.c_str(), offset, buf);
455 }
456 
flush()457 dberr_t log_file_t::flush() noexcept
458 {
459   ut_ad(is_opened());
460   return m_file->flush();
461 }
462 
open_file(std::string path)463 void log_t::file::open_file(std::string path)
464 {
465   fd= log_file_t(std::move(path));
466   if (const dberr_t err= fd.open(srv_read_only_mode))
467     ib::fatal() << "open(" << fd.get_path() << ") returned " << err;
468 }
469 
470 /** Update the log block checksum. */
log_block_store_checksum(byte * block)471 static void log_block_store_checksum(byte* block)
472 {
473   log_block_set_checksum(block, log_block_calc_checksum_crc32(block));
474 }
475 
write_header_durable(lsn_t lsn)476 void log_t::file::write_header_durable(lsn_t lsn)
477 {
478   ut_ad(lsn % OS_FILE_LOG_BLOCK_SIZE == 0);
479   ut_ad(!recv_no_log_write);
480   ut_ad(log_sys.log.format == log_t::FORMAT_10_5 ||
481         log_sys.log.format == log_t::FORMAT_ENC_10_5);
482 
483   byte *buf= log_sys.checkpoint_buf;
484   memset_aligned<OS_FILE_LOG_BLOCK_SIZE>(buf, 0, OS_FILE_LOG_BLOCK_SIZE);
485 
486   mach_write_to_4(buf + LOG_HEADER_FORMAT, log_sys.log.format);
487   mach_write_to_4(buf + LOG_HEADER_SUBFORMAT, log_sys.log.subformat);
488   mach_write_to_8(buf + LOG_HEADER_START_LSN, lsn);
489   strcpy(reinterpret_cast<char*>(buf) + LOG_HEADER_CREATOR,
490          LOG_HEADER_CREATOR_CURRENT);
491   ut_ad(LOG_HEADER_CREATOR_END - LOG_HEADER_CREATOR >=
492         sizeof LOG_HEADER_CREATOR_CURRENT);
493   log_block_store_checksum(buf);
494 
495   DBUG_PRINT("ib_log", ("write " LSN_PF, lsn));
496 
497   log_sys.log.write(0, {buf, OS_FILE_LOG_BLOCK_SIZE});
498   if (!log_sys.log.writes_are_durable())
499     log_sys.log.flush();
500 }
501 
read(os_offset_t offset,span<byte> buf)502 void log_t::file::read(os_offset_t offset, span<byte> buf)
503 {
504   if (const dberr_t err= fd.read(offset, buf))
505     ib::fatal() << "read(" << fd.get_path() << ") returned "<< err;
506 }
507 
writes_are_durable() const508 bool log_t::file::writes_are_durable() const noexcept
509 {
510   return fd.writes_are_durable();
511 }
512 
write(os_offset_t offset,span<byte> buf)513 void log_t::file::write(os_offset_t offset, span<byte> buf)
514 {
515   srv_stats.os_log_pending_writes.inc();
516   if (const dberr_t err= fd.write(offset, buf))
517     ib::fatal() << "write(" << fd.get_path() << ") returned " << err;
518   srv_stats.os_log_pending_writes.dec();
519   srv_stats.os_log_written.add(buf.size());
520   srv_stats.log_writes.inc();
521   log_sys.n_log_ios++;
522 }
523 
flush()524 void log_t::file::flush()
525 {
526   log_sys.pending_flushes.fetch_add(1, std::memory_order_acquire);
527   if (const dberr_t err= fd.flush())
528     ib::fatal() << "flush(" << fd.get_path() << ") returned " << err;
529   log_sys.pending_flushes.fetch_sub(1, std::memory_order_release);
530   log_sys.flushes.fetch_add(1, std::memory_order_release);
531 }
532 
close_file()533 void log_t::file::close_file()
534 {
535   if (fd.is_opened())
536   {
537     if (const dberr_t err= fd.close())
538       ib::fatal() << "close(" << fd.get_path() << ") returned " << err;
539   }
540   fd.free();                                    // Free path
541 }
542 
543 /** Initialize the redo log. */
create()544 void log_t::file::create()
545 {
546   ut_ad(this == &log_sys.log);
547   ut_ad(log_sys.is_initialised());
548 
549   format= srv_encrypt_log ? log_t::FORMAT_ENC_10_5 : log_t::FORMAT_10_5;
550   subformat= 2;
551   file_size= srv_log_file_size;
552   lsn= LOG_START_LSN;
553   lsn_offset= LOG_FILE_HDR_SIZE;
554 }
555 
556 /******************************************************//**
557 Writes a buffer to a log file. */
558 static
559 void
log_write_buf(byte * buf,ulint len,ulint pad_len,lsn_t start_lsn,ulint new_data_offset)560 log_write_buf(
561 	byte*		buf,		/*!< in: buffer */
562 	ulint		len,		/*!< in: buffer len; must be divisible
563 					by OS_FILE_LOG_BLOCK_SIZE */
564 #ifdef UNIV_DEBUG
565 	ulint		pad_len,	/*!< in: pad len in the buffer len */
566 #endif /* UNIV_DEBUG */
567 	lsn_t		start_lsn,	/*!< in: start lsn of the buffer; must
568 					be divisible by
569 					OS_FILE_LOG_BLOCK_SIZE */
570 	ulint		new_data_offset)/*!< in: start offset of new data in
571 					buf: this parameter is used to decide
572 					if we have to write a new log file
573 					header */
574 {
575 	ulint		write_len;
576 	lsn_t		next_offset;
577 	ulint		i;
578 
579 	ut_ad(log_write_lock_own());
580 	ut_ad(!recv_no_log_write);
581 	ut_a(len % OS_FILE_LOG_BLOCK_SIZE == 0);
582 	ut_a(start_lsn % OS_FILE_LOG_BLOCK_SIZE == 0);
583 
584 loop:
585 	if (len == 0) {
586 
587 		return;
588 	}
589 
590 	next_offset = log_sys.log.calc_lsn_offset(start_lsn);
591 
592 	if ((next_offset % log_sys.log.file_size) + len
593 	    > log_sys.log.file_size) {
594 		/* if the above condition holds, then the below expression
595 		is < len which is ulint, so the typecast is ok */
596 		write_len = ulint(log_sys.log.file_size
597 				  - (next_offset % log_sys.log.file_size));
598 	} else {
599 		write_len = len;
600 	}
601 
602 	DBUG_PRINT("ib_log",
603 		   ("write " LSN_PF " to " LSN_PF
604 		    ": len " ULINTPF
605 		    " blocks " ULINTPF ".." ULINTPF,
606 		    start_lsn, next_offset,
607 		    write_len,
608 		    log_block_get_hdr_no(buf),
609 		    log_block_get_hdr_no(
610 			    buf + write_len
611 			    - OS_FILE_LOG_BLOCK_SIZE)));
612 
613 	ut_ad(pad_len >= len
614 	      || log_block_get_hdr_no(buf)
615 		 == log_block_convert_lsn_to_no(start_lsn));
616 
617 	/* Calculate the checksums for each log block and write them to
618 	the trailer fields of the log blocks */
619 
620 	for (i = 0; i < write_len / OS_FILE_LOG_BLOCK_SIZE; i++) {
621 #ifdef UNIV_DEBUG
622 		ulint hdr_no_2 = log_block_get_hdr_no(buf) + i;
623 		DBUG_EXECUTE_IF("innodb_small_log_block_no_limit",
624 				hdr_no_2 = ((hdr_no_2 - 1) & 0xFUL) + 1;);
625 #endif
626 		ut_ad(pad_len >= len
627 			|| i * OS_FILE_LOG_BLOCK_SIZE >= len - pad_len
628 			|| log_block_get_hdr_no(buf + i * OS_FILE_LOG_BLOCK_SIZE) == hdr_no_2);
629 		log_block_store_checksum(buf + i * OS_FILE_LOG_BLOCK_SIZE);
630 	}
631 
632 	log_sys.log.write(next_offset, {buf, write_len});
633 
634 	if (write_len < len) {
635 		start_lsn += write_len;
636 		len -= write_len;
637 		buf += write_len;
638 		goto loop;
639 	}
640 }
641 
642 /** Flush the recently written changes to the log file.*/
log_write_flush_to_disk_low(lsn_t lsn)643 static void log_write_flush_to_disk_low(lsn_t lsn)
644 {
645   if (!log_sys.log.writes_are_durable())
646     log_sys.log.flush();
647   ut_a(lsn >= log_sys.get_flushed_lsn());
648   log_sys.set_flushed_lsn(lsn);
649 }
650 
651 /** Swap log buffers, and copy the content of last block
652 from old buf to the head of the new buf. Thus, buf_free and
653 buf_next_to_write would be changed accordingly */
654 static inline
655 void
log_buffer_switch()656 log_buffer_switch()
657 {
658 	mysql_mutex_assert_owner(&log_sys.mutex);
659 	ut_ad(log_write_lock_own());
660 
661 	size_t		area_end = ut_calc_align<size_t>(
662 		log_sys.buf_free, OS_FILE_LOG_BLOCK_SIZE);
663 
664 	/* Copy the last block to new buf */
665 	memcpy_aligned<OS_FILE_LOG_BLOCK_SIZE>(
666 		log_sys.flush_buf,
667 		log_sys.buf + area_end - OS_FILE_LOG_BLOCK_SIZE,
668 		OS_FILE_LOG_BLOCK_SIZE);
669 
670 	std::swap(log_sys.buf, log_sys.flush_buf);
671 
672 	log_sys.buf_free %= OS_FILE_LOG_BLOCK_SIZE;
673 	log_sys.buf_next_to_write = log_sys.buf_free;
674 }
675 
/** Invoke commit_checkpoint_notify_ha() to notify that outstanding
log writes have been completed.
Declared here and defined elsewhere; log_write_up_to() calls it after
flushing the log.
@param[in]	flush_lsn	the LSN up to which the log was flushed */
void log_flush_notify(lsn_t flush_lsn);
679 
680 /**
681 Writes log buffer to disk
682 which is the "write" part of log_write_up_to().
683 
684 This function does not flush anything.
685 
686 Note : the caller must have log_sys.mutex locked, and this
687 mutex is released in the function.
688 
689 */
log_write(bool rotate_key)690 static void log_write(bool rotate_key)
691 {
692 	mysql_mutex_assert_owner(&log_sys.mutex);
693 	ut_ad(!recv_no_log_write);
694 	lsn_t write_lsn;
695 	if (log_sys.buf_free == log_sys.buf_next_to_write) {
696 		/* Nothing to write */
697 		mysql_mutex_unlock(&log_sys.mutex);
698 		return;
699 	}
700 
701 	ulint		start_offset;
702 	ulint		end_offset;
703 	ulint		area_start;
704 	ulint		area_end;
705 	ulong		write_ahead_size = srv_log_write_ahead_size;
706 	ulint		pad_size;
707 
708 	DBUG_PRINT("ib_log", ("write " LSN_PF " to " LSN_PF,
709 			      log_sys.write_lsn,
710 			      log_sys.get_lsn()));
711 
712 
713 	start_offset = log_sys.buf_next_to_write;
714 	end_offset = log_sys.buf_free;
715 
716 	area_start = ut_2pow_round(start_offset,
717 				   ulint(OS_FILE_LOG_BLOCK_SIZE));
718 	area_end = ut_calc_align(end_offset, ulint(OS_FILE_LOG_BLOCK_SIZE));
719 
720 	ut_ad(area_end - area_start > 0);
721 
722 	log_block_set_flush_bit(log_sys.buf + area_start, TRUE);
723 	log_block_set_checkpoint_no(
724 		log_sys.buf + area_end - OS_FILE_LOG_BLOCK_SIZE,
725 		log_sys.next_checkpoint_no);
726 
727 	write_lsn = log_sys.get_lsn();
728 	byte *write_buf = log_sys.buf;
729 
730 	log_buffer_switch();
731 
732 	log_sys.log.set_fields(log_sys.write_lsn);
733 
734 	mysql_mutex_unlock(&log_sys.mutex);
735 	/* Erase the end of the last log block. */
736 	memset(write_buf + end_offset, 0,
737 	       ~end_offset & (OS_FILE_LOG_BLOCK_SIZE - 1));
738 
739 	/* Calculate pad_size if needed. */
740 	pad_size = 0;
741 	if (write_ahead_size > OS_FILE_LOG_BLOCK_SIZE) {
742 		ulint	end_offset_in_unit;
743 		lsn_t	end_offset = log_sys.log.calc_lsn_offset(
744 			ut_uint64_align_up(write_lsn, OS_FILE_LOG_BLOCK_SIZE));
745 		end_offset_in_unit = (ulint) (end_offset % write_ahead_size);
746 
747 		if (end_offset_in_unit > 0
748 		    && (area_end - area_start) > end_offset_in_unit) {
749 			/* The first block in the unit was initialized
750 			after the last writing.
751 			Needs to be written padded data once. */
752 			pad_size = std::min<ulint>(
753 				ulint(write_ahead_size) - end_offset_in_unit,
754 				srv_log_buffer_size - area_end);
755 			::memset(write_buf + area_end, 0, pad_size);
756 		}
757 	}
758 
759 	if (UNIV_UNLIKELY(srv_shutdown_state > SRV_SHUTDOWN_INITIATED)) {
760 		service_manager_extend_timeout(INNODB_EXTEND_TIMEOUT_INTERVAL,
761 					       "InnoDB log write: "
762 					       LSN_PF, log_sys.write_lsn);
763 	}
764 
765 	if (log_sys.is_encrypted()) {
766 		log_crypt(write_buf + area_start, log_sys.write_lsn,
767 			  area_end - area_start,
768 			  rotate_key ? LOG_ENCRYPT_ROTATE_KEY : LOG_ENCRYPT);
769 	}
770 
771 	/* Do the write to the log file */
772 	log_write_buf(
773 		write_buf + area_start, area_end - area_start + pad_size,
774 #ifdef UNIV_DEBUG
775 		pad_size,
776 #endif /* UNIV_DEBUG */
777 		ut_uint64_align_down(log_sys.write_lsn,
778 				     OS_FILE_LOG_BLOCK_SIZE),
779 		start_offset - area_start);
780 	srv_stats.log_padded.add(pad_size);
781 	log_sys.write_lsn = write_lsn;
782 	return;
783 }
784 
785 static group_commit_lock write_lock;
786 static group_commit_lock flush_lock;
787 
788 #ifdef UNIV_DEBUG
log_write_lock_own()789 bool log_write_lock_own()
790 {
791   return write_lock.is_owner();
792 }
793 #endif
794 
795 /** Ensure that the log has been written to the log file up to a given
796 log entry (such as that of a transaction commit). Start a new write, or
797 wait and check if an already running write is covering the request.
798 @param[in]	lsn		log sequence number that should be
799 included in the redo log file write
800 @param[in]	flush_to_disk	whether the written log should also
801 be flushed to the file system
802 @param[in]	rotate_key	whether to rotate the encryption key */
log_write_up_to(lsn_t lsn,bool flush_to_disk,bool rotate_key)803 void log_write_up_to(lsn_t lsn, bool flush_to_disk, bool rotate_key)
804 {
805   ut_ad(!srv_read_only_mode);
806   ut_ad(!rotate_key || flush_to_disk);
807   ut_ad(lsn != LSN_MAX);
808 
809   if (recv_no_ibuf_operations)
810   {
811     /* Recovery is running and no operations on the log files are
812     allowed yet (the variable name .._no_ibuf_.. is misleading) */
813     return;
814   }
815 
816   if (flush_to_disk &&
817     flush_lock.acquire(lsn) != group_commit_lock::ACQUIRED)
818   {
819     return;
820   }
821 
822   if (write_lock.acquire(lsn) == group_commit_lock::ACQUIRED)
823   {
824     mysql_mutex_lock(&log_sys.mutex);
825     lsn_t write_lsn= log_sys.get_lsn();
826     write_lock.set_pending(write_lsn);
827 
828     log_write(rotate_key);
829 
830     ut_a(log_sys.write_lsn == write_lsn);
831     write_lock.release(write_lsn);
832   }
833 
834   if (!flush_to_disk)
835   {
836     return;
837   }
838 
839   /* Flush the highest written lsn.*/
840   auto flush_lsn = write_lock.value();
841   flush_lock.set_pending(flush_lsn);
842   log_write_flush_to_disk_low(flush_lsn);
843   flush_lock.release(flush_lsn);
844 
845   log_flush_notify(flush_lsn);
846 }
847 
848 /** Write to the log file up to the last log entry.
849 @param sync  whether to wait for a durable write to complete */
log_buffer_flush_to_disk(bool sync)850 void log_buffer_flush_to_disk(bool sync)
851 {
852   ut_ad(!srv_read_only_mode);
853   log_write_up_to(log_sys.get_lsn(std::memory_order_acquire), sync);
854 }
855 
856 /** Prepare to invoke log_write_and_flush(), before acquiring log_sys.mutex. */
log_write_and_flush_prepare()857 ATTRIBUTE_COLD void log_write_and_flush_prepare()
858 {
859   mysql_mutex_assert_not_owner(&log_sys.mutex);
860 
861   while (flush_lock.acquire(log_sys.get_lsn() + 1) !=
862          group_commit_lock::ACQUIRED);
863   while (write_lock.acquire(log_sys.get_lsn() + 1) !=
864          group_commit_lock::ACQUIRED);
865 }
866 
867 /** Durably write the log and release log_sys.mutex */
log_write_and_flush()868 ATTRIBUTE_COLD void log_write_and_flush()
869 {
870   ut_ad(!srv_read_only_mode);
871   auto lsn= log_sys.get_lsn();
872   write_lock.set_pending(lsn);
873   log_write(false);
874   ut_a(log_sys.write_lsn == lsn);
875   write_lock.release(lsn);
876 
877   lsn= write_lock.value();
878   flush_lock.set_pending(lsn);
879   log_write_flush_to_disk_low(lsn);
880   flush_lock.release(lsn);
881 }
882 
883 /********************************************************************
884 
885 Tries to establish a big enough margin of free space in the log buffer, such
886 that a new log entry can be catenated without an immediate need for a flush. */
log_flush_margin()887 ATTRIBUTE_COLD static void log_flush_margin()
888 {
889 	lsn_t	lsn	= 0;
890 
891 	mysql_mutex_lock(&log_sys.mutex);
892 
893 	if (log_sys.buf_free > log_sys.max_buf_free) {
894 		/* We can write during flush */
895 		lsn = log_sys.get_lsn();
896 	}
897 
898 	mysql_mutex_unlock(&log_sys.mutex);
899 
900 	if (lsn) {
901 		log_write_up_to(lsn, false);
902 	}
903 }
904 
/** Write checkpoint info to the log header and release log_sys.mutex.

The function unlocks log_sys.mutex before performing the I/O, so it must be
entered with that mutex held. It reacquires the mutex to publish the
completed checkpoint (next_checkpoint_no, last_checkpoint_lsn) and unlocks
it again before returning.
@param[in]	end_lsn	start LSN of the FILE_CHECKPOINT mini-transaction */
ATTRIBUTE_COLD void log_write_checkpoint_info(lsn_t end_lsn)
{
	ut_ad(!srv_read_only_mode);
	/* end_lsn may be 0; otherwise it must not precede the checkpoint LSN
	nor exceed the current LSN. */
	ut_ad(end_lsn == 0 || end_lsn >= log_sys.next_checkpoint_lsn);
	ut_ad(end_lsn <= log_sys.get_lsn());
	ut_ad(end_lsn + SIZE_OF_FILE_CHECKPOINT <= log_sys.get_lsn()
	      || srv_shutdown_state > SRV_SHUTDOWN_INITIATED);

	/* Note: this trace message is emitted before the checkpoint block
	has actually been written. */
	DBUG_PRINT("ib_log", ("checkpoint " UINT64PF " at " LSN_PF
			      " written",
			      log_sys.next_checkpoint_no,
			      log_sys.next_checkpoint_lsn));

	/* Build the checkpoint block in log_sys.checkpoint_buf, starting
	from an all-zero block of OS_FILE_LOG_BLOCK_SIZE bytes. */
	byte* buf = log_sys.checkpoint_buf;
	memset_aligned<OS_FILE_LOG_BLOCK_SIZE>(buf, 0, OS_FILE_LOG_BLOCK_SIZE);

	mach_write_to_8(buf + LOG_CHECKPOINT_NO, log_sys.next_checkpoint_no);
	mach_write_to_8(buf + LOG_CHECKPOINT_LSN, log_sys.next_checkpoint_lsn);

	if (log_sys.is_encrypted()) {
		log_crypt_write_checkpoint_buf(buf);
	}

	lsn_t lsn_offset
		= log_sys.log.calc_lsn_offset(log_sys.next_checkpoint_lsn);
	mach_write_to_8(buf + LOG_CHECKPOINT_OFFSET, lsn_offset);
	mach_write_to_8(buf + LOG_CHECKPOINT_LOG_BUF_SIZE,
			srv_log_buffer_size);
	mach_write_to_8(buf + LOG_CHECKPOINT_END_LSN, end_lsn);

	/* The checksum is stored last, after all other fields are final. */
	log_block_store_checksum(buf);

	ut_ad(LOG_CHECKPOINT_1 < srv_page_size);
	ut_ad(LOG_CHECKPOINT_2 < srv_page_size);

	/* Count the write as pending while the mutex is released. */
	++log_sys.n_pending_checkpoint_writes;

	mysql_mutex_unlock(&log_sys.mutex);

	/* Note: We alternate the physical place of the checkpoint info.
	See the (next_checkpoint_no & 1) below. */

	log_sys.log.write((log_sys.next_checkpoint_no & 1) ? LOG_CHECKPOINT_2
							   : LOG_CHECKPOINT_1,
			  {buf, OS_FILE_LOG_BLOCK_SIZE});

	log_sys.log.flush();

	/* The write has been flushed; reacquire the mutex and publish the
	new checkpoint. */
	mysql_mutex_lock(&log_sys.mutex);

	--log_sys.n_pending_checkpoint_writes;
	ut_ad(log_sys.n_pending_checkpoint_writes == 0);

	/* Incrementing the number flips the parity, so that the next
	checkpoint is written to the other slot. */
	log_sys.next_checkpoint_no++;

	log_sys.last_checkpoint_lsn = log_sys.next_checkpoint_lsn;

	DBUG_PRINT("ib_log", ("checkpoint ended at " LSN_PF
			      ", flushed to " LSN_PF,
			      lsn_t{log_sys.last_checkpoint_lsn},
			      log_sys.get_flushed_lsn()));

	MONITOR_INC(MONITOR_NUM_CHECKPOINT);

	/* Debug injection point: kill the server right after a checkpoint. */
	DBUG_EXECUTE_IF("crash_after_checkpoint", DBUG_SUICIDE(););

	mysql_mutex_unlock(&log_sys.mutex);
}
975 
976 /****************************************************************//**
977 Tries to establish a big enough margin of free space in the log, such
978 that a new log entry can be catenated without an immediate need for a
979 checkpoint. NOTE: this function may only be called if the calling thread
980 owns no synchronization objects! */
log_checkpoint_margin()981 ATTRIBUTE_COLD static void log_checkpoint_margin()
982 {
983   while (log_sys.check_flush_or_checkpoint())
984   {
985     mysql_mutex_lock(&log_sys.mutex);
986     ut_ad(!recv_no_log_write);
987 
988     if (!log_sys.check_flush_or_checkpoint())
989     {
990 func_exit:
991       mysql_mutex_unlock(&log_sys.mutex);
992       return;
993     }
994 
995     const lsn_t lsn= log_sys.get_lsn();
996     const lsn_t checkpoint= log_sys.last_checkpoint_lsn;
997     const lsn_t sync_lsn= checkpoint + log_sys.max_checkpoint_age;
998     if (lsn <= sync_lsn)
999     {
1000       log_sys.set_check_flush_or_checkpoint(false);
1001       goto func_exit;
1002     }
1003 
1004     mysql_mutex_unlock(&log_sys.mutex);
1005 
1006     /* We must wait to prevent the tail of the log overwriting the head. */
1007     buf_flush_wait_flushed(std::min(sync_lsn, checkpoint + (1U << 20)));
1008     os_thread_sleep(10000); /* Sleep 10ms to avoid a thundering herd */
1009   }
1010 }
1011 
1012 /**
1013 Checks that there is enough free space in the log to start a new query step.
1014 Flushes the log buffer or makes a new checkpoint if necessary. NOTE: this
1015 function may only be called if the calling thread owns no synchronization
1016 objects! */
log_check_margins()1017 ATTRIBUTE_COLD void log_check_margins()
1018 {
1019   do
1020   {
1021     log_flush_margin();
1022     log_checkpoint_margin();
1023     ut_ad(!recv_no_log_write);
1024   }
1025   while (log_sys.check_flush_or_checkpoint());
1026 }
1027 
/* Defined outside this section of the file; called below to wait for the
end of the buffer resize task. */
extern void buf_resize_shutdown();

/** Make a checkpoint at the latest lsn on shutdown.

The function first resets the master, monitor and lock timeout timers and
shuts down the buffer resize, dict_stats and defragment tasks. It then
polls (label "loop", sleeping CHECK_INTERVAL per round) until:
- no transactions are active (checked only if the server was started, is
  not read-only and srv_force_recovery < SRV_FORCE_NO_TRX_UNDO),
- the rollback of recovered transactions, the fil_crypt threads and the
  page cleaner are no longer running,
- no buffer pool I/O, checkpoint write or log flush is pending.

If srv_fast_shutdown == 2 or the server was never started, it flushes the
log buffer at most and returns without making a checkpoint. Otherwise, when
not in read-only mode, it makes a checkpoint, restarts the polling if the
LSN moved, and finally writes the shutdown LSN with fil_write_flushed_lsn().
In read-only mode the LSN is taken from recv_sys.recovered_lsn and nothing
is written. */
ATTRIBUTE_COLD void logs_empty_and_mark_files_at_shutdown()
{
	lsn_t			lsn;
	/* Number of polling rounds since the last progress message */
	ulint			count = 0;

	ib::info() << "Starting shutdown...";

	/* Wait until the master thread and all other operations are idle: our
	algorithm only works if the server is idle at shutdown */
	bool do_srv_shutdown = false;
	if (srv_master_timer) {
		do_srv_shutdown = srv_fast_shutdown < 2;
		srv_master_timer.reset();
	}

	/* Wait for the end of the buffer resize task.*/
	buf_resize_shutdown();
	dict_stats_shutdown();
	btr_defragment_shutdown();

	srv_shutdown_state = SRV_SHUTDOWN_CLEANUP;

	/* The buffer pool dump is skipped in read-only mode and for the
	very fast shutdown (srv_fast_shutdown == 2). */
	if (srv_buffer_pool_dump_at_shutdown &&
		!srv_read_only_mode && srv_fast_shutdown < 2) {
		buf_dump_start();
	}
	srv_monitor_timer.reset();
	lock_sys.timeout_timer.reset();
	if (do_srv_shutdown) {
		srv_shutdown(srv_fast_shutdown == 0);
	}


	/* Start of one polling round. Every "goto loop" below means that
	some condition for a clean shutdown is not met yet. */
loop:
	ut_ad(lock_sys.is_initialised() || !srv_was_started);
	ut_ad(log_sys.is_initialised() || !srv_was_started);
	ut_ad(fil_system.is_initialised() || !srv_was_started);

	/* COUNT_INTERVAL is the number of polling rounds between progress
	messages; CHECK_INTERVAL is the sleep per round, as passed to
	os_thread_sleep(). */
#define COUNT_INTERVAL 600U
#define CHECK_INTERVAL 100000U
	os_thread_sleep(CHECK_INTERVAL);

	count++;

	/* Check that there are no longer transactions, except for
	PREPARED ones. We need this wait even for the 'very fast'
	shutdown, because the InnoDB layer may have committed or
	prepared transactions and we don't want to lose them. */

	if (ulint total_trx = srv_was_started && !srv_read_only_mode
	    && srv_force_recovery < SRV_FORCE_NO_TRX_UNDO
	    ? trx_sys.any_active_transactions() : 0) {

		if (srv_print_verbose_log && count > COUNT_INTERVAL) {
			service_manager_extend_timeout(
				COUNT_INTERVAL * CHECK_INTERVAL/1000000 * 2,
				"Waiting for %lu active transactions to finish",
				(ulong) total_trx);
			ib::info() << "Waiting for " << total_trx << " active"
				<< " transactions to finish";

			count = 0;
		}

		goto loop;
	}

	/* We need these threads to stop early in shutdown. */
	const char* thread_name;

   if (srv_fast_shutdown != 2 && trx_rollback_is_active) {
		thread_name = "rollback of recovered transactions";
	} else {
		thread_name = NULL;
	}

	if (thread_name) {
		ut_ad(!srv_read_only_mode);
		/* This label is also the target of the "goto" statements
		further below, which set thread_name before jumping here. */
wait_suspend_loop:
		service_manager_extend_timeout(
			COUNT_INTERVAL * CHECK_INTERVAL/1000000 * 2,
			"Waiting for %s to exit", thread_name);
		if (srv_print_verbose_log && count > COUNT_INTERVAL) {
			ib::info() << "Waiting for " << thread_name
				   << " to exit";
			count = 0;
		}
		goto loop;
	}

	/* Check that the background threads are suspended */

	ut_ad(!srv_any_background_activity());
	if (srv_n_fil_crypt_threads_started) {
		/* Signal the encryption threads before waiting for them */
		os_event_set(fil_crypt_threads_event);
		thread_name = "fil_crypt_thread";
		goto wait_suspend_loop;
	}

	if (buf_page_cleaner_is_active) {
		thread_name = "page cleaner thread";
		/* Signal the page cleaner before waiting for it */
		pthread_cond_signal(&buf_pool.do_flush_list);
		goto wait_suspend_loop;
	}

	buf_load_dump_end();

	if (!buf_pool.is_initialised()) {
		ut_ad(!srv_was_started);
	} else if (ulint pending_io = buf_pool.io_pending()) {
		/* The literal 600 has the same value as COUNT_INTERVAL */
		if (srv_print_verbose_log && count > 600) {
			ib::info() << "Waiting for " << pending_io << " buffer"
				" page I/Os to complete";
			count = 0;
		}

		goto loop;
	} else {
		buf_flush_buffer_pool();
	}

	if (log_sys.is_initialised()) {
		/* Sample both pending counters while holding log_sys.mutex */
		mysql_mutex_lock(&log_sys.mutex);
		const ulint	n_write	= log_sys.n_pending_checkpoint_writes;
		const ulint	n_flush	= log_sys.pending_flushes;
		mysql_mutex_unlock(&log_sys.mutex);

		if (n_write || n_flush) {
			/* The literal 600 has the same value as
			COUNT_INTERVAL */
			if (srv_print_verbose_log && count > 600) {
				ib::info() << "Pending checkpoint_writes: "
					<< n_write
					<< ". Pending log flush writes: "
					<< n_flush;
				count = 0;
			}
			goto loop;
		}
	}

	if (srv_fast_shutdown == 2 || !srv_was_started) {
		if (!srv_read_only_mode && srv_was_started) {
			ib::info() << "MySQL has requested a very fast"
				" shutdown without flushing the InnoDB buffer"
				" pool to data files. At the next mysqld"
				" startup InnoDB will do a crash recovery!";

			/* In this fastest shutdown we do not flush the
			buffer pool:

			it is essentially a 'crash' of the InnoDB server.
			Make sure that the log is all flushed to disk, so
			that we can recover all committed transactions in
			a crash recovery. We must not write the lsn stamps
			to the data files, since at a startup InnoDB deduces
			from the stamps if the previous shutdown was clean. */

			log_buffer_flush_to_disk();
		}

		/* No checkpoint and no shutdown LSN are written on this
		path. */
		srv_shutdown_state = SRV_SHUTDOWN_LAST_PHASE;
		return;
	}

	if (!srv_read_only_mode) {
		service_manager_extend_timeout(INNODB_EXTEND_TIMEOUT_INTERVAL,
			"ensuring dirty buffer pool are written to log");
		log_make_checkpoint();

		mysql_mutex_lock(&log_sys.mutex);

		lsn = log_sys.get_lsn();

		/* The LSN counts as unchanged if it equals the checkpoint
		LSN, or exceeds it by exactly SIZE_OF_FILE_CHECKPOINT
		(presumably the FILE_CHECKPOINT record written by the
		checkpoint itself). Any other value means that more log was
		generated, and the whole procedure is repeated. */
		const bool lsn_changed = lsn != log_sys.last_checkpoint_lsn
			&& lsn != log_sys.last_checkpoint_lsn
			+ SIZE_OF_FILE_CHECKPOINT;
		ut_ad(lsn >= log_sys.last_checkpoint_lsn);

		mysql_mutex_unlock(&log_sys.mutex);

		if (lsn_changed) {
			goto loop;
		}

		log_sys.log.flush();
	} else {
		/* Read-only mode: nothing was written, so the shutdown LSN
		is the recovered LSN. */
		lsn = recv_sys.recovered_lsn;
	}

	srv_shutdown_state = SRV_SHUTDOWN_LAST_PHASE;

	/* Make some checks that the server really is quiet */
	ut_ad(!srv_any_background_activity());

	service_manager_extend_timeout(INNODB_EXTEND_TIMEOUT_INTERVAL,
				       "Free innodb buffer pool");
	ut_d(buf_pool.assert_all_freed());

	/* The LSN must not have moved since it was sampled above, unless
	redo logging is disabled by SRV_FORCE_NO_LOG_REDO. */
	ut_a(lsn == log_sys.get_lsn()
	     || srv_force_recovery == SRV_FORCE_NO_LOG_REDO);

	if (UNIV_UNLIKELY(lsn < recv_sys.recovered_lsn)) {
		ib::error() << "Shutdown LSN=" << lsn
			    << " is less than start LSN="
			    << recv_sys.recovered_lsn;
	}

	srv_shutdown_lsn = lsn;

	if (!srv_read_only_mode) {
		/* A failure is only reported; shutdown continues. */
		dberr_t err = fil_write_flushed_lsn(lsn);

		if (err != DB_SUCCESS) {
			ib::error() << "Writing flushed lsn " << lsn
				<< " failed; error=" << err;
		}
	}

	/* Make some checks that the server really is quiet */
	ut_ad(!srv_any_background_activity());

	ut_a(lsn == log_sys.get_lsn()
	     || srv_force_recovery == SRV_FORCE_NO_LOG_REDO);
}
1254 
1255 /******************************************************//**
1256 Prints info of the log. */
1257 void
log_print(FILE * file)1258 log_print(
1259 /*======*/
1260 	FILE*	file)	/*!< in: file where to print */
1261 {
1262 	double	time_elapsed;
1263 	time_t	current_time;
1264 
1265 	mysql_mutex_lock(&log_sys.mutex);
1266 
1267 	const lsn_t lsn= log_sys.get_lsn();
1268 	mysql_mutex_lock(&buf_pool.flush_list_mutex);
1269 	const lsn_t pages_flushed = buf_pool.get_oldest_modification(lsn);
1270 	mysql_mutex_unlock(&buf_pool.flush_list_mutex);
1271 
1272 	fprintf(file,
1273 		"Log sequence number " LSN_PF "\n"
1274 		"Log flushed up to   " LSN_PF "\n"
1275 		"Pages flushed up to " LSN_PF "\n"
1276 		"Last checkpoint at  " LSN_PF "\n",
1277 		lsn,
1278 		log_sys.get_flushed_lsn(),
1279 		pages_flushed,
1280 		lsn_t{log_sys.last_checkpoint_lsn});
1281 
1282 	current_time = time(NULL);
1283 
1284 	time_elapsed = difftime(current_time,
1285 				log_sys.last_printout_time);
1286 
1287 	if (time_elapsed <= 0) {
1288 		time_elapsed = 1;
1289 	}
1290 
1291 	fprintf(file,
1292 		ULINTPF " pending log flushes, "
1293 		ULINTPF " pending chkp writes\n"
1294 		ULINTPF " log i/o's done, %.2f log i/o's/second\n",
1295 		log_sys.pending_flushes.load(),
1296 		log_sys.n_pending_checkpoint_writes,
1297 		log_sys.n_log_ios,
1298 		static_cast<double>(
1299 			log_sys.n_log_ios - log_sys.n_log_ios_old)
1300 		/ time_elapsed);
1301 
1302 	log_sys.n_log_ios_old = log_sys.n_log_ios;
1303 	log_sys.last_printout_time = current_time;
1304 
1305 	mysql_mutex_unlock(&log_sys.mutex);
1306 }
1307 
1308 /**********************************************************************//**
1309 Refreshes the statistics used to print per-second averages. */
1310 void
log_refresh_stats(void)1311 log_refresh_stats(void)
1312 /*===================*/
1313 {
1314 	log_sys.n_log_ios_old = log_sys.n_log_ios;
1315 	log_sys.last_printout_time = time(NULL);
1316 }
1317 
/** Shut down the redo log subsystem.

Does nothing if the log is not initialised. Otherwise it clears the
initialised flag, calls log.close(), frees both log buffers, destroys the
two mutexes, calls recv_sys.close() and frees the checkpoint buffer, in
that order. Must only be invoked on the global log_sys object. */
void log_t::close()
{
  ut_ad(this == &log_sys);
  if (!is_initialised()) return;
  /* NOTE(review): is_initialised() presumably reflects m_initialised, which
  would make a repeated call a no-op; confirm against the class definition. */
  m_initialised= false;
  log.close();

  /* Both buffers are freed with the size srv_log_buffer_size; the pointers
  are reset so that no dangling pointer remains. */
  ut_free_dodump(buf, srv_log_buffer_size);
  buf= nullptr;
  ut_free_dodump(flush_buf, srv_log_buffer_size);
  flush_buf= nullptr;

  mysql_mutex_destroy(&mutex);
  mysql_mutex_destroy(&flush_order_mutex);

  recv_sys.close();

  aligned_free(checkpoint_buf);
  checkpoint_buf= nullptr;
}
1339 
get_log_file_path(const char * filename)1340 std::string get_log_file_path(const char *filename)
1341 {
1342   const size_t size= strlen(srv_log_group_home_dir) + /* path separator */ 1 +
1343                      strlen(filename) + /* longest suffix */ 3;
1344   std::string path;
1345   path.reserve(size);
1346   path.assign(srv_log_group_home_dir);
1347 
1348   std::replace(path.begin(), path.end(), OS_PATH_SEPARATOR_ALT,
1349 	       OS_PATH_SEPARATOR);
1350 
1351   if (path.back() != OS_PATH_SEPARATOR)
1352     path.push_back(OS_PATH_SEPARATOR);
1353   path.append(filename);
1354 
1355   return path;
1356 }
1357 
get_existing_log_files_paths()1358 std::vector<std::string> get_existing_log_files_paths() {
1359   std::vector<std::string> result;
1360 
1361   for (int i= 0; i < 101; i++) {
1362     auto path= get_log_file_path(LOG_FILE_NAME_PREFIX)
1363                                  .append(std::to_string(i));
1364     os_file_stat_t stat;
1365     dberr_t err= os_file_get_status(path.c_str(), &stat, false, true);
1366     if (err)
1367       break;
1368 
1369     if (stat.type != OS_FILE_TYPE_FILE)
1370       break;
1371 
1372     result.push_back(std::move(path));
1373   }
1374 
1375   return result;
1376 }
1377