1 /*****************************************************************************
2
3 Copyright (c) 1995, 2017, Oracle and/or its affiliates. All Rights Reserved.
4 Copyright (c) 2009, Google Inc.
5 Copyright (c) 2014, 2021, MariaDB Corporation.
6
7 Portions of this file contain modifications contributed and copyrighted by
8 Google, Inc. Those modifications are gratefully acknowledged and are described
9 briefly in the InnoDB documentation. The contributions by Google are
10 incorporated with their permission, and subject to the conditions contained in
11 the file COPYING.Google.
12
13 This program is free software; you can redistribute it and/or modify it under
14 the terms of the GNU General Public License as published by the Free Software
15 Foundation; version 2 of the License.
16
17 This program is distributed in the hope that it will be useful, but WITHOUT
18 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
19 FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
20
21 You should have received a copy of the GNU General Public License along with
22 this program; if not, write to the Free Software Foundation, Inc.,
23 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
24
25 *****************************************************************************/
26
27 /**************************************************//**
28 @file log/log0log.cc
29 Database log
30
31 Created 12/9/1995 Heikki Tuuri
32 *******************************************************/
33
34 #include "univ.i"
35 #include <debug_sync.h>
36 #include <my_service_manager.h>
37
38 #include "log0log.h"
39 #include "log0crypt.h"
40 #include "buf0buf.h"
41 #include "buf0flu.h"
42 #include "lock0lock.h"
43 #include "log0recv.h"
44 #include "fil0fil.h"
45 #include "dict0stats_bg.h"
46 #include "btr0defragment.h"
47 #include "srv0srv.h"
48 #include "srv0start.h"
49 #include "trx0sys.h"
50 #include "trx0trx.h"
51 #include "trx0roll.h"
52 #include "srv0mon.h"
53 #include "sync0sync.h"
54 #include "buf0dump.h"
55 #include "log0sync.h"
56
/*
General philosophy of InnoDB redo-logs:

Every change to the contents of a data page must be done
through mtr_t, and mtr_t::commit() will write log records
to the InnoDB redo log. */
63
/** Redo log system: the global log_t object.
log_t::create() asserts (in debug builds) that it is invoked on this object. */
log_t	log_sys;
66
/* A margin for free space in the log buffer before a log entry is catenated
(four log blocks of OS_FILE_LOG_BLOCK_SIZE bytes) */
#define LOG_BUF_WRITE_MARGIN	(4 * OS_FILE_LOG_BLOCK_SIZE)

/* Margins for free space in the log buffer after a log entry is catenated.
log_sys.max_buf_free is computed as
(buffer size) / LOG_BUF_FLUSH_RATIO - LOG_BUF_FLUSH_MARGIN. */
#define LOG_BUF_FLUSH_RATIO	2
#define LOG_BUF_FLUSH_MARGIN	(LOG_BUF_WRITE_MARGIN		\
				 + (4U << srv_page_size_shift))
74
75 /** Extends the log buffer.
76 @param[in] len requested minimum size in bytes */
log_buffer_extend(ulong len)77 void log_buffer_extend(ulong len)
78 {
79 const size_t new_buf_size = ut_calc_align(len, srv_page_size);
80 byte* new_buf = static_cast<byte*>
81 (ut_malloc_dontdump(new_buf_size, PSI_INSTRUMENT_ME));
82 byte* new_flush_buf = static_cast<byte*>
83 (ut_malloc_dontdump(new_buf_size, PSI_INSTRUMENT_ME));
84
85 mysql_mutex_lock(&log_sys.mutex);
86
87 if (len <= srv_log_buffer_size) {
88 /* Already extended enough by the others */
89 mysql_mutex_unlock(&log_sys.mutex);
90 ut_free_dodump(new_buf, new_buf_size);
91 ut_free_dodump(new_flush_buf, new_buf_size);
92 return;
93 }
94
95 ib::warn() << "The redo log transaction size " << len <<
96 " exceeds innodb_log_buffer_size="
97 << srv_log_buffer_size << " / 2). Trying to extend it.";
98
99 byte* old_buf = log_sys.buf;
100 byte* old_flush_buf = log_sys.flush_buf;
101 const ulong old_buf_size = srv_log_buffer_size;
102 srv_log_buffer_size = static_cast<ulong>(new_buf_size);
103 log_sys.buf = new_buf;
104 log_sys.flush_buf = new_flush_buf;
105 memcpy_aligned<OS_FILE_LOG_BLOCK_SIZE>(new_buf, old_buf,
106 log_sys.buf_free);
107
108 log_sys.max_buf_free = new_buf_size / LOG_BUF_FLUSH_RATIO
109 - LOG_BUF_FLUSH_MARGIN;
110
111 mysql_mutex_unlock(&log_sys.mutex);
112
113 ut_free_dodump(old_buf, old_buf_size);
114 ut_free_dodump(old_flush_buf, old_buf_size);
115
116 ib::info() << "innodb_log_buffer_size was extended to "
117 << new_buf_size << ".";
118 }
119
120 /** Calculate the recommended highest values for lsn - last_checkpoint_lsn
121 and lsn - buf_pool.get_oldest_modification().
122 @param[in] file_size requested innodb_log_file_size
123 @retval true on success
124 @retval false if the smallest log group is too small to
125 accommodate the number of OS threads in the database server */
126 bool
log_set_capacity(ulonglong file_size)127 log_set_capacity(ulonglong file_size)
128 {
129 /* Margin for the free space in the smallest log, before a new query
130 step which modifies the database, is started */
131 const size_t LOG_CHECKPOINT_FREE_PER_THREAD = 4U
132 << srv_page_size_shift;
133 const size_t LOG_CHECKPOINT_EXTRA_FREE = 8U << srv_page_size_shift;
134
135 lsn_t margin;
136 ulint free;
137
138 lsn_t smallest_capacity = file_size - LOG_FILE_HDR_SIZE;
139 /* Add extra safety */
140 smallest_capacity -= smallest_capacity / 10;
141
142 /* For each OS thread we must reserve so much free space in the
143 smallest log group that it can accommodate the log entries produced
144 by single query steps: running out of free log space is a serious
145 system error which requires rebooting the database. */
146
147 free = LOG_CHECKPOINT_FREE_PER_THREAD * 10
148 + LOG_CHECKPOINT_EXTRA_FREE;
149 if (free >= smallest_capacity / 2) {
150 ib::error() << "Cannot continue operation because log file is "
151 "too small. Increase innodb_log_file_size "
152 "or decrease innodb_thread_concurrency. "
153 << INNODB_PARAMETERS_MSG;
154 return false;
155 }
156
157 margin = smallest_capacity - free;
158 margin = margin - margin / 10; /* Add still some extra safety */
159
160 mysql_mutex_lock(&log_sys.mutex);
161
162 log_sys.log_capacity = smallest_capacity;
163
164 log_sys.max_modified_age_async = margin - margin / 8;
165 log_sys.max_checkpoint_age = margin;
166
167 mysql_mutex_unlock(&log_sys.mutex);
168
169 return(true);
170 }
171
172 /** Initialize the redo log subsystem. */
create()173 void log_t::create()
174 {
175 ut_ad(this == &log_sys);
176 ut_ad(!is_initialised());
177 m_initialised= true;
178
179 mysql_mutex_init(log_sys_mutex_key, &mutex, nullptr);
180 mysql_mutex_init(log_flush_order_mutex_key, &flush_order_mutex, nullptr);
181
182 /* Start the lsn from one log block from zero: this way every
183 log record has a non-zero start lsn, a fact which we will use */
184
185 set_lsn(LOG_START_LSN + LOG_BLOCK_HDR_SIZE);
186 set_flushed_lsn(LOG_START_LSN + LOG_BLOCK_HDR_SIZE);
187
188 ut_ad(srv_log_buffer_size >= 16 * OS_FILE_LOG_BLOCK_SIZE);
189 ut_ad(srv_log_buffer_size >= 4U << srv_page_size_shift);
190
191 buf= static_cast<byte*>(ut_malloc_dontdump(srv_log_buffer_size,
192 PSI_INSTRUMENT_ME));
193 TRASH_ALLOC(buf, srv_log_buffer_size);
194 flush_buf= static_cast<byte*>(ut_malloc_dontdump(srv_log_buffer_size,
195 PSI_INSTRUMENT_ME));
196 TRASH_ALLOC(flush_buf, srv_log_buffer_size);
197
198 max_buf_free= srv_log_buffer_size / LOG_BUF_FLUSH_RATIO -
199 LOG_BUF_FLUSH_MARGIN;
200 set_check_flush_or_checkpoint();
201
202 n_log_ios_old= n_log_ios;
203 last_printout_time= time(NULL);
204
205 buf_next_to_write= 0;
206 last_checkpoint_lsn= write_lsn= LOG_START_LSN;
207 n_log_ios= 0;
208 n_log_ios_old= 0;
209 log_capacity= 0;
210 max_modified_age_async= 0;
211 max_checkpoint_age= 0;
212 next_checkpoint_no= 0;
213 next_checkpoint_lsn= 0;
214 n_pending_checkpoint_writes= 0;
215
216 log_block_init(buf, LOG_START_LSN);
217 log_block_set_first_rec_group(buf, LOG_BLOCK_HDR_SIZE);
218
219 buf_free= LOG_BLOCK_HDR_SIZE;
220 checkpoint_buf= static_cast<byte*>
221 (aligned_malloc(OS_FILE_LOG_BLOCK_SIZE, OS_FILE_LOG_BLOCK_SIZE));
222 }
223
/** Move constructor: take over the file handle of rhs and leave
rhs marked as closed (OS_FILE_CLOSED).
NOTE(review): only m_fd is transferred here; confirm whether any other
state of rhs (such as m_durable_writes) should be carried over as well. */
file_os_io::file_os_io(file_os_io &&rhs) : m_fd(rhs.m_fd)
{
  rhs.m_fd= OS_FILE_CLOSED;
}
228
/** Move assignment: exchange the file handles of the two objects, so that
the handle previously held by this object ends up in rhs.
@return reference to this object */
file_os_io &file_os_io::operator=(file_os_io &&rhs)
{
  std::swap(m_fd, rhs.m_fd);
  return *this;
}
234
/** Destructor: close the file if it is still open.
The result of close() is ignored. */
file_os_io::~file_os_io() noexcept
{
  if (is_opened())
    close();
}
240
open(const char * path,bool read_only)241 dberr_t file_os_io::open(const char *path, bool read_only) noexcept
242 {
243 ut_ad(!is_opened());
244
245 bool success;
246 auto tmp_fd= os_file_create(
247 innodb_log_file_key, path, OS_FILE_OPEN | OS_FILE_ON_ERROR_NO_EXIT,
248 OS_FILE_NORMAL, OS_LOG_FILE, read_only, &success);
249 if (!success)
250 return DB_ERROR;
251
252 m_durable_writes= srv_file_flush_method == SRV_O_DSYNC;
253 m_fd= tmp_fd;
254 return success ? DB_SUCCESS : DB_ERROR;
255 }
256
/** Rename a log file using os_file_rename().
@param old_path  current file name
@param new_path  new file name
@return DB_SUCCESS if os_file_rename() succeeded, otherwise DB_ERROR */
dberr_t file_os_io::rename(const char *old_path, const char *new_path) noexcept
{
  return os_file_rename(innodb_log_file_key, old_path, new_path) ? DB_SUCCESS
                                                                 : DB_ERROR;
}
262
close()263 dberr_t file_os_io::close() noexcept
264 {
265 if (!os_file_close(m_fd))
266 return DB_ERROR;
267
268 m_fd= OS_FILE_CLOSED;
269 return DB_SUCCESS;
270 }
271
/** Read buf.size() bytes starting at the given file offset.
@param offset  byte offset in the file
@param buf     destination buffer
@return the result of os_file_read() */
dberr_t file_os_io::read(os_offset_t offset, span<byte> buf) noexcept
{
  return os_file_read(IORequestRead, m_fd, buf.data(), offset, buf.size());
}
276
/** Write buf.size() bytes starting at the given file offset.
@param path    file name, passed on to os_file_write()
@param offset  byte offset in the file
@param buf     data to write
@return the result of os_file_write() */
dberr_t file_os_io::write(const char *path, os_offset_t offset,
                          span<const byte> buf) noexcept
{
  return os_file_write(IORequestWrite, path, m_fd, buf.data(), offset,
                       buf.size());
}
283
/** Flush the file using os_file_flush().
@return DB_SUCCESS if os_file_flush() succeeded, otherwise DB_ERROR */
dberr_t file_os_io::flush() noexcept
{
  return os_file_flush(m_fd) ? DB_SUCCESS : DB_ERROR;
}
288
289 #ifdef HAVE_PMEM
290
291 #include <libpmem.h>
292
/** Memory mapped file.
Holds a single mapping (m_area); the destructor unmaps it if it is
still mapped. The object can be neither copied nor moved. */
class mapped_file_t
{
public:
  mapped_file_t()= default;
  mapped_file_t(const mapped_file_t &)= delete;
  mapped_file_t &operator=(const mapped_file_t &)= delete;
  mapped_file_t(mapped_file_t &&)= delete;
  mapped_file_t &operator=(mapped_file_t &&)= delete;
  ~mapped_file_t() noexcept;

  /** Map a whole file into memory.
  @param path       file name
  @param read_only  whether to open and map the file read-only
  @param nvme       whether to add MAP_SYNC to the mapping flags
  @return DB_SUCCESS or DB_ERROR */
  dberr_t map(const char *path, bool read_only= false,
              bool nvme= false) noexcept;
  /** Remove the mapping. The file must currently be mapped.
  @return DB_SUCCESS or DB_ERROR */
  dberr_t unmap() noexcept;
  /** @return start address of the mapped area */
  byte *data() noexcept { return m_area.data(); }

private:
  /** The mapped area; empty when nothing is mapped */
  span<byte> m_area;
};
312
/** Destructor: remove the mapping if one exists.
The result of unmap() is ignored. */
mapped_file_t::~mapped_file_t() noexcept
{
  if (!m_area.empty())
    unmap();
}
318
map(const char * path,bool read_only,bool nvme)319 dberr_t mapped_file_t::map(const char *path, bool read_only,
320 bool nvme) noexcept
321 {
322 auto fd= mysql_file_open(innodb_log_file_key, path,
323 read_only ? O_RDONLY : O_RDWR, MYF(MY_WME));
324 if (fd == -1)
325 return DB_ERROR;
326
327 const auto file_size= size_t{os_file_get_size(path).m_total_size};
328
329 const int nvme_flag= nvme ? MAP_SYNC : 0;
330 void *ptr=
331 my_mmap(0, file_size, read_only ? PROT_READ : PROT_READ | PROT_WRITE,
332 MAP_SHARED_VALIDATE | nvme_flag, fd, 0);
333 mysql_file_close(fd, MYF(MY_WME));
334
335 if (ptr == MAP_FAILED)
336 return DB_ERROR;
337
338 m_area= {static_cast<byte *>(ptr), file_size};
339 return DB_SUCCESS;
340 }
341
unmap()342 dberr_t mapped_file_t::unmap() noexcept
343 {
344 ut_ad(!m_area.empty());
345
346 if (my_munmap(m_area.data(), m_area.size()))
347 return DB_ERROR;
348
349 m_area= {};
350 return DB_SUCCESS;
351 }
352
is_pmem(const char * path)353 static bool is_pmem(const char *path) noexcept
354 {
355 mapped_file_t mf;
356 return mf.map(path, true, true) == DB_SUCCESS ? true : false;
357 }
358
359 class file_pmem_io final : public file_io
360 {
361 public:
file_pmem_io()362 file_pmem_io() noexcept : file_io(true) {}
363
open(const char * path,bool read_only)364 dberr_t open(const char *path, bool read_only) noexcept final
365 {
366 return m_file.map(path, read_only, true);
367 }
rename(const char * old_path,const char * new_path)368 dberr_t rename(const char *old_path, const char *new_path) noexcept final
369 {
370 return os_file_rename(innodb_log_file_key, old_path, new_path) ? DB_SUCCESS
371 : DB_ERROR;
372 }
close()373 dberr_t close() noexcept final { return m_file.unmap(); }
read(os_offset_t offset,span<byte> buf)374 dberr_t read(os_offset_t offset, span<byte> buf) noexcept final
375 {
376 memcpy(buf.data(), m_file.data() + offset, buf.size());
377 return DB_SUCCESS;
378 }
write(const char *,os_offset_t offset,span<const byte> buf)379 dberr_t write(const char *, os_offset_t offset,
380 span<const byte> buf) noexcept final
381 {
382 pmem_memcpy_persist(m_file.data() + offset, buf.data(), buf.size());
383 return DB_SUCCESS;
384 }
flush()385 dberr_t flush() noexcept final
386 {
387 ut_ad(0);
388 return DB_SUCCESS;
389 }
390
391 private:
392 mapped_file_t m_file;
393 };
394 #endif
395
open(bool read_only)396 dberr_t log_file_t::open(bool read_only) noexcept
397 {
398 ut_a(!is_opened());
399
400 #ifdef HAVE_PMEM
401 auto ptr= is_pmem(m_path.c_str())
402 ? std::unique_ptr<file_io>(new file_pmem_io)
403 : std::unique_ptr<file_io>(new file_os_io);
404 #else
405 auto ptr= std::unique_ptr<file_io>(new file_os_io);
406 #endif
407
408 if (dberr_t err= ptr->open(m_path.c_str(), read_only))
409 return err;
410
411 m_file= std::move(ptr);
412 return DB_SUCCESS;
413 }
414
/** @return whether an I/O implementation is attached (m_file is set),
which open() does on success and close() undoes */
bool log_file_t::is_opened() const noexcept
{
  return static_cast<bool>(m_file);
}
419
rename(std::string new_path)420 dberr_t log_file_t::rename(std::string new_path) noexcept
421 {
422 if (dberr_t err= m_file->rename(m_path.c_str(), new_path.c_str()))
423 return err;
424
425 m_path = std::move(new_path);
426 return DB_SUCCESS;
427 }
428
close()429 dberr_t log_file_t::close() noexcept
430 {
431 ut_a(is_opened());
432
433 if (dberr_t err= m_file->close())
434 return err;
435
436 m_file.reset();
437 return DB_SUCCESS;
438 }
439
/** Read from the open log file.
@param offset  byte offset in the file
@param buf     destination buffer
@return the result of the I/O implementation's read() */
dberr_t log_file_t::read(os_offset_t offset, span<byte> buf) noexcept
{
  ut_ad(is_opened());
  return m_file->read(offset, buf);
}
445
/** @return the result of writes_are_durable() of the I/O implementation.
Callers in this file skip flush() when this holds.
Dereferences m_file without checking that the file is open. */
bool log_file_t::writes_are_durable() const noexcept
{
  return m_file->writes_are_durable();
}
450
/** Write to the open log file.
@param offset  byte offset in the file
@param buf     data to write
@return the result of the I/O implementation's write() */
dberr_t log_file_t::write(os_offset_t offset, span<const byte> buf) noexcept
{
  ut_ad(is_opened());
  return m_file->write(m_path.c_str(), offset, buf);
}
456
/** Flush the open log file.
@return the result of the I/O implementation's flush() */
dberr_t log_file_t::flush() noexcept
{
  ut_ad(is_opened());
  return m_file->flush();
}
462
open_file(std::string path)463 void log_t::file::open_file(std::string path)
464 {
465 fd= log_file_t(std::move(path));
466 if (const dberr_t err= fd.open(srv_read_only_mode))
467 ib::fatal() << "open(" << fd.get_path() << ") returned " << err;
468 }
469
/** Update the log block checksum: compute it with
log_block_calc_checksum_crc32() and store it with log_block_set_checksum().
@param[in,out]	block	log block */
static void log_block_store_checksum(byte* block)
{
	log_block_set_checksum(block, log_block_calc_checksum_crc32(block));
}
475
write_header_durable(lsn_t lsn)476 void log_t::file::write_header_durable(lsn_t lsn)
477 {
478 ut_ad(lsn % OS_FILE_LOG_BLOCK_SIZE == 0);
479 ut_ad(!recv_no_log_write);
480 ut_ad(log_sys.log.format == log_t::FORMAT_10_5 ||
481 log_sys.log.format == log_t::FORMAT_ENC_10_5);
482
483 byte *buf= log_sys.checkpoint_buf;
484 memset_aligned<OS_FILE_LOG_BLOCK_SIZE>(buf, 0, OS_FILE_LOG_BLOCK_SIZE);
485
486 mach_write_to_4(buf + LOG_HEADER_FORMAT, log_sys.log.format);
487 mach_write_to_4(buf + LOG_HEADER_SUBFORMAT, log_sys.log.subformat);
488 mach_write_to_8(buf + LOG_HEADER_START_LSN, lsn);
489 strcpy(reinterpret_cast<char*>(buf) + LOG_HEADER_CREATOR,
490 LOG_HEADER_CREATOR_CURRENT);
491 ut_ad(LOG_HEADER_CREATOR_END - LOG_HEADER_CREATOR >=
492 sizeof LOG_HEADER_CREATOR_CURRENT);
493 log_block_store_checksum(buf);
494
495 DBUG_PRINT("ib_log", ("write " LSN_PF, lsn));
496
497 log_sys.log.write(0, {buf, OS_FILE_LOG_BLOCK_SIZE});
498 if (!log_sys.log.writes_are_durable())
499 log_sys.log.flush();
500 }
501
/** Read from the log file. A read error is reported via ib::fatal().
@param offset  byte offset in the file
@param buf     destination buffer */
void log_t::file::read(os_offset_t offset, span<byte> buf)
{
  if (const dberr_t err= fd.read(offset, buf))
    ib::fatal() << "read(" << fd.get_path() << ") returned "<< err;
}
507
/** @return the result of fd.writes_are_durable(); callers in this file
skip flush() when this holds */
bool log_t::file::writes_are_durable() const noexcept
{
  return fd.writes_are_durable();
}
512
/** Write to the log file and update the I/O statistics.
A write error is reported via ib::fatal().
@param offset  byte offset in the file
@param buf     data to write */
void log_t::file::write(os_offset_t offset, span<byte> buf)
{
  /* The pending-writes counter is raised only for the duration of
  the write itself. */
  srv_stats.os_log_pending_writes.inc();
  if (const dberr_t err= fd.write(offset, buf))
    ib::fatal() << "write(" << fd.get_path() << ") returned " << err;
  srv_stats.os_log_pending_writes.dec();
  srv_stats.os_log_written.add(buf.size());
  srv_stats.log_writes.inc();
  log_sys.n_log_ios++;
}
523
/** Flush the log file and update the flush counters.
A flush error is reported via ib::fatal(). */
void log_t::file::flush()
{
  /* pending_flushes is raised only for the duration of the flush;
  flushes counts the completed ones. */
  log_sys.pending_flushes.fetch_add(1, std::memory_order_acquire);
  if (const dberr_t err= fd.flush())
    ib::fatal() << "flush(" << fd.get_path() << ") returned " << err;
  log_sys.pending_flushes.fetch_sub(1, std::memory_order_release);
  log_sys.flushes.fetch_add(1, std::memory_order_release);
}
532
close_file()533 void log_t::file::close_file()
534 {
535 if (fd.is_opened())
536 {
537 if (const dberr_t err= fd.close())
538 ib::fatal() << "close(" << fd.get_path() << ") returned " << err;
539 }
540 fd.free(); // Free path
541 }
542
543 /** Initialize the redo log. */
create()544 void log_t::file::create()
545 {
546 ut_ad(this == &log_sys.log);
547 ut_ad(log_sys.is_initialised());
548
549 format= srv_encrypt_log ? log_t::FORMAT_ENC_10_5 : log_t::FORMAT_10_5;
550 subformat= 2;
551 file_size= srv_log_file_size;
552 lsn= LOG_START_LSN;
553 lsn_offset= LOG_FILE_HDR_SIZE;
554 }
555
556 /******************************************************//**
557 Writes a buffer to a log file. */
558 static
559 void
log_write_buf(byte * buf,ulint len,ulint pad_len,lsn_t start_lsn,ulint new_data_offset)560 log_write_buf(
561 byte* buf, /*!< in: buffer */
562 ulint len, /*!< in: buffer len; must be divisible
563 by OS_FILE_LOG_BLOCK_SIZE */
564 #ifdef UNIV_DEBUG
565 ulint pad_len, /*!< in: pad len in the buffer len */
566 #endif /* UNIV_DEBUG */
567 lsn_t start_lsn, /*!< in: start lsn of the buffer; must
568 be divisible by
569 OS_FILE_LOG_BLOCK_SIZE */
570 ulint new_data_offset)/*!< in: start offset of new data in
571 buf: this parameter is used to decide
572 if we have to write a new log file
573 header */
574 {
575 ulint write_len;
576 lsn_t next_offset;
577 ulint i;
578
579 ut_ad(log_write_lock_own());
580 ut_ad(!recv_no_log_write);
581 ut_a(len % OS_FILE_LOG_BLOCK_SIZE == 0);
582 ut_a(start_lsn % OS_FILE_LOG_BLOCK_SIZE == 0);
583
584 loop:
585 if (len == 0) {
586
587 return;
588 }
589
590 next_offset = log_sys.log.calc_lsn_offset(start_lsn);
591
592 if ((next_offset % log_sys.log.file_size) + len
593 > log_sys.log.file_size) {
594 /* if the above condition holds, then the below expression
595 is < len which is ulint, so the typecast is ok */
596 write_len = ulint(log_sys.log.file_size
597 - (next_offset % log_sys.log.file_size));
598 } else {
599 write_len = len;
600 }
601
602 DBUG_PRINT("ib_log",
603 ("write " LSN_PF " to " LSN_PF
604 ": len " ULINTPF
605 " blocks " ULINTPF ".." ULINTPF,
606 start_lsn, next_offset,
607 write_len,
608 log_block_get_hdr_no(buf),
609 log_block_get_hdr_no(
610 buf + write_len
611 - OS_FILE_LOG_BLOCK_SIZE)));
612
613 ut_ad(pad_len >= len
614 || log_block_get_hdr_no(buf)
615 == log_block_convert_lsn_to_no(start_lsn));
616
617 /* Calculate the checksums for each log block and write them to
618 the trailer fields of the log blocks */
619
620 for (i = 0; i < write_len / OS_FILE_LOG_BLOCK_SIZE; i++) {
621 #ifdef UNIV_DEBUG
622 ulint hdr_no_2 = log_block_get_hdr_no(buf) + i;
623 DBUG_EXECUTE_IF("innodb_small_log_block_no_limit",
624 hdr_no_2 = ((hdr_no_2 - 1) & 0xFUL) + 1;);
625 #endif
626 ut_ad(pad_len >= len
627 || i * OS_FILE_LOG_BLOCK_SIZE >= len - pad_len
628 || log_block_get_hdr_no(buf + i * OS_FILE_LOG_BLOCK_SIZE) == hdr_no_2);
629 log_block_store_checksum(buf + i * OS_FILE_LOG_BLOCK_SIZE);
630 }
631
632 log_sys.log.write(next_offset, {buf, write_len});
633
634 if (write_len < len) {
635 start_lsn += write_len;
636 len -= write_len;
637 buf += write_len;
638 goto loop;
639 }
640 }
641
/** Flush the recently written changes to the log file, and advance
the flushed LSN of log_sys.
The flush is skipped if writes to the log file are already durable.
@param lsn  the LSN up to which the log has been written;
            must not be less than the current flushed LSN */
static void log_write_flush_to_disk_low(lsn_t lsn)
{
  if (!log_sys.log.writes_are_durable())
    log_sys.log.flush();
  ut_a(lsn >= log_sys.get_flushed_lsn());
  log_sys.set_flushed_lsn(lsn);
}
650
651 /** Swap log buffers, and copy the content of last block
652 from old buf to the head of the new buf. Thus, buf_free and
653 buf_next_to_write would be changed accordingly */
654 static inline
655 void
log_buffer_switch()656 log_buffer_switch()
657 {
658 mysql_mutex_assert_owner(&log_sys.mutex);
659 ut_ad(log_write_lock_own());
660
661 size_t area_end = ut_calc_align<size_t>(
662 log_sys.buf_free, OS_FILE_LOG_BLOCK_SIZE);
663
664 /* Copy the last block to new buf */
665 memcpy_aligned<OS_FILE_LOG_BLOCK_SIZE>(
666 log_sys.flush_buf,
667 log_sys.buf + area_end - OS_FILE_LOG_BLOCK_SIZE,
668 OS_FILE_LOG_BLOCK_SIZE);
669
670 std::swap(log_sys.buf, log_sys.flush_buf);
671
672 log_sys.buf_free %= OS_FILE_LOG_BLOCK_SIZE;
673 log_sys.buf_next_to_write = log_sys.buf_free;
674 }
675
676 /** Invoke commit_checkpoint_notify_ha() to notify that outstanding
677 log writes have been completed. */
678 void log_flush_notify(lsn_t flush_lsn);
679
/**
Writes log buffer to disk
which is the "write" part of log_write_up_to().

This function does not flush anything.

Note : the caller must have log_sys.mutex locked, and this
mutex is released in the function.

On return, log_sys.write_lsn has been advanced to the LSN that was
current when the buffers were switched, unless there was nothing to write.

@param rotate_key  whether to request key rotation when encrypting the log
*/
static void log_write(bool rotate_key)
{
	mysql_mutex_assert_owner(&log_sys.mutex);
	ut_ad(!recv_no_log_write);
	lsn_t write_lsn;
	if (log_sys.buf_free == log_sys.buf_next_to_write) {
		/* Nothing to write */
		mysql_mutex_unlock(&log_sys.mutex);
		return;
	}

	ulint		start_offset;
	ulint		end_offset;
	ulint		area_start;
	ulint		area_end;
	ulong		write_ahead_size = srv_log_write_ahead_size;
	ulint		pad_size;

	DBUG_PRINT("ib_log", ("write " LSN_PF " to " LSN_PF,
			      log_sys.write_lsn,
			      log_sys.get_lsn()));


	start_offset = log_sys.buf_next_to_write;
	end_offset = log_sys.buf_free;

	/* The area to write consists of whole log blocks: round the
	start down and the end up to the block size. */
	area_start = ut_2pow_round(start_offset,
				   ulint(OS_FILE_LOG_BLOCK_SIZE));
	area_end = ut_calc_align(end_offset, ulint(OS_FILE_LOG_BLOCK_SIZE));

	ut_ad(area_end - area_start > 0);

	/* Mark the first block of the area and stamp the last one
	with the checkpoint number. */
	log_block_set_flush_bit(log_sys.buf + area_start, TRUE);
	log_block_set_checkpoint_no(
		log_sys.buf + area_end - OS_FILE_LOG_BLOCK_SIZE,
		log_sys.next_checkpoint_no);

	/* Remember the LSN and the buffer that will be written, then
	switch buffers while still holding log_sys.mutex. */
	write_lsn = log_sys.get_lsn();
	byte *write_buf = log_sys.buf;

	log_buffer_switch();

	log_sys.log.set_fields(log_sys.write_lsn);

	/* From here on, only write_buf (the buffer that was switched
	out) is accessed, without holding log_sys.mutex. */
	mysql_mutex_unlock(&log_sys.mutex);
	/* Erase the end of the last log block. */
	memset(write_buf + end_offset, 0,
	       ~end_offset & (OS_FILE_LOG_BLOCK_SIZE - 1));

	/* Calculate pad_size if needed. */
	pad_size = 0;
	if (write_ahead_size > OS_FILE_LOG_BLOCK_SIZE) {
		ulint	end_offset_in_unit;
		/* NOTE: this end_offset is a file offset and shadows
		the buffer offset declared above. */
		lsn_t	end_offset = log_sys.log.calc_lsn_offset(
			ut_uint64_align_up(write_lsn, OS_FILE_LOG_BLOCK_SIZE));
		end_offset_in_unit = (ulint) (end_offset % write_ahead_size);

		if (end_offset_in_unit > 0
		    && (area_end - area_start) > end_offset_in_unit) {
			/* The first block in the unit was initialized
			after the last writing.
			Needs to be written padded data once. */
			pad_size = std::min<ulint>(
				ulint(write_ahead_size) - end_offset_in_unit,
				srv_log_buffer_size - area_end);
			::memset(write_buf + area_end, 0, pad_size);
		}
	}

	if (UNIV_UNLIKELY(srv_shutdown_state > SRV_SHUTDOWN_INITIATED)) {
		service_manager_extend_timeout(INNODB_EXTEND_TIMEOUT_INTERVAL,
					       "InnoDB log write: "
					       LSN_PF, log_sys.write_lsn);
	}

	if (log_sys.is_encrypted()) {
		log_crypt(write_buf + area_start, log_sys.write_lsn,
			  area_end - area_start,
			  rotate_key ? LOG_ENCRYPT_ROTATE_KEY : LOG_ENCRYPT);
	}

	/* Do the write to the log file */
	log_write_buf(
		write_buf + area_start, area_end - area_start + pad_size,
#ifdef UNIV_DEBUG
		pad_size,
#endif /* UNIV_DEBUG */
		ut_uint64_align_down(log_sys.write_lsn,
				     OS_FILE_LOG_BLOCK_SIZE),
		start_offset - area_start);
	srv_stats.log_padded.add(pad_size);
	log_sys.write_lsn = write_lsn;
	return;
}
784
/** Group commit lock for writing the log buffer to the log file;
acquired in log_write_up_to() and log_write_and_flush_prepare() */
static group_commit_lock write_lock;
/** Group commit lock for flushing the log file; acquired before
write_lock when a flush is requested */
static group_commit_lock flush_lock;
787
#ifdef UNIV_DEBUG
/** Debug-only check used in assertions.
@return the result of write_lock.is_owner() */
bool log_write_lock_own()
{
  return write_lock.is_owner();
}
#endif
794
/** Ensure that the log has been written to the log file up to a given
log entry (such as that of a transaction commit). Start a new write, or
wait and check if an already running write is covering the request.
Returns without doing anything while recv_no_ibuf_operations is set.
@param[in]	lsn		log sequence number that should be
included in the redo log file write
@param[in]	flush_to_disk	whether the written log should also
be flushed to the file system
@param[in]	rotate_key	whether to rotate the encryption key */
void log_write_up_to(lsn_t lsn, bool flush_to_disk, bool rotate_key)
{
  ut_ad(!srv_read_only_mode);
  ut_ad(!rotate_key || flush_to_disk);
  ut_ad(lsn != LSN_MAX);

  if (recv_no_ibuf_operations)
  {
    /* Recovery is running and no operations on the log files are
    allowed yet (the variable name .._no_ibuf_.. is misleading) */
    return;
  }

  /* When a flush is requested, flush_lock is acquired first. If it is
  not acquired, there is nothing left to do for this caller. */
  if (flush_to_disk &&
      flush_lock.acquire(lsn) != group_commit_lock::ACQUIRED)
  {
    return;
  }

  if (write_lock.acquire(lsn) == group_commit_lock::ACQUIRED)
  {
    /* Write everything that has been buffered so far, not only
    up to the requested lsn. log_write() releases log_sys.mutex. */
    mysql_mutex_lock(&log_sys.mutex);
    lsn_t write_lsn= log_sys.get_lsn();
    write_lock.set_pending(write_lsn);

    log_write(rotate_key);

    ut_a(log_sys.write_lsn == write_lsn);
    write_lock.release(write_lsn);
  }

  if (!flush_to_disk)
  {
    return;
  }

  /* Flush the highest written lsn.*/
  auto flush_lsn = write_lock.value();
  flush_lock.set_pending(flush_lsn);
  log_write_flush_to_disk_low(flush_lsn);
  flush_lock.release(flush_lsn);

  log_flush_notify(flush_lsn);
}
847
/** Write to the log file up to the last log entry, by invoking
log_write_up_to() with the current LSN.
@param sync  whether to wait for a durable write to complete */
void log_buffer_flush_to_disk(bool sync)
{
  ut_ad(!srv_read_only_mode);
  log_write_up_to(log_sys.get_lsn(std::memory_order_acquire), sync);
}
855
856 /** Prepare to invoke log_write_and_flush(), before acquiring log_sys.mutex. */
log_write_and_flush_prepare()857 ATTRIBUTE_COLD void log_write_and_flush_prepare()
858 {
859 mysql_mutex_assert_not_owner(&log_sys.mutex);
860
861 while (flush_lock.acquire(log_sys.get_lsn() + 1) !=
862 group_commit_lock::ACQUIRED);
863 while (write_lock.acquire(log_sys.get_lsn() + 1) !=
864 group_commit_lock::ACQUIRED);
865 }
866
/** Durably write the log and release log_sys.mutex.
Releases write_lock and flush_lock, which
log_write_and_flush_prepare() acquires. */
ATTRIBUTE_COLD void log_write_and_flush()
{
  ut_ad(!srv_read_only_mode);
  auto lsn= log_sys.get_lsn();
  write_lock.set_pending(lsn);
  /* log_write() releases log_sys.mutex. */
  log_write(false);
  ut_a(log_sys.write_lsn == lsn);
  write_lock.release(lsn);

  /* Flush up to the LSN that write_lock reports. */
  lsn= write_lock.value();
  flush_lock.set_pending(lsn);
  log_write_flush_to_disk_low(lsn);
  flush_lock.release(lsn);
}
882
883 /********************************************************************
884
885 Tries to establish a big enough margin of free space in the log buffer, such
886 that a new log entry can be catenated without an immediate need for a flush. */
log_flush_margin()887 ATTRIBUTE_COLD static void log_flush_margin()
888 {
889 lsn_t lsn = 0;
890
891 mysql_mutex_lock(&log_sys.mutex);
892
893 if (log_sys.buf_free > log_sys.max_buf_free) {
894 /* We can write during flush */
895 lsn = log_sys.get_lsn();
896 }
897
898 mysql_mutex_unlock(&log_sys.mutex);
899
900 if (lsn) {
901 log_write_up_to(lsn, false);
902 }
903 }
904
905 /** Write checkpoint info to the log header and release log_sys.mutex.
906 @param[in] end_lsn start LSN of the FILE_CHECKPOINT mini-transaction */
log_write_checkpoint_info(lsn_t end_lsn)907 ATTRIBUTE_COLD void log_write_checkpoint_info(lsn_t end_lsn)
908 {
909 ut_ad(!srv_read_only_mode);
910 ut_ad(end_lsn == 0 || end_lsn >= log_sys.next_checkpoint_lsn);
911 ut_ad(end_lsn <= log_sys.get_lsn());
912 ut_ad(end_lsn + SIZE_OF_FILE_CHECKPOINT <= log_sys.get_lsn()
913 || srv_shutdown_state > SRV_SHUTDOWN_INITIATED);
914
915 DBUG_PRINT("ib_log", ("checkpoint " UINT64PF " at " LSN_PF
916 " written",
917 log_sys.next_checkpoint_no,
918 log_sys.next_checkpoint_lsn));
919
920 byte* buf = log_sys.checkpoint_buf;
921 memset_aligned<OS_FILE_LOG_BLOCK_SIZE>(buf, 0, OS_FILE_LOG_BLOCK_SIZE);
922
923 mach_write_to_8(buf + LOG_CHECKPOINT_NO, log_sys.next_checkpoint_no);
924 mach_write_to_8(buf + LOG_CHECKPOINT_LSN, log_sys.next_checkpoint_lsn);
925
926 if (log_sys.is_encrypted()) {
927 log_crypt_write_checkpoint_buf(buf);
928 }
929
930 lsn_t lsn_offset
931 = log_sys.log.calc_lsn_offset(log_sys.next_checkpoint_lsn);
932 mach_write_to_8(buf + LOG_CHECKPOINT_OFFSET, lsn_offset);
933 mach_write_to_8(buf + LOG_CHECKPOINT_LOG_BUF_SIZE,
934 srv_log_buffer_size);
935 mach_write_to_8(buf + LOG_CHECKPOINT_END_LSN, end_lsn);
936
937 log_block_store_checksum(buf);
938
939 ut_ad(LOG_CHECKPOINT_1 < srv_page_size);
940 ut_ad(LOG_CHECKPOINT_2 < srv_page_size);
941
942 ++log_sys.n_pending_checkpoint_writes;
943
944 mysql_mutex_unlock(&log_sys.mutex);
945
946 /* Note: We alternate the physical place of the checkpoint info.
947 See the (next_checkpoint_no & 1) below. */
948
949 log_sys.log.write((log_sys.next_checkpoint_no & 1) ? LOG_CHECKPOINT_2
950 : LOG_CHECKPOINT_1,
951 {buf, OS_FILE_LOG_BLOCK_SIZE});
952
953 log_sys.log.flush();
954
955 mysql_mutex_lock(&log_sys.mutex);
956
957 --log_sys.n_pending_checkpoint_writes;
958 ut_ad(log_sys.n_pending_checkpoint_writes == 0);
959
960 log_sys.next_checkpoint_no++;
961
962 log_sys.last_checkpoint_lsn = log_sys.next_checkpoint_lsn;
963
964 DBUG_PRINT("ib_log", ("checkpoint ended at " LSN_PF
965 ", flushed to " LSN_PF,
966 lsn_t{log_sys.last_checkpoint_lsn},
967 log_sys.get_flushed_lsn()));
968
969 MONITOR_INC(MONITOR_NUM_CHECKPOINT);
970
971 DBUG_EXECUTE_IF("crash_after_checkpoint", DBUG_SUICIDE(););
972
973 mysql_mutex_unlock(&log_sys.mutex);
974 }
975
976 /****************************************************************//**
977 Tries to establish a big enough margin of free space in the log, such
978 that a new log entry can be catenated without an immediate need for a
979 checkpoint. NOTE: this function may only be called if the calling thread
980 owns no synchronization objects! */
log_checkpoint_margin()981 ATTRIBUTE_COLD static void log_checkpoint_margin()
982 {
983 while (log_sys.check_flush_or_checkpoint())
984 {
985 mysql_mutex_lock(&log_sys.mutex);
986 ut_ad(!recv_no_log_write);
987
988 if (!log_sys.check_flush_or_checkpoint())
989 {
990 func_exit:
991 mysql_mutex_unlock(&log_sys.mutex);
992 return;
993 }
994
995 const lsn_t lsn= log_sys.get_lsn();
996 const lsn_t checkpoint= log_sys.last_checkpoint_lsn;
997 const lsn_t sync_lsn= checkpoint + log_sys.max_checkpoint_age;
998 if (lsn <= sync_lsn)
999 {
1000 log_sys.set_check_flush_or_checkpoint(false);
1001 goto func_exit;
1002 }
1003
1004 mysql_mutex_unlock(&log_sys.mutex);
1005
1006 /* We must wait to prevent the tail of the log overwriting the head. */
1007 buf_flush_wait_flushed(std::min(sync_lsn, checkpoint + (1U << 20)));
1008 os_thread_sleep(10000); /* Sleep 10ms to avoid a thundering herd */
1009 }
1010 }
1011
1012 /**
1013 Checks that there is enough free space in the log to start a new query step.
1014 Flushes the log buffer or makes a new checkpoint if necessary. NOTE: this
1015 function may only be called if the calling thread owns no synchronization
1016 objects! */
log_check_margins()1017 ATTRIBUTE_COLD void log_check_margins()
1018 {
1019 do
1020 {
1021 log_flush_margin();
1022 log_checkpoint_margin();
1023 ut_ad(!recv_no_log_write);
1024 }
1025 while (log_sys.check_flush_or_checkpoint());
1026 }
1027
/** Declared locally instead of through an included header; the definition
is not in this file. logs_empty_and_mark_files_at_shutdown() calls it to
wait for the end of the buffer resize task. */
extern void buf_resize_shutdown();
1029
/** Make a checkpoint at the latest lsn on shutdown.

Outline of what the code below does:
1. Stop the timers and background subsystems (master timer, buffer pool
   resize, statistics, defragmentation, monitor and lock timeout timers).
2. Poll in the "loop:" section, sleeping CHECK_INTERVAL between rounds,
   until there are no active transactions, no rollback of recovered
   transactions, no encryption threads, no page cleaner and no pending
   buffer pool or log I/O.
3. For srv_fast_shutdown == 2 (or if the server was never started), only
   flush the log buffer to disk and return.
4. Otherwise make a checkpoint, verify that the LSN did not move, and
   write the final LSN with fil_write_flushed_lsn(); the LSN is also
   stored in srv_shutdown_lsn. */
ATTRIBUTE_COLD void logs_empty_and_mark_files_at_shutdown()
{
	/* LSN at which the shutdown completes; assigned further below */
	lsn_t			lsn;
	/* Number of polling rounds since the last progress message */
	ulint			count = 0;

	ib::info() << "Starting shutdown...";

	/* Wait until the master thread and all other operations are idle: our
	algorithm only works if the server is idle at shutdown */
	bool do_srv_shutdown = false;
	if (srv_master_timer) {
		/* srv_shutdown() is only invoked further below when the
		master timer existed and this is not the fastest
		(srv_fast_shutdown == 2) shutdown mode. */
		do_srv_shutdown = srv_fast_shutdown < 2;
		srv_master_timer.reset();
	}

	/* Wait for the end of the buffer resize task.*/
	buf_resize_shutdown();
	dict_stats_shutdown();
	btr_defragment_shutdown();

	srv_shutdown_state = SRV_SHUTDOWN_CLEANUP;

	/* Start a buffer pool dump only if it was requested, the server is
	not read-only, and this is not the fastest shutdown mode. */
	if (srv_buffer_pool_dump_at_shutdown &&
		!srv_read_only_mode && srv_fast_shutdown < 2) {
		buf_dump_start();
	}
	srv_monitor_timer.reset();
	lock_sys.timeout_timer.reset();
	if (do_srv_shutdown) {
		/* The argument is true only for srv_fast_shutdown == 0 */
		srv_shutdown(srv_fast_shutdown == 0);
	}


	/* Every "goto loop" below re-enters here: sleep, then re-evaluate
	all of the idleness conditions from the beginning. */
loop:
	ut_ad(lock_sys.is_initialised() || !srv_was_started);
	ut_ad(log_sys.is_initialised() || !srv_was_started);
	ut_ad(fil_system.is_initialised() || !srv_was_started);

/* COUNT_INTERVAL: polling rounds between two progress messages.
CHECK_INTERVAL: argument of os_thread_sleep() for one round; 100000
corresponds to 100ms, going by the "10000 = 10ms" usage elsewhere in
this file. 600 rounds therefore amount to roughly 60 seconds. */
#define COUNT_INTERVAL 600U
#define CHECK_INTERVAL 100000U
	os_thread_sleep(CHECK_INTERVAL);

	count++;

	/* Check that there are no longer transactions, except for
	PREPARED ones. We need this wait even for the 'very fast'
	shutdown, because the InnoDB layer may have committed or
	prepared transactions and we don't want to lose them. */

	/* The transaction count is only queried if the server was started,
	is not read-only and srv_force_recovery is below
	SRV_FORCE_NO_TRX_UNDO; otherwise total_trx is 0 and the branch is
	skipped. */
	if (ulint total_trx = srv_was_started && !srv_read_only_mode
	    && srv_force_recovery < SRV_FORCE_NO_TRX_UNDO
	    ? trx_sys.any_active_transactions() : 0) {

		if (srv_print_verbose_log && count > COUNT_INTERVAL) {
			/* The first argument evaluates to 120 (twice the
			reporting period of about 60 seconds);
			presumably it is a number of seconds -- TODO confirm */
			service_manager_extend_timeout(
				COUNT_INTERVAL * CHECK_INTERVAL/1000000 * 2,
				"Waiting for %lu active transactions to finish",
				(ulong) total_trx);
			ib::info() << "Waiting for " << total_trx << " active"
				<< " transactions to finish";

			count = 0;
		}

		goto loop;
	}

	/* We need these threads to stop early in shutdown. */
	const char* thread_name;

	if (srv_fast_shutdown != 2 && trx_rollback_is_active) {
		thread_name = "rollback of recovered transactions";
	} else {
		thread_name = NULL;
	}

	if (thread_name) {
		ut_ad(!srv_read_only_mode);
		/* NOTE: the checks further below jump directly to this
		label, into the body of this if statement, after having
		set thread_name themselves. The assertion above is skipped
		on that path. */
wait_suspend_loop:
		/* Unlike the transaction wait above, the timeout is
		extended on every polling round here; only the log message
		is rate-limited by count. */
		service_manager_extend_timeout(
			COUNT_INTERVAL * CHECK_INTERVAL/1000000 * 2,
			"Waiting for %s to exit", thread_name);
		if (srv_print_verbose_log && count > COUNT_INTERVAL) {
			ib::info() << "Waiting for " << thread_name
				   << " to exit";
			count = 0;
		}
		goto loop;
	}

	/* Check that the background threads are suspended */

	ut_ad(!srv_any_background_activity());
	if (srv_n_fil_crypt_threads_started) {
		/* Signal the encryption threads' event, then keep waiting */
		os_event_set(fil_crypt_threads_event);
		thread_name = "fil_crypt_thread";
		goto wait_suspend_loop;
	}

	if (buf_page_cleaner_is_active) {
		thread_name = "page cleaner thread";
		/* Signal the do_flush_list condition before waiting
		for the page cleaner */
		pthread_cond_signal(&buf_pool.do_flush_list);
		goto wait_suspend_loop;
	}

	buf_load_dump_end();

	if (!buf_pool.is_initialised()) {
		ut_ad(!srv_was_started);
	} else if (ulint pending_io = buf_pool.io_pending()) {
		/* The literal 600 has the same value as COUNT_INTERVAL */
		if (srv_print_verbose_log && count > 600) {
			ib::info() << "Waiting for " << pending_io << " buffer"
				" page I/Os to complete";
			count = 0;
		}

		goto loop;
	} else {
		/* No pending buffer pool I/O: flush the buffer pool */
		buf_flush_buffer_pool();
	}

	if (log_sys.is_initialised()) {
		/* Sample both counters under log_sys.mutex, but evaluate
		and report them after releasing it. */
		mysql_mutex_lock(&log_sys.mutex);
		const ulint	n_write	= log_sys.n_pending_checkpoint_writes;
		const ulint	n_flush	= log_sys.pending_flushes;
		mysql_mutex_unlock(&log_sys.mutex);

		if (n_write || n_flush) {
			/* The literal 600 has the same value as
			COUNT_INTERVAL */
			if (srv_print_verbose_log && count > 600) {
				ib::info() << "Pending checkpoint_writes: "
					<< n_write
					<< ". Pending log flush writes: "
					<< n_flush;
				count = 0;
			}
			goto loop;
		}
	}

	/* Early exit: very fast shutdown, or the server never started.
	No checkpoint is made and no LSN is written to the data files. */
	if (srv_fast_shutdown == 2 || !srv_was_started) {
		if (!srv_read_only_mode && srv_was_started) {
			ib::info() << "MySQL has requested a very fast"
				" shutdown without flushing the InnoDB buffer"
				" pool to data files. At the next mysqld"
				" startup InnoDB will do a crash recovery!";

			/* In this fastest shutdown we do not flush the
			buffer pool:

			it is essentially a 'crash' of the InnoDB server.
			Make sure that the log is all flushed to disk, so
			that we can recover all committed transactions in
			a crash recovery. We must not write the lsn stamps
			to the data files, since at a startup InnoDB deduces
			from the stamps if the previous shutdown was clean. */

			log_buffer_flush_to_disk();
		}

		srv_shutdown_state = SRV_SHUTDOWN_LAST_PHASE;
		return;
	}

	if (!srv_read_only_mode) {
		service_manager_extend_timeout(INNODB_EXTEND_TIMEOUT_INTERVAL,
			"ensuring dirty buffer pool are written to log");
		log_make_checkpoint();

		mysql_mutex_lock(&log_sys.mutex);

		lsn = log_sys.get_lsn();

		/* The log counts as unchanged only if the current LSN is
		exactly the checkpoint LSN, or exceeds it by exactly
		SIZE_OF_FILE_CHECKPOINT (presumably the size of the record
		that the checkpoint itself appends -- TODO confirm).
		Any other value restarts the whole polling loop. */
		const bool lsn_changed = lsn != log_sys.last_checkpoint_lsn
			&& lsn != log_sys.last_checkpoint_lsn
			+ SIZE_OF_FILE_CHECKPOINT;
		ut_ad(lsn >= log_sys.last_checkpoint_lsn);

		mysql_mutex_unlock(&log_sys.mutex);

		if (lsn_changed) {
			goto loop;
		}

		log_sys.log.flush();
	} else {
		/* Read-only mode: no checkpoint is made; the LSN reached
		by recovery is used as the shutdown LSN. */
		lsn = recv_sys.recovered_lsn;
	}

	srv_shutdown_state = SRV_SHUTDOWN_LAST_PHASE;

	/* Make some checks that the server really is quiet */
	ut_ad(!srv_any_background_activity());

	service_manager_extend_timeout(INNODB_EXTEND_TIMEOUT_INTERVAL,
				       "Free innodb buffer pool");
	ut_d(buf_pool.assert_all_freed());

	/* The LSN must not have moved since it was sampled above, unless
	srv_force_recovery is SRV_FORCE_NO_LOG_REDO. */
	ut_a(lsn == log_sys.get_lsn()
	     || srv_force_recovery == SRV_FORCE_NO_LOG_REDO);

	if (UNIV_UNLIKELY(lsn < recv_sys.recovered_lsn)) {
		/* Reported but not treated as fatal: shutdown continues */
		ib::error() << "Shutdown LSN=" << lsn
			    << " is less than start LSN="
			    << recv_sys.recovered_lsn;
	}

	srv_shutdown_lsn = lsn;

	if (!srv_read_only_mode) {
		dberr_t err = fil_write_flushed_lsn(lsn);

		if (err != DB_SUCCESS) {
			/* A failure is only logged; shutdown proceeds */
			ib::error() << "Writing flushed lsn " << lsn
				    << " failed; error=" << err;
		}
	}

	/* Make some checks that the server really is quiet */
	ut_ad(!srv_any_background_activity());

	/* Same invariant as above, re-checked after the LSN was written */
	ut_a(lsn == log_sys.get_lsn()
	     || srv_force_recovery == SRV_FORCE_NO_LOG_REDO);
}
1254
1255 /******************************************************//**
1256 Prints info of the log. */
1257 void
log_print(FILE * file)1258 log_print(
1259 /*======*/
1260 FILE* file) /*!< in: file where to print */
1261 {
1262 double time_elapsed;
1263 time_t current_time;
1264
1265 mysql_mutex_lock(&log_sys.mutex);
1266
1267 const lsn_t lsn= log_sys.get_lsn();
1268 mysql_mutex_lock(&buf_pool.flush_list_mutex);
1269 const lsn_t pages_flushed = buf_pool.get_oldest_modification(lsn);
1270 mysql_mutex_unlock(&buf_pool.flush_list_mutex);
1271
1272 fprintf(file,
1273 "Log sequence number " LSN_PF "\n"
1274 "Log flushed up to " LSN_PF "\n"
1275 "Pages flushed up to " LSN_PF "\n"
1276 "Last checkpoint at " LSN_PF "\n",
1277 lsn,
1278 log_sys.get_flushed_lsn(),
1279 pages_flushed,
1280 lsn_t{log_sys.last_checkpoint_lsn});
1281
1282 current_time = time(NULL);
1283
1284 time_elapsed = difftime(current_time,
1285 log_sys.last_printout_time);
1286
1287 if (time_elapsed <= 0) {
1288 time_elapsed = 1;
1289 }
1290
1291 fprintf(file,
1292 ULINTPF " pending log flushes, "
1293 ULINTPF " pending chkp writes\n"
1294 ULINTPF " log i/o's done, %.2f log i/o's/second\n",
1295 log_sys.pending_flushes.load(),
1296 log_sys.n_pending_checkpoint_writes,
1297 log_sys.n_log_ios,
1298 static_cast<double>(
1299 log_sys.n_log_ios - log_sys.n_log_ios_old)
1300 / time_elapsed);
1301
1302 log_sys.n_log_ios_old = log_sys.n_log_ios;
1303 log_sys.last_printout_time = current_time;
1304
1305 mysql_mutex_unlock(&log_sys.mutex);
1306 }
1307
1308 /**********************************************************************//**
1309 Refreshes the statistics used to print per-second averages. */
1310 void
log_refresh_stats(void)1311 log_refresh_stats(void)
1312 /*===================*/
1313 {
1314 log_sys.n_log_ios_old = log_sys.n_log_ios;
1315 log_sys.last_printout_time = time(NULL);
1316 }
1317
1318 /** Shut down the redo log subsystem. */
close()1319 void log_t::close()
1320 {
1321 ut_ad(this == &log_sys);
1322 if (!is_initialised()) return;
1323 m_initialised= false;
1324 log.close();
1325
1326 ut_free_dodump(buf, srv_log_buffer_size);
1327 buf= nullptr;
1328 ut_free_dodump(flush_buf, srv_log_buffer_size);
1329 flush_buf= nullptr;
1330
1331 mysql_mutex_destroy(&mutex);
1332 mysql_mutex_destroy(&flush_order_mutex);
1333
1334 recv_sys.close();
1335
1336 aligned_free(checkpoint_buf);
1337 checkpoint_buf= nullptr;
1338 }
1339
get_log_file_path(const char * filename)1340 std::string get_log_file_path(const char *filename)
1341 {
1342 const size_t size= strlen(srv_log_group_home_dir) + /* path separator */ 1 +
1343 strlen(filename) + /* longest suffix */ 3;
1344 std::string path;
1345 path.reserve(size);
1346 path.assign(srv_log_group_home_dir);
1347
1348 std::replace(path.begin(), path.end(), OS_PATH_SEPARATOR_ALT,
1349 OS_PATH_SEPARATOR);
1350
1351 if (path.back() != OS_PATH_SEPARATOR)
1352 path.push_back(OS_PATH_SEPARATOR);
1353 path.append(filename);
1354
1355 return path;
1356 }
1357
get_existing_log_files_paths()1358 std::vector<std::string> get_existing_log_files_paths() {
1359 std::vector<std::string> result;
1360
1361 for (int i= 0; i < 101; i++) {
1362 auto path= get_log_file_path(LOG_FILE_NAME_PREFIX)
1363 .append(std::to_string(i));
1364 os_file_stat_t stat;
1365 dberr_t err= os_file_get_status(path.c_str(), &stat, false, true);
1366 if (err)
1367 break;
1368
1369 if (stat.type != OS_FILE_TYPE_FILE)
1370 break;
1371
1372 result.push_back(std::move(path));
1373 }
1374
1375 return result;
1376 }
1377