1 /*****************************************************************************
2
3 Copyright (c) 1996, 2020, Oracle and/or its affiliates. All rights reserved.
4 Copyright (c) 2008, Google Inc.
5 Copyright (c) 2009, Percona Inc.
6
7 Portions of this file contain modifications contributed and copyrighted by
8 Google, Inc. Those modifications are gratefully acknowledged and are described
9 briefly in the InnoDB documentation. The contributions by Google are
10 incorporated with their permission, and subject to the conditions contained in
11 the file COPYING.Google.
12
13 Portions of this file contain modifications contributed and copyrighted
14 by Percona Inc.. Those modifications are
15 gratefully acknowledged and are described briefly in the InnoDB
16 documentation. The contributions by Percona Inc. are incorporated with
17 their permission, and subject to the conditions contained in the file
18 COPYING.Percona.
19
20 This program is free software; you can redistribute it and/or modify it under
21 the terms of the GNU General Public License, version 2.0, as published by the
22 Free Software Foundation.
23
24 This program is also distributed with certain software (including but not
25 limited to OpenSSL) that is licensed under separate terms, as designated in a
26 particular file or component or in included license documentation. The authors
27 of MySQL hereby grant you an additional permission to link the program and
28 your derivative works with the separately licensed software that they have
29 included with MySQL.
30
31 This program is distributed in the hope that it will be useful, but WITHOUT
32 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
33 FOR A PARTICULAR PURPOSE. See the GNU General Public License, version 2.0,
34 for more details.
35
36 You should have received a copy of the GNU General Public License along with
37 this program; if not, write to the Free Software Foundation, Inc.,
38 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
39
40 *****************************************************************************/
41
42 /** @file srv/srv0start.cc
43 Starts the InnoDB database server
44
45 Created 2/16/1996 Heikki Tuuri
46 *************************************************************************/
47
48 #include <errno.h>
49 #include <stdio.h>
50 #include <stdlib.h>
51 #include <sys/types.h>
52 #include <zlib.h>
53
54 #include "my_dbug.h"
55
56 #include "btr0btr.h"
57 #include "btr0cur.h"
58 #include "buf0buf.h"
59 #include "buf0dump.h"
60 #include "current_thd.h"
61 #include "data0data.h"
62 #include "data0type.h"
63 #include "dict0dd.h"
64 #include "dict0dict.h"
65 #include "fil0fil.h"
66 #include "fsp0fsp.h"
67 #include "fsp0sysspace.h"
68 #include "ha_prototypes.h"
69 #include "ibuf0ibuf.h"
70 #include "log0log.h"
71 #include "log0recv.h"
72 #include "mem0mem.h"
73 #include "mtr0mtr.h"
74
75 #include "my_dbug.h"
76 #include "my_psi_config.h"
77 #include "mysql/psi/mysql_stage.h"
78 #include "mysqld.h"
79
80 #include "os0file.h"
81 #include "os0thread-create.h"
82 #include "os0thread.h"
83 #include "page0cur.h"
84 #include "page0page.h"
85 #include "rem0rec.h"
86 #include "row0ftsort.h"
87 #include "srv0srv.h"
88 #include "srv0start.h"
89 #include "trx0sys.h"
90 #include "trx0trx.h"
91 #include "ut0mem.h"
92
93 #include <zlib.h>
94
95 #include "arch0arch.h"
96 #include "arch0recv.h"
97 #include "btr0pcur.h"
98 #include "btr0sea.h"
99 #include "buf0flu.h"
100 #include "buf0rea.h"
101 #include "clone0api.h"
102 #include "clone0clone.h"
103 #include "dict0boot.h"
104 #include "dict0crea.h"
105 #include "dict0load.h"
106 #include "dict0stats_bg.h"
107 #include "lock0lock.h"
108 #include "os0event.h"
109 #include "os0proc.h"
110 #include "pars0pars.h"
111 #include "que0que.h"
112 #include "rem0cmp.h"
113 #include "row0ins.h"
114 #include "row0mysql.h"
115 #include "row0row.h"
116 #include "row0sel.h"
117 #include "row0upd.h"
118 #include "srv0tmp.h"
119 #include "trx0purge.h"
120 #include "trx0roll.h"
121 #include "trx0rseg.h"
122 #include "usr0sess.h"
123 #include "ut0crc32.h"
124 #include "ut0new.h"
125 #include "xb0xb.h"
126
127 /** fil_space_t::flags for hard-coded tablespaces */
128 extern uint32_t predefined_flags;
129
130 /** Recovered persistent metadata */
131 static MetadataRecover *srv_dict_metadata;
132
133 /** TRUE if a raw partition is in use */
134 ibool srv_start_raw_disk_in_use = FALSE;
135
136 /** Number of IO threads to use */
137 ulint srv_n_file_io_threads = 0;
138
139 /** true if the server is being started */
140 bool srv_is_being_started = false;
141 /** true if SYS_TABLESPACES is available for lookups */
142 bool srv_sys_tablespaces_open = false;
143 /** true if the server is being started, before rolling back any
144 incomplete transactions */
145 bool srv_startup_is_before_trx_rollback_phase = false;
146 /** true if srv_start() has been called */
147 static bool srv_start_has_been_called = false;
148
/** Bit flags that record which background threads have been created.
If the initialisation step has to be aborted, they tell which threads
need to be stopped. */
enum srv_start_state_t {
  /** No thread started */
  SRV_START_STATE_NONE = 0,
  /** Started lock-timeout thread */
  SRV_START_STATE_LOCK_SYS = 1 << 0,
  /** Started IO threads */
  SRV_START_STATE_IO = 1 << 1,
  /** Started monitor thread */
  SRV_START_STATE_MONITOR = 1 << 2,
  /** Started master thread */
  SRV_START_STATE_MASTER = 1 << 3,
  /** Started purge thread(s) */
  SRV_START_STATE_PURGE = 1 << 4,
  /** Started bufdump + dict stat and FTS optimize thread */
  SRV_START_STATE_STAT = 1 << 5
};

/** Tracks the server thread starting phases */
static uint64_t srv_start_state = SRV_START_STATE_NONE;
166
167 std::atomic<enum srv_shutdown_t> srv_shutdown_state{SRV_SHUTDOWN_NONE};
168
169 /** Files comprising the system tablespace */
170 static pfs_os_file_t files[1000];
171
172 /** Name of srv_monitor_file */
173 static char *srv_monitor_file_name;
174
175 /** */
176 #define SRV_MAX_N_PENDING_SYNC_IOS 100
177
/* Performance schema keys used to register the InnoDB threads.
They are only defined when UNIV_PFS_THREAD is set. */
#ifdef UNIV_PFS_THREAD
/* Archiver threads */
mysql_pfs_key_t log_archiver_thread_key;
mysql_pfs_key_t page_archiver_thread_key;

/* Buffer pool threads */
mysql_pfs_key_t buf_dump_thread_key;
mysql_pfs_key_t buf_resize_thread_key;

/* Clone threads */
mysql_pfs_key_t clone_ddl_thread_key;
mysql_pfs_key_t clone_gtid_thread_key;

/* Dictionary statistics and full-text search threads */
mysql_pfs_key_t dict_stats_thread_key;
mysql_pfs_key_t fts_optimize_thread_key;
mysql_pfs_key_t fts_parallel_merge_thread_key;
mysql_pfs_key_t fts_parallel_tokenization_thread_key;

/* I/O handler threads */
mysql_pfs_key_t io_handler_thread_key;
mysql_pfs_key_t io_ibuf_thread_key;
mysql_pfs_key_t io_log_thread_key;
mysql_pfs_key_t io_read_thread_key;
mysql_pfs_key_t io_write_thread_key;

/* Server (srv) threads */
mysql_pfs_key_t srv_error_monitor_thread_key;
mysql_pfs_key_t srv_lock_timeout_thread_key;
mysql_pfs_key_t srv_master_thread_key;
mysql_pfs_key_t srv_monitor_thread_key;
mysql_pfs_key_t srv_purge_thread_key;
mysql_pfs_key_t srv_worker_thread_key;
mysql_pfs_key_t srv_ts_alter_encrypt_thread_key;

/* Transaction recovery thread */
mysql_pfs_key_t trx_recovery_rollback_thread_key;
#endif /* UNIV_PFS_THREAD */
204
#ifdef HAVE_PSI_STAGE_INTERFACE
/** All InnoDB stage events that can be used for monitoring activities
via performance schema. */
static PSI_stage_info *srv_stages[] = {
    /* ALTER TABLE */
    &srv_stage_alter_table_end,
    &srv_stage_alter_table_flush,
    &srv_stage_alter_table_insert,
    &srv_stage_alter_table_log_index,
    &srv_stage_alter_table_log_table,
    &srv_stage_alter_table_merge_sort,
    &srv_stage_alter_table_read_pk_internal_sort,

    /* ALTER TABLESPACE encryption */
    &srv_stage_alter_tablespace_encryption,

    /* Buffer pool load */
    &srv_stage_buffer_pool_load,

    /* Clone */
    &srv_stage_clone_file_copy,
    &srv_stage_clone_redo_copy,
    &srv_stage_clone_page_copy,
};
#endif /* HAVE_PSI_STAGE_INTERFACE */
223
/** Time (in microseconds) slept per round in loops which wait for pending
tasks during shutdown. */
static constexpr uint32_t SHUTDOWN_SLEEP_TIME_US = 100;

/** Number of wait rounds during shutdown after which an error is produced,
or another policy for a timed out wait is applied. It is the number of
SHUTDOWN_SLEEP_TIME_US sleeps that fit into 60 * 1000 * 1000 microseconds. */
static constexpr uint32_t SHUTDOWN_SLEEP_ROUNDS =
    (60U * 1000U * 1000U) / SHUTDOWN_SLEEP_TIME_US;
231
232 /** Check if a file can be opened in read-write mode.
233 @return true if it doesn't exist or can be opened in rw mode. */
srv_file_check_mode(const char * name)234 static bool srv_file_check_mode(const char *name) /*!< in: filename to check */
235 {
236 os_file_stat_t stat;
237
238 memset(&stat, 0x0, sizeof(stat));
239
240 dberr_t err = os_file_get_status(name, &stat, true, srv_read_only_mode);
241
242 if (err == DB_FAIL) {
243 ib::error(ER_IB_MSG_1058, name);
244 return (false);
245
246 } else if (err == DB_SUCCESS) {
247 /* Note: stat.rw_perm is only valid on files */
248
249 if (stat.type == OS_FILE_TYPE_FILE) {
250 /* rw_perm is true if it can be opened in
251 srv_read_only_mode mode. */
252 if (!stat.rw_perm) {
253 const char *mode = srv_read_only_mode ? "read" : "read-write";
254
255 ib::error(ER_IB_MSG_1059, name, mode);
256 return (false);
257 }
258 } else {
259 /* Not a regular file, bail out. */
260 ib::error(ER_IB_MSG_1060, name);
261
262 return (false);
263 }
264 } else {
265 /* This is OK. If the file create fails on RO media, there
266 is nothing we can do. */
267
268 ut_a(err == DB_NOT_FOUND);
269 }
270
271 return (true);
272 }
273
274 /** I/o-handler thread function.
275 @param[in] segment The AIO segment the thread will work on */
io_handler_thread(ulint segment)276 static void io_handler_thread(ulint segment) {
277 while (srv_shutdown_state.load() != SRV_SHUTDOWN_EXIT_THREADS ||
278 buf_flush_page_cleaner_is_active() || !os_aio_all_slots_free()) {
279 fil_aio_wait(segment);
280 }
281 }
282
283 /** Creates a log file.
284 @return DB_SUCCESS or error code */
285 static MY_ATTRIBUTE((warn_unused_result)) dberr_t
create_log_file(pfs_os_file_t * file,const char * name)286 create_log_file(pfs_os_file_t *file, /*!< out: file handle */
287 const char *name) /*!< in: log file name */
288 {
289 bool ret;
290
291 *file = os_file_create(innodb_log_file_key, name,
292 OS_FILE_CREATE | OS_FILE_ON_ERROR_NO_EXIT,
293 OS_FILE_NORMAL, OS_LOG_FILE, srv_read_only_mode, &ret);
294
295 if (!ret) {
296 ib::error(ER_IB_MSG_1061, name);
297 return (DB_ERROR);
298 }
299
300 auto size = srv_log_file_size >> 20;
301
302 ib::info(ER_IB_MSG_CREATE_LOG_FILE, name);
303
304 #ifdef UNIV_DEBUG_DEDICATED
305 if (srv_dedicated_server && strstr(name, "ib_logfile101") == 0) {
306 auto tmp_size = srv_buf_pool_min_size >> (20 - UNIV_PAGE_SIZE_SHIFT);
307 ret = os_file_set_size(name, *file, 0, tmp_size, srv_read_only_mode, true);
308 ret = os_file_close(*file);
309 return (DB_SUCCESS);
310 }
311 #endif /* UNIV_DEBUG_DEDICATED */
312
313 ret = os_file_set_size_fast(name, *file, 0, (os_offset_t)srv_log_file_size,
314 srv_read_only_mode, true);
315
316 if (!ret) {
317 ib::error(ER_IB_MSG_1063, name, size);
318
319 /* Delete incomplete file if OOM */
320 if (os_has_said_disk_full) {
321 ret = os_file_close(*file);
322 ut_a(ret);
323 os_file_delete(innodb_log_file_key, name);
324 }
325
326 return (DB_ERROR);
327 }
328
329 ret = os_file_close(*file);
330 ut_a(ret);
331
332 return (DB_SUCCESS);
333 }
334
335 /** Initial number of the first redo log file */
336 #define INIT_LOG_FILE0 (SRV_N_LOG_FILES_MAX + 1)
337
338 /** Creates all log files.
339 @param[in,out] logfilename buffer for log file name
340 @param[in] dirnamelen length of the directory path
341 @param[in] lsn FIL_PAGE_FILE_FLUSH_LSN value
342 @param[in] num_old_files number of old redo log files to remove
343 @param[out] logfile0 name of the first log file
344 @param[out] checkpoint_lsn lsn of the first created checkpoint
345 @return DB_SUCCESS or error code */
create_log_files(char * logfilename,size_t dirnamelen,lsn_t lsn,uint32_t num_old_files,char * & logfile0,lsn_t & checkpoint_lsn)346 static dberr_t create_log_files(char *logfilename, size_t dirnamelen, lsn_t lsn,
347 uint32_t num_old_files, char *&logfile0,
348 lsn_t &checkpoint_lsn) {
349 dberr_t err;
350
351 if (srv_read_only_mode) {
352 ib::error(ER_IB_MSG_1064);
353 return (DB_READ_ONLY);
354 }
355
356 if (num_old_files < INIT_LOG_FILE0) {
357 num_old_files = INIT_LOG_FILE0;
358 }
359
360 /* Remove any old log files. */
361 for (unsigned i = 0; i <= num_old_files; i++) {
362 sprintf(logfilename + dirnamelen, "ib_logfile%u", i);
363
364 /* Ignore errors about non-existent files or files
365 that cannot be removed. The create_log_file() will
366 return an error when the file exists. */
367 #ifdef _WIN32
368 DeleteFile((LPCTSTR)logfilename);
369 #else
370 unlink(logfilename);
371 #endif /* _WIN32 */
372 /* Crashing after deleting the first
373 file should be recoverable. The buffer
374 pool was clean, and we can simply create
375 all log files from the scratch. */
376 RECOVERY_CRASH(6);
377 }
378
379 ut_ad(!buf_pool_check_no_pending_io());
380
381 RECOVERY_CRASH(7);
382
383 for (unsigned i = 0; i < srv_n_log_files; i++) {
384 sprintf(logfilename + dirnamelen, "ib_logfile%u", i ? i : INIT_LOG_FILE0);
385
386 err = create_log_file(&files[i], logfilename);
387
388 if (err != DB_SUCCESS) {
389 return (err);
390 }
391 }
392
393 RECOVERY_CRASH(8);
394
395 /* We did not create the first log file initially as
396 ib_logfile0, so that crash recovery cannot find it until it
397 has been completed and renamed. */
398 sprintf(logfilename + dirnamelen, "ib_logfile%u", INIT_LOG_FILE0);
399
400 /* Disable the doublewrite buffer for log files, not required */
401
402 fil_space_t *log_space = fil_space_create(
403 "innodb_redo_log", dict_sys_t::s_log_space_first_id,
404 fsp_flags_set_page_size(0, univ_page_size), FIL_TYPE_LOG);
405
406 ut_ad(fil_validate());
407 ut_a(log_space != nullptr);
408
409 /* Once the redo log is set to be encrypted,
410 initialize encryption information. */
411 if (srv_redo_log_encrypt) {
412 #if !defined(XTRABACKUP)
413 if (!Encryption::check_keyring()) {
414 ib::error(ER_IB_MSG_1065);
415
416 return (DB_ERROR);
417 }
418 #endif
419
420 fsp_flags_set_encryption(log_space->flags);
421 err = fil_set_encryption(log_space->id, Encryption::AES, nullptr, nullptr);
422 ut_ad(err == DB_SUCCESS);
423 if (use_dumped_tablespace_keys && !srv_backup_mode) {
424 xb_insert_tablespace_key(log_space->id, log_space->encryption_key,
425 log_space->encryption_iv);
426 }
427 }
428
429 const ulonglong file_pages = srv_log_file_size / UNIV_PAGE_SIZE;
430
431 logfile0 = fil_node_create(logfilename, static_cast<page_no_t>(file_pages),
432 log_space, false, false);
433
434 ut_a(logfile0 != nullptr);
435
436 for (unsigned i = 1; i < srv_n_log_files; i++) {
437 sprintf(logfilename + dirnamelen, "ib_logfile%u", i);
438
439 if (fil_node_create(logfilename, static_cast<page_no_t>(file_pages),
440 log_space, false, false) == nullptr) {
441 ib::error(ER_IB_MSG_1066, logfilename);
442
443 return (DB_ERROR);
444 }
445 }
446
447 if (!log_sys_init(srv_n_log_files, srv_log_file_size,
448 dict_sys_t::s_log_space_first_id)) {
449 return (DB_ERROR);
450 }
451
452 ut_a(log_sys != nullptr);
453
454 fil_open_log_and_system_tablespace_files();
455
456 /* Create the first checkpoint and flush headers of the first log
457 file (the flushed headers store information about the checkpoint,
458 format of redo log and that it is not created by mysqlbackup). */
459
460 /* We start at the next log block. Note, that we keep invariant,
461 that start lsn stored in header of the first log file is divisble
462 by OS_FILE_LOG_BLOCK_SIZE. */
463 lsn = ut_uint64_align_up(lsn, OS_FILE_LOG_BLOCK_SIZE);
464
465 /* Checkpoint lsn should be outside header of log block. */
466 lsn += LOG_BLOCK_HDR_SIZE;
467
468 log_create_first_checkpoint(*log_sys, lsn);
469 checkpoint_lsn = lsn;
470
471 /* Write encryption information into the first log file header
472 if redo log is set with encryption. */
473 if (FSP_FLAGS_GET_ENCRYPTION(log_space->flags) &&
474 !log_write_encryption(log_space->encryption_key, log_space->encryption_iv,
475 true)) {
476 return (DB_ERROR);
477 }
478
479 /* Note that potentially some log files are still unflushed.
480 However it does not matter, because ib_logfile0 is not present
481 Before renaming ib_logfile101 to ib_logfile0, log files have
482 to be flushed. We could postpone that to just before the rename,
483 as we possibly will write some log records before doing the rename.
484
485 However OS could anyway do the flush, and we prefer to minimize
486 possible scenarios. Hence, to make situation more deterministic,
487 we do the fsyncs now unconditionally and repeat the required
488 flush just before the rename. */
489 fil_flush_file_redo();
490
491 return (DB_SUCCESS);
492 }
493
494 /** Renames the first log file. */
create_log_files_rename(char * logfilename,size_t dirnamelen,lsn_t lsn,char * logfile0)495 static void create_log_files_rename(
496 char *logfilename, /*!< in/out: buffer for log file name */
497 size_t dirnamelen, /*!< in: length of the directory path */
498 lsn_t lsn, /*!< in: checkpoint lsn (and start lsn) */
499 char *logfile0) /*!< in/out: name of the first log file */
500 {
501 /* If innodb_flush_method=O_DSYNC,
502 we need to explicitly flush the log buffers. */
503
504 /* Note that we need to have fsync performed for the created files.
505 This is the moment we do it. Keep in mind that fil_close_log_files()
506 ensures there are no unflushed modifications in the files. */
507 fil_flush_file_redo();
508
509 /* Close the log files, so that we can rename
510 the first one. */
511 fil_close_log_files(false);
512
513 /* Rename the first log file, now that a log
514 checkpoint has been created. */
515 sprintf(logfilename + dirnamelen, "ib_logfile%u", 0);
516
517 RECOVERY_CRASH(9);
518
519 ib::info(ER_IB_MSG_1067, logfile0, logfilename);
520
521 ut_ad(strlen(logfile0) == 2 + strlen(logfilename));
522 bool success = os_file_rename(innodb_log_file_key, logfile0, logfilename);
523 ut_a(success);
524
525 RECOVERY_CRASH(10);
526
527 /* Replace the first file with ib_logfile0. */
528 strcpy(logfile0, logfilename);
529
530 fil_open_log_and_system_tablespace_files();
531
532 /* For cloned database it is normal to resize redo logs. */
533 ib::info(ER_IB_MSG_1068, ulonglong{lsn});
534 }
535
536 /** Opens a log file.
537 @return DB_SUCCESS or error code */
538 static MY_ATTRIBUTE((warn_unused_result)) dberr_t
open_log_file(pfs_os_file_t * file,const char * name,os_offset_t * size)539 open_log_file(pfs_os_file_t *file, /*!< out: file handle */
540 const char *name, /*!< in: log file name */
541 os_offset_t *size) /*!< out: file size */
542 {
543 bool ret;
544
545 *file = os_file_create(innodb_log_file_key, name, OS_FILE_OPEN, OS_FILE_AIO,
546 OS_LOG_FILE, srv_read_only_mode, &ret);
547 if (!ret) {
548 ib::error(ER_IB_MSG_1069, name);
549 return (DB_ERROR);
550 }
551
552 *size = os_file_get_size(*file);
553
554 ret = os_file_close(*file);
555 ut_a(ret);
556 return (DB_SUCCESS);
557 }
558
559 /** Create undo tablespace.
560 @param[in] undo_space Undo Tablespace
561 @return DB_SUCCESS or error code */
srv_undo_tablespace_create(undo::Tablespace & undo_space)562 static dberr_t srv_undo_tablespace_create(undo::Tablespace &undo_space) {
563 pfs_os_file_t fh;
564 bool ret;
565 dberr_t err = DB_SUCCESS;
566 char *file_name = undo_space.file_name();
567 space_id_t space_id = undo_space.id();
568
569 ut_a(!srv_read_only_mode);
570 ut_a(!srv_force_recovery);
571
572 os_file_create_subdirs_if_needed(file_name);
573
574 /* Until this undo tablespace can become active, keep a truncate log
575 file around so that if a crash happens it can be rebuilt at startup. */
576 err = undo::start_logging(&undo_space);
577 if (err != DB_SUCCESS) {
578 ib::error(ER_IB_MSG_1070, undo_space.log_file_name(),
579 undo_space.space_name());
580 }
581 ut_ad(err == DB_SUCCESS);
582
583 fh = os_file_create(innodb_data_file_key, file_name,
584 (srv_read_only_mode ? OS_FILE_OPEN : OS_FILE_CREATE) |
585 OS_FILE_ON_ERROR_NO_EXIT,
586 OS_FILE_NORMAL, OS_DATA_FILE, srv_read_only_mode, &ret);
587
588 if (ret == FALSE) {
589 std::ostringstream stmt;
590
591 if (os_file_get_last_error(false) == OS_FILE_ALREADY_EXISTS) {
592 stmt << " since '" << file_name << "' already exists.";
593 } else {
594 stmt << ". os_file_create() returned " << ret << ".";
595 }
596
597 ib::error(ER_IB_MSG_1214, undo_space.space_name(), stmt.str().c_str());
598
599 err = DB_ERROR;
600 } else {
601 ut_a(!srv_read_only_mode);
602
603 /* We created the data file and now write it full of zeros */
604 undo_space.set_new();
605
606 ib::info(ER_IB_MSG_1071, file_name);
607
608 ulint size_mb =
609 SRV_UNDO_TABLESPACE_SIZE_IN_PAGES << UNIV_PAGE_SIZE_SHIFT >> 20;
610
611 ib::info(ER_IB_MSG_1072, file_name, ulonglong{size_mb});
612
613 ib::info(ER_IB_MSG_1073);
614
615 ret = os_file_set_size(
616 file_name, fh, 0,
617 SRV_UNDO_TABLESPACE_SIZE_IN_PAGES << UNIV_PAGE_SIZE_SHIFT,
618 srv_read_only_mode, true);
619
620 DBUG_EXECUTE_IF("ib_undo_tablespace_create_fail", ret = false;);
621
622 if (!ret) {
623 ib::info(ER_IB_MSG_1074, file_name);
624 err = DB_OUT_OF_FILE_SPACE;
625 }
626
627 os_file_close(fh);
628
629 /* Add this space to the list of undo tablespaces to
630 construct by creating header pages. If an old undo
631 tablespace needed fixup before it is upgraded,
632 there is no need to construct it.*/
633 if (undo::is_reserved(space_id)) {
634 undo::add_space_to_construction_list(space_id);
635 }
636 }
637
638 return (err);
639 }
640
641 /** Try to enable encryption of an undo log tablespace.
642 @param[in] space_id undo tablespace id
643 @return DB_SUCCESS if success */
srv_undo_tablespace_enable_encryption(space_id_t space_id)644 static dberr_t srv_undo_tablespace_enable_encryption(space_id_t space_id) {
645 dberr_t err;
646
647 ut_ad(Encryption::check_keyring());
648
649 /* Set the space flag. The encryption metadata
650 will be generated in fsp_header_init later. */
651 fil_space_t *space = fil_space_get(space_id);
652 if (!FSP_FLAGS_GET_ENCRYPTION(space->flags)) {
653 fsp_flags_set_encryption(space->flags);
654 err = fil_set_encryption(space_id, Encryption::AES, nullptr, nullptr);
655 if (err != DB_SUCCESS) {
656 ib::error(ER_IB_MSG_1075, space->name);
657 return (err);
658 }
659 }
660
661 return (DB_SUCCESS);
662 }
663
664 /** Try to read encryption metadata from an undo tablespace.
665 @param[in] fh file handle of undo log file
666 @param[in] file_name file name
667 @param[in] space undo tablespace
668 @return DB_SUCCESS if success */
srv_undo_tablespace_read_encryption(pfs_os_file_t fh,const char * file_name,fil_space_t * space)669 static dberr_t srv_undo_tablespace_read_encryption(pfs_os_file_t fh,
670 const char *file_name,
671 fil_space_t *space) {
672 IORequest request;
673 ulint n_read = 0;
674 size_t page_size = UNIV_PAGE_SIZE_MAX;
675 dberr_t err = DB_ERROR;
676
677 byte *first_page_buf =
678 static_cast<byte *>(ut_malloc_nokey(2 * UNIV_PAGE_SIZE_MAX));
679 /* Align the memory for a possible read from a raw device */
680 byte *first_page =
681 static_cast<byte *>(ut_align(first_page_buf, UNIV_PAGE_SIZE));
682
683 /* Don't want unnecessary complaints about partial reads. */
684 request.disable_partial_io_warnings();
685
686 err = os_file_read_no_error_handling(request, file_name, fh, first_page, 0,
687 page_size, &n_read);
688
689 if (err != DB_SUCCESS) {
690 ib::info(ER_IB_MSG_1076, space->name, ut_strerr(err));
691 ut_free(first_page_buf);
692 return (err);
693 }
694
695 ulint offset;
696 const page_size_t space_page_size(space->flags);
697
698 offset = fsp_header_get_encryption_offset(space_page_size);
699 ut_ad(offset);
700
701 /* Return if the encryption metadata is empty. */
702 if (memcmp(first_page + offset, Encryption::KEY_MAGIC_V3,
703 Encryption::MAGIC_SIZE) != 0) {
704 ut_free(first_page_buf);
705 return (DB_SUCCESS);
706 }
707
708 if (!use_dumped_tablespace_keys || srv_backup_mode) {
709 byte key[Encryption::KEY_LEN];
710 byte iv[Encryption::KEY_LEN];
711 if (fsp_header_get_encryption_key(space->flags, key, iv, first_page)) {
712 fsp_flags_set_encryption(space->flags);
713 err = fil_set_encryption(space->id, Encryption::AES, key, iv);
714 ut_ad(err == DB_SUCCESS);
715 } else {
716 ut_free(first_page_buf);
717 return (DB_FAIL);
718 }
719 } else {
720 err = xb_set_encryption(space);
721 if (err != DB_SUCCESS) {
722 ut_free(first_page_buf);
723 return (DB_FAIL);
724 }
725 }
726
727 ut_free(first_page_buf);
728
729 return (DB_SUCCESS);
730 }
731
732 /** Fix up a v5.7 type undo tablespace that was being truncated.
733 The space_id is not a reserved undo space_id. We will just delete
734 the file since it will be replaced.
735 @param[in] space_id Tablespace ID
736 @return error code */
srv_undo_tablespace_fixup_57(space_id_t space_id)737 static dberr_t srv_undo_tablespace_fixup_57(space_id_t space_id) {
738 space_id_t space_num = undo::id2num(space_id);
739 ut_ad(space_num == space_id);
740 if (undo::is_active_truncate_log_present(space_num)) {
741 ib::info(ER_IB_MSG_1077, ulong{space_num});
742
743 if (srv_read_only_mode) {
744 ib::error(ER_IB_MSG_1078);
745 return (DB_READ_ONLY);
746 }
747
748 undo::Tablespace undo_space(space_id);
749
750 /* Flush any changes recovered in REDO */
751 fil_flush(space_id);
752 fil_space_close(space_id);
753
754 os_file_delete_if_exists(innodb_data_file_key, undo_space.file_name(),
755 nullptr);
756
757 return (DB_TABLESPACE_DELETED);
758 }
759
760 return (DB_SUCCESS);
761 }
762
763 /** Start the fix-up process on an undo tablespace if it was in the process
764 of being truncated when the server crashed. At this point, just delete the
765 old file if it exists.
766 We could do the whole reconstruction here for implicit undo spaces since we
767 know the space_id, space_name, and file_name implicitly. But for explicit
768 undo spaces, we must wait for the DD to be scanned in boot_tablespaces()
769 in order to know the space_id, space_name, and file_name.
770 @param[in] space_num undo tablespace number
771 @return error code */
srv_undo_tablespace_fixup_num(space_id_t space_num)772 static dberr_t srv_undo_tablespace_fixup_num(space_id_t space_num) {
773 if (!undo::is_active_truncate_log_present(space_num)) {
774 return (DB_SUCCESS);
775 }
776
777 ib::info(ER_IB_MSG_1077, ulong{space_num});
778
779 if (srv_read_only_mode) {
780 ib::error(ER_IB_MSG_1078);
781 return (DB_READ_ONLY);
782 }
783
784 /*
785 Search for a file that is using any of the space IDs assigned to this
786 undo number. The directory scan assured that there are no duplicate files
787 with the same space_id or with the same undo space number.
788 */
789 space_id_t space_id = SPACE_UNKNOWN;
790 std::string scanned_name;
791 for (size_t ndx = 0;
792 ndx < dict_sys_t::undo_space_id_range && scanned_name.length() == 0;
793 ndx++) {
794 space_id = undo::num2id(space_num, ndx);
795
796 scanned_name = fil_system_open_fetch(space_id);
797 }
798
799 /* If the previous file still exists, delete it. */
800 if (scanned_name.length() > 0) {
801 /* Flush any changes recovered in REDO */
802 fil_flush(space_id);
803 fil_space_close(space_id);
804 os_file_delete_if_exists(innodb_data_file_key, scanned_name.c_str(),
805 nullptr);
806
807 } else if (space_num < FSP_IMPLICIT_UNDO_TABLESPACES) {
808 /* If there is any file with the implicit file name, delete it. */
809 undo::Tablespace undo_space(undo::num2id(space_num, 0));
810 os_file_delete_if_exists(innodb_data_file_key, undo_space.file_name(),
811 nullptr);
812 }
813
814 return (DB_SUCCESS);
815 }
816
817 /** Fix up an undo tablespace if it was in the process of being truncated
818 when the server crashed. This is the second call and is done after the DD
819 is available so now we know the space_name, file_name and previous space_id.
820 @param[in] space_name undo tablespace name
821 @param[in] file_name undo tablespace file name
822 @param[in] space_id undo tablespace ID
823 @return error code */
srv_undo_tablespace_fixup(const char * space_name,const char * file_name,space_id_t space_id)824 dberr_t srv_undo_tablespace_fixup(const char *space_name, const char *file_name,
825 space_id_t space_id) {
826 ut_ad(fsp_is_undo_tablespace(space_id));
827
828 space_id_t space_num = undo::id2num(space_id);
829 if (!undo::is_active_truncate_log_present(space_num)) {
830 return (DB_SUCCESS);
831 }
832
833 if (srv_read_only_mode) {
834 return (DB_READ_ONLY);
835 }
836
837 ib::info(ER_IB_MSG_1079, ulong{space_num});
838
839 /* It is possible for an explicit undo tablespace to have been truncated and
840 recreated but not yet written with a header page when a crash occurred. In
841 this case, the empty file would not have been scanned at startup and the
842 first call to fixup did not know the filename. Now that we know it, just
843 delete any file with that name if it exists. The dictionary claims it is
844 an undo tablespace and there is a truncate log file present. */
845 os_file_delete_if_exists(innodb_data_file_key, file_name, nullptr);
846
847 /* Mark the space_id for this undo tablespace number as in-use. */
848 undo::spaces->x_lock();
849 undo::unuse_space_id(space_id);
850 space_id_t new_space_id = undo::next_space_id(space_id);
851 undo::use_space_id(new_space_id);
852 undo::spaces->x_unlock();
853
854 dberr_t err = srv_undo_tablespace_create(space_name, file_name, new_space_id);
855 if (err != DB_SUCCESS) {
856 return (err);
857 }
858
859 /* Update the DD with the new space ID and state. */
860 undo::spaces->s_lock();
861 undo::Tablespace *undo_space = undo::spaces->find(space_num);
862 dd_space_states to_state;
863 if (undo_space->is_inactive_explicit()) {
864 to_state = DD_SPACE_STATE_EMPTY;
865 undo_space->set_empty();
866 } else {
867 to_state = DD_SPACE_STATE_ACTIVE;
868 undo_space->set_active();
869 }
870 undo::spaces->s_unlock();
871
872 bool dd_result = dd_tablespace_get_mdl(space_name);
873 if (dd_result == DD_SUCCESS) {
874 dd_result =
875 dd_tablespace_set_id_and_state(space_name, new_space_id, to_state);
876 }
877 if (dd_result != DD_SUCCESS) {
878 err = DB_ERROR;
879 }
880
881 return (err);
882 }
883
884 /** Open an undo tablespace.
885 @param[in] undo_space Undo tablespace
886 @return DB_SUCCESS or error code */
srv_undo_tablespace_open(undo::Tablespace & undo_space)887 dberr_t srv_undo_tablespace_open(undo::Tablespace &undo_space) {
888 DBUG_EXECUTE_IF("ib_undo_tablespace_open_fail",
889 return (DB_CANNOT_OPEN_FILE););
890
891 pfs_os_file_t fh;
892 bool success;
893 uint32_t flags;
894 bool atomic_write;
895 dberr_t err = DB_ERROR;
896 space_id_t space_id = undo_space.id();
897 char *undo_name = undo_space.space_name();
898 char *file_name = undo_space.file_name();
899
900 /* Check if it was already opened during redo recovery. */
901 fil_space_t *space = fil_space_get(space_id);
902
903 /* Flush and close any current file handle so we can open
904 a local one below. */
905 if (space != nullptr) {
906 fil_flush(space_id);
907 fil_space_close(space_id);
908 }
909
910 if (!srv_file_check_mode(file_name)) {
911 ib::error(ER_IB_MSG_1081, file_name,
912 srv_read_only_mode ? "readable!" : "writable!");
913
914 return (DB_READ_ONLY);
915 }
916
917 /* Open a local handle. */
918 fh = os_file_create(
919 innodb_data_file_key, file_name,
920 OS_FILE_OPEN_RETRY | OS_FILE_ON_ERROR_NO_EXIT | OS_FILE_ON_ERROR_SILENT,
921 OS_FILE_NORMAL, OS_DATA_FILE, srv_read_only_mode, &success);
922 if (!success) {
923 return (DB_CANNOT_OPEN_FILE);
924 }
925
926 /* Check if this file supports atomic write. */
927 #if !defined(NO_FALLOCATE) && defined(UNIV_LINUX)
928 if (!dblwr::enabled) {
929 atomic_write = fil_fusionio_enable_atomic_write(fh);
930 } else {
931 atomic_write = false;
932 }
933 #else
934 atomic_write = false;
935 #endif /* !NO_FALLOCATE && UNIV_LINUX */
936
937 if (space == nullptr) {
938 /* Load the tablespace into InnoDB's internal data structures.
939 Set the compressed page size to 0 (non-compressed) */
940 flags = fsp_flags_init(univ_page_size, false, false, false, false);
941 space = fil_space_create(undo_name, space_id, flags, FIL_TYPE_TABLESPACE);
942 ut_a(space != nullptr);
943 ut_ad(fil_validate());
944
945 os_offset_t size = os_file_get_size(fh);
946 ut_a(size != (os_offset_t)-1);
947 page_no_t n_pages = static_cast<page_no_t>(size / UNIV_PAGE_SIZE);
948
949 if (fil_node_create(file_name, n_pages, space, false, atomic_write) ==
950 nullptr) {
951 os_file_close(fh);
952
953 ib::error(ER_IB_MSG_1082, undo_name);
954
955 return (DB_ERROR);
956 }
957
958 } else {
959 auto &file = space->files.front();
960
961 file.atomic_write = atomic_write;
962 }
963
964 /* Read the encryption metadata in this undo tablespace.
965 If the encryption info in the first page cannot be decrypted
966 by the master key, this table cannot be opened. */
967 err = srv_undo_tablespace_read_encryption(fh, file_name, space);
968
969 /* The file handle will no longer be needed. */
970 success = os_file_close(fh);
971 ut_ad(success);
972
973 if (err != DB_SUCCESS) {
974 ib::error(ER_IB_MSG_1083, undo_name);
975 return (err);
976 }
977
978 /* Now that space and node exist, make sure this undo tablespace
979 is open so that it stays open until shutdown.
980 But if it is under construction, we cannot open it until the
981 header page has been written. */
982 if (!undo::is_under_construction(space_id)) {
983 bool success = fil_space_open(space_id);
984 ut_a(success);
985 }
986
987 if (undo::is_reserved(space_id)) {
988 undo::spaces->add(undo_space);
989 }
990
991 return (DB_SUCCESS);
992 }
993
994 /** Open an undo tablespace with a specified space_id.
995 @param[in] space_id tablespace ID
996 @return DB_SUCCESS or error code */
srv_undo_tablespace_open_by_id(space_id_t space_id)997 static dberr_t srv_undo_tablespace_open_by_id(space_id_t space_id) {
998 undo::Tablespace undo_space(space_id);
999
1000 /* See if the name found in the file map for this undo space_id
1001 is the standard name. The directory scan assured that there are
1002 no duplicates. The filename found must match the standard name
1003 if this is an implicit undo tablespace. In other words, implicit
1004 undo tablespaces must be found in srv_undo_dir. */
1005 std::string scanned_name = fil_system_open_fetch(space_id);
1006
1007 if (scanned_name.length() != 0 &&
1008 !Fil_path::is_same_as(undo_space.file_name(), scanned_name.c_str())) {
1009 ib::error(ER_IB_MSG_FOUND_WRONG_UNDO_SPACE, undo_space.file_name(),
1010 ulong{space_id}, scanned_name.c_str());
1011
1012 return (DB_WRONG_FILE_NAME);
1013 }
1014
1015 return (srv_undo_tablespace_open(undo_space));
1016 }
1017
1018 /** Open an undo tablespace with a specified undo number.
1019 @param[in] space_num undo tablespace number
1020 @return DB_SUCCESS or error code */
srv_undo_tablespace_open_by_num(space_id_t space_num)1021 static dberr_t srv_undo_tablespace_open_by_num(space_id_t space_num) {
1022 space_id_t space_id = SPACE_UNKNOWN;
1023 size_t ndx;
1024 std::string scanned_name;
1025
1026 /* Search for a file that is using any of the space IDs assigned to this
1027 undo number. The directory scan assured that there are no duplicate files
1028 with the same space_id or with the same undo space number. */
1029 for (ndx = 0;
1030 ndx < dict_sys_t::undo_space_id_range && scanned_name.length() == 0;
1031 ndx++) {
1032 space_id = undo::num2id(space_num, ndx);
1033
1034 scanned_name = fil_system_open_fetch(space_id);
1035 }
1036 if (scanned_name.length() == 0) {
1037 return (DB_CANNOT_OPEN_FILE);
1038 }
1039
1040 undo::Tablespace undo_space(space_id);
1041
1042 /* The first 2 undo space numbers must be implicit. v8.0.12 used
1043 innodb_undo_tablespaces to implicitly create undo spaces. */
1044 bool is_default = (space_num <= FSP_IMPLICIT_UNDO_TABLESPACES);
1045
1046 /* v8.0.12 used innodb_undo_tablespaces to implicitly create undo
1047 spaces so there may be more than 2 implicit undo tablespaces. They
1048 must match the default undo filename and must be found in
1049 srv_undo_directory. */
1050 bool has_implicit_name =
1051 Fil_path::is_same_as(undo_space.file_name(), scanned_name.c_str());
1052
1053 if (is_default || has_implicit_name) {
1054 if (!has_implicit_name) {
1055 ib::info(ER_IB_MSG_1080, undo_space.file_name(), scanned_name.c_str(),
1056 ulong{space_id});
1057
1058 return (DB_WRONG_FILE_NAME);
1059 }
1060
1061 } else {
1062 /* Explicit undo tablespaces must end with the suffix '.ibu'. */
1063 if (!Fil_path::has_suffix(IBU, scanned_name)) {
1064 ib::info(ER_IB_MSG_NOT_END_WITH_IBU, scanned_name.c_str());
1065
1066 return (DB_WRONG_FILE_NAME);
1067 }
1068
1069 /* Use the file name found in the scan. */
1070 undo_space.set_file_name(scanned_name.c_str());
1071 }
1072
1073 /* Mark the space_id for this undo tablespace number as in-use. */
1074 undo::use_space_id(space_id);
1075
1076 ib::info(ER_IB_MSG_USING_UNDO_SPACE, scanned_name.c_str());
1077
1078 return (srv_undo_tablespace_open(undo_space));
1079 }
1080
1081 /* Open existing undo tablespaces up to the number in target_undo_tablespace.
1082 If we are making a new database, these have been created.
1083 If doing recovery, these should exist and may be needed for recovery.
1084 If we fail to open any of these it is a fatal error.
1085 @return DB_SUCCESS or error code */
srv_undo_tablespaces_open(bool backup_mode)1086 static dberr_t srv_undo_tablespaces_open(bool backup_mode) {
1087 dberr_t err;
1088
1089 if (!backup_mode) {
1090 /* If upgrading from 5.7, build a list of existing undo tablespaces
1091 from the references in the TRX_SYS page. (not including the system
1092 tablespace) */
1093 trx_rseg_get_n_undo_tablespaces(trx_sys_undo_spaces);
1094
1095 /* If undo tablespaces are being tracked in trx_sys then these
1096 will need to be replaced by independent undo tablespaces with
1097 reserved space_ids and RSEG_ARRAY pages. */
1098 if (trx_sys_undo_spaces->size() > 0) {
1099 /* Open each undo tablespace tracked in TRX_SYS. */
1100 for (const auto space_id : *trx_sys_undo_spaces) {
1101 fil_set_max_space_id_if_bigger(space_id);
1102
1103 /* Check if this undo tablespace was in the process of being truncated.
1104 If so, just delete the file since it will be replaced. */
1105 if (DB_TABLESPACE_DELETED == srv_undo_tablespace_fixup_57(space_id)) {
1106 continue;
1107 }
1108
1109 err = srv_undo_tablespace_open_by_id(space_id);
1110 if (err != DB_SUCCESS) {
1111 ib::error(ER_IB_MSG_CANNOT_OPEN_57_UNDO, ulong{space_id});
1112 return (err);
1113 }
1114 }
1115 }
1116 }
1117
1118 /* Open all existing implicit and explicit undo tablespaces.
1119 The tablespace scan has completed and the undo::space_id_bank has been
1120 filled with the space Ids that were found. */
1121 undo::spaces->x_lock();
1122 ut_ad(undo::spaces->size() == 0);
1123
1124 for (space_id_t num = 1; num <= FSP_MAX_UNDO_TABLESPACES; ++num) {
1125 /* Check if this undo tablespace was in the
1126 process of being truncated. If so, recreate it
1127 and add it to the construction list. */
1128 dberr_t err = srv_undo_tablespace_fixup_num(num);
1129 if (err != DB_SUCCESS) {
1130 undo::spaces->x_unlock();
1131 return (err);
1132 }
1133
1134 err = srv_undo_tablespace_open_by_num(num);
1135 switch (err) {
1136 case DB_WRONG_FILE_NAME:
1137 /* An Undo tablespace was found where the mapping
1138 file said it was. Now we have a different filename
1139 for it. The undo directory must have changed and
1140 the the files were not moved. Cannot startup. */
1141 case DB_READ_ONLY:
1142 /* The undo tablespace was found where it should be
1143 but it cannot be opened in read/write mode. */
1144 default:
1145 /* The undo tablespace was found where it should be
1146 but it cannot be used. */
1147 undo::spaces->x_unlock();
1148 return (err);
1149
1150 case DB_SUCCESS:
1151
1152 case DB_CANNOT_OPEN_FILE:
1153 /* Doesn't exist, keep looking */
1154 break;
1155 }
1156 }
1157
1158 ulint n_found_new = undo::spaces->size();
1159 ulint n_found_old = trx_sys_undo_spaces->size();
1160 undo::spaces->x_unlock();
1161
1162 if (n_found_old != 0 || n_found_new < FSP_IMPLICIT_UNDO_TABLESPACES) {
1163 std::ostringstream msg;
1164
1165 if (n_found_old != 0) {
1166 msg << "Found " << n_found_old << " undo tablespaces that"
1167 << " need to be upgraded. ";
1168 }
1169
1170 if (n_found_new < FSP_IMPLICIT_UNDO_TABLESPACES) {
1171 msg << "Will create " << (FSP_IMPLICIT_UNDO_TABLESPACES - n_found_new)
1172 << " new undo tablespaces.";
1173 }
1174
1175 ib::info(ER_IB_MSG_1215) << msg.str();
1176 }
1177
1178 if (n_found_new + n_found_old) {
1179 ib::info(ER_IB_MSG_1085, ulonglong{n_found_new + n_found_old});
1180 }
1181
1182 return (DB_SUCCESS);
1183 }
1184
1185 /** Create the implicit undo tablespaces if we are creating a new instance
1186 or if there was not enough implicit undo tablespaces previously existing.
1187 @return DB_SUCCESS or error code */
srv_undo_tablespaces_create()1188 static dberr_t srv_undo_tablespaces_create() {
1189 dberr_t err = DB_SUCCESS;
1190
1191 undo::spaces->x_lock();
1192
1193 ulint initial_implicit_undo_spaces = 0;
1194 for (auto undo_space : undo::spaces->m_spaces) {
1195 if (undo_space->num() <= FSP_IMPLICIT_UNDO_TABLESPACES) {
1196 initial_implicit_undo_spaces++;
1197 }
1198 }
1199
1200 if (initial_implicit_undo_spaces >= FSP_IMPLICIT_UNDO_TABLESPACES) {
1201 undo::spaces->x_unlock();
1202 return (DB_SUCCESS);
1203 }
1204
1205 if (srv_read_only_mode || srv_force_recovery > 0) {
1206 const char *mode;
1207
1208 mode = srv_read_only_mode ? "read_only" : "force_recovery",
1209
1210 ib::warn(ER_IB_MSG_1086, mode, ulonglong{initial_implicit_undo_spaces});
1211
1212 if (initial_implicit_undo_spaces == 0) {
1213 ib::error(ER_IB_MSG_1087, mode);
1214
1215 undo::spaces->x_unlock();
1216 return (DB_ERROR);
1217 }
1218
1219 undo::spaces->x_unlock();
1220 return (DB_SUCCESS);
1221 }
1222
1223 /* Create all implicit undo tablespaces that are needed. */
1224 for (space_id_t num = 1; num <= FSP_IMPLICIT_UNDO_TABLESPACES; ++num) {
1225 /* If the trunc log file is present, the fixup process will be
1226 finished later. */
1227 if (undo::is_active_truncate_log_present(num)) {
1228 continue;
1229 }
1230
1231 /* Check if an independent undo space for this space_id
1232 has already been found. */
1233 if (undo::spaces->contains(num)) {
1234 continue;
1235 }
1236
1237 /* Mark this implicit undo space number as used and return the next
1238 available space_id. */
1239 space_id_t space_id = undo::use_next_space_id(num);
1240
1241 /* Since it is not found, create it. */
1242 undo::Tablespace undo_space(space_id);
1243 undo_space.set_new();
1244 err = srv_undo_tablespace_create(undo_space);
1245 if (err != DB_SUCCESS) {
1246 ib::info(ER_IB_MSG_1088, undo_space.space_name());
1247 break;
1248 }
1249
1250 /* Open this new undo tablespace. */
1251 err = srv_undo_tablespace_open(undo_space);
1252 if (err != DB_SUCCESS) {
1253 ib::info(ER_IB_MSG_1089, int{err}, ut_strerr(err),
1254 undo_space.space_name());
1255
1256 break;
1257 }
1258 }
1259
1260 undo::spaces->x_unlock();
1261
1262 ulint new_spaces =
1263 FSP_IMPLICIT_UNDO_TABLESPACES - initial_implicit_undo_spaces;
1264
1265 ib::info(ER_IB_MSG_1090, ulonglong{new_spaces});
1266
1267 return (err);
1268 }
1269
1270 /** Finish building an undo tablespace. So far these tablespace files in
1271 the construction list should be created and filled with zeros.
1272 @param[in] create_new_db whether to create a new database
1273 @return DB_SUCCESS or error code */
srv_undo_tablespaces_construct(bool create_new_db)1274 static dberr_t srv_undo_tablespaces_construct(bool create_new_db) {
1275 mtr_t mtr;
1276
1277 if (undo::s_under_construction.empty()) {
1278 return (DB_SUCCESS);
1279 }
1280
1281 ut_a(!srv_read_only_mode);
1282 ut_a(!srv_force_recovery);
1283
1284 if (srv_undo_log_encrypt && Encryption::check_keyring() == false) {
1285 my_error(ER_CANNOT_FIND_KEY_IN_KEYRING, MYF(0));
1286 return (DB_ERROR);
1287 }
1288
1289 for (auto space_id : undo::s_under_construction) {
1290 /* Enable undo log encryption if it's ON. */
1291 if (srv_undo_log_encrypt) {
1292 dberr_t err = srv_undo_tablespace_enable_encryption(space_id);
1293
1294 if (err != DB_SUCCESS) {
1295 ib::error(ER_IB_MSG_1091, ulong{undo::id2num(space_id)});
1296
1297 return (err);
1298 }
1299 }
1300
1301 log_free_check();
1302
1303 mtr_start(&mtr);
1304
1305 mtr_x_lock(fil_space_get_latch(space_id), &mtr);
1306
1307 if (!fsp_header_init(space_id, SRV_UNDO_TABLESPACE_SIZE_IN_PAGES, &mtr,
1308 create_new_db)) {
1309 ib::error(ER_IB_MSG_1093, ulong{undo::id2num(space_id)});
1310
1311 mtr_commit(&mtr);
1312 return (DB_ERROR);
1313 }
1314
1315 /* Add the RSEG_ARRAY page. */
1316 trx_rseg_array_create(space_id, &mtr);
1317
1318 mtr_commit(&mtr);
1319
1320 /* The rollback segments will get created later in
1321 trx_rseg_add_rollback_segments(). */
1322 }
1323
1324 if (srv_undo_log_encrypt) {
1325 ut_d(bool ret =) srv_enable_undo_encryption(false);
1326 ut_ad(!ret);
1327 }
1328
1329 return (DB_SUCCESS);
1330 }
1331
1332 /** Mark the point in which the undo tablespaces in the construction list
1333 are fully constructed and ready to use. */
srv_undo_tablespaces_mark_construction_done()1334 static void srv_undo_tablespaces_mark_construction_done() {
1335 /* Remove the truncate log files if they exist. */
1336 for (auto space_id : undo::s_under_construction) {
1337 /* Flush these pages to disk since they were not redo logged. */
1338 FlushObserver *flush_observer =
1339 UT_NEW_NOKEY(FlushObserver(space_id, nullptr, nullptr));
1340 flush_observer->flush();
1341 UT_DELETE(flush_observer);
1342
1343 space_id_t space_num = undo::id2num(space_id);
1344 if (undo::is_active_truncate_log_present(space_num)) {
1345 undo::done_logging(space_num);
1346 }
1347 }
1348
1349 undo::clear_construction_list();
1350 }
1351
1352 /** Upgrade undo tablespaces by deleting the old undo tablespaces
1353 referenced by the TRX_SYS page.
1354 @return error code */
srv_undo_tablespaces_upgrade()1355 dberr_t srv_undo_tablespaces_upgrade() {
1356 if (trx_sys_undo_spaces->empty()) {
1357 goto cleanup;
1358 }
1359
1360 /* Recovered transactions in the prepared state prevent the old
1361 rsegs and undo tablespaces they are in from being deleted.
1362 These transactions must be either committed or rolled back by
1363 the mysql server.*/
1364 if (trx_sys->n_prepared_trx > 0) {
1365 ib::warn(ER_IB_MSG_1094);
1366 return (DB_SUCCESS);
1367 }
1368
1369 ib::info(ER_IB_MSG_1095, trx_sys_undo_spaces->size(),
1370 ulong{FSP_IMPLICIT_UNDO_TABLESPACES});
1371
1372 /* All Undo Tablespaces found in the TRX_SYS page need to be
1373 deleted. The new independent undo tablespaces were created in
1374 in srv_undo_tablespaces_create() */
1375 for (const auto space_id : *trx_sys_undo_spaces) {
1376 undo::Tablespace undo_space(space_id);
1377
1378 fil_space_close(undo_space.id());
1379
1380 auto err = fil_delete_tablespace(undo_space.id(), BUF_REMOVE_ALL_NO_WRITE);
1381
1382 if (err != DB_SUCCESS) {
1383 ib::warn(ER_IB_MSG_57_UNDO_SPACE_DELETE_FAIL, undo_space.space_name());
1384 }
1385 }
1386
1387 /* Remove the tracking of these undo tablespaces from TRX_SYS page and
1388 trx_sys->rsegs. */
1389 trx_rseg_upgrade_undo_tablespaces();
1390
1391 /* Since we now have new format undo tablespaces, we will no longer
1392 look for undo tablespaces or rollback segments in the TRX_SYS page
1393 or the trx_sys->rsegs vector. */
1394 trx_sys_undo_spaces->clear();
1395
1396 cleanup:
1397 /* Post 5.7 undo tablespaces track their own rsegs.
1398 Clear the list of rsegs in old undo tablespaces. */
1399 trx_sys->rsegs.clear();
1400
1401 return (DB_SUCCESS);
1402 }
1403
1404 /** Downgrade undo tablespaces by deleting the new undo tablespaces which
1405 are not referenced by the TRX_SYS page. */
srv_undo_tablespaces_downgrade()1406 static void srv_undo_tablespaces_downgrade() {
1407 ut_ad(srv_downgrade_logs);
1408
1409 ib::info(ER_IB_MSG_1096, ulonglong{undo::spaces->size()});
1410
1411 /* All the new independent undo tablespaces that were created in
1412 in srv_undo_tablespaces_create() need to be deleted. */
1413 for (const auto undo_space : undo::spaces->m_spaces) {
1414 fil_space_close(undo_space->id());
1415
1416 os_file_delete(innodb_data_file_key, undo_space->file_name());
1417 }
1418 }
1419
1420 /** Create an undo tablespace with an explicit file name
1421 This is called during CREATE UNDO TABLESPACE.
1422 @param[in] space_name tablespace name
1423 @param[in] file_name file name
1424 @param[in] space_id Tablespace ID
1425 @return DB_SUCCESS or error code */
srv_undo_tablespace_create(const char * space_name,const char * file_name,space_id_t space_id)1426 dberr_t srv_undo_tablespace_create(const char *space_name,
1427 const char *file_name, space_id_t space_id) {
1428 if (srv_undo_log_encrypt && Encryption::check_keyring() == false) {
1429 my_error(ER_CANNOT_FIND_KEY_IN_KEYRING, MYF(0));
1430 return (DB_ERROR);
1431 }
1432
1433 /* We need to x_lock the undo::spaces list until after this
1434 is created and added to it. */
1435 undo::spaces->x_lock();
1436
1437 ut_ad(undo::spaces->find(undo::id2num(space_id)) == nullptr);
1438
1439 undo::Tablespace undo_space(space_id);
1440 undo_space.set_space_name(space_name);
1441 undo_space.set_file_name(file_name);
1442
1443 dberr_t err = srv_undo_tablespace_create(undo_space);
1444 if (err != DB_SUCCESS) {
1445 undo::spaces->x_unlock();
1446 goto cleanup_and_exit;
1447 }
1448
1449 /* Open this new undo tablespace. */
1450 err = srv_undo_tablespace_open(undo_space);
1451 if (err != DB_SUCCESS) {
1452 ib::error(ER_IB_MSG_ERROR_OPENING_NEW_UNDO_SPACE, int{err}, space_name);
1453 undo::spaces->x_unlock();
1454 goto cleanup_and_exit;
1455 }
1456
1457 /* Unlock the undo::spaces list now that we are no longer changing it.
1458 This new undo space will not be used by new transactions until it
1459 becomes active. */
1460 undo::spaces->x_unlock();
1461
1462 /* Write header and RSEG_ARRAY pages to this undo tablespace. */
1463 err = srv_undo_tablespaces_construct(false);
1464 if (err != DB_SUCCESS) {
1465 goto cleanup_and_exit;
1466 }
1467
1468 /* Create the rollback segments in this tablespace and add an Rseg object
1469 for each one to the Rsegs list. */
1470 if (!trx_rseg_init_rollback_segments(space_id, srv_rollback_segments)) {
1471 err = DB_ERROR;
1472 goto cleanup_and_exit;
1473 }
1474
1475 cleanup_and_exit:
1476 /* If UNDO tablespace couldn't initialize completely, remove it from
1477 undo tablespace list */
1478 if (err != DB_SUCCESS) {
1479 undo::spaces->x_lock();
1480 undo::spaces->drop(undo_space);
1481 undo::spaces->x_unlock();
1482
1483 /* Remove undo tablespace file (if created) */
1484 os_file_delete_if_exists(innodb_data_file_key, undo_space.file_name(),
1485 nullptr);
1486 }
1487
1488 srv_undo_tablespaces_mark_construction_done();
1489 return (err);
1490 }
1491
1492 /** Initialize undo::spaces and trx_sys_undo_spaces,
1493 called once during srv_start(). */
undo_spaces_init()1494 void undo_spaces_init() {
1495 ut_ad(undo::spaces == nullptr);
1496
1497 undo::spaces = UT_NEW(undo::Tablespaces(), mem_key_undo_spaces);
1498
1499 trx_sys_undo_spaces_init();
1500
1501 undo::init_space_id_bank();
1502 }
1503
1504 /** Free the resources occupied by undo::spaces and trx_sys_undo_spaces,
1505 called once during thread de-initialization. */
undo_spaces_deinit()1506 void undo_spaces_deinit() {
1507 if (srv_downgrade_logs) {
1508 srv_undo_tablespaces_downgrade();
1509 }
1510
1511 if (undo::spaces != nullptr) {
1512 /* There can't be any active transactions. */
1513 undo::spaces->clear();
1514
1515 UT_DELETE(undo::spaces);
1516 undo::spaces = nullptr;
1517 }
1518
1519 trx_sys_undo_spaces_deinit();
1520
1521 if (undo::space_id_bank != nullptr) {
1522 UT_DELETE_ARRAY(undo::space_id_bank);
1523 undo::space_id_bank = nullptr;
1524 }
1525 }
1526
1527 /** Open the configured number of implicit undo tablespaces.
1528 @param[in] create_new_db true if new db being created
1529 @param[in] true disables reading the system tablespace (used in XtraBackup),
1530 false is passed on recovery.
1531 @return DB_SUCCESS or error code */
srv_undo_tablespaces_init(bool create_new_db,bool backup_mode)1532 dberr_t srv_undo_tablespaces_init(bool create_new_db, bool backup_mode) {
1533 dberr_t err = DB_SUCCESS;
1534
1535 /* Open any existing implicit undo tablespaces. */
1536 if (!create_new_db) {
1537 err = srv_undo_tablespaces_open(backup_mode);
1538 if (err != DB_SUCCESS) {
1539 return (err);
1540 }
1541 }
1542
1543 /* If this is opening an existing database, create and open any
1544 undo tablespaces that are still needed. For a new DB, create
1545 them all. */
1546 mutex_enter(&undo::ddl_mutex);
1547 err = srv_undo_tablespaces_create();
1548 if (err != DB_SUCCESS) {
1549 mutex_exit(&undo::ddl_mutex);
1550 return (err);
1551 }
1552
1553 /* Finish building any undo tablespaces just created by adding
1554 header pages, rseg_array pages, and rollback segments. Then delete
1555 any undo truncation log files and clear the construction list.
1556 This list includes any tablespace newly created or fixed-up. */
1557 err = srv_undo_tablespaces_construct(create_new_db);
1558 if (err != DB_SUCCESS) {
1559 mutex_exit(&undo::ddl_mutex);
1560 return (err);
1561 }
1562
1563 mutex_exit(&undo::ddl_mutex);
1564 return (DB_SUCCESS);
1565 }
1566
1567 /********************************************************************
1568 Wait for the purge thread(s) to start up. */
srv_start_wait_for_purge_to_start()1569 static void srv_start_wait_for_purge_to_start() {
1570 /* Wait for the purge coordinator and master thread to startup. */
1571
1572 purge_state_t state = trx_purge_state();
1573
1574 ut_a(state != PURGE_STATE_DISABLED);
1575
1576 while (srv_shutdown_state.load() < SRV_SHUTDOWN_PURGE &&
1577 srv_force_recovery < SRV_FORCE_NO_BACKGROUND &&
1578 state == PURGE_STATE_INIT) {
1579 switch (state = trx_purge_state()) {
1580 case PURGE_STATE_RUN:
1581 case PURGE_STATE_STOP:
1582 break;
1583
1584 case PURGE_STATE_INIT:
1585 ib::info(ER_IB_MSG_1097);
1586
1587 os_thread_sleep(50000);
1588 break;
1589
1590 case PURGE_STATE_EXIT:
1591 case PURGE_STATE_DISABLED:
1592 ut_error;
1593 }
1594 }
1595 }
1596
1597 /** Create the temporary file tablespace.
1598 @param[in] create_new_db whether we are creating a new database
1599 @param[in,out] tmp_space Shared Temporary SysTablespace
1600 @return DB_SUCCESS or error code. */
srv_open_tmp_tablespace(bool create_new_db,SysTablespace * tmp_space)1601 static dberr_t srv_open_tmp_tablespace(bool create_new_db,
1602 SysTablespace *tmp_space) {
1603 page_no_t sum_of_new_sizes;
1604
1605 /* Will try to remove if there is existing file left-over by last
1606 unclean shutdown */
1607 tmp_space->set_sanity_check_status(true);
1608 tmp_space->delete_files();
1609 tmp_space->set_ignore_read_only(true);
1610
1611 ib::info(ER_IB_MSG_1098);
1612
1613 bool create_new_temp_space = true;
1614
1615 tmp_space->set_space_id(dict_sys_t::s_temp_space_id);
1616
1617 RECOVERY_CRASH(100);
1618
1619 dberr_t err =
1620 tmp_space->check_file_spec(create_new_temp_space, 12 * 1024 * 1024);
1621
1622 if (err == DB_FAIL) {
1623 ib::error(ER_IB_MSG_1099, tmp_space->name());
1624
1625 err = DB_ERROR;
1626
1627 } else if (err != DB_SUCCESS) {
1628 ib::error(ER_IB_MSG_1100, tmp_space->name());
1629
1630 } else if ((err = tmp_space->open_or_create(true, create_new_db,
1631 &sum_of_new_sizes, nullptr)) !=
1632 DB_SUCCESS) {
1633 ib::error(ER_IB_MSG_1101, tmp_space->name());
1634
1635 } else {
1636 mtr_t mtr;
1637 page_no_t size = tmp_space->get_sum_of_sizes();
1638
1639 /* Open this shared temp tablespace in the fil_system so that
1640 it stays open until shutdown. */
1641 if (fil_space_open(tmp_space->space_id())) {
1642 /* Initialize the header page */
1643 mtr_start(&mtr);
1644 mtr_set_log_mode(&mtr, MTR_LOG_NO_REDO);
1645
1646 fsp_header_init(tmp_space->space_id(), size, &mtr, false);
1647
1648 mtr_commit(&mtr);
1649 } else {
1650 /* This file was just opened in the code above! */
1651 ib::error(ER_IB_MSG_1102, tmp_space->name());
1652
1653 err = DB_ERROR;
1654 }
1655 }
1656
1657 return (err);
1658 }
1659
1660 /** Create SDI Indexes in system tablespace. */
srv_create_sdi_indexes()1661 static void srv_create_sdi_indexes() {
1662 btr_sdi_create_index(SYSTEM_TABLE_SPACE, false);
1663 }
1664
1665 /** Set state to indicate start of particular group of threads in InnoDB. */
1666 UNIV_INLINE
srv_start_state_set(srv_start_state_t state)1667 void srv_start_state_set(srv_start_state_t state) /*!< in: indicate current
1668 state of thread startup */
1669 {
1670 srv_start_state |= state;
1671 }
1672
1673 /** Check if following group of threads is started.
1674 @return true if started */
1675 UNIV_INLINE
srv_start_state_is_set(srv_start_state_t state)1676 bool srv_start_state_is_set(
1677 srv_start_state_t state) /*!< in: state to check for */
1678 {
1679 return (srv_start_state & state);
1680 }
1681
/** Ask all InnoDB background threads to exit and wait for them to do so.
Sets srv_shutdown_state to SRV_SHUTDOWN_EXIT_THREADS. If any thread group
was started, it then repeatedly signals the events those threads wait on,
for at most SHUTDOWN_SLEEP_ROUNDS rounds, sleeping SHUTDOWN_SLEEP_TIME_US
after each round. On success srv_start_state is reset; if threads are still
active after the last round, a warning is logged (and debug builds assert). */
void srv_shutdown_exit_threads() {
  srv_shutdown_state.store(SRV_SHUTDOWN_EXIT_THREADS);

  /* Nothing was started, so there is nothing to stop. */
  if (srv_start_state == SRV_START_STATE_NONE) {
    return;
  }

  uint32_t i;

  /* All threads end up waiting for certain events. Put those events
  to the signaled state. Then the threads will exit themselves after
  os_event_wait(). */
  for (i = 0; i < SHUTDOWN_SLEEP_ROUNDS; i++) {
    /* NOTE: IF YOU CREATE THREADS IN INNODB, YOU MUST EXIT THEM
    HERE OR EARLIER */

    if (!srv_read_only_mode) {
      if (srv_start_state_is_set(SRV_START_STATE_LOCK_SYS)) {
        /* a. Let the lock timeout thread exit */
        os_event_set(lock_sys->timeout_event);
      }

      /* b. srv error monitor thread exits automatically,
      no need to do anything here */

      if (srv_start_state_is_set(SRV_START_STATE_MASTER)) {
        /* c. We wake the master thread so that
        it exits */
        srv_wake_master_thread();
      }

      if (srv_start_state_is_set(SRV_START_STATE_PURGE)) {
        /* d. Wakeup purge threads. */
        srv_purge_wakeup();
      }
    }

    if (srv_start_state_is_set(SRV_START_STATE_IO)) {
      /* e. Exit the i/o threads */
      if (!srv_read_only_mode) {
        if (recv_sys->flush_start != nullptr) {
          os_event_set(recv_sys->flush_start);
        }
        if (recv_sys->flush_end != nullptr) {
          os_event_set(recv_sys->flush_end);
        }
      }

      os_event_set(buf_flush_event);

      /* The AIO threads are only woken once the page cleaner is no longer
      active and all AIO slots are free. */
      if (!buf_flush_page_cleaner_is_active() && os_aio_all_slots_free()) {
        os_aio_wake_all_threads_at_shutdown();
      }
    }

    if (srv_thread_is_active(srv_threads.m_dict_stats)) {
      os_event_set(dict_stats_event);
    }

    /* Try to stop archiver threads. */
    arch_wake_threads();

    if (log_sys != nullptr) {
      /* Preserve the log threads for the 75% of the total
      time we are waiting here until all threads are stopped.
      This is because log threads are normally shut down at
      the very end and we might need their help to stop other
      threads. */
      if (!buf_flush_page_cleaner_is_active() ||
          i >= SHUTDOWN_SLEEP_ROUNDS * 0.75) {
        log_stop_background_threads_nowait(*log_sys);

      } else {
        /* Ensure log threads are working. The redo log is
        like a blood, we need it for a lot of other systems
        to work. Ensure the blood flows. */
        log_wake_threads(*log_sys);
      }
    }

    /* Sample thread activity before sleeping; the sleep happens even when
    no thread was active, and only then does the loop end. */
    bool active = os_thread_any_active();

    os_thread_sleep(SHUTDOWN_SLEEP_TIME_US);

    if (!active) {
      break;
    }
  }

  /* i equals SHUTDOWN_SLEEP_ROUNDS only when the loop never hit the break,
  i.e. some thread was still active in every round. */
  if (i == SHUTDOWN_SLEEP_ROUNDS) {
    ib::warn(ER_IB_MSG_1103, os_thread_count.load());

#ifdef UNIV_DEBUG
    os_aio_print_pending_io(stderr);
    ut_ad(0);
#endif /* UNIV_DEBUG */
  } else {
    /* Reset the start state. */
    srv_start_state = SRV_START_STATE_NONE;
  }
}
1783
/* Convenience wrapper around srv_init_abort_low(). It expects a variable
named create_new_db to be in scope at the point of use. In UNIV_DEBUG builds
the caller's __FILE__ and __LINE__ are passed along as well. */
#ifdef UNIV_DEBUG
#define srv_init_abort(_db_err) \
  srv_init_abort_low(create_new_db, __FILE__, __LINE__, _db_err)
#else
#define srv_init_abort(_db_err) srv_init_abort_low(create_new_db, _db_err)
#endif /* UNIV_DEBUG */
1790
1791 /** Innobase start-up aborted. Perform cleanup actions.
1792 @param[in] create_new_db TRUE if new db is being created */
1793 #ifdef UNIV_DEBUG
1794 /**
1795 @param[in] file File name
1796 @param[in] line Line number */
1797 #endif /* UNIV_DEBUG */
1798 /**
1799 @param[in] err Reason for aborting InnoDB startup
1800 @return DB_SUCCESS or error code. */
srv_init_abort_low(bool create_new_db,const char * file,ulint line,dberr_t err)1801 static dberr_t srv_init_abort_low(bool create_new_db,
1802 #ifdef UNIV_DEBUG
1803 const char *file, ulint line,
1804 #endif /* UNIV_DEBUG */
1805 dberr_t err) {
1806 std::ostringstream msg;
1807
1808 #ifdef UNIV_DEBUG
1809 msg << "at " << innobase_basename(file) << "[" << line << "] ";
1810 #endif /* UNIV_DEBUG */
1811
1812 if (create_new_db) {
1813 ib::error(ER_IB_MSG_1104, msg.str().c_str(), ut_strerr(err));
1814 } else {
1815 ib::error(ER_IB_MSG_1105, msg.str().c_str(), ut_strerr(err));
1816 }
1817
1818 clone_files_error();
1819 srv_shutdown_exit_threads();
1820
1821 return (err);
1822 }
1823
1824 /** Prepare to delete the redo log files. Flush the dirty pages from all the
1825 buffer pools. Flush the redo log buffer to the redo log file.
1826 @param[in] n_files number of old redo log files
1827 @return lsn upto which data pages have been flushed. */
srv_prepare_to_delete_redo_log_files(ulint n_files)1828 static lsn_t srv_prepare_to_delete_redo_log_files(ulint n_files) {
1829 lsn_t flushed_lsn;
1830 ulint pending_io = 0;
1831 ulint count = 0;
1832
1833 do {
1834 /* Clean the buffer pool. */
1835 buf_flush_sync_all_buf_pools();
1836
1837 RECOVERY_CRASH(1);
1838
1839 flushed_lsn = log_get_lsn(*log_sys);
1840
1841 if (count == 0) {
1842 std::ostringstream info;
1843
1844 if (srv_log_file_size == 0) {
1845 info << "Upgrading redo log: ";
1846 } else {
1847 info << "Resizing redo log from " << n_files << "*" << srv_log_file_size
1848 << " to ";
1849 }
1850
1851 info << srv_n_log_files << "*" << srv_log_file_size_requested
1852 << " bytes, LSN=" << flushed_lsn;
1853
1854 ib::info(ER_IB_MSG_1216) << info.str();
1855 }
1856
1857 /* Flush the old log files. */
1858 log_write_up_to(*log_sys, flushed_lsn, true);
1859
1860 /* If innodb_flush_method=O_DSYNC, we need to explicitly
1861 flush the log buffers. */
1862 fil_flush_file_redo();
1863
1864 ut_ad(flushed_lsn == log_get_lsn(*log_sys));
1865
1866 /* Check if the buffer pools are clean. If not
1867 retry till it is clean. */
1868 pending_io = buf_pool_check_no_pending_io();
1869
1870 if (pending_io > 0) {
1871 count++;
1872 /* Print a message every 60 seconds if we
1873 are waiting to clean the buffer pools */
1874 if (count >= SHUTDOWN_SLEEP_ROUNDS) {
1875 ib::info(ER_IB_MSG_1106, ulonglong{pending_io});
1876 count = 0;
1877 }
1878 }
1879 os_thread_sleep(SHUTDOWN_SLEEP_TIME_US);
1880
1881 } while (buf_pool_check_no_pending_io());
1882
1883 return (flushed_lsn);
1884 }
1885
1886 /** At startup load the encryption information from first datafile
1887 to tablespace object
1888 @return DB_SUCCESS on succes, others on failure */
srv_sys_enable_encryption()1889 static dberr_t srv_sys_enable_encryption() {
1890 fil_space_t *space = fil_space_get(TRX_SYS_SPACE);
1891 const ulint fsp_flags = srv_sys_space.m_files.begin()->flags();
1892 const bool is_encrypted = FSP_FLAGS_GET_ENCRYPTION(fsp_flags);
1893 dberr_t err = DB_SUCCESS;
1894
1895 if (is_encrypted && !use_dumped_tablespace_keys) {
1896 fsp_flags_set_encryption(space->flags);
1897 srv_sys_space.set_flags(space->flags);
1898
1899 err = fil_set_encryption(space->id, Encryption::AES,
1900 srv_sys_space.m_files.begin()->m_encryption_key,
1901 srv_sys_space.m_files.begin()->m_encryption_iv);
1902 ut_ad(err == DB_SUCCESS);
1903 }
1904
1905 return (err);
1906 }
1907
/** Start InnoDB.

The function runs the startup sequence in roughly these phases:
 -# reset the start state and log the build/runtime configuration;
 -# boot the server (srv_boot()), initialise the file system layer and
    register the directories to scan for tablespace files;
 -# run clone_init() and scan for tablespaces;
 -# unless in read-only mode, create the monitor and misc temp files;
 -# initialise AIO, the buffer pool and the core subsystems, then create
    the I/O handler threads and the page cleaner;
 -# open or create the system tablespace data files;
 -# create the redo log files (new database or missing ib_logfile0) or
    open the existing ones;
 -# either initialise a brand new database, or run redo recovery and,
    if the configured redo log size/count differs from what was found on
    disk, replace the redo log files;
 -# open the temporary tablespaces, adjust the rollback segments and
    activate the undo tablespaces;
 -# unless in read-only mode, start the lock-timeout, error-monitor and
    monitor threads, and resume any interrupted encryption key rotation.

Every failure path that aborts startup returns through srv_init_abort().

@param[in]	create_new_db		Whether to create a new database
@param[in]	to_lsn			LSN to stop recovery at
@return DB_SUCCESS or error code */
dberr_t srv_start(bool create_new_db, lsn_t to_lsn) {
  lsn_t flushed_lsn;

  /* just for assertions */
  lsn_t previous_lsn;

  /* output from call to create_log_files(...) */
  lsn_t new_checkpoint_lsn = 0;

  dberr_t err;
  /* Number of redo log files found on disk; defaults to the configured
  number and is overwritten after the existing files have been scanned. */
  uint32_t srv_n_log_files_found = srv_n_log_files;
  mtr_t mtr;
  purge_pq_t *purge_queue;
  /* Scratch buffer holding "<log dir>/ib_logfileN". */
  char logfilename[10000];
  char *logfile0 = nullptr;
  size_t dirnamelen;
  /* Index used while scanning the redo log files. After the scan it holds
  the number of log files that were opened and is reused further below. */
  unsigned i = 0;

  DBUG_ASSERT(srv_dict_metadata == nullptr);
  /* Reset the start state. */
  srv_start_state = SRV_START_STATE_NONE;

#ifdef UNIV_LINUX
#ifdef HAVE_FALLOC_PUNCH_HOLE_AND_KEEP_SIZE
  ib::info(ER_IB_MSG_1107);
#else
  ib::info(ER_IB_MSG_1108);
#endif /* HAVE_FALLOC_PUNCH_HOLE_AND_KEEP_SIZE */
#endif /* UNIV_LINUX */

  /* A size mismatch is only reported; startup continues. */
  if (sizeof(ulint) != sizeof(void *)) {
    ib::error(ER_IB_MSG_1109, sizeof(ulint), sizeof(void *));
  }

  /* Upgrade mode is rejected together with read-only mode or with a
  non-zero innodb_force_recovery. */
  if (srv_is_upgrade_mode) {
    if (srv_read_only_mode) {
      ib::error(ER_IB_MSG_1110);
      return (srv_init_abort(DB_ERROR));
    }
    if (srv_force_recovery != 0) {
      ib::error(ER_IB_MSG_1111);
      return (srv_init_abort(DB_ERROR));
    }
  }

#ifdef UNIV_DEBUG
  ib::info(ER_IB_MSG_1112) << "!!!!!!!! UNIV_DEBUG switched on !!!!!!!!!";
#endif

#ifdef UNIV_IBUF_DEBUG
  ib::info(ER_IB_MSG_1113) << "!!!!!!!! UNIV_IBUF_DEBUG switched on !!!!!!!!!";
#ifdef UNIV_IBUF_COUNT_DEBUG
  ib::info(ER_IB_MSG_1114)
      << "!!!!!!!! UNIV_IBUF_COUNT_DEBUG switched on !!!!!!!!!";
  ib::error(ER_IB_MSG_1115)
      << "Crash recovery will fail with UNIV_IBUF_COUNT_DEBUG";
#endif
#endif

#ifdef UNIV_LOG_LSN_DEBUG
  ib::info(ER_IB_MSG_1116)
      << "!!!!!!!! UNIV_LOG_LSN_DEBUG switched on !!!!!!!!!";
#endif /* UNIV_LOG_LSN_DEBUG */

#if defined(COMPILER_HINTS_ENABLED)
  ib::info(ER_IB_MSG_1117) << "Compiler hints enabled.";
#endif /* defined(COMPILER_HINTS_ENABLED) */

  ib::info(ER_IB_MSG_1118) << IB_ATOMICS_STARTUP_MSG;
  ib::info(ER_IB_MSG_1119) << MUTEX_TYPE;
  ib::info(ER_IB_MSG_1120) << IB_MEMORY_BARRIER_STARTUP_MSG;

  if (srv_force_recovery > 0) {
    ib::info(ER_IB_MSG_1121) << "!!! innodb_force_recovery is set to "
                             << srv_force_recovery << " !!!";
  }

  /* Warn about missing memory barriers, except on the x86/x86-64/Windows
  targets listed below. */
#ifndef HAVE_MEMORY_BARRIER
#if defined __i386__ || defined __x86_64__ || defined _M_IX86 || \
    defined _M_X64 || defined _WIN32
#else
  ib::warn(ER_IB_MSG_1122);
#endif /* IA32 or AMD64 */
#endif /* HAVE_MEMORY_BARRIER */

#ifdef UNIV_ZIP_DEBUG
  ib::info(ER_IB_MSG_1123, ZLIB_VERSION) << " with validation";
#else
  ib::info(ER_IB_MSG_1123, ZLIB_VERSION);
#endif /* UNIV_ZIP_DEBUG */

#ifdef UNIV_ZIP_COPY
  ib::info(ER_IB_MSG_1124) << "and extra copying";
#endif /* UNIV_ZIP_COPY */

  /* Since InnoDB does not currently clean up all its internal data
  structures in MySQL Embedded Server Library server_end(), we
  print an error message if someone tries to start up InnoDB a
  second time during the process lifetime. */

  if (srv_start_has_been_called) {
    ib::error(ER_IB_MSG_1125);
  }

  srv_start_has_been_called = true;

  srv_is_being_started = true;

  /* Register performance schema stages before any real work has been
  started which may need to be instrumented. */
  mysql_stage_register("innodb", srv_stages, UT_ARR_SIZE(srv_stages));

  /* Switch latching order checks on in sync0debug.cc, if
  --innodb-sync-debug=false (default) */
  ut_d(sync_check_enable());

  srv_boot();

  ib::info(ER_IB_MSG_1126) << (ut_crc32_cpu_enabled ? "Using" : "Not using")
                           << " CPU crc32 instructions";

  os_create_block_cache();

  fil_init(srv_max_n_open_files);

  /* This is the default directory for IBD and IBU files. Put it first
  in the list of known directories. */
  fil_set_scan_dir(MySQL_datadir_path.path());

  /* Add --innodb-data-home-dir as a known location for IBD and IBU files
  if it is not already there. */
  ut_ad(srv_data_home != nullptr && *srv_data_home != '\0');
  fil_set_scan_dir(Fil_path::remove_quotes(srv_data_home));

  /* Add --innodb-directories as known locations for IBD and IBU files. */
  if (srv_innodb_directories != nullptr && *srv_innodb_directories != 0) {
    fil_set_scan_dirs(Fil_path::remove_quotes(srv_innodb_directories));
  }

  /* Note whether the undo path is different (not the same or under)
  from all other known directories. If so, this will allow us to keep
  IBD files out of this unique undo location.*/
  MySQL_undo_path_is_unique = !fil_path_is_known(MySQL_undo_path.path());

  /* For the purpose of file discovery at startup, we need to scan
  --innodb-undo-directory also if it is different from the locations above. */
  if (MySQL_undo_path_is_unique) {
    fil_set_scan_dir(Fil_path::remove_quotes(MySQL_undo_path));
  }

  ib::info(ER_IB_MSG_378) << "Directories to scan '" << fil_get_dirs() << "'";

  /* Must replace clone files before scanning directories. When
  clone replaces current database, cloned files are moved to data files
  at this stage. */
  err = clone_init();

  if (err != DB_SUCCESS) {
    return (srv_init_abort(err));
  }

  err = fil_scan_for_tablespaces(false);

  if (err != DB_SUCCESS) {
    return (srv_init_abort(err));
  }

  /* The monitor file and the misc temp file are only created when the
  server is not in read-only mode. */
  if (!srv_read_only_mode) {
    mutex_create(LATCH_ID_SRV_MONITOR_FILE, &srv_monitor_file_mutex);

    if (srv_innodb_status) {
      /* The buffer holds the data directory path, the literal
      "/innodb_status." (sizeof includes its NUL terminator) and 20 more
      bytes, presumably for the decimal process number. */
      srv_monitor_file_name = static_cast<char *>(ut_malloc_nokey(
          MySQL_datadir_path.len() + 20 + sizeof "/innodb_status."));

      sprintf(srv_monitor_file_name, "%s/innodb_status." ULINTPF,
              static_cast<const char *>(MySQL_datadir_path),
              os_proc_get_number());

      srv_monitor_file = fopen(srv_monitor_file_name, "w+");

      if (!srv_monitor_file) {
        ib::error(ER_IB_MSG_1127, srv_monitor_file_name, strerror(errno));

        return (srv_init_abort(DB_ERROR));
      }
    } else {
      /* No named status file was requested: use an anonymous temp file. */
      srv_monitor_file_name = nullptr;
      srv_monitor_file = os_file_create_tmpfile(nullptr);

      if (!srv_monitor_file) {
        return (srv_init_abort(DB_ERROR));
      }
    }

    mutex_create(LATCH_ID_SRV_MISC_TMPFILE, &srv_misc_tmpfile_mutex);

    srv_misc_tmpfile = os_file_create_tmpfile(nullptr);

    if (!srv_misc_tmpfile) {
      return (srv_init_abort(DB_ERROR));
    }
  }

  /* Total number of I/O handler threads: readers + writers, plus the
  ibuf and log threads when not in read-only mode. */
  srv_n_file_io_threads = srv_n_read_io_threads;

  srv_n_file_io_threads += srv_n_write_io_threads;

  if (!srv_read_only_mode) {
    /* Add the log and ibuf IO threads. */
    srv_n_file_io_threads += 2;
  } else {
    ib::info(ER_IB_MSG_1128);
  }

  ut_a(srv_n_file_io_threads <= SRV_MAX_N_IO_THREADS);

  if (!os_aio_init(srv_n_read_io_threads, srv_n_write_io_threads,
                   SRV_MAX_N_PENDING_SYNC_IOS)) {
    ib::error(ER_IB_MSG_1129);

    return (srv_init_abort(DB_ERROR));
  }

  /* Buffer pool size scaled to 'G' or 'M' units; used only for the
  informational message below. */
  double size;
  char unit;

  if (srv_buf_pool_size >= 1024 * 1024 * 1024) {
    size = ((double)srv_buf_pool_size) / (1024 * 1024 * 1024);
    unit = 'G';
  } else {
    size = ((double)srv_buf_pool_size) / (1024 * 1024);
    unit = 'M';
  }

  /* Same scaling for the buffer pool chunk size. */
  double chunk_size;
  char chunk_unit;

  if (srv_buf_pool_chunk_unit >= 1024 * 1024 * 1024) {
    chunk_size = srv_buf_pool_chunk_unit / 1024.0 / 1024 / 1024;
    chunk_unit = 'G';
  } else {
    chunk_size = srv_buf_pool_chunk_unit / 1024.0 / 1024;
    chunk_unit = 'M';
  }

  ib::info(ER_IB_MSG_1130, size, unit, srv_buf_pool_instances, chunk_size,
           chunk_unit);

  err = buf_pool_init(srv_buf_pool_size, srv_buf_pool_instances);

  if (err != DB_SUCCESS) {
    ib::error(ER_IB_MSG_1131);

    /* Note: the specific error code from buf_pool_init() is replaced
    with DB_ERROR here. */
    return (srv_init_abort(DB_ERROR));
  }

  ib::info(ER_IB_MSG_1132);

#ifdef UNIV_DEBUG
  /* We have observed deadlocks with a 5MB buffer pool but
  the actual lower limit could very well be a little higher. */

  if (srv_buf_pool_size <= 5 * 1024 * 1024) {
    ib::info(ER_IB_MSG_1133, ulonglong{srv_buf_pool_size / 1024 / 1024});
  }
#endif /* UNIV_DEBUG */

  fsp_init();
  pars_init();
  recv_sys_create();
  recv_sys_init(buf_pool_get_curr_size());
  trx_sys_create();
  lock_sys_create(srv_lock_table_size);
  srv_start_state_set(SRV_START_STATE_LOCK_SYS);

  /* Create i/o-handler threads: */

  /* For read only mode, we don't need ibuf and log I/O thread.
  Please see innobase_start_or_create_for_mysql() */
  ulint start = (srv_read_only_mode) ? 0 : 2;

  /* Thread slots are laid out as: [0, start) ibuf and log threads,
  then srv_n_read_io_threads readers, then srv_n_write_io_threads
  writers. */
  for (ulint t = 0; t < srv_n_file_io_threads; ++t) {
    IB_thread thread;
    if (t < start) {
      if (t == 0) {
        thread = os_thread_create(io_ibuf_thread_key, io_handler_thread, t);
      } else {
        ut_ad(t == 1);
        thread = os_thread_create(io_log_thread_key, io_handler_thread, t);
      }
    } else if (t >= start && t < (start + srv_n_read_io_threads)) {
      thread = os_thread_create(io_read_thread_key, io_handler_thread, t);

    } else if (t >= (start + srv_n_read_io_threads) &&
               t < (start + srv_n_read_io_threads + srv_n_write_io_threads)) {
      thread = os_thread_create(io_write_thread_key, io_handler_thread, t);
    } else {
      /* NOTE(review): given how srv_n_file_io_threads is computed above,
      this fallback looks unreachable - confirm. */
      thread = os_thread_create(io_handler_thread_key, io_handler_thread, t);
    }
    thread.start();
  }

  /* Even in read-only mode there could be flush job generated by
  intrinsic table operations. */
  buf_flush_page_cleaner_init(srv_n_page_cleaners);

  srv_start_state_set(SRV_START_STATE_IO);

  srv_startup_is_before_trx_rollback_phase = !create_new_db;

  /* The recovery system is freed right away when a new database is
  being created. */
  if (create_new_db) {
    recv_sys_free();
  }

  /* Open or create the data files. */
  page_no_t sum_of_new_sizes;

  err = srv_sys_space.open_or_create(false, create_new_db, &sum_of_new_sizes,
                                     &flushed_lsn);

  /* FIXME: This can be done earlier, but we now have to wait for
  checking of system tablespace. */
  dict_persist_init();

  switch (err) {
    case DB_SUCCESS:
      err = srv_sys_enable_encryption();
      if (err != DB_SUCCESS) return (srv_init_abort(err));
      break;
    case DB_CANNOT_OPEN_FILE:
      ib::error(ER_IB_MSG_1134);
      /* fall through */
    default:

      /* Other errors might come from
      Datafile::validate_first_page() */

      return (srv_init_abort(err));
  }

  /* Build the "<log dir>/" prefix in logfilename. The assertion keeps
  room in the buffer for "ib_logfile" plus a numeric suffix. */
  dirnamelen = strlen(srv_log_group_home_dir);
  ut_a(dirnamelen < (sizeof logfilename) - 10 - sizeof "ib_logfile");
  memcpy(logfilename, srv_log_group_home_dir, dirnamelen);

  /* Add a path separator if needed. */
  if (dirnamelen && logfilename[dirnamelen - 1] != OS_PATH_SEPARATOR) {
    logfilename[dirnamelen++] = OS_PATH_SEPARATOR;
  }

  /* Remember the configured size; srv_log_file_size is overwritten below
  with the size of the files actually found on disk. */
  srv_log_file_size_requested = srv_log_file_size;

  if (create_new_db) {
    ut_a(buf_are_flush_lists_empty_validate());

    flushed_lsn = LOG_START_LSN;

    err = create_log_files(logfilename, dirnamelen, flushed_lsn, 0, logfile0,
                           new_checkpoint_lsn);

    if (err != DB_SUCCESS) {
      return (srv_init_abort(err));
    }

    flushed_lsn = new_checkpoint_lsn;

    ut_a(new_checkpoint_lsn == LOG_START_LSN + LOG_BLOCK_HDR_SIZE);

  } else {
    /* Probe ib_logfile0, ib_logfile1, ... until one is missing. */
    for (i = 0; i < SRV_N_LOG_FILES_CLONE_MAX; i++) {
      os_offset_t size;
      os_file_stat_t stat_info;

      sprintf(logfilename + dirnamelen, "ib_logfile%u", i);

      err = os_file_get_status(logfilename, &stat_info, false,
                               srv_read_only_mode);

      if (err == DB_NOT_FOUND) {
        if (i == 0) {
          /* ib_logfile0 is missing: new redo log files are created,
          unless the flushed LSN read from the data files is below 1000,
          which is reported as an error. */
          if (flushed_lsn < static_cast<lsn_t>(1000)) {
            ib::error(ER_IB_MSG_1135);
            return (srv_init_abort(DB_ERROR));
          }

          err = create_log_files(logfilename, dirnamelen, flushed_lsn,
                                 SRV_N_LOG_FILES_CLONE_MAX, logfile0,
                                 new_checkpoint_lsn);

          if (err != DB_SUCCESS) {
            return (srv_init_abort(err));
          }

          create_log_files_rename(logfilename, dirnamelen, new_checkpoint_lsn,
                                  logfile0);

          /* Suppress the message about
          crash recovery. */
          flushed_lsn = new_checkpoint_lsn;
          ut_a(log_sys != nullptr);
          goto files_checked;
#if !defined(XTRABACKUP)
        } else if (i < 2) {
          /* must have at least 2 log files */
          ib::error(ER_IB_MSG_1136);
          return (srv_init_abort(err));
#endif
        }

        /* opened all files */
        break;
      }

      if (!srv_file_check_mode(logfilename)) {
        return (srv_init_abort(DB_ERROR));
      }

      err = open_log_file(&files[i], logfilename, &size);

      if (err != DB_SUCCESS) {
        return (srv_init_abort(err));
      }

      ut_a(size != (os_offset_t)-1);

      /* The file size must be a multiple of the page size. */
      if (size & ((1 << UNIV_PAGE_SIZE_SHIFT) - 1)) {
        ib::error(ER_IB_MSG_1137, logfilename, ulonglong{size});
        return (srv_init_abort(DB_ERROR));
      }

      /* The first file defines the log file size; every following file
      must have the same size. */
      if (i == 0) {
        srv_log_file_size = size;
#ifndef UNIV_DEBUG_DEDICATED
      } else if (size != srv_log_file_size) {
#else
      } else if (!srv_dedicated_server && size != srv_log_file_size) {
#endif /* UNIV_DEBUG_DEDICATED */
        ib::error(ER_IB_MSG_1138, logfilename, ulonglong{size},
                  srv_log_file_size);

        return (srv_init_abort(DB_ERROR));
      }
    }

    srv_n_log_files_found = i;

    /* Create the in-memory file space objects. */

    sprintf(logfilename + dirnamelen, "ib_logfile%u", 0);

    /* Disable the doublewrite buffer for log files. */
    fil_space_t *log_space = fil_space_create(
        "innodb_redo_log", dict_sys_t::s_log_space_first_id,
        fsp_flags_set_page_size(0, univ_page_size), FIL_TYPE_LOG);

    ut_ad(fil_validate());
    ut_a(log_space != nullptr);

    /* srv_log_file_size is measured in bytes */
    ut_a(srv_log_file_size / UNIV_PAGE_SIZE <= PAGE_NO_MAX);

    /* Attach one file node per log file that was found. */
    for (unsigned j = 0; j < i; j++) {
      sprintf(logfilename + dirnamelen, "ib_logfile%u", j);

      const ulonglong file_pages = srv_log_file_size / UNIV_PAGE_SIZE;

      if (fil_node_create(logfilename, static_cast<page_no_t>(file_pages),
                          log_space, false, false) == nullptr) {
        return (srv_init_abort(DB_ERROR));
      }
    }

    if (!log_sys_init(i, srv_log_file_size, dict_sys_t::s_log_space_first_id)) {
      return (srv_init_abort(DB_ERROR));
    }

    /* Read the first log file header to get the encryption
    information if it exist. */
    if (srv_force_recovery < SRV_FORCE_NO_LOG_REDO && !log_read_encryption()) {
      return (srv_init_abort(DB_ERROR));
    }
  }

  ut_a(log_sys != nullptr);

  /* Open all log files and data files in the system
  tablespace: we keep them open until database shutdown.

  When we use goto files_checked; we don't need the line below,
  because in such case, it's been already called at the end of
  create_log_files_rename(). */

  fil_open_log_and_system_tablespace_files();

files_checked:

  if (dblwr::enabled && ((err = dblwr::open(create_new_db)) != DB_SUCCESS)) {
    return (srv_init_abort(err));
  }

  arch_init();

  mtr_t::s_logging.init();

  if (create_new_db) {
    /* ---- Bootstrap path: initialise a brand new database. ---- */
    ut_a(!srv_read_only_mode);

    ut_a(log_sys->last_checkpoint_lsn.load() ==
         LOG_START_LSN + LOG_BLOCK_HDR_SIZE);

    ut_a(flushed_lsn == LOG_START_LSN + LOG_BLOCK_HDR_SIZE);

    log_start(*log_sys, 0, flushed_lsn, flushed_lsn);

    log_start_background_threads(*log_sys);

    err = srv_undo_tablespaces_init(true, false);

    if (err != DB_SUCCESS) {
      return (srv_init_abort(err));
    }

    mtr_start(&mtr);

    bool ret = fsp_header_init(0, sum_of_new_sizes, &mtr, false);

    mtr_commit(&mtr);

    if (!ret) {
      return (srv_init_abort(DB_ERROR));
    }

    /* To maintain backward compatibility we create only
    the first rollback segment before the double write buffer.
    All the remaining rollback segments will be created later,
    after the double write buffers have been created. */
    trx_sys_create_sys_pages();

    purge_queue = trx_sys_init_at_db_start();

    /* The purge system needs to create the purge view and
    therefore requires that the trx_sys is inited. */

    trx_purge_sys_create(srv_threads.m_purge_workers_n, purge_queue);

    err = dict_create();

    if (err != DB_SUCCESS) {
      return (srv_init_abort(err));
    }

    srv_create_sdi_indexes();

    /* Flush all pages and stop the log threads; the LSN is asserted not
    to move in between. */
    previous_lsn = log_get_lsn(*log_sys);

    buf_flush_sync_all_buf_pools();

    log_stop_background_threads(*log_sys);

    flushed_lsn = log_get_lsn(*log_sys);

    ut_a(flushed_lsn == previous_lsn);

    err = fil_write_flushed_lsn(flushed_lsn);
    ut_a(err == DB_SUCCESS);

    create_log_files_rename(logfilename, dirnamelen, new_checkpoint_lsn,
                            logfile0);

    log_start_background_threads(*log_sys);

    ut_a(buf_are_flush_lists_empty_validate());

    /* We always create the legacy double write buffer to preserve the
    expected page ordering of the system tablespace.
    FIXME: Try and remove this requirement. */
    err = dblwr::v1::create();

    if (err != DB_SUCCESS) {
      return srv_init_abort(err);
    }

  } else {
    /* ---- Restart path: recover an existing database. ---- */

    /* Load the reserved boundaries of the legacy dblwr buffer, this is
    required to check for stray reads and writes trying to access this
    reserved region in the sys tablespace.
    FIXME: Try and remove this requirement. */
    err = dblwr::v1::init();

    if (err != DB_SUCCESS) {
      return srv_init_abort(err);
    }

    /* Invalidate the buffer pool to ensure that we reread
    the page that we read above, during recovery.
    Note that this is not as heavy weight as it seems. At
    this point there will be only ONE page in the buf_LRU
    and there must be no page in the buf_flush list. */
    buf_pool_invalidate();

    /* We always try to do a recovery, even if the database had
    been shut down normally: this is the normal startup path */

    err = recv_recovery_from_checkpoint_start(*log_sys, flushed_lsn, to_lsn);

    if (err == DB_SUCCESS) {
      arch_page_sys->post_recovery_init();

      /* Initialize the change buffer. */
      err = dict_boot();
    }

    if (err != DB_SUCCESS) {
      return (srv_init_abort(err));
    }

    ut_ad(clone_check_recovery_crashpoint(recv_sys->is_cloned_db));

    /* We need to start log threads before asking to flush
    all dirty pages. That's because some dirty pages could
    be dirty because of ibuf merges. The ibuf merges could
    have written log records to the log buffer. The redo
    log has to be flushed up to the newest_modification of
    a dirty page, before the page might be flushed to disk.
    Hence we need the log_flusher thread which will flush
    log records related to the ibuf merges, allowing to
    flush the modified pages. That's why we need to start
    the log threads before flushing dirty pages. */

    if (!srv_read_only_mode) {
      log_start_background_threads(*log_sys);
    }

    if (srv_force_recovery < SRV_FORCE_NO_LOG_REDO) {
      /* Apply the hashed log records to the
      respective file pages, for the last batch of
      recv_group_scan_log_recs(). */

      /* Don't allow IBUF operations for crash
      recovery as it would add extra redo log and we may
      not have enough margin. */
      if (!srv_read_only_mode) {
        recv_apply_hashed_log_recs(*log_sys, false);

      } else {
        recv_apply_hashed_log_recs(*log_sys, true);
      }

      if (recv_sys->found_corrupt_log) {
        err = DB_ERROR;
        return (srv_init_abort(err));
      }

      DBUG_PRINT("ib_log", ("apply completed"));

      /* Check and print if there were any tablespaces
      which had redo log records but we couldn't apply
      them because the filenames were missing. */
    }

    if (srv_force_recovery < SRV_FORCE_NO_LOG_REDO) {
      /* Recovery complete, start verifying the
      page LSN on read. */
      recv_lsn_checks_on = true;
    }

    /* We have gone through the redo log, now check if all the
    tablespaces were found and recovered. */

    if (srv_force_recovery == 0 && fil_check_missing_tablespaces()) {
      // Missing tablespaces in the redo log are a valid possibility
      // with partial backups.
      // But keep them in the output for visibility
      ib::warn(ER_IB_MSG_1139);
    }

    /* We have successfully recovered from the redo log. The
    data dictionary should now be readable. */

    if (recv_sys->found_corrupt_log) {
      ib::warn(ER_IB_MSG_1140);
    }

    /* Dirty pages are flushed only in a normal (no forced recovery,
    writable) startup. */
    if (!srv_force_recovery && !srv_read_only_mode) {
      buf_flush_sync_all_buf_pools();
    }

    srv_dict_metadata = recv_recovery_from_checkpoint_finish(*log_sys, false);

    /* Replace the redo log files when the size or number found on disk
    differs from the configuration, but only after a clean recovery
    (no forced recovery, no corrupt log). */
    if (!srv_force_recovery && !recv_sys->found_corrupt_log &&
        (srv_log_file_size_requested != srv_log_file_size ||
         srv_n_log_files_found != srv_n_log_files)) {
      /* Prepare to replace the redo log files. */

      if (srv_read_only_mode) {
        ib::error(ER_IB_MSG_1141);
        return (srv_init_abort(DB_READ_ONLY));
      }

      if (srv_dict_metadata != nullptr && !srv_dict_metadata->empty()) {
        /* Open this table in case srv_dict_metadata
        should be applied to this table before
        checkpoint. And because DD is not fully up yet,
        the table can be opened by internal APIs. */

        fil_space_t *space = fil_space_acquire_silent(dict_sys_t::s_space_id);
        if (space == nullptr) {
          dberr_t error =
              fil_ibd_open(true, FIL_TYPE_TABLESPACE, dict_sys_t::s_space_id,
                           predefined_flags, dict_sys_t::s_dd_space_name,
                           dict_sys_t::s_dd_space_name,
                           dict_sys_t::s_dd_space_file_name, true, false);
          if (error != DB_SUCCESS) {
            ib::error(ER_IB_MSG_1142);
            return (srv_init_abort(DB_ERROR));
          }
        } else {
          /* The space is already open; drop the reference just taken. */
          fil_space_release(space);
        }

        dict_persist->table_buffer = UT_NEW_NOKEY(DDTableBuffer());
        /* This writes redo logs. Since the log file
        size hasn't changed now, there should be enough
        room in log files, supposing log_free_check()
        works fine before crash */
        srv_dict_metadata->store();
      }

      /* Prepare to delete the old redo log files */
      flushed_lsn = srv_prepare_to_delete_redo_log_files(i);

      log_stop_background_threads(*log_sys);

      /* Prohibit redo log writes from any other
      threads until creating a log checkpoint at the
      end of create_log_files(). */
      ut_d(log_sys->disable_redo_writes = true);

      /* Debug check: buf_pool_check_no_pending_io() must report zero. */
      ut_ad(!buf_pool_check_no_pending_io());

      RECOVERY_CRASH(3);

      /* Stamp the LSN to the data files. */
      err = fil_write_flushed_lsn(flushed_lsn);
      ut_a(err == DB_SUCCESS);

      RECOVERY_CRASH(4);

      /* Close and free the redo log files, so that
      we can replace them. */
      fil_close_log_files(true);

      RECOVERY_CRASH(5);

      log_sys_close();

      /* Finish clone file recovery before creating new log files. We
      roll forward to remove any intermediate files here. */
      clone_files_recovery(true);

      ib::info(ER_IB_MSG_1143);

      /* From here on the configured size is in effect again. */
      srv_log_file_size = srv_log_file_size_requested;

      err =
          create_log_files(logfilename, dirnamelen, flushed_lsn,
                           srv_n_log_files_found, logfile0, new_checkpoint_lsn);

      if (err != DB_SUCCESS) {
        return (srv_init_abort(err));
      }

      create_log_files_rename(logfilename, dirnamelen, new_checkpoint_lsn,
                              logfile0);

      ut_d(log_sys->disable_redo_writes = false);

      flushed_lsn = new_checkpoint_lsn;

      log_start(*log_sys, 0, flushed_lsn, flushed_lsn);

      log_start_background_threads(*log_sys);

    } else if (recv_sys->is_cloned_db) {
      /* Reset creator for log */

      log_stop_background_threads(*log_sys);

      log_files_header_read(*log_sys, 0);

      lsn_t start_lsn;
      start_lsn =
          mach_read_from_8(log_sys->checkpoint_buf + LOG_HEADER_START_LSN);

      log_files_header_read(*log_sys, LOG_CHECKPOINT_1);

      log_files_header_flush(*log_sys, 0, start_lsn);

      log_start_background_threads(*log_sys);
    }

    if (sum_of_new_sizes > 0) {
      /* New data file(s) were added */
      mtr_start(&mtr);

      fsp_header_inc_size(0, sum_of_new_sizes, &mtr);

      mtr_commit(&mtr);

      /* Immediately write the log record about
      increased tablespace size to disk, so that it
      is durable even if mysqld would crash
      quickly */

      log_buffer_flush_to_disk(*log_sys);
    }

    err = srv_undo_tablespaces_init(false, false);

    /* A failure here is tolerated when innodb_force_recovery is at least
    SRV_FORCE_NO_UNDO_LOG_SCAN. */
    if (err != DB_SUCCESS && srv_force_recovery < SRV_FORCE_NO_UNDO_LOG_SCAN) {
      return (srv_init_abort(err));
    }

    purge_queue = trx_sys_init_at_db_start();

    if (srv_is_upgrade_mode) {
      if (!purge_queue->empty()) {
        ib::info(ER_IB_MSG_1144);
        srv_upgrade_old_undo_found = true;
      }
      /* Either the old or new undo tablespaces will
      be deleted later depending on the value of
      'failed_upgrade' in dd_upgrade_finish(). */
    } else {
      /* New undo tablespaces have been created.
      Delete the old undo tablespaces and the references
      to them in the TRX_SYS page. */
      srv_undo_tablespaces_upgrade();
    }

    DBUG_EXECUTE_IF("check_no_undo", ut_ad(purge_queue->empty()););

    /* The purge system needs to create the purge view and
    therefore requires that the trx_sys and trx lists were
    initialized in trx_sys_init_at_db_start(). */
    trx_purge_sys_create(srv_threads.m_purge_workers_n, purge_queue);
  }

  /* Open temp-tablespace and keep it open until shutdown. */
  err = srv_open_tmp_tablespace(create_new_db, &srv_tmp_space);
  if (err != DB_SUCCESS) {
    return (srv_init_abort(err));
  }

  err = ibt::open_or_create(create_new_db);
  if (err != DB_SUCCESS) {
    return (srv_init_abort(err));
  }

  /* Here the double write buffer has already been created and so
  any new rollback segments will be allocated after the double
  write buffer. The default segment should already exist.
  We create the new segments only if it's a new database or
  the database was shutdown cleanly. */

  /* Note: When creating the extra rollback segments during an upgrade
  we violate the latching order, even if the change buffer is empty.
  We make an exception in sync0sync.cc and check srv_is_being_started
  for that violation. It cannot create a deadlock because we are still
  running in single threaded mode essentially. Only the IO threads
  should be running at this stage. */

  ut_a(srv_rollback_segments > 0);
  ut_a(srv_rollback_segments <= TRX_SYS_N_RSEGS);

  /* Make sure there are enough rollback segments in each tablespace
  and that each rollback segment has an associated memory object.
  If any of these rollback segments contain undo logs, load them into
  the purge queue */
  if (!trx_rseg_adjust_rollback_segments(srv_rollback_segments)) {
    return (srv_init_abort(DB_ERROR));
  }

  /* Any undo tablespaces under construction are now fully built
  with all needed rsegs. Delete the trunc.log files and clear the
  construction list. */
  srv_undo_tablespaces_mark_construction_done();

  /* Now that all rsegs are ready for use, make them active. */
  undo::spaces->s_lock();
  for (auto undo_space : undo::spaces->m_spaces) {
    if (!undo_space->is_empty()) {
      undo_space->set_active();
    }
  }
  undo::spaces->s_unlock();

  /* Undo Tablespaces and Rollback Segments are ready. */
  srv_startup_is_before_trx_rollback_phase = false;

  if (!srv_read_only_mode) {
    /* The buffer pool load at startup is switched off for a newly
    created database. */
    if (create_new_db) {
      srv_buffer_pool_load_at_startup = FALSE;
    }

    /* Create the thread which watches the timeouts
    for lock waits */
    srv_threads.m_lock_wait_timeout =
        os_thread_create(srv_lock_timeout_thread_key, lock_wait_timeout_thread);

    srv_threads.m_lock_wait_timeout.start();

    /* Create the thread which warns of long semaphore waits */
    srv_threads.m_error_monitor = os_thread_create(srv_error_monitor_thread_key,
                                                   srv_error_monitor_thread);

    srv_threads.m_error_monitor.start();

    /* Create the thread which prints InnoDB monitor info */
    srv_threads.m_monitor =
        os_thread_create(srv_monitor_thread_key, srv_monitor_thread);

    srv_threads.m_monitor.start();

    srv_start_state_set(SRV_START_STATE_MONITOR);
  }

  srv_sys_tablespaces_open = true;

  /* Rotate the encryption key for recovery. It's because
  server could crash in middle of key rotation. Some tablespace
  didn't complete key rotation. Here, we will resume the
  rotation. */
  if (!srv_read_only_mode && !create_new_db &&
      srv_force_recovery < SRV_FORCE_NO_LOG_REDO &&
      !use_dumped_tablespace_keys) {
    /* A failed rotation is only logged; startup continues. */
    if (!fil_encryption_rotate()) {
      ib::info(ER_IB_MSG_1146) << "fil_encryption_rotate() failed!";
    }
  }

  srv_is_being_started = false;

  ut_a(trx_purge_state() == PURGE_STATE_INIT);

  /* wake main loop of page cleaner up */
  os_event_set(buf_flush_event);

  /* Finish clone files recovery. This call is idempotent and is no op
  if it is already done before creating new log files. */
  clone_files_recovery(true);

  ib::info(ER_IB_MSG_1151, INNODB_VERSION_STR,
           ulonglong{log_get_lsn(*log_sys)});

  return (DB_SUCCESS);
}
2868
2869 /** Applier of dynamic metadata */
2870 struct metadata_applier {
2871 /** Default constructor */
metadata_appliermetadata_applier2872 metadata_applier() {}
2873 /** Visitor.
2874 @param[in] table table to visit */
operator ()metadata_applier2875 void operator()(dict_table_t *table) const {
2876 ut_ad(dict_sys->dynamic_metadata != nullptr);
2877 ib_uint64_t autoinc = table->autoinc;
2878 dict_table_load_dynamic_metadata(table);
2879 /* For those tables which were not opened by
2880 ha_innobase::open() and not initialized by
2881 innobase_initialize_autoinc(), the next counter should be
2882 advanced properly */
2883 if (autoinc != table->autoinc && table->autoinc != ~0ULL) {
2884 ++table->autoinc;
2885 }
2886 }
2887 };
2888
2889 /** Apply the dynamic metadata to all tables */
apply_dynamic_metadata()2890 static void apply_dynamic_metadata() {
2891 const metadata_applier applier;
2892
2893 dict_sys->for_each_table(applier);
2894
2895 if (srv_dict_metadata != nullptr) {
2896 srv_dict_metadata->apply();
2897 UT_DELETE(srv_dict_metadata);
2898 srv_dict_metadata = nullptr;
2899 }
2900 }
2901
2902 /** On a restart, initialize the remaining InnoDB subsystems so that
2903 any tables (including data dictionary tables) can be accessed. */
srv_dict_recover_on_restart()2904 void srv_dict_recover_on_restart() {
2905 trx_resurrect_locks();
2906
2907 /* Roll back any recovered data dictionary transactions, so
2908 that the data dictionary tables will be free of any locks.
2909 The data dictionary latch should guarantee that there is at
2910 most one data dictionary transaction active at a time. */
2911 if (srv_force_recovery < SRV_FORCE_NO_TRX_UNDO && trx_sys_need_rollback()) {
2912 trx_rollback_or_clean_recovered(FALSE);
2913 }
2914
2915 /* Do after all DD transactions recovery, to get consistent metadata */
2916 apply_dynamic_metadata();
2917
2918 if (srv_force_recovery < SRV_FORCE_NO_IBUF_MERGE) {
2919 srv_sys_tablespaces_open = true;
2920 }
2921 }
2922
2923 /* If early redo/undo log encryption processing is done. */
is_early_redo_undo_encryption_done()2924 bool is_early_redo_undo_encryption_done() {
2925 /* Early redo/undo encryption is done during post recovery before purge
2926 thread is started. */
2927 return (srv_start_state_is_set(SRV_START_STATE_PURGE));
2928 }
2929
2930 /** Start purge threads. During upgrade we start
2931 purge threads early to apply purge. */
srv_start_purge_threads()2932 void srv_start_purge_threads() {
2933 /* Start purge threads only if they are not started earlier. */
2934 if (srv_start_state_is_set(SRV_START_STATE_PURGE)) {
2935 return;
2936 }
2937
2938 srv_threads.m_purge_coordinator =
2939 os_thread_create(srv_purge_thread_key, srv_purge_coordinator_thread);
2940
2941 srv_threads.m_purge_workers[0] = srv_threads.m_purge_coordinator;
2942
2943 /* We've already created the purge coordinator thread above. */
2944 for (size_t i = 1; i < srv_threads.m_purge_workers_n; ++i) {
2945 srv_threads.m_purge_workers[i] =
2946 os_thread_create(srv_worker_thread_key, srv_worker_thread);
2947 }
2948
2949 for (size_t i = 0; i < srv_threads.m_purge_workers_n; ++i) {
2950 srv_threads.m_purge_workers[i].start();
2951 }
2952
2953 srv_start_wait_for_purge_to_start();
2954
2955 srv_start_state_set(SRV_START_STATE_PURGE);
2956 }
2957
2958 /** Start up the remaining InnoDB service threads.
2959 @param[in] bootstrap True if this is in bootstrap */
srv_start_threads(bool bootstrap)2960 void srv_start_threads(bool bootstrap) {
2961 if (!srv_read_only_mode) {
2962 /* Before 8.0, it was master thread that was doing periodical
2963 checkpoints (every 7s). Since 8.0, it is the log checkpointer
2964 thread, which is owned by log_sys, that is responsible for
2965 periodical checkpoints (every innodb_log_checkpoint_every ms).
2966 Note that the log checkpointer thread was created earlier and
2967 is already active, but the periodical checkpoints were disabled.
2968 Only the required checkpoints were allowed, which includes:
2969 - checkpoints because of too old last_checkpoint_lsn,
2970 - checkpoints explicitly requested (because of call to
2971 log_make_latest_checkpoint()).
2972 The reason was to make the situation more deterministic during
2973 the startup, because then:
2974 - it is easier to write mtr tests,
2975 - there are less possible flows - smaller risk of bug.
2976 Now we start allowing periodical checkpoints! Since now, it's
2977 hard to predict when checkpoints are written! */
2978 log_limits_mutex_enter(*log_sys);
2979 log_sys->periodical_checkpoints_enabled = true;
2980 log_limits_mutex_exit(*log_sys);
2981 }
2982
2983 srv_threads.m_buf_resize =
2984 os_thread_create(buf_resize_thread_key, buf_resize_thread);
2985
2986 srv_threads.m_buf_resize.start();
2987
2988 if (srv_read_only_mode) {
2989 purge_sys->state = PURGE_STATE_DISABLED;
2990 return;
2991 }
2992
2993 if (!srv_apply_log_only && !bootstrap &&
2994 srv_force_recovery < SRV_FORCE_NO_TRX_UNDO && trx_sys_need_rollback()) {
2995 /* Rollback all recovered transactions that are
2996 not in committed nor in XA PREPARE state. */
2997 srv_threads.m_trx_recovery_rollback = os_thread_create(
2998 trx_recovery_rollback_thread_key, trx_recovery_rollback_thread);
2999
3000 srv_threads.m_trx_recovery_rollback.start();
3001 }
3002
3003 /* Create the master thread which does purge and other utility
3004 operations */
3005 srv_threads.m_master =
3006 os_thread_create(srv_master_thread_key, srv_master_thread);
3007
3008 srv_start_state_set(SRV_START_STATE_MASTER);
3009
3010 srv_threads.m_master.start();
3011
3012 if (srv_force_recovery == 0) {
3013 /* In the insert buffer we may have even bigger tablespace
3014 id's, because we may have dropped those tablespaces, but
3015 insert buffer merge has not had time to clean the records from
3016 the ibuf tree. */
3017
3018 ibuf_update_max_tablespace_id();
3019 }
3020
3021 /* Create the dict stats gathering thread */
3022 srv_threads.m_dict_stats =
3023 os_thread_create(dict_stats_thread_key, dict_stats_thread);
3024
3025 dict_stats_thread_init();
3026
3027 srv_threads.m_dict_stats.start();
3028
3029 /* Create the thread that will optimize the FTS sub-system. */
3030 fts_optimize_init();
3031
3032 srv_start_state_set(SRV_START_STATE_STAT);
3033 }
3034
srv_start_threads_after_ddl_recovery()3035 void srv_start_threads_after_ddl_recovery() {
3036 /* Start the buffer pool dump/load thread, which will access spaces thus
3037 must wait for DDL recovery */
3038 srv_threads.m_buf_dump =
3039 os_thread_create(buf_dump_thread_key, buf_dump_thread);
3040
3041 srv_threads.m_buf_dump.start();
3042
3043 /* Resume unfinished (un)encryption process in background thread. */
3044 if (!ts_encrypt_ddl_records.empty()) {
3045 srv_threads.m_ts_alter_encrypt =
3046 os_thread_create(srv_ts_alter_encrypt_thread_key,
3047 fsp_init_resume_alter_encrypt_tablespace);
3048
3049 srv_threads.m_ts_alter_encrypt.start();
3050
3051 /* Wait till shared MDL is taken by background thread for all tablespaces,
3052 for which (un)encryption is to be rolled forward. */
3053 mysql_mutex_lock(&resume_encryption_cond_m);
3054 mysql_cond_wait(&resume_encryption_cond, &resume_encryption_cond_m);
3055 mysql_mutex_unlock(&resume_encryption_cond_m);
3056 }
3057
3058 /* Start and consume all GTIDs for recovered transactions. */
3059 auto >id_persistor = clone_sys->get_gtid_persistor();
3060 gtid_persistor.start();
3061
3062 DBUG_EXECUTE_IF("crash_before_purge_thread", DBUG_SUICIDE(););
3063
3064 /* Now the InnoDB Metadata and file system should be consistent.
3065 Start the Purge thread */
3066 srv_start_purge_threads();
3067 }
3068
#if 0
/** Sync the FTS cache of every table found in dict_sys->table_LRU and in
dict_sys->table_non_LRU before shutdown. This code is compiled out. */
static
void
srv_fts_close(void)
{
  /* Tables kept in the LRU list. */
  for (dict_table_t *table = UT_LIST_GET_FIRST(dict_sys->table_LRU);
       table; table = UT_LIST_GET_NEXT(table_LRU, table)) {
    if (table->fts != nullptr) {
      fts_sync_table(table);
    }
  }

  /* Tables kept in the non-LRU list; they are chained through the same
  table_LRU list node. */
  for (dict_table_t *table = UT_LIST_GET_FIRST(dict_sys->table_non_LRU);
       table; table = UT_LIST_GET_NEXT(table_LRU, table)) {
    if (table->fts != nullptr) {
      fts_sync_table(table);
    }
  }
}
#endif
3097
3098 /** Set srv_shutdown_state to a given state and validate change is proper.
3099 @remarks This function is used only from the main thread, and only during
3100 startup or shutdown.
3101 @param[in] new_state new state to set */
srv_shutdown_set_state(srv_shutdown_t new_state)3102 static void srv_shutdown_set_state(srv_shutdown_t new_state) {
3103 ut_a(static_cast<int>(srv_shutdown_state.load()) + 1 ==
3104 static_cast<int>(new_state));
3105
3106 srv_shutdown_state.store(new_state);
3107 }
3108
3109 static void srv_shutdown_cleanup_and_master_stop();
3110
srv_shutdown_waits_for_rollback_of_recovered_transactions()3111 bool srv_shutdown_waits_for_rollback_of_recovered_transactions() {
3112 return (srv_force_recovery < SRV_FORCE_NO_TRX_UNDO && srv_fast_shutdown == 0);
3113 }
3114
3115 /** Shut down all InnoDB background tasks that may look up objects in
3116 the data dictionary. */
srv_pre_dd_shutdown()3117 void srv_pre_dd_shutdown() {
3118 ut_a(srv_shutdown_state.load() == SRV_SHUTDOWN_NONE);
3119
3120 /* Warn and wait if there are still some query threads alive.
3121 If all is correct, then all user threads should already be gone,
3122 because before clean_up() -> srv_pre_dd_shutdown() is called,
3123 we are joining signal_hand thread, which before exiting waits
3124 for all connections to be closed (close_connections()). */
3125 for (size_t count = 0; count < 10; ++count) {
3126 const auto threads_count = srv_conc_get_active_threads();
3127 if (threads_count == 0) {
3128 break;
3129 }
3130 ib::warn(ER_IB_MSG_1154, ulonglong{threads_count});
3131 os_thread_sleep(1000000); // 1s
3132 }
3133 /* Crash if some query threads are still alive. */
3134 ut_a(srv_conc_get_active_threads() == 0);
3135
3136 ut_a(!srv_thread_is_active(srv_threads.m_recv_writer));
3137
3138 /* This assertion is waiting for fix, because currently purge threads might
3139 create transactions belonging to mysql_trx_list (found by Nikša, thanks!)
3140 trx_sys_before_pre_dd_shutdown_validate(); */
3141
3142 /* Avoid fast shutdown, if redo logging is disabled. Otherwise, we won't be
3143 able to recover. */
3144 if (mtr_t::s_logging.is_disabled() && srv_fast_shutdown == 2) {
3145 ib::warn(ER_IB_WRN_FAST_SHUTDOWN_REDO_DISABLED);
3146 srv_fast_shutdown = 1;
3147 }
3148
3149 /* Stop service for persisting GTID */
3150 auto >id_persistor = clone_sys->get_gtid_persistor();
3151 gtid_persistor.stop();
3152
3153 if (srv_read_only_mode) {
3154 /* Check that goal of SRV_SHUTDOWN_RECOVERY_ROLLBACK is reached:
3155 1. In read-only mode, no rollbacks should be executed.
3156 2. The trx_recovery_rollback thread should not be started. */
3157 ut_ad(trx_sys_recovered_active_trxs_count() == 0);
3158 ut_a(!srv_thread_is_active(srv_threads.m_trx_recovery_rollback));
3159
3160 /* Check the goal of SRV_SHUTDOWN_PRE_DD_AND_SYSTEM_TRANSACTIONS,
3161 the following threads should not be started in read-only mode: */
3162 ut_a(!srv_thread_is_active(srv_threads.m_dict_stats));
3163 ut_a(!srv_thread_is_active(srv_threads.m_fts_optimize));
3164 ut_a(!srv_thread_is_active(srv_threads.m_ts_alter_encrypt));
3165
3166 /* In read-only mode, there is no master thread. */
3167 ut_a(!srv_thread_is_active(srv_threads.m_master));
3168
3169 /* In read-only mode, no purge should be done, so goal of the
3170 SRV_SHUTDOWN_PURGE is already satisfied (no purge threads). */
3171 ut_a(!srv_purge_threads_active());
3172
3173 /* Advance quickly through all states to SRV_SHUTDOWN_DD. */
3174 srv_shutdown_set_state(SRV_SHUTDOWN_RECOVERY_ROLLBACK);
3175 srv_shutdown_set_state(SRV_SHUTDOWN_PRE_DD_AND_SYSTEM_TRANSACTIONS);
3176 srv_shutdown_set_state(SRV_SHUTDOWN_PURGE);
3177 srv_shutdown_set_state(SRV_SHUTDOWN_DD);
3178 return;
3179 }
3180
3181 srv_shutdown_set_state(SRV_SHUTDOWN_RECOVERY_ROLLBACK);
3182
3183 if (srv_shutdown_waits_for_rollback_of_recovered_transactions()) {
3184 /* We need to wait for rollback of recovered transactions. */
3185 for (uint32_t count = 0;; ++count) {
3186 const auto total_trx = trx_sys_recovered_active_trxs_count();
3187 if (total_trx == 0) {
3188 break;
3189 }
3190 if (count >= SHUTDOWN_SLEEP_ROUNDS) {
3191 ib::info(ER_IB_MSG_1249, total_trx);
3192 count = 0;
3193 }
3194 os_thread_sleep(SHUTDOWN_SLEEP_TIME_US);
3195 }
3196 }
3197
3198 if (srv_thread_is_active(srv_threads.m_trx_recovery_rollback)) {
3199 /* We should wait until rollback after recovery end to avoid
3200 adding more for purge and to avoid touching transaction objects
3201 since this point. */
3202 srv_threads.m_trx_recovery_rollback.wait();
3203 }
3204
3205 srv_shutdown_set_state(SRV_SHUTDOWN_PRE_DD_AND_SYSTEM_TRANSACTIONS);
3206
3207 if (srv_start_state_is_set(SRV_START_STATE_STAT)) {
3208 fts_optimize_shutdown();
3209 dict_stats_shutdown();
3210 dict_stats_thread_deinit();
3211 }
3212 ut_a(!srv_thread_is_active(srv_threads.m_fts_optimize));
3213 ut_a(!srv_thread_is_active(srv_threads.m_dict_stats));
3214
3215 for (uint32_t count = 1; srv_thread_is_active(srv_threads.m_ts_alter_encrypt);
3216 ++count) {
3217 if (count % SHUTDOWN_SLEEP_ROUNDS == 0) {
3218 ib::info(ER_IB_MSG_WAIT_FOR_ENCRYPT_THREAD);
3219 }
3220 os_thread_sleep(SHUTDOWN_SLEEP_TIME_US);
3221 }
3222
3223 /* Wait until the master thread exits its main loop and notices that:
3224 - it should do shutdown-cleanup,
3225 - and still is allowed to access DD objects. */
3226 if (srv_thread_is_active(srv_threads.m_master)) {
3227 srv_wake_master_thread();
3228 os_event_wait(srv_threads.m_master_ready_for_dd_shutdown);
3229 }
3230
3231 /* Since this point we do not expect accesses to DD coming from InnoDB. */
3232
3233 srv_shutdown_set_state(SRV_SHUTDOWN_PURGE);
3234
3235 for (uint32_t count = 1; srv_purge_threads_active(); ++count) {
3236 srv_purge_wakeup();
3237 if (count % SHUTDOWN_SLEEP_ROUNDS == 0) {
3238 ib::info(ER_IB_MSG_1152);
3239 }
3240 os_thread_sleep(SHUTDOWN_SLEEP_TIME_US);
3241 }
3242 switch (trx_purge_state()) {
3243 case PURGE_STATE_INIT:
3244 case PURGE_STATE_EXIT:
3245 case PURGE_STATE_DISABLED:
3246 srv_start_state &= ~SRV_START_STATE_PURGE;
3247 break;
3248 case PURGE_STATE_RUN:
3249 case PURGE_STATE_STOP:
3250 ut_ad(0);
3251 }
3252
3253 /* After this phase plugins are asked to be shut down, in which case they
3254 will be marked as DELETED. Note: we cannot leave any transaction in the THD,
3255 because the mechanism which cleans resources in THD would not be able to
3256 unregister those transactions from mysql_trx_list, because the handler
3257 of close_connection in InnoDB handlerton would not be called, because
3258 InnoDB has already been marked as DELETED. You should close your thread
3259 here, in the srv_pre_dd_shutdown, if it might do lookups in DD objects.
3260 No other transactions should be useful, so for sake of simplicity we
3261 require to have no transactions at all here, except transactions:
3262 - with state = TRX_STATE_PREPARED,
3263 - with state = TRX_STATE_ACTIVE and with is_recovered == true */
3264
3265 trx_sys_after_pre_dd_shutdown_validate();
3266
3267 srv_shutdown_set_state(SRV_SHUTDOWN_DD);
3268
3269 DBUG_EXECUTE_IF("wait_for_threads_in_pre_dd_shutdown",
3270 srv_shutdown_cleanup_and_master_stop(););
3271 }
3272
3273 /** Shutdown background threads of InnoDB at the start of the shutdown phase.
3274 Handles shutdown phases: SRV_SHUTDOWN_CLEANUP and SRV_SHUTDOWN_MASTER_STOP. */
srv_shutdown_cleanup_and_master_stop()3275 static void srv_shutdown_cleanup_and_master_stop() {
3276 DBUG_EXECUTE_IF("threads_wait_on_cleanup",
3277 os_event_set(srv_threads.m_shutdown_cleanup_dbg););
3278
3279 ut_a(srv_shutdown_state.load() == SRV_SHUTDOWN_DD);
3280
3281 srv_shutdown_set_state(SRV_SHUTDOWN_CLEANUP);
3282
3283 struct Thread_to_stop {
3284 /** Name of the thread, printed to the error log if we waited too
3285 long (after 60 seconds and then every 60 seconds). */
3286 const char *m_name;
3287
3288 /** Future which allows to check if given task is completed. */
3289 const IB_thread &m_thread;
3290
3291 /** Function which can be called any number of times to wake
3292 the possibly waiting thread, so it could exit. */
3293 std::function<void()> m_notify;
3294
3295 /** Shutdown state in which we are waiting until thread is exited
3296 (earlier we keep notifying but we don't require it to exit before
3297 we may switch to the next state). */
3298 srv_shutdown_t m_wait_on_state;
3299 };
3300
3301 const Thread_to_stop threads_to_stop[]{
3302
3303 {"lock_wait_timeout", srv_threads.m_lock_wait_timeout,
3304 lock_set_timeout_event, SRV_SHUTDOWN_CLEANUP},
3305
3306 {"error_monitor", srv_threads.m_error_monitor,
3307 std::bind(os_event_set, srv_error_event), SRV_SHUTDOWN_CLEANUP},
3308
3309 {"monitor", srv_threads.m_monitor,
3310 std::bind(os_event_set, srv_monitor_event), SRV_SHUTDOWN_CLEANUP},
3311
3312 {"buf_dump", srv_threads.m_buf_dump,
3313 std::bind(os_event_set, srv_buf_dump_event), SRV_SHUTDOWN_CLEANUP},
3314
3315 {"buf_resize", srv_threads.m_buf_resize,
3316 std::bind(os_event_set, srv_buf_resize_event), SRV_SHUTDOWN_CLEANUP},
3317
3318 {"master", srv_threads.m_master, srv_wake_master_thread,
3319 SRV_SHUTDOWN_MASTER_STOP}};
3320
3321 const srv_shutdown_t max_wait_on_state{SRV_SHUTDOWN_MASTER_STOP};
3322
3323 uint32_t count = 0;
3324
3325 for (;;) {
3326 /* Print messages every 60 seconds when we are waiting for any
3327 of those threads to exit. */
3328 bool print;
3329 if (count >= SHUTDOWN_SLEEP_ROUNDS) {
3330 print = true;
3331 count = 0;
3332 } else {
3333 print = false;
3334 }
3335
3336 size_t active_found = 0;
3337 for (const auto &thread_info : threads_to_stop) {
3338 ut_a(thread_info.m_wait_on_state <= max_wait_on_state);
3339 if (thread_info.m_wait_on_state == srv_shutdown_state.load() &&
3340 srv_thread_is_active(thread_info.m_thread)) {
3341 ++active_found;
3342 if (print) {
3343 ib::info(ER_IB_MSG_1248, thread_info.m_name);
3344 }
3345 thread_info.m_notify();
3346 }
3347 }
3348
3349 if (active_found == 0) {
3350 if (srv_shutdown_state.load() == max_wait_on_state) {
3351 break;
3352 }
3353 srv_shutdown_set_state(static_cast<srv_shutdown_t>(
3354 static_cast<int>(srv_shutdown_state.load()) + 1));
3355 }
3356
3357 os_thread_sleep(SHUTDOWN_SLEEP_TIME_US);
3358 ++count;
3359 }
3360
3361 ut_a(srv_shutdown_state.load() == SRV_SHUTDOWN_MASTER_STOP);
3362
3363 trx_sys_after_background_threads_shutdown_validate();
3364 }
3365
3366 /** Waits for page cleaners exit. */
srv_shutdown_page_cleaners()3367 static void srv_shutdown_page_cleaners() {
3368 ut_a(srv_shutdown_state.load() == SRV_SHUTDOWN_MASTER_STOP);
3369 ut_a(!srv_master_thread_is_active());
3370
3371 srv_shutdown_set_state(SRV_SHUTDOWN_FLUSH_PHASE);
3372
3373 /* At this point only page_cleaner should be active. We wait
3374 here to let it complete the flushing of the buffer pools
3375 before proceeding further. */
3376
3377 for (uint32_t count = 0; buf_flush_page_cleaner_is_active(); ++count) {
3378 if (count >= SHUTDOWN_SLEEP_ROUNDS) {
3379 ib::info(ER_IB_MSG_1251);
3380 count = 0;
3381 }
3382 os_event_set(buf_flush_event);
3383 os_thread_sleep(SHUTDOWN_SLEEP_TIME_US);
3384 }
3385
3386 for (uint32_t count = 0;; ++count) {
3387 const ulint pending_io = buf_pool_check_no_pending_io();
3388
3389 if (pending_io == 0) {
3390 break;
3391 }
3392
3393 if (count >= SHUTDOWN_SLEEP_ROUNDS) {
3394 ib::info(ER_IB_MSG_1252, pending_io);
3395 count = 0;
3396 }
3397 os_thread_sleep(SHUTDOWN_SLEEP_TIME_US);
3398 }
3399 }
3400
/** Closes redo log. If this is not fast shutdown, it forces to write a
checkpoint which should be written for logically empty redo log. Note that we
forced to flush all dirty pages in the last stages of page cleaners activity
(unless it was fast shutdown). After checkpoint is written, the flushed_lsn is
updated within header of the system tablespace. This is lsn of the last clean
shutdown.
Must be entered in state SRV_SHUTDOWN_FLUSH_PHASE, with no active page
cleaner and no pending buffer pool I/O; sets the state to
SRV_SHUTDOWN_LAST_PHASE.
@return the lsn returned by log_get_lsn() during the shutdown */
static lsn_t srv_shutdown_log() {
  ut_a(srv_shutdown_state.load() == SRV_SHUTDOWN_FLUSH_PHASE);
  ut_a(!buf_flush_page_cleaner_is_active());
  ut_a(buf_pool_check_no_pending_io() == 0);

  /* Fastest shutdown (srv_fast_shutdown == 2): no checkpoint is made and
  no lsn is written to the data files. */
  if (srv_fast_shutdown == 2) {
    if (!srv_read_only_mode) {
      ib::info(ER_IB_MSG_1253);

      /* In this fastest shutdown we do not flush the
      buffer pool:

      it is essentially a 'crash' of the InnoDB server.
      Make sure that the log is all flushed to disk, so
      that we can recover all committed transactions in
      a crash recovery. We must not write the lsn stamps
      to the data files, since at a startup InnoDB deduces
      from the stamps if the previous shutdown was clean. */

      log_stop_background_threads(*log_sys);
    }

    /* No redo log might be generated since now. */
    log_background_threads_inactive_validate(*log_sys);

    srv_shutdown_set_state(SRV_SHUTDOWN_LAST_PHASE);

    return (log_get_lsn(*log_sys));
  }

  if (!srv_read_only_mode) {
    /* Repeat for as long as log_make_latest_checkpoint() returns true. */
    while (log_make_latest_checkpoint(*log_sys)) {
      /* It could happen, that when writing a new checkpoint,
      DD dynamic metadata was persisted, making some pages
      dirty (with the persisted data) and writing new redo
      records to protect those modifications. In such case,
      current lsn would be higher than the checkpoint lsn and we
      would need another iteration to ensure, that checkpoint lsn
      points to the newest lsn. */
    }

    log_stop_background_threads(*log_sys);
  }

  /* No redo log might be generated since now. */
  log_background_threads_inactive_validate(*log_sys);
  buf_must_be_all_freed();

  /* The lsn captured here is the one validated, written down and
  returned below; the assertions check that it does not move anymore. */
  const lsn_t lsn = log_get_lsn(*log_sys);

  if (!srv_read_only_mode) {
    fil_flush_file_spaces(to_int(FIL_TYPE_TABLESPACE) | to_int(FIL_TYPE_LOG));
  }

  srv_shutdown_set_state(SRV_SHUTDOWN_LAST_PHASE);

  /* Downgrading the log files is asserted to never happen in read-only
  mode. */
  if (srv_downgrade_logs) {
    ut_a(!srv_read_only_mode);

    log_files_downgrade(*log_sys);

    fil_flush_file_redo();
  }

  /* Validate lsn and write it down. The checks on the lsn are waived when
  srv_force_recovery >= SRV_FORCE_NO_LOG_REDO. */
  ut_a(log_lsn_validate(lsn) || srv_force_recovery >= SRV_FORCE_NO_LOG_REDO);

  ut_a(lsn == log_sys->last_checkpoint_lsn.load() ||
       srv_force_recovery >= SRV_FORCE_NO_LOG_REDO);

  ut_a(lsn == log_get_lsn(*log_sys));

  if (!srv_read_only_mode) {
    ut_a(srv_force_recovery < SRV_FORCE_NO_LOG_REDO);

    /* Failing to write the flushed lsn is fatal (asserted below). */
    auto err = fil_write_flushed_lsn(lsn);
    ut_a(err == DB_SUCCESS);
  }

  /* Nothing above may have dirtied pages or generated redo. */
  buf_must_be_all_freed();
  ut_a(lsn == log_get_lsn(*log_sys));

  return (lsn);
}
3491
3492 /** Copy all remaining data and shutdown archiver threads. */
srv_shutdown_arch()3493 static void srv_shutdown_arch() {
3494 uint32_t count = 0;
3495
3496 while (arch_wake_threads()) {
3497 ++count;
3498 os_thread_sleep(SHUTDOWN_SLEEP_TIME_US);
3499
3500 if (count > SHUTDOWN_SLEEP_ROUNDS) {
3501 ib::info(ER_IB_MSG_1246);
3502 count = 0;
3503 }
3504 }
3505 }
3506
srv_thread_delay_cleanup_if_needed(bool wait_for_signal)3507 void srv_thread_delay_cleanup_if_needed(bool wait_for_signal) {
3508 DBUG_EXECUTE_IF("threads_wait_on_cleanup", {
3509 if (wait_for_signal) {
3510 os_event_wait(srv_threads.m_shutdown_cleanup_dbg);
3511 } else {
3512 /* In some cases we cannot wait for the signal, because we would otherwise
3513 never reach the end of pre_dd_shutdown, becase pre_dd_shutdown is waiting
3514 for this thread before it ends. Then we would never reach shutdown phase
3515 in which the signal becomes signalled. Still we would like to have a way
3516 to detect situation in which someone broke the code and pre_dd_shutdown
3517 no longer waits for this thread. */
3518 os_thread_sleep(1000);
3519 }
3520 });
3521 }
3522
3523 /** Shut down the InnoDB database. */
srv_shutdown()3524 void srv_shutdown() {
3525 trx_sys_after_pre_dd_shutdown_validate();
3526
3527 /* Need to revert partition file names if minor upgrade fails. */
3528 uint data_version = MYSQL_VERSION_ID;
3529
3530 if (!fsp_header_dict_get_server_version(&data_version) &&
3531 data_version != MYSQL_VERSION_ID) {
3532 srv_downgrade_partition_files = true;
3533 }
3534
3535 ib::info(ER_IB_MSG_1247);
3536
3537 ut_a(!srv_is_being_started);
3538
3539 /* Ensure threads below have been stopped. */
3540 const auto threads_stopped_before_shutdown = {
3541 std::cref(srv_threads.m_purge_coordinator),
3542 std::cref(srv_threads.m_ts_alter_encrypt),
3543 std::cref(srv_threads.m_fts_optimize),
3544 std::cref(srv_threads.m_recv_writer),
3545 std::cref(srv_threads.m_dict_stats)};
3546
3547 for (const auto &thread : threads_stopped_before_shutdown) {
3548 ut_a(!srv_thread_is_active(thread));
3549 }
3550
3551 #ifdef UNIV_DEBUG
3552 /* In DEBUG we might be testing scenario in which we forced to
3553 call srv_shutdown_cleanup_and_master_stop() to stop all threads
3554 at the end of the srv_pre_dd_shutdown(). */
3555 DBUG_EXECUTE_IF("wait_for_threads_in_pre_dd_shutdown",
3556 srv_shutdown_state.store(SRV_SHUTDOWN_DD););
3557 #endif /* UNIV_DEBUG */
3558
3559 /* The SRV_SHUTDOWN_DD state was set during pre_dd_shutdown phase. */
3560 ut_a(srv_shutdown_state.load() == SRV_SHUTDOWN_DD);
3561
3562 /* Write dynamic metadata to DD buffer table. */
3563 dict_persist_to_dd_table_buffer();
3564
3565 /* 0. Stop remaining background threads except:
3566 - page-cleaners - we are shutting down page cleaners in step 1
3567 - redo-log-threads - these need to be shutdown after page cleaners,
3568 - archiver threads - these need to be shutdown after redo threads.
3569 After this call the state of shutdown is advanced to SRV_SHUTDOWN_MASTER_STOP.
3570 */
3571 srv_shutdown_cleanup_and_master_stop();
3572
3573 ut_a(srv_shutdown_state.load() == SRV_SHUTDOWN_MASTER_STOP);
3574
3575 /* Check again and write dynamic metadata to DD buffer table. Ideally we
3576 would not have dynamic metadata written so late in shutdown phase but
3577 currently we have certain operations done in master thread which could
3578 generate metadata. It is safe to check and write it here before we flush
3579 buffer pool to disk. */
3580 dict_persist_to_dd_table_buffer();
3581
3582 /* The steps 1-4 is the real InnoDB shutdown.
3583 All before was to stop activity which could produce new changes.
3584 All after is just cleaning up (freeing memory). */
3585
3586 /* 1. Flush the buffer pool to disk. */
3587 srv_shutdown_page_cleaners();
3588
3589 ut_a(srv_shutdown_state.load() == SRV_SHUTDOWN_FLUSH_PHASE);
3590
3591 /* 2. Write the current lsn to the tablespace header(s). */
3592 const lsn_t shutdown_lsn = srv_shutdown_log();
3593
3594 ut_a(srv_shutdown_state.load() == SRV_SHUTDOWN_LAST_PHASE);
3595
3596 /* 3. Close all opened files. */
3597 ibt::close_files();
3598 fil_close_all_files();
3599 if (srv_monitor_file) {
3600 fclose(srv_monitor_file);
3601 }
3602 if (srv_misc_tmpfile) {
3603 fclose(srv_misc_tmpfile);
3604 }
3605
3606 /* 4. Copy all log data to archive and stop archiver threads. */
3607 srv_shutdown_arch();
3608
3609 /* This is to preserve the old style, we should finally get rid of the call
3610 here. For that, we need to ensure we have already effectively closed all
3611 threads. */
3612 srv_shutdown_exit_threads();
3613
3614 ut_a(srv_shutdown_state.load() == SRV_SHUTDOWN_EXIT_THREADS);
3615 ut_ad(!os_thread_any_active());
3616
3617 /* 5. Free all the resources acquired by InnoDB (mutexes, events, memory). */
3618 ibt::delete_pool_manager();
3619
3620 if (srv_monitor_file) {
3621 srv_monitor_file = nullptr;
3622 if (srv_monitor_file_name) {
3623 unlink(srv_monitor_file_name);
3624 ut_free(srv_monitor_file_name);
3625 }
3626 mutex_free(&srv_monitor_file_mutex);
3627 }
3628
3629 if (srv_misc_tmpfile) {
3630 srv_misc_tmpfile = nullptr;
3631 mutex_free(&srv_misc_tmpfile_mutex);
3632 }
3633
3634 /* This must be disabled before closing the buffer pool
3635 and closing the data dictionary. */
3636 btr_search_disable(true);
3637
3638 ibuf_close();
3639 ddl_log_close();
3640 log_sys_close();
3641 recv_sys_close();
3642 trx_sys_close();
3643 lock_sys_close();
3644 trx_pool_close();
3645
3646 dict_close();
3647 dict_persist_close();
3648 btr_search_sys_free();
3649 undo_spaces_deinit();
3650
3651 UT_DELETE(srv_dict_metadata);
3652
3653 os_aio_free();
3654 que_close();
3655 row_mysql_close();
3656 srv_free();
3657 fil_close();
3658 pars_close();
3659
3660 pars_lexer_close();
3661 buf_pool_free_all();
3662
3663 /* 6. Free the thread management resoruces. */
3664 clone_free();
3665 arch_free();
3666
3667 dblwr::close();
3668 os_thread_close();
3669
3670 /* 6. Free the synchronisation infrastructure. */
3671 sync_check_close();
3672
3673 ib::info(ER_IB_MSG_1155, ulonglong{shutdown_lsn});
3674
3675 srv_start_has_been_called = false;
3676 srv_start_state = SRV_START_STATE_NONE;
3677 }
3678
srv_get_encryption_data_filename(dict_table_t * table,char * filename,ulint max_len)3679 void srv_get_encryption_data_filename(dict_table_t *table, char *filename,
3680 ulint max_len) {
3681 /* Make sure the data_dir_path is set. */
3682 dd_get_and_save_data_dir_path<dd::Table>(table, nullptr, false);
3683
3684 std::string path = dict_table_get_datadir(table);
3685
3686 auto filepath = Fil_path::make(path, table->name.m_name, CFP, true);
3687
3688 size_t len = strlen(filepath);
3689 ut_a(max_len >= len);
3690
3691 strcpy(filename, filepath);
3692
3693 ut_free(filepath);
3694 }
3695
/** Log the fatal error message ER_IB_MSG_1156, flush the pending output
and terminate the process by calling std::_Exit(3). This function does not
return. */
void srv_fatal_error() {
  ib::error(ER_IB_MSG_1156);

  /* Flush stderr explicitly before the process is terminated below. */
  fflush(stderr);

  /* Record that the exit is being called by InnoDB itself (ut_d() is
  expected to be compiled only in debug builds). */
  ut_d(innodb_calling_exit = true);

  flush_error_log_messages();

  /* Exit status 3. */
  std::_Exit(3);
}
3708