1 /*****************************************************************************
2 
3 Copyright (c) 1996, 2021, Oracle and/or its affiliates.
4 Copyright (c) 2008, Google Inc.
5 Copyright (c) 2009, 2016, Percona Inc.
6 
7 Portions of this file contain modifications contributed and copyrighted by
8 Google, Inc. Those modifications are gratefully acknowledged and are described
9 briefly in the InnoDB documentation. The contributions by Google are
10 incorporated with their permission, and subject to the conditions contained in
11 the file COPYING.Google.
12 
13 Portions of this file contain modifications contributed and copyrighted
14 by Percona Inc.. Those modifications are
15 gratefully acknowledged and are described briefly in the InnoDB
16 documentation. The contributions by Percona Inc. are incorporated with
17 their permission, and subject to the conditions contained in the file
18 COPYING.Percona.
19 
20 This program is free software; you can redistribute it and/or modify
21 it under the terms of the GNU General Public License, version 2.0,
22 as published by the Free Software Foundation.
23 
24 This program is also distributed with certain software (including
25 but not limited to OpenSSL) that is licensed under separate terms,
26 as designated in a particular file or component or in included license
27 documentation.  The authors of MySQL hereby grant you an additional
28 permission to link the program and your derivative works with the
29 separately licensed software that they have included with MySQL.
30 
31 This program is distributed in the hope that it will be useful,
32 but WITHOUT ANY WARRANTY; without even the implied warranty of
33 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
34 GNU General Public License, version 2.0, for more details.
35 
36 You should have received a copy of the GNU General Public License along with
37 this program; if not, write to the Free Software Foundation, Inc.,
38 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
39 
40 *****************************************************************************/
41 
42 /********************************************************************//**
43 @file srv/srv0start.cc
44 Starts the InnoDB database server
45 
46 Created 2/16/1996 Heikki Tuuri
47 *************************************************************************/
48 
49 #include "my_global.h"
50 
51 #include "ha_prototypes.h"
52 
53 #include "mysqld.h"
54 #include "mysql/psi/mysql_stage.h"
55 #include "mysql/psi/psi.h"
56 
57 #include "row0ftsort.h"
58 #include "ut0mem.h"
59 #include "mem0mem.h"
60 #include "data0data.h"
61 #include "data0type.h"
62 #include "dict0dict.h"
63 #include "buf0buf.h"
64 #include "buf0dump.h"
65 #include "os0file.h"
66 #include "os0thread.h"
67 #include "fil0fil.h"
68 #include "fil0crypt.h"
69 #include "fsp0fsp.h"
70 #include "rem0rec.h"
71 #include "mtr0mtr.h"
72 #include "log0log.h"
73 #include "log0online.h"
74 #include "log0recv.h"
75 #include "page0page.h"
76 #include "page0cur.h"
77 #include "trx0trx.h"
78 #include "trx0sys.h"
79 #include "btr0btr.h"
80 #include "btr0cur.h"
81 #include "btr0scrub.h"
82 #include "rem0rec.h"
83 #include "ibuf0ibuf.h"
84 #include "srv0start.h"
85 #include "srv0srv.h"
86 #include "fsp0sysspace.h"
87 #include "row0trunc.h"
88 #ifndef UNIV_HOTBACKUP
89 # include "trx0rseg.h"
90 # include "os0proc.h"
91 # include "buf0flu.h"
92 # include "buf0rea.h"
93 # include "dict0boot.h"
94 # include "dict0load.h"
95 # include "dict0stats_bg.h"
96 # include "que0que.h"
97 # include "usr0sess.h"
98 # include "lock0lock.h"
99 # include "trx0roll.h"
100 # include "trx0purge.h"
101 # include "lock0lock.h"
102 # include "pars0pars.h"
103 # include "btr0sea.h"
104 # include "rem0cmp.h"
105 # include "dict0crea.h"
106 # include "row0ins.h"
107 # include "row0sel.h"
108 # include "row0upd.h"
109 # include "row0row.h"
110 # include "row0mysql.h"
111 # include "row0trunc.h"
112 # include "btr0pcur.h"
113 # include "os0event.h"
114 # include "zlib.h"
115 # include "ut0crc32.h"
116 # include "ut0new.h"
117 
118 #ifdef HAVE_LZO1X
119 #include <lzo/lzo1x.h>
120 extern bool srv_lzo_disabled;
121 #endif /* HAVE_LZO1X */
122 
123 /** Log sequence number immediately after startup */
124 lsn_t	srv_start_lsn;
125 /** Log sequence number at shutdown */
126 lsn_t	srv_shutdown_lsn;
127 
128 /** TRUE if a raw partition is in use */
129 ibool	srv_start_raw_disk_in_use = FALSE;
130 
/** Space id at which the UNDO tablespaces start. */
132 ulint	srv_undo_space_id_start;
133 
134 /** Number of IO threads to use */
135 ulint	srv_n_file_io_threads = 0;
136 
137 /** TRUE if the server is being started, before rolling back any
138 incomplete transactions */
139 bool	srv_startup_is_before_trx_rollback_phase = false;
140 /** TRUE if the server is being started */
141 bool	srv_is_being_started = false;
142 /** TRUE if SYS_TABLESPACES is available for lookups */
143 bool	srv_sys_tablespaces_open = false;
144 /** TRUE if the server was successfully started */
145 ibool	srv_was_started = FALSE;
146 /** TRUE if innobase_start_or_create_for_mysql() has been called */
147 static ibool	srv_start_has_been_called = FALSE;
148 
/** Bit flags recording which groups of background threads have been
created so far. If the initialisation step has to be aborted, these flags
are used to determine which threads need to be stopped. */
enum srv_start_state_t {
	/** No thread started */
	SRV_START_STATE_NONE = 0,
	/** Started lock-timeout thread */
	SRV_START_STATE_LOCK_SYS = 1 << 0,
	/** Started IO threads */
	SRV_START_STATE_IO = 1 << 1,
	/** Started monitor thread */
	SRV_START_STATE_MONITOR = 1 << 2,
	/** Started master thread */
	SRV_START_STATE_MASTER = 1 << 3,
	/** Started purge thread(s) */
	SRV_START_STATE_PURGE = 1 << 4,
	/** Started bufdump + dict stat and FTS optimize thread */
	SRV_START_STATE_STAT = 1 << 5
};
163 
/** Track server thread starting phases */
165 static ulint	srv_start_state;
166 
167 /** At a shutdown this value climbs from SRV_SHUTDOWN_NONE to
168 SRV_SHUTDOWN_CLEANUP and then to SRV_SHUTDOWN_LAST_PHASE, and so on */
169 enum srv_shutdown_t	srv_shutdown_state = SRV_SHUTDOWN_NONE;
170 
171 /** Files comprising the system tablespace */
172 static pfs_os_file_t	files[1000];
173 
174 /** io_handler_thread parameters for thread identification */
175 static ulint		n[SRV_MAX_N_IO_THREADS];
176 /** io_handler_thread identifiers, 32 is the maximum number of purge threads.
177 The extra elements at the end are allocated as follows:
178 SRV_MAX_N_IO_THREADS + 1: srv_master_thread
179 SRV_MAX_N_IO_THREADS + 2: lock_wait_timeout_thread
180 SRV_MAX_N_IO_THREADS + 3: srv_error_monitor_thread
181 SRV_MAX_N_IO_THREADS + 4: srv_monitor_thread
182 SRV_MAX_N_IO_THREADS + 5: srv_redo_log_follow_thread
183 SRV_MAX_N_IO_THREADS + 6: srv_purge_coordinator_thread
184 SRV_MAX_N_IO_THREADS + 7: srv_worker_thread
185 ...
186 SRV_MAX_N_IO_THREADS + 7 + srv_n_purge_threads - 1: srv_worker_thread */
187 static os_thread_id_t	thread_ids[SRV_MAX_N_IO_THREADS + 7
188 				   + SRV_MAX_N_PURGE_THREADS];
189 
190 /** Name of srv_monitor_file */
191 static char*	srv_monitor_file_name;
192 #endif /* !UNIV_HOTBACKUP */
193 
/** Minimum expected tablespace size. (5M) */
195 static const ulint MIN_EXPECTED_TABLESPACE_SIZE = 5 * 1024 * 1024;
196 
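/** Presumably the maximum number of pending synchronous I/O requests
(inferred from the macro name only; TODO confirm where it is used). */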
198 #define SRV_MAX_N_PENDING_SYNC_IOS	100
199 
200 #ifdef UNIV_PFS_THREAD
201 /* Keys to register InnoDB threads with performance schema */
202 mysql_pfs_key_t	buf_dump_thread_key;
203 mysql_pfs_key_t	dict_stats_thread_key;
204 mysql_pfs_key_t	io_handler_thread_key;
205 mysql_pfs_key_t	io_ibuf_thread_key;
206 mysql_pfs_key_t	io_log_thread_key;
207 mysql_pfs_key_t	io_read_thread_key;
208 mysql_pfs_key_t	io_write_thread_key;
209 mysql_pfs_key_t	srv_error_monitor_thread_key;
210 mysql_pfs_key_t	srv_lock_timeout_thread_key;
211 mysql_pfs_key_t	srv_master_thread_key;
212 mysql_pfs_key_t	srv_monitor_thread_key;
213 mysql_pfs_key_t	srv_purge_thread_key;
214 mysql_pfs_key_t	srv_log_tracking_thread_key;
215 mysql_pfs_key_t	srv_worker_thread_key;
216 #endif /* UNIV_PFS_THREAD */
217 
218 int unlock_keyrings(THD *thd);
219 
220 #ifdef HAVE_PSI_STAGE_INTERFACE
221 /** Array of all InnoDB stage events for monitoring activities via
222 performance schema. */
223 static PSI_stage_info*	srv_stages[] =
224 {
225 	&srv_stage_alter_table_end,
226 	&srv_stage_alter_table_flush,
227 	&srv_stage_alter_table_insert,
228 	&srv_stage_alter_table_log_index,
229 	&srv_stage_alter_table_log_table,
230 	&srv_stage_alter_table_merge_sort,
231 	&srv_stage_alter_table_read_pk_internal_sort,
232 	&srv_stage_buffer_pool_load,
233 };
234 #endif /* HAVE_PSI_STAGE_INTERFACE */
235 
236 /*********************************************************************//**
237 Check if a file can be opened in read-write mode.
238 @return true if it doesn't exist or can be opened in rw mode. */
239 static
240 bool
srv_file_check_mode(const char * name)241 srv_file_check_mode(
242 /*================*/
243 	const char*	name)		/*!< in: filename to check */
244 {
245 	os_file_stat_t	stat;
246 
247 	memset(&stat, 0x0, sizeof(stat));
248 
249 	dberr_t		err = os_file_get_status(
250 		name, &stat, true, srv_read_only_mode);
251 
252 	if (err == DB_FAIL) {
253 		ib::error() << "os_file_get_status() failed on '" << name
254 			<< "'. Can't determine file permissions.";
255 		return(false);
256 
257 	} else if (err == DB_SUCCESS) {
258 
259 		/* Note: stat.rw_perm is only valid of files */
260 
261 		if (stat.type == OS_FILE_TYPE_FILE) {
262 
263 			if (!stat.rw_perm) {
264 				const char*	mode = srv_read_only_mode
265 					? "read" : "read-write";
266 				ib::error() << name << " can't be opened in "
267 					<< mode << " mode.";
268 				return(false);
269 			}
270 		} else {
271 			/* Not a regular file, bail out. */
272 			ib::error() << "'" << name << "' not a regular file.";
273 
274 			return(false);
275 		}
276 	} else {
277 
278 		/* This is OK. If the file create fails on RO media, there
279 		is nothing we can do. */
280 
281 		ut_a(err == DB_NOT_FOUND);
282 	}
283 
284 	return(true);
285 }
286 
287 #ifndef UNIV_HOTBACKUP
288 
289 static ulint io_tid_i = 0;
290 
291 /********************************************************************//**
292 I/o-handler thread function.
293 @return OS_THREAD_DUMMY_RETURN */
294 extern "C"
295 os_thread_ret_t
DECLARE_THREAD(io_handler_thread)296 DECLARE_THREAD(io_handler_thread)(
297 /*==============================*/
298 	void*	arg)	/*!< in: pointer to the number of the segment in
299 			the aio array */
300 {
301 	ulint	segment;
302 	ulint	tid_i = os_atomic_increment_ulint(&io_tid_i, 1) - 1;
303 
304 	ut_ad(tid_i < srv_n_file_io_threads);
305 
306 	segment = *((ulint*) arg);
307 
308 	srv_io_tids[tid_i] = os_thread_get_tid();
309 	os_thread_set_priority(srv_io_tids[tid_i], srv_sched_priority_io);
310 
311 #ifdef UNIV_DEBUG_THREAD_CREATION
312 	ib::info() << "Io handler thread " << segment << " starts, id "
313 		<< os_thread_pf(os_thread_get_curr_id());
314 #endif
315 
316 #ifdef UNIV_PFS_THREAD
317 	/* For read only mode, we don't need ibuf and log I/O thread.
318 	Please see innobase_start_or_create_for_mysql() */
319 	ulint   start = (srv_read_only_mode) ? 0 : 2;
320 
321 	if (segment < start) {
322 		if (segment == 0) {
323 			pfs_register_thread(io_ibuf_thread_key);
324 		} else {
325 			ut_ad(segment == 1);
326 			pfs_register_thread(io_log_thread_key);
327 		}
328 	} else if (segment >= start
329 		   && segment < (start + srv_n_read_io_threads)) {
330 			pfs_register_thread(io_read_thread_key);
331 
332 	} else if (segment >= (start + srv_n_read_io_threads)
333 		   && segment < (start + srv_n_read_io_threads
334 				 + srv_n_write_io_threads)) {
335 		pfs_register_thread(io_write_thread_key);
336 
337 	} else {
338 		pfs_register_thread(io_handler_thread_key);
339 	}
340 #endif /* UNIV_PFS_THREAD */
341 
342 	while (srv_shutdown_state != SRV_SHUTDOWN_EXIT_THREADS
343 	       || buf_page_cleaner_is_active
344 	       || !os_aio_all_slots_free()) {
345 		fil_aio_wait(segment);
346 	}
347 
348 	/* We count the number of threads in os_thread_exit(). A created
349 	thread should always use that to exit and not use return() to exit.
350 	The thread actually never comes here because it is exited in an
351 	os_event_wait(). */
352 
353 	os_thread_exit();
354 
355 	OS_THREAD_DUMMY_RETURN;
356 }
357 #endif /* !UNIV_HOTBACKUP */
358 
359 #ifndef UNIV_HOTBACKUP
360 /*********************************************************************//**
361 Creates a log file.
362 @return DB_SUCCESS or error code */
363 static MY_ATTRIBUTE((nonnull, warn_unused_result))
364 dberr_t
create_log_file(pfs_os_file_t * file,const char * name)365 create_log_file(
366 /*============*/
367 	pfs_os_file_t*	file,	/*!< out: file handle */
368 	const char*	name)	/*!< in: log file name */
369 {
370 	bool		ret;
371 
372 	*file = os_file_create(
373 		innodb_log_file_key, name,
374 		OS_FILE_CREATE|OS_FILE_ON_ERROR_NO_EXIT, OS_FILE_NORMAL,
375 		OS_LOG_FILE, srv_read_only_mode, &ret);
376 
377 	if (!ret) {
378 		ib::error() << "Cannot create " << name;
379 		return(DB_ERROR);
380 	}
381 
382 	ib::info() << "Setting log file " << name << " size to "
383 		<< (srv_log_file_size >> (20 - UNIV_PAGE_SIZE_SHIFT))
384 		<< " MB";
385 
386 	ret = os_file_set_size(name, *file,
387 			       (os_offset_t) srv_log_file_size
388 			       << UNIV_PAGE_SIZE_SHIFT,
389 			       srv_read_only_mode);
390 	if (!ret) {
391 		ib::error() << "Cannot set log file " << name << " to size "
392 			<< (srv_log_file_size >> (20 - UNIV_PAGE_SIZE_SHIFT))
393 			<< " MB";
394 		return(DB_ERROR);
395 	}
396 
397 	ret = os_file_close(*file);
398 	ut_a(ret);
399 
400 	return(DB_SUCCESS);
401 }
402 
403 /** Initial number of the first redo log file */
404 #define INIT_LOG_FILE0	(SRV_N_LOG_FILES_MAX + 1)
405 
406 /*********************************************************************//**
407 Creates all log files.
408 @return DB_SUCCESS or error code */
409 static
410 dberr_t
create_log_files(char * logfilename,size_t dirnamelen,lsn_t lsn,char * & logfile0)411 create_log_files(
412 /*=============*/
413 	char*	logfilename,	/*!< in/out: buffer for log file name */
414 	size_t	dirnamelen,	/*!< in: length of the directory path */
415 	lsn_t	lsn,		/*!< in: FIL_PAGE_FILE_FLUSH_LSN value */
416 	char*&	logfile0)	/*!< out: name of the first log file */
417 {
418 	dberr_t err;
419 
420 	if (srv_read_only_mode) {
421 		ib::error() << "Cannot create log files in read-only mode";
422 		return(DB_READ_ONLY);
423 	}
424 
425 	/* Remove any old log files. */
426 	for (unsigned i = 0; i <= INIT_LOG_FILE0; i++) {
427 		sprintf(logfilename + dirnamelen, "ib_logfile%u", i);
428 
429 		/* Ignore errors about non-existent files or files
430 		that cannot be removed. The create_log_file() will
431 		return an error when the file exists. */
432 #ifdef _WIN32
433 		DeleteFile((LPCTSTR) logfilename);
434 #else
435 		unlink(logfilename);
436 #endif
437 		/* Crashing after deleting the first
438 		file should be recoverable. The buffer
439 		pool was clean, and we can simply create
440 		all log files from the scratch. */
441 		RECOVERY_CRASH(6);
442 	}
443 
444 	ut_ad(!buf_pool_check_no_pending_io());
445 
446 	RECOVERY_CRASH(7);
447 
448 	for (unsigned i = 0; i < srv_n_log_files; i++) {
449 		sprintf(logfilename + dirnamelen,
450 			"ib_logfile%u", i ? i : INIT_LOG_FILE0);
451 
452 		err = create_log_file(&files[i], logfilename);
453 
454 		if (err != DB_SUCCESS) {
455 			return(err);
456 		}
457 	}
458 
459 	RECOVERY_CRASH(8);
460 
461 	/* We did not create the first log file initially as
462 	ib_logfile0, so that crash recovery cannot find it until it
463 	has been completed and renamed. */
464 	sprintf(logfilename + dirnamelen, "ib_logfile%u", INIT_LOG_FILE0);
465 
466 	/* Disable the doublewrite buffer for log files, not required */
467 
468 	fil_space_t*	log_space = fil_space_create(
469 		"innodb_redo_log", SRV_LOG_SPACE_FIRST_ID,
470 		fsp_flags_set_page_size(0, univ_page_size),
471 		FIL_TYPE_LOG,
472                 NULL);
473 	ut_a(fil_validate());
474 	ut_a(log_space != NULL);
475 
476 	/* Once the redo log is set to be encrypted,
477 	   initialize encryption information. */
478 	if (srv_redo_log_encrypt != REDO_LOG_ENCRYPT_OFF) {
479 		if (!Encryption::check_keyring()) {
480 			ib::error()
481 				<< "Redo log encryption is enabled,"
482 				<< " but keyring plugin is not loaded.";
483 
484 			return(DB_ERROR);
485 		}
486 
487 		Encryption::Type alg = srv_redo_log_encrypt == REDO_LOG_ENCRYPT_RK
488 		       ? Encryption::KEYRING : Encryption::AES;
489 
490 		log_space->flags |= FSP_FLAGS_MASK_ENCRYPTION;
491 
492 		redo_log_key* mkey = redo_log_key_mgr.generate_new_key_without_storing();
493 		err = fil_set_encryption(log_space->id,
494 					 alg,
495 					 reinterpret_cast<byte*>(mkey->key),
496 					 NULL);
497 
498 		if (err != DB_SUCCESS) {
499 			ib::error() << "Failed to encrypt redo log tablespace.";
500 			return(DB_ERROR);
501 		}
502 
503 		log_space->encryption_redo_key = mkey;
504 		log_space->encryption_key_version = REDO_LOG_ENCRYPT_NO_VERSION;
505 		ut_ad(err == DB_SUCCESS);
506        }
507 
508 
509 	logfile0 = fil_node_create(
510 		logfilename, (ulint) srv_log_file_size,
511 		log_space, false, false);
512 	ut_a(logfile0);
513 
514 	for (unsigned i = 1; i < srv_n_log_files; i++) {
515 
516 		sprintf(logfilename + dirnamelen, "ib_logfile%u", i);
517 
518 		if (!fil_node_create(logfilename,
519 				     (ulint) srv_log_file_size,
520 				     log_space, false, false)) {
521 
522 			ib::error()
523 				<< "Cannot create file node for log file "
524 				<< logfilename;
525 
526 			return(DB_ERROR);
527 		}
528 	}
529 
530 	if (!log_group_init(0, srv_n_log_files,
531 			    srv_log_file_size * UNIV_PAGE_SIZE,
532 			    SRV_LOG_SPACE_FIRST_ID)) {
533 		return(DB_ERROR);
534 	}
535 
536 	fil_open_log_and_system_tablespace_files();
537 
538 	/* Create a log checkpoint. */
539 	log_mutex_enter();
540 	ut_d(recv_no_log_write = false);
541 	recv_reset_logs(lsn);
542 	log_mutex_exit();
543 
544 	/* Write encryption information into the first log file header
545 	if redo log is set with encryption. */
546 	if (FSP_FLAGS_GET_ENCRYPTION(log_space->flags)) {
547 		if (!log_write_encryption(log_space->encryption_key,
548 					  log_space->encryption_iv,
549 					  static_cast<redo_log_encrypt_enum>(srv_redo_log_encrypt))) {
550 			return(DB_ERROR);
551 		}
552 	}
553 
554 	return(DB_SUCCESS);
555 }
556 
557 /*********************************************************************//**
558 Renames the first log file. */
559 static
560 void
create_log_files_rename(char * logfilename,size_t dirnamelen,lsn_t lsn,char * logfile0)561 create_log_files_rename(
562 /*====================*/
563 	char*	logfilename,	/*!< in/out: buffer for log file name */
564 	size_t	dirnamelen,	/*!< in: length of the directory path */
565 	lsn_t	lsn,		/*!< in: FIL_PAGE_FILE_FLUSH_LSN value */
566 	char*	logfile0)	/*!< in/out: name of the first log file */
567 {
568 	/* If innodb_flush_method=O_DSYNC,
569 	we need to explicitly flush the log buffers. */
570 	fil_flush(SRV_LOG_SPACE_FIRST_ID);
571 	/* Close the log files, so that we can rename
572 	the first one. */
573 	fil_close_log_files(false);
574 
575 	/* Rename the first log file, now that a log
576 	checkpoint has been created. */
577 	sprintf(logfilename + dirnamelen, "ib_logfile%u", 0);
578 
579 	RECOVERY_CRASH(9);
580 
581 	ib::info() << "Renaming log file " << logfile0 << " to "
582 		<< logfilename;
583 
584 	log_mutex_enter();
585 	ut_ad(strlen(logfile0) == 2 + strlen(logfilename));
586 	bool success = os_file_rename(
587 		innodb_log_file_key, logfile0, logfilename);
588 	ut_a(success);
589 
590 	RECOVERY_CRASH(10);
591 
592 	/* Replace the first file with ib_logfile0. */
593 	strcpy(logfile0, logfilename);
594 	log_mutex_exit();
595 
596 	fil_open_log_and_system_tablespace_files();
597 
598 	ib::warn() << "New log files created, LSN=" << lsn;
599 }
600 
601 /*********************************************************************//**
602 Opens a log file.
603 @return DB_SUCCESS or error code */
604 static MY_ATTRIBUTE((nonnull, warn_unused_result))
605 dberr_t
open_log_file(pfs_os_file_t * file,const char * name,os_offset_t * size)606 open_log_file(
607 /*==========*/
608 	pfs_os_file_t*	file,	/*!< out: file handle */
609 	const char*	name,	/*!< in: log file name */
610 	os_offset_t*	size)	/*!< out: file size */
611 {
612 	bool	ret;
613 
614 	*file = os_file_create(innodb_log_file_key, name,
615 			       OS_FILE_OPEN, OS_FILE_AIO,
616 			       OS_LOG_FILE, srv_read_only_mode, &ret);
617 	if (!ret) {
618 		ib::error() << "Unable to open '" << name << "'";
619 		return(DB_ERROR);
620 	}
621 
622 	*size = os_file_get_size(*file);
623 
624 	ret = os_file_close(*file);
625 	ut_a(ret);
626 	return(DB_SUCCESS);
627 }
628 
629 /*********************************************************************//**
630 Create undo tablespace.
631 @return DB_SUCCESS or error code */
632 static
633 dberr_t
srv_undo_tablespace_create(const char * name,ulint size)634 srv_undo_tablespace_create(
635 /*=======================*/
636 	const char*	name,		/*!< in: tablespace name */
637 	ulint		size)		/*!< in: tablespace size in pages */
638 {
639 	pfs_os_file_t	fh;
640 	bool		ret;
641 	dberr_t		err = DB_SUCCESS;
642 
643 	os_file_create_subdirs_if_needed(name);
644 
645 	fh = os_file_create(
646 		innodb_data_file_key,
647 		name,
648 		srv_read_only_mode ? OS_FILE_OPEN : OS_FILE_CREATE,
649 		OS_FILE_NORMAL, OS_DATA_FILE, srv_read_only_mode, &ret);
650 
651 	if (srv_read_only_mode && ret) {
652 
653 		ib::info() << name << " opened in read-only mode";
654 
655 	} else if (ret == FALSE) {
656 		if (os_file_get_last_error(false) != OS_FILE_ALREADY_EXISTS) {
657 
658 			ib::error() << "Can't create UNDO tablespace "
659 				<< name;
660 		}
661 		err = DB_ERROR;
662 	} else {
663 		ut_a(!srv_read_only_mode);
664 
665 		/* We created the data file and now write it full of zeros */
666 
667 		ib::info() << "Data file " << name << " did not exist: new to"
668 			" be created";
669 
670 		ib::info() << "Setting file " << name << " size to "
671 			<< (size >> (20 - UNIV_PAGE_SIZE_SHIFT)) << " MB";
672 
673 		ib::info() << "Database physically writes the file full: "
674 			<< "wait...";
675 
676 		ret = os_file_set_size(
677 			name, fh, size << UNIV_PAGE_SIZE_SHIFT,
678 			srv_read_only_mode);
679 
680 		if (!ret) {
681 			ib::info() << "Error in creating " << name
682 				<< ": probably out of disk space";
683 
684 			err = DB_ERROR;
685 		}
686 
687 		os_file_close(fh);
688 	}
689 
690 	return(err);
691 }
692 
693 /** Try to read encryption metadata from an undo tablespace.
694 @param[in]	fh		file handle of undo log file
695 @param[in]	space		undo tablespace
696 @return DB_SUCCESS if success */
697 static
698 dberr_t
srv_undo_tablespace_read_encryption(pfs_os_file_t fh,fil_space_t * space)699 srv_undo_tablespace_read_encryption(
700 	pfs_os_file_t	fh,
701 	fil_space_t*	space)
702 {
703 	IORequest	request;
704 	ulint		n_read = 0;
705 	size_t		page_size = UNIV_PAGE_SIZE_MAX;
706 	dberr_t		err = DB_ERROR;
707  	byte* first_page_buf = static_cast<byte*>(
708 		ut_malloc_nokey(2 * UNIV_PAGE_SIZE_MAX));
709 	/* Align the memory for a possible read from a raw device */
710 	byte* first_page = static_cast<byte*>(
711 		ut_align(first_page_buf, UNIV_PAGE_SIZE));
712  	/* Don't want unnecessary complaints about partial reads. */
713 	request.disable_partial_io_warnings();
714  	err = os_file_read_no_error_handling(
715 		request, fh, first_page, 0, page_size, &n_read);
716  	if (err != DB_SUCCESS) {
717 		ib::info()
718 			<< "Cannot read first page of '"
719 			<< space->name << "' "
720 			<< ut_strerr(err);
721 		ut_free(first_page_buf);
722 		return(err);
723 	}
724  	ulint			offset;
725 	const page_size_t	space_page_size(space->flags);
726  	offset = fsp_header_get_encryption_offset(space_page_size);
727 	ut_ad(offset);
728  	/* Return if the encryption metadata is empty. */
729 	if (memcmp(first_page + offset,
730 		   ENCRYPTION_KEY_MAGIC_V2,
731 		   ENCRYPTION_MAGIC_SIZE) != 0
732 	    &&
733 	    memcmp(first_page + offset,
734 		   ENCRYPTION_KEY_MAGIC_V3,
735 		   ENCRYPTION_MAGIC_SIZE) != 0
736 	    ) {
737 		ut_free(first_page_buf);
738 		return(DB_SUCCESS);
739 	}
740  	byte	key[ENCRYPTION_KEY_LEN];
741 	byte	iv[ENCRYPTION_KEY_LEN];
742 	if (fsp_header_get_encryption_key(space->flags, key,
743 					  iv, first_page)) {
744  		space->flags |= FSP_FLAGS_MASK_ENCRYPTION;
745 		err = fil_set_encryption(space->id,
746 					 Encryption::AES,
747 					 key,
748 					 iv);
749 		ut_ad(err == DB_SUCCESS);
750 	} else {
751 		ut_free(first_page_buf);
752 		return(DB_FAIL);
753 	}
754  	ut_free(first_page_buf);
755  	return(DB_SUCCESS);
756 }
757 
758 /*********************************************************************//**
759 Open an undo tablespace.
760 @return DB_SUCCESS or error code */
761 static
762 dberr_t
srv_undo_tablespace_open(const char * name,ulint space_id)763 srv_undo_tablespace_open(
764 /*=====================*/
765 	const char*	name,		/*!< in: tablespace file name */
766 	ulint		space_id)	/*!< in: tablespace id */
767 {
768 	pfs_os_file_t	fh;
769 	bool		ret;
770 	ulint		flags;
771 	dberr_t		err	= DB_ERROR;
772 	char		undo_name[sizeof "innodb_undo000"];
773 
774 	ut_snprintf(undo_name, sizeof(undo_name),
775 		   "innodb_undo%03u", static_cast<unsigned>(space_id));
776 
777 	if (!srv_file_check_mode(name)) {
778 		ib::error() << "UNDO tablespaces must be " <<
779 			(srv_read_only_mode ? "writable" : "readable") << "!";
780 
781 		return(DB_ERROR);
782 	}
783 
784 	fh = os_file_create(
785 		innodb_data_file_key, name,
786 		OS_FILE_OPEN_RETRY
787 		| OS_FILE_ON_ERROR_NO_EXIT
788 		| OS_FILE_ON_ERROR_SILENT,
789 		OS_FILE_NORMAL,
790 		OS_DATA_FILE,
791 		srv_read_only_mode,
792 		&ret);
793 
794 	/* If the file open was successful then load the tablespace. */
795 
796 	if (ret) {
797 		os_offset_t	size;
798 		fil_space_t*	space;
799 
800 		bool	atomic_write;
801 
802 #if !defined(NO_FALLOCATE) && defined(UNIV_LINUX)
803 		if (!srv_use_doublewrite_buf) {
804 			atomic_write = fil_fusionio_enable_atomic_write(fh);
805 		} else {
806 			atomic_write = false;
807 		}
808 #else
809 		atomic_write = false;
810 #endif /* !NO_FALLOCATE && UNIV_LINUX */
811 
812 		size = os_file_get_size(fh);
813 		ut_a(size != (os_offset_t) -1);
814 
815 		/* Load the tablespace into InnoDB's internal
816 		data structures. */
817 
818 		/* We set the biggest space id to the undo tablespace
819 		because InnoDB hasn't opened any other tablespace apart
820 		from the system tablespace. */
821 
822 		fil_set_max_space_id_if_bigger(space_id);
823 
824 		/* Set the compressed page size to 0 (non-compressed) */
825 		flags = fsp_flags_init(
826 			univ_page_size, false, false, false, false);
827 		space = fil_space_create(
828 			undo_name, space_id, flags, FIL_TYPE_TABLESPACE, NULL);
829 
830 		ut_a(fil_validate());
831 		ut_a(space);
832 
833 		os_offset_t	n_pages = size / UNIV_PAGE_SIZE;
834 
835 		/* On 32-bit platforms, ulint is 32 bits and os_offset_t
836 		is 64 bits. It is OK to cast the n_pages to ulint because
837 		the unit has been scaled to pages and page number is always
838 		32 bits. */
839 		if (!fil_node_create(
840 			name, (ulint) n_pages, space, false, atomic_write)) {
841 			os_file_close(fh);
842 			ib::error() << "Error creating file node for " << undo_name;
843 			return(DB_ERROR);
844 		}
845 
846 		err = DB_SUCCESS;
847 		/* Read the encryption metadata in this undo tablespace.
848 		If the encryption info in the first page cannot be decrypted
849 		by the master key, this table cannot be opened. */
850 		err = srv_undo_tablespace_read_encryption(fh, space);
851  		/* The file handle will no longer be needed. */
852 		os_file_close(fh);
853 
854 		if (err != DB_SUCCESS) {
855 			ib::error() << "Error reading encryption for " << undo_name;
856 			return(err);
857 		}
858 
859 	}
860 
861 	return(err);
862 }
863 
864 /** Check if undo tablespaces and redo log files exist before creating a
865 new system tablespace
866 @retval DB_SUCCESS  if all undo and redo logs are not found
867 @retval DB_ERROR    if any undo and redo logs are found */
868 static
869 dberr_t
srv_check_undo_redo_logs_exists()870 srv_check_undo_redo_logs_exists()
871 {
872 	bool		ret;
873 	pfs_os_file_t	fh;
874 	char	name[OS_FILE_MAX_PATH];
875 
876 	/* Check if any undo tablespaces exist */
877 	for (ulint i = 1; i <= srv_undo_tablespaces; ++i) {
878 
879 		ut_snprintf(
880 			name, sizeof(name),
881 			"%s%cundo%03lu",
882 			srv_undo_dir, OS_PATH_SEPARATOR,
883 			i);
884 
885 		fh = os_file_create(
886 			innodb_data_file_key, name,
887 			OS_FILE_OPEN_RETRY
888 			| OS_FILE_ON_ERROR_NO_EXIT
889 			| OS_FILE_ON_ERROR_SILENT,
890 			OS_FILE_NORMAL,
891 			OS_DATA_FILE,
892 			srv_read_only_mode,
893 			&ret);
894 
895 		if (ret) {
896 			os_file_close(fh);
897 			ib::error()
898 				<< "undo tablespace '" << name << "' exists."
899 				" Creating system tablespace with existing undo"
900 				" tablespaces is not supported. Please delete"
901 				" all undo tablespaces before creating new"
902 				" system tablespace.";
903 			return(DB_ERROR);
904 		}
905 	}
906 
907 	/* Check if any redo log files exist */
908 	char	logfilename[OS_FILE_MAX_PATH];
909 	size_t dirnamelen = strlen(srv_log_group_home_dir);
910 	memcpy(logfilename, srv_log_group_home_dir, dirnamelen);
911 
912 	for (unsigned i = 0; i < srv_n_log_files; i++) {
913 		sprintf(logfilename + dirnamelen,
914 			"ib_logfile%u", i);
915 
916 		fh = os_file_create(
917 			innodb_log_file_key, logfilename,
918 			OS_FILE_OPEN_RETRY
919 			| OS_FILE_ON_ERROR_NO_EXIT
920 			| OS_FILE_ON_ERROR_SILENT,
921 			OS_FILE_NORMAL,
922 			OS_LOG_FILE,
923 			srv_read_only_mode,
924 			&ret);
925 
926 		if (ret) {
927 			os_file_close(fh);
928 			ib::error() << "redo log file '" << logfilename
929 				<< "' exists. Creating system tablespace with"
930 				" existing redo log files is not recommended."
931 				" Please delete all redo log files before"
932 				" creating new system tablespace.";
933 			return(DB_ERROR);
934 		}
935 	}
936 
937 	return(DB_SUCCESS);
938 }
939 
940 undo::undo_spaces_t	undo::Truncate::s_fix_up_spaces;
941 
942 /********************************************************************
943 Opens the configured number of undo tablespaces.
944 @return DB_SUCCESS or error code */
945 static
946 dberr_t
srv_undo_tablespaces_init(bool create_new_db,const ulint n_conf_tablespaces,ulint * n_opened)947 srv_undo_tablespaces_init(
948 /*======================*/
949 	bool		create_new_db,		/*!< in: TRUE if new db being
950 						created */
951 	const ulint	n_conf_tablespaces,	/*!< in: configured undo
952 						tablespaces */
953 	ulint*		n_opened)		/*!< out: number of UNDO
954 						tablespaces successfully
955 						discovered and opened */
956 {
957 	ulint			i;
958 	dberr_t			err = DB_SUCCESS;
959 	ulint			prev_space_id = 0;
960 	ulint			n_undo_tablespaces;
961 	ulint			undo_tablespace_ids[TRX_SYS_N_RSEGS + 1];
962 
963 	*n_opened = 0;
964 
965 	ut_a(n_conf_tablespaces <= TRX_SYS_N_RSEGS);
966 
967 	memset(undo_tablespace_ids, 0x0, sizeof(undo_tablespace_ids));
968 
969 	/* Create the undo spaces only if we are creating a new
970 	instance. We don't allow creating of new undo tablespaces
971 	in an existing instance (yet).  This restriction exists because
972 	we check in several places for SYSTEM tablespaces to be less than
973 	the min of user defined tablespace ids. Once we implement saving
974 	the location of the undo tablespaces and their space ids this
975 	restriction will/should be lifted. */
976 
977 	for (i = 0; create_new_db && i < n_conf_tablespaces; ++i) {
978 		char		name[OS_FILE_MAX_PATH];
979 		ulint		space_id;
980 
981 		DBUG_EXECUTE_IF("innodb_undo_upgrade",
982 			if (i == 0) {
983 				dict_hdr_get_new_id(
984 					NULL, NULL, &space_id, NULL, true);
985 				dict_hdr_get_new_id(
986 					NULL, NULL, &space_id, NULL, true);
987 				dict_hdr_get_new_id(
988 					NULL, NULL, &space_id, NULL, true);
989 			});
990 
991 		dict_hdr_get_new_id(NULL, NULL, &space_id, NULL, true);
992 
993 		fil_set_max_space_id_if_bigger(space_id);
994 
995 		if (i == 0) {
996 			srv_undo_space_id_start = space_id;
997 			prev_space_id = srv_undo_space_id_start - 1;
998 		}
999 
1000 		ut_snprintf(
1001 			name, sizeof(name),
1002 			"%s%cundo%03lu",
1003 			srv_undo_dir, OS_PATH_SEPARATOR, space_id);
1004 
1005 		undo_tablespace_ids[i] = space_id;
1006 
1007 		err = srv_undo_tablespace_create(
1008 			name, SRV_UNDO_TABLESPACE_SIZE_IN_PAGES);
1009 
1010 		if (err != DB_SUCCESS) {
1011 			ib::error() << "Could not create undo tablespace '"
1012 				<< name << "'.";
1013 			return(err);
1014 		}
1015 	}
1016 
1017 	/* Get the tablespace ids of all the undo segments excluding
1018 	the system tablespace (0). If we are creating a new instance then
1019 	we build the undo_tablespace_ids ourselves since they don't
1020 	already exist. */
1021 
1022 	if (!create_new_db) {
1023 		n_undo_tablespaces = trx_rseg_get_n_undo_tablespaces(
1024 			undo_tablespace_ids);
1025 
1026 		srv_undo_tablespaces_active = n_undo_tablespaces;
1027 
1028 		if (srv_undo_tablespaces_active != 0) {
1029 			srv_undo_space_id_start = undo_tablespace_ids[0];
1030 			prev_space_id = srv_undo_space_id_start - 1;
1031 		}
1032 
1033 		/* Check if any of the UNDO tablespace needs fix-up because
1034 		server crashed while truncate was active on UNDO tablespace.*/
1035 		for (i = 0; i < n_undo_tablespaces; ++i) {
1036 
1037 			undo::Truncate	undo_trunc;
1038 
1039 			if (undo_trunc.needs_fix_up(undo_tablespace_ids[i])) {
1040 
1041 				char	name[OS_FILE_MAX_PATH];
1042 
1043 				ut_snprintf(name, sizeof(name),
1044 					    "%s%cundo%03lu",
1045 					    srv_undo_dir, OS_PATH_SEPARATOR,
1046 					    undo_tablespace_ids[i]);
1047 
1048 				os_file_delete(innodb_data_file_key, name);
1049 
1050 				err = srv_undo_tablespace_create(
1051 					name,
1052 					SRV_UNDO_TABLESPACE_SIZE_IN_PAGES);
1053 
1054 				if (err != DB_SUCCESS) {
1055 					ib::error() << "Could not fix-up undo "
1056 						" tablespace truncate '"
1057 						<< name << "'.";
1058 					return(err);
1059 				}
1060 
1061 				undo::Truncate::s_fix_up_spaces.push_back(
1062 					undo_tablespace_ids[i]);
1063 			}
1064 		}
1065 	} else {
1066 		n_undo_tablespaces = n_conf_tablespaces;
1067 
1068 		undo_tablespace_ids[n_conf_tablespaces] = ULINT_UNDEFINED;
1069 	}
1070 
1071 	/* Open all the undo tablespaces that are currently in use. If we
1072 	fail to open any of these it is a fatal error. The tablespace ids
1073 	should be contiguous. It is a fatal error because they are required
1074 	for recovery and are referenced by the UNDO logs (a.k.a RBS). */
1075 
1076 	for (i = 0; i < n_undo_tablespaces; ++i) {
1077 		char	name[OS_FILE_MAX_PATH];
1078 
1079 		ut_snprintf(
1080 			name, sizeof(name),
1081 			"%s%cundo%03lu",
1082 			srv_undo_dir, OS_PATH_SEPARATOR,
1083 			undo_tablespace_ids[i]);
1084 
1085 		/* Should be no gaps in undo tablespace ids. */
1086 		ut_a(prev_space_id + 1 == undo_tablespace_ids[i]);
1087 
1088 		/* The system space id should not be in this array. */
1089 		ut_a(undo_tablespace_ids[i] != 0);
1090 		ut_a(undo_tablespace_ids[i] != ULINT_UNDEFINED);
1091 
1092 		fil_set_max_space_id_if_bigger(undo_tablespace_ids[i]);
1093 
1094 		err = srv_undo_tablespace_open(name, undo_tablespace_ids[i]);
1095 
1096 		if (err != DB_SUCCESS) {
1097 			ib::error() << "Unable to open undo tablespace '"
1098 				<< name << "'.";
1099 			return(err);
1100 		}
1101 
1102 		prev_space_id = undo_tablespace_ids[i];
1103 
1104 		++*n_opened;
1105 	}
1106 
1107 	/* Open any extra unused undo tablespaces. These must be contiguous.
1108 	We stop at the first failure. These are undo tablespaces that are
1109 	not in use and therefore not required by recovery. We only check
1110 	that there are no gaps. */
1111 
1112 	for (i = prev_space_id + 1; i < TRX_SYS_N_RSEGS; ++i) {
1113 		char	name[OS_FILE_MAX_PATH];
1114 
1115 		ut_snprintf(
1116 			name, sizeof(name),
1117 			"%s%cundo%03lu", srv_undo_dir, OS_PATH_SEPARATOR, i);
1118 
1119 		err = srv_undo_tablespace_open(name, i);
1120 
1121 		if (err != DB_SUCCESS) {
1122 			break;
1123 		}
1124 
1125 		/** Note the first undo tablespace id in case of
1126 		no active undo tablespace. */
1127 		if (n_undo_tablespaces == 0) {
1128 			srv_undo_space_id_start = i;
1129 		}
1130 
1131 		++n_undo_tablespaces;
1132 
1133 		++*n_opened;
1134 	}
1135 
1136 	/** Explictly specify the srv_undo_space_id_start
1137 	as zero when there are no undo tablespaces. */
1138 	if (n_undo_tablespaces == 0) {
1139 		srv_undo_space_id_start = 0;
1140 	}
1141 
1142 	/* If the user says that there are fewer than what we find we
1143 	tolerate that discrepancy but not the inverse. Because there could
1144 	be unused undo tablespaces for future use. */
1145 
1146 	if (n_conf_tablespaces > n_undo_tablespaces) {
1147 		ib::error() << "Expected to open " << n_conf_tablespaces
1148 			<< " undo tablespaces but was able to find only "
1149 			<< n_undo_tablespaces << " undo tablespaces. Set the"
1150 			" innodb_undo_tablespaces parameter to the correct"
1151 			" value and retry. Suggested value is "
1152 			<< n_undo_tablespaces;
1153 
1154 		return(err != DB_SUCCESS ? err : DB_ERROR);
1155 
1156 	} else  if (n_undo_tablespaces > 0) {
1157 
1158 		ib::info() << "Opened " << n_undo_tablespaces
1159 			<< " undo tablespaces";
1160 
1161 		ib::info() << srv_undo_tablespaces_active << " undo tablespaces"
1162 			<< " made active";
1163 
1164 		if (n_conf_tablespaces == 0) {
1165 			ib::warn() << "Will use system tablespace for all newly"
1166 				<< " created rollback-segment as"
1167 				<< " innodb_undo_tablespaces=0";
1168 		}
1169 	}
1170 
1171 	if (create_new_db) {
1172 		mtr_t	mtr;
1173 
1174 		mtr_start(&mtr);
1175 
1176 		/* The undo log tablespace */
1177 		for (i = 0; i < n_undo_tablespaces; ++i) {
1178 
1179 			fsp_header_init(
1180 				undo_tablespace_ids[i],
1181 				SRV_UNDO_TABLESPACE_SIZE_IN_PAGES, &mtr);
1182 		}
1183 
1184 		mtr_commit(&mtr);
1185 	}
1186 
1187 	if (!undo::Truncate::s_fix_up_spaces.empty()) {
1188 
1189 		/* Step-1: Initialize the tablespace header and rsegs header. */
1190 		mtr_t		mtr;
1191 		trx_sysf_t*	sys_header;
1192 
1193 		mtr_start(&mtr);
1194 		/* Turn off REDO logging. We are in server start mode and fixing
1195 		UNDO tablespace even before REDO log is read. Let's say we
1196 		do REDO logging here then this REDO log record will be applied
1197 		as part of the current recovery process. We surely don't need
1198 		that as this is fix-up action parallel to REDO logging. */
1199 		mtr_set_log_mode(&mtr, MTR_LOG_NO_REDO);
1200 		sys_header = trx_sysf_get(&mtr);
1201 
1202 		for (undo::undo_spaces_t::const_iterator it
1203 			     = undo::Truncate::s_fix_up_spaces.begin();
1204 		     it != undo::Truncate::s_fix_up_spaces.end();
1205 		     ++it) {
1206 
1207 			undo::Truncate::add_space_to_trunc_list(*it);
1208 
1209 			fsp_header_init(
1210 				*it, SRV_UNDO_TABLESPACE_SIZE_IN_PAGES, &mtr);
1211 
1212 			mtr_x_lock(fil_space_get_latch(*it, NULL), &mtr);
1213 
1214 			for (ulint i = 0; i < TRX_SYS_N_RSEGS; i++) {
1215 
1216 				ulint	space_id = trx_sysf_rseg_get_space(
1217 						sys_header, i, &mtr);
1218 
1219 				if (space_id == *it) {
1220 					trx_rseg_header_create(
1221 						*it, univ_page_size, ULINT_MAX,
1222 						i, &mtr);
1223 				}
1224 			}
1225 
1226 			undo::Truncate::clear_trunc_list();
1227 		}
1228 		mtr_commit(&mtr);
1229 
1230 		/* Step-2: Flush the dirty pages from the buffer pool. */
1231 		for (undo::undo_spaces_t::const_iterator it
1232 			     = undo::Truncate::s_fix_up_spaces.begin();
1233 		     it != undo::Truncate::s_fix_up_spaces.end();
1234 		     ++it) {
1235 
1236 			buf_LRU_flush_or_remove_pages(
1237 				TRX_SYS_SPACE, BUF_REMOVE_FLUSH_WRITE, NULL);
1238 
1239 			buf_LRU_flush_or_remove_pages(
1240 				*it, BUF_REMOVE_FLUSH_WRITE, NULL);
1241 
1242 			/* Remove the truncate redo log file. */
1243 			undo::Truncate	undo_trunc;
1244 			undo_trunc.done_logging(*it);
1245 		}
1246 	}
1247 
1248 	return(DB_SUCCESS);
1249 }
1250 
1251 /********************************************************************
1252 Wait for the purge thread(s) to start up. */
1253 static
1254 void
srv_start_wait_for_purge_to_start()1255 srv_start_wait_for_purge_to_start()
1256 /*===============================*/
1257 {
1258 	/* Wait for the purge coordinator and master thread to startup. */
1259 
1260 	purge_state_t	state = trx_purge_state();
1261 
1262 	ut_a(state != PURGE_STATE_DISABLED);
1263 
1264 	while (srv_shutdown_state == SRV_SHUTDOWN_NONE
1265 	       && srv_force_recovery < SRV_FORCE_NO_BACKGROUND
1266 	       && state == PURGE_STATE_INIT) {
1267 
1268 		switch (state = trx_purge_state()) {
1269 		case PURGE_STATE_RUN:
1270 		case PURGE_STATE_STOP:
1271 			break;
1272 
1273 		case PURGE_STATE_INIT:
1274 			ib::info() << "Waiting for purge to start";
1275 
1276 			os_thread_sleep(50000);
1277 			break;
1278 
1279 		case PURGE_STATE_EXIT:
1280 		case PURGE_STATE_DISABLED:
1281 			ut_error;
1282 		}
1283 	}
1284 }
1285 
1286 /** Initializes the log tracking subsystem and starts its thread.  */
1287 void
srv_init_log_online(void)1288 srv_init_log_online(void)
1289 {
1290 	if (UNIV_UNLIKELY(srv_force_recovery > 0 || srv_read_only_mode)) {
1291 		srv_track_changed_pages = FALSE;
1292 		return;
1293 	}
1294 
1295 	if (srv_track_changed_pages) {
1296 
1297 		log_online_read_init();
1298 
1299 		/* Create the thread that follows the redo log to output the
1300 		   changed page bitmap */
1301 		os_thread_create(&srv_redo_log_follow_thread, NULL,
1302 				 thread_ids + 5 + SRV_MAX_N_IO_THREADS);
1303 	}
1304 }
1305 
1306 /** Create the temporary file tablespace.
1307 @param[in]	create_new_db	whether we are creating a new database
1308 @param[in,out]	tmp_space	Shared Temporary SysTablespace
1309 @return DB_SUCCESS or error code. */
1310 static
1311 dberr_t
srv_open_tmp_tablespace(bool create_new_db,SysTablespace * tmp_space)1312 srv_open_tmp_tablespace(
1313 	bool		create_new_db,
1314 	SysTablespace*	tmp_space)
1315 {
1316 	ulint	sum_of_new_sizes;
1317 
1318 	/* Will try to remove if there is existing file left-over by last
1319 	unclean shutdown */
1320 	tmp_space->set_sanity_check_status(true);
1321 	tmp_space->delete_files();
1322 	tmp_space->set_ignore_read_only(true);
1323 
1324 	ib::info() << "Creating shared tablespace for temporary tables";
1325 
1326 	bool	create_new_temp_space;
1327 	ulint	temp_space_id = ULINT_UNDEFINED;
1328 
1329 	dict_hdr_get_new_id(NULL, NULL, &temp_space_id, NULL, true);
1330 
1331 	tmp_space->set_space_id(temp_space_id);
1332 
1333 	RECOVERY_CRASH(100);
1334 
1335 	dberr_t	err = tmp_space->check_file_spec(
1336 			&create_new_temp_space, 12 * 1024 * 1024);
1337 
1338 	if (err == DB_FAIL) {
1339 
1340 		ib::error() << "The " << tmp_space->name()
1341 			<< " data file must be writable!";
1342 
1343 		err = DB_ERROR;
1344 
1345 	} else if (err != DB_SUCCESS) {
1346 		ib::error() << "Could not create the shared "
1347 			<< tmp_space->name() << ".";
1348 
1349 	} else if ((err = tmp_space->open_or_create(
1350 			    true, create_new_db, &sum_of_new_sizes, NULL))
1351 		   != DB_SUCCESS) {
1352 
1353 		ib::error() << "Unable to create the shared "
1354 			<< tmp_space->name();
1355 
1356 	} else {
1357 
1358 		mtr_t	mtr;
1359 		ulint	size = tmp_space->get_sum_of_sizes();
1360 
1361 		ut_a(temp_space_id != ULINT_UNDEFINED);
1362 		ut_a(tmp_space->space_id() == temp_space_id);
1363 
1364 		/* Open this shared temp tablespace in the fil_system so that
1365 		it stays open until shutdown. */
1366 		if (fil_space_open(tmp_space->name())) {
1367 
1368 			if (srv_tmp_tablespace_encrypt) {
1369 
1370 				/* Make sure the keyring is loaded. */
1371 				if (!Encryption::check_keyring()) {
1372 					srv_tmp_tablespace_encrypt = false;
1373 					ib::error() << "Can't set temporary"
1374 						<< " tablespace to be encrypted"
1375 						<< " because keyring plugin is"
1376 						<< " not available.";
1377 				        return(DB_ERROR);
1378 				}
1379 
1380 				fil_space_t*	space =
1381 					fil_space_get(temp_space_id);
1382 
1383 				err = fil_set_encryption(space->id,
1384 							 Encryption::AES,
1385 							 NULL,
1386 							 NULL);
1387 
1388 				tmp_space->set_flags(space->flags);
1389 				ut_a(err == DB_SUCCESS);
1390 			}
1391 
1392 			/* Initialize the header page */
1393 			mtr_start(&mtr);
1394 			mtr_set_log_mode(&mtr, MTR_LOG_NO_REDO);
1395 
1396 			fsp_header_init(tmp_space->space_id(), size, &mtr);
1397 
1398 			mtr_commit(&mtr);
1399 		} else {
1400 			/* This file was just opened in the code above! */
1401 			ib::error() << "The " << tmp_space->name()
1402 				<< " data file cannot be re-opened"
1403 				" after check_file_spec() succeeded!";
1404 
1405 			err = DB_ERROR;
1406 		}
1407 	}
1408 
1409 	return(err);
1410 }
1411 
1412 /****************************************************************//**
1413 Set state to indicate start of particular group of threads in InnoDB. */
1414 UNIV_INLINE
1415 void
srv_start_state_set(srv_start_state_t state)1416 srv_start_state_set(
1417 /*================*/
1418 	srv_start_state_t state)	/*!< in: indicate current state of
1419 					thread startup */
1420 {
1421 	srv_start_state |= state;
1422 }
1423 
1424 /****************************************************************//**
1425 Check if following group of threads is started.
1426 @return true if started */
1427 UNIV_INLINE
1428 bool
srv_start_state_is_set(srv_start_state_t state)1429 srv_start_state_is_set(
1430 /*===================*/
1431 	srv_start_state_t state)	/*!< in: state to check for */
1432 {
1433 	return(srv_start_state & state);
1434 }
1435 
1436 /**
1437 Shutdown all background threads created by InnoDB. */
1438 void
srv_shutdown_all_bg_threads()1439 srv_shutdown_all_bg_threads()
1440 {
1441 	ulint	i;
1442 
1443 	srv_shutdown_state = SRV_SHUTDOWN_EXIT_THREADS;
1444 
1445 	if (!srv_start_state) {
1446 		return;
1447 	}
1448 
1449 	/* All threads end up waiting for certain events. Put those events
1450 	to the signaled state. Then the threads will exit themselves after
1451 	os_event_wait(). */
1452 	for (i = 0; i < 1000; i++) {
1453 		/* NOTE: IF YOU CREATE THREADS IN INNODB, YOU MUST EXIT THEM
1454 		HERE OR EARLIER */
1455 
1456 		if (!srv_read_only_mode) {
1457 
1458 			if (srv_start_state_is_set(SRV_START_STATE_LOCK_SYS)) {
1459 				/* a. Let the lock timeout thread exit */
1460 				os_event_set(lock_sys->timeout_event);
1461 			}
1462 
1463 			/* b. srv error monitor thread exits automatically,
1464 			no need to do anything here */
1465 
1466 			if (srv_start_state_is_set(SRV_START_STATE_MASTER)) {
1467 				/* c. We wake the master thread so that
1468 				it exits */
1469 				srv_wake_master_thread();
1470 			}
1471 
1472 			if (srv_start_state_is_set(SRV_START_STATE_PURGE)) {
1473 				/* d. Wakeup purge threads. */
1474 				srv_purge_wakeup();
1475 			}
1476 
1477 			if (srv_n_fil_crypt_threads_started) {
1478 				os_event_set(fil_crypt_threads_event);
1479 			}
1480 
1481 			/* Stop srv_redo_log_follow_thread thread */
1482 			if (srv_redo_log_thread_started) {
1483 				os_event_reset(srv_redo_log_tracked_event);
1484 				os_event_set(srv_checkpoint_completed_event);
1485 			}
1486 		}
1487 
1488 		if (srv_start_state_is_set(SRV_START_STATE_IO)) {
1489 			/* e. Exit the i/o threads */
1490 			if (!srv_read_only_mode) {
1491 				if (recv_sys->flush_start != NULL) {
1492 					os_event_set(recv_sys->flush_start);
1493 				}
1494 				if (recv_sys->flush_end != NULL) {
1495 					os_event_set(recv_sys->flush_end);
1496 				}
1497 			}
1498 
1499 			os_event_set(buf_flush_event);
1500 
1501 			if (!buf_page_cleaner_is_active
1502 			    && os_aio_all_slots_free()) {
1503 				os_aio_wake_all_threads_at_shutdown();
1504 			}
1505 		}
1506 
1507 		/* f. dict_stats_thread is signaled from
1508 		logs_empty_and_mark_files_at_shutdown() and should have
1509 		already quit or is quitting right now. */
1510 
1511 		bool	active = os_thread_active();
1512 
1513 		os_thread_sleep(100000);
1514 
1515 		if (!active) {
1516 			break;
1517 		}
1518 	}
1519 
1520 	if (i == 1000) {
1521 		ib::warn() << os_thread_count << " threads created by InnoDB"
1522 			" had not exited at shutdown!";
1523 #ifdef UNIV_DEBUG
1524 		os_aio_print_pending_io(stderr);
1525 		ut_ad(0);
1526 #endif /* UNIV_DEBUG */
1527 	} else {
1528 		/* Reset the start state. */
1529 		srv_start_state = SRV_START_STATE_NONE;
1530 	}
1531 }
1532 
1533 #ifdef UNIV_DEBUG
1534 # define srv_init_abort(_db_err)	\
1535 	srv_init_abort_low(create_new_db, __FILE__, __LINE__, _db_err)
1536 #else
1537 # define srv_init_abort(_db_err)	\
1538 	srv_init_abort_low(create_new_db, _db_err)
1539 #endif /* UNIV_DEBUG */
1540 
1541 /** Innobase start-up aborted. Perform cleanup actions.
1542 @param[in]	create_new_db	TRUE if new db is  being created
1543 @param[in]	file		File name
1544 @param[in]	line		Line number
1545 @param[in]	err		Reason for aborting InnoDB startup
1546 @return DB_SUCCESS or error code. */
1547 static
1548 dberr_t
srv_init_abort_low(bool create_new_db,const char * file,ulint line,dberr_t err)1549 srv_init_abort_low(
1550 	bool		create_new_db,
1551 #ifdef UNIV_DEBUG
1552 	const char*	file,
1553 	ulint		line,
1554 #endif /* UNIV_DEBUG */
1555 	dberr_t		err)
1556 {
1557 	if (create_new_db) {
1558 		ib::error() << "InnoDB Database creation was aborted"
1559 #ifdef UNIV_DEBUG
1560 			" at " << innobase_basename(file) << "[" << line << "]"
1561 #endif /* UNIV_DEBUG */
1562 			" with error " << ut_strerr(err) << ". You may need"
1563 			" to delete the ibdata1 file before trying to start"
1564 			" up again.";
1565 	} else {
1566 		ib::error() << "Plugin initialization aborted"
1567 #ifdef UNIV_DEBUG
1568 			" at " << innobase_basename(file) << "[" << line << "]"
1569 #endif /* UNIV_DEBUG */
1570 			" with error " << ut_strerr(err);
1571 	}
1572 
1573 	srv_shutdown_all_bg_threads();
1574 	return(err);
1575 }
1576 
1577 /** Prepare to delete the redo log files. Flush the dirty pages from all the
1578 buffer pools.  Flush the redo log buffer to the redo log file.
1579 @param[in]	n_files		number of old redo log files
1580 @return lsn upto which data pages have been flushed. */
1581 static
1582 lsn_t
srv_prepare_to_delete_redo_log_files(ulint n_files)1583 srv_prepare_to_delete_redo_log_files(
1584 	ulint	n_files)
1585 {
1586 	lsn_t	flushed_lsn;
1587 	ulint	pending_io = 0;
1588 	ulint	count = 0;
1589 
1590 	do {
1591 		/* Clean the buffer pool. */
1592 		buf_flush_sync_all_buf_pools();
1593 
1594 		RECOVERY_CRASH(1);
1595 
1596 		log_mutex_enter();
1597 
1598 		fil_names_clear(log_sys->lsn, false);
1599 
1600 		flushed_lsn = log_sys->lsn;
1601 
1602 		{
1603 			ib::warn	warning;
1604 			if (srv_log_file_size == 0) {
1605 				warning << "Upgrading redo log: ";
1606 			} else {
1607 				warning << "Resizing redo log from "
1608 					<< n_files << "*"
1609 					<< srv_log_file_size << " to ";
1610 			}
1611 			warning << srv_n_log_files << "*"
1612 				<< srv_log_file_size_requested
1613 				<< " pages, LSN=" << flushed_lsn;
1614 		}
1615 
1616 		/* Flush the old log files. */
1617 		log_mutex_exit();
1618 
1619 		log_write_up_to(flushed_lsn, true);
1620 
1621 		/* If innodb_flush_method=O_DSYNC,
1622 		we need to explicitly flush the log buffers. */
1623 		fil_flush(SRV_LOG_SPACE_FIRST_ID);
1624 
1625 		ut_ad(flushed_lsn == log_get_lsn());
1626 
1627 		/* Check if the buffer pools are clean.  If not
1628 		retry till it is clean. */
1629 		pending_io = buf_pool_check_no_pending_io();
1630 
1631 		if (pending_io > 0) {
1632 			count++;
1633 			/* Print a message every 60 seconds if we
1634 			are waiting to clean the buffer pools */
1635 			if (srv_print_verbose_log && count > 600) {
1636 				ib::info() << "Waiting for "
1637 					<< pending_io << " buffer "
1638 					<< "page I/Os to complete";
1639 				count = 0;
1640 			}
1641 		}
1642 		os_thread_sleep(100000);
1643 
1644 	} while (buf_pool_check_no_pending_io());
1645 
1646 	return(flushed_lsn);
1647 }
1648 
1649 /** Enable encryption of system tablespace if requested. At
1650 startup load the encryption information from first datafile
1651 to tablespace object
1652 @return DB_SUCCESS on succes, others on failure */
1653 static
1654 dberr_t
srv_sys_enable_encryption(bool create_new_db)1655 srv_sys_enable_encryption(bool create_new_db) {
1656 
1657 	fil_space_t*	space = fil_space_get(TRX_SYS_SPACE);
1658 	dberr_t		err = DB_SUCCESS;
1659 
1660 	if (create_new_db && srv_sys_tablespace_encrypt) {
1661 
1662 		space->flags |= FSP_FLAGS_MASK_ENCRYPTION;
1663 		srv_sys_space.set_flags(space->flags);
1664 
1665 		err = fil_set_encryption(space->id,
1666 					 Encryption::AES,
1667 					 NULL,
1668 					 NULL);
1669 		ut_ad(err == DB_SUCCESS);
1670 	} else {
1671 
1672 		ulint	fsp_flags = srv_sys_space.m_files.begin()->flags();
1673 		bool	is_encrypted = FSP_FLAGS_GET_ENCRYPTION(fsp_flags);
1674 
1675 		if (is_encrypted && !srv_sys_tablespace_encrypt) {
1676 			ib::error() << "The system tablespace is encrypted but"
1677 				<< " --innodb_sys_tablespace_encrypt is"
1678 				<< " OFF. Enable the option and start server";
1679 			return(DB_ERROR);
1680 		}
1681 
1682 		if (!is_encrypted && srv_sys_tablespace_encrypt) {
1683 			ib::error() << "The system tablespace is not encrypted but"
1684 				<< " --innodb_sys_tablespace_encrypt is"
1685 				<< " ON. This instance was not bootstrapped"
1686 				<< " with --innodb_sys_tablespace_encrypt=ON."
1687 				<< " Disable this option and start server";
1688 			return(DB_ERROR);
1689 		}
1690 
1691 		if (is_encrypted) {
1692 
1693 			space->flags |= FSP_FLAGS_MASK_ENCRYPTION;
1694 			srv_sys_space.set_flags(space->flags);
1695 
1696 			err = fil_set_encryption(
1697 				space->id,
1698 				Encryption::AES,
1699 				srv_sys_space.m_files.begin()->m_encryption_key,
1700 				srv_sys_space.m_files.begin()->m_encryption_iv);
1701 			ut_ad(err == DB_SUCCESS);
1702 
1703 			recv_sys->dblwr.decrypt_sys_dblwr_pages();
1704 		}
1705 	}
1706 
1707 	return(err);
1708 }
1709 
1710 /********************************************************************
1711 Starts InnoDB and creates a new database if database files
1712 are not found and the user wants.
1713 @return DB_SUCCESS or error code */
1714 dberr_t
innobase_start_or_create_for_mysql(void)1715 innobase_start_or_create_for_mysql(void)
1716 /*====================================*/
1717 {
1718 	bool		create_new_db = false;
1719 	lsn_t		flushed_lsn;
1720 	ulint		sum_of_data_file_sizes;
1721 	ulint		tablespace_size_in_header;
1722 	dberr_t		err;
1723 	ulint		srv_n_log_files_found = srv_n_log_files;
1724 	mtr_t		mtr;
1725 	purge_pq_t*	purge_queue;
1726 	char		logfilename[10000];
1727 	char*		logfile0	= NULL;
1728 	size_t		dirnamelen;
1729 	unsigned	i = 0;
1730 
1731 	/* Reset the start state. */
1732 	srv_start_state = SRV_START_STATE_NONE;
1733 
1734 	if (srv_force_recovery == SRV_FORCE_NO_LOG_REDO) {
1735 		srv_read_only_mode = true;
1736 	}
1737 
1738 	high_level_read_only = srv_read_only_mode
1739 		|| srv_force_recovery > SRV_FORCE_NO_TRX_UNDO;
1740 
1741 	if (srv_read_only_mode) {
1742 		ib::info() << "Started in read only mode";
1743 
1744 		/* There is no write except to intrinsic table and so turn-off
1745 		doublewrite mechanism completely. */
1746 		srv_use_doublewrite_buf = FALSE;
1747 	}
1748 
1749 #ifdef HAVE_LZO1X
1750 	if (lzo_init() != LZO_E_OK) {
1751 		ib::warn() << "lzo_init() failed, support disabled";
1752 		srv_lzo_disabled = true;
1753 	} else {
1754 		ib::info() << "LZO1X support available";
1755 		srv_lzo_disabled = false;
1756 	}
1757 #endif /* HAVE_LZO1X */
1758 
1759 #ifdef UNIV_LINUX
1760 # ifdef HAVE_FALLOC_PUNCH_HOLE_AND_KEEP_SIZE
1761 	ib::info() << "PUNCH HOLE support available";
1762 # else
1763 	ib::info() << "PUNCH HOLE support not available";
1764 # endif /* HAVE_FALLOC_PUNCH_HOLE_AND_KEEP_SIZE */
1765 #endif /* UNIV_LINUX */
1766 
1767 	if (sizeof(ulint) != sizeof(void*)) {
1768 		ib::error() << "Size of InnoDB's ulint is " << sizeof(ulint)
1769 			<< ", but size of void* is " << sizeof(void*)
1770 			<< ". The sizes should be the same so that on"
1771 			" a 64-bit platforms you can allocate more than 4 GB"
1772 			" of memory.";
1773 	}
1774 
1775 #ifdef UNIV_DEBUG
1776 	ib::info() << "!!!!!!!! UNIV_DEBUG switched on !!!!!!!!!";
1777 #endif
1778 
1779 #ifdef UNIV_IBUF_DEBUG
1780 	ib::info() << "!!!!!!!! UNIV_IBUF_DEBUG switched on !!!!!!!!!";
1781 # ifdef UNIV_IBUF_COUNT_DEBUG
1782 	ib::info() << "!!!!!!!! UNIV_IBUF_COUNT_DEBUG switched on !!!!!!!!!";
1783 	ib::error() << "Crash recovery will fail with UNIV_IBUF_COUNT_DEBUG";
1784 # endif
1785 #endif
1786 
1787 #ifdef UNIV_LOG_LSN_DEBUG
1788 	ib::info() << "!!!!!!!! UNIV_LOG_LSN_DEBUG switched on !!!!!!!!!";
1789 #endif /* UNIV_LOG_LSN_DEBUG */
1790 
1791 #if defined(COMPILER_HINTS_ENABLED)
1792 	ib::info() << "Compiler hints enabled.";
1793 #endif /* defined(COMPILER_HINTS_ENABLED) */
1794 
1795 	ib::info() << IB_ATOMICS_STARTUP_MSG;
1796 	ib::info() << MUTEX_TYPE;
1797 	ib::info() << IB_MEMORY_BARRIER_STARTUP_MSG;
1798 
1799 #ifndef HAVE_MEMORY_BARRIER
1800 #if defined __i386__ || defined __x86_64__ || defined _M_IX86 || defined _M_X64 || defined _WIN32
1801 #else
1802 	ib::warn() << "MySQL was built without a memory barrier capability on"
1803 		" this architecture, which might allow a mutex/rw_lock"
1804 		" violation under high thread concurrency. This may cause a"
1805 		" hang.";
1806 #endif /* IA32 or AMD64 */
1807 #endif /* HAVE_MEMORY_BARRIER */
1808 
1809 	ib::info() << "Compressed tables use zlib " ZLIB_VERSION
1810 #ifdef UNIV_ZIP_DEBUG
1811 	      " with validation"
1812 #endif /* UNIV_ZIP_DEBUG */
1813 	      ;
1814 #ifdef UNIV_ZIP_COPY
1815 	ib::info() << "and extra copying";
1816 #endif /* UNIV_ZIP_COPY */
1817 
1818 	/* Since InnoDB does not currently clean up all its internal data
1819 	structures in MySQL Embedded Server Library server_end(), we
1820 	print an error message if someone tries to start up InnoDB a
1821 	second time during the process lifetime. */
1822 
1823 	if (srv_start_has_been_called) {
1824 		ib::error() << "Startup called second time"
1825 			" during the process lifetime."
1826 			" In the MySQL Embedded Server Library"
1827 			" you cannot call server_init() more than"
1828 			" once during the process lifetime.";
1829 	}
1830 
1831 	srv_start_has_been_called = TRUE;
1832 
1833 	srv_is_being_started = true;
1834 
1835 #ifdef _WIN32
1836 	srv_use_native_aio = TRUE;
1837 
1838 #elif defined(LINUX_NATIVE_AIO)
1839 
1840 	if (srv_use_native_aio) {
1841 		ib::info() << "Using Linux native AIO";
1842 	}
1843 #else
1844 	/* Currently native AIO is supported only on windows and linux
1845 	and that also when the support is compiled in. In all other
1846 	cases, we ignore the setting of innodb_use_native_aio. */
1847 	srv_use_native_aio = FALSE;
1848 #endif /* _WIN32 */
1849 
1850 	/* Register performance schema stages before any real work has been
1851 	started which may need to be instrumented. */
1852 	mysql_stage_register("innodb", srv_stages, UT_ARR_SIZE(srv_stages));
1853 
1854 	if (srv_file_flush_method_str == NULL) {
1855 		/* These are the default options */
1856 #ifndef _WIN32
1857 		srv_unix_file_flush_method = SRV_UNIX_FSYNC;
1858 	} else if (0 == ut_strcmp(srv_file_flush_method_str, "fsync")) {
1859 		srv_unix_file_flush_method = SRV_UNIX_FSYNC;
1860 
1861 	} else if (0 == ut_strcmp(srv_file_flush_method_str, "O_DSYNC")) {
1862 		srv_unix_file_flush_method = SRV_UNIX_O_DSYNC;
1863 
1864 	} else if (0 == ut_strcmp(srv_file_flush_method_str, "O_DIRECT")) {
1865 		srv_unix_file_flush_method = SRV_UNIX_O_DIRECT;
1866 
1867 	} else if (0 == ut_strcmp(srv_file_flush_method_str, "ALL_O_DIRECT")) {
1868 		srv_unix_file_flush_method = SRV_UNIX_ALL_O_DIRECT;
1869 
1870 	} else if (0 == ut_strcmp(srv_file_flush_method_str, "O_DIRECT_NO_FSYNC")) {
1871 		srv_unix_file_flush_method = SRV_UNIX_O_DIRECT_NO_FSYNC;
1872 
1873 	} else if (0 == ut_strcmp(srv_file_flush_method_str, "littlesync")) {
1874 		srv_unix_file_flush_method = SRV_UNIX_LITTLESYNC;
1875 
1876 	} else if (0 == ut_strcmp(srv_file_flush_method_str, "nosync")) {
1877 		srv_unix_file_flush_method = SRV_UNIX_NOSYNC;
1878 #else
1879 		srv_win_file_flush_method = SRV_WIN_IO_UNBUFFERED;
1880 	} else if (0 == ut_strcmp(srv_file_flush_method_str, "normal")) {
1881 		srv_win_file_flush_method = SRV_WIN_IO_NORMAL;
1882 		srv_use_native_aio = FALSE;
1883 
1884 	} else if (0 == ut_strcmp(srv_file_flush_method_str, "unbuffered")) {
1885 		srv_win_file_flush_method = SRV_WIN_IO_UNBUFFERED;
1886 		srv_use_native_aio = FALSE;
1887 
1888 	} else if (0 == ut_strcmp(srv_file_flush_method_str,
1889 				  "async_unbuffered")) {
1890 		srv_win_file_flush_method = SRV_WIN_IO_UNBUFFERED;
1891 #endif /* _WIN32 */
1892 	} else {
1893 		ib::error() << "Unrecognized value "
1894 			<< srv_file_flush_method_str
1895 			<< " for innodb_flush_method";
1896 		return(srv_init_abort(DB_ERROR));
1897 	}
1898 
1899 	/* Note that the call srv_boot() also changes the values of
1900 	some variables to the units used by InnoDB internally */
1901 
1902 	/* Set the maximum number of threads which can wait for a semaphore
1903 	inside InnoDB: this is the 'sync wait array' size, as well as the
1904 	maximum number of threads that can wait in the 'srv_conc array' for
1905 	their time to enter InnoDB. */
1906 
1907 	srv_max_n_threads = 1   /* io_ibuf_thread */
1908 			    + 1 /* io_log_thread */
1909 			    + 1 /* lock_wait_timeout_thread */
1910 			    + 1 /* srv_error_monitor_thread */
1911 			    + 1 /* srv_monitor_thread */
1912 			    + 1 /* srv_master_thread */
1913 			    + 1 /* srv_redo_log_follow_thread */
1914 			    + 1 /* srv_purge_coordinator_thread */
1915 			    + 1 /* buf_dump_thread */
1916 			    + 1 /* dict_stats_thread */
1917 			    + 1 /* fts_optimize_thread */
1918 			    + 1 /* trx_rollback_or_clean_all_recovered */
1919 			    + 128 /* added as margin, for use of
1920 				  InnoDB Memcached etc. */
1921 			    + max_connections
1922 			    + srv_n_read_io_threads
1923 			    + srv_n_write_io_threads
1924 			    + srv_n_purge_threads
1925 			    + srv_n_page_cleaners
1926 			    /* FTS Parallel Sort */
1927 			    + fts_sort_pll_degree * FTS_NUM_AUX_INDEX
1928 			      * max_connections;
1929 
1930 	if (srv_buf_pool_size >= BUF_POOL_SIZE_THRESHOLD) {
1931 
1932 		if (srv_buf_pool_instances == srv_buf_pool_instances_default) {
1933 #if defined(_WIN32) && !defined(_WIN64)
1934 			/* Do not allocate too large of a buffer pool on
1935 			Windows 32-bit systems, which can have trouble
1936 			allocating larger single contiguous memory blocks. */
1937 			srv_buf_pool_instances = ut_min(
1938 				static_cast<ulong>(MAX_BUFFER_POOLS),
1939 				static_cast<ulong>(srv_buf_pool_size
1940 						   / (128 * 1024 * 1024)));
1941 #else /* defined(_WIN32) && !defined(_WIN64) */
1942 			/* Default to 8 instances when size > 1GB. */
1943 			srv_buf_pool_instances = 8;
1944 #endif /* defined(_WIN32) && !defined(_WIN64) */
1945 		}
1946 	} else {
1947 		/* If buffer pool is less than 1 GiB, assume fewer
1948 		threads. Also use only one buffer pool instance. */
1949 		if (srv_buf_pool_instances != srv_buf_pool_instances_default
1950 		    && srv_buf_pool_instances != 1) {
1951 			/* We can't distinguish whether the user has explicitly
1952 			started mysqld with --innodb-buffer-pool-instances=0,
1953 			(srv_buf_pool_instances_default is 0) or has not
1954 			specified that option at all. Thus we have the
1955 			limitation that if the user started with =0, we
1956 			will not emit a warning here, but we should actually
1957 			do so. */
1958 			ib::info()
1959 				<< "Adjusting innodb_buffer_pool_instances"
1960 				" from " << srv_buf_pool_instances << " to 1"
1961 				" since innodb_buffer_pool_size is less than "
1962 				<< BUF_POOL_SIZE_THRESHOLD / (1024 * 1024)
1963 				<< " MiB";
1964 		}
1965 
1966 		srv_buf_pool_instances = 1;
1967 	}
1968 
1969 	if (srv_buf_pool_chunk_unit * srv_buf_pool_instances
1970 	    > srv_buf_pool_size) {
1971 		/* Size unit of buffer pool is larger than srv_buf_pool_size.
1972 		adjust srv_buf_pool_chunk_unit for srv_buf_pool_size. */
1973 		srv_buf_pool_chunk_unit
1974 			= static_cast<ulong>(srv_buf_pool_size)
1975 			  / srv_buf_pool_instances;
1976 		if (srv_buf_pool_size % srv_buf_pool_instances != 0) {
1977 			++srv_buf_pool_chunk_unit;
1978 		}
1979 	}
1980 
1981 	srv_buf_pool_size = buf_pool_size_align(srv_buf_pool_size);
1982 
1983 	if (srv_n_page_cleaners > srv_buf_pool_instances) {
1984 		/* limit of page_cleaner parallelizability
1985 		is number of buffer pool instances. */
1986 		srv_n_page_cleaners = srv_buf_pool_instances;
1987 	}
1988 
1989 	srv_boot();
1990 
1991 	ib::info() << (ut_crc32_sse2_enabled ? "Using" : "Not using")
1992 		<< " CPU crc32 instructions";
1993 
1994 	if (!srv_read_only_mode) {
1995 
1996 		mutex_create(LATCH_ID_SRV_MONITOR_FILE,
1997 			     &srv_monitor_file_mutex);
1998 
1999 		if (srv_innodb_status) {
2000 
2001 			srv_monitor_file_name = static_cast<char*>(
2002 				ut_malloc_nokey(
2003 					strlen(fil_path_to_mysql_datadir)
2004 					+ 20 + sizeof "/innodb_status."));
2005 
2006 			sprintf(srv_monitor_file_name,
2007 				"%s/innodb_status." ULINTPF,
2008 				fil_path_to_mysql_datadir,
2009 				os_proc_get_number());
2010 
2011 			srv_monitor_file = fopen(srv_monitor_file_name, "w+");
2012 
2013 			if (!srv_monitor_file) {
2014 				ib::error() << "Unable to create "
2015 					<< srv_monitor_file_name << ": "
2016 					<< strerror(errno);
2017 				return(srv_init_abort(DB_ERROR));
2018 			}
2019 		} else {
2020 
2021 			srv_monitor_file_name = NULL;
2022 			srv_monitor_file = os_file_create_tmpfile(NULL);
2023 
2024 			if (!srv_monitor_file) {
2025 				return(srv_init_abort(DB_ERROR));
2026 			}
2027 		}
2028 
2029 		mutex_create(LATCH_ID_SRV_DICT_TMPFILE,
2030 			     &srv_dict_tmpfile_mutex);
2031 
2032 		srv_dict_tmpfile = os_file_create_tmpfile(NULL);
2033 
2034 		if (!srv_dict_tmpfile) {
2035 			return(srv_init_abort(DB_ERROR));
2036 		}
2037 
2038 		mutex_create(LATCH_ID_SRV_MISC_TMPFILE,
2039 			     &srv_misc_tmpfile_mutex);
2040 
2041 		srv_misc_tmpfile = os_file_create_tmpfile(NULL);
2042 
2043 		if (!srv_misc_tmpfile) {
2044 			return(srv_init_abort(DB_ERROR));
2045 		}
2046 	}
2047 
2048 	srv_n_file_io_threads = srv_n_read_io_threads;
2049 
2050 	srv_n_file_io_threads += srv_n_write_io_threads;
2051 
2052 	if (!srv_read_only_mode) {
2053 		/* Add the log and ibuf IO threads. */
2054 		srv_n_file_io_threads += 2;
2055 	} else {
2056 		ib::info() << "Disabling background log and ibuf IO write"
2057 			<< " threads.";
2058 	}
2059 
2060 	ut_a(srv_n_file_io_threads <= SRV_MAX_N_IO_THREADS);
2061 
2062 	if (!os_aio_init(srv_n_read_io_threads,
2063 			 srv_n_write_io_threads,
2064 			 SRV_MAX_N_PENDING_SYNC_IOS)) {
2065 
2066 		ib::error() << "Cannot initialize AIO sub-system";
2067 
2068 		return(srv_init_abort(DB_ERROR));
2069 	}
2070 
2071 	fil_init(srv_file_per_table ? 50000 : 5000, srv_max_n_open_files);
2072 
2073 	double	size;
2074 	char	unit;
2075 
2076 	if (srv_buf_pool_size >= 1024 * 1024 * 1024) {
2077 		size = ((double) srv_buf_pool_size) / (1024 * 1024 * 1024);
2078 		unit = 'G';
2079 	} else {
2080 		size = ((double) srv_buf_pool_size) / (1024 * 1024);
2081 		unit = 'M';
2082 	}
2083 
2084 	double	chunk_size;
2085 	char	chunk_unit;
2086 
2087 	if (srv_buf_pool_chunk_unit >= 1024 * 1024 * 1024) {
2088 		chunk_size = srv_buf_pool_chunk_unit / 1024.0 / 1024 / 1024;
2089 		chunk_unit = 'G';
2090 	} else {
2091 		chunk_size = srv_buf_pool_chunk_unit / 1024.0 / 1024;
2092 		chunk_unit = 'M';
2093 	}
2094 
2095 	ib::info() << "Initializing buffer pool, total size = "
2096 		<< size << unit << ", instances = " << srv_buf_pool_instances
2097 		<< ", chunk size = " << chunk_size << chunk_unit;
2098 
2099 	err = buf_pool_init(srv_buf_pool_size, static_cast<bool>(srv_numa_interleave),
2100 	                    srv_buf_pool_instances);
2101 
2102 	if (err != DB_SUCCESS) {
2103 		ib::error() << "Cannot allocate memory for the buffer pool";
2104 
2105 		return(srv_init_abort(DB_ERROR));
2106 	}
2107 
2108 	ib::info() << "Completed initialization of buffer pool";
2109 
2110 #ifdef UNIV_DEBUG
2111 	/* We have observed deadlocks with a 5MB buffer pool but
2112 	the actual lower limit could very well be a little higher. */
2113 
2114 	if (srv_buf_pool_size <= 5 * 1024 * 1024) {
2115 
2116 		ib::info() << "Small buffer pool size ("
2117 			<< srv_buf_pool_size / 1024 / 1024
2118 			<< "M), the flst_validate() debug function can cause a"
2119 			<< " deadlock if the buffer pool fills up.";
2120 	}
2121 #endif /* UNIV_DEBUG */
2122 
2123 	fsp_init();
2124 	log_init();
2125 	log_online_init();
2126 
2127 	recv_sys_create();
2128 	recv_sys_init(buf_pool_get_curr_size());
2129 	lock_sys_create(srv_lock_table_size);
2130 	srv_start_state_set(SRV_START_STATE_LOCK_SYS);
2131 
2132 	/* Create i/o-handler threads: */
2133 
2134 	for (ulint t = 0; t < srv_n_file_io_threads; ++t) {
2135 
2136 		n[t] = t;
2137 
2138 		os_thread_create(io_handler_thread, n + t, thread_ids + t);
2139 	}
2140 
2141 	/* Even in read-only mode there could be flush job generated by
2142 	intrinsic table operations. */
2143 	buf_flush_page_cleaner_init();
2144 
2145 	os_thread_create(buf_flush_page_cleaner_coordinator,
2146 			 NULL, NULL);
2147 
2148 	for (i = 1; i < srv_n_page_cleaners; ++i) {
2149 		os_thread_create(buf_flush_page_cleaner_worker,
2150 				 NULL, NULL);
2151 	}
2152 
2153 	for (i = 0; i < srv_buf_pool_instances; i++) {
2154 		os_thread_create(buf_lru_manager, reinterpret_cast<void *>(i),
2155 				 NULL);
2156 	}
2157 
2158 	/* Make sure page cleaner is active. */
2159 	os_rmb;
2160 	while (!buf_page_cleaner_is_active
2161 	       || buf_lru_manager_running_threads < srv_buf_pool_instances) {
2162 
2163 		os_thread_sleep(10000);
2164 		os_rmb;
2165 	}
2166 
2167 	srv_start_state_set(SRV_START_STATE_IO);
2168 
2169 	if (srv_n_log_files * srv_log_file_size * UNIV_PAGE_SIZE
2170 	    >= 512ULL * 1024ULL * 1024ULL * 1024ULL) {
2171 		/* log_block_convert_lsn_to_no() limits the returned block
2172 		number to 1G and given that OS_FILE_LOG_BLOCK_SIZE is 512
2173 		bytes, then we have a limit of 512 GB. If that limit is to
2174 		be raised, then log_block_convert_lsn_to_no() must be
2175 		modified. */
2176 		ib::error() << "Combined size of log files must be < 512 GB";
2177 
2178 		return(srv_init_abort(DB_ERROR));
2179 	}
2180 
2181 	if (srv_n_log_files * srv_log_file_size >= ULINT_MAX) {
2182 		/* fil_io() takes ulint as an argument and we are passing
2183 		(next_offset / UNIV_PAGE_SIZE) to it in log_group_write_buf().
2184 		So (next_offset / UNIV_PAGE_SIZE) must be less than ULINT_MAX.
2185 		So next_offset must be < ULINT_MAX * UNIV_PAGE_SIZE. This
2186 		means that we are limited to ULINT_MAX * UNIV_PAGE_SIZE which
2187 		is 64 TB on 32 bit systems. */
2188 		ib::error() << "Combined size of log files must be < "
2189 			<< ULINT_MAX / 1073741824 * UNIV_PAGE_SIZE << " GB";
2190 
2191 		return(srv_init_abort(DB_ERROR));
2192 	}
2193 
2194 	os_normalize_path(srv_data_home);
2195 
2196 	/* Check if the data files exist or not. */
2197 	err = srv_sys_space.check_file_spec(
2198 		&create_new_db, MIN_EXPECTED_TABLESPACE_SIZE);
2199 
2200 	if (err != DB_SUCCESS) {
2201 		return(srv_init_abort(DB_ERROR));
2202 	}
2203 
2204 	srv_startup_is_before_trx_rollback_phase = !create_new_db;
2205 
2206 	/* Check if undo tablespaces and redo log files exist before creating
2207 	a new system tablespace */
2208 	if (create_new_db) {
2209 		err = srv_check_undo_redo_logs_exists();
2210 		if (err != DB_SUCCESS) {
2211 			return(srv_init_abort(DB_ERROR));
2212 		}
2213 		recv_sys_debug_free();
2214 	}
2215 
2216 	/* Open or create the data files. */
2217 	ulint	sum_of_new_sizes;
2218 
2219 	err = srv_sys_space.open_or_create(
2220 		false, create_new_db, &sum_of_new_sizes, &flushed_lsn);
2221 
2222 	switch (err) {
2223 	case DB_SUCCESS:
2224 		err = srv_sys_enable_encryption(create_new_db);
2225 		if (err != DB_SUCCESS) {
2226 			return(srv_init_abort(err));
2227 		}
2228 		break;
2229 	case DB_CANNOT_OPEN_FILE:
2230 		ib::error()
2231 			<< "Could not open or create the system tablespace. If"
2232 			" you tried to add new data files to the system"
2233 			" tablespace, and it failed here, you should now"
2234 			" edit innodb_data_file_path in my.cnf back to what"
2235 			" it was, and remove the new ibdata files InnoDB"
2236 			" created in this failed attempt. InnoDB only wrote"
2237 			" those files full of zeros, but did not yet use"
2238 			" them in any way. But be careful: do not remove"
2239 			" old data files which contain your precious data!";
2240 		/* fall through */
2241 	default:
2242 		/* Other errors might come from Datafile::validate_first_page() */
2243 		return(srv_init_abort(err));
2244 	}
2245 
2246 	dirnamelen = strlen(srv_log_group_home_dir);
2247 	ut_a(dirnamelen < (sizeof logfilename) - 10 - sizeof "ib_logfile");
2248 	memcpy(logfilename, srv_log_group_home_dir, dirnamelen);
2249 
2250 	/* Add a path separator if needed. */
2251 	if (dirnamelen && logfilename[dirnamelen - 1] != OS_PATH_SEPARATOR) {
2252 		logfilename[dirnamelen++] = OS_PATH_SEPARATOR;
2253 	}
2254 
2255 	srv_log_file_size_requested = srv_log_file_size;
2256 
2257 	if (create_new_db) {
2258 
2259 		buf_flush_sync_all_buf_pools();
2260 
2261 		flushed_lsn = log_get_lsn();
2262 
2263 		err = create_log_files(
2264 			logfilename, dirnamelen, flushed_lsn, logfile0);
2265 
2266 		if (err != DB_SUCCESS) {
2267 			return(srv_init_abort(err));
2268 		}
2269 	} else {
2270 		for (i = 0; i < SRV_N_LOG_FILES_MAX; i++) {
2271 			os_offset_t	size;
2272 			os_file_stat_t	stat_info;
2273 
2274 			sprintf(logfilename + dirnamelen,
2275 				"ib_logfile%u", i);
2276 
2277 			err = os_file_get_status(
2278 				logfilename, &stat_info, false,
2279 				srv_read_only_mode);
2280 
2281 			if (err == DB_NOT_FOUND) {
2282 				if (i == 0) {
2283 					if (flushed_lsn
2284 					    < static_cast<lsn_t>(1000)) {
2285 						ib::error()
2286 							<< "Cannot create"
2287 							" log files because"
2288 							" data files are"
2289 							" corrupt or the"
2290 							" database was not"
2291 							" shut down cleanly"
2292 							" after creating"
2293 							" the data files.";
2294 						return(srv_init_abort(
2295 							DB_ERROR));
2296 					}
2297 
2298 					err = create_log_files(
2299 						logfilename, dirnamelen,
2300 						flushed_lsn, logfile0);
2301 
2302 					if (err != DB_SUCCESS) {
2303 						return(srv_init_abort(err));
2304 					}
2305 
2306 					create_log_files_rename(
2307 						logfilename, dirnamelen,
2308 						flushed_lsn, logfile0);
2309 
2310 					/* Suppress the message about
2311 					crash recovery. */
2312 					flushed_lsn = log_get_lsn();
2313 					goto files_checked;
2314 				} else if (i < 2) {
2315 					/* must have at least 2 log files */
2316 					ib::error() << "Only one log file"
2317 						" found.";
2318 					return(srv_init_abort(err));
2319 				}
2320 
2321 				/* opened all files */
2322 				break;
2323 			}
2324 
2325 			if (!srv_file_check_mode(logfilename)) {
2326 				return(srv_init_abort(DB_ERROR));
2327 			}
2328 
2329 			err = open_log_file(&files[i], logfilename, &size);
2330 
2331 			if (err != DB_SUCCESS) {
2332 				return(srv_init_abort(err));
2333 			}
2334 
2335 			ut_a(size != (os_offset_t) -1);
2336 
2337 			if (size & ((1 << UNIV_PAGE_SIZE_SHIFT) - 1)) {
2338 
2339 				ib::error() << "Log file " << logfilename
2340 					<< " size " << size << " is not a"
2341 					" multiple of innodb_page_size";
2342 				return(srv_init_abort(DB_ERROR));
2343 			}
2344 
2345 			size >>= UNIV_PAGE_SIZE_SHIFT;
2346 
2347 			if (i == 0) {
2348 				srv_log_file_size = size;
2349 			} else if (size != srv_log_file_size) {
2350 
2351 				ib::error() << "Log file " << logfilename
2352 					<< " is of different size "
2353 					<< (size << UNIV_PAGE_SIZE_SHIFT)
2354 					<< " bytes than other log files "
2355 					<< (srv_log_file_size
2356 					    << UNIV_PAGE_SIZE_SHIFT)
2357 					<< " bytes!";
2358 				return(srv_init_abort(DB_ERROR));
2359 			}
2360 		}
2361 
2362 		srv_n_log_files_found = i;
2363 
2364 		/* Create the in-memory file space objects. */
2365 
2366 		sprintf(logfilename + dirnamelen, "ib_logfile%u", 0);
2367 
2368 		/* Disable the doublewrite buffer for log files. */
2369 		fil_space_t*	log_space = fil_space_create(
2370 			"innodb_redo_log",
2371 			SRV_LOG_SPACE_FIRST_ID,
2372 			fsp_flags_set_page_size(0, univ_page_size),
2373 			FIL_TYPE_LOG, NULL);
2374 
2375 		ut_a(fil_validate());
2376 		ut_a(log_space);
2377 
2378 		/* srv_log_file_size is measured in pages; if page size is 16KB,
2379 		then we have a limit of 64TB on 32 bit systems */
2380 		ut_a(srv_log_file_size <= ULINT_MAX);
2381 
2382 		for (unsigned j = 0; j < i; j++) {
2383 			sprintf(logfilename + dirnamelen, "ib_logfile%u", j);
2384 
2385 			if (!fil_node_create(logfilename,
2386 					     (ulint) srv_log_file_size,
2387 					     log_space, false, false)) {
2388 				return(srv_init_abort(DB_ERROR));
2389 			}
2390 		}
2391 
2392 		if (!log_group_init(0, i, srv_log_file_size * UNIV_PAGE_SIZE,
2393 				    SRV_LOG_SPACE_FIRST_ID)) {
2394 			return(srv_init_abort(DB_ERROR));
2395 		}
2396 
2397 		/* Read the first log file header to get the encryption
2398 		information if it exists. */
2399 		if (srv_force_recovery < SRV_FORCE_NO_LOG_REDO) {
2400 		    if (!log_read_encryption()) {
2401 			return(srv_init_abort(DB_ERROR));
2402 		    }
2403 		}
2404 	}
2405 
2406 files_checked:
2407 	/* Open all log files and data files in the system
2408 	tablespace: we keep them open until database
2409 	shutdown */
2410 
2411 	fil_open_log_and_system_tablespace_files();
2412 
2413 	err = srv_undo_tablespaces_init(
2414 		create_new_db,
2415 		srv_undo_tablespaces,
2416 		&srv_undo_tablespaces_open);
2417 
2418 	/* If the force recovery is set very high then we carry on regardless
2419 	of all errors. Basically this is fingers crossed mode. */
2420 
2421 	if (err != DB_SUCCESS
2422 	    && srv_force_recovery < SRV_FORCE_NO_UNDO_LOG_SCAN) {
2423 
2424 		return(srv_init_abort(err));
2425 	}
2426 
2427 	/* Initialize objects used by dict stats gathering thread, which
2428 	can also be used by recovery if it tries to drop some table */
2429 	if (!srv_read_only_mode) {
2430 		dict_stats_thread_init();
2431 	}
2432 
2433 	trx_sys_file_format_init();
2434 
2435 	trx_sys_create();
2436 
2437 	if (create_new_db) {
2438 
2439 		ut_a(!srv_read_only_mode);
2440 		srv_init_log_online();
2441 
2442 		mtr_start(&mtr);
2443 
2444 		bool ret = fsp_header_init(0, sum_of_new_sizes, &mtr);
2445 
2446 		mtr_commit(&mtr);
2447 
2448 		if (!ret) {
2449 			return(srv_init_abort(DB_ERROR));
2450 		}
2451 
2452 		/* To maintain backward compatibility we create only
2453 		the first rollback segment before the double write buffer.
2454 		All the remaining rollback segments will be created later,
2455 		after the double write buffer has been created. */
2456 		trx_sys_create_sys_pages();
2457 
2458 		purge_queue = trx_sys_init_at_db_start();
2459 
2460 		DBUG_EXECUTE_IF("check_no_undo",
2461 				ut_ad(purge_queue->empty());
2462 				);
2463 
2464 		/* Create the per-buffer pool instance doublewrite buffers */
2465 		err = buf_parallel_dblwr_create();
2466 		if (err != DB_SUCCESS)
2467 			return(srv_init_abort(err));
2468 
2469 		/* The purge system needs to create the purge view and
2470 		therefore requires that the trx_sys is inited. */
2471 
2472 		trx_purge_sys_create(srv_n_purge_threads, purge_queue);
2473 
2474 		err = dict_create();
2475 
2476 		if (err != DB_SUCCESS) {
2477 			return(srv_init_abort(err));
2478 		}
2479 
2480 		buf_flush_sync_all_buf_pools();
2481 
2482 		flushed_lsn = log_get_lsn();
2483 
2484 		fil_write_flushed_lsn(flushed_lsn);
2485 
2486 		create_log_files_rename(
2487 			logfilename, dirnamelen, flushed_lsn, logfile0);
2488 
2489 	} else {
2490 
2491 		/* Check if we support the max format that is stamped
2492 		on the system tablespace.
2493 		Note:  We are NOT allowed to make any modifications to
2494 		the TRX_SYS_PAGE_NO page before recovery  because this
2495 		page also contains the max_trx_id etc. important system
2496 		variables that are required for recovery.  We need to
2497 		ensure that we return the system to a state where normal
2498 		recovery is guaranteed to work. We do this by
2499 		invalidating the buffer cache, this will force the
2500 		reread of the page and restoration to its last known
2501 		consistent state, this is REQUIRED for the recovery
2502 		process to work. */
2503 		err = trx_sys_file_format_max_check(
2504 			srv_max_file_format_at_startup);
2505 
2506 		if (err != DB_SUCCESS) {
2507 			return(srv_init_abort(err));
2508 		}
2509 
2510 		/* Invalidate the buffer pool to ensure that we reread
2511 		the page that we read above, during recovery.
2512 		Note that this is not as heavy weight as it seems. At
2513 		this point there will be only ONE page in the buf_LRU
2514 		and there must be no page in the buf_flush list. */
2515 		buf_pool_invalidate();
2516 
2517 		/* Scan for and locate truncate log files. Parse the located
2518 		files and add the table-to-truncate information to a central
2519 		vector for the truncate fix-up action post recovery. */
2520 		err = TruncateLogParser::scan_and_parse(srv_log_group_home_dir);
2521 		if (err != DB_SUCCESS) {
2522 
2523 			return(srv_init_abort(DB_ERROR));
2524 		}
2525 
2526 		/* Start monitor thread early enough so that e.g. crash
2527 		recovery failing to find free pages in the buffer pool is
2528 		diagnosed. */
2529 		if (!srv_read_only_mode)
2530 		{
2531 			/* Create the thread which prints InnoDB monitor
2532 			info */
2533 			os_thread_create(
2534 				srv_monitor_thread,
2535 				NULL, thread_ids + 4 + SRV_MAX_N_IO_THREADS);
2536 
2537 			srv_start_state_set(SRV_START_STATE_MONITOR);
2538 		}
2539 
2540 		/* We always try to do a recovery, even if the database had
2541 		been shut down normally: this is the normal startup path */
2542 
2543 		err = recv_recovery_from_checkpoint_start(flushed_lsn);
2544 
2545 		/* Doublewrite-recovered pages should either have been
2546 		processed, or it should have been impossible to process
2547 		them due to a missing tablespace, the innodb_force_recovery
2548 		setting, the server being read-only, or the instance being
2549 		corrupted. */
2550 		ut_ad(recv_sys->dblwr.pages.empty()
2551 		      || err == DB_TABLESPACE_NOT_FOUND
2552 		      || (err == DB_SUCCESS
2553 			  && (srv_force_recovery >= SRV_FORCE_NO_LOG_REDO))
2554 		      || err == DB_ERROR || err == DB_CORRUPTION
2555 		      || err == DB_READ_ONLY);
2556 		buf_parallel_dblwr_finish_recovery();
2557 
2558 		if (err == DB_SUCCESS) {
2559 			/* Initialize the change buffer. */
2560 			err = dict_boot();
2561 			DBUG_EXECUTE_IF("ib_dic_boot_error",
2562 					err = DB_ERROR;);
2563 		}
2564 
2565 		if (err != DB_SUCCESS) {
2566 
2567 			/* A tablespace was not found during recovery. The
2568 			user must force recovery. */
2569 
2570 			if (err == DB_TABLESPACE_NOT_FOUND) {
2571 
2572 				srv_fatal_error();
2573 
2574 				ut_error;
2575 			}
2576 
2577 			return(srv_init_abort(DB_ERROR));
2578 		}
2579 
2580 		err = buf_parallel_dblwr_create();
2581 		if (err != DB_SUCCESS)
2582 			return(srv_init_abort(err));
2583 
2584 		purge_queue = trx_sys_init_at_db_start();
2585 
2586 		if (srv_force_recovery < SRV_FORCE_NO_LOG_REDO) {
2587 			/* Apply the hashed log records to the
2588 			respective file pages, for the last batch of
2589 			recv_group_scan_log_recs(). */
2590 
2591 			recv_apply_hashed_log_recs(TRUE);
2592 			DBUG_PRINT("ib_log", ("apply completed"));
2593 
2594 			if (recv_needed_recovery) {
2595 				trx_sys_print_mysql_binlog_offset();
2596 			}
2597 		}
2598 
2599 		if (recv_sys->found_corrupt_log) {
2600 			ib::warn()
2601 				<< "The log file may have been corrupt and it"
2602 				" is possible that the log scan or parsing"
2603 				" did not proceed far enough in recovery."
2604 				" Please run CHECK TABLE on your InnoDB tables"
2605 				" to check that they are ok!"
2606 				" It may be safest to recover your"
2607 				" InnoDB database from a backup!";
2608 		}
2609 
2610 		/* The purge system needs to create the purge view and
2611 		therefore requires that the trx_sys is inited. */
2612 
2613 		trx_purge_sys_create(srv_n_purge_threads, purge_queue);
2614 
2615 		/* recv_recovery_from_checkpoint_finish needs trx lists which
2616 		are initialized in trx_sys_init_at_db_start(). */
2617 
2618 		recv_recovery_from_checkpoint_finish();
2619 
2620 		/* Fix-up truncate of tables in the system tablespace
2621 		if server crashed while truncate was active. The non-
2622 		system tables are done after tablespace discovery. Do
2623 		this now because this procedure assumes that no pages
2624 		have changed since redo recovery.  Tablespace discovery
2625 		can do updates to pages in the system tablespace.*/
2626 		err = truncate_t::fixup_tables_in_system_tablespace();
2627 
2628 		if (srv_force_recovery < SRV_FORCE_NO_IBUF_MERGE) {
2629 			/* Open or Create SYS_TABLESPACES and SYS_DATAFILES
2630 			so that tablespace names and other metadata can be
2631 			found. */
2632 			srv_sys_tablespaces_open = true;
2633 			err = dict_create_or_check_sys_tablespace();
2634 			if (err != DB_SUCCESS) {
2635 				return(srv_init_abort(err));
2636 			}
2637 
2638 			/* The following call is necessary for the insert
2639 			buffer to work with multiple tablespaces. We must
2640 			know the mapping between space id's and .ibd file
2641 			names.
2642 
2643 			In a crash recovery, we check that the info in data
2644 			dictionary is consistent with what we already know
2645 			about space id's from the calls to fil_ibd_load().
2646 
2647 			In a normal startup, we create the space objects for
2648 			every table in the InnoDB data dictionary that has
2649 			an .ibd file.
2650 
2651 			We also determine the maximum tablespace id used.
2652 
2653 			The 'validate' flag indicates that when a tablespace
2654 			is opened, we also read the header page and validate
2655 			the contents to the data dictionary. This is time
2656 			consuming, especially for databases with lots of ibd
2657 			files.  So only do it after a crash and not forcing
2658 			recovery.  Open rw transactions at this point is not
2659 			a good reason to validate. */
2660 			bool validate = recv_needed_recovery
2661 				&& srv_force_recovery == 0;
2662 
2663 			dict_check_tablespaces_and_store_max_id(validate);
2664 		}
2665 
2666 		/* Rotate the encryption key for recovery, because the server
2667 		could have crashed in the middle of a key rotation, leaving
2668 		some tablespaces with an incomplete key rotation. Here, we
2669 		resume the rotation. */
2670 		if (!srv_read_only_mode
2671 		    && srv_force_recovery < SRV_FORCE_NO_LOG_REDO) {
2672 			fil_encryption_rotate();
2673 		}
2674 
2675 
2676 		/* Fix-up truncate of table if server crashed while truncate
2677 		was active. */
2678 		err = truncate_t::fixup_tables_in_non_system_tablespace();
2679 
2680 		if (err != DB_SUCCESS) {
2681 			return(srv_init_abort(err));
2682 		}
2683 
2684 		if (!srv_force_recovery
2685 		    && !recv_sys->found_corrupt_log
2686 		    && (srv_log_file_size_requested != srv_log_file_size
2687 			|| srv_n_log_files_found != srv_n_log_files)) {
2688 
2689 			/* Prepare to replace the redo log files. */
2690 
2691 			if (srv_read_only_mode) {
2692 				ib::error() << "Cannot resize log files"
2693 					" in read-only mode.";
2694 				return(srv_init_abort(DB_READ_ONLY));
2695 			}
2696 
2697 			/* Prepare to delete the old redo log files */
2698 			flushed_lsn = srv_prepare_to_delete_redo_log_files(i);
2699 
2700 			/* Prohibit redo log writes from any other
2701 			threads until creating a log checkpoint at the
2702 			end of create_log_files(). */
2703 			ut_d(recv_no_log_write = true);
2704 			ut_ad(!buf_pool_check_no_pending_io());
2705 
2706 			RECOVERY_CRASH(3);
2707 
2708 			/* Stamp the LSN to the data files. */
2709 			fil_write_flushed_lsn(flushed_lsn);
2710 
2711 			RECOVERY_CRASH(4);
2712 
2713 			/* If log tracking is enabled, make it catch up with
2714 			the old logs synchronously. */
2715 			bool saved_srv_track_changed_pages
2716 				= srv_track_changed_pages;
2717 			if (srv_track_changed_pages) {
2718 				log_mutex_enter();
2719 				lsn_t checkpoint_lsn
2720 					= log_sys->last_checkpoint_lsn;
2721 				log_mutex_exit();
2722 				ib::info()
2723 					<< "Tracking redo log synchronously "
2724 					"until " << checkpoint_lsn;
2725 				if (!log_online_follow_redo_log()) {
2726 					return(srv_init_abort(DB_ERROR));
2727 				}
2728 				srv_track_changed_pages = false;
2729 			}
2730 
2731 			/* Close and free the redo log files, so that
2732 			we can replace them. */
2733 			fil_close_log_files(true);
2734 
2735 			RECOVERY_CRASH(5);
2736 
2737 			/* Free the old log file space. */
2738 			log_group_close_all();
2739 
2740 			ib::warn() << "Starting to delete and rewrite log"
2741 				" files.";
2742 
2743 			srv_log_file_size = srv_log_file_size_requested;
2744 
2745 			err = create_log_files(
2746 				logfilename, dirnamelen, flushed_lsn,
2747 				logfile0);
2748 
2749 			if (err != DB_SUCCESS) {
2750 				return(srv_init_abort(err));
2751 			}
2752 
2753 			if (saved_srv_track_changed_pages) {
2754 				log_mutex_enter();
2755 				lsn_t checkpoint_lsn
2756 					= log_sys->last_checkpoint_lsn;
2757 				log_sys->last_checkpoint_lsn = log_sys->lsn;
2758 				log_mutex_exit();
2759 				ib::info()
2760 					<< "Tracking redo log synchronously "
2761 					"until " << checkpoint_lsn;
2762 				srv_track_changed_pages = true;
2763 				if (!log_online_follow_redo_log()) {
2764 					return(srv_init_abort(DB_ERROR));
2765 				}
2766 				srv_track_changed_pages = false;
2767 			}
2768 
2769 			/* create_log_files() can increase the system LSN, which
2770 			is why FIL_PAGE_FILE_FLUSH_LSN has to be updated. */
2771 			flushed_lsn = log_get_lsn();
2772 			fil_write_flushed_lsn(flushed_lsn);
2773 			fil_flush_file_spaces(FIL_TYPE_TABLESPACE);
2774 
2775 			create_log_files_rename(
2776 				logfilename, dirnamelen, log_get_lsn(),
2777 				logfile0);
2778 
2779 			if (saved_srv_track_changed_pages) {
2780 				srv_track_changed_pages = true;
2781 			}
2782 		}
2783 
2784 		recv_recovery_rollback_active();
2785 
2786 		/* It is possible that file_format tag has never
2787 		been set. In this case we initialize it to minimum
2788 		value.  Important to note that we can do it ONLY after
2789 		we have finished the recovery process so that the
2790 		image of TRX_SYS_PAGE_NO is not stale. */
2791 		trx_sys_file_format_tag_init();
2792 	}
2793 
2794 	if (!create_new_db) {
2795 		/* Check and reset any no-redo rseg slot on disk used by
2796 		pre-5.7.2 redo rseg with no data to purge. */
2797 		trx_rseg_reset_pending();
2798 	}
2799 
2800 	if (!create_new_db && sum_of_new_sizes > 0) {
2801 		/* New data file(s) were added */
2802 		mtr_start(&mtr);
2803 
2804 		fsp_header_inc_size(0, sum_of_new_sizes, &mtr);
2805 
2806 		mtr_commit(&mtr);
2807 
2808 		/* Immediately write the log record about increased tablespace
2809 		size to disk, so that it is durable even if mysqld would crash
2810 		quickly */
2811 
2812 		log_buffer_flush_to_disk();
2813 	}
2814 
2815 	/* Open temp-tablespace and keep it open until shutdown. */
2816 	err = srv_open_tmp_tablespace(create_new_db, &srv_tmp_space);
2817 
2818 	if (err != DB_SUCCESS) {
2819 		return(srv_init_abort(err));
2820 	}
2821 
2822 	/* Create the doublewrite buffer to a new tablespace */
2823 	if (buf_dblwr == NULL && !buf_dblwr_create()) {
2824 		return(srv_init_abort(DB_ERROR));
2825 	}
2826 
2827 	/* Here the double write buffer has already been created and so
2828 	any new rollback segments will be allocated after the double
2829 	write buffer. The default segment should already exist.
2830 	We create the new segments only if it's a new database or
2831 	the database was shutdown cleanly. */
2832 
2833 	/* Note: When creating the extra rollback segments during an upgrade
2834 	we violate the latching order, even if the change buffer is empty.
2835 	We make an exception in sync0sync.cc and check srv_is_being_started
2836 	for that violation. It cannot create a deadlock because we are still
2837 	running in single threaded mode essentially. Only the IO threads
2838 	should be running at this stage. */
2839 
2840 	/* Deprecate innodb_undo_logs.  But still use it if it is set to
2841 	non-default and innodb_rollback_segments is default. */
2842 	ut_a(srv_rollback_segments > 0);
2843 	ut_a(srv_rollback_segments <= TRX_SYS_N_RSEGS);
2844 	ut_a(srv_undo_logs > 0);
2845 	ut_a(srv_undo_logs <= TRX_SYS_N_RSEGS);
2846 	if (srv_undo_logs < TRX_SYS_N_RSEGS) {
2847 		ib::warn() << deprecated_undo_logs;
2848 		if (srv_rollback_segments == TRX_SYS_N_RSEGS) {
2849 			srv_rollback_segments = srv_undo_logs;
2850 		}
2851 	}
2852 
2853 	/* The number of rsegs that exist in InnoDB is given by status
2854 	variable srv_available_undo_logs. The number of rsegs to use can
2855 	be set using the dynamic global variable srv_rollback_segments. */
2856 
2857 	srv_available_undo_logs = trx_sys_create_rsegs(
2858 		srv_undo_tablespaces, srv_rollback_segments, srv_tmp_undo_logs);
2859 
2860 	if (srv_available_undo_logs == ULINT_UNDEFINED) {
2861 		/* Can only happen if server is read only. */
2862 		ut_a(srv_read_only_mode);
2863 		srv_rollback_segments = ULONG_UNDEFINED;
2864 	} else if (srv_available_undo_logs < srv_rollback_segments
2865 		   && !srv_force_recovery && !recv_needed_recovery) {
2866 		ib::error() << "System or UNDO tablespace is running of out"
2867 			    << " of space";
2868 		/* This should be due to running out of file space. */
2869 		return(srv_init_abort(DB_ERROR));
2870 	}
2871 
2872 	srv_startup_is_before_trx_rollback_phase = false;
2873 
2874 	if (!srv_read_only_mode) {
2875 		/* Create the thread which watches the timeouts
2876 		for lock waits */
2877 		os_thread_create(
2878 			lock_wait_timeout_thread,
2879 			NULL, thread_ids + 2 + SRV_MAX_N_IO_THREADS);
2880 
2881 		/* Create the thread which warns of long semaphore waits */
2882 		os_thread_create(
2883 			srv_error_monitor_thread,
2884 			NULL, thread_ids + 3 + SRV_MAX_N_IO_THREADS);
2885 
2886 		/* Create the thread which prints InnoDB monitor info */
2887 		if (!srv_start_state_is_set(SRV_START_STATE_MONITOR)) {
2888 
2889 			os_thread_create(
2890 				srv_monitor_thread,
2891 				NULL, thread_ids + 4 + SRV_MAX_N_IO_THREADS);
2892 
2893 			srv_start_state_set(SRV_START_STATE_MONITOR);
2894 		}
2895 	}
2896 
2897 	/* wake main loop of page cleaner up */
2898 	os_event_set(buf_flush_event);
2899 
2900 	/* Create the SYS_FOREIGN and SYS_FOREIGN_COLS system tables */
2901 	err = dict_create_or_check_foreign_constraint_tables();
2902 	if (err != DB_SUCCESS) {
2903 		return(srv_init_abort(err));
2904 	}
2905 
2906 	/* Create the SYS_TABLESPACES system table */
2907 	err = dict_create_or_check_sys_tablespace();
2908 	if (err != DB_SUCCESS) {
2909 		return(srv_init_abort(err));
2910 	}
2911 	srv_sys_tablespaces_open = true;
2912 
2913 	/* Create the SYS_VIRTUAL system table */
2914 	err = dict_create_or_check_sys_virtual();
2915 	if (err != DB_SUCCESS) {
2916 		return(srv_init_abort(err));
2917 	}
2918 
2919 	/* Create the SYS_ZIP_DICT system table */
2920 	err = dict_create_or_check_sys_zip_dict();
2921 	if (err != DB_SUCCESS) {
2922 		return(err);
2923 	}
2924 
2925 	srv_is_being_started = false;
2926 
2927 	ut_a(trx_purge_state() == PURGE_STATE_INIT);
2928 
2929 	/* Create the master thread which does purge and other utility
2930 	operations */
2931 
2932 	if (!srv_read_only_mode) {
2933 
2934 		os_thread_create(
2935 			srv_master_thread,
2936 			NULL, thread_ids + (1 + SRV_MAX_N_IO_THREADS));
2937 
2938 		srv_start_state_set(SRV_START_STATE_MASTER);
2939 	}
2940 
2941 	/* Enable row log encryption if it is set */
2942 	log_tmp_enable_encryption_if_set();
2943 
2944 	if (!srv_read_only_mode
2945 	    && srv_force_recovery < SRV_FORCE_NO_BACKGROUND) {
2946 
2947 		os_thread_create(
2948 			srv_purge_coordinator_thread,
2949 			NULL, thread_ids + 6 + SRV_MAX_N_IO_THREADS);
2950 
2951 		ut_a(UT_ARR_SIZE(thread_ids)
2952 		     > 6 + srv_n_purge_threads + SRV_MAX_N_IO_THREADS);
2953 
2954 		/* We've already created the purge coordinator thread above. */
2955 		for (i = 1; i < srv_n_purge_threads; ++i) {
2956 			os_thread_create(
2957 				srv_worker_thread, NULL,
2958 				thread_ids + 6 + i + SRV_MAX_N_IO_THREADS);
2959 		}
2960 
2961 		srv_start_wait_for_purge_to_start();
2962 
2963 		srv_start_state_set(SRV_START_STATE_PURGE);
2964 	} else {
2965 		purge_sys->state = PURGE_STATE_DISABLED;
2966 	}
2967 
2968 	sum_of_data_file_sizes = srv_sys_space.get_sum_of_sizes();
2969 	ut_a(sum_of_new_sizes != ULINT_UNDEFINED);
2970 
2971 	tablespace_size_in_header = fsp_header_get_tablespace_size();
2972 
2973 	if (!srv_read_only_mode
2974 	    && !srv_sys_space.can_auto_extend_last_file()
2975 	    && sum_of_data_file_sizes != tablespace_size_in_header) {
2976 
2977 		ib::error() << "Tablespace size stored in header is "
2978 			<< tablespace_size_in_header << " pages, but the sum"
2979 			" of data file sizes is " << sum_of_data_file_sizes
2980 			<< " pages";
2981 
2982 		if (srv_force_recovery == 0
2983 		    && sum_of_data_file_sizes < tablespace_size_in_header) {
2984 			/* This is a fatal error, the tail of a tablespace is
2985 			missing */
2986 
2987 			ib::error()
2988 				<< "Cannot start InnoDB."
2989 				" The tail of the system tablespace is"
2990 				" missing. Have you edited"
2991 				" innodb_data_file_path in my.cnf in an"
2992 				" inappropriate way, removing"
2993 				" ibdata files from there?"
2994 				" You can set innodb_force_recovery=1"
2995 				" in my.cnf to force"
2996 				" a startup if you are trying"
2997 				" to recover a badly corrupt database.";
2998 
2999 			return(srv_init_abort(DB_ERROR));
3000 		}
3001 	}
3002 
3003 	if (!srv_read_only_mode
3004 	    && srv_sys_space.can_auto_extend_last_file()
3005 	    && sum_of_data_file_sizes < tablespace_size_in_header) {
3006 
3007 		ib::error() << "Tablespace size stored in header is "
3008 			<< tablespace_size_in_header << " pages, but the sum"
3009 			" of data file sizes is only "
3010 			<< sum_of_data_file_sizes << " pages";
3011 
3012 		if (srv_force_recovery == 0) {
3013 
3014 			ib::error()
3015 				<< "Cannot start InnoDB. The tail of"
3016 				" the system tablespace is"
3017 				" missing. Have you edited"
3018 				" innodb_data_file_path in my.cnf in an"
3019 				" InnoDB: inappropriate way, removing"
3020 				" ibdata files from there?"
3021 				" You can set innodb_force_recovery=1"
3022 				" in my.cnf to force"
3023 				" InnoDB: a startup if you are trying to"
3024 				" recover a badly corrupt database.";
3025 
3026 			return(srv_init_abort(DB_ERROR));
3027 		}
3028 	}
3029 
3030 	if (!srv_file_per_table && srv_pass_corrupt_table) {
3031 		ib::warn()
3032 			<< "The option innodb_file_per_table is disabled,"
3033 			   " so using the option innodb_pass_corrupt_table "
3034 			   "doesn't make sense.";
3035 	}
3036 
3037 	if (srv_print_verbose_log) {
3038 		ib::info()
3039 			<< "Percona XtraDB (http://www.percona.com) " INNODB_VERSION_STR
3040 			<< " started; log sequence number "
3041 			<< srv_start_lsn;
3042 	}
3043 
3044 	if (srv_force_recovery > 0) {
3045 		ib::info() << "!!! innodb_force_recovery is set to "
3046 			<< srv_force_recovery << " !!!";
3047 	}
3048 
3049 	if (srv_force_recovery == 0) {
3050 		/* In the insert buffer we may have even bigger tablespace
3051 		id's, because we may have dropped those tablespaces, but
3052 		insert buffer merge has not had time to clean the records from
3053 		the ibuf tree. */
3054 
3055 		ibuf_update_max_tablespace_id();
3056 	}
3057 
3058 	if (!srv_read_only_mode) {
3059 		if (create_new_db) {
3060 			srv_buffer_pool_load_at_startup = FALSE;
3061 		}
3062 
3063 		/* Create the buffer pool dump/load thread */
3064 		os_thread_create(buf_dump_thread, NULL, NULL);
3065 
3066 		/* Create the dict stats gathering thread */
3067 		os_thread_create(dict_stats_thread, NULL, NULL);
3068 
3069 		/* Create the thread that will optimize the FTS sub-system. */
3070 		fts_optimize_init();
3071 
3072 		fil_system_enter();
3073 		btr_scrub_init();
3074 		fil_crypt_threads_init();
3075 		fil_system_exit();
3076 
3077 		srv_start_state_set(SRV_START_STATE_STAT);
3078 	}
3079 
3080 	/* Create the buffer pool resize thread */
3081 	os_thread_create(buf_resize_thread, NULL, NULL);
3082 
3083 	srv_was_started = TRUE;
3084 	return(DB_SUCCESS);
3085 }
3086 
3087 #if 0
3088 /********************************************************************
3089 Sync all FTS cache before shutdown */
3090 static
3091 void
3092 srv_fts_close(void)
3093 /*===============*/
3094 {
3095 	dict_table_t*	table;
3096 
3097 	for (table = UT_LIST_GET_FIRST(dict_sys->table_LRU);
3098 	     table; table = UT_LIST_GET_NEXT(table_LRU, table)) {
3099 		fts_t*          fts = table->fts;
3100 
3101 		if (fts != NULL) {
3102 			fts_sync_table(table);
3103 		}
3104 	}
3105 
3106 	for (table = UT_LIST_GET_FIRST(dict_sys->table_non_LRU);
3107 	     table; table = UT_LIST_GET_NEXT(table_LRU, table)) {
3108 		fts_t*          fts = table->fts;
3109 
3110 		if (fts != NULL) {
3111 			fts_sync_table(table);
3112 		}
3113 	}
3114 }
3115 #endif
3116 
3117 /****************************************************************//**
3118 Shuts down the InnoDB database.
3119 @return DB_SUCCESS or error code */
3120 dberr_t
innobase_shutdown_for_mysql(void)3121 innobase_shutdown_for_mysql(void)
3122 /*=============================*/
3123 {
3124 	if (!srv_was_started) {
3125 		if (srv_is_being_started) {
3126 			ib::warn() << "Shutting down an improperly started,"
3127 				" or created database!";
3128 		}
3129 
3130 		return(DB_SUCCESS);
3131 	}
3132 
3133 	if (!srv_read_only_mode) {
3134 		fts_optimize_shutdown();
3135 		dict_stats_shutdown();
3136 	}
3137 
3138 	/* 1. Flush the buffer pool to disk, write the current lsn to
3139 	the tablespace header(s), and copy all log data to archive.
3140 	The step 1 is the real InnoDB shutdown. The remaining steps 2 - ...
3141 	just free data structures after the shutdown. */
3142 
3143 	logs_empty_and_mark_files_at_shutdown();
3144 
3145 	if (srv_conc_get_active_threads() != 0) {
3146 		ib::warn() << "Query counter shows "
3147 			<< srv_conc_get_active_threads() << " queries still"
3148 			" inside InnoDB at shutdown";
3149 	}
3150 
3151 	/* 2. Make all threads created by InnoDB to exit */
3152 	srv_shutdown_all_bg_threads();
3153 
3154 
3155 	if (srv_monitor_file) {
3156 		fclose(srv_monitor_file);
3157 		srv_monitor_file = 0;
3158 		if (srv_monitor_file_name) {
3159 			unlink(srv_monitor_file_name);
3160 			ut_free(srv_monitor_file_name);
3161 		}
3162 	}
3163 
3164 	if (srv_dict_tmpfile) {
3165 		fclose(srv_dict_tmpfile);
3166 		srv_dict_tmpfile = 0;
3167 	}
3168 
3169 	if (srv_misc_tmpfile) {
3170 		fclose(srv_misc_tmpfile);
3171 		srv_misc_tmpfile = 0;
3172 	}
3173 
3174 	if (!srv_read_only_mode) {
3175 		dict_stats_thread_deinit();
3176 		/* Shutdown key rotation threads */
3177 		fil_crypt_threads_cleanup();
3178 		btr_scrub_cleanup();
3179 	}
3180 
3181 	/* This must be disabled before closing the buffer pool
3182 	and closing the data dictionary.  */
3183 	btr_search_disable(true);
3184 
3185 	ibuf_close();
3186 	log_online_shutdown();
3187 	log_shutdown();
3188 	trx_sys_file_format_close();
3189 	trx_sys_close();
3190 	lock_sys_close();
3191 
3192 	trx_pool_close();
3193 
3194 	/* We don't create these mutexes in RO mode because we don't create
3195 	the temp files that the cover. */
3196 	if (!srv_read_only_mode) {
3197 		mutex_free(&srv_monitor_file_mutex);
3198 		mutex_free(&srv_dict_tmpfile_mutex);
3199 		mutex_free(&srv_misc_tmpfile_mutex);
3200 	}
3201 
3202 	dict_close();
3203 	btr_search_sys_free();
3204 
3205 	/* 3. Free all InnoDB's own mutexes and the os_fast_mutexes inside
3206 	them */
3207 	os_aio_free();
3208 	que_close();
3209 	row_mysql_close();
3210 	srv_free();
3211 	fil_close();
3212 
3213 	/* 4. Free all allocated memory */
3214 
3215 	pars_lexer_close();
3216 	log_mem_free();
3217 	buf_pool_free(srv_buf_pool_instances);
3218 
3219 	/* 6. Free the thread management resoruces. */
3220 	os_thread_free();
3221 
3222 	/* 7. Free the synchronisation infrastructure. */
3223 	sync_check_close();
3224 
3225 	if (dict_foreign_err_file) {
3226 		fclose(dict_foreign_err_file);
3227 	}
3228 
3229 	if (srv_print_verbose_log) {
3230 		ib::info() << "Shutdown completed; log sequence number "
3231 			<< srv_shutdown_lsn;
3232 	}
3233 
3234 	srv_was_started = FALSE;
3235 	srv_start_has_been_called = FALSE;
3236 
3237 	unlock_keyrings(NULL);
3238 
3239 	return(DB_SUCCESS);
3240 }
3241 #endif /* !UNIV_HOTBACKUP */
3242 
3243 
3244 /********************************************************************
3245 Signal all per-table background threads to shutdown, and wait for them to do
3246 so. */
3247 void
srv_shutdown_table_bg_threads(void)3248 srv_shutdown_table_bg_threads(void)
3249 /*===============================*/
3250 {
3251 	dict_table_t*	table;
3252 	dict_table_t*	first;
3253 	dict_table_t*	last = NULL;
3254 
3255 	mutex_enter(&dict_sys->mutex);
3256 
3257 	/* Signal all threads that they should stop. */
3258 	table = UT_LIST_GET_FIRST(dict_sys->table_LRU);
3259 	first = table;
3260 	while (table) {
3261 		dict_table_t*	next;
3262 		fts_t*		fts = table->fts;
3263 
3264 		if (fts != NULL) {
3265 			fts_start_shutdown(table, fts);
3266 		}
3267 
3268 		next = UT_LIST_GET_NEXT(table_LRU, table);
3269 
3270 		if (!next) {
3271 			last = table;
3272 		}
3273 
3274 		table = next;
3275 	}
3276 
3277 	/* We must release dict_sys->mutex here; if we hold on to it in the
3278 	loop below, we will deadlock if any of the background threads try to
3279 	acquire it (for example, the FTS thread by calling que_eval_sql).
3280 
3281 	Releasing it here and going through dict_sys->table_LRU without
3282 	holding it is safe because:
3283 
3284 	 a) MySQL only starts the shutdown procedure after all client
3285 	 threads have been disconnected and no new ones are accepted, so no
3286 	 new tables are added or old ones dropped.
3287 
3288 	 b) Despite its name, the list is not LRU, and the order stays
3289 	 fixed.
3290 
3291 	To safeguard against the above assumptions ever changing, we store
3292 	the first and last items in the list above, and then check that
3293 	they've stayed the same below. */
3294 
3295 	mutex_exit(&dict_sys->mutex);
3296 
3297 	/* Wait for the threads of each table to stop. This is not inside
3298 	the above loop, because by signaling all the threads first we can
3299 	overlap their shutting down delays. */
3300 	table = UT_LIST_GET_FIRST(dict_sys->table_LRU);
3301 	ut_a(first == table);
3302 	while (table) {
3303 		dict_table_t*	next;
3304 		fts_t*		fts = table->fts;
3305 
3306 		if (fts != NULL) {
3307 			fts_shutdown(table, fts);
3308 		}
3309 
3310 		next = UT_LIST_GET_NEXT(table_LRU, table);
3311 
3312 		if (table == last) {
3313 			ut_a(!next);
3314 		}
3315 
3316 		table = next;
3317 	}
3318 }
3319 
3320 /** Get the meta-data filename from the table name for a
3321 single-table tablespace.
3322 @param[in]	table		table object
3323 @param[out]	filename	filename
3324 @param[in]	max_len		filename max length */
3325 void
srv_get_meta_data_filename(dict_table_t * table,char * filename,ulint max_len)3326 srv_get_meta_data_filename(
3327 	dict_table_t*	table,
3328 	char*		filename,
3329 	ulint		max_len)
3330 {
3331 	ulint		len;
3332 	char*		path;
3333 
3334 	/* Make sure the data_dir_path is set. */
3335 	dict_get_and_save_data_dir_path(table, false);
3336 
3337 	if (DICT_TF_HAS_DATA_DIR(table->flags)) {
3338 		ut_a(table->data_dir_path);
3339 
3340 		path = fil_make_filepath(
3341 			table->data_dir_path, table->name.m_name, CFG, true);
3342 	} else {
3343 		path = fil_make_filepath(NULL, table->name.m_name, CFG, false);
3344 	}
3345 
3346 	ut_a(path);
3347 	len = ut_strlen(path);
3348 	ut_a(max_len >= len);
3349 
3350 	strcpy(filename, path);
3351 
3352 	ut_free(path);
3353 }
3354 
3355 /** Get the encryption-data filename from the table name for a
3356 single-table tablespace.
3357 @param[in]	table		table object
3358 @param[out]	filename	filename
3359 @param[in]	max_len		filename max length */
3360 void
srv_get_encryption_data_filename(dict_table_t * table,char * filename,ulint max_len)3361 srv_get_encryption_data_filename(
3362 	dict_table_t*	table,
3363 	char*		filename,
3364 	ulint		max_len)
3365 {
3366 	ulint		len;
3367 	char*		path;
3368 
3369 	/* Make sure the data_dir_path is set. */
3370 	dict_get_and_save_data_dir_path(table, false);
3371 
3372 	if (DICT_TF_HAS_DATA_DIR(table->flags)) {
3373 		ut_a(table->data_dir_path);
3374 
3375 		path = fil_make_filepath(
3376 			table->data_dir_path, table->name.m_name, CFP, true);
3377 	} else {
3378 		path = fil_make_filepath(NULL, table->name.m_name, CFP, false);
3379 	}
3380 
3381 	ut_a(path);
3382 	len = ut_strlen(path);
3383 	ut_a(max_len >= len);
3384 
3385 	strcpy(filename, path);
3386 
3387 	ut_free(path);
3388 }
3389