1 /*****************************************************************************
2 
3 Copyright (c) 1996, 2017, Oracle and/or its affiliates. All rights reserved.
4 Copyright (c) 2008, Google Inc.
5 Copyright (c) 2009, Percona Inc.
6 
7 Portions of this file contain modifications contributed and copyrighted by
8 Google, Inc. Those modifications are gratefully acknowledged and are described
9 briefly in the InnoDB documentation. The contributions by Google are
10 incorporated with their permission, and subject to the conditions contained in
11 the file COPYING.Google.
12 
13 Portions of this file contain modifications contributed and copyrighted
14 by Percona Inc.. Those modifications are
15 gratefully acknowledged and are described briefly in the InnoDB
16 documentation. The contributions by Percona Inc. are incorporated with
17 their permission, and subject to the conditions contained in the file
18 COPYING.Percona.
19 
20 This program is free software; you can redistribute it and/or modify
21 it under the terms of the GNU General Public License, version 2.0,
22 as published by the Free Software Foundation.
23 
24 This program is also distributed with certain software (including
25 but not limited to OpenSSL) that is licensed under separate terms,
26 as designated in a particular file or component or in included license
27 documentation.  The authors of MySQL hereby grant you an additional
28 permission to link the program and your derivative works with the
29 separately licensed software that they have included with MySQL.
30 
31 This program is distributed in the hope that it will be useful,
32 but WITHOUT ANY WARRANTY; without even the implied warranty of
33 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
34 GNU General Public License, version 2.0, for more details.
35 
36 You should have received a copy of the GNU General Public License along with
37 this program; if not, write to the Free Software Foundation, Inc.,
38 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
39 
40 *****************************************************************************/
41 
42 /********************************************************************//**
43 @file srv/srv0start.cc
44 Starts the InnoDB database server
45 
46 Created 2/16/1996 Heikki Tuuri
47 *************************************************************************/
48 
49 #include "my_global.h"
50 
51 #include "ha_prototypes.h"
52 
53 #include "mysqld.h"
54 #include "mysql/psi/mysql_stage.h"
55 #include "mysql/psi/psi.h"
56 
57 #include "my_dir.h"
58 #include <cstdio>
59 
60 #include "row0ftsort.h"
61 #include "ut0mem.h"
62 #include "mem0mem.h"
63 #include "data0data.h"
64 #include "data0type.h"
65 #include "dict0dict.h"
66 #include "buf0buf.h"
67 #include "buf0dump.h"
68 #include "os0file.h"
69 #include "os0thread.h"
70 #include "fil0fil.h"
71 #include "fsp0fsp.h"
72 #include "rem0rec.h"
73 #include "mtr0mtr.h"
74 #include "log0log.h"
75 #include "log0recv.h"
76 #include "page0page.h"
77 #include "page0cur.h"
78 #include "trx0trx.h"
79 #include "trx0sys.h"
80 #include "btr0btr.h"
81 #include "btr0cur.h"
82 #include "rem0rec.h"
83 #include "ibuf0ibuf.h"
84 #include "srv0start.h"
85 #include "srv0srv.h"
86 #include "fsp0sysspace.h"
87 #include "row0trunc.h"
88 #include "xb0xb.h"
89 #ifndef UNIV_HOTBACKUP
90 # include "trx0rseg.h"
91 # include "os0proc.h"
92 # include "buf0flu.h"
93 # include "buf0rea.h"
94 # include "dict0boot.h"
95 # include "dict0load.h"
96 # include "dict0stats_bg.h"
97 # include "que0que.h"
98 # include "usr0sess.h"
99 # include "lock0lock.h"
100 # include "trx0roll.h"
101 # include "trx0purge.h"
102 # include "lock0lock.h"
103 # include "pars0pars.h"
104 # include "btr0sea.h"
105 # include "rem0cmp.h"
106 # include "dict0crea.h"
107 # include "row0ins.h"
108 # include "row0sel.h"
109 # include "row0upd.h"
110 # include "row0row.h"
111 # include "row0mysql.h"
112 # include "row0trunc.h"
113 # include "btr0pcur.h"
114 # include "os0event.h"
115 # include "zlib.h"
116 # include "ut0crc32.h"
117 # include "ut0new.h"
118 
119 #ifdef HAVE_LZO1X
120 #include <lzo/lzo1x.h>
121 extern bool srv_lzo_disabled;
122 #endif /* HAVE_LZO1X */
123 
124 /** Log sequence number immediately after startup */
125 lsn_t	srv_start_lsn;
126 /** Log sequence number at shutdown */
127 lsn_t	srv_shutdown_lsn;
128 
129 /** TRUE if a raw partition is in use */
130 ibool	srv_start_raw_disk_in_use = FALSE;
131 
/** Space id of the first UNDO tablespace. */
133 ulint	srv_undo_space_id_start;
134 
135 /** Number of IO threads to use */
136 ulint	srv_n_file_io_threads = 0;
137 
138 /** TRUE if the server is being started, before rolling back any
139 incomplete transactions */
140 bool	srv_startup_is_before_trx_rollback_phase = false;
141 /** TRUE if the server is being started */
142 bool	srv_is_being_started = false;
143 /** TRUE if SYS_TABLESPACES is available for lookups */
144 bool	srv_sys_tablespaces_open = false;
145 /** TRUE if the server was successfully started */
146 ibool	srv_was_started = FALSE;
147 /** TRUE if innobase_start_or_create_for_mysql() has been called */
148 static ibool	srv_start_has_been_called = FALSE;
149 
/** Bit flags recording which groups of background threads have been
created. If the initialisation step has to be aborted, they are used to
determine which threads need to be stopped. Each flag occupies its own
bit so that several of them can be OR-ed into one word. */
enum srv_start_state_t {
	SRV_START_STATE_NONE = 0,		/*!< No thread started */
	SRV_START_STATE_LOCK_SYS = 1 << 0,	/*!< Started lock-timeout
						thread. */
	SRV_START_STATE_IO = 1 << 1,		/*!< Started IO threads */
	SRV_START_STATE_MONITOR = 1 << 2,	/*!< Started monitor thread */
	SRV_START_STATE_MASTER = 1 << 3,	/*!< Started master thread. */
	SRV_START_STATE_PURGE = 1 << 4,		/*!< Started purge thread(s) */
	SRV_START_STATE_STAT = 1 << 5		/*!< Started bufdump + dict stat
						and FTS optimize thread. */
};
164 
/** Track server thread starting phases */
166 static ulint	srv_start_state;
167 
168 /** At a shutdown this value climbs from SRV_SHUTDOWN_NONE to
169 SRV_SHUTDOWN_CLEANUP and then to SRV_SHUTDOWN_LAST_PHASE, and so on */
170 enum srv_shutdown_t	srv_shutdown_state = SRV_SHUTDOWN_NONE;
171 
172 /** Files comprising the system tablespace */
173 pfs_os_file_t	files[1000];
174 
175 /** io_handler_thread parameters for thread identification */
176 static ulint		n[SRV_MAX_N_IO_THREADS + 6];
177 /** io_handler_thread identifiers, 32 is the maximum number of purge threads  */
178 static os_thread_id_t	thread_ids[SRV_MAX_N_IO_THREADS + 6 + 32];
179 
180 /** Name of srv_monitor_file */
181 static char*	srv_monitor_file_name;
182 #endif /* !UNIV_HOTBACKUP */
183 
/** Minimum expected tablespace size. (5M) */
185 static const ulint MIN_EXPECTED_TABLESPACE_SIZE = 5 * 1024 * 1024;
186 
187 /** */
188 #define SRV_MAX_N_PENDING_SYNC_IOS	100
189 
190 #ifdef UNIV_PFS_THREAD
191 /* Keys to register InnoDB threads with performance schema */
192 mysql_pfs_key_t	buf_dump_thread_key;
193 mysql_pfs_key_t	dict_stats_thread_key;
194 mysql_pfs_key_t	io_handler_thread_key;
195 mysql_pfs_key_t	io_ibuf_thread_key;
196 mysql_pfs_key_t	io_log_thread_key;
197 mysql_pfs_key_t	io_read_thread_key;
198 mysql_pfs_key_t	io_write_thread_key;
199 mysql_pfs_key_t	srv_error_monitor_thread_key;
200 mysql_pfs_key_t	srv_lock_timeout_thread_key;
201 mysql_pfs_key_t	srv_master_thread_key;
202 mysql_pfs_key_t	srv_monitor_thread_key;
203 mysql_pfs_key_t	srv_purge_thread_key;
204 mysql_pfs_key_t	srv_worker_thread_key;
205 #endif /* UNIV_PFS_THREAD */
206 
207 #ifdef HAVE_PSI_STAGE_INTERFACE
208 /** Array of all InnoDB stage events for monitoring activities via
209 performance schema. */
210 static PSI_stage_info*	srv_stages[] =
211 {
212 	&srv_stage_alter_table_end,
213 	&srv_stage_alter_table_flush,
214 	&srv_stage_alter_table_insert,
215 	&srv_stage_alter_table_log_index,
216 	&srv_stage_alter_table_log_table,
217 	&srv_stage_alter_table_merge_sort,
218 	&srv_stage_alter_table_read_pk_internal_sort,
219 	&srv_stage_buffer_pool_load,
220 };
221 #endif /* HAVE_PSI_STAGE_INTERFACE */
222 
223 /*********************************************************************//**
224 Check if a file can be opened in read-write mode.
225 @return true if it doesn't exist or can be opened in rw mode. */
226 static
227 bool
srv_file_check_mode(const char * name)228 srv_file_check_mode(
229 /*================*/
230 	const char*	name)		/*!< in: filename to check */
231 {
232 	os_file_stat_t	stat;
233 
234 	memset(&stat, 0x0, sizeof(stat));
235 
236 	dberr_t		err = os_file_get_status(
237 		name, &stat, true, srv_read_only_mode);
238 
239 	if (err == DB_FAIL) {
240 		ib::error() << "os_file_get_status() failed on '" << name
241 			<< "'. Can't determine file permissions.";
242 		return(false);
243 
244 	} else if (err == DB_SUCCESS) {
245 
246 		/* Note: stat.rw_perm is only valid of files */
247 
248 		if (stat.type == OS_FILE_TYPE_FILE) {
249 
250 			if (!stat.rw_perm) {
251 				const char*	mode = srv_read_only_mode
252 					? "read" : "read-write";
253 				ib::error() << name << " can't be opened in "
254 					<< mode << " mode.";
255 				return(false);
256 			}
257 		} else {
258 			/* Not a regular file, bail out. */
259 			ib::error() << "'" << name << "' not a regular file.";
260 
261 			return(false);
262 		}
263 	} else {
264 
265 		/* This is OK. If the file create fails on RO media, there
266 		is nothing we can do. */
267 
268 		ut_a(err == DB_NOT_FOUND);
269 	}
270 
271 	return(true);
272 }
273 
274 #ifndef UNIV_HOTBACKUP
275 /********************************************************************//**
276 I/o-handler thread function.
277 @return OS_THREAD_DUMMY_RETURN */
278 extern "C"
279 os_thread_ret_t
DECLARE_THREAD(io_handler_thread)280 DECLARE_THREAD(io_handler_thread)(
281 /*==============================*/
282 	void*	arg)	/*!< in: pointer to the number of the segment in
283 			the aio array */
284 {
285 	ulint	segment;
286 
287 	segment = *((ulint*) arg);
288 
289 #ifdef UNIV_DEBUG_THREAD_CREATION
290 	ib::info() << "Io handler thread " << segment << " starts, id "
291 		<< os_thread_pf(os_thread_get_curr_id());
292 #endif
293 
294 #ifdef UNIV_PFS_THREAD
295 	/* For read only mode, we don't need ibuf and log I/O thread.
296 	Please see innobase_start_or_create_for_mysql() */
297 	ulint   start = (srv_read_only_mode) ? 0 : 2;
298 
299 	if (segment < start) {
300 		if (segment == 0) {
301 			pfs_register_thread(io_ibuf_thread_key);
302 		} else {
303 			ut_ad(segment == 1);
304 			pfs_register_thread(io_log_thread_key);
305 		}
306 	} else if (segment >= start
307 		   && segment < (start + srv_n_read_io_threads)) {
308 			pfs_register_thread(io_read_thread_key);
309 
310 	} else if (segment >= (start + srv_n_read_io_threads)
311 		   && segment < (start + srv_n_read_io_threads
312 				 + srv_n_write_io_threads)) {
313 		pfs_register_thread(io_write_thread_key);
314 
315 	} else {
316 		pfs_register_thread(io_handler_thread_key);
317 	}
318 #endif /* UNIV_PFS_THREAD */
319 
320 	while (srv_shutdown_state != SRV_SHUTDOWN_EXIT_THREADS
321 	       || buf_page_cleaner_is_active
322 	       || !os_aio_all_slots_free()) {
323 		fil_aio_wait(segment);
324 	}
325 
326 	/* We count the number of threads in os_thread_exit(). A created
327 	thread should always use that to exit and not use return() to exit.
328 	The thread actually never comes here because it is exited in an
329 	os_event_wait(). */
330 
331 	os_thread_exit();
332 
333 	OS_THREAD_DUMMY_RETURN;
334 }
335 #endif /* !UNIV_HOTBACKUP */
336 
337 #ifndef UNIV_HOTBACKUP
338 /*********************************************************************//**
339 Creates a log file.
340 @return DB_SUCCESS or error code */
341 static MY_ATTRIBUTE((nonnull, warn_unused_result))
342 dberr_t
create_log_file(pfs_os_file_t * file,const char * name)343 create_log_file(
344 /*============*/
345 	pfs_os_file_t*	file,	/*!< out: file handle */
346 	const char*	name)	/*!< in: log file name */
347 {
348 	bool		ret;
349 
350 	*file = os_file_create(
351 		innodb_log_file_key, name,
352 		OS_FILE_CREATE|OS_FILE_ON_ERROR_NO_EXIT, OS_FILE_NORMAL,
353 		OS_LOG_FILE, srv_read_only_mode, &ret);
354 
355 	if (!ret) {
356 		ib::error() << "Cannot create " << name;
357 		return(DB_ERROR);
358 	}
359 
360 	ib::info() << "Setting log file " << name << " size to "
361 		<< (srv_log_file_size >> (20 - UNIV_PAGE_SIZE_SHIFT))
362 		<< " MB";
363 
364 	ret = os_file_set_size(name, *file,
365 			       (os_offset_t) srv_log_file_size
366 			       << UNIV_PAGE_SIZE_SHIFT,
367 			       srv_read_only_mode);
368 	if (!ret) {
369 		ib::error() << "Cannot set log file " << name << " to size "
370 			<< (srv_log_file_size >> (20 - UNIV_PAGE_SIZE_SHIFT))
371 			<< " MB";
372 		return(DB_ERROR);
373 	}
374 
375 	ret = os_file_close(*file);
376 	ut_a(ret);
377 
378 	return(DB_SUCCESS);
379 }
380 
381 /** Initial number of the first redo log file */
382 #define INIT_LOG_FILE0	(SRV_N_LOG_FILES_MAX + 1)
383 
384 /*********************************************************************//**
385 Creates all log files.
386 @return DB_SUCCESS or error code */
387 static
388 dberr_t
create_log_files(char * logfilename,size_t dirnamelen,lsn_t lsn,char * & logfile0)389 create_log_files(
390 /*=============*/
391 	char*	logfilename,	/*!< in/out: buffer for log file name */
392 	size_t	dirnamelen,	/*!< in: length of the directory path */
393 	lsn_t	lsn,		/*!< in: FIL_PAGE_FILE_FLUSH_LSN value */
394 	char*&	logfile0)	/*!< out: name of the first log file */
395 {
396 	dberr_t err;
397 
398 	if (srv_read_only_mode) {
399 		ib::error() << "Cannot create log files in read-only mode";
400 		return(DB_READ_ONLY);
401 	}
402 
403 	/* Remove any old log files. */
404 	for (unsigned i = 0; i <= INIT_LOG_FILE0; i++) {
405 		sprintf(logfilename + dirnamelen, "ib_logfile%u", i);
406 
407 		/* Ignore errors about non-existent files or files
408 		that cannot be removed. The create_log_file() will
409 		return an error when the file exists. */
410 #ifdef _WIN32
411 		DeleteFile((LPCTSTR) logfilename);
412 #else
413 		unlink(logfilename);
414 #endif
415 		/* Crashing after deleting the first
416 		file should be recoverable. The buffer
417 		pool was clean, and we can simply create
418 		all log files from the scratch. */
419 		RECOVERY_CRASH(6);
420 	}
421 
422 	ut_ad(!buf_pool_check_no_pending_io());
423 
424 	RECOVERY_CRASH(7);
425 
426 	for (unsigned i = 0; i < srv_n_log_files; i++) {
427 		sprintf(logfilename + dirnamelen,
428 			"ib_logfile%u", i ? i : INIT_LOG_FILE0);
429 
430 		err = create_log_file(&files[i], logfilename);
431 
432 		if (err != DB_SUCCESS) {
433 			return(err);
434 		}
435 	}
436 
437 	RECOVERY_CRASH(8);
438 
439 	/* We did not create the first log file initially as
440 	ib_logfile0, so that crash recovery cannot find it until it
441 	has been completed and renamed. */
442 	sprintf(logfilename + dirnamelen, "ib_logfile%u", INIT_LOG_FILE0);
443 
444 	/* Disable the doublewrite buffer for log files, not required */
445 
446 	fil_space_t*	log_space = fil_space_create(
447 		"innodb_redo_log", SRV_LOG_SPACE_FIRST_ID,
448 		fsp_flags_set_page_size(0, univ_page_size),
449 		FIL_TYPE_LOG);
450 	ut_a(fil_validate());
451 	ut_a(log_space != NULL);
452 
453 	logfile0 = fil_node_create(
454 		logfilename, (ulint) srv_log_file_size,
455 		log_space, false, false);
456 	ut_a(logfile0);
457 
458 	for (unsigned i = 1; i < srv_n_log_files; i++) {
459 
460 		sprintf(logfilename + dirnamelen, "ib_logfile%u", i);
461 
462 		if (!fil_node_create(logfilename,
463 				     (ulint) srv_log_file_size,
464 				     log_space, false, false)) {
465 
466 			ib::error()
467 				<< "Cannot create file node for log file "
468 				<< logfilename;
469 
470 			return(DB_ERROR);
471 		}
472 	}
473 
474 	if (!log_group_init(0, srv_n_log_files,
475 			    srv_log_file_size * UNIV_PAGE_SIZE,
476 			    SRV_LOG_SPACE_FIRST_ID)) {
477 		return(DB_ERROR);
478 	}
479 
480 	fil_open_log_and_system_tablespace_files();
481 
482 	/* Create a log checkpoint. */
483 	log_mutex_enter();
484 	ut_d(recv_no_log_write = false);
485 	recv_reset_logs(lsn);
486 	log_mutex_exit();
487 
488 	return(DB_SUCCESS);
489 }
490 
491 /*********************************************************************//**
492 Renames the first log file. */
493 static
494 void
create_log_files_rename(char * logfilename,size_t dirnamelen,lsn_t lsn,char * logfile0)495 create_log_files_rename(
496 /*====================*/
497 	char*	logfilename,	/*!< in/out: buffer for log file name */
498 	size_t	dirnamelen,	/*!< in: length of the directory path */
499 	lsn_t	lsn,		/*!< in: FIL_PAGE_FILE_FLUSH_LSN value */
500 	char*	logfile0)	/*!< in/out: name of the first log file */
501 {
502 	/* If innodb_flush_method=O_DSYNC,
503 	we need to explicitly flush the log buffers. */
504 	fil_flush(SRV_LOG_SPACE_FIRST_ID);
505 	/* Close the log files, so that we can rename
506 	the first one. */
507 	fil_close_log_files(false);
508 
509 	/* Rename the first log file, now that a log
510 	checkpoint has been created. */
511 	sprintf(logfilename + dirnamelen, "ib_logfile%u", 0);
512 
513 	RECOVERY_CRASH(9);
514 
515 	ib::info() << "Renaming log file " << logfile0 << " to "
516 		<< logfilename;
517 
518 	log_mutex_enter();
519 	ut_ad(strlen(logfile0) == 2 + strlen(logfilename));
520 	bool success = os_file_rename(
521 		innodb_log_file_key, logfile0, logfilename);
522 	ut_a(success);
523 
524 	RECOVERY_CRASH(10);
525 
526 	/* Replace the first file with ib_logfile0. */
527 	strcpy(logfile0, logfilename);
528 	log_mutex_exit();
529 
530 	fil_open_log_and_system_tablespace_files();
531 
532 	ib::warn() << "New log files created, LSN=" << lsn;
533 }
534 
535 /*********************************************************************//**
536 Opens a log file.
537 @return DB_SUCCESS or error code */
538 static MY_ATTRIBUTE((nonnull, warn_unused_result))
539 dberr_t
open_log_file(pfs_os_file_t * file,const char * name,os_offset_t * size)540 open_log_file(
541 /*==========*/
542 	pfs_os_file_t*	file,	/*!< out: file handle */
543 	const char*	name,	/*!< in: log file name */
544 	os_offset_t*	size)	/*!< out: file size */
545 {
546 	bool	ret;
547 
548 	*file = os_file_create(innodb_log_file_key, name,
549 			       OS_FILE_OPEN, OS_FILE_AIO,
550 			       OS_LOG_FILE, srv_read_only_mode, &ret);
551 	if (!ret) {
552 		ib::error() << "Unable to open '" << name << "'";
553 		return(DB_ERROR);
554 	}
555 
556 	*size = os_file_get_size(*file);
557 
558 	ret = os_file_close(*file);
559 	ut_a(ret);
560 	return(DB_SUCCESS);
561 }
562 
563 /*********************************************************************//**
564 Create undo tablespace.
565 @return DB_SUCCESS or error code */
566 static
567 dberr_t
srv_undo_tablespace_create(const char * name,ulint size)568 srv_undo_tablespace_create(
569 /*=======================*/
570 	const char*	name,		/*!< in: tablespace name */
571 	ulint		size)		/*!< in: tablespace size in pages */
572 {
573 	pfs_os_file_t	fh;
574 	bool		ret;
575 	dberr_t		err = DB_SUCCESS;
576 
577 	os_file_create_subdirs_if_needed(name);
578 
579 	fh = os_file_create(
580 		innodb_data_file_key,
581 		name,
582 		srv_read_only_mode ? OS_FILE_OPEN : OS_FILE_CREATE,
583 		OS_FILE_NORMAL, OS_DATA_FILE, srv_read_only_mode, &ret);
584 
585 	if (srv_read_only_mode && ret) {
586 
587 		ib::info() << name << " opened in read-only mode";
588 
589 	} else if (ret == FALSE) {
590 		if (os_file_get_last_error(false) != OS_FILE_ALREADY_EXISTS) {
591 
592 			ib::error() << "Can't create UNDO tablespace "
593 				<< name;
594 		}
595 		err = DB_ERROR;
596 	} else {
597 		ut_a(!srv_read_only_mode);
598 
599 		/* We created the data file and now write it full of zeros */
600 
601 		ib::info() << "Data file " << name << " did not exist: new to"
602 			" be created";
603 
604 		ib::info() << "Setting file " << name << " size to "
605 			<< (size >> (20 - UNIV_PAGE_SIZE_SHIFT)) << " MB";
606 
607 		ib::info() << "Database physically writes the file full: "
608 			<< "wait...";
609 
610 		ret = os_file_set_size(
611 			name, fh, size << UNIV_PAGE_SIZE_SHIFT,
612 			srv_read_only_mode);
613 
614 		if (!ret) {
615 			ib::info() << "Error in creating " << name
616 				<< ": probably out of disk space";
617 
618 			err = DB_ERROR;
619 		}
620 
621 		os_file_close(fh);
622 	}
623 
624 	return(err);
625 }
626 /*********************************************************************//**
627 Open an undo tablespace.
628 @return DB_SUCCESS or error code */
629 static
630 dberr_t
srv_undo_tablespace_open(const char * name,ulint space_id)631 srv_undo_tablespace_open(
632 /*=====================*/
633 	const char*	name,		/*!< in: tablespace file name */
634 	ulint		space_id)	/*!< in: tablespace id */
635 {
636 	pfs_os_file_t	fh;
637 	bool		ret;
638 	ulint		flags;
639 	dberr_t		err	= DB_ERROR;
640 	char		undo_name[sizeof "innodb_undo000"];
641 
642 	ut_snprintf(undo_name, sizeof(undo_name),
643 		   "innodb_undo%03u", static_cast<unsigned>(space_id));
644 
645 	if (!srv_file_check_mode(name)) {
646 		ib::error() << "UNDO tablespaces must be " <<
647 			(srv_read_only_mode ? "writable" : "readable") << "!";
648 
649 		return(DB_ERROR);
650 	}
651 
652 	fh = os_file_create(
653 		innodb_data_file_key, name,
654 		OS_FILE_OPEN_RETRY
655 		| OS_FILE_ON_ERROR_NO_EXIT
656 		| OS_FILE_ON_ERROR_SILENT,
657 		OS_FILE_NORMAL,
658 		OS_DATA_FILE,
659 		srv_read_only_mode,
660 		&ret);
661 
662 	/* If the file open was successful then load the tablespace. */
663 
664 	if (ret) {
665 		os_offset_t	size;
666 		fil_space_t*	space;
667 
668 		bool	atomic_write;
669 
670 #if !defined(NO_FALLOCATE) && defined(UNIV_LINUX)
671 		if (!srv_use_doublewrite_buf) {
672 			atomic_write = fil_fusionio_enable_atomic_write(fh);
673 		} else {
674 			atomic_write = false;
675 		}
676 #else
677 		atomic_write = false;
678 #endif /* !NO_FALLOCATE && UNIV_LINUX */
679 
680 		size = os_file_get_size(fh);
681 		ut_a(size != (os_offset_t) -1);
682 
683 		ret = os_file_close(fh);
684 		ut_a(ret);
685 
686 		/* Load the tablespace into InnoDB's internal
687 		data structures. */
688 
689 		/* We set the biggest space id to the undo tablespace
690 		because InnoDB hasn't opened any other tablespace apart
691 		from the system tablespace. */
692 
693 		fil_set_max_space_id_if_bigger(space_id);
694 
695 		/* Set the compressed page size to 0 (non-compressed) */
696 		flags = fsp_flags_init(
697 			univ_page_size, false, false, false, false);
698 		space = fil_space_create(
699 			undo_name, space_id, flags, FIL_TYPE_TABLESPACE);
700 
701 		ut_a(fil_validate());
702 		ut_a(space);
703 
704 		os_offset_t	n_pages = size / UNIV_PAGE_SIZE;
705 
706 		/* On 32-bit platforms, ulint is 32 bits and os_offset_t
707 		is 64 bits. It is OK to cast the n_pages to ulint because
708 		the unit has been scaled to pages and page number is always
709 		32 bits. */
710 		if (fil_node_create(
711 			name, (ulint) n_pages, space, false, atomic_write)) {
712 
713 			err = DB_SUCCESS;
714 		}
715 	}
716 
717 	return(err);
718 }
719 
720 /** Check if undo tablespaces and redo log files exist before creating a
721 new system tablespace
722 @retval DB_SUCCESS  if all undo and redo logs are not found
723 @retval DB_ERROR    if any undo and redo logs are found */
724 static
725 dberr_t
srv_check_undo_redo_logs_exists()726 srv_check_undo_redo_logs_exists()
727 {
728 	bool		ret;
729 	pfs_os_file_t	fh;
730 	char	name[OS_FILE_MAX_PATH];
731 
732 	/* Check if any undo tablespaces exist */
733 	for (ulint i = 1; i <= srv_undo_tablespaces; ++i) {
734 
735 		ut_snprintf(
736 			name, sizeof(name),
737 			"%s%cundo%03lu",
738 			srv_undo_dir, OS_PATH_SEPARATOR,
739 			i);
740 
741 		fh = os_file_create(
742 			innodb_data_file_key, name,
743 			OS_FILE_OPEN_RETRY
744 			| OS_FILE_ON_ERROR_NO_EXIT
745 			| OS_FILE_ON_ERROR_SILENT,
746 			OS_FILE_NORMAL,
747 			OS_DATA_FILE,
748 			srv_read_only_mode,
749 			&ret);
750 
751 		if (ret) {
752 			os_file_close(fh);
753 			ib::error()
754 				<< "undo tablespace '" << name << "' exists."
755 				" Creating system tablespace with existing undo"
756 				" tablespaces is not supported. Please delete"
757 				" all undo tablespaces before creating new"
758 				" system tablespace.";
759 			return(DB_ERROR);
760 		}
761 	}
762 
763 	/* Check if any redo log files exist */
764 	char	logfilename[OS_FILE_MAX_PATH];
765 	size_t dirnamelen = strlen(srv_log_group_home_dir);
766 	memcpy(logfilename, srv_log_group_home_dir, dirnamelen);
767 
768 	for (unsigned i = 0; i < srv_n_log_files; i++) {
769 		sprintf(logfilename + dirnamelen,
770 			"ib_logfile%u", i);
771 
772 		fh = os_file_create(
773 			innodb_log_file_key, logfilename,
774 			OS_FILE_OPEN_RETRY
775 			| OS_FILE_ON_ERROR_NO_EXIT
776 			| OS_FILE_ON_ERROR_SILENT,
777 			OS_FILE_NORMAL,
778 			OS_LOG_FILE,
779 			srv_read_only_mode,
780 			&ret);
781 
782 		if (ret) {
783 			os_file_close(fh);
784 			ib::error() << "redo log file '" << logfilename
785 				<< "' exists. Creating system tablespace with"
786 				" existing redo log files is not recommended."
787 				" Please delete all redo log files before"
788 				" creating new system tablespace.";
789 			return(DB_ERROR);
790 		}
791 	}
792 
793 	return(DB_SUCCESS);
794 }
795 
796 undo::undo_spaces_t	undo::Truncate::s_fix_up_spaces;
797 
798 /********************************************************************
799 Opens the configured number of undo tablespaces.
800 @return DB_SUCCESS or error code */
801 dberr_t
srv_undo_tablespaces_init(bool create_new_db,bool backup_mode,const ulint n_conf_tablespaces,ulint * n_opened)802 srv_undo_tablespaces_init(
803 /*======================*/
804 	bool		create_new_db,		/*!< in: TRUE if new db being
805 						created */
806 	bool		backup_mode,		/*!< in: TRUE disables reading
807 						the system tablespace (used in
808 						XtraBackup), FALSE is passed on
809 						recovery. */
810 	const ulint	n_conf_tablespaces,	/*!< in: configured undo
811 						tablespaces */
812 	ulint*		n_opened)		/*!< out: number of UNDO
813 						tablespaces successfully
814 						discovered and opened */
815 {
816 	ulint			i;
817 	dberr_t			err = DB_SUCCESS;
818 	ulint			prev_space_id = 0;
819 	ulint			n_undo_tablespaces;
820 	ulint			undo_tablespace_ids[TRX_SYS_N_RSEGS + 1];
821 
822 	*n_opened = 0;
823 
824 	ut_a(n_conf_tablespaces <= TRX_SYS_N_RSEGS);
825 	ut_a(!create_new_db || !backup_mode);
826 
827 	memset(undo_tablespace_ids, 0x0, sizeof(undo_tablespace_ids));
828 
829 	/* Create the undo spaces only if we are creating a new
830 	instance. We don't allow creating of new undo tablespaces
831 	in an existing instance (yet).  This restriction exists because
832 	we check in several places for SYSTEM tablespaces to be less than
833 	the min of user defined tablespace ids. Once we implement saving
834 	the location of the undo tablespaces and their space ids this
835 	restriction will/should be lifted. */
836 
837 	for (i = 0; create_new_db && i < n_conf_tablespaces; ++i) {
838 		char		name[OS_FILE_MAX_PATH];
839 		ulint		space_id;
840 
841 		DBUG_EXECUTE_IF("innodb_undo_upgrade",
842 			if (i == 0) {
843 				dict_hdr_get_new_id(
844 					NULL, NULL, &space_id, NULL, true);
845 				dict_hdr_get_new_id(
846 					NULL, NULL, &space_id, NULL, true);
847 				dict_hdr_get_new_id(
848 					NULL, NULL, &space_id, NULL, true);
849 			});
850 
851 		dict_hdr_get_new_id(NULL, NULL, &space_id, NULL, true);
852 
853 		fil_set_max_space_id_if_bigger(space_id);
854 
855 		if (i == 0) {
856 			srv_undo_space_id_start = space_id;
857 			prev_space_id = srv_undo_space_id_start - 1;
858 		}
859 
860 		ut_snprintf(
861 			name, sizeof(name),
862 			"%s%cundo%03lu",
863 			srv_undo_dir, OS_PATH_SEPARATOR, space_id);
864 
865 		undo_tablespace_ids[i] = space_id;
866 
867 		err = srv_undo_tablespace_create(
868 			name, SRV_UNDO_TABLESPACE_SIZE_IN_PAGES);
869 
870 		if (err != DB_SUCCESS) {
871 			ib::error() << "Could not create undo tablespace '"
872 				<< name << "'.";
873 			return(err);
874 		}
875 	}
876 
877 	/* Get the tablespace ids of all the undo segments excluding the system
878 	tablespace (0). If we are creating a new instance then
879 	we build the undo_tablespace_ids ourselves since they don't
880 	already exist. If we are in the backup mode, don't read the trx header,
881 	we just need to add all available undo tablespaces to fil_system. */
882 
883 	if (!create_new_db && !backup_mode) {
884 		n_undo_tablespaces = trx_rseg_get_n_undo_tablespaces(
885 			undo_tablespace_ids);
886 
887 		srv_undo_tablespaces_active = n_undo_tablespaces;
888 
889 		if (srv_undo_tablespaces_active != 0) {
890 			srv_undo_space_id_start = undo_tablespace_ids[0];
891 			prev_space_id = srv_undo_space_id_start - 1;
892 		}
893 
894 		/* Check if any of the UNDO tablespace needs fix-up because
895 		server crashed while truncate was active on UNDO tablespace.*/
896 		for (i = 0; i < n_undo_tablespaces; ++i) {
897 
898 			undo::Truncate	undo_trunc;
899 
900 			if (undo_trunc.needs_fix_up(undo_tablespace_ids[i])) {
901 
902 				char	name[OS_FILE_MAX_PATH];
903 
904 				ut_snprintf(name, sizeof(name),
905 					    "%s%cundo%03lu",
906 					    srv_undo_dir, OS_PATH_SEPARATOR,
907 					    undo_tablespace_ids[i]);
908 
909 				os_file_delete(innodb_data_file_key, name);
910 
911 				err = srv_undo_tablespace_create(
912 					name,
913 					SRV_UNDO_TABLESPACE_SIZE_IN_PAGES);
914 
915 				if (err != DB_SUCCESS) {
916 					ib::error() << "Could not fix-up undo "
917 						" tablespace truncate '"
918 						<< name << "'.";
919 					return(err);
920 				}
921 
922 				undo::Truncate::s_fix_up_spaces.push_back(
923 					undo_tablespace_ids[i]);
924 			}
925 		}
926 	} else {
927 		n_undo_tablespaces = n_conf_tablespaces;
928 
929 		undo_tablespace_ids[n_conf_tablespaces] = ULINT_UNDEFINED;
930 	}
931 	if (backup_mode) {
932 		// Locate all undo files in the srv_undo_dir and
933 		// fill corresponding undo_tablespace_ids.
934 		int j = 0;
935 		MY_DIR* dir = my_dir(srv_undo_dir, MY_WANT_STAT);
936 		ut_a(dir);
937 		for (uint i = 0; i < dir->number_off_files; ++i) {
938 			const fileinfo& file = dir->dir_entry[i];
939 			ulint id = 0;
940 			int pos;
941 			if (MY_S_ISREG(file.mystat->st_mode) &&
942 			    sscanf(file.name, "undo%03lu%n", &id, &pos) == 1 &&
943 			    pos == (int) strlen(file.name)) {
944 				undo_tablespace_ids[j++] = id;
945 			}
946 		}
947 		my_dirend(dir);
948 		if (j > 0) {
949 			srv_undo_space_id_start = undo_tablespace_ids[0];
950 			prev_space_id = srv_undo_space_id_start - 1;
951 			n_undo_tablespaces = j;
952 			undo_tablespace_ids[j] = ULINT_UNDEFINED;
953 		}
954 	}
955 
956 	/* Open all the undo tablespaces that are currently in use. If we
957 	fail to open any of these it is a fatal error. The tablespace ids
958 	should be contiguous. It is a fatal error because they are required
959 	for recovery and are referenced by the UNDO logs (a.k.a RBS). */
960 
961 	for (i = 0; i < n_undo_tablespaces; ++i) {
962 		char	name[OS_FILE_MAX_PATH];
963 
964 		ut_snprintf(
965 			name, sizeof(name),
966 			"%s%cundo%03lu",
967 			srv_undo_dir, OS_PATH_SEPARATOR,
968 			undo_tablespace_ids[i]);
969 
970 		/* Should be no gaps in undo tablespace ids. */
971 		ut_a(prev_space_id + 1 == undo_tablespace_ids[i]);
972 
973 		/* The system space id should not be in this array. */
974 		ut_a(undo_tablespace_ids[i] != 0);
975 		ut_a(undo_tablespace_ids[i] != ULINT_UNDEFINED);
976 
977 		fil_set_max_space_id_if_bigger(undo_tablespace_ids[i]);
978 
979 		err = srv_undo_tablespace_open(name, undo_tablespace_ids[i]);
980 
981 		if (err != DB_SUCCESS) {
982 			ib::error() << "Unable to open undo tablespace '"
983 				<< name << "'.";
984 			return(err);
985 		}
986 
987 		prev_space_id = undo_tablespace_ids[i];
988 
989 		++*n_opened;
990 	}
991 
992 	/* Open any extra unused undo tablespaces. These must be contiguous.
993 	We stop at the first failure. These are undo tablespaces that are
994 	not in use and therefore not required by recovery. We only check
995 	that there are no gaps. */
996 
997 	for (i = prev_space_id + 1; i < TRX_SYS_N_RSEGS; ++i) {
998 		char	name[OS_FILE_MAX_PATH];
999 
1000 		ut_snprintf(
1001 			name, sizeof(name),
1002 			"%s%cundo%03lu", srv_undo_dir, OS_PATH_SEPARATOR, i);
1003 
1004 		err = srv_undo_tablespace_open(name, i);
1005 
1006 		if (err != DB_SUCCESS) {
1007 			break;
1008 		}
1009 
1010 		/** Note the first undo tablespace id in case of
1011 		no active undo tablespace. */
1012 		if (n_undo_tablespaces == 0) {
1013 			srv_undo_space_id_start = i;
1014 		}
1015 
1016 		++n_undo_tablespaces;
1017 
1018 		++*n_opened;
1019 	}
1020 
	/** Explicitly specify the srv_undo_space_id_start
	as zero when there are no undo tablespaces. */
1023 	if (n_undo_tablespaces == 0) {
1024 		srv_undo_space_id_start = 0;
1025 	}
1026 
1027 	/* If the user says that there are fewer than what we find we
1028 	tolerate that discrepancy but not the inverse. Because there could
1029 	be unused undo tablespaces for future use. */
1030 
1031 	if (n_conf_tablespaces > n_undo_tablespaces) {
1032 		ib::error() << "Expected to open " << n_conf_tablespaces
1033 			<< " undo tablespaces but was able to find only "
1034 			<< n_undo_tablespaces << " undo tablespaces. Set the"
1035 			" innodb_undo_tablespaces parameter to the correct"
1036 			" value and retry. Suggested value is "
1037 			<< n_undo_tablespaces;
1038 
1039 		return(err != DB_SUCCESS ? err : DB_ERROR);
1040 
1041 	} else  if (n_undo_tablespaces > 0) {
1042 
1043 		ib::info() << "Opened " << n_undo_tablespaces
1044 			<< " undo tablespaces";
1045 
1046 		ib::info() << srv_undo_tablespaces_active << " undo tablespaces"
1047 			<< " made active";
1048 
1049 		if (n_conf_tablespaces == 0 && !backup_mode) {
1050 			ib::warn() << "Will use system tablespace for all newly"
1051 				<< " created rollback-segment as"
1052 				<< " innodb_undo_tablespaces=0";
1053 		}
1054 	}
1055 
1056 	if (create_new_db) {
1057 		mtr_t	mtr;
1058 
1059 		mtr_start(&mtr);
1060 
1061 		/* The undo log tablespace */
1062 		for (i = 0; i < n_undo_tablespaces; ++i) {
1063 
1064 			fsp_header_init(
1065 				undo_tablespace_ids[i],
1066 				SRV_UNDO_TABLESPACE_SIZE_IN_PAGES, &mtr);
1067 		}
1068 
1069 		mtr_commit(&mtr);
1070 	}
1071 
1072 	if (!undo::Truncate::s_fix_up_spaces.empty()) {
1073 
1074 		/* Step-1: Initialize the tablespace header and rsegs header. */
1075 		mtr_t		mtr;
1076 		trx_sysf_t*	sys_header;
1077 
1078 		mtr_start(&mtr);
1079 		/* Turn off REDO logging. We are in server start mode and fixing
1080 		UNDO tablespace even before REDO log is read. Let's say we
1081 		do REDO logging here then this REDO log record will be applied
1082 		as part of the current recovery process. We surely don't need
1083 		that as this is fix-up action parallel to REDO logging. */
1084 		mtr_set_log_mode(&mtr, MTR_LOG_NO_REDO);
1085 		sys_header = trx_sysf_get(&mtr);
1086 
1087 		for (undo::undo_spaces_t::const_iterator it
1088 			     = undo::Truncate::s_fix_up_spaces.begin();
1089 		     it != undo::Truncate::s_fix_up_spaces.end();
1090 		     ++it) {
1091 
1092 			undo::Truncate::add_space_to_trunc_list(*it);
1093 
1094 			fsp_header_init(
1095 				*it, SRV_UNDO_TABLESPACE_SIZE_IN_PAGES, &mtr);
1096 
1097 			mtr_x_lock(fil_space_get_latch(*it, NULL), &mtr);
1098 
1099 			for (ulint i = 0; i < TRX_SYS_N_RSEGS; i++) {
1100 
1101 				ulint	space_id = trx_sysf_rseg_get_space(
1102 						sys_header, i, &mtr);
1103 
1104 				if (space_id == *it) {
1105 					trx_rseg_header_create(
1106 						*it, univ_page_size, ULINT_MAX,
1107 						i, &mtr);
1108 				}
1109 			}
1110 
1111 			undo::Truncate::clear_trunc_list();
1112 		}
1113 		mtr_commit(&mtr);
1114 
1115 		/* Step-2: Flush the dirty pages from the buffer pool. */
1116 		for (undo::undo_spaces_t::const_iterator it
1117 			     = undo::Truncate::s_fix_up_spaces.begin();
1118 		     it != undo::Truncate::s_fix_up_spaces.end();
1119 		     ++it) {
1120 
1121 			buf_LRU_flush_or_remove_pages(
1122 				TRX_SYS_SPACE, BUF_REMOVE_FLUSH_WRITE, NULL);
1123 
1124 			buf_LRU_flush_or_remove_pages(
1125 				*it, BUF_REMOVE_FLUSH_WRITE, NULL);
1126 
1127 			/* Remove the truncate redo log file. */
1128 			undo::Truncate	undo_trunc;
1129 			undo_trunc.done_logging(*it);
1130 		}
1131 	}
1132 
1133 	return(DB_SUCCESS);
1134 }
1135 
1136 /********************************************************************
1137 Wait for the purge thread(s) to start up. */
1138 static
1139 void
srv_start_wait_for_purge_to_start()1140 srv_start_wait_for_purge_to_start()
1141 /*===============================*/
1142 {
1143 	/* Wait for the purge coordinator and master thread to startup. */
1144 
1145 	purge_state_t	state = trx_purge_state();
1146 
1147 	ut_a(state != PURGE_STATE_DISABLED);
1148 
1149 	while (srv_shutdown_state == SRV_SHUTDOWN_NONE
1150 	       && srv_force_recovery < SRV_FORCE_NO_BACKGROUND
1151 	       && state == PURGE_STATE_INIT) {
1152 
1153 		switch (state = trx_purge_state()) {
1154 		case PURGE_STATE_RUN:
1155 		case PURGE_STATE_STOP:
1156 			break;
1157 
1158 		case PURGE_STATE_INIT:
1159 			ib::info() << "Waiting for purge to start";
1160 
1161 			os_thread_sleep(50000);
1162 			break;
1163 
1164 		case PURGE_STATE_EXIT:
1165 		case PURGE_STATE_DISABLED:
1166 			ut_error;
1167 		}
1168 	}
1169 }
1170 
1171 /** Create the temporary file tablespace.
1172 @param[in]	create_new_db	whether we are creating a new database
1173 @param[in,out]	tmp_space	Shared Temporary SysTablespace
1174 @return DB_SUCCESS or error code. */
1175 static
1176 dberr_t
srv_open_tmp_tablespace(bool create_new_db,SysTablespace * tmp_space)1177 srv_open_tmp_tablespace(
1178 	bool		create_new_db,
1179 	SysTablespace*	tmp_space)
1180 {
1181 	ulint	sum_of_new_sizes;
1182 
1183 	/* Will try to remove if there is existing file left-over by last
1184 	unclean shutdown */
1185 	tmp_space->set_sanity_check_status(true);
1186 	tmp_space->delete_files();
1187 	tmp_space->set_ignore_read_only(true);
1188 
1189 	ib::info() << "Creating shared tablespace for temporary tables";
1190 
1191 	bool	create_new_temp_space;
1192 	ulint	temp_space_id = ULINT_UNDEFINED;
1193 
1194 	dict_hdr_get_new_id(NULL, NULL, &temp_space_id, NULL, true);
1195 
1196 	tmp_space->set_space_id(temp_space_id);
1197 
1198 	RECOVERY_CRASH(100);
1199 
1200 	dberr_t	err = tmp_space->check_file_spec(
1201 			&create_new_temp_space, 12 * 1024 * 1024);
1202 
1203 	if (err == DB_FAIL) {
1204 
1205 		ib::error() << "The " << tmp_space->name()
1206 			<< " data file must be writable!";
1207 
1208 		err = DB_ERROR;
1209 
1210 	} else if (err != DB_SUCCESS) {
1211 		ib::error() << "Could not create the shared "
1212 			<< tmp_space->name() << ".";
1213 
1214 	} else if ((err = tmp_space->open_or_create(
1215 			    true, create_new_db, &sum_of_new_sizes, NULL))
1216 		   != DB_SUCCESS) {
1217 
1218 		ib::error() << "Unable to create the shared "
1219 			<< tmp_space->name();
1220 
1221 	} else {
1222 
1223 		mtr_t	mtr;
1224 		ulint	size = tmp_space->get_sum_of_sizes();
1225 
1226 		ut_a(temp_space_id != ULINT_UNDEFINED);
1227 		ut_a(tmp_space->space_id() == temp_space_id);
1228 
1229 		/* Open this shared temp tablespace in the fil_system so that
1230 		it stays open until shutdown. */
1231 		if (fil_space_open(tmp_space->name())) {
1232 
1233 			/* Initialize the header page */
1234 			mtr_start(&mtr);
1235 			mtr_set_log_mode(&mtr, MTR_LOG_NO_REDO);
1236 
1237 			fsp_header_init(tmp_space->space_id(), size, &mtr);
1238 
1239 			mtr_commit(&mtr);
1240 		} else {
1241 			/* This file was just opened in the code above! */
1242 			ib::error() << "The " << tmp_space->name()
1243 				<< " data file cannot be re-opened"
1244 				" after check_file_spec() succeeded!";
1245 
1246 			err = DB_ERROR;
1247 		}
1248 	}
1249 
1250 	return(err);
1251 }
1252 
1253 /****************************************************************//**
1254 Set state to indicate start of particular group of threads in InnoDB. */
1255 UNIV_INLINE
1256 void
srv_start_state_set(srv_start_state_t state)1257 srv_start_state_set(
1258 /*================*/
1259 	srv_start_state_t state)	/*!< in: indicate current state of
1260 					thread startup */
1261 {
1262 	srv_start_state |= state;
1263 }
1264 
1265 /****************************************************************//**
1266 Check if following group of threads is started.
1267 @return true if started */
1268 UNIV_INLINE
1269 bool
srv_start_state_is_set(srv_start_state_t state)1270 srv_start_state_is_set(
1271 /*===================*/
1272 	srv_start_state_t state)	/*!< in: state to check for */
1273 {
1274 	return(srv_start_state & state);
1275 }
1276 
1277 /**
1278 Shutdown all background threads created by InnoDB. */
1279 void
srv_shutdown_all_bg_threads()1280 srv_shutdown_all_bg_threads()
1281 {
1282 	ulint	i;
1283 
1284 	srv_shutdown_state = SRV_SHUTDOWN_EXIT_THREADS;
1285 
1286 	if (!srv_start_state) {
1287 		return;
1288 	}
1289 
1290 	/* All threads end up waiting for certain events. Put those events
1291 	to the signaled state. Then the threads will exit themselves after
1292 	os_event_wait(). */
1293 	for (i = 0; i < 1000; i++) {
1294 		/* NOTE: IF YOU CREATE THREADS IN INNODB, YOU MUST EXIT THEM
1295 		HERE OR EARLIER */
1296 
1297 		if (!srv_read_only_mode) {
1298 
1299 			if (srv_start_state_is_set(SRV_START_STATE_LOCK_SYS)) {
1300 				/* a. Let the lock timeout thread exit */
1301 				os_event_set(lock_sys->timeout_event);
1302 			}
1303 
1304 			/* b. srv error monitor thread exits automatically,
1305 			no need to do anything here */
1306 
1307 			if (srv_start_state_is_set(SRV_START_STATE_MASTER)) {
1308 				/* c. We wake the master thread so that
1309 				it exits */
1310 				srv_wake_master_thread();
1311 			}
1312 
1313 			if (srv_start_state_is_set(SRV_START_STATE_PURGE)) {
1314 				/* d. Wakeup purge threads. */
1315 				srv_purge_wakeup();
1316 			}
1317 		}
1318 
1319 		if (srv_start_state_is_set(SRV_START_STATE_IO)) {
1320 			/* e. Exit the i/o threads */
1321 			if (!srv_read_only_mode) {
1322 				if (recv_sys->flush_start != NULL) {
1323 					os_event_set(recv_sys->flush_start);
1324 				}
1325 				if (recv_sys->flush_end != NULL) {
1326 					os_event_set(recv_sys->flush_end);
1327 				}
1328 			}
1329 
1330 			os_event_set(buf_flush_event);
1331 
1332 			if (!buf_page_cleaner_is_active
1333 			    && os_aio_all_slots_free()) {
1334 				os_aio_wake_all_threads_at_shutdown();
1335 			}
1336 		}
1337 
1338 		/* f. dict_stats_thread is signaled from
1339 		logs_empty_and_mark_files_at_shutdown() and should have
1340 		already quit or is quitting right now. */
1341 
1342 		bool	active = os_thread_active();
1343 
1344 		os_thread_sleep(100000);
1345 
1346 		if (!active) {
1347 			break;
1348 		}
1349 	}
1350 
1351 	if (i == 1000) {
1352 		ib::warn() << os_thread_count << " threads created by InnoDB"
1353 			" had not exited at shutdown!";
1354 #ifdef UNIV_DEBUG
1355 		os_aio_print_pending_io(stderr);
1356 		ut_ad(0);
1357 #endif /* UNIV_DEBUG */
1358 	} else {
1359 		/* Reset the start state. */
1360 		srv_start_state = SRV_START_STATE_NONE;
1361 	}
1362 }
1363 
/* srv_init_abort(_db_err) forwards to srv_init_abort_low(). The expansion
refers to a variable named create_new_db, so one must be in scope wherever
the macro is used. In UNIV_DEBUG builds the file name and line number of
the expansion site are passed along as well. */
#ifdef UNIV_DEBUG
# define srv_init_abort(_db_err)	\
	srv_init_abort_low(create_new_db, __FILE__, __LINE__, _db_err)
#else
# define srv_init_abort(_db_err)	\
	srv_init_abort_low(create_new_db, _db_err)
#endif /* UNIV_DEBUG */
1371 
1372 /** Innobase start-up aborted. Perform cleanup actions.
1373 @param[in]	create_new_db	TRUE if new db is  being created
1374 @param[in]	file		File name
1375 @param[in]	line		Line number
1376 @param[in]	err		Reason for aborting InnoDB startup
1377 @return DB_SUCCESS or error code. */
1378 static
1379 dberr_t
srv_init_abort_low(bool create_new_db,const char * file,ulint line,dberr_t err)1380 srv_init_abort_low(
1381 	bool		create_new_db,
1382 #ifdef UNIV_DEBUG
1383 	const char*	file,
1384 	ulint		line,
1385 #endif /* UNIV_DEBUG */
1386 	dberr_t		err)
1387 {
1388 	if (create_new_db) {
1389 		ib::error() << "InnoDB Database creation was aborted"
1390 #ifdef UNIV_DEBUG
1391 			" at " << innobase_basename(file) << "[" << line << "]"
1392 #endif /* UNIV_DEBUG */
1393 			" with error " << ut_strerr(err) << ". You may need"
1394 			" to delete the ibdata1 file before trying to start"
1395 			" up again.";
1396 	} else {
1397 		ib::error() << "Plugin initialization aborted"
1398 #ifdef UNIV_DEBUG
1399 			" at " << innobase_basename(file) << "[" << line << "]"
1400 #endif /* UNIV_DEBUG */
1401 			" with error " << ut_strerr(err);
1402 	}
1403 
1404 	srv_shutdown_all_bg_threads();
1405 	return(err);
1406 }
1407 
1408 /** Prepare to delete the redo log files. Flush the dirty pages from all the
1409 buffer pools.  Flush the redo log buffer to the redo log file.
1410 @param[in]	n_files		number of old redo log files
1411 @return lsn upto which data pages have been flushed. */
1412 static
1413 lsn_t
srv_prepare_to_delete_redo_log_files(ulint n_files)1414 srv_prepare_to_delete_redo_log_files(
1415 	ulint	n_files)
1416 {
1417 	lsn_t	flushed_lsn;
1418 	ulint	pending_io = 0;
1419 	ulint	count = 0;
1420 
1421 	do {
1422 		/* Clean the buffer pool. */
1423 		buf_flush_sync_all_buf_pools();
1424 
1425 		RECOVERY_CRASH(1);
1426 
1427 		log_mutex_enter();
1428 
1429 		fil_names_clear(log_sys->lsn, false);
1430 
1431 		flushed_lsn = log_sys->lsn;
1432 
1433 		{
1434 			ib::warn	warning;
1435 			if (srv_log_file_size == 0) {
1436 				warning << "Upgrading redo log: ";
1437 			} else {
1438 				warning << "Resizing redo log from "
1439 					<< n_files << "*"
1440 					<< srv_log_file_size << " to ";
1441 			}
1442 			warning << srv_n_log_files << "*"
1443 				<< srv_log_file_size_requested
1444 				<< " pages, LSN=" << flushed_lsn;
1445 		}
1446 
1447 		/* Flush the old log files. */
1448 		log_mutex_exit();
1449 
1450 		log_write_up_to(flushed_lsn, true);
1451 
1452 		/* If innodb_flush_method=O_DSYNC,
1453 		we need to explicitly flush the log buffers. */
1454 		fil_flush(SRV_LOG_SPACE_FIRST_ID);
1455 
1456 		ut_ad(flushed_lsn == log_get_lsn());
1457 
1458 		/* Check if the buffer pools are clean.  If not
1459 		retry till it is clean. */
1460 		pending_io = buf_pool_check_no_pending_io();
1461 
1462 		if (pending_io > 0) {
1463 			count++;
1464 			/* Print a message every 60 seconds if we
1465 			are waiting to clean the buffer pools */
1466 			if (srv_print_verbose_log && count > 600) {
1467 				ib::info() << "Waiting for "
1468 					<< pending_io << " buffer "
1469 					<< "page I/Os to complete";
1470 				count = 0;
1471 			}
1472 		}
1473 		os_thread_sleep(100000);
1474 
1475 	} while (buf_pool_check_no_pending_io());
1476 
1477 	return(flushed_lsn);
1478 }
1479 
1480 /********************************************************************
1481 Starts InnoDB and creates a new database if database files
1482 are not found and the user wants.
1483 @return DB_SUCCESS or error code */
1484 dberr_t
innobase_start_or_create_for_mysql(void)1485 innobase_start_or_create_for_mysql(void)
1486 /*====================================*/
1487 {
1488 	bool		create_new_db = false;
1489 	lsn_t		flushed_lsn;
1490 	ulint		sum_of_data_file_sizes;
1491 	ulint		tablespace_size_in_header;
1492 	dberr_t		err;
1493 	ulint		srv_n_log_files_found = srv_n_log_files;
1494 	mtr_t		mtr;
1495 	purge_pq_t*	purge_queue;
1496 	char		logfilename[10000];
1497 	char*		logfile0	= NULL;
1498 	size_t		dirnamelen;
1499 	unsigned	i = 0;
1500 
1501 	/* Reset the start state. */
1502 	srv_start_state = SRV_START_STATE_NONE;
1503 
1504 	if (srv_force_recovery == SRV_FORCE_NO_LOG_REDO) {
1505 		srv_read_only_mode = true;
1506 	}
1507 
1508 	high_level_read_only = srv_read_only_mode
1509 		|| srv_force_recovery > SRV_FORCE_NO_TRX_UNDO;
1510 
1511 	if (srv_read_only_mode) {
1512 		ib::info() << "Started in read only mode";
1513 
1514 		/* There is no write except to intrinsic table and so turn-off
1515 		doublewrite mechanism completely. */
1516 		srv_use_doublewrite_buf = FALSE;
1517 	}
1518 
1519 #ifdef HAVE_LZO1X
1520 	if (lzo_init() != LZO_E_OK) {
1521 		ib::warn() << "lzo_init() failed, support disabled";
1522 		srv_lzo_disabled = true;
1523 	} else {
1524 		ib::info() << "LZO1X support available";
1525 		srv_lzo_disabled = false;
1526 	}
1527 #endif /* HAVE_LZO1X */
1528 
1529 #ifdef UNIV_LINUX
1530 # ifdef HAVE_FALLOC_PUNCH_HOLE_AND_KEEP_SIZE
1531 	ib::info() << "PUNCH HOLE support available";
1532 # else
1533 	ib::info() << "PUNCH HOLE support not available";
1534 # endif /* HAVE_FALLOC_PUNCH_HOLE_AND_KEEP_SIZE */
1535 #endif /* UNIV_LINUX */
1536 
1537 	if (sizeof(ulint) != sizeof(void*)) {
1538 		ib::error() << "Size of InnoDB's ulint is " << sizeof(ulint)
1539 			<< ", but size of void* is " << sizeof(void*)
1540 			<< ". The sizes should be the same so that on"
1541 			" a 64-bit platforms you can allocate more than 4 GB"
1542 			" of memory.";
1543 	}
1544 
1545 #ifdef UNIV_DEBUG
1546 	ib::info() << "!!!!!!!! UNIV_DEBUG switched on !!!!!!!!!";
1547 #endif
1548 
1549 #ifdef UNIV_IBUF_DEBUG
1550 	ib::info() << "!!!!!!!! UNIV_IBUF_DEBUG switched on !!!!!!!!!";
1551 # ifdef UNIV_IBUF_COUNT_DEBUG
1552 	ib::info() << "!!!!!!!! UNIV_IBUF_COUNT_DEBUG switched on !!!!!!!!!";
1553 	ib::error() << "Crash recovery will fail with UNIV_IBUF_COUNT_DEBUG";
1554 # endif
1555 #endif
1556 
1557 #ifdef UNIV_LOG_LSN_DEBUG
1558 	ib::info() << "!!!!!!!! UNIV_LOG_LSN_DEBUG switched on !!!!!!!!!";
1559 #endif /* UNIV_LOG_LSN_DEBUG */
1560 
1561 #if defined(COMPILER_HINTS_ENABLED)
1562 	ib::info() << "Compiler hints enabled.";
1563 #endif /* defined(COMPILER_HINTS_ENABLED) */
1564 
1565 	ib::info() << IB_ATOMICS_STARTUP_MSG;
1566 	ib::info() << MUTEX_TYPE;
1567 	ib::info() << IB_MEMORY_BARRIER_STARTUP_MSG;
1568 
1569 #ifndef HAVE_MEMORY_BARRIER
1570 #if defined __i386__ || defined __x86_64__ || defined _M_IX86 || defined _M_X64 || defined _WIN32
1571 #else
1572 	ib::warn() << "MySQL was built without a memory barrier capability on"
1573 		" this architecture, which might allow a mutex/rw_lock"
1574 		" violation under high thread concurrency. This may cause a"
1575 		" hang.";
1576 #endif /* IA32 or AMD64 */
1577 #endif /* HAVE_MEMORY_BARRIER */
1578 
1579 	ib::info() << "Compressed tables use zlib " ZLIB_VERSION
1580 #ifdef UNIV_ZIP_DEBUG
1581 	      " with validation"
1582 #endif /* UNIV_ZIP_DEBUG */
1583 	      ;
1584 #ifdef UNIV_ZIP_COPY
1585 	ib::info() << "and extra copying";
1586 #endif /* UNIV_ZIP_COPY */
1587 
1588 	/* Since InnoDB does not currently clean up all its internal data
1589 	structures in MySQL Embedded Server Library server_end(), we
1590 	print an error message if someone tries to start up InnoDB a
1591 	second time during the process lifetime. */
1592 
1593 	if (srv_start_has_been_called) {
1594 		ib::error() << "Startup called second time"
1595 			" during the process lifetime."
1596 			" In the MySQL Embedded Server Library"
1597 			" you cannot call server_init() more than"
1598 			" once during the process lifetime.";
1599 	}
1600 
1601 	srv_start_has_been_called = TRUE;
1602 
1603 	srv_is_being_started = true;
1604 
1605 #ifdef _WIN32
1606 	srv_use_native_aio = TRUE;
1607 
1608 #elif defined(LINUX_NATIVE_AIO)
1609 
1610 	if (srv_use_native_aio) {
1611 		ib::info() << "Using Linux native AIO";
1612 	}
1613 #else
1614 	/* Currently native AIO is supported only on windows and linux
1615 	and that also when the support is compiled in. In all other
1616 	cases, we ignore the setting of innodb_use_native_aio. */
1617 	srv_use_native_aio = FALSE;
1618 #endif /* _WIN32 */
1619 
1620 	/* Register performance schema stages before any real work has been
1621 	started which may need to be instrumented. */
1622 	mysql_stage_register("innodb", srv_stages, UT_ARR_SIZE(srv_stages));
1623 
1624 	if (srv_file_flush_method_str == NULL) {
1625 		/* These are the default options */
1626 #ifndef _WIN32
1627 		srv_unix_file_flush_method = SRV_UNIX_FSYNC;
1628 	} else if (0 == ut_strcmp(srv_file_flush_method_str, "fsync")) {
1629 		srv_unix_file_flush_method = SRV_UNIX_FSYNC;
1630 
1631 	} else if (0 == ut_strcmp(srv_file_flush_method_str, "O_DSYNC")) {
1632 		srv_unix_file_flush_method = SRV_UNIX_O_DSYNC;
1633 
1634 	} else if (0 == ut_strcmp(srv_file_flush_method_str, "O_DIRECT")) {
1635 		srv_unix_file_flush_method = SRV_UNIX_O_DIRECT;
1636 
1637 	} else if (0 == ut_strcmp(srv_file_flush_method_str, "ALL_O_DIRECT")) {
1638 		srv_unix_file_flush_method = SRV_UNIX_ALL_O_DIRECT;
1639 
1640 	} else if (0 == ut_strcmp(srv_file_flush_method_str, "O_DIRECT_NO_FSYNC")) {
1641 		srv_unix_file_flush_method = SRV_UNIX_O_DIRECT_NO_FSYNC;
1642 
1643 	} else if (0 == ut_strcmp(srv_file_flush_method_str, "littlesync")) {
1644 		srv_unix_file_flush_method = SRV_UNIX_LITTLESYNC;
1645 
1646 	} else if (0 == ut_strcmp(srv_file_flush_method_str, "nosync")) {
1647 		srv_unix_file_flush_method = SRV_UNIX_NOSYNC;
1648 	} else if (0 == ut_strcmp(srv_file_flush_method_str, "ALL_O_DIRECT")) {
1649 
1650 		/* ALL_O_DIRECT is currently accepted, but ignored by
1651 		XtraBackup */
1652 		ib::warn() << "ignoring innodb_flush_method=ALL_O_DIRECT\n";
1653 #else
1654 		srv_win_file_flush_method = SRV_WIN_IO_UNBUFFERED;
1655 	} else if (0 == ut_strcmp(srv_file_flush_method_str, "normal")) {
1656 		srv_win_file_flush_method = SRV_WIN_IO_NORMAL;
1657 		srv_use_native_aio = FALSE;
1658 
1659 	} else if (0 == ut_strcmp(srv_file_flush_method_str, "unbuffered")) {
1660 		srv_win_file_flush_method = SRV_WIN_IO_UNBUFFERED;
1661 		srv_use_native_aio = FALSE;
1662 
1663 	} else if (0 == ut_strcmp(srv_file_flush_method_str,
1664 				  "async_unbuffered")) {
1665 		srv_win_file_flush_method = SRV_WIN_IO_UNBUFFERED;
1666 #endif /* _WIN32 */
1667 	} else {
1668 		ib::error() << "Unrecognized value "
1669 			<< srv_file_flush_method_str
1670 			<< " for innodb_flush_method";
1671 		return(srv_init_abort(DB_ERROR));
1672 	}
1673 
1674 	/* Note that the call srv_boot() also changes the values of
1675 	some variables to the units used by InnoDB internally */
1676 
1677 	/* Set the maximum number of threads which can wait for a semaphore
1678 	inside InnoDB: this is the 'sync wait array' size, as well as the
1679 	maximum number of threads that can wait in the 'srv_conc array' for
1680 	their time to enter InnoDB. */
1681 
1682 	srv_max_n_threads = 1   /* io_ibuf_thread */
1683 			    + 1 /* io_log_thread */
1684 			    + 1 /* lock_wait_timeout_thread */
1685 			    + 1 /* srv_error_monitor_thread */
1686 			    + 1 /* srv_monitor_thread */
1687 			    + 1 /* srv_master_thread */
1688 			    + 1 /* srv_purge_coordinator_thread */
1689 			    + 1 /* buf_dump_thread */
1690 			    + 1 /* dict_stats_thread */
1691 			    + 1 /* fts_optimize_thread */
1692 			    + 1 /* recv_writer_thread */
1693 			    + 1 /* trx_rollback_or_clean_all_recovered */
1694 			    + 128 /* added as margin, for use of
1695 				  InnoDB Memcached etc. */
1696 			    + max_connections
1697 			    + srv_n_read_io_threads
1698 			    + srv_n_write_io_threads
1699 			    + srv_n_purge_threads
1700 			    + srv_n_page_cleaners
1701 			    /* FTS Parallel Sort */
1702 			    + fts_sort_pll_degree * FTS_NUM_AUX_INDEX
1703 			      * max_connections;
1704 
1705 	if (srv_buf_pool_size >= BUF_POOL_SIZE_THRESHOLD) {
1706 
1707 		if (srv_buf_pool_instances == srv_buf_pool_instances_default) {
1708 #if defined(_WIN32) && !defined(_WIN64)
1709 			/* Do not allocate too large of a buffer pool on
1710 			Windows 32-bit systems, which can have trouble
1711 			allocating larger single contiguous memory blocks. */
1712 			srv_buf_pool_instances = ut_min(
1713 				static_cast<ulong>(MAX_BUFFER_POOLS),
1714 				static_cast<ulong>(srv_buf_pool_size
1715 						   / (128 * 1024 * 1024)));
1716 #else /* defined(_WIN32) && !defined(_WIN64) */
1717 			/* Default to 8 instances when size > 1GB. */
1718 			srv_buf_pool_instances = 8;
1719 #endif /* defined(_WIN32) && !defined(_WIN64) */
1720 		}
1721 	} else {
1722 		/* If buffer pool is less than 1 GiB, assume fewer
1723 		threads. Also use only one buffer pool instance. */
1724 		if (srv_buf_pool_instances != srv_buf_pool_instances_default
1725 		    && srv_buf_pool_instances != 1) {
1726 			/* We can't distinguish whether the user has explicitly
1727 			started mysqld with --innodb-buffer-pool-instances=0,
1728 			(srv_buf_pool_instances_default is 0) or has not
1729 			specified that option at all. Thus we have the
1730 			limitation that if the user started with =0, we
1731 			will not emit a warning here, but we should actually
1732 			do so. */
1733 			ib::info()
1734 				<< "Adjusting innodb_buffer_pool_instances"
1735 				" from " << srv_buf_pool_instances << " to 1"
1736 				" since innodb_buffer_pool_size is less than "
1737 				<< BUF_POOL_SIZE_THRESHOLD / (1024 * 1024)
1738 				<< " MiB";
1739 		}
1740 
1741 		srv_buf_pool_instances = 1;
1742 	}
1743 
1744 	if (srv_buf_pool_chunk_unit * srv_buf_pool_instances
1745 	    > srv_buf_pool_size) {
1746 		/* Size unit of buffer pool is larger than srv_buf_pool_size.
1747 		adjust srv_buf_pool_chunk_unit for srv_buf_pool_size. */
1748 		srv_buf_pool_chunk_unit
1749 			= static_cast<ulong>(srv_buf_pool_size)
1750 			  / srv_buf_pool_instances;
1751 		if (srv_buf_pool_size % srv_buf_pool_instances != 0) {
1752 			++srv_buf_pool_chunk_unit;
1753 		}
1754 	}
1755 
1756 	srv_buf_pool_size = buf_pool_size_align(srv_buf_pool_size);
1757 
1758 	if (srv_n_page_cleaners > srv_buf_pool_instances) {
1759 		/* limit of page_cleaner parallelizability
1760 		is number of buffer pool instances. */
1761 		srv_n_page_cleaners = srv_buf_pool_instances;
1762 	}
1763 
1764 	srv_boot();
1765 
1766 	ib::info() << (ut_crc32_sse2_enabled ? "Using" : "Not using")
1767 		<< " CPU crc32 instructions";
1768 
1769 	if (!srv_read_only_mode) {
1770 
1771 		mutex_create(LATCH_ID_SRV_MONITOR_FILE,
1772 			     &srv_monitor_file_mutex);
1773 
1774 		if (srv_innodb_status) {
1775 
1776 			srv_monitor_file_name = static_cast<char*>(
1777 				ut_malloc_nokey(
1778 					strlen(fil_path_to_mysql_datadir)
1779 					+ 20 + sizeof "/innodb_status."));
1780 
1781 			sprintf(srv_monitor_file_name,
1782 				"%s/innodb_status." ULINTPF,
1783 				fil_path_to_mysql_datadir,
1784 				os_proc_get_number());
1785 
1786 			srv_monitor_file = fopen(srv_monitor_file_name, "w+");
1787 
1788 			if (!srv_monitor_file) {
1789 				ib::error() << "Unable to create "
1790 					<< srv_monitor_file_name << ": "
1791 					<< strerror(errno);
1792 				return(srv_init_abort(DB_ERROR));
1793 			}
1794 		} else {
1795 
1796 			srv_monitor_file_name = NULL;
1797 			srv_monitor_file = os_file_create_tmpfile(NULL);
1798 
1799 			if (!srv_monitor_file) {
1800 				return(srv_init_abort(DB_ERROR));
1801 			}
1802 		}
1803 
1804 		mutex_create(LATCH_ID_SRV_DICT_TMPFILE,
1805 			     &srv_dict_tmpfile_mutex);
1806 
1807 		srv_dict_tmpfile = os_file_create_tmpfile(NULL);
1808 
1809 		if (!srv_dict_tmpfile) {
1810 			return(srv_init_abort(DB_ERROR));
1811 		}
1812 
1813 		mutex_create(LATCH_ID_SRV_MISC_TMPFILE,
1814 			     &srv_misc_tmpfile_mutex);
1815 
1816 		srv_misc_tmpfile = os_file_create_tmpfile(NULL);
1817 
1818 		if (!srv_misc_tmpfile) {
1819 			return(srv_init_abort(DB_ERROR));
1820 		}
1821 	}
1822 
1823 	srv_n_file_io_threads = srv_n_read_io_threads;
1824 
1825 	srv_n_file_io_threads += srv_n_write_io_threads;
1826 
1827 	if (!srv_read_only_mode) {
1828 		/* Add the log and ibuf IO threads. */
1829 		srv_n_file_io_threads += 2;
1830 	} else {
1831 		ib::info() << "Disabling background log and ibuf IO write"
1832 			<< " threads.";
1833 	}
1834 
1835 	ut_a(srv_n_file_io_threads <= SRV_MAX_N_IO_THREADS);
1836 
1837 	if (!os_aio_init(srv_n_read_io_threads,
1838 			 srv_n_write_io_threads,
1839 			 SRV_MAX_N_PENDING_SYNC_IOS)) {
1840 
1841 		ib::error() << "Cannot initialize AIO sub-system";
1842 
1843 		return(srv_init_abort(DB_ERROR));
1844 	}
1845 
1846 	fil_init(srv_file_per_table ? 50000 : 5000, srv_max_n_open_files);
1847 
1848 	double	size;
1849 	char	unit;
1850 
1851 	if (srv_buf_pool_size >= 1024 * 1024 * 1024) {
1852 		size = ((double) srv_buf_pool_size) / (1024 * 1024 * 1024);
1853 		unit = 'G';
1854 	} else {
1855 		size = ((double) srv_buf_pool_size) / (1024 * 1024);
1856 		unit = 'M';
1857 	}
1858 
1859 	double	chunk_size;
1860 	char	chunk_unit;
1861 
1862 	if (srv_buf_pool_chunk_unit >= 1024 * 1024 * 1024) {
1863 		chunk_size = srv_buf_pool_chunk_unit / 1024.0 / 1024 / 1024;
1864 		chunk_unit = 'G';
1865 	} else {
1866 		chunk_size = srv_buf_pool_chunk_unit / 1024.0 / 1024;
1867 		chunk_unit = 'M';
1868 	}
1869 
1870 	ib::info() << "Initializing buffer pool, total size = "
1871 		<< size << unit << ", instances = " << srv_buf_pool_instances
1872 		<< ", chunk size = " << chunk_size << chunk_unit;
1873 
1874 	err = buf_pool_init(srv_buf_pool_size, srv_buf_pool_instances);
1875 
1876 	if (err != DB_SUCCESS) {
1877 		ib::error() << "Cannot allocate memory for the buffer pool";
1878 
1879 		return(srv_init_abort(DB_ERROR));
1880 	}
1881 
1882 	ib::info() << "Completed initialization of buffer pool";
1883 
1884 #ifdef UNIV_DEBUG
1885 	/* We have observed deadlocks with a 5MB buffer pool but
1886 	the actual lower limit could very well be a little higher. */
1887 
1888 	if (srv_buf_pool_size <= 5 * 1024 * 1024) {
1889 
1890 		ib::info() << "Small buffer pool size ("
1891 			<< srv_buf_pool_size / 1024 / 1024
1892 			<< "M), the flst_validate() debug function can cause a"
1893 			<< " deadlock if the buffer pool fills up.";
1894 	}
1895 #endif /* UNIV_DEBUG */
1896 
1897 	fsp_init();
1898 	log_init();
1899 
1900 	recv_sys_create();
1901 	recv_sys_init(buf_pool_get_curr_size());
1902 	lock_sys_create(srv_lock_table_size);
1903 	srv_start_state_set(SRV_START_STATE_LOCK_SYS);
1904 
1905 	/* Create i/o-handler threads: */
1906 
1907 	for (ulint t = 0; t < srv_n_file_io_threads; ++t) {
1908 
1909 		n[t] = t;
1910 
1911 		os_thread_create(io_handler_thread, n + t, thread_ids + t);
1912 	}
1913 
1914 	/* Even in read-only mode there could be flush job generated by
1915 	intrinsic table operations. */
1916 	buf_flush_page_cleaner_init();
1917 
1918 	os_thread_create(buf_flush_page_cleaner_coordinator,
1919 			 NULL, NULL);
1920 
1921 	for (i = 1; i < srv_n_page_cleaners; ++i) {
1922 		os_thread_create(buf_flush_page_cleaner_worker,
1923 				 NULL, NULL);
1924 	}
1925 
1926 	/* Make sure page cleaner is active. */
1927 	while (!buf_page_cleaner_is_active) {
1928 		os_thread_sleep(10000);
1929 	}
1930 
1931 	srv_start_state_set(SRV_START_STATE_IO);
1932 
1933 	if (srv_n_log_files * srv_log_file_size >= ULINT_MAX) {
1934 		/* fil_io() takes ulint as an argument and we are passing
1935 		(next_offset / UNIV_PAGE_SIZE) to it in log_group_write_buf().
1936 		So (next_offset / UNIV_PAGE_SIZE) must be less than ULINT_MAX.
1937 		So next_offset must be < ULINT_MAX * UNIV_PAGE_SIZE. This
1938 		means that we are limited to ULINT_MAX * UNIV_PAGE_SIZE which
1939 		is 64 TB on 32 bit systems. */
1940 		ib::error() << "Combined size of log files must be < "
1941 			<< ULINT_MAX / 1073741824 * UNIV_PAGE_SIZE << " GB";
1942 
1943 		return(srv_init_abort(DB_ERROR));
1944 	}
1945 
1946 	os_normalize_path(srv_data_home);
1947 
1948 	/* Check if the data files exist or not. */
1949 	err = srv_sys_space.check_file_spec(
1950 		&create_new_db, MIN_EXPECTED_TABLESPACE_SIZE);
1951 
1952 	if (err != DB_SUCCESS) {
1953 		return(srv_init_abort(DB_ERROR));
1954 	}
1955 
1956 	srv_startup_is_before_trx_rollback_phase = !create_new_db;
1957 
1958 	/* Check if undo tablespaces and redo log files exist before creating
1959 	a new system tablespace */
1960 	if (create_new_db) {
1961 		err = srv_check_undo_redo_logs_exists();
1962 		if (err != DB_SUCCESS) {
1963 			return(srv_init_abort(DB_ERROR));
1964 		}
1965 		recv_sys_debug_free();
1966 	}
1967 
1968 	/* Open or create the data files. */
1969 	ulint	sum_of_new_sizes;
1970 
1971 	err = srv_sys_space.open_or_create(
1972 		false, create_new_db, &sum_of_new_sizes, &flushed_lsn);
1973 
1974 	switch (err) {
1975 	case DB_SUCCESS:
1976 		break;
1977 	case DB_CANNOT_OPEN_FILE:
1978 		ib::error()
1979 			<< "Could not open or create the system tablespace. If"
1980 			" you tried to add new data files to the system"
1981 			" tablespace, and it failed here, you should now"
1982 			" edit innodb_data_file_path in my.cnf back to what"
1983 			" it was, and remove the new ibdata files InnoDB"
1984 			" created in this failed attempt. InnoDB only wrote"
1985 			" those files full of zeros, but did not yet use"
1986 			" them in any way. But be careful: do not remove"
1987 			" old data files which contain your precious data!";
1988 		/* fall through */
1989 	default:
1990 		/* Other errors might come from Datafile::validate_first_page() */
1991 		return(srv_init_abort(err));
1992 	}
1993 
1994 	dirnamelen = strlen(srv_log_group_home_dir);
1995 	ut_a(dirnamelen < (sizeof logfilename) - 10 - sizeof "ib_logfile");
1996 	memcpy(logfilename, srv_log_group_home_dir, dirnamelen);
1997 
1998 	/* Add a path separator if needed. */
1999 	if (dirnamelen && logfilename[dirnamelen - 1] != OS_PATH_SEPARATOR) {
2000 		logfilename[dirnamelen++] = OS_PATH_SEPARATOR;
2001 	}
2002 
2003 	srv_log_file_size_requested = srv_log_file_size;
2004 
2005 	if (create_new_db) {
2006 
2007 		buf_flush_sync_all_buf_pools();
2008 
2009 		flushed_lsn = log_get_lsn();
2010 
2011 		err = create_log_files(
2012 			logfilename, dirnamelen, flushed_lsn, logfile0);
2013 
2014 		if (err != DB_SUCCESS) {
2015 			return(srv_init_abort(err));
2016 		}
2017 	} else {
2018 		for (i = 0; i < SRV_N_LOG_FILES_MAX; i++) {
2019 			os_offset_t	size;
2020 			os_file_stat_t	stat_info;
2021 
2022 			sprintf(logfilename + dirnamelen,
2023 				"ib_logfile%u", i);
2024 
2025 			err = os_file_get_status(
2026 				logfilename, &stat_info, false,
2027 				srv_read_only_mode);
2028 
2029 			if (err == DB_NOT_FOUND) {
2030 				if (i == 0) {
2031 					if (flushed_lsn
2032 					    < static_cast<lsn_t>(1000)) {
2033 						ib::error()
2034 							<< "Cannot create"
2035 							" log files because"
2036 							" data files are"
2037 							" corrupt or the"
2038 							" database was not"
2039 							" shut down cleanly"
2040 							" after creating"
2041 							" the data files.";
2042 						return(srv_init_abort(
2043 							DB_ERROR));
2044 					}
2045 
2046 					err = create_log_files(
2047 						logfilename, dirnamelen,
2048 						flushed_lsn, logfile0);
2049 
2050 					if (err != DB_SUCCESS) {
2051 						return(srv_init_abort(err));
2052 					}
2053 
2054 					create_log_files_rename(
2055 						logfilename, dirnamelen,
2056 						flushed_lsn, logfile0);
2057 
2058 					/* Suppress the message about
2059 					crash recovery. */
2060 					flushed_lsn = log_get_lsn();
2061 					goto files_checked;
2062 #if 0
2063 				} else if (i < 2) {
2064 					/* must have at least 2 log files */
2065 					ib::error() << "Only one log file"
2066 						" found.";
2067 					return(srv_init_abort(err));
2068 #endif
2069 				}
2070 
2071 				/* opened all files */
2072 				break;
2073 			}
2074 
2075 			if (!srv_file_check_mode(logfilename)) {
2076 				return(srv_init_abort(DB_ERROR));
2077 			}
2078 
2079 			err = open_log_file(&files[i], logfilename, &size);
2080 
2081 			if (err != DB_SUCCESS) {
2082 				return(srv_init_abort(err));
2083 			}
2084 
2085 			ut_a(size != (os_offset_t) -1);
2086 
2087 			if (size & ((1 << UNIV_PAGE_SIZE_SHIFT) - 1)) {
2088 
2089 				ib::error() << "Log file " << logfilename
2090 					<< " size " << size << " is not a"
2091 					" multiple of innodb_page_size";
2092 				return(srv_init_abort(DB_ERROR));
2093 			}
2094 
2095 			size >>= UNIV_PAGE_SIZE_SHIFT;
2096 
2097 			if (i == 0) {
2098 				srv_log_file_size = size;
2099 			} else if (size != srv_log_file_size) {
2100 
2101 				ib::error() << "Log file " << logfilename
2102 					<< " is of different size "
2103 					<< (size << UNIV_PAGE_SIZE_SHIFT)
2104 					<< " bytes than other log files "
2105 					<< (srv_log_file_size
2106 					    << UNIV_PAGE_SIZE_SHIFT)
2107 					<< " bytes!";
2108 				return(srv_init_abort(DB_ERROR));
2109 			}
2110 		}
2111 
2112 		srv_n_log_files_found = i;
2113 
2114 		/* Create the in-memory file space objects. */
2115 
2116 		sprintf(logfilename + dirnamelen, "ib_logfile%u", 0);
2117 
2118 		/* Disable the doublewrite buffer for log files. */
2119 		fil_space_t*	log_space = fil_space_create(
2120 			"innodb_redo_log",
2121 			SRV_LOG_SPACE_FIRST_ID,
2122 			fsp_flags_set_page_size(0, univ_page_size),
2123 			FIL_TYPE_LOG);
2124 
2125 		ut_a(fil_validate());
2126 		ut_a(log_space);
2127 
2128 		/* srv_log_file_size is measured in pages; if page size is 16KB,
2129 		then we have a limit of 64TB on 32 bit systems */
2130 		ut_a(srv_log_file_size <= ULINT_MAX);
2131 
2132 		for (unsigned j = 0; j < i; j++) {
2133 			sprintf(logfilename + dirnamelen, "ib_logfile%u", j);
2134 
2135 			if (!fil_node_create(logfilename,
2136 					     (ulint) srv_log_file_size,
2137 					     log_space, false, false)) {
2138 				return(srv_init_abort(DB_ERROR));
2139 			}
2140 		}
2141 
2142 		if (!log_group_init(0, i, srv_log_file_size * UNIV_PAGE_SIZE,
2143 				    SRV_LOG_SPACE_FIRST_ID)) {
2144 			return(srv_init_abort(DB_ERROR));
2145 		}
2146 	}
2147 
2148 files_checked:
2149 	/* Open all log files and data files in the system
2150 	tablespace: we keep them open until database
2151 	shutdown */
2152 
2153 	fil_open_log_and_system_tablespace_files();
2154 
2155 	err = srv_undo_tablespaces_init(
2156 		create_new_db,
2157 		FALSE,
2158 		srv_undo_tablespaces,
2159 		&srv_undo_tablespaces_open);
2160 
2161 	/* If the force recovery is set very high then we carry on regardless
2162 	of all errors. Basically this is fingers crossed mode. */
2163 
2164 	if (err != DB_SUCCESS
2165 	    && srv_force_recovery < SRV_FORCE_NO_UNDO_LOG_SCAN) {
2166 
2167 		return(srv_init_abort(err));
2168 	}
2169 
2170 	/* Initialize objects used by dict stats gathering thread, which
2171 	can also be used by recovery if it tries to drop some table */
2172 	if (!srv_read_only_mode) {
2173 		dict_stats_thread_init();
2174 	}
2175 
2176 	trx_sys_file_format_init();
2177 
2178 	trx_sys_create();
2179 
2180 	if (create_new_db) {
2181 
2182 		ut_a(!srv_read_only_mode);
2183 
2184 		mtr_start(&mtr);
2185 
2186 		bool ret = fsp_header_init(0, sum_of_new_sizes, &mtr);
2187 
2188 		mtr_commit(&mtr);
2189 
2190 		if (!ret) {
2191 			return(srv_init_abort(DB_ERROR));
2192 		}
2193 
2194 		/* To maintain backward compatibility we create only
2195 		the first rollback segment before the double write buffer.
2196 		All the remaining rollback segments will be created later,
2197 		after the double write buffer has been created. */
2198 		trx_sys_create_sys_pages();
2199 
2200 		purge_queue = trx_sys_init_at_db_start();
2201 
2202 		DBUG_EXECUTE_IF("check_no_undo",
2203 				ut_ad(purge_queue->empty());
2204 				);
2205 
2206 		/* The purge system needs to create the purge view and
2207 		therefore requires that the trx_sys is inited. */
2208 
2209 		trx_purge_sys_create(srv_n_purge_threads, purge_queue);
2210 
2211 		err = dict_create();
2212 
2213 		if (err != DB_SUCCESS) {
2214 			return(srv_init_abort(err));
2215 		}
2216 
2217 		buf_flush_sync_all_buf_pools();
2218 
2219 		flushed_lsn = log_get_lsn();
2220 
2221 		fil_write_flushed_lsn_to_data_files(flushed_lsn);
2222 
2223 		create_log_files_rename(
2224 			logfilename, dirnamelen, flushed_lsn, logfile0);
2225 
2226 	} else {
2227 
2228 		/* Check if we support the max format that is stamped
2229 		on the system tablespace.
2230 		Note:  We are NOT allowed to make any modifications to
2231 		the TRX_SYS_PAGE_NO page before recovery  because this
2232 		page also contains the max_trx_id etc. important system
2233 		variables that are required for recovery.  We need to
2234 		ensure that we return the system to a state where normal
2235 		recovery is guaranteed to work. We do this by
2236 		invalidating the buffer cache, this will force the
2237 		reread of the page and restoration to its last known
2238 		consistent state, this is REQUIRED for the recovery
2239 		process to work. */
2240 		err = trx_sys_file_format_max_check(
2241 			srv_max_file_format_at_startup);
2242 
2243 		if (err != DB_SUCCESS) {
2244 			return(srv_init_abort(err));
2245 		}
2246 
2247 		/* Invalidate the buffer pool to ensure that we reread
2248 		the page that we read above, during recovery.
2249 		Note that this is not as heavy weight as it seems. At
2250 		this point there will be only ONE page in the buf_LRU
2251 		and there must be no page in the buf_flush list. */
2252 		buf_pool_invalidate();
2253 
2254 		/* Scan and locate truncate log files. Parsed located files
2255 		and add table to truncate information to central vector for
2256 		truncate fix-up action post recovery. */
2257 		err = TruncateLogParser::scan_and_parse(srv_log_group_home_dir);
2258 		if (err != DB_SUCCESS) {
2259 
2260 			return(srv_init_abort(DB_ERROR));
2261 		}
2262 
2263 		/* We always try to do a recovery, even if the database had
2264 		been shut down normally: this is the normal startup path */
2265 
2266 		err = recv_recovery_from_checkpoint_start(flushed_lsn);
2267 
2268 		recv_sys->dblwr.pages.clear();
2269 
2270 		if (err == DB_SUCCESS) {
2271 			/* Initialize the change buffer. */
2272 			err = dict_boot();
2273 		}
2274 
2275 		if (err != DB_SUCCESS) {
2276 
2277 			/* A tablespace was not found during recovery. The
2278 			user must force recovery. */
2279 
2280 			if (err == DB_TABLESPACE_NOT_FOUND) {
2281 
2282 				srv_fatal_error();
2283 
2284 				ut_error;
2285 			}
2286 
2287 			return(srv_init_abort(DB_ERROR));
2288 		}
2289 
2290 		purge_queue = trx_sys_init_at_db_start();
2291 
2292 		if (srv_force_recovery < SRV_FORCE_NO_LOG_REDO) {
2293 			/* Apply the hashed log records to the
2294 			respective file pages, for the last batch of
2295 			recv_group_scan_log_recs(). */
2296 
2297 			if (!srv_read_only_mode) {
2298 				log_mutex_enter();
2299 				recv_apply_hashed_log_recs(FALSE);
2300 				log_mutex_exit();
2301 			} else {
2302 				recv_apply_hashed_log_recs(TRUE);
2303 			}
2304 			DBUG_PRINT("ib_log", ("apply completed"));
2305 
2306 			if (recv_needed_recovery) {
2307 				trx_sys_print_mysql_binlog_offset();
2308 			}
2309 		}
2310 
2311 		if (recv_sys->found_corrupt_log) {
2312 			ib::warn()
2313 				<< "The log file may have been corrupt and it"
2314 				" is possible that the log scan or parsing"
2315 				" did not proceed far enough in recovery."
2316 				" Please run CHECK TABLE on your InnoDB tables"
2317 				" to check that they are ok!"
2318 				" It may be safest to recover your"
2319 				" InnoDB database from a backup!";
2320 		}
2321 
2322 		/* The purge system needs to create the purge view and
2323 		therefore requires that the trx_sys is inited. */
2324 
2325 		trx_purge_sys_create(srv_n_purge_threads, purge_queue);
2326 
2327 		/* recv_recovery_from_checkpoint_finish needs trx lists which
2328 		are initialized in trx_sys_init_at_db_start(). */
2329 
2330 		recv_recovery_from_checkpoint_finish();
2331 
2332 		/* Fix-up truncate of tables in the system tablespace
2333 		if server crashed while truncate was active. The non-
2334 		system tables are done after tablespace discovery. Do
2335 		this now because this procedure assumes that no pages
2336 		have changed since redo recovery.  Tablespace discovery
2337 		can do updates to pages in the system tablespace.*/
2338 		err = truncate_t::fixup_tables_in_system_tablespace();
2339 
2340 		if (srv_apply_log_only) {
2341 
2342 			/* wake main loop of page cleaner up */
2343 			os_event_set(buf_flush_event);
2344 
2345 			goto skip_processes;
2346 		}
2347 
2348 		if (srv_force_recovery < SRV_FORCE_NO_IBUF_MERGE) {
2349 			/* Open or Create SYS_TABLESPACES and SYS_DATAFILES
2350 			so that tablespace names and other metadata can be
2351 			found. */
2352 
2353 			/* do not create SYS_DATAFILES in xtrabackup */
2354 #if 0
2355 			srv_sys_tablespaces_open = true;
2356 			err = dict_create_or_check_sys_tablespace();
2357 			if (err != DB_SUCCESS) {
2358 				return(srv_init_abort(err));
2359 			}
2360 #endif
2361 
2362 			/* The following call is necessary for the insert
2363 			buffer to work with multiple tablespaces. We must
2364 			know the mapping between space id's and .ibd file
2365 			names.
2366 
2367 			In a crash recovery, we check that the info in data
2368 			dictionary is consistent with what we already know
2369 			about space id's from the calls to fil_ibd_load().
2370 
2371 			In a normal startup, we create the space objects for
2372 			every table in the InnoDB data dictionary that has
2373 			an .ibd file.
2374 
2375 			We also determine the maximum tablespace id used.
2376 
2377 			The 'validate' flag indicates that when a tablespace
2378 			is opened, we also read the header page and validate
2379 			the contents to the data dictionary. This is time
2380 			consuming, especially for databases with lots of ibd
2381 			files.  So only do it after a crash and not forcing
2382 			recovery.  Open rw transactions at this point is not
2383 			a good reason to validate. */
2384 			bool validate = recv_needed_recovery
2385 				&& srv_force_recovery == 0;
2386 
2387 			dict_check_tablespaces_and_store_max_id(validate);
2388 		}
2389 
2390 		/* Rotate the encryption key for recovery. It's because
2391 		server could crash in middle of key rotation. Some tablespace
2392 		didn't complete key rotation. Here, we will resume the
2393 		rotation. */
2394 		if (!srv_read_only_mode
2395 		    && srv_force_recovery < SRV_FORCE_NO_LOG_REDO) {
2396 			fil_encryption_rotate();
2397 		}
2398 
2399 
2400 		/* Fix-up truncate of table if server crashed while truncate
2401 		was active. */
2402 		err = truncate_t::fixup_tables_in_non_system_tablespace();
2403 
2404 		if (err != DB_SUCCESS) {
2405 			return(srv_init_abort(err));
2406 		}
2407 
2408 		if (!srv_force_recovery
2409 		    && !recv_sys->found_corrupt_log
2410 		    && (srv_log_file_size_requested != srv_log_file_size
2411 			|| srv_n_log_files_found != srv_n_log_files)) {
2412 
2413 			/* Prepare to replace the redo log files. */
2414 
2415 			if (srv_read_only_mode) {
2416 				ib::error() << "Cannot resize log files"
2417 					" in read-only mode.";
2418 				return(srv_init_abort(DB_READ_ONLY));
2419 			}
2420 
2421 			/* Prepare to delete the old redo log files */
2422 			flushed_lsn = srv_prepare_to_delete_redo_log_files(i);
2423 
2424 			/* Prohibit redo log writes from any other
2425 			threads until creating a log checkpoint at the
2426 			end of create_log_files(). */
2427 			ut_d(recv_no_log_write = true);
2428 			ut_ad(!buf_pool_check_no_pending_io());
2429 
2430 			RECOVERY_CRASH(3);
2431 
2432 			/* Stamp the LSN to the data files. */
2433 			fil_write_flushed_lsn_to_data_files(flushed_lsn);
2434 
2435 			RECOVERY_CRASH(4);
2436 
2437 			/* Close and free the redo log files, so that
2438 			we can replace them. */
2439 			fil_close_log_files(true);
2440 
2441 			RECOVERY_CRASH(5);
2442 
2443 			/* Free the old log file space. */
2444 			log_group_close_all();
2445 
2446 			ib::warn() << "Starting to delete and rewrite log"
2447 				" files.";
2448 
2449 			srv_log_file_size = srv_log_file_size_requested;
2450 
2451 			err = create_log_files(
2452 				logfilename, dirnamelen, flushed_lsn,
2453 				logfile0);
2454 
2455 			if (err != DB_SUCCESS) {
2456 				return(srv_init_abort(err));
2457 			}
2458 
2459 			create_log_files_rename(
2460 				logfilename, dirnamelen, flushed_lsn,
2461 				logfile0);
2462 		}
2463 
2464 		recv_recovery_rollback_active();
2465 
2466 		/* It is possible that file_format tag has never
2467 		been set. In this case we initialize it to minimum
2468 		value.  Important to note that we can do it ONLY after
2469 		we have finished the recovery process so that the
2470 		image of TRX_SYS_PAGE_NO is not stale. */
2471 		if (!srv_read_only_mode) {
2472 			trx_sys_file_format_tag_init();
2473 		}
2474 	}
2475 
2476 	if (!create_new_db && sum_of_new_sizes > 0) {
2477 		/* New data file(s) were added */
2478 		mtr_start(&mtr);
2479 
2480 		fsp_header_inc_size(0, sum_of_new_sizes, &mtr);
2481 
2482 		mtr_commit(&mtr);
2483 
2484 		/* Immediately write the log record about increased tablespace
2485 		size to disk, so that it is durable even if mysqld would crash
2486 		quickly */
2487 
2488 		log_buffer_flush_to_disk();
2489 	}
2490 
2491 	/* Open temp-tablespace and keep it open until shutdown. */
2492 
2493 	err = srv_open_tmp_tablespace(create_new_db, &srv_tmp_space);
2494 
2495 	if (err != DB_SUCCESS) {
2496 		return(srv_init_abort(err));
2497 	}
2498 
2499 	/* Create the doublewrite buffer to a new tablespace */
2500 	if (buf_dblwr == NULL && !buf_dblwr_create()) {
2501 		return(srv_init_abort(DB_ERROR));
2502 	}
2503 
2504 	/* Here the double write buffer has already been created and so
2505 	any new rollback segments will be allocated after the double
2506 	write buffer. The default segment should already exist.
2507 	We create the new segments only if it's a new database or
2508 	the database was shutdown cleanly. */
2509 
2510 	/* Note: When creating the extra rollback segments during an upgrade
2511 	we violate the latching order, even if the change buffer is empty.
2512 	We make an exception in sync0sync.cc and check srv_is_being_started
2513 	for that violation. It cannot create a deadlock because we are still
2514 	running in single threaded mode essentially. Only the IO threads
2515 	should be running at this stage. */
2516 
2517 	/* Deprecate innodb_undo_logs.  But still use it if it is set to
2518 	non-default and innodb_rollback_segments is default. */
2519 	ut_a(srv_rollback_segments > 0);
2520 	ut_a(srv_rollback_segments <= TRX_SYS_N_RSEGS);
2521 //	ut_a(srv_undo_logs > 0);
2522 //	ut_a(srv_undo_logs <= TRX_SYS_N_RSEGS);
2523 	if (srv_undo_logs != 0 && srv_undo_logs < TRX_SYS_N_RSEGS) {
2524 		ib::warn() << deprecated_undo_logs;
2525 		if (srv_rollback_segments == TRX_SYS_N_RSEGS) {
2526 			srv_rollback_segments = srv_undo_logs;
2527 		}
2528 	}
2529 
2530 	/* The number of rsegs that exist in InnoDB is given by status
2531 	variable srv_available_undo_logs. The number of rsegs to use can
2532 	be set using the dynamic global variable srv_rollback_segments. */
2533 
2534 	srv_available_undo_logs = trx_sys_create_rsegs(
2535 		srv_undo_tablespaces, srv_rollback_segments, srv_tmp_undo_logs);
2536 
2537 	if (srv_available_undo_logs == ULINT_UNDEFINED) {
2538 		/* Can only happen if server is read only. */
2539 		ut_a(srv_read_only_mode);
2540 		srv_rollback_segments = ULONG_UNDEFINED;
2541 	} else if (srv_available_undo_logs < srv_rollback_segments
2542 		   && !srv_force_recovery && !recv_needed_recovery) {
2543 		ib::error() << "System or UNDO tablespace is running of out"
2544 			    << " of space";
2545 		/* Should due to out of file space. */
2546 		return(srv_init_abort(DB_ERROR));
2547 	}
2548 
2549 	srv_startup_is_before_trx_rollback_phase = false;
2550 
2551 	if (!srv_read_only_mode) {
2552 		/* Create the thread which watches the timeouts
2553 		for lock waits */
2554 		os_thread_create(
2555 			lock_wait_timeout_thread,
2556 			NULL, thread_ids + 2 + SRV_MAX_N_IO_THREADS);
2557 
2558 		/* Create the thread which warns of long semaphore waits */
2559 		os_thread_create(
2560 			srv_error_monitor_thread,
2561 			NULL, thread_ids + 3 + SRV_MAX_N_IO_THREADS);
2562 
2563 		/* Create the thread which prints InnoDB monitor info */
2564 		os_thread_create(
2565 			srv_monitor_thread,
2566 			NULL, thread_ids + 4 + SRV_MAX_N_IO_THREADS);
2567 
2568 		srv_start_state_set(SRV_START_STATE_MONITOR);
2569 	}
2570 
2571 	/* Do not re-create system tables in XtraBackup */
2572 #if 0
2573 	/* Create the SYS_FOREIGN and SYS_FOREIGN_COLS system tables */
2574 	err = dict_create_or_check_foreign_constraint_tables();
2575 	if (err != DB_SUCCESS) {
2576 		return(srv_init_abort(err));
2577 	}
2578 
2579 	/* Create the SYS_TABLESPACES system table */
2580 	err = dict_create_or_check_sys_tablespace();
2581 	if (err != DB_SUCCESS) {
2582 		return(srv_init_abort(err));
2583 	}
2584 	srv_sys_tablespaces_open = true;
2585 
2586 	/* Create the SYS_VIRTUAL system table */
2587 	err = dict_create_or_check_sys_virtual();
2588 	if (err != DB_SUCCESS) {
2589 		return(srv_init_abort(err));
2590 	}
2591 #endif
2592 
2593 	srv_is_being_started = false;
2594 
2595 	ut_a(trx_purge_state() == PURGE_STATE_INIT);
2596 
2597 	/* Create the master thread which does purge and other utility
2598 	operations */
2599 
2600 	if (!srv_read_only_mode) {
2601 
2602 		os_thread_create(
2603 			srv_master_thread,
2604 			NULL, thread_ids + (1 + SRV_MAX_N_IO_THREADS));
2605 
2606 		srv_start_state_set(SRV_START_STATE_MASTER);
2607 	}
2608 
2609 	if (!srv_read_only_mode
2610 	    && srv_force_recovery < SRV_FORCE_NO_BACKGROUND) {
2611 
2612 		os_thread_create(
2613 			srv_purge_coordinator_thread,
2614 			NULL, thread_ids + 5 + SRV_MAX_N_IO_THREADS);
2615 
2616 		ut_a(UT_ARR_SIZE(thread_ids)
2617 		     > 5 + srv_n_purge_threads + SRV_MAX_N_IO_THREADS);
2618 
2619 		/* We've already created the purge coordinator thread above. */
2620 		for (i = 1; i < srv_n_purge_threads; ++i) {
2621 			os_thread_create(
2622 				srv_worker_thread, NULL,
2623 				thread_ids + 5 + i + SRV_MAX_N_IO_THREADS);
2624 		}
2625 
2626 		srv_start_wait_for_purge_to_start();
2627 
2628 		srv_start_state_set(SRV_START_STATE_PURGE);
2629 	} else {
2630 		purge_sys->state = PURGE_STATE_DISABLED;
2631 	}
2632 
2633 	/* wake main loop of page cleaner up */
2634 	os_event_set(buf_flush_event);
2635 
2636 	sum_of_data_file_sizes = srv_sys_space.get_sum_of_sizes();
2637 	ut_a(sum_of_new_sizes != ULINT_UNDEFINED);
2638 
2639 	tablespace_size_in_header = fsp_header_get_tablespace_size();
2640 
2641 	if (!srv_read_only_mode
2642 	    && !srv_sys_space.can_auto_extend_last_file()
2643 	    && sum_of_data_file_sizes != tablespace_size_in_header) {
2644 
2645 		ib::error() << "Tablespace size stored in header is "
2646 			<< tablespace_size_in_header << " pages, but the sum"
2647 			" of data file sizes is " << sum_of_data_file_sizes
2648 			<< " pages";
2649 
2650 		if (srv_force_recovery == 0
2651 		    && sum_of_data_file_sizes < tablespace_size_in_header) {
2652 			/* This is a fatal error, the tail of a tablespace is
2653 			missing */
2654 
2655 			ib::error()
2656 				<< "Cannot start InnoDB."
2657 				" The tail of the system tablespace is"
2658 				" missing. Have you edited"
2659 				" innodb_data_file_path in my.cnf in an"
2660 				" inappropriate way, removing"
2661 				" ibdata files from there?"
2662 				" You can set innodb_force_recovery=1"
2663 				" in my.cnf to force"
2664 				" a startup if you are trying"
2665 				" to recover a badly corrupt database.";
2666 
2667 			return(srv_init_abort(DB_ERROR));
2668 		}
2669 	}
2670 
2671 	if (!srv_read_only_mode
2672 	    && srv_sys_space.can_auto_extend_last_file()
2673 	    && sum_of_data_file_sizes < tablespace_size_in_header) {
2674 
2675 #if 0
2676 		ib::error() << "Tablespace size stored in header is "
2677 			<< tablespace_size_in_header << " pages, but the sum"
2678 			" of data file sizes is only "
2679 			<< sum_of_data_file_sizes << " pages";
2680 
2681 		if (srv_force_recovery == 0) {
2682 
2683 			ib::error()
2684 				<< "Cannot start InnoDB. The tail of"
2685 				" the system tablespace is"
2686 				" missing. Have you edited"
2687 				" innodb_data_file_path in my.cnf in an"
2688 				" InnoDB: inappropriate way, removing"
2689 				" ibdata files from there?"
2690 				" You can set innodb_force_recovery=1"
2691 				" in my.cnf to force"
2692 				" InnoDB: a startup if you are trying to"
2693 				" recover a badly corrupt database.";
2694 
2695 			return(srv_init_abort(DB_ERROR));
2696 		}
2697 #endif
2698 	}
2699 
2700 	if (srv_rebuild_indexes) {
2701 		xb_compact_rebuild_indexes();
2702 	}
2703 
2704 	if (srv_print_verbose_log) {
2705 		ib::info() << INNODB_VERSION_STR
2706 			<< " started; log sequence number "
2707 			<< srv_start_lsn;
2708 	}
2709 
2710 	if (srv_force_recovery > 0) {
2711 		ib::info() << "!!! innodb_force_recovery is set to "
2712 			<< srv_force_recovery << " !!!";
2713 	}
2714 
2715 	if (srv_force_recovery == 0) {
2716 		/* In the insert buffer we may have even bigger tablespace
2717 		id's, because we may have dropped those tablespaces, but
2718 		insert buffer merge has not had time to clean the records from
2719 		the ibuf tree. */
2720 
2721 		ibuf_update_max_tablespace_id();
2722 	}
2723 
2724 	if (!srv_read_only_mode) {
2725 		if (create_new_db) {
2726 			srv_buffer_pool_load_at_startup = FALSE;
2727 		}
2728 
2729 		/* Don't create the buffer pool dump/load thread
2730 		for XtraBackup */
2731 #if 0
2732 		os_thread_create(buf_dump_thread, NULL, NULL);
2733 #endif
2734 
2735 		/* Create the dict stats gathering thread */
2736 		os_thread_create(dict_stats_thread, NULL, NULL);
2737 
2738 		/* Create the thread that will optimize the FTS sub-system. */
2739 		fts_optimize_init();
2740 
2741 		srv_start_state_set(SRV_START_STATE_STAT);
2742 	}
2743 
2744 skip_processes:
2745 	/* Create the buffer pool resize thread */
2746 	os_thread_create(buf_resize_thread, NULL, NULL);
2747 
2748 	srv_was_started = TRUE;
2749 	return(DB_SUCCESS);
2750 }
2751 
2752 #if 0
2753 /********************************************************************
2754 Sync all FTS cache before shutdown */
2755 static
2756 void
2757 srv_fts_close(void)
2758 /*===============*/
2759 {
2760 	dict_table_t*	table;
2761 
2762 	for (table = UT_LIST_GET_FIRST(dict_sys->table_LRU);
2763 	     table; table = UT_LIST_GET_NEXT(table_LRU, table)) {
2764 		fts_t*          fts = table->fts;
2765 
2766 		if (fts != NULL) {
2767 			fts_sync_table(table);
2768 		}
2769 	}
2770 
2771 	for (table = UT_LIST_GET_FIRST(dict_sys->table_non_LRU);
2772 	     table; table = UT_LIST_GET_NEXT(table_LRU, table)) {
2773 		fts_t*          fts = table->fts;
2774 
2775 		if (fts != NULL) {
2776 			fts_sync_table(table);
2777 		}
2778 	}
2779 }
2780 #endif
2781 
2782 /****************************************************************//**
2783 Shuts down the InnoDB database.
2784 @return DB_SUCCESS or error code */
2785 dberr_t
innobase_shutdown_for_mysql(void)2786 innobase_shutdown_for_mysql(void)
2787 /*=============================*/
2788 {
2789 	if (!srv_was_started) {
2790 		if (srv_is_being_started) {
2791 			ib::warn() << "Shutting down an improperly started,"
2792 				" or created database!";
2793 		}
2794 
2795 		return(DB_SUCCESS);
2796 	}
2797 
2798 	if (!srv_read_only_mode && !srv_apply_log_only) {
2799 		fts_optimize_shutdown();
2800 		dict_stats_shutdown();
2801 	}
2802 
2803 	/* 1. Flush the buffer pool to disk, write the current lsn to
2804 	the tablespace header(s), and copy all log data to archive.
2805 	The step 1 is the real InnoDB shutdown. The remaining steps 2 - ...
2806 	just free data structures after the shutdown. */
2807 
2808 	logs_empty_and_mark_files_at_shutdown();
2809 
2810 	if (srv_conc_get_active_threads() != 0) {
2811 		ib::warn() << "Query counter shows "
2812 			<< srv_conc_get_active_threads() << " queries still"
2813 			" inside InnoDB at shutdown";
2814 	}
2815 
2816 	/* 2. Make all threads created by InnoDB to exit */
2817 	srv_shutdown_all_bg_threads();
2818 
2819 
2820 	if (srv_monitor_file) {
2821 		fclose(srv_monitor_file);
2822 		srv_monitor_file = 0;
2823 		if (srv_monitor_file_name) {
2824 			unlink(srv_monitor_file_name);
2825 			ut_free(srv_monitor_file_name);
2826 		}
2827 	}
2828 
2829 	if (srv_dict_tmpfile) {
2830 		fclose(srv_dict_tmpfile);
2831 		srv_dict_tmpfile = 0;
2832 	}
2833 
2834 	if (srv_misc_tmpfile) {
2835 		fclose(srv_misc_tmpfile);
2836 		srv_misc_tmpfile = 0;
2837 	}
2838 
2839 	if (!srv_read_only_mode) {
2840 		dict_stats_thread_deinit();
2841 	}
2842 
2843 	/* This must be disabled before closing the buffer pool
2844 	and closing the data dictionary.  */
2845 	btr_search_disable(true);
2846 
2847 	ibuf_close();
2848 	log_shutdown();
2849 	trx_sys_file_format_close();
2850 	trx_sys_close();
2851 	lock_sys_close();
2852 
2853 	trx_pool_close();
2854 
2855 	/* We don't create these mutexes in RO mode because we don't create
2856 	the temp files that the cover. */
2857 	if (!srv_read_only_mode) {
2858 		mutex_free(&srv_monitor_file_mutex);
2859 		mutex_free(&srv_dict_tmpfile_mutex);
2860 		mutex_free(&srv_misc_tmpfile_mutex);
2861 	}
2862 
2863 	dict_close();
2864 	btr_search_sys_free();
2865 
2866 	/* 3. Free all InnoDB's own mutexes and the os_fast_mutexes inside
2867 	them */
2868 	os_aio_free();
2869 	que_close();
2870 	row_mysql_close();
2871 	srv_free();
2872 	fil_close();
2873 
2874 	/* 4. Free all allocated memory */
2875 
2876 	pars_lexer_close();
2877 	log_mem_free();
2878 	buf_pool_free(srv_buf_pool_instances);
2879 
2880 	/* 6. Free the thread management resoruces. */
2881 	os_thread_free();
2882 
2883 	/* 7. Free the synchronisation infrastructure. */
2884 	sync_check_close();
2885 
2886 	if (dict_foreign_err_file) {
2887 		fclose(dict_foreign_err_file);
2888 	}
2889 
2890 	if (srv_print_verbose_log) {
2891 		ib::info() << "Shutdown completed; log sequence number "
2892 			<< srv_shutdown_lsn;
2893 	}
2894 
2895 	srv_was_started = FALSE;
2896 	srv_start_has_been_called = FALSE;
2897 
2898 	return(DB_SUCCESS);
2899 }
2900 #endif /* !UNIV_HOTBACKUP */
2901 
2902 
2903 /********************************************************************
2904 Signal all per-table background threads to shutdown, and wait for them to do
2905 so. */
2906 void
srv_shutdown_table_bg_threads(void)2907 srv_shutdown_table_bg_threads(void)
2908 /*===============================*/
2909 {
2910 	dict_table_t*	table;
2911 	dict_table_t*	first;
2912 	dict_table_t*	last = NULL;
2913 
2914 	mutex_enter(&dict_sys->mutex);
2915 
2916 	/* Signal all threads that they should stop. */
2917 	table = UT_LIST_GET_FIRST(dict_sys->table_LRU);
2918 	first = table;
2919 	while (table) {
2920 		dict_table_t*	next;
2921 		fts_t*		fts = table->fts;
2922 
2923 		if (fts != NULL) {
2924 			fts_start_shutdown(table, fts);
2925 		}
2926 
2927 		next = UT_LIST_GET_NEXT(table_LRU, table);
2928 
2929 		if (!next) {
2930 			last = table;
2931 		}
2932 
2933 		table = next;
2934 	}
2935 
2936 	/* We must release dict_sys->mutex here; if we hold on to it in the
2937 	loop below, we will deadlock if any of the background threads try to
2938 	acquire it (for example, the FTS thread by calling que_eval_sql).
2939 
2940 	Releasing it here and going through dict_sys->table_LRU without
2941 	holding it is safe because:
2942 
2943 	 a) MySQL only starts the shutdown procedure after all client
2944 	 threads have been disconnected and no new ones are accepted, so no
2945 	 new tables are added or old ones dropped.
2946 
2947 	 b) Despite its name, the list is not LRU, and the order stays
2948 	 fixed.
2949 
2950 	To safeguard against the above assumptions ever changing, we store
2951 	the first and last items in the list above, and then check that
2952 	they've stayed the same below. */
2953 
2954 	mutex_exit(&dict_sys->mutex);
2955 
2956 	/* Wait for the threads of each table to stop. This is not inside
2957 	the above loop, because by signaling all the threads first we can
2958 	overlap their shutting down delays. */
2959 	table = UT_LIST_GET_FIRST(dict_sys->table_LRU);
2960 	ut_a(first == table);
2961 	while (table) {
2962 		dict_table_t*	next;
2963 		fts_t*		fts = table->fts;
2964 
2965 		if (fts != NULL) {
2966 			fts_shutdown(table, fts);
2967 		}
2968 
2969 		next = UT_LIST_GET_NEXT(table_LRU, table);
2970 
2971 		if (table == last) {
2972 			ut_a(!next);
2973 		}
2974 
2975 		table = next;
2976 	}
2977 }
2978 
2979 /** Get the meta-data filename from the table name for a
2980 single-table tablespace.
2981 @param[in]	table		table object
2982 @param[out]	filename	filename
2983 @param[in]	max_len		filename max length */
2984 void
srv_get_meta_data_filename(dict_table_t * table,char * filename,ulint max_len)2985 srv_get_meta_data_filename(
2986 	dict_table_t*	table,
2987 	char*		filename,
2988 	ulint		max_len)
2989 {
2990 	ulint		len;
2991 	char*		path;
2992 
2993 	/* Make sure the data_dir_path is set. */
2994 	dict_get_and_save_data_dir_path(table, false);
2995 
2996 	if (DICT_TF_HAS_DATA_DIR(table->flags)) {
2997 		ut_a(table->data_dir_path);
2998 
2999 		path = fil_make_filepath(
3000 			table->data_dir_path, table->name.m_name, CFG, true);
3001 	} else {
3002 		path = fil_make_filepath(NULL, table->name.m_name, CFG, false);
3003 	}
3004 
3005 	ut_a(path);
3006 	len = ut_strlen(path);
3007 	ut_a(max_len >= len);
3008 
3009 	strcpy(filename, path);
3010 
3011 	ut_free(path);
3012 }
3013 
3014 /** Get the encryption-data filename from the table name for a
3015 single-table tablespace.
3016 @param[in]	table		table object
3017 @param[out]	filename	filename
3018 @param[in]	max_len		filename max length */
3019 void
srv_get_encryption_data_filename(dict_table_t * table,char * filename,ulint max_len)3020 srv_get_encryption_data_filename(
3021 	dict_table_t*	table,
3022 	char*		filename,
3023 	ulint		max_len)
3024 {
3025 	ulint		len;
3026 	char*		path;
3027 
3028 	/* Make sure the data_dir_path is set. */
3029 	dict_get_and_save_data_dir_path(table, false);
3030 
3031 	if (DICT_TF_HAS_DATA_DIR(table->flags)) {
3032 		ut_a(table->data_dir_path);
3033 
3034 		path = fil_make_filepath(
3035 			table->data_dir_path, table->name.m_name, CFP, true);
3036 	} else {
3037 		path = fil_make_filepath(NULL, table->name.m_name, CFP, false);
3038 	}
3039 
3040 	ut_a(path);
3041 	len = ut_strlen(path);
3042 	ut_a(max_len >= len);
3043 
3044 	strcpy(filename, path);
3045 
3046 	ut_free(path);
3047 }
3048