1 /*****************************************************************************
2 
3 Copyright (c) 1996, 2017, Oracle and/or its affiliates. All rights reserved.
4 Copyright (c) 2008, Google Inc.
5 Copyright (c) 2009, Percona Inc.
6 
7 Portions of this file contain modifications contributed and copyrighted by
8 Google, Inc. Those modifications are gratefully acknowledged and are described
9 briefly in the InnoDB documentation. The contributions by Google are
10 incorporated with their permission, and subject to the conditions contained in
11 the file COPYING.Google.
12 
13 Portions of this file contain modifications contributed and copyrighted
14 by Percona Inc.. Those modifications are
15 gratefully acknowledged and are described briefly in the InnoDB
16 documentation. The contributions by Percona Inc. are incorporated with
17 their permission, and subject to the conditions contained in the file
18 COPYING.Percona.
19 
20 This program is free software; you can redistribute it and/or modify
21 it under the terms of the GNU General Public License, version 2.0,
22 as published by the Free Software Foundation.
23 
24 This program is also distributed with certain software (including
25 but not limited to OpenSSL) that is licensed under separate terms,
26 as designated in a particular file or component or in included license
27 documentation.  The authors of MySQL hereby grant you an additional
28 permission to link the program and your derivative works with the
29 separately licensed software that they have included with MySQL.
30 
31 This program is distributed in the hope that it will be useful,
32 but WITHOUT ANY WARRANTY; without even the implied warranty of
33 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
34 GNU General Public License, version 2.0, for more details.
35 
36 You should have received a copy of the GNU General Public License along with
37 this program; if not, write to the Free Software Foundation, Inc.,
38 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
39 
40 *****************************************************************************/
41 
42 /********************************************************************//**
43 @file srv/srv0start.cc
44 Starts the InnoDB database server
45 
46 Created 2/16/1996 Heikki Tuuri
47 *************************************************************************/
48 
49 #include "mysqld.h"
50 #include "pars0pars.h"
51 #include "row0ftsort.h"
52 #include "ut0mem.h"
53 #include "mem0mem.h"
54 #include "data0data.h"
55 #include "data0type.h"
56 #include "dict0dict.h"
57 #include "buf0buf.h"
58 #include "buf0dump.h"
59 #include "os0file.h"
60 #include "os0thread.h"
61 #include "fil0fil.h"
62 #include "fsp0fsp.h"
63 #include "rem0rec.h"
64 #include "mtr0mtr.h"
65 #include "log0log.h"
66 #include "log0online.h"
67 #include "log0recv.h"
68 #include "page0page.h"
69 #include "page0cur.h"
70 #include "trx0trx.h"
71 #include "trx0sys.h"
72 #include "btr0btr.h"
73 #include "btr0cur.h"
74 #include "rem0rec.h"
75 #include "ibuf0ibuf.h"
76 #include "srv0start.h"
77 #include "srv0srv.h"
78 #ifndef UNIV_HOTBACKUP
79 # include "trx0rseg.h"
80 # include "os0proc.h"
81 # include "sync0sync.h"
82 # include "buf0flu.h"
83 # include "buf0rea.h"
84 # include "dict0boot.h"
85 # include "dict0load.h"
86 # include "dict0stats_bg.h"
87 # include "que0que.h"
88 # include "usr0sess.h"
89 # include "lock0lock.h"
90 # include "trx0roll.h"
91 # include "trx0purge.h"
92 # include "lock0lock.h"
93 # include "pars0pars.h"
94 # include "btr0sea.h"
95 # include "rem0cmp.h"
96 # include "dict0crea.h"
97 # include "row0ins.h"
98 # include "row0sel.h"
99 # include "row0upd.h"
100 # include "row0row.h"
101 # include "row0mysql.h"
102 # include "btr0pcur.h"
103 # include "os0sync.h"
104 # include "zlib.h"
105 # include "ut0crc32.h"
106 
107 /** Log sequence number immediately after startup */
108 UNIV_INTERN lsn_t	srv_start_lsn;
109 /** Log sequence number at shutdown */
110 UNIV_INTERN lsn_t	srv_shutdown_lsn;
111 
112 #ifdef HAVE_DARWIN_THREADS
113 # include <sys/utsname.h>
114 /** TRUE if the F_FULLFSYNC option is available */
115 UNIV_INTERN ibool	srv_have_fullfsync = FALSE;
116 #endif
117 
118 /** TRUE if a raw partition is in use */
119 UNIV_INTERN ibool	srv_start_raw_disk_in_use = FALSE;
120 
/** UNDO tablespaces start with this space id. */
122 ulint	srv_undo_space_id_start;
123 
124 /** TRUE if the server is being started, before rolling back any
125 incomplete transactions */
126 UNIV_INTERN ibool	srv_startup_is_before_trx_rollback_phase = FALSE;
127 /** TRUE if the server is being started */
128 UNIV_INTERN ibool	srv_is_being_started = FALSE;
129 /** TRUE if the server was successfully started */
130 UNIV_INTERN ibool	srv_was_started = FALSE;
131 /** TRUE if innobase_start_or_create_for_mysql() has been called */
132 static ibool		srv_start_has_been_called = FALSE;
133 
134 /** At a shutdown this value climbs from SRV_SHUTDOWN_NONE to
135 SRV_SHUTDOWN_CLEANUP and then to SRV_SHUTDOWN_LAST_PHASE, and so on */
136 UNIV_INTERN enum srv_shutdown_state	srv_shutdown_state = SRV_SHUTDOWN_NONE;
137 
138 /** Files comprising the system tablespace */
139 static pfs_os_file_t	files[1000];
140 
141 /** io_handler_thread parameters for thread identification */
142 static ulint		n[SRV_MAX_N_IO_THREADS];
143 /** io_handler_thread identifiers, 32 is the maximum number of purge threads.
144 The extra elements at the end are allocated as follows:
145 SRV_MAX_N_IO_THREADS + 1: srv_master_thread
146 SRV_MAX_N_IO_THREADS + 2: lock_wait_timeout_thread
147 SRV_MAX_N_IO_THREADS + 3: srv_error_monitor_thread
148 SRV_MAX_N_IO_THREADS + 4: srv_monitor_thread
149 SRV_MAX_N_IO_THREADS + 5: srv_redo_log_follow_thread
150 SRV_MAX_N_IO_THREADS + 6: srv_purge_coordinator_thread
151 SRV_MAX_N_IO_THREADS + 7: srv_worker_thread
152 ...
153 SRV_MAX_N_IO_THREADS + 7 + srv_n_purge_threads - 1: srv_worker_thread */
154 static os_thread_id_t	thread_ids[SRV_MAX_N_IO_THREADS + 7
155 				   + SRV_MAX_N_PURGE_THREADS];
156 
157 /** We use this mutex to test the return value of pthread_mutex_trylock
158    on successful locking. HP-UX does NOT return 0, though Linux et al do. */
159 static os_fast_mutex_t	srv_os_test_mutex;
160 
161 /** Name of srv_monitor_file */
162 static char*	srv_monitor_file_name;
163 #endif /* !UNIV_HOTBACKUP */
164 
165 /** Default undo tablespace size in UNIV_PAGEs count (10MB). */
166 static const ulint SRV_UNDO_TABLESPACE_SIZE_IN_PAGES =
167 	((1024 * 1024) * 10) / UNIV_PAGE_SIZE_DEF;
168 
/** Limits on the number of pending I/O requests */
170 #define SRV_N_PENDING_IOS_PER_THREAD	OS_AIO_N_PENDING_IOS_PER_THREAD
171 #define SRV_MAX_N_PENDING_SYNC_IOS	100
172 
/** Converts a size in bytes to a number of pages of UNIV_PAGE_SIZE bytes,
after first rounding the size down to whole megabytes. The round off to MB
is similar to the one done in srv_parse_megabytes(). The whole expansion is
parenthesized so that the macro behaves as a single operand when it is
used inside a larger expression or under a cast. */
#define CALC_NUMBER_OF_PAGES(size)  (((size) / (1024 * 1024)) * \
				  ((1024 * 1024) / (UNIV_PAGE_SIZE)))
176 #ifdef UNIV_PFS_THREAD
177 /* Keys to register InnoDB threads with performance schema */
178 UNIV_INTERN mysql_pfs_key_t	io_handler_thread_key;
179 UNIV_INTERN mysql_pfs_key_t	srv_lock_timeout_thread_key;
180 UNIV_INTERN mysql_pfs_key_t	srv_error_monitor_thread_key;
181 UNIV_INTERN mysql_pfs_key_t	srv_monitor_thread_key;
182 UNIV_INTERN mysql_pfs_key_t	srv_master_thread_key;
183 UNIV_INTERN mysql_pfs_key_t	srv_purge_thread_key;
184 UNIV_INTERN mysql_pfs_key_t	srv_log_tracking_thread_key;
185 #endif /* UNIV_PFS_THREAD */
186 
187 /*********************************************************************//**
188 Convert a numeric string that optionally ends in G or M or K, to a number
189 containing megabytes.
190 @return	next character in string */
191 static
192 char*
srv_parse_megabytes(char * str,ulint * megs)193 srv_parse_megabytes(
194 /*================*/
195 	char*	str,	/*!< in: string containing a quantity in bytes */
196 	ulint*	megs)	/*!< out: the number in megabytes */
197 {
198 	char*	endp;
199 	ulint	size;
200 
201 	size = strtoul(str, &endp, 10);
202 
203 	str = endp;
204 
205 	switch (*str) {
206 	case 'G': case 'g':
207 		size *= 1024;
208 		/* fall through */
209 	case 'M': case 'm':
210 		str++;
211 		break;
212 	case 'K': case 'k':
213 		size /= 1024;
214 		str++;
215 		break;
216 	default:
217 		size /= 1024 * 1024;
218 		break;
219 	}
220 
221 	*megs = size;
222 	return(str);
223 }
224 
225 /*********************************************************************//**
226 Check if a file can be opened in read-write mode.
227 @return	true if it doesn't exist or can be opened in rw mode. */
228 static
229 bool
srv_file_check_mode(const char * name)230 srv_file_check_mode(
231 /*================*/
232 	const char*	name)		/*!< in: filename to check */
233 {
234 	os_file_stat_t	stat;
235 
236 	memset(&stat, 0x0, sizeof(stat));
237 
238 	dberr_t		err = os_file_get_status(name, &stat, true);
239 
240 	if (err == DB_FAIL) {
241 
242 		ib_logf(IB_LOG_LEVEL_ERROR,
243 			"os_file_get_status() failed on '%s'. Can't determine "
244 			"file permissions", name);
245 
246 		return(false);
247 
248 	} else if (err == DB_SUCCESS) {
249 
250 		/* Note: stat.rw_perm is only valid of files */
251 
252 		if (stat.type == OS_FILE_TYPE_FILE) {
253 
254 			if (!stat.rw_perm) {
255 
256 				ib_logf(IB_LOG_LEVEL_ERROR,
257 					"%s can't be opened in %s mode",
258 					name,
259 					srv_read_only_mode
260 					? "read" : "read-write");
261 
262 				return(false);
263 			}
264 		} else {
265 			/* Not a regular file, bail out. */
266 
267 			ib_logf(IB_LOG_LEVEL_ERROR,
268 				"'%s' not a regular file.", name);
269 
270 			return(false);
271 		}
272 	} else {
273 
274 		/* This is OK. If the file create fails on RO media, there
275 		is nothing we can do. */
276 
277 		ut_a(err == DB_NOT_FOUND);
278 	}
279 
280 	return(true);
281 }
282 
283 /*********************************************************************//**
284 Reads the data files and their sizes from a character string given in
285 the .cnf file.
286 @return	TRUE if ok, FALSE on parse error */
287 UNIV_INTERN
288 ibool
srv_parse_data_file_paths_and_sizes(char * str)289 srv_parse_data_file_paths_and_sizes(
290 /*================================*/
291 	char*	str)	/*!< in/out: the data file path string */
292 {
293 	char*	input_str;
294 	char*	path;
295 	ulint	size;
296 	ulint	i	= 0;
297 
298 	srv_auto_extend_last_data_file = FALSE;
299 	srv_last_file_size_max = 0;
300 	srv_data_file_names = NULL;
301 	srv_data_file_sizes = NULL;
302 	srv_data_file_is_raw_partition = NULL;
303 
304 	input_str = str;
305 
306 	/* First calculate the number of data files and check syntax:
307 	path:size[M | G];path:size[M | G]... . Note that a Windows path may
308 	contain a drive name and a ':'. */
309 
310 	while (*str != '\0') {
311 		path = str;
312 
313 		while ((*str != ':' && *str != '\0')
314 		       || (*str == ':'
315 			   && (*(str + 1) == '\\' || *(str + 1) == '/'
316 			       || *(str + 1) == ':'))) {
317 			str++;
318 		}
319 
320 		if (*str == '\0') {
321 			return(FALSE);
322 		}
323 
324 		str++;
325 
326 		str = srv_parse_megabytes(str, &size);
327 
328 		if (0 == strncmp(str, ":autoextend",
329 				 (sizeof ":autoextend") - 1)) {
330 
331 			str += (sizeof ":autoextend") - 1;
332 
333 			if (0 == strncmp(str, ":max:",
334 					 (sizeof ":max:") - 1)) {
335 
336 				str += (sizeof ":max:") - 1;
337 
338 				str = srv_parse_megabytes(str, &size);
339 			}
340 
341 			if (*str != '\0') {
342 
343 				return(FALSE);
344 			}
345 		}
346 
347 		if (strlen(str) >= 6
348 		    && *str == 'n'
349 		    && *(str + 1) == 'e'
350 		    && *(str + 2) == 'w') {
351 			str += 3;
352 		}
353 
354 		if (*str == 'r' && *(str + 1) == 'a' && *(str + 2) == 'w') {
355 			str += 3;
356 		}
357 
358 		if (size == 0) {
359 			return(FALSE);
360 		}
361 
362 		i++;
363 
364 		if (*str == ';') {
365 			str++;
366 		} else if (*str != '\0') {
367 
368 			return(FALSE);
369 		}
370 	}
371 
372 	if (i == 0) {
373 		/* If innodb_data_file_path was defined it must contain
374 		at least one data file definition */
375 
376 		return(FALSE);
377 	}
378 
379 	srv_data_file_names = static_cast<char**>(
380 		malloc(i * sizeof *srv_data_file_names));
381 
382 	srv_data_file_sizes = static_cast<ulint*>(
383 		malloc(i * sizeof *srv_data_file_sizes));
384 
385 	srv_data_file_is_raw_partition = static_cast<ulint*>(
386 		malloc(i * sizeof *srv_data_file_is_raw_partition));
387 
388 	srv_n_data_files = i;
389 
390 	/* Then store the actual values to our arrays */
391 
392 	str = input_str;
393 	i = 0;
394 
395 	while (*str != '\0') {
396 		path = str;
397 
398 		/* Note that we must step over the ':' in a Windows path;
399 		a Windows path normally looks like C:\ibdata\ibdata1:1G, but
400 		a Windows raw partition may have a specification like
401 		\\.\C::1Gnewraw or \\.\PHYSICALDRIVE2:1Gnewraw */
402 
403 		while ((*str != ':' && *str != '\0')
404 		       || (*str == ':'
405 			   && (*(str + 1) == '\\' || *(str + 1) == '/'
406 			       || *(str + 1) == ':'))) {
407 			str++;
408 		}
409 
410 		if (*str == ':') {
411 			/* Make path a null-terminated string */
412 			*str = '\0';
413 			str++;
414 		}
415 
416 		str = srv_parse_megabytes(str, &size);
417 
418 		srv_data_file_names[i] = path;
419 		srv_data_file_sizes[i] = size;
420 
421 		if (0 == strncmp(str, ":autoextend",
422 				 (sizeof ":autoextend") - 1)) {
423 
424 			srv_auto_extend_last_data_file = TRUE;
425 
426 			str += (sizeof ":autoextend") - 1;
427 
428 			if (0 == strncmp(str, ":max:",
429 					 (sizeof ":max:") - 1)) {
430 
431 				str += (sizeof ":max:") - 1;
432 
433 				str = srv_parse_megabytes(
434 					str, &srv_last_file_size_max);
435 			}
436 
437 			if (*str != '\0') {
438 
439 				return(FALSE);
440 			}
441 		}
442 
443 		(srv_data_file_is_raw_partition)[i] = 0;
444 
445 		if (strlen(str) >= 6
446 		    && *str == 'n'
447 		    && *(str + 1) == 'e'
448 		    && *(str + 2) == 'w') {
449 			str += 3;
450 			/* Initialize new raw device only during bootstrap */
451 			(srv_data_file_is_raw_partition)[i] =
452 			opt_bootstrap ? SRV_NEW_RAW : SRV_OLD_RAW;
453 		}
454 
455 		if (*str == 'r' && *(str + 1) == 'a' && *(str + 2) == 'w') {
456 			str += 3;
457 
458 			/* Initialize new raw device only during bootstrap */
459 			if ((srv_data_file_is_raw_partition)[i] == 0) {
460 				(srv_data_file_is_raw_partition)[i] =
461 				opt_bootstrap ? SRV_NEW_RAW : SRV_OLD_RAW;
462 			}
463 		}
464 
465 		i++;
466 
467 		if (*str == ';') {
468 			str++;
469 		}
470 	}
471 
472 	return(TRUE);
473 }
474 
475 /*********************************************************************//**
476 Frees the memory allocated by srv_parse_data_file_paths_and_sizes()
477 and srv_parse_log_group_home_dirs(). */
478 UNIV_INTERN
479 void
srv_free_paths_and_sizes(void)480 srv_free_paths_and_sizes(void)
481 /*==========================*/
482 {
483 	free(srv_data_file_names);
484 	srv_data_file_names = NULL;
485 	free(srv_data_file_sizes);
486 	srv_data_file_sizes = NULL;
487 	free(srv_data_file_is_raw_partition);
488 	srv_data_file_is_raw_partition = NULL;
489 }
490 
491 #ifndef UNIV_HOTBACKUP
492 
493 static ulint io_tid_i = 0;
494 
495 /********************************************************************//**
496 I/o-handler thread function.
497 @return	OS_THREAD_DUMMY_RETURN */
498 extern "C" UNIV_INTERN
499 os_thread_ret_t
DECLARE_THREAD(io_handler_thread)500 DECLARE_THREAD(io_handler_thread)(
501 /*==============================*/
502 	void*	arg)	/*!< in: pointer to the number of the segment in
503 			the aio array */
504 {
505 	ulint	segment;
506 	ulint	tid_i = os_atomic_increment_ulint(&io_tid_i, 1) - 1;
507 
508 	ut_ad(tid_i < srv_n_file_io_threads);
509 
510 	segment = *((ulint*) arg);
511 
512 	srv_io_tids[tid_i] = os_thread_get_tid();
513 	os_thread_set_priority(srv_io_tids[tid_i], srv_sched_priority_io);
514 
515 #ifdef UNIV_DEBUG_THREAD_CREATION
516 	fprintf(stderr, "Io handler thread %lu starts, id %lu\n", segment,
517 		os_thread_pf(os_thread_get_curr_id()));
518 #endif
519 
520 #ifdef UNIV_PFS_THREAD
521 	pfs_register_thread(io_handler_thread_key);
522 #endif /* UNIV_PFS_THREAD */
523 
524 	while (srv_shutdown_state != SRV_SHUTDOWN_EXIT_THREADS) {
525 		srv_current_thread_priority = srv_io_thread_priority;
526 		fil_aio_wait(segment);
527 	}
528 
529 	/* We count the number of threads in os_thread_exit(). A created
530 	thread should always use that to exit and not use return() to exit.
531 	The thread actually never comes here because it is exited in an
532 	os_event_wait(). */
533 
534 	os_thread_exit(NULL);
535 
536 	OS_THREAD_DUMMY_RETURN;
537 }
538 #endif /* !UNIV_HOTBACKUP */
539 
540 /*********************************************************************//**
541 Normalizes a directory path for Windows: converts slashes to backslashes. */
542 UNIV_INTERN
543 void
srv_normalize_path_for_win(char * str MY_ATTRIBUTE ((unused)))544 srv_normalize_path_for_win(
545 /*=======================*/
546 	char*	str MY_ATTRIBUTE((unused)))	/*!< in/out: null-terminated
547 						character string */
548 {
549 #ifdef __WIN__
550 	for (; *str; str++) {
551 
552 		if (*str == '/') {
553 			*str = '\\';
554 		}
555 	}
556 #endif
557 }
558 
559 #ifndef UNIV_HOTBACKUP
560 /*********************************************************************//**
561 Creates a log file.
562 @return	DB_SUCCESS or error code */
563 static MY_ATTRIBUTE((nonnull, warn_unused_result))
564 dberr_t
create_log_file(pfs_os_file_t * file,const char * name)565 create_log_file(
566 /*============*/
567 	pfs_os_file_t*	file,	/*!< out: file handle */
568 	const char*	name)	/*!< in: log file name */
569 {
570 	ibool		ret;
571 
572 	*file = os_file_create(
573 		innodb_file_log_key, name,
574 		OS_FILE_CREATE|OS_FILE_ON_ERROR_NO_EXIT, OS_FILE_NORMAL,
575 		OS_LOG_FILE, &ret);
576 
577 	if (!ret) {
578 		ib_logf(IB_LOG_LEVEL_ERROR, "Cannot create %s", name);
579 		return(DB_ERROR);
580 	}
581 
582 	ib_logf(IB_LOG_LEVEL_INFO,
583 		"Setting log file %s size to %lu MB",
584 		name, (ulong) srv_log_file_size
585 		>> (20 - UNIV_PAGE_SIZE_SHIFT));
586 
587 	ret = os_file_set_size(name, *file,
588 			       (os_offset_t) srv_log_file_size
589 			       << UNIV_PAGE_SIZE_SHIFT);
590 	if (!ret) {
591 		ib_logf(IB_LOG_LEVEL_ERROR, "Cannot set log file"
592 			" %s to size %lu MB", name, (ulong) srv_log_file_size
593 			>> (20 - UNIV_PAGE_SIZE_SHIFT));
594 		return(DB_ERROR);
595 	}
596 
597 	ret = os_file_close(*file);
598 	ut_a(ret);
599 
600 	return(DB_SUCCESS);
601 }
602 
603 /** Initial number of the first redo log file */
604 #define INIT_LOG_FILE0	(SRV_N_LOG_FILES_MAX + 1)
605 
/** Debug aid for testing crash recovery: terminates the process with exit
code 3 when srv_force_recovery_crash equals the crash point number x,
after printing the value to stderr. Expands to a no-op when DBUG_OFF is
defined. The argument is parenthesized so that any expression can be
passed as the crash point. */
#ifdef DBUG_OFF
# define RECOVERY_CRASH(x) do {} while(0)
#else
# define RECOVERY_CRASH(x) do {						\
	if (srv_force_recovery_crash == (x)) {				\
		fprintf(stderr, "innodb_force_recovery_crash=%lu\n",	\
			srv_force_recovery_crash);			\
		fflush(stderr);						\
		exit(3);						\
	}								\
} while (0)
#endif
618 
/*********************************************************************//**
Creates all log files. Unless a new database is being created, old
ib_logfile<N> files are removed first. The first new file is created under
the temporary name ib_logfile<INIT_LOG_FILE0>; the files are then
registered in the log tablespace, the log group is initialized, the files
are opened and the logs are reset at the given LSN.
@return	DB_SUCCESS or error code; DB_READ_ONLY if the server runs in
read-only mode, or the error returned by create_log_file() */
static
dberr_t
create_log_files(
/*=============*/
	bool	create_new_db,	/*!< in: TRUE if new database is being
				created */
	char*	logfilename,	/*!< in/out: buffer for log file name */
	size_t	dirnamelen,	/*!< in: length of the directory path */
	lsn_t	lsn,		/*!< in: FIL_PAGE_FILE_FLUSH_LSN value */
	char*&	logfile0)	/*!< out: name of the first log file */
{
	if (srv_read_only_mode) {
		ib_logf(IB_LOG_LEVEL_ERROR,
			"Cannot create log files in read-only mode");
		return(DB_READ_ONLY);
	}

	/* We prevent system tablespace creation with existing files in
	data directory. So we do not delete log files when creating new system
	tablespace */
	if (!create_new_db) {
		/* Remove any old log files. The range includes
		INIT_LOG_FILE0 itself, so a leftover temporary first file
		is removed as well. */
		for (unsigned i = 0; i <= INIT_LOG_FILE0; i++) {
			/* The file name is written after the directory part
			already present in logfilename. */
			sprintf(logfilename + dirnamelen, "ib_logfile%u", i);

			/* Ignore errors about non-existent files or files
			that cannot be removed. The create_log_file() will
			return an error when the file exists. */
#ifdef __WIN__
			DeleteFile((LPCTSTR) logfilename);
#else
			unlink(logfilename);
#endif
			/* Crashing after deleting the first
			file should be recoverable. The buffer
			pool was clean, and we can simply create
			all log files from the scratch. */
			RECOVERY_CRASH(6);
		}
	}

	/* NOTE(review): presumably asserts that the buffer pool has no
	pending i/o; confirm the return value semantics of
	buf_pool_check_no_pending_io(). */
	ut_ad(!buf_pool_check_no_pending_io());

	RECOVERY_CRASH(7);

	/* Create the files. File number 0 is created under the temporary
	name ib_logfile<INIT_LOG_FILE0>; all others get their final name. */
	for (unsigned i = 0; i < srv_n_log_files; i++) {
		sprintf(logfilename + dirnamelen,
			"ib_logfile%u", i ? i : INIT_LOG_FILE0);

		dberr_t err = create_log_file(&files[i], logfilename);

		if (err != DB_SUCCESS) {
			return(err);
		}
	}

	RECOVERY_CRASH(8);

	/* We did not create the first log file initially as
	ib_logfile0, so that crash recovery cannot find it until it
	has been completed and renamed. */
	sprintf(logfilename + dirnamelen, "ib_logfile%u", INIT_LOG_FILE0);

	/* Register the log tablespace ... */
	fil_space_create(
		logfilename, SRV_LOG_SPACE_FIRST_ID,
		fsp_flags_set_page_size(0, UNIV_PAGE_SIZE),
		FIL_LOG);
	ut_a(fil_validate());

	/* ... and its first file. The value returned by fil_node_create()
	is handed back to the caller through logfile0. */
	logfile0 = fil_node_create(
		logfilename, (ulint) srv_log_file_size,
		SRV_LOG_SPACE_FIRST_ID, FALSE);
	ut_a(logfile0);

	/* Register the remaining log files in the same tablespace. */
	for (unsigned i = 1; i < srv_n_log_files; i++) {
		sprintf(logfilename + dirnamelen, "ib_logfile%u", i);

		if (!fil_node_create(
			    logfilename,
			    (ulint) srv_log_file_size,
			    SRV_LOG_SPACE_FIRST_ID, FALSE)) {
			ut_error;
		}
	}

#ifdef UNIV_LOG_ARCHIVE
	/* Create the file space object for archived logs. */
	fil_space_create("arch_log_space", SRV_LOG_SPACE_FIRST_ID + 1,
			 0, FIL_LOG);
#endif
	/* srv_log_file_size is in pages, so it is multiplied by
	UNIV_PAGE_SIZE before being passed on. */
	log_group_init(0, srv_n_log_files,
		       srv_log_file_size * UNIV_PAGE_SIZE,
		       SRV_LOG_SPACE_FIRST_ID,
		       SRV_LOG_SPACE_FIRST_ID + 1);

	fil_open_log_and_system_tablespace_files();

	/* Create a log checkpoint. */
	mutex_enter(&log_sys->mutex);
	ut_d(recv_no_log_write = FALSE);
	recv_reset_logs(
#ifdef UNIV_LOG_ARCHIVE
		UT_LIST_GET_FIRST(log_sys->log_groups)->archived_file_no,
		TRUE,
#endif
		lsn);
	mutex_exit(&log_sys->mutex);

	return(DB_SUCCESS);
}
732 
/*********************************************************************//**
Renames the first log file from its temporary name (logfile0) to
ib_logfile0. The log files are flushed and closed before the rename and
reopened afterwards; the rename itself and the update of the name stored
in logfile0 are done while holding log_sys->mutex. */
static
void
create_log_files_rename(
/*====================*/
	char*	logfilename,	/*!< in/out: buffer for log file name */
	size_t	dirnamelen,	/*!< in: length of the directory path */
	lsn_t	lsn,		/*!< in: FIL_PAGE_FILE_FLUSH_LSN value */
	char*	logfile0)	/*!< in/out: name of the first log file */
{
	/* If innodb_flush_method=O_DSYNC,
	we need to explicitly flush the log buffers. */
	fil_flush(SRV_LOG_SPACE_FIRST_ID);
	/* Close the log files, so that we can rename
	the first one. */
	fil_close_log_files(false);

	/* Rename the first log file, now that a log
	checkpoint has been created. */
	sprintf(logfilename + dirnamelen, "ib_logfile%u", 0);

	RECOVERY_CRASH(9);

	ib_logf(IB_LOG_LEVEL_INFO,
		"Renaming log file %s to %s", logfile0, logfilename);

	mutex_enter(&log_sys->mutex);
	/* The temporary name is expected to be exactly two characters
	longer than the final one, which also guarantees that the strcpy()
	below fits into the logfile0 buffer. */
	ut_ad(strlen(logfile0) == 2 + strlen(logfilename));
	ibool success = os_file_rename(
		innodb_file_log_key, logfile0, logfilename);
	ut_a(success);

	RECOVERY_CRASH(10);

	/* Replace the first file with ib_logfile0. */
	strcpy(logfile0, logfilename);
	mutex_exit(&log_sys->mutex);

	fil_open_log_and_system_tablespace_files();

	/* lsn is only used for this message. */
	ib_logf(IB_LOG_LEVEL_WARN, "New log files created, LSN=" LSN_PF, lsn);
}
776 
777 /*********************************************************************//**
778 Opens a log file.
779 @return	DB_SUCCESS or error code */
780 static MY_ATTRIBUTE((nonnull, warn_unused_result))
781 dberr_t
open_log_file(pfs_os_file_t * file,const char * name,os_offset_t * size)782 open_log_file(
783 /*==========*/
784 	pfs_os_file_t*	file,	/*!< out: file handle */
785 	const char*	name,	/*!< in: log file name */
786 	os_offset_t*	size)	/*!< out: file size */
787 {
788 	ibool	ret;
789 
790 	*file = os_file_create(innodb_file_log_key, name,
791 			       OS_FILE_OPEN, OS_FILE_AIO,
792 			       OS_LOG_FILE, &ret);
793 	if (!ret) {
794 		ib_logf(IB_LOG_LEVEL_ERROR, "Unable to open '%s'", name);
795 		return(DB_ERROR);
796 	}
797 
798 	*size = os_file_get_size(*file);
799 
800 	ret = os_file_close(*file);
801 	ut_a(ret);
802 	return(DB_SUCCESS);
803 }
804 
805 /*********************************************************************//**
806 Creates or opens database data files and closes them.
807 @return	DB_SUCCESS or error code */
808 static MY_ATTRIBUTE((nonnull, warn_unused_result))
809 dberr_t
open_or_create_data_files(ibool * create_new_db,lsn_t * min_arch_log_no,lsn_t * max_arch_log_no,lsn_t * min_flushed_lsn,lsn_t * max_flushed_lsn,ulint * sum_of_new_sizes)810 open_or_create_data_files(
811 /*======================*/
812 	ibool*		create_new_db,	/*!< out: TRUE if new database should be
813 					created */
814 #ifdef UNIV_LOG_ARCHIVE
815 	lsn_t*		min_arch_log_no,/*!< out: min of archived log
816 					numbers in data files */
817 	lsn_t*		max_arch_log_no,/*!< out: max of archived log
818 					numbers in data files */
819 #endif /* UNIV_LOG_ARCHIVE */
820 	lsn_t*		min_flushed_lsn,/*!< out: min of flushed lsn
821 					values in data files */
822 	lsn_t*		max_flushed_lsn,/*!< out: max of flushed lsn
823 					values in data files */
824 	ulint*		sum_of_new_sizes)/*!< out: sum of sizes of the
825 					new files added */
826 {
827 	ibool		ret;
828 	ulint		i;
829 	ibool		one_opened	= FALSE;
830 	ibool		one_created	= FALSE;
831 	os_offset_t	size;
832 	ulint		flags;
833 	ulint		space;
834 	ulint		rounded_size_pages;
835 	char		name[10000];
836 
837 	if (srv_n_data_files >= 1000) {
838 
839 		ib_logf(IB_LOG_LEVEL_ERROR,
840 			"Can only have < 1000 data files, you have "
841 			"defined %lu", (ulong) srv_n_data_files);
842 
843 		return(DB_ERROR);
844 	}
845 
846 	*sum_of_new_sizes = 0;
847 
848 	*create_new_db = FALSE;
849 
850 	srv_normalize_path_for_win(srv_data_home);
851 
852 	for (i = 0; i < srv_n_data_files; i++) {
853 		ulint	dirnamelen;
854 
855 		srv_normalize_path_for_win(srv_data_file_names[i]);
856 		dirnamelen = strlen(srv_data_home);
857 
858 		ut_a(dirnamelen + strlen(srv_data_file_names[i])
859 		     < (sizeof name) - 1);
860 
861 		memcpy(name, srv_data_home, dirnamelen);
862 
863 		/* Add a path separator if needed. */
864 		if (dirnamelen && name[dirnamelen - 1] != SRV_PATH_SEPARATOR) {
865 			name[dirnamelen++] = SRV_PATH_SEPARATOR;
866 		}
867 
868 		strcpy(name + dirnamelen, srv_data_file_names[i]);
869 
		/* Note: It will return true if the file doesn't exist. */
871 
872 		if (!srv_file_check_mode(name)) {
873 
874 			return(DB_FAIL);
875 
876 		} else if (srv_data_file_is_raw_partition[i] == 0) {
877 
878 			/* First we try to create the file: if it already
879 			exists, ret will get value FALSE */
880 
881 			files[i] = os_file_create(
882 				innodb_file_data_key, name, OS_FILE_CREATE,
883 				OS_FILE_NORMAL, OS_DATA_FILE, &ret);
884 
885 			if (srv_read_only_mode) {
886 
887 				if (ret) {
888 					goto size_check;
889 				}
890 
891 				ib_logf(IB_LOG_LEVEL_ERROR,
892 					"Opening %s failed!", name);
893 
894 				return(DB_ERROR);
895 
896 			} else if (!ret
897 				   && os_file_get_last_error(false)
898 				   != OS_FILE_ALREADY_EXISTS
899 #ifdef UNIV_AIX
900 				   /* AIX 5.1 after security patch ML7 may have
901 			           errno set to 0 here, which causes our
902 				   function to return 100; work around that
903 				   AIX problem */
904 				   && os_file_get_last_error(false) != 100
905 #endif /* UNIV_AIX */
906 			    ) {
907 				ib_logf(IB_LOG_LEVEL_ERROR,
908 					"Creating or opening %s failed!",
909 					name);
910 
911 				return(DB_ERROR);
912 			}
913 
914 		} else if (srv_data_file_is_raw_partition[i] == SRV_NEW_RAW) {
915 
916 			ut_a(!srv_read_only_mode);
917 
918 			/* The partition is opened, not created; then it is
919 			written over */
920 
921 			srv_start_raw_disk_in_use = TRUE;
922 			srv_created_new_raw = TRUE;
923 
924 			files[i] = os_file_create(
925 				innodb_file_data_key, name, OS_FILE_OPEN_RAW,
926 				OS_FILE_NORMAL, OS_DATA_FILE, &ret);
927 
928 			if (!ret) {
929 				ib_logf(IB_LOG_LEVEL_ERROR,
930 					"Error in opening %s", name);
931 
932 				return(DB_ERROR);
933 			}
934 
935 			const char*	check_msg;
936 			check_msg = fil_read_first_page(
937 				files[i], FALSE, &flags, &space,
938 				min_flushed_lsn, max_flushed_lsn);
939 
940 			/* If first page is valid, don't overwrite DB.
941 			It prevents overwriting DB when mysql_install_db
942 			starts mysqld multiple times during bootstrap. */
943 			if (check_msg == NULL) {
944 
945 				srv_created_new_raw = FALSE;
946 				ret = FALSE;
947 			}
948 
949 		} else if (srv_data_file_is_raw_partition[i] == SRV_OLD_RAW) {
950 			srv_start_raw_disk_in_use = TRUE;
951 
952 			ret = FALSE;
953 		} else {
954 			ut_a(0);
955 		}
956 
957 		if (ret == FALSE) {
958 			const char* check_msg;
959 			/* We open the data file */
960 
961 			if (one_created) {
962 				ib_logf(IB_LOG_LEVEL_ERROR,
963 					"Data files can only be added at "
964 					"the end of a tablespace, but "
965 					"data file %s existed beforehand.",
966 					name);
967 				return(DB_ERROR);
968 			}
969 			if (srv_data_file_is_raw_partition[i] == SRV_OLD_RAW) {
970 				ut_a(!srv_read_only_mode);
971 				files[i] = os_file_create(
972 					innodb_file_data_key,
973 					name, OS_FILE_OPEN_RAW,
974 					OS_FILE_NORMAL, OS_DATA_FILE, &ret);
975 			} else if (i == 0) {
976 				files[i] = os_file_create(
977 					innodb_file_data_key,
978 					name, OS_FILE_OPEN_RETRY,
979 					OS_FILE_NORMAL, OS_DATA_FILE, &ret);
980 			} else {
981 				files[i] = os_file_create(
982 					innodb_file_data_key,
983 					name, OS_FILE_OPEN, OS_FILE_NORMAL,
984 					OS_DATA_FILE, &ret);
985 			}
986 
987 			if (!ret) {
988 
989 				os_file_get_last_error(true);
990 
991 				ib_logf(IB_LOG_LEVEL_ERROR,
992 					"Can't open '%s'", name);
993 
994 				return(DB_ERROR);
995 			}
996 
997 			if (srv_data_file_is_raw_partition[i] == SRV_OLD_RAW) {
998 
999 				goto skip_size_check;
1000 			}
1001 
1002 size_check:
1003 			size = os_file_get_size(files[i]);
1004 			ut_a(size != (os_offset_t) -1);
1005 
			/* Under some error conditions like disk full
			scenarios or file size reaching filesystem
1008 			limit the data file could contain an incomplete
1009 			extent at the end. When we extend a data file
1010 			and if some failure happens, then also the data
1011 			file could contain an incomplete extent.  So we
1012 			need to round the size downward to a megabyte.*/
1013 
1014 			rounded_size_pages = (ulint) CALC_NUMBER_OF_PAGES(size);
1015 
1016 			if (i == srv_n_data_files - 1
1017 			    && srv_auto_extend_last_data_file) {
1018 
1019 				if (srv_data_file_sizes[i] > rounded_size_pages
1020 				    || (srv_last_file_size_max > 0
1021 					&& srv_last_file_size_max
1022 					< rounded_size_pages)) {
1023 
1024 					ib_logf(IB_LOG_LEVEL_ERROR,
1025 						"auto-extending "
1026 						"data file %s is "
1027 						"of a different size "
1028 						"%lu pages (rounded "
1029 						"down to MB) than specified "
1030 						"in the .cnf file: "
1031 						"initial %lu pages, "
1032 						"max %lu (relevant if "
1033 						"non-zero) pages!",
1034 						name,
1035 						(ulong) rounded_size_pages,
1036 						(ulong) srv_data_file_sizes[i],
1037 						(ulong)
1038 						srv_last_file_size_max);
1039 
1040 					return(DB_ERROR);
1041 				}
1042 
1043 				srv_data_file_sizes[i] = rounded_size_pages;
1044 			}
1045 
1046 			if (rounded_size_pages != srv_data_file_sizes[i]) {
1047 
1048 				ib_logf(IB_LOG_LEVEL_ERROR,
1049 					"Data file %s is of a different "
1050 					"size %lu pages (rounded down to MB) "
1051 					"than specified in the .cnf file "
1052 					"%lu pages!",
1053 					name,
1054 					(ulong) rounded_size_pages,
1055 					(ulong) srv_data_file_sizes[i]);
1056 
1057 				return(DB_ERROR);
1058 			}
1059 skip_size_check:
1060 
1061 			/* This is the earliest location where we can load
1062 			the double write buffer. */
1063 			if (i == 0) {
1064 				buf_dblwr_init_or_load_pages(
1065 					files[i], srv_data_file_names[i], true);
1066 			}
1067 
1068 			bool retry = true;
1069 check_first_page:
1070 			check_msg = fil_read_first_page(
1071 				files[i], one_opened, &flags, &space,
1072 				min_flushed_lsn, max_flushed_lsn);
1073 
1074 			if (check_msg) {
1075 
1076 				if (retry) {
1077 					fsp_open_info	fsp;
1078 					const ulint	page_no = 0;
1079 
1080 					retry = false;
1081 					fsp.id = 0;
1082 					fsp.filepath = srv_data_file_names[i];
1083 					fsp.file = files[i];
1084 
1085 					if (fil_user_tablespace_restore_page(
1086 						&fsp, page_no)) {
1087 						goto check_first_page;
1088 					}
1089 				}
1090 
1091 				ib_logf(IB_LOG_LEVEL_ERROR,
1092 						"%s in data file %s",
1093 						check_msg, name);
1094 				return(DB_ERROR);
1095 			}
1096 
1097 			/* The first file of the system tablespace must
1098 			have space ID = TRX_SYS_SPACE.  The FSP_SPACE_ID
1099 			field in files greater than ibdata1 are unreliable. */
1100 			ut_a(one_opened || space == TRX_SYS_SPACE);
1101 
1102 			/* Check the flags for the first system tablespace
1103 			file only. */
1104 			if (!one_opened
1105 			    && UNIV_PAGE_SIZE
1106 			       != fsp_flags_get_page_size(flags)) {
1107 
1108 				ib_logf(IB_LOG_LEVEL_ERROR,
1109 					"Data file \"%s\" uses page size %lu,"
1110 					"but the start-up parameter "
1111 					"is --innodb-page-size=%lu",
1112 					name,
1113 					fsp_flags_get_page_size(flags),
1114 					UNIV_PAGE_SIZE);
1115 
1116 				return(DB_ERROR);
1117 			}
1118 
1119 			one_opened = TRUE;
1120 		} else if (!srv_read_only_mode) {
1121 			/* We created the data file and now write it full of
1122 			zeros */
1123 
1124 			one_created = TRUE;
1125 
1126 			if (i > 0) {
1127 				ib_logf(IB_LOG_LEVEL_INFO,
1128 					"Data file %s did not"
1129 					" exist: new to be created",
1130 					name);
1131 			} else {
1132 				ib_logf(IB_LOG_LEVEL_INFO,
1133 					"The first specified "
1134 					"data file %s did not exist: "
1135 					"a new database to be created!",
1136 					name);
1137 
1138 				*create_new_db = TRUE;
1139 			}
1140 
1141 			ib_logf(IB_LOG_LEVEL_INFO,
1142 				"Setting file %s size to %lu MB",
1143 				name,
1144 				(ulong) (srv_data_file_sizes[i]
1145 					 >> (20 - UNIV_PAGE_SIZE_SHIFT)));
1146 
1147 			ib_logf(IB_LOG_LEVEL_INFO,
1148 				"Database physically writes the"
1149 				" file full: wait...");
1150 
1151 			ret = os_file_set_size(
1152 				name, files[i],
1153 				(os_offset_t) srv_data_file_sizes[i]
1154 				<< UNIV_PAGE_SIZE_SHIFT);
1155 
1156 			if (!ret) {
1157 				ib_logf(IB_LOG_LEVEL_ERROR,
1158 					"Error in creating %s: "
1159 					"probably out of disk space",
1160 					name);
1161 
1162 				return(DB_ERROR);
1163 			}
1164 
1165 			*sum_of_new_sizes += srv_data_file_sizes[i];
1166 		}
1167 
1168 		ret = os_file_close(files[i]);
1169 		ut_a(ret);
1170 
1171 		if (i == 0) {
1172 			flags = fsp_flags_set_page_size(0, UNIV_PAGE_SIZE);
1173 			fil_space_create(name, 0, flags, FIL_TABLESPACE);
1174 		}
1175 
1176 		ut_a(fil_validate());
1177 
1178 		if (!fil_node_create(name, srv_data_file_sizes[i], 0,
1179 				     srv_data_file_is_raw_partition[i] != 0)) {
1180 			return(DB_ERROR);
1181 		}
1182 	}
1183 
1184 	return(DB_SUCCESS);
1185 }
1186 
1187 /*********************************************************************//**
1188 Create undo tablespace.
1189 @return	DB_SUCCESS or error code */
1190 static
1191 dberr_t
srv_undo_tablespace_create(const char * name,ulint size)1192 srv_undo_tablespace_create(
1193 /*=======================*/
1194 	const char*	name,		/*!< in: tablespace name */
1195 	ulint		size)		/*!< in: tablespace size in pages */
1196 {
1197 	pfs_os_file_t	fh;
1198 	ibool		ret;
1199 	dberr_t		err = DB_SUCCESS;
1200 
1201 	os_file_create_subdirs_if_needed(name);
1202 
1203 	fh = os_file_create(
1204 		innodb_file_data_key,
1205 		name,
1206 		srv_read_only_mode ? OS_FILE_OPEN : OS_FILE_CREATE,
1207 		OS_FILE_NORMAL, OS_DATA_FILE, &ret);
1208 
1209 	if (srv_read_only_mode && ret) {
1210 		ib_logf(IB_LOG_LEVEL_INFO,
1211 			"%s opened in read-only mode", name);
1212 	} else if (ret == FALSE) {
1213 		if (os_file_get_last_error(false) != OS_FILE_ALREADY_EXISTS
1214 #ifdef UNIV_AIX
1215 			/* AIX 5.1 after security patch ML7 may have
1216 			errno set to 0 here, which causes our function
1217 			to return 100; work around that AIX problem */
1218 		    && os_file_get_last_error(false) != 100
1219 #endif /* UNIV_AIX */
1220 		) {
1221 			ib_logf(IB_LOG_LEVEL_ERROR,
1222 				"Can't create UNDO tablespace %s", name);
1223 		} else {
1224 			ib_logf(IB_LOG_LEVEL_ERROR,
1225 				"Creating system tablespace with"
1226 				" existing undo tablespaces is not"
1227 				" supported. Please delete all undo"
1228 				" tablespaces before creating new"
1229 				" system tablespace.");
1230 		}
1231 		err = DB_ERROR;
1232 	} else {
1233 		ut_a(!srv_read_only_mode);
1234 
1235 		/* We created the data file and now write it full of zeros */
1236 
1237 		ib_logf(IB_LOG_LEVEL_INFO,
1238 			"Data file %s did not exist: new to be created",
1239 			name);
1240 
1241 		ib_logf(IB_LOG_LEVEL_INFO,
1242 			"Setting file %s size to %lu MB",
1243 			name, size >> (20 - UNIV_PAGE_SIZE_SHIFT));
1244 
1245 		ib_logf(IB_LOG_LEVEL_INFO,
1246 			"Database physically writes the file full: wait...");
1247 
1248 		ret = os_file_set_size(name, fh, size << UNIV_PAGE_SIZE_SHIFT);
1249 
1250 		if (!ret) {
1251 			ib_logf(IB_LOG_LEVEL_INFO,
1252 				"Error in creating %s: probably out of "
1253 				"disk space", name);
1254 
1255 			err = DB_ERROR;
1256 		}
1257 
1258 		os_file_close(fh);
1259 	}
1260 
1261 	return(err);
1262 }
1263 
1264 /*********************************************************************//**
1265 Open an undo tablespace.
1266 @return	DB_SUCCESS or error code */
1267 static
1268 dberr_t
srv_undo_tablespace_open(const char * name,ulint space)1269 srv_undo_tablespace_open(
1270 /*=====================*/
1271 	const char*	name,		/*!< in: tablespace name */
1272 	ulint		space)		/*!< in: tablespace id */
1273 {
1274 	pfs_os_file_t	fh;
1275 	dberr_t		err	= DB_ERROR;
1276 	ibool		ret;
1277 	ulint		flags;
1278 
1279 	if (!srv_file_check_mode(name)) {
1280 		ib_logf(IB_LOG_LEVEL_ERROR,
1281 			"UNDO tablespaces must be %s!",
1282 			srv_read_only_mode ? "writable" : "readable");
1283 
1284 		return(DB_ERROR);
1285 	}
1286 
1287 	fh = os_file_create(
1288 		innodb_file_data_key, name,
1289 		OS_FILE_OPEN_RETRY
1290 		| OS_FILE_ON_ERROR_NO_EXIT
1291 		| OS_FILE_ON_ERROR_SILENT,
1292 		OS_FILE_NORMAL,
1293 		OS_DATA_FILE,
1294 		&ret);
1295 
1296 	/* If the file open was successful then load the tablespace. */
1297 
1298 	if (ret) {
1299 		os_offset_t	size;
1300 
1301 		size = os_file_get_size(fh);
1302 		ut_a(size != (os_offset_t) -1);
1303 
1304 		ret = os_file_close(fh);
1305 		ut_a(ret);
1306 
1307 		/* Load the tablespace into InnoDB's internal
1308 		data structures. */
1309 
1310 		/* We set the biggest space id to the undo tablespace
1311 		because InnoDB hasn't opened any other tablespace apart
1312 		from the system tablespace. */
1313 
1314 		fil_set_max_space_id_if_bigger(space);
1315 
1316 		/* Set the compressed page size to 0 (non-compressed) */
1317 		flags = fsp_flags_set_page_size(0, UNIV_PAGE_SIZE);
1318 		fil_space_create(name, space, flags, FIL_TABLESPACE);
1319 
1320 		ut_a(fil_validate());
1321 
1322 		os_offset_t	n_pages = size / UNIV_PAGE_SIZE;
1323 
1324 		/* On 64 bit Windows ulint can be 32 bit and os_offset_t
1325 		is 64 bit. It is OK to cast the n_pages to ulint because
1326 		the unit has been scaled to pages and they are always
1327 		32 bit. */
1328 		if (fil_node_create(name, (ulint) n_pages, space, FALSE)) {
1329 			err = DB_SUCCESS;
1330 		}
1331 	}
1332 
1333 	return(err);
1334 }
1335 
1336 /********************************************************************
1337 Opens the configured number of undo tablespaces.
1338 @return	DB_SUCCESS or error code */
1339 static
1340 dberr_t
srv_undo_tablespaces_init(ibool create_new_db,const ulint n_conf_tablespaces,ulint * n_opened)1341 srv_undo_tablespaces_init(
1342 /*======================*/
1343 	ibool		create_new_db,		/*!< in: TRUE if new db being
1344 						created */
1345 	const ulint	n_conf_tablespaces,	/*!< in: configured undo
1346 						tablespaces */
1347 	ulint*		n_opened)		/*!< out: number of UNDO
1348 						tablespaces successfully
1349 						discovered and opened */
1350 {
1351 	ulint		i;
1352 	dberr_t		err = DB_SUCCESS;
1353 	ulint		prev_space_id = 0;
1354 	ulint		n_undo_tablespaces;
1355 	ulint		undo_tablespace_ids[TRX_SYS_N_RSEGS + 1];
1356 
1357 	*n_opened = 0;
1358 
1359 	ut_a(n_conf_tablespaces <= TRX_SYS_N_RSEGS);
1360 
1361 	memset(undo_tablespace_ids, 0x0, sizeof(undo_tablespace_ids));
1362 
1363 	/* Create the undo spaces only if we are creating a new
1364 	instance. We don't allow creating of new undo tablespaces
1365 	in an existing instance (yet).  This restriction exists because
1366 	we check in several places for SYSTEM tablespaces to be less than
1367 	the min of user defined tablespace ids. Once we implement saving
1368 	the location of the undo tablespaces and their space ids this
1369 	restriction will/should be lifted. */
1370 
1371 	for (i = 0; create_new_db && i < n_conf_tablespaces; ++i) {
1372 		char	name[OS_FILE_MAX_PATH];
1373 		ulint	space_id  = i + 1;
1374 
1375 		DBUG_EXECUTE_IF("innodb_undo_upgrade",
1376 				space_id = i + 3;);
1377 
1378 		ut_snprintf(
1379 			name, sizeof(name),
1380 			"%s%cundo%03lu",
1381 			srv_undo_dir, SRV_PATH_SEPARATOR, space_id);
1382 
1383 		if (i == 0) {
1384 			srv_undo_space_id_start = space_id;
1385 			prev_space_id = srv_undo_space_id_start - 1;
1386 		}
1387 
1388 		undo_tablespace_ids[i] = space_id;
1389 
1390 		err = srv_undo_tablespace_create(
1391 			name, SRV_UNDO_TABLESPACE_SIZE_IN_PAGES);
1392 
1393 		if (err != DB_SUCCESS) {
1394 
1395 			ib_logf(IB_LOG_LEVEL_ERROR,
1396 				"Could not create undo tablespace '%s'.",
1397 				name);
1398 
1399 			return(err);
1400 		}
1401 	}
1402 
1403 	/* Get the tablespace ids of all the undo segments excluding
1404 	the system tablespace (0). If we are creating a new instance then
1405 	we build the undo_tablespace_ids ourselves since they don't
1406 	already exist. */
1407 
1408 	if (!create_new_db) {
1409 		n_undo_tablespaces = trx_rseg_get_n_undo_tablespaces(
1410 			undo_tablespace_ids);
1411 
1412 		if (n_undo_tablespaces != 0) {
1413 			srv_undo_space_id_start = undo_tablespace_ids[0];
1414 			prev_space_id = srv_undo_space_id_start - 1;
1415 		}
1416 
1417 	} else {
1418 		n_undo_tablespaces = n_conf_tablespaces;
1419 
1420 		undo_tablespace_ids[n_conf_tablespaces] = ULINT_UNDEFINED;
1421 	}
1422 
1423 	/* Open all the undo tablespaces that are currently in use. If we
1424 	fail to open any of these it is a fatal error. The tablespace ids
1425 	should be contiguous. It is a fatal error because they are required
1426 	for recovery and are referenced by the UNDO logs (a.k.a RBS). */
1427 
1428 	for (i = 0; i < n_undo_tablespaces; ++i) {
1429 		char	name[OS_FILE_MAX_PATH];
1430 
1431 		ut_snprintf(
1432 			name, sizeof(name),
1433 			"%s%cundo%03lu",
1434 			srv_undo_dir, SRV_PATH_SEPARATOR,
1435 			undo_tablespace_ids[i]);
1436 
1437 		/* Should be no gaps in undo tablespace ids. */
1438 		ut_a(prev_space_id + 1 == undo_tablespace_ids[i]);
1439 
1440 		/* The system space id should not be in this array. */
1441 		ut_a(undo_tablespace_ids[i] != 0);
1442 		ut_a(undo_tablespace_ids[i] != ULINT_UNDEFINED);
1443 
1444 		err = srv_undo_tablespace_open(name, undo_tablespace_ids[i]);
1445 
1446 		if (err != DB_SUCCESS) {
1447 
1448 			ib_logf(IB_LOG_LEVEL_ERROR,
1449 				"Unable to open undo tablespace '%s'.", name);
1450 
1451 			return(err);
1452 		}
1453 
1454 		prev_space_id = undo_tablespace_ids[i];
1455 
1456 		++*n_opened;
1457 	}
1458 
1459 	/* Open any extra unused undo tablespaces. These must be contiguous.
1460 	We stop at the first failure. These are undo tablespaces that are
1461 	not in use and therefore not required by recovery. We only check
1462 	that there are no gaps. */
1463 
1464 	for (i = prev_space_id + 1; i < TRX_SYS_N_RSEGS; ++i) {
1465 		char	name[OS_FILE_MAX_PATH];
1466 
1467 		ut_snprintf(
1468 			name, sizeof(name),
1469 			"%s%cundo%03lu", srv_undo_dir, SRV_PATH_SEPARATOR, i);
1470 
1471 		/* Undo space ids start from 1. */
1472 		err = srv_undo_tablespace_open(name, i);
1473 
1474 		if (err != DB_SUCCESS) {
1475 			break;
1476 		}
1477 
1478 		/** Note the first undo tablespace id in case of
1479 		no active undo tablespace. */
1480 		if (n_undo_tablespaces == 0) {
1481 			srv_undo_space_id_start = i;
1482 		}
1483 
1484 		++n_undo_tablespaces;
1485 
1486 		++*n_opened;
1487 	}
1488 
1489 	/** Explictly specify the srv_undo_space_id_start
1490 	as zero when there are no undo tablespaces. */
1491 	if (n_undo_tablespaces == 0) {
1492 		srv_undo_space_id_start = 0;
1493 	}
1494 
1495 	/* If the user says that there are fewer than what we find we
1496 	tolerate that discrepancy but not the inverse. Because there could
1497 	be unused undo tablespaces for future use. */
1498 
1499 	if (n_conf_tablespaces > n_undo_tablespaces) {
1500 		ut_print_timestamp(stderr);
1501 		fprintf(stderr,
1502 			" InnoDB: Expected to open %lu undo "
1503 			"tablespaces but was able\n",
1504 			n_conf_tablespaces);
1505 		ut_print_timestamp(stderr);
1506 		fprintf(stderr,
1507 			" InnoDB: to find only %lu undo "
1508 			"tablespaces.\n", n_undo_tablespaces);
1509 		ut_print_timestamp(stderr);
1510 		fprintf(stderr,
1511 			" InnoDB: Set the "
1512 			"innodb_undo_tablespaces parameter to "
1513 			"the\n");
1514 		ut_print_timestamp(stderr);
1515 		fprintf(stderr,
1516 			" InnoDB: correct value and retry. Suggested "
1517 			"value is %lu\n", n_undo_tablespaces);
1518 
1519 		return(err != DB_SUCCESS ? err : DB_ERROR);
1520 
1521 	} else  if (n_undo_tablespaces > 0) {
1522 
1523 		ib_logf(IB_LOG_LEVEL_INFO, "Opened %lu undo tablespaces",
1524 			n_undo_tablespaces);
1525 
1526 		if (n_conf_tablespaces == 0) {
1527 			ib_logf(IB_LOG_LEVEL_WARN,
1528 				"Using the system tablespace for all UNDO "
1529 				"logging because innodb_undo_tablespaces=0");
1530 		}
1531 	}
1532 
1533 	if (create_new_db) {
1534 		mtr_t	mtr;
1535 
1536 		mtr_start(&mtr);
1537 
1538 		/* The undo log tablespace */
1539 		for (i = 0; i < n_undo_tablespaces; ++i) {
1540 
1541 			fsp_header_init(
1542 				undo_tablespace_ids[i],
1543 				SRV_UNDO_TABLESPACE_SIZE_IN_PAGES, &mtr);
1544 		}
1545 
1546 		mtr_commit(&mtr);
1547 	}
1548 
1549 	return(DB_SUCCESS);
1550 }
1551 
1552 /********************************************************************
1553 Wait for the purge thread(s) to start up. */
1554 static
1555 void
srv_start_wait_for_purge_to_start()1556 srv_start_wait_for_purge_to_start()
1557 /*===============================*/
1558 {
1559 	/* Wait for the purge coordinator and master thread to startup. */
1560 
1561 	purge_state_t	state = trx_purge_state();
1562 
1563 	ut_a(state != PURGE_STATE_DISABLED);
1564 
1565 	while (srv_shutdown_state == SRV_SHUTDOWN_NONE
1566 	       && srv_force_recovery < SRV_FORCE_NO_BACKGROUND
1567 	       && state == PURGE_STATE_INIT) {
1568 
1569 		switch (state = trx_purge_state()) {
1570 		case PURGE_STATE_RUN:
1571 		case PURGE_STATE_STOP:
1572 			break;
1573 
1574 		case PURGE_STATE_INIT:
1575 			ib_logf(IB_LOG_LEVEL_INFO,
1576 				"Waiting for purge to start");
1577 
1578 			os_thread_sleep(50000);
1579 			break;
1580 
1581 		case PURGE_STATE_EXIT:
1582 		case PURGE_STATE_DISABLED:
1583 			ut_error;
1584 		}
1585 	}
1586 }
1587 
1588 /*********************************************************************//**
1589 Initializes the log tracking subsystem and starts its thread.  */
1590 static
1591 void
init_log_online(void)1592 init_log_online(void)
1593 /*=================*/
1594 {
1595 	if (UNIV_UNLIKELY(srv_force_recovery > 0 || srv_read_only_mode)) {
1596 		srv_track_changed_pages = FALSE;
1597 		return;
1598 	}
1599 
1600 	if (srv_track_changed_pages) {
1601 
1602 		log_online_read_init();
1603 
1604 		/* Create the thread that follows the redo log to output the
1605 		   changed page bitmap */
1606 		os_thread_create(&srv_redo_log_follow_thread, NULL,
1607 				 thread_ids + 5 + SRV_MAX_N_IO_THREADS);
1608 	}
1609 }
1610 
1611 /********************************************************************
1612 Starts InnoDB and creates a new database if database files
1613 are not found and the user wants.
1614 @return	DB_SUCCESS or error code */
1615 UNIV_INTERN
1616 dberr_t
innobase_start_or_create_for_mysql(void)1617 innobase_start_or_create_for_mysql(void)
1618 /*====================================*/
1619 {
1620 	ibool		create_new_db;
1621 	lsn_t		min_flushed_lsn;
1622 	lsn_t		max_flushed_lsn;
1623 #ifdef UNIV_LOG_ARCHIVE
1624 	lsn_t		min_arch_log_no	= LSN_MAX;
1625 	lsn_t		max_arch_log_no	= LSN_MAX;
1626 #endif /* UNIV_LOG_ARCHIVE */
1627 	ulint		sum_of_new_sizes;
1628 	ulint		sum_of_data_file_sizes;
1629 	ulint		tablespace_size_in_header;
1630 	dberr_t		err;
1631 	unsigned	i;
1632 	ulint		srv_n_log_files_found = srv_n_log_files;
1633 	ulint		io_limit;
1634 	mtr_t		mtr;
1635 	ib_bh_t*	ib_bh;
1636 	ulint		n_recovered_trx;
1637 	char		logfilename[10000];
1638 	char*		logfile0	= NULL;
1639 	size_t		dirnamelen;
1640 
1641 	if (srv_force_recovery == SRV_FORCE_NO_LOG_REDO) {
1642 		srv_read_only_mode = 1;
1643 	}
1644 
1645 	high_level_read_only = srv_read_only_mode
1646 		|| srv_force_recovery > SRV_FORCE_NO_TRX_UNDO;
1647 
1648 	if (srv_read_only_mode) {
1649 		ib_logf(IB_LOG_LEVEL_INFO, "Started in read only mode");
1650 	}
1651 
1652 #ifdef HAVE_DARWIN_THREADS
1653 # ifdef F_FULLFSYNC
1654 	/* This executable has been compiled on Mac OS X 10.3 or later.
1655 	Assume that F_FULLFSYNC is available at run-time. */
1656 	srv_have_fullfsync = TRUE;
1657 # else /* F_FULLFSYNC */
1658 	/* This executable has been compiled on Mac OS X 10.2
1659 	or earlier.  Determine if the executable is running
1660 	on Mac OS X 10.3 or later. */
1661 	struct utsname utsname;
1662 	if (uname(&utsname)) {
1663 		ut_print_timestamp(stderr);
1664 		fputs(" InnoDB: cannot determine Mac OS X version!\n", stderr);
1665 	} else {
1666 		srv_have_fullfsync = strcmp(utsname.release, "7.") >= 0;
1667 	}
1668 	if (!srv_have_fullfsync) {
1669 		ut_print_timestamp(stderr);
1670 		fputs(" InnoDB: On Mac OS X, fsync() may be "
1671 		      "broken on internal drives,\n", stderr);
1672 		ut_print_timestamp(stderr);
1673 		fputs(" InnoDB: making transactions unsafe!\n", stderr);
1674 	}
1675 # endif /* F_FULLFSYNC */
1676 #endif /* HAVE_DARWIN_THREADS */
1677 
1678 	ib_logf(IB_LOG_LEVEL_INFO,
1679 		"Using %s to ref count buffer pool pages",
1680 #ifdef PAGE_ATOMIC_REF_COUNT
1681 		"atomics"
1682 #else
1683 		"mutexes"
1684 #endif /* PAGE_ATOMIC_REF_COUNT */
1685 	);
1686 
1687 
1688 	if (sizeof(ulint) != sizeof(void*)) {
1689 		ut_print_timestamp(stderr);
1690 		fprintf(stderr,
1691 			" InnoDB: Error: size of InnoDB's ulint is %lu, "
1692 			"but size of void*\n", (ulong) sizeof(ulint));
1693 		ut_print_timestamp(stderr);
1694 		fprintf(stderr,
1695 			" InnoDB: is %lu. The sizes should be the same "
1696 			"so that on a 64-bit\n",
1697 			(ulong) sizeof(void*));
1698 		ut_print_timestamp(stderr);
1699 		fprintf(stderr,
1700 			" InnoDB: platforms you can allocate more than 4 GB "
1701 			"of memory.\n");
1702 	}
1703 
1704 #ifdef UNIV_DEBUG
1705 	ut_print_timestamp(stderr);
1706 	fprintf(stderr,
1707 		" InnoDB: !!!!!!!! UNIV_DEBUG switched on !!!!!!!!!\n");
1708 #endif
1709 
1710 #ifdef UNIV_IBUF_DEBUG
1711 	ut_print_timestamp(stderr);
1712 	fprintf(stderr,
1713 		" InnoDB: !!!!!!!! UNIV_IBUF_DEBUG switched on !!!!!!!!!\n");
1714 # ifdef UNIV_IBUF_COUNT_DEBUG
1715 	ut_print_timestamp(stderr);
1716 	fprintf(stderr,
1717 		" InnoDB: !!!!!!!! UNIV_IBUF_COUNT_DEBUG switched on "
1718 		"!!!!!!!!!\n");
1719 	ut_print_timestamp(stderr);
1720 	fprintf(stderr,
1721 		" InnoDB: Crash recovery will fail with UNIV_IBUF_COUNT_DEBUG\n");
1722 # endif
1723 #endif
1724 
1725 #ifdef UNIV_BLOB_DEBUG
1726 	fprintf(stderr,
1727 		"InnoDB: !!!!!!!! UNIV_BLOB_DEBUG switched on !!!!!!!!!\n"
1728 		"InnoDB: Server restart may fail with UNIV_BLOB_DEBUG\n");
1729 #endif /* UNIV_BLOB_DEBUG */
1730 
1731 #ifdef UNIV_SYNC_DEBUG
1732 	ut_print_timestamp(stderr);
1733 	fprintf(stderr,
1734 		" InnoDB: !!!!!!!! UNIV_SYNC_DEBUG switched on !!!!!!!!!\n");
1735 #endif
1736 
1737 #ifdef UNIV_SEARCH_DEBUG
1738 	ut_print_timestamp(stderr);
1739 	fprintf(stderr,
1740 		" InnoDB: !!!!!!!! UNIV_SEARCH_DEBUG switched on !!!!!!!!!\n");
1741 #endif
1742 
1743 #ifdef UNIV_LOG_LSN_DEBUG
1744 	ut_print_timestamp(stderr);
1745 	fprintf(stderr,
1746 		" InnoDB: !!!!!!!! UNIV_LOG_LSN_DEBUG switched on !!!!!!!!!\n");
1747 #endif /* UNIV_LOG_LSN_DEBUG */
1748 #ifdef UNIV_MEM_DEBUG
1749 	ut_print_timestamp(stderr);
1750 	fprintf(stderr,
1751 		" InnoDB: !!!!!!!! UNIV_MEM_DEBUG switched on !!!!!!!!!\n");
1752 #endif
1753 
1754 	if (srv_use_sys_malloc) {
1755 		ib_logf(IB_LOG_LEVEL_INFO,
1756 			"The InnoDB memory heap is disabled");
1757 	}
1758 
1759 #if defined(COMPILER_HINTS_ENABLED)
1760 	ib_logf(IB_LOG_LEVEL_INFO,
1761 		" InnoDB: Compiler hints enabled.");
1762 #endif /* defined(COMPILER_HINTS_ENABLED) */
1763 
1764 	ib_logf(IB_LOG_LEVEL_INFO,
1765 		"" IB_ATOMICS_STARTUP_MSG "");
1766 
1767 	ib_logf(IB_LOG_LEVEL_INFO,
1768 		"" IB_MEMORY_BARRIER_STARTUP_MSG "");
1769 
1770 #ifndef HAVE_MEMORY_BARRIER
1771 #if defined __i386__ || defined __x86_64__ || defined _M_IX86 || defined _M_X64 || defined __WIN__
1772 #else
1773 	ib_logf(IB_LOG_LEVEL_WARN,
1774 		"MySQL was built without a memory barrier capability on this"
1775 		" architecture, which might allow a mutex/rw_lock violation"
1776 		" under high thread concurrency. This may cause a hang.");
1777 #endif /* IA32 or AMD64 */
1778 #endif /* HAVE_MEMORY_BARRIER */
1779 
1780 	ib_logf(IB_LOG_LEVEL_INFO,
1781 		"Compressed tables use zlib " ZLIB_VERSION
1782 #ifdef UNIV_ZIP_DEBUG
1783 	      " with validation"
1784 #endif /* UNIV_ZIP_DEBUG */
1785 	      );
1786 #ifdef UNIV_ZIP_COPY
1787 	ib_logf(IB_LOG_LEVEL_INFO, "and extra copying");
1788 #endif /* UNIV_ZIP_COPY */
1789 
1790 
1791 	/* Since InnoDB does not currently clean up all its internal data
1792 	structures in MySQL Embedded Server Library server_end(), we
1793 	print an error message if someone tries to start up InnoDB a
1794 	second time during the process lifetime. */
1795 
1796 	if (srv_start_has_been_called) {
1797 		ut_print_timestamp(stderr);
1798 		fprintf(stderr, " InnoDB: Error: startup called second time "
1799 			"during the process\n");
1800 		ut_print_timestamp(stderr);
1801 		fprintf(stderr, " InnoDB: lifetime. In the MySQL Embedded "
1802 			"Server Library you\n");
1803 		ut_print_timestamp(stderr);
1804 		fprintf(stderr, " InnoDB: cannot call server_init() more "
1805 			"than once during the\n");
1806 		ut_print_timestamp(stderr);
1807 		fprintf(stderr, " InnoDB: process lifetime.\n");
1808 	}
1809 
1810 	srv_start_has_been_called = TRUE;
1811 
1812 #ifdef UNIV_DEBUG
1813 	log_do_write = TRUE;
1814 #endif /* UNIV_DEBUG */
1815 	/*	yydebug = TRUE; */
1816 
1817 	srv_is_being_started = TRUE;
1818 	srv_startup_is_before_trx_rollback_phase = TRUE;
1819 
1820 #ifdef __WIN__
1821 	switch (os_get_os_version()) {
1822 	case OS_WIN95:
1823 	case OS_WIN31:
1824 	case OS_WINNT:
1825 		/* On Win 95, 98, ME, Win32 subsystem for Windows 3.1,
1826 		and NT use simulated aio. In NT Windows provides async i/o,
1827 		but when run in conjunction with InnoDB Hot Backup, it seemed
1828 		to corrupt the data files. */
1829 
1830 		srv_use_native_aio = FALSE;
1831 		break;
1832 
1833 	case OS_WIN2000:
1834 	case OS_WINXP:
1835 		/* On 2000 and XP, async IO is available. */
1836 		srv_use_native_aio = TRUE;
1837 		break;
1838 
1839 	default:
1840 		/* Vista and later have both async IO and condition variables */
1841 		srv_use_native_aio = TRUE;
1842 		srv_use_native_conditions = TRUE;
1843 		break;
1844 	}
1845 
1846 #elif defined(LINUX_NATIVE_AIO)
1847 
1848 	if (srv_use_native_aio) {
1849 		ib_logf(IB_LOG_LEVEL_INFO, "Using Linux native AIO");
1850 	}
1851 #else
1852 	/* Currently native AIO is supported only on windows and linux
1853 	and that also when the support is compiled in. In all other
1854 	cases, we ignore the setting of innodb_use_native_aio. */
1855 	srv_use_native_aio = FALSE;
1856 #endif /* __WIN__ */
1857 
1858 	if (srv_file_flush_method_str == NULL) {
1859 		/* These are the default options */
1860 
1861 		srv_unix_file_flush_method = SRV_UNIX_FSYNC;
1862 
1863 		srv_win_file_flush_method = SRV_WIN_IO_UNBUFFERED;
1864 #ifndef __WIN__
1865 	} else if (0 == ut_strcmp(srv_file_flush_method_str, "fsync")) {
1866 		srv_unix_file_flush_method = SRV_UNIX_FSYNC;
1867 
1868 	} else if (0 == ut_strcmp(srv_file_flush_method_str, "O_DSYNC")) {
1869 		srv_unix_file_flush_method = SRV_UNIX_O_DSYNC;
1870 
1871 	} else if (0 == ut_strcmp(srv_file_flush_method_str, "O_DIRECT")) {
1872 		srv_unix_file_flush_method = SRV_UNIX_O_DIRECT;
1873 
1874 	} else if (0 == ut_strcmp(srv_file_flush_method_str, "ALL_O_DIRECT")) {
1875 		srv_unix_file_flush_method = SRV_UNIX_ALL_O_DIRECT;
1876 
1877 	} else if (0 == ut_strcmp(srv_file_flush_method_str, "O_DIRECT_NO_FSYNC")) {
1878 		srv_unix_file_flush_method = SRV_UNIX_O_DIRECT_NO_FSYNC;
1879 
1880 	} else if (0 == ut_strcmp(srv_file_flush_method_str, "littlesync")) {
1881 		srv_unix_file_flush_method = SRV_UNIX_LITTLESYNC;
1882 
1883 	} else if (0 == ut_strcmp(srv_file_flush_method_str, "nosync")) {
1884 		srv_unix_file_flush_method = SRV_UNIX_NOSYNC;
1885 #else
1886 	} else if (0 == ut_strcmp(srv_file_flush_method_str, "normal")) {
1887 		srv_win_file_flush_method = SRV_WIN_IO_NORMAL;
1888 		srv_use_native_aio = FALSE;
1889 
1890 	} else if (0 == ut_strcmp(srv_file_flush_method_str, "unbuffered")) {
1891 		srv_win_file_flush_method = SRV_WIN_IO_UNBUFFERED;
1892 		srv_use_native_aio = FALSE;
1893 
1894 	} else if (0 == ut_strcmp(srv_file_flush_method_str,
1895 				  "async_unbuffered")) {
1896 		srv_win_file_flush_method = SRV_WIN_IO_UNBUFFERED;
1897 #endif /* __WIN__ */
1898 	} else {
1899 		ib_logf(IB_LOG_LEVEL_ERROR,
1900 			"Unrecognized value %s for innodb_flush_method",
1901 			srv_file_flush_method_str);
1902 		return(DB_ERROR);
1903 	}
1904 
1905 	/* Note that the call srv_boot() also changes the values of
1906 	some variables to the units used by InnoDB internally */
1907 
1908 	/* Set the maximum number of threads which can wait for a semaphore
1909 	inside InnoDB: this is the 'sync wait array' size, as well as the
1910 	maximum number of threads that can wait in the 'srv_conc array' for
1911 	their time to enter InnoDB. */
1912 
1913 #define BUF_POOL_SIZE_THRESHOLD (1024 * 1024 * 1024)
1914 	srv_max_n_threads = 1   /* io_ibuf_thread */
1915 			    + 1 /* io_log_thread */
1916 			    + 1 /* lock_wait_timeout_thread */
1917 			    + 1 /* srv_error_monitor_thread */
1918 			    + 1 /* srv_monitor_thread */
1919 			    + 1 /* srv_master_thread */
1920 			    + 1 /* srv_redo_log_follow_thread */
1921 			    + 1 /* srv_purge_coordinator_thread */
1922 			    + 1 /* buf_dump_thread */
1923 			    + 1 /* dict_stats_thread */
1924 			    + 1 /* fts_optimize_thread */
1925 			    + 1 /* recv_writer_thread */
1926 			    + 1 /* buf_flush_page_cleaner_thread */
1927 			    + 1 /* trx_rollback_or_clean_all_recovered */
1928 			    + 128 /* added as margin, for use of
1929 				  InnoDB Memcached etc. */
1930 			    + max_connections
1931 			    + srv_n_read_io_threads
1932 			    + srv_n_write_io_threads
1933 			    + srv_n_purge_threads
1934 			    /* FTS Parallel Sort */
1935 			    + fts_sort_pll_degree * FTS_NUM_AUX_INDEX
1936 			      * max_connections;
1937 
1938 	if (srv_buf_pool_size < BUF_POOL_SIZE_THRESHOLD) {
1939 		/* If buffer pool is less than 1 GB,
1940 		use only one buffer pool instance */
1941 		srv_buf_pool_instances = 1;
1942 	}
1943 
1944 	srv_boot();
1945 
1946 	ib_logf(IB_LOG_LEVEL_INFO,
1947 		"%s CPU crc32 instructions",
1948 		ut_crc32_sse2_enabled ? "Using" : "Not using");
1949 
1950 	if (!srv_read_only_mode) {
1951 
1952 		mutex_create(srv_monitor_file_mutex_key,
1953 			     &srv_monitor_file_mutex, SYNC_NO_ORDER_CHECK);
1954 
1955 		if (srv_innodb_status) {
1956 
1957 			srv_monitor_file_name = static_cast<char*>(
1958 				mem_alloc(
1959 					strlen(fil_path_to_mysql_datadir)
1960 					+ 20 + sizeof "/innodb_status."));
1961 
1962 			sprintf(srv_monitor_file_name, "%s/innodb_status.%lu",
1963 				fil_path_to_mysql_datadir,
1964 				os_proc_get_number());
1965 
1966 			srv_monitor_file = fopen(srv_monitor_file_name, "w+");
1967 
1968 			if (!srv_monitor_file) {
1969 
1970 				ib_logf(IB_LOG_LEVEL_ERROR,
1971 					"Unable to create %s: %s",
1972 					srv_monitor_file_name,
1973 					strerror(errno));
1974 
1975 				return(DB_ERROR);
1976 			}
1977 		} else {
1978 			srv_monitor_file_name = NULL;
1979 			srv_monitor_file = os_file_create_tmpfile(NULL);
1980 
1981 			if (!srv_monitor_file) {
1982 				return(DB_ERROR);
1983 			}
1984 		}
1985 
1986 		mutex_create(srv_dict_tmpfile_mutex_key,
1987 			     &srv_dict_tmpfile_mutex, SYNC_DICT_OPERATION);
1988 
1989 		srv_dict_tmpfile = os_file_create_tmpfile(NULL);
1990 
1991 		if (!srv_dict_tmpfile) {
1992 			return(DB_ERROR);
1993 		}
1994 
1995 		mutex_create(srv_misc_tmpfile_mutex_key,
1996 			     &srv_misc_tmpfile_mutex, SYNC_ANY_LATCH);
1997 
1998 		srv_misc_tmpfile = os_file_create_tmpfile(NULL);
1999 
2000 		if (!srv_misc_tmpfile) {
2001 			return(DB_ERROR);
2002 		}
2003 	}
2004 
2005 	/* If user has set the value of innodb_file_io_threads then
2006 	we'll emit a message telling the user that this parameter
2007 	is now deprecated. */
2008 	if (srv_n_file_io_threads != 4) {
2009 		ib_logf(IB_LOG_LEVEL_WARN,
2010 			"innodb_file_io_threads is deprecated. Please use "
2011 			"innodb_read_io_threads and innodb_write_io_threads "
2012 			"instead");
2013 	}
2014 
2015 	/* Now overwrite the value on srv_n_file_io_threads */
2016 	srv_n_file_io_threads = srv_n_read_io_threads;
2017 
2018 	if (!srv_read_only_mode) {
2019 		/* Add the log and ibuf IO threads. */
2020 		srv_n_file_io_threads += 2;
2021 		srv_n_file_io_threads += srv_n_write_io_threads;
2022 	} else {
2023 		ib_logf(IB_LOG_LEVEL_INFO,
2024 			"Disabling background IO write threads.");
2025 
2026 		srv_n_write_io_threads = 0;
2027 	}
2028 
2029 	ut_a(srv_n_file_io_threads <= SRV_MAX_N_IO_THREADS);
2030 
2031 	io_limit = 8 * SRV_N_PENDING_IOS_PER_THREAD;
2032 
2033 	/* On Windows when using native aio the number of aio requests
2034 	that a thread can handle at a given time is limited to 32
2035 	i.e.: SRV_N_PENDING_IOS_PER_THREAD */
2036 # ifdef __WIN__
2037 	if (srv_use_native_aio) {
2038 		io_limit = SRV_N_PENDING_IOS_PER_THREAD;
2039 	}
2040 # endif /* __WIN__ */
2041 
2042 	if (!os_aio_init(io_limit,
2043 			 srv_n_read_io_threads,
2044 			 srv_n_write_io_threads,
2045 			 SRV_MAX_N_PENDING_SYNC_IOS)) {
2046 
2047 		ib_logf(IB_LOG_LEVEL_ERROR,
2048 			"Fatal : Cannot initialize AIO sub-system");
2049 
2050 		return(DB_ERROR);
2051 	}
2052 
2053 	fil_init(srv_file_per_table ? 50000 : 5000, srv_max_n_open_files);
2054 
2055 	double	size;
2056 	char	unit;
2057 
2058 	if (srv_buf_pool_size >= 1024 * 1024 * 1024) {
2059 		size = ((double) srv_buf_pool_size) / (1024 * 1024 * 1024);
2060 		unit = 'G';
2061 	} else {
2062 		size = ((double) srv_buf_pool_size) / (1024 * 1024);
2063 		unit = 'M';
2064 	}
2065 
2066 	/* Print time to initialize the buffer pool */
2067 	ib_logf(IB_LOG_LEVEL_INFO,
2068 		"Initializing buffer pool, size = %.1f%c", size, unit);
2069 
2070 	err = buf_pool_init(srv_buf_pool_size, static_cast<bool>(srv_numa_interleave),
2071 			    srv_buf_pool_instances);
2072 
2073 	if (err != DB_SUCCESS) {
2074 		ib_logf(IB_LOG_LEVEL_ERROR,
2075 			"Cannot allocate memory for the buffer pool");
2076 
2077 		return(DB_ERROR);
2078 	}
2079 
2080 	ib_logf(IB_LOG_LEVEL_INFO,
2081 		"Completed initialization of buffer pool");
2082 
2083 #ifdef UNIV_DEBUG
2084 	/* We have observed deadlocks with a 5MB buffer pool but
2085 	the actual lower limit could very well be a little higher. */
2086 
2087 	if (srv_buf_pool_size <= 5 * 1024 * 1024) {
2088 
2089 		ib_logf(IB_LOG_LEVEL_INFO,
2090 			"Small buffer pool size (%luM), the flst_validate() "
2091 			"debug function can cause a deadlock if the "
2092 			"buffer pool fills up.",
2093 			srv_buf_pool_size / 1024 / 1024);
2094 	}
2095 #endif /* UNIV_DEBUG */
2096 
2097 	fsp_init();
2098 	log_init();
2099 	log_online_init();
2100 
2101 	lock_sys_create(srv_lock_table_size);
2102 
2103 	/* Create i/o-handler threads: */
2104 
2105 	for (i = 0; i < srv_n_file_io_threads; ++i) {
2106 
2107 		n[i] = i;
2108 
2109 		os_thread_create(io_handler_thread, n + i, thread_ids + i);
2110 	}
2111 
2112 	if (srv_n_log_files * srv_log_file_size * UNIV_PAGE_SIZE
2113 	    >= 512ULL * 1024ULL * 1024ULL * 1024ULL) {
2114 		/* log_block_convert_lsn_to_no() limits the returned block
2115 		number to 1G and given that OS_FILE_LOG_BLOCK_SIZE is 512
2116 		bytes, then we have a limit of 512 GB. If that limit is to
2117 		be raised, then log_block_convert_lsn_to_no() must be
2118 		modified. */
2119 		ib_logf(IB_LOG_LEVEL_ERROR,
2120 			"Combined size of log files must be < 512 GB");
2121 
2122 		return(DB_ERROR);
2123 	}
2124 
2125 	if (srv_n_log_files * srv_log_file_size >= ULINT_MAX) {
2126 		/* fil_io() takes ulint as an argument and we are passing
2127 		(next_offset / UNIV_PAGE_SIZE) to it in log_group_write_buf().
2128 		So (next_offset / UNIV_PAGE_SIZE) must be less than ULINT_MAX.
2129 		So next_offset must be < ULINT_MAX * UNIV_PAGE_SIZE. This
2130 		means that we are limited to ULINT_MAX * UNIV_PAGE_SIZE which
2131 		is 64 TB on 32 bit systems. */
2132 		fprintf(stderr,
2133 			" InnoDB: Error: combined size of log files"
2134 			" must be < %lu GB\n",
2135 			ULINT_MAX / 1073741824 * UNIV_PAGE_SIZE);
2136 
2137 		return(DB_ERROR);
2138 	}
2139 
2140 	sum_of_new_sizes = 0;
2141 
2142 	for (i = 0; i < srv_n_data_files; i++) {
2143 #ifndef __WIN__
2144 		if (sizeof(off_t) < 5
2145 		    && srv_data_file_sizes[i]
2146 		    >= (ulint) (1 << (32 - UNIV_PAGE_SIZE_SHIFT))) {
2147 			ut_print_timestamp(stderr);
2148 			fprintf(stderr,
2149 				" InnoDB: Error: file size must be < 4 GB"
2150 				" with this MySQL binary\n");
2151 			ut_print_timestamp(stderr);
2152 			fprintf(stderr,
2153 				" InnoDB: and operating system combination,"
2154 				" in some OS's < 2 GB\n");
2155 
2156 			return(DB_ERROR);
2157 		}
2158 #endif
2159 		sum_of_new_sizes += srv_data_file_sizes[i];
2160 	}
2161 
2162 	if (sum_of_new_sizes < 10485760 / UNIV_PAGE_SIZE) {
2163 		ib_logf(IB_LOG_LEVEL_ERROR,
2164 			"Tablespace size must be at least 10 MB");
2165 
2166 		return(DB_ERROR);
2167 	}
2168 
2169 	recv_sys_create();
2170 	recv_sys_init(buf_pool_get_curr_size());
2171 
2172 	err = open_or_create_data_files(&create_new_db,
2173 #ifdef UNIV_LOG_ARCHIVE
2174 					&min_arch_log_no, &max_arch_log_no,
2175 #endif /* UNIV_LOG_ARCHIVE */
2176 					&min_flushed_lsn, &max_flushed_lsn,
2177 					&sum_of_new_sizes);
2178 	if (err == DB_FAIL) {
2179 
2180 		ib_logf(IB_LOG_LEVEL_ERROR,
2181 			"The system tablespace must be writable!");
2182 
2183 		return(DB_ERROR);
2184 
2185 	} else if (err != DB_SUCCESS) {
2186 
2187 		ib_logf(IB_LOG_LEVEL_ERROR,
2188 			"Could not open or create the system tablespace. If "
2189 			"you tried to add new data files to the system "
2190 			"tablespace, and it failed here, you should now "
2191 			"edit innodb_data_file_path in my.cnf back to what "
2192 			"it was, and remove the new ibdata files InnoDB "
2193 			"created in this failed attempt. InnoDB only wrote "
2194 			"those files full of zeros, but did not yet use "
2195 			"them in any way. But be careful: do not remove "
2196 			"old data files which contain your precious data!");
2197 
2198 		return(err);
2199 	}
2200 
2201 #ifdef UNIV_LOG_ARCHIVE
2202 	srv_normalize_path_for_win(srv_arch_dir);
2203 #endif /* UNIV_LOG_ARCHIVE */
2204 
2205 	dirnamelen = strlen(srv_log_group_home_dir);
2206 	ut_a(dirnamelen < (sizeof logfilename) - 10 - sizeof "ib_logfile");
2207 	memcpy(logfilename, srv_log_group_home_dir, dirnamelen);
2208 
2209 	/* Add a path separator if needed. */
2210 	if (dirnamelen && logfilename[dirnamelen - 1] != SRV_PATH_SEPARATOR) {
2211 		logfilename[dirnamelen++] = SRV_PATH_SEPARATOR;
2212 	}
2213 
2214 	srv_log_file_size_requested = srv_log_file_size;
2215 
2216 	if (create_new_db) {
2217 		bool success = buf_flush_list(ULINT_MAX, LSN_MAX, NULL);
2218 		ut_a(success);
2219 
2220 		min_flushed_lsn = max_flushed_lsn = log_get_lsn();
2221 
2222 		buf_flush_wait_batch_end(NULL, BUF_FLUSH_LIST);
2223 
2224 		err = create_log_files(create_new_db, logfilename, dirnamelen,
2225 				       max_flushed_lsn, logfile0);
2226 
2227 		if (err != DB_SUCCESS) {
2228 			return(err);
2229 		}
2230 	} else {
2231 		for (i = 0; i < SRV_N_LOG_FILES_MAX; i++) {
2232 			os_offset_t	size;
2233 			os_file_stat_t	stat_info;
2234 
2235 			sprintf(logfilename + dirnamelen,
2236 				"ib_logfile%u", i);
2237 
2238 			err = os_file_get_status(
2239 				logfilename, &stat_info, false);
2240 
2241 			if (err == DB_NOT_FOUND) {
2242 				if (i == 0) {
2243 					if (max_flushed_lsn
2244 					    != min_flushed_lsn) {
2245 						ib_logf(IB_LOG_LEVEL_ERROR,
2246 							"Cannot create"
2247 							" log files because"
2248 							" data files are"
2249 							" corrupt or"
2250 							" not in sync"
2251 							" with each other");
2252 						return(DB_ERROR);
2253 					}
2254 
2255 					if (max_flushed_lsn < (lsn_t) 1000) {
2256 						ib_logf(IB_LOG_LEVEL_ERROR,
2257 							"Cannot create"
2258 							" log files because"
2259 							" data files are"
2260 							" corrupt or the"
2261 							" database was not"
2262 							" shut down cleanly"
2263 							" after creating"
2264 							" the data files.");
2265 						return(DB_ERROR);
2266 					}
2267 
2268 					err = create_log_files(
2269 						create_new_db, logfilename,
2270 						dirnamelen, max_flushed_lsn,
2271 						logfile0);
2272 
2273 					if (err != DB_SUCCESS) {
2274 						return(err);
2275 					}
2276 
2277 					create_log_files_rename(
2278 						logfilename, dirnamelen,
2279 						max_flushed_lsn, logfile0);
2280 
2281 					/* Suppress the message about
2282 					crash recovery. */
2283 					max_flushed_lsn = min_flushed_lsn
2284 						= log_get_lsn();
2285 					goto files_checked;
2286 				} else if (i < 2) {
2287 					/* must have at least 2 log files */
2288 					ib_logf(IB_LOG_LEVEL_ERROR,
2289 						"Only one log file found.");
2290 					return(err);
2291 				}
2292 
2293 				/* opened all files */
2294 				break;
2295 			}
2296 
2297 			if (!srv_file_check_mode(logfilename)) {
2298 				return(DB_ERROR);
2299 			}
2300 
2301 			err = open_log_file(&files[i], logfilename, &size);
2302 
2303 			if (err != DB_SUCCESS) {
2304 				return(err);
2305 			}
2306 
2307 			ut_a(size != (os_offset_t) -1);
2308 
2309 			if (size & ((1 << UNIV_PAGE_SIZE_SHIFT) - 1)) {
2310 				ib_logf(IB_LOG_LEVEL_ERROR,
2311 					"Log file %s size "
2312 					UINT64PF " is not a multiple of"
2313 					" innodb_page_size",
2314 					logfilename, size);
2315 				return(DB_ERROR);
2316 			}
2317 
2318 			size >>= UNIV_PAGE_SIZE_SHIFT;
2319 
2320 			if (i == 0) {
2321 				srv_log_file_size = size;
2322 			} else if (size != srv_log_file_size) {
2323 				ib_logf(IB_LOG_LEVEL_ERROR,
2324 					"Log file %s is"
2325 					" of different size " UINT64PF " bytes"
2326 					" than other log"
2327 					" files " UINT64PF " bytes!",
2328 					logfilename,
2329 					size << UNIV_PAGE_SIZE_SHIFT,
2330 					(os_offset_t) srv_log_file_size
2331 					<< UNIV_PAGE_SIZE_SHIFT);
2332 				return(DB_ERROR);
2333 			}
2334 		}
2335 
2336 		srv_n_log_files_found = i;
2337 
2338 		/* Create the in-memory file space objects. */
2339 
2340 		sprintf(logfilename + dirnamelen, "ib_logfile%u", 0);
2341 
2342 		fil_space_create(logfilename,
2343 				 SRV_LOG_SPACE_FIRST_ID,
2344 				 fsp_flags_set_page_size(0, UNIV_PAGE_SIZE),
2345 				 FIL_LOG);
2346 
2347 		ut_a(fil_validate());
2348 
2349 		/* srv_log_file_size is measured in pages; if page size is 16KB,
2350 		then we have a limit of 64TB on 32 bit systems */
2351 		ut_a(srv_log_file_size <= ULINT_MAX);
2352 
2353 		for (unsigned j = 0; j < i; j++) {
2354 			sprintf(logfilename + dirnamelen, "ib_logfile%u", j);
2355 
2356 			if (!fil_node_create(logfilename,
2357 					     (ulint) srv_log_file_size,
2358 					     SRV_LOG_SPACE_FIRST_ID, FALSE)) {
2359 				return(DB_ERROR);
2360 			}
2361 		}
2362 
2363 #ifdef UNIV_LOG_ARCHIVE
2364 		/* Create the file space object for archived logs. Under
2365 		MySQL, no archiving ever done. */
2366 		fil_space_create("arch_log_space", SRV_LOG_SPACE_FIRST_ID + 1,
2367 				 0, FIL_LOG);
2368 #endif /* UNIV_LOG_ARCHIVE */
2369 		log_group_init(0, i, srv_log_file_size * UNIV_PAGE_SIZE,
2370 			       SRV_LOG_SPACE_FIRST_ID,
2371 			       SRV_LOG_SPACE_FIRST_ID + 1);
2372 	}
2373 
2374 files_checked:
2375 	/* Open all log files and data files in the system
2376 	tablespace: we keep them open until database
2377 	shutdown */
2378 
2379 	fil_open_log_and_system_tablespace_files();
2380 
2381 	err = srv_undo_tablespaces_init(
2382 		create_new_db,
2383 		srv_undo_tablespaces,
2384 		&srv_undo_tablespaces_open);
2385 
2386 	/* If the force recovery is set very high then we carry on regardless
2387 	of all errors. Basically this is fingers crossed mode. */
2388 
2389 	if (err != DB_SUCCESS
2390 	    && srv_force_recovery < SRV_FORCE_NO_UNDO_LOG_SCAN) {
2391 
2392 		return(err);
2393 	}
2394 
2395 	/* Initialize objects used by dict stats gathering thread, which
2396 	can also be used by recovery if it tries to drop some table */
2397 	if (!srv_read_only_mode) {
2398 		dict_stats_thread_init();
2399 	}
2400 
2401 	trx_sys_file_format_init();
2402 
2403 	trx_sys_create();
2404 
2405 	bool srv_monitor_thread_started = false;
2406 
2407 	if (create_new_db) {
2408 
2409 		ut_a(!srv_read_only_mode);
2410 		init_log_online();
2411 
2412 		mtr_start(&mtr);
2413 
2414 		fsp_header_init(0, sum_of_new_sizes, &mtr);
2415 
2416 		mtr_commit(&mtr);
2417 
2418 		/* To maintain backward compatibility we create only
2419 		the first rollback segment before the double write buffer.
2420 		All the remaining rollback segments will be created later,
2421 		after the double write buffer has been created. */
2422 		trx_sys_create_sys_pages();
2423 
2424 		ib_bh = trx_sys_init_at_db_start();
2425 		n_recovered_trx = UT_LIST_GET_LEN(trx_sys->rw_trx_list);
2426 
2427 		/* The purge system needs to create the purge view and
2428 		therefore requires that the trx_sys is inited. */
2429 
2430 		trx_purge_sys_create(srv_n_purge_threads, ib_bh);
2431 
2432 		err = dict_create();
2433 
2434 		if (err != DB_SUCCESS) {
2435 			return(err);
2436 		}
2437 
2438 		srv_startup_is_before_trx_rollback_phase = FALSE;
2439 
2440 		bool success = buf_flush_list(ULINT_MAX, LSN_MAX, NULL);
2441 		ut_a(success);
2442 
2443 		min_flushed_lsn = max_flushed_lsn = log_get_lsn();
2444 
2445 		buf_flush_wait_batch_end(NULL, BUF_FLUSH_LIST);
2446 
2447 		/* Stamp the LSN to the data files. */
2448 		fil_write_flushed_lsn_to_data_files(max_flushed_lsn, 0);
2449 
2450 		fil_flush_file_spaces(FIL_TABLESPACE);
2451 
2452 		create_log_files_rename(logfilename, dirnamelen,
2453 					max_flushed_lsn, logfile0);
2454 	} else {
2455 
2456 		/* Check if we support the max format that is stamped
2457 		on the system tablespace.
2458 		Note:  We are NOT allowed to make any modifications to
2459 		the TRX_SYS_PAGE_NO page before recovery  because this
2460 		page also contains the max_trx_id etc. important system
2461 		variables that are required for recovery.  We need to
2462 		ensure that we return the system to a state where normal
2463 		recovery is guaranteed to work. We do this by
2464 		invalidating the buffer cache, this will force the
2465 		reread of the page and restoration to its last known
2466 		consistent state, this is REQUIRED for the recovery
2467 		process to work. */
2468 		err = trx_sys_file_format_max_check(
2469 			srv_max_file_format_at_startup);
2470 
2471 		if (err != DB_SUCCESS) {
2472 			return(err);
2473 		}
2474 
2475 		/* Invalidate the buffer pool to ensure that we reread
2476 		the page that we read above, during recovery.
2477 		Note that this is not as heavy weight as it seems. At
2478 		this point there will be only ONE page in the buf_LRU
2479 		and there must be no page in the buf_flush list. */
2480 		buf_pool_invalidate();
2481 
2482 		/* Start monitor thread early enough so that e.g. crash
2483 		recovery failing to find free pages in the buffer pool is
2484 		diagnosed. */
2485 		if (!srv_read_only_mode)
2486 		{
2487 			/* Create the thread which prints InnoDB monitor
2488 			info */
2489 			os_thread_create(
2490 				srv_monitor_thread,
2491 				NULL, thread_ids + 4 + SRV_MAX_N_IO_THREADS);
2492 
2493 			srv_monitor_thread_started = true;
2494 		}
2495 
2496 		/* We always try to do a recovery, even if the database had
2497 		been shut down normally: this is the normal startup path */
2498 
2499 		err = recv_recovery_from_checkpoint_start(
2500 			LOG_CHECKPOINT, LSN_MAX,
2501 			min_flushed_lsn, max_flushed_lsn);
2502 
2503 		if (err != DB_SUCCESS) {
2504 
2505 			return(DB_ERROR);
2506 		}
2507 
2508 		init_log_online();
2509 
2510 		/* Since the insert buffer init is in dict_boot, and the
2511 		insert buffer is needed in any disk i/o, first we call
2512 		dict_boot(). Note that trx_sys_init_at_db_start() only needs
2513 		to access space 0, and the insert buffer at this stage already
2514 		works for space 0. */
2515 
2516 		err = dict_boot();
2517 		DBUG_EXECUTE_IF("ib_dic_boot_error",
2518 				err = DB_ERROR;);
2519 		if (err != DB_SUCCESS) {
2520 			return(err);
2521 		}
2522 
2523 		ib_bh = trx_sys_init_at_db_start();
2524 		n_recovered_trx = UT_LIST_GET_LEN(trx_sys->rw_trx_list);
2525 
2526 		/* The purge system needs to create the purge view and
2527 		therefore requires that the trx_sys is inited. */
2528 
2529 		trx_purge_sys_create(srv_n_purge_threads, ib_bh);
2530 
2531 		/* recv_recovery_from_checkpoint_finish needs trx lists which
2532 		are initialized in trx_sys_init_at_db_start(). */
2533 
2534 		recv_recovery_from_checkpoint_finish();
2535 
2536 		if (srv_force_recovery < SRV_FORCE_NO_IBUF_MERGE) {
2537 			/* The following call is necessary for the insert
2538 			buffer to work with multiple tablespaces. We must
2539 			know the mapping between space id's and .ibd file
2540 			names.
2541 
2542 			In a crash recovery, we check that the info in data
2543 			dictionary is consistent with what we already know
2544 			about space id's from the call of
2545 			fil_load_single_table_tablespaces().
2546 
2547 			In a normal startup, we create the space objects for
2548 			every table in the InnoDB data dictionary that has
2549 			an .ibd file.
2550 
2551 			We also determine the maximum tablespace id used. */
2552 			dict_check_t	dict_check;
2553 
2554 			if (recv_needed_recovery) {
2555 				dict_check = DICT_CHECK_ALL_LOADED;
2556 			} else if (n_recovered_trx) {
2557 				dict_check = DICT_CHECK_SOME_LOADED;
2558 			} else {
2559 				dict_check = DICT_CHECK_NONE_LOADED;
2560 			}
2561 
2562 			dict_check_tablespaces_and_store_max_id(dict_check);
2563 		}
2564 
2565 		if (!srv_force_recovery
2566 		    && !recv_sys->found_corrupt_log
2567 		    && (srv_log_file_size_requested != srv_log_file_size
2568 			|| srv_n_log_files_found != srv_n_log_files)) {
2569 			/* Prepare to replace the redo log files. */
2570 
2571 			if (srv_read_only_mode) {
2572 				ib_logf(IB_LOG_LEVEL_ERROR,
2573 					"Cannot resize log files "
2574 					"in read-only mode.");
2575 				return(DB_READ_ONLY);
2576 			}
2577 
2578 			/* Clean the buffer pool. */
2579 			bool success = buf_flush_list(
2580 				ULINT_MAX, LSN_MAX, NULL);
2581 			ut_a(success);
2582 
2583 			RECOVERY_CRASH(1);
2584 
2585 			min_flushed_lsn = max_flushed_lsn = log_get_lsn();
2586 
2587 			ib_logf(IB_LOG_LEVEL_WARN,
2588 				"Resizing redo log from %u*%u to %u*%u pages"
2589 				", LSN=" LSN_PF,
2590 				(unsigned) i,
2591 				(unsigned) srv_log_file_size,
2592 				(unsigned) srv_n_log_files,
2593 				(unsigned) srv_log_file_size_requested,
2594 				max_flushed_lsn);
2595 
2596 			buf_flush_wait_batch_end(NULL, BUF_FLUSH_LIST);
2597 
2598 			RECOVERY_CRASH(2);
2599 
2600 			/* Flush the old log files. */
2601 			log_buffer_flush_to_disk();
2602 			/* If innodb_flush_method=O_DSYNC,
2603 			we need to explicitly flush the log buffers. */
2604 			fil_flush(SRV_LOG_SPACE_FIRST_ID);
2605 
2606 			ut_ad(max_flushed_lsn == log_get_lsn());
2607 
2608 			/* Prohibit redo log writes from any other
2609 			threads until creating a log checkpoint at the
2610 			end of create_log_files(). */
2611 			ut_d(recv_no_log_write = TRUE);
2612 			ut_ad(!buf_pool_check_no_pending_io());
2613 
2614 			RECOVERY_CRASH(3);
2615 
2616 			/* Stamp the LSN to the data files. */
2617 			fil_write_flushed_lsn_to_data_files(
2618 				max_flushed_lsn, 0);
2619 
2620 			fil_flush_file_spaces(FIL_TABLESPACE);
2621 
2622 			RECOVERY_CRASH(4);
2623 
2624 			/* Close and free the redo log files, so that
2625 			we can replace them. */
2626 			fil_close_log_files(true);
2627 
2628 			RECOVERY_CRASH(5);
2629 
2630 			/* Free the old log file space. */
2631 			log_group_close_all();
2632 
2633 			ib_logf(IB_LOG_LEVEL_WARN,
2634 				"Starting to delete and rewrite log files.");
2635 
2636 			srv_log_file_size = srv_log_file_size_requested;
2637 
2638 			err = create_log_files(create_new_db, logfilename,
2639 					       dirnamelen, max_flushed_lsn,
2640 					       logfile0);
2641 
2642 			if (err != DB_SUCCESS) {
2643 				return(err);
2644 			}
2645 
2646 			/* create_log_files() can increase system lsn that is
2647 			why FIL_PAGE_FILE_FLUSH_LSN have to be updated */
2648 			min_flushed_lsn = max_flushed_lsn = log_get_lsn();
2649 			fil_write_flushed_lsn_to_data_files(min_flushed_lsn, 0);
2650 			fil_flush_file_spaces(FIL_TABLESPACE);
2651 
2652 			create_log_files_rename(logfilename, dirnamelen,
2653 						log_get_lsn(), logfile0);
2654 		}
2655 
2656 		srv_startup_is_before_trx_rollback_phase = FALSE;
2657 		recv_recovery_rollback_active();
2658 
2659 		/* It is possible that file_format tag has never
2660 		been set. In this case we initialize it to minimum
2661 		value.  Important to note that we can do it ONLY after
2662 		we have finished the recovery process so that the
2663 		image of TRX_SYS_PAGE_NO is not stale. */
2664 		trx_sys_file_format_tag_init();
2665 	}
2666 
2667 	if (!create_new_db && sum_of_new_sizes > 0) {
2668 		/* New data file(s) were added */
2669 		mtr_start(&mtr);
2670 
2671 		fsp_header_inc_size(0, sum_of_new_sizes, &mtr);
2672 
2673 		mtr_commit(&mtr);
2674 
2675 		/* Immediately write the log record about increased tablespace
2676 		size to disk, so that it is durable even if mysqld would crash
2677 		quickly */
2678 
2679 		log_buffer_flush_to_disk();
2680 	}
2681 
2682 #ifdef UNIV_LOG_ARCHIVE
2683 	if (!srv_read_only_mode) {
2684 		if (!srv_log_archive_on) {
2685 			ut_a(DB_SUCCESS == log_archive_noarchivelog());
2686 		} else {
2687 			bool	start_archive;
2688 
2689 			mutex_enter(&(log_sys->mutex));
2690 
2691 			start_archive = false;
2692 
2693 			if (log_sys->archiving_state == LOG_ARCH_OFF) {
2694 				start_archive = true;
2695 			}
2696 
2697 			mutex_exit(&(log_sys->mutex));
2698 
2699 			if (start_archive) {
2700 				ut_a(DB_SUCCESS == log_archive_archivelog());
2701 			}
2702 		}
2703 	}
2704 #endif /* UNIV_LOG_ARCHIVE */
2705 
2706 	/* fprintf(stderr, "Max allowed record size %lu\n",
2707 	page_get_free_space_of_empty() / 2); */
2708 
2709 	if (buf_dblwr == NULL) {
2710 		/* Create the doublewrite buffer to a new tablespace */
2711 
2712 		buf_dblwr_create();
2713 	}
2714 
2715 	/* Here the double write buffer has already been created and so
2716 	any new rollback segments will be allocated after the double
2717 	write buffer. The default segment should already exist.
2718 	We create the new segments only if it's a new database or
2719 	the database was shutdown cleanly. */
2720 
2721 	/* Note: When creating the extra rollback segments during an upgrade
2722 	we violate the latching order, even if the change buffer is empty.
2723 	We make an exception in sync0sync.cc and check srv_is_being_started
2724 	for that violation. It cannot create a deadlock because we are still
2725 	running in single threaded mode essentially. Only the IO threads
2726 	should be running at this stage. */
2727 
2728 	ut_a(srv_undo_logs > 0);
2729 	ut_a(srv_undo_logs <= TRX_SYS_N_RSEGS);
2730 
2731 	/* The number of rsegs that exist in InnoDB is given by status
2732 	variable srv_available_undo_logs. The number of rsegs to use can
2733 	be set using the dynamic global variable srv_undo_logs. */
2734 
2735 	srv_available_undo_logs = trx_sys_create_rsegs(
2736 		srv_undo_tablespaces, srv_undo_logs);
2737 
2738 	if (srv_available_undo_logs == ULINT_UNDEFINED) {
2739 		/* Can only happen if server is read only. */
2740 		ut_a(srv_read_only_mode);
2741 		srv_undo_logs = ULONG_UNDEFINED;
2742 	}
2743 
2744 	if (!srv_read_only_mode) {
2745 		/* Create the thread which watches the timeouts
2746 		for lock waits */
2747 		os_thread_create(
2748 			lock_wait_timeout_thread,
2749 			NULL, thread_ids + 2 + SRV_MAX_N_IO_THREADS);
2750 
2751 		/* Create the thread which warns of long semaphore waits */
2752 		os_thread_create(
2753 			srv_error_monitor_thread,
2754 			NULL, thread_ids + 3 + SRV_MAX_N_IO_THREADS);
2755 
2756 		/* Create the thread which prints InnoDB monitor info */
2757 		if (!srv_monitor_thread_started) {
2758 
2759 			os_thread_create(
2760 				srv_monitor_thread,
2761 				NULL, thread_ids + 4 + SRV_MAX_N_IO_THREADS);
2762 
2763 			srv_monitor_thread_started = true;
2764 		}
2765 	}
2766 
2767 	/* Create the SYS_FOREIGN and SYS_FOREIGN_COLS system tables */
2768 	err = dict_create_or_check_foreign_constraint_tables();
2769 	if (err != DB_SUCCESS) {
2770 		return(err);
2771 	}
2772 
2773 	/* Create the SYS_TABLESPACES system table */
2774 	err = dict_create_or_check_sys_tablespace();
2775 	if (err != DB_SUCCESS) {
2776 		return(err);
2777 	}
2778 
2779 	/* Create the SYS_ZIP_DICT system table */
2780 	err = dict_create_or_check_sys_zip_dict();
2781 	if (err != DB_SUCCESS) {
2782 		return(err);
2783 	}
2784 
2785 	srv_is_being_started = FALSE;
2786 
2787 	ut_a(trx_purge_state() == PURGE_STATE_INIT);
2788 
2789 	/* Create the master thread which does purge and other utility
2790 	operations */
2791 
2792 	if (!srv_read_only_mode) {
2793 
2794 		os_thread_create(
2795 			srv_master_thread,
2796 			NULL, thread_ids + (1 + SRV_MAX_N_IO_THREADS));
2797 	}
2798 
2799 	if (!srv_read_only_mode
2800 	    && srv_force_recovery < SRV_FORCE_NO_BACKGROUND) {
2801 
2802 		os_thread_create(
2803 			srv_purge_coordinator_thread,
2804 			NULL, thread_ids + 6 + SRV_MAX_N_IO_THREADS);
2805 
2806 		ut_a(UT_ARR_SIZE(thread_ids)
2807 		     > 6 + srv_n_purge_threads + SRV_MAX_N_IO_THREADS);
2808 
2809 		/* We've already created the purge coordinator thread above. */
2810 		for (i = 1; i < srv_n_purge_threads; ++i) {
2811 			os_thread_create(
2812 				srv_worker_thread, NULL,
2813 				thread_ids + 6 + i + SRV_MAX_N_IO_THREADS);
2814 		}
2815 
2816 		srv_start_wait_for_purge_to_start();
2817 
2818 	} else {
2819 		purge_sys->state = PURGE_STATE_DISABLED;
2820 	}
2821 
2822 	if (!srv_read_only_mode) {
2823 		os_thread_create(buf_flush_page_cleaner_thread, NULL, NULL);
2824 	}
2825 	os_thread_create(buf_flush_lru_manager_thread, NULL, NULL);
2826 
2827 #ifdef UNIV_DEBUG
2828 	/* buf_debug_prints = TRUE; */
2829 #endif /* UNIV_DEBUG */
2830 	sum_of_data_file_sizes = 0;
2831 
2832 	for (i = 0; i < srv_n_data_files; i++) {
2833 		sum_of_data_file_sizes += srv_data_file_sizes[i];
2834 	}
2835 
2836 	tablespace_size_in_header = fsp_header_get_tablespace_size();
2837 
2838 	if (!srv_read_only_mode
2839 	    && !srv_auto_extend_last_data_file
2840 	    && sum_of_data_file_sizes != tablespace_size_in_header) {
2841 
2842 		ut_print_timestamp(stderr);
2843 		fprintf(stderr,
2844 			" InnoDB: Error: tablespace size"
2845 			" stored in header is %lu pages, but\n",
2846 			(ulong) tablespace_size_in_header);
2847 		ut_print_timestamp(stderr);
2848 		fprintf(stderr,
2849 			"InnoDB: the sum of data file sizes is %lu pages\n",
2850 			(ulong) sum_of_data_file_sizes);
2851 
2852 		if (srv_force_recovery == 0
2853 		    && sum_of_data_file_sizes < tablespace_size_in_header) {
2854 			/* This is a fatal error, the tail of a tablespace is
2855 			missing */
2856 
2857 			ut_print_timestamp(stderr);
2858 			fprintf(stderr,
2859 				" InnoDB: Cannot start InnoDB."
2860 				" The tail of the system tablespace is\n");
2861 			ut_print_timestamp(stderr);
2862 			fprintf(stderr,
2863 				" InnoDB: missing. Have you edited"
2864 				" innodb_data_file_path in my.cnf in an\n");
2865 			ut_print_timestamp(stderr);
2866 			fprintf(stderr,
2867 				" InnoDB: inappropriate way, removing"
2868 				" ibdata files from there?\n");
2869 			ut_print_timestamp(stderr);
2870 			fprintf(stderr,
2871 				" InnoDB: You can set innodb_force_recovery=1"
2872 				" in my.cnf to force\n");
2873 			ut_print_timestamp(stderr);
2874 			fprintf(stderr,
2875 				" InnoDB: a startup if you are trying"
2876 				" to recover a badly corrupt database.\n");
2877 
2878 			return(DB_ERROR);
2879 		}
2880 	}
2881 
2882 	if (!srv_read_only_mode
2883 	    && srv_auto_extend_last_data_file
2884 	    && sum_of_data_file_sizes < tablespace_size_in_header) {
2885 
2886 		ut_print_timestamp(stderr);
2887 		fprintf(stderr,
2888 			" InnoDB: Error: tablespace size stored in header"
2889 			" is %lu pages, but\n",
2890 			(ulong) tablespace_size_in_header);
2891 		ut_print_timestamp(stderr);
2892 		fprintf(stderr,
2893 			" InnoDB: the sum of data file sizes"
2894 			" is only %lu pages\n",
2895 			(ulong) sum_of_data_file_sizes);
2896 
2897 		if (srv_force_recovery == 0) {
2898 
2899 			ut_print_timestamp(stderr);
2900 			fprintf(stderr,
2901 				" InnoDB: Cannot start InnoDB. The tail of"
2902 				" the system tablespace is\n");
2903 			ut_print_timestamp(stderr);
2904 			fprintf(stderr,
2905 				" InnoDB: missing. Have you edited"
2906 				" innodb_data_file_path in my.cnf in an\n");
2907 			ut_print_timestamp(stderr);
2908 			fprintf(stderr,
2909 				" InnoDB: inappropriate way, removing"
2910 				" ibdata files from there?\n");
2911 			ut_print_timestamp(stderr);
2912 			fprintf(stderr,
2913 				" InnoDB: You can set innodb_force_recovery=1"
2914 				" in my.cnf to force\n");
2915 			ut_print_timestamp(stderr);
2916 			fprintf(stderr,
2917 				" InnoDB: a startup if you are trying to"
2918 				" recover a badly corrupt database.\n");
2919 
2920 			return(DB_ERROR);
2921 		}
2922 	}
2923 
2924 	/* Check that os_fast_mutexes work as expected */
2925 	os_fast_mutex_init(PFS_NOT_INSTRUMENTED, &srv_os_test_mutex);
2926 
2927 	if (0 != os_fast_mutex_trylock(&srv_os_test_mutex)) {
2928 		ut_print_timestamp(stderr);
2929 		fprintf(stderr,
2930 			" InnoDB: Error: pthread_mutex_trylock returns"
2931 			" an unexpected value on\n");
2932 		ut_print_timestamp(stderr);
2933 		fprintf(stderr,
2934 			" InnoDB: success! Cannot continue.\n");
2935 		exit(1);
2936 	}
2937 
2938 	os_fast_mutex_unlock(&srv_os_test_mutex);
2939 
2940 	os_fast_mutex_lock(&srv_os_test_mutex);
2941 
2942 	os_fast_mutex_unlock(&srv_os_test_mutex);
2943 
2944 	os_fast_mutex_free(&srv_os_test_mutex);
2945 
2946 	if (!srv_file_per_table && srv_pass_corrupt_table) {
2947 		fprintf(stderr, "InnoDB: Warning:"
2948 			" The option innodb_file_per_table is disabled,"
2949 			" so using the option innodb_pass_corrupt_table doesn't make sense.\n");
2950 	}
2951 
2952 	if (srv_print_verbose_log) {
2953 		ib_logf(IB_LOG_LEVEL_INFO,
2954 			" Percona XtraDB (http://www.percona.com) %s started; "
2955 			"log sequence number " LSN_PF "",
2956 			INNODB_VERSION_STR, srv_start_lsn);
2957 	}
2958 
2959 	if (srv_force_recovery > 0) {
2960 		ib_logf(IB_LOG_LEVEL_INFO,
2961 			"!!! innodb_force_recovery is set to %lu !!!",
2962 			(ulong) srv_force_recovery);
2963 	}
2964 
2965 	if (srv_force_recovery == 0) {
2966 		/* In the insert buffer we may have even bigger tablespace
2967 		id's, because we may have dropped those tablespaces, but
2968 		insert buffer merge has not had time to clean the records from
2969 		the ibuf tree. */
2970 
2971 		ibuf_update_max_tablespace_id();
2972 	}
2973 
2974 	if (!srv_read_only_mode) {
2975 		/* Create the buffer pool dump/load thread */
2976 		os_thread_create(buf_dump_thread, NULL, NULL);
2977 
2978 		/* Create the dict stats gathering thread */
2979 		os_thread_create(dict_stats_thread, NULL, NULL);
2980 
2981 		/* Create the thread that will optimize the FTS sub-system. */
2982 		fts_optimize_init();
2983 	}
2984 
2985 	srv_was_started = TRUE;
2986 
2987 	return(DB_SUCCESS);
2988 }
2989 
2990 #if 0
2991 /********************************************************************
2992 Sync all FTS cache before shutdown */
2993 static
2994 void
2995 srv_fts_close(void)
2996 /*===============*/
2997 {
2998 	dict_table_t*	table;
2999 
3000 	for (table = UT_LIST_GET_FIRST(dict_sys->table_LRU);
3001 	     table; table = UT_LIST_GET_NEXT(table_LRU, table)) {
3002 		fts_t*          fts = table->fts;
3003 
3004 		if (fts != NULL) {
3005 			fts_sync_table(table);
3006 		}
3007 	}
3008 
3009 	for (table = UT_LIST_GET_FIRST(dict_sys->table_non_LRU);
3010 	     table; table = UT_LIST_GET_NEXT(table_LRU, table)) {
3011 		fts_t*          fts = table->fts;
3012 
3013 		if (fts != NULL) {
3014 			fts_sync_table(table);
3015 		}
3016 	}
3017 }
3018 #endif
3019 
3020 /****************************************************************//**
3021 Shuts down the InnoDB database.
3022 @return	DB_SUCCESS or error code */
3023 UNIV_INTERN
3024 dberr_t
innobase_shutdown_for_mysql(void)3025 innobase_shutdown_for_mysql(void)
3026 /*=============================*/
3027 {
3028 	ulint	i;
3029 
3030 	if (!srv_was_started) {
3031 		if (srv_is_being_started) {
3032 			ib_logf(IB_LOG_LEVEL_WARN,
3033 				"Shutting down an improperly started, "
3034 				"or created database!");
3035 		}
3036 
3037 		return(DB_SUCCESS);
3038 	}
3039 
3040 	if (!srv_read_only_mode) {
3041 		/* Shutdown the FTS optimize sub system. */
3042 		fts_optimize_start_shutdown();
3043 
3044 		fts_optimize_end();
3045 	}
3046 
3047 	/* 1. Flush the buffer pool to disk, write the current lsn to
3048 	the tablespace header(s), and copy all log data to archive.
3049 	The step 1 is the real InnoDB shutdown. The remaining steps 2 - ...
3050 	just free data structures after the shutdown. */
3051 
3052 	logs_empty_and_mark_files_at_shutdown();
3053 
3054 	if (srv_conc_get_active_threads() != 0) {
3055 		ib_logf(IB_LOG_LEVEL_WARN,
3056 			"Query counter shows %ld queries still "
3057 			"inside InnoDB at shutdown",
3058 			srv_conc_get_active_threads());
3059 	}
3060 
3061 	/* 2. Make all threads created by InnoDB to exit */
3062 
3063 	srv_shutdown_state = SRV_SHUTDOWN_EXIT_THREADS;
3064 
3065 	/* All threads end up waiting for certain events. Put those events
3066 	to the signaled state. Then the threads will exit themselves after
3067 	os_event_wait(). */
3068 
3069 	for (i = 0; i < 1000; i++) {
3070 		/* NOTE: IF YOU CREATE THREADS IN INNODB, YOU MUST EXIT THEM
3071 		HERE OR EARLIER */
3072 
3073 		if (!srv_read_only_mode) {
3074 			/* a. Let the lock timeout thread exit */
3075 			os_event_set(lock_sys->timeout_event);
3076 
3077 			/* b. srv error monitor thread exits automatically,
3078 			no need to do anything here */
3079 
3080 			/* c. We wake the master thread so that it exits */
3081 			srv_wake_master_thread();
3082 
3083 			/* d. Wakeup purge threads. */
3084 			srv_purge_wakeup();
3085 		}
3086 
3087 		/* e. Exit the i/o threads */
3088 
3089 		os_aio_wake_all_threads_at_shutdown();
3090 
3091 		/* f. dict_stats_thread is signaled from
3092 		logs_empty_and_mark_files_at_shutdown() and should have
3093 		already quit or is quitting right now. */
3094 
3095 		os_rmb;
3096 		if (os_thread_count == 0) {
3097 			/* All the threads have exited or are just exiting;
3098 			NOTE that the threads may not have completed their
3099 			exit yet. Should we use pthread_join() to make sure
3100 			they have exited? If we did, we would have to
3101 			remove the pthread_detach() from
3102 			os_thread_exit().  Now we just sleep 0.1
3103 			seconds and hope that is enough! */
3104 
3105 			os_thread_sleep(100000);
3106 
3107 			break;
3108 		}
3109 
3110 		os_thread_sleep(100000);
3111 	}
3112 
3113 	if (i == 1000) {
3114 		ib_logf(IB_LOG_LEVEL_WARN,
3115 			"%lu threads created by InnoDB"
3116 			" had not exited at shutdown!",
3117 			(ulong) os_thread_count);
3118 	}
3119 
3120 	if (srv_monitor_file) {
3121 		fclose(srv_monitor_file);
3122 		srv_monitor_file = 0;
3123 		if (srv_monitor_file_name) {
3124 			unlink(srv_monitor_file_name);
3125 			mem_free(srv_monitor_file_name);
3126 		}
3127 	}
3128 
3129 	if (srv_dict_tmpfile) {
3130 		fclose(srv_dict_tmpfile);
3131 		srv_dict_tmpfile = 0;
3132 	}
3133 
3134 	if (srv_misc_tmpfile) {
3135 		fclose(srv_misc_tmpfile);
3136 		srv_misc_tmpfile = 0;
3137 	}
3138 
3139 	if (!srv_read_only_mode) {
3140 		dict_stats_thread_deinit();
3141 	}
3142 
3143 	/* This must be disabled before closing the buffer pool
3144 	and closing the data dictionary.  */
3145 	btr_search_disable();
3146 
3147 	ibuf_close();
3148 	log_online_shutdown();
3149 	log_shutdown();
3150 	trx_sys_file_format_close();
3151 	trx_sys_close();
3152 	lock_sys_close();
3153 
3154 	/* We don't create these mutexes in RO mode because we don't create
3155 	the temp files that the cover. */
3156 	if (!srv_read_only_mode) {
3157 		mutex_free(&srv_monitor_file_mutex);
3158 		mutex_free(&srv_dict_tmpfile_mutex);
3159 		mutex_free(&srv_misc_tmpfile_mutex);
3160 	}
3161 
3162 	dict_close();
3163 	btr_search_sys_free();
3164 
3165 	/* 3. Free all InnoDB's own mutexes and the os_fast_mutexes inside
3166 	them */
3167 	os_aio_free();
3168 	que_close();
3169 	row_mysql_close();
3170 	srv_mon_free();
3171 	srv_free();
3172 	fil_close();
3173 
3174 	/* 4. Free all allocated memory */
3175 
3176 	pars_lexer_close();
3177 	log_mem_free();
3178 	buf_pool_free(srv_buf_pool_instances);
3179 	mem_close();
3180 	sync_close();
3181 
3182 	/* ut_free_all_mem() frees all allocated memory not freed yet
3183 	in shutdown, and it will also free the ut_list_mutex, so it
3184 	should be the last one for all operation */
3185 	ut_free_all_mem();
3186 
3187 	os_rmb;
3188 	if (os_thread_count != 0
3189 	    || os_event_count != 0
3190 	    || os_mutex_count != 0
3191 	    || os_fast_mutex_count != 0) {
3192 		ib_logf(IB_LOG_LEVEL_WARN,
3193 			"Some resources were not cleaned up in shutdown: "
3194 			"threads %lu, events %lu, os_mutexes %lu, "
3195 			"os_fast_mutexes %lu",
3196 			(ulong) os_thread_count, (ulong) os_event_count,
3197 			(ulong) os_mutex_count, (ulong) os_fast_mutex_count);
3198 	}
3199 
3200 	if (dict_foreign_err_file) {
3201 		fclose(dict_foreign_err_file);
3202 	}
3203 
3204 	if (srv_print_verbose_log) {
3205 		ib_logf(IB_LOG_LEVEL_INFO,
3206 			"Shutdown completed; log sequence number " LSN_PF "",
3207 			srv_shutdown_lsn);
3208 	}
3209 
3210 	srv_was_started = FALSE;
3211 	srv_start_has_been_called = FALSE;
3212 
3213 	return(DB_SUCCESS);
3214 }
3215 #endif /* !UNIV_HOTBACKUP */
3216 
3217 
3218 /********************************************************************
3219 Signal all per-table background threads to shutdown, and wait for them to do
3220 so. */
3221 UNIV_INTERN
3222 void
srv_shutdown_table_bg_threads(void)3223 srv_shutdown_table_bg_threads(void)
3224 /*===============================*/
3225 {
3226 	dict_table_t*	table;
3227 	dict_table_t*	first;
3228 	dict_table_t*	last = NULL;
3229 
3230 	mutex_enter(&dict_sys->mutex);
3231 
3232 	/* Signal all threads that they should stop. */
3233 	table = UT_LIST_GET_FIRST(dict_sys->table_LRU);
3234 	first = table;
3235 	while (table) {
3236 		dict_table_t*	next;
3237 		fts_t*		fts = table->fts;
3238 
3239 		if (fts != NULL) {
3240 			fts_start_shutdown(table, fts);
3241 		}
3242 
3243 		next = UT_LIST_GET_NEXT(table_LRU, table);
3244 
3245 		if (!next) {
3246 			last = table;
3247 		}
3248 
3249 		table = next;
3250 	}
3251 
3252 	/* We must release dict_sys->mutex here; if we hold on to it in the
3253 	loop below, we will deadlock if any of the background threads try to
3254 	acquire it (for example, the FTS thread by calling que_eval_sql).
3255 
3256 	Releasing it here and going through dict_sys->table_LRU without
3257 	holding it is safe because:
3258 
3259 	 a) MySQL only starts the shutdown procedure after all client
3260 	 threads have been disconnected and no new ones are accepted, so no
3261 	 new tables are added or old ones dropped.
3262 
3263 	 b) Despite its name, the list is not LRU, and the order stays
3264 	 fixed.
3265 
3266 	To safeguard against the above assumptions ever changing, we store
3267 	the first and last items in the list above, and then check that
3268 	they've stayed the same below. */
3269 
3270 	mutex_exit(&dict_sys->mutex);
3271 
3272 	/* Wait for the threads of each table to stop. This is not inside
3273 	the above loop, because by signaling all the threads first we can
3274 	overlap their shutting down delays. */
3275 	table = UT_LIST_GET_FIRST(dict_sys->table_LRU);
3276 	ut_a(first == table);
3277 	while (table) {
3278 		dict_table_t*	next;
3279 		fts_t*		fts = table->fts;
3280 
3281 		if (fts != NULL) {
3282 			fts_shutdown(table, fts);
3283 		}
3284 
3285 		next = UT_LIST_GET_NEXT(table_LRU, table);
3286 
3287 		if (table == last) {
3288 			ut_a(!next);
3289 		}
3290 
3291 		table = next;
3292 	}
3293 }
3294 
3295 /*****************************************************************//**
3296 Get the meta-data filename from the table name. */
3297 UNIV_INTERN
3298 void
srv_get_meta_data_filename(dict_table_t * table,char * filename,ulint max_len)3299 srv_get_meta_data_filename(
3300 /*=======================*/
3301 	dict_table_t*	table,		/*!< in: table */
3302 	char*			filename,	/*!< out: filename */
3303 	ulint			max_len)	/*!< in: filename max length */
3304 {
3305 	ulint			len;
3306 	char*			path;
3307 	char*			suffix;
3308 	static const ulint	suffix_len = strlen(".cfg");
3309 
3310 	if (DICT_TF_HAS_DATA_DIR(table->flags)) {
3311 		dict_get_and_save_data_dir_path(table, false);
3312 		ut_a(table->data_dir_path);
3313 
3314 		path = os_file_make_remote_pathname(
3315 			table->data_dir_path, table->name, "cfg");
3316 	} else {
3317 		path = fil_make_ibd_name(table->name, false);
3318 	}
3319 
3320 	ut_a(path);
3321 	len = ut_strlen(path);
3322 	ut_a(max_len >= len);
3323 
3324 	suffix = path + (len - suffix_len);
3325 	if (strncmp(suffix, ".cfg", suffix_len) == 0) {
3326 		strcpy(filename, path);
3327 	} else {
3328 		ut_ad(strncmp(suffix, ".ibd", suffix_len) == 0);
3329 
3330 		strncpy(filename, path, max_len - suffix_len);
3331 		suffix = filename + (len - suffix_len);
3332 		strcpy(suffix, ".cfg");
3333 	}
3334 
3335 	mem_free(path);
3336 
3337 	srv_normalize_path_for_win(filename);
3338 }
3339