1 /*****************************************************************************
2 
3 Copyright (c) 1996, 2017, Oracle and/or its affiliates. All rights reserved.
4 Copyright (c) 2008, Google Inc.
5 Copyright (c) 2009, Percona Inc.
6 
7 Portions of this file contain modifications contributed and copyrighted by
8 Google, Inc. Those modifications are gratefully acknowledged and are described
9 briefly in the InnoDB documentation. The contributions by Google are
10 incorporated with their permission, and subject to the conditions contained in
11 the file COPYING.Google.
12 
13 Portions of this file contain modifications contributed and copyrighted
14 by Percona Inc.. Those modifications are
15 gratefully acknowledged and are described briefly in the InnoDB
16 documentation. The contributions by Percona Inc. are incorporated with
17 their permission, and subject to the conditions contained in the file
18 COPYING.Percona.
19 
20 This program is free software; you can redistribute it and/or modify
21 it under the terms of the GNU General Public License, version 2.0,
22 as published by the Free Software Foundation.
23 
24 This program is also distributed with certain software (including
25 but not limited to OpenSSL) that is licensed under separate terms,
26 as designated in a particular file or component or in included license
27 documentation.  The authors of MySQL hereby grant you an additional
28 permission to link the program and your derivative works with the
29 separately licensed software that they have included with MySQL.
30 
31 This program is distributed in the hope that it will be useful,
32 but WITHOUT ANY WARRANTY; without even the implied warranty of
33 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
34 GNU General Public License, version 2.0, for more details.
35 
36 You should have received a copy of the GNU General Public License along with
37 this program; if not, write to the Free Software Foundation, Inc.,
38 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
39 
40 *****************************************************************************/
41 
42 /********************************************************************//**
43 @file srv/srv0start.cc
44 Starts the InnoDB database server
45 
46 Created 2/16/1996 Heikki Tuuri
47 *************************************************************************/
48 
49 #include "mysqld.h"
50 #include "pars0pars.h"
51 #include "row0ftsort.h"
52 #include "ut0mem.h"
53 #include "mem0mem.h"
54 #include "data0data.h"
55 #include "data0type.h"
56 #include "dict0dict.h"
57 #include "buf0buf.h"
58 #include "buf0dump.h"
59 #include "os0file.h"
60 #include "os0thread.h"
61 #include "fil0fil.h"
62 #include "fsp0fsp.h"
63 #include "rem0rec.h"
64 #include "mtr0mtr.h"
65 #include "log0log.h"
66 #include "log0recv.h"
67 #include "page0page.h"
68 #include "page0cur.h"
69 #include "trx0trx.h"
70 #include "trx0sys.h"
71 #include "btr0btr.h"
72 #include "btr0cur.h"
73 #include "rem0rec.h"
74 #include "ibuf0ibuf.h"
75 #include "srv0start.h"
76 #include "srv0srv.h"
77 #ifndef UNIV_HOTBACKUP
78 # include "trx0rseg.h"
79 # include "os0proc.h"
80 # include "sync0sync.h"
81 # include "buf0flu.h"
82 # include "buf0rea.h"
83 # include "dict0boot.h"
84 # include "dict0load.h"
85 # include "dict0stats_bg.h"
86 # include "que0que.h"
87 # include "usr0sess.h"
88 # include "lock0lock.h"
89 # include "trx0roll.h"
90 # include "trx0purge.h"
91 # include "lock0lock.h"
92 # include "pars0pars.h"
93 # include "btr0sea.h"
94 # include "rem0cmp.h"
95 # include "dict0crea.h"
96 # include "row0ins.h"
97 # include "row0sel.h"
98 # include "row0upd.h"
99 # include "row0row.h"
100 # include "row0mysql.h"
101 # include "btr0pcur.h"
102 # include "os0sync.h"
103 # include "zlib.h"
104 # include "ut0crc32.h"
105 
/** Log sequence number immediately after startup */
UNIV_INTERN lsn_t	srv_start_lsn;
/** Log sequence number at shutdown */
UNIV_INTERN lsn_t	srv_shutdown_lsn;

#ifdef HAVE_DARWIN_THREADS
# include <sys/utsname.h>
/** TRUE if the F_FULLFSYNC option is available */
UNIV_INTERN ibool	srv_have_fullfsync = FALSE;
#endif

/** TRUE if a raw partition is in use; set by open_or_create_data_files()
when a data file is marked SRV_NEW_RAW or SRV_OLD_RAW */
UNIV_INTERN ibool	srv_start_raw_disk_in_use = FALSE;

/** Space id at which the undo tablespaces start */
ulint	srv_undo_space_id_start;

/** TRUE if the server is being started, before rolling back any
incomplete transactions */
UNIV_INTERN ibool	srv_startup_is_before_trx_rollback_phase = FALSE;
/** TRUE if the server is being started */
UNIV_INTERN ibool	srv_is_being_started = FALSE;
/** TRUE if the server was successfully started */
UNIV_INTERN ibool	srv_was_started = FALSE;
/** TRUE if innobase_start_or_create_for_mysql() has been called */
static ibool		srv_start_has_been_called = FALSE;

/** Shutdown progress indicator. At a shutdown this value climbs from
SRV_SHUTDOWN_NONE to SRV_SHUTDOWN_CLEANUP and then to
SRV_SHUTDOWN_LAST_PHASE, and so on. io_handler_thread() keeps serving
i/o until the value becomes SRV_SHUTDOWN_EXIT_THREADS. */
UNIV_INTERN enum srv_shutdown_state	srv_shutdown_state = SRV_SHUTDOWN_NONE;

/** File handles of the files comprising the system tablespace;
open_or_create_data_files() refuses configurations with 1000 or more
data files. create_log_files() also passes elements of this array to
create_log_file() as the handle for each log file it creates. */
static pfs_os_file_t	files[1000];

/** io_handler_thread parameters for thread identification */
static ulint		n[SRV_MAX_N_IO_THREADS + 6];
/** io_handler_thread identifiers, 32 is the maximum number of purge threads  */
static os_thread_id_t	thread_ids[SRV_MAX_N_IO_THREADS + 6 + 32];

/** We use this mutex to test the return value of pthread_mutex_trylock
   on successful locking. HP-UX does NOT return 0, though Linux et al do. */
static os_fast_mutex_t	srv_os_test_mutex;

/** Name of srv_monitor_file */
static char*	srv_monitor_file_name;
151 #endif /* !UNIV_HOTBACKUP */
152 
/** Default undo tablespace size (10MB), expressed as a number of pages
of the default page size UNIV_PAGE_SIZE_DEF. */
static const ulint SRV_UNDO_TABLESPACE_SIZE_IN_PAGES =
	((1024 * 1024) * 10) / UNIV_PAGE_SIZE_DEF;

/** Alias for OS_AIO_N_PENDING_IOS_PER_THREAD.
NOTE(review): presumably the number of pending i/o slots reserved per
i/o handler thread; confirm against the definition in os0file.h. */
#define SRV_N_PENDING_IOS_PER_THREAD	OS_AIO_N_PENDING_IOS_PER_THREAD
/** NOTE(review): presumably the upper limit on pending synchronous
i/o operations; it is not referenced in the part of the file reviewed
here, confirm at its point of use. */
#define SRV_MAX_N_PENDING_SYNC_IOS	100
160 
/** Converts a size in bytes to a number of pages of UNIV_PAGE_SIZE bytes.
The size is first rounded down to whole megabytes, similarly to what
srv_parse_megabytes() does for a quantity without a unit suffix. The whole
expansion is parenthesized so that an adjacent operator or cast applies to
the complete result and not only to its first factor.
@param size	size in bytes
@return the number of pages contained in the whole megabytes of size */
#define CALC_NUMBER_OF_PAGES(size)  (((size) / (1024 * 1024)) * \
				  ((1024 * 1024) / (UNIV_PAGE_SIZE)))
#ifdef UNIV_PFS_THREAD
/* Keys to register InnoDB threads with performance schema. Each thread
passes its key to pfs_register_thread() when it starts, as
io_handler_thread() does with io_handler_thread_key. */
UNIV_INTERN mysql_pfs_key_t	io_handler_thread_key;
UNIV_INTERN mysql_pfs_key_t	srv_lock_timeout_thread_key;
UNIV_INTERN mysql_pfs_key_t	srv_error_monitor_thread_key;
UNIV_INTERN mysql_pfs_key_t	srv_monitor_thread_key;
UNIV_INTERN mysql_pfs_key_t	srv_master_thread_key;
UNIV_INTERN mysql_pfs_key_t	srv_purge_thread_key;
#endif /* UNIV_PFS_THREAD */
173 
174 /*********************************************************************//**
175 Convert a numeric string that optionally ends in G or M or K, to a number
176 containing megabytes.
177 @return	next character in string */
178 static
179 char*
srv_parse_megabytes(char * str,ulint * megs)180 srv_parse_megabytes(
181 /*================*/
182 	char*	str,	/*!< in: string containing a quantity in bytes */
183 	ulint*	megs)	/*!< out: the number in megabytes */
184 {
185 	char*	endp;
186 	ulint	size;
187 
188 	size = strtoul(str, &endp, 10);
189 
190 	str = endp;
191 
192 	switch (*str) {
193 	case 'G': case 'g':
194 		size *= 1024;
195 		/* fall through */
196 	case 'M': case 'm':
197 		str++;
198 		break;
199 	case 'K': case 'k':
200 		size /= 1024;
201 		str++;
202 		break;
203 	default:
204 		size /= 1024 * 1024;
205 		break;
206 	}
207 
208 	*megs = size;
209 	return(str);
210 }
211 
212 /*********************************************************************//**
213 Check if a file can be opened in read-write mode.
214 @return	true if it doesn't exist or can be opened in rw mode. */
215 static
216 bool
srv_file_check_mode(const char * name)217 srv_file_check_mode(
218 /*================*/
219 	const char*	name)		/*!< in: filename to check */
220 {
221 	os_file_stat_t	stat;
222 
223 	memset(&stat, 0x0, sizeof(stat));
224 
225 	dberr_t		err = os_file_get_status(name, &stat, true);
226 
227 	if (err == DB_FAIL) {
228 
229 		ib_logf(IB_LOG_LEVEL_ERROR,
230 			"os_file_get_status() failed on '%s'. Can't determine "
231 			"file permissions", name);
232 
233 		return(false);
234 
235 	} else if (err == DB_SUCCESS) {
236 
237 		/* Note: stat.rw_perm is only valid of files */
238 
239 		if (stat.type == OS_FILE_TYPE_FILE) {
240 
241 			if (!stat.rw_perm) {
242 
243 				ib_logf(IB_LOG_LEVEL_ERROR,
244 					"%s can't be opened in %s mode",
245 					name,
246 					srv_read_only_mode
247 					? "read" : "read-write");
248 
249 				return(false);
250 			}
251 		} else {
252 			/* Not a regular file, bail out. */
253 
254 			ib_logf(IB_LOG_LEVEL_ERROR,
255 				"'%s' not a regular file.", name);
256 
257 			return(false);
258 		}
259 	} else {
260 
261 		/* This is OK. If the file create fails on RO media, there
262 		is nothing we can do. */
263 
264 		ut_a(err == DB_NOT_FOUND);
265 	}
266 
267 	return(true);
268 }
269 
270 /*********************************************************************//**
271 Reads the data files and their sizes from a character string given in
272 the .cnf file.
273 @return	TRUE if ok, FALSE on parse error */
274 UNIV_INTERN
275 ibool
srv_parse_data_file_paths_and_sizes(char * str)276 srv_parse_data_file_paths_and_sizes(
277 /*================================*/
278 	char*	str)	/*!< in/out: the data file path string */
279 {
280 	char*	input_str;
281 	char*	path;
282 	ulint	size;
283 	ulint	i	= 0;
284 
285 	srv_auto_extend_last_data_file = FALSE;
286 	srv_last_file_size_max = 0;
287 	srv_data_file_names = NULL;
288 	srv_data_file_sizes = NULL;
289 	srv_data_file_is_raw_partition = NULL;
290 
291 	input_str = str;
292 
293 	/* First calculate the number of data files and check syntax:
294 	path:size[M | G];path:size[M | G]... . Note that a Windows path may
295 	contain a drive name and a ':'. */
296 
297 	while (*str != '\0') {
298 		path = str;
299 
300 		while ((*str != ':' && *str != '\0')
301 		       || (*str == ':'
302 			   && (*(str + 1) == '\\' || *(str + 1) == '/'
303 			       || *(str + 1) == ':'))) {
304 			str++;
305 		}
306 
307 		if (*str == '\0') {
308 			return(FALSE);
309 		}
310 
311 		str++;
312 
313 		str = srv_parse_megabytes(str, &size);
314 
315 		if (0 == strncmp(str, ":autoextend",
316 				 (sizeof ":autoextend") - 1)) {
317 
318 			str += (sizeof ":autoextend") - 1;
319 
320 			if (0 == strncmp(str, ":max:",
321 					 (sizeof ":max:") - 1)) {
322 
323 				str += (sizeof ":max:") - 1;
324 
325 				str = srv_parse_megabytes(str, &size);
326 			}
327 
328 			if (*str != '\0') {
329 
330 				return(FALSE);
331 			}
332 		}
333 
334 		if (strlen(str) >= 6
335 		    && *str == 'n'
336 		    && *(str + 1) == 'e'
337 		    && *(str + 2) == 'w') {
338 			str += 3;
339 		}
340 
341 		if (*str == 'r' && *(str + 1) == 'a' && *(str + 2) == 'w') {
342 			str += 3;
343 		}
344 
345 		if (size == 0) {
346 			return(FALSE);
347 		}
348 
349 		i++;
350 
351 		if (*str == ';') {
352 			str++;
353 		} else if (*str != '\0') {
354 
355 			return(FALSE);
356 		}
357 	}
358 
359 	if (i == 0) {
360 		/* If innodb_data_file_path was defined it must contain
361 		at least one data file definition */
362 
363 		return(FALSE);
364 	}
365 
366 	srv_data_file_names = static_cast<char**>(
367 		malloc(i * sizeof *srv_data_file_names));
368 
369 	srv_data_file_sizes = static_cast<ulint*>(
370 		malloc(i * sizeof *srv_data_file_sizes));
371 
372 	srv_data_file_is_raw_partition = static_cast<ulint*>(
373 		malloc(i * sizeof *srv_data_file_is_raw_partition));
374 
375 	srv_n_data_files = i;
376 
377 	/* Then store the actual values to our arrays */
378 
379 	str = input_str;
380 	i = 0;
381 
382 	while (*str != '\0') {
383 		path = str;
384 
385 		/* Note that we must step over the ':' in a Windows path;
386 		a Windows path normally looks like C:\ibdata\ibdata1:1G, but
387 		a Windows raw partition may have a specification like
388 		\\.\C::1Gnewraw or \\.\PHYSICALDRIVE2:1Gnewraw */
389 
390 		while ((*str != ':' && *str != '\0')
391 		       || (*str == ':'
392 			   && (*(str + 1) == '\\' || *(str + 1) == '/'
393 			       || *(str + 1) == ':'))) {
394 			str++;
395 		}
396 
397 		if (*str == ':') {
398 			/* Make path a null-terminated string */
399 			*str = '\0';
400 			str++;
401 		}
402 
403 		str = srv_parse_megabytes(str, &size);
404 
405 		srv_data_file_names[i] = path;
406 		srv_data_file_sizes[i] = size;
407 
408 		if (0 == strncmp(str, ":autoextend",
409 				 (sizeof ":autoextend") - 1)) {
410 
411 			srv_auto_extend_last_data_file = TRUE;
412 
413 			str += (sizeof ":autoextend") - 1;
414 
415 			if (0 == strncmp(str, ":max:",
416 					 (sizeof ":max:") - 1)) {
417 
418 				str += (sizeof ":max:") - 1;
419 
420 				str = srv_parse_megabytes(
421 					str, &srv_last_file_size_max);
422 			}
423 
424 			if (*str != '\0') {
425 
426 				return(FALSE);
427 			}
428 		}
429 
430 		(srv_data_file_is_raw_partition)[i] = 0;
431 
432 		if (strlen(str) >= 6
433 		    && *str == 'n'
434 		    && *(str + 1) == 'e'
435 		    && *(str + 2) == 'w') {
436 			str += 3;
437 			/* Initialize new raw device only during bootstrap */
438 			(srv_data_file_is_raw_partition)[i] =
439 			opt_bootstrap ? SRV_NEW_RAW : SRV_OLD_RAW;
440 		}
441 
442 		if (*str == 'r' && *(str + 1) == 'a' && *(str + 2) == 'w') {
443 			str += 3;
444 
445 			/* Initialize new raw device only during bootstrap */
446 			if ((srv_data_file_is_raw_partition)[i] == 0) {
447 				(srv_data_file_is_raw_partition)[i] =
448 				opt_bootstrap ? SRV_NEW_RAW : SRV_OLD_RAW;
449 			}
450 		}
451 
452 		i++;
453 
454 		if (*str == ';') {
455 			str++;
456 		}
457 	}
458 
459 	return(TRUE);
460 }
461 
462 /*********************************************************************//**
463 Frees the memory allocated by srv_parse_data_file_paths_and_sizes()
464 and srv_parse_log_group_home_dirs(). */
465 UNIV_INTERN
466 void
srv_free_paths_and_sizes(void)467 srv_free_paths_and_sizes(void)
468 /*==========================*/
469 {
470 	free(srv_data_file_names);
471 	srv_data_file_names = NULL;
472 	free(srv_data_file_sizes);
473 	srv_data_file_sizes = NULL;
474 	free(srv_data_file_is_raw_partition);
475 	srv_data_file_is_raw_partition = NULL;
476 }
477 
478 #ifndef UNIV_HOTBACKUP
479 /********************************************************************//**
480 I/o-handler thread function.
481 @return	OS_THREAD_DUMMY_RETURN */
482 extern "C" UNIV_INTERN
483 os_thread_ret_t
DECLARE_THREAD(io_handler_thread)484 DECLARE_THREAD(io_handler_thread)(
485 /*==============================*/
486 	void*	arg)	/*!< in: pointer to the number of the segment in
487 			the aio array */
488 {
489 	ulint	segment;
490 
491 	segment = *((ulint*) arg);
492 
493 #ifdef UNIV_DEBUG_THREAD_CREATION
494 	fprintf(stderr, "Io handler thread %lu starts, id %lu\n", segment,
495 		os_thread_pf(os_thread_get_curr_id()));
496 #endif
497 
498 #ifdef UNIV_PFS_THREAD
499 	pfs_register_thread(io_handler_thread_key);
500 #endif /* UNIV_PFS_THREAD */
501 
502 	while (srv_shutdown_state != SRV_SHUTDOWN_EXIT_THREADS) {
503 		fil_aio_wait(segment);
504 	}
505 
506 	/* We count the number of threads in os_thread_exit(). A created
507 	thread should always use that to exit and not use return() to exit.
508 	The thread actually never comes here because it is exited in an
509 	os_event_wait(). */
510 
511 	os_thread_exit(NULL);
512 
513 	OS_THREAD_DUMMY_RETURN;
514 }
515 #endif /* !UNIV_HOTBACKUP */
516 
517 /*********************************************************************//**
518 Normalizes a directory path for Windows: converts slashes to backslashes. */
519 UNIV_INTERN
520 void
srv_normalize_path_for_win(char * str MY_ATTRIBUTE ((unused)))521 srv_normalize_path_for_win(
522 /*=======================*/
523 	char*	str MY_ATTRIBUTE((unused)))	/*!< in/out: null-terminated
524 						character string */
525 {
526 #ifdef __WIN__
527 	for (; *str; str++) {
528 
529 		if (*str == '/') {
530 			*str = '\\';
531 		}
532 	}
533 #endif
534 }
535 
536 #ifndef UNIV_HOTBACKUP
537 /*********************************************************************//**
538 Creates a log file.
539 @return	DB_SUCCESS or error code */
540 static MY_ATTRIBUTE((nonnull, warn_unused_result))
541 dberr_t
create_log_file(pfs_os_file_t * file,const char * name)542 create_log_file(
543 /*============*/
544 	pfs_os_file_t*	file,	/*!< out: file handle */
545 	const char*	name)	/*!< in: log file name */
546 {
547 	ibool		ret;
548 
549 	*file = os_file_create(
550 		innodb_file_log_key, name,
551 		OS_FILE_CREATE|OS_FILE_ON_ERROR_NO_EXIT, OS_FILE_NORMAL,
552 		OS_LOG_FILE, &ret);
553 
554 	if (!ret) {
555 		ib_logf(IB_LOG_LEVEL_ERROR, "Cannot create %s", name);
556 		return(DB_ERROR);
557 	}
558 
559 	ib_logf(IB_LOG_LEVEL_INFO,
560 		"Setting log file %s size to %lu MB",
561 		name, (ulong) srv_log_file_size
562 		>> (20 - UNIV_PAGE_SIZE_SHIFT));
563 
564 	ret = os_file_set_size(name, *file,
565 			       (os_offset_t) srv_log_file_size
566 			       << UNIV_PAGE_SIZE_SHIFT);
567 	if (!ret) {
568 		ib_logf(IB_LOG_LEVEL_ERROR, "Cannot set log file"
569 			" %s to size %lu MB", name, (ulong) srv_log_file_size
570 			>> (20 - UNIV_PAGE_SIZE_SHIFT));
571 		return(DB_ERROR);
572 	}
573 
574 	ret = os_file_close(*file);
575 	ut_a(ret);
576 
577 	return(DB_SUCCESS);
578 }
579 
/** Initial number of the first redo log file. create_log_files() creates
the first log file under this number and create_log_files_rename() renames
it to ib_logfile0 afterwards. */
#define INIT_LOG_FILE0	(SRV_N_LOG_FILES_MAX + 1)

/** Crash injection point. In builds without DBUG_OFF the process prints
the value of srv_force_recovery_crash and terminates with exit status 3
when that variable equals x; with DBUG_OFF the macro expands to nothing.
The argument is parenthesized so that any expression can be passed.
@param x	number of the crash point */
#ifdef DBUG_OFF
# define RECOVERY_CRASH(x) do {} while(0)
#else
# define RECOVERY_CRASH(x) do {						\
	if (srv_force_recovery_crash == (x)) {				\
		fprintf(stderr, "innodb_force_recovery_crash=%lu\n",	\
			srv_force_recovery_crash);			\
		fflush(stderr);						\
		exit(3);						\
	}								\
} while (0)
#endif
595 
596 /*********************************************************************//**
597 Creates all log files.
598 @return	DB_SUCCESS or error code */
599 static
600 dberr_t
create_log_files(bool create_new_db,char * logfilename,size_t dirnamelen,lsn_t lsn,char * & logfile0)601 create_log_files(
602 /*=============*/
603 	bool	create_new_db,	/*!< in: TRUE if new database is being
604 				created */
605 	char*	logfilename,	/*!< in/out: buffer for log file name */
606 	size_t	dirnamelen,	/*!< in: length of the directory path */
607 	lsn_t	lsn,		/*!< in: FIL_PAGE_FILE_FLUSH_LSN value */
608 	char*&	logfile0)	/*!< out: name of the first log file */
609 {
610 	if (srv_read_only_mode) {
611 		ib_logf(IB_LOG_LEVEL_ERROR,
612 			"Cannot create log files in read-only mode");
613 		return(DB_READ_ONLY);
614 	}
615 
616 	/* We prevent system tablespace creation with existing files in
617 	data directory. So we do not delete log files when creating new system
618 	tablespace */
619 	if (!create_new_db) {
620 		/* Remove any old log files. */
621 		for (unsigned i = 0; i <= INIT_LOG_FILE0; i++) {
622 			sprintf(logfilename + dirnamelen, "ib_logfile%u", i);
623 
624 			/* Ignore errors about non-existent files or files
625 			that cannot be removed. The create_log_file() will
626 			return an error when the file exists. */
627 #ifdef __WIN__
628 			DeleteFile((LPCTSTR) logfilename);
629 #else
630 			unlink(logfilename);
631 #endif
632 			/* Crashing after deleting the first
633 			file should be recoverable. The buffer
634 			pool was clean, and we can simply create
635 			all log files from the scratch. */
636 			RECOVERY_CRASH(6);
637 		}
638 	}
639 
640 	ut_ad(!buf_pool_check_no_pending_io());
641 
642 	RECOVERY_CRASH(7);
643 
644 	for (unsigned i = 0; i < srv_n_log_files; i++) {
645 		sprintf(logfilename + dirnamelen,
646 			"ib_logfile%u", i ? i : INIT_LOG_FILE0);
647 
648 		dberr_t err = create_log_file(&files[i], logfilename);
649 
650 		if (err != DB_SUCCESS) {
651 			return(err);
652 		}
653 	}
654 
655 	RECOVERY_CRASH(8);
656 
657 	/* We did not create the first log file initially as
658 	ib_logfile0, so that crash recovery cannot find it until it
659 	has been completed and renamed. */
660 	sprintf(logfilename + dirnamelen, "ib_logfile%u", INIT_LOG_FILE0);
661 
662 	fil_space_create(
663 		logfilename, SRV_LOG_SPACE_FIRST_ID,
664 		fsp_flags_set_page_size(0, UNIV_PAGE_SIZE),
665 		FIL_LOG);
666 	ut_a(fil_validate());
667 
668 	logfile0 = fil_node_create(
669 		logfilename, (ulint) srv_log_file_size,
670 		SRV_LOG_SPACE_FIRST_ID, FALSE);
671 	ut_a(logfile0);
672 
673 	for (unsigned i = 1; i < srv_n_log_files; i++) {
674 		sprintf(logfilename + dirnamelen, "ib_logfile%u", i);
675 
676 		if (!fil_node_create(
677 			    logfilename,
678 			    (ulint) srv_log_file_size,
679 			    SRV_LOG_SPACE_FIRST_ID, FALSE)) {
680 			ut_error;
681 		}
682 	}
683 
684 	log_group_init(0, srv_n_log_files,
685 		       srv_log_file_size * UNIV_PAGE_SIZE,
686 		       SRV_LOG_SPACE_FIRST_ID,
687 		       SRV_LOG_SPACE_FIRST_ID + 1);
688 
689 	fil_open_log_and_system_tablespace_files();
690 
691 	/* Create a log checkpoint. */
692 	mutex_enter(&log_sys->mutex);
693 	ut_d(recv_no_log_write = FALSE);
694 	recv_reset_logs(lsn);
695 	mutex_exit(&log_sys->mutex);
696 
697 	return(DB_SUCCESS);
698 }
699 
700 /*********************************************************************//**
701 Renames the first log file. */
702 static
703 void
create_log_files_rename(char * logfilename,size_t dirnamelen,lsn_t lsn,char * logfile0)704 create_log_files_rename(
705 /*====================*/
706 	char*	logfilename,	/*!< in/out: buffer for log file name */
707 	size_t	dirnamelen,	/*!< in: length of the directory path */
708 	lsn_t	lsn,		/*!< in: FIL_PAGE_FILE_FLUSH_LSN value */
709 	char*	logfile0)	/*!< in/out: name of the first log file */
710 {
711 	/* If innodb_flush_method=O_DSYNC,
712 	we need to explicitly flush the log buffers. */
713 	fil_flush(SRV_LOG_SPACE_FIRST_ID);
714 	/* Close the log files, so that we can rename
715 	the first one. */
716 	fil_close_log_files(false);
717 
718 	/* Rename the first log file, now that a log
719 	checkpoint has been created. */
720 	sprintf(logfilename + dirnamelen, "ib_logfile%u", 0);
721 
722 	RECOVERY_CRASH(9);
723 
724 	ib_logf(IB_LOG_LEVEL_INFO,
725 		"Renaming log file %s to %s", logfile0, logfilename);
726 
727 	mutex_enter(&log_sys->mutex);
728 	ut_ad(strlen(logfile0) == 2 + strlen(logfilename));
729 	ibool success = os_file_rename(
730 		innodb_file_log_key, logfile0, logfilename);
731 	ut_a(success);
732 
733 	RECOVERY_CRASH(10);
734 
735 	/* Replace the first file with ib_logfile0. */
736 	strcpy(logfile0, logfilename);
737 	mutex_exit(&log_sys->mutex);
738 
739 	fil_open_log_and_system_tablespace_files();
740 
741 	ib_logf(IB_LOG_LEVEL_WARN, "New log files created, LSN=" LSN_PF, lsn);
742 }
743 
744 /*********************************************************************//**
745 Opens a log file.
746 @return	DB_SUCCESS or error code */
747 static MY_ATTRIBUTE((nonnull, warn_unused_result))
748 dberr_t
open_log_file(pfs_os_file_t * file,const char * name,os_offset_t * size)749 open_log_file(
750 /*==========*/
751 	pfs_os_file_t*	file,	/*!< out: file handle */
752 	const char*	name,	/*!< in: log file name */
753 	os_offset_t*	size)	/*!< out: file size */
754 {
755 	ibool	ret;
756 
757 	*file = os_file_create(innodb_file_log_key, name,
758 			       OS_FILE_OPEN, OS_FILE_AIO,
759 			       OS_LOG_FILE, &ret);
760 	if (!ret) {
761 		ib_logf(IB_LOG_LEVEL_ERROR, "Unable to open '%s'", name);
762 		return(DB_ERROR);
763 	}
764 
765 	*size = os_file_get_size(*file);
766 
767 	ret = os_file_close(*file);
768 	ut_a(ret);
769 	return(DB_SUCCESS);
770 }
771 
772 /*********************************************************************//**
773 Creates or opens database data files and closes them.
774 @return	DB_SUCCESS or error code */
775 static MY_ATTRIBUTE((nonnull, warn_unused_result))
776 dberr_t
open_or_create_data_files(ibool * create_new_db,ulint * min_arch_log_no,ulint * max_arch_log_no,lsn_t * min_flushed_lsn,lsn_t * max_flushed_lsn,ulint * sum_of_new_sizes)777 open_or_create_data_files(
778 /*======================*/
779 	ibool*		create_new_db,	/*!< out: TRUE if new database should be
780 					created */
781 #ifdef UNIV_LOG_ARCHIVE
782 	ulint*		min_arch_log_no,/*!< out: min of archived log
783 					numbers in data files */
784 	ulint*		max_arch_log_no,/*!< out: max of archived log
785 					numbers in data files */
786 #endif /* UNIV_LOG_ARCHIVE */
787 	lsn_t*		min_flushed_lsn,/*!< out: min of flushed lsn
788 					values in data files */
789 	lsn_t*		max_flushed_lsn,/*!< out: max of flushed lsn
790 					values in data files */
791 	ulint*		sum_of_new_sizes)/*!< out: sum of sizes of the
792 					new files added */
793 {
794 	ibool		ret;
795 	ulint		i;
796 	ibool		one_opened	= FALSE;
797 	ibool		one_created	= FALSE;
798 	os_offset_t	size;
799 	ulint		flags;
800 	ulint		space;
801 	ulint		rounded_size_pages;
802 	char		name[10000];
803 
804 	if (srv_n_data_files >= 1000) {
805 
806 		ib_logf(IB_LOG_LEVEL_ERROR,
807 			"Can only have < 1000 data files, you have "
808 			"defined %lu", (ulong) srv_n_data_files);
809 
810 		return(DB_ERROR);
811 	}
812 
813 	*sum_of_new_sizes = 0;
814 
815 	*create_new_db = FALSE;
816 
817 	srv_normalize_path_for_win(srv_data_home);
818 
819 	for (i = 0; i < srv_n_data_files; i++) {
820 		ulint	dirnamelen;
821 
822 		srv_normalize_path_for_win(srv_data_file_names[i]);
823 		dirnamelen = strlen(srv_data_home);
824 
825 		ut_a(dirnamelen + strlen(srv_data_file_names[i])
826 		     < (sizeof name) - 1);
827 
828 		memcpy(name, srv_data_home, dirnamelen);
829 
830 		/* Add a path separator if needed. */
831 		if (dirnamelen && name[dirnamelen - 1] != SRV_PATH_SEPARATOR) {
832 			name[dirnamelen++] = SRV_PATH_SEPARATOR;
833 		}
834 
835 		strcpy(name + dirnamelen, srv_data_file_names[i]);
836 
		/* Note: It will return true if the file doesn't exist. */
838 
839 		if (!srv_file_check_mode(name)) {
840 
841 			return(DB_FAIL);
842 
843 		} else if (srv_data_file_is_raw_partition[i] == 0) {
844 
845 			/* First we try to create the file: if it already
846 			exists, ret will get value FALSE */
847 
848 			files[i] = os_file_create(
849 				innodb_file_data_key, name, OS_FILE_CREATE,
850 				OS_FILE_NORMAL, OS_DATA_FILE, &ret);
851 
852 			if (srv_read_only_mode) {
853 
854 				if (ret) {
855 					goto size_check;
856 				}
857 
858 				ib_logf(IB_LOG_LEVEL_ERROR,
859 					"Opening %s failed!", name);
860 
861 				return(DB_ERROR);
862 
863 			} else if (!ret
864 				   && os_file_get_last_error(false)
865 				   != OS_FILE_ALREADY_EXISTS
866 #ifdef UNIV_AIX
867 				   /* AIX 5.1 after security patch ML7 may have
868 			           errno set to 0 here, which causes our
869 				   function to return 100; work around that
870 				   AIX problem */
871 				   && os_file_get_last_error(false) != 100
872 #endif /* UNIV_AIX */
873 			    ) {
874 				ib_logf(IB_LOG_LEVEL_ERROR,
875 					"Creating or opening %s failed!",
876 					name);
877 
878 				return(DB_ERROR);
879 			}
880 
881 		} else if (srv_data_file_is_raw_partition[i] == SRV_NEW_RAW) {
882 
883 			ut_a(!srv_read_only_mode);
884 
885 			/* The partition is opened, not created; then it is
886 			written over */
887 
888 			srv_start_raw_disk_in_use = TRUE;
889 			srv_created_new_raw = TRUE;
890 
891 			files[i] = os_file_create(
892 				innodb_file_data_key, name, OS_FILE_OPEN_RAW,
893 				OS_FILE_NORMAL, OS_DATA_FILE, &ret);
894 
895 			if (!ret) {
896 				ib_logf(IB_LOG_LEVEL_ERROR,
897 					"Error in opening %s", name);
898 
899 				return(DB_ERROR);
900 			}
901 
902 			const char*	check_msg;
903 			check_msg = fil_read_first_page(
904 				files[i], FALSE, &flags, &space,
905 #ifdef UNIV_LOG_ARCHIVE
906 				min_arch_log_no, max_arch_log_no,
907 #endif /* UNIV_LOG_ARCHIVE */
908 				min_flushed_lsn, max_flushed_lsn);
909 
910 			/* If first page is valid, don't overwrite DB.
911 			It prevents overwriting DB when mysql_install_db
912 			starts mysqld multiple times during bootstrap. */
913 			if (check_msg == NULL) {
914 
915 				srv_created_new_raw = FALSE;
916 				ret = FALSE;
917 			}
918 
919 		} else if (srv_data_file_is_raw_partition[i] == SRV_OLD_RAW) {
920 			srv_start_raw_disk_in_use = TRUE;
921 
922 			ret = FALSE;
923 		} else {
924 			ut_a(0);
925 		}
926 
927 		if (ret == FALSE) {
928 			const char* check_msg;
929 			/* We open the data file */
930 
931 			if (one_created) {
932 				ib_logf(IB_LOG_LEVEL_ERROR,
933 					"Data files can only be added at "
934 					"the end of a tablespace, but "
935 					"data file %s existed beforehand.",
936 					name);
937 				return(DB_ERROR);
938 			}
939 			if (srv_data_file_is_raw_partition[i] == SRV_OLD_RAW) {
940 				ut_a(!srv_read_only_mode);
941 				files[i] = os_file_create(
942 					innodb_file_data_key,
943 					name, OS_FILE_OPEN_RAW,
944 					OS_FILE_NORMAL, OS_DATA_FILE, &ret);
945 			} else if (i == 0) {
946 				files[i] = os_file_create(
947 					innodb_file_data_key,
948 					name, OS_FILE_OPEN_RETRY,
949 					OS_FILE_NORMAL, OS_DATA_FILE, &ret);
950 			} else {
951 				files[i] = os_file_create(
952 					innodb_file_data_key,
953 					name, OS_FILE_OPEN, OS_FILE_NORMAL,
954 					OS_DATA_FILE, &ret);
955 			}
956 
957 			if (!ret) {
958 
959 				os_file_get_last_error(true);
960 
961 				ib_logf(IB_LOG_LEVEL_ERROR,
962 					"Can't open '%s'", name);
963 
964 				return(DB_ERROR);
965 			}
966 
967 			if (srv_data_file_is_raw_partition[i] == SRV_OLD_RAW) {
968 
969 				goto skip_size_check;
970 			}
971 
972 size_check:
973 			size = os_file_get_size(files[i]);
974 			ut_a(size != (os_offset_t) -1);
975 
976 			/* Under some error conditions like disk full
			scenarios or file size reaching filesystem
978 			limit the data file could contain an incomplete
979 			extent at the end. When we extend a data file
980 			and if some failure happens, then also the data
981 			file could contain an incomplete extent.  So we
982 			need to round the size downward to a megabyte.*/
983 
984 			rounded_size_pages = (ulint) CALC_NUMBER_OF_PAGES(size);
985 
986 			if (i == srv_n_data_files - 1
987 			    && srv_auto_extend_last_data_file) {
988 
989 				if (srv_data_file_sizes[i] > rounded_size_pages
990 				    || (srv_last_file_size_max > 0
991 					&& srv_last_file_size_max
992 					< rounded_size_pages)) {
993 
994 					ib_logf(IB_LOG_LEVEL_ERROR,
995 						"auto-extending "
996 						"data file %s is "
997 						"of a different size "
998 						"%lu pages (rounded "
999 						"down to MB) than specified "
1000 						"in the .cnf file: "
1001 						"initial %lu pages, "
1002 						"max %lu (relevant if "
1003 						"non-zero) pages!",
1004 						name,
1005 						(ulong) rounded_size_pages,
1006 						(ulong) srv_data_file_sizes[i],
1007 						(ulong)
1008 						srv_last_file_size_max);
1009 
1010 					return(DB_ERROR);
1011 				}
1012 
1013 				srv_data_file_sizes[i] = rounded_size_pages;
1014 			}
1015 
1016 			if (rounded_size_pages != srv_data_file_sizes[i]) {
1017 
1018 				ib_logf(IB_LOG_LEVEL_ERROR,
1019 					"Data file %s is of a different "
1020 					"size %lu pages (rounded down to MB) "
1021 					"than specified in the .cnf file "
1022 					"%lu pages!",
1023 					name,
1024 					(ulong) rounded_size_pages,
1025 					(ulong) srv_data_file_sizes[i]);
1026 
1027 				return(DB_ERROR);
1028 			}
1029 skip_size_check:
1030 
1031 			/* This is the earliest location where we can load
1032 			the double write buffer. */
1033 			if (i == 0) {
1034 				buf_dblwr_init_or_load_pages(
1035 					files[i], srv_data_file_names[i], true);
1036 			}
1037 
1038 			bool retry = true;
1039 check_first_page:
1040 			check_msg = fil_read_first_page(
1041 				files[i], one_opened, &flags, &space,
1042 #ifdef UNIV_LOG_ARCHIVE
1043 				min_arch_log_no, max_arch_log_no,
1044 #endif /* UNIV_LOG_ARCHIVE */
1045 				min_flushed_lsn, max_flushed_lsn);
1046 
1047 			if (check_msg) {
1048 
1049 				if (retry) {
1050 					fsp_open_info	fsp;
1051 					const ulint	page_no = 0;
1052 
1053 					retry = false;
1054 					fsp.id = 0;
1055 					fsp.filepath = srv_data_file_names[i];
1056 					fsp.file = files[i];
1057 
1058 					if (fil_user_tablespace_restore_page(
1059 						&fsp, page_no)) {
1060 						goto check_first_page;
1061 					}
1062 				}
1063 
1064 				ib_logf(IB_LOG_LEVEL_ERROR,
1065 						"%s in data file %s",
1066 						check_msg, name);
1067 				return(DB_ERROR);
1068 			}
1069 
1070 			/* The first file of the system tablespace must
1071 			have space ID = TRX_SYS_SPACE.  The FSP_SPACE_ID
1072 			field in files greater than ibdata1 are unreliable. */
1073 			ut_a(one_opened || space == TRX_SYS_SPACE);
1074 
1075 			/* Check the flags for the first system tablespace
1076 			file only. */
1077 			if (!one_opened
1078 			    && UNIV_PAGE_SIZE
1079 			       != fsp_flags_get_page_size(flags)) {
1080 
1081 				ib_logf(IB_LOG_LEVEL_ERROR,
1082 					"Data file \"%s\" uses page size %lu,"
1083 					"but the start-up parameter "
1084 					"is --innodb-page-size=%lu",
1085 					name,
1086 					fsp_flags_get_page_size(flags),
1087 					UNIV_PAGE_SIZE);
1088 
1089 				return(DB_ERROR);
1090 			}
1091 
1092 			one_opened = TRUE;
1093 		} else if (!srv_read_only_mode) {
1094 			/* We created the data file and now write it full of
1095 			zeros */
1096 
1097 			one_created = TRUE;
1098 
1099 			if (i > 0) {
1100 				ib_logf(IB_LOG_LEVEL_INFO,
1101 					"Data file %s did not"
1102 					" exist: new to be created",
1103 					name);
1104 			} else {
1105 				ib_logf(IB_LOG_LEVEL_INFO,
1106 					"The first specified "
1107 					"data file %s did not exist: "
1108 					"a new database to be created!",
1109 					name);
1110 
1111 				*create_new_db = TRUE;
1112 			}
1113 
1114 			ib_logf(IB_LOG_LEVEL_INFO,
1115 				"Setting file %s size to %lu MB",
1116 				name,
1117 				(ulong) (srv_data_file_sizes[i]
1118 					 >> (20 - UNIV_PAGE_SIZE_SHIFT)));
1119 
1120 			ib_logf(IB_LOG_LEVEL_INFO,
1121 				"Database physically writes the"
1122 				" file full: wait...");
1123 
1124 			ret = os_file_set_size(
1125 				name, files[i],
1126 				(os_offset_t) srv_data_file_sizes[i]
1127 				<< UNIV_PAGE_SIZE_SHIFT);
1128 
1129 			if (!ret) {
1130 				ib_logf(IB_LOG_LEVEL_ERROR,
1131 					"Error in creating %s: "
1132 					"probably out of disk space",
1133 					name);
1134 
1135 				return(DB_ERROR);
1136 			}
1137 
1138 			*sum_of_new_sizes += srv_data_file_sizes[i];
1139 		}
1140 
1141 		ret = os_file_close(files[i]);
1142 		ut_a(ret);
1143 
1144 		if (i == 0) {
1145 			flags = fsp_flags_set_page_size(0, UNIV_PAGE_SIZE);
1146 			fil_space_create(name, 0, flags, FIL_TABLESPACE);
1147 		}
1148 
1149 		ut_a(fil_validate());
1150 
1151 		if (!fil_node_create(name, srv_data_file_sizes[i], 0,
1152 				     srv_data_file_is_raw_partition[i] != 0)) {
1153 			return(DB_ERROR);
1154 		}
1155 	}
1156 
1157 	return(DB_SUCCESS);
1158 }
1159 
1160 /*********************************************************************//**
1161 Create undo tablespace.
1162 @return	DB_SUCCESS or error code */
1163 static
1164 dberr_t
srv_undo_tablespace_create(const char * name,ulint size)1165 srv_undo_tablespace_create(
1166 /*=======================*/
1167 	const char*	name,		/*!< in: tablespace name */
1168 	ulint		size)		/*!< in: tablespace size in pages */
1169 {
1170 	pfs_os_file_t	fh;
1171 	ibool		ret;
1172 	dberr_t		err = DB_SUCCESS;
1173 
1174 	os_file_create_subdirs_if_needed(name);
1175 
1176 	fh = os_file_create(
1177 		innodb_file_data_key,
1178 		name,
1179 		srv_read_only_mode ? OS_FILE_OPEN : OS_FILE_CREATE,
1180 		OS_FILE_NORMAL, OS_DATA_FILE, &ret);
1181 
1182 	if (srv_read_only_mode && ret) {
1183 		ib_logf(IB_LOG_LEVEL_INFO,
1184 			"%s opened in read-only mode", name);
1185 	} else if (ret == FALSE) {
1186 		if (os_file_get_last_error(false) != OS_FILE_ALREADY_EXISTS
1187 #ifdef UNIV_AIX
1188 			/* AIX 5.1 after security patch ML7 may have
1189 			errno set to 0 here, which causes our function
1190 			to return 100; work around that AIX problem */
1191 		    && os_file_get_last_error(false) != 100
1192 #endif /* UNIV_AIX */
1193 		) {
1194 			ib_logf(IB_LOG_LEVEL_ERROR,
1195 				"Can't create UNDO tablespace %s", name);
1196 		} else {
1197 			ib_logf(IB_LOG_LEVEL_ERROR,
1198 				"Creating system tablespace with"
1199 				" existing undo tablespaces is not"
1200 				" supported. Please delete all undo"
1201 				" tablespaces before creating new"
1202 				" system tablespace.");
1203 		}
1204 		err = DB_ERROR;
1205 	} else {
1206 		ut_a(!srv_read_only_mode);
1207 
1208 		/* We created the data file and now write it full of zeros */
1209 
1210 		ib_logf(IB_LOG_LEVEL_INFO,
1211 			"Data file %s did not exist: new to be created",
1212 			name);
1213 
1214 		ib_logf(IB_LOG_LEVEL_INFO,
1215 			"Setting file %s size to %lu MB",
1216 			name, size >> (20 - UNIV_PAGE_SIZE_SHIFT));
1217 
1218 		ib_logf(IB_LOG_LEVEL_INFO,
1219 			"Database physically writes the file full: wait...");
1220 
1221 		ret = os_file_set_size(name, fh, size << UNIV_PAGE_SIZE_SHIFT);
1222 
1223 		if (!ret) {
1224 			ib_logf(IB_LOG_LEVEL_INFO,
1225 				"Error in creating %s: probably out of "
1226 				"disk space", name);
1227 
1228 			err = DB_ERROR;
1229 		}
1230 
1231 		os_file_close(fh);
1232 	}
1233 
1234 	return(err);
1235 }
1236 
1237 /*********************************************************************//**
1238 Open an undo tablespace.
1239 @return	DB_SUCCESS or error code */
1240 static
1241 dberr_t
srv_undo_tablespace_open(const char * name,ulint space)1242 srv_undo_tablespace_open(
1243 /*=====================*/
1244 	const char*	name,		/*!< in: tablespace name */
1245 	ulint		space)		/*!< in: tablespace id */
1246 {
1247 	pfs_os_file_t	fh;
1248 	dberr_t		err	= DB_ERROR;
1249 	ibool		ret;
1250 	ulint		flags;
1251 
1252 	if (!srv_file_check_mode(name)) {
1253 		ib_logf(IB_LOG_LEVEL_ERROR,
1254 			"UNDO tablespaces must be %s!",
1255 			srv_read_only_mode ? "writable" : "readable");
1256 
1257 		return(DB_ERROR);
1258 	}
1259 
1260 	fh = os_file_create(
1261 		innodb_file_data_key, name,
1262 		OS_FILE_OPEN_RETRY
1263 		| OS_FILE_ON_ERROR_NO_EXIT
1264 		| OS_FILE_ON_ERROR_SILENT,
1265 		OS_FILE_NORMAL,
1266 		OS_DATA_FILE,
1267 		&ret);
1268 
1269 	/* If the file open was successful then load the tablespace. */
1270 
1271 	if (ret) {
1272 		os_offset_t	size;
1273 
1274 		size = os_file_get_size(fh);
1275 		ut_a(size != (os_offset_t) -1);
1276 
1277 		ret = os_file_close(fh);
1278 		ut_a(ret);
1279 
1280 		/* Load the tablespace into InnoDB's internal
1281 		data structures. */
1282 
1283 		/* We set the biggest space id to the undo tablespace
1284 		because InnoDB hasn't opened any other tablespace apart
1285 		from the system tablespace. */
1286 
1287 		fil_set_max_space_id_if_bigger(space);
1288 
1289 		/* Set the compressed page size to 0 (non-compressed) */
1290 		flags = fsp_flags_set_page_size(0, UNIV_PAGE_SIZE);
1291 		fil_space_create(name, space, flags, FIL_TABLESPACE);
1292 
1293 		ut_a(fil_validate());
1294 
1295 		os_offset_t	n_pages = size / UNIV_PAGE_SIZE;
1296 
1297 		/* On 64 bit Windows ulint can be 32 bit and os_offset_t
1298 		is 64 bit. It is OK to cast the n_pages to ulint because
1299 		the unit has been scaled to pages and they are always
1300 		32 bit. */
1301 		if (fil_node_create(name, (ulint) n_pages, space, FALSE)) {
1302 			err = DB_SUCCESS;
1303 		}
1304 	}
1305 
1306 	return(err);
1307 }
1308 
1309 /********************************************************************
1310 Opens the configured number of undo tablespaces.
1311 @return	DB_SUCCESS or error code */
1312 static
1313 dberr_t
srv_undo_tablespaces_init(ibool create_new_db,const ulint n_conf_tablespaces,ulint * n_opened)1314 srv_undo_tablespaces_init(
1315 /*======================*/
1316 	ibool		create_new_db,		/*!< in: TRUE if new db being
1317 						created */
1318 	const ulint	n_conf_tablespaces,	/*!< in: configured undo
1319 						tablespaces */
1320 	ulint*		n_opened)		/*!< out: number of UNDO
1321 						tablespaces successfully
1322 						discovered and opened */
1323 {
1324 	ulint		i;
1325 	dberr_t		err = DB_SUCCESS;
1326 	ulint		prev_space_id = 0;
1327 	ulint		n_undo_tablespaces;
1328 	ulint		undo_tablespace_ids[TRX_SYS_N_RSEGS + 1];
1329 
1330 	*n_opened = 0;
1331 
1332 	ut_a(n_conf_tablespaces <= TRX_SYS_N_RSEGS);
1333 
1334 	memset(undo_tablespace_ids, 0x0, sizeof(undo_tablespace_ids));
1335 
1336 	/* Create the undo spaces only if we are creating a new
1337 	instance. We don't allow creating of new undo tablespaces
1338 	in an existing instance (yet).  This restriction exists because
1339 	we check in several places for SYSTEM tablespaces to be less than
1340 	the min of user defined tablespace ids. Once we implement saving
1341 	the location of the undo tablespaces and their space ids this
1342 	restriction will/should be lifted. */
1343 
1344 	for (i = 0; create_new_db && i < n_conf_tablespaces; ++i) {
1345 		char	name[OS_FILE_MAX_PATH];
1346 		ulint	space_id  = i + 1;
1347 
1348 		DBUG_EXECUTE_IF("innodb_undo_upgrade",
1349 				space_id = i + 3;);
1350 
1351 		ut_snprintf(
1352 			name, sizeof(name),
1353 			"%s%cundo%03lu",
1354 			srv_undo_dir, SRV_PATH_SEPARATOR, space_id);
1355 
1356 		if (i == 0) {
1357 			srv_undo_space_id_start = space_id;
1358 			prev_space_id = srv_undo_space_id_start - 1;
1359 		}
1360 
1361 		undo_tablespace_ids[i] = space_id;
1362 
1363 		err = srv_undo_tablespace_create(
1364 			name, SRV_UNDO_TABLESPACE_SIZE_IN_PAGES);
1365 
1366 		if (err != DB_SUCCESS) {
1367 
1368 			ib_logf(IB_LOG_LEVEL_ERROR,
1369 				"Could not create undo tablespace '%s'.",
1370 				name);
1371 
1372 			return(err);
1373 		}
1374 	}
1375 
1376 	/* Get the tablespace ids of all the undo segments excluding
1377 	the system tablespace (0). If we are creating a new instance then
1378 	we build the undo_tablespace_ids ourselves since they don't
1379 	already exist. */
1380 
1381 	if (!create_new_db) {
1382 		n_undo_tablespaces = trx_rseg_get_n_undo_tablespaces(
1383 			undo_tablespace_ids);
1384 
1385 		if (n_undo_tablespaces != 0) {
1386 			srv_undo_space_id_start = undo_tablespace_ids[0];
1387 			prev_space_id = srv_undo_space_id_start - 1;
1388 		}
1389 
1390 	} else {
1391 		n_undo_tablespaces = n_conf_tablespaces;
1392 
1393 		undo_tablespace_ids[n_conf_tablespaces] = ULINT_UNDEFINED;
1394 	}
1395 
1396 	/* Open all the undo tablespaces that are currently in use. If we
1397 	fail to open any of these it is a fatal error. The tablespace ids
1398 	should be contiguous. It is a fatal error because they are required
1399 	for recovery and are referenced by the UNDO logs (a.k.a RBS). */
1400 
1401 	for (i = 0; i < n_undo_tablespaces; ++i) {
1402 		char	name[OS_FILE_MAX_PATH];
1403 
1404 		ut_snprintf(
1405 			name, sizeof(name),
1406 			"%s%cundo%03lu",
1407 			srv_undo_dir, SRV_PATH_SEPARATOR,
1408 			undo_tablespace_ids[i]);
1409 
1410 		/* Should be no gaps in undo tablespace ids. */
1411 		ut_a(prev_space_id + 1 == undo_tablespace_ids[i]);
1412 
1413 		/* The system space id should not be in this array. */
1414 		ut_a(undo_tablespace_ids[i] != 0);
1415 		ut_a(undo_tablespace_ids[i] != ULINT_UNDEFINED);
1416 
1417 		err = srv_undo_tablespace_open(name, undo_tablespace_ids[i]);
1418 
1419 		if (err != DB_SUCCESS) {
1420 
1421 			ib_logf(IB_LOG_LEVEL_ERROR,
1422 				"Unable to open undo tablespace '%s'.", name);
1423 
1424 			return(err);
1425 		}
1426 
1427 		prev_space_id = undo_tablespace_ids[i];
1428 
1429 		++*n_opened;
1430 	}
1431 
1432 	/* Open any extra unused undo tablespaces. These must be contiguous.
1433 	We stop at the first failure. These are undo tablespaces that are
1434 	not in use and therefore not required by recovery. We only check
1435 	that there are no gaps. */
1436 
1437 	for (i = prev_space_id + 1; i < TRX_SYS_N_RSEGS; ++i) {
1438 		char	name[OS_FILE_MAX_PATH];
1439 
1440 		ut_snprintf(
1441 			name, sizeof(name),
1442 			"%s%cundo%03lu", srv_undo_dir, SRV_PATH_SEPARATOR, i);
1443 
1444 		/* Undo space ids start from 1. */
1445 		err = srv_undo_tablespace_open(name, i);
1446 
1447 		if (err != DB_SUCCESS) {
1448 			break;
1449 		}
1450 
1451 		/** Note the first undo tablespace id in case of
1452 		no active undo tablespace. */
1453 		if (n_undo_tablespaces == 0) {
1454 			srv_undo_space_id_start = i;
1455 		}
1456 
1457 		++n_undo_tablespaces;
1458 
1459 		++*n_opened;
1460 	}
1461 
1462 	/** Explictly specify the srv_undo_space_id_start
1463 	as zero when there are no undo tablespaces. */
1464 	if (n_undo_tablespaces == 0) {
1465 		srv_undo_space_id_start = 0;
1466 	}
1467 
1468 	/* If the user says that there are fewer than what we find we
1469 	tolerate that discrepancy but not the inverse. Because there could
1470 	be unused undo tablespaces for future use. */
1471 
1472 	if (n_conf_tablespaces > n_undo_tablespaces) {
1473 		ut_print_timestamp(stderr);
1474 		fprintf(stderr,
1475 			" InnoDB: Expected to open %lu undo "
1476 			"tablespaces but was able\n",
1477 			n_conf_tablespaces);
1478 		ut_print_timestamp(stderr);
1479 		fprintf(stderr,
1480 			" InnoDB: to find only %lu undo "
1481 			"tablespaces.\n", n_undo_tablespaces);
1482 		ut_print_timestamp(stderr);
1483 		fprintf(stderr,
1484 			" InnoDB: Set the "
1485 			"innodb_undo_tablespaces parameter to "
1486 			"the\n");
1487 		ut_print_timestamp(stderr);
1488 		fprintf(stderr,
1489 			" InnoDB: correct value and retry. Suggested "
1490 			"value is %lu\n", n_undo_tablespaces);
1491 
1492 		return(err != DB_SUCCESS ? err : DB_ERROR);
1493 
1494 	} else  if (n_undo_tablespaces > 0) {
1495 
1496 		ib_logf(IB_LOG_LEVEL_INFO, "Opened %lu undo tablespaces",
1497 			n_undo_tablespaces);
1498 
1499 		if (n_conf_tablespaces == 0) {
1500 			ib_logf(IB_LOG_LEVEL_WARN,
1501 				"Using the system tablespace for all UNDO "
1502 				"logging because innodb_undo_tablespaces=0");
1503 		}
1504 	}
1505 
1506 	if (create_new_db) {
1507 		mtr_t	mtr;
1508 
1509 		mtr_start(&mtr);
1510 
1511 		/* The undo log tablespace */
1512 		for (i = 0; i < n_undo_tablespaces; ++i) {
1513 
1514 			fsp_header_init(
1515 				undo_tablespace_ids[i],
1516 				SRV_UNDO_TABLESPACE_SIZE_IN_PAGES, &mtr);
1517 		}
1518 
1519 		mtr_commit(&mtr);
1520 	}
1521 
1522 	return(DB_SUCCESS);
1523 }
1524 
1525 /********************************************************************
1526 Wait for the purge thread(s) to start up. */
1527 static
1528 void
srv_start_wait_for_purge_to_start()1529 srv_start_wait_for_purge_to_start()
1530 /*===============================*/
1531 {
1532 	/* Wait for the purge coordinator and master thread to startup. */
1533 
1534 	purge_state_t	state = trx_purge_state();
1535 
1536 	ut_a(state != PURGE_STATE_DISABLED);
1537 
1538 	while (srv_shutdown_state == SRV_SHUTDOWN_NONE
1539 	       && srv_force_recovery < SRV_FORCE_NO_BACKGROUND
1540 	       && state == PURGE_STATE_INIT) {
1541 
1542 		switch (state = trx_purge_state()) {
1543 		case PURGE_STATE_RUN:
1544 		case PURGE_STATE_STOP:
1545 			break;
1546 
1547 		case PURGE_STATE_INIT:
1548 			ib_logf(IB_LOG_LEVEL_INFO,
1549 				"Waiting for purge to start");
1550 
1551 			os_thread_sleep(50000);
1552 			break;
1553 
1554 		case PURGE_STATE_EXIT:
1555 		case PURGE_STATE_DISABLED:
1556 			ut_error;
1557 		}
1558 	}
1559 }
1560 
1561 /********************************************************************
1562 Starts InnoDB and creates a new database if database files
1563 are not found and the user wants.
1564 @return	DB_SUCCESS or error code */
1565 UNIV_INTERN
1566 dberr_t
innobase_start_or_create_for_mysql(void)1567 innobase_start_or_create_for_mysql(void)
1568 /*====================================*/
1569 {
1570 	ibool		create_new_db;
1571 	lsn_t		min_flushed_lsn;
1572 	lsn_t		max_flushed_lsn;
1573 #ifdef UNIV_LOG_ARCHIVE
1574 	ulint		min_arch_log_no;
1575 	ulint		max_arch_log_no;
1576 #endif /* UNIV_LOG_ARCHIVE */
1577 	ulint		sum_of_new_sizes;
1578 	ulint		sum_of_data_file_sizes;
1579 	ulint		tablespace_size_in_header;
1580 	dberr_t		err;
1581 	unsigned	i;
1582 	ulint		srv_n_log_files_found = srv_n_log_files;
1583 	ulint		io_limit;
1584 	mtr_t		mtr;
1585 	ib_bh_t*	ib_bh;
1586 	ulint		n_recovered_trx;
1587 	char		logfilename[10000];
1588 	char*		logfile0	= NULL;
1589 	size_t		dirnamelen;
1590 
1591 	if (srv_force_recovery == SRV_FORCE_NO_LOG_REDO) {
1592 		srv_read_only_mode = 1;
1593 	}
1594 
1595 	high_level_read_only = srv_read_only_mode
1596 		|| srv_force_recovery > SRV_FORCE_NO_TRX_UNDO;
1597 
1598 	if (srv_read_only_mode) {
1599 		ib_logf(IB_LOG_LEVEL_INFO, "Started in read only mode");
1600 	}
1601 
1602 #ifdef HAVE_DARWIN_THREADS
1603 # ifdef F_FULLFSYNC
1604 	/* This executable has been compiled on Mac OS X 10.3 or later.
1605 	Assume that F_FULLFSYNC is available at run-time. */
1606 	srv_have_fullfsync = TRUE;
1607 # else /* F_FULLFSYNC */
1608 	/* This executable has been compiled on Mac OS X 10.2
1609 	or earlier.  Determine if the executable is running
1610 	on Mac OS X 10.3 or later. */
1611 	struct utsname utsname;
1612 	if (uname(&utsname)) {
1613 		ut_print_timestamp(stderr);
1614 		fputs(" InnoDB: cannot determine Mac OS X version!\n", stderr);
1615 	} else {
1616 		srv_have_fullfsync = strcmp(utsname.release, "7.") >= 0;
1617 	}
1618 	if (!srv_have_fullfsync) {
1619 		ut_print_timestamp(stderr);
1620 		fputs(" InnoDB: On Mac OS X, fsync() may be "
1621 		      "broken on internal drives,\n", stderr);
1622 		ut_print_timestamp(stderr);
1623 		fputs(" InnoDB: making transactions unsafe!\n", stderr);
1624 	}
1625 # endif /* F_FULLFSYNC */
1626 #endif /* HAVE_DARWIN_THREADS */
1627 
1628 	ib_logf(IB_LOG_LEVEL_INFO,
1629 		"Using %s to ref count buffer pool pages",
1630 #ifdef PAGE_ATOMIC_REF_COUNT
1631 		"atomics"
1632 #else
1633 		"mutexes"
1634 #endif /* PAGE_ATOMIC_REF_COUNT */
1635 	);
1636 
1637 
1638 	if (sizeof(ulint) != sizeof(void*)) {
1639 		ut_print_timestamp(stderr);
1640 		fprintf(stderr,
1641 			" InnoDB: Error: size of InnoDB's ulint is %lu, "
1642 			"but size of void*\n", (ulong) sizeof(ulint));
1643 		ut_print_timestamp(stderr);
1644 		fprintf(stderr,
1645 			" InnoDB: is %lu. The sizes should be the same "
1646 			"so that on a 64-bit\n",
1647 			(ulong) sizeof(void*));
1648 		ut_print_timestamp(stderr);
1649 		fprintf(stderr,
1650 			" InnoDB: platforms you can allocate more than 4 GB "
1651 			"of memory.\n");
1652 	}
1653 
1654 #ifdef UNIV_DEBUG
1655 	ut_print_timestamp(stderr);
1656 	fprintf(stderr,
1657 		" InnoDB: !!!!!!!! UNIV_DEBUG switched on !!!!!!!!!\n");
1658 #endif
1659 
1660 #ifdef UNIV_IBUF_DEBUG
1661 	ut_print_timestamp(stderr);
1662 	fprintf(stderr,
1663 		" InnoDB: !!!!!!!! UNIV_IBUF_DEBUG switched on !!!!!!!!!\n");
1664 # ifdef UNIV_IBUF_COUNT_DEBUG
1665 	ut_print_timestamp(stderr);
1666 	fprintf(stderr,
1667 		" InnoDB: !!!!!!!! UNIV_IBUF_COUNT_DEBUG switched on "
1668 		"!!!!!!!!!\n");
1669 	ut_print_timestamp(stderr);
1670 	fprintf(stderr,
1671 		" InnoDB: Crash recovery will fail with UNIV_IBUF_COUNT_DEBUG\n");
1672 # endif
1673 #endif
1674 
1675 #ifdef UNIV_BLOB_DEBUG
1676 	fprintf(stderr,
1677 		"InnoDB: !!!!!!!! UNIV_BLOB_DEBUG switched on !!!!!!!!!\n"
1678 		"InnoDB: Server restart may fail with UNIV_BLOB_DEBUG\n");
1679 #endif /* UNIV_BLOB_DEBUG */
1680 
1681 #ifdef UNIV_SYNC_DEBUG
1682 	ut_print_timestamp(stderr);
1683 	fprintf(stderr,
1684 		" InnoDB: !!!!!!!! UNIV_SYNC_DEBUG switched on !!!!!!!!!\n");
1685 #endif
1686 
1687 #ifdef UNIV_SEARCH_DEBUG
1688 	ut_print_timestamp(stderr);
1689 	fprintf(stderr,
1690 		" InnoDB: !!!!!!!! UNIV_SEARCH_DEBUG switched on !!!!!!!!!\n");
1691 #endif
1692 
1693 #ifdef UNIV_LOG_LSN_DEBUG
1694 	ut_print_timestamp(stderr);
1695 	fprintf(stderr,
1696 		" InnoDB: !!!!!!!! UNIV_LOG_LSN_DEBUG switched on !!!!!!!!!\n");
1697 #endif /* UNIV_LOG_LSN_DEBUG */
1698 #ifdef UNIV_MEM_DEBUG
1699 	ut_print_timestamp(stderr);
1700 	fprintf(stderr,
1701 		" InnoDB: !!!!!!!! UNIV_MEM_DEBUG switched on !!!!!!!!!\n");
1702 #endif
1703 
1704 	if (srv_use_sys_malloc) {
1705 		ib_logf(IB_LOG_LEVEL_INFO,
1706 			"The InnoDB memory heap is disabled");
1707 	}
1708 
1709 #if defined(COMPILER_HINTS_ENABLED)
1710 	ib_logf(IB_LOG_LEVEL_INFO,
1711 		" InnoDB: Compiler hints enabled.");
1712 #endif /* defined(COMPILER_HINTS_ENABLED) */
1713 
1714 	ib_logf(IB_LOG_LEVEL_INFO,
1715 		"" IB_ATOMICS_STARTUP_MSG "");
1716 
1717 	ib_logf(IB_LOG_LEVEL_INFO,
1718 		"" IB_MEMORY_BARRIER_STARTUP_MSG "");
1719 
1720 #ifndef HAVE_MEMORY_BARRIER
1721 #if defined __i386__ || defined __x86_64__ || defined _M_IX86 || defined _M_X64 || defined __WIN__
1722 #else
1723 	ib_logf(IB_LOG_LEVEL_WARN,
1724 		"MySQL was built without a memory barrier capability on this"
1725 		" architecture, which might allow a mutex/rw_lock violation"
1726 		" under high thread concurrency. This may cause a hang.");
1727 #endif /* IA32 or AMD64 */
1728 #endif /* HAVE_MEMORY_BARRIER */
1729 
1730 	ib_logf(IB_LOG_LEVEL_INFO,
1731 		"Compressed tables use zlib " ZLIB_VERSION
1732 #ifdef UNIV_ZIP_DEBUG
1733 	      " with validation"
1734 #endif /* UNIV_ZIP_DEBUG */
1735 	      );
1736 #ifdef UNIV_ZIP_COPY
1737 	ib_logf(IB_LOG_LEVEL_INFO, "and extra copying");
1738 #endif /* UNIV_ZIP_COPY */
1739 
1740 
1741 	/* Since InnoDB does not currently clean up all its internal data
1742 	structures in MySQL Embedded Server Library server_end(), we
1743 	print an error message if someone tries to start up InnoDB a
1744 	second time during the process lifetime. */
1745 
1746 	if (srv_start_has_been_called) {
1747 		ut_print_timestamp(stderr);
1748 		fprintf(stderr, " InnoDB: Error: startup called second time "
1749 			"during the process\n");
1750 		ut_print_timestamp(stderr);
1751 		fprintf(stderr, " InnoDB: lifetime. In the MySQL Embedded "
1752 			"Server Library you\n");
1753 		ut_print_timestamp(stderr);
1754 		fprintf(stderr, " InnoDB: cannot call server_init() more "
1755 			"than once during the\n");
1756 		ut_print_timestamp(stderr);
1757 		fprintf(stderr, " InnoDB: process lifetime.\n");
1758 	}
1759 
1760 	srv_start_has_been_called = TRUE;
1761 
1762 #ifdef UNIV_DEBUG
1763 	log_do_write = TRUE;
1764 #endif /* UNIV_DEBUG */
1765 	/*	yydebug = TRUE; */
1766 
1767 	srv_is_being_started = TRUE;
1768 	srv_startup_is_before_trx_rollback_phase = TRUE;
1769 
1770 #ifdef __WIN__
1771 	switch (os_get_os_version()) {
1772 	case OS_WIN95:
1773 	case OS_WIN31:
1774 	case OS_WINNT:
1775 		/* On Win 95, 98, ME, Win32 subsystem for Windows 3.1,
1776 		and NT use simulated aio. In NT Windows provides async i/o,
1777 		but when run in conjunction with InnoDB Hot Backup, it seemed
1778 		to corrupt the data files. */
1779 
1780 		srv_use_native_aio = FALSE;
1781 		break;
1782 
1783 	case OS_WIN2000:
1784 	case OS_WINXP:
1785 		/* On 2000 and XP, async IO is available. */
1786 		srv_use_native_aio = TRUE;
1787 		break;
1788 
1789 	default:
1790 		/* Vista and later have both async IO and condition variables */
1791 		srv_use_native_aio = TRUE;
1792 		srv_use_native_conditions = TRUE;
1793 		break;
1794 	}
1795 
1796 #elif defined(LINUX_NATIVE_AIO)
1797 
1798 	if (srv_use_native_aio) {
1799 		ib_logf(IB_LOG_LEVEL_INFO, "Using Linux native AIO");
1800 	}
1801 #else
1802 	/* Currently native AIO is supported only on windows and linux
1803 	and that also when the support is compiled in. In all other
1804 	cases, we ignore the setting of innodb_use_native_aio. */
1805 	srv_use_native_aio = FALSE;
1806 #endif /* __WIN__ */
1807 
1808 	if (srv_file_flush_method_str == NULL) {
1809 		/* These are the default options */
1810 
1811 		srv_unix_file_flush_method = SRV_UNIX_FSYNC;
1812 
1813 		srv_win_file_flush_method = SRV_WIN_IO_UNBUFFERED;
1814 #ifndef __WIN__
1815 	} else if (0 == ut_strcmp(srv_file_flush_method_str, "fsync")) {
1816 		srv_unix_file_flush_method = SRV_UNIX_FSYNC;
1817 
1818 	} else if (0 == ut_strcmp(srv_file_flush_method_str, "O_DSYNC")) {
1819 		srv_unix_file_flush_method = SRV_UNIX_O_DSYNC;
1820 
1821 	} else if (0 == ut_strcmp(srv_file_flush_method_str, "O_DIRECT")) {
1822 		srv_unix_file_flush_method = SRV_UNIX_O_DIRECT;
1823 
1824 	} else if (0 == ut_strcmp(srv_file_flush_method_str, "O_DIRECT_NO_FSYNC")) {
1825 		srv_unix_file_flush_method = SRV_UNIX_O_DIRECT_NO_FSYNC;
1826 
1827 	} else if (0 == ut_strcmp(srv_file_flush_method_str, "littlesync")) {
1828 		srv_unix_file_flush_method = SRV_UNIX_LITTLESYNC;
1829 
1830 	} else if (0 == ut_strcmp(srv_file_flush_method_str, "nosync")) {
1831 		srv_unix_file_flush_method = SRV_UNIX_NOSYNC;
1832 #else
1833 	} else if (0 == ut_strcmp(srv_file_flush_method_str, "normal")) {
1834 		srv_win_file_flush_method = SRV_WIN_IO_NORMAL;
1835 		srv_use_native_aio = FALSE;
1836 
1837 	} else if (0 == ut_strcmp(srv_file_flush_method_str, "unbuffered")) {
1838 		srv_win_file_flush_method = SRV_WIN_IO_UNBUFFERED;
1839 		srv_use_native_aio = FALSE;
1840 
1841 	} else if (0 == ut_strcmp(srv_file_flush_method_str,
1842 				  "async_unbuffered")) {
1843 		srv_win_file_flush_method = SRV_WIN_IO_UNBUFFERED;
1844 #endif /* __WIN__ */
1845 	} else {
1846 		ib_logf(IB_LOG_LEVEL_ERROR,
1847 			"Unrecognized value %s for innodb_flush_method",
1848 			srv_file_flush_method_str);
1849 		return(DB_ERROR);
1850 	}
1851 
1852 	/* Note that the call srv_boot() also changes the values of
1853 	some variables to the units used by InnoDB internally */
1854 
1855 	/* Set the maximum number of threads which can wait for a semaphore
1856 	inside InnoDB: this is the 'sync wait array' size, as well as the
1857 	maximum number of threads that can wait in the 'srv_conc array' for
1858 	their time to enter InnoDB. */
1859 
1860 #define BUF_POOL_SIZE_THRESHOLD (1024 * 1024 * 1024)
1861 	srv_max_n_threads = 1   /* io_ibuf_thread */
1862 			    + 1 /* io_log_thread */
1863 			    + 1 /* lock_wait_timeout_thread */
1864 			    + 1 /* srv_error_monitor_thread */
1865 			    + 1 /* srv_monitor_thread */
1866 			    + 1 /* srv_master_thread */
1867 			    + 1 /* srv_purge_coordinator_thread */
1868 			    + 1 /* buf_dump_thread */
1869 			    + 1 /* dict_stats_thread */
1870 			    + 1 /* fts_optimize_thread */
1871 			    + 1 /* recv_writer_thread */
1872 			    + 1 /* buf_flush_page_cleaner_thread */
1873 			    + 1 /* trx_rollback_or_clean_all_recovered */
1874 			    + 128 /* added as margin, for use of
1875 				  InnoDB Memcached etc. */
1876 			    + max_connections
1877 			    + srv_n_read_io_threads
1878 			    + srv_n_write_io_threads
1879 			    + srv_n_purge_threads
1880 			    /* FTS Parallel Sort */
1881 			    + fts_sort_pll_degree * FTS_NUM_AUX_INDEX
1882 			      * max_connections;
1883 
1884 	if (srv_buf_pool_size < BUF_POOL_SIZE_THRESHOLD) {
1885 		/* If buffer pool is less than 1 GB,
1886 		use only one buffer pool instance */
1887 		srv_buf_pool_instances = 1;
1888 	}
1889 
1890 	srv_boot();
1891 
1892 	ib_logf(IB_LOG_LEVEL_INFO,
1893 		"%s CPU crc32 instructions",
1894 		ut_crc32_sse2_enabled ? "Using" : "Not using");
1895 
1896 	if (!srv_read_only_mode) {
1897 
1898 		mutex_create(srv_monitor_file_mutex_key,
1899 			     &srv_monitor_file_mutex, SYNC_NO_ORDER_CHECK);
1900 
1901 		if (srv_innodb_status) {
1902 
1903 			srv_monitor_file_name = static_cast<char*>(
1904 				mem_alloc(
1905 					strlen(fil_path_to_mysql_datadir)
1906 					+ 20 + sizeof "/innodb_status."));
1907 
1908 			sprintf(srv_monitor_file_name, "%s/innodb_status.%lu",
1909 				fil_path_to_mysql_datadir,
1910 				os_proc_get_number());
1911 
1912 			srv_monitor_file = fopen(srv_monitor_file_name, "w+");
1913 
1914 			if (!srv_monitor_file) {
1915 
1916 				ib_logf(IB_LOG_LEVEL_ERROR,
1917 					"Unable to create %s: %s",
1918 					srv_monitor_file_name,
1919 					strerror(errno));
1920 
1921 				return(DB_ERROR);
1922 			}
1923 		} else {
1924 			srv_monitor_file_name = NULL;
1925 			srv_monitor_file = os_file_create_tmpfile(NULL);
1926 
1927 			if (!srv_monitor_file) {
1928 				return(DB_ERROR);
1929 			}
1930 		}
1931 
1932 		mutex_create(srv_dict_tmpfile_mutex_key,
1933 			     &srv_dict_tmpfile_mutex, SYNC_DICT_OPERATION);
1934 
1935 		srv_dict_tmpfile = os_file_create_tmpfile(NULL);
1936 
1937 		if (!srv_dict_tmpfile) {
1938 			return(DB_ERROR);
1939 		}
1940 
1941 		mutex_create(srv_misc_tmpfile_mutex_key,
1942 			     &srv_misc_tmpfile_mutex, SYNC_ANY_LATCH);
1943 
1944 		srv_misc_tmpfile = os_file_create_tmpfile(NULL);
1945 
1946 		if (!srv_misc_tmpfile) {
1947 			return(DB_ERROR);
1948 		}
1949 	}
1950 
1951 	/* If user has set the value of innodb_file_io_threads then
1952 	we'll emit a message telling the user that this parameter
1953 	is now deprecated. */
1954 	if (srv_n_file_io_threads != 4) {
1955 		ib_logf(IB_LOG_LEVEL_WARN,
1956 			"innodb_file_io_threads is deprecated. Please use "
1957 			"innodb_read_io_threads and innodb_write_io_threads "
1958 			"instead");
1959 	}
1960 
1961 	/* Now overwrite the value on srv_n_file_io_threads */
1962 	srv_n_file_io_threads = srv_n_read_io_threads;
1963 
1964 	if (!srv_read_only_mode) {
1965 		/* Add the log and ibuf IO threads. */
1966 		srv_n_file_io_threads += 2;
1967 		srv_n_file_io_threads += srv_n_write_io_threads;
1968 	} else {
1969 		ib_logf(IB_LOG_LEVEL_INFO,
1970 			"Disabling background IO write threads.");
1971 
1972 		srv_n_write_io_threads = 0;
1973 	}
1974 
1975 	ut_a(srv_n_file_io_threads <= SRV_MAX_N_IO_THREADS);
1976 
1977 	io_limit = 8 * SRV_N_PENDING_IOS_PER_THREAD;
1978 
1979 	/* On Windows when using native aio the number of aio requests
1980 	that a thread can handle at a given time is limited to 32
1981 	i.e.: SRV_N_PENDING_IOS_PER_THREAD */
1982 # ifdef __WIN__
1983 	if (srv_use_native_aio) {
1984 		io_limit = SRV_N_PENDING_IOS_PER_THREAD;
1985 	}
1986 # endif /* __WIN__ */
1987 
1988 	if (!os_aio_init(io_limit,
1989 			 srv_n_read_io_threads,
1990 			 srv_n_write_io_threads,
1991 			 SRV_MAX_N_PENDING_SYNC_IOS)) {
1992 
1993 		ib_logf(IB_LOG_LEVEL_ERROR,
1994 			"Fatal : Cannot initialize AIO sub-system");
1995 
1996 		return(DB_ERROR);
1997 	}
1998 
1999 	fil_init(srv_file_per_table ? 50000 : 5000, srv_max_n_open_files);
2000 
2001 	double	size;
2002 	char	unit;
2003 
2004 	if (srv_buf_pool_size >= 1024 * 1024 * 1024) {
2005 		size = ((double) srv_buf_pool_size) / (1024 * 1024 * 1024);
2006 		unit = 'G';
2007 	} else {
2008 		size = ((double) srv_buf_pool_size) / (1024 * 1024);
2009 		unit = 'M';
2010 	}
2011 
2012 	/* Print time to initialize the buffer pool */
2013 	ib_logf(IB_LOG_LEVEL_INFO,
2014 		"Initializing buffer pool, size = %.1f%c", size, unit);
2015 
2016 	err = buf_pool_init(srv_buf_pool_size, srv_buf_pool_instances);
2017 
2018 	if (err != DB_SUCCESS) {
2019 		ib_logf(IB_LOG_LEVEL_ERROR,
2020 			"Cannot allocate memory for the buffer pool");
2021 
2022 		return(DB_ERROR);
2023 	}
2024 
2025 	ib_logf(IB_LOG_LEVEL_INFO,
2026 		"Completed initialization of buffer pool");
2027 
2028 #ifdef UNIV_DEBUG
2029 	/* We have observed deadlocks with a 5MB buffer pool but
2030 	the actual lower limit could very well be a little higher. */
2031 
2032 	if (srv_buf_pool_size <= 5 * 1024 * 1024) {
2033 
2034 		ib_logf(IB_LOG_LEVEL_INFO,
2035 			"Small buffer pool size (%luM), the flst_validate() "
2036 			"debug function can cause a deadlock if the "
2037 			"buffer pool fills up.",
2038 			srv_buf_pool_size / 1024 / 1024);
2039 	}
2040 #endif /* UNIV_DEBUG */
2041 
2042 	fsp_init();
2043 	log_init();
2044 
2045 	lock_sys_create(srv_lock_table_size);
2046 
2047 	/* Create i/o-handler threads: */
2048 
2049 	for (i = 0; i < srv_n_file_io_threads; ++i) {
2050 
2051 		n[i] = i;
2052 
2053 		os_thread_create(io_handler_thread, n + i, thread_ids + i);
2054 	}
2055 
2056 #ifdef UNIV_LOG_ARCHIVE
2057 	if (0 != ut_strcmp(srv_log_group_home_dir, srv_arch_dir)) {
2058 		ut_print_timestamp(stderr);
2059 		fprintf(stderr, " InnoDB: Error: you must set the log group home dir in my.cnf\n");
2060 		ut_print_timestamp(stderr);
2061 		fprintf(stderr, " InnoDB: the same as log arch dir.\n");
2062 
2063 		return(DB_ERROR);
2064 	}
2065 #endif /* UNIV_LOG_ARCHIVE */
2066 
2067 	if (srv_n_log_files * srv_log_file_size * UNIV_PAGE_SIZE
2068 	    >= 512ULL * 1024ULL * 1024ULL * 1024ULL) {
2069 		/* log_block_convert_lsn_to_no() limits the returned block
2070 		number to 1G and given that OS_FILE_LOG_BLOCK_SIZE is 512
2071 		bytes, then we have a limit of 512 GB. If that limit is to
2072 		be raised, then log_block_convert_lsn_to_no() must be
2073 		modified. */
2074 		ib_logf(IB_LOG_LEVEL_ERROR,
2075 			"Combined size of log files must be < 512 GB");
2076 
2077 		return(DB_ERROR);
2078 	}
2079 
2080 	if (srv_n_log_files * srv_log_file_size >= ULINT_MAX) {
2081 		/* fil_io() takes ulint as an argument and we are passing
2082 		(next_offset / UNIV_PAGE_SIZE) to it in log_group_write_buf().
2083 		So (next_offset / UNIV_PAGE_SIZE) must be less than ULINT_MAX.
2084 		So next_offset must be < ULINT_MAX * UNIV_PAGE_SIZE. This
2085 		means that we are limited to ULINT_MAX * UNIV_PAGE_SIZE which
2086 		is 64 TB on 32 bit systems. */
2087 		fprintf(stderr,
2088 			" InnoDB: Error: combined size of log files"
2089 			" must be < %lu GB\n",
2090 			ULINT_MAX / 1073741824 * UNIV_PAGE_SIZE);
2091 
2092 		return(DB_ERROR);
2093 	}
2094 
2095 	sum_of_new_sizes = 0;
2096 
2097 	for (i = 0; i < srv_n_data_files; i++) {
2098 #ifndef __WIN__
2099 		if (sizeof(off_t) < 5
2100 		    && srv_data_file_sizes[i]
2101 		    >= (ulint) (1 << (32 - UNIV_PAGE_SIZE_SHIFT))) {
2102 			ut_print_timestamp(stderr);
2103 			fprintf(stderr,
2104 				" InnoDB: Error: file size must be < 4 GB"
2105 				" with this MySQL binary\n");
2106 			ut_print_timestamp(stderr);
2107 			fprintf(stderr,
2108 				" InnoDB: and operating system combination,"
2109 				" in some OS's < 2 GB\n");
2110 
2111 			return(DB_ERROR);
2112 		}
2113 #endif
2114 		sum_of_new_sizes += srv_data_file_sizes[i];
2115 	}
2116 
2117 	if (sum_of_new_sizes < 10485760 / UNIV_PAGE_SIZE) {
2118 		ib_logf(IB_LOG_LEVEL_ERROR,
2119 			"Tablespace size must be at least 10 MB");
2120 
2121 		return(DB_ERROR);
2122 	}
2123 
2124 	recv_sys_create();
2125 	recv_sys_init(buf_pool_get_curr_size());
2126 
2127 	err = open_or_create_data_files(&create_new_db,
2128 #ifdef UNIV_LOG_ARCHIVE
2129 					&min_arch_log_no, &max_arch_log_no,
2130 #endif /* UNIV_LOG_ARCHIVE */
2131 					&min_flushed_lsn, &max_flushed_lsn,
2132 					&sum_of_new_sizes);
2133 	if (err == DB_FAIL) {
2134 
2135 		ib_logf(IB_LOG_LEVEL_ERROR,
2136 			"The system tablespace must be writable!");
2137 
2138 		return(DB_ERROR);
2139 
2140 	} else if (err != DB_SUCCESS) {
2141 
2142 		ib_logf(IB_LOG_LEVEL_ERROR,
2143 			"Could not open or create the system tablespace. If "
2144 			"you tried to add new data files to the system "
2145 			"tablespace, and it failed here, you should now "
2146 			"edit innodb_data_file_path in my.cnf back to what "
2147 			"it was, and remove the new ibdata files InnoDB "
2148 			"created in this failed attempt. InnoDB only wrote "
2149 			"those files full of zeros, but did not yet use "
2150 			"them in any way. But be careful: do not remove "
2151 			"old data files which contain your precious data!");
2152 
2153 		return(err);
2154 	}
2155 
2156 #ifdef UNIV_LOG_ARCHIVE
2157 	srv_normalize_path_for_win(srv_arch_dir);
2158 	srv_arch_dir = srv_add_path_separator_if_needed(srv_arch_dir);
2159 #endif /* UNIV_LOG_ARCHIVE */
2160 
2161 	dirnamelen = strlen(srv_log_group_home_dir);
2162 	ut_a(dirnamelen < (sizeof logfilename) - 10 - sizeof "ib_logfile");
2163 	memcpy(logfilename, srv_log_group_home_dir, dirnamelen);
2164 
2165 	/* Add a path separator if needed. */
2166 	if (dirnamelen && logfilename[dirnamelen - 1] != SRV_PATH_SEPARATOR) {
2167 		logfilename[dirnamelen++] = SRV_PATH_SEPARATOR;
2168 	}
2169 
2170 	srv_log_file_size_requested = srv_log_file_size;
2171 
2172 	if (create_new_db) {
2173 		bool success = buf_flush_list(ULINT_MAX, LSN_MAX, NULL);
2174 		ut_a(success);
2175 
2176 		min_flushed_lsn = max_flushed_lsn = log_get_lsn();
2177 
2178 		buf_flush_wait_batch_end(NULL, BUF_FLUSH_LIST);
2179 
2180 		err = create_log_files(create_new_db, logfilename, dirnamelen,
2181 				       max_flushed_lsn, logfile0);
2182 
2183 		if (err != DB_SUCCESS) {
2184 			return(err);
2185 		}
2186 	} else {
2187 		for (i = 0; i < SRV_N_LOG_FILES_MAX; i++) {
2188 			os_offset_t	size;
2189 			os_file_stat_t	stat_info;
2190 
2191 			sprintf(logfilename + dirnamelen,
2192 				"ib_logfile%u", i);
2193 
2194 			err = os_file_get_status(
2195 				logfilename, &stat_info, false);
2196 
2197 			if (err == DB_NOT_FOUND) {
2198 				if (i == 0) {
2199 					if (max_flushed_lsn
2200 					    != min_flushed_lsn) {
2201 						ib_logf(IB_LOG_LEVEL_ERROR,
2202 							"Cannot create"
2203 							" log files because"
2204 							" data files are"
2205 							" corrupt or"
2206 							" not in sync"
2207 							" with each other");
2208 						return(DB_ERROR);
2209 					}
2210 
2211 					if (max_flushed_lsn < (lsn_t) 1000) {
2212 						ib_logf(IB_LOG_LEVEL_ERROR,
2213 							"Cannot create"
2214 							" log files because"
2215 							" data files are"
2216 							" corrupt or the"
2217 							" database was not"
2218 							" shut down cleanly"
2219 							" after creating"
2220 							" the data files.");
2221 						return(DB_ERROR);
2222 					}
2223 
2224 					err = create_log_files(
2225 						create_new_db, logfilename,
2226 						dirnamelen, max_flushed_lsn,
2227 						logfile0);
2228 
2229 					if (err != DB_SUCCESS) {
2230 						return(err);
2231 					}
2232 
2233 					create_log_files_rename(
2234 						logfilename, dirnamelen,
2235 						max_flushed_lsn, logfile0);
2236 
2237 					/* Suppress the message about
2238 					crash recovery. */
2239 					max_flushed_lsn = min_flushed_lsn
2240 						= log_get_lsn();
2241 					goto files_checked;
2242 				} else if (i < 2) {
2243 					/* must have at least 2 log files */
2244 					ib_logf(IB_LOG_LEVEL_ERROR,
2245 						"Only one log file found.");
2246 					return(err);
2247 				}
2248 
2249 				/* opened all files */
2250 				break;
2251 			}
2252 
2253 			if (!srv_file_check_mode(logfilename)) {
2254 				return(DB_ERROR);
2255 			}
2256 
2257 			err = open_log_file(&files[i], logfilename, &size);
2258 
2259 			if (err != DB_SUCCESS) {
2260 				return(err);
2261 			}
2262 
2263 			ut_a(size != (os_offset_t) -1);
2264 
2265 			if (size & ((1 << UNIV_PAGE_SIZE_SHIFT) - 1)) {
2266 				ib_logf(IB_LOG_LEVEL_ERROR,
2267 					"Log file %s size "
2268 					UINT64PF " is not a multiple of"
2269 					" innodb_page_size",
2270 					logfilename, size);
2271 				return(DB_ERROR);
2272 			}
2273 
2274 			size >>= UNIV_PAGE_SIZE_SHIFT;
2275 
2276 			if (i == 0) {
2277 				srv_log_file_size = size;
2278 			} else if (size != srv_log_file_size) {
2279 				ib_logf(IB_LOG_LEVEL_ERROR,
2280 					"Log file %s is"
2281 					" of different size " UINT64PF " bytes"
2282 					" than other log"
2283 					" files " UINT64PF " bytes!",
2284 					logfilename,
2285 					size << UNIV_PAGE_SIZE_SHIFT,
2286 					(os_offset_t) srv_log_file_size
2287 					<< UNIV_PAGE_SIZE_SHIFT);
2288 				return(DB_ERROR);
2289 			}
2290 		}
2291 
2292 		srv_n_log_files_found = i;
2293 
2294 		/* Create the in-memory file space objects. */
2295 
2296 		sprintf(logfilename + dirnamelen, "ib_logfile%u", 0);
2297 
2298 		fil_space_create(logfilename,
2299 				 SRV_LOG_SPACE_FIRST_ID,
2300 				 fsp_flags_set_page_size(0, UNIV_PAGE_SIZE),
2301 				 FIL_LOG);
2302 
2303 		ut_a(fil_validate());
2304 
2305 		/* srv_log_file_size is measured in pages; if page size is 16KB,
2306 		then we have a limit of 64TB on 32 bit systems */
2307 		ut_a(srv_log_file_size <= ULINT_MAX);
2308 
2309 		for (unsigned j = 0; j < i; j++) {
2310 			sprintf(logfilename + dirnamelen, "ib_logfile%u", j);
2311 
2312 			if (!fil_node_create(logfilename,
2313 					     (ulint) srv_log_file_size,
2314 					     SRV_LOG_SPACE_FIRST_ID, FALSE)) {
2315 				return(DB_ERROR);
2316 			}
2317 		}
2318 
2319 #ifdef UNIV_LOG_ARCHIVE
		/* Create the file space object for archived logs. Under
		MySQL, no archiving is ever done. */
2322 		fil_space_create("arch_log_space", SRV_LOG_SPACE_FIRST_ID + 1,
2323 				 0, FIL_LOG);
2324 #endif /* UNIV_LOG_ARCHIVE */
2325 		log_group_init(0, i, srv_log_file_size * UNIV_PAGE_SIZE,
2326 			       SRV_LOG_SPACE_FIRST_ID,
2327 			       SRV_LOG_SPACE_FIRST_ID + 1);
2328 	}
2329 
2330 files_checked:
2331 	/* Open all log files and data files in the system
2332 	tablespace: we keep them open until database
2333 	shutdown */
2334 
2335 	fil_open_log_and_system_tablespace_files();
2336 
2337 	err = srv_undo_tablespaces_init(
2338 		create_new_db,
2339 		srv_undo_tablespaces,
2340 		&srv_undo_tablespaces_open);
2341 
2342 	/* If the force recovery is set very high then we carry on regardless
2343 	of all errors. Basically this is fingers crossed mode. */
2344 
2345 	if (err != DB_SUCCESS
2346 	    && srv_force_recovery < SRV_FORCE_NO_UNDO_LOG_SCAN) {
2347 
2348 		return(err);
2349 	}
2350 
2351 	/* Initialize objects used by dict stats gathering thread, which
2352 	can also be used by recovery if it tries to drop some table */
2353 	if (!srv_read_only_mode) {
2354 		dict_stats_thread_init();
2355 	}
2356 
2357 	trx_sys_file_format_init();
2358 
2359 	trx_sys_create();
2360 
2361 	if (create_new_db) {
2362 
2363 		ut_a(!srv_read_only_mode);
2364 
2365 		mtr_start(&mtr);
2366 
2367 		fsp_header_init(0, sum_of_new_sizes, &mtr);
2368 
2369 		mtr_commit(&mtr);
2370 
2371 		/* To maintain backward compatibility we create only
2372 		the first rollback segment before the double write buffer.
2373 		All the remaining rollback segments will be created later,
2374 		after the double write buffer has been created. */
2375 		trx_sys_create_sys_pages();
2376 
2377 		ib_bh = trx_sys_init_at_db_start();
2378 		n_recovered_trx = UT_LIST_GET_LEN(trx_sys->rw_trx_list);
2379 
2380 		/* The purge system needs to create the purge view and
2381 		therefore requires that the trx_sys is inited. */
2382 
2383 		trx_purge_sys_create(srv_n_purge_threads, ib_bh);
2384 
2385 		err = dict_create();
2386 
2387 		if (err != DB_SUCCESS) {
2388 			return(err);
2389 		}
2390 
2391 		srv_startup_is_before_trx_rollback_phase = FALSE;
2392 
2393 		bool success = buf_flush_list(ULINT_MAX, LSN_MAX, NULL);
2394 		ut_a(success);
2395 
2396 		min_flushed_lsn = max_flushed_lsn = log_get_lsn();
2397 
2398 		buf_flush_wait_batch_end(NULL, BUF_FLUSH_LIST);
2399 
2400 		/* Stamp the LSN to the data files. */
2401 		fil_write_flushed_lsn_to_data_files(max_flushed_lsn, 0);
2402 
2403 		fil_flush_file_spaces(FIL_TABLESPACE);
2404 
2405 		create_log_files_rename(logfilename, dirnamelen,
2406 					max_flushed_lsn, logfile0);
2407 #ifdef UNIV_LOG_ARCHIVE
2408 	} else if (srv_archive_recovery) {
2409 
2410 		ib_logf(IB_LOG_LEVEL_INFO,
2411 			" Starting archive recovery from a backup...");
2412 
2413 		err = recv_recovery_from_archive_start(
2414 			min_flushed_lsn, srv_archive_recovery_limit_lsn,
2415 			min_arch_log_no);
2416 		if (err != DB_SUCCESS) {
2417 
2418 			return(DB_ERROR);
2419 		}
2420 		/* Since ibuf init is in dict_boot, and ibuf is needed
2421 		in any disk i/o, first call dict_boot */
2422 
2423 		err = dict_boot();
2424 
2425 		if (err != DB_SUCCESS) {
2426 			return(err);
2427 		}
2428 
2429 		ib_bh = trx_sys_init_at_db_start();
2430 		n_recovered_trx = UT_LIST_GET_LEN(trx_sys->rw_trx_list);
2431 
2432 		/* The purge system needs to create the purge view and
2433 		therefore requires that the trx_sys is inited. */
2434 
2435 		trx_purge_sys_create(srv_n_purge_threads, ib_bh);
2436 
2437 		srv_startup_is_before_trx_rollback_phase = FALSE;
2438 
2439 		recv_recovery_from_archive_finish();
2440 #endif /* UNIV_LOG_ARCHIVE */
2441 	} else {
2442 
2443 		/* Check if we support the max format that is stamped
2444 		on the system tablespace.
2445 		Note:  We are NOT allowed to make any modifications to
2446 		the TRX_SYS_PAGE_NO page before recovery  because this
2447 		page also contains the max_trx_id etc. important system
2448 		variables that are required for recovery.  We need to
2449 		ensure that we return the system to a state where normal
2450 		recovery is guaranteed to work. We do this by
2451 		invalidating the buffer cache, this will force the
2452 		reread of the page and restoration to its last known
2453 		consistent state, this is REQUIRED for the recovery
2454 		process to work. */
2455 		err = trx_sys_file_format_max_check(
2456 			srv_max_file_format_at_startup);
2457 
2458 		if (err != DB_SUCCESS) {
2459 			return(err);
2460 		}
2461 
2462 		/* Invalidate the buffer pool to ensure that we reread
2463 		the page that we read above, during recovery.
2464 		Note that this is not as heavy weight as it seems. At
2465 		this point there will be only ONE page in the buf_LRU
2466 		and there must be no page in the buf_flush list. */
2467 		buf_pool_invalidate();
2468 
2469 		/* We always try to do a recovery, even if the database had
2470 		been shut down normally: this is the normal startup path */
2471 
2472 		err = recv_recovery_from_checkpoint_start(
2473 			LOG_CHECKPOINT, LSN_MAX,
2474 			min_flushed_lsn, max_flushed_lsn);
2475 
2476 		if (err != DB_SUCCESS) {
2477 
2478 			return(DB_ERROR);
2479 		}
2480 
2481 		/* Since the insert buffer init is in dict_boot, and the
2482 		insert buffer is needed in any disk i/o, first we call
2483 		dict_boot(). Note that trx_sys_init_at_db_start() only needs
2484 		to access space 0, and the insert buffer at this stage already
2485 		works for space 0. */
2486 
2487 		err = dict_boot();
2488 
2489 		if (err != DB_SUCCESS) {
2490 			return(err);
2491 		}
2492 
2493 		ib_bh = trx_sys_init_at_db_start();
2494 		n_recovered_trx = UT_LIST_GET_LEN(trx_sys->rw_trx_list);
2495 
2496 		/* The purge system needs to create the purge view and
2497 		therefore requires that the trx_sys is inited. */
2498 
2499 		trx_purge_sys_create(srv_n_purge_threads, ib_bh);
2500 
2501 		/* recv_recovery_from_checkpoint_finish needs trx lists which
2502 		are initialized in trx_sys_init_at_db_start(). */
2503 
2504 		recv_recovery_from_checkpoint_finish();
2505 
2506 		if (srv_force_recovery < SRV_FORCE_NO_IBUF_MERGE) {
2507 			/* The following call is necessary for the insert
2508 			buffer to work with multiple tablespaces. We must
2509 			know the mapping between space id's and .ibd file
2510 			names.
2511 
2512 			In a crash recovery, we check that the info in data
2513 			dictionary is consistent with what we already know
2514 			about space id's from the call of
2515 			fil_load_single_table_tablespaces().
2516 
2517 			In a normal startup, we create the space objects for
2518 			every table in the InnoDB data dictionary that has
2519 			an .ibd file.
2520 
2521 			We also determine the maximum tablespace id used. */
2522 			dict_check_t	dict_check;
2523 
2524 			if (recv_needed_recovery) {
2525 				dict_check = DICT_CHECK_ALL_LOADED;
2526 			} else if (n_recovered_trx) {
2527 				dict_check = DICT_CHECK_SOME_LOADED;
2528 			} else {
2529 				dict_check = DICT_CHECK_NONE_LOADED;
2530 			}
2531 
2532 			dict_check_tablespaces_and_store_max_id(dict_check);
2533 		}
2534 
2535 		if (!srv_force_recovery
2536 		    && !recv_sys->found_corrupt_log
2537 		    && (srv_log_file_size_requested != srv_log_file_size
2538 			|| srv_n_log_files_found != srv_n_log_files)) {
2539 			/* Prepare to replace the redo log files. */
2540 
2541 			if (srv_read_only_mode) {
2542 				ib_logf(IB_LOG_LEVEL_ERROR,
2543 					"Cannot resize log files "
2544 					"in read-only mode.");
2545 				return(DB_READ_ONLY);
2546 			}
2547 
2548 			/* Clean the buffer pool. */
2549 			bool success = buf_flush_list(
2550 				ULINT_MAX, LSN_MAX, NULL);
2551 			ut_a(success);
2552 
2553 			RECOVERY_CRASH(1);
2554 
2555 			min_flushed_lsn = max_flushed_lsn = log_get_lsn();
2556 
2557 			ib_logf(IB_LOG_LEVEL_WARN,
2558 				"Resizing redo log from %u*%u to %u*%u pages"
2559 				", LSN=" LSN_PF,
2560 				(unsigned) i,
2561 				(unsigned) srv_log_file_size,
2562 				(unsigned) srv_n_log_files,
2563 				(unsigned) srv_log_file_size_requested,
2564 				max_flushed_lsn);
2565 
2566 			buf_flush_wait_batch_end(NULL, BUF_FLUSH_LIST);
2567 
2568 			RECOVERY_CRASH(2);
2569 
2570 			/* Flush the old log files. */
2571 			log_buffer_flush_to_disk();
2572 			/* If innodb_flush_method=O_DSYNC,
2573 			we need to explicitly flush the log buffers. */
2574 			fil_flush(SRV_LOG_SPACE_FIRST_ID);
2575 
2576 			ut_ad(max_flushed_lsn == log_get_lsn());
2577 
2578 			/* Prohibit redo log writes from any other
2579 			threads until creating a log checkpoint at the
2580 			end of create_log_files(). */
2581 			ut_d(recv_no_log_write = TRUE);
2582 			ut_ad(!buf_pool_check_no_pending_io());
2583 
2584 			RECOVERY_CRASH(3);
2585 
2586 			/* Stamp the LSN to the data files. */
2587 			fil_write_flushed_lsn_to_data_files(
2588 				max_flushed_lsn, 0);
2589 
2590 			fil_flush_file_spaces(FIL_TABLESPACE);
2591 
2592 			RECOVERY_CRASH(4);
2593 
2594 			/* Close and free the redo log files, so that
2595 			we can replace them. */
2596 			fil_close_log_files(true);
2597 
2598 			RECOVERY_CRASH(5);
2599 
2600 			/* Free the old log file space. */
2601 			log_group_close_all();
2602 
2603 			ib_logf(IB_LOG_LEVEL_WARN,
2604 				"Starting to delete and rewrite log files.");
2605 
2606 			srv_log_file_size = srv_log_file_size_requested;
2607 
2608 			err = create_log_files(create_new_db, logfilename,
2609 					       dirnamelen, max_flushed_lsn,
2610 					       logfile0);
2611 
2612 			if (err != DB_SUCCESS) {
2613 				return(err);
2614 			}
2615 
2616 			create_log_files_rename(logfilename, dirnamelen,
2617 						max_flushed_lsn, logfile0);
2618 		}
2619 
2620 		srv_startup_is_before_trx_rollback_phase = FALSE;
2621 		recv_recovery_rollback_active();
2622 
2623 		/* It is possible that file_format tag has never
2624 		been set. In this case we initialize it to minimum
2625 		value.  Important to note that we can do it ONLY after
2626 		we have finished the recovery process so that the
2627 		image of TRX_SYS_PAGE_NO is not stale. */
2628 		trx_sys_file_format_tag_init();
2629 	}
2630 
2631 	if (!create_new_db && sum_of_new_sizes > 0) {
2632 		/* New data file(s) were added */
2633 		mtr_start(&mtr);
2634 
2635 		fsp_header_inc_size(0, sum_of_new_sizes, &mtr);
2636 
2637 		mtr_commit(&mtr);
2638 
2639 		/* Immediately write the log record about increased tablespace
2640 		size to disk, so that it is durable even if mysqld would crash
2641 		quickly */
2642 
2643 		log_buffer_flush_to_disk();
2644 	}
2645 
2646 #ifdef UNIV_LOG_ARCHIVE
2647 	/* Archiving is always off under MySQL */
2648 	if (!srv_log_archive_on) {
2649 		ut_a(DB_SUCCESS == log_archive_noarchivelog());
2650 	} else {
2651 		mutex_enter(&(log_sys->mutex));
2652 
2653 		start_archive = FALSE;
2654 
2655 		if (log_sys->archiving_state == LOG_ARCH_OFF) {
2656 			start_archive = TRUE;
2657 		}
2658 
2659 		mutex_exit(&(log_sys->mutex));
2660 
2661 		if (start_archive) {
2662 			ut_a(DB_SUCCESS == log_archive_archivelog());
2663 		}
2664 	}
2665 #endif /* UNIV_LOG_ARCHIVE */
2666 
2667 	/* fprintf(stderr, "Max allowed record size %lu\n",
2668 	page_get_free_space_of_empty() / 2); */
2669 
2670 	if (buf_dblwr == NULL) {
2671 		/* Create the doublewrite buffer to a new tablespace */
2672 
2673 		buf_dblwr_create();
2674 	}
2675 
2676 	/* Here the double write buffer has already been created and so
2677 	any new rollback segments will be allocated after the double
2678 	write buffer. The default segment should already exist.
2679 	We create the new segments only if it's a new database or
2680 	the database was shutdown cleanly. */
2681 
2682 	/* Note: When creating the extra rollback segments during an upgrade
2683 	we violate the latching order, even if the change buffer is empty.
2684 	We make an exception in sync0sync.cc and check srv_is_being_started
2685 	for that violation. It cannot create a deadlock because we are still
2686 	running in single threaded mode essentially. Only the IO threads
2687 	should be running at this stage. */
2688 
2689 	ut_a(srv_undo_logs > 0);
2690 	ut_a(srv_undo_logs <= TRX_SYS_N_RSEGS);
2691 
2692 	/* The number of rsegs that exist in InnoDB is given by status
2693 	variable srv_available_undo_logs. The number of rsegs to use can
2694 	be set using the dynamic global variable srv_undo_logs. */
2695 
2696 	srv_available_undo_logs = trx_sys_create_rsegs(
2697 		srv_undo_tablespaces, srv_undo_logs);
2698 
2699 	if (srv_available_undo_logs == ULINT_UNDEFINED) {
2700 		/* Can only happen if server is read only. */
2701 		ut_a(srv_read_only_mode);
2702 		srv_undo_logs = ULONG_UNDEFINED;
2703 	}
2704 
2705 	if (!srv_read_only_mode) {
2706 		/* Create the thread which watches the timeouts
2707 		for lock waits */
2708 		os_thread_create(
2709 			lock_wait_timeout_thread,
2710 			NULL, thread_ids + 2 + SRV_MAX_N_IO_THREADS);
2711 
2712 		/* Create the thread which warns of long semaphore waits */
2713 		os_thread_create(
2714 			srv_error_monitor_thread,
2715 			NULL, thread_ids + 3 + SRV_MAX_N_IO_THREADS);
2716 
2717 		/* Create the thread which prints InnoDB monitor info */
2718 		os_thread_create(
2719 			srv_monitor_thread,
2720 			NULL, thread_ids + 4 + SRV_MAX_N_IO_THREADS);
2721 	}
2722 
2723 	/* Create the SYS_FOREIGN and SYS_FOREIGN_COLS system tables */
2724 	err = dict_create_or_check_foreign_constraint_tables();
2725 	if (err != DB_SUCCESS) {
2726 		return(err);
2727 	}
2728 
2729 	/* Create the SYS_TABLESPACES system table */
2730 	err = dict_create_or_check_sys_tablespace();
2731 	if (err != DB_SUCCESS) {
2732 		return(err);
2733 	}
2734 
2735 	srv_is_being_started = FALSE;
2736 
2737 	ut_a(trx_purge_state() == PURGE_STATE_INIT);
2738 
2739 	/* Create the master thread which does purge and other utility
2740 	operations */
2741 
2742 	if (!srv_read_only_mode) {
2743 
2744 		os_thread_create(
2745 			srv_master_thread,
2746 			NULL, thread_ids + (1 + SRV_MAX_N_IO_THREADS));
2747 	}
2748 
2749 	if (!srv_read_only_mode
2750 	    && srv_force_recovery < SRV_FORCE_NO_BACKGROUND) {
2751 
2752 		os_thread_create(
2753 			srv_purge_coordinator_thread,
2754 			NULL, thread_ids + 5 + SRV_MAX_N_IO_THREADS);
2755 
2756 		ut_a(UT_ARR_SIZE(thread_ids)
2757 		     > 5 + srv_n_purge_threads + SRV_MAX_N_IO_THREADS);
2758 
2759 		/* We've already created the purge coordinator thread above. */
2760 		for (i = 1; i < srv_n_purge_threads; ++i) {
2761 			os_thread_create(
2762 				srv_worker_thread, NULL,
2763 				thread_ids + 5 + i + SRV_MAX_N_IO_THREADS);
2764 		}
2765 
2766 		srv_start_wait_for_purge_to_start();
2767 
2768 	} else {
2769 		purge_sys->state = PURGE_STATE_DISABLED;
2770 	}
2771 
2772 	if (!srv_read_only_mode) {
2773 		os_thread_create(buf_flush_page_cleaner_thread, NULL, NULL);
2774 	}
2775 
2776 #ifdef UNIV_DEBUG
2777 	/* buf_debug_prints = TRUE; */
2778 #endif /* UNIV_DEBUG */
2779 	sum_of_data_file_sizes = 0;
2780 
2781 	for (i = 0; i < srv_n_data_files; i++) {
2782 		sum_of_data_file_sizes += srv_data_file_sizes[i];
2783 	}
2784 
2785 	tablespace_size_in_header = fsp_header_get_tablespace_size();
2786 
2787 	if (!srv_read_only_mode
2788 	    && !srv_auto_extend_last_data_file
2789 	    && sum_of_data_file_sizes != tablespace_size_in_header) {
2790 
2791 		ut_print_timestamp(stderr);
2792 		fprintf(stderr,
2793 			" InnoDB: Error: tablespace size"
2794 			" stored in header is %lu pages, but\n",
2795 			(ulong) tablespace_size_in_header);
2796 		ut_print_timestamp(stderr);
2797 		fprintf(stderr,
2798 			"InnoDB: the sum of data file sizes is %lu pages\n",
2799 			(ulong) sum_of_data_file_sizes);
2800 
2801 		if (srv_force_recovery == 0
2802 		    && sum_of_data_file_sizes < tablespace_size_in_header) {
2803 			/* This is a fatal error, the tail of a tablespace is
2804 			missing */
2805 
2806 			ut_print_timestamp(stderr);
2807 			fprintf(stderr,
2808 				" InnoDB: Cannot start InnoDB."
2809 				" The tail of the system tablespace is\n");
2810 			ut_print_timestamp(stderr);
2811 			fprintf(stderr,
2812 				" InnoDB: missing. Have you edited"
2813 				" innodb_data_file_path in my.cnf in an\n");
2814 			ut_print_timestamp(stderr);
2815 			fprintf(stderr,
2816 				" InnoDB: inappropriate way, removing"
2817 				" ibdata files from there?\n");
2818 			ut_print_timestamp(stderr);
2819 			fprintf(stderr,
2820 				" InnoDB: You can set innodb_force_recovery=1"
2821 				" in my.cnf to force\n");
2822 			ut_print_timestamp(stderr);
2823 			fprintf(stderr,
2824 				" InnoDB: a startup if you are trying"
2825 				" to recover a badly corrupt database.\n");
2826 
2827 			return(DB_ERROR);
2828 		}
2829 	}
2830 
2831 	if (!srv_read_only_mode
2832 	    && srv_auto_extend_last_data_file
2833 	    && sum_of_data_file_sizes < tablespace_size_in_header) {
2834 
2835 		ut_print_timestamp(stderr);
2836 		fprintf(stderr,
2837 			" InnoDB: Error: tablespace size stored in header"
2838 			" is %lu pages, but\n",
2839 			(ulong) tablespace_size_in_header);
2840 		ut_print_timestamp(stderr);
2841 		fprintf(stderr,
2842 			" InnoDB: the sum of data file sizes"
2843 			" is only %lu pages\n",
2844 			(ulong) sum_of_data_file_sizes);
2845 
2846 		if (srv_force_recovery == 0) {
2847 
2848 			ut_print_timestamp(stderr);
2849 			fprintf(stderr,
2850 				" InnoDB: Cannot start InnoDB. The tail of"
2851 				" the system tablespace is\n");
2852 			ut_print_timestamp(stderr);
2853 			fprintf(stderr,
2854 				" InnoDB: missing. Have you edited"
2855 				" innodb_data_file_path in my.cnf in an\n");
2856 			ut_print_timestamp(stderr);
2857 			fprintf(stderr,
2858 				" InnoDB: inappropriate way, removing"
2859 				" ibdata files from there?\n");
2860 			ut_print_timestamp(stderr);
2861 			fprintf(stderr,
2862 				" InnoDB: You can set innodb_force_recovery=1"
2863 				" in my.cnf to force\n");
2864 			ut_print_timestamp(stderr);
2865 			fprintf(stderr,
2866 				" InnoDB: a startup if you are trying to"
2867 				" recover a badly corrupt database.\n");
2868 
2869 			return(DB_ERROR);
2870 		}
2871 	}
2872 
2873 	/* Check that os_fast_mutexes work as expected */
2874 	os_fast_mutex_init(PFS_NOT_INSTRUMENTED, &srv_os_test_mutex);
2875 
2876 	if (0 != os_fast_mutex_trylock(&srv_os_test_mutex)) {
2877 		ut_print_timestamp(stderr);
2878 		fprintf(stderr,
2879 			" InnoDB: Error: pthread_mutex_trylock returns"
2880 			" an unexpected value on\n");
2881 		ut_print_timestamp(stderr);
2882 		fprintf(stderr,
2883 			" InnoDB: success! Cannot continue.\n");
2884 		exit(1);
2885 	}
2886 
2887 	os_fast_mutex_unlock(&srv_os_test_mutex);
2888 
2889 	os_fast_mutex_lock(&srv_os_test_mutex);
2890 
2891 	os_fast_mutex_unlock(&srv_os_test_mutex);
2892 
2893 	os_fast_mutex_free(&srv_os_test_mutex);
2894 
2895 	if (srv_print_verbose_log) {
2896 		ib_logf(IB_LOG_LEVEL_INFO,
2897 			"%s started; log sequence number " LSN_PF "",
2898 			INNODB_VERSION_STR, srv_start_lsn);
2899 	}
2900 
2901 	if (srv_force_recovery > 0) {
2902 		ib_logf(IB_LOG_LEVEL_INFO,
2903 			"!!! innodb_force_recovery is set to %lu !!!",
2904 			(ulong) srv_force_recovery);
2905 	}
2906 
2907 	if (srv_force_recovery == 0) {
2908 		/* In the insert buffer we may have even bigger tablespace
2909 		id's, because we may have dropped those tablespaces, but
2910 		insert buffer merge has not had time to clean the records from
2911 		the ibuf tree. */
2912 
2913 		ibuf_update_max_tablespace_id();
2914 	}
2915 
2916 	if (!srv_read_only_mode) {
2917 		/* Create the buffer pool dump/load thread */
2918 		os_thread_create(buf_dump_thread, NULL, NULL);
2919 
2920 		/* Create the dict stats gathering thread */
2921 		os_thread_create(dict_stats_thread, NULL, NULL);
2922 
2923 		/* Create the thread that will optimize the FTS sub-system. */
2924 		fts_optimize_init();
2925 	}
2926 
2927 	srv_was_started = TRUE;
2928 
2929 	return(DB_SUCCESS);
2930 }
2931 
2932 #if 0
2933 /********************************************************************
2934 Sync all FTS cache before shutdown */
2935 static
2936 void
2937 srv_fts_close(void)
2938 /*===============*/
2939 {
2940 	dict_table_t*	table;
2941 
2942 	for (table = UT_LIST_GET_FIRST(dict_sys->table_LRU);
2943 	     table; table = UT_LIST_GET_NEXT(table_LRU, table)) {
2944 		fts_t*          fts = table->fts;
2945 
2946 		if (fts != NULL) {
2947 			fts_sync_table(table);
2948 		}
2949 	}
2950 
2951 	for (table = UT_LIST_GET_FIRST(dict_sys->table_non_LRU);
2952 	     table; table = UT_LIST_GET_NEXT(table_LRU, table)) {
2953 		fts_t*          fts = table->fts;
2954 
2955 		if (fts != NULL) {
2956 			fts_sync_table(table);
2957 		}
2958 	}
2959 }
2960 #endif
2961 
2962 /****************************************************************//**
2963 Shuts down the InnoDB database.
2964 @return	DB_SUCCESS or error code */
2965 UNIV_INTERN
2966 dberr_t
innobase_shutdown_for_mysql(void)2967 innobase_shutdown_for_mysql(void)
2968 /*=============================*/
2969 {
2970 	ulint	i;
2971 
2972 	if (!srv_was_started) {
2973 		if (srv_is_being_started) {
2974 			ib_logf(IB_LOG_LEVEL_WARN,
2975 				"Shutting down an improperly started, "
2976 				"or created database!");
2977 		}
2978 
2979 		return(DB_SUCCESS);
2980 	}
2981 
2982 	if (!srv_read_only_mode) {
2983 		/* Shutdown the FTS optimize sub system. */
2984 		fts_optimize_start_shutdown();
2985 
2986 		fts_optimize_end();
2987 	}
2988 
2989 	/* 1. Flush the buffer pool to disk, write the current lsn to
2990 	the tablespace header(s), and copy all log data to archive.
2991 	The step 1 is the real InnoDB shutdown. The remaining steps 2 - ...
2992 	just free data structures after the shutdown. */
2993 
2994 	logs_empty_and_mark_files_at_shutdown();
2995 
2996 	if (srv_conc_get_active_threads() != 0) {
2997 		ib_logf(IB_LOG_LEVEL_WARN,
2998 			"Query counter shows %ld queries still "
2999 			"inside InnoDB at shutdown",
3000 			srv_conc_get_active_threads());
3001 	}
3002 
3003 	/* 2. Make all threads created by InnoDB to exit */
3004 
3005 	srv_shutdown_state = SRV_SHUTDOWN_EXIT_THREADS;
3006 
3007 	/* All threads end up waiting for certain events. Put those events
3008 	to the signaled state. Then the threads will exit themselves after
3009 	os_event_wait(). */
3010 
3011 	for (i = 0; i < 1000; i++) {
3012 		/* NOTE: IF YOU CREATE THREADS IN INNODB, YOU MUST EXIT THEM
3013 		HERE OR EARLIER */
3014 
3015 		if (!srv_read_only_mode) {
3016 			/* a. Let the lock timeout thread exit */
3017 			os_event_set(lock_sys->timeout_event);
3018 
3019 			/* b. srv error monitor thread exits automatically,
3020 			no need to do anything here */
3021 
3022 			/* c. We wake the master thread so that it exits */
3023 			srv_wake_master_thread();
3024 
3025 			/* d. Wakeup purge threads. */
3026 			srv_purge_wakeup();
3027 		}
3028 
3029 		/* e. Exit the i/o threads */
3030 
3031 		os_aio_wake_all_threads_at_shutdown();
3032 
3033 		/* f. dict_stats_thread is signaled from
3034 		logs_empty_and_mark_files_at_shutdown() and should have
3035 		already quit or is quitting right now. */
3036 
3037 		os_mutex_enter(os_sync_mutex);
3038 
3039 		if (os_thread_count == 0) {
3040 			/* All the threads have exited or are just exiting;
3041 			NOTE that the threads may not have completed their
3042 			exit yet. Should we use pthread_join() to make sure
3043 			they have exited? If we did, we would have to
3044 			remove the pthread_detach() from
3045 			os_thread_exit().  Now we just sleep 0.1
3046 			seconds and hope that is enough! */
3047 
3048 			os_mutex_exit(os_sync_mutex);
3049 
3050 			os_thread_sleep(100000);
3051 
3052 			break;
3053 		}
3054 
3055 		os_mutex_exit(os_sync_mutex);
3056 
3057 		os_thread_sleep(100000);
3058 	}
3059 
3060 	if (i == 1000) {
3061 		ib_logf(IB_LOG_LEVEL_WARN,
3062 			"%lu threads created by InnoDB"
3063 			" had not exited at shutdown!",
3064 			(ulong) os_thread_count);
3065 	}
3066 
3067 	if (srv_monitor_file) {
3068 		fclose(srv_monitor_file);
3069 		srv_monitor_file = 0;
3070 		if (srv_monitor_file_name) {
3071 			unlink(srv_monitor_file_name);
3072 			mem_free(srv_monitor_file_name);
3073 		}
3074 	}
3075 
3076 	if (srv_dict_tmpfile) {
3077 		fclose(srv_dict_tmpfile);
3078 		srv_dict_tmpfile = 0;
3079 	}
3080 
3081 	if (srv_misc_tmpfile) {
3082 		fclose(srv_misc_tmpfile);
3083 		srv_misc_tmpfile = 0;
3084 	}
3085 
3086 	if (!srv_read_only_mode) {
3087 		dict_stats_thread_deinit();
3088 	}
3089 
3090 	/* This must be disabled before closing the buffer pool
3091 	and closing the data dictionary.  */
3092 	btr_search_disable();
3093 
3094 	ibuf_close();
3095 	log_shutdown();
3096 	trx_sys_file_format_close();
3097 	trx_sys_close();
3098 	lock_sys_close();
3099 
3100 	/* We don't create these mutexes in RO mode because we don't create
3101 	the temp files that the cover. */
3102 	if (!srv_read_only_mode) {
3103 		mutex_free(&srv_monitor_file_mutex);
3104 		mutex_free(&srv_dict_tmpfile_mutex);
3105 		mutex_free(&srv_misc_tmpfile_mutex);
3106 	}
3107 
3108 	dict_close();
3109 	btr_search_sys_free();
3110 
3111 	/* 3. Free all InnoDB's own mutexes and the os_fast_mutexes inside
3112 	them */
3113 	os_aio_free();
3114 	que_close();
3115 	row_mysql_close();
3116 	srv_mon_free();
3117 	sync_close();
3118 	srv_free();
3119 	fil_close();
3120 
3121 	/* 4. Free the os_conc_mutex and all os_events and os_mutexes */
3122 
3123 	os_sync_free();
3124 
3125 	/* 5. Free all allocated memory */
3126 
3127 	pars_lexer_close();
3128 	log_mem_free();
3129 	buf_pool_free(srv_buf_pool_instances);
3130 	mem_close();
3131 
3132 	/* ut_free_all_mem() frees all allocated memory not freed yet
3133 	in shutdown, and it will also free the ut_list_mutex, so it
3134 	should be the last one for all operation */
3135 	ut_free_all_mem();
3136 
3137 	if (os_thread_count != 0
3138 	    || os_event_count != 0
3139 	    || os_mutex_count != 0
3140 	    || os_fast_mutex_count != 0) {
3141 		ib_logf(IB_LOG_LEVEL_WARN,
3142 			"Some resources were not cleaned up in shutdown: "
3143 			"threads %lu, events %lu, os_mutexes %lu, "
3144 			"os_fast_mutexes %lu",
3145 			(ulong) os_thread_count, (ulong) os_event_count,
3146 			(ulong) os_mutex_count, (ulong) os_fast_mutex_count);
3147 	}
3148 
3149 	if (dict_foreign_err_file) {
3150 		fclose(dict_foreign_err_file);
3151 	}
3152 
3153 	if (srv_print_verbose_log) {
3154 		ib_logf(IB_LOG_LEVEL_INFO,
3155 			"Shutdown completed; log sequence number " LSN_PF "",
3156 			srv_shutdown_lsn);
3157 	}
3158 
3159 	srv_was_started = FALSE;
3160 	srv_start_has_been_called = FALSE;
3161 
3162 	return(DB_SUCCESS);
3163 }
3164 #endif /* !UNIV_HOTBACKUP */
3165 
3166 
3167 /********************************************************************
3168 Signal all per-table background threads to shutdown, and wait for them to do
3169 so. */
3170 UNIV_INTERN
3171 void
srv_shutdown_table_bg_threads(void)3172 srv_shutdown_table_bg_threads(void)
3173 /*===============================*/
3174 {
3175 	dict_table_t*	table;
3176 	dict_table_t*	first;
3177 	dict_table_t*	last = NULL;
3178 
3179 	mutex_enter(&dict_sys->mutex);
3180 
3181 	/* Signal all threads that they should stop. */
3182 	table = UT_LIST_GET_FIRST(dict_sys->table_LRU);
3183 	first = table;
3184 	while (table) {
3185 		dict_table_t*	next;
3186 		fts_t*		fts = table->fts;
3187 
3188 		if (fts != NULL) {
3189 			fts_start_shutdown(table, fts);
3190 		}
3191 
3192 		next = UT_LIST_GET_NEXT(table_LRU, table);
3193 
3194 		if (!next) {
3195 			last = table;
3196 		}
3197 
3198 		table = next;
3199 	}
3200 
3201 	/* We must release dict_sys->mutex here; if we hold on to it in the
3202 	loop below, we will deadlock if any of the background threads try to
3203 	acquire it (for example, the FTS thread by calling que_eval_sql).
3204 
3205 	Releasing it here and going through dict_sys->table_LRU without
3206 	holding it is safe because:
3207 
3208 	 a) MySQL only starts the shutdown procedure after all client
3209 	 threads have been disconnected and no new ones are accepted, so no
3210 	 new tables are added or old ones dropped.
3211 
3212 	 b) Despite its name, the list is not LRU, and the order stays
3213 	 fixed.
3214 
3215 	To safeguard against the above assumptions ever changing, we store
3216 	the first and last items in the list above, and then check that
3217 	they've stayed the same below. */
3218 
3219 	mutex_exit(&dict_sys->mutex);
3220 
3221 	/* Wait for the threads of each table to stop. This is not inside
3222 	the above loop, because by signaling all the threads first we can
3223 	overlap their shutting down delays. */
3224 	table = UT_LIST_GET_FIRST(dict_sys->table_LRU);
3225 	ut_a(first == table);
3226 	while (table) {
3227 		dict_table_t*	next;
3228 		fts_t*		fts = table->fts;
3229 
3230 		if (fts != NULL) {
3231 			fts_shutdown(table, fts);
3232 		}
3233 
3234 		next = UT_LIST_GET_NEXT(table_LRU, table);
3235 
3236 		if (table == last) {
3237 			ut_a(!next);
3238 		}
3239 
3240 		table = next;
3241 	}
3242 }
3243 
3244 /*****************************************************************//**
3245 Get the meta-data filename from the table name. */
3246 UNIV_INTERN
3247 void
srv_get_meta_data_filename(dict_table_t * table,char * filename,ulint max_len)3248 srv_get_meta_data_filename(
3249 /*=======================*/
3250 	dict_table_t*	table,		/*!< in: table */
3251 	char*			filename,	/*!< out: filename */
3252 	ulint			max_len)	/*!< in: filename max length */
3253 {
3254 	ulint			len;
3255 	char*			path;
3256 	char*			suffix;
3257 	static const ulint	suffix_len = strlen(".cfg");
3258 
3259 	if (DICT_TF_HAS_DATA_DIR(table->flags)) {
3260 		dict_get_and_save_data_dir_path(table, false);
3261 		ut_a(table->data_dir_path);
3262 
3263 		path = os_file_make_remote_pathname(
3264 			table->data_dir_path, table->name, "cfg");
3265 	} else {
3266 		path = fil_make_ibd_name(table->name, false);
3267 	}
3268 
3269 	ut_a(path);
3270 	len = ut_strlen(path);
3271 	ut_a(max_len >= len);
3272 
3273 	suffix = path + (len - suffix_len);
3274 	if (strncmp(suffix, ".cfg", suffix_len) == 0) {
3275 		strcpy(filename, path);
3276 	} else {
3277 		ut_ad(strncmp(suffix, ".ibd", suffix_len) == 0);
3278 
3279 		strncpy(filename, path, len - suffix_len);
3280 		suffix = filename + (len - suffix_len);
3281 		strcpy(suffix, ".cfg");
3282 	}
3283 
3284 	mem_free(path);
3285 
3286 	srv_normalize_path_for_win(filename);
3287 }
3288