1 /*****************************************************************************
2
3 Copyright (c) 1996, 2017, Oracle and/or its affiliates. All rights reserved.
4 Copyright (c) 2008, Google Inc.
5 Copyright (c) 2009, Percona Inc.
6
7 Portions of this file contain modifications contributed and copyrighted by
8 Google, Inc. Those modifications are gratefully acknowledged and are described
9 briefly in the InnoDB documentation. The contributions by Google are
10 incorporated with their permission, and subject to the conditions contained in
11 the file COPYING.Google.
12
13 Portions of this file contain modifications contributed and copyrighted
14 by Percona Inc.. Those modifications are
15 gratefully acknowledged and are described briefly in the InnoDB
16 documentation. The contributions by Percona Inc. are incorporated with
17 their permission, and subject to the conditions contained in the file
18 COPYING.Percona.
19
20 This program is free software; you can redistribute it and/or modify
21 it under the terms of the GNU General Public License, version 2.0,
22 as published by the Free Software Foundation.
23
24 This program is also distributed with certain software (including
25 but not limited to OpenSSL) that is licensed under separate terms,
26 as designated in a particular file or component or in included license
27 documentation. The authors of MySQL hereby grant you an additional
28 permission to link the program and your derivative works with the
29 separately licensed software that they have included with MySQL.
30
31 This program is distributed in the hope that it will be useful,
32 but WITHOUT ANY WARRANTY; without even the implied warranty of
33 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
34 GNU General Public License, version 2.0, for more details.
35
36 You should have received a copy of the GNU General Public License along with
37 this program; if not, write to the Free Software Foundation, Inc.,
38 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
39
40 *****************************************************************************/
41
42 /********************************************************************//**
43 @file srv/srv0start.cc
44 Starts the InnoDB database server
45
46 Created 2/16/1996 Heikki Tuuri
47 *************************************************************************/
48
49 #include "mysqld.h"
50 #include "pars0pars.h"
51 #include "row0ftsort.h"
52 #include "ut0mem.h"
53 #include "mem0mem.h"
54 #include "data0data.h"
55 #include "data0type.h"
56 #include "dict0dict.h"
57 #include "buf0buf.h"
58 #include "buf0dump.h"
59 #include "os0file.h"
60 #include "os0thread.h"
61 #include "fil0fil.h"
62 #include "fsp0fsp.h"
63 #include "rem0rec.h"
64 #include "mtr0mtr.h"
65 #include "log0log.h"
66 #include "log0recv.h"
67 #include "page0page.h"
68 #include "page0cur.h"
69 #include "trx0trx.h"
70 #include "trx0sys.h"
71 #include "btr0btr.h"
72 #include "btr0cur.h"
73 #include "rem0rec.h"
74 #include "ibuf0ibuf.h"
75 #include "srv0start.h"
76 #include "srv0srv.h"
77 #ifndef UNIV_HOTBACKUP
78 # include "trx0rseg.h"
79 # include "os0proc.h"
80 # include "sync0sync.h"
81 # include "buf0flu.h"
82 # include "buf0rea.h"
83 # include "dict0boot.h"
84 # include "dict0load.h"
85 # include "dict0stats_bg.h"
86 # include "que0que.h"
87 # include "usr0sess.h"
88 # include "lock0lock.h"
89 # include "trx0roll.h"
90 # include "trx0purge.h"
91 # include "lock0lock.h"
92 # include "pars0pars.h"
93 # include "btr0sea.h"
94 # include "rem0cmp.h"
95 # include "dict0crea.h"
96 # include "row0ins.h"
97 # include "row0sel.h"
98 # include "row0upd.h"
99 # include "row0row.h"
100 # include "row0mysql.h"
101 # include "btr0pcur.h"
102 # include "os0sync.h"
103 # include "zlib.h"
104 # include "ut0crc32.h"
105
106 /** Log sequence number immediately after startup */
107 UNIV_INTERN lsn_t srv_start_lsn;
108 /** Log sequence number at shutdown */
109 UNIV_INTERN lsn_t srv_shutdown_lsn;
110
111 #ifdef HAVE_DARWIN_THREADS
112 # include <sys/utsname.h>
113 /** TRUE if the F_FULLFSYNC option is available */
114 UNIV_INTERN ibool srv_have_fullfsync = FALSE;
115 #endif
116
117 /** TRUE if a raw partition is in use */
118 UNIV_INTERN ibool srv_start_raw_disk_in_use = FALSE;
119
/** Space id at which the undo tablespaces start. */
121 ulint srv_undo_space_id_start;
122
123 /** TRUE if the server is being started, before rolling back any
124 incomplete transactions */
125 UNIV_INTERN ibool srv_startup_is_before_trx_rollback_phase = FALSE;
126 /** TRUE if the server is being started */
127 UNIV_INTERN ibool srv_is_being_started = FALSE;
128 /** TRUE if the server was successfully started */
129 UNIV_INTERN ibool srv_was_started = FALSE;
130 /** TRUE if innobase_start_or_create_for_mysql() has been called */
131 static ibool srv_start_has_been_called = FALSE;
132
133 /** At a shutdown this value climbs from SRV_SHUTDOWN_NONE to
134 SRV_SHUTDOWN_CLEANUP and then to SRV_SHUTDOWN_LAST_PHASE, and so on */
135 UNIV_INTERN enum srv_shutdown_state srv_shutdown_state = SRV_SHUTDOWN_NONE;
136
137 /** Files comprising the system tablespace */
138 static pfs_os_file_t files[1000];
139
140 /** io_handler_thread parameters for thread identification */
141 static ulint n[SRV_MAX_N_IO_THREADS + 6];
142 /** io_handler_thread identifiers, 32 is the maximum number of purge threads */
143 static os_thread_id_t thread_ids[SRV_MAX_N_IO_THREADS + 6 + 32];
144
145 /** We use this mutex to test the return value of pthread_mutex_trylock
146 on successful locking. HP-UX does NOT return 0, though Linux et al do. */
147 static os_fast_mutex_t srv_os_test_mutex;
148
149 /** Name of srv_monitor_file */
150 static char* srv_monitor_file_name;
151 #endif /* !UNIV_HOTBACKUP */
152
153 /** Default undo tablespace size in UNIV_PAGEs count (10MB). */
154 static const ulint SRV_UNDO_TABLESPACE_SIZE_IN_PAGES =
155 ((1024 * 1024) * 10) / UNIV_PAGE_SIZE_DEF;
156
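/** Number of pending i/o requests per i/o thread (taken from the os0file
constant) and the maximum number of pending synchronous i/o requests */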
158 #define SRV_N_PENDING_IOS_PER_THREAD OS_AIO_N_PENDING_IOS_PER_THREAD
159 #define SRV_MAX_N_PENDING_SYNC_IOS 100
160
/** Converts a size in bytes to a number of pages, after rounding the size
down to whole megabytes. The round off to MB is similar as done in
srv_parse_megabytes(). The whole expansion is parenthesized so that a cast
or an operator applied to the macro acts on the complete product and not
only on its first factor. */
#define CALC_NUMBER_OF_PAGES(size)			\
	(((size) / (1024 * 1024))			\
	 * ((1024 * 1024) / (UNIV_PAGE_SIZE)))
164 #ifdef UNIV_PFS_THREAD
165 /* Keys to register InnoDB threads with performance schema */
166 UNIV_INTERN mysql_pfs_key_t io_handler_thread_key;
167 UNIV_INTERN mysql_pfs_key_t srv_lock_timeout_thread_key;
168 UNIV_INTERN mysql_pfs_key_t srv_error_monitor_thread_key;
169 UNIV_INTERN mysql_pfs_key_t srv_monitor_thread_key;
170 UNIV_INTERN mysql_pfs_key_t srv_master_thread_key;
171 UNIV_INTERN mysql_pfs_key_t srv_purge_thread_key;
172 #endif /* UNIV_PFS_THREAD */
173
174 /*********************************************************************//**
175 Convert a numeric string that optionally ends in G or M or K, to a number
176 containing megabytes.
177 @return next character in string */
178 static
179 char*
srv_parse_megabytes(char * str,ulint * megs)180 srv_parse_megabytes(
181 /*================*/
182 char* str, /*!< in: string containing a quantity in bytes */
183 ulint* megs) /*!< out: the number in megabytes */
184 {
185 char* endp;
186 ulint size;
187
188 size = strtoul(str, &endp, 10);
189
190 str = endp;
191
192 switch (*str) {
193 case 'G': case 'g':
194 size *= 1024;
195 /* fall through */
196 case 'M': case 'm':
197 str++;
198 break;
199 case 'K': case 'k':
200 size /= 1024;
201 str++;
202 break;
203 default:
204 size /= 1024 * 1024;
205 break;
206 }
207
208 *megs = size;
209 return(str);
210 }
211
212 /*********************************************************************//**
213 Check if a file can be opened in read-write mode.
214 @return true if it doesn't exist or can be opened in rw mode. */
215 static
216 bool
srv_file_check_mode(const char * name)217 srv_file_check_mode(
218 /*================*/
219 const char* name) /*!< in: filename to check */
220 {
221 os_file_stat_t stat;
222
223 memset(&stat, 0x0, sizeof(stat));
224
225 dberr_t err = os_file_get_status(name, &stat, true);
226
227 if (err == DB_FAIL) {
228
229 ib_logf(IB_LOG_LEVEL_ERROR,
230 "os_file_get_status() failed on '%s'. Can't determine "
231 "file permissions", name);
232
233 return(false);
234
235 } else if (err == DB_SUCCESS) {
236
237 /* Note: stat.rw_perm is only valid of files */
238
239 if (stat.type == OS_FILE_TYPE_FILE) {
240
241 if (!stat.rw_perm) {
242
243 ib_logf(IB_LOG_LEVEL_ERROR,
244 "%s can't be opened in %s mode",
245 name,
246 srv_read_only_mode
247 ? "read" : "read-write");
248
249 return(false);
250 }
251 } else {
252 /* Not a regular file, bail out. */
253
254 ib_logf(IB_LOG_LEVEL_ERROR,
255 "'%s' not a regular file.", name);
256
257 return(false);
258 }
259 } else {
260
261 /* This is OK. If the file create fails on RO media, there
262 is nothing we can do. */
263
264 ut_a(err == DB_NOT_FOUND);
265 }
266
267 return(true);
268 }
269
270 /*********************************************************************//**
271 Reads the data files and their sizes from a character string given in
272 the .cnf file.
273 @return TRUE if ok, FALSE on parse error */
274 UNIV_INTERN
275 ibool
srv_parse_data_file_paths_and_sizes(char * str)276 srv_parse_data_file_paths_and_sizes(
277 /*================================*/
278 char* str) /*!< in/out: the data file path string */
279 {
280 char* input_str;
281 char* path;
282 ulint size;
283 ulint i = 0;
284
285 srv_auto_extend_last_data_file = FALSE;
286 srv_last_file_size_max = 0;
287 srv_data_file_names = NULL;
288 srv_data_file_sizes = NULL;
289 srv_data_file_is_raw_partition = NULL;
290
291 input_str = str;
292
293 /* First calculate the number of data files and check syntax:
294 path:size[M | G];path:size[M | G]... . Note that a Windows path may
295 contain a drive name and a ':'. */
296
297 while (*str != '\0') {
298 path = str;
299
300 while ((*str != ':' && *str != '\0')
301 || (*str == ':'
302 && (*(str + 1) == '\\' || *(str + 1) == '/'
303 || *(str + 1) == ':'))) {
304 str++;
305 }
306
307 if (*str == '\0') {
308 return(FALSE);
309 }
310
311 str++;
312
313 str = srv_parse_megabytes(str, &size);
314
315 if (0 == strncmp(str, ":autoextend",
316 (sizeof ":autoextend") - 1)) {
317
318 str += (sizeof ":autoextend") - 1;
319
320 if (0 == strncmp(str, ":max:",
321 (sizeof ":max:") - 1)) {
322
323 str += (sizeof ":max:") - 1;
324
325 str = srv_parse_megabytes(str, &size);
326 }
327
328 if (*str != '\0') {
329
330 return(FALSE);
331 }
332 }
333
334 if (strlen(str) >= 6
335 && *str == 'n'
336 && *(str + 1) == 'e'
337 && *(str + 2) == 'w') {
338 str += 3;
339 }
340
341 if (*str == 'r' && *(str + 1) == 'a' && *(str + 2) == 'w') {
342 str += 3;
343 }
344
345 if (size == 0) {
346 return(FALSE);
347 }
348
349 i++;
350
351 if (*str == ';') {
352 str++;
353 } else if (*str != '\0') {
354
355 return(FALSE);
356 }
357 }
358
359 if (i == 0) {
360 /* If innodb_data_file_path was defined it must contain
361 at least one data file definition */
362
363 return(FALSE);
364 }
365
366 srv_data_file_names = static_cast<char**>(
367 malloc(i * sizeof *srv_data_file_names));
368
369 srv_data_file_sizes = static_cast<ulint*>(
370 malloc(i * sizeof *srv_data_file_sizes));
371
372 srv_data_file_is_raw_partition = static_cast<ulint*>(
373 malloc(i * sizeof *srv_data_file_is_raw_partition));
374
375 srv_n_data_files = i;
376
377 /* Then store the actual values to our arrays */
378
379 str = input_str;
380 i = 0;
381
382 while (*str != '\0') {
383 path = str;
384
385 /* Note that we must step over the ':' in a Windows path;
386 a Windows path normally looks like C:\ibdata\ibdata1:1G, but
387 a Windows raw partition may have a specification like
388 \\.\C::1Gnewraw or \\.\PHYSICALDRIVE2:1Gnewraw */
389
390 while ((*str != ':' && *str != '\0')
391 || (*str == ':'
392 && (*(str + 1) == '\\' || *(str + 1) == '/'
393 || *(str + 1) == ':'))) {
394 str++;
395 }
396
397 if (*str == ':') {
398 /* Make path a null-terminated string */
399 *str = '\0';
400 str++;
401 }
402
403 str = srv_parse_megabytes(str, &size);
404
405 srv_data_file_names[i] = path;
406 srv_data_file_sizes[i] = size;
407
408 if (0 == strncmp(str, ":autoextend",
409 (sizeof ":autoextend") - 1)) {
410
411 srv_auto_extend_last_data_file = TRUE;
412
413 str += (sizeof ":autoextend") - 1;
414
415 if (0 == strncmp(str, ":max:",
416 (sizeof ":max:") - 1)) {
417
418 str += (sizeof ":max:") - 1;
419
420 str = srv_parse_megabytes(
421 str, &srv_last_file_size_max);
422 }
423
424 if (*str != '\0') {
425
426 return(FALSE);
427 }
428 }
429
430 (srv_data_file_is_raw_partition)[i] = 0;
431
432 if (strlen(str) >= 6
433 && *str == 'n'
434 && *(str + 1) == 'e'
435 && *(str + 2) == 'w') {
436 str += 3;
437 /* Initialize new raw device only during bootstrap */
438 (srv_data_file_is_raw_partition)[i] =
439 opt_bootstrap ? SRV_NEW_RAW : SRV_OLD_RAW;
440 }
441
442 if (*str == 'r' && *(str + 1) == 'a' && *(str + 2) == 'w') {
443 str += 3;
444
445 /* Initialize new raw device only during bootstrap */
446 if ((srv_data_file_is_raw_partition)[i] == 0) {
447 (srv_data_file_is_raw_partition)[i] =
448 opt_bootstrap ? SRV_NEW_RAW : SRV_OLD_RAW;
449 }
450 }
451
452 i++;
453
454 if (*str == ';') {
455 str++;
456 }
457 }
458
459 return(TRUE);
460 }
461
462 /*********************************************************************//**
463 Frees the memory allocated by srv_parse_data_file_paths_and_sizes()
464 and srv_parse_log_group_home_dirs(). */
465 UNIV_INTERN
466 void
srv_free_paths_and_sizes(void)467 srv_free_paths_and_sizes(void)
468 /*==========================*/
469 {
470 free(srv_data_file_names);
471 srv_data_file_names = NULL;
472 free(srv_data_file_sizes);
473 srv_data_file_sizes = NULL;
474 free(srv_data_file_is_raw_partition);
475 srv_data_file_is_raw_partition = NULL;
476 }
477
478 #ifndef UNIV_HOTBACKUP
479 /********************************************************************//**
480 I/o-handler thread function.
481 @return OS_THREAD_DUMMY_RETURN */
482 extern "C" UNIV_INTERN
483 os_thread_ret_t
DECLARE_THREAD(io_handler_thread)484 DECLARE_THREAD(io_handler_thread)(
485 /*==============================*/
486 void* arg) /*!< in: pointer to the number of the segment in
487 the aio array */
488 {
489 ulint segment;
490
491 segment = *((ulint*) arg);
492
493 #ifdef UNIV_DEBUG_THREAD_CREATION
494 fprintf(stderr, "Io handler thread %lu starts, id %lu\n", segment,
495 os_thread_pf(os_thread_get_curr_id()));
496 #endif
497
498 #ifdef UNIV_PFS_THREAD
499 pfs_register_thread(io_handler_thread_key);
500 #endif /* UNIV_PFS_THREAD */
501
502 while (srv_shutdown_state != SRV_SHUTDOWN_EXIT_THREADS) {
503 fil_aio_wait(segment);
504 }
505
506 /* We count the number of threads in os_thread_exit(). A created
507 thread should always use that to exit and not use return() to exit.
508 The thread actually never comes here because it is exited in an
509 os_event_wait(). */
510
511 os_thread_exit(NULL);
512
513 OS_THREAD_DUMMY_RETURN;
514 }
515 #endif /* !UNIV_HOTBACKUP */
516
517 /*********************************************************************//**
518 Normalizes a directory path for Windows: converts slashes to backslashes. */
519 UNIV_INTERN
520 void
srv_normalize_path_for_win(char * str MY_ATTRIBUTE ((unused)))521 srv_normalize_path_for_win(
522 /*=======================*/
523 char* str MY_ATTRIBUTE((unused))) /*!< in/out: null-terminated
524 character string */
525 {
526 #ifdef __WIN__
527 for (; *str; str++) {
528
529 if (*str == '/') {
530 *str = '\\';
531 }
532 }
533 #endif
534 }
535
536 #ifndef UNIV_HOTBACKUP
537 /*********************************************************************//**
538 Creates a log file.
539 @return DB_SUCCESS or error code */
540 static MY_ATTRIBUTE((nonnull, warn_unused_result))
541 dberr_t
create_log_file(pfs_os_file_t * file,const char * name)542 create_log_file(
543 /*============*/
544 pfs_os_file_t* file, /*!< out: file handle */
545 const char* name) /*!< in: log file name */
546 {
547 ibool ret;
548
549 *file = os_file_create(
550 innodb_file_log_key, name,
551 OS_FILE_CREATE|OS_FILE_ON_ERROR_NO_EXIT, OS_FILE_NORMAL,
552 OS_LOG_FILE, &ret);
553
554 if (!ret) {
555 ib_logf(IB_LOG_LEVEL_ERROR, "Cannot create %s", name);
556 return(DB_ERROR);
557 }
558
559 ib_logf(IB_LOG_LEVEL_INFO,
560 "Setting log file %s size to %lu MB",
561 name, (ulong) srv_log_file_size
562 >> (20 - UNIV_PAGE_SIZE_SHIFT));
563
564 ret = os_file_set_size(name, *file,
565 (os_offset_t) srv_log_file_size
566 << UNIV_PAGE_SIZE_SHIFT);
567 if (!ret) {
568 ib_logf(IB_LOG_LEVEL_ERROR, "Cannot set log file"
569 " %s to size %lu MB", name, (ulong) srv_log_file_size
570 >> (20 - UNIV_PAGE_SIZE_SHIFT));
571 return(DB_ERROR);
572 }
573
574 ret = os_file_close(*file);
575 ut_a(ret);
576
577 return(DB_SUCCESS);
578 }
579
580 /** Initial number of the first redo log file */
581 #define INIT_LOG_FILE0 (SRV_N_LOG_FILES_MAX + 1)
582
/** Crash injection point: when srv_force_recovery_crash equals the given
value, prints it to stderr and terminates the process with exit code 3.
Expands to nothing when DBUG_OFF is defined. The argument is parenthesized
so that any expression can be passed safely. */
#ifdef DBUG_OFF
# define RECOVERY_CRASH(x) do {} while(0)
#else
# define RECOVERY_CRASH(x) do {						\
	if (srv_force_recovery_crash == (x)) {				\
		fprintf(stderr, "innodb_force_recovery_crash=%lu\n",	\
			srv_force_recovery_crash);			\
		fflush(stderr);						\
		exit(3);						\
	}								\
} while (0)
#endif
595
596 /*********************************************************************//**
597 Creates all log files.
598 @return DB_SUCCESS or error code */
599 static
600 dberr_t
create_log_files(bool create_new_db,char * logfilename,size_t dirnamelen,lsn_t lsn,char * & logfile0)601 create_log_files(
602 /*=============*/
603 bool create_new_db, /*!< in: TRUE if new database is being
604 created */
605 char* logfilename, /*!< in/out: buffer for log file name */
606 size_t dirnamelen, /*!< in: length of the directory path */
607 lsn_t lsn, /*!< in: FIL_PAGE_FILE_FLUSH_LSN value */
608 char*& logfile0) /*!< out: name of the first log file */
609 {
610 if (srv_read_only_mode) {
611 ib_logf(IB_LOG_LEVEL_ERROR,
612 "Cannot create log files in read-only mode");
613 return(DB_READ_ONLY);
614 }
615
616 /* We prevent system tablespace creation with existing files in
617 data directory. So we do not delete log files when creating new system
618 tablespace */
619 if (!create_new_db) {
620 /* Remove any old log files. */
621 for (unsigned i = 0; i <= INIT_LOG_FILE0; i++) {
622 sprintf(logfilename + dirnamelen, "ib_logfile%u", i);
623
624 /* Ignore errors about non-existent files or files
625 that cannot be removed. The create_log_file() will
626 return an error when the file exists. */
627 #ifdef __WIN__
628 DeleteFile((LPCTSTR) logfilename);
629 #else
630 unlink(logfilename);
631 #endif
632 /* Crashing after deleting the first
633 file should be recoverable. The buffer
634 pool was clean, and we can simply create
635 all log files from the scratch. */
636 RECOVERY_CRASH(6);
637 }
638 }
639
640 ut_ad(!buf_pool_check_no_pending_io());
641
642 RECOVERY_CRASH(7);
643
644 for (unsigned i = 0; i < srv_n_log_files; i++) {
645 sprintf(logfilename + dirnamelen,
646 "ib_logfile%u", i ? i : INIT_LOG_FILE0);
647
648 dberr_t err = create_log_file(&files[i], logfilename);
649
650 if (err != DB_SUCCESS) {
651 return(err);
652 }
653 }
654
655 RECOVERY_CRASH(8);
656
657 /* We did not create the first log file initially as
658 ib_logfile0, so that crash recovery cannot find it until it
659 has been completed and renamed. */
660 sprintf(logfilename + dirnamelen, "ib_logfile%u", INIT_LOG_FILE0);
661
662 fil_space_create(
663 logfilename, SRV_LOG_SPACE_FIRST_ID,
664 fsp_flags_set_page_size(0, UNIV_PAGE_SIZE),
665 FIL_LOG);
666 ut_a(fil_validate());
667
668 logfile0 = fil_node_create(
669 logfilename, (ulint) srv_log_file_size,
670 SRV_LOG_SPACE_FIRST_ID, FALSE);
671 ut_a(logfile0);
672
673 for (unsigned i = 1; i < srv_n_log_files; i++) {
674 sprintf(logfilename + dirnamelen, "ib_logfile%u", i);
675
676 if (!fil_node_create(
677 logfilename,
678 (ulint) srv_log_file_size,
679 SRV_LOG_SPACE_FIRST_ID, FALSE)) {
680 ut_error;
681 }
682 }
683
684 log_group_init(0, srv_n_log_files,
685 srv_log_file_size * UNIV_PAGE_SIZE,
686 SRV_LOG_SPACE_FIRST_ID,
687 SRV_LOG_SPACE_FIRST_ID + 1);
688
689 fil_open_log_and_system_tablespace_files();
690
691 /* Create a log checkpoint. */
692 mutex_enter(&log_sys->mutex);
693 ut_d(recv_no_log_write = FALSE);
694 recv_reset_logs(lsn);
695 mutex_exit(&log_sys->mutex);
696
697 return(DB_SUCCESS);
698 }
699
700 /*********************************************************************//**
701 Renames the first log file. */
702 static
703 void
create_log_files_rename(char * logfilename,size_t dirnamelen,lsn_t lsn,char * logfile0)704 create_log_files_rename(
705 /*====================*/
706 char* logfilename, /*!< in/out: buffer for log file name */
707 size_t dirnamelen, /*!< in: length of the directory path */
708 lsn_t lsn, /*!< in: FIL_PAGE_FILE_FLUSH_LSN value */
709 char* logfile0) /*!< in/out: name of the first log file */
710 {
711 /* If innodb_flush_method=O_DSYNC,
712 we need to explicitly flush the log buffers. */
713 fil_flush(SRV_LOG_SPACE_FIRST_ID);
714 /* Close the log files, so that we can rename
715 the first one. */
716 fil_close_log_files(false);
717
718 /* Rename the first log file, now that a log
719 checkpoint has been created. */
720 sprintf(logfilename + dirnamelen, "ib_logfile%u", 0);
721
722 RECOVERY_CRASH(9);
723
724 ib_logf(IB_LOG_LEVEL_INFO,
725 "Renaming log file %s to %s", logfile0, logfilename);
726
727 mutex_enter(&log_sys->mutex);
728 ut_ad(strlen(logfile0) == 2 + strlen(logfilename));
729 ibool success = os_file_rename(
730 innodb_file_log_key, logfile0, logfilename);
731 ut_a(success);
732
733 RECOVERY_CRASH(10);
734
735 /* Replace the first file with ib_logfile0. */
736 strcpy(logfile0, logfilename);
737 mutex_exit(&log_sys->mutex);
738
739 fil_open_log_and_system_tablespace_files();
740
741 ib_logf(IB_LOG_LEVEL_WARN, "New log files created, LSN=" LSN_PF, lsn);
742 }
743
744 /*********************************************************************//**
745 Opens a log file.
746 @return DB_SUCCESS or error code */
747 static MY_ATTRIBUTE((nonnull, warn_unused_result))
748 dberr_t
open_log_file(pfs_os_file_t * file,const char * name,os_offset_t * size)749 open_log_file(
750 /*==========*/
751 pfs_os_file_t* file, /*!< out: file handle */
752 const char* name, /*!< in: log file name */
753 os_offset_t* size) /*!< out: file size */
754 {
755 ibool ret;
756
757 *file = os_file_create(innodb_file_log_key, name,
758 OS_FILE_OPEN, OS_FILE_AIO,
759 OS_LOG_FILE, &ret);
760 if (!ret) {
761 ib_logf(IB_LOG_LEVEL_ERROR, "Unable to open '%s'", name);
762 return(DB_ERROR);
763 }
764
765 *size = os_file_get_size(*file);
766
767 ret = os_file_close(*file);
768 ut_a(ret);
769 return(DB_SUCCESS);
770 }
771
772 /*********************************************************************//**
773 Creates or opens database data files and closes them.
774 @return DB_SUCCESS or error code */
775 static MY_ATTRIBUTE((nonnull, warn_unused_result))
776 dberr_t
open_or_create_data_files(ibool * create_new_db,ulint * min_arch_log_no,ulint * max_arch_log_no,lsn_t * min_flushed_lsn,lsn_t * max_flushed_lsn,ulint * sum_of_new_sizes)777 open_or_create_data_files(
778 /*======================*/
779 ibool* create_new_db, /*!< out: TRUE if new database should be
780 created */
781 #ifdef UNIV_LOG_ARCHIVE
782 ulint* min_arch_log_no,/*!< out: min of archived log
783 numbers in data files */
784 ulint* max_arch_log_no,/*!< out: max of archived log
785 numbers in data files */
786 #endif /* UNIV_LOG_ARCHIVE */
787 lsn_t* min_flushed_lsn,/*!< out: min of flushed lsn
788 values in data files */
789 lsn_t* max_flushed_lsn,/*!< out: max of flushed lsn
790 values in data files */
791 ulint* sum_of_new_sizes)/*!< out: sum of sizes of the
792 new files added */
793 {
794 ibool ret;
795 ulint i;
796 ibool one_opened = FALSE;
797 ibool one_created = FALSE;
798 os_offset_t size;
799 ulint flags;
800 ulint space;
801 ulint rounded_size_pages;
802 char name[10000];
803
804 if (srv_n_data_files >= 1000) {
805
806 ib_logf(IB_LOG_LEVEL_ERROR,
807 "Can only have < 1000 data files, you have "
808 "defined %lu", (ulong) srv_n_data_files);
809
810 return(DB_ERROR);
811 }
812
813 *sum_of_new_sizes = 0;
814
815 *create_new_db = FALSE;
816
817 srv_normalize_path_for_win(srv_data_home);
818
819 for (i = 0; i < srv_n_data_files; i++) {
820 ulint dirnamelen;
821
822 srv_normalize_path_for_win(srv_data_file_names[i]);
823 dirnamelen = strlen(srv_data_home);
824
825 ut_a(dirnamelen + strlen(srv_data_file_names[i])
826 < (sizeof name) - 1);
827
828 memcpy(name, srv_data_home, dirnamelen);
829
830 /* Add a path separator if needed. */
831 if (dirnamelen && name[dirnamelen - 1] != SRV_PATH_SEPARATOR) {
832 name[dirnamelen++] = SRV_PATH_SEPARATOR;
833 }
834
835 strcpy(name + dirnamelen, srv_data_file_names[i]);
836
837 /* Note: It will return true if the file doesn' exist. */
838
839 if (!srv_file_check_mode(name)) {
840
841 return(DB_FAIL);
842
843 } else if (srv_data_file_is_raw_partition[i] == 0) {
844
845 /* First we try to create the file: if it already
846 exists, ret will get value FALSE */
847
848 files[i] = os_file_create(
849 innodb_file_data_key, name, OS_FILE_CREATE,
850 OS_FILE_NORMAL, OS_DATA_FILE, &ret);
851
852 if (srv_read_only_mode) {
853
854 if (ret) {
855 goto size_check;
856 }
857
858 ib_logf(IB_LOG_LEVEL_ERROR,
859 "Opening %s failed!", name);
860
861 return(DB_ERROR);
862
863 } else if (!ret
864 && os_file_get_last_error(false)
865 != OS_FILE_ALREADY_EXISTS
866 #ifdef UNIV_AIX
867 /* AIX 5.1 after security patch ML7 may have
868 errno set to 0 here, which causes our
869 function to return 100; work around that
870 AIX problem */
871 && os_file_get_last_error(false) != 100
872 #endif /* UNIV_AIX */
873 ) {
874 ib_logf(IB_LOG_LEVEL_ERROR,
875 "Creating or opening %s failed!",
876 name);
877
878 return(DB_ERROR);
879 }
880
881 } else if (srv_data_file_is_raw_partition[i] == SRV_NEW_RAW) {
882
883 ut_a(!srv_read_only_mode);
884
885 /* The partition is opened, not created; then it is
886 written over */
887
888 srv_start_raw_disk_in_use = TRUE;
889 srv_created_new_raw = TRUE;
890
891 files[i] = os_file_create(
892 innodb_file_data_key, name, OS_FILE_OPEN_RAW,
893 OS_FILE_NORMAL, OS_DATA_FILE, &ret);
894
895 if (!ret) {
896 ib_logf(IB_LOG_LEVEL_ERROR,
897 "Error in opening %s", name);
898
899 return(DB_ERROR);
900 }
901
902 const char* check_msg;
903 check_msg = fil_read_first_page(
904 files[i], FALSE, &flags, &space,
905 #ifdef UNIV_LOG_ARCHIVE
906 min_arch_log_no, max_arch_log_no,
907 #endif /* UNIV_LOG_ARCHIVE */
908 min_flushed_lsn, max_flushed_lsn);
909
910 /* If first page is valid, don't overwrite DB.
911 It prevents overwriting DB when mysql_install_db
912 starts mysqld multiple times during bootstrap. */
913 if (check_msg == NULL) {
914
915 srv_created_new_raw = FALSE;
916 ret = FALSE;
917 }
918
919 } else if (srv_data_file_is_raw_partition[i] == SRV_OLD_RAW) {
920 srv_start_raw_disk_in_use = TRUE;
921
922 ret = FALSE;
923 } else {
924 ut_a(0);
925 }
926
927 if (ret == FALSE) {
928 const char* check_msg;
929 /* We open the data file */
930
931 if (one_created) {
932 ib_logf(IB_LOG_LEVEL_ERROR,
933 "Data files can only be added at "
934 "the end of a tablespace, but "
935 "data file %s existed beforehand.",
936 name);
937 return(DB_ERROR);
938 }
939 if (srv_data_file_is_raw_partition[i] == SRV_OLD_RAW) {
940 ut_a(!srv_read_only_mode);
941 files[i] = os_file_create(
942 innodb_file_data_key,
943 name, OS_FILE_OPEN_RAW,
944 OS_FILE_NORMAL, OS_DATA_FILE, &ret);
945 } else if (i == 0) {
946 files[i] = os_file_create(
947 innodb_file_data_key,
948 name, OS_FILE_OPEN_RETRY,
949 OS_FILE_NORMAL, OS_DATA_FILE, &ret);
950 } else {
951 files[i] = os_file_create(
952 innodb_file_data_key,
953 name, OS_FILE_OPEN, OS_FILE_NORMAL,
954 OS_DATA_FILE, &ret);
955 }
956
957 if (!ret) {
958
959 os_file_get_last_error(true);
960
961 ib_logf(IB_LOG_LEVEL_ERROR,
962 "Can't open '%s'", name);
963
964 return(DB_ERROR);
965 }
966
967 if (srv_data_file_is_raw_partition[i] == SRV_OLD_RAW) {
968
969 goto skip_size_check;
970 }
971
972 size_check:
973 size = os_file_get_size(files[i]);
974 ut_a(size != (os_offset_t) -1);
975
976 /* Under some error conditions like disk full
977 narios or file size reaching filesystem
978 limit the data file could contain an incomplete
979 extent at the end. When we extend a data file
980 and if some failure happens, then also the data
981 file could contain an incomplete extent. So we
982 need to round the size downward to a megabyte.*/
983
984 rounded_size_pages = (ulint) CALC_NUMBER_OF_PAGES(size);
985
986 if (i == srv_n_data_files - 1
987 && srv_auto_extend_last_data_file) {
988
989 if (srv_data_file_sizes[i] > rounded_size_pages
990 || (srv_last_file_size_max > 0
991 && srv_last_file_size_max
992 < rounded_size_pages)) {
993
994 ib_logf(IB_LOG_LEVEL_ERROR,
995 "auto-extending "
996 "data file %s is "
997 "of a different size "
998 "%lu pages (rounded "
999 "down to MB) than specified "
1000 "in the .cnf file: "
1001 "initial %lu pages, "
1002 "max %lu (relevant if "
1003 "non-zero) pages!",
1004 name,
1005 (ulong) rounded_size_pages,
1006 (ulong) srv_data_file_sizes[i],
1007 (ulong)
1008 srv_last_file_size_max);
1009
1010 return(DB_ERROR);
1011 }
1012
1013 srv_data_file_sizes[i] = rounded_size_pages;
1014 }
1015
1016 if (rounded_size_pages != srv_data_file_sizes[i]) {
1017
1018 ib_logf(IB_LOG_LEVEL_ERROR,
1019 "Data file %s is of a different "
1020 "size %lu pages (rounded down to MB) "
1021 "than specified in the .cnf file "
1022 "%lu pages!",
1023 name,
1024 (ulong) rounded_size_pages,
1025 (ulong) srv_data_file_sizes[i]);
1026
1027 return(DB_ERROR);
1028 }
1029 skip_size_check:
1030
1031 /* This is the earliest location where we can load
1032 the double write buffer. */
1033 if (i == 0) {
1034 buf_dblwr_init_or_load_pages(
1035 files[i], srv_data_file_names[i], true);
1036 }
1037
1038 bool retry = true;
1039 check_first_page:
1040 check_msg = fil_read_first_page(
1041 files[i], one_opened, &flags, &space,
1042 #ifdef UNIV_LOG_ARCHIVE
1043 min_arch_log_no, max_arch_log_no,
1044 #endif /* UNIV_LOG_ARCHIVE */
1045 min_flushed_lsn, max_flushed_lsn);
1046
1047 if (check_msg) {
1048
1049 if (retry) {
1050 fsp_open_info fsp;
1051 const ulint page_no = 0;
1052
1053 retry = false;
1054 fsp.id = 0;
1055 fsp.filepath = srv_data_file_names[i];
1056 fsp.file = files[i];
1057
1058 if (fil_user_tablespace_restore_page(
1059 &fsp, page_no)) {
1060 goto check_first_page;
1061 }
1062 }
1063
1064 ib_logf(IB_LOG_LEVEL_ERROR,
1065 "%s in data file %s",
1066 check_msg, name);
1067 return(DB_ERROR);
1068 }
1069
1070 /* The first file of the system tablespace must
1071 have space ID = TRX_SYS_SPACE. The FSP_SPACE_ID
1072 field in files greater than ibdata1 are unreliable. */
1073 ut_a(one_opened || space == TRX_SYS_SPACE);
1074
1075 /* Check the flags for the first system tablespace
1076 file only. */
1077 if (!one_opened
1078 && UNIV_PAGE_SIZE
1079 != fsp_flags_get_page_size(flags)) {
1080
1081 ib_logf(IB_LOG_LEVEL_ERROR,
1082 "Data file \"%s\" uses page size %lu,"
1083 "but the start-up parameter "
1084 "is --innodb-page-size=%lu",
1085 name,
1086 fsp_flags_get_page_size(flags),
1087 UNIV_PAGE_SIZE);
1088
1089 return(DB_ERROR);
1090 }
1091
1092 one_opened = TRUE;
1093 } else if (!srv_read_only_mode) {
1094 /* We created the data file and now write it full of
1095 zeros */
1096
1097 one_created = TRUE;
1098
1099 if (i > 0) {
1100 ib_logf(IB_LOG_LEVEL_INFO,
1101 "Data file %s did not"
1102 " exist: new to be created",
1103 name);
1104 } else {
1105 ib_logf(IB_LOG_LEVEL_INFO,
1106 "The first specified "
1107 "data file %s did not exist: "
1108 "a new database to be created!",
1109 name);
1110
1111 *create_new_db = TRUE;
1112 }
1113
1114 ib_logf(IB_LOG_LEVEL_INFO,
1115 "Setting file %s size to %lu MB",
1116 name,
1117 (ulong) (srv_data_file_sizes[i]
1118 >> (20 - UNIV_PAGE_SIZE_SHIFT)));
1119
1120 ib_logf(IB_LOG_LEVEL_INFO,
1121 "Database physically writes the"
1122 " file full: wait...");
1123
1124 ret = os_file_set_size(
1125 name, files[i],
1126 (os_offset_t) srv_data_file_sizes[i]
1127 << UNIV_PAGE_SIZE_SHIFT);
1128
1129 if (!ret) {
1130 ib_logf(IB_LOG_LEVEL_ERROR,
1131 "Error in creating %s: "
1132 "probably out of disk space",
1133 name);
1134
1135 return(DB_ERROR);
1136 }
1137
1138 *sum_of_new_sizes += srv_data_file_sizes[i];
1139 }
1140
1141 ret = os_file_close(files[i]);
1142 ut_a(ret);
1143
1144 if (i == 0) {
1145 flags = fsp_flags_set_page_size(0, UNIV_PAGE_SIZE);
1146 fil_space_create(name, 0, flags, FIL_TABLESPACE);
1147 }
1148
1149 ut_a(fil_validate());
1150
1151 if (!fil_node_create(name, srv_data_file_sizes[i], 0,
1152 srv_data_file_is_raw_partition[i] != 0)) {
1153 return(DB_ERROR);
1154 }
1155 }
1156
1157 return(DB_SUCCESS);
1158 }
1159
1160 /*********************************************************************//**
1161 Create undo tablespace.
1162 @return DB_SUCCESS or error code */
1163 static
1164 dberr_t
srv_undo_tablespace_create(const char * name,ulint size)1165 srv_undo_tablespace_create(
1166 /*=======================*/
1167 const char* name, /*!< in: tablespace name */
1168 ulint size) /*!< in: tablespace size in pages */
1169 {
1170 pfs_os_file_t fh;
1171 ibool ret;
1172 dberr_t err = DB_SUCCESS;
1173
1174 os_file_create_subdirs_if_needed(name);
1175
1176 fh = os_file_create(
1177 innodb_file_data_key,
1178 name,
1179 srv_read_only_mode ? OS_FILE_OPEN : OS_FILE_CREATE,
1180 OS_FILE_NORMAL, OS_DATA_FILE, &ret);
1181
1182 if (srv_read_only_mode && ret) {
1183 ib_logf(IB_LOG_LEVEL_INFO,
1184 "%s opened in read-only mode", name);
1185 } else if (ret == FALSE) {
1186 if (os_file_get_last_error(false) != OS_FILE_ALREADY_EXISTS
1187 #ifdef UNIV_AIX
1188 /* AIX 5.1 after security patch ML7 may have
1189 errno set to 0 here, which causes our function
1190 to return 100; work around that AIX problem */
1191 && os_file_get_last_error(false) != 100
1192 #endif /* UNIV_AIX */
1193 ) {
1194 ib_logf(IB_LOG_LEVEL_ERROR,
1195 "Can't create UNDO tablespace %s", name);
1196 } else {
1197 ib_logf(IB_LOG_LEVEL_ERROR,
1198 "Creating system tablespace with"
1199 " existing undo tablespaces is not"
1200 " supported. Please delete all undo"
1201 " tablespaces before creating new"
1202 " system tablespace.");
1203 }
1204 err = DB_ERROR;
1205 } else {
1206 ut_a(!srv_read_only_mode);
1207
1208 /* We created the data file and now write it full of zeros */
1209
1210 ib_logf(IB_LOG_LEVEL_INFO,
1211 "Data file %s did not exist: new to be created",
1212 name);
1213
1214 ib_logf(IB_LOG_LEVEL_INFO,
1215 "Setting file %s size to %lu MB",
1216 name, size >> (20 - UNIV_PAGE_SIZE_SHIFT));
1217
1218 ib_logf(IB_LOG_LEVEL_INFO,
1219 "Database physically writes the file full: wait...");
1220
1221 ret = os_file_set_size(name, fh, size << UNIV_PAGE_SIZE_SHIFT);
1222
1223 if (!ret) {
1224 ib_logf(IB_LOG_LEVEL_INFO,
1225 "Error in creating %s: probably out of "
1226 "disk space", name);
1227
1228 err = DB_ERROR;
1229 }
1230
1231 os_file_close(fh);
1232 }
1233
1234 return(err);
1235 }
1236
1237 /*********************************************************************//**
1238 Open an undo tablespace.
1239 @return DB_SUCCESS or error code */
1240 static
1241 dberr_t
srv_undo_tablespace_open(const char * name,ulint space)1242 srv_undo_tablespace_open(
1243 /*=====================*/
1244 const char* name, /*!< in: tablespace name */
1245 ulint space) /*!< in: tablespace id */
1246 {
1247 pfs_os_file_t fh;
1248 dberr_t err = DB_ERROR;
1249 ibool ret;
1250 ulint flags;
1251
1252 if (!srv_file_check_mode(name)) {
1253 ib_logf(IB_LOG_LEVEL_ERROR,
1254 "UNDO tablespaces must be %s!",
1255 srv_read_only_mode ? "writable" : "readable");
1256
1257 return(DB_ERROR);
1258 }
1259
1260 fh = os_file_create(
1261 innodb_file_data_key, name,
1262 OS_FILE_OPEN_RETRY
1263 | OS_FILE_ON_ERROR_NO_EXIT
1264 | OS_FILE_ON_ERROR_SILENT,
1265 OS_FILE_NORMAL,
1266 OS_DATA_FILE,
1267 &ret);
1268
1269 /* If the file open was successful then load the tablespace. */
1270
1271 if (ret) {
1272 os_offset_t size;
1273
1274 size = os_file_get_size(fh);
1275 ut_a(size != (os_offset_t) -1);
1276
1277 ret = os_file_close(fh);
1278 ut_a(ret);
1279
1280 /* Load the tablespace into InnoDB's internal
1281 data structures. */
1282
1283 /* We set the biggest space id to the undo tablespace
1284 because InnoDB hasn't opened any other tablespace apart
1285 from the system tablespace. */
1286
1287 fil_set_max_space_id_if_bigger(space);
1288
1289 /* Set the compressed page size to 0 (non-compressed) */
1290 flags = fsp_flags_set_page_size(0, UNIV_PAGE_SIZE);
1291 fil_space_create(name, space, flags, FIL_TABLESPACE);
1292
1293 ut_a(fil_validate());
1294
1295 os_offset_t n_pages = size / UNIV_PAGE_SIZE;
1296
1297 /* On 64 bit Windows ulint can be 32 bit and os_offset_t
1298 is 64 bit. It is OK to cast the n_pages to ulint because
1299 the unit has been scaled to pages and they are always
1300 32 bit. */
1301 if (fil_node_create(name, (ulint) n_pages, space, FALSE)) {
1302 err = DB_SUCCESS;
1303 }
1304 }
1305
1306 return(err);
1307 }
1308
1309 /********************************************************************
1310 Opens the configured number of undo tablespaces.
1311 @return DB_SUCCESS or error code */
1312 static
1313 dberr_t
srv_undo_tablespaces_init(ibool create_new_db,const ulint n_conf_tablespaces,ulint * n_opened)1314 srv_undo_tablespaces_init(
1315 /*======================*/
1316 ibool create_new_db, /*!< in: TRUE if new db being
1317 created */
1318 const ulint n_conf_tablespaces, /*!< in: configured undo
1319 tablespaces */
1320 ulint* n_opened) /*!< out: number of UNDO
1321 tablespaces successfully
1322 discovered and opened */
1323 {
1324 ulint i;
1325 dberr_t err = DB_SUCCESS;
1326 ulint prev_space_id = 0;
1327 ulint n_undo_tablespaces;
1328 ulint undo_tablespace_ids[TRX_SYS_N_RSEGS + 1];
1329
1330 *n_opened = 0;
1331
1332 ut_a(n_conf_tablespaces <= TRX_SYS_N_RSEGS);
1333
1334 memset(undo_tablespace_ids, 0x0, sizeof(undo_tablespace_ids));
1335
1336 /* Create the undo spaces only if we are creating a new
1337 instance. We don't allow creating of new undo tablespaces
1338 in an existing instance (yet). This restriction exists because
1339 we check in several places for SYSTEM tablespaces to be less than
1340 the min of user defined tablespace ids. Once we implement saving
1341 the location of the undo tablespaces and their space ids this
1342 restriction will/should be lifted. */
1343
1344 for (i = 0; create_new_db && i < n_conf_tablespaces; ++i) {
1345 char name[OS_FILE_MAX_PATH];
1346 ulint space_id = i + 1;
1347
1348 DBUG_EXECUTE_IF("innodb_undo_upgrade",
1349 space_id = i + 3;);
1350
1351 ut_snprintf(
1352 name, sizeof(name),
1353 "%s%cundo%03lu",
1354 srv_undo_dir, SRV_PATH_SEPARATOR, space_id);
1355
1356 if (i == 0) {
1357 srv_undo_space_id_start = space_id;
1358 prev_space_id = srv_undo_space_id_start - 1;
1359 }
1360
1361 undo_tablespace_ids[i] = space_id;
1362
1363 err = srv_undo_tablespace_create(
1364 name, SRV_UNDO_TABLESPACE_SIZE_IN_PAGES);
1365
1366 if (err != DB_SUCCESS) {
1367
1368 ib_logf(IB_LOG_LEVEL_ERROR,
1369 "Could not create undo tablespace '%s'.",
1370 name);
1371
1372 return(err);
1373 }
1374 }
1375
1376 /* Get the tablespace ids of all the undo segments excluding
1377 the system tablespace (0). If we are creating a new instance then
1378 we build the undo_tablespace_ids ourselves since they don't
1379 already exist. */
1380
1381 if (!create_new_db) {
1382 n_undo_tablespaces = trx_rseg_get_n_undo_tablespaces(
1383 undo_tablespace_ids);
1384
1385 if (n_undo_tablespaces != 0) {
1386 srv_undo_space_id_start = undo_tablespace_ids[0];
1387 prev_space_id = srv_undo_space_id_start - 1;
1388 }
1389
1390 } else {
1391 n_undo_tablespaces = n_conf_tablespaces;
1392
1393 undo_tablespace_ids[n_conf_tablespaces] = ULINT_UNDEFINED;
1394 }
1395
1396 /* Open all the undo tablespaces that are currently in use. If we
1397 fail to open any of these it is a fatal error. The tablespace ids
1398 should be contiguous. It is a fatal error because they are required
1399 for recovery and are referenced by the UNDO logs (a.k.a RBS). */
1400
1401 for (i = 0; i < n_undo_tablespaces; ++i) {
1402 char name[OS_FILE_MAX_PATH];
1403
1404 ut_snprintf(
1405 name, sizeof(name),
1406 "%s%cundo%03lu",
1407 srv_undo_dir, SRV_PATH_SEPARATOR,
1408 undo_tablespace_ids[i]);
1409
1410 /* Should be no gaps in undo tablespace ids. */
1411 ut_a(prev_space_id + 1 == undo_tablespace_ids[i]);
1412
1413 /* The system space id should not be in this array. */
1414 ut_a(undo_tablespace_ids[i] != 0);
1415 ut_a(undo_tablespace_ids[i] != ULINT_UNDEFINED);
1416
1417 err = srv_undo_tablespace_open(name, undo_tablespace_ids[i]);
1418
1419 if (err != DB_SUCCESS) {
1420
1421 ib_logf(IB_LOG_LEVEL_ERROR,
1422 "Unable to open undo tablespace '%s'.", name);
1423
1424 return(err);
1425 }
1426
1427 prev_space_id = undo_tablespace_ids[i];
1428
1429 ++*n_opened;
1430 }
1431
1432 /* Open any extra unused undo tablespaces. These must be contiguous.
1433 We stop at the first failure. These are undo tablespaces that are
1434 not in use and therefore not required by recovery. We only check
1435 that there are no gaps. */
1436
1437 for (i = prev_space_id + 1; i < TRX_SYS_N_RSEGS; ++i) {
1438 char name[OS_FILE_MAX_PATH];
1439
1440 ut_snprintf(
1441 name, sizeof(name),
1442 "%s%cundo%03lu", srv_undo_dir, SRV_PATH_SEPARATOR, i);
1443
1444 /* Undo space ids start from 1. */
1445 err = srv_undo_tablespace_open(name, i);
1446
1447 if (err != DB_SUCCESS) {
1448 break;
1449 }
1450
1451 /** Note the first undo tablespace id in case of
1452 no active undo tablespace. */
1453 if (n_undo_tablespaces == 0) {
1454 srv_undo_space_id_start = i;
1455 }
1456
1457 ++n_undo_tablespaces;
1458
1459 ++*n_opened;
1460 }
1461
1462 /** Explictly specify the srv_undo_space_id_start
1463 as zero when there are no undo tablespaces. */
1464 if (n_undo_tablespaces == 0) {
1465 srv_undo_space_id_start = 0;
1466 }
1467
1468 /* If the user says that there are fewer than what we find we
1469 tolerate that discrepancy but not the inverse. Because there could
1470 be unused undo tablespaces for future use. */
1471
1472 if (n_conf_tablespaces > n_undo_tablespaces) {
1473 ut_print_timestamp(stderr);
1474 fprintf(stderr,
1475 " InnoDB: Expected to open %lu undo "
1476 "tablespaces but was able\n",
1477 n_conf_tablespaces);
1478 ut_print_timestamp(stderr);
1479 fprintf(stderr,
1480 " InnoDB: to find only %lu undo "
1481 "tablespaces.\n", n_undo_tablespaces);
1482 ut_print_timestamp(stderr);
1483 fprintf(stderr,
1484 " InnoDB: Set the "
1485 "innodb_undo_tablespaces parameter to "
1486 "the\n");
1487 ut_print_timestamp(stderr);
1488 fprintf(stderr,
1489 " InnoDB: correct value and retry. Suggested "
1490 "value is %lu\n", n_undo_tablespaces);
1491
1492 return(err != DB_SUCCESS ? err : DB_ERROR);
1493
1494 } else if (n_undo_tablespaces > 0) {
1495
1496 ib_logf(IB_LOG_LEVEL_INFO, "Opened %lu undo tablespaces",
1497 n_undo_tablespaces);
1498
1499 if (n_conf_tablespaces == 0) {
1500 ib_logf(IB_LOG_LEVEL_WARN,
1501 "Using the system tablespace for all UNDO "
1502 "logging because innodb_undo_tablespaces=0");
1503 }
1504 }
1505
1506 if (create_new_db) {
1507 mtr_t mtr;
1508
1509 mtr_start(&mtr);
1510
1511 /* The undo log tablespace */
1512 for (i = 0; i < n_undo_tablespaces; ++i) {
1513
1514 fsp_header_init(
1515 undo_tablespace_ids[i],
1516 SRV_UNDO_TABLESPACE_SIZE_IN_PAGES, &mtr);
1517 }
1518
1519 mtr_commit(&mtr);
1520 }
1521
1522 return(DB_SUCCESS);
1523 }
1524
1525 /********************************************************************
1526 Wait for the purge thread(s) to start up. */
1527 static
1528 void
srv_start_wait_for_purge_to_start()1529 srv_start_wait_for_purge_to_start()
1530 /*===============================*/
1531 {
1532 /* Wait for the purge coordinator and master thread to startup. */
1533
1534 purge_state_t state = trx_purge_state();
1535
1536 ut_a(state != PURGE_STATE_DISABLED);
1537
1538 while (srv_shutdown_state == SRV_SHUTDOWN_NONE
1539 && srv_force_recovery < SRV_FORCE_NO_BACKGROUND
1540 && state == PURGE_STATE_INIT) {
1541
1542 switch (state = trx_purge_state()) {
1543 case PURGE_STATE_RUN:
1544 case PURGE_STATE_STOP:
1545 break;
1546
1547 case PURGE_STATE_INIT:
1548 ib_logf(IB_LOG_LEVEL_INFO,
1549 "Waiting for purge to start");
1550
1551 os_thread_sleep(50000);
1552 break;
1553
1554 case PURGE_STATE_EXIT:
1555 case PURGE_STATE_DISABLED:
1556 ut_error;
1557 }
1558 }
1559 }
1560
1561 /********************************************************************
1562 Starts InnoDB and creates a new database if database files
1563 are not found and the user wants.
1564 @return DB_SUCCESS or error code */
1565 UNIV_INTERN
1566 dberr_t
innobase_start_or_create_for_mysql(void)1567 innobase_start_or_create_for_mysql(void)
1568 /*====================================*/
1569 {
1570 ibool create_new_db;
1571 lsn_t min_flushed_lsn;
1572 lsn_t max_flushed_lsn;
1573 #ifdef UNIV_LOG_ARCHIVE
1574 ulint min_arch_log_no;
1575 ulint max_arch_log_no;
1576 #endif /* UNIV_LOG_ARCHIVE */
1577 ulint sum_of_new_sizes;
1578 ulint sum_of_data_file_sizes;
1579 ulint tablespace_size_in_header;
1580 dberr_t err;
1581 unsigned i;
1582 ulint srv_n_log_files_found = srv_n_log_files;
1583 ulint io_limit;
1584 mtr_t mtr;
1585 ib_bh_t* ib_bh;
1586 ulint n_recovered_trx;
1587 char logfilename[10000];
1588 char* logfile0 = NULL;
1589 size_t dirnamelen;
1590
1591 if (srv_force_recovery == SRV_FORCE_NO_LOG_REDO) {
1592 srv_read_only_mode = 1;
1593 }
1594
1595 high_level_read_only = srv_read_only_mode
1596 || srv_force_recovery > SRV_FORCE_NO_TRX_UNDO;
1597
1598 if (srv_read_only_mode) {
1599 ib_logf(IB_LOG_LEVEL_INFO, "Started in read only mode");
1600 }
1601
1602 #ifdef HAVE_DARWIN_THREADS
1603 # ifdef F_FULLFSYNC
1604 /* This executable has been compiled on Mac OS X 10.3 or later.
1605 Assume that F_FULLFSYNC is available at run-time. */
1606 srv_have_fullfsync = TRUE;
1607 # else /* F_FULLFSYNC */
1608 /* This executable has been compiled on Mac OS X 10.2
1609 or earlier. Determine if the executable is running
1610 on Mac OS X 10.3 or later. */
1611 struct utsname utsname;
1612 if (uname(&utsname)) {
1613 ut_print_timestamp(stderr);
1614 fputs(" InnoDB: cannot determine Mac OS X version!\n", stderr);
1615 } else {
1616 srv_have_fullfsync = strcmp(utsname.release, "7.") >= 0;
1617 }
1618 if (!srv_have_fullfsync) {
1619 ut_print_timestamp(stderr);
1620 fputs(" InnoDB: On Mac OS X, fsync() may be "
1621 "broken on internal drives,\n", stderr);
1622 ut_print_timestamp(stderr);
1623 fputs(" InnoDB: making transactions unsafe!\n", stderr);
1624 }
1625 # endif /* F_FULLFSYNC */
1626 #endif /* HAVE_DARWIN_THREADS */
1627
1628 ib_logf(IB_LOG_LEVEL_INFO,
1629 "Using %s to ref count buffer pool pages",
1630 #ifdef PAGE_ATOMIC_REF_COUNT
1631 "atomics"
1632 #else
1633 "mutexes"
1634 #endif /* PAGE_ATOMIC_REF_COUNT */
1635 );
1636
1637
1638 if (sizeof(ulint) != sizeof(void*)) {
1639 ut_print_timestamp(stderr);
1640 fprintf(stderr,
1641 " InnoDB: Error: size of InnoDB's ulint is %lu, "
1642 "but size of void*\n", (ulong) sizeof(ulint));
1643 ut_print_timestamp(stderr);
1644 fprintf(stderr,
1645 " InnoDB: is %lu. The sizes should be the same "
1646 "so that on a 64-bit\n",
1647 (ulong) sizeof(void*));
1648 ut_print_timestamp(stderr);
1649 fprintf(stderr,
1650 " InnoDB: platforms you can allocate more than 4 GB "
1651 "of memory.\n");
1652 }
1653
1654 #ifdef UNIV_DEBUG
1655 ut_print_timestamp(stderr);
1656 fprintf(stderr,
1657 " InnoDB: !!!!!!!! UNIV_DEBUG switched on !!!!!!!!!\n");
1658 #endif
1659
1660 #ifdef UNIV_IBUF_DEBUG
1661 ut_print_timestamp(stderr);
1662 fprintf(stderr,
1663 " InnoDB: !!!!!!!! UNIV_IBUF_DEBUG switched on !!!!!!!!!\n");
1664 # ifdef UNIV_IBUF_COUNT_DEBUG
1665 ut_print_timestamp(stderr);
1666 fprintf(stderr,
1667 " InnoDB: !!!!!!!! UNIV_IBUF_COUNT_DEBUG switched on "
1668 "!!!!!!!!!\n");
1669 ut_print_timestamp(stderr);
1670 fprintf(stderr,
1671 " InnoDB: Crash recovery will fail with UNIV_IBUF_COUNT_DEBUG\n");
1672 # endif
1673 #endif
1674
1675 #ifdef UNIV_BLOB_DEBUG
1676 fprintf(stderr,
1677 "InnoDB: !!!!!!!! UNIV_BLOB_DEBUG switched on !!!!!!!!!\n"
1678 "InnoDB: Server restart may fail with UNIV_BLOB_DEBUG\n");
1679 #endif /* UNIV_BLOB_DEBUG */
1680
1681 #ifdef UNIV_SYNC_DEBUG
1682 ut_print_timestamp(stderr);
1683 fprintf(stderr,
1684 " InnoDB: !!!!!!!! UNIV_SYNC_DEBUG switched on !!!!!!!!!\n");
1685 #endif
1686
1687 #ifdef UNIV_SEARCH_DEBUG
1688 ut_print_timestamp(stderr);
1689 fprintf(stderr,
1690 " InnoDB: !!!!!!!! UNIV_SEARCH_DEBUG switched on !!!!!!!!!\n");
1691 #endif
1692
1693 #ifdef UNIV_LOG_LSN_DEBUG
1694 ut_print_timestamp(stderr);
1695 fprintf(stderr,
1696 " InnoDB: !!!!!!!! UNIV_LOG_LSN_DEBUG switched on !!!!!!!!!\n");
1697 #endif /* UNIV_LOG_LSN_DEBUG */
1698 #ifdef UNIV_MEM_DEBUG
1699 ut_print_timestamp(stderr);
1700 fprintf(stderr,
1701 " InnoDB: !!!!!!!! UNIV_MEM_DEBUG switched on !!!!!!!!!\n");
1702 #endif
1703
1704 if (srv_use_sys_malloc) {
1705 ib_logf(IB_LOG_LEVEL_INFO,
1706 "The InnoDB memory heap is disabled");
1707 }
1708
1709 #if defined(COMPILER_HINTS_ENABLED)
1710 ib_logf(IB_LOG_LEVEL_INFO,
1711 " InnoDB: Compiler hints enabled.");
1712 #endif /* defined(COMPILER_HINTS_ENABLED) */
1713
1714 ib_logf(IB_LOG_LEVEL_INFO,
1715 "" IB_ATOMICS_STARTUP_MSG "");
1716
1717 ib_logf(IB_LOG_LEVEL_INFO,
1718 "" IB_MEMORY_BARRIER_STARTUP_MSG "");
1719
1720 #ifndef HAVE_MEMORY_BARRIER
1721 #if defined __i386__ || defined __x86_64__ || defined _M_IX86 || defined _M_X64 || defined __WIN__
1722 #else
1723 ib_logf(IB_LOG_LEVEL_WARN,
1724 "MySQL was built without a memory barrier capability on this"
1725 " architecture, which might allow a mutex/rw_lock violation"
1726 " under high thread concurrency. This may cause a hang.");
1727 #endif /* IA32 or AMD64 */
1728 #endif /* HAVE_MEMORY_BARRIER */
1729
1730 ib_logf(IB_LOG_LEVEL_INFO,
1731 "Compressed tables use zlib " ZLIB_VERSION
1732 #ifdef UNIV_ZIP_DEBUG
1733 " with validation"
1734 #endif /* UNIV_ZIP_DEBUG */
1735 );
1736 #ifdef UNIV_ZIP_COPY
1737 ib_logf(IB_LOG_LEVEL_INFO, "and extra copying");
1738 #endif /* UNIV_ZIP_COPY */
1739
1740
1741 /* Since InnoDB does not currently clean up all its internal data
1742 structures in MySQL Embedded Server Library server_end(), we
1743 print an error message if someone tries to start up InnoDB a
1744 second time during the process lifetime. */
1745
1746 if (srv_start_has_been_called) {
1747 ut_print_timestamp(stderr);
1748 fprintf(stderr, " InnoDB: Error: startup called second time "
1749 "during the process\n");
1750 ut_print_timestamp(stderr);
1751 fprintf(stderr, " InnoDB: lifetime. In the MySQL Embedded "
1752 "Server Library you\n");
1753 ut_print_timestamp(stderr);
1754 fprintf(stderr, " InnoDB: cannot call server_init() more "
1755 "than once during the\n");
1756 ut_print_timestamp(stderr);
1757 fprintf(stderr, " InnoDB: process lifetime.\n");
1758 }
1759
1760 srv_start_has_been_called = TRUE;
1761
1762 #ifdef UNIV_DEBUG
1763 log_do_write = TRUE;
1764 #endif /* UNIV_DEBUG */
1765 /* yydebug = TRUE; */
1766
1767 srv_is_being_started = TRUE;
1768 srv_startup_is_before_trx_rollback_phase = TRUE;
1769
1770 #ifdef __WIN__
1771 switch (os_get_os_version()) {
1772 case OS_WIN95:
1773 case OS_WIN31:
1774 case OS_WINNT:
1775 /* On Win 95, 98, ME, Win32 subsystem for Windows 3.1,
1776 and NT use simulated aio. In NT Windows provides async i/o,
1777 but when run in conjunction with InnoDB Hot Backup, it seemed
1778 to corrupt the data files. */
1779
1780 srv_use_native_aio = FALSE;
1781 break;
1782
1783 case OS_WIN2000:
1784 case OS_WINXP:
1785 /* On 2000 and XP, async IO is available. */
1786 srv_use_native_aio = TRUE;
1787 break;
1788
1789 default:
1790 /* Vista and later have both async IO and condition variables */
1791 srv_use_native_aio = TRUE;
1792 srv_use_native_conditions = TRUE;
1793 break;
1794 }
1795
1796 #elif defined(LINUX_NATIVE_AIO)
1797
1798 if (srv_use_native_aio) {
1799 ib_logf(IB_LOG_LEVEL_INFO, "Using Linux native AIO");
1800 }
1801 #else
1802 /* Currently native AIO is supported only on windows and linux
1803 and that also when the support is compiled in. In all other
1804 cases, we ignore the setting of innodb_use_native_aio. */
1805 srv_use_native_aio = FALSE;
1806 #endif /* __WIN__ */
1807
1808 if (srv_file_flush_method_str == NULL) {
1809 /* These are the default options */
1810
1811 srv_unix_file_flush_method = SRV_UNIX_FSYNC;
1812
1813 srv_win_file_flush_method = SRV_WIN_IO_UNBUFFERED;
1814 #ifndef __WIN__
1815 } else if (0 == ut_strcmp(srv_file_flush_method_str, "fsync")) {
1816 srv_unix_file_flush_method = SRV_UNIX_FSYNC;
1817
1818 } else if (0 == ut_strcmp(srv_file_flush_method_str, "O_DSYNC")) {
1819 srv_unix_file_flush_method = SRV_UNIX_O_DSYNC;
1820
1821 } else if (0 == ut_strcmp(srv_file_flush_method_str, "O_DIRECT")) {
1822 srv_unix_file_flush_method = SRV_UNIX_O_DIRECT;
1823
1824 } else if (0 == ut_strcmp(srv_file_flush_method_str, "O_DIRECT_NO_FSYNC")) {
1825 srv_unix_file_flush_method = SRV_UNIX_O_DIRECT_NO_FSYNC;
1826
1827 } else if (0 == ut_strcmp(srv_file_flush_method_str, "littlesync")) {
1828 srv_unix_file_flush_method = SRV_UNIX_LITTLESYNC;
1829
1830 } else if (0 == ut_strcmp(srv_file_flush_method_str, "nosync")) {
1831 srv_unix_file_flush_method = SRV_UNIX_NOSYNC;
1832 #else
1833 } else if (0 == ut_strcmp(srv_file_flush_method_str, "normal")) {
1834 srv_win_file_flush_method = SRV_WIN_IO_NORMAL;
1835 srv_use_native_aio = FALSE;
1836
1837 } else if (0 == ut_strcmp(srv_file_flush_method_str, "unbuffered")) {
1838 srv_win_file_flush_method = SRV_WIN_IO_UNBUFFERED;
1839 srv_use_native_aio = FALSE;
1840
1841 } else if (0 == ut_strcmp(srv_file_flush_method_str,
1842 "async_unbuffered")) {
1843 srv_win_file_flush_method = SRV_WIN_IO_UNBUFFERED;
1844 #endif /* __WIN__ */
1845 } else {
1846 ib_logf(IB_LOG_LEVEL_ERROR,
1847 "Unrecognized value %s for innodb_flush_method",
1848 srv_file_flush_method_str);
1849 return(DB_ERROR);
1850 }
1851
1852 /* Note that the call srv_boot() also changes the values of
1853 some variables to the units used by InnoDB internally */
1854
1855 /* Set the maximum number of threads which can wait for a semaphore
1856 inside InnoDB: this is the 'sync wait array' size, as well as the
1857 maximum number of threads that can wait in the 'srv_conc array' for
1858 their time to enter InnoDB. */
1859
1860 #define BUF_POOL_SIZE_THRESHOLD (1024 * 1024 * 1024)
1861 srv_max_n_threads = 1 /* io_ibuf_thread */
1862 + 1 /* io_log_thread */
1863 + 1 /* lock_wait_timeout_thread */
1864 + 1 /* srv_error_monitor_thread */
1865 + 1 /* srv_monitor_thread */
1866 + 1 /* srv_master_thread */
1867 + 1 /* srv_purge_coordinator_thread */
1868 + 1 /* buf_dump_thread */
1869 + 1 /* dict_stats_thread */
1870 + 1 /* fts_optimize_thread */
1871 + 1 /* recv_writer_thread */
1872 + 1 /* buf_flush_page_cleaner_thread */
1873 + 1 /* trx_rollback_or_clean_all_recovered */
1874 + 128 /* added as margin, for use of
1875 InnoDB Memcached etc. */
1876 + max_connections
1877 + srv_n_read_io_threads
1878 + srv_n_write_io_threads
1879 + srv_n_purge_threads
1880 /* FTS Parallel Sort */
1881 + fts_sort_pll_degree * FTS_NUM_AUX_INDEX
1882 * max_connections;
1883
1884 if (srv_buf_pool_size < BUF_POOL_SIZE_THRESHOLD) {
1885 /* If buffer pool is less than 1 GB,
1886 use only one buffer pool instance */
1887 srv_buf_pool_instances = 1;
1888 }
1889
1890 srv_boot();
1891
1892 ib_logf(IB_LOG_LEVEL_INFO,
1893 "%s CPU crc32 instructions",
1894 ut_crc32_sse2_enabled ? "Using" : "Not using");
1895
1896 if (!srv_read_only_mode) {
1897
1898 mutex_create(srv_monitor_file_mutex_key,
1899 &srv_monitor_file_mutex, SYNC_NO_ORDER_CHECK);
1900
1901 if (srv_innodb_status) {
1902
1903 srv_monitor_file_name = static_cast<char*>(
1904 mem_alloc(
1905 strlen(fil_path_to_mysql_datadir)
1906 + 20 + sizeof "/innodb_status."));
1907
1908 sprintf(srv_monitor_file_name, "%s/innodb_status.%lu",
1909 fil_path_to_mysql_datadir,
1910 os_proc_get_number());
1911
1912 srv_monitor_file = fopen(srv_monitor_file_name, "w+");
1913
1914 if (!srv_monitor_file) {
1915
1916 ib_logf(IB_LOG_LEVEL_ERROR,
1917 "Unable to create %s: %s",
1918 srv_monitor_file_name,
1919 strerror(errno));
1920
1921 return(DB_ERROR);
1922 }
1923 } else {
1924 srv_monitor_file_name = NULL;
1925 srv_monitor_file = os_file_create_tmpfile(NULL);
1926
1927 if (!srv_monitor_file) {
1928 return(DB_ERROR);
1929 }
1930 }
1931
1932 mutex_create(srv_dict_tmpfile_mutex_key,
1933 &srv_dict_tmpfile_mutex, SYNC_DICT_OPERATION);
1934
1935 srv_dict_tmpfile = os_file_create_tmpfile(NULL);
1936
1937 if (!srv_dict_tmpfile) {
1938 return(DB_ERROR);
1939 }
1940
1941 mutex_create(srv_misc_tmpfile_mutex_key,
1942 &srv_misc_tmpfile_mutex, SYNC_ANY_LATCH);
1943
1944 srv_misc_tmpfile = os_file_create_tmpfile(NULL);
1945
1946 if (!srv_misc_tmpfile) {
1947 return(DB_ERROR);
1948 }
1949 }
1950
1951 /* If user has set the value of innodb_file_io_threads then
1952 we'll emit a message telling the user that this parameter
1953 is now deprecated. */
1954 if (srv_n_file_io_threads != 4) {
1955 ib_logf(IB_LOG_LEVEL_WARN,
1956 "innodb_file_io_threads is deprecated. Please use "
1957 "innodb_read_io_threads and innodb_write_io_threads "
1958 "instead");
1959 }
1960
1961 /* Now overwrite the value on srv_n_file_io_threads */
1962 srv_n_file_io_threads = srv_n_read_io_threads;
1963
1964 if (!srv_read_only_mode) {
1965 /* Add the log and ibuf IO threads. */
1966 srv_n_file_io_threads += 2;
1967 srv_n_file_io_threads += srv_n_write_io_threads;
1968 } else {
1969 ib_logf(IB_LOG_LEVEL_INFO,
1970 "Disabling background IO write threads.");
1971
1972 srv_n_write_io_threads = 0;
1973 }
1974
1975 ut_a(srv_n_file_io_threads <= SRV_MAX_N_IO_THREADS);
1976
1977 io_limit = 8 * SRV_N_PENDING_IOS_PER_THREAD;
1978
1979 /* On Windows when using native aio the number of aio requests
1980 that a thread can handle at a given time is limited to 32
1981 i.e.: SRV_N_PENDING_IOS_PER_THREAD */
1982 # ifdef __WIN__
1983 if (srv_use_native_aio) {
1984 io_limit = SRV_N_PENDING_IOS_PER_THREAD;
1985 }
1986 # endif /* __WIN__ */
1987
1988 if (!os_aio_init(io_limit,
1989 srv_n_read_io_threads,
1990 srv_n_write_io_threads,
1991 SRV_MAX_N_PENDING_SYNC_IOS)) {
1992
1993 ib_logf(IB_LOG_LEVEL_ERROR,
1994 "Fatal : Cannot initialize AIO sub-system");
1995
1996 return(DB_ERROR);
1997 }
1998
1999 fil_init(srv_file_per_table ? 50000 : 5000, srv_max_n_open_files);
2000
2001 double size;
2002 char unit;
2003
2004 if (srv_buf_pool_size >= 1024 * 1024 * 1024) {
2005 size = ((double) srv_buf_pool_size) / (1024 * 1024 * 1024);
2006 unit = 'G';
2007 } else {
2008 size = ((double) srv_buf_pool_size) / (1024 * 1024);
2009 unit = 'M';
2010 }
2011
2012 /* Print time to initialize the buffer pool */
2013 ib_logf(IB_LOG_LEVEL_INFO,
2014 "Initializing buffer pool, size = %.1f%c", size, unit);
2015
2016 err = buf_pool_init(srv_buf_pool_size, srv_buf_pool_instances);
2017
2018 if (err != DB_SUCCESS) {
2019 ib_logf(IB_LOG_LEVEL_ERROR,
2020 "Cannot allocate memory for the buffer pool");
2021
2022 return(DB_ERROR);
2023 }
2024
2025 ib_logf(IB_LOG_LEVEL_INFO,
2026 "Completed initialization of buffer pool");
2027
2028 #ifdef UNIV_DEBUG
2029 /* We have observed deadlocks with a 5MB buffer pool but
2030 the actual lower limit could very well be a little higher. */
2031
2032 if (srv_buf_pool_size <= 5 * 1024 * 1024) {
2033
2034 ib_logf(IB_LOG_LEVEL_INFO,
2035 "Small buffer pool size (%luM), the flst_validate() "
2036 "debug function can cause a deadlock if the "
2037 "buffer pool fills up.",
2038 srv_buf_pool_size / 1024 / 1024);
2039 }
2040 #endif /* UNIV_DEBUG */
2041
2042 fsp_init();
2043 log_init();
2044
2045 lock_sys_create(srv_lock_table_size);
2046
2047 /* Create i/o-handler threads: */
2048
2049 for (i = 0; i < srv_n_file_io_threads; ++i) {
2050
2051 n[i] = i;
2052
2053 os_thread_create(io_handler_thread, n + i, thread_ids + i);
2054 }
2055
2056 #ifdef UNIV_LOG_ARCHIVE
2057 if (0 != ut_strcmp(srv_log_group_home_dir, srv_arch_dir)) {
2058 ut_print_timestamp(stderr);
2059 fprintf(stderr, " InnoDB: Error: you must set the log group home dir in my.cnf\n");
2060 ut_print_timestamp(stderr);
2061 fprintf(stderr, " InnoDB: the same as log arch dir.\n");
2062
2063 return(DB_ERROR);
2064 }
2065 #endif /* UNIV_LOG_ARCHIVE */
2066
2067 if (srv_n_log_files * srv_log_file_size * UNIV_PAGE_SIZE
2068 >= 512ULL * 1024ULL * 1024ULL * 1024ULL) {
2069 /* log_block_convert_lsn_to_no() limits the returned block
2070 number to 1G and given that OS_FILE_LOG_BLOCK_SIZE is 512
2071 bytes, then we have a limit of 512 GB. If that limit is to
2072 be raised, then log_block_convert_lsn_to_no() must be
2073 modified. */
2074 ib_logf(IB_LOG_LEVEL_ERROR,
2075 "Combined size of log files must be < 512 GB");
2076
2077 return(DB_ERROR);
2078 }
2079
2080 if (srv_n_log_files * srv_log_file_size >= ULINT_MAX) {
2081 /* fil_io() takes ulint as an argument and we are passing
2082 (next_offset / UNIV_PAGE_SIZE) to it in log_group_write_buf().
2083 So (next_offset / UNIV_PAGE_SIZE) must be less than ULINT_MAX.
2084 So next_offset must be < ULINT_MAX * UNIV_PAGE_SIZE. This
2085 means that we are limited to ULINT_MAX * UNIV_PAGE_SIZE which
2086 is 64 TB on 32 bit systems. */
2087 fprintf(stderr,
2088 " InnoDB: Error: combined size of log files"
2089 " must be < %lu GB\n",
2090 ULINT_MAX / 1073741824 * UNIV_PAGE_SIZE);
2091
2092 return(DB_ERROR);
2093 }
2094
2095 sum_of_new_sizes = 0;
2096
2097 for (i = 0; i < srv_n_data_files; i++) {
2098 #ifndef __WIN__
2099 if (sizeof(off_t) < 5
2100 && srv_data_file_sizes[i]
2101 >= (ulint) (1 << (32 - UNIV_PAGE_SIZE_SHIFT))) {
2102 ut_print_timestamp(stderr);
2103 fprintf(stderr,
2104 " InnoDB: Error: file size must be < 4 GB"
2105 " with this MySQL binary\n");
2106 ut_print_timestamp(stderr);
2107 fprintf(stderr,
2108 " InnoDB: and operating system combination,"
2109 " in some OS's < 2 GB\n");
2110
2111 return(DB_ERROR);
2112 }
2113 #endif
2114 sum_of_new_sizes += srv_data_file_sizes[i];
2115 }
2116
2117 if (sum_of_new_sizes < 10485760 / UNIV_PAGE_SIZE) {
2118 ib_logf(IB_LOG_LEVEL_ERROR,
2119 "Tablespace size must be at least 10 MB");
2120
2121 return(DB_ERROR);
2122 }
2123
2124 recv_sys_create();
2125 recv_sys_init(buf_pool_get_curr_size());
2126
2127 err = open_or_create_data_files(&create_new_db,
2128 #ifdef UNIV_LOG_ARCHIVE
2129 &min_arch_log_no, &max_arch_log_no,
2130 #endif /* UNIV_LOG_ARCHIVE */
2131 &min_flushed_lsn, &max_flushed_lsn,
2132 &sum_of_new_sizes);
2133 if (err == DB_FAIL) {
2134
2135 ib_logf(IB_LOG_LEVEL_ERROR,
2136 "The system tablespace must be writable!");
2137
2138 return(DB_ERROR);
2139
2140 } else if (err != DB_SUCCESS) {
2141
2142 ib_logf(IB_LOG_LEVEL_ERROR,
2143 "Could not open or create the system tablespace. If "
2144 "you tried to add new data files to the system "
2145 "tablespace, and it failed here, you should now "
2146 "edit innodb_data_file_path in my.cnf back to what "
2147 "it was, and remove the new ibdata files InnoDB "
2148 "created in this failed attempt. InnoDB only wrote "
2149 "those files full of zeros, but did not yet use "
2150 "them in any way. But be careful: do not remove "
2151 "old data files which contain your precious data!");
2152
2153 return(err);
2154 }
2155
2156 #ifdef UNIV_LOG_ARCHIVE
2157 srv_normalize_path_for_win(srv_arch_dir);
2158 srv_arch_dir = srv_add_path_separator_if_needed(srv_arch_dir);
2159 #endif /* UNIV_LOG_ARCHIVE */
2160
2161 dirnamelen = strlen(srv_log_group_home_dir);
2162 ut_a(dirnamelen < (sizeof logfilename) - 10 - sizeof "ib_logfile");
2163 memcpy(logfilename, srv_log_group_home_dir, dirnamelen);
2164
2165 /* Add a path separator if needed. */
2166 if (dirnamelen && logfilename[dirnamelen - 1] != SRV_PATH_SEPARATOR) {
2167 logfilename[dirnamelen++] = SRV_PATH_SEPARATOR;
2168 }
2169
2170 srv_log_file_size_requested = srv_log_file_size;
2171
2172 if (create_new_db) {
2173 bool success = buf_flush_list(ULINT_MAX, LSN_MAX, NULL);
2174 ut_a(success);
2175
2176 min_flushed_lsn = max_flushed_lsn = log_get_lsn();
2177
2178 buf_flush_wait_batch_end(NULL, BUF_FLUSH_LIST);
2179
2180 err = create_log_files(create_new_db, logfilename, dirnamelen,
2181 max_flushed_lsn, logfile0);
2182
2183 if (err != DB_SUCCESS) {
2184 return(err);
2185 }
2186 } else {
2187 for (i = 0; i < SRV_N_LOG_FILES_MAX; i++) {
2188 os_offset_t size;
2189 os_file_stat_t stat_info;
2190
2191 sprintf(logfilename + dirnamelen,
2192 "ib_logfile%u", i);
2193
2194 err = os_file_get_status(
2195 logfilename, &stat_info, false);
2196
2197 if (err == DB_NOT_FOUND) {
2198 if (i == 0) {
2199 if (max_flushed_lsn
2200 != min_flushed_lsn) {
2201 ib_logf(IB_LOG_LEVEL_ERROR,
2202 "Cannot create"
2203 " log files because"
2204 " data files are"
2205 " corrupt or"
2206 " not in sync"
2207 " with each other");
2208 return(DB_ERROR);
2209 }
2210
2211 if (max_flushed_lsn < (lsn_t) 1000) {
2212 ib_logf(IB_LOG_LEVEL_ERROR,
2213 "Cannot create"
2214 " log files because"
2215 " data files are"
2216 " corrupt or the"
2217 " database was not"
2218 " shut down cleanly"
2219 " after creating"
2220 " the data files.");
2221 return(DB_ERROR);
2222 }
2223
2224 err = create_log_files(
2225 create_new_db, logfilename,
2226 dirnamelen, max_flushed_lsn,
2227 logfile0);
2228
2229 if (err != DB_SUCCESS) {
2230 return(err);
2231 }
2232
2233 create_log_files_rename(
2234 logfilename, dirnamelen,
2235 max_flushed_lsn, logfile0);
2236
2237 /* Suppress the message about
2238 crash recovery. */
2239 max_flushed_lsn = min_flushed_lsn
2240 = log_get_lsn();
2241 goto files_checked;
2242 } else if (i < 2) {
2243 /* must have at least 2 log files */
2244 ib_logf(IB_LOG_LEVEL_ERROR,
2245 "Only one log file found.");
2246 return(err);
2247 }
2248
2249 /* opened all files */
2250 break;
2251 }
2252
2253 if (!srv_file_check_mode(logfilename)) {
2254 return(DB_ERROR);
2255 }
2256
2257 err = open_log_file(&files[i], logfilename, &size);
2258
2259 if (err != DB_SUCCESS) {
2260 return(err);
2261 }
2262
2263 ut_a(size != (os_offset_t) -1);
2264
2265 if (size & ((1 << UNIV_PAGE_SIZE_SHIFT) - 1)) {
2266 ib_logf(IB_LOG_LEVEL_ERROR,
2267 "Log file %s size "
2268 UINT64PF " is not a multiple of"
2269 " innodb_page_size",
2270 logfilename, size);
2271 return(DB_ERROR);
2272 }
2273
2274 size >>= UNIV_PAGE_SIZE_SHIFT;
2275
2276 if (i == 0) {
2277 srv_log_file_size = size;
2278 } else if (size != srv_log_file_size) {
2279 ib_logf(IB_LOG_LEVEL_ERROR,
2280 "Log file %s is"
2281 " of different size " UINT64PF " bytes"
2282 " than other log"
2283 " files " UINT64PF " bytes!",
2284 logfilename,
2285 size << UNIV_PAGE_SIZE_SHIFT,
2286 (os_offset_t) srv_log_file_size
2287 << UNIV_PAGE_SIZE_SHIFT);
2288 return(DB_ERROR);
2289 }
2290 }
2291
2292 srv_n_log_files_found = i;
2293
2294 /* Create the in-memory file space objects. */
2295
2296 sprintf(logfilename + dirnamelen, "ib_logfile%u", 0);
2297
2298 fil_space_create(logfilename,
2299 SRV_LOG_SPACE_FIRST_ID,
2300 fsp_flags_set_page_size(0, UNIV_PAGE_SIZE),
2301 FIL_LOG);
2302
2303 ut_a(fil_validate());
2304
2305 /* srv_log_file_size is measured in pages; if page size is 16KB,
2306 then we have a limit of 64TB on 32 bit systems */
2307 ut_a(srv_log_file_size <= ULINT_MAX);
2308
2309 for (unsigned j = 0; j < i; j++) {
2310 sprintf(logfilename + dirnamelen, "ib_logfile%u", j);
2311
2312 if (!fil_node_create(logfilename,
2313 (ulint) srv_log_file_size,
2314 SRV_LOG_SPACE_FIRST_ID, FALSE)) {
2315 return(DB_ERROR);
2316 }
2317 }
2318
2319 #ifdef UNIV_LOG_ARCHIVE
2320 /* Create the file space object for archived logs. Under
2321 MySQL, no archiving ever done. */
2322 fil_space_create("arch_log_space", SRV_LOG_SPACE_FIRST_ID + 1,
2323 0, FIL_LOG);
2324 #endif /* UNIV_LOG_ARCHIVE */
2325 log_group_init(0, i, srv_log_file_size * UNIV_PAGE_SIZE,
2326 SRV_LOG_SPACE_FIRST_ID,
2327 SRV_LOG_SPACE_FIRST_ID + 1);
2328 }
2329
2330 files_checked:
2331 /* Open all log files and data files in the system
2332 tablespace: we keep them open until database
2333 shutdown */
2334
2335 fil_open_log_and_system_tablespace_files();
2336
2337 err = srv_undo_tablespaces_init(
2338 create_new_db,
2339 srv_undo_tablespaces,
2340 &srv_undo_tablespaces_open);
2341
2342 /* If the force recovery is set very high then we carry on regardless
2343 of all errors. Basically this is fingers crossed mode. */
2344
2345 if (err != DB_SUCCESS
2346 && srv_force_recovery < SRV_FORCE_NO_UNDO_LOG_SCAN) {
2347
2348 return(err);
2349 }
2350
2351 /* Initialize objects used by dict stats gathering thread, which
2352 can also be used by recovery if it tries to drop some table */
2353 if (!srv_read_only_mode) {
2354 dict_stats_thread_init();
2355 }
2356
2357 trx_sys_file_format_init();
2358
2359 trx_sys_create();
2360
2361 if (create_new_db) {
2362
2363 ut_a(!srv_read_only_mode);
2364
2365 mtr_start(&mtr);
2366
2367 fsp_header_init(0, sum_of_new_sizes, &mtr);
2368
2369 mtr_commit(&mtr);
2370
2371 /* To maintain backward compatibility we create only
2372 the first rollback segment before the double write buffer.
2373 All the remaining rollback segments will be created later,
2374 after the double write buffer has been created. */
2375 trx_sys_create_sys_pages();
2376
2377 ib_bh = trx_sys_init_at_db_start();
2378 n_recovered_trx = UT_LIST_GET_LEN(trx_sys->rw_trx_list);
2379
2380 /* The purge system needs to create the purge view and
2381 therefore requires that the trx_sys is inited. */
2382
2383 trx_purge_sys_create(srv_n_purge_threads, ib_bh);
2384
2385 err = dict_create();
2386
2387 if (err != DB_SUCCESS) {
2388 return(err);
2389 }
2390
2391 srv_startup_is_before_trx_rollback_phase = FALSE;
2392
2393 bool success = buf_flush_list(ULINT_MAX, LSN_MAX, NULL);
2394 ut_a(success);
2395
2396 min_flushed_lsn = max_flushed_lsn = log_get_lsn();
2397
2398 buf_flush_wait_batch_end(NULL, BUF_FLUSH_LIST);
2399
2400 /* Stamp the LSN to the data files. */
2401 fil_write_flushed_lsn_to_data_files(max_flushed_lsn, 0);
2402
2403 fil_flush_file_spaces(FIL_TABLESPACE);
2404
2405 create_log_files_rename(logfilename, dirnamelen,
2406 max_flushed_lsn, logfile0);
2407 #ifdef UNIV_LOG_ARCHIVE
2408 } else if (srv_archive_recovery) {
2409
2410 ib_logf(IB_LOG_LEVEL_INFO,
2411 " Starting archive recovery from a backup...");
2412
2413 err = recv_recovery_from_archive_start(
2414 min_flushed_lsn, srv_archive_recovery_limit_lsn,
2415 min_arch_log_no);
2416 if (err != DB_SUCCESS) {
2417
2418 return(DB_ERROR);
2419 }
2420 /* Since ibuf init is in dict_boot, and ibuf is needed
2421 in any disk i/o, first call dict_boot */
2422
2423 err = dict_boot();
2424
2425 if (err != DB_SUCCESS) {
2426 return(err);
2427 }
2428
2429 ib_bh = trx_sys_init_at_db_start();
2430 n_recovered_trx = UT_LIST_GET_LEN(trx_sys->rw_trx_list);
2431
2432 /* The purge system needs to create the purge view and
2433 therefore requires that the trx_sys is inited. */
2434
2435 trx_purge_sys_create(srv_n_purge_threads, ib_bh);
2436
2437 srv_startup_is_before_trx_rollback_phase = FALSE;
2438
2439 recv_recovery_from_archive_finish();
2440 #endif /* UNIV_LOG_ARCHIVE */
2441 } else {
2442
2443 /* Check if we support the max format that is stamped
2444 on the system tablespace.
2445 Note: We are NOT allowed to make any modifications to
2446 the TRX_SYS_PAGE_NO page before recovery because this
2447 page also contains the max_trx_id etc. important system
2448 variables that are required for recovery. We need to
2449 ensure that we return the system to a state where normal
2450 recovery is guaranteed to work. We do this by
2451 invalidating the buffer cache, this will force the
2452 reread of the page and restoration to its last known
2453 consistent state, this is REQUIRED for the recovery
2454 process to work. */
2455 err = trx_sys_file_format_max_check(
2456 srv_max_file_format_at_startup);
2457
2458 if (err != DB_SUCCESS) {
2459 return(err);
2460 }
2461
2462 /* Invalidate the buffer pool to ensure that we reread
2463 the page that we read above, during recovery.
2464 Note that this is not as heavy weight as it seems. At
2465 this point there will be only ONE page in the buf_LRU
2466 and there must be no page in the buf_flush list. */
2467 buf_pool_invalidate();
2468
2469 /* We always try to do a recovery, even if the database had
2470 been shut down normally: this is the normal startup path */
2471
2472 err = recv_recovery_from_checkpoint_start(
2473 LOG_CHECKPOINT, LSN_MAX,
2474 min_flushed_lsn, max_flushed_lsn);
2475
2476 if (err != DB_SUCCESS) {
2477
2478 return(DB_ERROR);
2479 }
2480
2481 /* Since the insert buffer init is in dict_boot, and the
2482 insert buffer is needed in any disk i/o, first we call
2483 dict_boot(). Note that trx_sys_init_at_db_start() only needs
2484 to access space 0, and the insert buffer at this stage already
2485 works for space 0. */
2486
2487 err = dict_boot();
2488
2489 if (err != DB_SUCCESS) {
2490 return(err);
2491 }
2492
2493 ib_bh = trx_sys_init_at_db_start();
2494 n_recovered_trx = UT_LIST_GET_LEN(trx_sys->rw_trx_list);
2495
2496 /* The purge system needs to create the purge view and
2497 therefore requires that the trx_sys is inited. */
2498
2499 trx_purge_sys_create(srv_n_purge_threads, ib_bh);
2500
2501 /* recv_recovery_from_checkpoint_finish needs trx lists which
2502 are initialized in trx_sys_init_at_db_start(). */
2503
2504 recv_recovery_from_checkpoint_finish();
2505
2506 if (srv_force_recovery < SRV_FORCE_NO_IBUF_MERGE) {
2507 /* The following call is necessary for the insert
2508 buffer to work with multiple tablespaces. We must
2509 know the mapping between space id's and .ibd file
2510 names.
2511
2512 In a crash recovery, we check that the info in data
2513 dictionary is consistent with what we already know
2514 about space id's from the call of
2515 fil_load_single_table_tablespaces().
2516
2517 In a normal startup, we create the space objects for
2518 every table in the InnoDB data dictionary that has
2519 an .ibd file.
2520
2521 We also determine the maximum tablespace id used. */
2522 dict_check_t dict_check;
2523
2524 if (recv_needed_recovery) {
2525 dict_check = DICT_CHECK_ALL_LOADED;
2526 } else if (n_recovered_trx) {
2527 dict_check = DICT_CHECK_SOME_LOADED;
2528 } else {
2529 dict_check = DICT_CHECK_NONE_LOADED;
2530 }
2531
2532 dict_check_tablespaces_and_store_max_id(dict_check);
2533 }
2534
2535 if (!srv_force_recovery
2536 && !recv_sys->found_corrupt_log
2537 && (srv_log_file_size_requested != srv_log_file_size
2538 || srv_n_log_files_found != srv_n_log_files)) {
2539 /* Prepare to replace the redo log files. */
2540
2541 if (srv_read_only_mode) {
2542 ib_logf(IB_LOG_LEVEL_ERROR,
2543 "Cannot resize log files "
2544 "in read-only mode.");
2545 return(DB_READ_ONLY);
2546 }
2547
2548 /* Clean the buffer pool. */
2549 bool success = buf_flush_list(
2550 ULINT_MAX, LSN_MAX, NULL);
2551 ut_a(success);
2552
2553 RECOVERY_CRASH(1);
2554
2555 min_flushed_lsn = max_flushed_lsn = log_get_lsn();
2556
2557 ib_logf(IB_LOG_LEVEL_WARN,
2558 "Resizing redo log from %u*%u to %u*%u pages"
2559 ", LSN=" LSN_PF,
2560 (unsigned) i,
2561 (unsigned) srv_log_file_size,
2562 (unsigned) srv_n_log_files,
2563 (unsigned) srv_log_file_size_requested,
2564 max_flushed_lsn);
2565
2566 buf_flush_wait_batch_end(NULL, BUF_FLUSH_LIST);
2567
2568 RECOVERY_CRASH(2);
2569
2570 /* Flush the old log files. */
2571 log_buffer_flush_to_disk();
2572 /* If innodb_flush_method=O_DSYNC,
2573 we need to explicitly flush the log buffers. */
2574 fil_flush(SRV_LOG_SPACE_FIRST_ID);
2575
2576 ut_ad(max_flushed_lsn == log_get_lsn());
2577
2578 /* Prohibit redo log writes from any other
2579 threads until creating a log checkpoint at the
2580 end of create_log_files(). */
2581 ut_d(recv_no_log_write = TRUE);
2582 ut_ad(!buf_pool_check_no_pending_io());
2583
2584 RECOVERY_CRASH(3);
2585
2586 /* Stamp the LSN to the data files. */
2587 fil_write_flushed_lsn_to_data_files(
2588 max_flushed_lsn, 0);
2589
2590 fil_flush_file_spaces(FIL_TABLESPACE);
2591
2592 RECOVERY_CRASH(4);
2593
2594 /* Close and free the redo log files, so that
2595 we can replace them. */
2596 fil_close_log_files(true);
2597
2598 RECOVERY_CRASH(5);
2599
2600 /* Free the old log file space. */
2601 log_group_close_all();
2602
2603 ib_logf(IB_LOG_LEVEL_WARN,
2604 "Starting to delete and rewrite log files.");
2605
2606 srv_log_file_size = srv_log_file_size_requested;
2607
2608 err = create_log_files(create_new_db, logfilename,
2609 dirnamelen, max_flushed_lsn,
2610 logfile0);
2611
2612 if (err != DB_SUCCESS) {
2613 return(err);
2614 }
2615
2616 create_log_files_rename(logfilename, dirnamelen,
2617 max_flushed_lsn, logfile0);
2618 }
2619
2620 srv_startup_is_before_trx_rollback_phase = FALSE;
2621 recv_recovery_rollback_active();
2622
2623 /* It is possible that file_format tag has never
2624 been set. In this case we initialize it to minimum
2625 value. Important to note that we can do it ONLY after
2626 we have finished the recovery process so that the
2627 image of TRX_SYS_PAGE_NO is not stale. */
2628 trx_sys_file_format_tag_init();
2629 }
2630
2631 if (!create_new_db && sum_of_new_sizes > 0) {
2632 /* New data file(s) were added */
2633 mtr_start(&mtr);
2634
2635 fsp_header_inc_size(0, sum_of_new_sizes, &mtr);
2636
2637 mtr_commit(&mtr);
2638
2639 /* Immediately write the log record about increased tablespace
2640 size to disk, so that it is durable even if mysqld would crash
2641 quickly */
2642
2643 log_buffer_flush_to_disk();
2644 }
2645
2646 #ifdef UNIV_LOG_ARCHIVE
2647 /* Archiving is always off under MySQL */
2648 if (!srv_log_archive_on) {
2649 ut_a(DB_SUCCESS == log_archive_noarchivelog());
2650 } else {
2651 mutex_enter(&(log_sys->mutex));
2652
2653 start_archive = FALSE;
2654
2655 if (log_sys->archiving_state == LOG_ARCH_OFF) {
2656 start_archive = TRUE;
2657 }
2658
2659 mutex_exit(&(log_sys->mutex));
2660
2661 if (start_archive) {
2662 ut_a(DB_SUCCESS == log_archive_archivelog());
2663 }
2664 }
2665 #endif /* UNIV_LOG_ARCHIVE */
2666
2667 /* fprintf(stderr, "Max allowed record size %lu\n",
2668 page_get_free_space_of_empty() / 2); */
2669
2670 if (buf_dblwr == NULL) {
2671 /* Create the doublewrite buffer to a new tablespace */
2672
2673 buf_dblwr_create();
2674 }
2675
2676 /* Here the double write buffer has already been created and so
2677 any new rollback segments will be allocated after the double
2678 write buffer. The default segment should already exist.
2679 We create the new segments only if it's a new database or
2680 the database was shutdown cleanly. */
2681
2682 /* Note: When creating the extra rollback segments during an upgrade
2683 we violate the latching order, even if the change buffer is empty.
2684 We make an exception in sync0sync.cc and check srv_is_being_started
2685 for that violation. It cannot create a deadlock because we are still
2686 running in single threaded mode essentially. Only the IO threads
2687 should be running at this stage. */
2688
2689 ut_a(srv_undo_logs > 0);
2690 ut_a(srv_undo_logs <= TRX_SYS_N_RSEGS);
2691
2692 /* The number of rsegs that exist in InnoDB is given by status
2693 variable srv_available_undo_logs. The number of rsegs to use can
2694 be set using the dynamic global variable srv_undo_logs. */
2695
2696 srv_available_undo_logs = trx_sys_create_rsegs(
2697 srv_undo_tablespaces, srv_undo_logs);
2698
2699 if (srv_available_undo_logs == ULINT_UNDEFINED) {
2700 /* Can only happen if server is read only. */
2701 ut_a(srv_read_only_mode);
2702 srv_undo_logs = ULONG_UNDEFINED;
2703 }
2704
2705 if (!srv_read_only_mode) {
2706 /* Create the thread which watches the timeouts
2707 for lock waits */
2708 os_thread_create(
2709 lock_wait_timeout_thread,
2710 NULL, thread_ids + 2 + SRV_MAX_N_IO_THREADS);
2711
2712 /* Create the thread which warns of long semaphore waits */
2713 os_thread_create(
2714 srv_error_monitor_thread,
2715 NULL, thread_ids + 3 + SRV_MAX_N_IO_THREADS);
2716
2717 /* Create the thread which prints InnoDB monitor info */
2718 os_thread_create(
2719 srv_monitor_thread,
2720 NULL, thread_ids + 4 + SRV_MAX_N_IO_THREADS);
2721 }
2722
2723 /* Create the SYS_FOREIGN and SYS_FOREIGN_COLS system tables */
2724 err = dict_create_or_check_foreign_constraint_tables();
2725 if (err != DB_SUCCESS) {
2726 return(err);
2727 }
2728
2729 /* Create the SYS_TABLESPACES system table */
2730 err = dict_create_or_check_sys_tablespace();
2731 if (err != DB_SUCCESS) {
2732 return(err);
2733 }
2734
2735 srv_is_being_started = FALSE;
2736
2737 ut_a(trx_purge_state() == PURGE_STATE_INIT);
2738
2739 /* Create the master thread which does purge and other utility
2740 operations */
2741
2742 if (!srv_read_only_mode) {
2743
2744 os_thread_create(
2745 srv_master_thread,
2746 NULL, thread_ids + (1 + SRV_MAX_N_IO_THREADS));
2747 }
2748
2749 if (!srv_read_only_mode
2750 && srv_force_recovery < SRV_FORCE_NO_BACKGROUND) {
2751
2752 os_thread_create(
2753 srv_purge_coordinator_thread,
2754 NULL, thread_ids + 5 + SRV_MAX_N_IO_THREADS);
2755
2756 ut_a(UT_ARR_SIZE(thread_ids)
2757 > 5 + srv_n_purge_threads + SRV_MAX_N_IO_THREADS);
2758
2759 /* We've already created the purge coordinator thread above. */
2760 for (i = 1; i < srv_n_purge_threads; ++i) {
2761 os_thread_create(
2762 srv_worker_thread, NULL,
2763 thread_ids + 5 + i + SRV_MAX_N_IO_THREADS);
2764 }
2765
2766 srv_start_wait_for_purge_to_start();
2767
2768 } else {
2769 purge_sys->state = PURGE_STATE_DISABLED;
2770 }
2771
2772 if (!srv_read_only_mode) {
2773 os_thread_create(buf_flush_page_cleaner_thread, NULL, NULL);
2774 }
2775
2776 #ifdef UNIV_DEBUG
2777 /* buf_debug_prints = TRUE; */
2778 #endif /* UNIV_DEBUG */
2779 sum_of_data_file_sizes = 0;
2780
2781 for (i = 0; i < srv_n_data_files; i++) {
2782 sum_of_data_file_sizes += srv_data_file_sizes[i];
2783 }
2784
2785 tablespace_size_in_header = fsp_header_get_tablespace_size();
2786
2787 if (!srv_read_only_mode
2788 && !srv_auto_extend_last_data_file
2789 && sum_of_data_file_sizes != tablespace_size_in_header) {
2790
2791 ut_print_timestamp(stderr);
2792 fprintf(stderr,
2793 " InnoDB: Error: tablespace size"
2794 " stored in header is %lu pages, but\n",
2795 (ulong) tablespace_size_in_header);
2796 ut_print_timestamp(stderr);
2797 fprintf(stderr,
2798 "InnoDB: the sum of data file sizes is %lu pages\n",
2799 (ulong) sum_of_data_file_sizes);
2800
2801 if (srv_force_recovery == 0
2802 && sum_of_data_file_sizes < tablespace_size_in_header) {
2803 /* This is a fatal error, the tail of a tablespace is
2804 missing */
2805
2806 ut_print_timestamp(stderr);
2807 fprintf(stderr,
2808 " InnoDB: Cannot start InnoDB."
2809 " The tail of the system tablespace is\n");
2810 ut_print_timestamp(stderr);
2811 fprintf(stderr,
2812 " InnoDB: missing. Have you edited"
2813 " innodb_data_file_path in my.cnf in an\n");
2814 ut_print_timestamp(stderr);
2815 fprintf(stderr,
2816 " InnoDB: inappropriate way, removing"
2817 " ibdata files from there?\n");
2818 ut_print_timestamp(stderr);
2819 fprintf(stderr,
2820 " InnoDB: You can set innodb_force_recovery=1"
2821 " in my.cnf to force\n");
2822 ut_print_timestamp(stderr);
2823 fprintf(stderr,
2824 " InnoDB: a startup if you are trying"
2825 " to recover a badly corrupt database.\n");
2826
2827 return(DB_ERROR);
2828 }
2829 }
2830
2831 if (!srv_read_only_mode
2832 && srv_auto_extend_last_data_file
2833 && sum_of_data_file_sizes < tablespace_size_in_header) {
2834
2835 ut_print_timestamp(stderr);
2836 fprintf(stderr,
2837 " InnoDB: Error: tablespace size stored in header"
2838 " is %lu pages, but\n",
2839 (ulong) tablespace_size_in_header);
2840 ut_print_timestamp(stderr);
2841 fprintf(stderr,
2842 " InnoDB: the sum of data file sizes"
2843 " is only %lu pages\n",
2844 (ulong) sum_of_data_file_sizes);
2845
2846 if (srv_force_recovery == 0) {
2847
2848 ut_print_timestamp(stderr);
2849 fprintf(stderr,
2850 " InnoDB: Cannot start InnoDB. The tail of"
2851 " the system tablespace is\n");
2852 ut_print_timestamp(stderr);
2853 fprintf(stderr,
2854 " InnoDB: missing. Have you edited"
2855 " innodb_data_file_path in my.cnf in an\n");
2856 ut_print_timestamp(stderr);
2857 fprintf(stderr,
2858 " InnoDB: inappropriate way, removing"
2859 " ibdata files from there?\n");
2860 ut_print_timestamp(stderr);
2861 fprintf(stderr,
2862 " InnoDB: You can set innodb_force_recovery=1"
2863 " in my.cnf to force\n");
2864 ut_print_timestamp(stderr);
2865 fprintf(stderr,
2866 " InnoDB: a startup if you are trying to"
2867 " recover a badly corrupt database.\n");
2868
2869 return(DB_ERROR);
2870 }
2871 }
2872
2873 /* Check that os_fast_mutexes work as expected */
2874 os_fast_mutex_init(PFS_NOT_INSTRUMENTED, &srv_os_test_mutex);
2875
2876 if (0 != os_fast_mutex_trylock(&srv_os_test_mutex)) {
2877 ut_print_timestamp(stderr);
2878 fprintf(stderr,
2879 " InnoDB: Error: pthread_mutex_trylock returns"
2880 " an unexpected value on\n");
2881 ut_print_timestamp(stderr);
2882 fprintf(stderr,
2883 " InnoDB: success! Cannot continue.\n");
2884 exit(1);
2885 }
2886
2887 os_fast_mutex_unlock(&srv_os_test_mutex);
2888
2889 os_fast_mutex_lock(&srv_os_test_mutex);
2890
2891 os_fast_mutex_unlock(&srv_os_test_mutex);
2892
2893 os_fast_mutex_free(&srv_os_test_mutex);
2894
2895 if (srv_print_verbose_log) {
2896 ib_logf(IB_LOG_LEVEL_INFO,
2897 "%s started; log sequence number " LSN_PF "",
2898 INNODB_VERSION_STR, srv_start_lsn);
2899 }
2900
2901 if (srv_force_recovery > 0) {
2902 ib_logf(IB_LOG_LEVEL_INFO,
2903 "!!! innodb_force_recovery is set to %lu !!!",
2904 (ulong) srv_force_recovery);
2905 }
2906
2907 if (srv_force_recovery == 0) {
2908 /* In the insert buffer we may have even bigger tablespace
2909 id's, because we may have dropped those tablespaces, but
2910 insert buffer merge has not had time to clean the records from
2911 the ibuf tree. */
2912
2913 ibuf_update_max_tablespace_id();
2914 }
2915
2916 if (!srv_read_only_mode) {
2917 /* Create the buffer pool dump/load thread */
2918 os_thread_create(buf_dump_thread, NULL, NULL);
2919
2920 /* Create the dict stats gathering thread */
2921 os_thread_create(dict_stats_thread, NULL, NULL);
2922
2923 /* Create the thread that will optimize the FTS sub-system. */
2924 fts_optimize_init();
2925 }
2926
2927 srv_was_started = TRUE;
2928
2929 return(DB_SUCCESS);
2930 }
2931
#if 0
/********************************************************************
Sync all FTS cache before shutdown.
Walks dict_sys->table_LRU and then dict_sys->table_non_LRU and calls
fts_sync_table() on every table whose fts member is non-NULL.
This function is currently compiled out by the surrounding #if 0. */
static
void
srv_fts_close(void)
/*===============*/
{
	dict_table_t*	table;

	/* First pass: tables on the LRU list. */
	table = UT_LIST_GET_FIRST(dict_sys->table_LRU);

	while (table) {
		if (table->fts != NULL) {
			fts_sync_table(table);
		}

		table = UT_LIST_GET_NEXT(table_LRU, table);
	}

	/* Second pass: tables on the non-LRU list.
	NOTE(review): this list is also walked through the table_LRU link
	member, exactly as in the original code; presumably both lists
	share that node. Confirm against dict_table_t. */
	table = UT_LIST_GET_FIRST(dict_sys->table_non_LRU);

	while (table) {
		if (table->fts != NULL) {
			fts_sync_table(table);
		}

		table = UT_LIST_GET_NEXT(table_LRU, table);
	}
}
#endif
2961
2962 /****************************************************************//**
2963 Shuts down the InnoDB database.
2964 @return DB_SUCCESS or error code */
2965 UNIV_INTERN
2966 dberr_t
innobase_shutdown_for_mysql(void)2967 innobase_shutdown_for_mysql(void)
2968 /*=============================*/
2969 {
2970 ulint i;
2971
2972 if (!srv_was_started) {
2973 if (srv_is_being_started) {
2974 ib_logf(IB_LOG_LEVEL_WARN,
2975 "Shutting down an improperly started, "
2976 "or created database!");
2977 }
2978
2979 return(DB_SUCCESS);
2980 }
2981
2982 if (!srv_read_only_mode) {
2983 /* Shutdown the FTS optimize sub system. */
2984 fts_optimize_start_shutdown();
2985
2986 fts_optimize_end();
2987 }
2988
2989 /* 1. Flush the buffer pool to disk, write the current lsn to
2990 the tablespace header(s), and copy all log data to archive.
2991 The step 1 is the real InnoDB shutdown. The remaining steps 2 - ...
2992 just free data structures after the shutdown. */
2993
2994 logs_empty_and_mark_files_at_shutdown();
2995
2996 if (srv_conc_get_active_threads() != 0) {
2997 ib_logf(IB_LOG_LEVEL_WARN,
2998 "Query counter shows %ld queries still "
2999 "inside InnoDB at shutdown",
3000 srv_conc_get_active_threads());
3001 }
3002
3003 /* 2. Make all threads created by InnoDB to exit */
3004
3005 srv_shutdown_state = SRV_SHUTDOWN_EXIT_THREADS;
3006
3007 /* All threads end up waiting for certain events. Put those events
3008 to the signaled state. Then the threads will exit themselves after
3009 os_event_wait(). */
3010
3011 for (i = 0; i < 1000; i++) {
3012 /* NOTE: IF YOU CREATE THREADS IN INNODB, YOU MUST EXIT THEM
3013 HERE OR EARLIER */
3014
3015 if (!srv_read_only_mode) {
3016 /* a. Let the lock timeout thread exit */
3017 os_event_set(lock_sys->timeout_event);
3018
3019 /* b. srv error monitor thread exits automatically,
3020 no need to do anything here */
3021
3022 /* c. We wake the master thread so that it exits */
3023 srv_wake_master_thread();
3024
3025 /* d. Wakeup purge threads. */
3026 srv_purge_wakeup();
3027 }
3028
3029 /* e. Exit the i/o threads */
3030
3031 os_aio_wake_all_threads_at_shutdown();
3032
3033 /* f. dict_stats_thread is signaled from
3034 logs_empty_and_mark_files_at_shutdown() and should have
3035 already quit or is quitting right now. */
3036
3037 os_mutex_enter(os_sync_mutex);
3038
3039 if (os_thread_count == 0) {
3040 /* All the threads have exited or are just exiting;
3041 NOTE that the threads may not have completed their
3042 exit yet. Should we use pthread_join() to make sure
3043 they have exited? If we did, we would have to
3044 remove the pthread_detach() from
3045 os_thread_exit(). Now we just sleep 0.1
3046 seconds and hope that is enough! */
3047
3048 os_mutex_exit(os_sync_mutex);
3049
3050 os_thread_sleep(100000);
3051
3052 break;
3053 }
3054
3055 os_mutex_exit(os_sync_mutex);
3056
3057 os_thread_sleep(100000);
3058 }
3059
3060 if (i == 1000) {
3061 ib_logf(IB_LOG_LEVEL_WARN,
3062 "%lu threads created by InnoDB"
3063 " had not exited at shutdown!",
3064 (ulong) os_thread_count);
3065 }
3066
3067 if (srv_monitor_file) {
3068 fclose(srv_monitor_file);
3069 srv_monitor_file = 0;
3070 if (srv_monitor_file_name) {
3071 unlink(srv_monitor_file_name);
3072 mem_free(srv_monitor_file_name);
3073 }
3074 }
3075
3076 if (srv_dict_tmpfile) {
3077 fclose(srv_dict_tmpfile);
3078 srv_dict_tmpfile = 0;
3079 }
3080
3081 if (srv_misc_tmpfile) {
3082 fclose(srv_misc_tmpfile);
3083 srv_misc_tmpfile = 0;
3084 }
3085
3086 if (!srv_read_only_mode) {
3087 dict_stats_thread_deinit();
3088 }
3089
3090 /* This must be disabled before closing the buffer pool
3091 and closing the data dictionary. */
3092 btr_search_disable();
3093
3094 ibuf_close();
3095 log_shutdown();
3096 trx_sys_file_format_close();
3097 trx_sys_close();
3098 lock_sys_close();
3099
3100 /* We don't create these mutexes in RO mode because we don't create
3101 the temp files that the cover. */
3102 if (!srv_read_only_mode) {
3103 mutex_free(&srv_monitor_file_mutex);
3104 mutex_free(&srv_dict_tmpfile_mutex);
3105 mutex_free(&srv_misc_tmpfile_mutex);
3106 }
3107
3108 dict_close();
3109 btr_search_sys_free();
3110
3111 /* 3. Free all InnoDB's own mutexes and the os_fast_mutexes inside
3112 them */
3113 os_aio_free();
3114 que_close();
3115 row_mysql_close();
3116 srv_mon_free();
3117 sync_close();
3118 srv_free();
3119 fil_close();
3120
3121 /* 4. Free the os_conc_mutex and all os_events and os_mutexes */
3122
3123 os_sync_free();
3124
3125 /* 5. Free all allocated memory */
3126
3127 pars_lexer_close();
3128 log_mem_free();
3129 buf_pool_free(srv_buf_pool_instances);
3130 mem_close();
3131
3132 /* ut_free_all_mem() frees all allocated memory not freed yet
3133 in shutdown, and it will also free the ut_list_mutex, so it
3134 should be the last one for all operation */
3135 ut_free_all_mem();
3136
3137 if (os_thread_count != 0
3138 || os_event_count != 0
3139 || os_mutex_count != 0
3140 || os_fast_mutex_count != 0) {
3141 ib_logf(IB_LOG_LEVEL_WARN,
3142 "Some resources were not cleaned up in shutdown: "
3143 "threads %lu, events %lu, os_mutexes %lu, "
3144 "os_fast_mutexes %lu",
3145 (ulong) os_thread_count, (ulong) os_event_count,
3146 (ulong) os_mutex_count, (ulong) os_fast_mutex_count);
3147 }
3148
3149 if (dict_foreign_err_file) {
3150 fclose(dict_foreign_err_file);
3151 }
3152
3153 if (srv_print_verbose_log) {
3154 ib_logf(IB_LOG_LEVEL_INFO,
3155 "Shutdown completed; log sequence number " LSN_PF "",
3156 srv_shutdown_lsn);
3157 }
3158
3159 srv_was_started = FALSE;
3160 srv_start_has_been_called = FALSE;
3161
3162 return(DB_SUCCESS);
3163 }
3164 #endif /* !UNIV_HOTBACKUP */
3165
3166
3167 /********************************************************************
3168 Signal all per-table background threads to shutdown, and wait for them to do
3169 so. */
3170 UNIV_INTERN
3171 void
srv_shutdown_table_bg_threads(void)3172 srv_shutdown_table_bg_threads(void)
3173 /*===============================*/
3174 {
3175 dict_table_t* table;
3176 dict_table_t* first;
3177 dict_table_t* last = NULL;
3178
3179 mutex_enter(&dict_sys->mutex);
3180
3181 /* Signal all threads that they should stop. */
3182 table = UT_LIST_GET_FIRST(dict_sys->table_LRU);
3183 first = table;
3184 while (table) {
3185 dict_table_t* next;
3186 fts_t* fts = table->fts;
3187
3188 if (fts != NULL) {
3189 fts_start_shutdown(table, fts);
3190 }
3191
3192 next = UT_LIST_GET_NEXT(table_LRU, table);
3193
3194 if (!next) {
3195 last = table;
3196 }
3197
3198 table = next;
3199 }
3200
3201 /* We must release dict_sys->mutex here; if we hold on to it in the
3202 loop below, we will deadlock if any of the background threads try to
3203 acquire it (for example, the FTS thread by calling que_eval_sql).
3204
3205 Releasing it here and going through dict_sys->table_LRU without
3206 holding it is safe because:
3207
3208 a) MySQL only starts the shutdown procedure after all client
3209 threads have been disconnected and no new ones are accepted, so no
3210 new tables are added or old ones dropped.
3211
3212 b) Despite its name, the list is not LRU, and the order stays
3213 fixed.
3214
3215 To safeguard against the above assumptions ever changing, we store
3216 the first and last items in the list above, and then check that
3217 they've stayed the same below. */
3218
3219 mutex_exit(&dict_sys->mutex);
3220
3221 /* Wait for the threads of each table to stop. This is not inside
3222 the above loop, because by signaling all the threads first we can
3223 overlap their shutting down delays. */
3224 table = UT_LIST_GET_FIRST(dict_sys->table_LRU);
3225 ut_a(first == table);
3226 while (table) {
3227 dict_table_t* next;
3228 fts_t* fts = table->fts;
3229
3230 if (fts != NULL) {
3231 fts_shutdown(table, fts);
3232 }
3233
3234 next = UT_LIST_GET_NEXT(table_LRU, table);
3235
3236 if (table == last) {
3237 ut_a(!next);
3238 }
3239
3240 table = next;
3241 }
3242 }
3243
3244 /*****************************************************************//**
3245 Get the meta-data filename from the table name. */
3246 UNIV_INTERN
3247 void
srv_get_meta_data_filename(dict_table_t * table,char * filename,ulint max_len)3248 srv_get_meta_data_filename(
3249 /*=======================*/
3250 dict_table_t* table, /*!< in: table */
3251 char* filename, /*!< out: filename */
3252 ulint max_len) /*!< in: filename max length */
3253 {
3254 ulint len;
3255 char* path;
3256 char* suffix;
3257 static const ulint suffix_len = strlen(".cfg");
3258
3259 if (DICT_TF_HAS_DATA_DIR(table->flags)) {
3260 dict_get_and_save_data_dir_path(table, false);
3261 ut_a(table->data_dir_path);
3262
3263 path = os_file_make_remote_pathname(
3264 table->data_dir_path, table->name, "cfg");
3265 } else {
3266 path = fil_make_ibd_name(table->name, false);
3267 }
3268
3269 ut_a(path);
3270 len = ut_strlen(path);
3271 ut_a(max_len >= len);
3272
3273 suffix = path + (len - suffix_len);
3274 if (strncmp(suffix, ".cfg", suffix_len) == 0) {
3275 strcpy(filename, path);
3276 } else {
3277 ut_ad(strncmp(suffix, ".ibd", suffix_len) == 0);
3278
3279 strncpy(filename, path, len - suffix_len);
3280 suffix = filename + (len - suffix_len);
3281 strcpy(suffix, ".cfg");
3282 }
3283
3284 mem_free(path);
3285
3286 srv_normalize_path_for_win(filename);
3287 }
3288