1 /*****************************************************************************
2
3 Copyright (c) 1996, 2017, Oracle and/or its affiliates. All rights reserved.
4 Copyright (c) 2008, Google Inc.
5 Copyright (c) 2009, Percona Inc.
6
7 Portions of this file contain modifications contributed and copyrighted by
8 Google, Inc. Those modifications are gratefully acknowledged and are described
9 briefly in the InnoDB documentation. The contributions by Google are
10 incorporated with their permission, and subject to the conditions contained in
11 the file COPYING.Google.
12
13 Portions of this file contain modifications contributed and copyrighted
14 by Percona Inc.. Those modifications are
15 gratefully acknowledged and are described briefly in the InnoDB
16 documentation. The contributions by Percona Inc. are incorporated with
17 their permission, and subject to the conditions contained in the file
18 COPYING.Percona.
19
20 This program is free software; you can redistribute it and/or modify
21 it under the terms of the GNU General Public License, version 2.0,
22 as published by the Free Software Foundation.
23
24 This program is also distributed with certain software (including
25 but not limited to OpenSSL) that is licensed under separate terms,
26 as designated in a particular file or component or in included license
27 documentation. The authors of MySQL hereby grant you an additional
28 permission to link the program and your derivative works with the
29 separately licensed software that they have included with MySQL.
30
31 This program is distributed in the hope that it will be useful,
32 but WITHOUT ANY WARRANTY; without even the implied warranty of
33 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
34 GNU General Public License, version 2.0, for more details.
35
36 You should have received a copy of the GNU General Public License along with
37 this program; if not, write to the Free Software Foundation, Inc.,
38 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
39
40 *****************************************************************************/
41
42 /********************************************************************//**
43 @file srv/srv0start.cc
44 Starts the InnoDB database server
45
46 Created 2/16/1996 Heikki Tuuri
47 *************************************************************************/
48
49 #include "mysqld.h"
50 #include "pars0pars.h"
51 #include "row0ftsort.h"
52 #include "ut0mem.h"
53 #include "mem0mem.h"
54 #include "data0data.h"
55 #include "data0type.h"
56 #include "dict0dict.h"
57 #include "buf0buf.h"
58 #include "buf0dump.h"
59 #include "os0file.h"
60 #include "os0thread.h"
61 #include "fil0fil.h"
62 #include "fsp0fsp.h"
63 #include "rem0rec.h"
64 #include "mtr0mtr.h"
65 #include "log0log.h"
66 #include "log0online.h"
67 #include "log0recv.h"
68 #include "page0page.h"
69 #include "page0cur.h"
70 #include "trx0trx.h"
71 #include "trx0sys.h"
72 #include "btr0btr.h"
73 #include "btr0cur.h"
74 #include "rem0rec.h"
75 #include "ibuf0ibuf.h"
76 #include "srv0start.h"
77 #include "srv0srv.h"
78 #ifndef UNIV_HOTBACKUP
79 # include "trx0rseg.h"
80 # include "os0proc.h"
81 # include "sync0sync.h"
82 # include "buf0flu.h"
83 # include "buf0rea.h"
84 # include "dict0boot.h"
85 # include "dict0load.h"
86 # include "dict0stats_bg.h"
87 # include "que0que.h"
88 # include "usr0sess.h"
89 # include "lock0lock.h"
90 # include "trx0roll.h"
91 # include "trx0purge.h"
92 # include "lock0lock.h"
93 # include "pars0pars.h"
94 # include "btr0sea.h"
95 # include "rem0cmp.h"
96 # include "dict0crea.h"
97 # include "row0ins.h"
98 # include "row0sel.h"
99 # include "row0upd.h"
100 # include "row0row.h"
101 # include "row0mysql.h"
102 # include "btr0pcur.h"
103 # include "os0sync.h"
104 # include "zlib.h"
105 # include "ut0crc32.h"
106
107 /** Log sequence number immediately after startup */
108 UNIV_INTERN lsn_t srv_start_lsn;
109 /** Log sequence number at shutdown */
110 UNIV_INTERN lsn_t srv_shutdown_lsn;
111
112 #ifdef HAVE_DARWIN_THREADS
113 # include <sys/utsname.h>
114 /** TRUE if the F_FULLFSYNC option is available */
115 UNIV_INTERN ibool srv_have_fullfsync = FALSE;
116 #endif
117
118 /** TRUE if a raw partition is in use */
119 UNIV_INTERN ibool srv_start_raw_disk_in_use = FALSE;
120
121 /** Undo tablespaces start with this space id. */
122 ulint srv_undo_space_id_start;
123
124 /** TRUE if the server is being started, before rolling back any
125 incomplete transactions */
126 UNIV_INTERN ibool srv_startup_is_before_trx_rollback_phase = FALSE;
127 /** TRUE if the server is being started */
128 UNIV_INTERN ibool srv_is_being_started = FALSE;
129 /** TRUE if the server was successfully started */
130 UNIV_INTERN ibool srv_was_started = FALSE;
131 /** TRUE if innobase_start_or_create_for_mysql() has been called */
132 static ibool srv_start_has_been_called = FALSE;
133
134 /** At a shutdown this value climbs from SRV_SHUTDOWN_NONE to
135 SRV_SHUTDOWN_CLEANUP and then to SRV_SHUTDOWN_LAST_PHASE, and so on */
136 UNIV_INTERN enum srv_shutdown_state srv_shutdown_state = SRV_SHUTDOWN_NONE;
137
138 /** File handles of the files comprising the system tablespace, indexed by
data file number while open_or_create_data_files() opens or creates them
(that function rejects configurations with 1000 or more data files).
create_log_files() also stores the handles of the redo log files it
creates in this array. */
139 static pfs_os_file_t files[1000];
140
141 /** io_handler_thread parameters for thread identification */
142 static ulint n[SRV_MAX_N_IO_THREADS];
143 /** io_handler_thread identifiers, 32 is the maximum number of purge threads.
144 The extra elements at the end are allocated as follows:
145 SRV_MAX_N_IO_THREADS + 1: srv_master_thread
146 SRV_MAX_N_IO_THREADS + 2: lock_wait_timeout_thread
147 SRV_MAX_N_IO_THREADS + 3: srv_error_monitor_thread
148 SRV_MAX_N_IO_THREADS + 4: srv_monitor_thread
149 SRV_MAX_N_IO_THREADS + 5: srv_redo_log_follow_thread
150 SRV_MAX_N_IO_THREADS + 6: srv_purge_coordinator_thread
151 SRV_MAX_N_IO_THREADS + 7: srv_worker_thread
152 ...
153 SRV_MAX_N_IO_THREADS + 7 + srv_n_purge_threads - 1: srv_worker_thread */
154 static os_thread_id_t thread_ids[SRV_MAX_N_IO_THREADS + 7
155 + SRV_MAX_N_PURGE_THREADS];
156
157 /** We use this mutex to test the return value of pthread_mutex_trylock
158 on successful locking. HP-UX does NOT return 0, though Linux et al do. */
159 static os_fast_mutex_t srv_os_test_mutex;
160
161 /** Name of srv_monitor_file */
162 static char* srv_monitor_file_name;
163 #endif /* !UNIV_HOTBACKUP */
164
165 /** Default undo tablespace size, expressed as a number of pages of
UNIV_PAGE_SIZE_DEF bytes (10MB in total). */
166 static const ulint SRV_UNDO_TABLESPACE_SIZE_IN_PAGES =
167 ((1024 * 1024) * 10) / UNIV_PAGE_SIZE_DEF;
168
169 /** Limits on pending I/O requests. SRV_N_PENDING_IOS_PER_THREAD is an alias
for OS_AIO_N_PENDING_IOS_PER_THREAD; SRV_MAX_N_PENDING_SYNC_IOS is
presumably the cap on pending synchronous I/Os (TODO confirm at the
place where it is used). */
170 #define SRV_N_PENDING_IOS_PER_THREAD OS_AIO_N_PENDING_IOS_PER_THREAD
171 #define SRV_MAX_N_PENDING_SYNC_IOS 100
172
/** Converts a size in bytes to a number of pages of UNIV_PAGE_SIZE bytes,
after rounding the size down to a whole number of megabytes. The round off
to MB is similar to the one done in srv_parse_megabytes().
The whole expansion is parenthesized so that a cast or an operator applied
to the macro call covers the complete result and not only its first factor. */
#define CALC_NUMBER_OF_PAGES(size)				\
	((((size) / (1024 * 1024))				\
	  * ((1024 * 1024) / (UNIV_PAGE_SIZE))))
176 #ifdef UNIV_PFS_THREAD
177 /* Keys to register InnoDB threads with performance schema. They exist
only when UNIV_PFS_THREAD is defined. io_handler_thread() registers
itself with io_handler_thread_key. */
178 UNIV_INTERN mysql_pfs_key_t io_handler_thread_key;
179 UNIV_INTERN mysql_pfs_key_t srv_lock_timeout_thread_key;
180 UNIV_INTERN mysql_pfs_key_t srv_error_monitor_thread_key;
181 UNIV_INTERN mysql_pfs_key_t srv_monitor_thread_key;
182 UNIV_INTERN mysql_pfs_key_t srv_master_thread_key;
183 UNIV_INTERN mysql_pfs_key_t srv_purge_thread_key;
184 UNIV_INTERN mysql_pfs_key_t srv_log_tracking_thread_key;
185 #endif /* UNIV_PFS_THREAD */
186
187 /*********************************************************************//**
188 Convert a numeric string that optionally ends in G or M or K, to a number
189 containing megabytes.
190 @return next character in string */
191 static
192 char*
srv_parse_megabytes(char * str,ulint * megs)193 srv_parse_megabytes(
194 /*================*/
195 char* str, /*!< in: string containing a quantity in bytes */
196 ulint* megs) /*!< out: the number in megabytes */
197 {
198 char* endp;
199 ulint size;
200
201 size = strtoul(str, &endp, 10);
202
203 str = endp;
204
205 switch (*str) {
206 case 'G': case 'g':
207 size *= 1024;
208 /* fall through */
209 case 'M': case 'm':
210 str++;
211 break;
212 case 'K': case 'k':
213 size /= 1024;
214 str++;
215 break;
216 default:
217 size /= 1024 * 1024;
218 break;
219 }
220
221 *megs = size;
222 return(str);
223 }
224
225 /*********************************************************************//**
226 Check if a file can be opened in read-write mode.
227 @return true if it doesn't exist or can be opened in rw mode. */
228 static
229 bool
srv_file_check_mode(const char * name)230 srv_file_check_mode(
231 /*================*/
232 const char* name) /*!< in: filename to check */
233 {
234 os_file_stat_t stat;
235
236 memset(&stat, 0x0, sizeof(stat));
237
238 dberr_t err = os_file_get_status(name, &stat, true);
239
240 if (err == DB_FAIL) {
241
242 ib_logf(IB_LOG_LEVEL_ERROR,
243 "os_file_get_status() failed on '%s'. Can't determine "
244 "file permissions", name);
245
246 return(false);
247
248 } else if (err == DB_SUCCESS) {
249
250 /* Note: stat.rw_perm is only valid of files */
251
252 if (stat.type == OS_FILE_TYPE_FILE) {
253
254 if (!stat.rw_perm) {
255
256 ib_logf(IB_LOG_LEVEL_ERROR,
257 "%s can't be opened in %s mode",
258 name,
259 srv_read_only_mode
260 ? "read" : "read-write");
261
262 return(false);
263 }
264 } else {
265 /* Not a regular file, bail out. */
266
267 ib_logf(IB_LOG_LEVEL_ERROR,
268 "'%s' not a regular file.", name);
269
270 return(false);
271 }
272 } else {
273
274 /* This is OK. If the file create fails on RO media, there
275 is nothing we can do. */
276
277 ut_a(err == DB_NOT_FOUND);
278 }
279
280 return(true);
281 }
282
283 /*********************************************************************//**
284 Reads the data files and their sizes from a character string given in
285 the .cnf file.
286 @return TRUE if ok, FALSE on parse error */
287 UNIV_INTERN
288 ibool
srv_parse_data_file_paths_and_sizes(char * str)289 srv_parse_data_file_paths_and_sizes(
290 /*================================*/
291 char* str) /*!< in/out: the data file path string */
292 {
293 char* input_str;
294 char* path;
295 ulint size;
296 ulint i = 0;
297
298 srv_auto_extend_last_data_file = FALSE;
299 srv_last_file_size_max = 0;
300 srv_data_file_names = NULL;
301 srv_data_file_sizes = NULL;
302 srv_data_file_is_raw_partition = NULL;
303
304 input_str = str;
305
306 /* First calculate the number of data files and check syntax:
307 path:size[M | G];path:size[M | G]... . Note that a Windows path may
308 contain a drive name and a ':'. */
309
310 while (*str != '\0') {
311 path = str;
312
313 while ((*str != ':' && *str != '\0')
314 || (*str == ':'
315 && (*(str + 1) == '\\' || *(str + 1) == '/'
316 || *(str + 1) == ':'))) {
317 str++;
318 }
319
320 if (*str == '\0') {
321 return(FALSE);
322 }
323
324 str++;
325
326 str = srv_parse_megabytes(str, &size);
327
328 if (0 == strncmp(str, ":autoextend",
329 (sizeof ":autoextend") - 1)) {
330
331 str += (sizeof ":autoextend") - 1;
332
333 if (0 == strncmp(str, ":max:",
334 (sizeof ":max:") - 1)) {
335
336 str += (sizeof ":max:") - 1;
337
338 str = srv_parse_megabytes(str, &size);
339 }
340
341 if (*str != '\0') {
342
343 return(FALSE);
344 }
345 }
346
347 if (strlen(str) >= 6
348 && *str == 'n'
349 && *(str + 1) == 'e'
350 && *(str + 2) == 'w') {
351 str += 3;
352 }
353
354 if (*str == 'r' && *(str + 1) == 'a' && *(str + 2) == 'w') {
355 str += 3;
356 }
357
358 if (size == 0) {
359 return(FALSE);
360 }
361
362 i++;
363
364 if (*str == ';') {
365 str++;
366 } else if (*str != '\0') {
367
368 return(FALSE);
369 }
370 }
371
372 if (i == 0) {
373 /* If innodb_data_file_path was defined it must contain
374 at least one data file definition */
375
376 return(FALSE);
377 }
378
379 srv_data_file_names = static_cast<char**>(
380 malloc(i * sizeof *srv_data_file_names));
381
382 srv_data_file_sizes = static_cast<ulint*>(
383 malloc(i * sizeof *srv_data_file_sizes));
384
385 srv_data_file_is_raw_partition = static_cast<ulint*>(
386 malloc(i * sizeof *srv_data_file_is_raw_partition));
387
388 srv_n_data_files = i;
389
390 /* Then store the actual values to our arrays */
391
392 str = input_str;
393 i = 0;
394
395 while (*str != '\0') {
396 path = str;
397
398 /* Note that we must step over the ':' in a Windows path;
399 a Windows path normally looks like C:\ibdata\ibdata1:1G, but
400 a Windows raw partition may have a specification like
401 \\.\C::1Gnewraw or \\.\PHYSICALDRIVE2:1Gnewraw */
402
403 while ((*str != ':' && *str != '\0')
404 || (*str == ':'
405 && (*(str + 1) == '\\' || *(str + 1) == '/'
406 || *(str + 1) == ':'))) {
407 str++;
408 }
409
410 if (*str == ':') {
411 /* Make path a null-terminated string */
412 *str = '\0';
413 str++;
414 }
415
416 str = srv_parse_megabytes(str, &size);
417
418 srv_data_file_names[i] = path;
419 srv_data_file_sizes[i] = size;
420
421 if (0 == strncmp(str, ":autoextend",
422 (sizeof ":autoextend") - 1)) {
423
424 srv_auto_extend_last_data_file = TRUE;
425
426 str += (sizeof ":autoextend") - 1;
427
428 if (0 == strncmp(str, ":max:",
429 (sizeof ":max:") - 1)) {
430
431 str += (sizeof ":max:") - 1;
432
433 str = srv_parse_megabytes(
434 str, &srv_last_file_size_max);
435 }
436
437 if (*str != '\0') {
438
439 return(FALSE);
440 }
441 }
442
443 (srv_data_file_is_raw_partition)[i] = 0;
444
445 if (strlen(str) >= 6
446 && *str == 'n'
447 && *(str + 1) == 'e'
448 && *(str + 2) == 'w') {
449 str += 3;
450 /* Initialize new raw device only during bootstrap */
451 (srv_data_file_is_raw_partition)[i] =
452 opt_bootstrap ? SRV_NEW_RAW : SRV_OLD_RAW;
453 }
454
455 if (*str == 'r' && *(str + 1) == 'a' && *(str + 2) == 'w') {
456 str += 3;
457
458 /* Initialize new raw device only during bootstrap */
459 if ((srv_data_file_is_raw_partition)[i] == 0) {
460 (srv_data_file_is_raw_partition)[i] =
461 opt_bootstrap ? SRV_NEW_RAW : SRV_OLD_RAW;
462 }
463 }
464
465 i++;
466
467 if (*str == ';') {
468 str++;
469 }
470 }
471
472 return(TRUE);
473 }
474
475 /*********************************************************************//**
476 Frees the memory allocated by srv_parse_data_file_paths_and_sizes()
477 and srv_parse_log_group_home_dirs(). */
478 UNIV_INTERN
479 void
srv_free_paths_and_sizes(void)480 srv_free_paths_and_sizes(void)
481 /*==========================*/
482 {
483 free(srv_data_file_names);
484 srv_data_file_names = NULL;
485 free(srv_data_file_sizes);
486 srv_data_file_sizes = NULL;
487 free(srv_data_file_is_raw_partition);
488 srv_data_file_is_raw_partition = NULL;
489 }
490
491 #ifndef UNIV_HOTBACKUP
492
493 static ulint io_tid_i = 0;
494
495 /********************************************************************//**
496 I/o-handler thread function.
497 @return OS_THREAD_DUMMY_RETURN */
498 extern "C" UNIV_INTERN
499 os_thread_ret_t
DECLARE_THREAD(io_handler_thread)500 DECLARE_THREAD(io_handler_thread)(
501 /*==============================*/
502 void* arg) /*!< in: pointer to the number of the segment in
503 the aio array */
504 {
505 ulint segment;
506 ulint tid_i = os_atomic_increment_ulint(&io_tid_i, 1) - 1;
507
508 ut_ad(tid_i < srv_n_file_io_threads);
509
510 segment = *((ulint*) arg);
511
512 srv_io_tids[tid_i] = os_thread_get_tid();
513 os_thread_set_priority(srv_io_tids[tid_i], srv_sched_priority_io);
514
515 #ifdef UNIV_DEBUG_THREAD_CREATION
516 fprintf(stderr, "Io handler thread %lu starts, id %lu\n", segment,
517 os_thread_pf(os_thread_get_curr_id()));
518 #endif
519
520 #ifdef UNIV_PFS_THREAD
521 pfs_register_thread(io_handler_thread_key);
522 #endif /* UNIV_PFS_THREAD */
523
524 while (srv_shutdown_state != SRV_SHUTDOWN_EXIT_THREADS) {
525 srv_current_thread_priority = srv_io_thread_priority;
526 fil_aio_wait(segment);
527 }
528
529 /* We count the number of threads in os_thread_exit(). A created
530 thread should always use that to exit and not use return() to exit.
531 The thread actually never comes here because it is exited in an
532 os_event_wait(). */
533
534 os_thread_exit(NULL);
535
536 OS_THREAD_DUMMY_RETURN;
537 }
538 #endif /* !UNIV_HOTBACKUP */
539
540 /*********************************************************************//**
541 Normalizes a directory path for Windows: converts slashes to backslashes. */
542 UNIV_INTERN
543 void
srv_normalize_path_for_win(char * str MY_ATTRIBUTE ((unused)))544 srv_normalize_path_for_win(
545 /*=======================*/
546 char* str MY_ATTRIBUTE((unused))) /*!< in/out: null-terminated
547 character string */
548 {
549 #ifdef __WIN__
550 for (; *str; str++) {
551
552 if (*str == '/') {
553 *str = '\\';
554 }
555 }
556 #endif
557 }
558
559 #ifndef UNIV_HOTBACKUP
560 /*********************************************************************//**
561 Creates a log file.
562 @return DB_SUCCESS or error code */
563 static MY_ATTRIBUTE((nonnull, warn_unused_result))
564 dberr_t
create_log_file(pfs_os_file_t * file,const char * name)565 create_log_file(
566 /*============*/
567 pfs_os_file_t* file, /*!< out: file handle */
568 const char* name) /*!< in: log file name */
569 {
570 ibool ret;
571
572 *file = os_file_create(
573 innodb_file_log_key, name,
574 OS_FILE_CREATE|OS_FILE_ON_ERROR_NO_EXIT, OS_FILE_NORMAL,
575 OS_LOG_FILE, &ret);
576
577 if (!ret) {
578 ib_logf(IB_LOG_LEVEL_ERROR, "Cannot create %s", name);
579 return(DB_ERROR);
580 }
581
582 ib_logf(IB_LOG_LEVEL_INFO,
583 "Setting log file %s size to %lu MB",
584 name, (ulong) srv_log_file_size
585 >> (20 - UNIV_PAGE_SIZE_SHIFT));
586
587 ret = os_file_set_size(name, *file,
588 (os_offset_t) srv_log_file_size
589 << UNIV_PAGE_SIZE_SHIFT);
590 if (!ret) {
591 ib_logf(IB_LOG_LEVEL_ERROR, "Cannot set log file"
592 " %s to size %lu MB", name, (ulong) srv_log_file_size
593 >> (20 - UNIV_PAGE_SIZE_SHIFT));
594 return(DB_ERROR);
595 }
596
597 ret = os_file_close(*file);
598 ut_a(ret);
599
600 return(DB_SUCCESS);
601 }
602
/** Initial number of the first redo log file */
#define INIT_LOG_FILE0	(SRV_N_LOG_FILES_MAX + 1)

/** Crash injection point. Unless DBUG_OFF is defined, the process prints
the value of srv_force_recovery_crash and exits with status 3 when that
variable equals x; otherwise the macro does nothing. The argument is
parenthesized so that an expression argument is compared as a whole. */
#ifdef DBUG_OFF
# define RECOVERY_CRASH(x) do {} while(0)
#else
# define RECOVERY_CRASH(x) do {						\
	if (srv_force_recovery_crash == (x)) {				\
		fprintf(stderr, "innodb_force_recovery_crash=%lu\n",	\
			srv_force_recovery_crash);			\
		fflush(stderr);						\
		exit(3);						\
	}								\
} while (0)
#endif
618
619 /*********************************************************************//**
620 Creates all log files.
621 @return DB_SUCCESS or error code */
622 static
623 dberr_t
create_log_files(bool create_new_db,char * logfilename,size_t dirnamelen,lsn_t lsn,char * & logfile0)624 create_log_files(
625 /*=============*/
626 bool create_new_db, /*!< in: TRUE if new database is being
627 created */
628 char* logfilename, /*!< in/out: buffer for log file name */
629 size_t dirnamelen, /*!< in: length of the directory path */
630 lsn_t lsn, /*!< in: FIL_PAGE_FILE_FLUSH_LSN value */
631 char*& logfile0) /*!< out: name of the first log file */
632 {
633 if (srv_read_only_mode) {
634 ib_logf(IB_LOG_LEVEL_ERROR,
635 "Cannot create log files in read-only mode");
636 return(DB_READ_ONLY);
637 }
638
639 /* We prevent system tablespace creation with existing files in
640 data directory. So we do not delete log files when creating new system
641 tablespace */
642 if (!create_new_db) {
643 /* Remove any old log files. */
644 for (unsigned i = 0; i <= INIT_LOG_FILE0; i++) {
645 sprintf(logfilename + dirnamelen, "ib_logfile%u", i);
646
647 /* Ignore errors about non-existent files or files
648 that cannot be removed. The create_log_file() will
649 return an error when the file exists. */
650 #ifdef __WIN__
651 DeleteFile((LPCTSTR) logfilename);
652 #else
653 unlink(logfilename);
654 #endif
655 /* Crashing after deleting the first
656 file should be recoverable. The buffer
657 pool was clean, and we can simply create
658 all log files from the scratch. */
659 RECOVERY_CRASH(6);
660 }
661 }
662
663 ut_ad(!buf_pool_check_no_pending_io());
664
665 RECOVERY_CRASH(7);
666
667 for (unsigned i = 0; i < srv_n_log_files; i++) {
668 sprintf(logfilename + dirnamelen,
669 "ib_logfile%u", i ? i : INIT_LOG_FILE0);
670
671 dberr_t err = create_log_file(&files[i], logfilename);
672
673 if (err != DB_SUCCESS) {
674 return(err);
675 }
676 }
677
678 RECOVERY_CRASH(8);
679
680 /* We did not create the first log file initially as
681 ib_logfile0, so that crash recovery cannot find it until it
682 has been completed and renamed. */
683 sprintf(logfilename + dirnamelen, "ib_logfile%u", INIT_LOG_FILE0);
684
685 fil_space_create(
686 logfilename, SRV_LOG_SPACE_FIRST_ID,
687 fsp_flags_set_page_size(0, UNIV_PAGE_SIZE),
688 FIL_LOG);
689 ut_a(fil_validate());
690
691 logfile0 = fil_node_create(
692 logfilename, (ulint) srv_log_file_size,
693 SRV_LOG_SPACE_FIRST_ID, FALSE);
694 ut_a(logfile0);
695
696 for (unsigned i = 1; i < srv_n_log_files; i++) {
697 sprintf(logfilename + dirnamelen, "ib_logfile%u", i);
698
699 if (!fil_node_create(
700 logfilename,
701 (ulint) srv_log_file_size,
702 SRV_LOG_SPACE_FIRST_ID, FALSE)) {
703 ut_error;
704 }
705 }
706
707 #ifdef UNIV_LOG_ARCHIVE
708 /* Create the file space object for archived logs. */
709 fil_space_create("arch_log_space", SRV_LOG_SPACE_FIRST_ID + 1,
710 0, FIL_LOG);
711 #endif
712 log_group_init(0, srv_n_log_files,
713 srv_log_file_size * UNIV_PAGE_SIZE,
714 SRV_LOG_SPACE_FIRST_ID,
715 SRV_LOG_SPACE_FIRST_ID + 1);
716
717 fil_open_log_and_system_tablespace_files();
718
719 /* Create a log checkpoint. */
720 mutex_enter(&log_sys->mutex);
721 ut_d(recv_no_log_write = FALSE);
722 recv_reset_logs(
723 #ifdef UNIV_LOG_ARCHIVE
724 UT_LIST_GET_FIRST(log_sys->log_groups)->archived_file_no,
725 TRUE,
726 #endif
727 lsn);
728 mutex_exit(&log_sys->mutex);
729
730 return(DB_SUCCESS);
731 }
732
733 /*********************************************************************//**
734 Renames the first log file. */
735 static
736 void
create_log_files_rename(char * logfilename,size_t dirnamelen,lsn_t lsn,char * logfile0)737 create_log_files_rename(
738 /*====================*/
739 char* logfilename, /*!< in/out: buffer for log file name */
740 size_t dirnamelen, /*!< in: length of the directory path */
741 lsn_t lsn, /*!< in: FIL_PAGE_FILE_FLUSH_LSN value */
742 char* logfile0) /*!< in/out: name of the first log file */
743 {
744 /* If innodb_flush_method=O_DSYNC,
745 we need to explicitly flush the log buffers. */
746 fil_flush(SRV_LOG_SPACE_FIRST_ID);
747 /* Close the log files, so that we can rename
748 the first one. */
749 fil_close_log_files(false);
750
751 /* Rename the first log file, now that a log
752 checkpoint has been created. */
753 sprintf(logfilename + dirnamelen, "ib_logfile%u", 0);
754
755 RECOVERY_CRASH(9);
756
757 ib_logf(IB_LOG_LEVEL_INFO,
758 "Renaming log file %s to %s", logfile0, logfilename);
759
760 mutex_enter(&log_sys->mutex);
761 ut_ad(strlen(logfile0) == 2 + strlen(logfilename));
762 ibool success = os_file_rename(
763 innodb_file_log_key, logfile0, logfilename);
764 ut_a(success);
765
766 RECOVERY_CRASH(10);
767
768 /* Replace the first file with ib_logfile0. */
769 strcpy(logfile0, logfilename);
770 mutex_exit(&log_sys->mutex);
771
772 fil_open_log_and_system_tablespace_files();
773
774 ib_logf(IB_LOG_LEVEL_WARN, "New log files created, LSN=" LSN_PF, lsn);
775 }
776
777 /*********************************************************************//**
778 Opens a log file.
779 @return DB_SUCCESS or error code */
780 static MY_ATTRIBUTE((nonnull, warn_unused_result))
781 dberr_t
open_log_file(pfs_os_file_t * file,const char * name,os_offset_t * size)782 open_log_file(
783 /*==========*/
784 pfs_os_file_t* file, /*!< out: file handle */
785 const char* name, /*!< in: log file name */
786 os_offset_t* size) /*!< out: file size */
787 {
788 ibool ret;
789
790 *file = os_file_create(innodb_file_log_key, name,
791 OS_FILE_OPEN, OS_FILE_AIO,
792 OS_LOG_FILE, &ret);
793 if (!ret) {
794 ib_logf(IB_LOG_LEVEL_ERROR, "Unable to open '%s'", name);
795 return(DB_ERROR);
796 }
797
798 *size = os_file_get_size(*file);
799
800 ret = os_file_close(*file);
801 ut_a(ret);
802 return(DB_SUCCESS);
803 }
804
805 /*********************************************************************//**
806 Creates or opens database data files and closes them.
For each of the srv_n_data_files configured files the full path is built
from srv_data_home and srv_data_file_names[i]. A file that does not exist
is created and written to its configured size; an existing file (or raw
partition) is opened, its size is compared with the configured size, and
its first page is read with fil_read_first_page(). Each file handle is
closed again, and the file is registered with fil_node_create(); the system
tablespace itself is created with fil_space_create() when the first file
is processed.
807 @return DB_SUCCESS or error code: DB_FAIL if srv_file_check_mode() rejects
a file, DB_ERROR on every other failure */
808 static MY_ATTRIBUTE((nonnull, warn_unused_result))
809 dberr_t
open_or_create_data_files(ibool * create_new_db,lsn_t * min_arch_log_no,lsn_t * max_arch_log_no,lsn_t * min_flushed_lsn,lsn_t * max_flushed_lsn,ulint * sum_of_new_sizes)810 open_or_create_data_files(
811 /*======================*/
812 ibool* create_new_db, /*!< out: TRUE if new database should be
813 created */
814 #ifdef UNIV_LOG_ARCHIVE
815 lsn_t* min_arch_log_no,/*!< out: min of archived log
816 numbers in data files */
817 lsn_t* max_arch_log_no,/*!< out: max of archived log
818 numbers in data files */
819 #endif /* UNIV_LOG_ARCHIVE */
820 lsn_t* min_flushed_lsn,/*!< out: min of flushed lsn
821 values in data files */
822 lsn_t* max_flushed_lsn,/*!< out: max of flushed lsn
823 values in data files */
824 ulint* sum_of_new_sizes)/*!< out: sum of sizes of the
825 new files added */
826 {
827 ibool ret;
828 ulint i;
829 ibool one_opened = FALSE;
830 ibool one_created = FALSE;
831 os_offset_t size;
832 ulint flags;
833 ulint space;
834 ulint rounded_size_pages;
835 char name[10000];
836
/* The handles are kept in the file-scope array files[], which has
1000 elements. */
837 if (srv_n_data_files >= 1000) {
838
839 ib_logf(IB_LOG_LEVEL_ERROR,
840 "Can only have < 1000 data files, you have "
841 "defined %lu", (ulong) srv_n_data_files);
842
843 return(DB_ERROR);
844 }
845
846 *sum_of_new_sizes = 0;
847
848 *create_new_db = FALSE;
849
850 srv_normalize_path_for_win(srv_data_home);
851
852 for (i = 0; i < srv_n_data_files; i++) {
853 ulint dirnamelen;
854
855 srv_normalize_path_for_win(srv_data_file_names[i]);
856 dirnamelen = strlen(srv_data_home);
857
/* Leave room for a path separator and the terminating NUL */
858 ut_a(dirnamelen + strlen(srv_data_file_names[i])
859 < (sizeof name) - 1);
860
861 memcpy(name, srv_data_home, dirnamelen);
862
863 /* Add a path separator if needed. */
864 if (dirnamelen && name[dirnamelen - 1] != SRV_PATH_SEPARATOR) {
865 name[dirnamelen++] = SRV_PATH_SEPARATOR;
866 }
867
868 strcpy(name + dirnamelen, srv_data_file_names[i]);
869
870 /* Note: It will return true if the file doesn't exist. */
871
872 if (!srv_file_check_mode(name)) {
873
874 return(DB_FAIL);
875
876 } else if (srv_data_file_is_raw_partition[i] == 0) {
877
878 /* First we try to create the file: if it already
879 exists, ret will get value FALSE */
880
881 files[i] = os_file_create(
882 innodb_file_data_key, name, OS_FILE_CREATE,
883 OS_FILE_NORMAL, OS_DATA_FILE, &ret);
884
885 if (srv_read_only_mode) {
886
/* In read-only mode a successful call leads directly
to the size check of the file. NOTE(review):
presumably os_file_create() opens the existing file
instead of creating one in this mode; confirm in
os0file. */
887 if (ret) {
888 goto size_check;
889 }
890
891 ib_logf(IB_LOG_LEVEL_ERROR,
892 "Opening %s failed!", name);
893
894 return(DB_ERROR);
895
896 } else if (!ret
897 && os_file_get_last_error(false)
898 != OS_FILE_ALREADY_EXISTS
899 #ifdef UNIV_AIX
900 /* AIX 5.1 after security patch ML7 may have
901 errno set to 0 here, which causes our
902 function to return 100; work around that
903 AIX problem */
904 && os_file_get_last_error(false) != 100
905 #endif /* UNIV_AIX */
906 ) {
907 ib_logf(IB_LOG_LEVEL_ERROR,
908 "Creating or opening %s failed!",
909 name);
910
911 return(DB_ERROR);
912 }
913
914 } else if (srv_data_file_is_raw_partition[i] == SRV_NEW_RAW) {
915
916 ut_a(!srv_read_only_mode);
917
918 /* The partition is opened, not created; then it is
919 written over */
920
921 srv_start_raw_disk_in_use = TRUE;
922 srv_created_new_raw = TRUE;
923
924 files[i] = os_file_create(
925 innodb_file_data_key, name, OS_FILE_OPEN_RAW,
926 OS_FILE_NORMAL, OS_DATA_FILE, &ret);
927
928 if (!ret) {
929 ib_logf(IB_LOG_LEVEL_ERROR,
930 "Error in opening %s", name);
931
932 return(DB_ERROR);
933 }
934
935 const char* check_msg;
936 check_msg = fil_read_first_page(
937 files[i], FALSE, &flags, &space,
938 min_flushed_lsn, max_flushed_lsn);
939
940 /* If first page is valid, don't overwrite DB.
941 It prevents overwriting DB when mysql_install_db
942 starts mysqld multiple times during bootstrap. */
/* NOTE(review): with ret set to FALSE the partition is
opened a second time in the branch below, and the handle
obtained above appears to be overwritten without having
been closed. */
943 if (check_msg == NULL) {
944
945 srv_created_new_raw = FALSE;
946 ret = FALSE;
947 }
948
949 } else if (srv_data_file_is_raw_partition[i] == SRV_OLD_RAW) {
950 srv_start_raw_disk_in_use = TRUE;
951
952 ret = FALSE;
953 } else {
954 ut_a(0);
955 }
956
/* ret == FALSE here means that nothing was created: the file or
raw partition existed beforehand and must be opened and checked.
NOTE(review): the error returns in this branch that follow a
successful open leave files[i] open. */
957 if (ret == FALSE) {
958 const char* check_msg;
959 /* We open the data file */
960
961 if (one_created) {
962 ib_logf(IB_LOG_LEVEL_ERROR,
963 "Data files can only be added at "
964 "the end of a tablespace, but "
965 "data file %s existed beforehand.",
966 name);
967 return(DB_ERROR);
968 }
969 if (srv_data_file_is_raw_partition[i] == SRV_OLD_RAW) {
970 ut_a(!srv_read_only_mode);
971 files[i] = os_file_create(
972 innodb_file_data_key,
973 name, OS_FILE_OPEN_RAW,
974 OS_FILE_NORMAL, OS_DATA_FILE, &ret);
975 } else if (i == 0) {
976 files[i] = os_file_create(
977 innodb_file_data_key,
978 name, OS_FILE_OPEN_RETRY,
979 OS_FILE_NORMAL, OS_DATA_FILE, &ret);
980 } else {
981 files[i] = os_file_create(
982 innodb_file_data_key,
983 name, OS_FILE_OPEN, OS_FILE_NORMAL,
984 OS_DATA_FILE, &ret);
985 }
986
987 if (!ret) {
988
989 os_file_get_last_error(true);
990
991 ib_logf(IB_LOG_LEVEL_ERROR,
992 "Can't open '%s'", name);
993
994 return(DB_ERROR);
995 }
996
/* The size of an old raw partition is not compared with
the configured size. */
997 if (srv_data_file_is_raw_partition[i] == SRV_OLD_RAW) {
998
999 goto skip_size_check;
1000 }
1001
1002 size_check:
1003 size = os_file_get_size(files[i]);
1004 ut_a(size != (os_offset_t) -1);
1005
1006 /* Under some error conditions like disk full
1007 scenarios or file size reaching filesystem
1008 limit the data file could contain an incomplete
1009 extent at the end. When we extend a data file
1010 and if some failure happens, then also the data
1011 file could contain an incomplete extent. So we
1012 need to round the size downward to a megabyte.*/
1013
1014 rounded_size_pages = (ulint) CALC_NUMBER_OF_PAGES(size);
1015
/* An auto-extending last file may be larger than its
configured initial size, but not smaller, and not larger
than the configured maximum if one was given. When the
size is acceptable, the actual size replaces the
configured one. */
1016 if (i == srv_n_data_files - 1
1017 && srv_auto_extend_last_data_file) {
1018
1019 if (srv_data_file_sizes[i] > rounded_size_pages
1020 || (srv_last_file_size_max > 0
1021 && srv_last_file_size_max
1022 < rounded_size_pages)) {
1023
1024 ib_logf(IB_LOG_LEVEL_ERROR,
1025 "auto-extending "
1026 "data file %s is "
1027 "of a different size "
1028 "%lu pages (rounded "
1029 "down to MB) than specified "
1030 "in the .cnf file: "
1031 "initial %lu pages, "
1032 "max %lu (relevant if "
1033 "non-zero) pages!",
1034 name,
1035 (ulong) rounded_size_pages,
1036 (ulong) srv_data_file_sizes[i],
1037 (ulong)
1038 srv_last_file_size_max);
1039
1040 return(DB_ERROR);
1041 }
1042
1043 srv_data_file_sizes[i] = rounded_size_pages;
1044 }
1045
1046 if (rounded_size_pages != srv_data_file_sizes[i]) {
1047
1048 ib_logf(IB_LOG_LEVEL_ERROR,
1049 "Data file %s is of a different "
1050 "size %lu pages (rounded down to MB) "
1051 "than specified in the .cnf file "
1052 "%lu pages!",
1053 name,
1054 (ulong) rounded_size_pages,
1055 (ulong) srv_data_file_sizes[i]);
1056
1057 return(DB_ERROR);
1058 }
1059 skip_size_check:
1060
1061 /* This is the earliest location where we can load
1062 the double write buffer. */
1063 if (i == 0) {
1064 buf_dblwr_init_or_load_pages(
1065 files[i], srv_data_file_names[i], true);
1066 }
1067
/* If the first page cannot be validated, one attempt is made
to restore page 0 with fil_user_tablespace_restore_page(),
after which the page is read again. */
1068 bool retry = true;
1069 check_first_page:
1070 check_msg = fil_read_first_page(
1071 files[i], one_opened, &flags, &space,
1072 min_flushed_lsn, max_flushed_lsn);
1073
1074 if (check_msg) {
1075
1076 if (retry) {
1077 fsp_open_info fsp;
1078 const ulint page_no = 0;
1079
1080 retry = false;
1081 fsp.id = 0;
1082 fsp.filepath = srv_data_file_names[i];
1083 fsp.file = files[i];
1084
1085 if (fil_user_tablespace_restore_page(
1086 &fsp, page_no)) {
1087 goto check_first_page;
1088 }
1089 }
1090
1091 ib_logf(IB_LOG_LEVEL_ERROR,
1092 "%s in data file %s",
1093 check_msg, name);
1094 return(DB_ERROR);
1095 }
1096
1097 /* The first file of the system tablespace must
1098 have space ID = TRX_SYS_SPACE. The FSP_SPACE_ID
1099 field in files greater than ibdata1 are unreliable. */
1100 ut_a(one_opened || space == TRX_SYS_SPACE);
1101
1102 /* Check the flags for the first system tablespace
1103 file only. */
/* NOTE(review): the message below has no space after the
comma ("%lu,but"); it is a runtime string and is left as
it is here. */
1104 if (!one_opened
1105 && UNIV_PAGE_SIZE
1106 != fsp_flags_get_page_size(flags)) {
1107
1108 ib_logf(IB_LOG_LEVEL_ERROR,
1109 "Data file \"%s\" uses page size %lu,"
1110 "but the start-up parameter "
1111 "is --innodb-page-size=%lu",
1112 name,
1113 fsp_flags_get_page_size(flags),
1114 UNIV_PAGE_SIZE);
1115
1116 return(DB_ERROR);
1117 }
1118
1119 one_opened = TRUE;
1120 } else if (!srv_read_only_mode) {
1121 /* We created the data file and now write it full of
1122 zeros */
1123
1124 one_created = TRUE;
1125
1126 if (i > 0) {
1127 ib_logf(IB_LOG_LEVEL_INFO,
1128 "Data file %s did not"
1129 " exist: new to be created",
1130 name);
1131 } else {
1132 ib_logf(IB_LOG_LEVEL_INFO,
1133 "The first specified "
1134 "data file %s did not exist: "
1135 "a new database to be created!",
1136 name);
1137
1138 *create_new_db = TRUE;
1139 }
1140
1141 ib_logf(IB_LOG_LEVEL_INFO,
1142 "Setting file %s size to %lu MB",
1143 name,
1144 (ulong) (srv_data_file_sizes[i]
1145 >> (20 - UNIV_PAGE_SIZE_SHIFT)));
1146
1147 ib_logf(IB_LOG_LEVEL_INFO,
1148 "Database physically writes the"
1149 " file full: wait...");
1150
1151 ret = os_file_set_size(
1152 name, files[i],
1153 (os_offset_t) srv_data_file_sizes[i]
1154 << UNIV_PAGE_SIZE_SHIFT);
1155
1156 if (!ret) {
1157 ib_logf(IB_LOG_LEVEL_ERROR,
1158 "Error in creating %s: "
1159 "probably out of disk space",
1160 name);
1161
1162 return(DB_ERROR);
1163 }
1164
1165 *sum_of_new_sizes += srv_data_file_sizes[i];
1166 }
1167
/* The handle was only needed for the checks above */
1168 ret = os_file_close(files[i]);
1169 ut_a(ret);
1170
/* The tablespace object with space id 0 is created once, when
the first data file is processed. */
1171 if (i == 0) {
1172 flags = fsp_flags_set_page_size(0, UNIV_PAGE_SIZE);
1173 fil_space_create(name, 0, flags, FIL_TABLESPACE);
1174 }
1175
1176 ut_a(fil_validate());
1177
1178 if (!fil_node_create(name, srv_data_file_sizes[i], 0,
1179 srv_data_file_is_raw_partition[i] != 0)) {
1180 return(DB_ERROR);
1181 }
1182 }
1183
1184 return(DB_SUCCESS);
1185 }
1186
1187 /*********************************************************************//**
1188 Create undo tablespace.
1189 @return DB_SUCCESS or error code */
1190 static
1191 dberr_t
srv_undo_tablespace_create(const char * name,ulint size)1192 srv_undo_tablespace_create(
1193 /*=======================*/
1194 const char* name, /*!< in: tablespace name */
1195 ulint size) /*!< in: tablespace size in pages */
1196 {
1197 pfs_os_file_t fh;
1198 ibool ret;
1199 dberr_t err = DB_SUCCESS;
1200
1201 os_file_create_subdirs_if_needed(name);
1202
1203 fh = os_file_create(
1204 innodb_file_data_key,
1205 name,
1206 srv_read_only_mode ? OS_FILE_OPEN : OS_FILE_CREATE,
1207 OS_FILE_NORMAL, OS_DATA_FILE, &ret);
1208
1209 if (srv_read_only_mode && ret) {
1210 ib_logf(IB_LOG_LEVEL_INFO,
1211 "%s opened in read-only mode", name);
1212 } else if (ret == FALSE) {
1213 if (os_file_get_last_error(false) != OS_FILE_ALREADY_EXISTS
1214 #ifdef UNIV_AIX
1215 /* AIX 5.1 after security patch ML7 may have
1216 errno set to 0 here, which causes our function
1217 to return 100; work around that AIX problem */
1218 && os_file_get_last_error(false) != 100
1219 #endif /* UNIV_AIX */
1220 ) {
1221 ib_logf(IB_LOG_LEVEL_ERROR,
1222 "Can't create UNDO tablespace %s", name);
1223 } else {
1224 ib_logf(IB_LOG_LEVEL_ERROR,
1225 "Creating system tablespace with"
1226 " existing undo tablespaces is not"
1227 " supported. Please delete all undo"
1228 " tablespaces before creating new"
1229 " system tablespace.");
1230 }
1231 err = DB_ERROR;
1232 } else {
1233 ut_a(!srv_read_only_mode);
1234
1235 /* We created the data file and now write it full of zeros */
1236
1237 ib_logf(IB_LOG_LEVEL_INFO,
1238 "Data file %s did not exist: new to be created",
1239 name);
1240
1241 ib_logf(IB_LOG_LEVEL_INFO,
1242 "Setting file %s size to %lu MB",
1243 name, size >> (20 - UNIV_PAGE_SIZE_SHIFT));
1244
1245 ib_logf(IB_LOG_LEVEL_INFO,
1246 "Database physically writes the file full: wait...");
1247
1248 ret = os_file_set_size(name, fh, size << UNIV_PAGE_SIZE_SHIFT);
1249
1250 if (!ret) {
1251 ib_logf(IB_LOG_LEVEL_INFO,
1252 "Error in creating %s: probably out of "
1253 "disk space", name);
1254
1255 err = DB_ERROR;
1256 }
1257
1258 os_file_close(fh);
1259 }
1260
1261 return(err);
1262 }
1263
1264 /*********************************************************************//**
1265 Open an undo tablespace.
1266 @return DB_SUCCESS or error code */
1267 static
1268 dberr_t
srv_undo_tablespace_open(const char * name,ulint space)1269 srv_undo_tablespace_open(
1270 /*=====================*/
1271 const char* name, /*!< in: tablespace name */
1272 ulint space) /*!< in: tablespace id */
1273 {
1274 pfs_os_file_t fh;
1275 dberr_t err = DB_ERROR;
1276 ibool ret;
1277 ulint flags;
1278
1279 if (!srv_file_check_mode(name)) {
1280 ib_logf(IB_LOG_LEVEL_ERROR,
1281 "UNDO tablespaces must be %s!",
1282 srv_read_only_mode ? "writable" : "readable");
1283
1284 return(DB_ERROR);
1285 }
1286
1287 fh = os_file_create(
1288 innodb_file_data_key, name,
1289 OS_FILE_OPEN_RETRY
1290 | OS_FILE_ON_ERROR_NO_EXIT
1291 | OS_FILE_ON_ERROR_SILENT,
1292 OS_FILE_NORMAL,
1293 OS_DATA_FILE,
1294 &ret);
1295
1296 /* If the file open was successful then load the tablespace. */
1297
1298 if (ret) {
1299 os_offset_t size;
1300
1301 size = os_file_get_size(fh);
1302 ut_a(size != (os_offset_t) -1);
1303
1304 ret = os_file_close(fh);
1305 ut_a(ret);
1306
1307 /* Load the tablespace into InnoDB's internal
1308 data structures. */
1309
1310 /* We set the biggest space id to the undo tablespace
1311 because InnoDB hasn't opened any other tablespace apart
1312 from the system tablespace. */
1313
1314 fil_set_max_space_id_if_bigger(space);
1315
1316 /* Set the compressed page size to 0 (non-compressed) */
1317 flags = fsp_flags_set_page_size(0, UNIV_PAGE_SIZE);
1318 fil_space_create(name, space, flags, FIL_TABLESPACE);
1319
1320 ut_a(fil_validate());
1321
1322 os_offset_t n_pages = size / UNIV_PAGE_SIZE;
1323
1324 /* On 64 bit Windows ulint can be 32 bit and os_offset_t
1325 is 64 bit. It is OK to cast the n_pages to ulint because
1326 the unit has been scaled to pages and they are always
1327 32 bit. */
1328 if (fil_node_create(name, (ulint) n_pages, space, FALSE)) {
1329 err = DB_SUCCESS;
1330 }
1331 }
1332
1333 return(err);
1334 }
1335
1336 /********************************************************************
1337 Opens the configured number of undo tablespaces.
1338 @return DB_SUCCESS or error code */
1339 static
1340 dberr_t
srv_undo_tablespaces_init(ibool create_new_db,const ulint n_conf_tablespaces,ulint * n_opened)1341 srv_undo_tablespaces_init(
1342 /*======================*/
1343 ibool create_new_db, /*!< in: TRUE if new db being
1344 created */
1345 const ulint n_conf_tablespaces, /*!< in: configured undo
1346 tablespaces */
1347 ulint* n_opened) /*!< out: number of UNDO
1348 tablespaces successfully
1349 discovered and opened */
1350 {
1351 ulint i;
1352 dberr_t err = DB_SUCCESS;
1353 ulint prev_space_id = 0;
1354 ulint n_undo_tablespaces;
1355 ulint undo_tablespace_ids[TRX_SYS_N_RSEGS + 1];
1356
1357 *n_opened = 0;
1358
1359 ut_a(n_conf_tablespaces <= TRX_SYS_N_RSEGS);
1360
1361 memset(undo_tablespace_ids, 0x0, sizeof(undo_tablespace_ids));
1362
1363 /* Create the undo spaces only if we are creating a new
1364 instance. We don't allow creating of new undo tablespaces
1365 in an existing instance (yet). This restriction exists because
1366 we check in several places for SYSTEM tablespaces to be less than
1367 the min of user defined tablespace ids. Once we implement saving
1368 the location of the undo tablespaces and their space ids this
1369 restriction will/should be lifted. */
1370
1371 for (i = 0; create_new_db && i < n_conf_tablespaces; ++i) {
1372 char name[OS_FILE_MAX_PATH];
1373 ulint space_id = i + 1;
1374
1375 DBUG_EXECUTE_IF("innodb_undo_upgrade",
1376 space_id = i + 3;);
1377
1378 ut_snprintf(
1379 name, sizeof(name),
1380 "%s%cundo%03lu",
1381 srv_undo_dir, SRV_PATH_SEPARATOR, space_id);
1382
1383 if (i == 0) {
1384 srv_undo_space_id_start = space_id;
1385 prev_space_id = srv_undo_space_id_start - 1;
1386 }
1387
1388 undo_tablespace_ids[i] = space_id;
1389
1390 err = srv_undo_tablespace_create(
1391 name, SRV_UNDO_TABLESPACE_SIZE_IN_PAGES);
1392
1393 if (err != DB_SUCCESS) {
1394
1395 ib_logf(IB_LOG_LEVEL_ERROR,
1396 "Could not create undo tablespace '%s'.",
1397 name);
1398
1399 return(err);
1400 }
1401 }
1402
1403 /* Get the tablespace ids of all the undo segments excluding
1404 the system tablespace (0). If we are creating a new instance then
1405 we build the undo_tablespace_ids ourselves since they don't
1406 already exist. */
1407
1408 if (!create_new_db) {
1409 n_undo_tablespaces = trx_rseg_get_n_undo_tablespaces(
1410 undo_tablespace_ids);
1411
1412 if (n_undo_tablespaces != 0) {
1413 srv_undo_space_id_start = undo_tablespace_ids[0];
1414 prev_space_id = srv_undo_space_id_start - 1;
1415 }
1416
1417 } else {
1418 n_undo_tablespaces = n_conf_tablespaces;
1419
1420 undo_tablespace_ids[n_conf_tablespaces] = ULINT_UNDEFINED;
1421 }
1422
1423 /* Open all the undo tablespaces that are currently in use. If we
1424 fail to open any of these it is a fatal error. The tablespace ids
1425 should be contiguous. It is a fatal error because they are required
1426 for recovery and are referenced by the UNDO logs (a.k.a RBS). */
1427
1428 for (i = 0; i < n_undo_tablespaces; ++i) {
1429 char name[OS_FILE_MAX_PATH];
1430
1431 ut_snprintf(
1432 name, sizeof(name),
1433 "%s%cundo%03lu",
1434 srv_undo_dir, SRV_PATH_SEPARATOR,
1435 undo_tablespace_ids[i]);
1436
1437 /* Should be no gaps in undo tablespace ids. */
1438 ut_a(prev_space_id + 1 == undo_tablespace_ids[i]);
1439
1440 /* The system space id should not be in this array. */
1441 ut_a(undo_tablespace_ids[i] != 0);
1442 ut_a(undo_tablespace_ids[i] != ULINT_UNDEFINED);
1443
1444 err = srv_undo_tablespace_open(name, undo_tablespace_ids[i]);
1445
1446 if (err != DB_SUCCESS) {
1447
1448 ib_logf(IB_LOG_LEVEL_ERROR,
1449 "Unable to open undo tablespace '%s'.", name);
1450
1451 return(err);
1452 }
1453
1454 prev_space_id = undo_tablespace_ids[i];
1455
1456 ++*n_opened;
1457 }
1458
1459 /* Open any extra unused undo tablespaces. These must be contiguous.
1460 We stop at the first failure. These are undo tablespaces that are
1461 not in use and therefore not required by recovery. We only check
1462 that there are no gaps. */
1463
1464 for (i = prev_space_id + 1; i < TRX_SYS_N_RSEGS; ++i) {
1465 char name[OS_FILE_MAX_PATH];
1466
1467 ut_snprintf(
1468 name, sizeof(name),
1469 "%s%cundo%03lu", srv_undo_dir, SRV_PATH_SEPARATOR, i);
1470
1471 /* Undo space ids start from 1. */
1472 err = srv_undo_tablespace_open(name, i);
1473
1474 if (err != DB_SUCCESS) {
1475 break;
1476 }
1477
1478 /** Note the first undo tablespace id in case of
1479 no active undo tablespace. */
1480 if (n_undo_tablespaces == 0) {
1481 srv_undo_space_id_start = i;
1482 }
1483
1484 ++n_undo_tablespaces;
1485
1486 ++*n_opened;
1487 }
1488
1489 /** Explictly specify the srv_undo_space_id_start
1490 as zero when there are no undo tablespaces. */
1491 if (n_undo_tablespaces == 0) {
1492 srv_undo_space_id_start = 0;
1493 }
1494
1495 /* If the user says that there are fewer than what we find we
1496 tolerate that discrepancy but not the inverse. Because there could
1497 be unused undo tablespaces for future use. */
1498
1499 if (n_conf_tablespaces > n_undo_tablespaces) {
1500 ut_print_timestamp(stderr);
1501 fprintf(stderr,
1502 " InnoDB: Expected to open %lu undo "
1503 "tablespaces but was able\n",
1504 n_conf_tablespaces);
1505 ut_print_timestamp(stderr);
1506 fprintf(stderr,
1507 " InnoDB: to find only %lu undo "
1508 "tablespaces.\n", n_undo_tablespaces);
1509 ut_print_timestamp(stderr);
1510 fprintf(stderr,
1511 " InnoDB: Set the "
1512 "innodb_undo_tablespaces parameter to "
1513 "the\n");
1514 ut_print_timestamp(stderr);
1515 fprintf(stderr,
1516 " InnoDB: correct value and retry. Suggested "
1517 "value is %lu\n", n_undo_tablespaces);
1518
1519 return(err != DB_SUCCESS ? err : DB_ERROR);
1520
1521 } else if (n_undo_tablespaces > 0) {
1522
1523 ib_logf(IB_LOG_LEVEL_INFO, "Opened %lu undo tablespaces",
1524 n_undo_tablespaces);
1525
1526 if (n_conf_tablespaces == 0) {
1527 ib_logf(IB_LOG_LEVEL_WARN,
1528 "Using the system tablespace for all UNDO "
1529 "logging because innodb_undo_tablespaces=0");
1530 }
1531 }
1532
1533 if (create_new_db) {
1534 mtr_t mtr;
1535
1536 mtr_start(&mtr);
1537
1538 /* The undo log tablespace */
1539 for (i = 0; i < n_undo_tablespaces; ++i) {
1540
1541 fsp_header_init(
1542 undo_tablespace_ids[i],
1543 SRV_UNDO_TABLESPACE_SIZE_IN_PAGES, &mtr);
1544 }
1545
1546 mtr_commit(&mtr);
1547 }
1548
1549 return(DB_SUCCESS);
1550 }
1551
1552 /********************************************************************
1553 Wait for the purge thread(s) to start up. */
1554 static
1555 void
srv_start_wait_for_purge_to_start()1556 srv_start_wait_for_purge_to_start()
1557 /*===============================*/
1558 {
1559 /* Wait for the purge coordinator and master thread to startup. */
1560
1561 purge_state_t state = trx_purge_state();
1562
1563 ut_a(state != PURGE_STATE_DISABLED);
1564
1565 while (srv_shutdown_state == SRV_SHUTDOWN_NONE
1566 && srv_force_recovery < SRV_FORCE_NO_BACKGROUND
1567 && state == PURGE_STATE_INIT) {
1568
1569 switch (state = trx_purge_state()) {
1570 case PURGE_STATE_RUN:
1571 case PURGE_STATE_STOP:
1572 break;
1573
1574 case PURGE_STATE_INIT:
1575 ib_logf(IB_LOG_LEVEL_INFO,
1576 "Waiting for purge to start");
1577
1578 os_thread_sleep(50000);
1579 break;
1580
1581 case PURGE_STATE_EXIT:
1582 case PURGE_STATE_DISABLED:
1583 ut_error;
1584 }
1585 }
1586 }
1587
1588 /*********************************************************************//**
1589 Initializes the log tracking subsystem and starts its thread. */
1590 static
1591 void
init_log_online(void)1592 init_log_online(void)
1593 /*=================*/
1594 {
1595 if (UNIV_UNLIKELY(srv_force_recovery > 0 || srv_read_only_mode)) {
1596 srv_track_changed_pages = FALSE;
1597 return;
1598 }
1599
1600 if (srv_track_changed_pages) {
1601
1602 log_online_read_init();
1603
1604 /* Create the thread that follows the redo log to output the
1605 changed page bitmap */
1606 os_thread_create(&srv_redo_log_follow_thread, NULL,
1607 thread_ids + 5 + SRV_MAX_N_IO_THREADS);
1608 }
1609 }
1610
1611 /********************************************************************
1612 Starts InnoDB and creates a new database if database files
1613 are not found and the user wants.
1614 @return DB_SUCCESS or error code */
1615 UNIV_INTERN
1616 dberr_t
innobase_start_or_create_for_mysql(void)1617 innobase_start_or_create_for_mysql(void)
1618 /*====================================*/
1619 {
1620 ibool create_new_db;
1621 lsn_t min_flushed_lsn;
1622 lsn_t max_flushed_lsn;
1623 #ifdef UNIV_LOG_ARCHIVE
1624 lsn_t min_arch_log_no = LSN_MAX;
1625 lsn_t max_arch_log_no = LSN_MAX;
1626 #endif /* UNIV_LOG_ARCHIVE */
1627 ulint sum_of_new_sizes;
1628 ulint sum_of_data_file_sizes;
1629 ulint tablespace_size_in_header;
1630 dberr_t err;
1631 unsigned i;
1632 ulint srv_n_log_files_found = srv_n_log_files;
1633 ulint io_limit;
1634 mtr_t mtr;
1635 ib_bh_t* ib_bh;
1636 ulint n_recovered_trx;
1637 char logfilename[10000];
1638 char* logfile0 = NULL;
1639 size_t dirnamelen;
1640
1641 if (srv_force_recovery == SRV_FORCE_NO_LOG_REDO) {
1642 srv_read_only_mode = 1;
1643 }
1644
1645 high_level_read_only = srv_read_only_mode
1646 || srv_force_recovery > SRV_FORCE_NO_TRX_UNDO;
1647
1648 if (srv_read_only_mode) {
1649 ib_logf(IB_LOG_LEVEL_INFO, "Started in read only mode");
1650 }
1651
1652 #ifdef HAVE_DARWIN_THREADS
1653 # ifdef F_FULLFSYNC
1654 /* This executable has been compiled on Mac OS X 10.3 or later.
1655 Assume that F_FULLFSYNC is available at run-time. */
1656 srv_have_fullfsync = TRUE;
1657 # else /* F_FULLFSYNC */
1658 /* This executable has been compiled on Mac OS X 10.2
1659 or earlier. Determine if the executable is running
1660 on Mac OS X 10.3 or later. */
1661 struct utsname utsname;
1662 if (uname(&utsname)) {
1663 ut_print_timestamp(stderr);
1664 fputs(" InnoDB: cannot determine Mac OS X version!\n", stderr);
1665 } else {
1666 srv_have_fullfsync = strcmp(utsname.release, "7.") >= 0;
1667 }
1668 if (!srv_have_fullfsync) {
1669 ut_print_timestamp(stderr);
1670 fputs(" InnoDB: On Mac OS X, fsync() may be "
1671 "broken on internal drives,\n", stderr);
1672 ut_print_timestamp(stderr);
1673 fputs(" InnoDB: making transactions unsafe!\n", stderr);
1674 }
1675 # endif /* F_FULLFSYNC */
1676 #endif /* HAVE_DARWIN_THREADS */
1677
1678 ib_logf(IB_LOG_LEVEL_INFO,
1679 "Using %s to ref count buffer pool pages",
1680 #ifdef PAGE_ATOMIC_REF_COUNT
1681 "atomics"
1682 #else
1683 "mutexes"
1684 #endif /* PAGE_ATOMIC_REF_COUNT */
1685 );
1686
1687
1688 if (sizeof(ulint) != sizeof(void*)) {
1689 ut_print_timestamp(stderr);
1690 fprintf(stderr,
1691 " InnoDB: Error: size of InnoDB's ulint is %lu, "
1692 "but size of void*\n", (ulong) sizeof(ulint));
1693 ut_print_timestamp(stderr);
1694 fprintf(stderr,
1695 " InnoDB: is %lu. The sizes should be the same "
1696 "so that on a 64-bit\n",
1697 (ulong) sizeof(void*));
1698 ut_print_timestamp(stderr);
1699 fprintf(stderr,
1700 " InnoDB: platforms you can allocate more than 4 GB "
1701 "of memory.\n");
1702 }
1703
1704 #ifdef UNIV_DEBUG
1705 ut_print_timestamp(stderr);
1706 fprintf(stderr,
1707 " InnoDB: !!!!!!!! UNIV_DEBUG switched on !!!!!!!!!\n");
1708 #endif
1709
1710 #ifdef UNIV_IBUF_DEBUG
1711 ut_print_timestamp(stderr);
1712 fprintf(stderr,
1713 " InnoDB: !!!!!!!! UNIV_IBUF_DEBUG switched on !!!!!!!!!\n");
1714 # ifdef UNIV_IBUF_COUNT_DEBUG
1715 ut_print_timestamp(stderr);
1716 fprintf(stderr,
1717 " InnoDB: !!!!!!!! UNIV_IBUF_COUNT_DEBUG switched on "
1718 "!!!!!!!!!\n");
1719 ut_print_timestamp(stderr);
1720 fprintf(stderr,
1721 " InnoDB: Crash recovery will fail with UNIV_IBUF_COUNT_DEBUG\n");
1722 # endif
1723 #endif
1724
1725 #ifdef UNIV_BLOB_DEBUG
1726 fprintf(stderr,
1727 "InnoDB: !!!!!!!! UNIV_BLOB_DEBUG switched on !!!!!!!!!\n"
1728 "InnoDB: Server restart may fail with UNIV_BLOB_DEBUG\n");
1729 #endif /* UNIV_BLOB_DEBUG */
1730
1731 #ifdef UNIV_SYNC_DEBUG
1732 ut_print_timestamp(stderr);
1733 fprintf(stderr,
1734 " InnoDB: !!!!!!!! UNIV_SYNC_DEBUG switched on !!!!!!!!!\n");
1735 #endif
1736
1737 #ifdef UNIV_SEARCH_DEBUG
1738 ut_print_timestamp(stderr);
1739 fprintf(stderr,
1740 " InnoDB: !!!!!!!! UNIV_SEARCH_DEBUG switched on !!!!!!!!!\n");
1741 #endif
1742
1743 #ifdef UNIV_LOG_LSN_DEBUG
1744 ut_print_timestamp(stderr);
1745 fprintf(stderr,
1746 " InnoDB: !!!!!!!! UNIV_LOG_LSN_DEBUG switched on !!!!!!!!!\n");
1747 #endif /* UNIV_LOG_LSN_DEBUG */
1748 #ifdef UNIV_MEM_DEBUG
1749 ut_print_timestamp(stderr);
1750 fprintf(stderr,
1751 " InnoDB: !!!!!!!! UNIV_MEM_DEBUG switched on !!!!!!!!!\n");
1752 #endif
1753
1754 if (srv_use_sys_malloc) {
1755 ib_logf(IB_LOG_LEVEL_INFO,
1756 "The InnoDB memory heap is disabled");
1757 }
1758
1759 #if defined(COMPILER_HINTS_ENABLED)
1760 ib_logf(IB_LOG_LEVEL_INFO,
1761 " InnoDB: Compiler hints enabled.");
1762 #endif /* defined(COMPILER_HINTS_ENABLED) */
1763
1764 ib_logf(IB_LOG_LEVEL_INFO,
1765 "" IB_ATOMICS_STARTUP_MSG "");
1766
1767 ib_logf(IB_LOG_LEVEL_INFO,
1768 "" IB_MEMORY_BARRIER_STARTUP_MSG "");
1769
1770 #ifndef HAVE_MEMORY_BARRIER
1771 #if defined __i386__ || defined __x86_64__ || defined _M_IX86 || defined _M_X64 || defined __WIN__
1772 #else
1773 ib_logf(IB_LOG_LEVEL_WARN,
1774 "MySQL was built without a memory barrier capability on this"
1775 " architecture, which might allow a mutex/rw_lock violation"
1776 " under high thread concurrency. This may cause a hang.");
1777 #endif /* IA32 or AMD64 */
1778 #endif /* HAVE_MEMORY_BARRIER */
1779
1780 ib_logf(IB_LOG_LEVEL_INFO,
1781 "Compressed tables use zlib " ZLIB_VERSION
1782 #ifdef UNIV_ZIP_DEBUG
1783 " with validation"
1784 #endif /* UNIV_ZIP_DEBUG */
1785 );
1786 #ifdef UNIV_ZIP_COPY
1787 ib_logf(IB_LOG_LEVEL_INFO, "and extra copying");
1788 #endif /* UNIV_ZIP_COPY */
1789
1790
1791 /* Since InnoDB does not currently clean up all its internal data
1792 structures in MySQL Embedded Server Library server_end(), we
1793 print an error message if someone tries to start up InnoDB a
1794 second time during the process lifetime. */
1795
1796 if (srv_start_has_been_called) {
1797 ut_print_timestamp(stderr);
1798 fprintf(stderr, " InnoDB: Error: startup called second time "
1799 "during the process\n");
1800 ut_print_timestamp(stderr);
1801 fprintf(stderr, " InnoDB: lifetime. In the MySQL Embedded "
1802 "Server Library you\n");
1803 ut_print_timestamp(stderr);
1804 fprintf(stderr, " InnoDB: cannot call server_init() more "
1805 "than once during the\n");
1806 ut_print_timestamp(stderr);
1807 fprintf(stderr, " InnoDB: process lifetime.\n");
1808 }
1809
1810 srv_start_has_been_called = TRUE;
1811
1812 #ifdef UNIV_DEBUG
1813 log_do_write = TRUE;
1814 #endif /* UNIV_DEBUG */
1815 /* yydebug = TRUE; */
1816
1817 srv_is_being_started = TRUE;
1818 srv_startup_is_before_trx_rollback_phase = TRUE;
1819
1820 #ifdef __WIN__
1821 switch (os_get_os_version()) {
1822 case OS_WIN95:
1823 case OS_WIN31:
1824 case OS_WINNT:
1825 /* On Win 95, 98, ME, Win32 subsystem for Windows 3.1,
1826 and NT use simulated aio. In NT Windows provides async i/o,
1827 but when run in conjunction with InnoDB Hot Backup, it seemed
1828 to corrupt the data files. */
1829
1830 srv_use_native_aio = FALSE;
1831 break;
1832
1833 case OS_WIN2000:
1834 case OS_WINXP:
1835 /* On 2000 and XP, async IO is available. */
1836 srv_use_native_aio = TRUE;
1837 break;
1838
1839 default:
1840 /* Vista and later have both async IO and condition variables */
1841 srv_use_native_aio = TRUE;
1842 srv_use_native_conditions = TRUE;
1843 break;
1844 }
1845
1846 #elif defined(LINUX_NATIVE_AIO)
1847
1848 if (srv_use_native_aio) {
1849 ib_logf(IB_LOG_LEVEL_INFO, "Using Linux native AIO");
1850 }
1851 #else
1852 /* Currently native AIO is supported only on windows and linux
1853 and that also when the support is compiled in. In all other
1854 cases, we ignore the setting of innodb_use_native_aio. */
1855 srv_use_native_aio = FALSE;
1856 #endif /* __WIN__ */
1857
1858 if (srv_file_flush_method_str == NULL) {
1859 /* These are the default options */
1860
1861 srv_unix_file_flush_method = SRV_UNIX_FSYNC;
1862
1863 srv_win_file_flush_method = SRV_WIN_IO_UNBUFFERED;
1864 #ifndef __WIN__
1865 } else if (0 == ut_strcmp(srv_file_flush_method_str, "fsync")) {
1866 srv_unix_file_flush_method = SRV_UNIX_FSYNC;
1867
1868 } else if (0 == ut_strcmp(srv_file_flush_method_str, "O_DSYNC")) {
1869 srv_unix_file_flush_method = SRV_UNIX_O_DSYNC;
1870
1871 } else if (0 == ut_strcmp(srv_file_flush_method_str, "O_DIRECT")) {
1872 srv_unix_file_flush_method = SRV_UNIX_O_DIRECT;
1873
1874 } else if (0 == ut_strcmp(srv_file_flush_method_str, "ALL_O_DIRECT")) {
1875 srv_unix_file_flush_method = SRV_UNIX_ALL_O_DIRECT;
1876
1877 } else if (0 == ut_strcmp(srv_file_flush_method_str, "O_DIRECT_NO_FSYNC")) {
1878 srv_unix_file_flush_method = SRV_UNIX_O_DIRECT_NO_FSYNC;
1879
1880 } else if (0 == ut_strcmp(srv_file_flush_method_str, "littlesync")) {
1881 srv_unix_file_flush_method = SRV_UNIX_LITTLESYNC;
1882
1883 } else if (0 == ut_strcmp(srv_file_flush_method_str, "nosync")) {
1884 srv_unix_file_flush_method = SRV_UNIX_NOSYNC;
1885 #else
1886 } else if (0 == ut_strcmp(srv_file_flush_method_str, "normal")) {
1887 srv_win_file_flush_method = SRV_WIN_IO_NORMAL;
1888 srv_use_native_aio = FALSE;
1889
1890 } else if (0 == ut_strcmp(srv_file_flush_method_str, "unbuffered")) {
1891 srv_win_file_flush_method = SRV_WIN_IO_UNBUFFERED;
1892 srv_use_native_aio = FALSE;
1893
1894 } else if (0 == ut_strcmp(srv_file_flush_method_str,
1895 "async_unbuffered")) {
1896 srv_win_file_flush_method = SRV_WIN_IO_UNBUFFERED;
1897 #endif /* __WIN__ */
1898 } else {
1899 ib_logf(IB_LOG_LEVEL_ERROR,
1900 "Unrecognized value %s for innodb_flush_method",
1901 srv_file_flush_method_str);
1902 return(DB_ERROR);
1903 }
1904
1905 /* Note that the call srv_boot() also changes the values of
1906 some variables to the units used by InnoDB internally */
1907
1908 /* Set the maximum number of threads which can wait for a semaphore
1909 inside InnoDB: this is the 'sync wait array' size, as well as the
1910 maximum number of threads that can wait in the 'srv_conc array' for
1911 their time to enter InnoDB. */
1912
1913 #define BUF_POOL_SIZE_THRESHOLD (1024 * 1024 * 1024)
1914 srv_max_n_threads = 1 /* io_ibuf_thread */
1915 + 1 /* io_log_thread */
1916 + 1 /* lock_wait_timeout_thread */
1917 + 1 /* srv_error_monitor_thread */
1918 + 1 /* srv_monitor_thread */
1919 + 1 /* srv_master_thread */
1920 + 1 /* srv_redo_log_follow_thread */
1921 + 1 /* srv_purge_coordinator_thread */
1922 + 1 /* buf_dump_thread */
1923 + 1 /* dict_stats_thread */
1924 + 1 /* fts_optimize_thread */
1925 + 1 /* recv_writer_thread */
1926 + 1 /* buf_flush_page_cleaner_thread */
1927 + 1 /* trx_rollback_or_clean_all_recovered */
1928 + 128 /* added as margin, for use of
1929 InnoDB Memcached etc. */
1930 + max_connections
1931 + srv_n_read_io_threads
1932 + srv_n_write_io_threads
1933 + srv_n_purge_threads
1934 /* FTS Parallel Sort */
1935 + fts_sort_pll_degree * FTS_NUM_AUX_INDEX
1936 * max_connections;
1937
1938 if (srv_buf_pool_size < BUF_POOL_SIZE_THRESHOLD) {
1939 /* If buffer pool is less than 1 GB,
1940 use only one buffer pool instance */
1941 srv_buf_pool_instances = 1;
1942 }
1943
1944 srv_boot();
1945
1946 ib_logf(IB_LOG_LEVEL_INFO,
1947 "%s CPU crc32 instructions",
1948 ut_crc32_sse2_enabled ? "Using" : "Not using");
1949
1950 if (!srv_read_only_mode) {
1951
1952 mutex_create(srv_monitor_file_mutex_key,
1953 &srv_monitor_file_mutex, SYNC_NO_ORDER_CHECK);
1954
1955 if (srv_innodb_status) {
1956
1957 srv_monitor_file_name = static_cast<char*>(
1958 mem_alloc(
1959 strlen(fil_path_to_mysql_datadir)
1960 + 20 + sizeof "/innodb_status."));
1961
1962 sprintf(srv_monitor_file_name, "%s/innodb_status.%lu",
1963 fil_path_to_mysql_datadir,
1964 os_proc_get_number());
1965
1966 srv_monitor_file = fopen(srv_monitor_file_name, "w+");
1967
1968 if (!srv_monitor_file) {
1969
1970 ib_logf(IB_LOG_LEVEL_ERROR,
1971 "Unable to create %s: %s",
1972 srv_monitor_file_name,
1973 strerror(errno));
1974
1975 return(DB_ERROR);
1976 }
1977 } else {
1978 srv_monitor_file_name = NULL;
1979 srv_monitor_file = os_file_create_tmpfile(NULL);
1980
1981 if (!srv_monitor_file) {
1982 return(DB_ERROR);
1983 }
1984 }
1985
1986 mutex_create(srv_dict_tmpfile_mutex_key,
1987 &srv_dict_tmpfile_mutex, SYNC_DICT_OPERATION);
1988
1989 srv_dict_tmpfile = os_file_create_tmpfile(NULL);
1990
1991 if (!srv_dict_tmpfile) {
1992 return(DB_ERROR);
1993 }
1994
1995 mutex_create(srv_misc_tmpfile_mutex_key,
1996 &srv_misc_tmpfile_mutex, SYNC_ANY_LATCH);
1997
1998 srv_misc_tmpfile = os_file_create_tmpfile(NULL);
1999
2000 if (!srv_misc_tmpfile) {
2001 return(DB_ERROR);
2002 }
2003 }
2004
2005 /* If user has set the value of innodb_file_io_threads then
2006 we'll emit a message telling the user that this parameter
2007 is now deprecated. */
2008 if (srv_n_file_io_threads != 4) {
2009 ib_logf(IB_LOG_LEVEL_WARN,
2010 "innodb_file_io_threads is deprecated. Please use "
2011 "innodb_read_io_threads and innodb_write_io_threads "
2012 "instead");
2013 }
2014
2015 /* Now overwrite the value on srv_n_file_io_threads */
2016 srv_n_file_io_threads = srv_n_read_io_threads;
2017
2018 if (!srv_read_only_mode) {
2019 /* Add the log and ibuf IO threads. */
2020 srv_n_file_io_threads += 2;
2021 srv_n_file_io_threads += srv_n_write_io_threads;
2022 } else {
2023 ib_logf(IB_LOG_LEVEL_INFO,
2024 "Disabling background IO write threads.");
2025
2026 srv_n_write_io_threads = 0;
2027 }
2028
2029 ut_a(srv_n_file_io_threads <= SRV_MAX_N_IO_THREADS);
2030
2031 io_limit = 8 * SRV_N_PENDING_IOS_PER_THREAD;
2032
2033 /* On Windows when using native aio the number of aio requests
2034 that a thread can handle at a given time is limited to 32
2035 i.e.: SRV_N_PENDING_IOS_PER_THREAD */
2036 # ifdef __WIN__
2037 if (srv_use_native_aio) {
2038 io_limit = SRV_N_PENDING_IOS_PER_THREAD;
2039 }
2040 # endif /* __WIN__ */
2041
2042 if (!os_aio_init(io_limit,
2043 srv_n_read_io_threads,
2044 srv_n_write_io_threads,
2045 SRV_MAX_N_PENDING_SYNC_IOS)) {
2046
2047 ib_logf(IB_LOG_LEVEL_ERROR,
2048 "Fatal : Cannot initialize AIO sub-system");
2049
2050 return(DB_ERROR);
2051 }
2052
2053 fil_init(srv_file_per_table ? 50000 : 5000, srv_max_n_open_files);
2054
2055 double size;
2056 char unit;
2057
2058 if (srv_buf_pool_size >= 1024 * 1024 * 1024) {
2059 size = ((double) srv_buf_pool_size) / (1024 * 1024 * 1024);
2060 unit = 'G';
2061 } else {
2062 size = ((double) srv_buf_pool_size) / (1024 * 1024);
2063 unit = 'M';
2064 }
2065
2066 /* Print time to initialize the buffer pool */
2067 ib_logf(IB_LOG_LEVEL_INFO,
2068 "Initializing buffer pool, size = %.1f%c", size, unit);
2069
2070 err = buf_pool_init(srv_buf_pool_size, static_cast<bool>(srv_numa_interleave),
2071 srv_buf_pool_instances);
2072
2073 if (err != DB_SUCCESS) {
2074 ib_logf(IB_LOG_LEVEL_ERROR,
2075 "Cannot allocate memory for the buffer pool");
2076
2077 return(DB_ERROR);
2078 }
2079
2080 ib_logf(IB_LOG_LEVEL_INFO,
2081 "Completed initialization of buffer pool");
2082
2083 #ifdef UNIV_DEBUG
2084 /* We have observed deadlocks with a 5MB buffer pool but
2085 the actual lower limit could very well be a little higher. */
2086
2087 if (srv_buf_pool_size <= 5 * 1024 * 1024) {
2088
2089 ib_logf(IB_LOG_LEVEL_INFO,
2090 "Small buffer pool size (%luM), the flst_validate() "
2091 "debug function can cause a deadlock if the "
2092 "buffer pool fills up.",
2093 srv_buf_pool_size / 1024 / 1024);
2094 }
2095 #endif /* UNIV_DEBUG */
2096
2097 fsp_init();
2098 log_init();
2099 log_online_init();
2100
2101 lock_sys_create(srv_lock_table_size);
2102
2103 /* Create i/o-handler threads: */
2104
2105 for (i = 0; i < srv_n_file_io_threads; ++i) {
2106
2107 n[i] = i;
2108
2109 os_thread_create(io_handler_thread, n + i, thread_ids + i);
2110 }
2111
2112 if (srv_n_log_files * srv_log_file_size * UNIV_PAGE_SIZE
2113 >= 512ULL * 1024ULL * 1024ULL * 1024ULL) {
2114 /* log_block_convert_lsn_to_no() limits the returned block
2115 number to 1G and given that OS_FILE_LOG_BLOCK_SIZE is 512
2116 bytes, then we have a limit of 512 GB. If that limit is to
2117 be raised, then log_block_convert_lsn_to_no() must be
2118 modified. */
2119 ib_logf(IB_LOG_LEVEL_ERROR,
2120 "Combined size of log files must be < 512 GB");
2121
2122 return(DB_ERROR);
2123 }
2124
2125 if (srv_n_log_files * srv_log_file_size >= ULINT_MAX) {
2126 /* fil_io() takes ulint as an argument and we are passing
2127 (next_offset / UNIV_PAGE_SIZE) to it in log_group_write_buf().
2128 So (next_offset / UNIV_PAGE_SIZE) must be less than ULINT_MAX.
2129 So next_offset must be < ULINT_MAX * UNIV_PAGE_SIZE. This
2130 means that we are limited to ULINT_MAX * UNIV_PAGE_SIZE which
2131 is 64 TB on 32 bit systems. */
2132 fprintf(stderr,
2133 " InnoDB: Error: combined size of log files"
2134 " must be < %lu GB\n",
2135 ULINT_MAX / 1073741824 * UNIV_PAGE_SIZE);
2136
2137 return(DB_ERROR);
2138 }
2139
2140 sum_of_new_sizes = 0;
2141
2142 for (i = 0; i < srv_n_data_files; i++) {
2143 #ifndef __WIN__
2144 if (sizeof(off_t) < 5
2145 && srv_data_file_sizes[i]
2146 >= (ulint) (1 << (32 - UNIV_PAGE_SIZE_SHIFT))) {
2147 ut_print_timestamp(stderr);
2148 fprintf(stderr,
2149 " InnoDB: Error: file size must be < 4 GB"
2150 " with this MySQL binary\n");
2151 ut_print_timestamp(stderr);
2152 fprintf(stderr,
2153 " InnoDB: and operating system combination,"
2154 " in some OS's < 2 GB\n");
2155
2156 return(DB_ERROR);
2157 }
2158 #endif
2159 sum_of_new_sizes += srv_data_file_sizes[i];
2160 }
2161
2162 if (sum_of_new_sizes < 10485760 / UNIV_PAGE_SIZE) {
2163 ib_logf(IB_LOG_LEVEL_ERROR,
2164 "Tablespace size must be at least 10 MB");
2165
2166 return(DB_ERROR);
2167 }
2168
2169 recv_sys_create();
2170 recv_sys_init(buf_pool_get_curr_size());
2171
2172 err = open_or_create_data_files(&create_new_db,
2173 #ifdef UNIV_LOG_ARCHIVE
2174 &min_arch_log_no, &max_arch_log_no,
2175 #endif /* UNIV_LOG_ARCHIVE */
2176 &min_flushed_lsn, &max_flushed_lsn,
2177 &sum_of_new_sizes);
2178 if (err == DB_FAIL) {
2179
2180 ib_logf(IB_LOG_LEVEL_ERROR,
2181 "The system tablespace must be writable!");
2182
2183 return(DB_ERROR);
2184
2185 } else if (err != DB_SUCCESS) {
2186
2187 ib_logf(IB_LOG_LEVEL_ERROR,
2188 "Could not open or create the system tablespace. If "
2189 "you tried to add new data files to the system "
2190 "tablespace, and it failed here, you should now "
2191 "edit innodb_data_file_path in my.cnf back to what "
2192 "it was, and remove the new ibdata files InnoDB "
2193 "created in this failed attempt. InnoDB only wrote "
2194 "those files full of zeros, but did not yet use "
2195 "them in any way. But be careful: do not remove "
2196 "old data files which contain your precious data!");
2197
2198 return(err);
2199 }
2200
2201 #ifdef UNIV_LOG_ARCHIVE
2202 srv_normalize_path_for_win(srv_arch_dir);
2203 #endif /* UNIV_LOG_ARCHIVE */
2204
2205 dirnamelen = strlen(srv_log_group_home_dir);
2206 ut_a(dirnamelen < (sizeof logfilename) - 10 - sizeof "ib_logfile");
2207 memcpy(logfilename, srv_log_group_home_dir, dirnamelen);
2208
2209 /* Add a path separator if needed. */
2210 if (dirnamelen && logfilename[dirnamelen - 1] != SRV_PATH_SEPARATOR) {
2211 logfilename[dirnamelen++] = SRV_PATH_SEPARATOR;
2212 }
2213
2214 srv_log_file_size_requested = srv_log_file_size;
2215
2216 if (create_new_db) {
2217 bool success = buf_flush_list(ULINT_MAX, LSN_MAX, NULL);
2218 ut_a(success);
2219
2220 min_flushed_lsn = max_flushed_lsn = log_get_lsn();
2221
2222 buf_flush_wait_batch_end(NULL, BUF_FLUSH_LIST);
2223
2224 err = create_log_files(create_new_db, logfilename, dirnamelen,
2225 max_flushed_lsn, logfile0);
2226
2227 if (err != DB_SUCCESS) {
2228 return(err);
2229 }
2230 } else {
2231 for (i = 0; i < SRV_N_LOG_FILES_MAX; i++) {
2232 os_offset_t size;
2233 os_file_stat_t stat_info;
2234
2235 sprintf(logfilename + dirnamelen,
2236 "ib_logfile%u", i);
2237
2238 err = os_file_get_status(
2239 logfilename, &stat_info, false);
2240
2241 if (err == DB_NOT_FOUND) {
2242 if (i == 0) {
2243 if (max_flushed_lsn
2244 != min_flushed_lsn) {
2245 ib_logf(IB_LOG_LEVEL_ERROR,
2246 "Cannot create"
2247 " log files because"
2248 " data files are"
2249 " corrupt or"
2250 " not in sync"
2251 " with each other");
2252 return(DB_ERROR);
2253 }
2254
2255 if (max_flushed_lsn < (lsn_t) 1000) {
2256 ib_logf(IB_LOG_LEVEL_ERROR,
2257 "Cannot create"
2258 " log files because"
2259 " data files are"
2260 " corrupt or the"
2261 " database was not"
2262 " shut down cleanly"
2263 " after creating"
2264 " the data files.");
2265 return(DB_ERROR);
2266 }
2267
2268 err = create_log_files(
2269 create_new_db, logfilename,
2270 dirnamelen, max_flushed_lsn,
2271 logfile0);
2272
2273 if (err != DB_SUCCESS) {
2274 return(err);
2275 }
2276
2277 create_log_files_rename(
2278 logfilename, dirnamelen,
2279 max_flushed_lsn, logfile0);
2280
2281 /* Suppress the message about
2282 crash recovery. */
2283 max_flushed_lsn = min_flushed_lsn
2284 = log_get_lsn();
2285 goto files_checked;
2286 } else if (i < 2) {
2287 /* must have at least 2 log files */
2288 ib_logf(IB_LOG_LEVEL_ERROR,
2289 "Only one log file found.");
2290 return(err);
2291 }
2292
2293 /* opened all files */
2294 break;
2295 }
2296
2297 if (!srv_file_check_mode(logfilename)) {
2298 return(DB_ERROR);
2299 }
2300
2301 err = open_log_file(&files[i], logfilename, &size);
2302
2303 if (err != DB_SUCCESS) {
2304 return(err);
2305 }
2306
2307 ut_a(size != (os_offset_t) -1);
2308
2309 if (size & ((1 << UNIV_PAGE_SIZE_SHIFT) - 1)) {
2310 ib_logf(IB_LOG_LEVEL_ERROR,
2311 "Log file %s size "
2312 UINT64PF " is not a multiple of"
2313 " innodb_page_size",
2314 logfilename, size);
2315 return(DB_ERROR);
2316 }
2317
2318 size >>= UNIV_PAGE_SIZE_SHIFT;
2319
2320 if (i == 0) {
2321 srv_log_file_size = size;
2322 } else if (size != srv_log_file_size) {
2323 ib_logf(IB_LOG_LEVEL_ERROR,
2324 "Log file %s is"
2325 " of different size " UINT64PF " bytes"
2326 " than other log"
2327 " files " UINT64PF " bytes!",
2328 logfilename,
2329 size << UNIV_PAGE_SIZE_SHIFT,
2330 (os_offset_t) srv_log_file_size
2331 << UNIV_PAGE_SIZE_SHIFT);
2332 return(DB_ERROR);
2333 }
2334 }
2335
2336 srv_n_log_files_found = i;
2337
2338 /* Create the in-memory file space objects. */
2339
2340 sprintf(logfilename + dirnamelen, "ib_logfile%u", 0);
2341
2342 fil_space_create(logfilename,
2343 SRV_LOG_SPACE_FIRST_ID,
2344 fsp_flags_set_page_size(0, UNIV_PAGE_SIZE),
2345 FIL_LOG);
2346
2347 ut_a(fil_validate());
2348
2349 /* srv_log_file_size is measured in pages; if page size is 16KB,
2350 then we have a limit of 64TB on 32 bit systems */
2351 ut_a(srv_log_file_size <= ULINT_MAX);
2352
2353 for (unsigned j = 0; j < i; j++) {
2354 sprintf(logfilename + dirnamelen, "ib_logfile%u", j);
2355
2356 if (!fil_node_create(logfilename,
2357 (ulint) srv_log_file_size,
2358 SRV_LOG_SPACE_FIRST_ID, FALSE)) {
2359 return(DB_ERROR);
2360 }
2361 }
2362
2363 #ifdef UNIV_LOG_ARCHIVE
2364 /* Create the file space object for archived logs. Under
2365 MySQL, no archiving ever done. */
2366 fil_space_create("arch_log_space", SRV_LOG_SPACE_FIRST_ID + 1,
2367 0, FIL_LOG);
2368 #endif /* UNIV_LOG_ARCHIVE */
2369 log_group_init(0, i, srv_log_file_size * UNIV_PAGE_SIZE,
2370 SRV_LOG_SPACE_FIRST_ID,
2371 SRV_LOG_SPACE_FIRST_ID + 1);
2372 }
2373
2374 files_checked:
2375 /* Open all log files and data files in the system
2376 tablespace: we keep them open until database
2377 shutdown */
2378
2379 fil_open_log_and_system_tablespace_files();
2380
2381 err = srv_undo_tablespaces_init(
2382 create_new_db,
2383 srv_undo_tablespaces,
2384 &srv_undo_tablespaces_open);
2385
2386 /* If the force recovery is set very high then we carry on regardless
2387 of all errors. Basically this is fingers crossed mode. */
2388
2389 if (err != DB_SUCCESS
2390 && srv_force_recovery < SRV_FORCE_NO_UNDO_LOG_SCAN) {
2391
2392 return(err);
2393 }
2394
2395 /* Initialize objects used by dict stats gathering thread, which
2396 can also be used by recovery if it tries to drop some table */
2397 if (!srv_read_only_mode) {
2398 dict_stats_thread_init();
2399 }
2400
2401 trx_sys_file_format_init();
2402
2403 trx_sys_create();
2404
2405 bool srv_monitor_thread_started = false;
2406
2407 if (create_new_db) {
2408
2409 ut_a(!srv_read_only_mode);
2410 init_log_online();
2411
2412 mtr_start(&mtr);
2413
2414 fsp_header_init(0, sum_of_new_sizes, &mtr);
2415
2416 mtr_commit(&mtr);
2417
2418 /* To maintain backward compatibility we create only
2419 the first rollback segment before the double write buffer.
2420 All the remaining rollback segments will be created later,
2421 after the double write buffer has been created. */
2422 trx_sys_create_sys_pages();
2423
2424 ib_bh = trx_sys_init_at_db_start();
2425 n_recovered_trx = UT_LIST_GET_LEN(trx_sys->rw_trx_list);
2426
2427 /* The purge system needs to create the purge view and
2428 therefore requires that the trx_sys is inited. */
2429
2430 trx_purge_sys_create(srv_n_purge_threads, ib_bh);
2431
2432 err = dict_create();
2433
2434 if (err != DB_SUCCESS) {
2435 return(err);
2436 }
2437
2438 srv_startup_is_before_trx_rollback_phase = FALSE;
2439
2440 bool success = buf_flush_list(ULINT_MAX, LSN_MAX, NULL);
2441 ut_a(success);
2442
2443 min_flushed_lsn = max_flushed_lsn = log_get_lsn();
2444
2445 buf_flush_wait_batch_end(NULL, BUF_FLUSH_LIST);
2446
2447 /* Stamp the LSN to the data files. */
2448 fil_write_flushed_lsn_to_data_files(max_flushed_lsn, 0);
2449
2450 fil_flush_file_spaces(FIL_TABLESPACE);
2451
2452 create_log_files_rename(logfilename, dirnamelen,
2453 max_flushed_lsn, logfile0);
2454 } else {
2455
2456 /* Check if we support the max format that is stamped
2457 on the system tablespace.
2458 Note: We are NOT allowed to make any modifications to
2459 the TRX_SYS_PAGE_NO page before recovery because this
2460 page also contains the max_trx_id etc. important system
2461 variables that are required for recovery. We need to
2462 ensure that we return the system to a state where normal
2463 recovery is guaranteed to work. We do this by
2464 invalidating the buffer cache, this will force the
2465 reread of the page and restoration to its last known
2466 consistent state, this is REQUIRED for the recovery
2467 process to work. */
2468 err = trx_sys_file_format_max_check(
2469 srv_max_file_format_at_startup);
2470
2471 if (err != DB_SUCCESS) {
2472 return(err);
2473 }
2474
2475 /* Invalidate the buffer pool to ensure that we reread
2476 the page that we read above, during recovery.
2477 Note that this is not as heavy weight as it seems. At
2478 this point there will be only ONE page in the buf_LRU
2479 and there must be no page in the buf_flush list. */
2480 buf_pool_invalidate();
2481
2482 /* Start monitor thread early enough so that e.g. crash
2483 recovery failing to find free pages in the buffer pool is
2484 diagnosed. */
2485 if (!srv_read_only_mode)
2486 {
2487 /* Create the thread which prints InnoDB monitor
2488 info */
2489 os_thread_create(
2490 srv_monitor_thread,
2491 NULL, thread_ids + 4 + SRV_MAX_N_IO_THREADS);
2492
2493 srv_monitor_thread_started = true;
2494 }
2495
2496 /* We always try to do a recovery, even if the database had
2497 been shut down normally: this is the normal startup path */
2498
2499 err = recv_recovery_from_checkpoint_start(
2500 LOG_CHECKPOINT, LSN_MAX,
2501 min_flushed_lsn, max_flushed_lsn);
2502
2503 if (err != DB_SUCCESS) {
2504
2505 return(DB_ERROR);
2506 }
2507
2508 init_log_online();
2509
2510 /* Since the insert buffer init is in dict_boot, and the
2511 insert buffer is needed in any disk i/o, first we call
2512 dict_boot(). Note that trx_sys_init_at_db_start() only needs
2513 to access space 0, and the insert buffer at this stage already
2514 works for space 0. */
2515
2516 err = dict_boot();
2517 DBUG_EXECUTE_IF("ib_dic_boot_error",
2518 err = DB_ERROR;);
2519 if (err != DB_SUCCESS) {
2520 return(err);
2521 }
2522
2523 ib_bh = trx_sys_init_at_db_start();
2524 n_recovered_trx = UT_LIST_GET_LEN(trx_sys->rw_trx_list);
2525
2526 /* The purge system needs to create the purge view and
2527 therefore requires that the trx_sys is inited. */
2528
2529 trx_purge_sys_create(srv_n_purge_threads, ib_bh);
2530
2531 /* recv_recovery_from_checkpoint_finish needs trx lists which
2532 are initialized in trx_sys_init_at_db_start(). */
2533
2534 recv_recovery_from_checkpoint_finish();
2535
2536 if (srv_force_recovery < SRV_FORCE_NO_IBUF_MERGE) {
2537 /* The following call is necessary for the insert
2538 buffer to work with multiple tablespaces. We must
2539 know the mapping between space id's and .ibd file
2540 names.
2541
2542 In a crash recovery, we check that the info in data
2543 dictionary is consistent with what we already know
2544 about space id's from the call of
2545 fil_load_single_table_tablespaces().
2546
2547 In a normal startup, we create the space objects for
2548 every table in the InnoDB data dictionary that has
2549 an .ibd file.
2550
2551 We also determine the maximum tablespace id used. */
2552 dict_check_t dict_check;
2553
2554 if (recv_needed_recovery) {
2555 dict_check = DICT_CHECK_ALL_LOADED;
2556 } else if (n_recovered_trx) {
2557 dict_check = DICT_CHECK_SOME_LOADED;
2558 } else {
2559 dict_check = DICT_CHECK_NONE_LOADED;
2560 }
2561
2562 dict_check_tablespaces_and_store_max_id(dict_check);
2563 }
2564
2565 if (!srv_force_recovery
2566 && !recv_sys->found_corrupt_log
2567 && (srv_log_file_size_requested != srv_log_file_size
2568 || srv_n_log_files_found != srv_n_log_files)) {
2569 /* Prepare to replace the redo log files. */
2570
2571 if (srv_read_only_mode) {
2572 ib_logf(IB_LOG_LEVEL_ERROR,
2573 "Cannot resize log files "
2574 "in read-only mode.");
2575 return(DB_READ_ONLY);
2576 }
2577
2578 /* Clean the buffer pool. */
2579 bool success = buf_flush_list(
2580 ULINT_MAX, LSN_MAX, NULL);
2581 ut_a(success);
2582
2583 RECOVERY_CRASH(1);
2584
2585 min_flushed_lsn = max_flushed_lsn = log_get_lsn();
2586
2587 ib_logf(IB_LOG_LEVEL_WARN,
2588 "Resizing redo log from %u*%u to %u*%u pages"
2589 ", LSN=" LSN_PF,
2590 (unsigned) i,
2591 (unsigned) srv_log_file_size,
2592 (unsigned) srv_n_log_files,
2593 (unsigned) srv_log_file_size_requested,
2594 max_flushed_lsn);
2595
2596 buf_flush_wait_batch_end(NULL, BUF_FLUSH_LIST);
2597
2598 RECOVERY_CRASH(2);
2599
2600 /* Flush the old log files. */
2601 log_buffer_flush_to_disk();
2602 /* If innodb_flush_method=O_DSYNC,
2603 we need to explicitly flush the log buffers. */
2604 fil_flush(SRV_LOG_SPACE_FIRST_ID);
2605
2606 ut_ad(max_flushed_lsn == log_get_lsn());
2607
2608 /* Prohibit redo log writes from any other
2609 threads until creating a log checkpoint at the
2610 end of create_log_files(). */
2611 ut_d(recv_no_log_write = TRUE);
2612 ut_ad(!buf_pool_check_no_pending_io());
2613
2614 RECOVERY_CRASH(3);
2615
2616 /* Stamp the LSN to the data files. */
2617 fil_write_flushed_lsn_to_data_files(
2618 max_flushed_lsn, 0);
2619
2620 fil_flush_file_spaces(FIL_TABLESPACE);
2621
2622 RECOVERY_CRASH(4);
2623
2624 /* Close and free the redo log files, so that
2625 we can replace them. */
2626 fil_close_log_files(true);
2627
2628 RECOVERY_CRASH(5);
2629
2630 /* Free the old log file space. */
2631 log_group_close_all();
2632
2633 ib_logf(IB_LOG_LEVEL_WARN,
2634 "Starting to delete and rewrite log files.");
2635
2636 srv_log_file_size = srv_log_file_size_requested;
2637
2638 err = create_log_files(create_new_db, logfilename,
2639 dirnamelen, max_flushed_lsn,
2640 logfile0);
2641
2642 if (err != DB_SUCCESS) {
2643 return(err);
2644 }
2645
2646 /* create_log_files() can increase system lsn that is
2647 why FIL_PAGE_FILE_FLUSH_LSN have to be updated */
2648 min_flushed_lsn = max_flushed_lsn = log_get_lsn();
2649 fil_write_flushed_lsn_to_data_files(min_flushed_lsn, 0);
2650 fil_flush_file_spaces(FIL_TABLESPACE);
2651
2652 create_log_files_rename(logfilename, dirnamelen,
2653 log_get_lsn(), logfile0);
2654 }
2655
2656 srv_startup_is_before_trx_rollback_phase = FALSE;
2657 recv_recovery_rollback_active();
2658
2659 /* It is possible that file_format tag has never
2660 been set. In this case we initialize it to minimum
2661 value. Important to note that we can do it ONLY after
2662 we have finished the recovery process so that the
2663 image of TRX_SYS_PAGE_NO is not stale. */
2664 trx_sys_file_format_tag_init();
2665 }
2666
2667 if (!create_new_db && sum_of_new_sizes > 0) {
2668 /* New data file(s) were added */
2669 mtr_start(&mtr);
2670
2671 fsp_header_inc_size(0, sum_of_new_sizes, &mtr);
2672
2673 mtr_commit(&mtr);
2674
2675 /* Immediately write the log record about increased tablespace
2676 size to disk, so that it is durable even if mysqld would crash
2677 quickly */
2678
2679 log_buffer_flush_to_disk();
2680 }
2681
2682 #ifdef UNIV_LOG_ARCHIVE
2683 if (!srv_read_only_mode) {
2684 if (!srv_log_archive_on) {
2685 ut_a(DB_SUCCESS == log_archive_noarchivelog());
2686 } else {
2687 bool start_archive;
2688
2689 mutex_enter(&(log_sys->mutex));
2690
2691 start_archive = false;
2692
2693 if (log_sys->archiving_state == LOG_ARCH_OFF) {
2694 start_archive = true;
2695 }
2696
2697 mutex_exit(&(log_sys->mutex));
2698
2699 if (start_archive) {
2700 ut_a(DB_SUCCESS == log_archive_archivelog());
2701 }
2702 }
2703 }
2704 #endif /* UNIV_LOG_ARCHIVE */
2705
2706 /* fprintf(stderr, "Max allowed record size %lu\n",
2707 page_get_free_space_of_empty() / 2); */
2708
2709 if (buf_dblwr == NULL) {
2710 /* Create the doublewrite buffer to a new tablespace */
2711
2712 buf_dblwr_create();
2713 }
2714
2715 /* Here the double write buffer has already been created and so
2716 any new rollback segments will be allocated after the double
2717 write buffer. The default segment should already exist.
2718 We create the new segments only if it's a new database or
2719 the database was shutdown cleanly. */
2720
2721 /* Note: When creating the extra rollback segments during an upgrade
2722 we violate the latching order, even if the change buffer is empty.
2723 We make an exception in sync0sync.cc and check srv_is_being_started
2724 for that violation. It cannot create a deadlock because we are still
2725 running in single threaded mode essentially. Only the IO threads
2726 should be running at this stage. */
2727
2728 ut_a(srv_undo_logs > 0);
2729 ut_a(srv_undo_logs <= TRX_SYS_N_RSEGS);
2730
2731 /* The number of rsegs that exist in InnoDB is given by status
2732 variable srv_available_undo_logs. The number of rsegs to use can
2733 be set using the dynamic global variable srv_undo_logs. */
2734
2735 srv_available_undo_logs = trx_sys_create_rsegs(
2736 srv_undo_tablespaces, srv_undo_logs);
2737
2738 if (srv_available_undo_logs == ULINT_UNDEFINED) {
2739 /* Can only happen if server is read only. */
2740 ut_a(srv_read_only_mode);
2741 srv_undo_logs = ULONG_UNDEFINED;
2742 }
2743
2744 if (!srv_read_only_mode) {
2745 /* Create the thread which watches the timeouts
2746 for lock waits */
2747 os_thread_create(
2748 lock_wait_timeout_thread,
2749 NULL, thread_ids + 2 + SRV_MAX_N_IO_THREADS);
2750
2751 /* Create the thread which warns of long semaphore waits */
2752 os_thread_create(
2753 srv_error_monitor_thread,
2754 NULL, thread_ids + 3 + SRV_MAX_N_IO_THREADS);
2755
2756 /* Create the thread which prints InnoDB monitor info */
2757 if (!srv_monitor_thread_started) {
2758
2759 os_thread_create(
2760 srv_monitor_thread,
2761 NULL, thread_ids + 4 + SRV_MAX_N_IO_THREADS);
2762
2763 srv_monitor_thread_started = true;
2764 }
2765 }
2766
2767 /* Create the SYS_FOREIGN and SYS_FOREIGN_COLS system tables */
2768 err = dict_create_or_check_foreign_constraint_tables();
2769 if (err != DB_SUCCESS) {
2770 return(err);
2771 }
2772
2773 /* Create the SYS_TABLESPACES system table */
2774 err = dict_create_or_check_sys_tablespace();
2775 if (err != DB_SUCCESS) {
2776 return(err);
2777 }
2778
2779 /* Create the SYS_ZIP_DICT system table */
2780 err = dict_create_or_check_sys_zip_dict();
2781 if (err != DB_SUCCESS) {
2782 return(err);
2783 }
2784
2785 srv_is_being_started = FALSE;
2786
2787 ut_a(trx_purge_state() == PURGE_STATE_INIT);
2788
2789 /* Create the master thread which does purge and other utility
2790 operations */
2791
2792 if (!srv_read_only_mode) {
2793
2794 os_thread_create(
2795 srv_master_thread,
2796 NULL, thread_ids + (1 + SRV_MAX_N_IO_THREADS));
2797 }
2798
2799 if (!srv_read_only_mode
2800 && srv_force_recovery < SRV_FORCE_NO_BACKGROUND) {
2801
2802 os_thread_create(
2803 srv_purge_coordinator_thread,
2804 NULL, thread_ids + 6 + SRV_MAX_N_IO_THREADS);
2805
2806 ut_a(UT_ARR_SIZE(thread_ids)
2807 > 6 + srv_n_purge_threads + SRV_MAX_N_IO_THREADS);
2808
2809 /* We've already created the purge coordinator thread above. */
2810 for (i = 1; i < srv_n_purge_threads; ++i) {
2811 os_thread_create(
2812 srv_worker_thread, NULL,
2813 thread_ids + 6 + i + SRV_MAX_N_IO_THREADS);
2814 }
2815
2816 srv_start_wait_for_purge_to_start();
2817
2818 } else {
2819 purge_sys->state = PURGE_STATE_DISABLED;
2820 }
2821
2822 if (!srv_read_only_mode) {
2823 os_thread_create(buf_flush_page_cleaner_thread, NULL, NULL);
2824 }
2825 os_thread_create(buf_flush_lru_manager_thread, NULL, NULL);
2826
2827 #ifdef UNIV_DEBUG
2828 /* buf_debug_prints = TRUE; */
2829 #endif /* UNIV_DEBUG */
2830 sum_of_data_file_sizes = 0;
2831
2832 for (i = 0; i < srv_n_data_files; i++) {
2833 sum_of_data_file_sizes += srv_data_file_sizes[i];
2834 }
2835
2836 tablespace_size_in_header = fsp_header_get_tablespace_size();
2837
2838 if (!srv_read_only_mode
2839 && !srv_auto_extend_last_data_file
2840 && sum_of_data_file_sizes != tablespace_size_in_header) {
2841
2842 ut_print_timestamp(stderr);
2843 fprintf(stderr,
2844 " InnoDB: Error: tablespace size"
2845 " stored in header is %lu pages, but\n",
2846 (ulong) tablespace_size_in_header);
2847 ut_print_timestamp(stderr);
2848 fprintf(stderr,
2849 "InnoDB: the sum of data file sizes is %lu pages\n",
2850 (ulong) sum_of_data_file_sizes);
2851
2852 if (srv_force_recovery == 0
2853 && sum_of_data_file_sizes < tablespace_size_in_header) {
2854 /* This is a fatal error, the tail of a tablespace is
2855 missing */
2856
2857 ut_print_timestamp(stderr);
2858 fprintf(stderr,
2859 " InnoDB: Cannot start InnoDB."
2860 " The tail of the system tablespace is\n");
2861 ut_print_timestamp(stderr);
2862 fprintf(stderr,
2863 " InnoDB: missing. Have you edited"
2864 " innodb_data_file_path in my.cnf in an\n");
2865 ut_print_timestamp(stderr);
2866 fprintf(stderr,
2867 " InnoDB: inappropriate way, removing"
2868 " ibdata files from there?\n");
2869 ut_print_timestamp(stderr);
2870 fprintf(stderr,
2871 " InnoDB: You can set innodb_force_recovery=1"
2872 " in my.cnf to force\n");
2873 ut_print_timestamp(stderr);
2874 fprintf(stderr,
2875 " InnoDB: a startup if you are trying"
2876 " to recover a badly corrupt database.\n");
2877
2878 return(DB_ERROR);
2879 }
2880 }
2881
2882 if (!srv_read_only_mode
2883 && srv_auto_extend_last_data_file
2884 && sum_of_data_file_sizes < tablespace_size_in_header) {
2885
2886 ut_print_timestamp(stderr);
2887 fprintf(stderr,
2888 " InnoDB: Error: tablespace size stored in header"
2889 " is %lu pages, but\n",
2890 (ulong) tablespace_size_in_header);
2891 ut_print_timestamp(stderr);
2892 fprintf(stderr,
2893 " InnoDB: the sum of data file sizes"
2894 " is only %lu pages\n",
2895 (ulong) sum_of_data_file_sizes);
2896
2897 if (srv_force_recovery == 0) {
2898
2899 ut_print_timestamp(stderr);
2900 fprintf(stderr,
2901 " InnoDB: Cannot start InnoDB. The tail of"
2902 " the system tablespace is\n");
2903 ut_print_timestamp(stderr);
2904 fprintf(stderr,
2905 " InnoDB: missing. Have you edited"
2906 " innodb_data_file_path in my.cnf in an\n");
2907 ut_print_timestamp(stderr);
2908 fprintf(stderr,
2909 " InnoDB: inappropriate way, removing"
2910 " ibdata files from there?\n");
2911 ut_print_timestamp(stderr);
2912 fprintf(stderr,
2913 " InnoDB: You can set innodb_force_recovery=1"
2914 " in my.cnf to force\n");
2915 ut_print_timestamp(stderr);
2916 fprintf(stderr,
2917 " InnoDB: a startup if you are trying to"
2918 " recover a badly corrupt database.\n");
2919
2920 return(DB_ERROR);
2921 }
2922 }
2923
2924 /* Check that os_fast_mutexes work as expected */
2925 os_fast_mutex_init(PFS_NOT_INSTRUMENTED, &srv_os_test_mutex);
2926
2927 if (0 != os_fast_mutex_trylock(&srv_os_test_mutex)) {
2928 ut_print_timestamp(stderr);
2929 fprintf(stderr,
2930 " InnoDB: Error: pthread_mutex_trylock returns"
2931 " an unexpected value on\n");
2932 ut_print_timestamp(stderr);
2933 fprintf(stderr,
2934 " InnoDB: success! Cannot continue.\n");
2935 exit(1);
2936 }
2937
2938 os_fast_mutex_unlock(&srv_os_test_mutex);
2939
2940 os_fast_mutex_lock(&srv_os_test_mutex);
2941
2942 os_fast_mutex_unlock(&srv_os_test_mutex);
2943
2944 os_fast_mutex_free(&srv_os_test_mutex);
2945
2946 if (!srv_file_per_table && srv_pass_corrupt_table) {
2947 fprintf(stderr, "InnoDB: Warning:"
2948 " The option innodb_file_per_table is disabled,"
2949 " so using the option innodb_pass_corrupt_table doesn't make sense.\n");
2950 }
2951
2952 if (srv_print_verbose_log) {
2953 ib_logf(IB_LOG_LEVEL_INFO,
2954 " Percona XtraDB (http://www.percona.com) %s started; "
2955 "log sequence number " LSN_PF "",
2956 INNODB_VERSION_STR, srv_start_lsn);
2957 }
2958
2959 if (srv_force_recovery > 0) {
2960 ib_logf(IB_LOG_LEVEL_INFO,
2961 "!!! innodb_force_recovery is set to %lu !!!",
2962 (ulong) srv_force_recovery);
2963 }
2964
2965 if (srv_force_recovery == 0) {
2966 /* In the insert buffer we may have even bigger tablespace
2967 id's, because we may have dropped those tablespaces, but
2968 insert buffer merge has not had time to clean the records from
2969 the ibuf tree. */
2970
2971 ibuf_update_max_tablespace_id();
2972 }
2973
2974 if (!srv_read_only_mode) {
2975 /* Create the buffer pool dump/load thread */
2976 os_thread_create(buf_dump_thread, NULL, NULL);
2977
2978 /* Create the dict stats gathering thread */
2979 os_thread_create(dict_stats_thread, NULL, NULL);
2980
2981 /* Create the thread that will optimize the FTS sub-system. */
2982 fts_optimize_init();
2983 }
2984
2985 srv_was_started = TRUE;
2986
2987 return(DB_SUCCESS);
2988 }
2989
#if 0
/********************************************************************
Sync the FTS cache of every table before shutdown.
Calls fts_sync_table() on each table that has an FTS object
(table->fts != NULL), first for the tables on dict_sys->table_LRU and
then for the tables on dict_sys->table_non_LRU.
NOTE: this function is currently compiled out by the enclosing #if 0. */
static
void
srv_fts_close(void)
/*===============*/
{
	dict_table_t*	tbl;

	/* First pass: tables on the LRU list. */
	tbl = UT_LIST_GET_FIRST(dict_sys->table_LRU);

	while (tbl) {
		if (tbl->fts != NULL) {
			fts_sync_table(tbl);
		}

		tbl = UT_LIST_GET_NEXT(table_LRU, tbl);
	}

	/* Second pass: tables on the non-LRU list. The list is walked
	through the same table_LRU node as the first one. */
	tbl = UT_LIST_GET_FIRST(dict_sys->table_non_LRU);

	while (tbl) {
		if (tbl->fts != NULL) {
			fts_sync_table(tbl);
		}

		tbl = UT_LIST_GET_NEXT(table_LRU, tbl);
	}
}
#endif
3019
/****************************************************************//**
Shuts down the InnoDB database.

If InnoDB was never started (srv_was_started is not set) the function
returns at once, after logging a warning when a startup was in progress.
Otherwise it performs, in this order:
1. the real shutdown, logs_empty_and_mark_files_at_shutdown();
2. waking up the threads created by InnoDB and waiting for
   os_thread_count to drop to zero (at most 1000 rounds of 0.1 seconds);
3. closing the temporary files and the subsystems, and freeing the
   mutexes;
4. freeing all remaining memory and reporting OS-level resources that
   are still counted as allocated.
Finally srv_was_started and srv_start_has_been_called are reset.

The steps depend on each other (see the comments in the body), so their
order must not be changed.
@return DB_SUCCESS or error code; every return statement in this
function returns DB_SUCCESS */
UNIV_INTERN
dberr_t
innobase_shutdown_for_mysql(void)
/*=============================*/
{
	ulint	i;	/* round counter of the thread-exit wait loop */

	if (!srv_was_started) {
		/* Nothing to tear down. Only warn if a startup had begun
		but did not complete. */
		if (srv_is_being_started) {
			ib_logf(IB_LOG_LEVEL_WARN,
				"Shutting down an improperly started, "
				"or created database!");
		}

		return(DB_SUCCESS);
	}

	if (!srv_read_only_mode) {
		/* Shutdown the FTS optimize sub system. It is only
		initialized at startup when not in read-only mode. */
		fts_optimize_start_shutdown();

		fts_optimize_end();
	}

	/* 1. Flush the buffer pool to disk, write the current lsn to
	the tablespace header(s), and copy all log data to archive.
	The step 1 is the real InnoDB shutdown. The remaining steps 2 - ...
	just free data structures after the shutdown. */

	logs_empty_and_mark_files_at_shutdown();

	/* Diagnostic only: the shutdown continues even if the counter
	is not zero. */
	if (srv_conc_get_active_threads() != 0) {
		ib_logf(IB_LOG_LEVEL_WARN,
			"Query counter shows %ld queries still "
			"inside InnoDB at shutdown",
			srv_conc_get_active_threads());
	}

	/* 2. Make all threads created by InnoDB to exit */

	srv_shutdown_state = SRV_SHUTDOWN_EXIT_THREADS;

	/* All threads end up waiting for certain events. Put those events
	to the signaled state. Then the threads will exit themselves after
	os_event_wait(). */

	/* The wake-ups are repeated on every round. The loop gives up
	after 1000 rounds of 0.1 seconds each, i.e. after about 100
	seconds. */
	for (i = 0; i < 1000; i++) {
		/* NOTE: IF YOU CREATE THREADS IN INNODB, YOU MUST EXIT THEM
		HERE OR EARLIER */

		if (!srv_read_only_mode) {
			/* a. Let the lock timeout thread exit */
			os_event_set(lock_sys->timeout_event);

			/* b. srv error monitor thread exits automatically,
			no need to do anything here */

			/* c. We wake the master thread so that it exits */
			srv_wake_master_thread();

			/* d. Wakeup purge threads. */
			srv_purge_wakeup();
		}

		/* e. Exit the i/o threads */

		os_aio_wake_all_threads_at_shutdown();

		/* f. dict_stats_thread is signaled from
		logs_empty_and_mark_files_at_shutdown() and should have
		already quit or is quitting right now. */

		/* NOTE(review): os_rmb is presumably a read memory barrier
		issued so that the os_thread_count read below is not stale;
		confirm against its definition. */
		os_rmb;
		if (os_thread_count == 0) {
			/* All the threads have exited or are just exiting;
			NOTE that the threads may not have completed their
			exit yet. Should we use pthread_join() to make sure
			they have exited? If we did, we would have to
			remove the pthread_detach() from
			os_thread_exit(). Now we just sleep 0.1
			seconds and hope that is enough! */

			os_thread_sleep(100000);

			break;
		}

		/* Some threads are still running: wait 0.1 seconds and
		signal them again. */
		os_thread_sleep(100000);
	}

	/* i reaches 1000 only when the loop above was never left through
	the break, i.e. os_thread_count was never seen as zero. The
	shutdown continues regardless. */
	if (i == 1000) {
		ib_logf(IB_LOG_LEVEL_WARN,
			"%lu threads created by InnoDB"
			" had not exited at shutdown!",
			(ulong) os_thread_count);
	}

	/* Close the monitor file; if it has a file name, also remove the
	file from disk and free the name buffer. */
	if (srv_monitor_file) {
		fclose(srv_monitor_file);
		srv_monitor_file = 0;
		if (srv_monitor_file_name) {
			unlink(srv_monitor_file_name);
			mem_free(srv_monitor_file_name);
		}
	}

	/* Close the remaining temporary files, if they are open. */
	if (srv_dict_tmpfile) {
		fclose(srv_dict_tmpfile);
		srv_dict_tmpfile = 0;
	}

	if (srv_misc_tmpfile) {
		fclose(srv_misc_tmpfile);
		srv_misc_tmpfile = 0;
	}

	/* Counterpart of dict_stats_thread_init(), which startup calls
	only when not in read-only mode. */
	if (!srv_read_only_mode) {
		dict_stats_thread_deinit();
	}

	/* This must be disabled before closing the buffer pool
	and closing the data dictionary. */
	btr_search_disable();

	ibuf_close();
	log_online_shutdown();
	log_shutdown();
	trx_sys_file_format_close();
	trx_sys_close();
	lock_sys_close();

	/* We don't create these mutexes in RO mode because we don't create
	the temp files that they cover. */
	if (!srv_read_only_mode) {
		mutex_free(&srv_monitor_file_mutex);
		mutex_free(&srv_dict_tmpfile_mutex);
		mutex_free(&srv_misc_tmpfile_mutex);
	}

	dict_close();
	btr_search_sys_free();

	/* 3. Free all InnoDB's own mutexes and the os_fast_mutexes inside
	them */
	os_aio_free();
	que_close();
	row_mysql_close();
	srv_mon_free();
	srv_free();
	fil_close();

	/* 4. Free all allocated memory */

	pars_lexer_close();
	log_mem_free();
	buf_pool_free(srv_buf_pool_instances);
	mem_close();
	sync_close();

	/* ut_free_all_mem() frees all allocated memory not freed yet
	in shutdown, and it will also free the ut_list_mutex, so it
	should be the last one for all operation */
	ut_free_all_mem();

	/* Leak check: after the teardown above all of these counters are
	expected to be zero. A non-zero counter is only reported, it does
	not change the return value. */
	os_rmb;
	if (os_thread_count != 0
	    || os_event_count != 0
	    || os_mutex_count != 0
	    || os_fast_mutex_count != 0) {
		ib_logf(IB_LOG_LEVEL_WARN,
			"Some resources were not cleaned up in shutdown: "
			"threads %lu, events %lu, os_mutexes %lu, "
			"os_fast_mutexes %lu",
			(ulong) os_thread_count, (ulong) os_event_count,
			(ulong) os_mutex_count, (ulong) os_fast_mutex_count);
	}

	/* NOTE(review): the handle is closed but, unlike the files
	above, not reset to 0 here. */
	if (dict_foreign_err_file) {
		fclose(dict_foreign_err_file);
	}

	if (srv_print_verbose_log) {
		ib_logf(IB_LOG_LEVEL_INFO,
			"Shutdown completed; log sequence number " LSN_PF "",
			srv_shutdown_lsn);
	}

	/* Clear the startup flags. NOTE(review): presumably this is what
	allows InnoDB to be started again within the same process;
	confirm against the startup code. */
	srv_was_started = FALSE;
	srv_start_has_been_called = FALSE;

	return(DB_SUCCESS);
}
3215 #endif /* !UNIV_HOTBACKUP */
3216
3217
3218 /********************************************************************
3219 Signal all per-table background threads to shutdown, and wait for them to do
3220 so. */
3221 UNIV_INTERN
3222 void
srv_shutdown_table_bg_threads(void)3223 srv_shutdown_table_bg_threads(void)
3224 /*===============================*/
3225 {
3226 dict_table_t* table;
3227 dict_table_t* first;
3228 dict_table_t* last = NULL;
3229
3230 mutex_enter(&dict_sys->mutex);
3231
3232 /* Signal all threads that they should stop. */
3233 table = UT_LIST_GET_FIRST(dict_sys->table_LRU);
3234 first = table;
3235 while (table) {
3236 dict_table_t* next;
3237 fts_t* fts = table->fts;
3238
3239 if (fts != NULL) {
3240 fts_start_shutdown(table, fts);
3241 }
3242
3243 next = UT_LIST_GET_NEXT(table_LRU, table);
3244
3245 if (!next) {
3246 last = table;
3247 }
3248
3249 table = next;
3250 }
3251
3252 /* We must release dict_sys->mutex here; if we hold on to it in the
3253 loop below, we will deadlock if any of the background threads try to
3254 acquire it (for example, the FTS thread by calling que_eval_sql).
3255
3256 Releasing it here and going through dict_sys->table_LRU without
3257 holding it is safe because:
3258
3259 a) MySQL only starts the shutdown procedure after all client
3260 threads have been disconnected and no new ones are accepted, so no
3261 new tables are added or old ones dropped.
3262
3263 b) Despite its name, the list is not LRU, and the order stays
3264 fixed.
3265
3266 To safeguard against the above assumptions ever changing, we store
3267 the first and last items in the list above, and then check that
3268 they've stayed the same below. */
3269
3270 mutex_exit(&dict_sys->mutex);
3271
3272 /* Wait for the threads of each table to stop. This is not inside
3273 the above loop, because by signaling all the threads first we can
3274 overlap their shutting down delays. */
3275 table = UT_LIST_GET_FIRST(dict_sys->table_LRU);
3276 ut_a(first == table);
3277 while (table) {
3278 dict_table_t* next;
3279 fts_t* fts = table->fts;
3280
3281 if (fts != NULL) {
3282 fts_shutdown(table, fts);
3283 }
3284
3285 next = UT_LIST_GET_NEXT(table_LRU, table);
3286
3287 if (table == last) {
3288 ut_a(!next);
3289 }
3290
3291 table = next;
3292 }
3293 }
3294
3295 /*****************************************************************//**
3296 Get the meta-data filename from the table name. */
3297 UNIV_INTERN
3298 void
srv_get_meta_data_filename(dict_table_t * table,char * filename,ulint max_len)3299 srv_get_meta_data_filename(
3300 /*=======================*/
3301 dict_table_t* table, /*!< in: table */
3302 char* filename, /*!< out: filename */
3303 ulint max_len) /*!< in: filename max length */
3304 {
3305 ulint len;
3306 char* path;
3307 char* suffix;
3308 static const ulint suffix_len = strlen(".cfg");
3309
3310 if (DICT_TF_HAS_DATA_DIR(table->flags)) {
3311 dict_get_and_save_data_dir_path(table, false);
3312 ut_a(table->data_dir_path);
3313
3314 path = os_file_make_remote_pathname(
3315 table->data_dir_path, table->name, "cfg");
3316 } else {
3317 path = fil_make_ibd_name(table->name, false);
3318 }
3319
3320 ut_a(path);
3321 len = ut_strlen(path);
3322 ut_a(max_len >= len);
3323
3324 suffix = path + (len - suffix_len);
3325 if (strncmp(suffix, ".cfg", suffix_len) == 0) {
3326 strcpy(filename, path);
3327 } else {
3328 ut_ad(strncmp(suffix, ".ibd", suffix_len) == 0);
3329
3330 strncpy(filename, path, max_len - suffix_len);
3331 suffix = filename + (len - suffix_len);
3332 strcpy(suffix, ".cfg");
3333 }
3334
3335 mem_free(path);
3336
3337 srv_normalize_path_for_win(filename);
3338 }
3339