1 /*****************************************************************************
2
3 Copyright (c) 1996, 2017, Oracle and/or its affiliates. All Rights Reserved.
4
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License, version 2.0,
7 as published by the Free Software Foundation.
8
9 This program is also distributed with certain software (including
10 but not limited to OpenSSL) that is licensed under separate terms,
11 as designated in a particular file or component or in included license
12 documentation. The authors of MySQL hereby grant you an additional
13 permission to link the program and your derivative works with the
14 separately licensed software that they have included with MySQL.
15
16 This program is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 GNU General Public License, version 2.0, for more details.
20
21 You should have received a copy of the GNU General Public License along with
22 this program; if not, write to the Free Software Foundation, Inc.,
23 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
24
25 *****************************************************************************/
26
27 /**************************************************//**
28 @file trx/trx0sys.cc
29 Transaction system
30
31 Created 3/26/1996 Heikki Tuuri
32 *******************************************************/
33
34 #include "trx0sys.h"
35
36 #ifdef UNIV_NONINL
37 #include "trx0sys.ic"
38 #endif
39
40 #ifdef UNIV_HOTBACKUP
41 #include "fsp0types.h"
42
43 #else /* !UNIV_HOTBACKUP */
44 #include "fsp0fsp.h"
45 #include "mtr0log.h"
46 #include "mtr0log.h"
47 #include "trx0trx.h"
48 #include "trx0rseg.h"
49 #include "trx0undo.h"
50 #include "srv0srv.h"
51 #include "srv0start.h"
52 #include "trx0purge.h"
53 #include "log0log.h"
54 #include "log0recv.h"
55 #include "os0file.h"
56 #include "read0read.h"
57
/** The file format tag structure with id and name. One instance
(file_format_max) tracks the highest file format id seen so far; the
embedded mutex serializes updates of the id/name pair. */
struct file_format_t {
	ulint		id;		/*!< id of the file format */
	const char*	name;		/*!< text representation of the
					file format */
	ib_mutex_t	mutex;		/*!< covers changes to the above
					fields */
};
66
/** The transaction system. NULL until trx_sys_create() allocates it. */
UNIV_INTERN trx_sys_t*		trx_sys		= NULL;

/** In a MySQL replication slave, in crash recovery we store the master log
file name and position here. Both values are filled in by
trx_sys_print_mysql_master_log_pos(). */
/* @{ */
/** Master binlog file name */
UNIV_INTERN char	trx_sys_mysql_master_log_name[TRX_SYS_MYSQL_LOG_NAME_LEN];
/** Master binlog file position.  We have successfully got the updates
up to this position.  -1 means that no crash recovery was needed, or
there was no master log position info inside InnoDB.*/
UNIV_INTERN ib_int64_t	trx_sys_mysql_master_log_pos	= -1;
/* @} */

/** If this MySQL server uses binary logging, after InnoDB has been inited
and if it has done a crash recovery, we store the binlog file name and position
here. Both values are filled in by trx_sys_print_mysql_binlog_offset(). */
/* @{ */
/** Binlog file name */
UNIV_INTERN char	trx_sys_mysql_bin_log_name[TRX_SYS_MYSQL_LOG_NAME_LEN];
/** Binlog file position, or -1 if unknown */
UNIV_INTERN ib_int64_t	trx_sys_mysql_bin_log_pos	= -1;
/* @} */
90 #endif /* !UNIV_HOTBACKUP */
91
/** List of animal names representing file format. The array index is
the file format id: trx_sys_file_format_id_to_name() returns
file_format_name_map[id]. The names are in alphabetical order, one per
letter of the alphabet. */
static const char*	file_format_name_map[] = {
	"Antelope",
	"Barracuda",
	"Cheetah",
	"Dragon",
	"Elk",
	"Fox",
	"Gazelle",
	"Hornet",
	"Impala",
	"Jaguar",
	"Kangaroo",
	"Leopard",
	"Moose",
	"Nautilus",
	"Ocelot",
	"Porpoise",
	"Quail",
	"Rabbit",
	"Shark",
	"Tiger",
	"Urchin",
	"Viper",
	"Whale",
	"Xenops",
	"Yak",
	"Zebra"
};

/** The number of elements in the file format name array. Any format id
greater than or equal to this value has no name and is treated as
"not set" by the readers of the on-disk file format tag. */
static const ulint	FILE_FORMAT_NAME_N
	= sizeof(file_format_name_map) / sizeof(file_format_name_map[0]);
125
#ifdef UNIV_PFS_MUTEX
/* Keys to register the mutexes with performance schema:
file_format_max_mutex_key is passed to mutex_create() for
file_format_max.mutex, trx_sys_mutex_key for trx_sys->mutex. */
UNIV_INTERN mysql_pfs_key_t	file_format_max_mutex_key;
UNIV_INTERN mysql_pfs_key_t	trx_sys_mutex_key;
#endif /* UNIV_PFS_MUTEX */
131
132 #ifndef UNIV_HOTBACKUP
#ifdef UNIV_DEBUG
/* Flag to control TRX_RSEG_N_SLOTS behavior debugging. Only present in
debug builds. */
UNIV_INTERN uint	trx_rseg_n_slots_debug = 0;
#endif

/** This is used to track the maximum file format id known to InnoDB. It's
updated via SET GLOBAL innodb_file_format_max = 'x' or when we open
or create a table. Its mutex is created in trx_sys_file_format_init();
after startup the id and name are changed only while holding that mutex
(see trx_sys_file_format_max_set() and trx_sys_file_format_max_upgrade()). */
static	file_format_t	file_format_max;
142
143 #ifdef UNIV_DEBUG
144 /****************************************************************//**
145 Checks whether a trx is in one of rw_trx_list or ro_trx_list.
146 @return TRUE if is in */
147 UNIV_INTERN
148 ibool
trx_in_trx_list(const trx_t * in_trx)149 trx_in_trx_list(
150 /*============*/
151 const trx_t* in_trx) /*!< in: transaction */
152 {
153 const trx_t* trx;
154 trx_list_t* trx_list;
155
156 /* Non-locking autocommits should not hold any locks. */
157 assert_trx_in_list(in_trx);
158
159 trx_list = in_trx->read_only
160 ? &trx_sys->ro_trx_list : &trx_sys->rw_trx_list;
161
162 ut_ad(mutex_own(&trx_sys->mutex));
163
164 ut_ad(trx_assert_started(in_trx));
165
166 for (trx = UT_LIST_GET_FIRST(*trx_list);
167 trx != NULL && trx != in_trx;
168 trx = UT_LIST_GET_NEXT(trx_list, trx)) {
169
170 assert_trx_in_list(trx);
171 ut_ad(trx->read_only == (trx_list == &trx_sys->ro_trx_list));
172 }
173
174 return(trx != NULL);
175 }
176 #endif /* UNIV_DEBUG */
177
178 /*****************************************************************//**
179 Writes the value of max_trx_id to the file based trx system header. */
180 UNIV_INTERN
181 void
trx_sys_flush_max_trx_id(void)182 trx_sys_flush_max_trx_id(void)
183 /*==========================*/
184 {
185 mtr_t mtr;
186 trx_sysf_t* sys_header;
187
188 ut_ad(mutex_own(&trx_sys->mutex));
189
190 if (!srv_read_only_mode) {
191 mtr_start(&mtr);
192
193 sys_header = trx_sysf_get(&mtr);
194
195 mlog_write_ull(
196 sys_header + TRX_SYS_TRX_ID_STORE,
197 trx_sys->max_trx_id, &mtr);
198
199 mtr_commit(&mtr);
200 }
201 }
202
203 /*****************************************************************//**
204 Updates the offset information about the end of the MySQL binlog entry
205 which corresponds to the transaction just being committed. In a MySQL
206 replication slave updates the latest master binlog position up to which
207 replication has proceeded. */
208 UNIV_INTERN
209 void
trx_sys_update_mysql_binlog_offset(const char * file_name,ib_int64_t offset,ulint field,mtr_t * mtr)210 trx_sys_update_mysql_binlog_offset(
211 /*===============================*/
212 const char* file_name,/*!< in: MySQL log file name */
213 ib_int64_t offset, /*!< in: position in that log file */
214 ulint field, /*!< in: offset of the MySQL log info field in
215 the trx sys header */
216 mtr_t* mtr) /*!< in: mtr */
217 {
218 trx_sysf_t* sys_header;
219
220 if (ut_strlen(file_name) >= TRX_SYS_MYSQL_LOG_NAME_LEN) {
221
222 /* We cannot fit the name to the 512 bytes we have reserved */
223
224 return;
225 }
226
227 sys_header = trx_sysf_get(mtr);
228
229 if (mach_read_from_4(sys_header + field
230 + TRX_SYS_MYSQL_LOG_MAGIC_N_FLD)
231 != TRX_SYS_MYSQL_LOG_MAGIC_N) {
232
233 mlog_write_ulint(sys_header + field
234 + TRX_SYS_MYSQL_LOG_MAGIC_N_FLD,
235 TRX_SYS_MYSQL_LOG_MAGIC_N,
236 MLOG_4BYTES, mtr);
237 }
238
239 if (0 != strcmp((char*) (sys_header + field + TRX_SYS_MYSQL_LOG_NAME),
240 file_name)) {
241
242 mlog_write_string(sys_header + field
243 + TRX_SYS_MYSQL_LOG_NAME,
244 (byte*) file_name, 1 + ut_strlen(file_name),
245 mtr);
246 }
247
248 if (mach_read_from_4(sys_header + field
249 + TRX_SYS_MYSQL_LOG_OFFSET_HIGH) > 0
250 || (offset >> 32) > 0) {
251
252 mlog_write_ulint(sys_header + field
253 + TRX_SYS_MYSQL_LOG_OFFSET_HIGH,
254 (ulint)(offset >> 32),
255 MLOG_4BYTES, mtr);
256 }
257
258 mlog_write_ulint(sys_header + field
259 + TRX_SYS_MYSQL_LOG_OFFSET_LOW,
260 (ulint)(offset & 0xFFFFFFFFUL),
261 MLOG_4BYTES, mtr);
262 }
263
264 /*****************************************************************//**
265 Stores the MySQL binlog offset info in the trx system header if
266 the magic number shows it valid, and print the info to stderr */
267 UNIV_INTERN
268 void
trx_sys_print_mysql_binlog_offset(void)269 trx_sys_print_mysql_binlog_offset(void)
270 /*===================================*/
271 {
272 trx_sysf_t* sys_header;
273 mtr_t mtr;
274 ulint trx_sys_mysql_bin_log_pos_high;
275 ulint trx_sys_mysql_bin_log_pos_low;
276
277 mtr_start(&mtr);
278
279 sys_header = trx_sysf_get(&mtr);
280
281 if (mach_read_from_4(sys_header + TRX_SYS_MYSQL_LOG_INFO
282 + TRX_SYS_MYSQL_LOG_MAGIC_N_FLD)
283 != TRX_SYS_MYSQL_LOG_MAGIC_N) {
284
285 mtr_commit(&mtr);
286
287 return;
288 }
289
290 trx_sys_mysql_bin_log_pos_high = mach_read_from_4(
291 sys_header + TRX_SYS_MYSQL_LOG_INFO
292 + TRX_SYS_MYSQL_LOG_OFFSET_HIGH);
293 trx_sys_mysql_bin_log_pos_low = mach_read_from_4(
294 sys_header + TRX_SYS_MYSQL_LOG_INFO
295 + TRX_SYS_MYSQL_LOG_OFFSET_LOW);
296
297 trx_sys_mysql_bin_log_pos
298 = (((ib_int64_t) trx_sys_mysql_bin_log_pos_high) << 32)
299 + (ib_int64_t) trx_sys_mysql_bin_log_pos_low;
300
301 ut_memcpy(trx_sys_mysql_bin_log_name,
302 sys_header + TRX_SYS_MYSQL_LOG_INFO
303 + TRX_SYS_MYSQL_LOG_NAME, TRX_SYS_MYSQL_LOG_NAME_LEN);
304
305 fprintf(stderr,
306 "InnoDB: Last MySQL binlog file position %lu %lu,"
307 " file name %s\n",
308 trx_sys_mysql_bin_log_pos_high, trx_sys_mysql_bin_log_pos_low,
309 trx_sys_mysql_bin_log_name);
310
311 mtr_commit(&mtr);
312 }
313
314 /*****************************************************************//**
315 Prints to stderr the MySQL master log offset info in the trx system header if
316 the magic number shows it valid. */
317 UNIV_INTERN
318 void
trx_sys_print_mysql_master_log_pos(void)319 trx_sys_print_mysql_master_log_pos(void)
320 /*====================================*/
321 {
322 trx_sysf_t* sys_header;
323 mtr_t mtr;
324
325 mtr_start(&mtr);
326
327 sys_header = trx_sysf_get(&mtr);
328
329 if (mach_read_from_4(sys_header + TRX_SYS_MYSQL_MASTER_LOG_INFO
330 + TRX_SYS_MYSQL_LOG_MAGIC_N_FLD)
331 != TRX_SYS_MYSQL_LOG_MAGIC_N) {
332
333 mtr_commit(&mtr);
334
335 return;
336 }
337
338 fprintf(stderr,
339 "InnoDB: In a MySQL replication slave the last"
340 " master binlog file\n"
341 "InnoDB: position %lu %lu, file name %s\n",
342 (ulong) mach_read_from_4(sys_header
343 + TRX_SYS_MYSQL_MASTER_LOG_INFO
344 + TRX_SYS_MYSQL_LOG_OFFSET_HIGH),
345 (ulong) mach_read_from_4(sys_header
346 + TRX_SYS_MYSQL_MASTER_LOG_INFO
347 + TRX_SYS_MYSQL_LOG_OFFSET_LOW),
348 sys_header + TRX_SYS_MYSQL_MASTER_LOG_INFO
349 + TRX_SYS_MYSQL_LOG_NAME);
350 /* Copy the master log position info to global variables we can
351 use in ha_innobase.cc to initialize glob_mi to right values */
352
353 ut_memcpy(trx_sys_mysql_master_log_name,
354 sys_header + TRX_SYS_MYSQL_MASTER_LOG_INFO
355 + TRX_SYS_MYSQL_LOG_NAME,
356 TRX_SYS_MYSQL_LOG_NAME_LEN);
357
358 trx_sys_mysql_master_log_pos
359 = (((ib_int64_t) mach_read_from_4(
360 sys_header + TRX_SYS_MYSQL_MASTER_LOG_INFO
361 + TRX_SYS_MYSQL_LOG_OFFSET_HIGH)) << 32)
362 + ((ib_int64_t) mach_read_from_4(
363 sys_header + TRX_SYS_MYSQL_MASTER_LOG_INFO
364 + TRX_SYS_MYSQL_LOG_OFFSET_LOW));
365 mtr_commit(&mtr);
366 }
367
368 /****************************************************************//**
369 Looks for a free slot for a rollback segment in the trx system file copy.
370 @return slot index or ULINT_UNDEFINED if not found */
371 UNIV_INTERN
372 ulint
trx_sysf_rseg_find_free(mtr_t * mtr)373 trx_sysf_rseg_find_free(
374 /*====================*/
375 mtr_t* mtr) /*!< in: mtr */
376 {
377 ulint i;
378 trx_sysf_t* sys_header;
379
380 sys_header = trx_sysf_get(mtr);
381
382 for (i = 0; i < TRX_SYS_N_RSEGS; i++) {
383 ulint page_no;
384
385 page_no = trx_sysf_rseg_get_page_no(sys_header, i, mtr);
386
387 if (page_no == FIL_NULL) {
388
389 return(i);
390 }
391 }
392
393 return(ULINT_UNDEFINED);
394 }
395
/*****************************************************************//**
Creates the file page for the transaction system. This function is called only
at the database creation, before trx_sys_init.

Steps: x-latch the system tablespace, allocate the trx sys page through
fseg_create() (asserting that it lands on TRX_SYS_PAGE_NO), set the page
type, clear the doublewrite magic number, initialize the header
(trx id store, rollback segment slots, zero fill of the rest of the
page), and finally create the first rollback segment, which must get
slot TRX_SYS_SYSTEM_RSEG_ID and page FSP_FIRST_RSEG_PAGE_NO. */
static
void
trx_sysf_create(
/*============*/
	mtr_t*	mtr)	/*!< in: mtr */
{
	trx_sysf_t*	sys_header;
	ulint		slot_no;
	buf_block_t*	block;
	page_t*		page;
	ulint		page_no;
	byte*		ptr;
	ulint		len;

	ut_ad(mtr);

	/* Note that below we first reserve the file space x-latch, and
	then enter the kernel: we must do it in this order to conform
	to the latching order rules. */

	mtr_x_lock(fil_space_get_latch(TRX_SYS_SPACE, NULL), mtr);

	/* Create the trx sys file block in a new allocated file segment */
	block = fseg_create(TRX_SYS_SPACE, 0, TRX_SYS + TRX_SYS_FSEG_HEADER,
			    mtr);
	buf_block_dbg_add_level(block, SYNC_TRX_SYS_HEADER);

	/* The trx sys page must be at its fixed, well-known page number */
	ut_a(buf_block_get_page_no(block) == TRX_SYS_PAGE_NO);

	page = buf_block_get_frame(block);

	mlog_write_ulint(page + FIL_PAGE_TYPE, FIL_PAGE_TYPE_TRX_SYS,
			 MLOG_2BYTES, mtr);

	/* Reset the doublewrite buffer magic number to zero so that we
	know that the doublewrite buffer has not yet been created (this
	suppresses a Valgrind warning) */

	mlog_write_ulint(page + TRX_SYS_DOUBLEWRITE
			 + TRX_SYS_DOUBLEWRITE_MAGIC, 0, MLOG_4BYTES, mtr);

	sys_header = trx_sysf_get(mtr);

	/* Start counting transaction ids from number 1 up. Note that
	this write and the memset() calls below take no mtr; the whole
	range they touch is passed to mlog_log_string() further down. */
	mach_write_to_8(sys_header + TRX_SYS_TRX_ID_STORE, 1);

	/* Reset the rollback segment slots.  Old versions of InnoDB
	define TRX_SYS_N_RSEGS as 256 (TRX_SYS_OLD_N_RSEGS) and expect
	that the whole array is initialized. */
	ptr = TRX_SYS_RSEGS + sys_header;
	len = ut_max(TRX_SYS_OLD_N_RSEGS, TRX_SYS_N_RSEGS)
		* TRX_SYS_RSEG_SLOT_SIZE;
	memset(ptr, 0xff, len);
	ptr += len;
	/* The slot array must not run into the page trailer */
	ut_a(ptr <= page + (UNIV_PAGE_SIZE - FIL_PAGE_DATA_END));

	/* Initialize all of the page.  This part used to be uninitialized. */
	memset(ptr, 0, UNIV_PAGE_SIZE - FIL_PAGE_DATA_END + page - ptr);

	/* Pass everything from sys_header up to the page trailer to
	mlog_log_string() in one call */
	mlog_log_string(sys_header, UNIV_PAGE_SIZE - FIL_PAGE_DATA_END
			+ page - sys_header, mtr);

	/* Create the first rollback segment in the SYSTEM tablespace */
	slot_no = trx_sysf_rseg_find_free(mtr);
	page_no = trx_rseg_header_create(TRX_SYS_SPACE, 0, ULINT_MAX, slot_no,
					 mtr);

	/* On a freshly created header the first free slot and the first
	rollback segment page are expected to be the fixed system ones */
	ut_a(slot_no == TRX_SYS_SYSTEM_RSEG_ID);
	ut_a(page_no == FSP_FIRST_RSEG_PAGE_NO);
}
469
470 /*****************************************************************//**
471 Compare two trx_rseg_t instances on last_trx_no. */
472 static
473 int
trx_rseg_compare_last_trx_no(const void * p1,const void * p2)474 trx_rseg_compare_last_trx_no(
475 /*=========================*/
476 const void* p1, /*!< in: elem to compare */
477 const void* p2) /*!< in: elem to compare */
478 {
479 ib_int64_t cmp;
480
481 const rseg_queue_t* rseg_q1 = (const rseg_queue_t*) p1;
482 const rseg_queue_t* rseg_q2 = (const rseg_queue_t*) p2;
483
484 cmp = rseg_q1->trx_no - rseg_q2->trx_no;
485
486 if (cmp < 0) {
487 return(-1);
488 } else if (cmp > 0) {
489 return(1);
490 }
491
492 return(0);
493 }
494
/*****************************************************************//**
Creates and initializes the central memory structures for the transaction
system. This is called when the database is started.

Besides creating the binary heap, this initializes the rollback segment
array (unless srv_force_recovery forbids the undo log scan), sets
trx_sys->max_trx_id from the value stored in the trx system header,
initializes the transaction lists and, if read-write transactions were
found, prints how many there are and how many row operations must be
undone.
@return min binary heap of rsegs to purge */
UNIV_INTERN
ib_bh_t*
trx_sys_init_at_db_start(void)
/*==========================*/
{
	mtr_t		mtr;
	ib_bh_t*	ib_bh;
	trx_sysf_t*	sys_header;
	ib_uint64_t	rows_to_undo	= 0;
	const char*	unit		= "";

	/* We create the min binary heap here and pass ownership to
	purge when we init the purge sub-system. Purge is responsible
	for freeing the binary heap. */

	ib_bh = ib_bh_create(
		trx_rseg_compare_last_trx_no,
		sizeof(rseg_queue_t), TRX_SYS_N_RSEGS);

	mtr_start(&mtr);

	sys_header = trx_sysf_get(&mtr);

	/* The rollback segment array is left uninitialized when the
	forced recovery level asks for no undo log scan */
	if (srv_force_recovery < SRV_FORCE_NO_UNDO_LOG_SCAN) {
		trx_rseg_array_init(sys_header, ib_bh, &mtr);
	}

	/* VERY important: after the database is started, max_trx_id value is
	divisible by TRX_SYS_TRX_ID_WRITE_MARGIN, and the 'if' in
	trx_sys_get_new_trx_id will evaluate to TRUE when the function
	is first time called, and the value for trx id will be written
	to the disk-based header! Thus trx id values will not overlap when
	the database is repeatedly started! */

	trx_sys->max_trx_id = 2 * TRX_SYS_TRX_ID_WRITE_MARGIN
		+ ut_uint64_align_up(mach_read_from_8(sys_header
						   + TRX_SYS_TRX_ID_STORE),
				     TRX_SYS_TRX_ID_WRITE_MARGIN);

	ut_d(trx_sys->rw_max_trx_id = trx_sys->max_trx_id);

	UT_LIST_INIT(trx_sys->mysql_trx_list);

	trx_dummy_sess = sess_open();

	trx_lists_init_at_db_start();

	/* Acquiring the trx_sys mutex is not strictly required, it is
	done here only to satisfy the debug code (assertions). We are still
	running in single threaded bootstrap mode. */

	mutex_enter(&trx_sys->mutex);

	ut_a(UT_LIST_GET_LEN(trx_sys->ro_trx_list) == 0);

	if (UT_LIST_GET_LEN(trx_sys->rw_trx_list) > 0) {
		const trx_t*	trx;

		/* Add up the undo numbers of the transactions that are
		still in the active state */
		for (trx = UT_LIST_GET_FIRST(trx_sys->rw_trx_list);
		     trx != NULL;
		     trx = UT_LIST_GET_NEXT(trx_list, trx)) {

			ut_ad(trx->is_recovered);
			assert_trx_in_rw_list(trx);

			if (trx_state_eq(trx, TRX_STATE_ACTIVE)) {
				rows_to_undo += trx->undo_no;
			}
		}

		/* Report very large counts in millions; the value is
		printed below after a cast to ulong */
		if (rows_to_undo > 1000000000) {
			unit = "M";
			rows_to_undo = rows_to_undo / 1000000;
		}

		fprintf(stderr,
			"InnoDB: %lu transaction(s) which must be"
			" rolled back or cleaned up\n"
			"InnoDB: in total %lu%s row operations to undo\n",
			(ulong) UT_LIST_GET_LEN(trx_sys->rw_trx_list),
			(ulong) rows_to_undo, unit);

		fprintf(stderr, "InnoDB: Trx id counter is " TRX_ID_FMT "\n",
			trx_sys->max_trx_id);
	}

	mutex_exit(&trx_sys->mutex);

	UT_LIST_INIT(trx_sys->view_list);

	mtr_commit(&mtr);

	return(ib_bh);
}
593
594 /*****************************************************************//**
595 Creates the trx_sys instance and initializes ib_bh and mutex. */
596 UNIV_INTERN
597 void
trx_sys_create(void)598 trx_sys_create(void)
599 /*================*/
600 {
601 ut_ad(trx_sys == NULL);
602
603 trx_sys = static_cast<trx_sys_t*>(mem_zalloc(sizeof(*trx_sys)));
604
605 mutex_create(trx_sys_mutex_key, &trx_sys->mutex, SYNC_TRX_SYS);
606 }
607
608 /*****************************************************************//**
609 Creates and initializes the transaction system at the database creation. */
610 UNIV_INTERN
611 void
trx_sys_create_sys_pages(void)612 trx_sys_create_sys_pages(void)
613 /*==========================*/
614 {
615 mtr_t mtr;
616
617 mtr_start(&mtr);
618
619 trx_sysf_create(&mtr);
620
621 mtr_commit(&mtr);
622 }
623
624 /*****************************************************************//**
625 Update the file format tag.
626 @return always TRUE */
627 static
628 ibool
trx_sys_file_format_max_write(ulint format_id,const char ** name)629 trx_sys_file_format_max_write(
630 /*==========================*/
631 ulint format_id, /*!< in: file format id */
632 const char** name) /*!< out: max file format name, can
633 be NULL */
634 {
635 mtr_t mtr;
636 byte* ptr;
637 buf_block_t* block;
638 ib_uint64_t tag_value;
639
640 mtr_start(&mtr);
641
642 block = buf_page_get(
643 TRX_SYS_SPACE, 0, TRX_SYS_PAGE_NO, RW_X_LATCH, &mtr);
644
645 file_format_max.id = format_id;
646 file_format_max.name = trx_sys_file_format_id_to_name(format_id);
647
648 ptr = buf_block_get_frame(block) + TRX_SYS_FILE_FORMAT_TAG;
649 tag_value = format_id + TRX_SYS_FILE_FORMAT_TAG_MAGIC_N;
650
651 if (name) {
652 *name = file_format_max.name;
653 }
654
655 mlog_write_ull(ptr, tag_value, &mtr);
656
657 mtr_commit(&mtr);
658
659 return(TRUE);
660 }
661
662 /*****************************************************************//**
663 Read the file format tag.
664 @return the file format or ULINT_UNDEFINED if not set. */
665 static
666 ulint
trx_sys_file_format_max_read(void)667 trx_sys_file_format_max_read(void)
668 /*==============================*/
669 {
670 mtr_t mtr;
671 const byte* ptr;
672 const buf_block_t* block;
673 ib_id_t file_format_id;
674
675 /* Since this is called during the startup phase it's safe to
676 read the value without a covering mutex. */
677 mtr_start(&mtr);
678
679 block = buf_page_get(
680 TRX_SYS_SPACE, 0, TRX_SYS_PAGE_NO, RW_X_LATCH, &mtr);
681
682 ptr = buf_block_get_frame(block) + TRX_SYS_FILE_FORMAT_TAG;
683 file_format_id = mach_read_from_8(ptr);
684
685 mtr_commit(&mtr);
686
687 file_format_id -= TRX_SYS_FILE_FORMAT_TAG_MAGIC_N;
688
689 if (file_format_id >= FILE_FORMAT_NAME_N) {
690
691 /* Either it has never been tagged, or garbage in it. */
692 return(ULINT_UNDEFINED);
693 }
694
695 return((ulint) file_format_id);
696 }
697
698 /*****************************************************************//**
699 Get the name representation of the file format from its id.
700 @return pointer to the name */
701 UNIV_INTERN
702 const char*
trx_sys_file_format_id_to_name(const ulint id)703 trx_sys_file_format_id_to_name(
704 /*===========================*/
705 const ulint id) /*!< in: id of the file format */
706 {
707 ut_a(id < FILE_FORMAT_NAME_N);
708
709 return(file_format_name_map[id]);
710 }
711
712 /*****************************************************************//**
713 Check for the max file format tag stored on disk. Note: If max_format_id
714 is == UNIV_FORMAT_MAX + 1 then we only print a warning.
715 @return DB_SUCCESS or error code */
716 UNIV_INTERN
717 dberr_t
trx_sys_file_format_max_check(ulint max_format_id)718 trx_sys_file_format_max_check(
719 /*==========================*/
720 ulint max_format_id) /*!< in: max format id to check */
721 {
722 ulint format_id;
723
724 /* Check the file format in the tablespace. Do not try to
725 recover if the file format is not supported by the engine
726 unless forced by the user. */
727 format_id = trx_sys_file_format_max_read();
728 if (format_id == ULINT_UNDEFINED) {
729 /* Format ID was not set. Set it to minimum possible
730 value. */
731 format_id = UNIV_FORMAT_MIN;
732 }
733
734 ib_logf(IB_LOG_LEVEL_INFO,
735 "Highest supported file format is %s.",
736 trx_sys_file_format_id_to_name(UNIV_FORMAT_MAX));
737
738 if (format_id > UNIV_FORMAT_MAX) {
739
740 ut_a(format_id < FILE_FORMAT_NAME_N);
741
742 ib_logf(max_format_id <= UNIV_FORMAT_MAX
743 ? IB_LOG_LEVEL_ERROR : IB_LOG_LEVEL_WARN,
744 "The system tablespace is in a file "
745 "format that this version doesn't support - %s.",
746 trx_sys_file_format_id_to_name(format_id));
747
748 if (max_format_id <= UNIV_FORMAT_MAX) {
749 return(DB_ERROR);
750 }
751 }
752
753 format_id = (format_id > max_format_id) ? format_id : max_format_id;
754
755 /* We don't need a mutex here, as this function should only
756 be called once at start up. */
757 file_format_max.id = format_id;
758 file_format_max.name = trx_sys_file_format_id_to_name(format_id);
759
760 return(DB_SUCCESS);
761 }
762
763 /*****************************************************************//**
764 Set the file format id unconditionally except if it's already the
765 same value.
766 @return TRUE if value updated */
767 UNIV_INTERN
768 ibool
trx_sys_file_format_max_set(ulint format_id,const char ** name)769 trx_sys_file_format_max_set(
770 /*========================*/
771 ulint format_id, /*!< in: file format id */
772 const char** name) /*!< out: max file format name or
773 NULL if not needed. */
774 {
775 ibool ret = FALSE;
776
777 ut_a(format_id <= UNIV_FORMAT_MAX);
778
779 mutex_enter(&file_format_max.mutex);
780
781 /* Only update if not already same value. */
782 if (format_id != file_format_max.id) {
783
784 ret = trx_sys_file_format_max_write(format_id, name);
785 }
786
787 mutex_exit(&file_format_max.mutex);
788
789 return(ret);
790 }
791
792 /********************************************************************//**
793 Tags the system table space with minimum format id if it has not been
794 tagged yet.
795 WARNING: This function is only called during the startup and AFTER the
796 redo log application during recovery has finished. */
797 UNIV_INTERN
798 void
trx_sys_file_format_tag_init(void)799 trx_sys_file_format_tag_init(void)
800 /*==============================*/
801 {
802 ulint format_id;
803
804 format_id = trx_sys_file_format_max_read();
805
806 /* If format_id is not set then set it to the minimum. */
807 if (format_id == ULINT_UNDEFINED) {
808 trx_sys_file_format_max_set(UNIV_FORMAT_MIN, NULL);
809 }
810 }
811
812 /********************************************************************//**
813 Update the file format tag in the system tablespace only if the given
814 format id is greater than the known max id.
815 @return TRUE if format_id was bigger than the known max id */
816 UNIV_INTERN
817 ibool
trx_sys_file_format_max_upgrade(const char ** name,ulint format_id)818 trx_sys_file_format_max_upgrade(
819 /*============================*/
820 const char** name, /*!< out: max file format name */
821 ulint format_id) /*!< in: file format identifier */
822 {
823 ibool ret = FALSE;
824
825 ut_a(name);
826 ut_a(file_format_max.name != NULL);
827 ut_a(format_id <= UNIV_FORMAT_MAX);
828
829 mutex_enter(&file_format_max.mutex);
830
831 if (format_id > file_format_max.id) {
832
833 ret = trx_sys_file_format_max_write(format_id, name);
834 }
835
836 mutex_exit(&file_format_max.mutex);
837
838 return(ret);
839 }
840
841 /*****************************************************************//**
842 Get the name representation of the file format from its id.
843 @return pointer to the max format name */
844 UNIV_INTERN
845 const char*
trx_sys_file_format_max_get(void)846 trx_sys_file_format_max_get(void)
847 /*=============================*/
848 {
849 return(file_format_max.name);
850 }
851
852 /*****************************************************************//**
853 Initializes the tablespace tag system. */
854 UNIV_INTERN
855 void
trx_sys_file_format_init(void)856 trx_sys_file_format_init(void)
857 /*==========================*/
858 {
859 mutex_create(file_format_max_mutex_key,
860 &file_format_max.mutex, SYNC_FILE_FORMAT_TAG);
861
862 /* We don't need a mutex here, as this function should only
863 be called once at start up. */
864 file_format_max.id = UNIV_FORMAT_MIN;
865
866 file_format_max.name = trx_sys_file_format_id_to_name(
867 file_format_max.id);
868 }
869
/*****************************************************************//**
Closes the tablespace tag system. Counterpart of
trx_sys_file_format_init(); currently a no-op, so nothing set up by the
init function is released here. */
UNIV_INTERN
void
trx_sys_file_format_close(void)
/*===========================*/
{
	/* Does nothing at the moment */
}
879
880 /*********************************************************************
881 Creates the rollback segments.
882 @return number of rollback segments that are active. */
883 UNIV_INTERN
884 ulint
trx_sys_create_rsegs(ulint n_spaces,ulint n_rsegs)885 trx_sys_create_rsegs(
886 /*=================*/
887 ulint n_spaces, /*!< number of tablespaces for UNDO logs */
888 ulint n_rsegs) /*!< number of rollback segments to create */
889 {
890 mtr_t mtr;
891 ulint n_used;
892
893 ut_a(n_spaces < TRX_SYS_N_RSEGS);
894 ut_a(n_rsegs <= TRX_SYS_N_RSEGS);
895
896 if (srv_read_only_mode) {
897 return(ULINT_UNDEFINED);
898 }
899
900 /* This is executed in single-threaded mode therefore it is not
901 necessary to use the same mtr in trx_rseg_create(). n_used cannot
902 change while the function is executing. */
903
904 mtr_start(&mtr);
905 n_used = trx_sysf_rseg_find_free(&mtr);
906 mtr_commit(&mtr);
907
908 if (n_used == ULINT_UNDEFINED) {
909 n_used = TRX_SYS_N_RSEGS;
910 }
911
912 /* Do not create additional rollback segments if innodb_force_recovery
913 has been set and the database was not shutdown cleanly. */
914
915 if (!srv_force_recovery && !recv_needed_recovery && n_used < n_rsegs) {
916 ulint i;
917 ulint new_rsegs = n_rsegs - n_used;
918
919 for (i = 0; i < new_rsegs; ++i) {
920 ulint space_id;
921 space_id = (n_spaces == 0) ? 0
922 : (srv_undo_space_id_start + i % n_spaces);
923
924 /* Tablespace 0 is the system tablespace. */
925 if (trx_rseg_create(space_id) != NULL) {
926 ++n_used;
927 } else {
928 break;
929 }
930 }
931 }
932
933 ib_logf(IB_LOG_LEVEL_INFO,
934 "%lu rollback segment(s) are active.", n_used);
935
936 return(n_used);
937 }
938
939 #else /* !UNIV_HOTBACKUP */
940 /*****************************************************************//**
941 Prints to stderr the MySQL binlog info in the system header if the
942 magic number shows it valid. */
943 UNIV_INTERN
944 void
trx_sys_print_mysql_binlog_offset_from_page(const byte * page)945 trx_sys_print_mysql_binlog_offset_from_page(
946 /*========================================*/
947 const byte* page) /*!< in: buffer containing the trx
948 system header page, i.e., page number
949 TRX_SYS_PAGE_NO in the tablespace */
950 {
951 const trx_sysf_t* sys_header;
952
953 sys_header = page + TRX_SYS;
954
955 if (mach_read_from_4(sys_header + TRX_SYS_MYSQL_LOG_INFO
956 + TRX_SYS_MYSQL_LOG_MAGIC_N_FLD)
957 == TRX_SYS_MYSQL_LOG_MAGIC_N) {
958
959 fprintf(stderr,
960 "mysqlbackup: Last MySQL binlog file position %lu %lu,"
961 " file name %s\n",
962 (ulong) mach_read_from_4(
963 sys_header + TRX_SYS_MYSQL_LOG_INFO
964 + TRX_SYS_MYSQL_LOG_OFFSET_HIGH),
965 (ulong) mach_read_from_4(
966 sys_header + TRX_SYS_MYSQL_LOG_INFO
967 + TRX_SYS_MYSQL_LOG_OFFSET_LOW),
968 sys_header + TRX_SYS_MYSQL_LOG_INFO
969 + TRX_SYS_MYSQL_LOG_NAME);
970 }
971 }
972
973 /*****************************************************************//**
974 Reads the file format id from the first system table space file.
975 Even if the call succeeds and returns TRUE, the returned format id
976 may be ULINT_UNDEFINED signalling that the format id was not present
977 in the data file.
978 @return TRUE if call succeeds */
979 UNIV_INTERN
980 ibool
trx_sys_read_file_format_id(const char * pathname,ulint * format_id)981 trx_sys_read_file_format_id(
982 /*========================*/
983 const char *pathname, /*!< in: pathname of the first system
984 table space file */
985 ulint *format_id) /*!< out: file format of the system table
986 space */
987 {
988 os_file_t file;
989 ibool success;
990 byte buf[UNIV_PAGE_SIZE * 2];
991 page_t* page = ut_align(buf, UNIV_PAGE_SIZE);
992 const byte* ptr;
993 ib_id_t file_format_id;
994
995 *format_id = ULINT_UNDEFINED;
996
997 file = os_file_create_simple_no_error_handling(
998 innodb_file_data_key,
999 pathname,
1000 OS_FILE_OPEN,
1001 OS_FILE_READ_ONLY,
1002 &success
1003 );
1004 if (!success) {
1005 /* The following call prints an error message */
1006 os_file_get_last_error(true);
1007
1008 ut_print_timestamp(stderr);
1009
1010 fprintf(stderr,
1011 " mysqlbackup: Error: trying to read system "
1012 "tablespace file format,\n"
1013 " mysqlbackup: but could not open the tablespace "
1014 "file %s!\n", pathname);
1015 return(FALSE);
1016 }
1017
1018 /* Read the page on which file format is stored */
1019
1020 success = os_file_read_no_error_handling(
1021 file, page, TRX_SYS_PAGE_NO * UNIV_PAGE_SIZE, UNIV_PAGE_SIZE);
1022
1023 if (!success) {
1024 /* The following call prints an error message */
1025 os_file_get_last_error(true);
1026
1027 ut_print_timestamp(stderr);
1028
1029 fprintf(stderr,
1030 " mysqlbackup: Error: trying to read system "
1031 "tablespace file format,\n"
1032 " mysqlbackup: but failed to read the tablespace "
1033 "file %s!\n", pathname);
1034
1035 os_file_close(file);
1036 return(FALSE);
1037 }
1038 os_file_close(file);
1039
1040 /* get the file format from the page */
1041 ptr = page + TRX_SYS_FILE_FORMAT_TAG;
1042 file_format_id = mach_read_from_8(ptr);
1043 file_format_id -= TRX_SYS_FILE_FORMAT_TAG_MAGIC_N;
1044
1045 if (file_format_id >= FILE_FORMAT_NAME_N) {
1046
1047 /* Either it has never been tagged, or garbage in it. */
1048 return(TRUE);
1049 }
1050
1051 *format_id = (ulint) file_format_id;
1052
1053 return(TRUE);
1054 }
1055
1056 /*****************************************************************//**
1057 Reads the file format id from the given per-table data file.
1058 @return TRUE if call succeeds */
1059 UNIV_INTERN
1060 ibool
trx_sys_read_pertable_file_format_id(const char * pathname,ulint * format_id)1061 trx_sys_read_pertable_file_format_id(
1062 /*=================================*/
1063 const char *pathname, /*!< in: pathname of a per-table
1064 datafile */
1065 ulint *format_id) /*!< out: file format of the per-table
1066 data file */
1067 {
1068 os_file_t file;
1069 ibool success;
1070 byte buf[UNIV_PAGE_SIZE * 2];
1071 page_t* page = ut_align(buf, UNIV_PAGE_SIZE);
1072 const byte* ptr;
1073 ib_uint32_t flags;
1074
1075 *format_id = ULINT_UNDEFINED;
1076
1077 file = os_file_create_simple_no_error_handling(
1078 innodb_file_data_key,
1079 pathname,
1080 OS_FILE_OPEN,
1081 OS_FILE_READ_ONLY,
1082 &success
1083 );
1084 if (!success) {
1085 /* The following call prints an error message */
1086 os_file_get_last_error(true);
1087
1088 ut_print_timestamp(stderr);
1089
1090 fprintf(stderr,
1091 " mysqlbackup: Error: trying to read per-table "
1092 "tablespace format,\n"
1093 " mysqlbackup: but could not open the tablespace "
1094 "file %s!\n", pathname);
1095
1096 return(FALSE);
1097 }
1098
1099 /* Read the first page of the per-table datafile */
1100
1101 success = os_file_read_no_error_handling(file, page, 0, UNIV_PAGE_SIZE);
1102
1103 if (!success) {
1104 /* The following call prints an error message */
1105 os_file_get_last_error(true);
1106
1107 ut_print_timestamp(stderr);
1108
1109 fprintf(stderr,
1110 " mysqlbackup: Error: trying to per-table data file "
1111 "format,\n"
1112 " mysqlbackup: but failed to read the tablespace "
1113 "file %s!\n", pathname);
1114
1115 os_file_close(file);
1116 return(FALSE);
1117 }
1118 os_file_close(file);
1119
1120 /* get the file format from the page */
1121 ptr = page + 54;
1122 flags = mach_read_from_4(ptr);
1123
1124 if (!fsp_flags_is_valid(flags) {
1125 /* bad tablespace flags */
1126 return(FALSE);
1127 }
1128
1129 *format_id = FSP_FLAGS_GET_POST_ANTELOPE(flags);
1130
1131 return(TRUE);
1132 }
1133
1134
1135 /*****************************************************************//**
1136 Get the name representation of the file format from its id.
1137 @return pointer to the name */
1138 UNIV_INTERN
1139 const char*
1140 trx_sys_file_format_id_to_name(
1141 /*===========================*/
1142 const ulint id) /*!< in: id of the file format */
1143 {
1144 if (!(id < FILE_FORMAT_NAME_N)) {
1145 /* unknown id */
1146 return("Unknown");
1147 }
1148
1149 return(file_format_name_map[id]);
1150 }
1151
1152 #endif /* !UNIV_HOTBACKUP */
1153
1154 #ifndef UNIV_HOTBACKUP
1155 /*********************************************************************
1156 Shutdown/Close the transaction system. */
1157 UNIV_INTERN
1158 void
1159 trx_sys_close(void)
1160 /*===============*/
1161 {
1162 ulint i;
1163 trx_t* trx;
1164 read_view_t* view;
1165
1166 ut_ad(trx_sys != NULL);
1167 ut_ad(srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS);
1168
1169 /* Check that all read views are closed except read view owned
1170 by a purge. */
1171
1172 mutex_enter(&trx_sys->mutex);
1173
1174 if (UT_LIST_GET_LEN(trx_sys->view_list) > 1) {
1175 fprintf(stderr,
1176 "InnoDB: Error: all read views were not closed"
1177 " before shutdown:\n"
1178 "InnoDB: %lu read views open \n",
1179 UT_LIST_GET_LEN(trx_sys->view_list) - 1);
1180 }
1181
1182 mutex_exit(&trx_sys->mutex);
1183
1184 sess_close(trx_dummy_sess);
1185 trx_dummy_sess = NULL;
1186
1187 trx_purge_sys_close();
1188
1189 /* Free the double write data structures. */
1190 buf_dblwr_free();
1191
1192 ut_a(UT_LIST_GET_LEN(trx_sys->ro_trx_list) == 0);
1193
1194 /* Only prepared transactions may be left in the system. Free them. */
1195 ut_a(UT_LIST_GET_LEN(trx_sys->rw_trx_list) == trx_sys->n_prepared_trx);
1196
1197 while ((trx = UT_LIST_GET_FIRST(trx_sys->rw_trx_list)) != NULL) {
1198 trx_free_prepared(trx);
1199 }
1200
1201 /* There can't be any active transactions. */
1202 for (i = 0; i < TRX_SYS_N_RSEGS; ++i) {
1203 trx_rseg_t* rseg;
1204
1205 rseg = trx_sys->rseg_array[i];
1206
1207 if (rseg != NULL) {
1208 trx_rseg_mem_free(rseg);
1209 } else {
1210 break;
1211 }
1212 }
1213
1214 view = UT_LIST_GET_FIRST(trx_sys->view_list);
1215
1216 while (view != NULL) {
1217 read_view_t* prev_view = view;
1218
1219 view = UT_LIST_GET_NEXT(view_list, prev_view);
1220
1221 /* Views are allocated from the trx_sys->global_read_view_heap.
1222 So, we simply remove the element here. */
1223 UT_LIST_REMOVE(view_list, trx_sys->view_list, prev_view);
1224 }
1225
1226 ut_a(UT_LIST_GET_LEN(trx_sys->view_list) == 0);
1227 ut_a(UT_LIST_GET_LEN(trx_sys->ro_trx_list) == 0);
1228 ut_a(UT_LIST_GET_LEN(trx_sys->rw_trx_list) == 0);
1229 ut_a(UT_LIST_GET_LEN(trx_sys->mysql_trx_list) == 0);
1230
1231 mutex_free(&trx_sys->mutex);
1232
1233 mem_free(trx_sys);
1234
1235 trx_sys = NULL;
1236 }
1237
1238 /*********************************************************************
1239 Check if there are any active (non-prepared) transactions.
1240 @return total number of active transactions or 0 if none */
1241 UNIV_INTERN
1242 ulint
1243 trx_sys_any_active_transactions(void)
1244 /*=================================*/
1245 {
1246 ulint total_trx = 0;
1247
1248 mutex_enter(&trx_sys->mutex);
1249
1250 total_trx = UT_LIST_GET_LEN(trx_sys->rw_trx_list)
1251 + UT_LIST_GET_LEN(trx_sys->mysql_trx_list);
1252
1253 ut_a(total_trx >= trx_sys->n_prepared_trx);
1254 total_trx -= trx_sys->n_prepared_trx;
1255
1256 mutex_exit(&trx_sys->mutex);
1257
1258 return(total_trx);
1259 }
1260
1261 #ifdef UNIV_DEBUG
1262 /*************************************************************//**
1263 Validate the trx_list_t.
1264 @return TRUE if valid. */
1265 static
1266 ibool
1267 trx_sys_validate_trx_list_low(
1268 /*===========================*/
1269 trx_list_t* trx_list) /*!< in: &trx_sys->ro_trx_list
1270 or &trx_sys->rw_trx_list */
1271 {
1272 const trx_t* trx;
1273 const trx_t* prev_trx = NULL;
1274
1275 ut_ad(mutex_own(&trx_sys->mutex));
1276
1277 ut_ad(trx_list == &trx_sys->ro_trx_list
1278 || trx_list == &trx_sys->rw_trx_list);
1279
1280 for (trx = UT_LIST_GET_FIRST(*trx_list);
1281 trx != NULL;
1282 prev_trx = trx, trx = UT_LIST_GET_NEXT(trx_list, prev_trx)) {
1283
1284 assert_trx_in_list(trx);
1285 ut_ad(trx->read_only == (trx_list == &trx_sys->ro_trx_list));
1286
1287 ut_a(prev_trx == NULL || prev_trx->id > trx->id);
1288 }
1289
1290 return(TRUE);
1291 }
1292
1293 /*************************************************************//**
1294 Validate the trx_sys_t::ro_trx_list and trx_sys_t::rw_trx_list.
1295 @return TRUE if lists are valid. */
1296 UNIV_INTERN
1297 ibool
1298 trx_sys_validate_trx_list(void)
1299 /*===========================*/
1300 {
1301 ut_ad(mutex_own(&trx_sys->mutex));
1302
1303 ut_a(trx_sys_validate_trx_list_low(&trx_sys->ro_trx_list));
1304 ut_a(trx_sys_validate_trx_list_low(&trx_sys->rw_trx_list));
1305
1306 return(TRUE);
1307 }
1308 #endif /* UNIV_DEBUG */
1309 #endif /* !UNIV_HOTBACKUP */
1310