1 /*****************************************************************************
2
3 Copyright (c) 1996, 2021, Oracle and/or its affiliates.
4
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License, version 2.0,
7 as published by the Free Software Foundation.
8
9 This program is also distributed with certain software (including
10 but not limited to OpenSSL) that is licensed under separate terms,
11 as designated in a particular file or component or in included license
12 documentation. The authors of MySQL hereby grant you an additional
13 permission to link the program and your derivative works with the
14 separately licensed software that they have included with MySQL.
15
16 This program is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 GNU General Public License, version 2.0, for more details.
20
21 You should have received a copy of the GNU General Public License along with
22 this program; if not, write to the Free Software Foundation, Inc.,
23 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
24
25 *****************************************************************************/
26
27 /**************************************************//**
28 @file trx/trx0sys.cc
29 Transaction system
30
31 Created 3/26/1996 Heikki Tuuri
32 *******************************************************/
33
34 #include "ha_prototypes.h"
35
36 #include "mysqld.h"
37 #include "trx0sys.h"
38 #include "sql_error.h"
39 #ifdef UNIV_NONINL
40 #include "trx0sys.ic"
41 #endif
42
43 #ifdef UNIV_HOTBACKUP
44 #include "fsp0types.h"
45
46 #else /* !UNIV_HOTBACKUP */
47 #include "fsp0fsp.h"
48 #include "mtr0log.h"
49 #include "mtr0log.h"
50 #include "trx0trx.h"
51 #include "trx0rseg.h"
52 #include "trx0undo.h"
53 #include "srv0srv.h"
54 #include "srv0start.h"
55 #include "trx0purge.h"
56 #include "log0log.h"
57 #include "log0recv.h"
58 #include "os0file.h"
59 #include "read0read.h"
60 #include "fsp0sysspace.h"
61
62 /** The file format tag structure with id and name. */
63 struct file_format_t {
64 ulint id; /*!< id of the file format */
65 const char* name; /*!< text representation of the
66 file format */
67 ib_mutex_t mutex; /*!< covers changes to the above
68 fields */
69 };
70
71 /** The transaction system */
72 trx_sys_t* trx_sys = NULL;
73 #endif /* !UNIV_HOTBACKUP */
74
75 /** List of animal names representing file format. */
76 static const char* file_format_name_map[] = {
77 "Antelope",
78 "Barracuda",
79 "Cheetah",
80 "Dragon",
81 "Elk",
82 "Fox",
83 "Gazelle",
84 "Hornet",
85 "Impala",
86 "Jaguar",
87 "Kangaroo",
88 "Leopard",
89 "Moose",
90 "Nautilus",
91 "Ocelot",
92 "Porpoise",
93 "Quail",
94 "Rabbit",
95 "Shark",
96 "Tiger",
97 "Urchin",
98 "Viper",
99 "Whale",
100 "Xenops",
101 "Yak",
102 "Zebra"
103 };
104
105 /** The number of elements in the file format name array. */
106 static const ulint FILE_FORMAT_NAME_N
107 = sizeof(file_format_name_map) / sizeof(file_format_name_map[0]);
108
109 /** Check whether transaction id is valid.
110 @param[in] id transaction id to check
111 @param[in] name table name */
112 void
check_trx_id_sanity(trx_id_t id,const table_name_t & name)113 ReadView::check_trx_id_sanity(
114 trx_id_t id,
115 const table_name_t& name)
116 {
117 if (id >= trx_sys->max_trx_id) {
118
119 ib::warn() << "A transaction id"
120 << " in a record of table "
121 << name
122 << " is newer than the"
123 << " system-wide maximum.";
124 ut_ad(0);
125 THD *thd = current_thd;
126 if (thd != NULL) {
127 char table_name[MAX_FULL_NAME_LEN + 1];
128
129 innobase_format_name(
130 table_name, sizeof(table_name),
131 name.m_name);
132
133 push_warning_printf(thd, Sql_condition::SL_WARNING,
134 ER_SIGNAL_WARN,
135 "InnoDB: Transaction id"
136 " in a record of table"
137 " %s is newer than system-wide"
138 " maximum.", table_name);
139 }
140 }
141 }
142
143 #ifndef UNIV_HOTBACKUP
144 #ifdef UNIV_DEBUG
145 /* Flag to control TRX_RSEG_N_SLOTS behavior debugging. */
146 uint trx_rseg_n_slots_debug = 0;
147 #endif
148
149 /** This is used to track the maximum file format id known to InnoDB. It's
150 updated via SET GLOBAL innodb_file_format_max = 'x' or when we open
151 or create a table. */
152 static file_format_t file_format_max;
153
154 #ifdef UNIV_DEBUG
155 /****************************************************************//**
156 Checks whether a trx is in one of rw_trx_list
157 @return true if is in */
158 bool
trx_in_rw_trx_list(const trx_t * in_trx)159 trx_in_rw_trx_list(
160 /*============*/
161 const trx_t* in_trx) /*!< in: transaction */
162 {
163 const trx_t* trx;
164
165 /* Non-locking autocommits should not hold any locks. */
166 check_trx_state(in_trx);
167
168 ut_ad(trx_sys_mutex_own());
169
170 ut_ad(trx_assert_started(in_trx));
171
172 for (trx = UT_LIST_GET_FIRST(trx_sys->rw_trx_list);
173 trx != NULL && trx != in_trx;
174 trx = UT_LIST_GET_NEXT(trx_list, trx)) {
175
176 check_trx_state(trx);
177
178 ut_ad(trx->rsegs.m_redo.rseg != NULL && !trx->read_only);
179 }
180
181 return(trx != 0);
182 }
183 #endif /* UNIV_DEBUG */
184
185 /*****************************************************************//**
186 Writes the value of max_trx_id to the file based trx system header. */
187 void
trx_sys_flush_max_trx_id(void)188 trx_sys_flush_max_trx_id(void)
189 /*==========================*/
190 {
191 mtr_t mtr;
192 trx_sysf_t* sys_header;
193
194 ut_ad(trx_sys_mutex_own());
195
196 if (!srv_read_only_mode) {
197 mtr_start(&mtr);
198
199 sys_header = trx_sysf_get(&mtr);
200
201 mlog_write_ull(
202 sys_header + TRX_SYS_TRX_ID_STORE,
203 trx_sys->max_trx_id, &mtr);
204
205 mtr_commit(&mtr);
206 }
207 }
208
209 /*****************************************************************//**
210 Updates the offset information about the end of the MySQL binlog entry
211 which corresponds to the transaction just being committed. In a MySQL
212 replication slave updates the latest master binlog position up to which
213 replication has proceeded. */
214 void
trx_sys_update_mysql_binlog_offset(const char * file_name,int64_t offset,ulint field,mtr_t * mtr)215 trx_sys_update_mysql_binlog_offset(
216 /*===============================*/
217 const char* file_name,/*!< in: MySQL log file name */
218 int64_t offset, /*!< in: position in that log file */
219 ulint field, /*!< in: offset of the MySQL log info field in
220 the trx sys header */
221 mtr_t* mtr) /*!< in: mtr */
222 {
223 trx_sysf_t* sys_header;
224
225 if (ut_strlen(file_name) >= TRX_SYS_MYSQL_LOG_NAME_LEN) {
226
227 /* We cannot fit the name to the 512 bytes we have reserved */
228
229 return;
230 }
231
232 sys_header = trx_sysf_get(mtr);
233
234 if (mach_read_from_4(sys_header + field
235 + TRX_SYS_MYSQL_LOG_MAGIC_N_FLD)
236 != TRX_SYS_MYSQL_LOG_MAGIC_N) {
237
238 mlog_write_ulint(sys_header + field
239 + TRX_SYS_MYSQL_LOG_MAGIC_N_FLD,
240 TRX_SYS_MYSQL_LOG_MAGIC_N,
241 MLOG_4BYTES, mtr);
242 }
243
244 if (0 != strcmp((char*) (sys_header + field + TRX_SYS_MYSQL_LOG_NAME),
245 file_name)) {
246
247 mlog_write_string(sys_header + field
248 + TRX_SYS_MYSQL_LOG_NAME,
249 (byte*) file_name, 1 + ut_strlen(file_name),
250 mtr);
251 }
252
253 if (mach_read_from_4(sys_header + field
254 + TRX_SYS_MYSQL_LOG_OFFSET_HIGH) > 0
255 || (offset >> 32) > 0) {
256
257 mlog_write_ulint(sys_header + field
258 + TRX_SYS_MYSQL_LOG_OFFSET_HIGH,
259 (ulint)(offset >> 32),
260 MLOG_4BYTES, mtr);
261 }
262
263 mlog_write_ulint(sys_header + field
264 + TRX_SYS_MYSQL_LOG_OFFSET_LOW,
265 (ulint)(offset & 0xFFFFFFFFUL),
266 MLOG_4BYTES, mtr);
267 }
268
269 /*****************************************************************//**
270 Stores the MySQL binlog offset info in the trx system header if
271 the magic number shows it valid, and print the info to stderr */
272 void
trx_sys_print_mysql_binlog_offset(void)273 trx_sys_print_mysql_binlog_offset(void)
274 /*===================================*/
275 {
276 trx_sysf_t* sys_header;
277 mtr_t mtr;
278 ulint trx_sys_mysql_bin_log_pos_high;
279 ulint trx_sys_mysql_bin_log_pos_low;
280
281 mtr_start(&mtr);
282
283 sys_header = trx_sysf_get(&mtr);
284
285 if (mach_read_from_4(sys_header + TRX_SYS_MYSQL_LOG_INFO
286 + TRX_SYS_MYSQL_LOG_MAGIC_N_FLD)
287 != TRX_SYS_MYSQL_LOG_MAGIC_N) {
288
289 mtr_commit(&mtr);
290
291 return;
292 }
293
294 trx_sys_mysql_bin_log_pos_high = mach_read_from_4(
295 sys_header + TRX_SYS_MYSQL_LOG_INFO
296 + TRX_SYS_MYSQL_LOG_OFFSET_HIGH);
297 trx_sys_mysql_bin_log_pos_low = mach_read_from_4(
298 sys_header + TRX_SYS_MYSQL_LOG_INFO
299 + TRX_SYS_MYSQL_LOG_OFFSET_LOW);
300
301 ib::info() << "Last MySQL binlog file position "
302 << trx_sys_mysql_bin_log_pos_high << " "
303 << trx_sys_mysql_bin_log_pos_low << ", file name "
304 << sys_header + TRX_SYS_MYSQL_LOG_INFO
305 + TRX_SYS_MYSQL_LOG_NAME;
306
307 mtr_commit(&mtr);
308 }
309
310 /****************************************************************//**
311 Looks for a free slot for a rollback segment in the trx system file copy.
312 @return slot index or ULINT_UNDEFINED if not found */
313 ulint
trx_sysf_rseg_find_free(mtr_t * mtr,bool include_tmp_slots,ulint nth_free_slots)314 trx_sysf_rseg_find_free(
315 /*====================*/
316 mtr_t* mtr, /*!< in/out: mtr */
317 bool include_tmp_slots, /*!< in: if true, report slots reserved
318 for temp-tablespace as free slots. */
319 ulint nth_free_slots) /*!< in: allocate nth free slot.
320 0 means next free slot. */
321 {
322 ulint i;
323 trx_sysf_t* sys_header;
324
325 sys_header = trx_sysf_get(mtr);
326
327 ulint found_free_slots = 0;
328 for (i = 0; i < TRX_SYS_N_RSEGS; i++) {
329 ulint page_no;
330
331 if (!include_tmp_slots && trx_sys_is_noredo_rseg_slot(i)) {
332 continue;
333 }
334
335 page_no = trx_sysf_rseg_get_page_no(sys_header, i, mtr);
336
337 if (page_no == FIL_NULL
338 || (include_tmp_slots
339 && trx_sys_is_noredo_rseg_slot(i))) {
340
341 if (found_free_slots++ >= nth_free_slots) {
342 return(i);
343 }
344 }
345 }
346
347 return(ULINT_UNDEFINED);
348 }
349
350 /****************************************************************//**
351 Looks for used slots for redo rollback segment.
352 @return number of used slots */
353 static
354 ulint
trx_sysf_used_slots_for_redo_rseg(mtr_t * mtr)355 trx_sysf_used_slots_for_redo_rseg(
356 /*==============================*/
357 mtr_t* mtr) /*!< in: mtr */
358 {
359 trx_sysf_t* sys_header;
360 ulint n_used = 0;
361
362 sys_header = trx_sysf_get(mtr);
363
364 for (ulint i = 0; i < TRX_SYS_N_RSEGS; i++) {
365
366 if (trx_sys_is_noredo_rseg_slot(i)) {
367 continue;
368 }
369
370 ulint page_no;
371
372 page_no = trx_sysf_rseg_get_page_no(sys_header, i, mtr);
373
374 if (page_no != FIL_NULL) {
375 ++n_used;
376 }
377 }
378
379 return(n_used);
380 }
381
382 /*****************************************************************//**
383 Creates the file page for the transaction system. This function is called only
384 at the database creation, before trx_sys_init. */
385 static
386 void
trx_sysf_create(mtr_t * mtr)387 trx_sysf_create(
388 /*============*/
389 mtr_t* mtr) /*!< in: mtr */
390 {
391 trx_sysf_t* sys_header;
392 ulint slot_no;
393 buf_block_t* block;
394 page_t* page;
395 ulint page_no;
396 byte* ptr;
397 ulint len;
398
399 ut_ad(mtr);
400
401 /* Note that below we first reserve the file space x-latch, and
402 then enter the kernel: we must do it in this order to conform
403 to the latching order rules. */
404
405 mtr_x_lock_space(TRX_SYS_SPACE, mtr);
406
407 /* Create the trx sys file block in a new allocated file segment */
408 block = fseg_create(TRX_SYS_SPACE, 0, TRX_SYS + TRX_SYS_FSEG_HEADER,
409 mtr);
410 buf_block_dbg_add_level(block, SYNC_TRX_SYS_HEADER);
411
412 ut_a(block->page.id.page_no() == TRX_SYS_PAGE_NO);
413
414 page = buf_block_get_frame(block);
415
416 mlog_write_ulint(page + FIL_PAGE_TYPE, FIL_PAGE_TYPE_TRX_SYS,
417 MLOG_2BYTES, mtr);
418
419 /* Reset the doublewrite buffer magic number to zero so that we
420 know that the doublewrite buffer has not yet been created (this
421 suppresses a Valgrind warning) */
422
423 mlog_write_ulint(page + TRX_SYS_DOUBLEWRITE
424 + TRX_SYS_DOUBLEWRITE_MAGIC, 0, MLOG_4BYTES, mtr);
425
426 sys_header = trx_sysf_get(mtr);
427
428 /* Start counting transaction ids from number 1 up */
429 mach_write_to_8(sys_header + TRX_SYS_TRX_ID_STORE, 1);
430
431 /* Reset the rollback segment slots. Old versions of InnoDB
432 define TRX_SYS_N_RSEGS as 256 (TRX_SYS_OLD_N_RSEGS) and expect
433 that the whole array is initialized. */
434 ptr = TRX_SYS_RSEGS + sys_header;
435 len = ut_max(TRX_SYS_OLD_N_RSEGS, TRX_SYS_N_RSEGS)
436 * TRX_SYS_RSEG_SLOT_SIZE;
437 memset(ptr, 0xff, len);
438 ptr += len;
439 ut_a(ptr <= page + (UNIV_PAGE_SIZE - FIL_PAGE_DATA_END));
440
441 /* Initialize all of the page. This part used to be uninitialized. */
442 memset(ptr, 0, UNIV_PAGE_SIZE - FIL_PAGE_DATA_END + page - ptr);
443
444 mlog_log_string(sys_header, UNIV_PAGE_SIZE - FIL_PAGE_DATA_END
445 + page - sys_header, mtr);
446
447 /* Create the first rollback segment in the SYSTEM tablespace */
448 slot_no = trx_sysf_rseg_find_free(mtr, false, 0);
449 page_no = trx_rseg_header_create(TRX_SYS_SPACE, univ_page_size,
450 ULINT_MAX, slot_no, mtr);
451
452 ut_a(slot_no == TRX_SYS_SYSTEM_RSEG_ID);
453 ut_a(page_no == FSP_FIRST_RSEG_PAGE_NO);
454 }
455
456 /*****************************************************************//**
457 Creates and initializes the central memory structures for the transaction
458 system. This is called when the database is started.
459 @return min binary heap of rsegs to purge */
460 purge_pq_t*
trx_sys_init_at_db_start(void)461 trx_sys_init_at_db_start(void)
462 /*==========================*/
463 {
464 purge_pq_t* purge_queue;
465 trx_sysf_t* sys_header;
466 ib_uint64_t rows_to_undo = 0;
467 const char* unit = "";
468
469 /* We create the min binary heap here and pass ownership to
470 purge when we init the purge sub-system. Purge is responsible
471 for freeing the binary heap. */
472 purge_queue = UT_NEW_NOKEY(purge_pq_t());
473 ut_a(purge_queue != NULL);
474
475 if (srv_force_recovery < SRV_FORCE_NO_UNDO_LOG_SCAN) {
476 trx_rseg_array_init(purge_queue);
477 }
478
479 /* VERY important: after the database is started, max_trx_id value is
480 divisible by TRX_SYS_TRX_ID_WRITE_MARGIN, and the 'if' in
481 trx_sys_get_new_trx_id will evaluate to TRUE when the function
482 is first time called, and the value for trx id will be written
483 to the disk-based header! Thus trx id values will not overlap when
484 the database is repeatedly started! */
485
486 mtr_t mtr;
487 mtr.start();
488
489 sys_header = trx_sysf_get(&mtr);
490
491 trx_sys->max_trx_id = 2 * TRX_SYS_TRX_ID_WRITE_MARGIN
492 + ut_uint64_align_up(mach_read_from_8(sys_header
493 + TRX_SYS_TRX_ID_STORE),
494 TRX_SYS_TRX_ID_WRITE_MARGIN);
495
496 mtr.commit();
497 ut_d(trx_sys->rw_max_trx_id = trx_sys->max_trx_id);
498
499 trx_dummy_sess = sess_open();
500
501 trx_lists_init_at_db_start();
502
503 /* This mutex is not strictly required, it is here only to satisfy
504 the debug code (assertions). We are still running in single threaded
505 bootstrap mode. */
506
507 trx_sys_mutex_enter();
508
509 if (UT_LIST_GET_LEN(trx_sys->rw_trx_list) > 0) {
510 const trx_t* trx;
511
512 for (trx = UT_LIST_GET_FIRST(trx_sys->rw_trx_list);
513 trx != NULL;
514 trx = UT_LIST_GET_NEXT(trx_list, trx)) {
515
516 ut_ad(trx->is_recovered);
517 assert_trx_in_rw_list(trx);
518
519 if (trx_state_eq(trx, TRX_STATE_ACTIVE)) {
520 rows_to_undo += trx->undo_no;
521 }
522 }
523
524 if (rows_to_undo > 1000000000) {
525 unit = "M";
526 rows_to_undo = rows_to_undo / 1000000;
527 }
528
529 ib::info() << UT_LIST_GET_LEN(trx_sys->rw_trx_list)
530 << " transaction(s) which must be rolled back or"
531 " cleaned up in total " << rows_to_undo << unit
532 << " row operations to undo";
533
534 ib::info() << "Trx id counter is " << trx_sys->max_trx_id;
535 }
536
537 trx_sys_mutex_exit();
538
539 return(purge_queue);
540 }
541
542 /*****************************************************************//**
543 Creates the trx_sys instance and initializes purge_queue and mutex. */
544 void
trx_sys_create(void)545 trx_sys_create(void)
546 /*================*/
547 {
548 ut_ad(trx_sys == NULL);
549
550 trx_sys = static_cast<trx_sys_t*>(ut_zalloc_nokey(sizeof(*trx_sys)));
551
552 mutex_create(LATCH_ID_TRX_SYS, &trx_sys->mutex);
553
554 UT_LIST_INIT(trx_sys->serialisation_list, &trx_t::no_list);
555 UT_LIST_INIT(trx_sys->rw_trx_list, &trx_t::trx_list);
556 UT_LIST_INIT(trx_sys->mysql_trx_list, &trx_t::mysql_trx_list);
557
558 trx_sys->mvcc = UT_NEW_NOKEY(MVCC(1024));
559
560 new(&trx_sys->rw_trx_ids) trx_ids_t(ut_allocator<trx_id_t>(
561 mem_key_trx_sys_t_rw_trx_ids));
562
563 new(&trx_sys->rw_trx_set) TrxIdSet();
564 }
565
566 /*****************************************************************//**
567 Creates and initializes the transaction system at the database creation. */
568 void
trx_sys_create_sys_pages(void)569 trx_sys_create_sys_pages(void)
570 /*==========================*/
571 {
572 mtr_t mtr;
573
574 mtr_start(&mtr);
575
576 trx_sysf_create(&mtr);
577
578 mtr_commit(&mtr);
579 }
580
581 /*****************************************************************//**
582 Update the file format tag.
583 @return always TRUE */
584 static
585 ibool
trx_sys_file_format_max_write(ulint format_id,const char ** name)586 trx_sys_file_format_max_write(
587 /*==========================*/
588 ulint format_id, /*!< in: file format id */
589 const char** name) /*!< out: max file format name, can
590 be NULL */
591 {
592 mtr_t mtr;
593 byte* ptr;
594 buf_block_t* block;
595 ib_uint64_t tag_value;
596
597 mtr_start(&mtr);
598
599 block = buf_page_get(
600 page_id_t(TRX_SYS_SPACE, TRX_SYS_PAGE_NO), univ_page_size,
601 RW_X_LATCH, &mtr);
602
603 file_format_max.id = format_id;
604 file_format_max.name = trx_sys_file_format_id_to_name(format_id);
605
606 ptr = buf_block_get_frame(block) + TRX_SYS_FILE_FORMAT_TAG;
607 tag_value = format_id + TRX_SYS_FILE_FORMAT_TAG_MAGIC_N;
608
609 if (name) {
610 *name = file_format_max.name;
611 }
612
613 mlog_write_ull(ptr, tag_value, &mtr);
614
615 mtr_commit(&mtr);
616
617 return(TRUE);
618 }
619
620 /*****************************************************************//**
621 Read the file format tag.
622 @return the file format or ULINT_UNDEFINED if not set. */
623 static
624 ulint
trx_sys_file_format_max_read(void)625 trx_sys_file_format_max_read(void)
626 /*==============================*/
627 {
628 mtr_t mtr;
629 const byte* ptr;
630 const buf_block_t* block;
631 ib_id_t file_format_id;
632
633 /* Since this is called during the startup phase it's safe to
634 read the value without a covering mutex. */
635 mtr_start(&mtr);
636
637 block = buf_page_get(
638 page_id_t(TRX_SYS_SPACE, TRX_SYS_PAGE_NO), univ_page_size,
639 RW_X_LATCH, &mtr);
640
641 ptr = buf_block_get_frame(block) + TRX_SYS_FILE_FORMAT_TAG;
642 file_format_id = mach_read_from_8(ptr);
643
644 mtr_commit(&mtr);
645
646 file_format_id -= TRX_SYS_FILE_FORMAT_TAG_MAGIC_N;
647
648 if (file_format_id >= FILE_FORMAT_NAME_N) {
649
650 /* Either it has never been tagged, or garbage in it. */
651 return(ULINT_UNDEFINED);
652 }
653
654 return((ulint) file_format_id);
655 }
656
657 /*****************************************************************//**
658 Get the name representation of the file format from its id.
659 @return pointer to the name */
660 const char*
trx_sys_file_format_id_to_name(const ulint id)661 trx_sys_file_format_id_to_name(
662 /*===========================*/
663 const ulint id) /*!< in: id of the file format */
664 {
665 ut_a(id < FILE_FORMAT_NAME_N);
666
667 return(file_format_name_map[id]);
668 }
669
670 /*****************************************************************//**
671 Check for the max file format tag stored on disk. Note: If max_format_id
672 is == UNIV_FORMAT_MAX + 1 then we only print a warning.
673 @return DB_SUCCESS or error code */
674 dberr_t
trx_sys_file_format_max_check(ulint max_format_id)675 trx_sys_file_format_max_check(
676 /*==========================*/
677 ulint max_format_id) /*!< in: max format id to check */
678 {
679 ulint format_id;
680
681 /* Check the file format in the tablespace. Do not try to
682 recover if the file format is not supported by the engine
683 unless forced by the user. */
684 format_id = trx_sys_file_format_max_read();
685 if (format_id == ULINT_UNDEFINED) {
686 /* Format ID was not set. Set it to minimum possible
687 value. */
688 format_id = UNIV_FORMAT_MIN;
689 }
690
691 ib::info() << "Highest supported file format is "
692 << trx_sys_file_format_id_to_name(UNIV_FORMAT_MAX) << ".";
693
694 if (format_id > UNIV_FORMAT_MAX) {
695
696 ut_a(format_id < FILE_FORMAT_NAME_N);
697
698 const std::string msg = std::string("The system"
699 " tablespace is in a file format that this version"
700 " doesn't support - ")
701 + trx_sys_file_format_id_to_name(format_id)
702 + ".";
703
704 if (max_format_id <= UNIV_FORMAT_MAX) {
705 ib::error() << msg;
706 } else {
707 ib::warn() << msg;
708 }
709
710 if (max_format_id <= UNIV_FORMAT_MAX) {
711 return(DB_ERROR);
712 }
713 }
714
715 format_id = (format_id > max_format_id) ? format_id : max_format_id;
716
717 /* We don't need a mutex here, as this function should only
718 be called once at start up. */
719 file_format_max.id = format_id;
720 file_format_max.name = trx_sys_file_format_id_to_name(format_id);
721
722 return(DB_SUCCESS);
723 }
724
725 /*****************************************************************//**
726 Set the file format id unconditionally except if it's already the
727 same value.
728 @return TRUE if value updated */
729 ibool
trx_sys_file_format_max_set(ulint format_id,const char ** name)730 trx_sys_file_format_max_set(
731 /*========================*/
732 ulint format_id, /*!< in: file format id */
733 const char** name) /*!< out: max file format name or
734 NULL if not needed. */
735 {
736 ibool ret = FALSE;
737
738 ut_a(format_id <= UNIV_FORMAT_MAX);
739
740 mutex_enter(&file_format_max.mutex);
741
742 /* Only update if not already same value. */
743 if (format_id != file_format_max.id) {
744
745 ret = trx_sys_file_format_max_write(format_id, name);
746 }
747
748 mutex_exit(&file_format_max.mutex);
749
750 return(ret);
751 }
752
753 /********************************************************************//**
754 Tags the system table space with minimum format id if it has not been
755 tagged yet.
756 WARNING: This function is only called during the startup and AFTER the
757 redo log application during recovery has finished. */
758 void
trx_sys_file_format_tag_init(void)759 trx_sys_file_format_tag_init(void)
760 /*==============================*/
761 {
762 ulint format_id;
763
764 format_id = trx_sys_file_format_max_read();
765
766 /* If format_id is not set then set it to the minimum. */
767 if (format_id == ULINT_UNDEFINED) {
768 trx_sys_file_format_max_set(UNIV_FORMAT_MIN, NULL);
769 }
770 }
771
772 /********************************************************************//**
773 Update the file format tag in the system tablespace only if the given
774 format id is greater than the known max id.
775 @return TRUE if format_id was bigger than the known max id */
776 ibool
trx_sys_file_format_max_upgrade(const char ** name,ulint format_id)777 trx_sys_file_format_max_upgrade(
778 /*============================*/
779 const char** name, /*!< out: max file format name */
780 ulint format_id) /*!< in: file format identifier */
781 {
782 ibool ret = FALSE;
783
784 ut_a(name);
785 ut_a(file_format_max.name != NULL);
786 ut_a(format_id <= UNIV_FORMAT_MAX);
787
788 mutex_enter(&file_format_max.mutex);
789
790 if (format_id > file_format_max.id) {
791
792 ret = trx_sys_file_format_max_write(format_id, name);
793 }
794
795 mutex_exit(&file_format_max.mutex);
796
797 return(ret);
798 }
799
800 /*****************************************************************//**
801 Get the name representation of the file format from its id.
802 @return pointer to the max format name */
803 const char*
trx_sys_file_format_max_get(void)804 trx_sys_file_format_max_get(void)
805 /*=============================*/
806 {
807 return(file_format_max.name);
808 }
809
810 /*****************************************************************//**
811 Initializes the tablespace tag system. */
812 void
trx_sys_file_format_init(void)813 trx_sys_file_format_init(void)
814 /*==========================*/
815 {
816 mutex_create(LATCH_ID_FILE_FORMAT_MAX, &file_format_max.mutex);
817
818 /* We don't need a mutex here, as this function should only
819 be called once at start up. */
820 file_format_max.id = UNIV_FORMAT_MIN;
821
822 file_format_max.name = trx_sys_file_format_id_to_name(
823 file_format_max.id);
824 }
825
826 /*****************************************************************//**
827 Closes the tablespace tag system. */
828 void
trx_sys_file_format_close(void)829 trx_sys_file_format_close(void)
830 /*===========================*/
831 {
832 mutex_free(&file_format_max.mutex);
833 }
834
835 /*********************************************************************
836 Creates non-redo rollback segments.
837 @return number of non-redo rollback segments created. */
838 static
839 ulint
trx_sys_create_noredo_rsegs(ulint n_nonredo_rseg)840 trx_sys_create_noredo_rsegs(
841 /*========================*/
842 ulint n_nonredo_rseg) /*!< number of non-redo rollback segment
843 to create. */
844 {
845 ulint n_created = 0;
846
847 /* Create non-redo rollback segments residing in temp-tablespace.
848 non-redo rollback segments don't perform redo logging and so
849 are used for undo logging of objects/table that don't need to be
850 recover on crash.
851 (Non-Redo rollback segments are created on every server startup).
852 Slot-0: reserved for system-tablespace.
853 Slot-1....Slot-N: reserved for temp-tablespace.
854 Slot-N+1....Slot-127: reserved for system/undo-tablespace. */
855 for (ulint i = 0; i < n_nonredo_rseg; i++) {
856 ulint space = srv_tmp_space.space_id();
857 if (trx_rseg_create(space, i) == NULL) {
858 break;
859 }
860 ++n_created;
861 }
862
863 return(n_created);
864 }
865
866 /*********************************************************************
867 Creates the rollback segments.
868 @return number of rollback segments that are active. */
869 ulint
trx_sys_create_rsegs(ulint n_spaces,ulint n_rsegs,ulint n_tmp_rsegs)870 trx_sys_create_rsegs(
871 /*=================*/
872 ulint n_spaces, /*!< number of tablespaces for UNDO logs */
873 ulint n_rsegs, /*!< number of rollback segments to create */
874 ulint n_tmp_rsegs) /*!< number of rollback segments reserved for
875 temp-tables. */
876 {
877 mtr_t mtr;
878 ulint n_used;
879 ulint n_noredo_created;
880
881 ut_a(n_spaces < TRX_SYS_N_RSEGS);
882 ut_a(n_rsegs <= TRX_SYS_N_RSEGS);
883 ut_a(n_tmp_rsegs > 0 && n_tmp_rsegs < TRX_SYS_N_RSEGS);
884
885 if (srv_read_only_mode) {
886 return(ULINT_UNDEFINED);
887 }
888
889 /* Create non-redo rollback segments. */
890 n_noredo_created = trx_sys_create_noredo_rsegs(n_tmp_rsegs);
891
892 /* This is executed in single-threaded mode therefore it is not
893 necessary to use the same mtr in trx_rseg_create(). n_used cannot
894 change while the function is executing. */
895 mtr_start(&mtr);
896 n_used = trx_sysf_used_slots_for_redo_rseg(&mtr) + n_noredo_created;
897 mtr_commit(&mtr);
898
899 ut_ad(n_used <= TRX_SYS_N_RSEGS);
900
901 /* By default 1 redo rseg is always active that is hosted in
902 system tablespace. */
903 ulint n_redo_active;
904 if (n_rsegs <= n_tmp_rsegs) {
905 n_redo_active = 1;
906 } else if (n_rsegs > n_used) {
907 n_redo_active = n_used - n_tmp_rsegs;
908 } else {
909 n_redo_active = n_rsegs - n_tmp_rsegs;
910 }
911
912 /* Do not create additional rollback segments if innodb_force_recovery
913 has been set and the database was not shutdown cleanly. */
914 if (!srv_force_recovery && !recv_needed_recovery && n_used < n_rsegs) {
915 ulint i;
916 ulint new_rsegs = n_rsegs - n_used;
917
918 for (i = 0; i < new_rsegs; ++i) {
919 ulint space_id;
920 space_id = (n_spaces == 0) ? 0
921 : (srv_undo_space_id_start + i % n_spaces);
922
923 ut_ad(n_spaces == 0
924 || srv_is_undo_tablespace(space_id));
925
926 if (trx_rseg_create(space_id, 0) != NULL) {
927 ++n_used;
928 ++n_redo_active;
929
930 ulint last_undo_space =
931 srv_undo_space_id_start
932 + (srv_undo_tablespaces_active - 1);
933
934 /* Increase the number of active undo
935 tablespace in case new rollback segment
936 assigned to new undo tablespace. */
937 if (space_id > last_undo_space) {
938 srv_undo_tablespaces_active++;
939
940 ut_ad(space_id == last_undo_space + 1);
941 }
942 } else {
943 break;
944 }
945 }
946 }
947
948 ib::info() << n_used - srv_tmp_undo_logs
949 << " redo rollback segment(s) found. "
950 << n_redo_active
951 << " redo rollback segment(s) are active.";
952
953 ib::info() << n_noredo_created << " non-redo rollback segment(s) are"
954 " active.";
955
956 return(n_used);
957 }
958
959 #else /* !UNIV_HOTBACKUP */
960 /*****************************************************************//**
961 Prints to stderr the MySQL binlog info in the system header if the
962 magic number shows it valid. */
963 void
trx_sys_print_mysql_binlog_offset_from_page(const byte * page)964 trx_sys_print_mysql_binlog_offset_from_page(
965 /*========================================*/
966 const byte* page) /*!< in: buffer containing the trx
967 system header page, i.e., page number
968 TRX_SYS_PAGE_NO in the tablespace */
969 {
970 const trx_sysf_t* sys_header;
971
972 sys_header = page + TRX_SYS;
973
974 if (mach_read_from_4(sys_header + TRX_SYS_MYSQL_LOG_INFO
975 + TRX_SYS_MYSQL_LOG_MAGIC_N_FLD)
976 == TRX_SYS_MYSQL_LOG_MAGIC_N) {
977
978 ib::info() << "mysqlbackup: Last MySQL binlog file position "
979 << mach_read_from_4(
980 sys_header + TRX_SYS_MYSQL_LOG_INFO
981 + TRX_SYS_MYSQL_LOG_OFFSET_HIGH) << " "
982 << mach_read_from_4(
983 sys_header + TRX_SYS_MYSQL_LOG_INFO
984 + TRX_SYS_MYSQL_LOG_OFFSET_LOW)
985 << ", file name " << sys_header
986 + TRX_SYS_MYSQL_LOG_INFO + TRX_SYS_MYSQL_LOG_NAME;
987 }
988 }
989
990 /*****************************************************************//**
991 Reads the file format id from the first system table space file.
992 Even if the call succeeds and returns TRUE, the returned format id
993 may be ULINT_UNDEFINED signalling that the format id was not present
994 in the data file.
995 @return TRUE if call succeeds */
996 ibool
trx_sys_read_file_format_id(const char * pathname,ulint * format_id)997 trx_sys_read_file_format_id(
998 /*========================*/
999 const char *pathname, /*!< in: pathname of the first system
1000 table space file */
1001 ulint *format_id) /*!< out: file format of the system table
1002 space */
1003 {
1004 os_file_t file;
1005 bool success;
1006 byte buf[UNIV_PAGE_SIZE * 2];
1007 page_t* page = ut_align(buf, UNIV_PAGE_SIZE);
1008 const byte* ptr;
1009 ib_id_t file_format_id;
1010
1011 *format_id = ULINT_UNDEFINED;
1012
1013 file = os_file_create_simple_no_error_handling(
1014 innodb_data_file_key,
1015 pathname,
1016 OS_FILE_OPEN,
1017 OS_FILE_READ_ONLY,
1018 srv_read_only_mode,
1019 &success
1020 );
1021 if (!success) {
1022 /* The following call prints an error message */
1023 os_file_get_last_error(true);
1024
1025 ib::error() << "mysqlbackup: Error: trying to read system"
1026 " tablespace file format, but could not open the"
1027 " tablespace file " << pathname << "!";
1028 return(FALSE);
1029 }
1030
1031 /* Read the page on which file format is stored */
1032
1033 IORequest read_req(IORequest::READ)
1034
1035 dberr_t err = os_file_read_no_error_handling(
1036 read_req, file, page, TRX_SYS_PAGE_NO * UNIV_PAGE_SIZE,
1037 UNIV_PAGE_SIZE, NULL);
1038
1039 if (err != DB_SUCCESS) {
1040 /* The following call prints an error message */
1041 os_file_get_last_error(true);
1042
1043 ib::error() << "mysqlbackup: Error: trying to read system"
1044 " tablespace file format, but failed to read the"
1045 " tablespace file " << pathname << "!";
1046
1047 os_file_close(file);
1048 return(FALSE);
1049 }
1050 os_file_close(file);
1051
1052 /* get the file format from the page */
1053 ptr = page + TRX_SYS_FILE_FORMAT_TAG;
1054 file_format_id = mach_read_from_8(ptr);
1055 file_format_id -= TRX_SYS_FILE_FORMAT_TAG_MAGIC_N;
1056
1057 if (file_format_id >= FILE_FORMAT_NAME_N) {
1058
1059 /* Either it has never been tagged, or garbage in it. */
1060 return(TRUE);
1061 }
1062
1063 *format_id = (ulint) file_format_id;
1064
1065 return(TRUE);
1066 }
1067
1068 /*****************************************************************//**
1069 Reads the file format id from the given per-table data file.
1070 @return TRUE if call succeeds */
1071 ibool
trx_sys_read_pertable_file_format_id(const char * pathname,ulint * format_id)1072 trx_sys_read_pertable_file_format_id(
1073 /*=================================*/
1074 const char *pathname, /*!< in: pathname of a per-table
1075 datafile */
1076 ulint *format_id) /*!< out: file format of the per-table
1077 data file */
1078 {
1079 os_file_t file;
1080 bool success;
1081 byte buf[UNIV_PAGE_SIZE * 2];
1082 page_t* page = ut_align(buf, UNIV_PAGE_SIZE);
1083 const byte* ptr;
1084 ib_uint32_t flags;
1085
1086 *format_id = ULINT_UNDEFINED;
1087
1088 file = os_file_create_simple_no_error_handling(
1089 innodb_data_file_key,
1090 pathname,
1091 OS_FILE_OPEN,
1092 OS_FILE_READ_ONLY,
1093 srv_read_only_mode,
1094 &success
1095 );
1096 if (!success) {
1097 /* The following call prints an error message */
1098 os_file_get_last_error(true);
1099
1100 ib::error() << "mysqlbackup: Error: trying to read per-table"
1101 " tablespace format, but could not open the tablespace"
1102 " file " << pathname << "!";
1103
1104 return(FALSE);
1105 }
1106
1107 IORequest read_req(IORequest::READ);
1108
1109 /* Read the first page of the per-table datafile */
1110
1111 dberr_t err = os_file_read_no_error_handling(
1112 read_req, file, page, 0, UNIV_PAGE_SIZE, NULL);
1113
1114 if (err != DB_SUCCESS) {
1115 /* The following call prints an error message */
1116 os_file_get_last_error(true);
1117
1118 ib::error() << "mysqlbackup: Error: trying to per-table data"
1119 " file format, but failed to read the tablespace file "
1120 << pathname << "!";
1121
1122 os_file_close(file);
1123 return(FALSE);
1124 }
1125 os_file_close(file);
1126
1127 /* get the file format from the page */
1128 ptr = page + 54;
1129 flags = mach_read_from_4(ptr);
1130
1131 if (!fsp_flags_is_valid(flags) {
1132 /* bad tablespace flags */
1133 return(FALSE);
1134 }
1135
1136 *format_id = FSP_FLAGS_GET_POST_ANTELOPE(flags);
1137
1138 return(TRUE);
1139 }
1140
1141
1142 /*****************************************************************//**
1143 Get the name representation of the file format from its id.
1144 @return pointer to the name */
1145 const char*
1146 trx_sys_file_format_id_to_name(
1147 /*===========================*/
1148 const ulint id) /*!< in: id of the file format */
1149 {
1150 if (!(id < FILE_FORMAT_NAME_N)) {
1151 /* unknown id */
1152 return("Unknown");
1153 }
1154
1155 return(file_format_name_map[id]);
1156 }
1157
1158 #endif /* !UNIV_HOTBACKUP */
1159
1160 #ifndef UNIV_HOTBACKUP
1161 /*********************************************************************
1162 Shutdown/Close the transaction system. */
1163 void
1164 trx_sys_close(void)
1165 /*===============*/
1166 {
1167 ut_ad(trx_sys != NULL);
1168 ut_ad(srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS);
1169
1170 ulint size = trx_sys->mvcc->size();
1171
1172 if (size > 0) {
1173 ib::error() << "All read views were not closed before"
1174 " shutdown: " << size << " read views open";
1175 }
1176
1177 sess_close(trx_dummy_sess);
1178 trx_dummy_sess = NULL;
1179
1180 trx_purge_sys_close();
1181
1182 /* Free the double write data structures. */
1183 buf_dblwr_free();
1184 buf_parallel_dblwr_free(srv_fast_shutdown != 2);
1185
1186 /* Only prepared transactions may be left in the system. Free them. */
1187 ut_a(UT_LIST_GET_LEN(trx_sys->rw_trx_list) == trx_sys->n_prepared_trx);
1188
1189 for (trx_t* trx = UT_LIST_GET_FIRST(trx_sys->rw_trx_list);
1190 trx != NULL;
1191 trx = UT_LIST_GET_FIRST(trx_sys->rw_trx_list)) {
1192
1193 trx_free_prepared(trx);
1194
1195 UT_LIST_REMOVE(trx_sys->rw_trx_list, trx);
1196 }
1197
1198 /* There can't be any active transactions. */
1199 trx_rseg_t** rseg_array = static_cast<trx_rseg_t**>(
1200 trx_sys->rseg_array);
1201
1202 for (ulint i = 0; i < TRX_SYS_N_RSEGS; ++i) {
1203 trx_rseg_t* rseg;
1204
1205 rseg = trx_sys->rseg_array[i];
1206
1207 if (rseg != NULL) {
1208 trx_rseg_mem_free(rseg, rseg_array);
1209 }
1210 }
1211
1212 for (ulint i = 0; i < TRX_SYS_N_RSEGS; ++i) {
1213 trx_rseg_t* rseg;
1214
1215 rseg = trx_sys->pending_purge_rseg_array[i];
1216
1217 if (rseg != NULL) {
1218 trx_rseg_mem_free(rseg,
1219 trx_sys->pending_purge_rseg_array);
1220 }
1221 }
1222
1223 UT_DELETE(trx_sys->mvcc);
1224
1225 ut_a(UT_LIST_GET_LEN(trx_sys->rw_trx_list) == 0);
1226 ut_a(UT_LIST_GET_LEN(trx_sys->mysql_trx_list) == 0);
1227 ut_a(UT_LIST_GET_LEN(trx_sys->serialisation_list) == 0);
1228
1229 /* We used placement new to create this mutex. Call the destructor. */
1230 mutex_free(&trx_sys->mutex);
1231
1232 trx_sys->rw_trx_ids.~trx_ids_t();
1233
1234 trx_sys->rw_trx_set.~TrxIdSet();
1235
1236 ut_free(trx_sys);
1237
1238 trx_sys = NULL;
1239 }
1240
1241 /** @brief Convert an undo log to TRX_UNDO_PREPARED state on shutdown.
1242
1243 If any prepared ACTIVE transactions exist, and their rollback was
1244 prevented by innodb_force_recovery, we convert these transactions to
1245 XA PREPARE state in the main-memory data structures, so that shutdown
1246 will proceed normally. These transactions will again recover as ACTIVE
1247 on the next restart, and they will be rolled back unless
1248 innodb_force_recovery prevents it again.
1249
1250 @param[in] trx transaction
1251 @param[in,out] undo undo log to convert to TRX_UNDO_PREPARED */
1252 static
1253 void
1254 trx_undo_fake_prepared(
1255 const trx_t* trx,
1256 trx_undo_t* undo)
1257 {
1258 ut_ad(srv_force_recovery >= SRV_FORCE_NO_TRX_UNDO);
1259 ut_ad(trx_state_eq(trx, TRX_STATE_ACTIVE));
1260 ut_ad(trx->is_recovered);
1261
1262 if (undo != NULL) {
1263 ut_ad(undo->state == TRX_UNDO_ACTIVE);
1264 undo->state = TRX_UNDO_PREPARED;
1265 }
1266 }
1267
1268 /*********************************************************************
1269 Check if there are any active (non-prepared) transactions.
1270 @return total number of active transactions or 0 if none */
1271 ulint
1272 trx_sys_any_active_transactions(void)
1273 /*=================================*/
1274 {
1275 trx_sys_mutex_enter();
1276
1277 ulint total_trx = UT_LIST_GET_LEN(trx_sys->mysql_trx_list);
1278
1279 if (total_trx == 0) {
1280 total_trx = UT_LIST_GET_LEN(trx_sys->rw_trx_list);
1281 ut_a(total_trx >= trx_sys->n_prepared_trx);
1282
1283 if (total_trx > trx_sys->n_prepared_trx
1284 && srv_force_recovery >= SRV_FORCE_NO_TRX_UNDO) {
1285 for (trx_t* trx = UT_LIST_GET_FIRST(
1286 trx_sys->rw_trx_list);
1287 trx != NULL;
1288 trx = UT_LIST_GET_NEXT(trx_list, trx)) {
1289 if (!trx_state_eq(trx, TRX_STATE_ACTIVE)
1290 || !trx->is_recovered) {
1291 continue;
1292 }
1293 /* This was a recovered transaction
1294 whose rollback was disabled by
1295 the innodb_force_recovery setting.
1296 Pretend that it is in XA PREPARE
1297 state so that shutdown will work. */
1298 trx_undo_fake_prepared(
1299 trx, trx->rsegs.m_redo.insert_undo);
1300 trx_undo_fake_prepared(
1301 trx, trx->rsegs.m_redo.update_undo);
1302 trx_undo_fake_prepared(
1303 trx, trx->rsegs.m_noredo.insert_undo);
1304 trx_undo_fake_prepared(
1305 trx, trx->rsegs.m_noredo.update_undo);
1306 trx->state = TRX_STATE_PREPARED;
1307 trx_sys->n_prepared_trx++;
1308 trx_sys->n_prepared_recovered_trx++;
1309 }
1310 }
1311
1312 ut_a(total_trx >= trx_sys->n_prepared_trx);
1313 total_trx -= trx_sys->n_prepared_trx;
1314 }
1315
1316 trx_sys_mutex_exit();
1317
1318 return(total_trx);
1319 }
1320
1321 #ifdef UNIV_DEBUG
1322 /*************************************************************//**
1323 Validate the trx_ut_list_t.
1324 @return true if valid. */
1325 static
1326 bool
1327 trx_sys_validate_trx_list_low(
1328 /*===========================*/
1329 trx_ut_list_t* trx_list) /*!< in: &trx_sys->rw_trx_list */
1330 {
1331 const trx_t* trx;
1332 const trx_t* prev_trx = NULL;
1333
1334 ut_ad(trx_sys_mutex_own());
1335
1336 ut_ad(trx_list == &trx_sys->rw_trx_list);
1337
1338 for (trx = UT_LIST_GET_FIRST(*trx_list);
1339 trx != NULL;
1340 prev_trx = trx, trx = UT_LIST_GET_NEXT(trx_list, prev_trx)) {
1341
1342 check_trx_state(trx);
1343 ut_a(prev_trx == NULL || prev_trx->id > trx->id);
1344 }
1345
1346 return(true);
1347 }
1348
1349 /*************************************************************//**
1350 Validate the trx_sys_t::rw_trx_list.
1351 @return true if the list is valid. */
1352 bool
1353 trx_sys_validate_trx_list()
1354 /*=======================*/
1355 {
1356 ut_ad(trx_sys_mutex_own());
1357
1358 ut_a(trx_sys_validate_trx_list_low(&trx_sys->rw_trx_list));
1359
1360 return(true);
1361 }
1362 #endif /* UNIV_DEBUG */
1363 #endif /* !UNIV_HOTBACKUP */
1364