1 /*****************************************************************************
2 
3 Copyright (c) 1996, 2021, Oracle and/or its affiliates.
4 
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License, version 2.0,
7 as published by the Free Software Foundation.
8 
9 This program is also distributed with certain software (including
10 but not limited to OpenSSL) that is licensed under separate terms,
11 as designated in a particular file or component or in included license
12 documentation.  The authors of MySQL hereby grant you an additional
13 permission to link the program and your derivative works with the
14 separately licensed software that they have included with MySQL.
15 
16 This program is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19 GNU General Public License, version 2.0, for more details.
20 
21 You should have received a copy of the GNU General Public License along with
22 this program; if not, write to the Free Software Foundation, Inc.,
23 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
24 
25 *****************************************************************************/
26 
27 /**************************************************//**
28 @file trx/trx0sys.cc
29 Transaction system
30 
31 Created 3/26/1996 Heikki Tuuri
32 *******************************************************/
33 
34 #include "ha_prototypes.h"
35 
36 #include "mysqld.h"
37 #include "trx0sys.h"
38 #include "sql_error.h"
39 #ifdef UNIV_NONINL
40 #include "trx0sys.ic"
41 #endif
42 
43 #ifdef UNIV_HOTBACKUP
44 #include "fsp0types.h"
45 
46 #else	/* !UNIV_HOTBACKUP */
47 #include "fsp0fsp.h"
48 #include "mtr0log.h"
49 #include "mtr0log.h"
50 #include "trx0trx.h"
51 #include "trx0rseg.h"
52 #include "trx0undo.h"
53 #include "srv0srv.h"
54 #include "srv0start.h"
55 #include "trx0purge.h"
56 #include "log0log.h"
57 #include "log0recv.h"
58 #include "os0file.h"
59 #include "read0read.h"
60 #include "fsp0sysspace.h"
61 
62 #ifdef WITH_WSREP
63 #include "ha_prototypes.h" /* wsrep_is_wsrep_xid() */
64 #endif /* */
65 
66 /** The file format tag structure with id and name. */
67 struct file_format_t {
68 	ulint		id;		/*!< id of the file format */
69 	const char*	name;		/*!< text representation of the
70 					file format */
71 	ib_mutex_t		mutex;		/*!< covers changes to the above
72 					fields */
73 };
74 
75 /** The transaction system */
76 trx_sys_t*		trx_sys		= NULL;
77 #endif /* !UNIV_HOTBACKUP */
78 
/** Animal names representing the file formats, one per letter of the
alphabet. An entry is looked up by using the file format id as the index. */
static const char*	file_format_name_map[] = {
	"Antelope",	"Barracuda",	"Cheetah",	"Dragon",
	"Elk",		"Fox",		"Gazelle",	"Hornet",
	"Impala",	"Jaguar",	"Kangaroo",	"Leopard",
	"Moose",	"Nautilus",	"Ocelot",	"Porpoise",
	"Quail",	"Rabbit",	"Shark",	"Tiger",
	"Urchin",	"Viper",	"Whale",	"Xenops",
	"Yak",		"Zebra"
};
108 
109 /** The number of elements in the file format name array. */
110 static const ulint	FILE_FORMAT_NAME_N
111 	= sizeof(file_format_name_map) / sizeof(file_format_name_map[0]);
112 
113 /** Check whether transaction id is valid.
114 @param[in]	id              transaction id to check
115 @param[in]      name            table name */
116 void
check_trx_id_sanity(trx_id_t id,const table_name_t & name)117 ReadView::check_trx_id_sanity(
118 	trx_id_t		id,
119 	const table_name_t&	name)
120 {
121 	if (id >= trx_sys->max_trx_id) {
122 
123 		ib::warn() << "A transaction id"
124 			   << " in a record of table "
125 			   << name
126 			   << " is newer than the"
127 			   << " system-wide maximum.";
128 		ut_ad(0);
129 		THD *thd = current_thd;
130 		if (thd != NULL) {
131 			char    table_name[MAX_FULL_NAME_LEN + 1];
132 
133 			innobase_format_name(
134 				table_name, sizeof(table_name),
135 				name.m_name);
136 
137 			push_warning_printf(thd, Sql_condition::SL_WARNING,
138 					    ER_SIGNAL_WARN,
139 					    "InnoDB: Transaction id"
140 					    " in a record of table"
141 					    " %s is newer than system-wide"
142 					    " maximum.", table_name);
143 		}
144 	}
145 }
146 
147 #ifndef UNIV_HOTBACKUP
148 #ifdef UNIV_DEBUG
149 /* Flag to control TRX_RSEG_N_SLOTS behavior debugging. */
150 uint	trx_rseg_n_slots_debug = 0;
151 #endif
152 
153 /** This is used to track the maximum file format id known to InnoDB. It's
154 updated via SET GLOBAL innodb_file_format_max = 'x' or when we open
155 or create a table. */
156 static	file_format_t	file_format_max;
157 
158 #ifdef UNIV_DEBUG
159 /****************************************************************//**
160 Checks whether a trx is in one of rw_trx_list
161 @return true if is in */
162 bool
trx_in_rw_trx_list(const trx_t * in_trx)163 trx_in_rw_trx_list(
164 /*============*/
165 	const trx_t*	in_trx)	/*!< in: transaction */
166 {
167 	const trx_t*	trx;
168 
169 	/* Non-locking autocommits should not hold any locks. */
170 	check_trx_state(in_trx);
171 
172 	ut_ad(trx_sys_mutex_own());
173 
174 	ut_ad(trx_assert_started(in_trx));
175 
176 	for (trx = UT_LIST_GET_FIRST(trx_sys->rw_trx_list);
177 	     trx != NULL && trx != in_trx;
178 	     trx = UT_LIST_GET_NEXT(trx_list, trx)) {
179 
180 		check_trx_state(trx);
181 
182 		ut_ad(trx->rsegs.m_redo.rseg != NULL && !trx->read_only);
183 	}
184 
185 	return(trx != 0);
186 }
187 #endif /* UNIV_DEBUG */
188 
189 /*****************************************************************//**
190 Writes the value of max_trx_id to the file based trx system header. */
191 void
trx_sys_flush_max_trx_id(void)192 trx_sys_flush_max_trx_id(void)
193 /*==========================*/
194 {
195 	mtr_t		mtr;
196 	trx_sysf_t*	sys_header;
197 
198 	ut_ad(trx_sys_mutex_own());
199 
200 	if (!srv_read_only_mode) {
201 		mtr_start(&mtr);
202 
203 		sys_header = trx_sysf_get(&mtr);
204 
205 		mlog_write_ull(
206 			sys_header + TRX_SYS_TRX_ID_STORE,
207 			trx_sys->max_trx_id, &mtr);
208 
209 		mtr_commit(&mtr);
210 	}
211 }
212 
213 /*****************************************************************//**
214 Updates the offset information about the end of the MySQL binlog entry
215 which corresponds to the transaction just being committed. In a MySQL
216 replication slave updates the latest master binlog position up to which
217 replication has proceeded. */
218 void
trx_sys_update_mysql_binlog_offset(const char * file_name,int64_t offset,ulint field,trx_sysf_t * sys_header,mtr_t * mtr)219 trx_sys_update_mysql_binlog_offset(
220 /*===============================*/
221 	const char*	file_name,/*!< in: MySQL log file name */
222 	int64_t		offset,	/*!< in: position in that log file */
223 	ulint		field,	/*!< in: offset of the MySQL log info field in
224 				the trx sys header */
225 #ifdef WITH_WSREP
226         trx_sysf_t*     sys_header, /*!< in: trx sys header */
227 #endif /* WITH_WSREP */
228 	mtr_t*		mtr)	/*!< in: mtr */
229 {
230 #ifndef WITH_WSREP
231 	trx_sysf_t*	sys_header;
232 #endif /* !WITH_WSREP */
233 	if (ut_strlen(file_name) >= TRX_SYS_MYSQL_LOG_NAME_LEN) {
234 
235 		/* We cannot fit the name to the 512 bytes we have reserved */
236 
237 		return;
238 	}
239 
240 #ifndef WITH_WSREP
241 	sys_header = trx_sysf_get(mtr);
242 #endif /* !WITH_WSREP */
243 
244 	if (mach_read_from_4(sys_header + field
245 			     + TRX_SYS_MYSQL_LOG_MAGIC_N_FLD)
246 	    != TRX_SYS_MYSQL_LOG_MAGIC_N) {
247 
248 		mlog_write_ulint(sys_header + field
249 				 + TRX_SYS_MYSQL_LOG_MAGIC_N_FLD,
250 				 TRX_SYS_MYSQL_LOG_MAGIC_N,
251 				 MLOG_4BYTES, mtr);
252 	}
253 
254 	if (0 != strcmp((char*) (sys_header + field + TRX_SYS_MYSQL_LOG_NAME),
255 			file_name)) {
256 
257 		mlog_write_string(sys_header + field
258 				  + TRX_SYS_MYSQL_LOG_NAME,
259 				  (byte*) file_name, 1 + ut_strlen(file_name),
260 				  mtr);
261 	}
262 
263 	if (mach_read_from_4(sys_header + field
264 			     + TRX_SYS_MYSQL_LOG_OFFSET_HIGH) > 0
265 	    || (offset >> 32) > 0) {
266 
267 		mlog_write_ulint(sys_header + field
268 				 + TRX_SYS_MYSQL_LOG_OFFSET_HIGH,
269 				 (ulint)(offset >> 32),
270 				 MLOG_4BYTES, mtr);
271 	}
272 
273 	mlog_write_ulint(sys_header + field
274 			 + TRX_SYS_MYSQL_LOG_OFFSET_LOW,
275 			 (ulint)(offset & 0xFFFFFFFFUL),
276 			 MLOG_4BYTES, mtr);
277 }
278 
279 #ifdef WITH_WSREP
280 
281 #ifdef UNIV_DEBUG
/** Debug-only: seqno of the wsrep XID most recently passed to
trx_sys_update_wsrep_checkpoint(); -1 before the first call. */
static long long	trx_sys_cur_xid_seqno = -1;

/** Debug-only: UUID bytes of the wsrep XID most recently passed to
trx_sys_update_wsrep_checkpoint(). */
static unsigned char	trx_sys_cur_xid_uuid[16];
284 
read_wsrep_xid_seqno(const XID * xid)285 long long read_wsrep_xid_seqno(const XID* xid)
286 {
287     long long seqno;
288     //char data[XIDDATASIZE];
289     //data = xid->get_data();
290     memcpy(&seqno, xid->get_data() + 24, sizeof(long long));
291     //memcpy(&seqno, xid->data + 24, sizeof(long long));
292     return seqno;
293 }
294 
read_wsrep_xid_uuid(const XID * xid,unsigned char * buf)295 void read_wsrep_xid_uuid(const XID* xid, unsigned char* buf)
296 {
297 	memcpy(buf, xid->get_data() + 8, 16);
298     //memcpy(buf, xid->data + 8, 16);
299 }
300 
301 #endif /* UNIV_DEBUG */
302 
303 void
trx_sys_update_wsrep_checkpoint(const XID * xid,trx_sysf_t * sys_header,mtr_t * mtr)304 trx_sys_update_wsrep_checkpoint(
305         const XID*      xid,        /*!< in: transaction XID */
306         trx_sysf_t*     sys_header, /*!< in: sys_header */
307         mtr_t*          mtr)        /*!< in: mtr */
308 {
309 
310 #ifdef UNIV_DEBUG
311         {
312             /* Check that seqno is monotonically increasing */
313             unsigned char xid_uuid[16];
314             long long xid_seqno = read_wsrep_xid_seqno(xid);
315             read_wsrep_xid_uuid(xid, xid_uuid);
316             if (!memcmp(xid_uuid, trx_sys_cur_xid_uuid, 8))
317             {
318                 ut_ad(xid_seqno > trx_sys_cur_xid_seqno);
319                 trx_sys_cur_xid_seqno = xid_seqno;
320             }
321             else
322             {
323                 memcpy(trx_sys_cur_xid_uuid, xid_uuid, 16);
324             }
325             trx_sys_cur_xid_seqno = xid_seqno;
326         }
327 #endif /* UNIV_DEBUG */
328 
329         ut_ad(xid && mtr && sys_header);
330         ut_a(xid->get_format_id() == -1 || wsrep_is_wsrep_xid(xid));
331 
332         if (mach_read_from_4(sys_header + TRX_SYS_WSREP_XID_INFO
333                              + TRX_SYS_WSREP_XID_MAGIC_N_FLD)
334             != TRX_SYS_WSREP_XID_MAGIC_N) {
335                 mlog_write_ulint(sys_header + TRX_SYS_WSREP_XID_INFO
336                                  + TRX_SYS_WSREP_XID_MAGIC_N_FLD,
337                                  TRX_SYS_WSREP_XID_MAGIC_N,
338                                  MLOG_4BYTES, mtr);
339         }
340 
341         mlog_write_ulint(sys_header + TRX_SYS_WSREP_XID_INFO
342                          + TRX_SYS_WSREP_XID_FORMAT,
343                          (int)xid->get_format_id(),
344                          MLOG_4BYTES, mtr);
345         mlog_write_ulint(sys_header + TRX_SYS_WSREP_XID_INFO
346                          + TRX_SYS_WSREP_XID_GTRID_LEN,
347                          (int)xid->get_gtrid_length(),
348                          MLOG_4BYTES, mtr);
349         mlog_write_ulint(sys_header + TRX_SYS_WSREP_XID_INFO
350                          + TRX_SYS_WSREP_XID_BQUAL_LEN,
351                          (int)xid->get_bqual_length(),
352                          MLOG_4BYTES, mtr);
353         mlog_write_string(sys_header + TRX_SYS_WSREP_XID_INFO
354                           + TRX_SYS_WSREP_XID_DATA,
355                           (const unsigned char*) xid->get_data(),
356                           XIDDATASIZE, mtr);
357 
358 }
359 
360 void
trx_sys_read_wsrep_checkpoint(XID * xid)361 trx_sys_read_wsrep_checkpoint(XID* xid)
362 /*===================================*/
363 {
364         trx_sysf_t* sys_header;
365 	mtr_t	    mtr;
366         ulint       magic;
367 
368         ut_ad(xid);
369 
370 	mtr_start(&mtr);
371 
372 	sys_header = trx_sysf_get(&mtr);
373 
374         if ((magic = mach_read_from_4(sys_header + TRX_SYS_WSREP_XID_INFO
375                                       + TRX_SYS_WSREP_XID_MAGIC_N_FLD))
376             != TRX_SYS_WSREP_XID_MAGIC_N) {
377 		xid->reset();
378                 trx_sys_update_wsrep_checkpoint(xid, sys_header, &mtr);
379                 mtr_commit(&mtr);
380                 return;
381         }
382 
383         xid->set_format_id((long)mach_read_from_4(
384                 sys_header
385                 + TRX_SYS_WSREP_XID_INFO + TRX_SYS_WSREP_XID_FORMAT));
386         xid->set_gtrid_length((long)mach_read_from_4(
387                 sys_header
388                 + TRX_SYS_WSREP_XID_INFO + TRX_SYS_WSREP_XID_GTRID_LEN));
389         xid->set_bqual_length((long)mach_read_from_4(
390                 sys_header
391                 + TRX_SYS_WSREP_XID_INFO + TRX_SYS_WSREP_XID_BQUAL_LEN));
392         //ut_memcpy(xid->data,
393         //          sys_header + TRX_SYS_WSREP_XID_INFO + TRX_SYS_WSREP_XID_DATA,
394         //          XIDDATASIZE);
395         xid->set_data(
396                       sys_header + TRX_SYS_WSREP_XID_INFO + TRX_SYS_WSREP_XID_DATA,
397                       XIDDATASIZE);
398 
399 	mtr_commit(&mtr);
400 }
401 
402 #endif /* WITH_WSREP */
403 
404 /*****************************************************************//**
405 Stores the MySQL binlog offset info in the trx system header if
406 the magic number shows it valid, and print the info to stderr */
407 void
trx_sys_print_mysql_binlog_offset(void)408 trx_sys_print_mysql_binlog_offset(void)
409 /*===================================*/
410 {
411 	trx_sysf_t*	sys_header;
412 	mtr_t		mtr;
413 	ulint		trx_sys_mysql_bin_log_pos_high;
414 	ulint		trx_sys_mysql_bin_log_pos_low;
415 
416 	mtr_start(&mtr);
417 
418 	sys_header = trx_sysf_get(&mtr);
419 
420 	if (mach_read_from_4(sys_header + TRX_SYS_MYSQL_LOG_INFO
421 			     + TRX_SYS_MYSQL_LOG_MAGIC_N_FLD)
422 	    != TRX_SYS_MYSQL_LOG_MAGIC_N) {
423 
424 		mtr_commit(&mtr);
425 
426 		return;
427 	}
428 
429 	trx_sys_mysql_bin_log_pos_high = mach_read_from_4(
430 		sys_header + TRX_SYS_MYSQL_LOG_INFO
431 		+ TRX_SYS_MYSQL_LOG_OFFSET_HIGH);
432 	trx_sys_mysql_bin_log_pos_low = mach_read_from_4(
433 		sys_header + TRX_SYS_MYSQL_LOG_INFO
434 		+ TRX_SYS_MYSQL_LOG_OFFSET_LOW);
435 
436 	ib::info() << "Last MySQL binlog file position "
437 		<< trx_sys_mysql_bin_log_pos_high << " "
438 		<< trx_sys_mysql_bin_log_pos_low << ", file name "
439 		<< sys_header + TRX_SYS_MYSQL_LOG_INFO
440 		+ TRX_SYS_MYSQL_LOG_NAME;
441 
442 	mtr_commit(&mtr);
443 }
444 
445 /****************************************************************//**
446 Looks for a free slot for a rollback segment in the trx system file copy.
447 @return slot index or ULINT_UNDEFINED if not found */
448 ulint
trx_sysf_rseg_find_free(mtr_t * mtr,bool include_tmp_slots,ulint nth_free_slots)449 trx_sysf_rseg_find_free(
450 /*====================*/
451 	mtr_t*	mtr,			/*!< in/out: mtr */
452 	bool	include_tmp_slots,	/*!< in: if true, report slots reserved
453 					for temp-tablespace as free slots. */
454 	ulint	nth_free_slots)		/*!< in: allocate nth free slot.
455 					0 means next free slot. */
456 {
457 	ulint		i;
458 	trx_sysf_t*	sys_header;
459 
460 	sys_header = trx_sysf_get(mtr);
461 
462 	ulint	found_free_slots = 0;
463 	for (i = 0; i < TRX_SYS_N_RSEGS; i++) {
464 		ulint	page_no;
465 
466 		if (!include_tmp_slots && trx_sys_is_noredo_rseg_slot(i)) {
467 			continue;
468 		}
469 
470 		page_no = trx_sysf_rseg_get_page_no(sys_header, i, mtr);
471 
472 		if (page_no == FIL_NULL
473 		    || (include_tmp_slots
474 			&& trx_sys_is_noredo_rseg_slot(i))) {
475 
476 			if (found_free_slots++ >= nth_free_slots) {
477 				return(i);
478 			}
479 		}
480 	}
481 
482 	return(ULINT_UNDEFINED);
483 }
484 
485 /****************************************************************//**
486 Looks for used slots for redo rollback segment.
487 @return number of used slots */
488 static
489 ulint
trx_sysf_used_slots_for_redo_rseg(mtr_t * mtr)490 trx_sysf_used_slots_for_redo_rseg(
491 /*==============================*/
492 	mtr_t*	mtr)			/*!< in: mtr */
493 {
494 	trx_sysf_t*	sys_header;
495 	ulint		n_used = 0;
496 
497 	sys_header = trx_sysf_get(mtr);
498 
499 	for (ulint i = 0; i < TRX_SYS_N_RSEGS; i++) {
500 
501 		if (trx_sys_is_noredo_rseg_slot(i)) {
502 			continue;
503 		}
504 
505 		ulint	page_no;
506 
507 		page_no = trx_sysf_rseg_get_page_no(sys_header, i, mtr);
508 
509 		if (page_no != FIL_NULL) {
510 			++n_used;
511 		}
512 	}
513 
514 	return(n_used);
515 }
516 
517 /*****************************************************************//**
518 Creates the file page for the transaction system. This function is called only
519 at the database creation, before trx_sys_init. */
520 static
521 void
trx_sysf_create(mtr_t * mtr)522 trx_sysf_create(
523 /*============*/
524 	mtr_t*	mtr)	/*!< in: mtr */
525 {
526 	trx_sysf_t*	sys_header;
527 	ulint		slot_no;
528 	buf_block_t*	block;
529 	page_t*		page;
530 	ulint		page_no;
531 	byte*		ptr;
532 	ulint		len;
533 
534 	ut_ad(mtr);
535 
536 	/* Note that below we first reserve the file space x-latch, and
537 	then enter the kernel: we must do it in this order to conform
538 	to the latching order rules. */
539 
540 	mtr_x_lock_space(TRX_SYS_SPACE, mtr);
541 
542 	/* Create the trx sys file block in a new allocated file segment */
543 	block = fseg_create(TRX_SYS_SPACE, 0, TRX_SYS + TRX_SYS_FSEG_HEADER,
544 			    mtr);
545 	buf_block_dbg_add_level(block, SYNC_TRX_SYS_HEADER);
546 
547 	ut_a(block->page.id.page_no() == TRX_SYS_PAGE_NO);
548 
549 	page = buf_block_get_frame(block);
550 
551 	mlog_write_ulint(page + FIL_PAGE_TYPE, FIL_PAGE_TYPE_TRX_SYS,
552 			 MLOG_2BYTES, mtr);
553 
554 	/* Reset the doublewrite buffer magic number to zero so that we
555 	know that the doublewrite buffer has not yet been created (this
556 	suppresses a Valgrind warning) */
557 
558 	mlog_write_ulint(page + TRX_SYS_DOUBLEWRITE
559 			 + TRX_SYS_DOUBLEWRITE_MAGIC, 0, MLOG_4BYTES, mtr);
560 
561 	sys_header = trx_sysf_get(mtr);
562 
563 	/* Start counting transaction ids from number 1 up */
564 	mach_write_to_8(sys_header + TRX_SYS_TRX_ID_STORE, 1);
565 
566 	/* Reset the rollback segment slots.  Old versions of InnoDB
567 	define TRX_SYS_N_RSEGS as 256 (TRX_SYS_OLD_N_RSEGS) and expect
568 	that the whole array is initialized. */
569 	ptr = TRX_SYS_RSEGS + sys_header;
570 	len = ut_max(TRX_SYS_OLD_N_RSEGS, TRX_SYS_N_RSEGS)
571 		* TRX_SYS_RSEG_SLOT_SIZE;
572 	memset(ptr, 0xff, len);
573 	ptr += len;
574 	ut_a(ptr <= page + (UNIV_PAGE_SIZE - FIL_PAGE_DATA_END));
575 
576 	/* Initialize all of the page.  This part used to be uninitialized. */
577 	memset(ptr, 0, UNIV_PAGE_SIZE - FIL_PAGE_DATA_END + page - ptr);
578 
579 	mlog_log_string(sys_header, UNIV_PAGE_SIZE - FIL_PAGE_DATA_END
580 			+ page - sys_header, mtr);
581 
582 	/* Create the first rollback segment in the SYSTEM tablespace */
583 	slot_no = trx_sysf_rseg_find_free(mtr, false, 0);
584 	page_no = trx_rseg_header_create(TRX_SYS_SPACE, univ_page_size,
585 					 ULINT_MAX, slot_no, mtr);
586 
587 	ut_a(slot_no == TRX_SYS_SYSTEM_RSEG_ID);
588 	ut_a(page_no == FSP_FIRST_RSEG_PAGE_NO);
589 }
590 
591 /*****************************************************************//**
592 Creates and initializes the central memory structures for the transaction
593 system. This is called when the database is started.
594 @return min binary heap of rsegs to purge */
595 purge_pq_t*
trx_sys_init_at_db_start(void)596 trx_sys_init_at_db_start(void)
597 /*==========================*/
598 {
599 	purge_pq_t*	purge_queue;
600 	trx_sysf_t*	sys_header;
601 	ib_uint64_t	rows_to_undo	= 0;
602 	const char*	unit		= "";
603 
604 	/* We create the min binary heap here and pass ownership to
605 	purge when we init the purge sub-system. Purge is responsible
606 	for freeing the binary heap. */
607 	purge_queue = UT_NEW_NOKEY(purge_pq_t());
608 	ut_a(purge_queue != NULL);
609 
610 	if (srv_force_recovery < SRV_FORCE_NO_UNDO_LOG_SCAN) {
611 		trx_rseg_array_init(purge_queue);
612 	}
613 
614 	/* VERY important: after the database is started, max_trx_id value is
615 	divisible by TRX_SYS_TRX_ID_WRITE_MARGIN, and the 'if' in
616 	trx_sys_get_new_trx_id will evaluate to TRUE when the function
617 	is first time called, and the value for trx id will be written
618 	to the disk-based header! Thus trx id values will not overlap when
619 	the database is repeatedly started! */
620 
621 	mtr_t	mtr;
622 	mtr.start();
623 
624 	sys_header = trx_sysf_get(&mtr);
625 
626 	trx_sys->max_trx_id = 2 * TRX_SYS_TRX_ID_WRITE_MARGIN
627 		+ ut_uint64_align_up(mach_read_from_8(sys_header
628 						   + TRX_SYS_TRX_ID_STORE),
629 				     TRX_SYS_TRX_ID_WRITE_MARGIN);
630 
631 	mtr.commit();
632 	ut_d(trx_sys->rw_max_trx_id = trx_sys->max_trx_id);
633 
634 	trx_dummy_sess = sess_open();
635 
636 	trx_lists_init_at_db_start();
637 
638 	/* This mutex is not strictly required, it is here only to satisfy
639 	the debug code (assertions). We are still running in single threaded
640 	bootstrap mode. */
641 
642 	trx_sys_mutex_enter();
643 
644 	if (UT_LIST_GET_LEN(trx_sys->rw_trx_list) > 0) {
645 		const trx_t*	trx;
646 
647 		for (trx = UT_LIST_GET_FIRST(trx_sys->rw_trx_list);
648 		     trx != NULL;
649 		     trx = UT_LIST_GET_NEXT(trx_list, trx)) {
650 
651 			ut_ad(trx->is_recovered);
652 			assert_trx_in_rw_list(trx);
653 
654 			if (trx_state_eq(trx, TRX_STATE_ACTIVE)) {
655 				rows_to_undo += trx->undo_no;
656 			}
657 		}
658 
659 		if (rows_to_undo > 1000000000) {
660 			unit = "M";
661 			rows_to_undo = rows_to_undo / 1000000;
662 		}
663 
664 		ib::info() << UT_LIST_GET_LEN(trx_sys->rw_trx_list)
665 			<< " transaction(s) which must be rolled back or"
666 			" cleaned up in total " << rows_to_undo << unit
667 			<< " row operations to undo";
668 
669 		ib::info() << "Trx id counter is " << trx_sys->max_trx_id;
670 	}
671 
672 	trx_sys_mutex_exit();
673 
674 	return(purge_queue);
675 }
676 
677 /*****************************************************************//**
678 Creates the trx_sys instance and initializes purge_queue and mutex. */
679 void
trx_sys_create(void)680 trx_sys_create(void)
681 /*================*/
682 {
683 	ut_ad(trx_sys == NULL);
684 
685 	trx_sys = static_cast<trx_sys_t*>(ut_zalloc_nokey(sizeof(*trx_sys)));
686 
687 	mutex_create(LATCH_ID_TRX_SYS, &trx_sys->mutex);
688 
689 	UT_LIST_INIT(trx_sys->serialisation_list, &trx_t::no_list);
690 	UT_LIST_INIT(trx_sys->rw_trx_list, &trx_t::trx_list);
691 	UT_LIST_INIT(trx_sys->mysql_trx_list, &trx_t::mysql_trx_list);
692 
693 	trx_sys->mvcc = UT_NEW_NOKEY(MVCC(1024));
694 
695 	new(&trx_sys->rw_trx_ids) trx_ids_t(ut_allocator<trx_id_t>(
696 			mem_key_trx_sys_t_rw_trx_ids));
697 
698 	new(&trx_sys->rw_trx_set) TrxIdSet();
699 }
700 
701 /*****************************************************************//**
702 Creates and initializes the transaction system at the database creation. */
703 void
trx_sys_create_sys_pages(void)704 trx_sys_create_sys_pages(void)
705 /*==========================*/
706 {
707 	mtr_t	mtr;
708 
709 	mtr_start(&mtr);
710 
711 	trx_sysf_create(&mtr);
712 
713 	mtr_commit(&mtr);
714 }
715 
716 /*****************************************************************//**
717 Update the file format tag.
718 @return always TRUE */
719 static
720 ibool
trx_sys_file_format_max_write(ulint format_id,const char ** name)721 trx_sys_file_format_max_write(
722 /*==========================*/
723 	ulint		format_id,	/*!< in: file format id */
724 	const char**	name)		/*!< out: max file format name, can
725 					be NULL */
726 {
727 	mtr_t		mtr;
728 	byte*		ptr;
729 	buf_block_t*	block;
730 	ib_uint64_t	tag_value;
731 
732 	mtr_start(&mtr);
733 
734 	block = buf_page_get(
735 		page_id_t(TRX_SYS_SPACE, TRX_SYS_PAGE_NO), univ_page_size,
736 		RW_X_LATCH, &mtr);
737 
738 	file_format_max.id = format_id;
739 	file_format_max.name = trx_sys_file_format_id_to_name(format_id);
740 
741 	ptr = buf_block_get_frame(block) + TRX_SYS_FILE_FORMAT_TAG;
742 	tag_value = format_id + TRX_SYS_FILE_FORMAT_TAG_MAGIC_N;
743 
744 	if (name) {
745 		*name = file_format_max.name;
746 	}
747 
748 	mlog_write_ull(ptr, tag_value, &mtr);
749 
750 	mtr_commit(&mtr);
751 
752 	return(TRUE);
753 }
754 
755 /*****************************************************************//**
756 Read the file format tag.
757 @return the file format or ULINT_UNDEFINED if not set. */
758 static
759 ulint
trx_sys_file_format_max_read(void)760 trx_sys_file_format_max_read(void)
761 /*==============================*/
762 {
763 	mtr_t			mtr;
764 	const byte*		ptr;
765 	const buf_block_t*	block;
766 	ib_id_t			file_format_id;
767 
768 	/* Since this is called during the startup phase it's safe to
769 	read the value without a covering mutex. */
770 	mtr_start(&mtr);
771 
772 	block = buf_page_get(
773 		page_id_t(TRX_SYS_SPACE, TRX_SYS_PAGE_NO), univ_page_size,
774 		RW_X_LATCH, &mtr);
775 
776 	ptr = buf_block_get_frame(block) + TRX_SYS_FILE_FORMAT_TAG;
777 	file_format_id = mach_read_from_8(ptr);
778 
779 	mtr_commit(&mtr);
780 
781 	file_format_id -= TRX_SYS_FILE_FORMAT_TAG_MAGIC_N;
782 
783 	if (file_format_id >= FILE_FORMAT_NAME_N) {
784 
785 		/* Either it has never been tagged, or garbage in it. */
786 		return(ULINT_UNDEFINED);
787 	}
788 
789 	return((ulint) file_format_id);
790 }
791 
792 /*****************************************************************//**
793 Get the name representation of the file format from its id.
794 @return pointer to the name */
795 const char*
trx_sys_file_format_id_to_name(const ulint id)796 trx_sys_file_format_id_to_name(
797 /*===========================*/
798 	const ulint	id)	/*!< in: id of the file format */
799 {
800 	ut_a(id < FILE_FORMAT_NAME_N);
801 
802 	return(file_format_name_map[id]);
803 }
804 
805 /*****************************************************************//**
806 Check for the max file format tag stored on disk. Note: If max_format_id
807 is == UNIV_FORMAT_MAX + 1 then we only print a warning.
808 @return DB_SUCCESS or error code */
809 dberr_t
trx_sys_file_format_max_check(ulint max_format_id)810 trx_sys_file_format_max_check(
811 /*==========================*/
812 	ulint	max_format_id)	/*!< in: max format id to check */
813 {
814 	ulint	format_id;
815 
816 	/* Check the file format in the tablespace. Do not try to
817 	recover if the file format is not supported by the engine
818 	unless forced by the user. */
819 	format_id = trx_sys_file_format_max_read();
820 	if (format_id == ULINT_UNDEFINED) {
821 		/* Format ID was not set. Set it to minimum possible
822 		value. */
823 		format_id = UNIV_FORMAT_MIN;
824 	}
825 
826 	ib::info() << "Highest supported file format is "
827 		<< trx_sys_file_format_id_to_name(UNIV_FORMAT_MAX) << ".";
828 
829 	if (format_id > UNIV_FORMAT_MAX) {
830 
831 		ut_a(format_id < FILE_FORMAT_NAME_N);
832 
833 		const std::string	msg = std::string("The system"
834 			" tablespace is in a file format that this version"
835 			" doesn't support - ")
836 			+ trx_sys_file_format_id_to_name(format_id)
837 			+ ".";
838 
839 		if (max_format_id <= UNIV_FORMAT_MAX) {
840 			ib::error() << msg;
841 		} else {
842 			ib::warn() << msg;
843 		}
844 
845 		if (max_format_id <= UNIV_FORMAT_MAX) {
846 			return(DB_ERROR);
847 		}
848 	}
849 
850 	format_id = (format_id > max_format_id) ? format_id : max_format_id;
851 
852 	/* We don't need a mutex here, as this function should only
853 	be called once at start up. */
854 	file_format_max.id = format_id;
855 	file_format_max.name = trx_sys_file_format_id_to_name(format_id);
856 
857 	return(DB_SUCCESS);
858 }
859 
860 /*****************************************************************//**
861 Set the file format id unconditionally except if it's already the
862 same value.
863 @return TRUE if value updated */
864 ibool
trx_sys_file_format_max_set(ulint format_id,const char ** name)865 trx_sys_file_format_max_set(
866 /*========================*/
867 	ulint		format_id,	/*!< in: file format id */
868 	const char**	name)		/*!< out: max file format name or
869 					NULL if not needed. */
870 {
871 	ibool		ret = FALSE;
872 
873 	ut_a(format_id <= UNIV_FORMAT_MAX);
874 
875 	mutex_enter(&file_format_max.mutex);
876 
877 	/* Only update if not already same value. */
878 	if (format_id != file_format_max.id) {
879 
880 		ret = trx_sys_file_format_max_write(format_id, name);
881 	}
882 
883 	mutex_exit(&file_format_max.mutex);
884 
885 	return(ret);
886 }
887 
888 /********************************************************************//**
889 Tags the system table space with minimum format id if it has not been
890 tagged yet.
891 WARNING: This function is only called during the startup and AFTER the
892 redo log application during recovery has finished. */
893 void
trx_sys_file_format_tag_init(void)894 trx_sys_file_format_tag_init(void)
895 /*==============================*/
896 {
897 	ulint	format_id;
898 
899 	format_id = trx_sys_file_format_max_read();
900 
901 	/* If format_id is not set then set it to the minimum. */
902 	if (format_id == ULINT_UNDEFINED) {
903 		trx_sys_file_format_max_set(UNIV_FORMAT_MIN, NULL);
904 	}
905 }
906 
907 /********************************************************************//**
908 Update the file format tag in the system tablespace only if the given
909 format id is greater than the known max id.
910 @return TRUE if format_id was bigger than the known max id */
911 ibool
trx_sys_file_format_max_upgrade(const char ** name,ulint format_id)912 trx_sys_file_format_max_upgrade(
913 /*============================*/
914 	const char**	name,		/*!< out: max file format name */
915 	ulint		format_id)	/*!< in: file format identifier */
916 {
917 	ibool		ret = FALSE;
918 
919 	ut_a(name);
920 	ut_a(file_format_max.name != NULL);
921 	ut_a(format_id <= UNIV_FORMAT_MAX);
922 
923 	mutex_enter(&file_format_max.mutex);
924 
925 	if (format_id > file_format_max.id) {
926 
927 		ret = trx_sys_file_format_max_write(format_id, name);
928 	}
929 
930 	mutex_exit(&file_format_max.mutex);
931 
932 	return(ret);
933 }
934 
935 /*****************************************************************//**
936 Get the name representation of the file format from its id.
937 @return pointer to the max format name */
938 const char*
trx_sys_file_format_max_get(void)939 trx_sys_file_format_max_get(void)
940 /*=============================*/
941 {
942 	return(file_format_max.name);
943 }
944 
945 /*****************************************************************//**
946 Initializes the tablespace tag system. */
947 void
trx_sys_file_format_init(void)948 trx_sys_file_format_init(void)
949 /*==========================*/
950 {
951 	mutex_create(LATCH_ID_FILE_FORMAT_MAX, &file_format_max.mutex);
952 
953 	/* We don't need a mutex here, as this function should only
954 	be called once at start up. */
955 	file_format_max.id = UNIV_FORMAT_MIN;
956 
957 	file_format_max.name = trx_sys_file_format_id_to_name(
958 		file_format_max.id);
959 }
960 
961 /*****************************************************************//**
962 Closes the tablespace tag system. */
963 void
trx_sys_file_format_close(void)964 trx_sys_file_format_close(void)
965 /*===========================*/
966 {
967 	mutex_free(&file_format_max.mutex);
968 }
969 
970 /*********************************************************************
971 Creates non-redo rollback segments.
972 @return number of non-redo rollback segments created. */
973 static
974 ulint
trx_sys_create_noredo_rsegs(ulint n_nonredo_rseg)975 trx_sys_create_noredo_rsegs(
976 /*========================*/
977 	ulint	n_nonredo_rseg)	/*!< number of non-redo rollback segment
978 				to create. */
979 {
980 	ulint n_created = 0;
981 
982 	/* Create non-redo rollback segments residing in temp-tablespace.
983 	non-redo rollback segments don't perform redo logging and so
984 	are used for undo logging of objects/table that don't need to be
985 	recover on crash.
986 	(Non-Redo rollback segments are created on every server startup).
987 	Slot-0: reserved for system-tablespace.
988 	Slot-1....Slot-N: reserved for temp-tablespace.
989 	Slot-N+1....Slot-127: reserved for system/undo-tablespace. */
990 	for (ulint i = 0; i < n_nonredo_rseg; i++) {
991 		ulint space = srv_tmp_space.space_id();
992 		if (trx_rseg_create(space, i) == NULL) {
993 			break;
994 		}
995 		++n_created;
996 	}
997 
998 	return(n_created);
999 }
1000 
1001 /*********************************************************************
1002 Creates the rollback segments.
1003 @return number of rollback segments that are active. */
1004 ulint
trx_sys_create_rsegs(ulint n_spaces,ulint n_rsegs,ulint n_tmp_rsegs)1005 trx_sys_create_rsegs(
1006 /*=================*/
1007 	ulint	n_spaces,	/*!< number of tablespaces for UNDO logs */
1008 	ulint	n_rsegs,	/*!< number of rollback segments to create */
1009 	ulint	n_tmp_rsegs)	/*!< number of rollback segments reserved for
1010 				temp-tables. */
1011 {
1012 	mtr_t	mtr;
1013 	ulint	n_used;
1014 	ulint	n_noredo_created;
1015 
1016 	ut_a(n_spaces < TRX_SYS_N_RSEGS);
1017 	ut_a(n_rsegs <= TRX_SYS_N_RSEGS);
1018 	ut_a(n_tmp_rsegs > 0 && n_tmp_rsegs < TRX_SYS_N_RSEGS);
1019 
1020 	if (srv_read_only_mode) {
1021 		return(ULINT_UNDEFINED);
1022 	}
1023 
1024 	/* Create non-redo rollback segments. */
1025 	n_noredo_created = trx_sys_create_noredo_rsegs(n_tmp_rsegs);
1026 
1027 	/* This is executed in single-threaded mode therefore it is not
1028 	necessary to use the same mtr in trx_rseg_create(). n_used cannot
1029 	change while the function is executing. */
1030 	mtr_start(&mtr);
1031 	n_used = trx_sysf_used_slots_for_redo_rseg(&mtr) + n_noredo_created;
1032 	mtr_commit(&mtr);
1033 
1034 	ut_ad(n_used <= TRX_SYS_N_RSEGS);
1035 
1036 	/* By default 1 redo rseg is always active that is hosted in
1037 	system tablespace. */
1038 	ulint	n_redo_active;
1039 	if (n_rsegs <= n_tmp_rsegs) {
1040 		n_redo_active = 1;
1041 	} else if (n_rsegs > n_used) {
1042 		n_redo_active = n_used - n_tmp_rsegs;
1043 	} else {
1044 		n_redo_active = n_rsegs - n_tmp_rsegs;
1045 	}
1046 
1047 	/* Do not create additional rollback segments if innodb_force_recovery
1048 	has been set and the database was not shutdown cleanly. */
1049 	if (!srv_force_recovery && !recv_needed_recovery && n_used < n_rsegs) {
1050 		ulint	i;
1051 		ulint	new_rsegs = n_rsegs - n_used;
1052 
1053 		for (i = 0; i < new_rsegs; ++i) {
1054 			ulint	space_id;
1055 			space_id = (n_spaces == 0) ? 0
1056 				: (srv_undo_space_id_start + i % n_spaces);
1057 
1058 			ut_ad(n_spaces == 0
1059 			      || srv_is_undo_tablespace(space_id));
1060 
1061 			if (trx_rseg_create(space_id, 0) != NULL) {
1062 				++n_used;
1063 				++n_redo_active;
1064 
1065 				ulint	last_undo_space =
1066 					srv_undo_space_id_start
1067 					+ (srv_undo_tablespaces_active - 1);
1068 
1069 				/* Increase the number of active undo
1070 				tablespace in case new rollback segment
1071 				assigned to new undo tablespace. */
1072 				if (space_id > last_undo_space) {
1073 					srv_undo_tablespaces_active++;
1074 
1075 					ut_ad(space_id == last_undo_space + 1);
1076 				}
1077 			} else {
1078 				break;
1079 			}
1080 		}
1081 	}
1082 
1083 	ib::info() << n_used - srv_tmp_undo_logs
1084 		<< " redo rollback segment(s) found. "
1085 		<< n_redo_active
1086 		<< " redo rollback segment(s) are active.";
1087 
1088 	ib::info() << n_noredo_created << " non-redo rollback segment(s) are"
1089 		" active.";
1090 
1091 	return(n_used);
1092 }
1093 
1094 #else /* !UNIV_HOTBACKUP */
1095 /*****************************************************************//**
1096 Prints to stderr the MySQL binlog info in the system header if the
1097 magic number shows it valid. */
1098 void
trx_sys_print_mysql_binlog_offset_from_page(const byte * page)1099 trx_sys_print_mysql_binlog_offset_from_page(
1100 /*========================================*/
1101 	const byte*	page)	/*!< in: buffer containing the trx
1102 				system header page, i.e., page number
1103 				TRX_SYS_PAGE_NO in the tablespace */
1104 {
1105 	const trx_sysf_t*	sys_header;
1106 
1107 	sys_header = page + TRX_SYS;
1108 
1109 	if (mach_read_from_4(sys_header + TRX_SYS_MYSQL_LOG_INFO
1110 			     + TRX_SYS_MYSQL_LOG_MAGIC_N_FLD)
1111 	    == TRX_SYS_MYSQL_LOG_MAGIC_N) {
1112 
1113 		ib::info() << "mysqlbackup: Last MySQL binlog file position "
1114 			<< mach_read_from_4(
1115 				sys_header + TRX_SYS_MYSQL_LOG_INFO
1116 				+ TRX_SYS_MYSQL_LOG_OFFSET_HIGH) << " "
1117 			<< mach_read_from_4(
1118 				sys_header + TRX_SYS_MYSQL_LOG_INFO
1119 				+ TRX_SYS_MYSQL_LOG_OFFSET_LOW)
1120 			<< ", file name " << sys_header
1121 			+ TRX_SYS_MYSQL_LOG_INFO + TRX_SYS_MYSQL_LOG_NAME;
1122 	}
1123 }
1124 
1125 /*****************************************************************//**
1126 Reads the file format id from the first system table space file.
1127 Even if the call succeeds and returns TRUE, the returned format id
1128 may be ULINT_UNDEFINED signalling that the format id was not present
1129 in the data file.
1130 @return TRUE if call succeeds */
1131 ibool
trx_sys_read_file_format_id(const char * pathname,ulint * format_id)1132 trx_sys_read_file_format_id(
1133 /*========================*/
1134 	const char *pathname,  /*!< in: pathname of the first system
1135 				        table space file */
1136 	ulint *format_id)      /*!< out: file format of the system table
1137 				         space */
1138 {
1139 	os_file_t	file;
1140 	bool		success;
1141 	byte		buf[UNIV_PAGE_SIZE * 2];
1142 	page_t*		page = ut_align(buf, UNIV_PAGE_SIZE);
1143 	const byte*	ptr;
1144 	ib_id_t		file_format_id;
1145 
1146 	*format_id = ULINT_UNDEFINED;
1147 
1148 	file = os_file_create_simple_no_error_handling(
1149 		innodb_data_file_key,
1150 		pathname,
1151 		OS_FILE_OPEN,
1152 		OS_FILE_READ_ONLY,
1153 		srv_read_only_mode,
1154 		&success
1155 	);
1156 	if (!success) {
1157 		/* The following call prints an error message */
1158 		os_file_get_last_error(true);
1159 
1160 		ib::error() << "mysqlbackup: Error: trying to read system"
1161 			" tablespace file format, but could not open the"
1162 			" tablespace file " << pathname << "!";
1163 		return(FALSE);
1164 	}
1165 
1166 	/* Read the page on which file format is stored */
1167 
1168 	IORequest	read_req(IORequest::READ)
1169 
1170 	dberr_t	err = os_file_read_no_error_handling(
1171 		read_req, file, page, TRX_SYS_PAGE_NO * UNIV_PAGE_SIZE,
1172 		UNIV_PAGE_SIZE, NULL);
1173 
1174 	if (err != DB_SUCCESS) {
1175 		/* The following call prints an error message */
1176 		os_file_get_last_error(true);
1177 
1178 		ib::error() << "mysqlbackup: Error: trying to read system"
1179 			" tablespace file format, but failed to read the"
1180 			" tablespace file " << pathname << "!";
1181 
1182 		os_file_close(file);
1183 		return(FALSE);
1184 	}
1185 	os_file_close(file);
1186 
1187 	/* get the file format from the page */
1188 	ptr = page + TRX_SYS_FILE_FORMAT_TAG;
1189 	file_format_id = mach_read_from_8(ptr);
1190 	file_format_id -= TRX_SYS_FILE_FORMAT_TAG_MAGIC_N;
1191 
1192 	if (file_format_id >= FILE_FORMAT_NAME_N) {
1193 
1194 		/* Either it has never been tagged, or garbage in it. */
1195 		return(TRUE);
1196 	}
1197 
1198 	*format_id = (ulint) file_format_id;
1199 
1200 	return(TRUE);
1201 }
1202 
1203 /*****************************************************************//**
1204 Reads the file format id from the given per-table data file.
1205 @return TRUE if call succeeds */
1206 ibool
trx_sys_read_pertable_file_format_id(const char * pathname,ulint * format_id)1207 trx_sys_read_pertable_file_format_id(
1208 /*=================================*/
1209 	const char *pathname,  /*!< in: pathname of a per-table
1210 				        datafile */
1211 	ulint *format_id)      /*!< out: file format of the per-table
1212 				         data file */
1213 {
1214 	os_file_t	file;
1215 	bool		success;
1216 	byte		buf[UNIV_PAGE_SIZE * 2];
1217 	page_t*		page = ut_align(buf, UNIV_PAGE_SIZE);
1218 	const byte*	ptr;
1219 	ib_uint32_t	flags;
1220 
1221 	*format_id = ULINT_UNDEFINED;
1222 
1223 	file = os_file_create_simple_no_error_handling(
1224 		innodb_data_file_key,
1225 		pathname,
1226 		OS_FILE_OPEN,
1227 		OS_FILE_READ_ONLY,
1228 		srv_read_only_mode,
1229 		&success
1230 	);
1231 	if (!success) {
1232 		/* The following call prints an error message */
1233 		os_file_get_last_error(true);
1234 
1235 		ib::error() << "mysqlbackup: Error: trying to read per-table"
1236 			" tablespace format, but could not open the tablespace"
1237 			" file " << pathname << "!";
1238 
1239 		return(FALSE);
1240 	}
1241 
1242 	IORequest	read_req(IORequest::READ);
1243 
1244 	/* Read the first page of the per-table datafile */
1245 
1246 	dberr_t	err = os_file_read_no_error_handling(
1247 		read_req, file, page, 0, UNIV_PAGE_SIZE, NULL);
1248 
1249 	if (err != DB_SUCCESS) {
1250 		/* The following call prints an error message */
1251 		os_file_get_last_error(true);
1252 
1253 		ib::error() << "mysqlbackup: Error: trying to per-table data"
1254 			" file format, but failed to read the tablespace file "
1255 			<< pathname << "!";
1256 
1257 		os_file_close(file);
1258 		return(FALSE);
1259 	}
1260 	os_file_close(file);
1261 
1262 	/* get the file format from the page */
1263 	ptr = page + 54;
1264 	flags = mach_read_from_4(ptr);
1265 
1266 	if (!fsp_flags_is_valid(flags) {
1267 		/* bad tablespace flags */
1268 		return(FALSE);
1269 	}
1270 
1271 	*format_id = FSP_FLAGS_GET_POST_ANTELOPE(flags);
1272 
1273 	return(TRUE);
1274 }
1275 
1276 
1277 /*****************************************************************//**
1278 Get the name representation of the file format from its id.
1279 @return pointer to the name */
1280 const char*
1281 trx_sys_file_format_id_to_name(
1282 /*===========================*/
1283 	const ulint	id)	/*!< in: id of the file format */
1284 {
1285 	if (!(id < FILE_FORMAT_NAME_N)) {
1286 		/* unknown id */
1287 		return("Unknown");
1288 	}
1289 
1290 	return(file_format_name_map[id]);
1291 }
1292 
1293 #endif /* !UNIV_HOTBACKUP */
1294 
1295 #ifndef UNIV_HOTBACKUP
1296 /*********************************************************************
1297 Shutdown/Close the transaction system. */
1298 void
1299 trx_sys_close(void)
1300 /*===============*/
1301 {
1302 	ut_ad(trx_sys != NULL);
1303 	ut_ad(srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS);
1304 
1305 	ulint	size = trx_sys->mvcc->size();
1306 
1307 	if (size > 0) {
1308 		ib::error() << "All read views were not closed before"
1309 			" shutdown: " << size << " read views open";
1310 	}
1311 
1312 	sess_close(trx_dummy_sess);
1313 	trx_dummy_sess = NULL;
1314 
1315 	trx_purge_sys_close();
1316 
1317 	/* Free the double write data structures. */
1318 	buf_dblwr_free();
1319 
1320 	/* Only prepared transactions may be left in the system. Free them. */
1321 	ut_a(UT_LIST_GET_LEN(trx_sys->rw_trx_list) == trx_sys->n_prepared_trx);
1322 
1323 	for (trx_t* trx = UT_LIST_GET_FIRST(trx_sys->rw_trx_list);
1324 	     trx != NULL;
1325 	     trx = UT_LIST_GET_FIRST(trx_sys->rw_trx_list)) {
1326 
1327 		trx_free_prepared(trx);
1328 
1329 		UT_LIST_REMOVE(trx_sys->rw_trx_list, trx);
1330 	}
1331 
1332 	/* There can't be any active transactions. */
1333 	trx_rseg_t** rseg_array = static_cast<trx_rseg_t**>(
1334 		trx_sys->rseg_array);
1335 
1336 	for (ulint i = 0; i < TRX_SYS_N_RSEGS; ++i) {
1337 		trx_rseg_t*	rseg;
1338 
1339 		rseg = trx_sys->rseg_array[i];
1340 
1341 		if (rseg != NULL) {
1342 			trx_rseg_mem_free(rseg, rseg_array);
1343 		}
1344 	}
1345 
1346 	for (ulint i = 0; i < TRX_SYS_N_RSEGS; ++i) {
1347 		trx_rseg_t*	rseg;
1348 
1349 		rseg = trx_sys->pending_purge_rseg_array[i];
1350 
1351 		if (rseg != NULL) {
1352 			trx_rseg_mem_free(rseg,
1353 				trx_sys->pending_purge_rseg_array);
1354 		}
1355 	}
1356 
1357 	UT_DELETE(trx_sys->mvcc);
1358 
1359 	ut_a(UT_LIST_GET_LEN(trx_sys->rw_trx_list) == 0);
1360 	ut_a(UT_LIST_GET_LEN(trx_sys->mysql_trx_list) == 0);
1361 	ut_a(UT_LIST_GET_LEN(trx_sys->serialisation_list) == 0);
1362 
1363 	/* We used placement new to create this mutex. Call the destructor. */
1364 	mutex_free(&trx_sys->mutex);
1365 
1366 	trx_sys->rw_trx_ids.~trx_ids_t();
1367 
1368 	trx_sys->rw_trx_set.~TrxIdSet();
1369 
1370 	ut_free(trx_sys);
1371 
1372 	trx_sys = NULL;
1373 }
1374 
1375 /** @brief Convert an undo log to TRX_UNDO_PREPARED state on shutdown.
1376 
1377 If any prepared ACTIVE transactions exist, and their rollback was
1378 prevented by innodb_force_recovery, we convert these transactions to
1379 XA PREPARE state in the main-memory data structures, so that shutdown
1380 will proceed normally. These transactions will again recover as ACTIVE
1381 on the next restart, and they will be rolled back unless
1382 innodb_force_recovery prevents it again.
1383 
1384 @param[in]	trx	transaction
1385 @param[in,out]	undo	undo log to convert to TRX_UNDO_PREPARED */
1386 static
1387 void
1388 trx_undo_fake_prepared(
1389 	const trx_t*	trx,
1390 	trx_undo_t*	undo)
1391 {
1392 	ut_ad(srv_force_recovery >= SRV_FORCE_NO_TRX_UNDO);
1393 	ut_ad(trx_state_eq(trx, TRX_STATE_ACTIVE));
1394 	ut_ad(trx->is_recovered);
1395 
1396 	if (undo != NULL) {
1397 		ut_ad(undo->state == TRX_UNDO_ACTIVE);
1398 		undo->state = TRX_UNDO_PREPARED;
1399 	}
1400 }
1401 
1402 /*********************************************************************
1403 Check if there are any active (non-prepared) transactions.
1404 @return total number of active transactions or 0 if none */
1405 ulint
1406 trx_sys_any_active_transactions(void)
1407 /*=================================*/
1408 {
1409 	trx_sys_mutex_enter();
1410 
1411 	ulint	total_trx = UT_LIST_GET_LEN(trx_sys->mysql_trx_list);
1412 
1413 	if (total_trx == 0) {
1414 		total_trx = UT_LIST_GET_LEN(trx_sys->rw_trx_list);
1415 		ut_a(total_trx >= trx_sys->n_prepared_trx);
1416 
1417 		if (total_trx > trx_sys->n_prepared_trx
1418 		    && srv_force_recovery >= SRV_FORCE_NO_TRX_UNDO) {
1419 			for (trx_t* trx = UT_LIST_GET_FIRST(
1420 				     trx_sys->rw_trx_list);
1421 			     trx != NULL;
1422 			     trx = UT_LIST_GET_NEXT(trx_list, trx)) {
1423 				if (!trx_state_eq(trx, TRX_STATE_ACTIVE)
1424 				    || !trx->is_recovered) {
1425 					continue;
1426 				}
1427 				/* This was a recovered transaction
1428 				whose rollback was disabled by
1429 				the innodb_force_recovery setting.
1430 				Pretend that it is in XA PREPARE
1431 				state so that shutdown will work. */
1432 				trx_undo_fake_prepared(
1433 					trx, trx->rsegs.m_redo.insert_undo);
1434 				trx_undo_fake_prepared(
1435 					trx, trx->rsegs.m_redo.update_undo);
1436 				trx_undo_fake_prepared(
1437 					trx, trx->rsegs.m_noredo.insert_undo);
1438 				trx_undo_fake_prepared(
1439 					trx, trx->rsegs.m_noredo.update_undo);
1440 				trx->state = TRX_STATE_PREPARED;
1441 				trx_sys->n_prepared_trx++;
1442 				trx_sys->n_prepared_recovered_trx++;
1443 			}
1444 		}
1445 
1446 		ut_a(total_trx >= trx_sys->n_prepared_trx);
1447 		total_trx -= trx_sys->n_prepared_trx;
1448 	}
1449 
1450 	trx_sys_mutex_exit();
1451 
1452 	return(total_trx);
1453 }
1454 
1455 #ifdef UNIV_DEBUG
1456 /*************************************************************//**
1457 Validate the trx_ut_list_t.
1458 @return true if valid. */
1459 static
1460 bool
1461 trx_sys_validate_trx_list_low(
1462 /*===========================*/
1463 	trx_ut_list_t*	trx_list)	/*!< in: &trx_sys->rw_trx_list */
1464 {
1465 	const trx_t*	trx;
1466 	const trx_t*	prev_trx = NULL;
1467 
1468 	ut_ad(trx_sys_mutex_own());
1469 
1470 	ut_ad(trx_list == &trx_sys->rw_trx_list);
1471 
1472 	for (trx = UT_LIST_GET_FIRST(*trx_list);
1473 	     trx != NULL;
1474 	     prev_trx = trx, trx = UT_LIST_GET_NEXT(trx_list, prev_trx)) {
1475 
1476 		check_trx_state(trx);
1477 		ut_a(prev_trx == NULL || prev_trx->id > trx->id);
1478 	}
1479 
1480 	return(true);
1481 }
1482 
1483 /*************************************************************//**
1484 Validate the trx_sys_t::rw_trx_list.
1485 @return true if the list is valid. */
1486 bool
1487 trx_sys_validate_trx_list()
1488 /*=======================*/
1489 {
1490 	ut_ad(trx_sys_mutex_own());
1491 
1492 	ut_a(trx_sys_validate_trx_list_low(&trx_sys->rw_trx_list));
1493 
1494 	return(true);
1495 }
1496 #endif /* UNIV_DEBUG */
1497 #endif /* !UNIV_HOTBACKUP */
1498