1 /*****************************************************************************
2 
3 Copyright (c) 1996, 2017, Oracle and/or its affiliates. All Rights Reserved.
4 
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License, version 2.0,
7 as published by the Free Software Foundation.
8 
9 This program is also distributed with certain software (including
10 but not limited to OpenSSL) that is licensed under separate terms,
11 as designated in a particular file or component or in included license
12 documentation.  The authors of MySQL hereby grant you an additional
13 permission to link the program and your derivative works with the
14 separately licensed software that they have included with MySQL.
15 
16 This program is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19 GNU General Public License, version 2.0, for more details.
20 
21 You should have received a copy of the GNU General Public License along with
22 this program; if not, write to the Free Software Foundation, Inc.,
23 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
24 
25 *****************************************************************************/
26 
27 /**************************************************//**
28 @file trx/trx0sys.cc
29 Transaction system
30 
31 Created 3/26/1996 Heikki Tuuri
32 *******************************************************/
33 
34 #include "trx0sys.h"
35 
36 #ifdef UNIV_NONINL
37 #include "trx0sys.ic"
38 #endif
39 
40 #ifdef UNIV_HOTBACKUP
41 #include "fsp0types.h"
42 
43 #else	/* !UNIV_HOTBACKUP */
44 #include "fsp0fsp.h"
45 #include "mtr0log.h"
46 #include "mtr0log.h"
47 #include "trx0trx.h"
48 #include "trx0rseg.h"
49 #include "trx0undo.h"
50 #include "srv0srv.h"
51 #include "srv0start.h"
52 #include "trx0purge.h"
53 #include "log0log.h"
54 #include "log0recv.h"
55 #include "os0file.h"
56 #include "read0read.h"
57 
58 #ifdef WITH_WSREP
59 #include "ha_prototypes.h" /* wsrep_is_wsrep_xid() */
#endif /* WITH_WSREP */
61 
/** The file format tag structure with id and name.  The id/name pair
describes one file format; the embedded mutex is the one callers in this
file take before changing that pair. */
struct file_format_t {
	ulint		id;		/*!< id of the file format */
	const char*	name;		/*!< text representation of the
					file format */
	ib_mutex_t		mutex;		/*!< covers changes to the above
					fields */
};
70 
/** The transaction system.  NULL until trx_sys_create() allocates it. */
UNIV_INTERN trx_sys_t*		trx_sys		= NULL;
73 
/** In a MySQL replication slave, in crash recovery we store the master log
file name and position here.  Both variables are filled in by
trx_sys_print_mysql_master_log_pos(). */
/* @{ */
/** Master binlog file name */
UNIV_INTERN char	trx_sys_mysql_master_log_name[TRX_SYS_MYSQL_LOG_NAME_LEN];
/** Master binlog file position.  We have successfully received the updates
up to this position.  -1 means that no crash recovery was needed, or
there was no master log position info inside InnoDB. */
UNIV_INTERN ib_int64_t	trx_sys_mysql_master_log_pos	= -1;
/* @} */
84 
/** If this MySQL server uses binary logging, after InnoDB has been inited
and if it has done a crash recovery, we store the binlog file name and position
here.  Both variables are filled in by trx_sys_print_mysql_binlog_offset(). */
/* @{ */
/** Binlog file name */
UNIV_INTERN char	trx_sys_mysql_bin_log_name[TRX_SYS_MYSQL_LOG_NAME_LEN];
/** Binlog file position, or -1 if unknown */
UNIV_INTERN ib_int64_t	trx_sys_mysql_bin_log_pos	= -1;
/* @} */
94 #endif /* !UNIV_HOTBACKUP */
95 
/** List of animal names representing file formats.  The array is indexed
by file format id (see trx_sys_file_format_id_to_name()); there is one
name per letter of the alphabet, in alphabetical order. */
static const char*	file_format_name_map[] = {
	"Antelope",
	"Barracuda",
	"Cheetah",
	"Dragon",
	"Elk",
	"Fox",
	"Gazelle",
	"Hornet",
	"Impala",
	"Jaguar",
	"Kangaroo",
	"Leopard",
	"Moose",
	"Nautilus",
	"Ocelot",
	"Porpoise",
	"Quail",
	"Rabbit",
	"Shark",
	"Tiger",
	"Urchin",
	"Viper",
	"Whale",
	"Xenops",
	"Yak",
	"Zebra"
};
125 
/** The number of elements in the file format name array.  Computed from
the array itself, so it follows any change to file_format_name_map; used as
the exclusive upper bound when validating a file format id. */
static const ulint	FILE_FORMAT_NAME_N
	= sizeof(file_format_name_map) / sizeof(file_format_name_map[0]);
129 
#ifdef UNIV_PFS_MUTEX
/* Keys to register the mutexes of this file with performance schema:
file_format_max_mutex_key is passed to mutex_create() for
file_format_max.mutex and trx_sys_mutex_key for trx_sys->mutex. */
UNIV_INTERN mysql_pfs_key_t	file_format_max_mutex_key;
UNIV_INTERN mysql_pfs_key_t	trx_sys_mutex_key;
#endif /* UNIV_PFS_MUTEX */
135 
136 #ifndef UNIV_HOTBACKUP
137 #ifdef UNIV_DEBUG
/* Flag to control TRX_RSEG_N_SLOTS behavior debugging.  Only defined in
UNIV_DEBUG builds; 0 by default. */
UNIV_INTERN uint	trx_rseg_n_slots_debug = 0;
140 #endif
141 
/** This is used to track the maximum file format id known to InnoDB. It's
updated via SET GLOBAL innodb_file_format_max = 'x' or when we open
or create a table.  The id/name fields are initialised by
trx_sys_file_format_init() and changed under file_format_max.mutex by
trx_sys_file_format_max_set() and trx_sys_file_format_max_upgrade(). */
static	file_format_t	file_format_max;
146 
147 #ifdef UNIV_DEBUG
148 /****************************************************************//**
149 Checks whether a trx is in one of rw_trx_list or ro_trx_list.
150 @return	TRUE if is in */
151 UNIV_INTERN
152 ibool
trx_in_trx_list(const trx_t * in_trx)153 trx_in_trx_list(
154 /*============*/
155 	const trx_t*	in_trx)	/*!< in: transaction */
156 {
157 	const trx_t*	trx;
158 	trx_list_t*	trx_list;
159 
160 	/* Non-locking autocommits should not hold any locks. */
161 	assert_trx_in_list(in_trx);
162 
163 	trx_list = in_trx->read_only
164 		? &trx_sys->ro_trx_list : &trx_sys->rw_trx_list;
165 
166 	ut_ad(mutex_own(&trx_sys->mutex));
167 
168 	ut_ad(trx_assert_started(in_trx));
169 
170 	for (trx = UT_LIST_GET_FIRST(*trx_list);
171 	     trx != NULL && trx != in_trx;
172 	     trx = UT_LIST_GET_NEXT(trx_list, trx)) {
173 
174 		assert_trx_in_list(trx);
175 		ut_ad(trx->read_only == (trx_list == &trx_sys->ro_trx_list));
176 	}
177 
178 	return(trx != NULL);
179 }
180 #endif /* UNIV_DEBUG */
181 
182 /*****************************************************************//**
183 Writes the value of max_trx_id to the file based trx system header. */
184 UNIV_INTERN
185 void
trx_sys_flush_max_trx_id(void)186 trx_sys_flush_max_trx_id(void)
187 /*==========================*/
188 {
189 	mtr_t		mtr;
190 	trx_sysf_t*	sys_header;
191 
192 #ifndef WITH_WSREP
193 	/* wsrep_fake_trx_id  violates this assert
194 	 * Copied from trx_sys_get_new_trx_id
195 	 */
196 	ut_ad(mutex_own(&trx_sys->mutex));
197 #endif /* WITH_WSREP */
198 
199 	if (!srv_read_only_mode) {
200 		mtr_start(&mtr);
201 
202 		sys_header = trx_sysf_get(&mtr);
203 
204 		mlog_write_ull(
205 			sys_header + TRX_SYS_TRX_ID_STORE,
206 			trx_sys->max_trx_id, &mtr);
207 
208 		mtr_commit(&mtr);
209 	}
210 }
211 
212 /*****************************************************************//**
213 Updates the offset information about the end of the MySQL binlog entry
214 which corresponds to the transaction just being committed. In a MySQL
215 replication slave updates the latest master binlog position up to which
216 replication has proceeded. */
217 UNIV_INTERN
218 void
trx_sys_update_mysql_binlog_offset(const char * file_name,ib_int64_t offset,ulint field,trx_sysf_t * sys_header,mtr_t * mtr)219 trx_sys_update_mysql_binlog_offset(
220 /*===============================*/
221 	const char*	file_name,/*!< in: MySQL log file name */
222 	ib_int64_t	offset,	/*!< in: position in that log file */
223 	ulint		field,	/*!< in: offset of the MySQL log info field in
224 				the trx sys header */
225 #ifdef WITH_WSREP
226         trx_sysf_t*     sys_header, /*!< in: trx sys header */
227 #endif /* WITH_WSREP */
228 	mtr_t*		mtr)	/*!< in: mtr */
229 {
230 #ifndef WITH_WSREP
231 	trx_sysf_t*	sys_header;
232 #endif /* !WITH_WSREP */
233 	if (ut_strlen(file_name) >= TRX_SYS_MYSQL_LOG_NAME_LEN) {
234 
235 		/* We cannot fit the name to the 512 bytes we have reserved */
236 
237 		return;
238 	}
239 
240 #ifndef WITH_WSREP
241 	sys_header = trx_sysf_get(mtr);
242 #endif /* !WITH_WSREP */
243 
244 	if (mach_read_from_4(sys_header + field
245 			     + TRX_SYS_MYSQL_LOG_MAGIC_N_FLD)
246 	    != TRX_SYS_MYSQL_LOG_MAGIC_N) {
247 
248 		mlog_write_ulint(sys_header + field
249 				 + TRX_SYS_MYSQL_LOG_MAGIC_N_FLD,
250 				 TRX_SYS_MYSQL_LOG_MAGIC_N,
251 				 MLOG_4BYTES, mtr);
252 	}
253 
254 	if (0 != strcmp((char*) (sys_header + field + TRX_SYS_MYSQL_LOG_NAME),
255 			file_name)) {
256 
257 		mlog_write_string(sys_header + field
258 				  + TRX_SYS_MYSQL_LOG_NAME,
259 				  (byte*) file_name, 1 + ut_strlen(file_name),
260 				  mtr);
261 	}
262 
263 	if (mach_read_from_4(sys_header + field
264 			     + TRX_SYS_MYSQL_LOG_OFFSET_HIGH) > 0
265 	    || (offset >> 32) > 0) {
266 
267 		mlog_write_ulint(sys_header + field
268 				 + TRX_SYS_MYSQL_LOG_OFFSET_HIGH,
269 				 (ulint)(offset >> 32),
270 				 MLOG_4BYTES, mtr);
271 	}
272 
273 	mlog_write_ulint(sys_header + field
274 			 + TRX_SYS_MYSQL_LOG_OFFSET_LOW,
275 			 (ulint)(offset & 0xFFFFFFFFUL),
276 			 MLOG_4BYTES, mtr);
277 }
278 
279 /*****************************************************************//**
280 Stores the MySQL binlog offset info in the trx system header if
281 the magic number shows it valid, and print the info to stderr */
282 UNIV_INTERN
283 void
trx_sys_print_mysql_binlog_offset(void)284 trx_sys_print_mysql_binlog_offset(void)
285 /*===================================*/
286 {
287 	trx_sysf_t*	sys_header;
288 	mtr_t		mtr;
289 	ulint		trx_sys_mysql_bin_log_pos_high;
290 	ulint		trx_sys_mysql_bin_log_pos_low;
291 
292 	mtr_start(&mtr);
293 
294 	sys_header = trx_sysf_get(&mtr);
295 
296 	if (mach_read_from_4(sys_header + TRX_SYS_MYSQL_LOG_INFO
297 			     + TRX_SYS_MYSQL_LOG_MAGIC_N_FLD)
298 	    != TRX_SYS_MYSQL_LOG_MAGIC_N) {
299 
300 		mtr_commit(&mtr);
301 
302 		return;
303 	}
304 
305 	trx_sys_mysql_bin_log_pos_high = mach_read_from_4(
306 		sys_header + TRX_SYS_MYSQL_LOG_INFO
307 		+ TRX_SYS_MYSQL_LOG_OFFSET_HIGH);
308 	trx_sys_mysql_bin_log_pos_low = mach_read_from_4(
309 		sys_header + TRX_SYS_MYSQL_LOG_INFO
310 		+ TRX_SYS_MYSQL_LOG_OFFSET_LOW);
311 
312 	trx_sys_mysql_bin_log_pos
313 		= (((ib_int64_t) trx_sys_mysql_bin_log_pos_high) << 32)
314 		+ (ib_int64_t) trx_sys_mysql_bin_log_pos_low;
315 
316 	ut_memcpy(trx_sys_mysql_bin_log_name,
317 		  sys_header + TRX_SYS_MYSQL_LOG_INFO
318 		  + TRX_SYS_MYSQL_LOG_NAME, TRX_SYS_MYSQL_LOG_NAME_LEN);
319 
320 	fprintf(stderr,
321 		"InnoDB: Last MySQL binlog file position %lu %lu,"
322 		" file name %s\n",
323 		trx_sys_mysql_bin_log_pos_high, trx_sys_mysql_bin_log_pos_low,
324 		trx_sys_mysql_bin_log_name);
325 
326 	mtr_commit(&mtr);
327 }
328 
329 #ifdef WITH_WSREP
330 
331 #ifdef UNIV_DEBUG
/* Debug-only record of the seqno and UUID taken from the XID most recently
passed to trx_sys_update_wsrep_checkpoint(); used there to assert that the
seqno keeps increasing. */
static long long trx_sys_cur_xid_seqno = -1;
static unsigned char trx_sys_cur_xid_uuid[16];
334 
read_wsrep_xid_seqno(const XID * xid)335 long long read_wsrep_xid_seqno(const XID* xid)
336 {
337     long long seqno;
338     memcpy(&seqno, xid->data + 24, sizeof(long long));
339     return seqno;
340 }
341 
read_wsrep_xid_uuid(const XID * xid,unsigned char * buf)342 void read_wsrep_xid_uuid(const XID* xid, unsigned char* buf)
343 {
344     memcpy(buf, xid->data + 8, 16);
345 }
346 
347 #endif /* UNIV_DEBUG */
348 
349 void
trx_sys_update_wsrep_checkpoint(const XID * xid,trx_sysf_t * sys_header,mtr_t * mtr)350 trx_sys_update_wsrep_checkpoint(
351         const XID*      xid,        /*!< in: transaction XID */
352         trx_sysf_t*     sys_header, /*!< in: sys_header */
353         mtr_t*          mtr)        /*!< in: mtr */
354 {
355 
356 #ifdef UNIV_DEBUG
357         {
358             /* Check that seqno is monotonically increasing */
359             unsigned char xid_uuid[16];
360             long long xid_seqno = read_wsrep_xid_seqno(xid);
361             read_wsrep_xid_uuid(xid, xid_uuid);
362             if (!memcmp(xid_uuid, trx_sys_cur_xid_uuid, 8))
363             {
364                 ut_ad(xid_seqno > trx_sys_cur_xid_seqno);
365                 trx_sys_cur_xid_seqno = xid_seqno;
366             }
367             else
368             {
369                 memcpy(trx_sys_cur_xid_uuid, xid_uuid, 16);
370             }
371             trx_sys_cur_xid_seqno = xid_seqno;
372         }
373 #endif /* UNIV_DEBUG */
374 
375         ut_ad(xid && mtr && sys_header);
376         ut_a(xid->formatID == -1 || wsrep_is_wsrep_xid(xid));
377 
378         if (mach_read_from_4(sys_header + TRX_SYS_WSREP_XID_INFO
379                              + TRX_SYS_WSREP_XID_MAGIC_N_FLD)
380             != TRX_SYS_WSREP_XID_MAGIC_N) {
381                 mlog_write_ulint(sys_header + TRX_SYS_WSREP_XID_INFO
382                                  + TRX_SYS_WSREP_XID_MAGIC_N_FLD,
383                                  TRX_SYS_WSREP_XID_MAGIC_N,
384                                  MLOG_4BYTES, mtr);
385         }
386 
387         mlog_write_ulint(sys_header + TRX_SYS_WSREP_XID_INFO
388                          + TRX_SYS_WSREP_XID_FORMAT,
389                          (int)xid->formatID,
390                          MLOG_4BYTES, mtr);
391         mlog_write_ulint(sys_header + TRX_SYS_WSREP_XID_INFO
392                          + TRX_SYS_WSREP_XID_GTRID_LEN,
393                          (int)xid->gtrid_length,
394                          MLOG_4BYTES, mtr);
395         mlog_write_ulint(sys_header + TRX_SYS_WSREP_XID_INFO
396                          + TRX_SYS_WSREP_XID_BQUAL_LEN,
397                          (int)xid->bqual_length,
398                          MLOG_4BYTES, mtr);
399         mlog_write_string(sys_header + TRX_SYS_WSREP_XID_INFO
400                           + TRX_SYS_WSREP_XID_DATA,
401                           (const unsigned char*) xid->data,
402                           XIDDATASIZE, mtr);
403 
404 }
405 
406 void
trx_sys_read_wsrep_checkpoint(XID * xid)407 trx_sys_read_wsrep_checkpoint(XID* xid)
408 /*===================================*/
409 {
410         trx_sysf_t* sys_header;
411 	mtr_t	    mtr;
412         ulint       magic;
413 
414         ut_ad(xid);
415 
416 	mtr_start(&mtr);
417 
418 	sys_header = trx_sysf_get(&mtr);
419 
420         if ((magic = mach_read_from_4(sys_header + TRX_SYS_WSREP_XID_INFO
421                                       + TRX_SYS_WSREP_XID_MAGIC_N_FLD))
422             != TRX_SYS_WSREP_XID_MAGIC_N) {
423                 memset(xid, 0, sizeof(*xid));
424                 xid->formatID = -1;
425                 trx_sys_update_wsrep_checkpoint(xid, sys_header, &mtr);
426                 mtr_commit(&mtr);
427                 return;
428         }
429 
430         xid->formatID     = (int)mach_read_from_4(
431                 sys_header
432                 + TRX_SYS_WSREP_XID_INFO + TRX_SYS_WSREP_XID_FORMAT);
433         xid->gtrid_length = (int)mach_read_from_4(
434                 sys_header
435                 + TRX_SYS_WSREP_XID_INFO + TRX_SYS_WSREP_XID_GTRID_LEN);
436         xid->bqual_length = (int)mach_read_from_4(
437                 sys_header
438                 + TRX_SYS_WSREP_XID_INFO + TRX_SYS_WSREP_XID_BQUAL_LEN);
439         ut_memcpy(xid->data,
440                   sys_header + TRX_SYS_WSREP_XID_INFO + TRX_SYS_WSREP_XID_DATA,
441                   XIDDATASIZE);
442 
443 	mtr_commit(&mtr);
444 }
445 
446 #endif /* WITH_WSREP */
447 
448 /*****************************************************************//**
449 Prints to stderr the MySQL master log offset info in the trx system header if
450 the magic number shows it valid. */
451 UNIV_INTERN
452 void
trx_sys_print_mysql_master_log_pos(void)453 trx_sys_print_mysql_master_log_pos(void)
454 /*====================================*/
455 {
456 	trx_sysf_t*	sys_header;
457 	mtr_t		mtr;
458 
459 	mtr_start(&mtr);
460 
461 	sys_header = trx_sysf_get(&mtr);
462 
463 	if (mach_read_from_4(sys_header + TRX_SYS_MYSQL_MASTER_LOG_INFO
464 			     + TRX_SYS_MYSQL_LOG_MAGIC_N_FLD)
465 	    != TRX_SYS_MYSQL_LOG_MAGIC_N) {
466 
467 		mtr_commit(&mtr);
468 
469 		return;
470 	}
471 
472 	fprintf(stderr,
473 		"InnoDB: In a MySQL replication slave the last"
474 		" master binlog file\n"
475 		"InnoDB: position %lu %lu, file name %s\n",
476 		(ulong) mach_read_from_4(sys_header
477 					 + TRX_SYS_MYSQL_MASTER_LOG_INFO
478 					 + TRX_SYS_MYSQL_LOG_OFFSET_HIGH),
479 		(ulong) mach_read_from_4(sys_header
480 					 + TRX_SYS_MYSQL_MASTER_LOG_INFO
481 					 + TRX_SYS_MYSQL_LOG_OFFSET_LOW),
482 		sys_header + TRX_SYS_MYSQL_MASTER_LOG_INFO
483 		+ TRX_SYS_MYSQL_LOG_NAME);
484 	/* Copy the master log position info to global variables we can
485 	use in ha_innobase.cc to initialize glob_mi to right values */
486 
487 	ut_memcpy(trx_sys_mysql_master_log_name,
488 		  sys_header + TRX_SYS_MYSQL_MASTER_LOG_INFO
489 		  + TRX_SYS_MYSQL_LOG_NAME,
490 		  TRX_SYS_MYSQL_LOG_NAME_LEN);
491 
492 	trx_sys_mysql_master_log_pos
493 		= (((ib_int64_t) mach_read_from_4(
494 			    sys_header + TRX_SYS_MYSQL_MASTER_LOG_INFO
495 			    + TRX_SYS_MYSQL_LOG_OFFSET_HIGH)) << 32)
496 		+ ((ib_int64_t) mach_read_from_4(
497 			   sys_header + TRX_SYS_MYSQL_MASTER_LOG_INFO
498 			   + TRX_SYS_MYSQL_LOG_OFFSET_LOW));
499 	mtr_commit(&mtr);
500 }
501 
502 /****************************************************************//**
503 Looks for a free slot for a rollback segment in the trx system file copy.
504 @return	slot index or ULINT_UNDEFINED if not found */
505 UNIV_INTERN
506 ulint
trx_sysf_rseg_find_free(mtr_t * mtr)507 trx_sysf_rseg_find_free(
508 /*====================*/
509 	mtr_t*	mtr)	/*!< in: mtr */
510 {
511 	ulint		i;
512 	trx_sysf_t*	sys_header;
513 
514 	sys_header = trx_sysf_get(mtr);
515 
516 	for (i = 0; i < TRX_SYS_N_RSEGS; i++) {
517 		ulint	page_no;
518 
519 		page_no = trx_sysf_rseg_get_page_no(sys_header, i, mtr);
520 
521 		if (page_no == FIL_NULL) {
522 
523 			return(i);
524 		}
525 	}
526 
527 	return(ULINT_UNDEFINED);
528 }
529 
530 /*****************************************************************//**
531 Creates the file page for the transaction system. This function is called only
532 at the database creation, before trx_sys_init. */
533 static
534 void
trx_sysf_create(mtr_t * mtr)535 trx_sysf_create(
536 /*============*/
537 	mtr_t*	mtr)	/*!< in: mtr */
538 {
539 	trx_sysf_t*	sys_header;
540 	ulint		slot_no;
541 	buf_block_t*	block;
542 	page_t*		page;
543 	ulint		page_no;
544 	byte*		ptr;
545 	ulint		len;
546 
547 	ut_ad(mtr);
548 
549 	/* Note that below we first reserve the file space x-latch, and
550 	then enter the kernel: we must do it in this order to conform
551 	to the latching order rules. */
552 
553 	mtr_x_lock(fil_space_get_latch(TRX_SYS_SPACE, NULL), mtr);
554 
555 	/* Create the trx sys file block in a new allocated file segment */
556 	block = fseg_create(TRX_SYS_SPACE, 0, TRX_SYS + TRX_SYS_FSEG_HEADER,
557 			    mtr);
558 	buf_block_dbg_add_level(block, SYNC_TRX_SYS_HEADER);
559 
560 	ut_a(buf_block_get_page_no(block) == TRX_SYS_PAGE_NO);
561 
562 	page = buf_block_get_frame(block);
563 
564 	mlog_write_ulint(page + FIL_PAGE_TYPE, FIL_PAGE_TYPE_TRX_SYS,
565 			 MLOG_2BYTES, mtr);
566 
567 	/* Reset the doublewrite buffer magic number to zero so that we
568 	know that the doublewrite buffer has not yet been created (this
569 	suppresses a Valgrind warning) */
570 
571 	mlog_write_ulint(page + TRX_SYS_DOUBLEWRITE
572 			 + TRX_SYS_DOUBLEWRITE_MAGIC, 0, MLOG_4BYTES, mtr);
573 
574 	sys_header = trx_sysf_get(mtr);
575 
576 	/* Start counting transaction ids from number 1 up */
577 	mach_write_to_8(sys_header + TRX_SYS_TRX_ID_STORE, 1);
578 
579 	/* Reset the rollback segment slots.  Old versions of InnoDB
580 	define TRX_SYS_N_RSEGS as 256 (TRX_SYS_OLD_N_RSEGS) and expect
581 	that the whole array is initialized. */
582 	ptr = TRX_SYS_RSEGS + sys_header;
583 	len = ut_max(TRX_SYS_OLD_N_RSEGS, TRX_SYS_N_RSEGS)
584 		* TRX_SYS_RSEG_SLOT_SIZE;
585 	memset(ptr, 0xff, len);
586 	ptr += len;
587 	ut_a(ptr <= page + (UNIV_PAGE_SIZE - FIL_PAGE_DATA_END));
588 
589 	/* Initialize all of the page.  This part used to be uninitialized. */
590 	memset(ptr, 0, UNIV_PAGE_SIZE - FIL_PAGE_DATA_END + page - ptr);
591 
592 	mlog_log_string(sys_header, UNIV_PAGE_SIZE - FIL_PAGE_DATA_END
593 			+ page - sys_header, mtr);
594 
595 	/* Create the first rollback segment in the SYSTEM tablespace */
596 	slot_no = trx_sysf_rseg_find_free(mtr);
597 	page_no = trx_rseg_header_create(TRX_SYS_SPACE, 0, ULINT_MAX, slot_no,
598 					 mtr);
599 
600 	ut_a(slot_no == TRX_SYS_SYSTEM_RSEG_ID);
601 	ut_a(page_no == FSP_FIRST_RSEG_PAGE_NO);
602 }
603 
604 /*****************************************************************//**
605 Compare two trx_rseg_t instances on last_trx_no. */
606 static
607 int
trx_rseg_compare_last_trx_no(const void * p1,const void * p2)608 trx_rseg_compare_last_trx_no(
609 /*=========================*/
610 	const void*	p1,		/*!< in: elem to compare */
611 	const void*	p2)		/*!< in: elem to compare */
612 {
613 	ib_int64_t	cmp;
614 
615 	const rseg_queue_t*	rseg_q1 = (const rseg_queue_t*) p1;
616 	const rseg_queue_t*	rseg_q2 = (const rseg_queue_t*) p2;
617 
618 	cmp = rseg_q1->trx_no - rseg_q2->trx_no;
619 
620 	if (cmp < 0) {
621 		return(-1);
622 	} else if (cmp > 0) {
623 		return(1);
624 	}
625 
626 	return(0);
627 }
628 
629 /*****************************************************************//**
630 Creates and initializes the central memory structures for the transaction
631 system. This is called when the database is started.
632 @return min binary heap of rsegs to purge */
633 UNIV_INTERN
634 ib_bh_t*
trx_sys_init_at_db_start(void)635 trx_sys_init_at_db_start(void)
636 /*==========================*/
637 {
638 	mtr_t		mtr;
639 	ib_bh_t*	ib_bh;
640 	trx_sysf_t*	sys_header;
641 	ib_uint64_t	rows_to_undo	= 0;
642 	const char*	unit		= "";
643 
644 	/* We create the min binary heap here and pass ownership to
645 	purge when we init the purge sub-system. Purge is responsible
646 	for freeing the binary heap. */
647 
648 	ib_bh = ib_bh_create(
649 		trx_rseg_compare_last_trx_no,
650 		sizeof(rseg_queue_t), TRX_SYS_N_RSEGS);
651 
652 	mtr_start(&mtr);
653 
654 	sys_header = trx_sysf_get(&mtr);
655 
656 	if (srv_force_recovery < SRV_FORCE_NO_UNDO_LOG_SCAN) {
657 		trx_rseg_array_init(sys_header, ib_bh, &mtr);
658 	}
659 
660 	/* VERY important: after the database is started, max_trx_id value is
661 	divisible by TRX_SYS_TRX_ID_WRITE_MARGIN, and the 'if' in
662 	trx_sys_get_new_trx_id will evaluate to TRUE when the function
663 	is first time called, and the value for trx id will be written
664 	to the disk-based header! Thus trx id values will not overlap when
665 	the database is repeatedly started! */
666 
667 	trx_sys->max_trx_id = 2 * TRX_SYS_TRX_ID_WRITE_MARGIN
668 		+ ut_uint64_align_up(mach_read_from_8(sys_header
669 						   + TRX_SYS_TRX_ID_STORE),
670 				     TRX_SYS_TRX_ID_WRITE_MARGIN);
671 
672 	ut_d(trx_sys->rw_max_trx_id = trx_sys->max_trx_id);
673 
674 	UT_LIST_INIT(trx_sys->mysql_trx_list);
675 
676 	trx_dummy_sess = sess_open();
677 
678 	trx_lists_init_at_db_start();
679 
680 	/* This S lock is not strictly required, it is here only to satisfy
681 	the debug code (assertions). We are still running in single threaded
682 	bootstrap mode. */
683 
684 	mutex_enter(&trx_sys->mutex);
685 
686 	ut_a(UT_LIST_GET_LEN(trx_sys->ro_trx_list) == 0);
687 
688 	if (UT_LIST_GET_LEN(trx_sys->rw_trx_list) > 0) {
689 		const trx_t*	trx;
690 
691 		for (trx = UT_LIST_GET_FIRST(trx_sys->rw_trx_list);
692 		     trx != NULL;
693 		     trx = UT_LIST_GET_NEXT(trx_list, trx)) {
694 
695 			ut_ad(trx->is_recovered);
696 			assert_trx_in_rw_list(trx);
697 
698 			if (trx_state_eq(trx, TRX_STATE_ACTIVE)) {
699 				rows_to_undo += trx->undo_no;
700 			}
701 		}
702 
703 		if (rows_to_undo > 1000000000) {
704 			unit = "M";
705 			rows_to_undo = rows_to_undo / 1000000;
706 		}
707 
708 		fprintf(stderr,
709 			"InnoDB: %lu transaction(s) which must be"
710 			" rolled back or cleaned up\n"
711 			"InnoDB: in total %lu%s row operations to undo\n",
712 			(ulong) UT_LIST_GET_LEN(trx_sys->rw_trx_list),
713 			(ulong) rows_to_undo, unit);
714 
715 		fprintf(stderr, "InnoDB: Trx id counter is " TRX_ID_FMT "\n",
716 			trx_sys->max_trx_id);
717 	}
718 
719 	mutex_exit(&trx_sys->mutex);
720 
721 	UT_LIST_INIT(trx_sys->view_list);
722 
723 	mtr_commit(&mtr);
724 
725 	return(ib_bh);
726 }
727 
728 /*****************************************************************//**
729 Creates the trx_sys instance and initializes ib_bh and mutex. */
730 UNIV_INTERN
731 void
trx_sys_create(void)732 trx_sys_create(void)
733 /*================*/
734 {
735 	ut_ad(trx_sys == NULL);
736 
737 	trx_sys = static_cast<trx_sys_t*>(mem_zalloc(sizeof(*trx_sys)));
738 
739 	mutex_create(trx_sys_mutex_key, &trx_sys->mutex, SYNC_TRX_SYS);
740 }
741 
742 /*****************************************************************//**
743 Creates and initializes the transaction system at the database creation. */
744 UNIV_INTERN
745 void
trx_sys_create_sys_pages(void)746 trx_sys_create_sys_pages(void)
747 /*==========================*/
748 {
749 	mtr_t	mtr;
750 
751 	mtr_start(&mtr);
752 
753 	trx_sysf_create(&mtr);
754 
755 	mtr_commit(&mtr);
756 }
757 
758 /*****************************************************************//**
759 Update the file format tag.
760 @return	always TRUE */
761 static
762 ibool
trx_sys_file_format_max_write(ulint format_id,const char ** name)763 trx_sys_file_format_max_write(
764 /*==========================*/
765 	ulint		format_id,	/*!< in: file format id */
766 	const char**	name)		/*!< out: max file format name, can
767 					be NULL */
768 {
769 	mtr_t		mtr;
770 	byte*		ptr;
771 	buf_block_t*	block;
772 	ib_uint64_t	tag_value;
773 
774 	mtr_start(&mtr);
775 
776 	block = buf_page_get(
777 		TRX_SYS_SPACE, 0, TRX_SYS_PAGE_NO, RW_X_LATCH, &mtr);
778 
779 	file_format_max.id = format_id;
780 	file_format_max.name = trx_sys_file_format_id_to_name(format_id);
781 
782 	ptr = buf_block_get_frame(block) + TRX_SYS_FILE_FORMAT_TAG;
783 	tag_value = format_id + TRX_SYS_FILE_FORMAT_TAG_MAGIC_N;
784 
785 	if (name) {
786 		*name = file_format_max.name;
787 	}
788 
789 	mlog_write_ull(ptr, tag_value, &mtr);
790 
791 	mtr_commit(&mtr);
792 
793 	return(TRUE);
794 }
795 
796 /*****************************************************************//**
797 Read the file format tag.
798 @return	the file format or ULINT_UNDEFINED if not set. */
799 static
800 ulint
trx_sys_file_format_max_read(void)801 trx_sys_file_format_max_read(void)
802 /*==============================*/
803 {
804 	mtr_t			mtr;
805 	const byte*		ptr;
806 	const buf_block_t*	block;
807 	ib_id_t			file_format_id;
808 
809 	/* Since this is called during the startup phase it's safe to
810 	read the value without a covering mutex. */
811 	mtr_start(&mtr);
812 
813 	block = buf_page_get(
814 		TRX_SYS_SPACE, 0, TRX_SYS_PAGE_NO, RW_X_LATCH, &mtr);
815 
816 	ptr = buf_block_get_frame(block) + TRX_SYS_FILE_FORMAT_TAG;
817 	file_format_id = mach_read_from_8(ptr);
818 
819 	mtr_commit(&mtr);
820 
821 	file_format_id -= TRX_SYS_FILE_FORMAT_TAG_MAGIC_N;
822 
823 	if (file_format_id >= FILE_FORMAT_NAME_N) {
824 
825 		/* Either it has never been tagged, or garbage in it. */
826 		return(ULINT_UNDEFINED);
827 	}
828 
829 	return((ulint) file_format_id);
830 }
831 
832 /*****************************************************************//**
833 Get the name representation of the file format from its id.
834 @return	pointer to the name */
835 UNIV_INTERN
836 const char*
trx_sys_file_format_id_to_name(const ulint id)837 trx_sys_file_format_id_to_name(
838 /*===========================*/
839 	const ulint	id)	/*!< in: id of the file format */
840 {
841 	ut_a(id < FILE_FORMAT_NAME_N);
842 
843 	return(file_format_name_map[id]);
844 }
845 
846 /*****************************************************************//**
847 Check for the max file format tag stored on disk. Note: If max_format_id
848 is == UNIV_FORMAT_MAX + 1 then we only print a warning.
849 @return	DB_SUCCESS or error code */
850 UNIV_INTERN
851 dberr_t
trx_sys_file_format_max_check(ulint max_format_id)852 trx_sys_file_format_max_check(
853 /*==========================*/
854 	ulint	max_format_id)	/*!< in: max format id to check */
855 {
856 	ulint	format_id;
857 
858 	/* Check the file format in the tablespace. Do not try to
859 	recover if the file format is not supported by the engine
860 	unless forced by the user. */
861 	format_id = trx_sys_file_format_max_read();
862 	if (format_id == ULINT_UNDEFINED) {
863 		/* Format ID was not set. Set it to minimum possible
864 		value. */
865 		format_id = UNIV_FORMAT_MIN;
866 	}
867 
868 	ib_logf(IB_LOG_LEVEL_INFO,
869 		"Highest supported file format is %s.",
870 		trx_sys_file_format_id_to_name(UNIV_FORMAT_MAX));
871 
872 	if (format_id > UNIV_FORMAT_MAX) {
873 
874 		ut_a(format_id < FILE_FORMAT_NAME_N);
875 
876 		ib_logf(max_format_id <= UNIV_FORMAT_MAX
877 			? IB_LOG_LEVEL_ERROR : IB_LOG_LEVEL_WARN,
878 			"The system tablespace is in a file "
879 			"format that this version doesn't support - %s.",
880 			trx_sys_file_format_id_to_name(format_id));
881 
882 		if (max_format_id <= UNIV_FORMAT_MAX) {
883 			return(DB_ERROR);
884 		}
885 	}
886 
887 	format_id = (format_id > max_format_id) ? format_id : max_format_id;
888 
889 	/* We don't need a mutex here, as this function should only
890 	be called once at start up. */
891 	file_format_max.id = format_id;
892 	file_format_max.name = trx_sys_file_format_id_to_name(format_id);
893 
894 	return(DB_SUCCESS);
895 }
896 
897 /*****************************************************************//**
898 Set the file format id unconditionally except if it's already the
899 same value.
900 @return	TRUE if value updated */
901 UNIV_INTERN
902 ibool
trx_sys_file_format_max_set(ulint format_id,const char ** name)903 trx_sys_file_format_max_set(
904 /*========================*/
905 	ulint		format_id,	/*!< in: file format id */
906 	const char**	name)		/*!< out: max file format name or
907 					NULL if not needed. */
908 {
909 	ibool		ret = FALSE;
910 
911 	ut_a(format_id <= UNIV_FORMAT_MAX);
912 
913 	mutex_enter(&file_format_max.mutex);
914 
915 	/* Only update if not already same value. */
916 	if (format_id != file_format_max.id) {
917 
918 		ret = trx_sys_file_format_max_write(format_id, name);
919 	}
920 
921 	mutex_exit(&file_format_max.mutex);
922 
923 	return(ret);
924 }
925 
926 /********************************************************************//**
927 Tags the system table space with minimum format id if it has not been
928 tagged yet.
929 WARNING: This function is only called during the startup and AFTER the
930 redo log application during recovery has finished. */
931 UNIV_INTERN
932 void
trx_sys_file_format_tag_init(void)933 trx_sys_file_format_tag_init(void)
934 /*==============================*/
935 {
936 	ulint	format_id;
937 
938 	format_id = trx_sys_file_format_max_read();
939 
940 	/* If format_id is not set then set it to the minimum. */
941 	if (format_id == ULINT_UNDEFINED) {
942 		trx_sys_file_format_max_set(UNIV_FORMAT_MIN, NULL);
943 	}
944 }
945 
946 /********************************************************************//**
947 Update the file format tag in the system tablespace only if the given
948 format id is greater than the known max id.
949 @return	TRUE if format_id was bigger than the known max id */
950 UNIV_INTERN
951 ibool
trx_sys_file_format_max_upgrade(const char ** name,ulint format_id)952 trx_sys_file_format_max_upgrade(
953 /*============================*/
954 	const char**	name,		/*!< out: max file format name */
955 	ulint		format_id)	/*!< in: file format identifier */
956 {
957 	ibool		ret = FALSE;
958 
959 	ut_a(name);
960 	ut_a(file_format_max.name != NULL);
961 	ut_a(format_id <= UNIV_FORMAT_MAX);
962 
963 	mutex_enter(&file_format_max.mutex);
964 
965 	if (format_id > file_format_max.id) {
966 
967 		ret = trx_sys_file_format_max_write(format_id, name);
968 	}
969 
970 	mutex_exit(&file_format_max.mutex);
971 
972 	return(ret);
973 }
974 
975 /*****************************************************************//**
976 Get the name representation of the file format from its id.
977 @return	pointer to the max format name */
978 UNIV_INTERN
979 const char*
trx_sys_file_format_max_get(void)980 trx_sys_file_format_max_get(void)
981 /*=============================*/
982 {
983 	return(file_format_max.name);
984 }
985 
986 /*****************************************************************//**
987 Initializes the tablespace tag system. */
988 UNIV_INTERN
989 void
trx_sys_file_format_init(void)990 trx_sys_file_format_init(void)
991 /*==========================*/
992 {
993 	mutex_create(file_format_max_mutex_key,
994 		     &file_format_max.mutex, SYNC_FILE_FORMAT_TAG);
995 
996 	/* We don't need a mutex here, as this function should only
997 	be called once at start up. */
998 	file_format_max.id = UNIV_FORMAT_MIN;
999 
1000 	file_format_max.name = trx_sys_file_format_id_to_name(
1001 		file_format_max.id);
1002 }
1003 
1004 /*****************************************************************//**
1005 Closes the tablespace tag system. */
1006 UNIV_INTERN
1007 void
trx_sys_file_format_close(void)1008 trx_sys_file_format_close(void)
1009 /*===========================*/
1010 {
1011 	/* Does nothing at the moment */
1012 }
1013 
1014 /*********************************************************************
1015 Creates the rollback segments.
1016 @return number of rollback segments that are active. */
1017 UNIV_INTERN
1018 ulint
trx_sys_create_rsegs(ulint n_spaces,ulint n_rsegs)1019 trx_sys_create_rsegs(
1020 /*=================*/
1021 	ulint	n_spaces,	/*!< number of tablespaces for UNDO logs */
1022 	ulint	n_rsegs)	/*!< number of rollback segments to create */
1023 {
1024 	mtr_t	mtr;
1025 	ulint	n_used;
1026 
1027 	ut_a(n_spaces < TRX_SYS_N_RSEGS);
1028 	ut_a(n_rsegs <= TRX_SYS_N_RSEGS);
1029 
1030 	if (srv_read_only_mode) {
1031 		return(ULINT_UNDEFINED);
1032 	}
1033 
1034 	/* This is executed in single-threaded mode therefore it is not
1035 	necessary to use the same mtr in trx_rseg_create(). n_used cannot
1036 	change while the function is executing. */
1037 
1038 	mtr_start(&mtr);
1039 	n_used = trx_sysf_rseg_find_free(&mtr);
1040 	mtr_commit(&mtr);
1041 
1042 	if (n_used == ULINT_UNDEFINED) {
1043 		n_used = TRX_SYS_N_RSEGS;
1044 	}
1045 
1046 	/* Do not create additional rollback segments if innodb_force_recovery
1047 	has been set and the database was not shutdown cleanly. */
1048 
1049 	if (!srv_force_recovery && !recv_needed_recovery && n_used < n_rsegs) {
1050 		ulint	i;
1051 		ulint	new_rsegs = n_rsegs - n_used;
1052 
1053 		for (i = 0; i < new_rsegs; ++i) {
1054 			ulint	space_id;
1055 			space_id = (n_spaces == 0) ? 0
1056 				: (srv_undo_space_id_start + i % n_spaces);
1057 
1058 			/* Tablespace 0 is the system tablespace. */
1059 			if (trx_rseg_create(space_id) != NULL) {
1060 				++n_used;
1061 			} else {
1062 				break;
1063 			}
1064 		}
1065 	}
1066 
1067 	ib_logf(IB_LOG_LEVEL_INFO,
1068 		"%lu rollback segment(s) are active.", n_used);
1069 
1070 	return(n_used);
1071 }
1072 
1073 #else /* !UNIV_HOTBACKUP */
1074 /*****************************************************************//**
1075 Prints to stderr the MySQL binlog info in the system header if the
1076 magic number shows it valid. */
1077 UNIV_INTERN
1078 void
trx_sys_print_mysql_binlog_offset_from_page(const byte * page)1079 trx_sys_print_mysql_binlog_offset_from_page(
1080 /*========================================*/
1081 	const byte*	page)	/*!< in: buffer containing the trx
1082 				system header page, i.e., page number
1083 				TRX_SYS_PAGE_NO in the tablespace */
1084 {
1085 	const trx_sysf_t*	sys_header;
1086 
1087 	sys_header = page + TRX_SYS;
1088 
1089 	if (mach_read_from_4(sys_header + TRX_SYS_MYSQL_LOG_INFO
1090 			     + TRX_SYS_MYSQL_LOG_MAGIC_N_FLD)
1091 	    == TRX_SYS_MYSQL_LOG_MAGIC_N) {
1092 
1093 		fprintf(stderr,
1094 			"mysqlbackup: Last MySQL binlog file position %lu %lu,"
1095 			" file name %s\n",
1096 			(ulong) mach_read_from_4(
1097 				sys_header + TRX_SYS_MYSQL_LOG_INFO
1098 				+ TRX_SYS_MYSQL_LOG_OFFSET_HIGH),
1099 			(ulong) mach_read_from_4(
1100 				sys_header + TRX_SYS_MYSQL_LOG_INFO
1101 				+ TRX_SYS_MYSQL_LOG_OFFSET_LOW),
1102 			sys_header + TRX_SYS_MYSQL_LOG_INFO
1103 			+ TRX_SYS_MYSQL_LOG_NAME);
1104 	}
1105 }
1106 
1107 /*****************************************************************//**
1108 Reads the file format id from the first system table space file.
1109 Even if the call succeeds and returns TRUE, the returned format id
1110 may be ULINT_UNDEFINED signalling that the format id was not present
1111 in the data file.
1112 @return TRUE if call succeeds */
1113 UNIV_INTERN
1114 ibool
trx_sys_read_file_format_id(const char * pathname,ulint * format_id)1115 trx_sys_read_file_format_id(
1116 /*========================*/
1117 	const char *pathname,  /*!< in: pathname of the first system
1118 				        table space file */
1119 	ulint *format_id)      /*!< out: file format of the system table
1120 				         space */
1121 {
1122 	os_file_t	file;
1123 	ibool		success;
1124 	byte		buf[UNIV_PAGE_SIZE * 2];
1125 	page_t*		page = ut_align(buf, UNIV_PAGE_SIZE);
1126 	const byte*	ptr;
1127 	ib_id_t		file_format_id;
1128 
1129 	*format_id = ULINT_UNDEFINED;
1130 
1131 	file = os_file_create_simple_no_error_handling(
1132 		innodb_file_data_key,
1133 		pathname,
1134 		OS_FILE_OPEN,
1135 		OS_FILE_READ_ONLY,
1136 		&success
1137 	);
1138 	if (!success) {
1139 		/* The following call prints an error message */
1140 		os_file_get_last_error(true);
1141 
1142 		ut_print_timestamp(stderr);
1143 
1144 		fprintf(stderr,
1145 			"  mysqlbackup: Error: trying to read system "
1146 			"tablespace file format,\n"
1147 			"  mysqlbackup: but could not open the tablespace "
1148 			"file %s!\n", pathname);
1149 		return(FALSE);
1150 	}
1151 
1152 	/* Read the page on which file format is stored */
1153 
1154 	success = os_file_read_no_error_handling(
1155 		file, page, TRX_SYS_PAGE_NO * UNIV_PAGE_SIZE, UNIV_PAGE_SIZE);
1156 
1157 	if (!success) {
1158 		/* The following call prints an error message */
1159 		os_file_get_last_error(true);
1160 
1161 		ut_print_timestamp(stderr);
1162 
1163 		fprintf(stderr,
1164 			"  mysqlbackup: Error: trying to read system "
1165 			"tablespace file format,\n"
1166 			"  mysqlbackup: but failed to read the tablespace "
1167 			"file %s!\n", pathname);
1168 
1169 		os_file_close(file);
1170 		return(FALSE);
1171 	}
1172 	os_file_close(file);
1173 
1174 	/* get the file format from the page */
1175 	ptr = page + TRX_SYS_FILE_FORMAT_TAG;
1176 	file_format_id = mach_read_from_8(ptr);
1177 	file_format_id -= TRX_SYS_FILE_FORMAT_TAG_MAGIC_N;
1178 
1179 	if (file_format_id >= FILE_FORMAT_NAME_N) {
1180 
1181 		/* Either it has never been tagged, or garbage in it. */
1182 		return(TRUE);
1183 	}
1184 
1185 	*format_id = (ulint) file_format_id;
1186 
1187 	return(TRUE);
1188 }
1189 
1190 /*****************************************************************//**
1191 Reads the file format id from the given per-table data file.
1192 @return TRUE if call succeeds */
1193 UNIV_INTERN
1194 ibool
trx_sys_read_pertable_file_format_id(const char * pathname,ulint * format_id)1195 trx_sys_read_pertable_file_format_id(
1196 /*=================================*/
1197 	const char *pathname,  /*!< in: pathname of a per-table
1198 				        datafile */
1199 	ulint *format_id)      /*!< out: file format of the per-table
1200 				         data file */
1201 {
1202 	os_file_t	file;
1203 	ibool		success;
1204 	byte		buf[UNIV_PAGE_SIZE * 2];
1205 	page_t*		page = ut_align(buf, UNIV_PAGE_SIZE);
1206 	const byte*	ptr;
1207 	ib_uint32_t	flags;
1208 
1209 	*format_id = ULINT_UNDEFINED;
1210 
1211 	file = os_file_create_simple_no_error_handling(
1212 		innodb_file_data_key,
1213 		pathname,
1214 		OS_FILE_OPEN,
1215 		OS_FILE_READ_ONLY,
1216 		&success
1217 	);
1218 	if (!success) {
1219 		/* The following call prints an error message */
1220 		os_file_get_last_error(true);
1221 
1222 		ut_print_timestamp(stderr);
1223 
1224 		fprintf(stderr,
1225 			"  mysqlbackup: Error: trying to read per-table "
1226 			"tablespace format,\n"
1227 			"  mysqlbackup: but could not open the tablespace "
1228 			"file %s!\n", pathname);
1229 
1230 		return(FALSE);
1231 	}
1232 
1233 	/* Read the first page of the per-table datafile */
1234 
1235 	success = os_file_read_no_error_handling(file, page, 0, UNIV_PAGE_SIZE);
1236 
1237 	if (!success) {
1238 		/* The following call prints an error message */
1239 		os_file_get_last_error(true);
1240 
1241 		ut_print_timestamp(stderr);
1242 
1243 		fprintf(stderr,
1244 			"  mysqlbackup: Error: trying to per-table data file "
1245 			"format,\n"
1246 			"  mysqlbackup: but failed to read the tablespace "
1247 			"file %s!\n", pathname);
1248 
1249 		os_file_close(file);
1250 		return(FALSE);
1251 	}
1252 	os_file_close(file);
1253 
1254 	/* get the file format from the page */
1255 	ptr = page + 54;
1256 	flags = mach_read_from_4(ptr);
1257 
1258 	if (!fsp_flags_is_valid(flags) {
1259 		/* bad tablespace flags */
1260 		return(FALSE);
1261 	}
1262 
1263 	*format_id = FSP_FLAGS_GET_POST_ANTELOPE(flags);
1264 
1265 	return(TRUE);
1266 }
1267 
1268 
1269 /*****************************************************************//**
1270 Get the name representation of the file format from its id.
1271 @return	pointer to the name */
1272 UNIV_INTERN
1273 const char*
1274 trx_sys_file_format_id_to_name(
1275 /*===========================*/
1276 	const ulint	id)	/*!< in: id of the file format */
1277 {
1278 	if (!(id < FILE_FORMAT_NAME_N)) {
1279 		/* unknown id */
1280 		return("Unknown");
1281 	}
1282 
1283 	return(file_format_name_map[id]);
1284 }
1285 
1286 #endif /* !UNIV_HOTBACKUP */
1287 
1288 #ifndef UNIV_HOTBACKUP
1289 /*********************************************************************
1290 Shutdown/Close the transaction system. */
1291 UNIV_INTERN
1292 void
1293 trx_sys_close(void)
1294 /*===============*/
1295 {
1296 	ulint		i;
1297 	trx_t*		trx;
1298 	read_view_t*	view;
1299 
1300 	ut_ad(trx_sys != NULL);
1301 	ut_ad(srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS);
1302 
1303 	/* Check that all read views are closed except read view owned
1304 	by a purge. */
1305 
1306 	mutex_enter(&trx_sys->mutex);
1307 
1308 	if (UT_LIST_GET_LEN(trx_sys->view_list) > 1) {
1309 		fprintf(stderr,
1310 			"InnoDB: Error: all read views were not closed"
1311 			" before shutdown:\n"
1312 			"InnoDB: %lu read views open \n",
1313 			UT_LIST_GET_LEN(trx_sys->view_list) - 1);
1314 	}
1315 
1316 	mutex_exit(&trx_sys->mutex);
1317 
1318 	sess_close(trx_dummy_sess);
1319 	trx_dummy_sess = NULL;
1320 
1321 	trx_purge_sys_close();
1322 
1323 	/* Free the double write data structures. */
1324 	buf_dblwr_free();
1325 
1326 	ut_a(UT_LIST_GET_LEN(trx_sys->ro_trx_list) == 0);
1327 
1328 	/* Only prepared transactions may be left in the system. Free them. */
1329 	ut_a(UT_LIST_GET_LEN(trx_sys->rw_trx_list) == trx_sys->n_prepared_trx);
1330 
1331 	while ((trx = UT_LIST_GET_FIRST(trx_sys->rw_trx_list)) != NULL) {
1332 		trx_free_prepared(trx);
1333 	}
1334 
1335 	/* There can't be any active transactions. */
1336 	for (i = 0; i < TRX_SYS_N_RSEGS; ++i) {
1337 		trx_rseg_t*	rseg;
1338 
1339 		rseg = trx_sys->rseg_array[i];
1340 
1341 		if (rseg != NULL) {
1342 			trx_rseg_mem_free(rseg);
1343 		} else {
1344 			break;
1345 		}
1346 	}
1347 
1348 	view = UT_LIST_GET_FIRST(trx_sys->view_list);
1349 
1350 	while (view != NULL) {
1351 		read_view_t*	prev_view = view;
1352 
1353 		view = UT_LIST_GET_NEXT(view_list, prev_view);
1354 
1355 		/* Views are allocated from the trx_sys->global_read_view_heap.
1356 		So, we simply remove the element here. */
1357 		UT_LIST_REMOVE(view_list, trx_sys->view_list, prev_view);
1358 	}
1359 
1360 	ut_a(UT_LIST_GET_LEN(trx_sys->view_list) == 0);
1361 	ut_a(UT_LIST_GET_LEN(trx_sys->ro_trx_list) == 0);
1362 	ut_a(UT_LIST_GET_LEN(trx_sys->rw_trx_list) == 0);
1363 	ut_a(UT_LIST_GET_LEN(trx_sys->mysql_trx_list) == 0);
1364 
1365 	mutex_free(&trx_sys->mutex);
1366 
1367 	mem_free(trx_sys);
1368 
1369 	trx_sys = NULL;
1370 }
1371 
1372 /*********************************************************************
1373 Check if there are any active (non-prepared) transactions.
1374 @return total number of active transactions or 0 if none */
1375 UNIV_INTERN
1376 ulint
1377 trx_sys_any_active_transactions(void)
1378 /*=================================*/
1379 {
1380 	ulint	total_trx = 0;
1381 
1382 	mutex_enter(&trx_sys->mutex);
1383 
1384 	total_trx = UT_LIST_GET_LEN(trx_sys->rw_trx_list)
1385 		  + UT_LIST_GET_LEN(trx_sys->mysql_trx_list);
1386 
1387 	ut_a(total_trx >= trx_sys->n_prepared_trx);
1388 	total_trx -= trx_sys->n_prepared_trx;
1389 
1390 	mutex_exit(&trx_sys->mutex);
1391 
1392 	return(total_trx);
1393 }
1394 
1395 #ifdef UNIV_DEBUG
1396 /*************************************************************//**
1397 Validate the trx_list_t.
1398 @return TRUE if valid. */
1399 static
1400 ibool
1401 trx_sys_validate_trx_list_low(
1402 /*===========================*/
1403 	trx_list_t*	trx_list)	/*!< in: &trx_sys->ro_trx_list
1404 					or &trx_sys->rw_trx_list */
1405 {
1406 	const trx_t*	trx;
1407 	const trx_t*	prev_trx = NULL;
1408 
1409 	ut_ad(mutex_own(&trx_sys->mutex));
1410 
1411 	ut_ad(trx_list == &trx_sys->ro_trx_list
1412 	      || trx_list == &trx_sys->rw_trx_list);
1413 
1414 	for (trx = UT_LIST_GET_FIRST(*trx_list);
1415 	     trx != NULL;
1416 	     prev_trx = trx, trx = UT_LIST_GET_NEXT(trx_list, prev_trx)) {
1417 
1418 		assert_trx_in_list(trx);
1419 		ut_ad(trx->read_only == (trx_list == &trx_sys->ro_trx_list));
1420 
1421 		ut_a(prev_trx == NULL || prev_trx->id > trx->id);
1422 	}
1423 
1424 	return(TRUE);
1425 }
1426 
1427 /*************************************************************//**
1428 Validate the trx_sys_t::ro_trx_list and trx_sys_t::rw_trx_list.
1429 @return TRUE if lists are valid. */
1430 UNIV_INTERN
1431 ibool
1432 trx_sys_validate_trx_list(void)
1433 /*===========================*/
1434 {
1435 	ut_ad(mutex_own(&trx_sys->mutex));
1436 
1437 	ut_a(trx_sys_validate_trx_list_low(&trx_sys->ro_trx_list));
1438 	ut_a(trx_sys_validate_trx_list_low(&trx_sys->rw_trx_list));
1439 
1440 	return(TRUE);
1441 }
1442 #endif /* UNIV_DEBUG */
1443 #endif /* !UNIV_HOTBACKUP */
1444