1 /*****************************************************************************
2 
3 Copyright (c) 1996, 2020, Oracle and/or its affiliates. All Rights Reserved.
4 
5 This program is free software; you can redistribute it and/or modify it under
6 the terms of the GNU General Public License, version 2.0, as published by the
7 Free Software Foundation.
8 
9 This program is also distributed with certain software (including but not
10 limited to OpenSSL) that is licensed under separate terms, as designated in a
11 particular file or component or in included license documentation. The authors
12 of MySQL hereby grant you an additional permission to link the program and
13 your derivative works with the separately licensed software that they have
14 included with MySQL.
15 
16 This program is distributed in the hope that it will be useful, but WITHOUT
17 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
18 FOR A PARTICULAR PURPOSE. See the GNU General Public License, version 2.0,
19 for more details.
20 
21 You should have received a copy of the GNU General Public License along with
22 this program; if not, write to the Free Software Foundation, Inc.,
23 51 Franklin St, Fifth Floor, Boston, MA 02110-1301  USA
24 
25 *****************************************************************************/
26 
27 /** @file trx/trx0sys.cc
28  Transaction system
29 
30  Created 3/26/1996 Heikki Tuuri
31  *******************************************************/
32 
33 #include <sys/types.h>
34 #include <new>
35 
36 #include "current_thd.h"
37 #include "ha_prototypes.h"
38 #include "mysql/plugin.h"
39 #include "sql_error.h"
40 #include "trx0sys.h"
41 
42 #ifndef UNIV_HOTBACKUP
43 #include "fsp0fsp.h"
44 #include "fsp0sysspace.h"
45 #include "log0log.h"
46 #include "log0recv.h"
47 #include "mtr0log.h"
48 #include "os0file.h"
49 #include "read0read.h"
50 #include "srv0srv.h"
51 #include "srv0start.h"
52 #include "trx0purge.h"
53 #include "trx0rseg.h"
54 #include "trx0trx.h"
55 #include "trx0undo.h"
56 
57 /** The transaction system */
58 trx_sys_t *trx_sys = nullptr;
59 
60 /** Check whether transaction id is valid.
61 @param[in]	id	transaction id to check
62 @param[in]	name	table name */
check_trx_id_sanity(trx_id_t id,const table_name_t & name)63 void ReadView::check_trx_id_sanity(trx_id_t id, const table_name_t &name) {
64   if (&name == &dict_sys->dynamic_metadata->name) {
65     /* The table mysql.innodb_dynamic_metadata uses a
66     constant DB_TRX_ID=~0. */
67     ut_ad(id == (1ULL << 48) - 1);
68     return;
69   }
70 
71   if (id >= trx_sys->max_trx_id) {
72     ib::warn(ER_IB_MSG_1196)
73         << "A transaction id"
74         << " in a record of table " << name << " is newer than the"
75         << " system-wide maximum.";
76     ut_ad(0);
77     THD *thd = current_thd;
78     if (thd != nullptr) {
79       char table_name[MAX_FULL_NAME_LEN + 1];
80 
81       innobase_format_name(table_name, sizeof(table_name), name.m_name);
82 
83       push_warning_printf(thd, Sql_condition::SL_WARNING, ER_SIGNAL_WARN,
84                           "InnoDB: Transaction id"
85                           " in a record of table"
86                           " %s is newer than system-wide"
87                           " maximum.",
88                           table_name);
89     }
90   }
91 }
92 
#ifdef UNIV_DEBUG
/** Flag, defined in debug builds only, that controls TRX_RSEG_N_SLOTS
behavior debugging. It starts out as zero. */
uint trx_rseg_n_slots_debug{0};
#endif /* UNIV_DEBUG */
97 
98 /** Writes the value of max_trx_id to the file based trx system header. */
trx_sys_flush_max_trx_id(void)99 void trx_sys_flush_max_trx_id(void) {
100   mtr_t mtr;
101   trx_sysf_t *sys_header;
102 
103   ut_ad(trx_sys_mutex_own());
104 
105   if (!srv_read_only_mode) {
106     mtr_start(&mtr);
107 
108     sys_header = trx_sysf_get(&mtr);
109 
110     mlog_write_ull(sys_header + TRX_SYS_TRX_ID_STORE, trx_sys->max_trx_id,
111                    &mtr);
112 
113     mtr_commit(&mtr);
114   }
115 }
116 
trx_sys_persist_gtid_num(trx_id_t gtid_trx_no)117 void trx_sys_persist_gtid_num(trx_id_t gtid_trx_no) {
118   mtr_t mtr;
119   mtr.start();
120   auto sys_header = trx_sysf_get(&mtr);
121   auto page = sys_header - TRX_SYS;
122   /* Update GTID transaction number. All transactions with lower
123   transaction number are no longer processed for GTID. */
124   mlog_write_ull(page + TRX_SYS_TRX_NUM_GTID, gtid_trx_no, &mtr);
125   mtr.commit();
126 }
127 
trx_sys_oldest_trx_no()128 trx_id_t trx_sys_oldest_trx_no() {
129   ut_ad(trx_sys_mutex_own());
130   /* Get the oldest transaction from serialisation list. */
131   if (UT_LIST_GET_LEN(trx_sys->serialisation_list) > 0) {
132     auto trx = UT_LIST_GET_FIRST(trx_sys->serialisation_list);
133     return (trx->no);
134   }
135   return (trx_sys->max_trx_id);
136 }
137 
trx_sys_get_binlog_prepared(std::vector<trx_id_t> & trx_ids)138 void trx_sys_get_binlog_prepared(std::vector<trx_id_t> &trx_ids) {
139   trx_sys_mutex_enter();
140   /* Exit fast if no prepared transaction. */
141   if (trx_sys->n_prepared_trx == 0) {
142     trx_sys_mutex_exit();
143     return;
144   }
145   /* Check and find binary log prepared transaction. */
146   for (auto trx = UT_LIST_GET_FIRST(trx_sys->rw_trx_list); trx != nullptr;
147        trx = UT_LIST_GET_NEXT(trx_list, trx)) {
148     assert_trx_in_rw_list(trx);
149     if (trx_state_eq(trx, TRX_STATE_PREPARED) && trx_is_mysql_xa(trx)) {
150       trx_ids.push_back(trx->id);
151     }
152   }
153   trx_sys_mutex_exit();
154 }
155 
156 /** Read binary log positions from buffer passed.
157 @param[in]	binlog_buf	binary log buffer from trx sys page
158 @param[out]	file_name	binary log file name
159 @param[out]	high		offset part high order bytes
160 @param[out]	low		offset part low order bytes
161 @return	true, if buffer has valid binary log position. */
read_binlog_position(const byte * binlog_buf,const char * & file_name,uint32_t & high,uint32_t & low)162 static bool read_binlog_position(const byte *binlog_buf, const char *&file_name,
163                                  uint32_t &high, uint32_t &low) {
164   /* Initialize out parameters. */
165   file_name = nullptr;
166   high = low = 0;
167 
168   /* Check if binary log position is stored. */
169   if (mach_read_from_4(binlog_buf + TRX_SYS_MYSQL_LOG_MAGIC_N_FLD) !=
170       TRX_SYS_MYSQL_LOG_MAGIC_N) {
171     return (true);
172   }
173 
174   /* Read binary log file name. */
175   file_name =
176       reinterpret_cast<const char *>(binlog_buf + TRX_SYS_MYSQL_LOG_NAME);
177 
178   /* read log file offset. */
179   high = mach_read_from_4(binlog_buf + TRX_SYS_MYSQL_LOG_OFFSET_HIGH);
180   low = mach_read_from_4(binlog_buf + TRX_SYS_MYSQL_LOG_OFFSET_LOW);
181 
182   return (false);
183 }
184 
185 /** Write binary log position into passed buffer.
186 @param[in]	file_name	binary log file name
187 @param[in]	offset		binary log offset
188 @param[out]	binlog_buf	buffer from trx sys page to write to
189 @param[in,out]	mtr		mini transaction */
write_binlog_position(const char * file_name,uint64_t offset,byte * binlog_buf,mtr_t * mtr)190 static void write_binlog_position(const char *file_name, uint64_t offset,
191                                   byte *binlog_buf, mtr_t *mtr) {
192   if (file_name == nullptr ||
193       ut_strlen(file_name) >= TRX_SYS_MYSQL_LOG_NAME_LEN) {
194     /* We cannot fit the name to the 512 bytes we have reserved */
195     return;
196   }
197   const char *current_name = nullptr;
198   uint32_t high = 0;
199   uint32_t low = 0;
200 
201   auto empty = read_binlog_position(binlog_buf, current_name, high, low);
202 
203   if (empty) {
204     mlog_write_ulint(binlog_buf + TRX_SYS_MYSQL_LOG_MAGIC_N_FLD,
205                      TRX_SYS_MYSQL_LOG_MAGIC_N, MLOG_4BYTES, mtr);
206   }
207 
208   if (empty || 0 != strcmp(current_name, file_name)) {
209     mlog_write_string(binlog_buf + TRX_SYS_MYSQL_LOG_NAME, (byte *)file_name,
210                       1 + ut_strlen(file_name), mtr);
211   }
212   auto in_high = static_cast<ulint>(offset >> 32);
213   auto in_low = static_cast<ulint>(offset & 0xFFFFFFFFUL);
214 
215   if (empty || high != in_high) {
216     mlog_write_ulint(binlog_buf + TRX_SYS_MYSQL_LOG_OFFSET_HIGH, in_high,
217                      MLOG_4BYTES, mtr);
218   }
219   mlog_write_ulint(binlog_buf + TRX_SYS_MYSQL_LOG_OFFSET_LOW, in_low,
220                    MLOG_4BYTES, mtr);
221 }
222 
trx_sys_read_binlog_position(char * file,uint64_t & offset)223 void trx_sys_read_binlog_position(char *file, uint64_t &offset) {
224   const char *current_name = nullptr;
225   uint32_t high = 0;
226   uint32_t low = 0;
227 
228   mtr_t mtr;
229   mtr_start(&mtr);
230 
231   byte *binlog_pos = trx_sysf_get(&mtr) + TRX_SYS_MYSQL_LOG_INFO;
232   auto empty = read_binlog_position(binlog_pos, current_name, high, low);
233 
234   if (empty) {
235     file[0] = '\0';
236     offset = 0;
237     mtr_commit(&mtr);
238     return;
239   }
240 
241   strncpy(file, current_name, TRX_SYS_MYSQL_LOG_NAME_LEN);
242   offset = static_cast<uint64_t>(high);
243   offset = (offset << 32);
244   offset |= static_cast<uint64_t>(low);
245 
246   mtr_commit(&mtr);
247 }
248 
249 /** Check if binary log position is changed.
250 @param[in]	file_name	previous binary log file name
251 @param[in]	offset		previous binary log file offset
252 @param[out]	binlog_buf	buffer from trx sys page to write to
253 @return true, iff binary log position is modified from previous position. */
binlog_position_changed(const char * file_name,uint64_t offset,byte * binlog_buf)254 static bool binlog_position_changed(const char *file_name, uint64_t offset,
255                                     byte *binlog_buf) {
256   const char *cur_name = nullptr;
257   uint32_t high = 0;
258   uint32_t low = 0;
259   bool empty = read_binlog_position(binlog_buf, cur_name, high, low);
260 
261   if (empty) {
262     return (false);
263   }
264 
265   if (0 != strcmp(cur_name, file_name)) {
266     return (true);
267   }
268 
269   auto cur_offset = static_cast<uint64_t>(high);
270   cur_offset = (cur_offset << 32);
271   cur_offset |= static_cast<uint64_t>(low);
272   return (offset != cur_offset);
273 }
274 
trx_sys_write_binlog_position(const char * last_file,uint64_t last_offset,const char * file,uint64_t offset)275 bool trx_sys_write_binlog_position(const char *last_file, uint64_t last_offset,
276                                    const char *file, uint64_t offset) {
277   mtr_t mtr;
278   mtr_start(&mtr);
279   byte *binlog_pos = trx_sysf_get(&mtr) + TRX_SYS_MYSQL_LOG_INFO;
280 
281   /* Return If position is already updated. */
282   if (binlog_position_changed(last_file, last_offset, binlog_pos)) {
283     mtr_commit(&mtr);
284     return (false);
285   }
286   write_binlog_position(file, offset, binlog_pos, &mtr);
287   mtr_commit(&mtr);
288   return (true);
289 }
290 
trx_sys_update_mysql_binlog_offset(trx_t * trx,mtr_t * mtr)291 void trx_sys_update_mysql_binlog_offset(trx_t *trx, mtr_t *mtr) {
292   trx_sys_update_binlog_position(trx);
293 
294   const char *file_name = trx->mysql_log_file_name;
295   uint64_t offset = trx->mysql_log_offset;
296 
297   /* Reset log file name in transaction. */
298   trx->mysql_log_file_name = nullptr;
299 
300   byte *binlog_pos = trx_sysf_get(mtr) + TRX_SYS_MYSQL_LOG_INFO;
301 
302   if (file_name == nullptr || file_name[0] == '\0') {
303     /* Don't write blank name in binary log file position. */
304     return;
305   }
306   write_binlog_position(file_name, offset, binlog_pos, mtr);
307 }
308 
309 /** Find the page number in the TRX_SYS page for a given slot/rseg_id
310 @param[in]	rseg_id		slot number in the TRX_SYS page rseg array
311 @return page number from the TRX_SYS page rseg array */
trx_sysf_rseg_find_page_no(ulint rseg_id)312 page_no_t trx_sysf_rseg_find_page_no(ulint rseg_id) {
313   page_no_t page_no;
314   mtr_t mtr;
315   mtr.start();
316 
317   trx_sysf_t *sys_header = trx_sysf_get(&mtr);
318 
319   page_no = trx_sysf_rseg_get_page_no(sys_header, rseg_id, &mtr);
320 
321   mtr.commit();
322 
323   return (page_no);
324 }
325 
326 /*****************************************************************/ /**
327  Read WSREP XID information from the trx system header if the magic value
328  shows it is valid. This code has been copied from MySQL patches by Codership
329  with some modifications.
330  @return true if the magic value is valid. Otherwise
331  return false and leave 'xid' unchanged. */
trx_sys_read_wsrep_checkpoint(XID * xid)332 bool trx_sys_read_wsrep_checkpoint(XID *xid)
333 /*===================================*/
334 {
335   trx_sysf_t *sys_header;
336   mtr_t mtr;
337   ulint magic;
338 
339   ut_ad(xid);
340 
341   mtr_start(&mtr);
342 
343   sys_header = trx_sysf_get(&mtr);
344   magic = mach_read_from_4(sys_header + TRX_SYS_WSREP_XID_INFO +
345                            TRX_SYS_WSREP_XID_MAGIC_N_FLD);
346 
347   if (magic != TRX_SYS_WSREP_XID_MAGIC_N) {
348     mtr_commit(&mtr);
349     return (false);
350   }
351 
352   xid->set_format_id((long)mach_read_from_4(
353       sys_header + TRX_SYS_WSREP_XID_INFO + TRX_SYS_WSREP_XID_FORMAT));
354   xid->set_gtrid_length((long)mach_read_from_4(
355       sys_header + TRX_SYS_WSREP_XID_INFO + TRX_SYS_WSREP_XID_GTRID_LEN));
356   xid->set_bqual_length((long)mach_read_from_4(
357       sys_header + TRX_SYS_WSREP_XID_INFO + TRX_SYS_WSREP_XID_BQUAL_LEN));
358   xid->set_data(sys_header + TRX_SYS_WSREP_XID_INFO + TRX_SYS_WSREP_XID_DATA,
359                 XIDDATASIZE);
360 
361   mtr_commit(&mtr);
362 
363   return (true);
364 }
365 
366 /** Look for a free slot for a rollback segment in the trx system file copy.
367 @param[in,out]	mtr		mtr
368 @return slot index or ULINT_UNDEFINED if not found */
trx_sysf_rseg_find_free(mtr_t * mtr)369 ulint trx_sysf_rseg_find_free(mtr_t *mtr) {
370   trx_sysf_t *sys_header = trx_sysf_get(mtr);
371 
372   for (ulint slot_no = 0; slot_no < TRX_SYS_N_RSEGS; slot_no++) {
373     page_no_t page_no = trx_sysf_rseg_get_page_no(sys_header, slot_no, mtr);
374 
375     if (page_no == FIL_NULL) {
376       return (slot_no);
377     }
378   }
379 
380   return (ULINT_UNDEFINED);
381 }
382 
383 /** Creates the file page for the transaction system. This function is called
384  only at the database creation, before trx_sys_init. */
trx_sysf_create(mtr_t * mtr)385 static void trx_sysf_create(mtr_t *mtr) /*!< in: mtr */
386 {
387   trx_sysf_t *sys_header;
388   ulint slot_no;
389   buf_block_t *block;
390   page_t *page;
391   ulint page_no;
392   byte *ptr;
393   ulint len;
394 
395   ut_ad(mtr);
396 
397   /* Note that below we first reserve the file space x-latch, and
398   then enter the kernel: we must do it in this order to conform
399   to the latching order rules. */
400 
401   mtr_x_lock_space(fil_space_get_sys_space(), mtr);
402 
403   /* Create the trx sys file block in a new allocated file segment */
404   block = fseg_create(TRX_SYS_SPACE, 0, TRX_SYS + TRX_SYS_FSEG_HEADER, mtr);
405   buf_block_dbg_add_level(block, SYNC_TRX_SYS_HEADER);
406 
407   ut_a(block->page.id.page_no() == TRX_SYS_PAGE_NO);
408 
409   page = buf_block_get_frame(block);
410 
411   mlog_write_ulint(page + FIL_PAGE_TYPE, FIL_PAGE_TYPE_TRX_SYS, MLOG_2BYTES,
412                    mtr);
413 
414   /* Reset the doublewrite buffer magic number to zero so that we
415   know that the doublewrite buffer has not yet been created (this
416   suppresses a Valgrind warning) */
417 
418   mlog_write_ulint(page + TRX_SYS_DOUBLEWRITE + TRX_SYS_DOUBLEWRITE_MAGIC, 0,
419                    MLOG_4BYTES, mtr);
420 
421   sys_header = trx_sysf_get(mtr);
422 
423   /* Start counting transaction ids from number 1 up */
424   mach_write_to_8(sys_header + TRX_SYS_TRX_ID_STORE, 1);
425 
426   /* Reset the rollback segment slots.  Old versions of InnoDB
427   define TRX_SYS_N_RSEGS as 256 (TRX_SYS_OLD_N_RSEGS) and expect
428   that the whole array is initialized. */
429   ptr = TRX_SYS_RSEGS + sys_header;
430   len = ut_max(TRX_SYS_OLD_N_RSEGS, TRX_SYS_N_RSEGS) * TRX_SYS_RSEG_SLOT_SIZE;
431   memset(ptr, 0xff, len);
432   ptr += len;
433   ut_a(ptr <= page + (UNIV_PAGE_SIZE - FIL_PAGE_DATA_END));
434 
435   /* Initialize all of the page.  This part used to be uninitialized. */
436   memset(ptr, 0, UNIV_PAGE_SIZE - FIL_PAGE_DATA_END + page - ptr);
437 
438   mlog_log_string(sys_header,
439                   UNIV_PAGE_SIZE - FIL_PAGE_DATA_END + page - sys_header, mtr);
440 
441   /* Create the first rollback segment in the SYSTEM tablespace */
442   slot_no = trx_sysf_rseg_find_free(mtr);
443   page_no = trx_rseg_header_create(TRX_SYS_SPACE, univ_page_size, PAGE_NO_MAX,
444                                    slot_no, mtr);
445 
446   ut_a(slot_no == TRX_SYS_SYSTEM_RSEG_ID);
447   ut_a(page_no == FSP_FIRST_RSEG_PAGE_NO);
448 }
449 
450 /** Creates and initializes the central memory structures for the transaction
451  system. This is called when the database is started.
452  @return min binary heap of rsegs to purge */
trx_sys_init_at_db_start(void)453 purge_pq_t *trx_sys_init_at_db_start(void) {
454   purge_pq_t *purge_queue;
455   trx_sysf_t *sys_header;
456   ib_uint64_t rows_to_undo = 0;
457   const char *unit = "";
458 
459   /* We create the min binary heap here and pass ownership to
460   purge when we init the purge sub-system. Purge is responsible
461   for freeing the binary heap. */
462   purge_queue = UT_NEW_NOKEY(purge_pq_t());
463   ut_a(purge_queue != nullptr);
464 
465   if (srv_force_recovery < SRV_FORCE_NO_UNDO_LOG_SCAN) {
466     /* Create the memory objects for all the rollback segments
467     referred to in the TRX_SYS page or any undo tablespace
468     RSEG_ARRAY page. */
469     trx_rsegs_init(purge_queue);
470   }
471 
472   /* VERY important: after the database is started, max_trx_id value is
473   divisible by TRX_SYS_TRX_ID_WRITE_MARGIN, and the 'if' in
474   trx_sys_get_new_trx_id will evaluate to TRUE when the function
475   is first time called, and the value for trx id will be written
476   to the disk-based header! Thus trx id values will not overlap when
477   the database is repeatedly started! */
478 
479   mtr_t mtr;
480   mtr.start();
481 
482   sys_header = trx_sysf_get(&mtr);
483 
484   trx_sys->max_trx_id =
485       2 * TRX_SYS_TRX_ID_WRITE_MARGIN +
486       ut_uint64_align_up(mach_read_from_8(sys_header + TRX_SYS_TRX_ID_STORE),
487                          TRX_SYS_TRX_ID_WRITE_MARGIN);
488 
489   mtr.commit();
490 
491 #ifdef UNIV_DEBUG
492   /* max_trx_id is the next transaction ID to assign. Initialize maximum
493   transaction number to one less if all transactions are already purged. */
494   if (trx_sys->rw_max_trx_no == 0) {
495     trx_sys->rw_max_trx_no = trx_sys->max_trx_id - 1;
496   }
497 #endif /* UNIV_DEBUG */
498 
499   trx_dummy_sess = sess_open();
500 
501   trx_lists_init_at_db_start();
502 
503   /* This mutex is not strictly required, it is here only to satisfy
504   the debug code (assertions). We are still running in single threaded
505   bootstrap mode. */
506 
507   trx_sys_mutex_enter();
508 
509   if (UT_LIST_GET_LEN(trx_sys->rw_trx_list) > 0) {
510     const trx_t *trx;
511 
512     for (trx = UT_LIST_GET_FIRST(trx_sys->rw_trx_list); trx != nullptr;
513          trx = UT_LIST_GET_NEXT(trx_list, trx)) {
514       ut_ad(trx->is_recovered);
515       assert_trx_in_rw_list(trx);
516 
517       if (trx_state_eq(trx, TRX_STATE_ACTIVE)) {
518         rows_to_undo += trx->undo_no;
519       }
520     }
521 
522     if (rows_to_undo > 1000000000) {
523       unit = "M";
524       rows_to_undo = rows_to_undo / 1000000;
525     }
526 
527     ib::info(ER_IB_MSG_1198)
528         << UT_LIST_GET_LEN(trx_sys->rw_trx_list)
529         << " transaction(s) which must be rolled back or"
530            " cleaned up in total "
531         << rows_to_undo << unit << " row operations to undo";
532 
533     ib::info(ER_IB_MSG_1199) << "Trx id counter is " << trx_sys->max_trx_id;
534   }
535 
536   trx_sys->found_prepared_trx = trx_sys->n_prepared_trx > 0;
537 
538   trx_sys_mutex_exit();
539 
540   return (purge_queue);
541 }
542 
543 /** Creates the trx_sys instance and initializes purge_queue and mutex. */
trx_sys_create(void)544 void trx_sys_create(void) {
545   ut_ad(trx_sys == nullptr);
546 
547   trx_sys = static_cast<trx_sys_t *>(ut_zalloc_nokey(sizeof(*trx_sys)));
548 
549   mutex_create(LATCH_ID_TRX_SYS, &trx_sys->mutex);
550 
551   UT_LIST_INIT(trx_sys->serialisation_list, &trx_t::no_list);
552   UT_LIST_INIT(trx_sys->rw_trx_list, &trx_t::trx_list);
553   UT_LIST_INIT(trx_sys->mysql_trx_list, &trx_t::mysql_trx_list);
554 
555   trx_sys->mvcc = UT_NEW_NOKEY(MVCC(1024));
556 
557   trx_sys->min_active_id = 0;
558 
559   ut_d(trx_sys->rw_max_trx_no = 0);
560 
561   new (&trx_sys->rw_trx_ids)
562       trx_ids_t(ut_allocator<trx_id_t>(mem_key_trx_sys_t_rw_trx_ids));
563 
564   new (&trx_sys->rw_trx_set) TrxIdSet();
565 
566   new (&trx_sys->rsegs) Rsegs();
567   trx_sys->rsegs.set_empty();
568 
569   new (&trx_sys->tmp_rsegs) Rsegs();
570   trx_sys->tmp_rsegs.set_empty();
571 }
572 
573 /** Creates and initializes the transaction system at the database creation. */
trx_sys_create_sys_pages(void)574 void trx_sys_create_sys_pages(void) {
575   mtr_t mtr;
576 
577   mtr_start(&mtr);
578 
579   trx_sysf_create(&mtr);
580 
581   mtr_commit(&mtr);
582 }
583 
584 /*********************************************************************
585 Shutdown/Close the transaction system. */
trx_sys_close(void)586 void trx_sys_close(void) {
587   ut_ad(srv_shutdown_state.load() == SRV_SHUTDOWN_EXIT_THREADS);
588 
589   if (trx_sys == nullptr) {
590     return;
591   }
592 
593   ulint size = trx_sys->mvcc->size();
594 
595   if (size > 0) {
596     ib::error(ER_IB_MSG_1201) << "All read views were not closed before"
597                                  " shutdown: "
598                               << size << " read views open";
599   }
600 
601   sess_close(trx_dummy_sess);
602   trx_dummy_sess = nullptr;
603 
604   trx_purge_sys_close();
605 
606   /* Only prepared or active-recovered transactions may be left in the system.
607   The active-recovered transactions are allowed only if we did not force to
608   rollback them during shutdown (which might happen if e.g. it is fast
609   shutdown). Free all of them. */
610   trx_sys_after_background_threads_shutdown_validate();
611 
612   for (trx_t *trx = UT_LIST_GET_FIRST(trx_sys->rw_trx_list); trx != nullptr;
613        trx = UT_LIST_GET_FIRST(trx_sys->rw_trx_list)) {
614     trx_free_prepared_or_active_recovered(trx);
615   }
616 
617   /* There can't be any active transactions. */
618   trx_sys->rsegs.~Rsegs();
619 
620   trx_sys->tmp_rsegs.~Rsegs();
621 
622   UT_DELETE(trx_sys->mvcc);
623 
624   ut_a(UT_LIST_GET_LEN(trx_sys->rw_trx_list) == 0);
625   ut_a(UT_LIST_GET_LEN(trx_sys->mysql_trx_list) == 0);
626   ut_a(UT_LIST_GET_LEN(trx_sys->serialisation_list) == 0);
627 
628   /* We used placement new to create this mutex. Call the destructor. */
629   mutex_free(&trx_sys->mutex);
630 
631   trx_sys->rw_trx_ids.~trx_ids_t();
632 
633   trx_sys->rw_trx_set.~TrxIdSet();
634 
635   ut_free(trx_sys);
636 
637   trx_sys = nullptr;
638 }
639 
trx_sys_before_pre_dd_shutdown_validate()640 void trx_sys_before_pre_dd_shutdown_validate() {
641   /** All connections are closed and close_connection unregisters
642   associated trx from mysql_trx_list. We still might have some non
643   started transactions in mysql_trx_list. */
644   trx_sys_mutex_enter();
645   for (trx_t *trx = UT_LIST_GET_FIRST(trx_sys->mysql_trx_list); trx != nullptr;
646        trx = UT_LIST_GET_NEXT(mysql_trx_list, trx)) {
647     ut_a(trx->state == TRX_STATE_NOT_STARTED);
648   }
649   trx_sys_mutex_exit();
650 }
651 
trx_sys_after_pre_dd_shutdown_validate()652 void trx_sys_after_pre_dd_shutdown_validate() {
653   trx_sys_before_pre_dd_shutdown_validate();
654 
655   /** Additionally, the only left transactions are those that have
656   state == TRX_STATE_PREPARED, unless we didn't expect to rollback
657   all recovered transactions (e.g. fast shutdown) in which case we
658   could also have some transactions with is_recovered == true and
659   state == TRX_STATE_ACTIVE. */
660 
661   const auto active_recovered_trxs = trx_sys_recovered_active_trxs_count();
662   if (srv_shutdown_waits_for_rollback_of_recovered_transactions()) {
663     ut_a(active_recovered_trxs == 0);
664   }
665 
666   trx_sys_mutex_enter();
667   ut_a(UT_LIST_GET_LEN(trx_sys->rw_trx_list) ==
668        trx_sys->n_prepared_trx + active_recovered_trxs);
669   trx_sys_mutex_exit();
670 }
671 
trx_sys_after_background_threads_shutdown_validate()672 void trx_sys_after_background_threads_shutdown_validate() {
673   trx_sys_after_pre_dd_shutdown_validate();
674 
675   trx_sys_mutex_enter();
676   ut_a(UT_LIST_GET_LEN(trx_sys->mysql_trx_list) == 0);
677   trx_sys_mutex_exit();
678 }
679 
trx_sys_recovered_active_trxs_count()680 size_t trx_sys_recovered_active_trxs_count() {
681   size_t total_trx = 0;
682   trx_sys_mutex_enter();
683   /* Recovered transactions are never citizens of mysql_trx_list,
684   so it's enough to check rw_trx_list. */
685   for (trx_t *trx = UT_LIST_GET_FIRST(trx_sys->rw_trx_list); trx != nullptr;
686        trx = UT_LIST_GET_NEXT(trx_list, trx)) {
687     if (trx_state_eq(trx, TRX_STATE_ACTIVE) && trx->is_recovered) {
688       total_trx++;
689     }
690   }
691   trx_sys_mutex_exit();
692   return (total_trx);
693 }
694 
695 #ifdef UNIV_DEBUG
696 /** Validate the trx_ut_list_t.
697  @return true if valid. */
trx_sys_validate_trx_list_low(trx_ut_list_t * trx_list)698 static bool trx_sys_validate_trx_list_low(
699     trx_ut_list_t *trx_list) /*!< in: &trx_sys->rw_trx_list */
700 {
701   const trx_t *trx;
702   const trx_t *prev_trx = nullptr;
703 
704   ut_ad(trx_sys_mutex_own());
705 
706   ut_ad(trx_list == &trx_sys->rw_trx_list);
707 
708   for (trx = UT_LIST_GET_FIRST(*trx_list); trx != nullptr;
709        prev_trx = trx, trx = UT_LIST_GET_NEXT(trx_list, prev_trx)) {
710     check_trx_state(trx);
711     ut_a(prev_trx == nullptr || prev_trx->id > trx->id);
712   }
713 
714   return (true);
715 }
716 
717 /** Validate the trx_sys_t::rw_trx_list.
718  @return true if the list is valid. */
trx_sys_validate_trx_list()719 bool trx_sys_validate_trx_list() {
720   ut_ad(trx_sys_mutex_own());
721 
722   ut_a(trx_sys_validate_trx_list_low(&trx_sys->rw_trx_list));
723 
724   return (true);
725 }
726 #endif /* UNIV_DEBUG */
727 #endif /* !UNIV_HOTBACKUP */
728 
729 /** A list of undo tablespace IDs found in the TRX_SYS page. These are the
730 old type of undo tablespaces that do not have space_IDs in the reserved
731 range nor contain an RSEG_ARRAY page. This cannot be part of the trx_sys_t
732 object because it must be built before that is initialized. */
733 Space_Ids *trx_sys_undo_spaces;
734 
735 /** Initialize trx_sys_undo_spaces, called once during srv_start(). */
trx_sys_undo_spaces_init()736 void trx_sys_undo_spaces_init() {
737   trx_sys_undo_spaces = UT_NEW(Space_Ids(), mem_key_undo_spaces);
738 
739   trx_sys_undo_spaces->reserve(TRX_SYS_N_RSEGS);
740 }
741 
742 /** Free the resources occupied by trx_sys_undo_spaces,
743 called once during thread de-initialization. */
trx_sys_undo_spaces_deinit()744 void trx_sys_undo_spaces_deinit() {
745   if (trx_sys_undo_spaces != nullptr) {
746     trx_sys_undo_spaces->clear();
747     UT_DELETE(trx_sys_undo_spaces);
748     trx_sys_undo_spaces = nullptr;
749   }
750 }
751