1 /*****************************************************************************
2 
3 Copyright (c) 1996, 2020, Oracle and/or its affiliates. All Rights Reserved.
4 
5 This program is free software; you can redistribute it and/or modify it under
6 the terms of the GNU General Public License, version 2.0, as published by the
7 Free Software Foundation.
8 
9 This program is also distributed with certain software (including but not
10 limited to OpenSSL) that is licensed under separate terms, as designated in a
11 particular file or component or in included license documentation. The authors
12 of MySQL hereby grant you an additional permission to link the program and
13 your derivative works with the separately licensed software that they have
14 included with MySQL.
15 
16 This program is distributed in the hope that it will be useful, but WITHOUT
17 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
18 FOR A PARTICULAR PURPOSE. See the GNU General Public License, version 2.0,
19 for more details.
20 
21 You should have received a copy of the GNU General Public License along with
22 this program; if not, write to the Free Software Foundation, Inc.,
23 51 Franklin St, Fifth Floor, Boston, MA 02110-1301  USA
24 
25 *****************************************************************************/
26 
27 /** @file trx/trx0trx.cc
28  The transaction
29 
30  Created 3/26/1996 Heikki Tuuri
31  *******************************************************/
32 
33 #include <sys/types.h>
34 #include <time.h>
35 #include <new>
36 #include <set>
37 
38 #include <sql_thd_internal_api.h>
39 
40 #include "btr0sea.h"
41 #include "clone0clone.h"
42 #include "current_thd.h"
43 #include "dict0dd.h"
44 #include "fsp0sysspace.h"
45 #include "ha_prototypes.h"
46 #include "lock0lock.h"
47 #include "log0log.h"
48 #include "os0proc.h"
49 #include "que0que.h"
50 #include "read0read.h"
51 #include "row0mysql.h"
52 #include "srv0mon.h"
53 #include "srv0srv.h"
54 #include "srv0start.h"
55 #include "trx0purge.h"
56 #include "trx0rec.h"
57 #include "trx0roll.h"
58 #include "trx0rseg.h"
59 #include "trx0trx.h"
60 #include "trx0undo.h"
61 #include "trx0xa.h"
62 #include "usr0sess.h"
63 #include "ut0new.h"
64 #include "ut0pool.h"
65 #include "ut0vec.h"
66 
67 #include "my_dbug.h"
68 #include "mysql/plugin.h"
69 #include "sql/clone_handler.h"
70 
71 static const ulint MAX_DETAILED_ERROR_LEN = 256;
72 
73 /** Set of table_id */
74 typedef std::set<table_id_t, std::less<table_id_t>, ut_allocator<table_id_t>>
75     table_id_set;
76 
77 /** Map of transactions to affected table_id */
78 typedef std::map<trx_t *, table_id_set, std::less<trx_t *>,
79                  ut_allocator<std::pair<trx_t *const, table_id_set>>>
80     trx_table_map;
81 
82 /** Map of resurrected transactions to affected table_id */
83 static trx_table_map resurrected_trx_tables;
84 
85 /** Dummy session used currently in MySQL interface */
86 sess_t *trx_dummy_sess = nullptr;
87 
88 /** Constructor */
TrxVersion(trx_t * trx)89 TrxVersion::TrxVersion(trx_t *trx) : m_trx(trx), m_version(trx->version) {
90   /* No op */
91 }
92 
93 /* The following function makes the transaction committed in memory
94 and makes its changes to data visible to other transactions.
95 In particular it releases implicit and explicit locks held by transaction and
96 transitions to the transaction to the TRX_STATE_COMMITTED_IN_MEMORY state.
97 NOTE that there is a small discrepancy from the strict formal
98 visibility rules here: a human user of the database can see
99 modifications made by another transaction T even before the necessary
100 log segment has been flushed to the disk. If the database happens to
101 crash before the flush, the user has seen modifications from T which
102 will never be a committed transaction. However, any transaction T2
103 which sees the modifications of the committing transaction T, and
104 which also itself makes modifications to the database, will get an lsn
105 larger than the committing transaction T. In the case where the log
106 flush fails, and T never gets committed, also T2 will never get
107 committed.
108 @param[in,out]  trx         The transaction for which will be committed in
109                             memory
110 @param[in]      serialized  true if serialisation log was written. Affects the
111                             list of things we need to clean up during
112                             trx_erase_lists.
113 */
114 static void trx_release_impl_and_expl_locks(trx_t *trx, bool serialized);
115 
116 /** Set flush observer for the transaction
117 @param[in,out]	trx		transaction struct
118 @param[in]	observer	flush observer */
trx_set_flush_observer(trx_t * trx,FlushObserver * observer)119 void trx_set_flush_observer(trx_t *trx, FlushObserver *observer) {
120   trx->flush_observer = observer;
121 }
122 
123 /** Set detailed error message for the transaction. */
trx_set_detailed_error(trx_t * trx,const char * msg)124 void trx_set_detailed_error(trx_t *trx,      /*!< in: transaction struct */
125                             const char *msg) /*!< in: detailed error message */
126 {
127   ut_strlcpy(trx->detailed_error, msg, MAX_DETAILED_ERROR_LEN);
128 }
129 
130 /** Set detailed error message for the transaction from a file. Note that the
131  file is rewinded before reading from it. */
trx_set_detailed_error_from_file(trx_t * trx,FILE * file)132 void trx_set_detailed_error_from_file(
133     trx_t *trx, /*!< in: transaction struct */
134     FILE *file) /*!< in: file to read message from */
135 {
136   os_file_read_string(file, trx->detailed_error, MAX_DETAILED_ERROR_LEN);
137 }
138 
139 /** Initialize transaction object.
140  @param trx trx to initialize */
trx_init(trx_t * trx)141 static void trx_init(trx_t *trx) {
142   /* This is called at the end of commit, do not reset the
143   trx_t::state here to NOT_STARTED. The FORCED_ROLLBACK
144   status is required for asynchronous handling. */
145 
146   trx->id = 0;
147 
148   trx->no = TRX_ID_MAX;
149 
150   trx->persists_gtid = false;
151 
152   trx->skip_lock_inheritance = false;
153 
154   trx->is_recovered = false;
155 
156   trx->op_info = "";
157 
158   trx->isolation_level = TRX_ISO_REPEATABLE_READ;
159 
160   trx->check_foreigns = true;
161 
162   trx->check_unique_secondary = true;
163 
164   trx->lock.n_rec_locks.store(0);
165 
166   trx->lock.blocking_trx.store(nullptr);
167 
168   trx->dict_operation = TRX_DICT_OP_NONE;
169 
170   trx->ddl_operation = false;
171 
172   trx->error_state = DB_SUCCESS;
173 
174   trx->error_key_num = ULINT_UNDEFINED;
175 
176   trx->undo_no = 0;
177 
178   trx->rsegs.m_redo.rseg = nullptr;
179 
180   trx->rsegs.m_noredo.rseg = nullptr;
181 
182   trx->read_only = false;
183 
184   trx->auto_commit = false;
185 
186   trx->will_lock = 0;
187 
188   trx->lock.inherit_all.store(false);
189 
190   trx->internal = false;
191 
192   trx->in_truncate = false;
193 #ifdef UNIV_DEBUG
194   trx->is_dd_trx = false;
195   trx->in_rollback = false;
196   trx->lock.in_rollback = false;
197 #endif /* UNIV_DEBUG */
198 
199   ut_d(trx->start_file = nullptr);
200 
201   ut_d(trx->start_line = 0);
202 
203   trx->magic_n = TRX_MAGIC_N;
204 
205   trx->lock.que_state = TRX_QUE_RUNNING;
206 
207   trx->last_sql_stat_start.least_undo_no = 0;
208 
209   ut_ad(!MVCC::is_view_active(trx->read_view));
210 
211   trx->lock.rec_cached = 0;
212 
213   trx->lock.table_cached = 0;
214 
215   trx->error_index = nullptr;
216 
217   /* During asynchronous rollback, we should reset forced rollback flag
218   only after rollback is complete to avoid race with the thread owning
219   the transaction. */
220 
221   if (!TrxInInnoDB::is_async_rollback(trx)) {
222     os_thread_id_t thread_id = trx->killed_by;
223     os_compare_and_swap_thread_id(&trx->killed_by, thread_id, 0);
224 
225     /* Note: Do not set to 0, the ref count is decremented inside
226     the TrxInInnoDB() destructor. We only need to clear the flags. */
227 
228     trx->in_innodb &= TRX_FORCE_ROLLBACK_MASK;
229   }
230 
231   trx->flush_observer = nullptr;
232 
233   ++trx->version;
234 }
235 
236 /** For managing the life-cycle of the trx_t instance that we get
237 from the pool. */
238 struct TrxFactory {
239   /** Initializes a transaction object. It must be explicitly started
240   with trx_start_if_not_started() before using it. The default isolation
241   level is TRX_ISO_REPEATABLE_READ.
242   @param trx Transaction instance to initialise */
initTrxFactory243   static void init(trx_t *trx) {
244     /* Explicitly call the constructor of the already
245     allocated object. trx_t objects are allocated by
246     ut_zalloc() in Pool::Pool() which would not call
247     the constructors of the trx_t members. */
248     new (&trx->mod_tables) trx_mod_tables_t();
249 
250     new (&trx->lock.rec_pool) lock_pool_t();
251 
252     new (&trx->lock.table_pool) lock_pool_t();
253 
254     new (&trx->lock.table_locks) lock_pool_t();
255 
256     trx_init(trx);
257 
258     trx->state = TRX_STATE_NOT_STARTED;
259 
260     trx->dict_operation_lock_mode = 0;
261 
262     trx->xid = UT_NEW_NOKEY(xid_t());
263 
264     trx->detailed_error =
265         reinterpret_cast<char *>(ut_zalloc_nokey(MAX_DETAILED_ERROR_LEN));
266 
267     trx->lock.lock_heap = mem_heap_create_typed(1024, MEM_HEAP_FOR_LOCK_HEAP);
268 
269     lock_trx_lock_list_init(&trx->lock.trx_locks);
270 
271     UT_LIST_INIT(trx->trx_savepoints, &trx_named_savept_t::trx_savepoints);
272 
273     mutex_create(LATCH_ID_TRX, &trx->mutex);
274     mutex_create(LATCH_ID_TRX_UNDO, &trx->undo_mutex);
275 
276     lock_trx_alloc_locks(trx);
277   }
278 
279   /** Release resources held by the transaction object.
280   @param trx the transaction for which to release resources */
destroyTrxFactory281   static void destroy(trx_t *trx) {
282     ut_a(trx->magic_n == TRX_MAGIC_N);
283     ut_ad(!trx->in_rw_trx_list);
284     ut_ad(!trx->in_mysql_trx_list);
285 
286     ut_a(trx->lock.wait_lock == nullptr);
287     ut_a(trx->lock.wait_thr == nullptr);
288     ut_a(trx->lock.blocking_trx.load() == nullptr);
289 
290     ut_a(!trx->has_search_latch);
291 
292     ut_a(trx->dict_operation_lock_mode == 0);
293 
294     if (trx->lock.lock_heap != nullptr) {
295       mem_heap_free(trx->lock.lock_heap);
296       trx->lock.lock_heap = nullptr;
297     }
298 
299     ut_a(UT_LIST_GET_LEN(trx->lock.trx_locks) == 0);
300 
301     UT_DELETE(trx->xid);
302     ut_free(trx->detailed_error);
303 
304     mutex_free(&trx->mutex);
305     mutex_free(&trx->undo_mutex);
306 
307     trx->mod_tables.~trx_mod_tables_t();
308 
309     ut_ad(trx->read_view == nullptr);
310 
311     if (!trx->lock.rec_pool.empty()) {
312       /* See lock_trx_alloc_locks() why we only free
313       the first element. */
314 
315       ut_free(trx->lock.rec_pool[0]);
316     }
317 
318     if (!trx->lock.table_pool.empty()) {
319       /* See lock_trx_alloc_locks() why we only free
320       the first element. */
321 
322       ut_free(trx->lock.table_pool[0]);
323     }
324 
325     trx->lock.rec_pool.~lock_pool_t();
326 
327     trx->lock.table_pool.~lock_pool_t();
328 
329     trx->lock.table_locks.~lock_pool_t();
330   }
331 
332   /** Enforce any invariants here, this is called before the transaction
333   is added to the pool.
334   @return true if all OK */
debugTrxFactory335   static bool debug(const trx_t *trx) {
336     ut_a(trx->error_state == DB_SUCCESS);
337 
338     ut_a(trx->magic_n == TRX_MAGIC_N);
339 
340     ut_ad(!trx->read_only);
341 
342     ut_ad(trx->state == TRX_STATE_NOT_STARTED ||
343           trx->state == TRX_STATE_FORCED_ROLLBACK);
344 
345     ut_ad(trx->dict_operation == TRX_DICT_OP_NONE);
346 
347     ut_ad(trx->mysql_thd == nullptr);
348 
349     ut_ad(!trx->in_rw_trx_list);
350     ut_ad(!trx->in_mysql_trx_list);
351 
352     ut_a(trx->lock.wait_thr == nullptr);
353     ut_a(trx->lock.wait_lock == nullptr);
354     ut_a(trx->lock.blocking_trx.load() == nullptr);
355 
356     ut_a(!trx->has_search_latch);
357 
358     ut_a(trx->dict_operation_lock_mode == 0);
359 
360     ut_a(UT_LIST_GET_LEN(trx->lock.trx_locks) == 0);
361 
362     ut_ad(trx->lock.autoinc_locks == nullptr);
363 
364     ut_ad(trx->lock.table_locks.empty());
365 
366     ut_ad(!trx->lock.inherit_all.load());
367 
368     ut_ad(!trx->abort);
369 
370     ut_ad(trx->killed_by == 0);
371 
372     return (true);
373   }
374 };
375 
376 /** The lock strategy for TrxPool */
377 struct TrxPoolLock {
TrxPoolLockTrxPoolLock378   TrxPoolLock() {}
379 
380   /** Create the mutex */
createTrxPoolLock381   void create() { mutex_create(LATCH_ID_TRX_POOL, &m_mutex); }
382 
383   /** Acquire the mutex */
enterTrxPoolLock384   void enter() { mutex_enter(&m_mutex); }
385 
386   /** Release the mutex */
exitTrxPoolLock387   void exit() { mutex_exit(&m_mutex); }
388 
389   /** Free the mutex */
destroyTrxPoolLock390   void destroy() { mutex_free(&m_mutex); }
391 
392   /** Mutex to use */
393   ib_mutex_t m_mutex;
394 };
395 
396 /** The lock strategy for the TrxPoolManager */
397 struct TrxPoolManagerLock {
TrxPoolManagerLockTrxPoolManagerLock398   TrxPoolManagerLock() {}
399 
400   /** Create the mutex */
createTrxPoolManagerLock401   void create() { mutex_create(LATCH_ID_TRX_POOL_MANAGER, &m_mutex); }
402 
403   /** Acquire the mutex */
enterTrxPoolManagerLock404   void enter() { mutex_enter(&m_mutex); }
405 
406   /** Release the mutex */
exitTrxPoolManagerLock407   void exit() { mutex_exit(&m_mutex); }
408 
409   /** Free the mutex */
destroyTrxPoolManagerLock410   void destroy() { mutex_free(&m_mutex); }
411 
412   /** Mutex to use */
413   ib_mutex_t m_mutex;
414 };
415 
416 /** Use explicit mutexes for the trx_t pool and its manager. */
417 typedef Pool<trx_t, TrxFactory, TrxPoolLock> trx_pool_t;
418 typedef PoolManager<trx_pool_t, TrxPoolManagerLock> trx_pools_t;
419 
420 /** The trx_t pool manager */
421 static trx_pools_t *trx_pools;
422 
423 /** Size of on trx_t pool in bytes. */
424 static const ulint MAX_TRX_BLOCK_SIZE = 1024 * 1024 * 4;
425 
426 /** Create the trx_t pool */
trx_pool_init()427 void trx_pool_init() {
428   trx_pools = UT_NEW_NOKEY(trx_pools_t(MAX_TRX_BLOCK_SIZE));
429 
430   ut_a(trx_pools != nullptr);
431 }
432 
433 /** Destroy the trx_t pool */
trx_pool_close()434 void trx_pool_close() {
435   UT_DELETE(trx_pools);
436 
437   trx_pools = nullptr;
438 }
439 
440 /** @return a trx_t instance from trx_pools. */
trx_create_low()441 static trx_t *trx_create_low() {
442   trx_t *trx = trx_pools->get();
443 
444   assert_trx_is_free(trx);
445 
446   mem_heap_t *heap;
447   ib_alloc_t *alloc;
448 
449   /* We just got trx from pool, it should be non locking */
450   ut_ad(trx->will_lock == 0);
451 
452   trx->persists_gtid = false;
453 
454   trx->api_trx = false;
455 
456   trx->api_auto_commit = false;
457 
458   trx->read_write = true;
459 
460   /* Background trx should not be forced to rollback,
461   we will unset the flag for user trx. */
462   trx->in_innodb |= TRX_FORCE_ROLLBACK_DISABLE;
463 
464   /* Trx state can be TRX_STATE_FORCED_ROLLBACK if
465   the trx was forced to rollback before it's reused.*/
466   trx->state = TRX_STATE_NOT_STARTED;
467 
468   heap = mem_heap_create(sizeof(ib_vector_t) + sizeof(void *) * 8);
469 
470   alloc = ib_heap_allocator_create(heap);
471 
472   /* Remember to free the vector explicitly in trx_free(). */
473   trx->lock.autoinc_locks = ib_vector_create(alloc, sizeof(void **), 4);
474 
475   /* Should have been either just initialized or .clear()ed by
476   trx_free(). */
477   ut_a(trx->mod_tables.size() == 0);
478 
479   return (trx);
480 }
481 
482 /**
483 Release a trx_t instance back to the pool.
484 @param trx the instance to release. */
trx_free(trx_t * & trx)485 static void trx_free(trx_t *&trx) {
486   assert_trx_is_free(trx);
487 
488   trx->mysql_thd = nullptr;
489 
490   // FIXME: We need to avoid this heap free/alloc for each commit.
491   if (trx->lock.autoinc_locks != nullptr) {
492     ut_ad(ib_vector_is_empty(trx->lock.autoinc_locks));
493     /* We allocated a dedicated heap for the vector. */
494     ib_vector_free(trx->lock.autoinc_locks);
495     trx->lock.autoinc_locks = nullptr;
496   }
497 
498   trx->mod_tables.clear();
499 
500   ut_ad(trx->read_view == nullptr);
501   ut_ad(trx->is_dd_trx == false);
502 
503   /* trx locking state should have been reset before returning trx
504   to pool */
505   ut_ad(trx->will_lock == 0);
506 
507   trx_pools->mem_free(trx);
508 
509   trx = nullptr;
510 }
511 
512 /** Creates a transaction object for background operations by the master thread.
513  @return own: transaction object */
trx_allocate_for_background(void)514 trx_t *trx_allocate_for_background(void) {
515   trx_t *trx;
516 
517   trx = trx_create_low();
518 
519   trx->sess = trx_dummy_sess;
520 
521   return (trx);
522 }
523 
524 /** Creates a transaction object for MySQL.
525  @return own: transaction object */
trx_allocate_for_mysql(void)526 trx_t *trx_allocate_for_mysql(void) {
527   trx_t *trx;
528 
529   trx = trx_allocate_for_background();
530 
531   trx_sys_mutex_enter();
532 
533   ut_d(trx->in_mysql_trx_list = TRUE);
534   UT_LIST_ADD_FIRST(trx_sys->mysql_trx_list, trx);
535 
536   trx_sys_mutex_exit();
537 
538   return (trx);
539 }
540 
541 /** Check state of transaction before freeing it.
542 @param[in,out]	trx	transaction object to validate */
trx_validate_state_before_free(trx_t * trx)543 static void trx_validate_state_before_free(trx_t *trx) {
544   if (trx->declared_to_be_inside_innodb) {
545     ib::error(ER_IB_MSG_1202)
546         << "Freeing a trx (" << trx << ", " << trx_get_id_for_print(trx)
547         << ") which is declared"
548            " to be processing inside InnoDB";
549 
550     trx_print(stderr, trx, 600);
551     putc('\n', stderr);
552 
553     /* This is an error but not a fatal error. We must keep
554     the counters like srv_conc_n_threads accurate. */
555     srv_conc_force_exit_innodb(trx);
556   }
557 
558   if (trx->n_mysql_tables_in_use != 0 || trx->mysql_n_tables_locked != 0) {
559     ib::error(ER_IB_MSG_1203)
560         << "MySQL is freeing a thd though trx->n_mysql_tables_in_use is "
561         << trx->n_mysql_tables_in_use << " and trx->mysql_n_tables_locked is "
562         << trx->mysql_n_tables_locked << ".";
563 
564     trx_print(stderr, trx, 600);
565     ut_print_buf(stderr, trx, sizeof(trx_t));
566     putc('\n', stderr);
567   }
568 
569   trx->dict_operation = TRX_DICT_OP_NONE;
570   assert_trx_is_inactive(trx);
571 }
572 
573 /** Free and initialize a transaction object instantiated during recovery.
574 @param[in,out]	trx	transaction object to free and initialize */
trx_free_resurrected(trx_t * trx)575 void trx_free_resurrected(trx_t *trx) {
576   trx_validate_state_before_free(trx);
577 
578   trx_init(trx);
579 
580   trx_free(trx);
581 }
582 
583 /** Free a transaction that was allocated by background or user threads.
584 @param[in,out]	trx	transaction object to free */
trx_free_for_background(trx_t * trx)585 void trx_free_for_background(trx_t *trx) {
586   trx_validate_state_before_free(trx);
587 
588   trx_free(trx);
589 }
590 
trx_free_prepared_or_active_recovered(trx_t * trx)591 void trx_free_prepared_or_active_recovered(trx_t *trx) {
592   ut_a(trx->magic_n == TRX_MAGIC_N);
593   ulint expected_undo_state;
594   if (trx->state == TRX_STATE_ACTIVE) {
595     ut_a(trx_state_eq(trx, TRX_STATE_ACTIVE));
596     ut_a(trx->is_recovered);
597     expected_undo_state = TRX_UNDO_ACTIVE;
598   } else {
599     ut_a(trx_state_eq(trx, TRX_STATE_PREPARED));
600     expected_undo_state = TRX_UNDO_PREPARED;
601   }
602 
603   assert_trx_in_rw_list(trx);
604 
605   trx_release_impl_and_expl_locks(trx, false);
606   trx_undo_free_trx_with_prepared_or_active_logs(trx, expected_undo_state);
607 
608   ut_ad(!trx->in_rw_trx_list);
609   ut_a(!trx->read_only);
610 
611   trx->state = TRX_STATE_NOT_STARTED;
612 
613   /* Undo trx_resurrect_table_locks(). */
614   lock_trx_lock_list_init(&trx->lock.trx_locks);
615 
616   trx_free(trx);
617 }
618 
619 /** Disconnect a transaction from MySQL and optionally mark it as if
620 it's been recovered. For the marking the transaction must be in prepared state.
621 The recovery-marked transaction is going to survive "alone" so its association
622 with the mysql handle is destroyed now rather than when it will be
623 finally freed.
624 @param[in,out]	trx		transaction
625 @param[in]	prepared	boolean value to specify whether trx is
626                                 for recovery or not. */
trx_disconnect_from_mysql(trx_t * trx,bool prepared)627 inline void trx_disconnect_from_mysql(trx_t *trx, bool prepared) {
628   trx_sys_mutex_enter();
629 
630   ut_ad(trx->in_mysql_trx_list);
631   ut_d(trx->in_mysql_trx_list = FALSE);
632 
633   UT_LIST_REMOVE(trx_sys->mysql_trx_list, trx);
634 
635   if (trx->read_view != nullptr) {
636     trx_sys->mvcc->view_close(trx->read_view, true);
637   }
638 
639   ut_ad(trx_sys_validate_trx_list());
640 
641   if (prepared) {
642     ut_ad(trx_state_eq(trx, TRX_STATE_PREPARED));
643 
644     trx->is_recovered = true;
645     trx->mysql_thd = nullptr;
646     /* todo/fixme: suggest to do it at innodb prepare */
647     trx->will_lock = 0;
648   }
649 
650   trx_sys_mutex_exit();
651 }
652 
653 /** Disconnect a transaction from MySQL.
654 @param[in,out]	trx	transaction */
trx_disconnect_plain(trx_t * trx)655 inline void trx_disconnect_plain(trx_t *trx) {
656   trx_disconnect_from_mysql(trx, false);
657 }
658 
659 /** Disconnect a prepared transaction from MySQL.
660 @param[in,out]	trx	transaction */
trx_disconnect_prepared(trx_t * trx)661 void trx_disconnect_prepared(trx_t *trx) {
662   trx_disconnect_from_mysql(trx, true);
663 }
664 
665 /** Free a transaction object for MySQL.
666 @param[in,out]	trx	transaction */
trx_free_for_mysql(trx_t * trx)667 void trx_free_for_mysql(trx_t *trx) {
668   trx_disconnect_plain(trx);
669   trx_free_for_background(trx);
670 }
671 
672 /** Resurrect the table IDs for a resurrected transaction.
673 @param[in]	trx		resurrected transaction
674 @param[in]	undo_ptr	pointer to undo segment
675 @param[in]	undo		undo log */
trx_resurrect_table_ids(trx_t * trx,const trx_undo_ptr_t * undo_ptr,const trx_undo_t * undo)676 static void trx_resurrect_table_ids(trx_t *trx, const trx_undo_ptr_t *undo_ptr,
677                                     const trx_undo_t *undo) {
678   mtr_t mtr;
679   page_t *undo_page;
680   trx_undo_rec_t *undo_rec;
681 
682   ut_ad(undo == undo_ptr->insert_undo || undo == undo_ptr->update_undo);
683 
684   if (trx_state_eq(trx, TRX_STATE_COMMITTED_IN_MEMORY) || undo->empty) {
685     return;
686   }
687 
688   table_id_set empty;
689   table_id_set &tables =
690       resurrected_trx_tables.insert(trx_table_map::value_type(trx, empty))
691           .first->second;
692 
693   mtr_start(&mtr);
694 
695   /* trx_rseg_mem_create() may have acquired an X-latch on this
696   page, so we cannot acquire an S-latch. */
697   undo_page = trx_undo_page_get(page_id_t(undo->space, undo->top_page_no),
698                                 undo->page_size, &mtr);
699 
700   undo_rec = undo_page + undo->top_offset;
701 
702   do {
703     ulint type;
704     undo_no_t undo_no;
705     table_id_t table_id;
706     ulint cmpl_info;
707     bool updated_extern;
708     type_cmpl_t type_cmpl;
709 
710     page_t *undo_rec_page = page_align(undo_rec);
711 
712     if (undo_rec_page != undo_page) {
713       mtr.release_page(undo_page, MTR_MEMO_PAGE_X_FIX);
714       undo_page = undo_rec_page;
715     }
716 
717     trx_undo_rec_get_pars(undo_rec, &type, &cmpl_info, &updated_extern,
718                           &undo_no, &table_id, type_cmpl);
719     tables.insert(table_id);
720 
721     undo_rec = trx_undo_get_prev_rec(undo_rec, undo->hdr_page_no,
722                                      undo->hdr_offset, false, &mtr);
723   } while (undo_rec);
724 
725   mtr_commit(&mtr);
726 }
727 
728 /** Resurrect table locks for resurrected transactions. */
trx_resurrect_locks()729 void trx_resurrect_locks() {
730   for (trx_table_map::const_iterator t = resurrected_trx_tables.begin();
731        t != resurrected_trx_tables.end(); t++) {
732     trx_t *trx = t->first;
733     const table_id_set &tables = t->second;
734     ut_ad(trx->is_recovered);
735 
736     for (table_id_set::const_iterator i = tables.begin(); i != tables.end();
737          i++) {
738       dict_table_t *table =
739           dd_table_open_on_id(*i, nullptr, nullptr, false, true);
740       if (table) {
741         ut_ad(!table->is_temporary());
742 
743         if (table->ibd_file_missing || table->is_temporary()) {
744           mutex_enter(&dict_sys->mutex);
745           dd_table_close(table, nullptr, nullptr, true);
746           dict_table_remove_from_cache(table);
747           mutex_exit(&dict_sys->mutex);
748           continue;
749         }
750 
751         if (trx->state == TRX_STATE_PREPARED && !dict_table_is_sdi(table->id)) {
752           trx->mod_tables.insert(table);
753         }
754         DICT_TF2_FLAG_SET(table, DICT_TF2_RESURRECT_PREPARED);
755 
756         lock_table_ix_resurrect(table, trx);
757 
758         DBUG_PRINT("ib_trx", ("resurrect" TRX_ID_FMT "  table '%s' IX lock",
759                               trx_get_id_for_print(trx), table->name.m_name));
760 
761         dd_table_close(table, nullptr, nullptr, false);
762       }
763     }
764   }
765 
766   resurrected_trx_tables.clear();
767 }
768 
769 /** Resurrect the transactions that were doing inserts at the time of the
770  crash, they need to be undone.
771  @return trx_t instance */
trx_resurrect_insert(trx_undo_t * undo,trx_rseg_t * rseg)772 static trx_t *trx_resurrect_insert(
773     trx_undo_t *undo, /*!< in: entry to UNDO */
774     trx_rseg_t *rseg) /*!< in: rollback segment */
775 {
776   trx_t *trx;
777 
778   trx = trx_allocate_for_background();
779 
780   ut_d(trx->start_file = __FILE__);
781   ut_d(trx->start_line = __LINE__);
782 
783   rseg->trx_ref_count++;
784   trx->rsegs.m_redo.rseg = rseg;
785   *trx->xid = undo->xid;
786   trx->id = undo->trx_id;
787   trx->rsegs.m_redo.insert_undo = undo;
788   trx->is_recovered = true;
789 
790   /* This is single-threaded startup code, we do not need the
791   protection of trx->mutex or trx_sys->mutex here. */
792 
793   if (undo->state != TRX_UNDO_ACTIVE) {
794     /* Prepared transactions are left in the prepared state
795     waiting for a commit or abort decision from MySQL */
796 
797     if (undo->state == TRX_UNDO_PREPARED) {
798       ib::info(ER_IB_MSG_1204) << "Transaction " << trx_get_id_for_print(trx)
799                                << " was in the XA prepared state.";
800 
801       if (srv_force_recovery == 0) {
802         if (!srv_rollback_prepared_trx) {
803           trx->state = TRX_STATE_PREPARED;
804           ++trx_sys->n_prepared_trx;
805         } else {
806           /* XtraBackup is asked to rollback prepared XA
807           transactions */
808           trx->state = TRX_STATE_ACTIVE;
809         }
810       } else {
811         ib::info(ER_IB_MSG_1205) << "Since innodb_force_recovery"
812                                     " > 0, we will force a rollback.";
813 
814         trx->state = TRX_STATE_ACTIVE;
815       }
816     } else {
817       trx->state = TRX_STATE_COMMITTED_IN_MEMORY;
818     }
819 
820     /* We give a dummy value for the trx no; this should have no
821     relevance since purge is not interested in committed
822     transaction numbers, unless they are in the history
823     list, in which case it looks the number from the disk based
824     undo log structure */
825 
826     trx->no = trx->id;
827 
828   } else {
829     trx->state = TRX_STATE_ACTIVE;
830 
831     /* A running transaction always has the number
832     field inited to TRX_ID_MAX */
833 
834     trx->no = TRX_ID_MAX;
835   }
836 
837   /* trx_start_low() is not called with resurrect, so need to initialize
838   start time here.*/
839   if (trx->state == TRX_STATE_ACTIVE || trx->state == TRX_STATE_PREPARED) {
840     trx->start_time = ut_time();
841   }
842 
843   trx->ddl_operation = undo->dict_operation;
844 
845   if (undo->dict_operation) {
846     trx_set_dict_operation(trx, TRX_DICT_OP_TABLE);
847   }
848 
849   if (!undo->empty) {
850     trx->undo_no = undo->top_undo_no + 1;
851     trx->undo_rseg_space = undo->rseg->space_id;
852   }
853 
854   return (trx);
855 }
856 
857 /** Prepared transactions are left in the prepared state waiting for a
858  commit or abort decision from MySQL */
trx_resurrect_update_in_prepared_state(trx_t * trx,const trx_undo_t * undo)859 static void trx_resurrect_update_in_prepared_state(
860     trx_t *trx,             /*!< in,out: transaction */
861     const trx_undo_t *undo) /*!< in: update UNDO record */
862 {
863   /* This is single-threaded startup code, we do not need the
864   protection of trx->mutex or trx_sys->mutex here. */
865 
866   if (undo->state == TRX_UNDO_PREPARED) {
867     ib::info(ER_IB_MSG_1206) << "Transaction " << trx_get_id_for_print(trx)
868                              << " was in the XA prepared state.";
869 
870     ut_ad(trx->state != TRX_STATE_FORCED_ROLLBACK);
871 
872     if (!srv_rollback_prepared_trx) {
873       if (trx_state_eq(trx, TRX_STATE_NOT_STARTED)) {
874         ++trx_sys->n_prepared_trx;
875       } else {
876         ut_ad(trx_state_eq(trx, TRX_STATE_PREPARED));
877       }
878 
879       trx->state = TRX_STATE_PREPARED;
880     } else {
881       if (!trx_state_eq(trx, TRX_STATE_NOT_STARTED)) {
882         ut_ad(trx_state_eq(trx, TRX_STATE_PREPARED));
883       }
884       /* XtraBackup is asked to rollback prepared XA
885       transactions */
886       trx->state = TRX_STATE_ACTIVE;
887     }
888   } else {
889     trx->state = TRX_STATE_COMMITTED_IN_MEMORY;
890   }
891 }
892 
893 /** Resurrect the transactions that were doing updates the time of the
894  crash, they need to be undone. */
trx_resurrect_update(trx_t * trx,trx_undo_t * undo,trx_rseg_t * rseg)895 static void trx_resurrect_update(
896     trx_t *trx,       /*!< in/out: transaction */
897     trx_undo_t *undo, /*!< in/out: update UNDO record */
898     trx_rseg_t *rseg) /*!< in/out: rollback segment */
899 {
900   /* This resurected transaction might also have been doing inserts.
901   If so, this rseg is already assigned by trx_resurrect_insert(). */
902   if (trx->rsegs.m_redo.rseg != nullptr) {
903     ut_a(trx->rsegs.m_redo.rseg == rseg);
904     ut_ad(trx->id == undo->trx_id);
905     ut_ad(trx->is_recovered);
906     /* For GTID persistence, we might have empty update undo for
907     insert only transactions. */
908     if (undo->empty && trx_state_eq(trx, TRX_STATE_PREPARED)) {
909       undo->set_prepared(trx->xid);
910     }
911     ut_ad(undo->xid.eq(trx->xid));
912   } else {
913     rseg->trx_ref_count++;
914     trx->rsegs.m_redo.rseg = rseg;
915     *trx->xid = undo->xid;
916     trx->id = undo->trx_id;
917     trx->is_recovered = true;
918   }
919 
920   /* Assign the update_undo segment. */
921   ut_a(trx->rsegs.m_redo.update_undo == nullptr);
922   trx->rsegs.m_redo.update_undo = undo;
923 
924   /* This is single-threaded startup code, we do not need the
925   protection of trx->mutex or trx_sys->mutex here. */
926 
927   if (undo->state != TRX_UNDO_ACTIVE) {
928     trx_resurrect_update_in_prepared_state(trx, undo);
929 
930     /* We give a dummy value for the trx number */
931 
932     trx->no = trx->id;
933 
934   } else {
935     trx->state = TRX_STATE_ACTIVE;
936 
937     /* A running transaction always has the number field inited to
938     TRX_ID_MAX */
939 
940     trx->no = TRX_ID_MAX;
941   }
942 
943   /* trx_start_low() is not called with resurrect, so need to initialize
944   start time here.*/
945   if (trx->state == TRX_STATE_ACTIVE || trx->state == TRX_STATE_PREPARED) {
946     trx->start_time = ut_time();
947   }
948 
949   trx->ddl_operation = undo->dict_operation;
950 
951   if (undo->dict_operation) {
952     trx_set_dict_operation(trx, TRX_DICT_OP_TABLE);
953   }
954 
955   if (!undo->empty && undo->top_undo_no >= trx->undo_no) {
956     trx->undo_no = undo->top_undo_no + 1;
957     trx->undo_rseg_space = undo->rseg->space_id;
958   }
959 }
960 
961 /** Resurrect the transactions that were doing inserts and updates at
962 the time of a crash, they need to be undone.
963 @param[in]	rseg	rollback segment */
trx_resurrect(trx_rseg_t * rseg)964 static void trx_resurrect(trx_rseg_t *rseg) {
965   trx_t *trx;
966   trx_undo_t *undo;
967 
968   ut_ad(rseg != nullptr);
969 
970   /* Resurrect transactions that were doing inserts. */
971   for (undo = UT_LIST_GET_FIRST(rseg->insert_undo_list); undo != nullptr;
972        undo = UT_LIST_GET_NEXT(undo_list, undo)) {
973     trx = trx_resurrect_insert(undo, rseg);
974 
975     trx_sys_rw_trx_add(trx);
976 
977     trx_resurrect_table_ids(trx, &trx->rsegs.m_redo, undo);
978   }
979 
980   /* Ressurrect transactions that were doing updates. */
981   for (undo = UT_LIST_GET_FIRST(rseg->update_undo_list); undo != nullptr;
982        undo = UT_LIST_GET_NEXT(undo_list, undo)) {
983     /* Check the trx_sys->rw_trx_set first. */
984     trx_sys_mutex_enter();
985 
986     trx_t *trx = trx_get_rw_trx_by_id(undo->trx_id);
987 
988     trx_sys_mutex_exit();
989 
990     if (trx == nullptr) {
991       trx = trx_allocate_for_background();
992 
993       ut_d(trx->start_file = __FILE__);
994       ut_d(trx->start_line = __LINE__);
995     }
996 
997     trx_resurrect_update(trx, undo, rseg);
998 
999     trx_sys_rw_trx_add(trx);
1000 
1001     trx_resurrect_table_ids(trx, &trx->rsegs.m_redo, undo);
1002   }
1003 }
1004 
1005 /** Creates trx objects for transactions and initializes the trx list of
1006  trx_sys at database start. Rollback segments and undo log lists must
1007  already exist when this function is called, because the lists of
1008  transactions to be rolled back or cleaned up are built based on the
1009  undo log lists. */
trx_lists_init_at_db_start(void)1010 void trx_lists_init_at_db_start(void) {
1011   ut_a(srv_is_being_started);
1012 
1013   if (srv_apply_log_only) {
1014     return;
1015   }
1016 
1017   /* Look through the rollback segments in the TRX_SYS for
1018   transaction undo logs. */
1019   for (auto rseg : trx_sys->rsegs) {
1020     trx_resurrect(rseg);
1021   }
1022 
1023   /* Look through the rollback segments in each RSEG_ARRAY for
1024   transaction undo logs. */
1025   undo::spaces->s_lock();
1026   for (auto undo_space : undo::spaces->m_spaces) {
1027     undo_space->rsegs()->s_lock();
1028     for (auto rseg : *undo_space->rsegs()) {
1029       trx_resurrect(rseg);
1030     }
1031     undo_space->rsegs()->s_unlock();
1032   }
1033   undo::spaces->s_unlock();
1034 
1035   TrxIdSet::iterator end = trx_sys->rw_trx_set.end();
1036 
1037   for (TrxIdSet::iterator it = trx_sys->rw_trx_set.begin(); it != end; ++it) {
1038     ut_ad(it->m_trx->in_rw_trx_list);
1039 
1040     if (it->m_trx->state == TRX_STATE_ACTIVE ||
1041         it->m_trx->state == TRX_STATE_PREPARED) {
1042       trx_sys->rw_trx_ids.push_back(it->m_id);
1043     }
1044 
1045     UT_LIST_ADD_FIRST(trx_sys->rw_trx_list, it->m_trx);
1046   }
1047 }
1048 
1049 /** Get next redo rollback segment in round-robin fashion.
1050 While InnoDB is running in multi-threaded mode, the vectors of undo
1051 tablespaces and rsegs do not shrink.  So they do not need protection
1052 to get a pointer to an rseg.
1053 If an rseg is not marked for undo tablespace truncation, we assign
1054 it to a transaction. We increment trx_ref_count to keep the purge
1055 thread from truncating the undo tablespace that contains this rseg
1056 until the transaction is done with it.
1057 @return assigned rollback segment instance */
get_next_redo_rseg_from_undo_spaces()1058 static trx_rseg_t *get_next_redo_rseg_from_undo_spaces() {
1059   undo::Tablespace *undo_space;
1060 
1061   /* The number of undo tablespaces cannot be changed while
1062   we have this s_lock. */
1063   undo::spaces->s_lock();
1064 
1065   /* Use all known undo tablespaces.  Some may be inactive. */
1066   ulint target_undo_tablespaces = undo::spaces->size();
1067 
1068   ut_ad(target_undo_tablespaces > 0);
1069 
1070   /* The number of rollback segments may be changed at any instant.
1071   So use the value at this instant.  Rollback segments are never
1072   deleted from an rseg list, so srv_rollback_segments is always
1073   less than rsegs->size(). */
1074   ulint target_rollback_segments = srv_rollback_segments;
1075 
1076   static ulint rseg_counter = 0;
1077   trx_rseg_t *rseg = nullptr;
1078   ulint current = rseg_counter;
1079 
1080   /* Increment the static redo_rseg_slot so the next call from any thread
1081   starts with the next rseg. */
1082   os_atomic_increment_ulint(&rseg_counter, 1);
1083 
1084   while (rseg == nullptr) {
1085     /* Traverse the rsegs like this: (space, rseg_id)
1086     (0,0), (1,0), ... (n,0), (0,1), (1,1), ... (n,1), ... */
1087     ulint window =
1088         current % (target_rollback_segments * target_undo_tablespaces);
1089     ulint spaces_slot = window % target_undo_tablespaces;
1090     ulint rseg_slot = window / target_undo_tablespaces;
1091 
1092     current++;
1093 
1094     undo_space = undo::spaces->at(spaces_slot);
1095 
1096     /* Avoid any rseg that resides in a tablespace that has been made
1097     inactive either explicitly or by being marked for truncate. We do
1098     not want to wait here on an x_lock for an rseg in an undo tablespace
1099     that is being truncated.  So check this first without the latch.
1100     It could be set immediately after this, but that is a very short gap
1101     and the get_active() call below will use an rseg->s_lock. */
1102     if (!undo_space->is_active_no_latch()) {
1103       continue;
1104     }
1105 
1106     /* This is done here because we know the rsegs() pointer is good. */
1107     ut_ad(target_rollback_segments <= undo_space->rsegs()->size());
1108 
1109     /* Check again with a shared lock. */
1110     rseg = undo_space->get_active(rseg_slot);
1111     if (rseg == nullptr) {
1112       continue;
1113     }
1114   }
1115 
1116   undo::spaces->s_unlock();
1117 
1118   ut_ad(rseg->trx_ref_count > 0);
1119 
1120   return (rseg);
1121 }
1122 
1123 /** Get the next redo rollback segment in round-robin fashion.
1124 The assigned slots may have gaps but the vector does not.
1125 @return assigned rollback segment instance */
get_next_redo_rseg_from_trx_sys()1126 static trx_rseg_t *get_next_redo_rseg_from_trx_sys() {
1127   static ulint rseg_counter = 0;
1128   ulong n_rollback_segments = srv_rollback_segments;
1129 
1130   /* Versions 5.6 and 5.7 of InnoDB would allow 128 as the max for
1131   innodb_rollback_segments but would only use 96 since 32 slots were
1132   used for temporary rsegs. Now those rsegs are in trx_sys_t::tmp_rsegs
1133   and trx_sys_t::rsegs which each can hold all 128.  As a result,
1134   an existing system tablespace might have gaps in the slot assignment.
1135   The Rsegs vector only contains the rsegs that exist. Since
1136   srv_rollback_segments can be set to a smaller number at runtime,
1137   it might be smaller than Rsegs::size().  But srv_rollback_segments
1138   can never be larger than Rsegs::size() because when the user increases
1139   innodb_rollback_segments, the rollback segments are created and rseg
1140   objects are added to the vector ready to use before
1141   srv_rollback_segments is increased. */
1142   ut_ad(n_rollback_segments <= trx_sys->rsegs.size());
1143 
1144   /* Try the next slot that no other thread is looking at */
1145   ulint slot =
1146       os_atomic_increment_ulint(&rseg_counter, 1) % n_rollback_segments;
1147 
1148   /* s_lock the vector since it might be sorted when added to. */
1149   trx_sys->rsegs.s_lock();
1150   trx_rseg_t *rseg = trx_sys->rsegs.at(slot);
1151   trx_sys->rsegs.s_unlock();
1152 
1153   /* It is not neccessary to s_lock Rsegs::m_latch here because the
1154   system tablespace is never truncated like other undo tablespaces. */
1155   rseg->trx_ref_count++;
1156 
1157   ut_ad(rseg->space_id == TRX_SYS_SPACE);
1158 
1159   return (rseg);
1160 }
1161 
1162 /** Get next redo rollback segment in round-robin fashion.
1163 We assume that the assigned slots are not contiguous and have gaps.
1164 @return assigned rollback segment instance */
get_next_redo_rseg()1165 static trx_rseg_t *get_next_redo_rseg() {
1166   if (!trx_sys->rsegs.is_empty()) {
1167     return (get_next_redo_rseg_from_trx_sys());
1168   } else {
1169     return (get_next_redo_rseg_from_undo_spaces());
1170   }
1171 }
1172 
1173 /** Get the next noredo rollback segment.
1174 @return assigned rollback segment instance */
get_next_temp_rseg()1175 static trx_rseg_t *get_next_temp_rseg() {
1176   static ulint temp_rseg_counter = 0;
1177   ulong n_rollback_segments = srv_rollback_segments;
1178 
1179   ut_ad(n_rollback_segments <= trx_sys->tmp_rsegs.size());
1180 
1181   /* Try the next slot that no other thread is looking at */
1182   ulint slot =
1183       os_atomic_increment_ulint(&temp_rseg_counter, 1) % n_rollback_segments;
1184 
1185   /* No need to s_lock the vector since it is only added to at the end,
1186   and it is never resized or sorted. */
1187   trx_rseg_t *rseg = trx_sys->tmp_rsegs.at(slot);
1188 
1189   ut_ad(rseg->id == slot);
1190   ut_ad(fsp_is_system_temporary(rseg->space_id));
1191 
1192   return (rseg);
1193 }
1194 
1195 /** Assign a durable rollback segment to a transaction in a round-robin
1196 fashion.
1197 @param[in,out]	trx	transaction that involves a durable write. */
trx_assign_rseg_durable(trx_t * trx)1198 void trx_assign_rseg_durable(trx_t *trx) {
1199   ut_ad(trx->rsegs.m_redo.rseg == nullptr);
1200 
1201   trx->rsegs.m_redo.rseg = srv_read_only_mode ? nullptr : get_next_redo_rseg();
1202 }
1203 
1204 /** Assign a temp-tablespace bound rollback-segment to a transaction.
1205 @param[in,out]	trx	transaction that involves write to temp-table. */
trx_assign_rseg_temp(trx_t * trx)1206 void trx_assign_rseg_temp(trx_t *trx) {
1207   ut_ad(trx->rsegs.m_noredo.rseg == nullptr);
1208   ut_ad(!trx_is_autocommit_non_locking(trx));
1209 
1210   trx->rsegs.m_noredo.rseg =
1211       srv_read_only_mode ? nullptr : get_next_temp_rseg();
1212 
1213   if (trx->id == 0) {
1214     mutex_enter(&trx_sys->mutex);
1215 
1216     trx->id = trx_sys_get_new_trx_id();
1217 
1218     trx_sys->rw_trx_ids.push_back(trx->id);
1219 
1220     trx_sys->rw_trx_set.insert(TrxTrack(trx->id, trx));
1221 
1222     mutex_exit(&trx_sys->mutex);
1223   }
1224 }
1225 
1226 /** Starts a transaction. */
trx_start_low(trx_t * trx,bool read_write)1227 static void trx_start_low(
1228     trx_t *trx,      /*!< in: transaction */
1229     bool read_write) /*!< in: true if read-write transaction */
1230 {
1231   ut_ad(!trx->in_rollback);
1232   ut_ad(!trx->is_recovered);
1233   ut_ad(trx->start_line != 0);
1234   ut_ad(trx->start_file != nullptr);
1235   ut_ad(trx->roll_limit == 0);
1236   ut_ad(!trx->lock.in_rollback);
1237   ut_ad(trx->error_state == DB_SUCCESS);
1238   ut_ad(trx->rsegs.m_redo.rseg == nullptr);
1239   ut_ad(trx->rsegs.m_noredo.rseg == nullptr);
1240   ut_ad(trx_state_eq(trx, TRX_STATE_NOT_STARTED));
1241   ut_ad(UT_LIST_GET_LEN(trx->lock.trx_locks) == 0);
1242   ut_ad(!(trx->in_innodb & TRX_FORCE_ROLLBACK));
1243   ut_ad(!(trx->in_innodb & TRX_FORCE_ROLLBACK_ASYNC));
1244 
1245   ++trx->version;
1246 
1247   /* Check whether it is an AUTOCOMMIT SELECT */
1248   trx->auto_commit = (trx->api_trx && trx->api_auto_commit) ||
1249                      thd_trx_is_auto_commit(trx->mysql_thd);
1250 
1251   trx->read_only = (trx->api_trx && !trx->read_write) ||
1252                    (!trx->internal && thd_trx_is_read_only(trx->mysql_thd)) ||
1253                    srv_read_only_mode;
1254 
1255   if (!trx->auto_commit) {
1256     ++trx->will_lock;
1257   } else if (trx->will_lock == 0) {
1258     trx->read_only = true;
1259   }
1260   trx->persists_gtid = false;
1261 
1262 #ifdef UNIV_DEBUG
1263   /* If the transaction is DD attachable trx, it should be AC-NL-RO
1264   (AutoCommit-NonLocking-ReadOnly) trx */
1265   if (trx->is_dd_trx) {
1266     ut_ad(trx->read_only);
1267     ut_ad(trx->auto_commit);
1268     ut_ad(trx->isolation_level == TRX_ISO_READ_UNCOMMITTED ||
1269           trx->isolation_level == TRX_ISO_READ_COMMITTED);
1270   }
1271 #endif /* UNIV_DEBUG */
1272 
1273   if (trx->mysql_thd != nullptr && !trx->ddl_operation) {
1274     trx->ddl_operation = thd_is_dd_update_stmt(trx->mysql_thd);
1275   }
1276 
1277   /* The initial value for trx->no: TRX_ID_MAX is used in
1278   read_view_open_now: */
1279 
1280   trx->no = TRX_ID_MAX;
1281 
1282   ut_a(ib_vector_is_empty(trx->lock.autoinc_locks));
1283   ut_a(trx->lock.table_locks.empty());
1284 
1285   /* If this transaction came from trx_allocate_for_mysql(),
1286   trx->in_mysql_trx_list would hold. In that case, the trx->state
1287   change must be protected by the trx_sys->mutex, so that
1288   lock_print_info_all_transactions() will have a consistent view. */
1289 
1290   ut_ad(!trx->in_rw_trx_list);
1291 
1292   /* We tend to over assert and that complicates the code somewhat.
1293   e.g., the transaction state can be set earlier but we are forced to
1294   set it under the protection of the trx_sys_t::mutex because some
1295   trx list assertions are triggered unnecessarily. */
1296 
1297   /* By default all transactions are in the read-only list unless they
1298   are non-locking auto-commit read only transactions or background
1299   (internal) transactions. Note: Transactions marked explicitly as
1300   read only can write to temporary tables, we put those on the RO
1301   list too. */
1302 
1303   if (!trx->read_only &&
1304       (trx->mysql_thd == nullptr || read_write || trx->ddl_operation)) {
1305     trx_assign_rseg_durable(trx);
1306 
1307     /* Temporary rseg is assigned only if the transaction
1308     updates a temporary table */
1309 
1310     trx_sys_mutex_enter();
1311 
1312     trx->id = trx_sys_get_new_trx_id();
1313 
1314     trx_sys->rw_trx_ids.push_back(trx->id);
1315 
1316     trx_sys_rw_trx_add(trx);
1317 
1318     ut_ad(trx->rsegs.m_redo.rseg != nullptr || srv_read_only_mode ||
1319           srv_force_recovery >= SRV_FORCE_NO_TRX_UNDO);
1320 
1321     UT_LIST_ADD_FIRST(trx_sys->rw_trx_list, trx);
1322 
1323     ut_d(trx->in_rw_trx_list = true);
1324 
1325     trx->state = TRX_STATE_ACTIVE;
1326 
1327     ut_ad(trx_sys_validate_trx_list());
1328 
1329     trx_sys_mutex_exit();
1330 
1331   } else {
1332     trx->id = 0;
1333 
1334     if (!trx_is_autocommit_non_locking(trx)) {
1335       /* If this is a read-only transaction that is writing
1336       to a temporary table then it needs a transaction id
1337       to write to the temporary table. */
1338 
1339       if (read_write) {
1340         trx_sys_mutex_enter();
1341 
1342         ut_ad(!srv_read_only_mode);
1343 
1344         trx->id = trx_sys_get_new_trx_id();
1345 
1346         trx_sys->rw_trx_ids.push_back(trx->id);
1347 
1348         trx_sys->rw_trx_set.insert(TrxTrack(trx->id, trx));
1349 
1350         trx_sys_mutex_exit();
1351       }
1352 
1353       trx->state = TRX_STATE_ACTIVE;
1354 
1355     } else {
1356       ut_ad(!read_write);
1357       trx->state = TRX_STATE_ACTIVE;
1358     }
1359   }
1360 
1361   if (trx->mysql_thd != nullptr) {
1362     trx->start_time = thd_start_time_in_secs(trx->mysql_thd);
1363   } else {
1364     trx->start_time = ut_time();
1365   }
1366 
1367   /* This value will only be read by a thread inspecting lock sys queue after
1368   the thread which enqueues this trx releases the queue's latch. */
1369   trx->lock.schedule_weight.store(0, std::memory_order_relaxed);
1370 
1371   ut_a(trx->error_state == DB_SUCCESS);
1372 
1373   MONITOR_INC(MONITOR_TRX_ACTIVE);
1374 }
1375 
1376 /** Set the transaction serialisation number.
1377  @return true if the transaction number was added to the serialisation_list. */
trx_serialisation_number_get(trx_t * trx,trx_undo_ptr_t * redo_rseg_undo_ptr,trx_undo_ptr_t * temp_rseg_undo_ptr)1378 static bool trx_serialisation_number_get(
1379     trx_t *trx,                         /*!< in/out: transaction */
1380     trx_undo_ptr_t *redo_rseg_undo_ptr, /*!< in/out: Set trx
1381                                         serialisation number in
1382                                         referred undo rseg. */
1383     trx_undo_ptr_t *temp_rseg_undo_ptr) /*!< in/out: Set trx
1384                                         serialisation number in
1385                                         referred undo rseg. */
1386 {
1387   bool added_trx_no;
1388   trx_rseg_t *redo_rseg = nullptr;
1389   trx_rseg_t *temp_rseg = nullptr;
1390 
1391   if (redo_rseg_undo_ptr != nullptr) {
1392     ut_ad(mutex_own(&redo_rseg_undo_ptr->rseg->mutex));
1393     redo_rseg = redo_rseg_undo_ptr->rseg;
1394   }
1395 
1396   if (temp_rseg_undo_ptr != nullptr) {
1397     ut_ad(mutex_own(&temp_rseg_undo_ptr->rseg->mutex));
1398     temp_rseg = temp_rseg_undo_ptr->rseg;
1399   }
1400 
1401   trx_sys_mutex_enter();
1402 
1403   trx->no = trx_sys_get_new_trx_id();
1404 
1405   /* Update the latest transaction number. */
1406   ut_d(trx_sys->rw_max_trx_no = trx->no);
1407 
1408   /* Track the minimum serialisation number. */
1409   if (!trx->read_only) {
1410     UT_LIST_ADD_LAST(trx_sys->serialisation_list, trx);
1411     added_trx_no = true;
1412   } else {
1413     added_trx_no = false;
1414   }
1415 
1416   /* If the rollack segment is not empty then the
1417   new trx_t::no can't be less than any trx_t::no
1418   already in the rollback segment. User threads only
1419   produce events when a rollback segment is empty. */
1420   if ((redo_rseg != nullptr && redo_rseg->last_page_no == FIL_NULL) ||
1421       (temp_rseg != nullptr && temp_rseg->last_page_no == FIL_NULL)) {
1422     TrxUndoRsegs elem(trx->no);
1423 
1424     if (redo_rseg != nullptr && redo_rseg->last_page_no == FIL_NULL) {
1425       elem.push_back(redo_rseg);
1426     }
1427 
1428     if (temp_rseg != nullptr && temp_rseg->last_page_no == FIL_NULL) {
1429       elem.push_back(temp_rseg);
1430     }
1431 
1432     mutex_enter(&purge_sys->pq_mutex);
1433 
1434     /* This is to reduce the pressure on the trx_sys_t::mutex
1435     though in reality it should make very little (read no)
1436     difference because this code path is only taken when the
1437     rbs is empty. */
1438 
1439     trx_sys_mutex_exit();
1440 
1441     purge_sys->purge_queue->push(elem);
1442 
1443     mutex_exit(&purge_sys->pq_mutex);
1444   } else {
1445     trx_sys_mutex_exit();
1446   }
1447 
1448   return (added_trx_no);
1449 }
1450 
/** Assign the transaction its history serialisation number and write the
 update UNDO log record to the assigned rollback segment.
 Insert undo logs are finished with the state chosen by
 trx_undo_set_state_at_finish(); update undo logs are additionally passed
 to trx_undo_update_cleanup(). Changes to the redo rollback segment go
 through the caller's mini-transaction, while changes to the temporary
 rollback segment use a local mini-transaction with redo logging disabled.
 @return true if a serialisation log was written */
static bool trx_write_serialisation_history(
    trx_t *trx, /*!< in/out: transaction */
    mtr_t *mtr) /*!< in/out: mini-transaction */
{
  /* Change the undo log segment states from TRX_UNDO_ACTIVE to some
  other state: these modifications to the file data structure define
  the transaction as committed in the file based domain, at the
  serialization point of the log sequence number lsn obtained below. */

  /* We have to hold the rseg mutex because update log headers have
  to be put to the history list in the (serialisation) order of the
  UNDO trx number. This is required for the purge in-memory data
  structures too. */

  /* Remember which rsegs were latched here so that exactly those are
  unlatched before returning. */
  bool own_redo_rseg_mutex = false;
  bool own_temp_rseg_mutex = false;

  /* Get rollback segment mutex. */
  if (trx->rsegs.m_redo.rseg != nullptr && trx_is_redo_rseg_updated(trx)) {
    trx->rsegs.m_redo.rseg->latch();
    own_redo_rseg_mutex = true;
  }

  /* Mini-transaction for the temporary rseg; it is only started (and
  later committed) when the temporary rseg was updated. */
  mtr_t temp_mtr;

  if (trx->rsegs.m_noredo.rseg != nullptr && trx_is_temp_rseg_updated(trx)) {
    trx->rsegs.m_noredo.rseg->latch();
    own_temp_rseg_mutex = true;
    mtr_start(&temp_mtr);
    temp_mtr.set_log_mode(MTR_LOG_NO_REDO);
  }

  /* If transaction involves insert then truncate undo logs. */
  if (trx->rsegs.m_redo.insert_undo != nullptr) {
    trx_undo_set_state_at_finish(trx->rsegs.m_redo.insert_undo, mtr);
  }

  if (trx->rsegs.m_noredo.insert_undo != nullptr) {
    trx_undo_set_state_at_finish(trx->rsegs.m_noredo.insert_undo, &temp_mtr);
  }

  bool serialised = false;

  /* If transaction involves update then add rollback segments
  to purge queue. */
  if (trx->rsegs.m_redo.update_undo != nullptr ||
      trx->rsegs.m_noredo.update_undo != nullptr) {
    /* Assign the transaction serialisation number and add these
    rollback segments to purge trx-no sorted priority queue
    if this is the first UNDO log being written to assigned
    rollback segments. */

    /* Only an rseg that actually holds an update undo log is passed
    on; the other pointer stays nullptr. */
    trx_undo_ptr_t *redo_rseg_undo_ptr =
        trx->rsegs.m_redo.update_undo != nullptr ? &trx->rsegs.m_redo : nullptr;

    trx_undo_ptr_t *temp_rseg_undo_ptr =
        trx->rsegs.m_noredo.update_undo != nullptr ? &trx->rsegs.m_noredo
                                                   : nullptr;

    /* Will set trx->no and will add rseg to purge queue. */
    serialised = trx_serialisation_number_get(trx, redo_rseg_undo_ptr,
                                              temp_rseg_undo_ptr);

    /* It is not necessary to obtain trx->undo_mutex here because
    only a single OS thread is allowed to do the transaction commit
    for this transaction. */
    if (trx->rsegs.m_redo.update_undo != nullptr) {
      page_t *undo_hdr_page;

      undo_hdr_page =
          trx_undo_set_state_at_finish(trx->rsegs.m_redo.update_undo, mtr);

      /* Delay update of rseg_history_len if we plan to add
      non-redo update_undo too. This is to avoid immediate
      invocation of purge as we need to club these 2 segments
      with same trx-no as single unit. */
      bool update_rseg_len = !(trx->rsegs.m_noredo.update_undo != nullptr);

      /* Set flag if GTID information need to persist. */
      auto undo_ptr = &trx->rsegs.m_redo;
      trx_undo_gtid_set(trx, undo_ptr->update_undo);

      /* One log is accounted for here unless the accounting is
      deferred to the temporary rseg cleanup below. */
      trx_undo_update_cleanup(trx, undo_ptr, undo_hdr_page, update_rseg_len,
                              (update_rseg_len ? 1 : 0), mtr);
    }

    /* Debug-only crash injection point between the redo and the
    temporary update undo handling. */
    DBUG_EXECUTE_IF("ib_trx_crash_during_commit", DBUG_SUICIDE(););

    if (trx->rsegs.m_noredo.update_undo != nullptr) {
      page_t *undo_hdr_page;

      undo_hdr_page = trx_undo_set_state_at_finish(
          trx->rsegs.m_noredo.update_undo, &temp_mtr);

      /* Account for the redo update undo log too if its accounting
      was deferred above. */
      ulint n_added_logs = (redo_rseg_undo_ptr != nullptr) ? 2 : 1;

      trx_undo_update_cleanup(trx, &trx->rsegs.m_noredo, undo_hdr_page, true,
                              n_added_logs, &temp_mtr);
    }
  }

  /* Release the rseg latches taken above. */
  if (own_redo_rseg_mutex) {
    trx->rsegs.m_redo.rseg->unlatch();
    own_redo_rseg_mutex = false;
  }

  if (own_temp_rseg_mutex) {
    trx->rsegs.m_noredo.rseg->unlatch();
    own_temp_rseg_mutex = false;
    mtr_commit(&temp_mtr);
  }

  MONITOR_INC(MONITOR_TRX_COMMIT_UNDO);

  /* Update the latest MySQL binlog name and offset information
  in trx sys header only if MySQL binary logging is on and clone
  has ensured commit order at final stage. */
  if (Clone_handler::need_commit_order()) {
    trx_sys_update_mysql_binlog_offset(trx, mtr);
  }

  return (serialised);
}
1577 
1578 /********************************************************************
1579 Finalize a transaction containing updates for a FTS table. */
trx_finalize_for_fts_table(fts_trx_table_t * ftt)1580 static void trx_finalize_for_fts_table(
1581     fts_trx_table_t *ftt) /* in: FTS trx table */
1582 {
1583   fts_t *fts = ftt->table->fts;
1584   fts_doc_ids_t *doc_ids = ftt->added_doc_ids;
1585 
1586   mutex_enter(&fts->bg_threads_mutex);
1587 
1588   if (fts->fts_status & BG_THREAD_STOP) {
1589     /* The table is about to be dropped, no use
1590     adding anything to its work queue. */
1591 
1592     mutex_exit(&fts->bg_threads_mutex);
1593   } else {
1594     mem_heap_t *heap;
1595     mutex_exit(&fts->bg_threads_mutex);
1596 
1597     ut_a(fts->add_wq);
1598 
1599     heap = static_cast<mem_heap_t *>(doc_ids->self_heap->arg);
1600 
1601     ib_wqueue_add(fts->add_wq, doc_ids, heap);
1602 
1603     /* fts_trx_table_t no longer owns the list. */
1604     ftt->added_doc_ids = nullptr;
1605   }
1606 }
1607 
1608 /** Finalize a transaction containing updates to FTS tables. */
trx_finalize_for_fts(trx_t * trx,bool is_commit)1609 static void trx_finalize_for_fts(
1610     trx_t *trx,     /*!< in/out: transaction */
1611     bool is_commit) /*!< in: true if the transaction was
1612                     committed, false if it was rolled back. */
1613 {
1614   if (is_commit) {
1615     const ib_rbt_node_t *node;
1616     ib_rbt_t *tables;
1617     fts_savepoint_t *savepoint;
1618 
1619     savepoint = static_cast<fts_savepoint_t *>(
1620         ib_vector_last(trx->fts_trx->savepoints));
1621 
1622     tables = savepoint->tables;
1623 
1624     for (node = rbt_first(tables); node; node = rbt_next(tables, node)) {
1625       fts_trx_table_t **ftt;
1626 
1627       ftt = rbt_value(fts_trx_table_t *, node);
1628 
1629       if ((*ftt)->added_doc_ids) {
1630         trx_finalize_for_fts_table(*ftt);
1631       }
1632     }
1633   }
1634 
1635   fts_trx_free(trx->fts_trx);
1636   trx->fts_trx = nullptr;
1637 }
1638 
1639 /** If required, flushes the log to disk based on the value of
1640  innodb_flush_log_at_trx_commit. */
trx_flush_log_if_needed_low(lsn_t lsn)1641 static void trx_flush_log_if_needed_low(lsn_t lsn) /*!< in: lsn up to which logs
1642                                                    are to be flushed. */
1643 {
1644 #ifdef _WIN32
1645   bool flush = true;
1646 #else
1647   bool flush = srv_unix_file_flush_method != SRV_UNIX_NOSYNC;
1648 #endif /* _WIN32 */
1649 
1650   Wait_stats wait_stats;
1651 
1652   switch (srv_flush_log_at_trx_commit) {
1653     case 2:
1654       /* Write the log but do not flush it to disk */
1655       flush = false;
1656       /* fall through */
1657     case 1:
1658       /* Write the log and optionally flush it to disk */
1659       wait_stats = log_write_up_to(*log_sys, lsn, flush);
1660 
1661       MONITOR_INC_WAIT_STATS(MONITOR_TRX_ON_LOG_, wait_stats);
1662 
1663       return;
1664     case 0:
1665       /* Do nothing */
1666       return;
1667   }
1668 }
1669 
1670 /** If required, flushes the log to disk based on the value of
1671  innodb_flush_log_at_trx_commit. */
trx_flush_log_if_needed(lsn_t lsn,trx_t * trx)1672 static void trx_flush_log_if_needed(lsn_t lsn, /*!< in: lsn up to which logs are
1673                                                to be flushed. */
1674                                     trx_t *trx) /*!< in/out: transaction */
1675 {
1676   trx->op_info = "flushing log";
1677 
1678   DEBUG_SYNC_C("trx_flush_log_if_needed");
1679 
1680   if (trx->ddl_operation || trx->ddl_must_flush) {
1681     log_write_up_to(*log_sys, lsn, true);
1682   } else {
1683     trx_flush_log_if_needed_low(lsn);
1684   }
1685 
1686   trx->op_info = "";
1687 }
1688 
1689 /** For each table that has been modified by the given transaction: update
1690  its dict_table_t::update_time with the current timestamp. Clear the list
1691  of the modified tables at the end. */
trx_update_mod_tables_timestamp(trx_t * trx)1692 static void trx_update_mod_tables_timestamp(trx_t *trx) /*!< in: transaction */
1693 {
1694   ut_ad(trx->id != 0);
1695 
1696   /* consider using trx->start_time if calling time() is too
1697   expensive here */
1698   time_t now = ut_time();
1699 
1700   trx_mod_tables_t::const_iterator end = trx->mod_tables.end();
1701 
1702   for (trx_mod_tables_t::const_iterator it = trx->mod_tables.begin(); it != end;
1703        ++it) {
1704     /* This could be executed by multiple threads concurrently
1705     on the same table object. This is fine because time_t is
1706     word size or less. And _purely_ _theoretically_, even if
1707     time_t write is not atomic, likely the value of 'now' is
1708     the same in all threads and even if it is not, getting a
1709     "garbage" in table->update_time is justified because
1710     protecting it with a latch here would be too performance
1711     intrusive. */
1712     (*it)->update_time = now;
1713   }
1714 
1715   trx->mod_tables.clear();
1716 }
1717 
1718 /**
1719 Erase the transaction from running transaction lists and serialization
1720 list. Active RW transaction list of a MVCC snapshot(ReadView::prepare)
1721 won't include this transaction after this call. All implicit locks are
1722 also released by this call as trx is removed from rw_trx_list.
1723 @param[in]	trx		Transaction to erase, must have an ID > 0
1724 @param[in]	serialised	true if serialisation log was written
1725 @param[in]	gtid_desc	GTID information to persist */
trx_erase_lists(trx_t * trx,bool serialised,Gtid_desc & gtid_desc)1726 static void trx_erase_lists(trx_t *trx, bool serialised, Gtid_desc &gtid_desc) {
1727   ut_ad(trx->id > 0);
1728   ut_ad(trx_sys_mutex_own());
1729 
1730   if (serialised) {
1731     UT_LIST_REMOVE(trx_sys->serialisation_list, trx);
1732 
1733     /* Add GTID to be persisted to disk table. It must be done ...
1734     1.After the transaction is marked committed in undo. Otherwise
1735       GTID might get committed before the transaction commit on disk.
1736     2.Before it is removed from serialization list. Otherwise the transaction
1737       undo could get purged before persisting GTID on disk table. */
1738     if (gtid_desc.m_is_set) {
1739       auto &gtid_persistor = clone_sys->get_gtid_persistor();
1740       gtid_persistor.add(gtid_desc);
1741     }
1742   }
1743 
1744   trx_ids_t::iterator it = std::lower_bound(trx_sys->rw_trx_ids.begin(),
1745                                             trx_sys->rw_trx_ids.end(), trx->id);
1746   ut_ad(*it == trx->id);
1747   trx_sys->rw_trx_ids.erase(it);
1748 
1749   if (trx->read_only || trx->rsegs.m_redo.rseg == nullptr) {
1750     ut_ad(!trx->in_rw_trx_list);
1751   } else {
1752     UT_LIST_REMOVE(trx_sys->rw_trx_list, trx);
1753     ut_d(trx->in_rw_trx_list = false);
1754     ut_ad(trx_sys_validate_trx_list());
1755 
1756     if (trx->read_view != nullptr) {
1757       trx_sys->mvcc->view_close(trx->read_view, true);
1758     }
1759   }
1760 
1761   trx_sys->rw_trx_set.erase(TrxTrack(trx->id));
1762 
1763   /* Set minimal active trx id. */
1764   trx_id_t min_id = trx_sys->rw_trx_ids.empty() ? trx_sys->max_trx_id
1765                                                 : trx_sys->rw_trx_ids.front();
1766 
1767   trx_sys->min_active_id.store(min_id);
1768 }
1769 
/** Marks a committing transaction as TRX_STATE_COMMITTED_IN_MEMORY and then
releases its locks.
The transaction is first removed from the trx_sys bookkeeping (via
trx_erase_lists(), only when it has a non-zero id), the prepared-transaction
counter is decremented if the transaction was prepared, the state is switched
while holding trx->mutex, and finally lock_trx_release_locks() is called after
the latches have been released.
@param[in,out]  trx         transaction being committed; must be in
                            TRX_STATE_ACTIVE or TRX_STATE_PREPARED (asserted in
                            debug builds)
@param[in]      serialized  passed through unchanged to trx_erase_lists() */
static void trx_release_impl_and_expl_locks(trx_t *trx, bool serialized) {
  check_trx_state(trx);
  ut_ad(trx_state_eq(trx, TRX_STATE_ACTIVE) ||
        trx_state_eq(trx, TRX_STATE_PREPARED));

  /* trx_sys->mutex is only taken when something it protects is about to be
  modified below: the lists handled by trx_erase_lists() (trx->id > 0) or
  trx_sys->n_prepared_trx (prepared transaction). */
  bool trx_sys_latch_is_needed =
      (trx->id > 0) || trx_state_eq(trx, TRX_STATE_PREPARED);

  /* Check and get GTID to be persisted. Do it outside trx_sys mutex. */
  Gtid_desc gtid_desc;
  auto &gtid_persistor = clone_sys->get_gtid_persistor();
  gtid_persistor.get_gtid_info(trx, gtid_desc);

  if (trx_sys_latch_is_needed) {
    trx_sys_mutex_enter();
  }

  if (trx->id > 0) {
    /* For consistent snapshot, we need to remove current
    transaction from running transaction id list for mvcc
    before doing commit and releasing locks. */
    trx_erase_lists(trx, serialized, gtid_desc);
  }

  if (trx_state_eq(trx, TRX_STATE_PREPARED)) {
    /* The transaction stops being counted as prepared. */
    ut_a(trx_sys->n_prepared_trx > 0);
    --trx_sys->n_prepared_trx;
  }

  trx_mutex_enter(trx);
  /* Please consider this particular point in time as the moment the trx's
  implicit locks become released.
  This change is protected by both trx_sys->mutex and trx->mutex.
  Therefore, there are two secure ways to check if the trx still can hold
  implicit locks:
  (1) if you only know id of the trx, then you can obtain trx_sys->mutex and
      check if trx is still in rw_trx_set. This works, because the call to
      trx_erase_lists() which removes trx from this set several lines above is
      also protected by trx_sys->mutex. We use this approach in
      lock_rec_convert_impl_to_expl() by using trx_rw_is_active()
  (2) if you have pointer to trx, and you know it is safe to access (say, you
      hold reference to this trx which prevents it from being freed) then you
      can obtain trx->mutex and check if trx->state is equal to
      TRX_STATE_COMMITTED_IN_MEMORY. We use this approach in
      lock_rec_convert_impl_to_expl_for_trx() when deciding for the final time
      if we really want to create explicit lock on behalf of implicit lock
      holder. */
  trx->state = TRX_STATE_COMMITTED_IN_MEMORY;
  trx_mutex_exit(trx);

  if (trx_sys_latch_is_needed) {
    trx_sys_mutex_exit();
  }

  /* The locks are released only after both mutexes have been released. */
  lock_trx_release_locks(trx);
}
1826 
/** Commits a transaction in memory.
In order, this function:
 - clears the deferred-flush flags (must_flush_log_later, ddl_must_flush);
 - for an autocommit non-locking transaction, closes its read view and marks
   it TRX_STATE_NOT_STARTED; for every other transaction, releases the locks
   through trx_release_impl_and_expl_locks();
 - drops the reference held on the redo rollback segment, if any;
 - resets the "storage engine persists GTID" flag;
 - if a mini-transaction was supplied, cleans up the insert undo logs and
   either flushes the redo log or records that it must be flushed later;
 - frees all savepoints and finalizes any FTS state;
 - under trx->mutex, sets the final state and calls trx_init().
On return trx->error_state must be DB_SUCCESS (checked with ut_a). */
static void trx_commit_in_memory(
    trx_t *trx,       /*!< in/out: transaction */
    const mtr_t *mtr, /*!< in: mini-transaction of
                      trx_write_serialisation_history(), or NULL if
                      the transaction did not modify anything */
    bool serialised)
/*!< in: true if serialisation log was
written */
{
  trx->must_flush_log_later = false;
  trx->ddl_must_flush = false;

  if (trx_is_autocommit_non_locking(trx)) {
    ut_ad(trx->id == 0);
    ut_ad(trx->read_only);
    ut_a(!trx->is_recovered);
    ut_ad(trx->rsegs.m_redo.rseg == nullptr);
    ut_ad(!trx->in_rw_trx_list);

    /* Note: We are asserting without holding the locksys latch. But
    that is OK because this transaction is not waiting and cannot
    be rolled back and no new locks can (or should not) be added
    because it is flagged as a non-locking read-only transaction. */

    ut_a(UT_LIST_GET_LEN(trx->lock.trx_locks) == 0);

    /* This state change is not protected by any mutex, therefore
    there is an inherent race here around state transition during
    printouts. We ignore this race for the sake of efficiency.
    However, the trx_sys_t::mutex will protect the trx_t instance
    and it cannot be removed from the mysql_trx_list and freed
    without first acquiring the trx_sys_t::mutex. */

    ut_ad(trx_state_eq(trx, TRX_STATE_ACTIVE));

    if (trx->read_view != nullptr) {
      trx_sys->mvcc->view_close(trx->read_view, false);
    }

    MONITOR_INC(MONITOR_TRX_NL_RO_COMMIT);

    /* AC-NL-RO transactions can't be rolled back asynchronously. */
    ut_ad(!trx->abort);
    ut_ad(!(trx->in_innodb & (TRX_FORCE_ROLLBACK | TRX_FORCE_ROLLBACK_ASYNC)));

    trx->state = TRX_STATE_NOT_STARTED;

  } else {
    /* This call switches the state to TRX_STATE_COMMITTED_IN_MEMORY and
    releases the transaction's locks. */
    trx_release_impl_and_expl_locks(trx, serialised);

    /* Remove the transaction from the list of active
    transactions now that it no longer holds any user locks. */

    ut_ad(trx_state_eq(trx, TRX_STATE_COMMITTED_IN_MEMORY));
    DEBUG_SYNC_C("after_trx_committed_in_memory");

    if (trx->read_only || trx->rsegs.m_redo.rseg == nullptr) {
      /* Counted as a read-only commit: either flagged read-only or no redo
      rollback segment was ever assigned. */
      MONITOR_INC(MONITOR_TRX_RO_COMMIT);
      if (trx->read_view != nullptr) {
        trx_sys->mvcc->view_close(trx->read_view, false);
      }

    } else {
      ut_ad(trx->id > 0);
      MONITOR_INC(MONITOR_TRX_RW_COMMIT);
    }
  }

  if (trx->rsegs.m_redo.rseg != nullptr) {
    trx_rseg_t *rseg = trx->rsegs.m_redo.rseg;
    ut_ad(rseg->trx_ref_count > 0);

    /* Multiple transactions can simultaneously decrement
    the atomic counter. */
    rseg->trx_ref_count--;
  }

  /* Reset flag that SE persists GTID. */
  auto &gtid_persistor = clone_sys->get_gtid_persistor();
  gtid_persistor.set_persist_gtid(trx, false);

  /* Everything in this branch applies only when a mini-transaction was
  passed in, i.e. the transaction modified something. */
  if (mtr != nullptr) {
    if (trx->rsegs.m_redo.insert_undo != nullptr) {
      trx_undo_insert_cleanup(&trx->rsegs.m_redo, false);
    }

    if (trx->rsegs.m_noredo.insert_undo != nullptr) {
      trx_undo_insert_cleanup(&trx->rsegs.m_noredo, true);
    }

    /* NOTE that we could possibly make a group commit more
    efficient here: call os_thread_yield here to allow also other
    trxs to come to commit! */

    /*-------------------------------------*/

    /* Depending on the my.cnf options, we may now write the log
    buffer to the log files, making the transaction durable if
    the OS does not crash. We may also flush the log files to
    disk, making the transaction durable also at an OS crash or a
    power outage.

    The idea in InnoDB's group commit is that a group of
    transactions gather behind a trx doing a physical disk write
    to log files, and when that physical write has been completed,
    one of those transactions does a write which commits the whole
    group. Note that this group commit will only bring benefit if
    there are > 2 users in the database. Then at least 2 users can
    gather behind one doing the physical log write to disk.

    If we are calling trx_commit() under prepare_commit_mutex, we
    will delay possible log write and flush to a separate function
    trx_commit_complete_for_mysql(), which is only called when the
    thread has released the mutex. This is to make the
    group commit algorithm to work. Otherwise, the prepare_commit
    mutex would serialize all commits and prevent a group of
    transactions from gathering. */

    lsn_t lsn = mtr->commit_lsn();

    if (lsn == 0) {
      /* Nothing to be done. */
    } else if (trx->flush_log_later) {
      /* Do nothing yet */
      trx->must_flush_log_later = true;

      /* Remember current ddl_operation, because trx_init()
      later will set ddl_operation to false. And the final
      flush is even later. */
      trx->ddl_must_flush = trx->ddl_operation;
    } else if ((srv_flush_log_at_trx_commit == 0 ||
                thd_requested_durability(trx->mysql_thd) ==
                    HA_IGNORE_DURABILITY) &&
               (!trx->ddl_operation)) {
      /* Do nothing */
    } else {
      trx_flush_log_if_needed(lsn, trx);
    }

    /* Remembered so that trx_commit_complete_for_mysql() can flush up to
    this LSN later. */
    trx->commit_lsn = lsn;

    /* Tell server some activity has happened, since the trx
    did change something. Background utility threads like
    master thread, purge thread or page_cleaner thread might
    have some work to do. */
    srv_active_wake_master_thread();
  }

  /* Free all savepoints, starting from the first. */
  trx_named_savept_t *savep = UT_LIST_GET_FIRST(trx->trx_savepoints);

  trx_roll_savepoints_free(trx, savep);

  if (trx->fts_trx != nullptr) {
    /* The second argument is true when at least one undo record was written
    (undo_no != 0). */
    trx_finalize_for_fts(trx, trx->undo_no != 0);
  }

  trx_mutex_enter(trx);
  trx->dict_operation = TRX_DICT_OP_NONE;

  /* Because we can rollback transactions asynchronously, we change
  the state at the last step. trx_t::abort cannot change once commit
  or rollback has started because we will have released the locks by
  the time we get here. */

  if (trx->abort) {
    trx->abort = false;
    trx->state = TRX_STATE_FORCED_ROLLBACK;
  } else {
    trx->state = TRX_STATE_NOT_STARTED;
  }

  /* trx->in_mysql_trx_list would hold between
  trx_allocate_for_mysql() and trx_free_for_mysql(). It does not
  hold for recovered transactions or system transactions. */
  assert_trx_is_free(trx);

  trx_init(trx);

  trx_mutex_exit(trx);

  ut_a(trx->error_state == DB_SUCCESS);
}
2011 
/** Commits a transaction and a mini-transaction.
If the transaction has FTS state and is doing its final commit (not a
rollback), the FTS changes are committed first. When a mini-transaction is
supplied, the serialisation history is written into it and it is committed
here; afterwards the in-memory part of the commit is performed by
trx_commit_in_memory().
Any fts_commit() error other than DB_DUPLICATE_KEY terminates the process via
ut_error. */
void trx_commit_low(
    trx_t *trx, /*!< in/out: transaction */
    mtr_t *mtr) /*!< in/out: mini-transaction (will be committed),
                or NULL if trx made no modifications */
{
  assert_trx_nonlocking_or_in_list(trx);
  ut_ad(!trx_state_eq(trx, TRX_STATE_COMMITTED_IN_MEMORY));
  ut_ad(!mtr || mtr->is_active());
  /* undo_no is non-zero if we're doing the final commit. */
  if (trx->fts_trx != nullptr && trx->undo_no != 0 &&
      trx->lock.que_state != TRX_QUE_ROLLING_BACK) {
    dberr_t error;

    ut_a(!trx_is_autocommit_non_locking(trx));

    error = fts_commit(trx);

    /* FTS-FIXME: Temporarily tolerate DB_DUPLICATE_KEY
    instead of dying. This is a possible scenario if there
    is a crash between insert to DELETED table committing
    and transaction committing. The fix would be able to
    return error from this function */
    if (error != DB_SUCCESS && error != DB_DUPLICATE_KEY) {
      /* FTS-FIXME: once we can return values from this
      function, we should do so and signal an error
      instead of just dying. */

      ut_error;
    }
  }

  /* Result of trx_write_serialisation_history(); stays false when there is
  no mini-transaction. It is forwarded to trx_commit_in_memory(). */
  bool serialised;

  if (mtr != nullptr) {
    mtr->set_sync();

    serialised = trx_write_serialisation_history(trx, mtr);

    /* The following call commits the mini-transaction, making the
    whole transaction committed in the file-based world, at this
    log sequence number. The transaction becomes 'durable' when
    we write the log to disk, but in the logical sense the commit
    in the file-based data structures (undo logs etc.) happens
    here.

    NOTE that transaction numbers, which are assigned only to
    transactions with an update undo log, do not necessarily come
    in exactly the same order as commit lsn's, if the transactions
    have different rollback segments. To get exactly the same
    order we should hold the kernel mutex up to this point,
    adding to the contention of the kernel mutex. However, if
    a transaction T2 is able to see modifications made by
    a transaction T1, T2 will always get a bigger transaction
    number and a bigger commit lsn than T1. */

    /*--------------*/

    /* Debug-only hook: pads the redo log before the commit record is
    written. */
    DBUG_EXECUTE_IF("trx_commit_to_the_end_of_log_block", {
      const size_t space_left = mtr->get_expected_log_size();
      mtr_commit_mlog_test_filling_block(*log_sys, space_left);
    });

    mtr_commit(mtr);

    DBUG_PRINT("trx_commit", ("commit lsn at " LSN_PF, mtr->commit_lsn()));

    /* Debug-only hook: makes a checkpoint and kills the server after the
    mini-transaction commit but before the in-memory commit. */
    DBUG_EXECUTE_IF(
        "ib_crash_during_trx_commit_in_mem", if (trx_is_rseg_updated(trx)) {
          log_make_latest_checkpoint();
          DBUG_SUICIDE();
        });
    /*--------------*/

  } else {
    serialised = false;
  }
#ifdef UNIV_DEBUG
  /* In case of this function is called from a stack executing
     THD::release_resources -> ...
        innobase_connection_close() ->
               trx_rollback_for_mysql... -> .
     mysql's thd does not seem to have
     thd->debug_sync_control defined any longer. However the stack
     is possible only with a prepared trx not updating any data.
  */
  if (trx->mysql_thd != nullptr && trx_is_redo_rseg_updated(trx)) {
    DEBUG_SYNC_C("before_trx_state_committed_in_memory");
  }
#endif

  trx_commit_in_memory(trx, mtr, serialised);
}
2105 
2106 /** Commits a transaction. */
trx_commit(trx_t * trx)2107 void trx_commit(trx_t *trx) /*!< in/out: transaction */
2108 {
2109   mtr_t *mtr;
2110   mtr_t local_mtr;
2111 
2112   DBUG_EXECUTE_IF("ib_trx_commit_crash_before_trx_commit_start",
2113                   DBUG_SUICIDE(););
2114 
2115   if (trx_is_rseg_updated(trx)) {
2116     mtr = &local_mtr;
2117 
2118     DBUG_EXECUTE_IF("ib_trx_commit_crash_rseg_updated", DBUG_SUICIDE(););
2119 
2120     mtr_start_sync(mtr);
2121 
2122   } else {
2123     mtr = nullptr;
2124   }
2125 
2126   trx_commit_low(trx, mtr);
2127 }
2128 
2129 /** Cleans up a transaction at database startup. The cleanup is needed if
2130  the transaction already got to the middle of a commit when the database
2131  crashed, and we cannot roll it back. */
trx_cleanup_at_db_startup(trx_t * trx)2132 void trx_cleanup_at_db_startup(trx_t *trx) /*!< in: transaction */
2133 {
2134   ut_ad(trx->is_recovered);
2135 
2136   /* Cleanup any durable undo logs in non-temporary rollback segments.
2137   At database start-up there are no active transactions recorded in
2138   any rollback segments in the temporary tablespace because all those
2139   changes are all lost on restart. */
2140   if (trx->rsegs.m_redo.insert_undo != nullptr) {
2141     trx_undo_insert_cleanup(&trx->rsegs.m_redo, false);
2142   }
2143 
2144   memset(&trx->rsegs, 0x0, sizeof(trx->rsegs));
2145   trx->undo_no = 0;
2146   trx->undo_rseg_space = 0;
2147   trx->last_sql_stat_start.least_undo_no = 0;
2148 
2149   trx_sys_mutex_enter();
2150 
2151   ut_a(!trx->read_only);
2152 
2153   UT_LIST_REMOVE(trx_sys->rw_trx_list, trx);
2154 
2155   ut_d(trx->in_rw_trx_list = FALSE);
2156 
2157   trx_sys_mutex_exit();
2158 
2159   /* Change the transaction state without mutex protection, now
2160   that it no longer is in the trx_list. Recovered transactions
2161   are never placed in the mysql_trx_list. */
2162   ut_ad(trx->is_recovered);
2163   ut_ad(!trx->in_rw_trx_list);
2164   ut_ad(!trx->in_mysql_trx_list);
2165   trx->state = TRX_STATE_NOT_STARTED;
2166 }
2167 
2168 /** Assigns a read view for a consistent read query. All the consistent reads
2169  within the same transaction will get the same read view, which is created
2170  when this function is first called for a new started transaction.
2171  @return consistent read view */
trx_assign_read_view(trx_t * trx)2172 ReadView *trx_assign_read_view(trx_t *trx) /*!< in/out: active transaction */
2173 {
2174   ut_ad(trx->state == TRX_STATE_ACTIVE);
2175 
2176   if (srv_read_only_mode) {
2177     ut_ad(trx->read_view == nullptr);
2178     return (nullptr);
2179 
2180   } else if (!MVCC::is_view_active(trx->read_view)) {
2181     trx_sys->mvcc->view_open(trx->read_view, trx);
2182   }
2183 
2184   return (trx->read_view);
2185 }
2186 
2187 /** Prepares a transaction for commit/rollback. */
trx_commit_or_rollback_prepare(trx_t * trx)2188 void trx_commit_or_rollback_prepare(trx_t *trx) /*!< in/out: transaction */
2189 {
2190   /* We are reading trx->state without holding trx_sys->mutex
2191   here, because the commit or rollback should be invoked for a
2192   running (or recovered prepared) transaction that is associated
2193   with the current thread. */
2194 
2195   switch (trx->state) {
2196     case TRX_STATE_NOT_STARTED:
2197     case TRX_STATE_FORCED_ROLLBACK:
2198 
2199       trx_start_low(trx, true);
2200       /* fall through */
2201 
2202     case TRX_STATE_ACTIVE:
2203     case TRX_STATE_PREPARED:
2204 
2205       /* If the trx is in a lock wait state, moves the waiting
2206       query thread to the suspended state */
2207 
2208       if (trx->lock.que_state == TRX_QUE_LOCK_WAIT) {
2209         ut_a(trx->lock.wait_thr != nullptr);
2210         trx->lock.wait_thr->state = QUE_THR_SUSPENDED;
2211         trx->lock.wait_thr = nullptr;
2212 
2213         trx->lock.que_state = TRX_QUE_RUNNING;
2214       }
2215 
2216       ut_a(trx->lock.n_active_thrs == 1);
2217       return;
2218 
2219     case TRX_STATE_COMMITTED_IN_MEMORY:
2220       break;
2221   }
2222 
2223   ut_error;
2224 }
2225 
2226 /** Creates a commit command node struct.
2227  @return own: commit node struct */
trx_commit_node_create(mem_heap_t * heap)2228 commit_node_t *trx_commit_node_create(
2229     mem_heap_t *heap) /*!< in: mem heap where created */
2230 {
2231   commit_node_t *node;
2232 
2233   node = static_cast<commit_node_t *>(mem_heap_alloc(heap, sizeof(*node)));
2234   node->common.type = QUE_NODE_COMMIT;
2235   node->state = COMMIT_NODE_SEND;
2236 
2237   return (node);
2238 }
2239 
2240 /** Performs an execution step for a commit type node in a query graph.
2241  @return query thread to run next, or NULL */
trx_commit_step(que_thr_t * thr)2242 que_thr_t *trx_commit_step(que_thr_t *thr) /*!< in: query thread */
2243 {
2244   commit_node_t *node;
2245 
2246   node = static_cast<commit_node_t *>(thr->run_node);
2247 
2248   ut_ad(que_node_get_type(node) == QUE_NODE_COMMIT);
2249 
2250   if (thr->prev_node == que_node_get_parent(node)) {
2251     node->state = COMMIT_NODE_SEND;
2252   }
2253 
2254   if (node->state == COMMIT_NODE_SEND) {
2255     trx_t *trx;
2256 
2257     node->state = COMMIT_NODE_WAIT;
2258 
2259     trx = thr_get_trx(thr);
2260 
2261     ut_a(trx->lock.wait_thr == nullptr);
2262     ut_a(trx->lock.que_state != TRX_QUE_LOCK_WAIT);
2263 
2264     trx_commit_or_rollback_prepare(trx);
2265 
2266     trx->lock.que_state = TRX_QUE_COMMITTING;
2267 
2268     trx_commit(trx);
2269 
2270     ut_ad(trx->lock.wait_thr == nullptr);
2271 
2272     trx->lock.que_state = TRX_QUE_RUNNING;
2273 
2274     thr = nullptr;
2275   } else {
2276     ut_ad(node->state == COMMIT_NODE_WAIT);
2277 
2278     node->state = COMMIT_NODE_SEND;
2279 
2280     thr->run_node = que_node_get_parent(node);
2281   }
2282 
2283   return (thr);
2284 }
2285 
2286 /** Does the transaction commit for MySQL.
2287  @return DB_SUCCESS or error number */
trx_commit_for_mysql(trx_t * trx)2288 dberr_t trx_commit_for_mysql(trx_t *trx) /*!< in/out: transaction */
2289 {
2290   DEBUG_SYNC_C("trx_commit_for_mysql_checks_for_aborted");
2291   TrxInInnoDB trx_in_innodb(trx, true);
2292 
2293   if (trx_in_innodb.is_aborted() && trx->killed_by != os_thread_get_curr_id()) {
2294     return (DB_FORCED_ABORT);
2295   }
2296 
2297   /* Because we do not do the commit by sending an Innobase
2298   sig to the transaction, we must here make sure that trx has been
2299   started. */
2300 
2301   dberr_t db_err = DB_SUCCESS;
2302 
2303   switch (trx->state) {
2304     case TRX_STATE_NOT_STARTED:
2305     case TRX_STATE_FORCED_ROLLBACK:
2306 
2307       ut_d(trx->start_file = __FILE__);
2308       ut_d(trx->start_line = __LINE__);
2309 
2310       trx_start_low(trx, true);
2311       /* fall through */
2312     case TRX_STATE_ACTIVE:
2313     case TRX_STATE_PREPARED:
2314       trx->op_info = "committing";
2315 
2316       /* For GTID persistence we need update undo segment. */
2317       db_err = trx_undo_gtid_add_update_undo(trx, false, false);
2318       if (db_err != DB_SUCCESS) {
2319         return (db_err);
2320       }
2321 
2322       /* Flush prepare GTID for XA prepared transactions. */
2323       trx_undo_gtid_flush_prepare(trx);
2324 
2325       if (trx->id != 0) {
2326         trx_update_mod_tables_timestamp(trx);
2327       }
2328 
2329       trx_commit(trx);
2330 
2331       MONITOR_DEC(MONITOR_TRX_ACTIVE);
2332       trx->op_info = "";
2333       return (DB_SUCCESS);
2334     case TRX_STATE_COMMITTED_IN_MEMORY:
2335       break;
2336   }
2337   ut_error;
2338   return (DB_CORRUPTION);
2339 }
2340 
2341 /** If required, flushes the log to disk if we called trx_commit_for_mysql()
2342  with trx->flush_log_later == TRUE. */
trx_commit_complete_for_mysql(trx_t * trx)2343 void trx_commit_complete_for_mysql(trx_t *trx) /*!< in/out: transaction */
2344 {
2345   if (trx->id != 0 || !trx->must_flush_log_later ||
2346       (thd_requested_durability(trx->mysql_thd) == HA_IGNORE_DURABILITY &&
2347        !trx->ddl_must_flush)) {
2348     /* If we removed trx->ddl_must_flush from condition above, we would
2349     need to take care of fixing innobase_flush_logs for a scenario in
2350     which srv_flush_log_at_trx_commit == 0. */
2351     return;
2352   }
2353 
2354   trx_flush_log_if_needed(trx->commit_lsn, trx);
2355 
2356   trx->must_flush_log_later = false;
2357   trx->ddl_must_flush = false;
2358 }
2359 
2360 /** Marks the latest SQL statement ended. */
trx_mark_sql_stat_end(trx_t * trx)2361 void trx_mark_sql_stat_end(trx_t *trx) /*!< in: trx handle */
2362 {
2363   ut_a(trx);
2364 
2365   lock_on_statement_end(trx);
2366 
2367   switch (trx->state) {
2368     case TRX_STATE_PREPARED:
2369     case TRX_STATE_COMMITTED_IN_MEMORY:
2370       break;
2371     case TRX_STATE_NOT_STARTED:
2372     case TRX_STATE_FORCED_ROLLBACK:
2373       trx->undo_no = 0;
2374       trx->undo_rseg_space = 0;
2375       /* fall through */
2376     case TRX_STATE_ACTIVE:
2377       trx->last_sql_stat_start.least_undo_no = trx->undo_no;
2378 
2379       if (trx->fts_trx != nullptr) {
2380         fts_savepoint_laststmt_refresh(trx);
2381       }
2382 
2383       return;
2384   }
2385 
2386   ut_error;
2387 }
2388 
2389 /** Prints info about a transaction.
2390  Caller must hold trx_sys->mutex. */
trx_print_low(FILE * f,const trx_t * trx,ulint max_query_len,ulint n_rec_locks,ulint n_trx_locks,ulint heap_size)2391 void trx_print_low(FILE *f,
2392                    /*!< in: output stream */
2393                    const trx_t *trx,
2394                    /*!< in: transaction */
2395                    ulint max_query_len,
2396                    /*!< in: max query length to print,
2397                    or 0 to use the default max length */
2398                    ulint n_rec_locks,
2399                    /*!< in: lock_number_of_rows_locked(&trx->lock) */
2400                    ulint n_trx_locks,
2401                    /*!< in: length of trx->lock.trx_locks */
2402                    ulint heap_size)
2403 /*!< in: mem_heap_get_size(trx->lock.lock_heap) */
2404 {
2405   ibool newline;
2406   const char *op_info;
2407 
2408   ut_ad(trx_sys_mutex_own());
2409 
2410   fprintf(f, "TRANSACTION " TRX_ID_FMT, trx_get_id_for_print(trx));
2411 
2412   /* trx->state cannot change from or to NOT_STARTED while we
2413   are holding the trx_sys->mutex. It may change from ACTIVE to
2414   PREPARED or COMMITTED. */
2415   switch (trx->state) {
2416     case TRX_STATE_NOT_STARTED:
2417       fputs(", not started", f);
2418       goto state_ok;
2419     case TRX_STATE_FORCED_ROLLBACK:
2420       fputs(", forced rollback", f);
2421       goto state_ok;
2422     case TRX_STATE_ACTIVE:
2423       fprintf(f, ", ACTIVE %lu sec",
2424               (ulong)difftime(time(nullptr), trx->start_time));
2425       goto state_ok;
2426     case TRX_STATE_PREPARED:
2427       fprintf(f, ", ACTIVE (PREPARED) %lu sec",
2428               (ulong)difftime(time(nullptr), trx->start_time));
2429       goto state_ok;
2430     case TRX_STATE_COMMITTED_IN_MEMORY:
2431       fputs(", COMMITTED IN MEMORY", f);
2432       goto state_ok;
2433   }
2434   fprintf(f, ", state %lu", (ulong)trx->state);
2435   ut_ad(0);
2436 state_ok:
2437 
2438   /* prevent a race condition */
2439   op_info = trx->op_info;
2440 
2441   if (*op_info) {
2442     putc(' ', f);
2443     fputs(op_info, f);
2444   }
2445 
2446   if (trx->is_recovered) {
2447     fputs(" recovered trx", f);
2448   }
2449 
2450   if (trx->declared_to_be_inside_innodb) {
2451     fprintf(f, ", thread declared inside InnoDB %lu",
2452             (ulong)trx->n_tickets_to_enter_innodb);
2453   }
2454 
2455   putc('\n', f);
2456 
2457   if (trx->n_mysql_tables_in_use > 0 || trx->mysql_n_tables_locked > 0) {
2458     fprintf(f, "mysql tables in use %lu, locked %lu\n",
2459             (ulong)trx->n_mysql_tables_in_use,
2460             (ulong)trx->mysql_n_tables_locked);
2461   }
2462 
2463   newline = TRUE;
2464 
2465   /* trx->lock.que_state of an ACTIVE transaction may change
2466   while we are not holding trx->mutex. We perform a dirty read
2467   for performance reasons. */
2468 
2469   switch (trx->lock.que_state) {
2470     case TRX_QUE_RUNNING:
2471       newline = FALSE;
2472       break;
2473     case TRX_QUE_LOCK_WAIT:
2474       fputs("LOCK WAIT ", f);
2475       break;
2476     case TRX_QUE_ROLLING_BACK:
2477       fputs("ROLLING BACK ", f);
2478       break;
2479     case TRX_QUE_COMMITTING:
2480       fputs("COMMITTING ", f);
2481       break;
2482     default:
2483       fprintf(f, "que state %lu ", (ulong)trx->lock.que_state);
2484   }
2485 
2486   if (n_trx_locks > 0 || heap_size > 400) {
2487     newline = TRUE;
2488 
2489     fprintf(f,
2490             "%lu lock struct(s), heap size %lu,"
2491             " %lu row lock(s)",
2492             (ulong)n_trx_locks, (ulong)heap_size, (ulong)n_rec_locks);
2493   }
2494 
2495   if (trx->has_search_latch) {
2496     newline = TRUE;
2497     fputs(", holds adaptive hash latch", f);
2498   }
2499 
2500   if (trx->undo_no != 0) {
2501     newline = TRUE;
2502     fprintf(f, ", undo log entries " TRX_ID_FMT, trx->undo_no);
2503   }
2504 
2505   if (newline) {
2506     putc('\n', f);
2507   }
2508 
2509   if (trx->state != TRX_STATE_NOT_STARTED && trx->mysql_thd != nullptr) {
2510     innobase_mysql_print_thd(f, trx->mysql_thd,
2511                              static_cast<uint>(max_query_len));
2512   }
2513 }
2514 
trx_print_latched(FILE * f,const trx_t * trx,ulint max_query_len)2515 void trx_print_latched(FILE *f, const trx_t *trx, ulint max_query_len) {
2516   /* We need exclusive access to lock_sys for lock_number_of_rows_locked(),
2517   and accessing trx->lock fields without trx->mutex.*/
2518   ut_ad(locksys::owns_exclusive_global_latch());
2519   ut_ad(trx_sys_mutex_own());
2520 
2521   trx_print_low(f, trx, max_query_len, lock_number_of_rows_locked(&trx->lock),
2522                 UT_LIST_GET_LEN(trx->lock.trx_locks),
2523                 mem_heap_get_size(trx->lock.lock_heap));
2524 }
2525 
trx_print(FILE * f,const trx_t * trx,ulint max_query_len)2526 void trx_print(FILE *f, const trx_t *trx, ulint max_query_len) {
2527   /* trx_print_latched() requires exclusive global latch */
2528   locksys::Global_exclusive_latch_guard guard{};
2529   mutex_enter(&trx_sys->mutex);
2530   trx_print_latched(f, trx, max_query_len);
2531   mutex_exit(&trx_sys->mutex);
2532 }
2533 
2534 #ifdef UNIV_DEBUG
trx_can_be_handled_by_current_thread(const trx_t * trx)2535 bool trx_can_be_handled_by_current_thread(const trx_t *trx) {
2536   return (trx->mysql_thd == nullptr || trx->mysql_thd == current_thd);
2537 }
2538 
2539 /** Asserts that a transaction has been started.
2540  The caller must hold trx_sys->mutex.
2541  @return true if started */
trx_assert_started(const trx_t * trx)2542 ibool trx_assert_started(const trx_t *trx) /*!< in: transaction */
2543 {
2544   ut_ad(trx_sys_mutex_own());
2545 
2546   /* Non-locking autocommits should not hold any locks and this
2547   function is only called from the locking code. */
2548   check_trx_state(trx);
2549 
2550   /* trx->state can change from or to NOT_STARTED while we are holding
2551   trx_sys->mutex for non-locking autocommit selects but not for other
2552   types of transactions. It may change from ACTIVE to PREPARED. */
2553 
2554   switch (trx->state) {
2555     case TRX_STATE_PREPARED:
2556       return (TRUE);
2557 
2558     case TRX_STATE_ACTIVE:
2559     case TRX_STATE_COMMITTED_IN_MEMORY:
2560       return (TRUE);
2561 
2562     case TRX_STATE_NOT_STARTED:
2563     case TRX_STATE_FORCED_ROLLBACK:
2564       break;
2565   }
2566 
2567   ut_error;
2568 }
2569 
2570 /*
2571 Interaction between Lock-sys and trx->mutex-es is rather complicated.
2572 In particular we allow a thread performing Lock-sys operations to request
2573 another trx->mutex even though it already holds one for a different trx.
2574 Therefore one has to prove that it is impossible to form a deadlock cycle in the
2575 imaginary wait-for-graph in which edges go from thread trying to obtain
2576 trx->mutex to a thread which holds it at the moment.
2577 
In the past it was simple, because Lock-sys was protected by a global mutex,
which meant that there was at most one thread which could try to possess more
than one trx->mutex - one can not form a cycle in a graph in which only
one node has both incoming and outgoing edges.
2582 
2583 Today it is much harder to prove, because we have sharded the Lock-sys mutex,
2584 and now multiple threads can perform Lock-sys operations in parallel, as long
2585 as they happen in different shards.
2586 
2587 Here's my attempt at the proof.
2588 
Assumption 1.
  If a thread attempts to acquire more than one trx->mutex, then it either has
  exclusive global latch, or it attempts to acquire exactly two of them, and
  just before calling mutex_enter for the second time it saw
  trx1->lock.wait_lock==nullptr, trx2->lock.wait_lock!=nullptr, and it held the
  latch for the shard containing trx2->lock.wait_lock.
2595 
2596 @see asserts in trx_before_mutex_enter
2597 
2598 Assumption 2.
2599   The Lock-sys latches are taken before any trx->mutex.
2600 
2601 @see asserts in sync0debug.cc
2602 
2603 Assumption 3.
2604   Changing trx->lock.wait_lock from NULL to non-NULL requires latching
2605   trx->mutex and the shard containing new wait_lock value.
2606 
2607 @see asserts in lock_set_lock_and_trx_wait()
2608 
2609 Assumption 4.
2610   Changing trx->lock.wait_lock from non-NULL to NULL requires latching the shard
2611   containing old wait_lock value.
2612 
2613 @see asserts in lock_reset_lock_and_trx_wait()
2614 
2615 Assumption 5.
2616   If a thread is latching two Lock-sys shards then it's acquiring and releasing
2617   both shards together (that is, without interleaving it with trx->mutex
2618   operations).
2619 
2620 @see Shard_latches_guard
2621 
2622 Theorem 1.
2623   If the Assumptions 1-5 hold, then it's impossible for trx_mutex_enter() call
2624   to deadlock.
2625 
2626 By proving the theorem, and observing that the assertions hold for multiple runs
2627 of test suite on debug build, we gain more and more confidence that
2628 trx_mutex_enter() calls can not deadlock.
2629 
2630 The intuitive, albeit imprecise, version of the proof is that by Assumption 1
2631 each edge of the deadlock cycle leads from a trx with NULL trx->lock.wait_lock
2632 to one with non-NULL wait_lock, which means it has only one edge.
2633 
The difficulty lies in the fact that wait_lock is a field which can be modified
over time from several threads, so care must be taken to clarify at which
moment in time we make our observations and from whose perspective.
2637 
2638 We will now formally prove Theorem 1.
Assume otherwise, that is, that we are in a thread which has just started a
call to mutex_enter(trx_a->mutex) and caused a deadlock.
2641 
2642 Fact 0. There is no thread which possesses exclusive Lock-sys latch, since to
2643         form a deadlock one needs at least two threads inside Lock-sys
2644 Fact 1. Each thread participating in the deadlock holds one trx mutex and waits
2645         for the second one it tried to acquire
2646 Fact 2. Thus each thread participating in the deadlock had gone through "else"
2647         branch inside trx_before_mutex_enter(), so it verifies Assumption 1.
Fact 3. Our thread owns_lock_shard(trx_a->lock.wait_lock)
2649 Fact 4. Another thread has latched trx_a->mutex as the first of its two latches
2650 
2651 Consider the situation from the point of view of this other thread, which is now
2652 in the deadlock waiting for mutex_enter(trx_b->mutex) for some trx_b!=trx_a.
By Fact 2 and Assumption 1, it had to take the "else" branch on the way there,
and thus it saw: trx_a->lock.wait_lock == nullptr at some moment in time.
2655 This observation was either before or after our observation that
2656 trx_a->lock.wait_lock != nullptr (again Fact 2 and Assumption 1).
2657 
If our thread observed non-NULL value first, then it means a change from
non-NULL to NULL has happened, which by Assumption 4 requires a shard latch,
which only our thread possesses - and we couldn't manipulate the wait_lock as
we are in a deadlock.
2662 
If the other thread observed NULL first, then it means that the value has
changed to non-NULL, which requires trx_a->mutex according to Assumption 3, yet
this mutex was held the entire time by the other thread, since it observed the
NULL just before it deadlocked, so it could not change it, either.
2667 
2668 So, there is no way the value of wait_lock has changed from NULL to non-NULL or
2669 vice-versa, yet one thread sees NULL and the other non-NULL - contradiction ends
2670 the proof.
2671 */
2672 
/** The trx passed to the first not-yet-matched trx_before_mutex_enter() call
made by the current thread, or nullptr when trx_latched_count is zero. */
static thread_local const trx_t *trx_first_latched_trx = nullptr;
/** Number of trx_before_mutex_enter() calls made by the current thread which
have not yet been matched by a trx_before_mutex_exit() call. */
static thread_local int32_t trx_latched_count = 0;
/** Set to true when the first trx_before_mutex_enter() call was made with
first_of_two == true; reset to false when trx_latched_count drops to zero. */
static thread_local bool trx_allowed_two_latches = false;
2676 
trx_before_mutex_enter(const trx_t * trx,bool first_of_two)2677 void trx_before_mutex_enter(const trx_t *trx, bool first_of_two) {
2678   if (0 == trx_latched_count++) {
2679     ut_a(trx_first_latched_trx == nullptr);
2680     trx_first_latched_trx = trx;
2681     if (first_of_two) {
2682       trx_allowed_two_latches = true;
2683     }
2684   } else {
2685     ut_a(!first_of_two);
2686     if (!locksys::owns_exclusive_global_latch()) {
2687       ut_a(trx_allowed_two_latches);
2688       ut_a(trx_latched_count == 2);
2689       ut_a(trx_first_latched_trx->lock.wait_lock == nullptr);
2690       ut_a(trx_first_latched_trx != trx);
2691       /* This is not very safe, because to read trx->lock.wait_lock we
2692       should already either latch trx->mutex (which we don't) or shard with
2693       trx->lock.wait_lock. But our claim is precisely that we have latched
2694       this shard, and we want to check that here. */
2695       ut_a(trx->lock.wait_lock != nullptr);
2696       ut_a(locksys::owns_lock_shard(trx->lock.wait_lock));
2697     }
2698   }
2699 }
trx_before_mutex_exit(const trx_t * trx)2700 void trx_before_mutex_exit(const trx_t *trx) {
2701   ut_a(0 < trx_latched_count);
2702   if (0 == --trx_latched_count) {
2703     ut_a(trx_first_latched_trx == trx);
2704     trx_first_latched_trx = nullptr;
2705     trx_allowed_two_latches = false;
2706   }
2707 }
2708 #endif /* UNIV_DEBUG */
2709 
2710 /** Compares the "weight" (or size) of two transactions. Transactions that
2711  have edited non-transactional tables are considered heavier than ones
2712  that have not.
2713  @return true if weight(a) >= weight(b) */
trx_weight_ge(const trx_t * a,const trx_t * b)2714 bool trx_weight_ge(const trx_t *a, /*!< in: transaction to be compared */
2715                    const trx_t *b) /*!< in: transaction to be compared */
2716 {
2717   /* To read TRX_WEIGHT we need a exclusive global lock_sys latch */
2718   ut_ad(locksys::owns_exclusive_global_latch());
2719   ibool a_notrans_edit;
2720   ibool b_notrans_edit;
2721 
2722   /* If mysql_thd is NULL for a transaction we assume that it has
2723   not edited non-transactional tables. */
2724 
2725   a_notrans_edit =
2726       a->mysql_thd != nullptr && thd_has_edited_nontrans_tables(a->mysql_thd);
2727 
2728   b_notrans_edit =
2729       b->mysql_thd != nullptr && thd_has_edited_nontrans_tables(b->mysql_thd);
2730 
2731   if (a_notrans_edit != b_notrans_edit) {
2732     return (a_notrans_edit);
2733   }
2734 
2735   /* Either both had edited non-transactional tables or both had
2736   not, we fall back to comparing the number of altered/locked
2737   rows. */
2738 
2739   return (TRX_WEIGHT(a) >= TRX_WEIGHT(b));
2740 }
2741 
2742 /** Prepares a transaction for given rollback segment.
2743  @return lsn_t: lsn assigned for commit of scheduled rollback segment */
trx_prepare_low(trx_t * trx,trx_undo_ptr_t * undo_ptr,bool noredo_logging)2744 static lsn_t trx_prepare_low(
2745     trx_t *trx,               /*!< in/out: transaction */
2746     trx_undo_ptr_t *undo_ptr, /*!< in/out: pointer to rollback
2747                               segment scheduled for prepare. */
2748     bool noredo_logging)      /*!< in: turn-off redo logging. */
2749 {
2750   if (undo_ptr->insert_undo != nullptr || undo_ptr->update_undo != nullptr) {
2751     mtr_t mtr;
2752     trx_rseg_t *rseg = undo_ptr->rseg;
2753 
2754     mtr_start_sync(&mtr);
2755 
2756     if (noredo_logging) {
2757       mtr_set_log_mode(&mtr, MTR_LOG_NO_REDO);
2758     }
2759 
2760     /* Change the undo log segment states from TRX_UNDO_ACTIVE to
2761     TRX_UNDO_PREPARED: these modifications to the file data
2762     structure define the transaction as prepared in the file-based
2763     world, at the serialization point of lsn. */
2764 
2765     rseg->latch();
2766 
2767     if (undo_ptr->insert_undo != nullptr) {
2768       /* It is not necessary to obtain trx->undo_mutex here
2769       because only a single OS thread is allowed to do the
2770       transaction prepare for this transaction. */
2771       trx_undo_set_state_at_prepare(trx, undo_ptr->insert_undo, false, &mtr);
2772     }
2773 
2774     if (undo_ptr->update_undo != nullptr) {
2775       if (!noredo_logging) {
2776         trx_undo_gtid_set(trx, undo_ptr->update_undo);
2777       }
2778       trx_undo_set_state_at_prepare(trx, undo_ptr->update_undo, false, &mtr);
2779     }
2780 
2781     rseg->unlatch();
2782 
2783     /*--------------*/
2784     /* This mtr commit makes the transaction prepared in
2785     file-based world. */
2786     mtr_commit(&mtr);
2787     /*--------------*/
2788 
2789     if (!noredo_logging) {
2790       const lsn_t lsn = mtr.commit_lsn();
2791       ut_ad(lsn > 0 || !mtr_t::s_logging.is_enabled());
2792       return lsn;
2793     }
2794   }
2795 
2796   return 0;
2797 }
2798 
trx_is_mysql_xa(const trx_t * trx)2799 bool trx_is_mysql_xa(const trx_t *trx) {
2800   auto my_xid = trx->xid->get_my_xid();
2801   return (my_xid != 0);
2802 }
2803 
2804 /** Prepares a transaction. */
trx_prepare(trx_t * trx)2805 static void trx_prepare(trx_t *trx) /*!< in/out: transaction */
2806 {
2807   /* This transaction has crossed the point of no return and cannot
2808   be rolled back asynchronously now. It must commit or rollback
2809   synchronously. */
2810 
2811   lsn_t lsn = 0;
2812 
2813   /* Only fresh user transactions can be prepared.
2814   Recovered transactions cannot. */
2815   ut_a(!trx->is_recovered);
2816 
2817   DBUG_EXECUTE_IF("ib_trx_crash_during_xa_prepare_step", DBUG_SUICIDE(););
2818 
2819   if (trx->rsegs.m_redo.rseg != nullptr && trx_is_redo_rseg_updated(trx)) {
2820     lsn = trx_prepare_low(trx, &trx->rsegs.m_redo, false);
2821   }
2822 
2823   if (trx->rsegs.m_noredo.rseg != nullptr && trx_is_temp_rseg_updated(trx)) {
2824     trx_prepare_low(trx, &trx->rsegs.m_noredo, true);
2825   }
2826 
2827   /* Check and get GTID to be persisted. Do it outside trx_sys mutex. */
2828   auto &gtid_persistor = clone_sys->get_gtid_persistor();
2829   Gtid_desc gtid_desc;
2830   gtid_persistor.get_gtid_info(trx, gtid_desc);
2831 
2832   /*--------------------------------------*/
2833   ut_a(trx->state == TRX_STATE_ACTIVE);
2834   trx_sys_mutex_enter();
2835   trx->state = TRX_STATE_PREPARED;
2836   trx_sys->n_prepared_trx++;
2837   /* Add GTID to be persisted to disk table, if needed. */
2838   if (gtid_desc.m_is_set) {
2839     gtid_persistor.add(gtid_desc);
2840   }
2841   trx_sys_mutex_exit();
2842   /*--------------------------------------*/
2843 
2844   /* Reset after successfully adding GTID to in memory table. */
2845   trx->persists_gtid = false;
2846 
2847   /* Force isolation level to RC and release GAP locks
2848   for test purpose. */
2849   DBUG_EXECUTE_IF("ib_force_release_gap_lock_prepare",
2850                   trx->isolation_level = TRX_ISO_READ_COMMITTED;);
2851 
2852   /* Release read locks after PREPARE for READ COMMITTED
2853   and lower isolation. */
2854   if (trx->isolation_level <= TRX_ISO_READ_COMMITTED) {
2855     /* Stop inheriting GAP locks. */
2856     trx->skip_lock_inheritance = true;
2857 
2858     /* Release only GAP locks for now. */
2859     lock_trx_release_read_locks(trx, true);
2860   }
2861 
2862   switch (thd_requested_durability(trx->mysql_thd)) {
2863     case HA_IGNORE_DURABILITY:
2864       /* We set the HA_IGNORE_DURABILITY during prepare phase of
2865       binlog group commit to not flush redo log for every transaction
2866       here. So that we can flush prepared records of transactions to
2867       redo log in a group right before writing them to binary log
2868       during flush stage of binlog group commit. */
2869       break;
2870     case HA_REGULAR_DURABILITY:
2871       if (lsn == 0) {
2872         break;
2873       }
2874       /* Depending on the my.cnf options, we may now write the log
2875       buffer to the log files, making the prepared state of the
2876       transaction durable if the OS does not crash. We may also
2877       flush the log files to disk, making the prepared state of the
2878       transaction durable also at an OS crash or a power outage.
2879 
2880       The idea in InnoDB's group prepare is that a group of
2881       transactions gather behind a trx doing a physical disk write
2882       to log files, and when that physical write has been completed,
2883       one of those transactions does a write which prepares the whole
2884       group. Note that this group prepare will only bring benefit if
2885       there are > 2 users in the database. Then at least 2 users can
2886       gather behind one doing the physical log write to disk.
2887 
2888       We must not be holding any mutexes or latches here. */
2889 
2890       /* We should trust trx->ddl_operation instead of
2891       ddl_must_flush here */
2892       trx->ddl_must_flush = false;
2893       trx_flush_log_if_needed(lsn, trx);
2894   }
2895 }
2896 
2897 /**
2898 Does the transaction prepare for MySQL.
2899 @param[in, out] trx		Transaction instance to prepare */
trx_prepare_for_mysql(trx_t * trx)2900 dberr_t trx_prepare_for_mysql(trx_t *trx) {
2901   trx_start_if_not_started_xa(trx, false);
2902 
2903   TrxInInnoDB trx_in_innodb(trx, true);
2904 
2905   if (trx_in_innodb.is_aborted() && trx->killed_by != os_thread_get_curr_id()) {
2906     return (DB_FORCED_ABORT);
2907   }
2908 
2909   /* For GTID persistence we need update undo segment. */
2910   auto db_err = trx_undo_gtid_add_update_undo(trx, true, false);
2911   if (db_err != DB_SUCCESS) {
2912     return (db_err);
2913   }
2914 
2915   trx->op_info = "preparing";
2916 
2917   trx_prepare(trx);
2918 
2919   trx->op_info = "";
2920 
2921   return (DB_SUCCESS);
2922 }
2923 
2924 /**
2925   Get the table name and database name for the given dd_table object.
2926 
2927   @param[in,out]  table Handler table name object pointer.
2928   @param[in]      dd_table  Pointer table name DD object.
2929   @param[in]      mem_root  Mem_root for space allocation.
2930 
2931   @retval     true   Error, e.g. Memory allocation failure.
2932   @retval     false  Success
2933 */
2934 
get_table_name_info(st_handler_tablename * table,const dict_table_t * dd_table,MEM_ROOT * mem_root)2935 static bool get_table_name_info(st_handler_tablename *table,
2936                                 const dict_table_t *dd_table,
2937                                 MEM_ROOT *mem_root) {
2938   const char *ptr;
2939 
2940   size_t len = dict_get_db_name_len(dd_table->name.m_name);
2941   table->db = strmake_root(mem_root, dd_table->name.m_name, len);
2942   if (table->db == nullptr) return true;
2943 
2944   ptr = dict_remove_db_name(dd_table->name.m_name);
2945   len = ut_strlen(ptr);
2946   table->tablename = strmake_root(mem_root, ptr, len);
2947   if (table->tablename == nullptr) return true;
2948 
2949   return false;
2950 }
2951 
2952 /**
2953   Get prepared transaction info from InnoDB data structure.
2954 
2955   @param[in,out]  txn_list  Handler layer tansaction list.
2956   @param[in]      trx       Innodb transaction info.
2957   @param[in]      mem_root  Mem_root for space allocation.
2958 
2959   @retval     true          Error, e.g. Memory allocation failure.
2960   @retval     false         Success
2961 */
2962 
get_info_about_prepared_transaction(XA_recover_txn * txn_list,const trx_t * trx,MEM_ROOT * mem_root)2963 static bool get_info_about_prepared_transaction(XA_recover_txn *txn_list,
2964                                                 const trx_t *trx,
2965                                                 MEM_ROOT *mem_root) {
2966   txn_list->id = *trx->xid;
2967   txn_list->mod_tables = new (mem_root) List<st_handler_tablename>();
2968   if (!txn_list->mod_tables) return true;
2969 
2970   for (auto dd_table : trx->mod_tables) {
2971     st_handler_tablename *table = new (mem_root) st_handler_tablename();
2972 
2973     if (!table || get_table_name_info(table, dd_table, mem_root) ||
2974         txn_list->mod_tables->push_back(table, mem_root))
2975       return true;
2976   }
2977   return false;
2978 }
2979 
2980 /** This function is used to find number of prepared transactions and
2981  their transaction objects for a recovery.
2982  @return number of prepared transactions stored in xid_list */
trx_recover_for_mysql(XA_recover_txn * txn_list,ulint len,MEM_ROOT * mem_root)2983 int trx_recover_for_mysql(
2984     XA_recover_txn *txn_list, /*!< in/out: prepared transactions */
2985     ulint len,                /*!< in: number of slots in xid_list */
2986     MEM_ROOT *mem_root)       /*!< in: memory for table names */
2987 {
2988   const trx_t *trx;
2989   ulint count = 0;
2990 
2991   ut_ad(txn_list);
2992   ut_ad(len);
2993 
2994   /* We should set those transactions which are in the prepared state
2995   to the xid_list */
2996 
2997   trx_sys_mutex_enter();
2998 
2999   for (trx = UT_LIST_GET_FIRST(trx_sys->rw_trx_list); trx != nullptr;
3000        trx = UT_LIST_GET_NEXT(trx_list, trx)) {
3001     assert_trx_in_rw_list(trx);
3002 
3003     /* The state of a read-write transaction cannot change
3004     from or to NOT_STARTED while we are holding the
3005     trx_sys->mutex. It may change to PREPARED, but not if
3006     trx->is_recovered. */
3007     if (trx_state_eq(trx, TRX_STATE_PREPARED)) {
3008       if (get_info_about_prepared_transaction(&txn_list[count], trx, mem_root))
3009         break;
3010 
3011       if (count == 0) {
3012         ib::info(ER_IB_MSG_1207) << "Starting recovery for"
3013                                     " XA transactions...";
3014       }
3015 
3016       ib::info(ER_IB_MSG_1208) << "Transaction " << trx_get_id_for_print(trx)
3017                                << " in prepared state after recovery";
3018 
3019       ib::info(ER_IB_MSG_1209)
3020           << "Transaction contains changes to " << trx->undo_no << " rows";
3021 
3022       count++;
3023 
3024       if (count == len) {
3025         break;
3026       }
3027     }
3028   }
3029 
3030   trx_sys_mutex_exit();
3031 
3032   if (count > 0) {
3033     ib::info(ER_IB_MSG_1210) << count
3034                              << " transactions in prepared state"
3035                                 " after recovery";
3036   }
3037 
3038   return (int(count));
3039 }
3040 
3041 /** This function is used to find one X/Open XA distributed transaction
3042  which is in the prepared state
3043  @return trx on match, the trx->xid will be invalidated;
3044  */
trx_get_trx_by_xid_low(const XID * xid)3045 static MY_ATTRIBUTE((warn_unused_result)) trx_t *trx_get_trx_by_xid_low(
3046     const XID *xid) /*!< in: X/Open XA transaction
3047                     identifier */
3048 {
3049   trx_t *trx;
3050 
3051   ut_ad(trx_sys_mutex_own());
3052 
3053   for (trx = UT_LIST_GET_FIRST(trx_sys->rw_trx_list); trx != nullptr;
3054        trx = UT_LIST_GET_NEXT(trx_list, trx)) {
3055     assert_trx_in_rw_list(trx);
3056 
3057     /* Compare two X/Open XA transaction id's: their
3058     length should be the same and binary comparison
3059     of gtrid_length+bqual_length bytes should be
3060     the same */
3061 
3062     if (trx->is_recovered && trx_state_eq(trx, TRX_STATE_PREPARED) &&
3063         xid->eq(trx->xid)) {
3064       /* Invalidate the XID, so that subsequent calls
3065       will not find it. */
3066       trx->xid->reset();
3067       break;
3068     }
3069   }
3070 
3071   return (trx);
3072 }
3073 
trx_get_trx_by_xid(const XID * xid)3074 trx_t *trx_get_trx_by_xid(const XID *xid) {
3075   trx_t *trx;
3076 
3077   if (xid == nullptr) {
3078     return (nullptr);
3079   }
3080 
3081   trx_sys_mutex_enter();
3082 
3083   /* Recovered/Resurrected transactions are always only on the
3084   trx_sys_t::rw_trx_list. */
3085   trx = trx_get_trx_by_xid_low(xid);
3086 
3087   trx_sys_mutex_exit();
3088 
3089   return (trx);
3090 }
3091 
3092 /** Starts the transaction if it is not yet started. */
trx_start_if_not_started_xa_low(trx_t * trx,bool read_write)3093 void trx_start_if_not_started_xa_low(
3094     trx_t *trx,      /*!< in/out: transaction */
3095     bool read_write) /*!< in: true if read write transaction */
3096 {
3097   switch (trx->state) {
3098     case TRX_STATE_NOT_STARTED:
3099     case TRX_STATE_FORCED_ROLLBACK:
3100       trx_start_low(trx, read_write);
3101       return;
3102 
3103     case TRX_STATE_ACTIVE:
3104       if (trx->id == 0 && read_write) {
3105         /* If the transaction is tagged as read-only then
3106         it can only write to temp tables and for such
3107         transactions we don't want to move them to the
3108         trx_sys_t::rw_trx_list. */
3109         if (!trx->read_only) {
3110           trx_set_rw_mode(trx);
3111         } else if (!srv_read_only_mode) {
3112           trx_assign_rseg_temp(trx);
3113         }
3114       }
3115       return;
3116     case TRX_STATE_PREPARED:
3117     case TRX_STATE_COMMITTED_IN_MEMORY:
3118       break;
3119   }
3120 
3121   ut_error;
3122 }
3123 
3124 /** Starts the transaction if it is not yet started. */
trx_start_if_not_started_low(trx_t * trx,bool read_write)3125 void trx_start_if_not_started_low(
3126     trx_t *trx,      /*!< in: transaction */
3127     bool read_write) /*!< in: true if read write transaction */
3128 {
3129   switch (trx->state) {
3130     case TRX_STATE_NOT_STARTED:
3131     case TRX_STATE_FORCED_ROLLBACK:
3132 
3133       trx_start_low(trx, read_write);
3134       return;
3135 
3136     case TRX_STATE_ACTIVE:
3137 
3138       if (read_write && trx->id == 0 && !trx->read_only) {
3139         trx_set_rw_mode(trx);
3140       }
3141       return;
3142 
3143     case TRX_STATE_PREPARED:
3144     case TRX_STATE_COMMITTED_IN_MEMORY:
3145       break;
3146   }
3147 
3148   ut_error;
3149 }
3150 
3151 /** Starts a transaction for internal processing. */
trx_start_internal_low(trx_t * trx)3152 void trx_start_internal_low(trx_t *trx) /*!< in/out: transaction */
3153 {
3154   /* Ensure it is not flagged as an auto-commit-non-locking
3155   transaction. */
3156 
3157   trx->will_lock = 1;
3158 
3159   trx->internal = true;
3160 
3161   trx_start_low(trx, true);
3162 }
3163 
3164 /** Starts a read-only transaction for internal processing.
3165 @param[in,out] trx	transaction to be started */
trx_start_internal_read_only_low(trx_t * trx)3166 void trx_start_internal_read_only_low(trx_t *trx) {
3167   /* Ensure it is not flagged as an auto-commit-non-locking
3168   transaction. */
3169 
3170   trx->will_lock = 1;
3171 
3172   trx->internal = true;
3173 
3174   trx_start_low(trx, false);
3175 }
3176 
/** Set the transaction as a read-write transaction if it is not already
 tagged as such. Read-only transactions that are writing to temporary
 tables are assigned an ID and a rollback segment but are not added
 to the trx read-write list because their updates should not be visible
 to other transactions and therefore their changes can be ignored by
 MVCC.
 The function returns without any change when srv_force_recovery is at least
 SRV_FORCE_NO_TRX_UNDO. Otherwise it assigns a durable rollback segment and,
 under trx_sys->mutex, a new transaction id, and registers the transaction in
 trx_sys->rw_trx_ids, trx_sys->rw_trx_set and trx_sys->rw_trx_list. */
void trx_set_rw_mode(trx_t *trx) /*!< in/out: transaction that is RW */
{
  /* Debug-build preconditions: no redo rollback segment assigned yet, not on
  the rw list, not an autocommit non-locking trx and not tagged read-only. */
  ut_ad(trx->rsegs.m_redo.rseg == nullptr);
  ut_ad(!trx->in_rw_trx_list);
  ut_ad(!trx_is_autocommit_non_locking(trx));
  ut_ad(!trx->read_only);

  if (srv_force_recovery >= SRV_FORCE_NO_TRX_UNDO) {
    return;
  }

  /* Function is promoting existing trx from ro mode to rw mode.
  In this process it acquires trx_sys->mutex as it plans to
  move trx from ro list to rw list. If in future, some other thread
  looks at this trx object while it is being promoted then ensure
  that both threads are synced by acquiring trx->mutex to avoid decision
  based on inconsistent view formed during promotion. */

  trx_assign_rseg_durable(trx);

  ut_ad(trx->rsegs.m_redo.rseg != nullptr);

  mutex_enter(&trx_sys->mutex);

  /* The id is allocated and published in all the rw containers within one
  critical section of trx_sys->mutex. */
  ut_ad(trx->id == 0);
  trx->id = trx_sys_get_new_trx_id();

  trx_sys->rw_trx_ids.push_back(trx->id);

  trx_sys->rw_trx_set.insert(TrxTrack(trx->id, trx));

  /* So that we can see our own changes. */
  if (MVCC::is_view_active(trx->read_view)) {
    MVCC::set_view_creator_trx_id(trx->read_view, trx->id);
  }

  UT_LIST_ADD_FIRST(trx_sys->rw_trx_list, trx);

  ut_d(trx->in_rw_trx_list = true);

  mutex_exit(&trx_sys->mutex);
}
3225 
/** If trx is a high priority transaction, rolls back the transactions on the
hit list built for it by lock_make_trx_hit_list().
For each victim the function first waits, sleeping with increasing intervals,
until the victim's in_innodb has no bits of TRX_FORCE_ROLLBACK_MASK set or
the victim's version has changed. A victim whose version differs from the one
recorded in the hit list is skipped; the others are rolled back with
trx_rollback_for_mysql().
A data dictionary latch held in S mode by trx is released for the duration of
the processing and taken again at the end.
@param[in,out]  trx  transaction which may be of high priority */
void trx_kill_blocking(trx_t *trx) {
  if (!trx_is_high_priority(trx)) {
    return;
  }
  hit_list_t hit_list;
  lock_make_trx_hit_list(trx, hit_list);
  if (hit_list.empty()) {
    return;
  }

  DEBUG_SYNC_C("trx_kill_blocking_enter");

  /* Remembered so that the latch can be taken again before returning. */
  ulint had_dict_lock = trx->dict_operation_lock_mode;

  switch (had_dict_lock) {
    case 0:
      break;

    case RW_S_LATCH:
      /* Release foreign key check latch */
      row_mysql_unfreeze_data_dictionary(trx);
      break;

    default:
      /* There should never be a lock wait when the
      dictionary latch is reserved in X mode.  Dictionary
      transactions should only acquire locks on dictionary
      tables, not other tables. All access to dictionary
      tables should be covered by dictionary
      transactions. */
      ut_error;
  }

  ut_a(trx->dict_operation_lock_mode == 0);

  /** Kill the transactions in the lock acquisition order old -> new. */
  hit_list_t::reverse_iterator end = hit_list.rend();

  for (hit_list_t::reverse_iterator it = hit_list.rbegin(); it != end; ++it) {
    trx_t *victim_trx = it->m_trx;
    ulint version = it->m_version;

    /* Shouldn't commit suicide. */
    ut_ad(victim_trx != trx);
    ut_ad(victim_trx->mysql_thd != trx->mysql_thd);

    /* Check that the transaction isn't active inside
    InnoDB code. We have to wait while it is executing
    in the InnoDB context. This can potentially take a
    long time */

    trx_mutex_enter(victim_trx);
    ut_ad(version <= victim_trx->version);

    ulint loop_count = 0;
    /* start with optimistic sleep time of 20 micro seconds. */
    ulint sleep_time = 20;

    bool exited_innodb = false;

    /* The victim's mutex is held whenever the loop condition is evaluated and
    is released for the duration of each sleep. */
    while ((victim_trx->in_innodb & TRX_FORCE_ROLLBACK_MASK) > 0 &&
           victim_trx->version == version) {
      trx_mutex_exit(victim_trx);

      /* Declare this OS thread to exit InnoDB, before waiting */
      if (trx->declared_to_be_inside_innodb) {
        exited_innodb = true;
        srv_conc_force_exit_innodb(trx);
      }

      loop_count++;
      /* If the wait is long, don't hog the cpu. */
      if (loop_count < 100) {
        /* 20 microseconds */
        sleep_time = 20;
      } else if (loop_count < 1000) {
        /* 1 millisecond */
        sleep_time = 1000;
      } else {
        /* 100 milliseconds */
        sleep_time = 100000;
      }

      os_thread_sleep(sleep_time);

      trx_mutex_enter(victim_trx);
    }

    /* Return back inside InnoDB */
    if (exited_innodb) {
      exited_innodb = false;
      /* Exit transaction mutex before entering Innodb. */
      trx_mutex_exit(victim_trx);
      srv_conc_force_enter_innodb(trx);
      trx_mutex_enter(victim_trx);
    }

    /* Compare the version to check if the transaction has
    already finished */
    if (victim_trx->version != version) {
      trx_mutex_exit(victim_trx);
      continue;
    }

    /* We should never kill background transactions. */
    ut_ad(victim_trx->mysql_thd != nullptr);

    ut_ad(!(trx->in_innodb & TRX_FORCE_ROLLBACK_DISABLE));
    ut_ad(victim_trx->in_innodb & TRX_FORCE_ROLLBACK);
    ut_ad(victim_trx->in_innodb & TRX_FORCE_ROLLBACK_ASYNC);
    ut_ad(victim_trx->killed_by == os_thread_get_curr_id());
    ut_ad(victim_trx->version == it->m_version);

    /* We don't kill Read Only, Background or high priority
    transactions. */
    ut_a(!victim_trx->read_only);
    ut_a(victim_trx->mysql_thd != nullptr);

    /* The victim's mutex is not held across the rollback below. */
    trx_mutex_exit(victim_trx);

#ifdef UNIV_DEBUG
    /* Capture the description and the id of the victim before the rollback,
    for the message printed after it. */
    char buffer[1024];
    char *thr_text;
    trx_id_t id;

    thr_text = thd_security_context(victim_trx->mysql_thd, buffer,
                                    sizeof(buffer), 512);
    id = victim_trx->id;
#endif /* UNIV_DEBUG */
    trx_rollback_for_mysql(victim_trx);

#ifdef UNIV_DEBUG
    ib::info(ER_IB_MSG_1211)
        << "High Priority Transaction (ID): " << trx->id
        << " killed transaction (ID): " << id << " in hit list"
        << " - " << thr_text;
#endif /* UNIV_DEBUG */
    trx_mutex_enter(victim_trx);

    /* In debug builds it is verified that the rollback has advanced the
    victim's version by exactly one. */
    version++;
    ut_ad(victim_trx->version == version);

    /* Reset killed_by to 0, provided it still holds the value just read. */
    os_thread_id_t thread_id = victim_trx->killed_by;
    os_compare_and_swap_thread_id(&victim_trx->killed_by, thread_id, 0);

    /* Keep only those bits of in_innodb which are covered by
    TRX_FORCE_ROLLBACK_MASK. */
    victim_trx->in_innodb &= TRX_FORCE_ROLLBACK_MASK;

    trx_mutex_exit(victim_trx);
  }

  /* Take back the data dictionary latch released at the beginning. */
  if (had_dict_lock) {
    row_mysql_freeze_data_dictionary(trx);
  }
}
3380 
/* Forward declaration of a function defined outside of this file. It is used
below to get the default THD of the current session thread. */
THD *thd_get_current_thd();
3383 
trx_sys_update_binlog_position(trx_t * trx)3384 void trx_sys_update_binlog_position(trx_t *trx) {
3385   THD *thd = trx->mysql_thd;
3386   /* For XA commit/rollback by XID, transaction thd could be null. */
3387   if (thd == nullptr) {
3388     thd = thd_get_current_thd();
3389     if (thd == nullptr) {
3390       return;
3391     }
3392   }
3393   ulonglong pos;
3394   thd_binlog_pos(thd, &trx->mysql_log_file_name, &pos);
3395   trx->mysql_log_offset = static_cast<uint64_t>(pos);
3396 }
3397