1 /*****************************************************************************
2
3 Copyright (c) 1996, 2020, Oracle and/or its affiliates. All Rights Reserved.
4
5 This program is free software; you can redistribute it and/or modify it under
6 the terms of the GNU General Public License, version 2.0, as published by the
7 Free Software Foundation.
8
9 This program is also distributed with certain software (including but not
10 limited to OpenSSL) that is licensed under separate terms, as designated in a
11 particular file or component or in included license documentation. The authors
12 of MySQL hereby grant you an additional permission to link the program and
13 your derivative works with the separately licensed software that they have
14 included with MySQL.
15
16 This program is distributed in the hope that it will be useful, but WITHOUT
17 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
18 FOR A PARTICULAR PURPOSE. See the GNU General Public License, version 2.0,
19 for more details.
20
21 You should have received a copy of the GNU General Public License along with
22 this program; if not, write to the Free Software Foundation, Inc.,
23 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
24
25 *****************************************************************************/
26
27 /** @file trx/trx0trx.cc
28 The transaction
29
30 Created 3/26/1996 Heikki Tuuri
31 *******************************************************/
32
33 #include <sys/types.h>
34 #include <time.h>
35 #include <new>
36 #include <set>
37
38 #include <sql_thd_internal_api.h>
39
40 #include "btr0sea.h"
41 #include "clone0clone.h"
42 #include "current_thd.h"
43 #include "dict0dd.h"
44 #include "fsp0sysspace.h"
45 #include "ha_prototypes.h"
46 #include "lock0lock.h"
47 #include "log0log.h"
48 #include "os0proc.h"
49 #include "que0que.h"
50 #include "read0read.h"
51 #include "row0mysql.h"
52 #include "srv0mon.h"
53 #include "srv0srv.h"
54 #include "srv0start.h"
55 #include "trx0purge.h"
56 #include "trx0rec.h"
57 #include "trx0roll.h"
58 #include "trx0rseg.h"
59 #include "trx0trx.h"
60 #include "trx0undo.h"
61 #include "trx0xa.h"
62 #include "usr0sess.h"
63 #include "ut0new.h"
64 #include "ut0pool.h"
65 #include "ut0vec.h"
66
67 #include "my_dbug.h"
68 #include "mysql/plugin.h"
69 #include "sql/clone_handler.h"
70
/** Size in bytes of the buffer allocated for trx_t::detailed_error. It is
also the limit passed when a message is copied or read into that buffer. */
static const ulint MAX_DETAILED_ERROR_LEN = 256;

/** Set of table_id */
typedef std::set<table_id_t, std::less<table_id_t>, ut_allocator<table_id_t>>
    table_id_set;

/** Map of transactions to affected table_id */
typedef std::map<trx_t *, table_id_set, std::less<trx_t *>,
                 ut_allocator<std::pair<trx_t *const, table_id_set>>>
    trx_table_map;

/** Map of resurrected transactions to affected table_id. Entries are added
by trx_resurrect_table_ids(); trx_resurrect_locks() consumes the map and
clears it. */
static trx_table_map resurrected_trx_tables;

/** Dummy session used currently in MySQL interface */
sess_t *trx_dummy_sess = nullptr;
87
88 /** Constructor */
TrxVersion(trx_t * trx)89 TrxVersion::TrxVersion(trx_t *trx) : m_trx(trx), m_version(trx->version) {
90 /* No op */
91 }
92
/** Makes the transaction committed in memory and makes its changes to data
visible to other transactions.
In particular, it releases the implicit and explicit locks held by the
transaction and transitions the transaction to the
TRX_STATE_COMMITTED_IN_MEMORY state.
NOTE that there is a small discrepancy from the strict formal
visibility rules here: a human user of the database can see
modifications made by another transaction T even before the necessary
log segment has been flushed to the disk. If the database happens to
crash before the flush, the user has seen modifications from T which
will never be a committed transaction. However, any transaction T2
which sees the modifications of the committing transaction T, and
which also itself makes modifications to the database, will get an lsn
larger than the committing transaction T. In the case where the log
flush fails, and T never gets committed, also T2 will never get
committed.
@param[in,out]  trx         The transaction which will be committed in
                            memory
@param[in]      serialized  true if serialisation log was written. Affects the
                            list of things we need to clean up during
                            trx_erase_lists.
*/
static void trx_release_impl_and_expl_locks(trx_t *trx, bool serialized);
115
116 /** Set flush observer for the transaction
117 @param[in,out] trx transaction struct
118 @param[in] observer flush observer */
trx_set_flush_observer(trx_t * trx,FlushObserver * observer)119 void trx_set_flush_observer(trx_t *trx, FlushObserver *observer) {
120 trx->flush_observer = observer;
121 }
122
123 /** Set detailed error message for the transaction. */
trx_set_detailed_error(trx_t * trx,const char * msg)124 void trx_set_detailed_error(trx_t *trx, /*!< in: transaction struct */
125 const char *msg) /*!< in: detailed error message */
126 {
127 ut_strlcpy(trx->detailed_error, msg, MAX_DETAILED_ERROR_LEN);
128 }
129
130 /** Set detailed error message for the transaction from a file. Note that the
131 file is rewinded before reading from it. */
trx_set_detailed_error_from_file(trx_t * trx,FILE * file)132 void trx_set_detailed_error_from_file(
133 trx_t *trx, /*!< in: transaction struct */
134 FILE *file) /*!< in: file to read message from */
135 {
136 os_file_read_string(file, trx->detailed_error, MAX_DETAILED_ERROR_LEN);
137 }
138
139 /** Initialize transaction object.
140 @param trx trx to initialize */
trx_init(trx_t * trx)141 static void trx_init(trx_t *trx) {
142 /* This is called at the end of commit, do not reset the
143 trx_t::state here to NOT_STARTED. The FORCED_ROLLBACK
144 status is required for asynchronous handling. */
145
146 trx->id = 0;
147
148 trx->no = TRX_ID_MAX;
149
150 trx->persists_gtid = false;
151
152 trx->skip_lock_inheritance = false;
153
154 trx->is_recovered = false;
155
156 trx->op_info = "";
157
158 trx->isolation_level = TRX_ISO_REPEATABLE_READ;
159
160 trx->check_foreigns = true;
161
162 trx->check_unique_secondary = true;
163
164 trx->lock.n_rec_locks.store(0);
165
166 trx->lock.blocking_trx.store(nullptr);
167
168 trx->dict_operation = TRX_DICT_OP_NONE;
169
170 trx->ddl_operation = false;
171
172 trx->error_state = DB_SUCCESS;
173
174 trx->error_key_num = ULINT_UNDEFINED;
175
176 trx->undo_no = 0;
177
178 trx->rsegs.m_redo.rseg = nullptr;
179
180 trx->rsegs.m_noredo.rseg = nullptr;
181
182 trx->read_only = false;
183
184 trx->auto_commit = false;
185
186 trx->will_lock = 0;
187
188 trx->lock.inherit_all.store(false);
189
190 trx->internal = false;
191
192 trx->in_truncate = false;
193 #ifdef UNIV_DEBUG
194 trx->is_dd_trx = false;
195 trx->in_rollback = false;
196 trx->lock.in_rollback = false;
197 #endif /* UNIV_DEBUG */
198
199 ut_d(trx->start_file = nullptr);
200
201 ut_d(trx->start_line = 0);
202
203 trx->magic_n = TRX_MAGIC_N;
204
205 trx->lock.que_state = TRX_QUE_RUNNING;
206
207 trx->last_sql_stat_start.least_undo_no = 0;
208
209 ut_ad(!MVCC::is_view_active(trx->read_view));
210
211 trx->lock.rec_cached = 0;
212
213 trx->lock.table_cached = 0;
214
215 trx->error_index = nullptr;
216
217 /* During asynchronous rollback, we should reset forced rollback flag
218 only after rollback is complete to avoid race with the thread owning
219 the transaction. */
220
221 if (!TrxInInnoDB::is_async_rollback(trx)) {
222 os_thread_id_t thread_id = trx->killed_by;
223 os_compare_and_swap_thread_id(&trx->killed_by, thread_id, 0);
224
225 /* Note: Do not set to 0, the ref count is decremented inside
226 the TrxInInnoDB() destructor. We only need to clear the flags. */
227
228 trx->in_innodb &= TRX_FORCE_ROLLBACK_MASK;
229 }
230
231 trx->flush_observer = nullptr;
232
233 ++trx->version;
234 }
235
236 /** For managing the life-cycle of the trx_t instance that we get
237 from the pool. */
238 struct TrxFactory {
239 /** Initializes a transaction object. It must be explicitly started
240 with trx_start_if_not_started() before using it. The default isolation
241 level is TRX_ISO_REPEATABLE_READ.
242 @param trx Transaction instance to initialise */
initTrxFactory243 static void init(trx_t *trx) {
244 /* Explicitly call the constructor of the already
245 allocated object. trx_t objects are allocated by
246 ut_zalloc() in Pool::Pool() which would not call
247 the constructors of the trx_t members. */
248 new (&trx->mod_tables) trx_mod_tables_t();
249
250 new (&trx->lock.rec_pool) lock_pool_t();
251
252 new (&trx->lock.table_pool) lock_pool_t();
253
254 new (&trx->lock.table_locks) lock_pool_t();
255
256 trx_init(trx);
257
258 trx->state = TRX_STATE_NOT_STARTED;
259
260 trx->dict_operation_lock_mode = 0;
261
262 trx->xid = UT_NEW_NOKEY(xid_t());
263
264 trx->detailed_error =
265 reinterpret_cast<char *>(ut_zalloc_nokey(MAX_DETAILED_ERROR_LEN));
266
267 trx->lock.lock_heap = mem_heap_create_typed(1024, MEM_HEAP_FOR_LOCK_HEAP);
268
269 lock_trx_lock_list_init(&trx->lock.trx_locks);
270
271 UT_LIST_INIT(trx->trx_savepoints, &trx_named_savept_t::trx_savepoints);
272
273 mutex_create(LATCH_ID_TRX, &trx->mutex);
274 mutex_create(LATCH_ID_TRX_UNDO, &trx->undo_mutex);
275
276 lock_trx_alloc_locks(trx);
277 }
278
279 /** Release resources held by the transaction object.
280 @param trx the transaction for which to release resources */
destroyTrxFactory281 static void destroy(trx_t *trx) {
282 ut_a(trx->magic_n == TRX_MAGIC_N);
283 ut_ad(!trx->in_rw_trx_list);
284 ut_ad(!trx->in_mysql_trx_list);
285
286 ut_a(trx->lock.wait_lock == nullptr);
287 ut_a(trx->lock.wait_thr == nullptr);
288 ut_a(trx->lock.blocking_trx.load() == nullptr);
289
290 ut_a(!trx->has_search_latch);
291
292 ut_a(trx->dict_operation_lock_mode == 0);
293
294 if (trx->lock.lock_heap != nullptr) {
295 mem_heap_free(trx->lock.lock_heap);
296 trx->lock.lock_heap = nullptr;
297 }
298
299 ut_a(UT_LIST_GET_LEN(trx->lock.trx_locks) == 0);
300
301 UT_DELETE(trx->xid);
302 ut_free(trx->detailed_error);
303
304 mutex_free(&trx->mutex);
305 mutex_free(&trx->undo_mutex);
306
307 trx->mod_tables.~trx_mod_tables_t();
308
309 ut_ad(trx->read_view == nullptr);
310
311 if (!trx->lock.rec_pool.empty()) {
312 /* See lock_trx_alloc_locks() why we only free
313 the first element. */
314
315 ut_free(trx->lock.rec_pool[0]);
316 }
317
318 if (!trx->lock.table_pool.empty()) {
319 /* See lock_trx_alloc_locks() why we only free
320 the first element. */
321
322 ut_free(trx->lock.table_pool[0]);
323 }
324
325 trx->lock.rec_pool.~lock_pool_t();
326
327 trx->lock.table_pool.~lock_pool_t();
328
329 trx->lock.table_locks.~lock_pool_t();
330 }
331
332 /** Enforce any invariants here, this is called before the transaction
333 is added to the pool.
334 @return true if all OK */
debugTrxFactory335 static bool debug(const trx_t *trx) {
336 ut_a(trx->error_state == DB_SUCCESS);
337
338 ut_a(trx->magic_n == TRX_MAGIC_N);
339
340 ut_ad(!trx->read_only);
341
342 ut_ad(trx->state == TRX_STATE_NOT_STARTED ||
343 trx->state == TRX_STATE_FORCED_ROLLBACK);
344
345 ut_ad(trx->dict_operation == TRX_DICT_OP_NONE);
346
347 ut_ad(trx->mysql_thd == nullptr);
348
349 ut_ad(!trx->in_rw_trx_list);
350 ut_ad(!trx->in_mysql_trx_list);
351
352 ut_a(trx->lock.wait_thr == nullptr);
353 ut_a(trx->lock.wait_lock == nullptr);
354 ut_a(trx->lock.blocking_trx.load() == nullptr);
355
356 ut_a(!trx->has_search_latch);
357
358 ut_a(trx->dict_operation_lock_mode == 0);
359
360 ut_a(UT_LIST_GET_LEN(trx->lock.trx_locks) == 0);
361
362 ut_ad(trx->lock.autoinc_locks == nullptr);
363
364 ut_ad(trx->lock.table_locks.empty());
365
366 ut_ad(!trx->lock.inherit_all.load());
367
368 ut_ad(!trx->abort);
369
370 ut_ad(trx->killed_by == 0);
371
372 return (true);
373 }
374 };
375
376 /** The lock strategy for TrxPool */
377 struct TrxPoolLock {
TrxPoolLockTrxPoolLock378 TrxPoolLock() {}
379
380 /** Create the mutex */
createTrxPoolLock381 void create() { mutex_create(LATCH_ID_TRX_POOL, &m_mutex); }
382
383 /** Acquire the mutex */
enterTrxPoolLock384 void enter() { mutex_enter(&m_mutex); }
385
386 /** Release the mutex */
exitTrxPoolLock387 void exit() { mutex_exit(&m_mutex); }
388
389 /** Free the mutex */
destroyTrxPoolLock390 void destroy() { mutex_free(&m_mutex); }
391
392 /** Mutex to use */
393 ib_mutex_t m_mutex;
394 };
395
396 /** The lock strategy for the TrxPoolManager */
397 struct TrxPoolManagerLock {
TrxPoolManagerLockTrxPoolManagerLock398 TrxPoolManagerLock() {}
399
400 /** Create the mutex */
createTrxPoolManagerLock401 void create() { mutex_create(LATCH_ID_TRX_POOL_MANAGER, &m_mutex); }
402
403 /** Acquire the mutex */
enterTrxPoolManagerLock404 void enter() { mutex_enter(&m_mutex); }
405
406 /** Release the mutex */
exitTrxPoolManagerLock407 void exit() { mutex_exit(&m_mutex); }
408
409 /** Free the mutex */
destroyTrxPoolManagerLock410 void destroy() { mutex_free(&m_mutex); }
411
412 /** Mutex to use */
413 ib_mutex_t m_mutex;
414 };
415
/** Use explicit mutexes for the trx_t pool and its manager. */
typedef Pool<trx_t, TrxFactory, TrxPoolLock> trx_pool_t;
typedef PoolManager<trx_pool_t, TrxPoolManagerLock> trx_pools_t;

/** The trx_t pool manager. Created by trx_pool_init() and destroyed by
trx_pool_close(). */
static trx_pools_t *trx_pools;

/** Size of one trx_t pool in bytes (4 MiB). */
static const ulint MAX_TRX_BLOCK_SIZE = 1024 * 1024 * 4;
425
426 /** Create the trx_t pool */
trx_pool_init()427 void trx_pool_init() {
428 trx_pools = UT_NEW_NOKEY(trx_pools_t(MAX_TRX_BLOCK_SIZE));
429
430 ut_a(trx_pools != nullptr);
431 }
432
433 /** Destroy the trx_t pool */
trx_pool_close()434 void trx_pool_close() {
435 UT_DELETE(trx_pools);
436
437 trx_pools = nullptr;
438 }
439
440 /** @return a trx_t instance from trx_pools. */
trx_create_low()441 static trx_t *trx_create_low() {
442 trx_t *trx = trx_pools->get();
443
444 assert_trx_is_free(trx);
445
446 mem_heap_t *heap;
447 ib_alloc_t *alloc;
448
449 /* We just got trx from pool, it should be non locking */
450 ut_ad(trx->will_lock == 0);
451
452 trx->persists_gtid = false;
453
454 trx->api_trx = false;
455
456 trx->api_auto_commit = false;
457
458 trx->read_write = true;
459
460 /* Background trx should not be forced to rollback,
461 we will unset the flag for user trx. */
462 trx->in_innodb |= TRX_FORCE_ROLLBACK_DISABLE;
463
464 /* Trx state can be TRX_STATE_FORCED_ROLLBACK if
465 the trx was forced to rollback before it's reused.*/
466 trx->state = TRX_STATE_NOT_STARTED;
467
468 heap = mem_heap_create(sizeof(ib_vector_t) + sizeof(void *) * 8);
469
470 alloc = ib_heap_allocator_create(heap);
471
472 /* Remember to free the vector explicitly in trx_free(). */
473 trx->lock.autoinc_locks = ib_vector_create(alloc, sizeof(void **), 4);
474
475 /* Should have been either just initialized or .clear()ed by
476 trx_free(). */
477 ut_a(trx->mod_tables.size() == 0);
478
479 return (trx);
480 }
481
482 /**
483 Release a trx_t instance back to the pool.
484 @param trx the instance to release. */
trx_free(trx_t * & trx)485 static void trx_free(trx_t *&trx) {
486 assert_trx_is_free(trx);
487
488 trx->mysql_thd = nullptr;
489
490 // FIXME: We need to avoid this heap free/alloc for each commit.
491 if (trx->lock.autoinc_locks != nullptr) {
492 ut_ad(ib_vector_is_empty(trx->lock.autoinc_locks));
493 /* We allocated a dedicated heap for the vector. */
494 ib_vector_free(trx->lock.autoinc_locks);
495 trx->lock.autoinc_locks = nullptr;
496 }
497
498 trx->mod_tables.clear();
499
500 ut_ad(trx->read_view == nullptr);
501 ut_ad(trx->is_dd_trx == false);
502
503 /* trx locking state should have been reset before returning trx
504 to pool */
505 ut_ad(trx->will_lock == 0);
506
507 trx_pools->mem_free(trx);
508
509 trx = nullptr;
510 }
511
512 /** Creates a transaction object for background operations by the master thread.
513 @return own: transaction object */
trx_allocate_for_background(void)514 trx_t *trx_allocate_for_background(void) {
515 trx_t *trx;
516
517 trx = trx_create_low();
518
519 trx->sess = trx_dummy_sess;
520
521 return (trx);
522 }
523
524 /** Creates a transaction object for MySQL.
525 @return own: transaction object */
trx_allocate_for_mysql(void)526 trx_t *trx_allocate_for_mysql(void) {
527 trx_t *trx;
528
529 trx = trx_allocate_for_background();
530
531 trx_sys_mutex_enter();
532
533 ut_d(trx->in_mysql_trx_list = TRUE);
534 UT_LIST_ADD_FIRST(trx_sys->mysql_trx_list, trx);
535
536 trx_sys_mutex_exit();
537
538 return (trx);
539 }
540
541 /** Check state of transaction before freeing it.
542 @param[in,out] trx transaction object to validate */
trx_validate_state_before_free(trx_t * trx)543 static void trx_validate_state_before_free(trx_t *trx) {
544 if (trx->declared_to_be_inside_innodb) {
545 ib::error(ER_IB_MSG_1202)
546 << "Freeing a trx (" << trx << ", " << trx_get_id_for_print(trx)
547 << ") which is declared"
548 " to be processing inside InnoDB";
549
550 trx_print(stderr, trx, 600);
551 putc('\n', stderr);
552
553 /* This is an error but not a fatal error. We must keep
554 the counters like srv_conc_n_threads accurate. */
555 srv_conc_force_exit_innodb(trx);
556 }
557
558 if (trx->n_mysql_tables_in_use != 0 || trx->mysql_n_tables_locked != 0) {
559 ib::error(ER_IB_MSG_1203)
560 << "MySQL is freeing a thd though trx->n_mysql_tables_in_use is "
561 << trx->n_mysql_tables_in_use << " and trx->mysql_n_tables_locked is "
562 << trx->mysql_n_tables_locked << ".";
563
564 trx_print(stderr, trx, 600);
565 ut_print_buf(stderr, trx, sizeof(trx_t));
566 putc('\n', stderr);
567 }
568
569 trx->dict_operation = TRX_DICT_OP_NONE;
570 assert_trx_is_inactive(trx);
571 }
572
573 /** Free and initialize a transaction object instantiated during recovery.
574 @param[in,out] trx transaction object to free and initialize */
trx_free_resurrected(trx_t * trx)575 void trx_free_resurrected(trx_t *trx) {
576 trx_validate_state_before_free(trx);
577
578 trx_init(trx);
579
580 trx_free(trx);
581 }
582
583 /** Free a transaction that was allocated by background or user threads.
584 @param[in,out] trx transaction object to free */
trx_free_for_background(trx_t * trx)585 void trx_free_for_background(trx_t *trx) {
586 trx_validate_state_before_free(trx);
587
588 trx_free(trx);
589 }
590
trx_free_prepared_or_active_recovered(trx_t * trx)591 void trx_free_prepared_or_active_recovered(trx_t *trx) {
592 ut_a(trx->magic_n == TRX_MAGIC_N);
593 ulint expected_undo_state;
594 if (trx->state == TRX_STATE_ACTIVE) {
595 ut_a(trx_state_eq(trx, TRX_STATE_ACTIVE));
596 ut_a(trx->is_recovered);
597 expected_undo_state = TRX_UNDO_ACTIVE;
598 } else {
599 ut_a(trx_state_eq(trx, TRX_STATE_PREPARED));
600 expected_undo_state = TRX_UNDO_PREPARED;
601 }
602
603 assert_trx_in_rw_list(trx);
604
605 trx_release_impl_and_expl_locks(trx, false);
606 trx_undo_free_trx_with_prepared_or_active_logs(trx, expected_undo_state);
607
608 ut_ad(!trx->in_rw_trx_list);
609 ut_a(!trx->read_only);
610
611 trx->state = TRX_STATE_NOT_STARTED;
612
613 /* Undo trx_resurrect_table_locks(). */
614 lock_trx_lock_list_init(&trx->lock.trx_locks);
615
616 trx_free(trx);
617 }
618
619 /** Disconnect a transaction from MySQL and optionally mark it as if
620 it's been recovered. For the marking the transaction must be in prepared state.
621 The recovery-marked transaction is going to survive "alone" so its association
622 with the mysql handle is destroyed now rather than when it will be
623 finally freed.
624 @param[in,out] trx transaction
625 @param[in] prepared boolean value to specify whether trx is
626 for recovery or not. */
trx_disconnect_from_mysql(trx_t * trx,bool prepared)627 inline void trx_disconnect_from_mysql(trx_t *trx, bool prepared) {
628 trx_sys_mutex_enter();
629
630 ut_ad(trx->in_mysql_trx_list);
631 ut_d(trx->in_mysql_trx_list = FALSE);
632
633 UT_LIST_REMOVE(trx_sys->mysql_trx_list, trx);
634
635 if (trx->read_view != nullptr) {
636 trx_sys->mvcc->view_close(trx->read_view, true);
637 }
638
639 ut_ad(trx_sys_validate_trx_list());
640
641 if (prepared) {
642 ut_ad(trx_state_eq(trx, TRX_STATE_PREPARED));
643
644 trx->is_recovered = true;
645 trx->mysql_thd = nullptr;
646 /* todo/fixme: suggest to do it at innodb prepare */
647 trx->will_lock = 0;
648 }
649
650 trx_sys_mutex_exit();
651 }
652
653 /** Disconnect a transaction from MySQL.
654 @param[in,out] trx transaction */
trx_disconnect_plain(trx_t * trx)655 inline void trx_disconnect_plain(trx_t *trx) {
656 trx_disconnect_from_mysql(trx, false);
657 }
658
659 /** Disconnect a prepared transaction from MySQL.
660 @param[in,out] trx transaction */
trx_disconnect_prepared(trx_t * trx)661 void trx_disconnect_prepared(trx_t *trx) {
662 trx_disconnect_from_mysql(trx, true);
663 }
664
665 /** Free a transaction object for MySQL.
666 @param[in,out] trx transaction */
trx_free_for_mysql(trx_t * trx)667 void trx_free_for_mysql(trx_t *trx) {
668 trx_disconnect_plain(trx);
669 trx_free_for_background(trx);
670 }
671
672 /** Resurrect the table IDs for a resurrected transaction.
673 @param[in] trx resurrected transaction
674 @param[in] undo_ptr pointer to undo segment
675 @param[in] undo undo log */
trx_resurrect_table_ids(trx_t * trx,const trx_undo_ptr_t * undo_ptr,const trx_undo_t * undo)676 static void trx_resurrect_table_ids(trx_t *trx, const trx_undo_ptr_t *undo_ptr,
677 const trx_undo_t *undo) {
678 mtr_t mtr;
679 page_t *undo_page;
680 trx_undo_rec_t *undo_rec;
681
682 ut_ad(undo == undo_ptr->insert_undo || undo == undo_ptr->update_undo);
683
684 if (trx_state_eq(trx, TRX_STATE_COMMITTED_IN_MEMORY) || undo->empty) {
685 return;
686 }
687
688 table_id_set empty;
689 table_id_set &tables =
690 resurrected_trx_tables.insert(trx_table_map::value_type(trx, empty))
691 .first->second;
692
693 mtr_start(&mtr);
694
695 /* trx_rseg_mem_create() may have acquired an X-latch on this
696 page, so we cannot acquire an S-latch. */
697 undo_page = trx_undo_page_get(page_id_t(undo->space, undo->top_page_no),
698 undo->page_size, &mtr);
699
700 undo_rec = undo_page + undo->top_offset;
701
702 do {
703 ulint type;
704 undo_no_t undo_no;
705 table_id_t table_id;
706 ulint cmpl_info;
707 bool updated_extern;
708 type_cmpl_t type_cmpl;
709
710 page_t *undo_rec_page = page_align(undo_rec);
711
712 if (undo_rec_page != undo_page) {
713 mtr.release_page(undo_page, MTR_MEMO_PAGE_X_FIX);
714 undo_page = undo_rec_page;
715 }
716
717 trx_undo_rec_get_pars(undo_rec, &type, &cmpl_info, &updated_extern,
718 &undo_no, &table_id, type_cmpl);
719 tables.insert(table_id);
720
721 undo_rec = trx_undo_get_prev_rec(undo_rec, undo->hdr_page_no,
722 undo->hdr_offset, false, &mtr);
723 } while (undo_rec);
724
725 mtr_commit(&mtr);
726 }
727
728 /** Resurrect table locks for resurrected transactions. */
trx_resurrect_locks()729 void trx_resurrect_locks() {
730 for (trx_table_map::const_iterator t = resurrected_trx_tables.begin();
731 t != resurrected_trx_tables.end(); t++) {
732 trx_t *trx = t->first;
733 const table_id_set &tables = t->second;
734 ut_ad(trx->is_recovered);
735
736 for (table_id_set::const_iterator i = tables.begin(); i != tables.end();
737 i++) {
738 dict_table_t *table =
739 dd_table_open_on_id(*i, nullptr, nullptr, false, true);
740 if (table) {
741 ut_ad(!table->is_temporary());
742
743 if (table->ibd_file_missing || table->is_temporary()) {
744 mutex_enter(&dict_sys->mutex);
745 dd_table_close(table, nullptr, nullptr, true);
746 dict_table_remove_from_cache(table);
747 mutex_exit(&dict_sys->mutex);
748 continue;
749 }
750
751 if (trx->state == TRX_STATE_PREPARED && !dict_table_is_sdi(table->id)) {
752 trx->mod_tables.insert(table);
753 }
754 DICT_TF2_FLAG_SET(table, DICT_TF2_RESURRECT_PREPARED);
755
756 lock_table_ix_resurrect(table, trx);
757
758 DBUG_PRINT("ib_trx", ("resurrect" TRX_ID_FMT " table '%s' IX lock",
759 trx_get_id_for_print(trx), table->name.m_name));
760
761 dd_table_close(table, nullptr, nullptr, false);
762 }
763 }
764 }
765
766 resurrected_trx_tables.clear();
767 }
768
769 /** Resurrect the transactions that were doing inserts at the time of the
770 crash, they need to be undone.
771 @return trx_t instance */
trx_resurrect_insert(trx_undo_t * undo,trx_rseg_t * rseg)772 static trx_t *trx_resurrect_insert(
773 trx_undo_t *undo, /*!< in: entry to UNDO */
774 trx_rseg_t *rseg) /*!< in: rollback segment */
775 {
776 trx_t *trx;
777
778 trx = trx_allocate_for_background();
779
780 ut_d(trx->start_file = __FILE__);
781 ut_d(trx->start_line = __LINE__);
782
783 rseg->trx_ref_count++;
784 trx->rsegs.m_redo.rseg = rseg;
785 *trx->xid = undo->xid;
786 trx->id = undo->trx_id;
787 trx->rsegs.m_redo.insert_undo = undo;
788 trx->is_recovered = true;
789
790 /* This is single-threaded startup code, we do not need the
791 protection of trx->mutex or trx_sys->mutex here. */
792
793 if (undo->state != TRX_UNDO_ACTIVE) {
794 /* Prepared transactions are left in the prepared state
795 waiting for a commit or abort decision from MySQL */
796
797 if (undo->state == TRX_UNDO_PREPARED) {
798 ib::info(ER_IB_MSG_1204) << "Transaction " << trx_get_id_for_print(trx)
799 << " was in the XA prepared state.";
800
801 if (srv_force_recovery == 0) {
802 if (!srv_rollback_prepared_trx) {
803 trx->state = TRX_STATE_PREPARED;
804 ++trx_sys->n_prepared_trx;
805 } else {
806 /* XtraBackup is asked to rollback prepared XA
807 transactions */
808 trx->state = TRX_STATE_ACTIVE;
809 }
810 } else {
811 ib::info(ER_IB_MSG_1205) << "Since innodb_force_recovery"
812 " > 0, we will force a rollback.";
813
814 trx->state = TRX_STATE_ACTIVE;
815 }
816 } else {
817 trx->state = TRX_STATE_COMMITTED_IN_MEMORY;
818 }
819
820 /* We give a dummy value for the trx no; this should have no
821 relevance since purge is not interested in committed
822 transaction numbers, unless they are in the history
823 list, in which case it looks the number from the disk based
824 undo log structure */
825
826 trx->no = trx->id;
827
828 } else {
829 trx->state = TRX_STATE_ACTIVE;
830
831 /* A running transaction always has the number
832 field inited to TRX_ID_MAX */
833
834 trx->no = TRX_ID_MAX;
835 }
836
837 /* trx_start_low() is not called with resurrect, so need to initialize
838 start time here.*/
839 if (trx->state == TRX_STATE_ACTIVE || trx->state == TRX_STATE_PREPARED) {
840 trx->start_time = ut_time();
841 }
842
843 trx->ddl_operation = undo->dict_operation;
844
845 if (undo->dict_operation) {
846 trx_set_dict_operation(trx, TRX_DICT_OP_TABLE);
847 }
848
849 if (!undo->empty) {
850 trx->undo_no = undo->top_undo_no + 1;
851 trx->undo_rseg_space = undo->rseg->space_id;
852 }
853
854 return (trx);
855 }
856
857 /** Prepared transactions are left in the prepared state waiting for a
858 commit or abort decision from MySQL */
trx_resurrect_update_in_prepared_state(trx_t * trx,const trx_undo_t * undo)859 static void trx_resurrect_update_in_prepared_state(
860 trx_t *trx, /*!< in,out: transaction */
861 const trx_undo_t *undo) /*!< in: update UNDO record */
862 {
863 /* This is single-threaded startup code, we do not need the
864 protection of trx->mutex or trx_sys->mutex here. */
865
866 if (undo->state == TRX_UNDO_PREPARED) {
867 ib::info(ER_IB_MSG_1206) << "Transaction " << trx_get_id_for_print(trx)
868 << " was in the XA prepared state.";
869
870 ut_ad(trx->state != TRX_STATE_FORCED_ROLLBACK);
871
872 if (!srv_rollback_prepared_trx) {
873 if (trx_state_eq(trx, TRX_STATE_NOT_STARTED)) {
874 ++trx_sys->n_prepared_trx;
875 } else {
876 ut_ad(trx_state_eq(trx, TRX_STATE_PREPARED));
877 }
878
879 trx->state = TRX_STATE_PREPARED;
880 } else {
881 if (!trx_state_eq(trx, TRX_STATE_NOT_STARTED)) {
882 ut_ad(trx_state_eq(trx, TRX_STATE_PREPARED));
883 }
884 /* XtraBackup is asked to rollback prepared XA
885 transactions */
886 trx->state = TRX_STATE_ACTIVE;
887 }
888 } else {
889 trx->state = TRX_STATE_COMMITTED_IN_MEMORY;
890 }
891 }
892
893 /** Resurrect the transactions that were doing updates the time of the
894 crash, they need to be undone. */
trx_resurrect_update(trx_t * trx,trx_undo_t * undo,trx_rseg_t * rseg)895 static void trx_resurrect_update(
896 trx_t *trx, /*!< in/out: transaction */
897 trx_undo_t *undo, /*!< in/out: update UNDO record */
898 trx_rseg_t *rseg) /*!< in/out: rollback segment */
899 {
900 /* This resurected transaction might also have been doing inserts.
901 If so, this rseg is already assigned by trx_resurrect_insert(). */
902 if (trx->rsegs.m_redo.rseg != nullptr) {
903 ut_a(trx->rsegs.m_redo.rseg == rseg);
904 ut_ad(trx->id == undo->trx_id);
905 ut_ad(trx->is_recovered);
906 /* For GTID persistence, we might have empty update undo for
907 insert only transactions. */
908 if (undo->empty && trx_state_eq(trx, TRX_STATE_PREPARED)) {
909 undo->set_prepared(trx->xid);
910 }
911 ut_ad(undo->xid.eq(trx->xid));
912 } else {
913 rseg->trx_ref_count++;
914 trx->rsegs.m_redo.rseg = rseg;
915 *trx->xid = undo->xid;
916 trx->id = undo->trx_id;
917 trx->is_recovered = true;
918 }
919
920 /* Assign the update_undo segment. */
921 ut_a(trx->rsegs.m_redo.update_undo == nullptr);
922 trx->rsegs.m_redo.update_undo = undo;
923
924 /* This is single-threaded startup code, we do not need the
925 protection of trx->mutex or trx_sys->mutex here. */
926
927 if (undo->state != TRX_UNDO_ACTIVE) {
928 trx_resurrect_update_in_prepared_state(trx, undo);
929
930 /* We give a dummy value for the trx number */
931
932 trx->no = trx->id;
933
934 } else {
935 trx->state = TRX_STATE_ACTIVE;
936
937 /* A running transaction always has the number field inited to
938 TRX_ID_MAX */
939
940 trx->no = TRX_ID_MAX;
941 }
942
943 /* trx_start_low() is not called with resurrect, so need to initialize
944 start time here.*/
945 if (trx->state == TRX_STATE_ACTIVE || trx->state == TRX_STATE_PREPARED) {
946 trx->start_time = ut_time();
947 }
948
949 trx->ddl_operation = undo->dict_operation;
950
951 if (undo->dict_operation) {
952 trx_set_dict_operation(trx, TRX_DICT_OP_TABLE);
953 }
954
955 if (!undo->empty && undo->top_undo_no >= trx->undo_no) {
956 trx->undo_no = undo->top_undo_no + 1;
957 trx->undo_rseg_space = undo->rseg->space_id;
958 }
959 }
960
961 /** Resurrect the transactions that were doing inserts and updates at
962 the time of a crash, they need to be undone.
963 @param[in] rseg rollback segment */
trx_resurrect(trx_rseg_t * rseg)964 static void trx_resurrect(trx_rseg_t *rseg) {
965 trx_t *trx;
966 trx_undo_t *undo;
967
968 ut_ad(rseg != nullptr);
969
970 /* Resurrect transactions that were doing inserts. */
971 for (undo = UT_LIST_GET_FIRST(rseg->insert_undo_list); undo != nullptr;
972 undo = UT_LIST_GET_NEXT(undo_list, undo)) {
973 trx = trx_resurrect_insert(undo, rseg);
974
975 trx_sys_rw_trx_add(trx);
976
977 trx_resurrect_table_ids(trx, &trx->rsegs.m_redo, undo);
978 }
979
980 /* Ressurrect transactions that were doing updates. */
981 for (undo = UT_LIST_GET_FIRST(rseg->update_undo_list); undo != nullptr;
982 undo = UT_LIST_GET_NEXT(undo_list, undo)) {
983 /* Check the trx_sys->rw_trx_set first. */
984 trx_sys_mutex_enter();
985
986 trx_t *trx = trx_get_rw_trx_by_id(undo->trx_id);
987
988 trx_sys_mutex_exit();
989
990 if (trx == nullptr) {
991 trx = trx_allocate_for_background();
992
993 ut_d(trx->start_file = __FILE__);
994 ut_d(trx->start_line = __LINE__);
995 }
996
997 trx_resurrect_update(trx, undo, rseg);
998
999 trx_sys_rw_trx_add(trx);
1000
1001 trx_resurrect_table_ids(trx, &trx->rsegs.m_redo, undo);
1002 }
1003 }
1004
1005 /** Creates trx objects for transactions and initializes the trx list of
1006 trx_sys at database start. Rollback segments and undo log lists must
1007 already exist when this function is called, because the lists of
1008 transactions to be rolled back or cleaned up are built based on the
1009 undo log lists. */
trx_lists_init_at_db_start(void)1010 void trx_lists_init_at_db_start(void) {
1011 ut_a(srv_is_being_started);
1012
1013 if (srv_apply_log_only) {
1014 return;
1015 }
1016
1017 /* Look through the rollback segments in the TRX_SYS for
1018 transaction undo logs. */
1019 for (auto rseg : trx_sys->rsegs) {
1020 trx_resurrect(rseg);
1021 }
1022
1023 /* Look through the rollback segments in each RSEG_ARRAY for
1024 transaction undo logs. */
1025 undo::spaces->s_lock();
1026 for (auto undo_space : undo::spaces->m_spaces) {
1027 undo_space->rsegs()->s_lock();
1028 for (auto rseg : *undo_space->rsegs()) {
1029 trx_resurrect(rseg);
1030 }
1031 undo_space->rsegs()->s_unlock();
1032 }
1033 undo::spaces->s_unlock();
1034
1035 TrxIdSet::iterator end = trx_sys->rw_trx_set.end();
1036
1037 for (TrxIdSet::iterator it = trx_sys->rw_trx_set.begin(); it != end; ++it) {
1038 ut_ad(it->m_trx->in_rw_trx_list);
1039
1040 if (it->m_trx->state == TRX_STATE_ACTIVE ||
1041 it->m_trx->state == TRX_STATE_PREPARED) {
1042 trx_sys->rw_trx_ids.push_back(it->m_id);
1043 }
1044
1045 UT_LIST_ADD_FIRST(trx_sys->rw_trx_list, it->m_trx);
1046 }
1047 }
1048
1049 /** Get next redo rollback segment in round-robin fashion.
1050 While InnoDB is running in multi-threaded mode, the vectors of undo
1051 tablespaces and rsegs do not shrink. So they do not need protection
1052 to get a pointer to an rseg.
1053 If an rseg is not marked for undo tablespace truncation, we assign
1054 it to a transaction. We increment trx_ref_count to keep the purge
1055 thread from truncating the undo tablespace that contains this rseg
1056 until the transaction is done with it.
1057 @return assigned rollback segment instance */
get_next_redo_rseg_from_undo_spaces()1058 static trx_rseg_t *get_next_redo_rseg_from_undo_spaces() {
1059 undo::Tablespace *undo_space;
1060
1061 /* The number of undo tablespaces cannot be changed while
1062 we have this s_lock. */
1063 undo::spaces->s_lock();
1064
1065 /* Use all known undo tablespaces. Some may be inactive. */
1066 ulint target_undo_tablespaces = undo::spaces->size();
1067
1068 ut_ad(target_undo_tablespaces > 0);
1069
1070 /* The number of rollback segments may be changed at any instant.
1071 So use the value at this instant. Rollback segments are never
1072 deleted from an rseg list, so srv_rollback_segments is always
1073 less than rsegs->size(). */
1074 ulint target_rollback_segments = srv_rollback_segments;
1075
1076 static ulint rseg_counter = 0;
1077 trx_rseg_t *rseg = nullptr;
1078 ulint current = rseg_counter;
1079
1080 /* Increment the static redo_rseg_slot so the next call from any thread
1081 starts with the next rseg. */
1082 os_atomic_increment_ulint(&rseg_counter, 1);
1083
1084 while (rseg == nullptr) {
1085 /* Traverse the rsegs like this: (space, rseg_id)
1086 (0,0), (1,0), ... (n,0), (0,1), (1,1), ... (n,1), ... */
1087 ulint window =
1088 current % (target_rollback_segments * target_undo_tablespaces);
1089 ulint spaces_slot = window % target_undo_tablespaces;
1090 ulint rseg_slot = window / target_undo_tablespaces;
1091
1092 current++;
1093
1094 undo_space = undo::spaces->at(spaces_slot);
1095
1096 /* Avoid any rseg that resides in a tablespace that has been made
1097 inactive either explicitly or by being marked for truncate. We do
1098 not want to wait here on an x_lock for an rseg in an undo tablespace
1099 that is being truncated. So check this first without the latch.
1100 It could be set immediately after this, but that is a very short gap
1101 and the get_active() call below will use an rseg->s_lock. */
1102 if (!undo_space->is_active_no_latch()) {
1103 continue;
1104 }
1105
1106 /* This is done here because we know the rsegs() pointer is good. */
1107 ut_ad(target_rollback_segments <= undo_space->rsegs()->size());
1108
1109 /* Check again with a shared lock. */
1110 rseg = undo_space->get_active(rseg_slot);
1111 if (rseg == nullptr) {
1112 continue;
1113 }
1114 }
1115
1116 undo::spaces->s_unlock();
1117
1118 ut_ad(rseg->trx_ref_count > 0);
1119
1120 return (rseg);
1121 }
1122
1123 /** Get the next redo rollback segment in round-robin fashion.
1124 The assigned slots may have gaps but the vector does not.
1125 @return assigned rollback segment instance */
get_next_redo_rseg_from_trx_sys()1126 static trx_rseg_t *get_next_redo_rseg_from_trx_sys() {
1127 static ulint rseg_counter = 0;
1128 ulong n_rollback_segments = srv_rollback_segments;
1129
1130 /* Versions 5.6 and 5.7 of InnoDB would allow 128 as the max for
1131 innodb_rollback_segments but would only use 96 since 32 slots were
1132 used for temporary rsegs. Now those rsegs are in trx_sys_t::tmp_rsegs
1133 and trx_sys_t::rsegs which each can hold all 128. As a result,
1134 an existing system tablespace might have gaps in the slot assignment.
1135 The Rsegs vector only contains the rsegs that exist. Since
1136 srv_rollback_segments can be set to a smaller number at runtime,
1137 it might be smaller than Rsegs::size(). But srv_rollback_segments
1138 can never be larger than Rsegs::size() because when the user increases
1139 innodb_rollback_segments, the rollback segments are created and rseg
1140 objects are added to the vector ready to use before
1141 srv_rollback_segments is increased. */
1142 ut_ad(n_rollback_segments <= trx_sys->rsegs.size());
1143
1144 /* Try the next slot that no other thread is looking at */
1145 ulint slot =
1146 os_atomic_increment_ulint(&rseg_counter, 1) % n_rollback_segments;
1147
1148 /* s_lock the vector since it might be sorted when added to. */
1149 trx_sys->rsegs.s_lock();
1150 trx_rseg_t *rseg = trx_sys->rsegs.at(slot);
1151 trx_sys->rsegs.s_unlock();
1152
1153 /* It is not neccessary to s_lock Rsegs::m_latch here because the
1154 system tablespace is never truncated like other undo tablespaces. */
1155 rseg->trx_ref_count++;
1156
1157 ut_ad(rseg->space_id == TRX_SYS_SPACE);
1158
1159 return (rseg);
1160 }
1161
1162 /** Get next redo rollback segment in round-robin fashion.
1163 We assume that the assigned slots are not contiguous and have gaps.
1164 @return assigned rollback segment instance */
get_next_redo_rseg()1165 static trx_rseg_t *get_next_redo_rseg() {
1166 if (!trx_sys->rsegs.is_empty()) {
1167 return (get_next_redo_rseg_from_trx_sys());
1168 } else {
1169 return (get_next_redo_rseg_from_undo_spaces());
1170 }
1171 }
1172
1173 /** Get the next noredo rollback segment.
1174 @return assigned rollback segment instance */
get_next_temp_rseg()1175 static trx_rseg_t *get_next_temp_rseg() {
1176 static ulint temp_rseg_counter = 0;
1177 ulong n_rollback_segments = srv_rollback_segments;
1178
1179 ut_ad(n_rollback_segments <= trx_sys->tmp_rsegs.size());
1180
1181 /* Try the next slot that no other thread is looking at */
1182 ulint slot =
1183 os_atomic_increment_ulint(&temp_rseg_counter, 1) % n_rollback_segments;
1184
1185 /* No need to s_lock the vector since it is only added to at the end,
1186 and it is never resized or sorted. */
1187 trx_rseg_t *rseg = trx_sys->tmp_rsegs.at(slot);
1188
1189 ut_ad(rseg->id == slot);
1190 ut_ad(fsp_is_system_temporary(rseg->space_id));
1191
1192 return (rseg);
1193 }
1194
1195 /** Assign a durable rollback segment to a transaction in a round-robin
1196 fashion.
1197 @param[in,out] trx transaction that involves a durable write. */
trx_assign_rseg_durable(trx_t * trx)1198 void trx_assign_rseg_durable(trx_t *trx) {
1199 ut_ad(trx->rsegs.m_redo.rseg == nullptr);
1200
1201 trx->rsegs.m_redo.rseg = srv_read_only_mode ? nullptr : get_next_redo_rseg();
1202 }
1203
1204 /** Assign a temp-tablespace bound rollback-segment to a transaction.
1205 @param[in,out] trx transaction that involves write to temp-table. */
trx_assign_rseg_temp(trx_t * trx)1206 void trx_assign_rseg_temp(trx_t *trx) {
1207 ut_ad(trx->rsegs.m_noredo.rseg == nullptr);
1208 ut_ad(!trx_is_autocommit_non_locking(trx));
1209
1210 trx->rsegs.m_noredo.rseg =
1211 srv_read_only_mode ? nullptr : get_next_temp_rseg();
1212
1213 if (trx->id == 0) {
1214 mutex_enter(&trx_sys->mutex);
1215
1216 trx->id = trx_sys_get_new_trx_id();
1217
1218 trx_sys->rw_trx_ids.push_back(trx->id);
1219
1220 trx_sys->rw_trx_set.insert(TrxTrack(trx->id, trx));
1221
1222 mutex_exit(&trx_sys->mutex);
1223 }
1224 }
1225
1226 /** Starts a transaction. */
trx_start_low(trx_t * trx,bool read_write)1227 static void trx_start_low(
1228 trx_t *trx, /*!< in: transaction */
1229 bool read_write) /*!< in: true if read-write transaction */
1230 {
1231 ut_ad(!trx->in_rollback);
1232 ut_ad(!trx->is_recovered);
1233 ut_ad(trx->start_line != 0);
1234 ut_ad(trx->start_file != nullptr);
1235 ut_ad(trx->roll_limit == 0);
1236 ut_ad(!trx->lock.in_rollback);
1237 ut_ad(trx->error_state == DB_SUCCESS);
1238 ut_ad(trx->rsegs.m_redo.rseg == nullptr);
1239 ut_ad(trx->rsegs.m_noredo.rseg == nullptr);
1240 ut_ad(trx_state_eq(trx, TRX_STATE_NOT_STARTED));
1241 ut_ad(UT_LIST_GET_LEN(trx->lock.trx_locks) == 0);
1242 ut_ad(!(trx->in_innodb & TRX_FORCE_ROLLBACK));
1243 ut_ad(!(trx->in_innodb & TRX_FORCE_ROLLBACK_ASYNC));
1244
1245 ++trx->version;
1246
1247 /* Check whether it is an AUTOCOMMIT SELECT */
1248 trx->auto_commit = (trx->api_trx && trx->api_auto_commit) ||
1249 thd_trx_is_auto_commit(trx->mysql_thd);
1250
1251 trx->read_only = (trx->api_trx && !trx->read_write) ||
1252 (!trx->internal && thd_trx_is_read_only(trx->mysql_thd)) ||
1253 srv_read_only_mode;
1254
1255 if (!trx->auto_commit) {
1256 ++trx->will_lock;
1257 } else if (trx->will_lock == 0) {
1258 trx->read_only = true;
1259 }
1260 trx->persists_gtid = false;
1261
1262 #ifdef UNIV_DEBUG
1263 /* If the transaction is DD attachable trx, it should be AC-NL-RO
1264 (AutoCommit-NonLocking-ReadOnly) trx */
1265 if (trx->is_dd_trx) {
1266 ut_ad(trx->read_only);
1267 ut_ad(trx->auto_commit);
1268 ut_ad(trx->isolation_level == TRX_ISO_READ_UNCOMMITTED ||
1269 trx->isolation_level == TRX_ISO_READ_COMMITTED);
1270 }
1271 #endif /* UNIV_DEBUG */
1272
1273 if (trx->mysql_thd != nullptr && !trx->ddl_operation) {
1274 trx->ddl_operation = thd_is_dd_update_stmt(trx->mysql_thd);
1275 }
1276
1277 /* The initial value for trx->no: TRX_ID_MAX is used in
1278 read_view_open_now: */
1279
1280 trx->no = TRX_ID_MAX;
1281
1282 ut_a(ib_vector_is_empty(trx->lock.autoinc_locks));
1283 ut_a(trx->lock.table_locks.empty());
1284
1285 /* If this transaction came from trx_allocate_for_mysql(),
1286 trx->in_mysql_trx_list would hold. In that case, the trx->state
1287 change must be protected by the trx_sys->mutex, so that
1288 lock_print_info_all_transactions() will have a consistent view. */
1289
1290 ut_ad(!trx->in_rw_trx_list);
1291
1292 /* We tend to over assert and that complicates the code somewhat.
1293 e.g., the transaction state can be set earlier but we are forced to
1294 set it under the protection of the trx_sys_t::mutex because some
1295 trx list assertions are triggered unnecessarily. */
1296
1297 /* By default all transactions are in the read-only list unless they
1298 are non-locking auto-commit read only transactions or background
1299 (internal) transactions. Note: Transactions marked explicitly as
1300 read only can write to temporary tables, we put those on the RO
1301 list too. */
1302
1303 if (!trx->read_only &&
1304 (trx->mysql_thd == nullptr || read_write || trx->ddl_operation)) {
1305 trx_assign_rseg_durable(trx);
1306
1307 /* Temporary rseg is assigned only if the transaction
1308 updates a temporary table */
1309
1310 trx_sys_mutex_enter();
1311
1312 trx->id = trx_sys_get_new_trx_id();
1313
1314 trx_sys->rw_trx_ids.push_back(trx->id);
1315
1316 trx_sys_rw_trx_add(trx);
1317
1318 ut_ad(trx->rsegs.m_redo.rseg != nullptr || srv_read_only_mode ||
1319 srv_force_recovery >= SRV_FORCE_NO_TRX_UNDO);
1320
1321 UT_LIST_ADD_FIRST(trx_sys->rw_trx_list, trx);
1322
1323 ut_d(trx->in_rw_trx_list = true);
1324
1325 trx->state = TRX_STATE_ACTIVE;
1326
1327 ut_ad(trx_sys_validate_trx_list());
1328
1329 trx_sys_mutex_exit();
1330
1331 } else {
1332 trx->id = 0;
1333
1334 if (!trx_is_autocommit_non_locking(trx)) {
1335 /* If this is a read-only transaction that is writing
1336 to a temporary table then it needs a transaction id
1337 to write to the temporary table. */
1338
1339 if (read_write) {
1340 trx_sys_mutex_enter();
1341
1342 ut_ad(!srv_read_only_mode);
1343
1344 trx->id = trx_sys_get_new_trx_id();
1345
1346 trx_sys->rw_trx_ids.push_back(trx->id);
1347
1348 trx_sys->rw_trx_set.insert(TrxTrack(trx->id, trx));
1349
1350 trx_sys_mutex_exit();
1351 }
1352
1353 trx->state = TRX_STATE_ACTIVE;
1354
1355 } else {
1356 ut_ad(!read_write);
1357 trx->state = TRX_STATE_ACTIVE;
1358 }
1359 }
1360
1361 if (trx->mysql_thd != nullptr) {
1362 trx->start_time = thd_start_time_in_secs(trx->mysql_thd);
1363 } else {
1364 trx->start_time = ut_time();
1365 }
1366
1367 /* This value will only be read by a thread inspecting lock sys queue after
1368 the thread which enqueues this trx releases the queue's latch. */
1369 trx->lock.schedule_weight.store(0, std::memory_order_relaxed);
1370
1371 ut_a(trx->error_state == DB_SUCCESS);
1372
1373 MONITOR_INC(MONITOR_TRX_ACTIVE);
1374 }
1375
1376 /** Set the transaction serialisation number.
1377 @return true if the transaction number was added to the serialisation_list. */
trx_serialisation_number_get(trx_t * trx,trx_undo_ptr_t * redo_rseg_undo_ptr,trx_undo_ptr_t * temp_rseg_undo_ptr)1378 static bool trx_serialisation_number_get(
1379 trx_t *trx, /*!< in/out: transaction */
1380 trx_undo_ptr_t *redo_rseg_undo_ptr, /*!< in/out: Set trx
1381 serialisation number in
1382 referred undo rseg. */
1383 trx_undo_ptr_t *temp_rseg_undo_ptr) /*!< in/out: Set trx
1384 serialisation number in
1385 referred undo rseg. */
1386 {
1387 bool added_trx_no;
1388 trx_rseg_t *redo_rseg = nullptr;
1389 trx_rseg_t *temp_rseg = nullptr;
1390
1391 if (redo_rseg_undo_ptr != nullptr) {
1392 ut_ad(mutex_own(&redo_rseg_undo_ptr->rseg->mutex));
1393 redo_rseg = redo_rseg_undo_ptr->rseg;
1394 }
1395
1396 if (temp_rseg_undo_ptr != nullptr) {
1397 ut_ad(mutex_own(&temp_rseg_undo_ptr->rseg->mutex));
1398 temp_rseg = temp_rseg_undo_ptr->rseg;
1399 }
1400
1401 trx_sys_mutex_enter();
1402
1403 trx->no = trx_sys_get_new_trx_id();
1404
1405 /* Update the latest transaction number. */
1406 ut_d(trx_sys->rw_max_trx_no = trx->no);
1407
1408 /* Track the minimum serialisation number. */
1409 if (!trx->read_only) {
1410 UT_LIST_ADD_LAST(trx_sys->serialisation_list, trx);
1411 added_trx_no = true;
1412 } else {
1413 added_trx_no = false;
1414 }
1415
1416 /* If the rollack segment is not empty then the
1417 new trx_t::no can't be less than any trx_t::no
1418 already in the rollback segment. User threads only
1419 produce events when a rollback segment is empty. */
1420 if ((redo_rseg != nullptr && redo_rseg->last_page_no == FIL_NULL) ||
1421 (temp_rseg != nullptr && temp_rseg->last_page_no == FIL_NULL)) {
1422 TrxUndoRsegs elem(trx->no);
1423
1424 if (redo_rseg != nullptr && redo_rseg->last_page_no == FIL_NULL) {
1425 elem.push_back(redo_rseg);
1426 }
1427
1428 if (temp_rseg != nullptr && temp_rseg->last_page_no == FIL_NULL) {
1429 elem.push_back(temp_rseg);
1430 }
1431
1432 mutex_enter(&purge_sys->pq_mutex);
1433
1434 /* This is to reduce the pressure on the trx_sys_t::mutex
1435 though in reality it should make very little (read no)
1436 difference because this code path is only taken when the
1437 rbs is empty. */
1438
1439 trx_sys_mutex_exit();
1440
1441 purge_sys->purge_queue->push(elem);
1442
1443 mutex_exit(&purge_sys->pq_mutex);
1444 } else {
1445 trx_sys_mutex_exit();
1446 }
1447
1448 return (added_trx_no);
1449 }
1450
1451 /** Assign the transaction its history serialisation number and write the
1452 update UNDO log record to the assigned rollback segment.
1453 @return true if a serialisation log was written */
trx_write_serialisation_history(trx_t * trx,mtr_t * mtr)1454 static bool trx_write_serialisation_history(
1455 trx_t *trx, /*!< in/out: transaction */
1456 mtr_t *mtr) /*!< in/out: mini-transaction */
1457 {
1458 /* Change the undo log segment states from TRX_UNDO_ACTIVE to some
1459 other state: these modifications to the file data structure define
1460 the transaction as committed in the file based domain, at the
1461 serialization point of the log sequence number lsn obtained below. */
1462
1463 /* We have to hold the rseg mutex because update log headers have
1464 to be put to the history list in the (serialisation) order of the
1465 UNDO trx number. This is required for the purge in-memory data
1466 structures too. */
1467
1468 bool own_redo_rseg_mutex = false;
1469 bool own_temp_rseg_mutex = false;
1470
1471 /* Get rollback segment mutex. */
1472 if (trx->rsegs.m_redo.rseg != nullptr && trx_is_redo_rseg_updated(trx)) {
1473 trx->rsegs.m_redo.rseg->latch();
1474 own_redo_rseg_mutex = true;
1475 }
1476
1477 mtr_t temp_mtr;
1478
1479 if (trx->rsegs.m_noredo.rseg != nullptr && trx_is_temp_rseg_updated(trx)) {
1480 trx->rsegs.m_noredo.rseg->latch();
1481 own_temp_rseg_mutex = true;
1482 mtr_start(&temp_mtr);
1483 temp_mtr.set_log_mode(MTR_LOG_NO_REDO);
1484 }
1485
1486 /* If transaction involves insert then truncate undo logs. */
1487 if (trx->rsegs.m_redo.insert_undo != nullptr) {
1488 trx_undo_set_state_at_finish(trx->rsegs.m_redo.insert_undo, mtr);
1489 }
1490
1491 if (trx->rsegs.m_noredo.insert_undo != nullptr) {
1492 trx_undo_set_state_at_finish(trx->rsegs.m_noredo.insert_undo, &temp_mtr);
1493 }
1494
1495 bool serialised = false;
1496
1497 /* If transaction involves update then add rollback segments
1498 to purge queue. */
1499 if (trx->rsegs.m_redo.update_undo != nullptr ||
1500 trx->rsegs.m_noredo.update_undo != nullptr) {
1501 /* Assign the transaction serialisation number and add these
1502 rollback segments to purge trx-no sorted priority queue
1503 if this is the first UNDO log being written to assigned
1504 rollback segments. */
1505
1506 trx_undo_ptr_t *redo_rseg_undo_ptr =
1507 trx->rsegs.m_redo.update_undo != nullptr ? &trx->rsegs.m_redo : nullptr;
1508
1509 trx_undo_ptr_t *temp_rseg_undo_ptr =
1510 trx->rsegs.m_noredo.update_undo != nullptr ? &trx->rsegs.m_noredo
1511 : nullptr;
1512
1513 /* Will set trx->no and will add rseg to purge queue. */
1514 serialised = trx_serialisation_number_get(trx, redo_rseg_undo_ptr,
1515 temp_rseg_undo_ptr);
1516
1517 /* It is not necessary to obtain trx->undo_mutex here because
1518 only a single OS thread is allowed to do the transaction commit
1519 for this transaction. */
1520 if (trx->rsegs.m_redo.update_undo != nullptr) {
1521 page_t *undo_hdr_page;
1522
1523 undo_hdr_page =
1524 trx_undo_set_state_at_finish(trx->rsegs.m_redo.update_undo, mtr);
1525
1526 /* Delay update of rseg_history_len if we plan to add
1527 non-redo update_undo too. This is to avoid immediate
1528 invocation of purge as we need to club these 2 segments
1529 with same trx-no as single unit. */
1530 bool update_rseg_len = !(trx->rsegs.m_noredo.update_undo != nullptr);
1531
1532 /* Set flag if GTID information need to persist. */
1533 auto undo_ptr = &trx->rsegs.m_redo;
1534 trx_undo_gtid_set(trx, undo_ptr->update_undo);
1535
1536 trx_undo_update_cleanup(trx, undo_ptr, undo_hdr_page, update_rseg_len,
1537 (update_rseg_len ? 1 : 0), mtr);
1538 }
1539
1540 DBUG_EXECUTE_IF("ib_trx_crash_during_commit", DBUG_SUICIDE(););
1541
1542 if (trx->rsegs.m_noredo.update_undo != nullptr) {
1543 page_t *undo_hdr_page;
1544
1545 undo_hdr_page = trx_undo_set_state_at_finish(
1546 trx->rsegs.m_noredo.update_undo, &temp_mtr);
1547
1548 ulint n_added_logs = (redo_rseg_undo_ptr != nullptr) ? 2 : 1;
1549
1550 trx_undo_update_cleanup(trx, &trx->rsegs.m_noredo, undo_hdr_page, true,
1551 n_added_logs, &temp_mtr);
1552 }
1553 }
1554
1555 if (own_redo_rseg_mutex) {
1556 trx->rsegs.m_redo.rseg->unlatch();
1557 own_redo_rseg_mutex = false;
1558 }
1559
1560 if (own_temp_rseg_mutex) {
1561 trx->rsegs.m_noredo.rseg->unlatch();
1562 own_temp_rseg_mutex = false;
1563 mtr_commit(&temp_mtr);
1564 }
1565
1566 MONITOR_INC(MONITOR_TRX_COMMIT_UNDO);
1567
1568 /* Update the latest MySQL binlog name and offset information
1569 in trx sys header only if MySQL binary logging is on and clone
1570 is has ensured commit order at final stage. */
1571 if (Clone_handler::need_commit_order()) {
1572 trx_sys_update_mysql_binlog_offset(trx, mtr);
1573 }
1574
1575 return (serialised);
1576 }
1577
1578 /********************************************************************
1579 Finalize a transaction containing updates for a FTS table. */
trx_finalize_for_fts_table(fts_trx_table_t * ftt)1580 static void trx_finalize_for_fts_table(
1581 fts_trx_table_t *ftt) /* in: FTS trx table */
1582 {
1583 fts_t *fts = ftt->table->fts;
1584 fts_doc_ids_t *doc_ids = ftt->added_doc_ids;
1585
1586 mutex_enter(&fts->bg_threads_mutex);
1587
1588 if (fts->fts_status & BG_THREAD_STOP) {
1589 /* The table is about to be dropped, no use
1590 adding anything to its work queue. */
1591
1592 mutex_exit(&fts->bg_threads_mutex);
1593 } else {
1594 mem_heap_t *heap;
1595 mutex_exit(&fts->bg_threads_mutex);
1596
1597 ut_a(fts->add_wq);
1598
1599 heap = static_cast<mem_heap_t *>(doc_ids->self_heap->arg);
1600
1601 ib_wqueue_add(fts->add_wq, doc_ids, heap);
1602
1603 /* fts_trx_table_t no longer owns the list. */
1604 ftt->added_doc_ids = nullptr;
1605 }
1606 }
1607
1608 /** Finalize a transaction containing updates to FTS tables. */
trx_finalize_for_fts(trx_t * trx,bool is_commit)1609 static void trx_finalize_for_fts(
1610 trx_t *trx, /*!< in/out: transaction */
1611 bool is_commit) /*!< in: true if the transaction was
1612 committed, false if it was rolled back. */
1613 {
1614 if (is_commit) {
1615 const ib_rbt_node_t *node;
1616 ib_rbt_t *tables;
1617 fts_savepoint_t *savepoint;
1618
1619 savepoint = static_cast<fts_savepoint_t *>(
1620 ib_vector_last(trx->fts_trx->savepoints));
1621
1622 tables = savepoint->tables;
1623
1624 for (node = rbt_first(tables); node; node = rbt_next(tables, node)) {
1625 fts_trx_table_t **ftt;
1626
1627 ftt = rbt_value(fts_trx_table_t *, node);
1628
1629 if ((*ftt)->added_doc_ids) {
1630 trx_finalize_for_fts_table(*ftt);
1631 }
1632 }
1633 }
1634
1635 fts_trx_free(trx->fts_trx);
1636 trx->fts_trx = nullptr;
1637 }
1638
1639 /** If required, flushes the log to disk based on the value of
1640 innodb_flush_log_at_trx_commit. */
trx_flush_log_if_needed_low(lsn_t lsn)1641 static void trx_flush_log_if_needed_low(lsn_t lsn) /*!< in: lsn up to which logs
1642 are to be flushed. */
1643 {
1644 #ifdef _WIN32
1645 bool flush = true;
1646 #else
1647 bool flush = srv_unix_file_flush_method != SRV_UNIX_NOSYNC;
1648 #endif /* _WIN32 */
1649
1650 Wait_stats wait_stats;
1651
1652 switch (srv_flush_log_at_trx_commit) {
1653 case 2:
1654 /* Write the log but do not flush it to disk */
1655 flush = false;
1656 /* fall through */
1657 case 1:
1658 /* Write the log and optionally flush it to disk */
1659 wait_stats = log_write_up_to(*log_sys, lsn, flush);
1660
1661 MONITOR_INC_WAIT_STATS(MONITOR_TRX_ON_LOG_, wait_stats);
1662
1663 return;
1664 case 0:
1665 /* Do nothing */
1666 return;
1667 }
1668 }
1669
1670 /** If required, flushes the log to disk based on the value of
1671 innodb_flush_log_at_trx_commit. */
trx_flush_log_if_needed(lsn_t lsn,trx_t * trx)1672 static void trx_flush_log_if_needed(lsn_t lsn, /*!< in: lsn up to which logs are
1673 to be flushed. */
1674 trx_t *trx) /*!< in/out: transaction */
1675 {
1676 trx->op_info = "flushing log";
1677
1678 DEBUG_SYNC_C("trx_flush_log_if_needed");
1679
1680 if (trx->ddl_operation || trx->ddl_must_flush) {
1681 log_write_up_to(*log_sys, lsn, true);
1682 } else {
1683 trx_flush_log_if_needed_low(lsn);
1684 }
1685
1686 trx->op_info = "";
1687 }
1688
1689 /** For each table that has been modified by the given transaction: update
1690 its dict_table_t::update_time with the current timestamp. Clear the list
1691 of the modified tables at the end. */
trx_update_mod_tables_timestamp(trx_t * trx)1692 static void trx_update_mod_tables_timestamp(trx_t *trx) /*!< in: transaction */
1693 {
1694 ut_ad(trx->id != 0);
1695
1696 /* consider using trx->start_time if calling time() is too
1697 expensive here */
1698 time_t now = ut_time();
1699
1700 trx_mod_tables_t::const_iterator end = trx->mod_tables.end();
1701
1702 for (trx_mod_tables_t::const_iterator it = trx->mod_tables.begin(); it != end;
1703 ++it) {
1704 /* This could be executed by multiple threads concurrently
1705 on the same table object. This is fine because time_t is
1706 word size or less. And _purely_ _theoretically_, even if
1707 time_t write is not atomic, likely the value of 'now' is
1708 the same in all threads and even if it is not, getting a
1709 "garbage" in table->update_time is justified because
1710 protecting it with a latch here would be too performance
1711 intrusive. */
1712 (*it)->update_time = now;
1713 }
1714
1715 trx->mod_tables.clear();
1716 }
1717
1718 /**
1719 Erase the transaction from running transaction lists and serialization
1720 list. Active RW transaction list of a MVCC snapshot(ReadView::prepare)
1721 won't include this transaction after this call. All implicit locks are
1722 also released by this call as trx is removed from rw_trx_list.
1723 @param[in] trx Transaction to erase, must have an ID > 0
1724 @param[in] serialised true if serialisation log was written
1725 @param[in] gtid_desc GTID information to persist */
trx_erase_lists(trx_t * trx,bool serialised,Gtid_desc & gtid_desc)1726 static void trx_erase_lists(trx_t *trx, bool serialised, Gtid_desc >id_desc) {
1727 ut_ad(trx->id > 0);
1728 ut_ad(trx_sys_mutex_own());
1729
1730 if (serialised) {
1731 UT_LIST_REMOVE(trx_sys->serialisation_list, trx);
1732
1733 /* Add GTID to be persisted to disk table. It must be done ...
1734 1.After the transaction is marked committed in undo. Otherwise
1735 GTID might get committed before the transaction commit on disk.
1736 2.Before it is removed from serialization list. Otherwise the transaction
1737 undo could get purged before persisting GTID on disk table. */
1738 if (gtid_desc.m_is_set) {
1739 auto >id_persistor = clone_sys->get_gtid_persistor();
1740 gtid_persistor.add(gtid_desc);
1741 }
1742 }
1743
1744 trx_ids_t::iterator it = std::lower_bound(trx_sys->rw_trx_ids.begin(),
1745 trx_sys->rw_trx_ids.end(), trx->id);
1746 ut_ad(*it == trx->id);
1747 trx_sys->rw_trx_ids.erase(it);
1748
1749 if (trx->read_only || trx->rsegs.m_redo.rseg == nullptr) {
1750 ut_ad(!trx->in_rw_trx_list);
1751 } else {
1752 UT_LIST_REMOVE(trx_sys->rw_trx_list, trx);
1753 ut_d(trx->in_rw_trx_list = false);
1754 ut_ad(trx_sys_validate_trx_list());
1755
1756 if (trx->read_view != nullptr) {
1757 trx_sys->mvcc->view_close(trx->read_view, true);
1758 }
1759 }
1760
1761 trx_sys->rw_trx_set.erase(TrxTrack(trx->id));
1762
1763 /* Set minimal active trx id. */
1764 trx_id_t min_id = trx_sys->rw_trx_ids.empty() ? trx_sys->max_trx_id
1765 : trx_sys->rw_trx_ids.front();
1766
1767 trx_sys->min_active_id.store(min_id);
1768 }
1769
trx_release_impl_and_expl_locks(trx_t * trx,bool serialized)1770 static void trx_release_impl_and_expl_locks(trx_t *trx, bool serialized) {
1771 check_trx_state(trx);
1772 ut_ad(trx_state_eq(trx, TRX_STATE_ACTIVE) ||
1773 trx_state_eq(trx, TRX_STATE_PREPARED));
1774
1775 bool trx_sys_latch_is_needed =
1776 (trx->id > 0) || trx_state_eq(trx, TRX_STATE_PREPARED);
1777
1778 /* Check and get GTID to be persisted. Do it outside trx_sys mutex. */
1779 Gtid_desc gtid_desc;
1780 auto >id_persistor = clone_sys->get_gtid_persistor();
1781 gtid_persistor.get_gtid_info(trx, gtid_desc);
1782
1783 if (trx_sys_latch_is_needed) {
1784 trx_sys_mutex_enter();
1785 }
1786
1787 if (trx->id > 0) {
1788 /* For consistent snapshot, we need to remove current
1789 transaction from running transaction id list for mvcc
1790 before doing commit and releasing locks. */
1791 trx_erase_lists(trx, serialized, gtid_desc);
1792 }
1793
1794 if (trx_state_eq(trx, TRX_STATE_PREPARED)) {
1795 ut_a(trx_sys->n_prepared_trx > 0);
1796 --trx_sys->n_prepared_trx;
1797 }
1798
1799 trx_mutex_enter(trx);
1800 /* Please consider this particular point in time as the moment the trx's
1801 implicit locks become released.
1802 This change is protected by both trx_sys->mutex and trx->mutex.
1803 Therefore, there are two secure ways to check if the trx still can hold
1804 implicit locks:
1805 (1) if you only know id of the trx, then you can obtain trx_sys->mutex and
1806 check if trx is still in rw_trx_set. This works, because the call to
1807 trx_erase_list() which removes trx from this list several lines above is
1808 also protected by trx_sys->mutex. We use this approach in
1809 lock_rec_convert_impl_to_expl() by using trx_rw_is_active()
1810 (2) if you have pointer to trx, and you know it is safe to access (say, you
1811 hold reference to this trx which prevents it from being freed) then you
1812 can obtain trx->mutex and check if trx->state is equal to
1813 TRX_STATE_COMMITTED_IN_MEMORY. We use this approach in
1814 lock_rec_convert_impl_to_expl_for_trx() when deciding for the final time
1815 if we really want to create explicit lock on behalf of implicit lock
1816 holder. */
1817 trx->state = TRX_STATE_COMMITTED_IN_MEMORY;
1818 trx_mutex_exit(trx);
1819
1820 if (trx_sys_latch_is_needed) {
1821 trx_sys_mutex_exit();
1822 }
1823
1824 lock_trx_release_locks(trx);
1825 }
1826
1827 /** Commits a transaction in memory. */
trx_commit_in_memory(trx_t * trx,const mtr_t * mtr,bool serialised)1828 static void trx_commit_in_memory(
1829 trx_t *trx, /*!< in/out: transaction */
1830 const mtr_t *mtr, /*!< in: mini-transaction of
1831 trx_write_serialisation_history(), or NULL if
1832 the transaction did not modify anything */
1833 bool serialised)
1834 /*!< in: true if serialisation log was
1835 written */
1836 {
1837 trx->must_flush_log_later = false;
1838 trx->ddl_must_flush = false;
1839
1840 if (trx_is_autocommit_non_locking(trx)) {
1841 ut_ad(trx->id == 0);
1842 ut_ad(trx->read_only);
1843 ut_a(!trx->is_recovered);
1844 ut_ad(trx->rsegs.m_redo.rseg == nullptr);
1845 ut_ad(!trx->in_rw_trx_list);
1846
1847 /* Note: We are asserting without holding the locksys latch. But
1848 that is OK because this transaction is not waiting and cannot
1849 be rolled back and no new locks can (or should not) be added
1850 because it is flagged as a non-locking read-only transaction. */
1851
1852 ut_a(UT_LIST_GET_LEN(trx->lock.trx_locks) == 0);
1853
1854 /* This state change is not protected by any mutex, therefore
1855 there is an inherent race here around state transition during
1856 printouts. We ignore this race for the sake of efficiency.
1857 However, the trx_sys_t::mutex will protect the trx_t instance
1858 and it cannot be removed from the mysql_trx_list and freed
1859 without first acquiring the trx_sys_t::mutex. */
1860
1861 ut_ad(trx_state_eq(trx, TRX_STATE_ACTIVE));
1862
1863 if (trx->read_view != nullptr) {
1864 trx_sys->mvcc->view_close(trx->read_view, false);
1865 }
1866
1867 MONITOR_INC(MONITOR_TRX_NL_RO_COMMIT);
1868
1869 /* AC-NL-RO transactions can't be rolled back asynchronously. */
1870 ut_ad(!trx->abort);
1871 ut_ad(!(trx->in_innodb & (TRX_FORCE_ROLLBACK | TRX_FORCE_ROLLBACK_ASYNC)));
1872
1873 trx->state = TRX_STATE_NOT_STARTED;
1874
1875 } else {
1876 trx_release_impl_and_expl_locks(trx, serialised);
1877
1878 /* Remove the transaction from the list of active
1879 transactions now that it no longer holds any user locks. */
1880
1881 ut_ad(trx_state_eq(trx, TRX_STATE_COMMITTED_IN_MEMORY));
1882 DEBUG_SYNC_C("after_trx_committed_in_memory");
1883
1884 if (trx->read_only || trx->rsegs.m_redo.rseg == nullptr) {
1885 MONITOR_INC(MONITOR_TRX_RO_COMMIT);
1886 if (trx->read_view != nullptr) {
1887 trx_sys->mvcc->view_close(trx->read_view, false);
1888 }
1889
1890 } else {
1891 ut_ad(trx->id > 0);
1892 MONITOR_INC(MONITOR_TRX_RW_COMMIT);
1893 }
1894 }
1895
1896 if (trx->rsegs.m_redo.rseg != nullptr) {
1897 trx_rseg_t *rseg = trx->rsegs.m_redo.rseg;
1898 ut_ad(rseg->trx_ref_count > 0);
1899
1900 /* Multiple transactions can simultaneously decrement
1901 the atomic counter. */
1902 rseg->trx_ref_count--;
1903 }
1904
1905 /* Reset flag that SE persists GTID. */
1906 auto >id_persistor = clone_sys->get_gtid_persistor();
1907 gtid_persistor.set_persist_gtid(trx, false);
1908
1909 if (mtr != nullptr) {
1910 if (trx->rsegs.m_redo.insert_undo != nullptr) {
1911 trx_undo_insert_cleanup(&trx->rsegs.m_redo, false);
1912 }
1913
1914 if (trx->rsegs.m_noredo.insert_undo != nullptr) {
1915 trx_undo_insert_cleanup(&trx->rsegs.m_noredo, true);
1916 }
1917
1918 /* NOTE that we could possibly make a group commit more
1919 efficient here: call os_thread_yield here to allow also other
1920 trxs to come to commit! */
1921
1922 /*-------------------------------------*/
1923
1924 /* Depending on the my.cnf options, we may now write the log
1925 buffer to the log files, making the transaction durable if
1926 the OS does not crash. We may also flush the log files to
1927 disk, making the transaction durable also at an OS crash or a
1928 power outage.
1929
1930 The idea in InnoDB's group commit is that a group of
1931 transactions gather behind a trx doing a physical disk write
1932 to log files, and when that physical write has been completed,
1933 one of those transactions does a write which commits the whole
1934 group. Note that this group commit will only bring benefit if
1935 there are > 2 users in the database. Then at least 2 users can
1936 gather behind one doing the physical log write to disk.
1937
1938 If we are calling trx_commit() under prepare_commit_mutex, we
1939 will delay possible log write and flush to a separate function
1940 trx_commit_complete_for_mysql(), which is only called when the
1941 thread has released the mutex. This is to make the
1942 group commit algorithm to work. Otherwise, the prepare_commit
1943 mutex would serialize all commits and prevent a group of
1944 transactions from gathering. */
1945
1946 lsn_t lsn = mtr->commit_lsn();
1947
1948 if (lsn == 0) {
1949 /* Nothing to be done. */
1950 } else if (trx->flush_log_later) {
1951 /* Do nothing yet */
1952 trx->must_flush_log_later = true;
1953
1954 /* Remember current ddl_operation, because trx_init()
1955 later will set ddl_operation to false. And the final
1956 flush is even later. */
1957 trx->ddl_must_flush = trx->ddl_operation;
1958 } else if ((srv_flush_log_at_trx_commit == 0 ||
1959 thd_requested_durability(trx->mysql_thd) ==
1960 HA_IGNORE_DURABILITY) &&
1961 (!trx->ddl_operation)) {
1962 /* Do nothing */
1963 } else {
1964 trx_flush_log_if_needed(lsn, trx);
1965 }
1966
1967 trx->commit_lsn = lsn;
1968
1969 /* Tell server some activity has happened, since the trx
1970 does changes something. Background utility threads like
1971 master thread, purge thread or page_cleaner thread might
1972 have some work to do. */
1973 srv_active_wake_master_thread();
1974 }
1975
1976 /* Free all savepoints, starting from the first. */
1977 trx_named_savept_t *savep = UT_LIST_GET_FIRST(trx->trx_savepoints);
1978
1979 trx_roll_savepoints_free(trx, savep);
1980
1981 if (trx->fts_trx != nullptr) {
1982 trx_finalize_for_fts(trx, trx->undo_no != 0);
1983 }
1984
1985 trx_mutex_enter(trx);
1986 trx->dict_operation = TRX_DICT_OP_NONE;
1987
1988 /* Because we can rollback transactions asynchronously, we change
1989 the state at the last step. trx_t::abort cannot change once commit
1990 or rollback has started because we will have released the locks by
1991 the time we get here. */
1992
1993 if (trx->abort) {
1994 trx->abort = false;
1995 trx->state = TRX_STATE_FORCED_ROLLBACK;
1996 } else {
1997 trx->state = TRX_STATE_NOT_STARTED;
1998 }
1999
2000 /* trx->in_mysql_trx_list would hold between
2001 trx_allocate_for_mysql() and trx_free_for_mysql(). It does not
2002 hold for recovered transactions or system transactions. */
2003 assert_trx_is_free(trx);
2004
2005 trx_init(trx);
2006
2007 trx_mutex_exit(trx);
2008
2009 ut_a(trx->error_state == DB_SUCCESS);
2010 }
2011
/** Commits a transaction and a mini-transaction.
The function works in three steps:
1. If the transaction carries FTS changes (trx->fts_trx is set, undo_no is
   non-zero and the trx is not rolling back), they are committed through
   fts_commit().
2. If a mini-transaction was supplied, the serialisation history is written
   into it and the mini-transaction is committed.
3. trx_commit_in_memory() is called with the result of step 2. */
void trx_commit_low(
    trx_t *trx, /*!< in/out: transaction */
    mtr_t *mtr) /*!< in/out: mini-transaction (will be committed),
                or NULL if trx made no modifications */
{
  assert_trx_nonlocking_or_in_list(trx);
  ut_ad(!trx_state_eq(trx, TRX_STATE_COMMITTED_IN_MEMORY));
  /* A supplied mini-transaction must still be active (not yet committed). */
  ut_ad(!mtr || mtr->is_active());
  /* undo_no is non-zero if we're doing the final commit. */
  if (trx->fts_trx != nullptr && trx->undo_no != 0 &&
      trx->lock.que_state != TRX_QUE_ROLLING_BACK) {
    dberr_t error;

    ut_a(!trx_is_autocommit_non_locking(trx));

    error = fts_commit(trx);

    /* FTS-FIXME: Temporarily tolerate DB_DUPLICATE_KEY
    instead of dying. This is a possible scenario if there
    is a crash between insert to DELETED table committing
    and transaction committing. The fix would be able to
    return error from this function */
    if (error != DB_SUCCESS && error != DB_DUPLICATE_KEY) {
      /* FTS-FIXME: once we can return values from this
      function, we should do so and signal an error
      instead of just dying. */

      ut_error;
    }
  }

  /* Value returned by trx_write_serialisation_history(); stays false when
  there is no mini-transaction. Passed on to trx_commit_in_memory(). */
  bool serialised;

  if (mtr != nullptr) {
    mtr->set_sync();

    serialised = trx_write_serialisation_history(trx, mtr);

    /* The following call commits the mini-transaction, making the
    whole transaction committed in the file-based world, at this
    log sequence number. The transaction becomes 'durable' when
    we write the log to disk, but in the logical sense the commit
    in the file-based data structures (undo logs etc.) happens
    here.

    NOTE that transaction numbers, which are assigned only to
    transactions with an update undo log, do not necessarily come
    in exactly the same order as commit lsn's, if the transactions
    have different rollback segments. To get exactly the same
    order we should hold the kernel mutex up to this point,
    adding to the contention of the kernel mutex. However, if
    a transaction T2 is able to see modifications made by
    a transaction T1, T2 will always get a bigger transaction
    number and a bigger commit lsn than T1. */

    /*--------------*/

    /* Debug-only hook: runs mtr_commit_mlog_test_filling_block() with the
    expected log size of this mini-transaction before the commit. */
    DBUG_EXECUTE_IF("trx_commit_to_the_end_of_log_block", {
      const size_t space_left = mtr->get_expected_log_size();
      mtr_commit_mlog_test_filling_block(*log_sys, space_left);
    });

    mtr_commit(mtr);

    DBUG_PRINT("trx_commit", ("commit lsn at " LSN_PF, mtr->commit_lsn()));

    /* Debug-only hook: if the trx updated a rollback segment, make a
    checkpoint and kill the server right after the mini-transaction commit. */
    DBUG_EXECUTE_IF(
        "ib_crash_during_trx_commit_in_mem", if (trx_is_rseg_updated(trx)) {
          log_make_latest_checkpoint();
          DBUG_SUICIDE();
        });
    /*--------------*/

  } else {
    serialised = false;
  }
#ifdef UNIV_DEBUG
  /* In case of this function is called from a stack executing
     THD::release_resources -> ...
        innobase_connection_close() ->
               trx_rollback_for_mysql... -> .
     mysql's thd does not seem to have
     thd->debug_sync_control defined any longer. However the stack
     is possible only with a prepared trx not updating any data.
  */
  if (trx->mysql_thd != nullptr && trx_is_redo_rseg_updated(trx)) {
    DEBUG_SYNC_C("before_trx_state_committed_in_memory");
  }
#endif

  /* Finish the commit via trx_commit_in_memory(), passing along the
  (possibly NULL) mini-transaction and the serialised flag. */
  trx_commit_in_memory(trx, mtr, serialised);
}
2105
2106 /** Commits a transaction. */
trx_commit(trx_t * trx)2107 void trx_commit(trx_t *trx) /*!< in/out: transaction */
2108 {
2109 mtr_t *mtr;
2110 mtr_t local_mtr;
2111
2112 DBUG_EXECUTE_IF("ib_trx_commit_crash_before_trx_commit_start",
2113 DBUG_SUICIDE(););
2114
2115 if (trx_is_rseg_updated(trx)) {
2116 mtr = &local_mtr;
2117
2118 DBUG_EXECUTE_IF("ib_trx_commit_crash_rseg_updated", DBUG_SUICIDE(););
2119
2120 mtr_start_sync(mtr);
2121
2122 } else {
2123 mtr = nullptr;
2124 }
2125
2126 trx_commit_low(trx, mtr);
2127 }
2128
2129 /** Cleans up a transaction at database startup. The cleanup is needed if
2130 the transaction already got to the middle of a commit when the database
2131 crashed, and we cannot roll it back. */
trx_cleanup_at_db_startup(trx_t * trx)2132 void trx_cleanup_at_db_startup(trx_t *trx) /*!< in: transaction */
2133 {
2134 ut_ad(trx->is_recovered);
2135
2136 /* Cleanup any durable undo logs in non-temporary rollback segments.
2137 At database start-up there are no active transactions recorded in
2138 any rollback segments in the temporary tablespace because all those
2139 changes are all lost on restart. */
2140 if (trx->rsegs.m_redo.insert_undo != nullptr) {
2141 trx_undo_insert_cleanup(&trx->rsegs.m_redo, false);
2142 }
2143
2144 memset(&trx->rsegs, 0x0, sizeof(trx->rsegs));
2145 trx->undo_no = 0;
2146 trx->undo_rseg_space = 0;
2147 trx->last_sql_stat_start.least_undo_no = 0;
2148
2149 trx_sys_mutex_enter();
2150
2151 ut_a(!trx->read_only);
2152
2153 UT_LIST_REMOVE(trx_sys->rw_trx_list, trx);
2154
2155 ut_d(trx->in_rw_trx_list = FALSE);
2156
2157 trx_sys_mutex_exit();
2158
2159 /* Change the transaction state without mutex protection, now
2160 that it no longer is in the trx_list. Recovered transactions
2161 are never placed in the mysql_trx_list. */
2162 ut_ad(trx->is_recovered);
2163 ut_ad(!trx->in_rw_trx_list);
2164 ut_ad(!trx->in_mysql_trx_list);
2165 trx->state = TRX_STATE_NOT_STARTED;
2166 }
2167
2168 /** Assigns a read view for a consistent read query. All the consistent reads
2169 within the same transaction will get the same read view, which is created
2170 when this function is first called for a new started transaction.
2171 @return consistent read view */
trx_assign_read_view(trx_t * trx)2172 ReadView *trx_assign_read_view(trx_t *trx) /*!< in/out: active transaction */
2173 {
2174 ut_ad(trx->state == TRX_STATE_ACTIVE);
2175
2176 if (srv_read_only_mode) {
2177 ut_ad(trx->read_view == nullptr);
2178 return (nullptr);
2179
2180 } else if (!MVCC::is_view_active(trx->read_view)) {
2181 trx_sys->mvcc->view_open(trx->read_view, trx);
2182 }
2183
2184 return (trx->read_view);
2185 }
2186
2187 /** Prepares a transaction for commit/rollback. */
trx_commit_or_rollback_prepare(trx_t * trx)2188 void trx_commit_or_rollback_prepare(trx_t *trx) /*!< in/out: transaction */
2189 {
2190 /* We are reading trx->state without holding trx_sys->mutex
2191 here, because the commit or rollback should be invoked for a
2192 running (or recovered prepared) transaction that is associated
2193 with the current thread. */
2194
2195 switch (trx->state) {
2196 case TRX_STATE_NOT_STARTED:
2197 case TRX_STATE_FORCED_ROLLBACK:
2198
2199 trx_start_low(trx, true);
2200 /* fall through */
2201
2202 case TRX_STATE_ACTIVE:
2203 case TRX_STATE_PREPARED:
2204
2205 /* If the trx is in a lock wait state, moves the waiting
2206 query thread to the suspended state */
2207
2208 if (trx->lock.que_state == TRX_QUE_LOCK_WAIT) {
2209 ut_a(trx->lock.wait_thr != nullptr);
2210 trx->lock.wait_thr->state = QUE_THR_SUSPENDED;
2211 trx->lock.wait_thr = nullptr;
2212
2213 trx->lock.que_state = TRX_QUE_RUNNING;
2214 }
2215
2216 ut_a(trx->lock.n_active_thrs == 1);
2217 return;
2218
2219 case TRX_STATE_COMMITTED_IN_MEMORY:
2220 break;
2221 }
2222
2223 ut_error;
2224 }
2225
2226 /** Creates a commit command node struct.
2227 @return own: commit node struct */
trx_commit_node_create(mem_heap_t * heap)2228 commit_node_t *trx_commit_node_create(
2229 mem_heap_t *heap) /*!< in: mem heap where created */
2230 {
2231 commit_node_t *node;
2232
2233 node = static_cast<commit_node_t *>(mem_heap_alloc(heap, sizeof(*node)));
2234 node->common.type = QUE_NODE_COMMIT;
2235 node->state = COMMIT_NODE_SEND;
2236
2237 return (node);
2238 }
2239
2240 /** Performs an execution step for a commit type node in a query graph.
2241 @return query thread to run next, or NULL */
trx_commit_step(que_thr_t * thr)2242 que_thr_t *trx_commit_step(que_thr_t *thr) /*!< in: query thread */
2243 {
2244 commit_node_t *node;
2245
2246 node = static_cast<commit_node_t *>(thr->run_node);
2247
2248 ut_ad(que_node_get_type(node) == QUE_NODE_COMMIT);
2249
2250 if (thr->prev_node == que_node_get_parent(node)) {
2251 node->state = COMMIT_NODE_SEND;
2252 }
2253
2254 if (node->state == COMMIT_NODE_SEND) {
2255 trx_t *trx;
2256
2257 node->state = COMMIT_NODE_WAIT;
2258
2259 trx = thr_get_trx(thr);
2260
2261 ut_a(trx->lock.wait_thr == nullptr);
2262 ut_a(trx->lock.que_state != TRX_QUE_LOCK_WAIT);
2263
2264 trx_commit_or_rollback_prepare(trx);
2265
2266 trx->lock.que_state = TRX_QUE_COMMITTING;
2267
2268 trx_commit(trx);
2269
2270 ut_ad(trx->lock.wait_thr == nullptr);
2271
2272 trx->lock.que_state = TRX_QUE_RUNNING;
2273
2274 thr = nullptr;
2275 } else {
2276 ut_ad(node->state == COMMIT_NODE_WAIT);
2277
2278 node->state = COMMIT_NODE_SEND;
2279
2280 thr->run_node = que_node_get_parent(node);
2281 }
2282
2283 return (thr);
2284 }
2285
2286 /** Does the transaction commit for MySQL.
2287 @return DB_SUCCESS or error number */
trx_commit_for_mysql(trx_t * trx)2288 dberr_t trx_commit_for_mysql(trx_t *trx) /*!< in/out: transaction */
2289 {
2290 DEBUG_SYNC_C("trx_commit_for_mysql_checks_for_aborted");
2291 TrxInInnoDB trx_in_innodb(trx, true);
2292
2293 if (trx_in_innodb.is_aborted() && trx->killed_by != os_thread_get_curr_id()) {
2294 return (DB_FORCED_ABORT);
2295 }
2296
2297 /* Because we do not do the commit by sending an Innobase
2298 sig to the transaction, we must here make sure that trx has been
2299 started. */
2300
2301 dberr_t db_err = DB_SUCCESS;
2302
2303 switch (trx->state) {
2304 case TRX_STATE_NOT_STARTED:
2305 case TRX_STATE_FORCED_ROLLBACK:
2306
2307 ut_d(trx->start_file = __FILE__);
2308 ut_d(trx->start_line = __LINE__);
2309
2310 trx_start_low(trx, true);
2311 /* fall through */
2312 case TRX_STATE_ACTIVE:
2313 case TRX_STATE_PREPARED:
2314 trx->op_info = "committing";
2315
2316 /* For GTID persistence we need update undo segment. */
2317 db_err = trx_undo_gtid_add_update_undo(trx, false, false);
2318 if (db_err != DB_SUCCESS) {
2319 return (db_err);
2320 }
2321
2322 /* Flush prepare GTID for XA prepared transactions. */
2323 trx_undo_gtid_flush_prepare(trx);
2324
2325 if (trx->id != 0) {
2326 trx_update_mod_tables_timestamp(trx);
2327 }
2328
2329 trx_commit(trx);
2330
2331 MONITOR_DEC(MONITOR_TRX_ACTIVE);
2332 trx->op_info = "";
2333 return (DB_SUCCESS);
2334 case TRX_STATE_COMMITTED_IN_MEMORY:
2335 break;
2336 }
2337 ut_error;
2338 return (DB_CORRUPTION);
2339 }
2340
2341 /** If required, flushes the log to disk if we called trx_commit_for_mysql()
2342 with trx->flush_log_later == TRUE. */
trx_commit_complete_for_mysql(trx_t * trx)2343 void trx_commit_complete_for_mysql(trx_t *trx) /*!< in/out: transaction */
2344 {
2345 if (trx->id != 0 || !trx->must_flush_log_later ||
2346 (thd_requested_durability(trx->mysql_thd) == HA_IGNORE_DURABILITY &&
2347 !trx->ddl_must_flush)) {
2348 /* If we removed trx->ddl_must_flush from condition above, we would
2349 need to take care of fixing innobase_flush_logs for a scenario in
2350 which srv_flush_log_at_trx_commit == 0. */
2351 return;
2352 }
2353
2354 trx_flush_log_if_needed(trx->commit_lsn, trx);
2355
2356 trx->must_flush_log_later = false;
2357 trx->ddl_must_flush = false;
2358 }
2359
2360 /** Marks the latest SQL statement ended. */
trx_mark_sql_stat_end(trx_t * trx)2361 void trx_mark_sql_stat_end(trx_t *trx) /*!< in: trx handle */
2362 {
2363 ut_a(trx);
2364
2365 lock_on_statement_end(trx);
2366
2367 switch (trx->state) {
2368 case TRX_STATE_PREPARED:
2369 case TRX_STATE_COMMITTED_IN_MEMORY:
2370 break;
2371 case TRX_STATE_NOT_STARTED:
2372 case TRX_STATE_FORCED_ROLLBACK:
2373 trx->undo_no = 0;
2374 trx->undo_rseg_space = 0;
2375 /* fall through */
2376 case TRX_STATE_ACTIVE:
2377 trx->last_sql_stat_start.least_undo_no = trx->undo_no;
2378
2379 if (trx->fts_trx != nullptr) {
2380 fts_savepoint_laststmt_refresh(trx);
2381 }
2382
2383 return;
2384 }
2385
2386 ut_error;
2387 }
2388
/** Prints info about a transaction.
Caller must hold trx_sys->mutex.
The output consists of a header line (id, state, current operation and
flags), an optional line with MySQL table usage, an optional line with
the query state, lock and undo statistics, and finally the thread
information printed by innobase_mysql_print_thd(). */
void trx_print_low(FILE *f,
                   /*!< in: output stream */
                   const trx_t *trx,
                   /*!< in: transaction */
                   ulint max_query_len,
                   /*!< in: max query length to print,
                   or 0 to use the default max length */
                   ulint n_rec_locks,
                   /*!< in: lock_number_of_rows_locked(&trx->lock) */
                   ulint n_trx_locks,
                   /*!< in: length of trx->lock.trx_locks */
                   ulint heap_size)
/*!< in: mem_heap_get_size(trx->lock.lock_heap) */
{
  /* TRUE when the statistics line has content and must be terminated
  with a newline. */
  ibool newline;
  const char *op_info;

  ut_ad(trx_sys_mutex_own());

  fprintf(f, "TRANSACTION " TRX_ID_FMT, trx_get_id_for_print(trx));

  /* trx->state cannot change from or to NOT_STARTED while we
  are holding the trx_sys->mutex. It may change from ACTIVE to
  PREPARED or COMMITTED. */
  switch (trx->state) {
    case TRX_STATE_NOT_STARTED:
      fputs(", not started", f);
      goto state_ok;
    case TRX_STATE_FORCED_ROLLBACK:
      fputs(", forced rollback", f);
      goto state_ok;
    case TRX_STATE_ACTIVE:
      /* Print the seconds elapsed since trx->start_time. */
      fprintf(f, ", ACTIVE %lu sec",
              (ulong)difftime(time(nullptr), trx->start_time));
      goto state_ok;
    case TRX_STATE_PREPARED:
      fprintf(f, ", ACTIVE (PREPARED) %lu sec",
              (ulong)difftime(time(nullptr), trx->start_time));
      goto state_ok;
    case TRX_STATE_COMMITTED_IN_MEMORY:
      fputs(", COMMITTED IN MEMORY", f);
      goto state_ok;
  }
  /* Unknown state: print its numeric value and assert in debug builds. */
  fprintf(f, ", state %lu", (ulong)trx->state);
  ut_ad(0);
state_ok:

  /* prevent a race condition */
  /* (the pointer is copied once, so that the test and the print below use
  the same string) */
  op_info = trx->op_info;

  if (*op_info) {
    putc(' ', f);
    fputs(op_info, f);
  }

  if (trx->is_recovered) {
    fputs(" recovered trx", f);
  }

  if (trx->declared_to_be_inside_innodb) {
    fprintf(f, ", thread declared inside InnoDB %lu",
            (ulong)trx->n_tickets_to_enter_innodb);
  }

  /* End of the header line. */
  putc('\n', f);

  if (trx->n_mysql_tables_in_use > 0 || trx->mysql_n_tables_locked > 0) {
    fprintf(f, "mysql tables in use %lu, locked %lu\n",
            (ulong)trx->n_mysql_tables_in_use,
            (ulong)trx->mysql_n_tables_locked);
  }

  newline = TRUE;

  /* trx->lock.que_state of an ACTIVE transaction may change
  while we are not holding trx->mutex. We perform a dirty read
  for performance reasons. */

  switch (trx->lock.que_state) {
    case TRX_QUE_RUNNING:
      /* Nothing is printed for a running trx, so no newline is needed
      unless one of the statistics below is printed. */
      newline = FALSE;
      break;
    case TRX_QUE_LOCK_WAIT:
      fputs("LOCK WAIT ", f);
      break;
    case TRX_QUE_ROLLING_BACK:
      fputs("ROLLING BACK ", f);
      break;
    case TRX_QUE_COMMITTING:
      fputs("COMMITTING ", f);
      break;
    default:
      fprintf(f, "que state %lu ", (ulong)trx->lock.que_state);
  }

  /* Lock statistics are printed when the trx has lock structs or its lock
  heap is larger than 400 (in the units of the heap_size argument). */
  if (n_trx_locks > 0 || heap_size > 400) {
    newline = TRUE;

    fprintf(f,
            "%lu lock struct(s), heap size %lu,"
            " %lu row lock(s)",
            (ulong)n_trx_locks, (ulong)heap_size, (ulong)n_rec_locks);
  }

  if (trx->has_search_latch) {
    newline = TRUE;
    fputs(", holds adaptive hash latch", f);
  }

  if (trx->undo_no != 0) {
    newline = TRUE;
    fprintf(f, ", undo log entries " TRX_ID_FMT, trx->undo_no);
  }

  if (newline) {
    putc('\n', f);
  }

  /* Thread information is printed only for a trx that is not in
  TRX_STATE_NOT_STARTED and is attached to a MySQL thread. */
  if (trx->state != TRX_STATE_NOT_STARTED && trx->mysql_thd != nullptr) {
    innobase_mysql_print_thd(f, trx->mysql_thd,
                             static_cast<uint>(max_query_len));
  }
}
2514
trx_print_latched(FILE * f,const trx_t * trx,ulint max_query_len)2515 void trx_print_latched(FILE *f, const trx_t *trx, ulint max_query_len) {
2516 /* We need exclusive access to lock_sys for lock_number_of_rows_locked(),
2517 and accessing trx->lock fields without trx->mutex.*/
2518 ut_ad(locksys::owns_exclusive_global_latch());
2519 ut_ad(trx_sys_mutex_own());
2520
2521 trx_print_low(f, trx, max_query_len, lock_number_of_rows_locked(&trx->lock),
2522 UT_LIST_GET_LEN(trx->lock.trx_locks),
2523 mem_heap_get_size(trx->lock.lock_heap));
2524 }
2525
trx_print(FILE * f,const trx_t * trx,ulint max_query_len)2526 void trx_print(FILE *f, const trx_t *trx, ulint max_query_len) {
2527 /* trx_print_latched() requires exclusive global latch */
2528 locksys::Global_exclusive_latch_guard guard{};
2529 mutex_enter(&trx_sys->mutex);
2530 trx_print_latched(f, trx, max_query_len);
2531 mutex_exit(&trx_sys->mutex);
2532 }
2533
2534 #ifdef UNIV_DEBUG
trx_can_be_handled_by_current_thread(const trx_t * trx)2535 bool trx_can_be_handled_by_current_thread(const trx_t *trx) {
2536 return (trx->mysql_thd == nullptr || trx->mysql_thd == current_thd);
2537 }
2538
2539 /** Asserts that a transaction has been started.
2540 The caller must hold trx_sys->mutex.
2541 @return true if started */
trx_assert_started(const trx_t * trx)2542 ibool trx_assert_started(const trx_t *trx) /*!< in: transaction */
2543 {
2544 ut_ad(trx_sys_mutex_own());
2545
2546 /* Non-locking autocommits should not hold any locks and this
2547 function is only called from the locking code. */
2548 check_trx_state(trx);
2549
2550 /* trx->state can change from or to NOT_STARTED while we are holding
2551 trx_sys->mutex for non-locking autocommit selects but not for other
2552 types of transactions. It may change from ACTIVE to PREPARED. */
2553
2554 switch (trx->state) {
2555 case TRX_STATE_PREPARED:
2556 return (TRUE);
2557
2558 case TRX_STATE_ACTIVE:
2559 case TRX_STATE_COMMITTED_IN_MEMORY:
2560 return (TRUE);
2561
2562 case TRX_STATE_NOT_STARTED:
2563 case TRX_STATE_FORCED_ROLLBACK:
2564 break;
2565 }
2566
2567 ut_error;
2568 }
2569
2570 /*
2571 Interaction between Lock-sys and trx->mutex-es is rather complicated.
2572 In particular we allow a thread performing Lock-sys operations to request
2573 another trx->mutex even though it already holds one for a different trx.
2574 Therefore one has to prove that it is impossible to form a deadlock cycle in the
2575 imaginary wait-for-graph in which edges go from thread trying to obtain
2576 trx->mutex to a thread which holds it at the moment.
2577
2578 In the past it was simple, because Lock-sys was protected by a global mutex,
which meant that there was at most one thread which could try to possess more
2580 than one trx->mutex - one can not form a cycle in a graph in which only
2581 one node has both incoming and outgoing edges.
2582
2583 Today it is much harder to prove, because we have sharded the Lock-sys mutex,
2584 and now multiple threads can perform Lock-sys operations in parallel, as long
2585 as they happen in different shards.
2586
2587 Here's my attempt at the proof.
2588
2589 Assumption 1.
If a thread attempts to acquire more than one trx->mutex, then it either has
exclusive global latch, or it attempts to acquire exactly two of them, and
2592 just before calling mutex_enter for the second time it saw
2593 trx1->lock.wait_lock==nullptr, trx2->lock.wait_lock!=nullptr, and it held the
2594 latch for the shard containing trx2->lock.wait_lock.
2595
2596 @see asserts in trx_before_mutex_enter
2597
2598 Assumption 2.
2599 The Lock-sys latches are taken before any trx->mutex.
2600
2601 @see asserts in sync0debug.cc
2602
2603 Assumption 3.
2604 Changing trx->lock.wait_lock from NULL to non-NULL requires latching
2605 trx->mutex and the shard containing new wait_lock value.
2606
2607 @see asserts in lock_set_lock_and_trx_wait()
2608
2609 Assumption 4.
2610 Changing trx->lock.wait_lock from non-NULL to NULL requires latching the shard
2611 containing old wait_lock value.
2612
2613 @see asserts in lock_reset_lock_and_trx_wait()
2614
2615 Assumption 5.
2616 If a thread is latching two Lock-sys shards then it's acquiring and releasing
2617 both shards together (that is, without interleaving it with trx->mutex
2618 operations).
2619
2620 @see Shard_latches_guard
2621
2622 Theorem 1.
2623 If the Assumptions 1-5 hold, then it's impossible for trx_mutex_enter() call
2624 to deadlock.
2625
2626 By proving the theorem, and observing that the assertions hold for multiple runs
2627 of test suite on debug build, we gain more and more confidence that
2628 trx_mutex_enter() calls can not deadlock.
2629
2630 The intuitive, albeit imprecise, version of the proof is that by Assumption 1
2631 each edge of the deadlock cycle leads from a trx with NULL trx->lock.wait_lock
2632 to one with non-NULL wait_lock, which means it has only one edge.
2633
The difficulty lies in that wait_lock is a field which can be modified over time
2635 from several threads, so care must be taken to clarify at which moment in time
2636 we make our observations and from whose perspective.
2637
2638 We will now formally prove Theorem 1.
Assume otherwise, that is that we are in a thread which has just started a call
2640 to mutex_enter(trx_a->mutex) and caused a deadlock.
2641
2642 Fact 0. There is no thread which possesses exclusive Lock-sys latch, since to
2643 form a deadlock one needs at least two threads inside Lock-sys
2644 Fact 1. Each thread participating in the deadlock holds one trx mutex and waits
2645 for the second one it tried to acquire
2646 Fact 2. Thus each thread participating in the deadlock had gone through "else"
2647 branch inside trx_before_mutex_enter(), so it verifies Assumption 1.
2648 Fact 3. Our thread owns_lock_shard(trx_a->lock.wait_lock)
2649 Fact 4. Another thread has latched trx_a->mutex as the first of its two latches
2650
2651 Consider the situation from the point of view of this other thread, which is now
2652 in the deadlock waiting for mutex_enter(trx_b->mutex) for some trx_b!=trx_a.
2653 By Fact 2 and assumption 1, it had to take the "else" branch on the way there,
and thus it saw: trx_a->lock.wait_lock == nullptr at some moment in time.
2655 This observation was either before or after our observation that
2656 trx_a->lock.wait_lock != nullptr (again Fact 2 and Assumption 1).
2657
2658 If our thread observed non-NULL value first, then it means a change from
2659 non-NULL to NULL has happened, which by Assumption 4 requires a shard latch,
which only our thread possesses - and we couldn't manipulate the wait_lock as we
2661 are in a deadlock.
2662
2663 If the other thread observed NULL first, then it means that the value has
2664 changed to non-NULL, which requires trx_a->mutex according to Assumption 3, yet
2665 this mutex was held entire time by the other thread, since it observed the NULL
just before it deadlocked, so it could not change it, either.
2667
2668 So, there is no way the value of wait_lock has changed from NULL to non-NULL or
2669 vice-versa, yet one thread sees NULL and the other non-NULL - contradiction ends
2670 the proof.
2671 */
2672
/* Debug-only, per-thread bookkeeping maintained by trx_before_mutex_enter()
and trx_before_mutex_exit() to check the trx->mutex latching rules. */

/** The trx registered while trx_latched_count was 0, i.e. the first one
latched by this thread; reset to nullptr when the count drops back to 0. */
static thread_local const trx_t *trx_first_latched_trx = nullptr;
/** Number of trx_before_mutex_enter() calls of this thread not yet matched
by trx_before_mutex_exit(). */
static thread_local int32_t trx_latched_count = 0;
/** Set when the first latch was registered with first_of_two == true;
cleared when trx_latched_count drops back to 0. */
static thread_local bool trx_allowed_two_latches = false;
2676
trx_before_mutex_enter(const trx_t * trx,bool first_of_two)2677 void trx_before_mutex_enter(const trx_t *trx, bool first_of_two) {
2678 if (0 == trx_latched_count++) {
2679 ut_a(trx_first_latched_trx == nullptr);
2680 trx_first_latched_trx = trx;
2681 if (first_of_two) {
2682 trx_allowed_two_latches = true;
2683 }
2684 } else {
2685 ut_a(!first_of_two);
2686 if (!locksys::owns_exclusive_global_latch()) {
2687 ut_a(trx_allowed_two_latches);
2688 ut_a(trx_latched_count == 2);
2689 ut_a(trx_first_latched_trx->lock.wait_lock == nullptr);
2690 ut_a(trx_first_latched_trx != trx);
2691 /* This is not very safe, because to read trx->lock.wait_lock we
2692 should already either latch trx->mutex (which we don't) or shard with
2693 trx->lock.wait_lock. But our claim is precisely that we have latched
2694 this shard, and we want to check that here. */
2695 ut_a(trx->lock.wait_lock != nullptr);
2696 ut_a(locksys::owns_lock_shard(trx->lock.wait_lock));
2697 }
2698 }
2699 }
trx_before_mutex_exit(const trx_t * trx)2700 void trx_before_mutex_exit(const trx_t *trx) {
2701 ut_a(0 < trx_latched_count);
2702 if (0 == --trx_latched_count) {
2703 ut_a(trx_first_latched_trx == trx);
2704 trx_first_latched_trx = nullptr;
2705 trx_allowed_two_latches = false;
2706 }
2707 }
2708 #endif /* UNIV_DEBUG */
2709
2710 /** Compares the "weight" (or size) of two transactions. Transactions that
2711 have edited non-transactional tables are considered heavier than ones
2712 that have not.
2713 @return true if weight(a) >= weight(b) */
trx_weight_ge(const trx_t * a,const trx_t * b)2714 bool trx_weight_ge(const trx_t *a, /*!< in: transaction to be compared */
2715 const trx_t *b) /*!< in: transaction to be compared */
2716 {
2717 /* To read TRX_WEIGHT we need a exclusive global lock_sys latch */
2718 ut_ad(locksys::owns_exclusive_global_latch());
2719 ibool a_notrans_edit;
2720 ibool b_notrans_edit;
2721
2722 /* If mysql_thd is NULL for a transaction we assume that it has
2723 not edited non-transactional tables. */
2724
2725 a_notrans_edit =
2726 a->mysql_thd != nullptr && thd_has_edited_nontrans_tables(a->mysql_thd);
2727
2728 b_notrans_edit =
2729 b->mysql_thd != nullptr && thd_has_edited_nontrans_tables(b->mysql_thd);
2730
2731 if (a_notrans_edit != b_notrans_edit) {
2732 return (a_notrans_edit);
2733 }
2734
2735 /* Either both had edited non-transactional tables or both had
2736 not, we fall back to comparing the number of altered/locked
2737 rows. */
2738
2739 return (TRX_WEIGHT(a) >= TRX_WEIGHT(b));
2740 }
2741
2742 /** Prepares a transaction for given rollback segment.
2743 @return lsn_t: lsn assigned for commit of scheduled rollback segment */
trx_prepare_low(trx_t * trx,trx_undo_ptr_t * undo_ptr,bool noredo_logging)2744 static lsn_t trx_prepare_low(
2745 trx_t *trx, /*!< in/out: transaction */
2746 trx_undo_ptr_t *undo_ptr, /*!< in/out: pointer to rollback
2747 segment scheduled for prepare. */
2748 bool noredo_logging) /*!< in: turn-off redo logging. */
2749 {
2750 if (undo_ptr->insert_undo != nullptr || undo_ptr->update_undo != nullptr) {
2751 mtr_t mtr;
2752 trx_rseg_t *rseg = undo_ptr->rseg;
2753
2754 mtr_start_sync(&mtr);
2755
2756 if (noredo_logging) {
2757 mtr_set_log_mode(&mtr, MTR_LOG_NO_REDO);
2758 }
2759
2760 /* Change the undo log segment states from TRX_UNDO_ACTIVE to
2761 TRX_UNDO_PREPARED: these modifications to the file data
2762 structure define the transaction as prepared in the file-based
2763 world, at the serialization point of lsn. */
2764
2765 rseg->latch();
2766
2767 if (undo_ptr->insert_undo != nullptr) {
2768 /* It is not necessary to obtain trx->undo_mutex here
2769 because only a single OS thread is allowed to do the
2770 transaction prepare for this transaction. */
2771 trx_undo_set_state_at_prepare(trx, undo_ptr->insert_undo, false, &mtr);
2772 }
2773
2774 if (undo_ptr->update_undo != nullptr) {
2775 if (!noredo_logging) {
2776 trx_undo_gtid_set(trx, undo_ptr->update_undo);
2777 }
2778 trx_undo_set_state_at_prepare(trx, undo_ptr->update_undo, false, &mtr);
2779 }
2780
2781 rseg->unlatch();
2782
2783 /*--------------*/
2784 /* This mtr commit makes the transaction prepared in
2785 file-based world. */
2786 mtr_commit(&mtr);
2787 /*--------------*/
2788
2789 if (!noredo_logging) {
2790 const lsn_t lsn = mtr.commit_lsn();
2791 ut_ad(lsn > 0 || !mtr_t::s_logging.is_enabled());
2792 return lsn;
2793 }
2794 }
2795
2796 return 0;
2797 }
2798
trx_is_mysql_xa(const trx_t * trx)2799 bool trx_is_mysql_xa(const trx_t *trx) {
2800 auto my_xid = trx->xid->get_my_xid();
2801 return (my_xid != 0);
2802 }
2803
2804 /** Prepares a transaction. */
trx_prepare(trx_t * trx)2805 static void trx_prepare(trx_t *trx) /*!< in/out: transaction */
2806 {
2807 /* This transaction has crossed the point of no return and cannot
2808 be rolled back asynchronously now. It must commit or rollback
2809 synchronously. */
2810
2811 lsn_t lsn = 0;
2812
2813 /* Only fresh user transactions can be prepared.
2814 Recovered transactions cannot. */
2815 ut_a(!trx->is_recovered);
2816
2817 DBUG_EXECUTE_IF("ib_trx_crash_during_xa_prepare_step", DBUG_SUICIDE(););
2818
2819 if (trx->rsegs.m_redo.rseg != nullptr && trx_is_redo_rseg_updated(trx)) {
2820 lsn = trx_prepare_low(trx, &trx->rsegs.m_redo, false);
2821 }
2822
2823 if (trx->rsegs.m_noredo.rseg != nullptr && trx_is_temp_rseg_updated(trx)) {
2824 trx_prepare_low(trx, &trx->rsegs.m_noredo, true);
2825 }
2826
2827 /* Check and get GTID to be persisted. Do it outside trx_sys mutex. */
2828 auto >id_persistor = clone_sys->get_gtid_persistor();
2829 Gtid_desc gtid_desc;
2830 gtid_persistor.get_gtid_info(trx, gtid_desc);
2831
2832 /*--------------------------------------*/
2833 ut_a(trx->state == TRX_STATE_ACTIVE);
2834 trx_sys_mutex_enter();
2835 trx->state = TRX_STATE_PREPARED;
2836 trx_sys->n_prepared_trx++;
2837 /* Add GTID to be persisted to disk table, if needed. */
2838 if (gtid_desc.m_is_set) {
2839 gtid_persistor.add(gtid_desc);
2840 }
2841 trx_sys_mutex_exit();
2842 /*--------------------------------------*/
2843
2844 /* Reset after successfully adding GTID to in memory table. */
2845 trx->persists_gtid = false;
2846
2847 /* Force isolation level to RC and release GAP locks
2848 for test purpose. */
2849 DBUG_EXECUTE_IF("ib_force_release_gap_lock_prepare",
2850 trx->isolation_level = TRX_ISO_READ_COMMITTED;);
2851
2852 /* Release read locks after PREPARE for READ COMMITTED
2853 and lower isolation. */
2854 if (trx->isolation_level <= TRX_ISO_READ_COMMITTED) {
2855 /* Stop inheriting GAP locks. */
2856 trx->skip_lock_inheritance = true;
2857
2858 /* Release only GAP locks for now. */
2859 lock_trx_release_read_locks(trx, true);
2860 }
2861
2862 switch (thd_requested_durability(trx->mysql_thd)) {
2863 case HA_IGNORE_DURABILITY:
2864 /* We set the HA_IGNORE_DURABILITY during prepare phase of
2865 binlog group commit to not flush redo log for every transaction
2866 here. So that we can flush prepared records of transactions to
2867 redo log in a group right before writing them to binary log
2868 during flush stage of binlog group commit. */
2869 break;
2870 case HA_REGULAR_DURABILITY:
2871 if (lsn == 0) {
2872 break;
2873 }
2874 /* Depending on the my.cnf options, we may now write the log
2875 buffer to the log files, making the prepared state of the
2876 transaction durable if the OS does not crash. We may also
2877 flush the log files to disk, making the prepared state of the
2878 transaction durable also at an OS crash or a power outage.
2879
2880 The idea in InnoDB's group prepare is that a group of
2881 transactions gather behind a trx doing a physical disk write
2882 to log files, and when that physical write has been completed,
2883 one of those transactions does a write which prepares the whole
2884 group. Note that this group prepare will only bring benefit if
2885 there are > 2 users in the database. Then at least 2 users can
2886 gather behind one doing the physical log write to disk.
2887
2888 We must not be holding any mutexes or latches here. */
2889
2890 /* We should trust trx->ddl_operation instead of
2891 ddl_must_flush here */
2892 trx->ddl_must_flush = false;
2893 trx_flush_log_if_needed(lsn, trx);
2894 }
2895 }
2896
2897 /**
2898 Does the transaction prepare for MySQL.
2899 @param[in, out] trx Transaction instance to prepare */
trx_prepare_for_mysql(trx_t * trx)2900 dberr_t trx_prepare_for_mysql(trx_t *trx) {
2901 trx_start_if_not_started_xa(trx, false);
2902
2903 TrxInInnoDB trx_in_innodb(trx, true);
2904
2905 if (trx_in_innodb.is_aborted() && trx->killed_by != os_thread_get_curr_id()) {
2906 return (DB_FORCED_ABORT);
2907 }
2908
2909 /* For GTID persistence we need update undo segment. */
2910 auto db_err = trx_undo_gtid_add_update_undo(trx, true, false);
2911 if (db_err != DB_SUCCESS) {
2912 return (db_err);
2913 }
2914
2915 trx->op_info = "preparing";
2916
2917 trx_prepare(trx);
2918
2919 trx->op_info = "";
2920
2921 return (DB_SUCCESS);
2922 }
2923
2924 /**
2925 Get the table name and database name for the given dd_table object.
2926
2927 @param[in,out] table Handler table name object pointer.
2928 @param[in] dd_table Pointer table name DD object.
2929 @param[in] mem_root Mem_root for space allocation.
2930
2931 @retval true Error, e.g. Memory allocation failure.
2932 @retval false Success
2933 */
2934
get_table_name_info(st_handler_tablename * table,const dict_table_t * dd_table,MEM_ROOT * mem_root)2935 static bool get_table_name_info(st_handler_tablename *table,
2936 const dict_table_t *dd_table,
2937 MEM_ROOT *mem_root) {
2938 const char *ptr;
2939
2940 size_t len = dict_get_db_name_len(dd_table->name.m_name);
2941 table->db = strmake_root(mem_root, dd_table->name.m_name, len);
2942 if (table->db == nullptr) return true;
2943
2944 ptr = dict_remove_db_name(dd_table->name.m_name);
2945 len = ut_strlen(ptr);
2946 table->tablename = strmake_root(mem_root, ptr, len);
2947 if (table->tablename == nullptr) return true;
2948
2949 return false;
2950 }
2951
2952 /**
2953 Get prepared transaction info from InnoDB data structure.
2954
2955 @param[in,out] txn_list Handler layer tansaction list.
2956 @param[in] trx Innodb transaction info.
2957 @param[in] mem_root Mem_root for space allocation.
2958
2959 @retval true Error, e.g. Memory allocation failure.
2960 @retval false Success
2961 */
2962
get_info_about_prepared_transaction(XA_recover_txn * txn_list,const trx_t * trx,MEM_ROOT * mem_root)2963 static bool get_info_about_prepared_transaction(XA_recover_txn *txn_list,
2964 const trx_t *trx,
2965 MEM_ROOT *mem_root) {
2966 txn_list->id = *trx->xid;
2967 txn_list->mod_tables = new (mem_root) List<st_handler_tablename>();
2968 if (!txn_list->mod_tables) return true;
2969
2970 for (auto dd_table : trx->mod_tables) {
2971 st_handler_tablename *table = new (mem_root) st_handler_tablename();
2972
2973 if (!table || get_table_name_info(table, dd_table, mem_root) ||
2974 txn_list->mod_tables->push_back(table, mem_root))
2975 return true;
2976 }
2977 return false;
2978 }
2979
2980 /** This function is used to find number of prepared transactions and
2981 their transaction objects for a recovery.
2982 @return number of prepared transactions stored in xid_list */
trx_recover_for_mysql(XA_recover_txn * txn_list,ulint len,MEM_ROOT * mem_root)2983 int trx_recover_for_mysql(
2984 XA_recover_txn *txn_list, /*!< in/out: prepared transactions */
2985 ulint len, /*!< in: number of slots in xid_list */
2986 MEM_ROOT *mem_root) /*!< in: memory for table names */
2987 {
2988 const trx_t *trx;
2989 ulint count = 0;
2990
2991 ut_ad(txn_list);
2992 ut_ad(len);
2993
2994 /* We should set those transactions which are in the prepared state
2995 to the xid_list */
2996
2997 trx_sys_mutex_enter();
2998
2999 for (trx = UT_LIST_GET_FIRST(trx_sys->rw_trx_list); trx != nullptr;
3000 trx = UT_LIST_GET_NEXT(trx_list, trx)) {
3001 assert_trx_in_rw_list(trx);
3002
3003 /* The state of a read-write transaction cannot change
3004 from or to NOT_STARTED while we are holding the
3005 trx_sys->mutex. It may change to PREPARED, but not if
3006 trx->is_recovered. */
3007 if (trx_state_eq(trx, TRX_STATE_PREPARED)) {
3008 if (get_info_about_prepared_transaction(&txn_list[count], trx, mem_root))
3009 break;
3010
3011 if (count == 0) {
3012 ib::info(ER_IB_MSG_1207) << "Starting recovery for"
3013 " XA transactions...";
3014 }
3015
3016 ib::info(ER_IB_MSG_1208) << "Transaction " << trx_get_id_for_print(trx)
3017 << " in prepared state after recovery";
3018
3019 ib::info(ER_IB_MSG_1209)
3020 << "Transaction contains changes to " << trx->undo_no << " rows";
3021
3022 count++;
3023
3024 if (count == len) {
3025 break;
3026 }
3027 }
3028 }
3029
3030 trx_sys_mutex_exit();
3031
3032 if (count > 0) {
3033 ib::info(ER_IB_MSG_1210) << count
3034 << " transactions in prepared state"
3035 " after recovery";
3036 }
3037
3038 return (int(count));
3039 }
3040
3041 /** This function is used to find one X/Open XA distributed transaction
3042 which is in the prepared state
3043 @return trx on match, the trx->xid will be invalidated;
3044 */
trx_get_trx_by_xid_low(const XID * xid)3045 static MY_ATTRIBUTE((warn_unused_result)) trx_t *trx_get_trx_by_xid_low(
3046 const XID *xid) /*!< in: X/Open XA transaction
3047 identifier */
3048 {
3049 trx_t *trx;
3050
3051 ut_ad(trx_sys_mutex_own());
3052
3053 for (trx = UT_LIST_GET_FIRST(trx_sys->rw_trx_list); trx != nullptr;
3054 trx = UT_LIST_GET_NEXT(trx_list, trx)) {
3055 assert_trx_in_rw_list(trx);
3056
3057 /* Compare two X/Open XA transaction id's: their
3058 length should be the same and binary comparison
3059 of gtrid_length+bqual_length bytes should be
3060 the same */
3061
3062 if (trx->is_recovered && trx_state_eq(trx, TRX_STATE_PREPARED) &&
3063 xid->eq(trx->xid)) {
3064 /* Invalidate the XID, so that subsequent calls
3065 will not find it. */
3066 trx->xid->reset();
3067 break;
3068 }
3069 }
3070
3071 return (trx);
3072 }
3073
trx_get_trx_by_xid(const XID * xid)3074 trx_t *trx_get_trx_by_xid(const XID *xid) {
3075 trx_t *trx;
3076
3077 if (xid == nullptr) {
3078 return (nullptr);
3079 }
3080
3081 trx_sys_mutex_enter();
3082
3083 /* Recovered/Resurrected transactions are always only on the
3084 trx_sys_t::rw_trx_list. */
3085 trx = trx_get_trx_by_xid_low(xid);
3086
3087 trx_sys_mutex_exit();
3088
3089 return (trx);
3090 }
3091
3092 /** Starts the transaction if it is not yet started. */
trx_start_if_not_started_xa_low(trx_t * trx,bool read_write)3093 void trx_start_if_not_started_xa_low(
3094 trx_t *trx, /*!< in/out: transaction */
3095 bool read_write) /*!< in: true if read write transaction */
3096 {
3097 switch (trx->state) {
3098 case TRX_STATE_NOT_STARTED:
3099 case TRX_STATE_FORCED_ROLLBACK:
3100 trx_start_low(trx, read_write);
3101 return;
3102
3103 case TRX_STATE_ACTIVE:
3104 if (trx->id == 0 && read_write) {
3105 /* If the transaction is tagged as read-only then
3106 it can only write to temp tables and for such
3107 transactions we don't want to move them to the
3108 trx_sys_t::rw_trx_list. */
3109 if (!trx->read_only) {
3110 trx_set_rw_mode(trx);
3111 } else if (!srv_read_only_mode) {
3112 trx_assign_rseg_temp(trx);
3113 }
3114 }
3115 return;
3116 case TRX_STATE_PREPARED:
3117 case TRX_STATE_COMMITTED_IN_MEMORY:
3118 break;
3119 }
3120
3121 ut_error;
3122 }
3123
3124 /** Starts the transaction if it is not yet started. */
trx_start_if_not_started_low(trx_t * trx,bool read_write)3125 void trx_start_if_not_started_low(
3126 trx_t *trx, /*!< in: transaction */
3127 bool read_write) /*!< in: true if read write transaction */
3128 {
3129 switch (trx->state) {
3130 case TRX_STATE_NOT_STARTED:
3131 case TRX_STATE_FORCED_ROLLBACK:
3132
3133 trx_start_low(trx, read_write);
3134 return;
3135
3136 case TRX_STATE_ACTIVE:
3137
3138 if (read_write && trx->id == 0 && !trx->read_only) {
3139 trx_set_rw_mode(trx);
3140 }
3141 return;
3142
3143 case TRX_STATE_PREPARED:
3144 case TRX_STATE_COMMITTED_IN_MEMORY:
3145 break;
3146 }
3147
3148 ut_error;
3149 }
3150
3151 /** Starts a transaction for internal processing. */
trx_start_internal_low(trx_t * trx)3152 void trx_start_internal_low(trx_t *trx) /*!< in/out: transaction */
3153 {
3154 /* Ensure it is not flagged as an auto-commit-non-locking
3155 transaction. */
3156
3157 trx->will_lock = 1;
3158
3159 trx->internal = true;
3160
3161 trx_start_low(trx, true);
3162 }
3163
3164 /** Starts a read-only transaction for internal processing.
3165 @param[in,out] trx transaction to be started */
trx_start_internal_read_only_low(trx_t * trx)3166 void trx_start_internal_read_only_low(trx_t *trx) {
3167 /* Ensure it is not flagged as an auto-commit-non-locking
3168 transaction. */
3169
3170 trx->will_lock = 1;
3171
3172 trx->internal = true;
3173
3174 trx_start_low(trx, false);
3175 }
3176
3177 /** Set the transaction as a read-write transaction if it is not already
3178 tagged as such. Read-only transactions that are writing to temporary
3179 tables are assigned an ID and a rollback segment but are not added
3180 to the trx read-write list because their updates should not be visible
3181 to other transactions and therefore their changes can be ignored by
3182 by MVCC. */
trx_set_rw_mode(trx_t * trx)3183 void trx_set_rw_mode(trx_t *trx) /*!< in/out: transaction that is RW */
3184 {
3185 ut_ad(trx->rsegs.m_redo.rseg == nullptr);
3186 ut_ad(!trx->in_rw_trx_list);
3187 ut_ad(!trx_is_autocommit_non_locking(trx));
3188 ut_ad(!trx->read_only);
3189
3190 if (srv_force_recovery >= SRV_FORCE_NO_TRX_UNDO) {
3191 return;
3192 }
3193
3194 /* Function is promoting existing trx from ro mode to rw mode.
3195 In this process it has acquired trx_sys->mutex as it plan to
3196 move trx from ro list to rw list. If in future, some other thread
3197 looks at this trx object while it is being promoted then ensure
3198 that both threads are synced by acquring trx->mutex to avoid decision
3199 based on in-consistent view formed during promotion. */
3200
3201 trx_assign_rseg_durable(trx);
3202
3203 ut_ad(trx->rsegs.m_redo.rseg != nullptr);
3204
3205 mutex_enter(&trx_sys->mutex);
3206
3207 ut_ad(trx->id == 0);
3208 trx->id = trx_sys_get_new_trx_id();
3209
3210 trx_sys->rw_trx_ids.push_back(trx->id);
3211
3212 trx_sys->rw_trx_set.insert(TrxTrack(trx->id, trx));
3213
3214 /* So that we can see our own changes. */
3215 if (MVCC::is_view_active(trx->read_view)) {
3216 MVCC::set_view_creator_trx_id(trx->read_view, trx->id);
3217 }
3218
3219 UT_LIST_ADD_FIRST(trx_sys->rw_trx_list, trx);
3220
3221 ut_d(trx->in_rw_trx_list = true);
3222
3223 mutex_exit(&trx_sys->mutex);
3224 }
3225
trx_kill_blocking(trx_t * trx)3226 void trx_kill_blocking(trx_t *trx) {
3227 if (!trx_is_high_priority(trx)) {
3228 return;
3229 }
3230 hit_list_t hit_list;
3231 lock_make_trx_hit_list(trx, hit_list);
3232 if (hit_list.empty()) {
3233 return;
3234 }
3235
3236 DEBUG_SYNC_C("trx_kill_blocking_enter");
3237
3238 ulint had_dict_lock = trx->dict_operation_lock_mode;
3239
3240 switch (had_dict_lock) {
3241 case 0:
3242 break;
3243
3244 case RW_S_LATCH:
3245 /* Release foreign key check latch */
3246 row_mysql_unfreeze_data_dictionary(trx);
3247 break;
3248
3249 default:
3250 /* There should never be a lock wait when the
3251 dictionary latch is reserved in X mode. Dictionary
3252 transactions should only acquire locks on dictionary
3253 tables, not other tables. All access to dictionary
3254 tables should be covered by dictionary
3255 transactions. */
3256 ut_error;
3257 }
3258
3259 ut_a(trx->dict_operation_lock_mode == 0);
3260
3261 /** Kill the transactions in the lock acquisition order old -> new. */
3262 hit_list_t::reverse_iterator end = hit_list.rend();
3263
3264 for (hit_list_t::reverse_iterator it = hit_list.rbegin(); it != end; ++it) {
3265 trx_t *victim_trx = it->m_trx;
3266 ulint version = it->m_version;
3267
3268 /* Shouldn't commit suicide. */
3269 ut_ad(victim_trx != trx);
3270 ut_ad(victim_trx->mysql_thd != trx->mysql_thd);
3271
3272 /* Check that the transaction isn't active inside
3273 InnoDB code. We have to wait while it is executing
3274 in the InnoDB context. This can potentially take a
3275 long time */
3276
3277 trx_mutex_enter(victim_trx);
3278 ut_ad(version <= victim_trx->version);
3279
3280 ulint loop_count = 0;
3281 /* start with optimistic sleep time of 20 micro seconds. */
3282 ulint sleep_time = 20;
3283
3284 bool exited_innodb = false;
3285
3286 while ((victim_trx->in_innodb & TRX_FORCE_ROLLBACK_MASK) > 0 &&
3287 victim_trx->version == version) {
3288 trx_mutex_exit(victim_trx);
3289
3290 /* Declare this OS thread to exit InnoDB, before waiting */
3291 if (trx->declared_to_be_inside_innodb) {
3292 exited_innodb = true;
3293 srv_conc_force_exit_innodb(trx);
3294 }
3295
3296 loop_count++;
3297 /* If the wait is long, don't hog the cpu. */
3298 if (loop_count < 100) {
3299 /* 20 microseconds */
3300 sleep_time = 20;
3301 } else if (loop_count < 1000) {
3302 /* 1 millisecond */
3303 sleep_time = 1000;
3304 } else {
3305 /* 100 milliseconds */
3306 sleep_time = 100000;
3307 }
3308
3309 os_thread_sleep(sleep_time);
3310
3311 trx_mutex_enter(victim_trx);
3312 }
3313
3314 /* Return back inside InnoDB */
3315 if (exited_innodb) {
3316 exited_innodb = false;
3317 /* Exit transaction mutex before entering Innodb. */
3318 trx_mutex_exit(victim_trx);
3319 srv_conc_force_enter_innodb(trx);
3320 trx_mutex_enter(victim_trx);
3321 }
3322
3323 /* Compare the version to check if the transaction has
3324 already finished */
3325 if (victim_trx->version != version) {
3326 trx_mutex_exit(victim_trx);
3327 continue;
3328 }
3329
3330 /* We should never kill background transactions. */
3331 ut_ad(victim_trx->mysql_thd != nullptr);
3332
3333 ut_ad(!(trx->in_innodb & TRX_FORCE_ROLLBACK_DISABLE));
3334 ut_ad(victim_trx->in_innodb & TRX_FORCE_ROLLBACK);
3335 ut_ad(victim_trx->in_innodb & TRX_FORCE_ROLLBACK_ASYNC);
3336 ut_ad(victim_trx->killed_by == os_thread_get_curr_id());
3337 ut_ad(victim_trx->version == it->m_version);
3338
3339 /* We don't kill Read Only, Background or high priority
3340 transactions. */
3341 ut_a(!victim_trx->read_only);
3342 ut_a(victim_trx->mysql_thd != nullptr);
3343
3344 trx_mutex_exit(victim_trx);
3345
3346 #ifdef UNIV_DEBUG
3347 char buffer[1024];
3348 char *thr_text;
3349 trx_id_t id;
3350
3351 thr_text = thd_security_context(victim_trx->mysql_thd, buffer,
3352 sizeof(buffer), 512);
3353 id = victim_trx->id;
3354 #endif /* UNIV_DEBUG */
3355 trx_rollback_for_mysql(victim_trx);
3356
3357 #ifdef UNIV_DEBUG
3358 ib::info(ER_IB_MSG_1211)
3359 << "High Priority Transaction (ID): " << trx->id
3360 << " killed transaction (ID): " << id << " in hit list"
3361 << " - " << thr_text;
3362 #endif /* UNIV_DEBUG */
3363 trx_mutex_enter(victim_trx);
3364
3365 version++;
3366 ut_ad(victim_trx->version == version);
3367
3368 os_thread_id_t thread_id = victim_trx->killed_by;
3369 os_compare_and_swap_thread_id(&victim_trx->killed_by, thread_id, 0);
3370
3371 victim_trx->in_innodb &= TRX_FORCE_ROLLBACK_MASK;
3372
3373 trx_mutex_exit(victim_trx);
3374 }
3375
3376 if (had_dict_lock) {
3377 row_mysql_freeze_data_dictionary(trx);
3378 }
3379 }
3380
3381 /* To get current session thread default THD */
3382 THD *thd_get_current_thd();
3383
trx_sys_update_binlog_position(trx_t * trx)3384 void trx_sys_update_binlog_position(trx_t *trx) {
3385 THD *thd = trx->mysql_thd;
3386 /* For XA commit/rollback by XID, transaction thd could be null. */
3387 if (thd == nullptr) {
3388 thd = thd_get_current_thd();
3389 if (thd == nullptr) {
3390 return;
3391 }
3392 }
3393 ulonglong pos;
3394 thd_binlog_pos(thd, &trx->mysql_log_file_name, &pos);
3395 trx->mysql_log_offset = static_cast<uint64_t>(pos);
3396 }
3397