1 /*****************************************************************************
2
3 Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
4 Copyright (c) 2015, 2021, MariaDB Corporation.
5
6 This program is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free Software
8 Foundation; version 2 of the License.
9
10 This program is distributed in the hope that it will be useful, but WITHOUT
11 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12 FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License along with
15 this program; if not, write to the Free Software Foundation, Inc.,
16 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
17
18 *****************************************************************************/
19
20 /**************************************************//**
21 @file include/trx0trx.h
22 The transaction
23
24 Created 3/26/1996 Heikki Tuuri
25 *******************************************************/
26
27 #ifndef trx0trx_h
28 #define trx0trx_h
29
30 #include "trx0types.h"
31 #include "lock0types.h"
32 #include "que0types.h"
33 #include "mem0mem.h"
34 #include "trx0xa.h"
35 #include "ut0vec.h"
36 #include "fts0fts.h"
37 #include "read0types.h"
38
39 #include <vector>
40 #include <set>
41
42 // Forward declaration
43 struct mtr_t;
44 class FlushObserver;
45 struct rw_trx_hash_element_t;
46
47 /******************************************************************//**
48 Set detailed error message for the transaction. */
49 void
50 trx_set_detailed_error(
51 /*===================*/
52 trx_t* trx, /*!< in: transaction struct */
53 const char* msg); /*!< in: detailed error message */
54 /*************************************************************//**
55 Set detailed error message for the transaction from a file. Note that the
56 file is rewinded before reading from it. */
57 void
58 trx_set_detailed_error_from_file(
59 /*=============================*/
60 trx_t* trx, /*!< in: transaction struct */
61 FILE* file); /*!< in: file to read message from */
62 /****************************************************************//**
63 Retrieves the error_info field from a trx.
64 @return the error info */
65 UNIV_INLINE
66 const dict_index_t*
67 trx_get_error_info(
68 /*===============*/
69 const trx_t* trx); /*!< in: trx object */
70
71 /** @return an allocated transaction */
72 trx_t *trx_create();
73
74 /** At shutdown, frees a transaction object. */
75 void trx_free_at_shutdown(trx_t *trx);
76
77 /** Disconnect a prepared transaction from MySQL.
78 @param[in,out] trx transaction */
79 void trx_disconnect_prepared(trx_t *trx);
80
81 /** Initialize (resurrect) transactions at startup. */
82 dberr_t trx_lists_init_at_db_start();
83
84 /*************************************************************//**
85 Starts the transaction if it is not yet started. */
86 void
87 trx_start_if_not_started_xa_low(
88 /*============================*/
89 trx_t* trx, /*!< in/out: transaction */
90 bool read_write); /*!< in: true if read write transaction */
91 /*************************************************************//**
92 Starts the transaction if it is not yet started. */
93 void
94 trx_start_if_not_started_low(
95 /*=========================*/
96 trx_t* trx, /*!< in/out: transaction */
97 bool read_write); /*!< in: true if read write transaction */
98
99 /*************************************************************//**
100 Starts a transaction for internal processing. */
101 void
102 trx_start_internal_low(
103 /*===================*/
104 trx_t* trx); /*!< in/out: transaction */
105
106 /** Starts a read-only transaction for internal processing.
107 @param[in,out] trx transaction to be started */
108 void
109 trx_start_internal_read_only_low(
110 trx_t* trx);
111
/* Convenience wrappers around the trx_start_*_low() functions.

In UNIV_DEBUG builds each wrapper first records the source location of
the call site in trx_t::start_line and trx_t::start_file and then invokes
the corresponding _low function. In other builds the wrapper is a plain
call of the _low function.

The transaction argument is evaluated exactly once in both build types,
so that an argument expression with side effects behaves the same in
debug and non-debug builds. */
#ifdef UNIV_DEBUG
/** Start a transaction (XA variant) if it is not yet started.
@param t	transaction (trx_t*)
@param rw	true if read write transaction */
#define trx_start_if_not_started_xa(t, rw)				\
	do {								\
		trx_t* const trx_start_arg_ = (t);			\
		trx_start_arg_->start_line = __LINE__;			\
		trx_start_arg_->start_file = __FILE__;			\
		trx_start_if_not_started_xa_low(trx_start_arg_, (rw));	\
	} while (false)

/** Start a transaction if it is not yet started.
@param t	transaction (trx_t*)
@param rw	true if read write transaction */
#define trx_start_if_not_started(t, rw)					\
	do {								\
		trx_t* const trx_start_arg_ = (t);			\
		trx_start_arg_->start_line = __LINE__;			\
		trx_start_arg_->start_file = __FILE__;			\
		trx_start_if_not_started_low(trx_start_arg_, (rw));	\
	} while (false)

/** Start a transaction for internal processing.
@param t	transaction (trx_t*) */
#define trx_start_internal(t)						\
	do {								\
		trx_t* const trx_start_arg_ = (t);			\
		trx_start_arg_->start_line = __LINE__;			\
		trx_start_arg_->start_file = __FILE__;			\
		trx_start_internal_low(trx_start_arg_);			\
	} while (false)

/** Start a read-only transaction for internal processing.
@param t	transaction (trx_t*) */
#define trx_start_internal_read_only(t)					\
	do {								\
		trx_t* const trx_start_arg_ = (t);			\
		trx_start_arg_->start_line = __LINE__;			\
		trx_start_arg_->start_file = __FILE__;			\
		trx_start_internal_read_only_low(trx_start_arg_);	\
	} while (false)
#else
/** Start a transaction if it is not yet started.
@param t	transaction (trx_t*)
@param rw	true if read write transaction */
#define trx_start_if_not_started(t, rw)		\
	trx_start_if_not_started_low((t), (rw))

/** Start a transaction for internal processing.
@param t	transaction (trx_t*) */
#define trx_start_internal(t)			\
	trx_start_internal_low((t))

/** Start a read-only transaction for internal processing.
@param t	transaction (trx_t*) */
#define trx_start_internal_read_only(t)		\
	trx_start_internal_read_only_low((t))

/** Start a transaction (XA variant) if it is not yet started.
@param t	transaction (trx_t*)
@param rw	true if read write transaction */
#define trx_start_if_not_started_xa(t, rw)	\
	trx_start_if_not_started_xa_low((t), (rw))
#endif /* UNIV_DEBUG */
153
154 /*************************************************************//**
155 Starts the transaction for a DDL operation. */
156 void
157 trx_start_for_ddl_low(
158 /*==================*/
159 trx_t* trx, /*!< in/out: transaction */
160 trx_dict_op_t op); /*!< in: dictionary operation type */
161
/** Start a transaction for a DDL operation.
In UNIV_DEBUG builds, this first asserts that no start location has been
recorded in the transaction yet, and then records the call site in
trx_t::start_line and trx_t::start_file before calling
trx_start_for_ddl_low(). The transaction argument is evaluated exactly
once in both build types.
@param t	transaction (trx_t*)
@param o	dictionary operation type */
#ifdef UNIV_DEBUG
#define trx_start_for_ddl(t, o)						\
	do {								\
		trx_t* const trx_start_arg_ = (t);			\
		ut_ad(trx_start_arg_->start_file == 0);			\
		trx_start_arg_->start_line = __LINE__;			\
		trx_start_arg_->start_file = __FILE__;			\
		trx_start_for_ddl_low(trx_start_arg_, (o));		\
	} while (0)
#else
#define trx_start_for_ddl(t, o)			\
	trx_start_for_ddl_low((t), (o))
#endif /* UNIV_DEBUG */
174
175 /**********************************************************************//**
176 Does the transaction commit for MySQL.
177 @return DB_SUCCESS or error number */
178 dberr_t
179 trx_commit_for_mysql(
180 /*=================*/
181 trx_t* trx); /*!< in/out: transaction */
182 /** XA PREPARE a transaction.
183 @param[in,out] trx transaction to prepare */
184 void trx_prepare_for_mysql(trx_t* trx);
185 /**********************************************************************//**
186 This function is used to find number of prepared transactions and
187 their transaction objects for a recovery.
188 @return number of prepared transactions */
189 int
190 trx_recover_for_mysql(
191 /*==================*/
192 XID* xid_list, /*!< in/out: prepared transactions */
193 uint len); /*!< in: number of slots in xid_list */
194 /** Look up an X/Open distributed transaction in XA PREPARE state.
195 @param[in] xid X/Open XA transaction identifier
196 @return transaction on match (the trx_t::xid will be invalidated);
197 note that the trx may have been committed before the caller acquires
198 trx_t::mutex
199 @retval NULL if no match */
200 trx_t* trx_get_trx_by_xid(const XID* xid);
201 /**********************************************************************//**
202 If required, flushes the log to disk if we called trx_commit_for_mysql()
203 with trx->flush_log_later == TRUE. */
204 void
205 trx_commit_complete_for_mysql(
206 /*==========================*/
207 trx_t* trx); /*!< in/out: transaction */
208 /**********************************************************************//**
209 Marks the latest SQL statement ended. */
210 void
211 trx_mark_sql_stat_end(
212 /*==================*/
213 trx_t* trx); /*!< in: trx handle */
214 /****************************************************************//**
215 Prepares a transaction for commit/rollback. */
216 void
217 trx_commit_or_rollback_prepare(
218 /*===========================*/
219 trx_t* trx); /*!< in/out: transaction */
220 /*********************************************************************//**
221 Creates a commit command node struct.
222 @return own: commit node struct */
223 commit_node_t*
224 trx_commit_node_create(
225 /*===================*/
226 mem_heap_t* heap); /*!< in: mem heap where created */
227 /***********************************************************//**
228 Performs an execution step for a commit type node in a query graph.
229 @return query thread to run next, or NULL */
230 que_thr_t*
231 trx_commit_step(
232 /*============*/
233 que_thr_t* thr); /*!< in: query thread */
234
235 /**********************************************************************//**
236 Prints info about a transaction.
237 Caller must hold trx_sys.mutex. */
238 void
239 trx_print_low(
240 /*==========*/
241 FILE* f,
242 /*!< in: output stream */
243 const trx_t* trx,
244 /*!< in: transaction */
245 ulint max_query_len,
246 /*!< in: max query length to print,
247 or 0 to use the default max length */
248 ulint n_rec_locks,
249 /*!< in: lock_number_of_rows_locked(&trx->lock) */
250 ulint n_trx_locks,
251 /*!< in: length of trx->lock.trx_locks */
252 ulint heap_size);
253 /*!< in: mem_heap_get_size(trx->lock.lock_heap) */
254
255 /**********************************************************************//**
256 Prints info about a transaction.
257 The caller must hold lock_sys.mutex and trx_sys.mutex.
258 When possible, use trx_print() instead. */
259 void
260 trx_print_latched(
261 /*==============*/
262 FILE* f, /*!< in: output stream */
263 const trx_t* trx, /*!< in: transaction */
264 ulint max_query_len); /*!< in: max query length to print,
265 or 0 to use the default max length */
266
267 /**********************************************************************//**
268 Prints info about a transaction.
269 Acquires and releases lock_sys.mutex. */
270 void
271 trx_print(
272 /*======*/
273 FILE* f, /*!< in: output stream */
274 const trx_t* trx, /*!< in: transaction */
275 ulint max_query_len); /*!< in: max query length to print,
276 or 0 to use the default max length */
277
278 /**********************************************************************//**
279 Determine if a transaction is a dictionary operation.
280 @return dictionary operation mode */
281 UNIV_INLINE
282 enum trx_dict_op_t
283 trx_get_dict_operation(
284 /*===================*/
285 const trx_t* trx) /*!< in: transaction */
286 MY_ATTRIBUTE((warn_unused_result));
287 /**********************************************************************//**
288 Flag a transaction a dictionary operation. */
289 UNIV_INLINE
290 void
291 trx_set_dict_operation(
292 /*===================*/
293 trx_t* trx, /*!< in/out: transaction */
294 enum trx_dict_op_t op); /*!< in: operation, not
295 TRX_DICT_OP_NONE */
296
297 /**********************************************************************//**
298 Determines if a transaction is in the given state.
299 The caller must hold trx_sys.mutex, or it must be the thread
300 that is serving a running transaction.
301 A running RW transaction must be in trx_sys.rw_trx_hash.
302 @return TRUE if trx->state == state */
303 UNIV_INLINE
304 bool
305 trx_state_eq(
306 /*=========*/
307 const trx_t* trx, /*!< in: transaction */
308 trx_state_t state, /*!< in: state;
309 if state != TRX_STATE_NOT_STARTED
310 asserts that
311 trx->state != TRX_STATE_NOT_STARTED */
312 bool relaxed = false)
313 /*!< in: whether to allow
314 trx->state == TRX_STATE_NOT_STARTED
315 after an error has been reported */
316 MY_ATTRIBUTE((nonnull, warn_unused_result));
317
318 /**********************************************************************//**
319 Determines if the currently running transaction has been interrupted.
320 @return true if interrupted */
321 bool
322 trx_is_interrupted(
323 /*===============*/
324 const trx_t* trx); /*!< in: transaction */
325
/** Calculates the "weight" of a transaction. The weight of one
transaction is estimated as the number of altered rows + the number of
locked rows. Note that the macro argument is evaluated twice.
@param t	transaction
@return transaction weight */
#define TRX_WEIGHT(t)	\
	((t)->undo_no + UT_LIST_GET_LEN((t)->lock.trx_locks))
332
333 /*******************************************************************//**
334 Compares the "weight" (or size) of two transactions. Transactions that
335 have edited non-transactional tables are considered heavier than ones
336 that have not.
337 @return true if weight(a) >= weight(b) */
338 bool
339 trx_weight_ge(
340 /*==========*/
341 const trx_t* a, /*!< in: the transaction to be compared */
342 const trx_t* b); /*!< in: the transaction to be compared */
/** Maximum length of a string that can be returned by
trx_get_que_state_str(); this is the length of "ROLLING BACK" */
#define TRX_QUE_STATE_STR_MAX_LEN	12
346
347 /*******************************************************************//**
348 Retrieves transaction's que state in a human readable string. The string
349 should not be free()'d or modified.
350 @return string in the data segment */
351 UNIV_INLINE
352 const char*
353 trx_get_que_state_str(
354 /*==================*/
355 const trx_t* trx); /*!< in: transaction */
356
357 /** Retreieves the transaction ID.
358 In a given point in time it is guaranteed that IDs of the running
359 transactions are unique. The values returned by this function for readonly
360 transactions may be reused, so a subsequent RO transaction may get the same ID
361 as a RO transaction that existed in the past. The values returned by this
362 function should be used for printing purposes only.
363 @param[in] trx transaction whose id to retrieve
364 @return transaction id */
365 UNIV_INLINE
366 trx_id_t
367 trx_get_id_for_print(
368 const trx_t* trx);
369
/** Create the pool of trx_t objects. */
void trx_pool_init();
373
/** Destroy the pool of trx_t objects. */
void trx_pool_close();
377
378 /**
379 Set the transaction as a read-write transaction if it is not already
380 tagged as such.
381 @param[in,out] trx Transaction that needs to be "upgraded" to RW from RO */
382 void
383 trx_set_rw_mode(
384 trx_t* trx);
385
/** Get the lock wait timeout of a transaction.
Transactions that aren't started by the MySQL server don't set
the trx_t::mysql_thd field. For such transactions we set the lock
wait timeout to 0 instead of the user configured value that comes
from innodb_lock_wait_timeout via trx_t::mysql_thd.
Note that the macro argument is evaluated twice when mysql_thd is set.
@param t	transaction
@return lock wait timeout in seconds */
#define trx_lock_wait_timeout_get(t)			\
	((t)->mysql_thd != NULL				\
	 ? thd_lock_wait_timeout((t)->mysql_thd)	\
	 : 0)
397
398 typedef std::vector<ib_lock_t*, ut_allocator<ib_lock_t*> > lock_list;
399
400 /*******************************************************************//**
401 Latching protocol for trx_lock_t::que_state. trx_lock_t::que_state
402 captures the state of the query thread during the execution of a query.
403 This is different from a transaction state. The query state of a transaction
404 can be updated asynchronously by other threads. The other threads can be
405 system threads, like the timeout monitor thread or user threads executing
406 other queries. Another thing to be mindful of is that there is a delay between
407 when a query thread is put into LOCK_WAIT state and before it actually starts
408 waiting. Between these two events it is possible that the query thread is
409 granted the lock it was waiting for, which implies that the state can be changed
410 asynchronously.
411
412 All these operations take place within the context of locking. Therefore state
413 changes within the locking code must acquire both the lock mutex and the
414 trx->mutex when changing trx->lock.que_state to TRX_QUE_LOCK_WAIT or
415 trx->lock.wait_lock to non-NULL but when the lock wait ends it is sufficient
416 to only acquire the trx->mutex.
417 To query the state either of the mutexes is sufficient within the locking
418 code and no mutex is required when the query thread is no longer waiting. */
419
420 /** The locks and state of an active transaction. Protected by
421 lock_sys.mutex, trx->mutex or both. */
422 struct trx_lock_t {
423 ulint n_active_thrs; /*!< number of active query threads */
424
425 trx_que_t que_state; /*!< valid when trx->state
426 == TRX_STATE_ACTIVE: TRX_QUE_RUNNING,
427 TRX_QUE_LOCK_WAIT, ... */
428
429 lock_t* wait_lock; /*!< if trx execution state is
430 TRX_QUE_LOCK_WAIT, this points to
431 the lock request, otherwise this is
432 NULL; set to non-NULL when holding
433 both trx->mutex and lock_sys.mutex;
434 set to NULL when holding
435 lock_sys.mutex; readers should
436 hold lock_sys.mutex, except when
437 they are holding trx->mutex and
438 wait_lock==NULL */
439 ib_uint64_t deadlock_mark; /*!< A mark field that is initialized
440 to and checked against lock_mark_counter
441 by lock_deadlock_recursive(). */
442 bool was_chosen_as_deadlock_victim;
443 /*!< when the transaction decides to
444 wait for a lock, it sets this to false;
445 if another transaction chooses this
446 transaction as a victim in deadlock
447 resolution, it sets this to true.
448 Protected by trx->mutex. */
449 time_t wait_started; /*!< lock wait started at this time,
450 protected only by lock_sys.mutex */
451
452 que_thr_t* wait_thr; /*!< query thread belonging to this
453 trx that is in QUE_THR_LOCK_WAIT
454 state. For threads suspended in a
455 lock wait, this is protected by
456 lock_sys.mutex. Otherwise, this may
457 only be modified by the thread that is
458 serving the running transaction. */
459 #ifdef WITH_WSREP
460 bool was_chosen_as_wsrep_victim;
461 /*!< high priority wsrep thread has
462 marked this trx to abort */
463 #endif /* WITH_WSREP */
464
465 /** Pre-allocated record locks */
466 struct {
467 ib_lock_t lock; byte pad[256];
468 } rec_pool[8];
469
470 /** Pre-allocated table locks */
471 ib_lock_t table_pool[8];
472
473 /** Next available rec_pool[] entry */
474 unsigned rec_cached;
475
476 /** Next available table_pool[] entry */
477 unsigned table_cached;
478
479 mem_heap_t* lock_heap; /*!< memory heap for trx_locks;
480 protected by lock_sys.mutex */
481
482 trx_lock_list_t trx_locks; /*!< locks requested by the transaction;
483 insertions are protected by trx->mutex
484 and lock_sys.mutex; removals are
485 protected by lock_sys.mutex */
486
487 lock_list table_locks; /*!< All table locks requested by this
488 transaction, including AUTOINC locks */
489
490 /** List of pending trx_t::evict_table() */
491 UT_LIST_BASE_NODE_T(dict_table_t) evicted_tables;
492
493 bool cancel; /*!< true if the transaction is being
494 rolled back either via deadlock
495 detection or due to lock timeout. The
496 caller has to acquire the trx_t::mutex
497 in order to cancel the locks. In
498 lock_trx_table_locks_remove() we
499 check for this cancel of a transaction's
500 locks and avoid reacquiring the trx
501 mutex to prevent recursive deadlocks.
502 Protected by both the lock sys mutex
503 and the trx_t::mutex. */
504 ulint n_rec_locks; /*!< number of rec locks in this trx */
505 };
506
507 /** Logical first modification time of a table in a transaction */
508 class trx_mod_table_time_t
509 {
510 /** First modification of the table */
511 undo_no_t first;
512 /** First modification of a system versioned column */
513 undo_no_t first_versioned;
514
515 /** Magic value signifying that a system versioned column of a
516 table was never modified in a transaction. */
517 static const undo_no_t UNVERSIONED = IB_ID_MAX;
518
519 public:
520 /** Constructor
521 @param[in] rows number of modified rows so far */
trx_mod_table_time_t(undo_no_t rows)522 trx_mod_table_time_t(undo_no_t rows)
523 : first(rows), first_versioned(UNVERSIONED) {}
524
525 #ifdef UNIV_DEBUG
526 /** Validation
527 @param[in] rows number of modified rows so far
528 @return whether the object is valid */
529 bool valid(undo_no_t rows = UNVERSIONED) const
530 {
531 return first <= first_versioned && first <= rows;
532 }
533 #endif /* UNIV_DEBUG */
534 /** @return if versioned columns were modified */
is_versioned()535 bool is_versioned() const { return first_versioned != UNVERSIONED; }
536
537 /** After writing an undo log record, set is_versioned() if needed
538 @param[in] rows number of modified rows so far */
set_versioned(undo_no_t rows)539 void set_versioned(undo_no_t rows)
540 {
541 ut_ad(!is_versioned());
542 first_versioned = rows;
543 ut_ad(valid());
544 }
545
546 /** Invoked after partial rollback
547 @param[in] limit number of surviving modified rows
548 @return whether this should be erased from trx_t::mod_tables */
rollback(undo_no_t limit)549 bool rollback(undo_no_t limit)
550 {
551 ut_ad(valid());
552 if (first >= limit) {
553 return true;
554 }
555
556 if (first_versioned < limit && is_versioned()) {
557 first_versioned = UNVERSIONED;
558 }
559
560 return false;
561 }
562 };
563
564 /** Collection of persistent tables and their first modification
565 in a transaction.
566 We store pointers to the table objects in memory because
567 we know that a table object will not be destroyed while a transaction
568 that modified it is running. */
569 typedef std::map<
570 dict_table_t*, trx_mod_table_time_t,
571 std::less<dict_table_t*>,
572 ut_allocator<std::pair<dict_table_t* const, trx_mod_table_time_t> > >
573 trx_mod_tables_t;
574
575 /** The transaction handle
576
577 Normally, there is a 1:1 relationship between a transaction handle
578 (trx) and a session (client connection). One session is associated
579 with exactly one user transaction. There are some exceptions to this:
580
581 * For DDL operations, a subtransaction is allocated that modifies the
582 data dictionary tables. Lock waits and deadlocks are prevented by
583 acquiring the dict_sys.latch before starting the subtransaction
584 and releasing it after committing the subtransaction.
585
586 * The purge system uses a special transaction that is not associated
587 with any session.
588
589 * If the system crashed or it was quickly shut down while there were
590 transactions in the ACTIVE or PREPARED state, these transactions would
591 no longer be associated with a session when the server is restarted.
592
593 A session may be served by at most one thread at a time. The serving
594 thread of a session might change in some MySQL implementations.
595 Therefore we do not have os_thread_get_curr_id() assertions in the code.
596
597 Normally, only the thread that is currently associated with a running
598 transaction may access (read and modify) the trx object, and it may do
599 so without holding any mutex. The following are exceptions to this:
600
601 * trx_rollback_recovered() may access resurrected (connectionless)
602 transactions (state == TRX_STATE_ACTIVE && is_recovered)
603 while the system is already processing new user transactions (!is_recovered).
604
605 * trx_print_low() may access transactions not associated with the current
606 thread. The caller must be holding lock_sys.mutex.
607
608 * When a transaction handle is in the trx_sys.trx_list, some of its fields
609 must not be modified without holding trx->mutex.
610
611 * The locking code (in particular, lock_deadlock_recursive() and
612 lock_rec_convert_impl_to_expl()) will access transactions associated
613 to other connections. The locks of transactions are protected by
614 lock_sys.mutex (insertions also by trx->mutex). */
615
616 /** Represents an instance of rollback segment along with its state variables.*/
617 struct trx_undo_ptr_t {
618 trx_rseg_t* rseg; /*!< rollback segment assigned to the
619 transaction, or NULL if not assigned
620 yet */
621 trx_undo_t* undo; /*!< pointer to the undo log, or
622 NULL if nothing logged yet */
623 };
624
625 /** An instance of temporary rollback segment. */
626 struct trx_temp_undo_t {
627 /** temporary rollback segment, or NULL if not assigned yet */
628 trx_rseg_t* rseg;
629 /** pointer to the undo log, or NULL if nothing logged yet */
630 trx_undo_t* undo;
631 };
632
633 /** Rollback segments assigned to a transaction for undo logging. */
634 struct trx_rsegs_t {
635 /** undo log ptr holding reference to a rollback segment that resides in
636 system/undo tablespace used for undo logging of tables that needs
637 to be recovered on crash. */
638 trx_undo_ptr_t m_redo;
639
640 /** undo log for temporary tables; discarded immediately after
641 transaction commit/rollback */
642 trx_temp_undo_t m_noredo;
643 };
644
645 struct trx_t {
646 private:
647 /**
648 Count of references.
649
650 We can't release the locks nor commit the transaction until this reference
651 is 0. We can change the state to TRX_STATE_COMMITTED_IN_MEMORY to signify
652 that it is no longer "active".
653 */
654
655 Atomic_counter<int32_t> n_ref;
656
657
658 public:
659 TrxMutex mutex; /*!< Mutex protecting the fields
660 state and lock (except some fields
661 of lock, which are protected by
662 lock_sys.mutex) */
663
664 trx_id_t id; /*!< transaction id */
665
666 trx_id_t no; /*!< transaction serialization number:
667 max trx id shortly before the
668 transaction is moved to
669 COMMITTED_IN_MEMORY state.
670 Protected by trx_sys_t::mutex
671 when trx is in rw_trx_hash. Initially
672 set to TRX_ID_MAX. */
673
674 /** State of the trx from the point of view of concurrency control
675 and the valid state transitions.
676
677 Possible states:
678
679 TRX_STATE_NOT_STARTED
680 TRX_STATE_ACTIVE
681 TRX_STATE_PREPARED
682 TRX_STATE_PREPARED_RECOVERED (special case of TRX_STATE_PREPARED)
683 TRX_STATE_COMMITTED_IN_MEMORY (alias below COMMITTED)
684
685 Valid state transitions are:
686
687 Regular transactions:
688 * NOT_STARTED -> ACTIVE -> COMMITTED -> NOT_STARTED
689
690 Auto-commit non-locking read-only:
691 * NOT_STARTED -> ACTIVE -> NOT_STARTED
692
693 XA (2PC):
694 * NOT_STARTED -> ACTIVE -> PREPARED -> COMMITTED -> NOT_STARTED
695
696 Recovered XA:
697 * NOT_STARTED -> PREPARED -> COMMITTED -> (freed)
698
699 Recovered XA followed by XA ROLLBACK:
700 * NOT_STARTED -> PREPARED -> ACTIVE -> COMMITTED -> (freed)
701
702 XA (2PC) (shutdown or disconnect before ROLLBACK or COMMIT):
703 * NOT_STARTED -> PREPARED -> (freed)
704
705 Disconnected XA can become recovered:
706 * ... -> ACTIVE -> PREPARED (connected) -> PREPARED (disconnected)
707 Disconnected means from mysql e.g due to the mysql client disconnection.
708 Latching and various transaction lists membership rules:
709
710 XA (2PC) transactions are always treated as non-autocommit.
711
712 Transitions to ACTIVE or NOT_STARTED occur when transaction
713 is not in rw_trx_hash (no trx_sys.mutex needed).
714
715 Autocommit non-locking read-only transactions move between states
716 without holding any mutex. They are not in rw_trx_hash.
717
718 All transactions, unless they are determined to be ac-nl-ro,
719 explicitly tagged as read-only or read-write, will first be put
720 on the read-only transaction list. Only when a !read-only transaction
721 in the read-only list tries to acquire an X or IX lock on a table
722 do we remove it from the read-only list and put it on the read-write
723 list. During this switch we assign it a rollback segment.
724
725 When a transaction is NOT_STARTED, it can be in trx_list. It cannot be
726 in rw_trx_hash.
727
728 ACTIVE->PREPARED->COMMITTED is only possible when trx is in rw_trx_hash.
729 The transition ACTIVE->PREPARED is protected by trx_sys.mutex.
730
731 ACTIVE->COMMITTED is possible when the transaction is in
732 rw_trx_hash.
733
734 Transitions to COMMITTED are protected by trx_t::mutex. */
735 trx_state_t state;
736 #ifdef WITH_WSREP
737 /** whether wsrep_on(mysql_thd) held at the start of transaction */
738 bool wsrep;
is_wsreptrx_t739 bool is_wsrep() const { return UNIV_UNLIKELY(wsrep); }
740 /** true, if BF thread is performing unique secondary index scanning */
741 bool wsrep_UK_scan;
is_wsrep_UK_scantrx_t742 bool is_wsrep_UK_scan() const { return UNIV_UNLIKELY(wsrep_UK_scan); }
743 #else /* WITH_WSREP */
is_wsreptrx_t744 bool is_wsrep() const { return false; }
745 #endif /* WITH_WSREP */
746
747 ReadView read_view; /*!< consistent read view used in the
748 transaction, or NULL if not yet set */
749 trx_lock_t lock; /*!< Information about the transaction
750 locks and state. Protected by
751 lock_sys.mutex (insertions also
752 by trx_t::mutex). */
753
754 /* These fields are not protected by any mutex. */
755
756 /** false=normal transaction, true=recovered (must be rolled back)
757 or disconnected transaction in XA PREPARE STATE.
758
759 This field is accessed by the thread that owns the transaction,
760 without holding any mutex.
761 There is only one foreign-thread access in trx_print_low()
762 and a possible race condition with trx_disconnect_prepared(). */
763 bool is_recovered;
764 const char* op_info; /*!< English text describing the
765 current operation, or an empty
766 string */
767 ulint isolation_level;/*!< TRX_ISO_REPEATABLE_READ, ... */
768 bool check_foreigns; /*!< normally TRUE, but if the user
769 wants to suppress foreign key checks,
770 (in table imports, for example) we
771 set this FALSE */
772 /*------------------------------*/
773 /* MySQL has a transaction coordinator to coordinate two phase
774 commit between multiple storage engines and the binary log. When
775 an engine participates in a transaction, it's responsible for
776 registering itself using the trans_register_ha() API. */
777 bool is_registered; /* This flag is set to true after the
778 transaction has been registered with
779 the coordinator using the XA API, and
780 is set to false after commit or
781 rollback. */
782 /** whether this is holding the prepare mutex */
783 bool active_commit_ordered;
784 /*------------------------------*/
785 bool check_unique_secondary;
786 /*!< normally TRUE, but if the user
787 wants to speed up inserts by
788 suppressing unique key checks
789 for secondary indexes when we decide
790 if we can use the insert buffer for
791 them, we set this FALSE */
792 bool flush_log_later;/* In 2PC, we hold the
793 prepare_commit mutex across
794 both phases. In that case, we
795 defer flush of the logs to disk
796 until after we release the
797 mutex. */
798 bool must_flush_log_later;/*!< set in commit()
799 if flush_log_later was
800 set and redo log was written;
801 in that case we will
802 flush the log in
803 trx_commit_complete_for_mysql() */
804 ulint duplicates; /*!< TRX_DUP_IGNORE | TRX_DUP_REPLACE */
805 trx_dict_op_t dict_operation; /**< @see enum trx_dict_op_t */
806
807 /* Fields protected by the srv_conc_mutex. */
808 bool declared_to_be_inside_innodb;
809 /*!< this is TRUE if we have declared
810 this transaction in
811 srv_conc_enter_innodb to be inside the
812 InnoDB engine */
813 ib_uint32_t n_tickets_to_enter_innodb;
814 /*!< this can be > 0 only when
815 declared_to_... is TRUE; when we come
816 to srv_conc_innodb_enter, if the value
817 here is > 0, we decrement this by 1 */
818 ib_uint32_t dict_operation_lock_mode;
819 /*!< 0, RW_S_LATCH, or RW_X_LATCH:
820 the latch mode trx currently holds
821 on dict_sys.latch. Protected
822 by dict_sys.latch. */
823
824 /** wall-clock time of the latest transition to TRX_STATE_ACTIVE;
825 used for diagnostic purposes only */
826 time_t start_time;
827 /** microsecond_interval_timer() of transaction start */
828 ulonglong start_time_micro;
829 lsn_t commit_lsn; /*!< lsn at the time of the commit */
830 table_id_t table_id; /*!< Table to drop iff dict_operation
831 == TRX_DICT_OP_TABLE, or 0. */
832 /*------------------------------*/
833 THD* mysql_thd; /*!< MySQL thread handle corresponding
834 to this trx, or NULL */
835
836 const char* mysql_log_file_name;
837 /*!< if MySQL binlog is used, this field
838 contains a pointer to the latest file
839 name; this is NULL if binlog is not
840 used */
841 ulonglong mysql_log_offset;
842 /*!< if MySQL binlog is used, this
843 field contains the end offset of the
844 binlog entry */
845 /*------------------------------*/
846 ib_uint32_t n_mysql_tables_in_use; /*!< number of Innobase tables
847 used in the processing of the current
848 SQL statement in MySQL */
849 ib_uint32_t mysql_n_tables_locked;
850 /*!< how many tables the current SQL
851 statement uses, except those
852 in consistent read */
853 /*------------------------------*/
854 UT_LIST_NODE_T(trx_t) trx_list; /*!< list of all transactions;
855 protected by trx_sys.mutex */
856 /*------------------------------*/
857 dberr_t error_state; /*!< 0 if no error, otherwise error
858 number; NOTE That ONLY the thread
859 doing the transaction is allowed to
860 set this field: this is NOT protected
861 by any mutex */
862 const dict_index_t*error_info; /*!< if the error number indicates a
863 duplicate key error, a pointer to
864 the problematic index is stored here */
865 ulint error_key_num; /*!< if the index creation fails to a
866 duplicate key error, a mysql key
867 number of that index is stored here */
868 que_t* graph; /*!< query currently run in the session,
869 or NULL if none; NOTE that the query
870 belongs to the session, and it can
871 survive over a transaction commit, if
872 it is a stored procedure with a COMMIT
873 WORK statement, for instance */
874 /*------------------------------*/
875 UT_LIST_BASE_NODE_T(trx_named_savept_t)
876 trx_savepoints; /*!< savepoints set with SAVEPOINT ...,
877 oldest first */
878 /*------------------------------*/
879 undo_no_t undo_no; /*!< next undo log record number to
880 assign; since the undo log is
881 private for a transaction, this
882 is a simple ascending sequence
883 with no gaps; thus it represents
884 the number of modified/inserted
885 rows in a transaction */
886 trx_savept_t last_sql_stat_start;
887 /*!< undo_no when the last sql statement
888 was started: in case of an error, trx
889 is rolled back down to this number */
890 trx_rsegs_t rsegs; /* rollback segments for undo logging */
891 undo_no_t roll_limit; /*!< least undo number to undo during
892 a partial rollback; 0 otherwise */
893 bool in_rollback; /*!< true when the transaction is
894 executing a partial or full rollback */
895 ulint pages_undone; /*!< number of undo log pages undone
896 since the last undo log truncation */
897 /*------------------------------*/
898 ulint n_autoinc_rows; /*!< no. of AUTO-INC rows required for
899 an SQL statement. This is useful for
900 multi-row INSERTs */
901 ib_vector_t* autoinc_locks; /* AUTOINC locks held by this
902 transaction. Note that these are
903 also in the lock list trx_locks. This
904 vector needs to be freed explicitly
905 when the trx instance is destroyed.
906 Protected by lock_sys.mutex. */
907 /*------------------------------*/
908 bool read_only; /*!< true if transaction is flagged
909 as a READ-ONLY transaction.
910 if auto_commit && !will_lock
911 then it will be handled as a
912 AC-NL-RO-SELECT (Auto Commit Non-Locking
913 Read Only Select). A read only
914 transaction will not be assigned an
915 UNDO log. */
916 bool auto_commit; /*!< true if it is an autocommit */
917 bool will_lock; /*!< set to inform trx_start_low() that
918 the transaction may acquire locks */
919 /*------------------------------*/
920 fts_trx_t* fts_trx; /*!< FTS information, or NULL if
921 transaction hasn't modified tables
922 with FTS indexes (yet). */
923 doc_id_t fts_next_doc_id;/* The document id used for updates */
924 /*------------------------------*/
925 ib_uint32_t flush_tables; /*!< if "covering" the FLUSH TABLES",
926 count of tables being flushed. */
927
928 /*------------------------------*/
929 bool ddl; /*!< true if it is an internal
930 transaction for DDL */
931 bool internal; /*!< true if it is a system/internal
932 transaction background task. This
933 includes DDL transactions too. Such
934 transactions are always treated as
935 read-write. */
936 /*------------------------------*/
937 #ifdef UNIV_DEBUG
938 unsigned start_line; /*!< Track where it was started from */
939 const char* start_file; /*!< Filename where it was started */
940 #endif /* UNIV_DEBUG */
941
942 XID* xid; /*!< X/Open XA transaction
943 identification to identify a
944 transaction branch */
945 trx_mod_tables_t mod_tables; /*!< List of tables that were modified
946 by this transaction */
947 /*------------------------------*/
948 char* detailed_error; /*!< detailed error message for last
949 error, or empty. */
950 private:
951 /** flush observer used to track flushing of non-redo logged pages
952 during bulk create index */
953 FlushObserver* flush_observer;
954 public:
955 #ifdef WITH_WSREP
956 os_event_t wsrep_event; /* event waited for in srv_conc_slot */
957 #endif /* WITH_WSREP */
958
959 rw_trx_hash_element_t *rw_trx_hash_element;
960 LF_PINS *rw_trx_hash_pins;
961 ulint magic_n;
962
963 /** @return whether any persistent undo log has been generated */
has_logged_persistenttrx_t964 bool has_logged_persistent() const
965 {
966 return(rsegs.m_redo.undo);
967 }
968
969 /** @return whether any undo log has been generated */
has_loggedtrx_t970 bool has_logged() const
971 {
972 return(has_logged_persistent() || rsegs.m_noredo.undo);
973 }
974
975 /** @return rollback segment for modifying temporary tables */
get_temp_rsegtrx_t976 trx_rseg_t* get_temp_rseg()
977 {
978 if (trx_rseg_t* rseg = rsegs.m_noredo.rseg) {
979 ut_ad(id != 0);
980 return(rseg);
981 }
982
983 return(assign_temp_rseg());
984 }
985
986 /** Set the innodb_log_optimize_ddl page flush observer
987 @param[in,out] space tablespace
988 @param[in,out] stage performance_schema accounting */
989 void set_flush_observer(fil_space_t* space, ut_stage_alter_t* stage);
990
991 /** Remove the flush observer */
992 void remove_flush_observer();
993
994 /** @return the flush observer */
get_flush_observertrx_t995 FlushObserver* get_flush_observer() const
996 {
997 return flush_observer;
998 }
999
1000 /** Transition to committed state, to release implicit locks. */
1001 inline void commit_state();
1002
1003 /** Release any explicit locks of a committing transaction. */
1004 inline void release_locks();
1005
1006 /** Evict a table definition due to the rollback of ALTER TABLE.
1007 @param[in] table_id table identifier */
1008 void evict_table(table_id_t table_id);
1009
1010 private:
1011 /** Mark a transaction committed in the main memory data structures. */
1012 inline void commit_in_memory(const mtr_t *mtr);
1013 public:
1014 /** Commit the transaction. */
1015 void commit();
1016
1017 /** Commit the transaction in a mini-transaction.
1018 @param mtr mini-transaction (if there are any persistent modifications) */
1019 void commit_low(mtr_t *mtr= nullptr);
1020
1021
1022
is_referencedtrx_t1023 bool is_referenced() const { return n_ref > 0; }
1024
1025
referencetrx_t1026 void reference()
1027 {
1028 #ifdef UNIV_DEBUG
1029 auto old_n_ref=
1030 #endif
1031 n_ref++;
1032 ut_ad(old_n_ref >= 0);
1033 }
1034
1035
release_referencetrx_t1036 void release_reference()
1037 {
1038 #ifdef UNIV_DEBUG
1039 auto old_n_ref=
1040 #endif
1041 n_ref--;
1042 ut_ad(old_n_ref > 0);
1043 }
1044
/** @return whether the transaction has a lock on
mysql.innodb_table_stats and mysql.innodb_index_stats */
1047 bool has_stats_table_lock() const;
1048
1049 /** Free the memory to trx_pools */
1050 void free();
1051
1052
assert_freedtrx_t1053 void assert_freed() const
1054 {
1055 ut_ad(state == TRX_STATE_NOT_STARTED);
1056 ut_ad(!id);
1057 ut_ad(!has_logged());
1058 ut_ad(!is_referenced());
1059 ut_ad(!is_wsrep());
1060 #ifdef WITH_WSREP
1061 ut_ad(!lock.was_chosen_as_wsrep_victim);
1062 #endif
1063 ut_ad(!read_view.is_open());
1064 ut_ad(!lock.wait_thr);
1065 ut_ad(UT_LIST_GET_LEN(lock.trx_locks) == 0);
1066 ut_ad(lock.table_locks.empty());
1067 ut_ad(!autoinc_locks || ib_vector_is_empty(autoinc_locks));
1068 ut_ad(UT_LIST_GET_LEN(lock.evicted_tables) == 0);
1069 ut_ad(dict_operation == TRX_DICT_OP_NONE);
1070 }
1071
1072
1073 /** @return whether this is a non-locking autocommit transaction */
is_autocommit_non_lockingtrx_t1074 bool is_autocommit_non_locking() const { return auto_commit && !will_lock; }
1075
1076 private:
1077 /** Assign a rollback segment for modifying temporary tables.
1078 @return the assigned rollback segment */
1079 trx_rseg_t *assign_temp_rseg();
1080 };
1081
1082 /**
1083 Check if transaction is started.
1084 @param[in] trx Transaction whose state we need to check
1085 @reutrn true if transaction is in state started */
trx_is_started(const trx_t * trx)1086 inline bool trx_is_started(const trx_t* trx)
1087 {
1088 return trx->state != TRX_STATE_NOT_STARTED;
1089 }
1090
/* Transaction isolation levels (trx->isolation_level) */

/** Dirty read: non-locking SELECTs are performed so that we do not
look at a possible earlier version of a record; thus they are not
'consistent' reads under this isolation level; otherwise like level 2 */
#define TRX_ISO_READ_UNCOMMITTED	0

/** Somewhat Oracle-like isolation, except that in range UPDATE and
DELETE we must block phantom rows with next-key locks;
SELECT ... FOR UPDATE and ... LOCK IN SHARE MODE only lock the index
records, NOT the gaps before them, and thus allow free inserting;
each consistent read reads its own snapshot */
#define TRX_ISO_READ_COMMITTED		1

/** This is the default; all consistent reads in the same trx read the
same snapshot; full next-key locking is used in locking reads to block
insertions into gaps */
#define TRX_ISO_REPEATABLE_READ		2

/** All plain SELECTs are converted to LOCK IN SHARE MODE reads */
#define TRX_ISO_SERIALIZABLE		3
1125
/* Treatment of duplicate values (trx->duplicates; for example, in inserts).
These are bit flags; several may be combined with bitwise OR. */

/** duplicate rows are to be updated */
#define TRX_DUP_IGNORE	1U
/** duplicate rows are to be replaced */
#define TRX_DUP_REPLACE	2U
1130
1131
/** Commit node states */
enum commit_node_state {
	/** about to send a commit signal to the transaction */
	COMMIT_NODE_SEND = 1,
	/** commit signal sent to the transaction, waiting for completion */
	COMMIT_NODE_WAIT = 2
};
1139
1140 /** Commit command node in a query graph */
1141 struct commit_node_t{
1142 que_common_t common; /*!< node type: QUE_NODE_COMMIT */
1143 enum commit_node_state
1144 state; /*!< node execution state */
1145 };
1146
1147
/** Test if trx->mutex is owned.
@param t	pointer to the transaction; the argument is parenthesized in the
expansion so that any pointer-valued expression (not only a plain
identifier) expands correctly
@return the result of mutex_own() on the transaction's mutex */
#define trx_mutex_own(t) mutex_own(&(t)->mutex)
1150
/** Acquire the trx->mutex.
@param t	pointer to the transaction; the argument is parenthesized in the
expansion so that any pointer-valued expression (not only a plain
identifier) expands correctly */
#define trx_mutex_enter(t) do {			\
	mutex_enter(&(t)->mutex);		\
} while (0)
1155
/** Release the trx->mutex.
@param t	pointer to the transaction; the argument is parenthesized in the
expansion so that any pointer-valued expression (not only a plain
identifier) expands correctly */
#define trx_mutex_exit(t) do {			\
	mutex_exit(&(t)->mutex);		\
} while (0)
1160
1161 #include "trx0trx.inl"
1162
1163 #endif
1164