1 /*****************************************************************************
2 
3 Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
4 Copyright (c) 2015, 2021, MariaDB Corporation.
5 
6 This program is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free Software
8 Foundation; version 2 of the License.
9 
10 This program is distributed in the hope that it will be useful, but WITHOUT
11 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12 FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
13 
14 You should have received a copy of the GNU General Public License along with
15 this program; if not, write to the Free Software Foundation, Inc.,
16 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
17 
18 *****************************************************************************/
19 
20 /**************************************************//**
21 @file include/trx0trx.h
22 The transaction
23 
24 Created 3/26/1996 Heikki Tuuri
25 *******************************************************/
26 
27 #ifndef trx0trx_h
28 #define trx0trx_h
29 
#include "trx0types.h"
#include "lock0types.h"
#include "que0types.h"
#include "mem0mem.h"
#include "trx0xa.h"
#include "ut0vec.h"
#include "fts0fts.h"
#include "read0types.h"

#include <map>
#include <set>
#include <vector>
41 
/* Forward declarations (names only; no definitions are needed here) */
class FlushObserver;
struct mtr_t;
struct rw_trx_hash_element_t;
46 
47 /******************************************************************//**
48 Set detailed error message for the transaction. */
49 void
50 trx_set_detailed_error(
51 /*===================*/
52 	trx_t*		trx,	/*!< in: transaction struct */
53 	const char*	msg);	/*!< in: detailed error message */
54 /*************************************************************//**
55 Set detailed error message for the transaction from a file. Note that the
56 file is rewinded before reading from it. */
57 void
58 trx_set_detailed_error_from_file(
59 /*=============================*/
60 	trx_t*	trx,	/*!< in: transaction struct */
61 	FILE*	file);	/*!< in: file to read message from */
62 /****************************************************************//**
63 Retrieves the error_info field from a trx.
64 @return the error info */
65 UNIV_INLINE
66 const dict_index_t*
67 trx_get_error_info(
68 /*===============*/
69 	const trx_t*	trx);	/*!< in: trx object */
70 
71 /** @return an allocated transaction */
72 trx_t *trx_create();
73 
74 /** At shutdown, frees a transaction object. */
75 void trx_free_at_shutdown(trx_t *trx);
76 
77 /** Disconnect a prepared transaction from MySQL.
78 @param[in,out]	trx	transaction */
79 void trx_disconnect_prepared(trx_t *trx);
80 
81 /** Initialize (resurrect) transactions at startup. */
82 dberr_t trx_lists_init_at_db_start();
83 
84 /*************************************************************//**
85 Starts the transaction if it is not yet started. */
86 void
87 trx_start_if_not_started_xa_low(
88 /*============================*/
89 	trx_t*	trx,		/*!< in/out: transaction */
90 	bool	read_write);	/*!< in: true if read write transaction */
91 /*************************************************************//**
92 Starts the transaction if it is not yet started. */
93 void
94 trx_start_if_not_started_low(
95 /*=========================*/
96 	trx_t*	trx,		/*!< in/out: transaction */
97 	bool	read_write);	/*!< in: true if read write transaction */
98 
99 /*************************************************************//**
100 Starts a transaction for internal processing. */
101 void
102 trx_start_internal_low(
103 /*===================*/
104 	trx_t*	trx);		/*!< in/out: transaction */
105 
106 /** Starts a read-only transaction for internal processing.
107 @param[in,out] trx	transaction to be started */
108 void
109 trx_start_internal_read_only_low(
110 	trx_t*	trx);
111 
/* Convenience wrappers around the trx_start_*_low() functions.

With UNIV_DEBUG defined, each wrapper first records the call site
(__LINE__ and __FILE__) in the start_line and start_file fields of the
transaction and then invokes the corresponding _low function. Without
UNIV_DEBUG the wrappers expand to a plain call of the _low function.

Each argument is evaluated exactly once in both build flavours: the debug
variants bind the transaction pointer to a block-local constant instead
of expanding (t) repeatedly, so an argument expression with side effects
behaves the same with and without UNIV_DEBUG.

@param t	transaction (trx_t*)
@param rw	true if read write transaction (only for the
		*_if_not_started* variants) */
#ifdef UNIV_DEBUG
#define trx_start_if_not_started_xa(t, rw)			\
	do {							\
	trx_t* const trx_start_arg_ = (t);			\
	trx_start_arg_->start_line = __LINE__;			\
	trx_start_arg_->start_file = __FILE__;			\
	trx_start_if_not_started_xa_low(trx_start_arg_, (rw));	\
	} while (false)

#define trx_start_if_not_started(t, rw)				\
	do {							\
	trx_t* const trx_start_arg_ = (t);			\
	trx_start_arg_->start_line = __LINE__;			\
	trx_start_arg_->start_file = __FILE__;			\
	trx_start_if_not_started_low(trx_start_arg_, (rw));	\
	} while (false)

#define trx_start_internal(t)					\
	do {							\
	trx_t* const trx_start_arg_ = (t);			\
	trx_start_arg_->start_line = __LINE__;			\
	trx_start_arg_->start_file = __FILE__;			\
	trx_start_internal_low(trx_start_arg_);			\
	} while (false)

#define trx_start_internal_read_only(t)				\
	do {							\
	trx_t* const trx_start_arg_ = (t);			\
	trx_start_arg_->start_line = __LINE__;			\
	trx_start_arg_->start_file = __FILE__;			\
	trx_start_internal_read_only_low(trx_start_arg_);	\
	} while (false)
#else
#define trx_start_if_not_started(t, rw)				\
	trx_start_if_not_started_low((t), (rw))

#define trx_start_internal(t)					\
	trx_start_internal_low((t))

#define trx_start_internal_read_only(t)				\
	trx_start_internal_read_only_low((t))

#define trx_start_if_not_started_xa(t, rw)			\
	trx_start_if_not_started_xa_low((t), (rw))
#endif /* UNIV_DEBUG */
153 
154 /*************************************************************//**
155 Starts the transaction for a DDL operation. */
156 void
157 trx_start_for_ddl_low(
158 /*==================*/
159 	trx_t*		trx,	/*!< in/out: transaction */
160 	trx_dict_op_t	op);	/*!< in: dictionary operation type */
161 
/* Wrapper around trx_start_for_ddl_low().

With UNIV_DEBUG defined, it asserts (ut_ad) that start_file of the
transaction is still 0, records the call site (__LINE__ and __FILE__) in
start_line and start_file, and then calls trx_start_for_ddl_low().
Without UNIV_DEBUG it expands to a plain call of trx_start_for_ddl_low().

Both arguments are evaluated exactly once in either build flavour; the
debug variant binds the transaction pointer to a block-local constant
rather than expanding (t) several times.

@param t	transaction (trx_t*)
@param o	dictionary operation type */
#ifdef UNIV_DEBUG
#define trx_start_for_ddl(t, o)					\
	do {							\
	trx_t* const trx_start_arg_ = (t);			\
	ut_ad(trx_start_arg_->start_file == 0);			\
	trx_start_arg_->start_line = __LINE__;			\
	trx_start_arg_->start_file = __FILE__;			\
	trx_start_for_ddl_low(trx_start_arg_, (o));		\
	} while (false)
#else
#define trx_start_for_ddl(t, o)					\
	trx_start_for_ddl_low((t), (o))
#endif /* UNIV_DEBUG */
174 
175 /**********************************************************************//**
176 Does the transaction commit for MySQL.
177 @return DB_SUCCESS or error number */
178 dberr_t
179 trx_commit_for_mysql(
180 /*=================*/
181 	trx_t*	trx);	/*!< in/out: transaction */
182 /** XA PREPARE a transaction.
183 @param[in,out]	trx	transaction to prepare */
184 void trx_prepare_for_mysql(trx_t* trx);
185 /**********************************************************************//**
186 This function is used to find number of prepared transactions and
187 their transaction objects for a recovery.
188 @return number of prepared transactions */
189 int
190 trx_recover_for_mysql(
191 /*==================*/
192 	XID*	xid_list,	/*!< in/out: prepared transactions */
193 	uint	len);		/*!< in: number of slots in xid_list */
194 /** Look up an X/Open distributed transaction in XA PREPARE state.
195 @param[in]	xid	X/Open XA transaction identifier
196 @return	transaction on match (the trx_t::xid will be invalidated);
197 note that the trx may have been committed before the caller acquires
198 trx_t::mutex
199 @retval	NULL if no match */
200 trx_t* trx_get_trx_by_xid(const XID* xid);
201 /**********************************************************************//**
202 If required, flushes the log to disk if we called trx_commit_for_mysql()
203 with trx->flush_log_later == TRUE. */
204 void
205 trx_commit_complete_for_mysql(
206 /*==========================*/
207 	trx_t*	trx);	/*!< in/out: transaction */
208 /**********************************************************************//**
209 Marks the latest SQL statement ended. */
210 void
211 trx_mark_sql_stat_end(
212 /*==================*/
213 	trx_t*	trx);	/*!< in: trx handle */
214 /****************************************************************//**
215 Prepares a transaction for commit/rollback. */
216 void
217 trx_commit_or_rollback_prepare(
218 /*===========================*/
219 	trx_t*	trx);	/*!< in/out: transaction */
220 /*********************************************************************//**
221 Creates a commit command node struct.
222 @return own: commit node struct */
223 commit_node_t*
224 trx_commit_node_create(
225 /*===================*/
226 	mem_heap_t*	heap);	/*!< in: mem heap where created */
227 /***********************************************************//**
228 Performs an execution step for a commit type node in a query graph.
229 @return query thread to run next, or NULL */
230 que_thr_t*
231 trx_commit_step(
232 /*============*/
233 	que_thr_t*	thr);	/*!< in: query thread */
234 
235 /**********************************************************************//**
236 Prints info about a transaction.
237 Caller must hold trx_sys.mutex. */
238 void
239 trx_print_low(
240 /*==========*/
241 	FILE*		f,
242 			/*!< in: output stream */
243 	const trx_t*	trx,
244 			/*!< in: transaction */
245 	ulint		max_query_len,
246 			/*!< in: max query length to print,
247 			or 0 to use the default max length */
248 	ulint		n_rec_locks,
249 			/*!< in: lock_number_of_rows_locked(&trx->lock) */
250 	ulint		n_trx_locks,
251 			/*!< in: length of trx->lock.trx_locks */
252 	ulint		heap_size);
253 			/*!< in: mem_heap_get_size(trx->lock.lock_heap) */
254 
255 /**********************************************************************//**
256 Prints info about a transaction.
257 The caller must hold lock_sys.mutex and trx_sys.mutex.
258 When possible, use trx_print() instead. */
259 void
260 trx_print_latched(
261 /*==============*/
262 	FILE*		f,		/*!< in: output stream */
263 	const trx_t*	trx,		/*!< in: transaction */
264 	ulint		max_query_len);	/*!< in: max query length to print,
265 					or 0 to use the default max length */
266 
267 /**********************************************************************//**
268 Prints info about a transaction.
269 Acquires and releases lock_sys.mutex. */
270 void
271 trx_print(
272 /*======*/
273 	FILE*		f,		/*!< in: output stream */
274 	const trx_t*	trx,		/*!< in: transaction */
275 	ulint		max_query_len);	/*!< in: max query length to print,
276 					or 0 to use the default max length */
277 
278 /**********************************************************************//**
279 Determine if a transaction is a dictionary operation.
280 @return dictionary operation mode */
281 UNIV_INLINE
282 enum trx_dict_op_t
283 trx_get_dict_operation(
284 /*===================*/
285 	const trx_t*	trx)	/*!< in: transaction */
286 	MY_ATTRIBUTE((warn_unused_result));
287 /**********************************************************************//**
288 Flag a transaction a dictionary operation. */
289 UNIV_INLINE
290 void
291 trx_set_dict_operation(
292 /*===================*/
293 	trx_t*			trx,	/*!< in/out: transaction */
294 	enum trx_dict_op_t	op);	/*!< in: operation, not
295 					TRX_DICT_OP_NONE */
296 
297 /**********************************************************************//**
298 Determines if a transaction is in the given state.
299 The caller must hold trx_sys.mutex, or it must be the thread
300 that is serving a running transaction.
301 A running RW transaction must be in trx_sys.rw_trx_hash.
302 @return TRUE if trx->state == state */
303 UNIV_INLINE
304 bool
305 trx_state_eq(
306 /*=========*/
307 	const trx_t*	trx,	/*!< in: transaction */
308 	trx_state_t	state,	/*!< in: state;
309 				if state != TRX_STATE_NOT_STARTED
310 				asserts that
311 				trx->state != TRX_STATE_NOT_STARTED */
312 	bool		relaxed = false)
313 				/*!< in: whether to allow
314 				trx->state == TRX_STATE_NOT_STARTED
315 				after an error has been reported */
316 	MY_ATTRIBUTE((nonnull, warn_unused_result));
317 
318 /**********************************************************************//**
319 Determines if the currently running transaction has been interrupted.
320 @return true if interrupted */
321 bool
322 trx_is_interrupted(
323 /*===============*/
324 	const trx_t*	trx);	/*!< in: transaction */
325 
/** Calculates the "weight" of a transaction. The weight of one transaction
is estimated as the number of altered rows + the number of locked rows.
Note that the argument expression is expanded (and thus evaluated) twice.
@param t transaction
@return transaction weight */
#define TRX_WEIGHT(t)						\
	((t)->undo_no + UT_LIST_GET_LEN((t)->lock.trx_locks))
332 
333 /*******************************************************************//**
334 Compares the "weight" (or size) of two transactions. Transactions that
335 have edited non-transactional tables are considered heavier than ones
336 that have not.
337 @return true if weight(a) >= weight(b) */
338 bool
339 trx_weight_ge(
340 /*==========*/
341 	const trx_t*	a,	/*!< in: the transaction to be compared */
342 	const trx_t*	b);	/*!< in: the transaction to be compared */
/** Maximum length of a string that can be returned by
trx_get_que_state_str(); this is the length of "ROLLING BACK". */
#define TRX_QUE_STATE_STR_MAX_LEN	12
346 
347 /*******************************************************************//**
348 Retrieves transaction's que state in a human readable string. The string
349 should not be free()'d or modified.
350 @return string in the data segment */
351 UNIV_INLINE
352 const char*
353 trx_get_que_state_str(
354 /*==================*/
355 	const trx_t*	trx);	/*!< in: transaction */
356 
357 /** Retreieves the transaction ID.
358 In a given point in time it is guaranteed that IDs of the running
359 transactions are unique. The values returned by this function for readonly
360 transactions may be reused, so a subsequent RO transaction may get the same ID
361 as a RO transaction that existed in the past. The values returned by this
362 function should be used for printing purposes only.
363 @param[in]	trx	transaction whose id to retrieve
364 @return transaction id */
365 UNIV_INLINE
366 trx_id_t
367 trx_get_id_for_print(
368 	const trx_t*	trx);
369 
370 /** Create the trx_t pool */
371 void
372 trx_pool_init();
373 
374 /** Destroy the trx_t pool */
375 void
376 trx_pool_close();
377 
378 /**
379 Set the transaction as a read-write transaction if it is not already
380 tagged as such.
381 @param[in,out] trx	Transaction that needs to be "upgraded" to RW from RO */
382 void
383 trx_set_rw_mode(
384 	trx_t*		trx);
385 
/** Get the lock wait timeout of a transaction.
Transactions that aren't started by the MySQL server don't set
the trx_t::mysql_thd field. For such transactions the lock wait
timeout is 0 instead of the user configured value that comes
from innodb_lock_wait_timeout via trx_t::mysql_thd.
Note that the argument expression is expanded more than once.
@param t transaction
@return lock wait timeout in seconds */
#define trx_lock_wait_timeout_get(t)					\
	((t)->mysql_thd == NULL						\
	 ? 0								\
	 : thd_lock_wait_timeout((t)->mysql_thd))
397 
398 typedef std::vector<ib_lock_t*, ut_allocator<ib_lock_t*> >	lock_list;
399 
400 /*******************************************************************//**
401 Latching protocol for trx_lock_t::que_state.  trx_lock_t::que_state
402 captures the state of the query thread during the execution of a query.
403 This is different from a transaction state. The query state of a transaction
404 can be updated asynchronously by other threads.  The other threads can be
405 system threads, like the timeout monitor thread or user threads executing
406 other queries. Another thing to be mindful of is that there is a delay between
407 when a query thread is put into LOCK_WAIT state and before it actually starts
408 waiting.  Between these two events it is possible that the query thread is
409 granted the lock it was waiting for, which implies that the state can be changed
410 asynchronously.
411 
412 All these operations take place within the context of locking. Therefore state
413 changes within the locking code must acquire both the lock mutex and the
414 trx->mutex when changing trx->lock.que_state to TRX_QUE_LOCK_WAIT or
415 trx->lock.wait_lock to non-NULL but when the lock wait ends it is sufficient
416 to only acquire the trx->mutex.
417 To query the state either of the mutexes is sufficient within the locking
418 code and no mutex is required when the query thread is no longer waiting. */
419 
420 /** The locks and state of an active transaction. Protected by
421 lock_sys.mutex, trx->mutex or both. */
422 struct trx_lock_t {
423 	ulint		n_active_thrs;	/*!< number of active query threads */
424 
425 	trx_que_t	que_state;	/*!< valid when trx->state
426 					== TRX_STATE_ACTIVE: TRX_QUE_RUNNING,
427 					TRX_QUE_LOCK_WAIT, ... */
428 
429 	lock_t*		wait_lock;	/*!< if trx execution state is
430 					TRX_QUE_LOCK_WAIT, this points to
431 					the lock request, otherwise this is
432 					NULL; set to non-NULL when holding
433 					both trx->mutex and lock_sys.mutex;
434 					set to NULL when holding
435 					lock_sys.mutex; readers should
436 					hold lock_sys.mutex, except when
437 					they are holding trx->mutex and
438 					wait_lock==NULL */
439 	ib_uint64_t	deadlock_mark;	/*!< A mark field that is initialized
440 					to and checked against lock_mark_counter
441 					by lock_deadlock_recursive(). */
442 	bool		was_chosen_as_deadlock_victim;
443 					/*!< when the transaction decides to
444 					wait for a lock, it sets this to false;
445 					if another transaction chooses this
446 					transaction as a victim in deadlock
447 					resolution, it sets this to true.
448 					Protected by trx->mutex. */
449 	time_t		wait_started;	/*!< lock wait started at this time,
450 					protected only by lock_sys.mutex */
451 
452 	que_thr_t*	wait_thr;	/*!< query thread belonging to this
453 					trx that is in QUE_THR_LOCK_WAIT
454 					state. For threads suspended in a
455 					lock wait, this is protected by
456 					lock_sys.mutex. Otherwise, this may
457 					only be modified by the thread that is
458 					serving the running transaction. */
459 #ifdef WITH_WSREP
460 	bool		was_chosen_as_wsrep_victim;
461 					/*!< high priority wsrep thread has
462 					marked this trx to abort */
463 #endif /* WITH_WSREP */
464 
465 	/** Pre-allocated record locks */
466 	struct {
467 		ib_lock_t lock; byte pad[256];
468 	} rec_pool[8];
469 
470 	/** Pre-allocated table locks */
471 	ib_lock_t	table_pool[8];
472 
473 	/** Next available rec_pool[] entry */
474 	unsigned	rec_cached;
475 
476 	/** Next available table_pool[] entry */
477 	unsigned	table_cached;
478 
479 	mem_heap_t*	lock_heap;	/*!< memory heap for trx_locks;
480 					protected by lock_sys.mutex */
481 
482 	trx_lock_list_t trx_locks;	/*!< locks requested by the transaction;
483 					insertions are protected by trx->mutex
484 					and lock_sys.mutex; removals are
485 					protected by lock_sys.mutex */
486 
487 	lock_list	table_locks;	/*!< All table locks requested by this
488 					transaction, including AUTOINC locks */
489 
490 	/** List of pending trx_t::evict_table() */
491 	UT_LIST_BASE_NODE_T(dict_table_t) evicted_tables;
492 
493 	bool		cancel;		/*!< true if the transaction is being
494 					rolled back either via deadlock
495 					detection or due to lock timeout. The
496 					caller has to acquire the trx_t::mutex
497 					in order to cancel the locks. In
498 					lock_trx_table_locks_remove() we
499 					check for this cancel of a transaction's
500 					locks and avoid reacquiring the trx
501 					mutex to prevent recursive deadlocks.
502 					Protected by both the lock sys mutex
503 					and the trx_t::mutex. */
504 	ulint		n_rec_locks;	/*!< number of rec locks in this trx */
505 };
506 
507 /** Logical first modification time of a table in a transaction */
508 class trx_mod_table_time_t
509 {
510 	/** First modification of the table */
511 	undo_no_t	first;
512 	/** First modification of a system versioned column */
513 	undo_no_t	first_versioned;
514 
515 	/** Magic value signifying that a system versioned column of a
516 	table was never modified in a transaction. */
517 	static const undo_no_t UNVERSIONED = IB_ID_MAX;
518 
519 public:
520 	/** Constructor
521 	@param[in]	rows	number of modified rows so far */
trx_mod_table_time_t(undo_no_t rows)522 	trx_mod_table_time_t(undo_no_t rows)
523 		: first(rows), first_versioned(UNVERSIONED) {}
524 
525 #ifdef UNIV_DEBUG
526 	/** Validation
527 	@param[in]	rows	number of modified rows so far
528 	@return	whether the object is valid */
529 	bool valid(undo_no_t rows = UNVERSIONED) const
530 	{
531 		return first <= first_versioned && first <= rows;
532 	}
533 #endif /* UNIV_DEBUG */
534 	/** @return if versioned columns were modified */
is_versioned()535 	bool is_versioned() const { return first_versioned != UNVERSIONED; }
536 
537 	/** After writing an undo log record, set is_versioned() if needed
538 	@param[in]	rows	number of modified rows so far */
set_versioned(undo_no_t rows)539 	void set_versioned(undo_no_t rows)
540 	{
541 		ut_ad(!is_versioned());
542 		first_versioned = rows;
543 		ut_ad(valid());
544 	}
545 
546 	/** Invoked after partial rollback
547 	@param[in]	limit	number of surviving modified rows
548 	@return	whether this should be erased from trx_t::mod_tables */
rollback(undo_no_t limit)549 	bool rollback(undo_no_t limit)
550 	{
551 		ut_ad(valid());
552 		if (first >= limit) {
553 			return true;
554 		}
555 
556 		if (first_versioned < limit && is_versioned()) {
557 			first_versioned = UNVERSIONED;
558 		}
559 
560 		return false;
561 	}
562 };
563 
564 /** Collection of persistent tables and their first modification
565 in a transaction.
566 We store pointers to the table objects in memory because
567 we know that a table object will not be destroyed while a transaction
568 that modified it is running. */
569 typedef std::map<
570 	dict_table_t*, trx_mod_table_time_t,
571 	std::less<dict_table_t*>,
572 	ut_allocator<std::pair<dict_table_t* const, trx_mod_table_time_t> > >
573 	trx_mod_tables_t;
574 
575 /** The transaction handle
576 
577 Normally, there is a 1:1 relationship between a transaction handle
578 (trx) and a session (client connection). One session is associated
579 with exactly one user transaction. There are some exceptions to this:
580 
581 * For DDL operations, a subtransaction is allocated that modifies the
582 data dictionary tables. Lock waits and deadlocks are prevented by
583 acquiring the dict_sys.latch before starting the subtransaction
584 and releasing it after committing the subtransaction.
585 
586 * The purge system uses a special transaction that is not associated
587 with any session.
588 
589 * If the system crashed or it was quickly shut down while there were
590 transactions in the ACTIVE or PREPARED state, these transactions would
591 no longer be associated with a session when the server is restarted.
592 
593 A session may be served by at most one thread at a time. The serving
594 thread of a session might change in some MySQL implementations.
595 Therefore we do not have os_thread_get_curr_id() assertions in the code.
596 
597 Normally, only the thread that is currently associated with a running
598 transaction may access (read and modify) the trx object, and it may do
599 so without holding any mutex. The following are exceptions to this:
600 
601 * trx_rollback_recovered() may access resurrected (connectionless)
602 transactions (state == TRX_STATE_ACTIVE && is_recovered)
603 while the system is already processing new user transactions (!is_recovered).
604 
605 * trx_print_low() may access transactions not associated with the current
606 thread. The caller must be holding lock_sys.mutex.
607 
608 * When a transaction handle is in the trx_sys.trx_list, some of its fields
609 must not be modified without holding trx->mutex.
610 
611 * The locking code (in particular, lock_deadlock_recursive() and
612 lock_rec_convert_impl_to_expl()) will access transactions associated
613 to other connections. The locks of transactions are protected by
614 lock_sys.mutex (insertions also by trx->mutex). */
615 
616 /** Represents an instance of rollback segment along with its state variables.*/
617 struct trx_undo_ptr_t {
618 	trx_rseg_t*	rseg;		/*!< rollback segment assigned to the
619 					transaction, or NULL if not assigned
620 					yet */
621 	trx_undo_t*	undo;		/*!< pointer to the undo log, or
622 					NULL if nothing logged yet */
623 };
624 
625 /** An instance of temporary rollback segment. */
626 struct trx_temp_undo_t {
627 	/** temporary rollback segment, or NULL if not assigned yet */
628 	trx_rseg_t*	rseg;
629 	/** pointer to the undo log, or NULL if nothing logged yet */
630 	trx_undo_t*	undo;
631 };
632 
633 /** Rollback segments assigned to a transaction for undo logging. */
634 struct trx_rsegs_t {
635 	/** undo log ptr holding reference to a rollback segment that resides in
636 	system/undo tablespace used for undo logging of tables that needs
637 	to be recovered on crash. */
638 	trx_undo_ptr_t	m_redo;
639 
640 	/** undo log for temporary tables; discarded immediately after
641 	transaction commit/rollback */
642 	trx_temp_undo_t	m_noredo;
643 };
644 
645 struct trx_t {
646 private:
647   /**
648     Count of references.
649 
650     We can't release the locks nor commit the transaction until this reference
651     is 0. We can change the state to TRX_STATE_COMMITTED_IN_MEMORY to signify
652     that it is no longer "active".
653   */
654 
655   Atomic_counter<int32_t> n_ref;
656 
657 
658 public:
659 	TrxMutex	mutex;		/*!< Mutex protecting the fields
660 					state and lock (except some fields
661 					of lock, which are protected by
662 					lock_sys.mutex) */
663 
664 	trx_id_t	id;		/*!< transaction id */
665 
666 	trx_id_t	no;		/*!< transaction serialization number:
667 					max trx id shortly before the
668 					transaction is moved to
669 					COMMITTED_IN_MEMORY state.
670 					Protected by trx_sys_t::mutex
671 					when trx is in rw_trx_hash. Initially
672 					set to TRX_ID_MAX. */
673 
674 	/** State of the trx from the point of view of concurrency control
675 	and the valid state transitions.
676 
677 	Possible states:
678 
679 	TRX_STATE_NOT_STARTED
680 	TRX_STATE_ACTIVE
681 	TRX_STATE_PREPARED
682 	TRX_STATE_PREPARED_RECOVERED (special case of TRX_STATE_PREPARED)
683 	TRX_STATE_COMMITTED_IN_MEMORY (alias below COMMITTED)
684 
685 	Valid state transitions are:
686 
687 	Regular transactions:
688 	* NOT_STARTED -> ACTIVE -> COMMITTED -> NOT_STARTED
689 
690 	Auto-commit non-locking read-only:
691 	* NOT_STARTED -> ACTIVE -> NOT_STARTED
692 
693 	XA (2PC):
694 	* NOT_STARTED -> ACTIVE -> PREPARED -> COMMITTED -> NOT_STARTED
695 
696 	Recovered XA:
697 	* NOT_STARTED -> PREPARED -> COMMITTED -> (freed)
698 
699 	Recovered XA followed by XA ROLLBACK:
700 	* NOT_STARTED -> PREPARED -> ACTIVE -> COMMITTED -> (freed)
701 
702 	XA (2PC) (shutdown or disconnect before ROLLBACK or COMMIT):
703 	* NOT_STARTED -> PREPARED -> (freed)
704 
705 	Disconnected XA can become recovered:
706 	* ... -> ACTIVE -> PREPARED (connected) -> PREPARED (disconnected)
707 	Disconnected means from mysql e.g due to the mysql client disconnection.
708 	Latching and various transaction lists membership rules:
709 
710 	XA (2PC) transactions are always treated as non-autocommit.
711 
712 	Transitions to ACTIVE or NOT_STARTED occur when transaction
713 	is not in rw_trx_hash (no trx_sys.mutex needed).
714 
715 	Autocommit non-locking read-only transactions move between states
716 	without holding any mutex. They are not in rw_trx_hash.
717 
718 	All transactions, unless they are determined to be ac-nl-ro,
719 	explicitly tagged as read-only or read-write, will first be put
720 	on the read-only transaction list. Only when a !read-only transaction
721 	in the read-only list tries to acquire an X or IX lock on a table
722 	do we remove it from the read-only list and put it on the read-write
723 	list. During this switch we assign it a rollback segment.
724 
725 	When a transaction is NOT_STARTED, it can be in trx_list. It cannot be
726 	in rw_trx_hash.
727 
728 	ACTIVE->PREPARED->COMMITTED is only possible when trx is in rw_trx_hash.
729 	The transition ACTIVE->PREPARED is protected by trx_sys.mutex.
730 
731 	ACTIVE->COMMITTED is possible when the transaction is in
732 	rw_trx_hash.
733 
734 	Transitions to COMMITTED are protected by trx_t::mutex. */
735 	trx_state_t	state;
736 #ifdef WITH_WSREP
737 	/** whether wsrep_on(mysql_thd) held at the start of transaction */
738 	bool		wsrep;
is_wsreptrx_t739 	bool is_wsrep() const { return UNIV_UNLIKELY(wsrep); }
740 	/** true, if BF thread is performing unique secondary index scanning */
741 	bool wsrep_UK_scan;
is_wsrep_UK_scantrx_t742 	bool is_wsrep_UK_scan() const { return UNIV_UNLIKELY(wsrep_UK_scan); }
743 #else /* WITH_WSREP */
is_wsreptrx_t744 	bool is_wsrep() const { return false; }
745 #endif /* WITH_WSREP */
746 
747 	ReadView	read_view;	/*!< consistent read view used in the
748 					transaction, or NULL if not yet set */
749 	trx_lock_t	lock;		/*!< Information about the transaction
750 					locks and state. Protected by
751 					lock_sys.mutex (insertions also
752 					by trx_t::mutex). */
753 
754 	/* These fields are not protected by any mutex. */
755 
756 	/** false=normal transaction, true=recovered (must be rolled back)
757 	or disconnected transaction in XA PREPARE STATE.
758 
759 	This field is accessed by the thread that owns the transaction,
760 	without holding any mutex.
761 	There is only one foreign-thread access in trx_print_low()
762 	and a possible race condition with trx_disconnect_prepared(). */
763 	bool		is_recovered;
764 	const char*	op_info;	/*!< English text describing the
765 					current operation, or an empty
766 					string */
767 	ulint		isolation_level;/*!< TRX_ISO_REPEATABLE_READ, ... */
768 	bool		check_foreigns;	/*!< normally TRUE, but if the user
769 					wants to suppress foreign key checks,
770 					(in table imports, for example) we
771 					set this FALSE */
772 	/*------------------------------*/
773 	/* MySQL has a transaction coordinator to coordinate two phase
774 	commit between multiple storage engines and the binary log. When
775 	an engine participates in a transaction, it's responsible for
776 	registering itself using the trans_register_ha() API. */
777 	bool		is_registered;	/* This flag is set to true after the
778 					transaction has been registered with
779 					the coordinator using the XA API, and
780 					is set to false  after commit or
781 					rollback. */
782 	/** whether this is holding the prepare mutex */
783 	bool		active_commit_ordered;
784 	/*------------------------------*/
785 	bool		check_unique_secondary;
786 					/*!< normally TRUE, but if the user
787 					wants to speed up inserts by
788 					suppressing unique key checks
789 					for secondary indexes when we decide
790 					if we can use the insert buffer for
791 					them, we set this FALSE */
792 	bool		flush_log_later;/* In 2PC, we hold the
793 					prepare_commit mutex across
794 					both phases. In that case, we
795 					defer flush of the logs to disk
796 					until after we release the
797 					mutex. */
798 	bool		must_flush_log_later;/*!< set in commit()
799 					if flush_log_later was
800 					set and redo log was written;
801 					in that case we will
802 					flush the log in
803 					trx_commit_complete_for_mysql() */
804 	ulint		duplicates;	/*!< TRX_DUP_IGNORE | TRX_DUP_REPLACE */
805 	trx_dict_op_t	dict_operation;	/**< @see enum trx_dict_op_t */
806 
807 	/* Fields protected by the srv_conc_mutex. */
808 	bool		declared_to_be_inside_innodb;
809 					/*!< this is TRUE if we have declared
810 					this transaction in
811 					srv_conc_enter_innodb to be inside the
812 					InnoDB engine */
813 	ib_uint32_t	n_tickets_to_enter_innodb;
814 					/*!< this can be > 0 only when
815 					declared_to_... is TRUE; when we come
816 					to srv_conc_innodb_enter, if the value
817 					here is > 0, we decrement this by 1 */
818 	ib_uint32_t	dict_operation_lock_mode;
819 					/*!< 0, RW_S_LATCH, or RW_X_LATCH:
820 					the latch mode trx currently holds
821 					on dict_sys.latch. Protected
822 					by dict_sys.latch. */
823 
824 	/** wall-clock time of the latest transition to TRX_STATE_ACTIVE;
825 	used for diagnostic purposes only */
826 	time_t		start_time;
827 	/** microsecond_interval_timer() of transaction start */
828 	ulonglong	start_time_micro;
829 	lsn_t		commit_lsn;	/*!< lsn at the time of the commit */
830 	table_id_t	table_id;	/*!< Table to drop iff dict_operation
831 					== TRX_DICT_OP_TABLE, or 0. */
832 	/*------------------------------*/
833 	THD*		mysql_thd;	/*!< MySQL thread handle corresponding
834 					to this trx, or NULL */
835 
836 	const char*	mysql_log_file_name;
837 					/*!< if MySQL binlog is used, this field
838 					contains a pointer to the latest file
839 					name; this is NULL if binlog is not
840 					used */
841 	ulonglong	mysql_log_offset;
842 					/*!< if MySQL binlog is used, this
843 					field contains the end offset of the
844 					binlog entry */
845 	/*------------------------------*/
846 	ib_uint32_t	n_mysql_tables_in_use; /*!< number of Innobase tables
847 					used in the processing of the current
848 					SQL statement in MySQL */
849 	ib_uint32_t	mysql_n_tables_locked;
850 					/*!< how many tables the current SQL
851 					statement uses, except those
852 					in consistent read */
853 	/*------------------------------*/
854 	UT_LIST_NODE_T(trx_t) trx_list;	/*!< list of all transactions;
855 					protected by trx_sys.mutex */
856 	/*------------------------------*/
857 	dberr_t		error_state;	/*!< 0 if no error, otherwise error
858 					number; NOTE That ONLY the thread
859 					doing the transaction is allowed to
860 					set this field: this is NOT protected
861 					by any mutex */
862 	const dict_index_t*error_info;	/*!< if the error number indicates a
863 					duplicate key error, a pointer to
864 					the problematic index is stored here */
865 	ulint		error_key_num;	/*!< if the index creation fails to a
866 					duplicate key error, a mysql key
867 					number of that index is stored here */
868 	que_t*		graph;		/*!< query currently run in the session,
869 					or NULL if none; NOTE that the query
870 					belongs to the session, and it can
871 					survive over a transaction commit, if
872 					it is a stored procedure with a COMMIT
873 					WORK statement, for instance */
874 	/*------------------------------*/
875 	UT_LIST_BASE_NODE_T(trx_named_savept_t)
876 			trx_savepoints;	/*!< savepoints set with SAVEPOINT ...,
877 					oldest first */
878 	/*------------------------------*/
879 	undo_no_t	undo_no;	/*!< next undo log record number to
880 					assign; since the undo log is
881 					private for a transaction, this
882 					is a simple ascending sequence
883 					with no gaps; thus it represents
884 					the number of modified/inserted
885 					rows in a transaction */
886 	trx_savept_t	last_sql_stat_start;
887 					/*!< undo_no when the last sql statement
888 					was started: in case of an error, trx
889 					is rolled back down to this number */
890 	trx_rsegs_t	rsegs;		/* rollback segments for undo logging */
891 	undo_no_t	roll_limit;	/*!< least undo number to undo during
892 					a partial rollback; 0 otherwise */
893 	bool		in_rollback;	/*!< true when the transaction is
894 					executing a partial or full rollback */
895 	ulint		pages_undone;	/*!< number of undo log pages undone
896 					since the last undo log truncation */
897 	/*------------------------------*/
898 	ulint		n_autoinc_rows;	/*!< no. of AUTO-INC rows required for
899 					an SQL statement. This is useful for
900 					multi-row INSERTs */
901 	ib_vector_t*    autoinc_locks;  /* AUTOINC locks held by this
902 					transaction. Note that these are
903 					also in the lock list trx_locks. This
904 					vector needs to be freed explicitly
905 					when the trx instance is destroyed.
906 					Protected by lock_sys.mutex. */
907 	/*------------------------------*/
908 	bool		read_only;	/*!< true if transaction is flagged
909 					as a READ-ONLY transaction.
910 					if auto_commit && !will_lock
911 					then it will be handled as a
912 					AC-NL-RO-SELECT (Auto Commit Non-Locking
913 					Read Only Select). A read only
914 					transaction will not be assigned an
915 					UNDO log. */
916 	bool		auto_commit;	/*!< true if it is an autocommit */
917 	bool		will_lock;	/*!< set to inform trx_start_low() that
918 					the transaction may acquire locks */
919 	/*------------------------------*/
920 	fts_trx_t*	fts_trx;	/*!< FTS information, or NULL if
921 					transaction hasn't modified tables
922 					with FTS indexes (yet). */
923 	doc_id_t	fts_next_doc_id;/* The document id used for updates */
924 	/*------------------------------*/
925 	ib_uint32_t	flush_tables;	/*!< if "covering" the FLUSH TABLES",
926 					count of tables being flushed. */
927 
928 	/*------------------------------*/
929 	bool		ddl;		/*!< true if it is an internal
930 					transaction for DDL */
931 	bool		internal;	/*!< true if it is a system/internal
932 					transaction background task. This
933 					includes DDL transactions too.  Such
934 					transactions are always treated as
935 					read-write. */
936 	/*------------------------------*/
937 #ifdef UNIV_DEBUG
938 	unsigned	start_line;	/*!< Track where it was started from */
939 	const char*	start_file;	/*!< Filename where it was started */
940 #endif /* UNIV_DEBUG */
941 
942 	XID*		xid;		/*!< X/Open XA transaction
943 					identification to identify a
944 					transaction branch */
945 	trx_mod_tables_t mod_tables;	/*!< List of tables that were modified
946 					by this transaction */
947 	/*------------------------------*/
948 	char*		detailed_error;	/*!< detailed error message for last
949 					error, or empty. */
950 private:
951 	/** flush observer used to track flushing of non-redo logged pages
952 	during bulk create index */
953 	FlushObserver*	flush_observer;
954 public:
955 #ifdef WITH_WSREP
956 	os_event_t	wsrep_event;	/* event waited for in srv_conc_slot */
957 #endif /* WITH_WSREP */
958 
959 	rw_trx_hash_element_t *rw_trx_hash_element;
960 	LF_PINS *rw_trx_hash_pins;
961 	ulint		magic_n;
962 
963 	/** @return whether any persistent undo log has been generated */
has_logged_persistenttrx_t964 	bool has_logged_persistent() const
965 	{
966 		return(rsegs.m_redo.undo);
967 	}
968 
969 	/** @return whether any undo log has been generated */
has_loggedtrx_t970 	bool has_logged() const
971 	{
972 		return(has_logged_persistent() || rsegs.m_noredo.undo);
973 	}
974 
975 	/** @return rollback segment for modifying temporary tables */
get_temp_rsegtrx_t976 	trx_rseg_t* get_temp_rseg()
977 	{
978 		if (trx_rseg_t* rseg = rsegs.m_noredo.rseg) {
979 			ut_ad(id != 0);
980 			return(rseg);
981 		}
982 
983 		return(assign_temp_rseg());
984 	}
985 
986 	/** Set the innodb_log_optimize_ddl page flush observer
987 	@param[in,out]	space	tablespace
988 	@param[in,out]	stage	performance_schema accounting */
989 	void set_flush_observer(fil_space_t* space, ut_stage_alter_t* stage);
990 
991 	/** Remove the flush observer */
992 	void remove_flush_observer();
993 
994 	/** @return the flush observer */
get_flush_observertrx_t995 	FlushObserver* get_flush_observer() const
996 	{
997 		return flush_observer;
998 	}
999 
1000   /** Transition to committed state, to release implicit locks. */
1001   inline void commit_state();
1002 
1003   /** Release any explicit locks of a committing transaction. */
1004   inline void release_locks();
1005 
1006   /** Evict a table definition due to the rollback of ALTER TABLE.
1007   @param[in]	table_id	table identifier */
1008   void evict_table(table_id_t table_id);
1009 
1010 private:
1011   /** Mark a transaction committed in the main memory data structures. */
1012   inline void commit_in_memory(const mtr_t *mtr);
1013 public:
1014   /** Commit the transaction. */
1015   void commit();
1016 
1017   /** Commit the transaction in a mini-transaction.
1018   @param mtr  mini-transaction (if there are any persistent modifications) */
1019   void commit_low(mtr_t *mtr= nullptr);
1020 
1021 
1022 
is_referencedtrx_t1023   bool is_referenced() const { return n_ref > 0; }
1024 
1025 
referencetrx_t1026   void reference()
1027   {
1028 #ifdef UNIV_DEBUG
1029     auto old_n_ref=
1030 #endif
1031     n_ref++;
1032     ut_ad(old_n_ref >= 0);
1033   }
1034 
1035 
release_referencetrx_t1036   void release_reference()
1037   {
1038 #ifdef UNIV_DEBUG
1039     auto old_n_ref=
1040 #endif
1041     n_ref--;
1042     ut_ad(old_n_ref > 0);
1043   }
1044 
  /** @return whether the transaction has a lock on
  mysql.innodb_table_stats and mysql.innodb_index_stats */
1047   bool has_stats_table_lock() const;
1048 
1049   /** Free the memory to trx_pools */
1050   void free();
1051 
1052 
  /** Assert (with ut_ad) that this object is in the state expected of a
  freed transaction: not started, no id, no undo logged, not referenced,
  not a wsrep transaction, no open read view, no lock wait, no locks of
  any kind, no evicted tables and no pending dictionary operation. */
  void assert_freed() const
  {
    ut_ad(state == TRX_STATE_NOT_STARTED);
    ut_ad(!id);
    ut_ad(!has_logged());
    ut_ad(!is_referenced());
    ut_ad(!is_wsrep());
#ifdef WITH_WSREP
    ut_ad(!lock.was_chosen_as_wsrep_victim);
#endif
    ut_ad(!read_view.is_open());
    ut_ad(!lock.wait_thr);
    ut_ad(UT_LIST_GET_LEN(lock.trx_locks) == 0);
    ut_ad(lock.table_locks.empty());
    /* autoinc_locks may be unallocated (null); otherwise it must be empty */
    ut_ad(!autoinc_locks || ib_vector_is_empty(autoinc_locks));
    ut_ad(UT_LIST_GET_LEN(lock.evicted_tables) == 0);
    ut_ad(dict_operation == TRX_DICT_OP_NONE);
  }
1071 
1072 
1073   /** @return whether this is a non-locking autocommit transaction */
is_autocommit_non_lockingtrx_t1074   bool is_autocommit_non_locking() const { return auto_commit && !will_lock; }
1075 
1076 private:
1077   /** Assign a rollback segment for modifying temporary tables.
1078   @return the assigned rollback segment */
1079   trx_rseg_t *assign_temp_rseg();
1080 };
1081 
1082 /**
1083 Check if transaction is started.
1084 @param[in] trx		Transaction whose state we need to check
1085 @reutrn true if transaction is in state started */
trx_is_started(const trx_t * trx)1086 inline bool trx_is_started(const trx_t* trx)
1087 {
1088 	return trx->state != TRX_STATE_NOT_STARTED;
1089 }
1090 
/* Transaction isolation levels (trx->isolation_level), numbered 0 to 3 */
#define TRX_ISO_READ_UNCOMMITTED	0	/* dirty read: non-locking
						SELECTs are performed so that
						we do not look at a possible
						earlier version of a record;
						thus they are not 'consistent'
						reads under this isolation
						level; otherwise like level
						2 */

#define TRX_ISO_READ_COMMITTED		1	/* somewhat Oracle-like
						isolation, except that in
						range UPDATE and DELETE we
						must block phantom rows
						with next-key locks;
						SELECT ... FOR UPDATE and
						SELECT ... LOCK IN SHARE MODE
						only lock the index records,
						NOT the gaps before them, and
						thus allow free inserting;
						each consistent read reads its
						own snapshot */

#define TRX_ISO_REPEATABLE_READ		2	/* this is the default;
						all consistent reads in the
						same trx read the same
						snapshot;
						full next-key locking is used
						in locking reads to block
						insertions into gaps */

#define TRX_ISO_SERIALIZABLE		3	/* all plain SELECTs are
						converted to LOCK IN SHARE
						MODE reads */
1125 
/* Treatment of duplicate values (trx->duplicates; for example, in inserts).
Each flag is a distinct bit, so multiple flags can be combined with
bitwise OR (TRX_DUP_IGNORE | TRX_DUP_REPLACE). */
#define TRX_DUP_IGNORE	1U	/* duplicate rows are to be updated */
#define TRX_DUP_REPLACE	2U	/* duplicate rows are to be replaced */
1130 
1131 
/** Execution states of a commit node */
enum commit_node_state {
	/** about to send a commit signal to the transaction */
	COMMIT_NODE_SEND = 1,
	/** commit signal sent to the transaction, waiting for completion */
	COMMIT_NODE_WAIT = 2
};
1139 
1140 /** Commit command node in a query graph */
1141 struct commit_node_t{
1142 	que_common_t	common;	/*!< node type: QUE_NODE_COMMIT */
1143 	enum commit_node_state
1144 			state;	/*!< node execution state */
1145 };
1146 
1147 
/** Test if trx->mutex is owned.
@param t	pointer to the transaction; may be any pointer-valued
		expression, as the argument is parenthesized in the expansion
@return the result of mutex_own() on the mutex member of t */
#define trx_mutex_own(t) mutex_own(&(t)->mutex)
1150 
/** Acquire the trx->mutex.
@param t	pointer to the transaction; may be any pointer-valued
		expression, as the argument is parenthesized in the expansion */
#define trx_mutex_enter(t) do {			\
	mutex_enter(&(t)->mutex);		\
} while (0)
1155 
/** Release the trx->mutex.
@param t	pointer to the transaction; may be any pointer-valued
		expression, as the argument is parenthesized in the expansion */
#define trx_mutex_exit(t) do {			\
	mutex_exit(&(t)->mutex);		\
} while (0)
1160 
1161 #include "trx0trx.inl"
1162 
1163 #endif
1164