1 /*****************************************************************************
2 
3 Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
4 Copyright (c) 2015, 2021, MariaDB Corporation.
5 
6 This program is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free Software
8 Foundation; version 2 of the License.
9 
10 This program is distributed in the hope that it will be useful, but WITHOUT
11 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12 FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
13 
14 You should have received a copy of the GNU General Public License along with
15 this program; if not, write to the Free Software Foundation, Inc.,
16 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
17 
18 *****************************************************************************/
19 
20 /**************************************************//**
21 @file include/trx0trx.h
22 The transaction
23 
24 Created 3/26/1996 Heikki Tuuri
25 *******************************************************/
26 
27 #ifndef trx0trx_h
28 #define trx0trx_h
29 
30 #include "trx0types.h"
31 #include "lock0types.h"
32 #include "que0types.h"
33 #include "mem0mem.h"
34 #include "trx0xa.h"
35 #include "ut0vec.h"
36 #include "fts0fts.h"
37 #include "read0types.h"
38 
39 #include <vector>
40 #include <set>
41 
42 // Forward declaration
43 struct mtr_t;
44 class FlushObserver;
45 struct rw_trx_hash_element_t;
46 
47 /******************************************************************//**
48 Sets the detailed error message for the transaction. */
49 void
50 trx_set_detailed_error(
51 /*===================*/
52 	trx_t*		trx,	/*!< in: transaction struct */
53 	const char*	msg);	/*!< in: detailed error message */
54 /*************************************************************//**
55 Sets the detailed error message for the transaction from a file. Note that
56 the file is rewound before reading from it. */
57 void
58 trx_set_detailed_error_from_file(
59 /*=============================*/
60 	trx_t*	trx,	/*!< in: transaction struct */
61 	FILE*	file);	/*!< in: file to read message from */
62 /****************************************************************//**
63 Retrieves the error_info field from a trx.
64 @return the error info (see trx_t::error_info), as a const index pointer */
65 UNIV_INLINE
66 const dict_index_t*
67 trx_get_error_info(
68 /*===============*/
69 	const trx_t*	trx);	/*!< in: trx object */
70 
71 /** Allocate a transaction object. @return an allocated transaction */
72 trx_t *trx_create();
73 
74 /** At shutdown, frees a transaction object. @param[in,out] trx transaction */
75 void trx_free_at_shutdown(trx_t *trx);
76 
77 /** Disconnect a prepared transaction from MySQL.
78 @param[in,out]	trx	transaction */
79 void trx_disconnect_prepared(trx_t *trx);
80 
81 /** Initialize (resurrect) transactions at startup. @return dberr_t status */
82 dberr_t trx_lists_init_at_db_start();
83 
84 /*************************************************************//**
85 Starts the transaction if it is not yet started; wrapped by the
     trx_start_if_not_started_xa() macro. */
111 
112 #ifdef UNIV_DEBUG /* debug: store __LINE__/__FILE__ in start_line/start_file, then call the _low function */
113 #define trx_start_if_not_started_xa(t, rw)			\
114 	do {							\
115 	(t)->start_line = __LINE__;				\
116 	(t)->start_file = __FILE__;				\
117 	trx_start_if_not_started_xa_low((t), rw);		\
118 	} while (false)
119 /* NOTE: each UNIV_DEBUG variant evaluates its argument t three times. */
120 #define trx_start_if_not_started(t, rw)				\
121 	do {							\
122 	(t)->start_line = __LINE__;				\
123 	(t)->start_file = __FILE__;				\
124 	trx_start_if_not_started_low((t), rw);			\
125 	} while (false)
126 
127 #define trx_start_internal(t)					\
128 	do {							\
129 	(t)->start_line = __LINE__;				\
130 	(t)->start_file = __FILE__;				\
131 	trx_start_internal_low((t));				\
132 	} while (false)
133 
134 #define trx_start_internal_read_only(t)				\
135 	do {							\
136 	(t)->start_line = __LINE__;				\
137 	(t)->start_file = __FILE__;				\
138 	trx_start_internal_read_only_low(t);			\
139 	} while (false)
140 #else /* !UNIV_DEBUG: each macro only forwards to its _low function */
141 #define trx_start_if_not_started(t, rw)				\
142 	trx_start_if_not_started_low((t), rw)
143 
144 #define trx_start_internal(t)					\
145 	trx_start_internal_low((t))
146 
147 #define trx_start_internal_read_only(t)				\
148 	trx_start_internal_read_only_low(t)
149 
150 #define trx_start_if_not_started_xa(t, rw)			\
151 	trx_start_if_not_started_xa_low((t), (rw))
152 #endif /* UNIV_DEBUG */
153 
154 /*************************************************************//**
155 Starts the transaction for a DDL operation; wrapped by trx_start_for_ddl(). */
156 void
157 trx_start_for_ddl_low(
158 /*==================*/
159 	trx_t*		trx,	/*!< in/out: transaction */
160 	trx_dict_op_t	op);	/*!< in: dictionary operation type */
161 
162 #ifdef UNIV_DEBUG /* debug: ut_ad() that start_file is still 0, then record __LINE__/__FILE__ */
163 #define trx_start_for_ddl(t, o)					\
164 	do {							\
165 	ut_ad((t)->start_file == 0);				\
166 	(t)->start_line = __LINE__;				\
167 	(t)->start_file = __FILE__;				\
168 	trx_start_for_ddl_low((t), (o));			\
169 	} while (0)
170 #else /* !UNIV_DEBUG: forwards to trx_start_for_ddl_low() only */
171 #define trx_start_for_ddl(t, o)					\
172 	trx_start_for_ddl_low((t), (o))
173 #endif /* UNIV_DEBUG */
174 
175 /****************************************************************//**
176 Commits a transaction. */
177 void
178 trx_commit(
179 /*=======*/
180 	trx_t*	trx);	/*!< in/out: transaction */
181 
182 /** Commit a transaction and a mini-transaction.
183 @param[in,out]	trx	transaction
184 @param[in,out]	mtr	mini-transaction (NULL if no modifications) */
185 void trx_commit_low(trx_t* trx, mtr_t* mtr);
186 /**********************************************************************//**
187 Commits the transaction on behalf of MySQL.
188 @return DB_SUCCESS or error number */
189 dberr_t
190 trx_commit_for_mysql(
191 /*=================*/
192 	trx_t*	trx);	/*!< in/out: transaction */
193 /** Perform XA PREPARE on a transaction.
194 @param[in,out]	trx	transaction to prepare */
195 void trx_prepare_for_mysql(trx_t* trx);
196 /**********************************************************************//**
197 Finds the number of prepared transactions and their transaction
198 objects for a recovery.
199 @return number of prepared transactions */
200 int
201 trx_recover_for_mysql(
202 /*==================*/
203 	XID*	xid_list,	/*!< in/out: prepared transactions */
204 	uint	len);		/*!< in: number of slots in xid_list */
205 /** Look up an X/Open distributed transaction in XA PREPARE state.
206 @param[in]	xid	X/Open XA transaction identifier
207 @return	transaction on match (the trx_t::xid will be invalidated);
208 note that the trx may have been committed before the caller acquires
209 trx_t::mutex
210 @retval	NULL if there is no match */
211 trx_t* trx_get_trx_by_xid(const XID* xid);
212 /**********************************************************************//**
213 If required, flushes the log to disk if we called trx_commit_for_mysql()
214 with trx->flush_log_later == true. */
215 void
216 trx_commit_complete_for_mysql(
217 /*==========================*/
218 	trx_t*	trx);	/*!< in/out: transaction */
219 /**********************************************************************//**
220 Marks the latest SQL statement as ended. */
221 void
222 trx_mark_sql_stat_end(
223 /*==================*/
224 	trx_t*	trx);	/*!< in: trx handle */
225 /****************************************************************//**
226 Prepares a transaction for commit or rollback. */
227 void
228 trx_commit_or_rollback_prepare(
229 /*===========================*/
230 	trx_t*	trx);	/*!< in/out: transaction */
231 /*********************************************************************//**
232 Creates a commit command node struct.
233 @return own: commit node struct */
234 commit_node_t*
235 trx_commit_node_create(
236 /*===================*/
237 	mem_heap_t*	heap);	/*!< in: mem heap where created */
238 /***********************************************************//**
239 Performs an execution step for a commit type node in a query graph.
240 @return query thread to run next, or NULL */
241 que_thr_t*
242 trx_commit_step(
243 /*============*/
244 	que_thr_t*	thr);	/*!< in: query thread */
245 
246 /**********************************************************************//**
247 Prints info about a transaction, using lock counts supplied by the caller.
248 Caller must hold trx_sys.mutex. */
249 void
250 trx_print_low(
251 /*==========*/
252 	FILE*		f,
253 			/*!< in: output stream */
254 	const trx_t*	trx,
255 			/*!< in: transaction */
256 	ulint		max_query_len,
257 			/*!< in: max query length to print,
258 			or 0 to use the default max length */
259 	ulint		n_rec_locks,
260 			/*!< in: lock_number_of_rows_locked(&trx->lock) */
261 	ulint		n_trx_locks,
262 			/*!< in: length of trx->lock.trx_locks */
263 	ulint		heap_size);
264 			/*!< in: mem_heap_get_size(trx->lock.lock_heap) */
265 
266 /**********************************************************************//**
267 Prints info about a transaction.
268 The caller must hold both lock_sys.mutex and trx_sys.mutex.
269 When possible, use trx_print() instead. */
270 void
271 trx_print_latched(
272 /*==============*/
273 	FILE*		f,		/*!< in: output stream */
274 	const trx_t*	trx,		/*!< in: transaction */
275 	ulint		max_query_len);	/*!< in: max query length to print,
276 					or 0 to use the default max length */
277 
278 /**********************************************************************//**
279 Prints info about a transaction.
280 Acquires and releases lock_sys.mutex itself. */
281 void
282 trx_print(
283 /*======*/
284 	FILE*		f,		/*!< in: output stream */
285 	const trx_t*	trx,		/*!< in: transaction */
286 	ulint		max_query_len);	/*!< in: max query length to print,
287 					or 0 to use the default max length */
288 
289 /**********************************************************************//**
290 Determine if a transaction is a dictionary operation.
291 @return dictionary operation mode */
292 UNIV_INLINE
293 enum trx_dict_op_t
294 trx_get_dict_operation(
295 /*===================*/
296 	const trx_t*	trx)	/*!< in: transaction */
297 	MY_ATTRIBUTE((warn_unused_result));
298 /**********************************************************************//**
299 Flag a transaction as a dictionary operation. */
300 UNIV_INLINE
301 void
302 trx_set_dict_operation(
303 /*===================*/
304 	trx_t*			trx,	/*!< in/out: transaction */
305 	enum trx_dict_op_t	op);	/*!< in: operation, not
306 					TRX_DICT_OP_NONE */
307 
308 /**********************************************************************//**
309 Determines if a transaction is in the given state.
310 The caller must hold trx_sys.mutex, or it must be the thread
311 that is serving a running transaction.
312 A running RW transaction must be in trx_sys.rw_trx_hash.
313 @return true if trx->state == state */
314 UNIV_INLINE
315 bool
316 trx_state_eq(
317 /*=========*/
318 	const trx_t*	trx,	/*!< in: transaction */
319 	trx_state_t	state,	/*!< in: state;
320 				if state != TRX_STATE_NOT_STARTED
321 				asserts that
322 				trx->state != TRX_STATE_NOT_STARTED */
323 	bool		relaxed = false)
324 				/*!< in: whether to allow
325 				trx->state == TRX_STATE_NOT_STARTED
326 				after an error has been reported */
327 	MY_ATTRIBUTE((nonnull, warn_unused_result));
328 
329 /**********************************************************************//**
330 Determines if the currently running transaction has been interrupted.
331 @return true if interrupted */
332 bool
333 trx_is_interrupted(
334 /*===============*/
335 	const trx_t*	trx);	/*!< in: transaction */
336 
337 /*******************************************************************//**
338 Calculates the "weight" of a transaction. The weight of one transaction
339 is estimated as the number of altered rows + the number of locked rows.
340 @param t transaction (the macro evaluates this argument twice)
341 @return transaction weight: undo_no plus the length of lock.trx_locks */
342 #define TRX_WEIGHT(t)	((t)->undo_no + UT_LIST_GET_LEN((t)->lock.trx_locks))
343 
344 /*******************************************************************//**
345 Compares the "weight" (or size) of two transactions. Transactions that
346 have edited non-transactional tables are considered heavier than ones
347 that have not.
348 @return true if weight(a) >= weight(b) */
349 bool
350 trx_weight_ge(
351 /*==========*/
352 	const trx_t*	a,	/*!< in: the first transaction to compare */
353 	const trx_t*	b);	/*!< in: the second transaction to compare */
354 /* Maximum length of a string that can be returned by
355 trx_get_que_state_str(); 12 is the character count of the example below. */
356 #define TRX_QUE_STATE_STR_MAX_LEN	12 /* "ROLLING BACK" */
357 
358 /*******************************************************************//**
359 Retrieves the transaction's que state as a human-readable string. The string
360 must not be free()'d or modified.
361 @return string in the data segment */
362 UNIV_INLINE
363 const char*
364 trx_get_que_state_str(
365 /*==================*/
366 	const trx_t*	trx);	/*!< in: transaction */
367 
368 /** Retrieves the transaction ID.
369 At any given point in time it is guaranteed that the IDs of the running
370 transactions are unique. The values returned by this function for readonly
371 transactions may be reused, so a subsequent RO transaction may get the same ID
372 as a RO transaction that existed in the past. The values returned by this
373 function should be used for printing purposes only.
374 @param[in]	trx	transaction whose id to retrieve
375 @return transaction id */
376 UNIV_INLINE
377 trx_id_t
378 trx_get_id_for_print(
379 	const trx_t*	trx);
380 
381 /** Create the trx_t pool; see trx_pool_close() for the counterpart */
382 void
383 trx_pool_init();
384 
385 /** Destroy the trx_t pool; see trx_pool_init() for the counterpart */
386 void
387 trx_pool_close();
388 
389 /**
390 Set the transaction as a read-write transaction if it is not already
391 tagged as such.
392 @param[in,out] trx	Transaction that needs to be "upgraded" from RO to RW */
393 void
394 trx_set_rw_mode(
395 	trx_t*		trx);
396 
397 /**
398 Transactions that aren't started by the MySQL server don't set
399 the trx_t::mysql_thd field. For such transactions we set the lock
400 wait timeout to 0 instead of the user configured value that comes
401 from innodb_lock_wait_timeout via trx_t::mysql_thd.
402 @param t transaction (the macro may evaluate this argument twice)
403 @return lock wait timeout in seconds */
404 #define trx_lock_wait_timeout_get(t)					\
405 	((t)->mysql_thd != NULL						\
406 	 ? thd_lock_wait_timeout((t)->mysql_thd)			\
407 	 : 0)
408 
409 typedef std::vector<ib_lock_t*, ut_allocator<ib_lock_t*> >	lock_list; /*!< vector of lock pointers; type of trx_lock_t::table_locks */
410 
411 /*******************************************************************//**
412 Latching protocol for trx_lock_t::que_state.  trx_lock_t::que_state
413 captures the state of the query thread during the execution of a query.
414 This is different from a transaction state. The query state of a transaction
415 can be updated asynchronously by other threads.  The other threads can be
416 system threads, like the timeout monitor thread or user threads executing
417 other queries. Another thing to be mindful of is that there is a delay between
418 when a query thread is put into LOCK_WAIT state and when it actually starts
419 waiting.  Between these two events it is possible that the query thread is
420 granted the lock it was waiting for, which implies that the state can be changed
421 asynchronously.
422 
423 All these operations take place within the context of locking. Therefore state
424 changes within the locking code must acquire both the lock mutex and the
425 trx->mutex when changing trx->lock.que_state to TRX_QUE_LOCK_WAIT or
426 trx->lock.wait_lock to non-NULL but when the lock wait ends it is sufficient
427 to only acquire the trx->mutex.
428 To query the state either of the mutexes is sufficient within the locking
429 code and no mutex is required when the query thread is no longer waiting. */
430 
431 /** The locks and state of an active transaction. Protected by
432 lock_sys.mutex, trx->mutex or both. */
433 struct trx_lock_t {
434 	ulint		n_active_thrs;	/*!< number of active query threads */
435 
436 	trx_que_t	que_state;	/*!< valid when trx->state
437 					== TRX_STATE_ACTIVE: TRX_QUE_RUNNING,
438 					TRX_QUE_LOCK_WAIT, ... */
439 
440 	lock_t*		wait_lock;	/*!< if trx execution state is
441 					TRX_QUE_LOCK_WAIT, this points to
442 					the lock request, otherwise this is
443 					NULL; set to non-NULL when holding
444 					both trx->mutex and lock_sys.mutex;
445 					set to NULL when holding
446 					lock_sys.mutex; readers should
447 					hold lock_sys.mutex, except when
448 					they are holding trx->mutex and
449 					wait_lock==NULL */
450 	ib_uint64_t	deadlock_mark;	/*!< A mark field that is initialized
451 					to and checked against lock_mark_counter
452 					by lock_deadlock_recursive(). */
453 	bool		was_chosen_as_deadlock_victim;
454 					/*!< when the transaction decides to
455 					wait for a lock, it sets this to false;
456 					if another transaction chooses this
457 					transaction as a victim in deadlock
458 					resolution, it sets this to true.
459 					Protected by trx->mutex. */
460 	time_t		wait_started;	/*!< lock wait started at this time,
461 					protected only by lock_sys.mutex */
462 
463 	que_thr_t*	wait_thr;	/*!< query thread belonging to this
464 					trx that is in QUE_THR_LOCK_WAIT
465 					state. For threads suspended in a
466 					lock wait, this is protected by
467 					lock_sys.mutex. Otherwise, this may
468 					only be modified by the thread that is
469 					serving the running transaction. */
470 
471 	/** Pre-allocated record locks: 8 slots, each an ib_lock_t plus 256 pad bytes */
472 	struct {
473 		ib_lock_t lock; byte pad[256];
474 	} rec_pool[8];
475 
476 	/** Pre-allocated table locks (8 slots) */
477 	ib_lock_t	table_pool[8];
478 
479 	/** Index of the next available rec_pool[] entry */
480 	unsigned	rec_cached;
481 
482 	/** Index of the next available table_pool[] entry */
483 	unsigned	table_cached;
484 
485 	mem_heap_t*	lock_heap;	/*!< memory heap for trx_locks;
486 					protected by lock_sys.mutex */
487 
488 	trx_lock_list_t trx_locks;	/*!< locks requested by the transaction;
489 					insertions are protected by trx->mutex
490 					and lock_sys.mutex; removals are
491 					protected by lock_sys.mutex */
492 
493 	lock_list	table_locks;	/*!< All table locks requested by this
494 					transaction, including AUTOINC locks */
495 
496 	bool		cancel;		/*!< true if the transaction is being
497 					rolled back either via deadlock
498 					detection or due to lock timeout. The
499 					caller has to acquire the trx_t::mutex
500 					in order to cancel the locks. In
501 					lock_trx_table_locks_remove() we
502 					check for this cancel of a transaction's
503 					locks and avoid reacquiring the trx
504 					mutex to prevent recursive deadlocks.
505 					Protected by both the lock sys mutex
506 					and the trx_t::mutex. */
507 	ulint		n_rec_locks;	/*!< number of record locks in this trx */
508 };
509 
510 /** Logical first modification time of a table in a transaction, kept as undo_no_t counts */
511 class trx_mod_table_time_t
512 {
513 	/** First modification of the table */
514 	undo_no_t	first;
515 	/** First modification of a system versioned column, or UNVERSIONED */
516 	undo_no_t	first_versioned;
517 
518 	/** Magic value signifying that a system versioned column of a
519 	table was never modified in a transaction. */
520 	static const undo_no_t UNVERSIONED = IB_ID_MAX;
521 
522 public:
523 	/** Constructor; the object starts out as not versioned
524 	@param[in]	rows	number of modified rows so far */
525 	trx_mod_table_time_t(undo_no_t rows)
526 		: first(rows), first_versioned(UNVERSIONED) {}
527 
528 #ifdef UNIV_DEBUG
529 	/** Validation (only compiled when UNIV_DEBUG is defined)
530 	@param[in]	rows	number of modified rows so far (default UNVERSIONED)
531 	@return	whether first <= first_versioned and first <= rows */
532 	bool valid(undo_no_t rows = UNVERSIONED) const
533 	{
534 		return first <= first_versioned && first <= rows;
535 	}
536 #endif /* UNIV_DEBUG */
537 	/** @return whether first_versioned differs from UNVERSIONED */
538 	bool is_versioned() const { return first_versioned != UNVERSIONED; }
539 
540 	/** After writing an undo log record, set is_versioned() if needed;
541 	@param[in]	rows	number of modified rows so far; must not already be versioned */
542 	void set_versioned(undo_no_t rows)
543 	{
544 		ut_ad(!is_versioned());
545 		first_versioned = rows;
546 		ut_ad(valid());
547 	}
548 
549 	/** Invoked after partial rollback
550 	@param[in]	limit	number of surviving modified rows
551 	@return	whether this should be erased from trx_t::mod_tables (first >= limit) */
552 	bool rollback(undo_no_t limit)
553 	{
554 		ut_ad(valid());
555 		if (first >= limit) {
556 			return true;
557 		}
558 		/* NOTE(review): the reset below fires when first_versioned < limit, which appears to be the surviving case; confirm intent. */
559 		if (first_versioned < limit && is_versioned()) {
560 			first_versioned = UNVERSIONED;
561 		}
562 
563 		return false;
564 	}
565 };
566 
567 /** Collection of persistent tables and their first modification
568 in a transaction. We store pointers to the table objects in memory because
569 we know that a table object will not be destroyed while a transaction
570 that modified it is running. NOTE(review): the only standard headers included
571 directly here are <vector> and <set>; presumably <map> arrives transitively. */
572 typedef std::map<
573 	dict_table_t*, trx_mod_table_time_t,
574 	std::less<dict_table_t*>,
575 	ut_allocator<std::pair<dict_table_t* const, trx_mod_table_time_t> > >
576 	trx_mod_tables_t;
577 
578 /** The transaction handle
579 
580 Normally, there is a 1:1 relationship between a transaction handle
581 (trx) and a session (client connection). One session is associated
582 with exactly one user transaction. There are some exceptions to this:
583 
584 * For DDL operations, a subtransaction is allocated that modifies the
585 data dictionary tables. Lock waits and deadlocks are prevented by
586 acquiring the dict_operation_lock before starting the subtransaction
587 and releasing it after committing the subtransaction.
588 
589 * The purge system uses a special transaction that is not associated
590 with any session.
591 
592 * If the system crashed or it was quickly shut down while there were
593 transactions in the ACTIVE or PREPARED state, these transactions would
594 no longer be associated with a session when the server is restarted.
595 
596 A session may be served by at most one thread at a time. The serving
597 thread of a session might change in some MySQL implementations.
598 Therefore we do not have os_thread_get_curr_id() assertions in the code.
599 
600 Normally, only the thread that is currently associated with a running
601 transaction may access (read and modify) the trx object, and it may do
602 so without holding any mutex. The following are exceptions to this:
603 
604 * trx_rollback_recovered() may access resurrected (connectionless)
605 transactions (state == TRX_STATE_ACTIVE && is_recovered)
606 while the system is already processing new user transactions (!is_recovered).
607 
608 * trx_print_low() may access transactions not associated with the current
609 thread. The caller must be holding lock_sys.mutex.
610 
611 * When a transaction handle is in the trx_sys.trx_list, some of its fields
612 must not be modified without holding trx->mutex.
613 
614 * The locking code (in particular, lock_deadlock_recursive() and
615 lock_rec_convert_impl_to_expl()) will access transactions associated
616 with other connections. The locks of transactions are protected by
617 lock_sys.mutex (insertions also by trx->mutex). */
618 
619 /** A rollback segment together with its undo log; type of trx_rsegs_t::m_redo. */
620 struct trx_undo_ptr_t {
621 	trx_rseg_t*	rseg;		/*!< rollback segment assigned to the
622 					transaction, or NULL if not assigned
623 					yet */
624 	trx_undo_t*	undo;		/*!< pointer to the undo log, or
625 					NULL if nothing logged yet */
626 };
627 
628 /** A temporary rollback segment with its undo log; type of trx_rsegs_t::m_noredo. */
629 struct trx_temp_undo_t {
630 	/** temporary rollback segment, or NULL if not assigned yet */
631 	trx_rseg_t*	rseg;
632 	/** pointer to the undo log, or NULL if nothing logged yet */
633 	trx_undo_t*	undo;
634 };
635 
636 /** Rollback segments assigned to a transaction for undo logging. */
637 struct trx_rsegs_t {
638 	/** undo log ptr holding a reference to a rollback segment that resides
639 	in the system/undo tablespace, used for undo logging of tables that need
640 	to be recovered on crash. */
641 	trx_undo_ptr_t	m_redo;
642 
643 	/** undo log for temporary tables; discarded immediately after
644 	transaction commit/rollback */
645 	trx_temp_undo_t	m_noredo;
646 };
647 
648 struct trx_t {
649 private:
650   /**
651     Count of references.
652 
653     We can't release the locks nor commit the transaction until this reference
654     is 0. We can change the state to TRX_STATE_COMMITTED_IN_MEMORY to signify
655     that it is no longer "active".
656   */
657 
658   int32_t n_ref;
659 
660 
661 public:
662 	TrxMutex	mutex;		/*!< Mutex protecting the fields
663 					state and lock (except some fields
664 					of lock, which are protected by
665 					lock_sys.mutex) */
666 
667 	trx_id_t	id;		/*!< transaction id */
668 
669 	trx_id_t	no;		/*!< transaction serialization number:
670 					max trx id shortly before the
671 					transaction is moved to
672 					COMMITTED_IN_MEMORY state.
673 					Protected by trx_sys_t::mutex
674 					when trx is in rw_trx_hash. Initially
675 					set to TRX_ID_MAX. */
676 
677 	/** State of the trx from the point of view of concurrency control
678 	and the valid state transitions.
679 
680 	Possible states:
681 
682 	TRX_STATE_NOT_STARTED
683 	TRX_STATE_ACTIVE
684 	TRX_STATE_PREPARED
685 	TRX_STATE_PREPARED_RECOVERED (special case of TRX_STATE_PREPARED)
686 	TRX_STATE_COMMITTED_IN_MEMORY (alias below COMMITTED)
687 
688 	Valid state transitions are:
689 
690 	Regular transactions:
691 	* NOT_STARTED -> ACTIVE -> COMMITTED -> NOT_STARTED
692 
693 	Auto-commit non-locking read-only:
694 	* NOT_STARTED -> ACTIVE -> NOT_STARTED
695 
696 	XA (2PC):
697 	* NOT_STARTED -> ACTIVE -> PREPARED -> COMMITTED -> NOT_STARTED
698 
699 	Recovered XA:
700 	* NOT_STARTED -> PREPARED -> COMMITTED -> (freed)
701 
702 	Recovered XA followed by XA ROLLBACK:
703 	* NOT_STARTED -> PREPARED -> ACTIVE -> COMMITTED -> (freed)
704 
705 	XA (2PC) (shutdown or disconnect before ROLLBACK or COMMIT):
706 	* NOT_STARTED -> PREPARED -> (freed)
707 
708 	Disconnected XA can become recovered:
709 	* ... -> ACTIVE -> PREPARED (connected) -> PREPARED (disconnected)
710 	Disconnected means from mysql e.g due to the mysql client disconnection.
711 	Latching and various transaction lists membership rules:
712 
713 	XA (2PC) transactions are always treated as non-autocommit.
714 
715 	Transitions to ACTIVE or NOT_STARTED occur when transaction
716 	is not in rw_trx_hash (no trx_sys.mutex needed).
717 
718 	Autocommit non-locking read-only transactions move between states
719 	without holding any mutex. They are not in rw_trx_hash.
720 
721 	All transactions, unless they are determined to be ac-nl-ro,
722 	explicitly tagged as read-only or read-write, will first be put
723 	on the read-only transaction list. Only when a !read-only transaction
724 	in the read-only list tries to acquire an X or IX lock on a table
725 	do we remove it from the read-only list and put it on the read-write
726 	list. During this switch we assign it a rollback segment.
727 
728 	When a transaction is NOT_STARTED, it can be in trx_list. It cannot be
729 	in rw_trx_hash.
730 
731 	ACTIVE->PREPARED->COMMITTED is only possible when trx is in rw_trx_hash.
732 	The transition ACTIVE->PREPARED is protected by trx_sys.mutex.
733 
734 	ACTIVE->COMMITTED is possible when the transaction is in
735 	rw_trx_hash.
736 
737 	Transitions to COMMITTED are protected by trx_t::mutex. */
738 	trx_state_t	state;
739 #ifdef WITH_WSREP
740 	/** whether wsrep_on(mysql_thd) held at the start of transaction */
741 	bool		wsrep;
742 	bool is_wsrep() const { return UNIV_UNLIKELY(wsrep); }
743 	/** true, if BF thread is performing unique secondary index scanning */
744 	bool wsrep_UK_scan;
745 	bool is_wsrep_UK_scan() const { return UNIV_UNLIKELY(wsrep_UK_scan); }
746 #else /* WITH_WSREP */
747 	bool is_wsrep() const { return false; }
748 #endif /* WITH_WSREP */
749 
750 	ReadView	read_view;	/*!< consistent read view used in the
751 					transaction, or NULL if not yet set */
752 	trx_lock_t	lock;		/*!< Information about the transaction
753 					locks and state. Protected by
754 					lock_sys.mutex (insertions also
755 					by trx_t::mutex). */
756 
757 	/* These fields are not protected by any mutex. */
758 
759 	/** false=normal transaction, true=recovered (must be rolled back)
760 	or disconnected transaction in XA PREPARE STATE.
761 
762 	This field is accessed by the thread that owns the transaction,
763 	without holding any mutex.
764 	There is only one foreign-thread access in trx_print_low()
765 	and a possible race condition with trx_disconnect_prepared(). */
766 	bool		is_recovered;
767 	const char*	op_info;	/*!< English text describing the
768 					current operation, or an empty
769 					string */
770 	ulint		isolation_level;/*!< TRX_ISO_REPEATABLE_READ, ... */
771 	bool		check_foreigns;	/*!< normally TRUE, but if the user
772 					wants to suppress foreign key checks,
773 					(in table imports, for example) we
774 					set this FALSE */
775 	/*------------------------------*/
776 	/* MySQL has a transaction coordinator to coordinate two phase
777 	commit between multiple storage engines and the binary log. When
778 	an engine participates in a transaction, it's responsible for
779 	registering itself using the trans_register_ha() API. */
780 	bool		is_registered;	/* This flag is set to true after the
781 					transaction has been registered with
782 					the coordinator using the XA API, and
783 					is set to false  after commit or
784 					rollback. */
785 	/** whether this is holding the prepare mutex */
786 	bool		active_commit_ordered;
787 	/*------------------------------*/
788 	bool		check_unique_secondary;
789 					/*!< normally TRUE, but if the user
790 					wants to speed up inserts by
791 					suppressing unique key checks
792 					for secondary indexes when we decide
793 					if we can use the insert buffer for
794 					them, we set this FALSE */
795 	bool		flush_log_later;/* In 2PC, we hold the
796 					prepare_commit mutex across
797 					both phases. In that case, we
798 					defer flush of the logs to disk
799 					until after we release the
800 					mutex. */
801 	bool		must_flush_log_later;/*!< this flag is set to TRUE in
802 					trx_commit() if flush_log_later was
803 					TRUE, and there were modifications by
804 					the transaction; in that case we must
805 					flush the log in
806 					trx_commit_complete_for_mysql() */
807 	ulint		duplicates;	/*!< TRX_DUP_IGNORE | TRX_DUP_REPLACE */
808 	trx_dict_op_t	dict_operation;	/**< @see enum trx_dict_op_t */
809 
810 	/* Fields protected by the srv_conc_mutex. */
811 	bool		declared_to_be_inside_innodb;
812 					/*!< this is TRUE if we have declared
813 					this transaction in
814 					srv_conc_enter_innodb to be inside the
815 					InnoDB engine */
816 	ib_uint32_t	n_tickets_to_enter_innodb;
817 					/*!< this can be > 0 only when
818 					declared_to_... is TRUE; when we come
819 					to srv_conc_innodb_enter, if the value
820 					here is > 0, we decrement this by 1 */
821 	ib_uint32_t	dict_operation_lock_mode;
822 					/*!< 0, RW_S_LATCH, or RW_X_LATCH:
823 					the latch mode trx currently holds
824 					on dict_operation_lock. Protected
825 					by dict_operation_lock. */
826 
827 	/** wall-clock time of the latest transition to TRX_STATE_ACTIVE;
828 	used for diagnostic purposes only */
829 	time_t		start_time;
830 	/** microsecond_interval_timer() of transaction start */
831 	ulonglong	start_time_micro;
832 	lsn_t		commit_lsn;	/*!< lsn at the time of the commit */
833 	table_id_t	table_id;	/*!< Table to drop iff dict_operation
834 					== TRX_DICT_OP_TABLE, or 0. */
835 	/*------------------------------*/
836 	THD*		mysql_thd;	/*!< MySQL thread handle corresponding
837 					to this trx, or NULL */
838 
839 	const char*	mysql_log_file_name;
840 					/*!< if MySQL binlog is used, this field
841 					contains a pointer to the latest file
842 					name; this is NULL if binlog is not
843 					used */
844 	ulonglong	mysql_log_offset;
845 					/*!< if MySQL binlog is used, this
846 					field contains the end offset of the
847 					binlog entry */
848 	/*------------------------------*/
849 	ib_uint32_t	n_mysql_tables_in_use; /*!< number of Innobase tables
850 					used in the processing of the current
851 					SQL statement in MySQL */
852 	ib_uint32_t	mysql_n_tables_locked;
853 					/*!< how many tables the current SQL
854 					statement uses, except those
855 					in consistent read */
856 	/*------------------------------*/
857 	UT_LIST_NODE_T(trx_t) trx_list;	/*!< list of all transactions;
858 					protected by trx_sys.mutex */
859 	/*------------------------------*/
860 	dberr_t		error_state;	/*!< 0 if no error, otherwise error
861 					number; NOTE That ONLY the thread
862 					doing the transaction is allowed to
863 					set this field: this is NOT protected
864 					by any mutex */
865 	const dict_index_t*error_info;	/*!< if the error number indicates a
866 					duplicate key error, a pointer to
867 					the problematic index is stored here */
	ulint		error_key_num;	/*!< if the index creation fails due to
					a duplicate key error, the MySQL key
					number of that index is stored here */
871 	que_t*		graph;		/*!< query currently run in the session,
872 					or NULL if none; NOTE that the query
873 					belongs to the session, and it can
874 					survive over a transaction commit, if
875 					it is a stored procedure with a COMMIT
876 					WORK statement, for instance */
877 	/*------------------------------*/
878 	UT_LIST_BASE_NODE_T(trx_named_savept_t)
879 			trx_savepoints;	/*!< savepoints set with SAVEPOINT ...,
880 					oldest first */
881 	/*------------------------------*/
882 	undo_no_t	undo_no;	/*!< next undo log record number to
883 					assign; since the undo log is
884 					private for a transaction, this
885 					is a simple ascending sequence
886 					with no gaps; thus it represents
887 					the number of modified/inserted
888 					rows in a transaction */
889 	trx_savept_t	last_sql_stat_start;
890 					/*!< undo_no when the last sql statement
891 					was started: in case of an error, trx
892 					is rolled back down to this number */
893 	trx_rsegs_t	rsegs;		/* rollback segments for undo logging */
894 	undo_no_t	roll_limit;	/*!< least undo number to undo during
895 					a partial rollback; 0 otherwise */
896 	bool		in_rollback;	/*!< true when the transaction is
897 					executing a partial or full rollback */
898 	ulint		pages_undone;	/*!< number of undo log pages undone
899 					since the last undo log truncation */
900 	/*------------------------------*/
901 	ulint		n_autoinc_rows;	/*!< no. of AUTO-INC rows required for
902 					an SQL statement. This is useful for
903 					multi-row INSERTs */
904 	ib_vector_t*    autoinc_locks;  /* AUTOINC locks held by this
905 					transaction. Note that these are
906 					also in the lock list trx_locks. This
907 					vector needs to be freed explicitly
908 					when the trx instance is destroyed.
909 					Protected by lock_sys.mutex. */
910 	/*------------------------------*/
911 	bool		read_only;	/*!< true if transaction is flagged
912 					as a READ-ONLY transaction.
913 					if auto_commit && !will_lock
914 					then it will be handled as a
915 					AC-NL-RO-SELECT (Auto Commit Non-Locking
916 					Read Only Select). A read only
917 					transaction will not be assigned an
918 					UNDO log. */
919 	bool		auto_commit;	/*!< true if it is an autocommit */
920 	bool		will_lock;	/*!< set to inform trx_start_low() that
921 					the transaction may acquire locks */
922 	/*------------------------------*/
923 	fts_trx_t*	fts_trx;	/*!< FTS information, or NULL if
924 					transaction hasn't modified tables
925 					with FTS indexes (yet). */
926 	doc_id_t	fts_next_doc_id;/* The document id used for updates */
927 	/*------------------------------*/
	ib_uint32_t	flush_tables;	/*!< if "covering" the FLUSH TABLES,
					count of tables being flushed. */
930 
931 	/*------------------------------*/
932 	bool		ddl;		/*!< true if it is an internal
933 					transaction for DDL */
934 	bool		internal;	/*!< true if it is a system/internal
935 					transaction background task. This
936 					includes DDL transactions too.  Such
937 					transactions are always treated as
938 					read-write. */
939 	/*------------------------------*/
940 #ifdef UNIV_DEBUG
941 	unsigned	start_line;	/*!< Track where it was started from */
942 	const char*	start_file;	/*!< Filename where it was started */
943 #endif /* UNIV_DEBUG */
944 
945 	XID*		xid;		/*!< X/Open XA transaction
946 					identification to identify a
947 					transaction branch */
948 	trx_mod_tables_t mod_tables;	/*!< List of tables that were modified
949 					by this transaction */
950 	/*------------------------------*/
951 	char*		detailed_error;	/*!< detailed error message for last
952 					error, or empty. */
953 private:
954 	/** flush observer used to track flushing of non-redo logged pages
955 	during bulk create index */
956 	FlushObserver*	flush_observer;
957 public:
958 #ifdef WITH_WSREP
959 	os_event_t	wsrep_event;	/* event waited for in srv_conc_slot */
960 #endif /* WITH_WSREP */
961 
962 	rw_trx_hash_element_t *rw_trx_hash_element;
963 	LF_PINS *rw_trx_hash_pins;
964 	ulint		magic_n;
965 
966 	/** @return whether any persistent undo log has been generated */
967 	bool has_logged_persistent() const
968 	{
969 		return(rsegs.m_redo.undo);
970 	}
971 
972 	/** @return whether any undo log has been generated */
973 	bool has_logged() const
974 	{
975 		return(has_logged_persistent() || rsegs.m_noredo.undo);
976 	}
977 
978 	/** @return rollback segment for modifying temporary tables */
979 	trx_rseg_t* get_temp_rseg()
980 	{
981 		if (trx_rseg_t* rseg = rsegs.m_noredo.rseg) {
982 			ut_ad(id != 0);
983 			return(rseg);
984 		}
985 
986 		return(assign_temp_rseg());
987 	}
988 
	/** Set the innodb_log_optimize_ddl page flush observer.
	Declaration only; the definition is not part of this struct body.
	@param[in,out]	space	tablespace
	@param[in,out]	stage	performance_schema accounting */
	void set_flush_observer(fil_space_t* space, ut_stage_alter_t* stage);
993 
	/** Remove the flush observer.
	NOTE(review): presumably clears flush_observer so that
	get_flush_observer() returns NULL afterwards; confirm against the
	out-of-line definition. */
	void remove_flush_observer();
996 
997 	/** @return the flush observer */
998 	FlushObserver* get_flush_observer() const
999 	{
1000 		return flush_observer;
1001 	}
1002 
  /** Transition to committed state, to release implicit locks.
  Declared inline here; the definition is provided outside this struct. */
  inline void commit_state();
1005 
  /** Release any explicit locks of a committing transaction.
  Declared inline here; the definition is provided outside this struct. */
  inline void release_locks();
1008 
1009 
1010   bool is_referenced()
1011   {
1012     return my_atomic_load32_explicit(&n_ref, MY_MEMORY_ORDER_RELAXED) > 0;
1013   }
1014 
1015 
1016   void reference()
1017   {
1018 #ifdef UNIV_DEBUG
1019   int32_t old_n_ref=
1020 #endif
1021     my_atomic_add32_explicit(&n_ref, 1, MY_MEMORY_ORDER_RELAXED);
1022     ut_ad(old_n_ref >= 0);
1023   }
1024 
1025 
1026   void release_reference()
1027   {
1028 #ifdef UNIV_DEBUG
1029   int32_t old_n_ref=
1030 #endif
1031     my_atomic_add32_explicit(&n_ref, -1, MY_MEMORY_ORDER_RELAXED);
1032     ut_ad(old_n_ref > 0);
1033   }
1034 
  /** @return whether this transaction has a lock on
  mysql.innodb_table_stats and mysql.innodb_index_stats
  (declaration only; defined out of line) */
  bool has_stats_table_lock() const;
1038 
  /** Free the memory of this transaction object to trx_pools
  (declaration only; defined out of line). */
  void free();
1041 
1042 
  /** Debug-only consistency check: every statement is a ut_ad()
  assertion. Asserts that the transaction is not started, has no id, no
  undo log, no references, is not a wsrep transaction, has no open read
  view, is not waiting for a lock, holds no record, table or AUTO-INC
  locks, and has no dictionary operation pending. */
  void assert_freed() const
  {
    ut_ad(state == TRX_STATE_NOT_STARTED);
    ut_ad(!id);
    ut_ad(!has_logged());
    /* is_referenced() is not declared const, hence the const_cast */
    ut_ad(!const_cast<trx_t*>(this)->is_referenced());
    ut_ad(!is_wsrep());
    ut_ad(!read_view.is_open());
    ut_ad(!lock.wait_thr);
    ut_ad(UT_LIST_GET_LEN(lock.trx_locks) == 0);
    ut_ad(lock.table_locks.empty());
    /* autoinc_locks may be unallocated (NULL) or allocated but empty */
    ut_ad(!autoinc_locks || ib_vector_is_empty(autoinc_locks));
    ut_ad(dict_operation == TRX_DICT_OP_NONE);
  }
1057 
1058   /** @return whether this is a non-locking autocommit transaction */
1059   bool is_autocommit_non_locking() const { return auto_commit && !will_lock; }
1060 
1061 private:
  /** Assign a rollback segment for modifying temporary tables.
  Called by get_temp_rseg() when rsegs.m_noredo.rseg is not set yet.
  @return the assigned rollback segment */
  trx_rseg_t *assign_temp_rseg();
1065 };
1066 
1067 /**
1068 Check if transaction is started.
1069 @param[in] trx		Transaction whose state we need to check
1070 @reutrn true if transaction is in state started */
1071 inline bool trx_is_started(const trx_t* trx)
1072 {
1073 	return trx->state != TRX_STATE_NOT_STARTED;
1074 }
1075 
/* Transaction isolation levels (trx->isolation_level); the numeric values
0..3 are the "levels" that the comments below refer to */
#define TRX_ISO_READ_UNCOMMITTED	0	/* dirty read: non-locking
						SELECTs are performed so that
						we do not look at a possible
						earlier version of a record;
						thus they are not 'consistent'
						reads under this isolation
						level; otherwise like level
						2 (TRX_ISO_REPEATABLE_READ) */

#define TRX_ISO_READ_COMMITTED		1	/* somewhat Oracle-like
						isolation, except that in
						range UPDATE and DELETE we
						must block phantom rows
						with next-key locks;
						SELECT ... FOR UPDATE and ...
						LOCK IN SHARE MODE only lock
						the index records, NOT the
						gaps before them, and thus
						allow free inserting;
						each consistent read reads its
						own snapshot */

#define TRX_ISO_REPEATABLE_READ		2	/* this is the default;
						all consistent reads in the
						same trx read the same
						snapshot;
						full next-key locking used
						in locking reads to block
						insertions into gaps */

#define TRX_ISO_SERIALIZABLE		3	/* all plain SELECTs are
						converted to LOCK IN SHARE
						MODE reads */
1110 
/* Treatment of duplicate values (trx->duplicates; for example, in inserts).
Multiple flags can be combined with bitwise OR; the values are distinct
single bits (1U and 2U).
NOTE(review): TRX_DUP_IGNORE is named "ignore" but described as "updated";
presumably it covers statements that update the existing row on a duplicate
key -- confirm against the callers that set trx->duplicates. */
#define TRX_DUP_IGNORE	1U	/* duplicate rows are to be updated */
#define TRX_DUP_REPLACE	2U	/* duplicate rows are to be replaced */
1115 
1116 
/** Execution states of a commit node */
enum commit_node_state {
	/** about to send a commit signal to the transaction */
	COMMIT_NODE_SEND = 1,
	/** commit signal sent to the transaction, waiting for completion */
	COMMIT_NODE_WAIT = 2
};
1124 
1125 /** Commit command node in a query graph */
1126 struct commit_node_t{
1127 	que_common_t	common;	/*!< node type: QUE_NODE_COMMIT */
1128 	enum commit_node_state
1129 			state;	/*!< node execution state */
1130 };
1131 
1132 
/** Test if trx->mutex is owned.
@param t	pointer to the transaction; parenthesized in the expansion so
		that an expression argument such as (p + 1) expands correctly
@return the result of mutex_own() on the mutex member of t */
#define trx_mutex_own(t) mutex_own(&(t)->mutex)
1135 
/** Acquire the trx->mutex.
@param t	pointer to the transaction; parenthesized in the expansion so
		that an expression argument such as (p + 1) expands correctly */
#define trx_mutex_enter(t) do {			\
	mutex_enter(&(t)->mutex);		\
} while (0)
1140 
/** Release the trx->mutex.
@param t	pointer to the transaction; parenthesized in the expansion so
		that an expression argument such as (p + 1) expands correctly */
#define trx_mutex_exit(t) do {			\
	mutex_exit(&(t)->mutex);		\
} while (0)
1145 
1146 #include "trx0trx.inl"
1147 
1148 #endif
1149