1 /*****************************************************************************
2 
3 Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
4 
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License, version 2.0,
7 as published by the Free Software Foundation.
8 
9 This program is also distributed with certain software (including
10 but not limited to OpenSSL) that is licensed under separate terms,
11 as designated in a particular file or component or in included license
12 documentation.  The authors of MySQL hereby grant you an additional
13 permission to link the program and your derivative works with the
14 separately licensed software that they have included with MySQL.
15 
16 This program is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19 GNU General Public License, version 2.0, for more details.
20 
21 You should have received a copy of the GNU General Public License along with
22 this program; if not, write to the Free Software Foundation, Inc.,
23 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
24 
25 *****************************************************************************/
26 
27 /**************************************************//**
28 @file trx/trx0trx.cc
29 The transaction
30 
31 Created 3/26/1996 Heikki Tuuri
32 *******************************************************/
33 
34 #include "trx0trx.h"
35 
36 #ifdef UNIV_NONINL
37 #include "trx0trx.ic"
38 #endif
39 
40 #include "trx0undo.h"
41 #include "trx0rseg.h"
42 #include "log0log.h"
43 #include "que0que.h"
44 #include "lock0lock.h"
45 #include "trx0roll.h"
46 #include "usr0sess.h"
47 #include "read0read.h"
48 #include "srv0srv.h"
49 #include "srv0start.h"
50 #include "btr0sea.h"
51 #include "os0proc.h"
52 #include "trx0xa.h"
53 #include "trx0rec.h"
54 #include "trx0purge.h"
55 #include "ha_prototypes.h"
56 #include "srv0mon.h"
57 #include "ut0vec.h"
58 
59 #include<set>
60 
61 /** Set of table_id */
62 typedef std::set<table_id_t>	table_id_set;
63 
64 /** Dummy session used currently in MySQL interface */
65 UNIV_INTERN sess_t*		trx_dummy_sess = NULL;
66 
67 #ifdef UNIV_PFS_MUTEX
68 /* Key to register the mutex with performance schema */
69 UNIV_INTERN mysql_pfs_key_t	trx_mutex_key;
70 /* Key to register the mutex with performance schema */
71 UNIV_INTERN mysql_pfs_key_t	trx_undo_mutex_key;
72 #endif /* UNIV_PFS_MUTEX */
73 
74 /*************************************************************//**
75 Set detailed error message for the transaction. */
76 UNIV_INTERN
77 void
trx_set_detailed_error(trx_t * trx,const char * msg)78 trx_set_detailed_error(
79 /*===================*/
80 	trx_t*		trx,	/*!< in: transaction struct */
81 	const char*	msg)	/*!< in: detailed error message */
82 {
83 	ut_strlcpy(trx->detailed_error, msg, sizeof(trx->detailed_error));
84 }
85 
86 /*************************************************************//**
87 Set detailed error message for the transaction from a file. Note that the
88 file is rewinded before reading from it. */
89 UNIV_INTERN
90 void
trx_set_detailed_error_from_file(trx_t * trx,FILE * file)91 trx_set_detailed_error_from_file(
92 /*=============================*/
93 	trx_t*	trx,	/*!< in: transaction struct */
94 	FILE*	file)	/*!< in: file to read message from */
95 {
96 	os_file_read_string(file, trx->detailed_error,
97 			    sizeof(trx->detailed_error));
98 }
99 
100 /****************************************************************//**
101 Creates and initializes a transaction object. It must be explicitly
102 started with trx_start_if_not_started() before using it. The default
103 isolation level is TRX_ISO_REPEATABLE_READ.
104 @return transaction instance, should never be NULL */
105 static
106 trx_t*
trx_create(void)107 trx_create(void)
108 /*============*/
109 {
110 	trx_t*		trx;
111 	mem_heap_t*	heap;
112 	ib_alloc_t*	heap_alloc;
113 
114 	trx = static_cast<trx_t*>(mem_zalloc(sizeof(*trx)));
115 
116 	mutex_create(trx_mutex_key, &trx->mutex, SYNC_TRX);
117 
118 	trx->magic_n = TRX_MAGIC_N;
119 
120 	trx->state = TRX_STATE_NOT_STARTED;
121 
122 	trx->isolation_level = TRX_ISO_REPEATABLE_READ;
123 
124 	trx->no = TRX_ID_MAX;
125 
126 	trx->support_xa = TRUE;
127 
128 	trx->check_foreigns = TRUE;
129 	trx->check_unique_secondary = TRUE;
130 
131 	trx->dict_operation = TRX_DICT_OP_NONE;
132 
133 	mutex_create(trx_undo_mutex_key, &trx->undo_mutex, SYNC_TRX_UNDO);
134 
135 	trx->error_state = DB_SUCCESS;
136 
137 	trx->lock.que_state = TRX_QUE_RUNNING;
138 
139 	trx->lock.lock_heap = mem_heap_create_typed(
140 		256, MEM_HEAP_FOR_LOCK_HEAP);
141 
142 	trx->search_latch_timeout = BTR_SEA_TIMEOUT;
143 
144 	trx->global_read_view_heap = mem_heap_create(256);
145 
146 	trx->xid.formatID = -1;
147 
148 	trx->op_info = "";
149 
150 	trx->api_trx = false;
151 
152 	trx->api_auto_commit = false;
153 
154 	trx->read_write = true;
155 
156 	heap = mem_heap_create(sizeof(ib_vector_t) + sizeof(void*) * 8);
157 	heap_alloc = ib_heap_allocator_create(heap);
158 
159 	/* Remember to free the vector explicitly in trx_free(). */
160 	trx->autoinc_locks = ib_vector_create(heap_alloc, sizeof(void**), 4);
161 
162 	/* Remember to free the vector explicitly in trx_free(). */
163 	heap = mem_heap_create(sizeof(ib_vector_t) + sizeof(void*) * 128);
164 	heap_alloc = ib_heap_allocator_create(heap);
165 
166 	trx->lock.table_locks = ib_vector_create(
167 		heap_alloc, sizeof(void**), 32);
168 
169 #ifdef WITH_WSREP
170 	trx->wsrep_event = NULL;
171 #endif /* WITH_WSREP */
172  	return(trx);
173 }
174 
175 /********************************************************************//**
176 Creates a transaction object for background operations by the master thread.
177 @return	own: transaction object */
178 UNIV_INTERN
179 trx_t*
trx_allocate_for_background(void)180 trx_allocate_for_background(void)
181 /*=============================*/
182 {
183 	trx_t*	trx;
184 
185 	trx = trx_create();
186 
187 	trx->sess = trx_dummy_sess;
188 
189 	return(trx);
190 }
191 
192 /********************************************************************//**
193 Creates a transaction object for MySQL.
194 @return	own: transaction object */
195 UNIV_INTERN
196 trx_t*
trx_allocate_for_mysql(void)197 trx_allocate_for_mysql(void)
198 /*========================*/
199 {
200 	trx_t*	trx;
201 
202 	trx = trx_allocate_for_background();
203 
204 	mutex_enter(&trx_sys->mutex);
205 
206 	ut_d(trx->in_mysql_trx_list = TRUE);
207 	UT_LIST_ADD_FIRST(mysql_trx_list, trx_sys->mysql_trx_list, trx);
208 
209 	mutex_exit(&trx_sys->mutex);
210 
211 	return(trx);
212 }
213 
214 /********************************************************************//**
215 Frees a transaction object. */
216 static
217 void
trx_free(trx_t * trx)218 trx_free(
219 /*=====*/
220 	trx_t*	trx)	/*!< in, own: trx object */
221 {
222 	ut_a(trx->magic_n == TRX_MAGIC_N);
223 	ut_ad(!trx->in_ro_trx_list);
224 	ut_ad(!trx->in_rw_trx_list);
225 	ut_ad(!trx->in_mysql_trx_list);
226 
227 	mutex_free(&trx->undo_mutex);
228 
229 	if (trx->undo_no_arr != NULL) {
230 		trx_undo_arr_free(trx->undo_no_arr);
231 	}
232 
233 	ut_a(trx->lock.wait_lock == NULL);
234 	ut_a(trx->lock.wait_thr == NULL);
235 
236 	ut_a(!trx->has_search_latch);
237 
238 	ut_a(trx->dict_operation_lock_mode == 0);
239 
240 	if (trx->lock.lock_heap) {
241 		mem_heap_free(trx->lock.lock_heap);
242 	}
243 
244 	ut_a(UT_LIST_GET_LEN(trx->lock.trx_locks) == 0);
245 
246 	if (trx->global_read_view_heap) {
247 		mem_heap_free(trx->global_read_view_heap);
248 	}
249 
250 	ut_a(ib_vector_is_empty(trx->autoinc_locks));
251 	/* We allocated a dedicated heap for the vector. */
252 	ib_vector_free(trx->autoinc_locks);
253 
254 	if (trx->lock.table_locks != NULL) {
255 		/* We allocated a dedicated heap for the vector. */
256 		ib_vector_free(trx->lock.table_locks);
257 	}
258 
259 	mutex_free(&trx->mutex);
260 
261 	mem_free(trx);
262 }
263 
264 /********************************************************************//**
265 Frees a transaction object of a background operation of the master thread. */
266 UNIV_INTERN
267 void
trx_free_for_background(trx_t * trx)268 trx_free_for_background(
269 /*====================*/
270 	trx_t*	trx)	/*!< in, own: trx object */
271 {
272 	if (trx->declared_to_be_inside_innodb) {
273 
274 		ib_logf(IB_LOG_LEVEL_ERROR,
275 			"Freeing a trx (%p, " TRX_ID_FMT ") which is declared "
276 			"to be processing inside InnoDB", trx, trx->id);
277 
278 		trx_print(stderr, trx, 600);
279 		putc('\n', stderr);
280 
281 		/* This is an error but not a fatal error. We must keep
282 		the counters like srv_conc_n_threads accurate. */
283 		srv_conc_force_exit_innodb(trx);
284 	}
285 
286 	if (trx->n_mysql_tables_in_use != 0
287 	    || trx->mysql_n_tables_locked != 0) {
288 
289 		ib_logf(IB_LOG_LEVEL_ERROR,
290 			"MySQL is freeing a thd though "
291 			"trx->n_mysql_tables_in_use is %lu and "
292 			"trx->mysql_n_tables_locked is %lu.",
293 			(ulong) trx->n_mysql_tables_in_use,
294 			(ulong) trx->mysql_n_tables_locked);
295 
296 		trx_print(stderr, trx, 600);
297 		ut_print_buf(stderr, trx, sizeof(trx_t));
298 		putc('\n', stderr);
299 	}
300 
301 	ut_a(trx->state == TRX_STATE_NOT_STARTED);
302 	ut_a(trx->insert_undo == NULL);
303 	ut_a(trx->update_undo == NULL);
304 	ut_a(trx->read_view == NULL);
305 
306 	trx_free(trx);
307 }
308 
309 /********************************************************************//**
310 At shutdown, frees a transaction object that is in the PREPARED state. */
311 UNIV_INTERN
312 void
trx_free_prepared(trx_t * trx)313 trx_free_prepared(
314 /*==============*/
315 	trx_t*	trx)	/*!< in, own: trx object */
316 {
317 	ut_a(trx_state_eq(trx, TRX_STATE_PREPARED));
318 	ut_a(trx->magic_n == TRX_MAGIC_N);
319 
320 	lock_trx_release_locks(trx);
321 	trx_undo_free_prepared(trx);
322 
323 	assert_trx_in_rw_list(trx);
324 
325 	ut_a(!trx->read_only);
326 
327 	UT_LIST_REMOVE(trx_list, trx_sys->rw_trx_list, trx);
328 	ut_d(trx->in_rw_trx_list = FALSE);
329 
330 	/* Undo trx_resurrect_table_locks(). */
331 	UT_LIST_INIT(trx->lock.trx_locks);
332 
333 	trx_free(trx);
334 }
335 
336 /********************************************************************//**
337 Frees a transaction object for MySQL. */
338 UNIV_INTERN
339 void
trx_free_for_mysql(trx_t * trx)340 trx_free_for_mysql(
341 /*===============*/
342 	trx_t*	trx)	/*!< in, own: trx object */
343 {
344 	mutex_enter(&trx_sys->mutex);
345 
346 	ut_ad(trx->in_mysql_trx_list);
347 	ut_d(trx->in_mysql_trx_list = FALSE);
348 	UT_LIST_REMOVE(mysql_trx_list, trx_sys->mysql_trx_list, trx);
349 
350 	ut_ad(trx_sys_validate_trx_list());
351 
352 	mutex_exit(&trx_sys->mutex);
353 
354 	trx_free_for_background(trx);
355 }
356 
357 /****************************************************************//**
358 Inserts the trx handle in the trx system trx list in the right position.
359 The list is sorted on the trx id so that the biggest id is at the list
360 start. This function is used at the database startup to insert incomplete
361 transactions to the list. */
362 static
363 void
trx_list_rw_insert_ordered(trx_t * trx)364 trx_list_rw_insert_ordered(
365 /*=======================*/
366 	trx_t*	trx)	/*!< in: trx handle */
367 {
368 	trx_t*	trx2;
369 
370 	ut_ad(!trx->read_only);
371 
372 	ut_d(trx->start_file = __FILE__);
373 	ut_d(trx->start_line = __LINE__);
374 
375 	ut_a(srv_is_being_started);
376 	ut_ad(!trx->in_ro_trx_list);
377 	ut_ad(!trx->in_rw_trx_list);
378 	ut_ad(trx->state != TRX_STATE_NOT_STARTED);
379 	ut_ad(trx->is_recovered);
380 
381 	for (trx2 = UT_LIST_GET_FIRST(trx_sys->rw_trx_list);
382 	     trx2 != NULL;
383 	     trx2 = UT_LIST_GET_NEXT(trx_list, trx2)) {
384 
385 		assert_trx_in_rw_list(trx2);
386 
387 		if (trx->id >= trx2->id) {
388 
389 			ut_ad(trx->id > trx2->id);
390 			break;
391 		}
392 	}
393 
394 	if (trx2 != NULL) {
395 		trx2 = UT_LIST_GET_PREV(trx_list, trx2);
396 
397 		if (trx2 == NULL) {
398 			UT_LIST_ADD_FIRST(trx_list, trx_sys->rw_trx_list, trx);
399 		} else {
400 			UT_LIST_INSERT_AFTER(
401 				trx_list, trx_sys->rw_trx_list, trx2, trx);
402 		}
403 	} else {
404 		UT_LIST_ADD_LAST(trx_list, trx_sys->rw_trx_list, trx);
405 	}
406 
407 #ifdef UNIV_DEBUG
408 	if (trx->id > trx_sys->rw_max_trx_id) {
409 		trx_sys->rw_max_trx_id = trx->id;
410 	}
411 #endif /* UNIV_DEBUG */
412 
413 	ut_ad(!trx->in_rw_trx_list);
414 	ut_d(trx->in_rw_trx_list = TRUE);
415 }
416 
417 /****************************************************************//**
418 Resurrect the table locks for a resurrected transaction. */
419 static
420 void
trx_resurrect_table_locks(trx_t * trx,const trx_undo_t * undo)421 trx_resurrect_table_locks(
422 /*======================*/
423 	trx_t*			trx,	/*!< in/out: transaction */
424 	const trx_undo_t*	undo)	/*!< in: undo log */
425 {
426 	mtr_t			mtr;
427 	page_t*			undo_page;
428 	trx_undo_rec_t*		undo_rec;
429 	table_id_set		tables;
430 
431 	ut_ad(undo == trx->insert_undo || undo == trx->update_undo);
432 
433 	if (trx_state_eq(trx, TRX_STATE_COMMITTED_IN_MEMORY)
434 	    || undo->empty) {
435 		return;
436 	}
437 
438 	mtr_start(&mtr);
439 	/* trx_rseg_mem_create() may have acquired an X-latch on this
440 	page, so we cannot acquire an S-latch. */
441 	undo_page = trx_undo_page_get(
442 		undo->space, undo->zip_size, undo->top_page_no, &mtr);
443 	undo_rec = undo_page + undo->top_offset;
444 
445 	do {
446 		ulint		type;
447 		ulint		cmpl_info;
448 		bool		updated_extern;
449 		undo_no_t	undo_no;
450 		table_id_t	table_id;
451 
452 		page_t*		undo_rec_page = page_align(undo_rec);
453 
454 		if (undo_rec_page != undo_page) {
455 			if (!mtr_memo_release(&mtr,
456 					      buf_block_align(undo_page),
457 					      MTR_MEMO_PAGE_X_FIX)) {
458 				/* The page of the previous undo_rec
459 				should have been latched by
460 				trx_undo_page_get() or
461 				trx_undo_get_prev_rec(). */
462 				ut_ad(0);
463 			}
464 
465 			undo_page = undo_rec_page;
466 		}
467 
468 		trx_undo_rec_get_pars(
469 			undo_rec, &type, &cmpl_info,
470 			&updated_extern, &undo_no, &table_id);
471 		tables.insert(table_id);
472 
473 		undo_rec = trx_undo_get_prev_rec(
474 			undo_rec, undo->hdr_page_no,
475 			undo->hdr_offset, false, &mtr);
476 	} while (undo_rec);
477 
478 	mtr_commit(&mtr);
479 
480 	for (table_id_set::const_iterator i = tables.begin();
481 	     i != tables.end(); i++) {
482 		if (dict_table_t* table = dict_table_open_on_id(
483 			    *i, FALSE, DICT_TABLE_OP_LOAD_TABLESPACE)) {
484 			if (table->ibd_file_missing
485 			    || dict_table_is_temporary(table)) {
486 				mutex_enter(&dict_sys->mutex);
487 				dict_table_close(table, TRUE, FALSE);
488 				dict_table_remove_from_cache(table);
489 				mutex_exit(&dict_sys->mutex);
490 				continue;
491 			}
492 
493 			lock_table_ix_resurrect(table, trx);
494 
495 			DBUG_PRINT("ib_trx",
496 				   ("resurrect" TRX_ID_FMT
497 				    "  table '%s' IX lock from %s undo",
498 				    trx->id, table->name,
499 				    undo == trx->insert_undo
500 				    ? "insert" : "update"));
501 
502 			dict_table_close(table, FALSE, FALSE);
503 		}
504 	}
505 }
506 
507 /****************************************************************//**
508 Resurrect the transactions that were doing inserts the time of the
509 crash, they need to be undone.
510 @return trx_t instance  */
511 static
512 trx_t*
trx_resurrect_insert(trx_undo_t * undo,trx_rseg_t * rseg)513 trx_resurrect_insert(
514 /*=================*/
515 	trx_undo_t*	undo,		/*!< in: entry to UNDO */
516 	trx_rseg_t*	rseg)		/*!< in: rollback segment */
517 {
518 	trx_t*		trx;
519 
520 	trx = trx_allocate_for_background();
521 
522 	trx->rseg = rseg;
523 	trx->xid = undo->xid;
524 	trx->id = undo->trx_id;
525 	trx->insert_undo = undo;
526 	trx->is_recovered = TRUE;
527 
528 	/* This is single-threaded startup code, we do not need the
529 	protection of trx->mutex or trx_sys->mutex here. */
530 
531 	if (undo->state != TRX_UNDO_ACTIVE) {
532 
533 		/* Prepared transactions are left in the prepared state
534 		waiting for a commit or abort decision from MySQL */
535 
536 		if (undo->state == TRX_UNDO_PREPARED) {
537 
538 			fprintf(stderr,
539 				"InnoDB: Transaction " TRX_ID_FMT " was in the"
540 				" XA prepared state.\n", trx->id);
541 
542 			if (srv_force_recovery == 0) {
543 
544 				trx->state = TRX_STATE_PREPARED;
545 				trx_sys->n_prepared_trx++;
546 				trx_sys->n_prepared_recovered_trx++;
547 			} else {
548 				fprintf(stderr,
549 					"InnoDB: Since innodb_force_recovery"
550 					" > 0, we will rollback it anyway.\n");
551 
552 				trx->state = TRX_STATE_ACTIVE;
553 			}
554 		} else {
555 			trx->state = TRX_STATE_COMMITTED_IN_MEMORY;
556 		}
557 
558 		/* We give a dummy value for the trx no; this should have no
559 		relevance since purge is not interested in committed
560 		transaction numbers, unless they are in the history
561 		list, in which case it looks the number from the disk based
562 		undo log structure */
563 
564 		trx->no = trx->id;
565 	} else {
566 		trx->state = TRX_STATE_ACTIVE;
567 
568 		/* A running transaction always has the number
569 		field inited to TRX_ID_MAX */
570 
571 		trx->no = TRX_ID_MAX;
572 	}
573 
574 	/* trx_start_low() is not called with resurrect, so need to initialize
575 	start time here.*/
576 	if (trx->state == TRX_STATE_ACTIVE
577 	    || trx->state == TRX_STATE_PREPARED) {
578 		trx->start_time = ut_time();
579 	}
580 
581 	if (undo->dict_operation) {
582 		trx_set_dict_operation(trx, TRX_DICT_OP_TABLE);
583 		trx->table_id = undo->table_id;
584 	}
585 
586 	if (!undo->empty) {
587 		trx->undo_no = undo->top_undo_no + 1;
588 	}
589 
590 	return(trx);
591 }
592 
593 /****************************************************************//**
594 Prepared transactions are left in the prepared state waiting for a
595 commit or abort decision from MySQL */
596 static
597 void
trx_resurrect_update_in_prepared_state(trx_t * trx,const trx_undo_t * undo)598 trx_resurrect_update_in_prepared_state(
599 /*===================================*/
600 	trx_t*			trx,	/*!< in,out: transaction */
601 	const trx_undo_t*	undo)	/*!< in: update UNDO record */
602 {
603 	/* This is single-threaded startup code, we do not need the
604 	protection of trx->mutex or trx_sys->mutex here. */
605 
606 	if (undo->state == TRX_UNDO_PREPARED) {
607 		fprintf(stderr,
608 			"InnoDB: Transaction " TRX_ID_FMT
609 			" was in the XA prepared state.\n", trx->id);
610 
611 		if (srv_force_recovery == 0) {
612 			if (trx_state_eq(trx, TRX_STATE_NOT_STARTED)) {
613 				trx_sys->n_prepared_trx++;
614 				trx_sys->n_prepared_recovered_trx++;
615 			} else {
616 				ut_ad(trx_state_eq(trx, TRX_STATE_PREPARED));
617 			}
618 
619 			trx->state = TRX_STATE_PREPARED;
620 		} else {
621 			fprintf(stderr,
622 				"InnoDB: Since innodb_force_recovery"
623 				" > 0, we will rollback it anyway.\n");
624 
625 			trx->state = TRX_STATE_ACTIVE;
626 		}
627 	} else {
628 		trx->state = TRX_STATE_COMMITTED_IN_MEMORY;
629 	}
630 }
631 
632 /****************************************************************//**
633 Resurrect the transactions that were doing updates the time of the
634 crash, they need to be undone. */
635 static
636 void
trx_resurrect_update(trx_t * trx,trx_undo_t * undo,trx_rseg_t * rseg)637 trx_resurrect_update(
638 /*=================*/
639 	trx_t*		trx,	/*!< in/out: transaction */
640 	trx_undo_t*	undo,	/*!< in/out: update UNDO record */
641 	trx_rseg_t*	rseg)	/*!< in/out: rollback segment */
642 {
643 	trx->rseg = rseg;
644 	trx->xid = undo->xid;
645 	trx->id = undo->trx_id;
646 	trx->update_undo = undo;
647 	trx->is_recovered = TRUE;
648 
649 	/* This is single-threaded startup code, we do not need the
650 	protection of trx->mutex or trx_sys->mutex here. */
651 
652 	if (undo->state != TRX_UNDO_ACTIVE) {
653 		trx_resurrect_update_in_prepared_state(trx, undo);
654 
655 		/* We give a dummy value for the trx number */
656 
657 		trx->no = trx->id;
658 
659 	} else {
660 		trx->state = TRX_STATE_ACTIVE;
661 
662 		/* A running transaction always has the number field inited to
663 		TRX_ID_MAX */
664 
665 		trx->no = TRX_ID_MAX;
666 	}
667 
668 	/* trx_start_low() is not called with resurrect, so need to initialize
669 	start time here.*/
670 	if (trx->state == TRX_STATE_ACTIVE
671 	    || trx->state == TRX_STATE_PREPARED) {
672 		trx->start_time = ut_time();
673 	}
674 
675 	if (undo->dict_operation) {
676 		trx_set_dict_operation(trx, TRX_DICT_OP_TABLE);
677 		trx->table_id = undo->table_id;
678 	}
679 
680 	if (!undo->empty && undo->top_undo_no >= trx->undo_no) {
681 
682 		trx->undo_no = undo->top_undo_no + 1;
683 	}
684 }
685 
686 /****************************************************************//**
687 Creates trx objects for transactions and initializes the trx list of
688 trx_sys at database start. Rollback segment and undo log lists must
689 already exist when this function is called, because the lists of
690 transactions to be rolled back or cleaned up are built based on the
691 undo log lists. */
692 UNIV_INTERN
693 void
trx_lists_init_at_db_start(void)694 trx_lists_init_at_db_start(void)
695 /*============================*/
696 {
697 	ulint		i;
698 
699 	ut_a(srv_is_being_started);
700 
701 	UT_LIST_INIT(trx_sys->ro_trx_list);
702 	UT_LIST_INIT(trx_sys->rw_trx_list);
703 
704 	/* Look from the rollback segments if there exist undo logs for
705 	transactions */
706 
707 	for (i = 0; i < TRX_SYS_N_RSEGS; ++i) {
708 		trx_undo_t*	undo;
709 		trx_rseg_t*	rseg;
710 
711 		rseg = trx_sys->rseg_array[i];
712 
713 		if (rseg == NULL) {
714 			continue;
715 		}
716 
717 		/* Resurrect transactions that were doing inserts. */
718 		for (undo = UT_LIST_GET_FIRST(rseg->insert_undo_list);
719 		     undo != NULL;
720 		     undo = UT_LIST_GET_NEXT(undo_list, undo)) {
721 			trx_t*	trx;
722 
723 			trx = trx_resurrect_insert(undo, rseg);
724 
725 			trx_list_rw_insert_ordered(trx);
726 
727 			trx_resurrect_table_locks(trx, undo);
728 		}
729 
730 		/* Ressurrect transactions that were doing updates. */
731 		for (undo = UT_LIST_GET_FIRST(rseg->update_undo_list);
732 		     undo != NULL;
733 		     undo = UT_LIST_GET_NEXT(undo_list, undo)) {
734 			trx_t*	trx;
735 			ibool	trx_created;
736 
737 			/* Check the trx_sys->rw_trx_list first. */
738 			mutex_enter(&trx_sys->mutex);
739 			trx = trx_get_rw_trx_by_id(undo->trx_id);
740 			mutex_exit(&trx_sys->mutex);
741 
742 			if (trx == NULL) {
743 				trx = trx_allocate_for_background();
744 				trx_created = TRUE;
745 			} else {
746 				trx_created = FALSE;
747 			}
748 
749 			trx_resurrect_update(trx, undo, rseg);
750 
751 			if (trx_created) {
752 				trx_list_rw_insert_ordered(trx);
753 			}
754 
755 			trx_resurrect_table_locks(trx, undo);
756 		}
757 	}
758 }
759 
760 /******************************************************************//**
761 Assigns a rollback segment to a transaction in a round-robin fashion.
762 @return	assigned rollback segment instance */
763 static
764 trx_rseg_t*
trx_assign_rseg_low(ulong max_undo_logs,ulint n_tablespaces)765 trx_assign_rseg_low(
766 /*================*/
767 	ulong	max_undo_logs,	/*!< in: maximum number of UNDO logs to use */
768 	ulint	n_tablespaces)	/*!< in: number of rollback tablespaces */
769 {
770 	ulint		i;
771 	trx_rseg_t*	rseg;
772 	static ulint	latest_rseg = 0;
773 
774 	if (srv_read_only_mode) {
775 		ut_a(max_undo_logs == ULONG_UNDEFINED);
776 		return(NULL);
777 	}
778 
779 	/* This breaks true round robin but that should be OK. */
780 
781 	ut_a(max_undo_logs > 0 && max_undo_logs <= TRX_SYS_N_RSEGS);
782 
783 	i = latest_rseg++;
784         i %= max_undo_logs;
785 
786 	/* Note: The assumption here is that there can't be any gaps in
787 	the array. Once we implement more flexible rollback segment
788 	management this may not hold. The assertion checks for that case. */
789 
790 	if (trx_sys->rseg_array[0] == NULL) {
791 		return(NULL);
792 	}
793 
794 	/* Skip the system tablespace if we have more than one tablespace
795 	defined for rollback segments. We want all UNDO records to be in
796 	the non-system tablespaces. */
797 
798 	do {
799 		rseg = trx_sys->rseg_array[i];
800 		ut_a(rseg == NULL || i == rseg->id);
801 
802 		i = (rseg == NULL) ? 0 : i + 1;
803 
804 	} while (rseg == NULL
805 		 || (rseg->space == 0
806 		     && n_tablespaces > 0
807 		     && trx_sys->rseg_array[1] != NULL));
808 
809 	return(rseg);
810 }
811 
812 /****************************************************************//**
813 Assign a read-only transaction a rollback-segment, if it is attempting
814 to write to a TEMPORARY table. */
815 UNIV_INTERN
816 void
trx_assign_rseg(trx_t * trx)817 trx_assign_rseg(
818 /*============*/
819 	trx_t*		trx)		/*!< A read-only transaction that
820 					needs to be assigned a RBS. */
821 {
822 	ut_a(trx->rseg == 0);
823 	ut_a(trx->read_only);
824 	ut_a(!srv_read_only_mode);
825 	ut_a(!trx_is_autocommit_non_locking(trx));
826 
827 	trx->rseg = trx_assign_rseg_low(srv_undo_logs, srv_undo_tablespaces);
828 }
829 
830 /****************************************************************//**
831 Starts a transaction. */
832 static
833 void
trx_start_low(trx_t * trx)834 trx_start_low(
835 /*==========*/
836 	trx_t*	trx)		/*!< in: transaction */
837 {
838 	ut_ad(trx->rseg == NULL);
839 
840 	ut_ad(trx->start_file != 0);
841 	ut_ad(trx->start_line != 0);
842 	ut_ad(!trx->is_recovered);
843 	ut_ad(trx_state_eq(trx, TRX_STATE_NOT_STARTED));
844 	ut_ad(UT_LIST_GET_LEN(trx->lock.trx_locks) == 0);
845 
846 	/* Check whether it is an AUTOCOMMIT SELECT */
847 	trx->auto_commit = (trx->api_trx && trx->api_auto_commit)
848 			   || thd_trx_is_auto_commit(trx->mysql_thd);
849 
850 	trx->read_only =
851 		(trx->api_trx && !trx->read_write)
852 		|| (!trx->ddl && thd_trx_is_read_only(trx->mysql_thd))
853 		|| srv_read_only_mode;
854 
855 	if (!trx->auto_commit) {
856 		++trx->will_lock;
857 	} else if (trx->will_lock == 0) {
858 		trx->read_only = TRUE;
859 	}
860 
861 	if (!trx->read_only) {
862 		trx->rseg = trx_assign_rseg_low(
863 			srv_undo_logs, srv_undo_tablespaces);
864 	}
865 
866 #ifdef WITH_WSREP
867         memset(&trx->xid, 0, sizeof(trx->xid));
868         trx->xid.formatID = -1;
869 #endif /* WITH_WSREP */
870 
871 	/* The initial value for trx->no: TRX_ID_MAX is used in
872 	read_view_open_now: */
873 
874 	trx->no = TRX_ID_MAX;
875 
876 	ut_a(ib_vector_is_empty(trx->autoinc_locks));
877 	ut_a(ib_vector_is_empty(trx->lock.table_locks));
878 
879 	mutex_enter(&trx_sys->mutex);
880 
881 	/* If this transaction came from trx_allocate_for_mysql(),
882 	trx->in_mysql_trx_list would hold. In that case, the trx->state
883 	change must be protected by the trx_sys->mutex, so that
884 	lock_print_info_all_transactions() will have a consistent view. */
885 
886 	trx->state = TRX_STATE_ACTIVE;
887 
888 	trx->id = trx_sys_get_new_trx_id();
889 
890 	ut_ad(!trx->in_rw_trx_list);
891 	ut_ad(!trx->in_ro_trx_list);
892 
893 	if (trx->read_only) {
894 
895 		/* Note: The trx_sys_t::ro_trx_list doesn't really need to
896 		be ordered, we should exploit this using a list type that
897 		doesn't need a list wide lock to increase concurrency. */
898 
899 		if (!trx_is_autocommit_non_locking(trx)) {
900 			UT_LIST_ADD_FIRST(trx_list, trx_sys->ro_trx_list, trx);
901 			ut_d(trx->in_ro_trx_list = TRUE);
902 		}
903 	} else {
904 
905 		ut_ad(trx->rseg != NULL
906 		      || srv_force_recovery >= SRV_FORCE_NO_TRX_UNDO);
907 
908 		ut_ad(!trx_is_autocommit_non_locking(trx));
909 		UT_LIST_ADD_FIRST(trx_list, trx_sys->rw_trx_list, trx);
910 		ut_d(trx->in_rw_trx_list = TRUE);
911 #ifdef UNIV_DEBUG
912 		if (trx->id > trx_sys->rw_max_trx_id) {
913 			trx_sys->rw_max_trx_id = trx->id;
914 		}
915 #endif /* UNIV_DEBUG */
916 	}
917 
918 	ut_ad(trx_sys_validate_trx_list());
919 
920 	mutex_exit(&trx_sys->mutex);
921 
922 	trx->start_time = ut_time();
923 
924 	MONITOR_INC(MONITOR_TRX_ACTIVE);
925 }
926 
927 /****************************************************************//**
928 Set the transaction serialisation number. */
929 static
930 void
trx_serialisation_number_get(trx_t * trx)931 trx_serialisation_number_get(
932 /*=========================*/
933 	trx_t*		trx)	/*!< in: transaction */
934 {
935 	trx_rseg_t*	rseg;
936 
937 	rseg = trx->rseg;
938 
939 	ut_ad(mutex_own(&rseg->mutex));
940 
941 	mutex_enter(&trx_sys->mutex);
942 
943 	trx->no = trx_sys_get_new_trx_id();
944 
945 	/* If the rollack segment is not empty then the
946 	new trx_t::no can't be less than any trx_t::no
947 	already in the rollback segment. User threads only
948 	produce events when a rollback segment is empty. */
949 
950 	if (rseg->last_page_no == FIL_NULL) {
951 		void*		ptr;
952 		rseg_queue_t	rseg_queue;
953 
954 		rseg_queue.rseg = rseg;
955 		rseg_queue.trx_no = trx->no;
956 
957 		mutex_enter(&purge_sys->bh_mutex);
958 
959 		/* This is to reduce the pressure on the trx_sys_t::mutex
960 		though in reality it should make very little (read no)
961 		difference because this code path is only taken when the
962 		rbs is empty. */
963 
964 		mutex_exit(&trx_sys->mutex);
965 
966 		ptr = ib_bh_push(purge_sys->ib_bh, &rseg_queue);
967 		ut_a(ptr);
968 
969 		mutex_exit(&purge_sys->bh_mutex);
970 	} else {
971 		mutex_exit(&trx_sys->mutex);
972 	}
973 }
974 
975 /****************************************************************//**
976 Assign the transaction its history serialisation number and write the
977 update UNDO log record to the assigned rollback segment. */
978 static MY_ATTRIBUTE((nonnull))
979 void
trx_write_serialisation_history(trx_t * trx,mtr_t * mtr)980 trx_write_serialisation_history(
981 /*============================*/
982 	trx_t*		trx,	/*!< in/out: transaction */
983 	mtr_t*		mtr)	/*!< in/out: mini-transaction */
984 {
985 #ifdef WITH_WSREP
986         trx_sysf_t* sys_header;
987 #endif /* WITH_WSREP */
988 	trx_rseg_t*	rseg;
989 
990 	rseg = trx->rseg;
991 
992 	/* Change the undo log segment states from TRX_UNDO_ACTIVE
993 	to some other state: these modifications to the file data
994 	structure define the transaction as committed in the file
995 	based domain, at the serialization point of the log sequence
996 	number lsn obtained below. */
997 
998 	if (trx->update_undo != NULL) {
999 		page_t*		undo_hdr_page;
1000 		trx_undo_t*	undo = trx->update_undo;
1001 
1002 		/* We have to hold the rseg mutex because update
1003 		log headers have to be put to the history list in the
1004 		(serialisation) order of the UNDO trx number. This is
1005 		required for the purge in-memory data structures too. */
1006 
1007 		mutex_enter(&rseg->mutex);
1008 
1009 		/* Assign the transaction serialisation number and also
1010 		update the purge min binary heap if this is the first
1011 		UNDO log being written to the assigned rollback segment. */
1012 
1013 		trx_serialisation_number_get(trx);
1014 
1015 		/* It is not necessary to obtain trx->undo_mutex here
1016 		because only a single OS thread is allowed to do the
1017 		transaction commit for this transaction. */
1018 
1019 		undo_hdr_page = trx_undo_set_state_at_finish(undo, mtr);
1020 
1021 		trx_undo_update_cleanup(trx, undo_hdr_page, mtr);
1022 	} else {
1023 		mutex_enter(&rseg->mutex);
1024 	}
1025 
1026 	if (trx->insert_undo != NULL) {
1027 		trx_undo_set_state_at_finish(trx->insert_undo, mtr);
1028 	}
1029 
1030 	mutex_exit(&rseg->mutex);
1031 
1032 	MONITOR_INC(MONITOR_TRX_COMMIT_UNDO);
1033 
1034 #ifdef WITH_WSREP
1035         sys_header = trx_sysf_get(mtr);
1036         /* Update latest MySQL wsrep XID in trx sys header. */
1037         if (wsrep_is_wsrep_xid(&trx->xid))
1038         {
1039             trx_sys_update_wsrep_checkpoint(&trx->xid, sys_header, mtr);
1040         }
1041 #endif /* WITH_WSREP */
1042 
1043 	/* Update the latest MySQL binlog name and offset info
1044 	in trx sys header if MySQL binlogging is on or the database
1045 	server is a MySQL replication slave */
1046 
1047 	if (trx->mysql_log_file_name
1048 	    && trx->mysql_log_file_name[0] != '\0') {
1049 
1050 		trx_sys_update_mysql_binlog_offset(
1051 			trx->mysql_log_file_name,
1052 			trx->mysql_log_offset,
1053 #ifdef WITH_WSREP
1054 			TRX_SYS_MYSQL_LOG_INFO, sys_header, mtr);
1055 #else
1056 			TRX_SYS_MYSQL_LOG_INFO, mtr);
1057 #endif /* WITH_WSREP */
1058 
1059 		trx->mysql_log_file_name = NULL;
1060 	}
1061 }
1062 
1063 /********************************************************************
1064 Finalize a transaction containing updates for a FTS table. */
1065 static MY_ATTRIBUTE((nonnull))
1066 void
trx_finalize_for_fts_table(fts_trx_table_t * ftt)1067 trx_finalize_for_fts_table(
1068 /*=======================*/
1069         fts_trx_table_t*        ftt)            /* in: FTS trx table */
1070 {
1071 	fts_t*                  fts = ftt->table->fts;
1072 	fts_doc_ids_t*          doc_ids = ftt->added_doc_ids;
1073 
1074 	mutex_enter(&fts->bg_threads_mutex);
1075 
1076 	if (fts->fts_status & BG_THREAD_STOP) {
1077 		/* The table is about to be dropped, no use
1078 		adding anything to its work queue. */
1079 
1080 		mutex_exit(&fts->bg_threads_mutex);
1081 	} else {
1082 		mem_heap_t*     heap;
1083 		mutex_exit(&fts->bg_threads_mutex);
1084 
1085 		ut_a(fts->add_wq);
1086 
1087 		heap = static_cast<mem_heap_t*>(doc_ids->self_heap->arg);
1088 
1089 		ib_wqueue_add(fts->add_wq, doc_ids, heap);
1090 
1091 		/* fts_trx_table_t no longer owns the list. */
1092 		ftt->added_doc_ids = NULL;
1093 	}
1094 }
1095 
1096 /******************************************************************//**
1097 Finalize a transaction containing updates to FTS tables. */
1098 static MY_ATTRIBUTE((nonnull))
1099 void
trx_finalize_for_fts(trx_t * trx,bool is_commit)1100 trx_finalize_for_fts(
1101 /*=================*/
1102 	trx_t*	trx,		/*!< in/out: transaction */
1103 	bool	is_commit)	/*!< in: true if the transaction was
1104 				committed, false if it was rolled back. */
1105 {
1106 	if (is_commit) {
1107 		const ib_rbt_node_t*	node;
1108 		ib_rbt_t*		tables;
1109 		fts_savepoint_t*	savepoint;
1110 
1111 		savepoint = static_cast<fts_savepoint_t*>(
1112 			ib_vector_last(trx->fts_trx->savepoints));
1113 
1114 		tables = savepoint->tables;
1115 
1116 		for (node = rbt_first(tables);
1117 		     node;
1118 		     node = rbt_next(tables, node)) {
1119 			fts_trx_table_t**	ftt;
1120 
1121 			ftt = rbt_value(fts_trx_table_t*, node);
1122 
1123 			if ((*ftt)->added_doc_ids) {
1124 				trx_finalize_for_fts_table(*ftt);
1125 			}
1126 		}
1127 	}
1128 
1129 	fts_trx_free(trx->fts_trx);
1130 	trx->fts_trx = NULL;
1131 }
1132 
1133 /**********************************************************************//**
1134 If required, flushes the log to disk based on the value of
1135 innodb_flush_log_at_trx_commit. */
1136 static
1137 void
trx_flush_log_if_needed_low(lsn_t lsn)1138 trx_flush_log_if_needed_low(
1139 /*========================*/
1140 	lsn_t	lsn)	/*!< in: lsn up to which logs are to be
1141 			flushed. */
1142 {
1143 	switch (srv_flush_log_at_trx_commit) {
1144 	case 0:
1145 		/* Do nothing */
1146 		break;
1147 	case 1:
1148 		/* Write the log and optionally flush it to disk */
1149 		log_write_up_to(lsn, LOG_WAIT_ONE_GROUP,
1150 				srv_unix_file_flush_method != SRV_UNIX_NOSYNC);
1151 		break;
1152 	case 2:
1153 		/* Write the log but do not flush it to disk */
1154 		log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, FALSE);
1155 
1156 		break;
1157 	default:
1158 		ut_error;
1159 	}
1160 }
1161 
1162 /**********************************************************************//**
1163 If required, flushes the log to disk based on the value of
1164 innodb_flush_log_at_trx_commit. */
1165 static MY_ATTRIBUTE((nonnull))
1166 void
trx_flush_log_if_needed(lsn_t lsn,trx_t * trx)1167 trx_flush_log_if_needed(
1168 /*====================*/
1169 	lsn_t	lsn,	/*!< in: lsn up to which logs are to be
1170 			flushed. */
1171 	trx_t*	trx)	/*!< in/out: transaction */
1172 {
1173 	trx->op_info = "flushing log";
1174 	trx_flush_log_if_needed_low(lsn);
1175 	trx->op_info = "";
1176 }
1177 
1178 /****************************************************************//**
1179 Commits a transaction in memory. */
1180 static MY_ATTRIBUTE((nonnull))
1181 void
trx_commit_in_memory(trx_t * trx,lsn_t lsn)1182 trx_commit_in_memory(
1183 /*=================*/
1184 	trx_t*	trx,	/*!< in/out: transaction */
1185 	lsn_t	lsn)	/*!< in: log sequence number of the mini-transaction
1186 			commit of trx_write_serialisation_history(), or 0
1187 			if the transaction did not modify anything */
1188 {
1189 	trx->must_flush_log_later = FALSE;
1190 
1191 	if (trx_is_autocommit_non_locking(trx)) {
1192 		ut_ad(trx->read_only);
1193 		ut_a(!trx->is_recovered);
1194 		ut_ad(trx->rseg == NULL);
1195 		ut_ad(!trx->in_ro_trx_list);
1196 		ut_ad(!trx->in_rw_trx_list);
1197 
1198 		/* Note: We are asserting without holding the lock mutex. But
1199 		that is OK because this transaction is not waiting and cannot
1200 		be rolled back and no new locks can (or should not) be added
1201 		becuase it is flagged as a non-locking read-only transaction. */
1202 
1203 		ut_a(UT_LIST_GET_LEN(trx->lock.trx_locks) == 0);
1204 
1205 		/* This state change is not protected by any mutex, therefore
1206 		there is an inherent race here around state transition during
1207 		printouts. We ignore this race for the sake of efficiency.
1208 		However, the trx_sys_t::mutex will protect the trx_t instance
1209 		and it cannot be removed from the mysql_trx_list and freed
1210 		without first acquiring the trx_sys_t::mutex. */
1211 
1212 		ut_ad(trx_state_eq(trx, TRX_STATE_ACTIVE));
1213 
1214 		trx->state = TRX_STATE_NOT_STARTED;
1215 
1216 		read_view_remove(trx->global_read_view, false);
1217 
1218 		MONITOR_INC(MONITOR_TRX_NL_RO_COMMIT);
1219 	} else {
1220 		lock_trx_release_locks(trx);
1221 
1222 		/* Remove the transaction from the list of active
1223 		transactions now that it no longer holds any user locks. */
1224 
1225 		ut_ad(trx_state_eq(trx, TRX_STATE_COMMITTED_IN_MEMORY));
1226 
1227 		mutex_enter(&trx_sys->mutex);
1228 
1229 		assert_trx_in_list(trx);
1230 
1231 		if (trx->read_only) {
1232 			UT_LIST_REMOVE(trx_list, trx_sys->ro_trx_list, trx);
1233 			ut_d(trx->in_ro_trx_list = FALSE);
1234 			MONITOR_INC(MONITOR_TRX_RO_COMMIT);
1235 		} else {
1236 			UT_LIST_REMOVE(trx_list, trx_sys->rw_trx_list, trx);
1237 			ut_d(trx->in_rw_trx_list = FALSE);
1238 			MONITOR_INC(MONITOR_TRX_RW_COMMIT);
1239 		}
1240 
1241 		/* If this transaction came from trx_allocate_for_mysql(),
1242 		trx->in_mysql_trx_list would hold. In that case, the
1243 		trx->state change must be protected by trx_sys->mutex, so that
1244 		lock_print_info_all_transactions() will have a consistent
1245 		view. */
1246 
1247 		trx->state = TRX_STATE_NOT_STARTED;
1248 
1249 		/* We already own the trx_sys_t::mutex, by doing it here we
1250 		avoid a potential context switch later. */
1251 		read_view_remove(trx->global_read_view, true);
1252 
1253 		ut_ad(trx_sys_validate_trx_list());
1254 
1255 		mutex_exit(&trx_sys->mutex);
1256 	}
1257 
1258 	if (trx->global_read_view != NULL) {
1259 
1260 		mem_heap_empty(trx->global_read_view_heap);
1261 
1262 		trx->global_read_view = NULL;
1263 	}
1264 
1265 	trx->read_view = NULL;
1266 
1267 	if (lsn) {
1268 		if (trx->insert_undo != NULL) {
1269 
1270 			trx_undo_insert_cleanup(trx);
1271 		}
1272 
1273 		/* NOTE that we could possibly make a group commit more
1274 		efficient here: call os_thread_yield here to allow also other
1275 		trxs to come to commit! */
1276 
1277 		/*-------------------------------------*/
1278 
1279 		/* Depending on the my.cnf options, we may now write the log
1280 		buffer to the log files, making the transaction durable if
1281 		the OS does not crash. We may also flush the log files to
1282 		disk, making the transaction durable also at an OS crash or a
1283 		power outage.
1284 
1285 		The idea in InnoDB's group commit is that a group of
1286 		transactions gather behind a trx doing a physical disk write
1287 		to log files, and when that physical write has been completed,
1288 		one of those transactions does a write which commits the whole
1289 		group. Note that this group commit will only bring benefit if
1290 		there are > 2 users in the database. Then at least 2 users can
1291 		gather behind one doing the physical log write to disk.
1292 
1293 		If we are calling trx_commit() under prepare_commit_mutex, we
1294 		will delay possible log write and flush to a separate function
1295 		trx_commit_complete_for_mysql(), which is only called when the
1296 		thread has released the mutex. This is to make the
1297 		group commit algorithm to work. Otherwise, the prepare_commit
1298 		mutex would serialize all commits and prevent a group of
1299 		transactions from gathering. */
1300 
1301 		if (trx->flush_log_later) {
1302 			/* Do nothing yet */
1303 			trx->must_flush_log_later = TRUE;
1304 		} else if (srv_flush_log_at_trx_commit == 0
1305 			   || thd_requested_durability(trx->mysql_thd)
1306 			   == HA_IGNORE_DURABILITY) {
1307 			/* Do nothing */
1308 		} else {
1309 			trx_flush_log_if_needed(lsn, trx);
1310 		}
1311 
1312 		trx->commit_lsn = lsn;
1313 
1314 		/* Tell server some activity has happened, since the trx
1315 		does changes something. Background utility threads like
1316 		master thread, purge thread or page_cleaner thread might
1317 		have some work to do. */
1318 		srv_active_wake_master_thread();
1319 	}
1320 
1321 	/* undo_no is non-zero if we're doing the final commit. */
1322 	bool			not_rollback = trx->undo_no != 0;
1323 	/* Free all savepoints, starting from the first. */
1324 	trx_named_savept_t*	savep = UT_LIST_GET_FIRST(trx->trx_savepoints);
1325 	trx_roll_savepoints_free(trx, savep);
1326 
1327 	trx->rseg = NULL;
1328 	trx->undo_no = 0;
1329 	trx->last_sql_stat_start.least_undo_no = 0;
1330 
1331 	trx->ddl = false;
1332 #ifdef UNIV_DEBUG
1333 	ut_ad(trx->start_file != 0);
1334 	ut_ad(trx->start_line != 0);
1335 	trx->start_file = 0;
1336 	trx->start_line = 0;
1337 #endif /* UNIV_DEBUG */
1338 
1339 	trx->will_lock = 0;
1340 	trx->read_only = FALSE;
1341 	trx->auto_commit = FALSE;
1342 
1343         if (trx->fts_trx) {
1344                 trx_finalize_for_fts(trx, not_rollback);
1345         }
1346 
1347 	ut_ad(trx->lock.wait_thr == NULL);
1348 	ut_ad(UT_LIST_GET_LEN(trx->lock.trx_locks) == 0);
1349 	ut_ad(!trx->in_ro_trx_list);
1350 	ut_ad(!trx->in_rw_trx_list);
1351 
1352 #ifdef WITH_WSREP
1353 	if (wsrep_on(trx->mysql_thd)) {
1354 		trx->lock.was_chosen_as_deadlock_victim = FALSE;
1355 	}
1356 #endif
1357 	trx->dict_operation = TRX_DICT_OP_NONE;
1358 
1359 	trx->error_state = DB_SUCCESS;
1360 
1361 	/* trx->in_mysql_trx_list would hold between
1362 	trx_allocate_for_mysql() and trx_free_for_mysql(). It does not
1363 	hold for recovered transactions or system transactions. */
1364 }
1365 
1366 /****************************************************************//**
1367 Commits a transaction and a mini-transaction. */
1368 UNIV_INTERN
1369 void
trx_commit_low(trx_t * trx,mtr_t * mtr)1370 trx_commit_low(
1371 /*===========*/
1372 	trx_t*	trx,	/*!< in/out: transaction */
1373 	mtr_t*	mtr)	/*!< in/out: mini-transaction (will be committed),
1374 			or NULL if trx made no modifications */
1375 {
1376 	lsn_t	lsn;
1377 
1378 	assert_trx_nonlocking_or_in_list(trx);
1379 	ut_ad(!trx_state_eq(trx, TRX_STATE_COMMITTED_IN_MEMORY));
1380 	ut_ad(!mtr || mtr->state == MTR_ACTIVE);
1381 	ut_ad(!mtr == !(trx->insert_undo || trx->update_undo));
1382 
1383 	/* undo_no is non-zero if we're doing the final commit. */
1384 	if (trx->fts_trx && trx->undo_no != 0) {
1385 		dberr_t	error;
1386 
1387 		ut_a(!trx_is_autocommit_non_locking(trx));
1388 
1389 		error = fts_commit(trx);
1390 
1391 		/* FTS-FIXME: Temporarily tolerate DB_DUPLICATE_KEY
1392 		instead of dying. This is a possible scenario if there
1393 		is a crash between insert to DELETED table committing
1394 		and transaction committing. The fix would be able to
1395 		return error from this function */
1396 		if (error != DB_SUCCESS && error != DB_DUPLICATE_KEY) {
1397 			/* FTS-FIXME: once we can return values from this
1398 			function, we should do so and signal an error
1399 			instead of just dying. */
1400 
1401 			ut_error;
1402 		}
1403 	}
1404 
1405 	if (mtr) {
1406 		trx_write_serialisation_history(trx, mtr);
1407 		/* The following call commits the mini-transaction, making the
1408 		whole transaction committed in the file-based world, at this
1409 		log sequence number. The transaction becomes 'durable' when
1410 		we write the log to disk, but in the logical sense the commit
1411 		in the file-based data structures (undo logs etc.) happens
1412 		here.
1413 
1414 		NOTE that transaction numbers, which are assigned only to
1415 		transactions with an update undo log, do not necessarily come
1416 		in exactly the same order as commit lsn's, if the transactions
1417 		have different rollback segments. To get exactly the same
1418 		order we should hold the kernel mutex up to this point,
1419 		adding to the contention of the kernel mutex. However, if
1420 		a transaction T2 is able to see modifications made by
1421 		a transaction T1, T2 will always get a bigger transaction
1422 		number and a bigger commit lsn than T1. */
1423 
1424 		/*--------------*/
1425 		mtr_commit(mtr);
1426 		/*--------------*/
1427 		lsn = mtr->end_lsn;
1428 	} else {
1429 		lsn = 0;
1430 	}
1431 
1432 	trx_commit_in_memory(trx, lsn);
1433 }
1434 
1435 /****************************************************************//**
1436 Commits a transaction. */
1437 UNIV_INTERN
1438 void
trx_commit(trx_t * trx)1439 trx_commit(
1440 /*=======*/
1441 	trx_t*	trx)	/*!< in/out: transaction */
1442 {
1443 	mtr_t	local_mtr;
1444 	mtr_t*	mtr;
1445 
1446 	if (trx->insert_undo || trx->update_undo) {
1447 		mtr = &local_mtr;
1448 		mtr_start(mtr);
1449 	} else {
1450 		mtr = NULL;
1451 	}
1452 
1453 	trx_commit_low(trx, mtr);
1454 }
1455 
1456 /****************************************************************//**
1457 Cleans up a transaction at database startup. The cleanup is needed if
1458 the transaction already got to the middle of a commit when the database
1459 crashed, and we cannot roll it back. */
1460 UNIV_INTERN
1461 void
trx_cleanup_at_db_startup(trx_t * trx)1462 trx_cleanup_at_db_startup(
1463 /*======================*/
1464 	trx_t*	trx)	/*!< in: transaction */
1465 {
1466 	ut_ad(trx->is_recovered);
1467 
1468 	if (trx->insert_undo != NULL) {
1469 
1470 		trx_undo_insert_cleanup(trx);
1471 	}
1472 
1473 	trx->rseg = NULL;
1474 	trx->undo_no = 0;
1475 	trx->last_sql_stat_start.least_undo_no = 0;
1476 
1477 	mutex_enter(&trx_sys->mutex);
1478 
1479 	ut_a(!trx->read_only);
1480 
1481 	UT_LIST_REMOVE(trx_list, trx_sys->rw_trx_list, trx);
1482 
1483 	assert_trx_in_rw_list(trx);
1484 	ut_d(trx->in_rw_trx_list = FALSE);
1485 
1486 	mutex_exit(&trx_sys->mutex);
1487 
1488 	/* Change the transaction state without mutex protection, now
1489 	that it no longer is in the trx_list. Recovered transactions
1490 	are never placed in the mysql_trx_list. */
1491 	ut_ad(trx->is_recovered);
1492 	ut_ad(!trx->in_ro_trx_list);
1493 	ut_ad(!trx->in_rw_trx_list);
1494 	ut_ad(!trx->in_mysql_trx_list);
1495 	trx->state = TRX_STATE_NOT_STARTED;
1496 }
1497 
1498 /********************************************************************//**
1499 Assigns a read view for a consistent read query. All the consistent reads
1500 within the same transaction will get the same read view, which is created
1501 when this function is first called for a new started transaction.
1502 @return	consistent read view */
1503 UNIV_INTERN
1504 read_view_t*
trx_assign_read_view(trx_t * trx)1505 trx_assign_read_view(
1506 /*=================*/
1507 	trx_t*	trx)	/*!< in: active transaction */
1508 {
1509 	ut_ad(trx->state == TRX_STATE_ACTIVE);
1510 
1511 	if (trx->read_view != NULL) {
1512 		return(trx->read_view);
1513 	}
1514 
1515 	if (!trx->read_view) {
1516 
1517 		trx->read_view = read_view_open_now(
1518 			trx->id, trx->global_read_view_heap);
1519 
1520 		trx->global_read_view = trx->read_view;
1521 	}
1522 
1523 	return(trx->read_view);
1524 }
1525 
1526 /****************************************************************//**
1527 Prepares a transaction for commit/rollback. */
1528 UNIV_INTERN
1529 void
trx_commit_or_rollback_prepare(trx_t * trx)1530 trx_commit_or_rollback_prepare(
1531 /*===========================*/
1532 	trx_t*	trx)		/*!< in/out: transaction */
1533 {
1534 	/* We are reading trx->state without holding trx_sys->mutex
1535 	here, because the commit or rollback should be invoked for a
1536 	running (or recovered prepared) transaction that is associated
1537 	with the current thread. */
1538 
1539 	switch (trx->state) {
1540 	case TRX_STATE_NOT_STARTED:
1541 #ifdef WITH_WSREP
1542 		ut_d(trx->start_file = __FILE__);
1543 		ut_d(trx->start_line = __LINE__);
1544 #endif /* WITH_WSREP */
1545 		trx_start_low(trx);
1546 		/* fall through */
1547 	case TRX_STATE_ACTIVE:
1548 	case TRX_STATE_PREPARED:
1549 		/* If the trx is in a lock wait state, moves the waiting
1550 		query thread to the suspended state */
1551 
1552 		if (trx->lock.que_state == TRX_QUE_LOCK_WAIT) {
1553 
1554 			ut_a(trx->lock.wait_thr != NULL);
1555 			trx->lock.wait_thr->state = QUE_THR_SUSPENDED;
1556 			trx->lock.wait_thr = NULL;
1557 
1558 			trx->lock.que_state = TRX_QUE_RUNNING;
1559 		}
1560 
1561 		ut_a(trx->lock.n_active_thrs == 1);
1562 		return;
1563 	case TRX_STATE_COMMITTED_IN_MEMORY:
1564 		break;
1565 	}
1566 
1567 	ut_error;
1568 }
1569 
1570 /*********************************************************************//**
1571 Creates a commit command node struct.
1572 @return	own: commit node struct */
1573 UNIV_INTERN
1574 commit_node_t*
trx_commit_node_create(mem_heap_t * heap)1575 trx_commit_node_create(
1576 /*===================*/
1577 	mem_heap_t*	heap)	/*!< in: mem heap where created */
1578 {
1579 	commit_node_t*	node;
1580 
1581 	node = static_cast<commit_node_t*>(mem_heap_alloc(heap, sizeof(*node)));
1582 	node->common.type  = QUE_NODE_COMMIT;
1583 	node->state = COMMIT_NODE_SEND;
1584 
1585 	return(node);
1586 }
1587 
1588 /***********************************************************//**
1589 Performs an execution step for a commit type node in a query graph.
1590 @return	query thread to run next, or NULL */
1591 UNIV_INTERN
1592 que_thr_t*
trx_commit_step(que_thr_t * thr)1593 trx_commit_step(
1594 /*============*/
1595 	que_thr_t*	thr)	/*!< in: query thread */
1596 {
1597 	commit_node_t*	node;
1598 
1599 	node = static_cast<commit_node_t*>(thr->run_node);
1600 
1601 	ut_ad(que_node_get_type(node) == QUE_NODE_COMMIT);
1602 
1603 	if (thr->prev_node == que_node_get_parent(node)) {
1604 		node->state = COMMIT_NODE_SEND;
1605 	}
1606 
1607 	if (node->state == COMMIT_NODE_SEND) {
1608 		trx_t*	trx;
1609 
1610 		node->state = COMMIT_NODE_WAIT;
1611 
1612 		trx = thr_get_trx(thr);
1613 
1614 		ut_a(trx->lock.wait_thr == NULL);
1615 		ut_a(trx->lock.que_state != TRX_QUE_LOCK_WAIT);
1616 
1617 		trx_commit_or_rollback_prepare(trx);
1618 
1619 		trx->lock.que_state = TRX_QUE_COMMITTING;
1620 
1621 		trx_commit(trx);
1622 
1623 		ut_ad(trx->lock.wait_thr == NULL);
1624 
1625 		trx->lock.que_state = TRX_QUE_RUNNING;
1626 
1627 		thr = NULL;
1628 	} else {
1629 		ut_ad(node->state == COMMIT_NODE_WAIT);
1630 
1631 		node->state = COMMIT_NODE_SEND;
1632 
1633 		thr->run_node = que_node_get_parent(node);
1634 	}
1635 
1636 	return(thr);
1637 }
1638 
1639 /**********************************************************************//**
1640 Does the transaction commit for MySQL.
1641 @return	DB_SUCCESS or error number */
1642 UNIV_INTERN
1643 dberr_t
trx_commit_for_mysql(trx_t * trx)1644 trx_commit_for_mysql(
1645 /*=================*/
1646 	trx_t*	trx)	/*!< in/out: transaction */
1647 {
1648 	/* Because we do not do the commit by sending an Innobase
1649 	sig to the transaction, we must here make sure that trx has been
1650 	started. */
1651 
1652 	ut_a(trx);
1653 
1654 	switch (trx->state) {
1655 	case TRX_STATE_NOT_STARTED:
1656 		/* Update the info whether we should skip XA steps that eat
1657 		CPU time.
1658 
1659 		For the duration of the transaction trx->support_xa is
1660 		not reread from thd so any changes in the value take
1661 		effect in the next transaction. This is to avoid a
1662 		scenario where some undo log records generated by a
1663 		transaction contain XA information and other undo log
1664 		records, generated by the same transaction do not. */
1665 		trx->support_xa = thd_supports_xa(trx->mysql_thd);
1666 
1667 		ut_d(trx->start_file = __FILE__);
1668 		ut_d(trx->start_line = __LINE__);
1669 
1670 		trx_start_low(trx);
1671 		/* fall through */
1672 	case TRX_STATE_ACTIVE:
1673 	case TRX_STATE_PREPARED:
1674 		trx->op_info = "committing";
1675 		trx_commit(trx);
1676 		MONITOR_DEC(MONITOR_TRX_ACTIVE);
1677 		trx->op_info = "";
1678 		return(DB_SUCCESS);
1679 	case TRX_STATE_COMMITTED_IN_MEMORY:
1680 		break;
1681 	}
1682 	ut_error;
1683 	return(DB_CORRUPTION);
1684 }
1685 
1686 /**********************************************************************//**
1687 If required, flushes the log to disk if we called trx_commit_for_mysql()
1688 with trx->flush_log_later == TRUE. */
1689 UNIV_INTERN
1690 void
trx_commit_complete_for_mysql(trx_t * trx)1691 trx_commit_complete_for_mysql(
1692 /*==========================*/
1693 	trx_t*	trx)	/*!< in/out: transaction */
1694 {
1695 	ut_a(trx);
1696 
1697 	if (!trx->must_flush_log_later
1698 	    || thd_requested_durability(trx->mysql_thd)
1699 	       == HA_IGNORE_DURABILITY) {
1700 		return;
1701 	}
1702 
1703 	trx_flush_log_if_needed(trx->commit_lsn, trx);
1704 
1705 	trx->must_flush_log_later = FALSE;
1706 }
1707 
1708 /**********************************************************************//**
1709 Marks the latest SQL statement ended. */
1710 UNIV_INTERN
1711 void
trx_mark_sql_stat_end(trx_t * trx)1712 trx_mark_sql_stat_end(
1713 /*==================*/
1714 	trx_t*	trx)	/*!< in: trx handle */
1715 {
1716 	ut_a(trx);
1717 
1718 	switch (trx->state) {
1719 	case TRX_STATE_PREPARED:
1720 	case TRX_STATE_COMMITTED_IN_MEMORY:
1721 		break;
1722 	case TRX_STATE_NOT_STARTED:
1723 		trx->undo_no = 0;
1724 		/* fall through */
1725 	case TRX_STATE_ACTIVE:
1726 		trx->last_sql_stat_start.least_undo_no = trx->undo_no;
1727 
1728 		if (trx->fts_trx) {
1729 			fts_savepoint_laststmt_refresh(trx);
1730 		}
1731 
1732 		return;
1733 	}
1734 
1735 	ut_error;
1736 }
1737 
1738 /**********************************************************************//**
1739 Prints info about a transaction.
1740 Caller must hold trx_sys->mutex. */
1741 UNIV_INTERN
1742 void
trx_print_low(FILE * f,const trx_t * trx,ulint max_query_len,ulint n_rec_locks,ulint n_trx_locks,ulint heap_size)1743 trx_print_low(
1744 /*==========*/
1745 	FILE*		f,
1746 			/*!< in: output stream */
1747 	const trx_t*	trx,
1748 			/*!< in: transaction */
1749 	ulint		max_query_len,
1750 			/*!< in: max query length to print,
1751 			or 0 to use the default max length */
1752 	ulint		n_rec_locks,
1753 			/*!< in: lock_number_of_rows_locked(&trx->lock) */
1754 	ulint		n_trx_locks,
1755 			/*!< in: length of trx->lock.trx_locks */
1756 	ulint		heap_size)
1757 			/*!< in: mem_heap_get_size(trx->lock.lock_heap) */
1758 {
1759 	ibool		newline;
1760 	const char*	op_info;
1761 
1762 	ut_ad(mutex_own(&trx_sys->mutex));
1763 
1764 	fprintf(f, "TRANSACTION " TRX_ID_FMT, trx->id);
1765 
1766 	/* trx->state cannot change from or to NOT_STARTED while we
1767 	are holding the trx_sys->mutex. It may change from ACTIVE to
1768 	PREPARED or COMMITTED. */
1769 	switch (trx->state) {
1770 	case TRX_STATE_NOT_STARTED:
1771 		fputs(", not started", f);
1772 		goto state_ok;
1773 	case TRX_STATE_ACTIVE:
1774 		fprintf(f, ", ACTIVE %lu sec",
1775 			(ulong) difftime(time(NULL), trx->start_time));
1776 		goto state_ok;
1777 	case TRX_STATE_PREPARED:
1778 		fprintf(f, ", ACTIVE (PREPARED) %lu sec",
1779 			(ulong) difftime(time(NULL), trx->start_time));
1780 		goto state_ok;
1781 	case TRX_STATE_COMMITTED_IN_MEMORY:
1782 		fputs(", COMMITTED IN MEMORY", f);
1783 		goto state_ok;
1784 	}
1785 	fprintf(f, ", state %lu", (ulong) trx->state);
1786 	ut_ad(0);
1787 state_ok:
1788 
1789 	/* prevent a race condition */
1790 	op_info = trx->op_info;
1791 
1792 	if (*op_info) {
1793 		putc(' ', f);
1794 		fputs(op_info, f);
1795 	}
1796 
1797 	if (trx->is_recovered) {
1798 		fputs(" recovered trx", f);
1799 	}
1800 
1801 	if (trx->declared_to_be_inside_innodb) {
1802 		fprintf(f, ", thread declared inside InnoDB %lu",
1803 			(ulong) trx->n_tickets_to_enter_innodb);
1804 	}
1805 
1806 	putc('\n', f);
1807 
1808 	if (trx->n_mysql_tables_in_use > 0 || trx->mysql_n_tables_locked > 0) {
1809 		fprintf(f, "mysql tables in use %lu, locked %lu\n",
1810 			(ulong) trx->n_mysql_tables_in_use,
1811 			(ulong) trx->mysql_n_tables_locked);
1812 	}
1813 
1814 	newline = TRUE;
1815 
1816 	/* trx->lock.que_state of an ACTIVE transaction may change
1817 	while we are not holding trx->mutex. We perform a dirty read
1818 	for performance reasons. */
1819 
1820 	switch (trx->lock.que_state) {
1821 	case TRX_QUE_RUNNING:
1822 		newline = FALSE; break;
1823 	case TRX_QUE_LOCK_WAIT:
1824 		fputs("LOCK WAIT ", f); break;
1825 	case TRX_QUE_ROLLING_BACK:
1826 		fputs("ROLLING BACK ", f); break;
1827 	case TRX_QUE_COMMITTING:
1828 		fputs("COMMITTING ", f); break;
1829 	default:
1830 		fprintf(f, "que state %lu ", (ulong) trx->lock.que_state);
1831 	}
1832 
1833 	if (n_trx_locks > 0 || heap_size > 400) {
1834 		newline = TRUE;
1835 
1836 		fprintf(f, "%lu lock struct(s), heap size %lu,"
1837 			" %lu row lock(s)",
1838 			(ulong) n_trx_locks,
1839 			(ulong) heap_size,
1840 			(ulong) n_rec_locks);
1841 	}
1842 
1843 	if (trx->has_search_latch) {
1844 		newline = TRUE;
1845 		fputs(", holds adaptive hash latch", f);
1846 	}
1847 
1848 	if (trx->undo_no != 0) {
1849 		newline = TRUE;
1850 		fprintf(f, ", undo log entries " TRX_ID_FMT, trx->undo_no);
1851 	}
1852 
1853 	if (newline) {
1854 		putc('\n', f);
1855 	}
1856 
1857 	if (trx->mysql_thd != NULL) {
1858 		innobase_mysql_print_thd(
1859 			f, trx->mysql_thd, static_cast<uint>(max_query_len));
1860 	}
1861 }
1862 
1863 /**********************************************************************//**
1864 Prints info about a transaction.
1865 The caller must hold lock_sys->mutex and trx_sys->mutex.
1866 When possible, use trx_print() instead. */
1867 UNIV_INTERN
1868 void
trx_print_latched(FILE * f,const trx_t * trx,ulint max_query_len)1869 trx_print_latched(
1870 /*==============*/
1871 	FILE*		f,		/*!< in: output stream */
1872 	const trx_t*	trx,		/*!< in: transaction */
1873 	ulint		max_query_len)	/*!< in: max query length to print,
1874 					or 0 to use the default max length */
1875 {
1876 	ut_ad(lock_mutex_own());
1877 	ut_ad(mutex_own(&trx_sys->mutex));
1878 
1879 	trx_print_low(f, trx, max_query_len,
1880 		      lock_number_of_rows_locked(&trx->lock),
1881 		      UT_LIST_GET_LEN(trx->lock.trx_locks),
1882 		      mem_heap_get_size(trx->lock.lock_heap));
1883 }
1884 
#ifdef WITH_WSREP
/**********************************************************************//**
Prints info about a transaction.
The information may be read without trx_sys->mutex being held, so it may
not be completely accurate. The caller must own lock_sys->mutex and the
trx must have some locks, to make sure that it does not escape without
locking lock_sys->mutex. */
UNIV_INTERN
void
wsrep_trx_print_locking(
/*==========*/
	FILE*		f,
			/*!< in: output stream */
	const trx_t*	trx,
			/*!< in: transaction */
	ulint		max_query_len)
			/*!< in: max query length to print,
			or 0 to use the default max length */
{
	ut_ad(lock_mutex_own());
	ut_ad(trx->lock.trx_locks.count > 0);

	fprintf(f, "TRANSACTION " TRX_ID_FMT, trx->id);

	/* trx_sys->mutex is not required here, so trx->state may
	change while it is being printed. */
	switch (trx->state) {
	case TRX_STATE_NOT_STARTED:
		fputs(", not started", f);
		break;
	case TRX_STATE_ACTIVE:
		fprintf(f, ", ACTIVE %lu sec",
			(ulong) difftime(time(NULL), trx->start_time));
		break;
	case TRX_STATE_PREPARED:
		fprintf(f, ", ACTIVE (PREPARED) %lu sec",
			(ulong) difftime(time(NULL), trx->start_time));
		break;
	case TRX_STATE_COMMITTED_IN_MEMORY:
		fputs(", COMMITTED IN MEMORY", f);
		break;
	default:
		/* Unknown state: print its number; debug builds assert. */
		fprintf(f, ", state %lu", (ulong) trx->state);
		ut_ad(0);
	}

	/* Copy the pointer once, so that the emptiness test and the
	printing use the same string (prevents a race condition). */
	const char*	op_info = trx->op_info;

	if (*op_info) {
		putc(' ', f);
		fputs(op_info, f);
	}

	if (trx->is_recovered) {
		fputs(" recovered trx", f);
	}

	if (trx->declared_to_be_inside_innodb) {
		fprintf(f, ", thread declared inside InnoDB %lu",
			(ulong) trx->n_tickets_to_enter_innodb);
	}

	putc('\n', f);

	if (trx->n_mysql_tables_in_use > 0 || trx->mysql_n_tables_locked > 0) {
		fprintf(f, "mysql tables in use %lu, locked %lu\n",
			(ulong) trx->n_mysql_tables_in_use,
			(ulong) trx->mysql_n_tables_locked);
	}

	/* Set when something was printed on the second line, so that
	the line has to be terminated at the end. */
	ibool	newline = TRUE;

	/* trx->lock.que_state of an ACTIVE transaction may change while
	trx->mutex is not held. This is a dirty read, done for
	performance reasons. */

	switch (trx->lock.que_state) {
	case TRX_QUE_RUNNING:
		newline = FALSE;
		break;
	case TRX_QUE_LOCK_WAIT:
		fputs("LOCK WAIT ", f);
		break;
	case TRX_QUE_ROLLING_BACK:
		fputs("ROLLING BACK ", f);
		break;
	case TRX_QUE_COMMITTING:
		fputs("COMMITTING ", f);
		break;
	default:
		fprintf(f, "que state %lu ", (ulong) trx->lock.que_state);
	}

	if (trx->has_search_latch) {
		newline = TRUE;
		fputs(", holds adaptive hash latch", f);
	}

	if (trx->undo_no != 0) {
		newline = TRUE;
		fprintf(f, ", undo log entries " TRX_ID_FMT, trx->undo_no);
	}

	if (newline) {
		putc('\n', f);
	}

	if (trx->mysql_thd != NULL) {
		innobase_mysql_print_thd(
			f, trx->mysql_thd, static_cast<uint>(max_query_len));
	}
}
#endif /* WITH_WSREP */
1997 /**********************************************************************//**
1998 Prints info about a transaction.
1999 Acquires and releases lock_sys->mutex and trx_sys->mutex. */
2000 UNIV_INTERN
2001 void
trx_print(FILE * f,const trx_t * trx,ulint max_query_len)2002 trx_print(
2003 /*======*/
2004 	FILE*		f,		/*!< in: output stream */
2005 	const trx_t*	trx,		/*!< in: transaction */
2006 	ulint		max_query_len)	/*!< in: max query length to print,
2007 					or 0 to use the default max length */
2008 {
2009 	ulint	n_rec_locks;
2010 	ulint	n_trx_locks;
2011 	ulint	heap_size;
2012 
2013 	lock_mutex_enter();
2014 	n_rec_locks = lock_number_of_rows_locked(&trx->lock);
2015 	n_trx_locks = UT_LIST_GET_LEN(trx->lock.trx_locks);
2016 	heap_size = mem_heap_get_size(trx->lock.lock_heap);
2017 	lock_mutex_exit();
2018 
2019 	mutex_enter(&trx_sys->mutex);
2020 	trx_print_low(f, trx, max_query_len,
2021 		      n_rec_locks, n_trx_locks, heap_size);
2022 	mutex_exit(&trx_sys->mutex);
2023 }
2024 
#ifdef UNIV_DEBUG
/**********************************************************************//**
Asserts that a transaction has been started: a transaction in the
TRX_STATE_NOT_STARTED state (or in an unknown state) triggers a fatal
error.
The caller must hold trx_sys->mutex.
@return TRUE if started */
UNIV_INTERN
ibool
trx_assert_started(
/*===============*/
	const trx_t*	trx)	/*!< in: transaction */
{
	ut_ad(mutex_own(&trx_sys->mutex));

	/* This function is only called from the locking code, and
	non-locking autocommits should not hold any locks. */
	assert_trx_in_list(trx);

	/* For non-locking autocommit selects trx->state can change from or
	to NOT_STARTED even while trx_sys->mutex is held, but not for other
	types of transactions. It may change from ACTIVE to PREPARED, and
	unless lock_sys->mutex is held, also to COMMITTED. */

	switch (trx->state) {
	case TRX_STATE_ACTIVE:
	case TRX_STATE_PREPARED:
	case TRX_STATE_COMMITTED_IN_MEMORY:
		return(TRUE);

	case TRX_STATE_NOT_STARTED:
		break;
	}

	ut_error;
	return(FALSE);
}
#endif /* UNIV_DEBUG */
2063 
2064 /*******************************************************************//**
2065 Compares the "weight" (or size) of two transactions. Transactions that
2066 have edited non-transactional tables are considered heavier than ones
2067 that have not.
2068 @return	TRUE if weight(a) >= weight(b) */
2069 UNIV_INTERN
2070 ibool
trx_weight_ge(const trx_t * a,const trx_t * b)2071 trx_weight_ge(
2072 /*==========*/
2073 	const trx_t*	a,	/*!< in: the first transaction to be compared */
2074 	const trx_t*	b)	/*!< in: the second transaction to be compared */
2075 {
2076 	ibool	a_notrans_edit;
2077 	ibool	b_notrans_edit;
2078 
2079 	/* If mysql_thd is NULL for a transaction we assume that it has
2080 	not edited non-transactional tables. */
2081 
2082 	a_notrans_edit = a->mysql_thd != NULL
2083 		&& thd_has_edited_nontrans_tables(a->mysql_thd);
2084 
2085 	b_notrans_edit = b->mysql_thd != NULL
2086 		&& thd_has_edited_nontrans_tables(b->mysql_thd);
2087 
2088 	if (a_notrans_edit != b_notrans_edit) {
2089 
2090 		return(a_notrans_edit);
2091 	}
2092 
2093 	/* Either both had edited non-transactional tables or both had
2094 	not, we fall back to comparing the number of altered/locked
2095 	rows. */
2096 
2097 #if 0
2098 	fprintf(stderr,
2099 		"%s TRX_WEIGHT(a): %lld+%lu, TRX_WEIGHT(b): %lld+%lu\n",
2100 		__func__,
2101 		a->undo_no, UT_LIST_GET_LEN(a->lock.trx_locks),
2102 		b->undo_no, UT_LIST_GET_LEN(b->lock.trx_locks));
2103 #endif
2104 
2105 	return(TRX_WEIGHT(a) >= TRX_WEIGHT(b));
2106 }
2107 
2108 /****************************************************************//**
2109 Prepares a transaction. */
2110 static
2111 void
trx_prepare(trx_t * trx)2112 trx_prepare(
2113 /*========*/
2114 	trx_t*	trx)	/*!< in/out: transaction */
2115 {
2116 	trx_rseg_t*	rseg;
2117 	lsn_t		lsn;
2118 	mtr_t		mtr;
2119 
2120 	rseg = trx->rseg;
2121 	/* Only fresh user transactions can be prepared.
2122 	Recovered transactions cannot. */
2123 	ut_a(!trx->is_recovered);
2124 
2125 	if (trx->insert_undo != NULL || trx->update_undo != NULL) {
2126 
2127 		mtr_start(&mtr);
2128 
2129 		/* Change the undo log segment states from TRX_UNDO_ACTIVE
2130 		to TRX_UNDO_PREPARED: these modifications to the file data
2131 		structure define the transaction as prepared in the
2132 		file-based world, at the serialization point of lsn. */
2133 
2134 		mutex_enter(&rseg->mutex);
2135 
2136 		if (trx->insert_undo != NULL) {
2137 
2138 			/* It is not necessary to obtain trx->undo_mutex here
2139 			because only a single OS thread is allowed to do the
2140 			transaction prepare for this transaction. */
2141 
2142 			trx_undo_set_state_at_prepare(trx, trx->insert_undo,
2143 						      &mtr);
2144 		}
2145 
2146 		if (trx->update_undo) {
2147 			trx_undo_set_state_at_prepare(
2148 				trx, trx->update_undo, &mtr);
2149 		}
2150 
2151 		mutex_exit(&rseg->mutex);
2152 
2153 		/*--------------*/
2154 		mtr_commit(&mtr);	/* This mtr commit makes the
2155 					transaction prepared in the file-based
2156 					world */
2157 		/*--------------*/
2158 		lsn = mtr.end_lsn;
2159 		ut_ad(lsn);
2160 	} else {
2161 		lsn = 0;
2162 	}
2163 
2164 	/*--------------------------------------*/
2165 	ut_a(trx->state == TRX_STATE_ACTIVE);
2166 	mutex_enter(&trx_sys->mutex);
2167 	trx->state = TRX_STATE_PREPARED;
2168 	trx_sys->n_prepared_trx++;
2169 	mutex_exit(&trx_sys->mutex);
2170 	/*--------------------------------------*/
2171 
2172 	if (lsn) {
2173 		/* Depending on the my.cnf options, we may now write the log
2174 		buffer to the log files, making the prepared state of the
2175 		transaction durable if the OS does not crash. We may also
2176 		flush the log files to disk, making the prepared state of the
2177 		transaction durable also at an OS crash or a power outage.
2178 
2179 		The idea in InnoDB's group prepare is that a group of
2180 		transactions gather behind a trx doing a physical disk write
2181 		to log files, and when that physical write has been completed,
2182 		one of those transactions does a write which prepares the whole
2183 		group. Note that this group prepare will only bring benefit if
2184 		there are > 2 users in the database. Then at least 2 users can
2185 		gather behind one doing the physical log write to disk.
2186 
2187 		TODO: find out if MySQL holds some mutex when calling this.
2188 		That would spoil our group prepare algorithm. */
2189 
2190 		trx_flush_log_if_needed(lsn, trx);
2191 	}
2192 }
2193 
2194 /**********************************************************************//**
2195 Does the transaction prepare for MySQL. */
2196 UNIV_INTERN
2197 void
trx_prepare_for_mysql(trx_t * trx)2198 trx_prepare_for_mysql(
2199 /*==================*/
2200 	trx_t*	trx)	/*!< in/out: trx handle */
2201 {
2202 	trx_start_if_not_started_xa(trx);
2203 
2204 	trx->op_info = "preparing";
2205 
2206 	trx_prepare(trx);
2207 
2208 	trx->op_info = "";
2209 }
2210 
2211 /**********************************************************************//**
2212 This function is used to find number of prepared transactions and
2213 their transaction objects for a recovery.
2214 @return	number of prepared transactions stored in xid_list */
2215 UNIV_INTERN
2216 int
trx_recover_for_mysql(XID * xid_list,ulint len)2217 trx_recover_for_mysql(
2218 /*==================*/
2219 	XID*	xid_list,	/*!< in/out: prepared transactions */
2220 	ulint	len)		/*!< in: number of slots in xid_list */
2221 {
2222 	const trx_t*	trx;
2223 	ulint		count = 0;
2224 
2225 	ut_ad(xid_list);
2226 	ut_ad(len);
2227 
2228 	/* We should set those transactions which are in the prepared state
2229 	to the xid_list */
2230 
2231 	mutex_enter(&trx_sys->mutex);
2232 
2233 	for (trx = UT_LIST_GET_FIRST(trx_sys->rw_trx_list);
2234 	     trx != NULL;
2235 	     trx = UT_LIST_GET_NEXT(trx_list, trx)) {
2236 
2237 		assert_trx_in_rw_list(trx);
2238 
2239 		/* The state of a read-write transaction cannot change
2240 		from or to NOT_STARTED while we are holding the
2241 		trx_sys->mutex. It may change to PREPARED, but not if
2242 		trx->is_recovered. It may also change to COMMITTED. */
2243 		if (trx_state_eq(trx, TRX_STATE_PREPARED)) {
2244 			xid_list[count] = trx->xid;
2245 
2246 			if (count == 0) {
2247 				ut_print_timestamp(stderr);
2248 				fprintf(stderr,
2249 					"  InnoDB: Starting recovery for"
2250 					" XA transactions...\n");
2251 			}
2252 
2253 			ut_print_timestamp(stderr);
2254 			fprintf(stderr,
2255 				"  InnoDB: Transaction " TRX_ID_FMT " in"
2256 				" prepared state after recovery\n",
2257 				trx->id);
2258 
2259 			ut_print_timestamp(stderr);
2260 			fprintf(stderr,
2261 				"  InnoDB: Transaction contains changes"
2262 				" to " TRX_ID_FMT " rows\n",
2263 				trx->undo_no);
2264 
2265 			count++;
2266 
2267 			if (count == len) {
2268 				break;
2269 			}
2270 		}
2271 	}
2272 
2273 	mutex_exit(&trx_sys->mutex);
2274 
2275 	if (count > 0){
2276 		ut_print_timestamp(stderr);
2277 		fprintf(stderr,
2278 			"  InnoDB: %d transactions in prepared state"
2279 			" after recovery\n",
2280 			int (count));
2281 	}
2282 
2283 	return(int (count));
2284 }
2285 
2286 /*******************************************************************//**
2287 This function is used to find one X/Open XA distributed transaction
2288 which is in the prepared state
2289 @return	trx on match, the trx->xid will be invalidated;
2290 note that the trx may have been committed, unless the caller is
2291 holding lock_sys->mutex */
2292 static MY_ATTRIBUTE((nonnull, warn_unused_result))
2293 trx_t*
trx_get_trx_by_xid_low(const XID * xid)2294 trx_get_trx_by_xid_low(
2295 /*===================*/
2296 	const XID*	xid)		/*!< in: X/Open XA transaction
2297 					identifier */
2298 {
2299 	trx_t*		trx;
2300 
2301 	ut_ad(mutex_own(&trx_sys->mutex));
2302 
2303 	for (trx = UT_LIST_GET_FIRST(trx_sys->rw_trx_list);
2304 	     trx != NULL;
2305 	     trx = UT_LIST_GET_NEXT(trx_list, trx)) {
2306 
2307 		assert_trx_in_rw_list(trx);
2308 
2309 		/* Compare two X/Open XA transaction id's: their
2310 		length should be the same and binary comparison
2311 		of gtrid_length+bqual_length bytes should be
2312 		the same */
2313 
2314 		if (trx->is_recovered
2315 		    && trx_state_eq(trx, TRX_STATE_PREPARED)
2316 		    && xid->gtrid_length == trx->xid.gtrid_length
2317 		    && xid->bqual_length == trx->xid.bqual_length
2318 		    && memcmp(xid->data, trx->xid.data,
2319 			      xid->gtrid_length + xid->bqual_length) == 0) {
2320 
2321 #ifdef WITH_WSREP
2322 			/* The commit of a prepared recovered Galera
2323 			transaction needs a valid trx->xid for
2324 			invoking trx_sys_update_wsrep_checkpoint(). */
2325 			if (!wsrep_is_wsrep_xid(&trx->xid)) {
2326 #endif
2327 			/* Invalidate the XID, so that subsequent calls
2328 			will not find it. */
2329 			memset(&trx->xid, 0, sizeof(trx->xid));
2330 			trx->xid.formatID = -1;
2331 #ifdef WITH_WSREP
2332 			}
2333 #endif /* WITH_WSREP */
2334 			break;
2335 		}
2336 	}
2337 
2338 	return(trx);
2339 }
2340 
2341 /*******************************************************************//**
2342 This function is used to find one X/Open XA distributed transaction
2343 which is in the prepared state
2344 @return	trx or NULL; on match, the trx->xid will be invalidated;
2345 note that the trx may have been committed, unless the caller is
2346 holding lock_sys->mutex */
2347 UNIV_INTERN
2348 trx_t*
trx_get_trx_by_xid(const XID * xid)2349 trx_get_trx_by_xid(
2350 /*===============*/
2351 	const XID*	xid)	/*!< in: X/Open XA transaction identifier */
2352 {
2353 	trx_t*	trx;
2354 
2355 	if (xid == NULL) {
2356 
2357 		return(NULL);
2358 	}
2359 
2360 	mutex_enter(&trx_sys->mutex);
2361 
2362 	/* Recovered/Resurrected transactions are always only on the
2363 	trx_sys_t::rw_trx_list. */
2364 	trx = trx_get_trx_by_xid_low(xid);
2365 
2366 	mutex_exit(&trx_sys->mutex);
2367 
2368 	return(trx);
2369 }
2370 
2371 /*************************************************************//**
2372 Starts the transaction if it is not yet started. */
2373 UNIV_INTERN
2374 void
trx_start_if_not_started_xa_low(trx_t * trx)2375 trx_start_if_not_started_xa_low(
2376 /*============================*/
2377 	trx_t*	trx)	/*!< in: transaction */
2378 {
2379 	switch (trx->state) {
2380 	case TRX_STATE_NOT_STARTED:
2381 
2382 		/* Update the info whether we should skip XA steps
2383 		that eat CPU time.
2384 
2385 		For the duration of the transaction trx->support_xa is
2386 		not reread from thd so any changes in the value take
2387 		effect in the next transaction. This is to avoid a
2388 		scenario where some undo generated by a transaction,
2389 		has XA stuff, and other undo, generated by the same
2390 		transaction, doesn't. */
2391 		trx->support_xa = thd_supports_xa(trx->mysql_thd);
2392 
2393 #ifdef WITH_WSREP
2394 		ut_d(trx->start_file = __FILE__);
2395 		ut_d(trx->start_line = __LINE__);
2396 #endif /* WITH_WSREP */
2397 		trx_start_low(trx);
2398 		/* fall through */
2399 	case TRX_STATE_ACTIVE:
2400 		return;
2401 	case TRX_STATE_PREPARED:
2402 	case TRX_STATE_COMMITTED_IN_MEMORY:
2403 		break;
2404 	}
2405 
2406 	ut_error;
2407 }
2408 
2409 /*************************************************************//**
2410 Starts the transaction if it is not yet started. */
2411 UNIV_INTERN
2412 void
trx_start_if_not_started_low(trx_t * trx)2413 trx_start_if_not_started_low(
2414 /*=========================*/
2415 	trx_t*	trx)	/*!< in: transaction */
2416 {
2417 	switch (trx->state) {
2418 	case TRX_STATE_NOT_STARTED:
2419 #ifdef WITH_WSREP
2420 		ut_d(trx->start_file = __FILE__);
2421 		ut_d(trx->start_line = __LINE__);
2422 #endif /* WITH_WSREP */
2423 		trx_start_low(trx);
2424 		/* fall through */
2425 	case TRX_STATE_ACTIVE:
2426 		return;
2427 	case TRX_STATE_PREPARED:
2428 	case TRX_STATE_COMMITTED_IN_MEMORY:
2429 		break;
2430 	}
2431 
2432 	ut_error;
2433 }
2434 
2435 /*************************************************************//**
2436 Starts the transaction for a DDL operation. */
2437 UNIV_INTERN
2438 void
trx_start_for_ddl_low(trx_t * trx,trx_dict_op_t op)2439 trx_start_for_ddl_low(
2440 /*==================*/
2441 	trx_t*		trx,	/*!< in/out: transaction */
2442 	trx_dict_op_t	op)	/*!< in: dictionary operation type */
2443 {
2444 	switch (trx->state) {
2445 	case TRX_STATE_NOT_STARTED:
2446 		/* Flag this transaction as a dictionary operation, so that
2447 		the data dictionary will be locked in crash recovery. */
2448 
2449 		trx_set_dict_operation(trx, op);
2450 
2451 		/* Ensure it is not flagged as an auto-commit-non-locking
2452 		transation. */
2453 		trx->will_lock = 1;
2454 
2455 		trx->ddl = true;
2456 #ifdef WITH_WSREP
2457 		ut_d(trx->start_file = __FILE__);
2458 		ut_d(trx->start_line = __LINE__);
2459 #endif /* WITH_WSREP */
2460 
2461 		trx_start_low(trx);
2462 		return;
2463 
2464 	case TRX_STATE_ACTIVE:
2465 		/* We have this start if not started idiom, therefore we
2466 		can't add stronger checks here. */
2467 		trx->ddl = true;
2468 
2469 		ut_ad(trx->dict_operation != TRX_DICT_OP_NONE);
2470 		ut_ad(trx->will_lock > 0);
2471 		return;
2472 	case TRX_STATE_PREPARED:
2473 	case TRX_STATE_COMMITTED_IN_MEMORY:
2474 		break;
2475 	}
2476 
2477 	ut_error;
2478 }
2479 
2480