1 /*****************************************************************************
2 
3 Copyright (c) 1996, 2017, Oracle and/or its affiliates. All Rights Reserved.
4 Copyright (c) 2016, 2021, MariaDB Corporation.
5 
6 This program is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free Software
8 Foundation; version 2 of the License.
9 
10 This program is distributed in the hope that it will be useful, but WITHOUT
11 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12 FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
13 
14 You should have received a copy of the GNU General Public License along with
15 this program; if not, write to the Free Software Foundation, Inc.,
16 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
17 
18 *****************************************************************************/
19 
20 /**************************************************//**
21 @file trx/trx0roll.cc
22 Transaction rollback
23 
24 Created 3/26/1996 Heikki Tuuri
25 *******************************************************/
26 
27 #include "trx0roll.h"
28 
29 #include <my_service_manager.h>
30 #include <mysql/service_wsrep.h>
31 
32 #include "fsp0fsp.h"
33 #include "lock0lock.h"
34 #include "mach0data.h"
35 #include "pars0pars.h"
36 #include "que0que.h"
37 #include "row0mysql.h"
38 #include "row0undo.h"
39 #include "srv0mon.h"
40 #include "srv0start.h"
41 #include "trx0rec.h"
42 #include "trx0rseg.h"
43 #include "trx0sys.h"
44 #include "trx0trx.h"
45 #include "trx0undo.h"
46 
47 /** This many pages must be undone before a truncate is tried within
48 rollback */
49 static const ulint TRX_ROLL_TRUNC_THRESHOLD = 1;
50 
51 /** true if trx_rollback_all_recovered() thread is active */
52 bool			trx_rollback_is_active;
53 
54 /** In crash recovery, the current trx to be rolled back; NULL otherwise */
55 const trx_t*		trx_roll_crash_recv_trx;
56 
57 /** Finish transaction rollback.
58 @param[in,out]	trx	transaction
59 @return	whether the rollback was completed normally
60 @retval	false	if the rollback was aborted by shutdown  */
trx_rollback_finish(trx_t * trx)61 static bool trx_rollback_finish(trx_t* trx)
62 {
63 	trx->mod_tables.clear();
64 	bool finished = trx->error_state == DB_SUCCESS;
65 	if (UNIV_LIKELY(finished)) {
66 		trx_commit(trx);
67 	} else {
68 		ut_a(trx->error_state == DB_INTERRUPTED);
69 		ut_ad(srv_shutdown_state != SRV_SHUTDOWN_NONE);
70 		ut_a(!srv_undo_sources);
71 		ut_ad(srv_fast_shutdown);
72 		ut_d(trx->in_rollback = false);
73 		if (trx_undo_t*& undo = trx->rsegs.m_redo.undo) {
74 			UT_LIST_REMOVE(trx->rsegs.m_redo.rseg->undo_list,
75 				       undo);
76 			ut_free(undo);
77 			undo = NULL;
78 		}
79 		if (trx_undo_t*& undo = trx->rsegs.m_noredo.undo) {
80 			UT_LIST_REMOVE(trx->rsegs.m_noredo.rseg->undo_list,
81 				       undo);
82 			ut_free(undo);
83 			undo = NULL;
84 		}
85 		trx_commit_low(trx, NULL);
86 	}
87 
88 	trx->lock.que_state = TRX_QUE_RUNNING;
89 
90 	return finished;
91 }
92 
93 /*******************************************************************//**
94 Rollback a transaction used in MySQL. */
95 static
96 void
trx_rollback_to_savepoint_low(trx_t * trx,trx_savept_t * savept)97 trx_rollback_to_savepoint_low(
98 /*==========================*/
99 	trx_t*		trx,	/*!< in: transaction handle */
100 	trx_savept_t*	savept)	/*!< in: pointer to savepoint undo number, if
101 				partial rollback requested, or NULL for
102 				complete rollback */
103 {
104 	que_thr_t*	thr;
105 	mem_heap_t*	heap;
106 	roll_node_t*	roll_node;
107 
108 	heap = mem_heap_create(512);
109 
110 	roll_node = roll_node_create(heap);
111 	ut_ad(!trx->in_rollback);
112 
113 	if (savept != NULL) {
114 		roll_node->savept = savept;
115 		ut_ad(trx->mysql_thd);
116 		ut_ad(!trx->is_recovered);
117 		ut_ad(trx->state == TRX_STATE_ACTIVE);
118 	} else {
119 		ut_d(trx_state_t state = trx->state);
120 		ut_ad(state == TRX_STATE_ACTIVE
121 		      || state == TRX_STATE_PREPARED
122 		      || state == TRX_STATE_PREPARED_RECOVERED);
123 	}
124 
125 	trx->error_state = DB_SUCCESS;
126 
127 	if (trx->has_logged()) {
128 
129 		ut_ad(trx->rsegs.m_redo.rseg != 0
130 		      || trx->rsegs.m_noredo.rseg != 0);
131 
132 		thr = pars_complete_graph_for_exec(roll_node, trx, heap, NULL);
133 
134 		ut_a(thr == que_fork_start_command(
135 			static_cast<que_fork_t*>(que_node_get_parent(thr))));
136 
137 		que_run_threads(thr);
138 
139 		ut_a(roll_node->undo_thr != NULL);
140 		que_run_threads(roll_node->undo_thr);
141 
142 		/* Free the memory reserved by the undo graph. */
143 		que_graph_free(static_cast<que_t*>(
144 				       roll_node->undo_thr->common.parent));
145 	}
146 
147 	if (savept == NULL) {
148 		trx_rollback_finish(trx);
149 		MONITOR_INC(MONITOR_TRX_ROLLBACK);
150 	} else {
151 		ut_a(trx->error_state == DB_SUCCESS);
152 		const undo_no_t limit = savept->least_undo_no;
153 		for (trx_mod_tables_t::iterator i = trx->mod_tables.begin();
154 		     i != trx->mod_tables.end(); ) {
155 			trx_mod_tables_t::iterator j = i++;
156 			ut_ad(j->second.valid());
157 			if (j->second.rollback(limit)) {
158 				trx->mod_tables.erase(j);
159 			}
160 		}
161 		trx->lock.que_state = TRX_QUE_RUNNING;
162 		MONITOR_INC(MONITOR_TRX_ROLLBACK_SAVEPOINT);
163 	}
164 
165 	mem_heap_free(heap);
166 
167 	/* There might be work for utility threads.*/
168 	srv_active_wake_master_thread();
169 
170 	MONITOR_DEC(MONITOR_TRX_ACTIVE);
171 }
172 
173 /*******************************************************************//**
174 Rollback a transaction to a given savepoint or do a complete rollback.
175 @return error code or DB_SUCCESS */
176 dberr_t
trx_rollback_to_savepoint(trx_t * trx,trx_savept_t * savept)177 trx_rollback_to_savepoint(
178 /*======================*/
179 	trx_t*		trx,	/*!< in: transaction handle */
180 	trx_savept_t*	savept)	/*!< in: pointer to savepoint undo number, if
181 				partial rollback requested, or NULL for
182 				complete rollback */
183 {
184 	ut_ad(!trx_mutex_own(trx));
185 
186 	trx_start_if_not_started_xa(trx, true);
187 
188 	trx_rollback_to_savepoint_low(trx, savept);
189 
190 	return(trx->error_state);
191 }
192 
193 /*******************************************************************//**
194 Rollback a transaction used in MySQL.
195 @return error code or DB_SUCCESS */
196 static
197 dberr_t
trx_rollback_for_mysql_low(trx_t * trx)198 trx_rollback_for_mysql_low(
199 /*=======================*/
200 	trx_t*	trx)	/*!< in/out: transaction */
201 {
202 	trx->op_info = "rollback";
203 
204 	/* If we are doing the XA recovery of prepared transactions,
205 	then the transaction object does not have an InnoDB session
206 	object, and we set a dummy session that we use for all MySQL
207 	transactions. */
208 
209 	trx_rollback_to_savepoint_low(trx, NULL);
210 
211 	trx->op_info = "";
212 
213 	return(trx->error_state);
214 }
215 
216 /** Rollback a transaction used in MySQL
217 @param[in, out]	trx	transaction
218 @return error code or DB_SUCCESS */
trx_rollback_for_mysql(trx_t * trx)219 dberr_t trx_rollback_for_mysql(trx_t* trx)
220 {
221 	/* We are reading trx->state without holding trx_sys.mutex
222 	here, because the rollback should be invoked for a running
223 	active MySQL transaction (or recovered prepared transaction)
224 	that is associated with the current thread. */
225 
226 	switch (trx->state) {
227 	case TRX_STATE_NOT_STARTED:
228 		trx->will_lock = false;
229 		ut_ad(trx->mysql_thd);
230 #ifdef WITH_WSREP
231 		trx->wsrep = false;
232 #endif
233 		return(DB_SUCCESS);
234 
235 	case TRX_STATE_ACTIVE:
236 		ut_ad(trx->mysql_thd);
237 		ut_ad(!trx->is_recovered);
238 		ut_ad(!trx->is_autocommit_non_locking() || trx->read_only);
239 		return(trx_rollback_for_mysql_low(trx));
240 
241 	case TRX_STATE_PREPARED:
242 	case TRX_STATE_PREPARED_RECOVERED:
243 		ut_ad(!trx->is_autocommit_non_locking());
244 		if (trx->has_logged_persistent()) {
245 			/* The XA ROLLBACK of a XA PREPARE transaction
246 			will consist of multiple mini-transactions.
247 
248 			As the very first step of XA ROLLBACK, we must
249 			change the undo log state back from
250 			TRX_UNDO_PREPARED to TRX_UNDO_ACTIVE, in order
251 			to ensure that recovery will complete the
252 			rollback.
253 
254 			Failure to perform this step could cause a
255 			situation where we would roll back part of
256 			a XA PREPARE transaction, the server would be
257 			killed, and finally, the transaction would be
258 			recovered in XA PREPARE state, with some of
259 			the actions already having been rolled back. */
260 			ut_ad(trx->rsegs.m_redo.undo->rseg
261 			      == trx->rsegs.m_redo.rseg);
262 			mtr_t		mtr;
263 			mtr.start();
264 			mutex_enter(&trx->rsegs.m_redo.rseg->mutex);
265 			if (trx_undo_t* undo = trx->rsegs.m_redo.undo) {
266 				trx_undo_set_state_at_prepare(trx, undo, true,
267 							      &mtr);
268 			}
269 			mutex_exit(&trx->rsegs.m_redo.rseg->mutex);
270 			/* Write the redo log for the XA ROLLBACK
271 			state change to the global buffer. It is
272 			not necessary to flush the redo log. If
273 			a durable log write of a later mini-transaction
274 			takes place for whatever reason, then this state
275 			change will be durable as well. */
276 			mtr.commit();
277 			ut_ad(mtr.commit_lsn() > 0);
278 		}
279 		return(trx_rollback_for_mysql_low(trx));
280 
281 	case TRX_STATE_COMMITTED_IN_MEMORY:
282 		ut_ad(!trx->is_autocommit_non_locking());
283 		break;
284 	}
285 
286 	ut_error;
287 	return(DB_CORRUPTION);
288 }
289 
290 /*******************************************************************//**
291 Rollback the latest SQL statement for MySQL.
292 @return error code or DB_SUCCESS */
293 dberr_t
trx_rollback_last_sql_stat_for_mysql(trx_t * trx)294 trx_rollback_last_sql_stat_for_mysql(
295 /*=================================*/
296 	trx_t*	trx)	/*!< in/out: transaction */
297 {
298 	dberr_t	err;
299 
300 	/* We are reading trx->state without holding trx_sys.mutex
301 	here, because the statement rollback should be invoked for a
302 	running active MySQL transaction that is associated with the
303 	current thread. */
304 	ut_ad(trx->mysql_thd);
305 
306 	switch (trx->state) {
307 	case TRX_STATE_NOT_STARTED:
308 		return(DB_SUCCESS);
309 
310 	case TRX_STATE_ACTIVE:
311 		ut_ad(trx->mysql_thd);
312 		ut_ad(!trx->is_recovered);
313 		ut_ad(!trx->is_autocommit_non_locking() || trx->read_only);
314 
315 		trx->op_info = "rollback of SQL statement";
316 
317 		err = trx_rollback_to_savepoint(
318 			trx, &trx->last_sql_stat_start);
319 
320 		if (trx->fts_trx != NULL) {
321 			fts_savepoint_rollback_last_stmt(trx);
322 		}
323 
324 		/* The following call should not be needed,
325 		but we play it safe: */
326 		trx_mark_sql_stat_end(trx);
327 
328 		trx->op_info = "";
329 
330 		return(err);
331 
332 	case TRX_STATE_PREPARED:
333 	case TRX_STATE_PREPARED_RECOVERED:
334 	case TRX_STATE_COMMITTED_IN_MEMORY:
335 		/* The statement rollback is only allowed on an ACTIVE
336 		transaction, not a PREPARED or COMMITTED one. */
337 		break;
338 	}
339 
340 	ut_error;
341 	return(DB_CORRUPTION);
342 }
343 
344 /*******************************************************************//**
345 Search for a savepoint using name.
346 @return savepoint if found else NULL */
347 static
348 trx_named_savept_t*
trx_savepoint_find(trx_t * trx,const char * name)349 trx_savepoint_find(
350 /*===============*/
351 	trx_t*		trx,			/*!< in: transaction */
352 	const char*	name)			/*!< in: savepoint name */
353 {
354 	trx_named_savept_t*	savep;
355 
356 	for (savep = UT_LIST_GET_FIRST(trx->trx_savepoints);
357 	     savep != NULL;
358 	     savep = UT_LIST_GET_NEXT(trx_savepoints, savep)) {
359 
360 		if (0 == ut_strcmp(savep->name, name)) {
361 			return(savep);
362 		}
363 	}
364 
365 	return(NULL);
366 }
367 
368 /*******************************************************************//**
369 Frees a single savepoint struct. */
370 static
371 void
trx_roll_savepoint_free(trx_t * trx,trx_named_savept_t * savep)372 trx_roll_savepoint_free(
373 /*=====================*/
374 	trx_t*			trx,	/*!< in: transaction handle */
375 	trx_named_savept_t*	savep)	/*!< in: savepoint to free */
376 {
377 	UT_LIST_REMOVE(trx->trx_savepoints, savep);
378 
379 	ut_free(savep->name);
380 	ut_free(savep);
381 }
382 
383 /*******************************************************************//**
384 Frees savepoint structs starting from savep. */
385 void
trx_roll_savepoints_free(trx_t * trx,trx_named_savept_t * savep)386 trx_roll_savepoints_free(
387 /*=====================*/
388 	trx_t*			trx,	/*!< in: transaction handle */
389 	trx_named_savept_t*	savep)	/*!< in: free all savepoints starting
390 					with this savepoint i*/
391 {
392 	while (savep != NULL) {
393 		trx_named_savept_t*	next_savep;
394 
395 		next_savep = UT_LIST_GET_NEXT(trx_savepoints, savep);
396 
397 		trx_roll_savepoint_free(trx, savep);
398 
399 		savep = next_savep;
400 	}
401 }
402 
403 /*******************************************************************//**
404 Rolls back a transaction back to a named savepoint. Modifications after the
405 savepoint are undone but InnoDB does NOT release the corresponding locks
406 which are stored in memory. If a lock is 'implicit', that is, a new inserted
407 row holds a lock where the lock information is carried by the trx id stored in
408 the row, these locks are naturally released in the rollback. Savepoints which
409 were set after this savepoint are deleted.
410 @return if no savepoint of the name found then DB_NO_SAVEPOINT,
411 otherwise DB_SUCCESS */
412 static MY_ATTRIBUTE((nonnull, warn_unused_result))
413 dberr_t
trx_rollback_to_savepoint_for_mysql_low(trx_t * trx,trx_named_savept_t * savep,int64_t * mysql_binlog_cache_pos)414 trx_rollback_to_savepoint_for_mysql_low(
415 /*====================================*/
416 	trx_t*			trx,	/*!< in/out: transaction */
417 	trx_named_savept_t*	savep,	/*!< in/out: savepoint */
418 	int64_t*		mysql_binlog_cache_pos)
419 					/*!< out: the MySQL binlog
420 					cache position corresponding
421 					to this savepoint; MySQL needs
422 					this information to remove the
423 					binlog entries of the queries
424 					executed after the savepoint */
425 {
426 	dberr_t	err;
427 
428 	ut_ad(trx_state_eq(trx, TRX_STATE_ACTIVE));
429 	ut_ad(trx->mysql_thd);
430 
431 	/* Free all savepoints strictly later than savep. */
432 
433 	trx_roll_savepoints_free(
434 		trx, UT_LIST_GET_NEXT(trx_savepoints, savep));
435 
436 	*mysql_binlog_cache_pos = savep->mysql_binlog_cache_pos;
437 
438 	trx->op_info = "rollback to a savepoint";
439 
440 	err = trx_rollback_to_savepoint(trx, &savep->savept);
441 
442 	/* Store the current undo_no of the transaction so that
443 	we know where to roll back if we have to roll back the
444 	next SQL statement: */
445 
446 	trx_mark_sql_stat_end(trx);
447 
448 	trx->op_info = "";
449 
450 #ifdef WITH_WSREP
451 	if (trx->is_wsrep()) {
452 		trx->lock.was_chosen_as_deadlock_victim = false;
453 	}
454 #endif
455 	return(err);
456 }
457 
458 /*******************************************************************//**
459 Rolls back a transaction back to a named savepoint. Modifications after the
460 savepoint are undone but InnoDB does NOT release the corresponding locks
461 which are stored in memory. If a lock is 'implicit', that is, a new inserted
462 row holds a lock where the lock information is carried by the trx id stored in
463 the row, these locks are naturally released in the rollback. Savepoints which
464 were set after this savepoint are deleted.
465 @return if no savepoint of the name found then DB_NO_SAVEPOINT,
466 otherwise DB_SUCCESS */
467 dberr_t
trx_rollback_to_savepoint_for_mysql(trx_t * trx,const char * savepoint_name,int64_t * mysql_binlog_cache_pos)468 trx_rollback_to_savepoint_for_mysql(
469 /*================================*/
470 	trx_t*		trx,			/*!< in: transaction handle */
471 	const char*	savepoint_name,		/*!< in: savepoint name */
472 	int64_t*	mysql_binlog_cache_pos)	/*!< out: the MySQL binlog cache
473 						position corresponding to this
474 						savepoint; MySQL needs this
475 						information to remove the
476 						binlog entries of the queries
477 						executed after the savepoint */
478 {
479 	trx_named_savept_t*	savep;
480 
481 	/* We are reading trx->state without holding trx_sys.mutex
482 	here, because the savepoint rollback should be invoked for a
483 	running active MySQL transaction that is associated with the
484 	current thread. */
485 	ut_ad(trx->mysql_thd);
486 
487 	savep = trx_savepoint_find(trx, savepoint_name);
488 
489 	if (savep == NULL) {
490 		return(DB_NO_SAVEPOINT);
491 	}
492 
493 	switch (trx->state) {
494 	case TRX_STATE_NOT_STARTED:
495 		ib::error() << "Transaction has a savepoint "
496 			<< savep->name
497 			<< " though it is not started";
498 		return(DB_ERROR);
499 
500 	case TRX_STATE_ACTIVE:
501 
502 		return(trx_rollback_to_savepoint_for_mysql_low(
503 				trx, savep, mysql_binlog_cache_pos));
504 
505 	case TRX_STATE_PREPARED:
506 	case TRX_STATE_PREPARED_RECOVERED:
507 	case TRX_STATE_COMMITTED_IN_MEMORY:
508 		/* The savepoint rollback is only allowed on an ACTIVE
509 		transaction, not a PREPARED or COMMITTED one. */
510 		break;
511 	}
512 
513 	ut_error;
514 	return(DB_CORRUPTION);
515 }
516 
517 /*******************************************************************//**
518 Creates a named savepoint. If the transaction is not yet started, starts it.
519 If there is already a savepoint of the same name, this call erases that old
520 savepoint and replaces it with a new. Savepoints are deleted in a transaction
521 commit or rollback.
522 @return always DB_SUCCESS */
523 dberr_t
trx_savepoint_for_mysql(trx_t * trx,const char * savepoint_name,int64_t binlog_cache_pos)524 trx_savepoint_for_mysql(
525 /*====================*/
526 	trx_t*		trx,			/*!< in: transaction handle */
527 	const char*	savepoint_name,		/*!< in: savepoint name */
528 	int64_t		binlog_cache_pos)	/*!< in: MySQL binlog cache
529 						position corresponding to this
530 						connection at the time of the
531 						savepoint */
532 {
533 	trx_named_savept_t*	savep;
534 
535 	trx_start_if_not_started_xa(trx, false);
536 
537 	savep = trx_savepoint_find(trx, savepoint_name);
538 
539 	if (savep) {
540 		/* There is a savepoint with the same name: free that */
541 
542 		UT_LIST_REMOVE(trx->trx_savepoints, savep);
543 
544 		ut_free(savep->name);
545 		ut_free(savep);
546 	}
547 
548 	/* Create a new savepoint and add it as the last in the list */
549 
550 	savep = static_cast<trx_named_savept_t*>(
551 		ut_malloc_nokey(sizeof(*savep)));
552 
553 	savep->name = mem_strdup(savepoint_name);
554 
555 	savep->savept = trx_savept_take(trx);
556 
557 	savep->mysql_binlog_cache_pos = binlog_cache_pos;
558 
559 	UT_LIST_ADD_LAST(trx->trx_savepoints, savep);
560 
561 	return(DB_SUCCESS);
562 }
563 
564 /*******************************************************************//**
565 Releases only the named savepoint. Savepoints which were set after this
566 savepoint are left as is.
567 @return if no savepoint of the name found then DB_NO_SAVEPOINT,
568 otherwise DB_SUCCESS */
569 dberr_t
trx_release_savepoint_for_mysql(trx_t * trx,const char * savepoint_name)570 trx_release_savepoint_for_mysql(
571 /*============================*/
572 	trx_t*		trx,			/*!< in: transaction handle */
573 	const char*	savepoint_name)		/*!< in: savepoint name */
574 {
575 	trx_named_savept_t*	savep;
576 
577 	ut_ad(trx_state_eq(trx, TRX_STATE_ACTIVE, true)
578 	      || trx_state_eq(trx, TRX_STATE_PREPARED, true));
579 	ut_ad(trx->mysql_thd);
580 
581 	savep = trx_savepoint_find(trx, savepoint_name);
582 
583 	if (savep != NULL) {
584 		trx_roll_savepoint_free(trx, savep);
585 	}
586 
587 	return(savep != NULL ? DB_SUCCESS : DB_NO_SAVEPOINT);
588 }
589 
590 /*******************************************************************//**
591 Returns a transaction savepoint taken at this point in time.
592 @return savepoint */
593 trx_savept_t
trx_savept_take(trx_t * trx)594 trx_savept_take(
595 /*============*/
596 	trx_t*	trx)	/*!< in: transaction */
597 {
598 	trx_savept_t	savept;
599 
600 	savept.least_undo_no = trx->undo_no;
601 
602 	return(savept);
603 }
604 
605 /*******************************************************************//**
606 Roll back an active transaction. */
607 static
608 void
trx_rollback_active(trx_t * trx)609 trx_rollback_active(
610 /*================*/
611 	trx_t*	trx)	/*!< in/out: transaction */
612 {
613 	mem_heap_t*	heap;
614 	que_fork_t*	fork;
615 	que_thr_t*	thr;
616 	roll_node_t*	roll_node;
617 	const trx_id_t	trx_id = trx->id;
618 
619 	ut_ad(trx_id);
620 
621 	heap = mem_heap_create(512);
622 
623 	fork = que_fork_create(NULL, NULL, QUE_FORK_RECOVERY, heap);
624 	fork->trx = trx;
625 
626 	thr = que_thr_create(fork, heap, NULL);
627 
628 	roll_node = roll_node_create(heap);
629 
630 	thr->child = roll_node;
631 	roll_node->common.parent = thr;
632 
633 	trx->graph = fork;
634 
635 	ut_a(thr == que_fork_start_command(fork));
636 
637 	trx_roll_crash_recv_trx	= trx;
638 
639 	const bool dictionary_locked = trx_get_dict_operation(trx)
640 		!= TRX_DICT_OP_NONE;
641 
642 	if (dictionary_locked) {
643 		row_mysql_lock_data_dictionary(trx);
644 	}
645 
646 	que_run_threads(thr);
647 	ut_a(roll_node->undo_thr != NULL);
648 
649 	que_run_threads(roll_node->undo_thr);
650 
651 	que_graph_free(
652 		static_cast<que_t*>(roll_node->undo_thr->common.parent));
653 
654 	if (UNIV_UNLIKELY(!trx_rollback_finish(trx))) {
655 		ut_ad(!dictionary_locked);
656 		goto func_exit;
657 	}
658 
659 	ut_a(trx->lock.que_state == TRX_QUE_RUNNING);
660 
661 	if (!dictionary_locked || !trx->table_id) {
662 	} else if (dict_table_t* table = dict_table_open_on_id(
663 			   trx->table_id, TRUE, DICT_TABLE_OP_NORMAL)) {
664 		ib::info() << "Dropping table " << table->name
665 			   << ", with id " << trx->table_id
666 			   << " in recovery";
667 
668 		dict_table_close_and_drop(trx, table);
669 
670 		trx_commit_for_mysql(trx);
671 	}
672 
673 	ib::info() << "Rolled back recovered transaction " << trx_id;
674 
675 func_exit:
676 	if (dictionary_locked) {
677 		row_mysql_unlock_data_dictionary(trx);
678 	}
679 
680 	mem_heap_free(heap);
681 
682 	trx_roll_crash_recv_trx	= NULL;
683 }
684 
685 
/** Totals accumulated by trx_roll_count_callback() */
struct trx_roll_count_callback_arg
{
  /** number of transactions counted */
  uint32_t n_trx;
  /** sum of trx->undo_no over the counted transactions */
  uint64_t n_rows;

  /** Start with both totals at zero */
  trx_roll_count_callback_arg()
  {
    n_trx= 0;
    n_rows= 0;
  }
};
692 
693 
trx_roll_count_callback(rw_trx_hash_element_t * element,trx_roll_count_callback_arg * arg)694 static my_bool trx_roll_count_callback(rw_trx_hash_element_t *element,
695                                        trx_roll_count_callback_arg *arg)
696 {
697   mutex_enter(&element->mutex);
698   if (trx_t *trx= element->trx)
699   {
700     if (trx->is_recovered && trx_state_eq(trx, TRX_STATE_ACTIVE))
701     {
702       arg->n_trx++;
703       arg->n_rows+= trx->undo_no;
704     }
705   }
706   mutex_exit(&element->mutex);
707   return 0;
708 }
709 
710 /** Report progress when rolling back a row of a recovered transaction. */
trx_roll_report_progress()711 void trx_roll_report_progress()
712 {
713 	time_t now = time(NULL);
714 	mutex_enter(&recv_sys->mutex);
715 	bool report = recv_sys->report(now);
716 	mutex_exit(&recv_sys->mutex);
717 
718 	if (report) {
719 		trx_roll_count_callback_arg arg;
720 
721 		/* Get number of recovered active transactions and number of
722 		rows they modified. Numbers must be accurate, because only this
723 		thread is allowed to touch recovered transactions. */
724 		trx_sys.rw_trx_hash.iterate_no_dups(
725 			reinterpret_cast<my_hash_walk_action>
726 			(trx_roll_count_callback), &arg);
727 
728 		if (arg.n_rows > 0) {
729 			service_manager_extend_timeout(
730 				INNODB_EXTEND_TIMEOUT_INTERVAL,
731 				"To roll back: " UINT32PF " transactions, "
732 				UINT64PF " rows", arg.n_trx, arg.n_rows);
733 		}
734 
735 		ib::info() << "To roll back: " << arg.n_trx
736 			   << " transactions, " << arg.n_rows << " rows";
737 
738 	}
739 }
740 
741 
trx_rollback_recovered_callback(rw_trx_hash_element_t * element,std::vector<trx_t * > * trx_list)742 static my_bool trx_rollback_recovered_callback(rw_trx_hash_element_t *element,
743                                                std::vector<trx_t*> *trx_list)
744 {
745   mutex_enter(&element->mutex);
746   if (trx_t *trx= element->trx)
747   {
748     mutex_enter(&trx->mutex);
749     if (trx_state_eq(trx, TRX_STATE_ACTIVE) && trx->is_recovered)
750       trx_list->push_back(trx);
751     mutex_exit(&trx->mutex);
752   }
753   mutex_exit(&element->mutex);
754   return 0;
755 }
756 
757 
758 /**
759   Rollback any incomplete transactions which were encountered in crash recovery.
760 
761   If the transaction already was committed, then we clean up a possible insert
762   undo log. If the transaction was not yet committed, then we roll it back.
763 
764   Note: For XA recovered transactions, we rely on MySQL to
765   do rollback. They will be in TRX_STATE_PREPARED state. If the server
766   is shutdown and they are still lingering in trx_sys_t::trx_list
767   then the shutdown will hang.
768 
769   @param[in]  all  true=roll back all recovered active transactions;
770                    false=roll back any incomplete dictionary transaction
771 */
772 
trx_rollback_recovered(bool all)773 void trx_rollback_recovered(bool all)
774 {
775   std::vector<trx_t*> trx_list;
776 
777   ut_a(srv_force_recovery < SRV_FORCE_NO_TRX_UNDO);
778 
779   /*
780     Collect list of recovered ACTIVE transaction ids first. Once collected, no
781     other thread is allowed to modify or remove these transactions from
782     rw_trx_hash.
783   */
784   trx_sys.rw_trx_hash.iterate_no_dups(reinterpret_cast<my_hash_walk_action>
785                                       (trx_rollback_recovered_callback),
786                                       &trx_list);
787 
788   while (!trx_list.empty())
789   {
790     trx_t *trx= trx_list.back();
791     trx_list.pop_back();
792 
793     ut_ad(trx);
794     ut_d(trx_mutex_enter(trx));
795     ut_ad(trx->is_recovered);
796     ut_ad(trx_state_eq(trx, TRX_STATE_ACTIVE));
797     ut_d(trx_mutex_exit(trx));
798 
799     if (srv_shutdown_state != SRV_SHUTDOWN_NONE && !srv_undo_sources &&
800         srv_fast_shutdown)
801       goto discard;
802 
803     if (all || trx_get_dict_operation(trx) != TRX_DICT_OP_NONE
804         || trx->has_stats_table_lock())
805     {
806       trx_rollback_active(trx);
807       if (trx->error_state != DB_SUCCESS)
808       {
809         ut_ad(trx->error_state == DB_INTERRUPTED);
810         trx->error_state= DB_SUCCESS;
811         ut_ad(!srv_undo_sources);
812         ut_ad(srv_fast_shutdown);
813 discard:
814         /* Note: before kill_server() invoked innobase_end() via
815         unireg_end(), it invoked close_connections(), which should initiate
816         the rollback of any user transactions via THD::cleanup() in the
817         connection threads, and wait for all THD::cleanup() to complete.
818         So, no active user transactions should exist at this point.
819 
820         srv_undo_sources=false was cleared early in innobase_end().
821 
822         Generally, the server guarantees that all connections using
823         InnoDB must be disconnected by the time we are reaching this code,
824         be it during shutdown or UNINSTALL PLUGIN.
825 
826         Because there is no possible race condition with any
827         concurrent user transaction, we do not have to invoke
828         trx->commit_state() or wait for !trx->is_referenced()
829         before trx_sys.deregister_rw(trx). */
830         trx_sys.deregister_rw(trx);
831         trx_free_at_shutdown(trx);
832       }
833       else
834         trx->free();
835     }
836   }
837 }
838 
839 
840 /*******************************************************************//**
841 Rollback or clean up any incomplete transactions which were
842 encountered in crash recovery.  If the transaction already was
843 committed, then we clean up a possible insert undo log. If the
844 transaction was not yet committed, then we roll it back.
845 Note: this is done in a background thread.
846 @return a dummy parameter */
847 extern "C"
848 os_thread_ret_t
DECLARE_THREAD(trx_rollback_all_recovered)849 DECLARE_THREAD(trx_rollback_all_recovered)(void*)
850 {
851 	my_thread_init();
852 	ut_ad(!srv_read_only_mode);
853 
854 #ifdef UNIV_PFS_THREAD
855 	pfs_register_thread(trx_rollback_clean_thread_key);
856 #endif /* UNIV_PFS_THREAD */
857 
858 	if (trx_sys.rw_trx_hash.size()) {
859 		ib::info() << "Starting in background the rollback of"
860 			" recovered transactions";
861 		trx_rollback_recovered(true);
862 		ib::info() << "Rollback of non-prepared transactions"
863 			" completed";
864 	}
865 
866 	trx_rollback_is_active = false;
867 
868 	my_thread_end();
869 	/* We count the number of threads in os_thread_exit(). A created
870 	thread should always use that to exit and not use return() to exit. */
871 
872 	os_thread_exit();
873 
874 	OS_THREAD_DUMMY_RETURN;
875 }
876 
877 /** Try to truncate the undo logs.
878 @param[in,out]	trx	transaction */
879 static
880 void
trx_roll_try_truncate(trx_t * trx)881 trx_roll_try_truncate(trx_t* trx)
882 {
883 	trx->pages_undone = 0;
884 
885 	undo_no_t	undo_no		= trx->undo_no;
886 
887 	if (trx_undo_t*	undo = trx->rsegs.m_redo.undo) {
888 		ut_ad(undo->rseg == trx->rsegs.m_redo.rseg);
889 		mutex_enter(&undo->rseg->mutex);
890 		trx_undo_truncate_end(undo, undo_no, false);
891 		mutex_exit(&undo->rseg->mutex);
892 	}
893 
894 	if (trx_undo_t* undo = trx->rsegs.m_noredo.undo) {
895 		ut_ad(undo->rseg == trx->rsegs.m_noredo.rseg);
896 		mutex_enter(&undo->rseg->mutex);
897 		trx_undo_truncate_end(undo, undo_no, true);
898 		mutex_exit(&undo->rseg->mutex);
899 	}
900 }
901 
902 /***********************************************************************//**
903 Pops the topmost undo log record in a single undo log and updates the info
904 about the topmost record in the undo log memory struct.
905 @return undo log record, the page s-latched */
906 static
907 trx_undo_rec_t*
trx_roll_pop_top_rec(trx_t * trx,trx_undo_t * undo,mtr_t * mtr)908 trx_roll_pop_top_rec(
909 /*=================*/
910 	trx_t*		trx,	/*!< in: transaction */
911 	trx_undo_t*	undo,	/*!< in: undo log */
912 	mtr_t*		mtr)	/*!< in: mtr */
913 {
914 	page_t*	undo_page = trx_undo_page_get_s_latched(
915 		page_id_t(undo->rseg->space->id, undo->top_page_no), mtr);
916 
917 	ulint	offset = undo->top_offset;
918 
919 	trx_undo_rec_t*	prev_rec = trx_undo_get_prev_rec(
920 		undo_page + offset, undo->hdr_page_no, undo->hdr_offset,
921 		true, mtr);
922 
923 	if (prev_rec == NULL) {
924 		undo->top_undo_no = IB_ID_MAX;
925 		ut_ad(undo->empty());
926 	} else {
927 		page_t*	prev_rec_page = page_align(prev_rec);
928 
929 		if (prev_rec_page != undo_page) {
930 
931 			trx->pages_undone++;
932 		}
933 
934 		undo->top_page_no = page_get_page_no(prev_rec_page);
935 		undo->top_offset  = ulint(prev_rec - prev_rec_page);
936 		undo->top_undo_no = trx_undo_rec_get_undo_no(prev_rec);
937 		ut_ad(!undo->empty());
938 	}
939 
940 	return(undo_page + offset);
941 }
942 
943 /** Get the last undo log record of a transaction (for rollback).
944 @param[in,out]	trx		transaction
945 @param[out]	roll_ptr	DB_ROLL_PTR to the undo record
946 @param[in,out]	heap		memory heap for allocation
947 @return	undo log record copied to heap
948 @retval	NULL if none left or the roll_limit (savepoint) was reached */
949 trx_undo_rec_t*
trx_roll_pop_top_rec_of_trx(trx_t * trx,roll_ptr_t * roll_ptr,mem_heap_t * heap)950 trx_roll_pop_top_rec_of_trx(trx_t* trx, roll_ptr_t* roll_ptr, mem_heap_t* heap)
951 {
952 	if (trx->pages_undone >= TRX_ROLL_TRUNC_THRESHOLD) {
953 		trx_roll_try_truncate(trx);
954 	}
955 
956 	trx_undo_t*	undo	= NULL;
957 	trx_undo_t*	update	= trx->rsegs.m_redo.undo;
958 	trx_undo_t*	temp	= trx->rsegs.m_noredo.undo;
959 	const undo_no_t	limit	= trx->roll_limit;
960 
961 	ut_ad(!update || !temp || update->empty() || temp->empty()
962 	      || update->top_undo_no != temp->top_undo_no);
963 
964 	if (update && !update->empty() && update->top_undo_no >= limit) {
965 		if (!undo) {
966 			undo = update;
967 		} else if (undo->top_undo_no < update->top_undo_no) {
968 			undo = update;
969 		}
970 	}
971 
972 	if (temp && !temp->empty() && temp->top_undo_no >= limit) {
973 		if (!undo) {
974 			undo = temp;
975 		} else if (undo->top_undo_no < temp->top_undo_no) {
976 			undo = temp;
977 		}
978 	}
979 
980 	if (undo == NULL) {
981 		trx_roll_try_truncate(trx);
982 		/* Mark any ROLLBACK TO SAVEPOINT completed, so that
983 		if the transaction object is committed and reused
984 		later, we will default to a full ROLLBACK. */
985 		trx->roll_limit = 0;
986 		trx->in_rollback = false;
987 		return(NULL);
988 	}
989 
990 	ut_ad(!undo->empty());
991 	ut_ad(limit <= undo->top_undo_no);
992 
993 	*roll_ptr = trx_undo_build_roll_ptr(
994 		false, undo->rseg->id, undo->top_page_no, undo->top_offset);
995 
996 	mtr_t	mtr;
997 	mtr.start();
998 
999 	trx_undo_rec_t*	undo_rec = trx_roll_pop_top_rec(trx, undo, &mtr);
1000 	const undo_no_t	undo_no = trx_undo_rec_get_undo_no(undo_rec);
1001 	switch (trx_undo_rec_get_type(undo_rec)) {
1002 	case TRX_UNDO_INSERT_METADATA:
1003 		/* This record type was introduced in MDEV-11369
1004 		instant ADD COLUMN, which was implemented after
1005 		MDEV-12288 removed the insert_undo log. There is no
1006 		instant ADD COLUMN for temporary tables. Therefore,
1007 		this record can only be present in the main undo log. */
1008 		/* fall through */
1009 	case TRX_UNDO_RENAME_TABLE:
1010 		ut_ad(undo == update);
1011 		/* fall through */
1012 	case TRX_UNDO_INSERT_REC:
1013 		*roll_ptr |= 1ULL << ROLL_PTR_INSERT_FLAG_POS;
1014 	}
1015 
1016 	trx->undo_no = undo_no;
1017 
1018 	trx_undo_rec_t*	undo_rec_copy = trx_undo_rec_copy(undo_rec, heap);
1019 	mtr.commit();
1020 
1021 	return(undo_rec_copy);
1022 }
1023 
1024 /****************************************************************//**
1025 Builds an undo 'query' graph for a transaction. The actual rollback is
1026 performed by executing this query graph like a query subprocedure call.
1027 The reply about the completion of the rollback will be sent by this
1028 graph.
1029 @return own: the query graph */
1030 static
1031 que_t*
trx_roll_graph_build(trx_t * trx)1032 trx_roll_graph_build(
1033 /*=================*/
1034 	trx_t*	trx)	/*!< in/out: transaction */
1035 {
1036 	mem_heap_t*	heap;
1037 	que_fork_t*	fork;
1038 	que_thr_t*	thr;
1039 
1040 	ut_ad(trx_mutex_own(trx));
1041 
1042 	heap = mem_heap_create(512);
1043 	fork = que_fork_create(NULL, NULL, QUE_FORK_ROLLBACK, heap);
1044 	fork->trx = trx;
1045 
1046 	thr = que_thr_create(fork, heap, NULL);
1047 
1048 	thr->child = row_undo_node_create(trx, thr, heap);
1049 
1050 	return(fork);
1051 }
1052 
1053 /*********************************************************************//**
1054 Starts a rollback operation, creates the UNDO graph that will do the
1055 actual undo operation.
1056 @return query graph thread that will perform the UNDO operations. */
1057 static
1058 que_thr_t*
trx_rollback_start(trx_t * trx,undo_no_t roll_limit)1059 trx_rollback_start(
1060 /*===============*/
1061 	trx_t*		trx,		/*!< in: transaction */
1062 	undo_no_t	roll_limit)	/*!< in: rollback to undo no (for
1063 					partial undo), 0 if we are rolling back
1064 					the entire transaction */
1065 {
1066 	ut_ad(trx_mutex_own(trx));
1067 
1068 	/* Initialize the rollback field in the transaction */
1069 
1070 	ut_ad(!trx->roll_limit);
1071 	ut_ad(!trx->in_rollback);
1072 
1073 	trx->roll_limit = roll_limit;
1074 	trx->in_rollback = true;
1075 
1076 	ut_a(trx->roll_limit <= trx->undo_no);
1077 
1078 	trx->pages_undone = 0;
1079 
1080 	/* Build a 'query' graph which will perform the undo operations */
1081 
1082 	que_t*	roll_graph = trx_roll_graph_build(trx);
1083 
1084 	trx->graph = roll_graph;
1085 
1086 	trx->lock.que_state = TRX_QUE_ROLLING_BACK;
1087 
1088 	return(que_fork_start_command(roll_graph));
1089 }
1090 
1091 /*********************************************************************//**
1092 Creates a rollback command node struct.
1093 @return own: rollback node struct */
1094 roll_node_t*
roll_node_create(mem_heap_t * heap)1095 roll_node_create(
1096 /*=============*/
1097 	mem_heap_t*	heap)	/*!< in: mem heap where created */
1098 {
1099 	roll_node_t*	node;
1100 
1101 	node = static_cast<roll_node_t*>(mem_heap_zalloc(heap, sizeof(*node)));
1102 
1103 	node->state = ROLL_NODE_SEND;
1104 
1105 	node->common.type = QUE_NODE_ROLLBACK;
1106 
1107 	return(node);
1108 }
1109 
1110 /***********************************************************//**
1111 Performs an execution step for a rollback command node in a query graph.
1112 @return query thread to run next, or NULL */
1113 que_thr_t*
trx_rollback_step(que_thr_t * thr)1114 trx_rollback_step(
1115 /*==============*/
1116 	que_thr_t*	thr)	/*!< in: query thread */
1117 {
1118 	roll_node_t*	node;
1119 
1120 	node = static_cast<roll_node_t*>(thr->run_node);
1121 
1122 	ut_ad(que_node_get_type(node) == QUE_NODE_ROLLBACK);
1123 
1124 	if (thr->prev_node == que_node_get_parent(node)) {
1125 		node->state = ROLL_NODE_SEND;
1126 	}
1127 
1128 	if (node->state == ROLL_NODE_SEND) {
1129 		trx_t*		trx;
1130 		ib_id_t		roll_limit;
1131 
1132 		trx = thr_get_trx(thr);
1133 
1134 		trx_mutex_enter(trx);
1135 
1136 		node->state = ROLL_NODE_WAIT;
1137 
1138 		ut_a(node->undo_thr == NULL);
1139 
1140 		roll_limit = node->savept ? node->savept->least_undo_no : 0;
1141 
1142 		trx_commit_or_rollback_prepare(trx);
1143 
1144 		node->undo_thr = trx_rollback_start(trx, roll_limit);
1145 
1146 		trx_mutex_exit(trx);
1147 
1148 	} else {
1149 		ut_ad(node->state == ROLL_NODE_WAIT);
1150 
1151 		thr->run_node = que_node_get_parent(node);
1152 	}
1153 
1154 	return(thr);
1155 }
1156