1 /*****************************************************************************
2
3 Copyright (c) 1996, 2017, Oracle and/or its affiliates. All Rights Reserved.
4 Copyright (c) 2016, 2021, MariaDB Corporation.
5
6 This program is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free Software
8 Foundation; version 2 of the License.
9
10 This program is distributed in the hope that it will be useful, but WITHOUT
11 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12 FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License along with
15 this program; if not, write to the Free Software Foundation, Inc.,
16 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
17
18 *****************************************************************************/
19
20 /**************************************************//**
21 @file trx/trx0roll.cc
22 Transaction rollback
23
24 Created 3/26/1996 Heikki Tuuri
25 *******************************************************/
26
27 #include "trx0roll.h"
28
29 #include <my_service_manager.h>
30 #include <mysql/service_wsrep.h>
31
32 #include "fsp0fsp.h"
33 #include "lock0lock.h"
34 #include "mach0data.h"
35 #include "pars0pars.h"
36 #include "que0que.h"
37 #include "row0mysql.h"
38 #include "row0undo.h"
39 #include "srv0mon.h"
40 #include "srv0start.h"
41 #include "trx0rec.h"
42 #include "trx0rseg.h"
43 #include "trx0sys.h"
44 #include "trx0trx.h"
45 #include "trx0undo.h"
46
/** Number of undone undo log pages (trx_t::pages_undone) that must be
reached before trx_roll_pop_top_rec_of_trx() tries to truncate the undo
logs by calling trx_roll_try_truncate() */
static const ulint TRX_ROLL_TRUNC_THRESHOLD = 1;

/** true if the trx_rollback_all_recovered() thread is active; that thread
sets this to false right before it exits.
NOTE(review): the place where this is set to true is not in this part of
the file -- confirm against the code that starts the thread. */
bool trx_rollback_is_active;

/** The recovered transaction that trx_rollback_active() is currently
rolling back; NULL when no such rollback is in progress */
const trx_t* trx_roll_crash_recv_trx;
56
57 /** Finish transaction rollback.
58 @param[in,out] trx transaction
59 @return whether the rollback was completed normally
60 @retval false if the rollback was aborted by shutdown */
trx_rollback_finish(trx_t * trx)61 static bool trx_rollback_finish(trx_t* trx)
62 {
63 trx->mod_tables.clear();
64 bool finished = trx->error_state == DB_SUCCESS;
65 if (UNIV_LIKELY(finished)) {
66 trx_commit(trx);
67 } else {
68 ut_a(trx->error_state == DB_INTERRUPTED);
69 ut_ad(srv_shutdown_state != SRV_SHUTDOWN_NONE);
70 ut_a(!srv_undo_sources);
71 ut_ad(srv_fast_shutdown);
72 ut_d(trx->in_rollback = false);
73 if (trx_undo_t*& undo = trx->rsegs.m_redo.undo) {
74 UT_LIST_REMOVE(trx->rsegs.m_redo.rseg->undo_list,
75 undo);
76 ut_free(undo);
77 undo = NULL;
78 }
79 if (trx_undo_t*& undo = trx->rsegs.m_noredo.undo) {
80 UT_LIST_REMOVE(trx->rsegs.m_noredo.rseg->undo_list,
81 undo);
82 ut_free(undo);
83 undo = NULL;
84 }
85 trx_commit_low(trx, NULL);
86 }
87
88 trx->lock.que_state = TRX_QUE_RUNNING;
89
90 return finished;
91 }
92
93 /*******************************************************************//**
94 Rollback a transaction used in MySQL. */
95 static
96 void
trx_rollback_to_savepoint_low(trx_t * trx,trx_savept_t * savept)97 trx_rollback_to_savepoint_low(
98 /*==========================*/
99 trx_t* trx, /*!< in: transaction handle */
100 trx_savept_t* savept) /*!< in: pointer to savepoint undo number, if
101 partial rollback requested, or NULL for
102 complete rollback */
103 {
104 que_thr_t* thr;
105 mem_heap_t* heap;
106 roll_node_t* roll_node;
107
108 heap = mem_heap_create(512);
109
110 roll_node = roll_node_create(heap);
111 ut_ad(!trx->in_rollback);
112
113 if (savept != NULL) {
114 roll_node->savept = savept;
115 ut_ad(trx->mysql_thd);
116 ut_ad(!trx->is_recovered);
117 ut_ad(trx->state == TRX_STATE_ACTIVE);
118 } else {
119 ut_d(trx_state_t state = trx->state);
120 ut_ad(state == TRX_STATE_ACTIVE
121 || state == TRX_STATE_PREPARED
122 || state == TRX_STATE_PREPARED_RECOVERED);
123 }
124
125 trx->error_state = DB_SUCCESS;
126
127 if (trx->has_logged()) {
128
129 ut_ad(trx->rsegs.m_redo.rseg != 0
130 || trx->rsegs.m_noredo.rseg != 0);
131
132 thr = pars_complete_graph_for_exec(roll_node, trx, heap, NULL);
133
134 ut_a(thr == que_fork_start_command(
135 static_cast<que_fork_t*>(que_node_get_parent(thr))));
136
137 que_run_threads(thr);
138
139 ut_a(roll_node->undo_thr != NULL);
140 que_run_threads(roll_node->undo_thr);
141
142 /* Free the memory reserved by the undo graph. */
143 que_graph_free(static_cast<que_t*>(
144 roll_node->undo_thr->common.parent));
145 }
146
147 if (savept == NULL) {
148 trx_rollback_finish(trx);
149 MONITOR_INC(MONITOR_TRX_ROLLBACK);
150 } else {
151 ut_a(trx->error_state == DB_SUCCESS);
152 const undo_no_t limit = savept->least_undo_no;
153 for (trx_mod_tables_t::iterator i = trx->mod_tables.begin();
154 i != trx->mod_tables.end(); ) {
155 trx_mod_tables_t::iterator j = i++;
156 ut_ad(j->second.valid());
157 if (j->second.rollback(limit)) {
158 trx->mod_tables.erase(j);
159 }
160 }
161 trx->lock.que_state = TRX_QUE_RUNNING;
162 MONITOR_INC(MONITOR_TRX_ROLLBACK_SAVEPOINT);
163 }
164
165 mem_heap_free(heap);
166
167 /* There might be work for utility threads.*/
168 srv_active_wake_master_thread();
169
170 MONITOR_DEC(MONITOR_TRX_ACTIVE);
171 }
172
173 /*******************************************************************//**
174 Rollback a transaction to a given savepoint or do a complete rollback.
175 @return error code or DB_SUCCESS */
176 dberr_t
trx_rollback_to_savepoint(trx_t * trx,trx_savept_t * savept)177 trx_rollback_to_savepoint(
178 /*======================*/
179 trx_t* trx, /*!< in: transaction handle */
180 trx_savept_t* savept) /*!< in: pointer to savepoint undo number, if
181 partial rollback requested, or NULL for
182 complete rollback */
183 {
184 ut_ad(!trx_mutex_own(trx));
185
186 trx_start_if_not_started_xa(trx, true);
187
188 trx_rollback_to_savepoint_low(trx, savept);
189
190 return(trx->error_state);
191 }
192
193 /*******************************************************************//**
194 Rollback a transaction used in MySQL.
195 @return error code or DB_SUCCESS */
196 static
197 dberr_t
trx_rollback_for_mysql_low(trx_t * trx)198 trx_rollback_for_mysql_low(
199 /*=======================*/
200 trx_t* trx) /*!< in/out: transaction */
201 {
202 trx->op_info = "rollback";
203
204 /* If we are doing the XA recovery of prepared transactions,
205 then the transaction object does not have an InnoDB session
206 object, and we set a dummy session that we use for all MySQL
207 transactions. */
208
209 trx_rollback_to_savepoint_low(trx, NULL);
210
211 trx->op_info = "";
212
213 return(trx->error_state);
214 }
215
216 /** Rollback a transaction used in MySQL
217 @param[in, out] trx transaction
218 @return error code or DB_SUCCESS */
trx_rollback_for_mysql(trx_t * trx)219 dberr_t trx_rollback_for_mysql(trx_t* trx)
220 {
221 /* We are reading trx->state without holding trx_sys.mutex
222 here, because the rollback should be invoked for a running
223 active MySQL transaction (or recovered prepared transaction)
224 that is associated with the current thread. */
225
226 switch (trx->state) {
227 case TRX_STATE_NOT_STARTED:
228 trx->will_lock = false;
229 ut_ad(trx->mysql_thd);
230 #ifdef WITH_WSREP
231 trx->wsrep = false;
232 #endif
233 return(DB_SUCCESS);
234
235 case TRX_STATE_ACTIVE:
236 ut_ad(trx->mysql_thd);
237 ut_ad(!trx->is_recovered);
238 ut_ad(!trx->is_autocommit_non_locking() || trx->read_only);
239 return(trx_rollback_for_mysql_low(trx));
240
241 case TRX_STATE_PREPARED:
242 case TRX_STATE_PREPARED_RECOVERED:
243 ut_ad(!trx->is_autocommit_non_locking());
244 if (trx->has_logged_persistent()) {
245 /* The XA ROLLBACK of a XA PREPARE transaction
246 will consist of multiple mini-transactions.
247
248 As the very first step of XA ROLLBACK, we must
249 change the undo log state back from
250 TRX_UNDO_PREPARED to TRX_UNDO_ACTIVE, in order
251 to ensure that recovery will complete the
252 rollback.
253
254 Failure to perform this step could cause a
255 situation where we would roll back part of
256 a XA PREPARE transaction, the server would be
257 killed, and finally, the transaction would be
258 recovered in XA PREPARE state, with some of
259 the actions already having been rolled back. */
260 ut_ad(trx->rsegs.m_redo.undo->rseg
261 == trx->rsegs.m_redo.rseg);
262 mtr_t mtr;
263 mtr.start();
264 mutex_enter(&trx->rsegs.m_redo.rseg->mutex);
265 if (trx_undo_t* undo = trx->rsegs.m_redo.undo) {
266 trx_undo_set_state_at_prepare(trx, undo, true,
267 &mtr);
268 }
269 mutex_exit(&trx->rsegs.m_redo.rseg->mutex);
270 /* Write the redo log for the XA ROLLBACK
271 state change to the global buffer. It is
272 not necessary to flush the redo log. If
273 a durable log write of a later mini-transaction
274 takes place for whatever reason, then this state
275 change will be durable as well. */
276 mtr.commit();
277 ut_ad(mtr.commit_lsn() > 0);
278 }
279 return(trx_rollback_for_mysql_low(trx));
280
281 case TRX_STATE_COMMITTED_IN_MEMORY:
282 ut_ad(!trx->is_autocommit_non_locking());
283 break;
284 }
285
286 ut_error;
287 return(DB_CORRUPTION);
288 }
289
290 /*******************************************************************//**
291 Rollback the latest SQL statement for MySQL.
292 @return error code or DB_SUCCESS */
293 dberr_t
trx_rollback_last_sql_stat_for_mysql(trx_t * trx)294 trx_rollback_last_sql_stat_for_mysql(
295 /*=================================*/
296 trx_t* trx) /*!< in/out: transaction */
297 {
298 dberr_t err;
299
300 /* We are reading trx->state without holding trx_sys.mutex
301 here, because the statement rollback should be invoked for a
302 running active MySQL transaction that is associated with the
303 current thread. */
304 ut_ad(trx->mysql_thd);
305
306 switch (trx->state) {
307 case TRX_STATE_NOT_STARTED:
308 return(DB_SUCCESS);
309
310 case TRX_STATE_ACTIVE:
311 ut_ad(trx->mysql_thd);
312 ut_ad(!trx->is_recovered);
313 ut_ad(!trx->is_autocommit_non_locking() || trx->read_only);
314
315 trx->op_info = "rollback of SQL statement";
316
317 err = trx_rollback_to_savepoint(
318 trx, &trx->last_sql_stat_start);
319
320 if (trx->fts_trx != NULL) {
321 fts_savepoint_rollback_last_stmt(trx);
322 }
323
324 /* The following call should not be needed,
325 but we play it safe: */
326 trx_mark_sql_stat_end(trx);
327
328 trx->op_info = "";
329
330 return(err);
331
332 case TRX_STATE_PREPARED:
333 case TRX_STATE_PREPARED_RECOVERED:
334 case TRX_STATE_COMMITTED_IN_MEMORY:
335 /* The statement rollback is only allowed on an ACTIVE
336 transaction, not a PREPARED or COMMITTED one. */
337 break;
338 }
339
340 ut_error;
341 return(DB_CORRUPTION);
342 }
343
344 /*******************************************************************//**
345 Search for a savepoint using name.
346 @return savepoint if found else NULL */
347 static
348 trx_named_savept_t*
trx_savepoint_find(trx_t * trx,const char * name)349 trx_savepoint_find(
350 /*===============*/
351 trx_t* trx, /*!< in: transaction */
352 const char* name) /*!< in: savepoint name */
353 {
354 trx_named_savept_t* savep;
355
356 for (savep = UT_LIST_GET_FIRST(trx->trx_savepoints);
357 savep != NULL;
358 savep = UT_LIST_GET_NEXT(trx_savepoints, savep)) {
359
360 if (0 == ut_strcmp(savep->name, name)) {
361 return(savep);
362 }
363 }
364
365 return(NULL);
366 }
367
368 /*******************************************************************//**
369 Frees a single savepoint struct. */
370 static
371 void
trx_roll_savepoint_free(trx_t * trx,trx_named_savept_t * savep)372 trx_roll_savepoint_free(
373 /*=====================*/
374 trx_t* trx, /*!< in: transaction handle */
375 trx_named_savept_t* savep) /*!< in: savepoint to free */
376 {
377 UT_LIST_REMOVE(trx->trx_savepoints, savep);
378
379 ut_free(savep->name);
380 ut_free(savep);
381 }
382
383 /*******************************************************************//**
384 Frees savepoint structs starting from savep. */
385 void
trx_roll_savepoints_free(trx_t * trx,trx_named_savept_t * savep)386 trx_roll_savepoints_free(
387 /*=====================*/
388 trx_t* trx, /*!< in: transaction handle */
389 trx_named_savept_t* savep) /*!< in: free all savepoints starting
390 with this savepoint i*/
391 {
392 while (savep != NULL) {
393 trx_named_savept_t* next_savep;
394
395 next_savep = UT_LIST_GET_NEXT(trx_savepoints, savep);
396
397 trx_roll_savepoint_free(trx, savep);
398
399 savep = next_savep;
400 }
401 }
402
403 /*******************************************************************//**
404 Rolls back a transaction back to a named savepoint. Modifications after the
405 savepoint are undone but InnoDB does NOT release the corresponding locks
406 which are stored in memory. If a lock is 'implicit', that is, a new inserted
407 row holds a lock where the lock information is carried by the trx id stored in
408 the row, these locks are naturally released in the rollback. Savepoints which
409 were set after this savepoint are deleted.
410 @return if no savepoint of the name found then DB_NO_SAVEPOINT,
411 otherwise DB_SUCCESS */
412 static MY_ATTRIBUTE((nonnull, warn_unused_result))
413 dberr_t
trx_rollback_to_savepoint_for_mysql_low(trx_t * trx,trx_named_savept_t * savep,int64_t * mysql_binlog_cache_pos)414 trx_rollback_to_savepoint_for_mysql_low(
415 /*====================================*/
416 trx_t* trx, /*!< in/out: transaction */
417 trx_named_savept_t* savep, /*!< in/out: savepoint */
418 int64_t* mysql_binlog_cache_pos)
419 /*!< out: the MySQL binlog
420 cache position corresponding
421 to this savepoint; MySQL needs
422 this information to remove the
423 binlog entries of the queries
424 executed after the savepoint */
425 {
426 dberr_t err;
427
428 ut_ad(trx_state_eq(trx, TRX_STATE_ACTIVE));
429 ut_ad(trx->mysql_thd);
430
431 /* Free all savepoints strictly later than savep. */
432
433 trx_roll_savepoints_free(
434 trx, UT_LIST_GET_NEXT(trx_savepoints, savep));
435
436 *mysql_binlog_cache_pos = savep->mysql_binlog_cache_pos;
437
438 trx->op_info = "rollback to a savepoint";
439
440 err = trx_rollback_to_savepoint(trx, &savep->savept);
441
442 /* Store the current undo_no of the transaction so that
443 we know where to roll back if we have to roll back the
444 next SQL statement: */
445
446 trx_mark_sql_stat_end(trx);
447
448 trx->op_info = "";
449
450 #ifdef WITH_WSREP
451 if (trx->is_wsrep()) {
452 trx->lock.was_chosen_as_deadlock_victim = false;
453 }
454 #endif
455 return(err);
456 }
457
458 /*******************************************************************//**
459 Rolls back a transaction back to a named savepoint. Modifications after the
460 savepoint are undone but InnoDB does NOT release the corresponding locks
461 which are stored in memory. If a lock is 'implicit', that is, a new inserted
462 row holds a lock where the lock information is carried by the trx id stored in
463 the row, these locks are naturally released in the rollback. Savepoints which
464 were set after this savepoint are deleted.
465 @return if no savepoint of the name found then DB_NO_SAVEPOINT,
466 otherwise DB_SUCCESS */
467 dberr_t
trx_rollback_to_savepoint_for_mysql(trx_t * trx,const char * savepoint_name,int64_t * mysql_binlog_cache_pos)468 trx_rollback_to_savepoint_for_mysql(
469 /*================================*/
470 trx_t* trx, /*!< in: transaction handle */
471 const char* savepoint_name, /*!< in: savepoint name */
472 int64_t* mysql_binlog_cache_pos) /*!< out: the MySQL binlog cache
473 position corresponding to this
474 savepoint; MySQL needs this
475 information to remove the
476 binlog entries of the queries
477 executed after the savepoint */
478 {
479 trx_named_savept_t* savep;
480
481 /* We are reading trx->state without holding trx_sys.mutex
482 here, because the savepoint rollback should be invoked for a
483 running active MySQL transaction that is associated with the
484 current thread. */
485 ut_ad(trx->mysql_thd);
486
487 savep = trx_savepoint_find(trx, savepoint_name);
488
489 if (savep == NULL) {
490 return(DB_NO_SAVEPOINT);
491 }
492
493 switch (trx->state) {
494 case TRX_STATE_NOT_STARTED:
495 ib::error() << "Transaction has a savepoint "
496 << savep->name
497 << " though it is not started";
498 return(DB_ERROR);
499
500 case TRX_STATE_ACTIVE:
501
502 return(trx_rollback_to_savepoint_for_mysql_low(
503 trx, savep, mysql_binlog_cache_pos));
504
505 case TRX_STATE_PREPARED:
506 case TRX_STATE_PREPARED_RECOVERED:
507 case TRX_STATE_COMMITTED_IN_MEMORY:
508 /* The savepoint rollback is only allowed on an ACTIVE
509 transaction, not a PREPARED or COMMITTED one. */
510 break;
511 }
512
513 ut_error;
514 return(DB_CORRUPTION);
515 }
516
517 /*******************************************************************//**
518 Creates a named savepoint. If the transaction is not yet started, starts it.
519 If there is already a savepoint of the same name, this call erases that old
520 savepoint and replaces it with a new. Savepoints are deleted in a transaction
521 commit or rollback.
522 @return always DB_SUCCESS */
523 dberr_t
trx_savepoint_for_mysql(trx_t * trx,const char * savepoint_name,int64_t binlog_cache_pos)524 trx_savepoint_for_mysql(
525 /*====================*/
526 trx_t* trx, /*!< in: transaction handle */
527 const char* savepoint_name, /*!< in: savepoint name */
528 int64_t binlog_cache_pos) /*!< in: MySQL binlog cache
529 position corresponding to this
530 connection at the time of the
531 savepoint */
532 {
533 trx_named_savept_t* savep;
534
535 trx_start_if_not_started_xa(trx, false);
536
537 savep = trx_savepoint_find(trx, savepoint_name);
538
539 if (savep) {
540 /* There is a savepoint with the same name: free that */
541
542 UT_LIST_REMOVE(trx->trx_savepoints, savep);
543
544 ut_free(savep->name);
545 ut_free(savep);
546 }
547
548 /* Create a new savepoint and add it as the last in the list */
549
550 savep = static_cast<trx_named_savept_t*>(
551 ut_malloc_nokey(sizeof(*savep)));
552
553 savep->name = mem_strdup(savepoint_name);
554
555 savep->savept = trx_savept_take(trx);
556
557 savep->mysql_binlog_cache_pos = binlog_cache_pos;
558
559 UT_LIST_ADD_LAST(trx->trx_savepoints, savep);
560
561 return(DB_SUCCESS);
562 }
563
564 /*******************************************************************//**
565 Releases only the named savepoint. Savepoints which were set after this
566 savepoint are left as is.
567 @return if no savepoint of the name found then DB_NO_SAVEPOINT,
568 otherwise DB_SUCCESS */
569 dberr_t
trx_release_savepoint_for_mysql(trx_t * trx,const char * savepoint_name)570 trx_release_savepoint_for_mysql(
571 /*============================*/
572 trx_t* trx, /*!< in: transaction handle */
573 const char* savepoint_name) /*!< in: savepoint name */
574 {
575 trx_named_savept_t* savep;
576
577 ut_ad(trx_state_eq(trx, TRX_STATE_ACTIVE, true)
578 || trx_state_eq(trx, TRX_STATE_PREPARED, true));
579 ut_ad(trx->mysql_thd);
580
581 savep = trx_savepoint_find(trx, savepoint_name);
582
583 if (savep != NULL) {
584 trx_roll_savepoint_free(trx, savep);
585 }
586
587 return(savep != NULL ? DB_SUCCESS : DB_NO_SAVEPOINT);
588 }
589
590 /*******************************************************************//**
591 Returns a transaction savepoint taken at this point in time.
592 @return savepoint */
593 trx_savept_t
trx_savept_take(trx_t * trx)594 trx_savept_take(
595 /*============*/
596 trx_t* trx) /*!< in: transaction */
597 {
598 trx_savept_t savept;
599
600 savept.least_undo_no = trx->undo_no;
601
602 return(savept);
603 }
604
605 /*******************************************************************//**
606 Roll back an active transaction. */
607 static
608 void
trx_rollback_active(trx_t * trx)609 trx_rollback_active(
610 /*================*/
611 trx_t* trx) /*!< in/out: transaction */
612 {
613 mem_heap_t* heap;
614 que_fork_t* fork;
615 que_thr_t* thr;
616 roll_node_t* roll_node;
617 const trx_id_t trx_id = trx->id;
618
619 ut_ad(trx_id);
620
621 heap = mem_heap_create(512);
622
623 fork = que_fork_create(NULL, NULL, QUE_FORK_RECOVERY, heap);
624 fork->trx = trx;
625
626 thr = que_thr_create(fork, heap, NULL);
627
628 roll_node = roll_node_create(heap);
629
630 thr->child = roll_node;
631 roll_node->common.parent = thr;
632
633 trx->graph = fork;
634
635 ut_a(thr == que_fork_start_command(fork));
636
637 trx_roll_crash_recv_trx = trx;
638
639 const bool dictionary_locked = trx_get_dict_operation(trx)
640 != TRX_DICT_OP_NONE;
641
642 if (dictionary_locked) {
643 row_mysql_lock_data_dictionary(trx);
644 }
645
646 que_run_threads(thr);
647 ut_a(roll_node->undo_thr != NULL);
648
649 que_run_threads(roll_node->undo_thr);
650
651 que_graph_free(
652 static_cast<que_t*>(roll_node->undo_thr->common.parent));
653
654 if (UNIV_UNLIKELY(!trx_rollback_finish(trx))) {
655 ut_ad(!dictionary_locked);
656 goto func_exit;
657 }
658
659 ut_a(trx->lock.que_state == TRX_QUE_RUNNING);
660
661 if (!dictionary_locked || !trx->table_id) {
662 } else if (dict_table_t* table = dict_table_open_on_id(
663 trx->table_id, TRUE, DICT_TABLE_OP_NORMAL)) {
664 ib::info() << "Dropping table " << table->name
665 << ", with id " << trx->table_id
666 << " in recovery";
667
668 dict_table_close_and_drop(trx, table);
669
670 trx_commit_for_mysql(trx);
671 }
672
673 ib::info() << "Rolled back recovered transaction " << trx_id;
674
675 func_exit:
676 if (dictionary_locked) {
677 row_mysql_unlock_data_dictionary(trx);
678 }
679
680 mem_heap_free(heap);
681
682 trx_roll_crash_recv_trx = NULL;
683 }
684
685
/** Totals accumulated by trx_roll_count_callback() */
struct trx_roll_count_callback_arg
{
  /** Start counting from zero. */
  trx_roll_count_callback_arg()
    : n_trx(0), n_rows(0)
  {}

  /** number of recovered active transactions seen */
  uint32_t n_trx;
  /** sum of trx_t::undo_no over those transactions */
  uint64_t n_rows;
};
692
693
trx_roll_count_callback(rw_trx_hash_element_t * element,trx_roll_count_callback_arg * arg)694 static my_bool trx_roll_count_callback(rw_trx_hash_element_t *element,
695 trx_roll_count_callback_arg *arg)
696 {
697 mutex_enter(&element->mutex);
698 if (trx_t *trx= element->trx)
699 {
700 if (trx->is_recovered && trx_state_eq(trx, TRX_STATE_ACTIVE))
701 {
702 arg->n_trx++;
703 arg->n_rows+= trx->undo_no;
704 }
705 }
706 mutex_exit(&element->mutex);
707 return 0;
708 }
709
710 /** Report progress when rolling back a row of a recovered transaction. */
trx_roll_report_progress()711 void trx_roll_report_progress()
712 {
713 time_t now = time(NULL);
714 mutex_enter(&recv_sys->mutex);
715 bool report = recv_sys->report(now);
716 mutex_exit(&recv_sys->mutex);
717
718 if (report) {
719 trx_roll_count_callback_arg arg;
720
721 /* Get number of recovered active transactions and number of
722 rows they modified. Numbers must be accurate, because only this
723 thread is allowed to touch recovered transactions. */
724 trx_sys.rw_trx_hash.iterate_no_dups(
725 reinterpret_cast<my_hash_walk_action>
726 (trx_roll_count_callback), &arg);
727
728 if (arg.n_rows > 0) {
729 service_manager_extend_timeout(
730 INNODB_EXTEND_TIMEOUT_INTERVAL,
731 "To roll back: " UINT32PF " transactions, "
732 UINT64PF " rows", arg.n_trx, arg.n_rows);
733 }
734
735 ib::info() << "To roll back: " << arg.n_trx
736 << " transactions, " << arg.n_rows << " rows";
737
738 }
739 }
740
741
trx_rollback_recovered_callback(rw_trx_hash_element_t * element,std::vector<trx_t * > * trx_list)742 static my_bool trx_rollback_recovered_callback(rw_trx_hash_element_t *element,
743 std::vector<trx_t*> *trx_list)
744 {
745 mutex_enter(&element->mutex);
746 if (trx_t *trx= element->trx)
747 {
748 mutex_enter(&trx->mutex);
749 if (trx_state_eq(trx, TRX_STATE_ACTIVE) && trx->is_recovered)
750 trx_list->push_back(trx);
751 mutex_exit(&trx->mutex);
752 }
753 mutex_exit(&element->mutex);
754 return 0;
755 }
756
757
/**
  Roll back incomplete transactions which were encountered in crash recovery.

  All recovered transactions in TRX_STATE_ACTIVE are collected from
  trx_sys.rw_trx_hash and then processed one by one. A transaction is rolled
  back if all=true, or if it performed a dictionary operation or holds a
  statistics table lock; otherwise it is left alone. If a fast shutdown is in
  progress (or the rollback was interrupted by it), the transaction object is
  discarded without completing the rollback.

  Note: For XA recovered transactions, we rely on MySQL to
  do rollback. They will be in TRX_STATE_PREPARED state. If the server
  is shutdown and they are still lingering in trx_sys_t::trx_list
  then the shutdown will hang.

  @param[in]  all  true=roll back all recovered active transactions;
                   false=roll back any incomplete dictionary transaction
*/

void trx_rollback_recovered(bool all)
{
  std::vector<trx_t*> trx_list;

  /* This must not be invoked when the configured forced recovery level
  prohibits undoing transactions. */
  ut_a(srv_force_recovery < SRV_FORCE_NO_TRX_UNDO);

  /*
    Collect list of recovered ACTIVE transactions first. Once collected, no
    other thread is allowed to modify or remove these transactions from
    rw_trx_hash.
  */
  trx_sys.rw_trx_hash.iterate_no_dups(reinterpret_cast<my_hash_walk_action>
                                      (trx_rollback_recovered_callback),
                                      &trx_list);

  /* Process the collected transactions, taking them from the back of
  the vector. */
  while (!trx_list.empty())
  {
    trx_t *trx= trx_list.back();
    trx_list.pop_back();

    /* Debug check: the transaction must still be in the state in which
    it was collected. */
    ut_ad(trx);
    ut_d(trx_mutex_enter(trx));
    ut_ad(trx->is_recovered);
    ut_ad(trx_state_eq(trx, TRX_STATE_ACTIVE));
    ut_d(trx_mutex_exit(trx));

    /* During a fast shutdown, do not even start the rollback; jump
    straight into the branch below that discards the transaction object. */
    if (srv_shutdown_state != SRV_SHUTDOWN_NONE && !srv_undo_sources &&
        srv_fast_shutdown)
      goto discard;

    if (all || trx_get_dict_operation(trx) != TRX_DICT_OP_NONE
        || trx->has_stats_table_lock())
    {
      trx_rollback_active(trx);
      if (trx->error_state != DB_SUCCESS)
      {
        /* The rollback was interrupted by shutdown. Clear the error
        before discarding the transaction object. */
        ut_ad(trx->error_state == DB_INTERRUPTED);
        trx->error_state= DB_SUCCESS;
        ut_ad(!srv_undo_sources);
        ut_ad(srv_fast_shutdown);
discard:
        /* Note: before kill_server() invoked innobase_end() via
        unireg_end(), it invoked close_connections(), which should initiate
        the rollback of any user transactions via THD::cleanup() in the
        connection threads, and wait for all THD::cleanup() to complete.
        So, no active user transactions should exist at this point.

        srv_undo_sources=false was cleared early in innobase_end().

        Generally, the server guarantees that all connections using
        InnoDB must be disconnected by the time we are reaching this code,
        be it during shutdown or UNINSTALL PLUGIN.

        Because there is no possible race condition with any
        concurrent user transaction, we do not have to invoke
        trx->commit_state() or wait for !trx->is_referenced()
        before trx_sys.deregister_rw(trx). */
        trx_sys.deregister_rw(trx);
        trx_free_at_shutdown(trx);
      }
      else
        /* The rollback completed normally; release the transaction
        object. */
        trx->free();
    }
  }
}
838
839
840 /*******************************************************************//**
841 Rollback or clean up any incomplete transactions which were
842 encountered in crash recovery. If the transaction already was
843 committed, then we clean up a possible insert undo log. If the
844 transaction was not yet committed, then we roll it back.
845 Note: this is done in a background thread.
846 @return a dummy parameter */
847 extern "C"
848 os_thread_ret_t
DECLARE_THREAD(trx_rollback_all_recovered)849 DECLARE_THREAD(trx_rollback_all_recovered)(void*)
850 {
851 my_thread_init();
852 ut_ad(!srv_read_only_mode);
853
854 #ifdef UNIV_PFS_THREAD
855 pfs_register_thread(trx_rollback_clean_thread_key);
856 #endif /* UNIV_PFS_THREAD */
857
858 if (trx_sys.rw_trx_hash.size()) {
859 ib::info() << "Starting in background the rollback of"
860 " recovered transactions";
861 trx_rollback_recovered(true);
862 ib::info() << "Rollback of non-prepared transactions"
863 " completed";
864 }
865
866 trx_rollback_is_active = false;
867
868 my_thread_end();
869 /* We count the number of threads in os_thread_exit(). A created
870 thread should always use that to exit and not use return() to exit. */
871
872 os_thread_exit();
873
874 OS_THREAD_DUMMY_RETURN;
875 }
876
877 /** Try to truncate the undo logs.
878 @param[in,out] trx transaction */
879 static
880 void
trx_roll_try_truncate(trx_t * trx)881 trx_roll_try_truncate(trx_t* trx)
882 {
883 trx->pages_undone = 0;
884
885 undo_no_t undo_no = trx->undo_no;
886
887 if (trx_undo_t* undo = trx->rsegs.m_redo.undo) {
888 ut_ad(undo->rseg == trx->rsegs.m_redo.rseg);
889 mutex_enter(&undo->rseg->mutex);
890 trx_undo_truncate_end(undo, undo_no, false);
891 mutex_exit(&undo->rseg->mutex);
892 }
893
894 if (trx_undo_t* undo = trx->rsegs.m_noredo.undo) {
895 ut_ad(undo->rseg == trx->rsegs.m_noredo.rseg);
896 mutex_enter(&undo->rseg->mutex);
897 trx_undo_truncate_end(undo, undo_no, true);
898 mutex_exit(&undo->rseg->mutex);
899 }
900 }
901
902 /***********************************************************************//**
903 Pops the topmost undo log record in a single undo log and updates the info
904 about the topmost record in the undo log memory struct.
905 @return undo log record, the page s-latched */
906 static
907 trx_undo_rec_t*
trx_roll_pop_top_rec(trx_t * trx,trx_undo_t * undo,mtr_t * mtr)908 trx_roll_pop_top_rec(
909 /*=================*/
910 trx_t* trx, /*!< in: transaction */
911 trx_undo_t* undo, /*!< in: undo log */
912 mtr_t* mtr) /*!< in: mtr */
913 {
914 page_t* undo_page = trx_undo_page_get_s_latched(
915 page_id_t(undo->rseg->space->id, undo->top_page_no), mtr);
916
917 ulint offset = undo->top_offset;
918
919 trx_undo_rec_t* prev_rec = trx_undo_get_prev_rec(
920 undo_page + offset, undo->hdr_page_no, undo->hdr_offset,
921 true, mtr);
922
923 if (prev_rec == NULL) {
924 undo->top_undo_no = IB_ID_MAX;
925 ut_ad(undo->empty());
926 } else {
927 page_t* prev_rec_page = page_align(prev_rec);
928
929 if (prev_rec_page != undo_page) {
930
931 trx->pages_undone++;
932 }
933
934 undo->top_page_no = page_get_page_no(prev_rec_page);
935 undo->top_offset = ulint(prev_rec - prev_rec_page);
936 undo->top_undo_no = trx_undo_rec_get_undo_no(prev_rec);
937 ut_ad(!undo->empty());
938 }
939
940 return(undo_page + offset);
941 }
942
943 /** Get the last undo log record of a transaction (for rollback).
944 @param[in,out] trx transaction
945 @param[out] roll_ptr DB_ROLL_PTR to the undo record
946 @param[in,out] heap memory heap for allocation
947 @return undo log record copied to heap
948 @retval NULL if none left or the roll_limit (savepoint) was reached */
949 trx_undo_rec_t*
trx_roll_pop_top_rec_of_trx(trx_t * trx,roll_ptr_t * roll_ptr,mem_heap_t * heap)950 trx_roll_pop_top_rec_of_trx(trx_t* trx, roll_ptr_t* roll_ptr, mem_heap_t* heap)
951 {
952 if (trx->pages_undone >= TRX_ROLL_TRUNC_THRESHOLD) {
953 trx_roll_try_truncate(trx);
954 }
955
956 trx_undo_t* undo = NULL;
957 trx_undo_t* update = trx->rsegs.m_redo.undo;
958 trx_undo_t* temp = trx->rsegs.m_noredo.undo;
959 const undo_no_t limit = trx->roll_limit;
960
961 ut_ad(!update || !temp || update->empty() || temp->empty()
962 || update->top_undo_no != temp->top_undo_no);
963
964 if (update && !update->empty() && update->top_undo_no >= limit) {
965 if (!undo) {
966 undo = update;
967 } else if (undo->top_undo_no < update->top_undo_no) {
968 undo = update;
969 }
970 }
971
972 if (temp && !temp->empty() && temp->top_undo_no >= limit) {
973 if (!undo) {
974 undo = temp;
975 } else if (undo->top_undo_no < temp->top_undo_no) {
976 undo = temp;
977 }
978 }
979
980 if (undo == NULL) {
981 trx_roll_try_truncate(trx);
982 /* Mark any ROLLBACK TO SAVEPOINT completed, so that
983 if the transaction object is committed and reused
984 later, we will default to a full ROLLBACK. */
985 trx->roll_limit = 0;
986 trx->in_rollback = false;
987 return(NULL);
988 }
989
990 ut_ad(!undo->empty());
991 ut_ad(limit <= undo->top_undo_no);
992
993 *roll_ptr = trx_undo_build_roll_ptr(
994 false, undo->rseg->id, undo->top_page_no, undo->top_offset);
995
996 mtr_t mtr;
997 mtr.start();
998
999 trx_undo_rec_t* undo_rec = trx_roll_pop_top_rec(trx, undo, &mtr);
1000 const undo_no_t undo_no = trx_undo_rec_get_undo_no(undo_rec);
1001 switch (trx_undo_rec_get_type(undo_rec)) {
1002 case TRX_UNDO_INSERT_METADATA:
1003 /* This record type was introduced in MDEV-11369
1004 instant ADD COLUMN, which was implemented after
1005 MDEV-12288 removed the insert_undo log. There is no
1006 instant ADD COLUMN for temporary tables. Therefore,
1007 this record can only be present in the main undo log. */
1008 /* fall through */
1009 case TRX_UNDO_RENAME_TABLE:
1010 ut_ad(undo == update);
1011 /* fall through */
1012 case TRX_UNDO_INSERT_REC:
1013 *roll_ptr |= 1ULL << ROLL_PTR_INSERT_FLAG_POS;
1014 }
1015
1016 trx->undo_no = undo_no;
1017
1018 trx_undo_rec_t* undo_rec_copy = trx_undo_rec_copy(undo_rec, heap);
1019 mtr.commit();
1020
1021 return(undo_rec_copy);
1022 }
1023
1024 /****************************************************************//**
1025 Builds an undo 'query' graph for a transaction. The actual rollback is
1026 performed by executing this query graph like a query subprocedure call.
1027 The reply about the completion of the rollback will be sent by this
1028 graph.
1029 @return own: the query graph */
1030 static
1031 que_t*
trx_roll_graph_build(trx_t * trx)1032 trx_roll_graph_build(
1033 /*=================*/
1034 trx_t* trx) /*!< in/out: transaction */
1035 {
1036 mem_heap_t* heap;
1037 que_fork_t* fork;
1038 que_thr_t* thr;
1039
1040 ut_ad(trx_mutex_own(trx));
1041
1042 heap = mem_heap_create(512);
1043 fork = que_fork_create(NULL, NULL, QUE_FORK_ROLLBACK, heap);
1044 fork->trx = trx;
1045
1046 thr = que_thr_create(fork, heap, NULL);
1047
1048 thr->child = row_undo_node_create(trx, thr, heap);
1049
1050 return(fork);
1051 }
1052
1053 /*********************************************************************//**
1054 Starts a rollback operation, creates the UNDO graph that will do the
1055 actual undo operation.
1056 @return query graph thread that will perform the UNDO operations. */
1057 static
1058 que_thr_t*
trx_rollback_start(trx_t * trx,undo_no_t roll_limit)1059 trx_rollback_start(
1060 /*===============*/
1061 trx_t* trx, /*!< in: transaction */
1062 undo_no_t roll_limit) /*!< in: rollback to undo no (for
1063 partial undo), 0 if we are rolling back
1064 the entire transaction */
1065 {
1066 ut_ad(trx_mutex_own(trx));
1067
1068 /* Initialize the rollback field in the transaction */
1069
1070 ut_ad(!trx->roll_limit);
1071 ut_ad(!trx->in_rollback);
1072
1073 trx->roll_limit = roll_limit;
1074 trx->in_rollback = true;
1075
1076 ut_a(trx->roll_limit <= trx->undo_no);
1077
1078 trx->pages_undone = 0;
1079
1080 /* Build a 'query' graph which will perform the undo operations */
1081
1082 que_t* roll_graph = trx_roll_graph_build(trx);
1083
1084 trx->graph = roll_graph;
1085
1086 trx->lock.que_state = TRX_QUE_ROLLING_BACK;
1087
1088 return(que_fork_start_command(roll_graph));
1089 }
1090
1091 /*********************************************************************//**
1092 Creates a rollback command node struct.
1093 @return own: rollback node struct */
1094 roll_node_t*
roll_node_create(mem_heap_t * heap)1095 roll_node_create(
1096 /*=============*/
1097 mem_heap_t* heap) /*!< in: mem heap where created */
1098 {
1099 roll_node_t* node;
1100
1101 node = static_cast<roll_node_t*>(mem_heap_zalloc(heap, sizeof(*node)));
1102
1103 node->state = ROLL_NODE_SEND;
1104
1105 node->common.type = QUE_NODE_ROLLBACK;
1106
1107 return(node);
1108 }
1109
1110 /***********************************************************//**
1111 Performs an execution step for a rollback command node in a query graph.
1112 @return query thread to run next, or NULL */
1113 que_thr_t*
trx_rollback_step(que_thr_t * thr)1114 trx_rollback_step(
1115 /*==============*/
1116 que_thr_t* thr) /*!< in: query thread */
1117 {
1118 roll_node_t* node;
1119
1120 node = static_cast<roll_node_t*>(thr->run_node);
1121
1122 ut_ad(que_node_get_type(node) == QUE_NODE_ROLLBACK);
1123
1124 if (thr->prev_node == que_node_get_parent(node)) {
1125 node->state = ROLL_NODE_SEND;
1126 }
1127
1128 if (node->state == ROLL_NODE_SEND) {
1129 trx_t* trx;
1130 ib_id_t roll_limit;
1131
1132 trx = thr_get_trx(thr);
1133
1134 trx_mutex_enter(trx);
1135
1136 node->state = ROLL_NODE_WAIT;
1137
1138 ut_a(node->undo_thr == NULL);
1139
1140 roll_limit = node->savept ? node->savept->least_undo_no : 0;
1141
1142 trx_commit_or_rollback_prepare(trx);
1143
1144 node->undo_thr = trx_rollback_start(trx, roll_limit);
1145
1146 trx_mutex_exit(trx);
1147
1148 } else {
1149 ut_ad(node->state == ROLL_NODE_WAIT);
1150
1151 thr->run_node = que_node_get_parent(node);
1152 }
1153
1154 return(thr);
1155 }
1156