1 /*****************************************************************************
2
3 Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
4
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License, version 2.0,
7 as published by the Free Software Foundation.
8
9 This program is also distributed with certain software (including
10 but not limited to OpenSSL) that is licensed under separate terms,
11 as designated in a particular file or component or in included license
12 documentation. The authors of MySQL hereby grant you an additional
13 permission to link the program and your derivative works with the
14 separately licensed software that they have included with MySQL.
15
16 This program is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 GNU General Public License, version 2.0, for more details.
20
21 You should have received a copy of the GNU General Public License along with
22 this program; if not, write to the Free Software Foundation, Inc.,
23 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
24
25 *****************************************************************************/
26
27 /**************************************************//**
28 @file trx/trx0trx.cc
29 The transaction
30
31 Created 3/26/1996 Heikki Tuuri
32 *******************************************************/
33
34 #include "trx0trx.h"
35
36 #ifdef UNIV_NONINL
37 #include "trx0trx.ic"
38 #endif
39
40 #include "trx0undo.h"
41 #include "trx0rseg.h"
42 #include "log0log.h"
43 #include "que0que.h"
44 #include "lock0lock.h"
45 #include "trx0roll.h"
46 #include "usr0sess.h"
47 #include "read0read.h"
48 #include "srv0srv.h"
49 #include "srv0start.h"
50 #include "btr0sea.h"
51 #include "os0proc.h"
52 #include "trx0xa.h"
53 #include "trx0rec.h"
54 #include "trx0purge.h"
55 #include "ha_prototypes.h"
56 #include "srv0mon.h"
57 #include "ut0vec.h"
58
59 #include<set>
60
61 /** Set of table_id */
62 typedef std::set<table_id_t> table_id_set;
63
64 /** Dummy session used currently in MySQL interface */
65 UNIV_INTERN sess_t* trx_dummy_sess = NULL;
66
67 #ifdef UNIV_PFS_MUTEX
68 /* Key to register the mutex with performance schema */
69 UNIV_INTERN mysql_pfs_key_t trx_mutex_key;
70 /* Key to register the mutex with performance schema */
71 UNIV_INTERN mysql_pfs_key_t trx_undo_mutex_key;
72 #endif /* UNIV_PFS_MUTEX */
73
74 /*************************************************************//**
75 Set detailed error message for the transaction. */
76 UNIV_INTERN
77 void
trx_set_detailed_error(trx_t * trx,const char * msg)78 trx_set_detailed_error(
79 /*===================*/
80 trx_t* trx, /*!< in: transaction struct */
81 const char* msg) /*!< in: detailed error message */
82 {
83 ut_strlcpy(trx->detailed_error, msg, sizeof(trx->detailed_error));
84 }
85
86 /*************************************************************//**
87 Set detailed error message for the transaction from a file. Note that the
88 file is rewinded before reading from it. */
89 UNIV_INTERN
90 void
trx_set_detailed_error_from_file(trx_t * trx,FILE * file)91 trx_set_detailed_error_from_file(
92 /*=============================*/
93 trx_t* trx, /*!< in: transaction struct */
94 FILE* file) /*!< in: file to read message from */
95 {
96 os_file_read_string(file, trx->detailed_error,
97 sizeof(trx->detailed_error));
98 }
99
100 /****************************************************************//**
101 Creates and initializes a transaction object. It must be explicitly
102 started with trx_start_if_not_started() before using it. The default
103 isolation level is TRX_ISO_REPEATABLE_READ.
104 @return transaction instance, should never be NULL */
105 static
106 trx_t*
trx_create(void)107 trx_create(void)
108 /*============*/
109 {
110 trx_t* trx;
111 mem_heap_t* heap;
112 ib_alloc_t* heap_alloc;
113
114 trx = static_cast<trx_t*>(mem_zalloc(sizeof(*trx)));
115
116 mutex_create(trx_mutex_key, &trx->mutex, SYNC_TRX);
117
118 trx->magic_n = TRX_MAGIC_N;
119
120 trx->state = TRX_STATE_NOT_STARTED;
121
122 trx->isolation_level = TRX_ISO_REPEATABLE_READ;
123
124 trx->no = TRX_ID_MAX;
125
126 trx->support_xa = TRUE;
127
128 trx->check_foreigns = TRUE;
129 trx->check_unique_secondary = TRUE;
130
131 trx->dict_operation = TRX_DICT_OP_NONE;
132
133 mutex_create(trx_undo_mutex_key, &trx->undo_mutex, SYNC_TRX_UNDO);
134
135 trx->error_state = DB_SUCCESS;
136
137 trx->lock.que_state = TRX_QUE_RUNNING;
138
139 trx->lock.lock_heap = mem_heap_create_typed(
140 256, MEM_HEAP_FOR_LOCK_HEAP);
141
142 trx->search_latch_timeout = BTR_SEA_TIMEOUT;
143
144 trx->global_read_view_heap = mem_heap_create(256);
145
146 trx->xid.formatID = -1;
147
148 trx->op_info = "";
149
150 trx->api_trx = false;
151
152 trx->api_auto_commit = false;
153
154 trx->read_write = true;
155
156 heap = mem_heap_create(sizeof(ib_vector_t) + sizeof(void*) * 8);
157 heap_alloc = ib_heap_allocator_create(heap);
158
159 /* Remember to free the vector explicitly in trx_free(). */
160 trx->autoinc_locks = ib_vector_create(heap_alloc, sizeof(void**), 4);
161
162 /* Remember to free the vector explicitly in trx_free(). */
163 heap = mem_heap_create(sizeof(ib_vector_t) + sizeof(void*) * 128);
164 heap_alloc = ib_heap_allocator_create(heap);
165
166 trx->lock.table_locks = ib_vector_create(
167 heap_alloc, sizeof(void**), 32);
168
169 #ifdef WITH_WSREP
170 trx->wsrep_event = NULL;
171 #endif /* WITH_WSREP */
172 return(trx);
173 }
174
175 /********************************************************************//**
176 Creates a transaction object for background operations by the master thread.
177 @return own: transaction object */
178 UNIV_INTERN
179 trx_t*
trx_allocate_for_background(void)180 trx_allocate_for_background(void)
181 /*=============================*/
182 {
183 trx_t* trx;
184
185 trx = trx_create();
186
187 trx->sess = trx_dummy_sess;
188
189 return(trx);
190 }
191
192 /********************************************************************//**
193 Creates a transaction object for MySQL.
194 @return own: transaction object */
195 UNIV_INTERN
196 trx_t*
trx_allocate_for_mysql(void)197 trx_allocate_for_mysql(void)
198 /*========================*/
199 {
200 trx_t* trx;
201
202 trx = trx_allocate_for_background();
203
204 mutex_enter(&trx_sys->mutex);
205
206 ut_d(trx->in_mysql_trx_list = TRUE);
207 UT_LIST_ADD_FIRST(mysql_trx_list, trx_sys->mysql_trx_list, trx);
208
209 mutex_exit(&trx_sys->mutex);
210
211 return(trx);
212 }
213
214 /********************************************************************//**
215 Frees a transaction object. */
216 static
217 void
trx_free(trx_t * trx)218 trx_free(
219 /*=====*/
220 trx_t* trx) /*!< in, own: trx object */
221 {
222 ut_a(trx->magic_n == TRX_MAGIC_N);
223 ut_ad(!trx->in_ro_trx_list);
224 ut_ad(!trx->in_rw_trx_list);
225 ut_ad(!trx->in_mysql_trx_list);
226
227 mutex_free(&trx->undo_mutex);
228
229 if (trx->undo_no_arr != NULL) {
230 trx_undo_arr_free(trx->undo_no_arr);
231 }
232
233 ut_a(trx->lock.wait_lock == NULL);
234 ut_a(trx->lock.wait_thr == NULL);
235
236 ut_a(!trx->has_search_latch);
237
238 ut_a(trx->dict_operation_lock_mode == 0);
239
240 if (trx->lock.lock_heap) {
241 mem_heap_free(trx->lock.lock_heap);
242 }
243
244 ut_a(UT_LIST_GET_LEN(trx->lock.trx_locks) == 0);
245
246 if (trx->global_read_view_heap) {
247 mem_heap_free(trx->global_read_view_heap);
248 }
249
250 ut_a(ib_vector_is_empty(trx->autoinc_locks));
251 /* We allocated a dedicated heap for the vector. */
252 ib_vector_free(trx->autoinc_locks);
253
254 if (trx->lock.table_locks != NULL) {
255 /* We allocated a dedicated heap for the vector. */
256 ib_vector_free(trx->lock.table_locks);
257 }
258
259 mutex_free(&trx->mutex);
260
261 mem_free(trx);
262 }
263
264 /********************************************************************//**
265 Frees a transaction object of a background operation of the master thread. */
266 UNIV_INTERN
267 void
trx_free_for_background(trx_t * trx)268 trx_free_for_background(
269 /*====================*/
270 trx_t* trx) /*!< in, own: trx object */
271 {
272 if (trx->declared_to_be_inside_innodb) {
273
274 ib_logf(IB_LOG_LEVEL_ERROR,
275 "Freeing a trx (%p, " TRX_ID_FMT ") which is declared "
276 "to be processing inside InnoDB", trx, trx->id);
277
278 trx_print(stderr, trx, 600);
279 putc('\n', stderr);
280
281 /* This is an error but not a fatal error. We must keep
282 the counters like srv_conc_n_threads accurate. */
283 srv_conc_force_exit_innodb(trx);
284 }
285
286 if (trx->n_mysql_tables_in_use != 0
287 || trx->mysql_n_tables_locked != 0) {
288
289 ib_logf(IB_LOG_LEVEL_ERROR,
290 "MySQL is freeing a thd though "
291 "trx->n_mysql_tables_in_use is %lu and "
292 "trx->mysql_n_tables_locked is %lu.",
293 (ulong) trx->n_mysql_tables_in_use,
294 (ulong) trx->mysql_n_tables_locked);
295
296 trx_print(stderr, trx, 600);
297 ut_print_buf(stderr, trx, sizeof(trx_t));
298 putc('\n', stderr);
299 }
300
301 ut_a(trx->state == TRX_STATE_NOT_STARTED);
302 ut_a(trx->insert_undo == NULL);
303 ut_a(trx->update_undo == NULL);
304 ut_a(trx->read_view == NULL);
305
306 trx_free(trx);
307 }
308
309 /********************************************************************//**
310 At shutdown, frees a transaction object that is in the PREPARED state. */
311 UNIV_INTERN
312 void
trx_free_prepared(trx_t * trx)313 trx_free_prepared(
314 /*==============*/
315 trx_t* trx) /*!< in, own: trx object */
316 {
317 ut_a(trx_state_eq(trx, TRX_STATE_PREPARED));
318 ut_a(trx->magic_n == TRX_MAGIC_N);
319
320 lock_trx_release_locks(trx);
321 trx_undo_free_prepared(trx);
322
323 assert_trx_in_rw_list(trx);
324
325 ut_a(!trx->read_only);
326
327 UT_LIST_REMOVE(trx_list, trx_sys->rw_trx_list, trx);
328 ut_d(trx->in_rw_trx_list = FALSE);
329
330 /* Undo trx_resurrect_table_locks(). */
331 UT_LIST_INIT(trx->lock.trx_locks);
332
333 trx_free(trx);
334 }
335
336 /********************************************************************//**
337 Frees a transaction object for MySQL. */
338 UNIV_INTERN
339 void
trx_free_for_mysql(trx_t * trx)340 trx_free_for_mysql(
341 /*===============*/
342 trx_t* trx) /*!< in, own: trx object */
343 {
344 mutex_enter(&trx_sys->mutex);
345
346 ut_ad(trx->in_mysql_trx_list);
347 ut_d(trx->in_mysql_trx_list = FALSE);
348 UT_LIST_REMOVE(mysql_trx_list, trx_sys->mysql_trx_list, trx);
349
350 ut_ad(trx_sys_validate_trx_list());
351
352 mutex_exit(&trx_sys->mutex);
353
354 trx_free_for_background(trx);
355 }
356
357 /****************************************************************//**
358 Inserts the trx handle in the trx system trx list in the right position.
359 The list is sorted on the trx id so that the biggest id is at the list
360 start. This function is used at the database startup to insert incomplete
361 transactions to the list. */
362 static
363 void
trx_list_rw_insert_ordered(trx_t * trx)364 trx_list_rw_insert_ordered(
365 /*=======================*/
366 trx_t* trx) /*!< in: trx handle */
367 {
368 trx_t* trx2;
369
370 ut_ad(!trx->read_only);
371
372 ut_d(trx->start_file = __FILE__);
373 ut_d(trx->start_line = __LINE__);
374
375 ut_a(srv_is_being_started);
376 ut_ad(!trx->in_ro_trx_list);
377 ut_ad(!trx->in_rw_trx_list);
378 ut_ad(trx->state != TRX_STATE_NOT_STARTED);
379 ut_ad(trx->is_recovered);
380
381 for (trx2 = UT_LIST_GET_FIRST(trx_sys->rw_trx_list);
382 trx2 != NULL;
383 trx2 = UT_LIST_GET_NEXT(trx_list, trx2)) {
384
385 assert_trx_in_rw_list(trx2);
386
387 if (trx->id >= trx2->id) {
388
389 ut_ad(trx->id > trx2->id);
390 break;
391 }
392 }
393
394 if (trx2 != NULL) {
395 trx2 = UT_LIST_GET_PREV(trx_list, trx2);
396
397 if (trx2 == NULL) {
398 UT_LIST_ADD_FIRST(trx_list, trx_sys->rw_trx_list, trx);
399 } else {
400 UT_LIST_INSERT_AFTER(
401 trx_list, trx_sys->rw_trx_list, trx2, trx);
402 }
403 } else {
404 UT_LIST_ADD_LAST(trx_list, trx_sys->rw_trx_list, trx);
405 }
406
407 #ifdef UNIV_DEBUG
408 if (trx->id > trx_sys->rw_max_trx_id) {
409 trx_sys->rw_max_trx_id = trx->id;
410 }
411 #endif /* UNIV_DEBUG */
412
413 ut_ad(!trx->in_rw_trx_list);
414 ut_d(trx->in_rw_trx_list = TRUE);
415 }
416
417 /****************************************************************//**
418 Resurrect the table locks for a resurrected transaction. */
419 static
420 void
trx_resurrect_table_locks(trx_t * trx,const trx_undo_t * undo)421 trx_resurrect_table_locks(
422 /*======================*/
423 trx_t* trx, /*!< in/out: transaction */
424 const trx_undo_t* undo) /*!< in: undo log */
425 {
426 mtr_t mtr;
427 page_t* undo_page;
428 trx_undo_rec_t* undo_rec;
429 table_id_set tables;
430
431 ut_ad(undo == trx->insert_undo || undo == trx->update_undo);
432
433 if (trx_state_eq(trx, TRX_STATE_COMMITTED_IN_MEMORY)
434 || undo->empty) {
435 return;
436 }
437
438 mtr_start(&mtr);
439 /* trx_rseg_mem_create() may have acquired an X-latch on this
440 page, so we cannot acquire an S-latch. */
441 undo_page = trx_undo_page_get(
442 undo->space, undo->zip_size, undo->top_page_no, &mtr);
443 undo_rec = undo_page + undo->top_offset;
444
445 do {
446 ulint type;
447 ulint cmpl_info;
448 bool updated_extern;
449 undo_no_t undo_no;
450 table_id_t table_id;
451
452 page_t* undo_rec_page = page_align(undo_rec);
453
454 if (undo_rec_page != undo_page) {
455 if (!mtr_memo_release(&mtr,
456 buf_block_align(undo_page),
457 MTR_MEMO_PAGE_X_FIX)) {
458 /* The page of the previous undo_rec
459 should have been latched by
460 trx_undo_page_get() or
461 trx_undo_get_prev_rec(). */
462 ut_ad(0);
463 }
464
465 undo_page = undo_rec_page;
466 }
467
468 trx_undo_rec_get_pars(
469 undo_rec, &type, &cmpl_info,
470 &updated_extern, &undo_no, &table_id);
471 tables.insert(table_id);
472
473 undo_rec = trx_undo_get_prev_rec(
474 undo_rec, undo->hdr_page_no,
475 undo->hdr_offset, false, &mtr);
476 } while (undo_rec);
477
478 mtr_commit(&mtr);
479
480 for (table_id_set::const_iterator i = tables.begin();
481 i != tables.end(); i++) {
482 if (dict_table_t* table = dict_table_open_on_id(
483 *i, FALSE, DICT_TABLE_OP_LOAD_TABLESPACE)) {
484 if (table->ibd_file_missing
485 || dict_table_is_temporary(table)) {
486 mutex_enter(&dict_sys->mutex);
487 dict_table_close(table, TRUE, FALSE);
488 dict_table_remove_from_cache(table);
489 mutex_exit(&dict_sys->mutex);
490 continue;
491 }
492
493 lock_table_ix_resurrect(table, trx);
494
495 DBUG_PRINT("ib_trx",
496 ("resurrect" TRX_ID_FMT
497 " table '%s' IX lock from %s undo",
498 trx->id, table->name,
499 undo == trx->insert_undo
500 ? "insert" : "update"));
501
502 dict_table_close(table, FALSE, FALSE);
503 }
504 }
505 }
506
507 /****************************************************************//**
508 Resurrect the transactions that were doing inserts the time of the
509 crash, they need to be undone.
510 @return trx_t instance */
511 static
512 trx_t*
trx_resurrect_insert(trx_undo_t * undo,trx_rseg_t * rseg)513 trx_resurrect_insert(
514 /*=================*/
515 trx_undo_t* undo, /*!< in: entry to UNDO */
516 trx_rseg_t* rseg) /*!< in: rollback segment */
517 {
518 trx_t* trx;
519
520 trx = trx_allocate_for_background();
521
522 trx->rseg = rseg;
523 trx->xid = undo->xid;
524 trx->id = undo->trx_id;
525 trx->insert_undo = undo;
526 trx->is_recovered = TRUE;
527
528 /* This is single-threaded startup code, we do not need the
529 protection of trx->mutex or trx_sys->mutex here. */
530
531 if (undo->state != TRX_UNDO_ACTIVE) {
532
533 /* Prepared transactions are left in the prepared state
534 waiting for a commit or abort decision from MySQL */
535
536 if (undo->state == TRX_UNDO_PREPARED) {
537
538 fprintf(stderr,
539 "InnoDB: Transaction " TRX_ID_FMT " was in the"
540 " XA prepared state.\n", trx->id);
541
542 if (srv_force_recovery == 0) {
543
544 trx->state = TRX_STATE_PREPARED;
545 trx_sys->n_prepared_trx++;
546 trx_sys->n_prepared_recovered_trx++;
547 } else {
548 fprintf(stderr,
549 "InnoDB: Since innodb_force_recovery"
550 " > 0, we will rollback it anyway.\n");
551
552 trx->state = TRX_STATE_ACTIVE;
553 }
554 } else {
555 trx->state = TRX_STATE_COMMITTED_IN_MEMORY;
556 }
557
558 /* We give a dummy value for the trx no; this should have no
559 relevance since purge is not interested in committed
560 transaction numbers, unless they are in the history
561 list, in which case it looks the number from the disk based
562 undo log structure */
563
564 trx->no = trx->id;
565 } else {
566 trx->state = TRX_STATE_ACTIVE;
567
568 /* A running transaction always has the number
569 field inited to TRX_ID_MAX */
570
571 trx->no = TRX_ID_MAX;
572 }
573
574 /* trx_start_low() is not called with resurrect, so need to initialize
575 start time here.*/
576 if (trx->state == TRX_STATE_ACTIVE
577 || trx->state == TRX_STATE_PREPARED) {
578 trx->start_time = ut_time();
579 }
580
581 if (undo->dict_operation) {
582 trx_set_dict_operation(trx, TRX_DICT_OP_TABLE);
583 trx->table_id = undo->table_id;
584 }
585
586 if (!undo->empty) {
587 trx->undo_no = undo->top_undo_no + 1;
588 }
589
590 return(trx);
591 }
592
593 /****************************************************************//**
594 Prepared transactions are left in the prepared state waiting for a
595 commit or abort decision from MySQL */
596 static
597 void
trx_resurrect_update_in_prepared_state(trx_t * trx,const trx_undo_t * undo)598 trx_resurrect_update_in_prepared_state(
599 /*===================================*/
600 trx_t* trx, /*!< in,out: transaction */
601 const trx_undo_t* undo) /*!< in: update UNDO record */
602 {
603 /* This is single-threaded startup code, we do not need the
604 protection of trx->mutex or trx_sys->mutex here. */
605
606 if (undo->state == TRX_UNDO_PREPARED) {
607 fprintf(stderr,
608 "InnoDB: Transaction " TRX_ID_FMT
609 " was in the XA prepared state.\n", trx->id);
610
611 if (srv_force_recovery == 0) {
612 if (trx_state_eq(trx, TRX_STATE_NOT_STARTED)) {
613 trx_sys->n_prepared_trx++;
614 trx_sys->n_prepared_recovered_trx++;
615 } else {
616 ut_ad(trx_state_eq(trx, TRX_STATE_PREPARED));
617 }
618
619 trx->state = TRX_STATE_PREPARED;
620 } else {
621 fprintf(stderr,
622 "InnoDB: Since innodb_force_recovery"
623 " > 0, we will rollback it anyway.\n");
624
625 trx->state = TRX_STATE_ACTIVE;
626 }
627 } else {
628 trx->state = TRX_STATE_COMMITTED_IN_MEMORY;
629 }
630 }
631
632 /****************************************************************//**
633 Resurrect the transactions that were doing updates the time of the
634 crash, they need to be undone. */
635 static
636 void
trx_resurrect_update(trx_t * trx,trx_undo_t * undo,trx_rseg_t * rseg)637 trx_resurrect_update(
638 /*=================*/
639 trx_t* trx, /*!< in/out: transaction */
640 trx_undo_t* undo, /*!< in/out: update UNDO record */
641 trx_rseg_t* rseg) /*!< in/out: rollback segment */
642 {
643 trx->rseg = rseg;
644 trx->xid = undo->xid;
645 trx->id = undo->trx_id;
646 trx->update_undo = undo;
647 trx->is_recovered = TRUE;
648
649 /* This is single-threaded startup code, we do not need the
650 protection of trx->mutex or trx_sys->mutex here. */
651
652 if (undo->state != TRX_UNDO_ACTIVE) {
653 trx_resurrect_update_in_prepared_state(trx, undo);
654
655 /* We give a dummy value for the trx number */
656
657 trx->no = trx->id;
658
659 } else {
660 trx->state = TRX_STATE_ACTIVE;
661
662 /* A running transaction always has the number field inited to
663 TRX_ID_MAX */
664
665 trx->no = TRX_ID_MAX;
666 }
667
668 /* trx_start_low() is not called with resurrect, so need to initialize
669 start time here.*/
670 if (trx->state == TRX_STATE_ACTIVE
671 || trx->state == TRX_STATE_PREPARED) {
672 trx->start_time = ut_time();
673 }
674
675 if (undo->dict_operation) {
676 trx_set_dict_operation(trx, TRX_DICT_OP_TABLE);
677 trx->table_id = undo->table_id;
678 }
679
680 if (!undo->empty && undo->top_undo_no >= trx->undo_no) {
681
682 trx->undo_no = undo->top_undo_no + 1;
683 }
684 }
685
686 /****************************************************************//**
687 Creates trx objects for transactions and initializes the trx list of
688 trx_sys at database start. Rollback segment and undo log lists must
689 already exist when this function is called, because the lists of
690 transactions to be rolled back or cleaned up are built based on the
691 undo log lists. */
692 UNIV_INTERN
693 void
trx_lists_init_at_db_start(void)694 trx_lists_init_at_db_start(void)
695 /*============================*/
696 {
697 ulint i;
698
699 ut_a(srv_is_being_started);
700
701 UT_LIST_INIT(trx_sys->ro_trx_list);
702 UT_LIST_INIT(trx_sys->rw_trx_list);
703
704 /* Look from the rollback segments if there exist undo logs for
705 transactions */
706
707 for (i = 0; i < TRX_SYS_N_RSEGS; ++i) {
708 trx_undo_t* undo;
709 trx_rseg_t* rseg;
710
711 rseg = trx_sys->rseg_array[i];
712
713 if (rseg == NULL) {
714 continue;
715 }
716
717 /* Resurrect transactions that were doing inserts. */
718 for (undo = UT_LIST_GET_FIRST(rseg->insert_undo_list);
719 undo != NULL;
720 undo = UT_LIST_GET_NEXT(undo_list, undo)) {
721 trx_t* trx;
722
723 trx = trx_resurrect_insert(undo, rseg);
724
725 trx_list_rw_insert_ordered(trx);
726
727 trx_resurrect_table_locks(trx, undo);
728 }
729
730 /* Ressurrect transactions that were doing updates. */
731 for (undo = UT_LIST_GET_FIRST(rseg->update_undo_list);
732 undo != NULL;
733 undo = UT_LIST_GET_NEXT(undo_list, undo)) {
734 trx_t* trx;
735 ibool trx_created;
736
737 /* Check the trx_sys->rw_trx_list first. */
738 mutex_enter(&trx_sys->mutex);
739 trx = trx_get_rw_trx_by_id(undo->trx_id);
740 mutex_exit(&trx_sys->mutex);
741
742 if (trx == NULL) {
743 trx = trx_allocate_for_background();
744 trx_created = TRUE;
745 } else {
746 trx_created = FALSE;
747 }
748
749 trx_resurrect_update(trx, undo, rseg);
750
751 if (trx_created) {
752 trx_list_rw_insert_ordered(trx);
753 }
754
755 trx_resurrect_table_locks(trx, undo);
756 }
757 }
758 }
759
760 /******************************************************************//**
761 Assigns a rollback segment to a transaction in a round-robin fashion.
762 @return assigned rollback segment instance */
763 static
764 trx_rseg_t*
trx_assign_rseg_low(ulong max_undo_logs,ulint n_tablespaces)765 trx_assign_rseg_low(
766 /*================*/
767 ulong max_undo_logs, /*!< in: maximum number of UNDO logs to use */
768 ulint n_tablespaces) /*!< in: number of rollback tablespaces */
769 {
770 ulint i;
771 trx_rseg_t* rseg;
772 static ulint latest_rseg = 0;
773
774 if (srv_read_only_mode) {
775 ut_a(max_undo_logs == ULONG_UNDEFINED);
776 return(NULL);
777 }
778
779 /* This breaks true round robin but that should be OK. */
780
781 ut_a(max_undo_logs > 0 && max_undo_logs <= TRX_SYS_N_RSEGS);
782
783 i = latest_rseg++;
784 i %= max_undo_logs;
785
786 /* Note: The assumption here is that there can't be any gaps in
787 the array. Once we implement more flexible rollback segment
788 management this may not hold. The assertion checks for that case. */
789
790 if (trx_sys->rseg_array[0] == NULL) {
791 return(NULL);
792 }
793
794 /* Skip the system tablespace if we have more than one tablespace
795 defined for rollback segments. We want all UNDO records to be in
796 the non-system tablespaces. */
797
798 do {
799 rseg = trx_sys->rseg_array[i];
800 ut_a(rseg == NULL || i == rseg->id);
801
802 i = (rseg == NULL) ? 0 : i + 1;
803
804 } while (rseg == NULL
805 || (rseg->space == 0
806 && n_tablespaces > 0
807 && trx_sys->rseg_array[1] != NULL));
808
809 return(rseg);
810 }
811
812 /****************************************************************//**
813 Assign a read-only transaction a rollback-segment, if it is attempting
814 to write to a TEMPORARY table. */
815 UNIV_INTERN
816 void
trx_assign_rseg(trx_t * trx)817 trx_assign_rseg(
818 /*============*/
819 trx_t* trx) /*!< A read-only transaction that
820 needs to be assigned a RBS. */
821 {
822 ut_a(trx->rseg == 0);
823 ut_a(trx->read_only);
824 ut_a(!srv_read_only_mode);
825 ut_a(!trx_is_autocommit_non_locking(trx));
826
827 trx->rseg = trx_assign_rseg_low(srv_undo_logs, srv_undo_tablespaces);
828 }
829
830 /****************************************************************//**
831 Starts a transaction. */
832 static
833 void
trx_start_low(trx_t * trx)834 trx_start_low(
835 /*==========*/
836 trx_t* trx) /*!< in: transaction */
837 {
838 ut_ad(trx->rseg == NULL);
839
840 ut_ad(trx->start_file != 0);
841 ut_ad(trx->start_line != 0);
842 ut_ad(!trx->is_recovered);
843 ut_ad(trx_state_eq(trx, TRX_STATE_NOT_STARTED));
844 ut_ad(UT_LIST_GET_LEN(trx->lock.trx_locks) == 0);
845
846 /* Check whether it is an AUTOCOMMIT SELECT */
847 trx->auto_commit = (trx->api_trx && trx->api_auto_commit)
848 || thd_trx_is_auto_commit(trx->mysql_thd);
849
850 trx->read_only =
851 (trx->api_trx && !trx->read_write)
852 || (!trx->ddl && thd_trx_is_read_only(trx->mysql_thd))
853 || srv_read_only_mode;
854
855 if (!trx->auto_commit) {
856 ++trx->will_lock;
857 } else if (trx->will_lock == 0) {
858 trx->read_only = TRUE;
859 }
860
861 if (!trx->read_only) {
862 trx->rseg = trx_assign_rseg_low(
863 srv_undo_logs, srv_undo_tablespaces);
864 }
865
866 #ifdef WITH_WSREP
867 memset(&trx->xid, 0, sizeof(trx->xid));
868 trx->xid.formatID = -1;
869 #endif /* WITH_WSREP */
870
871 /* The initial value for trx->no: TRX_ID_MAX is used in
872 read_view_open_now: */
873
874 trx->no = TRX_ID_MAX;
875
876 ut_a(ib_vector_is_empty(trx->autoinc_locks));
877 ut_a(ib_vector_is_empty(trx->lock.table_locks));
878
879 mutex_enter(&trx_sys->mutex);
880
881 /* If this transaction came from trx_allocate_for_mysql(),
882 trx->in_mysql_trx_list would hold. In that case, the trx->state
883 change must be protected by the trx_sys->mutex, so that
884 lock_print_info_all_transactions() will have a consistent view. */
885
886 trx->state = TRX_STATE_ACTIVE;
887
888 trx->id = trx_sys_get_new_trx_id();
889
890 ut_ad(!trx->in_rw_trx_list);
891 ut_ad(!trx->in_ro_trx_list);
892
893 if (trx->read_only) {
894
895 /* Note: The trx_sys_t::ro_trx_list doesn't really need to
896 be ordered, we should exploit this using a list type that
897 doesn't need a list wide lock to increase concurrency. */
898
899 if (!trx_is_autocommit_non_locking(trx)) {
900 UT_LIST_ADD_FIRST(trx_list, trx_sys->ro_trx_list, trx);
901 ut_d(trx->in_ro_trx_list = TRUE);
902 }
903 } else {
904
905 ut_ad(trx->rseg != NULL
906 || srv_force_recovery >= SRV_FORCE_NO_TRX_UNDO);
907
908 ut_ad(!trx_is_autocommit_non_locking(trx));
909 UT_LIST_ADD_FIRST(trx_list, trx_sys->rw_trx_list, trx);
910 ut_d(trx->in_rw_trx_list = TRUE);
911 #ifdef UNIV_DEBUG
912 if (trx->id > trx_sys->rw_max_trx_id) {
913 trx_sys->rw_max_trx_id = trx->id;
914 }
915 #endif /* UNIV_DEBUG */
916 }
917
918 ut_ad(trx_sys_validate_trx_list());
919
920 mutex_exit(&trx_sys->mutex);
921
922 trx->start_time = ut_time();
923
924 MONITOR_INC(MONITOR_TRX_ACTIVE);
925 }
926
927 /****************************************************************//**
928 Set the transaction serialisation number. */
929 static
930 void
trx_serialisation_number_get(trx_t * trx)931 trx_serialisation_number_get(
932 /*=========================*/
933 trx_t* trx) /*!< in: transaction */
934 {
935 trx_rseg_t* rseg;
936
937 rseg = trx->rseg;
938
939 ut_ad(mutex_own(&rseg->mutex));
940
941 mutex_enter(&trx_sys->mutex);
942
943 trx->no = trx_sys_get_new_trx_id();
944
945 /* If the rollack segment is not empty then the
946 new trx_t::no can't be less than any trx_t::no
947 already in the rollback segment. User threads only
948 produce events when a rollback segment is empty. */
949
950 if (rseg->last_page_no == FIL_NULL) {
951 void* ptr;
952 rseg_queue_t rseg_queue;
953
954 rseg_queue.rseg = rseg;
955 rseg_queue.trx_no = trx->no;
956
957 mutex_enter(&purge_sys->bh_mutex);
958
959 /* This is to reduce the pressure on the trx_sys_t::mutex
960 though in reality it should make very little (read no)
961 difference because this code path is only taken when the
962 rbs is empty. */
963
964 mutex_exit(&trx_sys->mutex);
965
966 ptr = ib_bh_push(purge_sys->ib_bh, &rseg_queue);
967 ut_a(ptr);
968
969 mutex_exit(&purge_sys->bh_mutex);
970 } else {
971 mutex_exit(&trx_sys->mutex);
972 }
973 }
974
975 /****************************************************************//**
976 Assign the transaction its history serialisation number and write the
977 update UNDO log record to the assigned rollback segment. */
978 static MY_ATTRIBUTE((nonnull))
979 void
trx_write_serialisation_history(trx_t * trx,mtr_t * mtr)980 trx_write_serialisation_history(
981 /*============================*/
982 trx_t* trx, /*!< in/out: transaction */
983 mtr_t* mtr) /*!< in/out: mini-transaction */
984 {
985 #ifdef WITH_WSREP
986 trx_sysf_t* sys_header;
987 #endif /* WITH_WSREP */
988 trx_rseg_t* rseg;
989
990 rseg = trx->rseg;
991
992 /* Change the undo log segment states from TRX_UNDO_ACTIVE
993 to some other state: these modifications to the file data
994 structure define the transaction as committed in the file
995 based domain, at the serialization point of the log sequence
996 number lsn obtained below. */
997
998 if (trx->update_undo != NULL) {
999 page_t* undo_hdr_page;
1000 trx_undo_t* undo = trx->update_undo;
1001
1002 /* We have to hold the rseg mutex because update
1003 log headers have to be put to the history list in the
1004 (serialisation) order of the UNDO trx number. This is
1005 required for the purge in-memory data structures too. */
1006
1007 mutex_enter(&rseg->mutex);
1008
1009 /* Assign the transaction serialisation number and also
1010 update the purge min binary heap if this is the first
1011 UNDO log being written to the assigned rollback segment. */
1012
1013 trx_serialisation_number_get(trx);
1014
1015 /* It is not necessary to obtain trx->undo_mutex here
1016 because only a single OS thread is allowed to do the
1017 transaction commit for this transaction. */
1018
1019 undo_hdr_page = trx_undo_set_state_at_finish(undo, mtr);
1020
1021 trx_undo_update_cleanup(trx, undo_hdr_page, mtr);
1022 } else {
1023 mutex_enter(&rseg->mutex);
1024 }
1025
1026 if (trx->insert_undo != NULL) {
1027 trx_undo_set_state_at_finish(trx->insert_undo, mtr);
1028 }
1029
1030 mutex_exit(&rseg->mutex);
1031
1032 MONITOR_INC(MONITOR_TRX_COMMIT_UNDO);
1033
1034 #ifdef WITH_WSREP
1035 sys_header = trx_sysf_get(mtr);
1036 /* Update latest MySQL wsrep XID in trx sys header. */
1037 if (wsrep_is_wsrep_xid(&trx->xid))
1038 {
1039 trx_sys_update_wsrep_checkpoint(&trx->xid, sys_header, mtr);
1040 }
1041 #endif /* WITH_WSREP */
1042
1043 /* Update the latest MySQL binlog name and offset info
1044 in trx sys header if MySQL binlogging is on or the database
1045 server is a MySQL replication slave */
1046
1047 if (trx->mysql_log_file_name
1048 && trx->mysql_log_file_name[0] != '\0') {
1049
1050 trx_sys_update_mysql_binlog_offset(
1051 trx->mysql_log_file_name,
1052 trx->mysql_log_offset,
1053 #ifdef WITH_WSREP
1054 TRX_SYS_MYSQL_LOG_INFO, sys_header, mtr);
1055 #else
1056 TRX_SYS_MYSQL_LOG_INFO, mtr);
1057 #endif /* WITH_WSREP */
1058
1059 trx->mysql_log_file_name = NULL;
1060 }
1061 }
1062
1063 /********************************************************************
1064 Finalize a transaction containing updates for a FTS table. */
1065 static MY_ATTRIBUTE((nonnull))
1066 void
trx_finalize_for_fts_table(fts_trx_table_t * ftt)1067 trx_finalize_for_fts_table(
1068 /*=======================*/
1069 fts_trx_table_t* ftt) /* in: FTS trx table */
1070 {
1071 fts_t* fts = ftt->table->fts;
1072 fts_doc_ids_t* doc_ids = ftt->added_doc_ids;
1073
1074 mutex_enter(&fts->bg_threads_mutex);
1075
1076 if (fts->fts_status & BG_THREAD_STOP) {
1077 /* The table is about to be dropped, no use
1078 adding anything to its work queue. */
1079
1080 mutex_exit(&fts->bg_threads_mutex);
1081 } else {
1082 mem_heap_t* heap;
1083 mutex_exit(&fts->bg_threads_mutex);
1084
1085 ut_a(fts->add_wq);
1086
1087 heap = static_cast<mem_heap_t*>(doc_ids->self_heap->arg);
1088
1089 ib_wqueue_add(fts->add_wq, doc_ids, heap);
1090
1091 /* fts_trx_table_t no longer owns the list. */
1092 ftt->added_doc_ids = NULL;
1093 }
1094 }
1095
1096 /******************************************************************//**
1097 Finalize a transaction containing updates to FTS tables. */
1098 static MY_ATTRIBUTE((nonnull))
1099 void
trx_finalize_for_fts(trx_t * trx,bool is_commit)1100 trx_finalize_for_fts(
1101 /*=================*/
1102 trx_t* trx, /*!< in/out: transaction */
1103 bool is_commit) /*!< in: true if the transaction was
1104 committed, false if it was rolled back. */
1105 {
1106 if (is_commit) {
1107 const ib_rbt_node_t* node;
1108 ib_rbt_t* tables;
1109 fts_savepoint_t* savepoint;
1110
1111 savepoint = static_cast<fts_savepoint_t*>(
1112 ib_vector_last(trx->fts_trx->savepoints));
1113
1114 tables = savepoint->tables;
1115
1116 for (node = rbt_first(tables);
1117 node;
1118 node = rbt_next(tables, node)) {
1119 fts_trx_table_t** ftt;
1120
1121 ftt = rbt_value(fts_trx_table_t*, node);
1122
1123 if ((*ftt)->added_doc_ids) {
1124 trx_finalize_for_fts_table(*ftt);
1125 }
1126 }
1127 }
1128
1129 fts_trx_free(trx->fts_trx);
1130 trx->fts_trx = NULL;
1131 }
1132
1133 /**********************************************************************//**
1134 If required, flushes the log to disk based on the value of
1135 innodb_flush_log_at_trx_commit. */
1136 static
1137 void
trx_flush_log_if_needed_low(lsn_t lsn)1138 trx_flush_log_if_needed_low(
1139 /*========================*/
1140 lsn_t lsn) /*!< in: lsn up to which logs are to be
1141 flushed. */
1142 {
1143 switch (srv_flush_log_at_trx_commit) {
1144 case 0:
1145 /* Do nothing */
1146 break;
1147 case 1:
1148 /* Write the log and optionally flush it to disk */
1149 log_write_up_to(lsn, LOG_WAIT_ONE_GROUP,
1150 srv_unix_file_flush_method != SRV_UNIX_NOSYNC);
1151 break;
1152 case 2:
1153 /* Write the log but do not flush it to disk */
1154 log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, FALSE);
1155
1156 break;
1157 default:
1158 ut_error;
1159 }
1160 }
1161
1162 /**********************************************************************//**
1163 If required, flushes the log to disk based on the value of
1164 innodb_flush_log_at_trx_commit. */
1165 static MY_ATTRIBUTE((nonnull))
1166 void
trx_flush_log_if_needed(lsn_t lsn,trx_t * trx)1167 trx_flush_log_if_needed(
1168 /*====================*/
1169 lsn_t lsn, /*!< in: lsn up to which logs are to be
1170 flushed. */
1171 trx_t* trx) /*!< in/out: transaction */
1172 {
1173 trx->op_info = "flushing log";
1174 trx_flush_log_if_needed_low(lsn);
1175 trx->op_info = "";
1176 }
1177
1178 /****************************************************************//**
1179 Commits a transaction in memory. */
1180 static MY_ATTRIBUTE((nonnull))
1181 void
trx_commit_in_memory(trx_t * trx,lsn_t lsn)1182 trx_commit_in_memory(
1183 /*=================*/
1184 trx_t* trx, /*!< in/out: transaction */
1185 lsn_t lsn) /*!< in: log sequence number of the mini-transaction
1186 commit of trx_write_serialisation_history(), or 0
1187 if the transaction did not modify anything */
1188 {
1189 trx->must_flush_log_later = FALSE;
1190
1191 if (trx_is_autocommit_non_locking(trx)) {
1192 ut_ad(trx->read_only);
1193 ut_a(!trx->is_recovered);
1194 ut_ad(trx->rseg == NULL);
1195 ut_ad(!trx->in_ro_trx_list);
1196 ut_ad(!trx->in_rw_trx_list);
1197
1198 /* Note: We are asserting without holding the lock mutex. But
1199 that is OK because this transaction is not waiting and cannot
1200 be rolled back and no new locks can (or should not) be added
1201 becuase it is flagged as a non-locking read-only transaction. */
1202
1203 ut_a(UT_LIST_GET_LEN(trx->lock.trx_locks) == 0);
1204
1205 /* This state change is not protected by any mutex, therefore
1206 there is an inherent race here around state transition during
1207 printouts. We ignore this race for the sake of efficiency.
1208 However, the trx_sys_t::mutex will protect the trx_t instance
1209 and it cannot be removed from the mysql_trx_list and freed
1210 without first acquiring the trx_sys_t::mutex. */
1211
1212 ut_ad(trx_state_eq(trx, TRX_STATE_ACTIVE));
1213
1214 trx->state = TRX_STATE_NOT_STARTED;
1215
1216 read_view_remove(trx->global_read_view, false);
1217
1218 MONITOR_INC(MONITOR_TRX_NL_RO_COMMIT);
1219 } else {
1220 lock_trx_release_locks(trx);
1221
1222 /* Remove the transaction from the list of active
1223 transactions now that it no longer holds any user locks. */
1224
1225 ut_ad(trx_state_eq(trx, TRX_STATE_COMMITTED_IN_MEMORY));
1226
1227 mutex_enter(&trx_sys->mutex);
1228
1229 assert_trx_in_list(trx);
1230
1231 if (trx->read_only) {
1232 UT_LIST_REMOVE(trx_list, trx_sys->ro_trx_list, trx);
1233 ut_d(trx->in_ro_trx_list = FALSE);
1234 MONITOR_INC(MONITOR_TRX_RO_COMMIT);
1235 } else {
1236 UT_LIST_REMOVE(trx_list, trx_sys->rw_trx_list, trx);
1237 ut_d(trx->in_rw_trx_list = FALSE);
1238 MONITOR_INC(MONITOR_TRX_RW_COMMIT);
1239 }
1240
1241 /* If this transaction came from trx_allocate_for_mysql(),
1242 trx->in_mysql_trx_list would hold. In that case, the
1243 trx->state change must be protected by trx_sys->mutex, so that
1244 lock_print_info_all_transactions() will have a consistent
1245 view. */
1246
1247 trx->state = TRX_STATE_NOT_STARTED;
1248
1249 /* We already own the trx_sys_t::mutex, by doing it here we
1250 avoid a potential context switch later. */
1251 read_view_remove(trx->global_read_view, true);
1252
1253 ut_ad(trx_sys_validate_trx_list());
1254
1255 mutex_exit(&trx_sys->mutex);
1256 }
1257
1258 if (trx->global_read_view != NULL) {
1259
1260 mem_heap_empty(trx->global_read_view_heap);
1261
1262 trx->global_read_view = NULL;
1263 }
1264
1265 trx->read_view = NULL;
1266
1267 if (lsn) {
1268 if (trx->insert_undo != NULL) {
1269
1270 trx_undo_insert_cleanup(trx);
1271 }
1272
1273 /* NOTE that we could possibly make a group commit more
1274 efficient here: call os_thread_yield here to allow also other
1275 trxs to come to commit! */
1276
1277 /*-------------------------------------*/
1278
1279 /* Depending on the my.cnf options, we may now write the log
1280 buffer to the log files, making the transaction durable if
1281 the OS does not crash. We may also flush the log files to
1282 disk, making the transaction durable also at an OS crash or a
1283 power outage.
1284
1285 The idea in InnoDB's group commit is that a group of
1286 transactions gather behind a trx doing a physical disk write
1287 to log files, and when that physical write has been completed,
1288 one of those transactions does a write which commits the whole
1289 group. Note that this group commit will only bring benefit if
1290 there are > 2 users in the database. Then at least 2 users can
1291 gather behind one doing the physical log write to disk.
1292
1293 If we are calling trx_commit() under prepare_commit_mutex, we
1294 will delay possible log write and flush to a separate function
1295 trx_commit_complete_for_mysql(), which is only called when the
1296 thread has released the mutex. This is to make the
1297 group commit algorithm to work. Otherwise, the prepare_commit
1298 mutex would serialize all commits and prevent a group of
1299 transactions from gathering. */
1300
1301 if (trx->flush_log_later) {
1302 /* Do nothing yet */
1303 trx->must_flush_log_later = TRUE;
1304 } else if (srv_flush_log_at_trx_commit == 0
1305 || thd_requested_durability(trx->mysql_thd)
1306 == HA_IGNORE_DURABILITY) {
1307 /* Do nothing */
1308 } else {
1309 trx_flush_log_if_needed(lsn, trx);
1310 }
1311
1312 trx->commit_lsn = lsn;
1313
1314 /* Tell server some activity has happened, since the trx
1315 does changes something. Background utility threads like
1316 master thread, purge thread or page_cleaner thread might
1317 have some work to do. */
1318 srv_active_wake_master_thread();
1319 }
1320
1321 /* undo_no is non-zero if we're doing the final commit. */
1322 bool not_rollback = trx->undo_no != 0;
1323 /* Free all savepoints, starting from the first. */
1324 trx_named_savept_t* savep = UT_LIST_GET_FIRST(trx->trx_savepoints);
1325 trx_roll_savepoints_free(trx, savep);
1326
1327 trx->rseg = NULL;
1328 trx->undo_no = 0;
1329 trx->last_sql_stat_start.least_undo_no = 0;
1330
1331 trx->ddl = false;
1332 #ifdef UNIV_DEBUG
1333 ut_ad(trx->start_file != 0);
1334 ut_ad(trx->start_line != 0);
1335 trx->start_file = 0;
1336 trx->start_line = 0;
1337 #endif /* UNIV_DEBUG */
1338
1339 trx->will_lock = 0;
1340 trx->read_only = FALSE;
1341 trx->auto_commit = FALSE;
1342
1343 if (trx->fts_trx) {
1344 trx_finalize_for_fts(trx, not_rollback);
1345 }
1346
1347 ut_ad(trx->lock.wait_thr == NULL);
1348 ut_ad(UT_LIST_GET_LEN(trx->lock.trx_locks) == 0);
1349 ut_ad(!trx->in_ro_trx_list);
1350 ut_ad(!trx->in_rw_trx_list);
1351
1352 #ifdef WITH_WSREP
1353 if (wsrep_on(trx->mysql_thd)) {
1354 trx->lock.was_chosen_as_deadlock_victim = FALSE;
1355 }
1356 #endif
1357 trx->dict_operation = TRX_DICT_OP_NONE;
1358
1359 trx->error_state = DB_SUCCESS;
1360
1361 /* trx->in_mysql_trx_list would hold between
1362 trx_allocate_for_mysql() and trx_free_for_mysql(). It does not
1363 hold for recovered transactions or system transactions. */
1364 }
1365
1366 /****************************************************************//**
1367 Commits a transaction and a mini-transaction. */
1368 UNIV_INTERN
1369 void
trx_commit_low(trx_t * trx,mtr_t * mtr)1370 trx_commit_low(
1371 /*===========*/
1372 trx_t* trx, /*!< in/out: transaction */
1373 mtr_t* mtr) /*!< in/out: mini-transaction (will be committed),
1374 or NULL if trx made no modifications */
1375 {
1376 lsn_t lsn;
1377
1378 assert_trx_nonlocking_or_in_list(trx);
1379 ut_ad(!trx_state_eq(trx, TRX_STATE_COMMITTED_IN_MEMORY));
1380 ut_ad(!mtr || mtr->state == MTR_ACTIVE);
1381 ut_ad(!mtr == !(trx->insert_undo || trx->update_undo));
1382
1383 /* undo_no is non-zero if we're doing the final commit. */
1384 if (trx->fts_trx && trx->undo_no != 0) {
1385 dberr_t error;
1386
1387 ut_a(!trx_is_autocommit_non_locking(trx));
1388
1389 error = fts_commit(trx);
1390
1391 /* FTS-FIXME: Temporarily tolerate DB_DUPLICATE_KEY
1392 instead of dying. This is a possible scenario if there
1393 is a crash between insert to DELETED table committing
1394 and transaction committing. The fix would be able to
1395 return error from this function */
1396 if (error != DB_SUCCESS && error != DB_DUPLICATE_KEY) {
1397 /* FTS-FIXME: once we can return values from this
1398 function, we should do so and signal an error
1399 instead of just dying. */
1400
1401 ut_error;
1402 }
1403 }
1404
1405 if (mtr) {
1406 trx_write_serialisation_history(trx, mtr);
1407 /* The following call commits the mini-transaction, making the
1408 whole transaction committed in the file-based world, at this
1409 log sequence number. The transaction becomes 'durable' when
1410 we write the log to disk, but in the logical sense the commit
1411 in the file-based data structures (undo logs etc.) happens
1412 here.
1413
1414 NOTE that transaction numbers, which are assigned only to
1415 transactions with an update undo log, do not necessarily come
1416 in exactly the same order as commit lsn's, if the transactions
1417 have different rollback segments. To get exactly the same
1418 order we should hold the kernel mutex up to this point,
1419 adding to the contention of the kernel mutex. However, if
1420 a transaction T2 is able to see modifications made by
1421 a transaction T1, T2 will always get a bigger transaction
1422 number and a bigger commit lsn than T1. */
1423
1424 /*--------------*/
1425 mtr_commit(mtr);
1426 /*--------------*/
1427 lsn = mtr->end_lsn;
1428 } else {
1429 lsn = 0;
1430 }
1431
1432 trx_commit_in_memory(trx, lsn);
1433 }
1434
1435 /****************************************************************//**
1436 Commits a transaction. */
1437 UNIV_INTERN
1438 void
trx_commit(trx_t * trx)1439 trx_commit(
1440 /*=======*/
1441 trx_t* trx) /*!< in/out: transaction */
1442 {
1443 mtr_t local_mtr;
1444 mtr_t* mtr;
1445
1446 if (trx->insert_undo || trx->update_undo) {
1447 mtr = &local_mtr;
1448 mtr_start(mtr);
1449 } else {
1450 mtr = NULL;
1451 }
1452
1453 trx_commit_low(trx, mtr);
1454 }
1455
1456 /****************************************************************//**
1457 Cleans up a transaction at database startup. The cleanup is needed if
1458 the transaction already got to the middle of a commit when the database
1459 crashed, and we cannot roll it back. */
1460 UNIV_INTERN
1461 void
trx_cleanup_at_db_startup(trx_t * trx)1462 trx_cleanup_at_db_startup(
1463 /*======================*/
1464 trx_t* trx) /*!< in: transaction */
1465 {
1466 ut_ad(trx->is_recovered);
1467
1468 if (trx->insert_undo != NULL) {
1469
1470 trx_undo_insert_cleanup(trx);
1471 }
1472
1473 trx->rseg = NULL;
1474 trx->undo_no = 0;
1475 trx->last_sql_stat_start.least_undo_no = 0;
1476
1477 mutex_enter(&trx_sys->mutex);
1478
1479 ut_a(!trx->read_only);
1480
1481 UT_LIST_REMOVE(trx_list, trx_sys->rw_trx_list, trx);
1482
1483 assert_trx_in_rw_list(trx);
1484 ut_d(trx->in_rw_trx_list = FALSE);
1485
1486 mutex_exit(&trx_sys->mutex);
1487
1488 /* Change the transaction state without mutex protection, now
1489 that it no longer is in the trx_list. Recovered transactions
1490 are never placed in the mysql_trx_list. */
1491 ut_ad(trx->is_recovered);
1492 ut_ad(!trx->in_ro_trx_list);
1493 ut_ad(!trx->in_rw_trx_list);
1494 ut_ad(!trx->in_mysql_trx_list);
1495 trx->state = TRX_STATE_NOT_STARTED;
1496 }
1497
1498 /********************************************************************//**
1499 Assigns a read view for a consistent read query. All the consistent reads
1500 within the same transaction will get the same read view, which is created
1501 when this function is first called for a new started transaction.
1502 @return consistent read view */
1503 UNIV_INTERN
1504 read_view_t*
trx_assign_read_view(trx_t * trx)1505 trx_assign_read_view(
1506 /*=================*/
1507 trx_t* trx) /*!< in: active transaction */
1508 {
1509 ut_ad(trx->state == TRX_STATE_ACTIVE);
1510
1511 if (trx->read_view != NULL) {
1512 return(trx->read_view);
1513 }
1514
1515 if (!trx->read_view) {
1516
1517 trx->read_view = read_view_open_now(
1518 trx->id, trx->global_read_view_heap);
1519
1520 trx->global_read_view = trx->read_view;
1521 }
1522
1523 return(trx->read_view);
1524 }
1525
1526 /****************************************************************//**
1527 Prepares a transaction for commit/rollback. */
1528 UNIV_INTERN
1529 void
trx_commit_or_rollback_prepare(trx_t * trx)1530 trx_commit_or_rollback_prepare(
1531 /*===========================*/
1532 trx_t* trx) /*!< in/out: transaction */
1533 {
1534 /* We are reading trx->state without holding trx_sys->mutex
1535 here, because the commit or rollback should be invoked for a
1536 running (or recovered prepared) transaction that is associated
1537 with the current thread. */
1538
1539 switch (trx->state) {
1540 case TRX_STATE_NOT_STARTED:
1541 #ifdef WITH_WSREP
1542 ut_d(trx->start_file = __FILE__);
1543 ut_d(trx->start_line = __LINE__);
1544 #endif /* WITH_WSREP */
1545 trx_start_low(trx);
1546 /* fall through */
1547 case TRX_STATE_ACTIVE:
1548 case TRX_STATE_PREPARED:
1549 /* If the trx is in a lock wait state, moves the waiting
1550 query thread to the suspended state */
1551
1552 if (trx->lock.que_state == TRX_QUE_LOCK_WAIT) {
1553
1554 ut_a(trx->lock.wait_thr != NULL);
1555 trx->lock.wait_thr->state = QUE_THR_SUSPENDED;
1556 trx->lock.wait_thr = NULL;
1557
1558 trx->lock.que_state = TRX_QUE_RUNNING;
1559 }
1560
1561 ut_a(trx->lock.n_active_thrs == 1);
1562 return;
1563 case TRX_STATE_COMMITTED_IN_MEMORY:
1564 break;
1565 }
1566
1567 ut_error;
1568 }
1569
1570 /*********************************************************************//**
1571 Creates a commit command node struct.
1572 @return own: commit node struct */
1573 UNIV_INTERN
1574 commit_node_t*
trx_commit_node_create(mem_heap_t * heap)1575 trx_commit_node_create(
1576 /*===================*/
1577 mem_heap_t* heap) /*!< in: mem heap where created */
1578 {
1579 commit_node_t* node;
1580
1581 node = static_cast<commit_node_t*>(mem_heap_alloc(heap, sizeof(*node)));
1582 node->common.type = QUE_NODE_COMMIT;
1583 node->state = COMMIT_NODE_SEND;
1584
1585 return(node);
1586 }
1587
1588 /***********************************************************//**
1589 Performs an execution step for a commit type node in a query graph.
1590 @return query thread to run next, or NULL */
1591 UNIV_INTERN
1592 que_thr_t*
trx_commit_step(que_thr_t * thr)1593 trx_commit_step(
1594 /*============*/
1595 que_thr_t* thr) /*!< in: query thread */
1596 {
1597 commit_node_t* node;
1598
1599 node = static_cast<commit_node_t*>(thr->run_node);
1600
1601 ut_ad(que_node_get_type(node) == QUE_NODE_COMMIT);
1602
1603 if (thr->prev_node == que_node_get_parent(node)) {
1604 node->state = COMMIT_NODE_SEND;
1605 }
1606
1607 if (node->state == COMMIT_NODE_SEND) {
1608 trx_t* trx;
1609
1610 node->state = COMMIT_NODE_WAIT;
1611
1612 trx = thr_get_trx(thr);
1613
1614 ut_a(trx->lock.wait_thr == NULL);
1615 ut_a(trx->lock.que_state != TRX_QUE_LOCK_WAIT);
1616
1617 trx_commit_or_rollback_prepare(trx);
1618
1619 trx->lock.que_state = TRX_QUE_COMMITTING;
1620
1621 trx_commit(trx);
1622
1623 ut_ad(trx->lock.wait_thr == NULL);
1624
1625 trx->lock.que_state = TRX_QUE_RUNNING;
1626
1627 thr = NULL;
1628 } else {
1629 ut_ad(node->state == COMMIT_NODE_WAIT);
1630
1631 node->state = COMMIT_NODE_SEND;
1632
1633 thr->run_node = que_node_get_parent(node);
1634 }
1635
1636 return(thr);
1637 }
1638
1639 /**********************************************************************//**
1640 Does the transaction commit for MySQL.
1641 @return DB_SUCCESS or error number */
1642 UNIV_INTERN
1643 dberr_t
trx_commit_for_mysql(trx_t * trx)1644 trx_commit_for_mysql(
1645 /*=================*/
1646 trx_t* trx) /*!< in/out: transaction */
1647 {
1648 /* Because we do not do the commit by sending an Innobase
1649 sig to the transaction, we must here make sure that trx has been
1650 started. */
1651
1652 ut_a(trx);
1653
1654 switch (trx->state) {
1655 case TRX_STATE_NOT_STARTED:
1656 /* Update the info whether we should skip XA steps that eat
1657 CPU time.
1658
1659 For the duration of the transaction trx->support_xa is
1660 not reread from thd so any changes in the value take
1661 effect in the next transaction. This is to avoid a
1662 scenario where some undo log records generated by a
1663 transaction contain XA information and other undo log
1664 records, generated by the same transaction do not. */
1665 trx->support_xa = thd_supports_xa(trx->mysql_thd);
1666
1667 ut_d(trx->start_file = __FILE__);
1668 ut_d(trx->start_line = __LINE__);
1669
1670 trx_start_low(trx);
1671 /* fall through */
1672 case TRX_STATE_ACTIVE:
1673 case TRX_STATE_PREPARED:
1674 trx->op_info = "committing";
1675 trx_commit(trx);
1676 MONITOR_DEC(MONITOR_TRX_ACTIVE);
1677 trx->op_info = "";
1678 return(DB_SUCCESS);
1679 case TRX_STATE_COMMITTED_IN_MEMORY:
1680 break;
1681 }
1682 ut_error;
1683 return(DB_CORRUPTION);
1684 }
1685
1686 /**********************************************************************//**
1687 If required, flushes the log to disk if we called trx_commit_for_mysql()
1688 with trx->flush_log_later == TRUE. */
1689 UNIV_INTERN
1690 void
trx_commit_complete_for_mysql(trx_t * trx)1691 trx_commit_complete_for_mysql(
1692 /*==========================*/
1693 trx_t* trx) /*!< in/out: transaction */
1694 {
1695 ut_a(trx);
1696
1697 if (!trx->must_flush_log_later
1698 || thd_requested_durability(trx->mysql_thd)
1699 == HA_IGNORE_DURABILITY) {
1700 return;
1701 }
1702
1703 trx_flush_log_if_needed(trx->commit_lsn, trx);
1704
1705 trx->must_flush_log_later = FALSE;
1706 }
1707
1708 /**********************************************************************//**
1709 Marks the latest SQL statement ended. */
1710 UNIV_INTERN
1711 void
trx_mark_sql_stat_end(trx_t * trx)1712 trx_mark_sql_stat_end(
1713 /*==================*/
1714 trx_t* trx) /*!< in: trx handle */
1715 {
1716 ut_a(trx);
1717
1718 switch (trx->state) {
1719 case TRX_STATE_PREPARED:
1720 case TRX_STATE_COMMITTED_IN_MEMORY:
1721 break;
1722 case TRX_STATE_NOT_STARTED:
1723 trx->undo_no = 0;
1724 /* fall through */
1725 case TRX_STATE_ACTIVE:
1726 trx->last_sql_stat_start.least_undo_no = trx->undo_no;
1727
1728 if (trx->fts_trx) {
1729 fts_savepoint_laststmt_refresh(trx);
1730 }
1731
1732 return;
1733 }
1734
1735 ut_error;
1736 }
1737
1738 /**********************************************************************//**
1739 Prints info about a transaction.
1740 Caller must hold trx_sys->mutex. */
1741 UNIV_INTERN
1742 void
trx_print_low(FILE * f,const trx_t * trx,ulint max_query_len,ulint n_rec_locks,ulint n_trx_locks,ulint heap_size)1743 trx_print_low(
1744 /*==========*/
1745 FILE* f,
1746 /*!< in: output stream */
1747 const trx_t* trx,
1748 /*!< in: transaction */
1749 ulint max_query_len,
1750 /*!< in: max query length to print,
1751 or 0 to use the default max length */
1752 ulint n_rec_locks,
1753 /*!< in: lock_number_of_rows_locked(&trx->lock) */
1754 ulint n_trx_locks,
1755 /*!< in: length of trx->lock.trx_locks */
1756 ulint heap_size)
1757 /*!< in: mem_heap_get_size(trx->lock.lock_heap) */
1758 {
1759 ibool newline;
1760 const char* op_info;
1761
1762 ut_ad(mutex_own(&trx_sys->mutex));
1763
1764 fprintf(f, "TRANSACTION " TRX_ID_FMT, trx->id);
1765
1766 /* trx->state cannot change from or to NOT_STARTED while we
1767 are holding the trx_sys->mutex. It may change from ACTIVE to
1768 PREPARED or COMMITTED. */
1769 switch (trx->state) {
1770 case TRX_STATE_NOT_STARTED:
1771 fputs(", not started", f);
1772 goto state_ok;
1773 case TRX_STATE_ACTIVE:
1774 fprintf(f, ", ACTIVE %lu sec",
1775 (ulong) difftime(time(NULL), trx->start_time));
1776 goto state_ok;
1777 case TRX_STATE_PREPARED:
1778 fprintf(f, ", ACTIVE (PREPARED) %lu sec",
1779 (ulong) difftime(time(NULL), trx->start_time));
1780 goto state_ok;
1781 case TRX_STATE_COMMITTED_IN_MEMORY:
1782 fputs(", COMMITTED IN MEMORY", f);
1783 goto state_ok;
1784 }
1785 fprintf(f, ", state %lu", (ulong) trx->state);
1786 ut_ad(0);
1787 state_ok:
1788
1789 /* prevent a race condition */
1790 op_info = trx->op_info;
1791
1792 if (*op_info) {
1793 putc(' ', f);
1794 fputs(op_info, f);
1795 }
1796
1797 if (trx->is_recovered) {
1798 fputs(" recovered trx", f);
1799 }
1800
1801 if (trx->declared_to_be_inside_innodb) {
1802 fprintf(f, ", thread declared inside InnoDB %lu",
1803 (ulong) trx->n_tickets_to_enter_innodb);
1804 }
1805
1806 putc('\n', f);
1807
1808 if (trx->n_mysql_tables_in_use > 0 || trx->mysql_n_tables_locked > 0) {
1809 fprintf(f, "mysql tables in use %lu, locked %lu\n",
1810 (ulong) trx->n_mysql_tables_in_use,
1811 (ulong) trx->mysql_n_tables_locked);
1812 }
1813
1814 newline = TRUE;
1815
1816 /* trx->lock.que_state of an ACTIVE transaction may change
1817 while we are not holding trx->mutex. We perform a dirty read
1818 for performance reasons. */
1819
1820 switch (trx->lock.que_state) {
1821 case TRX_QUE_RUNNING:
1822 newline = FALSE; break;
1823 case TRX_QUE_LOCK_WAIT:
1824 fputs("LOCK WAIT ", f); break;
1825 case TRX_QUE_ROLLING_BACK:
1826 fputs("ROLLING BACK ", f); break;
1827 case TRX_QUE_COMMITTING:
1828 fputs("COMMITTING ", f); break;
1829 default:
1830 fprintf(f, "que state %lu ", (ulong) trx->lock.que_state);
1831 }
1832
1833 if (n_trx_locks > 0 || heap_size > 400) {
1834 newline = TRUE;
1835
1836 fprintf(f, "%lu lock struct(s), heap size %lu,"
1837 " %lu row lock(s)",
1838 (ulong) n_trx_locks,
1839 (ulong) heap_size,
1840 (ulong) n_rec_locks);
1841 }
1842
1843 if (trx->has_search_latch) {
1844 newline = TRUE;
1845 fputs(", holds adaptive hash latch", f);
1846 }
1847
1848 if (trx->undo_no != 0) {
1849 newline = TRUE;
1850 fprintf(f, ", undo log entries " TRX_ID_FMT, trx->undo_no);
1851 }
1852
1853 if (newline) {
1854 putc('\n', f);
1855 }
1856
1857 if (trx->mysql_thd != NULL) {
1858 innobase_mysql_print_thd(
1859 f, trx->mysql_thd, static_cast<uint>(max_query_len));
1860 }
1861 }
1862
1863 /**********************************************************************//**
1864 Prints info about a transaction.
1865 The caller must hold lock_sys->mutex and trx_sys->mutex.
1866 When possible, use trx_print() instead. */
1867 UNIV_INTERN
1868 void
trx_print_latched(FILE * f,const trx_t * trx,ulint max_query_len)1869 trx_print_latched(
1870 /*==============*/
1871 FILE* f, /*!< in: output stream */
1872 const trx_t* trx, /*!< in: transaction */
1873 ulint max_query_len) /*!< in: max query length to print,
1874 or 0 to use the default max length */
1875 {
1876 ut_ad(lock_mutex_own());
1877 ut_ad(mutex_own(&trx_sys->mutex));
1878
1879 trx_print_low(f, trx, max_query_len,
1880 lock_number_of_rows_locked(&trx->lock),
1881 UT_LIST_GET_LEN(trx->lock.trx_locks),
1882 mem_heap_get_size(trx->lock.lock_heap));
1883 }
1884
#ifdef WITH_WSREP
/**********************************************************************//**
Prints info about a transaction.
The information may be read without trx_sys->mutex being held, so it
may not be completely accurate. The caller must own lock_sys->mutex and
the trx must have some locks, which makes sure that it does not escape
without locking lock_sys->mutex. */
UNIV_INTERN
void
wsrep_trx_print_locking(
/*====================*/
	FILE*		f,
			/*!< in: output stream */
	const trx_t*	trx,
			/*!< in: transaction */
	ulint		max_query_len)
			/*!< in: max query length to print,
			or 0 to use the default max length */
{
	ut_ad(lock_mutex_own());
	ut_ad(trx->lock.trx_locks.count > 0);

	fprintf(f, "TRANSACTION " TRX_ID_FMT, trx->id);

	/* trx->state may change since trx_sys->mutex is not required */
	switch (trx->state) {
	case TRX_STATE_NOT_STARTED:
		fputs(", not started", f);
		break;
	case TRX_STATE_ACTIVE:
		fprintf(f, ", ACTIVE %lu sec",
			(ulong) difftime(time(NULL), trx->start_time));
		break;
	case TRX_STATE_PREPARED:
		fprintf(f, ", ACTIVE (PREPARED) %lu sec",
			(ulong) difftime(time(NULL), trx->start_time));
		break;
	case TRX_STATE_COMMITTED_IN_MEMORY:
		fputs(", COMMITTED IN MEMORY", f);
		break;
	default:
		/* Unknown state: print its numeric value. */
		fprintf(f, ", state %lu", (ulong) trx->state);
		ut_ad(0);
	}

	/* Copy the pointer once so that the test and the print below
	use the same string (prevents a race condition). */
	const char*	op_info = trx->op_info;

	if (*op_info != '\0') {
		putc(' ', f);
		fputs(op_info, f);
	}

	if (trx->is_recovered) {
		fputs(" recovered trx", f);
	}

	if (trx->declared_to_be_inside_innodb) {
		fprintf(f, ", thread declared inside InnoDB %lu",
			(ulong) trx->n_tickets_to_enter_innodb);
	}

	putc('\n', f);

	if (trx->n_mysql_tables_in_use > 0
	    || trx->mysql_n_tables_locked > 0) {
		fprintf(f, "mysql tables in use %lu, locked %lu\n",
			(ulong) trx->n_mysql_tables_in_use,
			(ulong) trx->mysql_n_tables_locked);
	}

	/* Tracks whether the second output line needs terminating. */
	ibool	need_newline = TRUE;

	/* trx->lock.que_state of an ACTIVE transaction may change while
	trx->mutex is not held. A dirty read is performed for
	performance reasons. */

	switch (trx->lock.que_state) {
	case TRX_QUE_RUNNING:
		need_newline = FALSE;
		break;
	case TRX_QUE_LOCK_WAIT:
		fputs("LOCK WAIT ", f);
		break;
	case TRX_QUE_ROLLING_BACK:
		fputs("ROLLING BACK ", f);
		break;
	case TRX_QUE_COMMITTING:
		fputs("COMMITTING ", f);
		break;
	default:
		fprintf(f, "que state %lu ", (ulong) trx->lock.que_state);
	}

	if (trx->has_search_latch) {
		need_newline = TRUE;
		fputs(", holds adaptive hash latch", f);
	}

	if (trx->undo_no != 0) {
		need_newline = TRUE;
		fprintf(f, ", undo log entries " TRX_ID_FMT, trx->undo_no);
	}

	if (need_newline) {
		putc('\n', f);
	}

	if (trx->mysql_thd != NULL) {
		innobase_mysql_print_thd(
			f, trx->mysql_thd, static_cast<uint>(max_query_len));
	}
}
#endif /* WITH_WSREP */
1997 /**********************************************************************//**
1998 Prints info about a transaction.
1999 Acquires and releases lock_sys->mutex and trx_sys->mutex. */
2000 UNIV_INTERN
2001 void
trx_print(FILE * f,const trx_t * trx,ulint max_query_len)2002 trx_print(
2003 /*======*/
2004 FILE* f, /*!< in: output stream */
2005 const trx_t* trx, /*!< in: transaction */
2006 ulint max_query_len) /*!< in: max query length to print,
2007 or 0 to use the default max length */
2008 {
2009 ulint n_rec_locks;
2010 ulint n_trx_locks;
2011 ulint heap_size;
2012
2013 lock_mutex_enter();
2014 n_rec_locks = lock_number_of_rows_locked(&trx->lock);
2015 n_trx_locks = UT_LIST_GET_LEN(trx->lock.trx_locks);
2016 heap_size = mem_heap_get_size(trx->lock.lock_heap);
2017 lock_mutex_exit();
2018
2019 mutex_enter(&trx_sys->mutex);
2020 trx_print_low(f, trx, max_query_len,
2021 n_rec_locks, n_trx_locks, heap_size);
2022 mutex_exit(&trx_sys->mutex);
2023 }
2024
#ifdef UNIV_DEBUG
/**********************************************************************//**
Asserts that a transaction has been started, i.e. that its state is
anything but TRX_STATE_NOT_STARTED.
The caller must hold trx_sys->mutex.
@return TRUE if started */
UNIV_INTERN
ibool
trx_assert_started(
/*===============*/
	const trx_t*	trx)	/*!< in: transaction */
{
	ut_ad(mutex_own(&trx_sys->mutex));

	/* Non-locking autocommits should not hold any locks, and this
	function is only called from the locking code. */
	assert_trx_in_list(trx);

	/* While trx_sys->mutex is held, trx->state can change from or
	to NOT_STARTED for non-locking autocommit selects but not for
	other types of transactions. It may change from ACTIVE to
	PREPARED. Unless lock_sys->mutex is held, it may also change to
	COMMITTED. */

	switch (trx->state) {
	case TRX_STATE_PREPARED:
	case TRX_STATE_ACTIVE:
	case TRX_STATE_COMMITTED_IN_MEMORY:
		return(TRUE);

	case TRX_STATE_NOT_STARTED:
		break;
	}

	ut_error;
	return(FALSE);
}
#endif /* UNIV_DEBUG */
2063
2064 /*******************************************************************//**
2065 Compares the "weight" (or size) of two transactions. Transactions that
2066 have edited non-transactional tables are considered heavier than ones
2067 that have not.
2068 @return TRUE if weight(a) >= weight(b) */
2069 UNIV_INTERN
2070 ibool
trx_weight_ge(const trx_t * a,const trx_t * b)2071 trx_weight_ge(
2072 /*==========*/
2073 const trx_t* a, /*!< in: the first transaction to be compared */
2074 const trx_t* b) /*!< in: the second transaction to be compared */
2075 {
2076 ibool a_notrans_edit;
2077 ibool b_notrans_edit;
2078
2079 /* If mysql_thd is NULL for a transaction we assume that it has
2080 not edited non-transactional tables. */
2081
2082 a_notrans_edit = a->mysql_thd != NULL
2083 && thd_has_edited_nontrans_tables(a->mysql_thd);
2084
2085 b_notrans_edit = b->mysql_thd != NULL
2086 && thd_has_edited_nontrans_tables(b->mysql_thd);
2087
2088 if (a_notrans_edit != b_notrans_edit) {
2089
2090 return(a_notrans_edit);
2091 }
2092
2093 /* Either both had edited non-transactional tables or both had
2094 not, we fall back to comparing the number of altered/locked
2095 rows. */
2096
2097 #if 0
2098 fprintf(stderr,
2099 "%s TRX_WEIGHT(a): %lld+%lu, TRX_WEIGHT(b): %lld+%lu\n",
2100 __func__,
2101 a->undo_no, UT_LIST_GET_LEN(a->lock.trx_locks),
2102 b->undo_no, UT_LIST_GET_LEN(b->lock.trx_locks));
2103 #endif
2104
2105 return(TRX_WEIGHT(a) >= TRX_WEIGHT(b));
2106 }
2107
2108 /****************************************************************//**
2109 Prepares a transaction. */
2110 static
2111 void
trx_prepare(trx_t * trx)2112 trx_prepare(
2113 /*========*/
2114 trx_t* trx) /*!< in/out: transaction */
2115 {
2116 trx_rseg_t* rseg;
2117 lsn_t lsn;
2118 mtr_t mtr;
2119
2120 rseg = trx->rseg;
2121 /* Only fresh user transactions can be prepared.
2122 Recovered transactions cannot. */
2123 ut_a(!trx->is_recovered);
2124
2125 if (trx->insert_undo != NULL || trx->update_undo != NULL) {
2126
2127 mtr_start(&mtr);
2128
2129 /* Change the undo log segment states from TRX_UNDO_ACTIVE
2130 to TRX_UNDO_PREPARED: these modifications to the file data
2131 structure define the transaction as prepared in the
2132 file-based world, at the serialization point of lsn. */
2133
2134 mutex_enter(&rseg->mutex);
2135
2136 if (trx->insert_undo != NULL) {
2137
2138 /* It is not necessary to obtain trx->undo_mutex here
2139 because only a single OS thread is allowed to do the
2140 transaction prepare for this transaction. */
2141
2142 trx_undo_set_state_at_prepare(trx, trx->insert_undo,
2143 &mtr);
2144 }
2145
2146 if (trx->update_undo) {
2147 trx_undo_set_state_at_prepare(
2148 trx, trx->update_undo, &mtr);
2149 }
2150
2151 mutex_exit(&rseg->mutex);
2152
2153 /*--------------*/
2154 mtr_commit(&mtr); /* This mtr commit makes the
2155 transaction prepared in the file-based
2156 world */
2157 /*--------------*/
2158 lsn = mtr.end_lsn;
2159 ut_ad(lsn);
2160 } else {
2161 lsn = 0;
2162 }
2163
2164 /*--------------------------------------*/
2165 ut_a(trx->state == TRX_STATE_ACTIVE);
2166 mutex_enter(&trx_sys->mutex);
2167 trx->state = TRX_STATE_PREPARED;
2168 trx_sys->n_prepared_trx++;
2169 mutex_exit(&trx_sys->mutex);
2170 /*--------------------------------------*/
2171
2172 if (lsn) {
2173 /* Depending on the my.cnf options, we may now write the log
2174 buffer to the log files, making the prepared state of the
2175 transaction durable if the OS does not crash. We may also
2176 flush the log files to disk, making the prepared state of the
2177 transaction durable also at an OS crash or a power outage.
2178
2179 The idea in InnoDB's group prepare is that a group of
2180 transactions gather behind a trx doing a physical disk write
2181 to log files, and when that physical write has been completed,
2182 one of those transactions does a write which prepares the whole
2183 group. Note that this group prepare will only bring benefit if
2184 there are > 2 users in the database. Then at least 2 users can
2185 gather behind one doing the physical log write to disk.
2186
2187 TODO: find out if MySQL holds some mutex when calling this.
2188 That would spoil our group prepare algorithm. */
2189
2190 trx_flush_log_if_needed(lsn, trx);
2191 }
2192 }
2193
2194 /**********************************************************************//**
2195 Does the transaction prepare for MySQL. */
2196 UNIV_INTERN
2197 void
trx_prepare_for_mysql(trx_t * trx)2198 trx_prepare_for_mysql(
2199 /*==================*/
2200 trx_t* trx) /*!< in/out: trx handle */
2201 {
2202 trx_start_if_not_started_xa(trx);
2203
2204 trx->op_info = "preparing";
2205
2206 trx_prepare(trx);
2207
2208 trx->op_info = "";
2209 }
2210
2211 /**********************************************************************//**
2212 This function is used to find number of prepared transactions and
2213 their transaction objects for a recovery.
2214 @return number of prepared transactions stored in xid_list */
2215 UNIV_INTERN
2216 int
trx_recover_for_mysql(XID * xid_list,ulint len)2217 trx_recover_for_mysql(
2218 /*==================*/
2219 XID* xid_list, /*!< in/out: prepared transactions */
2220 ulint len) /*!< in: number of slots in xid_list */
2221 {
2222 const trx_t* trx;
2223 ulint count = 0;
2224
2225 ut_ad(xid_list);
2226 ut_ad(len);
2227
2228 /* We should set those transactions which are in the prepared state
2229 to the xid_list */
2230
2231 mutex_enter(&trx_sys->mutex);
2232
2233 for (trx = UT_LIST_GET_FIRST(trx_sys->rw_trx_list);
2234 trx != NULL;
2235 trx = UT_LIST_GET_NEXT(trx_list, trx)) {
2236
2237 assert_trx_in_rw_list(trx);
2238
2239 /* The state of a read-write transaction cannot change
2240 from or to NOT_STARTED while we are holding the
2241 trx_sys->mutex. It may change to PREPARED, but not if
2242 trx->is_recovered. It may also change to COMMITTED. */
2243 if (trx_state_eq(trx, TRX_STATE_PREPARED)) {
2244 xid_list[count] = trx->xid;
2245
2246 if (count == 0) {
2247 ut_print_timestamp(stderr);
2248 fprintf(stderr,
2249 " InnoDB: Starting recovery for"
2250 " XA transactions...\n");
2251 }
2252
2253 ut_print_timestamp(stderr);
2254 fprintf(stderr,
2255 " InnoDB: Transaction " TRX_ID_FMT " in"
2256 " prepared state after recovery\n",
2257 trx->id);
2258
2259 ut_print_timestamp(stderr);
2260 fprintf(stderr,
2261 " InnoDB: Transaction contains changes"
2262 " to " TRX_ID_FMT " rows\n",
2263 trx->undo_no);
2264
2265 count++;
2266
2267 if (count == len) {
2268 break;
2269 }
2270 }
2271 }
2272
2273 mutex_exit(&trx_sys->mutex);
2274
2275 if (count > 0){
2276 ut_print_timestamp(stderr);
2277 fprintf(stderr,
2278 " InnoDB: %d transactions in prepared state"
2279 " after recovery\n",
2280 int (count));
2281 }
2282
2283 return(int (count));
2284 }
2285
2286 /*******************************************************************//**
2287 This function is used to find one X/Open XA distributed transaction
2288 which is in the prepared state
2289 @return trx on match, the trx->xid will be invalidated;
2290 note that the trx may have been committed, unless the caller is
2291 holding lock_sys->mutex */
2292 static MY_ATTRIBUTE((nonnull, warn_unused_result))
2293 trx_t*
trx_get_trx_by_xid_low(const XID * xid)2294 trx_get_trx_by_xid_low(
2295 /*===================*/
2296 const XID* xid) /*!< in: X/Open XA transaction
2297 identifier */
2298 {
2299 trx_t* trx;
2300
2301 ut_ad(mutex_own(&trx_sys->mutex));
2302
2303 for (trx = UT_LIST_GET_FIRST(trx_sys->rw_trx_list);
2304 trx != NULL;
2305 trx = UT_LIST_GET_NEXT(trx_list, trx)) {
2306
2307 assert_trx_in_rw_list(trx);
2308
2309 /* Compare two X/Open XA transaction id's: their
2310 length should be the same and binary comparison
2311 of gtrid_length+bqual_length bytes should be
2312 the same */
2313
2314 if (trx->is_recovered
2315 && trx_state_eq(trx, TRX_STATE_PREPARED)
2316 && xid->gtrid_length == trx->xid.gtrid_length
2317 && xid->bqual_length == trx->xid.bqual_length
2318 && memcmp(xid->data, trx->xid.data,
2319 xid->gtrid_length + xid->bqual_length) == 0) {
2320
2321 #ifdef WITH_WSREP
2322 /* The commit of a prepared recovered Galera
2323 transaction needs a valid trx->xid for
2324 invoking trx_sys_update_wsrep_checkpoint(). */
2325 if (!wsrep_is_wsrep_xid(&trx->xid)) {
2326 #endif
2327 /* Invalidate the XID, so that subsequent calls
2328 will not find it. */
2329 memset(&trx->xid, 0, sizeof(trx->xid));
2330 trx->xid.formatID = -1;
2331 #ifdef WITH_WSREP
2332 }
2333 #endif /* WITH_WSREP */
2334 break;
2335 }
2336 }
2337
2338 return(trx);
2339 }
2340
2341 /*******************************************************************//**
2342 This function is used to find one X/Open XA distributed transaction
2343 which is in the prepared state
2344 @return trx or NULL; on match, the trx->xid will be invalidated;
2345 note that the trx may have been committed, unless the caller is
2346 holding lock_sys->mutex */
2347 UNIV_INTERN
2348 trx_t*
trx_get_trx_by_xid(const XID * xid)2349 trx_get_trx_by_xid(
2350 /*===============*/
2351 const XID* xid) /*!< in: X/Open XA transaction identifier */
2352 {
2353 trx_t* trx;
2354
2355 if (xid == NULL) {
2356
2357 return(NULL);
2358 }
2359
2360 mutex_enter(&trx_sys->mutex);
2361
2362 /* Recovered/Resurrected transactions are always only on the
2363 trx_sys_t::rw_trx_list. */
2364 trx = trx_get_trx_by_xid_low(xid);
2365
2366 mutex_exit(&trx_sys->mutex);
2367
2368 return(trx);
2369 }
2370
2371 /*************************************************************//**
2372 Starts the transaction if it is not yet started. */
2373 UNIV_INTERN
2374 void
trx_start_if_not_started_xa_low(trx_t * trx)2375 trx_start_if_not_started_xa_low(
2376 /*============================*/
2377 trx_t* trx) /*!< in: transaction */
2378 {
2379 switch (trx->state) {
2380 case TRX_STATE_NOT_STARTED:
2381
2382 /* Update the info whether we should skip XA steps
2383 that eat CPU time.
2384
2385 For the duration of the transaction trx->support_xa is
2386 not reread from thd so any changes in the value take
2387 effect in the next transaction. This is to avoid a
2388 scenario where some undo generated by a transaction,
2389 has XA stuff, and other undo, generated by the same
2390 transaction, doesn't. */
2391 trx->support_xa = thd_supports_xa(trx->mysql_thd);
2392
2393 #ifdef WITH_WSREP
2394 ut_d(trx->start_file = __FILE__);
2395 ut_d(trx->start_line = __LINE__);
2396 #endif /* WITH_WSREP */
2397 trx_start_low(trx);
2398 /* fall through */
2399 case TRX_STATE_ACTIVE:
2400 return;
2401 case TRX_STATE_PREPARED:
2402 case TRX_STATE_COMMITTED_IN_MEMORY:
2403 break;
2404 }
2405
2406 ut_error;
2407 }
2408
2409 /*************************************************************//**
2410 Starts the transaction if it is not yet started. */
2411 UNIV_INTERN
2412 void
trx_start_if_not_started_low(trx_t * trx)2413 trx_start_if_not_started_low(
2414 /*=========================*/
2415 trx_t* trx) /*!< in: transaction */
2416 {
2417 switch (trx->state) {
2418 case TRX_STATE_NOT_STARTED:
2419 #ifdef WITH_WSREP
2420 ut_d(trx->start_file = __FILE__);
2421 ut_d(trx->start_line = __LINE__);
2422 #endif /* WITH_WSREP */
2423 trx_start_low(trx);
2424 /* fall through */
2425 case TRX_STATE_ACTIVE:
2426 return;
2427 case TRX_STATE_PREPARED:
2428 case TRX_STATE_COMMITTED_IN_MEMORY:
2429 break;
2430 }
2431
2432 ut_error;
2433 }
2434
2435 /*************************************************************//**
2436 Starts the transaction for a DDL operation. */
2437 UNIV_INTERN
2438 void
trx_start_for_ddl_low(trx_t * trx,trx_dict_op_t op)2439 trx_start_for_ddl_low(
2440 /*==================*/
2441 trx_t* trx, /*!< in/out: transaction */
2442 trx_dict_op_t op) /*!< in: dictionary operation type */
2443 {
2444 switch (trx->state) {
2445 case TRX_STATE_NOT_STARTED:
2446 /* Flag this transaction as a dictionary operation, so that
2447 the data dictionary will be locked in crash recovery. */
2448
2449 trx_set_dict_operation(trx, op);
2450
2451 /* Ensure it is not flagged as an auto-commit-non-locking
2452 transation. */
2453 trx->will_lock = 1;
2454
2455 trx->ddl = true;
2456 #ifdef WITH_WSREP
2457 ut_d(trx->start_file = __FILE__);
2458 ut_d(trx->start_line = __LINE__);
2459 #endif /* WITH_WSREP */
2460
2461 trx_start_low(trx);
2462 return;
2463
2464 case TRX_STATE_ACTIVE:
2465 /* We have this start if not started idiom, therefore we
2466 can't add stronger checks here. */
2467 trx->ddl = true;
2468
2469 ut_ad(trx->dict_operation != TRX_DICT_OP_NONE);
2470 ut_ad(trx->will_lock > 0);
2471 return;
2472 case TRX_STATE_PREPARED:
2473 case TRX_STATE_COMMITTED_IN_MEMORY:
2474 break;
2475 }
2476
2477 ut_error;
2478 }
2479
2480