1 /*****************************************************************************
2
3 Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
4
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License, version 2.0,
7 as published by the Free Software Foundation.
8
9 This program is also distributed with certain software (including
10 but not limited to OpenSSL) that is licensed under separate terms,
11 as designated in a particular file or component or in included license
12 documentation. The authors of MySQL hereby grant you an additional
13 permission to link the program and your derivative works with the
14 separately licensed software that they have included with MySQL.
15
16 This program is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 GNU General Public License, version 2.0, for more details.
20
21 You should have received a copy of the GNU General Public License along with
22 this program; if not, write to the Free Software Foundation, Inc.,
23 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
24
25 *****************************************************************************/
26
27 /**************************************************//**
28 @file trx/trx0trx.cc
29 The transaction
30
31 Created 3/26/1996 Heikki Tuuri
32 *******************************************************/
33
34 #include "btr0types.h"
35 #include "trx0trx.h"
36
37 #ifdef UNIV_NONINL
38 #include "trx0trx.ic"
39 #endif
40
41 #include "trx0undo.h"
42 #include "trx0rseg.h"
43 #include "log0log.h"
44 #include "que0que.h"
45 #include "lock0lock.h"
46 #include "trx0roll.h"
47 #include "usr0sess.h"
48 #include "read0read.h"
49 #include "srv0srv.h"
50 #include "srv0start.h"
51 #include "btr0sea.h"
52 #include "os0proc.h"
53 #include "trx0xa.h"
54 #include "trx0rec.h"
55 #include "trx0purge.h"
56 #include "ha_prototypes.h"
57 #include "srv0mon.h"
58 #include "ut0vec.h"
59
60 #include<set>
61
62 /** Set of table_id */
63 typedef std::set<table_id_t> table_id_set;
64
65 /** Dummy session used currently in MySQL interface */
66 UNIV_INTERN sess_t* trx_dummy_sess = NULL;
67
68 #ifdef UNIV_PFS_MUTEX
69 /* Key to register the mutex with performance schema */
70 UNIV_INTERN mysql_pfs_key_t trx_mutex_key;
71 /* Key to register the mutex with performance schema */
72 UNIV_INTERN mysql_pfs_key_t trx_undo_mutex_key;
73 #endif /* UNIV_PFS_MUTEX */
74
75 /*************************************************************//**
76 Set detailed error message for the transaction. */
77 UNIV_INTERN
78 void
trx_set_detailed_error(trx_t * trx,const char * msg)79 trx_set_detailed_error(
80 /*===================*/
81 trx_t* trx, /*!< in: transaction struct */
82 const char* msg) /*!< in: detailed error message */
83 {
84 ut_strlcpy(trx->detailed_error, msg, sizeof(trx->detailed_error));
85 }
86
87 /*************************************************************//**
88 Set detailed error message for the transaction from a file. Note that the
89 file is rewinded before reading from it. */
90 UNIV_INTERN
91 void
trx_set_detailed_error_from_file(trx_t * trx,FILE * file)92 trx_set_detailed_error_from_file(
93 /*=============================*/
94 trx_t* trx, /*!< in: transaction struct */
95 FILE* file) /*!< in: file to read message from */
96 {
97 os_file_read_string(file, trx->detailed_error,
98 sizeof(trx->detailed_error));
99 }
100
101 /*************************************************************//**
102 Callback function for trx_find_descriptor() to compare trx IDs. */
103 UNIV_INTERN
104 int
trx_descr_cmp(const void * a,const void * b)105 trx_descr_cmp(
106 /*==========*/
107 const void *a, /*!< in: pointer to first comparison argument */
108 const void *b) /*!< in: pointer to second comparison argument */
109 {
110 const trx_id_t* da = (const trx_id_t*) a;
111 const trx_id_t* db = (const trx_id_t*) b;
112
113 if (*da < *db) {
114 return -1;
115 } else if (*da > *db) {
116 return 1;
117 }
118
119 return 0;
120 }
121
122 /*************************************************************//**
123 Reserve a slot for a given trx in the global descriptors array. */
124 UNIV_INLINE
125 void
trx_reserve_descriptor(const trx_t * trx)126 trx_reserve_descriptor(
127 /*===================*/
128 const trx_t* trx) /*!< in: trx pointer */
129 {
130 ulint n_used;
131 ulint n_max;
132 trx_id_t* descr;
133
134 ut_ad(mutex_own(&trx_sys->mutex) || srv_is_being_started);
135 ut_ad(srv_is_being_started ||
136 !trx_find_descriptor(trx_sys->descriptors,
137 trx_sys->descr_n_used,
138 trx->id));
139
140 n_used = trx_sys->descr_n_used + 1;
141 n_max = trx_sys->descr_n_max;
142
143 if (UNIV_UNLIKELY(n_used > n_max)) {
144
145 n_max = n_max * 2;
146
147 trx_sys->descriptors = static_cast<trx_id_t*>(
148 ut_realloc(trx_sys->descriptors,
149 n_max * sizeof(trx_id_t)));
150
151 trx_sys->descr_n_max = n_max;
152 srv_descriptors_memory = n_max * sizeof(trx_id_t);
153 }
154
155 descr = trx_sys->descriptors + n_used - 1;
156
157 if (UNIV_UNLIKELY(n_used > 1 && trx->id < descr[-1])) {
158
159 /* Find the slot where it should be inserted. We could use a
160 binary search, but in reality linear search should be faster,
161 because the slot we are looking for is near the array end. */
162
163 trx_id_t* tdescr;
164
165 for (tdescr = descr - 1;
166 tdescr >= trx_sys->descriptors && *tdescr > trx->id;
167 tdescr--) {
168 }
169
170 tdescr++;
171
172 ut_memmove(tdescr + 1, tdescr, (descr - tdescr) *
173 sizeof(trx_id_t));
174
175 descr = tdescr;
176 }
177
178 *descr = trx->id;
179
180 trx_sys->descr_n_used = n_used;
181 }
182
183 /*************************************************************//**
184 Release a slot for a given trx in the global descriptors array. */
185 UNIV_INTERN
186 void
trx_release_descriptor(trx_t * trx)187 trx_release_descriptor(
188 /*===================*/
189 trx_t* trx) /*!< in: trx pointer */
190 {
191 ulint size;
192 trx_id_t* descr;
193
194 ut_ad(mutex_own(&trx_sys->mutex));
195
196 if (UNIV_LIKELY(trx->in_trx_serial_list)) {
197
198 UT_LIST_REMOVE(trx_serial_list, trx_sys->trx_serial_list,
199 trx);
200 trx->in_trx_serial_list = false;
201 }
202
203 descr = trx_find_descriptor(trx_sys->descriptors,
204 trx_sys->descr_n_used,
205 trx->id);
206
207 if (UNIV_UNLIKELY(descr == NULL)) {
208
209 return;
210 }
211
212 size = (trx_sys->descriptors + trx_sys->descr_n_used - 1 - descr) *
213 sizeof(trx_id_t);
214
215 if (UNIV_LIKELY(size > 0)) {
216
217 ut_memmove(descr, descr + 1, size);
218 }
219
220 trx_sys->descr_n_used--;
221 }
222
223 /****************************************************************//**
224 Creates and initializes a transaction object. It must be explicitly
225 started with trx_start_if_not_started() before using it. The default
226 isolation level is TRX_ISO_REPEATABLE_READ.
227 @return transaction instance, should never be NULL */
228 static
229 trx_t*
trx_create(void)230 trx_create(void)
231 /*============*/
232 {
233 trx_t* trx;
234 mem_heap_t* heap;
235 ib_alloc_t* heap_alloc;
236
237 trx = static_cast<trx_t*>(mem_zalloc(sizeof(*trx)));
238
239 mutex_create(trx_mutex_key, &trx->mutex, SYNC_TRX);
240
241 trx->magic_n = TRX_MAGIC_N;
242
243 trx->state = TRX_STATE_NOT_STARTED;
244
245 trx->isolation_level = TRX_ISO_REPEATABLE_READ;
246
247 trx->no = TRX_ID_MAX;
248 trx->in_trx_serial_list = false;
249
250 trx->support_xa = TRUE;
251
252 trx->fake_changes = FALSE;
253
254 trx->check_foreigns = TRUE;
255 trx->check_unique_secondary = TRUE;
256
257 trx->dict_operation = TRX_DICT_OP_NONE;
258
259 trx->idle_start = 0;
260 trx->last_stmt_start = 0;
261
262 mutex_create(trx_undo_mutex_key, &trx->undo_mutex, SYNC_TRX_UNDO);
263
264 trx->error_state = DB_SUCCESS;
265
266 trx->lock.que_state = TRX_QUE_RUNNING;
267
268 trx->lock.lock_heap = mem_heap_create_typed(
269 256, MEM_HEAP_FOR_LOCK_HEAP);
270
271 trx->search_latch_timeout = BTR_SEA_TIMEOUT;
272
273 trx->io_reads = 0;
274 trx->io_read = 0;
275 trx->io_reads_wait_timer = 0;
276 trx->lock_que_wait_timer = 0;
277 trx->innodb_que_wait_timer = 0;
278 trx->distinct_page_access = 0;
279 trx->distinct_page_access_hash = NULL;
280 trx->take_stats = FALSE;
281
282 trx->xid.formatID = -1;
283
284 trx->op_info = "";
285
286 trx->api_trx = false;
287
288 trx->api_auto_commit = false;
289
290 trx->read_write = true;
291
292 heap = mem_heap_create(sizeof(ib_vector_t) + sizeof(void*) * 8);
293 heap_alloc = ib_heap_allocator_create(heap);
294
295 /* Remember to free the vector explicitly in trx_free(). */
296 trx->autoinc_locks = ib_vector_create(heap_alloc, sizeof(void**), 4);
297
298 /* Remember to free the vector explicitly in trx_free(). */
299 heap = mem_heap_create(sizeof(ib_vector_t) + sizeof(void*) * 128);
300 heap_alloc = ib_heap_allocator_create(heap);
301
302 trx->lock.table_locks = ib_vector_create(
303 heap_alloc, sizeof(void**), 32);
304
305 return(trx);
306 }
307
308 /********************************************************************//**
309 Creates a transaction object for background operations by the master thread.
310 @return own: transaction object */
311 UNIV_INTERN
312 trx_t*
trx_allocate_for_background(void)313 trx_allocate_for_background(void)
314 /*=============================*/
315 {
316 trx_t* trx;
317
318 trx = trx_create();
319
320 trx->sess = trx_dummy_sess;
321
322 return(trx);
323 }
324
325 /********************************************************************//**
326 Creates a transaction object for MySQL.
327 @return own: transaction object */
328 UNIV_INTERN
329 trx_t*
trx_allocate_for_mysql(void)330 trx_allocate_for_mysql(void)
331 /*========================*/
332 {
333 trx_t* trx;
334
335 trx = trx_allocate_for_background();
336
337 mutex_enter(&trx_sys->mutex);
338
339 ut_d(trx->in_mysql_trx_list = TRUE);
340 UT_LIST_ADD_FIRST(mysql_trx_list, trx_sys->mysql_trx_list, trx);
341
342 mutex_exit(&trx_sys->mutex);
343
344 if (UNIV_UNLIKELY(trx->take_stats)) {
345 trx->distinct_page_access_hash
346 = static_cast<byte *>(mem_alloc(DPAH_SIZE));
347 memset(trx->distinct_page_access_hash, 0, DPAH_SIZE);
348 }
349
350 return(trx);
351 }
352
353 /********************************************************************//**
354 Frees a transaction object without releasing the corresponding descriptor.
355 Should be used by callers that already own trx_sys->mutex. */
356 static
357 void
trx_free_low(trx_t * trx)358 trx_free_low(
359 /*=========*/
360 trx_t* trx) /*!< in, own: trx object */
361 {
362 ut_a(trx->magic_n == TRX_MAGIC_N);
363 ut_ad(!trx->in_ro_trx_list);
364 ut_ad(!trx->in_rw_trx_list);
365 ut_ad(!trx->in_mysql_trx_list);
366
367 mutex_free(&trx->undo_mutex);
368
369 if (trx->undo_no_arr != NULL) {
370 trx_undo_arr_free(trx->undo_no_arr);
371 }
372
373 ut_a(trx->lock.wait_lock == NULL);
374 ut_a(trx->lock.wait_thr == NULL);
375
376 ut_a(!trx->has_search_latch);
377 #ifdef UNIV_SYNC_DEBUG
378 ut_ad(!btr_search_own_any());
379 #endif
380
381 ut_a(trx->dict_operation_lock_mode == 0);
382
383 if (trx->lock.lock_heap) {
384 mem_heap_free(trx->lock.lock_heap);
385 }
386
387 ut_a(UT_LIST_GET_LEN(trx->lock.trx_locks) == 0);
388
389 ut_a(ib_vector_is_empty(trx->autoinc_locks));
390 /* We allocated a dedicated heap for the vector. */
391 ib_vector_free(trx->autoinc_locks);
392
393 if (trx->lock.table_locks != NULL) {
394 /* We allocated a dedicated heap for the vector. */
395 ib_vector_free(trx->lock.table_locks);
396 }
397
398 mutex_free(&trx->mutex);
399
400 read_view_free(trx->prebuilt_view);
401
402 mem_free(trx);
403 }
404
405 /********************************************************************//**
406 Frees a transaction object. */
407 static
408 void
trx_free(trx_t * trx)409 trx_free(
410 /*=========*/
411 trx_t* trx) /*!< in, own: trx object */
412 {
413 mutex_enter(&trx_sys->mutex);
414 trx_release_descriptor(trx);
415 mutex_exit(&trx_sys->mutex);
416
417 trx_free_low(trx);
418 }
419
420 /********************************************************************//**
421 Frees a transaction object of a background operation of the master thread. */
422 UNIV_INTERN
423 void
trx_free_for_background(trx_t * trx)424 trx_free_for_background(
425 /*====================*/
426 trx_t* trx) /*!< in, own: trx object */
427 {
428
429 if (trx->distinct_page_access_hash)
430 {
431 mem_free(trx->distinct_page_access_hash);
432 trx->distinct_page_access_hash= NULL;
433 }
434
435 if (trx->declared_to_be_inside_innodb) {
436
437 ib_logf(IB_LOG_LEVEL_ERROR,
438 "Freeing a trx (%p, " TRX_ID_FMT ") which is declared "
439 "to be processing inside InnoDB", trx, trx->id);
440
441 trx_print(stderr, trx, 600);
442 putc('\n', stderr);
443
444 /* This is an error but not a fatal error. We must keep
445 the counters like srv_conc_n_threads accurate. */
446 srv_conc_force_exit_innodb(trx);
447 }
448
449 if (trx->n_mysql_tables_in_use != 0
450 || trx->mysql_n_tables_locked != 0) {
451
452 ib_logf(IB_LOG_LEVEL_ERROR,
453 "MySQL is freeing a thd though "
454 "trx->n_mysql_tables_in_use is %lu and "
455 "trx->mysql_n_tables_locked is %lu.",
456 (ulong) trx->n_mysql_tables_in_use,
457 (ulong) trx->mysql_n_tables_locked);
458
459 trx_print(stderr, trx, 600);
460 ut_print_buf(stderr, trx, sizeof(trx_t));
461 putc('\n', stderr);
462 }
463
464 ut_a(trx->state == TRX_STATE_NOT_STARTED);
465 ut_a(trx->insert_undo == NULL);
466 ut_a(trx->update_undo == NULL);
467 ut_a(trx->read_view == NULL);
468
469 trx_free(trx);
470 }
471
472 /********************************************************************//**
473 At shutdown, frees a transaction object that is in the PREPARED state. */
474 UNIV_INTERN
475 void
trx_free_prepared(trx_t * trx)476 trx_free_prepared(
477 /*==============*/
478 trx_t* trx) /*!< in, own: trx object */
479 {
480 ut_a(trx_state_eq(trx, TRX_STATE_PREPARED));
481 ut_a(trx->magic_n == TRX_MAGIC_N);
482
483 lock_trx_release_locks(trx);
484 trx_undo_free_prepared(trx);
485
486 assert_trx_in_rw_list(trx);
487
488 ut_a(!trx->read_only);
489
490 UT_LIST_REMOVE(trx_list, trx_sys->rw_trx_list, trx);
491 ut_d(trx->in_rw_trx_list = FALSE);
492
493 mutex_enter(&trx_sys->mutex);
494 trx_release_descriptor(trx);
495 mutex_exit(&trx_sys->mutex);
496
497 /* Undo trx_resurrect_table_locks(). */
498 UT_LIST_INIT(trx->lock.trx_locks);
499
500 trx_free_low(trx);
501
502 ut_ad(trx_sys->descr_n_used <= UT_LIST_GET_LEN(trx_sys->rw_trx_list));
503 }
504
505 /********************************************************************//**
506 Frees a transaction object for MySQL. */
507 UNIV_INTERN
508 void
trx_free_for_mysql(trx_t * trx)509 trx_free_for_mysql(
510 /*===============*/
511 trx_t* trx) /*!< in, own: trx object */
512 {
513 if (trx->distinct_page_access_hash)
514 {
515 mem_free(trx->distinct_page_access_hash);
516 trx->distinct_page_access_hash= NULL;
517 }
518
519 mutex_enter(&trx_sys->mutex);
520
521 ut_ad(trx->in_mysql_trx_list);
522 ut_d(trx->in_mysql_trx_list = FALSE);
523 UT_LIST_REMOVE(mysql_trx_list, trx_sys->mysql_trx_list, trx);
524
525 ut_ad(trx_sys_validate_trx_list());
526
527 mutex_exit(&trx_sys->mutex);
528
529 trx_free_for_background(trx);
530 }
531
532 /****************************************************************//**
533 Inserts the trx handle in the trx system trx list in the right position.
534 The list is sorted on the trx id so that the biggest id is at the list
535 start. This function is used at the database startup to insert incomplete
536 transactions to the list. */
537 static
538 void
trx_list_rw_insert_ordered(trx_t * trx)539 trx_list_rw_insert_ordered(
540 /*=======================*/
541 trx_t* trx) /*!< in: trx handle */
542 {
543 trx_t* trx2;
544
545 ut_ad(!trx->read_only);
546
547 ut_d(trx->start_file = __FILE__);
548 ut_d(trx->start_line = __LINE__);
549
550 ut_a(srv_is_being_started);
551 ut_ad(!trx->in_ro_trx_list);
552 ut_ad(!trx->in_rw_trx_list);
553 ut_ad(trx->state != TRX_STATE_NOT_STARTED);
554 ut_ad(trx->is_recovered);
555
556 for (trx2 = UT_LIST_GET_FIRST(trx_sys->rw_trx_list);
557 trx2 != NULL;
558 trx2 = UT_LIST_GET_NEXT(trx_list, trx2)) {
559
560 assert_trx_in_rw_list(trx2);
561
562 if (trx->id >= trx2->id) {
563
564 ut_ad(trx->id > trx2->id);
565 break;
566 }
567 }
568
569 if (trx2 != NULL) {
570 trx2 = UT_LIST_GET_PREV(trx_list, trx2);
571
572 if (trx2 == NULL) {
573 UT_LIST_ADD_FIRST(trx_list, trx_sys->rw_trx_list, trx);
574 } else {
575 UT_LIST_INSERT_AFTER(
576 trx_list, trx_sys->rw_trx_list, trx2, trx);
577 }
578 } else {
579 UT_LIST_ADD_LAST(trx_list, trx_sys->rw_trx_list, trx);
580 }
581
582 #ifdef UNIV_DEBUG
583 if (trx->id > trx_sys->rw_max_trx_id) {
584 trx_sys->rw_max_trx_id = trx->id;
585 }
586 #endif /* UNIV_DEBUG */
587
588 ut_ad(!trx->in_rw_trx_list);
589 ut_d(trx->in_rw_trx_list = TRUE);
590 }
591
592 /****************************************************************//**
593 Resurrect the table locks for a resurrected transaction. */
594 static
595 void
trx_resurrect_table_locks(trx_t * trx,const trx_undo_t * undo)596 trx_resurrect_table_locks(
597 /*======================*/
598 trx_t* trx, /*!< in/out: transaction */
599 const trx_undo_t* undo) /*!< in: undo log */
600 {
601 mtr_t mtr;
602 page_t* undo_page;
603 trx_undo_rec_t* undo_rec;
604 table_id_set tables;
605
606 ut_ad(undo == trx->insert_undo || undo == trx->update_undo);
607
608 if (trx_state_eq(trx, TRX_STATE_COMMITTED_IN_MEMORY)
609 || undo->empty) {
610 return;
611 }
612
613 mtr_start(&mtr);
614 /* trx_rseg_mem_create() may have acquired an X-latch on this
615 page, so we cannot acquire an S-latch. */
616 undo_page = trx_undo_page_get(
617 undo->space, undo->zip_size, undo->top_page_no, &mtr);
618 undo_rec = undo_page + undo->top_offset;
619
620 do {
621 ulint type;
622 ulint cmpl_info;
623 bool updated_extern;
624 undo_no_t undo_no;
625 table_id_t table_id;
626
627 page_t* undo_rec_page = page_align(undo_rec);
628
629 if (undo_rec_page != undo_page) {
630 if (!mtr_memo_release(&mtr,
631 buf_block_align(undo_page),
632 MTR_MEMO_PAGE_X_FIX)) {
633 /* The page of the previous undo_rec
634 should have been latched by
635 trx_undo_page_get() or
636 trx_undo_get_prev_rec(). */
637 ut_ad(0);
638 }
639
640 undo_page = undo_rec_page;
641 }
642
643 trx_undo_rec_get_pars(
644 undo_rec, &type, &cmpl_info,
645 &updated_extern, &undo_no, &table_id);
646 tables.insert(table_id);
647
648 undo_rec = trx_undo_get_prev_rec(
649 undo_rec, undo->hdr_page_no,
650 undo->hdr_offset, false, &mtr);
651 } while (undo_rec);
652
653 mtr_commit(&mtr);
654
655 for (table_id_set::const_iterator i = tables.begin();
656 i != tables.end(); i++) {
657 if (dict_table_t* table = dict_table_open_on_id(
658 *i, FALSE, DICT_TABLE_OP_LOAD_TABLESPACE)) {
659 if (table->ibd_file_missing
660 || dict_table_is_temporary(table)) {
661 mutex_enter(&dict_sys->mutex);
662 dict_table_close(table, TRUE, FALSE);
663 dict_table_remove_from_cache(table);
664 mutex_exit(&dict_sys->mutex);
665 continue;
666 }
667
668 lock_table_ix_resurrect(table, trx);
669
670 DBUG_PRINT("ib_trx",
671 ("resurrect" TRX_ID_FMT
672 " table '%s' IX lock from %s undo",
673 trx->id, table->name,
674 undo == trx->insert_undo
675 ? "insert" : "update"));
676
677 dict_table_close(table, FALSE, FALSE);
678 }
679 }
680 }
681
682 /****************************************************************//**
683 Resurrect the transactions that were doing inserts the time of the
684 crash, they need to be undone.
685 @return trx_t instance */
686 static
687 trx_t*
trx_resurrect_insert(trx_undo_t * undo,trx_rseg_t * rseg)688 trx_resurrect_insert(
689 /*=================*/
690 trx_undo_t* undo, /*!< in: entry to UNDO */
691 trx_rseg_t* rseg) /*!< in: rollback segment */
692 {
693 trx_t* trx;
694
695 trx = trx_allocate_for_background();
696
697 trx->rseg = rseg;
698 trx->xid = undo->xid;
699 trx->id = undo->trx_id;
700 trx->insert_undo = undo;
701 trx->is_recovered = TRUE;
702
703 /* This is single-threaded startup code, we do not need the
704 protection of trx->mutex or trx_sys->mutex here. */
705
706 if (undo->state != TRX_UNDO_ACTIVE) {
707
708 /* Prepared transactions are left in the prepared state
709 waiting for a commit or abort decision from MySQL */
710
711 if (undo->state == TRX_UNDO_PREPARED) {
712
713 fprintf(stderr,
714 "InnoDB: Transaction " TRX_ID_FMT " was in the"
715 " XA prepared state.\n", trx->id);
716
717 if (srv_force_recovery == 0) {
718
719 trx->state = TRX_STATE_PREPARED;
720 trx_sys->n_prepared_trx++;
721 trx_sys->n_prepared_recovered_trx++;
722 } else {
723 fprintf(stderr,
724 "InnoDB: Since innodb_force_recovery"
725 " > 0, we will rollback it anyway.\n");
726
727 trx->state = TRX_STATE_ACTIVE;
728 }
729 } else {
730 trx->state = TRX_STATE_COMMITTED_IN_MEMORY;
731 }
732
733 /* We give a dummy value for the trx no; this should have no
734 relevance since purge is not interested in committed
735 transaction numbers, unless they are in the history
736 list, in which case it looks the number from the disk based
737 undo log structure */
738
739 trx->no = trx->id;
740 } else {
741 trx->state = TRX_STATE_ACTIVE;
742
743 /* A running transaction always has the number
744 field inited to TRX_ID_MAX */
745
746 trx->no = TRX_ID_MAX;
747 }
748
749 /* trx_start_low() is not called with resurrect, so need to initialize
750 start time here.*/
751 if (trx->state == TRX_STATE_ACTIVE
752 || trx->state == TRX_STATE_PREPARED) {
753 trx->start_time = ut_time();
754 }
755
756 if (undo->dict_operation) {
757 trx_set_dict_operation(trx, TRX_DICT_OP_TABLE);
758 trx->table_id = undo->table_id;
759 }
760
761 if (!undo->empty) {
762 trx->undo_no = undo->top_undo_no + 1;
763 }
764
765 return(trx);
766 }
767
768 /****************************************************************//**
769 Prepared transactions are left in the prepared state waiting for a
770 commit or abort decision from MySQL */
771 static
772 void
trx_resurrect_update_in_prepared_state(trx_t * trx,const trx_undo_t * undo)773 trx_resurrect_update_in_prepared_state(
774 /*===================================*/
775 trx_t* trx, /*!< in,out: transaction */
776 const trx_undo_t* undo) /*!< in: update UNDO record */
777 {
778 /* This is single-threaded startup code, we do not need the
779 protection of trx->mutex or trx_sys->mutex here. */
780
781 if (undo->state == TRX_UNDO_PREPARED) {
782 fprintf(stderr,
783 "InnoDB: Transaction " TRX_ID_FMT
784 " was in the XA prepared state.\n", trx->id);
785
786 if (srv_force_recovery == 0) {
787 if (trx_state_eq(trx, TRX_STATE_NOT_STARTED)) {
788 trx_sys->n_prepared_trx++;
789 trx_sys->n_prepared_recovered_trx++;
790 } else {
791 ut_ad(trx_state_eq(trx, TRX_STATE_PREPARED));
792 }
793
794 trx->state = TRX_STATE_PREPARED;
795 } else {
796 fprintf(stderr,
797 "InnoDB: Since innodb_force_recovery"
798 " > 0, we will rollback it anyway.\n");
799
800 trx->state = TRX_STATE_ACTIVE;
801 }
802 } else {
803 trx->state = TRX_STATE_COMMITTED_IN_MEMORY;
804 }
805 }
806
807 /****************************************************************//**
808 Resurrect the transactions that were doing updates the time of the
809 crash, they need to be undone. */
810 static
811 void
trx_resurrect_update(trx_t * trx,trx_undo_t * undo,trx_rseg_t * rseg)812 trx_resurrect_update(
813 /*=================*/
814 trx_t* trx, /*!< in/out: transaction */
815 trx_undo_t* undo, /*!< in/out: update UNDO record */
816 trx_rseg_t* rseg) /*!< in/out: rollback segment */
817 {
818 trx->rseg = rseg;
819 trx->xid = undo->xid;
820 trx->id = undo->trx_id;
821 trx->update_undo = undo;
822 trx->is_recovered = TRUE;
823
824 /* This is single-threaded startup code, we do not need the
825 protection of trx->mutex or trx_sys->mutex here. */
826
827 if (undo->state != TRX_UNDO_ACTIVE) {
828 trx_resurrect_update_in_prepared_state(trx, undo);
829
830 /* We give a dummy value for the trx number */
831
832 trx->no = trx->id;
833
834 } else {
835 trx->state = TRX_STATE_ACTIVE;
836
837 /* A running transaction always has the number field inited to
838 TRX_ID_MAX */
839
840 trx->no = TRX_ID_MAX;
841 }
842
843 /* trx_start_low() is not called with resurrect, so need to initialize
844 start time here.*/
845 if (trx->state == TRX_STATE_ACTIVE
846 || trx->state == TRX_STATE_PREPARED) {
847 trx->start_time = ut_time();
848 }
849
850 if (undo->dict_operation) {
851 trx_set_dict_operation(trx, TRX_DICT_OP_TABLE);
852 trx->table_id = undo->table_id;
853 }
854
855 if (!undo->empty && undo->top_undo_no >= trx->undo_no) {
856
857 trx->undo_no = undo->top_undo_no + 1;
858 }
859 }
860
861 /****************************************************************//**
862 Creates trx objects for transactions and initializes the trx list of
863 trx_sys at database start. Rollback segment and undo log lists must
864 already exist when this function is called, because the lists of
865 transactions to be rolled back or cleaned up are built based on the
866 undo log lists. */
867 UNIV_INTERN
868 void
trx_lists_init_at_db_start(void)869 trx_lists_init_at_db_start(void)
870 /*============================*/
871 {
872 ulint i;
873
874 ut_a(srv_is_being_started);
875
876 UT_LIST_INIT(trx_sys->ro_trx_list);
877 UT_LIST_INIT(trx_sys->rw_trx_list);
878 UT_LIST_INIT(trx_sys->trx_serial_list);
879
880 /* Look from the rollback segments if there exist undo logs for
881 transactions */
882
883 for (i = 0; i < TRX_SYS_N_RSEGS; ++i) {
884 trx_undo_t* undo;
885 trx_rseg_t* rseg;
886
887 rseg = trx_sys->rseg_array[i];
888
889 if (rseg == NULL) {
890 continue;
891 }
892
893 /* Resurrect transactions that were doing inserts. */
894 for (undo = UT_LIST_GET_FIRST(rseg->insert_undo_list);
895 undo != NULL;
896 undo = UT_LIST_GET_NEXT(undo_list, undo)) {
897 trx_t* trx;
898
899 trx = trx_resurrect_insert(undo, rseg);
900
901 if (trx->state == TRX_STATE_ACTIVE ||
902 trx->state == TRX_STATE_PREPARED) {
903
904 trx_reserve_descriptor(trx);
905 }
906 trx_list_rw_insert_ordered(trx);
907
908 trx_resurrect_table_locks(trx, undo);
909 }
910
911 /* Ressurrect transactions that were doing updates. */
912 for (undo = UT_LIST_GET_FIRST(rseg->update_undo_list);
913 undo != NULL;
914 undo = UT_LIST_GET_NEXT(undo_list, undo)) {
915 trx_t* trx;
916 ibool trx_created;
917
918 /* Check the trx_sys->rw_trx_list first. */
919 mutex_enter(&trx_sys->mutex);
920 trx = trx_get_rw_trx_by_id(undo->trx_id);
921 mutex_exit(&trx_sys->mutex);
922
923 if (trx == NULL) {
924 trx = trx_allocate_for_background();
925 trx_created = TRUE;
926 } else {
927 trx_created = FALSE;
928 }
929
930 trx_resurrect_update(trx, undo, rseg);
931
932 if (trx_created) {
933 if (trx->state == TRX_STATE_ACTIVE ||
934 trx->state == TRX_STATE_PREPARED) {
935
936 trx_reserve_descriptor(trx);
937 }
938 trx_list_rw_insert_ordered(trx);
939 }
940
941 trx_resurrect_table_locks(trx, undo);
942 }
943 }
944 }
945
946 /******************************************************************//**
947 Assigns a rollback segment to a transaction in a round-robin fashion.
948 @return assigned rollback segment instance */
949 static
950 trx_rseg_t*
trx_assign_rseg_low(ulong max_undo_logs,ulint n_tablespaces)951 trx_assign_rseg_low(
952 /*================*/
953 ulong max_undo_logs, /*!< in: maximum number of UNDO logs to use */
954 ulint n_tablespaces) /*!< in: number of rollback tablespaces */
955 {
956 ulint i;
957 trx_rseg_t* rseg;
958 static ulint latest_rseg = 0;
959
960 if (srv_read_only_mode) {
961 ut_a(max_undo_logs == ULONG_UNDEFINED);
962 return(NULL);
963 }
964
965 /* This breaks true round robin but that should be OK. */
966
967 ut_a(max_undo_logs > 0 && max_undo_logs <= TRX_SYS_N_RSEGS);
968
969 i = latest_rseg++;
970 i %= max_undo_logs;
971
972 /* Note: The assumption here is that there can't be any gaps in
973 the array. Once we implement more flexible rollback segment
974 management this may not hold. The assertion checks for that case. */
975
976 if (trx_sys->rseg_array[0] == NULL) {
977 return(NULL);
978 }
979
980 /* Skip the system tablespace if we have more than one tablespace
981 defined for rollback segments. We want all UNDO records to be in
982 the non-system tablespaces. */
983
984 do {
985 rseg = trx_sys->rseg_array[i];
986 ut_a(rseg == NULL || i == rseg->id);
987
988 i = (rseg == NULL) ? 0 : i + 1;
989
990 } while (rseg == NULL
991 || (rseg->space == 0
992 && n_tablespaces > 0
993 && trx_sys->rseg_array[1] != NULL));
994
995 return(rseg);
996 }
997
998 /****************************************************************//**
999 Assign a read-only transaction a rollback-segment, if it is attempting
1000 to write to a TEMPORARY table. */
1001 UNIV_INTERN
1002 void
trx_assign_rseg(trx_t * trx)1003 trx_assign_rseg(
1004 /*============*/
1005 trx_t* trx) /*!< A read-only transaction that
1006 needs to be assigned a RBS. */
1007 {
1008 ut_a(trx->rseg == 0);
1009 ut_a(trx->read_only);
1010 ut_a(!srv_read_only_mode);
1011 ut_a(!trx_is_autocommit_non_locking(trx));
1012
1013 trx->rseg = trx_assign_rseg_low(srv_undo_logs, srv_undo_tablespaces);
1014 }
1015
1016 /****************************************************************//**
1017 Starts a transaction. */
1018 static
1019 void
trx_start_low(trx_t * trx)1020 trx_start_low(
1021 /*==========*/
1022 trx_t* trx) /*!< in: transaction */
1023 {
1024 ut_ad(trx->rseg == NULL);
1025
1026 ut_ad(trx->start_file != 0);
1027 ut_ad(trx->start_line != 0);
1028 ut_ad(!trx->is_recovered);
1029 ut_ad(trx_state_eq(trx, TRX_STATE_NOT_STARTED));
1030 ut_ad(UT_LIST_GET_LEN(trx->lock.trx_locks) == 0);
1031
1032 /* Check whether it is an AUTOCOMMIT SELECT */
1033 trx->auto_commit = (trx->api_trx && trx->api_auto_commit)
1034 || thd_trx_is_auto_commit(trx->mysql_thd);
1035
1036 trx->read_only =
1037 (trx->api_trx && !trx->read_write)
1038 || (!trx->ddl && thd_trx_is_read_only(trx->mysql_thd))
1039 || srv_read_only_mode;
1040
1041 if (!trx->auto_commit) {
1042 ++trx->will_lock;
1043 } else if (trx->will_lock == 0) {
1044 trx->read_only = TRUE;
1045 }
1046
1047 if (!trx->read_only) {
1048 trx->rseg = trx_assign_rseg_low(
1049 srv_undo_logs, srv_undo_tablespaces);
1050 }
1051
1052 /* The initial value for trx->no: TRX_ID_MAX is used in
1053 read_view_open_now: */
1054
1055 trx->no = TRX_ID_MAX;
1056
1057 ut_a(ib_vector_is_empty(trx->autoinc_locks));
1058 ut_a(ib_vector_is_empty(trx->lock.table_locks));
1059
1060 mutex_enter(&trx_sys->mutex);
1061
1062 /* If this transaction came from trx_allocate_for_mysql(),
1063 trx->in_mysql_trx_list would hold. In that case, the trx->state
1064 change must be protected by the trx_sys->mutex, so that
1065 lock_print_info_all_transactions() will have a consistent view. */
1066
1067 trx->state = TRX_STATE_ACTIVE;
1068
1069 trx->id = trx_sys_get_new_trx_id();
1070
1071 /* Cache the state of fake_changes that transaction will use for
1072 lifetime. Any change in session/global fake_changes configuration during
1073 lifetime of transaction will not be honored by already started
1074 transaction. */
1075 trx->fake_changes = thd_fake_changes(trx->mysql_thd);
1076
1077 ut_ad(!trx->in_rw_trx_list);
1078 ut_ad(!trx->in_ro_trx_list);
1079
1080 if (trx->read_only) {
1081
1082 /* Note: The trx_sys_t::ro_trx_list doesn't really need to
1083 be ordered, we should exploit this using a list type that
1084 doesn't need a list wide lock to increase concurrency. */
1085
1086 if (!trx_is_autocommit_non_locking(trx)) {
1087 UT_LIST_ADD_FIRST(trx_list, trx_sys->ro_trx_list, trx);
1088 ut_d(trx->in_ro_trx_list = TRUE);
1089 }
1090 } else {
1091
1092 ut_ad(trx->rseg != NULL
1093 || srv_force_recovery >= SRV_FORCE_NO_TRX_UNDO);
1094
1095 ut_ad(!trx_is_autocommit_non_locking(trx));
1096 UT_LIST_ADD_FIRST(trx_list, trx_sys->rw_trx_list, trx);
1097 ut_d(trx->in_rw_trx_list = TRUE);
1098
1099 #ifdef UNIV_DEBUG
1100 if (trx->id > trx_sys->rw_max_trx_id) {
1101 trx_sys->rw_max_trx_id = trx->id;
1102 }
1103 #endif /* UNIV_DEBUG */
1104
1105 trx_reserve_descriptor(trx);
1106 }
1107
1108 ut_ad(trx_sys_validate_trx_list());
1109
1110 mutex_exit(&trx_sys->mutex);
1111
1112 trx->start_time = ut_time();
1113
1114 MONITOR_INC(MONITOR_TRX_ACTIVE);
1115 }
1116
1117 /****************************************************************//**
1118 Set the transaction serialisation number. */
1119 static
1120 void
trx_serialisation_number_get(trx_t * trx)1121 trx_serialisation_number_get(
1122 /*=========================*/
1123 trx_t* trx) /*!< in: transaction */
1124 {
1125 trx_rseg_t* rseg;
1126
1127 rseg = trx->rseg;
1128
1129 ut_ad(mutex_own(&rseg->mutex));
1130
1131 mutex_enter(&trx_sys->mutex);
1132
1133 trx->no = trx_sys_get_new_trx_id();
1134
1135 if (UNIV_LIKELY(!trx->in_trx_serial_list)) {
1136
1137 UT_LIST_ADD_LAST(trx_serial_list, trx_sys->trx_serial_list,
1138 trx);
1139
1140 trx->in_trx_serial_list = true;
1141 }
1142
1143 /* If the rollack segment is not empty then the
1144 new trx_t::no can't be less than any trx_t::no
1145 already in the rollback segment. User threads only
1146 produce events when a rollback segment is empty. */
1147
1148 if (rseg->last_page_no == FIL_NULL) {
1149 void* ptr;
1150 rseg_queue_t rseg_queue;
1151
1152 rseg_queue.rseg = rseg;
1153 rseg_queue.trx_no = trx->no;
1154
1155 mutex_enter(&purge_sys->bh_mutex);
1156
1157 /* This is to reduce the pressure on the trx_sys_t::mutex
1158 though in reality it should make very little (read no)
1159 difference because this code path is only taken when the
1160 rbs is empty. */
1161
1162 mutex_exit(&trx_sys->mutex);
1163
1164 ptr = ib_bh_push(purge_sys->ib_bh, &rseg_queue);
1165 ut_a(ptr);
1166
1167 mutex_exit(&purge_sys->bh_mutex);
1168 } else {
1169 mutex_exit(&trx_sys->mutex);
1170 }
1171 }
1172
1173 /****************************************************************//**
1174 Assign the transaction its history serialisation number and write the
1175 update UNDO log record to the assigned rollback segment. */
1176 static MY_ATTRIBUTE((nonnull))
1177 void
trx_write_serialisation_history(trx_t * trx,mtr_t * mtr)1178 trx_write_serialisation_history(
1179 /*============================*/
1180 trx_t* trx, /*!< in/out: transaction */
1181 mtr_t* mtr) /*!< in/out: mini-transaction */
1182 {
1183 trx_rseg_t* rseg;
1184
1185 rseg = trx->rseg;
1186
1187 /* Change the undo log segment states from TRX_UNDO_ACTIVE
1188 to some other state: these modifications to the file data
1189 structure define the transaction as committed in the file
1190 based domain, at the serialization point of the log sequence
1191 number lsn obtained below. */
1192
1193 if (trx->update_undo != NULL) {
1194 page_t* undo_hdr_page;
1195 trx_undo_t* undo = trx->update_undo;
1196
1197 /* We have to hold the rseg mutex because update
1198 log headers have to be put to the history list in the
1199 (serialisation) order of the UNDO trx number. This is
1200 required for the purge in-memory data structures too. */
1201
1202 mutex_enter(&rseg->mutex);
1203
1204 /* Assign the transaction serialisation number and also
1205 update the purge min binary heap if this is the first
1206 UNDO log being written to the assigned rollback segment. */
1207
1208 trx_serialisation_number_get(trx);
1209
1210 /* It is not necessary to obtain trx->undo_mutex here
1211 because only a single OS thread is allowed to do the
1212 transaction commit for this transaction. */
1213
1214 undo_hdr_page = trx_undo_set_state_at_finish(undo, mtr);
1215
1216 trx_undo_update_cleanup(trx, undo_hdr_page, mtr);
1217 } else {
1218 mutex_enter(&rseg->mutex);
1219 }
1220
1221 if (trx->insert_undo != NULL) {
1222 trx_undo_set_state_at_finish(trx->insert_undo, mtr);
1223 }
1224
1225 mutex_exit(&rseg->mutex);
1226
1227 MONITOR_INC(MONITOR_TRX_COMMIT_UNDO);
1228
1229 /* Update the latest MySQL binlog name and offset info
1230 in trx sys header if MySQL binlogging is on or the database
1231 server is a MySQL replication slave */
1232
1233 if (trx->mysql_log_file_name
1234 && trx->mysql_log_file_name[0] != '\0') {
1235
1236 trx_sys_update_mysql_binlog_offset(
1237 trx->mysql_log_file_name,
1238 trx->mysql_log_offset,
1239 TRX_SYS_MYSQL_LOG_INFO, mtr);
1240
1241 trx->mysql_log_file_name = NULL;
1242 }
1243 }
1244
1245 /********************************************************************
1246 Finalize a transaction containing updates for a FTS table. */
1247 static MY_ATTRIBUTE((nonnull))
1248 void
trx_finalize_for_fts_table(fts_trx_table_t * ftt)1249 trx_finalize_for_fts_table(
1250 /*=======================*/
1251 fts_trx_table_t* ftt) /* in: FTS trx table */
1252 {
1253 fts_t* fts = ftt->table->fts;
1254 fts_doc_ids_t* doc_ids = ftt->added_doc_ids;
1255
1256 mutex_enter(&fts->bg_threads_mutex);
1257
1258 if (fts->fts_status & BG_THREAD_STOP) {
1259 /* The table is about to be dropped, no use
1260 adding anything to its work queue. */
1261
1262 mutex_exit(&fts->bg_threads_mutex);
1263 } else {
1264 mem_heap_t* heap;
1265 mutex_exit(&fts->bg_threads_mutex);
1266
1267 ut_a(fts->add_wq);
1268
1269 heap = static_cast<mem_heap_t*>(doc_ids->self_heap->arg);
1270
1271 ib_wqueue_add(fts->add_wq, doc_ids, heap);
1272
1273 /* fts_trx_table_t no longer owns the list. */
1274 ftt->added_doc_ids = NULL;
1275 }
1276 }
1277
1278 /******************************************************************//**
1279 Finalize a transaction containing updates to FTS tables. */
1280 static MY_ATTRIBUTE((nonnull))
1281 void
trx_finalize_for_fts(trx_t * trx,bool is_commit)1282 trx_finalize_for_fts(
1283 /*=================*/
1284 trx_t* trx, /*!< in/out: transaction */
1285 bool is_commit) /*!< in: true if the transaction was
1286 committed, false if it was rolled back. */
1287 {
1288 if (is_commit) {
1289 const ib_rbt_node_t* node;
1290 ib_rbt_t* tables;
1291 fts_savepoint_t* savepoint;
1292
1293 savepoint = static_cast<fts_savepoint_t*>(
1294 ib_vector_last(trx->fts_trx->savepoints));
1295
1296 tables = savepoint->tables;
1297
1298 for (node = rbt_first(tables);
1299 node;
1300 node = rbt_next(tables, node)) {
1301 fts_trx_table_t** ftt;
1302
1303 ftt = rbt_value(fts_trx_table_t*, node);
1304
1305 if ((*ftt)->added_doc_ids) {
1306 trx_finalize_for_fts_table(*ftt);
1307 }
1308 }
1309 }
1310
1311 fts_trx_free(trx->fts_trx);
1312 trx->fts_trx = NULL;
1313 }
1314
1315 /**********************************************************************//**
1316 If required, flushes the log to disk based on the value of
1317 innodb_flush_log_at_trx_commit. */
1318 static
1319 void
trx_flush_log_if_needed_low(lsn_t lsn,trx_t * trx)1320 trx_flush_log_if_needed_low(
1321 /*========================*/
1322 lsn_t lsn, /*!< in: lsn up to which logs are to be
1323 flushed. */
1324 trx_t* trx) /*!< in: transaction */
1325 {
1326 ulint flush_log_at_trx_commit;
1327
1328 flush_log_at_trx_commit = srv_use_global_flush_log_at_trx_commit
1329 ? thd_flush_log_at_trx_commit(NULL)
1330 : thd_flush_log_at_trx_commit(trx->mysql_thd);
1331
1332 switch (flush_log_at_trx_commit) {
1333 case 0:
1334 /* Do nothing */
1335 break;
1336 case 1:
1337 /* Write the log and optionally flush it to disk */
1338 log_write_up_to(lsn, LOG_WAIT_ONE_GROUP,
1339 srv_unix_file_flush_method != SRV_UNIX_NOSYNC);
1340 break;
1341 case 2:
1342 /* Write the log but do not flush it to disk */
1343 log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, FALSE);
1344
1345 break;
1346 default:
1347 ut_error;
1348 }
1349 }
1350
1351 /**********************************************************************//**
1352 If required, flushes the log to disk based on the value of
1353 innodb_flush_log_at_trx_commit. */
1354 static MY_ATTRIBUTE((nonnull))
1355 void
trx_flush_log_if_needed(lsn_t lsn,trx_t * trx)1356 trx_flush_log_if_needed(
1357 /*====================*/
1358 lsn_t lsn, /*!< in: lsn up to which logs are to be
1359 flushed. */
1360 trx_t* trx) /*!< in/out: transaction */
1361 {
1362 trx->op_info = "flushing log";
1363 trx_flush_log_if_needed_low(lsn, trx);
1364 trx->op_info = "";
1365 }
1366
1367 /****************************************************************//**
1368 Commits a transaction in memory. */
1369 static MY_ATTRIBUTE((nonnull))
1370 void
trx_commit_in_memory(trx_t * trx,lsn_t lsn)1371 trx_commit_in_memory(
1372 /*=================*/
1373 trx_t* trx, /*!< in/out: transaction */
1374 lsn_t lsn) /*!< in: log sequence number of the mini-transaction
1375 commit of trx_write_serialisation_history(), or 0
1376 if the transaction did not modify anything */
1377 {
1378 trx->must_flush_log_later = FALSE;
1379
1380 if (trx_is_autocommit_non_locking(trx)) {
1381 ut_ad(trx->read_only);
1382 ut_a(!trx->is_recovered);
1383 ut_ad(trx->rseg == NULL);
1384 ut_ad(!trx->in_ro_trx_list);
1385 ut_ad(!trx->in_rw_trx_list);
1386
1387 /* Note: We are asserting without holding the lock mutex. But
1388 that is OK because this transaction is not waiting and cannot
1389 be rolled back and no new locks can (or should not) be added
1390 becuase it is flagged as a non-locking read-only transaction. */
1391
1392 ut_a(UT_LIST_GET_LEN(trx->lock.trx_locks) == 0);
1393
1394 /* This state change is not protected by any mutex, therefore
1395 there is an inherent race here around state transition during
1396 printouts. We ignore this race for the sake of efficiency.
1397 However, the trx_sys_t::mutex will protect the trx_t instance
1398 and it cannot be removed from the mysql_trx_list and freed
1399 without first acquiring the trx_sys_t::mutex. */
1400
1401 ut_ad(trx_state_eq(trx, TRX_STATE_ACTIVE));
1402
1403 trx->state = TRX_STATE_NOT_STARTED;
1404
1405 read_view_remove(trx->global_read_view, false);
1406
1407 MONITOR_INC(MONITOR_TRX_NL_RO_COMMIT);
1408 } else {
1409 lock_trx_release_locks(trx);
1410
1411 /* Remove the transaction from the list of active
1412 transactions now that it no longer holds any user locks. */
1413
1414 ut_ad(trx_state_eq(trx, TRX_STATE_COMMITTED_IN_MEMORY));
1415
1416 mutex_enter(&trx_sys->mutex);
1417
1418 assert_trx_in_list(trx);
1419
1420 if (trx->read_only) {
1421 UT_LIST_REMOVE(trx_list, trx_sys->ro_trx_list, trx);
1422 ut_d(trx->in_ro_trx_list = FALSE);
1423 MONITOR_INC(MONITOR_TRX_RO_COMMIT);
1424 } else {
1425 UT_LIST_REMOVE(trx_list, trx_sys->rw_trx_list, trx);
1426 ut_d(trx->in_rw_trx_list = FALSE);
1427 ut_ad(trx_sys->descr_n_used <=
1428 UT_LIST_GET_LEN(trx_sys->rw_trx_list));
1429 MONITOR_INC(MONITOR_TRX_RW_COMMIT);
1430 }
1431
1432 /* If this transaction came from trx_allocate_for_mysql(),
1433 trx->in_mysql_trx_list would hold. In that case, the
1434 trx->state change must be protected by trx_sys->mutex, so that
1435 lock_print_info_all_transactions() will have a consistent
1436 view. */
1437
1438 trx->state = TRX_STATE_NOT_STARTED;
1439
1440 /* We already own the trx_sys_t::mutex, by doing it here we
1441 avoid a potential context switch later. */
1442 read_view_remove(trx->global_read_view, true);
1443
1444 ut_ad(trx_sys_validate_trx_list());
1445
1446 mutex_exit(&trx_sys->mutex);
1447 }
1448
1449 if (trx->global_read_view != NULL) {
1450
1451 trx->global_read_view = NULL;
1452 }
1453
1454 trx->read_view = NULL;
1455
1456 if (lsn) {
1457 ulint flush_log_at_trx_commit;
1458
1459 if (trx->insert_undo != NULL) {
1460
1461 trx_undo_insert_cleanup(trx);
1462 }
1463
1464 if (srv_use_global_flush_log_at_trx_commit) {
1465 flush_log_at_trx_commit = thd_flush_log_at_trx_commit(NULL);
1466 } else {
1467 flush_log_at_trx_commit = thd_flush_log_at_trx_commit(trx->mysql_thd);
1468 }
1469
1470 /* NOTE that we could possibly make a group commit more
1471 efficient here: call os_thread_yield here to allow also other
1472 trxs to come to commit! */
1473
1474 /*-------------------------------------*/
1475
1476 /* Depending on the my.cnf options, we may now write the log
1477 buffer to the log files, making the transaction durable if
1478 the OS does not crash. We may also flush the log files to
1479 disk, making the transaction durable also at an OS crash or a
1480 power outage.
1481
1482 The idea in InnoDB's group commit is that a group of
1483 transactions gather behind a trx doing a physical disk write
1484 to log files, and when that physical write has been completed,
1485 one of those transactions does a write which commits the whole
1486 group. Note that this group commit will only bring benefit if
1487 there are > 2 users in the database. Then at least 2 users can
1488 gather behind one doing the physical log write to disk.
1489
1490 If we are calling trx_commit() under prepare_commit_mutex, we
1491 will delay possible log write and flush to a separate function
1492 trx_commit_complete_for_mysql(), which is only called when the
1493 thread has released the mutex. This is to make the
1494 group commit algorithm to work. Otherwise, the prepare_commit
1495 mutex would serialize all commits and prevent a group of
1496 transactions from gathering. */
1497
1498 if (trx->flush_log_later) {
1499 /* Do nothing yet */
1500 trx->must_flush_log_later = TRUE;
1501 } else if (flush_log_at_trx_commit == 0
1502 || thd_requested_durability(trx->mysql_thd)
1503 == HA_IGNORE_DURABILITY) {
1504 /* Do nothing */
1505 } else {
1506 trx_flush_log_if_needed(lsn, trx);
1507 }
1508
1509 trx->commit_lsn = lsn;
1510
1511 /* Tell server some activity has happened, since the trx
1512 does changes something. Background utility threads like
1513 master thread, purge thread or page_cleaner thread might
1514 have some work to do. */
1515 srv_active_wake_master_thread();
1516 }
1517
1518 /* undo_no is non-zero if we're doing the final commit. */
1519 bool not_rollback = trx->undo_no != 0;
1520 /* Free all savepoints, starting from the first. */
1521 trx_named_savept_t* savep = UT_LIST_GET_FIRST(trx->trx_savepoints);
1522 trx_roll_savepoints_free(trx, savep);
1523
1524 trx->rseg = NULL;
1525 trx->undo_no = 0;
1526 trx->last_sql_stat_start.least_undo_no = 0;
1527
1528 trx->ddl = false;
1529 #ifdef UNIV_DEBUG
1530 ut_ad(trx->start_file != 0);
1531 ut_ad(trx->start_line != 0);
1532 trx->start_file = 0;
1533 trx->start_line = 0;
1534 #endif /* UNIV_DEBUG */
1535
1536 trx->will_lock = 0;
1537 trx->read_only = FALSE;
1538 trx->auto_commit = FALSE;
1539
1540 if (trx->fts_trx) {
1541 trx_finalize_for_fts(trx, not_rollback);
1542 }
1543
1544 ut_ad(trx->lock.wait_thr == NULL);
1545 ut_ad(UT_LIST_GET_LEN(trx->lock.trx_locks) == 0);
1546 ut_ad(!trx->in_ro_trx_list);
1547 ut_ad(!trx->in_rw_trx_list);
1548
1549 trx->dict_operation = TRX_DICT_OP_NONE;
1550
1551 trx->error_state = DB_SUCCESS;
1552
1553 /* trx->in_mysql_trx_list would hold between
1554 trx_allocate_for_mysql() and trx_free_for_mysql(). It does not
1555 hold for recovered transactions or system transactions. */
1556 }
1557
1558 /****************************************************************//**
1559 Commits a transaction and a mini-transaction. */
1560 UNIV_INTERN
1561 void
trx_commit_low(trx_t * trx,mtr_t * mtr)1562 trx_commit_low(
1563 /*===========*/
1564 trx_t* trx, /*!< in/out: transaction */
1565 mtr_t* mtr) /*!< in/out: mini-transaction (will be committed),
1566 or NULL if trx made no modifications */
1567 {
1568 lsn_t lsn;
1569
1570 assert_trx_nonlocking_or_in_list(trx);
1571 ut_ad(!trx_state_eq(trx, TRX_STATE_COMMITTED_IN_MEMORY));
1572 ut_ad(!mtr || mtr->state == MTR_ACTIVE);
1573 ut_ad(!mtr == !(trx->insert_undo || trx->update_undo));
1574
1575 /* undo_no is non-zero if we're doing the final commit. */
1576 if (trx->fts_trx && trx->undo_no != 0) {
1577 dberr_t error;
1578
1579 ut_a(!trx_is_autocommit_non_locking(trx));
1580
1581 error = fts_commit(trx);
1582
1583 /* FTS-FIXME: Temporarily tolerate DB_DUPLICATE_KEY
1584 instead of dying. This is a possible scenario if there
1585 is a crash between insert to DELETED table committing
1586 and transaction committing. The fix would be able to
1587 return error from this function */
1588 if (error != DB_SUCCESS && error != DB_DUPLICATE_KEY) {
1589 /* FTS-FIXME: once we can return values from this
1590 function, we should do so and signal an error
1591 instead of just dying. */
1592
1593 ut_error;
1594 }
1595 }
1596
1597 if (mtr) {
1598 trx_write_serialisation_history(trx, mtr);
1599 /* The following call commits the mini-transaction, making the
1600 whole transaction committed in the file-based world, at this
1601 log sequence number. The transaction becomes 'durable' when
1602 we write the log to disk, but in the logical sense the commit
1603 in the file-based data structures (undo logs etc.) happens
1604 here.
1605
1606 NOTE that transaction numbers, which are assigned only to
1607 transactions with an update undo log, do not necessarily come
1608 in exactly the same order as commit lsn's, if the transactions
1609 have different rollback segments. To get exactly the same
1610 order we should hold the kernel mutex up to this point,
1611 adding to the contention of the kernel mutex. However, if
1612 a transaction T2 is able to see modifications made by
1613 a transaction T1, T2 will always get a bigger transaction
1614 number and a bigger commit lsn than T1. */
1615
1616 /*--------------*/
1617 mtr_commit(mtr);
1618 /*--------------*/
1619 lsn = mtr->end_lsn;
1620 } else {
1621 lsn = 0;
1622 }
1623
1624 trx_commit_in_memory(trx, lsn);
1625 }
1626
1627 /****************************************************************//**
1628 Commits a transaction. */
1629 UNIV_INTERN
1630 void
trx_commit(trx_t * trx)1631 trx_commit(
1632 /*=======*/
1633 trx_t* trx) /*!< in/out: transaction */
1634 {
1635 mtr_t local_mtr;
1636 mtr_t* mtr;
1637
1638 if (trx->insert_undo || trx->update_undo) {
1639 mtr = &local_mtr;
1640 mtr_start(mtr);
1641 } else {
1642 mtr = NULL;
1643 }
1644
1645 trx_commit_low(trx, mtr);
1646 }
1647
1648 /****************************************************************//**
1649 Cleans up a transaction at database startup. The cleanup is needed if
1650 the transaction already got to the middle of a commit when the database
1651 crashed, and we cannot roll it back. */
1652 UNIV_INTERN
1653 void
trx_cleanup_at_db_startup(trx_t * trx)1654 trx_cleanup_at_db_startup(
1655 /*======================*/
1656 trx_t* trx) /*!< in: transaction */
1657 {
1658 ut_ad(trx->is_recovered);
1659
1660 if (trx->insert_undo != NULL) {
1661
1662 trx_undo_insert_cleanup(trx);
1663 }
1664
1665 trx->rseg = NULL;
1666 trx->undo_no = 0;
1667 trx->last_sql_stat_start.least_undo_no = 0;
1668
1669 mutex_enter(&trx_sys->mutex);
1670
1671 ut_a(!trx->read_only);
1672
1673 UT_LIST_REMOVE(trx_list, trx_sys->rw_trx_list, trx);
1674 ut_ad(trx_sys->descr_n_used <= UT_LIST_GET_LEN(trx_sys->rw_trx_list));
1675
1676 assert_trx_in_rw_list(trx);
1677 ut_d(trx->in_rw_trx_list = FALSE);
1678
1679 trx->state = TRX_STATE_NOT_STARTED;
1680 trx_release_descriptor(trx);
1681
1682 mutex_exit(&trx_sys->mutex);
1683
1684 /* Change the transaction state without mutex protection, now
1685 that it no longer is in the trx_list. Recovered transactions
1686 are never placed in the mysql_trx_list. */
1687 ut_ad(trx->is_recovered);
1688 ut_ad(!trx->in_ro_trx_list);
1689 ut_ad(!trx->in_rw_trx_list);
1690 ut_ad(!trx->in_mysql_trx_list);
1691 }
1692
1693 /********************************************************************//**
1694 Assigns a read view for a consistent read query. All the consistent reads
1695 within the same transaction will get the same read view, which is created
1696 when this function is first called for a new started transaction.
1697 @return consistent read view */
1698 UNIV_INTERN
1699 read_view_t*
trx_assign_read_view(trx_t * trx)1700 trx_assign_read_view(
1701 /*=================*/
1702 trx_t* trx) /*!< in: active transaction */
1703 {
1704 ut_ad(trx->state == TRX_STATE_ACTIVE);
1705
1706 if (trx->read_view != NULL) {
1707 return(trx->read_view);
1708 }
1709
1710 trx->read_view = read_view_open_now(trx->id, trx->prebuilt_view);
1711 trx->global_read_view = trx->read_view;
1712
1713 return(trx->read_view);
1714 }
1715
1716 /********************************************************************//**
1717 Clones the read view from another transaction. All consistent reads within
1718 the receiver transaction will get the same read view as the donor transaction
1719 @return read view clone */
1720 UNIV_INTERN
1721 read_view_t*
trx_clone_read_view(trx_t * trx,trx_t * from_trx)1722 trx_clone_read_view(
1723 /*================*/
1724 trx_t* trx, /*!< in: receiver transaction */
1725 trx_t* from_trx) /*!< in: donor transaction */
1726 {
1727 ut_ad(lock_mutex_own());
1728 ut_ad(mutex_own(&trx_sys->mutex));
1729 ut_ad(trx_mutex_own(from_trx));
1730 ut_ad(trx->read_view == NULL);
1731
1732 if (from_trx->state != TRX_STATE_ACTIVE ||
1733 from_trx->read_view == NULL) {
1734
1735 return(NULL);
1736 }
1737
1738 trx->read_view = read_view_clone(from_trx->read_view,
1739 trx->prebuilt_view);
1740
1741 read_view_add(trx->read_view);
1742
1743 trx->global_read_view = trx->read_view;
1744
1745 return(trx->read_view);
1746 }
1747
1748 /****************************************************************//**
1749 Prepares a transaction for commit/rollback. */
1750 UNIV_INTERN
1751 void
trx_commit_or_rollback_prepare(trx_t * trx)1752 trx_commit_or_rollback_prepare(
1753 /*===========================*/
1754 trx_t* trx) /*!< in/out: transaction */
1755 {
1756 /* We are reading trx->state without holding trx_sys->mutex
1757 here, because the commit or rollback should be invoked for a
1758 running (or recovered prepared) transaction that is associated
1759 with the current thread. */
1760
1761 switch (trx->state) {
1762 case TRX_STATE_NOT_STARTED:
1763 trx_start_low(trx);
1764 /* fall through */
1765 case TRX_STATE_ACTIVE:
1766 case TRX_STATE_PREPARED:
1767 /* If the trx is in a lock wait state, moves the waiting
1768 query thread to the suspended state */
1769
1770 if (trx->lock.que_state == TRX_QUE_LOCK_WAIT) {
1771
1772 ulint sec;
1773 ulint ms;
1774 ib_uint64_t now;
1775
1776 ut_a(trx->lock.wait_thr != NULL);
1777 trx->lock.wait_thr->state = QUE_THR_SUSPENDED;
1778 trx->lock.wait_thr = NULL;
1779
1780 if (UNIV_UNLIKELY(trx->take_stats)) {
1781 ut_usectime(&sec, &ms);
1782 now = (ib_uint64_t)sec * 1000000 + ms;
1783 trx->lock_que_wait_timer += now - trx->lock_que_wait_ustarted;
1784 }
1785
1786 trx->lock.que_state = TRX_QUE_RUNNING;
1787 }
1788
1789 ut_a(trx->lock.n_active_thrs == 1);
1790 return;
1791 case TRX_STATE_COMMITTED_IN_MEMORY:
1792 break;
1793 }
1794
1795 ut_error;
1796 }
1797
1798 /*********************************************************************//**
1799 Creates a commit command node struct.
1800 @return own: commit node struct */
1801 UNIV_INTERN
1802 commit_node_t*
trx_commit_node_create(mem_heap_t * heap)1803 trx_commit_node_create(
1804 /*===================*/
1805 mem_heap_t* heap) /*!< in: mem heap where created */
1806 {
1807 commit_node_t* node;
1808
1809 node = static_cast<commit_node_t*>(mem_heap_alloc(heap, sizeof(*node)));
1810 node->common.type = QUE_NODE_COMMIT;
1811 node->state = COMMIT_NODE_SEND;
1812
1813 return(node);
1814 }
1815
1816 /***********************************************************//**
1817 Performs an execution step for a commit type node in a query graph.
1818 @return query thread to run next, or NULL */
1819 UNIV_INTERN
1820 que_thr_t*
trx_commit_step(que_thr_t * thr)1821 trx_commit_step(
1822 /*============*/
1823 que_thr_t* thr) /*!< in: query thread */
1824 {
1825 commit_node_t* node;
1826
1827 node = static_cast<commit_node_t*>(thr->run_node);
1828
1829 ut_ad(que_node_get_type(node) == QUE_NODE_COMMIT);
1830
1831 if (thr->prev_node == que_node_get_parent(node)) {
1832 node->state = COMMIT_NODE_SEND;
1833 }
1834
1835 if (node->state == COMMIT_NODE_SEND) {
1836 trx_t* trx;
1837
1838 node->state = COMMIT_NODE_WAIT;
1839
1840 trx = thr_get_trx(thr);
1841
1842 ut_a(trx->lock.wait_thr == NULL);
1843 ut_a(trx->lock.que_state != TRX_QUE_LOCK_WAIT);
1844
1845 trx_commit_or_rollback_prepare(trx);
1846
1847 trx->lock.que_state = TRX_QUE_COMMITTING;
1848
1849 trx_commit(trx);
1850
1851 ut_ad(trx->lock.wait_thr == NULL);
1852
1853 trx->lock.que_state = TRX_QUE_RUNNING;
1854
1855 thr = NULL;
1856 } else {
1857 ut_ad(node->state == COMMIT_NODE_WAIT);
1858
1859 node->state = COMMIT_NODE_SEND;
1860
1861 thr->run_node = que_node_get_parent(node);
1862 }
1863
1864 return(thr);
1865 }
1866
1867 /**********************************************************************//**
1868 Does the transaction commit for MySQL.
1869 @return DB_SUCCESS or error number */
1870 UNIV_INTERN
1871 dberr_t
trx_commit_for_mysql(trx_t * trx)1872 trx_commit_for_mysql(
1873 /*=================*/
1874 trx_t* trx) /*!< in/out: transaction */
1875 {
1876 /* Because we do not do the commit by sending an Innobase
1877 sig to the transaction, we must here make sure that trx has been
1878 started. */
1879
1880 ut_a(trx);
1881
1882 switch (trx->state) {
1883 case TRX_STATE_NOT_STARTED:
1884 /* Update the info whether we should skip XA steps that eat
1885 CPU time.
1886
1887 For the duration of the transaction trx->support_xa is
1888 not reread from thd so any changes in the value take
1889 effect in the next transaction. This is to avoid a
1890 scenario where some undo log records generated by a
1891 transaction contain XA information and other undo log
1892 records, generated by the same transaction do not. */
1893 trx->support_xa = thd_supports_xa(trx->mysql_thd);
1894
1895 ut_d(trx->start_file = __FILE__);
1896 ut_d(trx->start_line = __LINE__);
1897
1898 trx_start_low(trx);
1899 /* fall through */
1900 case TRX_STATE_ACTIVE:
1901 case TRX_STATE_PREPARED:
1902 trx->op_info = "committing";
1903 trx_commit(trx);
1904 MONITOR_DEC(MONITOR_TRX_ACTIVE);
1905 trx->op_info = "";
1906 return(DB_SUCCESS);
1907 case TRX_STATE_COMMITTED_IN_MEMORY:
1908 break;
1909 }
1910 ut_error;
1911 return(DB_CORRUPTION);
1912 }
1913
1914 /**********************************************************************//**
1915 If required, flushes the log to disk if we called trx_commit_for_mysql()
1916 with trx->flush_log_later == TRUE. */
1917 UNIV_INTERN
1918 void
trx_commit_complete_for_mysql(trx_t * trx)1919 trx_commit_complete_for_mysql(
1920 /*==========================*/
1921 trx_t* trx) /*!< in/out: transaction */
1922 {
1923 ut_a(trx);
1924
1925 if (!trx->must_flush_log_later
1926 || thd_requested_durability(trx->mysql_thd)
1927 == HA_IGNORE_DURABILITY) {
1928 return;
1929 }
1930
1931 trx_flush_log_if_needed(trx->commit_lsn, trx);
1932
1933 trx->must_flush_log_later = FALSE;
1934 }
1935
1936 /**********************************************************************//**
1937 Marks the latest SQL statement ended. */
1938 UNIV_INTERN
1939 void
trx_mark_sql_stat_end(trx_t * trx)1940 trx_mark_sql_stat_end(
1941 /*==================*/
1942 trx_t* trx) /*!< in: trx handle */
1943 {
1944 ut_a(trx);
1945
1946 switch (trx->state) {
1947 case TRX_STATE_PREPARED:
1948 case TRX_STATE_COMMITTED_IN_MEMORY:
1949 break;
1950 case TRX_STATE_NOT_STARTED:
1951 trx->undo_no = 0;
1952 /* fall through */
1953 case TRX_STATE_ACTIVE:
1954 trx->last_sql_stat_start.least_undo_no = trx->undo_no;
1955
1956 if (trx->fts_trx) {
1957 fts_savepoint_laststmt_refresh(trx);
1958 }
1959
1960 return;
1961 }
1962
1963 ut_error;
1964 }
1965
1966 /**********************************************************************//**
1967 Prints info about a transaction.
1968 Caller must hold trx_sys->mutex. */
1969 UNIV_INTERN
1970 void
trx_print_low(FILE * f,const trx_t * trx,ulint max_query_len,ulint n_rec_locks,ulint n_trx_locks,ulint heap_size)1971 trx_print_low(
1972 /*==========*/
1973 FILE* f,
1974 /*!< in: output stream */
1975 const trx_t* trx,
1976 /*!< in: transaction */
1977 ulint max_query_len,
1978 /*!< in: max query length to print,
1979 or 0 to use the default max length */
1980 ulint n_rec_locks,
1981 /*!< in: lock_number_of_rows_locked(&trx->lock) */
1982 ulint n_trx_locks,
1983 /*!< in: length of trx->lock.trx_locks */
1984 ulint heap_size)
1985 /*!< in: mem_heap_get_size(trx->lock.lock_heap) */
1986 {
1987 ibool newline;
1988 const char* op_info;
1989
1990 ut_ad(mutex_own(&trx_sys->mutex));
1991
1992 fprintf(f, "TRANSACTION " TRX_ID_FMT, trx->id);
1993
1994 /* trx->state cannot change from or to NOT_STARTED while we
1995 are holding the trx_sys->mutex. It may change from ACTIVE to
1996 PREPARED or COMMITTED. */
1997 switch (trx->state) {
1998 case TRX_STATE_NOT_STARTED:
1999 fputs(", not started", f);
2000 goto state_ok;
2001 case TRX_STATE_ACTIVE:
2002 fprintf(f, ", ACTIVE %lu sec",
2003 (ulong) difftime(time(NULL), trx->start_time));
2004 goto state_ok;
2005 case TRX_STATE_PREPARED:
2006 fprintf(f, ", ACTIVE (PREPARED) %lu sec",
2007 (ulong) difftime(time(NULL), trx->start_time));
2008 goto state_ok;
2009 case TRX_STATE_COMMITTED_IN_MEMORY:
2010 fputs(", COMMITTED IN MEMORY", f);
2011 goto state_ok;
2012 }
2013 fprintf(f, ", state %lu", (ulong) trx->state);
2014 ut_ad(0);
2015 state_ok:
2016
2017 /* prevent a race condition */
2018 op_info = trx->op_info;
2019
2020 if (*op_info) {
2021 putc(' ', f);
2022 fputs(op_info, f);
2023 }
2024
2025 if (trx->is_recovered) {
2026 fputs(" recovered trx", f);
2027 }
2028
2029 if (trx->declared_to_be_inside_innodb) {
2030 fprintf(f, ", thread declared inside InnoDB %lu",
2031 (ulong) trx->n_tickets_to_enter_innodb);
2032 }
2033
2034 putc('\n', f);
2035
2036 if (trx->n_mysql_tables_in_use > 0 || trx->mysql_n_tables_locked > 0) {
2037 fprintf(f, "mysql tables in use %lu, locked %lu\n",
2038 (ulong) trx->n_mysql_tables_in_use,
2039 (ulong) trx->mysql_n_tables_locked);
2040 }
2041
2042 newline = TRUE;
2043
2044 /* trx->lock.que_state of an ACTIVE transaction may change
2045 while we are not holding trx->mutex. We perform a dirty read
2046 for performance reasons. */
2047
2048 switch (trx->lock.que_state) {
2049 case TRX_QUE_RUNNING:
2050 newline = FALSE; break;
2051 case TRX_QUE_LOCK_WAIT:
2052 fputs("LOCK WAIT ", f); break;
2053 case TRX_QUE_ROLLING_BACK:
2054 fputs("ROLLING BACK ", f); break;
2055 case TRX_QUE_COMMITTING:
2056 fputs("COMMITTING ", f); break;
2057 default:
2058 fprintf(f, "que state %lu ", (ulong) trx->lock.que_state);
2059 }
2060
2061 if (n_trx_locks > 0 || heap_size > 400) {
2062 newline = TRUE;
2063
2064 fprintf(f, "%lu lock struct(s), heap size %lu,"
2065 " %lu row lock(s)",
2066 (ulong) n_trx_locks,
2067 (ulong) heap_size,
2068 (ulong) n_rec_locks);
2069 }
2070
2071 if (trx->has_search_latch) {
2072 newline = TRUE;
2073 fputs(", holds adaptive hash latch", f);
2074 }
2075
2076 if (trx->undo_no != 0) {
2077 newline = TRUE;
2078 fprintf(f, ", undo log entries " TRX_ID_FMT, trx->undo_no);
2079 }
2080
2081 if (newline) {
2082 putc('\n', f);
2083 }
2084
2085 if (trx->mysql_thd != NULL) {
2086 innobase_mysql_print_thd(
2087 f, trx->mysql_thd, static_cast<uint>(max_query_len));
2088 }
2089 }
2090
2091 /**********************************************************************//**
2092 Prints info about a transaction.
2093 The caller must hold lock_sys->mutex and trx_sys->mutex.
2094 When possible, use trx_print() instead. */
2095 UNIV_INTERN
2096 void
trx_print_latched(FILE * f,const trx_t * trx,ulint max_query_len)2097 trx_print_latched(
2098 /*==============*/
2099 FILE* f, /*!< in: output stream */
2100 const trx_t* trx, /*!< in: transaction */
2101 ulint max_query_len) /*!< in: max query length to print,
2102 or 0 to use the default max length */
2103 {
2104 ut_ad(lock_mutex_own());
2105 ut_ad(mutex_own(&trx_sys->mutex));
2106
2107 trx_print_low(f, trx, max_query_len,
2108 lock_number_of_rows_locked(&trx->lock),
2109 UT_LIST_GET_LEN(trx->lock.trx_locks),
2110 mem_heap_get_size(trx->lock.lock_heap));
2111 }
2112
2113 /**********************************************************************//**
2114 Prints info about a transaction.
2115 Acquires and releases lock_sys->mutex and trx_sys->mutex. */
2116 UNIV_INTERN
2117 void
trx_print(FILE * f,const trx_t * trx,ulint max_query_len)2118 trx_print(
2119 /*======*/
2120 FILE* f, /*!< in: output stream */
2121 const trx_t* trx, /*!< in: transaction */
2122 ulint max_query_len) /*!< in: max query length to print,
2123 or 0 to use the default max length */
2124 {
2125 ulint n_rec_locks;
2126 ulint n_trx_locks;
2127 ulint heap_size;
2128
2129 lock_mutex_enter();
2130 n_rec_locks = lock_number_of_rows_locked(&trx->lock);
2131 n_trx_locks = UT_LIST_GET_LEN(trx->lock.trx_locks);
2132 heap_size = mem_heap_get_size(trx->lock.lock_heap);
2133 lock_mutex_exit();
2134
2135 mutex_enter(&trx_sys->mutex);
2136 trx_print_low(f, trx, max_query_len,
2137 n_rec_locks, n_trx_locks, heap_size);
2138 mutex_exit(&trx_sys->mutex);
2139 }
2140
2141 #ifdef UNIV_DEBUG
2142 /**********************************************************************//**
2143 Asserts that a transaction has been started.
2144 The caller must hold trx_sys->mutex.
2145 @return TRUE if started */
2146 UNIV_INTERN
2147 ibool
trx_assert_started(const trx_t * trx)2148 trx_assert_started(
2149 /*===============*/
2150 const trx_t* trx) /*!< in: transaction */
2151 {
2152 ut_ad(mutex_own(&trx_sys->mutex));
2153
2154 /* Non-locking autocommits should not hold any locks and this
2155 function is only called from the locking code. */
2156 assert_trx_in_list(trx);
2157
2158 /* trx->state can change from or to NOT_STARTED while we are holding
2159 trx_sys->mutex for non-locking autocommit selects but not for other
2160 types of transactions. It may change from ACTIVE to PREPARED. Unless
2161 we are holding lock_sys->mutex, it may also change to COMMITTED. */
2162
2163 switch (trx->state) {
2164 case TRX_STATE_PREPARED:
2165 return(TRUE);
2166
2167 case TRX_STATE_ACTIVE:
2168 case TRX_STATE_COMMITTED_IN_MEMORY:
2169 return(TRUE);
2170
2171 case TRX_STATE_NOT_STARTED:
2172 break;
2173 }
2174
2175 ut_error;
2176 return(FALSE);
2177 }
2178 #endif /* UNIV_DEBUG */
2179
2180 /*******************************************************************//**
2181 Compares the "weight" (or size) of two transactions. Transactions that
2182 have edited non-transactional tables are considered heavier than ones
2183 that have not.
2184 @return TRUE if weight(a) >= weight(b) */
2185 UNIV_INTERN
2186 ibool
trx_weight_ge(const trx_t * a,const trx_t * b)2187 trx_weight_ge(
2188 /*==========*/
2189 const trx_t* a, /*!< in: the first transaction to be compared */
2190 const trx_t* b) /*!< in: the second transaction to be compared */
2191 {
2192 ibool a_notrans_edit;
2193 ibool b_notrans_edit;
2194
2195 /* If mysql_thd is NULL for a transaction we assume that it has
2196 not edited non-transactional tables. */
2197
2198 a_notrans_edit = a->mysql_thd != NULL
2199 && thd_has_edited_nontrans_tables(a->mysql_thd);
2200
2201 b_notrans_edit = b->mysql_thd != NULL
2202 && thd_has_edited_nontrans_tables(b->mysql_thd);
2203
2204 if (a_notrans_edit != b_notrans_edit) {
2205
2206 return(a_notrans_edit);
2207 }
2208
2209 /* Either both had edited non-transactional tables or both had
2210 not, we fall back to comparing the number of altered/locked
2211 rows. */
2212
2213 #if 0
2214 fprintf(stderr,
2215 "%s TRX_WEIGHT(a): %lld+%lu, TRX_WEIGHT(b): %lld+%lu\n",
2216 __func__,
2217 a->undo_no, UT_LIST_GET_LEN(a->lock.trx_locks),
2218 b->undo_no, UT_LIST_GET_LEN(b->lock.trx_locks));
2219 #endif
2220
2221 return(TRX_WEIGHT(a) >= TRX_WEIGHT(b));
2222 }
2223
2224 /****************************************************************//**
2225 Prepares a transaction. */
2226 static
2227 void
trx_prepare(trx_t * trx)2228 trx_prepare(
2229 /*========*/
2230 trx_t* trx) /*!< in/out: transaction */
2231 {
2232 trx_rseg_t* rseg;
2233 lsn_t lsn;
2234 mtr_t mtr;
2235
2236 rseg = trx->rseg;
2237 /* Only fresh user transactions can be prepared.
2238 Recovered transactions cannot. */
2239 ut_a(!trx->is_recovered);
2240
2241 if (trx->insert_undo != NULL || trx->update_undo != NULL) {
2242
2243 mtr_start(&mtr);
2244
2245 /* Change the undo log segment states from TRX_UNDO_ACTIVE
2246 to TRX_UNDO_PREPARED: these modifications to the file data
2247 structure define the transaction as prepared in the
2248 file-based world, at the serialization point of lsn. */
2249
2250 mutex_enter(&rseg->mutex);
2251
2252 if (trx->insert_undo != NULL) {
2253
2254 /* It is not necessary to obtain trx->undo_mutex here
2255 because only a single OS thread is allowed to do the
2256 transaction prepare for this transaction. */
2257
2258 trx_undo_set_state_at_prepare(trx, trx->insert_undo,
2259 &mtr);
2260 }
2261
2262 if (trx->update_undo) {
2263 trx_undo_set_state_at_prepare(
2264 trx, trx->update_undo, &mtr);
2265 }
2266
2267 mutex_exit(&rseg->mutex);
2268
2269 /*--------------*/
2270 mtr_commit(&mtr); /* This mtr commit makes the
2271 transaction prepared in the file-based
2272 world */
2273 /*--------------*/
2274 lsn = mtr.end_lsn;
2275 ut_ad(lsn);
2276 } else {
2277 lsn = 0;
2278 }
2279
2280 /*--------------------------------------*/
2281 ut_a(trx->state == TRX_STATE_ACTIVE);
2282 mutex_enter(&trx_sys->mutex);
2283 trx->state = TRX_STATE_PREPARED;
2284 trx_sys->n_prepared_trx++;
2285 mutex_exit(&trx_sys->mutex);
2286 /*--------------------------------------*/
2287
2288 if (lsn) {
2289 /* Depending on the my.cnf options, we may now write the log
2290 buffer to the log files, making the prepared state of the
2291 transaction durable if the OS does not crash. We may also
2292 flush the log files to disk, making the prepared state of the
2293 transaction durable also at an OS crash or a power outage.
2294
2295 The idea in InnoDB's group prepare is that a group of
2296 transactions gather behind a trx doing a physical disk write
2297 to log files, and when that physical write has been completed,
2298 one of those transactions does a write which prepares the whole
2299 group. Note that this group prepare will only bring benefit if
2300 there are > 2 users in the database. Then at least 2 users can
2301 gather behind one doing the physical log write to disk.
2302
2303 TODO: find out if MySQL holds some mutex when calling this.
2304 That would spoil our group prepare algorithm. */
2305
2306 trx_flush_log_if_needed(lsn, trx);
2307 }
2308 }
2309
2310 /**********************************************************************//**
2311 Does the transaction prepare for MySQL. */
2312 UNIV_INTERN
2313 void
trx_prepare_for_mysql(trx_t * trx)2314 trx_prepare_for_mysql(
2315 /*==================*/
2316 trx_t* trx) /*!< in/out: trx handle */
2317 {
2318 trx_start_if_not_started_xa(trx);
2319
2320 trx->op_info = "preparing";
2321
2322 trx_prepare(trx);
2323
2324 trx->op_info = "";
2325 }
2326
2327 /**********************************************************************//**
2328 This function is used to find number of prepared transactions and
2329 their transaction objects for a recovery.
2330 @return number of prepared transactions stored in xid_list */
2331 UNIV_INTERN
2332 int
trx_recover_for_mysql(XID * xid_list,ulint len)2333 trx_recover_for_mysql(
2334 /*==================*/
2335 XID* xid_list, /*!< in/out: prepared transactions */
2336 ulint len) /*!< in: number of slots in xid_list */
2337 {
2338 const trx_t* trx;
2339 ulint count = 0;
2340
2341 ut_ad(xid_list);
2342 ut_ad(len);
2343
2344 /* We should set those transactions which are in the prepared state
2345 to the xid_list */
2346
2347 mutex_enter(&trx_sys->mutex);
2348
2349 for (trx = UT_LIST_GET_FIRST(trx_sys->rw_trx_list);
2350 trx != NULL;
2351 trx = UT_LIST_GET_NEXT(trx_list, trx)) {
2352
2353 assert_trx_in_rw_list(trx);
2354
2355 /* The state of a read-write transaction cannot change
2356 from or to NOT_STARTED while we are holding the
2357 trx_sys->mutex. It may change to PREPARED, but not if
2358 trx->is_recovered. It may also change to COMMITTED. */
2359 if (trx_state_eq(trx, TRX_STATE_PREPARED)) {
2360 xid_list[count] = trx->xid;
2361
2362 if (count == 0) {
2363 ut_print_timestamp(stderr);
2364 fprintf(stderr,
2365 " InnoDB: Starting recovery for"
2366 " XA transactions...\n");
2367 }
2368
2369 ut_print_timestamp(stderr);
2370 fprintf(stderr,
2371 " InnoDB: Transaction " TRX_ID_FMT " in"
2372 " prepared state after recovery\n",
2373 trx->id);
2374
2375 ut_print_timestamp(stderr);
2376 fprintf(stderr,
2377 " InnoDB: Transaction contains changes"
2378 " to " TRX_ID_FMT " rows\n",
2379 trx->undo_no);
2380
2381 count++;
2382
2383 if (count == len) {
2384 break;
2385 }
2386 }
2387 }
2388
2389 mutex_exit(&trx_sys->mutex);
2390
2391 if (count > 0){
2392 ut_print_timestamp(stderr);
2393 fprintf(stderr,
2394 " InnoDB: %d transactions in prepared state"
2395 " after recovery\n",
2396 int (count));
2397 }
2398
2399 return(int (count));
2400 }
2401
2402 /*******************************************************************//**
2403 This function is used to find one X/Open XA distributed transaction
2404 which is in the prepared state
2405 @return trx on match, the trx->xid will be invalidated;
2406 note that the trx may have been committed, unless the caller is
2407 holding lock_sys->mutex */
2408 static MY_ATTRIBUTE((nonnull, warn_unused_result))
2409 trx_t*
trx_get_trx_by_xid_low(const XID * xid)2410 trx_get_trx_by_xid_low(
2411 /*===================*/
2412 const XID* xid) /*!< in: X/Open XA transaction
2413 identifier */
2414 {
2415 trx_t* trx;
2416
2417 ut_ad(mutex_own(&trx_sys->mutex));
2418
2419 for (trx = UT_LIST_GET_FIRST(trx_sys->rw_trx_list);
2420 trx != NULL;
2421 trx = UT_LIST_GET_NEXT(trx_list, trx)) {
2422
2423 assert_trx_in_rw_list(trx);
2424
2425 /* Compare two X/Open XA transaction id's: their
2426 length should be the same and binary comparison
2427 of gtrid_length+bqual_length bytes should be
2428 the same */
2429
2430 if (trx->is_recovered
2431 && trx_state_eq(trx, TRX_STATE_PREPARED)
2432 && xid->gtrid_length == trx->xid.gtrid_length
2433 && xid->bqual_length == trx->xid.bqual_length
2434 && memcmp(xid->data, trx->xid.data,
2435 xid->gtrid_length + xid->bqual_length) == 0) {
2436
2437 /* Invalidate the XID, so that subsequent calls
2438 will not find it. */
2439 memset(static_cast<void*>(&trx->xid), 0,
2440 sizeof(trx->xid));
2441 trx->xid.formatID = -1;
2442 break;
2443 }
2444 }
2445
2446 return(trx);
2447 }
2448
2449 /*******************************************************************//**
2450 This function is used to find one X/Open XA distributed transaction
2451 which is in the prepared state
2452 @return trx or NULL; on match, the trx->xid will be invalidated;
2453 note that the trx may have been committed, unless the caller is
2454 holding lock_sys->mutex */
2455 UNIV_INTERN
2456 trx_t*
trx_get_trx_by_xid(const XID * xid)2457 trx_get_trx_by_xid(
2458 /*===============*/
2459 const XID* xid) /*!< in: X/Open XA transaction identifier */
2460 {
2461 trx_t* trx;
2462
2463 if (xid == NULL) {
2464
2465 return(NULL);
2466 }
2467
2468 mutex_enter(&trx_sys->mutex);
2469
2470 /* Recovered/Resurrected transactions are always only on the
2471 trx_sys_t::rw_trx_list. */
2472 trx = trx_get_trx_by_xid_low(xid);
2473
2474 mutex_exit(&trx_sys->mutex);
2475
2476 return(trx);
2477 }
2478
2479 /*************************************************************//**
2480 Starts the transaction if it is not yet started. */
2481 UNIV_INTERN
2482 void
trx_start_if_not_started_xa_low(trx_t * trx)2483 trx_start_if_not_started_xa_low(
2484 /*============================*/
2485 trx_t* trx) /*!< in: transaction */
2486 {
2487 switch (trx->state) {
2488 case TRX_STATE_NOT_STARTED:
2489
2490 /* Update the info whether we should skip XA steps
2491 that eat CPU time.
2492
2493 For the duration of the transaction trx->support_xa is
2494 not reread from thd so any changes in the value take
2495 effect in the next transaction. This is to avoid a
2496 scenario where some undo generated by a transaction,
2497 has XA stuff, and other undo, generated by the same
2498 transaction, doesn't. */
2499 trx->support_xa = thd_supports_xa(trx->mysql_thd);
2500
2501 trx_start_low(trx);
2502 /* fall through */
2503 case TRX_STATE_ACTIVE:
2504 return;
2505 case TRX_STATE_PREPARED:
2506 case TRX_STATE_COMMITTED_IN_MEMORY:
2507 break;
2508 }
2509
2510 ut_error;
2511 }
2512
2513 /*************************************************************//**
2514 Starts the transaction if it is not yet started. */
2515 UNIV_INTERN
2516 void
trx_start_if_not_started_low(trx_t * trx)2517 trx_start_if_not_started_low(
2518 /*=========================*/
2519 trx_t* trx) /*!< in: transaction */
2520 {
2521 switch (trx->state) {
2522 case TRX_STATE_NOT_STARTED:
2523 trx_start_low(trx);
2524 /* fall through */
2525 case TRX_STATE_ACTIVE:
2526 return;
2527 case TRX_STATE_PREPARED:
2528 case TRX_STATE_COMMITTED_IN_MEMORY:
2529 break;
2530 }
2531
2532 ut_error;
2533 }
2534
2535 /*************************************************************//**
2536 Starts the transaction for a DDL operation. */
2537 UNIV_INTERN
2538 void
trx_start_for_ddl_low(trx_t * trx,trx_dict_op_t op)2539 trx_start_for_ddl_low(
2540 /*==================*/
2541 trx_t* trx, /*!< in/out: transaction */
2542 trx_dict_op_t op) /*!< in: dictionary operation type */
2543 {
2544 switch (trx->state) {
2545 case TRX_STATE_NOT_STARTED:
2546 /* Flag this transaction as a dictionary operation, so that
2547 the data dictionary will be locked in crash recovery. */
2548
2549 trx_set_dict_operation(trx, op);
2550
2551 /* Ensure it is not flagged as an auto-commit-non-locking
2552 transation. */
2553 trx->will_lock = 1;
2554
2555 trx->ddl = true;
2556
2557 trx_start_low(trx);
2558 return;
2559
2560 case TRX_STATE_ACTIVE:
2561 /* We have this start if not started idiom, therefore we
2562 can't add stronger checks here. */
2563 trx->ddl = true;
2564
2565 ut_ad(trx->dict_operation != TRX_DICT_OP_NONE);
2566 ut_ad(trx->will_lock > 0);
2567 return;
2568 case TRX_STATE_PREPARED:
2569 case TRX_STATE_COMMITTED_IN_MEMORY:
2570 break;
2571 }
2572
2573 ut_error;
2574 }
2575
2576