1 /*****************************************************************************
2 
3 Copyright (c) 1996, 2011, Innobase Oy. All Rights Reserved.
4 
5 This program is free software; you can redistribute it and/or modify it under
6 the terms of the GNU General Public License as published by the Free Software
7 Foundation; version 2 of the License.
8 
9 This program is distributed in the hope that it will be useful, but WITHOUT
10 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11 FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
12 
13 You should have received a copy of the GNU General Public License along with
14 this program; if not, write to the Free Software Foundation, Inc.,
15 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
16 
17 *****************************************************************************/
18 
19 /**************************************************//**
20 @file trx/trx0trx.c
21 The transaction
22 
23 Created 3/26/1996 Heikki Tuuri
24 *******************************************************/
25 
26 #include "trx0trx.h"
27 
28 #ifdef UNIV_NONINL
29 #include "trx0trx.ic"
30 #endif
31 
32 #include "trx0undo.h"
33 #include "trx0rseg.h"
34 #include "log0log.h"
35 #include "que0que.h"
36 #include "lock0lock.h"
37 #include "trx0roll.h"
38 #include "usr0sess.h"
39 #include "read0read.h"
40 #include "srv0srv.h"
41 #include "btr0sea.h"
42 #include "os0proc.h"
43 #include "trx0xa.h"
44 #include "trx0purge.h"
45 #include "ha_prototypes.h"
46 
/** Dummy session used currently in MySQL interface. In this file it is the
session handed to trx_create() for MySQL transactions, background
transactions and transactions recreated at database start. */
UNIV_INTERN sess_t*		trx_dummy_sess = NULL;

/** Number of transactions currently allocated for MySQL: protected by
the kernel mutex. Incremented in trx_allocate_for_mysql() and decremented
in trx_free_for_mysql(). */
UNIV_INTERN ulint	trx_n_mysql_transactions = 0;
/** Number of transactions currently in the XA PREPARED state: protected by
the kernel mutex. Incremented in trx_lists_init_at_db_start() for each
transaction left in the prepared state, and decremented in
trx_commit_off_kernel() when a prepared transaction commits. */
UNIV_INTERN ulint	trx_n_prepared = 0;

#ifdef UNIV_PFS_MUTEX
/* Key to register the mutex with performance schema; passed to
mutex_create() for trx_t::undo_mutex in trx_create(). */
UNIV_INTERN mysql_pfs_key_t	trx_undo_mutex_key;
#endif /* UNIV_PFS_MUTEX */
61 
62 /*************************************************************//**
63 Set detailed error message for the transaction. */
64 UNIV_INTERN
65 void
trx_set_detailed_error(trx_t * trx,const char * msg)66 trx_set_detailed_error(
67 /*===================*/
68 	trx_t*		trx,	/*!< in: transaction struct */
69 	const char*	msg)	/*!< in: detailed error message */
70 {
71 	ut_strlcpy(trx->detailed_error, msg, sizeof(trx->detailed_error));
72 }
73 
74 /*************************************************************//**
75 Set detailed error message for the transaction from a file. Note that the
76 file is rewinded before reading from it. */
77 UNIV_INTERN
78 void
trx_set_detailed_error_from_file(trx_t * trx,FILE * file)79 trx_set_detailed_error_from_file(
80 /*=============================*/
81 	trx_t*	trx,	/*!< in: transaction struct */
82 	FILE*	file)	/*!< in: file to read message from */
83 {
84 	os_file_read_string(file, trx->detailed_error,
85 			    sizeof(trx->detailed_error));
86 }
87 
88 /****************************************************************//**
89 Creates and initializes a transaction object.
90 @return	own: the transaction */
91 UNIV_INTERN
92 trx_t*
trx_create(sess_t * sess)93 trx_create(
94 /*=======*/
95 	sess_t*	sess)	/*!< in: session */
96 {
97 	trx_t*	trx;
98 
99 	ut_ad(mutex_own(&kernel_mutex));
100 	ut_ad(sess);
101 
102 	trx = mem_alloc(sizeof(trx_t));
103 
104 	trx->magic_n = TRX_MAGIC_N;
105 
106 	trx->op_info = "";
107 
108 	trx->is_purge = 0;
109 	trx->is_recovered = 0;
110 	trx->conc_state = TRX_NOT_STARTED;
111 
112 	trx->is_registered = 0;
113 	trx->owns_prepare_mutex = 0;
114 
115 	trx->start_time = ut_time();
116 
117 	trx->isolation_level = TRX_ISO_REPEATABLE_READ;
118 
119 	trx->id = 0;
120 	trx->no = IB_ULONGLONG_MAX;
121 
122 	trx->support_xa = TRUE;
123 
124 	trx->check_foreigns = TRUE;
125 	trx->check_unique_secondary = TRUE;
126 
127 	trx->flush_log_later = FALSE;
128 	trx->must_flush_log_later = FALSE;
129 
130 	trx->dict_operation = TRX_DICT_OP_NONE;
131 	trx->table_id = 0;
132 
133 	trx->mysql_thd = NULL;
134 	trx->duplicates = 0;
135 
136 	trx->n_mysql_tables_in_use = 0;
137 	trx->mysql_n_tables_locked = 0;
138 
139 	trx->mysql_log_file_name = NULL;
140 	trx->mysql_log_offset = 0;
141 
142 	mutex_create(trx_undo_mutex_key, &trx->undo_mutex, SYNC_TRX_UNDO);
143 
144 	trx->rseg = NULL;
145 
146 	trx->undo_no = 0;
147 	trx->last_sql_stat_start.least_undo_no = 0;
148 	trx->insert_undo = NULL;
149 	trx->update_undo = NULL;
150 	trx->undo_no_arr = NULL;
151 
152 	trx->error_state = DB_SUCCESS;
153 	trx->error_key_num = 0;
154 	trx->detailed_error[0] = '\0';
155 
156 	trx->sess = sess;
157 	trx->que_state = TRX_QUE_RUNNING;
158 	trx->n_active_thrs = 0;
159 
160 	trx->handling_signals = FALSE;
161 
162 	UT_LIST_INIT(trx->signals);
163 	UT_LIST_INIT(trx->reply_signals);
164 
165 	trx->graph = NULL;
166 
167 	trx->wait_lock = NULL;
168 	trx->was_chosen_as_deadlock_victim = FALSE;
169 	UT_LIST_INIT(trx->wait_thrs);
170 
171 	trx->lock_heap = mem_heap_create_in_buffer(256);
172 	UT_LIST_INIT(trx->trx_locks);
173 
174 	UT_LIST_INIT(trx->trx_savepoints);
175 
176 	trx->dict_operation_lock_mode = 0;
177 	trx->has_search_latch = FALSE;
178 	trx->search_latch_timeout = BTR_SEA_TIMEOUT;
179 
180 	trx->declared_to_be_inside_innodb = FALSE;
181 	trx->n_tickets_to_enter_innodb = 0;
182 
183 	trx->global_read_view_heap = mem_heap_create(256);
184 	trx->global_read_view = NULL;
185 	trx->read_view = NULL;
186 
187 	/* Set X/Open XA transaction identification to NULL */
188 	memset(&trx->xid, 0, sizeof(trx->xid));
189 	trx->xid.formatID = -1;
190 
191 	trx->n_autoinc_rows = 0;
192 
193 	/* Remember to free the vector explicitly. */
194 	trx->autoinc_locks = ib_vector_create(
195 		mem_heap_create(sizeof(ib_vector_t) + sizeof(void*) * 4), 4);
196 
197 	return(trx);
198 }
199 
200 /********************************************************************//**
201 Creates a transaction object for MySQL.
202 @return	own: transaction object */
203 UNIV_INTERN
204 trx_t*
trx_allocate_for_mysql(void)205 trx_allocate_for_mysql(void)
206 /*========================*/
207 {
208 	trx_t*	trx;
209 
210 	mutex_enter(&kernel_mutex);
211 
212 	trx = trx_create(trx_dummy_sess);
213 
214 	trx_n_mysql_transactions++;
215 
216 	UT_LIST_ADD_FIRST(mysql_trx_list, trx_sys->mysql_trx_list, trx);
217 
218 	mutex_exit(&kernel_mutex);
219 
220 	return(trx);
221 }
222 
223 /********************************************************************//**
224 Creates a transaction object for background operations by the master thread.
225 @return	own: transaction object */
226 UNIV_INTERN
227 trx_t*
trx_allocate_for_background(void)228 trx_allocate_for_background(void)
229 /*=============================*/
230 {
231 	trx_t*	trx;
232 
233 	mutex_enter(&kernel_mutex);
234 
235 	trx = trx_create(trx_dummy_sess);
236 
237 	mutex_exit(&kernel_mutex);
238 
239 	return(trx);
240 }
241 
242 /********************************************************************//**
243 Releases the search latch if trx has reserved it. */
244 UNIV_INTERN
245 void
trx_search_latch_release_if_reserved(trx_t * trx)246 trx_search_latch_release_if_reserved(
247 /*=================================*/
248 	trx_t*	   trx) /*!< in: transaction */
249 {
250 	if (trx->has_search_latch) {
251 		rw_lock_s_unlock(&btr_search_latch);
252 
253 		trx->has_search_latch = FALSE;
254 	}
255 }
256 
257 /********************************************************************//**
258 Frees a transaction object. */
259 UNIV_INTERN
260 void
trx_free(trx_t * trx)261 trx_free(
262 /*=====*/
263 	trx_t*	trx)	/*!< in, own: trx object */
264 {
265 	ut_ad(mutex_own(&kernel_mutex));
266 
267 	if (trx->declared_to_be_inside_innodb) {
268 		ut_print_timestamp(stderr);
269 		fputs("  InnoDB: Error: Freeing a trx which is declared"
270 		      " to be processing\n"
271 		      "InnoDB: inside InnoDB.\n", stderr);
272 		trx_print(stderr, trx, 600);
273 		putc('\n', stderr);
274 
275 		/* This is an error but not a fatal error. We must keep
276 		the counters like srv_conc_n_threads accurate. */
277 		srv_conc_force_exit_innodb(trx);
278 	}
279 
280 	if (trx->n_mysql_tables_in_use != 0
281 	    || trx->mysql_n_tables_locked != 0) {
282 
283 		ut_print_timestamp(stderr);
284 		fprintf(stderr,
285 			"  InnoDB: Error: MySQL is freeing a thd\n"
286 			"InnoDB: though trx->n_mysql_tables_in_use is %lu\n"
287 			"InnoDB: and trx->mysql_n_tables_locked is %lu.\n",
288 			(ulong)trx->n_mysql_tables_in_use,
289 			(ulong)trx->mysql_n_tables_locked);
290 
291 		trx_print(stderr, trx, 600);
292 
293 		ut_print_buf(stderr, trx, sizeof(trx_t));
294 		putc('\n', stderr);
295 	}
296 
297 	ut_a(trx->magic_n == TRX_MAGIC_N);
298 
299 	trx->magic_n = 11112222;
300 
301 	ut_a(trx->conc_state == TRX_NOT_STARTED);
302 
303 	mutex_free(&(trx->undo_mutex));
304 
305 	ut_a(trx->insert_undo == NULL);
306 	ut_a(trx->update_undo == NULL);
307 
308 	if (trx->undo_no_arr) {
309 		trx_undo_arr_free(trx->undo_no_arr);
310 	}
311 
312 	ut_a(UT_LIST_GET_LEN(trx->signals) == 0);
313 	ut_a(UT_LIST_GET_LEN(trx->reply_signals) == 0);
314 
315 	ut_a(trx->wait_lock == NULL);
316 	ut_a(UT_LIST_GET_LEN(trx->wait_thrs) == 0);
317 
318 	ut_a(!trx->has_search_latch);
319 
320 	ut_a(trx->dict_operation_lock_mode == 0);
321 
322 	if (trx->lock_heap) {
323 		mem_heap_free(trx->lock_heap);
324 	}
325 
326 	ut_a(UT_LIST_GET_LEN(trx->trx_locks) == 0);
327 
328 	if (trx->global_read_view_heap) {
329 		mem_heap_free(trx->global_read_view_heap);
330 	}
331 
332 	trx->global_read_view = NULL;
333 
334 	ut_a(trx->read_view == NULL);
335 
336 	ut_a(ib_vector_is_empty(trx->autoinc_locks));
337 	/* We allocated a dedicated heap for the vector. */
338 	ib_vector_free(trx->autoinc_locks);
339 
340 	mem_free(trx);
341 }
342 
343 /********************************************************************//**
344 At shutdown, frees a transaction object that is in the PREPARED state. */
345 UNIV_INTERN
346 void
trx_free_prepared(trx_t * trx)347 trx_free_prepared(
348 /*==============*/
349 	trx_t*	trx)	/*!< in, own: trx object */
350 {
351 	ut_ad(mutex_own(&kernel_mutex));
352 	ut_a(trx->conc_state == TRX_PREPARED);
353 	ut_a(trx->magic_n == TRX_MAGIC_N);
354 
355 	/* Prepared transactions are sort of active; they allow
356 	ROLLBACK and COMMIT operations. Because the system does not
357 	contain any other transactions than prepared transactions at
358 	the shutdown stage and because a transaction cannot become
359 	PREPARED while holding locks, it is safe to release the locks
360 	held by PREPARED transactions here at shutdown.*/
361 	lock_release_off_kernel(trx);
362 
363 	trx_undo_free_prepared(trx);
364 
365 	mutex_free(&trx->undo_mutex);
366 
367 	if (trx->undo_no_arr) {
368 		trx_undo_arr_free(trx->undo_no_arr);
369 	}
370 
371 	ut_a(UT_LIST_GET_LEN(trx->signals) == 0);
372 	ut_a(UT_LIST_GET_LEN(trx->reply_signals) == 0);
373 
374 	ut_a(trx->wait_lock == NULL);
375 	ut_a(UT_LIST_GET_LEN(trx->wait_thrs) == 0);
376 
377 	ut_a(!trx->has_search_latch);
378 
379 	ut_a(trx->dict_operation_lock_mode == 0);
380 
381 	if (trx->lock_heap) {
382 		mem_heap_free(trx->lock_heap);
383 	}
384 
385 	if (trx->global_read_view_heap) {
386 		mem_heap_free(trx->global_read_view_heap);
387 	}
388 
389 	ut_a(ib_vector_is_empty(trx->autoinc_locks));
390 	ib_vector_free(trx->autoinc_locks);
391 
392 	UT_LIST_REMOVE(trx_list, trx_sys->trx_list, trx);
393 
394 	mem_free(trx);
395 }
396 
397 /********************************************************************//**
398 Frees a transaction object for MySQL. */
399 UNIV_INTERN
400 void
trx_free_for_mysql(trx_t * trx)401 trx_free_for_mysql(
402 /*===============*/
403 	trx_t*	trx)	/*!< in, own: trx object */
404 {
405 	mutex_enter(&kernel_mutex);
406 
407 	UT_LIST_REMOVE(mysql_trx_list, trx_sys->mysql_trx_list, trx);
408 
409 	trx_free(trx);
410 
411 	ut_a(trx_n_mysql_transactions > 0);
412 
413 	trx_n_mysql_transactions--;
414 
415 	mutex_exit(&kernel_mutex);
416 }
417 
418 /********************************************************************//**
419 Frees a transaction object of a background operation of the master thread. */
420 UNIV_INTERN
421 void
trx_free_for_background(trx_t * trx)422 trx_free_for_background(
423 /*====================*/
424 	trx_t*	trx)	/*!< in, own: trx object */
425 {
426 	mutex_enter(&kernel_mutex);
427 
428 	trx_free(trx);
429 
430 	mutex_exit(&kernel_mutex);
431 }
432 
433 /****************************************************************//**
434 Inserts the trx handle in the trx system trx list in the right position.
435 The list is sorted on the trx id so that the biggest id is at the list
436 start. This function is used at the database startup to insert incomplete
437 transactions to the list. */
438 static
439 void
trx_list_insert_ordered(trx_t * trx)440 trx_list_insert_ordered(
441 /*====================*/
442 	trx_t*	trx)	/*!< in: trx handle */
443 {
444 	trx_t*	trx2;
445 
446 	ut_ad(mutex_own(&kernel_mutex));
447 
448 	trx2 = UT_LIST_GET_FIRST(trx_sys->trx_list);
449 
450 	while (trx2 != NULL) {
451 		if (trx->id >= trx2->id) {
452 
453 			ut_ad(trx->id > trx2->id);
454 			break;
455 		}
456 		trx2 = UT_LIST_GET_NEXT(trx_list, trx2);
457 	}
458 
459 	if (trx2 != NULL) {
460 		trx2 = UT_LIST_GET_PREV(trx_list, trx2);
461 
462 		if (trx2 == NULL) {
463 			UT_LIST_ADD_FIRST(trx_list, trx_sys->trx_list, trx);
464 		} else {
465 			UT_LIST_INSERT_AFTER(trx_list, trx_sys->trx_list,
466 					     trx2, trx);
467 		}
468 	} else {
469 		UT_LIST_ADD_LAST(trx_list, trx_sys->trx_list, trx);
470 	}
471 }
472 
473 /****************************************************************//**
474 Creates trx objects for transactions and initializes the trx list of
475 trx_sys at database start. Rollback segment and undo log lists must
476 already exist when this function is called, because the lists of
477 transactions to be rolled back or cleaned up are built based on the
478 undo log lists. */
479 UNIV_INTERN
480 void
trx_lists_init_at_db_start(void)481 trx_lists_init_at_db_start(void)
482 /*============================*/
483 {
484 	trx_rseg_t*	rseg;
485 	trx_undo_t*	undo;
486 	trx_t*		trx;
487 
488 	ut_ad(mutex_own(&kernel_mutex));
489 	UT_LIST_INIT(trx_sys->trx_list);
490 
491 	/* Look from the rollback segments if there exist undo logs for
492 	transactions */
493 
494 	rseg = UT_LIST_GET_FIRST(trx_sys->rseg_list);
495 
496 	while (rseg != NULL) {
497 		undo = UT_LIST_GET_FIRST(rseg->insert_undo_list);
498 
499 		while (undo != NULL) {
500 
501 			trx = trx_create(trx_dummy_sess);
502 
503 			trx->is_recovered = TRUE;
504 			trx->id = undo->trx_id;
505 			trx->xid = undo->xid;
506 			trx->insert_undo = undo;
507 			trx->rseg = rseg;
508 
509 			if (undo->state != TRX_UNDO_ACTIVE) {
510 
511 				/* Prepared transactions are left in
512 				the prepared state waiting for a
513 				commit or abort decision from MySQL */
514 
515 				if (undo->state == TRX_UNDO_PREPARED) {
516 
517 					fprintf(stderr,
518 						"InnoDB: Transaction "
519 						TRX_ID_FMT
520 						" was in the"
521 						" XA prepared state.\n",
522 						(ullint) trx->id);
523 
524 					if (srv_force_recovery == 0) {
525 
526 						trx->conc_state = TRX_PREPARED;
527 						trx_n_prepared++;
528 					} else {
529 						fprintf(stderr,
530 							"InnoDB: Since"
531 							" innodb_force_recovery"
532 							" > 0, we will"
533 							" rollback it"
534 							" anyway.\n");
535 
536 						trx->conc_state = TRX_ACTIVE;
537 					}
538 				} else {
539 					trx->conc_state
540 						= TRX_COMMITTED_IN_MEMORY;
541 				}
542 
543 				/* We give a dummy value for the trx no;
544 				this should have no relevance since purge
545 				is not interested in committed transaction
546 				numbers, unless they are in the history
547 				list, in which case it looks the number
548 				from the disk based undo log structure */
549 
550 				trx->no = trx->id;
551 			} else {
552 				trx->conc_state = TRX_ACTIVE;
553 
554 				/* A running transaction always has the number
555 				field inited to IB_ULONGLONG_MAX */
556 
557 				trx->no = IB_ULONGLONG_MAX;
558 			}
559 
560 			if (undo->dict_operation) {
561 				trx_set_dict_operation(
562 					trx, TRX_DICT_OP_TABLE);
563 				trx->table_id = undo->table_id;
564 			}
565 
566 			if (!undo->empty) {
567 				trx->undo_no = undo->top_undo_no + 1;
568 			}
569 
570 			trx_list_insert_ordered(trx);
571 
572 			undo = UT_LIST_GET_NEXT(undo_list, undo);
573 		}
574 
575 		undo = UT_LIST_GET_FIRST(rseg->update_undo_list);
576 
577 		while (undo != NULL) {
578 			trx = trx_get_on_id(undo->trx_id);
579 
580 			if (NULL == trx) {
581 				trx = trx_create(trx_dummy_sess);
582 
583 				trx->is_recovered = TRUE;
584 				trx->id = undo->trx_id;
585 				trx->xid = undo->xid;
586 
587 				if (undo->state != TRX_UNDO_ACTIVE) {
588 
589 					/* Prepared transactions are left in
590 					the prepared state waiting for a
591 					commit or abort decision from MySQL */
592 
593 					if (undo->state == TRX_UNDO_PREPARED) {
594 						fprintf(stderr,
595 							"InnoDB: Transaction "
596 							TRX_ID_FMT " was in the"
597 							" XA prepared state.\n",
598 							(ullint) trx->id);
599 
600 						if (srv_force_recovery == 0) {
601 
602 							trx->conc_state
603 								= TRX_PREPARED;
604 							trx_n_prepared++;
605 						} else {
606 							fprintf(stderr,
607 								"InnoDB: Since"
608 								" innodb_force_recovery"
609 								" > 0, we will"
610 								" rollback it"
611 								" anyway.\n");
612 
613 							trx->conc_state
614 								= TRX_ACTIVE;
615 						}
616 					} else {
617 						trx->conc_state
618 							= TRX_COMMITTED_IN_MEMORY;
619 					}
620 
621 					/* We give a dummy value for the trx
622 					number */
623 
624 					trx->no = trx->id;
625 				} else {
626 					trx->conc_state = TRX_ACTIVE;
627 
628 					/* A running transaction always has
629 					the number field inited to
630 					IB_ULONGLONG_MAX */
631 
632 					trx->no = IB_ULONGLONG_MAX;
633 				}
634 
635 				trx->rseg = rseg;
636 				trx_list_insert_ordered(trx);
637 
638 				if (undo->dict_operation) {
639 					trx_set_dict_operation(
640 						trx, TRX_DICT_OP_TABLE);
641 					trx->table_id = undo->table_id;
642 				}
643 			}
644 
645 			trx->update_undo = undo;
646 
647 			if ((!undo->empty)
648 			    && undo->top_undo_no >= trx->undo_no) {
649 
650 				trx->undo_no = undo->top_undo_no + 1;
651 			}
652 
653 			undo = UT_LIST_GET_NEXT(undo_list, undo);
654 		}
655 
656 		rseg = UT_LIST_GET_NEXT(rseg_list, rseg);
657 	}
658 }
659 
660 /******************************************************************//**
661 Assigns a rollback segment to a transaction in a round-robin fashion.
662 @return	assigned rollback segment instance */
663 UNIV_INLINE
664 trx_rseg_t*
trx_assign_rseg(ulint max_undo_logs)665 trx_assign_rseg(
666 /*============*/
667 	ulint	max_undo_logs)	/*!< in: maximum number of UNDO logs to use */
668 {
669 	trx_rseg_t*	rseg = trx_sys->latest_rseg;
670 
671 	ut_ad(mutex_own(&kernel_mutex));
672 
673 	rseg = UT_LIST_GET_NEXT(rseg_list, rseg);
674 
675 	if (rseg == NULL || rseg->id == max_undo_logs - 1) {
676 		rseg = UT_LIST_GET_FIRST(trx_sys->rseg_list);
677 	}
678 
679 	trx_sys->latest_rseg = rseg;
680 
681 	return(rseg);
682 }
683 
684 /****************************************************************//**
685 Starts a new transaction.
686 @return	TRUE */
687 UNIV_INTERN
688 ibool
trx_start_low(trx_t * trx,ulint rseg_id)689 trx_start_low(
690 /*==========*/
691 	trx_t*	trx,	/*!< in: transaction */
692 	ulint	rseg_id)/*!< in: rollback segment id; if ULINT_UNDEFINED
693 			is passed, the system chooses the rollback segment
694 			automatically in a round-robin fashion */
695 {
696 	trx_rseg_t*	rseg;
697 
698 	ut_ad(mutex_own(&kernel_mutex));
699 	ut_ad(trx->rseg == NULL);
700 
701 	if (trx->is_purge) {
702 		trx->id = 0;
703 		trx->conc_state = TRX_ACTIVE;
704 		trx->start_time = time(NULL);
705 
706 		return(TRUE);
707 	}
708 
709 	ut_ad(trx->conc_state != TRX_ACTIVE);
710 
711 	ut_a(rseg_id == ULINT_UNDEFINED);
712 
713 	rseg = trx_assign_rseg(srv_rollback_segments);
714 
715 	trx->id = trx_sys_get_new_trx_id();
716 
717 	/* The initial value for trx->no: IB_ULONGLONG_MAX is used in
718 	read_view_open_now: */
719 
720 	trx->no = IB_ULONGLONG_MAX;
721 
722 	trx->rseg = rseg;
723 
724 	trx->conc_state = TRX_ACTIVE;
725 	trx->start_time = time(NULL);
726 
727 	UT_LIST_ADD_FIRST(trx_list, trx_sys->trx_list, trx);
728 
729 	return(TRUE);
730 }
731 
732 /****************************************************************//**
733 Starts a new transaction.
734 @return	TRUE */
735 UNIV_INTERN
736 ibool
trx_start(trx_t * trx,ulint rseg_id)737 trx_start(
738 /*======*/
739 	trx_t*	trx,	/*!< in: transaction */
740 	ulint	rseg_id)/*!< in: rollback segment id; if ULINT_UNDEFINED
741 			is passed, the system chooses the rollback segment
742 			automatically in a round-robin fashion */
743 {
744 	ibool	ret;
745 
746 	/* Update the info whether we should skip XA steps that eat CPU time
747 	For the duration of the transaction trx->support_xa is not reread
748 	from thd so any changes in the value take effect in the next
749 	transaction. This is to avoid a scenario where some undo
750 	generated by a transaction, has XA stuff, and other undo,
751 	generated by the same transaction, doesn't. */
752 	trx->support_xa = thd_supports_xa(trx->mysql_thd);
753 
754 	mutex_enter(&kernel_mutex);
755 
756 	ret = trx_start_low(trx, rseg_id);
757 
758 	mutex_exit(&kernel_mutex);
759 
760 	return(ret);
761 }
762 
763 /****************************************************************//**
764 Set the transaction serialisation number. */
765 static
766 void
trx_serialisation_number_get(trx_t * trx)767 trx_serialisation_number_get(
768 /*=========================*/
769 	trx_t*		trx)	/*!< in: transaction */
770 {
771 	trx_rseg_t*	rseg;
772 
773 	rseg = trx->rseg;
774 
775 	ut_ad(mutex_own(&rseg->mutex));
776 
777 	mutex_enter(&kernel_mutex);
778 
779 	trx->no = trx_sys_get_new_trx_id();
780 
781 	/* If the rollack segment is not empty then the
782 	new trx_t::no can't be less than any trx_t::no
783 	already in the rollback segment. User threads only
784 	produce events when a rollback segment is empty. */
785 
786 	if (rseg->last_page_no == FIL_NULL) {
787 		void*		ptr;
788 		rseg_queue_t	rseg_queue;
789 
790 		rseg_queue.rseg = rseg;
791 		rseg_queue.trx_no = trx->no;
792 
793 		mutex_enter(&purge_sys->bh_mutex);
794 
795 		/* This is to reduce the pressure on the kernel mutex,
796 		though in reality it should make very little (read no)
797 		difference because this code path is only taken when the
798 		rbs is empty. */
799 
800 		mutex_exit(&kernel_mutex);
801 
802 		ptr = ib_bh_push(purge_sys->ib_bh, &rseg_queue);
803 		ut_a(ptr);
804 
805 		mutex_exit(&purge_sys->bh_mutex);
806 	} else {
807 		mutex_exit(&kernel_mutex);
808 	}
809 }
810 
811 /****************************************************************//**
812 Assign the transaction its history serialisation number and write the
813 update UNDO log record to the assigned rollback segment.
814 @return the LSN of the UNDO log write. */
815 static
816 ib_uint64_t
trx_write_serialisation_history(trx_t * trx)817 trx_write_serialisation_history(
818 /*============================*/
819 	trx_t*		trx)	/*!< in: transaction */
820 {
821 	mtr_t		mtr;
822 	trx_rseg_t*	rseg;
823 
824 	ut_ad(!mutex_own(&kernel_mutex));
825 
826 	rseg = trx->rseg;
827 
828 	mtr_start(&mtr);
829 
830 	/* Change the undo log segment states from TRX_UNDO_ACTIVE
831 	to some other state: these modifications to the file data
832 	structure define the transaction as committed in the file
833 	based domain, at the serialization point of the log sequence
834 	number lsn obtained below. */
835 
836 	if (trx->update_undo != NULL) {
837 		page_t*		undo_hdr_page;
838 		trx_undo_t*	undo = trx->update_undo;
839 
840 		/* We have to hold the rseg mutex because update
841 		log headers have to be put to the history list in the
842 		(serialisation) order of the UNDO trx number. This is
843 		required for the purge in-memory data structures too. */
844 
845 		mutex_enter(&rseg->mutex);
846 
847 		/* Assign the transaction serialisation number and also
848 		update the purge min binary heap if this is the first
849 		UNDO log being written to the assigned rollback segment. */
850 
851 		trx_serialisation_number_get(trx);
852 
853 		/* It is not necessary to obtain trx->undo_mutex here
854 		because only a single OS thread is allowed to do the
855 		transaction commit for this transaction. */
856 
857 		undo_hdr_page = trx_undo_set_state_at_finish(undo, &mtr);
858 
859 		trx_undo_update_cleanup(trx, undo_hdr_page, &mtr);
860 	} else {
861 		mutex_enter(&rseg->mutex);
862 	}
863 
864 	if (trx->insert_undo != NULL) {
865 		trx_undo_set_state_at_finish(trx->insert_undo, &mtr);
866 	}
867 
868 	mutex_exit(&rseg->mutex);
869 
870 	/* Update the latest MySQL binlog name and offset info
871 	in trx sys header if MySQL binlogging is on or the database
872 	server is a MySQL replication slave */
873 
874 	if (trx->mysql_log_file_name
875 	    && trx->mysql_log_file_name[0] != '\0') {
876 
877 		trx_sys_update_mysql_binlog_offset(
878 			trx->mysql_log_file_name,
879 			trx->mysql_log_offset,
880 			TRX_SYS_MYSQL_LOG_INFO, &mtr);
881 
882 		trx->mysql_log_file_name = NULL;
883 	}
884 
885 	/* The following call commits the mini-transaction, making the
886 	whole transaction committed in the file-based world, at this
887 	log sequence number. The transaction becomes 'durable' when
888 	we write the log to disk, but in the logical sense the commit
889 	in the file-based data structures (undo logs etc.) happens
890 	here.
891 
892 	NOTE that transaction numbers, which are assigned only to
893 	transactions with an update undo log, do not necessarily come
894 	in exactly the same order as commit lsn's, if the transactions
895 	have different rollback segments. To get exactly the same
896 	order we should hold the kernel mutex up to this point,
897 	adding to the contention of the kernel mutex. However, if
898 	a transaction T2 is able to see modifications made by
899 	a transaction T1, T2 will always get a bigger transaction
900 	number and a bigger commit lsn than T1. */
901 
902 	/*--------------*/
903 	mtr_commit(&mtr);
904 	/*--------------*/
905 
906 	return(mtr.end_lsn);
907 }
908 
909 /****************************************************************//**
910 Commits a transaction. */
911 UNIV_INTERN
912 void
trx_commit_off_kernel(trx_t * trx)913 trx_commit_off_kernel(
914 /*==================*/
915 	trx_t*	trx)	/*!< in: transaction */
916 {
917 	ib_uint64_t	lsn;
918 
919 	ut_ad(mutex_own(&kernel_mutex));
920 
921 	trx->must_flush_log_later = FALSE;
922 
923 	/* If the transaction made any updates then we need to write the
924 	UNDO logs for the updates to the assigned rollback segment. */
925 
926 	if (trx->insert_undo != NULL || trx->update_undo != NULL) {
927 		mutex_exit(&kernel_mutex);
928 
929 		lsn = trx_write_serialisation_history(trx);
930 
931 		mutex_enter(&kernel_mutex);
932 	} else {
933 		lsn = 0;
934 	}
935 
936 	ut_ad(trx->conc_state == TRX_ACTIVE || trx->conc_state == TRX_PREPARED);
937 	ut_ad(mutex_own(&kernel_mutex));
938 
939 	if (UNIV_UNLIKELY(trx->conc_state == TRX_PREPARED)) {
940 		ut_a(trx_n_prepared > 0);
941 		trx_n_prepared--;
942 	}
943 
944 	/* The following assignment makes the transaction committed in memory
945 	and makes its changes to data visible to other transactions.
946 	NOTE that there is a small discrepancy from the strict formal
947 	visibility rules here: a human user of the database can see
948 	modifications made by another transaction T even before the necessary
949 	log segment has been flushed to the disk. If the database happens to
950 	crash before the flush, the user has seen modifications from T which
951 	will never be a committed transaction. However, any transaction T2
952 	which sees the modifications of the committing transaction T, and
953 	which also itself makes modifications to the database, will get an lsn
954 	larger than the committing transaction T. In the case where the log
955 	flush fails, and T never gets committed, also T2 will never get
956 	committed. */
957 
958 	/*--------------------------------------*/
959 	trx->conc_state = TRX_COMMITTED_IN_MEMORY;
960 	/*--------------------------------------*/
961 
962 	/* If we release kernel_mutex below and we are still doing
963 	recovery i.e.: back ground rollback thread is still active
964 	then there is a chance that the rollback thread may see
965 	this trx as COMMITTED_IN_MEMORY and goes adhead to clean it
966 	up calling trx_cleanup_at_db_startup(). This can happen
967 	in the case we are committing a trx here that is left in
968 	PREPARED state during the crash. Note that commit of the
969 	rollback of a PREPARED trx happens in the recovery thread
970 	while the rollback of other transactions happen in the
971 	background thread. To avoid this race we unconditionally
972 	unset the is_recovered flag from the trx. */
973 
974 	trx->is_recovered = FALSE;
975 
976 	lock_release_off_kernel(trx);
977 
978 	if (trx->global_read_view) {
979 		read_view_close(trx->global_read_view);
980 		mem_heap_empty(trx->global_read_view_heap);
981 		trx->global_read_view = NULL;
982 	}
983 
984 	trx->read_view = NULL;
985 
986 	if (lsn) {
987 
988 		mutex_exit(&kernel_mutex);
989 
990 		if (trx->insert_undo != NULL) {
991 
992 			trx_undo_insert_cleanup(trx);
993 		}
994 
995 		/* NOTE that we could possibly make a group commit more
996 		efficient here: call os_thread_yield here to allow also other
997 		trxs to come to commit! */
998 
999 		/*-------------------------------------*/
1000 
1001 		/* Depending on the my.cnf options, we may now write the log
1002 		buffer to the log files, making the transaction durable if
1003 		the OS does not crash. We may also flush the log files to
1004 		disk, making the transaction durable also at an OS crash or a
1005 		power outage.
1006 
1007 		The idea in InnoDB's group commit is that a group of
1008 		transactions gather behind a trx doing a physical disk write
1009 		to log files, and when that physical write has been completed,
1010 		one of those transactions does a write which commits the whole
1011 		group. Note that this group commit will only bring benefit if
1012 		there are > 2 users in the database. Then at least 2 users can
1013 		gather behind one doing the physical log write to disk.
1014 
1015 		If we are calling trx_commit() under prepare_commit_mutex, we
1016 		will delay possible log write and flush to a separate function
1017 		trx_commit_complete_for_mysql(), which is only called when the
1018 		thread has released the mutex. This is to make the
1019 		group commit algorithm to work. Otherwise, the prepare_commit
1020 		mutex would serialize all commits and prevent a group of
1021 		transactions from gathering. */
1022 
1023 		if (trx->flush_log_later) {
1024 			/* Do nothing yet */
1025 			trx->must_flush_log_later = TRUE;
1026 		} else if (srv_flush_log_at_trx_commit == 0) {
1027 			/* Do nothing */
1028 		} else if (srv_flush_log_at_trx_commit == 1) {
1029 			if (srv_unix_file_flush_method == SRV_UNIX_NOSYNC) {
1030 				/* Write the log but do not flush it to disk */
1031 
1032 				log_write_up_to(lsn, LOG_WAIT_ONE_GROUP,
1033 						FALSE);
1034 			} else {
1035 				/* Write the log to the log files AND flush
1036 				them to disk */
1037 
1038 				log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, TRUE);
1039 			}
1040 		} else if (srv_flush_log_at_trx_commit == 2) {
1041 
1042 			/* Write the log but do not flush it to disk */
1043 
1044 			log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, FALSE);
1045 		} else {
1046 			ut_error;
1047 		}
1048 
1049 		trx->commit_lsn = lsn;
1050 
1051 		/*-------------------------------------*/
1052 
1053 		mutex_enter(&kernel_mutex);
1054 	}
1055 
1056 	/* Free all savepoints */
1057 	trx_roll_free_all_savepoints(trx);
1058 
1059 	trx->conc_state = TRX_NOT_STARTED;
1060 	trx->rseg = NULL;
1061 	trx->undo_no = 0;
1062 	trx->last_sql_stat_start.least_undo_no = 0;
1063 
1064 	ut_ad(UT_LIST_GET_LEN(trx->wait_thrs) == 0);
1065 	ut_ad(UT_LIST_GET_LEN(trx->trx_locks) == 0);
1066 
1067 	UT_LIST_REMOVE(trx_list, trx_sys->trx_list, trx);
1068 
1069 	trx->error_state = DB_SUCCESS;
1070 }
1071 
1072 /****************************************************************//**
1073 Cleans up a transaction at database startup. The cleanup is needed if
1074 the transaction already got to the middle of a commit when the database
1075 crashed, and we cannot roll it back. */
1076 UNIV_INTERN
1077 void
trx_cleanup_at_db_startup(trx_t * trx)1078 trx_cleanup_at_db_startup(
1079 /*======================*/
1080 	trx_t*	trx)	/*!< in: transaction */
1081 {
1082 	if (trx->insert_undo != NULL) {
1083 
1084 		trx_undo_insert_cleanup(trx);
1085 	}
1086 
1087 	trx->conc_state = TRX_NOT_STARTED;
1088 	trx->rseg = NULL;
1089 	trx->undo_no = 0;
1090 	trx->last_sql_stat_start.least_undo_no = 0;
1091 
1092 	UT_LIST_REMOVE(trx_list, trx_sys->trx_list, trx);
1093 }
1094 
1095 /********************************************************************//**
1096 Assigns a read view for a consistent read query. All the consistent reads
1097 within the same transaction will get the same read view, which is created
1098 when this function is first called for a new started transaction.
1099 @return	consistent read view */
1100 UNIV_INTERN
1101 read_view_t*
trx_assign_read_view(trx_t * trx)1102 trx_assign_read_view(
1103 /*=================*/
1104 	trx_t*	trx)	/*!< in: active transaction */
1105 {
1106 	ut_ad(trx->conc_state == TRX_ACTIVE);
1107 
1108 	if (trx->read_view) {
1109 		return(trx->read_view);
1110 	}
1111 
1112 	mutex_enter(&kernel_mutex);
1113 
1114 	if (!trx->read_view) {
1115 		trx->read_view = read_view_open_now(
1116 			trx->id, trx->global_read_view_heap);
1117 		trx->global_read_view = trx->read_view;
1118 	}
1119 
1120 	mutex_exit(&kernel_mutex);
1121 
1122 	return(trx->read_view);
1123 }
1124 
1125 /****************************************************************//**
1126 Commits a transaction. NOTE that the kernel mutex is temporarily released. */
1127 static
1128 void
trx_handle_commit_sig_off_kernel(trx_t * trx,que_thr_t ** next_thr)1129 trx_handle_commit_sig_off_kernel(
1130 /*=============================*/
1131 	trx_t*		trx,		/*!< in: transaction */
1132 	que_thr_t**	next_thr)	/*!< in/out: next query thread to run;
1133 					if the value which is passed in is
1134 					a pointer to a NULL pointer, then the
1135 					calling function can start running
1136 					a new query thread */
1137 {
1138 	trx_sig_t*	sig;
1139 	trx_sig_t*	next_sig;
1140 
1141 	ut_ad(mutex_own(&kernel_mutex));
1142 
1143 	trx->que_state = TRX_QUE_COMMITTING;
1144 
1145 	trx_commit_off_kernel(trx);
1146 
1147 	ut_ad(UT_LIST_GET_LEN(trx->wait_thrs) == 0);
1148 
1149 	/* Remove all TRX_SIG_COMMIT signals from the signal queue and send
1150 	reply messages to them */
1151 
1152 	sig = UT_LIST_GET_FIRST(trx->signals);
1153 
1154 	while (sig != NULL) {
1155 		next_sig = UT_LIST_GET_NEXT(signals, sig);
1156 
1157 		if (sig->type == TRX_SIG_COMMIT) {
1158 
1159 			trx_sig_reply(sig, next_thr);
1160 			trx_sig_remove(trx, sig);
1161 		}
1162 
1163 		sig = next_sig;
1164 	}
1165 
1166 	trx->que_state = TRX_QUE_RUNNING;
1167 }
1168 
1169 /***********************************************************//**
1170 The transaction must be in the TRX_QUE_LOCK_WAIT state. Puts it to
1171 the TRX_QUE_RUNNING state and releases query threads which were
1172 waiting for a lock in the wait_thrs list. */
1173 UNIV_INTERN
1174 void
trx_end_lock_wait(trx_t * trx)1175 trx_end_lock_wait(
1176 /*==============*/
1177 	trx_t*	trx)	/*!< in: transaction */
1178 {
1179 	que_thr_t*	thr;
1180 
1181 	ut_ad(mutex_own(&kernel_mutex));
1182 	ut_ad(trx->que_state == TRX_QUE_LOCK_WAIT);
1183 
1184 	thr = UT_LIST_GET_FIRST(trx->wait_thrs);
1185 
1186 	while (thr != NULL) {
1187 		que_thr_end_wait_no_next_thr(thr);
1188 
1189 		UT_LIST_REMOVE(trx_thrs, trx->wait_thrs, thr);
1190 
1191 		thr = UT_LIST_GET_FIRST(trx->wait_thrs);
1192 	}
1193 
1194 	trx->que_state = TRX_QUE_RUNNING;
1195 }
1196 
1197 /***********************************************************//**
1198 Moves the query threads in the lock wait list to the SUSPENDED state and puts
1199 the transaction to the TRX_QUE_RUNNING state. */
1200 static
1201 void
trx_lock_wait_to_suspended(trx_t * trx)1202 trx_lock_wait_to_suspended(
1203 /*=======================*/
1204 	trx_t*	trx)	/*!< in: transaction in the TRX_QUE_LOCK_WAIT state */
1205 {
1206 	que_thr_t*	thr;
1207 
1208 	ut_ad(mutex_own(&kernel_mutex));
1209 	ut_ad(trx->que_state == TRX_QUE_LOCK_WAIT);
1210 
1211 	thr = UT_LIST_GET_FIRST(trx->wait_thrs);
1212 
1213 	while (thr != NULL) {
1214 		thr->state = QUE_THR_SUSPENDED;
1215 
1216 		UT_LIST_REMOVE(trx_thrs, trx->wait_thrs, thr);
1217 
1218 		thr = UT_LIST_GET_FIRST(trx->wait_thrs);
1219 	}
1220 
1221 	trx->que_state = TRX_QUE_RUNNING;
1222 }
1223 
1224 /***********************************************************//**
1225 Moves the query threads in the sig reply wait list of trx to the SUSPENDED
1226 state. */
1227 static
1228 void
trx_sig_reply_wait_to_suspended(trx_t * trx)1229 trx_sig_reply_wait_to_suspended(
1230 /*============================*/
1231 	trx_t*	trx)	/*!< in: transaction */
1232 {
1233 	trx_sig_t*	sig;
1234 	que_thr_t*	thr;
1235 
1236 	ut_ad(mutex_own(&kernel_mutex));
1237 
1238 	sig = UT_LIST_GET_FIRST(trx->reply_signals);
1239 
1240 	while (sig != NULL) {
1241 		thr = sig->receiver;
1242 
1243 		ut_ad(thr->state == QUE_THR_SIG_REPLY_WAIT);
1244 
1245 		thr->state = QUE_THR_SUSPENDED;
1246 
1247 		sig->receiver = NULL;
1248 
1249 		UT_LIST_REMOVE(reply_signals, trx->reply_signals, sig);
1250 
1251 		sig = UT_LIST_GET_FIRST(trx->reply_signals);
1252 	}
1253 }
1254 
1255 /*****************************************************************//**
1256 Checks the compatibility of a new signal with the other signals in the
1257 queue.
1258 @return	TRUE if the signal can be queued */
1259 static
1260 ibool
trx_sig_is_compatible(trx_t * trx,ulint type,ulint sender)1261 trx_sig_is_compatible(
1262 /*==================*/
1263 	trx_t*	trx,	/*!< in: trx handle */
1264 	ulint	type,	/*!< in: signal type */
1265 	ulint	sender)	/*!< in: TRX_SIG_SELF or TRX_SIG_OTHER_SESS */
1266 {
1267 	trx_sig_t*	sig;
1268 
1269 	ut_ad(mutex_own(&kernel_mutex));
1270 
1271 	if (UT_LIST_GET_LEN(trx->signals) == 0) {
1272 
1273 		return(TRUE);
1274 	}
1275 
1276 	if (sender == TRX_SIG_SELF) {
1277 		if (type == TRX_SIG_ERROR_OCCURRED) {
1278 
1279 			return(TRUE);
1280 
1281 		} else if (type == TRX_SIG_BREAK_EXECUTION) {
1282 
1283 			return(TRUE);
1284 		} else {
1285 			return(FALSE);
1286 		}
1287 	}
1288 
1289 	ut_ad(sender == TRX_SIG_OTHER_SESS);
1290 
1291 	sig = UT_LIST_GET_FIRST(trx->signals);
1292 
1293 	if (type == TRX_SIG_COMMIT) {
1294 		while (sig != NULL) {
1295 
1296 			if (sig->type == TRX_SIG_TOTAL_ROLLBACK) {
1297 
1298 				return(FALSE);
1299 			}
1300 
1301 			sig = UT_LIST_GET_NEXT(signals, sig);
1302 		}
1303 
1304 		return(TRUE);
1305 
1306 	} else if (type == TRX_SIG_TOTAL_ROLLBACK) {
1307 		while (sig != NULL) {
1308 
1309 			if (sig->type == TRX_SIG_COMMIT) {
1310 
1311 				return(FALSE);
1312 			}
1313 
1314 			sig = UT_LIST_GET_NEXT(signals, sig);
1315 		}
1316 
1317 		return(TRUE);
1318 
1319 	} else if (type == TRX_SIG_BREAK_EXECUTION) {
1320 
1321 		return(TRUE);
1322 	} else {
1323 		ut_error;
1324 
1325 		return(FALSE);
1326 	}
1327 }
1328 
1329 /****************************************************************//**
1330 Sends a signal to a trx object. */
1331 UNIV_INTERN
1332 void
trx_sig_send(trx_t * trx,ulint type,ulint sender,que_thr_t * receiver_thr,trx_savept_t * savept,que_thr_t ** next_thr)1333 trx_sig_send(
1334 /*=========*/
1335 	trx_t*		trx,		/*!< in: trx handle */
1336 	ulint		type,		/*!< in: signal type */
1337 	ulint		sender,		/*!< in: TRX_SIG_SELF or
1338 					TRX_SIG_OTHER_SESS */
1339 	que_thr_t*	receiver_thr,	/*!< in: query thread which wants the
1340 					reply, or NULL; if type is
1341 					TRX_SIG_END_WAIT, this must be NULL */
1342 	trx_savept_t*	savept,		/*!< in: possible rollback savepoint, or
1343 					NULL */
1344 	que_thr_t**	next_thr)	/*!< in/out: next query thread to run;
1345 					if the value which is passed in is
1346 					a pointer to a NULL pointer, then the
1347 					calling function can start running
1348 					a new query thread; if the parameter
1349 					is NULL, it is ignored */
1350 {
1351 	trx_sig_t*	sig;
1352 	trx_t*		receiver_trx;
1353 
1354 	ut_ad(trx);
1355 	ut_ad(mutex_own(&kernel_mutex));
1356 
1357 	if (!trx_sig_is_compatible(trx, type, sender)) {
1358 		/* The signal is not compatible with the other signals in
1359 		the queue: die */
1360 
1361 		ut_error;
1362 	}
1363 
1364 	/* Queue the signal object */
1365 
1366 	if (UT_LIST_GET_LEN(trx->signals) == 0) {
1367 
1368 		/* The signal list is empty: the 'sig' slot must be unused
1369 		(we improve performance a bit by avoiding mem_alloc) */
1370 		sig = &(trx->sig);
1371 	} else {
1372 		/* It might be that the 'sig' slot is unused also in this
1373 		case, but we choose the easy way of using mem_alloc */
1374 
1375 		sig = mem_alloc(sizeof(trx_sig_t));
1376 	}
1377 
1378 	UT_LIST_ADD_LAST(signals, trx->signals, sig);
1379 
1380 	sig->type = type;
1381 	sig->sender = sender;
1382 	sig->receiver = receiver_thr;
1383 
1384 	if (savept) {
1385 		sig->savept = *savept;
1386 	}
1387 
1388 	if (receiver_thr) {
1389 		receiver_trx = thr_get_trx(receiver_thr);
1390 
1391 		UT_LIST_ADD_LAST(reply_signals, receiver_trx->reply_signals,
1392 				 sig);
1393 	}
1394 
1395 	if (trx->sess->state == SESS_ERROR) {
1396 
1397 		trx_sig_reply_wait_to_suspended(trx);
1398 	}
1399 
1400 	if ((sender != TRX_SIG_SELF) || (type == TRX_SIG_BREAK_EXECUTION)) {
1401 		ut_error;
1402 	}
1403 
1404 	/* If there were no other signals ahead in the queue, try to start
1405 	handling of the signal */
1406 
1407 	if (UT_LIST_GET_FIRST(trx->signals) == sig) {
1408 
1409 		trx_sig_start_handle(trx, next_thr);
1410 	}
1411 }
1412 
1413 /****************************************************************//**
1414 Ends signal handling. If the session is in the error state, and
1415 trx->graph_before_signal_handling != NULL, then returns control to the error
1416 handling routine of the graph (currently just returns the control to the
1417 graph root which then will send an error message to the client). */
1418 UNIV_INTERN
1419 void
trx_end_signal_handling(trx_t * trx)1420 trx_end_signal_handling(
1421 /*====================*/
1422 	trx_t*	trx)	/*!< in: trx */
1423 {
1424 	ut_ad(mutex_own(&kernel_mutex));
1425 	ut_ad(trx->handling_signals == TRUE);
1426 
1427 	trx->handling_signals = FALSE;
1428 
1429 	trx->graph = trx->graph_before_signal_handling;
1430 
1431 	if (trx->graph && (trx->sess->state == SESS_ERROR)) {
1432 
1433 		que_fork_error_handle(trx, trx->graph);
1434 	}
1435 }
1436 
1437 /****************************************************************//**
1438 Starts handling of a trx signal. */
1439 UNIV_INTERN
1440 void
trx_sig_start_handle(trx_t * trx,que_thr_t ** next_thr)1441 trx_sig_start_handle(
1442 /*=================*/
1443 	trx_t*		trx,		/*!< in: trx handle */
1444 	que_thr_t**	next_thr)	/*!< in/out: next query thread to run;
1445 					if the value which is passed in is
1446 					a pointer to a NULL pointer, then the
1447 					calling function can start running
1448 					a new query thread; if the parameter
1449 					is NULL, it is ignored */
1450 {
1451 	trx_sig_t*	sig;
1452 	ulint		type;
1453 loop:
1454 	/* We loop in this function body as long as there are queued signals
1455 	we can process immediately */
1456 
1457 	ut_ad(trx);
1458 	ut_ad(mutex_own(&kernel_mutex));
1459 
1460 	if (trx->handling_signals && (UT_LIST_GET_LEN(trx->signals) == 0)) {
1461 
1462 		trx_end_signal_handling(trx);
1463 
1464 		return;
1465 	}
1466 
1467 	if (trx->conc_state == TRX_NOT_STARTED) {
1468 
1469 		trx_start_low(trx, ULINT_UNDEFINED);
1470 	}
1471 
1472 	/* If the trx is in a lock wait state, moves the waiting query threads
1473 	to the suspended state */
1474 
1475 	if (trx->que_state == TRX_QUE_LOCK_WAIT) {
1476 
1477 		trx_lock_wait_to_suspended(trx);
1478 	}
1479 
1480 	/* If the session is in the error state and this trx has threads
1481 	waiting for reply from signals, moves these threads to the suspended
1482 	state, canceling wait reservations; note that if the transaction has
1483 	sent a commit or rollback signal to itself, and its session is not in
1484 	the error state, then nothing is done here. */
1485 
1486 	if (trx->sess->state == SESS_ERROR) {
1487 		trx_sig_reply_wait_to_suspended(trx);
1488 	}
1489 
1490 	/* If there are no running query threads, we can start processing of a
1491 	signal, otherwise we have to wait until all query threads of this
1492 	transaction are aware of the arrival of the signal. */
1493 
1494 	if (trx->n_active_thrs > 0) {
1495 
1496 		return;
1497 	}
1498 
1499 	if (trx->handling_signals == FALSE) {
1500 		trx->graph_before_signal_handling = trx->graph;
1501 
1502 		trx->handling_signals = TRUE;
1503 	}
1504 
1505 	sig = UT_LIST_GET_FIRST(trx->signals);
1506 	type = sig->type;
1507 
1508 	if (type == TRX_SIG_COMMIT) {
1509 
1510 		trx_handle_commit_sig_off_kernel(trx, next_thr);
1511 
1512 	} else if ((type == TRX_SIG_TOTAL_ROLLBACK)
1513 		   || (type == TRX_SIG_ROLLBACK_TO_SAVEPT)) {
1514 
1515 		trx_rollback(trx, sig, next_thr);
1516 
1517 		/* No further signals can be handled until the rollback
1518 		completes, therefore we return */
1519 
1520 		return;
1521 
1522 	} else if (type == TRX_SIG_ERROR_OCCURRED) {
1523 
1524 		trx_rollback(trx, sig, next_thr);
1525 
1526 		/* No further signals can be handled until the rollback
1527 		completes, therefore we return */
1528 
1529 		return;
1530 
1531 	} else if (type == TRX_SIG_BREAK_EXECUTION) {
1532 
1533 		trx_sig_reply(sig, next_thr);
1534 		trx_sig_remove(trx, sig);
1535 	} else {
1536 		ut_error;
1537 	}
1538 
1539 	goto loop;
1540 }
1541 
1542 /****************************************************************//**
1543 Send the reply message when a signal in the queue of the trx has been
1544 handled. */
1545 UNIV_INTERN
1546 void
trx_sig_reply(trx_sig_t * sig,que_thr_t ** next_thr)1547 trx_sig_reply(
1548 /*==========*/
1549 	trx_sig_t*	sig,		/*!< in: signal */
1550 	que_thr_t**	next_thr)	/*!< in/out: next query thread to run;
1551 					if the value which is passed in is
1552 					a pointer to a NULL pointer, then the
1553 					calling function can start running
1554 					a new query thread */
1555 {
1556 	trx_t*	receiver_trx;
1557 
1558 	ut_ad(sig);
1559 	ut_ad(mutex_own(&kernel_mutex));
1560 
1561 	if (sig->receiver != NULL) {
1562 		ut_ad((sig->receiver)->state == QUE_THR_SIG_REPLY_WAIT);
1563 
1564 		receiver_trx = thr_get_trx(sig->receiver);
1565 
1566 		UT_LIST_REMOVE(reply_signals, receiver_trx->reply_signals,
1567 			       sig);
1568 		ut_ad(receiver_trx->sess->state != SESS_ERROR);
1569 
1570 		que_thr_end_wait(sig->receiver, next_thr);
1571 
1572 		sig->receiver = NULL;
1573 
1574 	}
1575 }
1576 
1577 /****************************************************************//**
1578 Removes a signal object from the trx signal queue. */
1579 UNIV_INTERN
1580 void
trx_sig_remove(trx_t * trx,trx_sig_t * sig)1581 trx_sig_remove(
1582 /*===========*/
1583 	trx_t*		trx,	/*!< in: trx handle */
1584 	trx_sig_t*	sig)	/*!< in, own: signal */
1585 {
1586 	ut_ad(trx && sig);
1587 	ut_ad(mutex_own(&kernel_mutex));
1588 
1589 	ut_ad(sig->receiver == NULL);
1590 
1591 	UT_LIST_REMOVE(signals, trx->signals, sig);
1592 	sig->type = 0;	/* reset the field to catch possible bugs */
1593 
1594 	if (sig != &(trx->sig)) {
1595 		mem_free(sig);
1596 	}
1597 }
1598 
1599 /*********************************************************************//**
1600 Creates a commit command node struct.
1601 @return	own: commit node struct */
1602 UNIV_INTERN
1603 commit_node_t*
commit_node_create(mem_heap_t * heap)1604 commit_node_create(
1605 /*===============*/
1606 	mem_heap_t*	heap)	/*!< in: mem heap where created */
1607 {
1608 	commit_node_t*	node;
1609 
1610 	node = mem_heap_alloc(heap, sizeof(commit_node_t));
1611 	node->common.type  = QUE_NODE_COMMIT;
1612 	node->state = COMMIT_NODE_SEND;
1613 
1614 	return(node);
1615 }
1616 
1617 /***********************************************************//**
1618 Performs an execution step for a commit type node in a query graph.
1619 @return	query thread to run next, or NULL */
1620 UNIV_INTERN
1621 que_thr_t*
trx_commit_step(que_thr_t * thr)1622 trx_commit_step(
1623 /*============*/
1624 	que_thr_t*	thr)	/*!< in: query thread */
1625 {
1626 	commit_node_t*	node;
1627 	que_thr_t*	next_thr;
1628 
1629 	node = thr->run_node;
1630 
1631 	ut_ad(que_node_get_type(node) == QUE_NODE_COMMIT);
1632 
1633 	if (thr->prev_node == que_node_get_parent(node)) {
1634 		node->state = COMMIT_NODE_SEND;
1635 	}
1636 
1637 	if (node->state == COMMIT_NODE_SEND) {
1638 		mutex_enter(&kernel_mutex);
1639 
1640 		node->state = COMMIT_NODE_WAIT;
1641 
1642 		next_thr = NULL;
1643 
1644 		thr->state = QUE_THR_SIG_REPLY_WAIT;
1645 
1646 		/* Send the commit signal to the transaction */
1647 
1648 		trx_sig_send(thr_get_trx(thr), TRX_SIG_COMMIT, TRX_SIG_SELF,
1649 			     thr, NULL, &next_thr);
1650 
1651 		mutex_exit(&kernel_mutex);
1652 
1653 		return(next_thr);
1654 	}
1655 
1656 	ut_ad(node->state == COMMIT_NODE_WAIT);
1657 
1658 	node->state = COMMIT_NODE_SEND;
1659 
1660 	thr->run_node = que_node_get_parent(node);
1661 
1662 	return(thr);
1663 }
1664 
1665 /**********************************************************************//**
1666 Does the transaction commit for MySQL.
1667 @return	DB_SUCCESS or error number */
1668 UNIV_INTERN
1669 ulint
trx_commit_for_mysql(trx_t * trx)1670 trx_commit_for_mysql(
1671 /*=================*/
1672 	trx_t*	trx)	/*!< in: trx handle */
1673 {
1674 	/* Because we do not do the commit by sending an Innobase
1675 	sig to the transaction, we must here make sure that trx has been
1676 	started. */
1677 
1678 	ut_a(trx);
1679 
1680 	trx_start_if_not_started(trx);
1681 
1682 	trx->op_info = "committing";
1683 
1684 	mutex_enter(&kernel_mutex);
1685 
1686 	trx_commit_off_kernel(trx);
1687 
1688 	mutex_exit(&kernel_mutex);
1689 
1690 	trx->op_info = "";
1691 
1692 	return(DB_SUCCESS);
1693 }
1694 
1695 /**********************************************************************//**
1696 If required, flushes the log to disk if we called trx_commit_for_mysql()
1697 with trx->flush_log_later == TRUE.
1698 @return	0 or error number */
1699 UNIV_INTERN
1700 ulint
trx_commit_complete_for_mysql(trx_t * trx)1701 trx_commit_complete_for_mysql(
1702 /*==========================*/
1703 	trx_t*	trx)	/*!< in: trx handle */
1704 {
1705 	ib_uint64_t	lsn	= trx->commit_lsn;
1706 
1707 	ut_a(trx);
1708 
1709 	trx->op_info = "flushing log";
1710 
1711 	if (!trx->must_flush_log_later) {
1712 		/* Do nothing */
1713 	} else if (srv_flush_log_at_trx_commit == 0) {
1714 		/* Do nothing */
1715 	} else if (srv_flush_log_at_trx_commit == 1) {
1716 		if (srv_unix_file_flush_method == SRV_UNIX_NOSYNC) {
1717 			/* Write the log but do not flush it to disk */
1718 
1719 			log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, FALSE);
1720 		} else {
1721 			/* Write the log to the log files AND flush them to
1722 			disk */
1723 
1724 			log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, TRUE);
1725 		}
1726 	} else if (srv_flush_log_at_trx_commit == 2) {
1727 
1728 		/* Write the log but do not flush it to disk */
1729 
1730 		log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, FALSE);
1731 	} else {
1732 		ut_error;
1733 	}
1734 
1735 	trx->must_flush_log_later = FALSE;
1736 
1737 	trx->op_info = "";
1738 
1739 	return(0);
1740 }
1741 
1742 /**********************************************************************//**
1743 Marks the latest SQL statement ended. */
1744 UNIV_INTERN
1745 void
trx_mark_sql_stat_end(trx_t * trx)1746 trx_mark_sql_stat_end(
1747 /*==================*/
1748 	trx_t*	trx)	/*!< in: trx handle */
1749 {
1750 	ut_a(trx);
1751 
1752 	if (trx->conc_state == TRX_NOT_STARTED) {
1753 		trx->undo_no = 0;
1754 	}
1755 
1756 	trx->last_sql_stat_start.least_undo_no = trx->undo_no;
1757 }
1758 
1759 /**********************************************************************//**
1760 Prints info about a transaction to the given file. The caller must own the
1761 kernel mutex. */
1762 UNIV_INTERN
1763 void
trx_print(FILE * f,trx_t * trx,ulint max_query_len)1764 trx_print(
1765 /*======*/
1766 	FILE*	f,		/*!< in: output stream */
1767 	trx_t*	trx,		/*!< in: transaction */
1768 	ulint	max_query_len)	/*!< in: max query length to print, or 0 to
1769 				   use the default max length */
1770 {
1771 	ibool	newline;
1772 
1773 	fprintf(f, "TRANSACTION " TRX_ID_FMT, (ullint) trx->id);
1774 
1775 	switch (trx->conc_state) {
1776 	case TRX_NOT_STARTED:
1777 		fputs(", not started", f);
1778 		break;
1779 	case TRX_ACTIVE:
1780 		fprintf(f, ", ACTIVE %lu sec",
1781 			(ulong)difftime(time(NULL), trx->start_time));
1782 		break;
1783 	case TRX_PREPARED:
1784 		fprintf(f, ", ACTIVE (PREPARED) %lu sec",
1785 			(ulong)difftime(time(NULL), trx->start_time));
1786 		break;
1787 	case TRX_COMMITTED_IN_MEMORY:
1788 		fputs(", COMMITTED IN MEMORY", f);
1789 		break;
1790 	default:
1791 		fprintf(f, " state %lu", (ulong) trx->conc_state);
1792 	}
1793 
1794 	if (*trx->op_info) {
1795 		putc(' ', f);
1796 		fputs(trx->op_info, f);
1797 	}
1798 
1799 	if (trx->is_recovered) {
1800 		fputs(" recovered trx", f);
1801 	}
1802 
1803 	if (trx->is_purge) {
1804 		fputs(" purge trx", f);
1805 	}
1806 
1807 	if (trx->declared_to_be_inside_innodb) {
1808 		fprintf(f, ", thread declared inside InnoDB %lu",
1809 			(ulong) trx->n_tickets_to_enter_innodb);
1810 	}
1811 
1812 	putc('\n', f);
1813 
1814 	if (trx->n_mysql_tables_in_use > 0 || trx->mysql_n_tables_locked > 0) {
1815 		fprintf(f, "mysql tables in use %lu, locked %lu\n",
1816 			(ulong) trx->n_mysql_tables_in_use,
1817 			(ulong) trx->mysql_n_tables_locked);
1818 	}
1819 
1820 	newline = TRUE;
1821 
1822 	switch (trx->que_state) {
1823 	case TRX_QUE_RUNNING:
1824 		newline = FALSE; break;
1825 	case TRX_QUE_LOCK_WAIT:
1826 		fputs("LOCK WAIT ", f); break;
1827 	case TRX_QUE_ROLLING_BACK:
1828 		fputs("ROLLING BACK ", f); break;
1829 	case TRX_QUE_COMMITTING:
1830 		fputs("COMMITTING ", f); break;
1831 	default:
1832 		fprintf(f, "que state %lu ", (ulong) trx->que_state);
1833 	}
1834 
1835 	if (0 < UT_LIST_GET_LEN(trx->trx_locks)
1836 	    || mem_heap_get_size(trx->lock_heap) > 400) {
1837 		newline = TRUE;
1838 
1839 		fprintf(f, "%lu lock struct(s), heap size %lu,"
1840 			" %lu row lock(s)",
1841 			(ulong) UT_LIST_GET_LEN(trx->trx_locks),
1842 			(ulong) mem_heap_get_size(trx->lock_heap),
1843 			(ulong) lock_number_of_rows_locked(trx));
1844 	}
1845 
1846 	if (trx->has_search_latch) {
1847 		newline = TRUE;
1848 		fputs(", holds adaptive hash latch", f);
1849 	}
1850 
1851 	if (trx->undo_no != 0) {
1852 		newline = TRUE;
1853 		fprintf(f, ", undo log entries %llu",
1854 			(ullint) trx->undo_no);
1855 	}
1856 
1857 	if (newline) {
1858 		putc('\n', f);
1859 	}
1860 
1861 	if (trx->mysql_thd != NULL) {
1862 		innobase_mysql_print_thd(f, trx->mysql_thd, max_query_len);
1863 	}
1864 }
1865 
1866 /*******************************************************************//**
1867 Compares the "weight" (or size) of two transactions. Transactions that
1868 have edited non-transactional tables are considered heavier than ones
1869 that have not.
1870 @return	TRUE if weight(a) >= weight(b) */
1871 UNIV_INTERN
1872 ibool
trx_weight_ge(const trx_t * a,const trx_t * b)1873 trx_weight_ge(
1874 /*==========*/
1875 	const trx_t*	a,	/*!< in: the first transaction to be compared */
1876 	const trx_t*	b)	/*!< in: the second transaction to be compared */
1877 {
1878 	ibool	a_notrans_edit;
1879 	ibool	b_notrans_edit;
1880 
1881 	/* If mysql_thd is NULL for a transaction we assume that it has
1882 	not edited non-transactional tables. */
1883 
1884 	a_notrans_edit = a->mysql_thd != NULL
1885 		&& thd_has_edited_nontrans_tables(a->mysql_thd);
1886 
1887 	b_notrans_edit = b->mysql_thd != NULL
1888 		&& thd_has_edited_nontrans_tables(b->mysql_thd);
1889 
1890 	if (a_notrans_edit != b_notrans_edit) {
1891 
1892 		return(a_notrans_edit);
1893 	}
1894 
1895 	/* Either both had edited non-transactional tables or both had
1896 	not, we fall back to comparing the number of altered/locked
1897 	rows. */
1898 
1899 #if 0
1900 	fprintf(stderr,
1901 		"%s TRX_WEIGHT(a): %lld+%lu, TRX_WEIGHT(b): %lld+%lu\n",
1902 		__func__,
1903 		a->undo_no, UT_LIST_GET_LEN(a->trx_locks),
1904 		b->undo_no, UT_LIST_GET_LEN(b->trx_locks));
1905 #endif
1906 
1907 	return(TRX_WEIGHT(a) >= TRX_WEIGHT(b));
1908 }
1909 
/****************************************************************//**
Prepares a transaction. If the transaction has an insert or update undo
log, the state of each such log is set at prepare within one
mini-transaction, and the end lsn of that mini-transaction is recorded.
The transaction is then marked TRX_PREPARED and trx_n_prepared is
incremented. If an lsn was recorded, the log is written and possibly
flushed up to it, as directed by srv_flush_log_at_trx_commit.

The caller must hold the kernel mutex. The mutex is released
temporarily around the mini-transaction and around the log write, and
is held again when the function returns. */
UNIV_INTERN
void
trx_prepare_off_kernel(
/*===================*/
	trx_t*	trx)	/*!< in: transaction */
{
	trx_rseg_t*	rseg;
	ib_uint64_t	lsn		= 0;	/* stays 0 if the trx has
						no undo logs; then no log
						write is done below */
	mtr_t		mtr;

	ut_ad(mutex_own(&kernel_mutex));

	rseg = trx->rseg;

	if (trx->insert_undo != NULL || trx->update_undo != NULL) {

		/* The kernel mutex is not held while the undo log states
		are modified; it is reacquired at the end of this branch */

		mutex_exit(&kernel_mutex);

		mtr_start(&mtr);

		/* Change the undo log segment states from TRX_UNDO_ACTIVE
		to TRX_UNDO_PREPARED: these modifications to the file data
		structure define the transaction as prepared in the
		file-based world, at the serialization point of lsn. */

		mutex_enter(&(rseg->mutex));

		if (trx->insert_undo != NULL) {

			/* It is not necessary to obtain trx->undo_mutex here
			because only a single OS thread is allowed to do the
			transaction prepare for this transaction. */

			trx_undo_set_state_at_prepare(trx, trx->insert_undo,
						      &mtr);
		}

		if (trx->update_undo) {
			trx_undo_set_state_at_prepare(
				trx, trx->update_undo, &mtr);
		}

		mutex_exit(&(rseg->mutex));

		/*--------------*/
		mtr_commit(&mtr);	/* This mtr commit makes the
					transaction prepared in the file-based
					world */
		/*--------------*/
		lsn = mtr.end_lsn;

		mutex_enter(&kernel_mutex);
	}

	ut_ad(mutex_own(&kernel_mutex));

	/* The in-memory state change is made with the kernel mutex held */

	/*--------------------------------------*/
	trx->conc_state = TRX_PREPARED;
	trx_n_prepared++;
	/*--------------------------------------*/

	if (lsn) {
		/* Depending on the my.cnf options, we may now write the log
		buffer to the log files, making the prepared state of the
		transaction durable if the OS does not crash. We may also
		flush the log files to disk, making the prepared state of the
		transaction durable also at an OS crash or a power outage.

		The idea in InnoDB's group prepare is that a group of
		transactions gather behind a trx doing a physical disk write
		to log files, and when that physical write has been completed,
		one of those transactions does a write which prepares the whole
		group. Note that this group prepare will only bring benefit if
		there are > 2 users in the database. Then at least 2 users can
		gather behind one doing the physical log write to disk.

		TODO: find out if MySQL holds some mutex when calling this.
		That would spoil our group prepare algorithm. */

		/* The log write is done without holding the kernel mutex */

		mutex_exit(&kernel_mutex);

		if (srv_flush_log_at_trx_commit == 0) {
			/* Do nothing */
		} else if (srv_flush_log_at_trx_commit == 1) {
			if (srv_unix_file_flush_method == SRV_UNIX_NOSYNC) {
				/* Write the log but do not flush it to disk */

				log_write_up_to(lsn, LOG_WAIT_ONE_GROUP,
						FALSE);
			} else {
				/* Write the log to the log files AND flush
				them to disk */

				log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, TRUE);
			}
		} else if (srv_flush_log_at_trx_commit == 2) {

			/* Write the log but do not flush it to disk */

			log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, FALSE);
		} else {
			/* Any other setting is treated as a fatal error */
			ut_error;
		}

		mutex_enter(&kernel_mutex);
	}
}
2019 
2020 /**********************************************************************//**
2021 Does the transaction prepare for MySQL.
2022 @return	0 or error number */
2023 UNIV_INTERN
2024 ulint
trx_prepare_for_mysql(trx_t * trx)2025 trx_prepare_for_mysql(
2026 /*==================*/
2027 	trx_t*	trx)	/*!< in: trx handle */
2028 {
2029 	/* Because we do not do the prepare by sending an Innobase
2030 	sig to the transaction, we must here make sure that trx has been
2031 	started. */
2032 
2033 	ut_a(trx);
2034 
2035 	trx->op_info = "preparing";
2036 
2037 	trx_start_if_not_started(trx);
2038 
2039 	mutex_enter(&kernel_mutex);
2040 
2041 	trx_prepare_off_kernel(trx);
2042 
2043 	mutex_exit(&kernel_mutex);
2044 
2045 	trx->op_info = "";
2046 
2047 	return(0);
2048 }
2049 
2050 /**********************************************************************//**
2051 This function is used to find number of prepared transactions and
2052 their transaction objects for a recovery.
2053 @return	number of prepared transactions stored in xid_list */
2054 UNIV_INTERN
2055 int
trx_recover_for_mysql(XID * xid_list,ulint len)2056 trx_recover_for_mysql(
2057 /*==================*/
2058 	XID*	xid_list,	/*!< in/out: prepared transactions */
2059 	ulint	len)		/*!< in: number of slots in xid_list */
2060 {
2061 	trx_t*	trx;
2062 	ulint	count = 0;
2063 
2064 	ut_ad(xid_list);
2065 	ut_ad(len);
2066 
2067 	/* We should set those transactions which are in the prepared state
2068 	to the xid_list */
2069 
2070 	mutex_enter(&kernel_mutex);
2071 
2072 	trx = UT_LIST_GET_FIRST(trx_sys->trx_list);
2073 
2074 	while (trx) {
2075 		if (trx->conc_state == TRX_PREPARED) {
2076 			xid_list[count] = trx->xid;
2077 
2078 			if (count == 0) {
2079 				ut_print_timestamp(stderr);
2080 				fprintf(stderr,
2081 					"  InnoDB: Starting recovery for"
2082 					" XA transactions...\n");
2083 			}
2084 
2085 			ut_print_timestamp(stderr);
2086 			fprintf(stderr,
2087 				"  InnoDB: Transaction " TRX_ID_FMT " in"
2088 				" prepared state after recovery\n",
2089 				(ullint) trx->id);
2090 
2091 			ut_print_timestamp(stderr);
2092 			fprintf(stderr,
2093 				"  InnoDB: Transaction contains changes"
2094 				" to %llu rows\n",
2095 				(ullint) trx->undo_no);
2096 
2097 			count++;
2098 
2099 			if (count == len) {
2100 				break;
2101 			}
2102 		}
2103 
2104 		trx = UT_LIST_GET_NEXT(trx_list, trx);
2105 	}
2106 
2107 	mutex_exit(&kernel_mutex);
2108 
2109 	if (count > 0){
2110 		ut_print_timestamp(stderr);
2111 		fprintf(stderr,
2112 			"  InnoDB: %lu transactions in prepared state"
2113 			" after recovery\n",
2114 			(ulong) count);
2115 	}
2116 
2117 	return ((int) count);
2118 }
2119 
2120 /*******************************************************************//**
2121 This function is used to find one X/Open XA distributed transaction
2122 which is in the prepared state
2123 @return	trx or NULL; on match, the trx->xid will be invalidated */
2124 UNIV_INTERN
2125 trx_t*
trx_get_trx_by_xid(const XID * xid)2126 trx_get_trx_by_xid(
2127 /*===============*/
2128 	const XID*	xid)	/*!< in: X/Open XA transaction identifier */
2129 {
2130 	trx_t*	trx;
2131 
2132 	if (xid == NULL) {
2133 
2134 		return(NULL);
2135 	}
2136 
2137 	mutex_enter(&kernel_mutex);
2138 
2139 	trx = UT_LIST_GET_FIRST(trx_sys->trx_list);
2140 
2141 	while (trx) {
2142 		/* Compare two X/Open XA transaction id's: their
2143 		length should be the same and binary comparison
2144 		of gtrid_length+bqual_length bytes should be
2145 		the same */
2146 
2147 		if (trx->is_recovered
2148 		    && trx->conc_state == TRX_PREPARED
2149 		    && xid->gtrid_length == trx->xid.gtrid_length
2150 		    && xid->bqual_length == trx->xid.bqual_length
2151 		    && memcmp(xid->data, trx->xid.data,
2152 			      xid->gtrid_length + xid->bqual_length) == 0) {
2153 
2154 			/* Invalidate the XID, so that subsequent calls
2155 			will not find it. */
2156 			memset(&trx->xid, 0, sizeof(trx->xid));
2157 			trx->xid.formatID = -1;
2158 			break;
2159 		}
2160 
2161 		trx = UT_LIST_GET_NEXT(trx_list, trx);
2162 	}
2163 
2164 	mutex_exit(&kernel_mutex);
2165 
2166 	return(trx);
2167 }
2168