1 /*****************************************************************************
2 
3 Copyright (c) 1996, 2021, Oracle and/or its affiliates.
4 
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License, version 2.0,
7 as published by the Free Software Foundation.
8 
9 This program is also distributed with certain software (including
10 but not limited to OpenSSL) that is licensed under separate terms,
11 as designated in a particular file or component or in included license
12 documentation.  The authors of MySQL hereby grant you an additional
13 permission to link the program and your derivative works with the
14 separately licensed software that they have included with MySQL.
15 
16 This program is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19 GNU General Public License, version 2.0, for more details.
20 
21 You should have received a copy of the GNU General Public License along with
22 this program; if not, write to the Free Software Foundation, Inc.,
23 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
24 
25 *****************************************************************************/
26 
27 /**************************************************//**
28 @file lock/lock0lock.cc
29 The transaction lock system
30 
31 Created 5/7/1996 Heikki Tuuri
32 *******************************************************/
33 
34 #define LOCK_MODULE_IMPLEMENTATION
35 
36 #include <mysql/service_thd_engine_lock.h>
37 #include "ha_prototypes.h"
38 
39 #include "lock0lock.h"
40 #include "lock0priv.h"
41 
42 #ifdef UNIV_NONINL
43 #include "lock0lock.ic"
44 #include "lock0priv.ic"
45 #endif
46 
47 #include "dict0mem.h"
48 #include "usr0sess.h"
49 #include "trx0purge.h"
50 #include "trx0sys.h"
51 #include "srv0mon.h"
52 #include "ut0vec.h"
53 #include "btr0btr.h"
54 #include "dict0boot.h"
55 #include "ut0new.h"
56 #include "row0sel.h"
57 #include "row0mysql.h"
58 #include "pars0pars.h"
59 
60 #include <set>
61 #ifdef WITH_WSREP
62 extern my_bool wsrep_debug;
63 extern my_bool wsrep_log_conflicts;
64 #include <wsrep_mysqld.h>
65 #endif
66 
67 /* Flag to enable/disable deadlock detector. */
68 my_bool	innobase_deadlock_detect = TRUE;
69 
70 /** Total number of cached record locks */
71 static const ulint	REC_LOCK_CACHE = 8;
72 
73 /** Maximum record lock size in bytes */
74 static const ulint	REC_LOCK_SIZE = sizeof(ib_lock_t) + 256;
75 
76 /** Total number of cached table locks */
77 static const ulint	TABLE_LOCK_CACHE = 8;
78 
79 /** Size in bytes, of the table lock instance */
80 static const ulint	TABLE_LOCK_SIZE = sizeof(ib_lock_t);
81 
/** Deadlock checker. Implements an iterative depth-first search over the
wait-for graph, using a statically allocated stack (s_states) to avoid
malloc/free during the search. All members are protected by
lock_sys->mutex. */
class DeadlockChecker {
public:
	/** Checks if a joining lock request results in a deadlock. If
	a deadlock is found this function will resolve the deadlock
	by choosing a victim transaction and rolling it back. It
	will attempt to resolve all deadlocks. The returned transaction
	id will be the joining transaction id or 0 if some other
	transaction was chosen as a victim and rolled back or no
	deadlock found.

	@param lock lock the transaction is requesting
	@param trx transaction requesting the lock

	@return id of transaction chosen as victim or 0 */
	static const trx_t* check_and_resolve(
		const lock_t*	lock,
		trx_t*		trx);

private:
	/** Do a shallow copy. Default destructor OK.
	@param trx the start transaction (start node)
	@param wait_lock lock that a transaction wants
	@param mark_start visited node counter */
	DeadlockChecker(
		const trx_t*	trx,
		const lock_t*	wait_lock,
		ib_uint64_t	mark_start)
		:
		m_cost(),
		m_start(trx),
		m_too_deep(),
		m_wait_lock(wait_lock),
		m_mark_start(mark_start),
		m_n_elems()
	{
	}

	/** Check if the search is too deep, either in the number of
	stacked states or the number of visited nodes. */
	bool is_too_deep() const
	{
		return(m_n_elems > LOCK_MAX_DEPTH_IN_DEADLOCK_CHECK
		       || m_cost > LOCK_MAX_N_STEPS_IN_DEADLOCK_CHECK);
	}

	/** Save current state.
	@param lock lock to push on the stack.
	@param heap_no the heap number to push on the stack.
	@return false if stack is full. */
	bool push(const lock_t*	lock, ulint heap_no)
	{
		ut_ad((lock_get_type_low(lock) & LOCK_REC)
		      || (lock_get_type_low(lock) & LOCK_TABLE));

		/* Table locks must use ULINT_UNDEFINED as heap_no,
		record locks must use a real heap number. */
		ut_ad(((lock_get_type_low(lock) & LOCK_TABLE) != 0)
		      == (heap_no == ULINT_UNDEFINED));

		/* Ensure that the stack is bounded. */
		if (m_n_elems >= UT_ARR_SIZE(s_states)) {
			return(false);
		}

		state_t&	state = s_states[m_n_elems++];

		state.m_lock = lock;
		state.m_wait_lock = m_wait_lock;
		state.m_heap_no =heap_no;

		return(true);
	}

	/** Restore state.
	@param[out] lock current lock
	@param[out] heap_no current heap_no */
	void pop(const lock_t*& lock, ulint& heap_no)
	{
		ut_a(m_n_elems > 0);

		const state_t&	state = s_states[--m_n_elems];

		lock = state.m_lock;
		heap_no = state.m_heap_no;
		m_wait_lock = state.m_wait_lock;
	}

	/** Check whether the node has been visited.
	@param lock lock to check
	@return true if the node has been visited */
	bool is_visited(const lock_t* lock) const
	{
		return(lock->trx->lock.deadlock_mark > m_mark_start);
	}

	/** Get the next lock in the queue that is owned by a transaction
	whose sub-tree has not already been searched.
	Note: "next" here means PREV for table locks.
	@param lock Lock in queue
	@param heap_no heap_no if lock is a record lock else ULINT_UNDEFINED
	@return next lock or NULL if at end of queue */
	const lock_t* get_next_lock(const lock_t* lock, ulint heap_no) const;

	/** Get the first lock to search. The search starts from the current
	wait_lock. What we are really interested in is an edge from the
	current wait_lock's owning transaction to another transaction that has
	a lock ahead in the queue. We skip locks where the owning transaction's
	sub-tree has already been searched.

	Note: The record locks are traversed from the oldest lock to the
	latest. For table locks we go from latest to oldest.

	For record locks, we first position the iterator on first lock on
	the page and then reposition on the actual heap_no. This is required
	due to the way the record lock hash is implemented.

	@param[out] heap_no if rec lock, else ULINT_UNDEFINED.

	@return first lock or NULL */
	const lock_t* get_first_lock(ulint* heap_no) const;

	/** Notify that a deadlock has been detected and print the conflicting
	transaction info.
	@param lock lock causing deadlock */
	void notify(const lock_t* lock) const;

	/** Select the victim transaction that should be rolledback.
	@return victim transaction */
	const trx_t* select_victim() const;

	/** Rollback transaction selected as the victim. */
	void trx_rollback();

	/** Looks iteratively for a deadlock. Note: the joining transaction
	may have been granted its lock by the deadlock checks.

	@return 0 if no deadlock else the victim transaction.*/
	const trx_t* search();

	/** Print transaction data to the deadlock file and possibly to stderr.
	@param trx transaction
	@param max_query_len max query length to print */
	static void print(const trx_t* trx, ulint max_query_len);

	/** rewind(3) the file used for storing the latest detected deadlock
	and print a heading message to stderr if printing of all deadlocks to
	stderr is enabled. */
	static void start_print();

	/** Print lock data to the deadlock file and possibly to stderr.
	@param lock record or table type lock */
	static void print(const lock_t* lock);

	/** Print a message to the deadlock file and possibly to stderr.
	@param msg message to print */
	static void print(const char* msg);

	/** Print info about transaction that was rolled back.
	@param trx transaction rolled back
	@param lock lock trx wants */
	static void rollback_print(const trx_t* trx, const lock_t* lock);

private:
	/** DFS state information, used during deadlock checking. */
	struct state_t {
		const lock_t*	m_lock;		/*!< Current lock */
		const lock_t*	m_wait_lock;	/*!< Waiting for lock */
		ulint		m_heap_no;	/*!< heap number if rec lock */
	};

	/** Used in deadlock tracking. Protected by lock_sys->mutex. */
	static ib_uint64_t	s_lock_mark_counter;

	/** Calculation steps thus far. It is the count of the nodes visited. */
	ulint			m_cost;

	/** Joining transaction that is requesting a lock in an
	incompatible mode */
	const trx_t*		m_start;

	/** TRUE if search was too deep and was aborted */
	bool			m_too_deep;

	/** Lock that trx wants */
	const lock_t*		m_wait_lock;

	/**  Value of lock_mark_count at the start of the deadlock check. */
	ib_uint64_t		m_mark_start;

	/** Number of states pushed onto the stack */
	size_t			m_n_elems;

	/** This is to avoid malloc/free calls. */
	static state_t		s_states[MAX_STACK_SIZE];
};
275 
/** Counter to mark visited nodes during deadlock search. */
ib_uint64_t	DeadlockChecker::s_lock_mark_counter = 0;

/** The stack used for deadlock searches. Shared by all searches; this is
safe because searches run under lock_sys->mutex. */
DeadlockChecker::state_t	DeadlockChecker::s_states[MAX_STACK_SIZE];

#ifdef UNIV_DEBUG
/*********************************************************************//**
Validates the lock system.
@return TRUE if ok */
static
bool
lock_validate();
/*============*/

/*********************************************************************//**
Validates the record lock queues on a page.
@return TRUE if ok */
static
ibool
lock_rec_validate_page(
/*===================*/
	const buf_block_t*	block)	/*!< in: buffer block */
	MY_ATTRIBUTE((warn_unused_result));
#endif /* UNIV_DEBUG */

/* The lock system: global instance, allocated in lock_sys_create()
and freed in lock_sys_close() */
lock_sys_t*	lock_sys	= NULL;

/** We store info on the latest deadlock error to this buffer. InnoDB
Monitor will then fetch it and print */
bool	lock_deadlock_found = false;

/** Only created if !srv_read_only_mode */
static FILE*		lock_latest_err_file;
311 
312 /*********************************************************************//**
313 Reports that a transaction id is insensible, i.e., in the future. */
314 void
lock_report_trx_id_insanity(trx_id_t trx_id,const rec_t * rec,dict_index_t * index,const ulint * offsets,trx_id_t max_trx_id)315 lock_report_trx_id_insanity(
316 /*========================*/
317 	trx_id_t	trx_id,		/*!< in: trx id */
318 	const rec_t*	rec,		/*!< in: user record */
319 	dict_index_t*	index,		/*!< in: index */
320 	const ulint*	offsets,	/*!< in: rec_get_offsets(rec, index) */
321 	trx_id_t	max_trx_id)	/*!< in: trx_sys_get_max_trx_id() */
322 {
323 	ib::error()
324 		<< "Transaction id " << trx_id
325 		<< " associated with record" << rec_offsets_print(rec, offsets)
326 		<< " in index " << index->name
327 		<< " of table " << index->table->name
328 		<< " is greater than the global counter " << max_trx_id
329 		<< "! The table is corrupted.";
330 }
331 
332 /*********************************************************************//**
333 Checks that a transaction id is sensible, i.e., not in the future.
334 @return true if ok */
335 #ifdef UNIV_DEBUG
336 
337 #else
338 static MY_ATTRIBUTE((warn_unused_result))
339 #endif
340 bool
lock_check_trx_id_sanity(trx_id_t trx_id,const rec_t * rec,dict_index_t * index,const ulint * offsets)341 lock_check_trx_id_sanity(
342 /*=====================*/
343 	trx_id_t	trx_id,		/*!< in: trx id */
344 	const rec_t*	rec,		/*!< in: user record */
345 	dict_index_t*	index,		/*!< in: index */
346 	const ulint*	offsets)	/*!< in: rec_get_offsets(rec, index) */
347 {
348 	ut_ad(rec_offs_validate(rec, index, offsets));
349 
350 	trx_id_t	max_trx_id = trx_sys_get_max_trx_id();
351 	bool		is_ok = trx_id < max_trx_id;
352 
353 	if (!is_ok) {
354 		lock_report_trx_id_insanity(
355 			trx_id, rec, index, offsets, max_trx_id);
356 	}
357 
358 	return(is_ok);
359 }
360 
361 /*********************************************************************//**
362 Checks that a record is seen in a consistent read.
363 @return true if sees, or false if an earlier version of the record
364 should be retrieved */
365 bool
lock_clust_rec_cons_read_sees(const rec_t * rec,dict_index_t * index,const ulint * offsets,ReadView * view)366 lock_clust_rec_cons_read_sees(
367 /*==========================*/
368 	const rec_t*	rec,	/*!< in: user record which should be read or
369 				passed over by a read cursor */
370 	dict_index_t*	index,	/*!< in: clustered index */
371 	const ulint*	offsets,/*!< in: rec_get_offsets(rec, index) */
372 	ReadView*	view)	/*!< in: consistent read view */
373 {
374 	ut_ad(dict_index_is_clust(index));
375 	ut_ad(page_rec_is_user_rec(rec));
376 	ut_ad(rec_offs_validate(rec, index, offsets));
377 
378 	/* Temp-tables are not shared across connections and multiple
379 	transactions from different connections cannot simultaneously
380 	operate on same temp-table and so read of temp-table is
381 	always consistent read. */
382 	if (srv_read_only_mode || dict_table_is_temporary(index->table)) {
383 		ut_ad(view == 0 || dict_table_is_temporary(index->table));
384 		return(true);
385 	}
386 
387 	/* NOTE that we call this function while holding the search
388 	system latch. */
389 
390 	trx_id_t	trx_id = row_get_rec_trx_id(rec, index, offsets);
391 
392 	return(view->changes_visible(trx_id, index->table->name));
393 }
394 
395 /*********************************************************************//**
396 Checks that a non-clustered index record is seen in a consistent read.
397 
398 NOTE that a non-clustered index page contains so little information on
399 its modifications that also in the case false, the present version of
400 rec may be the right, but we must check this from the clustered index
401 record.
402 
403 @return true if certainly sees, or false if an earlier version of the
404 clustered index record might be needed */
405 bool
lock_sec_rec_cons_read_sees(const rec_t * rec,const dict_index_t * index,const ReadView * view)406 lock_sec_rec_cons_read_sees(
407 /*========================*/
408 	const rec_t*		rec,	/*!< in: user record which
409 					should be read or passed over
410 					by a read cursor */
411 	const dict_index_t*	index,	/*!< in: index */
412 	const ReadView*	view)	/*!< in: consistent read view */
413 {
414 	ut_ad(page_rec_is_user_rec(rec));
415 
416 	/* NOTE that we might call this function while holding the search
417 	system latch. */
418 
419 	if (recv_recovery_is_on()) {
420 
421 		return(false);
422 
423 	} else if (dict_table_is_temporary(index->table)) {
424 
425 		/* Temp-tables are not shared across connections and multiple
426 		transactions from different connections cannot simultaneously
427 		operate on same temp-table and so read of temp-table is
428 		always consistent read. */
429 
430 		return(true);
431 	}
432 
433 	trx_id_t	max_trx_id = page_get_max_trx_id(page_align(rec));
434 
435 	ut_ad(max_trx_id > 0);
436 
437 	return(view->sees(max_trx_id));
438 }
439 
440 /*********************************************************************//**
441 Creates the lock system at database start. */
442 void
lock_sys_create(ulint n_cells)443 lock_sys_create(
444 /*============*/
445 	ulint	n_cells)	/*!< in: number of slots in lock hash table */
446 {
447 	ulint	lock_sys_sz;
448 
449 	lock_sys_sz = sizeof(*lock_sys) + OS_THREAD_MAX_N * sizeof(srv_slot_t);
450 
451 	lock_sys = static_cast<lock_sys_t*>(ut_zalloc_nokey(lock_sys_sz));
452 
453 	void*	ptr = &lock_sys[1];
454 
455 	lock_sys->waiting_threads = static_cast<srv_slot_t*>(ptr);
456 
457 	lock_sys->last_slot = lock_sys->waiting_threads;
458 
459 	mutex_create(LATCH_ID_LOCK_SYS, &lock_sys->mutex);
460 
461 	mutex_create(LATCH_ID_LOCK_SYS_WAIT, &lock_sys->wait_mutex);
462 
463 	lock_sys->timeout_event = os_event_create(0);
464 
465 	lock_sys->rec_hash = hash_create(n_cells);
466 	lock_sys->prdt_hash = hash_create(n_cells);
467 	lock_sys->prdt_page_hash = hash_create(n_cells);
468 
469 	if (!srv_read_only_mode) {
470 		lock_latest_err_file = os_file_create_tmpfile(NULL);
471 		ut_a(lock_latest_err_file);
472 	}
473 }
474 
475 /** Calculates the fold value of a lock: used in migrating the hash table.
476 @param[in]	lock	record lock object
477 @return	folded value */
478 static
479 ulint
lock_rec_lock_fold(const lock_t * lock)480 lock_rec_lock_fold(
481 	const lock_t*	lock)
482 {
483 	return(lock_rec_fold(lock->un_member.rec_lock.space,
484 			     lock->un_member.rec_lock.page_no));
485 }
486 
487 /** Resize the lock hash tables.
488 @param[in]	n_cells	number of slots in lock hash table */
489 void
lock_sys_resize(ulint n_cells)490 lock_sys_resize(
491 	ulint	n_cells)
492 {
493 	hash_table_t*	old_hash;
494 
495 	lock_mutex_enter();
496 
497 	old_hash = lock_sys->rec_hash;
498 	lock_sys->rec_hash = hash_create(n_cells);
499 	HASH_MIGRATE(old_hash, lock_sys->rec_hash, lock_t, hash,
500 		     lock_rec_lock_fold);
501 	hash_table_free(old_hash);
502 
503 	old_hash = lock_sys->prdt_hash;
504 	lock_sys->prdt_hash = hash_create(n_cells);
505 	HASH_MIGRATE(old_hash, lock_sys->prdt_hash, lock_t, hash,
506 		     lock_rec_lock_fold);
507 	hash_table_free(old_hash);
508 
509 	old_hash = lock_sys->prdt_page_hash;
510 	lock_sys->prdt_page_hash = hash_create(n_cells);
511 	HASH_MIGRATE(old_hash, lock_sys->prdt_page_hash, lock_t, hash,
512 		     lock_rec_lock_fold);
513 	hash_table_free(old_hash);
514 
515 	/* need to update block->lock_hash_val */
516 	for (ulint i = 0; i < srv_buf_pool_instances; ++i) {
517 		buf_pool_t*	buf_pool = buf_pool_from_array(i);
518 
519 		buf_pool_mutex_enter(buf_pool);
520 		buf_page_t*	bpage;
521 		bpage = UT_LIST_GET_FIRST(buf_pool->LRU);
522 
523 		while (bpage != NULL) {
524 			if (buf_page_get_state(bpage)
525 			    == BUF_BLOCK_FILE_PAGE) {
526 				buf_block_t*	block;
527 				block = reinterpret_cast<buf_block_t*>(
528 					bpage);
529 
530 				block->lock_hash_val
531 					= lock_rec_hash(
532 						bpage->id.space(),
533 						bpage->id.page_no());
534 			}
535 			bpage = UT_LIST_GET_NEXT(LRU, bpage);
536 		}
537 		buf_pool_mutex_exit(buf_pool);
538 	}
539 
540 	lock_mutex_exit();
541 }
542 
543 /*********************************************************************//**
544 Closes the lock system at database shutdown. */
545 void
lock_sys_close(void)546 lock_sys_close(void)
547 /*================*/
548 {
549 	if (lock_latest_err_file != NULL) {
550 		fclose(lock_latest_err_file);
551 		lock_latest_err_file = NULL;
552 	}
553 
554 	hash_table_free(lock_sys->rec_hash);
555 	hash_table_free(lock_sys->prdt_hash);
556 	hash_table_free(lock_sys->prdt_page_hash);
557 
558 	os_event_destroy(lock_sys->timeout_event);
559 
560 	mutex_destroy(&lock_sys->mutex);
561 	mutex_destroy(&lock_sys->wait_mutex);
562 
563 	srv_slot_t*	slot = lock_sys->waiting_threads;
564 
565 	for (ulint i = 0; i < OS_THREAD_MAX_N; i++, ++slot) {
566 		if (slot->event != NULL) {
567 			os_event_destroy(slot->event);
568 		}
569 	}
570 
571 	ut_free(lock_sys);
572 
573 	lock_sys = NULL;
574 }
575 
576 /*********************************************************************//**
577 Gets the size of a lock struct.
578 @return size in bytes */
579 ulint
lock_get_size(void)580 lock_get_size(void)
581 /*===============*/
582 {
583 	return((ulint) sizeof(lock_t));
584 }
585 
586 /*********************************************************************//**
587 Gets the source table of an ALTER TABLE transaction.  The table must be
588 covered by an IX or IS table lock.
589 @return the source table of transaction, if it is covered by an IX or
590 IS table lock; dest if there is no source table, and NULL if the
591 transaction is locking more than two tables or an inconsistency is
592 found */
593 dict_table_t*
lock_get_src_table(trx_t * trx,dict_table_t * dest,lock_mode * mode)594 lock_get_src_table(
595 /*===============*/
596 	trx_t*		trx,	/*!< in: transaction */
597 	dict_table_t*	dest,	/*!< in: destination of ALTER TABLE */
598 	lock_mode*	mode)	/*!< out: lock mode of the source table */
599 {
600 	dict_table_t*	src;
601 	lock_t*		lock;
602 
603 	ut_ad(!lock_mutex_own());
604 
605 	src = NULL;
606 	*mode = LOCK_NONE;
607 
608 	/* The trx mutex protects the trx_locks for our purposes.
609 	Other transactions could want to convert one of our implicit
610 	record locks to an explicit one. For that, they would need our
611 	trx mutex. Waiting locks can be removed while only holding
612 	lock_sys->mutex, but this is a running transaction and cannot
613 	thus be holding any waiting locks. */
614 	trx_mutex_enter(trx);
615 
616 	for (lock = UT_LIST_GET_FIRST(trx->lock.trx_locks);
617 	     lock != NULL;
618 	     lock = UT_LIST_GET_NEXT(trx_locks, lock)) {
619 		lock_table_t*	tab_lock;
620 		lock_mode	lock_mode;
621 		if (!(lock_get_type_low(lock) & LOCK_TABLE)) {
622 			/* We are only interested in table locks. */
623 			continue;
624 		}
625 		tab_lock = &lock->un_member.tab_lock;
626 		if (dest == tab_lock->table) {
627 			/* We are not interested in the destination table. */
628 			continue;
629 		} else if (!src) {
630 			/* This presumably is the source table. */
631 			src = tab_lock->table;
632 			if (UT_LIST_GET_LEN(src->locks) != 1
633 			    || UT_LIST_GET_FIRST(src->locks) != lock) {
634 				/* We only support the case when
635 				there is only one lock on this table. */
636 				src = NULL;
637 				goto func_exit;
638 			}
639 		} else if (src != tab_lock->table) {
640 			/* The transaction is locking more than
641 			two tables (src and dest): abort */
642 			src = NULL;
643 			goto func_exit;
644 		}
645 
646 		/* Check that the source table is locked by
647 		LOCK_IX or LOCK_IS. */
648 		lock_mode = lock_get_mode(lock);
649 		if (lock_mode == LOCK_IX || lock_mode == LOCK_IS) {
650 			if (*mode != LOCK_NONE && *mode != lock_mode) {
651 				/* There are multiple locks on src. */
652 				src = NULL;
653 				goto func_exit;
654 			}
655 			*mode = lock_mode;
656 		}
657 	}
658 
659 	if (!src) {
660 		/* No source table lock found: flag the situation to caller */
661 		src = dest;
662 	}
663 
664 func_exit:
665 	trx_mutex_exit(trx);
666 	return(src);
667 }
668 
669 /*********************************************************************//**
670 Determine if the given table is exclusively "owned" by the given
671 transaction, i.e., transaction holds LOCK_IX and possibly LOCK_AUTO_INC
672 on the table.
673 @return TRUE if table is only locked by trx, with LOCK_IX, and
674 possibly LOCK_AUTO_INC */
675 ibool
lock_is_table_exclusive(const dict_table_t * table,const trx_t * trx)676 lock_is_table_exclusive(
677 /*====================*/
678 	const dict_table_t*	table,	/*!< in: table */
679 	const trx_t*		trx)	/*!< in: transaction */
680 {
681 	const lock_t*	lock;
682 	ibool		ok	= FALSE;
683 
684 	ut_ad(table);
685 	ut_ad(trx);
686 
687 	lock_mutex_enter();
688 
689 	for (lock = UT_LIST_GET_FIRST(table->locks);
690 	     lock != NULL;
691 	     lock = UT_LIST_GET_NEXT(locks, &lock->un_member.tab_lock)) {
692 		if (lock->trx != trx) {
693 			/* A lock on the table is held
694 			by some other transaction. */
695 			goto not_ok;
696 		}
697 
698 		if (!(lock_get_type_low(lock) & LOCK_TABLE)) {
699 			/* We are interested in table locks only. */
700 			continue;
701 		}
702 
703 		switch (lock_get_mode(lock)) {
704 		case LOCK_IX:
705 			ok = TRUE;
706 			break;
707 		case LOCK_AUTO_INC:
708 			/* It is allowed for trx to hold an
709 			auto_increment lock. */
710 			break;
711 		default:
712 not_ok:
713 			/* Other table locks than LOCK_IX are not allowed. */
714 			ok = FALSE;
715 			goto func_exit;
716 		}
717 	}
718 
719 func_exit:
720 	lock_mutex_exit();
721 
722 	return(ok);
723 }
724 
725 /*********************************************************************//**
726 Sets the wait flag of a lock and the back pointer in trx to lock. */
727 UNIV_INLINE
728 void
lock_set_lock_and_trx_wait(lock_t * lock,trx_t * trx)729 lock_set_lock_and_trx_wait(
730 /*=======================*/
731 	lock_t*	lock,	/*!< in: lock */
732 	trx_t*	trx)	/*!< in/out: trx */
733 {
734 	ut_ad(lock);
735 	ut_ad(lock->trx == trx);
736 	ut_ad(trx->lock.wait_lock == NULL);
737 	ut_ad(lock_mutex_own());
738 	ut_ad(trx_mutex_own(trx));
739 
740 	trx->lock.wait_lock = lock;
741 	lock->type_mode |= LOCK_WAIT;
742 }
743 
744 /**********************************************************************//**
745 The back pointer to a waiting lock request in the transaction is set to NULL
746 and the wait bit in lock type_mode is reset. */
747 UNIV_INLINE
748 void
lock_reset_lock_and_trx_wait(lock_t * lock)749 lock_reset_lock_and_trx_wait(
750 /*=========================*/
751 	lock_t*	lock)	/*!< in/out: record lock */
752 {
753 	ut_ad(lock->trx->lock.wait_lock == lock);
754 	ut_ad(lock_get_wait(lock));
755 	ut_ad(lock_mutex_own());
756 
757 	lock->trx->lock.wait_lock = NULL;
758 	lock->type_mode &= ~LOCK_WAIT;
759 }
760 
761 /*********************************************************************//**
762 Gets the gap flag of a record lock.
763 @return LOCK_GAP or 0 */
764 UNIV_INLINE
765 ulint
lock_rec_get_gap(const lock_t * lock)766 lock_rec_get_gap(
767 /*=============*/
768 	const lock_t*	lock)	/*!< in: record lock */
769 {
770 	ut_ad(lock);
771 	ut_ad(lock_get_type_low(lock) == LOCK_REC);
772 
773 	return(lock->type_mode & LOCK_GAP);
774 }
775 
776 /*********************************************************************//**
777 Gets the LOCK_REC_NOT_GAP flag of a record lock.
778 @return LOCK_REC_NOT_GAP or 0 */
779 UNIV_INLINE
780 ulint
lock_rec_get_rec_not_gap(const lock_t * lock)781 lock_rec_get_rec_not_gap(
782 /*=====================*/
783 	const lock_t*	lock)	/*!< in: record lock */
784 {
785 	ut_ad(lock);
786 	ut_ad(lock_get_type_low(lock) == LOCK_REC);
787 
788 	return(lock->type_mode & LOCK_REC_NOT_GAP);
789 }
790 
791 /*********************************************************************//**
792 Gets the waiting insert flag of a record lock.
793 @return LOCK_INSERT_INTENTION or 0 */
794 UNIV_INLINE
795 ulint
lock_rec_get_insert_intention(const lock_t * lock)796 lock_rec_get_insert_intention(
797 /*==========================*/
798 	const lock_t*	lock)	/*!< in: record lock */
799 {
800 	ut_ad(lock);
801 	ut_ad(lock_get_type_low(lock) == LOCK_REC);
802 
803 	return(lock->type_mode & LOCK_INSERT_INTENTION);
804 }
805 
806 /*********************************************************************//**
807 Checks if a lock request for a new lock has to wait for request lock2.
808 @return TRUE if new lock has to wait for lock2 to be removed */
809 UNIV_INLINE
810 ibool
lock_rec_has_to_wait(ibool for_locking,const trx_t * trx,ulint type_mode,const lock_t * lock2,bool lock_is_on_supremum)811 lock_rec_has_to_wait(
812 /*=================*/
813 #ifdef WITH_WSREP
814 	ibool		for_locking, /*!< is caller locking or releasing */
815 #endif /* WITH_WSREP */
816 	const trx_t*	trx,	/*!< in: trx of new lock */
817 	ulint		type_mode,/*!< in: precise mode of the new lock
818 				to set: LOCK_S or LOCK_X, possibly
819 				ORed to LOCK_GAP or LOCK_REC_NOT_GAP,
820 				LOCK_INSERT_INTENTION */
821 	const lock_t*	lock2,	/*!< in: another record lock; NOTE that
822 				it is assumed that this has a lock bit
823 				set on the same record as in the new
824 				lock we are setting */
825 	bool		lock_is_on_supremum)
826 				/*!< in: TRUE if we are setting the
827 				lock on the 'supremum' record of an
828 				index page: we know then that the lock
829 				request is really for a 'gap' type lock */
830 {
831 	ut_ad(trx && lock2);
832 	ut_ad(lock_get_type_low(lock2) == LOCK_REC);
833 
834 	if (trx != lock2->trx
835 	    && !lock_mode_compatible(static_cast<lock_mode>(
836 			             LOCK_MODE_MASK & type_mode),
837 				     lock_get_mode(lock2))) {
838 
839 		/* We have somewhat complex rules when gap type record locks
840 		cause waits */
841 
842 		if ((lock_is_on_supremum || (type_mode & LOCK_GAP))
843 		    && !(type_mode & LOCK_INSERT_INTENTION)) {
844 
845 			/* Gap type locks without LOCK_INSERT_INTENTION flag
846 			do not need to wait for anything. This is because
847 			different users can have conflicting lock types
848 			on gaps. */
849 
850 			return(FALSE);
851 		}
852 
853 		if (!(type_mode & LOCK_INSERT_INTENTION)
854 		    && lock_rec_get_gap(lock2)) {
855 
856 			/* Record lock (LOCK_ORDINARY or LOCK_REC_NOT_GAP
857 			does not need to wait for a gap type lock */
858 
859 			return(FALSE);
860 		}
861 
862 		if ((type_mode & LOCK_GAP)
863 		    && lock_rec_get_rec_not_gap(lock2)) {
864 
865 			/* Lock on gap does not need to wait for
866 			a LOCK_REC_NOT_GAP type lock */
867 
868 			return(FALSE);
869 		}
870 
871 		if (lock_rec_get_insert_intention(lock2)) {
872 
873 			/* No lock request needs to wait for an insert
874 			intention lock to be removed. This is ok since our
875 			rules allow conflicting locks on gaps. This eliminates
876 			a spurious deadlock caused by a next-key lock waiting
877 			for an insert intention lock; when the insert
878 			intention lock was granted, the insert deadlocked on
879 			the waiting next-key lock.
880 
881 			Also, insert intention locks do not disturb each
882 			other. */
883 
884 			return(FALSE);
885 		}
886 #ifdef WITH_WSREP
887 		/* if BF thread is locking and has conflict with another BF
888 		   thread, we need to look at trx ordering and lock types */
889 		if (wsrep_thd_is_BF(trx->mysql_thd, FALSE)         &&
890 		    wsrep_thd_is_BF(lock2->trx->mysql_thd, TRUE)) {
891 
892 			if (wsrep_debug) {
893 				fprintf(stderr,
894 					"BF-BF lock conflict, locking: %lu\n",
895 					for_locking);
896 				lock_rec_print(stderr, lock2);
897 			}
898 
899 			if (wsrep_trx_order_before(trx->mysql_thd,
900 						   lock2->trx->mysql_thd) &&
901 			    (type_mode & LOCK_MODE_MASK) == LOCK_X        &&
902 			    (lock2->type_mode & LOCK_MODE_MASK) == LOCK_X)
903 			{
904 				if (for_locking || wsrep_debug) {
905 					/* exclusive lock conflicts are not
906 					   accepted */
907                                   ib::info() <<	"BF-BF X lock conflict," <<
908                                     "mode: " << type_mode << " supremum: " <<
909                                     lock_is_on_supremum;
910                                   ib::info() << "conflicts states: my: " <<
911                                     wsrep_thd_conflict_state(trx->mysql_thd, FALSE) <<
912                                     " locked: " <<
913                                     wsrep_thd_conflict_state(lock2->trx->mysql_thd, FALSE);
914                                   lock_rec_print(stderr, lock2);
915                                   if (for_locking) return FALSE;
916                                   //abort();
917 				}
918 			} else {
919 				/* if lock2->index->n_uniq <=
920 				   lock2->index->n_user_defined_cols
921 				   operation is on uniq index
922 				*/
923                           if (wsrep_debug) ib::info() <<
924                                              "BF conflict, modes: " <<
925                                              type_mode << " and " << lock2->type_mode <<
926                                              "idx: " <<
927                                              lock2->index->name <<
928                                              " - " <<
929                                              lock2->index->table_name <<
930                                              "n_uniq " <<
931                                              lock2->index->n_uniq <<
932                                              " n_user " <<
933                                              lock2->index->n_user_defined_cols;
934                           return FALSE;
935 			}
936 		}
937 #endif /* WITH_WSREP */
938 
939 		return(TRUE);
940 	}
941 
942 	return(FALSE);
943 }
944 
/*********************************************************************//**
Checks if a lock request lock1 has to wait for request lock2.
@return TRUE if lock1 has to wait for lock2 to be removed */
ibool
lock_has_to_wait(
/*=============*/
	const lock_t*	lock1,	/*!< in: waiting lock */
	const lock_t*	lock2)	/*!< in: another lock; NOTE that it is
				assumed that this has a lock bit set
				on the same record as in lock1 if the
				locks are record locks */
{
	ut_ad(lock1 && lock2);

	/* A transaction never waits for its own locks; locks of different
	transactions conflict only when their modes are incompatible. */
	if (lock1->trx != lock2->trx
	    && !lock_mode_compatible(lock_get_mode(lock1),
				     lock_get_mode(lock2))) {
		if (lock_get_type_low(lock1) == LOCK_REC) {
			ut_ad(lock_get_type_low(lock2) == LOCK_REC);

			/* If this lock request is for a supremum record
			then the second bit on the lock bitmap is set */

			if (lock1->type_mode
			    & (LOCK_PREDICATE | LOCK_PRDT_PAGE)) {
				/* Predicate (spatial) locks use their own
				conflict rules. */
				return(lock_prdt_has_to_wait(
					lock1->trx, lock1->type_mode,
					lock_get_prdt_from_lock(lock1),
					lock2));
			} else {
				/* NOTE: under WITH_WSREP the callee takes an
				extra leading "for_locking" flag argument. */
#ifdef WITH_WSREP
				return(lock_rec_has_to_wait(FALSE,
#else
				return(lock_rec_has_to_wait(
#endif /* WITH_WSREP */
					lock1->trx, lock1->type_mode, lock2,
					lock_rec_get_nth_bit(lock1, true)));
			}
		}

		/* Table locks: incompatible modes always imply a wait. */
		return(TRUE);
	}

	return(FALSE);
}
990 
991 /*============== RECORD LOCK BASIC FUNCTIONS ============================*/
992 
993 /**********************************************************************//**
994 Looks for a set bit in a record lock bitmap. Returns ULINT_UNDEFINED,
995 if none found.
996 @return bit index == heap number of the record, or ULINT_UNDEFINED if
997 none found */
998 ulint
lock_rec_find_set_bit(const lock_t * lock)999 lock_rec_find_set_bit(
1000 /*==================*/
1001 	const lock_t*	lock)	/*!< in: record lock with at least one bit set */
1002 {
1003 	for (ulint i = 0; i < lock_rec_get_n_bits(lock); ++i) {
1004 
1005 		if (lock_rec_get_nth_bit(lock, i)) {
1006 
1007 			return(i);
1008 		}
1009 	}
1010 
1011 	return(ULINT_UNDEFINED);
1012 }
1013 
1014 /** Reset the nth bit of a record lock.
1015 @param[in,out] lock record lock
1016 @param[in] i index of the bit that will be reset
1017 @return previous value of the bit */
1018 UNIV_INLINE
1019 byte
lock_rec_reset_nth_bit(lock_t * lock,ulint i)1020 lock_rec_reset_nth_bit(
1021 	lock_t*	lock,
1022 	ulint	i)
1023 {
1024 	ut_ad(lock_get_type_low(lock) == LOCK_REC);
1025 	ut_ad(i < lock->un_member.rec_lock.n_bits);
1026 
1027 	byte*	b = reinterpret_cast<byte*>(&lock[1]) + (i >> 3);
1028 	byte	mask = 1 << (i & 7);
1029 	byte	bit = *b & mask;
1030 	*b &= ~mask;
1031 
1032 	if (bit != 0) {
1033 		ut_ad(lock->trx->lock.n_rec_locks > 0);
1034 		--lock->trx->lock.n_rec_locks;
1035 	}
1036 
1037 	return(bit);
1038 }
1039 
1040 /** Reset the nth bit of a record lock.
1041 @param[in,out]	lock record lock
1042 @param[in] i	index of the bit that will be reset
1043 @param[in] type	whether the lock is in wait mode */
1044 void
lock_rec_trx_wait(lock_t * lock,ulint i,ulint type)1045 lock_rec_trx_wait(
1046 	lock_t*	lock,
1047 	ulint	i,
1048 	ulint	type)
1049 {
1050 	lock_rec_reset_nth_bit(lock, i);
1051 
1052 	if (type & LOCK_WAIT) {
1053 		lock_reset_lock_and_trx_wait(lock);
1054 	}
1055 }
1056 
1057 /*********************************************************************//**
1058 Determines if there are explicit record locks on a page.
1059 @return an explicit record lock on the page, or NULL if there are none */
1060 lock_t*
lock_rec_expl_exist_on_page(ulint space,ulint page_no)1061 lock_rec_expl_exist_on_page(
1062 /*========================*/
1063 	ulint	space,	/*!< in: space id */
1064 	ulint	page_no)/*!< in: page number */
1065 {
1066 	lock_t*	lock;
1067 
1068 	lock_mutex_enter();
1069 	/* Only used in ibuf pages, so rec_hash is good enough */
1070 	lock = lock_rec_get_first_on_page_addr(lock_sys->rec_hash,
1071 					       space, page_no);
1072 	lock_mutex_exit();
1073 
1074 	return(lock);
1075 }
1076 
1077 /*********************************************************************//**
1078 Resets the record lock bitmap to zero. NOTE: does not touch the wait_lock
1079 pointer in the transaction! This function is used in lock object creation
1080 and resetting. */
1081 static
1082 void
lock_rec_bitmap_reset(lock_t * lock)1083 lock_rec_bitmap_reset(
1084 /*==================*/
1085 	lock_t*	lock)	/*!< in: record lock */
1086 {
1087 	ulint	n_bytes;
1088 
1089 	ut_ad(lock_get_type_low(lock) == LOCK_REC);
1090 
1091 	/* Reset to zero the bitmap which resides immediately after the lock
1092 	struct */
1093 
1094 	n_bytes = lock_rec_get_n_bits(lock) / 8;
1095 
1096 	ut_ad((lock_rec_get_n_bits(lock) % 8) == 0);
1097 
1098 	memset(&lock[1], 0, n_bytes);
1099 }
1100 
1101 /*********************************************************************//**
1102 Copies a record lock to heap.
1103 @return copy of lock */
1104 static
1105 lock_t*
lock_rec_copy(const lock_t * lock,mem_heap_t * heap)1106 lock_rec_copy(
1107 /*==========*/
1108 	const lock_t*	lock,	/*!< in: record lock */
1109 	mem_heap_t*	heap)	/*!< in: memory heap */
1110 {
1111 	ulint	size;
1112 
1113 	ut_ad(lock_get_type_low(lock) == LOCK_REC);
1114 
1115 	size = sizeof(lock_t) + lock_rec_get_n_bits(lock) / 8;
1116 
1117 	return(static_cast<lock_t*>(mem_heap_dup(heap, lock, size)));
1118 }
1119 
1120 /*********************************************************************//**
1121 Gets the previous record lock set on a record.
1122 @return previous lock on the same record, NULL if none exists */
1123 const lock_t*
lock_rec_get_prev(const lock_t * in_lock,ulint heap_no)1124 lock_rec_get_prev(
1125 /*==============*/
1126 	const lock_t*	in_lock,/*!< in: record lock */
1127 	ulint		heap_no)/*!< in: heap number of the record */
1128 {
1129 	lock_t*		lock;
1130 	ulint		space;
1131 	ulint		page_no;
1132 	lock_t*		found_lock	= NULL;
1133 	hash_table_t*	hash;
1134 
1135 	ut_ad(lock_mutex_own());
1136 	ut_ad(lock_get_type_low(in_lock) == LOCK_REC);
1137 
1138 	space = in_lock->un_member.rec_lock.space;
1139 	page_no = in_lock->un_member.rec_lock.page_no;
1140 
1141 	hash = lock_hash_get(in_lock->type_mode);
1142 
1143 	for (lock = lock_rec_get_first_on_page_addr(hash, space, page_no);
1144 	     /* No op */;
1145 	     lock = lock_rec_get_next_on_page(lock)) {
1146 
1147 		ut_ad(lock);
1148 
1149 		if (lock == in_lock) {
1150 
1151 			return(found_lock);
1152 		}
1153 
1154 		if (lock_rec_get_nth_bit(lock, heap_no)) {
1155 
1156 			found_lock = lock;
1157 		}
1158 	}
1159 }
1160 
1161 /*============= FUNCTIONS FOR ANALYZING RECORD LOCK QUEUE ================*/
1162 
/*********************************************************************//**
Checks if a transaction has a GRANTED explicit lock on rec stronger or equal
to precise_mode.
@return lock or NULL */
UNIV_INLINE
lock_t*
lock_rec_has_expl(
/*==============*/
	ulint			precise_mode,/*!< in: LOCK_S or LOCK_X
					possibly ORed to LOCK_GAP or
					LOCK_REC_NOT_GAP, for a
					supremum record we regard this
					always a gap type request */
	const buf_block_t*	block,	/*!< in: buffer block containing
					the record */
	ulint			heap_no,/*!< in: heap number of the record */
	const trx_t*		trx)	/*!< in: transaction */
{
	lock_t*	lock;

	ut_ad(lock_mutex_own());
	ut_ad((precise_mode & LOCK_MODE_MASK) == LOCK_S
	      || (precise_mode & LOCK_MODE_MASK) == LOCK_X);
	ut_ad(!(precise_mode & LOCK_INSERT_INTENTION));

	/* Scan the queue of locks on this record for one of trx's own
	locks that covers the requested precise mode. */
	for (lock = lock_rec_get_first(lock_sys->rec_hash, block, heap_no);
	     lock != NULL;
	     lock = lock_rec_get_next(heap_no, lock)) {

		/* The lock qualifies only if all of the following hold:
		(1) it belongs to this transaction;
		(2) it is not an insert intention lock;
		(3) its mode is at least as strong as the requested mode;
		(4) it is granted, not waiting;
		(5) a REC_NOT_GAP lock only satisfies a REC_NOT_GAP
		    request, except on the supremum where every request
		    is treated as gap-type;
		(6) likewise a GAP lock only satisfies a GAP request,
		    except on the supremum. */
		if (lock->trx == trx
		    && !lock_rec_get_insert_intention(lock)
		    && lock_mode_stronger_or_eq(
			    lock_get_mode(lock),
			    static_cast<lock_mode>(
				    precise_mode & LOCK_MODE_MASK))
		    && !lock_get_wait(lock)
		    && (!lock_rec_get_rec_not_gap(lock)
			|| (precise_mode & LOCK_REC_NOT_GAP)
			|| heap_no == PAGE_HEAP_NO_SUPREMUM)
		    && (!lock_rec_get_gap(lock)
			|| (precise_mode & LOCK_GAP)
			|| heap_no == PAGE_HEAP_NO_SUPREMUM)) {

			return(lock);
		}
	}

	return(NULL);
}
1212 
#ifdef UNIV_DEBUG
/*********************************************************************//**
Checks if some other transaction has a lock request in the queue.
@return lock or NULL */
static
const lock_t*
lock_rec_other_has_expl_req(
/*========================*/
	lock_mode		mode,	/*!< in: LOCK_S or LOCK_X */
	const buf_block_t*	block,	/*!< in: buffer block containing
					the record */
	bool			wait,	/*!< in: whether also waiting locks
					are taken into account */
	ulint			heap_no,/*!< in: heap number of the record */
	const trx_t*		trx)	/*!< in: transaction, or NULL if
					requests by all transactions
					are taken into account */
{
	ut_ad(lock_mutex_own());
	ut_ad(mode == LOCK_X || mode == LOCK_S);

	/* Only GAP lock can be on SUPREMUM, and we are not looking for
	GAP lock */
	if (heap_no == PAGE_HEAP_NO_SUPREMUM) {
		return(NULL);
	}

	const lock_t*	lock = lock_rec_get_first(lock_sys->rec_hash,
						  block, heap_no);

	while (lock != NULL) {
		/* A qualifying lock belongs to another transaction, is
		not a pure gap lock, is granted (or 'wait' allows waiting
		ones too), and is at least as strong as 'mode'. */
		if (lock->trx != trx
		    && !lock_rec_get_gap(lock)
		    && (wait || !lock_get_wait(lock))
		    && lock_mode_stronger_or_eq(lock_get_mode(lock), mode)) {

			return(lock);
		}

		lock = lock_rec_get_next_const(heap_no, lock);
	}

	return(NULL);
}
#endif /* UNIV_DEBUG */
1258 
#ifdef WITH_WSREP
/** Resolve a lock conflict in favour of a brute-force (BF, i.e. Galera
applier) transaction: if trx wins the BF comparison against the holder of
the conflicting lock, ask the server layer to kill the holder.
@param[in]	trx	transaction requesting the lock
@param[in]	lock	conflicting lock held by the candidate victim */
static void
wsrep_kill_victim(const trx_t * const trx, const lock_t *lock) {
        ut_ad(lock_mutex_own());
        ut_ad(trx_mutex_own(lock->trx));

	/* quit for native mysql */
	if (!wsrep_on(trx->mysql_thd)) return;

	my_bool bf_this  = wsrep_thd_is_BF(trx->mysql_thd, FALSE);
	my_bool bf_other = wsrep_thd_is_BF(lock->trx->mysql_thd, TRUE);

	/* The requester wins if it is BF and the holder is not, or if
	both are BF and the requester comes earlier in the replication
	order. */
	if ((bf_this && !bf_other) ||
		(bf_this && bf_other && wsrep_trx_order_before(
			trx->mysql_thd, lock->trx->mysql_thd))) {

		if (lock->trx->lock.que_state == TRX_QUE_LOCK_WAIT) {
			if (wsrep_debug)
				fprintf(stderr, "WSREP: BF victim waiting\n");
			/* cannot release lock, until our lock
			is in the queue*/
		} else if (lock->trx != trx) {
			/* Optionally dump both transactions and the lock
			before killing the victim. */
			if (wsrep_log_conflicts) {
				if (bf_this)
					fputs("\n*** Priority TRANSACTION:\n",
					      stderr);
				else
					fputs("\n*** Victim TRANSACTION:\n",
					      stderr);
				wsrep_trx_print_locking(stderr, trx, 3000);

				if (bf_other)
					fputs("\n*** Priority TRANSACTION:\n",
					      stderr);
				else
					fputs("\n*** Victim TRANSACTION:\n",
					      stderr);
				wsrep_trx_print_locking(stderr, lock->trx, 3000);

				fputs("*** WAITING FOR THIS LOCK TO BE GRANTED:\n",
				      stderr);

				if (lock_get_type(lock) == LOCK_REC) {
					lock_rec_print(stderr, lock);
				} else {
					lock_table_print(stderr, lock);
				}
			}
			wsrep_innobase_kill_one_trx(trx->mysql_thd,
				(const trx_t*) trx, lock->trx, TRUE);
		}
	}
}
#endif
/*********************************************************************//**
Checks if some other transaction has a conflicting explicit lock request
in the queue, so that we have to wait.
@return lock or NULL */
static
const lock_t*
lock_rec_other_has_conflicting(
/*===========================*/
	ulint			mode,	/*!< in: LOCK_S or LOCK_X,
					possibly ORed to LOCK_GAP or
					LOC_REC_NOT_GAP,
					LOCK_INSERT_INTENTION */
	const buf_block_t*	block,	/*!< in: buffer block containing
					the record */
	ulint			heap_no,/*!< in: heap number of the record */
	const trx_t*		trx)	/*!< in: our transaction */
{
	const lock_t*		lock;

	ut_ad(lock_mutex_own());

	bool	is_supremum = (heap_no == PAGE_HEAP_NO_SUPREMUM);

	/* Return the first lock in the queue that forces trx to wait.
	NOTE: the two #ifdef branches below share the closing brace of
	the if; only one call survives preprocessing. */
	for (lock = lock_rec_get_first(lock_sys->rec_hash, block, heap_no);
	     lock != NULL;
	     lock = lock_rec_get_next_const(heap_no, lock)) {

#ifdef WITH_WSREP
		if (lock_rec_has_to_wait(TRUE, trx, mode, lock, is_supremum)) {
			/* Under Galera, try to resolve the conflict in
			favour of a brute-force applier before reporting
			the wait. */
			if (wsrep_on(trx->mysql_thd)) {
				trx_mutex_enter(lock->trx);
				wsrep_kill_victim(trx, lock);
				trx_mutex_exit(lock->trx);
                        }
#else
		if (lock_rec_has_to_wait(trx, mode, lock, is_supremum)) {
#endif /* WITH_WSREP */
			return(lock);
		}
	}

	return(NULL);
}
1356 
1357 /*********************************************************************//**
1358 Checks if some transaction has an implicit x-lock on a record in a secondary
1359 index.
1360 @return transaction id of the transaction which has the x-lock, or 0;
1361 NOTE that this function can return false positives but never false
1362 negatives. The caller must confirm all positive results by calling
1363 trx_is_active(). */
1364 static
1365 trx_t*
1366 lock_sec_rec_some_has_impl(
1367 /*=======================*/
1368 	const rec_t*	rec,	/*!< in: user record */
1369 	dict_index_t*	index,	/*!< in: secondary index */
1370 	const ulint*	offsets)/*!< in: rec_get_offsets(rec, index) */
1371 {
1372 	trx_t*		trx;
1373 	trx_id_t	max_trx_id;
1374 	const page_t*	page = page_align(rec);
1375 
1376 	ut_ad(!lock_mutex_own());
1377 	ut_ad(!trx_sys_mutex_own());
1378 	ut_ad(!dict_index_is_clust(index));
1379 	ut_ad(page_rec_is_user_rec(rec));
1380 	ut_ad(rec_offs_validate(rec, index, offsets));
1381 
1382 	max_trx_id = page_get_max_trx_id(page);
1383 
1384 	/* Some transaction may have an implicit x-lock on the record only
1385 	if the max trx id for the page >= min trx id for the trx list, or
1386 	database recovery is running. We do not write the changes of a page
1387 	max trx id to the log, and therefore during recovery, this value
1388 	for a page may be incorrect. */
1389 
1390 	if (max_trx_id < trx_rw_min_trx_id() && !recv_recovery_is_on()) {
1391 
1392 		trx = 0;
1393 
1394 	} else if (!lock_check_trx_id_sanity(max_trx_id, rec, index, offsets)) {
1395 
1396 		/* The page is corrupt: try to avoid a crash by returning 0 */
1397 		trx = 0;
1398 
1399 	/* In this case it is possible that some transaction has an implicit
1400 	x-lock. We have to look in the clustered index. */
1401 
1402 	} else {
1403 		trx = row_vers_impl_x_locked(rec, index, offsets);
1404 	}
1405 
1406 	return(trx);
1407 }
1408 
#ifdef UNIV_DEBUG
/*********************************************************************//**
Checks if some transaction, other than given trx_id, has an explicit
lock on the given rec, in the given precise_mode.
@return	the transaction, whose id is not equal to trx_id, that has an
explicit lock on the given rec, in the given precise_mode or NULL.*/
static
trx_t*
lock_rec_other_trx_holds_expl(
/*==========================*/
	ulint			precise_mode,	/*!< in: LOCK_S or LOCK_X
						possibly ORed to LOCK_GAP or
						LOCK_REC_NOT_GAP. */
	trx_t*			trx,		/*!< in: trx holding implicit
						lock on rec */
	const rec_t*		rec,		/*!< in: user record */
	const buf_block_t*	block)		/*!< in: buffer block
						containing the record */
{
	trx_t* holds = NULL;

	lock_mutex_enter();

	/* Only check while the implicit-lock holder is still an active
	read-write transaction. */
	if (trx_t* impl_trx = trx_rw_is_active(trx->id, NULL, false)) {
		ulint heap_no = page_rec_get_heap_no(rec);
		mutex_enter(&trx_sys->mutex);

		/* Scan every active read-write transaction for an
		explicit lock on this record held by someone other than
		the implicit-lock holder. */
		for (trx_t* t = UT_LIST_GET_FIRST(trx_sys->rw_trx_list);
		     t != NULL;
		     t = UT_LIST_GET_NEXT(trx_list, t)) {

			lock_t* expl_lock = lock_rec_has_expl(
				precise_mode, block, heap_no, t);

			if (expl_lock && expl_lock->trx != impl_trx) {
				/* An explicit lock is held by trx other than
				the trx holding the implicit lock. */
				holds = expl_lock->trx;
				break;
			}
		}

		mutex_exit(&trx_sys->mutex);
	}

	lock_mutex_exit();

	return(holds);
}
#endif /* UNIV_DEBUG */
1459 
1460 /*********************************************************************//**
1461 Return approximate number or record locks (bits set in the bitmap) for
1462 this transaction. Since delete-marked records may be removed, the
1463 record count will not be precise.
1464 The caller must be holding lock_sys->mutex. */
1465 ulint
1466 lock_number_of_rows_locked(
1467 /*=======================*/
1468 	const trx_lock_t*	trx_lock)	/*!< in: transaction locks */
1469 {
1470 	ut_ad(lock_mutex_own());
1471 
1472 	return(trx_lock->n_rec_locks);
1473 }
1474 
1475 /*********************************************************************//**
1476 Return the number of table locks for a transaction.
1477 The caller must be holding lock_sys->mutex. */
1478 ulint
1479 lock_number_of_tables_locked(
1480 /*=========================*/
1481 	const trx_lock_t*	trx_lock)	/*!< in: transaction locks */
1482 {
1483 	const lock_t*	lock;
1484 	ulint		n_tables = 0;
1485 
1486 	ut_ad(lock_mutex_own());
1487 
1488 	for (lock = UT_LIST_GET_FIRST(trx_lock->trx_locks);
1489 	     lock != NULL;
1490 	     lock = UT_LIST_GET_NEXT(trx_locks, lock)) {
1491 
1492 		if (lock_get_type_low(lock) == LOCK_TABLE) {
1493 			n_tables++;
1494 		}
1495 	}
1496 
1497 	return(n_tables);
1498 }
1499 
1500 /*============== RECORD LOCK CREATION AND QUEUE MANAGEMENT =============*/
#ifdef WITH_WSREP
/** Diagnostic helper: when wsrep_debug is set and the conflicting lock
is not the lock its transaction is waiting for, print both locks.
@param[in]	c_lock	conflicting lock to print */
static
void
wsrep_print_wait_locks(
/*============*/
	lock_t*		c_lock) /* conflicting lock to print */
{
	if (!wsrep_debug || c_lock->trx->lock.wait_lock == c_lock) {
		return;
	}

	ib::info() << "WSREP: c_lock != wait lock";

	if (lock_get_type_low(c_lock) & LOCK_TABLE) {
		lock_table_print(stderr, c_lock);
	} else {
		lock_rec_print(stderr, c_lock);
	}

	lock_t*	wait_lock = c_lock->trx->lock.wait_lock;

	if (lock_get_type_low(wait_lock) & LOCK_TABLE) {
		lock_table_print(stderr, wait_lock);
	} else {
		lock_rec_print(stderr, wait_lock);
	}
}
#endif /* WITH_WSREP */
1522 
/**
Check if the lock is on m_rec_id.
@param[in] lock			Lock to compare with
@return true if the record lock is on m_rec_id */
1530 bool
1531 RecLock::is_on_row(const lock_t* lock) const
1532 {
1533 	ut_ad(lock_get_type_low(lock) == LOCK_REC);
1534 
1535 	const lock_rec_t&	other = lock->un_member.rec_lock;
1536 
1537 	return(other.space == m_rec_id.m_space_id
1538 	       && other.page_no == m_rec_id.m_page_no
1539 	       && lock_rec_get_nth_bit(lock, m_rec_id.m_heap_no));
1540 }
1541 
/**
Do some checks and prepare for creating a new record lock */
void
RecLock::prepare() const
{
	ut_ad(lock_mutex_own());
	ut_ad(m_trx == thr_get_trx(m_thr));

	/* Test if there already is some other reason to suspend thread:
	we do not enqueue a lock request if the query thread should be
	stopped anyway */

	if (que_thr_stop(m_thr)) {
		ut_error;
	}

	switch (trx_get_dict_operation(m_trx)) {
	case TRX_DICT_OP_NONE:
		break;
	case TRX_DICT_OP_TABLE:
	case TRX_DICT_OP_INDEX:
		/* A record lock wait inside a dictionary operation is
		reported as a bug; in debug builds the ut_ad(0) below
		also asserts. Note: intentionally no break - both cases
		fall through to the assertion. */
		ib::error() << "A record lock wait happens in a dictionary"
			" operation. index " << m_index->name
			<< " of table " << m_index->table->name
			<< ". " << BUG_REPORT_MSG;
		ut_ad(0);
	}

	/* The table must be referenced or non-evictable while a lock
	on its records is being created. */
	ut_ad(m_index->table->n_ref_count > 0
	      || !m_index->table->can_be_evicted);
}
1573 
1574 /**
1575 Create the lock instance
1576 @param[in, out] trx	The transaction requesting the lock
1577 @param[in, out] index	Index on which record lock is required
1578 @param[in] mode		The lock mode desired
1579 @param[in] rec_id	The record id
1580 @param[in] size		Size of the lock + bitmap requested
1581 @return a record lock instance */
1582 lock_t*
1583 RecLock::lock_alloc(
1584 	trx_t*		trx,
1585 	dict_index_t*	index,
1586 	ulint		mode,
1587 	const RecID&	rec_id,
1588 	ulint		size)
1589 {
1590 	ut_ad(lock_mutex_own());
1591 
1592 	lock_t*	lock;
1593 
1594 	if (trx->lock.rec_cached >= trx->lock.rec_pool.size()
1595 	    || sizeof(*lock) + size > REC_LOCK_SIZE) {
1596 
1597 		ulint		n_bytes = size + sizeof(*lock);
1598 		mem_heap_t*	heap = trx->lock.lock_heap;
1599 
1600 		lock = reinterpret_cast<lock_t*>(mem_heap_alloc(heap, n_bytes));
1601 	} else {
1602 
1603 		lock = trx->lock.rec_pool[trx->lock.rec_cached];
1604 		++trx->lock.rec_cached;
1605 	}
1606 
1607 	lock->trx = trx;
1608 
1609 	lock->index = index;
1610 
1611 	/* Setup the lock attributes */
1612 
1613 	lock->type_mode = LOCK_REC | (mode & ~LOCK_TYPE_MASK);
1614 
1615 	lock_rec_t&	rec_lock = lock->un_member.rec_lock;
1616 
1617 	/* Predicate lock always on INFIMUM (0) */
1618 
1619 	if (is_predicate_lock(mode)) {
1620 
1621 		rec_lock.n_bits = 8;
1622 
1623 		memset(&lock[1], 0x0, 1);
1624 
1625 	} else {
1626 		ut_ad(8 * size < UINT32_MAX);
1627 		rec_lock.n_bits = static_cast<uint32_t>(8 * size);
1628 
1629 		memset(&lock[1], 0x0, size);
1630 	}
1631 
1632 	rec_lock.space = rec_id.m_space_id;
1633 
1634 	rec_lock.page_no = rec_id.m_page_no;
1635 
1636 	/* Set the bit corresponding to rec */
1637 
1638 	lock_rec_set_nth_bit(lock, rec_id.m_heap_no);
1639 
1640 	MONITOR_INC(MONITOR_NUM_RECLOCK);
1641 
1642 	MONITOR_INC(MONITOR_RECLOCK_CREATED);
1643 
1644 	return(lock);
1645 }
1646 
1647 /**
1648 Add the lock to the record lock hash and the transaction's lock list
1649 @param[in,out] lock	Newly created record lock to add to the rec hash
1650 @param[in] add_to_hash	If the lock should be added to the hash table */
1651 void
1652 RecLock::lock_add(lock_t* lock, bool add_to_hash)
1653 {
1654 	ut_ad(lock_mutex_own());
1655 	ut_ad(trx_mutex_own(lock->trx));
1656 
1657 	if (add_to_hash) {
1658 		ulint	key = m_rec_id.fold();
1659 
1660 		++lock->index->table->n_rec_locks;
1661 
1662 		HASH_INSERT(lock_t, hash, lock_hash_get(m_mode), key, lock);
1663 	}
1664 
1665 	if (m_mode & LOCK_WAIT) {
1666 		lock_set_lock_and_trx_wait(lock, lock->trx);
1667 	}
1668 
1669 	UT_LIST_ADD_LAST(lock->trx->lock.trx_locks, lock);
1670 }
1671 
/**
Create a new lock.
@param[in,out] trx		Transaction requesting the lock
@param[in] owns_trx_mutex	true if caller owns the trx_t::mutex
@param[in] add_to_hash		add the lock to hash table
@param[in] prdt			Predicate lock (optional)
@return a new lock instance */
#ifdef WITH_WSREP
lock_t*
RecLock::create(
	lock_t* const c_lock,
	trx_t*  trx,
	bool    owns_trx_mutex,
	bool	add_to_hash,
	const lock_prdt_t* prdt)
#else
lock_t*
RecLock::create(
	trx_t*	trx,
	bool	owns_trx_mutex,
	bool	add_to_hash,
	const	lock_prdt_t* prdt)
#endif /* WITH_WSREP */
{
	ut_ad(lock_mutex_own());
	ut_ad(owns_trx_mutex == trx_mutex_own(trx));

	/* Create the explicit lock instance and initialise it. */

	lock_t*	lock = lock_alloc(trx, m_index, m_mode, m_rec_id, m_size);

	if (prdt != NULL && (m_mode & LOCK_PREDICATE)) {

		lock_prdt_set_prdt(lock, prdt);
	}

#ifdef WITH_WSREP
	/* Galera: a brute-force (BF) applier that conflicts with c_lock
	is spliced into the hash chain in replication order, behind any
	earlier BF locks, instead of being appended via lock_add(). */
	if (c_lock                      &&
	    wsrep_on(trx->mysql_thd)    &&
	    wsrep_thd_is_BF(trx->mysql_thd, FALSE)) {
		lock_t *hash	= (lock_t *)c_lock->hash;
		lock_t *prev	= NULL;

		/* Skip over BF locks that replicate before us. */
		while (hash 						       &&
		       wsrep_thd_is_BF(((lock_t *)hash)->trx->mysql_thd, TRUE) &&
		       wsrep_trx_order_before(
				((lock_t *)hash)->trx->mysql_thd,
				trx->mysql_thd)) {
			prev = hash;
			hash = (lock_t *)hash->hash;
		}
		lock->hash = hash;
		if (prev) {
			prev->hash = lock;
		} else {
			c_lock->hash = lock;
		}
		/*
		 * delayed conflict resolution '...kill_one_trx' was not called,
		 * if victim was waiting for some other lock
		 */
		trx_mutex_enter(c_lock->trx);
		if (c_lock->trx->lock.que_state == TRX_QUE_LOCK_WAIT) {

			/* Mark the holder so that its wait is aborted in
			our favour. */
			c_lock->trx->lock.was_chosen_as_deadlock_victim = TRUE;

			if (wsrep_debug) wsrep_print_wait_locks(c_lock);

			/* Put ourselves into a lock wait while the victim
			lock is being cancelled. */
			trx->lock.que_state = TRX_QUE_LOCK_WAIT;
			lock_set_lock_and_trx_wait(lock, trx);
			UT_LIST_ADD_LAST(trx->lock.trx_locks, lock);

			ut_ad(m_thr != NULL);
			trx->lock.wait_thr = m_thr;
			m_thr->state = QUE_THR_LOCK_WAIT;

			/* have to release trx mutex for the duration of
			   victim lock release. This will eventually call
			   lock_grant, which wants to grant trx mutex again
			*/
			if (owns_trx_mutex) trx_mutex_exit(trx);
			lock_cancel_waiting_and_release(
				c_lock->trx->lock.wait_lock);
			if (owns_trx_mutex) trx_mutex_enter(trx);

			/* trx might not wait for c_lock, but some other lock
			   does not matter if wait_lock was released above
			 */
			if (c_lock->trx->lock.wait_lock == c_lock) {
				if (wsrep_debug) ib::info() <<
					"victim trx waits for some other lock than c_lock";
				lock_reset_lock_and_trx_wait(lock);
			}
			trx_mutex_exit(c_lock->trx);

			if (wsrep_debug)
				ib::info() << "WSREP: c_lock canceled " << c_lock->trx->id;

                        ++lock->index->table->n_rec_locks;
			/* have to bail out here to avoid lock_set_lock... */
			return(lock);
		}
                trx_mutex_exit(c_lock->trx);
                /* we don't want to add to hash anymore, but need other updates from lock_add */
		++lock->index->table->n_rec_locks;
                lock_add(lock, false);
	} else {
#endif /* WITH_WSREP */
	/* Ensure that another transaction doesn't access the trx
	lock state and lock data structures while we are adding the
	lock and changing the transaction state to LOCK_WAIT */

	if (!owns_trx_mutex) {
		trx_mutex_enter(trx);
	}

	lock_add(lock, add_to_hash);

	if (!owns_trx_mutex) {
		trx_mutex_exit(trx);
        }
#ifdef WITH_WSREP
	}
#endif /* WITH_WSREP */

	return(lock);
}
1799 
1800 
1801 /**
1802 Check the outcome of the deadlock check
1803 @param[in,out] victim_trx	Transaction selected for rollback
1804 @param[in,out] lock		Lock being requested
1805 @return DB_LOCK_WAIT, DB_DEADLOCK or DB_SUCCESS_LOCKED_REC */
1806 dberr_t
1807 RecLock::check_deadlock_result(const trx_t* victim_trx, lock_t* lock)
1808 {
1809 	ut_ad(lock_mutex_own());
1810 	ut_ad(m_trx == lock->trx);
1811 	ut_ad(trx_mutex_own(m_trx));
1812 
1813 	if (victim_trx != NULL) {
1814 
1815 		ut_ad(victim_trx == m_trx);
1816 
1817 		lock_reset_lock_and_trx_wait(lock);
1818 
1819 		lock_rec_reset_nth_bit(lock, m_rec_id.m_heap_no);
1820 
1821 		return(DB_DEADLOCK);
1822 
1823 	} else if (m_trx->lock.wait_lock == NULL) {
1824 
1825 		/* If there was a deadlock but we chose another
1826 		transaction as a victim, it is possible that we
1827 		already have the lock now granted! */
1828 
1829 		return(DB_SUCCESS_LOCKED_REC);
1830 	}
1831 
1832 	return(DB_LOCK_WAIT);
1833 }
1834 
1835 /**
1836 Check and resolve any deadlocks
1837 @param[in, out] lock		The lock being acquired
1838 @return DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED, or
1839 	DB_SUCCESS_LOCKED_REC; DB_SUCCESS_LOCKED_REC means that
1840 	there was a deadlock, but another transaction was chosen
1841 	as a victim, and we got the lock immediately: no need to
1842 	wait then */
1843 dberr_t
1844 RecLock::deadlock_check(lock_t* lock)
1845 {
1846 	ut_ad(lock_mutex_own());
1847 	ut_ad(lock->trx == m_trx);
1848 	ut_ad(trx_mutex_own(m_trx));
1849 
1850 	const trx_t*	victim_trx =
1851 			DeadlockChecker::check_and_resolve(lock, m_trx);
1852 
1853 	/* Check the outcome of the deadlock test. It is possible that
1854 	the transaction that blocked our lock was rolled back and we
1855 	were granted our lock. */
1856 
1857 	dberr_t	err = check_deadlock_result(victim_trx, lock);
1858 
1859 	if (err == DB_LOCK_WAIT) {
1860 
1861 		set_wait_state(lock);
1862 
1863 		MONITOR_INC(MONITOR_LOCKREC_WAIT);
1864 	}
1865 
1866 	return(err);
1867 }
1868 
/**
Collect the transactions that will need to be rolled back asynchronously
@param[in, out] trx	Transaction to be rolled back */
void
RecLock::mark_trx_for_rollback(trx_t* trx)
{
	trx->abort = true;

	/* The victim must be a read-write transaction with no rollback
	already in progress or disabled. */
	ut_ad(!trx->read_only);
	ut_ad(trx_mutex_own(m_trx));
	ut_ad(!(trx->in_innodb & TRX_FORCE_ROLLBACK));
	ut_ad(!(trx->in_innodb & TRX_FORCE_ROLLBACK_ASYNC));
	ut_ad(!(trx->in_innodb & TRX_FORCE_ROLLBACK_DISABLE));

	/* Note that we will attempt an async rollback. The _ASYNC
	flag will be cleared if the transaction is rolled back
	synchronously before we get a chance to do it. */

	trx->in_innodb |= TRX_FORCE_ROLLBACK | TRX_FORCE_ROLLBACK_ASYNC;

	bool		cas;
	os_thread_id_t	thread_id = os_thread_get_curr_id();

	/* Claim responsibility for killing this transaction; ut_a()
	enforces that no other thread has claimed it already. */
	cas = os_compare_and_swap_thread_id(&trx->killed_by, 0, thread_id);

	ut_a(cas);

	/* Queue the victim on the requesting transaction's hit list. */
	m_trx->hit_list.push_back(hit_list_t::value_type(trx));

#ifdef UNIV_DEBUG
	THD*	thd = trx->mysql_thd;

	if (thd != NULL) {

		char	buffer[1024];
		ib::info() << "Blocking transaction: ID: " << trx->id << " - "
			<< " Blocked transaction ID: "<< m_trx->id << " - "
			<< thd_security_context(thd, buffer, sizeof(buffer),
						512);
	}
#endif /* UNIV_DEBUG */
}
1911 
1912 /**
1913 Setup the requesting transaction state for lock grant
1914 @param[in,out] lock		Lock for which to change state */
1915 void
1916 RecLock::set_wait_state(lock_t* lock)
1917 {
1918 	ut_ad(lock_mutex_own());
1919 	ut_ad(m_trx == lock->trx);
1920 	ut_ad(trx_mutex_own(m_trx));
1921 	ut_ad(lock_get_wait(lock));
1922 
1923 	m_trx->lock.wait_started = ut_time();
1924 
1925 	m_trx->lock.que_state = TRX_QUE_LOCK_WAIT;
1926 
1927 	m_trx->lock.was_chosen_as_deadlock_victim = false;
1928 
1929 	bool	stopped = que_thr_stop(m_thr);
1930 	ut_a(stopped);
1931 }
1932 
1933 /**
1934 Enqueue a lock wait for normal transaction. If it is a high priority transaction
1935 then jump the record lock wait queue and if the transaction at the head of the
1936 queue is itself waiting roll it back, also do a deadlock check and resolve.
1937 @param[in, out] wait_for	The lock that the joining transaction is
1938 				waiting for
1939 @param[in] prdt			Predicate [optional]
1940 @return DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED, or
1941 	DB_SUCCESS_LOCKED_REC; DB_SUCCESS_LOCKED_REC means that
1942 	there was a deadlock, but another transaction was chosen
1943 	as a victim, and we got the lock immediately: no need to
1944 	wait then */
1945 dberr_t
1946 #ifdef WITH_WSREP
1947 RecLock::add_to_waitq(lock_t* const wait_for, const lock_prdt_t* prdt)
1948 #else
1949 RecLock::add_to_waitq(const lock_t* wait_for, const lock_prdt_t* prdt)
1950 #endif
1951 {
1952 	ut_ad(lock_mutex_own());
1953 	ut_ad(m_trx == thr_get_trx(m_thr));
1954 	ut_ad(trx_mutex_own(m_trx));
1955 
1956 	DEBUG_SYNC_C("rec_lock_add_to_waitq");
1957 
1958 	m_mode |= LOCK_WAIT;
1959 
1960 	/* Do the preliminary checks, and set query thread state */
1961 
1962 	prepare();
1963 
1964 	bool	high_priority = trx_is_high_priority(m_trx);
1965 
1966 	/* Don't queue the lock to hash table, if high priority transaction. */
1967 #ifdef WITH_WSREP
1968 	lock_t*	lock = create(wait_for, m_trx, true, !high_priority, prdt);
1969 #else
1970 	lock_t*	lock = create(m_trx, true, !high_priority, prdt);
1971 #endif /* WITH_WSREP */
1972 	/* Attempt to jump over the low priority waiting locks. */
1973 	if (high_priority && jump_queue(lock, wait_for)) {
1974 
1975 		/* Lock is granted */
1976 		return(DB_SUCCESS);
1977 	}
1978 
1979 	dberr_t err= DB_LOCK_WAIT;
1980 #ifdef WITH_WSREP
1981 	if (wsrep_thd_is_BF(m_trx->mysql_thd, FALSE) && !lock_get_wait(lock)) {
1982 		if (wsrep_debug) ib::info() <<
1983 			"BF thread got lock granted early, ID " << lock->trx->id;
1984 		err = DB_SUCCESS;
1985         } else {
1986 #endif /* WITH_WSREP */
1987         ut_ad(lock_get_wait(lock));
1988 
1989 	err = deadlock_check(lock);
1990 #ifdef WITH_WSREP
1991         }
1992 #endif /* WITH_WSREP */
1993 	ut_ad(trx_mutex_own(m_trx));
1994 
1995 	/* m_trx->mysql_thd is NULL if it's an internal trx. So current_thd is used */
1996 	if (err == DB_LOCK_WAIT) {
1997 		thd_report_row_lock_wait(current_thd, wait_for->trx->mysql_thd);
1998 	}
1999 	return(err);
2000 }
2001 
2002 /*********************************************************************//**
2003 Adds a record lock request in the record queue. The request is normally
2004 added as the last in the queue, but if there are no waiting lock requests
2005 on the record, and the request to be added is not a waiting request, we
2006 can reuse a suitable record lock object already existing on the same page,
2007 just setting the appropriate bit in its bitmap. This is a low-level function
2008 which does NOT check for deadlocks or lock compatibility!
2009 @return lock where the bit was set */
2010 static
2011 void
2012 lock_rec_add_to_queue(
2013 /*==================*/
2014 	ulint			type_mode,/*!< in: lock mode, wait, gap
2015 					etc. flags; type is ignored
2016 					and replaced by LOCK_REC */
2017 	const buf_block_t*	block,	/*!< in: buffer block containing
2018 					the record */
2019 	ulint			heap_no,/*!< in: heap number of the record */
2020 	dict_index_t*		index,	/*!< in: index of record */
2021 	trx_t*			trx,	/*!< in/out: transaction */
2022 	bool			caller_owns_trx_mutex)
2023 					/*!< in: TRUE if caller owns the
2024 					transaction mutex */
2025 {
2026 #ifdef UNIV_DEBUG
2027 	ut_ad(lock_mutex_own());
2028 	ut_ad(caller_owns_trx_mutex == trx_mutex_own(trx));
2029 	ut_ad(dict_index_is_clust(index)
2030 	      || dict_index_get_online_status(index) != ONLINE_INDEX_CREATION);
2031 	switch (type_mode & LOCK_MODE_MASK) {
2032 	case LOCK_X:
2033 	case LOCK_S:
2034 		break;
2035 	default:
2036 		ut_error;
2037 	}
2038 
2039 	if (!(type_mode & (LOCK_WAIT | LOCK_GAP))) {
2040 		lock_mode	mode = (type_mode & LOCK_MODE_MASK) == LOCK_S
2041 			? LOCK_X
2042 			: LOCK_S;
2043 		const lock_t*	other_lock
2044 			= lock_rec_other_has_expl_req(
2045 				mode, block, false, heap_no, trx);
2046 #ifdef WITH_WSREP
2047 		//ut_a(!other_lock || (wsrep_thd_is_BF(trx->mysql_thd, FALSE) &&
2048                 //                     wsrep_thd_is_BF(other_lock->trx->mysql_thd, TRUE)));
2049 		if (other_lock &&
2050 			!wsrep_thd_is_BF(trx->mysql_thd, FALSE) &&
2051 			!wsrep_thd_is_BF(other_lock->trx->mysql_thd, TRUE)) {
2052 
2053 			ib::info() << "WSREP BF lock conflict for my lock:\n BF:" <<
2054                           ((wsrep_thd_is_BF(trx->mysql_thd, FALSE)) ? "BF" : "normal") << " exec: " <<
2055                           wsrep_thd_exec_mode(trx->mysql_thd) << " conflict: " <<
2056                           wsrep_thd_conflict_state(trx->mysql_thd) << " seqno: " <<
2057                           wsrep_thd_trx_seqno(trx->mysql_thd) << " SQL: " <<
2058                           wsrep_thd_query(trx->mysql_thd);
2059                         trx_t* otrx = other_lock->trx;
2060 			ib::info() << "WSREP other lock:\n BF:" <<
2061                           ((wsrep_thd_is_BF(otrx->mysql_thd, FALSE)) ? "BF" : "normal") << " exec: " <<
2062                           wsrep_thd_exec_mode(otrx->mysql_thd) << " conflict: " <<
2063                           wsrep_thd_conflict_state(otrx->mysql_thd) << " seqno: " <<
2064                           wsrep_thd_trx_seqno(otrx->mysql_thd) << " SQL: " <<
2065                           wsrep_thd_query(otrx->mysql_thd);
2066 		}
2067 #else
2068 		ut_a(!other_lock);
2069 #endif /* WITH_WSREP */
2070 	}
2071 #endif /* UNIV_DEBUG */
2072 
2073 	type_mode |= LOCK_REC;
2074 
2075 	/* If rec is the supremum record, then we can reset the gap bit, as
2076 	all locks on the supremum are automatically of the gap type, and we
2077 	try to avoid unnecessary memory consumption of a new record lock
2078 	struct for a gap type lock */
2079 
2080 	if (heap_no == PAGE_HEAP_NO_SUPREMUM) {
2081 		ut_ad(!(type_mode & LOCK_REC_NOT_GAP));
2082 
2083 		/* There should never be LOCK_REC_NOT_GAP on a supremum
2084 		record, but let us play safe */
2085 
2086 		type_mode &= ~(LOCK_GAP | LOCK_REC_NOT_GAP);
2087 	}
2088 
2089 	lock_t*		lock;
2090 	lock_t*		first_lock;
2091 	hash_table_t*	hash = lock_hash_get(type_mode);
2092 
2093 	/* Look for a waiting lock request on the same record or on a gap */
2094 
2095 	for (first_lock = lock = lock_rec_get_first_on_page(hash, block);
2096 	     lock != NULL;
2097 	     lock = lock_rec_get_next_on_page(lock)) {
2098 
2099 		if (lock_get_wait(lock)
2100 		    && lock_rec_get_nth_bit(lock, heap_no)) {
2101 
2102 			break;
2103 		}
2104 	}
2105 
2106 	if (lock == NULL && !(type_mode & LOCK_WAIT)) {
2107 
2108 		/* Look for a similar record lock on the same page:
2109 		if one is found and there are no waiting lock requests,
2110 		we can just set the bit */
2111 
2112 		lock = lock_rec_find_similar_on_page(
2113 			type_mode, heap_no, first_lock, trx);
2114 
2115 		if (lock != NULL) {
2116 
2117 			lock_rec_set_nth_bit(lock, heap_no);
2118 
2119 			return;
2120 		}
2121 	}
2122 
2123 	RecLock		rec_lock(index, block, heap_no, type_mode);
2124 
2125 #ifdef WITH_WSREP
2126 	rec_lock.create(NULL, trx, caller_owns_trx_mutex, true);
2127 #else
2128 	rec_lock.create(trx, caller_owns_trx_mutex, true);
2129 #endif /* WITH_WSREP */
2130 }
2131 
2132 /*********************************************************************//**
2133 This is a fast routine for locking a record in the most common cases:
2134 there are no explicit locks on the page, or there is just one lock, owned
2135 by this transaction, and of the right type_mode. This is a low-level function
2136 which does NOT look at implicit locks! Checks lock compatibility within
2137 explicit locks. This function sets a normal next-key lock, or in the case of
2138 a page supremum record, a gap type lock.
2139 @return whether the locking succeeded */
2140 UNIV_INLINE
2141 lock_rec_req_status
2142 lock_rec_lock_fast(
2143 /*===============*/
2144 	bool			impl,	/*!< in: if TRUE, no lock is set
2145 					if no wait is necessary: we
2146 					assume that the caller will
2147 					set an implicit lock */
2148 	ulint			mode,	/*!< in: lock mode: LOCK_X or
2149 					LOCK_S possibly ORed to either
2150 					LOCK_GAP or LOCK_REC_NOT_GAP */
2151 	const buf_block_t*	block,	/*!< in: buffer block containing
2152 					the record */
2153 	ulint			heap_no,/*!< in: heap number of record */
2154 	dict_index_t*		index,	/*!< in: index of record */
2155 	que_thr_t*		thr)	/*!< in: query thread */
2156 {
2157 	ut_ad(lock_mutex_own());
2158 	ut_ad(!srv_read_only_mode);
2159 	ut_ad((LOCK_MODE_MASK & mode) != LOCK_S
2160 	      || lock_table_has(thr_get_trx(thr), index->table, LOCK_IS));
2161 	ut_ad((LOCK_MODE_MASK & mode) != LOCK_X
2162 	      || lock_table_has(thr_get_trx(thr), index->table, LOCK_IX)
2163 	      || srv_read_only_mode);
2164 	ut_ad((LOCK_MODE_MASK & mode) == LOCK_S
2165 	      || (LOCK_MODE_MASK & mode) == LOCK_X);
2166 	ut_ad(mode - (LOCK_MODE_MASK & mode) == LOCK_GAP
2167 	      || mode - (LOCK_MODE_MASK & mode) == 0
2168 	      || mode - (LOCK_MODE_MASK & mode) == LOCK_REC_NOT_GAP);
2169 	ut_ad(dict_index_is_clust(index) || !dict_index_is_online_ddl(index));
2170 
2171 	DBUG_EXECUTE_IF("innodb_report_deadlock", return(LOCK_REC_FAIL););
2172 
2173 	lock_t*	lock = lock_rec_get_first_on_page(lock_sys->rec_hash, block);
2174 
2175 	trx_t*	trx = thr_get_trx(thr);
2176 
2177 	lock_rec_req_status	status = LOCK_REC_SUCCESS;
2178 
2179 	if (lock == NULL) {
2180 
2181 		if (!impl) {
2182 			RecLock	rec_lock(index, block, heap_no, mode);
2183 			/* Note that we don't own the trx mutex. */
2184 #ifdef WITH_WSREP
2185 			rec_lock.create(NULL, trx, false, true);
2186 #else
2187 			rec_lock.create(trx, false, true);
2188 #endif /* WITH_WSREP */
2189 		}
2190 
2191 		status = LOCK_REC_SUCCESS_CREATED;
2192 	} else {
2193 		trx_mutex_enter(trx);
2194 
2195 		if (lock_rec_get_next_on_page(lock)
2196 		     || lock->trx != trx
2197 		     || lock->type_mode != (mode | LOCK_REC)
2198 		     || lock_rec_get_n_bits(lock) <= heap_no) {
2199 
2200 			status = LOCK_REC_FAIL;
2201 		} else if (!impl) {
2202 			/* If the nth bit of the record lock is already set
2203 			then we do not set a new lock bit, otherwise we do
2204 			set */
2205 			if (!lock_rec_get_nth_bit(lock, heap_no)) {
2206 				lock_rec_set_nth_bit(lock, heap_no);
2207 				status = LOCK_REC_SUCCESS_CREATED;
2208 			}
2209 		}
2210 
2211 		trx_mutex_exit(trx);
2212 	}
2213 
2214 	return(status);
2215 }
2216 
2217 /*********************************************************************//**
2218 This is the general, and slower, routine for locking a record. This is a
2219 low-level function which does NOT look at implicit locks! Checks lock
2220 compatibility within explicit locks. This function sets a normal next-key
2221 lock, or in the case of a page supremum record, a gap type lock.
2222 @return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, DB_LOCK_WAIT, DB_DEADLOCK,
2223 or DB_QUE_THR_SUSPENDED */
2224 static
2225 dberr_t
2226 lock_rec_lock_slow(
2227 /*===============*/
2228 	ibool			impl,	/*!< in: if TRUE, no lock is set
2229 					if no wait is necessary: we
2230 					assume that the caller will
2231 					set an implicit lock */
2232 	ulint			mode,	/*!< in: lock mode: LOCK_X or
2233 					LOCK_S possibly ORed to either
2234 					LOCK_GAP or LOCK_REC_NOT_GAP */
2235 	const buf_block_t*	block,	/*!< in: buffer block containing
2236 					the record */
2237 	ulint			heap_no,/*!< in: heap number of record */
2238 	dict_index_t*		index,	/*!< in: index of record */
2239 	que_thr_t*		thr)	/*!< in: query thread */
2240 {
2241 	ut_ad(lock_mutex_own());
2242 	ut_ad(!srv_read_only_mode);
2243 	ut_ad((LOCK_MODE_MASK & mode) != LOCK_S
2244 	      || lock_table_has(thr_get_trx(thr), index->table, LOCK_IS));
2245 	ut_ad((LOCK_MODE_MASK & mode) != LOCK_X
2246 	      || lock_table_has(thr_get_trx(thr), index->table, LOCK_IX));
2247 	ut_ad((LOCK_MODE_MASK & mode) == LOCK_S
2248 	      || (LOCK_MODE_MASK & mode) == LOCK_X);
2249 	ut_ad(mode - (LOCK_MODE_MASK & mode) == LOCK_GAP
2250 	      || mode - (LOCK_MODE_MASK & mode) == 0
2251 	      || mode - (LOCK_MODE_MASK & mode) == LOCK_REC_NOT_GAP);
2252 	ut_ad(dict_index_is_clust(index) || !dict_index_is_online_ddl(index));
2253 
2254 	DBUG_EXECUTE_IF("innodb_report_deadlock", return(DB_DEADLOCK););
2255 
2256 	dberr_t	err;
2257 	trx_t*	trx = thr_get_trx(thr);
2258 
2259 	trx_mutex_enter(trx);
2260 
2261 	if (lock_rec_has_expl(mode, block, heap_no, trx)) {
2262 
2263 		/* The trx already has a strong enough lock on rec: do
2264 		nothing */
2265 
2266 		err = DB_SUCCESS;
2267 
2268 	} else {
2269 
2270 #ifdef WITH_WSREP
2271           lock_t* const wait_for = (lock_t *)lock_rec_other_has_conflicting(
2272 #else
2273 		const lock_t* wait_for = lock_rec_other_has_conflicting(
2274 #endif
2275 			mode, block, heap_no, trx);
2276 
2277 		if (wait_for != NULL) {
2278 
2279 			/* If another transaction has a non-gap conflicting
2280 			request in the queue, as this transaction does not
2281 			have a lock strong enough already granted on the
2282 			record, we may have to wait. */
2283 
2284 			RecLock	rec_lock(thr, index, block, heap_no, mode);
2285 
2286 			err = rec_lock.add_to_waitq(wait_for);
2287 
2288 		} else if (!impl) {
2289 
2290 			/* Set the requested lock on the record, note that
2291 			we already own the transaction mutex. */
2292 
2293 			lock_rec_add_to_queue(
2294 				LOCK_REC | mode, block, heap_no, index, trx,
2295 				true);
2296 
2297 			err = DB_SUCCESS_LOCKED_REC;
2298 		} else {
2299 			err = DB_SUCCESS;
2300 		}
2301 	}
2302 
2303 	trx_mutex_exit(trx);
2304 
2305 	return(err);
2306 }
2307 
2308 /*********************************************************************//**
2309 Tries to lock the specified record in the mode requested. If not immediately
2310 possible, enqueues a waiting lock request. This is a low-level function
2311 which does NOT look at implicit locks! Checks lock compatibility within
2312 explicit locks. This function sets a normal next-key lock, or in the case
2313 of a page supremum record, a gap type lock.
2314 @return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, DB_LOCK_WAIT, DB_DEADLOCK,
2315 or DB_QUE_THR_SUSPENDED */
2316 static
2317 dberr_t
2318 lock_rec_lock(
2319 /*==========*/
2320 	bool			impl,	/*!< in: if true, no lock is set
2321 					if no wait is necessary: we
2322 					assume that the caller will
2323 					set an implicit lock */
2324 	ulint			mode,	/*!< in: lock mode: LOCK_X or
2325 					LOCK_S possibly ORed to either
2326 					LOCK_GAP or LOCK_REC_NOT_GAP */
2327 	const buf_block_t*	block,	/*!< in: buffer block containing
2328 					the record */
2329 	ulint			heap_no,/*!< in: heap number of record */
2330 	dict_index_t*		index,	/*!< in: index of record */
2331 	que_thr_t*		thr)	/*!< in: query thread */
2332 {
2333 	ut_ad(lock_mutex_own());
2334 	ut_ad(!srv_read_only_mode);
2335 	ut_ad((LOCK_MODE_MASK & mode) != LOCK_S
2336 	      || lock_table_has(thr_get_trx(thr), index->table, LOCK_IS));
2337 	ut_ad((LOCK_MODE_MASK & mode) != LOCK_X
2338 	      || lock_table_has(thr_get_trx(thr), index->table, LOCK_IX));
2339 	ut_ad((LOCK_MODE_MASK & mode) == LOCK_S
2340 	      || (LOCK_MODE_MASK & mode) == LOCK_X);
2341 	ut_ad(mode - (LOCK_MODE_MASK & mode) == LOCK_GAP
2342 	      || mode - (LOCK_MODE_MASK & mode) == LOCK_REC_NOT_GAP
2343 	      || mode - (LOCK_MODE_MASK & mode) == 0);
2344 	ut_ad(dict_index_is_clust(index) || !dict_index_is_online_ddl(index));
2345 
2346 	/* We try a simplified and faster subroutine for the most
2347 	common cases */
2348 	switch (lock_rec_lock_fast(impl, mode, block, heap_no, index, thr)) {
2349 	case LOCK_REC_SUCCESS:
2350 		return(DB_SUCCESS);
2351 	case LOCK_REC_SUCCESS_CREATED:
2352 		return(DB_SUCCESS_LOCKED_REC);
2353 	case LOCK_REC_FAIL:
2354 		return(lock_rec_lock_slow(impl, mode, block,
2355 					  heap_no, index, thr));
2356 	}
2357 
2358 	ut_error;
2359 	return(DB_ERROR);
2360 }
2361 
2362 /*********************************************************************//**
2363 Checks if a waiting record lock request still has to wait in a queue.
2364 @return lock that is causing the wait */
2365 static
2366 const lock_t*
2367 lock_rec_has_to_wait_in_queue(
2368 /*==========================*/
2369 	const lock_t*	wait_lock)	/*!< in: waiting record lock */
2370 {
2371 	const lock_t*	lock;
2372 	ulint		space;
2373 	ulint		page_no;
2374 	ulint		heap_no;
2375 	ulint		bit_mask;
2376 	ulint		bit_offset;
2377 	hash_table_t*	hash;
2378 
2379 	ut_ad(lock_mutex_own());
2380 	ut_ad(lock_get_wait(wait_lock));
2381 	ut_ad(lock_get_type_low(wait_lock) == LOCK_REC);
2382 
2383 	space = wait_lock->un_member.rec_lock.space;
2384 	page_no = wait_lock->un_member.rec_lock.page_no;
2385 	heap_no = lock_rec_find_set_bit(wait_lock);
2386 
2387 	bit_offset = heap_no / 8;
2388 	bit_mask = static_cast<ulint>(1 << (heap_no % 8));
2389 
2390 	hash = lock_hash_get(wait_lock->type_mode);
2391 
2392 	for (lock = lock_rec_get_first_on_page_addr(hash, space, page_no);
2393 	     lock != wait_lock;
2394 	     lock = lock_rec_get_next_on_page_const(lock)) {
2395 
2396 		const byte*	p = (const byte*) &lock[1];
2397 
2398 		if (heap_no < lock_rec_get_n_bits(lock)
2399 		    && (p[bit_offset] & bit_mask)
2400 		    && lock_has_to_wait(wait_lock, lock)) {
2401 #ifdef WITH_WSREP
2402 			if (wsrep_thd_is_BF(wait_lock->trx->mysql_thd, FALSE) &&
2403 			    wsrep_thd_is_BF(lock->trx->mysql_thd, TRUE)) {
2404 				/* don't wait for another BF lock */
2405 				continue;
2406 			}
2407 #endif
2408 
2409 			return(lock);
2410 		}
2411 	}
2412 
2413 	return(NULL);
2414 }
2415 
2416 /*************************************************************//**
2417 Grants a lock to a waiting lock request and releases the waiting transaction.
2418 The caller must hold lock_sys->mutex but not lock->trx->mutex. */
2419 static
2420 void
2421 lock_grant(
2422 /*=======*/
2423 	lock_t*	lock)	/*!< in/out: waiting lock request */
2424 {
2425 	ut_ad(lock_mutex_own());
2426 
2427 	lock_reset_lock_and_trx_wait(lock);
2428 	trx_mutex_enter(lock->trx);
2429 
2430 	if (lock_get_mode(lock) == LOCK_AUTO_INC) {
2431 		dict_table_t*	table = lock->un_member.tab_lock.table;
2432 
2433 		if (table->autoinc_trx == lock->trx) {
2434 			ib::error() << "Transaction already had an"
2435 				<< " AUTO-INC lock!";
2436 		} else {
2437 			table->autoinc_trx = lock->trx;
2438 
2439 			ib_vector_push(lock->trx->autoinc_locks, &lock);
2440 		}
2441 	}
2442 
2443 	DBUG_PRINT("ib_lock", ("wait for trx " TRX_ID_FMT " ends",
2444 			       trx_get_id_for_print(lock->trx)));
2445 
2446 	/* If we are resolving a deadlock by choosing another transaction
2447 	as a victim, then our original transaction may not be in the
2448 	TRX_QUE_LOCK_WAIT state, and there is no need to end the lock wait
2449 	for it */
2450 
2451 	if (lock->trx->lock.que_state == TRX_QUE_LOCK_WAIT) {
2452 		que_thr_t*	thr;
2453 
2454 		thr = que_thr_end_lock_wait(lock->trx);
2455 
2456 		if (thr != NULL) {
2457 			lock_wait_release_thread_if_suspended(thr);
2458 		}
2459 	}
2460 
2461 	trx_mutex_exit(lock->trx);
2462 }
2463 
2464 /**
2465 Jump the queue for the record over all low priority transactions and
2466 add the lock. If all current granted locks are compatible, grant the
2467 lock. Otherwise, mark all granted transaction for asynchronous
2468 rollback and add to hit list.
2469 @param[in, out]	lock		Lock being requested
2470 @param[in]	conflict_lock	First conflicting lock from the head
2471 @return true if the lock is granted */
2472 bool
2473 RecLock::jump_queue(
2474 	lock_t*		lock,
2475 	const lock_t*	conflict_lock)
2476 {
2477 	ut_ad(m_trx == lock->trx);
2478 	ut_ad(trx_mutex_own(m_trx));
2479 	ut_ad(conflict_lock->trx != m_trx);
2480 	ut_ad(trx_is_high_priority(m_trx));
2481 	ut_ad(m_rec_id.m_heap_no != ULINT32_UNDEFINED);
2482 
2483 	bool	high_priority = false;
2484 
2485 	/* Find out the position to add the lock. If there are other high
2486 	priority transactions in waiting state then we should add it after
2487 	the last high priority transaction. Otherwise, we can add it after
2488 	the last granted lock jumping over the wait queue. */
2489 	bool grant_lock = lock_add_priority(lock, conflict_lock,
2490 					    &high_priority);
2491 
2492 	if (grant_lock) {
2493 
2494 		ut_ad(conflict_lock->trx->lock.que_state == TRX_QUE_LOCK_WAIT);
2495 		ut_ad(conflict_lock->trx->lock.wait_lock == conflict_lock);
2496 
2497 #ifdef UNIV_DEBUG
2498 		ib::info() << "Granting High Priority Transaction (ID): "
2499 			   << lock->trx->id << " the lock jumping over"
2500 			   << " waiting Transaction (ID): "
2501 			   << conflict_lock->trx->id;
2502 #endif /* UNIV_DEBUG */
2503 
2504 		lock_reset_lock_and_trx_wait(lock);
2505 		return(true);
2506 	}
2507 
2508 	/* If another high priority transaction is found waiting
2509 	victim transactions are already marked for rollback. */
2510 	if (high_priority) {
2511 
2512 		return(false);
2513 	}
2514 
2515 	/* The lock is placed after the last granted lock in the queue. Check and add
2516 	low priority transactinos to hit list for ASYNC rollback. */
2517 	make_trx_hit_list(lock, conflict_lock);
2518 
2519 	return(false);
2520 }
2521 
/** Find position in lock queue and add the high priority transaction
lock. Intention and GAP only locks can be granted even if there are
waiting locks in front of the queue. To add the High priority
transaction in a safe position we keep the following rule.

1. If the lock can be granted, add it before the first waiting lock
in the queue so that all currently waiting locks need to do conflict
check before getting granted.

2. If the lock has to wait, add it after the last granted lock or the
last waiting high priority transaction in the queue whichever is later.
This ensures that the transaction is granted only after doing conflict
check with all granted transactions.
@param[in]	lock		Lock being requested
@param[in]	conflict_lock	First conflicting lock from the head
@param[out]	high_priority	high priority transaction ahead in queue
@return true if the lock can be granted */
bool
RecLock::lock_add_priority(
	lock_t*		lock,
	const lock_t*	conflict_lock,
	bool*		high_priority)
{
	ut_ad(high_priority);

	*high_priority = false;

	/* If the first conflicting lock is waiting for the current row,
	then all other granted locks are compatible and the lock can be
	directly granted if no other high priority transactions are
	waiting. We need to recheck with all granted transaction as there
	could be granted GAP or Intention locks down the queue. */
	bool	grant_lock = (conflict_lock->is_waiting());
	lock_t*	lock_head = NULL;
	lock_t*	grant_position = NULL;
	lock_t*	add_position = NULL;

	/* Different lock (such as predicate lock) are on different hash */
	hash_table_t*	lock_hash = lock_hash_get(m_mode);

	/* Find the head of the hash chain for this record's page. */
	HASH_SEARCH(hash, lock_hash, m_rec_id.fold(), lock_t*,
		    lock_head, ut_ad(lock_head->is_record_lock()), true);

	ut_ad(lock_head);

	/* Walk the chain tracking: the last lock before the first
	waiter (grant_position), the last granted lock or last waiting
	high priority lock (add_position), and whether any granted lock
	conflicts with the request. */
	for (lock_t* next = lock_head; next != NULL; next = next->hash) {

		/* check only for locks on the current row */
		if (!is_on_row(next)) {
			continue;
		}

		if (next->is_waiting()) {
			/* grant lock position is the granted lock just before
			the first wait lock in the queue. */
			if (grant_position == NULL) {
				grant_position = add_position;
			}

			if (trx_is_high_priority(next->trx)) {

				/* Another high priority transaction is
				already waiting; we must queue behind it. */
				*high_priority = true;
				grant_lock = false;
				add_position = next;
			}
		} else {

			add_position = next;
			/* Cannot grant lock if there is any conflicting
			granted lock. */
			if (grant_lock && lock_has_to_wait(lock, next)) {
				grant_lock = false;
			}
		}
	}

	/* If the lock is to be granted it is safe to add before the first
	waiting lock in the queue. */
	if (grant_lock) {

		/* NOTE(review): if no granted lock on this row precedes
		the first waiter, grant_position is NULL here — confirm
		that lock_has_to_wait() and the list splice below are
		safe with a NULL position. */
		ut_ad(!lock_has_to_wait(lock, grant_position));
		add_position = grant_position;
	}

	ut_ad(add_position != NULL);

	/* Add the lock to lock hash table. */
	lock->hash = add_position->hash;
	add_position->hash = lock;
	++lock->index->table->n_rec_locks;

	return(grant_lock);
}
2615 
/** Iterate over the granted locks and prepare the hit list for ASYNC Rollback.
If the transaction is waiting for some other lock then wake up with deadlock error.
Currently we don't mark following transactions for ASYNC Rollback.
1. Read only transactions
2. Background transactions
3. Other High priority transactions
@param[in]	lock		Lock being requested
@param[in]	conflict_lock	First conflicting lock from the head */
void
RecLock::make_trx_hit_list(
	lock_t*		lock,
	const lock_t*	conflict_lock)
{
	const lock_t*	next;

	/* Scan the queue from the first conflicting lock up to our own
	(waiting) lock, which was placed after the last granted lock. */
	for (next = conflict_lock; next != NULL; next = next->hash) {

		/* All locks ahead in the queue are checked. */
		if (next == lock) {

			ut_ad(next->is_waiting());
			break;
		}

		trx_t*	trx = next->trx;
		/* Check only for conflicting, granted locks on the current row.
		Currently, we don't rollback read only transactions, transactions
		owned by background threads. */
		if (trx == lock->trx
		    || !is_on_row(next)
		    || next->is_waiting()
		    || trx->read_only
		    || trx->mysql_thd == NULL
		    || !lock_has_to_wait(lock, next)) {

			continue;
		}

		trx_mutex_enter(trx);

		/* Skip high priority transactions, if already marked for abort
		by some other transaction or if ASYNC rollback is disabled. A
		transaction must complete kill/abort of a victim transaction once
		marked and added to hit list. */
		if (trx_is_high_priority(trx)
		    || (trx->in_innodb & TRX_FORCE_ROLLBACK_DISABLE) != 0
		    || trx->abort) {

			trx_mutex_exit(trx);
			continue;
		}

		/* If the transaction is waiting on some other resource then
		wake it up with DEAD_LOCK error so that it can rollback. */
		if (trx->lock.que_state == TRX_QUE_LOCK_WAIT) {

			/* Assert that it is not waiting for current record. */
			ut_ad(trx->lock.wait_lock != next);
#ifdef UNIV_DEBUG
			ib::info() << "High Priority Transaction (ID): "
				   << lock->trx->id << " waking up blocking"
				   << " transaction (ID): " << trx->id;
#endif /* UNIV_DEBUG */
			trx->lock.was_chosen_as_deadlock_victim = true;
			lock_cancel_waiting_and_release(trx->lock.wait_lock);
			trx_mutex_exit(trx);
			continue;
		}

		/* Mark for ASYNC Rollback and add to hit list. */
		mark_trx_for_rollback(trx);
		trx_mutex_exit(trx);
	}

	/* The loop must have terminated at our own lock. */
	ut_ad(next == lock);
}
2692 
2693 /*************************************************************//**
2694 Cancels a waiting record lock request and releases the waiting transaction
2695 that requested it. NOTE: does NOT check if waiting lock requests behind this
2696 one can now be granted! */
2697 static
2698 void
2699 lock_rec_cancel(
2700 /*============*/
2701 	lock_t*	lock)	/*!< in: waiting record lock request */
2702 {
2703 	que_thr_t*	thr;
2704 
2705 	ut_ad(lock_mutex_own());
2706 	ut_ad(lock_get_type_low(lock) == LOCK_REC);
2707 
2708 	/* Reset the bit (there can be only one set bit) in the lock bitmap */
2709 	lock_rec_reset_nth_bit(lock, lock_rec_find_set_bit(lock));
2710 
2711 	/* Reset the wait flag and the back pointer to lock in trx */
2712 
2713 	lock_reset_lock_and_trx_wait(lock);
2714 
2715 	/* The following function releases the trx from lock wait */
2716 
2717 	trx_mutex_enter(lock->trx);
2718 
2719 	thr = que_thr_end_lock_wait(lock->trx);
2720 
2721 	if (thr != NULL) {
2722 		lock_wait_release_thread_if_suspended(thr);
2723 	}
2724 
2725 	trx_mutex_exit(lock->trx);
2726 }
2727 
2728 /** Grant lock to waiting requests that no longer conflicts
2729 @param[in]	in_lock		record lock object: grant all non-conflicting
2730 				locks waiting behind this lock object */
2731 static
2732 void
2733 lock_rec_grant(lock_t* in_lock)
2734 {
2735 	lock_t*		lock;
2736 
2737 	ulint		space = in_lock->space();
2738 	ulint		page_no = in_lock->page_number();
2739 	hash_table_t*	lock_hash = in_lock->hash_table();
2740 
2741 	/* Check if waiting locks in the queue can now be granted: grant
2742 	locks if there are no conflicting locks ahead. Stop at the first
2743 	X lock that is waiting or has been granted. */
2744 
2745 	for (lock = lock_rec_get_first_on_page_addr(lock_hash, space, page_no);
2746 	     lock != NULL;
2747 	     lock = lock_rec_get_next_on_page(lock)) {
2748 
2749 		if (lock_get_wait(lock)
2750 		    && !lock_rec_has_to_wait_in_queue(lock)) {
2751 
2752 			/* Grant the lock */
2753 			ut_ad(lock->trx != in_lock->trx);
2754 			lock_grant(lock);
2755 		}
2756 	}
2757 }
2758 
2759 /*************************************************************//**
2760 Removes a record lock request, waiting or granted, from the queue and
2761 grants locks to other transactions in the queue if they now are entitled
2762 to a lock. NOTE: all record locks contained in in_lock are removed. */
2763 void
2764 lock_rec_dequeue_from_page(
2765 /*=======================*/
2766 	lock_t*		in_lock)	/*!< in: record lock object: all
2767 					record locks which are contained in
2768 					this lock object are removed;
2769 					transactions waiting behind will
2770 					get their lock requests granted,
2771 					if they are now qualified to it */
2772 {
2773 	ulint		space;
2774 	ulint		page_no;
2775 	trx_lock_t*	trx_lock;
2776 	hash_table_t*	lock_hash;
2777 
2778 	ut_ad(lock_mutex_own());
2779 	ut_ad(lock_get_type_low(in_lock) == LOCK_REC);
2780 	/* We may or may not be holding in_lock->trx->mutex here. */
2781 
2782 	trx_lock = &in_lock->trx->lock;
2783 
2784 	space = in_lock->un_member.rec_lock.space;
2785 	page_no = in_lock->un_member.rec_lock.page_no;
2786 
2787 	ut_ad(in_lock->index->table->n_rec_locks > 0);
2788 	in_lock->index->table->n_rec_locks--;
2789 
2790 	lock_hash = lock_hash_get(in_lock->type_mode);
2791 
2792 	HASH_DELETE(lock_t, hash, lock_hash,
2793 		    lock_rec_fold(space, page_no), in_lock);
2794 
2795 	UT_LIST_REMOVE(trx_lock->trx_locks, in_lock);
2796 
2797 	MONITOR_INC(MONITOR_RECLOCK_REMOVED);
2798 	MONITOR_DEC(MONITOR_NUM_RECLOCK);
2799 
2800 	lock_rec_grant(in_lock);
2801 }
2802 
2803 /*************************************************************//**
2804 Removes a record lock request, waiting or granted, from the queue. */
2805 void
2806 lock_rec_discard(
2807 /*=============*/
2808 	lock_t*		in_lock)	/*!< in: record lock object: all
2809 					record locks which are contained
2810 					in this lock object are removed */
2811 {
2812 	ulint		space;
2813 	ulint		page_no;
2814 	trx_lock_t*	trx_lock;
2815 
2816 	ut_ad(lock_mutex_own());
2817 	ut_ad(lock_get_type_low(in_lock) == LOCK_REC);
2818 
2819 	trx_lock = &in_lock->trx->lock;
2820 
2821 	space = in_lock->un_member.rec_lock.space;
2822 	page_no = in_lock->un_member.rec_lock.page_no;
2823 
2824 	ut_ad(in_lock->index->table->n_rec_locks > 0);
2825 	in_lock->index->table->n_rec_locks--;
2826 
2827 	HASH_DELETE(lock_t, hash, lock_hash_get(in_lock->type_mode),
2828 			    lock_rec_fold(space, page_no), in_lock);
2829 
2830 	UT_LIST_REMOVE(trx_lock->trx_locks, in_lock);
2831 
2832 	MONITOR_INC(MONITOR_RECLOCK_REMOVED);
2833 	MONITOR_DEC(MONITOR_NUM_RECLOCK);
2834 }
2835 
2836 /*************************************************************//**
2837 Removes record lock objects set on an index page which is discarded. This
2838 function does not move locks, or check for waiting locks, therefore the
2839 lock bitmaps must already be reset when this function is called. */
2840 static
2841 void
2842 lock_rec_free_all_from_discard_page_low(
2843 /*====================================*/
2844 	ulint		space,
2845 	ulint		page_no,
2846 	hash_table_t*	lock_hash)
2847 {
2848 	lock_t*	lock;
2849 	lock_t*	next_lock;
2850 
2851 	lock = lock_rec_get_first_on_page_addr(lock_hash, space, page_no);
2852 
2853 	while (lock != NULL) {
2854 		ut_ad(lock_rec_find_set_bit(lock) == ULINT_UNDEFINED);
2855 		ut_ad(!lock_get_wait(lock));
2856 
2857 		next_lock = lock_rec_get_next_on_page(lock);
2858 
2859 		lock_rec_discard(lock);
2860 
2861 		lock = next_lock;
2862 	}
2863 }
2864 
2865 /*************************************************************//**
2866 Removes record lock objects set on an index page which is discarded. This
2867 function does not move locks, or check for waiting locks, therefore the
2868 lock bitmaps must already be reset when this function is called. */
2869 void
2870 lock_rec_free_all_from_discard_page(
2871 /*================================*/
2872 	const buf_block_t*	block)	/*!< in: page to be discarded */
2873 {
2874 	ulint	space;
2875 	ulint	page_no;
2876 
2877 	ut_ad(lock_mutex_own());
2878 
2879 	space = block->page.id.space();
2880 	page_no = block->page.id.page_no();
2881 
2882 	lock_rec_free_all_from_discard_page_low(
2883 		space, page_no, lock_sys->rec_hash);
2884 	lock_rec_free_all_from_discard_page_low(
2885 		space, page_no, lock_sys->prdt_hash);
2886 	lock_rec_free_all_from_discard_page_low(
2887 		space, page_no, lock_sys->prdt_page_hash);
2888 }
2889 
2890 /*============= RECORD LOCK MOVING AND INHERITING ===================*/
2891 
2892 /*************************************************************//**
2893 Resets the lock bits for a single record. Releases transactions waiting for
2894 lock requests here. */
2895 static
2896 void
2897 lock_rec_reset_and_release_wait_low(
2898 /*================================*/
2899 	hash_table_t*		hash,	/*!< in: hash table */
2900 	const buf_block_t*	block,	/*!< in: buffer block containing
2901 					the record */
2902 	ulint			heap_no)/*!< in: heap number of record */
2903 {
2904 	lock_t*	lock;
2905 
2906 	ut_ad(lock_mutex_own());
2907 
2908 	for (lock = lock_rec_get_first(hash, block, heap_no);
2909 	     lock != NULL;
2910 	     lock = lock_rec_get_next(heap_no, lock)) {
2911 
2912 		if (lock_get_wait(lock)) {
2913 			lock_rec_cancel(lock);
2914 		} else {
2915 			lock_rec_reset_nth_bit(lock, heap_no);
2916 		}
2917 	}
2918 }
2919 
2920 /*************************************************************//**
2921 Resets the lock bits for a single record. Releases transactions waiting for
2922 lock requests here. */
2923 static
2924 void
2925 lock_rec_reset_and_release_wait(
2926 /*============================*/
2927 	const buf_block_t*	block,	/*!< in: buffer block containing
2928 					the record */
2929 	ulint			heap_no)/*!< in: heap number of record */
2930 {
2931 	lock_rec_reset_and_release_wait_low(
2932 		lock_sys->rec_hash, block, heap_no);
2933 
2934 	lock_rec_reset_and_release_wait_low(
2935 		lock_sys->prdt_hash, block, PAGE_HEAP_NO_INFIMUM);
2936 	lock_rec_reset_and_release_wait_low(
2937 		lock_sys->prdt_page_hash, block, PAGE_HEAP_NO_INFIMUM);
2938 }
2939 
2940 /*************************************************************//**
2941 Makes a record to inherit the locks (except LOCK_INSERT_INTENTION type)
2942 of another record as gap type locks, but does not reset the lock bits of
2943 the other record. Also waiting lock requests on rec are inherited as
2944 GRANTED gap locks. */
2945 static
2946 void
2947 lock_rec_inherit_to_gap(
2948 /*====================*/
2949 	const buf_block_t*	heir_block,	/*!< in: block containing the
2950 						record which inherits */
2951 	const buf_block_t*	block,		/*!< in: block containing the
2952 						record from which inherited;
2953 						does NOT reset the locks on
2954 						this record */
2955 	ulint			heir_heap_no,	/*!< in: heap_no of the
2956 						inheriting record */
2957 	ulint			heap_no)	/*!< in: heap_no of the
2958 						donating record */
2959 {
2960 	lock_t*	lock;
2961 
2962 	ut_ad(lock_mutex_own());
2963 
2964 	/* If srv_locks_unsafe_for_binlog is TRUE or session is using
2965 	READ COMMITTED isolation level, we do not want locks set
2966 	by an UPDATE or a DELETE to be inherited as gap type locks. But we
2967 	DO want S-locks/X-locks(taken for replace) set by a consistency
2968 	constraint to be inherited also then. */
2969 
2970 	for (lock = lock_rec_get_first(lock_sys->rec_hash, block, heap_no);
2971 	     lock != NULL;
2972 	     lock = lock_rec_get_next(heap_no, lock)) {
2973 
2974 		/* Skip inheriting lock if set */
2975 		if (lock->trx->skip_lock_inheritance) {
2976 
2977 			continue;
2978 		}
2979 
2980 		if (!lock_rec_get_insert_intention(lock)
2981 		    && !((srv_locks_unsafe_for_binlog
2982 			  || lock->trx->isolation_level
2983 			  <= TRX_ISO_READ_COMMITTED)
2984 			 && lock_get_mode(lock) ==
2985 			 (lock->trx->duplicates ? LOCK_S : LOCK_X))) {
2986 			lock_rec_add_to_queue(
2987 				LOCK_REC | LOCK_GAP | lock_get_mode(lock),
2988 				heir_block, heir_heap_no, lock->index,
2989 				lock->trx, FALSE);
2990 		}
2991 	}
2992 }
2993 
2994 /*************************************************************//**
2995 Makes a record to inherit the gap locks (except LOCK_INSERT_INTENTION type)
2996 of another record as gap type locks, but does not reset the lock bits of the
2997 other record. Also waiting lock requests are inherited as GRANTED gap locks. */
2998 static
2999 void
3000 lock_rec_inherit_to_gap_if_gap_lock(
3001 /*================================*/
3002 	const buf_block_t*	block,		/*!< in: buffer block */
3003 	ulint			heir_heap_no,	/*!< in: heap_no of
3004 						record which inherits */
3005 	ulint			heap_no)	/*!< in: heap_no of record
3006 						from which inherited;
3007 						does NOT reset the locks
3008 						on this record */
3009 {
3010 	lock_t*	lock;
3011 
3012 	lock_mutex_enter();
3013 
3014 	for (lock = lock_rec_get_first(lock_sys->rec_hash, block, heap_no);
3015 	     lock != NULL;
3016 	     lock = lock_rec_get_next(heap_no, lock)) {
3017 
3018 		/* Skip inheriting lock if set */
3019 		if (lock->trx->skip_lock_inheritance) {
3020 
3021 			continue;
3022 		}
3023 
3024 		if (!lock_rec_get_insert_intention(lock)
3025 		    && (heap_no == PAGE_HEAP_NO_SUPREMUM
3026 			|| !lock_rec_get_rec_not_gap(lock))) {
3027 
3028 			lock_rec_add_to_queue(
3029 				LOCK_REC | LOCK_GAP | lock_get_mode(lock),
3030 				block, heir_heap_no, lock->index,
3031 				lock->trx, FALSE);
3032 		}
3033 	}
3034 
3035 	lock_mutex_exit();
3036 }
3037 
3038 /*************************************************************//**
3039 Moves the locks of a record to another record and resets the lock bits of
3040 the donating record. */
3041 void
3042 lock_rec_move_low(
3043 /*==============*/
3044 	hash_table_t*		lock_hash,	/*!< in: hash table to use */
3045 	const buf_block_t*	receiver,	/*!< in: buffer block containing
3046 						the receiving record */
3047 	const buf_block_t*	donator,	/*!< in: buffer block containing
3048 						the donating record */
3049 	ulint			receiver_heap_no,/*!< in: heap_no of the record
3050 						which gets the locks; there
3051 						must be no lock requests
3052 						on it! */
3053 	ulint			donator_heap_no)/*!< in: heap_no of the record
3054 						which gives the locks */
3055 {
3056 	lock_t*	lock;
3057 
3058 	ut_ad(lock_mutex_own());
3059 
3060 	/* If the lock is predicate lock, it resides on INFIMUM record */
3061 	ut_ad(lock_rec_get_first(
3062 		lock_hash, receiver, receiver_heap_no) == NULL
3063 	      || lock_hash == lock_sys->prdt_hash
3064 	      || lock_hash == lock_sys->prdt_page_hash);
3065 
3066 	for (lock = lock_rec_get_first(lock_hash,
3067 				       donator, donator_heap_no);
3068 	     lock != NULL;
3069 	     lock = lock_rec_get_next(donator_heap_no, lock)) {
3070 
3071 		const ulint	type_mode = lock->type_mode;
3072 
3073 		lock_rec_reset_nth_bit(lock, donator_heap_no);
3074 
3075 		if (type_mode & LOCK_WAIT) {
3076 			lock_reset_lock_and_trx_wait(lock);
3077 		}
3078 
3079 		/* Note that we FIRST reset the bit, and then set the lock:
3080 		the function works also if donator == receiver */
3081 
3082 		lock_rec_add_to_queue(
3083 			type_mode, receiver, receiver_heap_no,
3084 			lock->index, lock->trx, FALSE);
3085 	}
3086 
3087 	ut_ad(lock_rec_get_first(lock_sys->rec_hash,
3088 				 donator, donator_heap_no) == NULL);
3089 }
3090 
/** Move all the granted locks to the front of the given lock list.
All the waiting locks will be at the end of the list.
@param[in,out]	lock_list	the given lock list.  */
static
void
lock_move_granted_locks_to_front(
	UT_LIST_BASE_NODE_T(lock_t)&	lock_list)
{
	lock_t*	lock;

	bool seen_waiting_lock = false;

	for (lock = UT_LIST_GET_FIRST(lock_list); lock;
	     lock = UT_LIST_GET_NEXT(trx_locks, lock)) {

		/* Until the first waiting lock is found, the prefix of
		the list is all-granted and nothing needs moving. */
		if (!seen_waiting_lock) {
			if (lock->is_waiting()) {
				seen_waiting_lock = true;
			}
			continue;
		}

		ut_ad(seen_waiting_lock);

		if (!lock->is_waiting()) {
			/* A granted lock after a waiting one: move it to
			the front of the list.  Step back to the
			predecessor so that the loop's GET_NEXT resumes
			from the correct node after the move. */
			lock_t* prev = UT_LIST_GET_PREV(trx_locks, lock);
			ut_a(prev);
			UT_LIST_MOVE_TO_FRONT(lock_list, lock);
			lock = prev;
		}
	}
}
3123 
3124 /*************************************************************//**
3125 Updates the lock table when we have reorganized a page. NOTE: we copy
3126 also the locks set on the infimum of the page; the infimum may carry
3127 locks if an update of a record is occurring on the page, and its locks
3128 were temporarily stored on the infimum. */
3129 void
3130 lock_move_reorganize_page(
3131 /*======================*/
3132 	const buf_block_t*	block,	/*!< in: old index page, now
3133 					reorganized */
3134 	const buf_block_t*	oblock)	/*!< in: copy of the old, not
3135 					reorganized page */
3136 {
3137 	lock_t*		lock;
3138 	UT_LIST_BASE_NODE_T(lock_t)	old_locks;
3139 	mem_heap_t*	heap		= NULL;
3140 	ulint		comp;
3141 
3142 	lock_mutex_enter();
3143 
3144 	/* FIXME: This needs to deal with predicate lock too */
3145 	lock = lock_rec_get_first_on_page(lock_sys->rec_hash, block);
3146 
3147 	if (lock == NULL) {
3148 		lock_mutex_exit();
3149 
3150 		return;
3151 	}
3152 
3153 	heap = mem_heap_create(256);
3154 
3155 	/* Copy first all the locks on the page to heap and reset the
3156 	bitmaps in the original locks; chain the copies of the locks
3157 	using the trx_locks field in them. */
3158 
3159 	UT_LIST_INIT(old_locks, &lock_t::trx_locks);
3160 
3161 	do {
3162 		/* Make a copy of the lock */
3163 		lock_t*	old_lock = lock_rec_copy(lock, heap);
3164 
3165 		UT_LIST_ADD_LAST(old_locks, old_lock);
3166 
3167 		/* Reset bitmap of lock */
3168 		lock_rec_bitmap_reset(lock);
3169 
3170 		if (lock_get_wait(lock)) {
3171 
3172 			lock_reset_lock_and_trx_wait(lock);
3173 		}
3174 
3175 		lock = lock_rec_get_next_on_page(lock);
3176 	} while (lock != NULL);
3177 
3178 	comp = page_is_comp(block->frame);
3179 	ut_ad(comp == page_is_comp(oblock->frame));
3180 
3181 	lock_move_granted_locks_to_front(old_locks);
3182 
3183 	DBUG_EXECUTE_IF("do_lock_reverse_page_reorganize",
3184 			UT_LIST_REVERSE(old_locks););
3185 
3186 	for (lock = UT_LIST_GET_FIRST(old_locks); lock;
3187 	     lock = UT_LIST_GET_NEXT(trx_locks, lock)) {
3188 
3189 		/* NOTE: we copy also the locks set on the infimum and
3190 		supremum of the page; the infimum may carry locks if an
3191 		update of a record is occurring on the page, and its locks
3192 		were temporarily stored on the infimum */
3193 		const rec_t*	rec1 = page_get_infimum_rec(
3194 			buf_block_get_frame(block));
3195 		const rec_t*	rec2 = page_get_infimum_rec(
3196 			buf_block_get_frame(oblock));
3197 
3198 		/* Set locks according to old locks */
3199 		for (;;) {
3200 			ulint	old_heap_no;
3201 			ulint	new_heap_no;
3202 
3203 			if (comp) {
3204 				old_heap_no = rec_get_heap_no_new(rec2);
3205 				new_heap_no = rec_get_heap_no_new(rec1);
3206 
3207 				rec1 = page_rec_get_next_low(rec1, TRUE);
3208 				rec2 = page_rec_get_next_low(rec2, TRUE);
3209 			} else {
3210 				old_heap_no = rec_get_heap_no_old(rec2);
3211 				new_heap_no = rec_get_heap_no_old(rec1);
3212 				ut_ad(!memcmp(rec1, rec2,
3213 					      rec_get_data_size_old(rec2)));
3214 
3215 				rec1 = page_rec_get_next_low(rec1, FALSE);
3216 				rec2 = page_rec_get_next_low(rec2, FALSE);
3217 			}
3218 
3219 			/* Clear the bit in old_lock. */
3220 			if (old_heap_no < lock->un_member.rec_lock.n_bits
3221 			    && lock_rec_reset_nth_bit(lock, old_heap_no)) {
3222 				/* NOTE that the old lock bitmap could be too
3223 				small for the new heap number! */
3224 
3225 				lock_rec_add_to_queue(
3226 					lock->type_mode, block, new_heap_no,
3227 					lock->index, lock->trx, FALSE);
3228 			}
3229 
3230 			if (new_heap_no == PAGE_HEAP_NO_SUPREMUM) {
3231 				ut_ad(old_heap_no == PAGE_HEAP_NO_SUPREMUM);
3232 				break;
3233 			}
3234 		}
3235 
3236 #ifdef UNIV_DEBUG
3237 		{
3238 			ulint	i = lock_rec_find_set_bit(lock);
3239 
3240 			/* Check that all locks were moved. */
3241 			if (i != ULINT_UNDEFINED) {
3242 				ib::fatal() << "lock_move_reorganize_page(): "
3243 					<< i << " not moved in "
3244 					<< (void*) lock;
3245 			}
3246 		}
3247 #endif /* UNIV_DEBUG */
3248 	}
3249 
3250 	lock_mutex_exit();
3251 
3252 	mem_heap_free(heap);
3253 
3254 #ifdef UNIV_DEBUG_LOCK_VALIDATE
3255 	ut_ad(lock_rec_validate_page(block));
3256 #endif
3257 }
3258 
3259 /*************************************************************//**
3260 Moves the explicit locks on user records to another page if a record
3261 list end is moved to another page. */
3262 void
3263 lock_move_rec_list_end(
3264 /*===================*/
3265 	const buf_block_t*	new_block,	/*!< in: index page to move to */
3266 	const buf_block_t*	block,		/*!< in: index page */
3267 	const rec_t*		rec)		/*!< in: record on page: this
3268 						is the first record moved */
3269 {
3270 	lock_t*		lock;
3271 	const ulint	comp	= page_rec_is_comp(rec);
3272 
3273 	ut_ad(buf_block_get_frame(block) == page_align(rec));
3274 	ut_ad(comp == page_is_comp(buf_block_get_frame(new_block)));
3275 
3276 	lock_mutex_enter();
3277 
3278 	/* Note: when we move locks from record to record, waiting locks
3279 	and possible granted gap type locks behind them are enqueued in
3280 	the original order, because new elements are inserted to a hash
3281 	table to the end of the hash chain, and lock_rec_add_to_queue
3282 	does not reuse locks if there are waiters in the queue. */
3283 
3284 	for (lock = lock_rec_get_first_on_page(lock_sys->rec_hash, block); lock;
3285 	     lock = lock_rec_get_next_on_page(lock)) {
3286 		const rec_t*	rec1	= rec;
3287 		const rec_t*	rec2;
3288 		const ulint	type_mode = lock->type_mode;
3289 
3290 		if (comp) {
3291 			if (page_offset(rec1) == PAGE_NEW_INFIMUM) {
3292 				rec1 = page_rec_get_next_low(rec1, TRUE);
3293 			}
3294 
3295 			rec2 = page_rec_get_next_low(
3296 				buf_block_get_frame(new_block)
3297 				+ PAGE_NEW_INFIMUM, TRUE);
3298 		} else {
3299 			if (page_offset(rec1) == PAGE_OLD_INFIMUM) {
3300 				rec1 = page_rec_get_next_low(rec1, FALSE);
3301 			}
3302 
3303 			rec2 = page_rec_get_next_low(
3304 				buf_block_get_frame(new_block)
3305 				+ PAGE_OLD_INFIMUM, FALSE);
3306 		}
3307 
3308 		/* Copy lock requests on user records to new page and
3309 		reset the lock bits on the old */
3310 
3311 		for (;;) {
3312 			ulint	rec1_heap_no;
3313 			ulint	rec2_heap_no;
3314 
3315 			if (comp) {
3316 				rec1_heap_no = rec_get_heap_no_new(rec1);
3317 
3318 				if (rec1_heap_no == PAGE_HEAP_NO_SUPREMUM) {
3319 					break;
3320 				}
3321 
3322 				rec2_heap_no = rec_get_heap_no_new(rec2);
3323 				rec1 = page_rec_get_next_low(rec1, TRUE);
3324 				rec2 = page_rec_get_next_low(rec2, TRUE);
3325 			} else {
3326 				rec1_heap_no = rec_get_heap_no_old(rec1);
3327 
3328 				if (rec1_heap_no == PAGE_HEAP_NO_SUPREMUM) {
3329 					break;
3330 				}
3331 
3332 				rec2_heap_no = rec_get_heap_no_old(rec2);
3333 
3334 				ut_ad(!memcmp(rec1, rec2,
3335 					      rec_get_data_size_old(rec2)));
3336 
3337 				rec1 = page_rec_get_next_low(rec1, FALSE);
3338 				rec2 = page_rec_get_next_low(rec2, FALSE);
3339 			}
3340 
3341 			if (rec1_heap_no < lock->un_member.rec_lock.n_bits
3342 			    && lock_rec_reset_nth_bit(lock, rec1_heap_no)) {
3343 				if (type_mode & LOCK_WAIT) {
3344 					lock_reset_lock_and_trx_wait(lock);
3345 				}
3346 
3347 				lock_rec_add_to_queue(
3348 					type_mode, new_block, rec2_heap_no,
3349 					lock->index, lock->trx, FALSE);
3350 			}
3351 		}
3352 	}
3353 
3354 	lock_mutex_exit();
3355 
3356 #ifdef UNIV_DEBUG_LOCK_VALIDATE
3357 	ut_ad(lock_rec_validate_page(block));
3358 	ut_ad(lock_rec_validate_page(new_block));
3359 #endif
3360 }
3361 
3362 /*************************************************************//**
3363 Moves the explicit locks on user records to another page if a record
3364 list start is moved to another page. */
3365 void
3366 lock_move_rec_list_start(
3367 /*=====================*/
3368 	const buf_block_t*	new_block,	/*!< in: index page to
3369 						move to */
3370 	const buf_block_t*	block,		/*!< in: index page */
3371 	const rec_t*		rec,		/*!< in: record on page:
3372 						this is the first
3373 						record NOT copied */
3374 	const rec_t*		old_end)	/*!< in: old
3375 						previous-to-last
3376 						record on new_page
3377 						before the records
3378 						were copied */
3379 {
3380 	lock_t*		lock;
3381 	const ulint	comp	= page_rec_is_comp(rec);
3382 
3383 	ut_ad(block->frame == page_align(rec));
3384 	ut_ad(new_block->frame == page_align(old_end));
3385 	ut_ad(comp == page_rec_is_comp(old_end));
3386 
3387 	lock_mutex_enter();
3388 
3389 	for (lock = lock_rec_get_first_on_page(lock_sys->rec_hash, block); lock;
3390 	     lock = lock_rec_get_next_on_page(lock)) {
3391 		const rec_t*	rec1;
3392 		const rec_t*	rec2;
3393 		const ulint	type_mode = lock->type_mode;
3394 
3395 		if (comp) {
3396 			rec1 = page_rec_get_next_low(
3397 				buf_block_get_frame(block)
3398 				+ PAGE_NEW_INFIMUM, TRUE);
3399 			rec2 = page_rec_get_next_low(old_end, TRUE);
3400 		} else {
3401 			rec1 = page_rec_get_next_low(
3402 				buf_block_get_frame(block)
3403 				+ PAGE_OLD_INFIMUM, FALSE);
3404 			rec2 = page_rec_get_next_low(old_end, FALSE);
3405 		}
3406 
3407 		/* Copy lock requests on user records to new page and
3408 		reset the lock bits on the old */
3409 
3410 		while (rec1 != rec) {
3411 			ulint	rec1_heap_no;
3412 			ulint	rec2_heap_no;
3413 
3414 			if (comp) {
3415 				rec1_heap_no = rec_get_heap_no_new(rec1);
3416 				rec2_heap_no = rec_get_heap_no_new(rec2);
3417 
3418 				rec1 = page_rec_get_next_low(rec1, TRUE);
3419 				rec2 = page_rec_get_next_low(rec2, TRUE);
3420 			} else {
3421 				rec1_heap_no = rec_get_heap_no_old(rec1);
3422 				rec2_heap_no = rec_get_heap_no_old(rec2);
3423 
3424 				ut_ad(!memcmp(rec1, rec2,
3425 					      rec_get_data_size_old(rec2)));
3426 
3427 				rec1 = page_rec_get_next_low(rec1, FALSE);
3428 				rec2 = page_rec_get_next_low(rec2, FALSE);
3429 			}
3430 
3431 			if (rec1_heap_no < lock->un_member.rec_lock.n_bits
3432 			    && lock_rec_reset_nth_bit(lock, rec1_heap_no)) {
3433 				if (type_mode & LOCK_WAIT) {
3434 					lock_reset_lock_and_trx_wait(lock);
3435 				}
3436 
3437 				lock_rec_add_to_queue(
3438 					type_mode, new_block, rec2_heap_no,
3439 					lock->index, lock->trx, FALSE);
3440 			}
3441 		}
3442 
3443 #ifdef UNIV_DEBUG
3444 		if (page_rec_is_supremum(rec)) {
3445 			ulint	i;
3446 
3447 			for (i = PAGE_HEAP_NO_USER_LOW;
3448 			     i < lock_rec_get_n_bits(lock); i++) {
3449 				if (lock_rec_get_nth_bit(lock, i)) {
3450 					ib::fatal()
3451 						<< "lock_move_rec_list_start():"
3452 						<< i << " not moved in "
3453 						<<  (void*) lock;
3454 				}
3455 			}
3456 		}
3457 #endif /* UNIV_DEBUG */
3458 	}
3459 
3460 	lock_mutex_exit();
3461 
3462 #ifdef UNIV_DEBUG_LOCK_VALIDATE
3463 	ut_ad(lock_rec_validate_page(block));
3464 #endif
3465 }
3466 
3467 /*************************************************************//**
3468 Moves the explicit locks on user records to another page if a record
3469 list start is moved to another page. */
3470 void
3471 lock_rtr_move_rec_list(
3472 /*===================*/
3473 	const buf_block_t*	new_block,	/*!< in: index page to
3474 						move to */
3475 	const buf_block_t*	block,		/*!< in: index page */
3476 	rtr_rec_move_t*		rec_move,       /*!< in: recording records
3477 						moved */
3478 	ulint			num_move)       /*!< in: num of rec to move */
3479 {
3480 	lock_t*		lock;
3481 	ulint		comp;
3482 
3483 	if (!num_move) {
3484 		return;
3485 	}
3486 
3487 	comp = page_rec_is_comp(rec_move[0].old_rec);
3488 
3489 	ut_ad(block->frame == page_align(rec_move[0].old_rec));
3490 	ut_ad(new_block->frame == page_align(rec_move[0].new_rec));
3491 	ut_ad(comp == page_rec_is_comp(rec_move[0].new_rec));
3492 
3493 	lock_mutex_enter();
3494 
3495 	for (lock = lock_rec_get_first_on_page(lock_sys->rec_hash, block); lock;
3496 	     lock = lock_rec_get_next_on_page(lock)) {
3497 		ulint		moved = 0;
3498 		const rec_t*	rec1;
3499 		const rec_t*	rec2;
3500 		const ulint	type_mode = lock->type_mode;
3501 
3502 		/* Copy lock requests on user records to new page and
3503 		reset the lock bits on the old */
3504 
3505 		while (moved < num_move) {
3506 			ulint	rec1_heap_no;
3507 			ulint	rec2_heap_no;
3508 
3509 			rec1 = rec_move[moved].old_rec;
3510 			rec2 = rec_move[moved].new_rec;
3511 
3512 			if (comp) {
3513 				rec1_heap_no = rec_get_heap_no_new(rec1);
3514 				rec2_heap_no = rec_get_heap_no_new(rec2);
3515 
3516 			} else {
3517 				rec1_heap_no = rec_get_heap_no_old(rec1);
3518 				rec2_heap_no = rec_get_heap_no_old(rec2);
3519 
3520 				ut_ad(!memcmp(rec1, rec2,
3521 					      rec_get_data_size_old(rec2)));
3522 			}
3523 
3524 			if (rec1_heap_no < lock->un_member.rec_lock.n_bits
3525 			    && lock_rec_reset_nth_bit(lock, rec1_heap_no)) {
3526 				if (type_mode & LOCK_WAIT) {
3527 					lock_reset_lock_and_trx_wait(lock);
3528 				}
3529 
3530 				lock_rec_add_to_queue(
3531 					type_mode, new_block, rec2_heap_no,
3532 					lock->index, lock->trx, FALSE);
3533 
3534 				rec_move[moved].moved = true;
3535 			}
3536 
3537 			moved++;
3538 		}
3539 	}
3540 
3541 	lock_mutex_exit();
3542 
3543 #ifdef UNIV_DEBUG_LOCK_VALIDATE
3544 	ut_ad(lock_rec_validate_page(block));
3545 #endif
3546 }
3547 /*************************************************************//**
3548 Updates the lock table when a page is split to the right. */
3549 void
3550 lock_update_split_right(
3551 /*====================*/
3552 	const buf_block_t*	right_block,	/*!< in: right page */
3553 	const buf_block_t*	left_block)	/*!< in: left page */
3554 {
3555 	ulint	heap_no = lock_get_min_heap_no(right_block);
3556 
3557 	lock_mutex_enter();
3558 
3559 	/* Move the locks on the supremum of the left page to the supremum
3560 	of the right page */
3561 
3562 	lock_rec_move(right_block, left_block,
3563 		      PAGE_HEAP_NO_SUPREMUM, PAGE_HEAP_NO_SUPREMUM);
3564 
3565 	/* Inherit the locks to the supremum of left page from the successor
3566 	of the infimum on right page */
3567 
3568 	lock_rec_inherit_to_gap(left_block, right_block,
3569 				PAGE_HEAP_NO_SUPREMUM, heap_no);
3570 
3571 	lock_mutex_exit();
3572 }
3573 
3574 /*************************************************************//**
3575 Updates the lock table when a page is merged to the right. */
3576 void
3577 lock_update_merge_right(
3578 /*====================*/
3579 	const buf_block_t*	right_block,	/*!< in: right page to
3580 						which merged */
3581 	const rec_t*		orig_succ,	/*!< in: original
3582 						successor of infimum
3583 						on the right page
3584 						before merge */
3585 	const buf_block_t*	left_block)	/*!< in: merged index
3586 						page which will be
3587 						discarded */
3588 {
3589 	lock_mutex_enter();
3590 
3591 	/* Inherit the locks from the supremum of the left page to the
3592 	original successor of infimum on the right page, to which the left
3593 	page was merged */
3594 
3595 	lock_rec_inherit_to_gap(right_block, left_block,
3596 				page_rec_get_heap_no(orig_succ),
3597 				PAGE_HEAP_NO_SUPREMUM);
3598 
3599 	/* Reset the locks on the supremum of the left page, releasing
3600 	waiting transactions */
3601 
3602 	lock_rec_reset_and_release_wait_low(
3603 		lock_sys->rec_hash, left_block, PAGE_HEAP_NO_SUPREMUM);
3604 
3605 #ifdef UNIV_DEBUG
3606 	/* there should exist no page lock on the left page,
3607 	otherwise, it will be blocked from merge */
3608 	ulint	space = left_block->page.id.space();
3609 	ulint	page_no = left_block->page.id.page_no();
3610 	ut_ad(lock_rec_get_first_on_page_addr(
3611 			lock_sys->prdt_page_hash, space, page_no) == NULL);
3612 #endif /* UNIV_DEBUG */
3613 
3614 	lock_rec_free_all_from_discard_page(left_block);
3615 
3616 	lock_mutex_exit();
3617 
3618 }
3619 
3620 /*************************************************************//**
3621 Updates the lock table when the root page is copied to another in
3622 btr_root_raise_and_insert. Note that we leave lock structs on the
3623 root page, even though they do not make sense on other than leaf
3624 pages: the reason is that in a pessimistic update the infimum record
3625 of the root page will act as a dummy carrier of the locks of the record
3626 to be updated. */
3627 void
3628 lock_update_root_raise(
3629 /*===================*/
3630 	const buf_block_t*	block,	/*!< in: index page to which copied */
3631 	const buf_block_t*	root)	/*!< in: root page */
3632 {
3633 	lock_mutex_enter();
3634 
3635 	/* Move the locks on the supremum of the root to the supremum
3636 	of block */
3637 
3638 	lock_rec_move(block, root,
3639 		      PAGE_HEAP_NO_SUPREMUM, PAGE_HEAP_NO_SUPREMUM);
3640 	lock_mutex_exit();
3641 }
3642 
3643 /*************************************************************//**
3644 Updates the lock table when a page is copied to another and the original page
3645 is removed from the chain of leaf pages, except if page is the root! */
3646 void
3647 lock_update_copy_and_discard(
3648 /*=========================*/
3649 	const buf_block_t*	new_block,	/*!< in: index page to
3650 						which copied */
3651 	const buf_block_t*	block)		/*!< in: index page;
3652 						NOT the root! */
3653 {
3654 	lock_mutex_enter();
3655 
3656 	/* Move the locks on the supremum of the old page to the supremum
3657 	of new_page */
3658 
3659 	lock_rec_move(new_block, block,
3660 		      PAGE_HEAP_NO_SUPREMUM, PAGE_HEAP_NO_SUPREMUM);
3661 	lock_rec_free_all_from_discard_page(block);
3662 
3663 	lock_mutex_exit();
3664 }
3665 
3666 /*************************************************************//**
3667 Updates the lock table when a page is split to the left. */
3668 void
3669 lock_update_split_left(
3670 /*===================*/
3671 	const buf_block_t*	right_block,	/*!< in: right page */
3672 	const buf_block_t*	left_block)	/*!< in: left page */
3673 {
3674 	ulint	heap_no = lock_get_min_heap_no(right_block);
3675 
3676 	lock_mutex_enter();
3677 
3678 	/* Inherit the locks to the supremum of the left page from the
3679 	successor of the infimum on the right page */
3680 
3681 	lock_rec_inherit_to_gap(left_block, right_block,
3682 				PAGE_HEAP_NO_SUPREMUM, heap_no);
3683 
3684 	lock_mutex_exit();
3685 }
3686 
3687 /*************************************************************//**
3688 Updates the lock table when a page is merged to the left. */
3689 void
3690 lock_update_merge_left(
3691 /*===================*/
3692 	const buf_block_t*	left_block,	/*!< in: left page to
3693 						which merged */
3694 	const rec_t*		orig_pred,	/*!< in: original predecessor
3695 						of supremum on the left page
3696 						before merge */
3697 	const buf_block_t*	right_block)	/*!< in: merged index page
3698 						which will be discarded */
3699 {
3700 	const rec_t*	left_next_rec;
3701 
3702 	ut_ad(left_block->frame == page_align(orig_pred));
3703 
3704 	lock_mutex_enter();
3705 
3706 	left_next_rec = page_rec_get_next_const(orig_pred);
3707 
3708 	if (!page_rec_is_supremum(left_next_rec)) {
3709 
3710 		/* Inherit the locks on the supremum of the left page to the
3711 		first record which was moved from the right page */
3712 
3713 		lock_rec_inherit_to_gap(left_block, left_block,
3714 					page_rec_get_heap_no(left_next_rec),
3715 					PAGE_HEAP_NO_SUPREMUM);
3716 
3717 		/* Reset the locks on the supremum of the left page,
3718 		releasing waiting transactions */
3719 
3720 		lock_rec_reset_and_release_wait_low(
3721 			lock_sys->rec_hash, left_block, PAGE_HEAP_NO_SUPREMUM);
3722 	}
3723 
3724 	/* Move the locks from the supremum of right page to the supremum
3725 	of the left page */
3726 
3727 	lock_rec_move(left_block, right_block,
3728 		      PAGE_HEAP_NO_SUPREMUM, PAGE_HEAP_NO_SUPREMUM);
3729 
3730 #ifdef UNIV_DEBUG
3731 	/* there should exist no page lock on the right page,
3732 	otherwise, it will be blocked from merge */
3733 	ulint	space = right_block->page.id.space();
3734 	ulint	page_no = right_block->page.id.page_no();
3735 	lock_t*	lock_test = lock_rec_get_first_on_page_addr(
3736 		lock_sys->prdt_page_hash, space, page_no);
3737 	ut_ad(!lock_test);
3738 #endif /* UNIV_DEBUG */
3739 
3740 	lock_rec_free_all_from_discard_page(right_block);
3741 
3742 	lock_mutex_exit();
3743 }
3744 
3745 /*************************************************************//**
3746 Resets the original locks on heir and replaces them with gap type locks
3747 inherited from rec. */
3748 void
3749 lock_rec_reset_and_inherit_gap_locks(
3750 /*=================================*/
3751 	const buf_block_t*	heir_block,	/*!< in: block containing the
3752 						record which inherits */
3753 	const buf_block_t*	block,		/*!< in: block containing the
3754 						record from which inherited;
3755 						does NOT reset the locks on
3756 						this record */
3757 	ulint			heir_heap_no,	/*!< in: heap_no of the
3758 						inheriting record */
3759 	ulint			heap_no)	/*!< in: heap_no of the
3760 						donating record */
3761 {
3762 	lock_mutex_enter();
3763 
3764 	lock_rec_reset_and_release_wait(heir_block, heir_heap_no);
3765 
3766 	lock_rec_inherit_to_gap(heir_block, block, heir_heap_no, heap_no);
3767 
3768 	lock_mutex_exit();
3769 }
3770 
3771 /*************************************************************//**
3772 Updates the lock table when a page is discarded. */
3773 void
3774 lock_update_discard(
3775 /*================*/
3776 	const buf_block_t*	heir_block,	/*!< in: index page
3777 						which will inherit the locks */
3778 	ulint			heir_heap_no,	/*!< in: heap_no of the record
3779 						which will inherit the locks */
3780 	const buf_block_t*	block)		/*!< in: index page
3781 						which will be discarded */
3782 {
3783 	const rec_t*	rec;
3784 	ulint		heap_no;
3785 	const page_t*	page = block->frame;
3786 
3787 	lock_mutex_enter();
3788 
3789 	if (!lock_rec_get_first_on_page(lock_sys->rec_hash, block)
3790 	    && (!lock_rec_get_first_on_page(lock_sys->prdt_hash, block))) {
3791 		/* No locks exist on page, nothing to do */
3792 
3793 		lock_mutex_exit();
3794 
3795 		return;
3796 	}
3797 
3798 	/* Inherit all the locks on the page to the record and reset all
3799 	the locks on the page */
3800 
3801 	if (page_is_comp(page)) {
3802 		rec = page + PAGE_NEW_INFIMUM;
3803 
3804 		do {
3805 			heap_no = rec_get_heap_no_new(rec);
3806 
3807 			lock_rec_inherit_to_gap(heir_block, block,
3808 						heir_heap_no, heap_no);
3809 
3810 			lock_rec_reset_and_release_wait(block, heap_no);
3811 
3812 			rec = page + rec_get_next_offs(rec, TRUE);
3813 		} while (heap_no != PAGE_HEAP_NO_SUPREMUM);
3814 	} else {
3815 		rec = page + PAGE_OLD_INFIMUM;
3816 
3817 		do {
3818 			heap_no = rec_get_heap_no_old(rec);
3819 
3820 			lock_rec_inherit_to_gap(heir_block, block,
3821 						heir_heap_no, heap_no);
3822 
3823 			lock_rec_reset_and_release_wait(block, heap_no);
3824 
3825 			rec = page + rec_get_next_offs(rec, FALSE);
3826 		} while (heap_no != PAGE_HEAP_NO_SUPREMUM);
3827 	}
3828 
3829 	lock_rec_free_all_from_discard_page(block);
3830 
3831 	lock_mutex_exit();
3832 }
3833 
3834 /*************************************************************//**
3835 Updates the lock table when a new user record is inserted. */
3836 void
3837 lock_update_insert(
3838 /*===============*/
3839 	const buf_block_t*	block,	/*!< in: buffer block containing rec */
3840 	const rec_t*		rec)	/*!< in: the inserted record */
3841 {
3842 	ulint	receiver_heap_no;
3843 	ulint	donator_heap_no;
3844 
3845 	ut_ad(block->frame == page_align(rec));
3846 
3847 	/* Inherit the gap-locking locks for rec, in gap mode, from the next
3848 	record */
3849 
3850 	if (page_rec_is_comp(rec)) {
3851 		receiver_heap_no = rec_get_heap_no_new(rec);
3852 		donator_heap_no = rec_get_heap_no_new(
3853 			page_rec_get_next_low(rec, TRUE));
3854 	} else {
3855 		receiver_heap_no = rec_get_heap_no_old(rec);
3856 		donator_heap_no = rec_get_heap_no_old(
3857 			page_rec_get_next_low(rec, FALSE));
3858 	}
3859 
3860 	lock_rec_inherit_to_gap_if_gap_lock(
3861 		block, receiver_heap_no, donator_heap_no);
3862 }
3863 
3864 /*************************************************************//**
3865 Updates the lock table when a record is removed. */
3866 void
3867 lock_update_delete(
3868 /*===============*/
3869 	const buf_block_t*	block,	/*!< in: buffer block containing rec */
3870 	const rec_t*		rec)	/*!< in: the record to be removed */
3871 {
3872 	const page_t*	page = block->frame;
3873 	ulint		heap_no;
3874 	ulint		next_heap_no;
3875 
3876 	ut_ad(page == page_align(rec));
3877 
3878 	if (page_is_comp(page)) {
3879 		heap_no = rec_get_heap_no_new(rec);
3880 		next_heap_no = rec_get_heap_no_new(page
3881 						   + rec_get_next_offs(rec,
3882 								       TRUE));
3883 	} else {
3884 		heap_no = rec_get_heap_no_old(rec);
3885 		next_heap_no = rec_get_heap_no_old(page
3886 						   + rec_get_next_offs(rec,
3887 								       FALSE));
3888 	}
3889 
3890 	lock_mutex_enter();
3891 
3892 	/* Let the next record inherit the locks from rec, in gap mode */
3893 
3894 	lock_rec_inherit_to_gap(block, block, next_heap_no, heap_no);
3895 
3896 	/* Reset the lock bits on rec and release waiting transactions */
3897 
3898 	lock_rec_reset_and_release_wait(block, heap_no);
3899 
3900 	lock_mutex_exit();
3901 }
3902 
3903 /*********************************************************************//**
3904 Stores on the page infimum record the explicit locks of another record.
3905 This function is used to store the lock state of a record when it is
3906 updated and the size of the record changes in the update. The record
3907 is moved in such an update, perhaps to another page. The infimum record
3908 acts as a dummy carrier record, taking care of lock releases while the
3909 actual record is being moved. */
3910 void
3911 lock_rec_store_on_page_infimum(
3912 /*===========================*/
3913 	const buf_block_t*	block,	/*!< in: buffer block containing rec */
3914 	const rec_t*		rec)	/*!< in: record whose lock state
3915 					is stored on the infimum
3916 					record of the same page; lock
3917 					bits are reset on the
3918 					record */
3919 {
3920 	ulint	heap_no = page_rec_get_heap_no(rec);
3921 
3922 	ut_ad(block->frame == page_align(rec));
3923 
3924 	lock_mutex_enter();
3925 
3926 	lock_rec_move(block, block, PAGE_HEAP_NO_INFIMUM, heap_no);
3927 
3928 	lock_mutex_exit();
3929 }
3930 
3931 /*********************************************************************//**
3932 Restores the state of explicit lock requests on a single record, where the
3933 state was stored on the infimum of the page. */
3934 void
3935 lock_rec_restore_from_page_infimum(
3936 /*===============================*/
3937 	const buf_block_t*	block,	/*!< in: buffer block containing rec */
3938 	const rec_t*		rec,	/*!< in: record whose lock state
3939 					is restored */
3940 	const buf_block_t*	donator)/*!< in: page (rec is not
3941 					necessarily on this page)
3942 					whose infimum stored the lock
3943 					state; lock bits are reset on
3944 					the infimum */
3945 {
3946 	ulint	heap_no = page_rec_get_heap_no(rec);
3947 
3948 	lock_mutex_enter();
3949 
3950 	lock_rec_move(block, donator, heap_no, PAGE_HEAP_NO_INFIMUM);
3951 
3952 	lock_mutex_exit();
3953 }
3954 
3955 /*========================= TABLE LOCKS ==============================*/
3956 
3957 /** Functor for accessing the embedded node within a table lock. */
3958 struct TableLockGetNode {
3959 	ut_list_node<lock_t>& operator() (lock_t& elem)
3960 	{
3961 		return(elem.un_member.tab_lock.locks);
3962 	}
3963 };
3964 
/*********************************************************************//**
Creates a table lock object and adds it as the last in the lock queue
of the table. Does NOT check for deadlocks or lock compatibility.
Caller must hold the lock_sys mutex and the trx mutex.
@return own: new lock object */
UNIV_INLINE
lock_t*
lock_table_create(
/*==============*/
#ifdef WITH_WSREP
	lock_t*		c_lock, /* conflicting lock */
#endif
	dict_table_t*	table,	/*!< in/out: database table
				in dictionary cache */
	ulint		type_mode,/*!< in: lock mode possibly ORed with
				LOCK_WAIT */
	trx_t*		trx)	/*!< in: trx */
{
	lock_t*		lock;

	ut_ad(table && trx);
	ut_ad(lock_mutex_own());
	ut_ad(trx_mutex_own(trx));

	check_trx_state(trx);

	/* Account for the pending/granted AUTOINC request on the table. */
	if ((type_mode & LOCK_MODE_MASK) == LOCK_AUTO_INC) {
		++table->n_waiting_or_granted_auto_inc_locks;
	}

	/* For AUTOINC locking we reuse the lock instance only if
	there is no wait involved else we allocate the waiting lock
	from the transaction lock heap. */
	if (type_mode == LOCK_AUTO_INC) {

		lock = table->autoinc_lock;

		table->autoinc_trx = trx;

		ib_vector_push(trx->autoinc_locks, &lock);

	} else if (trx->lock.table_cached < trx->lock.table_pool.size()) {
		/* Reuse a pre-allocated lock object from the trx pool. */
		lock = trx->lock.table_pool[trx->lock.table_cached++];
	} else {

		lock = static_cast<lock_t*>(
			mem_heap_alloc(trx->lock.lock_heap, sizeof(*lock)));

	}

	lock->type_mode = ib_uint32_t(type_mode | LOCK_TABLE);
	lock->trx = trx;

	lock->un_member.tab_lock.table = table;

	ut_ad(table->n_ref_count > 0 || !table->can_be_evicted);

	UT_LIST_ADD_LAST(trx->lock.trx_locks, lock);
#ifdef WITH_WSREP
	if(wsrep_on(trx->mysql_thd)) {
		/* A brute-force (BF) transaction jumps the queue: its lock
		is inserted ahead of the conflicting lock, and the waiting
		victim is made to roll back. */
		if (c_lock && wsrep_thd_is_BF(trx->mysql_thd, FALSE)) {
        		ut_list_insert(table->locks, c_lock, lock, TableLockGetNode());
                	if (wsrep_debug)
				ib::info() << "table lock BF conflict for " <<
				c_lock->trx->id;
        	} else {
                	ut_list_append(table->locks, lock, TableLockGetNode());
        	}

		if (c_lock) trx_mutex_enter(c_lock->trx);
		if (c_lock && c_lock->trx->lock.que_state == TRX_QUE_LOCK_WAIT) {

 			c_lock->trx->lock.was_chosen_as_deadlock_victim = TRUE;

			if (wsrep_debug) wsrep_print_wait_locks(c_lock);

			/* have to release trx mutex for the duration of
			   victim lock release. This will eventually call
			   lock_grant, which wants to grant trx mutex again
			*/
			/* caller has trx_mutex, have to release for lock cancel */
			trx_mutex_exit(trx);
			lock_cancel_waiting_and_release(c_lock->trx->lock.wait_lock);
			trx_mutex_enter(trx);

			/* trx might not wait for c_lock, but some other lock
			does not matter if wait_lock was released above
			*/
			if (c_lock->trx->lock.wait_lock == c_lock) {
				lock_reset_lock_and_trx_wait(lock);
			}

			if (wsrep_debug) {
				fprintf(stderr, "WSREP: c_lock canceled %llu\n",
					(ulonglong) c_lock->trx->id);
			}
		}
		if (c_lock) trx_mutex_exit(c_lock->trx);
	} else {
#endif /* WITH_WSREP */

	ut_list_append(table->locks, lock, TableLockGetNode());
#ifdef WITH_WSREP
	}
#endif /* WITH_WSREP */

	/* Mark both the lock and the trx as waiting, if requested. */
	if (type_mode & LOCK_WAIT) {

		lock_set_lock_and_trx_wait(lock, trx);
	}

	lock->trx->lock.table_locks.push_back(lock);

	MONITOR_INC(MONITOR_TABLELOCK_CREATED);
	MONITOR_INC(MONITOR_NUM_TABLELOCK);

	return(lock);
}
4082 
4083 /*************************************************************//**
4084 Pops autoinc lock requests from the transaction's autoinc_locks. We
4085 handle the case where there are gaps in the array and they need to
4086 be popped off the stack. */
4087 UNIV_INLINE
4088 void
4089 lock_table_pop_autoinc_locks(
4090 /*=========================*/
4091 	trx_t*	trx)	/*!< in/out: transaction that owns the AUTOINC locks */
4092 {
4093 	ut_ad(lock_mutex_own());
4094 	ut_ad(!ib_vector_is_empty(trx->autoinc_locks));
4095 
4096 	/* Skip any gaps, gaps are NULL lock entries in the
4097 	trx->autoinc_locks vector. */
4098 
4099 	do {
4100 		ib_vector_pop(trx->autoinc_locks);
4101 
4102 		if (ib_vector_is_empty(trx->autoinc_locks)) {
4103 			return;
4104 		}
4105 
4106 	} while (*(lock_t**) ib_vector_get_last(trx->autoinc_locks) == NULL);
4107 }
4108 
4109 /*************************************************************//**
4110 Removes an autoinc lock request from the transaction's autoinc_locks. */
4111 UNIV_INLINE
4112 void
4113 lock_table_remove_autoinc_lock(
4114 /*===========================*/
4115 	lock_t*	lock,	/*!< in: table lock */
4116 	trx_t*	trx)	/*!< in/out: transaction that owns the lock */
4117 {
4118 	lock_t*	autoinc_lock;
4119 	lint	i = ib_vector_size(trx->autoinc_locks) - 1;
4120 
4121 	ut_ad(lock_mutex_own());
4122 	ut_ad(lock_get_mode(lock) == LOCK_AUTO_INC);
4123 	ut_ad(lock_get_type_low(lock) & LOCK_TABLE);
4124 	ut_ad(!ib_vector_is_empty(trx->autoinc_locks));
4125 
4126 	/* With stored functions and procedures the user may drop
4127 	a table within the same "statement". This special case has
4128 	to be handled by deleting only those AUTOINC locks that were
4129 	held by the table being dropped. */
4130 
4131 	autoinc_lock = *static_cast<lock_t**>(
4132 		ib_vector_get(trx->autoinc_locks, i));
4133 
4134 	/* This is the default fast case. */
4135 
4136 	if (autoinc_lock == lock) {
4137 		lock_table_pop_autoinc_locks(trx);
4138 	} else {
4139 		/* The last element should never be NULL */
4140 		ut_a(autoinc_lock != NULL);
4141 
4142 		/* Handle freeing the locks from within the stack. */
4143 
4144 		while (--i >= 0) {
4145 			autoinc_lock = *static_cast<lock_t**>(
4146 				ib_vector_get(trx->autoinc_locks, i));
4147 
4148 			if (autoinc_lock == lock) {
4149 				void*	null_var = NULL;
4150 				ib_vector_set(trx->autoinc_locks, i, &null_var);
4151 				return;
4152 			}
4153 		}
4154 
4155 		/* Must find the autoinc lock. */
4156 		ut_error;
4157 	}
4158 }
4159 
/*************************************************************//**
Removes a table lock request from the queue and the trx list of locks;
this is a low-level function which does NOT check if waiting requests
can now be granted. */
UNIV_INLINE
void
lock_table_remove_low(
/*==================*/
	lock_t*	lock)	/*!< in/out: table lock */
{
	trx_t*		trx;
	dict_table_t*	table;

	ut_ad(lock_mutex_own());

	trx = lock->trx;
	table = lock->un_member.tab_lock.table;

	/* Remove the table from the transaction's AUTOINC vector, if
	the lock that is being released is an AUTOINC lock. */
	if (lock_get_mode(lock) == LOCK_AUTO_INC) {

		/* The table's AUTOINC lock can get transferred to
		another transaction before we get here. */
		if (table->autoinc_trx == trx) {
			table->autoinc_trx = NULL;
		}

		/* The locks must be freed in the reverse order from
		the one in which they were acquired. This is to avoid
		traversing the AUTOINC lock vector unnecessarily.

		We only store locks that were granted in the
		trx->autoinc_locks vector (see lock_table_create()
		and lock_grant()). Therefore it can be empty and we
		need to check for that. */

		if (!lock_get_wait(lock)
		    && !ib_vector_is_empty(trx->autoinc_locks)) {

			lock_table_remove_autoinc_lock(lock, trx);
		}

		/* Undo the counter increment done in lock_table_create(). */
		ut_a(table->n_waiting_or_granted_auto_inc_locks > 0);
		table->n_waiting_or_granted_auto_inc_locks--;
	}

	/* Unlink from both the trx's lock list and the table's queue. */
	UT_LIST_REMOVE(trx->lock.trx_locks, lock);
	ut_list_remove(table->locks, lock, TableLockGetNode());

	MONITOR_INC(MONITOR_TABLELOCK_REMOVED);
	MONITOR_DEC(MONITOR_NUM_TABLELOCK);
}
4213 
/*********************************************************************//**
Enqueues a waiting request for a table lock which cannot be granted
immediately. Checks for deadlocks.
@return DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED, or
DB_SUCCESS; DB_SUCCESS means that there was a deadlock, but another
transaction was chosen as a victim, and we got the lock immediately:
no need to wait then */
static
dberr_t
lock_table_enqueue_waiting(
/*=======================*/
#ifdef WITH_WSREP
	lock_t*		c_lock, /* conflicting lock */
#endif
	ulint		mode,	/*!< in: lock mode this transaction is
				requesting */
	dict_table_t*	table,	/*!< in/out: table */
	que_thr_t*	thr)	/*!< in: query thread */
{
	trx_t*		trx;
	lock_t*		lock;

	ut_ad(lock_mutex_own());
	ut_ad(!srv_read_only_mode);

	trx = thr_get_trx(thr);
	ut_ad(trx_mutex_own(trx));

	/* Test if there already is some other reason to suspend thread:
	we do not enqueue a lock request if the query thread should be
	stopped anyway */

	if (que_thr_stop(thr)) {
		/* This is not expected to happen; ut_error aborts in
		debug builds, the return is for release builds. */
		ut_error;

		return(DB_QUE_THR_SUSPENDED);
	}

	/* A dictionary operation should never need to wait for a table
	lock; report it, but continue. */
	switch (trx_get_dict_operation(trx)) {
	case TRX_DICT_OP_NONE:
		break;
	case TRX_DICT_OP_TABLE:
	case TRX_DICT_OP_INDEX:
		ib::error() << "A table lock wait happens in a dictionary"
			" operation. Table " << table->name
			<< ". " << BUG_REPORT_MSG;
		ut_ad(0);
	}

	/* Enqueue the lock request that will wait to be granted */
#ifdef WITH_WSREP
	if (trx->lock.was_chosen_as_deadlock_victim) {
		return(DB_DEADLOCK);
	}
	lock = lock_table_create(c_lock, table, mode | LOCK_WAIT, trx);
#else
	lock = lock_table_create(table, mode | LOCK_WAIT, trx);
#endif /* WITH_WSREP */

	const trx_t*	victim_trx =
			DeadlockChecker::check_and_resolve(lock, trx);

	if (victim_trx != 0) {
		ut_ad(victim_trx == trx);

		/* The order here is important, we don't want to
		lose the state of the lock before calling remove. */
		lock_table_remove_low(lock);
		lock_reset_lock_and_trx_wait(lock);

		return(DB_DEADLOCK);

	} else if (trx->lock.wait_lock == NULL) {
		/* Deadlock resolution chose another transaction as a victim,
		and we accidentally got our lock granted! */

		return(DB_SUCCESS);
	}

	/* We really have to wait: record the wait state for the trx. */
	trx->lock.que_state = TRX_QUE_LOCK_WAIT;

	trx->lock.wait_started = ut_time();
	trx->lock.was_chosen_as_deadlock_victim = false;

	ut_a(que_thr_stop(thr));

	MONITOR_INC(MONITOR_TABLELOCK_WAIT);

	return(DB_LOCK_WAIT);
}
4304 
4305 /*********************************************************************//**
4306 Checks if other transactions have an incompatible mode lock request in
4307 the lock queue.
4308 @return lock or NULL */
4309 UNIV_INLINE
4310 const lock_t*
4311 lock_table_other_has_incompatible(
4312 /*==============================*/
4313 	const trx_t*		trx,	/*!< in: transaction, or NULL if all
4314 					transactions should be included */
4315 	ulint			wait,	/*!< in: LOCK_WAIT if also
4316 					waiting locks are taken into
4317 					account, or 0 if not */
4318 	const dict_table_t*	table,	/*!< in: table */
4319 	lock_mode		mode)	/*!< in: lock mode */
4320 {
4321 	const lock_t*	lock;
4322 
4323 	ut_ad(lock_mutex_own());
4324 
4325 	for (lock = UT_LIST_GET_LAST(table->locks);
4326 	     lock != NULL;
4327 	     lock = UT_LIST_GET_PREV(un_member.tab_lock.locks, lock)) {
4328 
4329 		if (lock->trx != trx
4330 		    && !lock_mode_compatible(lock_get_mode(lock), mode)
4331 		    && (wait || !lock_get_wait(lock))) {
4332 
4333 #ifdef WITH_WSREP
4334 			if (wsrep_on(trx->mysql_thd)) {
4335 				if (wsrep_debug)
4336 					ib::info() << "WSREP: table lock abort";
4337 				trx_mutex_enter(lock->trx);
4338 				wsrep_kill_victim((trx_t *)trx, (lock_t *)lock);
4339 				trx_mutex_exit(lock->trx);
4340 			}
4341 #endif
4342 			return(lock);
4343 		}
4344 	}
4345 
4346 	return(NULL);
4347 }
4348 
/*********************************************************************//**
Locks the specified database table in the mode given. If the lock cannot
be granted immediately, the query thread is put to wait.
@return DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
dberr_t
lock_table(
/*=======*/
	ulint		flags,	/*!< in: if BTR_NO_LOCKING_FLAG bit is set,
				does nothing */
	dict_table_t*	table,	/*!< in/out: database table
				in dictionary cache */
	lock_mode	mode,	/*!< in: lock mode */
	que_thr_t*	thr)	/*!< in: query thread */
{
	trx_t*		trx;
	dberr_t		err;
	const lock_t*	wait_for;

	ut_ad(table && thr);

	/* Given limited visibility of temp-table we can avoid
	locking overhead */
	if ((flags & BTR_NO_LOCKING_FLAG)
	    || srv_read_only_mode
	    || dict_table_is_temporary(table)) {

		return(DB_SUCCESS);
	}

	ut_a(flags == 0);

	trx = thr_get_trx(thr);

	/* Look for equal or stronger locks the same trx already
	has on the table. No need to acquire the lock mutex here
	because only this transacton can add/access table locks
	to/from trx_t::table_locks. */

	if (lock_table_has(trx, table, mode)) {

		return(DB_SUCCESS);
	}

	/* Read only transactions can write to temp tables, we don't want
	to promote them to RW transactions. Their updates cannot be visible
	to other transactions. Therefore we can keep them out
	of the read views. */

	if ((mode == LOCK_IX || mode == LOCK_X)
	    && !trx->read_only
	    && trx->rsegs.m_redo.rseg == 0) {

		/* First write lock: assign a rollback segment and
		make the transaction read-write. */
		trx_set_rw_mode(trx);
	}

	lock_mutex_enter();

	/* We have to check if the new lock is compatible with any locks
	other transactions have in the table lock queue. */

	wait_for = lock_table_other_has_incompatible(
		trx, LOCK_WAIT, table, mode);

	trx_mutex_enter(trx);

	/* Another trx has a request on the table in an incompatible
	mode: this trx may have to wait */

	if (wait_for != NULL) {
#ifdef WITH_WSREP
		err = lock_table_enqueue_waiting((lock_t *)wait_for,
			mode | flags, table, thr);
#else
		err = lock_table_enqueue_waiting(mode | flags, table, thr);
#endif /* WITH_WSREP */
	} else {
		/* No conflict: create a granted lock right away.
		(wait_for is NULL here, so the WSREP c_lock is NULL.) */
#ifdef WITH_WSREP
		lock_table_create((lock_t *)wait_for, table, mode | flags, trx);
#else
		lock_table_create(table, mode | flags, trx);
#endif /* WITH_WSREP */

		ut_a(!flags || mode == LOCK_S || mode == LOCK_X);

		err = DB_SUCCESS;
	}

	lock_mutex_exit();

	trx_mutex_exit(trx);

	return(err);
}
4442 
/*********************************************************************//**
Creates a table IX lock object for a resurrected transaction. */
void
lock_table_ix_resurrect(
/*====================*/
	dict_table_t*	table,	/*!< in/out: table */
	trx_t*		trx)	/*!< in/out: transaction */
{
	/* Only transactions recovered at startup may be resurrected. */
	ut_ad(trx->is_recovered);

	if (lock_table_has(trx, table, LOCK_IX)) {
		return;
	}

	lock_mutex_enter();

	/* We have to check if the new lock is compatible with any locks
	other transactions have in the table lock queue. */

	ut_ad(!lock_table_other_has_incompatible(
		      trx, LOCK_WAIT, table, LOCK_IX));

	trx_mutex_enter(trx);
#ifdef WITH_WSREP
	/* No conflicting lock exists here, so c_lock is NULL. */
	lock_table_create(NULL, table, LOCK_IX, trx);
#else
	lock_table_create(table, LOCK_IX, trx);
#endif /* WITH_WSREP */
	lock_mutex_exit();
	trx_mutex_exit(trx);
}
4474 
4475 /*********************************************************************//**
4476 Checks if a waiting table lock request still has to wait in a queue.
4477 @return TRUE if still has to wait */
4478 static
4479 bool
4480 lock_table_has_to_wait_in_queue(
4481 /*============================*/
4482 	const lock_t*	wait_lock)	/*!< in: waiting table lock */
4483 {
4484 	const dict_table_t*	table;
4485 	const lock_t*		lock;
4486 
4487 	ut_ad(lock_mutex_own());
4488 	ut_ad(lock_get_wait(wait_lock));
4489 
4490 	table = wait_lock->un_member.tab_lock.table;
4491 
4492 	for (lock = UT_LIST_GET_FIRST(table->locks);
4493 	     lock != wait_lock;
4494 	     lock = UT_LIST_GET_NEXT(un_member.tab_lock.locks, lock)) {
4495 
4496 		if (lock_has_to_wait(wait_lock, lock)) {
4497 
4498 			return(true);
4499 		}
4500 	}
4501 
4502 	return(false);
4503 }
4504 
4505 /*************************************************************//**
4506 Removes a table lock request, waiting or granted, from the queue and grants
4507 locks to other transactions in the queue, if they now are entitled to a
4508 lock. */
4509 static
4510 void
4511 lock_table_dequeue(
4512 /*===============*/
4513 	lock_t*	in_lock)/*!< in/out: table lock object; transactions waiting
4514 			behind will get their lock requests granted, if
4515 			they are now qualified to it */
4516 {
4517 	ut_ad(lock_mutex_own());
4518 	ut_a(lock_get_type_low(in_lock) == LOCK_TABLE);
4519 
4520 	lock_t*	lock = UT_LIST_GET_NEXT(un_member.tab_lock.locks, in_lock);
4521 
4522 	lock_table_remove_low(in_lock);
4523 
4524 	/* Check if waiting locks in the queue can now be granted: grant
4525 	locks if there are no conflicting locks ahead. */
4526 
4527 	for (/* No op */;
4528 	     lock != NULL;
4529 	     lock = UT_LIST_GET_NEXT(un_member.tab_lock.locks, lock)) {
4530 
4531 		if (lock_get_wait(lock)
4532 		    && !lock_table_has_to_wait_in_queue(lock)) {
4533 
4534 			/* Grant the lock */
4535 			ut_ad(in_lock->trx != lock->trx);
4536 			lock_grant(lock);
4537 		}
4538 	}
4539 }
4540 
/** Sets a lock on a table based on the given mode.
@param[in]	table	table to lock
@param[in,out]	trx	transaction
@param[in]	mode	LOCK_X or LOCK_S
@return error code or DB_SUCCESS. */
dberr_t
lock_table_for_trx(
	dict_table_t*	table,
	trx_t*		trx,
	enum lock_mode	mode)
{
	mem_heap_t*	heap;
	que_thr_t*	thr;
	dberr_t		err;
	sel_node_t*	node;
	heap = mem_heap_create(512);

	node = sel_node_create(heap);
	thr = pars_complete_graph_for_exec(node, trx, heap, NULL);
	thr->graph->state = QUE_FORK_ACTIVE;

	/* We use the select query graph as the dummy graph needed
	in the lock module call */

	thr = static_cast<que_thr_t*>(
		que_fork_get_first_thr(
			static_cast<que_fork_t*>(que_node_get_parent(thr))));

	que_thr_move_to_run_state_for_mysql(thr, trx);

run_again:
	thr->run_node = thr;
	thr->prev_node = thr->common.parent;

	err = lock_table(0, table, mode, thr);

	trx->error_state = err;

	if (UNIV_LIKELY(err == DB_SUCCESS)) {
		que_thr_stop_for_mysql_no_error(thr, trx);
	} else {
		que_thr_stop_for_mysql(thr);

		if (err != DB_QUE_THR_SUSPENDED) {
			bool	was_lock_wait;

			/* Wait for the lock (or map the error); retry
			the request if the wait ended successfully. */
			was_lock_wait = row_mysql_handle_errors(
				&err, trx, thr, NULL);

			if (was_lock_wait) {
				goto run_again;
			}
		} else {
			que_thr_t*	run_thr;
			que_node_t*	parent;

			parent = que_node_get_parent(thr);

			run_thr = que_fork_start_command(
				static_cast<que_fork_t*>(parent));

			ut_a(run_thr == thr);

			/* There was a lock wait but the thread was not
			in a ready to run or running state. */
			trx->error_state = DB_LOCK_WAIT;

			goto run_again;

		}
	}

	/* Free the dummy query graph (this also frees heap/node). */
	que_graph_free(thr->graph);
	trx->op_info = "";

	return(err);
}
4618 
4619 /*=========================== LOCK RELEASE ==============================*/
4620 
/*************************************************************//**
Removes a granted record lock of a transaction from the queue and grants
locks to other transactions waiting in the queue if they now are entitled
to a lock. */
void
lock_rec_unlock(
/*============*/
	trx_t*			trx,	/*!< in/out: transaction that has
					set a record lock */
	const buf_block_t*	block,	/*!< in: buffer block containing rec */
	const rec_t*		rec,	/*!< in: record */
	lock_mode		lock_mode)/*!< in: LOCK_S or LOCK_X */
{
	lock_t*		first_lock;
	lock_t*		lock;
	ulint		heap_no;
	const char*	stmt;
	size_t		stmt_len;

	ut_ad(trx);
	ut_ad(rec);
	ut_ad(block->frame == page_align(rec));
	ut_ad(!trx->lock.wait_lock);
	ut_ad(trx_state_eq(trx, TRX_STATE_ACTIVE));

	heap_no = page_rec_get_heap_no(rec);

	lock_mutex_enter();
	trx_mutex_enter(trx);

	first_lock = lock_rec_get_first(lock_sys->rec_hash, block, heap_no);

	/* Find the last lock with the same lock_mode and transaction
	on the record. */

	for (lock = first_lock; lock != NULL;
	     lock = lock_rec_get_next(heap_no, lock)) {
		if (lock->trx == trx && lock_get_mode(lock) == lock_mode) {
			goto released;
		}
	}

	/* No matching lock found: release the mutexes and log an
	error including the current SQL statement for diagnosis. */
	lock_mutex_exit();
	trx_mutex_exit(trx);

	stmt = innobase_get_stmt_unsafe(trx->mysql_thd, &stmt_len);

	{
		ib::error	err;
		err << "Unlock row could not find a " << lock_mode
			<< " mode lock on the record. Current statement: ";
		err.write(stmt, stmt_len);
	}

	return;

released:
	ut_a(!lock_get_wait(lock));
	/* Clear this record's bit in the lock bitmap. */
	lock_rec_reset_nth_bit(lock, heap_no);

	/* Check if we can now grant waiting lock requests */

	for (lock = first_lock; lock != NULL;
	     lock = lock_rec_get_next(heap_no, lock)) {
		if (lock_get_wait(lock)
		    && !lock_rec_has_to_wait_in_queue(lock)) {

			/* Grant the lock */
			ut_ad(trx != lock->trx);
			lock_grant(lock);
		}
	}

	lock_mutex_exit();
	trx_mutex_exit(trx);
}
4697 
#ifdef UNIV_DEBUG
/*********************************************************************//**
Check if a transaction that has X or IX locks has set the dict_op
code correctly. */
static
void
lock_check_dict_lock(
/*==================*/
	const lock_t*	lock)	/*!< in: lock to check */
{
	if (lock_get_type_low(lock) == LOCK_REC) {

		/* A record lock on a system table in X mode implies a
		dictionary operation; the trx must have declared it. */
		if (lock->index->table->id < DICT_HDR_FIRST_ID
		    && lock_get_mode(lock) == LOCK_X) {

			ut_ad(lock_get_mode(lock) != LOCK_IX);
			ut_ad(lock->trx->dict_operation != TRX_DICT_OP_NONE);
		}
	} else {
		ut_ad(lock_get_type_low(lock) & LOCK_TABLE);

		const dict_table_t*	locked_table =
			lock->un_member.tab_lock.table;

		/* Likewise, an X or IX table lock on a system table
		requires the dict_op code to be set. */
		if (locked_table->id < DICT_HDR_FIRST_ID
		    && (lock_get_mode(lock) == LOCK_X
			|| lock_get_mode(lock) == LOCK_IX)) {

			ut_ad(lock->trx->dict_operation != TRX_DICT_OP_NONE);
		}
	}
}
#endif /* UNIV_DEBUG */
4738 
/** Remove GAP lock from a next key record lock
@param[in,out]	lock	lock object */
static
void
lock_remove_gap_lock(lock_t* lock)
{
	/* Remove lock on supremum (the supremum bit is always a pure
	gap lock) */
	lock_rec_reset_nth_bit(lock, PAGE_HEAP_NO_SUPREMUM);

	/* Remove GAP lock for other records */
	lock->remove_gap_lock();
}
4751 
/** Release read locks of a transacion. It is called during XA
prepare to release locks early.
@param[in,out]	trx		transaction
@param[in]	only_gap	release only GAP locks */
void
lock_trx_release_read_locks(
	trx_t*		trx,
	bool		only_gap)
{
	lock_t*		lock;
	lock_t*		next_lock;
	ulint		count = 0;

	/* Avoid taking lock_sys if trx didn't acquire any lock */
	if (UT_LIST_GET_LEN(trx->lock.trx_locks) == 0) {

		return;
	}

	lock_mutex_enter();

	lock = UT_LIST_GET_FIRST(trx->lock.trx_locks);

	while (lock != NULL) {

		/* Save the successor first: the current lock may be
		dequeued below. */
		next_lock = UT_LIST_GET_NEXT(trx_locks, lock);

		/* Check only for record lock */
		if (!lock->is_record_lock()
		    || lock->is_insert_intention()
		    || lock->is_predicate()) {

			lock = next_lock;
			continue;
		}

		/* Release any GAP only lock. */
		if (lock->is_gap()) {

			lock_rec_dequeue_from_page(lock);
			lock = next_lock;
			continue;
		}

		/* Don't release any non-GAP lock if not asked. */
		if (lock->is_record_not_gap() && only_gap) {

			lock = next_lock;
			continue;
		}

		/* Release Shared Next Key Lock(SH + GAP) if asked for */
		if (lock->mode() == LOCK_S && !only_gap) {

			lock_rec_dequeue_from_page(lock);
			lock = next_lock;
			continue;
		}

		/* Release GAP lock from Next Key lock */
		lock_remove_gap_lock(lock);

		/* Grant locks */
		lock_rec_grant(lock);

		lock = next_lock;

		++count;

		if (count == LOCK_RELEASE_INTERVAL) {
			/* Release the mutex for a while, so that we
			do not monopolize it */

			lock_mutex_exit();

			lock_mutex_enter();

			count = 0;
		}
	}

	lock_mutex_exit();
}
4835 
/*********************************************************************//**
Releases transaction locks, and releases possible other transactions waiting
because of these locks. Called with the lock_sys mutex held; the trx
mutex must NOT be held. */
static
void
lock_release(
/*=========*/
	trx_t*	trx)	/*!< in/out: transaction */
{
	lock_t*		lock;
	ulint		count = 0;
	trx_id_t	max_trx_id = trx_sys_get_max_trx_id();

	ut_ad(lock_mutex_own());
	ut_ad(!trx_mutex_own(trx));
	ut_ad(!trx->is_dd_trx);

	/* Pop locks off the tail of the trx's lock list until it is
	empty; each dequeue may grant waiting locks of other trxs. */
	for (lock = UT_LIST_GET_LAST(trx->lock.trx_locks);
	     lock != NULL;
	     lock = UT_LIST_GET_LAST(trx->lock.trx_locks)) {

		ut_d(lock_check_dict_lock(lock));

		if (lock_get_type_low(lock) == LOCK_REC) {

			lock_rec_dequeue_from_page(lock);
		} else {
			dict_table_t*	table;

			table = lock->un_member.tab_lock.table;

			if (lock_get_mode(lock) != LOCK_IS
			    && trx->undo_no != 0) {

				/* The trx may have modified the table. We
				block the use of the MySQL query cache for
				all currently active transactions. */

				table->query_cache_inv_id = max_trx_id;
			}

			lock_table_dequeue(lock);
		}

		if (count == LOCK_RELEASE_INTERVAL) {
			/* Release the mutex for a while, so that we
			do not monopolize it */

			lock_mutex_exit();

			lock_mutex_enter();

			count = 0;
		}

		++count;
	}
}
4894 
4895 /* True if a lock mode is S or X */
4896 #define IS_LOCK_S_OR_X(lock) \
4897 	(lock_get_mode(lock) == LOCK_S \
4898 	 || lock_get_mode(lock) == LOCK_X)
4899 
4900 /*********************************************************************//**
4901 Removes table locks of the transaction on a table to be dropped. */
4902 static
4903 void
4904 lock_trx_table_locks_remove(
4905 /*========================*/
4906 	const lock_t*	lock_to_remove)		/*!< in: lock to remove */
4907 {
4908 	trx_t*		trx = lock_to_remove->trx;
4909 
4910 	ut_ad(lock_mutex_own());
4911 
4912 	/* It is safe to read this because we are holding the lock mutex */
4913 	if (!trx->lock.cancel) {
4914 		trx_mutex_enter(trx);
4915 	} else {
4916 		ut_ad(trx_mutex_own(trx));
4917 	}
4918 
4919 	typedef lock_pool_t::reverse_iterator iterator;
4920 
4921 	iterator	end = trx->lock.table_locks.rend();
4922 
4923 	for (iterator it = trx->lock.table_locks.rbegin(); it != end; ++it) {
4924 
4925 		const lock_t*	lock = *it;
4926 
4927 		if (lock == NULL) {
4928 			continue;
4929 		}
4930 
4931 		ut_a(trx == lock->trx);
4932 		ut_a(lock_get_type_low(lock) & LOCK_TABLE);
4933 		ut_a(lock->un_member.tab_lock.table != NULL);
4934 
4935 		if (lock == lock_to_remove) {
4936 
4937 			*it = NULL;
4938 
4939 			if (!trx->lock.cancel) {
4940 				trx_mutex_exit(trx);
4941 			}
4942 
4943 			return;
4944 		}
4945 	}
4946 
4947 	if (!trx->lock.cancel) {
4948 		trx_mutex_exit(trx);
4949 	}
4950 
4951 	/* Lock must exist in the vector. */
4952 	ut_error;
4953 }
4954 
4955 /*********************************************************************//**
4956 Removes locks of a transaction on a table to be dropped.
4957 If remove_also_table_sx_locks is TRUE then table-level S and X locks are
4958 also removed in addition to other table-level and record-level locks.
4959 No lock that is going to be removed is allowed to be a wait lock. */
4960 static
4961 void
4962 lock_remove_all_on_table_for_trx(
4963 /*=============================*/
4964 	dict_table_t*	table,			/*!< in: table to be dropped */
4965 	trx_t*		trx,			/*!< in: a transaction */
4966 	ibool		remove_also_table_sx_locks)/*!< in: also removes
4967 						table S and X locks */
4968 {
4969 	lock_t*		lock;
4970 	lock_t*		prev_lock;
4971 
4972 	ut_ad(lock_mutex_own());
4973 
4974 	for (lock = UT_LIST_GET_LAST(trx->lock.trx_locks);
4975 	     lock != NULL;
4976 	     lock = prev_lock) {
4977 
4978 		prev_lock = UT_LIST_GET_PREV(trx_locks, lock);
4979 
4980 		if (lock_get_type_low(lock) == LOCK_REC
4981 		    && lock->index->table == table) {
4982 			ut_a(!lock_get_wait(lock));
4983 
4984 			lock_rec_discard(lock);
4985 		} else if (lock_get_type_low(lock) & LOCK_TABLE
4986 			   && lock->un_member.tab_lock.table == table
4987 			   && (remove_also_table_sx_locks
4988 			       || !IS_LOCK_S_OR_X(lock))) {
4989 
4990 			ut_a(!lock_get_wait(lock));
4991 
4992 			lock_trx_table_locks_remove(lock);
4993 			lock_table_remove_low(lock);
4994 		}
4995 	}
4996 }
4997 
4998 /*******************************************************************//**
4999 Remove any explicit record locks held by recovering transactions on
5000 the table.
5001 @return number of recovered transactions examined */
5002 static
5003 ulint
5004 lock_remove_recovered_trx_record_locks(
5005 /*===================================*/
5006 	dict_table_t*	table)	/*!< in: check if there are any locks
5007 				held on records in this table or on the
5008 				table itself */
5009 {
5010 	ut_a(table != NULL);
5011 	ut_ad(lock_mutex_own());
5012 
5013 	ulint		n_recovered_trx = 0;
5014 
5015 	mutex_enter(&trx_sys->mutex);
5016 
5017 	for (trx_t* trx = UT_LIST_GET_FIRST(trx_sys->rw_trx_list);
5018 	     trx != NULL;
5019 	     trx = UT_LIST_GET_NEXT(trx_list, trx)) {
5020 
5021 		assert_trx_in_rw_list(trx);
5022 
5023 		if (!trx->is_recovered) {
5024 			continue;
5025 		}
5026 
5027 		/* Because we are holding the lock_sys->mutex,
5028 		implicit locks cannot be converted to explicit ones
5029 		while we are scanning the explicit locks. */
5030 
5031 		lock_t*	next_lock;
5032 
5033 		for (lock_t* lock = UT_LIST_GET_FIRST(trx->lock.trx_locks);
5034 		     lock != NULL;
5035 		     lock = next_lock) {
5036 
5037 			ut_a(lock->trx == trx);
5038 
5039 			/* Recovered transactions can't wait on a lock. */
5040 
5041 			ut_a(!lock_get_wait(lock));
5042 
5043 			next_lock = UT_LIST_GET_NEXT(trx_locks, lock);
5044 
5045 			switch (lock_get_type_low(lock)) {
5046 			default:
5047 				ut_error;
5048 			case LOCK_TABLE:
5049 				if (lock->un_member.tab_lock.table == table) {
5050 					lock_trx_table_locks_remove(lock);
5051 					lock_table_remove_low(lock);
5052 				}
5053 				break;
5054 			case LOCK_REC:
5055 				if (lock->index->table == table) {
5056 					lock_rec_discard(lock);
5057 				}
5058 			}
5059 		}
5060 
5061 		++n_recovered_trx;
5062 	}
5063 
5064 	mutex_exit(&trx_sys->mutex);
5065 
5066 	return(n_recovered_trx);
5067 }
5068 
5069 /*********************************************************************//**
5070 Removes locks on a table to be dropped or truncated.
5071 If remove_also_table_sx_locks is TRUE then table-level S and X locks are
5072 also removed in addition to other table-level and record-level locks.
5073 No lock, that is going to be removed, is allowed to be a wait lock. */
5074 void
5075 lock_remove_all_on_table(
5076 /*=====================*/
5077 	dict_table_t*	table,			/*!< in: table to be dropped
5078 						or truncated */
5079 	ibool		remove_also_table_sx_locks)/*!< in: also removes
5080 						table S and X locks */
5081 {
5082 	lock_t*		lock;
5083 
5084 	lock_mutex_enter();
5085 
5086 	for (lock = UT_LIST_GET_FIRST(table->locks);
5087 	     lock != NULL;
5088 	     /* No op */) {
5089 
5090 		lock_t*	prev_lock;
5091 
5092 		prev_lock = UT_LIST_GET_PREV(un_member.tab_lock.locks, lock);
5093 
5094 		/* If we should remove all locks (remove_also_table_sx_locks
5095 		is TRUE), or if the lock is not table-level S or X lock,
5096 		then check we are not going to remove a wait lock. */
5097 		if (remove_also_table_sx_locks
5098 		    || !(lock_get_type(lock) == LOCK_TABLE
5099 			 && IS_LOCK_S_OR_X(lock))) {
5100 
5101 			ut_a(!lock_get_wait(lock));
5102 		}
5103 
5104 		lock_remove_all_on_table_for_trx(
5105 			table, lock->trx, remove_also_table_sx_locks);
5106 
5107 		if (prev_lock == NULL) {
5108 			if (lock == UT_LIST_GET_FIRST(table->locks)) {
5109 				/* lock was not removed, pick its successor */
5110 				lock = UT_LIST_GET_NEXT(
5111 					un_member.tab_lock.locks, lock);
5112 			} else {
5113 				/* lock was removed, pick the first one */
5114 				lock = UT_LIST_GET_FIRST(table->locks);
5115 			}
5116 		} else if (UT_LIST_GET_NEXT(un_member.tab_lock.locks,
5117 					    prev_lock) != lock) {
5118 			/* If lock was removed by
5119 			lock_remove_all_on_table_for_trx() then pick the
5120 			successor of prev_lock ... */
5121 			lock = UT_LIST_GET_NEXT(
5122 				un_member.tab_lock.locks, prev_lock);
5123 		} else {
5124 			/* ... otherwise pick the successor of lock. */
5125 			lock = UT_LIST_GET_NEXT(
5126 				un_member.tab_lock.locks, lock);
5127 		}
5128 	}
5129 
5130 	/* Note: Recovered transactions don't have table level IX or IS locks
5131 	but can have implicit record locks that have been converted to explicit
5132 	record locks. Such record locks cannot be freed by traversing the
5133 	transaction lock list in dict_table_t (as above). */
5134 
5135 	if (!lock_sys->rollback_complete
5136 	    && lock_remove_recovered_trx_record_locks(table) == 0) {
5137 
5138 		lock_sys->rollback_complete = TRUE;
5139 	}
5140 
5141 	lock_mutex_exit();
5142 }
5143 
5144 /*===================== VALIDATION AND DEBUGGING ====================*/
5145 
5146 /*********************************************************************//**
5147 Prints info of a table lock. */
5148 void
5149 lock_table_print(
5150 /*=============*/
5151 	FILE*		file,	/*!< in: file where to print */
5152 	const lock_t*	lock)	/*!< in: table type lock */
5153 {
5154 	ut_ad(lock_mutex_own());
5155 	ut_a(lock_get_type_low(lock) == LOCK_TABLE);
5156 
5157 	fputs("TABLE LOCK table ", file);
5158 	ut_print_name(file, lock->trx,
5159 		      lock->un_member.tab_lock.table->name.m_name);
5160 	fprintf(file, " trx id " TRX_ID_FMT, trx_get_id_for_print(lock->trx));
5161 
5162 	if (lock_get_mode(lock) == LOCK_S) {
5163 		fputs(" lock mode S", file);
5164 	} else if (lock_get_mode(lock) == LOCK_X) {
5165 		ut_ad(lock->trx->id != 0);
5166 		fputs(" lock mode X", file);
5167 	} else if (lock_get_mode(lock) == LOCK_IS) {
5168 		fputs(" lock mode IS", file);
5169 	} else if (lock_get_mode(lock) == LOCK_IX) {
5170 		ut_ad(lock->trx->id != 0);
5171 		fputs(" lock mode IX", file);
5172 	} else if (lock_get_mode(lock) == LOCK_AUTO_INC) {
5173 		fputs(" lock mode AUTO-INC", file);
5174 	} else {
5175 		fprintf(file, " unknown lock mode %lu",
5176 			(ulong) lock_get_mode(lock));
5177 	}
5178 
5179 	if (lock_get_wait(lock)) {
5180 		fputs(" waiting", file);
5181 	}
5182 
5183 	putc('\n', file);
5184 }
5185 
5186 /*********************************************************************//**
5187 Prints info of a record lock. */
5188 void
5189 lock_rec_print(
5190 /*===========*/
5191 	FILE*		file,	/*!< in: file where to print */
5192 	const lock_t*	lock)	/*!< in: record type lock */
5193 {
5194 	ulint			space;
5195 	ulint			page_no;
5196 	mtr_t			mtr;
5197 	mem_heap_t*		heap		= NULL;
5198 	ulint			offsets_[REC_OFFS_NORMAL_SIZE];
5199 	ulint*			offsets		= offsets_;
5200 	rec_offs_init(offsets_);
5201 
5202 	ut_ad(lock_mutex_own());
5203 	ut_a(lock_get_type_low(lock) == LOCK_REC);
5204 
5205 	space = lock->un_member.rec_lock.space;
5206 	page_no = lock->un_member.rec_lock.page_no;
5207 
5208 	fprintf(file, "RECORD LOCKS space id %lu page no %lu n bits %lu "
5209 		"index %s of table ",
5210 		(ulong) space, (ulong) page_no,
5211 		(ulong) lock_rec_get_n_bits(lock),
5212 		lock->index->name());
5213 	ut_print_name(file, lock->trx, lock->index->table_name);
5214 	fprintf(file, " trx id " TRX_ID_FMT, trx_get_id_for_print(lock->trx));
5215 
5216 	if (lock_get_mode(lock) == LOCK_S) {
5217 		fputs(" lock mode S", file);
5218 	} else if (lock_get_mode(lock) == LOCK_X) {
5219 		fputs(" lock_mode X", file);
5220 	} else {
5221 		ut_error;
5222 	}
5223 
5224 	if (lock_rec_get_gap(lock)) {
5225 		fputs(" locks gap before rec", file);
5226 	}
5227 
5228 	if (lock_rec_get_rec_not_gap(lock)) {
5229 		fputs(" locks rec but not gap", file);
5230 	}
5231 
5232 	if (lock_rec_get_insert_intention(lock)) {
5233 		fputs(" insert intention", file);
5234 	}
5235 
5236 	if (lock_get_wait(lock)) {
5237 		fputs(" waiting", file);
5238 	}
5239 
5240 	mtr_start(&mtr);
5241 
5242 	putc('\n', file);
5243 
5244 	const buf_block_t*	block;
5245 
5246 	block = buf_page_try_get(page_id_t(space, page_no), &mtr);
5247 
5248 	for (ulint i = 0; i < lock_rec_get_n_bits(lock); ++i) {
5249 
5250 		if (!lock_rec_get_nth_bit(lock, i)) {
5251 			continue;
5252 		}
5253 
5254 		fprintf(file, "Record lock, heap no %lu", (ulong) i);
5255 
5256 		if (block) {
5257 			const rec_t*	rec;
5258 
5259 			rec = page_find_rec_with_heap_no(
5260 				buf_block_get_frame(block), i);
5261 
5262 			offsets = rec_get_offsets(
5263 				rec, lock->index, offsets,
5264 				ULINT_UNDEFINED, &heap);
5265 
5266 			putc(' ', file);
5267 			rec_print_new(file, rec, offsets);
5268 		}
5269 
5270 		putc('\n', file);
5271 	}
5272 
5273 	mtr_commit(&mtr);
5274 
5275 	if (heap) {
5276 		mem_heap_free(heap);
5277 	}
5278 }
5279 
5280 #ifdef UNIV_DEBUG
5281 /* Print the number of lock structs from lock_print_info_summary() only
5282 in non-production builds for performance reasons, see
5283 http://bugs.mysql.com/36942 */
5284 #define PRINT_NUM_OF_LOCK_STRUCTS
5285 #endif /* UNIV_DEBUG */
5286 
#ifdef PRINT_NUM_OF_LOCK_STRUCTS
/*********************************************************************//**
Calculates the number of record lock structs in the record lock hash table
by walking every hash cell and counting the chained lock structs.
@return number of record locks */
static
ulint
lock_get_n_rec_locks(void)
/*======================*/
{
	ulint	total = 0;

	ut_ad(lock_mutex_own());

	for (ulint cell = 0;
	     cell < hash_get_n_cells(lock_sys->rec_hash);
	     ++cell) {

		const lock_t*	lock = static_cast<const lock_t*>(
			HASH_GET_FIRST(lock_sys->rec_hash, cell));

		/* Count every lock struct chained in this cell. */
		while (lock != NULL) {
			++total;
			lock = static_cast<const lock_t*>(
				HASH_GET_NEXT(hash, lock));
		}
	}

	return(total);
}
#endif /* PRINT_NUM_OF_LOCK_STRUCTS */
5317 
5318 /*********************************************************************//**
5319 Prints info of locks for all transactions.
5320 @return FALSE if not able to obtain lock mutex
5321 and exits without printing info */
5322 ibool
5323 lock_print_info_summary(
5324 /*====================*/
5325 	FILE*	file,	/*!< in: file where to print */
5326 	ibool	nowait)	/*!< in: whether to wait for the lock mutex */
5327 {
5328 	/* if nowait is FALSE, wait on the lock mutex,
5329 	otherwise return immediately if fail to obtain the
5330 	mutex. */
5331 	if (!nowait) {
5332 		lock_mutex_enter();
5333 	} else if (lock_mutex_enter_nowait()) {
5334 		fputs("FAIL TO OBTAIN LOCK MUTEX,"
5335 		      " SKIP LOCK INFO PRINTING\n", file);
5336 		return(FALSE);
5337 	}
5338 
5339 	if (lock_deadlock_found) {
5340 		fputs("------------------------\n"
5341 		      "LATEST DETECTED DEADLOCK\n"
5342 		      "------------------------\n", file);
5343 
5344 		if (!srv_read_only_mode) {
5345 			ut_copy_file(file, lock_latest_err_file);
5346 		}
5347 	}
5348 
5349 	fputs("------------\n"
5350 	      "TRANSACTIONS\n"
5351 	      "------------\n", file);
5352 
5353 	fprintf(file, "Trx id counter " TRX_ID_FMT "\n",
5354 		trx_sys_get_max_trx_id());
5355 
5356 	fprintf(file,
5357 		"Purge done for trx's n:o < " TRX_ID_FMT
5358 		" undo n:o < " TRX_ID_FMT " state: ",
5359 		purge_sys->iter.trx_no,
5360 		purge_sys->iter.undo_no);
5361 
5362 	/* Note: We are reading the state without the latch. One because it
5363 	will violate the latching order and two because we are merely querying
5364 	the state of the variable for display. */
5365 
5366 	switch (purge_sys->state){
5367 	case PURGE_STATE_INIT:
5368 		/* Should never be in this state while the system is running. */
5369 		ut_error;
5370 
5371 	case PURGE_STATE_EXIT:
5372 		fprintf(file, "exited");
5373 		break;
5374 
5375 	case PURGE_STATE_DISABLED:
5376 		fprintf(file, "disabled");
5377 		break;
5378 
5379 	case PURGE_STATE_RUN:
5380 		fprintf(file, "running");
5381 		/* Check if it is waiting for more data to arrive. */
5382 		if (!purge_sys->running) {
5383 			fprintf(file, " but idle");
5384 		}
5385 		break;
5386 
5387 	case PURGE_STATE_STOP:
5388 		fprintf(file, "stopped");
5389 		break;
5390 	}
5391 
5392 	fprintf(file, "\n");
5393 
5394 	fprintf(file,
5395 		"History list length %lu\n",
5396 		(ulong) trx_sys->rseg_history_len);
5397 
5398 #ifdef PRINT_NUM_OF_LOCK_STRUCTS
5399 	fprintf(file,
5400 		"Total number of lock structs in row lock hash table %lu\n",
5401 		(ulong) lock_get_n_rec_locks());
5402 #endif /* PRINT_NUM_OF_LOCK_STRUCTS */
5403 	return(TRUE);
5404 }
5405 
5406 /** Functor to print not-started transaction from the mysql_trx_list. */
5407 
5408 struct	PrintNotStarted {
5409 
5410 	PrintNotStarted(FILE* file) : m_file(file) { }
5411 
5412 	void	operator()(const trx_t* trx)
5413 	{
5414 		ut_ad(trx->in_mysql_trx_list);
5415 		ut_ad(mutex_own(&trx_sys->mutex));
5416 
5417 		/* See state transitions and locking rules in trx0trx.h */
5418 
5419 		if (trx_state_eq(trx, TRX_STATE_NOT_STARTED)) {
5420 
5421 			fputs("---", m_file);
5422 			trx_print_latched(m_file, trx, 600);
5423 		}
5424 	}
5425 
5426 	FILE*		m_file;
5427 };
5428 
/** Iterate over a transaction's locks. Keeping track of the
iterator using an ordinal value.  Note that current() re-walks the
lock list from the start each time (O(m_index)), which makes the
iterator robust against the list changing between calls at the cost
of quadratic total work. */

class TrxLockIterator {
public:
	TrxLockIterator() { rewind(); }

	/** Get the m_index(th) lock of a transaction.
	@return current lock or 0 */
	const lock_t* current(const trx_t* trx) const
	{
		lock_t*	lock;
		ulint	i = 0;

		/* Reposition by walking m_index steps from the head;
		returns NULL if the list is shorter than m_index. */
		for (lock = UT_LIST_GET_FIRST(trx->lock.trx_locks);
		     lock != NULL && i < m_index;
		     lock = UT_LIST_GET_NEXT(trx_locks, lock), ++i) {

			/* No op */
		}

		return(lock);
	}

	/** Set the ordinal value to 0 */
	void rewind()
	{
		m_index = 0;
	}

	/** Increment the ordinal value.
	@return the current index value */
	ulint next()
	{
		return(++m_index);
	}

private:
	/** Current iterator position */
	ulint		m_index;
};
5470 
/** Iterator over the trx_sys transaction lists, tracking its position
with an ordinal value so that it survives the latches being released
and reacquired.  NOTE(review): the comment historically claimed both
the RW and RO lists are visited, but only rw_trx_list is ever assigned
here and m_trx_list is never switched — confirm against the version
history before relying on RO coverage. */

class TrxListIterator {
public:
	TrxListIterator() : m_index()
	{
		/* We iterate over the RW trx list first. */

		m_trx_list = &trx_sys->rw_trx_list;
	}

	/** Get the current transaction whose ordinality is m_index.
	@return current transaction or 0 */

	const trx_t* current()
	{
		return(reposition());
	}

	/** Advance the transaction current ordinal value and reset the
	transaction lock ordinal value */

	void next()
	{
		++m_index;
		m_lock_iter.rewind();
	}

	/** @return the per-transaction lock iterator */
	TrxLockIterator& lock_iter()
	{
		return(m_lock_iter);
	}

private:
	/** Reposition the "cursor" on the current transaction. If it
	is the first time then the "cursor" will be positioned on the
	first transaction.

	@return transaction instance or 0 */
	const trx_t* reposition() const
	{
		ulint	i;
		trx_t*	trx;

		/* Make the transaction at the ordinal value of m_index
		the current transaction. ie. reposition/restore */

		for (i = 0, trx = UT_LIST_GET_FIRST(*m_trx_list);
		     trx != NULL && (i < m_index);
		     trx = UT_LIST_GET_NEXT(trx_list, trx), ++i) {

			check_trx_state(trx);
		}

		return(trx);
	}

	/** Ordinal value of the transaction in the current transaction list */
	ulint			m_index;

	/** Current transaction list */
	trx_ut_list_t*		m_trx_list;

	/** For iterating over a transaction's locks */
	TrxLockIterator		m_lock_iter;
};
5538 
/** Prints transaction lock wait and MVCC state: the trx header line,
its read view limits (if it has one), and — when the trx is waiting —
the wait duration and the lock it is waiting for.
@param[in,out]	file	file where to print
@param[in]	trx	transaction */
void
lock_trx_print_wait_and_mvcc_state(
	FILE*		file,
	const trx_t*	trx)
{
	fprintf(file, "---");

	trx_print_latched(file, trx, 600);

	const ReadView*	read_view = trx_get_read_view(trx);

	if (read_view != NULL) {
		read_view->print_limits(file);
	}

	if (trx->lock.que_state == TRX_QUE_LOCK_WAIT) {

		fprintf(file,
			"------- TRX HAS BEEN WAITING %lu SEC"
			" FOR THIS LOCK TO BE GRANTED:\n",
			(ulong) difftime(ut_time(), trx->lock.wait_started));

		/* Print the blocking lock with the printer matching its
		type (record vs. table). */
		if (lock_get_type_low(trx->lock.wait_lock) == LOCK_REC) {
			lock_rec_print(file, trx->lock.wait_lock);
		} else {
			lock_table_print(file, trx->lock.wait_lock);
		}

		fprintf(file, "------------------\n");
	}
}
5573 
5574 /*********************************************************************//**
5575 Prints info of locks for a transaction. This function will release the
5576 lock mutex and the trx_sys_t::mutex if the page was read from disk.
5577 @return true if page was read from the tablespace */
5578 static
5579 bool
5580 lock_rec_fetch_page(
5581 /*================*/
5582 	const lock_t*	lock)	/*!< in: record lock */
5583 {
5584 	ut_ad(lock_get_type_low(lock) == LOCK_REC);
5585 
5586 	ulint			space_id = lock->un_member.rec_lock.space;
5587 	fil_space_t*		space;
5588 	bool			found;
5589 	const page_size_t&	page_size = fil_space_get_page_size(space_id,
5590 								    &found);
5591 	ulint			page_no = lock->un_member.rec_lock.page_no;
5592 
5593 	/* Check if the .ibd file exists. */
5594 	if (found) {
5595 		mtr_t	mtr;
5596 
5597 		lock_mutex_exit();
5598 
5599 		mutex_exit(&trx_sys->mutex);
5600 
5601 		DEBUG_SYNC_C("innodb_monitor_before_lock_page_read");
5602 
5603 		/* Check if the space is exists or not. only
5604 		when the space is valid, try to get the page. */
5605 		space = fil_space_acquire(space_id);
5606 		if (space) {
5607 			mtr_start(&mtr);
5608 			buf_page_get_gen(
5609 				page_id_t(space_id, page_no), page_size,
5610 				RW_NO_LATCH, NULL,
5611 				BUF_GET_POSSIBLY_FREED,
5612 				__FILE__, __LINE__, &mtr);
5613 			mtr_commit(&mtr);
5614 			fil_space_release(space);
5615 		}
5616 
5617 		lock_mutex_enter();
5618 
5619 		mutex_enter(&trx_sys->mutex);
5620 
5621 		return(true);
5622 	}
5623 
5624 	return(false);
5625 }
5626 
5627 /*********************************************************************//**
5628 Prints info of locks for a transaction.
5629 @return true if all printed, false if latches were released. */
5630 static
5631 bool
5632 lock_trx_print_locks(
5633 /*=================*/
5634 	FILE*		file,		/*!< in/out: File to write */
5635 	const trx_t*	trx,		/*!< in: current transaction */
5636 	TrxLockIterator&iter,		/*!< in: transaction lock iterator */
5637 	bool		load_block)	/*!< in: if true then read block
5638 					from disk */
5639 {
5640 	const lock_t* lock;
5641 
5642 	/* Iterate over the transaction's locks. */
5643 	while ((lock = iter.current(trx)) != 0) {
5644 
5645 		if (lock_get_type_low(lock) == LOCK_REC) {
5646 
5647 			if (load_block) {
5648 
5649 				/* Note: lock_rec_fetch_page() will
5650 				release both the lock mutex and the
5651 				trx_sys_t::mutex if it does a read
5652 				from disk. */
5653 
5654 				if (lock_rec_fetch_page(lock)) {
5655 					/* We need to resync the
5656 					current transaction. */
5657 					return(false);
5658 				}
5659 
5660 				/* It is a single table tablespace
5661 				and the .ibd file is missing
5662 				(TRUNCATE TABLE probably stole the
5663 				locks): just print the lock without
5664 				attempting to load the page in the
5665 				buffer pool. */
5666 
5667 				fprintf(file,
5668 					"RECORD LOCKS on non-existing"
5669 					" space %u\n",
5670 					lock->un_member.rec_lock.space);
5671 			}
5672 
5673 			/* Print all the record locks on the page from
5674 			the record lock bitmap */
5675 
5676 			lock_rec_print(file, lock);
5677 
5678 			load_block = true;
5679 
5680 		} else {
5681 			ut_ad(lock_get_type_low(lock) & LOCK_TABLE);
5682 
5683 			lock_table_print(file, lock);
5684 		}
5685 
5686 		if (iter.next() >= 10) {
5687 
5688 			fprintf(file,
5689 				"10 LOCKS PRINTED FOR THIS TRX:"
5690 				" SUPPRESSING FURTHER PRINTS\n");
5691 
5692 			break;
5693 		}
5694 	}
5695 
5696 	return(true);
5697 }
5698 
5699 /*********************************************************************//**
5700 Prints info of locks for each transaction. This function assumes that the
5701 caller holds the lock mutex and more importantly it will release the lock
5702 mutex on behalf of the caller. (This should be fixed in the future). */
5703 void
5704 lock_print_info_all_transactions(
5705 /*=============================*/
5706 	FILE*		file)	/*!< in/out: file where to print */
5707 {
5708 	ut_ad(lock_mutex_own());
5709 
5710 	fprintf(file, "LIST OF TRANSACTIONS FOR EACH SESSION:\n");
5711 
5712 	mutex_enter(&trx_sys->mutex);
5713 
5714 	/* First print info on non-active transactions */
5715 
5716 	/* NOTE: information of auto-commit non-locking read-only
5717 	transactions will be omitted here. The information will be
5718 	available from INFORMATION_SCHEMA.INNODB_TRX. */
5719 
5720 	PrintNotStarted	print_not_started(file);
5721 	ut_list_map(trx_sys->mysql_trx_list, print_not_started);
5722 
5723 	const trx_t*	trx;
5724 	TrxListIterator	trx_iter;
5725 	const trx_t*	prev_trx = 0;
5726 
5727 	/* Control whether a block should be fetched from the buffer pool. */
5728 	bool		load_block = true;
5729 	bool		monitor = srv_print_innodb_lock_monitor;
5730 
5731 	while ((trx = trx_iter.current()) != 0) {
5732 
5733 		check_trx_state(trx);
5734 
5735 		if (trx != prev_trx) {
5736 			lock_trx_print_wait_and_mvcc_state(file, trx);
5737 			prev_trx = trx;
5738 
5739 			/* The transaction that read in the page is no
5740 			longer the one that read the page in. We need to
5741 			force a page read. */
5742 			load_block = true;
5743 		}
5744 
5745 		/* If we need to print the locked record contents then we
5746 		need to fetch the containing block from the buffer pool. */
5747 		if (monitor) {
5748 
5749 			/* Print the locks owned by the current transaction. */
5750 			TrxLockIterator& lock_iter = trx_iter.lock_iter();
5751 
5752 			if (!lock_trx_print_locks(
5753 					file, trx, lock_iter, load_block)) {
5754 
5755 				/* Resync trx_iter, the trx_sys->mutex and
5756 				the lock mutex were released. A page was
5757 				successfully read in.  We need to print its
5758 				contents on the next call to
5759 				lock_trx_print_locks(). On the next call to
5760 				lock_trx_print_locks() we should simply print
5761 				the contents of the page just read in.*/
5762 				load_block = false;
5763 
5764 				continue;
5765 			}
5766 		}
5767 
5768 		load_block = true;
5769 
5770 		/* All record lock details were printed without fetching
5771 		a page from disk, or we didn't need to print the detail. */
5772 		trx_iter.next();
5773 	}
5774 
5775 	lock_mutex_exit();
5776 	mutex_exit(&trx_sys->mutex);
5777 
5778 	ut_ad(lock_validate());
5779 }
5780 
5781 #ifdef UNIV_DEBUG
5782 /*********************************************************************//**
5783 Find the the lock in the trx_t::trx_lock_t::table_locks vector.
5784 @return true if found */
5785 static
5786 bool
5787 lock_trx_table_locks_find(
5788 /*======================*/
5789 	trx_t*		trx,		/*!< in: trx to validate */
5790 	const lock_t*	find_lock)	/*!< in: lock to find */
5791 {
5792 	bool		found = false;
5793 
5794 	trx_mutex_enter(trx);
5795 
5796 	typedef lock_pool_t::const_reverse_iterator iterator;
5797 
5798 	iterator	end = trx->lock.table_locks.rend();
5799 
5800 	for (iterator it = trx->lock.table_locks.rbegin(); it != end; ++it) {
5801 
5802 		const lock_t*	lock = *it;
5803 
5804 		if (lock == NULL) {
5805 
5806 			continue;
5807 
5808 		} else if (lock == find_lock) {
5809 
5810 			/* Can't be duplicates. */
5811 			ut_a(!found);
5812 			found = true;
5813 		}
5814 
5815 		ut_a(trx == lock->trx);
5816 		ut_a(lock_get_type_low(lock) & LOCK_TABLE);
5817 		ut_a(lock->un_member.tab_lock.table != NULL);
5818 	}
5819 
5820 	trx_mutex_exit(trx);
5821 
5822 	return(found);
5823 }
5824 
5825 /*********************************************************************//**
5826 Validates the lock queue on a table.
5827 @return TRUE if ok */
5828 static
5829 ibool
5830 lock_table_queue_validate(
5831 /*======================*/
5832 	const dict_table_t*	table)	/*!< in: table */
5833 {
5834 	const lock_t*	lock;
5835 
5836 	ut_ad(lock_mutex_own());
5837 	ut_ad(trx_sys_mutex_own());
5838 
5839 	for (lock = UT_LIST_GET_FIRST(table->locks);
5840 	     lock != NULL;
5841 	     lock = UT_LIST_GET_NEXT(un_member.tab_lock.locks, lock)) {
5842 
5843 		/* lock->trx->state cannot change from or to NOT_STARTED
5844 		while we are holding the trx_sys->mutex. It may change
5845 		from ACTIVE to PREPARED, but it may not change to
5846 		COMMITTED, because we are holding the lock_sys->mutex. */
5847 		ut_ad(trx_assert_started(lock->trx));
5848 
5849 		if (!lock_get_wait(lock)) {
5850 
5851 			ut_a(!lock_table_other_has_incompatible(
5852 				     lock->trx, 0, table,
5853 				     lock_get_mode(lock)));
5854 		} else {
5855 
5856 			ut_a(lock_table_has_to_wait_in_queue(lock));
5857 		}
5858 
5859 		ut_a(lock_trx_table_locks_find(lock->trx, lock));
5860 	}
5861 
5862 	return(TRUE);
5863 }
5864 
5865 /*********************************************************************//**
5866 Validates the lock queue on a single record.
5867 @return TRUE if ok */
5868 static
5869 ibool
5870 lock_rec_queue_validate(
5871 /*====================*/
5872 	ibool			locked_lock_trx_sys,
5873 					/*!< in: if the caller holds
5874 					both the lock mutex and
5875 					trx_sys_t->lock. */
5876 	const buf_block_t*	block,	/*!< in: buffer block containing rec */
5877 	const rec_t*		rec,	/*!< in: record to look at */
5878 	const dict_index_t*	index,	/*!< in: index, or NULL if not known */
5879 	const ulint*		offsets)/*!< in: rec_get_offsets(rec, index) */
5880 {
5881 	const trx_t*	impl_trx;
5882 	const lock_t*	lock;
5883 	ulint		heap_no;
5884 
5885 	ut_a(rec);
5886 	ut_a(block->frame == page_align(rec));
5887 	ut_ad(rec_offs_validate(rec, index, offsets));
5888 	ut_ad(!page_rec_is_comp(rec) == !rec_offs_comp(offsets));
5889 	ut_ad(lock_mutex_own() == locked_lock_trx_sys);
5890 	ut_ad(!index || dict_index_is_clust(index)
5891 	      || !dict_index_is_online_ddl(index));
5892 
5893 	heap_no = page_rec_get_heap_no(rec);
5894 
5895 	if (!locked_lock_trx_sys) {
5896 		lock_mutex_enter();
5897 		mutex_enter(&trx_sys->mutex);
5898 	}
5899 
5900 	if (!page_rec_is_user_rec(rec)) {
5901 
5902 		for (lock = lock_rec_get_first(lock_sys->rec_hash,
5903 					       block, heap_no);
5904 		     lock != NULL;
5905 		     lock = lock_rec_get_next_const(heap_no, lock)) {
5906 
5907 			ut_ad(!trx_is_ac_nl_ro(lock->trx));
5908 
5909 			if (lock_get_wait(lock)) {
5910 				ut_a(lock_rec_has_to_wait_in_queue(lock));
5911 			}
5912 
5913 			if (index != NULL) {
5914 				ut_a(lock->index == index);
5915 			}
5916 		}
5917 
5918 		goto func_exit;
5919 	}
5920 
5921 	if (index == NULL) {
5922 
5923 		/* Nothing we can do */
5924 
5925 	} else if (dict_index_is_clust(index)) {
5926 		trx_id_t	trx_id;
5927 
5928 		/* Unlike the non-debug code, this invariant can only succeed
5929 		if the check and assertion are covered by the lock mutex. */
5930 
5931 		trx_id = lock_clust_rec_some_has_impl(rec, index, offsets);
5932 		impl_trx = trx_rw_is_active_low(trx_id, NULL);
5933 
5934 		ut_ad(lock_mutex_own());
5935 		/* impl_trx cannot be committed until lock_mutex_exit()
5936 		because lock_trx_release_locks() acquires lock_sys->mutex */
5937 
5938 		if (impl_trx != NULL) {
5939 			const lock_t*	other_lock
5940 				= lock_rec_other_has_expl_req(
5941 					LOCK_S, block, true, heap_no,
5942 					impl_trx);
5943 
5944 			/* The impl_trx is holding an implicit lock on the
5945 			given record 'rec'. So there cannot be another
5946 			explicit granted lock.  Also, there can be another
5947 			explicit waiting lock only if the impl_trx has an
5948 			explicit granted lock. */
5949 
5950 			if (other_lock != NULL) {
5951 #ifdef WITH_WSREP
5952                           //ut_a(lock_get_wait(other_lock));
5953 				if (!lock_get_wait(other_lock)) {
5954 
5955 			ib::info() << "WSREP impl BF lock conflict for my impl lock:\n BF:" <<
5956                           ((wsrep_thd_is_BF(impl_trx->mysql_thd, FALSE)) ? "BF" : "normal") << " exec: " <<
5957                           wsrep_thd_exec_mode(impl_trx->mysql_thd) << " conflict: " <<
5958                           wsrep_thd_conflict_state(impl_trx->mysql_thd) << " seqno: " <<
5959                           wsrep_thd_trx_seqno(impl_trx->mysql_thd) << " SQL: " <<
5960                           wsrep_thd_query(impl_trx->mysql_thd);
5961                         trx_t* otrx = other_lock->trx;
5962 			ib::info() << "WSREP other lock:\n BF:" <<
5963                           ((wsrep_thd_is_BF(otrx->mysql_thd, FALSE)) ? "BF" : "normal")  << " exec: " <<
5964                           wsrep_thd_exec_mode(otrx->mysql_thd) << " conflict: " <<
5965                           wsrep_thd_conflict_state(otrx->mysql_thd) << " seqno: " <<
5966                           wsrep_thd_trx_seqno(otrx->mysql_thd) << " SQL: " <<
5967                           wsrep_thd_query(otrx->mysql_thd);
5968                                 }
5969 
5970 				//ut_a(lock_rec_has_expl(
5971 				//	LOCK_X | LOCK_REC_NOT_GAP,
5972 				//	block, heap_no, impl_trx));
5973 				if (!lock_rec_has_expl(
5974 					LOCK_X | LOCK_REC_NOT_GAP,
5975 					block, heap_no, impl_trx)) {
5976                                   ib::info() << "WSREP impl BF lock conflict";
5977                                 }
5978 #else
5979 				ut_a(lock_get_wait(other_lock));
5980 				ut_a(lock_rec_has_expl(
5981 					LOCK_X | LOCK_REC_NOT_GAP,
5982 					block, heap_no, impl_trx));
5983 #endif
5984 			}
5985 		}
5986 	}
5987 
5988 	for (lock = lock_rec_get_first(lock_sys->rec_hash, block, heap_no);
5989 	     lock != NULL;
5990 	     lock = lock_rec_get_next_const(heap_no, lock)) {
5991 
5992 		ut_ad(!trx_is_ac_nl_ro(lock->trx));
5993 
5994 		if (index) {
5995 			ut_a(lock->index == index);
5996 		}
5997 
5998 		if (!lock_rec_get_gap(lock) && !lock_get_wait(lock)) {
5999 
6000 			lock_mode	mode;
6001 
6002 			if (lock_get_mode(lock) == LOCK_S) {
6003 				mode = LOCK_X;
6004 			} else {
6005 				mode = LOCK_S;
6006 			}
6007 
6008 			const lock_t*	other_lock
6009 				= lock_rec_other_has_expl_req(
6010 					mode, block, false, heap_no,
6011 					lock->trx);
6012 #ifdef WITH_WSREP
6013 			ut_a(!other_lock || wsrep_thd_is_BF(lock->trx->mysql_thd, FALSE) ||
6014 			     wsrep_thd_is_BF(other_lock->trx->mysql_thd, FALSE));
6015 #else
6016 			ut_a(!other_lock);
6017 #endif /* WITH_WSREP */
6018 
6019 		} else if (lock_get_wait(lock) && !lock_rec_get_gap(lock)) {
6020 
6021 			ut_a(lock_rec_has_to_wait_in_queue(lock));
6022 		}
6023 	}
6024 
6025 func_exit:
6026 	if (!locked_lock_trx_sys) {
6027 		lock_mutex_exit();
6028 		mutex_exit(&trx_sys->mutex);
6029 	}
6030 
6031 	return(TRUE);
6032 }
6033 
6034 /*********************************************************************//**
6035 Validates the record lock queues on a page.
6036 @return TRUE if ok */
6037 static
6038 ibool
6039 lock_rec_validate_page(
6040 /*===================*/
6041 	const buf_block_t*	block)	/*!< in: buffer block */
6042 {
6043 	const lock_t*	lock;
6044 	const rec_t*	rec;
6045 	ulint		nth_lock	= 0;
6046 	ulint		nth_bit		= 0;
6047 	ulint		i;
6048 	mem_heap_t*	heap		= NULL;
6049 	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
6050 	ulint*		offsets		= offsets_;
6051 	rec_offs_init(offsets_);
6052 
6053 	ut_ad(!lock_mutex_own());
6054 
6055 	lock_mutex_enter();
6056 	mutex_enter(&trx_sys->mutex);
6057 loop:
6058 	lock = lock_rec_get_first_on_page_addr(
6059 		lock_sys->rec_hash,
6060 		block->page.id.space(), block->page.id.page_no());
6061 
6062 	if (!lock) {
6063 		goto function_exit;
6064 	}
6065 
6066 	ut_ad(!block->page.file_page_was_freed);
6067 
6068 	for (i = 0; i < nth_lock; i++) {
6069 
6070 		lock = lock_rec_get_next_on_page_const(lock);
6071 
6072 		if (!lock) {
6073 			goto function_exit;
6074 		}
6075 	}
6076 
6077 	ut_ad(!trx_is_ac_nl_ro(lock->trx));
6078 
6079 # ifdef UNIV_DEBUG
6080 	/* Only validate the record queues when this thread is not
6081 	holding a space->latch.  Deadlocks are possible due to
6082 	latching order violation when UNIV_DEBUG is defined while
6083 	UNIV_DEBUG is not. */
6084 	if (!sync_check_find(SYNC_FSP))
6085 # endif /* UNIV_DEBUG */
6086 	for (i = nth_bit; i < lock_rec_get_n_bits(lock); i++) {
6087 
6088 		if (i == 1 || lock_rec_get_nth_bit(lock, i)) {
6089 
6090 			rec = page_find_rec_with_heap_no(block->frame, i);
6091 			ut_a(rec);
6092 			offsets = rec_get_offsets(rec, lock->index, offsets,
6093 						  ULINT_UNDEFINED, &heap);
6094 
6095 			/* If this thread is holding the file space
6096 			latch (fil_space_t::latch), the following
6097 			check WILL break the latching order and may
6098 			cause a deadlock of threads. */
6099 
6100 			lock_rec_queue_validate(
6101 				TRUE, block, rec, lock->index, offsets);
6102 
6103 			nth_bit = i + 1;
6104 
6105 			goto loop;
6106 		}
6107 	}
6108 
6109 	nth_bit = 0;
6110 	nth_lock++;
6111 
6112 	goto loop;
6113 
6114 function_exit:
6115 	lock_mutex_exit();
6116 	mutex_exit(&trx_sys->mutex);
6117 
6118 	if (heap != NULL) {
6119 		mem_heap_free(heap);
6120 	}
6121 	return(TRUE);
6122 }
6123 
6124 /*********************************************************************//**
6125 Validates the table locks.
6126 @return TRUE if ok */
6127 static
6128 ibool
6129 lock_validate_table_locks(
6130 /*======================*/
6131 	const trx_ut_list_t*	trx_list)	/*!< in: trx list */
6132 {
6133 	const trx_t*	trx;
6134 
6135 	ut_ad(lock_mutex_own());
6136 	ut_ad(trx_sys_mutex_own());
6137 
6138 	ut_ad(trx_list == &trx_sys->rw_trx_list);
6139 
6140 	for (trx = UT_LIST_GET_FIRST(*trx_list);
6141 	     trx != NULL;
6142 	     trx = UT_LIST_GET_NEXT(trx_list, trx)) {
6143 
6144 		const lock_t*	lock;
6145 
6146 		check_trx_state(trx);
6147 
6148 		for (lock = UT_LIST_GET_FIRST(trx->lock.trx_locks);
6149 		     lock != NULL;
6150 		     lock = UT_LIST_GET_NEXT(trx_locks, lock)) {
6151 
6152 			if (lock_get_type_low(lock) & LOCK_TABLE) {
6153 
6154 				lock_table_queue_validate(
6155 					lock->un_member.tab_lock.table);
6156 			}
6157 		}
6158 	}
6159 
6160 	return(TRUE);
6161 }
6162 
6163 /*********************************************************************//**
6164 Validate record locks up to a limit.
6165 @return lock at limit or NULL if no more locks in the hash bucket */
6166 static MY_ATTRIBUTE((warn_unused_result))
6167 const lock_t*
6168 lock_rec_validate(
6169 /*==============*/
6170 	ulint		start,		/*!< in: lock_sys->rec_hash
6171 					bucket */
6172 	ib_uint64_t*	limit)		/*!< in/out: upper limit of
6173 					(space, page_no) */
6174 {
6175 	ut_ad(lock_mutex_own());
6176 	ut_ad(trx_sys_mutex_own());
6177 
6178 	for (const lock_t* lock = static_cast<const lock_t*>(
6179 			HASH_GET_FIRST(lock_sys->rec_hash, start));
6180 	     lock != NULL;
6181 	     lock = static_cast<const lock_t*>(HASH_GET_NEXT(hash, lock))) {
6182 
6183 		ib_uint64_t	current;
6184 
6185 		ut_ad(!trx_is_ac_nl_ro(lock->trx));
6186 		ut_ad(lock_get_type(lock) == LOCK_REC);
6187 
6188 		current = ut_ull_create(
6189 			lock->un_member.rec_lock.space,
6190 			lock->un_member.rec_lock.page_no);
6191 
6192 		if (current > *limit) {
6193 			*limit = current + 1;
6194 			return(lock);
6195 		}
6196 	}
6197 
6198 	return(0);
6199 }
6200 
6201 /*********************************************************************//**
6202 Validate a record lock's block */
6203 static
6204 void
6205 lock_rec_block_validate(
6206 /*====================*/
6207 	ulint		space_id,
6208 	ulint		page_no)
6209 {
6210 	/* The lock and the block that it is referring to may be freed at
6211 	this point. We pass BUF_GET_POSSIBLY_FREED to skip a debug check.
6212 	If the lock exists in lock_rec_validate_page() we assert
6213 	!block->page.file_page_was_freed. */
6214 
6215 	buf_block_t*	block;
6216 	mtr_t		mtr;
6217 
6218 	/* Make sure that the tablespace is not deleted while we are
6219 	trying to access the page. */
6220 	if (fil_space_t* space = fil_space_acquire(space_id)) {
6221 		mtr_start(&mtr);
6222 
6223 		block = buf_page_get_gen(
6224 			page_id_t(space_id, page_no),
6225 			page_size_t(space->flags),
6226 			RW_X_LATCH, NULL,
6227 			BUF_GET_POSSIBLY_FREED,
6228 			__FILE__, __LINE__, &mtr);
6229 
6230 		buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK);
6231 
6232 		ut_ad(lock_rec_validate_page(block));
6233 		mtr_commit(&mtr);
6234 
6235 		fil_space_release(space);
6236 	}
6237 }
6238 
6239 /*********************************************************************//**
6240 Validates the lock system.
6241 @return TRUE if ok */
6242 static
6243 bool
6244 lock_validate()
6245 /*===========*/
6246 {
6247 	typedef	std::pair<ulint, ulint>		page_addr_t;
6248 	typedef std::set<
6249 		page_addr_t,
6250 		std::less<page_addr_t>,
6251 		ut_allocator<page_addr_t> >	page_addr_set;
6252 
6253 	page_addr_set	pages;
6254 
6255 	lock_mutex_enter();
6256 	mutex_enter(&trx_sys->mutex);
6257 
6258 	ut_a(lock_validate_table_locks(&trx_sys->rw_trx_list));
6259 
6260 	/* Iterate over all the record locks and validate the locks. We
6261 	don't want to hog the lock_sys_t::mutex and the trx_sys_t::mutex.
6262 	Release both mutexes during the validation check. */
6263 
6264 	for (ulint i = 0; i < hash_get_n_cells(lock_sys->rec_hash); i++) {
6265 		const lock_t*	lock;
6266 		ib_uint64_t	limit = 0;
6267 
6268 		while ((lock = lock_rec_validate(i, &limit)) != 0) {
6269 
6270 			ulint	space = lock->un_member.rec_lock.space;
6271 			ulint	page_no = lock->un_member.rec_lock.page_no;
6272 
6273 			pages.insert(std::make_pair(space, page_no));
6274 		}
6275 	}
6276 
6277 	mutex_exit(&trx_sys->mutex);
6278 	lock_mutex_exit();
6279 
6280 	for (page_addr_set::const_iterator it = pages.begin();
6281 	     it != pages.end();
6282 	     ++it) {
6283 		lock_rec_block_validate((*it).first, (*it).second);
6284 	}
6285 
6286 	return(true);
6287 }
6288 #endif /* UNIV_DEBUG */
6289 /*============ RECORD LOCK CHECKS FOR ROW OPERATIONS ====================*/
6290 
6291 /*********************************************************************//**
6292 Checks if locks of other transactions prevent an immediate insert of
6293 a record. If they do, first tests if the query thread should anyway
6294 be suspended for some reason; if not, then puts the transaction and
6295 the query thread to the lock wait state and inserts a waiting request
6296 for a gap x-lock to the lock queue.
6297 @return DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
6298 dberr_t
6299 lock_rec_insert_check_and_lock(
6300 /*===========================*/
6301 	ulint		flags,	/*!< in: if BTR_NO_LOCKING_FLAG bit is
6302 				set, does nothing */
6303 	const rec_t*	rec,	/*!< in: record after which to insert */
6304 	buf_block_t*	block,	/*!< in/out: buffer block of rec */
6305 	dict_index_t*	index,	/*!< in: index */
6306 	que_thr_t*	thr,	/*!< in: query thread */
6307 	mtr_t*		mtr,	/*!< in/out: mini-transaction */
6308 	ibool*		inherit)/*!< out: set to TRUE if the new
6309 				inserted record maybe should inherit
6310 				LOCK_GAP type locks from the successor
6311 				record */
6312 {
6313 	ut_ad(block->frame == page_align(rec));
6314 	ut_ad(!dict_index_is_online_ddl(index)
6315 	      || dict_index_is_clust(index)
6316 	      || (flags & BTR_CREATE_FLAG));
6317 	ut_ad(mtr->is_named_space(index->space));
6318 
6319 	if (flags & BTR_NO_LOCKING_FLAG) {
6320 
6321 		return(DB_SUCCESS);
6322 	}
6323 
6324 	ut_ad(!dict_table_is_temporary(index->table));
6325 
6326 	dberr_t		err;
6327 	lock_t*		lock;
6328 	ibool		inherit_in = *inherit;
6329 	trx_t*		trx = thr_get_trx(thr);
6330 	const rec_t*	next_rec = page_rec_get_next_const(rec);
6331 	ulint		heap_no = page_rec_get_heap_no(next_rec);
6332 
6333 	lock_mutex_enter();
6334 	/* Because this code is invoked for a running transaction by
6335 	the thread that is serving the transaction, it is not necessary
6336 	to hold trx->mutex here. */
6337 
6338 	/* When inserting a record into an index, the table must be at
6339 	least IX-locked. When we are building an index, we would pass
6340 	BTR_NO_LOCKING_FLAG and skip the locking altogether. */
6341 	ut_ad(lock_table_has(trx, index->table, LOCK_IX));
6342 
6343 	lock = lock_rec_get_first(lock_sys->rec_hash, block, heap_no);
6344 
6345 	if (lock == NULL) {
6346 		/* We optimize CPU time usage in the simplest case */
6347 
6348 		lock_mutex_exit();
6349 
6350 		if (inherit_in && !dict_index_is_clust(index)) {
6351 			/* Update the page max trx id field */
6352 			page_update_max_trx_id(block,
6353 					       buf_block_get_page_zip(block),
6354 					       trx->id, mtr);
6355 		}
6356 
6357 		*inherit = FALSE;
6358 
6359 		return(DB_SUCCESS);
6360 	}
6361 
6362 	/* Spatial index does not use GAP lock protection. It uses
6363 	"predicate lock" to protect the "range" */
6364 	if (dict_index_is_spatial(index)) {
6365 		return(DB_SUCCESS);
6366 	}
6367 
6368 	*inherit = TRUE;
6369 
6370 	/* If another transaction has an explicit lock request which locks
6371 	the gap, waiting or granted, on the successor, the insert has to wait.
6372 
6373 	An exception is the case where the lock by the another transaction
6374 	is a gap type lock which it placed to wait for its turn to insert. We
6375 	do not consider that kind of a lock conflicting with our insert. This
6376 	eliminates an unnecessary deadlock which resulted when 2 transactions
6377 	had to wait for their insert. Both had waiting gap type lock requests
6378 	on the successor, which produced an unnecessary deadlock. */
6379 
6380 	const ulint	type_mode = LOCK_X | LOCK_GAP | LOCK_INSERT_INTENTION;
6381 
6382 #ifdef WITH_WSREP
6383 	lock_t*	wait_for = (lock_t *)lock_rec_other_has_conflicting(
6384 #else
6385 	const lock_t*	wait_for = lock_rec_other_has_conflicting(
6386 #endif
6387 				type_mode, block, heap_no, trx);
6388 
6389 
6390 	if (wait_for != NULL) {
6391 
6392 		RecLock	rec_lock(thr, index, block, heap_no, type_mode);
6393 
6394 		trx_mutex_enter(trx);
6395 
6396 		err = rec_lock.add_to_waitq(wait_for);
6397 
6398 		trx_mutex_exit(trx);
6399 
6400 	} else {
6401 		err = DB_SUCCESS;
6402 	}
6403 
6404 	lock_mutex_exit();
6405 
6406 	switch (err) {
6407 	case DB_SUCCESS_LOCKED_REC:
6408 		err = DB_SUCCESS;
6409 		/* fall through */
6410 	case DB_SUCCESS:
6411 		if (!inherit_in || dict_index_is_clust(index)) {
6412 			break;
6413 		}
6414 
6415 		/* Update the page max trx id field */
6416 		page_update_max_trx_id(
6417 			block, buf_block_get_page_zip(block), trx->id, mtr);
6418 	default:
6419 		/* We only care about the two return values. */
6420 		break;
6421 	}
6422 
6423 #ifdef UNIV_DEBUG
6424 	{
6425 		mem_heap_t*	heap		= NULL;
6426 		ulint		offsets_[REC_OFFS_NORMAL_SIZE];
6427 		const ulint*	offsets;
6428 		rec_offs_init(offsets_);
6429 
6430 		offsets = rec_get_offsets(next_rec, index, offsets_,
6431 					  ULINT_UNDEFINED, &heap);
6432 
6433 		ut_ad(lock_rec_queue_validate(
6434 				FALSE, block, next_rec, index, offsets));
6435 
6436 		if (heap != NULL) {
6437 			mem_heap_free(heap);
6438 		}
6439 	}
6440 #endif /* UNIV_DEBUG */
6441 
6442 	return(err);
6443 }
6444 
6445 /*********************************************************************//**
6446 Creates an explicit record lock for a running transaction that currently only
6447 has an implicit lock on the record. The transaction instance must have a
6448 reference count > 0 so that it can't be committed and freed before this
6449 function has completed. */
6450 static
6451 void
6452 lock_rec_convert_impl_to_expl_for_trx(
6453 /*==================================*/
6454 	const buf_block_t*	block,	/*!< in: buffer block of rec */
6455 	const rec_t*		rec,	/*!< in: user record on page */
6456 	dict_index_t*		index,	/*!< in: index of record */
6457 	const ulint*		offsets,/*!< in: rec_get_offsets(rec, index) */
6458 	trx_t*			trx,	/*!< in/out: active transaction */
6459 	ulint			heap_no)/*!< in: rec heap number to lock */
6460 {
6461 	ut_ad(trx_is_referenced(trx));
6462 
6463 	DEBUG_SYNC_C("before_lock_rec_convert_impl_to_expl_for_trx");
6464 
6465 	lock_mutex_enter();
6466 
6467 	ut_ad(!trx_state_eq(trx, TRX_STATE_NOT_STARTED));
6468 
6469 	if (!trx_state_eq(trx, TRX_STATE_COMMITTED_IN_MEMORY)
6470 	    && !lock_rec_has_expl(LOCK_X | LOCK_REC_NOT_GAP,
6471 				  block, heap_no, trx)) {
6472 
6473 		ulint	type_mode;
6474 
6475 		type_mode = (LOCK_REC | LOCK_X | LOCK_REC_NOT_GAP);
6476 
6477 		lock_rec_add_to_queue(
6478 			type_mode, block, heap_no, index, trx, FALSE);
6479 	}
6480 
6481 	lock_mutex_exit();
6482 
6483 	trx_release_reference(trx);
6484 
6485 	DEBUG_SYNC_C("after_lock_rec_convert_impl_to_expl_for_trx");
6486 }
6487 
6488 /*********************************************************************//**
6489 If a transaction has an implicit x-lock on a record, but no explicit x-lock
6490 set on the record, sets one for it. */
6491 static
6492 void
6493 lock_rec_convert_impl_to_expl(
6494 /*==========================*/
6495 	const buf_block_t*	block,	/*!< in: buffer block of rec */
6496 	const rec_t*		rec,	/*!< in: user record on page */
6497 	dict_index_t*		index,	/*!< in: index of record */
6498 	const ulint*		offsets)/*!< in: rec_get_offsets(rec, index) */
6499 {
6500 	trx_t*		trx;
6501 
6502 	ut_ad(!lock_mutex_own());
6503 	ut_ad(page_rec_is_user_rec(rec));
6504 	ut_ad(rec_offs_validate(rec, index, offsets));
6505 	ut_ad(!page_rec_is_comp(rec) == !rec_offs_comp(offsets));
6506 
6507 	if (dict_index_is_clust(index)) {
6508 		trx_id_t	trx_id;
6509 
6510 		trx_id = lock_clust_rec_some_has_impl(rec, index, offsets);
6511 
6512 		trx = trx_rw_is_active(trx_id, NULL, true);
6513 	} else {
6514 		ut_ad(!dict_index_is_online_ddl(index));
6515 
6516 		trx = lock_sec_rec_some_has_impl(rec, index, offsets);
6517 
6518 		ut_ad(!trx || !lock_rec_other_trx_holds_expl(
6519 				LOCK_S | LOCK_REC_NOT_GAP, trx, rec, block));
6520 	}
6521 
6522 	if (trx != 0) {
6523 		ulint	heap_no = page_rec_get_heap_no(rec);
6524 
6525 		ut_ad(trx_is_referenced(trx));
6526 
6527 		/* If the transaction is still active and has no
6528 		explicit x-lock set on the record, set one for it.
6529 		trx cannot be committed until the ref count is zero. */
6530 
6531 		lock_rec_convert_impl_to_expl_for_trx(
6532 			block, rec, index, offsets, trx, heap_no);
6533 	}
6534 }
6535 
6536 void
6537 lock_rec_convert_active_impl_to_expl(
6538 	const buf_block_t*	block,	/*!< in: buffer block of rec */
6539 	const rec_t*		rec,	/*!< in: user record on page */
6540 	dict_index_t*		index,	/*!< in: index of record */
6541 	const ulint*		offsets,/*!< in: rec_get_offsets(rec, index) */
6542 	trx_t*			trx,	/*!< in/out: active transaction */
6543 	ulint			heap_no)/*!< in: rec heap number to lock */
6544 {
6545 	trx_reference(trx, true);
6546 	lock_rec_convert_impl_to_expl_for_trx(block, rec, index, offsets,
6547 					      trx, heap_no);
6548 }
6549 /*********************************************************************//**
6550 Checks if locks of other transactions prevent an immediate modify (update,
6551 delete mark, or delete unmark) of a clustered index record. If they do,
6552 first tests if the query thread should anyway be suspended for some
6553 reason; if not, then puts the transaction and the query thread to the
6554 lock wait state and inserts a waiting request for a record x-lock to the
6555 lock queue.
6556 @return DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
6557 dberr_t
6558 lock_clust_rec_modify_check_and_lock(
6559 /*=================================*/
6560 	ulint			flags,	/*!< in: if BTR_NO_LOCKING_FLAG
6561 					bit is set, does nothing */
6562 	const buf_block_t*	block,	/*!< in: buffer block of rec */
6563 	const rec_t*		rec,	/*!< in: record which should be
6564 					modified */
6565 	dict_index_t*		index,	/*!< in: clustered index */
6566 	const ulint*		offsets,/*!< in: rec_get_offsets(rec, index) */
6567 	que_thr_t*		thr)	/*!< in: query thread */
6568 {
6569 	dberr_t	err;
6570 	ulint	heap_no;
6571 
6572 	ut_ad(rec_offs_validate(rec, index, offsets));
6573 	ut_ad(dict_index_is_clust(index));
6574 	ut_ad(block->frame == page_align(rec));
6575 
6576 	if (flags & BTR_NO_LOCKING_FLAG) {
6577 
6578 		return(DB_SUCCESS);
6579 	}
6580 	ut_ad(!dict_table_is_temporary(index->table));
6581 
6582 	heap_no = rec_offs_comp(offsets)
6583 		? rec_get_heap_no_new(rec)
6584 		: rec_get_heap_no_old(rec);
6585 
6586 	/* If a transaction has no explicit x-lock set on the record, set one
6587 	for it */
6588 
6589 	lock_rec_convert_impl_to_expl(block, rec, index, offsets);
6590 
6591 	lock_mutex_enter();
6592 
6593 	ut_ad(lock_table_has(thr_get_trx(thr), index->table, LOCK_IX));
6594 
6595 	err = lock_rec_lock(TRUE, LOCK_X | LOCK_REC_NOT_GAP,
6596 			    block, heap_no, index, thr);
6597 
6598 	MONITOR_INC(MONITOR_NUM_RECLOCK_REQ);
6599 
6600 	lock_mutex_exit();
6601 
6602 	ut_ad(lock_rec_queue_validate(FALSE, block, rec, index, offsets));
6603 
6604 	if (err == DB_SUCCESS_LOCKED_REC) {
6605 		err = DB_SUCCESS;
6606 	}
6607 
6608 	return(err);
6609 }
6610 
6611 /*********************************************************************//**
6612 Checks if locks of other transactions prevent an immediate modify (delete
6613 mark or delete unmark) of a secondary index record.
6614 @return DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
6615 dberr_t
6616 lock_sec_rec_modify_check_and_lock(
6617 /*===============================*/
6618 	ulint		flags,	/*!< in: if BTR_NO_LOCKING_FLAG
6619 				bit is set, does nothing */
6620 	buf_block_t*	block,	/*!< in/out: buffer block of rec */
6621 	const rec_t*	rec,	/*!< in: record which should be
6622 				modified; NOTE: as this is a secondary
6623 				index, we always have to modify the
6624 				clustered index record first: see the
6625 				comment below */
6626 	dict_index_t*	index,	/*!< in: secondary index */
6627 	que_thr_t*	thr,	/*!< in: query thread
6628 				(can be NULL if BTR_NO_LOCKING_FLAG) */
6629 	mtr_t*		mtr)	/*!< in/out: mini-transaction */
6630 {
6631 	dberr_t	err;
6632 	ulint	heap_no;
6633 
6634 	ut_ad(!dict_index_is_clust(index));
6635 	ut_ad(!dict_index_is_online_ddl(index) || (flags & BTR_CREATE_FLAG));
6636 	ut_ad(block->frame == page_align(rec));
6637 	ut_ad(mtr->is_named_space(index->space));
6638 
6639 	if (flags & BTR_NO_LOCKING_FLAG) {
6640 
6641 		return(DB_SUCCESS);
6642 	}
6643 	ut_ad(!dict_table_is_temporary(index->table));
6644 
6645 	heap_no = page_rec_get_heap_no(rec);
6646 
6647 	/* Another transaction cannot have an implicit lock on the record,
6648 	because when we come here, we already have modified the clustered
6649 	index record, and this would not have been possible if another active
6650 	transaction had modified this secondary index record. */
6651 
6652 	lock_mutex_enter();
6653 
6654 	ut_ad(lock_table_has(thr_get_trx(thr), index->table, LOCK_IX));
6655 
6656 	err = lock_rec_lock(TRUE, LOCK_X | LOCK_REC_NOT_GAP,
6657 			    block, heap_no, index, thr);
6658 
6659 	MONITOR_INC(MONITOR_NUM_RECLOCK_REQ);
6660 
6661 	lock_mutex_exit();
6662 
6663 #ifdef UNIV_DEBUG
6664 	{
6665 		mem_heap_t*	heap		= NULL;
6666 		ulint		offsets_[REC_OFFS_NORMAL_SIZE];
6667 		const ulint*	offsets;
6668 		rec_offs_init(offsets_);
6669 
6670 		offsets = rec_get_offsets(rec, index, offsets_,
6671 					  ULINT_UNDEFINED, &heap);
6672 
6673 		ut_ad(lock_rec_queue_validate(
6674 			FALSE, block, rec, index, offsets));
6675 
6676 		if (heap != NULL) {
6677 			mem_heap_free(heap);
6678 		}
6679 	}
6680 #endif /* UNIV_DEBUG */
6681 
6682 	if (err == DB_SUCCESS || err == DB_SUCCESS_LOCKED_REC) {
6683 		/* Update the page max trx id field */
6684 		/* It might not be necessary to do this if
6685 		err == DB_SUCCESS (no new lock created),
6686 		but it should not cost too much performance. */
6687 		page_update_max_trx_id(block,
6688 				       buf_block_get_page_zip(block),
6689 				       thr_get_trx(thr)->id, mtr);
6690 		err = DB_SUCCESS;
6691 	}
6692 
6693 	return(err);
6694 }
6695 
6696 /*********************************************************************//**
6697 Like lock_clust_rec_read_check_and_lock(), but reads a
6698 secondary index record.
6699 @return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, DB_LOCK_WAIT, DB_DEADLOCK,
6700 or DB_QUE_THR_SUSPENDED */
6701 dberr_t
6702 lock_sec_rec_read_check_and_lock(
6703 /*=============================*/
6704 	ulint			flags,	/*!< in: if BTR_NO_LOCKING_FLAG
6705 					bit is set, does nothing */
6706 	const buf_block_t*	block,	/*!< in: buffer block of rec */
6707 	const rec_t*		rec,	/*!< in: user record or page
6708 					supremum record which should
6709 					be read or passed over by a
6710 					read cursor */
6711 	dict_index_t*		index,	/*!< in: secondary index */
6712 	const ulint*		offsets,/*!< in: rec_get_offsets(rec, index) */
6713 	lock_mode		mode,	/*!< in: mode of the lock which
6714 					the read cursor should set on
6715 					records: LOCK_S or LOCK_X; the
6716 					latter is possible in
6717 					SELECT FOR UPDATE */
6718 	ulint			gap_mode,/*!< in: LOCK_ORDINARY, LOCK_GAP, or
6719 					LOCK_REC_NOT_GAP */
6720 	que_thr_t*		thr)	/*!< in: query thread */
6721 {
6722 	dberr_t	err;
6723 	ulint	heap_no;
6724 
6725 	ut_ad(!dict_index_is_clust(index));
6726 	ut_ad(!dict_index_is_online_ddl(index));
6727 	ut_ad(block->frame == page_align(rec));
6728 	ut_ad(page_rec_is_user_rec(rec) || page_rec_is_supremum(rec));
6729 	ut_ad(rec_offs_validate(rec, index, offsets));
6730 	ut_ad(mode == LOCK_X || mode == LOCK_S);
6731 
6732 	if ((flags & BTR_NO_LOCKING_FLAG)
6733 	    || srv_read_only_mode
6734 	    || dict_table_is_temporary(index->table)) {
6735 
6736 		return(DB_SUCCESS);
6737 	}
6738 
6739 	heap_no = page_rec_get_heap_no(rec);
6740 
6741 	/* Some transaction may have an implicit x-lock on the record only
6742 	if the max trx id for the page >= min trx id for the trx list or a
6743 	database recovery is running. */
6744 
6745 	if ((page_get_max_trx_id(block->frame) >= trx_rw_min_trx_id()
6746 	     || recv_recovery_is_on())
6747 	    && !page_rec_is_supremum(rec)) {
6748 
6749 		lock_rec_convert_impl_to_expl(block, rec, index, offsets);
6750 	}
6751 
6752 	lock_mutex_enter();
6753 
6754 	ut_ad(mode != LOCK_X
6755 	      || lock_table_has(thr_get_trx(thr), index->table, LOCK_IX));
6756 	ut_ad(mode != LOCK_S
6757 	      || lock_table_has(thr_get_trx(thr), index->table, LOCK_IS));
6758 
6759 	err = lock_rec_lock(FALSE, mode | gap_mode,
6760 			    block, heap_no, index, thr);
6761 
6762 	MONITOR_INC(MONITOR_NUM_RECLOCK_REQ);
6763 
6764 	lock_mutex_exit();
6765 
6766 	ut_ad(lock_rec_queue_validate(FALSE, block, rec, index, offsets));
6767 
6768 	return(err);
6769 }
6770 
6771 /*********************************************************************//**
6772 Checks if locks of other transactions prevent an immediate read, or passing
6773 over by a read cursor, of a clustered index record. If they do, first tests
6774 if the query thread should anyway be suspended for some reason; if not, then
6775 puts the transaction and the query thread to the lock wait state and inserts a
6776 waiting request for a record lock to the lock queue. Sets the requested mode
6777 lock on the record.
6778 @return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, DB_LOCK_WAIT, DB_DEADLOCK,
6779 or DB_QUE_THR_SUSPENDED */
6780 dberr_t
6781 lock_clust_rec_read_check_and_lock(
6782 /*===============================*/
6783 	ulint			flags,	/*!< in: if BTR_NO_LOCKING_FLAG
6784 					bit is set, does nothing */
6785 	const buf_block_t*	block,	/*!< in: buffer block of rec */
6786 	const rec_t*		rec,	/*!< in: user record or page
6787 					supremum record which should
6788 					be read or passed over by a
6789 					read cursor */
6790 	dict_index_t*		index,	/*!< in: clustered index */
6791 	const ulint*		offsets,/*!< in: rec_get_offsets(rec, index) */
6792 	lock_mode		mode,	/*!< in: mode of the lock which
6793 					the read cursor should set on
6794 					records: LOCK_S or LOCK_X; the
6795 					latter is possible in
6796 					SELECT FOR UPDATE */
6797 	ulint			gap_mode,/*!< in: LOCK_ORDINARY, LOCK_GAP, or
6798 					LOCK_REC_NOT_GAP */
6799 	que_thr_t*		thr)	/*!< in: query thread */
6800 {
6801 	dberr_t	err;
6802 	ulint	heap_no;
6803 
6804 	ut_ad(dict_index_is_clust(index));
6805 	ut_ad(block->frame == page_align(rec));
6806 	ut_ad(page_rec_is_user_rec(rec) || page_rec_is_supremum(rec));
6807 	ut_ad(gap_mode == LOCK_ORDINARY || gap_mode == LOCK_GAP
6808 	      || gap_mode == LOCK_REC_NOT_GAP);
6809 	ut_ad(rec_offs_validate(rec, index, offsets));
6810 
6811 	if ((flags & BTR_NO_LOCKING_FLAG)
6812 	    || srv_read_only_mode
6813 	    || dict_table_is_temporary(index->table)) {
6814 
6815 		return(DB_SUCCESS);
6816 	}
6817 
6818 	heap_no = page_rec_get_heap_no(rec);
6819 
6820 	if (heap_no != PAGE_HEAP_NO_SUPREMUM) {
6821 
6822 		lock_rec_convert_impl_to_expl(block, rec, index, offsets);
6823 	}
6824 
6825 	lock_mutex_enter();
6826 
6827 	ut_ad(mode != LOCK_X
6828 	      || lock_table_has(thr_get_trx(thr), index->table, LOCK_IX));
6829 	ut_ad(mode != LOCK_S
6830 	      || lock_table_has(thr_get_trx(thr), index->table, LOCK_IS));
6831 
6832 	err = lock_rec_lock(FALSE, mode | gap_mode, block, heap_no, index, thr);
6833 
6834 	MONITOR_INC(MONITOR_NUM_RECLOCK_REQ);
6835 
6836 	lock_mutex_exit();
6837 
6838 	ut_ad(lock_rec_queue_validate(FALSE, block, rec, index, offsets));
6839 
6840 	DEBUG_SYNC_C("after_lock_clust_rec_read_check_and_lock");
6841 
6842 	return(err);
6843 }
6844 /*********************************************************************//**
6845 Checks if locks of other transactions prevent an immediate read, or passing
6846 over by a read cursor, of a clustered index record. If they do, first tests
6847 if the query thread should anyway be suspended for some reason; if not, then
6848 puts the transaction and the query thread to the lock wait state and inserts a
6849 waiting request for a record lock to the lock queue. Sets the requested mode
6850 lock on the record. This is an alternative version of
6851 lock_clust_rec_read_check_and_lock() that does not require the parameter
6852 "offsets".
6853 @return DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
6854 dberr_t
6855 lock_clust_rec_read_check_and_lock_alt(
6856 /*===================================*/
6857 	ulint			flags,	/*!< in: if BTR_NO_LOCKING_FLAG
6858 					bit is set, does nothing */
6859 	const buf_block_t*	block,	/*!< in: buffer block of rec */
6860 	const rec_t*		rec,	/*!< in: user record or page
6861 					supremum record which should
6862 					be read or passed over by a
6863 					read cursor */
6864 	dict_index_t*		index,	/*!< in: clustered index */
6865 	lock_mode		mode,	/*!< in: mode of the lock which
6866 					the read cursor should set on
6867 					records: LOCK_S or LOCK_X; the
6868 					latter is possible in
6869 					SELECT FOR UPDATE */
6870 	ulint			gap_mode,/*!< in: LOCK_ORDINARY, LOCK_GAP, or
6871 					LOCK_REC_NOT_GAP */
6872 	que_thr_t*		thr)	/*!< in: query thread */
6873 {
6874 	mem_heap_t*	tmp_heap	= NULL;
6875 	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
6876 	ulint*		offsets		= offsets_;
6877 	dberr_t		err;
6878 	rec_offs_init(offsets_);
6879 
6880 	offsets = rec_get_offsets(rec, index, offsets,
6881 				  ULINT_UNDEFINED, &tmp_heap);
6882 	err = lock_clust_rec_read_check_and_lock(flags, block, rec, index,
6883 						 offsets, mode, gap_mode, thr);
6884 	if (tmp_heap) {
6885 		mem_heap_free(tmp_heap);
6886 	}
6887 
6888 	if (err == DB_SUCCESS_LOCKED_REC) {
6889 		err = DB_SUCCESS;
6890 	}
6891 
6892 	return(err);
6893 }
6894 
6895 /*******************************************************************//**
6896 Release the last lock from the transaction's autoinc locks. */
6897 UNIV_INLINE
6898 void
6899 lock_release_autoinc_last_lock(
6900 /*===========================*/
6901 	ib_vector_t*	autoinc_locks)	/*!< in/out: vector of AUTOINC locks */
6902 {
6903 	ulint		last;
6904 	lock_t*		lock;
6905 
6906 	ut_ad(lock_mutex_own());
6907 	ut_a(!ib_vector_is_empty(autoinc_locks));
6908 
6909 	/* The lock to be release must be the last lock acquired. */
6910 	last = ib_vector_size(autoinc_locks) - 1;
6911 	lock = *static_cast<lock_t**>(ib_vector_get(autoinc_locks, last));
6912 
6913 	/* Should have only AUTOINC locks in the vector. */
6914 	ut_a(lock_get_mode(lock) == LOCK_AUTO_INC);
6915 	ut_a(lock_get_type(lock) == LOCK_TABLE);
6916 
6917 	ut_a(lock->un_member.tab_lock.table != NULL);
6918 
6919 	/* This will remove the lock from the trx autoinc_locks too. */
6920 	lock_table_dequeue(lock);
6921 
6922 	/* Remove from the table vector too. */
6923 	lock_trx_table_locks_remove(lock);
6924 }
6925 
6926 /*******************************************************************//**
6927 Check if a transaction holds any autoinc locks.
6928 @return TRUE if the transaction holds any AUTOINC locks. */
6929 static
6930 ibool
6931 lock_trx_holds_autoinc_locks(
6932 /*=========================*/
6933 	const trx_t*	trx)		/*!< in: transaction */
6934 {
6935 	ut_a(trx->autoinc_locks != NULL);
6936 
6937 	return(!ib_vector_is_empty(trx->autoinc_locks));
6938 }
6939 
6940 /*******************************************************************//**
6941 Release all the transaction's autoinc locks. */
6942 static
6943 void
6944 lock_release_autoinc_locks(
6945 /*=======================*/
6946 	trx_t*		trx)		/*!< in/out: transaction */
6947 {
6948 	ut_ad(lock_mutex_own());
6949 	/* If this is invoked for a running transaction by the thread
6950 	that is serving the transaction, then it is not necessary to
6951 	hold trx->mutex here. */
6952 
6953 	ut_a(trx->autoinc_locks != NULL);
6954 
6955 	/* We release the locks in the reverse order. This is to
6956 	avoid searching the vector for the element to delete at
6957 	the lower level. See (lock_table_remove_low()) for details. */
6958 	while (!ib_vector_is_empty(trx->autoinc_locks)) {
6959 
6960 		/* lock_table_remove_low() will also remove the lock from
6961 		the transaction's autoinc_locks vector. */
6962 		lock_release_autoinc_last_lock(trx->autoinc_locks);
6963 	}
6964 
6965 	/* Should release all locks. */
6966 	ut_a(ib_vector_is_empty(trx->autoinc_locks));
6967 }
6968 
6969 /*******************************************************************//**
6970 Gets the type of a lock. Non-inline version for using outside of the
6971 lock module.
6972 @return LOCK_TABLE or LOCK_REC */
6973 ulint
6974 lock_get_type(
6975 /*==========*/
6976 	const lock_t*	lock)	/*!< in: lock */
6977 {
6978 	return(lock_get_type_low(lock));
6979 }
6980 
6981 /*******************************************************************//**
6982 Gets the id of the transaction owning a lock.
6983 @return transaction id */
6984 trx_id_t
6985 lock_get_trx_id(
6986 /*============*/
6987 	const lock_t*	lock)	/*!< in: lock */
6988 {
6989 	return(trx_get_id_for_print(lock->trx));
6990 }
6991 
6992 /*******************************************************************//**
6993 Gets the mode of a lock in a human readable string.
6994 The string should not be free()'d or modified.
6995 @return lock mode */
6996 const char*
6997 lock_get_mode_str(
6998 /*==============*/
6999 	const lock_t*	lock)	/*!< in: lock */
7000 {
7001 	ibool	is_gap_lock;
7002 
7003 	is_gap_lock = lock_get_type_low(lock) == LOCK_REC
7004 		&& lock_rec_get_gap(lock);
7005 
7006 	switch (lock_get_mode(lock)) {
7007 	case LOCK_S:
7008 		if (is_gap_lock) {
7009 			return("S,GAP");
7010 		} else {
7011 			return("S");
7012 		}
7013 	case LOCK_X:
7014 		if (is_gap_lock) {
7015 			return("X,GAP");
7016 		} else {
7017 			return("X");
7018 		}
7019 	case LOCK_IS:
7020 		if (is_gap_lock) {
7021 			return("IS,GAP");
7022 		} else {
7023 			return("IS");
7024 		}
7025 	case LOCK_IX:
7026 		if (is_gap_lock) {
7027 			return("IX,GAP");
7028 		} else {
7029 			return("IX");
7030 		}
7031 	case LOCK_AUTO_INC:
7032 		return("AUTO_INC");
7033 	default:
7034 		return("UNKNOWN");
7035 	}
7036 }
7037 
7038 /*******************************************************************//**
7039 Gets the type of a lock in a human readable string.
7040 The string should not be free()'d or modified.
7041 @return lock type */
7042 const char*
7043 lock_get_type_str(
7044 /*==============*/
7045 	const lock_t*	lock)	/*!< in: lock */
7046 {
7047 	switch (lock_get_type_low(lock)) {
7048 	case LOCK_REC:
7049 		return("RECORD");
7050 	case LOCK_TABLE:
7051 		return("TABLE");
7052 	default:
7053 		return("UNKNOWN");
7054 	}
7055 }
7056 
7057 /*******************************************************************//**
7058 Gets the table on which the lock is.
7059 @return table */
7060 UNIV_INLINE
7061 dict_table_t*
7062 lock_get_table(
7063 /*===========*/
7064 	const lock_t*	lock)	/*!< in: lock */
7065 {
7066 	switch (lock_get_type_low(lock)) {
7067 	case LOCK_REC:
7068 		ut_ad(dict_index_is_clust(lock->index)
7069 		      || !dict_index_is_online_ddl(lock->index));
7070 		return(lock->index->table);
7071 	case LOCK_TABLE:
7072 		return(lock->un_member.tab_lock.table);
7073 	default:
7074 		ut_error;
7075 		return(NULL);
7076 	}
7077 }
7078 
7079 /*******************************************************************//**
7080 Gets the id of the table on which the lock is.
7081 @return id of the table */
7082 table_id_t
7083 lock_get_table_id(
7084 /*==============*/
7085 	const lock_t*	lock)	/*!< in: lock */
7086 {
7087 	dict_table_t*	table;
7088 
7089 	table = lock_get_table(lock);
7090 
7091 	return(table->id);
7092 }
7093 
7094 /** Determine which table a lock is associated with.
7095 @param[in]	lock	the lock
7096 @return name of the table */
7097 const table_name_t&
7098 lock_get_table_name(
7099 	const lock_t*	lock)
7100 {
7101 	return(lock_get_table(lock)->name);
7102 }
7103 
7104 /*******************************************************************//**
7105 For a record lock, gets the index on which the lock is.
7106 @return index */
7107 const dict_index_t*
7108 lock_rec_get_index(
7109 /*===============*/
7110 	const lock_t*	lock)	/*!< in: lock */
7111 {
7112 	ut_a(lock_get_type_low(lock) == LOCK_REC);
7113 	ut_ad(dict_index_is_clust(lock->index)
7114 	      || !dict_index_is_online_ddl(lock->index));
7115 
7116 	return(lock->index);
7117 }
7118 
7119 /*******************************************************************//**
7120 For a record lock, gets the name of the index on which the lock is.
7121 The string should not be free()'d or modified.
7122 @return name of the index */
7123 const char*
7124 lock_rec_get_index_name(
7125 /*====================*/
7126 	const lock_t*	lock)	/*!< in: lock */
7127 {
7128 	ut_a(lock_get_type_low(lock) == LOCK_REC);
7129 	ut_ad(dict_index_is_clust(lock->index)
7130 	      || !dict_index_is_online_ddl(lock->index));
7131 
7132 	return(lock->index->name);
7133 }
7134 
7135 /*******************************************************************//**
7136 For a record lock, gets the tablespace number on which the lock is.
7137 @return tablespace number */
7138 ulint
7139 lock_rec_get_space_id(
7140 /*==================*/
7141 	const lock_t*	lock)	/*!< in: lock */
7142 {
7143 	ut_a(lock_get_type_low(lock) == LOCK_REC);
7144 
7145 	return(lock->un_member.rec_lock.space);
7146 }
7147 
7148 /*******************************************************************//**
7149 For a record lock, gets the page number on which the lock is.
7150 @return page number */
7151 ulint
7152 lock_rec_get_page_no(
7153 /*=================*/
7154 	const lock_t*	lock)	/*!< in: lock */
7155 {
7156 	ut_a(lock_get_type_low(lock) == LOCK_REC);
7157 
7158 	return(lock->un_member.rec_lock.page_no);
7159 }
7160 
7161 /*********************************************************************//**
7162 Cancels a waiting lock request and releases possible other transactions
7163 waiting behind it. */
7164 void
7165 lock_cancel_waiting_and_release(
7166 /*============================*/
7167 	lock_t*	lock)	/*!< in/out: waiting lock request */
7168 {
7169 	que_thr_t*	thr;
7170 
7171 	ut_ad(lock_mutex_own());
7172 	ut_ad(trx_mutex_own(lock->trx));
7173 
7174 	lock->trx->lock.cancel = true;
7175 
7176 	if (lock_get_type_low(lock) == LOCK_REC) {
7177 
7178 		lock_rec_dequeue_from_page(lock);
7179 	} else {
7180 		ut_ad(lock_get_type_low(lock) & LOCK_TABLE);
7181 
7182 		if (lock->trx->autoinc_locks != NULL) {
7183 			/* Release the transaction's AUTOINC locks. */
7184 			lock_release_autoinc_locks(lock->trx);
7185 		}
7186 
7187 		lock_table_dequeue(lock);
7188 	}
7189 
7190 	/* Reset the wait flag and the back pointer to lock in trx. */
7191 
7192 	lock_reset_lock_and_trx_wait(lock);
7193 
7194 	/* The following function releases the trx from lock wait. */
7195 
7196 	thr = que_thr_end_lock_wait(lock->trx);
7197 
7198 	if (thr != NULL) {
7199 		lock_wait_release_thread_if_suspended(thr);
7200 	}
7201 
7202 	lock->trx->lock.cancel = false;
7203 }
7204 
7205 /*********************************************************************//**
7206 Unlocks AUTO_INC type locks that were possibly reserved by a trx. This
7207 function should be called at the the end of an SQL statement, by the
7208 connection thread that owns the transaction (trx->mysql_thd). */
7209 void
7210 lock_unlock_table_autoinc(
7211 /*======================*/
7212 	trx_t*	trx)	/*!< in/out: transaction */
7213 {
7214 	ut_ad(!lock_mutex_own());
7215 	ut_ad(!trx_mutex_own(trx));
7216 	ut_ad(!trx->lock.wait_lock);
7217 
7218 	/* This can be invoked on NOT_STARTED, ACTIVE, PREPARED,
7219 	but not COMMITTED transactions. */
7220 
7221 	ut_ad(trx_state_eq(trx, TRX_STATE_NOT_STARTED)
7222 	      || trx_state_eq(trx, TRX_STATE_FORCED_ROLLBACK)
7223 	      || !trx_state_eq(trx, TRX_STATE_COMMITTED_IN_MEMORY));
7224 
7225 	/* This function is invoked for a running transaction by the
7226 	thread that is serving the transaction. Therefore it is not
7227 	necessary to hold trx->mutex here. */
7228 
7229 	if (lock_trx_holds_autoinc_locks(trx)) {
7230 		lock_mutex_enter();
7231 
7232 		lock_release_autoinc_locks(trx);
7233 
7234 		lock_mutex_exit();
7235 	}
7236 }
7237 
7238 /*********************************************************************//**
7239 Releases a transaction's locks, and releases possible other transactions
7240 waiting because of these locks. Change the state of the transaction to
7241 TRX_STATE_COMMITTED_IN_MEMORY. */
7242 void
7243 lock_trx_release_locks(
7244 /*===================*/
7245 	trx_t*	trx)	/*!< in/out: transaction */
7246 {
7247 	check_trx_state(trx);
7248 
7249 	if (trx_state_eq(trx, TRX_STATE_PREPARED)) {
7250 
7251 		mutex_enter(&trx_sys->mutex);
7252 
7253 		ut_a(trx_sys->n_prepared_trx > 0);
7254 		--trx_sys->n_prepared_trx;
7255 
7256 		if (trx->is_recovered) {
7257 			ut_a(trx_sys->n_prepared_recovered_trx > 0);
7258 			trx_sys->n_prepared_recovered_trx--;
7259 		}
7260 
7261 		mutex_exit(&trx_sys->mutex);
7262 	} else {
7263 		ut_ad(trx_state_eq(trx, TRX_STATE_ACTIVE));
7264 	}
7265 
7266 	bool	release_lock;
7267 
7268 	release_lock = (UT_LIST_GET_LEN(trx->lock.trx_locks) > 0);
7269 
7270 	/* Don't take lock_sys mutex if trx didn't acquire any lock. */
7271 	if (release_lock) {
7272 
7273 		/* The transition of trx->state to TRX_STATE_COMMITTED_IN_MEMORY
7274 		is protected by both the lock_sys->mutex and the trx->mutex. */
7275 		lock_mutex_enter();
7276 	}
7277 
7278 	trx_mutex_enter(trx);
7279 
7280 	/* The following assignment makes the transaction committed in memory
7281 	and makes its changes to data visible to other transactions.
7282 	NOTE that there is a small discrepancy from the strict formal
7283 	visibility rules here: a human user of the database can see
7284 	modifications made by another transaction T even before the necessary
7285 	log segment has been flushed to the disk. If the database happens to
7286 	crash before the flush, the user has seen modifications from T which
7287 	will never be a committed transaction. However, any transaction T2
7288 	which sees the modifications of the committing transaction T, and
7289 	which also itself makes modifications to the database, will get an lsn
7290 	larger than the committing transaction T. In the case where the log
7291 	flush fails, and T never gets committed, also T2 will never get
7292 	committed. */
7293 
7294 	/*--------------------------------------*/
7295 	trx->state = TRX_STATE_COMMITTED_IN_MEMORY;
7296 	/*--------------------------------------*/
7297 
7298 	if (trx_is_referenced(trx)) {
7299 
7300 		ut_a(release_lock);
7301 
7302 		lock_mutex_exit();
7303 
7304 		while (trx_is_referenced(trx)) {
7305 
7306 			trx_mutex_exit(trx);
7307 
7308 			DEBUG_SYNC_C("waiting_trx_is_not_referenced");
7309 
7310 			/** Doing an implicit to explicit conversion
7311 			should not be expensive. */
7312 			ut_delay(ut_rnd_interval(0, srv_spin_wait_delay));
7313 
7314 			trx_mutex_enter(trx);
7315 		}
7316 
7317 		trx_mutex_exit(trx);
7318 
7319 		lock_mutex_enter();
7320 
7321 		trx_mutex_enter(trx);
7322 	}
7323 
7324 	ut_ad(!trx_is_referenced(trx));
7325 
7326 	/* If the background thread trx_rollback_or_clean_recovered()
7327 	is still active then there is a chance that the rollback
7328 	thread may see this trx as COMMITTED_IN_MEMORY and goes ahead
7329 	to clean it up calling trx_cleanup_at_db_startup(). This can
7330 	happen in the case we are committing a trx here that is left
7331 	in PREPARED state during the crash. Note that commit of the
7332 	rollback of a PREPARED trx happens in the recovery thread
7333 	while the rollback of other transactions happen in the
7334 	background thread. To avoid this race we unconditionally unset
7335 	the is_recovered flag. */
7336 
7337 	trx->is_recovered = false;
7338 
7339 	trx_mutex_exit(trx);
7340 
7341 	if (release_lock) {
7342 
7343 		lock_release(trx);
7344 
7345 		lock_mutex_exit();
7346 	}
7347 
7348 	trx->lock.n_rec_locks = 0;
7349 
7350 	/* We don't remove the locks one by one from the vector for
7351 	efficiency reasons. We simply reset it because we would have
7352 	released all the locks anyway. */
7353 
7354 	trx->lock.table_locks.clear();
7355 
7356 	ut_a(UT_LIST_GET_LEN(trx->lock.trx_locks) == 0);
7357 	ut_a(ib_vector_is_empty(trx->autoinc_locks));
7358 	ut_a(trx->lock.table_locks.empty());
7359 
7360 	mem_heap_empty(trx->lock.lock_heap);
7361 }
7362 
7363 /*********************************************************************//**
7364 Check whether the transaction has already been rolled back because it
7365 was selected as a deadlock victim, or if it has to wait then cancel
7366 the wait lock.
7367 @return DB_DEADLOCK, DB_LOCK_WAIT or DB_SUCCESS */
7368 dberr_t
7369 lock_trx_handle_wait(
7370 /*=================*/
7371 	trx_t*	trx)	/*!< in/out: trx lock state */
7372 {
7373 	dberr_t	err;
7374 
7375 #ifdef WITH_WSREP
7376         if (trx->wsrep_killed_by_query == 0) {
7377 #endif /* WITH_WSREP */
7378 	lock_mutex_enter();
7379 
7380 	trx_mutex_enter(trx);
7381 #ifdef WITH_WSREP
7382         }
7383 #endif /* WITH_WSREP */
7384 
7385 	if (trx->lock.was_chosen_as_deadlock_victim) {
7386 		err = DB_DEADLOCK;
7387 	} else if (trx->lock.wait_lock != NULL) {
7388 		lock_cancel_waiting_and_release(trx->lock.wait_lock);
7389 		err = DB_LOCK_WAIT;
7390 	} else {
7391 		/* The lock was probably granted before we got here. */
7392 		err = DB_SUCCESS;
7393 	}
7394 
7395 #ifdef WITH_WSREP
7396         if (trx->wsrep_killed_by_query == 0) {
7397 #endif /* WITH_WSREP */
7398 	lock_mutex_exit();
7399 
7400 	trx_mutex_exit(trx);
7401 #ifdef WITH_WSREP
7402         }
7403 #endif /* WITH_WSREP */
7404 
7405 	return(err);
7406 }
7407 
7408 /*********************************************************************//**
7409 Get the number of locks on a table.
7410 @return number of locks */
7411 ulint
7412 lock_table_get_n_locks(
7413 /*===================*/
7414 	const dict_table_t*	table)	/*!< in: table */
7415 {
7416 	ulint		n_table_locks;
7417 
7418 	lock_mutex_enter();
7419 
7420 	n_table_locks = UT_LIST_GET_LEN(table->locks);
7421 
7422 	lock_mutex_exit();
7423 
7424 	return(n_table_locks);
7425 }
7426 
#ifdef UNIV_DEBUG
/*******************************************************************//**
Do an exhaustive check for any locks (table or rec) against the table.
@return lock if found */
static
const lock_t*
lock_table_locks_lookup(
/*====================*/
	const dict_table_t*	table,		/*!< in: check if there are
						any locks held on records in
						this table or on the table
						itself */
	const trx_ut_list_t*	trx_list)	/*!< in: trx list to check */
{
	ut_a(table != NULL);
	ut_ad(lock_mutex_own());
	ut_ad(trx_sys_mutex_own());

	/* Walk every transaction in the list and scan all of its
	locks for one that refers to the given table. */
	for (trx_t* trx = UT_LIST_GET_FIRST(*trx_list);
	     trx != NULL;
	     trx = UT_LIST_GET_NEXT(trx_list, trx)) {

		check_trx_state(trx);

		for (const lock_t* lock
			     = UT_LIST_GET_FIRST(trx->lock.trx_locks);
		     lock != NULL;
		     lock = UT_LIST_GET_NEXT(trx_locks, lock)) {

			ut_a(lock->trx == trx);

			if (lock_get_type_low(lock) == LOCK_REC) {
				ut_ad(!dict_index_is_online_ddl(lock->index)
				      || dict_index_is_clust(lock->index));

				/* A record lock matches via its index's
				table. */
				if (lock->index->table == table) {
					return(lock);
				}
			} else if (lock->un_member.tab_lock.table == table) {
				return(lock);
			}
		}
	}

	return(NULL);
}
#endif /* UNIV_DEBUG */
7476 
7477 /*******************************************************************//**
7478 Check if there are any locks (table or rec) against table.
7479 @return true if table has either table or record locks. */
7480 bool
7481 lock_table_has_locks(
7482 /*=================*/
7483 	const dict_table_t*	table)	/*!< in: check if there are any locks
7484 					held on records in this table or on the
7485 					table itself */
7486 {
7487 	ibool			has_locks;
7488 
7489 	lock_mutex_enter();
7490 
7491 	has_locks = UT_LIST_GET_LEN(table->locks) > 0 || table->n_rec_locks > 0;
7492 
7493 #ifdef UNIV_DEBUG
7494 	if (!has_locks) {
7495 		mutex_enter(&trx_sys->mutex);
7496 
7497 		ut_ad(!lock_table_locks_lookup(table, &trx_sys->rw_trx_list));
7498 
7499 		mutex_exit(&trx_sys->mutex);
7500 	}
7501 #endif /* UNIV_DEBUG */
7502 
7503 	lock_mutex_exit();
7504 
7505 	return(has_locks);
7506 }
7507 
7508 /*******************************************************************//**
7509 Initialise the table lock list. */
7510 void
7511 lock_table_lock_list_init(
7512 /*======================*/
7513 	table_lock_list_t*	lock_list)	/*!< List to initialise */
7514 {
7515 	UT_LIST_INIT(*lock_list, &lock_table_t::locks);
7516 }
7517 
7518 /*******************************************************************//**
7519 Initialise the trx lock list. */
7520 void
7521 lock_trx_lock_list_init(
7522 /*====================*/
7523 	trx_lock_list_t*	lock_list)	/*!< List to initialise */
7524 {
7525 	UT_LIST_INIT(*lock_list, &lock_t::trx_locks);
7526 }
7527 
7528 /*******************************************************************//**
7529 Set the lock system timeout event. */
7530 void
7531 lock_set_timeout_event()
7532 /*====================*/
7533 {
7534 	os_event_set(lock_sys->timeout_event);
7535 }
7536 
#ifdef UNIV_DEBUG
/*******************************************************************//**
Check if the transaction holds any locks on the sys tables
or its records.
@return the strongest lock found on any sys table or 0 for none */
const lock_t*
lock_trx_has_sys_table_locks(
/*=========================*/
	const trx_t*	trx)	/*!< in: transaction to check */
{
	const lock_t*	strongest_lock = 0;
	lock_mode	strongest = LOCK_NONE;

	lock_mutex_enter();

	/* Scan the table locks newest-first. */
	typedef lock_pool_t::const_reverse_iterator iterator;

	iterator	end = trx->lock.table_locks.rend();
	iterator	it = trx->lock.table_locks.rbegin();

	/* First pass: find any sys-table lock to seed the "strongest"
	mode. Note: the table_locks vector may be empty, and entries
	may be NULL. */

	for (/* No op */; it != end; ++it) {
		const lock_t*	lock = *it;

		if (lock != NULL
		    && dict_is_sys_table(lock->un_member.tab_lock.table->id)) {

			strongest = lock_get_mode(lock);
			ut_ad(strongest != LOCK_NONE);
			strongest_lock = lock;
			break;
		}
	}

	if (strongest == LOCK_NONE) {
		/* No sys-table lock at all. */
		lock_mutex_exit();
		return(NULL);
	}

	/* Second pass: continue from where the first pass stopped and
	keep the strongest sys-table lock seen. */
	for (/* No op */; it != end; ++it) {
		const lock_t*	lock = *it;

		if (lock == NULL) {
			continue;
		}

		ut_ad(trx == lock->trx);
		ut_ad(lock_get_type_low(lock) & LOCK_TABLE);
		ut_ad(lock->un_member.tab_lock.table != NULL);

		lock_mode	mode = lock_get_mode(lock);

		if (dict_is_sys_table(lock->un_member.tab_lock.table->id)
		    && lock_mode_stronger_or_eq(mode, strongest)) {

			strongest = mode;
			strongest_lock = lock;
		}
	}

	lock_mutex_exit();

	return(strongest_lock);
}
7602 
7603 /*******************************************************************//**
7604 Check if the transaction holds an exclusive lock on a record.
7605 @return whether the locks are held */
7606 bool
7607 lock_trx_has_rec_x_lock(
7608 /*====================*/
7609 	const trx_t*		trx,	/*!< in: transaction to check */
7610 	const dict_table_t*	table,	/*!< in: table to check */
7611 	const buf_block_t*	block,	/*!< in: buffer block of the record */
7612 	ulint			heap_no)/*!< in: record heap number */
7613 {
7614 	ut_ad(heap_no > PAGE_HEAP_NO_SUPREMUM);
7615 
7616 	lock_mutex_enter();
7617 	ut_a(lock_table_has(trx, table, LOCK_IX)
7618 	     || dict_table_is_temporary(table));
7619 	ut_a(lock_rec_has_expl(LOCK_X | LOCK_REC_NOT_GAP,
7620 			       block, heap_no, trx)
7621 	     || dict_table_is_temporary(table));
7622 	lock_mutex_exit();
7623 	return(true);
7624 }
7625 #endif /* UNIV_DEBUG */
7626 
7627 /** rewind(3) the file used for storing the latest detected deadlock and
7628 print a heading message to stderr if printing of all deadlocks to stderr
7629 is enabled. */
7630 void
7631 DeadlockChecker::start_print()
7632 {
7633 	ut_ad(lock_mutex_own());
7634 
7635 	rewind(lock_latest_err_file);
7636 	ut_print_timestamp(lock_latest_err_file);
7637 
7638 	if (srv_print_all_deadlocks) {
7639 		ib::info() << "Transactions deadlock detected, dumping"
7640 			<< " detailed information.";
7641 	}
7642 }
7643 
/** Print a message to the deadlock file and possibly to stderr.
@param msg message to print */
void
DeadlockChecker::print(const char* msg)
{
	/* Always record the message in the latest-deadlock file. */
	fputs(msg, lock_latest_err_file);

	/* Optionally echo it to the server error log as well. */
	if (srv_print_all_deadlocks) {
		ib::info() << msg;
	}
}
7655 
/** Print transaction data to the deadlock file and possibly to stderr.
@param trx transaction
@param max_query_len max query length to print */
void
DeadlockChecker::print(const trx_t* trx, ulint max_query_len)
{
	ut_ad(lock_mutex_own());

	/* Gather the lock statistics before taking trx_sys->mutex. */
	ulint	n_rec_locks = lock_number_of_rows_locked(&trx->lock);
	ulint	n_trx_locks = UT_LIST_GET_LEN(trx->lock.trx_locks);
	ulint	heap_size = mem_heap_get_size(trx->lock.lock_heap);

	/* trx_print_low() is called under trx_sys->mutex — presumably
	it reads trx fields protected by it; see its contract. */
	mutex_enter(&trx_sys->mutex);

	trx_print_low(lock_latest_err_file, trx, max_query_len,
		      n_rec_locks, n_trx_locks, heap_size);

	if (srv_print_all_deadlocks) {
		trx_print_low(stderr, trx, max_query_len,
			      n_rec_locks, n_trx_locks, heap_size);
	}

	mutex_exit(&trx_sys->mutex);
}
7680 
7681 /** Print lock data to the deadlock file and possibly to stderr.
7682 @param lock record or table type lock */
7683 void
7684 DeadlockChecker::print(const lock_t* lock)
7685 {
7686 	ut_ad(lock_mutex_own());
7687 
7688 	if (lock_get_type_low(lock) == LOCK_REC) {
7689 		lock_rec_print(lock_latest_err_file, lock);
7690 
7691 		if (srv_print_all_deadlocks) {
7692 			lock_rec_print(stderr, lock);
7693 		}
7694 	} else {
7695 		lock_table_print(lock_latest_err_file, lock);
7696 
7697 		if (srv_print_all_deadlocks) {
7698 			lock_table_print(stderr, lock);
7699 		}
7700 	}
7701 }
7702 
/** Get the next lock in the queue that is owned by a transaction whose
sub-tree has not already been searched.
Note: "next" here means PREV for table locks.

@param lock Lock in queue
@param heap_no heap_no if lock is a record lock else ULINT_UNDEFINED

@return next lock or NULL if at end of queue */
const lock_t*
DeadlockChecker::get_next_lock(const lock_t* lock, ulint heap_no) const
{
	ut_ad(lock_mutex_own());

	/* Advance through the queue, skipping locks whose owning
	transaction's sub-tree was already visited. */
	do {
		if (lock_get_type_low(lock) == LOCK_REC) {
			ut_ad(heap_no != ULINT_UNDEFINED);
			lock = lock_rec_get_next_const(heap_no, lock);
		} else {
			ut_ad(heap_no == ULINT_UNDEFINED);
			ut_ad(lock_get_type_low(lock) == LOCK_TABLE);

			lock = UT_LIST_GET_NEXT(
				un_member.tab_lock.locks, lock);
		}

	} while (lock != NULL && is_visited(lock));

	/* The lock type may never change mid-queue. */
	ut_ad(lock == NULL
	      || lock_get_type_low(lock) == lock_get_type_low(m_wait_lock));

	return(lock);
}
7735 
/** Get the first lock to search. The search starts from the current
wait_lock. What we are really interested in is an edge from the
current wait_lock's owning transaction to another transaction that has
a lock ahead in the queue. We skip locks where the owning transaction's
sub-tree has already been searched.

Note: The record locks are traversed from the oldest lock to the
latest. For table locks we go from latest to oldest.

For record locks, we first position the "iterator" on the first lock on
the page and then reposition on the actual heap_no. This is required
due to the way the record lock hash is implemented.

@param[out] heap_no if rec lock, else ULINT_UNDEFINED.
@return first lock or NULL */
const lock_t*
DeadlockChecker::get_first_lock(ulint* heap_no) const
{
	ut_ad(lock_mutex_own());

	const lock_t*	lock = m_wait_lock;

	if (lock_get_type_low(lock) == LOCK_REC) {
		hash_table_t*	lock_hash;

		/* Predicate (spatial) locks live in their own hash. */
		lock_hash = lock->type_mode & LOCK_PREDICATE
			? lock_sys->prdt_hash
			: lock_sys->rec_hash;

		/* We are only interested in records that match the heap_no. */
		*heap_no = lock_rec_find_set_bit(lock);

		ut_ad(*heap_no <= 0xffff);
		ut_ad(*heap_no != ULINT_UNDEFINED);

		/* Find the locks on the page. */
		lock = lock_rec_get_first_on_page_addr(
			lock_hash,
			lock->un_member.rec_lock.space,
			lock->un_member.rec_lock.page_no);

		/* Position on the first lock on the physical record.*/
		if (!lock_rec_get_nth_bit(lock, *heap_no)) {
			lock = lock_rec_get_next_const(*heap_no, lock);
		}

		/* The first lock in a record queue can never be a
		waiting one. */
		ut_a(!lock_get_wait(lock));
	} else {
		/* Table locks don't care about the heap_no. */
		*heap_no = ULINT_UNDEFINED;
		ut_ad(lock_get_type_low(lock) == LOCK_TABLE);
		dict_table_t*	table = lock->un_member.tab_lock.table;
		lock = UT_LIST_GET_FIRST(table->locks);
	}

	/* Must find at least two locks, otherwise there cannot be a
	waiting lock, secondly the first lock cannot be the wait_lock. */
	ut_a(lock != NULL);
	ut_a(lock != m_wait_lock);

	/* Check that the lock type doesn't change. */
	ut_ad(lock_get_type_low(lock) == lock_get_type_low(m_wait_lock));

	return(lock);
}
7801 
/** Notify that a deadlock has been detected and print the conflicting
transaction info.
@param lock lock causing deadlock */
void
DeadlockChecker::notify(const lock_t* lock) const
{
	ut_ad(lock_mutex_own());

	/* Rewind the report file and write the heading. */
	start_print();

	print("\n*** (1) TRANSACTION:\n");

	/* (1) is the joining transaction that started the search. */
	print(m_wait_lock->trx, 3000);

	print("*** (1) WAITING FOR THIS LOCK TO BE GRANTED:\n");

	print(m_wait_lock);

	print("*** (2) TRANSACTION:\n");

	/* (2) is the transaction holding the conflicting lock. */
	print(lock->trx, 3000);

	print("*** (2) HOLDS THE LOCK(S):\n");

	print(lock);

	/* It is possible that the joining transaction was granted its
	lock when we rolled back some other waiting transaction. */

	if (m_start->lock.wait_lock != 0) {
		print("*** (2) WAITING FOR THIS LOCK TO BE GRANTED:\n");

		print(m_start->lock.wait_lock);
	}

	DBUG_PRINT("ib_lock", ("deadlock detected"));
}
7839 
/** Select the victim transaction that should be rolledback.
@return victim transaction */
const trx_t*
DeadlockChecker::select_victim() const
{
	ut_ad(lock_mutex_own());
	ut_ad(m_start->lock.wait_lock != 0);
	ut_ad(m_wait_lock->trx != m_start);

	/* If either transaction carries an explicit priority, let
	trx_arbitrate() decide; a NULL result means no preference. */
	if (thd_trx_priority(m_start->mysql_thd) > 0
	    || thd_trx_priority(m_wait_lock->trx->mysql_thd) > 0) {

		const trx_t*	victim;

		victim = trx_arbitrate(m_start, m_wait_lock->trx);

		if (victim != NULL) {

			return(victim);
		}
	}

	if (trx_weight_ge(m_wait_lock->trx, m_start)) {

		/* The joining transaction is 'smaller',
		choose it as the victim and roll it back. */
#ifdef WITH_WSREP
		/* Never choose a brute-force (BF) Galera applier trx
		as the victim. */
	  if (wsrep_thd_is_BF(m_start->mysql_thd, TRUE))
		return(m_wait_lock->trx);
	else
#endif /* WITH_WSREP */

		return(m_start);
	}

#ifdef WITH_WSREP
	if (wsrep_thd_is_BF(m_wait_lock->trx->mysql_thd, TRUE))
		return(m_start);
	else
#endif /* WITH_WSREP */
	return(m_wait_lock->trx);
}
7882 
/** Looks iteratively for a deadlock. Note: the joining transaction may
have been granted its lock by the deadlock checks.

Performs an iterative depth-first search of the waits-for graph using
an explicit stack (push()/pop()) instead of recursion.
@return 0 if no deadlock else the victim transaction instance.*/
const trx_t*
DeadlockChecker::search()
{
	ut_ad(lock_mutex_own());
	ut_ad(!trx_mutex_own(m_start));

	ut_ad(m_start != NULL);
	ut_ad(m_wait_lock != NULL);
	check_trx_state(m_wait_lock->trx);
	ut_ad(m_mark_start <= s_lock_mark_counter);

	/* Look at the locks ahead of wait_lock in the lock queue. */
	ulint		heap_no;
	const lock_t*	lock = get_first_lock(&heap_no);

	for (;;) {

		/* We should never visit the same sub-tree more than once. */
		ut_ad(lock == NULL || !is_visited(lock));

		/* Queue exhausted at this depth: backtrack by popping
		saved states until one yields a next lock. */
		while (m_n_elems > 0 && lock == NULL) {

			/* Restore previous search state. */

			pop(lock, heap_no);

			lock = get_next_lock(lock, heap_no);
		}

		if (lock == NULL) {
			/* Stack empty and queue exhausted: done. */
			break;
		} else if (lock == m_wait_lock) {

			/* We can mark this subtree as searched */
			ut_ad(lock->trx->lock.deadlock_mark <= m_mark_start);

			lock->trx->lock.deadlock_mark = ++s_lock_mark_counter;

			/* We are not prepared for an overflow. This 64-bit
			counter should never wrap around. At 10^9 increments
			per second, it would take 10^3 years of uptime. */

			ut_ad(s_lock_mark_counter > 0);

			/* Backtrack */
			lock = NULL;

		} else if (!lock_has_to_wait(m_wait_lock, lock)) {

			/* No conflict, next lock */
			lock = get_next_lock(lock, heap_no);

		} else if (lock->trx == m_start) {

			/* Found a cycle. */

			notify(lock);

			return(select_victim());

		} else if (is_too_deep()) {

			/* Search too deep to continue. */
			m_too_deep = true;
			return(m_start);

		} else if (lock->trx->lock.que_state == TRX_QUE_LOCK_WAIT) {

			/* Another trx ahead has requested a lock in an
			incompatible mode, and is itself waiting for a lock. */

			++m_cost;

			/* Save the current position so we can resume
			here when backtracking; push() fails when the
			stack is full. */
			if (!push(lock, heap_no)) {
				m_too_deep = true;
				return(m_start);
			}


			/* Descend: follow the edge to the lock that
			the blocking transaction is itself waiting for. */
			m_wait_lock = lock->trx->lock.wait_lock;

			lock = get_first_lock(&heap_no);

			if (is_visited(lock)) {
				lock = get_next_lock(lock, heap_no);
			}

		} else {
			lock = get_next_lock(lock, heap_no);
		}
	}

	ut_a(lock == NULL && m_n_elems == 0);

	/* No deadlock found. */
	return(0);
}
7983 
/** Print info about transaction that was rolled back. Used when the
waits-for graph search was aborted (too many steps or too deep) and
the joining transaction is chosen as the victim by default.
@param trx transaction rolled back
@param lock lock trx wants */
void
DeadlockChecker::rollback_print(const trx_t*	trx, const lock_t* lock)
{
	ut_ad(lock_mutex_own());

	/* If the lock search exceeds the max step
	or the max depth, the current trx will be
	the victim. Print its information. */
	start_print();

	print("TOO DEEP OR LONG SEARCH IN THE LOCK TABLE"
	      " WAITS-FOR GRAPH, WE WILL ROLL BACK"
	      " FOLLOWING TRANSACTION \n\n"
	      "*** TRANSACTION:\n");

	/* 3000 is the max query length to include in the report. */
	print(trx, 3000);

	print("*** WAITING FOR THIS LOCK TO BE GRANTED:\n");

	print(lock);
}
8008 
8009 /** Rollback transaction selected as the victim. */
8010 void
8011 DeadlockChecker::trx_rollback()
8012 {
8013 	ut_ad(lock_mutex_own());
8014 
8015 	trx_t*	trx = m_wait_lock->trx;
8016 
8017 	print("*** WE ROLL BACK TRANSACTION (1)\n");
8018 
8019 	trx_mutex_enter(trx);
8020 
8021 	trx->lock.was_chosen_as_deadlock_victim = true;
8022 
8023 	lock_cancel_waiting_and_release(trx->lock.wait_lock);
8024 
8025 	trx_mutex_exit(trx);
8026 }
8027 
/** Checks if a joining lock request results in a deadlock. If a deadlock is
found this function will resolve the deadlock by choosing a victim transaction
and rolling it back. It will attempt to resolve all deadlocks. The returned
transaction id will be the joining transaction instance or NULL if some other
transaction was chosen as a victim and rolled back or no deadlock found.

@param[in]	lock lock the transaction is requesting
@param[in,out]	trx transaction requesting the lock

@return transaction instanace chosen as victim or 0 */
const trx_t*
DeadlockChecker::check_and_resolve(const lock_t* lock, trx_t* trx)
{
	ut_ad(lock_mutex_own());
	ut_ad(trx_mutex_own(trx));
	check_trx_state(trx);
	ut_ad(!srv_read_only_mode);

	/* If transaction is marked for ASYNC rollback then we should
	not allow it to wait for another lock causing possible deadlock.
	We return current transaction as deadlock victim here. */
	if (trx->in_innodb & TRX_FORCE_ROLLBACK_ASYNC) {
		return(trx);
	} else if (!innobase_deadlock_detect) {
		/* Deadlock detection disabled by configuration:
		never report a victim. */
		return(NULL);
	}

	/*  Release the mutex to obey the latching order.
	This is safe, because DeadlockChecker::check_and_resolve()
	is invoked when a lock wait is enqueued for the currently
	running transaction. Because m_trx is a running transaction
	(it is not currently suspended because of a lock wait),
	its state can only be changed by this thread, which is
	currently associated with the transaction. */

	trx_mutex_exit(trx);

	const trx_t*	victim_trx;

	/* Try and resolve as many deadlocks as possible. Each
	iteration runs a fresh search; rolling back one victim may
	expose another cycle involving the same joining trx. */
	do {
		DeadlockChecker	checker(trx, lock, s_lock_mark_counter);

		victim_trx = checker.search();

		/* Search too deep, we rollback the joining transaction only
		if it is possible to rollback. Otherwise we rollback the
		transaction that is holding the lock that the joining
		transaction wants. */
		if (checker.is_too_deep()) {

			ut_ad(trx == checker.m_start);
			ut_ad(trx == victim_trx);

			rollback_print(victim_trx, lock);

			MONITOR_INC(MONITOR_DEADLOCK);

			break;

		} else if (victim_trx != NULL && victim_trx != trx) {

			/* Some other transaction was chosen: cancel its
			wait here; the loop then re-checks for more cycles. */
			ut_ad(victim_trx == checker.m_wait_lock->trx);

			checker.trx_rollback();

			lock_deadlock_found = true;

			MONITOR_INC(MONITOR_DEADLOCK);
		}

	} while (victim_trx != NULL && victim_trx != trx);

	/* If the joining transaction was selected as the victim. */
	if (victim_trx != NULL) {

		print("*** WE ROLL BACK TRANSACTION (2)\n");

		lock_deadlock_found = true;
	}

	/* Re-acquire before returning; caller expects trx->mutex held. */
	trx_mutex_enter(trx);

	return(victim_trx);
}
8113 
8114 /**
8115 Allocate cached locks for the transaction.
8116 @param trx		allocate cached record locks for this transaction */
8117 void
8118 lock_trx_alloc_locks(trx_t* trx)
8119 {
8120 	ulint	sz = REC_LOCK_SIZE * REC_LOCK_CACHE;
8121 	byte*	ptr = reinterpret_cast<byte*>(ut_malloc_nokey(sz));
8122 
8123 	/* We allocate one big chunk and then distribute it among
8124 	the rest of the elements. The allocated chunk pointer is always
8125 	at index 0. */
8126 
8127 	for (ulint i = 0; i < REC_LOCK_CACHE; ++i, ptr += REC_LOCK_SIZE) {
8128 		trx->lock.rec_pool.push_back(
8129 			reinterpret_cast<ib_lock_t*>(ptr));
8130 	}
8131 
8132 	sz = TABLE_LOCK_SIZE * TABLE_LOCK_CACHE;
8133 	ptr = reinterpret_cast<byte*>(ut_malloc_nokey(sz));
8134 
8135 	for (ulint i = 0; i < TABLE_LOCK_CACHE; ++i, ptr += TABLE_LOCK_SIZE) {
8136 		trx->lock.table_pool.push_back(
8137 			reinterpret_cast<ib_lock_t*>(ptr));
8138 	}
8139 
8140 }
8141