/*****************************************************************************

Copyright (c) 1996, 2021, Oracle and/or its affiliates.

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License, version 2.0,
as published by the Free Software Foundation.

This program is also distributed with certain software (including
but not limited to OpenSSL) that is licensed under separate terms,
as designated in a particular file or component or in included license
documentation.  The authors of MySQL hereby grant you an additional
permission to link the program and your derivative works with the
separately licensed software that they have included with MySQL.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License, version 2.0, for more details.

You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA

*****************************************************************************/

/**************************************************//**
@file lock/lock0lock.cc
The transaction lock system

Created 5/7/1996 Heikki Tuuri
*******************************************************/

#define LOCK_MODULE_IMPLEMENTATION

#include <mysql/service_thd_engine_lock.h>
#include "ha_prototypes.h"

#include "lock0lock.h"
#include "lock0priv.h"

#ifdef UNIV_NONINL
#include "lock0lock.ic"
#include "lock0priv.ic"
#endif

#include "dict0mem.h"
#include "usr0sess.h"
#include "trx0purge.h"
#include "trx0sys.h"
#include "srv0mon.h"
#include "ut0vec.h"
#include "btr0btr.h"
#include "dict0boot.h"
#include "ut0new.h"
#include "row0sel.h"
#include "row0mysql.h"
#include "pars0pars.h"

#include <set>

/* Flag to enable/disable deadlock detector. */
my_bool	innobase_deadlock_detect = TRUE;

/** Total number of cached record locks */
static const ulint	REC_LOCK_CACHE = 8;

/** Maximum record lock size in bytes */
static const ulint	REC_LOCK_SIZE = sizeof(ib_lock_t) + 256;

/** Total number of cached table locks */
static const ulint	TABLE_LOCK_CACHE = 8;

/** Size, in bytes, of the table lock instance */
static const ulint	TABLE_LOCK_SIZE = sizeof(ib_lock_t);

/** Deadlock checker. */
class DeadlockChecker {
public:
	/** Checks if a joining lock request results in a deadlock. If
	a deadlock is found, this function will resolve the deadlock
	by choosing a victim transaction and rolling it back. It
	will attempt to resolve all deadlocks. The joining transaction
	is returned if it was chosen as the victim; NULL is returned
	if some other transaction was chosen as a victim and rolled
	back, or if no deadlock was found.

	@param lock lock the transaction is requesting
	@param trx transaction requesting the lock

	@return transaction chosen as victim, or NULL */
	static const trx_t* check_and_resolve(
		const lock_t*	lock,
		trx_t*		trx);

private:
	/** Do a shallow copy. Default destructor OK.
	@param trx the start transaction (start node)
	@param wait_lock lock that a transaction wants
	@param mark_start visited node counter */
	DeadlockChecker(
		const trx_t*	trx,
		const lock_t*	wait_lock,
		ib_uint64_t	mark_start)
		:
		m_cost(),
		m_start(trx),
		m_too_deep(),
		m_wait_lock(wait_lock),
		m_mark_start(mark_start),
		m_n_elems()
	{
	}

	/** Check if the search is too deep. */
	bool is_too_deep() const
	{
		return(m_n_elems > LOCK_MAX_DEPTH_IN_DEADLOCK_CHECK
		       || m_cost > LOCK_MAX_N_STEPS_IN_DEADLOCK_CHECK);
	}

	/** Save current state.
	@param lock lock to push on the stack.
	@param heap_no the heap number to push on the stack.
	@return false if stack is full. */
	bool push(const lock_t*	lock, ulint heap_no)
	{
		ut_ad((lock_get_type_low(lock) & LOCK_REC)
		      || (lock_get_type_low(lock) & LOCK_TABLE));

		ut_ad(((lock_get_type_low(lock) & LOCK_TABLE) != 0)
		      == (heap_no == ULINT_UNDEFINED));

		/* Ensure that the stack is bounded. */
		if (m_n_elems >= UT_ARR_SIZE(s_states)) {
			return(false);
		}

		state_t&	state = s_states[m_n_elems++];

		state.m_lock = lock;
		state.m_wait_lock = m_wait_lock;
		state.m_heap_no = heap_no;

		return(true);
	}

	/** Restore state.
	@param[out] lock current lock
	@param[out] heap_no current heap_no */
	void pop(const lock_t*& lock, ulint& heap_no)
	{
		ut_a(m_n_elems > 0);

		const state_t&	state = s_states[--m_n_elems];

		lock = state.m_lock;
		heap_no = state.m_heap_no;
		m_wait_lock = state.m_wait_lock;
	}

	/** Check whether the node has been visited.
	@param lock lock to check
	@return true if the node has been visited */
	bool is_visited(const lock_t* lock) const
	{
		return(lock->trx->lock.deadlock_mark > m_mark_start);
	}

	/** Get the next lock in the queue that is owned by a transaction
	whose sub-tree has not already been searched.
	Note: "next" here means PREV for table locks.
	@param lock Lock in queue
	@param heap_no heap_no if lock is a record lock else ULINT_UNDEFINED
	@return next lock or NULL if at end of queue */
	const lock_t* get_next_lock(const lock_t* lock, ulint heap_no) const;

	/** Get the first lock to search. The search starts from the current
	wait_lock. What we are really interested in is an edge from the
	current wait_lock's owning transaction to another transaction that has
	a lock ahead in the queue. We skip locks where the owning transaction's
	sub-tree has already been searched.

	Note: The record locks are traversed from the oldest lock to the
	latest. For table locks we go from latest to oldest.

	For record locks, we first position the iterator on the first lock on
	the page and then reposition on the actual heap_no. This is required
	due to the way the record lock hash is implemented.

	@param[out] heap_no if rec lock, else ULINT_UNDEFINED.

	@return first lock or NULL */
	const lock_t* get_first_lock(ulint* heap_no) const;

	/** Notify that a deadlock has been detected and print the conflicting
	transaction info.
	@param lock lock causing deadlock */
	void notify(const lock_t* lock) const;

	/** Select the victim transaction that should be rolled back.
	@return victim transaction */
	const trx_t* select_victim() const;

	/** Rollback transaction selected as the victim. */
	void trx_rollback();

	/** Looks iteratively for a deadlock. Note: the joining transaction
	may have been granted its lock by the deadlock checks.

	@return NULL if no deadlock, else the victim transaction. */
	const trx_t* search();

	/** Print transaction data to the deadlock file and possibly to stderr.
	@param trx transaction
	@param max_query_len max query length to print */
	static void print(const trx_t* trx, ulint max_query_len);

	/** rewind(3) the file used for storing the latest detected deadlock
	and print a heading message to stderr if printing of all deadlocks to
	stderr is enabled. */
	static void start_print();

	/** Print lock data to the deadlock file and possibly to stderr.
	@param lock record or table type lock */
	static void print(const lock_t* lock);

	/** Print a message to the deadlock file and possibly to stderr.
	@param msg message to print */
	static void print(const char* msg);

	/** Print info about transaction that was rolled back.
	@param trx transaction rolled back
	@param lock lock trx wants */
	static void rollback_print(const trx_t* trx, const lock_t* lock);

private:
	/** DFS state information, used during deadlock checking. */
	struct state_t {
		const lock_t*	m_lock;		/*!< Current lock */
		const lock_t*	m_wait_lock;	/*!< Waiting for lock */
		ulint		m_heap_no;	/*!< heap number if rec lock */
	};

	/** Used in deadlock tracking. Protected by lock_sys->mutex. */
	static ib_uint64_t	s_lock_mark_counter;

	/** Calculation steps thus far. It is the count of the nodes visited. */
	ulint			m_cost;

	/** Joining transaction that is requesting a lock in an
	incompatible mode */
	const trx_t*		m_start;

	/** TRUE if search was too deep and was aborted */
	bool			m_too_deep;

	/** Lock that trx wants */
	const lock_t*		m_wait_lock;

	/** Value of lock_mark_count at the start of the deadlock check. */
	ib_uint64_t		m_mark_start;

	/** Number of states pushed onto the stack */
	size_t			m_n_elems;

	/** This is to avoid malloc/free calls. */
	static state_t		s_states[MAX_STACK_SIZE];
};

/** Counter to mark visited nodes during deadlock search. */
ib_uint64_t	DeadlockChecker::s_lock_mark_counter = 0;

/** The stack used for deadlock searches. */
DeadlockChecker::state_t	DeadlockChecker::s_states[MAX_STACK_SIZE];
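
/* Illustrative sketch of how the members above cooperate: search()
(defined later in this file) drives an iterative DFS over the waits-for
graph, roughly along the lines below. This is a simplified outline of
the idea, not the exact control flow of DeadlockChecker::search():

	ulint		heap_no;
	const lock_t*	lock = get_first_lock(&heap_no);

	while (lock != NULL) {
		if (lock->trx == m_start) {
			// An edge cycles back to the joining trx: deadlock.
		} else if (lock_get_wait(lock) && !is_visited(lock)) {
			// The blocker is itself waiting: descend into its
			// sub-tree, backtracking later via pop().
			push(lock, heap_no);
		} else {
			lock = get_next_lock(lock, heap_no);
		}
	}
*/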

#ifdef UNIV_DEBUG
/*********************************************************************//**
Validates the lock system.
@return TRUE if ok */
static
bool
lock_validate();
/*============*/

/*********************************************************************//**
Validates the record lock queues on a page.
@return TRUE if ok */
static
ibool
lock_rec_validate_page(
/*===================*/
	const buf_block_t*	block)	/*!< in: buffer block */
	MY_ATTRIBUTE((warn_unused_result));
#endif /* UNIV_DEBUG */

/* The lock system */
lock_sys_t*	lock_sys	= NULL;

/** We store info on the latest deadlock error to this buffer. InnoDB
Monitor will then fetch it and print it. */
bool	lock_deadlock_found = false;

/** Only created if !srv_read_only_mode */
static FILE*		lock_latest_err_file;

/*********************************************************************//**
Reports that a transaction id is insensible, i.e., in the future. */
void
lock_report_trx_id_insanity(
/*========================*/
	trx_id_t	trx_id,		/*!< in: trx id */
	const rec_t*	rec,		/*!< in: user record */
	dict_index_t*	index,		/*!< in: index */
	const ulint*	offsets,	/*!< in: rec_get_offsets(rec, index) */
	trx_id_t	max_trx_id)	/*!< in: trx_sys_get_max_trx_id() */
{
	ib::error()
		<< "Transaction id " << trx_id
		<< " associated with record" << rec_offsets_print(rec, offsets)
		<< " in index " << index->name
		<< " of table " << index->table->name
		<< " is greater than the global counter " << max_trx_id
		<< "! The table is corrupted.";
}

/*********************************************************************//**
Checks that a transaction id is sensible, i.e., not in the future.
@return true if ok */
#ifdef UNIV_DEBUG

#else
static MY_ATTRIBUTE((warn_unused_result))
#endif
bool
lock_check_trx_id_sanity(
/*=====================*/
	trx_id_t	trx_id,		/*!< in: trx id */
	const rec_t*	rec,		/*!< in: user record */
	dict_index_t*	index,		/*!< in: index */
	const ulint*	offsets)	/*!< in: rec_get_offsets(rec, index) */
{
	ut_ad(rec_offs_validate(rec, index, offsets));

	trx_id_t	max_trx_id = trx_sys_get_max_trx_id();
	bool		is_ok = trx_id < max_trx_id;

	if (!is_ok) {
		lock_report_trx_id_insanity(
			trx_id, rec, index, offsets, max_trx_id);
	}

	return(is_ok);
}
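
/* Usage sketch: lock_sec_rec_some_has_impl() further down in this file
applies this check to a page's max trx id before trusting it:

	trx_id_t	max_trx_id = page_get_max_trx_id(page);

	if (!lock_check_trx_id_sanity(max_trx_id, rec, index, offsets)) {
		// The page is corrupt: avoid a crash and treat the
		// record as carrying no implicit lock.
	}
*/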

/*********************************************************************//**
Checks that a record is seen in a consistent read.
@return true if sees, or false if an earlier version of the record
should be retrieved */
bool
lock_clust_rec_cons_read_sees(
/*==========================*/
	const rec_t*	rec,	/*!< in: user record which should be read or
				passed over by a read cursor */
	dict_index_t*	index,	/*!< in: clustered index */
	const ulint*	offsets,/*!< in: rec_get_offsets(rec, index) */
	ReadView*	view)	/*!< in: consistent read view */
{
	ut_ad(dict_index_is_clust(index));
	ut_ad(page_rec_is_user_rec(rec));
	ut_ad(rec_offs_validate(rec, index, offsets));

	/* Temp-tables are not shared across connections and multiple
	transactions from different connections cannot simultaneously
	operate on the same temp-table, so a read of a temp-table is
	always a consistent read. */
	if (srv_read_only_mode || dict_table_is_temporary(index->table)) {
		ut_ad(view == 0 || dict_table_is_temporary(index->table));
		return(true);
	}

	/* NOTE that we call this function while holding the search
	system latch. */

	trx_id_t	trx_id = row_get_rec_trx_id(rec, index, offsets);

	return(view->changes_visible(trx_id, index->table->name));
}

/*********************************************************************//**
Checks that a non-clustered index record is seen in a consistent read.

NOTE that a non-clustered index page contains so little information on
its modifications that, even when this function returns false, the
present version of rec may be the right one; we must check that from
the clustered index record.

@return true if certainly sees, or false if an earlier version of the
clustered index record might be needed */
bool
lock_sec_rec_cons_read_sees(
/*========================*/
	const rec_t*		rec,	/*!< in: user record which
					should be read or passed over
					by a read cursor */
	const dict_index_t*	index,	/*!< in: index */
	const ReadView*	view)	/*!< in: consistent read view */
{
	ut_ad(page_rec_is_user_rec(rec));

	/* NOTE that we might call this function while holding the search
	system latch. */

	if (recv_recovery_is_on()) {

		return(false);

	} else if (dict_table_is_temporary(index->table)) {

		/* Temp-tables are not shared across connections and multiple
		transactions from different connections cannot simultaneously
		operate on the same temp-table, so a read of a temp-table is
		always a consistent read. */

		return(true);
	}

	trx_id_t	max_trx_id = page_get_max_trx_id(page_align(rec));

	ut_ad(max_trx_id > 0);

	return(view->sees(max_trx_id));
}

/*********************************************************************//**
Creates the lock system at database start. */
void
lock_sys_create(
/*============*/
	ulint	n_cells)	/*!< in: number of slots in lock hash table */
{
	ulint	lock_sys_sz;

	lock_sys_sz = sizeof(*lock_sys) + OS_THREAD_MAX_N * sizeof(srv_slot_t);

	lock_sys = static_cast<lock_sys_t*>(ut_zalloc_nokey(lock_sys_sz));

	void*	ptr = &lock_sys[1];

	lock_sys->waiting_threads = static_cast<srv_slot_t*>(ptr);

	lock_sys->last_slot = lock_sys->waiting_threads;

	mutex_create(LATCH_ID_LOCK_SYS, &lock_sys->mutex);

	mutex_create(LATCH_ID_LOCK_SYS_WAIT, &lock_sys->wait_mutex);

	lock_sys->timeout_event = os_event_create(0);

	lock_sys->rec_hash = hash_create(n_cells);
	lock_sys->prdt_hash = hash_create(n_cells);
	lock_sys->prdt_page_hash = hash_create(n_cells);

	if (!srv_read_only_mode) {
		lock_latest_err_file = os_file_create_tmpfile(NULL);
		ut_a(lock_latest_err_file);
	}
}
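
/* Layout note and usage sketch: the single ut_zalloc_nokey() above
allocates the lock_sys_t header immediately followed by OS_THREAD_MAX_N
srv_slot_t wait slots, which is why waiting_threads points just past
lock_sys[1]. At startup the server calls something along the lines of
(the exact parameter name is an assumption; see srv0start.cc for the
real call site):

	lock_sys_create(srv_lock_table_size);
*/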

/** Calculates the fold value of a lock: used in migrating the hash table.
@param[in]	lock	record lock object
@return	folded value */
static
ulint
lock_rec_lock_fold(
	const lock_t*	lock)
{
	return(lock_rec_fold(lock->un_member.rec_lock.space,
			     lock->un_member.rec_lock.page_no));
}

/** Resize the lock hash tables.
@param[in]	n_cells	number of slots in lock hash table */
void
lock_sys_resize(
	ulint	n_cells)
{
	hash_table_t*	old_hash;

	lock_mutex_enter();

	old_hash = lock_sys->rec_hash;
	lock_sys->rec_hash = hash_create(n_cells);
	HASH_MIGRATE(old_hash, lock_sys->rec_hash, lock_t, hash,
		     lock_rec_lock_fold);
	hash_table_free(old_hash);

	old_hash = lock_sys->prdt_hash;
	lock_sys->prdt_hash = hash_create(n_cells);
	HASH_MIGRATE(old_hash, lock_sys->prdt_hash, lock_t, hash,
		     lock_rec_lock_fold);
	hash_table_free(old_hash);

	old_hash = lock_sys->prdt_page_hash;
	lock_sys->prdt_page_hash = hash_create(n_cells);
	HASH_MIGRATE(old_hash, lock_sys->prdt_page_hash, lock_t, hash,
		     lock_rec_lock_fold);
	hash_table_free(old_hash);

	/* need to update block->lock_hash_val */
	for (ulint i = 0; i < srv_buf_pool_instances; ++i) {
		buf_pool_t*	buf_pool = buf_pool_from_array(i);

		buf_pool_mutex_enter(buf_pool);
		buf_page_t*	bpage;
		bpage = UT_LIST_GET_FIRST(buf_pool->LRU);

		while (bpage != NULL) {
			if (buf_page_get_state(bpage)
			    == BUF_BLOCK_FILE_PAGE) {
				buf_block_t*	block;
				block = reinterpret_cast<buf_block_t*>(
					bpage);

				block->lock_hash_val
					= lock_rec_hash(
						bpage->id.space(),
						bpage->id.page_no());
			}
			bpage = UT_LIST_GET_NEXT(LRU, bpage);
		}
		buf_pool_mutex_exit(buf_pool);
	}

	lock_mutex_exit();
}

/*********************************************************************//**
Closes the lock system at database shutdown. */
void
lock_sys_close(void)
/*================*/
{
	if (lock_latest_err_file != NULL) {
		fclose(lock_latest_err_file);
		lock_latest_err_file = NULL;
	}

	hash_table_free(lock_sys->rec_hash);
	hash_table_free(lock_sys->prdt_hash);
	hash_table_free(lock_sys->prdt_page_hash);

	os_event_destroy(lock_sys->timeout_event);

	mutex_destroy(&lock_sys->mutex);
	mutex_destroy(&lock_sys->wait_mutex);

	srv_slot_t*	slot = lock_sys->waiting_threads;

	for (ulint i = 0; i < OS_THREAD_MAX_N; i++, ++slot) {
		if (slot->event != NULL) {
			os_event_destroy(slot->event);
		}
	}

	ut_free(lock_sys);

	lock_sys = NULL;
}

/*********************************************************************//**
Gets the size of a lock struct.
@return size in bytes */
ulint
lock_get_size(void)
/*===============*/
{
	return((ulint) sizeof(lock_t));
}

/*********************************************************************//**
Gets the source table of an ALTER TABLE transaction.  The table must be
covered by an IX or IS table lock.
@return the source table of transaction, if it is covered by an IX or
IS table lock; dest if there is no source table, and NULL if the
transaction is locking more than two tables or an inconsistency is
found */
dict_table_t*
lock_get_src_table(
/*===============*/
	trx_t*		trx,	/*!< in: transaction */
	dict_table_t*	dest,	/*!< in: destination of ALTER TABLE */
	lock_mode*	mode)	/*!< out: lock mode of the source table */
{
	dict_table_t*	src;
	lock_t*		lock;

	ut_ad(!lock_mutex_own());

	src = NULL;
	*mode = LOCK_NONE;

	/* The trx mutex protects the trx_locks for our purposes.
	Other transactions could want to convert one of our implicit
	record locks to an explicit one. For that, they would need our
	trx mutex. Waiting locks can be removed while only holding
	lock_sys->mutex, but this is a running transaction and cannot
	thus be holding any waiting locks. */
	trx_mutex_enter(trx);

	for (lock = UT_LIST_GET_FIRST(trx->lock.trx_locks);
	     lock != NULL;
	     lock = UT_LIST_GET_NEXT(trx_locks, lock)) {
		lock_table_t*	tab_lock;
		lock_mode	lock_mode;
		if (!(lock_get_type_low(lock) & LOCK_TABLE)) {
			/* We are only interested in table locks. */
			continue;
		}
		tab_lock = &lock->un_member.tab_lock;
		if (dest == tab_lock->table) {
			/* We are not interested in the destination table. */
			continue;
		} else if (!src) {
			/* This presumably is the source table. */
			src = tab_lock->table;
			if (UT_LIST_GET_LEN(src->locks) != 1
			    || UT_LIST_GET_FIRST(src->locks) != lock) {
				/* We only support the case when
				there is only one lock on this table. */
				src = NULL;
				goto func_exit;
			}
		} else if (src != tab_lock->table) {
			/* The transaction is locking more than
			two tables (src and dest): abort */
			src = NULL;
			goto func_exit;
		}

		/* Check that the source table is locked by
		LOCK_IX or LOCK_IS. */
		lock_mode = lock_get_mode(lock);
		if (lock_mode == LOCK_IX || lock_mode == LOCK_IS) {
			if (*mode != LOCK_NONE && *mode != lock_mode) {
				/* There are multiple locks on src. */
				src = NULL;
				goto func_exit;
			}
			*mode = lock_mode;
		}
	}

	if (!src) {
		/* No source table lock found: flag the situation to caller */
		src = dest;
	}

func_exit:
	trx_mutex_exit(trx);
	return(src);
}

/*********************************************************************//**
Determine if the given table is exclusively "owned" by the given
transaction, i.e., transaction holds LOCK_IX and possibly LOCK_AUTO_INC
on the table.
@return TRUE if table is only locked by trx, with LOCK_IX, and
possibly LOCK_AUTO_INC */
ibool
lock_is_table_exclusive(
/*====================*/
	const dict_table_t*	table,	/*!< in: table */
	const trx_t*		trx)	/*!< in: transaction */
{
	const lock_t*	lock;
	ibool		ok	= FALSE;

	ut_ad(table);
	ut_ad(trx);

	lock_mutex_enter();

	for (lock = UT_LIST_GET_FIRST(table->locks);
	     lock != NULL;
	     lock = UT_LIST_GET_NEXT(locks, &lock->un_member.tab_lock)) {
		if (lock->trx != trx) {
			/* A lock on the table is held
			by some other transaction. */
			goto not_ok;
		}

		if (!(lock_get_type_low(lock) & LOCK_TABLE)) {
			/* We are interested in table locks only. */
			continue;
		}

		switch (lock_get_mode(lock)) {
		case LOCK_IX:
			ok = TRUE;
			break;
		case LOCK_AUTO_INC:
			/* It is allowed for trx to hold an
			auto_increment lock. */
			break;
		default:
not_ok:
			/* Other table locks than LOCK_IX are not allowed. */
			ok = FALSE;
			goto func_exit;
		}
	}

func_exit:
	lock_mutex_exit();

	return(ok);
}

/*********************************************************************//**
Sets the wait flag of a lock and the back pointer in trx to lock. */
UNIV_INLINE
void
lock_set_lock_and_trx_wait(
/*=======================*/
	lock_t*	lock,	/*!< in: lock */
	trx_t*	trx)	/*!< in/out: trx */
{
	ut_ad(lock);
	ut_ad(lock->trx == trx);
	ut_ad(trx->lock.wait_lock == NULL);
	ut_ad(lock_mutex_own());
	ut_ad(trx_mutex_own(trx));

	trx->lock.wait_lock = lock;
	lock->type_mode |= LOCK_WAIT;
}

/**********************************************************************//**
The back pointer to a waiting lock request in the transaction is set to NULL
and the wait bit in lock type_mode is reset. */
UNIV_INLINE
void
lock_reset_lock_and_trx_wait(
/*=========================*/
	lock_t*	lock)	/*!< in/out: record lock */
{
	ut_ad(lock->trx->lock.wait_lock == lock);
	ut_ad(lock_get_wait(lock));
	ut_ad(lock_mutex_own());

	lock->trx->lock.wait_lock = NULL;
	lock->type_mode &= ~LOCK_WAIT;
}

/*********************************************************************//**
Gets the gap flag of a record lock.
@return LOCK_GAP or 0 */
UNIV_INLINE
ulint
lock_rec_get_gap(
/*=============*/
	const lock_t*	lock)	/*!< in: record lock */
{
	ut_ad(lock);
	ut_ad(lock_get_type_low(lock) == LOCK_REC);

	return(lock->type_mode & LOCK_GAP);
}

/*********************************************************************//**
Gets the LOCK_REC_NOT_GAP flag of a record lock.
@return LOCK_REC_NOT_GAP or 0 */
UNIV_INLINE
ulint
lock_rec_get_rec_not_gap(
/*=====================*/
	const lock_t*	lock)	/*!< in: record lock */
{
	ut_ad(lock);
	ut_ad(lock_get_type_low(lock) == LOCK_REC);

	return(lock->type_mode & LOCK_REC_NOT_GAP);
}

/*********************************************************************//**
Gets the waiting insert flag of a record lock.
@return LOCK_INSERT_INTENTION or 0 */
UNIV_INLINE
ulint
lock_rec_get_insert_intention(
/*==========================*/
	const lock_t*	lock)	/*!< in: record lock */
{
	ut_ad(lock);
	ut_ad(lock_get_type_low(lock) == LOCK_REC);

	return(lock->type_mode & LOCK_INSERT_INTENTION);
}

/*********************************************************************//**
Checks if a lock request for a new lock has to wait for request lock2.
@return TRUE if new lock has to wait for lock2 to be removed */
UNIV_INLINE
ibool
lock_rec_has_to_wait(
/*=================*/
	const trx_t*	trx,	/*!< in: trx of new lock */
	ulint		type_mode,/*!< in: precise mode of the new lock
				to set: LOCK_S or LOCK_X, possibly
				ORed to LOCK_GAP or LOCK_REC_NOT_GAP,
				LOCK_INSERT_INTENTION */
	const lock_t*	lock2,	/*!< in: another record lock; NOTE that
				it is assumed that this has a lock bit
				set on the same record as in the new
				lock we are setting */
	bool		lock_is_on_supremum)
				/*!< in: TRUE if we are setting the
				lock on the 'supremum' record of an
				index page: we know then that the lock
				request is really for a 'gap' type lock */
{
	ut_ad(trx && lock2);
	ut_ad(lock_get_type_low(lock2) == LOCK_REC);

	if (trx != lock2->trx
	    && !lock_mode_compatible(static_cast<lock_mode>(
				     LOCK_MODE_MASK & type_mode),
				     lock_get_mode(lock2))) {

		/* We have somewhat complex rules when gap type record locks
		cause waits */

		if ((lock_is_on_supremum || (type_mode & LOCK_GAP))
		    && !(type_mode & LOCK_INSERT_INTENTION)) {

			/* Gap type locks without LOCK_INSERT_INTENTION flag
			do not need to wait for anything. This is because
			different users can have conflicting lock types
			on gaps. */

			return(FALSE);
		}

		if (!(type_mode & LOCK_INSERT_INTENTION)
		    && lock_rec_get_gap(lock2)) {

			/* A record lock (LOCK_ORDINARY or LOCK_REC_NOT_GAP)
			does not need to wait for a gap type lock */

			return(FALSE);
		}

		if ((type_mode & LOCK_GAP)
		    && lock_rec_get_rec_not_gap(lock2)) {

			/* Lock on gap does not need to wait for
			a LOCK_REC_NOT_GAP type lock */

			return(FALSE);
		}

		if (lock_rec_get_insert_intention(lock2)) {

			/* No lock request needs to wait for an insert
			intention lock to be removed. This is ok since our
			rules allow conflicting locks on gaps. This eliminates
			a spurious deadlock caused by a next-key lock waiting
			for an insert intention lock; when the insert
			intention lock was granted, the insert deadlocked on
			the waiting next-key lock.

			Also, insert intention locks do not disturb each
			other. */

			return(FALSE);
		}

		return(TRUE);
	}

	return(FALSE);
}
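
/* A worked example of the rules above (illustrative only): suppose
trx A holds LOCK_X | LOCK_GAP on heap_no 5. Then for a new request by
trx B on the same record:

	LOCK_S | LOCK_GAP		=> FALSE: gap type locks without
	LOCK_INSERT_INTENTION never wait for each other;

	LOCK_S | LOCK_REC_NOT_GAP	=> FALSE: a lock on the record
	itself ignores pure gap locks;

	LOCK_X | LOCK_GAP | LOCK_INSERT_INTENTION
					=> TRUE: insert intention locks
	must respect gap locks held by other transactions. */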

/*********************************************************************//**
Checks if a lock request lock1 has to wait for request lock2.
@return TRUE if lock1 has to wait for lock2 to be removed */
ibool
lock_has_to_wait(
/*=============*/
	const lock_t*	lock1,	/*!< in: waiting lock */
	const lock_t*	lock2)	/*!< in: another lock; NOTE that it is
				assumed that this has a lock bit set
				on the same record as in lock1 if the
				locks are record locks */
{
	ut_ad(lock1 && lock2);

	if (lock1->trx != lock2->trx
	    && !lock_mode_compatible(lock_get_mode(lock1),
				     lock_get_mode(lock2))) {
		if (lock_get_type_low(lock1) == LOCK_REC) {
			ut_ad(lock_get_type_low(lock2) == LOCK_REC);

			/* If this lock request is for a supremum record
			then the second bit on the lock bitmap is set */

			if (lock1->type_mode
			    & (LOCK_PREDICATE | LOCK_PRDT_PAGE)) {
				return(lock_prdt_has_to_wait(
					lock1->trx, lock1->type_mode,
					lock_get_prdt_from_lock(lock1),
					lock2));
			} else {
				return(lock_rec_has_to_wait(
					lock1->trx, lock1->type_mode, lock2,
					lock_rec_get_nth_bit(lock1, true)));
			}
		}

		return(TRUE);
	}

	return(FALSE);
}

/*============== RECORD LOCK BASIC FUNCTIONS ============================*/

/**********************************************************************//**
Looks for a set bit in a record lock bitmap. Returns ULINT_UNDEFINED,
if none found.
@return bit index == heap number of the record, or ULINT_UNDEFINED if
none found */
ulint
lock_rec_find_set_bit(
/*==================*/
	const lock_t*	lock)	/*!< in: record lock with at least one bit set */
{
	for (ulint i = 0; i < lock_rec_get_n_bits(lock); ++i) {

		if (lock_rec_get_nth_bit(lock, i)) {

			return(i);
		}
	}

	return(ULINT_UNDEFINED);
}

/** Reset the nth bit of a record lock.
@param[in,out] lock record lock
@param[in] i index of the bit that will be reset
@return previous value of the bit */
UNIV_INLINE
byte
lock_rec_reset_nth_bit(
	lock_t*	lock,
	ulint	i)
{
	ut_ad(lock_get_type_low(lock) == LOCK_REC);
	ut_ad(i < lock->un_member.rec_lock.n_bits);

	byte*	b = reinterpret_cast<byte*>(&lock[1]) + (i >> 3);
	byte	mask = 1 << (i & 7);
	byte	bit = *b & mask;
	*b &= ~mask;

	if (bit != 0) {
		ut_ad(lock->trx->lock.n_rec_locks > 0);
		--lock->trx->lock.n_rec_locks;
	}

	return(bit);
}
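
/* Bit addressing sketch: the bitmap lives immediately after the lock_t
struct, one bit per heap number. For example, with i == 11 the target
byte is reinterpret_cast<byte*>(&lock[1])[1], because 11 >> 3 == 1, and
the mask is 1 << (11 & 7) == 0x08. */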

/** Reset the nth bit of a record lock, and if the lock was a waiting
request, also clear the wait flag and the transaction's wait_lock
back pointer.
@param[in,out]	lock record lock
@param[in] i	index of the bit that will be reset
@param[in] type	whether the lock is in wait mode */
void
lock_rec_trx_wait(
	lock_t*	lock,
	ulint	i,
	ulint	type)
{
	lock_rec_reset_nth_bit(lock, i);

	if (type & LOCK_WAIT) {
		lock_reset_lock_and_trx_wait(lock);
	}
}

/*********************************************************************//**
Determines if there are explicit record locks on a page.
@return an explicit record lock on the page, or NULL if there are none */
lock_t*
lock_rec_expl_exist_on_page(
/*========================*/
	ulint	space,	/*!< in: space id */
	ulint	page_no)/*!< in: page number */
{
	lock_t*	lock;

	lock_mutex_enter();
	/* Only used in ibuf pages, so rec_hash is good enough */
	lock = lock_rec_get_first_on_page_addr(lock_sys->rec_hash,
					       space, page_no);
	lock_mutex_exit();

	return(lock);
}

/*********************************************************************//**
Resets the record lock bitmap to zero. NOTE: does not touch the wait_lock
pointer in the transaction! This function is used in lock object creation
and resetting. */
static
void
lock_rec_bitmap_reset(
/*==================*/
	lock_t*	lock)	/*!< in: record lock */
{
	ulint	n_bytes;

	ut_ad(lock_get_type_low(lock) == LOCK_REC);

	/* Reset to zero the bitmap which resides immediately after the lock
	struct */

	n_bytes = lock_rec_get_n_bits(lock) / 8;

	ut_ad((lock_rec_get_n_bits(lock) % 8) == 0);

	memset(&lock[1], 0, n_bytes);
}

/*********************************************************************//**
Copies a record lock to heap.
@return copy of lock */
static
lock_t*
lock_rec_copy(
/*==========*/
	const lock_t*	lock,	/*!< in: record lock */
	mem_heap_t*	heap)	/*!< in: memory heap */
{
	ulint	size;

	ut_ad(lock_get_type_low(lock) == LOCK_REC);

	size = sizeof(lock_t) + lock_rec_get_n_bits(lock) / 8;

	return(static_cast<lock_t*>(mem_heap_dup(heap, lock, size)));
}

/*********************************************************************//**
Gets the previous record lock set on a record.
@return previous lock on the same record, NULL if none exists */
const lock_t*
lock_rec_get_prev(
/*==============*/
	const lock_t*	in_lock,/*!< in: record lock */
	ulint		heap_no)/*!< in: heap number of the record */
{
	lock_t*		lock;
	ulint		space;
	ulint		page_no;
	lock_t*		found_lock	= NULL;
	hash_table_t*	hash;

	ut_ad(lock_mutex_own());
	ut_ad(lock_get_type_low(in_lock) == LOCK_REC);

	space = in_lock->un_member.rec_lock.space;
	page_no = in_lock->un_member.rec_lock.page_no;

	hash = lock_hash_get(in_lock->type_mode);

	for (lock = lock_rec_get_first_on_page_addr(hash, space, page_no);
	     /* No op */;
	     lock = lock_rec_get_next_on_page(lock)) {

		ut_ad(lock);

		if (lock == in_lock) {

			return(found_lock);
		}

		if (lock_rec_get_nth_bit(lock, heap_no)) {

			found_lock = lock;
		}
	}
}

/*============= FUNCTIONS FOR ANALYZING RECORD LOCK QUEUE ================*/

/*********************************************************************//**
Checks if a transaction has a GRANTED explicit lock on rec stronger or equal
to precise_mode.
@return lock or NULL */
UNIV_INLINE
lock_t*
lock_rec_has_expl(
/*==============*/
	ulint			precise_mode,/*!< in: LOCK_S or LOCK_X
					possibly ORed to LOCK_GAP or
					LOCK_REC_NOT_GAP; for a
					supremum record we regard this
					as always a gap type request */
	const buf_block_t*	block,	/*!< in: buffer block containing
					the record */
	ulint			heap_no,/*!< in: heap number of the record */
	const trx_t*		trx)	/*!< in: transaction */
{
	lock_t*	lock;

	ut_ad(lock_mutex_own());
	ut_ad((precise_mode & LOCK_MODE_MASK) == LOCK_S
	      || (precise_mode & LOCK_MODE_MASK) == LOCK_X);
	ut_ad(!(precise_mode & LOCK_INSERT_INTENTION));

	for (lock = lock_rec_get_first(lock_sys->rec_hash, block, heap_no);
	     lock != NULL;
	     lock = lock_rec_get_next(heap_no, lock)) {

		if (lock->trx == trx
		    && !lock_rec_get_insert_intention(lock)
		    && lock_mode_stronger_or_eq(
			    lock_get_mode(lock),
			    static_cast<lock_mode>(
				    precise_mode & LOCK_MODE_MASK))
		    && !lock_get_wait(lock)
		    && (!lock_rec_get_rec_not_gap(lock)
			|| (precise_mode & LOCK_REC_NOT_GAP)
			|| heap_no == PAGE_HEAP_NO_SUPREMUM)
		    && (!lock_rec_get_gap(lock)
			|| (precise_mode & LOCK_GAP)
			|| heap_no == PAGE_HEAP_NO_SUPREMUM)) {

			return(lock);
		}
	}

	return(NULL);
}
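
/* Example of the precise_mode matching above: if trx already holds an
ordinary next-key LOCK_X on the record (neither LOCK_GAP nor
LOCK_REC_NOT_GAP set in its type_mode), then

	lock_rec_has_expl(LOCK_S | LOCK_REC_NOT_GAP, block, heap_no, trx)

returns that lock: X is stronger than S, and an ordinary lock covers
both the record and the gap before it. */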

#ifdef UNIV_DEBUG
/*********************************************************************//**
Checks if some other transaction has a lock request in the queue.
@return lock or NULL */
static
const lock_t*
lock_rec_other_has_expl_req(
/*========================*/
	lock_mode		mode,	/*!< in: LOCK_S or LOCK_X */
	const buf_block_t*	block,	/*!< in: buffer block containing
					the record */
	bool			wait,	/*!< in: whether also waiting locks
					are taken into account */
	ulint			heap_no,/*!< in: heap number of the record */
	const trx_t*		trx)	/*!< in: transaction, or NULL if
					requests by all transactions
					are taken into account */
{

	ut_ad(lock_mutex_own());
	ut_ad(mode == LOCK_X || mode == LOCK_S);

	/* Only GAP lock can be on SUPREMUM, and we are not looking for
	GAP lock */
	if (heap_no == PAGE_HEAP_NO_SUPREMUM) {
		return(NULL);
	}

	for (const lock_t* lock = lock_rec_get_first(lock_sys->rec_hash,
						     block, heap_no);
	     lock != NULL;
	     lock = lock_rec_get_next_const(heap_no, lock)) {

		if (lock->trx != trx
		    && !lock_rec_get_gap(lock)
		    && (wait || !lock_get_wait(lock))
		    && lock_mode_stronger_or_eq(lock_get_mode(lock), mode)) {

			return(lock);
		}
	}

	return(NULL);
}
#endif /* UNIV_DEBUG */

/*********************************************************************//**
Checks if some other transaction has a conflicting explicit lock request
in the queue, so that we have to wait.
@return lock or NULL */
static
const lock_t*
lock_rec_other_has_conflicting(
/*===========================*/
	ulint			mode,	/*!< in: LOCK_S or LOCK_X,
					possibly ORed to LOCK_GAP or
					LOCK_REC_NOT_GAP,
					LOCK_INSERT_INTENTION */
	const buf_block_t*	block,	/*!< in: buffer block containing
					the record */
	ulint			heap_no,/*!< in: heap number of the record */
	const trx_t*		trx)	/*!< in: our transaction */
{
	const lock_t*		lock;

	ut_ad(lock_mutex_own());

	bool	is_supremum = (heap_no == PAGE_HEAP_NO_SUPREMUM);

	for (lock = lock_rec_get_first(lock_sys->rec_hash, block, heap_no);
	     lock != NULL;
	     lock = lock_rec_get_next_const(heap_no, lock)) {

		if (lock_rec_has_to_wait(trx, mode, lock, is_supremum)) {
			return(lock);
		}
	}

	return(NULL);
}

/*********************************************************************//**
Checks if some transaction has an implicit x-lock on a record in a secondary
index.
@return the transaction which has the x-lock, or NULL;
NOTE that this function can return false positives but never false
negatives. The caller must confirm all positive results by calling
trx_is_active(). */
static
trx_t*
lock_sec_rec_some_has_impl(
/*=======================*/
	const rec_t*	rec,	/*!< in: user record */
	dict_index_t*	index,	/*!< in: secondary index */
	const ulint*	offsets)/*!< in: rec_get_offsets(rec, index) */
{
	trx_t*		trx;
	trx_id_t	max_trx_id;
	const page_t*	page = page_align(rec);

	ut_ad(!lock_mutex_own());
	ut_ad(!trx_sys_mutex_own());
	ut_ad(!dict_index_is_clust(index));
	ut_ad(page_rec_is_user_rec(rec));
	ut_ad(rec_offs_validate(rec, index, offsets));

	max_trx_id = page_get_max_trx_id(page);

	/* Some transaction may have an implicit x-lock on the record only
	if the max trx id for the page >= min trx id for the trx list, or
	database recovery is running. We do not write the changes of a page
	max trx id to the log, and therefore during recovery, this value
	for a page may be incorrect. */

	if (max_trx_id < trx_rw_min_trx_id() && !recv_recovery_is_on()) {

		trx = 0;

	} else if (!lock_check_trx_id_sanity(max_trx_id, rec, index, offsets)) {

		/* The page is corrupt: try to avoid a crash by returning 0 */
		trx = 0;

	/* In this case it is possible that some transaction has an implicit
	x-lock. We have to look in the clustered index. */

	} else {
		trx = row_vers_impl_x_locked(rec, index, offsets);
	}

	return(trx);
}

#ifdef UNIV_DEBUG
/*********************************************************************//**
Checks if some transaction, other than given trx_id, has an explicit
lock on the given rec, in the given precise_mode.
@return	the transaction, whose id is not equal to trx_id, that has an
explicit lock on the given rec, in the given precise_mode or NULL.*/
static
trx_t*
lock_rec_other_trx_holds_expl(
/*==========================*/
	ulint			precise_mode,	/*!< in: LOCK_S or LOCK_X
						possibly ORed to LOCK_GAP or
						LOCK_REC_NOT_GAP. */
	trx_t*			trx,		/*!< in: trx holding implicit
						lock on rec */
	const rec_t*		rec,		/*!< in: user record */
	const buf_block_t*	block)		/*!< in: buffer block
						containing the record */
{
	trx_t* holds = NULL;

	lock_mutex_enter();

	if (trx_t* impl_trx = trx_rw_is_active(trx->id, NULL, false)) {
		ulint heap_no = page_rec_get_heap_no(rec);
		mutex_enter(&trx_sys->mutex);

		for (trx_t* t = UT_LIST_GET_FIRST(trx_sys->rw_trx_list);
		     t != NULL;
		     t = UT_LIST_GET_NEXT(trx_list, t)) {

			lock_t* expl_lock = lock_rec_has_expl(
				precise_mode, block, heap_no, t);

			if (expl_lock && expl_lock->trx != impl_trx) {
				/* An explicit lock is held by trx other than
				the trx holding the implicit lock. */
				holds = expl_lock->trx;
				break;
			}
		}

		mutex_exit(&trx_sys->mutex);
	}

	lock_mutex_exit();

	return(holds);
}
#endif /* UNIV_DEBUG */

/*********************************************************************//**
Return the approximate number of record locks (bits set in the bitmap) for
this transaction. Since delete-marked records may be removed, the
record count will not be precise.
The caller must be holding lock_sys->mutex. */
ulint
lock_number_of_rows_locked(
/*=======================*/
	const trx_lock_t*	trx_lock)	/*!< in: transaction locks */
{
	ut_ad(lock_mutex_own());

	return(trx_lock->n_rec_locks);
}

/*********************************************************************//**
Return the number of table locks for a transaction.
The caller must be holding lock_sys->mutex. */
ulint
lock_number_of_tables_locked(
/*=========================*/
	const trx_lock_t*	trx_lock)	/*!< in: transaction locks */
{
	const lock_t*	lock;
	ulint		n_tables = 0;

	ut_ad(lock_mutex_own());

	for (lock = UT_LIST_GET_FIRST(trx_lock->trx_locks);
	     lock != NULL;
	     lock = UT_LIST_GET_NEXT(trx_locks, lock)) {

		if (lock_get_type_low(lock) == LOCK_TABLE) {
			n_tables++;
		}
	}

	return(n_tables);
}

/*============== RECORD LOCK CREATION AND QUEUE MANAGEMENT =============*/

/**
Check if the lock is on m_rec_id.
@param[in] lock			Lock to compare with
@return true if the record lock is on m_rec_id */
bool
RecLock::is_on_row(const lock_t* lock) const
{
	ut_ad(lock_get_type_low(lock) == LOCK_REC);

	const lock_rec_t&	other = lock->un_member.rec_lock;

	return(other.space == m_rec_id.m_space_id
	       && other.page_no == m_rec_id.m_page_no
	       && lock_rec_get_nth_bit(lock, m_rec_id.m_heap_no));
}

/**
Do some checks and prepare for creating a new record lock */
void
RecLock::prepare() const
{
	ut_ad(lock_mutex_own());
	ut_ad(m_trx == thr_get_trx(m_thr));

	/* Test if there already is some other reason to suspend thread:
	we do not enqueue a lock request if the query thread should be
	stopped anyway */

	if (que_thr_stop(m_thr)) {
		ut_error;
	}

	switch (trx_get_dict_operation(m_trx)) {
	case TRX_DICT_OP_NONE:
		break;
	case TRX_DICT_OP_TABLE:
	case TRX_DICT_OP_INDEX:
		ib::error() << "A record lock wait happens in a dictionary"
			" operation. index " << m_index->name
			<< " of table " << m_index->table->name
			<< ". " << BUG_REPORT_MSG;
		ut_ad(0);
	}

	ut_ad(m_index->table->n_ref_count > 0
	      || !m_index->table->can_be_evicted);
}

/**
Create the lock instance
@param[in, out] trx	The transaction requesting the lock
@param[in, out] index	Index on which record lock is required
@param[in] mode		The lock mode desired
@param[in] rec_id	The record id
@param[in] size		Size of the lock + bitmap requested
@return a record lock instance */
lock_t*
RecLock::lock_alloc(
	trx_t*		trx,
	dict_index_t*	index,
	ulint		mode,
	const RecID&	rec_id,
	ulint		size)
{
	ut_ad(lock_mutex_own());

	lock_t*	lock;

	if (trx->lock.rec_cached >= trx->lock.rec_pool.size()
	    || sizeof(*lock) + size > REC_LOCK_SIZE) {

		ulint		n_bytes = size + sizeof(*lock);
		mem_heap_t*	heap = trx->lock.lock_heap;

		lock = reinterpret_cast<lock_t*>(mem_heap_alloc(heap, n_bytes));
	} else {

		lock = trx->lock.rec_pool[trx->lock.rec_cached];
		++trx->lock.rec_cached;
	}

	lock->trx = trx;

	lock->index = index;

	/* Setup the lock attributes */

	lock->type_mode = LOCK_REC | (mode & ~LOCK_TYPE_MASK);

	lock_rec_t&	rec_lock = lock->un_member.rec_lock;

	/* Predicate lock always on INFIMUM (0) */

	if (is_predicate_lock(mode)) {

		rec_lock.n_bits = 8;

		memset(&lock[1], 0x0, 1);

	} else {
		ut_ad(8 * size < UINT32_MAX);
		rec_lock.n_bits = static_cast<uint32_t>(8 * size);

		memset(&lock[1], 0x0, size);
	}

	rec_lock.space = rec_id.m_space_id;

	rec_lock.page_no = rec_id.m_page_no;

	/* Set the bit corresponding to rec */

	lock_rec_set_nth_bit(lock, rec_id.m_heap_no);

	MONITOR_INC(MONITOR_NUM_RECLOCK);

	MONITOR_INC(MONITOR_RECLOCK_CREATED);

	return(lock);
}
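
/* Sizing sketch for the non-predicate branch above: the bitmap is the
`size` bytes that follow the lock_t struct, so it holds 8 * size bits,
one per heap number on the page. For example, size == 10 covers heap
numbers 0..79; pages with more records simply request a larger size. */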

/**
Add the lock to the record lock hash and the transaction's lock list
@param[in,out] lock	Newly created record lock to add to the rec hash
@param[in] add_to_hash	If the lock should be added to the hash table */
void
RecLock::lock_add(lock_t* lock, bool add_to_hash)
{
	ut_ad(lock_mutex_own());
	ut_ad(trx_mutex_own(lock->trx));

	if (add_to_hash) {
		ulint	key = m_rec_id.fold();

		++lock->index->table->n_rec_locks;

		HASH_INSERT(lock_t, hash, lock_hash_get(m_mode), key, lock);
	}

	if (m_mode & LOCK_WAIT) {
		lock_set_lock_and_trx_wait(lock, lock->trx);
	}

	UT_LIST_ADD_LAST(lock->trx->lock.trx_locks, lock);
}

/**
Create a new lock.
@param[in,out] trx		Transaction requesting the lock
@param[in] owns_trx_mutex	true if caller owns the trx_t::mutex
@param[in] add_to_hash		add the lock to hash table
@param[in] prdt			Predicate lock (optional)
@return a new lock instance */
lock_t*
RecLock::create(
	trx_t*	trx,
	bool	owns_trx_mutex,
	bool	add_to_hash,
	const lock_prdt_t*	prdt)
{
	ut_ad(lock_mutex_own());
	ut_ad(owns_trx_mutex == trx_mutex_own(trx));

	/* Create the explicit lock instance and initialise it. */

	lock_t*	lock = lock_alloc(trx, m_index, m_mode, m_rec_id, m_size);

	if (prdt != NULL && (m_mode & LOCK_PREDICATE)) {

		lock_prdt_set_prdt(lock, prdt);
	}

	/* Ensure that another transaction doesn't access the trx
	lock state and lock data structures while we are adding the
	lock and changing the transaction state to LOCK_WAIT */

	if (!owns_trx_mutex) {
		trx_mutex_enter(trx);
	}

	lock_add(lock, add_to_hash);

	if (!owns_trx_mutex) {
		trx_mutex_exit(trx);
	}

	return(lock);
}

/**
Check the outcome of the deadlock check
@param[in,out] victim_trx	Transaction selected for rollback
@param[in,out] lock		Lock being requested
@return DB_LOCK_WAIT, DB_DEADLOCK or DB_SUCCESS_LOCKED_REC */
dberr_t
RecLock::check_deadlock_result(const trx_t* victim_trx, lock_t* lock)
{
	ut_ad(lock_mutex_own());
	ut_ad(m_trx == lock->trx);
	ut_ad(trx_mutex_own(m_trx));

	if (victim_trx != NULL) {

		ut_ad(victim_trx == m_trx);

		lock_reset_lock_and_trx_wait(lock);

		lock_rec_reset_nth_bit(lock, m_rec_id.m_heap_no);

		return(DB_DEADLOCK);

	} else if (m_trx->lock.wait_lock == NULL) {

		/* If there was a deadlock but we chose another
		transaction as a victim, it is possible that we
		already have the lock now granted! */

		return(DB_SUCCESS_LOCKED_REC);
	}

	return(DB_LOCK_WAIT);
}

/**
Check and resolve any deadlocks
@param[in, out] lock		The lock being acquired
@return DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED, or
	DB_SUCCESS_LOCKED_REC; DB_SUCCESS_LOCKED_REC means that
	there was a deadlock, but another transaction was chosen
	as a victim, and we got the lock immediately: no need to
	wait then */
dberr_t
RecLock::deadlock_check(lock_t* lock)
{
	ut_ad(lock_mutex_own());
	ut_ad(lock->trx == m_trx);
	ut_ad(trx_mutex_own(m_trx));

	const trx_t*	victim_trx =
			DeadlockChecker::check_and_resolve(lock, m_trx);

	/* Check the outcome of the deadlock test. It is possible that
	the transaction that blocked our lock was rolled back and we
	were granted our lock. */

	dberr_t	err = check_deadlock_result(victim_trx, lock);

	if (err == DB_LOCK_WAIT) {

		set_wait_state(lock);

		MONITOR_INC(MONITOR_LOCKREC_WAIT);
	}

	return(err);
}

/**
Collect the transactions that will need to be rolled back asynchronously
@param[in, out] trx	Transaction to be rolled back */
void
RecLock::mark_trx_for_rollback(trx_t* trx)
{
	trx->abort = true;

	ut_ad(!trx->read_only);
	ut_ad(trx_mutex_own(m_trx));
	ut_ad(!(trx->in_innodb & TRX_FORCE_ROLLBACK));
	ut_ad(!(trx->in_innodb & TRX_FORCE_ROLLBACK_ASYNC));
	ut_ad(!(trx->in_innodb & TRX_FORCE_ROLLBACK_DISABLE));

	/* Note that we will attempt an async rollback. The _ASYNC
	flag will be cleared if the transaction is rolled back
	synchronously before we get a chance to do it. */

	trx->in_innodb |= TRX_FORCE_ROLLBACK | TRX_FORCE_ROLLBACK_ASYNC;

	bool		cas;
	os_thread_id_t	thread_id = os_thread_get_curr_id();

	cas = os_compare_and_swap_thread_id(&trx->killed_by, 0, thread_id);

	ut_a(cas);

	m_trx->hit_list.push_back(hit_list_t::value_type(trx));

#ifdef UNIV_DEBUG
	THD*	thd = trx->mysql_thd;

	if (thd != NULL) {

		char	buffer[1024];
		ib::info() << "Blocking transaction: ID: " << trx->id << " - "
			<< " Blocked transaction ID: "<< m_trx->id << " - "
			<< thd_security_context(thd, buffer, sizeof(buffer),
						512);
	}
#endif /* UNIV_DEBUG */
}

/**
Setup the requesting transaction state for lock grant
@param[in,out] lock		Lock for which to change state */
void
RecLock::set_wait_state(lock_t* lock)
{
	ut_ad(lock_mutex_own());
	ut_ad(m_trx == lock->trx);
	ut_ad(trx_mutex_own(m_trx));
	ut_ad(lock_get_wait(lock));

	m_trx->lock.wait_started = ut_time();

	m_trx->lock.que_state = TRX_QUE_LOCK_WAIT;

	m_trx->lock.was_chosen_as_deadlock_victim = false;

	bool	stopped = que_thr_stop(m_thr);
	ut_a(stopped);
}

1699 /**
1700 Enqueue a lock wait for normal transaction. If it is a high priority transaction
1701 then jump the record lock wait queue and if the transaction at the head of the
1702 queue is itself waiting roll it back, also do a deadlock check and resolve.
1703 @param[in, out] wait_for	The lock that the joining transaction is
1704 				waiting for
1705 @param[in] prdt			Predicate [optional]
1706 @return DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED, or
1707 	DB_SUCCESS_LOCKED_REC; DB_SUCCESS_LOCKED_REC means that
1708 	there was a deadlock, but another transaction was chosen
1709 	as a victim, and we got the lock immediately: no need to
1710 	wait then */
1711 dberr_t
add_to_waitq(const lock_t * wait_for,const lock_prdt_t * prdt)1712 RecLock::add_to_waitq(const lock_t* wait_for, const lock_prdt_t* prdt)
1713 {
1714 	ut_ad(lock_mutex_own());
1715 	ut_ad(m_trx == thr_get_trx(m_thr));
1716 	ut_ad(trx_mutex_own(m_trx));
1717 
1718 	DEBUG_SYNC_C("rec_lock_add_to_waitq");
1719 
1720 	m_mode |= LOCK_WAIT;
1721 
1722 	/* Do the preliminary checks, and set query thread state */
1723 
1724 	prepare();
1725 
1726 	bool	high_priority = trx_is_high_priority(m_trx);
1727 
1728 	/* Don't add the lock to the hash table if this is a high priority transaction. */
1729 	lock_t*	lock = create(m_trx, true, !high_priority, prdt);
1730 
1731 	/* Attempt to jump over the low priority waiting locks. */
1732 	if (high_priority && jump_queue(lock, wait_for)) {
1733 
1734 		/* Lock is granted */
1735 		return(DB_SUCCESS);
1736 	}
1737 
1738 	ut_ad(lock_get_wait(lock));
1739 
1740 	dberr_t	err = deadlock_check(lock);
1741 
1742 	ut_ad(trx_mutex_own(m_trx));
1743 
1744 	/* m_trx->mysql_thd is NULL if it's an internal trx. So current_thd is used */
1745 	if (err == DB_LOCK_WAIT) {
1746 		thd_report_row_lock_wait(current_thd, wait_for->trx->mysql_thd);
1747 	}
1748 	return(err);
1749 }
1750 
1751 /*********************************************************************//**
1752 Adds a record lock request in the record queue. The request is normally
1753 added as the last in the queue, but if there are no waiting lock requests
1754 on the record, and the request to be added is not a waiting request, we
1755 can reuse a suitable record lock object already existing on the same page,
1756 just setting the appropriate bit in its bitmap. This is a low-level function
1757 which does NOT check for deadlocks or lock compatibility!
1758 @return lock where the bit was set */
1759 static
1760 void
1761 lock_rec_add_to_queue(
1762 /*==================*/
1763 	ulint			type_mode,/*!< in: lock mode, wait, gap
1764 					etc. flags; type is ignored
1765 					and replaced by LOCK_REC */
1766 	const buf_block_t*	block,	/*!< in: buffer block containing
1767 					the record */
1768 	ulint			heap_no,/*!< in: heap number of the record */
1769 	dict_index_t*		index,	/*!< in: index of record */
1770 	trx_t*			trx,	/*!< in/out: transaction */
1771 	bool			caller_owns_trx_mutex)
1772 					/*!< in: TRUE if caller owns the
1773 					transaction mutex */
1774 {
1775 #ifdef UNIV_DEBUG
1776 	ut_ad(lock_mutex_own());
1777 	ut_ad(caller_owns_trx_mutex == trx_mutex_own(trx));
1778 	ut_ad(dict_index_is_clust(index)
1779 	      || dict_index_get_online_status(index) != ONLINE_INDEX_CREATION);
1780 	switch (type_mode & LOCK_MODE_MASK) {
1781 	case LOCK_X:
1782 	case LOCK_S:
1783 		break;
1784 	default:
1785 		ut_error;
1786 	}
1787 
1788 	if (!(type_mode & (LOCK_WAIT | LOCK_GAP))) {
1789 		lock_mode	mode = (type_mode & LOCK_MODE_MASK) == LOCK_S
1790 			? LOCK_X
1791 			: LOCK_S;
1792 		const lock_t*	other_lock
1793 			= lock_rec_other_has_expl_req(
1794 				mode, block, false, heap_no, trx);
1795 		ut_a(!other_lock);
1796 	}
1797 #endif /* UNIV_DEBUG */
1798 
1799 	type_mode |= LOCK_REC;
1800 
1801 	/* If rec is the supremum record, then we can reset the gap bit, as
1802 	all locks on the supremum are automatically of the gap type, and we
1803 	try to avoid unnecessary memory consumption of a new record lock
1804 	struct for a gap type lock */
1805 
1806 	if (heap_no == PAGE_HEAP_NO_SUPREMUM) {
1807 		ut_ad(!(type_mode & LOCK_REC_NOT_GAP));
1808 
1809 		/* There should never be LOCK_REC_NOT_GAP on a supremum
1810 		record, but let us play safe */
1811 
1812 		type_mode &= ~(LOCK_GAP | LOCK_REC_NOT_GAP);
1813 	}
1814 
1815 	lock_t*		lock;
1816 	lock_t*		first_lock;
1817 	hash_table_t*	hash = lock_hash_get(type_mode);
1818 
1819 	/* Look for a waiting lock request on the same record or on a gap */
1820 
1821 	for (first_lock = lock = lock_rec_get_first_on_page(hash, block);
1822 	     lock != NULL;
1823 	     lock = lock_rec_get_next_on_page(lock)) {
1824 
1825 		if (lock_get_wait(lock)
1826 		    && lock_rec_get_nth_bit(lock, heap_no)) {
1827 
1828 			break;
1829 		}
1830 	}
1831 
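	/* Note: if a waiting request exists on this heap_no, we must not
	piggyback on an existing granted lock object below; setting a bit
	in a granted lock would let this request overtake the waiter. */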
1832 	if (lock == NULL && !(type_mode & LOCK_WAIT)) {
1833 
1834 		/* Look for a similar record lock on the same page:
1835 		if one is found and there are no waiting lock requests,
1836 		we can just set the bit */
1837 
1838 		lock = lock_rec_find_similar_on_page(
1839 			type_mode, heap_no, first_lock, trx);
1840 
1841 		if (lock != NULL) {
1842 
1843 			lock_rec_set_nth_bit(lock, heap_no);
1844 
1845 			return;
1846 		}
1847 	}
1848 
1849 	RecLock		rec_lock(index, block, heap_no, type_mode);
1850 
1851 	rec_lock.create(trx, caller_owns_trx_mutex, true);
1852 }
1853 
1854 /*********************************************************************//**
1855 This is a fast routine for locking a record in the most common cases:
1856 there are no explicit locks on the page, or there is just one lock, owned
1857 by this transaction, and of the right type_mode. This is a low-level function
1858 which does NOT look at implicit locks! Checks lock compatibility within
1859 explicit locks. This function sets a normal next-key lock, or in the case of
1860 a page supremum record, a gap type lock.
1861 @return whether the locking succeeded */
1862 UNIV_INLINE
1863 lock_rec_req_status
1864 lock_rec_lock_fast(
1865 /*===============*/
1866 	bool			impl,	/*!< in: if TRUE, no lock is set
1867 					if no wait is necessary: we
1868 					assume that the caller will
1869 					set an implicit lock */
1870 	ulint			mode,	/*!< in: lock mode: LOCK_X or
1871 					LOCK_S possibly ORed to either
1872 					LOCK_GAP or LOCK_REC_NOT_GAP */
1873 	const buf_block_t*	block,	/*!< in: buffer block containing
1874 					the record */
1875 	ulint			heap_no,/*!< in: heap number of record */
1876 	dict_index_t*		index,	/*!< in: index of record */
1877 	que_thr_t*		thr)	/*!< in: query thread */
1878 {
1879 	ut_ad(lock_mutex_own());
1880 	ut_ad(!srv_read_only_mode);
1881 	ut_ad((LOCK_MODE_MASK & mode) != LOCK_S
1882 	      || lock_table_has(thr_get_trx(thr), index->table, LOCK_IS));
1883 	ut_ad((LOCK_MODE_MASK & mode) != LOCK_X
1884 	      || lock_table_has(thr_get_trx(thr), index->table, LOCK_IX)
1885 	      || srv_read_only_mode);
1886 	ut_ad((LOCK_MODE_MASK & mode) == LOCK_S
1887 	      || (LOCK_MODE_MASK & mode) == LOCK_X);
1888 	ut_ad(mode - (LOCK_MODE_MASK & mode) == LOCK_GAP
1889 	      || mode - (LOCK_MODE_MASK & mode) == 0
1890 	      || mode - (LOCK_MODE_MASK & mode) == LOCK_REC_NOT_GAP);
1891 	ut_ad(dict_index_is_clust(index) || !dict_index_is_online_ddl(index));
1892 
1893 	DBUG_EXECUTE_IF("innodb_report_deadlock", return(LOCK_REC_FAIL););
1894 
1895 	lock_t*	lock = lock_rec_get_first_on_page(lock_sys->rec_hash, block);
1896 
1897 	trx_t*	trx = thr_get_trx(thr);
1898 
1899 	lock_rec_req_status	status = LOCK_REC_SUCCESS;
1900 
1901 	if (lock == NULL) {
1902 
1903 		if (!impl) {
1904 			RecLock	rec_lock(index, block, heap_no, mode);
1905 
1906 			/* Note that we don't own the trx mutex. */
1907 			rec_lock.create(trx, false, true);
1908 		}
1909 
1910 		status = LOCK_REC_SUCCESS_CREATED;
1911 	} else {
1912 		trx_mutex_enter(trx);
1913 
1914 		if (lock_rec_get_next_on_page(lock)
1915 		     || lock->trx != trx
1916 		     || lock->type_mode != (mode | LOCK_REC)
1917 		     || lock_rec_get_n_bits(lock) <= heap_no) {
1918 
1919 			status = LOCK_REC_FAIL;
1920 		} else if (!impl) {
1921 			/* If the nth bit of the record lock is already set,
1922 			there is nothing to do; otherwise we set it now. */
1924 			if (!lock_rec_get_nth_bit(lock, heap_no)) {
1925 				lock_rec_set_nth_bit(lock, heap_no);
1926 				status = LOCK_REC_SUCCESS_CREATED;
1927 			}
1928 		}
1929 
1930 		trx_mutex_exit(trx);
1931 	}
1932 
1933 	return(status);
1934 }
1935 
1936 /*********************************************************************//**
1937 This is the general, and slower, routine for locking a record. This is a
1938 low-level function which does NOT look at implicit locks! Checks lock
1939 compatibility within explicit locks. This function sets a normal next-key
1940 lock, or in the case of a page supremum record, a gap type lock.
1941 @return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, DB_LOCK_WAIT, DB_DEADLOCK,
1942 or DB_QUE_THR_SUSPENDED */
1943 static
1944 dberr_t
1945 lock_rec_lock_slow(
1946 /*===============*/
1947 	ibool			impl,	/*!< in: if TRUE, no lock is set
1948 					if no wait is necessary: we
1949 					assume that the caller will
1950 					set an implicit lock */
1951 	ulint			mode,	/*!< in: lock mode: LOCK_X or
1952 					LOCK_S possibly ORed to either
1953 					LOCK_GAP or LOCK_REC_NOT_GAP */
1954 	const buf_block_t*	block,	/*!< in: buffer block containing
1955 					the record */
1956 	ulint			heap_no,/*!< in: heap number of record */
1957 	dict_index_t*		index,	/*!< in: index of record */
1958 	que_thr_t*		thr)	/*!< in: query thread */
1959 {
1960 	ut_ad(lock_mutex_own());
1961 	ut_ad(!srv_read_only_mode);
1962 	ut_ad((LOCK_MODE_MASK & mode) != LOCK_S
1963 	      || lock_table_has(thr_get_trx(thr), index->table, LOCK_IS));
1964 	ut_ad((LOCK_MODE_MASK & mode) != LOCK_X
1965 	      || lock_table_has(thr_get_trx(thr), index->table, LOCK_IX));
1966 	ut_ad((LOCK_MODE_MASK & mode) == LOCK_S
1967 	      || (LOCK_MODE_MASK & mode) == LOCK_X);
1968 	ut_ad(mode - (LOCK_MODE_MASK & mode) == LOCK_GAP
1969 	      || mode - (LOCK_MODE_MASK & mode) == 0
1970 	      || mode - (LOCK_MODE_MASK & mode) == LOCK_REC_NOT_GAP);
1971 	ut_ad(dict_index_is_clust(index) || !dict_index_is_online_ddl(index));
1972 
1973 	DBUG_EXECUTE_IF("innodb_report_deadlock", return(DB_DEADLOCK););
1974 
1975 	dberr_t	err;
1976 	trx_t*	trx = thr_get_trx(thr);
1977 
1978 	trx_mutex_enter(trx);
1979 
1980 	if (lock_rec_has_expl(mode, block, heap_no, trx)) {
1981 
1982 		/* The trx already has a strong enough lock on rec: do
1983 		nothing */
1984 
1985 		err = DB_SUCCESS;
1986 
1987 	} else {
1988 
1989 		const lock_t* wait_for = lock_rec_other_has_conflicting(
1990 			mode, block, heap_no, trx);
1991 
1992 		if (wait_for != NULL) {
1993 
1994 			/* If another transaction has a non-gap conflicting
1995 			request in the queue, as this transaction does not
1996 			have a lock strong enough already granted on the
1997 			record, we may have to wait. */
1998 
1999 			RecLock	rec_lock(thr, index, block, heap_no, mode);
2000 
2001 			err = rec_lock.add_to_waitq(wait_for);
2002 
2003 		} else if (!impl) {
2004 
2005 			/* Set the requested lock on the record, note that
2006 			we already own the transaction mutex. */
2007 
2008 			lock_rec_add_to_queue(
2009 				LOCK_REC | mode, block, heap_no, index, trx,
2010 				true);
2011 
2012 			err = DB_SUCCESS_LOCKED_REC;
2013 		} else {
2014 			err = DB_SUCCESS;
2015 		}
2016 	}
2017 
2018 	trx_mutex_exit(trx);
2019 
2020 	return(err);
2021 }
2022 
2023 /*********************************************************************//**
2024 Tries to lock the specified record in the mode requested. If not immediately
2025 possible, enqueues a waiting lock request. This is a low-level function
2026 which does NOT look at implicit locks! Checks lock compatibility within
2027 explicit locks. This function sets a normal next-key lock, or in the case
2028 of a page supremum record, a gap type lock.
2029 @return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, DB_LOCK_WAIT, DB_DEADLOCK,
2030 or DB_QUE_THR_SUSPENDED */
2031 static
2032 dberr_t
2033 lock_rec_lock(
2034 /*==========*/
2035 	bool			impl,	/*!< in: if true, no lock is set
2036 					if no wait is necessary: we
2037 					assume that the caller will
2038 					set an implicit lock */
2039 	ulint			mode,	/*!< in: lock mode: LOCK_X or
2040 					LOCK_S possibly ORed to either
2041 					LOCK_GAP or LOCK_REC_NOT_GAP */
2042 	const buf_block_t*	block,	/*!< in: buffer block containing
2043 					the record */
2044 	ulint			heap_no,/*!< in: heap number of record */
2045 	dict_index_t*		index,	/*!< in: index of record */
2046 	que_thr_t*		thr)	/*!< in: query thread */
2047 {
2048 	ut_ad(lock_mutex_own());
2049 	ut_ad(!srv_read_only_mode);
2050 	ut_ad((LOCK_MODE_MASK & mode) != LOCK_S
2051 	      || lock_table_has(thr_get_trx(thr), index->table, LOCK_IS));
2052 	ut_ad((LOCK_MODE_MASK & mode) != LOCK_X
2053 	      || lock_table_has(thr_get_trx(thr), index->table, LOCK_IX));
2054 	ut_ad((LOCK_MODE_MASK & mode) == LOCK_S
2055 	      || (LOCK_MODE_MASK & mode) == LOCK_X);
2056 	ut_ad(mode - (LOCK_MODE_MASK & mode) == LOCK_GAP
2057 	      || mode - (LOCK_MODE_MASK & mode) == LOCK_REC_NOT_GAP
2058 	      || mode - (LOCK_MODE_MASK & mode) == 0);
2059 	ut_ad(dict_index_is_clust(index) || !dict_index_is_online_ddl(index));
2060 
2061 	/* We try a simplified and faster subroutine for the most
2062 	common cases */
2063 	switch (lock_rec_lock_fast(impl, mode, block, heap_no, index, thr)) {
2064 	case LOCK_REC_SUCCESS:
2065 		return(DB_SUCCESS);
2066 	case LOCK_REC_SUCCESS_CREATED:
2067 		return(DB_SUCCESS_LOCKED_REC);
2068 	case LOCK_REC_FAIL:
2069 		return(lock_rec_lock_slow(impl, mode, block,
2070 					  heap_no, index, thr));
2071 	}
2072 
2073 	ut_error;
2074 	return(DB_ERROR);
2075 }
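
/* Usage sketch (hypothetical caller, for illustration only): a row-level
operation holding lock_sys->mutex could request an exclusive record-only
lock like this:

	dberr_t	err = lock_rec_lock(
		false,				// set an explicit lock
		LOCK_X | LOCK_REC_NOT_GAP,	// exclusive, record only
		block, heap_no, index, thr);

A DB_LOCK_WAIT result means the caller must suspend the query thread
until the wait ends. */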
2076 
2077 /*********************************************************************//**
2078 Checks if a waiting record lock request still has to wait in a queue.
2079 @return lock that is causing the wait */
2080 static
2081 const lock_t*
2082 lock_rec_has_to_wait_in_queue(
2083 /*==========================*/
2084 	const lock_t*	wait_lock)	/*!< in: waiting record lock */
2085 {
2086 	const lock_t*	lock;
2087 	ulint		space;
2088 	ulint		page_no;
2089 	ulint		heap_no;
2090 	ulint		bit_mask;
2091 	ulint		bit_offset;
2092 	hash_table_t*	hash;
2093 
2094 	ut_ad(lock_mutex_own());
2095 	ut_ad(lock_get_wait(wait_lock));
2096 	ut_ad(lock_get_type_low(wait_lock) == LOCK_REC);
2097 
2098 	space = wait_lock->un_member.rec_lock.space;
2099 	page_no = wait_lock->un_member.rec_lock.page_no;
2100 	heap_no = lock_rec_find_set_bit(wait_lock);
2101 
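	/* The lock bitmap is stored immediately after the lock_t struct,
	one bit per heap number: bit (heap_no % 8) of byte (heap_no / 8). */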
2102 	bit_offset = heap_no / 8;
2103 	bit_mask = static_cast<ulint>(1 << (heap_no % 8));
2104 
2105 	hash = lock_hash_get(wait_lock->type_mode);
2106 
2107 	for (lock = lock_rec_get_first_on_page_addr(hash, space, page_no);
2108 	     lock != wait_lock;
2109 	     lock = lock_rec_get_next_on_page_const(lock)) {
2110 
2111 		const byte*	p = (const byte*) &lock[1];
2112 
2113 		if (heap_no < lock_rec_get_n_bits(lock)
2114 		    && (p[bit_offset] & bit_mask)
2115 		    && lock_has_to_wait(wait_lock, lock)) {
2116 
2117 			return(lock);
2118 		}
2119 	}
2120 
2121 	return(NULL);
2122 }
2123 
2124 /*************************************************************//**
2125 Grants a lock to a waiting lock request and releases the waiting transaction.
2126 The caller must hold lock_sys->mutex but not lock->trx->mutex. */
2127 static
2128 void
2129 lock_grant(
2130 /*=======*/
2131 	lock_t*	lock)	/*!< in/out: waiting lock request */
2132 {
2133 	ut_ad(lock_mutex_own());
2134 
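	/* Clear the LOCK_WAIT flag and the back-pointer to this lock in
	trx->lock.wait_lock. */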
2135 	lock_reset_lock_and_trx_wait(lock);
2136 
2137 	trx_mutex_enter(lock->trx);
2138 
2139 	if (lock_get_mode(lock) == LOCK_AUTO_INC) {
2140 		dict_table_t*	table = lock->un_member.tab_lock.table;
2141 
2142 		if (table->autoinc_trx == lock->trx) {
2143 			ib::error() << "Transaction already had an"
2144 				<< " AUTO-INC lock!";
2145 		} else {
2146 			table->autoinc_trx = lock->trx;
2147 
2148 			ib_vector_push(lock->trx->autoinc_locks, &lock);
2149 		}
2150 	}
2151 
2152 	DBUG_PRINT("ib_lock", ("wait for trx " TRX_ID_FMT " ends",
2153 			       trx_get_id_for_print(lock->trx)));
2154 
2155 	/* If we are resolving a deadlock by choosing another transaction
2156 	as a victim, then our original transaction may not be in the
2157 	TRX_QUE_LOCK_WAIT state, and there is no need to end the lock wait
2158 	for it */
2159 
2160 	if (lock->trx->lock.que_state == TRX_QUE_LOCK_WAIT) {
2161 		que_thr_t*	thr;
2162 
2163 		thr = que_thr_end_lock_wait(lock->trx);
2164 
2165 		if (thr != NULL) {
2166 			lock_wait_release_thread_if_suspended(thr);
2167 		}
2168 	}
2169 
2170 	trx_mutex_exit(lock->trx);
2171 }
2172 
2173 /**
2174 Jump the queue for the record over all low priority transactions and
2175 add the lock. If all current granted locks are compatible, grant the
2176 lock. Otherwise, mark all granted transactions for asynchronous
2177 rollback and add them to the hit list.
2178 @param[in, out]	lock		Lock being requested
2179 @param[in]	conflict_lock	First conflicting lock from the head
2180 @return true if the lock is granted */
2181 bool
2182 RecLock::jump_queue(
2183 	lock_t*		lock,
2184 	const lock_t*	conflict_lock)
2185 {
2186 	ut_ad(m_trx == lock->trx);
2187 	ut_ad(trx_mutex_own(m_trx));
2188 	ut_ad(conflict_lock->trx != m_trx);
2189 	ut_ad(trx_is_high_priority(m_trx));
2190 	ut_ad(m_rec_id.m_heap_no != ULINT32_UNDEFINED);
2191 
2192 	bool	high_priority = false;
2193 
2194 	/* Find out the position to add the lock. If there are other high
2195 	priority transactions in waiting state then we should add it after
2196 	the last high priority transaction. Otherwise, we can add it after
2197 	the last granted lock jumping over the wait queue. */
2198 	bool grant_lock = lock_add_priority(lock, conflict_lock,
2199 					    &high_priority);
2200 
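	/* Three possible outcomes: (1) the lock can be granted
	immediately; (2) another high priority transaction is already
	waiting, so we queue up behind it; (3) we wait, and the
	conflicting low priority transactions are collected for
	asynchronous rollback below. */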
2201 	if (grant_lock) {
2202 
2203 		ut_ad(conflict_lock->trx->lock.que_state == TRX_QUE_LOCK_WAIT);
2204 		ut_ad(conflict_lock->trx->lock.wait_lock == conflict_lock);
2205 
2206 #ifdef UNIV_DEBUG
2207 		ib::info() << "Granting High Priority Transaction (ID): "
2208 			   << lock->trx->id << " the lock jumping over"
2209 			   << " waiting Transaction (ID): "
2210 			   << conflict_lock->trx->id;
2211 #endif /* UNIV_DEBUG */
2212 
2213 		lock_reset_lock_and_trx_wait(lock);
2214 		return(true);
2215 	}
2216 
2217 	/* If another high priority transaction is found waiting,
2218 	the victim transactions are already marked for rollback. */
2219 	if (high_priority) {
2220 
2221 		return(false);
2222 	}
2223 
2224 	/* The lock is placed after the last granted lock in the queue. Check and add
2225 	low priority transactions to the hit list for ASYNC rollback. */
2226 	make_trx_hit_list(lock, conflict_lock);
2227 
2228 	return(false);
2229 }
2230 
2231 /** Find position in lock queue and add the high priority transaction
2232 lock. Intention and GAP only locks can be granted even if there are
2233 waiting locks in front of the queue. To add the High priority
2234 transaction in a safe position, we follow these rules:
2235 
2236 1. If the lock can be granted, add it before the first waiting lock
2237 in the queue so that all currently waiting locks need to do conflict
2238 check before getting granted.
2239 
2240 2. If the lock has to wait, add it after the last granted lock or the
2241 last waiting high priority transaction in the queue whichever is later.
2242 This ensures that the transaction is granted only after doing conflict
2243 check with all granted transactions.
2244 @param[in]	lock		Lock being requested
2245 @param[in]	conflict_lock	First conflicting lock from the head
2246 @param[out]	high_priority	high priority transaction ahead in queue
2247 @return true if the lock can be granted */
2248 bool
2249 RecLock::lock_add_priority(
2250 	lock_t*		lock,
2251 	const lock_t*	conflict_lock,
2252 	bool*		high_priority)
2253 {
2254 	ut_ad(high_priority);
2255 
2256 	*high_priority = false;
2257 
2258 	/* If the first conflicting lock is waiting for the current row,
2259 	then all other granted locks are compatible and the lock can be
2260 	directly granted if no other high priority transactions are
2261 	waiting. We need to recheck with all granted transactions as there
2262 	could be granted GAP or Intention locks down the queue. */
2263 	bool	grant_lock = (conflict_lock->is_waiting());
2264 	lock_t*	lock_head = NULL;
2265 	lock_t*	grant_position = NULL;
2266 	lock_t*	add_position = NULL;
2267 
2268 	/* Different lock types (such as predicate locks) reside in different hash tables. */
2269 	hash_table_t*	lock_hash = lock_hash_get(m_mode);
2270 
2271 	HASH_SEARCH(hash, lock_hash, m_rec_id.fold(), lock_t*,
2272 		    lock_head, ut_ad(lock_head->is_record_lock()), true);
2273 
2274 	ut_ad(lock_head);
2275 
2276 	for (lock_t* next = lock_head; next != NULL; next = next->hash) {
2277 
2278 		/* check only for locks on the current row */
2279 		if (!is_on_row(next)) {
2280 			continue;
2281 		}
2282 
2283 		if (next->is_waiting()) {
2284 			/* grant lock position is the granted lock just before
2285 			the first wait lock in the queue. */
2286 			if (grant_position == NULL) {
2287 				grant_position = add_position;
2288 			}
2289 
2290 			if (trx_is_high_priority(next->trx)) {
2291 
2292 				*high_priority = true;
2293 				grant_lock = false;
2294 				add_position = next;
2295 			}
2296 		} else {
2297 
2298 			add_position = next;
2299 			/* Cannot grant lock if there is any conflicting
2300 			granted lock. */
2301 			if (grant_lock && lock_has_to_wait(lock, next)) {
2302 				grant_lock = false;
2303 			}
2304 		}
2305 	}
2306 
2307 	/* If the lock is to be granted it is safe to add before the first
2308 	waiting lock in the queue. */
2309 	if (grant_lock) {
2310 
2311 		ut_ad(!lock_has_to_wait(lock, grant_position));
2312 		add_position = grant_position;
2313 	}
2314 
2315 	ut_ad(add_position != NULL);
2316 
2317 	/* Add the lock to lock hash table. */
2318 	lock->hash = add_position->hash;
2319 	add_position->hash = lock;
2320 	++lock->index->table->n_rec_locks;
2321 
2322 	return(grant_lock);
2323 }
2324 
2325 /** Iterate over the granted locks and prepare the hit list for ASYNC Rollback.
2326 If a transaction is waiting on some other lock, wake it up with a deadlock error.
2327 Currently we don't mark the following transactions for ASYNC Rollback:
2328 1. Read only transactions
2329 2. Background transactions
2330 3. Other High priority transactions
2331 @param[in]	lock		Lock being requested
2332 @param[in]	conflict_lock	First conflicting lock from the head */
2333 void
2334 RecLock::make_trx_hit_list(
2335 	lock_t*		lock,
2336 	const lock_t*	conflict_lock)
2337 {
2338 	const lock_t*	next;
2339 
2340 	for (next = conflict_lock; next != NULL; next = next->hash) {
2341 
2342 		/* All locks ahead in the queue are checked. */
2343 		if (next == lock) {
2344 
2345 			ut_ad(next->is_waiting());
2346 			break;
2347 		}
2348 
2349 		trx_t*	trx = next->trx;
2350 		/* Check only for conflicting, granted locks on the current row.
2351 		Currently, we don't rollback read only transactions, transactions
2352 		owned by background threads. */
2353 		if (trx == lock->trx
2354 		    || !is_on_row(next)
2355 		    || next->is_waiting()
2356 		    || trx->read_only
2357 		    || trx->mysql_thd == NULL
2358 		    || !lock_has_to_wait(lock, next)) {
2359 
2360 			continue;
2361 		}
2362 
2363 		trx_mutex_enter(trx);
2364 
2365 		/* Skip high priority transactions, transactions already
2366 		marked for abort by some other transaction, and transactions
2367 		for which ASYNC rollback is disabled. A transaction must
2368 		complete the kill/abort of a victim once it is marked and
2369 		added to the hit list. */
2369 		if (trx_is_high_priority(trx)
2370 		    || (trx->in_innodb & TRX_FORCE_ROLLBACK_DISABLE) != 0
2371 		    || trx->abort) {
2372 
2373 			trx_mutex_exit(trx);
2374 			continue;
2375 		}
2376 
2377 		/* If the transaction is waiting on some other resource then
2378 		wake it up with DEAD_LOCK error so that it can rollback. */
2379 		if (trx->lock.que_state == TRX_QUE_LOCK_WAIT) {
2380 
2381 			/* Assert that it is not waiting for current record. */
2382 			ut_ad(trx->lock.wait_lock != next);
2383 #ifdef UNIV_DEBUG
2384 			ib::info() << "High Priority Transaction (ID): "
2385 				   << lock->trx->id << " waking up blocking"
2386 				   << " transaction (ID): " << trx->id;
2387 #endif /* UNIV_DEBUG */
2388 			trx->lock.was_chosen_as_deadlock_victim = true;
2389 			lock_cancel_waiting_and_release(trx->lock.wait_lock);
2390 			trx_mutex_exit(trx);
2391 			continue;
2392 		}
2393 
2394 		/* Mark for ASYNC Rollback and add to hit list. */
2395 		mark_trx_for_rollback(trx);
2396 		trx_mutex_exit(trx);
2397 	}
2398 
2399 	ut_ad(next == lock);
2400 }
2401 
2402 /*************************************************************//**
2403 Cancels a waiting record lock request and releases the waiting transaction
2404 that requested it. NOTE: does NOT check if waiting lock requests behind this
2405 one can now be granted! */
2406 static
2407 void
2408 lock_rec_cancel(
2409 /*============*/
2410 	lock_t*	lock)	/*!< in: waiting record lock request */
2411 {
2412 	que_thr_t*	thr;
2413 
2414 	ut_ad(lock_mutex_own());
2415 	ut_ad(lock_get_type_low(lock) == LOCK_REC);
2416 
2417 	/* Reset the bit (there can be only one set bit) in the lock bitmap */
2418 	lock_rec_reset_nth_bit(lock, lock_rec_find_set_bit(lock));
2419 
2420 	/* Reset the wait flag and the back pointer to lock in trx */
2421 
2422 	lock_reset_lock_and_trx_wait(lock);
2423 
2424 	/* The following function releases the trx from lock wait */
2425 
2426 	trx_mutex_enter(lock->trx);
2427 
2428 	thr = que_thr_end_lock_wait(lock->trx);
2429 
2430 	if (thr != NULL) {
2431 		lock_wait_release_thread_if_suspended(thr);
2432 	}
2433 
2434 	trx_mutex_exit(lock->trx);
2435 }
2436 
2437 /** Grant locks to waiting requests that no longer conflict
2438 @param[in]	in_lock		record lock object: grant all non-conflicting
2439 				locks waiting behind this lock object */
2440 static
2441 void
2442 lock_rec_grant(lock_t* in_lock)
2443 {
2444 	lock_t*		lock;
2445 
2446 	ulint		space = in_lock->space();
2447 	ulint		page_no = in_lock->page_number();
2448 	hash_table_t*	lock_hash = in_lock->hash_table();
2449 
2450 	/* Check if waiting locks in the queue can now be granted: grant
2451 	locks if there are no conflicting locks ahead. The whole queue is
2452 	scanned; a lock is granted as soon as nothing before it conflicts. */
2453 
2454 	for (lock = lock_rec_get_first_on_page_addr(lock_hash, space, page_no);
2455 	     lock != NULL;
2456 	     lock = lock_rec_get_next_on_page(lock)) {
2457 
2458 		if (lock_get_wait(lock)
2459 		    && !lock_rec_has_to_wait_in_queue(lock)) {
2460 
2461 			/* Grant the lock */
2462 			ut_ad(lock->trx != in_lock->trx);
2463 			lock_grant(lock);
2464 		}
2465 	}
2466 }
2467 
2468 /*************************************************************//**
2469 Removes a record lock request, waiting or granted, from the queue and
2470 grants locks to other transactions in the queue if they now are entitled
2471 to a lock. NOTE: all record locks contained in in_lock are removed. */
2472 void
2473 lock_rec_dequeue_from_page(
2474 /*=======================*/
2475 	lock_t*		in_lock)	/*!< in: record lock object: all
2476 					record locks which are contained in
2477 					this lock object are removed;
2478 					transactions waiting behind will
2479 					get their lock requests granted,
2480 					if they are now qualified to it */
2481 {
2482 	ulint		space;
2483 	ulint		page_no;
2484 	trx_lock_t*	trx_lock;
2485 	hash_table_t*	lock_hash;
2486 
2487 	ut_ad(lock_mutex_own());
2488 	ut_ad(lock_get_type_low(in_lock) == LOCK_REC);
2489 	/* We may or may not be holding in_lock->trx->mutex here. */
2490 
2491 	trx_lock = &in_lock->trx->lock;
2492 
2493 	space = in_lock->un_member.rec_lock.space;
2494 	page_no = in_lock->un_member.rec_lock.page_no;
2495 
2496 	ut_ad(in_lock->index->table->n_rec_locks > 0);
2497 	in_lock->index->table->n_rec_locks--;
2498 
2499 	lock_hash = lock_hash_get(in_lock->type_mode);
2500 
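	/* lock_rec_fold() hashes the page address (space, page_no); all
	record lock objects for the same page live in the same hash cell. */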
2501 	HASH_DELETE(lock_t, hash, lock_hash,
2502 		    lock_rec_fold(space, page_no), in_lock);
2503 
2504 	UT_LIST_REMOVE(trx_lock->trx_locks, in_lock);
2505 
2506 	MONITOR_INC(MONITOR_RECLOCK_REMOVED);
2507 	MONITOR_DEC(MONITOR_NUM_RECLOCK);
2508 
2509 	lock_rec_grant(in_lock);
2510 }
2511 
2512 /*************************************************************//**
2513 Removes a record lock request, waiting or granted, from the queue. */
2514 void
2515 lock_rec_discard(
2516 /*=============*/
2517 	lock_t*		in_lock)	/*!< in: record lock object: all
2518 					record locks which are contained
2519 					in this lock object are removed */
2520 {
2521 	ulint		space;
2522 	ulint		page_no;
2523 	trx_lock_t*	trx_lock;
2524 
2525 	ut_ad(lock_mutex_own());
2526 	ut_ad(lock_get_type_low(in_lock) == LOCK_REC);
2527 
2528 	trx_lock = &in_lock->trx->lock;
2529 
2530 	space = in_lock->un_member.rec_lock.space;
2531 	page_no = in_lock->un_member.rec_lock.page_no;
2532 
2533 	ut_ad(in_lock->index->table->n_rec_locks > 0);
2534 	in_lock->index->table->n_rec_locks--;
2535 
2536 	HASH_DELETE(lock_t, hash, lock_hash_get(in_lock->type_mode),
2537 			    lock_rec_fold(space, page_no), in_lock);
2538 
2539 	UT_LIST_REMOVE(trx_lock->trx_locks, in_lock);
2540 
2541 	MONITOR_INC(MONITOR_RECLOCK_REMOVED);
2542 	MONITOR_DEC(MONITOR_NUM_RECLOCK);
2543 }
2544 
2545 /*************************************************************//**
2546 Removes record lock objects set on an index page which is discarded. This
2547 function does not move locks or check for waiting locks; therefore the
2548 lock bitmaps must already be reset when this function is called. */
2549 static
2550 void
2551 lock_rec_free_all_from_discard_page_low(
2552 /*====================================*/
2553 	ulint		space,
2554 	ulint		page_no,
2555 	hash_table_t*	lock_hash)
2556 {
2557 	lock_t*	lock;
2558 	lock_t*	next_lock;
2559 
2560 	lock = lock_rec_get_first_on_page_addr(lock_hash, space, page_no);
2561 
2562 	while (lock != NULL) {
2563 		ut_ad(lock_rec_find_set_bit(lock) == ULINT_UNDEFINED);
2564 		ut_ad(!lock_get_wait(lock));
2565 
2566 		next_lock = lock_rec_get_next_on_page(lock);
2567 
2568 		lock_rec_discard(lock);
2569 
2570 		lock = next_lock;
2571 	}
2572 }
2573 
2574 /*************************************************************//**
2575 Removes record lock objects set on an index page which is discarded. This
2576 function does not move locks or check for waiting locks; therefore the
2577 lock bitmaps must already be reset when this function is called. */
2578 void
2579 lock_rec_free_all_from_discard_page(
2580 /*================================*/
2581 	const buf_block_t*	block)	/*!< in: page to be discarded */
2582 {
2583 	ulint	space;
2584 	ulint	page_no;
2585 
2586 	ut_ad(lock_mutex_own());
2587 
2588 	space = block->page.id.space();
2589 	page_no = block->page.id.page_no();
2590 
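	/* A discarded page may carry locks in any of the three lock
	hashes: ordinary record locks, predicate locks and page
	predicate locks. */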
2591 	lock_rec_free_all_from_discard_page_low(
2592 		space, page_no, lock_sys->rec_hash);
2593 	lock_rec_free_all_from_discard_page_low(
2594 		space, page_no, lock_sys->prdt_hash);
2595 	lock_rec_free_all_from_discard_page_low(
2596 		space, page_no, lock_sys->prdt_page_hash);
2597 }
2598 
2599 /*============= RECORD LOCK MOVING AND INHERITING ===================*/
2600 
2601 /*************************************************************//**
2602 Resets the lock bits for a single record. Releases transactions waiting for
2603 lock requests here. */
2604 static
2605 void
2606 lock_rec_reset_and_release_wait_low(
2607 /*================================*/
2608 	hash_table_t*		hash,	/*!< in: hash table */
2609 	const buf_block_t*	block,	/*!< in: buffer block containing
2610 					the record */
2611 	ulint			heap_no)/*!< in: heap number of record */
2612 {
2613 	lock_t*	lock;
2614 
2615 	ut_ad(lock_mutex_own());
2616 
2617 	for (lock = lock_rec_get_first(hash, block, heap_no);
2618 	     lock != NULL;
2619 	     lock = lock_rec_get_next(heap_no, lock)) {
2620 
2621 		if (lock_get_wait(lock)) {
2622 			lock_rec_cancel(lock);
2623 		} else {
2624 			lock_rec_reset_nth_bit(lock, heap_no);
2625 		}
2626 	}
2627 }
2628 
2629 /*************************************************************//**
2630 Resets the lock bits for a single record. Releases transactions waiting for
2631 lock requests here. */
2632 static
2633 void
2634 lock_rec_reset_and_release_wait(
2635 /*============================*/
2636 	const buf_block_t*	block,	/*!< in: buffer block containing
2637 					the record */
2638 	ulint			heap_no)/*!< in: heap number of record */
2639 {
2640 	lock_rec_reset_and_release_wait_low(
2641 		lock_sys->rec_hash, block, heap_no);
2642 
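	/* Predicate locks always reside on the infimum heap number of
	the page, hence PAGE_HEAP_NO_INFIMUM for the predicate hashes. */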
2643 	lock_rec_reset_and_release_wait_low(
2644 		lock_sys->prdt_hash, block, PAGE_HEAP_NO_INFIMUM);
2645 	lock_rec_reset_and_release_wait_low(
2646 		lock_sys->prdt_page_hash, block, PAGE_HEAP_NO_INFIMUM);
2647 }
2648 
2649 /*************************************************************//**
2650 Makes a record inherit the locks (except LOCK_INSERT_INTENTION type)
2651 of another record as gap type locks, but does not reset the lock bits of
2652 the other record. Also waiting lock requests on rec are inherited as
2653 GRANTED gap locks. */
2654 static
2655 void
2656 lock_rec_inherit_to_gap(
2657 /*====================*/
2658 	const buf_block_t*	heir_block,	/*!< in: block containing the
2659 						record which inherits */
2660 	const buf_block_t*	block,		/*!< in: block containing the
2661 						record from which inherited;
2662 						does NOT reset the locks on
2663 						this record */
2664 	ulint			heir_heap_no,	/*!< in: heap_no of the
2665 						inheriting record */
2666 	ulint			heap_no)	/*!< in: heap_no of the
2667 						donating record */
2668 {
2669 	lock_t*	lock;
2670 
2671 	ut_ad(lock_mutex_own());
2672 
2673 	/* If srv_locks_unsafe_for_binlog is TRUE or the session is using
2674 	the READ COMMITTED isolation level, we do not want locks set
2675 	by an UPDATE or a DELETE to be inherited as gap type locks. But we
2676 	DO want S-locks/X-locks (taken for replace) set by a consistency
2677 	constraint to be inherited even in that case. */
2678 
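	/* For instance: when a record is removed, its successor inherits
	the removed record's locks as gap locks, so the gap that the lock
	protected stays protected against inserts by other transactions. */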
2679 	for (lock = lock_rec_get_first(lock_sys->rec_hash, block, heap_no);
2680 	     lock != NULL;
2681 	     lock = lock_rec_get_next(heap_no, lock)) {
2682 
2683 		/* Skip inheriting lock if set */
2684 		if (lock->trx->skip_lock_inheritance) {
2685 
2686 			continue;
2687 		}
2688 
2689 		if (!lock_rec_get_insert_intention(lock)
2690 		    && !((srv_locks_unsafe_for_binlog
2691 			  || lock->trx->isolation_level
2692 			  <= TRX_ISO_READ_COMMITTED)
2693 			 && lock_get_mode(lock) ==
2694 			 (lock->trx->duplicates ? LOCK_S : LOCK_X))) {
2695 			lock_rec_add_to_queue(
2696 				LOCK_REC | LOCK_GAP | lock_get_mode(lock),
2697 				heir_block, heir_heap_no, lock->index,
2698 				lock->trx, FALSE);
2699 		}
2700 	}
2701 }
2702 
2703 /*************************************************************//**
2704 Makes a record inherit the gap locks (except LOCK_INSERT_INTENTION type)
2705 of another record as gap type locks, but does not reset the lock bits of the
2706 other record. Also waiting lock requests are inherited as GRANTED gap locks. */
2707 static
2708 void
2709 lock_rec_inherit_to_gap_if_gap_lock(
2710 /*================================*/
2711 	const buf_block_t*	block,		/*!< in: buffer block */
2712 	ulint			heir_heap_no,	/*!< in: heap_no of
2713 						record which inherits */
2714 	ulint			heap_no)	/*!< in: heap_no of record
2715 						from which inherited;
2716 						does NOT reset the locks
2717 						on this record */
2718 {
2719 	lock_t*	lock;
2720 
2721 	lock_mutex_enter();
2722 
2723 	for (lock = lock_rec_get_first(lock_sys->rec_hash, block, heap_no);
2724 	     lock != NULL;
2725 	     lock = lock_rec_get_next(heap_no, lock)) {
2726 
2727 		/* Skip inheriting lock if set */
2728 		if (lock->trx->skip_lock_inheritance) {
2729 
2730 			continue;
2731 		}
2732 
2733 		if (!lock_rec_get_insert_intention(lock)
2734 		    && (heap_no == PAGE_HEAP_NO_SUPREMUM
2735 			|| !lock_rec_get_rec_not_gap(lock))) {
2736 
2737 			lock_rec_add_to_queue(
2738 				LOCK_REC | LOCK_GAP | lock_get_mode(lock),
2739 				block, heir_heap_no, lock->index,
2740 				lock->trx, FALSE);
2741 		}
2742 	}
2743 
2744 	lock_mutex_exit();
2745 }
2746 
2747 /*************************************************************//**
2748 Moves the locks of a record to another record and resets the lock bits of
2749 the donating record. */
2750 void
2751 lock_rec_move_low(
2752 /*==============*/
2753 	hash_table_t*		lock_hash,	/*!< in: hash table to use */
2754 	const buf_block_t*	receiver,	/*!< in: buffer block containing
2755 						the receiving record */
2756 	const buf_block_t*	donator,	/*!< in: buffer block containing
2757 						the donating record */
2758 	ulint			receiver_heap_no,/*!< in: heap_no of the record
2759 						which gets the locks; there
2760 						must be no lock requests
2761 						on it! */
2762 	ulint			donator_heap_no)/*!< in: heap_no of the record
2763 						which gives the locks */
2764 {
2765 	lock_t*	lock;
2766 
2767 	ut_ad(lock_mutex_own());
2768 
2769 	/* If the lock is predicate lock, it resides on INFIMUM record */
2770 	ut_ad(lock_rec_get_first(
2771 		lock_hash, receiver, receiver_heap_no) == NULL
2772 	      || lock_hash == lock_sys->prdt_hash
2773 	      || lock_hash == lock_sys->prdt_page_hash);
2774 
2775 	for (lock = lock_rec_get_first(lock_hash,
2776 				       donator, donator_heap_no);
2777 	     lock != NULL;
2778 	     lock = lock_rec_get_next(donator_heap_no, lock)) {
2779 
2780 		const ulint	type_mode = lock->type_mode;
2781 
2782 		lock_rec_reset_nth_bit(lock, donator_heap_no);
2783 
2784 		if (type_mode & LOCK_WAIT) {
2785 			lock_reset_lock_and_trx_wait(lock);
2786 		}
2787 
2788 		/* Note that we FIRST reset the bit, and then set the lock:
2789 		the function works also if donator == receiver */
2790 
2791 		lock_rec_add_to_queue(
2792 			type_mode, receiver, receiver_heap_no,
2793 			lock->index, lock->trx, FALSE);
2794 	}
2795 
2796 	ut_ad(lock_rec_get_first(lock_sys->rec_hash,
2797 				 donator, donator_heap_no) == NULL);
2798 }
2799 
2800 /** Move all the granted locks to the front of the given lock list.
2801 All the waiting locks will be at the end of the list.
2802 @param[in,out]	lock_list	the given lock list.  */
2803 static
2804 void
2805 lock_move_granted_locks_to_front(
2806 	UT_LIST_BASE_NODE_T(lock_t)&	lock_list)
2807 {
2808 	lock_t*	lock;
2809 
2810 	bool seen_waiting_lock = false;
2811 
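	/* Single pass: once the first waiting lock has been seen, every
	granted lock found after it is moved to the front; `lock` is
	stepped back to its predecessor after each move so the iteration
	does not skip elements. */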
2812 	for (lock = UT_LIST_GET_FIRST(lock_list); lock;
2813 	     lock = UT_LIST_GET_NEXT(trx_locks, lock)) {
2814 
2815 		if (!seen_waiting_lock) {
2816 			if (lock->is_waiting()) {
2817 				seen_waiting_lock = true;
2818 			}
2819 			continue;
2820 		}
2821 
2822 		ut_ad(seen_waiting_lock);
2823 
2824 		if (!lock->is_waiting()) {
2825 			lock_t* prev = UT_LIST_GET_PREV(trx_locks, lock);
2826 			ut_a(prev);
2827 			UT_LIST_MOVE_TO_FRONT(lock_list, lock);
2828 			lock = prev;
2829 		}
2830 	}
2831 }
2832 
2833 /*************************************************************//**
2834 Updates the lock table when we have reorganized a page. NOTE: we copy
2835 also the locks set on the infimum of the page; the infimum may carry
2836 locks if an update of a record is occurring on the page, and its locks
2837 were temporarily stored on the infimum. */
2838 void
2839 lock_move_reorganize_page(
2840 /*======================*/
2841 	const buf_block_t*	block,	/*!< in: old index page, now
2842 					reorganized */
2843 	const buf_block_t*	oblock)	/*!< in: copy of the old, not
2844 					reorganized page */
2845 {
2846 	lock_t*		lock;
2847 	UT_LIST_BASE_NODE_T(lock_t)	old_locks;
2848 	mem_heap_t*	heap		= NULL;
2849 	ulint		comp;
2850 
2851 	lock_mutex_enter();
2852 
2853 	/* FIXME: This needs to deal with predicate lock too */
2854 	lock = lock_rec_get_first_on_page(lock_sys->rec_hash, block);
2855 
2856 	if (lock == NULL) {
2857 		lock_mutex_exit();
2858 
2859 		return;
2860 	}
2861 
2862 	heap = mem_heap_create(256);
2863 
2864 	/* Copy first all the locks on the page to heap and reset the
2865 	bitmaps in the original locks; chain the copies of the locks
2866 	using the trx_locks field in them. */
2867 
2868 	UT_LIST_INIT(old_locks, &lock_t::trx_locks);
2869 
2870 	do {
2871 		/* Make a copy of the lock */
2872 		lock_t*	old_lock = lock_rec_copy(lock, heap);
2873 
2874 		UT_LIST_ADD_LAST(old_locks, old_lock);
2875 
2876 		/* Reset bitmap of lock */
2877 		lock_rec_bitmap_reset(lock);
2878 
2879 		if (lock_get_wait(lock)) {
2880 
2881 			lock_reset_lock_and_trx_wait(lock);
2882 		}
2883 
2884 		lock = lock_rec_get_next_on_page(lock);
2885 	} while (lock != NULL);
2886 
2887 	comp = page_is_comp(block->frame);
2888 	ut_ad(comp == page_is_comp(oblock->frame));
2889 
2890 	lock_move_granted_locks_to_front(old_locks);
2891 
2892 	DBUG_EXECUTE_IF("do_lock_reverse_page_reorganize",
2893 			UT_LIST_REVERSE(old_locks););
2894 
2895 	for (lock = UT_LIST_GET_FIRST(old_locks); lock;
2896 	     lock = UT_LIST_GET_NEXT(trx_locks, lock)) {
2897 
2898 		/* NOTE: we copy also the locks set on the infimum and
2899 		supremum of the page; the infimum may carry locks if an
2900 		update of a record is occurring on the page, and its locks
2901 		were temporarily stored on the infimum */
2902 		const rec_t*	rec1 = page_get_infimum_rec(
2903 			buf_block_get_frame(block));
2904 		const rec_t*	rec2 = page_get_infimum_rec(
2905 			buf_block_get_frame(oblock));
2906 
2907 		/* Set locks according to old locks */
2908 		for (;;) {
2909 			ulint	old_heap_no;
2910 			ulint	new_heap_no;
2911 
2912 			if (comp) {
2913 				old_heap_no = rec_get_heap_no_new(rec2);
2914 				new_heap_no = rec_get_heap_no_new(rec1);
2915 
2916 				rec1 = page_rec_get_next_low(rec1, TRUE);
2917 				rec2 = page_rec_get_next_low(rec2, TRUE);
2918 			} else {
2919 				old_heap_no = rec_get_heap_no_old(rec2);
2920 				new_heap_no = rec_get_heap_no_old(rec1);
2921 				ut_ad(!memcmp(rec1, rec2,
2922 					      rec_get_data_size_old(rec2)));
2923 
2924 				rec1 = page_rec_get_next_low(rec1, FALSE);
2925 				rec2 = page_rec_get_next_low(rec2, FALSE);
2926 			}
2927 
2928 			/* Clear the bit in old_lock. */
2929 			if (old_heap_no < lock->un_member.rec_lock.n_bits
2930 			    && lock_rec_reset_nth_bit(lock, old_heap_no)) {
2931 				/* NOTE that the old lock bitmap could be too
2932 				small for the new heap number! */
2933 
2934 				lock_rec_add_to_queue(
2935 					lock->type_mode, block, new_heap_no,
2936 					lock->index, lock->trx, FALSE);
2937 			}
2938 
2939 			if (new_heap_no == PAGE_HEAP_NO_SUPREMUM) {
2940 				ut_ad(old_heap_no == PAGE_HEAP_NO_SUPREMUM);
2941 				break;
2942 			}
2943 		}
2944 
2945 #ifdef UNIV_DEBUG
2946 		{
2947 			ulint	i = lock_rec_find_set_bit(lock);
2948 
2949 			/* Check that all locks were moved. */
2950 			if (i != ULINT_UNDEFINED) {
2951 				ib::fatal() << "lock_move_reorganize_page(): "
2952 					<< i << " not moved in "
2953 					<< (void*) lock;
2954 			}
2955 		}
2956 #endif /* UNIV_DEBUG */
2957 	}
2958 
2959 	lock_mutex_exit();
2960 
2961 	mem_heap_free(heap);
2962 
2963 #ifdef UNIV_DEBUG_LOCK_VALIDATE
2964 	ut_ad(lock_rec_validate_page(block));
2965 #endif
2966 }
2967 
2968 /*************************************************************//**
2969 Moves the explicit locks on user records to another page if a record
2970 list end is moved to another page. */
2971 void
2972 lock_move_rec_list_end(
2973 /*===================*/
2974 	const buf_block_t*	new_block,	/*!< in: index page to move to */
2975 	const buf_block_t*	block,		/*!< in: index page */
2976 	const rec_t*		rec)		/*!< in: record on page: this
2977 						is the first record moved */
2978 {
2979 	lock_t*		lock;
2980 	const ulint	comp	= page_rec_is_comp(rec);
2981 
2982 	ut_ad(buf_block_get_frame(block) == page_align(rec));
2983 	ut_ad(comp == page_is_comp(buf_block_get_frame(new_block)));
2984 
2985 	lock_mutex_enter();
2986 
2987 	/* Note: when we move locks from record to record, waiting locks
2988 	and possible granted gap type locks behind them are enqueued in
2989 	the original order, because new elements are inserted into the hash
2990 	table at the end of the hash chain, and lock_rec_add_to_queue
2991 	does not reuse locks if there are waiters in the queue. */
2992 
2993 	for (lock = lock_rec_get_first_on_page(lock_sys->rec_hash, block); lock;
2994 	     lock = lock_rec_get_next_on_page(lock)) {
2995 		const rec_t*	rec1	= rec;
2996 		const rec_t*	rec2;
2997 		const ulint	type_mode = lock->type_mode;
2998 
2999 		if (comp) {
3000 			if (page_offset(rec1) == PAGE_NEW_INFIMUM) {
3001 				rec1 = page_rec_get_next_low(rec1, TRUE);
3002 			}
3003 
3004 			rec2 = page_rec_get_next_low(
3005 				buf_block_get_frame(new_block)
3006 				+ PAGE_NEW_INFIMUM, TRUE);
3007 		} else {
3008 			if (page_offset(rec1) == PAGE_OLD_INFIMUM) {
3009 				rec1 = page_rec_get_next_low(rec1, FALSE);
3010 			}
3011 
3012 			rec2 = page_rec_get_next_low(
3013 				buf_block_get_frame(new_block)
3014 				+ PAGE_OLD_INFIMUM, FALSE);
3015 		}
3016 
3017 		/* Copy lock requests on user records to new page and
3018 		reset the lock bits on the old */
3019 
3020 		for (;;) {
3021 			ulint	rec1_heap_no;
3022 			ulint	rec2_heap_no;
3023 
3024 			if (comp) {
3025 				rec1_heap_no = rec_get_heap_no_new(rec1);
3026 
3027 				if (rec1_heap_no == PAGE_HEAP_NO_SUPREMUM) {
3028 					break;
3029 				}
3030 
3031 				rec2_heap_no = rec_get_heap_no_new(rec2);
3032 				rec1 = page_rec_get_next_low(rec1, TRUE);
3033 				rec2 = page_rec_get_next_low(rec2, TRUE);
3034 			} else {
3035 				rec1_heap_no = rec_get_heap_no_old(rec1);
3036 
3037 				if (rec1_heap_no == PAGE_HEAP_NO_SUPREMUM) {
3038 					break;
3039 				}
3040 
3041 				rec2_heap_no = rec_get_heap_no_old(rec2);
3042 
3043 				ut_ad(!memcmp(rec1, rec2,
3044 					      rec_get_data_size_old(rec2)));
3045 
3046 				rec1 = page_rec_get_next_low(rec1, FALSE);
3047 				rec2 = page_rec_get_next_low(rec2, FALSE);
3048 			}
3049 
3050 			if (rec1_heap_no < lock->un_member.rec_lock.n_bits
3051 			    && lock_rec_reset_nth_bit(lock, rec1_heap_no)) {
3052 				if (type_mode & LOCK_WAIT) {
3053 					lock_reset_lock_and_trx_wait(lock);
3054 				}
3055 
3056 				lock_rec_add_to_queue(
3057 					type_mode, new_block, rec2_heap_no,
3058 					lock->index, lock->trx, FALSE);
3059 			}
3060 		}
3061 	}
3062 
3063 	lock_mutex_exit();
3064 
3065 #ifdef UNIV_DEBUG_LOCK_VALIDATE
3066 	ut_ad(lock_rec_validate_page(block));
3067 	ut_ad(lock_rec_validate_page(new_block));
3068 #endif
3069 }
3070 
3071 /*************************************************************//**
3072 Moves the explicit locks on user records to another page if a record
3073 list start is moved to another page. */
3074 void
3075 lock_move_rec_list_start(
3076 /*=====================*/
3077 	const buf_block_t*	new_block,	/*!< in: index page to
3078 						move to */
3079 	const buf_block_t*	block,		/*!< in: index page */
3080 	const rec_t*		rec,		/*!< in: record on page:
3081 						this is the first
3082 						record NOT copied */
3083 	const rec_t*		old_end)	/*!< in: old
3084 						previous-to-last
3085 						record on new_page
3086 						before the records
3087 						were copied */
3088 {
3089 	lock_t*		lock;
3090 	const ulint	comp	= page_rec_is_comp(rec);
3091 
3092 	ut_ad(block->frame == page_align(rec));
3093 	ut_ad(new_block->frame == page_align(old_end));
3094 	ut_ad(comp == page_rec_is_comp(old_end));
3095 
3096 	lock_mutex_enter();
3097 
3098 	for (lock = lock_rec_get_first_on_page(lock_sys->rec_hash, block); lock;
3099 	     lock = lock_rec_get_next_on_page(lock)) {
3100 		const rec_t*	rec1;
3101 		const rec_t*	rec2;
3102 		const ulint	type_mode = lock->type_mode;
3103 
3104 		if (comp) {
3105 			rec1 = page_rec_get_next_low(
3106 				buf_block_get_frame(block)
3107 				+ PAGE_NEW_INFIMUM, TRUE);
3108 			rec2 = page_rec_get_next_low(old_end, TRUE);
3109 		} else {
3110 			rec1 = page_rec_get_next_low(
3111 				buf_block_get_frame(block)
3112 				+ PAGE_OLD_INFIMUM, FALSE);
3113 			rec2 = page_rec_get_next_low(old_end, FALSE);
3114 		}
3115 
3116 		/* Copy lock requests on user records to new page and
3117 		reset the lock bits on the old */
3118 
3119 		while (rec1 != rec) {
3120 			ulint	rec1_heap_no;
3121 			ulint	rec2_heap_no;
3122 
3123 			if (comp) {
3124 				rec1_heap_no = rec_get_heap_no_new(rec1);
3125 				rec2_heap_no = rec_get_heap_no_new(rec2);
3126 
3127 				rec1 = page_rec_get_next_low(rec1, TRUE);
3128 				rec2 = page_rec_get_next_low(rec2, TRUE);
3129 			} else {
3130 				rec1_heap_no = rec_get_heap_no_old(rec1);
3131 				rec2_heap_no = rec_get_heap_no_old(rec2);
3132 
3133 				ut_ad(!memcmp(rec1, rec2,
3134 					      rec_get_data_size_old(rec2)));
3135 
3136 				rec1 = page_rec_get_next_low(rec1, FALSE);
3137 				rec2 = page_rec_get_next_low(rec2, FALSE);
3138 			}
3139 
3140 			if (rec1_heap_no < lock->un_member.rec_lock.n_bits
3141 			    && lock_rec_reset_nth_bit(lock, rec1_heap_no)) {
3142 				if (type_mode & LOCK_WAIT) {
3143 					lock_reset_lock_and_trx_wait(lock);
3144 				}
3145 
3146 				lock_rec_add_to_queue(
3147 					type_mode, new_block, rec2_heap_no,
3148 					lock->index, lock->trx, FALSE);
3149 			}
3150 		}
3151 
3152 #ifdef UNIV_DEBUG
3153 		if (page_rec_is_supremum(rec)) {
3154 			ulint	i;
3155 
3156 			for (i = PAGE_HEAP_NO_USER_LOW;
3157 			     i < lock_rec_get_n_bits(lock); i++) {
3158 				if (lock_rec_get_nth_bit(lock, i)) {
3159 					ib::fatal()
3160 						<< "lock_move_rec_list_start():"
3161 						<< i << " not moved in "
3162 						<<  (void*) lock;
3163 				}
3164 			}
3165 		}
3166 #endif /* UNIV_DEBUG */
3167 	}
3168 
3169 	lock_mutex_exit();
3170 
3171 #ifdef UNIV_DEBUG_LOCK_VALIDATE
3172 	ut_ad(lock_rec_validate_page(block));
3173 #endif
3174 }
3175 
3176 /*************************************************************//**
3177 Moves the explicit locks on user records to another page when R-tree
3178 records are moved. */
3179 void
3180 lock_rtr_move_rec_list(
3181 /*===================*/
3182 	const buf_block_t*	new_block,	/*!< in: index page to
3183 						move to */
3184 	const buf_block_t*	block,		/*!< in: index page */
3185 	rtr_rec_move_t*		rec_move,       /*!< in: recording records
3186 						moved */
3187 	ulint			num_move)       /*!< in: num of rec to move */
3188 {
3189 	lock_t*		lock;
3190 	ulint		comp;
3191 
3192 	if (!num_move) {
3193 		return;
3194 	}
3195 
3196 	comp = page_rec_is_comp(rec_move[0].old_rec);
3197 
3198 	ut_ad(block->frame == page_align(rec_move[0].old_rec));
3199 	ut_ad(new_block->frame == page_align(rec_move[0].new_rec));
3200 	ut_ad(comp == page_rec_is_comp(rec_move[0].new_rec));
3201 
3202 	lock_mutex_enter();
3203 
3204 	for (lock = lock_rec_get_first_on_page(lock_sys->rec_hash, block); lock;
3205 	     lock = lock_rec_get_next_on_page(lock)) {
3206 		ulint		moved = 0;
3207 		const rec_t*	rec1;
3208 		const rec_t*	rec2;
3209 		const ulint	type_mode = lock->type_mode;
3210 
3211 		/* Copy lock requests on user records to new page and
3212 		reset the lock bits on the old */
3213 
3214 		while (moved < num_move) {
3215 			ulint	rec1_heap_no;
3216 			ulint	rec2_heap_no;
3217 
3218 			rec1 = rec_move[moved].old_rec;
3219 			rec2 = rec_move[moved].new_rec;
3220 
3221 			if (comp) {
3222 				rec1_heap_no = rec_get_heap_no_new(rec1);
3223 				rec2_heap_no = rec_get_heap_no_new(rec2);
3224 
3225 			} else {
3226 				rec1_heap_no = rec_get_heap_no_old(rec1);
3227 				rec2_heap_no = rec_get_heap_no_old(rec2);
3228 
3229 				ut_ad(!memcmp(rec1, rec2,
3230 					      rec_get_data_size_old(rec2)));
3231 			}
3232 
3233 			if (rec1_heap_no < lock->un_member.rec_lock.n_bits
3234 			    && lock_rec_reset_nth_bit(lock, rec1_heap_no)) {
3235 				if (type_mode & LOCK_WAIT) {
3236 					lock_reset_lock_and_trx_wait(lock);
3237 				}
3238 
3239 				lock_rec_add_to_queue(
3240 					type_mode, new_block, rec2_heap_no,
3241 					lock->index, lock->trx, FALSE);
3242 
3243 				rec_move[moved].moved = true;
3244 			}
3245 
3246 			moved++;
3247 		}
3248 	}
3249 
3250 	lock_mutex_exit();
3251 
3252 #ifdef UNIV_DEBUG_LOCK_VALIDATE
3253 	ut_ad(lock_rec_validate_page(block));
3254 #endif
3255 }
3256 /*************************************************************//**
3257 Updates the lock table when a page is split to the right. */
3258 void
3259 lock_update_split_right(
3260 /*====================*/
3261 	const buf_block_t*	right_block,	/*!< in: right page */
3262 	const buf_block_t*	left_block)	/*!< in: left page */
3263 {
3264 	ulint	heap_no = lock_get_min_heap_no(right_block);
3265 
3266 	lock_mutex_enter();
3267 
3268 	/* Move the locks on the supremum of the left page to the supremum
3269 	of the right page */
3270 
3271 	lock_rec_move(right_block, left_block,
3272 		      PAGE_HEAP_NO_SUPREMUM, PAGE_HEAP_NO_SUPREMUM);
3273 
3274 	/* Inherit the locks to the supremum of left page from the successor
3275 	of the infimum on right page */
3276 
3277 	lock_rec_inherit_to_gap(left_block, right_block,
3278 				PAGE_HEAP_NO_SUPREMUM, heap_no);
3279 
3280 	lock_mutex_exit();
3281 }
3282 
3283 /*************************************************************//**
3284 Updates the lock table when a page is merged to the right. */
3285 void
3286 lock_update_merge_right(
3287 /*====================*/
3288 	const buf_block_t*	right_block,	/*!< in: right page to
3289 						which merged */
3290 	const rec_t*		orig_succ,	/*!< in: original
3291 						successor of infimum
3292 						on the right page
3293 						before merge */
3294 	const buf_block_t*	left_block)	/*!< in: merged index
3295 						page which will be
3296 						discarded */
3297 {
3298 	lock_mutex_enter();
3299 
3300 	/* Inherit the locks from the supremum of the left page to the
3301 	original successor of infimum on the right page, to which the left
3302 	page was merged */
3303 
3304 	lock_rec_inherit_to_gap(right_block, left_block,
3305 				page_rec_get_heap_no(orig_succ),
3306 				PAGE_HEAP_NO_SUPREMUM);
3307 
3308 	/* Reset the locks on the supremum of the left page, releasing
3309 	waiting transactions */
3310 
3311 	lock_rec_reset_and_release_wait_low(
3312 		lock_sys->rec_hash, left_block, PAGE_HEAP_NO_SUPREMUM);
3313 
3314 #ifdef UNIV_DEBUG
3315 	/* There should exist no page lock on the left page;
3316 	otherwise, the merge would have been blocked. */
3317 	ulint	space = left_block->page.id.space();
3318 	ulint	page_no = left_block->page.id.page_no();
3319 	ut_ad(lock_rec_get_first_on_page_addr(
3320 			lock_sys->prdt_page_hash, space, page_no) == NULL);
3321 #endif /* UNIV_DEBUG */
3322 
3323 	lock_rec_free_all_from_discard_page(left_block);
3324 
3325 	lock_mutex_exit();
3326 
3327 }
3328 
3329 /*************************************************************//**
3330 Updates the lock table when the root page is copied to another in
3331 btr_root_raise_and_insert. Note that we leave lock structs on the
3332 root page, even though they do not make sense on other than leaf
3333 pages: the reason is that in a pessimistic update the infimum record
3334 of the root page will act as a dummy carrier of the locks of the record
3335 to be updated. */
3336 void
3337 lock_update_root_raise(
3338 /*===================*/
3339 	const buf_block_t*	block,	/*!< in: index page to which copied */
3340 	const buf_block_t*	root)	/*!< in: root page */
3341 {
3342 	lock_mutex_enter();
3343 
3344 	/* Move the locks on the supremum of the root to the supremum
3345 	of block */
3346 
3347 	lock_rec_move(block, root,
3348 		      PAGE_HEAP_NO_SUPREMUM, PAGE_HEAP_NO_SUPREMUM);
3349 	lock_mutex_exit();
3350 }
3351 
3352 /*************************************************************//**
3353 Updates the lock table when a page is copied to another and the original page
3354 is removed from the chain of leaf pages, except if page is the root! */
3355 void
3356 lock_update_copy_and_discard(
3357 /*=========================*/
3358 	const buf_block_t*	new_block,	/*!< in: index page to
3359 						which copied */
3360 	const buf_block_t*	block)		/*!< in: index page;
3361 						NOT the root! */
3362 {
3363 	lock_mutex_enter();
3364 
3365 	/* Move the locks on the supremum of the old page to the supremum
3366 	of new_page */
3367 
3368 	lock_rec_move(new_block, block,
3369 		      PAGE_HEAP_NO_SUPREMUM, PAGE_HEAP_NO_SUPREMUM);
3370 	lock_rec_free_all_from_discard_page(block);
3371 
3372 	lock_mutex_exit();
3373 }
3374 
3375 /*************************************************************//**
3376 Updates the lock table when a page is split to the left. */
3377 void
3378 lock_update_split_left(
3379 /*===================*/
3380 	const buf_block_t*	right_block,	/*!< in: right page */
3381 	const buf_block_t*	left_block)	/*!< in: left page */
3382 {
3383 	ulint	heap_no = lock_get_min_heap_no(right_block);
3384 
3385 	lock_mutex_enter();
3386 
3387 	/* Inherit the locks to the supremum of the left page from the
3388 	successor of the infimum on the right page */
3389 
3390 	lock_rec_inherit_to_gap(left_block, right_block,
3391 				PAGE_HEAP_NO_SUPREMUM, heap_no);
3392 
3393 	lock_mutex_exit();
3394 }
3395 
3396 /*************************************************************//**
3397 Updates the lock table when a page is merged to the left. */
3398 void
3399 lock_update_merge_left(
3400 /*===================*/
3401 	const buf_block_t*	left_block,	/*!< in: left page to
3402 						which merged */
3403 	const rec_t*		orig_pred,	/*!< in: original predecessor
3404 						of supremum on the left page
3405 						before merge */
3406 	const buf_block_t*	right_block)	/*!< in: merged index page
3407 						which will be discarded */
3408 {
3409 	const rec_t*	left_next_rec;
3410 
3411 	ut_ad(left_block->frame == page_align(orig_pred));
3412 
3413 	lock_mutex_enter();
3414 
3415 	left_next_rec = page_rec_get_next_const(orig_pred);
3416 
3417 	if (!page_rec_is_supremum(left_next_rec)) {
3418 
3419 		/* Inherit the locks on the supremum of the left page to the
3420 		first record which was moved from the right page */
3421 
3422 		lock_rec_inherit_to_gap(left_block, left_block,
3423 					page_rec_get_heap_no(left_next_rec),
3424 					PAGE_HEAP_NO_SUPREMUM);
3425 
3426 		/* Reset the locks on the supremum of the left page,
3427 		releasing waiting transactions */
3428 
3429 		lock_rec_reset_and_release_wait_low(
3430 			lock_sys->rec_hash, left_block, PAGE_HEAP_NO_SUPREMUM);
3431 	}
3432 
3433 	/* Move the locks from the supremum of right page to the supremum
3434 	of the left page */
3435 
3436 	lock_rec_move(left_block, right_block,
3437 		      PAGE_HEAP_NO_SUPREMUM, PAGE_HEAP_NO_SUPREMUM);
3438 
3439 #ifdef UNIV_DEBUG
3440 	/* There should exist no predicate page lock on the right page;
3441 	otherwise, the merge would have been blocked. */
3442 	ulint	space = right_block->page.id.space();
3443 	ulint	page_no = right_block->page.id.page_no();
3444 	lock_t*	lock_test = lock_rec_get_first_on_page_addr(
3445 		lock_sys->prdt_page_hash, space, page_no);
3446 	ut_ad(!lock_test);
3447 #endif /* UNIV_DEBUG */
3448 
3449 	lock_rec_free_all_from_discard_page(right_block);
3450 
3451 	lock_mutex_exit();
3452 }
3453 
3454 /*************************************************************//**
3455 Resets the original locks on heir and replaces them with gap type locks
3456 inherited from rec. */
3457 void
3458 lock_rec_reset_and_inherit_gap_locks(
3459 /*=================================*/
3460 	const buf_block_t*	heir_block,	/*!< in: block containing the
3461 						record which inherits */
3462 	const buf_block_t*	block,		/*!< in: block containing the
3463 						record from which inherited;
3464 						does NOT reset the locks on
3465 						this record */
3466 	ulint			heir_heap_no,	/*!< in: heap_no of the
3467 						inheriting record */
3468 	ulint			heap_no)	/*!< in: heap_no of the
3469 						donating record */
3470 {
3471 	lock_mutex_enter();
3472 
3473 	lock_rec_reset_and_release_wait(heir_block, heir_heap_no);
3474 
3475 	lock_rec_inherit_to_gap(heir_block, block, heir_heap_no, heap_no);
3476 
3477 	lock_mutex_exit();
3478 }
3479 
3480 /*************************************************************//**
3481 Updates the lock table when a page is discarded. */
3482 void
3483 lock_update_discard(
3484 /*================*/
3485 	const buf_block_t*	heir_block,	/*!< in: index page
3486 						which will inherit the locks */
3487 	ulint			heir_heap_no,	/*!< in: heap_no of the record
3488 						which will inherit the locks */
3489 	const buf_block_t*	block)		/*!< in: index page
3490 						which will be discarded */
3491 {
3492 	const rec_t*	rec;
3493 	ulint		heap_no;
3494 	const page_t*	page = block->frame;
3495 
3496 	lock_mutex_enter();
3497 
3498 	if (!lock_rec_get_first_on_page(lock_sys->rec_hash, block)
3499 	    && (!lock_rec_get_first_on_page(lock_sys->prdt_hash, block))) {
3500 		/* No locks exist on page, nothing to do */
3501 
3502 		lock_mutex_exit();
3503 
3504 		return;
3505 	}
3506 
3507 	/* Inherit all the locks on the page to the record and reset all
3508 	the locks on the page */
3509 
3510 	if (page_is_comp(page)) {
3511 		rec = page + PAGE_NEW_INFIMUM;
3512 
3513 		do {
3514 			heap_no = rec_get_heap_no_new(rec);
3515 
3516 			lock_rec_inherit_to_gap(heir_block, block,
3517 						heir_heap_no, heap_no);
3518 
3519 			lock_rec_reset_and_release_wait(block, heap_no);
3520 
3521 			rec = page + rec_get_next_offs(rec, TRUE);
3522 		} while (heap_no != PAGE_HEAP_NO_SUPREMUM);
3523 	} else {
3524 		rec = page + PAGE_OLD_INFIMUM;
3525 
3526 		do {
3527 			heap_no = rec_get_heap_no_old(rec);
3528 
3529 			lock_rec_inherit_to_gap(heir_block, block,
3530 						heir_heap_no, heap_no);
3531 
3532 			lock_rec_reset_and_release_wait(block, heap_no);
3533 
3534 			rec = page + rec_get_next_offs(rec, FALSE);
3535 		} while (heap_no != PAGE_HEAP_NO_SUPREMUM);
3536 	}
3537 
3538 	lock_rec_free_all_from_discard_page(block);
3539 
3540 	lock_mutex_exit();
3541 }
3542 
3543 /*************************************************************//**
3544 Updates the lock table when a new user record is inserted. */
3545 void
3546 lock_update_insert(
3547 /*===============*/
3548 	const buf_block_t*	block,	/*!< in: buffer block containing rec */
3549 	const rec_t*		rec)	/*!< in: the inserted record */
3550 {
3551 	ulint	receiver_heap_no;
3552 	ulint	donator_heap_no;
3553 
3554 	ut_ad(block->frame == page_align(rec));
3555 
3556 	/* Inherit the gap-locking locks for rec, in gap mode, from the next
3557 	record */
3558 
3559 	if (page_rec_is_comp(rec)) {
3560 		receiver_heap_no = rec_get_heap_no_new(rec);
3561 		donator_heap_no = rec_get_heap_no_new(
3562 			page_rec_get_next_low(rec, TRUE));
3563 	} else {
3564 		receiver_heap_no = rec_get_heap_no_old(rec);
3565 		donator_heap_no = rec_get_heap_no_old(
3566 			page_rec_get_next_low(rec, FALSE));
3567 	}
3568 
3569 	lock_rec_inherit_to_gap_if_gap_lock(
3570 		block, receiver_heap_no, donator_heap_no);
3571 }
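
/* An illustrative example (hypothetical records): if a page holds
[r1, r2] and some transaction has a gap lock on r2, that lock covers
the open interval (r1, r2). After a new record r is inserted between
them, the gap lock on r2 only covers (r, r2); the inheritance above
therefore gives r a gap-mode lock copied from its successor r2, so
that the interval (r1, r) stays covered as well. */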
3572 
3573 /*************************************************************//**
3574 Updates the lock table when a record is removed. */
3575 void
3576 lock_update_delete(
3577 /*===============*/
3578 	const buf_block_t*	block,	/*!< in: buffer block containing rec */
3579 	const rec_t*		rec)	/*!< in: the record to be removed */
3580 {
3581 	const page_t*	page = block->frame;
3582 	ulint		heap_no;
3583 	ulint		next_heap_no;
3584 
3585 	ut_ad(page == page_align(rec));
3586 
3587 	if (page_is_comp(page)) {
3588 		heap_no = rec_get_heap_no_new(rec);
3589 		next_heap_no = rec_get_heap_no_new(page
3590 						   + rec_get_next_offs(rec,
3591 								       TRUE));
3592 	} else {
3593 		heap_no = rec_get_heap_no_old(rec);
3594 		next_heap_no = rec_get_heap_no_old(page
3595 						   + rec_get_next_offs(rec,
3596 								       FALSE));
3597 	}
3598 
3599 	lock_mutex_enter();
3600 
3601 	/* Let the next record inherit the locks from rec, in gap mode */
3602 
3603 	lock_rec_inherit_to_gap(block, block, next_heap_no, heap_no);
3604 
3605 	/* Reset the lock bits on rec and release waiting transactions */
3606 
3607 	lock_rec_reset_and_release_wait(block, heap_no);
3608 
3609 	lock_mutex_exit();
3610 }
3611 
3612 /*********************************************************************//**
3613 Stores on the page infimum record the explicit locks of another record.
3614 This function is used to store the lock state of a record when it is
3615 updated and the size of the record changes in the update. The record
3616 is moved in such an update, perhaps to another page. The infimum record
3617 acts as a dummy carrier record, taking care of lock releases while the
3618 actual record is being moved. */
3619 void
3620 lock_rec_store_on_page_infimum(
3621 /*===========================*/
3622 	const buf_block_t*	block,	/*!< in: buffer block containing rec */
3623 	const rec_t*		rec)	/*!< in: record whose lock state
3624 					is stored on the infimum
3625 					record of the same page; lock
3626 					bits are reset on the
3627 					record */
3628 {
3629 	ulint	heap_no = page_rec_get_heap_no(rec);
3630 
3631 	ut_ad(block->frame == page_align(rec));
3632 
3633 	lock_mutex_enter();
3634 
3635 	lock_rec_move(block, block, PAGE_HEAP_NO_INFIMUM, heap_no);
3636 
3637 	lock_mutex_exit();
3638 }
3639 
3640 /*********************************************************************//**
3641 Restores the state of explicit lock requests on a single record, where the
3642 state was stored on the infimum of the page. */
3643 void
3644 lock_rec_restore_from_page_infimum(
3645 /*===============================*/
3646 	const buf_block_t*	block,	/*!< in: buffer block containing rec */
3647 	const rec_t*		rec,	/*!< in: record whose lock state
3648 					is restored */
3649 	const buf_block_t*	donator)/*!< in: page (rec is not
3650 					necessarily on this page)
3651 					whose infimum stored the lock
3652 					state; lock bits are reset on
3653 					the infimum */
3654 {
3655 	ulint	heap_no = page_rec_get_heap_no(rec);
3656 
3657 	lock_mutex_enter();
3658 
3659 	lock_rec_move(block, donator, heap_no, PAGE_HEAP_NO_INFIMUM);
3660 
3661 	lock_mutex_exit();
3662 }
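
/* A minimal usage sketch of the store/restore pair above, assuming a
caller that performs a pessimistic update in which the record is
deleted and re-inserted, possibly on another page (the variable names
are illustrative):

	lock_rec_store_on_page_infimum(block, rec);

	... delete rec and re-insert the updated version as new_rec ...

	lock_rec_restore_from_page_infimum(new_block, new_rec, block);

While the record is "in flight", the infimum of the original page acts
as the dummy carrier of its locks. */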
3663 
3664 /*========================= TABLE LOCKS ==============================*/
3665 
3666 /** Functor for accessing the embedded node within a table lock. */
3667 struct TableLockGetNode {
3668 	ut_list_node<lock_t>& operator() (lock_t& elem)
3669 	{
3670 		return(elem.un_member.tab_lock.locks);
3671 	}
3672 };
3673 
3674 /*********************************************************************//**
3675 Creates a table lock object and adds it as the last in the lock queue
3676 of the table. Does NOT check for deadlocks or lock compatibility.
3677 @return own: new lock object */
3678 UNIV_INLINE
3679 lock_t*
3680 lock_table_create(
3681 /*==============*/
3682 	dict_table_t*	table,	/*!< in/out: database table
3683 				in dictionary cache */
3684 	ulint		type_mode,/*!< in: lock mode possibly ORed with
3685 				LOCK_WAIT */
3686 	trx_t*		trx)	/*!< in: trx */
3687 {
3688 	lock_t*		lock;
3689 
3690 	ut_ad(table && trx);
3691 	ut_ad(lock_mutex_own());
3692 	ut_ad(trx_mutex_own(trx));
3693 
3694 	check_trx_state(trx);
3695 
3696 	if ((type_mode & LOCK_MODE_MASK) == LOCK_AUTO_INC) {
3697 		++table->n_waiting_or_granted_auto_inc_locks;
3698 	}
3699 
3700 	/* For AUTOINC locking we reuse the lock instance only if
3701 	there is no wait involved; otherwise we allocate the waiting
3702 	lock from the transaction lock heap. */
3703 	if (type_mode == LOCK_AUTO_INC) {
3704 
3705 		lock = table->autoinc_lock;
3706 
3707 		table->autoinc_trx = trx;
3708 
3709 		ib_vector_push(trx->autoinc_locks, &lock);
3710 
3711 	} else if (trx->lock.table_cached < trx->lock.table_pool.size()) {
3712 		lock = trx->lock.table_pool[trx->lock.table_cached++];
3713 	} else {
3714 
3715 		lock = static_cast<lock_t*>(
3716 			mem_heap_alloc(trx->lock.lock_heap, sizeof(*lock)));
3717 
3718 	}
3719 
3720 	lock->type_mode = ib_uint32_t(type_mode | LOCK_TABLE);
3721 	lock->trx = trx;
3722 
3723 	lock->un_member.tab_lock.table = table;
3724 
3725 	ut_ad(table->n_ref_count > 0 || !table->can_be_evicted);
3726 
3727 	UT_LIST_ADD_LAST(trx->lock.trx_locks, lock);
3728 
3729 	ut_list_append(table->locks, lock, TableLockGetNode());
3730 
3731 	if (type_mode & LOCK_WAIT) {
3732 
3733 		lock_set_lock_and_trx_wait(lock, trx);
3734 	}
3735 
3736 	lock->trx->lock.table_locks.push_back(lock);
3737 
3738 	MONITOR_INC(MONITOR_TABLELOCK_CREATED);
3739 	MONITOR_INC(MONITOR_NUM_TABLELOCK);
3740 
3741 	return(lock);
3742 }
3743 
3744 /*************************************************************//**
3745 Pops autoinc lock requests from the transaction's autoinc_locks
3746 vector, which is used as a stack. We handle the case where there are
3747 NULL gaps in the vector and they need to be popped off as well. */
3748 UNIV_INLINE
3749 void
3750 lock_table_pop_autoinc_locks(
3751 /*=========================*/
3752 	trx_t*	trx)	/*!< in/out: transaction that owns the AUTOINC locks */
3753 {
3754 	ut_ad(lock_mutex_own());
3755 	ut_ad(!ib_vector_is_empty(trx->autoinc_locks));
3756 
3757 	/* Skip any gaps, gaps are NULL lock entries in the
3758 	trx->autoinc_locks vector. */
3759 
3760 	do {
3761 		ib_vector_pop(trx->autoinc_locks);
3762 
3763 		if (ib_vector_is_empty(trx->autoinc_locks)) {
3764 			return;
3765 		}
3766 
3767 	} while (*(lock_t**) ib_vector_get_last(trx->autoinc_locks) == NULL);
3768 }
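
/* An illustrative example: suppose trx->autoinc_locks holds
[A, NULL, B], the NULL being a gap left behind when
lock_table_remove_autoinc_lock() freed a lock out of order. Popping B
with this function also discards the NULL gap, leaving [A]. */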
3769 
3770 /*************************************************************//**
3771 Removes an autoinc lock request from the transaction's autoinc_locks. */
3772 UNIV_INLINE
3773 void
3774 lock_table_remove_autoinc_lock(
3775 /*===========================*/
3776 	lock_t*	lock,	/*!< in: table lock */
3777 	trx_t*	trx)	/*!< in/out: transaction that owns the lock */
3778 {
3779 	lock_t*	autoinc_lock;
3780 	lint	i = ib_vector_size(trx->autoinc_locks) - 1;
3781 
3782 	ut_ad(lock_mutex_own());
3783 	ut_ad(lock_get_mode(lock) == LOCK_AUTO_INC);
3784 	ut_ad(lock_get_type_low(lock) & LOCK_TABLE);
3785 	ut_ad(!ib_vector_is_empty(trx->autoinc_locks));
3786 
3787 	/* With stored functions and procedures the user may drop
3788 	a table within the same "statement". This special case has
3789 	to be handled by deleting only those AUTOINC locks that were
3790 	held by the table being dropped. */
3791 
3792 	autoinc_lock = *static_cast<lock_t**>(
3793 		ib_vector_get(trx->autoinc_locks, i));
3794 
3795 	/* This is the default fast case. */
3796 
3797 	if (autoinc_lock == lock) {
3798 		lock_table_pop_autoinc_locks(trx);
3799 	} else {
3800 		/* The last element should never be NULL */
3801 		ut_a(autoinc_lock != NULL);
3802 
3803 		/* Handle freeing the locks from within the stack. */
3804 
3805 		while (--i >= 0) {
3806 			autoinc_lock = *static_cast<lock_t**>(
3807 				ib_vector_get(trx->autoinc_locks, i));
3808 
3809 			if (autoinc_lock == lock) {
3810 				void*	null_var = NULL;
3811 				ib_vector_set(trx->autoinc_locks, i, &null_var);
3812 				return;
3813 			}
3814 		}
3815 
3816 		/* Must find the autoinc lock. */
3817 		ut_error;
3818 	}
3819 }
3820 
3821 /*************************************************************//**
3822 Removes a table lock request from the queue and the trx list of locks;
3823 this is a low-level function which does NOT check if waiting requests
3824 can now be granted. */
3825 UNIV_INLINE
3826 void
3827 lock_table_remove_low(
3828 /*==================*/
3829 	lock_t*	lock)	/*!< in/out: table lock */
3830 {
3831 	trx_t*		trx;
3832 	dict_table_t*	table;
3833 
3834 	ut_ad(lock_mutex_own());
3835 
3836 	trx = lock->trx;
3837 	table = lock->un_member.tab_lock.table;
3838 
3839 	/* Remove the table from the transaction's AUTOINC vector, if
3840 	the lock that is being released is an AUTOINC lock. */
3841 	if (lock_get_mode(lock) == LOCK_AUTO_INC) {
3842 
3843 		/* The table's AUTOINC lock can get transferred to
3844 		another transaction before we get here. */
3845 		if (table->autoinc_trx == trx) {
3846 			table->autoinc_trx = NULL;
3847 		}
3848 
3849 		/* The locks must be freed in the reverse order from
3850 		the one in which they were acquired. This is to avoid
3851 		traversing the AUTOINC lock vector unnecessarily.
3852 
3853 		We only store locks that were granted in the
3854 		trx->autoinc_locks vector (see lock_table_create()
3855 		and lock_grant()). Therefore it can be empty and we
3856 		need to check for that. */
3857 
3858 		if (!lock_get_wait(lock)
3859 		    && !ib_vector_is_empty(trx->autoinc_locks)) {
3860 
3861 			lock_table_remove_autoinc_lock(lock, trx);
3862 		}
3863 
3864 		ut_a(table->n_waiting_or_granted_auto_inc_locks > 0);
3865 		table->n_waiting_or_granted_auto_inc_locks--;
3866 	}
3867 
3868 	UT_LIST_REMOVE(trx->lock.trx_locks, lock);
3869 	ut_list_remove(table->locks, lock, TableLockGetNode());
3870 
3871 	MONITOR_INC(MONITOR_TABLELOCK_REMOVED);
3872 	MONITOR_DEC(MONITOR_NUM_TABLELOCK);
3873 }
3874 
3875 /*********************************************************************//**
3876 Enqueues a waiting request for a table lock which cannot be granted
3877 immediately. Checks for deadlocks.
3878 @return DB_LOCK_WAIT, DB_DEADLOCK, DB_QUE_THR_SUSPENDED, or
3879 DB_SUCCESS; DB_SUCCESS means that there was a deadlock, but another
3880 transaction was chosen as a victim, and we got the lock immediately:
3881 no need to wait then */
3882 static
3883 dberr_t
3884 lock_table_enqueue_waiting(
3885 /*=======================*/
3886 	ulint		mode,	/*!< in: lock mode this transaction is
3887 				requesting */
3888 	dict_table_t*	table,	/*!< in/out: table */
3889 	que_thr_t*	thr)	/*!< in: query thread */
3890 {
3891 	trx_t*		trx;
3892 	lock_t*		lock;
3893 
3894 	ut_ad(lock_mutex_own());
3895 	ut_ad(!srv_read_only_mode);
3896 
3897 	trx = thr_get_trx(thr);
3898 	ut_ad(trx_mutex_own(trx));
3899 
3900 	/* Test if there already is some other reason to suspend thread:
3901 	we do not enqueue a lock request if the query thread should be
3902 	stopped anyway */
3903 
3904 	if (que_thr_stop(thr)) {
3905 		ut_error;
3906 
3907 		return(DB_QUE_THR_SUSPENDED);
3908 	}
3909 
3910 	switch (trx_get_dict_operation(trx)) {
3911 	case TRX_DICT_OP_NONE:
3912 		break;
3913 	case TRX_DICT_OP_TABLE:
3914 	case TRX_DICT_OP_INDEX:
3915 		ib::error() << "A table lock wait happens in a dictionary"
3916 			" operation. Table " << table->name
3917 			<< ". " << BUG_REPORT_MSG;
3918 		ut_ad(0);
3919 	}
3920 
3921 	/* Enqueue the lock request that will wait to be granted */
3922 	lock = lock_table_create(table, mode | LOCK_WAIT, trx);
3923 
3924 	const trx_t*	victim_trx =
3925 			DeadlockChecker::check_and_resolve(lock, trx);
3926 
3927 	if (victim_trx != 0) {
3928 		ut_ad(victim_trx == trx);
3929 
3930 		/* The order here is important, we don't want to
3931 		lose the state of the lock before calling remove. */
3932 		lock_table_remove_low(lock);
3933 		lock_reset_lock_and_trx_wait(lock);
3934 
3935 		return(DB_DEADLOCK);
3936 
3937 	} else if (trx->lock.wait_lock == NULL) {
3938 		/* Deadlock resolution chose another transaction as a victim,
3939 		and we accidentally got our lock granted! */
3940 
3941 		return(DB_SUCCESS);
3942 	}
3943 
3944 	trx->lock.que_state = TRX_QUE_LOCK_WAIT;
3945 
3946 	trx->lock.wait_started = ut_time();
3947 	trx->lock.was_chosen_as_deadlock_victim = false;
3948 
3949 	ut_a(que_thr_stop(thr));
3950 
3951 	MONITOR_INC(MONITOR_TABLELOCK_WAIT);
3952 
3953 	return(DB_LOCK_WAIT);
3954 }
3955 
3956 /*********************************************************************//**
3957 Checks if other transactions have an incompatible mode lock request in
3958 the lock queue.
3959 @return lock or NULL */
3960 UNIV_INLINE
3961 const lock_t*
3962 lock_table_other_has_incompatible(
3963 /*==============================*/
3964 	const trx_t*		trx,	/*!< in: transaction, or NULL if all
3965 					transactions should be included */
3966 	ulint			wait,	/*!< in: LOCK_WAIT if also
3967 					waiting locks are taken into
3968 					account, or 0 if not */
3969 	const dict_table_t*	table,	/*!< in: table */
3970 	lock_mode		mode)	/*!< in: lock mode */
3971 {
3972 	const lock_t*	lock;
3973 
3974 	ut_ad(lock_mutex_own());
3975 
3976 	for (lock = UT_LIST_GET_LAST(table->locks);
3977 	     lock != NULL;
3978 	     lock = UT_LIST_GET_PREV(un_member.tab_lock.locks, lock)) {
3979 
3980 		if (lock->trx != trx
3981 		    && !lock_mode_compatible(lock_get_mode(lock), mode)
3982 		    && (wait || !lock_get_wait(lock))) {
3983 
3984 			return(lock);
3985 		}
3986 	}
3987 
3988 	return(NULL);
3989 }
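
/* As a reminder, the modes compared above via lock_mode_compatible()
follow the usual InnoDB lock compatibility matrix
('+' = compatible, '-' = conflict):

	    IS  IX   S   X  AI
	IS   +   +   +   -   +
	IX   +   +   -   -   +
	S    +   -   +   -   -
	X    -   -   -   -   -
	AI   +   +   -   -   -
*/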
3990 
3991 /*********************************************************************//**
3992 Locks the specified database table in the mode given. If the lock cannot
3993 be granted immediately, the query thread is put to wait.
3994 @return DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
3995 dberr_t
3996 lock_table(
3997 /*=======*/
3998 	ulint		flags,	/*!< in: if BTR_NO_LOCKING_FLAG bit is set,
3999 				does nothing */
4000 	dict_table_t*	table,	/*!< in/out: database table
4001 				in dictionary cache */
4002 	lock_mode	mode,	/*!< in: lock mode */
4003 	que_thr_t*	thr)	/*!< in: query thread */
4004 {
4005 	trx_t*		trx;
4006 	dberr_t		err;
4007 	const lock_t*	wait_for;
4008 
4009 	ut_ad(table && thr);
4010 
4011 	/* Given the limited visibility of a temp-table, we can avoid
4012 	the locking overhead */
4013 	if ((flags & BTR_NO_LOCKING_FLAG)
4014 	    || srv_read_only_mode
4015 	    || dict_table_is_temporary(table)) {
4016 
4017 		return(DB_SUCCESS);
4018 	}
4019 
4020 	ut_a(flags == 0);
4021 
4022 	trx = thr_get_trx(thr);
4023 
4024 	/* Look for equal or stronger locks the same trx already
4025 	has on the table. No need to acquire the lock mutex here
4026 	because only this transaction can add/access table locks
4027 	to/from trx_t::table_locks. */
4028 
4029 	if (lock_table_has(trx, table, mode)) {
4030 
4031 		return(DB_SUCCESS);
4032 	}
4033 
4034 	/* Read only transactions can write to temp tables; we don't want
4035 	to promote them to RW transactions. Their updates cannot be visible
4036 	to other transactions. Therefore we can keep them out
4037 	of the read views. */
4038 
4039 	if ((mode == LOCK_IX || mode == LOCK_X)
4040 	    && !trx->read_only
4041 	    && trx->rsegs.m_redo.rseg == 0) {
4042 
4043 		trx_set_rw_mode(trx);
4044 	}
4045 
4046 	lock_mutex_enter();
4047 
4048 	/* We have to check if the new lock is compatible with any locks
4049 	other transactions have in the table lock queue. */
4050 
4051 	wait_for = lock_table_other_has_incompatible(
4052 		trx, LOCK_WAIT, table, mode);
4053 
4054 	trx_mutex_enter(trx);
4055 
4056 	/* Another trx has a request on the table in an incompatible
4057 	mode: this trx may have to wait */
4058 
4059 	if (wait_for != NULL) {
4060 		err = lock_table_enqueue_waiting(mode | flags, table, thr);
4061 	} else {
4062 		lock_table_create(table, mode | flags, trx);
4063 
4064 		ut_a(!flags || mode == LOCK_S || mode == LOCK_X);
4065 
4066 		err = DB_SUCCESS;
4067 	}
4068 
4069 	lock_mutex_exit();
4070 
4071 	trx_mutex_exit(trx);
4072 
4073 	return(err);
4074 }
4075 
4076 /*********************************************************************//**
4077 Creates a table IX lock object for a resurrected transaction. */
4078 void
4079 lock_table_ix_resurrect(
4080 /*====================*/
4081 	dict_table_t*	table,	/*!< in/out: table */
4082 	trx_t*		trx)	/*!< in/out: transaction */
4083 {
4084 	ut_ad(trx->is_recovered);
4085 
4086 	if (lock_table_has(trx, table, LOCK_IX)) {
4087 		return;
4088 	}
4089 
4090 	lock_mutex_enter();
4091 
4092 	/* We have to check if the new lock is compatible with any locks
4093 	other transactions have in the table lock queue. */
4094 
4095 	ut_ad(!lock_table_other_has_incompatible(
4096 		      trx, LOCK_WAIT, table, LOCK_IX));
4097 
4098 	trx_mutex_enter(trx);
4099 	lock_table_create(table, LOCK_IX, trx);
4100 	lock_mutex_exit();
4101 	trx_mutex_exit(trx);
4102 }
4103 
4104 /*********************************************************************//**
4105 Checks if a waiting table lock request still has to wait in a queue.
4106 @return TRUE if still has to wait */
4107 static
4108 bool
4109 lock_table_has_to_wait_in_queue(
4110 /*============================*/
4111 	const lock_t*	wait_lock)	/*!< in: waiting table lock */
4112 {
4113 	const dict_table_t*	table;
4114 	const lock_t*		lock;
4115 
4116 	ut_ad(lock_mutex_own());
4117 	ut_ad(lock_get_wait(wait_lock));
4118 
4119 	table = wait_lock->un_member.tab_lock.table;
4120 
4121 	for (lock = UT_LIST_GET_FIRST(table->locks);
4122 	     lock != wait_lock;
4123 	     lock = UT_LIST_GET_NEXT(un_member.tab_lock.locks, lock)) {
4124 
4125 		if (lock_has_to_wait(wait_lock, lock)) {
4126 
4127 			return(true);
4128 		}
4129 	}
4130 
4131 	return(false);
4132 }
4133 
4134 /*************************************************************//**
4135 Removes a table lock request, waiting or granted, from the queue and grants
4136 locks to other transactions in the queue, if they now are entitled to a
4137 lock. */
4138 static
4139 void
4140 lock_table_dequeue(
4141 /*===============*/
4142 	lock_t*	in_lock)/*!< in/out: table lock object; transactions waiting
4143 			behind will get their lock requests granted, if
4144 			they are now qualified to it */
4145 {
4146 	ut_ad(lock_mutex_own());
4147 	ut_a(lock_get_type_low(in_lock) == LOCK_TABLE);
4148 
4149 	lock_t*	lock = UT_LIST_GET_NEXT(un_member.tab_lock.locks, in_lock);
4150 
4151 	lock_table_remove_low(in_lock);
4152 
4153 	/* Check if waiting locks in the queue can now be granted: grant
4154 	locks if there are no conflicting locks ahead. */
4155 
4156 	for (/* No op */;
4157 	     lock != NULL;
4158 	     lock = UT_LIST_GET_NEXT(un_member.tab_lock.locks, lock)) {
4159 
4160 		if (lock_get_wait(lock)
4161 		    && !lock_table_has_to_wait_in_queue(lock)) {
4162 
4163 			/* Grant the lock */
4164 			ut_ad(in_lock->trx != lock->trx);
4165 			lock_grant(lock);
4166 		}
4167 	}
4168 }
4169 
4170 /** Sets a lock on a table based on the given mode.
4171 @param[in]	table	table to lock
4172 @param[in,out]	trx	transaction
4173 @param[in]	mode	LOCK_X or LOCK_S
4174 @return error code or DB_SUCCESS. */
4175 dberr_t
4176 lock_table_for_trx(
4177 	dict_table_t*	table,
4178 	trx_t*		trx,
4179 	enum lock_mode	mode)
4180 {
4181 	mem_heap_t*	heap;
4182 	que_thr_t*	thr;
4183 	dberr_t		err;
4184 	sel_node_t*	node;
4185 	heap = mem_heap_create(512);
4186 
4187 	node = sel_node_create(heap);
4188 	thr = pars_complete_graph_for_exec(node, trx, heap, NULL);
4189 	thr->graph->state = QUE_FORK_ACTIVE;
4190 
4191 	/* We use the select query graph as the dummy graph needed
4192 	in the lock module call */
4193 
4194 	thr = static_cast<que_thr_t*>(
4195 		que_fork_get_first_thr(
4196 			static_cast<que_fork_t*>(que_node_get_parent(thr))));
4197 
4198 	que_thr_move_to_run_state_for_mysql(thr, trx);
4199 
4200 run_again:
4201 	thr->run_node = thr;
4202 	thr->prev_node = thr->common.parent;
4203 
4204 	err = lock_table(0, table, mode, thr);
4205 
4206 	trx->error_state = err;
4207 
4208 	if (UNIV_LIKELY(err == DB_SUCCESS)) {
4209 		que_thr_stop_for_mysql_no_error(thr, trx);
4210 	} else {
4211 		que_thr_stop_for_mysql(thr);
4212 
4213 		if (err != DB_QUE_THR_SUSPENDED) {
4214 			bool	was_lock_wait;
4215 
4216 			was_lock_wait = row_mysql_handle_errors(
4217 				&err, trx, thr, NULL);
4218 
4219 			if (was_lock_wait) {
4220 				goto run_again;
4221 			}
4222 		} else {
4223 			que_thr_t*	run_thr;
4224 			que_node_t*	parent;
4225 
4226 			parent = que_node_get_parent(thr);
4227 
4228 			run_thr = que_fork_start_command(
4229 				static_cast<que_fork_t*>(parent));
4230 
4231 			ut_a(run_thr == thr);
4232 
4233 			/* There was a lock wait but the thread was not
4234 			in a ready to run or running state. */
4235 			trx->error_state = DB_LOCK_WAIT;
4236 
4237 			goto run_again;
4238 
4239 		}
4240 	}
4241 
4242 	que_graph_free(thr->graph);
4243 	trx->op_info = "";
4244 
4245 	return(err);
4246 }
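
/* A minimal usage sketch (illustrative; DDL code paths use this
pattern to X-lock a table, e.g. before dropping it):

	dberr_t	err = lock_table_for_trx(table, trx, LOCK_X);

	if (err != DB_SUCCESS) {
		... the table could not be locked, abort the operation ...
	}
*/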
4247 
4248 /*=========================== LOCK RELEASE ==============================*/
4249 
4250 /*************************************************************//**
4251 Removes a granted record lock of a transaction from the queue and grants
4252 locks to other transactions waiting in the queue if they now are entitled
4253 to a lock. */
4254 void
4255 lock_rec_unlock(
4256 /*============*/
4257 	trx_t*			trx,	/*!< in/out: transaction that has
4258 					set a record lock */
4259 	const buf_block_t*	block,	/*!< in: buffer block containing rec */
4260 	const rec_t*		rec,	/*!< in: record */
4261 	lock_mode		lock_mode)/*!< in: LOCK_S or LOCK_X */
4262 {
4263 	lock_t*		first_lock;
4264 	lock_t*		lock;
4265 	ulint		heap_no;
4266 	const char*	stmt;
4267 	size_t		stmt_len;
4268 
4269 	ut_ad(trx);
4270 	ut_ad(rec);
4271 	ut_ad(block->frame == page_align(rec));
4272 	ut_ad(!trx->lock.wait_lock);
4273 	ut_ad(trx_state_eq(trx, TRX_STATE_ACTIVE));
4274 
4275 	heap_no = page_rec_get_heap_no(rec);
4276 
4277 	lock_mutex_enter();
4278 	trx_mutex_enter(trx);
4279 
4280 	first_lock = lock_rec_get_first(lock_sys->rec_hash, block, heap_no);
4281 
4282 	/* Find the last lock with the same lock_mode and transaction
4283 	on the record. */
4284 
4285 	for (lock = first_lock; lock != NULL;
4286 	     lock = lock_rec_get_next(heap_no, lock)) {
4287 		if (lock->trx == trx && lock_get_mode(lock) == lock_mode) {
4288 			goto released;
4289 		}
4290 	}
4291 
4292 	lock_mutex_exit();
4293 	trx_mutex_exit(trx);
4294 
4295 	stmt = innobase_get_stmt_unsafe(trx->mysql_thd, &stmt_len);
4296 
4297 	{
4298 		ib::error	err;
4299 		err << "Unlock row could not find a " << lock_mode
4300 			<< " mode lock on the record. Current statement: ";
4301 		err.write(stmt, stmt_len);
4302 	}
4303 
4304 	return;
4305 
4306 released:
4307 	ut_a(!lock_get_wait(lock));
4308 	lock_rec_reset_nth_bit(lock, heap_no);
4309 
4310 	/* Check if we can now grant waiting lock requests */
4311 
4312 	for (lock = first_lock; lock != NULL;
4313 	     lock = lock_rec_get_next(heap_no, lock)) {
4314 		if (lock_get_wait(lock)
4315 		    && !lock_rec_has_to_wait_in_queue(lock)) {
4316 
4317 			/* Grant the lock */
4318 			ut_ad(trx != lock->trx);
4319 			lock_grant(lock);
4320 		}
4321 	}
4322 
4323 	lock_mutex_exit();
4324 	trx_mutex_exit(trx);
4325 }
4326 
4327 #ifdef UNIV_DEBUG
4328 /*********************************************************************//**
4329 Check if a transaction that has X or IX locks has set the dict_op
4330 code correctly. */
4331 static
4332 void
4333 lock_check_dict_lock(
4334 /*==================*/
4335 	const lock_t*	lock)	/*!< in: lock to check */
4336 {
4337 	if (lock_get_type_low(lock) == LOCK_REC) {
4338 
4339 		/* Check if the transaction locked a record
4340 		in a system table in X mode. It should have set
4341 		the dict_op code correctly if it did. */
4342 		if (lock->index->table->id < DICT_HDR_FIRST_ID
4343 		    && lock_get_mode(lock) == LOCK_X) {
4344 
4345 			ut_ad(lock_get_mode(lock) != LOCK_IX);
4346 			ut_ad(lock->trx->dict_operation != TRX_DICT_OP_NONE);
4347 		}
4348 	} else {
4349 		ut_ad(lock_get_type_low(lock) & LOCK_TABLE);
4350 
4351 		const dict_table_t*	table;
4352 
4353 		table = lock->un_member.tab_lock.table;
4354 
4355 		/* Check if the transaction locked a system table
4356 		in IX mode. It should have set the dict_op code
4357 		correctly if it did. */
4358 		if (table->id < DICT_HDR_FIRST_ID
4359 		    && (lock_get_mode(lock) == LOCK_X
4360 			|| lock_get_mode(lock) == LOCK_IX)) {
4361 
4362 			ut_ad(lock->trx->dict_operation != TRX_DICT_OP_NONE);
4363 		}
4364 	}
4365 }
4366 #endif /* UNIV_DEBUG */
4367 
4368 /** Remove GAP lock from a next key record lock
4369 @param[in,out]	lock	lock object */
4370 static
4371 void
4372 lock_remove_gap_lock(lock_t* lock)
4373 {
4374 	/* Remove lock on supremum */
4375 	lock_rec_reset_nth_bit(lock, PAGE_HEAP_NO_SUPREMUM);
4376 
4377 	/* Remove GAP lock for other records */
4378 	lock->remove_gap_lock();
4379 }
4380 
4381 /** Release read locks of a transaction. It is called during XA
4382 prepare to release locks early.
4383 @param[in,out]	trx		transaction
4384 @param[in]	only_gap	release only GAP locks */
4385 void
4386 lock_trx_release_read_locks(
4387 	trx_t*		trx,
4388 	bool		only_gap)
4389 {
4390 	lock_t*		lock;
4391 	lock_t*		next_lock;
4392 	ulint		count = 0;
4393 
4394 	/* Avoid taking lock_sys if trx didn't acquire any lock */
4395 	if (UT_LIST_GET_LEN(trx->lock.trx_locks) == 0) {
4396 
4397 		return;
4398 	}
4399 
4400 	lock_mutex_enter();
4401 
4402 	lock = UT_LIST_GET_FIRST(trx->lock.trx_locks);
4403 
4404 	while (lock != NULL) {
4405 
4406 		next_lock = UT_LIST_GET_NEXT(trx_locks, lock);
4407 
4408 		/* Check only for record lock */
4409 		if (!lock->is_record_lock()
4410 		    || lock->is_insert_intention()
4411 		    || lock->is_predicate()) {
4412 
4413 			lock = next_lock;
4414 			continue;
4415 		}
4416 
4417 		/* Release any GAP only lock. */
4418 		if (lock->is_gap()) {
4419 
4420 			lock_rec_dequeue_from_page(lock);
4421 			lock = next_lock;
4422 			continue;
4423 		}
4424 
4425 		/* Don't release any non-GAP lock if not asked. */
4426 		if (lock->is_record_not_gap() && only_gap) {
4427 
4428 			lock = next_lock;
4429 			continue;
4430 		}
4431 
4432 		/* Release a shared next-key lock (S + GAP) if asked for */
4433 		if (lock->mode() == LOCK_S && !only_gap) {
4434 
4435 			lock_rec_dequeue_from_page(lock);
4436 			lock = next_lock;
4437 			continue;
4438 		}
4439 
4440 		/* Release GAP lock from Next Key lock */
4441 		lock_remove_gap_lock(lock);
4442 
4443 		/* Grant locks */
4444 		lock_rec_grant(lock);
4445 
4446 		lock = next_lock;
4447 
4448 		++count;
4449 
4450 		if (count == LOCK_RELEASE_INTERVAL) {
4451 			/* Release the mutex for a while, so that we
4452 			do not monopolize it */
4453 
4454 			lock_mutex_exit();
4455 
4456 			lock_mutex_enter();
4457 
4458 			count = 0;
4459 		}
4460 	}
4461 
4462 	lock_mutex_exit();
4463 }
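
/* Summary of the cases handled in the loop above, for a record lock
held by the preparing transaction:

	GAP-only lock			-> always released
	REC_NOT_GAP lock, only_gap	-> kept as is
	LOCK_S lock, !only_gap		-> released entirely
	anything else			-> the GAP part is removed and
					   the record part is kept

Insert-intention and predicate locks are never touched here. */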
4464 
4465 /*********************************************************************//**
4466 Releases transaction locks, and releases possible other transactions waiting
4467 because of these locks. */
4468 static
4469 void
4470 lock_release(
4471 /*=========*/
4472 	trx_t*	trx)	/*!< in/out: transaction */
4473 {
4474 	lock_t*		lock;
4475 	ulint		count = 0;
4476 	trx_id_t	max_trx_id = trx_sys_get_max_trx_id();
4477 
4478 	ut_ad(lock_mutex_own());
4479 	ut_ad(!trx_mutex_own(trx));
4480 	ut_ad(!trx->is_dd_trx);
4481 
4482 	for (lock = UT_LIST_GET_LAST(trx->lock.trx_locks);
4483 	     lock != NULL;
4484 	     lock = UT_LIST_GET_LAST(trx->lock.trx_locks)) {
4485 
4486 		ut_d(lock_check_dict_lock(lock));
4487 
4488 		if (lock_get_type_low(lock) == LOCK_REC) {
4489 
4490 			lock_rec_dequeue_from_page(lock);
4491 		} else {
4492 			dict_table_t*	table;
4493 
4494 			table = lock->un_member.tab_lock.table;
4495 
4496 			if (lock_get_mode(lock) != LOCK_IS
4497 			    && trx->undo_no != 0) {
4498 
4499 				/* The trx may have modified the table. We
4500 				block the use of the MySQL query cache for
4501 				all currently active transactions. */
4502 
4503 				table->query_cache_inv_id = max_trx_id;
4504 			}
4505 
4506 			lock_table_dequeue(lock);
4507 		}
4508 
4509 		if (count == LOCK_RELEASE_INTERVAL) {
4510 			/* Release the mutex for a while, so that we
4511 			do not monopolize it */
4512 
4513 			lock_mutex_exit();
4514 
4515 			lock_mutex_enter();
4516 
4517 			count = 0;
4518 		}
4519 
4520 		++count;
4521 	}
4522 }
4523 
4524 /* True if a lock mode is S or X */
4525 #define IS_LOCK_S_OR_X(lock) \
4526 	(lock_get_mode(lock) == LOCK_S \
4527 	 || lock_get_mode(lock) == LOCK_X)
4528 
4529 /*********************************************************************//**
4530 Removes table locks of the transaction on a table to be dropped. */
4531 static
4532 void
4533 lock_trx_table_locks_remove(
4534 /*========================*/
4535 	const lock_t*	lock_to_remove)		/*!< in: lock to remove */
4536 {
4537 	trx_t*		trx = lock_to_remove->trx;
4538 
4539 	ut_ad(lock_mutex_own());
4540 
4541 	/* It is safe to read this because we are holding the lock mutex */
4542 	if (!trx->lock.cancel) {
4543 		trx_mutex_enter(trx);
4544 	} else {
4545 		ut_ad(trx_mutex_own(trx));
4546 	}
4547 
4548 	typedef lock_pool_t::reverse_iterator iterator;
4549 
4550 	iterator	end = trx->lock.table_locks.rend();
4551 
4552 	for (iterator it = trx->lock.table_locks.rbegin(); it != end; ++it) {
4553 
4554 		const lock_t*	lock = *it;
4555 
4556 		if (lock == NULL) {
4557 			continue;
4558 		}
4559 
4560 		ut_a(trx == lock->trx);
4561 		ut_a(lock_get_type_low(lock) & LOCK_TABLE);
4562 		ut_a(lock->un_member.tab_lock.table != NULL);
4563 
4564 		if (lock == lock_to_remove) {
4565 
4566 			*it = NULL;
4567 
4568 			if (!trx->lock.cancel) {
4569 				trx_mutex_exit(trx);
4570 			}
4571 
4572 			return;
4573 		}
4574 	}
4575 
4576 	if (!trx->lock.cancel) {
4577 		trx_mutex_exit(trx);
4578 	}
4579 
4580 	/* Lock must exist in the vector. */
4581 	ut_error;
4582 }
4583 
4584 /*********************************************************************//**
4585 Removes locks of a transaction on a table to be dropped.
4586 If remove_also_table_sx_locks is TRUE then table-level S and X locks are
4587 also removed in addition to other table-level and record-level locks.
4588 No lock that is going to be removed is allowed to be a wait lock. */
4589 static
4590 void
4591 lock_remove_all_on_table_for_trx(
4592 /*=============================*/
4593 	dict_table_t*	table,			/*!< in: table to be dropped */
4594 	trx_t*		trx,			/*!< in: a transaction */
4595 	ibool		remove_also_table_sx_locks)/*!< in: also removes
4596 						table S and X locks */
4597 {
4598 	lock_t*		lock;
4599 	lock_t*		prev_lock;
4600 
4601 	ut_ad(lock_mutex_own());
4602 
4603 	for (lock = UT_LIST_GET_LAST(trx->lock.trx_locks);
4604 	     lock != NULL;
4605 	     lock = prev_lock) {
4606 
4607 		prev_lock = UT_LIST_GET_PREV(trx_locks, lock);
4608 
4609 		if (lock_get_type_low(lock) == LOCK_REC
4610 		    && lock->index->table == table) {
4611 			ut_a(!lock_get_wait(lock));
4612 
4613 			lock_rec_discard(lock);
4614 		} else if (lock_get_type_low(lock) & LOCK_TABLE
4615 			   && lock->un_member.tab_lock.table == table
4616 			   && (remove_also_table_sx_locks
4617 			       || !IS_LOCK_S_OR_X(lock))) {
4618 
4619 			ut_a(!lock_get_wait(lock));
4620 
4621 			lock_trx_table_locks_remove(lock);
4622 			lock_table_remove_low(lock);
4623 		}
4624 	}
4625 }
4626 
4627 /*******************************************************************//**
4628 Remove any explicit record locks held by recovering transactions on
4629 the table.
4630 @return number of recovered transactions examined */
4631 static
4632 ulint
4633 lock_remove_recovered_trx_record_locks(
4634 /*===================================*/
4635 	dict_table_t*	table)	/*!< in: check if there are any locks
4636 				held on records in this table or on the
4637 				table itself */
4638 {
4639 	ut_a(table != NULL);
4640 	ut_ad(lock_mutex_own());
4641 
4642 	ulint		n_recovered_trx = 0;
4643 
4644 	mutex_enter(&trx_sys->mutex);
4645 
4646 	for (trx_t* trx = UT_LIST_GET_FIRST(trx_sys->rw_trx_list);
4647 	     trx != NULL;
4648 	     trx = UT_LIST_GET_NEXT(trx_list, trx)) {
4649 
4650 		assert_trx_in_rw_list(trx);
4651 
4652 		if (!trx->is_recovered) {
4653 			continue;
4654 		}
4655 
4656 		/* Because we are holding the lock_sys->mutex,
4657 		implicit locks cannot be converted to explicit ones
4658 		while we are scanning the explicit locks. */
4659 
4660 		lock_t*	next_lock;
4661 
4662 		for (lock_t* lock = UT_LIST_GET_FIRST(trx->lock.trx_locks);
4663 		     lock != NULL;
4664 		     lock = next_lock) {
4665 
4666 			ut_a(lock->trx == trx);
4667 
4668 			/* Recovered transactions can't wait on a lock. */
4669 
4670 			ut_a(!lock_get_wait(lock));
4671 
4672 			next_lock = UT_LIST_GET_NEXT(trx_locks, lock);
4673 
4674 			switch (lock_get_type_low(lock)) {
4675 			default:
4676 				ut_error;
4677 			case LOCK_TABLE:
4678 				if (lock->un_member.tab_lock.table == table) {
4679 					lock_trx_table_locks_remove(lock);
4680 					lock_table_remove_low(lock);
4681 				}
4682 				break;
4683 			case LOCK_REC:
4684 				if (lock->index->table == table) {
4685 					lock_rec_discard(lock);
4686 				}
4687 			}
4688 		}
4689 
4690 		++n_recovered_trx;
4691 	}
4692 
4693 	mutex_exit(&trx_sys->mutex);
4694 
4695 	return(n_recovered_trx);
4696 }
4697 
4698 /*********************************************************************//**
4699 Removes locks on a table to be dropped or truncated.
4700 If remove_also_table_sx_locks is TRUE then table-level S and X locks are
4701 also removed in addition to other table-level and record-level locks.
4702 No lock that is going to be removed is allowed to be a wait lock. */
4703 void
4704 lock_remove_all_on_table(
4705 /*=====================*/
4706 	dict_table_t*	table,			/*!< in: table to be dropped
4707 						or truncated */
4708 	ibool		remove_also_table_sx_locks)/*!< in: also removes
4709 						table S and X locks */
4710 {
4711 	lock_t*		lock;
4712 
4713 	lock_mutex_enter();
4714 
4715 	for (lock = UT_LIST_GET_FIRST(table->locks);
4716 	     lock != NULL;
4717 	     /* No op */) {
4718 
4719 		lock_t*	prev_lock;
4720 
4721 		prev_lock = UT_LIST_GET_PREV(un_member.tab_lock.locks, lock);
4722 
4723 		/* If we should remove all locks (remove_also_table_sx_locks
4724 		is TRUE), or if the lock is not table-level S or X lock,
4725 		then check we are not going to remove a wait lock. */
4726 		if (remove_also_table_sx_locks
4727 		    || !(lock_get_type(lock) == LOCK_TABLE
4728 			 && IS_LOCK_S_OR_X(lock))) {
4729 
4730 			ut_a(!lock_get_wait(lock));
4731 		}
4732 
4733 		lock_remove_all_on_table_for_trx(
4734 			table, lock->trx, remove_also_table_sx_locks);
4735 
4736 		if (prev_lock == NULL) {
4737 			if (lock == UT_LIST_GET_FIRST(table->locks)) {
4738 				/* lock was not removed, pick its successor */
4739 				lock = UT_LIST_GET_NEXT(
4740 					un_member.tab_lock.locks, lock);
4741 			} else {
4742 				/* lock was removed, pick the first one */
4743 				lock = UT_LIST_GET_FIRST(table->locks);
4744 			}
4745 		} else if (UT_LIST_GET_NEXT(un_member.tab_lock.locks,
4746 					    prev_lock) != lock) {
4747 			/* If lock was removed by
4748 			lock_remove_all_on_table_for_trx() then pick the
4749 			successor of prev_lock ... */
4750 			lock = UT_LIST_GET_NEXT(
4751 				un_member.tab_lock.locks, prev_lock);
4752 		} else {
4753 			/* ... otherwise pick the successor of lock. */
4754 			lock = UT_LIST_GET_NEXT(
4755 				un_member.tab_lock.locks, lock);
4756 		}
4757 	}
4758 
4759 	/* Note: Recovered transactions don't have table level IX or IS locks
4760 	but can have implicit record locks that have been converted to explicit
4761 	record locks. Such record locks cannot be freed by traversing the
4762 	transaction lock list in dict_table_t (as above). */
4763 
4764 	if (!lock_sys->rollback_complete
4765 	    && lock_remove_recovered_trx_record_locks(table) == 0) {
4766 
4767 		lock_sys->rollback_complete = TRUE;
4768 	}
4769 
4770 	lock_mutex_exit();
4771 }
4772 
4773 /*===================== VALIDATION AND DEBUGGING ====================*/
4774 
4775 /*********************************************************************//**
4776 Prints info of a table lock. */
4777 void
4778 lock_table_print(
4779 /*=============*/
4780 	FILE*		file,	/*!< in: file where to print */
4781 	const lock_t*	lock)	/*!< in: table type lock */
4782 {
4783 	ut_ad(lock_mutex_own());
4784 	ut_a(lock_get_type_low(lock) == LOCK_TABLE);
4785 
4786 	fputs("TABLE LOCK table ", file);
4787 	ut_print_name(file, lock->trx,
4788 		      lock->un_member.tab_lock.table->name.m_name);
4789 	fprintf(file, " trx id " TRX_ID_FMT, trx_get_id_for_print(lock->trx));
4790 
4791 	if (lock_get_mode(lock) == LOCK_S) {
4792 		fputs(" lock mode S", file);
4793 	} else if (lock_get_mode(lock) == LOCK_X) {
4794 		ut_ad(lock->trx->id != 0);
4795 		fputs(" lock mode X", file);
4796 	} else if (lock_get_mode(lock) == LOCK_IS) {
4797 		fputs(" lock mode IS", file);
4798 	} else if (lock_get_mode(lock) == LOCK_IX) {
4799 		ut_ad(lock->trx->id != 0);
4800 		fputs(" lock mode IX", file);
4801 	} else if (lock_get_mode(lock) == LOCK_AUTO_INC) {
4802 		fputs(" lock mode AUTO-INC", file);
4803 	} else {
4804 		fprintf(file, " unknown lock mode %lu",
4805 			(ulong) lock_get_mode(lock));
4806 	}
4807 
4808 	if (lock_get_wait(lock)) {
4809 		fputs(" waiting", file);
4810 	}
4811 
4812 	putc('\n', file);
4813 }
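
/* An example of the line this function prints (the values are
illustrative):

TABLE LOCK table `test`.`t1` trx id 1234 lock mode IX
*/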
4814 
4815 /*********************************************************************//**
4816 Prints info of a record lock. */
4817 void
4818 lock_rec_print(
4819 /*===========*/
4820 	FILE*		file,	/*!< in: file where to print */
4821 	const lock_t*	lock)	/*!< in: record type lock */
4822 {
4823 	ulint			space;
4824 	ulint			page_no;
4825 	mtr_t			mtr;
4826 	mem_heap_t*		heap		= NULL;
4827 	ulint			offsets_[REC_OFFS_NORMAL_SIZE];
4828 	ulint*			offsets		= offsets_;
4829 	rec_offs_init(offsets_);
4830 
4831 	ut_ad(lock_mutex_own());
4832 	ut_a(lock_get_type_low(lock) == LOCK_REC);
4833 
4834 	space = lock->un_member.rec_lock.space;
4835 	page_no = lock->un_member.rec_lock.page_no;
4836 
4837 	fprintf(file, "RECORD LOCKS space id %lu page no %lu n bits %lu "
4838 		"index %s of table ",
4839 		(ulong) space, (ulong) page_no,
4840 		(ulong) lock_rec_get_n_bits(lock),
4841 		lock->index->name());
4842 	ut_print_name(file, lock->trx, lock->index->table_name);
4843 	fprintf(file, " trx id " TRX_ID_FMT, trx_get_id_for_print(lock->trx));
4844 
4845 	if (lock_get_mode(lock) == LOCK_S) {
4846 		fputs(" lock mode S", file);
4847 	} else if (lock_get_mode(lock) == LOCK_X) {
4848 		fputs(" lock_mode X", file);
4849 	} else {
4850 		ut_error;
4851 	}
4852 
4853 	if (lock_rec_get_gap(lock)) {
4854 		fputs(" locks gap before rec", file);
4855 	}
4856 
4857 	if (lock_rec_get_rec_not_gap(lock)) {
4858 		fputs(" locks rec but not gap", file);
4859 	}
4860 
4861 	if (lock_rec_get_insert_intention(lock)) {
4862 		fputs(" insert intention", file);
4863 	}
4864 
4865 	if (lock_get_wait(lock)) {
4866 		fputs(" waiting", file);
4867 	}
4868 
4869 	mtr_start(&mtr);
4870 
4871 	putc('\n', file);
4872 
4873 	const buf_block_t*	block;
4874 
4875 	block = buf_page_try_get(page_id_t(space, page_no), &mtr);
4876 
4877 	for (ulint i = 0; i < lock_rec_get_n_bits(lock); ++i) {
4878 
4879 		if (!lock_rec_get_nth_bit(lock, i)) {
4880 			continue;
4881 		}
4882 
4883 		fprintf(file, "Record lock, heap no %lu", (ulong) i);
4884 
4885 		if (block) {
4886 			const rec_t*	rec;
4887 
4888 			rec = page_find_rec_with_heap_no(
4889 				buf_block_get_frame(block), i);
4890 
4891 			offsets = rec_get_offsets(
4892 				rec, lock->index, offsets,
4893 				ULINT_UNDEFINED, &heap);
4894 
4895 			putc(' ', file);
4896 			rec_print_new(file, rec, offsets);
4897 		}
4898 
4899 		putc('\n', file);
4900 	}
4901 
4902 	mtr_commit(&mtr);
4903 
4904 	if (heap) {
4905 		mem_heap_free(heap);
4906 	}
4907 }
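
/* An example of the output this function produces (the values are
illustrative, and the record dump is abbreviated):

RECORD LOCKS space id 58 page no 3 n bits 72 index PRIMARY of table
`test`.`t1` trx id 1234 lock_mode X locks rec but not gap
Record lock, heap no 2 PHYSICAL RECORD: n_fields 3; compact format; ...
*/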
4908 
4909 #ifdef UNIV_DEBUG
4910 /* Print the number of lock structs from lock_print_info_summary() only
4911 in non-production builds for performance reasons, see
4912 http://bugs.mysql.com/36942 */
4913 #define PRINT_NUM_OF_LOCK_STRUCTS
4914 #endif /* UNIV_DEBUG */
4915 
4916 #ifdef PRINT_NUM_OF_LOCK_STRUCTS
4917 /*********************************************************************//**
4918 Calculates the number of record lock structs in the record lock hash table.
4919 @return number of record locks */
4920 static
4921 ulint
4922 lock_get_n_rec_locks(void)
4923 /*======================*/
4924 {
4925 	ulint	n_locks	= 0;
4926 	ulint	i;
4927 
4928 	ut_ad(lock_mutex_own());
4929 
4930 	for (i = 0; i < hash_get_n_cells(lock_sys->rec_hash); i++) {
4931 		const lock_t*	lock;
4932 
4933 		for (lock = static_cast<const lock_t*>(
4934 				HASH_GET_FIRST(lock_sys->rec_hash, i));
4935 		     lock != 0;
4936 		     lock = static_cast<const lock_t*>(
4937 				HASH_GET_NEXT(hash, lock))) {
4938 
4939 			n_locks++;
4940 		}
4941 	}
4942 
4943 	return(n_locks);
4944 }
4945 #endif /* PRINT_NUM_OF_LOCK_STRUCTS */
4946 
4947 /*********************************************************************//**
4948 Prints info of locks for all transactions.
4949 @return FALSE if not able to obtain the lock mutex, in which case it
4950 exits without printing the lock info
4951 ibool
4952 lock_print_info_summary(
4953 /*====================*/
4954 	FILE*	file,	/*!< in: file where to print */
4955 	ibool	nowait)	/*!< in: if TRUE, do not wait for the lock mutex */
4956 {
4957 	/* if nowait is FALSE, wait on the lock mutex,
4958 	otherwise return immediately if we fail to obtain the
4959 	mutex. */
4960 	if (!nowait) {
4961 		lock_mutex_enter();
4962 	} else if (lock_mutex_enter_nowait()) {
4963 		fputs("FAIL TO OBTAIN LOCK MUTEX,"
4964 		      " SKIP LOCK INFO PRINTING\n", file);
4965 		return(FALSE);
4966 	}
4967 
4968 	if (lock_deadlock_found) {
4969 		fputs("------------------------\n"
4970 		      "LATEST DETECTED DEADLOCK\n"
4971 		      "------------------------\n", file);
4972 
4973 		if (!srv_read_only_mode) {
4974 			ut_copy_file(file, lock_latest_err_file);
4975 		}
4976 	}
4977 
4978 	fputs("------------\n"
4979 	      "TRANSACTIONS\n"
4980 	      "------------\n", file);
4981 
4982 	fprintf(file, "Trx id counter " TRX_ID_FMT "\n",
4983 		trx_sys_get_max_trx_id());
4984 
4985 	fprintf(file,
4986 		"Purge done for trx's n:o < " TRX_ID_FMT
4987 		" undo n:o < " TRX_ID_FMT " state: ",
4988 		purge_sys->iter.trx_no,
4989 		purge_sys->iter.undo_no);
4990 
4991 	/* Note: We are reading the state without the latch. One because it
4992 	will violate the latching order and two because we are merely querying
4993 	the state of the variable for display. */
4994 
4995 	switch (purge_sys->state){
4996 	case PURGE_STATE_INIT:
4997 		/* Should never be in this state while the system is running. */
4998 		ut_error;
4999 
5000 	case PURGE_STATE_EXIT:
5001 		fprintf(file, "exited");
5002 		break;
5003 
5004 	case PURGE_STATE_DISABLED:
5005 		fprintf(file, "disabled");
5006 		break;
5007 
5008 	case PURGE_STATE_RUN:
5009 		fprintf(file, "running");
5010 		/* Check if it is waiting for more data to arrive. */
5011 		if (!purge_sys->running) {
5012 			fprintf(file, " but idle");
5013 		}
5014 		break;
5015 
5016 	case PURGE_STATE_STOP:
5017 		fprintf(file, "stopped");
5018 		break;
5019 	}
5020 
5021 	fprintf(file, "\n");
5022 
5023 	fprintf(file,
5024 		"History list length %lu\n",
5025 		(ulong) trx_sys->rseg_history_len);
5026 
5027 #ifdef PRINT_NUM_OF_LOCK_STRUCTS
5028 	fprintf(file,
5029 		"Total number of lock structs in row lock hash table %lu\n",
5030 		(ulong) lock_get_n_rec_locks());
5031 #endif /* PRINT_NUM_OF_LOCK_STRUCTS */
5032 	return(TRUE);
5033 }
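
/* An example of the section this function prints in SHOW ENGINE INNODB
STATUS output (the values are illustrative):

------------
TRANSACTIONS
------------
Trx id counter 1282
Purge done for trx's n:o < 1270 undo n:o < 0 state: running but idle
History list length 12
*/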
5034 
5035 /** Functor to print not-started transaction from the mysql_trx_list. */
5036 
5037 struct	PrintNotStarted {
5038 
5039 	PrintNotStarted(FILE* file) : m_file(file) { }
5040 
5041 	void	operator()(const trx_t* trx)
5042 	{
5043 		ut_ad(trx->in_mysql_trx_list);
5044 		ut_ad(mutex_own(&trx_sys->mutex));
5045 
5046 		/* See state transitions and locking rules in trx0trx.h */
5047 
5048 		if (trx_state_eq(trx, TRX_STATE_NOT_STARTED)) {
5049 
5050 			fputs("---", m_file);
5051 			trx_print_latched(m_file, trx, 600);
5052 		}
5053 	}
5054 
5055 	FILE*		m_file;
5056 };
5057 
5058 /** Iterates over a transaction's locks, keeping track of the
5059 position within the list using an ordinal value. */
5060 
5061 class TrxLockIterator {
5062 public:
5063 	TrxLockIterator() { rewind(); }
5064 
5065 	/** Get the m_index(th) lock of a transaction.
5066 	@return current lock or 0 */
5067 	const lock_t* current(const trx_t* trx) const
5068 	{
5069 		lock_t*	lock;
5070 		ulint	i = 0;
5071 
5072 		for (lock = UT_LIST_GET_FIRST(trx->lock.trx_locks);
5073 		     lock != NULL && i < m_index;
5074 		     lock = UT_LIST_GET_NEXT(trx_locks, lock), ++i) {
5075 
5076 			/* No op */
5077 		}
5078 
5079 		return(lock);
5080 	}
5081 
5082 	/** Set the ordinal value to 0 */
5083 	void rewind()
5084 	{
5085 		m_index = 0;
5086 	}
5087 
5088 	/** Increment the ordinal value.
5089 	@return the current index value */
5090 	ulint next()
5091 	{
5092 		return(++m_index);
5093 	}
5094 
5095 private:
5096 	/** Current iterator position */
5097 	ulint		m_index;
5098 };
5099 
5100 /** This iterates over both the RW and RO trx_sys lists. We need to keep
5101 track of where the iterator was up to and we do that using an ordinal value. */
5102 
5103 class TrxListIterator {
5104 public:
5105 	TrxListIterator() : m_index()
5106 	{
5107 		/* We iterate over the RW trx list first. */
5108 
5109 		m_trx_list = &trx_sys->rw_trx_list;
5110 	}
5111 
5112 	/** Get the current transaction whose ordinality is m_index.
5113 	@return current transaction or 0 */
5114 
5115 	const trx_t* current()
5116 	{
5117 		return(reposition());
5118 	}
5119 
5120 	/** Advance the current transaction ordinal value and reset the
5121 	transaction lock ordinal value */
5122 
5123 	void next()
5124 	{
5125 		++m_index;
5126 		m_lock_iter.rewind();
5127 	}
5128 
5129 	TrxLockIterator& lock_iter()
5130 	{
5131 		return(m_lock_iter);
5132 	}
5133 
5134 private:
5135 	/** Reposition the "cursor" on the current transaction. If it
5136 	is the first time then the "cursor" will be positioned on the
5137 	first transaction.
5138 
5139 	@return transaction instance or 0 */
5140 	const trx_t* reposition() const
5141 	{
5142 		ulint	i;
5143 		trx_t*	trx;
5144 
5145 		/* Make the transaction at the ordinal value of m_index
5146 		the current transaction. ie. reposition/restore */
5147 
5148 		for (i = 0, trx = UT_LIST_GET_FIRST(*m_trx_list);
5149 		     trx != NULL && (i < m_index);
5150 		     trx = UT_LIST_GET_NEXT(trx_list, trx), ++i) {
5151 
5152 			check_trx_state(trx);
5153 		}
5154 
5155 		return(trx);
5156 	}
5157 
5158 	/** Ordinal value of the transaction in the current transaction list */
5159 	ulint			m_index;
5160 
5161 	/** Current transaction list */
5162 	trx_ut_list_t*		m_trx_list;
5163 
5164 	/** For iterating over a transaction's locks */
5165 	TrxLockIterator		m_lock_iter;
5166 };
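/* Editor's note (not part of the original source): TrxListIterator
stores an ordinal value instead of a list node pointer on purpose.
lock_rec_fetch_page() below may release both the lock mutex and
trx_sys->mutex, after which a cached trx_t* could be stale;
reposition() instead re-walks the list from the head, which keeps the
iteration valid across such latch releases. A resume-safe walk,
guarded out of the build: */
#if 0
static void
example_walk_trx_list()
{
	TrxListIterator	trx_iter;
	const trx_t*	trx;

	while ((trx = trx_iter.current()) != NULL) {
		/* The mutexes may be dropped and re-acquired here;
		current() repositions from the list head afterwards. */
		trx_iter.next();
	}
}
#endif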
5167 
5168 /** Prints transaction lock wait and MVCC state.
5169 @param[in,out]	file	file where to print
5170 @param[in]	trx	transaction */
5171 void
5172 lock_trx_print_wait_and_mvcc_state(
5173 	FILE*		file,
5174 	const trx_t*	trx)
5175 {
5176 	fprintf(file, "---");
5177 
5178 	trx_print_latched(file, trx, 600);
5179 
5180 	const ReadView*	read_view = trx_get_read_view(trx);
5181 
5182 	if (read_view != NULL) {
5183 		read_view->print_limits(file);
5184 	}
5185 
5186 	if (trx->lock.que_state == TRX_QUE_LOCK_WAIT) {
5187 
5188 		fprintf(file,
5189 			"------- TRX HAS BEEN WAITING %lu SEC"
5190 			" FOR THIS LOCK TO BE GRANTED:\n",
5191 			(ulong) difftime(ut_time(), trx->lock.wait_started));
5192 
5193 		if (lock_get_type_low(trx->lock.wait_lock) == LOCK_REC) {
5194 			lock_rec_print(file, trx->lock.wait_lock);
5195 		} else {
5196 			lock_table_print(file, trx->lock.wait_lock);
5197 		}
5198 
5199 		fprintf(file, "------------------\n");
5200 	}
5201 }
5202 
5203 /*********************************************************************//**
5204 Fetches into the buffer pool the page referred to by a record lock. This
5205 function will release the lock mutex and the trx_sys_t::mutex if the page
5206 is read from disk. @return true if the page was read from the tablespace */
5207 static
5208 bool
5209 lock_rec_fetch_page(
5210 /*================*/
5211 	const lock_t*	lock)	/*!< in: record lock */
5212 {
5213 	ut_ad(lock_get_type_low(lock) == LOCK_REC);
5214 
5215 	ulint			space_id = lock->un_member.rec_lock.space;
5216 	fil_space_t*		space;
5217 	bool			found;
5218 	const page_size_t&	page_size = fil_space_get_page_size(space_id,
5219 								    &found);
5220 	ulint			page_no = lock->un_member.rec_lock.page_no;
5221 
5222 	/* Check if the .ibd file exists. */
5223 	if (found) {
5224 		mtr_t	mtr;
5225 
5226 		lock_mutex_exit();
5227 
5228 		mutex_exit(&trx_sys->mutex);
5229 
5230 		DEBUG_SYNC_C("innodb_monitor_before_lock_page_read");
5231 
5232 		/* Check whether the tablespace still exists; only
5233 		when the space is valid do we try to get the page. */
5234 		space = fil_space_acquire(space_id);
5235 		if (space) {
5236 			mtr_start(&mtr);
5237 			buf_page_get_gen(
5238 				page_id_t(space_id, page_no), page_size,
5239 				RW_NO_LATCH, NULL,
5240 				BUF_GET_POSSIBLY_FREED,
5241 				__FILE__, __LINE__, &mtr);
5242 			mtr_commit(&mtr);
5243 			fil_space_release(space);
5244 		}
5245 
5246 		lock_mutex_enter();
5247 
5248 		mutex_enter(&trx_sys->mutex);
5249 
5250 		return(true);
5251 	}
5252 
5253 	return(false);
5254 }
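/* Editor's note (not part of the original source): the return value
protocol of lock_rec_fetch_page() is easy to misread. true means the
page existed and both mutexes were dropped and re-acquired while it
was read in, so any iterator state derived from the lock lists must be
resynced before continuing; false means the .ibd file was missing and
no latch was released, so it is safe to keep iterating.
lock_trx_print_locks() below relies on exactly this distinction. */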
5255 
5256 /*********************************************************************//**
5257 Prints info of locks for a transaction.
5258 @return true if all printed, false if latches were released. */
5259 static
5260 bool
5261 lock_trx_print_locks(
5262 /*=================*/
5263 	FILE*		file,		/*!< in/out: File to write */
5264 	const trx_t*	trx,		/*!< in: current transaction */
5265 	TrxLockIterator&iter,		/*!< in: transaction lock iterator */
5266 	bool		load_block)	/*!< in: if true then read block
5267 					from disk */
5268 {
5269 	const lock_t* lock;
5270 
5271 	/* Iterate over the transaction's locks. */
5272 	while ((lock = iter.current(trx)) != 0) {
5273 
5274 		if (lock_get_type_low(lock) == LOCK_REC) {
5275 
5276 			if (load_block) {
5277 
5278 				/* Note: lock_rec_fetch_page() will
5279 				release both the lock mutex and the
5280 				trx_sys_t::mutex if it does a read
5281 				from disk. */
5282 
5283 				if (lock_rec_fetch_page(lock)) {
5284 					/* We need to resync the
5285 					current transaction. */
5286 					return(false);
5287 				}
5288 
5289 				/* It is a single table tablespace
5290 				and the .ibd file is missing
5291 				(TRUNCATE TABLE probably stole the
5292 				locks): just print the lock without
5293 				attempting to load the page in the
5294 				buffer pool. */
5295 
5296 				fprintf(file,
5297 					"RECORD LOCKS on non-existing"
5298 					" space %u\n",
5299 					lock->un_member.rec_lock.space);
5300 			}
5301 
5302 			/* Print all the record locks on the page from
5303 			the record lock bitmap */
5304 
5305 			lock_rec_print(file, lock);
5306 
5307 			load_block = true;
5308 
5309 		} else {
5310 			ut_ad(lock_get_type_low(lock) & LOCK_TABLE);
5311 
5312 			lock_table_print(file, lock);
5313 		}
5314 
5315 		if (iter.next() >= 10) {
5316 
5317 			fprintf(file,
5318 				"10 LOCKS PRINTED FOR THIS TRX:"
5319 				" SUPPRESSING FURTHER PRINTS\n");
5320 
5321 			break;
5322 		}
5323 	}
5324 
5325 	return(true);
5326 }
5327 
5328 /*********************************************************************//**
5329 Prints info of locks for each transaction. This function assumes that the
5330 caller holds the lock mutex and more importantly it will release the lock
5331 mutex on behalf of the caller. (This should be fixed in the future). */
5332 void
5333 lock_print_info_all_transactions(
5334 /*=============================*/
5335 	FILE*		file)	/*!< in/out: file where to print */
5336 {
5337 	ut_ad(lock_mutex_own());
5338 
5339 	fprintf(file, "LIST OF TRANSACTIONS FOR EACH SESSION:\n");
5340 
5341 	mutex_enter(&trx_sys->mutex);
5342 
5343 	/* First print info on non-active transactions */
5344 
5345 	/* NOTE: information of auto-commit non-locking read-only
5346 	transactions will be omitted here. The information will be
5347 	available from INFORMATION_SCHEMA.INNODB_TRX. */
5348 
5349 	PrintNotStarted	print_not_started(file);
5350 	ut_list_map(trx_sys->mysql_trx_list, print_not_started);
5351 
5352 	const trx_t*	trx;
5353 	TrxListIterator	trx_iter;
5354 	const trx_t*	prev_trx = 0;
5355 
5356 	/* Control whether a block should be fetched from the buffer pool. */
5357 	bool		load_block = true;
5358 	bool		monitor = srv_print_innodb_lock_monitor;
5359 
5360 	while ((trx = trx_iter.current()) != 0) {
5361 
5362 		check_trx_state(trx);
5363 
5364 		if (trx != prev_trx) {
5365 			lock_trx_print_wait_and_mvcc_state(file, trx);
5366 			prev_trx = trx;
5367 
5368 			/* The transaction being printed is no longer
5369 			the one for which the page was last read in.
5370 			We need to force a page read. */
5371 			load_block = true;
5372 		}
5373 
5374 		/* If we need to print the locked record contents then we
5375 		need to fetch the containing block from the buffer pool. */
5376 		if (monitor) {
5377 
5378 			/* Print the locks owned by the current transaction. */
5379 			TrxLockIterator& lock_iter = trx_iter.lock_iter();
5380 
5381 			if (!lock_trx_print_locks(
5382 					file, trx, lock_iter, load_block)) {
5383 
5384 				/* Resync trx_iter: the trx_sys->mutex and
5385 				the lock mutex were released while a page
5386 				was successfully read in. We need to print
5387 				its contents on the next call to
5388 				lock_trx_print_locks(), so on that next
5389 				call we simply print the contents of the
5390 				page just read in, without re-fetching it. */
5391 				load_block = false;
5392 
5393 				continue;
5394 			}
5395 		}
5396 
5397 		load_block = true;
5398 
5399 		/* All record lock details were printed without fetching
5400 		a page from disk, or we didn't need to print the detail. */
5401 		trx_iter.next();
5402 	}
5403 
5404 	lock_mutex_exit();
5405 	mutex_exit(&trx_sys->mutex);
5406 
5407 	ut_ad(lock_validate());
5408 }
5409 
5410 #ifdef UNIV_DEBUG
5411 /*********************************************************************//**
5412 Find the lock in the trx_t::trx_lock_t::table_locks vector.
5413 @return true if found */
5414 static
5415 bool
5416 lock_trx_table_locks_find(
5417 /*======================*/
5418 	trx_t*		trx,		/*!< in: trx to validate */
5419 	const lock_t*	find_lock)	/*!< in: lock to find */
5420 {
5421 	bool		found = false;
5422 
5423 	trx_mutex_enter(trx);
5424 
5425 	typedef lock_pool_t::const_reverse_iterator iterator;
5426 
5427 	iterator	end = trx->lock.table_locks.rend();
5428 
5429 	for (iterator it = trx->lock.table_locks.rbegin(); it != end; ++it) {
5430 
5431 		const lock_t*	lock = *it;
5432 
5433 		if (lock == NULL) {
5434 
5435 			continue;
5436 
5437 		} else if (lock == find_lock) {
5438 
5439 			/* Can't be duplicates. */
5440 			ut_a(!found);
5441 			found = true;
5442 		}
5443 
5444 		ut_a(trx == lock->trx);
5445 		ut_a(lock_get_type_low(lock) & LOCK_TABLE);
5446 		ut_a(lock->un_member.tab_lock.table != NULL);
5447 	}
5448 
5449 	trx_mutex_exit(trx);
5450 
5451 	return(found);
5452 }
5453 
5454 /*********************************************************************//**
5455 Validates the lock queue on a table.
5456 @return TRUE if ok */
5457 static
5458 ibool
5459 lock_table_queue_validate(
5460 /*======================*/
5461 	const dict_table_t*	table)	/*!< in: table */
5462 {
5463 	const lock_t*	lock;
5464 
5465 	ut_ad(lock_mutex_own());
5466 	ut_ad(trx_sys_mutex_own());
5467 
5468 	for (lock = UT_LIST_GET_FIRST(table->locks);
5469 	     lock != NULL;
5470 	     lock = UT_LIST_GET_NEXT(un_member.tab_lock.locks, lock)) {
5471 
5472 		/* lock->trx->state cannot change from or to NOT_STARTED
5473 		while we are holding the trx_sys->mutex. It may change
5474 		from ACTIVE to PREPARED, but it may not change to
5475 		COMMITTED, because we are holding the lock_sys->mutex. */
5476 		ut_ad(trx_assert_started(lock->trx));
5477 
5478 		if (!lock_get_wait(lock)) {
5479 
5480 			ut_a(!lock_table_other_has_incompatible(
5481 				     lock->trx, 0, table,
5482 				     lock_get_mode(lock)));
5483 		} else {
5484 
5485 			ut_a(lock_table_has_to_wait_in_queue(lock));
5486 		}
5487 
5488 		ut_a(lock_trx_table_locks_find(lock->trx, lock));
5489 	}
5490 
5491 	return(TRUE);
5492 }
5493 
5494 /*********************************************************************//**
5495 Validates the lock queue on a single record.
5496 @return TRUE if ok */
5497 static
5498 ibool
5499 lock_rec_queue_validate(
5500 /*====================*/
5501 	ibool			locked_lock_trx_sys,
5502 					/*!< in: if the caller holds
5503 					both the lock mutex and
5504 					trx_sys_t::mutex. */
5505 	const buf_block_t*	block,	/*!< in: buffer block containing rec */
5506 	const rec_t*		rec,	/*!< in: record to look at */
5507 	const dict_index_t*	index,	/*!< in: index, or NULL if not known */
5508 	const ulint*		offsets)/*!< in: rec_get_offsets(rec, index) */
5509 {
5510 	const trx_t*	impl_trx;
5511 	const lock_t*	lock;
5512 	ulint		heap_no;
5513 
5514 	ut_a(rec);
5515 	ut_a(block->frame == page_align(rec));
5516 	ut_ad(rec_offs_validate(rec, index, offsets));
5517 	ut_ad(!page_rec_is_comp(rec) == !rec_offs_comp(offsets));
5518 	ut_ad(lock_mutex_own() == locked_lock_trx_sys);
5519 	ut_ad(!index || dict_index_is_clust(index)
5520 	      || !dict_index_is_online_ddl(index));
5521 
5522 	heap_no = page_rec_get_heap_no(rec);
5523 
5524 	if (!locked_lock_trx_sys) {
5525 		lock_mutex_enter();
5526 		mutex_enter(&trx_sys->mutex);
5527 	}
5528 
5529 	if (!page_rec_is_user_rec(rec)) {
5530 
5531 		for (lock = lock_rec_get_first(lock_sys->rec_hash,
5532 					       block, heap_no);
5533 		     lock != NULL;
5534 		     lock = lock_rec_get_next_const(heap_no, lock)) {
5535 
5536 			ut_ad(!trx_is_ac_nl_ro(lock->trx));
5537 
5538 			if (lock_get_wait(lock)) {
5539 				ut_a(lock_rec_has_to_wait_in_queue(lock));
5540 			}
5541 
5542 			if (index != NULL) {
5543 				ut_a(lock->index == index);
5544 			}
5545 		}
5546 
5547 		goto func_exit;
5548 	}
5549 
5550 	if (index == NULL) {
5551 
5552 		/* Nothing we can do */
5553 
5554 	} else if (dict_index_is_clust(index)) {
5555 		trx_id_t	trx_id;
5556 
5557 		/* Unlike the non-debug code, this invariant can only succeed
5558 		if the check and assertion are covered by the lock mutex. */
5559 
5560 		trx_id = lock_clust_rec_some_has_impl(rec, index, offsets);
5561 		impl_trx = trx_rw_is_active_low(trx_id, NULL);
5562 
5563 		ut_ad(lock_mutex_own());
5564 		/* impl_trx cannot be committed until lock_mutex_exit()
5565 		because lock_trx_release_locks() acquires lock_sys->mutex */
5566 
5567 		if (impl_trx != NULL) {
5568 			const lock_t*	other_lock
5569 				= lock_rec_other_has_expl_req(
5570 					LOCK_S, block, true, heap_no,
5571 					impl_trx);
5572 
5573 			/* The impl_trx is holding an implicit lock on the
5574 			given record 'rec'. So there cannot be another
5575 			explicit granted lock.  Also, there can be another
5576 			explicit waiting lock only if the impl_trx has an
5577 			explicit granted lock. */
5578 
5579 			if (other_lock != NULL) {
5580 				ut_a(lock_get_wait(other_lock));
5581 				ut_a(lock_rec_has_expl(
5582 					LOCK_X | LOCK_REC_NOT_GAP,
5583 					block, heap_no, impl_trx));
5584 			}
5585 		}
5586 	}
5587 
5588 	for (lock = lock_rec_get_first(lock_sys->rec_hash, block, heap_no);
5589 	     lock != NULL;
5590 	     lock = lock_rec_get_next_const(heap_no, lock)) {
5591 
5592 		ut_ad(!trx_is_ac_nl_ro(lock->trx));
5593 
5594 		if (index) {
5595 			ut_a(lock->index == index);
5596 		}
5597 
5598 		if (!lock_rec_get_gap(lock) && !lock_get_wait(lock)) {
5599 
5600 			lock_mode	mode;
5601 
5602 			if (lock_get_mode(lock) == LOCK_S) {
5603 				mode = LOCK_X;
5604 			} else {
5605 				mode = LOCK_S;
5606 			}
5607 
5608 			const lock_t*	other_lock
5609 				= lock_rec_other_has_expl_req(
5610 					mode, block, false, heap_no,
5611 					lock->trx);
5612 			ut_a(!other_lock);
5613 
5614 		} else if (lock_get_wait(lock) && !lock_rec_get_gap(lock)) {
5615 
5616 			ut_a(lock_rec_has_to_wait_in_queue(lock));
5617 		}
5618 	}
5619 
5620 func_exit:
5621 	if (!locked_lock_trx_sys) {
5622 		lock_mutex_exit();
5623 		mutex_exit(&trx_sys->mutex);
5624 	}
5625 
5626 	return(TRUE);
5627 }
5628 
5629 /*********************************************************************//**
5630 Validates the record lock queues on a page.
5631 @return TRUE if ok */
5632 static
5633 ibool
5634 lock_rec_validate_page(
5635 /*===================*/
5636 	const buf_block_t*	block)	/*!< in: buffer block */
5637 {
5638 	const lock_t*	lock;
5639 	const rec_t*	rec;
5640 	ulint		nth_lock	= 0;
5641 	ulint		nth_bit		= 0;
5642 	ulint		i;
5643 	mem_heap_t*	heap		= NULL;
5644 	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
5645 	ulint*		offsets		= offsets_;
5646 	rec_offs_init(offsets_);
5647 
5648 	ut_ad(!lock_mutex_own());
5649 
5650 	lock_mutex_enter();
5651 	mutex_enter(&trx_sys->mutex);
5652 loop:
5653 	lock = lock_rec_get_first_on_page_addr(
5654 		lock_sys->rec_hash,
5655 		block->page.id.space(), block->page.id.page_no());
5656 
5657 	if (!lock) {
5658 		goto function_exit;
5659 	}
5660 
5661 	ut_ad(!block->page.file_page_was_freed);
5662 
5663 	for (i = 0; i < nth_lock; i++) {
5664 
5665 		lock = lock_rec_get_next_on_page_const(lock);
5666 
5667 		if (!lock) {
5668 			goto function_exit;
5669 		}
5670 	}
5671 
5672 	ut_ad(!trx_is_ac_nl_ro(lock->trx));
5673 
5674 # ifdef UNIV_DEBUG
5675 	/* Only validate the record queues when this thread is not
5676 	holding a space->latch. Running the validation while holding
5677 	that latch would violate the latching order and could
5678 	deadlock with other threads. */
5679 	if (!sync_check_find(SYNC_FSP))
5680 # endif /* UNIV_DEBUG */
5681 	for (i = nth_bit; i < lock_rec_get_n_bits(lock); i++) {
5682 
5683 		if (i == 1 || lock_rec_get_nth_bit(lock, i)) {
5684 
5685 			rec = page_find_rec_with_heap_no(block->frame, i);
5686 			ut_a(rec);
5687 			offsets = rec_get_offsets(rec, lock->index, offsets,
5688 						  ULINT_UNDEFINED, &heap);
5689 
5690 			/* If this thread is holding the file space
5691 			latch (fil_space_t::latch), the following
5692 			check WILL break the latching order and may
5693 			cause a deadlock of threads. */
5694 
5695 			lock_rec_queue_validate(
5696 				TRUE, block, rec, lock->index, offsets);
5697 
5698 			nth_bit = i + 1;
5699 
5700 			goto loop;
5701 		}
5702 	}
5703 
5704 	nth_bit = 0;
5705 	nth_lock++;
5706 
5707 	goto loop;
5708 
5709 function_exit:
5710 	lock_mutex_exit();
5711 	mutex_exit(&trx_sys->mutex);
5712 
5713 	if (heap != NULL) {
5714 		mem_heap_free(heap);
5715 	}
5716 	return(TRUE);
5717 }
5718 
5719 /*********************************************************************//**
5720 Validates the table locks.
5721 @return TRUE if ok */
5722 static
5723 ibool
5724 lock_validate_table_locks(
5725 /*======================*/
5726 	const trx_ut_list_t*	trx_list)	/*!< in: trx list */
5727 {
5728 	const trx_t*	trx;
5729 
5730 	ut_ad(lock_mutex_own());
5731 	ut_ad(trx_sys_mutex_own());
5732 
5733 	ut_ad(trx_list == &trx_sys->rw_trx_list);
5734 
5735 	for (trx = UT_LIST_GET_FIRST(*trx_list);
5736 	     trx != NULL;
5737 	     trx = UT_LIST_GET_NEXT(trx_list, trx)) {
5738 
5739 		const lock_t*	lock;
5740 
5741 		check_trx_state(trx);
5742 
5743 		for (lock = UT_LIST_GET_FIRST(trx->lock.trx_locks);
5744 		     lock != NULL;
5745 		     lock = UT_LIST_GET_NEXT(trx_locks, lock)) {
5746 
5747 			if (lock_get_type_low(lock) & LOCK_TABLE) {
5748 
5749 				lock_table_queue_validate(
5750 					lock->un_member.tab_lock.table);
5751 			}
5752 		}
5753 	}
5754 
5755 	return(TRUE);
5756 }
5757 
5758 /*********************************************************************//**
5759 Validate record locks up to a limit.
5760 @return lock at limit or NULL if no more locks in the hash bucket */
5761 static MY_ATTRIBUTE((warn_unused_result))
5762 const lock_t*
5763 lock_rec_validate(
5764 /*==============*/
5765 	ulint		start,		/*!< in: lock_sys->rec_hash
5766 					bucket */
5767 	ib_uint64_t*	limit)		/*!< in/out: upper limit of
5768 					(space, page_no) */
5769 {
5770 	ut_ad(lock_mutex_own());
5771 	ut_ad(trx_sys_mutex_own());
5772 
5773 	for (const lock_t* lock = static_cast<const lock_t*>(
5774 			HASH_GET_FIRST(lock_sys->rec_hash, start));
5775 	     lock != NULL;
5776 	     lock = static_cast<const lock_t*>(HASH_GET_NEXT(hash, lock))) {
5777 
5778 		ib_uint64_t	current;
5779 
5780 		ut_ad(!trx_is_ac_nl_ro(lock->trx));
5781 		ut_ad(lock_get_type(lock) == LOCK_REC);
5782 
5783 		current = ut_ull_create(
5784 			lock->un_member.rec_lock.space,
5785 			lock->un_member.rec_lock.page_no);
5786 
5787 		if (current > *limit) {
5788 			*limit = current + 1;
5789 			return(lock);
5790 		}
5791 	}
5792 
5793 	return(0);
5794 }
5795 
5796 /*********************************************************************//**
5797 Validate a record lock's block */
5798 static
5799 void
5800 lock_rec_block_validate(
5801 /*====================*/
5802 	ulint		space_id,
5803 	ulint		page_no)
5804 {
5805 	/* The lock and the block that it is referring to may be freed at
5806 	this point. We pass BUF_GET_POSSIBLY_FREED to skip a debug check.
5807 	If the lock exists in lock_rec_validate_page() we assert
5808 	!block->page.file_page_was_freed. */
5809 
5810 	buf_block_t*	block;
5811 	mtr_t		mtr;
5812 
5813 	/* Make sure that the tablespace is not deleted while we are
5814 	trying to access the page. */
5815 	if (fil_space_t* space = fil_space_acquire(space_id)) {
5816 		mtr_start(&mtr);
5817 
5818 		block = buf_page_get_gen(
5819 			page_id_t(space_id, page_no),
5820 			page_size_t(space->flags),
5821 			RW_X_LATCH, NULL,
5822 			BUF_GET_POSSIBLY_FREED,
5823 			__FILE__, __LINE__, &mtr);
5824 
5825 		buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK);
5826 
5827 		ut_ad(lock_rec_validate_page(block));
5828 		mtr_commit(&mtr);
5829 
5830 		fil_space_release(space);
5831 	}
5832 }
5833 
5834 /*********************************************************************//**
5835 Validates the lock system.
5836 @return TRUE if ok */
5837 static
5838 bool
5839 lock_validate()
5840 /*===========*/
5841 {
5842 	typedef	std::pair<ulint, ulint>		page_addr_t;
5843 	typedef std::set<
5844 		page_addr_t,
5845 		std::less<page_addr_t>,
5846 		ut_allocator<page_addr_t> >	page_addr_set;
5847 
5848 	page_addr_set	pages;
5849 
5850 	lock_mutex_enter();
5851 	mutex_enter(&trx_sys->mutex);
5852 
5853 	ut_a(lock_validate_table_locks(&trx_sys->rw_trx_list));
5854 
5855 	/* Iterate over all the record locks and validate the locks. We
5856 	don't want to hog the lock_sys_t::mutex and the trx_sys_t::mutex.
5857 	Release both mutexes during the validation check. */
5858 
5859 	for (ulint i = 0; i < hash_get_n_cells(lock_sys->rec_hash); i++) {
5860 		const lock_t*	lock;
5861 		ib_uint64_t	limit = 0;
5862 
5863 		while ((lock = lock_rec_validate(i, &limit)) != 0) {
5864 
5865 			ulint	space = lock->un_member.rec_lock.space;
5866 			ulint	page_no = lock->un_member.rec_lock.page_no;
5867 
5868 			pages.insert(std::make_pair(space, page_no));
5869 		}
5870 	}
5871 
5872 	mutex_exit(&trx_sys->mutex);
5873 	lock_mutex_exit();
5874 
5875 	for (page_addr_set::const_iterator it = pages.begin();
5876 	     it != pages.end();
5877 	     ++it) {
5878 		lock_rec_block_validate((*it).first, (*it).second);
5879 	}
5880 
5881 	return(true);
5882 }
5883 #endif /* UNIV_DEBUG */
5884 /*============ RECORD LOCK CHECKS FOR ROW OPERATIONS ====================*/
5885 
5886 /*********************************************************************//**
5887 Checks if locks of other transactions prevent an immediate insert of
5888 a record. If they do, first tests if the query thread should anyway
5889 be suspended for some reason; if not, then puts the transaction and
5890 the query thread to the lock wait state and inserts a waiting request
5891 for a gap x-lock to the lock queue.
5892 @return DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
5893 dberr_t
5894 lock_rec_insert_check_and_lock(
5895 /*===========================*/
5896 	ulint		flags,	/*!< in: if BTR_NO_LOCKING_FLAG bit is
5897 				set, does nothing */
5898 	const rec_t*	rec,	/*!< in: record after which to insert */
5899 	buf_block_t*	block,	/*!< in/out: buffer block of rec */
5900 	dict_index_t*	index,	/*!< in: index */
5901 	que_thr_t*	thr,	/*!< in: query thread */
5902 	mtr_t*		mtr,	/*!< in/out: mini-transaction */
5903 	ibool*		inherit)/*!< out: set to TRUE if the new
5904 				inserted record maybe should inherit
5905 				LOCK_GAP type locks from the successor
5906 				record */
5907 {
5908 	ut_ad(block->frame == page_align(rec));
5909 	ut_ad(!dict_index_is_online_ddl(index)
5910 	      || dict_index_is_clust(index)
5911 	      || (flags & BTR_CREATE_FLAG));
5912 	ut_ad(mtr->is_named_space(index->space));
5913 
5914 	if (flags & BTR_NO_LOCKING_FLAG) {
5915 
5916 		return(DB_SUCCESS);
5917 	}
5918 
5919 	ut_ad(!dict_table_is_temporary(index->table));
5920 
5921 	dberr_t		err;
5922 	lock_t*		lock;
5923 	ibool		inherit_in = *inherit;
5924 	trx_t*		trx = thr_get_trx(thr);
5925 	const rec_t*	next_rec = page_rec_get_next_const(rec);
5926 	ulint		heap_no = page_rec_get_heap_no(next_rec);
5927 
5928 	lock_mutex_enter();
5929 	/* Because this code is invoked for a running transaction by
5930 	the thread that is serving the transaction, it is not necessary
5931 	to hold trx->mutex here. */
5932 
5933 	/* When inserting a record into an index, the table must be at
5934 	least IX-locked. When we are building an index, we would pass
5935 	BTR_NO_LOCKING_FLAG and skip the locking altogether. */
5936 	ut_ad(lock_table_has(trx, index->table, LOCK_IX));
5937 
5938 	lock = lock_rec_get_first(lock_sys->rec_hash, block, heap_no);
5939 
5940 	if (lock == NULL) {
5941 		/* We optimize CPU time usage in the simplest case */
5942 
5943 		lock_mutex_exit();
5944 
5945 		if (inherit_in && !dict_index_is_clust(index)) {
5946 			/* Update the page max trx id field */
5947 			page_update_max_trx_id(block,
5948 					       buf_block_get_page_zip(block),
5949 					       trx->id, mtr);
5950 		}
5951 
5952 		*inherit = FALSE;
5953 
5954 		return(DB_SUCCESS);
5955 	}
5956 
5957 	/* Spatial index does not use GAP lock protection. It uses
5958 	"predicate lock" to protect the "range". Note that the lock
5959 	mutex is still held here and must be released before returning. */
5960 	if (dict_index_is_spatial(index)) {
5961 		lock_mutex_exit();
		return(DB_SUCCESS);
	}
5962 
5963 	*inherit = TRUE;
5964 
5965 	/* If another transaction has an explicit lock request which locks
5966 	the gap, waiting or granted, on the successor, the insert has to wait.
5967 
5968 	An exception is the case where the lock by another transaction
5969 	is a gap type lock which it placed to wait for its turn to insert. We
5970 	do not consider that kind of lock conflicting with our insert. This
5971 	eliminates an unnecessary deadlock which resulted when two transactions
5972 	had to wait for their inserts. Both had waiting gap type lock requests
5973 	on the successor, which produced an unnecessary deadlock. */
5974 
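	/* Editor's example (not part of the original source): suppose T1
	and T2 both wait with LOCK_X | LOCK_GAP | LOCK_INSERT_INTENTION
	on the same successor record. If each one's waiting gap lock
	were treated as conflicting with the other's insert, T1 would
	wait for T2 and T2 for T1, although their inserts target
	different positions within the gap. Ignoring other transactions'
	waiting gap locks breaks this false cycle. */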
5975 	const ulint	type_mode = LOCK_X | LOCK_GAP | LOCK_INSERT_INTENTION;
5976 
5977 	const lock_t*	wait_for = lock_rec_other_has_conflicting(
5978 				type_mode, block, heap_no, trx);
5979 
5980 	if (wait_for != NULL) {
5981 
5982 		RecLock	rec_lock(thr, index, block, heap_no, type_mode);
5983 
5984 		trx_mutex_enter(trx);
5985 
5986 		err = rec_lock.add_to_waitq(wait_for);
5987 
5988 		trx_mutex_exit(trx);
5989 
5990 	} else {
5991 		err = DB_SUCCESS;
5992 	}
5993 
5994 	lock_mutex_exit();
5995 
5996 	switch (err) {
5997 	case DB_SUCCESS_LOCKED_REC:
5998 		err = DB_SUCCESS;
5999 		/* fall through */
6000 	case DB_SUCCESS:
6001 		if (!inherit_in || dict_index_is_clust(index)) {
6002 			break;
6003 		}
6004 
6005 		/* Update the page max trx id field */
6006 		page_update_max_trx_id(
6007 			block, buf_block_get_page_zip(block), trx->id, mtr);
		/* fall through */
6008 	default:
6009 		/* We only care about the two return values. */
6010 		break;
6011 	}
6012 
6013 #ifdef UNIV_DEBUG
6014 	{
6015 		mem_heap_t*	heap		= NULL;
6016 		ulint		offsets_[REC_OFFS_NORMAL_SIZE];
6017 		const ulint*	offsets;
6018 		rec_offs_init(offsets_);
6019 
6020 		offsets = rec_get_offsets(next_rec, index, offsets_,
6021 					  ULINT_UNDEFINED, &heap);
6022 
6023 		ut_ad(lock_rec_queue_validate(
6024 				FALSE, block, next_rec, index, offsets));
6025 
6026 		if (heap != NULL) {
6027 			mem_heap_free(heap);
6028 		}
6029 	}
6030 #endif /* UNIV_DEBUG */
6031 
6032 	return(err);
6033 }
6034 
6035 /*********************************************************************//**
6036 Creates an explicit record lock for a running transaction that currently only
6037 has an implicit lock on the record. The transaction instance must have a
6038 reference count > 0 so that it can't be committed and freed before this
6039 function has completed. */
6040 static
6041 void
6042 lock_rec_convert_impl_to_expl_for_trx(
6043 /*==================================*/
6044 	const buf_block_t*	block,	/*!< in: buffer block of rec */
6045 	const rec_t*		rec,	/*!< in: user record on page */
6046 	dict_index_t*		index,	/*!< in: index of record */
6047 	const ulint*		offsets,/*!< in: rec_get_offsets(rec, index) */
6048 	trx_t*			trx,	/*!< in/out: active transaction */
6049 	ulint			heap_no)/*!< in: rec heap number to lock */
6050 {
6051 	ut_ad(trx_is_referenced(trx));
6052 
6053 	DEBUG_SYNC_C("before_lock_rec_convert_impl_to_expl_for_trx");
6054 
6055 	lock_mutex_enter();
6056 
6057 	ut_ad(!trx_state_eq(trx, TRX_STATE_NOT_STARTED));
6058 
6059 	if (!trx_state_eq(trx, TRX_STATE_COMMITTED_IN_MEMORY)
6060 	    && !lock_rec_has_expl(LOCK_X | LOCK_REC_NOT_GAP,
6061 				  block, heap_no, trx)) {
6062 
6063 		ulint	type_mode;
6064 
6065 		type_mode = (LOCK_REC | LOCK_X | LOCK_REC_NOT_GAP);
6066 
6067 		lock_rec_add_to_queue(
6068 			type_mode, block, heap_no, index, trx, FALSE);
6069 	}
6070 
6071 	lock_mutex_exit();
6072 
6073 	trx_release_reference(trx);
6074 
6075 	DEBUG_SYNC_C("after_lock_rec_convert_impl_to_expl_for_trx");
6076 }
6077 
6078 /*********************************************************************//**
6079 If a transaction has an implicit x-lock on a record, but no explicit x-lock
6080 set on the record, sets one for it. */
6081 static
6082 void
6083 lock_rec_convert_impl_to_expl(
6084 /*==========================*/
6085 	const buf_block_t*	block,	/*!< in: buffer block of rec */
6086 	const rec_t*		rec,	/*!< in: user record on page */
6087 	dict_index_t*		index,	/*!< in: index of record */
6088 	const ulint*		offsets)/*!< in: rec_get_offsets(rec, index) */
6089 {
6090 	trx_t*		trx;
6091 
6092 	ut_ad(!lock_mutex_own());
6093 	ut_ad(page_rec_is_user_rec(rec));
6094 	ut_ad(rec_offs_validate(rec, index, offsets));
6095 	ut_ad(!page_rec_is_comp(rec) == !rec_offs_comp(offsets));
6096 
6097 	if (dict_index_is_clust(index)) {
6098 		trx_id_t	trx_id;
6099 
6100 		trx_id = lock_clust_rec_some_has_impl(rec, index, offsets);
6101 
6102 		trx = trx_rw_is_active(trx_id, NULL, true);
6103 	} else {
6104 		ut_ad(!dict_index_is_online_ddl(index));
6105 
6106 		trx = lock_sec_rec_some_has_impl(rec, index, offsets);
6107 
6108 		ut_ad(!trx || !lock_rec_other_trx_holds_expl(
6109 				LOCK_S | LOCK_REC_NOT_GAP, trx, rec, block));
6110 	}
6111 
6112 	if (trx != 0) {
6113 		ulint	heap_no = page_rec_get_heap_no(rec);
6114 
6115 		ut_ad(trx_is_referenced(trx));
6116 
6117 		/* If the transaction is still active and has no
6118 		explicit x-lock set on the record, set one for it.
6119 		trx cannot be committed until the ref count is zero. */
6120 
6121 		lock_rec_convert_impl_to_expl_for_trx(
6122 			block, rec, index, offsets, trx, heap_no);
6123 	}
6124 }
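/* Editor's sketch (not part of the original source): the canonical
calling pattern for the conversion above, mirrored by the
check-and-lock functions below. The conversion runs before the lock
mutex is taken; afterwards the explicit queue fully describes all
conflicts, so a plain queue-based request suffices. Fragment guarded
out of the build. */
#if 0
	/* 1. Make any implicit x-lock explicit (no lock mutex held). */
	lock_rec_convert_impl_to_expl(block, rec, index, offsets);

	/* 2. Request the explicit lock through the ordinary queue. */
	lock_mutex_enter();
	err = lock_rec_lock(TRUE, LOCK_X | LOCK_REC_NOT_GAP,
			    block, heap_no, index, thr);
	lock_mutex_exit();
#endif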
6125 
6126 void
6127 lock_rec_convert_active_impl_to_expl(
6128 	const buf_block_t*	block,	/*!< in: buffer block of rec */
6129 	const rec_t*		rec,	/*!< in: user record on page */
6130 	dict_index_t*		index,	/*!< in: index of record */
6131 	const ulint*		offsets,/*!< in: rec_get_offsets(rec, index) */
6132 	trx_t*			trx,	/*!< in/out: active transaction */
6133 	ulint			heap_no)/*!< in: rec heap number to lock */
6134 {
6135 	trx_reference(trx, true);
6136 	lock_rec_convert_impl_to_expl_for_trx(block, rec, index, offsets,
6137 					      trx, heap_no);
6138 }
6139 /*********************************************************************//**
6140 Checks if locks of other transactions prevent an immediate modify (update,
6141 delete mark, or delete unmark) of a clustered index record. If they do,
6142 first tests if the query thread should anyway be suspended for some
6143 reason; if not, then puts the transaction and the query thread to the
6144 lock wait state and inserts a waiting request for a record x-lock to the
6145 lock queue.
6146 @return DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
6147 dberr_t
6148 lock_clust_rec_modify_check_and_lock(
6149 /*=================================*/
6150 	ulint			flags,	/*!< in: if BTR_NO_LOCKING_FLAG
6151 					bit is set, does nothing */
6152 	const buf_block_t*	block,	/*!< in: buffer block of rec */
6153 	const rec_t*		rec,	/*!< in: record which should be
6154 					modified */
6155 	dict_index_t*		index,	/*!< in: clustered index */
6156 	const ulint*		offsets,/*!< in: rec_get_offsets(rec, index) */
6157 	que_thr_t*		thr)	/*!< in: query thread */
6158 {
6159 	dberr_t	err;
6160 	ulint	heap_no;
6161 
6162 	ut_ad(rec_offs_validate(rec, index, offsets));
6163 	ut_ad(dict_index_is_clust(index));
6164 	ut_ad(block->frame == page_align(rec));
6165 
6166 	if (flags & BTR_NO_LOCKING_FLAG) {
6167 
6168 		return(DB_SUCCESS);
6169 	}
6170 	ut_ad(!dict_table_is_temporary(index->table));
6171 
6172 	heap_no = rec_offs_comp(offsets)
6173 		? rec_get_heap_no_new(rec)
6174 		: rec_get_heap_no_old(rec);
6175 
6176 	/* If a transaction has no explicit x-lock set on the record, set one
6177 	for it */
6178 
6179 	lock_rec_convert_impl_to_expl(block, rec, index, offsets);
6180 
6181 	lock_mutex_enter();
6182 
6183 	ut_ad(lock_table_has(thr_get_trx(thr), index->table, LOCK_IX));
6184 
6185 	err = lock_rec_lock(TRUE, LOCK_X | LOCK_REC_NOT_GAP,
6186 			    block, heap_no, index, thr);
6187 
6188 	MONITOR_INC(MONITOR_NUM_RECLOCK_REQ);
6189 
6190 	lock_mutex_exit();
6191 
6192 	ut_ad(lock_rec_queue_validate(FALSE, block, rec, index, offsets));
6193 
6194 	if (err == DB_SUCCESS_LOCKED_REC) {
6195 		err = DB_SUCCESS;
6196 	}
6197 
6198 	return(err);
6199 }
6200 
6201 /*********************************************************************//**
6202 Checks if locks of other transactions prevent an immediate modify (delete
6203 mark or delete unmark) of a secondary index record.
6204 @return DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
6205 dberr_t
6206 lock_sec_rec_modify_check_and_lock(
6207 /*===============================*/
6208 	ulint		flags,	/*!< in: if BTR_NO_LOCKING_FLAG
6209 				bit is set, does nothing */
6210 	buf_block_t*	block,	/*!< in/out: buffer block of rec */
6211 	const rec_t*	rec,	/*!< in: record which should be
6212 				modified; NOTE: as this is a secondary
6213 				index, we always have to modify the
6214 				clustered index record first: see the
6215 				comment below */
6216 	dict_index_t*	index,	/*!< in: secondary index */
6217 	que_thr_t*	thr,	/*!< in: query thread
6218 				(can be NULL if BTR_NO_LOCKING_FLAG) */
6219 	mtr_t*		mtr)	/*!< in/out: mini-transaction */
6220 {
6221 	dberr_t	err;
6222 	ulint	heap_no;
6223 
6224 	ut_ad(!dict_index_is_clust(index));
6225 	ut_ad(!dict_index_is_online_ddl(index) || (flags & BTR_CREATE_FLAG));
6226 	ut_ad(block->frame == page_align(rec));
6227 	ut_ad(mtr->is_named_space(index->space));
6228 
6229 	if (flags & BTR_NO_LOCKING_FLAG) {
6230 
6231 		return(DB_SUCCESS);
6232 	}
6233 	ut_ad(!dict_table_is_temporary(index->table));
6234 
6235 	heap_no = page_rec_get_heap_no(rec);
6236 
6237 	/* Another transaction cannot have an implicit lock on the record,
6238 	because when we come here, we already have modified the clustered
6239 	index record, and this would not have been possible if another active
6240 	transaction had modified this secondary index record. */
6241 
6242 	lock_mutex_enter();
6243 
6244 	ut_ad(lock_table_has(thr_get_trx(thr), index->table, LOCK_IX));
6245 
6246 	err = lock_rec_lock(TRUE, LOCK_X | LOCK_REC_NOT_GAP,
6247 			    block, heap_no, index, thr);
6248 
6249 	MONITOR_INC(MONITOR_NUM_RECLOCK_REQ);
6250 
6251 	lock_mutex_exit();
6252 
6253 #ifdef UNIV_DEBUG
6254 	{
6255 		mem_heap_t*	heap		= NULL;
6256 		ulint		offsets_[REC_OFFS_NORMAL_SIZE];
6257 		const ulint*	offsets;
6258 		rec_offs_init(offsets_);
6259 
6260 		offsets = rec_get_offsets(rec, index, offsets_,
6261 					  ULINT_UNDEFINED, &heap);
6262 
6263 		ut_ad(lock_rec_queue_validate(
6264 			FALSE, block, rec, index, offsets));
6265 
6266 		if (heap != NULL) {
6267 			mem_heap_free(heap);
6268 		}
6269 	}
6270 #endif /* UNIV_DEBUG */
6271 
6272 	if (err == DB_SUCCESS || err == DB_SUCCESS_LOCKED_REC) {
6273 		/* Update the page max trx id field */
6274 		/* It might not be necessary to do this if
6275 		err == DB_SUCCESS (no new lock created), but the
6276 		update should not cost much. */
6277 		page_update_max_trx_id(block,
6278 				       buf_block_get_page_zip(block),
6279 				       thr_get_trx(thr)->id, mtr);
6280 		err = DB_SUCCESS;
6281 	}
6282 
6283 	return(err);
6284 }
6285 
6286 /*********************************************************************//**
6287 Like lock_clust_rec_read_check_and_lock(), but reads a
6288 secondary index record.
6289 @return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, DB_LOCK_WAIT, DB_DEADLOCK,
6290 or DB_QUE_THR_SUSPENDED */
6291 dberr_t
6292 lock_sec_rec_read_check_and_lock(
6293 /*=============================*/
6294 	ulint			flags,	/*!< in: if BTR_NO_LOCKING_FLAG
6295 					bit is set, does nothing */
6296 	const buf_block_t*	block,	/*!< in: buffer block of rec */
6297 	const rec_t*		rec,	/*!< in: user record or page
6298 					supremum record which should
6299 					be read or passed over by a
6300 					read cursor */
6301 	dict_index_t*		index,	/*!< in: secondary index */
6302 	const ulint*		offsets,/*!< in: rec_get_offsets(rec, index) */
6303 	lock_mode		mode,	/*!< in: mode of the lock which
6304 					the read cursor should set on
6305 					records: LOCK_S or LOCK_X; the
6306 					latter is possible in
6307 					SELECT FOR UPDATE */
6308 	ulint			gap_mode,/*!< in: LOCK_ORDINARY, LOCK_GAP, or
6309 					LOCK_REC_NOT_GAP */
6310 	que_thr_t*		thr)	/*!< in: query thread */
6311 {
6312 	dberr_t	err;
6313 	ulint	heap_no;
6314 
6315 	ut_ad(!dict_index_is_clust(index));
6316 	ut_ad(!dict_index_is_online_ddl(index));
6317 	ut_ad(block->frame == page_align(rec));
6318 	ut_ad(page_rec_is_user_rec(rec) || page_rec_is_supremum(rec));
6319 	ut_ad(rec_offs_validate(rec, index, offsets));
6320 	ut_ad(mode == LOCK_X || mode == LOCK_S);
6321 
6322 	if ((flags & BTR_NO_LOCKING_FLAG)
6323 	    || srv_read_only_mode
6324 	    || dict_table_is_temporary(index->table)) {
6325 
6326 		return(DB_SUCCESS);
6327 	}
6328 
6329 	heap_no = page_rec_get_heap_no(rec);
6330 
6331 	/* Some transaction may have an implicit x-lock on the record only
6332 	if the max trx id for the page >= min trx id for the trx list or a
6333 	database recovery is running. */
6334 
6335 	if ((page_get_max_trx_id(block->frame) >= trx_rw_min_trx_id()
6336 	     || recv_recovery_is_on())
6337 	    && !page_rec_is_supremum(rec)) {
6338 
6339 		lock_rec_convert_impl_to_expl(block, rec, index, offsets);
6340 	}
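	/* Editor's example (not part of the original source): if
	page_get_max_trx_id(block->frame) == 95 while
	trx_rw_min_trx_id() == 100, then every transaction that
	modified this page has already committed, so no implicit
	x-lock can exist and the conversion above is safely skipped. */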
6341 
6342 	lock_mutex_enter();
6343 
6344 	ut_ad(mode != LOCK_X
6345 	      || lock_table_has(thr_get_trx(thr), index->table, LOCK_IX));
6346 	ut_ad(mode != LOCK_S
6347 	      || lock_table_has(thr_get_trx(thr), index->table, LOCK_IS));
6348 
6349 	err = lock_rec_lock(FALSE, mode | gap_mode,
6350 			    block, heap_no, index, thr);
6351 
6352 	MONITOR_INC(MONITOR_NUM_RECLOCK_REQ);
6353 
6354 	lock_mutex_exit();
6355 
6356 	ut_ad(lock_rec_queue_validate(FALSE, block, rec, index, offsets));
6357 
6358 	return(err);
6359 }
6360 
6361 /*********************************************************************//**
6362 Checks if locks of other transactions prevent an immediate read, or passing
6363 over by a read cursor, of a clustered index record. If they do, first tests
6364 if the query thread should anyway be suspended for some reason; if not, then
6365 puts the transaction and the query thread to the lock wait state and inserts a
6366 waiting request for a record lock to the lock queue. Sets the requested mode
6367 lock on the record.
6368 @return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, DB_LOCK_WAIT, DB_DEADLOCK,
6369 or DB_QUE_THR_SUSPENDED */
6370 dberr_t
6371 lock_clust_rec_read_check_and_lock(
6372 /*===============================*/
6373 	ulint			flags,	/*!< in: if BTR_NO_LOCKING_FLAG
6374 					bit is set, does nothing */
6375 	const buf_block_t*	block,	/*!< in: buffer block of rec */
6376 	const rec_t*		rec,	/*!< in: user record or page
6377 					supremum record which should
6378 					be read or passed over by a
6379 					read cursor */
6380 	dict_index_t*		index,	/*!< in: clustered index */
6381 	const ulint*		offsets,/*!< in: rec_get_offsets(rec, index) */
6382 	lock_mode		mode,	/*!< in: mode of the lock which
6383 					the read cursor should set on
6384 					records: LOCK_S or LOCK_X; the
6385 					latter is possible in
6386 					SELECT FOR UPDATE */
6387 	ulint			gap_mode,/*!< in: LOCK_ORDINARY, LOCK_GAP, or
6388 					LOCK_REC_NOT_GAP */
6389 	que_thr_t*		thr)	/*!< in: query thread */
6390 {
6391 	dberr_t	err;
6392 	ulint	heap_no;
6393 
6394 	ut_ad(dict_index_is_clust(index));
6395 	ut_ad(block->frame == page_align(rec));
6396 	ut_ad(page_rec_is_user_rec(rec) || page_rec_is_supremum(rec));
6397 	ut_ad(gap_mode == LOCK_ORDINARY || gap_mode == LOCK_GAP
6398 	      || gap_mode == LOCK_REC_NOT_GAP);
6399 	ut_ad(rec_offs_validate(rec, index, offsets));
6400 
6401 	if ((flags & BTR_NO_LOCKING_FLAG)
6402 	    || srv_read_only_mode
6403 	    || dict_table_is_temporary(index->table)) {
6404 
6405 		return(DB_SUCCESS);
6406 	}
6407 
6408 	heap_no = page_rec_get_heap_no(rec);
6409 
6410 	if (heap_no != PAGE_HEAP_NO_SUPREMUM) {
6411 
6412 		lock_rec_convert_impl_to_expl(block, rec, index, offsets);
6413 	}
6414 
6415 	lock_mutex_enter();
6416 
6417 	ut_ad(mode != LOCK_X
6418 	      || lock_table_has(thr_get_trx(thr), index->table, LOCK_IX));
6419 	ut_ad(mode != LOCK_S
6420 	      || lock_table_has(thr_get_trx(thr), index->table, LOCK_IS));
6421 
6422 	err = lock_rec_lock(FALSE, mode | gap_mode, block, heap_no, index, thr);
6423 
6424 	MONITOR_INC(MONITOR_NUM_RECLOCK_REQ);
6425 
6426 	lock_mutex_exit();
6427 
6428 	ut_ad(lock_rec_queue_validate(FALSE, block, rec, index, offsets));
6429 
6430 	DEBUG_SYNC_C("after_lock_clust_rec_read_check_and_lock");
6431 
6432 	return(err);
6433 }
6434 /*********************************************************************//**
6435 Checks if locks of other transactions prevent an immediate read, or passing
6436 over by a read cursor, of a clustered index record. If they do, first tests
6437 if the query thread should anyway be suspended for some reason; if not, then
6438 puts the transaction and the query thread to the lock wait state and inserts a
6439 waiting request for a record lock to the lock queue. Sets the requested mode
6440 lock on the record. This is an alternative version of
6441 lock_clust_rec_read_check_and_lock() that does not require the parameter
6442 "offsets".
6443 @return DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
6444 dberr_t
6445 lock_clust_rec_read_check_and_lock_alt(
6446 /*===================================*/
6447 	ulint			flags,	/*!< in: if BTR_NO_LOCKING_FLAG
6448 					bit is set, does nothing */
6449 	const buf_block_t*	block,	/*!< in: buffer block of rec */
6450 	const rec_t*		rec,	/*!< in: user record or page
6451 					supremum record which should
6452 					be read or passed over by a
6453 					read cursor */
6454 	dict_index_t*		index,	/*!< in: clustered index */
6455 	lock_mode		mode,	/*!< in: mode of the lock which
6456 					the read cursor should set on
6457 					records: LOCK_S or LOCK_X; the
6458 					latter is possible in
6459 					SELECT FOR UPDATE */
6460 	ulint			gap_mode,/*!< in: LOCK_ORDINARY, LOCK_GAP, or
6461 					LOCK_REC_NOT_GAP */
6462 	que_thr_t*		thr)	/*!< in: query thread */
6463 {
6464 	mem_heap_t*	tmp_heap	= NULL;
6465 	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
6466 	ulint*		offsets		= offsets_;
6467 	dberr_t		err;
6468 	rec_offs_init(offsets_);
6469 
6470 	offsets = rec_get_offsets(rec, index, offsets,
6471 				  ULINT_UNDEFINED, &tmp_heap);
6472 	err = lock_clust_rec_read_check_and_lock(flags, block, rec, index,
6473 						 offsets, mode, gap_mode, thr);
6474 	if (tmp_heap) {
6475 		mem_heap_free(tmp_heap);
6476 	}
6477 
6478 	if (err == DB_SUCCESS_LOCKED_REC) {
6479 		err = DB_SUCCESS;
6480 	}
6481 
6482 	return(err);
6483 }
6484 
6485 /*******************************************************************//**
6486 Release the last lock from the transaction's autoinc locks. */
6487 UNIV_INLINE
6488 void
6489 lock_release_autoinc_last_lock(
6490 /*===========================*/
6491 	ib_vector_t*	autoinc_locks)	/*!< in/out: vector of AUTOINC locks */
6492 {
6493 	ulint		last;
6494 	lock_t*		lock;
6495 
6496 	ut_ad(lock_mutex_own());
6497 	ut_a(!ib_vector_is_empty(autoinc_locks));
6498 
6499 	/* The lock to be released must be the last lock acquired. */
6500 	last = ib_vector_size(autoinc_locks) - 1;
6501 	lock = *static_cast<lock_t**>(ib_vector_get(autoinc_locks, last));
6502 
6503 	/* Should have only AUTOINC locks in the vector. */
6504 	ut_a(lock_get_mode(lock) == LOCK_AUTO_INC);
6505 	ut_a(lock_get_type(lock) == LOCK_TABLE);
6506 
6507 	ut_a(lock->un_member.tab_lock.table != NULL);
6508 
6509 	/* This will remove the lock from the trx autoinc_locks too. */
6510 	lock_table_dequeue(lock);
6511 
6512 	/* Remove from the table vector too. */
6513 	lock_trx_table_locks_remove(lock);
6514 }
6515 
6516 /*******************************************************************//**
6517 Check if a transaction holds any autoinc locks.
6518 @return TRUE if the transaction holds any AUTOINC locks. */
6519 static
6520 ibool
6521 lock_trx_holds_autoinc_locks(
6522 /*=========================*/
6523 	const trx_t*	trx)		/*!< in: transaction */
6524 {
6525 	ut_a(trx->autoinc_locks != NULL);
6526 
6527 	return(!ib_vector_is_empty(trx->autoinc_locks));
6528 }
6529 
6530 /*******************************************************************//**
6531 Release all the transaction's autoinc locks. */
6532 static
6533 void
6534 lock_release_autoinc_locks(
6535 /*=======================*/
6536 	trx_t*		trx)		/*!< in/out: transaction */
6537 {
6538 	ut_ad(lock_mutex_own());
6539 	/* If this is invoked for a running transaction by the thread
6540 	that is serving the transaction, then it is not necessary to
6541 	hold trx->mutex here. */
6542 
6543 	ut_a(trx->autoinc_locks != NULL);
6544 
6545 	/* We release the locks in reverse order. This is to
6546 	avoid searching the vector for the element to delete at
6547 	the lower level. See lock_table_remove_low() for details. */
6548 	while (!ib_vector_is_empty(trx->autoinc_locks)) {
6549 
6550 		/* lock_table_remove_low() will also remove the lock from
6551 		the transaction's autoinc_locks vector. */
6552 		lock_release_autoinc_last_lock(trx->autoinc_locks);
6553 	}
6554 
6555 	/* Should release all locks. */
6556 	ut_a(ib_vector_is_empty(trx->autoinc_locks));
6557 }
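/* Editor's example (not part of the original source): with
autoinc_locks == [L1, L2, L3], releasing L3, then L2, then L1 pops
each lock from the back of the vector in O(1). Releasing L1 first
would instead make lock_table_remove_low() scan the vector for the
element to delete, which is exactly what the reverse order avoids. */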
6558 
6559 /*******************************************************************//**
6560 Gets the type of a lock. Non-inline version for use outside of the
6561 lock module.
6562 @return LOCK_TABLE or LOCK_REC */
6563 ulint
6564 lock_get_type(
6565 /*==========*/
6566 	const lock_t*	lock)	/*!< in: lock */
6567 {
6568 	return(lock_get_type_low(lock));
6569 }
6570 
6571 /*******************************************************************//**
6572 Gets the id of the transaction owning a lock.
6573 @return transaction id */
6574 trx_id_t
6575 lock_get_trx_id(
6576 /*============*/
6577 	const lock_t*	lock)	/*!< in: lock */
6578 {
6579 	return(trx_get_id_for_print(lock->trx));
6580 }
6581 
6582 /*******************************************************************//**
6583 Gets the mode of a lock in a human readable string.
6584 The string should not be free()'d or modified.
6585 @return lock mode */
6586 const char*
6587 lock_get_mode_str(
6588 /*==============*/
6589 	const lock_t*	lock)	/*!< in: lock */
6590 {
6591 	ibool	is_gap_lock;
6592 
6593 	is_gap_lock = lock_get_type_low(lock) == LOCK_REC
6594 		&& lock_rec_get_gap(lock);
6595 
6596 	switch (lock_get_mode(lock)) {
6597 	case LOCK_S:
6598 		if (is_gap_lock) {
6599 			return("S,GAP");
6600 		} else {
6601 			return("S");
6602 		}
6603 	case LOCK_X:
6604 		if (is_gap_lock) {
6605 			return("X,GAP");
6606 		} else {
6607 			return("X");
6608 		}
6609 	case LOCK_IS:
6610 		if (is_gap_lock) {
6611 			return("IS,GAP");
6612 		} else {
6613 			return("IS");
6614 		}
6615 	case LOCK_IX:
6616 		if (is_gap_lock) {
6617 			return("IX,GAP");
6618 		} else {
6619 			return("IX");
6620 		}
6621 	case LOCK_AUTO_INC:
6622 		return("AUTO_INC");
6623 	default:
6624 		return("UNKNOWN");
6625 	}
6626 }
6627 
6628 /*******************************************************************//**
6629 Gets the type of a lock in a human readable string.
6630 The string should not be free()'d or modified.
6631 @return lock type */
6632 const char*
6633 lock_get_type_str(
6634 /*==============*/
6635 	const lock_t*	lock)	/*!< in: lock */
6636 {
6637 	switch (lock_get_type_low(lock)) {
6638 	case LOCK_REC:
6639 		return("RECORD");
6640 	case LOCK_TABLE:
6641 		return("TABLE");
6642 	default:
6643 		return("UNKNOWN");
6644 	}
6645 }
6646 
6647 /*******************************************************************//**
6648 Gets the table on which the lock is.
6649 @return table */
6650 UNIV_INLINE
6651 dict_table_t*
6652 lock_get_table(
6653 /*===========*/
6654 	const lock_t*	lock)	/*!< in: lock */
6655 {
6656 	switch (lock_get_type_low(lock)) {
6657 	case LOCK_REC:
6658 		ut_ad(dict_index_is_clust(lock->index)
6659 		      || !dict_index_is_online_ddl(lock->index));
6660 		return(lock->index->table);
6661 	case LOCK_TABLE:
6662 		return(lock->un_member.tab_lock.table);
6663 	default:
6664 		ut_error;
6665 		return(NULL);
6666 	}
6667 }
6668 
6669 /*******************************************************************//**
6670 Gets the id of the table on which the lock is.
6671 @return id of the table */
6672 table_id_t
6673 lock_get_table_id(
6674 /*==============*/
6675 	const lock_t*	lock)	/*!< in: lock */
6676 {
6677 	dict_table_t*	table;
6678 
6679 	table = lock_get_table(lock);
6680 
6681 	return(table->id);
6682 }
6683 
6684 /** Determine which table a lock is associated with.
6685 @param[in]	lock	the lock
6686 @return name of the table */
6687 const table_name_t&
6688 lock_get_table_name(
6689 	const lock_t*	lock)
6690 {
6691 	return(lock_get_table(lock)->name);
6692 }
6693 
6694 /*******************************************************************//**
6695 For a record lock, gets the index on which the lock is.
6696 @return index */
6697 const dict_index_t*
6698 lock_rec_get_index(
6699 /*===============*/
6700 	const lock_t*	lock)	/*!< in: lock */
6701 {
6702 	ut_a(lock_get_type_low(lock) == LOCK_REC);
6703 	ut_ad(dict_index_is_clust(lock->index)
6704 	      || !dict_index_is_online_ddl(lock->index));
6705 
6706 	return(lock->index);
6707 }
6708 
6709 /*******************************************************************//**
6710 For a record lock, gets the name of the index on which the lock is.
6711 The string should not be free()'d or modified.
6712 @return name of the index */
6713 const char*
6714 lock_rec_get_index_name(
6715 /*====================*/
6716 	const lock_t*	lock)	/*!< in: lock */
6717 {
6718 	ut_a(lock_get_type_low(lock) == LOCK_REC);
6719 	ut_ad(dict_index_is_clust(lock->index)
6720 	      || !dict_index_is_online_ddl(lock->index));
6721 
6722 	return(lock->index->name);
6723 }
6724 
6725 /*******************************************************************//**
6726 For a record lock, gets the tablespace number on which the lock is.
6727 @return tablespace number */
6728 ulint
6729 lock_rec_get_space_id(
6730 /*==================*/
6731 	const lock_t*	lock)	/*!< in: lock */
6732 {
6733 	ut_a(lock_get_type_low(lock) == LOCK_REC);
6734 
6735 	return(lock->un_member.rec_lock.space);
6736 }
6737 
6738 /*******************************************************************//**
6739 For a record lock, gets the page number on which the lock is.
6740 @return page number */
6741 ulint
6742 lock_rec_get_page_no(
6743 /*=================*/
6744 	const lock_t*	lock)	/*!< in: lock */
6745 {
6746 	ut_a(lock_get_type_low(lock) == LOCK_REC);
6747 
6748 	return(lock->un_member.rec_lock.page_no);
6749 }
6750 
6751 /*********************************************************************//**
6752 Cancels a waiting lock request and releases possible other transactions
6753 waiting behind it. */
6754 void
6755 lock_cancel_waiting_and_release(
6756 /*============================*/
6757 	lock_t*	lock)	/*!< in/out: waiting lock request */
6758 {
6759 	que_thr_t*	thr;
6760 
6761 	ut_ad(lock_mutex_own());
6762 	ut_ad(trx_mutex_own(lock->trx));
6763 
6764 	lock->trx->lock.cancel = true;
6765 
6766 	if (lock_get_type_low(lock) == LOCK_REC) {
6767 
6768 		lock_rec_dequeue_from_page(lock);
6769 	} else {
6770 		ut_ad(lock_get_type_low(lock) & LOCK_TABLE);
6771 
6772 		if (lock->trx->autoinc_locks != NULL) {
6773 			/* Release the transaction's AUTOINC locks. */
6774 			lock_release_autoinc_locks(lock->trx);
6775 		}
6776 
6777 		lock_table_dequeue(lock);
6778 	}
6779 
6780 	/* Reset the wait flag and the back pointer to lock in trx. */
6781 
6782 	lock_reset_lock_and_trx_wait(lock);
6783 
6784 	/* The following function releases the trx from lock wait. */
6785 
6786 	thr = que_thr_end_lock_wait(lock->trx);
6787 
6788 	if (thr != NULL) {
6789 		lock_wait_release_thread_if_suspended(thr);
6790 	}
6791 
6792 	lock->trx->lock.cancel = false;
6793 }
6794 
6795 /*********************************************************************//**
6796 Unlocks AUTO_INC type locks that were possibly reserved by a trx. This
6797 function should be called at the end of an SQL statement, by the
6798 connection thread that owns the transaction (trx->mysql_thd). */
6799 void
6800 lock_unlock_table_autoinc(
6801 /*======================*/
6802 	trx_t*	trx)	/*!< in/out: transaction */
6803 {
6804 	ut_ad(!lock_mutex_own());
6805 	ut_ad(!trx_mutex_own(trx));
6806 	ut_ad(!trx->lock.wait_lock);
6807 
6808 	/* This can be invoked on NOT_STARTED, ACTIVE, PREPARED,
6809 	but not COMMITTED transactions. */
6810 
6811 	ut_ad(trx_state_eq(trx, TRX_STATE_NOT_STARTED)
6812 	      || trx_state_eq(trx, TRX_STATE_FORCED_ROLLBACK)
6813 	      || !trx_state_eq(trx, TRX_STATE_COMMITTED_IN_MEMORY));
6814 
6815 	/* This function is invoked for a running transaction by the
6816 	thread that is serving the transaction. Therefore it is not
6817 	necessary to hold trx->mutex here. */
6818 
6819 	if (lock_trx_holds_autoinc_locks(trx)) {
6820 		lock_mutex_enter();
6821 
6822 		lock_release_autoinc_locks(trx);
6823 
6824 		lock_mutex_exit();
6825 	}
6826 }
6827 
6828 /*********************************************************************//**
6829 Releases a transaction's locks, and releases possible other transactions
6830 waiting because of these locks. Change the state of the transaction to
6831 TRX_STATE_COMMITTED_IN_MEMORY. */
6832 void
6833 lock_trx_release_locks(
6834 /*===================*/
6835 	trx_t*	trx)	/*!< in/out: transaction */
6836 {
6837 	check_trx_state(trx);
6838 
6839 	if (trx_state_eq(trx, TRX_STATE_PREPARED)) {
6840 
6841 		mutex_enter(&trx_sys->mutex);
6842 
6843 		ut_a(trx_sys->n_prepared_trx > 0);
6844 		--trx_sys->n_prepared_trx;
6845 
6846 		if (trx->is_recovered) {
6847 			ut_a(trx_sys->n_prepared_recovered_trx > 0);
6848 			trx_sys->n_prepared_recovered_trx--;
6849 		}
6850 
6851 		mutex_exit(&trx_sys->mutex);
6852 	} else {
6853 		ut_ad(trx_state_eq(trx, TRX_STATE_ACTIVE));
6854 	}
6855 
6856 	bool	release_lock;
6857 
6858 	release_lock = (UT_LIST_GET_LEN(trx->lock.trx_locks) > 0);
6859 
6860 	/* Don't take lock_sys mutex if trx didn't acquire any lock. */
6861 	if (release_lock) {
6862 
6863 		/* The transition of trx->state to TRX_STATE_COMMITTED_IN_MEMORY
6864 		is protected by both the lock_sys->mutex and the trx->mutex. */
6865 		lock_mutex_enter();
6866 	}
6867 
6868 	trx_mutex_enter(trx);
6869 
6870 	/* The following assignment makes the transaction committed in memory
6871 	and makes its changes to data visible to other transactions.
6872 	NOTE that there is a small discrepancy from the strict formal
6873 	visibility rules here: a human user of the database can see
6874 	modifications made by another transaction T even before the necessary
6875 	log segment has been flushed to the disk. If the database happens to
6876 	crash before the flush, the user has seen modifications from T which
6877 	will never be a committed transaction. However, any transaction T2
6878 	which sees the modifications of the committing transaction T, and
6879 	which also itself makes modifications to the database, will get an lsn
6880 	larger than the committing transaction T. In the case where the log
6881 	flush fails, and T never gets committed, also T2 will never get
6882 	committed. */
6883 
6884 	/*--------------------------------------*/
6885 	trx->state = TRX_STATE_COMMITTED_IN_MEMORY;
6886 	/*--------------------------------------*/
6887 
6888 	if (trx_is_referenced(trx)) {
6889 
6890 		ut_a(release_lock);
6891 
6892 		lock_mutex_exit();
6893 
6894 		while (trx_is_referenced(trx)) {
6895 
6896 			trx_mutex_exit(trx);
6897 
6898 			DEBUG_SYNC_C("waiting_trx_is_not_referenced");
6899 
6900 			/** Doing an implicit to explicit conversion
6901 			should not be expensive. */
6902 			ut_delay(ut_rnd_interval(0, srv_spin_wait_delay));
6903 
6904 			trx_mutex_enter(trx);
6905 		}
6906 
6907 		trx_mutex_exit(trx);
6908 
6909 		lock_mutex_enter();
6910 
6911 		trx_mutex_enter(trx);
6912 	}
6913 
6914 	ut_ad(!trx_is_referenced(trx));
6915 
6916 	/* If the background thread trx_rollback_or_clean_recovered()
6917 	is still active then there is a chance that the rollback
6918 	thread may see this trx as COMMITTED_IN_MEMORY and goes ahead
6919 	to clean it up calling trx_cleanup_at_db_startup(). This can
6920 	happen in the case we are committing a trx here that is left
6921 	in PREPARED state during the crash. Note that commit of the
6922 	rollback of a PREPARED trx happens in the recovery thread
6923 	while the rollback of other transactions happen in the
6924 	background thread. To avoid this race we unconditionally unset
6925 	the is_recovered flag. */
6926 
6927 	trx->is_recovered = false;
6928 
6929 	trx_mutex_exit(trx);
6930 
6931 	if (release_lock) {
6932 
6933 		lock_release(trx);
6934 
6935 		lock_mutex_exit();
6936 	}
6937 
6938 	trx->lock.n_rec_locks = 0;
6939 
6940 	/* We don't remove the locks one by one from the vector for
6941 	efficiency reasons. We simply reset it because we would have
6942 	released all the locks anyway. */
6943 
6944 	trx->lock.table_locks.clear();
6945 
6946 	ut_a(UT_LIST_GET_LEN(trx->lock.trx_locks) == 0);
6947 	ut_a(ib_vector_is_empty(trx->autoinc_locks));
6948 	ut_a(trx->lock.table_locks.empty());
6949 
6950 	mem_heap_empty(trx->lock.lock_heap);
6951 }
6952 
/*********************************************************************//**
Check whether the transaction has already been rolled back because it
was selected as a deadlock victim, or if it has to wait then cancel
the wait lock.
@return DB_DEADLOCK, DB_LOCK_WAIT or DB_SUCCESS */
dberr_t
lock_trx_handle_wait(
/*=================*/
	trx_t*	trx)	/*!< in/out: trx lock state */
{
	dberr_t	err;

	lock_mutex_enter();

	trx_mutex_enter(trx);

	if (trx->lock.was_chosen_as_deadlock_victim) {
		err = DB_DEADLOCK;
	} else if (trx->lock.wait_lock != NULL) {
		lock_cancel_waiting_and_release(trx->lock.wait_lock);
		err = DB_LOCK_WAIT;
	} else {
		/* The lock was probably granted before we got here. */
		err = DB_SUCCESS;
	}

	lock_mutex_exit();

	trx_mutex_exit(trx);

	return(err);
}

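/* A minimal caller-side sketch (illustration only; the error handling
shown is hypothetical): the three return codes above map to three
distinct situations.

	switch (lock_trx_handle_wait(trx)) {
	case DB_DEADLOCK:
		// trx was chosen as a deadlock victim and rolled back
		break;
	case DB_LOCK_WAIT:
		// a pending wait lock existed and has been cancelled
		break;
	default:
		// DB_SUCCESS: the lock was already granted, nothing to do
		break;
	}
*/
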
/*********************************************************************//**
Get the number of locks on a table.
@return number of locks */
ulint
lock_table_get_n_locks(
/*===================*/
	const dict_table_t*	table)	/*!< in: table */
{
	ulint		n_table_locks;

	lock_mutex_enter();

	n_table_locks = UT_LIST_GET_LEN(table->locks);

	lock_mutex_exit();

	return(n_table_locks);
}

#ifdef UNIV_DEBUG
/*******************************************************************//**
Do an exhaustive check for any locks (table or rec) against the table.
@return lock if found */
static
const lock_t*
lock_table_locks_lookup(
/*====================*/
	const dict_table_t*	table,		/*!< in: check if there are
						any locks held on records in
						this table or on the table
						itself */
	const trx_ut_list_t*	trx_list)	/*!< in: trx list to check */
{
	trx_t*			trx;

	ut_a(table != NULL);
	ut_ad(lock_mutex_own());
	ut_ad(trx_sys_mutex_own());

	for (trx = UT_LIST_GET_FIRST(*trx_list);
	     trx != NULL;
	     trx = UT_LIST_GET_NEXT(trx_list, trx)) {

		const lock_t*	lock;

		check_trx_state(trx);

		for (lock = UT_LIST_GET_FIRST(trx->lock.trx_locks);
		     lock != NULL;
		     lock = UT_LIST_GET_NEXT(trx_locks, lock)) {

			ut_a(lock->trx == trx);

			if (lock_get_type_low(lock) == LOCK_REC) {
				ut_ad(!dict_index_is_online_ddl(lock->index)
				      || dict_index_is_clust(lock->index));
				if (lock->index->table == table) {
					return(lock);
				}
			} else if (lock->un_member.tab_lock.table == table) {
				return(lock);
			}
		}
	}

	return(NULL);
}
#endif /* UNIV_DEBUG */

/*******************************************************************//**
Check if there are any locks (table or rec) against table.
@return true if table has either table or record locks. */
bool
lock_table_has_locks(
/*=================*/
	const dict_table_t*	table)	/*!< in: check if there are any locks
					held on records in this table or on the
					table itself */
{
	ibool			has_locks;

	lock_mutex_enter();

	has_locks = UT_LIST_GET_LEN(table->locks) > 0 || table->n_rec_locks > 0;

#ifdef UNIV_DEBUG
	if (!has_locks) {
		mutex_enter(&trx_sys->mutex);

		ut_ad(!lock_table_locks_lookup(table, &trx_sys->rw_trx_list));

		mutex_exit(&trx_sys->mutex);
	}
#endif /* UNIV_DEBUG */

	lock_mutex_exit();

	return(has_locks);
}

/*******************************************************************//**
Initialise the table lock list. */
void
lock_table_lock_list_init(
/*======================*/
	table_lock_list_t*	lock_list)	/*!< in/out: list to initialise */
{
	UT_LIST_INIT(*lock_list, &lock_table_t::locks);
}

/*******************************************************************//**
Initialise the trx lock list. */
void
lock_trx_lock_list_init(
/*====================*/
	trx_lock_list_t*	lock_list)	/*!< in/out: list to initialise */
{
	UT_LIST_INIT(*lock_list, &lock_t::trx_locks);
}

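/* A minimal sketch of the intrusive-list idiom used above (the element
type and names are hypothetical, for illustration only): every element
embeds its own list node, and UT_LIST_INIT is told which member to use
via a pointer-to-member, exactly as in the two initialisers above.

	struct my_elem_t {
		int				value;
		UT_LIST_NODE_T(my_elem_t)	node;
	};

	UT_LIST_BASE_NODE_T(my_elem_t)	list;

	UT_LIST_INIT(list, &my_elem_t::node);
*/
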
/*******************************************************************//**
Set the lock system timeout event. */
void
lock_set_timeout_event()
/*====================*/
{
	os_event_set(lock_sys->timeout_event);
}

#ifdef UNIV_DEBUG
/*******************************************************************//**
Check if the transaction holds any locks on the sys tables
or its records.
@return the strongest lock found on any sys table or 0 for none */
const lock_t*
lock_trx_has_sys_table_locks(
/*=========================*/
	const trx_t*	trx)	/*!< in: transaction to check */
{
	const lock_t*	strongest_lock = 0;
	lock_mode	strongest = LOCK_NONE;

	lock_mutex_enter();

	typedef lock_pool_t::const_reverse_iterator iterator;

	iterator	end = trx->lock.table_locks.rend();
	iterator	it = trx->lock.table_locks.rbegin();

	/* Find a valid mode. Note: the table_locks vector can be empty. */

	for (/* No op */; it != end; ++it) {
		const lock_t*	lock = *it;

		if (lock != NULL
		    && dict_is_sys_table(lock->un_member.tab_lock.table->id)) {

			strongest = lock_get_mode(lock);
			ut_ad(strongest != LOCK_NONE);
			strongest_lock = lock;
			break;
		}
	}

	if (strongest == LOCK_NONE) {
		lock_mutex_exit();
		return(NULL);
	}

	for (/* No op */; it != end; ++it) {
		const lock_t*	lock = *it;

		if (lock == NULL) {
			continue;
		}

		ut_ad(trx == lock->trx);
		ut_ad(lock_get_type_low(lock) & LOCK_TABLE);
		ut_ad(lock->un_member.tab_lock.table != NULL);

		lock_mode	mode = lock_get_mode(lock);

		if (dict_is_sys_table(lock->un_member.tab_lock.table->id)
		    && lock_mode_stronger_or_eq(mode, strongest)) {

			strongest = mode;
			strongest_lock = lock;
		}
	}

	lock_mutex_exit();

	return(strongest_lock);
}

/*******************************************************************//**
Check if the transaction holds an exclusive lock on a record.
@return whether the locks are held */
bool
lock_trx_has_rec_x_lock(
/*====================*/
	const trx_t*		trx,	/*!< in: transaction to check */
	const dict_table_t*	table,	/*!< in: table to check */
	const buf_block_t*	block,	/*!< in: buffer block of the record */
	ulint			heap_no)/*!< in: record heap number */
{
	ut_ad(heap_no > PAGE_HEAP_NO_SUPREMUM);

	lock_mutex_enter();
	ut_a(lock_table_has(trx, table, LOCK_IX)
	     || dict_table_is_temporary(table));
	ut_a(lock_rec_has_expl(LOCK_X | LOCK_REC_NOT_GAP,
			       block, heap_no, trx)
	     || dict_table_is_temporary(table));
	lock_mutex_exit();
	return(true);
}
#endif /* UNIV_DEBUG */

/** rewind(3) the file used for storing the latest detected deadlock and
print a heading message to stderr if printing of all deadlocks to stderr
is enabled. */
void
DeadlockChecker::start_print()
{
	ut_ad(lock_mutex_own());

	rewind(lock_latest_err_file);
	ut_print_timestamp(lock_latest_err_file);

	if (srv_print_all_deadlocks) {
		ib::info() << "Transactions deadlock detected, dumping"
			<< " detailed information.";
	}
}

/** Print a message to the deadlock file and possibly to stderr.
@param msg message to print */
void
DeadlockChecker::print(const char* msg)
{
	fputs(msg, lock_latest_err_file);

	if (srv_print_all_deadlocks) {
		ib::info() << msg;
	}
}

/** Print transaction data to the deadlock file and possibly to stderr.
@param trx transaction
@param max_query_len max query length to print */
void
DeadlockChecker::print(const trx_t* trx, ulint max_query_len)
{
	ut_ad(lock_mutex_own());

	ulint	n_rec_locks = lock_number_of_rows_locked(&trx->lock);
	ulint	n_trx_locks = UT_LIST_GET_LEN(trx->lock.trx_locks);
	ulint	heap_size = mem_heap_get_size(trx->lock.lock_heap);

	mutex_enter(&trx_sys->mutex);

	trx_print_low(lock_latest_err_file, trx, max_query_len,
		      n_rec_locks, n_trx_locks, heap_size);

	if (srv_print_all_deadlocks) {
		trx_print_low(stderr, trx, max_query_len,
			      n_rec_locks, n_trx_locks, heap_size);
	}

	mutex_exit(&trx_sys->mutex);
}

/** Print lock data to the deadlock file and possibly to stderr.
@param lock record or table type lock */
void
DeadlockChecker::print(const lock_t* lock)
{
	ut_ad(lock_mutex_own());

	if (lock_get_type_low(lock) == LOCK_REC) {
		lock_rec_print(lock_latest_err_file, lock);

		if (srv_print_all_deadlocks) {
			lock_rec_print(stderr, lock);
		}
	} else {
		lock_table_print(lock_latest_err_file, lock);

		if (srv_print_all_deadlocks) {
			lock_table_print(stderr, lock);
		}
	}
}

/** Get the next lock in the queue that is owned by a transaction whose
sub-tree has not already been searched.
Note: "next" here means PREV for table locks.

@param lock lock in the queue
@param heap_no heap number if lock is a record lock, else ULINT_UNDEFINED

@return next lock or NULL if at end of queue */
const lock_t*
DeadlockChecker::get_next_lock(const lock_t* lock, ulint heap_no) const
{
	ut_ad(lock_mutex_own());

	do {
		if (lock_get_type_low(lock) == LOCK_REC) {
			ut_ad(heap_no != ULINT_UNDEFINED);
			lock = lock_rec_get_next_const(heap_no, lock);
		} else {
			ut_ad(heap_no == ULINT_UNDEFINED);
			ut_ad(lock_get_type_low(lock) == LOCK_TABLE);

			lock = UT_LIST_GET_NEXT(
				un_member.tab_lock.locks, lock);
		}

	} while (lock != NULL && is_visited(lock));

	ut_ad(lock == NULL
	      || lock_get_type_low(lock) == lock_get_type_low(m_wait_lock));

	return(lock);
}

/** Get the first lock to search. The search starts from the current
wait_lock. What we are really interested in is an edge from the
current wait_lock's owning transaction to another transaction that has
a lock ahead in the queue. We skip locks where the owning transaction's
sub-tree has already been searched.

Note: The record locks are traversed from the oldest lock to the
latest. For table locks we go from latest to oldest.

For record locks, we first position the "iterator" on the first lock on
the page and then reposition on the actual heap_no. This is required
due to the way the record lock hash is implemented.

@param[out] heap_no heap number if rec lock, else ULINT_UNDEFINED
@return first lock or NULL */
const lock_t*
DeadlockChecker::get_first_lock(ulint* heap_no) const
{
	ut_ad(lock_mutex_own());

	const lock_t*	lock = m_wait_lock;

	if (lock_get_type_low(lock) == LOCK_REC) {
		hash_table_t*	lock_hash;

		lock_hash = lock->type_mode & LOCK_PREDICATE
			? lock_sys->prdt_hash
			: lock_sys->rec_hash;

		/* We are only interested in records that match the heap_no. */
		*heap_no = lock_rec_find_set_bit(lock);

		ut_ad(*heap_no <= 0xffff);
		ut_ad(*heap_no != ULINT_UNDEFINED);

		/* Find the locks on the page. */
		lock = lock_rec_get_first_on_page_addr(
			lock_hash,
			lock->un_member.rec_lock.space,
			lock->un_member.rec_lock.page_no);

		/* Position on the first lock on the physical record. */
		if (!lock_rec_get_nth_bit(lock, *heap_no)) {
			lock = lock_rec_get_next_const(*heap_no, lock);
		}

		ut_a(!lock_get_wait(lock));
	} else {
		/* Table locks don't care about the heap_no. */
		*heap_no = ULINT_UNDEFINED;
		ut_ad(lock_get_type_low(lock) == LOCK_TABLE);
		dict_table_t*	table = lock->un_member.tab_lock.table;
		lock = UT_LIST_GET_FIRST(table->locks);
	}

	/* There must be at least two locks, otherwise there cannot be a
	waiting lock; secondly, the first lock cannot be the wait_lock. */
	ut_a(lock != NULL);
	ut_a(lock != m_wait_lock);

	/* Check that the lock type doesn't change. */
	ut_ad(lock_get_type_low(lock) == lock_get_type_low(m_wait_lock));

	return(lock);
}

/** Notify that a deadlock has been detected and print the conflicting
transaction info.
@param lock lock causing deadlock */
void
DeadlockChecker::notify(const lock_t* lock) const
{
	ut_ad(lock_mutex_own());

	start_print();

	print("\n*** (1) TRANSACTION:\n");

	print(m_wait_lock->trx, 3000);

	print("*** (1) WAITING FOR THIS LOCK TO BE GRANTED:\n");

	print(m_wait_lock);

	print("*** (2) TRANSACTION:\n");

	print(lock->trx, 3000);

	print("*** (2) HOLDS THE LOCK(S):\n");

	print(lock);

	/* It is possible that the joining transaction was granted its
	lock when we rolled back some other waiting transaction. */

	if (m_start->lock.wait_lock != 0) {
		print("*** (2) WAITING FOR THIS LOCK TO BE GRANTED:\n");

		print(m_start->lock.wait_lock);
	}

	DBUG_PRINT("ib_lock", ("deadlock detected"));
}

/** Select the victim transaction that should be rolled back.
@return victim transaction */
const trx_t*
DeadlockChecker::select_victim() const
{
	ut_ad(lock_mutex_own());
	ut_ad(m_start->lock.wait_lock != 0);
	ut_ad(m_wait_lock->trx != m_start);

	if (thd_trx_priority(m_start->mysql_thd) > 0
	    || thd_trx_priority(m_wait_lock->trx->mysql_thd) > 0) {

		const trx_t*	victim;

		victim = trx_arbitrate(m_start, m_wait_lock->trx);

		if (victim != NULL) {

			return(victim);
		}
	}

	if (trx_weight_ge(m_wait_lock->trx, m_start)) {

		/* The joining transaction is 'smaller',
		choose it as the victim and roll it back. */

		return(m_start);
	}

	return(m_wait_lock->trx);
}

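/* A worked example of the selection order above (the priorities are
hypothetical, for illustration only):

	m_start (joining trx):       thd_trx_priority() == 0
	m_wait_lock->trx (blocker):  thd_trx_priority() == 3

	Because one side has a priority set, trx_arbitrate() decides
	first; it favours the higher-priority transaction, so here the
	joining transaction would be the victim regardless of weight.
	With no priorities set, the weight comparison applies and the
	'lighter' of the two transactions is rolled back. */
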
/** Looks iteratively for a deadlock. Note: the joining transaction may
have been granted its lock by the deadlock checks.
@return 0 if no deadlock else the victim transaction instance. */
const trx_t*
DeadlockChecker::search()
{
	ut_ad(lock_mutex_own());
	ut_ad(!trx_mutex_own(m_start));

	ut_ad(m_start != NULL);
	ut_ad(m_wait_lock != NULL);
	check_trx_state(m_wait_lock->trx);
	ut_ad(m_mark_start <= s_lock_mark_counter);

	/* Look at the locks ahead of wait_lock in the lock queue. */
	ulint		heap_no;
	const lock_t*	lock = get_first_lock(&heap_no);

	for (;;) {

		/* We should never visit the same sub-tree more than once. */
		ut_ad(lock == NULL || !is_visited(lock));

		while (m_n_elems > 0 && lock == NULL) {

			/* Restore previous search state. */

			pop(lock, heap_no);

			lock = get_next_lock(lock, heap_no);
		}

		if (lock == NULL) {
			break;
		} else if (lock == m_wait_lock) {

			/* We can mark this subtree as searched. */
			ut_ad(lock->trx->lock.deadlock_mark <= m_mark_start);

			lock->trx->lock.deadlock_mark = ++s_lock_mark_counter;

			/* We are not prepared for an overflow. This 64-bit
			counter should never wrap around. At 10^9 increments
			per second, it would take 10^3 years of uptime. */

			ut_ad(s_lock_mark_counter > 0);

			/* Backtrack */
			lock = NULL;

		} else if (!lock_has_to_wait(m_wait_lock, lock)) {

			/* No conflict, next lock */
			lock = get_next_lock(lock, heap_no);

		} else if (lock->trx == m_start) {

			/* Found a cycle. */

			notify(lock);

			return(select_victim());

		} else if (is_too_deep()) {

			/* Search too deep to continue. */
			m_too_deep = true;
			return(m_start);

		} else if (lock->trx->lock.que_state == TRX_QUE_LOCK_WAIT) {

			/* Another trx ahead has requested a lock in an
			incompatible mode, and is itself waiting for a lock. */

			++m_cost;

			if (!push(lock, heap_no)) {
				m_too_deep = true;
				return(m_start);
			}

			m_wait_lock = lock->trx->lock.wait_lock;

			lock = get_first_lock(&heap_no);

			if (is_visited(lock)) {
				lock = get_next_lock(lock, heap_no);
			}

		} else {
			lock = get_next_lock(lock, heap_no);
		}
	}

	ut_a(lock == NULL && m_n_elems == 0);

	/* No deadlock found. */
	return(0);
}

/** Print info about a transaction that was rolled back.
@param trx transaction rolled back
@param lock lock trx wants */
void
DeadlockChecker::rollback_print(const trx_t* trx, const lock_t* lock)
{
	ut_ad(lock_mutex_own());

	/* If the lock search exceeds the max steps
	or the max depth, the current trx will be
	the victim. Print its information. */
	start_print();

	print("TOO DEEP OR LONG SEARCH IN THE LOCK TABLE"
	      " WAITS-FOR GRAPH, WE WILL ROLL BACK"
	      " FOLLOWING TRANSACTION \n\n"
	      "*** TRANSACTION:\n");

	print(trx, 3000);

	print("*** WAITING FOR THIS LOCK TO BE GRANTED:\n");

	print(lock);
}

/** Rollback the transaction selected as the victim. */
void
DeadlockChecker::trx_rollback()
{
	ut_ad(lock_mutex_own());

	trx_t*	trx = m_wait_lock->trx;

	print("*** WE ROLL BACK TRANSACTION (1)\n");

	trx_mutex_enter(trx);

	trx->lock.was_chosen_as_deadlock_victim = true;

	lock_cancel_waiting_and_release(trx->lock.wait_lock);

	trx_mutex_exit(trx);
}

/** Checks if a joining lock request results in a deadlock. If a deadlock is
found this function will resolve the deadlock by choosing a victim transaction
and rolling it back. It will attempt to resolve all deadlocks. The returned
transaction will be the joining transaction instance, or NULL if some other
transaction was chosen as a victim and rolled back, or if no deadlock was
found.

@param[in]	lock lock the transaction is requesting
@param[in,out]	trx transaction requesting the lock

@return transaction instance chosen as victim or 0 */
const trx_t*
DeadlockChecker::check_and_resolve(const lock_t* lock, trx_t* trx)
{
	ut_ad(lock_mutex_own());
	ut_ad(trx_mutex_own(trx));
	check_trx_state(trx);
	ut_ad(!srv_read_only_mode);

	/* If the transaction is marked for ASYNC rollback then we should
	not allow it to wait for another lock, causing a possible deadlock.
	We return the current transaction as the deadlock victim here. */
	if (trx->in_innodb & TRX_FORCE_ROLLBACK_ASYNC) {
		return(trx);
	} else if (!innobase_deadlock_detect) {
		return(NULL);
	}

	/* Release the mutex to obey the latching order.
	This is safe, because DeadlockChecker::check_and_resolve()
	is invoked when a lock wait is enqueued for the currently
	running transaction. Because m_trx is a running transaction
	(it is not currently suspended because of a lock wait),
	its state can only be changed by this thread, which is
	currently associated with the transaction. */

	trx_mutex_exit(trx);

	const trx_t*	victim_trx;

	/* Try and resolve as many deadlocks as possible. */
	do {
		DeadlockChecker	checker(trx, lock, s_lock_mark_counter);

		victim_trx = checker.search();

		/* Search too deep, we rollback the joining transaction only
		if it is possible to rollback. Otherwise we rollback the
		transaction that is holding the lock that the joining
		transaction wants. */
		if (checker.is_too_deep()) {

			ut_ad(trx == checker.m_start);
			ut_ad(trx == victim_trx);

			rollback_print(victim_trx, lock);

			MONITOR_INC(MONITOR_DEADLOCK);

			break;

		} else if (victim_trx != NULL && victim_trx != trx) {

			ut_ad(victim_trx == checker.m_wait_lock->trx);

			checker.trx_rollback();

			lock_deadlock_found = true;

			MONITOR_INC(MONITOR_DEADLOCK);
		}

	} while (victim_trx != NULL && victim_trx != trx);

	/* If the joining transaction was selected as the victim. */
	if (victim_trx != NULL) {

		print("*** WE ROLL BACK TRANSACTION (2)\n");

		lock_deadlock_found = true;
	}

	trx_mutex_enter(trx);

	return(victim_trx);
}

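/* A minimal caller-side sketch (illustration only; the surrounding
enqueue logic is hypothetical): the caller holds the lock_sys mutex and
the trx mutex, asks the checker for a victim, and only needs to act when
the joining transaction itself was chosen.

	const trx_t*	victim_trx;

	victim_trx = DeadlockChecker::check_and_resolve(lock, trx);

	if (victim_trx != NULL) {
		ut_ad(victim_trx == trx);
		// the joining transaction must give up its lock request
		// and report DB_DEADLOCK to the caller
	}
*/
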
/**
Allocate cached locks for the transaction.
@param trx		allocate cached record locks for this transaction */
void
lock_trx_alloc_locks(trx_t* trx)
{
	ulint	sz = REC_LOCK_SIZE * REC_LOCK_CACHE;
	byte*	ptr = reinterpret_cast<byte*>(ut_malloc_nokey(sz));

	/* We allocate one big chunk and then distribute it among
	the rest of the elements. The allocated chunk pointer is always
	at index 0. */

	for (ulint i = 0; i < REC_LOCK_CACHE; ++i, ptr += REC_LOCK_SIZE) {
		trx->lock.rec_pool.push_back(
			reinterpret_cast<ib_lock_t*>(ptr));
	}

	sz = TABLE_LOCK_SIZE * TABLE_LOCK_CACHE;
	ptr = reinterpret_cast<byte*>(ut_malloc_nokey(sz));

	for (ulint i = 0; i < TABLE_LOCK_CACHE; ++i, ptr += TABLE_LOCK_SIZE) {
		trx->lock.table_pool.push_back(
			reinterpret_cast<ib_lock_t*>(ptr));
	}
}

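/* A sketch of the resulting record-lock pool layout (the sizes come
from the REC_LOCK_* constants near the top of this file; the addresses
are illustrative only):

	ptr = ut_malloc_nokey(REC_LOCK_SIZE * REC_LOCK_CACHE);

	rec_pool[0] -> ptr			(also the chunk to free)
	rec_pool[1] -> ptr + 1 * REC_LOCK_SIZE
	...
	rec_pool[7] -> ptr + 7 * REC_LOCK_SIZE

Each slot is big enough for an ib_lock_t plus 256 bytes of record-lock
bitmap; since the chunk pointer is always at index 0, freeing
rec_pool[0] releases the whole allocation. */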