1 /*****************************************************************************
2 
3 Copyright (c) 1996, 2021, Oracle and/or its affiliates.
4 
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License, version 2.0,
7 as published by the Free Software Foundation.
8 
9 This program is also distributed with certain software (including
10 but not limited to OpenSSL) that is licensed under separate terms,
11 as designated in a particular file or component or in included license
12 documentation.  The authors of MySQL hereby grant you an additional
13 permission to link the program and your derivative works with the
14 separately licensed software that they have included with MySQL.
15 
16 This program is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19 GNU General Public License, version 2.0, for more details.
20 
21 You should have received a copy of the GNU General Public License along with
22 this program; if not, write to the Free Software Foundation, Inc.,
23 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
24 
25 *****************************************************************************/
26 
27 /**************************************************//**
28 @file lock/lock0lock.cc
29 The transaction lock system
30 
31 Created 5/7/1996 Heikki Tuuri
32 *******************************************************/
33 
34 #define LOCK_MODULE_IMPLEMENTATION
35 
36 #include <mysql/service_thd_engine_lock.h>
37 #include "ha_prototypes.h"
38 
39 #include "lock0lock.h"
40 #include "lock0priv.h"
41 
42 #ifdef UNIV_NONINL
43 #include "lock0lock.ic"
44 #include "lock0priv.ic"
45 #endif
46 
47 #include "dict0mem.h"
48 #include "usr0sess.h"
49 #include "trx0purge.h"
50 #include "trx0sys.h"
51 #include "srv0mon.h"
52 #include "ut0vec.h"
53 #include "btr0btr.h"
54 #include "dict0boot.h"
55 #include "ut0new.h"
56 #include "row0sel.h"
57 #include "row0mysql.h"
58 #include "pars0pars.h"
59 
60 #include <set>
61 
62 /* Flag to enable/disable deadlock detector. */
63 my_bool	innobase_deadlock_detect = TRUE;
64 
65 /** Total number of cached record locks */
66 static const ulint	REC_LOCK_CACHE = 8;
67 
68 /** Maximum record lock size in bytes */
69 static const ulint	REC_LOCK_SIZE = sizeof(ib_lock_t) + 256;
70 
71 /** Total number of cached table locks */
72 static const ulint	TABLE_LOCK_CACHE = 8;
73 
74 /** Size, in bytes, of a table lock instance */
75 static const ulint	TABLE_LOCK_SIZE = sizeof(ib_lock_t);
76 
77 /** Deadlock checker. */
78 class DeadlockChecker {
79 public:
80 	/** Checks if a joining lock request results in a deadlock. If
81 	a deadlock is found, this function will resolve the deadlock
82 	by choosing a victim transaction and rolling it back. It
83 	will attempt to resolve all deadlocks. The returned transaction
84 	will be the joining transaction itself if it was chosen as the
85 	victim, or NULL if some other transaction was chosen as a victim
86 	and rolled back, or if no deadlock was found.
87 
88 	@param lock lock the transaction is requesting
89 	@param trx transaction requesting the lock
90 
91 	@return the joining transaction if chosen as victim, or NULL */
92 	static const trx_t* check_and_resolve(
93 		const lock_t*	lock,
94 		trx_t*		trx);
95 
96 private:
97 	/** Do a shallow copy. Default destructor OK.
98 	@param trx the start transaction (start node)
99 	@param wait_lock lock that a transaction wants
100 	@param mark_start visited node counter */
101 	DeadlockChecker(
102 		const trx_t*	trx,
103 		const lock_t*	wait_lock,
104 		ib_uint64_t	mark_start)
105 		:
106 		m_cost(),
107 		m_start(trx),
108 		m_too_deep(),
109 		m_wait_lock(wait_lock),
110 		m_mark_start(mark_start),
111 		m_n_elems()
112 	{
113 	}
114 
115 	/** Check if the search is too deep. */
116 	bool is_too_deep() const
117 	{
118 		return(m_n_elems > LOCK_MAX_DEPTH_IN_DEADLOCK_CHECK
119 		       || m_cost > LOCK_MAX_N_STEPS_IN_DEADLOCK_CHECK);
120 	}
121 
122 	/** Save current state.
123 	@param lock lock to push on the stack.
124 	@param heap_no the heap number to push on the stack.
125 	@return false if stack is full. */
126 	bool push(const lock_t*	lock, ulint heap_no)
127 	{
128 		ut_ad((lock_get_type_low(lock) & LOCK_REC)
129 		      || (lock_get_type_low(lock) & LOCK_TABLE));
130 
131 		ut_ad(((lock_get_type_low(lock) & LOCK_TABLE) != 0)
132 		      == (heap_no == ULINT_UNDEFINED));
133 
134 		/* Ensure that the stack is bounded. */
135 		if (m_n_elems >= UT_ARR_SIZE(s_states)) {
136 			return(false);
137 		}
138 
139 		state_t&	state = s_states[m_n_elems++];
140 
141 		state.m_lock = lock;
142 		state.m_wait_lock = m_wait_lock;
143 		state.m_heap_no = heap_no;
144 
145 		return(true);
146 	}
147 
148 	/** Restore state.
149 	@param[out] lock current lock
150 	@param[out] heap_no current heap_no */
151 	void pop(const lock_t*& lock, ulint& heap_no)
152 	{
153 		ut_a(m_n_elems > 0);
154 
155 		const state_t&	state = s_states[--m_n_elems];
156 
157 		lock = state.m_lock;
158 		heap_no = state.m_heap_no;
159 		m_wait_lock = state.m_wait_lock;
160 	}
161 
162 	/** Check whether the node has been visited.
163 	@param lock lock to check
164 	@return true if the node has been visited */
165 	bool is_visited(const lock_t* lock) const
166 	{
167 		return(lock->trx->lock.deadlock_mark > m_mark_start);
168 	}
169 
170 	/** Get the next lock in the queue that is owned by a transaction
171 	whose sub-tree has not already been searched.
172 	Note: "next" here means PREV for table locks.
173 	@param lock Lock in queue
174 	@param heap_no heap_no if lock is a record lock else ULINT_UNDEFINED
175 	@return next lock or NULL if at end of queue */
176 	const lock_t* get_next_lock(const lock_t* lock, ulint heap_no) const;
177 
178 	/** Get the first lock to search. The search starts from the current
179 	wait_lock. What we are really interested in is an edge from the
180 	current wait_lock's owning transaction to another transaction that has
181 	a lock ahead in the queue. We skip locks where the owning transaction's
182 	sub-tree has already been searched.
183 
184 	Note: The record locks are traversed from the oldest lock to the
185 	latest. For table locks we go from latest to oldest.
186 
187 	For record locks, we first position the iterator on the first lock on
188 	the page and then reposition on the actual heap_no. This is required
189 	due to the way the record lock hash is implemented.
190 
191 	@param[out] heap_no if rec lock, else ULINT_UNDEFINED.
192 
193 	@return first lock or NULL */
194 	const lock_t* get_first_lock(ulint* heap_no) const;
195 
196 	/** Notify that a deadlock has been detected and print the conflicting
197 	transaction info.
198 	@param lock lock causing deadlock */
199 	void notify(const lock_t* lock) const;
200 
201 	/** Select the victim transaction that should be rolled back.
202 	@return victim transaction */
203 	const trx_t* select_victim() const;
204 
205 	/** Rollback transaction selected as the victim. */
206 	void trx_rollback();
207 
208 	/** Looks iteratively for a deadlock. Note: the joining transaction
209 	may have been granted its lock by the deadlock checks.
210 
211 	@return NULL if no deadlock, else the victim transaction. */
212 	const trx_t* search();
213 
214 	/** Print transaction data to the deadlock file and possibly to stderr.
215 	@param trx transaction
216 	@param max_query_len max query length to print */
217 	static void print(const trx_t* trx, ulint max_query_len);
218 
219 	/** rewind(3) the file used for storing the latest detected deadlock
220 	and print a heading message to stderr if printing of all deadlocks to
221 	stderr is enabled. */
222 	static void start_print();
223 
224 	/** Print lock data to the deadlock file and possibly to stderr.
225 	@param lock record or table type lock */
226 	static void print(const lock_t* lock);
227 
228 	/** Print a message to the deadlock file and possibly to stderr.
229 	@param msg message to print */
230 	static void print(const char* msg);
231 
232 	/** Print info about transaction that was rolled back.
233 	@param trx transaction rolled back
234 	@param lock lock trx wants */
235 	static void rollback_print(const trx_t* trx, const lock_t* lock);
236 
237 private:
238 	/** DFS state information, used during deadlock checking. */
239 	struct state_t {
240 		const lock_t*	m_lock;		/*!< Current lock */
241 		const lock_t*	m_wait_lock;	/*!< Waiting for lock */
242 		ulint		m_heap_no;	/*!< heap number if rec lock */
243 	};
244 
245 	/** Used in deadlock tracking. Protected by lock_sys->mutex. */
246 	static ib_uint64_t	s_lock_mark_counter;
247 
248 	/** Calculation steps thus far. It is the count of the nodes visited. */
249 	ulint			m_cost;
250 
251 	/** Joining transaction that is requesting a lock in an
252 	incompatible mode */
253 	const trx_t*		m_start;
254 
255 	/** TRUE if search was too deep and was aborted */
256 	bool			m_too_deep;
257 
258 	/** Lock that trx wants */
259 	const lock_t*		m_wait_lock;
260 
261 	/**  Value of lock_mark_count at the start of the deadlock check. */
262 	ib_uint64_t		m_mark_start;
263 
264 	/** Number of states pushed onto the stack */
265 	size_t			m_n_elems;
266 
267 	/** This is to avoid malloc/free calls. */
268 	static state_t		s_states[MAX_STACK_SIZE];
269 };
270 
271 /** Counter to mark visited nodes during deadlock search. */
272 ib_uint64_t	DeadlockChecker::s_lock_mark_counter = 0;
273 
274 /** The stack used for deadlock searches. */
275 DeadlockChecker::state_t	DeadlockChecker::s_states[MAX_STACK_SIZE];
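
/* The class above implements an iterative depth-first search over the
wait-for graph, using the explicit s_states[] stack instead of recursion.
A minimal sketch of how push()/pop()/is_visited() typically cooperate;
this is illustrative pseudocode only, not the authoritative control flow
(which lives in DeadlockChecker::search(), defined later in this file):

@code
	// lock = first lock in the queue ahead of m_wait_lock
	while (lock != NULL) {
		if (is_visited(lock)) {
			// Sub-tree already searched: skip to the next lock.
			lock = get_next_lock(lock, heap_no);
		} else if (lock->trx == m_start) {
			// Cycle back to the joining trx: deadlock found.
			break;
		} else if (lock->trx->lock.que_state == TRX_QUE_LOCK_WAIT) {
			// The blocking trx is itself waiting: descend,
			// saving the current position. push() fails when
			// the bounded stack is full (search too deep).
			if (!push(lock, heap_no)) {
				m_too_deep = true;
				break;
			}
			// ... continue from the blocking trx's wait lock
		} else if (m_n_elems > 0) {
			pop(lock, heap_no);	// backtrack
			lock = get_next_lock(lock, heap_no);
		}
	}
@endcode */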
276 
277 #ifdef UNIV_DEBUG
278 /*********************************************************************//**
279 Validates the lock system.
280 @return TRUE if ok */
281 static
282 bool
283 lock_validate();
284 /*============*/
285 
286 /*********************************************************************//**
287 Validates the record lock queues on a page.
288 @return TRUE if ok */
289 static
290 ibool
291 lock_rec_validate_page(
292 /*===================*/
293 	const buf_block_t*	block)	/*!< in: buffer block */
294 	MY_ATTRIBUTE((warn_unused_result));
295 #endif /* UNIV_DEBUG */
296 
297 /* The lock system */
298 lock_sys_t*	lock_sys	= NULL;
299 
300 /** Set when we store info on the latest deadlock error in
301 lock_latest_err_file; the InnoDB Monitor will then fetch and print it. */
302 bool	lock_deadlock_found = false;
303 
304 /** Only created if !srv_read_only_mode */
305 static FILE*		lock_latest_err_file;
306 
307 /*********************************************************************//**
308 Reports that a transaction id is not sensible, i.e., in the future. */
309 void
310 lock_report_trx_id_insanity(
311 /*========================*/
312 	trx_id_t	trx_id,		/*!< in: trx id */
313 	const rec_t*	rec,		/*!< in: user record */
314 	dict_index_t*	index,		/*!< in: index */
315 	const ulint*	offsets,	/*!< in: rec_get_offsets(rec, index) */
316 	trx_id_t	max_trx_id)	/*!< in: trx_sys_get_max_trx_id() */
317 {
318 	ib::error()
319 		<< "Transaction id " << trx_id
320 		<< " associated with record" << rec_offsets_print(rec, offsets)
321 		<< " in index " << index->name
322 		<< " of table " << index->table->name
323 		<< " is greater than the global counter " << max_trx_id
324 		<< "! The table is corrupted.";
325 }
326 
327 /*********************************************************************//**
328 Checks that a transaction id is sensible, i.e., not in the future.
329 @return true if ok */
330 #ifdef UNIV_DEBUG
331 
332 #else
333 static MY_ATTRIBUTE((warn_unused_result))
334 #endif
335 bool
336 lock_check_trx_id_sanity(
337 /*=====================*/
338 	trx_id_t	trx_id,		/*!< in: trx id */
339 	const rec_t*	rec,		/*!< in: user record */
340 	dict_index_t*	index,		/*!< in: index */
341 	const ulint*	offsets)	/*!< in: rec_get_offsets(rec, index) */
342 {
343 	ut_ad(rec_offs_validate(rec, index, offsets));
344 
345 	trx_id_t	max_trx_id = trx_sys_get_max_trx_id();
346 	bool		is_ok = trx_id < max_trx_id;
347 
348 	if (!is_ok) {
349 		lock_report_trx_id_insanity(
350 			trx_id, rec, index, offsets, max_trx_id);
351 	}
352 
353 	return(is_ok);
354 }
355 
356 /*********************************************************************//**
357 Checks that a record is seen in a consistent read.
358 @return true if sees, or false if an earlier version of the record
359 should be retrieved */
360 bool
361 lock_clust_rec_cons_read_sees(
362 /*==========================*/
363 	const rec_t*	rec,	/*!< in: user record which should be read or
364 				passed over by a read cursor */
365 	dict_index_t*	index,	/*!< in: clustered index */
366 	const ulint*	offsets,/*!< in: rec_get_offsets(rec, index) */
367 	ReadView*	view)	/*!< in: consistent read view */
368 {
369 	ut_ad(dict_index_is_clust(index));
370 	ut_ad(page_rec_is_user_rec(rec));
371 	ut_ad(rec_offs_validate(rec, index, offsets));
372 
373 	/* Temp-tables are not shared across connections and multiple
374 	transactions from different connections cannot simultaneously
375 	operate on same temp-table and so read of temp-table is
376 	always consistent read. */
377 	if (srv_read_only_mode || dict_table_is_temporary(index->table)) {
378 		ut_ad(view == 0 || dict_table_is_temporary(index->table));
379 		return(true);
380 	}
381 
382 	/* NOTE that we call this function while holding the search
383 	system latch. */
384 
385 	trx_id_t	trx_id = row_get_rec_trx_id(rec, index, offsets);
386 
387 	return(view->changes_visible(trx_id, index->table->name));
388 }
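
/* Illustrative note: changes_visible() applies the usual MVCC rule.
Simplified (the authoritative logic is in ReadView, read0read.h/.cc; the
limit names below are descriptive, not exact member names): a change by
trx_id is visible to the view roughly when

	trx_id < up_limit_id                (committed before the view)
	|| trx_id is the view's own trx
	|| (up_limit_id <= trx_id < low_limit_id
	    && trx_id was not active when the view was created)
*/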
389 
390 /*********************************************************************//**
391 Checks that a non-clustered index record is seen in a consistent read.
392 
393 NOTE that a non-clustered index page contains so little information on
394 its modifications that, even when this function returns false, the
395 present version of rec may still be the right one; but we must check
396 that from the clustered index record.
397 
398 @return true if certainly sees, or false if an earlier version of the
399 clustered index record might be needed */
400 bool
401 lock_sec_rec_cons_read_sees(
402 /*========================*/
403 	const rec_t*		rec,	/*!< in: user record which
404 					should be read or passed over
405 					by a read cursor */
406 	const dict_index_t*	index,	/*!< in: index */
407 	const ReadView*	view)	/*!< in: consistent read view */
408 {
409 	ut_ad(page_rec_is_user_rec(rec));
410 
411 	/* NOTE that we might call this function while holding the search
412 	system latch. */
413 
414 	if (recv_recovery_is_on()) {
415 
416 		return(false);
417 
418 	} else if (dict_table_is_temporary(index->table)) {
419 
420 		/* Temp-tables are not shared across connections and multiple
421 		transactions from different connections cannot simultaneously
422 		operate on same temp-table and so read of temp-table is
423 		always consistent read. */
424 
425 		return(true);
426 	}
427 
428 	trx_id_t	max_trx_id = page_get_max_trx_id(page_align(rec));
429 
430 	ut_ad(max_trx_id > 0);
431 
432 	return(view->sees(max_trx_id));
433 }
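
/* Rationale for the page-level check above: a secondary index page only
records PAGE_MAX_TRX_ID, the maximum trx id that has modified the page.
If the read view can see that id, it can see every modification on the
page, so the current record version is certainly visible. Otherwise some
modification might be invisible, and the caller must build the correct
version via the clustered index record. */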
434 
435 /*********************************************************************//**
436 Creates the lock system at database start. */
437 void
438 lock_sys_create(
439 /*============*/
440 	ulint	n_cells)	/*!< in: number of slots in lock hash table */
441 {
442 	ulint	lock_sys_sz;
443 
444 	lock_sys_sz = sizeof(*lock_sys) + OS_THREAD_MAX_N * sizeof(srv_slot_t);
445 
446 	lock_sys = static_cast<lock_sys_t*>(ut_zalloc_nokey(lock_sys_sz));
447 
448 	void*	ptr = &lock_sys[1];
449 
450 	lock_sys->waiting_threads = static_cast<srv_slot_t*>(ptr);
451 
452 	lock_sys->last_slot = lock_sys->waiting_threads;
453 
454 	mutex_create(LATCH_ID_LOCK_SYS, &lock_sys->mutex);
455 
456 	mutex_create(LATCH_ID_LOCK_SYS_WAIT, &lock_sys->wait_mutex);
457 
458 	lock_sys->timeout_event = os_event_create(0);
459 
460 	lock_sys->rec_hash = hash_create(n_cells);
461 	lock_sys->prdt_hash = hash_create(n_cells);
462 	lock_sys->prdt_page_hash = hash_create(n_cells);
463 
464 	if (!srv_read_only_mode) {
465 		lock_latest_err_file = os_file_create_tmpfile(NULL);
466 		ut_a(lock_latest_err_file);
467 	}
468 }
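
/* Note on the allocation in lock_sys_create(): the lock_sys_t struct and
the array of OS_THREAD_MAX_N wait slots are carved out of one zero-filled
block, with &lock_sys[1] pointing at the first byte past the struct. A
standalone sketch of the same idiom (hypothetical types, assuming
<cstdlib> is available):

@code
	struct head_t { int h; };
	struct slot_t { int s; };

	void*	block = calloc(1, sizeof(head_t) + 4 * sizeof(slot_t));
	head_t*	head  = static_cast<head_t*>(block);
	// The slot array starts immediately after the header struct.
	slot_t*	slots = reinterpret_cast<slot_t*>(&head[1]);
@endcode */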
469 
470 /** Calculates the fold value of a lock: used in migrating the hash table.
471 @param[in]	lock	record lock object
472 @return	folded value */
473 static
474 ulint
475 lock_rec_lock_fold(
476 	const lock_t*	lock)
477 {
478 	return(lock_rec_fold(lock->un_member.rec_lock.space,
479 			     lock->un_member.rec_lock.page_no));
480 }
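
/* All record locks on the same (space, page_no) fold to the same value,
so they land in the same hash cell and chain. This is what lets
lock_rec_get_first_on_page_addr() find every lock on a page with a
single hash lookup, and why a record lock covers a whole page worth of
heap numbers in its bitmap rather than a single record. */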
481 
482 /** Resize the lock hash tables.
483 @param[in]	n_cells	number of slots in lock hash table */
484 void
485 lock_sys_resize(
486 	ulint	n_cells)
487 {
488 	hash_table_t*	old_hash;
489 
490 	lock_mutex_enter();
491 
492 	old_hash = lock_sys->rec_hash;
493 	lock_sys->rec_hash = hash_create(n_cells);
494 	HASH_MIGRATE(old_hash, lock_sys->rec_hash, lock_t, hash,
495 		     lock_rec_lock_fold);
496 	hash_table_free(old_hash);
497 
498 	old_hash = lock_sys->prdt_hash;
499 	lock_sys->prdt_hash = hash_create(n_cells);
500 	HASH_MIGRATE(old_hash, lock_sys->prdt_hash, lock_t, hash,
501 		     lock_rec_lock_fold);
502 	hash_table_free(old_hash);
503 
504 	old_hash = lock_sys->prdt_page_hash;
505 	lock_sys->prdt_page_hash = hash_create(n_cells);
506 	HASH_MIGRATE(old_hash, lock_sys->prdt_page_hash, lock_t, hash,
507 		     lock_rec_lock_fold);
508 	hash_table_free(old_hash);
509 
510 	/* block->lock_hash_val is cached per block and depends on the
	geometry of rec_hash, so it must be recomputed for every buffered
	page after the resize */
511 	for (ulint i = 0; i < srv_buf_pool_instances; ++i) {
512 		buf_pool_t*	buf_pool = buf_pool_from_array(i);
513 
514 		mutex_enter(&buf_pool->LRU_list_mutex);
515 		buf_page_t*	bpage;
516 		bpage = UT_LIST_GET_FIRST(buf_pool->LRU);
517 
518 		while (bpage != NULL) {
519 			if (buf_page_get_state(bpage)
520 			    == BUF_BLOCK_FILE_PAGE) {
521 				buf_block_t*	block;
522 				block = reinterpret_cast<buf_block_t*>(
523 					bpage);
524 
525 				block->lock_hash_val
526 					= lock_rec_hash(
527 						bpage->id.space(),
528 						bpage->id.page_no());
529 			}
530 			bpage = UT_LIST_GET_NEXT(LRU, bpage);
531 		}
532 		mutex_exit(&buf_pool->LRU_list_mutex);
533 	}
534 
535 	lock_mutex_exit();
536 }
537 
538 /*********************************************************************//**
539 Closes the lock system at database shutdown. */
540 void
541 lock_sys_close(void)
542 /*================*/
543 {
544 	if (lock_latest_err_file != NULL) {
545 		fclose(lock_latest_err_file);
546 		lock_latest_err_file = NULL;
547 	}
548 
549 	hash_table_free(lock_sys->rec_hash);
550 	hash_table_free(lock_sys->prdt_hash);
551 	hash_table_free(lock_sys->prdt_page_hash);
552 
553 	os_event_destroy(lock_sys->timeout_event);
554 
555 	mutex_destroy(&lock_sys->mutex);
556 	mutex_destroy(&lock_sys->wait_mutex);
557 
558 	srv_slot_t*	slot = lock_sys->waiting_threads;
559 
560 	for (ulint i = 0; i < OS_THREAD_MAX_N; i++, ++slot) {
561 		if (slot->event != NULL) {
562 			os_event_destroy(slot->event);
563 		}
564 	}
565 
566 	ut_free(lock_sys);
567 
568 	lock_sys = NULL;
569 }
570 
571 /*********************************************************************//**
572 Gets the size of a lock struct.
573 @return size in bytes */
574 ulint
575 lock_get_size(void)
576 /*===============*/
577 {
578 	return((ulint) sizeof(lock_t));
579 }
580 
581 /*********************************************************************//**
582 Gets the source table of an ALTER TABLE transaction.  The table must be
583 covered by an IX or IS table lock.
584 @return the source table of transaction, if it is covered by an IX or
585 IS table lock; dest if there is no source table, and NULL if the
586 transaction is locking more than two tables or an inconsistency is
587 found */
588 dict_table_t*
589 lock_get_src_table(
590 /*===============*/
591 	trx_t*		trx,	/*!< in: transaction */
592 	dict_table_t*	dest,	/*!< in: destination of ALTER TABLE */
593 	lock_mode*	mode)	/*!< out: lock mode of the source table */
594 {
595 	dict_table_t*	src;
596 	lock_t*		lock;
597 
598 	ut_ad(!lock_mutex_own());
599 
600 	src = NULL;
601 	*mode = LOCK_NONE;
602 
603 	/* The trx mutex protects the trx_locks for our purposes.
604 	Other transactions could want to convert one of our implicit
605 	record locks to an explicit one. For that, they would need our
606 	trx mutex. Waiting locks can be removed while only holding
607 	lock_sys->mutex, but this is a running transaction and cannot
608 	thus be holding any waiting locks. */
609 	trx_mutex_enter(trx);
610 
611 	for (lock = UT_LIST_GET_FIRST(trx->lock.trx_locks);
612 	     lock != NULL;
613 	     lock = UT_LIST_GET_NEXT(trx_locks, lock)) {
614 		lock_table_t*	tab_lock;
615 		lock_mode	lock_mode;
616 		if (!(lock_get_type_low(lock) & LOCK_TABLE)) {
617 			/* We are only interested in table locks. */
618 			continue;
619 		}
620 		tab_lock = &lock->un_member.tab_lock;
621 		if (dest == tab_lock->table) {
622 			/* We are not interested in the destination table. */
623 			continue;
624 		} else if (!src) {
625 			/* This presumably is the source table. */
626 			src = tab_lock->table;
627 			if (UT_LIST_GET_LEN(src->locks) != 1
628 			    || UT_LIST_GET_FIRST(src->locks) != lock) {
629 				/* We only support the case when
630 				there is only one lock on this table. */
631 				src = NULL;
632 				goto func_exit;
633 			}
634 		} else if (src != tab_lock->table) {
635 			/* The transaction is locking more than
636 			two tables (src and dest): abort */
637 			src = NULL;
638 			goto func_exit;
639 		}
640 
641 		/* Check that the source table is locked by
642 		LOCK_IX or LOCK_IS. */
643 		lock_mode = lock_get_mode(lock);
644 		if (lock_mode == LOCK_IX || lock_mode == LOCK_IS) {
645 			if (*mode != LOCK_NONE && *mode != lock_mode) {
646 				/* There are multiple locks on src. */
647 				src = NULL;
648 				goto func_exit;
649 			}
650 			*mode = lock_mode;
651 		}
652 	}
653 
654 	if (!src) {
655 		/* No source table lock found: flag the situation to caller */
656 		src = dest;
657 	}
658 
659 func_exit:
660 	trx_mutex_exit(trx);
661 	return(src);
662 }
663 
664 /*********************************************************************//**
665 Determine if the given table is exclusively "owned" by the given
666 transaction, i.e., transaction holds LOCK_IX and possibly LOCK_AUTO_INC
667 on the table.
668 @return TRUE if table is only locked by trx, with LOCK_IX, and
669 possibly LOCK_AUTO_INC */
670 ibool
671 lock_is_table_exclusive(
672 /*====================*/
673 	const dict_table_t*	table,	/*!< in: table */
674 	const trx_t*		trx)	/*!< in: transaction */
675 {
676 	const lock_t*	lock;
677 	ibool		ok	= FALSE;
678 
679 	ut_ad(table);
680 	ut_ad(trx);
681 
682 	lock_mutex_enter();
683 
684 	for (lock = UT_LIST_GET_FIRST(table->locks);
685 	     lock != NULL;
686 	     lock = UT_LIST_GET_NEXT(locks, &lock->un_member.tab_lock)) {
687 		if (lock->trx != trx) {
688 			/* A lock on the table is held
689 			by some other transaction. */
690 			goto not_ok;
691 		}
692 
693 		if (!(lock_get_type_low(lock) & LOCK_TABLE)) {
694 			/* We are interested in table locks only. */
695 			continue;
696 		}
697 
698 		switch (lock_get_mode(lock)) {
699 		case LOCK_IX:
700 			ok = TRUE;
701 			break;
702 		case LOCK_AUTO_INC:
703 			/* It is allowed for trx to hold an
704 			auto_increment lock. */
705 			break;
706 		default:
707 not_ok:
708 			/* Other table locks than LOCK_IX are not allowed. */
709 			ok = FALSE;
710 			goto func_exit;
711 		}
712 	}
713 
714 func_exit:
715 	lock_mutex_exit();
716 
717 	return(ok);
718 }
719 
720 /*********************************************************************//**
721 Sets the wait flag of a lock and the back pointer in trx to lock. */
722 UNIV_INLINE
723 void
724 lock_set_lock_and_trx_wait(
725 /*=======================*/
726 	lock_t*	lock,	/*!< in: lock */
727 	trx_t*	trx)	/*!< in/out: trx */
728 {
729 	ut_ad(lock);
730 	ut_ad(lock->trx == trx);
731 	ut_ad(trx->lock.wait_lock == NULL);
732 	ut_ad(lock_mutex_own());
733 	ut_ad(trx_mutex_own(trx));
734 
735 	trx->lock.wait_lock = lock;
736 	lock->type_mode |= LOCK_WAIT;
737 }
738 
739 /**********************************************************************//**
740 The back pointer to a waiting lock request in the transaction is set to NULL
741 and the wait bit in lock type_mode is reset. */
742 UNIV_INLINE
743 void
744 lock_reset_lock_and_trx_wait(
745 /*=========================*/
746 	lock_t*	lock)	/*!< in/out: record lock */
747 {
748 	ut_ad(lock->trx->lock.wait_lock == lock);
749 	ut_ad(lock_get_wait(lock));
750 	ut_ad(lock_mutex_own());
751 
752 	lock->trx->lock.wait_lock = NULL;
753 	lock->type_mode &= ~LOCK_WAIT;
754 }
755 
756 /*********************************************************************//**
757 Gets the gap flag of a record lock.
758 @return LOCK_GAP or 0 */
759 UNIV_INLINE
760 ulint
761 lock_rec_get_gap(
762 /*=============*/
763 	const lock_t*	lock)	/*!< in: record lock */
764 {
765 	ut_ad(lock);
766 	ut_ad(lock_get_type_low(lock) == LOCK_REC);
767 
768 	return(lock->type_mode & LOCK_GAP);
769 }
770 
771 /*********************************************************************//**
772 Gets the LOCK_REC_NOT_GAP flag of a record lock.
773 @return LOCK_REC_NOT_GAP or 0 */
774 UNIV_INLINE
775 ulint
776 lock_rec_get_rec_not_gap(
777 /*=====================*/
778 	const lock_t*	lock)	/*!< in: record lock */
779 {
780 	ut_ad(lock);
781 	ut_ad(lock_get_type_low(lock) == LOCK_REC);
782 
783 	return(lock->type_mode & LOCK_REC_NOT_GAP);
784 }
785 
786 /*********************************************************************//**
787 Gets the waiting insert flag of a record lock.
788 @return LOCK_INSERT_INTENTION or 0 */
789 UNIV_INLINE
790 ulint
791 lock_rec_get_insert_intention(
792 /*==========================*/
793 	const lock_t*	lock)	/*!< in: record lock */
794 {
795 	ut_ad(lock);
796 	ut_ad(lock_get_type_low(lock) == LOCK_REC);
797 
798 	return(lock->type_mode & LOCK_INSERT_INTENTION);
799 }
800 
801 /*********************************************************************//**
802 Checks if a lock request for a new lock has to wait for request lock2.
803 @return TRUE if new lock has to wait for lock2 to be removed */
804 UNIV_INLINE
805 ibool
806 lock_rec_has_to_wait(
807 /*=================*/
808 	const trx_t*	trx,	/*!< in: trx of new lock */
809 	ulint		type_mode,/*!< in: precise mode of the new lock
810 				to set: LOCK_S or LOCK_X, possibly
811 				ORed to LOCK_GAP or LOCK_REC_NOT_GAP,
812 				LOCK_INSERT_INTENTION */
813 	const lock_t*	lock2,	/*!< in: another record lock; NOTE that
814 				it is assumed that this has a lock bit
815 				set on the same record as in the new
816 				lock we are setting */
817 	bool		lock_is_on_supremum)
818 				/*!< in: TRUE if we are setting the
819 				lock on the 'supremum' record of an
820 				index page: we know then that the lock
821 				request is really for a 'gap' type lock */
822 {
823 	ut_ad(trx && lock2);
824 	ut_ad(lock_get_type_low(lock2) == LOCK_REC);
825 
826 	if (trx != lock2->trx
827 	    && !lock_mode_compatible(static_cast<lock_mode>(
828 			             LOCK_MODE_MASK & type_mode),
829 				     lock_get_mode(lock2))) {
830 
831 		/* We have somewhat complex rules when gap type record locks
832 		cause waits */
833 
834 		if ((lock_is_on_supremum || (type_mode & LOCK_GAP))
835 		    && !(type_mode & LOCK_INSERT_INTENTION)) {
836 
837 			/* Gap type locks without LOCK_INSERT_INTENTION flag
838 			do not need to wait for anything. This is because
839 			different users can have conflicting lock types
840 			on gaps. */
841 
842 			return(FALSE);
843 		}
844 
845 		if (!(type_mode & LOCK_INSERT_INTENTION)
846 		    && lock_rec_get_gap(lock2)) {
847 
848 			/* Record lock (LOCK_ORDINARY or LOCK_REC_NOT_GAP)
849 			does not need to wait for a gap type lock */
850 
851 			return(FALSE);
852 		}
853 
854 		if ((type_mode & LOCK_GAP)
855 		    && lock_rec_get_rec_not_gap(lock2)) {
856 
857 			/* Lock on gap does not need to wait for
858 			a LOCK_REC_NOT_GAP type lock */
859 
860 			return(FALSE);
861 		}
862 
863 		if (lock_rec_get_insert_intention(lock2)) {
864 
865 			/* No lock request needs to wait for an insert
866 			intention lock to be removed. This is ok since our
867 			rules allow conflicting locks on gaps. This eliminates
868 			a spurious deadlock caused by a next-key lock waiting
869 			for an insert intention lock; when the insert
870 			intention lock was granted, the insert deadlocked on
871 			the waiting next-key lock.
872 
873 			Also, insert intention locks do not disturb each
874 			other. */
875 
876 			return(FALSE);
877 		}
878 
879 		return(TRUE);
880 	}
881 
882 	return(FALSE);
883 }
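
/* Summary of the special cases above, restated as a table (request =
the new lock, held = lock2; "II" = LOCK_INSERT_INTENTION). This merely
restates the code and is not an exhaustive compatibility matrix:

	request on gap/supremum, no II	held is anything	-> no wait
	request has no II		held is gap lock	-> no wait
	request is gap			held is rec-not-gap	-> no wait
	request is anything		held is II		-> no wait
	otherwise, if the modes conflict			-> wait
*/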
884 
885 /*********************************************************************//**
886 Checks if a lock request lock1 has to wait for request lock2.
887 @return TRUE if lock1 has to wait for lock2 to be removed */
888 ibool
889 lock_has_to_wait(
890 /*=============*/
891 	const lock_t*	lock1,	/*!< in: waiting lock */
892 	const lock_t*	lock2)	/*!< in: another lock; NOTE that it is
893 				assumed that this has a lock bit set
894 				on the same record as in lock1 if the
895 				locks are record locks */
896 {
897 	ut_ad(lock1 && lock2);
898 
899 	if (lock1->trx != lock2->trx
900 	    && !lock_mode_compatible(lock_get_mode(lock1),
901 				     lock_get_mode(lock2))) {
902 		if (lock_get_type_low(lock1) == LOCK_REC) {
903 			ut_ad(lock_get_type_low(lock2) == LOCK_REC);
904 
905 			/* If this lock request is for a supremum record
906 			then the second bit on the lock bitmap is set */
907 
908 			if (lock1->type_mode
909 			    & (LOCK_PREDICATE | LOCK_PRDT_PAGE)) {
910 				return(lock_prdt_has_to_wait(
911 					lock1->trx, lock1->type_mode,
912 					lock_get_prdt_from_lock(lock1),
913 					lock2));
914 			} else {
915 				return(lock_rec_has_to_wait(
916 					lock1->trx, lock1->type_mode, lock2,
917 					lock_rec_get_nth_bit(lock1, true)));
918 			}
919 		}
920 
921 		return(TRUE);
922 	}
923 
924 	return(FALSE);
925 }
926 
927 /*============== RECORD LOCK BASIC FUNCTIONS ============================*/
928 
929 /**********************************************************************//**
930 Looks for a set bit in a record lock bitmap. Returns ULINT_UNDEFINED
931 if none is found.
932 @return bit index == heap number of the record, or ULINT_UNDEFINED if
933 none found */
934 ulint
935 lock_rec_find_set_bit(
936 /*==================*/
937 	const lock_t*	lock)	/*!< in: record lock with at least one bit set */
938 {
939 	for (ulint i = 0; i < lock_rec_get_n_bits(lock); ++i) {
940 
941 		if (lock_rec_get_nth_bit(lock, i)) {
942 
943 			return(i);
944 		}
945 	}
946 
947 	return(ULINT_UNDEFINED);
948 }
949 
950 /** Reset the nth bit of a record lock.
951 @param[in,out] lock record lock
952 @param[in] i index of the bit that will be reset
953 @return previous value of the bit */
954 UNIV_INLINE
955 byte
956 lock_rec_reset_nth_bit(
957 	lock_t*	lock,
958 	ulint	i)
959 {
960 	ut_ad(lock_get_type_low(lock) == LOCK_REC);
961 	ut_ad(i < lock->un_member.rec_lock.n_bits);
962 
963 	byte*	b = reinterpret_cast<byte*>(&lock[1]) + (i >> 3);
964 	byte	mask = 1 << (i & 7);
965 	byte	bit = *b & mask;
966 	*b &= ~mask;
967 
968 	if (bit != 0) {
969 		ut_ad(lock->trx->lock.n_rec_locks > 0);
970 		--lock->trx->lock.n_rec_locks;
971 	}
972 
973 	return(bit);
974 }
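
/* The bitmap addressing above is plain bit arithmetic: bit i lives in
byte i / 8 of the bitmap that trails the lock_t struct, at position
i % 8 within that byte. The same math in a standalone sketch (no InnoDB
types assumed):

@code
	unsigned char	bitmap[8] = {0};
	unsigned	i = 13;			// heap number of interest

	unsigned char*	b = &bitmap[i >> 3];	// byte 1
	unsigned char	mask = 1 << (i & 7);	// bit 5 within that byte
	unsigned char	old_bit = *b & mask;	// previous value of the bit
	*b &= ~mask;				// reset it
@endcode */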
975 
976 /** Reset the nth bit of a record lock and release the trx wait, if any.
977 @param[in,out]	lock record lock
978 @param[in] i	index of the bit that will be reset
979 @param[in] type	type_mode of the lock; LOCK_WAIT triggers the wait reset */
980 void
981 lock_rec_trx_wait(
982 	lock_t*	lock,
983 	ulint	i,
984 	ulint	type)
985 {
986 	lock_rec_reset_nth_bit(lock, i);
987 
988 	if (type & LOCK_WAIT) {
989 		lock_reset_lock_and_trx_wait(lock);
990 	}
991 }
992 
993 /*********************************************************************//**
994 Determines if there are explicit record locks on a page.
995 @return an explicit record lock on the page, or NULL if there are none */
996 lock_t*
997 lock_rec_expl_exist_on_page(
998 /*========================*/
999 	ulint	space,	/*!< in: space id */
1000 	ulint	page_no)/*!< in: page number */
1001 {
1002 	lock_t*	lock;
1003 
1004 	lock_mutex_enter();
1005 	/* Only used in ibuf pages, so rec_hash is good enough */
1006 	lock = lock_rec_get_first_on_page_addr(lock_sys->rec_hash,
1007 					       space, page_no);
1008 	lock_mutex_exit();
1009 
1010 	return(lock);
1011 }
1012 
1013 /*********************************************************************//**
1014 Resets the record lock bitmap to zero. NOTE: does not touch the wait_lock
1015 pointer in the transaction! This function is used in lock object creation
1016 and resetting. */
1017 static
1018 void
1019 lock_rec_bitmap_reset(
1020 /*==================*/
1021 	lock_t*	lock)	/*!< in: record lock */
1022 {
1023 	ulint	n_bytes;
1024 
1025 	ut_ad(lock_get_type_low(lock) == LOCK_REC);
1026 
1027 	/* Reset to zero the bitmap which resides immediately after the lock
1028 	struct */
1029 
1030 	n_bytes = lock_rec_get_n_bits(lock) / 8;
1031 
1032 	ut_ad((lock_rec_get_n_bits(lock) % 8) == 0);
1033 
1034 	memset(&lock[1], 0, n_bytes);
1035 }
1036 
1037 /*********************************************************************//**
1038 Copies a record lock to heap.
1039 @return copy of lock */
1040 static
1041 lock_t*
1042 lock_rec_copy(
1043 /*==========*/
1044 	const lock_t*	lock,	/*!< in: record lock */
1045 	mem_heap_t*	heap)	/*!< in: memory heap */
1046 {
1047 	ulint	size;
1048 
1049 	ut_ad(lock_get_type_low(lock) == LOCK_REC);
1050 
1051 	size = sizeof(lock_t) + lock_rec_get_n_bits(lock) / 8;
1052 
1053 	return(static_cast<lock_t*>(mem_heap_dup(heap, lock, size)));
1054 }
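
/* Note: a record lock and its bitmap are one contiguous allocation (the
bitmap trails the struct), so the single mem_heap_dup() above copies the
lock header and all of its bits in one go. */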
1055 
1056 /*********************************************************************//**
1057 Gets the previous record lock set on a record.
1058 @return previous lock on the same record, NULL if none exists */
1059 const lock_t*
1060 lock_rec_get_prev(
1061 /*==============*/
1062 	const lock_t*	in_lock,/*!< in: record lock */
1063 	ulint		heap_no)/*!< in: heap number of the record */
1064 {
1065 	lock_t*		lock;
1066 	ulint		space;
1067 	ulint		page_no;
1068 	lock_t*		found_lock	= NULL;
1069 	hash_table_t*	hash;
1070 
1071 	ut_ad(lock_mutex_own());
1072 	ut_ad(lock_get_type_low(in_lock) == LOCK_REC);
1073 
1074 	space = in_lock->un_member.rec_lock.space;
1075 	page_no = in_lock->un_member.rec_lock.page_no;
1076 
1077 	hash = lock_hash_get(in_lock->type_mode);
1078 
1079 	for (lock = lock_rec_get_first_on_page_addr(hash, space, page_no);
1080 	     /* No op */;
1081 	     lock = lock_rec_get_next_on_page(lock)) {
1082 
1083 		ut_ad(lock);
1084 
1085 		if (lock == in_lock) {
1086 
1087 			return(found_lock);
1088 		}
1089 
1090 		if (lock_rec_get_nth_bit(lock, heap_no)) {
1091 
1092 			found_lock = lock;
1093 		}
1094 	}
1095 }
1096 
1097 /*============= FUNCTIONS FOR ANALYZING RECORD LOCK QUEUE ================*/
1098 
1099 /*********************************************************************//**
1100 Checks if a transaction has a GRANTED explicit lock on rec stronger or equal
1101 to precise_mode.
1102 @return lock or NULL */
1103 UNIV_INLINE
1104 lock_t*
1105 lock_rec_has_expl(
1106 /*==============*/
1107 	ulint			precise_mode,/*!< in: LOCK_S or LOCK_X
1108 					possibly ORed to LOCK_GAP or
1109 					LOCK_REC_NOT_GAP, for a
1110 					supremum record we regard this
1111 					always a gap type request */
1112 	const buf_block_t*	block,	/*!< in: buffer block containing
1113 					the record */
1114 	ulint			heap_no,/*!< in: heap number of the record */
1115 	const trx_t*		trx)	/*!< in: transaction */
1116 {
1117 	lock_t*	lock;
1118 
1119 	ut_ad(lock_mutex_own());
1120 	ut_ad((precise_mode & LOCK_MODE_MASK) == LOCK_S
1121 	      || (precise_mode & LOCK_MODE_MASK) == LOCK_X);
1122 	ut_ad(!(precise_mode & LOCK_INSERT_INTENTION));
1123 
1124 	for (lock = lock_rec_get_first(lock_sys->rec_hash, block, heap_no);
1125 	     lock != NULL;
1126 	     lock = lock_rec_get_next(heap_no, lock)) {
1127 
1128 		if (lock->trx == trx
1129 		    && !lock_rec_get_insert_intention(lock)
1130 		    && lock_mode_stronger_or_eq(
1131 			    lock_get_mode(lock),
1132 			    static_cast<lock_mode>(
1133 				    precise_mode & LOCK_MODE_MASK))
1134 		    && !lock_get_wait(lock)
1135 		    && (!lock_rec_get_rec_not_gap(lock)
1136 			|| (precise_mode & LOCK_REC_NOT_GAP)
1137 			|| heap_no == PAGE_HEAP_NO_SUPREMUM)
1138 		    && (!lock_rec_get_gap(lock)
1139 			|| (precise_mode & LOCK_GAP)
1140 			|| heap_no == PAGE_HEAP_NO_SUPREMUM)) {
1141 
1142 			return(lock);
1143 		}
1144 	}
1145 
1146 	return(NULL);
1147 }
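
/* Restating the predicate above: a lock qualifies only when it

	- belongs to trx and is already granted (not waiting),
	- is not an insert intention lock,
	- is at least as strong as the requested mode, and
	- covers at least the gap/record part being requested; on the
	  supremum record every lock is treated as a gap type lock.
*/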
1148 
1149 #ifdef UNIV_DEBUG
1150 /*********************************************************************//**
1151 Checks if some other transaction has a lock request in the queue.
1152 @return lock or NULL */
1153 static MY_ATTRIBUTE((warn_unused_result))
1154 const lock_t*
1155 lock_rec_other_has_expl_req(
1156 /*========================*/
1157 	lock_mode		mode,	/*!< in: LOCK_S or LOCK_X */
1158 	const buf_block_t*	block,	/*!< in: buffer block containing
1159 					the record */
1160 	bool			wait,	/*!< in: whether also waiting locks
1161 					are taken into account */
1162 	ulint			heap_no,/*!< in: heap number of the record */
1163 	const trx_t*		trx)	/*!< in: transaction, or NULL if
1164 					requests by all transactions
1165 					are taken into account */
1166 {
1167 
1168 	ut_ad(lock_mutex_own());
1169 	ut_ad(mode == LOCK_X || mode == LOCK_S);
1170 
1171 	/* Only a GAP lock can be placed on the supremum record, and we
1172 	are not looking for GAP locks here */
1173 	if (heap_no == PAGE_HEAP_NO_SUPREMUM) {
1174 		return(NULL);
1175 	}
1176 
1177 	for (const lock_t* lock = lock_rec_get_first(lock_sys->rec_hash,
1178 						     block, heap_no);
1179 	     lock != NULL;
1180 	     lock = lock_rec_get_next_const(heap_no, lock)) {
1181 
1182 		if (lock->trx != trx
1183 		    && !lock_rec_get_gap(lock)
1184 		    && (wait || !lock_get_wait(lock))
1185 		    && lock_mode_stronger_or_eq(lock_get_mode(lock), mode)) {
1186 
1187 			return(lock);
1188 		}
1189 	}
1190 
1191 	return(NULL);
1192 }
1193 #endif /* UNIV_DEBUG */
1194 
1195 /*********************************************************************//**
1196 Checks if some other transaction has a conflicting explicit lock request
1197 in the queue, so that we have to wait.
1198 @return lock or NULL */
1199 static
1200 const lock_t*
1201 lock_rec_other_has_conflicting(
1202 /*===========================*/
1203 	ulint			mode,	/*!< in: LOCK_S or LOCK_X,
1204 					possibly ORed to LOCK_GAP or
1205 					LOCK_REC_NOT_GAP,
1206 					LOCK_INSERT_INTENTION */
1207 	const buf_block_t*	block,	/*!< in: buffer block containing
1208 					the record */
1209 	ulint			heap_no,/*!< in: heap number of the record */
1210 	const trx_t*		trx)	/*!< in: our transaction */
1211 {
1212 	const lock_t*		lock;
1213 
1214 	ut_ad(lock_mutex_own());
1215 
1216 	bool	is_supremum = (heap_no == PAGE_HEAP_NO_SUPREMUM);
1217 
1218 	for (lock = lock_rec_get_first(lock_sys->rec_hash, block, heap_no);
1219 	     lock != NULL;
1220 	     lock = lock_rec_get_next_const(heap_no, lock)) {
1221 
1222 		if (lock_rec_has_to_wait(trx, mode, lock, is_supremum)) {
1223 			return(lock);
1224 		}
1225 	}
1226 
1227 	return(NULL);
1228 }
1229 
1230 /*********************************************************************//**
1231 Checks if some transaction has an implicit x-lock on a record in a secondary
1232 index.
1233 @return the transaction which has the x-lock, or NULL;
1234 NOTE that this function can return false positives but never false
1235 negatives. The caller must confirm all positive results by calling
1236 trx_is_active(). */
1237 static
1238 trx_t*
1239 lock_sec_rec_some_has_impl(
1240 /*=======================*/
1241 	const rec_t*	rec,	/*!< in: user record */
1242 	dict_index_t*	index,	/*!< in: secondary index */
1243 	const ulint*	offsets)/*!< in: rec_get_offsets(rec, index) */
1244 {
1245 	trx_t*		trx;
1246 	trx_id_t	max_trx_id;
1247 	const page_t*	page = page_align(rec);
1248 
1249 	ut_ad(!lock_mutex_own());
1250 	ut_ad(!trx_sys_mutex_own());
1251 	ut_ad(!dict_index_is_clust(index));
1252 	ut_ad(page_rec_is_user_rec(rec));
1253 	ut_ad(rec_offs_validate(rec, index, offsets));
1254 
1255 	max_trx_id = page_get_max_trx_id(page);
1256 
1257 	/* Some transaction may have an implicit x-lock on the record only
1258 	if the max trx id for the page >= min trx id for the trx list, or
1259 	database recovery is running. We do not write the changes of a page
1260 	max trx id to the log, and therefore during recovery, this value
1261 	for a page may be incorrect. */
1262 
1263 	if (max_trx_id < trx_rw_min_trx_id() && !recv_recovery_is_on()) {
1264 
1265 		trx = 0;
1266 
1267 	} else if (!lock_check_trx_id_sanity(max_trx_id, rec, index, offsets)) {
1268 
1269 		/* The page is corrupt: try to avoid a crash by returning 0 */
1270 		trx = 0;
1271 
1272 	/* In this case it is possible that some transaction has an implicit
1273 	x-lock. We have to look in the clustered index. */
1274 
1275 	} else {
1276 		trx = row_vers_impl_x_locked(rec, index, offsets);
1277 	}
1278 
1279 	return(trx);
1280 }
1281 
1282 #ifdef UNIV_DEBUG
1283 /*********************************************************************//**
1284 Checks if some transaction, other than given trx_id, has an explicit
1285 lock on the given rec, in the given precise_mode.
1286 @return	the transaction, whose id is not equal to trx_id, that has an
1287 explicit lock on the given rec, in the given precise_mode or NULL.*/
1288 static
1289 trx_t*
1290 lock_rec_other_trx_holds_expl(
1291 /*==========================*/
1292 	ulint			precise_mode,	/*!< in: LOCK_S or LOCK_X
1293 						possibly ORed to LOCK_GAP or
1294 						LOCK_REC_NOT_GAP. */
1295 	trx_t*			trx,		/*!< in: trx holding implicit
1296 						lock on rec */
1297 	const rec_t*		rec,		/*!< in: user record */
1298 	const buf_block_t*	block)		/*!< in: buffer block
1299 						containing the record */
1300 {
1301 	trx_t* holds = NULL;
1302 
1303 	lock_mutex_enter();
1304 
1305 	if (trx_t* impl_trx = trx_rw_is_active(trx->id, NULL, false)) {
1306 		ulint heap_no = page_rec_get_heap_no(rec);
1307 		mutex_enter(&trx_sys->mutex);
1308 
1309 		for (trx_t* t = UT_LIST_GET_FIRST(trx_sys->rw_trx_list);
1310 		     t != NULL;
1311 		     t = UT_LIST_GET_NEXT(trx_list, t)) {
1312 
1313 			lock_t* expl_lock = lock_rec_has_expl(
1314 				precise_mode, block, heap_no, t);
1315 
1316 			if (expl_lock && expl_lock->trx != impl_trx) {
1317 				/* An explicit lock is held by trx other than
1318 				the trx holding the implicit lock. */
1319 				holds = expl_lock->trx;
1320 				break;
1321 			}
1322 		}
1323 
1324 		mutex_exit(&trx_sys->mutex);
1325 	}
1326 
1327 	lock_mutex_exit();
1328 
1329 	return(holds);
1330 }
1331 #endif /* UNIV_DEBUG */
1332 
1333 /*********************************************************************//**
1334 Return approximate number of record locks (bits set in the bitmap) for
1335 this transaction. Since delete-marked records may be removed, the
1336 record count will not be precise.
1337 The caller must be holding lock_sys->mutex. */
1338 ulint
1339 lock_number_of_rows_locked(
1340 /*=======================*/
1341 	const trx_lock_t*	trx_lock)	/*!< in: transaction locks */
1342 {
1343 	ut_ad(lock_mutex_own());
1344 
1345 	return(trx_lock->n_rec_locks);
1346 }
1347 
1348 /*********************************************************************//**
1349 Return the number of table locks for a transaction.
1350 The caller must be holding lock_sys->mutex. */
1351 ulint
1352 lock_number_of_tables_locked(
1353 /*=========================*/
1354 	const trx_lock_t*	trx_lock)	/*!< in: transaction locks */
1355 {
1356 	const lock_t*	lock;
1357 	ulint		n_tables = 0;
1358 
1359 	ut_ad(lock_mutex_own());
1360 
1361 	for (lock = UT_LIST_GET_FIRST(trx_lock->trx_locks);
1362 	     lock != NULL;
1363 	     lock = UT_LIST_GET_NEXT(trx_locks, lock)) {
1364 
1365 		if (lock_get_type_low(lock) == LOCK_TABLE) {
1366 			n_tables++;
1367 		}
1368 	}
1369 
1370 	return(n_tables);
1371 }
1372 
1373 /*============== RECORD LOCK CREATION AND QUEUE MANAGEMENT =============*/
1374 
1375 /**
1376 Check if the lock is on m_rec_id.
1377 @param[in] lock			Lock to compare with
1378 @return true if the record lock is on m_rec_id */
1382 bool
1383 RecLock::is_on_row(const lock_t* lock) const
1384 {
1385 	ut_ad(lock_get_type_low(lock) == LOCK_REC);
1386 
1387 	const lock_rec_t&	other = lock->un_member.rec_lock;
1388 
1389 	return(other.space == m_rec_id.m_space_id
1390 	       && other.page_no == m_rec_id.m_page_no
1391 	       && lock_rec_get_nth_bit(lock, m_rec_id.m_heap_no));
1392 }
1393 
1394 /**
1395 Do some checks and prepare for creating a new record lock */
1396 void
1397 RecLock::prepare() const
1398 {
1399 	ut_ad(lock_mutex_own());
1400 	ut_ad(m_trx == thr_get_trx(m_thr));
1401 
1402 	/* Test if there already is some other reason to suspend thread:
1403 	we do not enqueue a lock request if the query thread should be
1404 	stopped anyway */
1405 
1406 	if (que_thr_stop(m_thr)) {
1407 		ut_error;
1408 	}
1409 
1410 	switch (trx_get_dict_operation(m_trx)) {
1411 	case TRX_DICT_OP_NONE:
1412 		break;
1413 	case TRX_DICT_OP_TABLE:
1414 	case TRX_DICT_OP_INDEX:
1415 		ib::error() << "A record lock wait happens in a dictionary"
1416 			" operation. index " << m_index->name
1417 			<< " of table " << m_index->table->name
1418 			<< ". " << BUG_REPORT_MSG;
1419 		ut_ad(0);
1420 	}
1421 
1422 	ut_ad(m_index->table->n_ref_count > 0
1423 	      || !m_index->table->can_be_evicted);
1424 }
1425 
1426 /**
1427 Create the lock instance
1428 @param[in, out] trx	The transaction requesting the lock
1429 @param[in, out] index	Index on which record lock is required
1430 @param[in] mode		The lock mode desired
1431 @param[in] rec_id	The record id
1432 @param[in] size		Size of the lock + bitmap requested
1433 @return a record lock instance */
1434 lock_t*
1435 RecLock::lock_alloc(
1436 	trx_t*		trx,
1437 	dict_index_t*	index,
1438 	ulint		mode,
1439 	const RecID&	rec_id,
1440 	ulint		size)
1441 {
1442 	ut_ad(lock_mutex_own());
1443 
1444 	lock_t*	lock;
1445 
1446 	if (trx->lock.rec_cached >= trx->lock.rec_pool.size()
1447 	    || sizeof(*lock) + size > REC_LOCK_SIZE) {
1448 
1449 		ulint		n_bytes = size + sizeof(*lock);
1450 		mem_heap_t*	heap = trx->lock.lock_heap;
1451 
1452 		lock = reinterpret_cast<lock_t*>(mem_heap_alloc(heap, n_bytes));
1453 	} else {
1454 
1455 		lock = trx->lock.rec_pool[trx->lock.rec_cached];
1456 		++trx->lock.rec_cached;
1457 	}
1458 
1459 	lock->trx = trx;
1460 
1461 	lock->index = index;
1462 
1463 	/* Setup the lock attributes */
1464 
1465 	lock->type_mode = LOCK_REC | (mode & ~LOCK_TYPE_MASK);
1466 
1467 	lock_rec_t&	rec_lock = lock->un_member.rec_lock;
1468 
1469 	/* Predicate lock always on INFIMUM (0) */
1470 
1471 	if (is_predicate_lock(mode)) {
1472 
1473 		rec_lock.n_bits = 8;
1474 
1475 		memset(&lock[1], 0x0, 1);
1476 
1477 	} else {
1478 		ut_ad(8 * size < UINT32_MAX);
1479 		rec_lock.n_bits = static_cast<uint32_t>(8 * size);
1480 
1481 		memset(&lock[1], 0x0, size);
1482 	}
1483 
1484 	rec_lock.space = rec_id.m_space_id;
1485 
1486 	rec_lock.page_no = rec_id.m_page_no;
1487 
1488 	/* Set the bit corresponding to rec */
1489 
1490 	lock_rec_set_nth_bit(lock, rec_id.m_heap_no);
1491 
1492 	MONITOR_INC(MONITOR_NUM_RECLOCK);
1493 
1494 	MONITOR_INC(MONITOR_RECLOCK_CREATED);
1495 
1496 	return(lock);
1497 }
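
/* On the bitmap sizing above: one bit per heap number on the page, so
size bytes of bitmap cover 8 * size heap numbers. A sketch of how a
caller might compute the bytes needed for a page (illustrative only;
n_heap here is a hypothetical input, and the exact margin used by
RecLock's constructors lives in lock0priv.h):

@code
	// n_bits: current heap numbers on the page plus growth slack.
	ulint	n_bits  = n_heap + LOCK_PAGE_BITMAP_MARGIN;
	ulint	n_bytes = 1 + n_bits / 8;	// round up to whole bytes
@endcode */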
1498 
1499 /**
1500 Add the lock to the record lock hash and the transaction's lock list
1501 @param[in,out] lock	Newly created record lock to add to the rec hash
1502 @param[in] add_to_hash	If the lock should be added to the hash table */
1503 void
1504 RecLock::lock_add(lock_t* lock, bool add_to_hash)
1505 {
1506 	ut_ad(lock_mutex_own());
1507 	ut_ad(trx_mutex_own(lock->trx));
1508 
1509 	if (add_to_hash) {
1510 		ulint	key = m_rec_id.fold();
1511 
1512 		++lock->index->table->n_rec_locks;
1513 
1514 		HASH_INSERT(lock_t, hash, lock_hash_get(m_mode), key, lock);
1515 	}
1516 
1517 	if (m_mode & LOCK_WAIT) {
1518 		lock_set_lock_and_trx_wait(lock, lock->trx);
1519 	}
1520 
1521 	UT_LIST_ADD_LAST(lock->trx->lock.trx_locks, lock);
1522 }
1523 
1524 /**
1525 Create a new lock.
1526 @param[in,out] trx		Transaction requesting the lock
1527 @param[in] owns_trx_mutex	true if caller owns the trx_t::mutex
1528 @param[in] add_to_hash		add the lock to hash table
1529 @param[in] prdt			Predicate lock (optional)
1530 @return a new lock instance */
1531 lock_t*
1532 RecLock::create(
1533 	trx_t*	trx,
1534 	bool	owns_trx_mutex,
1535 	bool	add_to_hash,
1536 	const	lock_prdt_t* prdt)
1537 {
1538 	ut_ad(lock_mutex_own());
1539 	ut_ad(owns_trx_mutex == trx_mutex_own(trx));
1540 
1541 	/* Create the explicit lock instance and initialise it. */
1542 
1543 	lock_t*	lock = lock_alloc(trx, m_index, m_mode, m_rec_id, m_size);
1544 
1545 	if (prdt != NULL && (m_mode & LOCK_PREDICATE)) {
1546 
1547 		lock_prdt_set_prdt(lock, prdt);
1548 	}
1549 
1550 	/* Ensure that another transaction doesn't access the trx
1551 	lock state and lock data structures while we are adding the
1552 	lock and changing the transaction state to LOCK_WAIT */
1553 
1554 	if (!owns_trx_mutex) {
1555 		trx_mutex_enter(trx);
1556 	}
1557 
1558 	lock_add(lock, add_to_hash);
1559 
1560 	if (!owns_trx_mutex) {
1561 		trx_mutex_exit(trx);
1562 	}
1563 
1564 	return(lock);
1565 }
1566 
1567 /**
1568 Check the outcome of the deadlock check
1569 @param[in,out] victim_trx	Transaction selected for rollback
1570 @param[in,out] lock		Lock being requested
1571 @return DB_LOCK_WAIT, DB_DEADLOCK or DB_SUCCESS_LOCKED_REC */
1572 dberr_t
1573 RecLock::check_deadlock_result(const trx_t* victim_trx, lock_t* lock)
1574 {
1575 	ut_ad(lock_mutex_own());
1576 	ut_ad(m_trx == lock->trx);
1577 	ut_ad(trx_mutex_own(m_trx));
1578 
1579 	if (victim_trx != NULL) {
1580 
1581 		ut_ad(victim_trx == m_trx);
1582 
1583 		lock_reset_lock_and_trx_wait(lock);
1584 
1585 		lock_rec_reset_nth_bit(lock, m_rec_id.m_heap_no);
1586 
1587 		return(DB_DEADLOCK);
1588 
1589 	} else if (m_trx->lock.wait_lock == NULL) {
1590 
1591 		/* If there was a deadlock but we chose another
1592 		transaction as a victim, it is possible that we
1593 		already have the lock now granted! */
1594 
1595 		return(DB_SUCCESS_LOCKED_REC);
1596 	}
1597 
1598 	return(DB_LOCK_WAIT);
1599 }
1600 
1601 /**
1602 Check and resolve any deadlocks
1603 @param[in, out] lock		The lock being acquired
1604 @return DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED, or
1605 	DB_SUCCESS_LOCKED_REC; DB_SUCCESS_LOCKED_REC means that
1606 	there was a deadlock, but another transaction was chosen
1607 	as a victim, and we got the lock immediately: no need to
1608 	wait then */
1609 dberr_t
1610 RecLock::deadlock_check(lock_t* lock)
1611 {
1612 	ut_ad(lock_mutex_own());
1613 	ut_ad(lock->trx == m_trx);
1614 	ut_ad(trx_mutex_own(m_trx));
1615 
1616 	const trx_t*	victim_trx =
1617 			DeadlockChecker::check_and_resolve(lock, m_trx);
1618 
1619 	/* Check the outcome of the deadlock test. It is possible that
1620 	the transaction that blocked our lock was rolled back and we
1621 	were granted our lock. */
1622 
1623 	dberr_t	err = check_deadlock_result(victim_trx, lock);
1624 
1625 	if (err == DB_LOCK_WAIT) {
1626 
1627 		set_wait_state(lock);
1628 
1629 		MONITOR_INC(MONITOR_LOCKREC_WAIT);
1630 	}
1631 
1632 	return(err);
1633 }
1634 
1635 /**
1636 Collect the transactions that will need to be rolled back asynchronously
1637 @param[in, out] trx	Transaction to be rolled back */
1638 void
1639 RecLock::mark_trx_for_rollback(trx_t* trx)
1640 {
1641 	trx->abort = true;
1642 
1643 	ut_ad(!trx->read_only);
1644 	ut_ad(trx_mutex_own(m_trx));
1645 	ut_ad(!(trx->in_innodb & TRX_FORCE_ROLLBACK));
1646 	ut_ad(!(trx->in_innodb & TRX_FORCE_ROLLBACK_ASYNC));
1647 	ut_ad(!(trx->in_innodb & TRX_FORCE_ROLLBACK_DISABLE));
1648 
1649 	/* Note that we will attempt an async rollback. The _ASYNC
1650 	flag will be cleared if the transaction is rolled back
1651 	synchronously before we get a chance to do it. */
1652 
1653 	trx->in_innodb |= TRX_FORCE_ROLLBACK | TRX_FORCE_ROLLBACK_ASYNC;
1654 
1655 	bool		cas;
1656 	os_thread_id_t	thread_id = os_thread_get_curr_id();
1657 
1658 	cas = os_compare_and_swap_thread_id(&trx->killed_by, 0, thread_id);
1659 
1660 	ut_a(cas);
1661 
1662 	m_trx->hit_list.push_back(hit_list_t::value_type(trx));
1663 
1664 #ifdef UNIV_DEBUG
1665 	THD*	thd = trx->mysql_thd;
1666 
1667 	if (thd != NULL) {
1668 
1669 		char	buffer[1024];
1670 		ib::info() << "Blocking transaction: ID: " << trx->id << " - "
1671 			<< " Blocked transaction ID: "<< m_trx->id << " - "
1672 			<< thd_security_context(thd, buffer, sizeof(buffer),
1673 						512);
1674 	}
1675 #endif /* UNIV_DEBUG */
1676 }
1677 
1678 /**
1679 Setup the requesting transaction state for lock grant
1680 @param[in,out] lock		Lock for which to change state */
1681 void
1682 RecLock::set_wait_state(lock_t* lock)
1683 {
1684 	ut_ad(lock_mutex_own());
1685 	ut_ad(m_trx == lock->trx);
1686 	ut_ad(trx_mutex_own(m_trx));
1687 	ut_ad(lock_get_wait(lock));
1688 
1689 	m_trx->lock.wait_started = ut_time();
1690 
1691 	m_trx->lock.que_state = TRX_QUE_LOCK_WAIT;
1692 
1693 	m_trx->lock.was_chosen_as_deadlock_victim = false;
1694 
1695 	m_trx->stats.start_lock_wait();
1696 
1697 	bool	stopped = que_thr_stop(m_thr);
1698 	ut_a(stopped);
1699 }
1700 
1701 /**
1702 Enqueue a lock wait for a normal transaction. If it is a high priority
1703 transaction, jump the record lock wait queue; if the transaction at the
1704 head of the queue is itself waiting, roll it back. Also check for and resolve deadlocks.
1705 @param[in, out] wait_for	The lock that the joining transaction is
1706 				waiting for
1707 @param[in] prdt			Predicate [optional]
1708 @return DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED, or
1709 	DB_SUCCESS_LOCKED_REC; DB_SUCCESS_LOCKED_REC means that
1710 	there was a deadlock, but another transaction was chosen
1711 	as a victim, and we got the lock immediately: no need to
1712 	wait then */
1713 dberr_t
add_to_waitq(const lock_t * wait_for,const lock_prdt_t * prdt)1714 RecLock::add_to_waitq(const lock_t* wait_for, const lock_prdt_t* prdt)
1715 {
1716 	ut_ad(lock_mutex_own());
1717 	ut_ad(m_trx == thr_get_trx(m_thr));
1718 	ut_ad(trx_mutex_own(m_trx));
1719 
1720 	DEBUG_SYNC_C("rec_lock_add_to_waitq");
1721 
1722 	m_mode |= LOCK_WAIT;
1723 
1724 	/* Do the preliminary checks, and set query thread state */
1725 
1726 	prepare();
1727 
1728 	bool	high_priority = trx_is_high_priority(m_trx);
1729 
1730 	/* Don't queue the lock to hash table, if high priority transaction. */
1731 	lock_t*	lock = create(m_trx, true, !high_priority, prdt);
1732 
1733 	/* Attempt to jump over the low priority waiting locks. */
1734 	if (high_priority && jump_queue(lock, wait_for)) {
1735 
1736 		/* Lock is granted */
1737 		return(DB_SUCCESS);
1738 	}
1739 
1740 	ut_ad(lock_get_wait(lock));
1741 
1742 	dberr_t	err = deadlock_check(lock);
1743 
1744 	ut_ad(trx_mutex_own(m_trx));
1745 
1746 	/* m_trx->mysql_thd is NULL if it's an internal trx. So current_thd is used */
1747 	if (err == DB_LOCK_WAIT) {
1748 		thd_report_row_lock_wait(current_thd, wait_for->trx->mysql_thd);
1749 	}
1750 	return(err);
1751 }
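
/* Illustrative call flow (added comment, hedged sketch; parameter
names are descriptive, not the declared ones): for a normal transaction
the waiting lock is created in the lock hash and deadlock_check()
decides between DB_LOCK_WAIT and DB_DEADLOCK; for a high priority
transaction the lock is first created outside the hash, and
jump_queue() may grant it immediately:

	err = rec_lock.add_to_waitq(wait_for);
	// normal trx:        create(trx, owns_mutex, add_to_hash=true)
	//                      -> deadlock_check()
	// high priority trx: create(trx, owns_mutex, add_to_hash=false)
	//                      -> jump_queue() may return DB_SUCCESS
*/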

/*********************************************************************//**
Adds a record lock request in the record queue. The request is normally
added as the last in the queue, but if there are no waiting lock requests
on the record, and the request to be added is not a waiting request, we
can reuse a suitable record lock object already existing on the same page,
just setting the appropriate bit in its bitmap. This is a low-level function
which does NOT check for deadlocks or lock compatibility!
@return lock where the bit was set */
static
void
lock_rec_add_to_queue(
/*==================*/
	ulint			type_mode,/*!< in: lock mode, wait, gap
					etc. flags; type is ignored
					and replaced by LOCK_REC */
	const buf_block_t*	block,	/*!< in: buffer block containing
					the record */
	ulint			heap_no,/*!< in: heap number of the record */
	dict_index_t*		index,	/*!< in: index of record */
	trx_t*			trx,	/*!< in/out: transaction */
	bool			caller_owns_trx_mutex)
					/*!< in: TRUE if caller owns the
					transaction mutex */
{
#ifdef UNIV_DEBUG
	ut_ad(lock_mutex_own());
	ut_ad(caller_owns_trx_mutex == trx_mutex_own(trx));
	ut_ad(dict_index_is_clust(index)
	      || dict_index_get_online_status(index) != ONLINE_INDEX_CREATION);
	switch (type_mode & LOCK_MODE_MASK) {
	case LOCK_X:
	case LOCK_S:
		break;
	default:
		ut_error;
	}

	if (!(type_mode & (LOCK_WAIT | LOCK_GAP))) {
		lock_mode	mode = (type_mode & LOCK_MODE_MASK) == LOCK_S
			? LOCK_X
			: LOCK_S;
		const lock_t*	other_lock
			= lock_rec_other_has_expl_req(
				mode, block, false, heap_no, trx);
		ut_a(!other_lock);
	}
#endif /* UNIV_DEBUG */

	type_mode |= LOCK_REC;

	/* If rec is the supremum record, then we can reset the gap bit, as
	all locks on the supremum are automatically of the gap type, and we
	try to avoid unnecessary memory consumption of a new record lock
	struct for a gap type lock */

	if (heap_no == PAGE_HEAP_NO_SUPREMUM) {
		ut_ad(!(type_mode & LOCK_REC_NOT_GAP));

		/* There should never be LOCK_REC_NOT_GAP on a supremum
		record, but let us play safe */

		type_mode &= ~(LOCK_GAP | LOCK_REC_NOT_GAP);
	}

	lock_t*		lock;
	lock_t*		first_lock;
	hash_table_t*	hash = lock_hash_get(type_mode);

	/* Look for a waiting lock request on the same record or on a gap */

	for (first_lock = lock = lock_rec_get_first_on_page(hash, block);
	     lock != NULL;
	     lock = lock_rec_get_next_on_page(lock)) {

		if (lock_get_wait(lock)
		    && lock_rec_get_nth_bit(lock, heap_no)) {

			break;
		}
	}

	if (lock == NULL && !(type_mode & LOCK_WAIT)) {

		/* Look for a similar record lock on the same page:
		if one is found and there are no waiting lock requests,
		we can just set the bit */

		lock = lock_rec_find_similar_on_page(
			type_mode, heap_no, first_lock, trx);

		if (lock != NULL) {

			lock_rec_set_nth_bit(lock, heap_no);

			return;
		}
	}

	RecLock		rec_lock(index, block, heap_no, type_mode);

	rec_lock.create(trx, caller_owns_trx_mutex, true);
}
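
/* Worked example of the reuse path above (added comment, hedged):
suppose trx T already holds a granted LOCK_REC | LOCK_X lock whose
bitmap covers the heap numbers of this page, with bit 5 set, and no
waiter exists on the page. A new request by T with the same type_mode
on heap_no 7 does not allocate a second lock_t; it merely does

	lock_rec_set_nth_bit(lock, 7);

so one lock object can represent locks on many records of one page. */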

/*********************************************************************//**
This is a fast routine for locking a record in the most common cases:
there are no explicit locks on the page, or there is just one lock, owned
by this transaction, and of the right type_mode. This is a low-level function
which does NOT look at implicit locks! Checks lock compatibility within
explicit locks. This function sets a normal next-key lock, or in the case of
a page supremum record, a gap type lock.
@return whether the locking succeeded */
UNIV_INLINE
lock_rec_req_status
lock_rec_lock_fast(
/*===============*/
	bool			impl,	/*!< in: if TRUE, no lock is set
					if no wait is necessary: we
					assume that the caller will
					set an implicit lock */
	ulint			mode,	/*!< in: lock mode: LOCK_X or
					LOCK_S possibly ORed to either
					LOCK_GAP or LOCK_REC_NOT_GAP */
	const buf_block_t*	block,	/*!< in: buffer block containing
					the record */
	ulint			heap_no,/*!< in: heap number of record */
	dict_index_t*		index,	/*!< in: index of record */
	que_thr_t*		thr)	/*!< in: query thread */
{
	ut_ad(lock_mutex_own());
	ut_ad(!srv_read_only_mode);
	ut_ad((LOCK_MODE_MASK & mode) != LOCK_S
	      || lock_table_has(thr_get_trx(thr), index->table, LOCK_IS));
	ut_ad((LOCK_MODE_MASK & mode) != LOCK_X
	      || lock_table_has(thr_get_trx(thr), index->table, LOCK_IX)
	      || srv_read_only_mode);
	ut_ad((LOCK_MODE_MASK & mode) == LOCK_S
	      || (LOCK_MODE_MASK & mode) == LOCK_X);
	ut_ad(mode - (LOCK_MODE_MASK & mode) == LOCK_GAP
	      || mode - (LOCK_MODE_MASK & mode) == 0
	      || mode - (LOCK_MODE_MASK & mode) == LOCK_REC_NOT_GAP);
	ut_ad(dict_index_is_clust(index) || !dict_index_is_online_ddl(index));

	DBUG_EXECUTE_IF("innodb_report_deadlock", return(LOCK_REC_FAIL););

	lock_t*	lock = lock_rec_get_first_on_page(lock_sys->rec_hash, block);

	trx_t*	trx = thr_get_trx(thr);

	lock_rec_req_status	status = LOCK_REC_SUCCESS;

	if (lock == NULL) {

		if (!impl) {
			RecLock	rec_lock(index, block, heap_no, mode);

			/* Note that we don't own the trx mutex. */
			rec_lock.create(trx, false, true);
		}

		status = LOCK_REC_SUCCESS_CREATED;
	} else {
		trx_mutex_enter(trx);

		if (lock_rec_get_next_on_page(lock)
		     || lock->trx != trx
		     || lock->type_mode != (mode | LOCK_REC)
		     || lock_rec_get_n_bits(lock) <= heap_no) {

			status = LOCK_REC_FAIL;
		} else if (!impl) {
			/* If the nth bit of the record lock is already set
			then we do not set a new lock bit, otherwise we do
			set */
			if (!lock_rec_get_nth_bit(lock, heap_no)) {
				lock_rec_set_nth_bit(lock, heap_no);
				status = LOCK_REC_SUCCESS_CREATED;
			}
		}

		trx_mutex_exit(trx);
	}

	return(status);
}
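
/* Decision table for the fast path above (added comment, hedged
summary of the code):

	page has no explicit locks           -> create lock (unless impl),
	                                        LOCK_REC_SUCCESS_CREATED
	exactly one lock, owned by this trx,
	exact type_mode, bitmap covers
	heap_no                              -> set/keep the bit,
	                                        LOCK_REC_SUCCESS[_CREATED]
	anything else                        -> LOCK_REC_FAIL; the caller
	                                        falls back to
	                                        lock_rec_lock_slow()
*/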

/*********************************************************************//**
This is the general, and slower, routine for locking a record. This is a
low-level function which does NOT look at implicit locks! Checks lock
compatibility within explicit locks. This function sets a normal next-key
lock, or in the case of a page supremum record, a gap type lock.
@return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, DB_LOCK_WAIT, DB_DEADLOCK,
or DB_QUE_THR_SUSPENDED */
static
dberr_t
lock_rec_lock_slow(
/*===============*/
	ibool			impl,	/*!< in: if TRUE, no lock is set
					if no wait is necessary: we
					assume that the caller will
					set an implicit lock */
	ulint			mode,	/*!< in: lock mode: LOCK_X or
					LOCK_S possibly ORed to either
					LOCK_GAP or LOCK_REC_NOT_GAP */
	const buf_block_t*	block,	/*!< in: buffer block containing
					the record */
	ulint			heap_no,/*!< in: heap number of record */
	dict_index_t*		index,	/*!< in: index of record */
	que_thr_t*		thr)	/*!< in: query thread */
{
	ut_ad(lock_mutex_own());
	ut_ad(!srv_read_only_mode);
	ut_ad((LOCK_MODE_MASK & mode) != LOCK_S
	      || lock_table_has(thr_get_trx(thr), index->table, LOCK_IS));
	ut_ad((LOCK_MODE_MASK & mode) != LOCK_X
	      || lock_table_has(thr_get_trx(thr), index->table, LOCK_IX));
	ut_ad((LOCK_MODE_MASK & mode) == LOCK_S
	      || (LOCK_MODE_MASK & mode) == LOCK_X);
	ut_ad(mode - (LOCK_MODE_MASK & mode) == LOCK_GAP
	      || mode - (LOCK_MODE_MASK & mode) == 0
	      || mode - (LOCK_MODE_MASK & mode) == LOCK_REC_NOT_GAP);
	ut_ad(dict_index_is_clust(index) || !dict_index_is_online_ddl(index));

	DBUG_EXECUTE_IF("innodb_report_deadlock", return(DB_DEADLOCK););

	dberr_t	err;
	trx_t*	trx = thr_get_trx(thr);

	trx_mutex_enter(trx);

	if (lock_rec_has_expl(mode, block, heap_no, trx)) {

		/* The trx already has a strong enough lock on rec: do
		nothing */

		err = DB_SUCCESS;

	} else {

		const lock_t* wait_for = lock_rec_other_has_conflicting(
			mode, block, heap_no, trx);

		if (wait_for != NULL) {

			/* If another transaction has a non-gap conflicting
			request in the queue, as this transaction does not
			have a lock strong enough already granted on the
			record, we may have to wait. */

			RecLock	rec_lock(thr, index, block, heap_no, mode);

			err = rec_lock.add_to_waitq(wait_for);

		} else if (!impl) {

			/* Set the requested lock on the record, note that
			we already own the transaction mutex. */

			lock_rec_add_to_queue(
				LOCK_REC | mode, block, heap_no, index, trx,
				true);

			err = DB_SUCCESS_LOCKED_REC;
		} else {
			err = DB_SUCCESS;
		}
	}

	trx_mutex_exit(trx);

	return(err);
}

/*********************************************************************//**
Tries to lock the specified record in the mode requested. If not immediately
possible, enqueues a waiting lock request. This is a low-level function
which does NOT look at implicit locks! Checks lock compatibility within
explicit locks. This function sets a normal next-key lock, or in the case
of a page supremum record, a gap type lock.
@return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, DB_LOCK_WAIT, DB_DEADLOCK,
or DB_QUE_THR_SUSPENDED */
static
dberr_t
lock_rec_lock(
/*==========*/
	bool			impl,	/*!< in: if true, no lock is set
					if no wait is necessary: we
					assume that the caller will
					set an implicit lock */
	ulint			mode,	/*!< in: lock mode: LOCK_X or
					LOCK_S possibly ORed to either
					LOCK_GAP or LOCK_REC_NOT_GAP */
	const buf_block_t*	block,	/*!< in: buffer block containing
					the record */
	ulint			heap_no,/*!< in: heap number of record */
	dict_index_t*		index,	/*!< in: index of record */
	que_thr_t*		thr)	/*!< in: query thread */
{
	ut_ad(lock_mutex_own());
	ut_ad(!srv_read_only_mode);
	ut_ad((LOCK_MODE_MASK & mode) != LOCK_S
	      || lock_table_has(thr_get_trx(thr), index->table, LOCK_IS));
	ut_ad((LOCK_MODE_MASK & mode) != LOCK_X
	      || lock_table_has(thr_get_trx(thr), index->table, LOCK_IX));
	ut_ad((LOCK_MODE_MASK & mode) == LOCK_S
	      || (LOCK_MODE_MASK & mode) == LOCK_X);
	ut_ad(mode - (LOCK_MODE_MASK & mode) == LOCK_GAP
	      || mode - (LOCK_MODE_MASK & mode) == LOCK_REC_NOT_GAP
	      || mode - (LOCK_MODE_MASK & mode) == 0);
	ut_ad(dict_index_is_clust(index) || !dict_index_is_online_ddl(index));

	/* We try a simplified and faster subroutine for the most
	common cases */
	switch (lock_rec_lock_fast(impl, mode, block, heap_no, index, thr)) {
	case LOCK_REC_SUCCESS:
		return(DB_SUCCESS);
	case LOCK_REC_SUCCESS_CREATED:
		return(DB_SUCCESS_LOCKED_REC);
	case LOCK_REC_FAIL:
		return(lock_rec_lock_slow(impl, mode, block,
					  heap_no, index, thr));
	}

	ut_error;
	return(DB_ERROR);
}
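
/* Illustrative caller sketch (added comment, hedged; the call site is
hypothetical): a locking read that needs only the record itself, and
not the gap before it, would compose the mode argument as

	err = lock_rec_lock(false, LOCK_X | LOCK_REC_NOT_GAP,
			    block, heap_no, index, thr);

whereas plain next-key locking passes LOCK_X (or LOCK_S) with neither
LOCK_GAP nor LOCK_REC_NOT_GAP ORed in. */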

/*********************************************************************//**
Checks if a waiting record lock request still has to wait in a queue.
@return lock that is causing the wait */
static
const lock_t*
lock_rec_has_to_wait_in_queue(
/*==========================*/
	const lock_t*	wait_lock)	/*!< in: waiting record lock */
{
	const lock_t*	lock;
	ulint		space;
	ulint		page_no;
	ulint		heap_no;
	ulint		bit_mask;
	ulint		bit_offset;
	hash_table_t*	hash;

	ut_ad(lock_mutex_own());
	ut_ad(lock_get_wait(wait_lock));
	ut_ad(lock_get_type_low(wait_lock) == LOCK_REC);

	space = wait_lock->un_member.rec_lock.space;
	page_no = wait_lock->un_member.rec_lock.page_no;
	heap_no = lock_rec_find_set_bit(wait_lock);

	bit_offset = heap_no / 8;
	bit_mask = static_cast<ulint>(1 << (heap_no % 8));

	hash = lock_hash_get(wait_lock->type_mode);

	for (lock = lock_rec_get_first_on_page_addr(hash, space, page_no);
	     lock != wait_lock;
	     lock = lock_rec_get_next_on_page_const(lock)) {

		const byte*	p = (const byte*) &lock[1];

		if (heap_no < lock_rec_get_n_bits(lock)
		    && (p[bit_offset] & bit_mask)
		    && lock_has_to_wait(wait_lock, lock)) {

			return(lock);
		}
	}

	return(NULL);
}
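
/* Worked example of the bitmap addressing above (added comment): the
record lock bitmap is stored directly after the lock_t struct, hence
the &lock[1] cast, with one bit per heap number. For heap_no = 10:

	bit_offset = 10 / 8        = 1;			// second bitmap byte
	bit_mask   = 1 << (10 % 8) = 1 << 2 = 0x04;

so (p[1] & 0x04) != 0 means this earlier lock in the queue covers the
same record as wait_lock. */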

/*************************************************************//**
Grants a lock to a waiting lock request and releases the waiting transaction.
The caller must hold lock_sys->mutex but not lock->trx->mutex. */
static
void
lock_grant(
/*=======*/
	lock_t*	lock)	/*!< in/out: waiting lock request */
{
	ut_ad(lock_mutex_own());

	lock_reset_lock_and_trx_wait(lock);

	trx_mutex_enter(lock->trx);

	if (lock_get_mode(lock) == LOCK_AUTO_INC) {
		dict_table_t*	table = lock->un_member.tab_lock.table;

		if (table->autoinc_trx == lock->trx) {
			ib::error() << "Transaction already had an"
				<< " AUTO-INC lock!";
		} else {
			table->autoinc_trx = lock->trx;

			ib_vector_push(lock->trx->autoinc_locks, &lock);
		}
	}

	DBUG_PRINT("ib_lock", ("wait for trx " TRX_ID_FMT " ends",
			       trx_get_id_for_print(lock->trx)));

	/* If we are resolving a deadlock by choosing another transaction
	as a victim, then our original transaction may not be in the
	TRX_QUE_LOCK_WAIT state, and there is no need to end the lock wait
	for it */

	if (lock->trx->lock.que_state == TRX_QUE_LOCK_WAIT) {
		que_thr_t*	thr;

		thr = que_thr_end_lock_wait(lock->trx);

		if (thr != NULL) {
			lock_wait_release_thread_if_suspended(thr);
		}
	}

	trx_mutex_exit(lock->trx);
}

/**
Jump the queue for the record over all low priority transactions and
add the lock. If all current granted locks are compatible, grant the
lock. Otherwise, mark all granted transactions for asynchronous
rollback and add them to the hit list.
@param[in, out]	lock		Lock being requested
@param[in]	conflict_lock	First conflicting lock from the head
@return true if the lock is granted */
bool
RecLock::jump_queue(
	lock_t*		lock,
	const lock_t*	conflict_lock)
{
	ut_ad(m_trx == lock->trx);
	ut_ad(trx_mutex_own(m_trx));
	ut_ad(conflict_lock->trx != m_trx);
	ut_ad(trx_is_high_priority(m_trx));
	ut_ad(m_rec_id.m_heap_no != ULINT32_UNDEFINED);

	bool	high_priority = false;

	/* Find out the position to add the lock. If there are other high
	priority transactions in waiting state then we should add it after
	the last high priority transaction. Otherwise, we can add it after
	the last granted lock jumping over the wait queue. */
	bool grant_lock = lock_add_priority(lock, conflict_lock,
					    &high_priority);

	if (grant_lock) {

		ut_ad(conflict_lock->trx->lock.que_state == TRX_QUE_LOCK_WAIT);
		ut_ad(conflict_lock->trx->lock.wait_lock == conflict_lock);

#ifdef UNIV_DEBUG
		ib::info() << "Granting High Priority Transaction (ID): "
			   << lock->trx->id << " the lock jumping over"
			   << " waiting Transaction (ID): "
			   << conflict_lock->trx->id;
#endif /* UNIV_DEBUG */

		lock_reset_lock_and_trx_wait(lock);
		return(true);
	}

	/* If another high priority transaction is found waiting,
	victim transactions are already marked for rollback. */
	if (high_priority) {

		return(false);
	}

	/* The lock is placed after the last granted lock in the queue.
	Check and add low priority transactions to the hit list for
	ASYNC rollback. */
	make_trx_hit_list(lock, conflict_lock);

	return(false);
}

/** Find position in lock queue and add the high priority transaction
lock. Intention and GAP only locks can be granted even if there are
waiting locks in front of the queue. To add the high priority
transaction in a safe position we keep the following rules.

1. If the lock can be granted, add it before the first waiting lock
in the queue so that all currently waiting locks need to do conflict
check before getting granted.

2. If the lock has to wait, add it after the last granted lock or the
last waiting high priority transaction in the queue, whichever is later.
This ensures that the transaction is granted only after doing conflict
check with all granted transactions.
@param[in]	lock		Lock being requested
@param[in]	conflict_lock	First conflicting lock from the head
@param[out]	high_priority	high priority transaction ahead in queue
@return true if the lock can be granted */
bool
RecLock::lock_add_priority(
	lock_t*		lock,
	const lock_t*	conflict_lock,
	bool*		high_priority)
{
	ut_ad(high_priority);

	*high_priority = false;

	/* If the first conflicting lock is waiting for the current row,
	then all other granted locks are compatible and the lock can be
	directly granted if no other high priority transactions are
	waiting. We need to recheck with all granted transactions as there
	could be granted GAP or Intention locks down the queue. */
	bool	grant_lock = (conflict_lock->is_waiting());
	lock_t*	lock_head = NULL;
	lock_t*	grant_position = NULL;
	lock_t*	add_position = NULL;

	/* Different lock types (such as predicate locks) reside in
	different hash tables */
	hash_table_t*	lock_hash = lock_hash_get(m_mode);

	HASH_SEARCH(hash, lock_hash, m_rec_id.fold(), lock_t*,
		    lock_head, ut_ad(lock_head->is_record_lock()), true);

	ut_ad(lock_head);

	for (lock_t* next = lock_head; next != NULL; next = next->hash) {

		/* check only for locks on the current row */
		if (!is_on_row(next)) {
			continue;
		}

		if (next->is_waiting()) {
			/* grant lock position is the granted lock just before
			the first wait lock in the queue. */
			if (grant_position == NULL) {
				grant_position = add_position;
			}

			if (trx_is_high_priority(next->trx)) {

				*high_priority = true;
				grant_lock = false;
				add_position = next;
			}
		} else {

			add_position = next;
			/* Cannot grant lock if there is any conflicting
			granted lock. */
			if (grant_lock && lock_has_to_wait(lock, next)) {
				grant_lock = false;
			}
		}
	}

	/* If the lock is to be granted it is safe to add before the first
	waiting lock in the queue. */
	if (grant_lock) {

		ut_ad(!lock_has_to_wait(lock, grant_position));
		add_position = grant_position;
	}

	ut_ad(add_position != NULL);

	/* Add the lock to lock hash table. */
	lock->hash = add_position->hash;
	add_position->hash = lock;
	++lock->index->table->n_rec_locks;

	return(grant_lock);
}
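
/* Illustrative queue positions (added comment, hedged sketch): with a
hash chain [G1, G2, W1, W2] of granted (G) and waiting (W) locks on the
row, a grantable high priority lock HP is spliced in after G2 (the
grant position, just before the first waiter), giving
[G1, G2, HP, W1, W2]. If HP must wait and W2 is itself high priority,
HP is added after W2 instead, per rule 2 above. */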

/** Iterate over the granted locks and prepare the hit list for ASYNC
rollback. If the transaction is waiting for some other lock then wake
it up with a deadlock error. Currently we don't mark the following
transactions for ASYNC rollback:
1. Read only transactions
2. Background transactions
3. Other high priority transactions
@param[in]	lock		Lock being requested
@param[in]	conflict_lock	First conflicting lock from the head */
void
RecLock::make_trx_hit_list(
	lock_t*		lock,
	const lock_t*	conflict_lock)
{
	const lock_t*	next;

	for (next = conflict_lock; next != NULL; next = next->hash) {

		/* All locks ahead in the queue are checked. */
		if (next == lock) {

			ut_ad(next->is_waiting());
			break;
		}

		trx_t*	trx = next->trx;
		/* Check only for conflicting, granted locks on the current
		row. Currently, we don't rollback read only transactions or
		transactions owned by background threads. */
		if (trx == lock->trx
		    || !is_on_row(next)
		    || next->is_waiting()
		    || trx->read_only
		    || trx->mysql_thd == NULL
		    || !lock_has_to_wait(lock, next)) {

			continue;
		}

		trx_mutex_enter(trx);

		/* Skip high priority transactions, if already marked for
		abort by some other transaction or if ASYNC rollback is
		disabled. A transaction must complete kill/abort of a victim
		transaction once marked and added to the hit list. */
		if (trx_is_high_priority(trx)
		    || (trx->in_innodb & TRX_FORCE_ROLLBACK_DISABLE) != 0
		    || trx->abort) {

			trx_mutex_exit(trx);
			continue;
		}

		/* If the transaction is waiting on some other resource then
		wake it up with a DEADLOCK error so that it can rollback. */
		if (trx->lock.que_state == TRX_QUE_LOCK_WAIT) {

			/* Assert that it is not waiting for current record. */
			ut_ad(trx->lock.wait_lock != next);
#ifdef UNIV_DEBUG
			ib::info() << "High Priority Transaction (ID): "
				   << lock->trx->id << " waking up blocking"
				   << " transaction (ID): " << trx->id;
#endif /* UNIV_DEBUG */
			trx->lock.was_chosen_as_deadlock_victim = true;
			lock_cancel_waiting_and_release(trx->lock.wait_lock);
			trx_mutex_exit(trx);
			continue;
		}

		/* Mark for ASYNC rollback and add to the hit list. */
		mark_trx_for_rollback(trx);
		trx_mutex_exit(trx);
	}

	ut_ad(next == lock);
}

/*************************************************************//**
Cancels a waiting record lock request and releases the waiting transaction
that requested it. NOTE: does NOT check if waiting lock requests behind this
one can now be granted! */
static
void
lock_rec_cancel(
/*============*/
	lock_t*	lock)	/*!< in: waiting record lock request */
{
	que_thr_t*	thr;

	ut_ad(lock_mutex_own());
	ut_ad(lock_get_type_low(lock) == LOCK_REC);

	/* Reset the bit (there can be only one set bit) in the lock bitmap */
	lock_rec_reset_nth_bit(lock, lock_rec_find_set_bit(lock));

	/* Reset the wait flag and the back pointer to lock in trx */

	lock_reset_lock_and_trx_wait(lock);

	/* The following function releases the trx from lock wait */

	trx_mutex_enter(lock->trx);

	thr = que_thr_end_lock_wait(lock->trx);

	if (thr != NULL) {
		lock_wait_release_thread_if_suspended(thr);
	}

	trx_mutex_exit(lock->trx);
}

/** Grant locks to waiting requests that no longer conflict
@param[in]	in_lock		record lock object: grant all non-conflicting
				locks waiting behind this lock object */
static
void
lock_rec_grant(lock_t* in_lock)
{
	lock_t*		lock;

	ulint		space = in_lock->space();
	ulint		page_no = in_lock->page_number();
	hash_table_t*	lock_hash = in_lock->hash_table();

	/* Check if waiting locks in the queue can now be granted: grant
	locks if there are no conflicting locks ahead. Stop at the first
	X lock that is waiting or has been granted. */

	for (lock = lock_rec_get_first_on_page_addr(lock_hash, space, page_no);
	     lock != NULL;
	     lock = lock_rec_get_next_on_page(lock)) {

		if (lock_get_wait(lock)
		    && !lock_rec_has_to_wait_in_queue(lock)) {

			/* Grant the lock */
			ut_ad(lock->trx != in_lock->trx);
			lock_grant(lock);
		}
	}
}

/*************************************************************//**
Removes a record lock request, waiting or granted, from the queue and
grants locks to other transactions in the queue if they now are entitled
to a lock. NOTE: all record locks contained in in_lock are removed. */
void
lock_rec_dequeue_from_page(
/*=======================*/
	lock_t*		in_lock)	/*!< in: record lock object: all
					record locks which are contained in
					this lock object are removed;
					transactions waiting behind will
					get their lock requests granted,
					if they are now qualified to it */
{
	ulint		space;
	ulint		page_no;
	trx_lock_t*	trx_lock;
	hash_table_t*	lock_hash;

	ut_ad(lock_mutex_own());
	ut_ad(lock_get_type_low(in_lock) == LOCK_REC);
	/* We may or may not be holding in_lock->trx->mutex here. */

	trx_lock = &in_lock->trx->lock;

	space = in_lock->un_member.rec_lock.space;
	page_no = in_lock->un_member.rec_lock.page_no;

	ut_ad(in_lock->index->table->n_rec_locks > 0);
	in_lock->index->table->n_rec_locks--;

	lock_hash = lock_hash_get(in_lock->type_mode);

	HASH_DELETE(lock_t, hash, lock_hash,
		    lock_rec_fold(space, page_no), in_lock);

	UT_LIST_REMOVE(trx_lock->trx_locks, in_lock);

	MONITOR_INC(MONITOR_RECLOCK_REMOVED);
	MONITOR_DEC(MONITOR_NUM_RECLOCK);

	lock_rec_grant(in_lock);
}

/*************************************************************//**
Removes a record lock request, waiting or granted, from the queue. */
void
lock_rec_discard(
/*=============*/
	lock_t*		in_lock)	/*!< in: record lock object: all
					record locks which are contained
					in this lock object are removed */
{
	ulint		space;
	ulint		page_no;
	trx_lock_t*	trx_lock;

	ut_ad(lock_mutex_own());
	ut_ad(lock_get_type_low(in_lock) == LOCK_REC);

	trx_lock = &in_lock->trx->lock;

	space = in_lock->un_member.rec_lock.space;
	page_no = in_lock->un_member.rec_lock.page_no;

	ut_ad(in_lock->index->table->n_rec_locks > 0);
	in_lock->index->table->n_rec_locks--;

	HASH_DELETE(lock_t, hash, lock_hash_get(in_lock->type_mode),
			    lock_rec_fold(space, page_no), in_lock);

	UT_LIST_REMOVE(trx_lock->trx_locks, in_lock);

	MONITOR_INC(MONITOR_RECLOCK_REMOVED);
	MONITOR_DEC(MONITOR_NUM_RECLOCK);
}

/*************************************************************//**
Removes record lock objects set on an index page which is discarded. This
function does not move locks, or check for waiting locks, therefore the
lock bitmaps must already be reset when this function is called. */
static
void
lock_rec_free_all_from_discard_page_low(
/*====================================*/
	ulint		space,
	ulint		page_no,
	hash_table_t*	lock_hash)
{
	lock_t*	lock;
	lock_t*	next_lock;

	lock = lock_rec_get_first_on_page_addr(lock_hash, space, page_no);

	while (lock != NULL) {
		ut_ad(lock_rec_find_set_bit(lock) == ULINT_UNDEFINED);
		ut_ad(!lock_get_wait(lock));

		next_lock = lock_rec_get_next_on_page(lock);

		lock_rec_discard(lock);

		lock = next_lock;
	}
}

/*************************************************************//**
Removes record lock objects set on an index page which is discarded. This
function does not move locks, or check for waiting locks, therefore the
lock bitmaps must already be reset when this function is called. */
void
lock_rec_free_all_from_discard_page(
/*================================*/
	const buf_block_t*	block)	/*!< in: page to be discarded */
{
	ulint	space;
	ulint	page_no;

	ut_ad(lock_mutex_own());

	space = block->page.id.space();
	page_no = block->page.id.page_no();

	lock_rec_free_all_from_discard_page_low(
		space, page_no, lock_sys->rec_hash);
	lock_rec_free_all_from_discard_page_low(
		space, page_no, lock_sys->prdt_hash);
	lock_rec_free_all_from_discard_page_low(
		space, page_no, lock_sys->prdt_page_hash);
}

/*============= RECORD LOCK MOVING AND INHERITING ===================*/

/*************************************************************//**
Resets the lock bits for a single record. Releases transactions waiting for
lock requests here. */
static
void
lock_rec_reset_and_release_wait_low(
/*================================*/
	hash_table_t*		hash,	/*!< in: hash table */
	const buf_block_t*	block,	/*!< in: buffer block containing
					the record */
	ulint			heap_no)/*!< in: heap number of record */
{
	lock_t*	lock;

	ut_ad(lock_mutex_own());

	for (lock = lock_rec_get_first(hash, block, heap_no);
	     lock != NULL;
	     lock = lock_rec_get_next(heap_no, lock)) {

		if (lock_get_wait(lock)) {
			lock_rec_cancel(lock);
		} else {
			lock_rec_reset_nth_bit(lock, heap_no);
		}
	}
}

/*************************************************************//**
Resets the lock bits for a single record. Releases transactions waiting for
lock requests here. */
static
void
lock_rec_reset_and_release_wait(
/*============================*/
	const buf_block_t*	block,	/*!< in: buffer block containing
					the record */
	ulint			heap_no)/*!< in: heap number of record */
{
	lock_rec_reset_and_release_wait_low(
		lock_sys->rec_hash, block, heap_no);

	lock_rec_reset_and_release_wait_low(
		lock_sys->prdt_hash, block, PAGE_HEAP_NO_INFIMUM);
	lock_rec_reset_and_release_wait_low(
		lock_sys->prdt_page_hash, block, PAGE_HEAP_NO_INFIMUM);
}

/*************************************************************//**
Makes a record inherit the locks (except LOCK_INSERT_INTENTION type)
of another record as gap type locks, but does not reset the lock bits of
the other record. Also waiting lock requests on rec are inherited as
GRANTED gap locks. */
static
void
lock_rec_inherit_to_gap(
/*====================*/
	const buf_block_t*	heir_block,	/*!< in: block containing the
						record which inherits */
	const buf_block_t*	block,		/*!< in: block containing the
						record from which inherited;
						does NOT reset the locks on
						this record */
	ulint			heir_heap_no,	/*!< in: heap_no of the
						inheriting record */
	ulint			heap_no)	/*!< in: heap_no of the
						donating record */
{
	lock_t*	lock;

	ut_ad(lock_mutex_own());

	/* If srv_locks_unsafe_for_binlog is TRUE or session is using
	READ COMMITTED isolation level, we do not want locks set
	by an UPDATE or a DELETE to be inherited as gap type locks. But we
	DO want S-locks/X-locks (taken for replace) set by a consistency
	constraint to be inherited also then. */

	for (lock = lock_rec_get_first(lock_sys->rec_hash, block, heap_no);
	     lock != NULL;
	     lock = lock_rec_get_next(heap_no, lock)) {

		/* Skip inheriting lock if set */
		if (lock->trx->skip_lock_inheritance) {

			continue;
		}

		if (!lock_rec_get_insert_intention(lock)
		    && !((srv_locks_unsafe_for_binlog
			  || lock->trx->isolation_level
			  <= TRX_ISO_READ_COMMITTED)
			 && lock_get_mode(lock) ==
			 (lock->trx->duplicates ? LOCK_S : LOCK_X))) {
			lock_rec_add_to_queue(
				LOCK_REC | LOCK_GAP | lock_get_mode(lock),
				heir_block, heir_heap_no, lock->index,
				lock->trx, FALSE);
		}
	}
}
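
/* Illustrative scenario (added comment, hedged sketch): if record B is
removed from a page holding records A < B < C, the locks that covered B
are inherited by its successor C as gap locks; e.g. a next-key LOCK_X
on B becomes LOCK_X | LOCK_GAP on C, so the gap once protected by B
stays protected after B is gone. */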

/*************************************************************//**
Makes a record inherit the gap locks (except LOCK_INSERT_INTENTION type)
of another record as gap type locks, but does not reset the lock bits of the
other record. Also waiting lock requests are inherited as GRANTED gap locks. */
static
void
lock_rec_inherit_to_gap_if_gap_lock(
/*================================*/
	const buf_block_t*	block,		/*!< in: buffer block */
	ulint			heir_heap_no,	/*!< in: heap_no of
						record which inherits */
	ulint			heap_no)	/*!< in: heap_no of record
						from which inherited;
						does NOT reset the locks
						on this record */
{
	lock_t*	lock;

	lock_mutex_enter();

	for (lock = lock_rec_get_first(lock_sys->rec_hash, block, heap_no);
	     lock != NULL;
	     lock = lock_rec_get_next(heap_no, lock)) {

		/* Skip inheriting lock if set */
		if (lock->trx->skip_lock_inheritance) {

			continue;
		}

		if (!lock_rec_get_insert_intention(lock)
		    && (heap_no == PAGE_HEAP_NO_SUPREMUM
			|| !lock_rec_get_rec_not_gap(lock))) {

			lock_rec_add_to_queue(
				LOCK_REC | LOCK_GAP | lock_get_mode(lock),
				block, heir_heap_no, lock->index,
				lock->trx, FALSE);
		}
	}

	lock_mutex_exit();
}

/*************************************************************//**
Moves the locks of a record to another record and resets the lock bits of
the donating record. */
void
lock_rec_move_low(
/*==============*/
	hash_table_t*		lock_hash,	/*!< in: hash table to use */
	const buf_block_t*	receiver,	/*!< in: buffer block containing
						the receiving record */
	const buf_block_t*	donator,	/*!< in: buffer block containing
						the donating record */
	ulint			receiver_heap_no,/*!< in: heap_no of the record
						which gets the locks; there
						must be no lock requests
						on it! */
	ulint			donator_heap_no)/*!< in: heap_no of the record
						which gives the locks */
{
	lock_t*	lock;

	ut_ad(lock_mutex_own());

	/* If the lock is predicate lock, it resides on INFIMUM record */
	ut_ad(lock_rec_get_first(
		lock_hash, receiver, receiver_heap_no) == NULL
	      || lock_hash == lock_sys->prdt_hash
	      || lock_hash == lock_sys->prdt_page_hash);

	for (lock = lock_rec_get_first(lock_hash,
				       donator, donator_heap_no);
	     lock != NULL;
	     lock = lock_rec_get_next(donator_heap_no, lock)) {

		const ulint	type_mode = lock->type_mode;

		lock_rec_reset_nth_bit(lock, donator_heap_no);

		if (type_mode & LOCK_WAIT) {
			lock_reset_lock_and_trx_wait(lock);
		}

		/* Note that we FIRST reset the bit, and then set the lock:
		the function works also if donator == receiver */

		lock_rec_add_to_queue(
			type_mode, receiver, receiver_heap_no,
			lock->index, lock->trx, FALSE);
	}

	ut_ad(lock_rec_get_first(lock_sys->rec_hash,
				 donator, donator_heap_no) == NULL);
}

/** Move all the granted locks to the front of the given lock list.
All the waiting locks will be at the end of the list.
@param[in,out]	lock_list	the given lock list.  */
static
void
lock_move_granted_locks_to_front(
	UT_LIST_BASE_NODE_T(lock_t)&	lock_list)
{
	lock_t*	lock;

	bool seen_waiting_lock = false;

	for (lock = UT_LIST_GET_FIRST(lock_list); lock;
	     lock = UT_LIST_GET_NEXT(trx_locks, lock)) {

		if (!seen_waiting_lock) {
			if (lock->is_waiting()) {
				seen_waiting_lock = true;
			}
			continue;
		}

		ut_ad(seen_waiting_lock);

		if (!lock->is_waiting()) {
			lock_t* prev = UT_LIST_GET_PREV(trx_locks, lock);
			ut_a(prev);
			UT_LIST_MOVE_TO_FRONT(lock_list, lock);
			lock = prev;
		}
	}
}
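
/* Worked example (added comment): with a list [G1, W1, G2, W2, G3] of
granted (G) and waiting (W) locks, each granted lock found after the
first waiter is moved to the front as it is encountered, so the list
becomes [G2, G1, W1, W2, G3] and finally [G3, G2, G1, W1, W2]: all
granted locks end up before all waiting ones. */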

/*************************************************************//**
Updates the lock table when we have reorganized a page. NOTE: we copy
also the locks set on the infimum of the page; the infimum may carry
locks if an update of a record is occurring on the page, and its locks
were temporarily stored on the infimum. */
void
lock_move_reorganize_page(
/*======================*/
	const buf_block_t*	block,	/*!< in: old index page, now
					reorganized */
	const buf_block_t*	oblock)	/*!< in: copy of the old, not
					reorganized page */
{
	lock_t*		lock;
	UT_LIST_BASE_NODE_T(lock_t)	old_locks;
	mem_heap_t*	heap		= NULL;
	ulint		comp;

	lock_mutex_enter();

	/* FIXME: This needs to deal with predicate lock too */
	lock = lock_rec_get_first_on_page(lock_sys->rec_hash, block);

	if (lock == NULL) {
		lock_mutex_exit();

		return;
	}

	heap = mem_heap_create(256);

	/* Copy first all the locks on the page to heap and reset the
	bitmaps in the original locks; chain the copies of the locks
	using the trx_locks field in them. */

	UT_LIST_INIT(old_locks, &lock_t::trx_locks);

	do {
		/* Make a copy of the lock */
		lock_t*	old_lock = lock_rec_copy(lock, heap);

		UT_LIST_ADD_LAST(old_locks, old_lock);

		/* Reset bitmap of lock */
		lock_rec_bitmap_reset(lock);

		if (lock_get_wait(lock)) {

			lock_reset_lock_and_trx_wait(lock);
		}

		lock = lock_rec_get_next_on_page(lock);
	} while (lock != NULL);

	comp = page_is_comp(block->frame);
	ut_ad(comp == page_is_comp(oblock->frame));

	lock_move_granted_locks_to_front(old_locks);

	DBUG_EXECUTE_IF("do_lock_reverse_page_reorganize",
			UT_LIST_REVERSE(old_locks););

	for (lock = UT_LIST_GET_FIRST(old_locks); lock;
	     lock = UT_LIST_GET_NEXT(trx_locks, lock)) {

		/* NOTE: we copy also the locks set on the infimum and
		supremum of the page; the infimum may carry locks if an
		update of a record is occurring on the page, and its locks
		were temporarily stored on the infimum */
		const rec_t*	rec1 = page_get_infimum_rec(
			buf_block_get_frame(block));
		const rec_t*	rec2 = page_get_infimum_rec(
			buf_block_get_frame(oblock));

		/* Set locks according to old locks */
		for (;;) {
			ulint	old_heap_no;
			ulint	new_heap_no;

			if (comp) {
				old_heap_no = rec_get_heap_no_new(rec2);
				new_heap_no = rec_get_heap_no_new(rec1);

				rec1 = page_rec_get_next_low(rec1, TRUE);
				rec2 = page_rec_get_next_low(rec2, TRUE);
			} else {
				old_heap_no = rec_get_heap_no_old(rec2);
				new_heap_no = rec_get_heap_no_old(rec1);
				ut_ad(!memcmp(rec1, rec2,
					      rec_get_data_size_old(rec2)));

				rec1 = page_rec_get_next_low(rec1, FALSE);
				rec2 = page_rec_get_next_low(rec2, FALSE);
			}

			/* Clear the bit in old_lock. */
			if (old_heap_no < lock->un_member.rec_lock.n_bits
			    && lock_rec_reset_nth_bit(lock, old_heap_no)) {
				/* NOTE that the old lock bitmap could be too
				small for the new heap number! */

				lock_rec_add_to_queue(
					lock->type_mode, block, new_heap_no,
					lock->index, lock->trx, FALSE);
			}

			if (new_heap_no == PAGE_HEAP_NO_SUPREMUM) {
				ut_ad(old_heap_no == PAGE_HEAP_NO_SUPREMUM);
				break;
			}
		}

#ifdef UNIV_DEBUG
		{
			ulint	i = lock_rec_find_set_bit(lock);

			/* Check that all locks were moved. */
			if (i != ULINT_UNDEFINED) {
				ib::fatal() << "lock_move_reorganize_page(): "
					<< i << " not moved in "
					<< (void*) lock;
			}
		}
#endif /* UNIV_DEBUG */
	}

	lock_mutex_exit();

	mem_heap_free(heap);

#ifdef UNIV_DEBUG_LOCK_VALIDATE
	ut_ad(lock_rec_validate_page(block));
#endif
}

/*************************************************************//**
Moves the explicit locks on user records to another page if a record
list end is moved to another page. */
void
lock_move_rec_list_end(
/*===================*/
	const buf_block_t*	new_block,	/*!< in: index page to move to */
	const buf_block_t*	block,		/*!< in: index page */
	const rec_t*		rec)		/*!< in: record on page: this
						is the first record moved */
{
	lock_t*		lock;
	const ulint	comp	= page_rec_is_comp(rec);

	ut_ad(buf_block_get_frame(block) == page_align(rec));
	ut_ad(comp == page_is_comp(buf_block_get_frame(new_block)));

	lock_mutex_enter();

	/* Note: when we move locks from record to record, waiting locks
	and possible granted gap type locks behind them are enqueued in
	the original order, because new elements are inserted to a hash
	table to the end of the hash chain, and lock_rec_add_to_queue
	does not reuse locks if there are waiters in the queue. */

	for (lock = lock_rec_get_first_on_page(lock_sys->rec_hash, block); lock;
	     lock = lock_rec_get_next_on_page(lock)) {
		const rec_t*	rec1	= rec;
		const rec_t*	rec2;
		const ulint	type_mode = lock->type_mode;

		if (comp) {
			if (page_offset(rec1) == PAGE_NEW_INFIMUM) {
				rec1 = page_rec_get_next_low(rec1, TRUE);
			}

			rec2 = page_rec_get_next_low(
				buf_block_get_frame(new_block)
				+ PAGE_NEW_INFIMUM, TRUE);
		} else {
			if (page_offset(rec1) == PAGE_OLD_INFIMUM) {
				rec1 = page_rec_get_next_low(rec1, FALSE);
			}

			rec2 = page_rec_get_next_low(
				buf_block_get_frame(new_block)
				+ PAGE_OLD_INFIMUM, FALSE);
		}

		/* Copy lock requests on user records to new page and
		reset the lock bits on the old */

		for (;;) {
			ulint	rec1_heap_no;
			ulint	rec2_heap_no;

			if (comp) {
				rec1_heap_no = rec_get_heap_no_new(rec1);

				if (rec1_heap_no == PAGE_HEAP_NO_SUPREMUM) {
					break;
				}

				rec2_heap_no = rec_get_heap_no_new(rec2);
				rec1 = page_rec_get_next_low(rec1, TRUE);
				rec2 = page_rec_get_next_low(rec2, TRUE);
			} else {
				rec1_heap_no = rec_get_heap_no_old(rec1);

				if (rec1_heap_no == PAGE_HEAP_NO_SUPREMUM) {
					break;
				}

				rec2_heap_no = rec_get_heap_no_old(rec2);

				ut_ad(!memcmp(rec1, rec2,
					      rec_get_data_size_old(rec2)));

				rec1 = page_rec_get_next_low(rec1, FALSE);
				rec2 = page_rec_get_next_low(rec2, FALSE);
			}

			if (rec1_heap_no < lock->un_member.rec_lock.n_bits
			    && lock_rec_reset_nth_bit(lock, rec1_heap_no)) {
				if (type_mode & LOCK_WAIT) {
					lock_reset_lock_and_trx_wait(lock);
				}

				lock_rec_add_to_queue(
					type_mode, new_block, rec2_heap_no,
					lock->index, lock->trx, FALSE);
			}
		}
	}

	lock_mutex_exit();

#ifdef UNIV_DEBUG_LOCK_VALIDATE
	ut_ad(lock_rec_validate_page(block));
	ut_ad(lock_rec_validate_page(new_block));
#endif
}

/*************************************************************//**
Moves the explicit locks on user records to another page if a record
list start is moved to another page. */
void
lock_move_rec_list_start(
/*=====================*/
	const buf_block_t*	new_block,	/*!< in: index page to
						move to */
	const buf_block_t*	block,		/*!< in: index page */
	const rec_t*		rec,		/*!< in: record on page:
						this is the first
						record NOT copied */
	const rec_t*		old_end)	/*!< in: old
						previous-to-last
						record on new_page
						before the records
						were copied */
{
	lock_t*		lock;
	const ulint	comp	= page_rec_is_comp(rec);

	ut_ad(block->frame == page_align(rec));
	ut_ad(new_block->frame == page_align(old_end));
	ut_ad(comp == page_rec_is_comp(old_end));

	lock_mutex_enter();

	for (lock = lock_rec_get_first_on_page(lock_sys->rec_hash, block); lock;
	     lock = lock_rec_get_next_on_page(lock)) {
		const rec_t*	rec1;
		const rec_t*	rec2;
		const ulint	type_mode = lock->type_mode;

		if (comp) {
			rec1 = page_rec_get_next_low(
				buf_block_get_frame(block)
				+ PAGE_NEW_INFIMUM, TRUE);
			rec2 = page_rec_get_next_low(old_end, TRUE);
		} else {
			rec1 = page_rec_get_next_low(
				buf_block_get_frame(block)
				+ PAGE_OLD_INFIMUM, FALSE);
			rec2 = page_rec_get_next_low(old_end, FALSE);
		}

		/* Copy lock requests on user records to new page and
		reset the lock bits on the old */

		while (rec1 != rec) {
			ulint	rec1_heap_no;
			ulint	rec2_heap_no;

			if (comp) {
				rec1_heap_no = rec_get_heap_no_new(rec1);
				rec2_heap_no = rec_get_heap_no_new(rec2);

				rec1 = page_rec_get_next_low(rec1, TRUE);
				rec2 = page_rec_get_next_low(rec2, TRUE);
			} else {
				rec1_heap_no = rec_get_heap_no_old(rec1);
				rec2_heap_no = rec_get_heap_no_old(rec2);

				ut_ad(!memcmp(rec1, rec2,
					      rec_get_data_size_old(rec2)));

				rec1 = page_rec_get_next_low(rec1, FALSE);
				rec2 = page_rec_get_next_low(rec2, FALSE);
			}

			if (rec1_heap_no < lock->un_member.rec_lock.n_bits
			    && lock_rec_reset_nth_bit(lock, rec1_heap_no)) {
				if (type_mode & LOCK_WAIT) {
					lock_reset_lock_and_trx_wait(lock);
				}

				lock_rec_add_to_queue(
					type_mode, new_block, rec2_heap_no,
					lock->index, lock->trx, FALSE);
			}
		}

#ifdef UNIV_DEBUG
		if (page_rec_is_supremum(rec)) {
			ulint	i;

			for (i = PAGE_HEAP_NO_USER_LOW;
			     i < lock_rec_get_n_bits(lock); i++) {
				if (lock_rec_get_nth_bit(lock, i)) {
					ib::fatal()
						<< "lock_move_rec_list_start():"
						<< i << " not moved in "
						<<  (void*) lock;
				}
			}
		}
#endif /* UNIV_DEBUG */
	}

	lock_mutex_exit();

#ifdef UNIV_DEBUG_LOCK_VALIDATE
	ut_ad(lock_rec_validate_page(block));
#endif
}

/*************************************************************//**
Moves the explicit locks on user records to another page if a record
list start is moved to another page. */
void
lock_rtr_move_rec_list(
/*===================*/
	const buf_block_t*	new_block,	/*!< in: index page to
						move to */
	const buf_block_t*	block,		/*!< in: index page */
	rtr_rec_move_t*		rec_move,       /*!< in: recording records
						moved */
	ulint			num_move)       /*!< in: num of rec to move */
{
	lock_t*		lock;
	ulint		comp;

	if (!num_move) {
		return;
	}

	comp = page_rec_is_comp(rec_move[0].old_rec);

	ut_ad(block->frame == page_align(rec_move[0].old_rec));
	ut_ad(new_block->frame == page_align(rec_move[0].new_rec));
	ut_ad(comp == page_rec_is_comp(rec_move[0].new_rec));

	lock_mutex_enter();

	for (lock = lock_rec_get_first_on_page(lock_sys->rec_hash, block); lock;
	     lock = lock_rec_get_next_on_page(lock)) {
		ulint		moved = 0;
		const rec_t*	rec1;
		const rec_t*	rec2;
		const ulint	type_mode = lock->type_mode;

		/* Copy lock requests on user records to new page and
		reset the lock bits on the old */

		while (moved < num_move) {
			ulint	rec1_heap_no;
			ulint	rec2_heap_no;

			rec1 = rec_move[moved].old_rec;
			rec2 = rec_move[moved].new_rec;

			if (comp) {
				rec1_heap_no = rec_get_heap_no_new(rec1);
				rec2_heap_no = rec_get_heap_no_new(rec2);

			} else {
				rec1_heap_no = rec_get_heap_no_old(rec1);
				rec2_heap_no = rec_get_heap_no_old(rec2);

				ut_ad(!memcmp(rec1, rec2,
					      rec_get_data_size_old(rec2)));
			}

			if (rec1_heap_no < lock->un_member.rec_lock.n_bits
			    && lock_rec_reset_nth_bit(lock, rec1_heap_no)) {
				if (type_mode & LOCK_WAIT) {
					lock_reset_lock_and_trx_wait(lock);
				}

				lock_rec_add_to_queue(
					type_mode, new_block, rec2_heap_no,
					lock->index, lock->trx, FALSE);

				rec_move[moved].moved = true;
			}

			moved++;
		}
	}

	lock_mutex_exit();

#ifdef UNIV_DEBUG_LOCK_VALIDATE
	ut_ad(lock_rec_validate_page(block));
#endif
}

/*************************************************************//**
Updates the lock table when a page is split to the right. */
void
lock_update_split_right(
/*====================*/
	const buf_block_t*	right_block,	/*!< in: right page */
	const buf_block_t*	left_block)	/*!< in: left page */
{
	ulint	heap_no = lock_get_min_heap_no(right_block);

	lock_mutex_enter();

	/* Move the locks on the supremum of the left page to the supremum
	of the right page */

	lock_rec_move(right_block, left_block,
		      PAGE_HEAP_NO_SUPREMUM, PAGE_HEAP_NO_SUPREMUM);

	/* Inherit the locks to the supremum of left page from the successor
	of the infimum on right page */

	lock_rec_inherit_to_gap(left_block, right_block,
				PAGE_HEAP_NO_SUPREMUM, heap_no);

	lock_mutex_exit();
}
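
/* Illustrative scenario (added comment, hedged sketch): when a page
splits into left page L and right page R, the records above the split
point move to R. Locks held on the supremum of the original page, which
guarded the gap after its last record, now belong on the supremum of R,
and the gap before the first record of R must be re-protected on the
supremum of L; the two calls above perform exactly these transfers. */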

/*************************************************************//**
Updates the lock table when a page is merged to the right. */
void
lock_update_merge_right(
/*====================*/
	const buf_block_t*	right_block,	/*!< in: right page to
						which merged */
	const rec_t*		orig_succ,	/*!< in: original
						successor of infimum
						on the right page
						before merge */
	const buf_block_t*	left_block)	/*!< in: merged index
						page which will be
						discarded */
{
	lock_mutex_enter();

	/* Inherit the locks from the supremum of the left page to the
	original successor of infimum on the right page, to which the left
	page was merged */

	lock_rec_inherit_to_gap(right_block, left_block,
				page_rec_get_heap_no(orig_succ),
				PAGE_HEAP_NO_SUPREMUM);

	/* Reset the locks on the supremum of the left page, releasing
	waiting transactions */

	lock_rec_reset_and_release_wait_low(
		lock_sys->rec_hash, left_block, PAGE_HEAP_NO_SUPREMUM);

#ifdef UNIV_DEBUG
	/* there should exist no page lock on the left page,
	otherwise, it will be blocked from merge */
	ulint	space = left_block->page.id.space();
	ulint	page_no = left_block->page.id.page_no();
	ut_ad(lock_rec_get_first_on_page_addr(
			lock_sys->prdt_page_hash, space, page_no) == NULL);
#endif /* UNIV_DEBUG */

	lock_rec_free_all_from_discard_page(left_block);

	lock_mutex_exit();
}
3330 
3331 /*************************************************************//**
3332 Updates the lock table when the root page is copied to another in
3333 btr_root_raise_and_insert. Note that we leave lock structs on the
3334 root page, even though they do not make sense on other than leaf
3335 pages: the reason is that in a pessimistic update the infimum record
3336 of the root page will act as a dummy carrier of the locks of the record
3337 to be updated. */
3338 void
3339 lock_update_root_raise(
3340 /*===================*/
3341 	const buf_block_t*	block,	/*!< in: index page to which copied */
3342 	const buf_block_t*	root)	/*!< in: root page */
3343 {
3344 	lock_mutex_enter();
3345 
3346 	/* Move the locks on the supremum of the root to the supremum
3347 	of block */
3348 
3349 	lock_rec_move(block, root,
3350 		      PAGE_HEAP_NO_SUPREMUM, PAGE_HEAP_NO_SUPREMUM);
3351 	lock_mutex_exit();
3352 }
3353 
3354 /*************************************************************//**
3355 Updates the lock table when a page is copied to another and the original page
3356 is removed from the chain of leaf pages, except if page is the root! */
3357 void
3358 lock_update_copy_and_discard(
3359 /*=========================*/
3360 	const buf_block_t*	new_block,	/*!< in: index page to
3361 						which copied */
3362 	const buf_block_t*	block)		/*!< in: index page;
3363 						NOT the root! */
3364 {
3365 	lock_mutex_enter();
3366 
3367 	/* Move the locks on the supremum of the old page to the supremum
3368 	of new_page */
3369 
3370 	lock_rec_move(new_block, block,
3371 		      PAGE_HEAP_NO_SUPREMUM, PAGE_HEAP_NO_SUPREMUM);
3372 	lock_rec_free_all_from_discard_page(block);
3373 
3374 	lock_mutex_exit();
3375 }
3376 
3377 /*************************************************************//**
3378 Updates the lock table when a page is split to the left. */
3379 void
3380 lock_update_split_left(
3381 /*===================*/
3382 	const buf_block_t*	right_block,	/*!< in: right page */
3383 	const buf_block_t*	left_block)	/*!< in: left page */
3384 {
3385 	ulint	heap_no = lock_get_min_heap_no(right_block);
3386 
3387 	lock_mutex_enter();
3388 
3389 	/* Inherit the locks to the supremum of the left page from the
3390 	successor of the infimum on the right page */
3391 
3392 	lock_rec_inherit_to_gap(left_block, right_block,
3393 				PAGE_HEAP_NO_SUPREMUM, heap_no);
3394 
3395 	lock_mutex_exit();
3396 }
3397 
3398 /*************************************************************//**
3399 Updates the lock table when a page is merged to the left. */
3400 void
3401 lock_update_merge_left(
3402 /*===================*/
3403 	const buf_block_t*	left_block,	/*!< in: left page to
3404 						which merged */
3405 	const rec_t*		orig_pred,	/*!< in: original predecessor
3406 						of supremum on the left page
3407 						before merge */
3408 	const buf_block_t*	right_block)	/*!< in: merged index page
3409 						which will be discarded */
3410 {
3411 	const rec_t*	left_next_rec;
3412 
3413 	ut_ad(left_block->frame == page_align(orig_pred));
3414 
3415 	lock_mutex_enter();
3416 
3417 	left_next_rec = page_rec_get_next_const(orig_pred);
3418 
3419 	if (!page_rec_is_supremum(left_next_rec)) {
3420 
3421 		/* Inherit the locks on the supremum of the left page to the
3422 		first record which was moved from the right page */
3423 
3424 		lock_rec_inherit_to_gap(left_block, left_block,
3425 					page_rec_get_heap_no(left_next_rec),
3426 					PAGE_HEAP_NO_SUPREMUM);
3427 
3428 		/* Reset the locks on the supremum of the left page,
3429 		releasing waiting transactions */
3430 
3431 		lock_rec_reset_and_release_wait_low(
3432 			lock_sys->rec_hash, left_block, PAGE_HEAP_NO_SUPREMUM);
3433 	}
3434 
3435 	/* Move the locks from the supremum of right page to the supremum
3436 	of the left page */
3437 
3438 	lock_rec_move(left_block, right_block,
3439 		      PAGE_HEAP_NO_SUPREMUM, PAGE_HEAP_NO_SUPREMUM);
3440 
3441 #ifdef UNIV_DEBUG
3442 	/* There should exist no page lock on the right page;
3443 	otherwise, the merge would have been blocked. */
3444 	ulint	space = right_block->page.id.space();
3445 	ulint	page_no = right_block->page.id.page_no();
3446 	lock_t*	lock_test = lock_rec_get_first_on_page_addr(
3447 		lock_sys->prdt_page_hash, space, page_no);
3448 	ut_ad(!lock_test);
3449 #endif /* UNIV_DEBUG */
3450 
3451 	lock_rec_free_all_from_discard_page(right_block);
3452 
3453 	lock_mutex_exit();
3454 }
3455 
3456 /*************************************************************//**
3457 Resets the original locks on heir and replaces them with gap type locks
3458 inherited from rec. */
3459 void
3460 lock_rec_reset_and_inherit_gap_locks(
3461 /*=================================*/
3462 	const buf_block_t*	heir_block,	/*!< in: block containing the
3463 						record which inherits */
3464 	const buf_block_t*	block,		/*!< in: block containing the
3465 						record from which inherited;
3466 						does NOT reset the locks on
3467 						this record */
3468 	ulint			heir_heap_no,	/*!< in: heap_no of the
3469 						inheriting record */
3470 	ulint			heap_no)	/*!< in: heap_no of the
3471 						donating record */
3472 {
3473 	lock_mutex_enter();
3474 
3475 	lock_rec_reset_and_release_wait(heir_block, heir_heap_no);
3476 
3477 	lock_rec_inherit_to_gap(heir_block, block, heir_heap_no, heap_no);
3478 
3479 	lock_mutex_exit();
3480 }
3481 
3482 /*************************************************************//**
3483 Updates the lock table when a page is discarded. */
3484 void
3485 lock_update_discard(
3486 /*================*/
3487 	const buf_block_t*	heir_block,	/*!< in: index page
3488 						which will inherit the locks */
3489 	ulint			heir_heap_no,	/*!< in: heap_no of the record
3490 						which will inherit the locks */
3491 	const buf_block_t*	block)		/*!< in: index page
3492 						which will be discarded */
3493 {
3494 	const rec_t*	rec;
3495 	ulint		heap_no;
3496 	const page_t*	page = block->frame;
3497 
3498 	lock_mutex_enter();
3499 
3500 	if (!lock_rec_get_first_on_page(lock_sys->rec_hash, block)
3501 	    && (!lock_rec_get_first_on_page(lock_sys->prdt_hash, block))) {
3502 		/* No locks exist on page, nothing to do */
3503 
3504 		lock_mutex_exit();
3505 
3506 		return;
3507 	}
3508 
3509 	/* Inherit all the locks on the page to the record and reset all
3510 	the locks on the page */
3511 
3512 	if (page_is_comp(page)) {
3513 		rec = page + PAGE_NEW_INFIMUM;
3514 
3515 		do {
3516 			heap_no = rec_get_heap_no_new(rec);
3517 
3518 			lock_rec_inherit_to_gap(heir_block, block,
3519 						heir_heap_no, heap_no);
3520 
3521 			lock_rec_reset_and_release_wait(block, heap_no);
3522 
3523 			rec = page + rec_get_next_offs(rec, TRUE);
3524 		} while (heap_no != PAGE_HEAP_NO_SUPREMUM);
3525 	} else {
3526 		rec = page + PAGE_OLD_INFIMUM;
3527 
3528 		do {
3529 			heap_no = rec_get_heap_no_old(rec);
3530 
3531 			lock_rec_inherit_to_gap(heir_block, block,
3532 						heir_heap_no, heap_no);
3533 
3534 			lock_rec_reset_and_release_wait(block, heap_no);
3535 
3536 			rec = page + rec_get_next_offs(rec, FALSE);
3537 		} while (heap_no != PAGE_HEAP_NO_SUPREMUM);
3538 	}
3539 
3540 	lock_rec_free_all_from_discard_page(block);
3541 
3542 	lock_mutex_exit();
3543 }
3544 
3545 /*************************************************************//**
3546 Updates the lock table when a new user record is inserted. */
3547 void
3548 lock_update_insert(
3549 /*===============*/
3550 	const buf_block_t*	block,	/*!< in: buffer block containing rec */
3551 	const rec_t*		rec)	/*!< in: the inserted record */
3552 {
3553 	ulint	receiver_heap_no;
3554 	ulint	donator_heap_no;
3555 
3556 	ut_ad(block->frame == page_align(rec));
3557 
3558 	/* Inherit the gap-locking locks for rec, in gap mode, from the next
3559 	record */
3560 
3561 	if (page_rec_is_comp(rec)) {
3562 		receiver_heap_no = rec_get_heap_no_new(rec);
3563 		donator_heap_no = rec_get_heap_no_new(
3564 			page_rec_get_next_low(rec, TRUE));
3565 	} else {
3566 		receiver_heap_no = rec_get_heap_no_old(rec);
3567 		donator_heap_no = rec_get_heap_no_old(
3568 			page_rec_get_next_low(rec, FALSE));
3569 	}
3570 
3571 	lock_rec_inherit_to_gap_if_gap_lock(
3572 		block, receiver_heap_no, donator_heap_no);
3573 }
3574 
3575 /*************************************************************//**
3576 Updates the lock table when a record is removed. */
3577 void
3578 lock_update_delete(
3579 /*===============*/
3580 	const buf_block_t*	block,	/*!< in: buffer block containing rec */
3581 	const rec_t*		rec)	/*!< in: the record to be removed */
3582 {
3583 	const page_t*	page = block->frame;
3584 	ulint		heap_no;
3585 	ulint		next_heap_no;
3586 
3587 	ut_ad(page == page_align(rec));
3588 
3589 	if (page_is_comp(page)) {
3590 		heap_no = rec_get_heap_no_new(rec);
3591 		next_heap_no = rec_get_heap_no_new(page
3592 						   + rec_get_next_offs(rec,
3593 								       TRUE));
3594 	} else {
3595 		heap_no = rec_get_heap_no_old(rec);
3596 		next_heap_no = rec_get_heap_no_old(page
3597 						   + rec_get_next_offs(rec,
3598 								       FALSE));
3599 	}
3600 
3601 	lock_mutex_enter();
3602 
3603 	/* Let the next record inherit the locks from rec, in gap mode */
3604 
3605 	lock_rec_inherit_to_gap(block, block, next_heap_no, heap_no);
3606 
3607 	/* Reset the lock bits on rec and release waiting transactions */
3608 
3609 	lock_rec_reset_and_release_wait(block, heap_no);
3610 
3611 	lock_mutex_exit();
3612 }
3613 
3614 /*********************************************************************//**
3615 Stores on the page infimum record the explicit locks of another record.
3616 This function is used to store the lock state of a record when it is
3617 updated and the size of the record changes in the update. The record
3618 is moved in such an update, perhaps to another page. The infimum record
3619 acts as a dummy carrier record, taking care of lock releases while the
3620 actual record is being moved. */
3621 void
3622 lock_rec_store_on_page_infimum(
3623 /*===========================*/
3624 	const buf_block_t*	block,	/*!< in: buffer block containing rec */
3625 	const rec_t*		rec)	/*!< in: record whose lock state
3626 					is stored on the infimum
3627 					record of the same page; lock
3628 					bits are reset on the
3629 					record */
3630 {
3631 	ulint	heap_no = page_rec_get_heap_no(rec);
3632 
3633 	ut_ad(block->frame == page_align(rec));
3634 
3635 	lock_mutex_enter();
3636 
3637 	lock_rec_move(block, block, PAGE_HEAP_NO_INFIMUM, heap_no);
3638 
3639 	lock_mutex_exit();
3640 }
3641 
3642 /*********************************************************************//**
3643 Restores the state of explicit lock requests on a single record, where the
3644 state was stored on the infimum of the page. */
3645 void
3646 lock_rec_restore_from_page_infimum(
3647 /*===============================*/
3648 	const buf_block_t*	block,	/*!< in: buffer block containing rec */
3649 	const rec_t*		rec,	/*!< in: record whose lock state
3650 					is restored */
3651 	const buf_block_t*	donator)/*!< in: page (rec is not
3652 					necessarily on this page)
3653 					whose infimum stored the lock
3654 					state; lock bits are reset on
3655 					the infimum */
3656 {
3657 	ulint	heap_no = page_rec_get_heap_no(rec);
3658 
3659 	lock_mutex_enter();
3660 
3661 	lock_rec_move(block, donator, heap_no, PAGE_HEAP_NO_INFIMUM);
3662 
3663 	lock_mutex_exit();
3664 }
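
/* Illustrative sketch, not part of the original source: a caller doing a
pessimistic update that moves a record would pair the two functions above
around the move.  The names block, rec, new_block and new_rec below are
hypothetical placeholders.

	lock_rec_store_on_page_infimum(block, rec);

	... delete rec and re-insert it as new_rec, possibly on another
	page (new_block) ...

	lock_rec_restore_from_page_infimum(new_block, new_rec, block);
*/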
3665 
3666 /*========================= TABLE LOCKS ==============================*/
3667 
3668 /** Functor for accessing the embedded node within a table lock. */
3669 struct TableLockGetNode {
3670 	ut_list_node<lock_t>& operator() (lock_t& elem)
3671 	{
3672 		return(elem.un_member.tab_lock.locks);
3673 	}
3674 };
3675 
3676 /*********************************************************************//**
3677 Creates a table lock object and adds it as the last in the lock queue
3678 of the table. Does NOT check for deadlocks or lock compatibility.
3679 @return own: new lock object */
3680 UNIV_INLINE
3681 lock_t*
3682 lock_table_create(
3683 /*==============*/
3684 	dict_table_t*	table,	/*!< in/out: database table
3685 				in dictionary cache */
3686 	ulint		type_mode,/*!< in: lock mode possibly ORed with
3687 				LOCK_WAIT */
3688 	trx_t*		trx)	/*!< in: trx */
3689 {
3690 	lock_t*		lock;
3691 
3692 	ut_ad(table && trx);
3693 	ut_ad(lock_mutex_own());
3694 	ut_ad(trx_mutex_own(trx));
3695 
3696 	check_trx_state(trx);
3697 
3698 	if ((type_mode & LOCK_MODE_MASK) == LOCK_AUTO_INC) {
3699 		++table->n_waiting_or_granted_auto_inc_locks;
3700 	}
3701 
3702 	/* For AUTOINC locking we reuse the lock instance only if
3703 	there is no wait involved; otherwise we allocate the waiting
3704 	lock from the transaction lock heap. */
3705 	if (type_mode == LOCK_AUTO_INC) {
3706 
3707 		lock = table->autoinc_lock;
3708 
3709 		table->autoinc_trx = trx;
3710 
3711 		ib_vector_push(trx->autoinc_locks, &lock);
3712 
3713 	} else if (trx->lock.table_cached < trx->lock.table_pool.size()) {
3714 		lock = trx->lock.table_pool[trx->lock.table_cached++];
3715 	} else {
3716 
3717 		lock = static_cast<lock_t*>(
3718 			mem_heap_alloc(trx->lock.lock_heap, sizeof(*lock)));
3719 
3720 	}
3721 
3722 	lock->type_mode = ib_uint32_t(type_mode | LOCK_TABLE);
3723 	lock->trx = trx;
3724 
3725 	lock->un_member.tab_lock.table = table;
3726 
3727 	ut_ad(table->n_ref_count > 0 || !table->can_be_evicted);
3728 
3729 	UT_LIST_ADD_LAST(trx->lock.trx_locks, lock);
3730 
3731 	ut_list_append(table->locks, lock, TableLockGetNode());
3732 
3733 	if (type_mode & LOCK_WAIT) {
3734 
3735 		lock_set_lock_and_trx_wait(lock, trx);
3736 	}
3737 
3738 	lock->trx->lock.table_locks.push_back(lock);
3739 
3740 	MONITOR_INC(MONITOR_TABLELOCK_CREATED);
3741 	MONITOR_INC(MONITOR_NUM_TABLELOCK);
3742 
3743 	return(lock);
3744 }
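
/* Illustrative note on the AUTOINC fast path above (a sketch, not
authoritative): a granted AUTO-INC request reuses the table's single
pre-allocated lock object, while a waiting request is allocated like any
other table lock.

	// type_mode == LOCK_AUTO_INC (no LOCK_WAIT bit):
	// reuses table->autoinc_lock and records it in trx->autoinc_locks
	lock = lock_table_create(table, LOCK_AUTO_INC, trx);

	// LOCK_WAIT set: taken from trx->lock.table_pool or lock_heap
	lock = lock_table_create(table, LOCK_AUTO_INC | LOCK_WAIT, trx);
*/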
3745 
3746 /*************************************************************//**
3747 Pops autoinc lock requests from the transaction's autoinc_locks. We
3748 handle the case where there are gaps in the array and they need to
3749 be popped off the stack. */
3750 UNIV_INLINE
3751 void
3752 lock_table_pop_autoinc_locks(
3753 /*=========================*/
3754 	trx_t*	trx)	/*!< in/out: transaction that owns the AUTOINC locks */
3755 {
3756 	ut_ad(lock_mutex_own());
3757 	ut_ad(!ib_vector_is_empty(trx->autoinc_locks));
3758 
3759 	/* Skip any gaps; gaps are NULL lock entries in the
3760 	trx->autoinc_locks vector. */
3761 
3762 	do {
3763 		ib_vector_pop(trx->autoinc_locks);
3764 
3765 		if (ib_vector_is_empty(trx->autoinc_locks)) {
3766 			return;
3767 		}
3768 
3769 	} while (*(lock_t**) ib_vector_get_last(trx->autoinc_locks) == NULL);
3770 }
3771 
3772 /*************************************************************//**
3773 Removes an autoinc lock request from the transaction's autoinc_locks. */
3774 UNIV_INLINE
3775 void
3776 lock_table_remove_autoinc_lock(
3777 /*===========================*/
3778 	lock_t*	lock,	/*!< in: table lock */
3779 	trx_t*	trx)	/*!< in/out: transaction that owns the lock */
3780 {
3781 	lock_t*	autoinc_lock;
3782 	lint	i = ib_vector_size(trx->autoinc_locks) - 1;
3783 
3784 	ut_ad(lock_mutex_own());
3785 	ut_ad(lock_get_mode(lock) == LOCK_AUTO_INC);
3786 	ut_ad(lock_get_type_low(lock) & LOCK_TABLE);
3787 	ut_ad(!ib_vector_is_empty(trx->autoinc_locks));
3788 
3789 	/* With stored functions and procedures the user may drop
3790 	a table within the same "statement". This special case has
3791 	to be handled by deleting only those AUTOINC locks that were
3792 	held by the table being dropped. */
3793 
3794 	autoinc_lock = *static_cast<lock_t**>(
3795 		ib_vector_get(trx->autoinc_locks, i));
3796 
3797 	/* This is the default fast case. */
3798 
3799 	if (autoinc_lock == lock) {
3800 		lock_table_pop_autoinc_locks(trx);
3801 	} else {
3802 		/* The last element should never be NULL */
3803 		ut_a(autoinc_lock != NULL);
3804 
3805 		/* Handle freeing the locks from within the stack. */
3806 
3807 		while (--i >= 0) {
3808 			autoinc_lock = *static_cast<lock_t**>(
3809 				ib_vector_get(trx->autoinc_locks, i));
3810 
3811 			if (autoinc_lock == lock) {
3812 				void*	null_var = NULL;
3813 				ib_vector_set(trx->autoinc_locks, i, &null_var);
3814 				return;
3815 			}
3816 		}
3817 
3818 		/* Must find the autoinc lock. */
3819 		ut_error;
3820 	}
3821 }
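
/* Illustrative example of the gap handling above (a hedged sketch):
releasing an AUTOINC lock from the middle of trx->autoinc_locks leaves a
NULL "gap" that lock_table_pop_autoinc_locks() later skips.  With locks
for tables A, B, C acquired in that order:

	[A, B, C]    release B (not last)  ->  [A, NULL, C]
	[A, NULL, C] release C (last)      ->  [A]  (the pop loop also
	                                             discards the NULL gap)
*/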
3822 
3823 /*************************************************************//**
3824 Removes a table lock request from the queue and the trx list of locks;
3825 this is a low-level function which does NOT check if waiting requests
3826 can now be granted. */
3827 UNIV_INLINE
3828 void
3829 lock_table_remove_low(
3830 /*==================*/
3831 	lock_t*	lock)	/*!< in/out: table lock */
3832 {
3833 	trx_t*		trx;
3834 	dict_table_t*	table;
3835 
3836 	ut_ad(lock_mutex_own());
3837 
3838 	trx = lock->trx;
3839 	table = lock->un_member.tab_lock.table;
3840 
3841 	/* Remove the table from the transaction's AUTOINC vector, if
3842 	the lock that is being released is an AUTOINC lock. */
3843 	if (lock_get_mode(lock) == LOCK_AUTO_INC) {
3844 
3845 		/* The table's AUTOINC lock can get transferred to
3846 		another transaction before we get here. */
3847 		if (table->autoinc_trx == trx) {
3848 			table->autoinc_trx = NULL;
3849 		}
3850 
3851 		/* The locks must be freed in the reverse order from
3852 		the one in which they were acquired. This is to avoid
3853 		traversing the AUTOINC lock vector unnecessarily.
3854 
3855 		We only store locks that were granted in the
3856 		trx->autoinc_locks vector (see lock_table_create()
3857 		and lock_grant()). Therefore it can be empty and we
3858 		need to check for that. */
3859 
3860 		if (!lock_get_wait(lock)
3861 		    && !ib_vector_is_empty(trx->autoinc_locks)) {
3862 
3863 			lock_table_remove_autoinc_lock(lock, trx);
3864 		}
3865 
3866 		ut_a(table->n_waiting_or_granted_auto_inc_locks > 0);
3867 		table->n_waiting_or_granted_auto_inc_locks--;
3868 	}
3869 
3870 	UT_LIST_REMOVE(trx->lock.trx_locks, lock);
3871 	ut_list_remove(table->locks, lock, TableLockGetNode());
3872 
3873 	MONITOR_INC(MONITOR_TABLELOCK_REMOVED);
3874 	MONITOR_DEC(MONITOR_NUM_TABLELOCK);
3875 }
3876 
3877 /*********************************************************************//**
3878 Enqueues a waiting request for a table lock which cannot be granted
3879 immediately. Checks for deadlocks.
3880 @return DB_LOCK_WAIT, DB_DEADLOCK, DB_QUE_THR_SUSPENDED, or
3881 DB_SUCCESS; DB_SUCCESS means that there was a deadlock, but another
3882 transaction was chosen as a victim, and we got the lock immediately:
3883 no need to wait then */
3884 static
3885 dberr_t
3886 lock_table_enqueue_waiting(
3887 /*=======================*/
3888 	ulint		mode,	/*!< in: lock mode this transaction is
3889 				requesting */
3890 	dict_table_t*	table,	/*!< in/out: table */
3891 	que_thr_t*	thr)	/*!< in: query thread */
3892 {
3893 	trx_t*		trx;
3894 	lock_t*		lock;
3895 
3896 	ut_ad(lock_mutex_own());
3897 	ut_ad(!srv_read_only_mode);
3898 
3899 	trx = thr_get_trx(thr);
3900 	ut_ad(trx_mutex_own(trx));
3901 
3902 	/* Test if there already is some other reason to suspend the thread:
3903 	we do not enqueue a lock request if the query thread should be
3904 	stopped anyway */
3905 
3906 	if (que_thr_stop(thr)) {
3907 		ut_error;
3908 
3909 		return(DB_QUE_THR_SUSPENDED);
3910 	}
3911 
3912 	switch (trx_get_dict_operation(trx)) {
3913 	case TRX_DICT_OP_NONE:
3914 		break;
3915 	case TRX_DICT_OP_TABLE:
3916 	case TRX_DICT_OP_INDEX:
3917 		ib::error() << "A table lock wait happens in a dictionary"
3918 			" operation. Table " << table->name
3919 			<< ". " << BUG_REPORT_MSG;
3920 		ut_ad(0);
3921 	}
3922 
3923 	/* Enqueue the lock request that will wait to be granted */
3924 	lock = lock_table_create(table, mode | LOCK_WAIT, trx);
3925 
3926 	const trx_t*	victim_trx =
3927 			DeadlockChecker::check_and_resolve(lock, trx);
3928 
3929 	if (victim_trx != 0) {
3930 		ut_ad(victim_trx == trx);
3931 
3932 		/* The order here is important, we don't want to
3933 		lose the state of the lock before calling remove. */
3934 		lock_table_remove_low(lock);
3935 		lock_reset_lock_and_trx_wait(lock);
3936 
3937 		return(DB_DEADLOCK);
3938 
3939 	} else if (trx->lock.wait_lock == NULL) {
3940 		/* Deadlock resolution chose another transaction as a victim,
3941 		and we accidentally got our lock granted! */
3942 
3943 		return(DB_SUCCESS);
3944 	}
3945 
3946 	trx->lock.que_state = TRX_QUE_LOCK_WAIT;
3947 
3948 	trx->lock.wait_started = ut_time();
3949 	trx->lock.was_chosen_as_deadlock_victim = false;
3950 
3951 	trx->stats.start_lock_wait();
3952 
3953 	ut_a(que_thr_stop(thr));
3954 
3955 	MONITOR_INC(MONITOR_TABLELOCK_WAIT);
3956 
3957 	return(DB_LOCK_WAIT);
3958 }
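
/* Summary sketch of how a caller such as lock_table() below interprets
the return value of lock_table_enqueue_waiting():

	DB_LOCK_WAIT  - the request was enqueued; the query thread suspends
	DB_DEADLOCK   - this transaction was chosen as the deadlock victim
	DB_SUCCESS    - another transaction was rolled back as the victim
	                and our lock was granted immediately
*/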
3959 
3960 /*********************************************************************//**
3961 Checks if other transactions have an incompatible mode lock request in
3962 the lock queue.
3963 @return lock or NULL */
3964 UNIV_INLINE
3965 const lock_t*
3966 lock_table_other_has_incompatible(
3967 /*==============================*/
3968 	const trx_t*		trx,	/*!< in: transaction, or NULL if all
3969 					transactions should be included */
3970 	ulint			wait,	/*!< in: LOCK_WAIT if also
3971 					waiting locks are taken into
3972 					account, or 0 if not */
3973 	const dict_table_t*	table,	/*!< in: table */
3974 	lock_mode		mode)	/*!< in: lock mode */
3975 {
3976 	const lock_t*	lock;
3977 
3978 	ut_ad(lock_mutex_own());
3979 
3980 	for (lock = UT_LIST_GET_LAST(table->locks);
3981 	     lock != NULL;
3982 	     lock = UT_LIST_GET_PREV(un_member.tab_lock.locks, lock)) {
3983 
3984 		if (lock->trx != trx
3985 		    && !lock_mode_compatible(lock_get_mode(lock), mode)
3986 		    && (wait || !lock_get_wait(lock))) {
3987 
3988 			return(lock);
3989 		}
3990 	}
3991 
3992 	return(NULL);
3993 }
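
/* Reference sketch of the table lock compatibility that
lock_mode_compatible() implements (reconstructed here for convenience;
"+" = compatible, "-" = conflicting):

	        IS   IX   S    X    AI
	  IS    +    +    +    -    +
	  IX    +    +    -    -    +
	  S     +    -    +    -    -
	  X     -    -    -    -    -
	  AI    +    +    -    -    -
*/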
3994 
3995 /*********************************************************************//**
3996 Locks the specified database table in the mode given. If the lock cannot
3997 be granted immediately, the query thread is put to wait.
3998 @return DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
3999 dberr_t
4000 lock_table(
4001 /*=======*/
4002 	ulint		flags,	/*!< in: if BTR_NO_LOCKING_FLAG bit is set,
4003 				does nothing */
4004 	dict_table_t*	table,	/*!< in/out: database table
4005 				in dictionary cache */
4006 	lock_mode	mode,	/*!< in: lock mode */
4007 	que_thr_t*	thr)	/*!< in: query thread */
4008 {
4009 	trx_t*		trx;
4010 	dberr_t		err;
4011 	const lock_t*	wait_for;
4012 
4013 	ut_ad(table && thr);
4014 
4015 	/* Given the limited visibility of temp-tables we can avoid
4016 	the locking overhead */
4017 	if ((flags & BTR_NO_LOCKING_FLAG)
4018 	    || srv_read_only_mode
4019 	    || dict_table_is_temporary(table)) {
4020 
4021 		return(DB_SUCCESS);
4022 	}
4023 
4024 	ut_a(flags == 0);
4025 
4026 	trx = thr_get_trx(thr);
4027 
4028 	/* Look for equal or stronger locks the same trx already
4029 	has on the table. No need to acquire the lock mutex here
4030 	because only this transaction can add/access table locks
4031 	to/from trx_t::table_locks. */
4032 
4033 	if (lock_table_has(trx, table, mode)) {
4034 
4035 		return(DB_SUCCESS);
4036 	}
4037 
4038 	/* Read-only transactions can write to temp tables; we don't want
4039 	to promote them to RW transactions. Their updates cannot be visible
4040 	to other transactions. Therefore we can keep them out
4041 	of the read views. */
4042 
4043 	if ((mode == LOCK_IX || mode == LOCK_X)
4044 	    && !trx->read_only
4045 	    && trx->rsegs.m_redo.rseg == 0) {
4046 
4047 		trx_set_rw_mode(trx);
4048 	}
4049 
4050 	lock_mutex_enter();
4051 
4052 	/* We have to check if the new lock is compatible with any locks
4053 	other transactions have in the table lock queue. */
4054 
4055 	wait_for = lock_table_other_has_incompatible(
4056 		trx, LOCK_WAIT, table, mode);
4057 
4058 	trx_mutex_enter(trx);
4059 
4060 	/* Another trx has a request on the table in an incompatible
4061 	mode: this trx may have to wait */
4062 
4063 	if (wait_for != NULL) {
4064 		err = lock_table_enqueue_waiting(mode | flags, table, thr);
4065 	} else {
4066 		lock_table_create(table, mode | flags, trx);
4067 
4068 		ut_a(!flags || mode == LOCK_S || mode == LOCK_X);
4069 
4070 		err = DB_SUCCESS;
4071 	}
4072 
4073 	lock_mutex_exit();
4074 
4075 	trx_mutex_exit(trx);
4076 
4077 	return(err);
4078 }
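
/* Illustrative usage sketch (hypothetical caller): taking a table IX lock
before modifying rows.  The variables prebuilt and thr are assumptions,
standing in for whatever the surrounding row-level code provides.

	dberr_t	err = lock_table(0, prebuilt->table, LOCK_IX, thr);

	switch (err) {
	case DB_SUCCESS:
		break;		// lock granted, proceed
	case DB_LOCK_WAIT:
		// enqueued; the thread suspends and retries when woken
		break;
	case DB_DEADLOCK:
		// this transaction was chosen as the deadlock victim
		break;
	default:
		break;
	}
*/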
4079 
4080 /*********************************************************************//**
4081 Creates a table IX lock object for a resurrected transaction. */
4082 void
4083 lock_table_ix_resurrect(
4084 /*====================*/
4085 	dict_table_t*	table,	/*!< in/out: table */
4086 	trx_t*		trx)	/*!< in/out: transaction */
4087 {
4088 	ut_ad(trx->is_recovered);
4089 
4090 	if (lock_table_has(trx, table, LOCK_IX)) {
4091 		return;
4092 	}
4093 
4094 	lock_mutex_enter();
4095 
4096 	/* We have to check if the new lock is compatible with any locks
4097 	other transactions have in the table lock queue. */
4098 
4099 	ut_ad(!lock_table_other_has_incompatible(
4100 		      trx, LOCK_WAIT, table, LOCK_IX));
4101 
4102 	trx_mutex_enter(trx);
4103 	lock_table_create(table, LOCK_IX, trx);
4104 	lock_mutex_exit();
4105 	trx_mutex_exit(trx);
4106 }
4107 
4108 /*********************************************************************//**
4109 Checks if a waiting table lock request still has to wait in a queue.
4110 @return TRUE if still has to wait */
4111 static
4112 bool
4113 lock_table_has_to_wait_in_queue(
4114 /*============================*/
4115 	const lock_t*	wait_lock)	/*!< in: waiting table lock */
4116 {
4117 	const dict_table_t*	table;
4118 	const lock_t*		lock;
4119 
4120 	ut_ad(lock_mutex_own());
4121 	ut_ad(lock_get_wait(wait_lock));
4122 
4123 	table = wait_lock->un_member.tab_lock.table;
4124 
4125 	for (lock = UT_LIST_GET_FIRST(table->locks);
4126 	     lock != wait_lock;
4127 	     lock = UT_LIST_GET_NEXT(un_member.tab_lock.locks, lock)) {
4128 
4129 		if (lock_has_to_wait(wait_lock, lock)) {
4130 
4131 			return(true);
4132 		}
4133 	}
4134 
4135 	return(false);
4136 }
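
/* Illustrative note (a sketch, not normative): the queue is scanned only
up to wait_lock, so a waiting request never waits for requests enqueued
behind it.  E.g. with the queue [S granted to trx1, X waiting for trx2,
S waiting for trx3], trx3's request checks trx1's S (compatible) and
trx2's X (conflicting) and therefore keeps waiting. */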
4137 
4138 /*************************************************************//**
4139 Removes a table lock request, waiting or granted, from the queue and grants
4140 locks to other transactions in the queue, if they now are entitled to a
4141 lock. */
4142 static
4143 void
4144 lock_table_dequeue(
4145 /*===============*/
4146 	lock_t*	in_lock)/*!< in/out: table lock object; transactions waiting
4147 			behind will get their lock requests granted, if
4148 			they are now qualified to it */
4149 {
4150 	ut_ad(lock_mutex_own());
4151 	ut_a(lock_get_type_low(in_lock) == LOCK_TABLE);
4152 
4153 	lock_t*	lock = UT_LIST_GET_NEXT(un_member.tab_lock.locks, in_lock);
4154 
4155 	lock_table_remove_low(in_lock);
4156 
4157 	/* Check if waiting locks in the queue can now be granted: grant
4158 	locks if there are no conflicting locks ahead. */
4159 
4160 	for (/* No op */;
4161 	     lock != NULL;
4162 	     lock = UT_LIST_GET_NEXT(un_member.tab_lock.locks, lock)) {
4163 
4164 		if (lock_get_wait(lock)
4165 		    && !lock_table_has_to_wait_in_queue(lock)) {
4166 
4167 			/* Grant the lock */
4168 			ut_ad(in_lock->trx != lock->trx);
4169 			lock_grant(lock);
4170 		}
4171 	}
4172 }
4173 
4174 /** Sets a lock on a table based on the given mode.
4175 @param[in]	table	table to lock
4176 @param[in,out]	trx	transaction
4177 @param[in]	mode	LOCK_X or LOCK_S
4178 @return error code or DB_SUCCESS. */
4179 dberr_t
4180 lock_table_for_trx(
4181 	dict_table_t*	table,
4182 	trx_t*		trx,
4183 	enum lock_mode	mode)
4184 {
4185 	mem_heap_t*	heap;
4186 	que_thr_t*	thr;
4187 	dberr_t		err;
4188 	sel_node_t*	node;
4189 	heap = mem_heap_create(512);
4190 
4191 	node = sel_node_create(heap);
4192 	thr = pars_complete_graph_for_exec(node, trx, heap, NULL);
4193 	thr->graph->state = QUE_FORK_ACTIVE;
4194 
4195 	/* We use the select query graph as the dummy graph needed
4196 	in the lock module call */
4197 
4198 	thr = static_cast<que_thr_t*>(
4199 		que_fork_get_first_thr(
4200 			static_cast<que_fork_t*>(que_node_get_parent(thr))));
4201 
4202 	que_thr_move_to_run_state_for_mysql(thr, trx);
4203 
4204 run_again:
4205 	thr->run_node = thr;
4206 	thr->prev_node = thr->common.parent;
4207 
4208 	err = lock_table(0, table, mode, thr);
4209 
4210 	trx->error_state = err;
4211 
4212 	if (UNIV_LIKELY(err == DB_SUCCESS)) {
4213 		que_thr_stop_for_mysql_no_error(thr, trx);
4214 	} else {
4215 		que_thr_stop_for_mysql(thr);
4216 
4217 		if (err != DB_QUE_THR_SUSPENDED) {
4218 			bool	was_lock_wait;
4219 
4220 			was_lock_wait = row_mysql_handle_errors(
4221 				&err, trx, thr, NULL);
4222 
4223 			if (was_lock_wait) {
4224 				goto run_again;
4225 			}
4226 		} else {
4227 			que_thr_t*	run_thr;
4228 			que_node_t*	parent;
4229 
4230 			parent = que_node_get_parent(thr);
4231 
4232 			run_thr = que_fork_start_command(
4233 				static_cast<que_fork_t*>(parent));
4234 
4235 			ut_a(run_thr == thr);
4236 
4237 			/* There was a lock wait but the thread was not
4238 			in a ready to run or running state. */
4239 			trx->error_state = DB_LOCK_WAIT;
4240 
4241 			goto run_again;
4242 
4243 		}
4244 	}
4245 
4246 	que_graph_free(thr->graph);
4247 	trx->op_info = "";
4248 
4249 	return(err);
4250 }
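
/* Illustrative usage sketch: lock_table_for_trx() lets server-side code
take a table lock without a real query graph; a hypothetical call site
(e.g. before dropping a table) would be:

	dberr_t	err = lock_table_for_trx(table, trx, LOCK_X);

   Internally a dummy select graph is built and the request is retried
   for as long as row_mysql_handle_errors() reports a lock wait. */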
4251 
4252 /*=========================== LOCK RELEASE ==============================*/
4253 
4254 /*************************************************************//**
4255 Removes a granted record lock of a transaction from the queue and grants
4256 locks to other transactions waiting in the queue if they now are entitled
4257 to a lock. */
4258 void
4259 lock_rec_unlock(
4260 /*============*/
4261 	trx_t*			trx,	/*!< in/out: transaction that has
4262 					set a record lock */
4263 	const buf_block_t*	block,	/*!< in: buffer block containing rec */
4264 	const rec_t*		rec,	/*!< in: record */
4265 	lock_mode		lock_mode)/*!< in: LOCK_S or LOCK_X */
4266 {
4267 	lock_t*		first_lock;
4268 	lock_t*		lock;
4269 	ulint		heap_no;
4270 	const char*	stmt;
4271 	size_t		stmt_len;
4272 
4273 	ut_ad(trx);
4274 	ut_ad(rec);
4275 	ut_ad(block->frame == page_align(rec));
4276 	ut_ad(!trx->lock.wait_lock);
4277 	ut_ad(trx_state_eq(trx, TRX_STATE_ACTIVE));
4278 
4279 	heap_no = page_rec_get_heap_no(rec);
4280 
4281 	lock_mutex_enter();
4282 	trx_mutex_enter(trx);
4283 
4284 	first_lock = lock_rec_get_first(lock_sys->rec_hash, block, heap_no);
4285 
4286 	/* Find the last lock with the same lock_mode and transaction
4287 	on the record. */
4288 
4289 	for (lock = first_lock; lock != NULL;
4290 	     lock = lock_rec_get_next(heap_no, lock)) {
4291 		if (lock->trx == trx && lock_get_mode(lock) == lock_mode) {
4292 			goto released;
4293 		}
4294 	}
4295 
4296 	lock_mutex_exit();
4297 	trx_mutex_exit(trx);
4298 
4299 	stmt = innobase_get_stmt_unsafe(trx->mysql_thd, &stmt_len);
4300 
4301 	{
4302 		ib::error	err;
4303 		err << "Unlock row could not find a " << lock_mode
4304 			<< " mode lock on the record. Current statement: ";
4305 		err.write(stmt, stmt_len);
4306 	}
4307 
4308 	return;
4309 
4310 released:
4311 	ut_a(!lock_get_wait(lock));
4312 	lock_rec_reset_nth_bit(lock, heap_no);
4313 
4314 	/* Check if we can now grant waiting lock requests */
4315 
4316 	for (lock = first_lock; lock != NULL;
4317 	     lock = lock_rec_get_next(heap_no, lock)) {
4318 		if (lock_get_wait(lock)
4319 		    && !lock_rec_has_to_wait_in_queue(lock)) {
4320 
4321 			/* Grant the lock */
4322 			ut_ad(trx != lock->trx);
4323 			lock_grant(lock);
4324 		}
4325 	}
4326 
4327 	lock_mutex_exit();
4328 	trx_mutex_exit(trx);
4329 }
4330 
4331 #ifdef UNIV_DEBUG
4332 /*********************************************************************//**
4333 Check if a transaction that has X or IX locks has set the dict_op
4334 code correctly. */
4335 static
4336 void
4337 lock_check_dict_lock(
4338 /*==================*/
4339 	const lock_t*	lock)	/*!< in: lock to check */
4340 {
4341 	if (lock_get_type_low(lock) == LOCK_REC) {
4342 
4343 		/* Check if the transaction locked a record
4344 		in a system table in X mode. It should have set
4345 		the dict_op code correctly if it did. */
4346 		if (lock->index->table->id < DICT_HDR_FIRST_ID
4347 		    && lock_get_mode(lock) == LOCK_X) {
4348 
4349 			ut_ad(lock_get_mode(lock) != LOCK_IX);
4350 			ut_ad(lock->trx->dict_operation != TRX_DICT_OP_NONE);
4351 		}
4352 	} else {
4353 		ut_ad(lock_get_type_low(lock) & LOCK_TABLE);
4354 
4355 		const dict_table_t*	table;
4356 
4357 		table = lock->un_member.tab_lock.table;
4358 
4359 		/* Check if the transaction locked a system table
4360 		in IX mode. It should have set the dict_op code
4361 		correctly if it did. */
4362 		if (table->id < DICT_HDR_FIRST_ID
4363 		    && (lock_get_mode(lock) == LOCK_X
4364 			|| lock_get_mode(lock) == LOCK_IX)) {
4365 
4366 			ut_ad(lock->trx->dict_operation != TRX_DICT_OP_NONE);
4367 		}
4368 	}
4369 }
4370 #endif /* UNIV_DEBUG */
4371 
4372 /** Remove GAP lock from a next key record lock
4373 @param[in,out]	lock	lock object */
4374 static
4375 void
4376 lock_remove_gap_lock(lock_t* lock)
4377 {
4378 	/* Remove lock on supremum */
4379 	lock_rec_reset_nth_bit(lock, PAGE_HEAP_NO_SUPREMUM);
4380 
4381 	/* Remove GAP lock for other records */
4382 	lock->remove_gap_lock();
4383 }
4384 
4385 /** Release read locks of a transaction. It is called during XA
4386 prepare to release locks early.
4387 @param[in,out]	trx		transaction
4388 @param[in]	only_gap	release only GAP locks */
4389 void
4390 lock_trx_release_read_locks(
4391 	trx_t*		trx,
4392 	bool		only_gap)
4393 {
4394 	lock_t*		lock;
4395 	lock_t*		next_lock;
4396 	ulint		count = 0;
4397 
4398 	/* Avoid taking lock_sys if trx didn't acquire any lock */
4399 	if (UT_LIST_GET_LEN(trx->lock.trx_locks) == 0) {
4400 
4401 		return;
4402 	}
4403 
4404 	lock_mutex_enter();
4405 
4406 	lock = UT_LIST_GET_FIRST(trx->lock.trx_locks);
4407 
4408 	while (lock != NULL) {
4409 
4410 		next_lock = UT_LIST_GET_NEXT(trx_locks, lock);
4411 
4412 		/* Check only for record lock */
4413 		if (!lock->is_record_lock()
4414 		    || lock->is_insert_intention()
4415 		    || lock->is_predicate()) {
4416 
4417 			lock = next_lock;
4418 			continue;
4419 		}
4420 
4421 		/* Release any GAP only lock. */
4422 		if (lock->is_gap()) {
4423 
4424 			lock_rec_dequeue_from_page(lock);
4425 			lock = next_lock;
4426 			continue;
4427 		}
4428 
4429 		/* Don't release any non-GAP lock if not asked. */
4430 		if (lock->is_record_not_gap() && only_gap) {
4431 
4432 			lock = next_lock;
4433 			continue;
4434 		}
4435 
4436 		/* Release a shared next-key lock (S + GAP) if asked for */
4437 		if (lock->mode() == LOCK_S && !only_gap) {
4438 
4439 			lock_rec_dequeue_from_page(lock);
4440 			lock = next_lock;
4441 			continue;
4442 		}
4443 
4444 		/* Release GAP lock from Next Key lock */
4445 		lock_remove_gap_lock(lock);
4446 
4447 		/* Grant locks */
4448 		lock_rec_grant(lock);
4449 
4450 		lock = next_lock;
4451 
4452 		++count;
4453 
4454 		if (count == LOCK_RELEASE_INTERVAL) {
4455 			/* Release the mutex for a while, so that we
4456 			do not monopolize it */
4457 
4458 			lock_mutex_exit();
4459 
4460 			lock_mutex_enter();
4461 
4462 			count = 0;
4463 		}
4464 	}
4465 
4466 	lock_mutex_exit();
4467 }
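
/* Per-lock summary of the release loop above (a reconstruction for
readability, not normative):

	insert-intention or predicate lock   -> kept
	GAP-only lock                        -> always released
	record-not-gap lock, only_gap=true   -> kept
	S next-key lock, only_gap=false      -> released entirely
	any other next-key lock              -> GAP part removed, record
	                                        part kept, waiters granted
*/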
4468 
4469 /*********************************************************************//**
4470 Releases transaction locks, and releases possible other transactions waiting
4471 because of these locks. */
4472 static
4473 void
4474 lock_release(
4475 /*=========*/
4476 	trx_t*	trx)	/*!< in/out: transaction */
4477 {
4478 	lock_t*		lock;
4479 	ulint		count = 0;
4480 	trx_id_t	max_trx_id = trx_sys_get_max_trx_id();
4481 
4482 	ut_ad(lock_mutex_own());
4483 	ut_ad(!trx_mutex_own(trx));
4484 	ut_ad(!trx->is_dd_trx);
4485 
4486 	for (lock = UT_LIST_GET_LAST(trx->lock.trx_locks);
4487 	     lock != NULL;
4488 	     lock = UT_LIST_GET_LAST(trx->lock.trx_locks)) {
4489 
4490 		ut_d(lock_check_dict_lock(lock));
4491 
4492 		if (lock_get_type_low(lock) == LOCK_REC) {
4493 
4494 			lock_rec_dequeue_from_page(lock);
4495 		} else {
4496 			dict_table_t*	table;
4497 
4498 			table = lock->un_member.tab_lock.table;
4499 
4500 			if (lock_get_mode(lock) != LOCK_IS
4501 			    && trx->undo_no != 0) {
4502 
4503 				/* The trx may have modified the table. We
4504 				block the use of the MySQL query cache for
4505 				all currently active transactions. */
4506 
4507 				table->query_cache_inv_id = max_trx_id;
4508 			}
4509 
4510 			lock_table_dequeue(lock);
4511 		}
4512 
4513 		if (count == LOCK_RELEASE_INTERVAL) {
4514 			/* Release the mutex for a while, so that we
4515 			do not monopolize it */
4516 
4517 			lock_mutex_exit();
4518 
4519 			lock_mutex_enter();
4520 
4521 			count = 0;
4522 		}
4523 
4524 		++count;
4525 	}
4526 }
4527 
4528 /* True if a lock mode is S or X */
4529 #define IS_LOCK_S_OR_X(lock) \
4530 	(lock_get_mode(lock) == LOCK_S \
4531 	 || lock_get_mode(lock) == LOCK_X)
4532 
4533 /*********************************************************************//**
4534 Removes table locks of the transaction on a table to be dropped. */
4535 static
4536 void
4537 lock_trx_table_locks_remove(
4538 /*========================*/
4539 	const lock_t*	lock_to_remove)		/*!< in: lock to remove */
4540 {
4541 	trx_t*		trx = lock_to_remove->trx;
4542 
4543 	ut_ad(lock_mutex_own());
4544 
4545 	/* It is safe to read this because we are holding the lock mutex */
4546 	if (!trx->lock.cancel) {
4547 		trx_mutex_enter(trx);
4548 	} else {
4549 		ut_ad(trx_mutex_own(trx));
4550 	}
4551 
4552 	typedef lock_pool_t::reverse_iterator iterator;
4553 
4554 	iterator	end = trx->lock.table_locks.rend();
4555 
4556 	for (iterator it = trx->lock.table_locks.rbegin(); it != end; ++it) {
4557 
4558 		const lock_t*	lock = *it;
4559 
4560 		if (lock == NULL) {
4561 			continue;
4562 		}
4563 
4564 		ut_a(trx == lock->trx);
4565 		ut_a(lock_get_type_low(lock) & LOCK_TABLE);
4566 		ut_a(lock->un_member.tab_lock.table != NULL);
4567 
4568 		if (lock == lock_to_remove) {
4569 
4570 			*it = NULL;
4571 
4572 			if (!trx->lock.cancel) {
4573 				trx_mutex_exit(trx);
4574 			}
4575 
4576 			return;
4577 		}
4578 	}
4579 
4580 	if (!trx->lock.cancel) {
4581 		trx_mutex_exit(trx);
4582 	}
4583 
4584 	/* Lock must exist in the vector. */
4585 	ut_error;
4586 }
4587 
4588 /*********************************************************************//**
4589 Removes locks of a transaction on a table to be dropped.
4590 If remove_also_table_sx_locks is TRUE then table-level S and X locks are
4591 also removed in addition to other table-level and record-level locks.
4592 No lock that is going to be removed is allowed to be a wait lock. */
4593 static
4594 void
4595 lock_remove_all_on_table_for_trx(
4596 /*=============================*/
4597 	dict_table_t*	table,			/*!< in: table to be dropped */
4598 	trx_t*		trx,			/*!< in: a transaction */
4599 	ibool		remove_also_table_sx_locks)/*!< in: also removes
4600 						table S and X locks */
4601 {
4602 	lock_t*		lock;
4603 	lock_t*		prev_lock;
4604 
4605 	ut_ad(lock_mutex_own());
4606 
4607 	for (lock = UT_LIST_GET_LAST(trx->lock.trx_locks);
4608 	     lock != NULL;
4609 	     lock = prev_lock) {
4610 
4611 		prev_lock = UT_LIST_GET_PREV(trx_locks, lock);
4612 
4613 		if (lock_get_type_low(lock) == LOCK_REC
4614 		    && lock->index->table == table) {
4615 			ut_a(!lock_get_wait(lock));
4616 
4617 			lock_rec_discard(lock);
4618 		} else if (lock_get_type_low(lock) & LOCK_TABLE
4619 			   && lock->un_member.tab_lock.table == table
4620 			   && (remove_also_table_sx_locks
4621 			       || !IS_LOCK_S_OR_X(lock))) {
4622 
4623 			ut_a(!lock_get_wait(lock));
4624 
4625 			lock_trx_table_locks_remove(lock);
4626 			lock_table_remove_low(lock);
4627 		}
4628 	}
4629 }
4630 
4631 /*******************************************************************//**
4632 Remove any explicit record locks held by recovering transactions on
4633 the table.
4634 @return number of recovered transactions examined */
4635 static
4636 ulint
4637 lock_remove_recovered_trx_record_locks(
4638 /*===================================*/
4639 	dict_table_t*	table)	/*!< in: check if there are any locks
4640 				held on records in this table or on the
4641 				table itself */
4642 {
4643 	ut_a(table != NULL);
4644 	ut_ad(lock_mutex_own());
4645 
4646 	ulint		n_recovered_trx = 0;
4647 
4648 	mutex_enter(&trx_sys->mutex);
4649 
4650 	for (trx_t* trx = UT_LIST_GET_FIRST(trx_sys->rw_trx_list);
4651 	     trx != NULL;
4652 	     trx = UT_LIST_GET_NEXT(trx_list, trx)) {
4653 
4654 		assert_trx_in_rw_list(trx);
4655 
4656 		if (!trx->is_recovered) {
4657 			continue;
4658 		}
4659 
4660 		/* Because we are holding the lock_sys->mutex,
4661 		implicit locks cannot be converted to explicit ones
4662 		while we are scanning the explicit locks. */
4663 
4664 		lock_t*	next_lock;
4665 
4666 		for (lock_t* lock = UT_LIST_GET_FIRST(trx->lock.trx_locks);
4667 		     lock != NULL;
4668 		     lock = next_lock) {
4669 
4670 			ut_a(lock->trx == trx);
4671 
4672 			/* Recovered transactions can't wait on a lock. */
4673 
4674 			ut_a(!lock_get_wait(lock));
4675 
4676 			next_lock = UT_LIST_GET_NEXT(trx_locks, lock);
4677 
4678 			switch (lock_get_type_low(lock)) {
4679 			default:
4680 				ut_error;
4681 			case LOCK_TABLE:
4682 				if (lock->un_member.tab_lock.table == table) {
4683 					lock_trx_table_locks_remove(lock);
4684 					lock_table_remove_low(lock);
4685 				}
4686 				break;
4687 			case LOCK_REC:
4688 				if (lock->index->table == table) {
4689 					lock_rec_discard(lock);
4690 				}
4691 			}
4692 		}
4693 
4694 		++n_recovered_trx;
4695 	}
4696 
4697 	mutex_exit(&trx_sys->mutex);
4698 
4699 	return(n_recovered_trx);
4700 }
4701 
4702 /*********************************************************************//**
4703 Removes locks on a table to be dropped or truncated.
4704 If remove_also_table_sx_locks is TRUE then table-level S and X locks are
4705 also removed in addition to other table-level and record-level locks.
4706 No lock that is going to be removed is allowed to be a wait lock. */
4707 void
4708 lock_remove_all_on_table(
4709 /*=====================*/
4710 	dict_table_t*	table,			/*!< in: table to be dropped
4711 						or truncated */
4712 	ibool		remove_also_table_sx_locks)/*!< in: also removes
4713 						table S and X locks */
4714 {
4715 	lock_t*		lock;
4716 
4717 	lock_mutex_enter();
4718 
4719 	for (lock = UT_LIST_GET_FIRST(table->locks);
4720 	     lock != NULL;
4721 	     /* No op */) {
4722 
4723 		lock_t*	prev_lock;
4724 
4725 		prev_lock = UT_LIST_GET_PREV(un_member.tab_lock.locks, lock);
4726 
4727 		/* If we should remove all locks (remove_also_table_sx_locks
4728 		is TRUE), or if the lock is not table-level S or X lock,
4729 		then check we are not going to remove a wait lock. */
4730 		if (remove_also_table_sx_locks
4731 		    || !(lock_get_type(lock) == LOCK_TABLE
4732 			 && IS_LOCK_S_OR_X(lock))) {
4733 
4734 			ut_a(!lock_get_wait(lock));
4735 		}
4736 
4737 		lock_remove_all_on_table_for_trx(
4738 			table, lock->trx, remove_also_table_sx_locks);
4739 
4740 		if (prev_lock == NULL) {
4741 			if (lock == UT_LIST_GET_FIRST(table->locks)) {
4742 				/* lock was not removed, pick its successor */
4743 				lock = UT_LIST_GET_NEXT(
4744 					un_member.tab_lock.locks, lock);
4745 			} else {
4746 				/* lock was removed, pick the first one */
4747 				lock = UT_LIST_GET_FIRST(table->locks);
4748 			}
4749 		} else if (UT_LIST_GET_NEXT(un_member.tab_lock.locks,
4750 					    prev_lock) != lock) {
4751 			/* If lock was removed by
4752 			lock_remove_all_on_table_for_trx() then pick the
4753 			successor of prev_lock ... */
4754 			lock = UT_LIST_GET_NEXT(
4755 				un_member.tab_lock.locks, prev_lock);
4756 		} else {
4757 			/* ... otherwise pick the successor of lock. */
4758 			lock = UT_LIST_GET_NEXT(
4759 				un_member.tab_lock.locks, lock);
4760 		}
4761 	}
4762 
4763 	/* Note: Recovered transactions don't have table level IX or IS locks
4764 	but can have implicit record locks that have been converted to explicit
4765 	record locks. Such record locks cannot be freed by traversing the
4766 	transaction lock list in dict_table_t (as above). */
4767 
4768 	if (!lock_sys->rollback_complete
4769 	    && lock_remove_recovered_trx_record_locks(table) == 0) {
4770 
4771 		lock_sys->rollback_complete = TRUE;
4772 	}
4773 
4774 	lock_mutex_exit();
4775 }
4776 
4777 /*===================== VALIDATION AND DEBUGGING ====================*/
4778 
4779 /*********************************************************************//**
4780 Prints info of a table lock. */
4781 void
4782 lock_table_print(
4783 /*=============*/
4784 	FILE*		file,	/*!< in: file where to print */
4785 	const lock_t*	lock)	/*!< in: table type lock */
4786 {
4787 	ut_ad(lock_mutex_own());
4788 	ut_a(lock_get_type_low(lock) == LOCK_TABLE);
4789 
4790 	fputs("TABLE LOCK table ", file);
4791 	ut_print_name(file, lock->trx,
4792 		      lock->un_member.tab_lock.table->name.m_name);
4793 	fprintf(file, " trx id " TRX_ID_FMT, trx_get_id_for_print(lock->trx));
4794 
4795 	if (lock_get_mode(lock) == LOCK_S) {
4796 		fputs(" lock mode S", file);
4797 	} else if (lock_get_mode(lock) == LOCK_X) {
4798 		ut_ad(lock->trx->id != 0);
4799 		fputs(" lock mode X", file);
4800 	} else if (lock_get_mode(lock) == LOCK_IS) {
4801 		fputs(" lock mode IS", file);
4802 	} else if (lock_get_mode(lock) == LOCK_IX) {
4803 		ut_ad(lock->trx->id != 0);
4804 		fputs(" lock mode IX", file);
4805 	} else if (lock_get_mode(lock) == LOCK_AUTO_INC) {
4806 		fputs(" lock mode AUTO-INC", file);
4807 	} else {
4808 		fprintf(file, " unknown lock mode %lu",
4809 			(ulong) lock_get_mode(lock));
4810 	}
4811 
4812 	if (lock_get_wait(lock)) {
4813 		fputs(" waiting", file);
4814 	}
4815 
4816 	putc('\n', file);
4817 }
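
/* Example of a line produced by lock_table_print() (reconstructed from
the format strings above; the table name and trx id are hypothetical):

	TABLE LOCK table `test`.`t1` trx id 1234 lock mode IX
*/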
4818 
4819 /*********************************************************************//**
4820 Prints info of a record lock. */
4821 void
4822 lock_rec_print(
4823 /*===========*/
4824 	FILE*		file,	/*!< in: file where to print */
4825 	const lock_t*	lock)	/*!< in: record type lock */
4826 {
4827 	ulint			space;
4828 	ulint			page_no;
4829 	mtr_t			mtr;
4830 	mem_heap_t*		heap		= NULL;
4831 	ulint			offsets_[REC_OFFS_NORMAL_SIZE];
4832 	ulint*			offsets		= offsets_;
4833 	rec_offs_init(offsets_);
4834 
4835 	ut_ad(lock_mutex_own());
4836 	ut_a(lock_get_type_low(lock) == LOCK_REC);
4837 
4838 	space = lock->un_member.rec_lock.space;
4839 	page_no = lock->un_member.rec_lock.page_no;
4840 
4841 	fprintf(file, "RECORD LOCKS space id %lu page no %lu n bits %lu "
4842 		"index %s of table ",
4843 		(ulong) space, (ulong) page_no,
4844 		(ulong) lock_rec_get_n_bits(lock),
4845 		lock->index->name());
4846 	ut_print_name(file, lock->trx, lock->index->table_name);
4847 	fprintf(file, " trx id " TRX_ID_FMT, trx_get_id_for_print(lock->trx));
4848 
4849 	if (lock_get_mode(lock) == LOCK_S) {
4850 		fputs(" lock mode S", file);
4851 	} else if (lock_get_mode(lock) == LOCK_X) {
4852 		fputs(" lock_mode X", file);
4853 	} else {
4854 		ut_error;
4855 	}
4856 
4857 	if (lock_rec_get_gap(lock)) {
4858 		fputs(" locks gap before rec", file);
4859 	}
4860 
4861 	if (lock_rec_get_rec_not_gap(lock)) {
4862 		fputs(" locks rec but not gap", file);
4863 	}
4864 
4865 	if (lock_rec_get_insert_intention(lock)) {
4866 		fputs(" insert intention", file);
4867 	}
4868 
4869 	if (lock_get_wait(lock)) {
4870 		fputs(" waiting", file);
4871 	}
4872 
4873 	mtr_start(&mtr);
4874 
4875 	putc('\n', file);
4876 
4877 	if (srv_show_verbose_locks) {
4878 		const buf_block_t*	block;
4879 
4880 		block = buf_page_try_get(page_id_t(space, page_no), &mtr);
4881 
4882 		for (ulint i = 0; i < lock_rec_get_n_bits(lock); ++i) {
4883 
4884 			if (!lock_rec_get_nth_bit(lock, i)) {
4885 				continue;
4886 			}
4887 
4888 			fprintf(file, "Record lock, heap no %lu", (ulong) i);
4889 
4890 			if (block) {
4891 				const rec_t*	rec;
4892 
4893 				rec = page_find_rec_with_heap_no(
4894 					buf_block_get_frame(block), i);
4895 
4896 				offsets = rec_get_offsets(
4897 					rec, lock->index, offsets,
4898 					ULINT_UNDEFINED, &heap);
4899 
4900 				putc(' ', file);
4901 				rec_print_new(file, rec, offsets);
4902 			}
4903 		}
4904 
4905 		putc('\n', file);
4906 	}
4907 
4908 	mtr_commit(&mtr);
4909 
4910 	if (heap) {
4911 		mem_heap_free(heap);
4912 	}
4913 }
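
/* Example output of lock_rec_print() (reconstructed from the format
strings above; all values are hypothetical):

	RECORD LOCKS space id 58 page no 3 n bits 72 index PRIMARY of
	table `test`.`t1` trx id 1234 lock_mode X locks rec but not gap
	Record lock, heap no 2 ...record fields, if the page is cached...
*/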
4914 
4915 #ifdef UNIV_DEBUG
4916 /* Print the number of lock structs from lock_print_info_summary() only
4917 in non-production builds for performance reasons, see
4918 http://bugs.mysql.com/36942 */
4919 #define PRINT_NUM_OF_LOCK_STRUCTS
4920 #endif /* UNIV_DEBUG */
4921 
4922 #ifdef PRINT_NUM_OF_LOCK_STRUCTS
4923 /*********************************************************************//**
4924 Calculates the number of record lock structs in the record lock hash table.
4925 @return number of record locks */
4926 static
4927 ulint
4928 lock_get_n_rec_locks(void)
4929 /*======================*/
4930 {
4931 	ulint	n_locks	= 0;
4932 	ulint	i;
4933 
4934 	ut_ad(lock_mutex_own());
4935 
4936 	for (i = 0; i < hash_get_n_cells(lock_sys->rec_hash); i++) {
4937 		const lock_t*	lock;
4938 
4939 		for (lock = static_cast<const lock_t*>(
4940 				HASH_GET_FIRST(lock_sys->rec_hash, i));
4941 		     lock != 0;
4942 		     lock = static_cast<const lock_t*>(
4943 				HASH_GET_NEXT(hash, lock))) {
4944 
4945 			n_locks++;
4946 		}
4947 	}
4948 
4949 	return(n_locks);
4950 }
4951 #endif /* PRINT_NUM_OF_LOCK_STRUCTS */
4952 
4953 /*********************************************************************//**
4954 Prints info of locks for all transactions.
4955 @return FALSE if not able to obtain the lock mutex, in which case the
4956 function exits without printing the lock info */
4957 ibool
4958 lock_print_info_summary(
4959 /*====================*/
4960 	FILE*	file,	/*!< in: file where to print */
4961 	ibool	nowait)	/*!< in: whether to wait for the lock mutex */
4962 {
4963 	/* If nowait is FALSE, wait on the lock mutex;
4964 	otherwise return immediately if we fail to obtain
4965 	the mutex. */
4966 	if (!nowait) {
4967 		lock_mutex_enter();
4968 	} else if (lock_mutex_enter_nowait()) {
4969 		fputs("FAIL TO OBTAIN LOCK MUTEX,"
4970 		      " SKIP LOCK INFO PRINTING\n", file);
4971 		return(FALSE);
4972 	}
4973 
4974 	if (lock_deadlock_found) {
4975 		fputs("------------------------\n"
4976 		      "LATEST DETECTED DEADLOCK\n"
4977 		      "------------------------\n", file);
4978 
4979 		if (!srv_read_only_mode) {
4980 			ut_copy_file(file, lock_latest_err_file);
4981 		}
4982 	}
4983 
4984 	fputs("------------\n"
4985 	      "TRANSACTIONS\n"
4986 	      "------------\n", file);
4987 
4988 	fprintf(file, "Trx id counter " TRX_ID_FMT "\n",
4989 		trx_sys_get_max_trx_id());
4990 
4991 	fprintf(file,
4992 		"Purge done for trx's n:o < " TRX_ID_FMT
4993 		" undo n:o < " TRX_ID_FMT " state: ",
4994 		purge_sys->iter.trx_no,
4995 		purge_sys->iter.undo_no);
4996 
4997 	/* Note: We are reading the state without the latch. One, because
4998 	taking it would violate the latching order, and two, because we are
4999 	merely querying the state of the variable for display. */
5000 
5001 	switch (purge_sys->state){
5002 	case PURGE_STATE_INIT:
5003 		/* Should never be in this state while the system is running. */
5004 		ut_error;
5005 
5006 	case PURGE_STATE_EXIT:
5007 		fprintf(file, "exited");
5008 		break;
5009 
5010 	case PURGE_STATE_DISABLED:
5011 		fprintf(file, "disabled");
5012 		break;
5013 
5014 	case PURGE_STATE_RUN:
5015 		fprintf(file, "running");
5016 		/* Check if it is waiting for more data to arrive. */
5017 		if (!purge_sys->running) {
5018 			fprintf(file, " but idle");
5019 		}
5020 		break;
5021 
5022 	case PURGE_STATE_STOP:
5023 		fprintf(file, "stopped");
5024 		break;
5025 	}
5026 
5027 	fprintf(file, "\n");
5028 
5029 	fprintf(file,
5030 		"History list length %lu\n",
5031 		(ulong) trx_sys->rseg_history_len);
5032 
5033 #ifdef PRINT_NUM_OF_LOCK_STRUCTS
5034 	fprintf(file,
5035 		"Total number of lock structs in row lock hash table %lu\n",
5036 		(ulong) lock_get_n_rec_locks());
5037 #endif /* PRINT_NUM_OF_LOCK_STRUCTS */
5038 	return(TRUE);
5039 }
5040 
5041 /** Functor to print not-started transaction from the mysql_trx_list. */
5042 
5043 struct	PrintNotStarted {
5044 
5045 	PrintNotStarted(FILE* file) : m_file(file) { }
5046 
5047 	void	operator()(const trx_t* trx)
5048 	{
5049 		ut_ad(trx->in_mysql_trx_list);
5050 		ut_ad(mutex_own(&trx_sys->mutex));
5051 
5052 		/* See state transitions and locking rules in trx0trx.h */
5053 
5054 		if (trx_state_eq(trx, TRX_STATE_NOT_STARTED)) {
5055 
5056 			fputs("---", m_file);
5057 			trx_print_latched(m_file, trx, 600);
5058 		}
5059 	}
5060 
5061 	FILE*		m_file;
5062 };
5063 
5064 /** Iterate over a transaction's locks. Keeping track of the
5065 iterator using an ordinal value. */
5066 
5067 class TrxLockIterator {
5068 public:
5069 	TrxLockIterator() { rewind(); }
5070 
5071 	/** Get the m_index(th) lock of a transaction.
5072 	@return current lock or 0 */
5073 	const lock_t* current(const trx_t* trx) const
5074 	{
5075 		lock_t*	lock;
5076 		ulint	i = 0;
5077 
5078 		for (lock = UT_LIST_GET_FIRST(trx->lock.trx_locks);
5079 		     lock != NULL && i < m_index;
5080 		     lock = UT_LIST_GET_NEXT(trx_locks, lock), ++i) {
5081 
5082 			/* No op */
5083 		}
5084 
5085 		return(lock);
5086 	}
5087 
5088 	/** Set the ordinal value to 0 */
5089 	void rewind()
5090 	{
5091 		m_index = 0;
5092 	}
5093 
5094 	/** Increment the ordinal value.
5095 	@return the current index value */
5096 	ulint next()
5097 	{
5098 		return(++m_index);
5099 	}
5100 
5101 private:
5102 	/** Current iterator position */
5103 	ulint		m_index;
5104 };
5105 
5106 /** This iterates over both the RW and RO trx_sys lists. We need to keep
5107 track of where the iterator was up to, and we do that using an ordinal value. */
5108 
5109 class TrxListIterator {
5110 public:
5111 	TrxListIterator() : m_index()
5112 	{
5113 		/* We iterate over the RW trx list first. */
5114 
5115 		m_trx_list = &trx_sys->rw_trx_list;
5116 	}
5117 
5118 	/** Get the current transaction whose ordinality is m_index.
5119 	@return current transaction or 0 */
5120 
5121 	const trx_t* current()
5122 	{
5123 		return(reposition());
5124 	}
5125 
5126 	/** Advance the transaction current ordinal value and reset the
5127 	transaction lock ordinal value */
5128 
5129 	void next()
5130 	{
5131 		++m_index;
5132 		m_lock_iter.rewind();
5133 	}
5134 
5135 	TrxLockIterator& lock_iter()
5136 	{
5137 		return(m_lock_iter);
5138 	}
5139 
5140 private:
5141 	/** Reposition the "cursor" on the current transaction. If it
5142 	is the first time then the "cursor" will be positioned on the
5143 	first transaction.
5144 
5145 	@return transaction instance or 0 */
5146 	const trx_t* reposition() const
5147 	{
5148 		ulint	i;
5149 		trx_t*	trx;
5150 
5151 		/* Make the transaction at the ordinal value of m_index
5152 		the current transaction, i.e. reposition/restore. */
5153 
5154 		for (i = 0, trx = UT_LIST_GET_FIRST(*m_trx_list);
5155 		     trx != NULL && (i < m_index);
5156 		     trx = UT_LIST_GET_NEXT(trx_list, trx), ++i) {
5157 
5158 			check_trx_state(trx);
5159 		}
5160 
5161 		return(trx);
5162 	}
5163 
5164 	/** Ordinal value of the transaction in the current transaction list */
5165 	ulint			m_index;
5166 
5167 	/** Current transaction list */
5168 	trx_ut_list_t*		m_trx_list;
5169 
5170 	/** For iterating over a transaction's locks */
5171 	TrxLockIterator		m_lock_iter;
5172 };
5173 
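/* Editor's note: a hedged sketch (not server source) of how the two
iterator classes above are driven together. Both reposition by ordinal
value, so a scan can resume after the lock mutex and trx_sys->mutex have
been released and re-acquired in the middle of it:

	TrxListIterator	trx_iter;

	while (const trx_t* trx = trx_iter.current()) {
		TrxLockIterator&	lock_iter = trx_iter.lock_iter();

		// ... print the trx and walk its locks via lock_iter ...

		trx_iter.next();	// advances trx, rewinds lock_iter
	}

lock_print_info_all_transactions() below is the real consumer. */
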
5174 /** Prints transaction lock wait and MVCC state.
5175 @param[in,out]	file	file where to print
5176 @param[in]	trx	transaction */
5177 void
5178 lock_trx_print_wait_and_mvcc_state(
5179 	FILE*		file,
5180 	const trx_t*	trx)
5181 {
5182 	fprintf(file, "---");
5183 
5184 	trx_print_latched(file, trx, 600);
5185 
5186 	const ReadView*	read_view = trx_get_read_view(trx);
5187 
5188 	if (read_view != NULL) {
5189 		read_view->print_limits(file);
5190 	}
5191 
5192 	if (trx->lock.que_state == TRX_QUE_LOCK_WAIT) {
5193 
5194 		fprintf(file,
5195 			"------- TRX HAS BEEN WAITING %lu SEC"
5196 			" FOR THIS LOCK TO BE GRANTED:\n",
5197 			(ulong) difftime(ut_time(), trx->lock.wait_started));
5198 
5199 		if (lock_get_type_low(trx->lock.wait_lock) == LOCK_REC) {
5200 			lock_rec_print(file, trx->lock.wait_lock);
5201 		} else {
5202 			lock_table_print(file, trx->lock.wait_lock);
5203 		}
5204 
5205 		fprintf(file, "------------------\n");
5206 	}
5207 }
5208 
5209 /*********************************************************************//**
5210 Fetches into the buffer pool the page referred to by a record lock,
5211 releasing the lock mutex and the trx_sys_t::mutex if the page is read in.
5212 @return true if page was read from the tablespace */
5213 static
5214 bool
5215 lock_rec_fetch_page(
5216 /*================*/
5217 	const lock_t*	lock)	/*!< in: record lock */
5218 {
5219 	ut_ad(lock_get_type_low(lock) == LOCK_REC);
5220 
5221 	ulint			space_id = lock->un_member.rec_lock.space;
5222 	fil_space_t*		space;
5223 	bool			found;
5224 	const page_size_t&	page_size = fil_space_get_page_size(space_id,
5225 								    &found);
5226 	ulint			page_no = lock->un_member.rec_lock.page_no;
5227 
5228 	/* Check if the .ibd file exists. */
5229 	if (found) {
5230 		mtr_t	mtr;
5231 
5232 		lock_mutex_exit();
5233 
5234 		mutex_exit(&trx_sys->mutex);
5235 
5236 		if (srv_show_verbose_locks) {
5237 			DEBUG_SYNC_C("innodb_monitor_before_lock_page_read");
5238 
5239 			/* Check whether the space still exists; only
5240 			when the space is valid do we try to get the page. */
5241 			space = fil_space_acquire(space_id);
5242 			if (space) {
5243 				mtr_start(&mtr);
5244 				buf_page_get_gen(
5245 					page_id_t(space_id, page_no), page_size,
5246 					RW_NO_LATCH, NULL,
5247 					BUF_GET_POSSIBLY_FREED,
5248 					__FILE__, __LINE__, &mtr);
5249 				mtr_commit(&mtr);
5250 				fil_space_release(space);
5251 			}
5252 		}
5253 
5254 		lock_mutex_enter();
5255 
5256 		mutex_enter(&trx_sys->mutex);
5257 
5258 		return(true);
5259 	}
5260 
5261 	return(false);
5262 }
5263 
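/* Editor's note: because lock_rec_fetch_page() may drop both the lock
mutex and trx_sys->mutex, a "true" return invalidates any lock or trx
pointers that the caller obtained under those mutexes. This is why
lock_trx_print_locks() below propagates "false" upwards, forcing
lock_print_info_all_transactions() to reposition its iterators. */
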
5264 /*********************************************************************//**
5265 Prints info of locks for a transaction.
5266 @return true if all printed, false if latches were released. */
5267 static
5268 bool
5269 lock_trx_print_locks(
5270 /*=================*/
5271 	FILE*		file,		/*!< in/out: File to write */
5272 	const trx_t*	trx,		/*!< in: current transaction */
5273 	TrxLockIterator&iter,		/*!< in: transaction lock iterator */
5274 	bool		load_block)	/*!< in: if true then read block
5275 					from disk */
5276 {
5277 	const lock_t* lock;
5278 
5279 	/* Iterate over the transaction's locks. */
5280 	while ((lock = iter.current(trx)) != 0) {
5281 
5282 		if (lock_get_type_low(lock) == LOCK_REC) {
5283 
5284 			if (load_block) {
5285 
5286 				/* Note: lock_rec_fetch_page() will
5287 				release both the lock mutex and the
5288 				trx_sys_t::mutex if it does a read
5289 				from disk. */
5290 
5291 				if (lock_rec_fetch_page(lock)) {
5292 					/* We need to resync the
5293 					current transaction. */
5294 					return(false);
5295 				}
5296 
5297 				/* It is a single table tablespace
5298 				and the .ibd file is missing
5299 				(TRUNCATE TABLE probably stole the
5300 				locks): just print the lock without
5301 				attempting to load the page in the
5302 				buffer pool. */
5303 
5304 				fprintf(file,
5305 					"RECORD LOCKS on non-existing"
5306 					" space %u\n",
5307 					lock->un_member.rec_lock.space);
5308 			}
5309 
5310 			/* Print all the record locks on the page from
5311 			the record lock bitmap */
5312 
5313 			lock_rec_print(file, lock);
5314 
5315 			load_block = true;
5316 
5317 		} else {
5318 			ut_ad(lock_get_type_low(lock) & LOCK_TABLE);
5319 
5320 			lock_table_print(file, lock);
5321 		}
5322 
5323 		if (iter.next() >= srv_show_locks_held) {
5324 
5325 			fprintf(file,
5326 				"TOO MANY LOCKS PRINTED FOR THIS TRX:"
5327 				" SUPPRESSING FURTHER PRINTS\n");
5328 
5329 			break;
5330 		}
5331 	}
5332 
5333 	return(true);
5334 }
5335 
5336 /*********************************************************************//**
5337 Prints info of locks for each transaction. This function assumes that the
5338 caller holds the lock mutex and more importantly it will release the lock
5339 mutex on behalf of the caller. (This should be fixed in the future). */
5340 void
5341 lock_print_info_all_transactions(
5342 /*=============================*/
5343 	FILE*		file)	/*!< in/out: file where to print */
5344 {
5345 	ut_ad(lock_mutex_own());
5346 
5347 	fprintf(file, "LIST OF TRANSACTIONS FOR EACH SESSION:\n");
5348 
5349 	mutex_enter(&trx_sys->mutex);
5350 
5351 	/* First print info on non-active transactions */
5352 
5353 	/* NOTE: information of auto-commit non-locking read-only
5354 	transactions will be omitted here. The information will be
5355 	available from INFORMATION_SCHEMA.INNODB_TRX. */
5356 
5357 	PrintNotStarted	print_not_started(file);
5358 	ut_list_map(trx_sys->mysql_trx_list, print_not_started);
5359 
5360 	const trx_t*	trx;
5361 	TrxListIterator	trx_iter;
5362 	const trx_t*	prev_trx = 0;
5363 
5364 	/* Control whether a block should be fetched from the buffer pool. */
5365 	bool		load_block = true;
5366 	bool		monitor = srv_print_innodb_lock_monitor && (srv_show_locks_held != 0);
5367 
5368 	while ((trx = trx_iter.current()) != 0) {
5369 
5370 		check_trx_state(trx);
5371 
5372 		if (trx != prev_trx) {
5373 			lock_trx_print_wait_and_mvcc_state(file, trx);
5374 			prev_trx = trx;
5375 
5376 			/* The transaction being printed is no longer
5377 			the one that read the page in, so we need to
5378 			force a fresh page read. */
5379 			load_block = true;
5380 		}
5381 
5382 		/* If we need to print the locked record contents then we
5383 		need to fetch the containing block from the buffer pool. */
5384 		if (monitor) {
5385 
5386 			/* Print the locks owned by the current transaction. */
5387 			TrxLockIterator& lock_iter = trx_iter.lock_iter();
5388 
5389 			if (!lock_trx_print_locks(
5390 					file, trx, lock_iter, load_block)) {
5391 
5392 				/* Resync trx_iter: the trx_sys->mutex
5393 				and the lock mutex were released after a
5394 				page was successfully read in. On the
5395 				next call to lock_trx_print_locks() we
5396 				should simply print the contents of the
5397 				page just read in, without fetching it
5398 				from disk again. */
5399 				load_block = false;
5400 
5401 				continue;
5402 			}
5403 		}
5404 
5405 		load_block = true;
5406 
5407 		/* All record lock details were printed without fetching
5408 		a page from disk, or we didn't need to print the detail. */
5409 		trx_iter.next();
5410 	}
5411 
5412 	lock_mutex_exit();
5413 	mutex_exit(&trx_sys->mutex);
5414 
5415 	ut_ad(lock_validate());
5416 }
5417 
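/* Editor's note: a usage sketch for the function above (illustrative
only). The lock mutex is normally acquired by lock_print_info_summary(),
whose tail appears near the top of this section; the function above then
releases it, together with trx_sys->mutex, on the caller's behalf:

	lock_mutex_enter();
	lock_print_info_all_transactions(file);
	// both the lock mutex and trx_sys->mutex are released here
*/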
5418 
5419 #ifdef UNIV_DEBUG
5420 /*********************************************************************//**
5421 Find the lock in the trx_t::trx_lock_t::table_locks vector.
5422 @return true if found */
5423 static
5424 bool
5425 lock_trx_table_locks_find(
5426 /*======================*/
5427 	trx_t*		trx,		/*!< in: trx to validate */
5428 	const lock_t*	find_lock)	/*!< in: lock to find */
5429 {
5430 	bool		found = false;
5431 
5432 	trx_mutex_enter(trx);
5433 
5434 	typedef lock_pool_t::const_reverse_iterator iterator;
5435 
5436 	iterator	end = trx->lock.table_locks.rend();
5437 
5438 	for (iterator it = trx->lock.table_locks.rbegin(); it != end; ++it) {
5439 
5440 		const lock_t*	lock = *it;
5441 
5442 		if (lock == NULL) {
5443 
5444 			continue;
5445 
5446 		} else if (lock == find_lock) {
5447 
5448 			/* Can't be duplicates. */
5449 			ut_a(!found);
5450 			found = true;
5451 		}
5452 
5453 		ut_a(trx == lock->trx);
5454 		ut_a(lock_get_type_low(lock) & LOCK_TABLE);
5455 		ut_a(lock->un_member.tab_lock.table != NULL);
5456 	}
5457 
5458 	trx_mutex_exit(trx);
5459 
5460 	return(found);
5461 }
5462 
5463 /*********************************************************************//**
5464 Validates the lock queue on a table.
5465 @return TRUE if ok */
5466 static
5467 ibool
5468 lock_table_queue_validate(
5469 /*======================*/
5470 	const dict_table_t*	table)	/*!< in: table */
5471 {
5472 	const lock_t*	lock;
5473 
5474 	ut_ad(lock_mutex_own());
5475 	ut_ad(trx_sys_mutex_own());
5476 
5477 	for (lock = UT_LIST_GET_FIRST(table->locks);
5478 	     lock != NULL;
5479 	     lock = UT_LIST_GET_NEXT(un_member.tab_lock.locks, lock)) {
5480 
5481 		/* lock->trx->state cannot change from or to NOT_STARTED
5482 		while we are holding the trx_sys->mutex. It may change
5483 		from ACTIVE to PREPARED, but it may not change to
5484 		COMMITTED, because we are holding the lock_sys->mutex. */
5485 		ut_ad(trx_assert_started(lock->trx));
5486 
5487 		if (!lock_get_wait(lock)) {
5488 
5489 			ut_a(!lock_table_other_has_incompatible(
5490 				     lock->trx, 0, table,
5491 				     lock_get_mode(lock)));
5492 		} else {
5493 
5494 			ut_a(lock_table_has_to_wait_in_queue(lock));
5495 		}
5496 
5497 		ut_a(lock_trx_table_locks_find(lock->trx, lock));
5498 	}
5499 
5500 	return(TRUE);
5501 }
5502 
5503 /*********************************************************************//**
5504 Validates the lock queue on a single record.
5505 @return TRUE if ok */
5506 static
5507 ibool
5508 lock_rec_queue_validate(
5509 /*====================*/
5510 	ibool			locked_lock_trx_sys,
5511 					/*!< in: if the caller holds
5512 					both the lock mutex and
5513 					trx_sys_t::mutex. */
5514 	const buf_block_t*	block,	/*!< in: buffer block containing rec */
5515 	const rec_t*		rec,	/*!< in: record to look at */
5516 	const dict_index_t*	index,	/*!< in: index, or NULL if not known */
5517 	const ulint*		offsets)/*!< in: rec_get_offsets(rec, index) */
5518 {
5519 	const trx_t*	impl_trx;
5520 	const lock_t*	lock;
5521 	ulint		heap_no;
5522 
5523 	ut_a(rec);
5524 	ut_a(block->frame == page_align(rec));
5525 	ut_ad(rec_offs_validate(rec, index, offsets));
5526 	ut_ad(!page_rec_is_comp(rec) == !rec_offs_comp(offsets));
5527 	ut_ad(lock_mutex_own() == locked_lock_trx_sys);
5528 	ut_ad(!index || dict_index_is_clust(index)
5529 	      || !dict_index_is_online_ddl(index));
5530 
5531 	heap_no = page_rec_get_heap_no(rec);
5532 
5533 	if (!locked_lock_trx_sys) {
5534 		lock_mutex_enter();
5535 		mutex_enter(&trx_sys->mutex);
5536 	}
5537 
5538 	if (!page_rec_is_user_rec(rec)) {
5539 
5540 		for (lock = lock_rec_get_first(lock_sys->rec_hash,
5541 					       block, heap_no);
5542 		     lock != NULL;
5543 		     lock = lock_rec_get_next_const(heap_no, lock)) {
5544 
5545 			ut_ad(!trx_is_ac_nl_ro(lock->trx));
5546 
5547 			if (lock_get_wait(lock)) {
5548 				ut_a(lock_rec_has_to_wait_in_queue(lock));
5549 			}
5550 
5551 			if (index != NULL) {
5552 				ut_a(lock->index == index);
5553 			}
5554 		}
5555 
5556 		goto func_exit;
5557 	}
5558 
5559 	if (index == NULL) {
5560 
5561 		/* Nothing we can do */
5562 
5563 	} else if (dict_index_is_clust(index)) {
5564 		trx_id_t	trx_id;
5565 
5566 		/* Unlike the non-debug code, this invariant can only succeed
5567 		if the check and assertion are covered by the lock mutex. */
5568 
5569 		trx_id = lock_clust_rec_some_has_impl(rec, index, offsets);
5570 		impl_trx = trx_rw_is_active_low(trx_id, NULL);
5571 
5572 		ut_ad(lock_mutex_own());
5573 		/* impl_trx cannot be committed until lock_mutex_exit()
5574 		because lock_trx_release_locks() acquires lock_sys->mutex */
5575 
5576 		if (impl_trx != NULL) {
5577 			const lock_t*	other_lock
5578 				= lock_rec_other_has_expl_req(
5579 					LOCK_S, block, true, heap_no,
5580 					impl_trx);
5581 
5582 			/* The impl_trx is holding an implicit lock on the
5583 			given record 'rec'. So there cannot be another
5584 			explicit granted lock.  Also, there can be another
5585 			explicit waiting lock only if the impl_trx has an
5586 			explicit granted lock. */
5587 
5588 			if (other_lock != NULL) {
5589 				ut_a(lock_get_wait(other_lock));
5590 				ut_a(lock_rec_has_expl(
5591 					LOCK_X | LOCK_REC_NOT_GAP,
5592 					block, heap_no, impl_trx));
5593 			}
5594 		}
5595 	}
5596 
5597 	for (lock = lock_rec_get_first(lock_sys->rec_hash, block, heap_no);
5598 	     lock != NULL;
5599 	     lock = lock_rec_get_next_const(heap_no, lock)) {
5600 
5601 		ut_ad(!trx_is_ac_nl_ro(lock->trx));
5602 
5603 		if (index) {
5604 			ut_a(lock->index == index);
5605 		}
5606 
5607 		if (!lock_rec_get_gap(lock) && !lock_get_wait(lock)) {
5608 
5609 			lock_mode	mode;
5610 
5611 			if (lock_get_mode(lock) == LOCK_S) {
5612 				mode = LOCK_X;
5613 			} else {
5614 				mode = LOCK_S;
5615 			}
5616 
5617 			const lock_t*	other_lock
5618 				= lock_rec_other_has_expl_req(
5619 					mode, block, false, heap_no,
5620 					lock->trx);
5621 			ut_a(!other_lock);
5622 
5623 		} else if (lock_get_wait(lock) && !lock_rec_get_gap(lock)) {
5624 
5625 			ut_a(lock_rec_has_to_wait_in_queue(lock));
5626 		}
5627 	}
5628 
5629 func_exit:
5630 	if (!locked_lock_trx_sys) {
5631 		lock_mutex_exit();
5632 		mutex_exit(&trx_sys->mutex);
5633 	}
5634 
5635 	return(TRUE);
5636 }
5637 
5638 /*********************************************************************//**
5639 Validates the record lock queues on a page.
5640 @return TRUE if ok */
5641 static
5642 ibool
5643 lock_rec_validate_page(
5644 /*===================*/
5645 	const buf_block_t*	block)	/*!< in: buffer block */
5646 {
5647 	const lock_t*	lock;
5648 	const rec_t*	rec;
5649 	ulint		nth_lock	= 0;
5650 	ulint		nth_bit		= 0;
5651 	ulint		i;
5652 	mem_heap_t*	heap		= NULL;
5653 	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
5654 	ulint*		offsets		= offsets_;
5655 	rec_offs_init(offsets_);
5656 
5657 	ut_ad(!lock_mutex_own());
5658 
5659 	lock_mutex_enter();
5660 	mutex_enter(&trx_sys->mutex);
5661 loop:
5662 	lock = lock_rec_get_first_on_page_addr(
5663 		lock_sys->rec_hash,
5664 		block->page.id.space(), block->page.id.page_no());
5665 
5666 	if (!lock) {
5667 		goto function_exit;
5668 	}
5669 
5670 	ut_ad(!block->page.file_page_was_freed);
5671 
5672 	for (i = 0; i < nth_lock; i++) {
5673 
5674 		lock = lock_rec_get_next_on_page_const(lock);
5675 
5676 		if (!lock) {
5677 			goto function_exit;
5678 		}
5679 	}
5680 
5681 	ut_ad(!trx_is_ac_nl_ro(lock->trx));
5682 
5683 # ifdef UNIV_DEBUG
5684 	/* Only validate the record queues when this thread is not
5685 	holding a space->latch: doing so while holding the latch
5686 	would violate the latching order and could deadlock (see
5687 	also the comment at the lock_rec_queue_validate() call). */
5688 	if (!sync_check_find(SYNC_FSP))
5689 # endif /* UNIV_DEBUG */
5690 	for (i = nth_bit; i < lock_rec_get_n_bits(lock); i++) {
5691 
5692 		if (i == 1 || lock_rec_get_nth_bit(lock, i)) {
5693 
5694 			rec = page_find_rec_with_heap_no(block->frame, i);
5695 			ut_a(rec);
5696 			offsets = rec_get_offsets(rec, lock->index, offsets,
5697 						  ULINT_UNDEFINED, &heap);
5698 
5699 			/* If this thread is holding the file space
5700 			latch (fil_space_t::latch), the following
5701 			check WILL break the latching order and may
5702 			cause a deadlock of threads. */
5703 
5704 			lock_rec_queue_validate(
5705 				TRUE, block, rec, lock->index, offsets);
5706 
5707 			nth_bit = i + 1;
5708 
5709 			goto loop;
5710 		}
5711 	}
5712 
5713 	nth_bit = 0;
5714 	nth_lock++;
5715 
5716 	goto loop;
5717 
5718 function_exit:
5719 	lock_mutex_exit();
5720 	mutex_exit(&trx_sys->mutex);
5721 
5722 	if (heap != NULL) {
5723 		mem_heap_free(heap);
5724 	}
5725 	return(TRUE);
5726 }
5727 
5728 /*********************************************************************//**
5729 Validates the table locks.
5730 @return TRUE if ok */
5731 static
5732 ibool
5733 lock_validate_table_locks(
5734 /*======================*/
5735 	const trx_ut_list_t*	trx_list)	/*!< in: trx list */
5736 {
5737 	const trx_t*	trx;
5738 
5739 	ut_ad(lock_mutex_own());
5740 	ut_ad(trx_sys_mutex_own());
5741 
5742 	ut_ad(trx_list == &trx_sys->rw_trx_list);
5743 
5744 	for (trx = UT_LIST_GET_FIRST(*trx_list);
5745 	     trx != NULL;
5746 	     trx = UT_LIST_GET_NEXT(trx_list, trx)) {
5747 
5748 		const lock_t*	lock;
5749 
5750 		check_trx_state(trx);
5751 
5752 		for (lock = UT_LIST_GET_FIRST(trx->lock.trx_locks);
5753 		     lock != NULL;
5754 		     lock = UT_LIST_GET_NEXT(trx_locks, lock)) {
5755 
5756 			if (lock_get_type_low(lock) & LOCK_TABLE) {
5757 
5758 				lock_table_queue_validate(
5759 					lock->un_member.tab_lock.table);
5760 			}
5761 		}
5762 	}
5763 
5764 	return(TRUE);
5765 }
5766 
5767 /*********************************************************************//**
5768 Validate record locks up to a limit.
5769 @return lock at limit or NULL if no more locks in the hash bucket */
5770 static MY_ATTRIBUTE((warn_unused_result))
5771 const lock_t*
5772 lock_rec_validate(
5773 /*==============*/
5774 	ulint		start,		/*!< in: lock_sys->rec_hash
5775 					bucket */
5776 	ib_uint64_t*	limit)		/*!< in/out: upper limit of
5777 					(space, page_no) */
5778 {
5779 	ut_ad(lock_mutex_own());
5780 	ut_ad(trx_sys_mutex_own());
5781 
5782 	for (const lock_t* lock = static_cast<const lock_t*>(
5783 			HASH_GET_FIRST(lock_sys->rec_hash, start));
5784 	     lock != NULL;
5785 	     lock = static_cast<const lock_t*>(HASH_GET_NEXT(hash, lock))) {
5786 
5787 		ib_uint64_t	current;
5788 
5789 		ut_ad(!trx_is_ac_nl_ro(lock->trx));
5790 		ut_ad(lock_get_type(lock) == LOCK_REC);
5791 
5792 		current = ut_ull_create(
5793 			lock->un_member.rec_lock.space,
5794 			lock->un_member.rec_lock.page_no);
5795 
5796 		if (current > *limit) {
5797 			*limit = current + 1;
5798 			return(lock);
5799 		}
5800 	}
5801 
5802 	return(0);
5803 }
5804 
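/* Editor's note: ut_ull_create() above packs (space, page_no) into one
64-bit key, assuming the usual high<<32|low encoding:

	current = ((ib_uint64_t) space << 32) | page_no;
	// e.g. space 5, page_no 7  ->  0x0000000500000007

This totally orders page addresses, so "*limit" can act as a resumable
cursor across validation passes that drop and re-acquire the mutexes. */
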
5805 /*********************************************************************//**
5806 Validate a record lock's block */
5807 static
5808 void
5809 lock_rec_block_validate(
5810 /*====================*/
5811 	ulint		space_id,
5812 	ulint		page_no)
5813 {
5814 	/* The lock and the block that it is referring to may be freed at
5815 	this point. We pass BUF_GET_POSSIBLY_FREED to skip a debug check.
5816 	If the lock exists in lock_rec_validate_page() we assert
5817 	!block->page.file_page_was_freed. */
5818 
5819 	buf_block_t*	block;
5820 	mtr_t		mtr;
5821 
5822 	/* Make sure that the tablespace is not deleted while we are
5823 	trying to access the page. */
5824 	if (fil_space_t* space = fil_space_acquire(space_id)) {
5825 		dberr_t err = DB_SUCCESS;
5826 		mtr_start(&mtr);
5827 
5828 		block = buf_page_get_gen(
5829 			page_id_t(space_id, page_no),
5830 			page_size_t(space->flags),
5831 			RW_X_LATCH, NULL,
5832 			BUF_GET_POSSIBLY_FREED,
5833 			__FILE__, __LINE__, &mtr, false, &err);
5834 
5835 		if (err != DB_SUCCESS) {
5836 			ib::error() << "Lock rec block validate failed for tablespace "
5837 				   << space->name
5838 				   << " space_id " << space_id
5839 				   << " page_no " << page_no << " err " << err;
5840 		}
5841 
5842 		buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK);
5843 
5844 		ut_ad(lock_rec_validate_page(block));
5845 		mtr_commit(&mtr);
5846 
5847 		fil_space_release(space);
5848 	}
5849 }
5850 
5851 /*********************************************************************//**
5852 Validates the lock system.
5853 @return TRUE if ok */
5854 static
5855 bool
5856 lock_validate()
5857 /*===========*/
5858 {
5859 	typedef	std::pair<ulint, ulint>		page_addr_t;
5860 	typedef std::set<
5861 		page_addr_t,
5862 		std::less<page_addr_t>,
5863 		ut_allocator<page_addr_t> >	page_addr_set;
5864 
5865 	page_addr_set	pages;
5866 
5867 	lock_mutex_enter();
5868 	mutex_enter(&trx_sys->mutex);
5869 
5870 	ut_a(lock_validate_table_locks(&trx_sys->rw_trx_list));
5871 
5872 	/* Iterate over all the record locks and validate the locks. We
5873 	don't want to hog the lock_sys_t::mutex and the trx_sys_t::mutex.
5874 	Release both mutexes during the validation check. */
5875 
5876 	for (ulint i = 0; i < hash_get_n_cells(lock_sys->rec_hash); i++) {
5877 		const lock_t*	lock;
5878 		ib_uint64_t	limit = 0;
5879 
5880 		while ((lock = lock_rec_validate(i, &limit)) != 0) {
5881 
5882 			ulint	space = lock->un_member.rec_lock.space;
5883 			ulint	page_no = lock->un_member.rec_lock.page_no;
5884 
5885 			pages.insert(std::make_pair(space, page_no));
5886 		}
5887 	}
5888 
5889 	mutex_exit(&trx_sys->mutex);
5890 	lock_mutex_exit();
5891 
5892 	for (page_addr_set::const_iterator it = pages.begin();
5893 	     it != pages.end();
5894 	     ++it) {
5895 		lock_rec_block_validate((*it).first, (*it).second);
5896 	}
5897 
5898 	return(true);
5899 }
5900 #endif /* UNIV_DEBUG */
5901 /*============ RECORD LOCK CHECKS FOR ROW OPERATIONS ====================*/
5902 
5903 /*********************************************************************//**
5904 Checks if locks of other transactions prevent an immediate insert of
5905 a record. If they do, first tests if the query thread should anyway
5906 be suspended for some reason; if not, then puts the transaction and
5907 the query thread to the lock wait state and inserts a waiting request
5908 for a gap x-lock to the lock queue.
5909 @return DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
5910 dberr_t
5911 lock_rec_insert_check_and_lock(
5912 /*===========================*/
5913 	ulint		flags,	/*!< in: if BTR_NO_LOCKING_FLAG bit is
5914 				set, does nothing */
5915 	const rec_t*	rec,	/*!< in: record after which to insert */
5916 	buf_block_t*	block,	/*!< in/out: buffer block of rec */
5917 	dict_index_t*	index,	/*!< in: index */
5918 	que_thr_t*	thr,	/*!< in: query thread */
5919 	mtr_t*		mtr,	/*!< in/out: mini-transaction */
5920 	ibool*		inherit)/*!< out: set to TRUE if the new
5921 				inserted record maybe should inherit
5922 				LOCK_GAP type locks from the successor
5923 				record */
5924 {
5925 	ut_ad(block->frame == page_align(rec));
5926 	ut_ad(!dict_index_is_online_ddl(index)
5927 	      || dict_index_is_clust(index)
5928 	      || (flags & BTR_CREATE_FLAG));
5929 	ut_ad(mtr->is_named_space(index->space));
5930 	ut_ad((flags & BTR_NO_LOCKING_FLAG) || thr);
5931 
5932 	if (flags & BTR_NO_LOCKING_FLAG) {
5933 
5934 		return(DB_SUCCESS);
5935 	}
5936 
5937 	ut_ad(!dict_table_is_temporary(index->table));
5938 
5939 	dberr_t		err;
5940 	lock_t*		lock;
5941 	ibool		inherit_in = *inherit;
5942 	trx_t*		trx = thr_get_trx(thr);
5943 	const rec_t*	next_rec = page_rec_get_next_const(rec);
5944 	ulint		heap_no = page_rec_get_heap_no(next_rec);
5945 
5946 	lock_mutex_enter();
5947 	/* Because this code is invoked for a running transaction by
5948 	the thread that is serving the transaction, it is not necessary
5949 	to hold trx->mutex here. */
5950 
5951 	/* When inserting a record into an index, the table must be at
5952 	least IX-locked. When we are building an index, we would pass
5953 	BTR_NO_LOCKING_FLAG and skip the locking altogether. */
5954 	ut_ad(lock_table_has(trx, index->table, LOCK_IX));
5955 
5956 	lock = lock_rec_get_first(lock_sys->rec_hash, block, heap_no);
5957 
5958 	if (lock == NULL) {
5959 		/* We optimize CPU time usage in the simplest case */
5960 
5961 		lock_mutex_exit();
5962 
5963 		if (inherit_in && !dict_index_is_clust(index)) {
5964 			/* Update the page max trx id field */
5965 			page_update_max_trx_id(block,
5966 					       buf_block_get_page_zip(block),
5967 					       trx->id, mtr);
5968 		}
5969 
5970 		*inherit = FALSE;
5971 
5972 		return(DB_SUCCESS);
5973 	}
5974 
5975 	/* Spatial index does not use GAP lock protection. It uses
5976 	"predicate lock" to protect the "range". Release the lock
5977 	mutex acquired above before returning. */
5978 	if (dict_index_is_spatial(index)) {
5979 		lock_mutex_exit();
		return(DB_SUCCESS);
	}
5980 
5981 	*inherit = TRUE;
5982 
5983 	/* If another transaction has an explicit lock request which locks
5984 	the gap, waiting or granted, on the successor, the insert has to wait.
5985 
5986 	An exception is the case where the lock by another transaction
5987 	is a gap type lock which it placed to wait for its turn to insert. We
5988 	do not consider that kind of a lock conflicting with our insert. This
5989 	eliminates an unnecessary deadlock which resulted when 2 transactions
5990 	had to wait for their insert. Both had waiting gap type lock requests
5991 	on the successor, which produced an unnecessary deadlock. */
5992 
5993 	const ulint	type_mode = LOCK_X | LOCK_GAP | LOCK_INSERT_INTENTION;
5994 
5995 	const lock_t*	wait_for = lock_rec_other_has_conflicting(
5996 				type_mode, block, heap_no, trx);
5997 
5998 	if (wait_for != NULL) {
5999 
6000 		RecLock	rec_lock(thr, index, block, heap_no, type_mode);
6001 
6002 		trx_mutex_enter(trx);
6003 
6004 		err = rec_lock.add_to_waitq(wait_for);
6005 
6006 		trx_mutex_exit(trx);
6007 
6008 	} else {
6009 		err = DB_SUCCESS;
6010 	}
6011 
6012 	lock_mutex_exit();
6013 
6014 	switch (err) {
6015 	case DB_SUCCESS_LOCKED_REC:
6016 		err = DB_SUCCESS;
6017 		/* fall through */
6018 	case DB_SUCCESS:
6019 		if (!inherit_in || dict_index_is_clust(index)) {
6020 			break;
6021 		}
6022 
6023 		/* Update the page max trx id field */
6024 		page_update_max_trx_id(
6025 			block, buf_block_get_page_zip(block), trx->id, mtr);
6026 	default:
6027 		/* We only care about the two return values. */
6028 		break;
6029 	}
6030 
6031 #ifdef UNIV_DEBUG
6032 	{
6033 		mem_heap_t*	heap		= NULL;
6034 		ulint		offsets_[REC_OFFS_NORMAL_SIZE];
6035 		const ulint*	offsets;
6036 		rec_offs_init(offsets_);
6037 
6038 		offsets = rec_get_offsets(next_rec, index, offsets_,
6039 					  ULINT_UNDEFINED, &heap);
6040 
6041 		ut_ad(lock_rec_queue_validate(
6042 				FALSE, block, next_rec, index, offsets));
6043 
6044 		if (heap != NULL) {
6045 			mem_heap_free(heap);
6046 		}
6047 	}
6048 #endif /* UNIV_DEBUG */
6049 
6050 	return(err);
6051 }
6052 
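/* Editor's note: a worked example of the gap/insert-intention rule in
lock_rec_insert_check_and_lock() above (illustration only). If T1 and T2
both want to insert into the same gap, each enqueues a waiting
LOCK_X | LOCK_GAP | LOCK_INSERT_INTENTION request on the successor
record. Since a waiting gap-type lock held by another inserter is not
treated as conflicting, T1 and T2 do not block each other once their
respective waits end, avoiding the spurious deadlock described in the
comment inside the function. */
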
6053 /*********************************************************************//**
6054 Creates an explicit record lock for a running transaction that currently only
6055 has an implicit lock on the record. The transaction instance must have a
6056 reference count > 0 so that it can't be committed and freed before this
6057 function has completed. */
6058 static
6059 void
6060 lock_rec_convert_impl_to_expl_for_trx(
6061 /*==================================*/
6062 	const buf_block_t*	block,	/*!< in: buffer block of rec */
6063 	const rec_t*		rec,	/*!< in: user record on page */
6064 	dict_index_t*		index,	/*!< in: index of record */
6065 	const ulint*		offsets,/*!< in: rec_get_offsets(rec, index) */
6066 	trx_t*			trx,	/*!< in/out: active transaction */
6067 	ulint			heap_no)/*!< in: rec heap number to lock */
6068 {
6069 	ut_ad(trx_is_referenced(trx));
6070 
6071 	DEBUG_SYNC_C("before_lock_rec_convert_impl_to_expl_for_trx");
6072 
6073 	lock_mutex_enter();
6074 
6075 	ut_ad(!trx_state_eq(trx, TRX_STATE_NOT_STARTED));
6076 
6077 	if (!trx_state_eq(trx, TRX_STATE_COMMITTED_IN_MEMORY)
6078 	    && !lock_rec_has_expl(LOCK_X | LOCK_REC_NOT_GAP,
6079 				  block, heap_no, trx)) {
6080 
6081 		ulint	type_mode;
6082 
6083 		type_mode = (LOCK_REC | LOCK_X | LOCK_REC_NOT_GAP);
6084 
6085 		lock_rec_add_to_queue(
6086 			type_mode, block, heap_no, index, trx, FALSE);
6087 	}
6088 
6089 	lock_mutex_exit();
6090 
6091 	trx_release_reference(trx);
6092 
6093 	DEBUG_SYNC_C("after_lock_rec_convert_impl_to_expl_for_trx");
6094 }
6095 
6096 /*********************************************************************//**
6097 If a transaction has an implicit x-lock on a record, but no explicit x-lock
6098 set on the record, sets one for it. */
6099 static
6100 void
6101 lock_rec_convert_impl_to_expl(
6102 /*==========================*/
6103 	const buf_block_t*	block,	/*!< in: buffer block of rec */
6104 	const rec_t*		rec,	/*!< in: user record on page */
6105 	dict_index_t*		index,	/*!< in: index of record */
6106 	const ulint*		offsets)/*!< in: rec_get_offsets(rec, index) */
6107 {
6108 	trx_t*		trx;
6109 
6110 	ut_ad(!lock_mutex_own());
6111 	ut_ad(page_rec_is_user_rec(rec));
6112 	ut_ad(rec_offs_validate(rec, index, offsets));
6113 	ut_ad(!page_rec_is_comp(rec) == !rec_offs_comp(offsets));
6114 
6115 	if (dict_index_is_clust(index)) {
6116 		trx_id_t	trx_id;
6117 
6118 		trx_id = lock_clust_rec_some_has_impl(rec, index, offsets);
6119 
6120 		trx = trx_rw_is_active(trx_id, NULL, true);
6121 	} else {
6122 		ut_ad(!dict_index_is_online_ddl(index));
6123 
6124 		trx = lock_sec_rec_some_has_impl(rec, index, offsets);
6125 
6126 		ut_ad(!trx || !lock_rec_other_trx_holds_expl(
6127 				LOCK_S | LOCK_REC_NOT_GAP, trx, rec, block));
6128 	}
6129 
6130 	if (trx != 0) {
6131 		ulint	heap_no = page_rec_get_heap_no(rec);
6132 
6133 		ut_ad(trx_is_referenced(trx));
6134 
6135 		/* If the transaction is still active and has no
6136 		explicit x-lock set on the record, set one for it.
6137 		trx cannot be committed until the ref count is zero. */
6138 
6139 		lock_rec_convert_impl_to_expl_for_trx(
6140 			block, rec, index, offsets, trx, heap_no);
6141 	}
6142 }
6143 
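/* Editor's note: a sketch of the reference-counting handshake used by
the conversion functions above. The trx lookup returns the transaction
with its reference count raised (hence the ut_ad(trx_is_referenced(trx))
above), and lock_rec_convert_impl_to_expl_for_trx() drops that reference
via trx_release_reference(). lock_trx_release_locks() further below
spins while trx_is_referenced() holds, so the transaction cannot be
committed and freed underneath the conversion. */
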
6144 void
6145 lock_rec_convert_active_impl_to_expl(
6146 	const buf_block_t*	block,	/*!< in: buffer block of rec */
6147 	const rec_t*		rec,	/*!< in: user record on page */
6148 	dict_index_t*		index,	/*!< in: index of record */
6149 	const ulint*		offsets,/*!< in: rec_get_offsets(rec, index) */
6150 	trx_t*			trx,	/*!< in/out: active transaction */
6151 	ulint			heap_no)/*!< in: rec heap number to lock */
6152 {
6153 	trx_reference(trx, true);
6154 	lock_rec_convert_impl_to_expl_for_trx(block, rec, index, offsets,
6155 					      trx, heap_no);
6156 }
6157 /*********************************************************************//**
6158 Checks if locks of other transactions prevent an immediate modify (update,
6159 delete mark, or delete unmark) of a clustered index record. If they do,
6160 first tests if the query thread should anyway be suspended for some
6161 reason; if not, then puts the transaction and the query thread to the
6162 lock wait state and inserts a waiting request for a record x-lock to the
6163 lock queue.
6164 @return DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
6165 dberr_t
6166 lock_clust_rec_modify_check_and_lock(
6167 /*=================================*/
6168 	ulint			flags,	/*!< in: if BTR_NO_LOCKING_FLAG
6169 					bit is set, does nothing */
6170 	const buf_block_t*	block,	/*!< in: buffer block of rec */
6171 	const rec_t*		rec,	/*!< in: record which should be
6172 					modified */
6173 	dict_index_t*		index,	/*!< in: clustered index */
6174 	const ulint*		offsets,/*!< in: rec_get_offsets(rec, index) */
6175 	que_thr_t*		thr)	/*!< in: query thread */
6176 {
6177 	dberr_t	err;
6178 	ulint	heap_no;
6179 
6180 	ut_ad(rec_offs_validate(rec, index, offsets));
6181 	ut_ad(dict_index_is_clust(index));
6182 	ut_ad(block->frame == page_align(rec));
6183 
6184 	if (flags & BTR_NO_LOCKING_FLAG) {
6185 
6186 		return(DB_SUCCESS);
6187 	}
6188 	ut_ad(!dict_table_is_temporary(index->table));
6189 
6190 	heap_no = rec_offs_comp(offsets)
6191 		? rec_get_heap_no_new(rec)
6192 		: rec_get_heap_no_old(rec);
6193 
6194 	/* If a transaction has no explicit x-lock set on the record, set one
6195 	for it */
6196 
6197 	lock_rec_convert_impl_to_expl(block, rec, index, offsets);
6198 
6199 	lock_mutex_enter();
6200 
6201 	ut_ad(lock_table_has(thr_get_trx(thr), index->table, LOCK_IX));
6202 
6203 	err = lock_rec_lock(TRUE, LOCK_X | LOCK_REC_NOT_GAP,
6204 			    block, heap_no, index, thr);
6205 
6206 	MONITOR_INC(MONITOR_NUM_RECLOCK_REQ);
6207 
6208 	lock_mutex_exit();
6209 
6210 	ut_ad(lock_rec_queue_validate(FALSE, block, rec, index, offsets));
6211 
6212 	if (err == DB_SUCCESS_LOCKED_REC) {
6213 		err = DB_SUCCESS;
6214 	}
6215 
6216 	return(err);
6217 }
6218 
6219 /*********************************************************************//**
6220 Checks if locks of other transactions prevent an immediate modify (delete
6221 mark or delete unmark) of a secondary index record.
6222 @return DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
6223 dberr_t
6224 lock_sec_rec_modify_check_and_lock(
6225 /*===============================*/
6226 	ulint		flags,	/*!< in: if BTR_NO_LOCKING_FLAG
6227 				bit is set, does nothing */
6228 	buf_block_t*	block,	/*!< in/out: buffer block of rec */
6229 	const rec_t*	rec,	/*!< in: record which should be
6230 				modified; NOTE: as this is a secondary
6231 				index, we always have to modify the
6232 				clustered index record first: see the
6233 				comment below */
6234 	dict_index_t*	index,	/*!< in: secondary index */
6235 	que_thr_t*	thr,	/*!< in: query thread
6236 				(can be NULL if BTR_NO_LOCKING_FLAG) */
6237 	mtr_t*		mtr)	/*!< in/out: mini-transaction */
6238 {
6239 	dberr_t	err;
6240 	ulint	heap_no;
6241 
6242 	ut_ad(!dict_index_is_clust(index));
6243 	ut_ad(!dict_index_is_online_ddl(index) || (flags & BTR_CREATE_FLAG));
6244 	ut_ad(block->frame == page_align(rec));
6245 	ut_ad(mtr->is_named_space(index->space));
6246 
6247 	if (flags & BTR_NO_LOCKING_FLAG) {
6248 
6249 		return(DB_SUCCESS);
6250 	}
6251 	ut_ad(!dict_table_is_temporary(index->table));
6252 
6253 	heap_no = page_rec_get_heap_no(rec);
6254 
6255 	/* Another transaction cannot have an implicit lock on the record,
6256 	because when we come here, we already have modified the clustered
6257 	index record, and this would not have been possible if another active
6258 	transaction had modified this secondary index record. */
6259 
6260 	lock_mutex_enter();
6261 
6262 	ut_ad(lock_table_has(thr_get_trx(thr), index->table, LOCK_IX));
6263 
6264 	err = lock_rec_lock(TRUE, LOCK_X | LOCK_REC_NOT_GAP,
6265 			    block, heap_no, index, thr);
6266 
6267 	MONITOR_INC(MONITOR_NUM_RECLOCK_REQ);
6268 
6269 	lock_mutex_exit();
6270 
6271 #ifdef UNIV_DEBUG
6272 	{
6273 		mem_heap_t*	heap		= NULL;
6274 		ulint		offsets_[REC_OFFS_NORMAL_SIZE];
6275 		const ulint*	offsets;
6276 		rec_offs_init(offsets_);
6277 
6278 		offsets = rec_get_offsets(rec, index, offsets_,
6279 					  ULINT_UNDEFINED, &heap);
6280 
6281 		ut_ad(lock_rec_queue_validate(
6282 			FALSE, block, rec, index, offsets));
6283 
6284 		if (heap != NULL) {
6285 			mem_heap_free(heap);
6286 		}
6287 	}
6288 #endif /* UNIV_DEBUG */
6289 
6290 	if (err == DB_SUCCESS || err == DB_SUCCESS_LOCKED_REC) {
6291 		/* Update the page max trx id field */
6292 		/* It might not be necessary to do this if
6293 		err == DB_SUCCESS (no new lock created),
6294 		but it should not cost too much performance. */
6295 		page_update_max_trx_id(block,
6296 				       buf_block_get_page_zip(block),
6297 				       thr_get_trx(thr)->id, mtr);
6298 		err = DB_SUCCESS;
6299 	}
6300 
6301 	return(err);
6302 }
6303 
6304 /*********************************************************************//**
6305 Like lock_clust_rec_read_check_and_lock(), but reads a
6306 secondary index record.
6307 @return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, DB_LOCK_WAIT, DB_DEADLOCK,
6308 or DB_QUE_THR_SUSPENDED */
6309 dberr_t
6310 lock_sec_rec_read_check_and_lock(
6311 /*=============================*/
6312 	ulint			flags,	/*!< in: if BTR_NO_LOCKING_FLAG
6313 					bit is set, does nothing */
6314 	const buf_block_t*	block,	/*!< in: buffer block of rec */
6315 	const rec_t*		rec,	/*!< in: user record or page
6316 					supremum record which should
6317 					be read or passed over by a
6318 					read cursor */
6319 	dict_index_t*		index,	/*!< in: secondary index */
6320 	const ulint*		offsets,/*!< in: rec_get_offsets(rec, index) */
6321 	lock_mode		mode,	/*!< in: mode of the lock which
6322 					the read cursor should set on
6323 					records: LOCK_S or LOCK_X; the
6324 					latter is possible in
6325 					SELECT FOR UPDATE */
6326 	ulint			gap_mode,/*!< in: LOCK_ORDINARY, LOCK_GAP, or
6327 					LOCK_REC_NOT_GAP */
6328 	que_thr_t*		thr)	/*!< in: query thread */
6329 {
6330 	dberr_t	err;
6331 	ulint	heap_no;
6332 
6333 	ut_ad(!dict_index_is_clust(index));
6334 	ut_ad(!dict_index_is_online_ddl(index));
6335 	ut_ad(block->frame == page_align(rec));
6336 	ut_ad(page_rec_is_user_rec(rec) || page_rec_is_supremum(rec));
6337 	ut_ad(rec_offs_validate(rec, index, offsets));
6338 	ut_ad(mode == LOCK_X || mode == LOCK_S);
6339 
6340 	if ((flags & BTR_NO_LOCKING_FLAG)
6341 	    || srv_read_only_mode
6342 	    || dict_table_is_temporary(index->table)) {
6343 
6344 		return(DB_SUCCESS);
6345 	}
6346 
6347 	heap_no = page_rec_get_heap_no(rec);
6348 
6349 	/* Some transaction may have an implicit x-lock on the record only
6350 	if the max trx id for the page >= min trx id for the trx list or a
6351 	database recovery is running. */
6352 
6353 	if ((page_get_max_trx_id(block->frame) >= trx_rw_min_trx_id()
6354 	     || recv_recovery_is_on())
6355 	    && !page_rec_is_supremum(rec)) {
6356 
6357 		lock_rec_convert_impl_to_expl(block, rec, index, offsets);
6358 	}
6359 
6360 	lock_mutex_enter();
6361 
6362 	ut_ad(mode != LOCK_X
6363 	      || lock_table_has(thr_get_trx(thr), index->table, LOCK_IX));
6364 	ut_ad(mode != LOCK_S
6365 	      || lock_table_has(thr_get_trx(thr), index->table, LOCK_IS));
6366 
6367 	err = lock_rec_lock(FALSE, mode | gap_mode,
6368 			    block, heap_no, index, thr);
6369 
6370 	MONITOR_INC(MONITOR_NUM_RECLOCK_REQ);
6371 
6372 	lock_mutex_exit();
6373 
6374 	ut_ad(lock_rec_queue_validate(FALSE, block, rec, index, offsets));
6375 
6376 	return(err);
6377 }
6378 
6379 /*********************************************************************//**
6380 Checks if locks of other transactions prevent an immediate read, or passing
6381 over by a read cursor, of a clustered index record. If they do, first tests
6382 if the query thread should anyway be suspended for some reason; if not, then
6383 puts the transaction and the query thread to the lock wait state and inserts a
6384 waiting request for a record lock to the lock queue. Sets the requested mode
6385 lock on the record.
6386 @return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, DB_LOCK_WAIT, DB_DEADLOCK,
6387 or DB_QUE_THR_SUSPENDED */
6388 dberr_t
6389 lock_clust_rec_read_check_and_lock(
6390 /*===============================*/
6391 	ulint			flags,	/*!< in: if BTR_NO_LOCKING_FLAG
6392 					bit is set, does nothing */
6393 	const buf_block_t*	block,	/*!< in: buffer block of rec */
6394 	const rec_t*		rec,	/*!< in: user record or page
6395 					supremum record which should
6396 					be read or passed over by a
6397 					read cursor */
6398 	dict_index_t*		index,	/*!< in: clustered index */
6399 	const ulint*		offsets,/*!< in: rec_get_offsets(rec, index) */
6400 	lock_mode		mode,	/*!< in: mode of the lock which
6401 					the read cursor should set on
6402 					records: LOCK_S or LOCK_X; the
6403 					latter is possible in
6404 					SELECT FOR UPDATE */
6405 	ulint			gap_mode,/*!< in: LOCK_ORDINARY, LOCK_GAP, or
6406 					LOCK_REC_NOT_GAP */
6407 	que_thr_t*		thr)	/*!< in: query thread */
6408 {
6409 	dberr_t	err;
6410 	ulint	heap_no;
6411 
6412 	ut_ad(dict_index_is_clust(index));
6413 	ut_ad(block->frame == page_align(rec));
6414 	ut_ad(page_rec_is_user_rec(rec) || page_rec_is_supremum(rec));
6415 	ut_ad(gap_mode == LOCK_ORDINARY || gap_mode == LOCK_GAP
6416 	      || gap_mode == LOCK_REC_NOT_GAP);
6417 	ut_ad(rec_offs_validate(rec, index, offsets));
6418 
6419 	if ((flags & BTR_NO_LOCKING_FLAG)
6420 	    || srv_read_only_mode
6421 	    || dict_table_is_temporary(index->table)) {
6422 
6423 		return(DB_SUCCESS);
6424 	}
6425 
6426 	heap_no = page_rec_get_heap_no(rec);
6427 
6428 	if (heap_no != PAGE_HEAP_NO_SUPREMUM) {
6429 
6430 		lock_rec_convert_impl_to_expl(block, rec, index, offsets);
6431 	}
6432 
6433 	lock_mutex_enter();
6434 
6435 	ut_ad(mode != LOCK_X
6436 	      || lock_table_has(thr_get_trx(thr), index->table, LOCK_IX));
6437 	ut_ad(mode != LOCK_S
6438 	      || lock_table_has(thr_get_trx(thr), index->table, LOCK_IS));
6439 
6440 	err = lock_rec_lock(FALSE, mode | gap_mode, block, heap_no, index, thr);
6441 
6442 	MONITOR_INC(MONITOR_NUM_RECLOCK_REQ);
6443 
6444 	lock_mutex_exit();
6445 
6446 	ut_ad(lock_rec_queue_validate(FALSE, block, rec, index, offsets));
6447 
6448 	DEBUG_SYNC_C("after_lock_clust_rec_read_check_and_lock");
6449 
6450 	return(err);
6451 }
6452 /*********************************************************************//**
6453 Checks if locks of other transactions prevent an immediate read, or passing
6454 over by a read cursor, of a clustered index record. If they do, first tests
6455 if the query thread should anyway be suspended for some reason; if not, then
6456 puts the transaction and the query thread to the lock wait state and inserts a
6457 waiting request for a record lock to the lock queue. Sets the requested mode
6458 lock on the record. This is an alternative version of
6459 lock_clust_rec_read_check_and_lock() that does not require the parameter
6460 "offsets".
6461 @return DB_SUCCESS, DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
6462 dberr_t
6463 lock_clust_rec_read_check_and_lock_alt(
6464 /*===================================*/
6465 	ulint			flags,	/*!< in: if BTR_NO_LOCKING_FLAG
6466 					bit is set, does nothing */
6467 	const buf_block_t*	block,	/*!< in: buffer block of rec */
6468 	const rec_t*		rec,	/*!< in: user record or page
6469 					supremum record which should
6470 					be read or passed over by a
6471 					read cursor */
6472 	dict_index_t*		index,	/*!< in: clustered index */
6473 	lock_mode		mode,	/*!< in: mode of the lock which
6474 					the read cursor should set on
6475 					records: LOCK_S or LOCK_X; the
6476 					latter is possible in
6477 					SELECT FOR UPDATE */
6478 	ulint			gap_mode,/*!< in: LOCK_ORDINARY, LOCK_GAP, or
6479 					LOCK_REC_NOT_GAP */
6480 	que_thr_t*		thr)	/*!< in: query thread */
6481 {
6482 	mem_heap_t*	tmp_heap	= NULL;
6483 	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
6484 	ulint*		offsets		= offsets_;
6485 	dberr_t		err;
6486 	rec_offs_init(offsets_);
6487 
6488 	offsets = rec_get_offsets(rec, index, offsets,
6489 				  ULINT_UNDEFINED, &tmp_heap);
6490 	err = lock_clust_rec_read_check_and_lock(flags, block, rec, index,
6491 						 offsets, mode, gap_mode, thr);
6492 	if (tmp_heap) {
6493 		mem_heap_free(tmp_heap);
6494 	}
6495 
6496 	if (err == DB_SUCCESS_LOCKED_REC) {
6497 		err = DB_SUCCESS;
6498 	}
6499 
6500 	return(err);
6501 }
6502 
6503 /*******************************************************************//**
6504 Release the last lock from the transaction's autoinc locks. */
6505 UNIV_INLINE
6506 void
6507 lock_release_autoinc_last_lock(
6508 /*===========================*/
6509 	ib_vector_t*	autoinc_locks)	/*!< in/out: vector of AUTOINC locks */
6510 {
6511 	ulint		last;
6512 	lock_t*		lock;
6513 
6514 	ut_ad(lock_mutex_own());
6515 	ut_a(!ib_vector_is_empty(autoinc_locks));
6516 
6517 	/* The lock to be released must be the last lock acquired. */
6518 	last = ib_vector_size(autoinc_locks) - 1;
6519 	lock = *static_cast<lock_t**>(ib_vector_get(autoinc_locks, last));
6520 
6521 	/* Should have only AUTOINC locks in the vector. */
6522 	ut_a(lock_get_mode(lock) == LOCK_AUTO_INC);
6523 	ut_a(lock_get_type(lock) == LOCK_TABLE);
6524 
6525 	ut_a(lock->un_member.tab_lock.table != NULL);
6526 
6527 	/* This will remove the lock from the trx autoinc_locks too. */
6528 	lock_table_dequeue(lock);
6529 
6530 	/* Remove from the table vector too. */
6531 	lock_trx_table_locks_remove(lock);
6532 }
6533 
6534 /*******************************************************************//**
6535 Check if a transaction holds any autoinc locks.
6536 @return TRUE if the transaction holds any AUTOINC locks. */
6537 static
6538 ibool
6539 lock_trx_holds_autoinc_locks(
6540 /*=========================*/
6541 	const trx_t*	trx)		/*!< in: transaction */
6542 {
6543 	ut_a(trx->autoinc_locks != NULL);
6544 
6545 	return(!ib_vector_is_empty(trx->autoinc_locks));
6546 }
6547 
6548 /*******************************************************************//**
6549 Release all the transaction's autoinc locks. */
6550 static
6551 void
6552 lock_release_autoinc_locks(
6553 /*=======================*/
6554 	trx_t*		trx)		/*!< in/out: transaction */
6555 {
6556 	ut_ad(lock_mutex_own());
6557 	/* If this is invoked for a running transaction by the thread
6558 	that is serving the transaction, then it is not necessary to
6559 	hold trx->mutex here. */
6560 
6561 	ut_a(trx->autoinc_locks != NULL);
6562 
6563 	/* We release the locks in the reverse order. This is to
6564 	avoid searching the vector for the element to delete at
6565 	the lower level. See (lock_table_remove_low()) for details. */
6566 	while (!ib_vector_is_empty(trx->autoinc_locks)) {
6567 
6568 		/* lock_table_remove_low() will also remove the lock from
6569 		the transaction's autoinc_locks vector. */
6570 		lock_release_autoinc_last_lock(trx->autoinc_locks);
6571 	}
6572 
6573 	/* Should release all locks. */
6574 	ut_a(ib_vector_is_empty(trx->autoinc_locks));
6575 }
6576 
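/* Editor's note on the reverse-order release above: freeing the
last-acquired AUTOINC lock first means lock_table_remove_low() always
finds the lock at the back of trx->autoinc_locks, so each release is an
O(1) pop from the vector tail rather than an O(n) search-and-erase from
the middle (a reading of the comment above; see ut0vec.h for the vector
primitives). */
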
6577 /*******************************************************************//**
6578 Gets the type of a lock. Non-inline version for using outside of the
6579 lock module.
6580 @return LOCK_TABLE or LOCK_REC */
6581 ulint
6582 lock_get_type(
6583 /*==========*/
6584 	const lock_t*	lock)	/*!< in: lock */
6585 {
6586 	return(lock_get_type_low(lock));
6587 }
6588 
6589 /*******************************************************************//**
6590 Gets the id of the transaction owning a lock.
6591 @return transaction id */
6592 trx_id_t
6593 lock_get_trx_id(
6594 /*============*/
6595 	const lock_t*	lock)	/*!< in: lock */
6596 {
6597 	return(trx_get_id_for_print(lock->trx));
6598 }
6599 
6600 /*******************************************************************//**
6601 Gets the mode of a lock in a human readable string.
6602 The string should not be free()'d or modified.
6603 @return lock mode */
6604 const char*
6605 lock_get_mode_str(
6606 /*==============*/
6607 	const lock_t*	lock)	/*!< in: lock */
6608 {
6609 	ibool	is_gap_lock;
6610 
6611 	is_gap_lock = lock_get_type_low(lock) == LOCK_REC
6612 		&& lock_rec_get_gap(lock);
6613 
6614 	switch (lock_get_mode(lock)) {
6615 	case LOCK_S:
6616 		if (is_gap_lock) {
6617 			return("S,GAP");
6618 		} else {
6619 			return("S");
6620 		}
6621 	case LOCK_X:
6622 		if (is_gap_lock) {
6623 			return("X,GAP");
6624 		} else {
6625 			return("X");
6626 		}
6627 	case LOCK_IS:
6628 		if (is_gap_lock) {
6629 			return("IS,GAP");
6630 		} else {
6631 			return("IS");
6632 		}
6633 	case LOCK_IX:
6634 		if (is_gap_lock) {
6635 			return("IX,GAP");
6636 		} else {
6637 			return("IX");
6638 		}
6639 	case LOCK_AUTO_INC:
6640 		return("AUTO_INC");
6641 	default:
6642 		return("UNKNOWN");
6643 	}
6644 }
6645 
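/* Editor's note, for illustration: a record S lock with the gap flag
set is reported by the function above as "S,GAP", and a table IX lock
as "IX". These strings are assumed to surface in the INFORMATION_SCHEMA
lock tables via the trx0i_s module. */
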
6646 /*******************************************************************//**
6647 Gets the type of a lock in a human readable string.
6648 The string should not be free()'d or modified.
6649 @return lock type */
6650 const char*
6651 lock_get_type_str(
6652 /*==============*/
6653 	const lock_t*	lock)	/*!< in: lock */
6654 {
6655 	switch (lock_get_type_low(lock)) {
6656 	case LOCK_REC:
6657 		return("RECORD");
6658 	case LOCK_TABLE:
6659 		return("TABLE");
6660 	default:
6661 		return("UNKNOWN");
6662 	}
6663 }
6664 
6665 /*******************************************************************//**
6666 Gets the table on which the lock is.
6667 @return table */
6668 UNIV_INLINE
6669 dict_table_t*
6670 lock_get_table(
6671 /*===========*/
6672 	const lock_t*	lock)	/*!< in: lock */
6673 {
6674 	switch (lock_get_type_low(lock)) {
6675 	case LOCK_REC:
6676 		ut_ad(dict_index_is_clust(lock->index)
6677 		      || !dict_index_is_online_ddl(lock->index));
6678 		return(lock->index->table);
6679 	case LOCK_TABLE:
6680 		return(lock->un_member.tab_lock.table);
6681 	default:
6682 		ut_error;
6683 		return(NULL);
6684 	}
6685 }
6686 
6687 /*******************************************************************//**
6688 Gets the id of the table on which the lock is.
6689 @return id of the table */
6690 table_id_t
6691 lock_get_table_id(
6692 /*==============*/
6693 	const lock_t*	lock)	/*!< in: lock */
6694 {
6695 	dict_table_t*	table;
6696 
6697 	table = lock_get_table(lock);
6698 
6699 	return(table->id);
6700 }
6701 
6702 /** Determine which table a lock is associated with.
6703 @param[in]	lock	the lock
6704 @return name of the table */
6705 const table_name_t&
6706 lock_get_table_name(
6707 	const lock_t*	lock)
6708 {
6709 	return(lock_get_table(lock)->name);
6710 }
6711 
6712 /*******************************************************************//**
6713 For a record lock, gets the index on which the lock is.
6714 @return index */
6715 const dict_index_t*
6716 lock_rec_get_index(
6717 /*===============*/
6718 	const lock_t*	lock)	/*!< in: lock */
6719 {
6720 	ut_a(lock_get_type_low(lock) == LOCK_REC);
6721 	ut_ad(dict_index_is_clust(lock->index)
6722 	      || !dict_index_is_online_ddl(lock->index));
6723 
6724 	return(lock->index);
6725 }
6726 
6727 /*******************************************************************//**
6728 For a record lock, gets the name of the index on which the lock is.
6729 The string should not be free()'d or modified.
6730 @return name of the index */
6731 const char*
6732 lock_rec_get_index_name(
6733 /*====================*/
6734 	const lock_t*	lock)	/*!< in: lock */
6735 {
6736 	ut_a(lock_get_type_low(lock) == LOCK_REC);
6737 	ut_ad(dict_index_is_clust(lock->index)
6738 	      || !dict_index_is_online_ddl(lock->index));
6739 
6740 	return(lock->index->name);
6741 }
6742 
6743 /*******************************************************************//**
6744 For a record lock, gets the tablespace number on which the lock is.
6745 @return tablespace number */
6746 ulint
6747 lock_rec_get_space_id(
6748 /*==================*/
6749 	const lock_t*	lock)	/*!< in: lock */
6750 {
6751 	ut_a(lock_get_type_low(lock) == LOCK_REC);
6752 
6753 	return(lock->un_member.rec_lock.space);
6754 }
6755 
6756 /*******************************************************************//**
6757 For a record lock, gets the page number on which the lock is.
6758 @return page number */
6759 ulint
6760 lock_rec_get_page_no(
6761 /*=================*/
6762 	const lock_t*	lock)	/*!< in: lock */
6763 {
6764 	ut_a(lock_get_type_low(lock) == LOCK_REC);
6765 
6766 	return(lock->un_member.rec_lock.page_no);
6767 }
6768 
6769 /*********************************************************************//**
6770 Cancels a waiting lock request and releases possible other transactions
6771 waiting behind it. */
6772 void
6773 lock_cancel_waiting_and_release(
6774 /*============================*/
6775 	lock_t*	lock)	/*!< in/out: waiting lock request */
6776 {
6777 	que_thr_t*	thr;
6778 
6779 	ut_ad(lock_mutex_own());
6780 	ut_ad(trx_mutex_own(lock->trx));
6781 
6782 	lock->trx->lock.cancel = true;
6783 
6784 	if (lock_get_type_low(lock) == LOCK_REC) {
6785 
6786 		lock_rec_dequeue_from_page(lock);
6787 	} else {
6788 		ut_ad(lock_get_type_low(lock) & LOCK_TABLE);
6789 
6790 		if (lock->trx->autoinc_locks != NULL) {
6791 			/* Release the transaction's AUTOINC locks. */
6792 			lock_release_autoinc_locks(lock->trx);
6793 		}
6794 
6795 		lock_table_dequeue(lock);
6796 	}
6797 
6798 	/* Reset the wait flag and the back pointer to lock in trx. */
6799 
6800 	lock_reset_lock_and_trx_wait(lock);
6801 
6802 	/* The following function releases the trx from lock wait. */
6803 
6804 	thr = que_thr_end_lock_wait(lock->trx);
6805 
6806 	if (thr != NULL) {
6807 		lock_wait_release_thread_if_suspended(thr);
6808 	}
6809 
6810 	lock->trx->lock.cancel = false;
6811 }
6812 
6813 /*********************************************************************//**
6814 Unlocks AUTO_INC type locks that were possibly reserved by a trx. This
6815 function should be called at the end of an SQL statement, by the
6816 connection thread that owns the transaction (trx->mysql_thd). */
6817 void
6818 lock_unlock_table_autoinc(
6819 /*======================*/
6820 	trx_t*	trx)	/*!< in/out: transaction */
6821 {
6822 	ut_ad(!lock_mutex_own());
6823 	ut_ad(!trx_mutex_own(trx));
6824 	ut_ad(!trx->lock.wait_lock);
6825 
6826 	/* This can be invoked on NOT_STARTED, ACTIVE, PREPARED,
6827 	but not COMMITTED transactions. */
6828 
6829 	ut_ad(trx_state_eq(trx, TRX_STATE_NOT_STARTED)
6830 	      || trx_state_eq(trx, TRX_STATE_FORCED_ROLLBACK)
6831 	      || !trx_state_eq(trx, TRX_STATE_COMMITTED_IN_MEMORY));
6832 
6833 	/* This function is invoked for a running transaction by the
6834 	thread that is serving the transaction. Therefore it is not
6835 	necessary to hold trx->mutex here. */
6836 
6837 	if (lock_trx_holds_autoinc_locks(trx)) {
6838 		lock_mutex_enter();
6839 
6840 		lock_release_autoinc_locks(trx);
6841 
6842 		lock_mutex_exit();
6843 	}
6844 }
6845 
6846 /*********************************************************************//**
6847 Releases a transaction's locks, and releases possible other transactions
6848 waiting because of these locks. Change the state of the transaction to
6849 TRX_STATE_COMMITTED_IN_MEMORY. */
6850 void
6851 lock_trx_release_locks(
6852 /*===================*/
6853 	trx_t*	trx)	/*!< in/out: transaction */
6854 {
6855 	check_trx_state(trx);
6856 
6857 	if (trx_state_eq(trx, TRX_STATE_PREPARED)) {
6858 
6859 		mutex_enter(&trx_sys->mutex);
6860 
6861 		ut_a(trx_sys->n_prepared_trx > 0);
6862 		--trx_sys->n_prepared_trx;
6863 
6864 		if (trx->is_recovered) {
6865 			ut_a(trx_sys->n_prepared_recovered_trx > 0);
6866 			trx_sys->n_prepared_recovered_trx--;
6867 		}
6868 
6869 		mutex_exit(&trx_sys->mutex);
6870 	} else {
6871 		ut_ad(trx_state_eq(trx, TRX_STATE_ACTIVE));
6872 	}
6873 
6874 	bool	release_lock;
6875 
6876 	release_lock = (UT_LIST_GET_LEN(trx->lock.trx_locks) > 0);
6877 
6878 	/* Don't take lock_sys mutex if trx didn't acquire any lock. */
6879 	if (release_lock) {
6880 
6881 		/* The transition of trx->state to TRX_STATE_COMMITTED_IN_MEMORY
6882 		is protected by both the lock_sys->mutex and the trx->mutex. */
6883 		lock_mutex_enter();
6884 	}
6885 
6886 	trx_mutex_enter(trx);
6887 
6888 	/* The following assignment makes the transaction committed in memory
6889 	and makes its changes to data visible to other transactions.
6890 	NOTE that there is a small discrepancy from the strict formal
6891 	visibility rules here: a human user of the database can see
6892 	modifications made by another transaction T even before the necessary
6893 	log segment has been flushed to the disk. If the database happens to
6894 	crash before the flush, the user has seen modifications from T which
6895 	will never be a committed transaction. However, any transaction T2
6896 	which sees the modifications of the committing transaction T, and
6897 	which also itself makes modifications to the database, will get an lsn
6898 	larger than the committing transaction T. In the case where the log
6899 	flush fails, and T never gets committed, also T2 will never get
6900 	committed. */
6901 
6902 	/*--------------------------------------*/
6903 	trx->state = TRX_STATE_COMMITTED_IN_MEMORY;
6904 	/*--------------------------------------*/
6905 
6906 	if (trx_is_referenced(trx)) {
6907 
6908 		ut_a(release_lock);
6909 
6910 		lock_mutex_exit();
6911 
6912 		while (trx_is_referenced(trx)) {
6913 
6914 			trx_mutex_exit(trx);
6915 
6916 			DEBUG_SYNC_C("waiting_trx_is_not_referenced");
6917 
6918 			/** Doing an implicit to explicit conversion
6919 			should not be expensive. */
6920 			ut_delay(ut_rnd_interval(0, srv_spin_wait_delay));
6921 
6922 			trx_mutex_enter(trx);
6923 		}
6924 
6925 		trx_mutex_exit(trx);
6926 
6927 		lock_mutex_enter();
6928 
6929 		trx_mutex_enter(trx);
6930 	}
6931 
6932 	ut_ad(!trx_is_referenced(trx));
6933 
6934 	/* If the background thread trx_rollback_or_clean_recovered()
6935 	is still active then there is a chance that the rollback
6936 	thread may see this trx as COMMITTED_IN_MEMORY and goes ahead
6937 	to clean it up calling trx_cleanup_at_db_startup(). This can
6938 	happen in the case we are committing a trx here that is left
6939 	in PREPARED state during the crash. Note that commit of the
6940 	rollback of a PREPARED trx happens in the recovery thread
6941 	while the rollback of other transactions happen in the
6942 	background thread. To avoid this race we unconditionally unset
6943 	the is_recovered flag. */
6944 
6945 	trx->is_recovered = false;
6946 
6947 	trx_mutex_exit(trx);
6948 
6949 	if (release_lock) {
6950 
6951 		lock_release(trx);
6952 
6953 		lock_mutex_exit();
6954 	}
6955 
6956 	trx->lock.n_rec_locks = 0;
6957 
6958 	/* We don't remove the locks one by one from the vector for
6959 	efficiency reasons. We simply reset it because we would have
6960 	released all the locks anyway. */
6961 
6962 	trx->lock.table_locks.clear();
6963 
6964 	ut_a(UT_LIST_GET_LEN(trx->lock.trx_locks) == 0);
6965 	ut_a(ib_vector_is_empty(trx->autoinc_locks));
6966 	ut_a(trx->lock.table_locks.empty());
6967 
6968 	mem_heap_empty(trx->lock.lock_heap);
6969 }

/*********************************************************************//**
Check whether the transaction has already been rolled back because it
was selected as a deadlock victim, or if it has to wait then cancel
the wait lock.
@return DB_DEADLOCK, DB_LOCK_WAIT or DB_SUCCESS */
dberr_t
lock_trx_handle_wait(
/*=================*/
	trx_t*	trx)	/*!< in/out: trx lock state */
{
	dberr_t	err;

	lock_mutex_enter();

	trx_mutex_enter(trx);

	if (trx->lock.was_chosen_as_deadlock_victim) {
		err = DB_DEADLOCK;
	} else if (trx->lock.wait_lock != NULL) {
		lock_cancel_waiting_and_release(trx->lock.wait_lock);
		err = DB_LOCK_WAIT;
	} else {
		/* The lock was probably granted before we got here. */
		err = DB_SUCCESS;
	}

	lock_mutex_exit();

	trx_mutex_exit(trx);

	return(err);
}
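
/* Illustrative handling of the return value (a sketch only):

	dberr_t	err = lock_trx_handle_wait(trx);

	switch (err) {
	case DB_DEADLOCK:
		// trx was already rolled back as a deadlock victim
		break;
	case DB_LOCK_WAIT:
		// the pending wait lock was cancelled above
		break;
	default:
		ut_ad(err == DB_SUCCESS);
		// nothing to do; the lock was granted before the call
	}
*/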

/*********************************************************************//**
Get the number of locks on a table.
@return number of locks */
ulint
lock_table_get_n_locks(
/*===================*/
	const dict_table_t*	table)	/*!< in: table */
{
	ulint		n_table_locks;

	lock_mutex_enter();

	n_table_locks = UT_LIST_GET_LEN(table->locks);

	lock_mutex_exit();

	return(n_table_locks);
}

#ifdef UNIV_DEBUG
/*******************************************************************//**
Do an exhaustive check for any locks (table or rec) against the table.
@return lock if found */
static
const lock_t*
lock_table_locks_lookup(
/*====================*/
	const dict_table_t*	table,		/*!< in: check if there are
						any locks held on records in
						this table or on the table
						itself */
	const trx_ut_list_t*	trx_list)	/*!< in: trx list to check */
{
	trx_t*			trx;

	ut_a(table != NULL);
	ut_ad(lock_mutex_own());
	ut_ad(trx_sys_mutex_own());

	for (trx = UT_LIST_GET_FIRST(*trx_list);
	     trx != NULL;
	     trx = UT_LIST_GET_NEXT(trx_list, trx)) {

		const lock_t*	lock;

		check_trx_state(trx);

		for (lock = UT_LIST_GET_FIRST(trx->lock.trx_locks);
		     lock != NULL;
		     lock = UT_LIST_GET_NEXT(trx_locks, lock)) {

			ut_a(lock->trx == trx);

			if (lock_get_type_low(lock) == LOCK_REC) {
				ut_ad(!dict_index_is_online_ddl(lock->index)
				      || dict_index_is_clust(lock->index));
				if (lock->index->table == table) {
					return(lock);
				}
			} else if (lock->un_member.tab_lock.table == table) {
				return(lock);
			}
		}
	}

	return(NULL);
}
#endif /* UNIV_DEBUG */

/*******************************************************************//**
Check if there are any locks (table or rec) against table.
@return true if table has either table or record locks. */
bool
lock_table_has_locks(
/*=================*/
	const dict_table_t*	table)	/*!< in: check if there are any locks
					held on records in this table or on the
					table itself */
{
	ibool			has_locks;

	lock_mutex_enter();

	has_locks = UT_LIST_GET_LEN(table->locks) > 0 || table->n_rec_locks > 0;

#ifdef UNIV_DEBUG
	if (!has_locks) {
		mutex_enter(&trx_sys->mutex);

		ut_ad(!lock_table_locks_lookup(table, &trx_sys->rw_trx_list));

		mutex_exit(&trx_sys->mutex);
	}
#endif /* UNIV_DEBUG */

	lock_mutex_exit();

	return(has_locks);
}
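
/* Illustrative use (a sketch; the polling caller here is hypothetical):

	// wait until no transaction holds any lock on the table
	while (lock_table_has_locks(table)) {
		os_thread_sleep(100000);	// back off for 100ms
	}
*/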

/*******************************************************************//**
Initialise the table lock list. */
void
lock_table_lock_list_init(
/*======================*/
	table_lock_list_t*	lock_list)	/*!< List to initialise */
{
	UT_LIST_INIT(*lock_list, &lock_table_t::locks);
}

/*******************************************************************//**
Initialise the trx lock list. */
void
lock_trx_lock_list_init(
/*====================*/
	trx_lock_list_t*	lock_list)	/*!< List to initialise */
{
	UT_LIST_INIT(*lock_list, &lock_t::trx_locks);
}

#ifdef UNIV_DEBUG
/*******************************************************************//**
Check if the transaction holds any locks on the sys tables
or its records.
@return the strongest lock found on any sys table or 0 for none */
const lock_t*
lock_trx_has_sys_table_locks(
/*=========================*/
	const trx_t*	trx)	/*!< in: transaction to check */
{
	const lock_t*	strongest_lock = 0;
	lock_mode	strongest = LOCK_NONE;

	lock_mutex_enter();

	typedef lock_pool_t::const_reverse_iterator iterator;

	iterator	end = trx->lock.table_locks.rend();
	iterator	it = trx->lock.table_locks.rbegin();

	/* Find a valid mode. Note: ib_vector_size() can be 0. */

	for (/* No op */; it != end; ++it) {
		const lock_t*	lock = *it;

		if (lock != NULL
		    && dict_is_sys_table(lock->un_member.tab_lock.table->id)) {

			strongest = lock_get_mode(lock);
			ut_ad(strongest != LOCK_NONE);
			strongest_lock = lock;
			break;
		}
	}

	if (strongest == LOCK_NONE) {
		lock_mutex_exit();
		return(NULL);
	}

	for (/* No op */; it != end; ++it) {
		const lock_t*	lock = *it;

		if (lock == NULL) {
			continue;
		}

		ut_ad(trx == lock->trx);
		ut_ad(lock_get_type_low(lock) & LOCK_TABLE);
		ut_ad(lock->un_member.tab_lock.table != NULL);

		lock_mode	mode = lock_get_mode(lock);

		if (dict_is_sys_table(lock->un_member.tab_lock.table->id)
		    && lock_mode_stronger_or_eq(mode, strongest)) {

			strongest = mode;
			strongest_lock = lock;
		}
	}

	lock_mutex_exit();

	return(strongest_lock);
}
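
/* Illustrative debug use (a sketch): assert that a transaction holds no
locks on the InnoDB system tables before some operation:

	ut_ad(lock_trx_has_sys_table_locks(trx) == NULL);
*/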

/*******************************************************************//**
Check if the transaction holds an exclusive lock on a record.
@return whether the locks are held */
bool
lock_trx_has_rec_x_lock(
/*====================*/
	const trx_t*		trx,	/*!< in: transaction to check */
	const dict_table_t*	table,	/*!< in: table to check */
	const buf_block_t*	block,	/*!< in: buffer block of the record */
	ulint			heap_no)/*!< in: record heap number */
{
	ut_ad(heap_no > PAGE_HEAP_NO_SUPREMUM);

	lock_mutex_enter();
	ut_a(lock_table_has(trx, table, LOCK_IX)
	     || dict_table_is_temporary(table));
	ut_a(lock_rec_has_expl(LOCK_X | LOCK_REC_NOT_GAP,
			       block, heap_no, trx)
	     || dict_table_is_temporary(table));
	lock_mutex_exit();
	return(true);
}
#endif /* UNIV_DEBUG */

/** rewind(3) the file used for storing the latest detected deadlock and
print a heading message to stderr if printing of all deadlocks to stderr
is enabled. */
void
DeadlockChecker::start_print()
{
	ut_ad(lock_mutex_own());

	rewind(lock_latest_err_file);
	ut_print_timestamp(lock_latest_err_file);

	if (srv_print_all_deadlocks) {
		ib::info() << "Transactions deadlock detected, dumping"
			<< " detailed information.";
	}
}

/** Print a message to the deadlock file and possibly to stderr.
@param msg message to print */
void
DeadlockChecker::print(const char* msg)
{
	fputs(msg, lock_latest_err_file);

	if (srv_print_all_deadlocks) {
		ib::info() << msg;
	}
}

/** Print transaction data to the deadlock file and possibly to stderr.
@param trx transaction
@param max_query_len max query length to print */
void
DeadlockChecker::print(const trx_t* trx, ulint max_query_len)
{
	ut_ad(lock_mutex_own());

	ulint	n_rec_locks = lock_number_of_rows_locked(&trx->lock);
	ulint	n_trx_locks = UT_LIST_GET_LEN(trx->lock.trx_locks);
	ulint	heap_size = mem_heap_get_size(trx->lock.lock_heap);

	mutex_enter(&trx_sys->mutex);

	trx_print_low(lock_latest_err_file, trx, max_query_len,
		      n_rec_locks, n_trx_locks, heap_size);

	if (srv_print_all_deadlocks) {
		trx_print_low(stderr, trx, max_query_len,
			      n_rec_locks, n_trx_locks, heap_size);
	}

	mutex_exit(&trx_sys->mutex);
}

/** Print lock data to the deadlock file and possibly to stderr.
@param lock record or table type lock */
void
DeadlockChecker::print(const lock_t* lock)
{
	ut_ad(lock_mutex_own());

	if (lock_get_type_low(lock) == LOCK_REC) {
		lock_rec_print(lock_latest_err_file, lock);

		if (srv_print_all_deadlocks) {
			lock_rec_print(stderr, lock);
		}
	} else {
		lock_table_print(lock_latest_err_file, lock);

		if (srv_print_all_deadlocks) {
			lock_table_print(stderr, lock);
		}
	}
}

/** Get the next lock in the queue that is owned by a transaction whose
sub-tree has not already been searched.
Note: "next" here means PREV for table locks.

@param lock Lock in queue
@param heap_no heap_no if lock is a record lock else ULINT_UNDEFINED

@return next lock or NULL if at end of queue */
const lock_t*
DeadlockChecker::get_next_lock(const lock_t* lock, ulint heap_no) const
{
	ut_ad(lock_mutex_own());

	do {
		if (lock_get_type_low(lock) == LOCK_REC) {
			ut_ad(heap_no != ULINT_UNDEFINED);
			lock = lock_rec_get_next_const(heap_no, lock);
		} else {
			ut_ad(heap_no == ULINT_UNDEFINED);
			ut_ad(lock_get_type_low(lock) == LOCK_TABLE);

			lock = UT_LIST_GET_NEXT(
				un_member.tab_lock.locks, lock);
		}

	} while (lock != NULL && is_visited(lock));

	ut_ad(lock == NULL
	      || lock_get_type_low(lock) == lock_get_type_low(m_wait_lock));

	return(lock);
}

/** Get the first lock to search. The search starts from the current
wait_lock. What we are really interested in is an edge from the
current wait_lock's owning transaction to another transaction that has
a lock ahead in the queue. We skip locks where the owning transaction's
sub-tree has already been searched.

Note: The record locks are traversed from the oldest lock to the
latest. For table locks we go from latest to oldest.

For record locks, we first position the "iterator" on the first lock on
the page and then reposition on the actual heap_no. This is required
due to the way the record lock hash is implemented.

@param[out] heap_no if rec lock, else ULINT_UNDEFINED.
@return first lock or NULL */
const lock_t*
DeadlockChecker::get_first_lock(ulint* heap_no) const
{
	ut_ad(lock_mutex_own());

	const lock_t*	lock = m_wait_lock;

	if (lock_get_type_low(lock) == LOCK_REC) {
		hash_table_t*	lock_hash;

		lock_hash = lock->type_mode & LOCK_PREDICATE
			? lock_sys->prdt_hash
			: lock_sys->rec_hash;

		/* We are only interested in records that match the heap_no. */
		*heap_no = lock_rec_find_set_bit(lock);

		ut_ad(*heap_no <= 0xffff);
		ut_ad(*heap_no != ULINT_UNDEFINED);

		/* Find the locks on the page. */
		lock = lock_rec_get_first_on_page_addr(
			lock_hash,
			lock->un_member.rec_lock.space,
			lock->un_member.rec_lock.page_no);

		/* Position on the first lock on the physical record. */
		if (!lock_rec_get_nth_bit(lock, *heap_no)) {
			lock = lock_rec_get_next_const(*heap_no, lock);
		}

		ut_a(!lock_get_wait(lock));
	} else {
		/* Table locks don't care about the heap_no. */
		*heap_no = ULINT_UNDEFINED;
		ut_ad(lock_get_type_low(lock) == LOCK_TABLE);
		dict_table_t*	table = lock->un_member.tab_lock.table;
		lock = UT_LIST_GET_FIRST(table->locks);
	}

	/* Must find at least two locks, otherwise there cannot be a
	waiting lock, secondly the first lock cannot be the wait_lock. */
	ut_a(lock != NULL);
	ut_a(lock != m_wait_lock);

	/* Check that the lock type doesn't change. */
	ut_ad(lock_get_type_low(lock) == lock_get_type_low(m_wait_lock));

	return(lock);
}
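
/* Illustrative traversal (a sketch of how search() below drives the two
accessors above):

	ulint		heap_no;
	const lock_t*	lock = get_first_lock(&heap_no);

	while (lock != NULL) {
		// examine the lock ahead of m_wait_lock in the queue
		lock = get_next_lock(lock, heap_no);
	}

Record lock queues are walked from the oldest lock to the latest; table
lock queues from the latest to the oldest. */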

/** Notify that a deadlock has been detected and print the conflicting
transaction info.
@param lock lock causing deadlock */
void
DeadlockChecker::notify(const lock_t* lock) const
{
	ut_ad(lock_mutex_own());

	start_print();

	print("\n*** (1) TRANSACTION:\n");

	print(m_wait_lock->trx, 3000);

	print("*** (1) WAITING FOR THIS LOCK TO BE GRANTED:\n");

	print(m_wait_lock);

	print("*** (2) TRANSACTION:\n");

	print(lock->trx, 3000);

	print("*** (2) HOLDS THE LOCK(S):\n");

	print(lock);

	/* It is possible that the joining transaction was granted its
	lock when we rolled back some other waiting transaction. */

	if (m_start->lock.wait_lock != 0) {
		print("*** (2) WAITING FOR THIS LOCK TO BE GRANTED:\n");

		print(m_start->lock.wait_lock);
	}

	DBUG_PRINT("ib_lock", ("deadlock detected"));
}

/** Select the victim transaction that should be rolled back.
@return victim transaction */
const trx_t*
DeadlockChecker::select_victim() const
{
	ut_ad(lock_mutex_own());
	ut_ad(m_start->lock.wait_lock != 0);
	ut_ad(m_wait_lock->trx != m_start);

	if (thd_trx_priority(m_start->mysql_thd) > 0
	    || thd_trx_priority(m_wait_lock->trx->mysql_thd) > 0) {

		const trx_t*	victim;

		victim = trx_arbitrate(m_start, m_wait_lock->trx);

		if (victim != NULL) {

			return(victim);
		}
	}

	if (trx_weight_ge(m_wait_lock->trx, m_start)) {

		/* The joining transaction is 'smaller',
		choose it as the victim and roll it back. */

		return(m_start);
	}

	return(m_wait_lock->trx);
}
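
/* Victim selection summary (a restatement of the rules above, for
reference):

	1. If either transaction has a positive thd_trx_priority(), let
	   trx_arbitrate() pick the victim, if it picks one.
	2. Otherwise, if the blocking (m_wait_lock) transaction weighs at
	   least as much as the joining one, the joining transaction
	   (m_start) is the lighter and becomes the victim.
	3. Otherwise the blocking transaction is chosen. */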

/** Looks iteratively for a deadlock. Note: the joining transaction may
have been granted its lock by the deadlock checks.
@return 0 if no deadlock else the victim transaction instance.*/
const trx_t*
DeadlockChecker::search()
{
	ut_ad(lock_mutex_own());
	ut_ad(!trx_mutex_own(m_start));

	ut_ad(m_start != NULL);
	ut_ad(m_wait_lock != NULL);
	check_trx_state(m_wait_lock->trx);
	ut_ad(m_mark_start <= s_lock_mark_counter);

	/* Look at the locks ahead of wait_lock in the lock queue. */
	ulint		heap_no;
	const lock_t*	lock = get_first_lock(&heap_no);

	for (;;) {

		/* We should never visit the same sub-tree more than once. */
		ut_ad(lock == NULL || !is_visited(lock));

		while (m_n_elems > 0 && lock == NULL) {

			/* Restore previous search state. */

			pop(lock, heap_no);

			lock = get_next_lock(lock, heap_no);
		}

		if (lock == NULL) {
			break;
		} else if (lock == m_wait_lock) {

			/* We can mark this subtree as searched */
			ut_ad(lock->trx->lock.deadlock_mark <= m_mark_start);

			lock->trx->lock.deadlock_mark = ++s_lock_mark_counter;

			/* We are not prepared for an overflow. This 64-bit
			counter should never wrap around. At 10^9 increments
			per second, it would take 10^3 years of uptime. */

			ut_ad(s_lock_mark_counter > 0);

			/* Backtrack */
			lock = NULL;

		} else if (!lock_has_to_wait(m_wait_lock, lock)) {

			/* No conflict, next lock */
			lock = get_next_lock(lock, heap_no);

		} else if (lock->trx == m_start) {

			/* Found a cycle. */

			notify(lock);

			return(select_victim());

		} else if (is_too_deep()) {

			/* Search too deep to continue. */
			m_too_deep = true;
			return(m_start);

		} else if (lock->trx->lock.que_state == TRX_QUE_LOCK_WAIT) {

			/* Another trx ahead has requested a lock in an
			incompatible mode, and is itself waiting for a lock. */

			++m_cost;

			if (!push(lock, heap_no)) {
				m_too_deep = true;
				return(m_start);
			}

			m_wait_lock = lock->trx->lock.wait_lock;

			lock = get_first_lock(&heap_no);

			if (is_visited(lock)) {
				lock = get_next_lock(lock, heap_no);
			}

		} else {
			lock = get_next_lock(lock, heap_no);
		}
	}

	ut_a(lock == NULL && m_n_elems == 0);

	/* No deadlock found. */
	return(0);
}
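
/* The loop above is a depth-first search of the waits-for graph with an
explicit stack: push()/pop() save and restore the (lock, heap_no)
position when following a waiting transaction's own wait_lock edge, and
deadlock_mark records sub-trees that have been fully searched so they
are never visited twice. */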

/** Print info about transaction that was rolled back.
@param trx transaction rolled back
@param lock lock trx wants */
void
DeadlockChecker::rollback_print(const trx_t* trx, const lock_t* lock)
{
	ut_ad(lock_mutex_own());

	/* If the lock search exceeds the max step
	or the max depth, the current trx will be
	the victim. Print its information. */
	start_print();

	print("TOO DEEP OR LONG SEARCH IN THE LOCK TABLE"
	      " WAITS-FOR GRAPH, WE WILL ROLL BACK"
	      " FOLLOWING TRANSACTION \n\n"
	      "*** TRANSACTION:\n");

	print(trx, 3000);

	print("*** WAITING FOR THIS LOCK TO BE GRANTED:\n");

	print(lock);
}

/** Rollback transaction selected as the victim. */
void
DeadlockChecker::trx_rollback()
{
	ut_ad(lock_mutex_own());

	trx_t*	trx = m_wait_lock->trx;

	print("*** WE ROLL BACK TRANSACTION (1)\n");

	trx_mutex_enter(trx);

	trx->lock.was_chosen_as_deadlock_victim = true;

	lock_cancel_waiting_and_release(trx->lock.wait_lock);

	trx_mutex_exit(trx);
}

/** Checks if a joining lock request results in a deadlock. If a deadlock is
found this function will resolve the deadlock by choosing a victim transaction
and rolling it back. It will attempt to resolve all deadlocks. The returned
transaction will be the joining transaction instance, or NULL if some other
transaction was chosen as a victim and rolled back, or if no deadlock was
found.

@param[in]	lock lock the transaction is requesting
@param[in,out]	trx transaction requesting the lock

@return transaction instance chosen as victim or 0 */
const trx_t*
DeadlockChecker::check_and_resolve(const lock_t* lock, trx_t* trx)
{
	ut_ad(lock_mutex_own());
	ut_ad(trx_mutex_own(trx));
	check_trx_state(trx);
	ut_ad(!srv_read_only_mode);

	/* If the transaction is marked for ASYNC rollback then we should
	not allow it to wait for another lock, causing a possible deadlock.
	We return the current transaction as the deadlock victim here. */
	if (trx->in_innodb & TRX_FORCE_ROLLBACK_ASYNC) {
		return(trx);
	} else if (!innobase_deadlock_detect) {
		return(NULL);
	}

	/* Release the mutex to obey the latching order.
	This is safe, because DeadlockChecker::check_and_resolve()
	is invoked when a lock wait is enqueued for the currently
	running transaction. Because m_trx is a running transaction
	(it is not currently suspended because of a lock wait),
	its state can only be changed by this thread, which is
	currently associated with the transaction. */

	trx_mutex_exit(trx);

	const trx_t*	victim_trx;

	/* Try and resolve as many deadlocks as possible. */
	do {
		DeadlockChecker	checker(trx, lock, s_lock_mark_counter);

		victim_trx = checker.search();

		/* Search too deep, we rollback the joining transaction only
		if it is possible to rollback. Otherwise we rollback the
		transaction that is holding the lock that the joining
		transaction wants. */
		if (checker.is_too_deep()) {

			ut_ad(trx == checker.m_start);
			ut_ad(trx == victim_trx);

			rollback_print(victim_trx, lock);

		} else if (victim_trx != NULL && victim_trx != trx) {

			ut_ad(victim_trx == checker.m_wait_lock->trx);

			checker.trx_rollback();

			lock_deadlock_found = true;

			MONITOR_INC(MONITOR_DEADLOCK);
		}

	} while (victim_trx != NULL && victim_trx != trx);

	/* If the joining transaction was selected as the victim. */
	if (victim_trx != NULL) {

		print("*** WE ROLL BACK TRANSACTION (2)\n");

		lock_deadlock_found = true;

		MONITOR_INC(MONITOR_DEADLOCK);
	}

	trx_mutex_enter(trx);

	return(victim_trx);
}
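
/* Illustrative call pattern (a sketch only; the actual caller enqueues a
waiting lock request first):

	lock_mutex_enter();
	trx_mutex_enter(trx);

	const trx_t*	victim
		= DeadlockChecker::check_and_resolve(lock, trx);

	if (victim == trx) {
		// the joining transaction is the victim: give up the
		// wait and report DB_DEADLOCK to the caller
	} else {
		ut_ad(victim == NULL);
		// no deadlock remains; any other victim has already
		// been rolled back and trx may proceed to wait
	}

	trx_mutex_exit(trx);
	lock_mutex_exit();
*/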

/**
Allocate cached locks for the transaction.
@param trx		allocate cached record locks for this transaction */
void
lock_trx_alloc_locks(trx_t* trx)
{
	ulint	sz = REC_LOCK_SIZE * REC_LOCK_CACHE;
	byte*	ptr = reinterpret_cast<byte*>(ut_malloc_nokey(sz));

	/* We allocate one big chunk and then distribute it among
	the rest of the elements. The allocated chunk pointer is always
	at index 0. */

	for (ulint i = 0; i < REC_LOCK_CACHE; ++i, ptr += REC_LOCK_SIZE) {
		trx->lock.rec_pool.push_back(
			reinterpret_cast<ib_lock_t*>(ptr));
	}

	sz = TABLE_LOCK_SIZE * TABLE_LOCK_CACHE;
	ptr = reinterpret_cast<byte*>(ut_malloc_nokey(sz));

	for (ulint i = 0; i < TABLE_LOCK_CACHE; ++i, ptr += TABLE_LOCK_SIZE) {
		trx->lock.table_pool.push_back(
			reinterpret_cast<ib_lock_t*>(ptr));
	}
}
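
/* Illustrative layout (a sketch): the record lock cache is a single
contiguous chunk of REC_LOCK_SIZE * REC_LOCK_CACHE bytes, carved into
REC_LOCK_CACHE fixed-size slots:

	rec_pool[0] -> chunk + 0 * REC_LOCK_SIZE   (chunk start; this is
	                                            the pointer to free)
	rec_pool[1] -> chunk + 1 * REC_LOCK_SIZE
	...
	rec_pool[REC_LOCK_CACHE - 1]
	            -> chunk + (REC_LOCK_CACHE - 1) * REC_LOCK_SIZE

The table lock cache follows the same pattern with TABLE_LOCK_SIZE and
TABLE_LOCK_CACHE. */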
7716