1 /*****************************************************************************
2 
3 Copyright (c) 2007, 2021, Oracle and/or its affiliates.
4 
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License, version 2.0,
7 as published by the Free Software Foundation.
8 
9 This program is also distributed with certain software (including
10 but not limited to OpenSSL) that is licensed under separate terms,
11 as designated in a particular file or component or in included license
12 documentation.  The authors of MySQL hereby grant you an additional
13 permission to link the program and your derivative works with the
14 separately licensed software that they have included with MySQL.
15 
16 This program is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19 GNU General Public License, version 2.0, for more details.
20 
21 You should have received a copy of the GNU General Public License along with
22 this program; if not, write to the Free Software Foundation, Inc.,
23 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
24 
25 *****************************************************************************/
26 
27 /**************************************************//**
28 @file include/lock0priv.h
29 Lock module internal structures and methods.
30 
31 Created July 12, 2007 Vasil Dimov
32 *******************************************************/
33 
34 #ifndef lock0priv_h
35 #define lock0priv_h
36 
37 #ifndef LOCK_MODULE_IMPLEMENTATION
38 /* If you need to access members of the structures defined in this
39 file, please write appropriate functions that retrieve them and put
40 those functions in lock/ */
41 #error Do not include lock0priv.h outside of the lock/ module
42 #endif
43 
44 #include "univ.i"
45 #include "dict0types.h"
46 #include "hash0hash.h"
47 #include "trx0types.h"
48 
49 /** A table lock */
50 struct lock_table_t {
51 	dict_table_t*	table;		/*!< database table in dictionary
52 					cache */
53 	UT_LIST_NODE_T(lock_t)
54 			locks;		/*!< list of locks on the same
55 					table */
56 	/** Print the table lock into the given output stream
57 	@param[in,out]	out	the output stream
58 	@return the given output stream. */
59 	std::ostream& print(std::ostream& out) const;
60 };
61 
62 /** Print the table lock into the given output stream
63 @param[in,out]	out	the output stream
64 @return the given output stream. */
65 inline
print(std::ostream & out)66 std::ostream& lock_table_t::print(std::ostream& out) const
67 {
68 	out << "[lock_table_t: name=" << table->name << "]";
69 	return(out);
70 }
71 
72 /** The global output operator is overloaded to conveniently
73 print the lock_table_t object into the given output stream.
74 @param[in,out]	out	the output stream
75 @param[in]	lock	the table lock
76 @return the given output stream */
77 inline
78 std::ostream&
79 operator<<(std::ostream& out, const lock_table_t& lock)
80 {
81 	return(lock.print(out));
82 }
83 
84 /** Record lock for a page */
85 struct lock_rec_t {
86 	ib_uint32_t	space;		/*!< space id */
87 	ib_uint32_t	page_no;	/*!< page number */
88 	ib_uint32_t	n_bits;		/*!< number of bits in the lock
89 					bitmap; NOTE: the lock bitmap is
90 					placed immediately after the
91 					lock struct */
92 
93 	/** Print the record lock into the given output stream
94 	@param[in,out]	out	the output stream
95 	@return the given output stream. */
96 	std::ostream& print(std::ostream& out) const;
97 };
98 
99 /** Print the record lock into the given output stream
100 @param[in,out]	out	the output stream
101 @return the given output stream. */
102 inline
print(std::ostream & out)103 std::ostream& lock_rec_t::print(std::ostream& out) const
104 {
105 	out << "[lock_rec_t: space=" << space << ", page_no=" << page_no
106 		<< ", n_bits=" << n_bits << "]";
107 	return(out);
108 }
109 
110 inline
111 std::ostream&
112 operator<<(std::ostream& out, const lock_rec_t& lock)
113 {
114 	return(lock.print(out));
115 }
116 
117 /** Lock struct; protected by lock_sys->mutex */
118 struct lock_t {
119 	trx_t*		trx;		/*!< transaction owning the
120 					lock */
121 	UT_LIST_NODE_T(lock_t)
122 			trx_locks;	/*!< list of the locks of the
123 					transaction */
124 
125 	dict_index_t*	index;		/*!< index for a record lock */
126 
127 	lock_t*		hash;		/*!< hash chain node for a record
128 					lock. The link node in a singly linked
129 					list, used during hashing. */
130 
131 	union {
132 		lock_table_t	tab_lock;/*!< table lock */
133 		lock_rec_t	rec_lock;/*!< record lock */
134 	} un_member;			/*!< lock details */
135 
136 	ib_uint32_t	type_mode;	/*!< lock type, mode, LOCK_GAP or
137 					LOCK_REC_NOT_GAP,
138 					LOCK_INSERT_INTENTION,
139 					wait flag, ORed */
140 
141 	/** Remove GAP lock from a next Key Lock */
remove_gap_locklock_t142 	void remove_gap_lock()
143 	{
144 		ut_ad(!is_gap());
145 		ut_ad(!is_insert_intention());
146 		ut_ad(is_record_lock());
147 
148 		type_mode |= LOCK_REC_NOT_GAP;
149 	}
150 
151 	/** Determine if the lock object is a record lock.
152 	@return true if record lock, false otherwise. */
is_record_locklock_t153 	bool is_record_lock() const
154 	{
155 		return(type() == LOCK_REC);
156 	}
157 
158 	/** Determine if it is predicate lock.
159 	@return true if predicate lock, false otherwise. */
is_predicatelock_t160 	bool is_predicate() const
161 	{
162 		return(type_mode & (LOCK_PREDICATE | LOCK_PRDT_PAGE));
163 	}
164 
is_waitinglock_t165 	bool is_waiting() const
166 	{
167 		return(type_mode & LOCK_WAIT);
168 	}
169 
is_gaplock_t170 	bool is_gap() const
171 	{
172 		return(type_mode & LOCK_GAP);
173 	}
174 
is_record_not_gaplock_t175 	bool is_record_not_gap() const
176 	{
177 		return(type_mode & LOCK_REC_NOT_GAP);
178 	}
179 
is_insert_intentionlock_t180 	bool is_insert_intention() const
181 	{
182 		return(type_mode & LOCK_INSERT_INTENTION);
183 	}
184 
typelock_t185 	ulint type() const {
186 		return(type_mode & LOCK_TYPE_MASK);
187 	}
188 
modelock_t189 	enum lock_mode mode() const
190 	{
191 		return(static_cast<enum lock_mode>(type_mode & LOCK_MODE_MASK));
192 	}
193 
194 	/** Get lock hash table
195 	@return lock hash table */
hash_tablelock_t196 	hash_table_t* hash_table() const
197 	{
198 		return(lock_hash_get(type_mode));
199 	}
200 
201 	/** Get tablespace ID for the lock
202 	@return space ID */
spacelock_t203 	ulint space() const
204 	{
205 		return(un_member.rec_lock.space);
206 	}
207 
208 	/** Get page number of the lock
209 	@return page number */
page_numberlock_t210 	ulint page_number() const
211 	{
212 		return(un_member.rec_lock.page_no);
213 	}
214 
215 	/** Print the lock object into the given output stream.
216 	@param[in,out]	out	the output stream
217 	@return the given output stream. */
218 	std::ostream& print(std::ostream& out) const;
219 
220 	/** Convert the member 'type_mode' into a human readable string.
221 	@return human readable string */
222 	std::string type_mode_string() const;
223 
type_stringlock_t224 	const char* type_string() const
225 	{
226 		switch (type_mode & LOCK_TYPE_MASK) {
227 		case LOCK_REC:
228 			return("LOCK_REC");
229 		case LOCK_TABLE:
230 			return("LOCK_TABLE");
231 		default:
232 			ut_error;
233 		}
234 	}
235 };
236 
237 /** Convert the member 'type_mode' into a human readable string.
238 @return human readable string */
239 inline
240 std::string
type_mode_string()241 lock_t::type_mode_string() const
242 {
243 	std::ostringstream sout;
244 	sout << type_string();
245 	sout << " | " << lock_mode_string(mode());
246 
247 	if (is_record_not_gap()) {
248 		sout << " | LOCK_REC_NOT_GAP";
249 	}
250 
251 	if (is_waiting()) {
252 		sout << " | LOCK_WAIT";
253 	}
254 
255 	if (is_gap()) {
256 		sout << " | LOCK_GAP";
257 	}
258 
259 	if (is_insert_intention()) {
260 		sout << " | LOCK_INSERT_INTENTION";
261 	}
262 	return(sout.str());
263 }
264 
265 inline
266 std::ostream&
print(std::ostream & out)267 lock_t::print(std::ostream& out) const
268 {
269 	out << "[lock_t: type_mode=" << type_mode << "("
270 		<< type_mode_string() << ")";
271 
272 	if (is_record_lock()) {
273 		out << un_member.rec_lock;
274 	} else {
275 		out << un_member.tab_lock;
276 	}
277 
278 	out << "]";
279 	return(out);
280 }
281 
282 inline
283 std::ostream&
284 operator<<(std::ostream& out, const lock_t& lock)
285 {
286 	return(lock.print(out));
287 }
288 
289 #ifdef UNIV_DEBUG
290 extern ibool	lock_print_waits;
291 #endif /* UNIV_DEBUG */
292 
293 /** Restricts the length of search we will do in the waits-for
294 graph of transactions */
295 static const ulint	LOCK_MAX_N_STEPS_IN_DEADLOCK_CHECK = 1000000;
296 
297 /** Restricts the search depth we will do in the waits-for graph of
298 transactions */
299 static const ulint	LOCK_MAX_DEPTH_IN_DEADLOCK_CHECK = 200;
300 
301 /** When releasing transaction locks, this specifies how often we release
302 the lock mutex for a moment to give also others access to it */
303 static const ulint	LOCK_RELEASE_INTERVAL = 1000;
304 
305 /* Safety margin when creating a new record lock: this many extra records
306 can be inserted to the page without need to create a lock with a bigger
307 bitmap */
308 
309 static const ulint	LOCK_PAGE_BITMAP_MARGIN = 64;
310 
311 /* An explicit record lock affects both the record and the gap before it.
312 An implicit x-lock does not affect the gap, it only locks the index
313 record from read or update.
314 
315 If a transaction has modified or inserted an index record, then
316 it owns an implicit x-lock on the record. On a secondary index record,
317 a transaction has an implicit x-lock also if it has modified the
318 clustered index record, the max trx id of the page where the secondary
319 index record resides is >= trx id of the transaction (or database recovery
320 is running), and there are no explicit non-gap lock requests on the
321 secondary index record.
322 
323 This complicated definition for a secondary index comes from the
324 implementation: we want to be able to determine if a secondary index
325 record has an implicit x-lock, just by looking at the present clustered
326 index record, not at the historical versions of the record. The
327 complicated definition can be explained to the user so that there is
328 nondeterminism in the access path when a query is answered: we may,
329 or may not, access the clustered index record and thus may, or may not,
330 bump into an x-lock set there.
331 
332 Different transaction can have conflicting locks set on the gap at the
333 same time. The locks on the gap are purely inhibitive: an insert cannot
334 be made, or a select cursor may have to wait if a different transaction
335 has a conflicting lock on the gap. An x-lock on the gap does not give
336 the right to insert into the gap.
337 
338 An explicit lock can be placed on a user record or the supremum record of
339 a page. The locks on the supremum record are always thought to be of the gap
340 type, though the gap bit is not set. When we perform an update of a record
341 where the size of the record changes, we may temporarily store its explicit
342 locks on the infimum record of the page, though the infimum otherwise never
343 carries locks.
344 
345 A waiting record lock can also be of the gap type. A waiting lock request
346 can be granted when there is no conflicting mode lock request by another
347 transaction ahead of it in the explicit lock queue.
348 
349 In version 4.0.5 we added yet another explicit lock type: LOCK_REC_NOT_GAP.
350 It only locks the record it is placed on, not the gap before the record.
351 This lock type is necessary to emulate an Oracle-like READ COMMITTED isolation
352 level.
353 
354 -------------------------------------------------------------------------
355 RULE 1: If there is an implicit x-lock on a record, and there are non-gap
356 -------
357 lock requests waiting in the queue, then the transaction holding the implicit
358 x-lock also has an explicit non-gap record x-lock. Therefore, as locks are
359 released, we can grant locks to waiting lock requests purely by looking at
360 the explicit lock requests in the queue.
361 
362 RULE 3: Different transactions cannot have conflicting granted non-gap locks
363 -------
364 on a record at the same time. However, they can have conflicting granted gap
365 locks.
366 RULE 4: If a there is a waiting lock request in a queue, no lock request,
367 -------
368 gap or not, can be inserted ahead of it in the queue. In record deletes
369 and page splits new gap type locks can be created by the database manager
370 for a transaction, and without rule 4, the waits-for graph of transactions
371 might become cyclic without the database noticing it, as the deadlock check
372 is only performed when a transaction itself requests a lock!
373 -------------------------------------------------------------------------
374 
375 An insert is allowed to a gap if there are no explicit lock requests by
376 other transactions on the next record. It does not matter if these lock
377 requests are granted or waiting, gap bit set or not, with the exception
378 that a gap type request set by another transaction to wait for
379 its turn to do an insert is ignored. On the other hand, an
380 implicit x-lock by another transaction does not prevent an insert, which
381 allows for more concurrency when using an Oracle-style sequence number
382 generator for the primary key with many transactions doing inserts
383 concurrently.
384 
385 A modify of a record is allowed if the transaction has an x-lock on the
386 record, or if other transactions do not have any non-gap lock requests on the
387 record.
388 
389 A read of a single user record with a cursor is allowed if the transaction
390 has a non-gap explicit, or an implicit lock on the record, or if the other
391 transactions have no x-lock requests on the record. At a page supremum a
392 read is always allowed.
393 
394 In summary, an implicit lock is seen as a granted x-lock only on the
395 record, not on the gap. An explicit lock with no gap bit set is a lock
396 both on the record and the gap. If the gap bit is set, the lock is only
397 on the gap. Different transaction cannot own conflicting locks on the
398 record at the same time, but they may own conflicting locks on the gap.
399 Granted locks on a record give an access right to the record, but gap type
400 locks just inhibit operations.
401 
402 NOTE: Finding out if some transaction has an implicit x-lock on a secondary
403 index record can be cumbersome. We may have to look at previous versions of
404 the corresponding clustered index record to find out if a delete marked
405 secondary index record was delete marked by an active transaction, not by
406 a committed one.
407 
408 FACT A: If a transaction has inserted a row, it can delete it any time
409 without need to wait for locks.
410 
411 PROOF: The transaction has an implicit x-lock on every index record inserted
412 for the row, and can thus modify each record without the need to wait. Q.E.D.
413 
414 FACT B: If a transaction has read some result set with a cursor, it can read
415 it again, and retrieves the same result set, if it has not modified the
416 result set in the meantime. Hence, there is no phantom problem. If the
417 biggest record, in the alphabetical order, touched by the cursor is removed,
418 a lock wait may occur, otherwise not.
419 
420 PROOF: When a read cursor proceeds, it sets an s-lock on each user record
421 it passes, and a gap type s-lock on each page supremum. The cursor must
422 wait until it has these locks granted. Then no other transaction can
423 have a granted x-lock on any of the user records, and therefore cannot
424 modify the user records. Neither can any other transaction insert into
425 the gaps which were passed over by the cursor. Page splits and merges,
426 and removal of obsolete versions of records do not affect this, because
427 when a user record or a page supremum is removed, the next record inherits
428 its locks as gap type locks, and therefore blocks inserts to the same gap.
429 Also, if a page supremum is inserted, it inherits its locks from the successor
430 record. When the cursor is positioned again at the start of the result set,
431 the records it will touch on its course are either records it touched
432 during the last pass or new inserted page supremums. It can immediately
433 access all these records, and when it arrives at the biggest record, it
434 notices that the result set is complete. If the biggest record was removed,
435 lock wait can occur because the next record only inherits a gap type lock,
436 and a wait may be needed. Q.E.D. */
437 
438 /* If an index record should be changed or a new inserted, we must check
439 the lock on the record or the next. When a read cursor starts reading,
440 we will set a record level s-lock on each record it passes, except on the
441 initial record on which the cursor is positioned before we start to fetch
442 records. Our index tree search has the convention that the B-tree
443 cursor is positioned BEFORE the first possibly matching record in
444 the search. Optimizations are possible here: if the record is searched
445 on an equality condition to a unique key, we could actually set a special
446 lock on the record, a lock which would not prevent any insert before
447 this record. In the next key locking an x-lock set on a record also
448 prevents inserts just before that record.
449 	There are special infimum and supremum records on each page.
450 A supremum record can be locked by a read cursor. This records cannot be
451 updated but the lock prevents insert of a user record to the end of
452 the page.
453 	Next key locks will prevent the phantom problem where new rows
454 could appear to SELECT result sets after the select operation has been
455 performed. Prevention of phantoms ensures the serilizability of
456 transactions.
457 	What should we check if an insert of a new record is wanted?
458 Only the lock on the next record on the same page, because also the
459 supremum record can carry a lock. An s-lock prevents insertion, but
460 what about an x-lock? If it was set by a searched update, then there
461 is implicitly an s-lock, too, and the insert should be prevented.
462 What if our transaction owns an x-lock to the next record, but there is
463 a waiting s-lock request on the next record? If this s-lock was placed
464 by a read cursor moving in the ascending order in the index, we cannot
465 do the insert immediately, because when we finally commit our transaction,
466 the read cursor should see also the new inserted record. So we should
467 move the read cursor backward from the next record for it to pass over
468 the new inserted record. This move backward may be too cumbersome to
469 implement. If we in this situation just enqueue a second x-lock request
470 for our transaction on the next record, then the deadlock mechanism
471 notices a deadlock between our transaction and the s-lock request
472 transaction. This seems to be an ok solution.
473 	We could have the convention that granted explicit record locks,
474 lock the corresponding records from changing, and also lock the gaps
475 before them from inserting. A waiting explicit lock request locks the gap
476 before from inserting. Implicit record x-locks, which we derive from the
477 transaction id in the clustered index record, only lock the record itself
478 from modification, not the gap before it from inserting.
479 	How should we store update locks? If the search is done by a unique
480 key, we could just modify the record trx id. Otherwise, we could put a record
481 x-lock on the record. If the update changes ordering fields of the
482 clustered index record, the inserted new record needs no record lock in
483 lock table, the trx id is enough. The same holds for a secondary index
484 record. Searched delete is similar to update.
485 
486 PROBLEM:
487 What about waiting lock requests? If a transaction is waiting to make an
488 update to a record which another modified, how does the other transaction
489 know to send the end-lock-wait signal to the waiting transaction? If we have
490 the convention that a transaction may wait for just one lock at a time, how
491 do we preserve it if lock wait ends?
492 
493 PROBLEM:
494 Checking the trx id label of a secondary index record. In the case of a
495 modification, not an insert, is this necessary? A secondary index record
496 is modified only by setting or resetting its deleted flag. A secondary index
497 record contains fields to uniquely determine the corresponding clustered
498 index record. A secondary index record is therefore only modified if we
499 also modify the clustered index record, and the trx id checking is done
500 on the clustered index record, before we come to modify the secondary index
501 record. So, in the case of delete marking or unmarking a secondary index
502 record, we do not have to care about trx ids, only the locks in the lock
503 table must be checked. In the case of a select from a secondary index, the
504 trx id is relevant, and in this case we may have to search the clustered
505 index record.
506 
507 PROBLEM: How to update record locks when page is split or merged, or
508 --------------------------------------------------------------------
509 a record is deleted or updated?
510 If the size of fields in a record changes, we perform the update by
511 a delete followed by an insert. How can we retain the locks set or
512 waiting on the record? Because a record lock is indexed in the bitmap
513 by the heap number of the record, when we remove the record from the
514 record list, it is possible still to keep the lock bits. If the page
515 is reorganized, we could make a table of old and new heap numbers,
516 and permute the bitmaps in the locks accordingly. We can add to the
517 table a row telling where the updated record ended. If the update does
518 not require a reorganization of the page, we can simply move the lock
519 bits for the updated record to the position determined by its new heap
520 number (we may have to allocate a new lock, if we run out of the bitmap
521 in the old one).
522 	A more complicated case is the one where the reinsertion of the
523 updated record is done pessimistically, because the structure of the
524 tree may change.
525 
526 PROBLEM: If a supremum record is removed in a page merge, or a record
527 ---------------------------------------------------------------------
528 removed in a purge, what to do to the waiting lock requests? In a split to
529 the right, we just move the lock requests to the new supremum. If a record
530 is removed, we could move the waiting lock request to its inheritor, the
531 next record in the index. But, the next record may already have lock
532 requests on its own queue. A new deadlock check should be made then. Maybe
533 it is easier just to release the waiting transactions. They can then enqueue
534 new lock requests on appropriate records.
535 
536 PROBLEM: When a record is inserted, what locks should it inherit from the
537 -------------------------------------------------------------------------
538 upper neighbor? An insert of a new supremum record in a page split is
539 always possible, but an insert of a new user record requires that the upper
540 neighbor does not have any lock requests by other transactions, granted or
541 waiting, in its lock queue. Solution: We can copy the locks as gap type
542 locks, so that also the waiting locks are transformed to granted gap type
543 locks on the inserted record. */
544 
545 /* LOCK COMPATIBILITY MATRIX
546  *    IS IX S  X  AI
547  * IS +	 +  +  -  +
548  * IX +	 +  -  -  +
549  * S  +	 -  +  -  -
550  * X  -	 -  -  -  -
551  * AI +	 +  -  -  -
552  *
553  * Note that for rows, InnoDB only acquires S or X locks.
554  * For tables, InnoDB normally acquires IS or IX locks.
555  * S or X table locks are only acquired for LOCK TABLES.
556  * Auto-increment (AI) locks are needed because of
557  * statement-level MySQL binlog.
558  * See also lock_mode_compatible().
559  */
560 static const byte lock_compatibility_matrix[5][5] = {
561  /**         IS     IX       S     X       AI */
562  /* IS */ {  TRUE,  TRUE,  TRUE,  FALSE,  TRUE},
563  /* IX */ {  TRUE,  TRUE,  FALSE, FALSE,  TRUE},
564  /* S  */ {  TRUE,  FALSE, TRUE,  FALSE,  FALSE},
565  /* X  */ {  FALSE, FALSE, FALSE, FALSE,  FALSE},
566  /* AI */ {  TRUE,  TRUE,  FALSE, FALSE,  FALSE}
567 };
568 
569 /* STRONGER-OR-EQUAL RELATION (mode1=row, mode2=column)
570  *    IS IX S  X  AI
571  * IS +  -  -  -  -
572  * IX +  +  -  -  -
573  * S  +  -  +  -  -
574  * X  +  +  +  +  +
575  * AI -  -  -  -  +
576  * See lock_mode_stronger_or_eq().
577  */
578 static const byte lock_strength_matrix[5][5] = {
579  /**         IS     IX       S     X       AI */
580  /* IS */ {  TRUE,  FALSE, FALSE,  FALSE, FALSE},
581  /* IX */ {  TRUE,  TRUE,  FALSE, FALSE,  FALSE},
582  /* S  */ {  TRUE,  FALSE, TRUE,  FALSE,  FALSE},
583  /* X  */ {  TRUE,  TRUE,  TRUE,  TRUE,   TRUE},
584  /* AI */ {  FALSE, FALSE, FALSE, FALSE,  TRUE}
585 };
586 
587 /** Maximum depth of the DFS stack. */
588 static const ulint MAX_STACK_SIZE = 4096;
589 
590 #define PRDT_HEAPNO	PAGE_HEAP_NO_INFIMUM
591 /** Record locking request status */
592 enum lock_rec_req_status {
593         /** Failed to acquire a lock */
594         LOCK_REC_FAIL,
595         /** Succeeded in acquiring a lock (implicit or already acquired) */
596         LOCK_REC_SUCCESS,
597         /** Explicitly created a new lock */
598         LOCK_REC_SUCCESS_CREATED
599 };
600 
601 /**
602 Record lock ID */
603 struct RecID {
604 
RecIDRecID605 	RecID(ulint space_id, ulint page_no, ulint heap_no)
606 		:
607 		m_space_id(static_cast<uint32_t>(space_id)),
608 		m_page_no(static_cast<uint32_t>(page_no)),
609 		m_heap_no(static_cast<uint32_t>(heap_no)),
610 		m_fold(lock_rec_fold(m_space_id, m_page_no))
611 	{
612 		ut_ad(space_id < UINT32_MAX);
613 		ut_ad(page_no < UINT32_MAX);
614 		ut_ad(heap_no < UINT32_MAX);
615 	}
616 
RecIDRecID617 	RecID(const buf_block_t* block, ulint heap_no)
618 		:
619 		m_space_id(block->page.id.space()),
620 		m_page_no(block->page.id.page_no()),
621 		m_heap_no(static_cast<uint32_t>(heap_no)),
622 		m_fold(lock_rec_fold(m_space_id, m_page_no))
623 	{
624 		ut_ad(heap_no < UINT32_MAX);
625 	}
626 
627 	/**
628 	@return the "folded" value of {space, page_no} */
foldRecID629 	ulint fold() const
630 	{
631 		return(m_fold);
632 	}
633 
634 	/**
635 	Tablespace ID */
636 	uint32_t		m_space_id;
637 
638 	/**
639 	Page number within the space ID */
640 	uint32_t		m_page_no;
641 
642 	/**
643 	Heap number within the page */
644 	uint32_t		m_heap_no;
645 
646 	/**
647 	Hashed key value */
648 	ulint			m_fold;
649 };
650 
651 /**
652 Create record locks */
653 class RecLock {
654 public:
655 
656 	/**
657 	@param[in,out] thr	Transaction query thread requesting the record
658 				lock
659 	@param[in] index	Index on which record lock requested
660 	@param[in] rec_id	Record lock tuple {space, page_no, heap_no}
661 	@param[in] mode		The lock mode */
RecLock(que_thr_t * thr,dict_index_t * index,const RecID & rec_id,ulint mode)662 	RecLock(que_thr_t*	thr,
663 		dict_index_t*	index,
664 		const RecID&	rec_id,
665 		ulint		mode)
666 		:
667 		m_thr(thr),
668 		m_trx(thr_get_trx(thr)),
669 		m_mode(mode),
670 		m_index(index),
671 		m_rec_id(rec_id)
672 	{
673 		ut_ad(is_predicate_lock(m_mode));
674 
675 		init(NULL);
676 	}
677 
678 	/**
679 	@param[in,out] thr	Transaction query thread requesting the record
680 				lock
681 	@param[in] index	Index on which record lock requested
682 	@param[in] block	Buffer page containing record
683 	@param[in] heap_no	Heap number within the block
684 	@param[in] mode		The lock mode
685 	@param[in] prdt		The predicate for the rtree lock */
686 	RecLock(que_thr_t*	thr,
687 		dict_index_t*	index,
688 		const buf_block_t*
689 				block,
690 		ulint		heap_no,
691 		ulint		mode,
692 		lock_prdt_t*	prdt = NULL)
693 		:
m_thr(thr)694 		m_thr(thr),
695 		m_trx(thr_get_trx(thr)),
696 		m_mode(mode),
697 		m_index(index),
698 		m_rec_id(block, heap_no)
699 	{
700 		btr_assert_not_corrupted(block, index);
701 
702 		init(block->frame);
703 	}
704 
705 	/**
706 	@param[in] index	Index on which record lock requested
707 	@param[in] rec_id	Record lock tuple {space, page_no, heap_no}
708 	@param[in] mode		The lock mode */
RecLock(dict_index_t * index,const RecID & rec_id,ulint mode)709 	RecLock(dict_index_t*	index,
710 		const RecID&	rec_id,
711 		ulint		mode)
712 		:
713 		m_thr(),
714 		m_trx(),
715 		m_mode(mode),
716 		m_index(index),
717 		m_rec_id(rec_id)
718 	{
719 		ut_ad(is_predicate_lock(m_mode));
720 
721 		init(NULL);
722 	}
723 
724 	/**
725 	@param[in] index	Index on which record lock requested
726 	@param[in] block	Buffer page containing record
727 	@param[in] heap_no	Heap number withing block
728 	@param[in] mode		The lock mode */
RecLock(dict_index_t * index,const buf_block_t * block,ulint heap_no,ulint mode)729 	RecLock(dict_index_t*	index,
730 		const buf_block_t*
731 				block,
732 		ulint		heap_no,
733 		ulint		mode)
734 		:
735 		m_thr(),
736 		m_trx(),
737 		m_mode(mode),
738 		m_index(index),
739 		m_rec_id(block, heap_no)
740 	{
741 		btr_assert_not_corrupted(block, index);
742 
743 		init(block->frame);
744 	}
745 
746 	/**
747 	Enqueue a lock wait for a transaction. If it is a high priority
748 	transaction (cannot rollback) then jump ahead in the record lock wait
749 	queue and if the transaction at the head of the queue is itself waiting
750 	roll it back.
751 	@param[in, out] wait_for	The lock that the the joining
752 					transaction is waiting for
753 	@param[in] prdt			Predicate [optional]
754 	@return DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED, or
755 		DB_SUCCESS_LOCKED_REC; DB_SUCCESS_LOCKED_REC means that
756 		there was a deadlock, but another transaction was chosen
757 		as a victim, and we got the lock immediately: no need to
758 		wait then */
759 	dberr_t add_to_waitq(
760 #ifdef WITH_WSREP
761 		lock_t*	const wait_for,
762 #else
763 		const lock_t*	wait_for,
764 #endif
765 		const lock_prdt_t*
766 				prdt = NULL);
767 
768 	/**
769 	Create a lock for a transaction and initialise it.
770 	@param[in, out] trx		Transaction requesting the new lock
771 	@param[in] owns_trx_mutex	true if caller owns the trx_t::mutex
772 	@param[in] add_to_hash		add the lock to hash table
773 	@param[in] prdt			Predicate lock (optional)
774 	@return new lock instance */
775 #ifdef WITH_WSREP
776 	lock_t* create(
777 		lock_t* const   c_lock,
778 		trx_t*		trx,
779 		bool		owns_trx_mutex,
780 		bool		add_to_hash,
781 		const lock_prdt_t*
782 				prdt = NULL);
783 #endif /* WITH_WSREP */
784 	lock_t* create(
785 		trx_t*		trx,
786 		bool		owns_trx_mutex,
787 		bool		add_to_hash,
788 		const lock_prdt_t*
789 				prdt = NULL);
790 	/**
791 	Check of the lock is on m_rec_id.
792 	@param[in] lock			Lock to compare with
793 	@return true if the record lock is on m_rec_id*/
794 	bool is_on_row(const lock_t* lock) const;
795 
796 	/**
797 	Create the lock instance
798 	@param[in, out] trx	The transaction requesting the lock
799 	@param[in, out] index	Index on which record lock is required
800 	@param[in] mode		The lock mode desired
801 	@param[in] rec_id	The record id
802 	@param[in] size		Size of the lock + bitmap requested
803 	@return a record lock instance */
804 	static lock_t* lock_alloc(
805 		trx_t*		trx,
806 		dict_index_t*	index,
807 		ulint		mode,
808 		const RecID&	rec_id,
809 		ulint		size);
810 
811 private:
812 	/*
813 	@return the record lock size in bytes */
lock_size()814 	size_t lock_size() const
815 	{
816 		return(m_size);
817 	}
818 
819 	/**
820 	Do some checks and prepare for creating a new record lock */
821 	void prepare() const;
822 
823 	/**
824 	Collect the transactions that will need to be rolled back asynchronously
825 	@param[in, out] trx	Transaction to be rolled back */
826 	void mark_trx_for_rollback(trx_t* trx);
827 
828 	/**
829 	Jump the queue for the record over all low priority transactions and
830 	add the lock. If all current granted locks are compatible, grant the
831 	lock. Otherwise, mark all granted transaction for asynchronous
832 	rollback and add to hit list.
833 	@param[in, out]	lock		Lock being requested
834 	@param[in]	conflict_lock	First conflicting lock from the head
835 	@return true if the lock is granted */
836 	bool jump_queue(lock_t* lock, const lock_t* conflict_lock);
837 
838 	/** Find position in lock queue and add the high priority transaction
839 	lock. Intention and GAP only locks can be granted even if there are
840 	waiting locks in front of the queue. To add the High priority
841 	transaction in a safe position we keep the following rule.
842 
843 	1. If the lock can be granted, add it before the first waiting lock
844 	in the queue so that all currently waiting locks need to do conflict
845 	check before getting granted.
846 
847 	2. If the lock has to wait, add it after the last granted lock or the
848 	last waiting high priority transaction in the queue whichever is later.
849 	This ensures that the transaction is granted only after doing conflict
850 	check with all granted transactions.
851 	@param[in]      lock            Lock being requested
852 	@param[in]      conflict_lock   First conflicting lock from the head
853 	@param[out]     high_priority   high priority transaction ahead in queue
854 	@return true if the lock can be granted */
855 	bool
856 	lock_add_priority(
857 		lock_t*		lock,
858 		const lock_t*	conflict_lock,
859 		bool*		high_priority);
860 
861 	/** Iterate over the granted locks and prepare the hit list for ASYNC Rollback.
862 	If the transaction is waiting for some other lock then wake up with deadlock error.
863 	Currently we don't mark following transactions for ASYNC Rollback.
864 	1. Read only transactions
865 	2. Background transactions
866 	3. Other High priority transactions
867 	@param[in]      lock            Lock being requested
868 	@param[in]      conflict_lock   First conflicting lock from the head */
869 	void make_trx_hit_list(lock_t* lock, const lock_t* conflict_lock);
870 
871 	/**
872 	Setup the requesting transaction state for lock grant
873 	@param[in,out] lock	Lock for which to change state */
874 	void set_wait_state(lock_t* lock);
875 
876 	/**
877 	Add the lock to the record lock hash and the transaction's lock list
878 	@param[in,out] lock	Newly created record lock to add to the
879 				rec hash and the transaction lock list
880 	@param[in] add_to_hash	If the lock should be added to the hash table */
881 	void lock_add(lock_t* lock, bool add_to_hash);
882 
883 	/**
884 	Check and resolve any deadlocks
885 	@param[in, out] lock		The lock being acquired
886 	@return DB_LOCK_WAIT, DB_DEADLOCK, or DB_QUE_THR_SUSPENDED, or
887 		DB_SUCCESS_LOCKED_REC; DB_SUCCESS_LOCKED_REC means that
888 		there was a deadlock, but another transaction was chosen
889 		as a victim, and we got the lock immediately: no need to
890 		wait then */
891 	dberr_t deadlock_check(lock_t* lock);
892 
893 	/**
894 	Check the outcome of the deadlock check
895 	@param[in,out] victim_trx	Transaction selected for rollback
896 	@param[in,out] lock		Lock being requested
897 	@return DB_LOCK_WAIT, DB_DEADLOCK or DB_SUCCESS_LOCKED_REC */
898 	dberr_t check_deadlock_result(const trx_t* victim_trx, lock_t* lock);
899 
900 	/**
901 	Setup the context from the requirements */
init(const page_t * page)902 	void init(const page_t* page)
903 	{
904 		ut_ad(lock_mutex_own());
905 		ut_ad(!srv_read_only_mode);
906 		ut_ad(dict_index_is_clust(m_index)
907 		      || !dict_index_is_online_ddl(m_index));
908 		ut_ad(m_thr == NULL || m_trx == thr_get_trx(m_thr));
909 
910 		m_size = is_predicate_lock(m_mode)
911 			  ? lock_size(m_mode) : lock_size(page);
912 
913 		/** If rec is the supremum record, then we reset the
914 		gap and LOCK_REC_NOT_GAP bits, as all locks on the
915 		supremum are automatically of the gap type */
916 
917 		if (m_rec_id.m_heap_no == PAGE_HEAP_NO_SUPREMUM) {
918 			ut_ad(!(m_mode & LOCK_REC_NOT_GAP));
919 
920 			m_mode &= ~(LOCK_GAP | LOCK_REC_NOT_GAP);
921 		}
922 	}
923 
924 	/**
925 	Calculate the record lock physical size required for a predicate lock.
926 	@param[in] mode For predicate locks the lock mode
927 	@return the size of the lock data structure required in bytes */
lock_size(ulint mode)928 	static size_t lock_size(ulint mode)
929 	{
930 		ut_ad(is_predicate_lock(mode));
931 
932 		/* The lock is always on PAGE_HEAP_NO_INFIMUM(0),
933 		so we only need 1 bit (which is rounded up to 1
934 		byte) for lock bit setting */
935 
936 		size_t	n_bytes;
937 
938 		if (mode & LOCK_PREDICATE) {
939 			const ulint	align = UNIV_WORD_SIZE - 1;
940 
941 			/* We will attach the predicate structure
942 			after lock. Make sure the memory is
943 			aligned on 8 bytes, the mem_heap_alloc
944 			will align it with MEM_SPACE_NEEDED
945 			anyway. */
946 
947 			n_bytes = (1 + sizeof(lock_prdt_t) + align) & ~align;
948 
949 			/* This should hold now */
950 
951 			ut_ad(n_bytes == sizeof(lock_prdt_t) + UNIV_WORD_SIZE);
952 
953 		} else {
954 			n_bytes = 1;
955 		}
956 
957 		return(n_bytes);
958 	}
959 
960 	/**
961 	Calculate the record lock physical size required, non-predicate lock.
962 	@param[in] page		For non-predicate locks the buffer page
963 	@return the size of the lock data structure required in bytes */
lock_size(const page_t * page)964 	static size_t lock_size(const page_t* page)
965 	{
966 		ulint	n_recs = page_dir_get_n_heap(page);
967 
968 		/* Make lock bitmap bigger by a safety margin */
969 
970 		return(1 + ((n_recs + LOCK_PAGE_BITMAP_MARGIN) / 8));
971 	}
972 
973 	/**
974 	@return true if the requested lock mode is for a predicate
975 		or page lock */
is_predicate_lock(ulint mode)976 	static bool is_predicate_lock(ulint mode)
977 	{
978 		return(mode & (LOCK_PREDICATE | LOCK_PRDT_PAGE));
979 	}
980 
981 private:
982 	/** The query thread of the transaction */
983 	que_thr_t*		m_thr;
984 
985 	/**
986 	Transaction requesting the record lock */
987 	trx_t*			m_trx;
988 
989 	/**
990 	Lock mode requested */
991 	ulint			m_mode;
992 
993 	/**
994 	Size of the record lock in bytes */
995 	size_t			m_size;
996 
997 	/**
998 	Index on which the record lock is required */
999 	dict_index_t*		m_index;
1000 
1001 	/**
1002 	The record lock tuple {space, page_no, heap_no} */
1003 	RecID			m_rec_id;
1004 };
1005 
1006 #ifdef UNIV_DEBUG
1007 /** The count of the types of locks. */
1008 static const ulint      lock_types = UT_ARR_SIZE(lock_compatibility_matrix);
1009 #endif /* UNIV_DEBUG */
1010 
1011 /*********************************************************************//**
1012 Gets the type of a lock.
1013 @return LOCK_TABLE or LOCK_REC */
1014 UNIV_INLINE
1015 ulint
1016 lock_get_type_low(
1017 /*==============*/
1018 	const lock_t*	lock);	/*!< in: lock */
1019 
1020 /*********************************************************************//**
1021 Gets the previous record lock set on a record.
1022 @return previous lock on the same record, NULL if none exists */
1023 const lock_t*
1024 lock_rec_get_prev(
1025 /*==============*/
1026 	const lock_t*	in_lock,/*!< in: record lock */
1027 	ulint		heap_no);/*!< in: heap number of the record */
1028 
1029 /*********************************************************************//**
1030 Cancels a waiting lock request and releases possible other transactions
1031 waiting behind it. */
1032 void
1033 lock_cancel_waiting_and_release(
1034 /*============================*/
1035 	lock_t*	lock);	/*!< in/out: waiting lock request */
1036 
1037 /*********************************************************************//**
1038 Checks if some transaction has an implicit x-lock on a record in a clustered
1039 index.
1040 @return transaction id of the transaction which has the x-lock, or 0 */
1041 UNIV_INLINE
1042 trx_id_t
1043 lock_clust_rec_some_has_impl(
1044 /*=========================*/
1045 	const rec_t*		rec,	/*!< in: user record */
1046 	const dict_index_t*	index,	/*!< in: clustered index */
1047 	const ulint*		offsets)/*!< in: rec_get_offsets(rec, index) */
1048 	MY_ATTRIBUTE((warn_unused_result));
1049 
1050 /*********************************************************************//**
1051 Gets the first or next record lock on a page.
1052 @return next lock, NULL if none exists */
1053 UNIV_INLINE
1054 const lock_t*
1055 lock_rec_get_next_on_page_const(
1056 /*============================*/
1057 	const lock_t*	lock);	/*!< in: a record lock */
1058 
1059 /*********************************************************************//**
1060 Gets the nth bit of a record lock.
1061 @return TRUE if bit set also if i == ULINT_UNDEFINED return FALSE*/
1062 UNIV_INLINE
1063 ibool
1064 lock_rec_get_nth_bit(
1065 /*=================*/
1066 	const lock_t*	lock,	/*!< in: record lock */
1067 	ulint		i);	/*!< in: index of the bit */
1068 
1069 /*********************************************************************//**
1070 Gets the number of bits in a record lock bitmap.
1071 @return number of bits */
1072 UNIV_INLINE
1073 ulint
1074 lock_rec_get_n_bits(
1075 /*================*/
1076 	const lock_t*	lock);	/*!< in: record lock */
1077 
1078 /**********************************************************************//**
1079 Sets the nth bit of a record lock to TRUE. */
1080 UNIV_INLINE
1081 void
1082 lock_rec_set_nth_bit(
1083 /*=================*/
1084 	lock_t*	lock,	/*!< in: record lock */
1085 	ulint	i);	/*!< in: index of the bit */
1086 
1087 /*********************************************************************//**
1088 Gets the first or next record lock on a page.
1089 @return next lock, NULL if none exists */
1090 UNIV_INLINE
1091 lock_t*
1092 lock_rec_get_next_on_page(
1093 /*======================*/
1094 	lock_t*		lock);		/*!< in: a record lock */
1095 /*********************************************************************//**
1096 Gets the first record lock on a page, where the page is identified by its
1097 file address.
1098 @return first lock, NULL if none exists */
1099 UNIV_INLINE
1100 lock_t*
1101 lock_rec_get_first_on_page_addr(
1102 /*============================*/
1103 	hash_table_t*   lock_hash,	/* Lock hash table */
1104 	ulint           space,		/*!< in: space */
1105 	ulint           page_no);	/*!< in: page number */
1106 
1107 /*********************************************************************//**
1108 Gets the first record lock on a page, where the page is identified by a
1109 pointer to it.
1110 @return first lock, NULL if none exists */
1111 UNIV_INLINE
1112 lock_t*
1113 lock_rec_get_first_on_page(
1114 /*=======================*/
1115 	hash_table_t*		lock_hash,	/*!< in: lock hash table */
1116 	const buf_block_t*	block);		/*!< in: buffer block */
1117 
1118 
1119 /*********************************************************************//**
1120 Gets the next explicit lock request on a record.
1121 @return next lock, NULL if none exists or if heap_no == ULINT_UNDEFINED */
1122 UNIV_INLINE
1123 lock_t*
1124 lock_rec_get_next(
1125 /*==============*/
1126 	ulint	heap_no,/*!< in: heap number of the record */
1127 	lock_t*	lock);	/*!< in: lock */
1128 
1129 /*********************************************************************//**
1130 Gets the next explicit lock request on a record.
1131 @return next lock, NULL if none exists or if heap_no == ULINT_UNDEFINED */
1132 UNIV_INLINE
1133 const lock_t*
1134 lock_rec_get_next_const(
1135 /*====================*/
1136 	ulint		heap_no,/*!< in: heap number of the record */
1137 	const lock_t*	lock);	/*!< in: lock */
1138 
1139 /*********************************************************************//**
1140 Gets the first explicit lock request on a record.
1141 @return first lock, NULL if none exists */
1142 UNIV_INLINE
1143 lock_t*
1144 lock_rec_get_first(
1145 /*===============*/
1146 	hash_table_t*		hash,	/*!< in: hash chain the lock on */
1147 	const buf_block_t*	block,	/*!< in: block containing the record */
1148 	ulint			heap_no);/*!< in: heap number of the record */
1149 
1150 /*********************************************************************//**
1151 Gets the mode of a lock.
1152 @return mode */
1153 UNIV_INLINE
1154 enum lock_mode
1155 lock_get_mode(
1156 /*==========*/
1157 	const lock_t*	lock);	/*!< in: lock */
1158 
1159 /*********************************************************************//**
1160 Calculates if lock mode 1 is compatible with lock mode 2.
1161 @return nonzero if mode1 compatible with mode2 */
1162 UNIV_INLINE
1163 ulint
1164 lock_mode_compatible(
1165 /*=================*/
1166 	enum lock_mode	mode1,	/*!< in: lock mode */
1167 	enum lock_mode	mode2);	/*!< in: lock mode */
1168 
1169 /*********************************************************************//**
1170 Calculates if lock mode 1 is stronger or equal to lock mode 2.
1171 @return nonzero if mode1 stronger or equal to mode2 */
1172 UNIV_INLINE
1173 ulint
1174 lock_mode_stronger_or_eq(
1175 /*=====================*/
1176 	enum lock_mode	mode1,	/*!< in: lock mode */
1177 	enum lock_mode	mode2);	/*!< in: lock mode */
1178 
1179 /*********************************************************************//**
1180 Gets the wait flag of a lock.
1181 @return LOCK_WAIT if waiting, 0 if not */
1182 UNIV_INLINE
1183 ulint
1184 lock_get_wait(
1185 /*==========*/
1186 	const lock_t*	lock);	/*!< in: lock */
1187 
1188 /*********************************************************************//**
1189 Looks for a suitable type record lock struct by the same trx on the same page.
1190 This can be used to save space when a new record lock should be set on a page:
1191 no new struct is needed, if a suitable old is found.
1192 @return lock or NULL */
1193 UNIV_INLINE
1194 lock_t*
1195 lock_rec_find_similar_on_page(
1196 /*==========================*/
1197 	ulint		type_mode,	/*!< in: lock type_mode field */
1198 	ulint		heap_no,	/*!< in: heap number of the record */
1199 	lock_t*		lock,		/*!< in: lock_rec_get_first_on_page() */
1200 	const trx_t*	trx);		/*!< in: transaction */
1201 
1202 /*********************************************************************//**
1203 Checks if a transaction has the specified table lock, or stronger. This
1204 function should only be called by the thread that owns the transaction.
1205 @return lock or NULL */
1206 UNIV_INLINE
1207 const lock_t*
1208 lock_table_has(
1209 /*===========*/
1210 	const trx_t*		trx,	/*!< in: transaction */
1211 	const dict_table_t*	table,	/*!< in: table */
1212 	enum lock_mode		mode);	/*!< in: lock mode */
1213 
1214 #ifndef UNIV_NONINL
1215 #include "lock0priv.ic"
1216 #endif
1217 
1218 #endif /* lock0priv_h */
1219