1 /*****************************************************************************
2 
3 Copyright (c) 1996, 2020, Oracle and/or its affiliates. All Rights Reserved.
4 
5 This program is free software; you can redistribute it and/or modify it under
6 the terms of the GNU General Public License, version 2.0, as published by the
7 Free Software Foundation.
8 
9 This program is also distributed with certain software (including but not
10 limited to OpenSSL) that is licensed under separate terms, as designated in a
11 particular file or component or in included license documentation. The authors
12 of MySQL hereby grant you an additional permission to link the program and
13 your derivative works with the separately licensed software that they have
14 included with MySQL.
15 
16 This program is distributed in the hope that it will be useful, but WITHOUT
17 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
18 FOR A PARTICULAR PURPOSE. See the GNU General Public License, version 2.0,
19 for more details.
20 
21 You should have received a copy of the GNU General Public License along with
22 this program; if not, write to the Free Software Foundation, Inc.,
23 51 Franklin St, Fifth Floor, Boston, MA 02110-1301  USA
24 
25 *****************************************************************************/
26 
27 /** @file lock/lock0lock.cc
28  The transaction lock system
29 
30  Created 5/7/1996 Heikki Tuuri
31  *******************************************************/
32 
33 #define LOCK_MODULE_IMPLEMENTATION
34 
35 #include <mysql/service_thd_engine_lock.h>
36 #include <sys/types.h>
37 
38 #include <algorithm>
39 #include <set>
40 #include <unordered_map>
41 #include <unordered_set>
42 #include <vector>
43 
44 #include "btr0btr.h"
45 #include "current_thd.h"
46 #include "debug_sync.h" /* CONDITIONAL_SYNC_POINT */
47 #include "dict0boot.h"
48 #include "dict0mem.h"
49 #include "ha_prototypes.h"
50 #include "lock0lock.h"
51 #include "lock0priv.h"
52 #include "pars0pars.h"
53 #include "row0mysql.h"
54 #include "row0sel.h"
55 #include "srv0mon.h"
56 #include "trx0purge.h"
57 #include "trx0sys.h"
58 #include "usr0sess.h"
59 #include "ut0new.h"
60 #include "ut0vec.h"
61 
62 #include "my_dbug.h"
63 #include "my_psi_config.h"
64 #include "mysql/plugin.h"
65 #include "mysql/psi/psi_thread.h"
66 
67 /* Flag to enable/disable deadlock detector. */
68 bool innobase_deadlock_detect = true;
69 
70 /** Total number of cached record locks */
71 static const ulint REC_LOCK_CACHE = 8;
72 
73 /** Maximum record lock size in bytes */
74 static const ulint REC_LOCK_SIZE = sizeof(ib_lock_t) + 256;
75 
76 /** Total number of cached table locks */
77 static const ulint TABLE_LOCK_CACHE = 8;
78 
79 /** Size in bytes, of the table lock instance */
80 static const ulint TABLE_LOCK_SIZE = sizeof(ib_lock_t);
81 
82 template <typename T>
83 using Locks = std::vector<T, mem_heap_allocator<T>>;
84 
85 /** Used by lock_get_mode_str to build a lock mode description */
86 static const std::map<uint, const char *> lock_constant_names{
87     {LOCK_GAP, "GAP"},
88     {LOCK_REC_NOT_GAP, "REC_NOT_GAP"},
89     {LOCK_INSERT_INTENTION, "INSERT_INTENTION"},
90     {LOCK_PREDICATE, "PREDICATE"},
91     {LOCK_PRDT_PAGE, "PRDT_PAGE"},
92 };
93 /** Used by lock_get_mode_str to cache results. Strings pointed by these
94 pointers might be in use by performance schema and thus can not be freed
95 until the very end.
96 Protected by exclusive global lock_sys latch.
97 */
98 static std::unordered_map<uint, const char *> lock_cached_lock_mode_names;
99 
100 /** A static class for reporting notifications about deadlocks */
101 class Deadlock_notifier {
102  public:
103   Deadlock_notifier() = delete;
104 
105   /** Handles writing the information about found deadlock to the log files
106   and caches it for future lock_latest_err_file() calls (for example used by
107   SHOW ENGINE INNODB STATUS)
108   @param[in] trxs_on_cycle  trxs causing deadlock, i-th waits for i+1-th
109   @param[in] victim_trx     the trx from trx_on_cycle which will be rolled back
110   */
111   static void notify(const ut::vector<const trx_t *> &trxs_on_cycle,
112                      const trx_t *victim_trx);
113 
114  private:
115 #ifdef UNIV_DEBUG
116   /** Determines if a situation in which the lock takes part in a deadlock
117   cycle is expected (as in: handled correctly) or not (say because it is on a DD
118   table, for which there is no reason to expect a deadlock and we don't handle
119   deadlocks correctly). The purpose of the function is to use it in an assertion
120   failing as soon as the deadlock is identified, to give developer a chance to
121   investigate the root cause of the situation (without such assertion, the code
122   might continue to run and either fail at later stage when the data useful for
123   debugging is no longer on stack, or not fail at all, which is risky).
124   @param[in] lock lock found in a deadlock cycle
125   @return true if we expect that this lock can take part in a deadlock cycle */
126   static bool is_allowed_to_be_on_cycle(const lock_t *lock);
127 #endif /* UNIV_DEBUG */
128 
129   /** Print transaction data to the deadlock file and possibly to stderr.
130   @param trx transaction
131   @param max_query_len max query length to print */
132   static void print(const trx_t *trx, ulint max_query_len);
133 
134   /** rewind(3) the file used for storing the latest detected deadlock
135   and print a heading message to stderr if printing of all deadlocks to
136   stderr is enabled. */
137   static void start_print();
138 
139   /** Print lock data to the deadlock file and possibly to stderr.
140   @param lock record or table type lock */
141   static void print(const lock_t *lock);
142 
143   /** Print a message to the deadlock file and possibly to stderr.
144   @param msg message to print */
145   static void print(const char *msg);
146 
147   /** Prints a numbered section title to the deadlock file and possibly to
148   stderr. Numbers do not have to be unique, as they are used to identify
149   transactions on the cycle, and there are multiple sections per transaction.
150   @param[in]    pos_on_cycle    The zero-based position of trx on deadlock cycle
151   @param[in]    title           The title of the section */
152   static void print_title(size_t pos_on_cycle, const char *title);
153 };
154 
155 #ifdef UNIV_DEBUG
156 namespace locksys {
157 
owns_exclusive_global_latch()158 bool owns_exclusive_global_latch() {
159   return lock_sys->latches.owns_exclusive_global_latch();
160 }
161 
owns_shared_global_latch()162 bool owns_shared_global_latch() {
163   return lock_sys->latches.owns_shared_global_latch();
164 }
165 
owns_page_shard(const page_id_t & page_id)166 bool owns_page_shard(const page_id_t &page_id) {
167   return lock_sys->latches.owns_page_shard(page_id);
168 }
169 
owns_table_shard(const dict_table_t & table)170 bool owns_table_shard(const dict_table_t &table) {
171   return lock_sys->latches.owns_table_shard(table);
172 }
173 
owns_lock_shard(const lock_t * lock)174 bool owns_lock_shard(const lock_t *lock) {
175   if (lock->is_record_lock()) {
176     return lock_sys->latches.owns_page_shard(lock->rec_lock.page_id);
177   } else {
178     return lock_sys->latches.owns_table_shard(*lock->tab_lock.table);
179   }
180 }
181 }  // namespace locksys
182 
183 /** Validates the record lock queues on a page.
184  @return true if ok */
185 static bool lock_rec_validate_page(
186     const buf_block_t *block) /*!< in: buffer block */
187     MY_ATTRIBUTE((warn_unused_result));
188 #endif /* UNIV_DEBUG */
189 
190 /* The lock system */
191 lock_sys_t *lock_sys = nullptr;
192 
193 /** We store info on the latest deadlock error to this buffer. InnoDB
194 Monitor will then fetch it and print */
195 static bool lock_deadlock_found = false;
196 
197 /** Only created if !srv_read_only_mode. I/O operations on this file require
198 exclusive lock_sys latch */
199 static FILE *lock_latest_err_file;
200 
201 /** Reports that a transaction id is insensible, i.e., in the future. */
lock_report_trx_id_insanity(trx_id_t trx_id,const rec_t * rec,const dict_index_t * index,const ulint * offsets,trx_id_t max_trx_id)202 void lock_report_trx_id_insanity(
203     trx_id_t trx_id,           /*!< in: trx id */
204     const rec_t *rec,          /*!< in: user record */
205     const dict_index_t *index, /*!< in: index */
206     const ulint *offsets,      /*!< in: rec_get_offsets(rec, index) */
207     trx_id_t max_trx_id)       /*!< in: trx_sys_get_max_trx_id() */
208 {
209   ib::error(ER_IB_MSG_634) << "Transaction id " << trx_id
210                            << " associated with record"
211                            << rec_offsets_print(rec, offsets) << " in index "
212                            << index->name << " of table " << index->table->name
213                            << " is greater than the global counter "
214                            << max_trx_id << "! The table is corrupted.";
215 }
216 
217 /** Checks that a transaction id is sensible, i.e., not in the future.
218  @return true if ok */
219 #ifdef UNIV_DEBUG
220 
221 #else
222 static MY_ATTRIBUTE((warn_unused_result))
223 #endif
lock_check_trx_id_sanity(trx_id_t trx_id,const rec_t * rec,const dict_index_t * index,const ulint * offsets)224 bool lock_check_trx_id_sanity(
225     trx_id_t trx_id,           /*!< in: trx id */
226     const rec_t *rec,          /*!< in: user record */
227     const dict_index_t *index, /*!< in: index */
228     const ulint *offsets)      /*!< in: rec_get_offsets(rec, index) */
229 {
230   ut_ad(rec_offs_validate(rec, index, offsets));
231 
232   trx_id_t max_trx_id = trx_sys_get_max_trx_id();
233   bool is_ok = trx_id < max_trx_id;
234 
235   if (!is_ok) {
236     lock_report_trx_id_insanity(trx_id, rec, index, offsets, max_trx_id);
237   }
238 
239   return (is_ok);
240 }
241 
242 /** Checks that a record is seen in a consistent read.
243  @return true if sees, or false if an earlier version of the record
244  should be retrieved */
lock_clust_rec_cons_read_sees(const rec_t * rec,dict_index_t * index,const ulint * offsets,ReadView * view)245 bool lock_clust_rec_cons_read_sees(
246     const rec_t *rec,     /*!< in: user record which should be read or
247                           passed over by a read cursor */
248     dict_index_t *index,  /*!< in: clustered index */
249     const ulint *offsets, /*!< in: rec_get_offsets(rec, index) */
250     ReadView *view)       /*!< in: consistent read view */
251 {
252   ut_ad(index->is_clustered());
253   ut_ad(page_rec_is_user_rec(rec));
254   ut_ad(rec_offs_validate(rec, index, offsets));
255 
256   /* Temp-tables are not shared across connections and multiple
257   transactions from different connections cannot simultaneously
258   operate on same temp-table and so read of temp-table is
259   always consistent read. */
260   if (srv_read_only_mode || index->table->is_temporary()) {
261     ut_ad(view == nullptr || index->table->is_temporary());
262     return (true);
263   }
264 
265   /* NOTE that we call this function while holding the search
266   system latch. */
267 
268   trx_id_t trx_id = row_get_rec_trx_id(rec, index, offsets);
269 
270   return (view->changes_visible(trx_id, index->table->name));
271 }
272 
273 /** Checks that a non-clustered index record is seen in a consistent read.
274 
275  NOTE that a non-clustered index page contains so little information on
276  its modifications that also in the case false, the present version of
277  rec may be the right, but we must check this from the clustered index
278  record.
279 
280  @return true if certainly sees, or false if an earlier version of the
281  clustered index record might be needed */
lock_sec_rec_cons_read_sees(const rec_t * rec,const dict_index_t * index,const ReadView * view)282 bool lock_sec_rec_cons_read_sees(
283     const rec_t *rec,          /*!< in: user record which
284                                should be read or passed over
285                                by a read cursor */
286     const dict_index_t *index, /*!< in: index */
287     const ReadView *view)      /*!< in: consistent read view */
288 {
289   ut_ad(page_rec_is_user_rec(rec));
290 
291   /* NOTE that we might call this function while holding the search
292   system latch. */
293 
294   if (recv_recovery_is_on()) {
295     return (false);
296 
297   } else if (index->table->is_temporary()) {
298     /* Temp-tables are not shared across connections and multiple
299     transactions from different connections cannot simultaneously
300     operate on same temp-table and so read of temp-table is
301     always consistent read. */
302 
303     return (true);
304   }
305 
306   trx_id_t max_trx_id = page_get_max_trx_id(page_align(rec));
307 
308   ut_ad(max_trx_id > 0);
309 
310   return (view->sees(max_trx_id));
311 }
312 
313 /** Creates the lock system at database start. */
lock_sys_create(ulint n_cells)314 void lock_sys_create(
315     ulint n_cells) /*!< in: number of slots in lock hash table */
316 {
317   ulint lock_sys_sz;
318 
319   lock_sys_sz = sizeof(*lock_sys) + srv_max_n_threads * sizeof(srv_slot_t);
320 
321   lock_sys = static_cast<lock_sys_t *>(ut_zalloc_nokey(lock_sys_sz));
322 
323   new (lock_sys) lock_sys_t{};
324 
325   void *ptr = &lock_sys[1];
326 
327   lock_sys->waiting_threads = static_cast<srv_slot_t *>(ptr);
328 
329   lock_sys->last_slot = lock_sys->waiting_threads;
330 
331   mutex_create(LATCH_ID_LOCK_SYS_WAIT, &lock_sys->wait_mutex);
332 
333   lock_sys->timeout_event = os_event_create();
334 
335   lock_sys->rec_hash = hash_create(n_cells);
336   lock_sys->prdt_hash = hash_create(n_cells);
337   lock_sys->prdt_page_hash = hash_create(n_cells);
338 
339   if (!srv_read_only_mode) {
340     lock_latest_err_file = os_file_create_tmpfile(nullptr);
341     ut_a(lock_latest_err_file);
342   }
343 }
344 
345 /** Calculates the fold value of a lock: used in migrating the hash table.
346 @param[in]	lock	record lock object
347 @return	folded value */
lock_rec_lock_fold(const lock_t * lock)348 static ulint lock_rec_lock_fold(const lock_t *lock) {
349   return (lock_rec_fold(lock->rec_lock.page_id));
350 }
351 
352 /** Resize the lock hash tables.
353 @param[in]	n_cells	number of slots in lock hash table */
lock_sys_resize(ulint n_cells)354 void lock_sys_resize(ulint n_cells) {
355   hash_table_t *old_hash;
356 
357   /* We will rearrange locks between buckets and change the parameters of hash
358   function used in sharding of latches, so we have to prevent everyone from
359   accessing lock sys queues, or even computing shard id. */
360   locksys::Global_exclusive_latch_guard guard{};
361 
362   old_hash = lock_sys->rec_hash;
363   lock_sys->rec_hash = hash_create(n_cells);
364   HASH_MIGRATE(old_hash, lock_sys->rec_hash, lock_t, hash, lock_rec_lock_fold);
365   hash_table_free(old_hash);
366 
367   DBUG_EXECUTE_IF("syncpoint_after_lock_sys_resize_rec_hash", {
368     /* A workaround for buf_resize_thread() not using create_thd().
369     TBD: if buf_resize_thread() were to use create_thd() then should it be
370     instrumented (together or instead of os_thread_create instrumentation)? */
371     ut_ad(current_thd == nullptr);
372     THD *thd = create_thd(false, true, true, PSI_NOT_INSTRUMENTED);
373     ut_ad(current_thd == thd);
374     CONDITIONAL_SYNC_POINT("after_lock_sys_resize_rec_hash");
375     destroy_thd(thd);
376     ut_ad(current_thd == nullptr);
377   });
378 
379   old_hash = lock_sys->prdt_hash;
380   lock_sys->prdt_hash = hash_create(n_cells);
381   HASH_MIGRATE(old_hash, lock_sys->prdt_hash, lock_t, hash, lock_rec_lock_fold);
382   hash_table_free(old_hash);
383 
384   old_hash = lock_sys->prdt_page_hash;
385   lock_sys->prdt_page_hash = hash_create(n_cells);
386   HASH_MIGRATE(old_hash, lock_sys->prdt_page_hash, lock_t, hash,
387                lock_rec_lock_fold);
388   hash_table_free(old_hash);
389 
390   /* need to update block->lock_hash_val */
391   for (ulint i = 0; i < srv_buf_pool_instances; ++i) {
392     buf_pool_t *buf_pool = buf_pool_from_array(i);
393 
394     mutex_enter(&buf_pool->LRU_list_mutex);
395     buf_page_t *bpage;
396     bpage = UT_LIST_GET_FIRST(buf_pool->LRU);
397 
398     while (bpage != nullptr) {
399       if (buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE) {
400         buf_block_t *block;
401         block = reinterpret_cast<buf_block_t *>(bpage);
402 
403         block->lock_hash_val = lock_rec_hash(bpage->id);
404       }
405       bpage = UT_LIST_GET_NEXT(LRU, bpage);
406     }
407     mutex_exit(&buf_pool->LRU_list_mutex);
408   }
409 }
410 
411 /** Closes the lock system at database shutdown. */
lock_sys_close(void)412 void lock_sys_close(void) {
413   if (lock_latest_err_file != nullptr) {
414     fclose(lock_latest_err_file);
415     lock_latest_err_file = nullptr;
416   }
417 
418   hash_table_free(lock_sys->rec_hash);
419   hash_table_free(lock_sys->prdt_hash);
420   hash_table_free(lock_sys->prdt_page_hash);
421 
422   os_event_destroy(lock_sys->timeout_event);
423 
424   mutex_destroy(&lock_sys->wait_mutex);
425 
426   srv_slot_t *slot = lock_sys->waiting_threads;
427 
428   for (uint32_t i = 0; i < srv_max_n_threads; i++, ++slot) {
429     if (slot->event != nullptr) {
430       os_event_destroy(slot->event);
431     }
432   }
433   for (auto &cached_lock_mode_name : lock_cached_lock_mode_names) {
434     ut_free(const_cast<char *>(cached_lock_mode_name.second));
435   }
436   lock_cached_lock_mode_names.clear();
437 
438   lock_sys->~lock_sys_t();
439 
440   ut_free(lock_sys);
441 
442   lock_sys = nullptr;
443 }
444 
445 /** Gets the size of a lock struct.
446  @return size in bytes */
lock_get_size(void)447 ulint lock_get_size(void) { return ((ulint)sizeof(lock_t)); }
448 
449 /** Sets the wait flag of a lock and the back pointer in trx to lock.
450 @param[in]  lock  The lock on which a transaction is waiting */
451 UNIV_INLINE
lock_set_lock_and_trx_wait(lock_t * lock)452 void lock_set_lock_and_trx_wait(lock_t *lock) {
453   auto trx = lock->trx;
454   ut_ad(trx_mutex_own(trx));
455   ut_a(trx->lock.wait_lock == nullptr);
456   ut_ad(locksys::owns_lock_shard(lock));
457 
458   trx->lock.wait_lock = lock;
459   trx->lock.wait_lock_type = lock_get_type_low(lock);
460   lock->type_mode |= LOCK_WAIT;
461 }
462 
463 /** Gets the gap flag of a record lock.
464  @return LOCK_GAP or 0 */
465 UNIV_INLINE
lock_rec_get_gap(const lock_t * lock)466 ulint lock_rec_get_gap(const lock_t *lock) /*!< in: record lock */
467 {
468   ut_ad(lock_get_type_low(lock) == LOCK_REC);
469 
470   return (lock->type_mode & LOCK_GAP);
471 }
472 
473 /** Gets the LOCK_REC_NOT_GAP flag of a record lock.
474  @return LOCK_REC_NOT_GAP or 0 */
475 UNIV_INLINE
lock_rec_get_rec_not_gap(const lock_t * lock)476 ulint lock_rec_get_rec_not_gap(const lock_t *lock) /*!< in: record lock */
477 {
478   ut_ad(lock_get_type_low(lock) == LOCK_REC);
479 
480   return (lock->type_mode & LOCK_REC_NOT_GAP);
481 }
482 
483 /** Gets the waiting insert flag of a record lock.
484  @return LOCK_INSERT_INTENTION or 0 */
485 UNIV_INLINE
lock_rec_get_insert_intention(const lock_t * lock)486 ulint lock_rec_get_insert_intention(const lock_t *lock) /*!< in: record lock */
487 {
488   ut_ad(lock_get_type_low(lock) == LOCK_REC);
489 
490   return (lock->type_mode & LOCK_INSERT_INTENTION);
491 }
492 
493 /** Checks if a lock request for a new lock has to wait for request lock2.
494  @return true if new lock has to wait for lock2 to be removed */
495 UNIV_INLINE
lock_rec_has_to_wait(const trx_t * trx,ulint type_mode,const lock_t * lock2,bool lock_is_on_supremum)496 bool lock_rec_has_to_wait(
497     const trx_t *trx,    /*!< in: trx of new lock */
498     ulint type_mode,     /*!< in: precise mode of the new lock
499                        to set: LOCK_S or LOCK_X, possibly
500                        ORed to LOCK_GAP or LOCK_REC_NOT_GAP,
501                        LOCK_INSERT_INTENTION */
502     const lock_t *lock2, /*!< in: another record lock; NOTE that
503                          it is assumed that this has a lock bit
504                          set on the same record as in the new
505                          lock we are setting */
506     bool lock_is_on_supremum)
507 /*!< in: true if we are setting the
508 lock on the 'supremum' record of an
509 index page: we know then that the lock
510 request is really for a 'gap' type lock */
511 {
512   ut_ad(trx && lock2);
513   ut_ad(lock_get_type_low(lock2) == LOCK_REC);
514 
515   const bool is_hp = trx_is_high_priority(trx);
516   if (trx != lock2->trx &&
517       !lock_mode_compatible(static_cast<lock_mode>(LOCK_MODE_MASK & type_mode),
518                             lock_get_mode(lock2))) {
519     /* If our trx is High Priority and the existing lock is WAITING and not
520         high priority, then we can ignore it. */
521     if (is_hp && lock2->is_waiting() && !trx_is_high_priority(lock2->trx)) {
522       return (false);
523     }
524 
525     /* We have somewhat complex rules when gap type record locks
526     cause waits */
527 
528     if ((lock_is_on_supremum || (type_mode & LOCK_GAP)) &&
529         !(type_mode & LOCK_INSERT_INTENTION)) {
530       /* Gap type locks without LOCK_INSERT_INTENTION flag
531       do not need to wait for anything. This is because
532       different users can have conflicting lock types
533       on gaps. */
534 
535       return (false);
536     }
537 
538     if (!(type_mode & LOCK_INSERT_INTENTION) && lock_rec_get_gap(lock2)) {
539       /* Record lock (LOCK_ORDINARY or LOCK_REC_NOT_GAP
540       does not need to wait for a gap type lock */
541 
542       return (false);
543     }
544 
545     if ((type_mode & LOCK_GAP) && lock_rec_get_rec_not_gap(lock2)) {
546       /* Lock on gap does not need to wait for
547       a LOCK_REC_NOT_GAP type lock */
548 
549       return (false);
550     }
551 
552     if (lock_rec_get_insert_intention(lock2)) {
553       /* No lock request needs to wait for an insert
554       intention lock to be removed. This is ok since our
555       rules allow conflicting locks on gaps. This eliminates
556       a spurious deadlock caused by a next-key lock waiting
557       for an insert intention lock; when the insert
558       intention lock was granted, the insert deadlocked on
559       the waiting next-key lock.
560 
561       Also, insert intention locks do not disturb each
562       other. */
563 
564       return (false);
565     }
566 
567     return (true);
568   }
569 
570   return (false);
571 }
572 
573 /** Checks if a lock request lock1 has to wait for request lock2.
574  @return true if lock1 has to wait for lock2 to be removed */
lock_has_to_wait(const lock_t * lock1,const lock_t * lock2)575 bool lock_has_to_wait(const lock_t *lock1, /*!< in: waiting lock */
576                       const lock_t *lock2) /*!< in: another lock; NOTE that it
577                                            is assumed that this has a lock bit
578                                            set on the same record as in lock1 if
579                                            the locks are record locks */
580 {
581   if (lock1->trx != lock2->trx &&
582       !lock_mode_compatible(lock_get_mode(lock1), lock_get_mode(lock2))) {
583     if (lock_get_type_low(lock1) == LOCK_REC) {
584       ut_ad(lock_get_type_low(lock2) == LOCK_REC);
585 
586       /* If this lock request is for a supremum record
587       then the second bit on the lock bitmap is set */
588 
589       if (lock1->type_mode & (LOCK_PREDICATE | LOCK_PRDT_PAGE)) {
590         return (lock_prdt_has_to_wait(lock1->trx, lock1->type_mode,
591                                       lock_get_prdt_from_lock(lock1), lock2));
592       } else {
593         return (lock_rec_has_to_wait(lock1->trx, lock1->type_mode, lock2,
594                                      lock1->includes_supremum()));
595       }
596     }
597 
598     return (true);
599   }
600 
601   return (false);
602 }
603 
604 /*============== RECORD LOCK BASIC FUNCTIONS ============================*/
605 
606 /** Looks for a set bit in a record lock bitmap. Returns ULINT_UNDEFINED,
607  if none found.
608  @return bit index == heap number of the record, or ULINT_UNDEFINED if
609  none found */
lock_rec_find_set_bit(const lock_t * lock)610 ulint lock_rec_find_set_bit(
611     const lock_t *lock) /*!< in: record lock with at least one bit set */
612 {
613   for (ulint i = 0; i < lock_rec_get_n_bits(lock); ++i) {
614     if (lock_rec_get_nth_bit(lock, i)) {
615       return (i);
616     }
617   }
618 
619   return (ULINT_UNDEFINED);
620 }
621 
622 /** Looks for the next set bit in the record lock bitmap.
623 @param[in] lock		record lock with at least one bit set
624 @param[in] heap_no	current set bit
625 @return The next bit index  == heap number following heap_no, or ULINT_UNDEFINED
626 if none found */
lock_rec_find_next_set_bit(const lock_t * lock,ulint heap_no)627 ulint lock_rec_find_next_set_bit(const lock_t *lock, ulint heap_no) {
628   ut_ad(heap_no != ULINT_UNDEFINED);
629 
630   for (ulint i = heap_no + 1; i < lock_rec_get_n_bits(lock); ++i) {
631     if (lock_rec_get_nth_bit(lock, i)) {
632       return (i);
633     }
634   }
635 
636   return (ULINT_UNDEFINED);
637 }
638 
639 /** Reset the nth bit of a record lock.
640 @param[in,out] lock record lock
641 @param[in] i index of the bit that will be reset
642 @return previous value of the bit */
643 UNIV_INLINE
lock_rec_reset_nth_bit(lock_t * lock,ulint i)644 byte lock_rec_reset_nth_bit(lock_t *lock, ulint i) {
645   ut_ad(lock_get_type_low(lock) == LOCK_REC);
646   ut_ad(i < lock->rec_lock.n_bits);
647 
648   byte *b = reinterpret_cast<byte *>(&lock[1]) + (i >> 3);
649   byte mask = 1 << (i & 7);
650   byte bit = *b & mask;
651   *b &= ~mask;
652 
653   if (bit != 0) {
654     ut_ad(lock->trx->lock.n_rec_locks.load() > 0);
655     lock->trx->lock.n_rec_locks.fetch_sub(1, std::memory_order_relaxed);
656   }
657 
658   return (bit);
659 }
660 
661 /** Reset the nth bit of a record lock.
662 @param[in,out]	lock record lock
663 @param[in] i	index of the bit that will be reset
664 @param[in] type	whether the lock is in wait mode */
lock_rec_trx_wait(lock_t * lock,ulint i,ulint type)665 void lock_rec_trx_wait(lock_t *lock, ulint i, ulint type) {
666   lock_rec_reset_nth_bit(lock, i);
667 
668   if (type & LOCK_WAIT) {
669     lock_reset_lock_and_trx_wait(lock);
670   }
671 }
672 
lock_rec_expl_exist_on_page(const page_id_t & page_id)673 bool lock_rec_expl_exist_on_page(const page_id_t &page_id) {
674   lock_t *lock;
675   locksys::Shard_latch_guard guard{page_id};
676   /* Only used in ibuf pages, so rec_hash is good enough */
677   lock = lock_rec_get_first_on_page_addr(lock_sys->rec_hash, page_id);
678 
679   return (lock != nullptr);
680 }
681 
682 /** Resets the record lock bitmap to zero. NOTE: does not touch the wait_lock
683  pointer in the transaction! This function is used in lock object creation
684  and resetting. */
lock_rec_bitmap_reset(lock_t * lock)685 static void lock_rec_bitmap_reset(lock_t *lock) /*!< in: record lock */
686 {
687   ulint n_bytes;
688 
689   ut_ad(lock_get_type_low(lock) == LOCK_REC);
690 
691   /* Reset to zero the bitmap which resides immediately after the lock
692   struct */
693 
694   n_bytes = lock_rec_get_n_bits(lock) / 8;
695 
696   ut_ad((lock_rec_get_n_bits(lock) % 8) == 0);
697 
698   memset(&lock[1], 0, n_bytes);
699 }
700 
701 /** Copies a record lock to heap.
702  @return copy of lock */
lock_rec_copy(const lock_t * lock,mem_heap_t * heap)703 static lock_t *lock_rec_copy(const lock_t *lock, /*!< in: record lock */
704                              mem_heap_t *heap)   /*!< in: memory heap */
705 {
706   ulint size;
707 
708   ut_ad(lock_get_type_low(lock) == LOCK_REC);
709 
710   size = sizeof(lock_t) + lock_rec_get_n_bits(lock) / 8;
711 
712   return (static_cast<lock_t *>(mem_heap_dup(heap, lock, size)));
713 }
714 
715 /** Gets the previous record lock set on a record.
716  @return previous lock on the same record, NULL if none exists */
lock_rec_get_prev(const lock_t * in_lock,ulint heap_no)717 const lock_t *lock_rec_get_prev(
718     const lock_t *in_lock, /*!< in: record lock */
719     ulint heap_no)         /*!< in: heap number of the record */
720 {
721   lock_t *lock;
722   lock_t *found_lock = nullptr;
723   hash_table_t *hash;
724 
725   ut_ad(lock_get_type_low(in_lock) == LOCK_REC);
726   const auto page_id = in_lock->rec_lock.page_id;
727   ut_ad(locksys::owns_page_shard(page_id));
728 
729   hash = lock_hash_get(in_lock->type_mode);
730 
731   for (lock = lock_rec_get_first_on_page_addr(hash, page_id);
732        /* No op */; lock = lock_rec_get_next_on_page(lock)) {
733     ut_ad(lock);
734 
735     if (lock == in_lock) {
736       return (found_lock);
737     }
738 
739     if (lock_rec_get_nth_bit(lock, heap_no)) {
740       found_lock = lock;
741     }
742   }
743 }
744 
745 /*============= FUNCTIONS FOR ANALYZING RECORD LOCK QUEUE ================*/
746 
747 /** Checks if a transaction has a GRANTED explicit lock on rec stronger or equal
748  to precise_mode.
749 @param[in]    precise_mode  LOCK_S or LOCK_X possibly ORed to LOCK_GAP or
750                             LOCK_REC_NOT_GAP, for a supremum record we regard
751                             this always a gap type request
752 @param[in]    block         buffer block containing the record
753 @param[in]    heap_no       heap number of the record
754 @param[in]    trx           transaction
755 @return lock or NULL */
756 UNIV_INLINE
lock_rec_has_expl(ulint precise_mode,const buf_block_t * block,ulint heap_no,const trx_t * trx)757 const lock_t *lock_rec_has_expl(ulint precise_mode, const buf_block_t *block,
758                                 ulint heap_no, const trx_t *trx) {
759   ut_ad(locksys::owns_page_shard(block->get_page_id()));
760   ut_ad((precise_mode & LOCK_MODE_MASK) == LOCK_S ||
761         (precise_mode & LOCK_MODE_MASK) == LOCK_X);
762   ut_ad(
763       !(precise_mode & ~(ulint)(LOCK_MODE_MASK | LOCK_GAP | LOCK_REC_NOT_GAP)));
764   ut_ad(!(precise_mode & LOCK_INSERT_INTENTION));
765   ut_ad(!(precise_mode & LOCK_PREDICATE));
766   ut_ad(!(precise_mode & LOCK_PRDT_PAGE));
767   const RecID rec_id{block, heap_no};
768   const bool is_on_supremum = rec_id.is_supremum();
769   const bool is_rec_not_gap = 0 != (precise_mode & LOCK_REC_NOT_GAP);
770   const bool is_gap = 0 != (precise_mode & LOCK_GAP);
771   const auto mode = static_cast<lock_mode>(precise_mode & LOCK_MODE_MASK);
772   const auto p_implies_q = [](bool p, bool q) { return q || !p; };
773 
774   return (Lock_iter::for_each(rec_id, [&](const lock_t *lock) {
775     return (!(lock->trx == trx && !lock->is_insert_intention() &&
776               lock_mode_stronger_or_eq(lock_get_mode(lock), mode) &&
777               !lock->is_waiting() &&
778               (is_on_supremum ||
779                (p_implies_q(lock->is_record_not_gap(), is_rec_not_gap) &&
780                 p_implies_q(lock->is_gap(), is_gap)))));
781   }));
782 }
783 
784 #ifdef UNIV_DEBUG
785 /** Checks if some other transaction has a lock request in the queue.
786  @return lock or NULL */
lock_rec_other_has_expl_req(lock_mode mode,const buf_block_t * block,bool wait,ulint heap_no,const trx_t * trx)787 static const lock_t *lock_rec_other_has_expl_req(
788     lock_mode mode,           /*!< in: LOCK_S or LOCK_X */
789     const buf_block_t *block, /*!< in: buffer block containing
790                               the record */
791     bool wait,                /*!< in: whether also waiting locks
792                               are taken into account */
793     ulint heap_no,            /*!< in: heap number of the record */
794     const trx_t *trx)         /*!< in: transaction, or NULL if
795                               requests by all transactions
796                               are taken into account */
797 {
798   ut_ad(locksys::owns_page_shard(block->get_page_id()));
799   ut_ad(mode == LOCK_X || mode == LOCK_S);
800 
801   /* Only GAP lock can be on SUPREMUM, and we are not looking
802   for GAP lock */
803 
804   RecID rec_id{block, heap_no};
805 
806   if (rec_id.is_supremum()) {
807     return (nullptr);
808   }
809 
810   return (Lock_iter::for_each(rec_id, [=](const lock_t *lock) {
811     /* Ignore transactions that are being rolled back. */
812     return (!(lock->trx != trx && !lock->is_gap() &&
813               (wait || !lock->is_waiting()) &&
814               lock_mode_stronger_or_eq(lock->mode(), mode)));
815   }));
816 }
817 #endif /* UNIV_DEBUG */
818 
819 /** Checks if some other transaction has a conflicting explicit lock request
820  in the queue, so that we have to wait.
821  @return lock or NULL */
lock_rec_other_has_conflicting(ulint mode,const buf_block_t * block,ulint heap_no,const trx_t * trx)822 static const lock_t *lock_rec_other_has_conflicting(
823     ulint mode,               /*!< in: LOCK_S or LOCK_X,
824                               possibly ORed to LOCK_GAP or
825                               LOC_REC_NOT_GAP,
826                               LOCK_INSERT_INTENTION */
827     const buf_block_t *block, /*!< in: buffer block containing
828                               the record */
829     ulint heap_no,            /*!< in: heap number of the record */
830     const trx_t *trx)         /*!< in: our transaction */
831 {
832   ut_ad(locksys::owns_page_shard(block->get_page_id()));
833   ut_ad(!(mode & ~(ulint)(LOCK_MODE_MASK | LOCK_GAP | LOCK_REC_NOT_GAP |
834                           LOCK_INSERT_INTENTION)));
835   ut_ad(!(mode & LOCK_PREDICATE));
836   ut_ad(!(mode & LOCK_PRDT_PAGE));
837 
838   RecID rec_id{block, heap_no};
839   const bool is_supremum = rec_id.is_supremum();
840 
841   return (Lock_iter::for_each(rec_id, [=](const lock_t *lock) {
842     return (!(lock_rec_has_to_wait(trx, mode, lock, is_supremum)));
843   }));
844 }
845 
846 /** Checks if some transaction has an implicit x-lock on a record in a secondary
847  index.
848  @param[in]   rec       user record
849  @param[in]   index     secondary index
850  @param[in]   offsets   rec_get_offsets(rec, index)
851  @return transaction id of the transaction which has the x-lock, or 0;
852  NOTE that this function can return false positives but never false
853  negatives. The caller must confirm all positive results by checking if the trx
854  is still active. */
lock_sec_rec_some_has_impl(const rec_t * rec,dict_index_t * index,const ulint * offsets)855 static trx_t *lock_sec_rec_some_has_impl(const rec_t *rec, dict_index_t *index,
856                                          const ulint *offsets) {
857   trx_t *trx;
858   trx_id_t max_trx_id;
859   const page_t *page = page_align(rec);
860 
861   ut_ad(!locksys::owns_exclusive_global_latch());
862   ut_ad(!trx_sys_mutex_own());
863   ut_ad(!index->is_clustered());
864   ut_ad(page_rec_is_user_rec(rec));
865   ut_ad(rec_offs_validate(rec, index, offsets));
866 
867   max_trx_id = page_get_max_trx_id(page);
868 
869   /* Some transaction may have an implicit x-lock on the record only
870   if the max trx id for the page >= min trx id for the trx list, or
871   database recovery is running. We do not write the changes of a page
872   max trx id to the log, and therefore during recovery, this value
873   for a page may be incorrect. */
874 
875   if (max_trx_id < trx_rw_min_trx_id() && !recv_recovery_is_on()) {
876     trx = nullptr;
877 
878   } else if (!lock_check_trx_id_sanity(max_trx_id, rec, index, offsets)) {
879     /* The page is corrupt: try to avoid a crash by returning 0 */
880     trx = nullptr;
881 
882     /* In this case it is possible that some transaction has an implicit
883     x-lock. We have to look in the clustered index. */
884 
885   } else {
886     trx = row_vers_impl_x_locked(rec, index, offsets);
887   }
888 
889   return (trx);
890 }
891 
892 #ifdef UNIV_DEBUG
893 /** Checks if some transaction, other than given trx_id, has an explicit
894  lock on the given rec, in the given precise_mode.
895 @param[in]   precise_mode   LOCK_S or LOCK_X possibly ORed to LOCK_GAP or
896                             LOCK_REC_NOT_GAP.
897 @param[in]   trx            the trx holding implicit lock on rec
898 @param[in]   rec            user record
899 @param[in]   block          buffer block containing the record
900 @return true iff there's a transaction, whose id is not equal to trx_id,
901         that has an explicit lock on the given rec, in the given
902         precise_mode. */
lock_rec_other_trx_holds_expl(ulint precise_mode,const trx_t * trx,const rec_t * rec,const buf_block_t * block)903 static bool lock_rec_other_trx_holds_expl(ulint precise_mode, const trx_t *trx,
904                                           const rec_t *rec,
905                                           const buf_block_t *block) {
906   bool holds = false;
907 
908   /* We will inspect locks from various shards when inspecting transactions. */
909   locksys::Global_exclusive_latch_guard guard{};
910   /* If trx_rw_is_active returns non-null impl_trx it only means that impl_trx
911   was active at some moment during the call, but might already be in
912   TRX_STATE_COMMITTED_IN_MEMORY when we execute the body of the if.
913   However, we hold exclusive latch on whole lock_sys, which prevents anyone
914   from creating any new explicit locks.
915   So, all explicit locks we will see must have been created at the time when
916   the transaction was not committed yet. */
917   if (trx_t *impl_trx = trx_rw_is_active(trx->id, nullptr, false)) {
918     ulint heap_no = page_rec_get_heap_no(rec);
919     mutex_enter(&trx_sys->mutex);
920 
921     for (const trx_t *t = UT_LIST_GET_FIRST(trx_sys->rw_trx_list); t != nullptr;
922          t = UT_LIST_GET_NEXT(trx_list, t)) {
923       const lock_t *expl_lock =
924           lock_rec_has_expl(precise_mode, block, heap_no, t);
925 
926       if (expl_lock && expl_lock->trx != impl_trx) {
927         /* An explicit lock is held by trx other than
928         the trx holding the implicit lock. */
929         holds = true;
930         break;
931       }
932     }
933 
934     mutex_exit(&trx_sys->mutex);
935   }
936 
937   return (holds);
938 }
939 #endif /* UNIV_DEBUG */
940 
lock_number_of_rows_locked(const trx_lock_t * trx_lock)941 ulint lock_number_of_rows_locked(const trx_lock_t *trx_lock) {
942   /* We need exclusive lock_sys access, because trx_lock->n_rec_locks is
943   modified while holding sharded lock only, so we need to disable all writers
944   for this number to be meaningful */
945   ut_ad(locksys::owns_exclusive_global_latch());
946 
947   return (trx_lock->n_rec_locks);
948 }
949 
lock_number_of_tables_locked(const trx_t * trx)950 ulint lock_number_of_tables_locked(const trx_t *trx) {
951   ut_ad(trx_mutex_own(trx));
952 
953   return (trx->lock.table_locks.size());
954 }
955 
956 /*============== RECORD LOCK CREATION AND QUEUE MANAGEMENT =============*/
957 
958 /**
959 Do some checks and prepare for creating a new record lock */
prepare() const960 void RecLock::prepare() const {
961   ut_ad(locksys::owns_page_shard(m_rec_id.get_page_id()));
962   ut_ad(m_trx == thr_get_trx(m_thr));
963 
964   /* Test if there already is some other reason to suspend thread:
965   we do not enqueue a lock request if the query thread should be
966   stopped anyway */
967 
968   if (que_thr_stop(m_thr)) {
969     ut_error;
970   }
971 
972   switch (trx_get_dict_operation(m_trx)) {
973     case TRX_DICT_OP_NONE:
974       break;
975     case TRX_DICT_OP_TABLE:
976     case TRX_DICT_OP_INDEX:
977       ib::error(ER_IB_MSG_635)
978           << "A record lock wait happens in a dictionary"
979              " operation. index "
980           << m_index->name << " of table " << m_index->table->name << ". "
981           << BUG_REPORT_MSG;
982       ut_ad(0);
983   }
984 
985   ut_ad(m_index->table->n_ref_count > 0 || !m_index->table->can_be_evicted);
986 }
987 
988 /**
989 Create the lock instance
990 @param[in, out] trx	The transaction requesting the lock
991 @param[in, out] index	Index on which record lock is required
992 @param[in] mode		The lock mode desired
993 @param[in] rec_id	The record id
994 @param[in] size		Size of the lock + bitmap requested
995 @return a record lock instance */
lock_alloc(trx_t * trx,dict_index_t * index,ulint mode,const RecID & rec_id,ulint size)996 lock_t *RecLock::lock_alloc(trx_t *trx, dict_index_t *index, ulint mode,
997                             const RecID &rec_id, ulint size) {
998   ut_ad(locksys::owns_page_shard(rec_id.get_page_id()));
999   /* We are about to modify structures in trx->lock which needs trx->mutex */
1000   ut_ad(trx_mutex_own(trx));
1001 
1002   lock_t *lock;
1003 
1004   if (trx->lock.rec_cached >= trx->lock.rec_pool.size() ||
1005       sizeof(*lock) + size > REC_LOCK_SIZE) {
1006     ulint n_bytes = size + sizeof(*lock);
1007     mem_heap_t *heap = trx->lock.lock_heap;
1008 
1009     lock = reinterpret_cast<lock_t *>(mem_heap_alloc(heap, n_bytes));
1010   } else {
1011     lock = trx->lock.rec_pool[trx->lock.rec_cached];
1012     ++trx->lock.rec_cached;
1013   }
1014 
1015   lock->trx = trx;
1016 
1017   lock->index = index;
1018 
1019   /* Note the creation timestamp */
1020   ut_d(lock->m_seq = lock_sys->m_seq.fetch_add(1));
1021 
1022   /* Setup the lock attributes */
1023 
1024   lock->type_mode = LOCK_REC | (mode & ~LOCK_TYPE_MASK);
1025 
1026   lock_rec_t &rec_lock = lock->rec_lock;
1027 
1028   /* Predicate lock always on INFIMUM (0) */
1029 
1030   if (is_predicate_lock(mode)) {
1031     rec_lock.n_bits = 8;
1032 
1033     memset(&lock[1], 0x0, 1);
1034 
1035   } else {
1036     ut_ad(8 * size < UINT32_MAX);
1037     rec_lock.n_bits = static_cast<uint32_t>(8 * size);
1038 
1039     memset(&lock[1], 0x0, size);
1040   }
1041 
1042   rec_lock.page_id = rec_id.get_page_id();
1043 
1044   /* Set the bit corresponding to rec */
1045 
1046   lock_rec_set_nth_bit(lock, rec_id.m_heap_no);
1047 
1048   MONITOR_INC(MONITOR_NUM_RECLOCK);
1049 
1050   MONITOR_INC(MONITOR_RECLOCK_CREATED);
1051 
1052   return (lock);
1053 }
1054 
1055 /** Insert lock record to the tail of the queue where the WAITING locks reside.
1056 @param[in,out]	lock_hash	Hash table containing the locks
1057 @param[in,out]	lock		Record lock instance to insert
1058 @param[in]	rec_id	        Record being locked */
lock_rec_insert_to_waiting(hash_table_t * lock_hash,lock_t * lock,const RecID & rec_id)1059 static void lock_rec_insert_to_waiting(hash_table_t *lock_hash, lock_t *lock,
1060                                        const RecID &rec_id) {
1061   ut_ad(lock->is_waiting());
1062   ut_ad(rec_id.matches(lock));
1063   ut_ad(locksys::owns_page_shard(lock->rec_lock.page_id));
1064   ut_ad(locksys::owns_page_shard(rec_id.get_page_id()));
1065 
1066   const ulint fold = rec_id.fold();
1067   HASH_INSERT(lock_t, hash, lock_hash, fold, lock);
1068 }
1069 
1070 /** Insert lock record to the head of the queue where the GRANTED locks reside.
1071 @param[in,out]	lock_hash	Hash table containing the locks
1072 @param[in,out]	lock		Record lock instance to insert
1073 @param[in]	rec_id	        Record being locked */
lock_rec_insert_to_granted(hash_table_t * lock_hash,lock_t * lock,const RecID & rec_id)1074 static void lock_rec_insert_to_granted(hash_table_t *lock_hash, lock_t *lock,
1075                                        const RecID &rec_id) {
1076   ut_ad(rec_id.matches(lock));
1077   ut_ad(locksys::owns_page_shard(lock->rec_lock.page_id));
1078   ut_ad(locksys::owns_page_shard(rec_id.get_page_id()));
1079   ut_ad(!lock->is_waiting());
1080 
1081   /* Move the target lock to the head of the list. */
1082   auto cell =
1083       hash_get_nth_cell(lock_hash, hash_calc_hash(rec_id.fold(), lock_hash));
1084 
1085   ut_ad(lock != cell->node);
1086 
1087   auto next = reinterpret_cast<lock_t *>(cell->node);
1088 
1089   cell->node = lock;
1090   lock->hash = next;
1091 }
1092 namespace locksys {
1093 /**
1094 Adds the lock to the list of trx's locks.
1095 Requires lock->trx to be already set.
1096 Bumps the trx_lock_version.
1097 @param[in,out]  lock  The lock that we want to add to lock->trx->lock.trx_locks
1098 */
add_to_trx_locks(lock_t * lock)1099 static void add_to_trx_locks(lock_t *lock) {
1100   ut_ad(lock->trx != nullptr);
1101   ut_ad(trx_mutex_own(lock->trx));
1102   UT_LIST_ADD_LAST(lock->trx->lock.trx_locks, lock);
1103   lock->trx->lock.trx_locks_version++;
1104 }
1105 
1106 /**
1107 Removes the lock from the list of trx's locks.
1108 Bumps the trx_lock_version.
1109 @param[in,out]  lock  The lock that we want to remove from
1110                       lock->trx->lock.trx_locks
1111 */
remove_from_trx_locks(lock_t * lock)1112 static void remove_from_trx_locks(lock_t *lock) {
1113   ut_ad(lock->trx != nullptr);
1114   ut_ad(trx_mutex_own(lock->trx));
1115   UT_LIST_REMOVE(lock->trx->lock.trx_locks, lock);
1116   lock->trx->lock.trx_locks_version++;
1117 }
1118 }  // namespace locksys
1119 
lock_add(lock_t * lock)1120 void RecLock::lock_add(lock_t *lock) {
1121   ut_ad((lock->type_mode | LOCK_REC) == (m_mode | LOCK_REC));
1122   ut_ad(m_rec_id.matches(lock));
1123   ut_ad(locksys::owns_page_shard(m_rec_id.get_page_id()));
1124   ut_ad(locksys::owns_page_shard(lock->rec_lock.page_id));
1125   ut_ad(trx_mutex_own(lock->trx));
1126 
1127   bool wait = m_mode & LOCK_WAIT;
1128 
1129   hash_table_t *lock_hash = lock_hash_get(m_mode);
1130 
1131   lock->index->table->n_rec_locks.fetch_add(1, std::memory_order_relaxed);
1132 
1133   if (!wait) {
1134     lock_rec_insert_to_granted(lock_hash, lock, m_rec_id);
1135   } else {
1136     lock_rec_insert_to_waiting(lock_hash, lock, m_rec_id);
1137   }
1138 
1139 #ifdef HAVE_PSI_THREAD_INTERFACE
1140 #ifdef HAVE_PSI_DATA_LOCK_INTERFACE
1141   /* The performance schema THREAD_ID and EVENT_ID are used only
1142   when DATA_LOCKS are exposed.  */
1143   PSI_THREAD_CALL(get_current_thread_event_id)
1144   (&lock->m_psi_internal_thread_id, &lock->m_psi_event_id);
1145 #endif /* HAVE_PSI_DATA_LOCK_INTERFACE */
1146 #endif /* HAVE_PSI_THREAD_INTERFACE */
1147 
1148   locksys::add_to_trx_locks(lock);
1149 
1150   if (wait) {
1151     lock_set_lock_and_trx_wait(lock);
1152   }
1153 }
1154 
1155 /** Create a new lock.
1156 @param[in,out] trx		Transaction requesting the lock
1157 @param[in] prdt			Predicate lock (optional)
1158 @return a new lock instance */
create(trx_t * trx,const lock_prdt_t * prdt)1159 lock_t *RecLock::create(trx_t *trx, const lock_prdt_t *prdt) {
1160   ut_ad(locksys::owns_page_shard(m_rec_id.get_page_id()));
1161 
1162   /* Ensure that another transaction doesn't access the trx
1163   lock state and lock data structures while we are adding the
1164   lock and changing the transaction state to LOCK_WAIT.
1165   In particular it protects the lock_alloc which uses trx's private pool of
1166   lock structures.
1167   It might be the case that we already hold trx->mutex because we got here from:
1168     - lock_rec_convert_impl_to_expl_for_trx
1169     - add_to_waitq
1170   */
1171   ut_ad(trx_mutex_own(trx));
1172 
1173   /* Create the explicit lock instance and initialise it. */
1174 
1175   lock_t *lock = lock_alloc(trx, m_index, m_mode, m_rec_id, m_size);
1176 
1177 #ifdef UNIV_DEBUG
1178   /* GAP lock shouldn't be taken on DD tables with some exceptions */
1179   if (m_index->table->is_dd_table &&
1180       strstr(m_index->table->name.m_name,
1181              "mysql/st_spatial_reference_systems") == nullptr &&
1182       strstr(m_index->table->name.m_name, "mysql/innodb_table_stats") ==
1183           nullptr &&
1184       strstr(m_index->table->name.m_name, "mysql/innodb_index_stats") ==
1185           nullptr &&
1186       strstr(m_index->table->name.m_name, "mysql/table_stats") == nullptr &&
1187       strstr(m_index->table->name.m_name, "mysql/index_stats") == nullptr) {
1188     ut_ad(lock_rec_get_rec_not_gap(lock));
1189   }
1190 #endif /* UNIV_DEBUG */
1191 
1192   if (prdt != nullptr && (m_mode & LOCK_PREDICATE)) {
1193     lock_prdt_set_prdt(lock, prdt);
1194   }
1195 
1196   lock_add(lock);
1197 
1198   return (lock);
1199 }
1200 
1201 /**
1202 Collect the transactions that will need to be rolled back asynchronously
1203 @param[in, out] hit_list    The list of transactions to be rolled back, to which
1204                             the trx should be appended.
1205 @param[in]      hp_trx_id   The id of the blocked High Priority Transaction
1206 @param[in, out] trx	    The blocking transaction to be rolled back */
lock_mark_trx_for_rollback(hit_list_t & hit_list,trx_id_t hp_trx_id,trx_t * trx)1207 static void lock_mark_trx_for_rollback(hit_list_t &hit_list, trx_id_t hp_trx_id,
1208                                        trx_t *trx) {
1209   trx->abort = true;
1210 
1211   ut_ad(!trx->read_only);
1212   ut_ad(trx_mutex_own(trx));
1213   ut_ad(!(trx->in_innodb & TRX_FORCE_ROLLBACK));
1214   ut_ad(!(trx->in_innodb & TRX_FORCE_ROLLBACK_ASYNC));
1215   ut_ad(!(trx->in_innodb & TRX_FORCE_ROLLBACK_DISABLE));
1216 
1217   /* Note that we will attempt an async rollback. The _ASYNC
1218   flag will be cleared if the transaction is rolled back
1219   synchronously before we get a chance to do it. */
1220 
1221   trx->in_innodb |= TRX_FORCE_ROLLBACK | TRX_FORCE_ROLLBACK_ASYNC;
1222 
1223   bool cas;
1224   os_thread_id_t thread_id = os_thread_get_curr_id();
1225 
1226   cas = os_compare_and_swap_thread_id(&trx->killed_by, 0, thread_id);
1227 
1228   ut_a(cas);
1229 
1230   hit_list.push_back(hit_list_t::value_type(trx));
1231 
1232 #ifdef UNIV_DEBUG
1233   THD *thd = trx->mysql_thd;
1234 
1235   if (thd != nullptr) {
1236     char buffer[1024];
1237     ib::info(ER_IB_MSG_636)
1238         << "Blocking transaction: ID: " << trx->id << " - "
1239         << " Blocked transaction ID: " << hp_trx_id << " - "
1240         << thd_security_context(thd, buffer, sizeof(buffer), 512);
1241   }
1242 #endif /* UNIV_DEBUG */
1243 }
1244 
1245 /** Creates a new edge in wait-for graph, from waiter to blocker
1246 @param[in]  waiter    The transaction that has to wait for blocker
1247 @param[in]  blocker   The transaction which causes waiter to wait */
lock_create_wait_for_edge(trx_t * waiter,trx_t * blocker)1248 static void lock_create_wait_for_edge(trx_t *waiter, trx_t *blocker) {
1249   ut_ad(trx_mutex_own(waiter));
1250   ut_ad(waiter->lock.wait_lock != nullptr);
1251   ut_ad(locksys::owns_lock_shard(waiter->lock.wait_lock));
1252   ut_ad(waiter->lock.blocking_trx.load() == nullptr);
1253   /* We don't call lock_wait_request_check_for_cycles() here as it
1254   would be slightly premature: the trx is not yet inserted into a slot of
1255   lock_sys->waiting_threads at this point, and thus it would be invisible to
1256   the thread which analyzes these slots. What we do instead is to let the
1257   lock_wait_table_reserve_slot() function be responsible for calling
1258   lock_wait_request_check_for_cycles() once it insert the trx to a
1259   slot.*/
1260   waiter->lock.blocking_trx.store(blocker);
1261 }
1262 
1263 /**
1264 Setup the requesting transaction state for lock grant
1265 @param[in,out] lock		Lock for which to change state */
set_wait_state(lock_t * lock)1266 void RecLock::set_wait_state(lock_t *lock) {
1267   ut_ad(locksys::owns_page_shard(lock->rec_lock.page_id));
1268   ut_ad(m_trx == lock->trx);
1269   ut_ad(trx_mutex_own(m_trx));
1270   ut_ad(lock_get_wait(lock));
1271 
1272   m_trx->lock.wait_started = ut_time();
1273 
1274   m_trx->lock.que_state = TRX_QUE_LOCK_WAIT;
1275 
1276   m_trx->lock.was_chosen_as_deadlock_victim = false;
1277 
1278   bool stopped = que_thr_stop(m_thr);
1279   ut_a(stopped);
1280 }
1281 
add_to_waitq(const lock_t * wait_for,const lock_prdt_t * prdt)1282 dberr_t RecLock::add_to_waitq(const lock_t *wait_for, const lock_prdt_t *prdt) {
1283   ut_ad(locksys::owns_page_shard(m_rec_id.get_page_id()));
1284   ut_ad(m_trx == thr_get_trx(m_thr));
1285 
1286   /* It is not that the body of this function requires trx->mutex, but some of
1287   the functions it calls require it and it so happens that we always posses it
1288   so it makes reasoning about code easier if we simply assert this fact. */
1289   ut_ad(trx_mutex_own(m_trx));
1290 
1291   DEBUG_SYNC_C("rec_lock_add_to_waitq");
1292 
1293   if (m_trx->in_innodb & TRX_FORCE_ROLLBACK_ASYNC) {
1294     return (DB_DEADLOCK);
1295   }
1296 
1297   m_mode |= LOCK_WAIT;
1298 
1299   /* Do the preliminary checks, and set query thread state */
1300 
1301   prepare();
1302 
1303   /* Don't queue the lock to hash table, if high priority transaction. */
1304   lock_t *lock = create(m_trx, prdt);
1305 
1306   lock_create_wait_for_edge(m_trx, wait_for->trx);
1307 
1308   ut_ad(lock_get_wait(lock));
1309 
1310   set_wait_state(lock);
1311 
1312   MONITOR_INC(MONITOR_LOCKREC_WAIT);
1313 
1314   /* m_trx->mysql_thd is NULL if it's an internal trx. So current_thd
1315    is used */
1316 
1317   thd_report_row_lock_wait(current_thd, wait_for->trx->mysql_thd);
1318 
1319   return (DB_LOCK_WAIT);
1320 }
1321 /** Moves a granted lock to the front of the queue for a given record by
1322 removing it adding it to the front. As a single lock can correspond to multiple
1323 rows (and thus: queues) this function moves it to the front of whole bucket.
1324 @param	[in]	lock	a granted lock to be moved
1325 @param	[in]	rec_id	record id which specifies particular queue and bucket */
lock_rec_move_granted_to_front(lock_t * lock,const RecID & rec_id)1326 static void lock_rec_move_granted_to_front(lock_t *lock, const RecID &rec_id) {
1327   ut_ad(!lock->is_waiting());
1328   ut_ad(rec_id.matches(lock));
1329   ut_ad(locksys::owns_page_shard(rec_id.get_page_id()));
1330   ut_ad(locksys::owns_page_shard(lock->rec_lock.page_id));
1331 
1332   const auto hash_table = lock->hash_table();
1333   HASH_DELETE(lock_t, hash, hash_table, rec_id.fold(), lock);
1334   lock_rec_insert_to_granted(hash_table, lock, rec_id);
1335 }
1336 
1337 /** Looks for a suitable type record lock struct by the same trx on the same
1338 page. This can be used to save space when a new record lock should be set on a
1339 page: no new struct is needed, if a suitable old is found.
1340 @param[in]  type_mode                 lock type_mode field
1341 @param[in]  heap_no                   heap number of the record we plan to use.
1342                                       The lock struct we search for needs to
1343                                       have a bitmap at least as large.
1344 @param[in]  lock                      lock_rec_get_first_on_page()
1345 @param[in]  trx                       transaction
1346 @param[out] found_waiter_before_lock  true iff there is a waiting lock before
1347                                       the returned lock
1348 @return lock or nullptr if there is no lock we could reuse*/
1349 UNIV_INLINE
lock_rec_find_similar_on_page(uint32_t type_mode,size_t heap_no,lock_t * lock,const trx_t * trx,bool & found_waiter_before_lock)1350 lock_t *lock_rec_find_similar_on_page(uint32_t type_mode, size_t heap_no,
1351                                       lock_t *lock, const trx_t *trx,
1352                                       bool &found_waiter_before_lock) {
1353   ut_ad(locksys::owns_page_shard(lock->rec_lock.page_id));
1354   found_waiter_before_lock = false;
1355   for (/* No op */; lock != nullptr; lock = lock_rec_get_next_on_page(lock)) {
1356     if (lock->trx == trx && lock->type_mode == type_mode &&
1357         heap_no < lock_rec_get_n_bits(lock)) {
1358       return (lock);
1359     }
1360     if (lock->is_waiting()) {
1361       found_waiter_before_lock = true;
1362     }
1363   }
1364   found_waiter_before_lock = false;
1365   return (nullptr);
1366 }
1367 
1368 /** Adds a record lock request in the record queue. The request is normally
1369  added as the last in the queue, but if the request to be added is not a waiting
1370  request, we can reuse a suitable record lock object already existing on the
1371  same page, just setting the appropriate bit in its bitmap. This is a low-level
1372  function which does NOT check for deadlocks or lock compatibility!
1373 @param[in]      type_mode         lock mode, wait, gap etc. flags; type is
1374                                   ignored and replaced by LOCK_REC
1375 @param[in]      block             buffer block containing the record
1376 @param[in]      heap_no           heap number of the record
1377 @param[in]      index             index of record
1378 @param[in,out]  trx               transaction
1379 @param[in]      we_own_trx_mutex  true iff the caller own trx->mutex (optional).
1380                                   Defaults to false. */
lock_rec_add_to_queue(ulint type_mode,const buf_block_t * block,const ulint heap_no,dict_index_t * index,trx_t * trx,const bool we_own_trx_mutex=false)1381 static void lock_rec_add_to_queue(ulint type_mode, const buf_block_t *block,
1382                                   const ulint heap_no, dict_index_t *index,
1383                                   trx_t *trx,
1384                                   const bool we_own_trx_mutex = false) {
1385 #ifdef UNIV_DEBUG
1386   ut_ad(locksys::owns_page_shard(block->get_page_id()));
1387   ut_ad(we_own_trx_mutex == trx_mutex_own(trx));
1388 
1389   ut_ad(index->is_clustered() ||
1390         dict_index_get_online_status(index) != ONLINE_INDEX_CREATION);
1391   switch (type_mode & LOCK_MODE_MASK) {
1392     case LOCK_X:
1393     case LOCK_S:
1394       break;
1395     default:
1396       ut_error;
1397   }
1398 
1399   if (!(type_mode & (LOCK_WAIT | LOCK_GAP))) {
1400     lock_mode mode = (type_mode & LOCK_MODE_MASK) == LOCK_S ? LOCK_X : LOCK_S;
1401     const lock_t *other_lock =
1402         lock_rec_other_has_expl_req(mode, block, false, heap_no, trx);
1403     ut_a(!other_lock);
1404   }
1405 #endif /* UNIV_DEBUG */
1406 
1407   type_mode |= LOCK_REC;
1408 
1409   /* If rec is the supremum record, then we can reset the gap bit, as
1410   all locks on the supremum are automatically of the gap type, and we
1411   try to avoid unnecessary memory consumption of a new record lock
1412   struct for a gap type lock */
1413 
1414   if (heap_no == PAGE_HEAP_NO_SUPREMUM) {
1415     ut_ad(!(type_mode & LOCK_REC_NOT_GAP));
1416 
1417     /* There should never be LOCK_REC_NOT_GAP on a supremum
1418     record, but let us play safe */
1419 
1420     type_mode &= ~(LOCK_GAP | LOCK_REC_NOT_GAP);
1421   }
1422 
1423   if (!(type_mode & LOCK_WAIT)) {
1424     hash_table_t *const hash = lock_hash_get(type_mode);
1425     lock_t *const first_lock = lock_rec_get_first_on_page(hash, block);
1426 
1427     if (first_lock != nullptr) {
1428       /* Look for a similar record lock on the same page:
1429       if one is found we can just set the bit */
1430 
1431       bool found_waiter_before_lock = false;
1432       lock_t *lock = lock_rec_find_similar_on_page(
1433           type_mode, heap_no, first_lock, trx, found_waiter_before_lock);
1434 
1435       if (lock != nullptr) {
1436         /* Some B-tree reorganization functions, when moving locks from one
1437         place to another, can leave a lock_t struct with an empty bitmap. They
1438         also clear a LOCK_WAIT flag. This means it's possible that `lock` was
1439         a waiting lock in the past, and if we want to reuse it, we have to move
1440         it to the front of the queue where granted locks reside.
1441         We only NEED to do that if there are any waiting locks in front of it.
1442         We CAN move the lock to front ONLY IF it wasn't part of any queue.
1443         In other words, moving to front is not safe if it has non-empty bitmap.
1444         Moving a lock to the front of its queue can create endless loop in the
1445         caller if it is iterating over the queue.
1446         Fortunately, the only situation in which a GRANTED lock can be after a
1447         WAITING lock in the bucket is if it was WAITING in the past and the only
1448         bit for the heap_no was cleared, so it no longer belongs to any queue.*/
1449         ut_ad(!found_waiter_before_lock ||
1450               (ULINT_UNDEFINED == lock_rec_find_set_bit(lock)));
1451 
1452         lock_rec_set_nth_bit(lock, heap_no);
1453         if (found_waiter_before_lock) {
1454           lock_rec_move_granted_to_front(lock, RecID{lock, heap_no});
1455         }
1456         return;
1457       }
1458     }
1459   }
1460 
1461   RecLock rec_lock(index, block, heap_no, type_mode);
1462 
1463   if (!we_own_trx_mutex) {
1464     trx_mutex_enter(trx);
1465   }
1466   rec_lock.create(trx);
1467   if (!we_own_trx_mutex) {
1468     trx_mutex_exit(trx);
1469   }
1470 }
1471 
1472 /** This is a fast routine for locking a record in the most common cases:
1473  there are no explicit locks on the page, or there is just one lock, owned
1474  by this transaction, and of the right type_mode. This is a low-level function
1475  which does NOT look at implicit locks! Checks lock compatibility within
1476  explicit locks. This function sets a normal next-key lock, or in the case of
1477  a page supremum record, a gap type lock.
1478  @return whether the locking succeeded LOCK_REC_SUCCESS,
1479  LOCK_REC_SUCCESS_CREATED, LOCK_REC_FAIL */
1480 UNIV_INLINE
lock_rec_lock_fast(bool impl,ulint mode,const buf_block_t * block,ulint heap_no,dict_index_t * index,que_thr_t * thr)1481 lock_rec_req_status lock_rec_lock_fast(
1482     bool impl,                /*!< in: if true, no lock is set
1483                               if no wait is necessary: we
1484                               assume that the caller will
1485                               set an implicit lock */
1486     ulint mode,               /*!< in: lock mode: LOCK_X or
1487                               LOCK_S possibly ORed to either
1488                               LOCK_GAP or LOCK_REC_NOT_GAP */
1489     const buf_block_t *block, /*!< in: buffer block containing
1490                               the record */
1491     ulint heap_no,            /*!< in: heap number of record */
1492     dict_index_t *index,      /*!< in: index of record */
1493     que_thr_t *thr)           /*!< in: query thread */
1494 {
1495   ut_ad(locksys::owns_page_shard(block->get_page_id()));
1496   ut_ad(!srv_read_only_mode);
1497   ut_ad((LOCK_MODE_MASK & mode) != LOCK_S ||
1498         lock_table_has(thr_get_trx(thr), index->table, LOCK_IS));
1499   ut_ad((LOCK_MODE_MASK & mode) != LOCK_X ||
1500         lock_table_has(thr_get_trx(thr), index->table, LOCK_IX) ||
1501         srv_read_only_mode);
1502   ut_ad((LOCK_MODE_MASK & mode) == LOCK_S || (LOCK_MODE_MASK & mode) == LOCK_X);
1503   ut_ad(mode - (LOCK_MODE_MASK & mode) == LOCK_GAP ||
1504         mode - (LOCK_MODE_MASK & mode) == 0 ||
1505         mode - (LOCK_MODE_MASK & mode) == LOCK_REC_NOT_GAP);
1506   ut_ad(index->is_clustered() || !dict_index_is_online_ddl(index));
1507   ut_ad(!(mode & LOCK_PREDICATE));
1508   ut_ad(!(mode & LOCK_PRDT_PAGE));
1509   DBUG_EXECUTE_IF("innodb_report_deadlock", return (LOCK_REC_FAIL););
1510 
1511   lock_t *lock = lock_rec_get_first_on_page(lock_sys->rec_hash, block);
1512 
1513   trx_t *trx = thr_get_trx(thr);
1514   ut_ad(!trx_mutex_own(trx));
1515 
1516   lock_rec_req_status status = LOCK_REC_SUCCESS;
1517 
1518   if (lock == nullptr) {
1519     if (!impl) {
1520       RecLock rec_lock(index, block, heap_no, mode);
1521 
1522       trx_mutex_enter(trx);
1523       rec_lock.create(trx);
1524       trx_mutex_exit(trx);
1525 
1526       status = LOCK_REC_SUCCESS_CREATED;
1527     }
1528   } else {
1529     trx_mutex_enter(trx);
1530 
1531     if (lock_rec_get_next_on_page(lock) != nullptr || lock->trx != trx ||
1532         lock->type_mode != (mode | LOCK_REC) ||
1533         lock_rec_get_n_bits(lock) <= heap_no) {
1534       status = LOCK_REC_FAIL;
1535     } else if (!impl) {
1536       /* If the nth bit of the record lock is already set
1537       then we do not set a new lock bit, otherwise we do
1538       set */
1539       if (!lock_rec_get_nth_bit(lock, heap_no)) {
1540         lock_rec_set_nth_bit(lock, heap_no);
1541         status = LOCK_REC_SUCCESS_CREATED;
1542       }
1543     }
1544 
1545     trx_mutex_exit(trx);
1546   }
1547   ut_ad(status == LOCK_REC_SUCCESS || status == LOCK_REC_SUCCESS_CREATED ||
1548         status == LOCK_REC_FAIL);
1549   return (status);
1550 }
1551 
1552 /** A helper function for lock_rec_lock_slow(), which grants a Next Key Lock
1553 (either LOCK_X or LOCK_S as specified by `mode`) on <`block`,`heap_no`> in the
1554 `index` to the `trx`, assuming that it already has a granted `held_lock`, which
1555 is at least as strong as mode|LOCK_REC_NOT_GAP. It does so by either reusing the
1556 lock if it already covers the gap, or by ensuring a separate GAP Lock, which in
1557 combination with Record Lock satisfies the request.
1558 @param[in]      held_lock   a lock granted to `trx` which is at least as strong
1559                             as mode|LOCK_REC_NOT_GAP
1560 @param[in]      mode	    requested lock mode: LOCK_X or LOCK_S
1561 @param[in]      block	    buffer block containing the record to be locked
1562 @param[in]      heap_no	    heap number of the record to be locked
1563 @param[in]      index	    index of record to be locked
1564 @param[in]      trx         the transaction requesting the Next Key Lock */
lock_reuse_for_next_key_lock(const lock_t * held_lock,ulint mode,const buf_block_t * block,ulint heap_no,dict_index_t * index,trx_t * trx)1565 static void lock_reuse_for_next_key_lock(const lock_t *held_lock, ulint mode,
1566                                          const buf_block_t *block,
1567                                          ulint heap_no, dict_index_t *index,
1568                                          trx_t *trx) {
1569   ut_ad(mode == LOCK_S || mode == LOCK_X);
1570   ut_ad(lock_mode_is_next_key_lock(mode));
1571 
1572   if (!held_lock->is_record_not_gap()) {
1573     ut_ad(held_lock->is_next_key_lock());
1574     return;
1575   }
1576 
1577   /* We have a Record Lock granted, so we only need a GAP Lock. We assume
1578   that GAP Locks do not conflict with anything. Therefore a GAP Lock
1579   could be granted to us right now if we've requested: */
1580   mode |= LOCK_GAP;
1581   ut_ad(nullptr == lock_rec_other_has_conflicting(mode, block, heap_no, trx));
1582 
1583   /* It might be the case we already have one, so we first check that. */
1584   if (lock_rec_has_expl(mode, block, heap_no, trx) == nullptr) {
1585     lock_rec_add_to_queue(LOCK_REC | mode, block, heap_no, index, trx);
1586   }
1587 }
1588 /** This is the general, and slower, routine for locking a record. This is a
1589 low-level function which does NOT look at implicit locks! Checks lock
1590 compatibility within explicit locks. This function sets a normal next-key
1591 lock, or in the case of a page supremum record, a gap type lock.
1592 @param[in]	impl		if true, no lock is set	if no wait is
1593                                 necessary: we assume that the caller will
1594                                 set an implicit lock
1595 @param[in]	sel_mode	select mode: SELECT_ORDINARY,
1596                                 SELECT_SKIP_LOCKED, or SELECT_NO_WAIT
1597 @param[in]	mode		lock mode: LOCK_X or LOCK_S possibly ORed to
1598                                 either LOCK_GAP or LOCK_REC_NOT_GAP
1599 @param[in]	block		buffer block containing	the record
1600 @param[in]	heap_no		heap number of record
1601 @param[in]	index		index of record
1602 @param[in,out]	thr		query thread
1603 @return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, DB_LOCK_WAIT, DB_DEADLOCK,
1604 DB_SKIP_LOCKED, or DB_LOCK_NOWAIT */
lock_rec_lock_slow(bool impl,select_mode sel_mode,ulint mode,const buf_block_t * block,ulint heap_no,dict_index_t * index,que_thr_t * thr)1605 static dberr_t lock_rec_lock_slow(bool impl, select_mode sel_mode, ulint mode,
1606                                   const buf_block_t *block, ulint heap_no,
1607                                   dict_index_t *index, que_thr_t *thr) {
1608   ut_ad(locksys::owns_page_shard(block->get_page_id()));
1609   ut_ad(!srv_read_only_mode);
1610   ut_ad((LOCK_MODE_MASK & mode) != LOCK_S ||
1611         lock_table_has(thr_get_trx(thr), index->table, LOCK_IS));
1612   ut_ad((LOCK_MODE_MASK & mode) != LOCK_X ||
1613         lock_table_has(thr_get_trx(thr), index->table, LOCK_IX));
1614   ut_ad((LOCK_MODE_MASK & mode) == LOCK_S || (LOCK_MODE_MASK & mode) == LOCK_X);
1615   ut_ad(mode - (LOCK_MODE_MASK & mode) == LOCK_GAP ||
1616         mode - (LOCK_MODE_MASK & mode) == LOCK_ORDINARY ||
1617         mode - (LOCK_MODE_MASK & mode) == LOCK_REC_NOT_GAP);
1618   ut_ad(index->is_clustered() || !dict_index_is_online_ddl(index));
1619 
1620   DBUG_EXECUTE_IF("innodb_report_deadlock", return (DB_DEADLOCK););
1621 
1622   trx_t *trx = thr_get_trx(thr);
1623 
1624   ut_ad(sel_mode == SELECT_ORDINARY ||
1625         (sel_mode != SELECT_ORDINARY && !trx_is_high_priority(trx)));
1626 
1627   /* A very common type of lock in InnoDB is "Next Key Lock", which is almost
1628   equivalent to two locks: Record Lock and GAP Lock separately.
1629   Thus, in case we need to wait, we check if we already own a Record Lock,
1630   and if we do, we only need the GAP Lock.
1631   We don't do the opposite thing (of checking for GAP Lock, and only requesting
1632   Record Lock), because if Next Key Lock has to wait, then it is because of a
1633   conflict with someone who locked the record, as locks on gaps are compatible
1634   with each other, so even if we have a GAP Lock, narrowing the requested mode
1635   to Record Lock will not make the conflict go away.
1636 
1637   In current implementation locks on supremum are treated like GAP Locks,
1638   in particular they never have to wait for anything (unless they are Insert
1639   Intention locks, but we've ruled that out with asserts before getting here),
1640   so there is no gain in using the above "lock splitting" heuristic for locks on
1641   supremum, and reasoning becomes a bit simpler without this special case. */
1642 
1643   auto checked_mode =
1644       (heap_no != PAGE_HEAP_NO_SUPREMUM && lock_mode_is_next_key_lock(mode))
1645           ? mode | LOCK_REC_NOT_GAP
1646           : mode;
1647 
1648   const auto *held_lock = lock_rec_has_expl(checked_mode, block, heap_no, trx);
1649 
1650   if (held_lock != nullptr) {
1651     if (checked_mode == mode) {
1652       /* The trx already has a strong enough lock on rec: do nothing */
1653       return (DB_SUCCESS);
1654     }
1655 
1656     /* As check_mode != mode, the mode is Next Key Lock, which can not be
1657     emulated by implicit lock (which are LOCK_REC_NOT_GAP only). */
1658     ut_ad(!impl);
1659 
1660     lock_reuse_for_next_key_lock(held_lock, mode, block, heap_no, index, trx);
1661     return (DB_SUCCESS);
1662   }
1663 
1664   const lock_t *wait_for =
1665       lock_rec_other_has_conflicting(mode, block, heap_no, trx);
1666 
1667   if (wait_for != nullptr) {
1668     switch (sel_mode) {
1669       case SELECT_SKIP_LOCKED:
1670         return (DB_SKIP_LOCKED);
1671       case SELECT_NOWAIT:
1672         return (DB_LOCK_NOWAIT);
1673       case SELECT_ORDINARY:
1674         /* If another transaction has a non-gap conflicting request in the
1675         queue, as this transaction does not have a lock strong enough already
1676         granted on the record, we may have to wait. */
1677 
1678         RecLock rec_lock(thr, index, block, heap_no, mode);
1679 
1680         trx_mutex_enter(trx);
1681 
1682         dberr_t err = rec_lock.add_to_waitq(wait_for);
1683 
1684         trx_mutex_exit(trx);
1685 
1686         ut_ad(err == DB_SUCCESS_LOCKED_REC || err == DB_LOCK_WAIT ||
1687               err == DB_DEADLOCK);
1688         return (err);
1689     }
1690   }
1691   if (!impl) {
1692     /* Set the requested lock on the record. */
1693 
1694     lock_rec_add_to_queue(LOCK_REC | mode, block, heap_no, index, trx);
1695 
1696     return (DB_SUCCESS_LOCKED_REC);
1697   }
1698   return (DB_SUCCESS);
1699 }
1700 
1701 /** Tries to lock the specified record in the mode requested. If not immediately
1702 possible, enqueues a waiting lock request. This is a low-level function
1703 which does NOT look at implicit locks! Checks lock compatibility within
1704 explicit locks. This function sets a normal next-key lock, or in the case
1705 of a page supremum record, a gap type lock.
1706 @param[in]	impl		if true, no lock is set	if no wait is
1707                                 necessary: we assume that the caller will
1708                                 set an implicit lock
1709 @param[in]	sel_mode	select mode: SELECT_ORDINARY,
1710                                 SELECT_SKIP_LOCKED, or SELECT_NO_WAIT
1711 @param[in]	mode		lock mode: LOCK_X or LOCK_S possibly ORed to
1712                                 either LOCK_GAP or LOCK_REC_NOT_GAP
1713 @param[in]	block		buffer block containing	the record
1714 @param[in]	heap_no		heap number of record
1715 @param[in]	index		index of record
1716 @param[in,out]	thr		query thread
1717 @return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, DB_LOCK_WAIT, DB_DEADLOCK,
1718 DB_SKIP_LOCKED, or DB_LOCK_NOWAIT */
lock_rec_lock(bool impl,select_mode sel_mode,ulint mode,const buf_block_t * block,ulint heap_no,dict_index_t * index,que_thr_t * thr)1719 static dberr_t lock_rec_lock(bool impl, select_mode sel_mode, ulint mode,
1720                              const buf_block_t *block, ulint heap_no,
1721                              dict_index_t *index, que_thr_t *thr) {
1722   ut_ad(locksys::owns_page_shard(block->get_page_id()));
1723   ut_ad(!srv_read_only_mode);
1724   ut_ad((LOCK_MODE_MASK & mode) != LOCK_S ||
1725         lock_table_has(thr_get_trx(thr), index->table, LOCK_IS));
1726   ut_ad((LOCK_MODE_MASK & mode) != LOCK_X ||
1727         lock_table_has(thr_get_trx(thr), index->table, LOCK_IX));
1728   ut_ad((LOCK_MODE_MASK & mode) == LOCK_S || (LOCK_MODE_MASK & mode) == LOCK_X);
1729   ut_ad(mode - (LOCK_MODE_MASK & mode) == LOCK_GAP ||
1730         mode - (LOCK_MODE_MASK & mode) == LOCK_REC_NOT_GAP ||
1731         mode - (LOCK_MODE_MASK & mode) == 0);
1732   ut_ad(index->is_clustered() || !dict_index_is_online_ddl(index));
1733   /* Implicit locks are equivalent to LOCK_X|LOCK_REC_NOT_GAP, so we can omit
1734   creation of explicit lock only if the requested mode was LOCK_REC_NOT_GAP */
1735   ut_ad(!impl || ((mode & LOCK_REC_NOT_GAP) == LOCK_REC_NOT_GAP));
1736   /* We try a simplified and faster subroutine for the most
1737   common cases */
1738   switch (lock_rec_lock_fast(impl, mode, block, heap_no, index, thr)) {
1739     case LOCK_REC_SUCCESS:
1740       return (DB_SUCCESS);
1741     case LOCK_REC_SUCCESS_CREATED:
1742       return (DB_SUCCESS_LOCKED_REC);
1743     case LOCK_REC_FAIL:
1744       return (
1745           lock_rec_lock_slow(impl, sel_mode, mode, block, heap_no, index, thr));
1746     default:
1747       ut_error;
1748   }
1749 }
1750 
1751 /** Checks if a waiting record lock request still has to wait in a queue.
1752 @param[in]  wait_lock     Waiting record lock
1753 @param[in]  blocking_trx  If not nullptr, it restricts the search to only the
1754                           locks held by the blocking_trx, which is useful in
1755                           case when there might be multiple reasons for waiting
1756                           in queue, but we need to report the specific one.
1757                           Useful when reporting a deadlock cycle. (optional)
1758 @return The conflicting lock which is the reason wait_lock has to wait
1759 or nullptr if it can be granted now */
lock_rec_has_to_wait_in_queue(const lock_t * wait_lock,const trx_t * blocking_trx=nullptr)1760 static const lock_t *lock_rec_has_to_wait_in_queue(
1761     const lock_t *wait_lock, const trx_t *blocking_trx = nullptr) {
1762   const lock_t *lock;
1763   ulint heap_no;
1764   ulint bit_mask;
1765   ulint bit_offset;
1766   hash_table_t *hash;
1767 
1768   ut_ad(lock_get_type_low(wait_lock) == LOCK_REC);
1769   const auto page_id = wait_lock->rec_lock.page_id;
1770   ut_ad(locksys::owns_page_shard(page_id));
1771   ut_ad(lock_get_wait(wait_lock));
1772 
1773   heap_no = lock_rec_find_set_bit(wait_lock);
1774 
1775   bit_offset = heap_no / 8;
1776   bit_mask = static_cast<ulint>(1) << (heap_no % 8);
1777 
1778   hash = lock_hash_get(wait_lock->type_mode);
1779 
1780   for (lock = lock_rec_get_first_on_page_addr(hash, page_id); lock != wait_lock;
1781        lock = lock_rec_get_next_on_page_const(lock)) {
1782     const byte *p = (const byte *)&lock[1];
1783 
1784     if ((blocking_trx == nullptr || blocking_trx == lock->trx) &&
1785         heap_no < lock_rec_get_n_bits(lock) && (p[bit_offset] & bit_mask) &&
1786         lock_has_to_wait(wait_lock, lock)) {
1787       return (lock);
1788     }
1789   }
1790 
1791   return (nullptr);
1792 }
1793 
1794 /** Grants a lock to a waiting lock request and releases the waiting
1795 transaction. The caller must hold lock_sys latch for the shard containing the
1796 lock, but not the lock->trx->mutex.
1797 @param[in,out]    lock    waiting lock request
1798  */
lock_grant(lock_t * lock)1799 static void lock_grant(lock_t *lock) {
1800   ut_ad(locksys::owns_lock_shard(lock));
1801   ut_ad(!trx_mutex_own(lock->trx));
1802 
1803   trx_mutex_enter(lock->trx);
1804 
1805   if (lock_get_mode(lock) == LOCK_AUTO_INC) {
1806     dict_table_t *table = lock->tab_lock.table;
1807 
1808     if (table->autoinc_trx == lock->trx) {
1809       ib::error(ER_IB_MSG_637) << "Transaction already had an"
1810                                << " AUTO-INC lock!";
1811     } else {
1812       ut_ad(table->autoinc_trx == nullptr);
1813       table->autoinc_trx = lock->trx;
1814 
1815       ib_vector_push(lock->trx->lock.autoinc_locks, &lock);
1816     }
1817   }
1818 
1819   DBUG_PRINT("ib_lock", ("wait for trx " TRX_ID_FMT " ends",
1820                          trx_get_id_for_print(lock->trx)));
1821 
1822   lock_reset_wait_and_release_thread_if_suspended(lock);
1823   ut_ad(trx_mutex_own(lock->trx));
1824 
1825   trx_mutex_exit(lock->trx);
1826 }
1827 
lock_make_trx_hit_list(trx_t * hp_trx,hit_list_t & hit_list)1828 void lock_make_trx_hit_list(trx_t *hp_trx, hit_list_t &hit_list) {
1829   trx_mutex_enter(hp_trx);
1830   const trx_id_t hp_trx_id = hp_trx->id;
1831   ut_ad(trx_can_be_handled_by_current_thread(hp_trx));
1832   ut_ad(trx_is_high_priority(hp_trx));
1833   /* To avoid slow procedure involving global exclusive latch below, we first
1834   check if this transaction is waiting for a lock at all. It's unsafe to read
1835   hp->lock.wait_lock without latching whole lock_sys as it might temporarily
1836   change to NULL during a concurrent B-tree reorganization, even though the
1837   trx actually is still waiting.
1838   TBD: Is it safe to use hp_trx->lock.que_state == TRX_QUE_LOCK_WAIT given that
1839   que_state is not atomic, and writes to it happen without trx->mutex ? */
1840   const bool is_waiting = (hp_trx->lock.blocking_trx.load() != nullptr);
1841   trx_mutex_exit(hp_trx);
1842   if (!is_waiting) {
1843     return;
1844   }
1845 
1846   /* Current implementation of lock_make_trx_hit_list requires latching whole
1847   lock_sys for following reasons:
1848   1. it may call lock_cancel_waiting_and_release on a lock from completely
1849   different shard of lock_sys than hp_trx->lock.wait_lock. Trying to latch
1850   this other shard might create a deadlock cycle if it violates ordering of
1851   shard latches (and there is 50% chance it will violate it). Moreover the
1852   lock_cancel_waiting_and_release() requires an exclusive latch to avoid
1853   deadlocks among trx->mutex-es, and trx->lock.wait_lock might be a table lock,
1854   in which case exclusive latch is also needed to traverse table locks.
1855   2. it may call trx_mutex_enter on a transaction which is waiting for a
1856   lock, which violates one of assumptions used in the proof that a deadlock due
1857   to acquiring trx->mutex-es is impossible
1858   3. it attempts to read hp_trx->lock.wait_lock which might be modified by a
1859   thread during B-tree reorganization when moving locks between queues
1860   4. it attempts to operate on trx->lock.wait_lock of other transactions */
1861   locksys::Global_exclusive_latch_guard guard{};
1862 
1863   /* Check again */
1864   const lock_t *lock = hp_trx->lock.wait_lock;
1865   if (lock == nullptr || !lock->is_record_lock()) {
1866     return;
1867   }
1868   RecID rec_id{lock, lock_rec_find_set_bit(lock)};
1869   Lock_iter::for_each(
1870       rec_id,
1871       [&](lock_t *next) {
1872         trx_t *trx = next->trx;
1873         /* Check only for conflicting, granted locks on the current
1874         row. Currently, we don't rollback read only transactions,
1875         transactions owned by background threads. */
1876         if (trx == hp_trx || next->is_waiting() || trx->read_only ||
1877             trx->mysql_thd == nullptr || !lock_has_to_wait(lock, next)) {
1878           return true;
1879         }
1880 
1881         trx_mutex_enter(trx);
1882 
1883         /* Skip high priority transactions, if already marked for
1884         abort by some other transaction or if ASYNC rollback is
1885         disabled. A transaction must complete kill/abort of a
1886         victim transaction once marked and added to hit list. */
1887         if (trx_is_high_priority(trx) ||
1888             (trx->in_innodb & TRX_FORCE_ROLLBACK) != 0 ||
1889             (trx->in_innodb & TRX_FORCE_ROLLBACK_ASYNC) != 0 ||
1890             (trx->in_innodb & TRX_FORCE_ROLLBACK_DISABLE) != 0 || trx->abort) {
1891           trx_mutex_exit(trx);
1892 
1893           return true;
1894         }
1895 
1896         /* If the transaction is waiting on some other resource then
1897         wake it up with DEAD_LOCK error so that it can rollback. */
1898         if (trx->lock.que_state == TRX_QUE_LOCK_WAIT) {
1899           /* Assert that it is not waiting for current record. */
1900           ut_ad(trx->lock.wait_lock != next);
1901 #ifdef UNIV_DEBUG
1902           ib::info(ER_IB_MSG_639)
1903               << "High Priority Transaction (ID): " << lock->trx->id
1904               << " waking up blocking"
1905               << " transaction (ID): " << trx->id;
1906 #endif /* UNIV_DEBUG */
1907           trx->lock.was_chosen_as_deadlock_victim = true;
1908 
1909           lock_cancel_waiting_and_release(trx->lock.wait_lock);
1910 
1911           trx_mutex_exit(trx);
1912           return true;
1913         }
1914 
1915         /* Mark for ASYNC Rollback and add to hit list. */
1916         lock_mark_trx_for_rollback(hit_list, hp_trx_id, trx);
1917 
1918         trx_mutex_exit(trx);
1919         return true;
1920       },
1921       lock->hash_table());
1922 }
1923 
1924 /** Cancels a waiting record lock request and releases the waiting transaction
1925  that requested it. NOTE: does NOT check if waiting lock requests behind this
1926  one can now be granted! */
lock_rec_cancel(lock_t * lock)1927 static void lock_rec_cancel(
1928     lock_t *lock) /*!< in: waiting record lock request */
1929 {
1930   ut_ad(lock_get_type_low(lock) == LOCK_REC);
1931   ut_ad(locksys::owns_page_shard(lock->rec_lock.page_id));
1932 
1933   /* Reset the bit (there can be only one set bit) in the lock bitmap */
1934   lock_rec_reset_nth_bit(lock, lock_rec_find_set_bit(lock));
1935 
1936   trx_mutex_enter(lock->trx);
1937 
1938   lock_reset_wait_and_release_thread_if_suspended(lock);
1939 
1940   trx_mutex_exit(lock->trx);
1941 }
1942 
1943 /** Given a waiting_lock, and blocking_lock which is the reason it has to wait,
1944 makes sure that the (only) edge in the wait-for graph outgoing from the
1945 waiting_lock->trx points to blocking_lock->trx
1946 @param[in]    waiting_lock    A lock waiting in queue, blocked by blocking_lock
1947 @param[in]    blocking_lock   A lock which is a reason the waiting_lock has to
1948                               wait */
lock_update_wait_for_edge(const lock_t * waiting_lock,const lock_t * blocking_lock)1949 static void lock_update_wait_for_edge(const lock_t *waiting_lock,
1950                                       const lock_t *blocking_lock) {
1951   ut_ad(locksys::owns_lock_shard(waiting_lock));
1952   ut_ad(locksys::owns_lock_shard(blocking_lock));
1953   ut_ad(waiting_lock->is_waiting());
1954   ut_ad(lock_has_to_wait(waiting_lock, blocking_lock));
1955   /* Still needs to wait, but perhaps the reason has changed */
1956   if (waiting_lock->trx->lock.blocking_trx.load() != blocking_lock->trx) {
1957     waiting_lock->trx->lock.blocking_trx.store(blocking_lock->trx);
1958     /* We call lock_wait_request_check_for_cycles() because the outgoing edge of
1959     wait_lock->trx has changed it's endpoint and we need to analyze the
1960     wait-for-graph again. */
1961     lock_wait_request_check_for_cycles();
1962   }
1963 }
1964 
1965 /** Checks if a waiting record lock request still has to wait for granted locks.
1966 @param[in]	wait_lock		Waiting record lock
1967 @param[in]	granted			Granted record locks
1968 @param[in]	new_granted_index	Start of new granted locks
1969 @return The conflicting lock which is the reason wait_lock has to wait
1970 or nullptr if it can be granted now */
1971 template <typename Container>
lock_rec_has_to_wait_for_granted(const typename Container::value_type & wait_lock,const Container & granted,const size_t new_granted_index)1972 static const lock_t *lock_rec_has_to_wait_for_granted(
1973     const typename Container::value_type &wait_lock, const Container &granted,
1974     const size_t new_granted_index)
1975 
1976 {
1977   ut_ad(locksys::owns_page_shard(wait_lock->rec_lock.page_id));
1978   ut_ad(wait_lock->is_record_lock());
1979 
1980   ut_ad(new_granted_index <= granted.size());
1981 
1982   /* We iterate over granted locks in reverse order.
1983   Conceptually this corresponds to chronological order.
1984   This way, we pick as blocking_trx the oldest reason for waiting we haven't
1985   yet analyzed in deadlock checker. Our hope is that eventually (perhaps after
1986   several such updates) we will set blocking_trx to the real cause of the
1987   deadlock, which is the next node on the deadlock cycle. */
1988   for (size_t i = new_granted_index; i--;) {
1989     const auto granted_lock = granted[i];
1990     if (lock_has_to_wait(wait_lock, granted_lock)) {
1991       return (granted_lock);
1992     }
1993   }
1994 
1995   for (size_t i = new_granted_index; i < granted.size(); ++i) {
1996     const auto granted_lock = granted[i];
1997     ut_ad(granted_lock->trx->error_state != DB_DEADLOCK);
1998     ut_ad(!granted_lock->trx->lock.was_chosen_as_deadlock_victim);
1999 
2000     if (lock_has_to_wait(wait_lock, granted_lock)) {
2001       return (granted_lock);
2002     }
2003   }
2004 
2005   return (nullptr);
2006 }
2007 
2008 /** Grant a lock to waiting transactions. This function scans the queue of locks
2009 in which in_lock resides (or resided) paying attention only to locks on
2010 heap_no-th bit. For each waiting lock which was blocked by in_lock->trx it
2011 checks if it can be granted now. It iterates on waiting locks in order favoring
2012 high-priority transactions and then transactions of high
2013 trx->lock.schedule_weight.
2014 @param[in]    in_lock   Lock which was released, or
2015                         partially released by modifying its type/mode
2016                         (see lock_trx_release_read_locks) or
2017                         reseting heap_no-th bit in the bitmap
2018                         (see lock_rec_release)
2019 @param[in]    heap_no   Heap number within the page on which the
2020 lock was (or still is) held */
lock_rec_grant_by_heap_no(lock_t * in_lock,ulint heap_no)2021 static void lock_rec_grant_by_heap_no(lock_t *in_lock, ulint heap_no) {
2022   const auto hash_table = in_lock->hash_table();
2023 
2024   ut_ad(in_lock->is_record_lock());
2025   ut_ad(locksys::owns_page_shard(in_lock->rec_lock.page_id));
2026 
2027   using LockDescriptorEx = std::pair<trx_schedule_weight_t, lock_t *>;
2028   /* Preallocate for 4 lists with 32 locks. */
2029   std::unique_ptr<mem_heap_t, decltype(&mem_heap_free)> heap(
2030       mem_heap_create((sizeof(lock_t *) * 3 + sizeof(LockDescriptorEx)) * 32),
2031       mem_heap_free);
2032 
2033   RecID rec_id{in_lock, heap_no};
2034   Locks<lock_t *> low_priority_light{heap.get()};
2035   Locks<lock_t *> waiting{heap.get()};
2036   Locks<lock_t *> granted{heap.get()};
2037   Locks<LockDescriptorEx> low_priority_heavier{heap.get()};
2038 
2039   const auto in_trx = in_lock->trx;
2040 #ifdef UNIV_DEBUG
2041   bool seen_waiting_lock = false;
2042 #endif
2043   Lock_iter::for_each(
2044       rec_id,
2045       [&](lock_t *lock) {
2046         /* Split the relevant locks in the queue into:
2047         - granted = granted locks
2048         - waiting = waiting locks of high priority transactions
2049         - low_priority_heavier = waiting locks of low priority, but heavy weight
2050         - low_priority_light = waiting locks of low priority and light weight
2051         */
2052         if (!lock->is_waiting()) {
2053           /* Granted locks should be before waiting locks. */
2054           ut_ad(!seen_waiting_lock);
2055           granted.push_back(lock);
2056           return (true);
2057         }
2058         ut_d(seen_waiting_lock = true);
2059         const auto trx = lock->trx;
2060         if (trx->error_state == DB_DEADLOCK ||
2061             trx->lock.was_chosen_as_deadlock_victim) {
2062           return (true);
2063         }
2064         /* We read blocking_trx while holding this lock_sys queue latched, and
2065         each write to blocking_trx is done while holding the latch. So, even
2066         though we use memory_order_relaxed we will see modifications performed
2067         before we acquired the latch. */
2068         const auto blocking_trx =
2069             trx->lock.blocking_trx.load(std::memory_order_relaxed);
2070         /* No one should be WAITING without good reason! */
2071         ut_ad(blocking_trx);
2072         /* We will only consider granting the `lock`, if we are the reason it
2073         was waiting. */
2074         if (blocking_trx != in_trx) {
2075           return (true);
2076         }
2077         if (trx_is_high_priority(trx)) {
2078           waiting.push_back(lock);
2079           return (true);
2080         }
2081         /* The values of schedule_weight are read with memory_order_relaxed as
2082         we do not care neither about having the most recent value, nor about any
2083         relative order between this load and other operations.
2084         As std::sort requires the order to be consistent during execution we
2085         have to take a snapshot of all schedule_weight atomics, so they don't
2086         change during call to stable_sort in a way which causes the algorithm to
2087         crash. */
2088         const auto schedule_weight =
2089             trx->lock.schedule_weight.load(std::memory_order_relaxed);
2090         if (schedule_weight <= 1) {
2091           low_priority_light.push_back(lock);
2092           return (true);
2093         }
2094         low_priority_heavier.push_back(LockDescriptorEx{schedule_weight, lock});
2095 
2096         return (true);
2097       },
2098       hash_table);
2099 
2100   if (waiting.empty() && low_priority_light.empty() &&
2101       low_priority_heavier.empty()) {
2102     /* Nothing to grant. */
2103     return;
2104   }
2105   /* We want high schedule weight to be in front, and break ties by position */
2106   std::stable_sort(low_priority_heavier.begin(), low_priority_heavier.end(),
2107                    [](const LockDescriptorEx &a, const LockDescriptorEx &b) {
2108                      return (a.first > b.first);
2109                    });
2110   for (const auto &descriptor : low_priority_heavier) {
2111     waiting.push_back(descriptor.second);
2112   }
2113   waiting.insert(waiting.end(), low_priority_light.begin(),
2114                  low_priority_light.end());
2115 
2116   /* New granted locks will be added from this index. */
2117   const auto new_granted_index = granted.size();
2118 
2119   granted.reserve(granted.size() + waiting.size());
2120 
2121   for (lock_t *wait_lock : waiting) {
2122     /* Check if the transactions in the waiting queue have
2123     to wait for locks granted above. If they don't have to
2124     wait then grant them the locks and add them to the granted
2125     queue. */
2126 
2127     /* We don't expect to be a waiting trx, and we can't grant to ourselves as
2128     that would require entering trx->mutex while holding in_trx->mutex. */
2129     ut_ad(wait_lock->trx != in_trx);
2130 
2131     const lock_t *blocking_lock =
2132         lock_rec_has_to_wait_for_granted(wait_lock, granted, new_granted_index);
2133     if (blocking_lock == nullptr) {
2134       lock_grant(wait_lock);
2135 
2136       lock_rec_move_granted_to_front(wait_lock, rec_id);
2137 
2138       granted.push_back(wait_lock);
2139     } else {
2140       lock_update_wait_for_edge(wait_lock, blocking_lock);
2141     }
2142   }
2143 }
2144 
2145 /* Forward declaration to minimize the diff */
2146 static const lock_t *lock_has_to_wait_in_queue(const lock_t *wait_lock,
2147                                                const trx_t *blocking_trx);
2148 
2149 /** Given a lock, which was found in waiting queue, checks if it still has to
2150 wait in queue, and either grants it, or makes sure that the reason it has to
2151 wait is reflected in the wait-for graph.
2152 @param[in]  lock  A lock in WAITING state, which perhaps can be granted now */
lock_grant_or_update_wait_for_edge(lock_t * lock)2153 static void lock_grant_or_update_wait_for_edge(lock_t *lock) {
2154   ut_ad(lock->is_waiting());
2155   const lock_t *blocking_lock = lock_has_to_wait_in_queue(lock, nullptr);
2156   if (blocking_lock == nullptr) {
2157     /* Grant the lock */
2158     lock_grant(lock);
2159   } else {
2160     ut_ad(lock->trx != blocking_lock->trx);
2161     lock_update_wait_for_edge(lock, blocking_lock);
2162   }
2163 }
2164 
2165 /** Given a lock, and a transaction which is releasing another lock from the
2166 same queue, makes sure that if the lock was waiting for this transaction, then
2167 it will either be granted, or another reason for waiting is reflected in the
2168 wait-for graph. */
lock_grant_or_update_wait_for_edge_if_waiting(lock_t * lock,const trx_t * releasing_trx)2169 static void lock_grant_or_update_wait_for_edge_if_waiting(
2170     lock_t *lock, const trx_t *releasing_trx) {
2171   if (lock->is_waiting() && lock->trx->lock.blocking_trx == releasing_trx) {
2172     ut_ad(lock->trx != releasing_trx);
2173     lock_grant_or_update_wait_for_edge(lock);
2174   }
2175 }
2176 
2177 /** Grant lock to waiting requests that no longer conflicts.
2178 The in_lock might be modified before call to this function by clearing some flag
2179 (see for example lock_trx_release_read_locks). It also might already be removed
2180 from the hash bucket (a.k.a. waiting queue) or still reside in it. However the
2181 content of bitmap should not be changed prior to calling this function, as the
2182 bitmap will be inspected to see which heap_no at all were blocked by this
2183 in_lock, and only locks waiting for those heap_no's will be checked.
2184 @param[in,out]	in_lock		record lock object: grant all non-conflicting
2185                           locks waiting behind this lock object */
lock_rec_grant(lock_t * in_lock)2186 static void lock_rec_grant(lock_t *in_lock) {
2187   const auto page_id = in_lock->rec_lock.page_id;
2188   auto lock_hash = in_lock->hash_table();
2189 
2190   /* In some scenarios, in particular in replication appliers, it is often the
2191   case, that there are no WAITING locks, and in such situation iterating over
2192   all bits, and calling lock_rec_grant_by_heap_no() slows down the execution
2193   noticeably. (I guess that checking bits is not the costly part, but rather the
2194   allocation of vectors inside lock_rec_grant_by_heap_no). Therefore we first
2195   check if there is any lock which is waiting at all.
2196   Note: This condition could be further narrowed to check if the `lock` is
2197   waiting for the `in_lock` and/or `lock->trx` is blocked by the `in_lock->trx`,
2198   and we could optimize lock_rec_grant_by_heap_no() to allocate vectors only if
2199   there are at least two waiters to arbitrate among, but in practice the current
2200   simple heuristic is good enough. */
2201   bool found_waiter = false;
2202   for (auto lock = lock_rec_get_first_on_page_addr(lock_hash, page_id);
2203        lock != nullptr; lock = lock_rec_get_next_on_page(lock)) {
2204     if (lock->is_waiting()) {
2205       found_waiter = true;
2206       break;
2207     }
2208   }
2209   if (found_waiter) {
2210     mon_type_t grant_attempts = 0;
2211     for (ulint heap_no = 0; heap_no < lock_rec_get_n_bits(in_lock); ++heap_no) {
2212       if (lock_rec_get_nth_bit(in_lock, heap_no)) {
2213         lock_rec_grant_by_heap_no(in_lock, heap_no);
2214         ++grant_attempts;
2215       }
2216     }
2217     MONITOR_INC_VALUE(MONITOR_RECLOCK_GRANT_ATTEMPTS, grant_attempts);
2218   }
2219   MONITOR_INC(MONITOR_RECLOCK_RELEASE_ATTEMPTS);
2220 }
2221 
2222 /** Removes a record lock request, waiting or granted, from the queue and
2223 grants locks to other transactions in the queue if they now are entitled
2224 to a lock. NOTE: all record locks contained in in_lock are removed.
2225 @param[in,out]	in_lock		record lock object: all record locks which
2226                                 are contained in this lock object are removed;
2227                                 transactions waiting behind will get their
2228                                 lock requests granted, if they are now
2229                                 qualified to it */
lock_rec_dequeue_from_page(lock_t * in_lock)2230 static void lock_rec_dequeue_from_page(lock_t *in_lock) {
2231   lock_rec_discard(in_lock);
2232   lock_rec_grant(in_lock);
2233 }
2234 
2235 /** Removes a record lock request, waiting or granted, from the queue.
2236 @param[in]	in_lock		record lock object: all record locks
2237                                 which are contained in this lock object
2238                                 are removed */
lock_rec_discard(lock_t * in_lock)2239 void lock_rec_discard(lock_t *in_lock) {
2240   ut_ad(lock_get_type_low(in_lock) == LOCK_REC);
2241   const auto page_id = in_lock->rec_lock.page_id;
2242   ut_ad(locksys::owns_page_shard(page_id));
2243 
2244   ut_ad(in_lock->index->table->n_rec_locks.load() > 0);
2245   in_lock->index->table->n_rec_locks.fetch_sub(1, std::memory_order_relaxed);
2246 
2247   /* We want the state of lock queue and trx_locks list to be synchronized
2248   atomically from the point of view of people using trx->mutex, so we perform
2249   HASH_DELETE and UT_LIST_REMOVE while holding trx->mutex. */
2250 
2251   ut_ad(trx_mutex_own(in_lock->trx));
2252 
2253   locksys::remove_from_trx_locks(in_lock);
2254 
2255   HASH_DELETE(lock_t, hash, lock_hash_get(in_lock->type_mode),
2256               lock_rec_fold(page_id), in_lock);
2257 
2258   MONITOR_INC(MONITOR_RECLOCK_REMOVED);
2259   MONITOR_DEC(MONITOR_NUM_RECLOCK);
2260 }
2261 
2262 /** Removes record lock objects set on an index page which is discarded. This
2263  function does not move locks, or check for waiting locks, therefore the
2264  lock bitmaps must already be reset when this function is called. */
lock_rec_free_all_from_discard_page_low(page_id_t page_id,hash_table_t * lock_hash)2265 static void lock_rec_free_all_from_discard_page_low(page_id_t page_id,
2266                                                     hash_table_t *lock_hash) {
2267   lock_t *lock;
2268   lock_t *next_lock;
2269 
2270   lock = lock_rec_get_first_on_page_addr(lock_hash, page_id);
2271 
2272   while (lock != nullptr) {
2273     ut_ad(lock_rec_find_set_bit(lock) == ULINT_UNDEFINED);
2274     ut_ad(!lock_get_wait(lock));
2275 
2276     next_lock = lock_rec_get_next_on_page(lock);
2277 
2278     trx_t *trx = lock->trx;
2279     trx_mutex_enter(trx);
2280     lock_rec_discard(lock);
2281     trx_mutex_exit(trx);
2282 
2283     lock = next_lock;
2284   }
2285 }
2286 
2287 /** Removes record lock objects set on an index page which is discarded. This
2288  function does not move locks, or check for waiting locks, therefore the
2289  lock bitmaps must already be reset when this function is called. */
lock_rec_free_all_from_discard_page(const buf_block_t * block)2290 void lock_rec_free_all_from_discard_page(
2291     const buf_block_t *block) /*!< in: page to be discarded */
2292 {
2293   const auto page_id = block->get_page_id();
2294   ut_ad(locksys::owns_page_shard(page_id));
2295 
2296   lock_rec_free_all_from_discard_page_low(page_id, lock_sys->rec_hash);
2297   lock_rec_free_all_from_discard_page_low(page_id, lock_sys->prdt_hash);
2298   lock_rec_free_all_from_discard_page_low(page_id, lock_sys->prdt_page_hash);
2299 }
2300 
2301 /*============= RECORD LOCK MOVING AND INHERITING ===================*/
2302 
2303 /** Resets the lock bits for a single record. Releases transactions waiting for
2304  lock requests here. */
lock_rec_reset_and_release_wait_low(hash_table_t * hash,const buf_block_t * block,ulint heap_no)2305 static void lock_rec_reset_and_release_wait_low(
2306     hash_table_t *hash,       /*!< in: hash table */
2307     const buf_block_t *block, /*!< in: buffer block containing
2308                               the record */
2309     ulint heap_no)            /*!< in: heap number of record */
2310 {
2311   lock_t *lock;
2312 
2313   ut_ad(locksys::owns_page_shard(block->get_page_id()));
2314 
2315   for (lock = lock_rec_get_first(hash, block, heap_no); lock != nullptr;
2316        lock = lock_rec_get_next(heap_no, lock)) {
2317     if (lock_get_wait(lock)) {
2318       lock_rec_cancel(lock);
2319     } else {
2320       lock_rec_reset_nth_bit(lock, heap_no);
2321     }
2322   }
2323 }
2324 
2325 /** Resets the lock bits for a single record. Releases transactions waiting for
2326  lock requests here. */
lock_rec_reset_and_release_wait(const buf_block_t * block,ulint heap_no)2327 static void lock_rec_reset_and_release_wait(
2328     const buf_block_t *block, /*!< in: buffer block containing
2329                               the record */
2330     ulint heap_no)            /*!< in: heap number of record */
2331 {
2332   lock_rec_reset_and_release_wait_low(lock_sys->rec_hash, block, heap_no);
2333 
2334   lock_rec_reset_and_release_wait_low(lock_sys->prdt_hash, block,
2335                                       PAGE_HEAP_NO_INFIMUM);
2336   lock_rec_reset_and_release_wait_low(lock_sys->prdt_page_hash, block,
2337                                       PAGE_HEAP_NO_INFIMUM);
2338 }
2339 
lock_on_statement_end(trx_t * trx)2340 void lock_on_statement_end(trx_t *trx) { trx->lock.inherit_all.store(false); }
2341 
2342 /* Used to store information that `thr` requested a lock asking for protection
2343 at least till the end of the current statement which requires it to be inherited
2344 as gap locks even in READ COMMITTED isolation level.
2345 @param[in]  thr     the requesting thread */
2346 UNIV_INLINE
lock_protect_locks_till_statement_end(que_thr_t * thr)2347 void lock_protect_locks_till_statement_end(que_thr_t *thr) {
2348   thr_get_trx(thr)->lock.inherit_all.store(true);
2349 }
2350 
2351 /** Makes a record to inherit the locks (except LOCK_INSERT_INTENTION type)
2352  of another record as gap type locks, but does not reset the lock bits of
2353  the other record. Also waiting lock requests on rec are inherited as
2354  GRANTED gap locks. */
lock_rec_inherit_to_gap(const buf_block_t * heir_block,const buf_block_t * block,ulint heir_heap_no,ulint heap_no)2355 static void lock_rec_inherit_to_gap(
2356     const buf_block_t *heir_block, /*!< in: block containing the
2357                                    record which inherits */
2358     const buf_block_t *block,      /*!< in: block containing the
2359                                    record from which inherited;
2360                                    does NOT reset the locks on
2361                                    this record */
2362     ulint heir_heap_no,            /*!< in: heap_no of the
2363                                    inheriting record */
2364     ulint heap_no)                 /*!< in: heap_no of the
2365                                    donating record */
2366 {
2367   lock_t *lock;
2368 
2369   ut_ad(locksys::owns_page_shard(heir_block->get_page_id()));
2370   ut_ad(locksys::owns_page_shard(block->get_page_id()));
2371 
2372   /* If session is using READ COMMITTED or READ UNCOMMITTED isolation
2373   level, we do not want locks set by an UPDATE or a DELETE to be
2374   inherited as gap type locks.  But we DO want S-locks/X-locks(taken for
2375   replace) set by a consistency constraint to be inherited also then. */
2376 
2377   /* We also dont inherit these locks as gap type locks for DD tables
2378   because the serialization is guaranteed by MDL on DD tables. */
2379 
2380   /* Constraint checks place LOCK_S or (in case of INSERT ... ON DUPLICATE
2381   UPDATE... or REPLACE INTO..) LOCK_X on records.
2382   If such a record is delete-marked, it may then become purged, and
2383   lock_rec_inheirt_to_gap will be called to decide the fate of each lock on it:
2384   either it will be inherited as gap lock, or discarded.
2385   In READ COMMITTED and less restricitve isolation levels we generaly avoid gap
2386   locks, but we make an exception for precisely this situation: we want to
2387   inherit locks created for constraint checks.
2388   More precisely we need to keep inheriting them only for the duration of the
2389   query which has requested them, as such inserts have two phases : first they
2390   check for constraints, then they do actuall row insert, and they trust that
2391   the locks set in the first phase will survive till the second phase.
2392   It is not easy to tell if a particular lock was created for constraint check
2393   or not, because we do not store this bit of information on it.
2394   What we do, is we use a heuristic: whenever a trx requests a lock with
2395   lock_duration_t::AT_LEAST_STATEMENT we set trx->lock.inherit_all, meaning that
2396   locks of this trx need to be inherited.
2397   And we clear trx->lock.inherit_all on statement end. */
2398 
2399   for (lock = lock_rec_get_first(lock_sys->rec_hash, block, heap_no);
2400        lock != nullptr; lock = lock_rec_get_next(heap_no, lock)) {
2401     /* Skip inheriting lock if set */
2402     if (lock->trx->skip_lock_inheritance) {
2403       continue;
2404     }
2405 
2406     if (!lock_rec_get_insert_intention(lock) &&
2407         !lock->index->table->skip_gap_locks() &&
2408         (!lock->trx->skip_gap_locks() || lock->trx->lock.inherit_all.load())) {
2409       lock_rec_add_to_queue(LOCK_REC | LOCK_GAP | lock_get_mode(lock),
2410                             heir_block, heir_heap_no, lock->index, lock->trx);
2411     }
2412   }
2413 }
2414 
2415 /** Makes a record to inherit the gap locks (except LOCK_INSERT_INTENTION type)
2416  of another record as gap type locks, but does not reset the lock bits of the
2417  other record. Also waiting lock requests are inherited as GRANTED gap locks. */
lock_rec_inherit_to_gap_if_gap_lock(const buf_block_t * block,ulint heir_heap_no,ulint heap_no)2418 static void lock_rec_inherit_to_gap_if_gap_lock(
2419     const buf_block_t *block, /*!< in: buffer block */
2420     ulint heir_heap_no,       /*!< in: heap_no of
2421                               record which inherits */
2422     ulint heap_no)            /*!< in: heap_no of record
2423                               from which inherited;
2424                               does NOT reset the locks
2425                               on this record */
2426 {
2427   lock_t *lock;
2428 
2429   locksys::Shard_latch_guard guard{block->get_page_id()};
2430 
2431   for (lock = lock_rec_get_first(lock_sys->rec_hash, block, heap_no);
2432        lock != nullptr; lock = lock_rec_get_next(heap_no, lock)) {
2433     /* Skip inheriting lock if set */
2434     if (lock->trx->skip_lock_inheritance) {
2435       continue;
2436     }
2437 
2438     if (!lock_rec_get_insert_intention(lock) &&
2439         (heap_no == PAGE_HEAP_NO_SUPREMUM || !lock_rec_get_rec_not_gap(lock))) {
2440       lock_rec_add_to_queue(LOCK_REC | LOCK_GAP | lock_get_mode(lock), block,
2441                             heir_heap_no, lock->index, lock->trx);
2442     }
2443   }
2444 }
2445 
2446 /** Moves the locks of a record to another record and resets the lock bits of
2447  the donating record. */
lock_rec_move_low(hash_table_t * lock_hash,const buf_block_t * receiver,const buf_block_t * donator,ulint receiver_heap_no,ulint donator_heap_no)2448 static void lock_rec_move_low(
2449     hash_table_t *lock_hash,     /*!< in: hash table to use */
2450     const buf_block_t *receiver, /*!< in: buffer block containing
2451                                  the receiving record */
2452     const buf_block_t *donator,  /*!< in: buffer block containing
2453                                  the donating record */
2454     ulint receiver_heap_no,      /*!< in: heap_no of the record
2455                                 which gets the locks; there
2456                                 must be no lock requests
2457                                 on it! */
2458     ulint donator_heap_no)       /*!< in: heap_no of the record
2459                                  which gives the locks */
2460 {
2461   lock_t *lock;
2462 
2463   ut_ad(locksys::owns_page_shard(receiver->get_page_id()));
2464   ut_ad(locksys::owns_page_shard(donator->get_page_id()));
2465 
2466   /* If the lock is predicate lock, it resides on INFIMUM record */
2467   ut_ad(lock_rec_get_first(lock_hash, receiver, receiver_heap_no) == nullptr ||
2468         lock_hash == lock_sys->prdt_hash ||
2469         lock_hash == lock_sys->prdt_page_hash);
2470 
2471   for (lock = lock_rec_get_first(lock_hash, donator, donator_heap_no);
2472        lock != nullptr; lock = lock_rec_get_next(donator_heap_no, lock)) {
2473     const ulint type_mode = lock->type_mode;
2474 
2475     lock_rec_reset_nth_bit(lock, donator_heap_no);
2476 
2477     if (type_mode & LOCK_WAIT) {
2478       lock_reset_lock_and_trx_wait(lock);
2479     }
2480 
2481     /* Note that we FIRST reset the bit, and then set the lock:
2482     the function works also if donator == receiver */
2483 
2484     lock_rec_add_to_queue(type_mode, receiver, receiver_heap_no, lock->index,
2485                           lock->trx);
2486   }
2487 
2488   ut_ad(lock_rec_get_first(lock_sys->rec_hash, donator, donator_heap_no) ==
2489         nullptr);
2490 }
2491 
2492 /** Move all the granted locks to the front of the given lock list.
2493 All the waiting locks will be at the end of the list.
2494 @param[in,out]	lock_list	the given lock list.  */
lock_move_granted_locks_to_front(UT_LIST_BASE_NODE_T (lock_t)& lock_list)2495 static void lock_move_granted_locks_to_front(UT_LIST_BASE_NODE_T(lock_t) &
2496                                              lock_list) {
2497   lock_t *lock;
2498 
2499   bool seen_waiting_lock = false;
2500 
2501   for (lock = UT_LIST_GET_FIRST(lock_list); lock != nullptr;
2502        lock = UT_LIST_GET_NEXT(trx_locks, lock)) {
2503     if (!seen_waiting_lock) {
2504       if (lock->is_waiting()) {
2505         seen_waiting_lock = true;
2506       }
2507       continue;
2508     }
2509 
2510     ut_ad(seen_waiting_lock);
2511 
2512     if (!lock->is_waiting()) {
2513       lock_t *prev = UT_LIST_GET_PREV(trx_locks, lock);
2514       ut_a(prev);
2515       UT_LIST_MOVE_TO_FRONT(lock_list, lock);
2516       lock = prev;
2517     }
2518   }
2519 }
2520 
2521 /** Moves the locks of a record to another record and resets the lock bits of
2522  the donating record. */
2523 UNIV_INLINE
lock_rec_move(const buf_block_t * receiver,const buf_block_t * donator,ulint receiver_heap_no,ulint donator_heap_no)2524 void lock_rec_move(const buf_block_t *receiver, /*!< in: buffer block containing
2525                                                 the receiving record */
2526                    const buf_block_t *donator,  /*!< in: buffer block containing
2527                                                 the donating record */
2528                    ulint receiver_heap_no,      /*!< in: heap_no of the record
2529                                        which gets the locks; there
2530                                                must be no lock requests
2531                                                on it! */
2532                    ulint donator_heap_no)       /*!< in: heap_no of the record
2533                                                 which gives the locks */
2534 {
2535   lock_rec_move_low(lock_sys->rec_hash, receiver, donator, receiver_heap_no,
2536                     donator_heap_no);
2537 }
2538 
2539 /** Updates the lock table when we have reorganized a page. NOTE: we copy
2540  also the locks set on the infimum of the page; the infimum may carry
2541  locks if an update of a record is occurring on the page, and its locks
2542  were temporarily stored on the infimum. */
lock_move_reorganize_page(const buf_block_t * block,const buf_block_t * oblock)2543 void lock_move_reorganize_page(
2544     const buf_block_t *block,  /*!< in: old index page, now
2545                                reorganized */
2546     const buf_block_t *oblock) /*!< in: copy of the old, not
2547                                reorganized page */
2548 {
2549   lock_t *lock;
2550   UT_LIST_BASE_NODE_T(lock_t) old_locks;
2551   mem_heap_t *heap = nullptr;
2552   ulint comp;
2553   {
2554     /* We only process locks on block, not oblock */
2555     locksys::Shard_latch_guard guard{block->get_page_id()};
2556 
2557     /* FIXME: This needs to deal with predicate lock too */
2558     lock = lock_rec_get_first_on_page(lock_sys->rec_hash, block);
2559 
2560     if (lock == nullptr) {
2561       return;
2562     }
2563 
2564     heap = mem_heap_create(256);
2565 
2566     /* Copy first all the locks on the page to heap and reset the
2567     bitmaps in the original locks; chain the copies of the locks
2568     using the trx_locks field in them. */
2569 
2570     UT_LIST_INIT(old_locks, &lock_t::trx_locks);
2571 
2572     do {
2573       /* Make a copy of the lock */
2574       lock_t *old_lock = lock_rec_copy(lock, heap);
2575 
2576       UT_LIST_ADD_LAST(old_locks, old_lock);
2577 
2578       /* Reset bitmap of lock */
2579       lock_rec_bitmap_reset(lock);
2580 
2581       if (lock_get_wait(lock)) {
2582         lock_reset_lock_and_trx_wait(lock);
2583       }
2584 
2585       lock = lock_rec_get_next_on_page(lock);
2586     } while (lock != nullptr);
2587 
2588     comp = page_is_comp(block->frame);
2589     ut_ad(comp == page_is_comp(oblock->frame));
2590 
2591     lock_move_granted_locks_to_front(old_locks);
2592 
2593     DBUG_EXECUTE_IF("do_lock_reverse_page_reorganize",
2594                     UT_LIST_REVERSE(old_locks););
2595 
2596     for (lock = UT_LIST_GET_FIRST(old_locks); lock != nullptr;
2597          lock = UT_LIST_GET_NEXT(trx_locks, lock)) {
2598       /* NOTE: we copy also the locks set on the infimum and
2599       supremum of the page; the infimum may carry locks if an
2600       update of a record is occurring on the page, and its locks
2601       were temporarily stored on the infimum */
2602       const rec_t *rec1 = page_get_infimum_rec(buf_block_get_frame(block));
2603       const rec_t *rec2 = page_get_infimum_rec(buf_block_get_frame(oblock));
2604 
2605       /* Set locks according to old locks */
2606       for (;;) {
2607         ulint old_heap_no;
2608         ulint new_heap_no;
2609 
2610         if (comp) {
2611           old_heap_no = rec_get_heap_no_new(rec2);
2612           new_heap_no = rec_get_heap_no_new(rec1);
2613 
2614           rec1 = page_rec_get_next_low(rec1, true);
2615           rec2 = page_rec_get_next_low(rec2, true);
2616         } else {
2617           old_heap_no = rec_get_heap_no_old(rec2);
2618           new_heap_no = rec_get_heap_no_old(rec1);
2619           ut_ad(!memcmp(rec1, rec2, rec_get_data_size_old(rec2)));
2620 
2621           rec1 = page_rec_get_next_low(rec1, false);
2622           rec2 = page_rec_get_next_low(rec2, false);
2623         }
2624 
2625         /* Clear the bit in old_lock. */
2626         if (old_heap_no < lock->rec_lock.n_bits &&
2627             lock_rec_reset_nth_bit(lock, old_heap_no)) {
2628           /* NOTE that the old lock bitmap could be too
2629           small for the new heap number! */
2630 
2631           lock_rec_add_to_queue(lock->type_mode, block, new_heap_no,
2632                                 lock->index, lock->trx);
2633         }
2634 
2635         if (new_heap_no == PAGE_HEAP_NO_SUPREMUM) {
2636           ut_ad(old_heap_no == PAGE_HEAP_NO_SUPREMUM);
2637           break;
2638         }
2639       }
2640 
2641       ut_ad(lock_rec_find_set_bit(lock) == ULINT_UNDEFINED);
2642     }
2643   } /* Shard_latch_guard */
2644 
2645   mem_heap_free(heap);
2646 
2647 #ifdef UNIV_DEBUG_LOCK_VALIDATE
2648   ut_ad(lock_rec_validate_page(block));
2649 #endif /* UNIV_DEBUG_LOCK_VALIDATE */
2650 }
2651 
2652 /** Moves the explicit locks on user records to another page if a record
2653  list end is moved to another page. */
lock_move_rec_list_end(const buf_block_t * new_block,const buf_block_t * block,const rec_t * rec)2654 void lock_move_rec_list_end(
2655     const buf_block_t *new_block, /*!< in: index page to move to */
2656     const buf_block_t *block,     /*!< in: index page */
2657     const rec_t *rec)             /*!< in: record on page: this
2658                                   is the first record moved */
2659 {
2660   lock_t *lock;
2661   const ulint comp = page_rec_is_comp(rec);
2662 
2663   ut_ad(buf_block_get_frame(block) == page_align(rec));
2664   ut_ad(comp == page_is_comp(buf_block_get_frame(new_block)));
2665 
2666   {
2667     locksys::Shard_latches_guard guard{*block, *new_block};
2668 
2669     for (lock = lock_rec_get_first_on_page(lock_sys->rec_hash, block); lock;
2670          lock = lock_rec_get_next_on_page(lock)) {
2671       const rec_t *rec1 = rec;
2672       const rec_t *rec2;
2673       const ulint type_mode = lock->type_mode;
2674 
2675       if (comp) {
2676         if (page_offset(rec1) == PAGE_NEW_INFIMUM) {
2677           rec1 = page_rec_get_next_low(rec1, true);
2678         }
2679 
2680         rec2 = page_rec_get_next_low(
2681             buf_block_get_frame(new_block) + PAGE_NEW_INFIMUM, true);
2682       } else {
2683         if (page_offset(rec1) == PAGE_OLD_INFIMUM) {
2684           rec1 = page_rec_get_next_low(rec1, false);
2685         }
2686 
2687         rec2 = page_rec_get_next_low(
2688             buf_block_get_frame(new_block) + PAGE_OLD_INFIMUM, false);
2689       }
2690 
2691       /* Copy lock requests on user records to new page and
2692       reset the lock bits on the old */
2693 
2694       for (;;) {
2695         ulint rec1_heap_no;
2696         ulint rec2_heap_no;
2697 
2698         if (comp) {
2699           rec1_heap_no = rec_get_heap_no_new(rec1);
2700 
2701           if (rec1_heap_no == PAGE_HEAP_NO_SUPREMUM) {
2702             break;
2703           }
2704 
2705           rec2_heap_no = rec_get_heap_no_new(rec2);
2706           rec1 = page_rec_get_next_low(rec1, true);
2707           rec2 = page_rec_get_next_low(rec2, true);
2708         } else {
2709           rec1_heap_no = rec_get_heap_no_old(rec1);
2710 
2711           if (rec1_heap_no == PAGE_HEAP_NO_SUPREMUM) {
2712             break;
2713           }
2714 
2715           rec2_heap_no = rec_get_heap_no_old(rec2);
2716 
2717           ut_ad(!memcmp(rec1, rec2, rec_get_data_size_old(rec2)));
2718 
2719           rec1 = page_rec_get_next_low(rec1, false);
2720           rec2 = page_rec_get_next_low(rec2, false);
2721         }
2722 
2723         if (rec1_heap_no < lock->rec_lock.n_bits &&
2724             lock_rec_reset_nth_bit(lock, rec1_heap_no)) {
2725           if (type_mode & LOCK_WAIT) {
2726             lock_reset_lock_and_trx_wait(lock);
2727           }
2728 
2729           lock_rec_add_to_queue(type_mode, new_block, rec2_heap_no, lock->index,
2730                                 lock->trx);
2731         }
2732       }
2733     }
2734   } /* Shard_latches_guard */
2735 
2736 #ifdef UNIV_DEBUG_LOCK_VALIDATE
2737   ut_ad(lock_rec_validate_page(block));
2738   ut_ad(lock_rec_validate_page(new_block));
2739 #endif /* UNIV_DEBUG_LOCK_VALIDATE */
2740 }
2741 
2742 /** Moves the explicit locks on user records to another page if a record
2743  list start is moved to another page. */
lock_move_rec_list_start(const buf_block_t * new_block,const buf_block_t * block,const rec_t * rec,const rec_t * old_end)2744 void lock_move_rec_list_start(const buf_block_t *new_block, /*!< in: index page
2745                                                             to move to */
2746                               const buf_block_t *block, /*!< in: index page */
2747                               const rec_t *rec,         /*!< in: record on page:
2748                                                         this is the first
2749                                                         record NOT copied */
2750                               const rec_t *old_end)     /*!< in: old
2751                                                         previous-to-last
2752                                                         record on new_page
2753                                                         before the records
2754                                                         were copied */
2755 {
2756   lock_t *lock;
2757   const ulint comp = page_rec_is_comp(rec);
2758 
2759   ut_ad(block->frame == page_align(rec));
2760   ut_ad(new_block->frame == page_align(old_end));
2761   ut_ad(comp == page_rec_is_comp(old_end));
2762 
2763   {
2764     locksys::Shard_latches_guard guard{*block, *new_block};
2765 
2766     for (lock = lock_rec_get_first_on_page(lock_sys->rec_hash, block); lock;
2767          lock = lock_rec_get_next_on_page(lock)) {
2768       const rec_t *rec1;
2769       const rec_t *rec2;
2770       const ulint type_mode = lock->type_mode;
2771 
2772       if (comp) {
2773         rec1 = page_rec_get_next_low(
2774             buf_block_get_frame(block) + PAGE_NEW_INFIMUM, true);
2775         rec2 = page_rec_get_next_low(old_end, true);
2776       } else {
2777         rec1 = page_rec_get_next_low(
2778             buf_block_get_frame(block) + PAGE_OLD_INFIMUM, false);
2779         rec2 = page_rec_get_next_low(old_end, false);
2780       }
2781 
2782       /* Copy lock requests on user records to new page and
2783       reset the lock bits on the old */
2784 
2785       while (rec1 != rec) {
2786         ulint rec1_heap_no;
2787         ulint rec2_heap_no;
2788 
2789         if (comp) {
2790           rec1_heap_no = rec_get_heap_no_new(rec1);
2791           rec2_heap_no = rec_get_heap_no_new(rec2);
2792 
2793           rec1 = page_rec_get_next_low(rec1, true);
2794           rec2 = page_rec_get_next_low(rec2, true);
2795         } else {
2796           rec1_heap_no = rec_get_heap_no_old(rec1);
2797           rec2_heap_no = rec_get_heap_no_old(rec2);
2798 
2799           ut_ad(!memcmp(rec1, rec2, rec_get_data_size_old(rec2)));
2800 
2801           rec1 = page_rec_get_next_low(rec1, false);
2802           rec2 = page_rec_get_next_low(rec2, false);
2803         }
2804 
2805         if (rec1_heap_no < lock->rec_lock.n_bits &&
2806             lock_rec_reset_nth_bit(lock, rec1_heap_no)) {
2807           if (type_mode & LOCK_WAIT) {
2808             lock_reset_lock_and_trx_wait(lock);
2809           }
2810 
2811           lock_rec_add_to_queue(type_mode, new_block, rec2_heap_no, lock->index,
2812                                 lock->trx);
2813         }
2814       }
2815 
2816 #ifdef UNIV_DEBUG
2817       if (page_rec_is_supremum(rec)) {
2818         ulint i;
2819 
2820         for (i = PAGE_HEAP_NO_USER_LOW; i < lock_rec_get_n_bits(lock); i++) {
2821           ut_a(!lock_rec_get_nth_bit(lock, i));
2822         }
2823       }
2824 #endif /* UNIV_DEBUG */
2825     }
2826   } /* Shard_latches_guard */
2827 
2828 #ifdef UNIV_DEBUG_LOCK_VALIDATE
2829   ut_ad(lock_rec_validate_page(block));
2830 #endif /* UNIV_DEBUG_LOCK_VALIDATE */
2831 }
2832 
2833 /** Moves the explicit locks on user records to another page if a record
2834  list start is moved to another page. */
lock_rtr_move_rec_list(const buf_block_t * new_block,const buf_block_t * block,rtr_rec_move_t * rec_move,ulint num_move)2835 void lock_rtr_move_rec_list(const buf_block_t *new_block, /*!< in: index page to
2836                                                           move to */
2837                             const buf_block_t *block,     /*!< in: index page */
2838                             rtr_rec_move_t *rec_move, /*!< in: recording records
2839                                                       moved */
2840                             ulint num_move) /*!< in: num of rec to move */
2841 {
2842   lock_t *lock;
2843   ulint comp;
2844 
2845   if (!num_move) {
2846     return;
2847   }
2848 
2849   comp = page_rec_is_comp(rec_move[0].old_rec);
2850 
2851   ut_ad(block->frame == page_align(rec_move[0].old_rec));
2852   ut_ad(new_block->frame == page_align(rec_move[0].new_rec));
2853   ut_ad(comp == page_rec_is_comp(rec_move[0].new_rec));
2854 
2855   {
2856     locksys::Shard_latches_guard guard{*new_block, *block};
2857 
2858     for (lock = lock_rec_get_first_on_page(lock_sys->rec_hash, block); lock;
2859          lock = lock_rec_get_next_on_page(lock)) {
2860       ulint moved = 0;
2861       const rec_t *rec1;
2862       const rec_t *rec2;
2863       const ulint type_mode = lock->type_mode;
2864 
2865       /* Copy lock requests on user records to new page and
2866       reset the lock bits on the old */
2867 
2868       while (moved < num_move) {
2869         ulint rec1_heap_no;
2870         ulint rec2_heap_no;
2871 
2872         rec1 = rec_move[moved].old_rec;
2873         rec2 = rec_move[moved].new_rec;
2874 
2875         if (comp) {
2876           rec1_heap_no = rec_get_heap_no_new(rec1);
2877           rec2_heap_no = rec_get_heap_no_new(rec2);
2878 
2879         } else {
2880           rec1_heap_no = rec_get_heap_no_old(rec1);
2881           rec2_heap_no = rec_get_heap_no_old(rec2);
2882 
2883           ut_ad(!memcmp(rec1, rec2, rec_get_data_size_old(rec2)));
2884         }
2885 
2886         if (rec1_heap_no < lock->rec_lock.n_bits &&
2887             lock_rec_reset_nth_bit(lock, rec1_heap_no)) {
2888           if (type_mode & LOCK_WAIT) {
2889             lock_reset_lock_and_trx_wait(lock);
2890           }
2891 
2892           lock_rec_add_to_queue(type_mode, new_block, rec2_heap_no, lock->index,
2893                                 lock->trx);
2894 
2895           rec_move[moved].moved = true;
2896         }
2897 
2898         moved++;
2899       }
2900     }
2901   } /* Shard_latches_guard */
2902 
2903 #ifdef UNIV_DEBUG_LOCK_VALIDATE
2904   ut_ad(lock_rec_validate_page(block));
2905 #endif
2906 }
2907 /** Updates the lock table when a page is split to the right. */
lock_update_split_right(const buf_block_t * right_block,const buf_block_t * left_block)2908 void lock_update_split_right(
2909     const buf_block_t *right_block, /*!< in: right page */
2910     const buf_block_t *left_block)  /*!< in: left page */
2911 {
2912   ulint heap_no = lock_get_min_heap_no(right_block);
2913 
2914   locksys::Shard_latches_guard guard{*left_block, *right_block};
2915 
2916   /* Move the locks on the supremum of the left page to the supremum
2917   of the right page */
2918 
2919   lock_rec_move(right_block, left_block, PAGE_HEAP_NO_SUPREMUM,
2920                 PAGE_HEAP_NO_SUPREMUM);
2921 
2922   /* Inherit the locks to the supremum of left page from the successor
2923   of the infimum on right page */
2924 
2925   lock_rec_inherit_to_gap(left_block, right_block, PAGE_HEAP_NO_SUPREMUM,
2926                           heap_no);
2927 }
2928 
2929 /** Updates the lock table when a page is merged to the right. */
lock_update_merge_right(const buf_block_t * right_block,const rec_t * orig_succ,const buf_block_t * left_block)2930 void lock_update_merge_right(
2931     const buf_block_t *right_block, /*!< in: right page
2932                                     to which merged */
2933     const rec_t *orig_succ,         /*!< in: original
2934                                     successor of infimum
2935                                     on the right page
2936                                     before merge */
2937     const buf_block_t *left_block)  /*!< in: merged
2938                                     index  page which
2939                                     will be  discarded */
2940 {
2941   locksys::Shard_latches_guard guard{*left_block, *right_block};
2942 
2943   /* Inherit the locks from the supremum of the left page to the original
2944   successor of infimum on the right page, to which the left page was merged. */
2945 
2946   lock_rec_inherit_to_gap(right_block, left_block,
2947                           page_rec_get_heap_no(orig_succ),
2948                           PAGE_HEAP_NO_SUPREMUM);
2949 
2950   /* Reset the locks on the supremum of the left page, releasing waiting
2951   transactions. */
2952 
2953   lock_rec_reset_and_release_wait_low(lock_sys->rec_hash, left_block,
2954                                       PAGE_HEAP_NO_SUPREMUM);
2955 
2956   /* There should exist no page lock on the left page, otherwise, it will be
2957   blocked from merge. */
2958   ut_ad(lock_rec_get_first_on_page_addr(lock_sys->prdt_page_hash,
2959                                         left_block->get_page_id()) == nullptr);
2960 
2961   lock_rec_free_all_from_discard_page(left_block);
2962 }
2963 
2964 /** Updates the lock table when the root page is copied to another in
2965  btr_root_raise_and_insert. Note that we leave lock structs on the
2966  root page, even though they do not make sense on other than leaf
2967  pages: the reason is that in a pessimistic update the infimum record
2968  of the root page will act as a dummy carrier of the locks of the record
2969  to be updated. */
lock_update_root_raise(const buf_block_t * block,const buf_block_t * root)2970 void lock_update_root_raise(
2971     const buf_block_t *block, /*!< in: index page to which copied */
2972     const buf_block_t *root)  /*!< in: root page */
2973 {
2974   locksys::Shard_latches_guard guard{*block, *root};
2975 
2976   /* Move the locks on the supremum of the root to the supremum
2977   of block */
2978 
2979   lock_rec_move(block, root, PAGE_HEAP_NO_SUPREMUM, PAGE_HEAP_NO_SUPREMUM);
2980 }
2981 
2982 /** Updates the lock table when a page is copied to another and the original
2983  page is removed from the chain of leaf pages, except if page is the root! */
lock_update_copy_and_discard(const buf_block_t * new_block,const buf_block_t * block)2984 void lock_update_copy_and_discard(
2985     const buf_block_t *new_block, /*!< in: index page to
2986                                   which copied */
2987     const buf_block_t *block)     /*!< in: index page;
2988                                   NOT the root! */
2989 {
2990   locksys::Shard_latches_guard guard{*new_block, *block};
2991 
2992   /* Move the locks on the supremum of the old page to the supremum
2993   of new_page */
2994 
2995   lock_rec_move(new_block, block, PAGE_HEAP_NO_SUPREMUM, PAGE_HEAP_NO_SUPREMUM);
2996   lock_rec_free_all_from_discard_page(block);
2997 }
2998 
2999 /** Updates the lock table when a page is split to the left. */
lock_update_split_left(const buf_block_t * right_block,const buf_block_t * left_block)3000 void lock_update_split_left(
3001     const buf_block_t *right_block, /*!< in: right page */
3002     const buf_block_t *left_block)  /*!< in: left page */
3003 {
3004   ulint heap_no = lock_get_min_heap_no(right_block);
3005 
3006   locksys::Shard_latches_guard guard{*left_block, *right_block};
3007 
3008   /* Inherit the locks to the supremum of the left page from the
3009   successor of the infimum on the right page */
3010 
3011   lock_rec_inherit_to_gap(left_block, right_block, PAGE_HEAP_NO_SUPREMUM,
3012                           heap_no);
3013 }
3014 
3015 /** Updates the lock table when a page is merged to the left. */
lock_update_merge_left(const buf_block_t * left_block,const rec_t * orig_pred,const buf_block_t * right_block)3016 void lock_update_merge_left(
3017     const buf_block_t *left_block,  /*!< in: left page to
3018                                     which merged */
3019     const rec_t *orig_pred,         /*!< in: original predecessor
3020                                     of supremum on the left page
3021                                     before merge */
3022     const buf_block_t *right_block) /*!< in: merged index page
3023                                     which will be discarded */
3024 {
3025   const rec_t *left_next_rec;
3026 
3027   ut_ad(left_block->frame == page_align(orig_pred));
3028 
3029   locksys::Shard_latches_guard guard{*left_block, *right_block};
3030 
3031   left_next_rec = page_rec_get_next_const(orig_pred);
3032 
3033   if (!page_rec_is_supremum(left_next_rec)) {
3034     /* Inherit the locks on the supremum of the left page to the
3035     first record which was moved from the right page */
3036 
3037     lock_rec_inherit_to_gap(left_block, left_block,
3038                             page_rec_get_heap_no(left_next_rec),
3039                             PAGE_HEAP_NO_SUPREMUM);
3040 
3041     /* Reset the locks on the supremum of the left page,
3042     releasing waiting transactions */
3043 
3044     lock_rec_reset_and_release_wait_low(lock_sys->rec_hash, left_block,
3045                                         PAGE_HEAP_NO_SUPREMUM);
3046   }
3047 
3048   /* Move the locks from the supremum of right page to the supremum
3049   of the left page */
3050 
3051   lock_rec_move(left_block, right_block, PAGE_HEAP_NO_SUPREMUM,
3052                 PAGE_HEAP_NO_SUPREMUM);
3053 
3054   /* there should exist no page lock on the right page,
3055   otherwise, it will be blocked from merge */
3056   ut_ad(lock_rec_get_first_on_page_addr(lock_sys->prdt_page_hash,
3057                                         right_block->get_page_id()) == nullptr);
3058 
3059   lock_rec_free_all_from_discard_page(right_block);
3060 }
3061 
3062 /** Resets the original locks on heir and replaces them with gap type locks
3063  inherited from rec. */
lock_rec_reset_and_inherit_gap_locks(const buf_block_t * heir_block,const buf_block_t * block,ulint heir_heap_no,ulint heap_no)3064 void lock_rec_reset_and_inherit_gap_locks(
3065     const buf_block_t *heir_block, /*!< in: block containing the
3066                                    record which inherits */
3067     const buf_block_t *block,      /*!< in: block containing the
3068                                    record from which inherited;
3069                                    does NOT reset the locks on
3070                                    this record */
3071     ulint heir_heap_no,            /*!< in: heap_no of the
3072                                    inheriting record */
3073     ulint heap_no)                 /*!< in: heap_no of the
3074                                    donating record */
3075 {
3076   locksys::Shard_latches_guard guard{*heir_block, *block};
3077 
3078   lock_rec_reset_and_release_wait(heir_block, heir_heap_no);
3079 
3080   lock_rec_inherit_to_gap(heir_block, block, heir_heap_no, heap_no);
3081 }
3082 
3083 /** Updates the lock table when a page is discarded. */
lock_update_discard(const buf_block_t * heir_block,ulint heir_heap_no,const buf_block_t * block)3084 void lock_update_discard(
3085     const buf_block_t *heir_block, /*!< in: index page
3086                                    which will inherit the locks */
3087     ulint heir_heap_no,            /*!< in: heap_no of the record
3088                                    which will inherit the locks */
3089     const buf_block_t *block)      /*!< in: index page
3090                                    which will be discarded */
3091 {
3092   const rec_t *rec;
3093   ulint heap_no;
3094   const page_t *page = block->frame;
3095 
3096   locksys::Shard_latches_guard guard{*heir_block, *block};
3097 
3098   if (!lock_rec_get_first_on_page(lock_sys->rec_hash, block) &&
3099       (!lock_rec_get_first_on_page(lock_sys->prdt_page_hash, block)) &&
3100       (!lock_rec_get_first_on_page(lock_sys->prdt_hash, block))) {
3101     /* No locks exist on page, nothing to do */
3102 
3103     return;
3104   }
3105 
3106   /* Inherit all the locks on the page to the record and reset all
3107   the locks on the page */
3108 
3109   if (page_is_comp(page)) {
3110     rec = page + PAGE_NEW_INFIMUM;
3111 
3112     do {
3113       heap_no = rec_get_heap_no_new(rec);
3114 
3115       lock_rec_inherit_to_gap(heir_block, block, heir_heap_no, heap_no);
3116 
3117       lock_rec_reset_and_release_wait(block, heap_no);
3118 
3119       rec = page + rec_get_next_offs(rec, true);
3120     } while (heap_no != PAGE_HEAP_NO_SUPREMUM);
3121   } else {
3122     rec = page + PAGE_OLD_INFIMUM;
3123 
3124     do {
3125       heap_no = rec_get_heap_no_old(rec);
3126 
3127       lock_rec_inherit_to_gap(heir_block, block, heir_heap_no, heap_no);
3128 
3129       lock_rec_reset_and_release_wait(block, heap_no);
3130 
3131       rec = page + rec_get_next_offs(rec, false);
3132     } while (heap_no != PAGE_HEAP_NO_SUPREMUM);
3133   }
3134 
3135   lock_rec_free_all_from_discard_page(block);
3136 }
3137 
3138 /** Updates the lock table when a new user record is inserted. */
lock_update_insert(const buf_block_t * block,const rec_t * rec)3139 void lock_update_insert(
3140     const buf_block_t *block, /*!< in: buffer block containing rec */
3141     const rec_t *rec)         /*!< in: the inserted record */
3142 {
3143   ulint receiver_heap_no;
3144   ulint donator_heap_no;
3145 
3146   ut_ad(block->frame == page_align(rec));
3147 
3148   /* Inherit the gap-locking locks for rec, in gap mode, from the next
3149   record */
3150 
3151   if (page_rec_is_comp(rec)) {
3152     receiver_heap_no = rec_get_heap_no_new(rec);
3153     donator_heap_no = rec_get_heap_no_new(page_rec_get_next_low(rec, true));
3154   } else {
3155     receiver_heap_no = rec_get_heap_no_old(rec);
3156     donator_heap_no = rec_get_heap_no_old(page_rec_get_next_low(rec, false));
3157   }
3158 
3159   lock_rec_inherit_to_gap_if_gap_lock(block, receiver_heap_no, donator_heap_no);
3160 }
3161 
3162 /** Updates the lock table when a record is removed. */
lock_update_delete(const buf_block_t * block,const rec_t * rec)3163 void lock_update_delete(
3164     const buf_block_t *block, /*!< in: buffer block containing rec */
3165     const rec_t *rec)         /*!< in: the record to be removed */
3166 {
3167   const page_t *page = block->frame;
3168   ulint heap_no;
3169   ulint next_heap_no;
3170 
3171   ut_ad(page == page_align(rec));
3172 
3173   if (page_is_comp(page)) {
3174     heap_no = rec_get_heap_no_new(rec);
3175     next_heap_no = rec_get_heap_no_new(page + rec_get_next_offs(rec, true));
3176   } else {
3177     heap_no = rec_get_heap_no_old(rec);
3178     next_heap_no = rec_get_heap_no_old(page + rec_get_next_offs(rec, false));
3179   }
3180 
3181   locksys::Shard_latch_guard guard{block->get_page_id()};
3182 
3183   /* Let the next record inherit the locks from rec, in gap mode */
3184 
3185   lock_rec_inherit_to_gap(block, block, next_heap_no, heap_no);
3186 
3187   /* Reset the lock bits on rec and release waiting transactions */
3188 
3189   lock_rec_reset_and_release_wait(block, heap_no);
3190 }
3191 
3192 /** Stores on the page infimum record the explicit locks of another record.
3193  This function is used to store the lock state of a record when it is
3194  updated and the size of the record changes in the update. The record
3195  is moved in such an update, perhaps to another page. The infimum record
3196  acts as a dummy carrier record, taking care of lock releases while the
3197  actual record is being moved. */
lock_rec_store_on_page_infimum(const buf_block_t * block,const rec_t * rec)3198 void lock_rec_store_on_page_infimum(
3199     const buf_block_t *block, /*!< in: buffer block containing rec */
3200     const rec_t *rec)         /*!< in: record whose lock state
3201                               is stored on the infimum
3202                               record of the same page; lock
3203                               bits are reset on the
3204                               record */
3205 {
3206   ulint heap_no = page_rec_get_heap_no(rec);
3207 
3208   ut_ad(block->frame == page_align(rec));
3209 
3210   locksys::Shard_latch_guard guard{block->get_page_id()};
3211 
3212   lock_rec_move(block, block, PAGE_HEAP_NO_INFIMUM, heap_no);
3213 }
3214 
3215 /** Restores the state of explicit lock requests on a single record, where the
3216  state was stored on the infimum of the page. */
lock_rec_restore_from_page_infimum(const buf_block_t * block,const rec_t * rec,const buf_block_t * donator)3217 void lock_rec_restore_from_page_infimum(
3218     const buf_block_t *block,   /*!< in: buffer block containing rec */
3219     const rec_t *rec,           /*!< in: record whose lock state
3220                                 is restored */
3221     const buf_block_t *donator) /*!< in: page (rec is not
3222                                 necessarily on this page)
3223                                 whose infimum stored the lock
3224                                 state; lock bits are reset on
3225                                 the infimum */
3226 {
3227   DEBUG_SYNC_C("lock_rec_restore_from_page_infimum_will_latch");
3228   ulint heap_no = page_rec_get_heap_no(rec);
3229 
3230   locksys::Shard_latches_guard guard{*block, *donator};
3231 
3232   lock_rec_move(block, donator, heap_no, PAGE_HEAP_NO_INFIMUM);
3233 }
3234 
3235 /*========================= TABLE LOCKS ==============================*/
3236 
3237 /** Functor for accessing the embedded node within a table lock. */
3238 struct TableLockGetNode {
operator ()TableLockGetNode3239   ut_list_node<lock_t> &operator()(lock_t &elem) {
3240     return (elem.tab_lock.locks);
3241   }
3242 };
3243 
3244 /** Creates a table lock object and adds it as the last in the lock queue
3245  of the table. Does NOT check for deadlocks or lock compatibility.
3246  @return own: new lock object */
3247 UNIV_INLINE
lock_table_create(dict_table_t * table,ulint type_mode,trx_t * trx)3248 lock_t *lock_table_create(dict_table_t *table, /*!< in/out: database table
3249                                                in dictionary cache */
3250                           ulint type_mode, /*!< in: lock mode possibly ORed with
3251                                          LOCK_WAIT */
3252                           trx_t *trx)      /*!< in: trx */
3253 {
3254   lock_t *lock;
3255 
3256   ut_ad(table && trx);
3257   ut_ad(locksys::owns_table_shard(*table));
3258   ut_ad(trx_mutex_own(trx));
3259   ut_ad(trx_can_be_handled_by_current_thread(trx));
3260 
3261   check_trx_state(trx);
3262   ++table->count_by_mode[type_mode & LOCK_MODE_MASK];
3263   /* For AUTOINC locking we reuse the lock instance only if
3264   there is no wait involved else we allocate the waiting lock
3265   from the transaction lock heap. */
3266   if (type_mode == LOCK_AUTO_INC) {
3267     lock = table->autoinc_lock;
3268     ut_ad(table->autoinc_trx == nullptr);
3269     table->autoinc_trx = trx;
3270 
3271     ib_vector_push(trx->lock.autoinc_locks, &lock);
3272 
3273   } else if (trx->lock.table_cached < trx->lock.table_pool.size()) {
3274     lock = trx->lock.table_pool[trx->lock.table_cached++];
3275   } else {
3276     lock = static_cast<lock_t *>(
3277         mem_heap_alloc(trx->lock.lock_heap, sizeof(*lock)));
3278   }
3279   lock->type_mode = uint32_t(type_mode | LOCK_TABLE);
3280   lock->trx = trx;
3281   ut_d(lock->m_seq = lock_sys->m_seq.fetch_add(1));
3282 
3283   lock->tab_lock.table = table;
3284 
3285   ut_ad(table->n_ref_count > 0 || !table->can_be_evicted);
3286 
3287 #ifdef HAVE_PSI_THREAD_INTERFACE
3288 #ifdef HAVE_PSI_DATA_LOCK_INTERFACE
3289   /* The performance schema THREAD_ID and EVENT_ID
3290   are used only when DATA_LOCKS are exposed.  */
3291   PSI_THREAD_CALL(get_current_thread_event_id)
3292   (&lock->m_psi_internal_thread_id, &lock->m_psi_event_id);
3293 #endif /* HAVE_PSI_DATA_LOCK_INTERFACE */
3294 #endif /* HAVE_PSI_THREAD_INTERFACE */
3295 
3296   locksys::add_to_trx_locks(lock);
3297 
3298   ut_list_append(table->locks, lock, TableLockGetNode());
3299 
3300   if (type_mode & LOCK_WAIT) {
3301     lock_set_lock_and_trx_wait(lock);
3302   }
3303 
3304   lock->trx->lock.table_locks.push_back(lock);
3305 
3306   MONITOR_INC(MONITOR_TABLELOCK_CREATED);
3307   MONITOR_INC(MONITOR_NUM_TABLELOCK);
3308 
3309   return (lock);
3310 }
3311 
3312 /** Pops autoinc lock requests from the transaction's autoinc_locks. We
3313  handle the case where there are gaps in the array and they need to
3314  be popped off the stack. */
3315 UNIV_INLINE
lock_table_pop_autoinc_locks(trx_t * trx)3316 void lock_table_pop_autoinc_locks(
3317     trx_t *trx) /*!< in/out: transaction that owns the AUTOINC locks */
3318 {
3319   /* We will access and modify trx->lock.autoinc_locks so we need trx->mutex */
3320   ut_ad(trx_mutex_own(trx));
3321   ut_ad(!ib_vector_is_empty(trx->lock.autoinc_locks));
3322 
3323   /* Skip any gaps, gaps are NULL lock entries in the
3324   trx->autoinc_locks vector. */
3325 
3326   do {
3327     ib_vector_pop(trx->lock.autoinc_locks);
3328 
3329     if (ib_vector_is_empty(trx->lock.autoinc_locks)) {
3330       return;
3331     }
3332 
3333   } while (*(lock_t **)ib_vector_get_last(trx->lock.autoinc_locks) == nullptr);
3334 }
3335 
3336 /** Removes an autoinc lock request from the transaction's autoinc_locks. */
3337 UNIV_INLINE
lock_table_remove_autoinc_lock(lock_t * lock,trx_t * trx)3338 void lock_table_remove_autoinc_lock(
3339     lock_t *lock, /*!< in: table lock */
3340     trx_t *trx)   /*!< in/out: transaction that owns the lock */
3341 {
3342   /* We will access and modify trx->lock.autoinc_locks so we need trx->mutex */
3343   ut_ad(trx_mutex_own(trx));
3344   lock_t *autoinc_lock;
3345   lint i = ib_vector_size(trx->lock.autoinc_locks) - 1;
3346 
3347   ut_ad(lock_get_mode(lock) == LOCK_AUTO_INC);
3348   ut_ad(lock_get_type_low(lock) & LOCK_TABLE);
3349   ut_ad(locksys::owns_table_shard(*lock->tab_lock.table));
3350   ut_ad(!ib_vector_is_empty(trx->lock.autoinc_locks));
3351 
3352   /* With stored functions and procedures the user may drop
3353   a table within the same "statement". This special case has
3354   to be handled by deleting only those AUTOINC locks that were
3355   held by the table being dropped. */
3356 
3357   autoinc_lock =
3358       *static_cast<lock_t **>(ib_vector_get(trx->lock.autoinc_locks, i));
3359 
3360   /* This is the default fast case. */
3361 
3362   if (autoinc_lock == lock) {
3363     lock_table_pop_autoinc_locks(trx);
3364   } else {
3365     /* The last element should never be NULL */
3366     ut_a(autoinc_lock != nullptr);
3367 
3368     /* Handle freeing the locks from within the stack. */
3369 
3370     while (--i >= 0) {
3371       autoinc_lock =
3372           *static_cast<lock_t **>(ib_vector_get(trx->lock.autoinc_locks, i));
3373 
3374       if (autoinc_lock == lock) {
3375         void *null_var = nullptr;
3376         ib_vector_set(trx->lock.autoinc_locks, i, &null_var);
3377         return;
3378       }
3379     }
3380 
3381     /* Must find the autoinc lock. */
3382     ut_error;
3383   }
3384 }
3385 
3386 /** Removes a table lock request from the queue and the trx list of locks;
3387  this is a low-level function which does NOT check if waiting requests
3388  can now be granted. */
3389 UNIV_INLINE
lock_table_remove_low(lock_t * lock)3390 void lock_table_remove_low(lock_t *lock) /*!< in/out: table lock */
3391 {
3392   trx_t *trx;
3393   dict_table_t *table;
3394 
3395   trx = lock->trx;
3396   /* We will modify trx->lock.trx_locks so we need trx->mutex */
3397   ut_ad(trx_mutex_own(trx));
3398   table = lock->tab_lock.table;
3399   ut_ad(locksys::owns_table_shard(*table));
3400   const auto lock_mode = lock_get_mode(lock);
3401   /* Remove the table from the transaction's AUTOINC vector, if
3402   the lock that is being released is an AUTOINC lock. */
3403   if (lock_mode == LOCK_AUTO_INC) {
3404     /* The table's AUTOINC lock could not be granted to us yet. */
3405     ut_ad(table->autoinc_trx == trx || lock->is_waiting());
3406     if (table->autoinc_trx == trx) {
3407       table->autoinc_trx = nullptr;
3408     }
3409 
3410     /* The locks must be freed in the reverse order from
3411     the one in which they were acquired. This is to avoid
3412     traversing the AUTOINC lock vector unnecessarily.
3413 
3414     We only store locks that were granted in the
3415     trx->autoinc_locks vector (see lock_table_create()
3416     and lock_grant()). */
3417 
3418     if (!lock_get_wait(lock)) {
3419       lock_table_remove_autoinc_lock(lock, trx);
3420     }
3421   }
3422   ut_a(0 < table->count_by_mode[lock_mode]);
3423   --table->count_by_mode[lock_mode];
3424 
3425   locksys::remove_from_trx_locks(lock);
3426 
3427   ut_list_remove(table->locks, lock, TableLockGetNode());
3428 
3429   MONITOR_INC(MONITOR_TABLELOCK_REMOVED);
3430   MONITOR_DEC(MONITOR_NUM_TABLELOCK);
3431 }
3432 
3433 /** Enqueues a waiting request for a table lock which cannot be granted
3434  immediately. Checks for deadlocks.
3435  @return DB_LOCK_WAIT or DB_DEADLOCK */
lock_table_enqueue_waiting(ulint mode,dict_table_t * table,que_thr_t * thr)3436 static dberr_t lock_table_enqueue_waiting(
3437     ulint mode,          /*!< in: lock mode this transaction is
3438                          requesting */
3439     dict_table_t *table, /*!< in/out: table */
3440     que_thr_t *thr)      /*!< in: query thread */
3441 {
3442   trx_t *trx;
3443 
3444   ut_ad(locksys::owns_table_shard(*table));
3445   ut_ad(!srv_read_only_mode);
3446 
3447   trx = thr_get_trx(thr);
3448   ut_ad(trx_mutex_own(trx));
3449 
3450   /* Test if there already is some other reason to suspend thread:
3451   we do not enqueue a lock request if the query thread should be
3452   stopped anyway */
3453 
3454   if (que_thr_stop(thr)) {
3455     ut_error;
3456   }
3457 
3458   switch (trx_get_dict_operation(trx)) {
3459     case TRX_DICT_OP_NONE:
3460       break;
3461     case TRX_DICT_OP_TABLE:
3462     case TRX_DICT_OP_INDEX:
3463       ib::error(ER_IB_MSG_642) << "A table lock wait happens in a dictionary"
3464                                   " operation. Table "
3465                                << table->name << ". " << BUG_REPORT_MSG;
3466       ut_ad(0);
3467   }
3468 
3469   if (trx->in_innodb & TRX_FORCE_ROLLBACK_ASYNC) {
3470     return (DB_DEADLOCK);
3471   }
3472 
3473   /* Enqueue the lock request that will wait to be granted */
3474   lock_table_create(table, mode | LOCK_WAIT, trx);
3475 
3476   trx->lock.que_state = TRX_QUE_LOCK_WAIT;
3477 
3478   trx->lock.wait_started = ut_time();
3479   trx->lock.was_chosen_as_deadlock_victim = false;
3480 
3481   auto stopped = que_thr_stop(thr);
3482   ut_a(stopped);
3483 
3484   MONITOR_INC(MONITOR_TABLELOCK_WAIT);
3485 
3486   return (DB_LOCK_WAIT);
3487 }
3488 
3489 /** Checks if other transactions have an incompatible mode lock request in
3490  the lock queue.
3491  @return lock or NULL */
3492 UNIV_INLINE
lock_table_other_has_incompatible(const trx_t * trx,ulint wait,const dict_table_t * table,lock_mode mode)3493 const lock_t *lock_table_other_has_incompatible(
3494     const trx_t *trx,          /*!< in: transaction, or NULL if all
3495                                transactions should be included */
3496     ulint wait,                /*!< in: LOCK_WAIT if also
3497                                waiting locks are taken into
3498                                account, or 0 if not */
3499     const dict_table_t *table, /*!< in: table */
3500     lock_mode mode)            /*!< in: lock mode */
3501 {
3502   const lock_t *lock;
3503 
3504   ut_ad(locksys::owns_table_shard(*table));
3505 
3506   // According to lock_compatibility_matrix, an intention lock can wait only
3507   // for LOCK_S or LOCK_X. If there are no LOCK_S nor LOCK_X locks in the queue,
3508   // then we can avoid iterating through the list and return immediately.
3509   // This might help in OLTP scenarios, with no DDL queries,
3510   // as then there are almost no LOCK_S nor LOCK_X, but many DML queries still
3511   // need to get an intention lock to perform their action - while this never
3512   // causes them to wait for a "data lock", it might cause them to wait for
3513   // lock_sys table shard latch for the duration of table lock queue operation.
3514 
3515   if ((mode == LOCK_IS || mode == LOCK_IX) &&
3516       table->count_by_mode[LOCK_S] == 0 && table->count_by_mode[LOCK_X] == 0) {
3517     return nullptr;
3518   }
3519 
3520   for (lock = UT_LIST_GET_LAST(table->locks); lock != nullptr;
3521        lock = UT_LIST_GET_PREV(tab_lock.locks, lock)) {
3522     if (lock->trx != trx && !lock_mode_compatible(lock_get_mode(lock), mode) &&
3523         (wait || !lock_get_wait(lock))) {
3524       return (lock);
3525     }
3526   }
3527 
3528   return (nullptr);
3529 }
3530 
3531 /** Locks the specified database table in the mode given. If the lock cannot
3532  be granted immediately, the query thread is put to wait.
3533  @return DB_SUCCESS, DB_LOCK_WAIT, or DB_DEADLOCK */
lock_table(ulint flags,dict_table_t * table,lock_mode mode,que_thr_t * thr)3534 dberr_t lock_table(ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG bit is set,
3535                                 does nothing */
3536                    dict_table_t *table, /*!< in/out: database table
3537                                         in dictionary cache */
3538                    lock_mode mode,      /*!< in: lock mode */
3539                    que_thr_t *thr)      /*!< in: query thread */
3540 {
3541   trx_t *trx;
3542   dberr_t err;
3543   const lock_t *wait_for;
3544 
3545   ut_ad(table && thr);
3546 
3547   /* Given limited visibility of temp-table we can avoid
3548   locking overhead */
3549   if ((flags & BTR_NO_LOCKING_FLAG) || srv_read_only_mode ||
3550       table->is_temporary()) {
3551     return (DB_SUCCESS);
3552   }
3553 
3554   ut_a(flags == 0);
3555 
3556   trx = thr_get_trx(thr);
3557 
3558   /* Look for equal or stronger locks the same trx already has on the table.
3559   Even though lock_table_has() takes trx->mutex internally, it does not protect
3560   us at all from "higher-level" races - for instance the state could change in
3561   theory after we exit lock_table_has() and before we return DB_SUCCESS, or
3562   before somebody who called us reacts to the DB_SUCCESS.
3563   In theory trx_t::table_locks can be modified in
3564   lock_trx_table_locks_remove which is called from:
3565     lock_release_autoinc_last_lock
3566       lock_release_autoinc_locks
3567         lock_cancel_waiting_and_release
3568           (this one seems to be called only when trx is waiting and not running)
3569         lock_unlock_table_autoinc
3570           (this one seems to be called from the thread running the transaction)
3571     lock_remove_all_on_table_for_trx
3572       lock_remove_all_on_table
3573         row_drop_table_for_mysql
3574           (this one is mysterious, as it is not obvious to me why do we expect
3575           that someone will drop a table while there are locks on it)
3576         row_mysql_table_id_reassign
3577           row_discard_tablespace
3578             (there is some long explanation starting with "How do we prevent
3579             crashes caused by ongoing operations...")
3580     lock_remove_recovered_trx_record_locks
3581       (this seems to be used to remove locks of recovered transactions from
3582       table being dropped, and recovered transactions shouldn't call lock_table)
3583   Also the InnoDB Memcached plugin causes a callchain:
3584   innodb_store -> innodb_conn_init -> innodb_api_begin -> innodb_cb_cursor_lock
3585   -> ib_cursor_set_lock_mode -> ib_cursor_lock -> ib_trx_lock_table_with_retry
3586   -> lock_table_for_trx -> lock_table -> lock_table_has
3587   in which lock_table_has sees trx->mysqld_thd different than current_thd.
3588   In practice this call to lock_table_has was never protected in any way before,
3589   so the situation now, after protecting it with trx->mutex, can't be worse. */
3590 
3591   if (lock_table_has(trx, table, mode)) {
3592     /* In Debug mode we assert the same condition again, to help catch cases of
3593     race condition, if it is possible at all, for further analysis. */
3594     ut_ad(lock_table_has(trx, table, mode));
3595     return (DB_SUCCESS);
3596   }
3597 
3598   /* Read only transactions can write to temp tables, we don't want
3599   to promote them to RW transactions. Their updates cannot be visible
3600   to other transactions. Therefore we can keep them out
3601   of the read views. */
3602 
3603   if ((mode == LOCK_IX || mode == LOCK_X) && !trx->read_only &&
3604       trx->rsegs.m_redo.rseg == nullptr) {
3605     trx_set_rw_mode(trx);
3606   }
3607 
3608   locksys::Shard_latch_guard table_latch_guard{*table};
3609 
3610   /* We have to check if the new lock is compatible with any locks
3611   other transactions have in the table lock queue. */
3612 
3613   wait_for = lock_table_other_has_incompatible(trx, LOCK_WAIT, table, mode);
3614 
3615   trx_mutex_enter(trx);
3616 
3617   /* Another trx has a request on the table in an incompatible
3618   mode: this trx may have to wait */
3619 
3620   if (wait_for != nullptr) {
3621     err = lock_table_enqueue_waiting(mode | flags, table, thr);
3622     if (err == DB_LOCK_WAIT) {
3623       lock_create_wait_for_edge(trx, wait_for->trx);
3624     }
3625   } else {
3626     lock_table_create(table, mode | flags, trx);
3627 
3628     ut_a(!flags || mode == LOCK_S || mode == LOCK_X);
3629 
3630     err = DB_SUCCESS;
3631   }
3632 
3633   trx_mutex_exit(trx);
3634 
3635   ut_ad(err == DB_SUCCESS || err == DB_LOCK_WAIT || err == DB_DEADLOCK);
3636   return (err);
3637 }
3638 
3639 /** Creates a table IX lock object for a resurrected transaction. */
lock_table_ix_resurrect(dict_table_t * table,trx_t * trx)3640 void lock_table_ix_resurrect(dict_table_t *table, /*!< in/out: table */
3641                              trx_t *trx)          /*!< in/out: transaction */
3642 {
3643   ut_ad(trx->is_recovered);
3644 
3645   if (lock_table_has(trx, table, LOCK_IX)) {
3646     return;
3647   }
3648   locksys::Shard_latch_guard table_latch_guard{*table};
3649   /* We have to check if the new lock is compatible with any locks
3650   other transactions have in the table lock queue. */
3651 
3652   ut_ad(!lock_table_other_has_incompatible(trx, LOCK_WAIT, table, LOCK_IX));
3653 
3654   trx_mutex_enter(trx);
3655   lock_table_create(table, LOCK_IX, trx);
3656   trx_mutex_exit(trx);
3657 }
3658 
3659 /** Checks if a waiting table lock request still has to wait in a queue.
3660 @param[in]  wait_lock     Waiting table lock
3661 @param[in]  blocking_trx  If not nullptr, it restricts the search to only the
3662                           locks held by the blocking_trx, which is useful in
3663                           case when there might be multiple reasons for waiting
3664                           in queue, but we need to report the specific one.
3665                           Useful when reporting a deadlock cycle. (optional)
3666 @return The conflicting lock which is the reason wait_lock has to wait
3667 or nullptr if it can be granted now */
lock_table_has_to_wait_in_queue(const lock_t * wait_lock,const trx_t * blocking_trx=nullptr)3668 static const lock_t *lock_table_has_to_wait_in_queue(
3669     const lock_t *wait_lock, const trx_t *blocking_trx = nullptr) {
3670   const dict_table_t *table;
3671   const lock_t *lock;
3672 
3673   ut_ad(lock_get_wait(wait_lock));
3674 
3675   table = wait_lock->tab_lock.table;
3676   ut_ad(locksys::owns_table_shard(*table));
3677 
3678   const auto mode = lock_get_mode(wait_lock);
3679 
3680   // According to lock_compatibility_matrix, an intention lock can wait only
3681   // for LOCK_S or LOCK_X. If there are no LOCK_S nor LOCK_X locks in the queue,
3682   // then we can avoid iterating through the list and return immediately.
3683   // This might help in OLTP scenarios, with no DDL queries,
3684   // as then there are almost no LOCK_S nor LOCK_X, but many DML queries still
3685   // need to get an intention lock to perform their action. When an occasional
3686   // DDL finishes and releases the LOCK_S or LOCK_X, it has to scan the queue
3687   // and grant any locks which were blocked by it. This can take Omega(n^2) if
3688   // each of intention locks has to verify that all the other locks.
3689 
3690   if ((mode == LOCK_IS || mode == LOCK_IX) &&
3691       table->count_by_mode[LOCK_S] == 0 && table->count_by_mode[LOCK_X] == 0) {
3692     return (nullptr);
3693   }
3694 
3695   for (lock = UT_LIST_GET_FIRST(table->locks); lock != wait_lock;
3696        lock = UT_LIST_GET_NEXT(tab_lock.locks, lock)) {
3697     if ((blocking_trx == nullptr || blocking_trx == lock->trx) &&
3698         lock_has_to_wait(wait_lock, lock)) {
3699       return (lock);
3700     }
3701   }
3702 
3703   return (nullptr);
3704 }
3705 
3706 /** Checks if a waiting lock request still has to wait in a queue.
3707 @param[in]  wait_lock     Waiting lock
3708 @param[in]  blocking_trx  If not nullptr, it restricts the search to only the
3709                           locks held by the blocking_trx, which is useful in
3710                           case when there might be multiple reasons for waiting
3711                           in queue, but we need to report the specific one.
3712                           Useful when reporting a deadlock cycle.
3713 @return The conflicting lock which is the reason wait_lock has to wait
3714 or nullptr if it can be granted now */
lock_has_to_wait_in_queue(const lock_t * wait_lock,const trx_t * blocking_trx)3715 static const lock_t *lock_has_to_wait_in_queue(const lock_t *wait_lock,
3716                                                const trx_t *blocking_trx) {
3717   if (lock_get_type_low(wait_lock) == LOCK_REC) {
3718     return lock_rec_has_to_wait_in_queue(wait_lock, blocking_trx);
3719   } else {
3720     return lock_table_has_to_wait_in_queue(wait_lock, blocking_trx);
3721   }
3722 }
3723 
3724 /** Removes a table lock request, waiting or granted, from the queue and grants
3725  locks to other transactions in the queue, if they now are entitled to a
3726  lock. */
lock_table_dequeue(lock_t * in_lock)3727 static void lock_table_dequeue(
3728     lock_t *in_lock) /*!< in/out: table lock object; transactions waiting
3729                      behind will get their lock requests granted, if
3730                      they are now qualified to it */
3731 {
3732   /* This is needed for lock_table_remove_low(), but it's easier to understand
3733   the code if we assert it here as well */
3734   ut_ad(trx_mutex_own(in_lock->trx));
3735   ut_ad(locksys::owns_table_shard(*in_lock->tab_lock.table));
3736   ut_a(lock_get_type_low(in_lock) == LOCK_TABLE);
3737 
3738   const auto mode = lock_get_mode(in_lock);
3739   const auto table = in_lock->tab_lock.table;
3740 
3741   lock_t *lock = UT_LIST_GET_NEXT(tab_lock.locks, in_lock);
3742 
3743   lock_table_remove_low(in_lock);
3744 
3745   // According to lock_compatibility_matrix, an intention lock can block only
3746   // LOCK_S or LOCK_X from being granted, and thus, releasing of an intention
3747   // lock can help in granting only LOCK_S or LOCK_X. If there are no LOCK_S nor
3748   // LOCK_X locks in the queue, then we can avoid iterating through the list and
3749   // return immediately. This might help in OLTP scenarios, with no DDL queries,
3750   // as then there are almost no LOCK_S nor LOCK_X, but many DML queries still
3751   // need to get an intention lock to perform their action - while this never
3752   // causes them to wait for a "data lock", it might cause them to wait for
3753   // lock_sys table shard latch for the duration of table lock queue operation.
3754   if ((mode == LOCK_IS || mode == LOCK_IX) &&
3755       table->count_by_mode[LOCK_S] == 0 && table->count_by_mode[LOCK_X] == 0) {
3756     return;
3757   }
3758 
3759   /* Check if waiting locks in the queue can now be granted: grant
3760   locks if there are no conflicting locks ahead. */
3761 
3762   for (/* No op */; lock != nullptr;
3763        lock = UT_LIST_GET_NEXT(tab_lock.locks, lock)) {
3764     lock_grant_or_update_wait_for_edge_if_waiting(lock, in_lock->trx);
3765   }
3766 }
3767 
3768 /** Sets a lock on a table based on the given mode.
3769 @param[in]	table	table to lock
3770 @param[in,out]	trx	transaction
3771 @param[in]	mode	LOCK_X or LOCK_S
3772 @return error code or DB_SUCCESS. */
lock_table_for_trx(dict_table_t * table,trx_t * trx,enum lock_mode mode)3773 dberr_t lock_table_for_trx(dict_table_t *table, trx_t *trx,
3774                            enum lock_mode mode) {
3775   mem_heap_t *heap;
3776   que_thr_t *thr;
3777   dberr_t err;
3778   sel_node_t *node;
3779   heap = mem_heap_create(512);
3780 
3781   node = sel_node_create(heap);
3782   thr = pars_complete_graph_for_exec(node, trx, heap, nullptr);
3783   thr->graph->state = QUE_FORK_ACTIVE;
3784 
3785   /* We use the select query graph as the dummy graph needed
3786   in the lock module call */
3787 
3788   thr = static_cast<que_thr_t *>(que_fork_get_first_thr(
3789       static_cast<que_fork_t *>(que_node_get_parent(thr))));
3790 
3791   que_thr_move_to_run_state_for_mysql(thr, trx);
3792 
3793 run_again:
3794   thr->run_node = thr;
3795   thr->prev_node = thr->common.parent;
3796 
3797   err = lock_table(0, table, mode, thr);
3798 
3799   trx->error_state = err;
3800 
3801   if (err == DB_SUCCESS) {
3802     que_thr_stop_for_mysql_no_error(thr, trx);
3803   } else {
3804     que_thr_stop_for_mysql(thr);
3805 
3806     auto was_lock_wait = row_mysql_handle_errors(&err, trx, thr, nullptr);
3807 
3808     if (was_lock_wait) {
3809       goto run_again;
3810     }
3811   }
3812 
3813   que_graph_free(thr->graph);
3814   trx->op_info = "";
3815 
3816   return (err);
3817 }
3818 
3819 /*=========================== LOCK RELEASE ==============================*/
3820 
3821 /** Grant a lock to waiting transactions.
3822 @param[in]	lock		Lock that was unlocked
3823 @param[in]	heap_no		Heap no within the page for the lock. */
lock_rec_release(lock_t * lock,ulint heap_no)3824 static void lock_rec_release(lock_t *lock, ulint heap_no) {
3825   ut_ad(locksys::owns_page_shard(lock->rec_lock.page_id));
3826   ut_ad(!lock_get_wait(lock));
3827   ut_ad(lock_get_type_low(lock) == LOCK_REC);
3828   ut_ad(lock_rec_get_nth_bit(lock, heap_no));
3829   lock_rec_reset_nth_bit(lock, heap_no);
3830 
3831   lock_rec_grant_by_heap_no(lock, heap_no);
3832   MONITOR_INC(MONITOR_RECLOCK_GRANT_ATTEMPTS);
3833 }
3834 
3835 /** Removes a granted record lock of a transaction from the queue and grants
3836  locks to other transactions waiting in the queue if they now are entitled
3837  to a lock.
3838  This function is meant to be used only by row_unlock_for_mysql, and it assumes
3839  that the lock we are looking for has LOCK_REC_NOT_GAP flag.
3840  */
lock_rec_unlock(trx_t * trx,const buf_block_t * block,const rec_t * rec,lock_mode lock_mode)3841 void lock_rec_unlock(
3842     trx_t *trx,               /*!< in/out: transaction that has
3843                               set a record lock */
3844     const buf_block_t *block, /*!< in: buffer block containing rec */
3845     const rec_t *rec,         /*!< in: record */
3846     lock_mode lock_mode)      /*!< in: LOCK_S or LOCK_X */
3847 {
3848   ut_ad(block->frame == page_align(rec));
3849   ut_ad(trx_state_eq(trx, TRX_STATE_ACTIVE));
3850   ut_ad(lock_mode == LOCK_S || lock_mode == LOCK_X);
3851 
3852   ulint heap_no = page_rec_get_heap_no(rec);
3853 
3854   {
3855     locksys::Shard_latch_guard guard{block->get_page_id()};
3856     trx_mutex_enter_first_of_two(trx);
3857     ut_ad(!trx->lock.wait_lock);
3858 
3859     lock_t *first_lock;
3860 
3861     first_lock = lock_rec_get_first(lock_sys->rec_hash, block, heap_no);
3862 
3863     /* Find the last lock with the same lock_mode and transaction
3864     on the record. */
3865 
3866     for (auto lock = first_lock; lock != nullptr;
3867          lock = lock_rec_get_next(heap_no, lock)) {
3868       if (lock->trx == trx && lock_get_mode(lock) == lock_mode &&
3869           lock_rec_get_rec_not_gap(lock)) {
3870 #ifdef UNIV_DEBUG
3871         /* Since we actually found the first, not the last lock, lets check
3872            that it is also the last one */
3873         for (auto lock2 = lock_rec_get_next(heap_no, lock); lock2 != nullptr;
3874              lock2 = lock_rec_get_next(heap_no, lock2)) {
3875           ut_ad(!(lock2->trx == trx && lock_get_mode(lock2) == lock_mode &&
3876                   lock_rec_get_rec_not_gap(lock2)));
3877         }
3878 #endif
3879         lock_rec_release(lock, heap_no);
3880 
3881         trx_mutex_exit(trx);
3882 
3883         return;
3884       }
3885     }
3886 
3887     trx_mutex_exit(trx);
3888   } /* Shard_latch_guard */
3889 
3890   {
3891     size_t stmt_len;
3892 
3893     auto stmt = innobase_get_stmt_unsafe(trx->mysql_thd, &stmt_len);
3894 
3895     ib::error err(ER_IB_MSG_1228);
3896 
3897     err << "Unlock row could not find a " << lock_mode
3898         << " mode lock on the record. Current statement: ";
3899 
3900     err.write(stmt, stmt_len);
3901   }
3902 }
3903 
3904 /** Unlock the GAP Lock part of a Next Key Lock and grant it to waiters (if any)
3905 @param[in,out]	lock	lock object */
lock_release_gap_lock(lock_t * lock)3906 static void lock_release_gap_lock(lock_t *lock) {
3907   /* 1. Remove GAP lock for all records */
3908   lock->unlock_gap_lock();
3909 
3910   /* 2. Grant locks for all records */
3911   lock_rec_grant(lock);
3912 
3913   /* 3. Release explicitly all locks on supremum record. This is required
3914   because supremum record lock is always considered a GAP Lock, but the lock
3915   mode can be set to Next Key Lock for sharing lock objects with other records.
3916 
3917   We could not release all locks on supremum record in step [1] & [2] because
3918   currently lock_rec_grant accepts `lock` object as input which is also part of
3919   the lock queue. If we unlock supremum record (reset the BIT) in step-1, then
3920   step-2 would fail to grant locks because SUPREMUM record would be missing from
3921   input `lock` record bit set. */
3922   if (lock->includes_supremum()) {
3923     lock_rec_release(lock, PAGE_HEAP_NO_SUPREMUM);
3924   }
3925 }
3926 
3927 /** Used to release a lock during PREPARE. The lock is only
3928 released if rules permit it.
3929 @param[in]   lock       the lock that we consider releasing
3930 @param[in]   only_gap   true if we don't want to release records,
3931                         just the gaps between them */
lock_release_read_lock(lock_t * lock,bool only_gap)3932 static void lock_release_read_lock(lock_t *lock, bool only_gap) {
3933   if (!lock->is_record_lock() || lock->is_insert_intention() ||
3934       lock->is_predicate()) {
3935     /* DO NOTHING */
3936   } else if (lock->is_gap()) {
3937     /* Release any GAP only lock. */
3938     lock_rec_dequeue_from_page(lock);
3939   } else if (lock->is_record_not_gap() && only_gap) {
3940     /* Don't release any non-GAP lock if not asked.*/
3941   } else if (lock->mode() == LOCK_S && !only_gap) {
3942     /* Release Shared Next Key Lock(SH + GAP) if asked for */
3943     lock_rec_dequeue_from_page(lock);
3944   } else {
3945     /* Release GAP lock from Next Key lock */
3946     lock_release_gap_lock(lock);
3947   }
3948 }
3949 
3950 namespace locksys {
3951 
3952 /** A helper function which solves a chicken-and-egg problem occurring when one
3953 needs to iterate over trx's locks and perform some actions on them. Iterating
3954 over this list requires trx->mutex (or exclusive global lock_sys latch), and
3955 operating on a lock requires lock_sys latches, yet the latching order requires
3956 lock_sys latches to be taken before trx->mutex.
3957 One way around it is to use exclusive global lock_sys latch, which heavily
3958 deteriorates concurrency. Another is to try to reacquire the latches in needed
3959 order, veryfing that the list wasn't modified meanwhile.
3960 This function performs following steps:
3961 1. releases trx->mutex,
3962 2. acquires proper lock_sys shard latch,
3963 3. reaquires trx->mutex
3964 4. executes f unless trx's locks list has changed
3965 Before and after this function following should hold:
3966 - the shared global lock_sys latch is held
3967 - the trx->mutex is held
3968 @param[in]    trx     the trx, locks of which we are interested in
3969 @param[in]    shard   description of the shard we want to latch
3970 @param[in]    f       the function to execute when the shard is latched
3971 @return true if f was called, false if it couldn't be called because trx locks
3972         have changed while relatching trx->mutex
3973 */
3974 template <typename S, typename F>
try_relatch_trx_and_shard_and_do(const trx_t * const trx,const S & shard,F && f)3975 static bool try_relatch_trx_and_shard_and_do(const trx_t *const trx,
3976                                              const S &shard, F &&f) {
3977   ut_ad(locksys::owns_shared_global_latch());
3978   ut_ad(trx_mutex_own(trx));
3979 
3980   const auto expected_version = trx->lock.trx_locks_version;
3981   trx_mutex_exit(trx);
3982   DEBUG_SYNC_C("try_relatch_trx_and_shard_and_do_noted_expected_version");
3983   locksys::Shard_naked_latch_guard guard{shard};
3984   trx_mutex_enter_first_of_two(trx);
3985 
3986   /* Check that list was not modified while we were reacquiring latches */
3987   if (expected_version != trx->lock.trx_locks_version) {
3988     /* Someone has modified the list while we were re-acquiring the latches so,
3989     it is unsafe to operate on the lock. It might have been released, or maybe
3990     even assigned to another transaction (in case of AUTOINC lock). More
3991     importantly, we need to let know the caller that the list it is iterating
3992     over has been modified, which affects next/prev pointers. */
3993     return false;
3994   }
3995 
3996   std::forward<F>(f)();
3997   return true;
3998 }
3999 
4000 /** A helper function which solves a chicken-and-egg problem occurring when one
4001 needs to iterate over trx's locks and perform some actions on them. Iterating
4002 over this list requires trx->mutex (or exclusive global lock_sys latch), and
4003 operating on a lock requires lock_sys latches, yet the latching order requires
4004 lock_sys latches to be taken before trx->mutex.
4005 One way around it is to use exclusive global lock_sys latch, which heavily
4006 deteriorates concurrency. Another is to try to reacquire the latches in needed
4007 order, veryfing that the list wasn't modified meanwhile.
4008 This function performs following steps:
4009 1. releases trx->mutex,
4010 2. acquires proper lock_sys shard latch for given lock,
4011 3. reaquires trx->mutex
4012 4. executes f unless trx's locks list has changed
4013 Before and after this function following should hold:
4014 - the shared global lock_sys latch is held
4015 - the trx->mutex is held
4016 @param[in]    lock    the lock we are interested in
4017 @param[in]    f       the function to execute when the shard is latched
4018 @return true if f was called, false if it couldn't be called because trx locks
4019         have changed while relatching trx->mutex
4020 */
4021 template <typename F>
try_relatch_trx_and_shard_and_do(const lock_t * lock,F && f)4022 static bool try_relatch_trx_and_shard_and_do(const lock_t *lock, F &&f) {
4023   if (lock_get_type_low(lock) == LOCK_REC) {
4024     return try_relatch_trx_and_shard_and_do(lock->trx, lock->rec_lock.page_id,
4025                                             std::forward<F>(f));
4026   }
4027 
4028   ut_ad(lock_get_type_low(lock) == LOCK_TABLE);
4029   return try_relatch_trx_and_shard_and_do(lock->trx, *lock->tab_lock.table,
4030                                           std::forward<F>(f));
4031 }
4032 
4033 /** Tries to release read locks of a transaction without latching the whole
4034 lock sys. This may fail, if there are many concurrent threads editing the
4035 list of locks of this transaction (for example due to B-tree pages being
4036 merged or split, or due to implicit-to-explicit conversion).
4037 It is called during XA prepare to release locks early.
4038 @param[in,out]	trx		transaction
4039 @param[in]	only_gap	release only GAP locks
4040 @return true if and only if it succeeded to do the job*/
try_release_read_locks_in_s_mode(trx_t * trx,bool only_gap)4041 static bool try_release_read_locks_in_s_mode(trx_t *trx, bool only_gap) {
4042   /* In order to access trx->lock.trx_locks safely we need to hold trx->mutex.
4043   So, conceptually we'd love to hold trx->mutex while iterating through
4044   trx->lock.trx_locks.
4045   However the latching order only allows us to obtain trx->mutex AFTER any
4046   lock_sys latch.
4047   One way around this problem is to simply latch the whole lock_sys in exclusive
4048   mode (which also prevents any changes to trx->lock.trx_locks), however this
4049   impacts performance in appliers (TPS drops by up to 10%).
4050   Here we use a different approach:
4051   1. we extract lock from the list when holding the trx->mutex,
4052   2. identify the shard of lock_sys it belongs to,
4053   3. store the current version of trx->lock.trx_locks
4054   4. release the trx->mutex,
4055   5. acquire the lock_sys shard's latch,
4056   6. and reacquire the trx->mutex,
4057   7. verify that the version of trx->lock.trx_locks has not changed
4058   8. and only then perform any action on the lock.
4059   */
4060   ut_ad(trx_mutex_own(trx));
4061   ut_ad(locksys::owns_shared_global_latch());
4062   lock_t *lock = UT_LIST_GET_FIRST(trx->lock.trx_locks);
4063 
4064   while (lock != nullptr) {
4065     ut_ad(trx_mutex_own(trx));
4066     /* We didn't latch the lock_sys shard this `lock` is in, so we only read a
4067     bare minimum set of information from the `lock`, such as the type, space,
4068     page_no, and next pointer, which, as long as we hold trx->mutex, should be
4069     immutable.
4070 
4071     Store the pointer to the next lock in the list, because in some cases we are
4072     going to remove `lock` from the list, which clears the pointer to next lock
4073     */
4074     auto next_lock = UT_LIST_GET_NEXT(trx_locks, lock);
4075     if (lock_get_type_low(lock) == LOCK_REC) {
4076       /* Following call temporarily releases trx->mutex */
4077       if (!try_relatch_trx_and_shard_and_do(
4078               lock, [=]() { lock_release_read_lock(lock, only_gap); })) {
4079         /* Someone has modified the list while we were re-acquiring the latches
4080         so we need to start over again. */
4081         return false;
4082       }
4083     }
4084     /* As we have verified that the version has not changed, it must be the case
4085     that the next_lock is still the next lock as well */
4086     lock = next_lock;
4087   }
4088   return true;
4089 }
4090 }  // namespace locksys
4091 
4092 /** Release read locks of a transacion latching the whole lock-sys in
4093 exclusive mode, which is a bit too expensive to do by default.
4094 It is called during XA prepare to release locks early.
4095 @param[in,out]	trx		transaction
4096 @param[in]	only_gap	release only GAP locks */
lock_trx_release_read_locks_in_x_mode(trx_t * trx,bool only_gap)4097 static void lock_trx_release_read_locks_in_x_mode(trx_t *trx, bool only_gap) {
4098   ut_ad(!trx_mutex_own(trx));
4099 
4100   /* We will iterate over locks from various shards. */
4101   locksys::Global_exclusive_latch_guard guard{};
4102   trx_mutex_enter_first_of_two(trx);
4103 
4104   lock_t *lock = UT_LIST_GET_FIRST(trx->lock.trx_locks);
4105 
4106   while (lock != nullptr) {
4107     DEBUG_SYNC_C("lock_trx_release_read_locks_in_x_mode_will_release");
4108     /* Store the pointer to the next lock in the list, because in some cases
4109     we are going to remove `lock` from the list, which clears the pointer to
4110     next lock */
4111     lock_t *next_lock = UT_LIST_GET_NEXT(trx_locks, lock);
4112 
4113     lock_release_read_lock(lock, only_gap);
4114 
4115     lock = next_lock;
4116   }
4117 
4118   trx_mutex_exit(trx);
4119 }
4120 
lock_trx_release_read_locks(trx_t * trx,bool only_gap)4121 void lock_trx_release_read_locks(trx_t *trx, bool only_gap) {
4122   ut_ad(trx_can_be_handled_by_current_thread(trx));
4123 
4124   size_t failures;
4125   const size_t MAX_FAILURES = 5;
4126 
4127   {
4128     locksys::Global_shared_latch_guard shared_latch_guard{};
4129     trx_mutex_enter(trx);
4130     ut_ad(trx->lock.wait_lock == nullptr);
4131 
4132     for (failures = 0; failures < MAX_FAILURES; ++failures) {
4133       if (locksys::try_release_read_locks_in_s_mode(trx, only_gap)) {
4134         break;
4135       }
4136     }
4137 
4138     trx_mutex_exit(trx);
4139   }
4140 
4141   if (failures == MAX_FAILURES) {
4142     lock_trx_release_read_locks_in_x_mode(trx, only_gap);
4143   }
4144 }
4145 
4146 /** Releases transaction locks, and releases possible other transactions waiting
4147  because of these locks.
4148 @param[in,out]  trx   transaction */
lock_release(trx_t * trx)4149 static void lock_release(trx_t *trx) {
4150   lock_t *lock;
4151   ut_ad(!locksys::owns_exclusive_global_latch());
4152   ut_ad(!trx_mutex_own(trx));
4153   ut_ad(!trx->is_dd_trx);
4154 
4155   locksys::Global_shared_latch_guard shared_latch_guard{};
4156   /* In order to access trx->lock.trx_locks safely we need to hold trx->mutex.
4157   The transaction is already in TRX_STATE_COMMITTED_IN_MEMORY state and is no
4158   longer referenced, so we are not afraid of implicit-to-explicit conversions,
4159   nor a cancellation of a wait_lock (we are running, not waiting). Still, there
4160   might be some B-tree merge or split operations running in parallel which cause
4161   locks to be moved from one page to another, which at the low level means that
4162   a new lock is created (and added to trx->lock.trx_locks) and the old one is
4163   removed (also from trx->lock.trx_locks) in that specific order.
4164   So, conceptually we'd love to hold trx->mutex while iterating through
4165   trx->lock.trx_locks.
4166   However the latching order only allows us to obtain trx->mutex AFTER any
4167   lock_sys latch. One way around this problem is to simply latch the whole
4168   lock_sys in exclusive mode (which also prevents any changes to
4169   trx->lock.trx_locks), however this impacts performance (TPS drops on
4170   sysbench {pareto,uniform}-2S-{128,1024}-usrs tests by 3% to 11%) Here we
4171   use a different approach:
4172   1. we extract lock from the list when holding the trx->mutex,
4173   2. identify the shard of lock_sys it belongs to,
4174   3. release the trx->mutex,
4175   4. acquire the lock_sys shard's latch,
4176   5. and reacquire the trx->mutex,
4177   6. verify that the lock pointer is still in trx->lock.trx_locks (so it is
4178   safe to access it),
4179   7. and only then perform any action on the lock.
4180   */
4181   trx_mutex_enter(trx);
4182 
4183   ut_ad(trx->lock.wait_lock == nullptr);
4184   while ((lock = UT_LIST_GET_LAST(trx->lock.trx_locks)) != nullptr) {
4185     /* Following call temporarily releases trx->mutex */
4186     locksys::try_relatch_trx_and_shard_and_do(lock, [=]() {
4187       if (lock_get_type_low(lock) == LOCK_REC) {
4188         lock_rec_dequeue_from_page(lock);
4189       } else {
4190         lock_table_dequeue(lock);
4191       }
4192     });
4193   }
4194 
4195   trx_mutex_exit(trx);
4196 }
4197 
4198 /* True if a lock mode is S or X */
4199 #define IS_LOCK_S_OR_X(lock) \
4200   (lock_get_mode(lock) == LOCK_S || lock_get_mode(lock) == LOCK_X)
4201 
4202 /** Removes lock_to_remove from lock_to_remove->trx->lock.table_locks.
4203 @param[in]  lock_to_remove  lock to remove */
lock_trx_table_locks_remove(const lock_t * lock_to_remove)4204 static void lock_trx_table_locks_remove(const lock_t *lock_to_remove) {
4205   trx_t *trx = lock_to_remove->trx;
4206 
4207   ut_ad(locksys::owns_table_shard(*lock_to_remove->tab_lock.table));
4208   /* We will modify trx->lock.table_locks so we need trx->mutex */
4209   ut_ad(trx_mutex_own(trx));
4210 
4211   typedef lock_pool_t::reverse_iterator iterator;
4212 
4213   iterator end = trx->lock.table_locks.rend();
4214 
4215   iterator it = std::find(trx->lock.table_locks.rbegin(), end, lock_to_remove);
4216 
4217   /* Lock must exist in the vector. */
4218   ut_a(it != end);
4219   /* To keep it O(1) replace the removed position with lock from the back */
4220   *it = trx->lock.table_locks.back();
4221   trx->lock.table_locks.pop_back();
4222 }
4223 
4224 /** Removes locks of a transaction on a table to be dropped.
4225  If remove_also_table_sx_locks is true then table-level S and X locks are
4226  also removed in addition to other table-level and record-level locks.
4227  No lock that is going to be removed is allowed to be a wait lock. */
lock_remove_all_on_table_for_trx(dict_table_t * table,trx_t * trx,ibool remove_also_table_sx_locks)4228 static void lock_remove_all_on_table_for_trx(
4229     dict_table_t *table,              /*!< in: table to be dropped */
4230     trx_t *trx,                       /*!< in: a transaction */
4231     ibool remove_also_table_sx_locks) /*!< in: also removes
4232                                    table S and X locks */
4233 {
4234   lock_t *lock;
4235   lock_t *prev_lock;
4236 
4237   /* This is used when we drop a table and indeed have exclusive lock_sys
4238   access. */
4239   ut_ad(locksys::owns_exclusive_global_latch());
4240   /* We need trx->mutex to iterate over trx->lock.trx_lock and it is needed by
4241   lock_trx_table_locks_remove() and lock_table_remove_low() but we haven't
4242   acquired it yet. */
4243   ut_ad(!trx_mutex_own(trx));
4244   trx_mutex_enter(trx);
4245 
4246   for (lock = UT_LIST_GET_LAST(trx->lock.trx_locks); lock != nullptr;
4247        lock = prev_lock) {
4248     prev_lock = UT_LIST_GET_PREV(trx_locks, lock);
4249 
4250     if (lock_get_type_low(lock) == LOCK_REC && lock->index->table == table) {
4251       ut_a(!lock_get_wait(lock));
4252 
4253       lock_rec_discard(lock);
4254     } else if (lock_get_type_low(lock) & LOCK_TABLE &&
4255                lock->tab_lock.table == table &&
4256                (remove_also_table_sx_locks || !IS_LOCK_S_OR_X(lock))) {
4257       ut_a(!lock_get_wait(lock));
4258 
4259       lock_trx_table_locks_remove(lock);
4260       lock_table_remove_low(lock);
4261     }
4262   }
4263 
4264   trx_mutex_exit(trx);
4265 }
4266 
4267 /** Remove any explicit record locks held by recovering transactions on
4268  the table.
4269  @return number of recovered transactions examined */
lock_remove_recovered_trx_record_locks(dict_table_t * table)4270 static ulint lock_remove_recovered_trx_record_locks(
4271     dict_table_t *table) /*!< in: check if there are any locks
4272                          held on records in this table or on the
4273                          table itself */
4274 {
4275   ut_a(table != nullptr);
4276   /* We need exclusive lock_sys latch, as we are about to iterate over locks
4277   held by multiple transactions while they might be operating. */
4278   ut_ad(locksys::owns_exclusive_global_latch());
4279 
4280   ulint n_recovered_trx = 0;
4281 
4282   mutex_enter(&trx_sys->mutex);
4283 
4284   for (trx_t *trx = UT_LIST_GET_FIRST(trx_sys->rw_trx_list); trx != nullptr;
4285        trx = UT_LIST_GET_NEXT(trx_list, trx)) {
4286     assert_trx_in_rw_list(trx);
4287 
4288     if (!trx->is_recovered) {
4289       continue;
4290     }
4291     /* We need trx->mutex to iterate over trx->lock.trx_lock and it is needed by
4292     lock_trx_table_locks_remove() and lock_table_remove_low() but we haven't
4293     acquired it yet. */
4294     ut_ad(!trx_mutex_own(trx));
4295     trx_mutex_enter(trx);
4296     /* Because we are holding the exclusive global lock_sys latch,
4297     implicit locks cannot be converted to explicit ones
4298     while we are scanning the explicit locks. */
4299 
4300     lock_t *next_lock;
4301 
4302     for (lock_t *lock = UT_LIST_GET_FIRST(trx->lock.trx_locks); lock != nullptr;
4303          lock = next_lock) {
4304       ut_a(lock->trx == trx);
4305 
4306       /* Recovered transactions can't wait on a lock. */
4307 
4308       ut_a(!lock_get_wait(lock));
4309 
4310       next_lock = UT_LIST_GET_NEXT(trx_locks, lock);
4311 
4312       switch (lock_get_type_low(lock)) {
4313         default:
4314           ut_error;
4315         case LOCK_TABLE:
4316           if (lock->tab_lock.table == table) {
4317             lock_trx_table_locks_remove(lock);
4318             lock_table_remove_low(lock);
4319           }
4320           break;
4321         case LOCK_REC:
4322           if (lock->index->table == table) {
4323             lock_rec_discard(lock);
4324           }
4325       }
4326     }
4327 
4328     trx_mutex_exit(trx);
4329     ++n_recovered_trx;
4330   }
4331 
4332   mutex_exit(&trx_sys->mutex);
4333 
4334   return (n_recovered_trx);
4335 }
4336 
4337 /** Removes locks on a table to be dropped.
4338  If remove_also_table_sx_locks is true then table-level S and X locks are
4339  also removed in addition to other table-level and record-level locks.
4340  No lock, that is going to be removed, is allowed to be a wait lock. */
lock_remove_all_on_table(dict_table_t * table,ibool remove_also_table_sx_locks)4341 void lock_remove_all_on_table(
4342     dict_table_t *table,              /*!< in: table to be dropped
4343                                       or discarded */
4344     ibool remove_also_table_sx_locks) /*!< in: also removes
4345                                    table S and X locks */
4346 {
4347   lock_t *lock;
4348 
4349   /* We will iterate over locks (including record locks) from various shards */
4350   locksys::Global_exclusive_latch_guard guard{};
4351 
4352   for (lock = UT_LIST_GET_FIRST(table->locks); lock != nullptr;
4353        /* No op */) {
4354     lock_t *prev_lock;
4355 
4356     prev_lock = UT_LIST_GET_PREV(tab_lock.locks, lock);
4357 
4358     /* If we should remove all locks (remove_also_table_sx_locks
4359     is true), or if the lock is not table-level S or X lock,
4360     then check we are not going to remove a wait lock. */
4361     if (remove_also_table_sx_locks ||
4362         !(lock_get_type(lock) == LOCK_TABLE && IS_LOCK_S_OR_X(lock))) {
4363       ut_a(!lock_get_wait(lock));
4364     }
4365 
4366     lock_remove_all_on_table_for_trx(table, lock->trx,
4367                                      remove_also_table_sx_locks);
4368 
4369     if (prev_lock == nullptr) {
4370       if (lock == UT_LIST_GET_FIRST(table->locks)) {
4371         /* lock was not removed, pick its successor */
4372         lock = UT_LIST_GET_NEXT(tab_lock.locks, lock);
4373       } else {
4374         /* lock was removed, pick the first one */
4375         lock = UT_LIST_GET_FIRST(table->locks);
4376       }
4377     } else if (UT_LIST_GET_NEXT(tab_lock.locks, prev_lock) != lock) {
4378       /* If lock was removed by
4379       lock_remove_all_on_table_for_trx() then pick the
4380       successor of prev_lock ... */
4381       lock = UT_LIST_GET_NEXT(tab_lock.locks, prev_lock);
4382     } else {
4383       /* ... otherwise pick the successor of lock. */
4384       lock = UT_LIST_GET_NEXT(tab_lock.locks, lock);
4385     }
4386   }
4387 
4388   /* Note: Recovered transactions don't have table level IX or IS locks
4389   but can have implicit record locks that have been converted to explicit
4390   record locks. Such record locks cannot be freed by traversing the
4391   transaction lock list in dict_table_t (as above). */
4392 
4393   if (!lock_sys->rollback_complete &&
4394       lock_remove_recovered_trx_record_locks(table) == 0) {
4395     lock_sys->rollback_complete = true;
4396   }
4397 }
4398 
4399 /*===================== VALIDATION AND DEBUGGING ====================*/
4400 
4401 /** Prints info of a table lock. */
lock_table_print(FILE * file,const lock_t * lock)4402 static void lock_table_print(FILE *file,         /*!< in: file where to print */
4403                              const lock_t *lock) /*!< in: table type lock */
4404 {
4405   ut_a(lock_get_type_low(lock) == LOCK_TABLE);
4406   /* We actually hold exclusive latch here, but we require just the shard */
4407   ut_ad(locksys::owns_table_shard(*lock->tab_lock.table));
4408 
4409   fputs("TABLE LOCK table ", file);
4410   ut_print_name(file, lock->trx, lock->tab_lock.table->name.m_name);
4411   fprintf(file, " trx id " TRX_ID_FMT, trx_get_id_for_print(lock->trx));
4412 
4413   if (lock_get_mode(lock) == LOCK_S) {
4414     fputs(" lock mode S", file);
4415   } else if (lock_get_mode(lock) == LOCK_X) {
4416     ut_ad(lock->trx->id != 0);
4417     fputs(" lock mode X", file);
4418   } else if (lock_get_mode(lock) == LOCK_IS) {
4419     fputs(" lock mode IS", file);
4420   } else if (lock_get_mode(lock) == LOCK_IX) {
4421     ut_ad(lock->trx->id != 0);
4422     fputs(" lock mode IX", file);
4423   } else if (lock_get_mode(lock) == LOCK_AUTO_INC) {
4424     fputs(" lock mode AUTO-INC", file);
4425   } else {
4426     fprintf(file, " unknown lock mode %lu", (ulong)lock_get_mode(lock));
4427   }
4428 
4429   if (lock_get_wait(lock)) {
4430     fputs(" waiting", file);
4431   }
4432 
4433   putc('\n', file);
4434 }
4435 
4436 /** Prints info of a record lock. */
lock_rec_print(FILE * file,const lock_t * lock)4437 static void lock_rec_print(FILE *file,         /*!< in: file where to print */
4438                            const lock_t *lock) /*!< in: record type lock */
4439 {
4440   mtr_t mtr;
4441   Rec_offsets offsets;
4442 
4443   ut_a(lock_get_type_low(lock) == LOCK_REC);
4444   const auto page_id = lock->rec_lock.page_id;
4445   /* We actually hold exclusive latch here, but we require just the shard */
4446   ut_ad(locksys::owns_page_shard(page_id));
4447 
4448   fprintf(file,
4449           "RECORD LOCKS space id %lu page no %lu n bits %llu "
4450           "index %s of table ",
4451           ulong{page_id.space()}, ulong{page_id.page_no()},
4452           ulonglong{lock_rec_get_n_bits(lock)}, lock->index->name());
4453   ut_print_name(file, lock->trx, lock->index->table_name);
4454   fprintf(file, " trx id " TRX_ID_FMT, trx_get_id_for_print(lock->trx));
4455 
4456   if (lock_get_mode(lock) == LOCK_S) {
4457     fputs(" lock mode S", file);
4458   } else if (lock_get_mode(lock) == LOCK_X) {
4459     fputs(" lock_mode X", file);
4460   } else {
4461     ut_error;
4462   }
4463 
4464   if (lock_rec_get_gap(lock)) {
4465     fputs(" locks gap before rec", file);
4466   }
4467 
4468   if (lock_rec_get_rec_not_gap(lock)) {
4469     fputs(" locks rec but not gap", file);
4470   }
4471 
4472   if (lock_rec_get_insert_intention(lock)) {
4473     fputs(" insert intention", file);
4474   }
4475 
4476   if (lock_get_wait(lock)) {
4477     fputs(" waiting", file);
4478   }
4479 
4480   mtr_start(&mtr);
4481 
4482   putc('\n', file);
4483 
4484   const buf_block_t *block;
4485 
4486   block = buf_page_try_get(page_id, &mtr);
4487 
4488   for (ulint i = 0; i < lock_rec_get_n_bits(lock); ++i) {
4489     if (!lock_rec_get_nth_bit(lock, i)) {
4490       continue;
4491     }
4492 
4493     fprintf(file, "Record lock, heap no %lu", (ulong)i);
4494 
4495     if (block) {
4496       const rec_t *rec;
4497 
4498       rec = page_find_rec_with_heap_no(buf_block_get_frame(block), i);
4499 
4500       putc(' ', file);
4501       rec_print_new(file, rec, offsets.compute(rec, lock->index));
4502     }
4503 
4504     putc('\n', file);
4505   }
4506 
4507   mtr_commit(&mtr);
4508 }
4509 
4510 #ifdef UNIV_DEBUG
4511 /* Print the number of lock structs from lock_print_info_summary() only
4512 in non-production builds for performance reasons, see
4513 http://bugs.mysql.com/36942 */
4514 #define PRINT_NUM_OF_LOCK_STRUCTS
4515 #endif /* UNIV_DEBUG */
4516 
4517 #ifdef PRINT_NUM_OF_LOCK_STRUCTS
4518 /** Calculates the number of record lock structs in the record lock hash table.
4519  @return number of record locks */
lock_get_n_rec_locks(void)4520 static ulint lock_get_n_rec_locks(void) {
4521   ulint n_locks = 0;
4522   ulint i;
4523 
4524   /* We need exclusive access to lock_sys to iterate over all buckets */
4525   ut_ad(locksys::owns_exclusive_global_latch());
4526 
4527   for (i = 0; i < hash_get_n_cells(lock_sys->rec_hash); i++) {
4528     const lock_t *lock;
4529 
4530     for (lock =
4531              static_cast<const lock_t *>(HASH_GET_FIRST(lock_sys->rec_hash, i));
4532          lock != nullptr;
4533          lock = static_cast<const lock_t *>(HASH_GET_NEXT(hash, lock))) {
4534       n_locks++;
4535     }
4536   }
4537 
4538   return (n_locks);
4539 }
4540 #endif /* PRINT_NUM_OF_LOCK_STRUCTS */
4541 
lock_print_info_summary(FILE * file)4542 void lock_print_info_summary(FILE *file) {
4543   ut_ad(locksys::owns_exclusive_global_latch());
4544 
4545   if (lock_deadlock_found) {
4546     fputs(
4547         "------------------------\n"
4548         "LATEST DETECTED DEADLOCK\n"
4549         "------------------------\n",
4550         file);
4551 
4552     if (!srv_read_only_mode) {
4553       ut_copy_file(file, lock_latest_err_file);
4554     }
4555   }
4556 
4557   fputs(
4558       "------------\n"
4559       "TRANSACTIONS\n"
4560       "------------\n",
4561       file);
4562 
4563   fprintf(file, "Trx id counter " TRX_ID_FMT "\n", trx_sys_get_max_trx_id());
4564 
4565   fprintf(file,
4566           "Purge done for trx's n:o < " TRX_ID_FMT " undo n:o < " TRX_ID_FMT
4567           " state: ",
4568           purge_sys->iter.trx_no, purge_sys->iter.undo_no);
4569 
4570   /* Note: We are reading the state without the latch. One because it
4571   will violate the latching order and two because we are merely querying
4572   the state of the variable for display. */
4573 
4574   switch (purge_sys->state) {
4575     case PURGE_STATE_INIT:
4576       /* Should never be in this state while the system is running. */
4577       fprintf(file, "initializing");
4578       break;
4579 
4580     case PURGE_STATE_EXIT:
4581       fprintf(file, "exited");
4582       break;
4583 
4584     case PURGE_STATE_DISABLED:
4585       fprintf(file, "disabled");
4586       break;
4587 
4588     case PURGE_STATE_RUN:
4589       fprintf(file, "running");
4590       /* Check if it is waiting for more data to arrive. */
4591       if (!purge_sys->running) {
4592         fprintf(file, " but idle");
4593       }
4594       break;
4595 
4596     case PURGE_STATE_STOP:
4597       fprintf(file, "stopped");
4598       break;
4599   }
4600 
4601   fprintf(file, "\n");
4602 
4603   fprintf(file, "History list length %lu\n", (ulong)trx_sys->rseg_history_len);
4604 
4605 #ifdef PRINT_NUM_OF_LOCK_STRUCTS
4606   fprintf(file, "Total number of lock structs in row lock hash table %lu\n",
4607           (ulong)lock_get_n_rec_locks());
4608 #endif /* PRINT_NUM_OF_LOCK_STRUCTS */
4609 }
4610 
4611 /** Functor to print not-started transaction from the mysql_trx_list. */
4612 
4613 struct PrintNotStarted {
PrintNotStartedPrintNotStarted4614   PrintNotStarted(FILE *file) : m_file(file) {}
4615 
operator ()PrintNotStarted4616   void operator()(const trx_t *trx) {
4617     /* We require exclusive access to lock_sys */
4618     ut_ad(locksys::owns_exclusive_global_latch());
4619     ut_ad(trx->in_mysql_trx_list);
4620     ut_ad(mutex_own(&trx_sys->mutex));
4621 
4622     /* See state transitions and locking rules in trx0trx.h */
4623 
4624     trx_mutex_enter(trx);
4625     if (trx_state_eq(trx, TRX_STATE_NOT_STARTED)) {
4626       fputs("---", m_file);
4627       trx_print_latched(m_file, trx, 600);
4628     }
4629     trx_mutex_exit(trx);
4630   }
4631 
4632   FILE *m_file;
4633 };
4634 
4635 /** Iterate over a transaction's locks. Keeping track of the
4636 iterator using an ordinal value. */
4637 
4638 class TrxLockIterator {
4639  public:
TrxLockIterator()4640   TrxLockIterator() { rewind(); }
4641 
4642   /** Get the m_index(th) lock of a transaction.
4643   @return current lock or 0 */
current(const trx_t * trx) const4644   const lock_t *current(const trx_t *trx) const {
4645     lock_t *lock;
4646     ulint i = 0;
4647     /* Writes to trx->lock.trx_locks are protected by trx->mutex combined with a
4648     shared lock_sys global latch, and we assume we have the exclusive latch on
4649     lock_sys here. */
4650     ut_ad(locksys::owns_exclusive_global_latch());
4651     for (lock = UT_LIST_GET_FIRST(trx->lock.trx_locks);
4652          lock != nullptr && i < m_index;
4653          lock = UT_LIST_GET_NEXT(trx_locks, lock), ++i) {
4654       /* No op */
4655     }
4656 
4657     return (lock);
4658   }
4659 
4660   /** Set the ordinal value to 0 */
rewind()4661   void rewind() { m_index = 0; }
4662 
4663   /** Increment the ordinal value.
4664   @return the current index value */
next()4665   ulint next() { return (++m_index); }
4666 
4667  private:
4668   /** Current iterator position */
4669   ulint m_index;
4670 };
4671 
4672 /** This iterates over both the RW and RO trx_sys lists. We need to keep
4673 track where the iterator was up to and we do that using an ordinal value. */
4674 
4675 class TrxListIterator {
4676  public:
TrxListIterator()4677   TrxListIterator() : m_index() {
4678     /* We iterate over the RW trx list first. */
4679 
4680     m_trx_list = &trx_sys->rw_trx_list;
4681   }
4682 
4683   /** Get the current transaction whose ordinality is m_index.
4684   @return current transaction or 0 */
4685 
current()4686   const trx_t *current() { return (reposition()); }
4687 
4688   /** Advance the transaction current ordinal value and reset the
4689   transaction lock ordinal value */
4690 
next()4691   void next() {
4692     ++m_index;
4693     m_lock_iter.rewind();
4694   }
4695 
lock_iter()4696   TrxLockIterator &lock_iter() { return (m_lock_iter); }
4697 
4698  private:
4699   /** Reposition the "cursor" on the current transaction. If it
4700   is the first time then the "cursor" will be positioned on the
4701   first transaction.
4702 
4703   @return transaction instance or 0 */
reposition() const4704   const trx_t *reposition() const {
4705     ulint i;
4706     trx_t *trx;
4707 
4708     /* Make the transaction at the ordinal value of m_index
4709     the current transaction. ie. reposition/restore */
4710 
4711     for (i = 0, trx = UT_LIST_GET_FIRST(*m_trx_list);
4712          trx != nullptr && (i < m_index);
4713          trx = UT_LIST_GET_NEXT(trx_list, trx), ++i) {
4714       check_trx_state(trx);
4715     }
4716 
4717     return (trx);
4718   }
4719 
4720   /** Ordinal value of the transaction in the current transaction list */
4721   ulint m_index;
4722 
4723   /** Current transaction list */
4724   trx_ut_list_t *m_trx_list;
4725 
4726   /** For iterating over a transaction's locks */
4727   TrxLockIterator m_lock_iter;
4728 };
4729 
4730 /** Prints transaction lock wait and MVCC state.
4731 @param[in,out]	file	file where to print
4732 @param[in]	trx	transaction */
lock_trx_print_wait_and_mvcc_state(FILE * file,const trx_t * trx)4733 void lock_trx_print_wait_and_mvcc_state(FILE *file, const trx_t *trx) {
4734   /* We require exclusive lock_sys access so that trx->lock.wait_lock is
4735   not being modified, and to access trx->lock.wait_started without trx->mutex.*/
4736   ut_ad(locksys::owns_exclusive_global_latch());
4737   fprintf(file, "---");
4738 
4739   trx_print_latched(file, trx, 600);
4740 
4741   const ReadView *read_view = trx_get_read_view(trx);
4742 
4743   if (read_view != nullptr) {
4744     read_view->print_limits(file);
4745   }
4746 
4747   if (trx->lock.que_state == TRX_QUE_LOCK_WAIT) {
4748     fprintf(file,
4749             "------- TRX HAS BEEN WAITING %lu SEC"
4750             " FOR THIS LOCK TO BE GRANTED:\n",
4751             (ulong)difftime(ut_time(), trx->lock.wait_started));
4752 
4753     if (lock_get_type_low(trx->lock.wait_lock) == LOCK_REC) {
4754       lock_rec_print(file, trx->lock.wait_lock);
4755     } else {
4756       lock_table_print(file, trx->lock.wait_lock);
4757     }
4758 
4759     fprintf(file, "------------------\n");
4760   }
4761 }
4762 
4763 /** Reads the page containing the record protected by the given lock.
4764 This function will temporarily release the exclusive global latch and the
4765 trx_sys_t::mutex if the page was read from disk.
4766 @param[in]  lock  the record lock
4767 @return true if a page was successfully read from the tablespace */
lock_rec_fetch_page(const lock_t * lock)4768 static bool lock_rec_fetch_page(const lock_t *lock) {
4769   ut_ad(lock_get_type_low(lock) == LOCK_REC);
4770 
4771   const page_id_t page_id = lock->rec_lock.page_id;
4772   const space_id_t space_id = page_id.space();
4773   fil_space_t *space;
4774   bool found;
4775   const page_size_t &page_size = fil_space_get_page_size(space_id, &found);
4776 
4777   /* Check if the .ibd file exists. */
4778   if (found) {
4779     mtr_t mtr;
4780 
4781     locksys::Unsafe_global_latch_manipulator::exclusive_unlatch();
4782 
4783     mutex_exit(&trx_sys->mutex);
4784 
4785     DEBUG_SYNC_C("innodb_monitor_before_lock_page_read");
4786 
4787     /* Check if the space is exists or not. only
4788     when the space is valid, try to get the page. */
4789     space = fil_space_acquire(space_id);
4790     if (space) {
4791       mtr_start(&mtr);
4792       buf_page_get_gen(page_id, page_size, RW_NO_LATCH, nullptr,
4793                        Page_fetch::POSSIBLY_FREED, __FILE__, __LINE__, &mtr);
4794       mtr_commit(&mtr);
4795       fil_space_release(space);
4796     }
4797 
4798     locksys::Unsafe_global_latch_manipulator::exclusive_latch();
4799 
4800     mutex_enter(&trx_sys->mutex);
4801 
4802     return (true);
4803   }
4804 
4805   return (false);
4806 }
4807 
4808 /** Prints info of locks for a transaction.
4809  @return true if all printed, false if latches were released. */
lock_trx_print_locks(FILE * file,const trx_t * trx,TrxLockIterator & iter,bool load_block)4810 static bool lock_trx_print_locks(
4811     FILE *file,            /*!< in/out: File to write */
4812     const trx_t *trx,      /*!< in: current transaction */
4813     TrxLockIterator &iter, /*!< in: transaction lock iterator */
4814     bool load_block)       /*!< in: if true then read block
4815                            from disk */
4816 {
4817   const lock_t *lock;
4818   /* We require exclusive access to lock_sys */
4819   ut_ad(locksys::owns_exclusive_global_latch());
4820 
4821   /* Iterate over the transaction's locks. */
4822   while ((lock = iter.current(trx)) != nullptr) {
4823     if (lock_get_type_low(lock) == LOCK_REC) {
4824       if (load_block) {
4825         /* Note: lock_rec_fetch_page() will release both the exclusive global
4826         latch and the trx_sys_t::mutex if it does a read from disk. */
4827 
4828         if (lock_rec_fetch_page(lock)) {
4829           /* We need to resync the
4830           current transaction. */
4831           return (false);
4832         }
4833 
4834         /* It is a single table tablespace
4835         and the .ibd file is missing
4836         (DISCARD TABLESPACE probably stole the
4837         locks): just print the lock without
4838         attempting to load the page in the
4839         buffer pool. */
4840 
4841         fprintf(file,
4842                 "RECORD LOCKS on non-existing"
4843                 " space %u\n",
4844                 lock->rec_lock.page_id.space());
4845       }
4846 
4847       /* Print all the record locks on the page from
4848       the record lock bitmap */
4849 
4850       lock_rec_print(file, lock);
4851 
4852       load_block = true;
4853 
4854     } else {
4855       ut_ad(lock_get_type_low(lock) & LOCK_TABLE);
4856 
4857       lock_table_print(file, lock);
4858     }
4859 
4860     if (iter.next() >= 10) {
4861       fprintf(file,
4862               "10 LOCKS PRINTED FOR THIS TRX:"
4863               " SUPPRESSING FURTHER PRINTS\n");
4864 
4865       break;
4866     }
4867   }
4868 
4869   return (true);
4870 }
4871 
lock_print_info_all_transactions(FILE * file)4872 void lock_print_info_all_transactions(FILE *file) {
4873   /* We require exclusive access to lock_sys */
4874   ut_ad(locksys::owns_exclusive_global_latch());
4875 
4876   fprintf(file, "LIST OF TRANSACTIONS FOR EACH SESSION:\n");
4877 
4878   mutex_enter(&trx_sys->mutex);
4879 
4880   /* First print info on non-active transactions */
4881 
4882   /* NOTE: information of auto-commit non-locking read-only
4883   transactions will be omitted here. The information will be
4884   available from INFORMATION_SCHEMA.INNODB_TRX. */
4885 
4886   PrintNotStarted print_not_started(file);
4887   ut_list_map(trx_sys->mysql_trx_list, print_not_started);
4888 
4889   const trx_t *trx;
4890   TrxListIterator trx_iter;
4891   const trx_t *prev_trx = nullptr;
4892 
4893   /* Control whether a block should be fetched from the buffer pool. */
4894   bool load_block = true;
4895   bool monitor = srv_print_innodb_lock_monitor;
4896 
4897   while ((trx = trx_iter.current()) != nullptr) {
4898     check_trx_state(trx);
4899 
4900     if (trx != prev_trx) {
4901       lock_trx_print_wait_and_mvcc_state(file, trx);
4902       prev_trx = trx;
4903 
4904       /* The transaction that read in the page is no
4905       longer the one that read the page in. We need to
4906       force a page read. */
4907       load_block = true;
4908     }
4909 
4910     /* If we need to print the locked record contents then we
4911     need to fetch the containing block from the buffer pool. */
4912     if (monitor) {
4913       /* Print the locks owned by the current transaction. */
4914       TrxLockIterator &lock_iter = trx_iter.lock_iter();
4915 
4916       if (!lock_trx_print_locks(file, trx, lock_iter, load_block)) {
4917         /* Resync trx_iter, the trx_sys->mutex and exclusive global latch were
4918         temporarily released. A page was successfully read in. We need to print
4919         its contents on the next call to lock_trx_print_locks(). On the next
4920         call to lock_trx_print_locks() we should simply print the contents of
4921         the page just read in.*/
4922         load_block = false;
4923 
4924         continue;
4925       }
4926     }
4927 
4928     load_block = true;
4929 
4930     /* All record lock details were printed without fetching
4931     a page from disk, or we didn't need to print the detail. */
4932     trx_iter.next();
4933   }
4934 
4935   mutex_exit(&trx_sys->mutex);
4936 }
4937 
4938 #ifdef UNIV_DEBUG
4939 /** Check if the lock exists in the trx_t::trx_lock_t::table_locks vector.
4940 @param[in]    trx         the trx to validate
4941 @param[in]    find_lock   lock to find
4942 @return true if found */
lock_trx_table_locks_find(const trx_t * trx,const lock_t * find_lock)4943 static bool lock_trx_table_locks_find(const trx_t *trx,
4944                                       const lock_t *find_lock) {
4945   /* We will access trx->lock.table_locks so we need trx->mutex */
4946   trx_mutex_enter(trx);
4947 
4948   typedef lock_pool_t::const_reverse_iterator iterator;
4949 
4950   const iterator end = trx->lock.table_locks.rend();
4951   const iterator begin = trx->lock.table_locks.rbegin();
4952   const bool found = std::find(begin, end, find_lock) != end;
4953 
4954   trx_mutex_exit(trx);
4955 
4956   return (found);
4957 }
4958 
4959 /** Validates the lock queue on a table.
4960  @return true if ok */
lock_table_queue_validate(const dict_table_t * table)4961 static bool lock_table_queue_validate(
4962     const dict_table_t *table) /*!< in: table */
4963 {
4964   const lock_t *lock;
4965 
4966   /* We actually hold exclusive latch here, but we require just the shard */
4967   ut_ad(locksys::owns_table_shard(*table));
4968   ut_ad(trx_sys_mutex_own());
4969 
4970   for (lock = UT_LIST_GET_FIRST(table->locks); lock != nullptr;
4971        lock = UT_LIST_GET_NEXT(tab_lock.locks, lock)) {
4972     /* lock->trx->state cannot change from or to NOT_STARTED
4973     while we are holding the trx_sys->mutex. It may change
4974     from ACTIVE to PREPARED. It may become COMMITTED_IN_MEMORY even though we
4975     hold trx_sys->mutex in case it has trx->id==0, but even in this case it
4976     will not be freed until it can release the table lock, and we prevent
4977     this by latching its shard. */
4978     ut_ad(trx_assert_started(lock->trx));
4979 
4980     if (!lock_get_wait(lock)) {
4981       ut_a(!lock_table_other_has_incompatible(lock->trx, 0, table,
4982                                               lock_get_mode(lock)));
4983     } else {
4984       ut_a(lock_table_has_to_wait_in_queue(lock));
4985     }
4986 
4987     ut_a(lock_trx_table_locks_find(lock->trx, lock));
4988   }
4989 
4990   return (true);
4991 }
4992 namespace locksys {
4993 /** Validates the lock queue on a single record.
4994 @param[in]  block     buffer block containing rec
4995 @param[in]  rec       record to look at
4996 @param[in]  index     index, or NULL if not known
4997 @param[in]  offsets   rec_get_offsets(rec, index) */
rec_queue_validate_latched(const buf_block_t * block,const rec_t * rec,const dict_index_t * index,const ulint * offsets)4998 static void rec_queue_validate_latched(const buf_block_t *block,
4999                                        const rec_t *rec,
5000                                        const dict_index_t *index,
5001                                        const ulint *offsets) {
5002   ut_ad(owns_page_shard(block->get_page_id()));
5003   ut_ad(mutex_own(&trx_sys->mutex));
5004   ut_a(rec);
5005   ut_a(block->frame == page_align(rec));
5006   ut_ad(rec_offs_validate(rec, index, offsets));
5007   ut_ad(!page_rec_is_comp(rec) == !rec_offs_comp(offsets));
5008   ut_ad(!index || index->is_clustered() || !dict_index_is_online_ddl(index));
5009 
5010   ulint heap_no = page_rec_get_heap_no(rec);
5011   RecID rec_id{block, heap_no};
5012 
5013   if (!page_rec_is_user_rec(rec)) {
5014     Lock_iter::for_each(rec_id, [&](lock_t *lock) {
5015       ut_ad(!trx_is_ac_nl_ro(lock->trx));
5016 
5017       if (lock->is_waiting()) {
5018         ut_a(lock_rec_has_to_wait_in_queue(lock));
5019       }
5020 
5021       if (index != nullptr) {
5022         ut_a(lock->index == index);
5023       }
5024 
5025       return (true);
5026     });
5027 
5028     return;
5029   }
5030 
5031   if (index == nullptr) {
5032     /* Nothing we can do */
5033 
5034   } else if (index->is_clustered()) {
5035     trx_id_t trx_id;
5036 
5037     /* Unlike the non-debug code, this invariant can only succeed
5038     if the check and assertion are covered by the lock_sys latch. */
5039 
5040     trx_id = lock_clust_rec_some_has_impl(rec, index, offsets);
5041 
5042     const trx_t *impl_trx = trx_rw_is_active_low(trx_id, nullptr);
5043     if (impl_trx != nullptr) {
5044       ut_ad(owns_page_shard(block->get_page_id()));
5045       ut_ad(trx_sys_mutex_own());
5046       /* impl_trx cannot become TRX_STATE_COMMITTED_IN_MEMORY nor removed from
5047       rw_trx_set until we release trx_sys->mutex, which means that currently all
5048       other threads in the system consider this impl_trx active and thus should
5049       respect implicit locks held by impl_trx*/
5050 
5051       const lock_t *other_lock =
5052           lock_rec_other_has_expl_req(LOCK_S, block, true, heap_no, impl_trx);
5053 
5054       /* The impl_trx is holding an implicit lock on the
5055       given record 'rec'. So there cannot be another
5056       explicit granted lock.  Also, there can be another
5057       explicit waiting lock only if the impl_trx has an
5058       explicit granted lock. */
5059 
5060       if (other_lock != nullptr) {
5061         ut_a(lock_get_wait(other_lock));
5062         ut_a(lock_rec_has_expl(LOCK_X | LOCK_REC_NOT_GAP, block, heap_no,
5063                                impl_trx));
5064       }
5065     }
5066   }
5067 
5068   Lock_iter::for_each(rec_id, [&](lock_t *lock) {
5069     ut_ad(!trx_is_ac_nl_ro(lock->trx));
5070 
5071     if (index != nullptr) {
5072       ut_a(lock->index == index);
5073     }
5074 
5075     if (!lock->is_gap() && !lock->is_waiting()) {
5076       lock_mode mode;
5077 
5078       if (lock_get_mode(lock) == LOCK_S) {
5079         mode = LOCK_X;
5080       } else {
5081         mode = LOCK_S;
5082       }
5083 
5084       const lock_t *other_lock =
5085           lock_rec_other_has_expl_req(mode, block, false, heap_no, lock->trx);
5086 
5087       ut_a(!other_lock);
5088 
5089     } else if (lock->is_waiting() && !lock->is_gap()) {
5090       ut_a(lock_rec_has_to_wait_in_queue(lock));
5091     }
5092 
5093     return (true);
5094   });
5095 }
5096 
5097 /** Validates the lock queue on a single record.
5098 @param[in]  block     buffer block containing rec
5099 @param[in]  rec       record to look at
5100 @param[in]  index     index, or NULL if not known
5101 @param[in]  offsets   rec_get_offsets(rec, index) */
rec_queue_latch_and_validate(const buf_block_t * block,const rec_t * rec,const dict_index_t * index,const ulint * offsets)5102 static void rec_queue_latch_and_validate(const buf_block_t *block,
5103                                          const rec_t *rec,
5104                                          const dict_index_t *index,
5105                                          const ulint *offsets) {
5106   ut_ad(!owns_exclusive_global_latch());
5107   ut_ad(!mutex_own(&trx_sys->mutex));
5108 
5109   Shard_latch_guard guard{block->get_page_id()};
5110   mutex_enter(&trx_sys->mutex);
5111   rec_queue_validate_latched(block, rec, index, offsets);
5112   mutex_exit(&trx_sys->mutex);
5113 }
5114 
5115 /** Validates the lock queue on a single record.
5116 @param[in]  block     buffer block containing rec
5117 @param[in]  rec       record to look at
5118 @param[in]  index     index, or NULL if not known */
rec_queue_latch_and_validate(const buf_block_t * block,const rec_t * rec,const dict_index_t * index)5119 static void rec_queue_latch_and_validate(const buf_block_t *block,
5120                                          const rec_t *rec,
5121                                          const dict_index_t *index) {
5122   rec_queue_latch_and_validate(block, rec, index,
5123                                Rec_offsets().compute(rec, index));
5124 }
5125 }  // namespace locksys
5126 
5127 /** Validates the record lock queues on a page.
5128  @return true if ok */
lock_rec_validate_page(const buf_block_t * block)5129 static bool lock_rec_validate_page(
5130     const buf_block_t *block) /*!< in: buffer block */
5131 {
5132   const lock_t *lock;
5133   const rec_t *rec;
5134   ulint nth_lock = 0;
5135   ulint nth_bit = 0;
5136   ulint i;
5137   Rec_offsets offsets;
5138 
5139   ut_ad(!locksys::owns_exclusive_global_latch());
5140 
5141   locksys::Shard_latch_guard guard{block->get_page_id()};
5142   mutex_enter(&trx_sys->mutex);
5143 loop:
5144   lock =
5145       lock_rec_get_first_on_page_addr(lock_sys->rec_hash, block->get_page_id());
5146 
5147   if (!lock) {
5148     goto function_exit;
5149   }
5150 
5151   ut_ad(!block->page.file_page_was_freed);
5152 
5153   for (i = 0; i < nth_lock; i++) {
5154     lock = lock_rec_get_next_on_page_const(lock);
5155 
5156     if (!lock) {
5157       goto function_exit;
5158     }
5159   }
5160 
5161   ut_ad(!trx_is_ac_nl_ro(lock->trx));
5162 
5163   if (!sync_check_find(SYNC_FSP))
5164     for (i = nth_bit; i < lock_rec_get_n_bits(lock); i++) {
5165       if (i == 1 || lock_rec_get_nth_bit(lock, i)) {
5166         rec = page_find_rec_with_heap_no(block->frame, i);
5167         ut_a(rec);
5168 
5169         /* If this thread is holding the file space
5170         latch (fil_space_t::latch), the following
5171         check WILL break the latching order and may
5172         cause a deadlock of threads. */
5173 
5174         locksys::rec_queue_validate_latched(block, rec, lock->index,
5175                                             offsets.compute(rec, lock->index));
5176 
5177         nth_bit = i + 1;
5178 
5179         goto loop;
5180       }
5181     }
5182 
5183   nth_bit = 0;
5184   nth_lock++;
5185 
5186   goto loop;
5187 
5188 function_exit:
5189   mutex_exit(&trx_sys->mutex);
5190 
5191   return (true);
5192 }
5193 
5194 /** Validates the table locks.
5195  @return true if ok */
lock_validate_table_locks(const trx_ut_list_t * trx_list)5196 static bool lock_validate_table_locks(
5197     const trx_ut_list_t *trx_list) /*!< in: trx list */
5198 {
5199   const trx_t *trx;
5200 
5201   /* We need exclusive access to lock_sys to iterate over trxs' locks */
5202   ut_ad(locksys::owns_exclusive_global_latch());
5203   ut_ad(trx_sys_mutex_own());
5204 
5205   ut_ad(trx_list == &trx_sys->rw_trx_list);
5206 
5207   for (trx = UT_LIST_GET_FIRST(*trx_list); trx != nullptr;
5208        trx = UT_LIST_GET_NEXT(trx_list, trx)) {
5209     const lock_t *lock;
5210 
5211     check_trx_state(trx);
5212 
5213     for (lock = UT_LIST_GET_FIRST(trx->lock.trx_locks); lock != nullptr;
5214          lock = UT_LIST_GET_NEXT(trx_locks, lock)) {
5215       if (lock_get_type_low(lock) & LOCK_TABLE) {
5216         lock_table_queue_validate(lock->tab_lock.table);
5217       }
5218     }
5219   }
5220 
5221   return (true);
5222 }
5223 
5224 /** Validate a record lock's block */
lock_rec_block_validate(const page_id_t & page_id)5225 static void lock_rec_block_validate(const page_id_t &page_id) {
5226   /* The lock and the block that it is referring to may be freed at
5227   this point. We pass Page_fetch::POSSIBLY_FREED to skip a debug check.
5228   If the lock exists in lock_rec_validate_page() we assert
5229   !block->page.file_page_was_freed. */
5230 
5231   buf_block_t *block;
5232   mtr_t mtr;
5233 
5234   /* Make sure that the tablespace is not deleted while we are
5235   trying to access the page. */
5236   if (fil_space_t *space = fil_space_acquire(page_id.space())) {
5237     mtr_start(&mtr);
5238 
5239     block = buf_page_get_gen(page_id, page_size_t(space->flags), RW_X_LATCH,
5240                              nullptr, Page_fetch::POSSIBLY_FREED, __FILE__,
5241                              __LINE__, &mtr);
5242 
5243     buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK);
5244 
5245     ut_ad(lock_rec_validate_page(block));
5246     mtr_commit(&mtr);
5247 
5248     fil_space_release(space);
5249   }
5250 }
5251 
lock_validate()5252 bool lock_validate() {
5253   typedef std::set<page_id_t, std::less<page_id_t>, ut_allocator<page_id_t>>
5254       page_addr_set;
5255 
5256   page_addr_set pages;
5257   {
5258     /* lock_validate_table_locks() needs exclusive global latch, and we will
5259     inspect record locks from all shards */
5260     locksys::Global_exclusive_latch_guard guard{};
5261     mutex_enter(&trx_sys->mutex);
5262 
5263     ut_a(lock_validate_table_locks(&trx_sys->rw_trx_list));
5264 
5265     /* Iterate over all the record locks and validate the locks. We
5266     don't want to hog the lock_sys global latch and the trx_sys_t::mutex.
5267     Thus we release both latches before the validation check. */
5268 
5269     for (ulint i = 0; i < hash_get_n_cells(lock_sys->rec_hash); i++) {
5270       for (const lock_t *lock = static_cast<const lock_t *>(
5271                HASH_GET_FIRST(lock_sys->rec_hash, i));
5272            lock != nullptr;
5273            lock = static_cast<const lock_t *>(HASH_GET_NEXT(hash, lock))) {
5274         ut_ad(!trx_is_ac_nl_ro(lock->trx));
5275         ut_ad(lock_get_type(lock) == LOCK_REC);
5276         pages.emplace(lock->rec_lock.page_id);
5277       }
5278     }
5279 
5280     mutex_exit(&trx_sys->mutex);
5281   }
5282   std::for_each(pages.cbegin(), pages.cend(), lock_rec_block_validate);
5283 
5284   return (true);
5285 }
5286 #endif /* UNIV_DEBUG */
5287 /*============ RECORD LOCK CHECKS FOR ROW OPERATIONS ====================*/
5288 
5289 /** Checks if locks of other transactions prevent an immediate insert of
5290  a record. If they do, first tests if the query thread should anyway
5291  be suspended for some reason; if not, then puts the transaction and
5292  the query thread to the lock wait state and inserts a waiting request
5293  for a gap x-lock to the lock queue.
5294  @return DB_SUCCESS, DB_LOCK_WAIT, or DB_DEADLOCK */
lock_rec_insert_check_and_lock(ulint flags,const rec_t * rec,buf_block_t * block,dict_index_t * index,que_thr_t * thr,mtr_t * mtr,ibool * inherit)5295 dberr_t lock_rec_insert_check_and_lock(
5296     ulint flags,         /*!< in: if BTR_NO_LOCKING_FLAG bit is
5297                          set, does nothing */
5298     const rec_t *rec,    /*!< in: record after which to insert */
5299     buf_block_t *block,  /*!< in/out: buffer block of rec */
5300     dict_index_t *index, /*!< in: index */
5301     que_thr_t *thr,      /*!< in: query thread */
5302     mtr_t *mtr,          /*!< in/out: mini-transaction */
5303     ibool *inherit)      /*!< out: set to true if the new
5304                          inserted record maybe should inherit
5305                          LOCK_GAP type locks from the successor
5306                          record */
5307 {
5308   ut_ad(block->frame == page_align(rec));
5309   ut_ad(!dict_index_is_online_ddl(index) || index->is_clustered() ||
5310         (flags & BTR_CREATE_FLAG));
5311 
5312   if (flags & BTR_NO_LOCKING_FLAG) {
5313     return (DB_SUCCESS);
5314   }
5315 
5316   ut_ad(!index->table->is_temporary());
5317 
5318   dberr_t err = DB_SUCCESS;
5319   lock_t *lock;
5320   ibool inherit_in = *inherit;
5321   trx_t *trx = thr_get_trx(thr);
5322   const rec_t *next_rec = page_rec_get_next_const(rec);
5323   ulint heap_no = page_rec_get_heap_no(next_rec);
5324 
5325   {
5326     locksys::Shard_latch_guard guard{block->get_page_id()};
5327 
5328     /* When inserting a record into an index, the table must be at
5329     least IX-locked. When we are building an index, we would pass
5330     BTR_NO_LOCKING_FLAG and skip the locking altogether. */
5331     ut_ad(lock_table_has(trx, index->table, LOCK_IX));
5332 
5333     /* Spatial index does not use GAP lock protection. It uses
5334     "predicate lock" to protect the "range" */
5335     ut_ad(!dict_index_is_spatial(index));
5336 
5337     lock = lock_rec_get_first(lock_sys->rec_hash, block, heap_no);
5338 
5339     if (lock == nullptr) {
5340       *inherit = false;
5341     } else {
5342       *inherit = true;
5343 
5344       /* If another transaction has an explicit lock request which locks
5345       the gap, waiting or granted, on the successor, the insert has to wait.
5346 
5347       An exception is the case where the lock by the another transaction
5348       is a gap type lock which it placed to wait for its turn to insert. We
5349       do not consider that kind of a lock conflicting with our insert. This
5350       eliminates an unnecessary deadlock which resulted when 2 transactions
5351       had to wait for their insert. Both had waiting gap type lock requests
5352       on the successor, which produced an unnecessary deadlock. */
5353 
5354       const ulint type_mode = LOCK_X | LOCK_GAP | LOCK_INSERT_INTENTION;
5355 
5356       const lock_t *wait_for =
5357           lock_rec_other_has_conflicting(type_mode, block, heap_no, trx);
5358 
5359       if (wait_for != nullptr) {
5360         RecLock rec_lock(thr, index, block, heap_no, type_mode);
5361 
5362         trx_mutex_enter(trx);
5363 
5364         err = rec_lock.add_to_waitq(wait_for);
5365 
5366         trx_mutex_exit(trx);
5367       }
5368     }
5369   } /* Shard_latch_guard */
5370 
5371   switch (err) {
5372     case DB_SUCCESS_LOCKED_REC:
5373       err = DB_SUCCESS;
5374       /* fall through */
5375     case DB_SUCCESS:
5376       if (!inherit_in || index->is_clustered()) {
5377         break;
5378       }
5379 
5380       /* Update the page max trx id field */
5381       page_update_max_trx_id(block, buf_block_get_page_zip(block), trx->id,
5382                              mtr);
5383     default:
5384       /* We only care about the two return values. */
5385       break;
5386   }
5387 
5388   ut_d(locksys::rec_queue_latch_and_validate(block, next_rec, index));
5389   ut_ad(err == DB_SUCCESS || err == DB_LOCK_WAIT || err == DB_DEADLOCK);
5390 
5391   return (err);
5392 }
5393 
5394 /** Creates an explicit record lock for a running transaction that currently
5395  only has an implicit lock on the record. The transaction instance must have a
5396  reference count > 0 so that it can't be committed and freed before this
5397  function has completed. */
lock_rec_convert_impl_to_expl_for_trx(const buf_block_t * block,const rec_t * rec,dict_index_t * index,const ulint * offsets,trx_t * trx,ulint heap_no)5398 static void lock_rec_convert_impl_to_expl_for_trx(
5399     const buf_block_t *block, /*!< in: buffer block of rec */
5400     const rec_t *rec,         /*!< in: user record on page */
5401     dict_index_t *index,      /*!< in: index of record */
5402     const ulint *offsets,     /*!< in: rec_get_offsets(rec, index) */
5403     trx_t *trx,               /*!< in/out: active transaction */
5404     ulint heap_no)            /*!< in: rec heap number to lock */
5405 {
5406   ut_ad(trx_is_referenced(trx));
5407 
5408   DEBUG_SYNC_C("before_lock_rec_convert_impl_to_expl_for_trx");
5409   {
5410     locksys::Shard_latch_guard guard{block->get_page_id()};
5411     /* This trx->mutex acquisition here is not really needed.
5412     Its purpose is to prevent a state transition between calls to trx_state_eq()
5413     and lock_rec_add_to_queue().
5414     But one can prove, that even if the state did change, it is not
5415     a big problem, because we still keep reference count from dropping
5416     to zero, so the trx object is still in use, and we hold the shard latched,
5417     so trx can not release its explicit lock (if it has any) so we will
5418     notice the explicit lock in lock_rec_has_expl.
5419     On the other hand if trx does not have explicit lock, then we would create
5420     one on its behalf, which is wasteful, but does not cause a problem, as once
5421     the reference count drops to zero the trx will notice and remove this new
5422     explicit lock. Also, even if some other trx had observed that trx is already
5423     removed from rw trxs list and thus ignored the implicit lock and decided to
5424     add its own lock, it will still have to wait for shard latch before adding
5425     her lock. However it does not cost us much to simply take the trx->mutex
5426     and avoid this whole shaky reasoning. */
5427     trx_mutex_enter(trx);
5428 
5429     ut_ad(!index->is_clustered() ||
5430           trx->id ==
5431               lock_clust_rec_some_has_impl(
5432                   rec, index,
5433                   offsets ? offsets : Rec_offsets().compute(rec, index)));
5434 
5435     ut_ad(!trx_state_eq(trx, TRX_STATE_NOT_STARTED));
5436 
5437     if (!trx_state_eq(trx, TRX_STATE_COMMITTED_IN_MEMORY) &&
5438         !lock_rec_has_expl(LOCK_X | LOCK_REC_NOT_GAP, block, heap_no, trx)) {
5439       ulint type_mode;
5440 
5441       type_mode = (LOCK_REC | LOCK_X | LOCK_REC_NOT_GAP);
5442 
5443       lock_rec_add_to_queue(type_mode, block, heap_no, index, trx, true);
5444     }
5445 
5446     trx_mutex_exit(trx);
5447   }
5448 
5449   trx_release_reference(trx);
5450 
5451   DEBUG_SYNC_C("after_lock_rec_convert_impl_to_expl_for_trx");
5452 }
5453 
5454 /** If a transaction has an implicit x-lock on a record, but no explicit x-lock
5455 set on the record, sets one for it.
5456 @param[in]	block		buffer block of rec
5457 @param[in]	rec		user record on page
5458 @param[in]	index		index of record
5459 @param[in]	offsets		rec_get_offsets(rec, index) */
lock_rec_convert_impl_to_expl(const buf_block_t * block,const rec_t * rec,dict_index_t * index,const ulint * offsets)5460 static void lock_rec_convert_impl_to_expl(const buf_block_t *block,
5461                                           const rec_t *rec, dict_index_t *index,
5462                                           const ulint *offsets) {
5463   trx_t *trx;
5464 
5465   ut_ad(!locksys::owns_exclusive_global_latch());
5466   ut_ad(page_rec_is_user_rec(rec));
5467   ut_ad(rec_offs_validate(rec, index, offsets));
5468   ut_ad(!page_rec_is_comp(rec) == !rec_offs_comp(offsets));
5469 
5470   DEBUG_SYNC_C("lock_rec_convert_impl_to_expl");
5471 
5472   if (index->is_clustered()) {
5473     trx_id_t trx_id;
5474 
5475     trx_id = lock_clust_rec_some_has_impl(rec, index, offsets);
5476 
5477     trx = trx_rw_is_active(trx_id, nullptr, true);
5478   } else {
5479     ut_ad(!dict_index_is_online_ddl(index));
5480 
5481     trx = lock_sec_rec_some_has_impl(rec, index, offsets);
5482     if (trx) {
5483       DEBUG_SYNC_C("lock_rec_convert_impl_to_expl_will_validate");
5484       ut_ad(!lock_rec_other_trx_holds_expl(LOCK_S | LOCK_REC_NOT_GAP, trx, rec,
5485                                            block));
5486     }
5487   }
5488 
5489   if (trx != nullptr) {
5490     ulint heap_no = page_rec_get_heap_no(rec);
5491 
5492     ut_ad(trx_is_referenced(trx));
5493 
5494     /* If the transaction is still active and has no
5495     explicit x-lock set on the record, set one for it.
5496     trx cannot be committed until the ref count is zero. */
5497 
5498     lock_rec_convert_impl_to_expl_for_trx(block, rec, index, offsets, trx,
5499                                           heap_no);
5500   }
5501 }
5502 
lock_rec_convert_active_impl_to_expl(const buf_block_t * block,const rec_t * rec,dict_index_t * index,const ulint * offsets,trx_t * trx,ulint heap_no)5503 void lock_rec_convert_active_impl_to_expl(const buf_block_t *block,
5504                                           const rec_t *rec, dict_index_t *index,
5505                                           const ulint *offsets, trx_t *trx,
5506                                           ulint heap_no) {
5507   trx_reference(trx, true);
5508   lock_rec_convert_impl_to_expl_for_trx(block, rec, index, offsets, trx,
5509                                         heap_no);
5510 }
5511 
5512 /** Checks if locks of other transactions prevent an immediate modify (update,
5513  delete mark, or delete unmark) of a clustered index record. If they do,
5514  first tests if the query thread should anyway be suspended for some
5515  reason; if not, then puts the transaction and the query thread to the
5516  lock wait state and inserts a waiting request for a record x-lock to the
5517  lock queue.
5518  @return DB_SUCCESS, DB_LOCK_WAIT, or DB_DEADLOCK */
lock_clust_rec_modify_check_and_lock(ulint flags,const buf_block_t * block,const rec_t * rec,dict_index_t * index,const ulint * offsets,que_thr_t * thr)5519 dberr_t lock_clust_rec_modify_check_and_lock(
5520     ulint flags,              /*!< in: if BTR_NO_LOCKING_FLAG
5521                               bit is set, does nothing */
5522     const buf_block_t *block, /*!< in: buffer block of rec */
5523     const rec_t *rec,         /*!< in: record which should be
5524                               modified */
5525     dict_index_t *index,      /*!< in: clustered index */
5526     const ulint *offsets,     /*!< in: rec_get_offsets(rec, index) */
5527     que_thr_t *thr)           /*!< in: query thread */
5528 {
5529   dberr_t err;
5530   ulint heap_no;
5531 
5532   ut_ad(rec_offs_validate(rec, index, offsets));
5533   ut_ad(index->is_clustered());
5534   ut_ad(block->frame == page_align(rec));
5535 
5536   if (flags & BTR_NO_LOCKING_FLAG) {
5537     return (DB_SUCCESS);
5538   }
5539   ut_ad(!index->table->is_temporary());
5540 
5541   heap_no = rec_offs_comp(offsets) ? rec_get_heap_no_new(rec)
5542                                    : rec_get_heap_no_old(rec);
5543 
5544   /* If a transaction has no explicit x-lock set on the record, set one
5545   for it */
5546 
5547   lock_rec_convert_impl_to_expl(block, rec, index, offsets);
5548 
5549   {
5550     locksys::Shard_latch_guard guard{block->get_page_id()};
5551     ut_ad(lock_table_has(thr_get_trx(thr), index->table, LOCK_IX));
5552 
5553     err = lock_rec_lock(true, SELECT_ORDINARY, LOCK_X | LOCK_REC_NOT_GAP, block,
5554                         heap_no, index, thr);
5555 
5556     MONITOR_INC(MONITOR_NUM_RECLOCK_REQ);
5557   }
5558 
5559   ut_d(locksys::rec_queue_latch_and_validate(block, rec, index, offsets));
5560 
5561   if (err == DB_SUCCESS_LOCKED_REC) {
5562     err = DB_SUCCESS;
5563   }
5564   ut_ad(err == DB_SUCCESS || err == DB_LOCK_WAIT || err == DB_DEADLOCK);
5565   return (err);
5566 }
5567 
5568 /** Checks if locks of other transactions prevent an immediate modify (delete
5569  mark or delete unmark) of a secondary index record.
5570  @return DB_SUCCESS, DB_LOCK_WAIT, or DB_DEADLOCK */
lock_sec_rec_modify_check_and_lock(ulint flags,buf_block_t * block,const rec_t * rec,dict_index_t * index,que_thr_t * thr,mtr_t * mtr)5571 dberr_t lock_sec_rec_modify_check_and_lock(
5572     ulint flags,         /*!< in: if BTR_NO_LOCKING_FLAG
5573                          bit is set, does nothing */
5574     buf_block_t *block,  /*!< in/out: buffer block of rec */
5575     const rec_t *rec,    /*!< in: record which should be
5576                          modified; NOTE: as this is a secondary
5577                          index, we always have to modify the
5578                          clustered index record first: see the
5579                          comment below */
5580     dict_index_t *index, /*!< in: secondary index */
5581     que_thr_t *thr,      /*!< in: query thread
5582                          (can be NULL if BTR_NO_LOCKING_FLAG) */
5583     mtr_t *mtr)          /*!< in/out: mini-transaction */
5584 {
5585   dberr_t err;
5586   ulint heap_no;
5587 
5588   ut_ad(!index->is_clustered());
5589   ut_ad(!dict_index_is_online_ddl(index) || (flags & BTR_CREATE_FLAG));
5590   ut_ad(block->frame == page_align(rec));
5591 
5592   if (flags & BTR_NO_LOCKING_FLAG) {
5593     return (DB_SUCCESS);
5594   }
5595   ut_ad(!index->table->is_temporary());
5596 
5597   heap_no = page_rec_get_heap_no(rec);
5598 
5599   /* Another transaction cannot have an implicit lock on the record,
5600   because when we come here, we already have modified the clustered
5601   index record, and this would not have been possible if another active
5602   transaction had modified this secondary index record. */
5603   {
5604     locksys::Shard_latch_guard guard{block->get_page_id()};
5605 
5606     ut_ad(lock_table_has(thr_get_trx(thr), index->table, LOCK_IX));
5607 
5608     err = lock_rec_lock(true, SELECT_ORDINARY, LOCK_X | LOCK_REC_NOT_GAP, block,
5609                         heap_no, index, thr);
5610 
5611     MONITOR_INC(MONITOR_NUM_RECLOCK_REQ);
5612   }
5613 
5614   ut_d(locksys::rec_queue_latch_and_validate(block, rec, index));
5615 
5616   if (err == DB_SUCCESS || err == DB_SUCCESS_LOCKED_REC) {
5617     /* Update the page max trx id field */
5618     /* It might not be necessary to do this if
5619     err == DB_SUCCESS (no new lock created),
5620     but it should not cost too much performance. */
5621     page_update_max_trx_id(block, buf_block_get_page_zip(block),
5622                            thr_get_trx(thr)->id, mtr);
5623     err = DB_SUCCESS;
5624   }
5625   ut_ad(err == DB_SUCCESS || err == DB_LOCK_WAIT || err == DB_DEADLOCK);
5626   return (err);
5627 }
5628 
lock_sec_rec_read_check_and_lock(const lock_duration_t duration,const buf_block_t * block,const rec_t * rec,dict_index_t * index,const ulint * offsets,const select_mode sel_mode,const lock_mode mode,const ulint gap_mode,que_thr_t * thr)5629 dberr_t lock_sec_rec_read_check_and_lock(
5630     const lock_duration_t duration, const buf_block_t *block, const rec_t *rec,
5631     dict_index_t *index, const ulint *offsets, const select_mode sel_mode,
5632     const lock_mode mode, const ulint gap_mode, que_thr_t *thr) {
5633   dberr_t err;
5634   ulint heap_no;
5635 
5636   ut_ad(!index->is_clustered());
5637   ut_ad(!dict_index_is_online_ddl(index));
5638   ut_ad(block->frame == page_align(rec));
5639   ut_ad(page_rec_is_user_rec(rec) || page_rec_is_supremum(rec));
5640   ut_ad(rec_offs_validate(rec, index, offsets));
5641   ut_ad(mode == LOCK_X || mode == LOCK_S);
5642 
5643   if (srv_read_only_mode || index->table->is_temporary()) {
5644     return (DB_SUCCESS);
5645   }
5646 
5647   heap_no = page_rec_get_heap_no(rec);
5648 
5649   /* Some transaction may have an implicit x-lock on the record only
5650   if the max trx id for the page >= min trx id for the trx list or a
5651   database recovery is running. */
5652 
5653   if ((page_get_max_trx_id(block->frame) >= trx_rw_min_trx_id() ||
5654        recv_recovery_is_on()) &&
5655       !page_rec_is_supremum(rec)) {
5656     lock_rec_convert_impl_to_expl(block, rec, index, offsets);
5657   }
5658   {
5659     locksys::Shard_latch_guard guard{block->get_page_id()};
5660 
5661     if (duration == lock_duration_t::AT_LEAST_STATEMENT) {
5662       lock_protect_locks_till_statement_end(thr);
5663     }
5664 
5665     ut_ad(mode != LOCK_X ||
5666           lock_table_has(thr_get_trx(thr), index->table, LOCK_IX));
5667     ut_ad(mode != LOCK_S ||
5668           lock_table_has(thr_get_trx(thr), index->table, LOCK_IS));
5669 
5670     err = lock_rec_lock(false, sel_mode, mode | gap_mode, block, heap_no, index,
5671                         thr);
5672 
5673     MONITOR_INC(MONITOR_NUM_RECLOCK_REQ);
5674   }
5675   DEBUG_SYNC_C("lock_sec_rec_read_check_and_lock_has_locked");
5676 
5677   ut_d(locksys::rec_queue_latch_and_validate(block, rec, index, offsets));
5678   ut_ad(err == DB_SUCCESS || err == DB_SUCCESS_LOCKED_REC ||
5679         err == DB_LOCK_WAIT || err == DB_DEADLOCK || err == DB_SKIP_LOCKED ||
5680         err == DB_LOCK_NOWAIT);
5681   return (err);
5682 }
5683 
lock_clust_rec_read_check_and_lock(const lock_duration_t duration,const buf_block_t * block,const rec_t * rec,dict_index_t * index,const ulint * offsets,const select_mode sel_mode,const lock_mode mode,const ulint gap_mode,que_thr_t * thr)5684 dberr_t lock_clust_rec_read_check_and_lock(
5685     const lock_duration_t duration, const buf_block_t *block, const rec_t *rec,
5686     dict_index_t *index, const ulint *offsets, const select_mode sel_mode,
5687     const lock_mode mode, const ulint gap_mode, que_thr_t *thr) {
5688   dberr_t err;
5689   ulint heap_no;
5690   DEBUG_SYNC_C("before_lock_clust_rec_read_check_and_lock");
5691   ut_ad(index->is_clustered());
5692   ut_ad(block->frame == page_align(rec));
5693   ut_ad(page_rec_is_user_rec(rec) || page_rec_is_supremum(rec));
5694   ut_ad(gap_mode == LOCK_ORDINARY || gap_mode == LOCK_GAP ||
5695         gap_mode == LOCK_REC_NOT_GAP);
5696   ut_ad(rec_offs_validate(rec, index, offsets));
5697 
5698   if (srv_read_only_mode || index->table->is_temporary()) {
5699     return (DB_SUCCESS);
5700   }
5701 
5702   heap_no = page_rec_get_heap_no(rec);
5703 
5704   if (heap_no != PAGE_HEAP_NO_SUPREMUM) {
5705     lock_rec_convert_impl_to_expl(block, rec, index, offsets);
5706   }
5707 
5708   DEBUG_SYNC_C("after_lock_clust_rec_read_check_and_lock_impl_to_expl");
5709   {
5710     locksys::Shard_latch_guard guard{block->get_page_id()};
5711 
5712     if (duration == lock_duration_t::AT_LEAST_STATEMENT) {
5713       lock_protect_locks_till_statement_end(thr);
5714     }
5715 
5716     ut_ad(mode != LOCK_X ||
5717           lock_table_has(thr_get_trx(thr), index->table, LOCK_IX));
5718     ut_ad(mode != LOCK_S ||
5719           lock_table_has(thr_get_trx(thr), index->table, LOCK_IS));
5720 
5721     err = lock_rec_lock(false, sel_mode, mode | gap_mode, block, heap_no, index,
5722                         thr);
5723 
5724     MONITOR_INC(MONITOR_NUM_RECLOCK_REQ);
5725   }
5726 
5727   ut_d(locksys::rec_queue_latch_and_validate(block, rec, index, offsets));
5728 
5729   DEBUG_SYNC_C("after_lock_clust_rec_read_check_and_lock");
5730   ut_ad(err == DB_SUCCESS || err == DB_SUCCESS_LOCKED_REC ||
5731         err == DB_LOCK_WAIT || err == DB_DEADLOCK || err == DB_SKIP_LOCKED ||
5732         err == DB_LOCK_NOWAIT);
5733   return (err);
5734 }
5735 /** Checks if locks of other transactions prevent an immediate read, or passing
5736  over by a read cursor, of a clustered index record. If they do, first tests
5737  if the query thread should anyway be suspended for some reason; if not, then
5738  puts the transaction and the query thread to the lock wait state and inserts a
5739  waiting request for a record lock to the lock queue. Sets the requested mode
5740  lock on the record. This is an alternative version of
5741  lock_clust_rec_read_check_and_lock() that does not require the parameter
5742  "offsets".
5743  @return DB_SUCCESS, DB_LOCK_WAIT, or DB_DEADLOCK */
lock_clust_rec_read_check_and_lock_alt(const buf_block_t * block,const rec_t * rec,dict_index_t * index,lock_mode mode,ulint gap_mode,que_thr_t * thr)5744 dberr_t lock_clust_rec_read_check_and_lock_alt(
5745     const buf_block_t *block, /*!< in: buffer block of rec */
5746     const rec_t *rec,         /*!< in: user record or page
5747                               supremum record which should
5748                               be read or passed over by a
5749                               read cursor */
5750     dict_index_t *index,      /*!< in: clustered index */
5751     lock_mode mode,           /*!< in: mode of the lock which
5752                               the read cursor should set on
5753                               records: LOCK_S or LOCK_X; the
5754                               latter is possible in
5755                               SELECT FOR UPDATE */
5756     ulint gap_mode,           /*!< in: LOCK_ORDINARY, LOCK_GAP, or
5757                              LOCK_REC_NOT_GAP */
5758     que_thr_t *thr)           /*!< in: query thread */
5759 {
5760   dberr_t err = lock_clust_rec_read_check_and_lock(
5761       lock_duration_t::REGULAR, block, rec, index,
5762       Rec_offsets().compute(rec, index), SELECT_ORDINARY, mode, gap_mode, thr);
5763 
5764   if (err == DB_SUCCESS_LOCKED_REC) {
5765     err = DB_SUCCESS;
5766   }
5767   ut_ad(err == DB_SUCCESS || err == DB_LOCK_WAIT || err == DB_DEADLOCK);
5768   return (err);
5769 }
5770 
5771 /** Release the last lock from the transaction's autoinc locks.
5772 @param[in]  trx   trx which vector of AUTOINC locks to modify */
5773 UNIV_INLINE
lock_release_autoinc_last_lock(trx_t * trx)5774 void lock_release_autoinc_last_lock(trx_t *trx) {
5775   ulint last;
5776   lock_t *lock;
5777 
5778   /* We will access trx->lock.autoinc_locks which requires trx->mutex */
5779   ut_ad(trx_mutex_own(trx));
5780   ib_vector_t *autoinc_locks = trx->lock.autoinc_locks;
5781 
5782   /* Since we do not know for which table the trx has created the last lock
5783   we can not narrow the required latch to any particular shard, and thus we
5784   require exclusive access to lock_sys here */
5785   ut_ad(locksys::owns_exclusive_global_latch());
5786   ut_a(!ib_vector_is_empty(autoinc_locks));
5787 
5788   /* The lock to be release must be the last lock acquired. */
5789   last = ib_vector_size(autoinc_locks) - 1;
5790   lock = *static_cast<lock_t **>(ib_vector_get(autoinc_locks, last));
5791 
5792   /* Should have only AUTOINC locks in the vector. */
5793   ut_a(lock_get_mode(lock) == LOCK_AUTO_INC);
5794   ut_a(lock_get_type(lock) == LOCK_TABLE);
5795 
5796   ut_a(lock->tab_lock.table != nullptr);
5797 
5798   /* This will remove the lock from the trx autoinc_locks too. */
5799   lock_table_dequeue(lock);
5800 
5801   /* Remove from the table vector too. */
5802   lock_trx_table_locks_remove(lock);
5803 }
5804 
5805 /** Check if a transaction holds any autoinc locks.
5806  @return true if the transaction holds any AUTOINC locks. */
lock_trx_holds_autoinc_locks(const trx_t * trx)5807 static bool lock_trx_holds_autoinc_locks(
5808     const trx_t *trx) /*!< in: transaction */
5809 {
5810   /* We will access trx->lock.autoinc_locks which requires trx->mutex */
5811   ut_ad(trx_mutex_own(trx));
5812   ut_a(trx->lock.autoinc_locks != nullptr);
5813 
5814   return (!ib_vector_is_empty(trx->lock.autoinc_locks));
5815 }
5816 
5817 /** Release all the transaction's autoinc locks. */
lock_release_autoinc_locks(trx_t * trx)5818 static void lock_release_autoinc_locks(trx_t *trx) /*!< in/out: transaction */
5819 {
5820   /* Since we do not know for which table(s) the trx has created the lock(s)
5821   we can not narrow the required latch to any particular shard, and thus we
5822   require exclusive access to lock_sys here */
5823   ut_ad(locksys::owns_exclusive_global_latch());
5824   ut_ad(trx_mutex_own(trx));
5825 
5826   ut_a(trx->lock.autoinc_locks != nullptr);
5827 
5828   /* We release the locks in the reverse order. This is to
5829   avoid searching the vector for the element to delete at
5830   the lower level. See (lock_table_remove_low()) for details. */
5831   while (!ib_vector_is_empty(trx->lock.autoinc_locks)) {
5832     /* lock_table_remove_low() will also remove the lock from
5833     the transaction's autoinc_locks vector. */
5834     lock_release_autoinc_last_lock(trx);
5835   }
5836 
5837   /* Should release all locks. */
5838   ut_a(ib_vector_is_empty(trx->lock.autoinc_locks));
5839 }
5840 
5841 /** Gets the type of a lock. Non-inline version for using outside of the
5842  lock module.
5843  @return LOCK_TABLE or LOCK_REC */
lock_get_type(const lock_t * lock)5844 uint32_t lock_get_type(const lock_t *lock) /*!< in: lock */
5845 {
5846   return (lock_get_type_low(lock));
5847 }
5848 
lock_get_trx_immutable_id(const lock_t * lock)5849 uint64_t lock_get_trx_immutable_id(const lock_t *lock) {
5850   return (trx_immutable_id(lock->trx));
5851 }
5852 
lock_get_trx_id(const lock_t * lock)5853 trx_id_t lock_get_trx_id(const lock_t *lock) {
5854   return (trx_get_id_for_print(lock->trx));
5855 }
5856 
lock_get_immutable_id(const lock_t * lock)5857 uint64_t lock_get_immutable_id(const lock_t *lock) {
5858   return (uint64_t{reinterpret_cast<uintptr_t>(lock)});
5859 }
5860 
5861 /** Get the performance schema event (thread_id, event_id)
5862 that created the lock.
5863 @param[in]	lock		Lock
5864 @param[out]	thread_id	Thread ID that created the lock
5865 @param[out]	event_id	Event ID that created the lock
5866 */
lock_get_psi_event(const lock_t * lock,ulonglong * thread_id,ulonglong * event_id)5867 void lock_get_psi_event(const lock_t *lock, ulonglong *thread_id,
5868                         ulonglong *event_id) {
5869 #if defined(HAVE_PSI_THREAD_INTERFACE) && defined(HAVE_PSI_DATA_LOCK_INTERFACE)
5870   *thread_id = lock->m_psi_internal_thread_id;
5871   *event_id = lock->m_psi_event_id;
5872 #else
5873   *thread_id = 0;
5874   *event_id = 0;
5875 #endif
5876 }
5877 
5878 /** Get the first lock of a trx lock list.
5879 @param[in]	trx_lock	the trx lock
5880 @return The first lock
5881 */
lock_get_first_trx_locks(const trx_lock_t * trx_lock)5882 const lock_t *lock_get_first_trx_locks(const trx_lock_t *trx_lock) {
5883   /* Writes to trx->lock.trx_locks are protected by trx->mutex combined with a
5884   shared global lock_sys latch, and we assume we have the exclusive latch on
5885   lock_sys here */
5886   ut_ad(locksys::owns_exclusive_global_latch());
5887   const lock_t *result = UT_LIST_GET_FIRST(trx_lock->trx_locks);
5888   return (result);
5889 }
5890 
5891 /** Get the next lock of a trx lock list.
5892 @param[in]	lock	the current lock
5893 @return The next lock
5894 */
lock_get_next_trx_locks(const lock_t * lock)5895 const lock_t *lock_get_next_trx_locks(const lock_t *lock) {
5896   /* Writes to trx->lock.trx_locks are protected by trx->mutex combined with a
5897   shared global lock_sys latch, and we assume we have the exclusive latch on
5898   lock_sys here */
5899   ut_ad(locksys::owns_exclusive_global_latch());
5900   const lock_t *result = UT_LIST_GET_NEXT(trx_locks, lock);
5901   return (result);
5902 }
5903 
5904 /** Gets the mode of a lock in a human readable string.
5905  The string should not be free()'d or modified.
5906  This functions is a bit complex for following reasons:
5907   - the way it is used in performance schema requires that the memory pointed
5908     by the return value is accessible for a long time
5909   - the caller never frees the memory
5910   - so, we need to maintain a pool of these strings or use string literals
5911   - there are many possible combinations of flags and thus it is impractical
5912     to maintain the list of all possible literals and if/else logic
5913   - moreover, sometimes performance_schema.data_locks is used precisely to
5914     investigate some unexpected situation, thus limiting output of this function
5915     only to expected combinations of flags might be misleading
5916  @return lock mode */
lock_get_mode_str(const lock_t * lock)5917 const char *lock_get_mode_str(const lock_t *lock) /*!< in: lock */
5918 {
5919   /* We use exclusive global lock_sys latch to protect the global
5920   lock_cached_lock_mode_names mapping. */
5921   ut_ad(locksys::owns_exclusive_global_latch());
5922 
5923   const auto type_mode = lock->type_mode;
5924   const auto mode = lock->mode();
5925   const auto type = lock->type();
5926   /* type_mode is type + mode + flags actually.
5927     We are interested in flags here.
5928     And we are not interested in LOCK_WAIT. */
5929   const auto flags = (type_mode & (~(uint)LOCK_WAIT)) - mode - type;
5930 
5931   /* Search for a cached string */
5932   const auto key = flags | mode;
5933   const auto found = lock_cached_lock_mode_names.find(key);
5934   if (found != lock_cached_lock_mode_names.end()) {
5935     return (found->second);
5936   }
5937   /* A new, unseen yet, mode of lock. We need to create new string. */
5938   ut::ostringstream name_stream;
5939   /* lock_mode_string can be used to describe mode, however the LOCK_ prefix in
5940   return mode name makes the string a bit too verbose for our purpose, as
5941   performance_schema.data_locks LOCK_MODE is a varchar(32), so we strip the
5942   prefix */
5943   const char *mode_string = lock_mode_string(mode);
5944   const char *LOCK_PREFIX = "LOCK_";
5945   if (!strncmp(mode_string, LOCK_PREFIX, strlen(LOCK_PREFIX))) {
5946     mode_string = mode_string + strlen(LOCK_PREFIX);
5947   }
5948   name_stream << mode_string;
5949   /* We concatenate constants in ascending order. */
5950   uint recognized_flags = 0;
5951   for (const auto &lock_constant : lock_constant_names) {
5952     const auto value = lock_constant.first;
5953     /* Constants have to be single bit only for this algorithm to work */
5954     ut_ad((value & (value - 1)) == 0);
5955     if (flags & value) {
5956       recognized_flags += value;
5957       name_stream << ',' << lock_constant.second;
5958     }
5959   }
5960   if (flags != recognized_flags) {
5961     return "UNKNOWN";
5962   }
5963   auto name_string = name_stream.str();
5964   char *name_buffer = (char *)ut_malloc_nokey(name_string.length() + 1);
5965   strcpy(name_buffer, name_string.c_str());
5966   lock_cached_lock_mode_names[key] = name_buffer;
5967   return (name_buffer);
5968 }
5969 
5970 /** Gets the type of a lock in a human readable string.
5971  The string should not be free()'d or modified.
5972  @return lock type */
lock_get_type_str(const lock_t * lock)5973 const char *lock_get_type_str(const lock_t *lock) /*!< in: lock */
5974 {
5975   switch (lock_get_type_low(lock)) {
5976     case LOCK_REC:
5977       return ("RECORD");
5978     case LOCK_TABLE:
5979       return ("TABLE");
5980     default:
5981       return ("UNKNOWN");
5982   }
5983 }
5984 
5985 /** Gets the table on which the lock is.
5986  @return table */
5987 UNIV_INLINE
lock_get_table(const lock_t * lock)5988 dict_table_t *lock_get_table(const lock_t *lock) /*!< in: lock */
5989 {
5990   switch (lock_get_type_low(lock)) {
5991     case LOCK_REC:
5992       ut_ad(lock->index->is_clustered() ||
5993             !dict_index_is_online_ddl(lock->index));
5994       return (lock->index->table);
5995     case LOCK_TABLE:
5996       return (lock->tab_lock.table);
5997     default:
5998       ut_error;
5999   }
6000 }
6001 
6002 /** Gets the id of the table on which the lock is.
6003  @return id of the table */
lock_get_table_id(const lock_t * lock)6004 table_id_t lock_get_table_id(const lock_t *lock) /*!< in: lock */
6005 {
6006   dict_table_t *table;
6007 
6008   table = lock_get_table(lock);
6009 
6010   return (table->id);
6011 }
6012 
6013 /** Determine which table a lock is associated with.
6014 @param[in]	lock	the lock
6015 @return name of the table */
lock_get_table_name(const lock_t * lock)6016 const table_name_t &lock_get_table_name(const lock_t *lock) {
6017   return (lock_get_table(lock)->name);
6018 }
6019 
6020 /** For a record lock, gets the index on which the lock is.
6021  @return index */
lock_rec_get_index(const lock_t * lock)6022 const dict_index_t *lock_rec_get_index(const lock_t *lock) /*!< in: lock */
6023 {
6024   ut_a(lock_get_type_low(lock) == LOCK_REC);
6025   ut_ad(lock->index->is_clustered() || !dict_index_is_online_ddl(lock->index));
6026 
6027   return (lock->index);
6028 }
6029 
6030 /** For a record lock, gets the name of the index on which the lock is.
6031  The string should not be free()'d or modified.
6032  @return name of the index */
lock_rec_get_index_name(const lock_t * lock)6033 const char *lock_rec_get_index_name(const lock_t *lock) /*!< in: lock */
6034 {
6035   ut_a(lock_get_type_low(lock) == LOCK_REC);
6036   ut_ad(lock->index->is_clustered() || !dict_index_is_online_ddl(lock->index));
6037 
6038   return (lock->index->name);
6039 }
6040 
lock_rec_get_page_id(const lock_t * lock)6041 page_id_t lock_rec_get_page_id(const lock_t *lock) {
6042   ut_a(lock_get_type_low(lock) == LOCK_REC);
6043   return lock->rec_lock.page_id;
6044 }
6045 
6046 /** Cancels a waiting lock request and releases possible other transactions
6047 waiting behind it.
6048 @param[in,out]	lock		Waiting lock request */
lock_cancel_waiting_and_release(lock_t * lock)6049 void lock_cancel_waiting_and_release(lock_t *lock) {
6050   /* Requiring exclusive global latch serves several purposes here.
6051 
6052   1. In case of table LOCK_TABLE we will call lock_release_autoinc_locks(),
6053   which iterates over locks held by this transaction and it is not clear if
6054   these locks are from the same table. Frankly it is not clear why we even
6055   release all of them here (note that none of them is our `lock` because we
6056   don't store waiting locks in the trx->autoinc_locks vector, only granted).
6057   Perhaps this is because this trx is going to be rolled back anyway, and this
6058   seemed to be a good moment to release them?
6059 
6060   2. During lock_rec_dequeue_from_page() and lock_table_dequeue() we might latch
6061   trx mutex of another transaction to grant it a lock. The rules meant to avoid
6062   deadlocks between trx mutex require us to either use an exclusive global
6063   latch, or to first latch trx which is has trx->lock.wait_lock == nullptr.
6064   As `lock == lock->trx->lock.wait_lock` and thus is not nullptr, we have to use
6065   the first approach, or complicate the proof of deadlock avoidance enormously.
6066   */
6067   ut_ad(locksys::owns_exclusive_global_latch());
6068   /* We will access lock->trx->lock.autoinc_locks which requires trx->mutex */
6069   ut_ad(trx_mutex_own(lock->trx));
6070 
6071   if (lock_get_type_low(lock) == LOCK_REC) {
6072     lock_rec_dequeue_from_page(lock);
6073   } else {
6074     ut_ad(lock_get_type_low(lock) & LOCK_TABLE);
6075 
6076     if (lock->trx->lock.autoinc_locks != nullptr) {
6077       lock_release_autoinc_locks(lock->trx);
6078     }
6079 
6080     lock_table_dequeue(lock);
6081   }
6082 
6083   lock_reset_wait_and_release_thread_if_suspended(lock);
6084 }
6085 
6086 /** Unlocks AUTO_INC type locks that were possibly reserved by a trx. This
6087  function should be called at the the end of an SQL statement, by the
6088  connection thread that owns the transaction (trx->mysql_thd). */
lock_unlock_table_autoinc(trx_t * trx)6089 void lock_unlock_table_autoinc(trx_t *trx) /*!< in/out: transaction */
6090 {
6091   ut_ad(!locksys::owns_exclusive_global_latch());
6092   ut_ad(!trx_mutex_own(trx));
6093 
6094   /* This can be invoked on NOT_STARTED, ACTIVE, PREPARED,
6095   but not COMMITTED transactions. */
6096 
6097   ut_ad(trx_state_eq(trx, TRX_STATE_NOT_STARTED) ||
6098         trx_state_eq(trx, TRX_STATE_FORCED_ROLLBACK) ||
6099         !trx_state_eq(trx, TRX_STATE_COMMITTED_IN_MEMORY));
6100 
6101   /* The trx->lock.autoinc_locks are protected by trx->mutex and in principle
6102   can be modified by other threads:
6103     1. When the other thread calls lock_grant on trx->lock.wait_lock.
6104       (This is impossible here, because we've verified !trx->lock.wait_lock)
6105     2. During recovery lock_remove_recoverd_trx_record_locks ->
6106        lock_table_remove_low -> lock_table_remove_autoinc_lock ->
6107        lock_table_pop_autoinc_lock.
6108        (But AFAIK recovery is a single-threaded process)
6109     3. During DROP TABLE lock_remove_all_on_table_for_trx ->
6110       lock_table_remove_low ...
6111       (I'm unsure if this is possible to happen in parallel to our trx)
6112   Please note, that from this list only lock_grant tries to add something
6113   to the trx->lock.autoinc_locks (namely the granted AUTOINC lock), and the
6114   others try to remove something. This means that we can treat the result of
6115   lock_trx_holds_autoinc_locks(trx) as a heuristic. If it returns true,
6116   then it might or (with small probability) might not hold locks, so we better
6117   call lock_release_autoinc_locks with proper latching.
6118   If it returns false, then it is guaranteed that the vector will remain empty.
6119   If we like risk, we could even call lock_trx_holds_autoinc_locks without
6120   trx->mutex protection, but:
6121     1. why risk? It is not obvious how thread-safe our vector implementation is
6122     2. trx->mutex is cheap
6123   */
6124   trx_mutex_enter(trx);
6125   ut_ad(!trx->lock.wait_lock);
6126   bool might_have_autoinc_locks = lock_trx_holds_autoinc_locks(trx);
6127   trx_mutex_exit(trx);
6128 
6129   if (might_have_autoinc_locks) {
6130     /* lock_release_autoinc_locks() requires exclusive global latch as the
6131     AUTOINC locks might be on tables from different shards. Identifying and
6132     latching them in correct order would complicate this rarely-taken path. */
6133     locksys::Global_exclusive_latch_guard guard{};
6134     trx_mutex_enter(trx);
6135     lock_release_autoinc_locks(trx);
6136     trx_mutex_exit(trx);
6137   }
6138 }
6139 
6140 /** Releases a transaction's locks, and releases possible other transactions
6141  waiting because of these locks. Change the state of the transaction to
6142  TRX_STATE_COMMITTED_IN_MEMORY. */
lock_trx_release_locks(trx_t * trx)6143 void lock_trx_release_locks(trx_t *trx) /*!< in/out: transaction */
6144 {
6145   DEBUG_SYNC_C("before_lock_trx_release_locks");
6146 
6147   trx_mutex_enter(trx);
6148 
6149   check_trx_state(trx);
6150   ut_ad(trx_state_eq(trx, TRX_STATE_COMMITTED_IN_MEMORY));
6151 
6152   if (trx_is_referenced(trx)) {
6153     while (trx_is_referenced(trx)) {
6154       trx_mutex_exit(trx);
6155 
6156       DEBUG_SYNC_C("waiting_trx_is_not_referenced");
6157 
6158       /** Doing an implicit to explicit conversion
6159       should not be expensive. */
6160       ut_delay(ut_rnd_interval(0, srv_spin_wait_delay));
6161 
6162       trx_mutex_enter(trx);
6163     }
6164   }
6165 
6166   ut_ad(!trx_is_referenced(trx));
6167 
6168   /* If the background thread trx_rollback_or_clean_recovered()
6169   is still active then there is a chance that the rollback
6170   thread may see this trx as COMMITTED_IN_MEMORY and goes ahead
6171   to clean it up calling trx_cleanup_at_db_startup(). This can
6172   happen in the case we are committing a trx here that is left
6173   in PREPARED state during the crash. Note that commit of the
6174   rollback of a PREPARED trx happens in the recovery thread
6175   while the rollback of other transactions happen in the
6176   background thread. To avoid this race we unconditionally unset
6177   the is_recovered flag. */
6178 
6179   trx->is_recovered = false;
6180 
6181   trx_mutex_exit(trx);
6182 
6183   lock_release(trx);
6184 
6185   /* We don't remove the locks one by one from the vector for
6186   efficiency reasons. We simply reset it because we would have
6187   released all the locks anyway.
6188   At this point there should be no one else interested in our trx's
6189   locks as we've released and removed all of them, and the trx is no longer
6190   referenced so nobody will attempt implicit to explicit conversion neither.
6191   Please note that we are either the thread which runs the transaction, or we
6192   are the thread of a high priority transaction which decided to kill trx, in
6193   which case it had to first make sure that it is no longer running in InnoDB.
6194   So the race between lock_table() accessing table_locks, and our clear() should
6195   not happen.
6196   All that being said, it does not cost us anything in terms of performance to
6197   protect these operations with trx->mutex, which makes some class of errors
6198   impossible even if the above reasoning was wrong. */
6199   trx_mutex_enter(trx);
6200   trx->lock.table_locks.clear();
6201   trx->lock.n_rec_locks.store(0);
6202 
6203   ut_a(UT_LIST_GET_LEN(trx->lock.trx_locks) == 0);
6204   ut_a(ib_vector_is_empty(trx->lock.autoinc_locks));
6205   ut_a(trx->lock.table_locks.empty());
6206 
6207   mem_heap_empty(trx->lock.lock_heap);
6208   trx_mutex_exit(trx);
6209 }
6210 
6211 /** Check whether the transaction has already been rolled back because it
6212  was selected as a deadlock victim, or if it has to wait then cancel
6213  the wait lock.
6214  @return DB_DEADLOCK, DB_LOCK_WAIT or DB_SUCCESS */
lock_trx_handle_wait(trx_t * trx)6215 dberr_t lock_trx_handle_wait(trx_t *trx) /*!< in/out: trx lock state */
6216 {
6217   dberr_t err;
6218 
6219   /* lock_cancel_waiting_and_release() requires exclusive global latch, and so
6220   does reading the trx->lock.wait_lock to prevent races with B-tree page
6221   reorganization */
6222   locksys::Global_exclusive_latch_guard guard{};
6223 
6224   trx_mutex_enter(trx);
6225 
6226   if (trx->lock.was_chosen_as_deadlock_victim) {
6227     err = DB_DEADLOCK;
6228   } else if (trx->lock.wait_lock != nullptr) {
6229     lock_cancel_waiting_and_release(trx->lock.wait_lock);
6230     err = DB_LOCK_WAIT;
6231   } else {
6232     /* The lock was probably granted before we got here. */
6233     err = DB_SUCCESS;
6234   }
6235 
6236   trx_mutex_exit(trx);
6237 
6238   return (err);
6239 }
6240 
6241 #ifdef UNIV_DEBUG
6242 /** Do an exhaustive check for any locks (table or rec) against the table.
6243  @return lock if found */
lock_table_locks_lookup(const dict_table_t * table,const trx_ut_list_t * trx_list)6244 static const lock_t *lock_table_locks_lookup(
6245     const dict_table_t *table,     /*!< in: check if there are
6246                                    any locks held on records in
6247                                    this table or on the table
6248                                    itself */
6249     const trx_ut_list_t *trx_list) /*!< in: trx list to check */
6250 {
6251   const trx_t *trx;
6252 
6253   ut_a(table != nullptr);
6254   /* We are going to iterate over multiple transactions, so even though we know
6255   which table we are looking for we can not narrow required latch to just the
6256   shard which contains the table, because accessing trx->lock.trx_locks would be
6257   unsafe */
6258   ut_ad(locksys::owns_exclusive_global_latch());
6259   ut_ad(trx_sys_mutex_own());
6260 
6261   for (trx = UT_LIST_GET_FIRST(*trx_list); trx != nullptr;
6262        trx = UT_LIST_GET_NEXT(trx_list, trx)) {
6263     const lock_t *lock;
6264 
6265     check_trx_state(trx);
6266 
6267     for (lock = UT_LIST_GET_FIRST(trx->lock.trx_locks); lock != nullptr;
6268          lock = UT_LIST_GET_NEXT(trx_locks, lock)) {
6269       ut_a(lock->trx == trx);
6270 
6271       if (lock_get_type_low(lock) == LOCK_REC) {
6272         ut_ad(!dict_index_is_online_ddl(lock->index) ||
6273               lock->index->is_clustered());
6274         if (lock->index->table == table) {
6275           return (lock);
6276         }
6277       } else if (lock->tab_lock.table == table) {
6278         return (lock);
6279       }
6280     }
6281   }
6282 
6283   return (nullptr);
6284 }
6285 #endif /* UNIV_DEBUG */
6286 
lock_table_has_locks(const dict_table_t * table)6287 bool lock_table_has_locks(const dict_table_t *table) {
6288   /** The n_rec_locks field might be modified by operation on any page shard,
6289   so we need to latch everything. Note, that the results of this function will
6290   be obsolete, as soon as we release the latch. It is called in contexts where
6291   we believe that the number of locks should either be zero or decreasing. For
6292   such scenario of usage, we might perhaps read the n_rec_locks without latch
6293   and restrict latch just to a table shard. But that would complicate the debug
6294   version of the code for no significant gain as this is not a hot path. */
6295   locksys::Global_exclusive_latch_guard guard{};
6296 
6297   bool has_locks =
6298       UT_LIST_GET_LEN(table->locks) > 0 || table->n_rec_locks.load() > 0;
6299 
6300 #ifdef UNIV_DEBUG
6301   if (!has_locks) {
6302     mutex_enter(&trx_sys->mutex);
6303 
6304     ut_ad(!lock_table_locks_lookup(table, &trx_sys->rw_trx_list));
6305 
6306     mutex_exit(&trx_sys->mutex);
6307   }
6308 #endif /* UNIV_DEBUG */
6309 
6310   return (has_locks);
6311 }
6312 
6313 /** Initialise the table lock list. */
lock_table_lock_list_init(table_lock_list_t * lock_list)6314 void lock_table_lock_list_init(
6315     table_lock_list_t *lock_list) /*!< List to initialise */
6316 {
6317   UT_LIST_INIT(*lock_list, &lock_table_t::locks);
6318 }
6319 
6320 /** Initialise the trx lock list. */
lock_trx_lock_list_init(trx_lock_list_t * lock_list)6321 void lock_trx_lock_list_init(
6322     trx_lock_list_t *lock_list) /*!< List to initialise */
6323 {
6324   UT_LIST_INIT(*lock_list, &lock_t::trx_locks);
6325 }
6326 
6327 /** Set the lock system timeout event. */
lock_set_timeout_event()6328 void lock_set_timeout_event() { os_event_set(lock_sys->timeout_event); }
6329 
6330 #ifdef UNIV_DEBUG
6331 
lock_trx_has_rec_x_lock(que_thr_t * thr,const dict_table_t * table,const buf_block_t * block,ulint heap_no)6332 bool lock_trx_has_rec_x_lock(que_thr_t *thr, const dict_table_t *table,
6333                              const buf_block_t *block, ulint heap_no) {
6334   ut_ad(heap_no > PAGE_HEAP_NO_SUPREMUM);
6335 
6336   const trx_t *trx = thr_get_trx(thr);
6337   locksys::Shard_latch_guard guard{block->get_page_id()};
6338   ut_a(lock_table_has(trx, table, LOCK_IX) || table->is_temporary());
6339   ut_a(lock_rec_has_expl(LOCK_X | LOCK_REC_NOT_GAP, block, heap_no, trx) ||
6340        table->is_temporary());
6341   return (true);
6342 }
6343 #endif /* UNIV_DEBUG */
6344 
6345 /** rewind(3) the file used for storing the latest detected deadlock and
6346 print a heading message to stderr if printing of all deadlocks to stderr
6347 is enabled. */
start_print()6348 void Deadlock_notifier::start_print() {
6349   /* I/O operations on lock_latest_err_file require exclusive latch on
6350   lock_sys */
6351   ut_ad(locksys::owns_exclusive_global_latch());
6352 
6353   rewind(lock_latest_err_file);
6354   ut_print_timestamp(lock_latest_err_file);
6355 
6356   if (srv_print_all_deadlocks) {
6357     ib::info(ER_IB_MSG_643) << "Transactions deadlock detected, dumping"
6358                             << " detailed information.";
6359   }
6360 }
6361 
6362 /** Print a message to the deadlock file and possibly to stderr.
6363 @param msg message to print */
print(const char * msg)6364 void Deadlock_notifier::print(const char *msg) {
6365   /* I/O operations on lock_latest_err_file require exclusive latch on
6366   lock_sys */
6367   ut_ad(locksys::owns_exclusive_global_latch());
6368   fputs(msg, lock_latest_err_file);
6369 
6370   if (srv_print_all_deadlocks) {
6371     ib::info(ER_IB_MSG_644) << msg;
6372   }
6373 }
6374 
6375 /** Print transaction data to the deadlock file and possibly to stderr.
6376 @param trx transaction
6377 @param max_query_len max query length to print */
print(const trx_t * trx,ulint max_query_len)6378 void Deadlock_notifier::print(const trx_t *trx, ulint max_query_len) {
6379   /* We need exclusive latch on lock_sys because:
6380     1. I/O operations on lock_latest_err_file
6381     2. lock_number_of_rows_locked()
6382     3. Accessing trx->lock fields requires either holding trx->mutex or latching
6383     the lock sys. */
6384   ut_ad(locksys::owns_exclusive_global_latch());
6385 
6386   trx_mutex_enter(trx);
6387   ulint n_rec_locks = lock_number_of_rows_locked(&trx->lock);
6388   ulint n_trx_locks = UT_LIST_GET_LEN(trx->lock.trx_locks);
6389   ulint heap_size = mem_heap_get_size(trx->lock.lock_heap);
6390   trx_mutex_exit(trx);
6391 
6392   mutex_enter(&trx_sys->mutex);
6393 
6394   trx_print_low(lock_latest_err_file, trx, max_query_len, n_rec_locks,
6395                 n_trx_locks, heap_size);
6396 
6397   if (srv_print_all_deadlocks) {
6398     trx_print_low(stderr, trx, max_query_len, n_rec_locks, n_trx_locks,
6399                   heap_size);
6400   }
6401 
6402   mutex_exit(&trx_sys->mutex);
6403 }
6404 
6405 /** Print lock data to the deadlock file and possibly to stderr.
6406 @param lock record or table type lock */
print(const lock_t * lock)6407 void Deadlock_notifier::print(const lock_t *lock) {
6408   /* I/O operations on lock_latest_err_file require exclusive latch on
6409   lock_sys. */
6410   ut_ad(locksys::owns_exclusive_global_latch());
6411 
6412   if (lock_get_type_low(lock) == LOCK_REC) {
6413     lock_rec_print(lock_latest_err_file, lock);
6414 
6415     if (srv_print_all_deadlocks) {
6416       lock_rec_print(stderr, lock);
6417     }
6418   } else {
6419     lock_table_print(lock_latest_err_file, lock);
6420 
6421     if (srv_print_all_deadlocks) {
6422       lock_table_print(stderr, lock);
6423     }
6424   }
6425 }
6426 
print_title(size_t pos_on_cycle,const char * title)6427 void Deadlock_notifier::print_title(size_t pos_on_cycle, const char *title) {
6428   /* I/O operations on lock_latest_err_file require exclusive latch on
6429   lock_sys */
6430   ut_ad(locksys::owns_exclusive_global_latch());
6431   ut::ostringstream buff;
6432   buff << "\n*** (" << (pos_on_cycle + 1) << ") " << title << ":\n";
6433   print(buff.str().c_str());
6434 }
6435 
notify(const ut::vector<const trx_t * > & trxs_on_cycle,const trx_t * victim_trx)6436 void Deadlock_notifier::notify(const ut::vector<const trx_t *> &trxs_on_cycle,
6437                                const trx_t *victim_trx) {
6438   ut_ad(locksys::owns_exclusive_global_latch());
6439 
6440   start_print();
6441   const auto n = trxs_on_cycle.size();
6442   for (size_t i = 0; i < n; ++i) {
6443     const trx_t *trx = trxs_on_cycle[i];
6444     const trx_t *blocked_trx = trxs_on_cycle[0 < i ? i - 1 : n - 1];
6445     const lock_t *blocking_lock =
6446         lock_has_to_wait_in_queue(blocked_trx->lock.wait_lock, trx);
6447     ut_a(blocking_lock);
6448 
6449     print_title(i, "TRANSACTION");
6450     print(trx, 3000);
6451 
6452     print_title(i, "HOLDS THE LOCK(S)");
6453     print(blocking_lock);
6454 
6455     print_title(i, "WAITING FOR THIS LOCK TO BE GRANTED");
6456     print(trx->lock.wait_lock);
6457   }
6458   const auto victim_it =
6459       std::find(trxs_on_cycle.begin(), trxs_on_cycle.end(), victim_trx);
6460   ut_ad(victim_it != trxs_on_cycle.end());
6461   const auto victim_pos = std::distance(trxs_on_cycle.begin(), victim_it);
6462   ut::ostringstream buff;
6463   buff << "*** WE ROLL BACK TRANSACTION (" << (victim_pos + 1) << ")\n";
6464   print(buff.str().c_str());
6465   DBUG_PRINT("ib_lock", ("deadlock detected"));
6466 
6467 #ifdef UNIV_DEBUG
6468   /* We perform this check only after information is output, to give a
6469   developer as much information as we can for debugging the problem */
6470   for (const trx_t *trx : trxs_on_cycle) {
6471     ut_ad(is_allowed_to_be_on_cycle(trx->lock.wait_lock));
6472   }
6473 #endif /* UNIV_DEBUG */
6474 
6475   lock_deadlock_found = true;
6476 }
6477 
6478 #ifdef UNIV_DEBUG
6479 
is_allowed_to_be_on_cycle(const lock_t * lock)6480 bool Deadlock_notifier::is_allowed_to_be_on_cycle(const lock_t *lock) {
6481   /* The original purpose of this validation is to check record locks from
6482   DD & SDI tables only, because we think a deadlock for these locks should be
6483   prevented by MDL and proper updating order, but later, some exemptions were
6484   introduced (for more context see comment to this function).
6485   In particular, we don't check table locks here, since there never was any
6486   guarantee saying a deadlock is impossible for table locks. */
6487   if (!lock->is_record_lock()) {
6488     return (true);
6489   }
6490   /* The only places where we don't expect deadlocks are in handling DD
6491   tables, and since WL#9538 also in code handling SDI tables.
6492   Therefore the second condition is that we only pay attention to DD and SDI
6493   tables. */
6494   const bool is_dd_or_sdi = (lock->index->table->is_dd_table ||
6495                              dict_table_is_sdi(lock->index->table->id));
6496   if (!is_dd_or_sdi) {
6497     return (true);
6498   }
6499 
6500   /* If we are still here, the lock is a record lock on some DD or SDI table.
6501   There are some such tables though, for which a deadlock is somewhat expected,
6502   for various reasons specific to these particular tables.
6503   So, we have a list of exceptions here:
6504 
6505   innodb_table_stats and innodb_index_stats
6506       These two tables are visible to the end user, so can take part in
6507       quite arbitrary queries and transactions, so deadlock is possible.
6508       Therefore we need to allow such deadlocks, as otherwise a user
6509       could crash a debug build of a server by issuing a specific sequence of
6510       queries. DB_DEADLOCK error in dict0stats is either handled (see for
6511       example dict_stats_rename_table), or ignored silently (for example in
6512       dict_stats_process_entry_from_recalc_pool), but I am not aware of any
6513       situation in which DB_DEADLOCK could cause a serious problem.
6514       Most such queries are performed via dict_stats_exec_sql() which logs an
6515       ERROR in case of a DB_DEADLOCK, and also returns error code to the caller,
6516       so both the end user and a developer should be aware of a problem in case
6517       they want to do something about it.
6518 
6519   table_stats and index_stats
6520       These two tables take part in queries which are issued by background
6521       threads, and the code which performs these queries can handle failures
6522       such as deadlocks, because they were expected at design phase. */
6523 
6524   const char *name = lock->index->table->name.m_name;
6525   return (!strcmp(name, "mysql/innodb_table_stats") ||
6526           !strcmp(name, "mysql/innodb_index_stats") ||
6527           !strcmp(name, "mysql/table_stats") ||
6528           !strcmp(name, "mysql/index_stats"));
6529 }
6530 #endif /* UNIV_DEBUG */
6531 
6532 /**
6533 Allocate cached locks for the transaction.
6534 @param trx		allocate cached record locks for this transaction */
lock_trx_alloc_locks(trx_t * trx)6535 void lock_trx_alloc_locks(trx_t *trx) {
6536   /* We will create trx->lock.table_pool and rec_pool which are protected by
6537   trx->mutex. In theory nobody else should use the trx object while it is being
6538   constructed, but how can we (the lock-sys) "know" about it and why risk? */
6539   trx_mutex_enter(trx);
6540   ulint sz = REC_LOCK_SIZE * REC_LOCK_CACHE;
6541   byte *ptr = reinterpret_cast<byte *>(ut_malloc_nokey(sz));
6542 
6543   /* We allocate one big chunk and then distribute it among
6544   the rest of the elements. The allocated chunk pointer is always
6545   at index 0. */
6546 
6547   for (ulint i = 0; i < REC_LOCK_CACHE; ++i, ptr += REC_LOCK_SIZE) {
6548     trx->lock.rec_pool.push_back(reinterpret_cast<ib_lock_t *>(ptr));
6549   }
6550 
6551   sz = TABLE_LOCK_SIZE * TABLE_LOCK_CACHE;
6552   ptr = reinterpret_cast<byte *>(ut_malloc_nokey(sz));
6553 
6554   for (ulint i = 0; i < TABLE_LOCK_CACHE; ++i, ptr += TABLE_LOCK_SIZE) {
6555     trx->lock.table_pool.push_back(reinterpret_cast<ib_lock_t *>(ptr));
6556   }
6557   trx_mutex_exit(trx);
6558 }
6559 
lock_notify_about_deadlock(const ut::vector<const trx_t * > & trxs_on_cycle,const trx_t * victim_trx)6560 void lock_notify_about_deadlock(const ut::vector<const trx_t *> &trxs_on_cycle,
6561                                 const trx_t *victim_trx) {
6562   Deadlock_notifier::notify(trxs_on_cycle, victim_trx);
6563 }
6564