/*****************************************************************************

Copyright (c) 1996, 2020, Oracle and/or its affiliates. All Rights Reserved.

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License, version 2.0, as published by the
Free Software Foundation.

This program is also distributed with certain software (including but not
limited to OpenSSL) that is licensed under separate terms, as designated in a
particular file or component or in included license documentation. The authors
of MySQL hereby grant you an additional permission to link the program and
your derivative works with the separately licensed software that they have
included with MySQL.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License, version 2.0,
for more details.

You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA

*****************************************************************************/

/** @file lock/lock0lock.cc
The transaction lock system

Created 5/7/1996 Heikki Tuuri
*******************************************************/

#define LOCK_MODULE_IMPLEMENTATION

#include <mysql/service_thd_engine_lock.h>
#include <sys/types.h>

#include <algorithm>
#include <map>
#include <set>
#include <unordered_map>
#include <unordered_set>
#include <vector>

#include "btr0btr.h"
#include "current_thd.h"
#include "debug_sync.h" /* CONDITIONAL_SYNC_POINT */
#include "dict0boot.h"
#include "dict0mem.h"
#include "ha_prototypes.h"
#include "lock0lock.h"
#include "lock0priv.h"
#include "pars0pars.h"
#include "row0mysql.h"
#include "row0sel.h"
#include "srv0mon.h"
#include "trx0purge.h"
#include "trx0sys.h"
#include "usr0sess.h"
#include "ut0new.h"
#include "ut0vec.h"

#include "my_dbug.h"
#include "my_psi_config.h"
#include "mysql/plugin.h"
#include "mysql/psi/psi_thread.h"

/* Flag to enable/disable deadlock detector. */
bool innobase_deadlock_detect = true;

/** Total number of cached record locks */
static const ulint REC_LOCK_CACHE = 8;

/** Maximum record lock size in bytes */
static const ulint REC_LOCK_SIZE = sizeof(ib_lock_t) + 256;

/** Total number of cached table locks */
static const ulint TABLE_LOCK_CACHE = 8;

/** Size in bytes of a table lock instance */
static const ulint TABLE_LOCK_SIZE = sizeof(ib_lock_t);

template <typename T>
using Locks = std::vector<T, mem_heap_allocator<T>>;

/** Used by lock_get_mode_str to build a lock mode description */
static const std::map<uint, const char *> lock_constant_names{
    {LOCK_GAP, "GAP"},
    {LOCK_REC_NOT_GAP, "REC_NOT_GAP"},
    {LOCK_INSERT_INTENTION, "INSERT_INTENTION"},
    {LOCK_PREDICATE, "PREDICATE"},
    {LOCK_PRDT_PAGE, "PRDT_PAGE"},
};
/** Used by lock_get_mode_str to cache results. Strings pointed to by these
pointers might be in use by the performance schema and thus cannot be freed
until the very end.
Protected by the exclusive global lock_sys latch.
*/
static std::unordered_map<uint, const char *> lock_cached_lock_mode_names;

/** A static class for reporting notifications about deadlocks */
class Deadlock_notifier {
 public:
  Deadlock_notifier() = delete;

  /** Handles writing the information about a found deadlock to the log files
  and caches it for future lock_latest_err_file() calls (for example used by
  SHOW ENGINE INNODB STATUS).
  @param[in] trxs_on_cycle trxs causing the deadlock; the i-th waits for the
                           (i+1)-th
  @param[in] victim_trx the trx from trxs_on_cycle which will be rolled back
  */
  static void notify(const ut::vector<const trx_t *> &trxs_on_cycle,
                     const trx_t *victim_trx);

 private:
#ifdef UNIV_DEBUG
  /** Determines if a situation in which the lock takes part in a deadlock
  cycle is expected (as in: handled correctly) or not (say, because it is on a
  DD table, for which there is no reason to expect a deadlock and we don't
  handle deadlocks correctly). The purpose of the function is to use it in an
  assertion that fails as soon as the deadlock is identified, to give the
  developer a chance to investigate the root cause of the situation (without
  such an assertion, the code might continue to run and either fail at a later
  stage when the data useful for debugging is no longer on the stack, or not
  fail at all, which is risky).
  @param[in] lock lock found in a deadlock cycle
  @return true if we expect that this lock can take part in a deadlock cycle */
  static bool is_allowed_to_be_on_cycle(const lock_t *lock);
#endif /* UNIV_DEBUG */

  /** Print transaction data to the deadlock file and possibly to stderr.
  @param trx transaction
  @param max_query_len max query length to print */
  static void print(const trx_t *trx, ulint max_query_len);

  /** rewind(3) the file used for storing the latest detected deadlock
  and print a heading message to stderr if printing of all deadlocks to
  stderr is enabled. */
  static void start_print();

  /** Print lock data to the deadlock file and possibly to stderr.
  @param lock record or table type lock */
  static void print(const lock_t *lock);

  /** Print a message to the deadlock file and possibly to stderr.
  @param msg message to print */
  static void print(const char *msg);

  /** Prints a numbered section title to the deadlock file and possibly to
  stderr. Numbers do not have to be unique, as they are used to identify
  transactions on the cycle, and there are multiple sections per transaction.
  @param[in] pos_on_cycle The zero-based position of trx on deadlock cycle
  @param[in] title The title of the section */
  static void print_title(size_t pos_on_cycle, const char *title);
};

#ifdef UNIV_DEBUG
namespace locksys {

bool owns_exclusive_global_latch() {
  return lock_sys->latches.owns_exclusive_global_latch();
}

bool owns_shared_global_latch() {
  return lock_sys->latches.owns_shared_global_latch();
}

bool owns_page_shard(const page_id_t &page_id) {
  return lock_sys->latches.owns_page_shard(page_id);
}

bool owns_table_shard(const dict_table_t &table) {
  return lock_sys->latches.owns_table_shard(table);
}

bool owns_lock_shard(const lock_t *lock) {
  if (lock->is_record_lock()) {
    return lock_sys->latches.owns_page_shard(lock->rec_lock.page_id);
  } else {
    return lock_sys->latches.owns_table_shard(*lock->tab_lock.table);
  }
}
}  // namespace locksys
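
/* The predicates above back the locksys ownership assertions used throughout
this file, e.g.:

  ut_ad(locksys::owns_page_shard(block->get_page_id()));

They exist only under UNIV_DEBUG, so they must not be used to make run-time
decisions in release builds. */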

/** Validates the record lock queues on a page.
@return true if ok */
static bool lock_rec_validate_page(
    const buf_block_t *block) /*!< in: buffer block */
    MY_ATTRIBUTE((warn_unused_result));
#endif /* UNIV_DEBUG */

/* The lock system */
lock_sys_t *lock_sys = nullptr;

/** We store info on the latest deadlock error to this buffer. InnoDB
Monitor will then fetch it and print */
static bool lock_deadlock_found = false;

/** Only created if !srv_read_only_mode. I/O operations on this file require
exclusive lock_sys latch */
static FILE *lock_latest_err_file;

/** Reports that a transaction id is insensible, i.e., in the future. */
void lock_report_trx_id_insanity(
    trx_id_t trx_id,           /*!< in: trx id */
    const rec_t *rec,          /*!< in: user record */
    const dict_index_t *index, /*!< in: index */
    const ulint *offsets,      /*!< in: rec_get_offsets(rec, index) */
    trx_id_t max_trx_id)       /*!< in: trx_sys_get_max_trx_id() */
{
  ib::error(ER_IB_MSG_634) << "Transaction id " << trx_id
                           << " associated with record"
                           << rec_offsets_print(rec, offsets) << " in index "
                           << index->name << " of table " << index->table->name
                           << " is greater than the global counter "
                           << max_trx_id << "! The table is corrupted.";
}

/** Checks that a transaction id is sensible, i.e., not in the future.
@return true if ok */
#ifdef UNIV_DEBUG

#else
static MY_ATTRIBUTE((warn_unused_result))
#endif
bool lock_check_trx_id_sanity(
    trx_id_t trx_id,           /*!< in: trx id */
    const rec_t *rec,          /*!< in: user record */
    const dict_index_t *index, /*!< in: index */
    const ulint *offsets)      /*!< in: rec_get_offsets(rec, index) */
{
  ut_ad(rec_offs_validate(rec, index, offsets));

  trx_id_t max_trx_id = trx_sys_get_max_trx_id();
  bool is_ok = trx_id < max_trx_id;

  if (!is_ok) {
    lock_report_trx_id_insanity(trx_id, rec, index, offsets, max_trx_id);
  }

  return (is_ok);
}

/** Checks that a record is seen in a consistent read.
@return true if sees, or false if an earlier version of the record
should be retrieved */
bool lock_clust_rec_cons_read_sees(
    const rec_t *rec,     /*!< in: user record which should be read or
                          passed over by a read cursor */
    dict_index_t *index,  /*!< in: clustered index */
    const ulint *offsets, /*!< in: rec_get_offsets(rec, index) */
    ReadView *view)       /*!< in: consistent read view */
{
  ut_ad(index->is_clustered());
  ut_ad(page_rec_is_user_rec(rec));
  ut_ad(rec_offs_validate(rec, index, offsets));

  /* Temp-tables are not shared across connections and multiple
  transactions from different connections cannot simultaneously
  operate on the same temp-table, so a read of a temp-table is
  always a consistent read. */
  if (srv_read_only_mode || index->table->is_temporary()) {
    ut_ad(view == nullptr || index->table->is_temporary());
    return (true);
  }

  /* NOTE that we call this function while holding the search
  system latch. */

  trx_id_t trx_id = row_get_rec_trx_id(rec, index, offsets);

  return (view->changes_visible(trx_id, index->table->name));
}

/** Checks that a non-clustered index record is seen in a consistent read.

NOTE that a non-clustered index page contains so little information on
its modifications that even when false is returned, the present version of
rec may still be the right one, but we must check this from the clustered
index record.

@return true if certainly sees, or false if an earlier version of the
clustered index record might be needed */
bool lock_sec_rec_cons_read_sees(
    const rec_t *rec,          /*!< in: user record which
                               should be read or passed over
                               by a read cursor */
    const dict_index_t *index, /*!< in: index */
    const ReadView *view)      /*!< in: consistent read view */
{
  ut_ad(page_rec_is_user_rec(rec));

  /* NOTE that we might call this function while holding the search
  system latch. */

  if (recv_recovery_is_on()) {
    return (false);

  } else if (index->table->is_temporary()) {
    /* Temp-tables are not shared across connections and multiple
    transactions from different connections cannot simultaneously
    operate on the same temp-table, so a read of a temp-table is
    always a consistent read. */

    return (true);
  }

  trx_id_t max_trx_id = page_get_max_trx_id(page_align(rec));

  ut_ad(max_trx_id > 0);

  return (view->sees(max_trx_id));
}

/** Creates the lock system at database start. */
void lock_sys_create(
    ulint n_cells) /*!< in: number of slots in lock hash table */
{
  ulint lock_sys_sz;

  lock_sys_sz = sizeof(*lock_sys) + srv_max_n_threads * sizeof(srv_slot_t);

  lock_sys = static_cast<lock_sys_t *>(ut_zalloc_nokey(lock_sys_sz));

  new (lock_sys) lock_sys_t{};

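  /* The waiting-threads slot array lives in the same allocation, immediately
  after the lock_sys_t object itself: lock_sys_sz above reserves room for the
  struct plus srv_max_n_threads slots, and &lock_sys[1] is the first byte past
  the struct. */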
  void *ptr = &lock_sys[1];

  lock_sys->waiting_threads = static_cast<srv_slot_t *>(ptr);

  lock_sys->last_slot = lock_sys->waiting_threads;

  mutex_create(LATCH_ID_LOCK_SYS_WAIT, &lock_sys->wait_mutex);

  lock_sys->timeout_event = os_event_create();

  lock_sys->rec_hash = hash_create(n_cells);
  lock_sys->prdt_hash = hash_create(n_cells);
  lock_sys->prdt_page_hash = hash_create(n_cells);

  if (!srv_read_only_mode) {
    lock_latest_err_file = os_file_create_tmpfile(nullptr);
    ut_a(lock_latest_err_file);
  }
}

/** Calculates the fold value of a lock: used in migrating the hash table.
@param[in] lock record lock object
@return folded value */
static ulint lock_rec_lock_fold(const lock_t *lock) {
  return (lock_rec_fold(lock->rec_lock.page_id));
}

/** Resize the lock hash tables.
@param[in] n_cells number of slots in lock hash table */
void lock_sys_resize(ulint n_cells) {
  hash_table_t *old_hash;

  /* We will rearrange locks between buckets and change the parameters of hash
  function used in sharding of latches, so we have to prevent everyone from
  accessing lock sys queues, or even computing shard id. */
  locksys::Global_exclusive_latch_guard guard{};

  old_hash = lock_sys->rec_hash;
  lock_sys->rec_hash = hash_create(n_cells);
  HASH_MIGRATE(old_hash, lock_sys->rec_hash, lock_t, hash, lock_rec_lock_fold);
  hash_table_free(old_hash);

  DBUG_EXECUTE_IF("syncpoint_after_lock_sys_resize_rec_hash", {
    /* A workaround for buf_resize_thread() not using create_thd().
    TBD: if buf_resize_thread() were to use create_thd() then should it be
    instrumented (together or instead of os_thread_create instrumentation)? */
    ut_ad(current_thd == nullptr);
    THD *thd = create_thd(false, true, true, PSI_NOT_INSTRUMENTED);
    ut_ad(current_thd == thd);
    CONDITIONAL_SYNC_POINT("after_lock_sys_resize_rec_hash");
    destroy_thd(thd);
    ut_ad(current_thd == nullptr);
  });

  old_hash = lock_sys->prdt_hash;
  lock_sys->prdt_hash = hash_create(n_cells);
  HASH_MIGRATE(old_hash, lock_sys->prdt_hash, lock_t, hash, lock_rec_lock_fold);
  hash_table_free(old_hash);

  old_hash = lock_sys->prdt_page_hash;
  lock_sys->prdt_page_hash = hash_create(n_cells);
  HASH_MIGRATE(old_hash, lock_sys->prdt_page_hash, lock_t, hash,
               lock_rec_lock_fold);
  hash_table_free(old_hash);

  /* need to update block->lock_hash_val */
  for (ulint i = 0; i < srv_buf_pool_instances; ++i) {
    buf_pool_t *buf_pool = buf_pool_from_array(i);

    mutex_enter(&buf_pool->LRU_list_mutex);
    buf_page_t *bpage;
    bpage = UT_LIST_GET_FIRST(buf_pool->LRU);

    while (bpage != nullptr) {
      if (buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE) {
        buf_block_t *block;
        block = reinterpret_cast<buf_block_t *>(bpage);

        block->lock_hash_val = lock_rec_hash(bpage->id);
      }
      bpage = UT_LIST_GET_NEXT(LRU, bpage);
    }
    mutex_exit(&buf_pool->LRU_list_mutex);
  }
}

/** Closes the lock system at database shutdown. */
void lock_sys_close(void) {
  if (lock_latest_err_file != nullptr) {
    fclose(lock_latest_err_file);
    lock_latest_err_file = nullptr;
  }

  hash_table_free(lock_sys->rec_hash);
  hash_table_free(lock_sys->prdt_hash);
  hash_table_free(lock_sys->prdt_page_hash);

  os_event_destroy(lock_sys->timeout_event);

  mutex_destroy(&lock_sys->wait_mutex);

  srv_slot_t *slot = lock_sys->waiting_threads;

  for (uint32_t i = 0; i < srv_max_n_threads; i++, ++slot) {
    if (slot->event != nullptr) {
      os_event_destroy(slot->event);
    }
  }
  for (auto &cached_lock_mode_name : lock_cached_lock_mode_names) {
    ut_free(const_cast<char *>(cached_lock_mode_name.second));
  }
  lock_cached_lock_mode_names.clear();

  lock_sys->~lock_sys_t();

  ut_free(lock_sys);

  lock_sys = nullptr;
}

/** Gets the size of a lock struct.
@return size in bytes */
ulint lock_get_size(void) { return ((ulint)sizeof(lock_t)); }

/** Sets the wait flag of a lock and the back pointer in trx to lock.
@param[in] lock The lock on which a transaction is waiting */
UNIV_INLINE
void lock_set_lock_and_trx_wait(lock_t *lock) {
  auto trx = lock->trx;
  ut_ad(trx_mutex_own(trx));
  ut_a(trx->lock.wait_lock == nullptr);
  ut_ad(locksys::owns_lock_shard(lock));

  trx->lock.wait_lock = lock;
  trx->lock.wait_lock_type = lock_get_type_low(lock);
  lock->type_mode |= LOCK_WAIT;
}

/** Gets the gap flag of a record lock.
@return LOCK_GAP or 0 */
UNIV_INLINE
ulint lock_rec_get_gap(const lock_t *lock) /*!< in: record lock */
{
  ut_ad(lock_get_type_low(lock) == LOCK_REC);

  return (lock->type_mode & LOCK_GAP);
}

/** Gets the LOCK_REC_NOT_GAP flag of a record lock.
@return LOCK_REC_NOT_GAP or 0 */
UNIV_INLINE
ulint lock_rec_get_rec_not_gap(const lock_t *lock) /*!< in: record lock */
{
  ut_ad(lock_get_type_low(lock) == LOCK_REC);

  return (lock->type_mode & LOCK_REC_NOT_GAP);
}

/** Gets the waiting insert flag of a record lock.
@return LOCK_INSERT_INTENTION or 0 */
UNIV_INLINE
ulint lock_rec_get_insert_intention(const lock_t *lock) /*!< in: record lock */
{
  ut_ad(lock_get_type_low(lock) == LOCK_REC);

  return (lock->type_mode & LOCK_INSERT_INTENTION);
}

/** Checks if a lock request for a new lock has to wait for request lock2.
@return true if new lock has to wait for lock2 to be removed */
UNIV_INLINE
bool lock_rec_has_to_wait(
    const trx_t *trx,    /*!< in: trx of new lock */
    ulint type_mode,     /*!< in: precise mode of the new lock
                         to set: LOCK_S or LOCK_X, possibly
                         ORed to LOCK_GAP or LOCK_REC_NOT_GAP,
                         LOCK_INSERT_INTENTION */
    const lock_t *lock2, /*!< in: another record lock; NOTE that
                         it is assumed that this has a lock bit
                         set on the same record as in the new
                         lock we are setting */
    bool lock_is_on_supremum)
/*!< in: true if we are setting the
lock on the 'supremum' record of an
index page: we know then that the lock
request is really for a 'gap' type lock */
{
  ut_ad(trx && lock2);
  ut_ad(lock_get_type_low(lock2) == LOCK_REC);

  const bool is_hp = trx_is_high_priority(trx);
  if (trx != lock2->trx &&
      !lock_mode_compatible(static_cast<lock_mode>(LOCK_MODE_MASK & type_mode),
                            lock_get_mode(lock2))) {
    /* If our trx is High Priority and the existing lock is WAITING and not
    high priority, then we can ignore it. */
    if (is_hp && lock2->is_waiting() && !trx_is_high_priority(lock2->trx)) {
      return (false);
    }

    /* We have somewhat complex rules when gap type record locks
    cause waits */

    if ((lock_is_on_supremum || (type_mode & LOCK_GAP)) &&
        !(type_mode & LOCK_INSERT_INTENTION)) {
      /* Gap type locks without LOCK_INSERT_INTENTION flag
      do not need to wait for anything. This is because
      different users can have conflicting lock types
      on gaps. */

      return (false);
    }

    if (!(type_mode & LOCK_INSERT_INTENTION) && lock_rec_get_gap(lock2)) {
      /* A record lock (LOCK_ORDINARY or LOCK_REC_NOT_GAP)
      does not need to wait for a gap type lock */

      return (false);
    }

    if ((type_mode & LOCK_GAP) && lock_rec_get_rec_not_gap(lock2)) {
      /* Lock on gap does not need to wait for
      a LOCK_REC_NOT_GAP type lock */

      return (false);
    }

    if (lock_rec_get_insert_intention(lock2)) {
      /* No lock request needs to wait for an insert
      intention lock to be removed. This is ok since our
      rules allow conflicting locks on gaps. This eliminates
      a spurious deadlock caused by a next-key lock waiting
      for an insert intention lock; when the insert
      intention lock was granted, the insert deadlocked on
      the waiting next-key lock.

      Also, insert intention locks do not disturb each
      other. */

      return (false);
    }

    return (true);
  }

  return (false);
}
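
/* An illustrative walk-through of the rules above (a sketch, not an
exhaustive matrix): suppose trx A holds X|GAP on the gap before record r.
- Another trx requesting S|GAP or X|GAP on the same gap does not wait:
  plain gap locks never conflict with each other.
- Another trx requesting X|GAP|INSERT_INTENTION must wait: an insert
  intention request is the one kind of gap request that a held gap lock
  blocks.
- Another trx requesting X|REC_NOT_GAP on r itself does not wait for A's
  gap lock, and conversely a gap request never waits for a REC_NOT_GAP
  lock. */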

/** Checks if a lock request lock1 has to wait for request lock2.
@return true if lock1 has to wait for lock2 to be removed */
bool lock_has_to_wait(const lock_t *lock1, /*!< in: waiting lock */
                      const lock_t *lock2) /*!< in: another lock; NOTE that it
                                           is assumed that this has a lock bit
                                           set on the same record as in lock1
                                           if the locks are record locks */
{
  if (lock1->trx != lock2->trx &&
      !lock_mode_compatible(lock_get_mode(lock1), lock_get_mode(lock2))) {
    if (lock_get_type_low(lock1) == LOCK_REC) {
      ut_ad(lock_get_type_low(lock2) == LOCK_REC);

      /* If this lock request is for a supremum record
      then the second bit on the lock bitmap is set */

      if (lock1->type_mode & (LOCK_PREDICATE | LOCK_PRDT_PAGE)) {
        return (lock_prdt_has_to_wait(lock1->trx, lock1->type_mode,
                                      lock_get_prdt_from_lock(lock1), lock2));
      } else {
        return (lock_rec_has_to_wait(lock1->trx, lock1->type_mode, lock2,
                                     lock1->includes_supremum()));
      }
    }

    return (true);
  }

  return (false);
}

/*============== RECORD LOCK BASIC FUNCTIONS ============================*/

/** Looks for a set bit in a record lock bitmap. Returns ULINT_UNDEFINED,
if none found.
@return bit index == heap number of the record, or ULINT_UNDEFINED if
none found */
ulint lock_rec_find_set_bit(
    const lock_t *lock) /*!< in: record lock with at least one bit set */
{
  for (ulint i = 0; i < lock_rec_get_n_bits(lock); ++i) {
    if (lock_rec_get_nth_bit(lock, i)) {
      return (i);
    }
  }

  return (ULINT_UNDEFINED);
}

/** Looks for the next set bit in the record lock bitmap.
@param[in] lock record lock with at least one bit set
@param[in] heap_no current set bit
@return The next bit index == heap number following heap_no, or
ULINT_UNDEFINED if none found */
ulint lock_rec_find_next_set_bit(const lock_t *lock, ulint heap_no) {
  ut_ad(heap_no != ULINT_UNDEFINED);

  for (ulint i = heap_no + 1; i < lock_rec_get_n_bits(lock); ++i) {
    if (lock_rec_get_nth_bit(lock, i)) {
      return (i);
    }
  }

  return (ULINT_UNDEFINED);
}

/** Reset the nth bit of a record lock.
@param[in,out] lock record lock
@param[in] i index of the bit that will be reset
@return previous value of the bit */
UNIV_INLINE
byte lock_rec_reset_nth_bit(lock_t *lock, ulint i) {
  ut_ad(lock_get_type_low(lock) == LOCK_REC);
  ut_ad(i < lock->rec_lock.n_bits);

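  /* The bitmap is stored inline, immediately after the lock_t struct
  (&lock[1]); bit i lives in byte i / 8 (i >> 3) at bit position i % 8
  (i & 7). */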
  byte *b = reinterpret_cast<byte *>(&lock[1]) + (i >> 3);
  byte mask = 1 << (i & 7);
  byte bit = *b & mask;
  *b &= ~mask;

  if (bit != 0) {
    ut_ad(lock->trx->lock.n_rec_locks.load() > 0);
    lock->trx->lock.n_rec_locks.fetch_sub(1, std::memory_order_relaxed);
  }

  return (bit);
}

/** Reset the nth bit of a record lock.
@param[in,out] lock record lock
@param[in] i index of the bit that will be reset
@param[in] type whether the lock is in wait mode */
void lock_rec_trx_wait(lock_t *lock, ulint i, ulint type) {
  lock_rec_reset_nth_bit(lock, i);

  if (type & LOCK_WAIT) {
    lock_reset_lock_and_trx_wait(lock);
  }
}

bool lock_rec_expl_exist_on_page(const page_id_t &page_id) {
  lock_t *lock;
  locksys::Shard_latch_guard guard{page_id};
  /* Only used in ibuf pages, so rec_hash is good enough */
  lock = lock_rec_get_first_on_page_addr(lock_sys->rec_hash, page_id);

  return (lock != nullptr);
}

/** Resets the record lock bitmap to zero. NOTE: does not touch the wait_lock
pointer in the transaction! This function is used in lock object creation
and resetting. */
static void lock_rec_bitmap_reset(lock_t *lock) /*!< in: record lock */
{
  ulint n_bytes;

  ut_ad(lock_get_type_low(lock) == LOCK_REC);

  /* Reset to zero the bitmap which resides immediately after the lock
  struct */

  n_bytes = lock_rec_get_n_bits(lock) / 8;

  ut_ad((lock_rec_get_n_bits(lock) % 8) == 0);

  memset(&lock[1], 0, n_bytes);
}

/** Copies a record lock to heap.
@return copy of lock */
static lock_t *lock_rec_copy(const lock_t *lock, /*!< in: record lock */
                             mem_heap_t *heap)   /*!< in: memory heap */
{
  ulint size;

  ut_ad(lock_get_type_low(lock) == LOCK_REC);

  size = sizeof(lock_t) + lock_rec_get_n_bits(lock) / 8;

  return (static_cast<lock_t *>(mem_heap_dup(heap, lock, size)));
}

/** Gets the previous record lock set on a record.
@return previous lock on the same record, NULL if none exists */
const lock_t *lock_rec_get_prev(
    const lock_t *in_lock, /*!< in: record lock */
    ulint heap_no)         /*!< in: heap number of the record */
{
  lock_t *lock;
  lock_t *found_lock = nullptr;
  hash_table_t *hash;

  ut_ad(lock_get_type_low(in_lock) == LOCK_REC);
  const auto page_id = in_lock->rec_lock.page_id;
  ut_ad(locksys::owns_page_shard(page_id));

  hash = lock_hash_get(in_lock->type_mode);

  for (lock = lock_rec_get_first_on_page_addr(hash, page_id);
       /* No op */; lock = lock_rec_get_next_on_page(lock)) {
    ut_ad(lock);

    if (lock == in_lock) {
      return (found_lock);
    }

    if (lock_rec_get_nth_bit(lock, heap_no)) {
      found_lock = lock;
    }
  }
}

/*============= FUNCTIONS FOR ANALYZING RECORD LOCK QUEUE ================*/

/** Checks if a transaction has a GRANTED explicit lock on rec stronger or
equal to precise_mode.
@param[in] precise_mode LOCK_S or LOCK_X possibly ORed to LOCK_GAP or
                        LOCK_REC_NOT_GAP; for a supremum record we regard
                        this always as a gap type request
@param[in] block buffer block containing the record
@param[in] heap_no heap number of the record
@param[in] trx transaction
@return lock or NULL */
UNIV_INLINE
const lock_t *lock_rec_has_expl(ulint precise_mode, const buf_block_t *block,
                                ulint heap_no, const trx_t *trx) {
  ut_ad(locksys::owns_page_shard(block->get_page_id()));
  ut_ad((precise_mode & LOCK_MODE_MASK) == LOCK_S ||
        (precise_mode & LOCK_MODE_MASK) == LOCK_X);
  ut_ad(
      !(precise_mode & ~(ulint)(LOCK_MODE_MASK | LOCK_GAP | LOCK_REC_NOT_GAP)));
  ut_ad(!(precise_mode & LOCK_INSERT_INTENTION));
  ut_ad(!(precise_mode & LOCK_PREDICATE));
  ut_ad(!(precise_mode & LOCK_PRDT_PAGE));
  const RecID rec_id{block, heap_no};
  const bool is_on_supremum = rec_id.is_supremum();
  const bool is_rec_not_gap = 0 != (precise_mode & LOCK_REC_NOT_GAP);
  const bool is_gap = 0 != (precise_mode & LOCK_GAP);
  const auto mode = static_cast<lock_mode>(precise_mode & LOCK_MODE_MASK);
  const auto p_implies_q = [](bool p, bool q) { return q || !p; };
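
  /* p_implies_q(p, q) is the material implication p -> q, i.e. !p || q.
  Below it checks that the held lock is not narrower than the request: a
  REC_NOT_GAP lock satisfies only a REC_NOT_GAP request, a GAP lock only a
  GAP request, while an ordinary next-key lock (neither flag set) satisfies
  any of them. */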

  return (Lock_iter::for_each(rec_id, [&](const lock_t *lock) {
    return (!(lock->trx == trx && !lock->is_insert_intention() &&
              lock_mode_stronger_or_eq(lock_get_mode(lock), mode) &&
              !lock->is_waiting() &&
              (is_on_supremum ||
               (p_implies_q(lock->is_record_not_gap(), is_rec_not_gap) &&
                p_implies_q(lock->is_gap(), is_gap)))));
  }));
}

#ifdef UNIV_DEBUG
/** Checks if some other transaction has a lock request in the queue.
@return lock or NULL */
static const lock_t *lock_rec_other_has_expl_req(
    lock_mode mode,           /*!< in: LOCK_S or LOCK_X */
    const buf_block_t *block, /*!< in: buffer block containing
                              the record */
    bool wait,                /*!< in: whether also waiting locks
                              are taken into account */
    ulint heap_no,            /*!< in: heap number of the record */
    const trx_t *trx)         /*!< in: transaction, or NULL if
                              requests by all transactions
                              are taken into account */
{
  ut_ad(locksys::owns_page_shard(block->get_page_id()));
  ut_ad(mode == LOCK_X || mode == LOCK_S);

  /* Only GAP lock can be on SUPREMUM, and we are not looking
  for GAP lock */

  RecID rec_id{block, heap_no};

  if (rec_id.is_supremum()) {
    return (nullptr);
  }

  return (Lock_iter::for_each(rec_id, [=](const lock_t *lock) {
    /* Ignore transactions that are being rolled back. */
    return (!(lock->trx != trx && !lock->is_gap() &&
              (wait || !lock->is_waiting()) &&
              lock_mode_stronger_or_eq(lock->mode(), mode)));
  }));
}
#endif /* UNIV_DEBUG */

/** Checks if some other transaction has a conflicting explicit lock request
in the queue, so that we have to wait.
@return lock or NULL */
static const lock_t *lock_rec_other_has_conflicting(
    ulint mode,               /*!< in: LOCK_S or LOCK_X,
                              possibly ORed to LOCK_GAP or
                              LOCK_REC_NOT_GAP,
                              LOCK_INSERT_INTENTION */
    const buf_block_t *block, /*!< in: buffer block containing
                              the record */
    ulint heap_no,            /*!< in: heap number of the record */
    const trx_t *trx)         /*!< in: our transaction */
{
  ut_ad(locksys::owns_page_shard(block->get_page_id()));
  ut_ad(!(mode & ~(ulint)(LOCK_MODE_MASK | LOCK_GAP | LOCK_REC_NOT_GAP |
                          LOCK_INSERT_INTENTION)));
  ut_ad(!(mode & LOCK_PREDICATE));
  ut_ad(!(mode & LOCK_PRDT_PAGE));

  RecID rec_id{block, heap_no};
  const bool is_supremum = rec_id.is_supremum();

  return (Lock_iter::for_each(rec_id, [=](const lock_t *lock) {
    return (!(lock_rec_has_to_wait(trx, mode, lock, is_supremum)));
  }));
}

/** Checks if some transaction has an implicit x-lock on a record in a
secondary index.
@param[in] rec user record
@param[in] index secondary index
@param[in] offsets rec_get_offsets(rec, index)
@return transaction id of the transaction which has the x-lock, or 0;
NOTE that this function can return false positives but never false
negatives. The caller must confirm all positive results by checking if the trx
is still active. */
static trx_t *lock_sec_rec_some_has_impl(const rec_t *rec, dict_index_t *index,
                                         const ulint *offsets) {
  trx_t *trx;
  trx_id_t max_trx_id;
  const page_t *page = page_align(rec);

  ut_ad(!locksys::owns_exclusive_global_latch());
  ut_ad(!trx_sys_mutex_own());
  ut_ad(!index->is_clustered());
  ut_ad(page_rec_is_user_rec(rec));
  ut_ad(rec_offs_validate(rec, index, offsets));

  max_trx_id = page_get_max_trx_id(page);

  /* Some transaction may have an implicit x-lock on the record only
  if the max trx id for the page >= min trx id for the trx list, or
  database recovery is running. We do not write the changes of a page
  max trx id to the log, and therefore during recovery, this value
  for a page may be incorrect. */

  if (max_trx_id < trx_rw_min_trx_id() && !recv_recovery_is_on()) {
    trx = nullptr;

  } else if (!lock_check_trx_id_sanity(max_trx_id, rec, index, offsets)) {
    /* The page is corrupt: try to avoid a crash by returning 0 */
    trx = nullptr;

  } else {
    /* In this case it is possible that some transaction has an implicit
    x-lock. We have to look in the clustered index. */

    trx = row_vers_impl_x_locked(rec, index, offsets);
  }

  return (trx);
}

#ifdef UNIV_DEBUG
/** Checks if some transaction, other than the given trx_id, has an explicit
lock on the given rec, in the given precise_mode.
@param[in] precise_mode LOCK_S or LOCK_X possibly ORed to LOCK_GAP or
                        LOCK_REC_NOT_GAP.
@param[in] trx the trx holding implicit lock on rec
@param[in] rec user record
@param[in] block buffer block containing the record
@return true iff there is a transaction whose id is not equal to trx_id and
that has an explicit lock on the given rec, in the given precise_mode. */
static bool lock_rec_other_trx_holds_expl(ulint precise_mode, const trx_t *trx,
                                          const rec_t *rec,
                                          const buf_block_t *block) {
  bool holds = false;

  /* We will inspect locks from various shards when inspecting transactions. */
  locksys::Global_exclusive_latch_guard guard{};
  /* If trx_rw_is_active returns non-null impl_trx it only means that impl_trx
  was active at some moment during the call, but might already be in
  TRX_STATE_COMMITTED_IN_MEMORY when we execute the body of the if.
  However, we hold exclusive latch on whole lock_sys, which prevents anyone
  from creating any new explicit locks.
  So, all explicit locks we will see must have been created at the time when
  the transaction was not committed yet. */
  if (trx_t *impl_trx = trx_rw_is_active(trx->id, nullptr, false)) {
    ulint heap_no = page_rec_get_heap_no(rec);
    mutex_enter(&trx_sys->mutex);

    for (const trx_t *t = UT_LIST_GET_FIRST(trx_sys->rw_trx_list);
         t != nullptr; t = UT_LIST_GET_NEXT(trx_list, t)) {
      const lock_t *expl_lock =
          lock_rec_has_expl(precise_mode, block, heap_no, t);

      if (expl_lock && expl_lock->trx != impl_trx) {
        /* An explicit lock is held by trx other than
        the trx holding the implicit lock. */
        holds = true;
        break;
      }
    }

    mutex_exit(&trx_sys->mutex);
  }

  return (holds);
}
#endif /* UNIV_DEBUG */

ulint lock_number_of_rows_locked(const trx_lock_t *trx_lock) {
  /* We need exclusive lock_sys access, because trx_lock->n_rec_locks is
  modified while holding sharded lock only, so we need to disable all writers
  for this number to be meaningful */
  ut_ad(locksys::owns_exclusive_global_latch());

  return (trx_lock->n_rec_locks);
}

ulint lock_number_of_tables_locked(const trx_t *trx) {
  ut_ad(trx_mutex_own(trx));

  return (trx->lock.table_locks.size());
}

/*============== RECORD LOCK CREATION AND QUEUE MANAGEMENT =============*/

/**
Do some checks and prepare for creating a new record lock */
void RecLock::prepare() const {
  ut_ad(locksys::owns_page_shard(m_rec_id.get_page_id()));
  ut_ad(m_trx == thr_get_trx(m_thr));

  /* Test if there already is some other reason to suspend thread:
  we do not enqueue a lock request if the query thread should be
  stopped anyway */

  if (que_thr_stop(m_thr)) {
    ut_error;
  }

  switch (trx_get_dict_operation(m_trx)) {
    case TRX_DICT_OP_NONE:
      break;
    case TRX_DICT_OP_TABLE:
    case TRX_DICT_OP_INDEX:
      ib::error(ER_IB_MSG_635)
          << "A record lock wait happens in a dictionary"
             " operation. index "
          << m_index->name << " of table " << m_index->table->name << ". "
          << BUG_REPORT_MSG;
      ut_ad(0);
  }

  ut_ad(m_index->table->n_ref_count > 0 || !m_index->table->can_be_evicted);
}

/**
Create the lock instance
@param[in, out] trx The transaction requesting the lock
@param[in, out] index Index on which record lock is required
@param[in] mode The lock mode desired
@param[in] rec_id The record id
@param[in] size Size of the lock + bitmap requested
@return a record lock instance */
lock_t *RecLock::lock_alloc(trx_t *trx, dict_index_t *index, ulint mode,
                            const RecID &rec_id, ulint size) {
  ut_ad(locksys::owns_page_shard(rec_id.get_page_id()));
  /* We are about to modify structures in trx->lock which needs trx->mutex */
  ut_ad(trx_mutex_own(trx));

  lock_t *lock;

  if (trx->lock.rec_cached >= trx->lock.rec_pool.size() ||
      sizeof(*lock) + size > REC_LOCK_SIZE) {
    ulint n_bytes = size + sizeof(*lock);
    mem_heap_t *heap = trx->lock.lock_heap;

    lock = reinterpret_cast<lock_t *>(mem_heap_alloc(heap, n_bytes));
  } else {
    lock = trx->lock.rec_pool[trx->lock.rec_cached];
    ++trx->lock.rec_cached;
  }

  lock->trx = trx;

  lock->index = index;

  /* Note the creation timestamp */
  ut_d(lock->m_seq = lock_sys->m_seq.fetch_add(1));

  /* Setup the lock attributes */

  lock->type_mode = LOCK_REC | (mode & ~LOCK_TYPE_MASK);

  lock_rec_t &rec_lock = lock->rec_lock;

  /* Predicate lock always on INFIMUM (0) */

  if (is_predicate_lock(mode)) {
    rec_lock.n_bits = 8;

    memset(&lock[1], 0x0, 1);

  } else {
    ut_ad(8 * size < UINT32_MAX);
    rec_lock.n_bits = static_cast<uint32_t>(8 * size);

    memset(&lock[1], 0x0, size);
  }

  rec_lock.page_id = rec_id.get_page_id();

  /* Set the bit corresponding to rec */

  lock_rec_set_nth_bit(lock, rec_id.m_heap_no);

  MONITOR_INC(MONITOR_NUM_RECLOCK);

  MONITOR_INC(MONITOR_RECLOCK_CREATED);

  return (lock);
}

/** Insert lock record to the tail of the queue where the WAITING locks reside.
@param[in,out] lock_hash Hash table containing the locks
@param[in,out] lock Record lock instance to insert
@param[in] rec_id Record being locked */
static void lock_rec_insert_to_waiting(hash_table_t *lock_hash, lock_t *lock,
                                       const RecID &rec_id) {
  ut_ad(lock->is_waiting());
  ut_ad(rec_id.matches(lock));
  ut_ad(locksys::owns_page_shard(lock->rec_lock.page_id));
  ut_ad(locksys::owns_page_shard(rec_id.get_page_id()));

  const ulint fold = rec_id.fold();
  HASH_INSERT(lock_t, hash, lock_hash, fold, lock);
}

/** Insert lock record to the head of the queue where the GRANTED locks reside.
@param[in,out] lock_hash Hash table containing the locks
@param[in,out] lock Record lock instance to insert
@param[in] rec_id Record being locked */
static void lock_rec_insert_to_granted(hash_table_t *lock_hash, lock_t *lock,
                                       const RecID &rec_id) {
  ut_ad(rec_id.matches(lock));
  ut_ad(locksys::owns_page_shard(lock->rec_lock.page_id));
  ut_ad(locksys::owns_page_shard(rec_id.get_page_id()));
  ut_ad(!lock->is_waiting());

  /* Move the target lock to the head of the list. */
  auto cell =
      hash_get_nth_cell(lock_hash, hash_calc_hash(rec_id.fold(), lock_hash));

  ut_ad(lock != cell->node);

  auto next = reinterpret_cast<lock_t *>(cell->node);

  cell->node = lock;
  lock->hash = next;
}
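
/* Together, the two insert functions above maintain the queue discipline of a
record-lock hash bucket: granted locks are pushed at the head, waiting locks
are appended at the tail, so within a bucket granted locks precede waiting
ones. (The one exception, a reused lock that was waiting in the past and has
an empty bitmap, is handled in lock_rec_add_to_queue() below.) */
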
namespace locksys {
/**
Adds the lock to the list of trx's locks.
Requires lock->trx to be already set.
Bumps the trx_lock_version.
@param[in,out] lock The lock that we want to add to lock->trx->lock.trx_locks
*/
static void add_to_trx_locks(lock_t *lock) {
  ut_ad(lock->trx != nullptr);
  ut_ad(trx_mutex_own(lock->trx));
  UT_LIST_ADD_LAST(lock->trx->lock.trx_locks, lock);
  lock->trx->lock.trx_locks_version++;
}

/**
Removes the lock from the list of trx's locks.
Bumps the trx_lock_version.
@param[in,out] lock The lock that we want to remove from
lock->trx->lock.trx_locks
*/
static void remove_from_trx_locks(lock_t *lock) {
  ut_ad(lock->trx != nullptr);
  ut_ad(trx_mutex_own(lock->trx));
  UT_LIST_REMOVE(lock->trx->lock.trx_locks, lock);
  lock->trx->lock.trx_locks_version++;
}
}  // namespace locksys

void RecLock::lock_add(lock_t *lock) {
  ut_ad((lock->type_mode | LOCK_REC) == (m_mode | LOCK_REC));
  ut_ad(m_rec_id.matches(lock));
  ut_ad(locksys::owns_page_shard(m_rec_id.get_page_id()));
  ut_ad(locksys::owns_page_shard(lock->rec_lock.page_id));
  ut_ad(trx_mutex_own(lock->trx));

  bool wait = m_mode & LOCK_WAIT;

  hash_table_t *lock_hash = lock_hash_get(m_mode);

  lock->index->table->n_rec_locks.fetch_add(1, std::memory_order_relaxed);

  if (!wait) {
    lock_rec_insert_to_granted(lock_hash, lock, m_rec_id);
  } else {
    lock_rec_insert_to_waiting(lock_hash, lock, m_rec_id);
  }

#ifdef HAVE_PSI_THREAD_INTERFACE
#ifdef HAVE_PSI_DATA_LOCK_INTERFACE
  /* The performance schema THREAD_ID and EVENT_ID are used only
  when DATA_LOCKS are exposed. */
  PSI_THREAD_CALL(get_current_thread_event_id)
  (&lock->m_psi_internal_thread_id, &lock->m_psi_event_id);
#endif /* HAVE_PSI_DATA_LOCK_INTERFACE */
#endif /* HAVE_PSI_THREAD_INTERFACE */

  locksys::add_to_trx_locks(lock);

  if (wait) {
    lock_set_lock_and_trx_wait(lock);
  }
}

/** Create a new lock.
@param[in,out] trx Transaction requesting the lock
@param[in] prdt Predicate lock (optional)
@return a new lock instance */
lock_t *RecLock::create(trx_t *trx, const lock_prdt_t *prdt) {
  ut_ad(locksys::owns_page_shard(m_rec_id.get_page_id()));

  /* Ensure that another transaction doesn't access the trx
  lock state and lock data structures while we are adding the
  lock and changing the transaction state to LOCK_WAIT.
  In particular it protects the lock_alloc which uses trx's private pool of
  lock structures.
  It might be the case that we already hold trx->mutex because we got here
  from:
  - lock_rec_convert_impl_to_expl_for_trx
  - add_to_waitq
  */
  ut_ad(trx_mutex_own(trx));

  /* Create the explicit lock instance and initialise it. */

  lock_t *lock = lock_alloc(trx, m_index, m_mode, m_rec_id, m_size);

#ifdef UNIV_DEBUG
  /* GAP lock shouldn't be taken on DD tables with some exceptions */
  if (m_index->table->is_dd_table &&
      strstr(m_index->table->name.m_name,
             "mysql/st_spatial_reference_systems") == nullptr &&
      strstr(m_index->table->name.m_name, "mysql/innodb_table_stats") ==
          nullptr &&
      strstr(m_index->table->name.m_name, "mysql/innodb_index_stats") ==
          nullptr &&
      strstr(m_index->table->name.m_name, "mysql/table_stats") == nullptr &&
      strstr(m_index->table->name.m_name, "mysql/index_stats") == nullptr) {
    ut_ad(lock_rec_get_rec_not_gap(lock));
  }
#endif /* UNIV_DEBUG */

  if (prdt != nullptr && (m_mode & LOCK_PREDICATE)) {
    lock_prdt_set_prdt(lock, prdt);
  }

  lock_add(lock);

  return (lock);
}

/**
Collect the transactions that will need to be rolled back asynchronously
@param[in, out] hit_list The list of transactions to be rolled back, to which
the trx should be appended.
@param[in] hp_trx_id The id of the blocked High Priority Transaction
@param[in, out] trx The blocking transaction to be rolled back */
static void lock_mark_trx_for_rollback(hit_list_t &hit_list, trx_id_t hp_trx_id,
                                       trx_t *trx) {
  trx->abort = true;

  ut_ad(!trx->read_only);
  ut_ad(trx_mutex_own(trx));
  ut_ad(!(trx->in_innodb & TRX_FORCE_ROLLBACK));
  ut_ad(!(trx->in_innodb & TRX_FORCE_ROLLBACK_ASYNC));
  ut_ad(!(trx->in_innodb & TRX_FORCE_ROLLBACK_DISABLE));

  /* Note that we will attempt an async rollback. The _ASYNC
  flag will be cleared if the transaction is rolled back
  synchronously before we get a chance to do it. */

  trx->in_innodb |= TRX_FORCE_ROLLBACK | TRX_FORCE_ROLLBACK_ASYNC;

  bool cas;
  os_thread_id_t thread_id = os_thread_get_curr_id();

  cas = os_compare_and_swap_thread_id(&trx->killed_by, 0, thread_id);

  ut_a(cas);

  hit_list.push_back(hit_list_t::value_type(trx));

#ifdef UNIV_DEBUG
  THD *thd = trx->mysql_thd;

  if (thd != nullptr) {
    char buffer[1024];
    ib::info(ER_IB_MSG_636)
        << "Blocking transaction: ID: " << trx->id << " - "
        << " Blocked transaction ID: " << hp_trx_id << " - "
        << thd_security_context(thd, buffer, sizeof(buffer), 512);
  }
#endif /* UNIV_DEBUG */
}

/** Creates a new edge in the wait-for graph, from waiter to blocker
@param[in] waiter The transaction that has to wait for blocker
@param[in] blocker The transaction which causes waiter to wait */
static void lock_create_wait_for_edge(trx_t *waiter, trx_t *blocker) {
  ut_ad(trx_mutex_own(waiter));
  ut_ad(waiter->lock.wait_lock != nullptr);
  ut_ad(locksys::owns_lock_shard(waiter->lock.wait_lock));
  ut_ad(waiter->lock.blocking_trx.load() == nullptr);
  /* We don't call lock_wait_request_check_for_cycles() here as it
  would be slightly premature: the trx is not yet inserted into a slot of
  lock_sys->waiting_threads at this point, and thus it would be invisible to
  the thread which analyzes these slots. What we do instead is to let the
  lock_wait_table_reserve_slot() function be responsible for calling
  lock_wait_request_check_for_cycles() once it inserts the trx into a
  slot. */
  waiter->lock.blocking_trx.store(blocker);
}

/**
Setup the requesting transaction state for lock grant
@param[in,out] lock Lock for which to change state */
void RecLock::set_wait_state(lock_t *lock) {
  ut_ad(locksys::owns_page_shard(lock->rec_lock.page_id));
  ut_ad(m_trx == lock->trx);
  ut_ad(trx_mutex_own(m_trx));
  ut_ad(lock_get_wait(lock));

  m_trx->lock.wait_started = ut_time();

  m_trx->lock.que_state = TRX_QUE_LOCK_WAIT;

  m_trx->lock.was_chosen_as_deadlock_victim = false;

  bool stopped = que_thr_stop(m_thr);
  ut_a(stopped);
}

dberr_t RecLock::add_to_waitq(const lock_t *wait_for,
                              const lock_prdt_t *prdt) {
  ut_ad(locksys::owns_page_shard(m_rec_id.get_page_id()));
  ut_ad(m_trx == thr_get_trx(m_thr));

  /* It is not that the body of this function requires trx->mutex, but some of
  the functions it calls require it and it so happens that we always possess
  it, so it makes reasoning about code easier if we simply assert this fact. */
  ut_ad(trx_mutex_own(m_trx));

  DEBUG_SYNC_C("rec_lock_add_to_waitq");

  if (m_trx->in_innodb & TRX_FORCE_ROLLBACK_ASYNC) {
    return (DB_DEADLOCK);
  }

  m_mode |= LOCK_WAIT;

  /* Do the preliminary checks, and set query thread state */

  prepare();

  /* Don't queue the lock to hash table, if high priority transaction. */
  lock_t *lock = create(m_trx, prdt);

  lock_create_wait_for_edge(m_trx, wait_for->trx);

  ut_ad(lock_get_wait(lock));

  set_wait_state(lock);

  MONITOR_INC(MONITOR_LOCKREC_WAIT);

  /* m_trx->mysql_thd is NULL if it's an internal trx. So current_thd
  is used */

  thd_report_row_lock_wait(current_thd, wait_for->trx->mysql_thd);

  return (DB_LOCK_WAIT);
}

/** Moves a granted lock to the front of the queue for a given record, by
removing it and re-inserting it at the front. As a single lock can correspond
to multiple rows (and thus: queues) this function moves it to the front of the
whole bucket.
@param[in] lock a granted lock to be moved
@param[in] rec_id record id which specifies the particular queue and bucket */
static void lock_rec_move_granted_to_front(lock_t *lock, const RecID &rec_id) {
  ut_ad(!lock->is_waiting());
  ut_ad(rec_id.matches(lock));
  ut_ad(locksys::owns_page_shard(rec_id.get_page_id()));
  ut_ad(locksys::owns_page_shard(lock->rec_lock.page_id));

  const auto hash_table = lock->hash_table();
  HASH_DELETE(lock_t, hash, hash_table, rec_id.fold(), lock);
  lock_rec_insert_to_granted(hash_table, lock, rec_id);
}

/** Looks for a suitable type record lock struct by the same trx on the same
page. This can be used to save space when a new record lock should be set on a
page: no new struct is needed, if a suitable old one is found.
@param[in] type_mode lock type_mode field
@param[in] heap_no heap number of the record we plan to use.
                   The lock struct we search for needs to
                   have a bitmap at least as large.
@param[in] lock lock_rec_get_first_on_page()
@param[in] trx transaction
@param[out] found_waiter_before_lock true iff there is a waiting lock before
                   the returned lock
@return lock or nullptr if there is no lock we could reuse */
UNIV_INLINE
lock_t *lock_rec_find_similar_on_page(uint32_t type_mode, size_t heap_no,
                                      lock_t *lock, const trx_t *trx,
                                      bool &found_waiter_before_lock) {
  ut_ad(locksys::owns_page_shard(lock->rec_lock.page_id));
  found_waiter_before_lock = false;
  for (/* No op */; lock != nullptr; lock = lock_rec_get_next_on_page(lock)) {
    if (lock->trx == trx && lock->type_mode == type_mode &&
        heap_no < lock_rec_get_n_bits(lock)) {
      return (lock);
    }
    if (lock->is_waiting()) {
      found_waiter_before_lock = true;
    }
  }
  found_waiter_before_lock = false;
  return (nullptr);
}

/** Adds a record lock request in the record queue. The request is normally
added as the last in the queue, but if the request to be added is not a waiting
request, we can reuse a suitable record lock object already existing on the
same page, just setting the appropriate bit in its bitmap. This is a low-level
function which does NOT check for deadlocks or lock compatibility!
@param[in] type_mode lock mode, wait, gap etc. flags; type is
                     ignored and replaced by LOCK_REC
@param[in] block buffer block containing the record
@param[in] heap_no heap number of the record
@param[in] index index of record
@param[in,out] trx transaction
@param[in] we_own_trx_mutex true iff the caller owns trx->mutex (optional).
                     Defaults to false. */
static void lock_rec_add_to_queue(ulint type_mode, const buf_block_t *block,
                                  const ulint heap_no, dict_index_t *index,
                                  trx_t *trx,
                                  const bool we_own_trx_mutex = false) {
#ifdef UNIV_DEBUG
  ut_ad(locksys::owns_page_shard(block->get_page_id()));
  ut_ad(we_own_trx_mutex == trx_mutex_own(trx));

  ut_ad(index->is_clustered() ||
        dict_index_get_online_status(index) != ONLINE_INDEX_CREATION);
  switch (type_mode & LOCK_MODE_MASK) {
    case LOCK_X:
    case LOCK_S:
      break;
    default:
      ut_error;
  }

  if (!(type_mode & (LOCK_WAIT | LOCK_GAP))) {
    lock_mode mode = (type_mode & LOCK_MODE_MASK) == LOCK_S ? LOCK_X : LOCK_S;
    const lock_t *other_lock =
        lock_rec_other_has_expl_req(mode, block, false, heap_no, trx);
    ut_a(!other_lock);
  }
#endif /* UNIV_DEBUG */

  type_mode |= LOCK_REC;

  /* If rec is the supremum record, then we can reset the gap bit, as
  all locks on the supremum are automatically of the gap type, and we
  try to avoid unnecessary memory consumption of a new record lock
  struct for a gap type lock */

  if (heap_no == PAGE_HEAP_NO_SUPREMUM) {
    ut_ad(!(type_mode & LOCK_REC_NOT_GAP));

    /* There should never be LOCK_REC_NOT_GAP on a supremum
    record, but let us play safe */

    type_mode &= ~(LOCK_GAP | LOCK_REC_NOT_GAP);
  }

  if (!(type_mode & LOCK_WAIT)) {
    hash_table_t *const hash = lock_hash_get(type_mode);
    lock_t *const first_lock = lock_rec_get_first_on_page(hash, block);

    if (first_lock != nullptr) {
      /* Look for a similar record lock on the same page:
      if one is found we can just set the bit */

      bool found_waiter_before_lock = false;
      lock_t *lock = lock_rec_find_similar_on_page(
          type_mode, heap_no, first_lock, trx, found_waiter_before_lock);

      if (lock != nullptr) {
        /* Some B-tree reorganization functions, when moving locks from one
        place to another, can leave a lock_t struct with an empty bitmap. They
        also clear a LOCK_WAIT flag. This means it's possible that `lock` was
        a waiting lock in the past, and if we want to reuse it, we have to move
        it to the front of the queue where granted locks reside.
        We only NEED to do that if there are any waiting locks in front of it.
        We CAN move the lock to front ONLY IF it wasn't part of any queue.
        In other words, moving to front is not safe if it has non-empty bitmap.
        Moving a lock to the front of its queue can create endless loop in the
        caller if it is iterating over the queue.
        Fortunately, the only situation in which a GRANTED lock can be after a
        WAITING lock in the bucket is if it was WAITING in the past and the
        only bit for the heap_no was cleared, so it no longer belongs to any
        queue. */
        ut_ad(!found_waiter_before_lock ||
              (ULINT_UNDEFINED == lock_rec_find_set_bit(lock)));

        lock_rec_set_nth_bit(lock, heap_no);
        if (found_waiter_before_lock) {
          lock_rec_move_granted_to_front(lock, RecID{lock, heap_no});
        }
        return;
      }
    }
  }

  RecLock rec_lock(index, block, heap_no, type_mode);

  if (!we_own_trx_mutex) {
    trx_mutex_enter(trx);
  }
  rec_lock.create(trx);
  if (!we_own_trx_mutex) {
    trx_mutex_exit(trx);
  }
}

/** This is a fast routine for locking a record in the most common cases:
there are no explicit locks on the page, or there is just one lock, owned
by this transaction, and of the right type_mode. This is a low-level function
which does NOT look at implicit locks! Checks lock compatibility within
explicit locks. This function sets a normal next-key lock, or in the case of
a page supremum record, a gap type lock.
@return whether the locking succeeded LOCK_REC_SUCCESS,
LOCK_REC_SUCCESS_CREATED, LOCK_REC_FAIL */
UNIV_INLINE
lock_rec_req_status lock_rec_lock_fast(
    bool impl,                /*!< in: if true, no lock is set
                              if no wait is necessary: we
                              assume that the caller will
                              set an implicit lock */
    ulint mode,               /*!< in: lock mode: LOCK_X or
                              LOCK_S possibly ORed to either
                              LOCK_GAP or LOCK_REC_NOT_GAP */
    const buf_block_t *block, /*!< in: buffer block containing
                              the record */
    ulint heap_no,            /*!< in: heap number of record */
    dict_index_t *index,      /*!< in: index of record */
    que_thr_t *thr)           /*!< in: query thread */
{
  ut_ad(locksys::owns_page_shard(block->get_page_id()));
  ut_ad(!srv_read_only_mode);
  ut_ad((LOCK_MODE_MASK & mode) != LOCK_S ||
        lock_table_has(thr_get_trx(thr), index->table, LOCK_IS));
  ut_ad((LOCK_MODE_MASK & mode) != LOCK_X ||
        lock_table_has(thr_get_trx(thr), index->table, LOCK_IX) ||
        srv_read_only_mode);
  ut_ad((LOCK_MODE_MASK & mode) == LOCK_S ||
        (LOCK_MODE_MASK & mode) == LOCK_X);
  ut_ad(mode - (LOCK_MODE_MASK & mode) == LOCK_GAP ||
        mode - (LOCK_MODE_MASK & mode) == 0 ||
        mode - (LOCK_MODE_MASK & mode) == LOCK_REC_NOT_GAP);
  ut_ad(index->is_clustered() || !dict_index_is_online_ddl(index));
  ut_ad(!(mode & LOCK_PREDICATE));
  ut_ad(!(mode & LOCK_PRDT_PAGE));
  DBUG_EXECUTE_IF("innodb_report_deadlock", return (LOCK_REC_FAIL););

  lock_t *lock = lock_rec_get_first_on_page(lock_sys->rec_hash, block);

  trx_t *trx = thr_get_trx(thr);
  ut_ad(!trx_mutex_own(trx));

  lock_rec_req_status status = LOCK_REC_SUCCESS;

  if (lock == nullptr) {
    if (!impl) {
      RecLock rec_lock(index, block, heap_no, mode);

      trx_mutex_enter(trx);
      rec_lock.create(trx);
      trx_mutex_exit(trx);

      status = LOCK_REC_SUCCESS_CREATED;
    }
  } else {
    trx_mutex_enter(trx);

    if (lock_rec_get_next_on_page(lock) != nullptr || lock->trx != trx ||
        lock->type_mode != (mode | LOCK_REC) ||
        lock_rec_get_n_bits(lock) <= heap_no) {
      status = LOCK_REC_FAIL;
    } else if (!impl) {
      /* If the nth bit of the record lock is already set
      then we do not set a new lock bit, otherwise we do
      set */
      if (!lock_rec_get_nth_bit(lock, heap_no)) {
        lock_rec_set_nth_bit(lock, heap_no);
        status = LOCK_REC_SUCCESS_CREATED;
      }
    }

    trx_mutex_exit(trx);
  }
  ut_ad(status == LOCK_REC_SUCCESS || status == LOCK_REC_SUCCESS_CREATED ||
        status == LOCK_REC_FAIL);
  return (status);
}

/** A helper function for lock_rec_lock_slow(), which grants a Next Key Lock
(either LOCK_X or LOCK_S as specified by `mode`) on <`block`,`heap_no`> in the
`index` to the `trx`, assuming that it already has a granted `held_lock`, which
is at least as strong as mode|LOCK_REC_NOT_GAP. It does so by either reusing the
lock if it already covers the gap, or by ensuring a separate GAP Lock, which in
combination with Record Lock satisfies the request.
@param[in] held_lock  a lock granted to `trx` which is at least as strong
                      as mode|LOCK_REC_NOT_GAP
@param[in] mode       requested lock mode: LOCK_X or LOCK_S
@param[in] block      buffer block containing the record to be locked
@param[in] heap_no    heap number of the record to be locked
@param[in] index      index of record to be locked
@param[in] trx        the transaction requesting the Next Key Lock */
static void lock_reuse_for_next_key_lock(const lock_t *held_lock, ulint mode,
                                         const buf_block_t *block,
                                         ulint heap_no, dict_index_t *index,
                                         trx_t *trx) {
  ut_ad(mode == LOCK_S || mode == LOCK_X);
  ut_ad(lock_mode_is_next_key_lock(mode));

  if (!held_lock->is_record_not_gap()) {
    ut_ad(held_lock->is_next_key_lock());
    return;
  }

  /* We have a Record Lock granted, so we only need a GAP Lock. We assume
  that GAP Locks do not conflict with anything. Therefore a GAP Lock
  could be granted to us right now if we've requested: */
  mode |= LOCK_GAP;
  ut_ad(nullptr == lock_rec_other_has_conflicting(mode, block, heap_no, trx));

  /* It might be the case we already have one, so we first check that. */
  if (lock_rec_has_expl(mode, block, heap_no, trx) == nullptr) {
    lock_rec_add_to_queue(LOCK_REC | mode, block, heap_no, index, trx);
  }
}

/** This is the general, and slower, routine for locking a record. This is a
low-level function which does NOT look at implicit locks! Checks lock
compatibility within explicit locks. This function sets a normal next-key
lock, or in the case of a page supremum record, a gap type lock.
@param[in]      impl      if true, no lock is set if no wait is
                          necessary: we assume that the caller will
                          set an implicit lock
@param[in]      sel_mode  select mode: SELECT_ORDINARY,
                          SELECT_SKIP_LOCKED, or SELECT_NOWAIT
@param[in]      mode      lock mode: LOCK_X or LOCK_S possibly ORed to
                          either LOCK_GAP or LOCK_REC_NOT_GAP
@param[in]      block     buffer block containing the record
@param[in]      heap_no   heap number of record
@param[in]      index     index of record
@param[in,out]  thr       query thread
@return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, DB_LOCK_WAIT, DB_DEADLOCK,
DB_SKIP_LOCKED, or DB_LOCK_NOWAIT */
static dberr_t lock_rec_lock_slow(bool impl, select_mode sel_mode, ulint mode,
                                  const buf_block_t *block, ulint heap_no,
                                  dict_index_t *index, que_thr_t *thr) {
  ut_ad(locksys::owns_page_shard(block->get_page_id()));
  ut_ad(!srv_read_only_mode);
  ut_ad((LOCK_MODE_MASK & mode) != LOCK_S ||
        lock_table_has(thr_get_trx(thr), index->table, LOCK_IS));
  ut_ad((LOCK_MODE_MASK & mode) != LOCK_X ||
        lock_table_has(thr_get_trx(thr), index->table, LOCK_IX));
  ut_ad((LOCK_MODE_MASK & mode) == LOCK_S || (LOCK_MODE_MASK & mode) == LOCK_X);
  ut_ad(mode - (LOCK_MODE_MASK & mode) == LOCK_GAP ||
        mode - (LOCK_MODE_MASK & mode) == LOCK_ORDINARY ||
        mode - (LOCK_MODE_MASK & mode) == LOCK_REC_NOT_GAP);
  ut_ad(index->is_clustered() || !dict_index_is_online_ddl(index));

  DBUG_EXECUTE_IF("innodb_report_deadlock", return (DB_DEADLOCK););

  trx_t *trx = thr_get_trx(thr);

  ut_ad(sel_mode == SELECT_ORDINARY ||
        (sel_mode != SELECT_ORDINARY && !trx_is_high_priority(trx)));

  /* A very common type of lock in InnoDB is "Next Key Lock", which is almost
  equivalent to two locks: Record Lock and GAP Lock separately.
  Thus, in case we need to wait, we check if we already own a Record Lock,
  and if we do, we only need the GAP Lock.
  We don't do the opposite thing (of checking for GAP Lock, and only requesting
  Record Lock), because if Next Key Lock has to wait, then it is because of a
  conflict with someone who locked the record, as locks on gaps are compatible
  with each other, so even if we have a GAP Lock, narrowing the requested mode
  to Record Lock will not make the conflict go away.

  In the current implementation locks on the supremum are treated like GAP
  Locks, in particular they never have to wait for anything (unless they are
  Insert Intention locks, but we've ruled that out with asserts before getting
  here), so there is no gain in using the above "lock splitting" heuristic for
  locks on the supremum, and the reasoning becomes a bit simpler without this
  special case. */

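  /* To illustrate the splitting heuristic: if this trx already holds
  LOCK_X | LOCK_REC_NOT_GAP on heap_no and now requests LOCK_X (a Next Key
  Lock), then checked_mode below becomes LOCK_X | LOCK_REC_NOT_GAP, the held
  lock is found by lock_rec_has_expl(), and lock_reuse_for_next_key_lock()
  only has to ensure an additional LOCK_X | LOCK_GAP lock, which by the
  assumption above never has to wait. */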
  auto checked_mode =
      (heap_no != PAGE_HEAP_NO_SUPREMUM && lock_mode_is_next_key_lock(mode))
          ? mode | LOCK_REC_NOT_GAP
          : mode;

  const auto *held_lock = lock_rec_has_expl(checked_mode, block, heap_no, trx);

  if (held_lock != nullptr) {
    if (checked_mode == mode) {
      /* The trx already has a strong enough lock on rec: do nothing */
      return (DB_SUCCESS);
    }
    /* As checked_mode != mode, the mode is a Next Key Lock, which can not be
    emulated by an implicit lock (implicit locks are LOCK_REC_NOT_GAP only). */
    ut_ad(!impl);

    lock_reuse_for_next_key_lock(held_lock, mode, block, heap_no, index, trx);
    return (DB_SUCCESS);
  }

  const lock_t *wait_for =
      lock_rec_other_has_conflicting(mode, block, heap_no, trx);

  if (wait_for != nullptr) {
    switch (sel_mode) {
      case SELECT_SKIP_LOCKED:
        return (DB_SKIP_LOCKED);
      case SELECT_NOWAIT:
        return (DB_LOCK_NOWAIT);
      case SELECT_ORDINARY:
        /* If another transaction has a non-gap conflicting request in the
        queue, as this transaction does not have a lock strong enough already
        granted on the record, we may have to wait. */

        RecLock rec_lock(thr, index, block, heap_no, mode);

        trx_mutex_enter(trx);

        dberr_t err = rec_lock.add_to_waitq(wait_for);

        trx_mutex_exit(trx);

        ut_ad(err == DB_SUCCESS_LOCKED_REC || err == DB_LOCK_WAIT ||
              err == DB_DEADLOCK);
        return (err);
    }
  }
  if (!impl) {
    /* Set the requested lock on the record. */

    lock_rec_add_to_queue(LOCK_REC | mode, block, heap_no, index, trx);

    return (DB_SUCCESS_LOCKED_REC);
  }
  return (DB_SUCCESS);
}

/** Tries to lock the specified record in the mode requested. If not
immediately possible, enqueues a waiting lock request. This is a low-level
function which does NOT look at implicit locks! Checks lock compatibility
within explicit locks. This function sets a normal next-key lock, or in the
case of a page supremum record, a gap type lock.
@param[in]      impl      if true, no lock is set if no wait is
                          necessary: we assume that the caller will
                          set an implicit lock
@param[in]      sel_mode  select mode: SELECT_ORDINARY,
                          SELECT_SKIP_LOCKED, or SELECT_NOWAIT
@param[in]      mode      lock mode: LOCK_X or LOCK_S possibly ORed to
                          either LOCK_GAP or LOCK_REC_NOT_GAP
@param[in]      block     buffer block containing the record
@param[in]      heap_no   heap number of record
@param[in]      index     index of record
@param[in,out]  thr       query thread
@return DB_SUCCESS, DB_SUCCESS_LOCKED_REC, DB_LOCK_WAIT, DB_DEADLOCK,
DB_SKIP_LOCKED, or DB_LOCK_NOWAIT */
static dberr_t lock_rec_lock(bool impl, select_mode sel_mode, ulint mode,
                             const buf_block_t *block, ulint heap_no,
                             dict_index_t *index, que_thr_t *thr) {
  ut_ad(locksys::owns_page_shard(block->get_page_id()));
  ut_ad(!srv_read_only_mode);
  ut_ad((LOCK_MODE_MASK & mode) != LOCK_S ||
        lock_table_has(thr_get_trx(thr), index->table, LOCK_IS));
  ut_ad((LOCK_MODE_MASK & mode) != LOCK_X ||
        lock_table_has(thr_get_trx(thr), index->table, LOCK_IX));
  ut_ad((LOCK_MODE_MASK & mode) == LOCK_S || (LOCK_MODE_MASK & mode) == LOCK_X);
  ut_ad(mode - (LOCK_MODE_MASK & mode) == LOCK_GAP ||
        mode - (LOCK_MODE_MASK & mode) == LOCK_REC_NOT_GAP ||
        mode - (LOCK_MODE_MASK & mode) == 0);
  ut_ad(index->is_clustered() || !dict_index_is_online_ddl(index));
  /* Implicit locks are equivalent to LOCK_X|LOCK_REC_NOT_GAP, so we can omit
  creation of explicit lock only if the requested mode was LOCK_REC_NOT_GAP */
  ut_ad(!impl || ((mode & LOCK_REC_NOT_GAP) == LOCK_REC_NOT_GAP));
  /* We try a simplified and faster subroutine for the most
  common cases */
  switch (lock_rec_lock_fast(impl, mode, block, heap_no, index, thr)) {
    case LOCK_REC_SUCCESS:
      return (DB_SUCCESS);
    case LOCK_REC_SUCCESS_CREATED:
      return (DB_SUCCESS_LOCKED_REC);
    case LOCK_REC_FAIL:
      return (
          lock_rec_lock_slow(impl, sel_mode, mode, block, heap_no, index, thr));
    default:
      ut_error;
  }
}

/** Checks if a waiting record lock request still has to wait in a queue.
@param[in]  wait_lock     Waiting record lock
@param[in]  blocking_trx  If not nullptr, it restricts the search to only the
                          locks held by the blocking_trx, which is useful in
                          case when there might be multiple reasons for waiting
                          in queue, but we need to report the specific one.
                          Useful when reporting a deadlock cycle. (optional)
@return The conflicting lock which is the reason wait_lock has to wait
or nullptr if it can be granted now */
static const lock_t *lock_rec_has_to_wait_in_queue(
    const lock_t *wait_lock, const trx_t *blocking_trx = nullptr) {
  const lock_t *lock;
  ulint heap_no;
  ulint bit_mask;
  ulint bit_offset;
  hash_table_t *hash;

  ut_ad(lock_get_type_low(wait_lock) == LOCK_REC);
  const auto page_id = wait_lock->rec_lock.page_id;
  ut_ad(locksys::owns_page_shard(page_id));
  ut_ad(lock_get_wait(wait_lock));

  heap_no = lock_rec_find_set_bit(wait_lock);

  bit_offset = heap_no / 8;
  bit_mask = static_cast<ulint>(1) << (heap_no % 8);
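  /* For example, for heap_no == 10 this gives bit_offset == 1 and
  bit_mask == 1 << 2 == 0x4: the bit for heap_no 10 is bit 2 of the second
  byte of the bitmap, which is stored right after the lock_t struct itself
  (see the use of &lock[1] below). */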

  hash = lock_hash_get(wait_lock->type_mode);

  for (lock = lock_rec_get_first_on_page_addr(hash, page_id);
       lock != wait_lock; lock = lock_rec_get_next_on_page_const(lock)) {
    const byte *p = (const byte *)&lock[1];

    if ((blocking_trx == nullptr || blocking_trx == lock->trx) &&
        heap_no < lock_rec_get_n_bits(lock) && (p[bit_offset] & bit_mask) &&
        lock_has_to_wait(wait_lock, lock)) {
      return (lock);
    }
  }

  return (nullptr);
}

/** Grants a lock to a waiting lock request and releases the waiting
transaction. The caller must hold lock_sys latch for the shard containing the
lock, but not the lock->trx->mutex.
@param[in,out]  lock  waiting lock request */
static void lock_grant(lock_t *lock) {
  ut_ad(locksys::owns_lock_shard(lock));
  ut_ad(!trx_mutex_own(lock->trx));

  trx_mutex_enter(lock->trx);

  if (lock_get_mode(lock) == LOCK_AUTO_INC) {
    dict_table_t *table = lock->tab_lock.table;

    if (table->autoinc_trx == lock->trx) {
      ib::error(ER_IB_MSG_637) << "Transaction already had an"
                               << " AUTO-INC lock!";
    } else {
      ut_ad(table->autoinc_trx == nullptr);
      table->autoinc_trx = lock->trx;

      ib_vector_push(lock->trx->lock.autoinc_locks, &lock);
    }
  }

  DBUG_PRINT("ib_lock", ("wait for trx " TRX_ID_FMT " ends",
                         trx_get_id_for_print(lock->trx)));

  lock_reset_wait_and_release_thread_if_suspended(lock);
  ut_ad(trx_mutex_own(lock->trx));

  trx_mutex_exit(lock->trx);
}

void lock_make_trx_hit_list(trx_t *hp_trx, hit_list_t &hit_list) {
  trx_mutex_enter(hp_trx);
  const trx_id_t hp_trx_id = hp_trx->id;
  ut_ad(trx_can_be_handled_by_current_thread(hp_trx));
  ut_ad(trx_is_high_priority(hp_trx));
  /* To avoid the slow procedure involving a global exclusive latch below, we
  first check if this transaction is waiting for a lock at all. It's unsafe to
  read hp_trx->lock.wait_lock without latching the whole lock_sys as it might
  temporarily change to NULL during a concurrent B-tree reorganization, even
  though the trx actually is still waiting.
  TBD: Is it safe to use hp_trx->lock.que_state == TRX_QUE_LOCK_WAIT given that
  que_state is not atomic, and writes to it happen without trx->mutex? */
  const bool is_waiting = (hp_trx->lock.blocking_trx.load() != nullptr);
  trx_mutex_exit(hp_trx);
  if (!is_waiting) {
    return;
  }

  /* Current implementation of lock_make_trx_hit_list requires latching whole
  lock_sys for following reasons:
  1. it may call lock_cancel_waiting_and_release on a lock from completely
  different shard of lock_sys than hp_trx->lock.wait_lock. Trying to latch
  this other shard might create a deadlock cycle if it violates ordering of
  shard latches (and there is 50% chance it will violate it). Moreover the
  lock_cancel_waiting_and_release() requires an exclusive latch to avoid
  deadlocks among trx->mutex-es, and trx->lock.wait_lock might be a table lock,
  in which case exclusive latch is also needed to traverse table locks.
  2. it may call trx_mutex_enter on a transaction which is waiting for a
  lock, which violates one of assumptions used in the proof that a deadlock due
  to acquiring trx->mutex-es is impossible
  3. it attempts to read hp_trx->lock.wait_lock which might be modified by a
  thread during B-tree reorganization when moving locks between queues
  4. it attempts to operate on trx->lock.wait_lock of other transactions */
  locksys::Global_exclusive_latch_guard guard{};

  /* Check again */
  const lock_t *lock = hp_trx->lock.wait_lock;
  if (lock == nullptr || !lock->is_record_lock()) {
    return;
  }
  RecID rec_id{lock, lock_rec_find_set_bit(lock)};
  Lock_iter::for_each(
      rec_id,
      [&](lock_t *next) {
        trx_t *trx = next->trx;
        /* Check only for conflicting, granted locks on the current
        row. Currently, we don't roll back read-only transactions or
        transactions owned by background threads. */
        if (trx == hp_trx || next->is_waiting() || trx->read_only ||
            trx->mysql_thd == nullptr || !lock_has_to_wait(lock, next)) {
          return true;
        }

        trx_mutex_enter(trx);

        /* Skip high priority transactions, if already marked for
        abort by some other transaction or if ASYNC rollback is
        disabled. A transaction must complete kill/abort of a
        victim transaction once marked and added to hit list. */
        if (trx_is_high_priority(trx) ||
            (trx->in_innodb & TRX_FORCE_ROLLBACK) != 0 ||
            (trx->in_innodb & TRX_FORCE_ROLLBACK_ASYNC) != 0 ||
            (trx->in_innodb & TRX_FORCE_ROLLBACK_DISABLE) != 0 || trx->abort) {
          trx_mutex_exit(trx);

          return true;
        }

        /* If the transaction is waiting on some other resource then
        wake it up with DEAD_LOCK error so that it can rollback. */
        if (trx->lock.que_state == TRX_QUE_LOCK_WAIT) {
          /* Assert that it is not waiting for current record. */
          ut_ad(trx->lock.wait_lock != next);
#ifdef UNIV_DEBUG
          ib::info(ER_IB_MSG_639)
              << "High Priority Transaction (ID): " << lock->trx->id
              << " waking up blocking"
              << " transaction (ID): " << trx->id;
#endif /* UNIV_DEBUG */
          trx->lock.was_chosen_as_deadlock_victim = true;

          lock_cancel_waiting_and_release(trx->lock.wait_lock);

          trx_mutex_exit(trx);
          return true;
        }

        /* Mark for ASYNC Rollback and add to hit list. */
        lock_mark_trx_for_rollback(hit_list, hp_trx_id, trx);

        trx_mutex_exit(trx);
        return true;
      },
      lock->hash_table());
}

/** Cancels a waiting record lock request and releases the waiting transaction
that requested it. NOTE: does NOT check if waiting lock requests behind this
one can now be granted! */
static void lock_rec_cancel(
    lock_t *lock) /*!< in: waiting record lock request */
{
  ut_ad(lock_get_type_low(lock) == LOCK_REC);
  ut_ad(locksys::owns_page_shard(lock->rec_lock.page_id));

  /* Reset the bit (there can be only one set bit) in the lock bitmap */
  lock_rec_reset_nth_bit(lock, lock_rec_find_set_bit(lock));

  trx_mutex_enter(lock->trx);

  lock_reset_wait_and_release_thread_if_suspended(lock);

  trx_mutex_exit(lock->trx);
}

/** Given a waiting_lock, and blocking_lock which is the reason it has to wait,
makes sure that the (only) edge in the wait-for graph outgoing from the
waiting_lock->trx points to blocking_lock->trx
@param[in]  waiting_lock   A lock waiting in queue, blocked by blocking_lock
@param[in]  blocking_lock  A lock which is a reason the waiting_lock has to
                           wait */
static void lock_update_wait_for_edge(const lock_t *waiting_lock,
                                      const lock_t *blocking_lock) {
  ut_ad(locksys::owns_lock_shard(waiting_lock));
  ut_ad(locksys::owns_lock_shard(blocking_lock));
  ut_ad(waiting_lock->is_waiting());
  ut_ad(lock_has_to_wait(waiting_lock, blocking_lock));
  /* Still needs to wait, but perhaps the reason has changed */
  if (waiting_lock->trx->lock.blocking_trx.load() != blocking_lock->trx) {
    waiting_lock->trx->lock.blocking_trx.store(blocking_lock->trx);
    /* We call lock_wait_request_check_for_cycles() because the outgoing edge
    of wait_lock->trx has changed its endpoint and we need to analyze the
    wait-for-graph again. */
    lock_wait_request_check_for_cycles();
  }
}

/** Checks if a waiting record lock request still has to wait for granted
locks.
@param[in]  wait_lock          Waiting record lock
@param[in]  granted            Granted record locks
@param[in]  new_granted_index  Start of new granted locks
@return The conflicting lock which is the reason wait_lock has to wait
or nullptr if it can be granted now */
template <typename Container>
static const lock_t *lock_rec_has_to_wait_for_granted(
    const typename Container::value_type &wait_lock, const Container &granted,
    const size_t new_granted_index)

{
  ut_ad(locksys::owns_page_shard(wait_lock->rec_lock.page_id));
  ut_ad(wait_lock->is_record_lock());

  ut_ad(new_granted_index <= granted.size());

  /* We iterate over granted locks in reverse order.
  Conceptually this corresponds to chronological order.
  This way, we pick as blocking_trx the oldest reason for waiting we haven't
  yet analyzed in deadlock checker. Our hope is that eventually (perhaps after
  several such updates) we will set blocking_trx to the real cause of the
  deadlock, which is the next node on the deadlock cycle. */
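  /* For example, with granted == [g0, g1, g2, g3, g4] and
  new_granted_index == 3, the first loop below checks g2, g1, g0 (the old
  grants, newest first) and the second loop checks g3, g4 (the locks granted
  while processing the current queue). */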
  for (size_t i = new_granted_index; i--;) {
    const auto granted_lock = granted[i];
    if (lock_has_to_wait(wait_lock, granted_lock)) {
      return (granted_lock);
    }
  }

  for (size_t i = new_granted_index; i < granted.size(); ++i) {
    const auto granted_lock = granted[i];
    ut_ad(granted_lock->trx->error_state != DB_DEADLOCK);
    ut_ad(!granted_lock->trx->lock.was_chosen_as_deadlock_victim);

    if (lock_has_to_wait(wait_lock, granted_lock)) {
      return (granted_lock);
    }
  }

  return (nullptr);
}

/** Grants locks to waiting transactions. This function scans the queue of
locks in which in_lock resides (or resided), paying attention only to locks on
the heap_no-th bit. For each waiting lock which was blocked by in_lock->trx it
checks if it can be granted now. It iterates over the waiting locks in an
order which favors high-priority transactions, and then transactions of high
trx->lock.schedule_weight.
@param[in]  in_lock  Lock which was released, or partially released by
                     modifying its type/mode (see
                     lock_trx_release_read_locks) or resetting the heap_no-th
                     bit in the bitmap (see lock_rec_release)
@param[in]  heap_no  Heap number within the page on which the lock was
                     (or still is) held */
static void lock_rec_grant_by_heap_no(lock_t *in_lock, ulint heap_no) {
  const auto hash_table = in_lock->hash_table();

  ut_ad(in_lock->is_record_lock());
  ut_ad(locksys::owns_page_shard(in_lock->rec_lock.page_id));

  using LockDescriptorEx = std::pair<trx_schedule_weight_t, lock_t *>;
  /* Preallocate for 4 lists with 32 locks. */
  std::unique_ptr<mem_heap_t, decltype(&mem_heap_free)> heap(
      mem_heap_create((sizeof(lock_t *) * 3 + sizeof(LockDescriptorEx)) * 32),
      mem_heap_free);

  RecID rec_id{in_lock, heap_no};
  Locks<lock_t *> low_priority_light{heap.get()};
  Locks<lock_t *> waiting{heap.get()};
  Locks<lock_t *> granted{heap.get()};
  Locks<LockDescriptorEx> low_priority_heavier{heap.get()};

  const auto in_trx = in_lock->trx;
#ifdef UNIV_DEBUG
  bool seen_waiting_lock = false;
#endif
  Lock_iter::for_each(
      rec_id,
      [&](lock_t *lock) {
        /* Split the relevant locks in the queue into:
        - granted = granted locks
        - waiting = waiting locks of high priority transactions
        - low_priority_heavier = waiting locks of low priority, but heavy
          weight
        - low_priority_light = waiting locks of low priority and light weight
        */
        if (!lock->is_waiting()) {
          /* Granted locks should be before waiting locks. */
          ut_ad(!seen_waiting_lock);
          granted.push_back(lock);
          return (true);
        }
        ut_d(seen_waiting_lock = true);
        const auto trx = lock->trx;
        if (trx->error_state == DB_DEADLOCK ||
            trx->lock.was_chosen_as_deadlock_victim) {
          return (true);
        }
        /* We read blocking_trx while holding this lock_sys queue latched, and
        each write to blocking_trx is done while holding the latch. So, even
        though we use memory_order_relaxed we will see modifications performed
        before we acquired the latch. */
        const auto blocking_trx =
            trx->lock.blocking_trx.load(std::memory_order_relaxed);
        /* No one should be WAITING without good reason! */
        ut_ad(blocking_trx);
        /* We will only consider granting the `lock`, if we are the reason it
        was waiting. */
        if (blocking_trx != in_trx) {
          return (true);
        }
        if (trx_is_high_priority(trx)) {
          waiting.push_back(lock);
          return (true);
        }
        /* The values of schedule_weight are read with memory_order_relaxed as
        we care neither about having the most recent value, nor about any
        relative order between this load and other operations.
        As std::sort requires the order to be consistent during execution we
        have to take a snapshot of all schedule_weight atomics, so they don't
        change during the call to stable_sort in a way which causes the
        algorithm to crash. */
        const auto schedule_weight =
            trx->lock.schedule_weight.load(std::memory_order_relaxed);
        if (schedule_weight <= 1) {
          low_priority_light.push_back(lock);
          return (true);
        }
        low_priority_heavier.push_back(LockDescriptorEx{schedule_weight, lock});

        return (true);
      },
      hash_table);

  if (waiting.empty() && low_priority_light.empty() &&
      low_priority_heavier.empty()) {
    /* Nothing to grant. */
    return;
  }
  /* We want high schedule weight to be in front, and break ties by position */
  std::stable_sort(low_priority_heavier.begin(), low_priority_heavier.end(),
                   [](const LockDescriptorEx &a, const LockDescriptorEx &b) {
                     return (a.first > b.first);
                   });
  for (const auto &descriptor : low_priority_heavier) {
    waiting.push_back(descriptor.second);
  }
  waiting.insert(waiting.end(), low_priority_light.begin(),
                 low_priority_light.end());
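  /* At this point `waiting` is ordered: high priority transactions first (in
  queue order), then low priority locks with schedule_weight > 1 sorted by
  descending weight (ties kept in queue order by stable_sort), and finally the
  remaining low priority locks with weight <= 1, again in queue order. */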

  /* New granted locks will be added from this index. */
  const auto new_granted_index = granted.size();

  granted.reserve(granted.size() + waiting.size());

  for (lock_t *wait_lock : waiting) {
    /* Check if the transactions in the waiting queue have
    to wait for locks granted above. If they don't have to
    wait then grant them the locks and add them to the granted
    queue. */

    /* We don't expect to be a waiting trx, and we can't grant to ourselves as
    that would require entering trx->mutex while holding in_trx->mutex. */
    ut_ad(wait_lock->trx != in_trx);

    const lock_t *blocking_lock =
        lock_rec_has_to_wait_for_granted(wait_lock, granted, new_granted_index);
    if (blocking_lock == nullptr) {
      lock_grant(wait_lock);

      lock_rec_move_granted_to_front(wait_lock, rec_id);

      granted.push_back(wait_lock);
    } else {
      lock_update_wait_for_edge(wait_lock, blocking_lock);
    }
  }
}

/* Forward declaration to minimize the diff */
static const lock_t *lock_has_to_wait_in_queue(const lock_t *wait_lock,
                                               const trx_t *blocking_trx);

/** Given a lock, which was found in waiting queue, checks if it still has to
wait in queue, and either grants it, or makes sure that the reason it has to
wait is reflected in the wait-for graph.
@param[in]  lock  A lock in WAITING state, which perhaps can be granted now */
static void lock_grant_or_update_wait_for_edge(lock_t *lock) {
  ut_ad(lock->is_waiting());
  const lock_t *blocking_lock = lock_has_to_wait_in_queue(lock, nullptr);
  if (blocking_lock == nullptr) {
    /* Grant the lock */
    lock_grant(lock);
  } else {
    ut_ad(lock->trx != blocking_lock->trx);
    lock_update_wait_for_edge(lock, blocking_lock);
  }
}

/** Given a lock, and a transaction which is releasing another lock from the
same queue, makes sure that if the lock was waiting for this transaction, then
it will either be granted, or another reason for waiting is reflected in the
wait-for graph. */
static void lock_grant_or_update_wait_for_edge_if_waiting(
    lock_t *lock, const trx_t *releasing_trx) {
  if (lock->is_waiting() && lock->trx->lock.blocking_trx == releasing_trx) {
    ut_ad(lock->trx != releasing_trx);
    lock_grant_or_update_wait_for_edge(lock);
  }
}

/** Grants locks to waiting requests which no longer conflict.
The in_lock might be modified before the call to this function by clearing
some flag (see for example lock_trx_release_read_locks). It also might already
be removed from the hash bucket (a.k.a. waiting queue) or still reside in it.
However the content of the bitmap should not be changed prior to calling this
function, as the bitmap will be inspected to see which heap_no's at all were
blocked by this in_lock, and only locks waiting for those heap_no's will be
checked.
@param[in,out]  in_lock  record lock object: grant all non-conflicting
                         locks waiting behind this lock object */
static void lock_rec_grant(lock_t *in_lock) {
  const auto page_id = in_lock->rec_lock.page_id;
  auto lock_hash = in_lock->hash_table();

  /* In some scenarios, in particular in replication appliers, it is often the
  case that there are no WAITING locks, and in such a situation iterating over
  all bits and calling lock_rec_grant_by_heap_no() slows down the execution
  noticeably. (I guess that checking bits is not the costly part, but rather
  the allocation of vectors inside lock_rec_grant_by_heap_no.) Therefore we
  first check if there is any lock which is waiting at all.
  Note: This condition could be further narrowed to check if the `lock` is
  waiting for the `in_lock` and/or `lock->trx` is blocked by the
  `in_lock->trx`, and we could optimize lock_rec_grant_by_heap_no() to allocate
  vectors only if there are at least two waiters to arbitrate among, but in
  practice the current simple heuristic is good enough. */
  bool found_waiter = false;
  for (auto lock = lock_rec_get_first_on_page_addr(lock_hash, page_id);
       lock != nullptr; lock = lock_rec_get_next_on_page(lock)) {
    if (lock->is_waiting()) {
      found_waiter = true;
      break;
    }
  }
  if (found_waiter) {
    mon_type_t grant_attempts = 0;
    for (ulint heap_no = 0; heap_no < lock_rec_get_n_bits(in_lock); ++heap_no) {
      if (lock_rec_get_nth_bit(in_lock, heap_no)) {
        lock_rec_grant_by_heap_no(in_lock, heap_no);
        ++grant_attempts;
      }
    }
    MONITOR_INC_VALUE(MONITOR_RECLOCK_GRANT_ATTEMPTS, grant_attempts);
  }
  MONITOR_INC(MONITOR_RECLOCK_RELEASE_ATTEMPTS);
}

/** Removes a record lock request, waiting or granted, from the queue and
grants locks to other transactions in the queue if they now are entitled
to a lock. NOTE: all record locks contained in in_lock are removed.
@param[in,out]  in_lock  record lock object: all record locks which
                         are contained in this lock object are removed;
                         transactions waiting behind will get their
                         lock requests granted, if they are now
                         qualified to it */
static void lock_rec_dequeue_from_page(lock_t *in_lock) {
  lock_rec_discard(in_lock);
  lock_rec_grant(in_lock);
}

/** Removes a record lock request, waiting or granted, from the queue.
@param[in]  in_lock  record lock object: all record locks
                     which are contained in this lock object
                     are removed */
void lock_rec_discard(lock_t *in_lock) {
  ut_ad(lock_get_type_low(in_lock) == LOCK_REC);
  const auto page_id = in_lock->rec_lock.page_id;
  ut_ad(locksys::owns_page_shard(page_id));

  ut_ad(in_lock->index->table->n_rec_locks.load() > 0);
  in_lock->index->table->n_rec_locks.fetch_sub(1, std::memory_order_relaxed);

  /* We want the state of lock queue and trx_locks list to be synchronized
  atomically from the point of view of people using trx->mutex, so we perform
  HASH_DELETE and UT_LIST_REMOVE while holding trx->mutex. */

  ut_ad(trx_mutex_own(in_lock->trx));

  locksys::remove_from_trx_locks(in_lock);

  HASH_DELETE(lock_t, hash, lock_hash_get(in_lock->type_mode),
              lock_rec_fold(page_id), in_lock);

  MONITOR_INC(MONITOR_RECLOCK_REMOVED);
  MONITOR_DEC(MONITOR_NUM_RECLOCK);
}

/** Removes record lock objects set on an index page which is discarded. This
function does not move locks, or check for waiting locks, therefore the
lock bitmaps must already be reset when this function is called. */
static void lock_rec_free_all_from_discard_page_low(page_id_t page_id,
                                                    hash_table_t *lock_hash) {
  lock_t *lock;
  lock_t *next_lock;

  lock = lock_rec_get_first_on_page_addr(lock_hash, page_id);

  while (lock != nullptr) {
    ut_ad(lock_rec_find_set_bit(lock) == ULINT_UNDEFINED);
    ut_ad(!lock_get_wait(lock));

    next_lock = lock_rec_get_next_on_page(lock);

    trx_t *trx = lock->trx;
    trx_mutex_enter(trx);
    lock_rec_discard(lock);
    trx_mutex_exit(trx);

    lock = next_lock;
  }
}

/** Removes record lock objects set on an index page which is discarded. This
function does not move locks, or check for waiting locks, therefore the
lock bitmaps must already be reset when this function is called. */
void lock_rec_free_all_from_discard_page(
    const buf_block_t *block) /*!< in: page to be discarded */
{
  const auto page_id = block->get_page_id();
  ut_ad(locksys::owns_page_shard(page_id));

  lock_rec_free_all_from_discard_page_low(page_id, lock_sys->rec_hash);
  lock_rec_free_all_from_discard_page_low(page_id, lock_sys->prdt_hash);
  lock_rec_free_all_from_discard_page_low(page_id, lock_sys->prdt_page_hash);
}

/*============= RECORD LOCK MOVING AND INHERITING ===================*/

/** Resets the lock bits for a single record. Releases transactions waiting for
lock requests here. */
static void lock_rec_reset_and_release_wait_low(
    hash_table_t *hash,       /*!< in: hash table */
    const buf_block_t *block, /*!< in: buffer block containing
                              the record */
    ulint heap_no)            /*!< in: heap number of record */
{
  lock_t *lock;

  ut_ad(locksys::owns_page_shard(block->get_page_id()));

  for (lock = lock_rec_get_first(hash, block, heap_no); lock != nullptr;
       lock = lock_rec_get_next(heap_no, lock)) {
    if (lock_get_wait(lock)) {
      lock_rec_cancel(lock);
    } else {
      lock_rec_reset_nth_bit(lock, heap_no);
    }
  }
}

/** Resets the lock bits for a single record. Releases transactions waiting for
lock requests here. */
static void lock_rec_reset_and_release_wait(
    const buf_block_t *block, /*!< in: buffer block containing
                              the record */
    ulint heap_no)            /*!< in: heap number of record */
{
  lock_rec_reset_and_release_wait_low(lock_sys->rec_hash, block, heap_no);

  lock_rec_reset_and_release_wait_low(lock_sys->prdt_hash, block,
                                      PAGE_HEAP_NO_INFIMUM);
  lock_rec_reset_and_release_wait_low(lock_sys->prdt_page_hash, block,
                                      PAGE_HEAP_NO_INFIMUM);
}

void lock_on_statement_end(trx_t *trx) { trx->lock.inherit_all.store(false); }

/* Used to store the information that `thr` requested a lock asking for
protection at least until the end of the current statement, which requires the
lock to be inherited as a gap lock even in READ COMMITTED isolation level.
@param[in]  thr  the requesting thread */
UNIV_INLINE
void lock_protect_locks_till_statement_end(que_thr_t *thr) {
  thr_get_trx(thr)->lock.inherit_all.store(true);
}

/** Makes a record inherit the locks (except LOCK_INSERT_INTENTION type)
of another record as gap type locks, but does not reset the lock bits of
the other record. Also waiting lock requests on rec are inherited as
GRANTED gap locks. */
static void lock_rec_inherit_to_gap(
    const buf_block_t *heir_block, /*!< in: block containing the
                                   record which inherits */
    const buf_block_t *block,      /*!< in: block containing the
                                   record from which inherited;
                                   does NOT reset the locks on
                                   this record */
    ulint heir_heap_no,            /*!< in: heap_no of the
                                   inheriting record */
    ulint heap_no)                 /*!< in: heap_no of the
                                   donating record */
{
  lock_t *lock;

  ut_ad(locksys::owns_page_shard(heir_block->get_page_id()));
  ut_ad(locksys::owns_page_shard(block->get_page_id()));

  /* If session is using READ COMMITTED or READ UNCOMMITTED isolation
  level, we do not want locks set by an UPDATE or a DELETE to be
  inherited as gap type locks. But we DO want S-locks/X-locks (taken for
  replace) set by a consistency constraint to be inherited also then. */
  /* We also don't inherit these locks as gap type locks for DD tables
  because the serialization is guaranteed by MDL on DD tables. */

  /* Constraint checks place LOCK_S or (in case of INSERT ... ON DUPLICATE KEY
  UPDATE ... or REPLACE INTO ...) LOCK_X on records.
  If such a record is delete-marked, it may then become purged, and
  lock_rec_inherit_to_gap will be called to decide the fate of each lock on
  it: either it will be inherited as a gap lock, or discarded.
  In READ COMMITTED and less restrictive isolation levels we generally avoid
  gap locks, but we make an exception for precisely this situation: we want to
  inherit locks created for constraint checks.
  More precisely we need to keep inheriting them only for the duration of the
  query which has requested them, as such inserts have two phases: first they
  check for constraints, then they do the actual row insert, and they trust
  that the locks set in the first phase will survive till the second phase.
  It is not easy to tell if a particular lock was created for constraint check
  or not, because we do not store this bit of information on it.
  What we do, is we use a heuristic: whenever a trx requests a lock with
  lock_duration_t::AT_LEAST_STATEMENT we set trx->lock.inherit_all, meaning
  that locks of this trx need to be inherited.
  And we clear trx->lock.inherit_all on statement end. */

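  /* For example, if a trx performing INSERT ... ON DUPLICATE KEY UPDATE under
  READ COMMITTED requested its constraint-check lock with
  lock_duration_t::AT_LEAST_STATEMENT, then trx->lock.inherit_all is set, and
  if the delete-marked record it locked is purged between the check phase and
  the insert phase, the condition below still inherits that lock as a gap
  lock. */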
  for (lock = lock_rec_get_first(lock_sys->rec_hash, block, heap_no);
       lock != nullptr; lock = lock_rec_get_next(heap_no, lock)) {
    /* Skip inheriting lock if set */
    if (lock->trx->skip_lock_inheritance) {
      continue;
    }

    if (!lock_rec_get_insert_intention(lock) &&
        !lock->index->table->skip_gap_locks() &&
        (!lock->trx->skip_gap_locks() || lock->trx->lock.inherit_all.load())) {
      lock_rec_add_to_queue(LOCK_REC | LOCK_GAP | lock_get_mode(lock),
                            heir_block, heir_heap_no, lock->index, lock->trx);
    }
  }
}

/** Makes a record inherit the gap locks (except LOCK_INSERT_INTENTION type)
of another record as gap type locks, but does not reset the lock bits of the
other record. Also waiting lock requests are inherited as GRANTED gap locks. */
static void lock_rec_inherit_to_gap_if_gap_lock(
    const buf_block_t *block, /*!< in: buffer block */
    ulint heir_heap_no,       /*!< in: heap_no of
                              record which inherits */
    ulint heap_no)            /*!< in: heap_no of record
                              from which inherited;
                              does NOT reset the locks
                              on this record */
{
  lock_t *lock;

  locksys::Shard_latch_guard guard{block->get_page_id()};

  for (lock = lock_rec_get_first(lock_sys->rec_hash, block, heap_no);
       lock != nullptr; lock = lock_rec_get_next(heap_no, lock)) {
    /* Skip inheriting lock if set */
    if (lock->trx->skip_lock_inheritance) {
      continue;
    }

    if (!lock_rec_get_insert_intention(lock) &&
        (heap_no == PAGE_HEAP_NO_SUPREMUM || !lock_rec_get_rec_not_gap(lock))) {
      lock_rec_add_to_queue(LOCK_REC | LOCK_GAP | lock_get_mode(lock), block,
                            heir_heap_no, lock->index, lock->trx);
    }
  }
}

/** Moves the locks of a record to another record and resets the lock bits of
the donating record. */
static void lock_rec_move_low(
    hash_table_t *lock_hash,     /*!< in: hash table to use */
    const buf_block_t *receiver, /*!< in: buffer block containing
                                 the receiving record */
    const buf_block_t *donator,  /*!< in: buffer block containing
                                 the donating record */
    ulint receiver_heap_no,      /*!< in: heap_no of the record
                                 which gets the locks; there
                                 must be no lock requests
                                 on it! */
    ulint donator_heap_no)       /*!< in: heap_no of the record
                                 which gives the locks */
{
  lock_t *lock;

  ut_ad(locksys::owns_page_shard(receiver->get_page_id()));
  ut_ad(locksys::owns_page_shard(donator->get_page_id()));

  /* If the lock is a predicate lock, it resides on the INFIMUM record */
  ut_ad(lock_rec_get_first(lock_hash, receiver, receiver_heap_no) == nullptr ||
        lock_hash == lock_sys->prdt_hash ||
        lock_hash == lock_sys->prdt_page_hash);

  for (lock = lock_rec_get_first(lock_hash, donator, donator_heap_no);
       lock != nullptr; lock = lock_rec_get_next(donator_heap_no, lock)) {
    const ulint type_mode = lock->type_mode;

    lock_rec_reset_nth_bit(lock, donator_heap_no);

    if (type_mode & LOCK_WAIT) {
      lock_reset_lock_and_trx_wait(lock);
    }

    /* Note that we FIRST reset the bit, and then set the lock:
    the function works also if donator == receiver */

    lock_rec_add_to_queue(type_mode, receiver, receiver_heap_no, lock->index,
                          lock->trx);
  }

  ut_ad(lock_rec_get_first(lock_sys->rec_hash, donator, donator_heap_no) ==
        nullptr);
}

/** Move all the granted locks to the front of the given lock list.
All the waiting locks will be at the end of the list.
@param[in,out]  lock_list  the given lock list. */
static void lock_move_granted_locks_to_front(UT_LIST_BASE_NODE_T(lock_t) &
                                             lock_list) {
  lock_t *lock;

  bool seen_waiting_lock = false;

  for (lock = UT_LIST_GET_FIRST(lock_list); lock != nullptr;
       lock = UT_LIST_GET_NEXT(trx_locks, lock)) {
    if (!seen_waiting_lock) {
      if (lock->is_waiting()) {
        seen_waiting_lock = true;
      }
      continue;
    }

    ut_ad(seen_waiting_lock);

    if (!lock->is_waiting()) {
      lock_t *prev = UT_LIST_GET_PREV(trx_locks, lock);
      ut_a(prev);
      UT_LIST_MOVE_TO_FRONT(lock_list, lock);
      lock = prev;
    }
  }
}
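
/* For illustration of lock_move_granted_locks_to_front(): a list
[G1, G2, W1, G3, W2, G4] (G = granted, W = waiting) is processed as follows:
G3 is moved to the front, giving [G3, G1, G2, W1, W2, G4]; then G4 is moved
to the front, giving [G4, G3, G1, G2, W1, W2]. All granted locks thus end up
before all waiting locks. */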

/** Moves the locks of a record to another record and resets the lock bits of
the donating record. */
UNIV_INLINE
void lock_rec_move(const buf_block_t *receiver, /*!< in: buffer block containing
                                                the receiving record */
                   const buf_block_t *donator,  /*!< in: buffer block containing
                                                the donating record */
                   ulint receiver_heap_no,      /*!< in: heap_no of the record
                                                which gets the locks; there
                                                must be no lock requests
                                                on it! */
                   ulint donator_heap_no)       /*!< in: heap_no of the record
                                                which gives the locks */
{
  lock_rec_move_low(lock_sys->rec_hash, receiver, donator, receiver_heap_no,
                    donator_heap_no);
}

/** Updates the lock table when we have reorganized a page. NOTE: we copy
also the locks set on the infimum of the page; the infimum may carry
locks if an update of a record is occurring on the page, and its locks
were temporarily stored on the infimum. */
void lock_move_reorganize_page(
    const buf_block_t *block,  /*!< in: old index page, now
                               reorganized */
    const buf_block_t *oblock) /*!< in: copy of the old, not
                               reorganized page */
{
  lock_t *lock;
  UT_LIST_BASE_NODE_T(lock_t) old_locks;
  mem_heap_t *heap = nullptr;
  ulint comp;
  {
    /* We only process locks on block, not oblock */
    locksys::Shard_latch_guard guard{block->get_page_id()};

    /* FIXME: This needs to deal with predicate lock too */
    lock = lock_rec_get_first_on_page(lock_sys->rec_hash, block);

    if (lock == nullptr) {
      return;
    }

    heap = mem_heap_create(256);

    /* Copy first all the locks on the page to heap and reset the
    bitmaps in the original locks; chain the copies of the locks
    using the trx_locks field in them. */

    UT_LIST_INIT(old_locks, &lock_t::trx_locks);

    do {
      /* Make a copy of the lock */
      lock_t *old_lock = lock_rec_copy(lock, heap);

      UT_LIST_ADD_LAST(old_locks, old_lock);

      /* Reset bitmap of lock */
      lock_rec_bitmap_reset(lock);

      if (lock_get_wait(lock)) {
        lock_reset_lock_and_trx_wait(lock);
      }

      lock = lock_rec_get_next_on_page(lock);
    } while (lock != nullptr);

    comp = page_is_comp(block->frame);
    ut_ad(comp == page_is_comp(oblock->frame));

    lock_move_granted_locks_to_front(old_locks);

    DBUG_EXECUTE_IF("do_lock_reverse_page_reorganize",
                    UT_LIST_REVERSE(old_locks););

    for (lock = UT_LIST_GET_FIRST(old_locks); lock != nullptr;
         lock = UT_LIST_GET_NEXT(trx_locks, lock)) {
      /* NOTE: we copy also the locks set on the infimum and
      supremum of the page; the infimum may carry locks if an
      update of a record is occurring on the page, and its locks
      were temporarily stored on the infimum */
      const rec_t *rec1 = page_get_infimum_rec(buf_block_get_frame(block));
      const rec_t *rec2 = page_get_infimum_rec(buf_block_get_frame(oblock));

      /* Set locks according to old locks */
      for (;;) {
        ulint old_heap_no;
        ulint new_heap_no;

        if (comp) {
          old_heap_no = rec_get_heap_no_new(rec2);
          new_heap_no = rec_get_heap_no_new(rec1);

          rec1 = page_rec_get_next_low(rec1, true);
          rec2 = page_rec_get_next_low(rec2, true);
        } else {
          old_heap_no = rec_get_heap_no_old(rec2);
          new_heap_no = rec_get_heap_no_old(rec1);
          ut_ad(!memcmp(rec1, rec2, rec_get_data_size_old(rec2)));

          rec1 = page_rec_get_next_low(rec1, false);
          rec2 = page_rec_get_next_low(rec2, false);
        }

        /* Clear the bit in old_lock. */
        if (old_heap_no < lock->rec_lock.n_bits &&
            lock_rec_reset_nth_bit(lock, old_heap_no)) {
          /* NOTE that the old lock bitmap could be too
          small for the new heap number! */

          lock_rec_add_to_queue(lock->type_mode, block, new_heap_no,
                                lock->index, lock->trx);
        }

        if (new_heap_no == PAGE_HEAP_NO_SUPREMUM) {
          ut_ad(old_heap_no == PAGE_HEAP_NO_SUPREMUM);
          break;
        }
      }

      ut_ad(lock_rec_find_set_bit(lock) == ULINT_UNDEFINED);
    }
  } /* Shard_latch_guard */

  mem_heap_free(heap);

#ifdef UNIV_DEBUG_LOCK_VALIDATE
  ut_ad(lock_rec_validate_page(block));
#endif /* UNIV_DEBUG_LOCK_VALIDATE */
}

/** Moves the explicit locks on user records to another page if a record
list end is moved to another page. */
void lock_move_rec_list_end(
    const buf_block_t *new_block, /*!< in: index page to move to */
    const buf_block_t *block,     /*!< in: index page */
    const rec_t *rec)             /*!< in: record on page: this
                                  is the first record moved */
{
  lock_t *lock;
  const ulint comp = page_rec_is_comp(rec);

  ut_ad(buf_block_get_frame(block) == page_align(rec));
  ut_ad(comp == page_is_comp(buf_block_get_frame(new_block)));

  {
    locksys::Shard_latches_guard guard{*block, *new_block};

    for (lock = lock_rec_get_first_on_page(lock_sys->rec_hash, block); lock;
         lock = lock_rec_get_next_on_page(lock)) {
      const rec_t *rec1 = rec;
      const rec_t *rec2;
      const ulint type_mode = lock->type_mode;

      if (comp) {
        if (page_offset(rec1) == PAGE_NEW_INFIMUM) {
          rec1 = page_rec_get_next_low(rec1, true);
        }

        rec2 = page_rec_get_next_low(
            buf_block_get_frame(new_block) + PAGE_NEW_INFIMUM, true);
      } else {
        if (page_offset(rec1) == PAGE_OLD_INFIMUM) {
          rec1 = page_rec_get_next_low(rec1, false);
        }

        rec2 = page_rec_get_next_low(
            buf_block_get_frame(new_block) + PAGE_OLD_INFIMUM, false);
      }

      /* Copy lock requests on user records to new page and
      reset the lock bits on the old */

      for (;;) {
        ulint rec1_heap_no;
        ulint rec2_heap_no;

        if (comp) {
          rec1_heap_no = rec_get_heap_no_new(rec1);

          if (rec1_heap_no == PAGE_HEAP_NO_SUPREMUM) {
            break;
          }

          rec2_heap_no = rec_get_heap_no_new(rec2);
          rec1 = page_rec_get_next_low(rec1, true);
          rec2 = page_rec_get_next_low(rec2, true);
        } else {
          rec1_heap_no = rec_get_heap_no_old(rec1);

          if (rec1_heap_no == PAGE_HEAP_NO_SUPREMUM) {
            break;
          }

          rec2_heap_no = rec_get_heap_no_old(rec2);

          ut_ad(!memcmp(rec1, rec2, rec_get_data_size_old(rec2)));

          rec1 = page_rec_get_next_low(rec1, false);
          rec2 = page_rec_get_next_low(rec2, false);
        }

        if (rec1_heap_no < lock->rec_lock.n_bits &&
            lock_rec_reset_nth_bit(lock, rec1_heap_no)) {
          if (type_mode & LOCK_WAIT) {
            lock_reset_lock_and_trx_wait(lock);
          }

          lock_rec_add_to_queue(type_mode, new_block, rec2_heap_no, lock->index,
                                lock->trx);
        }
      }
    }
  } /* Shard_latches_guard */

#ifdef UNIV_DEBUG_LOCK_VALIDATE
  ut_ad(lock_rec_validate_page(block));
  ut_ad(lock_rec_validate_page(new_block));
#endif /* UNIV_DEBUG_LOCK_VALIDATE */
}

/** Moves the explicit locks on user records to another page if a record
list start is moved to another page. */
void lock_move_rec_list_start(
    const buf_block_t *new_block, /*!< in: index page to move to */
    const buf_block_t *block,     /*!< in: index page */
    const rec_t *rec,             /*!< in: record on page:
                                  this is the first
                                  record NOT copied */
    const rec_t *old_end)         /*!< in: old
                                  previous-to-last
                                  record on new_page
                                  before the records
                                  were copied */
{
  lock_t *lock;
  const ulint comp = page_rec_is_comp(rec);

  ut_ad(block->frame == page_align(rec));
  ut_ad(new_block->frame == page_align(old_end));
  ut_ad(comp == page_rec_is_comp(old_end));

  {
    locksys::Shard_latches_guard guard{*block, *new_block};

    for (lock = lock_rec_get_first_on_page(lock_sys->rec_hash, block); lock;
         lock = lock_rec_get_next_on_page(lock)) {
      const rec_t *rec1;
      const rec_t *rec2;
      const ulint type_mode = lock->type_mode;

      if (comp) {
        rec1 = page_rec_get_next_low(
            buf_block_get_frame(block) + PAGE_NEW_INFIMUM, true);
        rec2 = page_rec_get_next_low(old_end, true);
      } else {
        rec1 = page_rec_get_next_low(
            buf_block_get_frame(block) + PAGE_OLD_INFIMUM, false);
        rec2 = page_rec_get_next_low(old_end, false);
      }

      /* Copy lock requests on user records to new page and
      reset the lock bits on the old */

      while (rec1 != rec) {
        ulint rec1_heap_no;
        ulint rec2_heap_no;

        if (comp) {
          rec1_heap_no = rec_get_heap_no_new(rec1);
          rec2_heap_no = rec_get_heap_no_new(rec2);

          rec1 = page_rec_get_next_low(rec1, true);
          rec2 = page_rec_get_next_low(rec2, true);
        } else {
          rec1_heap_no = rec_get_heap_no_old(rec1);
          rec2_heap_no = rec_get_heap_no_old(rec2);

          ut_ad(!memcmp(rec1, rec2, rec_get_data_size_old(rec2)));

          rec1 = page_rec_get_next_low(rec1, false);
          rec2 = page_rec_get_next_low(rec2, false);
        }

        if (rec1_heap_no < lock->rec_lock.n_bits &&
            lock_rec_reset_nth_bit(lock, rec1_heap_no)) {
          if (type_mode & LOCK_WAIT) {
            lock_reset_lock_and_trx_wait(lock);
          }

          lock_rec_add_to_queue(type_mode, new_block, rec2_heap_no, lock->index,
                                lock->trx);
        }
      }

#ifdef UNIV_DEBUG
      if (page_rec_is_supremum(rec)) {
        ulint i;

        for (i = PAGE_HEAP_NO_USER_LOW; i < lock_rec_get_n_bits(lock); i++) {
          ut_a(!lock_rec_get_nth_bit(lock, i));
        }
      }
#endif /* UNIV_DEBUG */
    }
  } /* Shard_latches_guard */

#ifdef UNIV_DEBUG_LOCK_VALIDATE
  ut_ad(lock_rec_validate_page(block));
#endif /* UNIV_DEBUG_LOCK_VALIDATE */
}

/** Moves the explicit locks on user records to another page if a record
list start is moved to another page. */
void lock_rtr_move_rec_list(
    const buf_block_t *new_block, /*!< in: index page to move to */
    const buf_block_t *block,     /*!< in: index page */
    rtr_rec_move_t *rec_move,     /*!< in: recording records moved */
    ulint num_move)               /*!< in: num of rec to move */
{
  lock_t *lock;
  ulint comp;

  if (!num_move) {
    return;
  }

  comp = page_rec_is_comp(rec_move[0].old_rec);

  ut_ad(block->frame == page_align(rec_move[0].old_rec));
  ut_ad(new_block->frame == page_align(rec_move[0].new_rec));
  ut_ad(comp == page_rec_is_comp(rec_move[0].new_rec));

  {
    locksys::Shard_latches_guard guard{*new_block, *block};

    for (lock = lock_rec_get_first_on_page(lock_sys->rec_hash, block); lock;
         lock = lock_rec_get_next_on_page(lock)) {
      ulint moved = 0;
      const rec_t *rec1;
      const rec_t *rec2;
      const ulint type_mode = lock->type_mode;

      /* Copy lock requests on user records to new page and
      reset the lock bits on the old */

      while (moved < num_move) {
        ulint rec1_heap_no;
        ulint rec2_heap_no;

        rec1 = rec_move[moved].old_rec;
        rec2 = rec_move[moved].new_rec;

        if (comp) {
          rec1_heap_no = rec_get_heap_no_new(rec1);
          rec2_heap_no = rec_get_heap_no_new(rec2);

        } else {
          rec1_heap_no = rec_get_heap_no_old(rec1);
          rec2_heap_no = rec_get_heap_no_old(rec2);

          ut_ad(!memcmp(rec1, rec2, rec_get_data_size_old(rec2)));
        }

        if (rec1_heap_no < lock->rec_lock.n_bits &&
            lock_rec_reset_nth_bit(lock, rec1_heap_no)) {
          if (type_mode & LOCK_WAIT) {
            lock_reset_lock_and_trx_wait(lock);
          }

          lock_rec_add_to_queue(type_mode, new_block, rec2_heap_no, lock->index,
                                lock->trx);

          rec_move[moved].moved = true;
        }

        moved++;
      }
    }
  } /* Shard_latches_guard */

#ifdef UNIV_DEBUG_LOCK_VALIDATE
  ut_ad(lock_rec_validate_page(block));
#endif
}
2907 /** Updates the lock table when a page is split to the right. */
lock_update_split_right(const buf_block_t * right_block,const buf_block_t * left_block)2908 void lock_update_split_right(
2909 const buf_block_t *right_block, /*!< in: right page */
2910 const buf_block_t *left_block) /*!< in: left page */
2911 {
2912 ulint heap_no = lock_get_min_heap_no(right_block);
2913
2914 locksys::Shard_latches_guard guard{*left_block, *right_block};
2915
2916 /* Move the locks on the supremum of the left page to the supremum
2917 of the right page */
2918
2919 lock_rec_move(right_block, left_block, PAGE_HEAP_NO_SUPREMUM,
2920 PAGE_HEAP_NO_SUPREMUM);
2921
2922 /* Inherit the locks to the supremum of left page from the successor
2923 of the infimum on right page */
2924
2925 lock_rec_inherit_to_gap(left_block, right_block, PAGE_HEAP_NO_SUPREMUM,
2926 heap_no);
2927 }
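
/* Illustration (added comment, derived from the calls above): consider a page
with user records r1..r4 that is split between r2 and r3.

      before:  left = [ r1 r2 r3 r4 | supremum ]
      after:   left = [ r1 r2 | supremum ]   right = [ r3 r4 | supremum ]

Locks parked on the old supremum guarded the gap at the end of the page, which
now lives at the end of the right page, hence the lock_rec_move() above. The
gap before r3 also becomes visible as the gap before the left page's supremum,
so the left supremum inherits r3's gap locks via lock_rec_inherit_to_gap(),
keeping the locked ranges continuous across the page boundary. */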

/** Updates the lock table when a page is merged to the right. */
void lock_update_merge_right(
    const buf_block_t *right_block, /*!< in: right page
                                    to which merged */
    const rec_t *orig_succ,         /*!< in: original
                                    successor of infimum
                                    on the right page
                                    before merge */
    const buf_block_t *left_block)  /*!< in: merged
                                    index page which
                                    will be discarded */
{
  locksys::Shard_latches_guard guard{*left_block, *right_block};

  /* Inherit the locks from the supremum of the left page to the original
  successor of infimum on the right page, to which the left page was merged. */

  lock_rec_inherit_to_gap(right_block, left_block,
                          page_rec_get_heap_no(orig_succ),
                          PAGE_HEAP_NO_SUPREMUM);

  /* Reset the locks on the supremum of the left page, releasing waiting
  transactions. */

  lock_rec_reset_and_release_wait_low(lock_sys->rec_hash, left_block,
                                      PAGE_HEAP_NO_SUPREMUM);

  /* There should exist no page lock on the left page, otherwise, it will be
  blocked from merge. */
  ut_ad(lock_rec_get_first_on_page_addr(lock_sys->prdt_page_hash,
                                        left_block->get_page_id()) == nullptr);

  lock_rec_free_all_from_discard_page(left_block);
}

/** Updates the lock table when the root page is copied to another in
btr_root_raise_and_insert. Note that we leave lock structs on the
root page, even though they do not make sense on other than leaf
pages: the reason is that in a pessimistic update the infimum record
of the root page will act as a dummy carrier of the locks of the record
to be updated. */
void lock_update_root_raise(
    const buf_block_t *block, /*!< in: index page to which copied */
    const buf_block_t *root)  /*!< in: root page */
{
  locksys::Shard_latches_guard guard{*block, *root};

  /* Move the locks on the supremum of the root to the supremum
  of block */

  lock_rec_move(block, root, PAGE_HEAP_NO_SUPREMUM, PAGE_HEAP_NO_SUPREMUM);
}

/** Updates the lock table when a page is copied to another and the original
page is removed from the chain of leaf pages, except if page is the root! */
void lock_update_copy_and_discard(
    const buf_block_t *new_block, /*!< in: index page to
                                  which copied */
    const buf_block_t *block)     /*!< in: index page;
                                  NOT the root! */
{
  locksys::Shard_latches_guard guard{*new_block, *block};

  /* Move the locks on the supremum of the old page to the supremum
  of new_page */

  lock_rec_move(new_block, block, PAGE_HEAP_NO_SUPREMUM, PAGE_HEAP_NO_SUPREMUM);
  lock_rec_free_all_from_discard_page(block);
}

/** Updates the lock table when a page is split to the left. */
void lock_update_split_left(
    const buf_block_t *right_block, /*!< in: right page */
    const buf_block_t *left_block)  /*!< in: left page */
{
  ulint heap_no = lock_get_min_heap_no(right_block);

  locksys::Shard_latches_guard guard{*left_block, *right_block};

  /* Inherit the locks to the supremum of the left page from the
  successor of the infimum on the right page */

  lock_rec_inherit_to_gap(left_block, right_block, PAGE_HEAP_NO_SUPREMUM,
                          heap_no);
}

/** Updates the lock table when a page is merged to the left. */
void lock_update_merge_left(
    const buf_block_t *left_block,  /*!< in: left page to
                                    which merged */
    const rec_t *orig_pred,         /*!< in: original predecessor
                                    of supremum on the left page
                                    before merge */
    const buf_block_t *right_block) /*!< in: merged index page
                                    which will be discarded */
{
  const rec_t *left_next_rec;

  ut_ad(left_block->frame == page_align(orig_pred));

  locksys::Shard_latches_guard guard{*left_block, *right_block};

  left_next_rec = page_rec_get_next_const(orig_pred);

  if (!page_rec_is_supremum(left_next_rec)) {
    /* Inherit the locks on the supremum of the left page to the
    first record which was moved from the right page */

    lock_rec_inherit_to_gap(left_block, left_block,
                            page_rec_get_heap_no(left_next_rec),
                            PAGE_HEAP_NO_SUPREMUM);

    /* Reset the locks on the supremum of the left page,
    releasing waiting transactions */

    lock_rec_reset_and_release_wait_low(lock_sys->rec_hash, left_block,
                                        PAGE_HEAP_NO_SUPREMUM);
  }

  /* Move the locks from the supremum of right page to the supremum
  of the left page */

  lock_rec_move(left_block, right_block, PAGE_HEAP_NO_SUPREMUM,
                PAGE_HEAP_NO_SUPREMUM);

  /* There should exist no page lock on the right page, otherwise,
  it will be blocked from merge */
  ut_ad(lock_rec_get_first_on_page_addr(lock_sys->prdt_page_hash,
                                        right_block->get_page_id()) == nullptr);

  lock_rec_free_all_from_discard_page(right_block);
}

/** Resets the original locks on heir and replaces them with gap type locks
inherited from rec. */
void lock_rec_reset_and_inherit_gap_locks(
    const buf_block_t *heir_block, /*!< in: block containing the
                                   record which inherits */
    const buf_block_t *block,      /*!< in: block containing the
                                   record from which inherited;
                                   does NOT reset the locks on
                                   this record */
    ulint heir_heap_no,            /*!< in: heap_no of the
                                   inheriting record */
    ulint heap_no)                 /*!< in: heap_no of the
                                   donating record */
{
  locksys::Shard_latches_guard guard{*heir_block, *block};

  lock_rec_reset_and_release_wait(heir_block, heir_heap_no);

  lock_rec_inherit_to_gap(heir_block, block, heir_heap_no, heap_no);
}

/** Updates the lock table when a page is discarded. */
void lock_update_discard(
    const buf_block_t *heir_block, /*!< in: index page
                                   which will inherit the locks */
    ulint heir_heap_no,            /*!< in: heap_no of the record
                                   which will inherit the locks */
    const buf_block_t *block)      /*!< in: index page
                                   which will be discarded */
{
  const rec_t *rec;
  ulint heap_no;
  const page_t *page = block->frame;

  locksys::Shard_latches_guard guard{*heir_block, *block};

  if (!lock_rec_get_first_on_page(lock_sys->rec_hash, block) &&
      (!lock_rec_get_first_on_page(lock_sys->prdt_page_hash, block)) &&
      (!lock_rec_get_first_on_page(lock_sys->prdt_hash, block))) {
    /* No locks exist on page, nothing to do */

    return;
  }

  /* Inherit all the locks on the page to the record and reset all
  the locks on the page */

  if (page_is_comp(page)) {
    rec = page + PAGE_NEW_INFIMUM;

    do {
      heap_no = rec_get_heap_no_new(rec);

      lock_rec_inherit_to_gap(heir_block, block, heir_heap_no, heap_no);

      lock_rec_reset_and_release_wait(block, heap_no);

      rec = page + rec_get_next_offs(rec, true);
    } while (heap_no != PAGE_HEAP_NO_SUPREMUM);
  } else {
    rec = page + PAGE_OLD_INFIMUM;

    do {
      heap_no = rec_get_heap_no_old(rec);

      lock_rec_inherit_to_gap(heir_block, block, heir_heap_no, heap_no);

      lock_rec_reset_and_release_wait(block, heap_no);

      rec = page + rec_get_next_offs(rec, false);
    } while (heap_no != PAGE_HEAP_NO_SUPREMUM);
  }

  lock_rec_free_all_from_discard_page(block);
}

/** Updates the lock table when a new user record is inserted. */
void lock_update_insert(
    const buf_block_t *block, /*!< in: buffer block containing rec */
    const rec_t *rec)         /*!< in: the inserted record */
{
  ulint receiver_heap_no;
  ulint donator_heap_no;

  ut_ad(block->frame == page_align(rec));

  /* Inherit the gap-locking locks for rec, in gap mode, from the next
  record */

  if (page_rec_is_comp(rec)) {
    receiver_heap_no = rec_get_heap_no_new(rec);
    donator_heap_no = rec_get_heap_no_new(page_rec_get_next_low(rec, true));
  } else {
    receiver_heap_no = rec_get_heap_no_old(rec);
    donator_heap_no = rec_get_heap_no_old(page_rec_get_next_low(rec, false));
  }

  lock_rec_inherit_to_gap_if_gap_lock(block, receiver_heap_no, donator_heap_no);
}
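
/* Example (added comment): if T1 holds a gap lock on the gap before r3 in
[ r1 r3 ] and a new record r2 is inserted into that gap, r2 inherits the gap
lock from its successor r3. Afterwards both the gap before r2 and the gap
before r3 are covered, i.e. exactly the range T1 had locked before. */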

/** Updates the lock table when a record is removed. */
void lock_update_delete(
    const buf_block_t *block, /*!< in: buffer block containing rec */
    const rec_t *rec)         /*!< in: the record to be removed */
{
  const page_t *page = block->frame;
  ulint heap_no;
  ulint next_heap_no;

  ut_ad(page == page_align(rec));

  if (page_is_comp(page)) {
    heap_no = rec_get_heap_no_new(rec);
    next_heap_no = rec_get_heap_no_new(page + rec_get_next_offs(rec, true));
  } else {
    heap_no = rec_get_heap_no_old(rec);
    next_heap_no = rec_get_heap_no_old(page + rec_get_next_offs(rec, false));
  }

  locksys::Shard_latch_guard guard{block->get_page_id()};

  /* Let the next record inherit the locks from rec, in gap mode */

  lock_rec_inherit_to_gap(block, block, next_heap_no, heap_no);

  /* Reset the lock bits on rec and release waiting transactions */

  lock_rec_reset_and_release_wait(block, heap_no);
}
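
/* Example (added comment): the mirror image of lock_update_insert(). If T1
holds a next-key lock on r2 in [ r1 r2 r3 ] and r2 is removed, r3 inherits
T1's lock as a gap lock; the gap before r2 and r2 itself collapse into the
now-wider gap before r3, which therefore stays protected. */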

/** Stores on the page infimum record the explicit locks of another record.
This function is used to store the lock state of a record when it is
updated and the size of the record changes in the update. The record
is moved in such an update, perhaps to another page. The infimum record
acts as a dummy carrier record, taking care of lock releases while the
actual record is being moved. */
void lock_rec_store_on_page_infimum(
    const buf_block_t *block, /*!< in: buffer block containing rec */
    const rec_t *rec)         /*!< in: record whose lock state
                              is stored on the infimum
                              record of the same page; lock
                              bits are reset on the
                              record */
{
  ulint heap_no = page_rec_get_heap_no(rec);

  ut_ad(block->frame == page_align(rec));

  locksys::Shard_latch_guard guard{block->get_page_id()};

  lock_rec_move(block, block, PAGE_HEAP_NO_INFIMUM, heap_no);
}

/** Restores the state of explicit lock requests on a single record, where the
state was stored on the infimum of the page. */
void lock_rec_restore_from_page_infimum(
    const buf_block_t *block,   /*!< in: buffer block containing rec */
    const rec_t *rec,           /*!< in: record whose lock state
                                is restored */
    const buf_block_t *donator) /*!< in: page (rec is not
                                necessarily on this page)
                                whose infimum stored the lock
                                state; lock bits are reset on
                                the infimum */
{
  DEBUG_SYNC_C("lock_rec_restore_from_page_infimum_will_latch");
  ulint heap_no = page_rec_get_heap_no(rec);

  locksys::Shard_latches_guard guard{*block, *donator};

  lock_rec_move(block, donator, heap_no, PAGE_HEAP_NO_INFIMUM);
}
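
/* Usage sketch (added comment; a simplified, assumed pattern - the actual
call sites live in the B-tree cursor code, btr0cur.cc): a pessimistic update
parks the lock state on the infimum, moves the record, then restores it:

      lock_rec_store_on_page_infimum(block, rec);
      // ... delete rec and reinsert the updated version, possibly on
      //     another page, yielding new_block / new_rec ...
      lock_rec_restore_from_page_infimum(new_block, new_rec, block);

While the record is "in flight", the infimum acts as the dummy carrier, so
waiting transactions can still be released correctly in the meantime. */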

/*========================= TABLE LOCKS ==============================*/

/** Functor for accessing the embedded node within a table lock. */
struct TableLockGetNode {
  ut_list_node<lock_t> &operator()(lock_t &elem) {
    return (elem.tab_lock.locks);
  }
};

/** Creates a table lock object and adds it as the last in the lock queue
of the table. Does NOT check for deadlocks or lock compatibility.
@return own: new lock object */
UNIV_INLINE
lock_t *lock_table_create(dict_table_t *table, /*!< in/out: database table
                                               in dictionary cache */
                          ulint type_mode, /*!< in: lock mode possibly ORed with
                                           LOCK_WAIT */
                          trx_t *trx) /*!< in: trx */
{
  lock_t *lock;

  ut_ad(table && trx);
  ut_ad(locksys::owns_table_shard(*table));
  ut_ad(trx_mutex_own(trx));
  ut_ad(trx_can_be_handled_by_current_thread(trx));

  check_trx_state(trx);
  ++table->count_by_mode[type_mode & LOCK_MODE_MASK];
  /* For AUTOINC locking we reuse the lock instance only if
  there is no wait involved else we allocate the waiting lock
  from the transaction lock heap. */
  if (type_mode == LOCK_AUTO_INC) {
    lock = table->autoinc_lock;
    ut_ad(table->autoinc_trx == nullptr);
    table->autoinc_trx = trx;

    ib_vector_push(trx->lock.autoinc_locks, &lock);

  } else if (trx->lock.table_cached < trx->lock.table_pool.size()) {
    lock = trx->lock.table_pool[trx->lock.table_cached++];
  } else {
    lock = static_cast<lock_t *>(
        mem_heap_alloc(trx->lock.lock_heap, sizeof(*lock)));
  }
  lock->type_mode = uint32_t(type_mode | LOCK_TABLE);
  lock->trx = trx;
  ut_d(lock->m_seq = lock_sys->m_seq.fetch_add(1));

  lock->tab_lock.table = table;

  ut_ad(table->n_ref_count > 0 || !table->can_be_evicted);

#ifdef HAVE_PSI_THREAD_INTERFACE
#ifdef HAVE_PSI_DATA_LOCK_INTERFACE
  /* The performance schema THREAD_ID and EVENT_ID
  are used only when DATA_LOCKS are exposed. */
  PSI_THREAD_CALL(get_current_thread_event_id)
  (&lock->m_psi_internal_thread_id, &lock->m_psi_event_id);
#endif /* HAVE_PSI_DATA_LOCK_INTERFACE */
#endif /* HAVE_PSI_THREAD_INTERFACE */

  locksys::add_to_trx_locks(lock);

  ut_list_append(table->locks, lock, TableLockGetNode());

  if (type_mode & LOCK_WAIT) {
    lock_set_lock_and_trx_wait(lock);
  }

  lock->trx->lock.table_locks.push_back(lock);

  MONITOR_INC(MONITOR_TABLELOCK_CREATED);
  MONITOR_INC(MONITOR_NUM_TABLELOCK);

  return (lock);
}
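
/* Note (added comment, derived from the branch above): the test
`type_mode == LOCK_AUTO_INC` matches only an immediately granted AUTO-INC
request; a waiting request arrives as LOCK_AUTO_INC | LOCK_WAIT and therefore
falls through to the pool/heap branches. This is why a single per-table
autoinc_lock instance suffices: at most one transaction holds it at a time,
which is exactly what the assertion table->autoinc_trx == nullptr documents. */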

/** Pops autoinc lock requests from the transaction's autoinc_locks. We
handle the case where there are gaps in the array and they need to
be popped off the stack. */
UNIV_INLINE
void lock_table_pop_autoinc_locks(
    trx_t *trx) /*!< in/out: transaction that owns the AUTOINC locks */
{
  /* We will access and modify trx->lock.autoinc_locks so we need trx->mutex */
  ut_ad(trx_mutex_own(trx));
  ut_ad(!ib_vector_is_empty(trx->lock.autoinc_locks));

  /* Skip any gaps, gaps are NULL lock entries in the
  trx->autoinc_locks vector. */

  do {
    ib_vector_pop(trx->lock.autoinc_locks);

    if (ib_vector_is_empty(trx->lock.autoinc_locks)) {
      return;
    }

  } while (*(lock_t **)ib_vector_get_last(trx->lock.autoinc_locks) == nullptr);
}
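
/* Example (added comment): suppose the vector holds [L1, NULL, L3], the NULL
being a gap left by lock_table_remove_autoinc_lock() when L2 was released out
of order. A pop first removes L3, then keeps popping while the new last entry
is NULL, so the vector ends up as [L1]. */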

/** Removes an autoinc lock request from the transaction's autoinc_locks. */
UNIV_INLINE
void lock_table_remove_autoinc_lock(
    lock_t *lock, /*!< in: table lock */
    trx_t *trx)   /*!< in/out: transaction that owns the lock */
{
  /* We will access and modify trx->lock.autoinc_locks so we need trx->mutex */
  ut_ad(trx_mutex_own(trx));
  lock_t *autoinc_lock;
  lint i = ib_vector_size(trx->lock.autoinc_locks) - 1;

  ut_ad(lock_get_mode(lock) == LOCK_AUTO_INC);
  ut_ad(lock_get_type_low(lock) & LOCK_TABLE);
  ut_ad(locksys::owns_table_shard(*lock->tab_lock.table));
  ut_ad(!ib_vector_is_empty(trx->lock.autoinc_locks));

  /* With stored functions and procedures the user may drop
  a table within the same "statement". This special case has
  to be handled by deleting only those AUTOINC locks that were
  held by the table being dropped. */

  autoinc_lock =
      *static_cast<lock_t **>(ib_vector_get(trx->lock.autoinc_locks, i));

  /* This is the default fast case. */

  if (autoinc_lock == lock) {
    lock_table_pop_autoinc_locks(trx);
  } else {
    /* The last element should never be NULL */
    ut_a(autoinc_lock != nullptr);

    /* Handle freeing the locks from within the stack. */

    while (--i >= 0) {
      autoinc_lock =
          *static_cast<lock_t **>(ib_vector_get(trx->lock.autoinc_locks, i));

      if (autoinc_lock == lock) {
        void *null_var = nullptr;
        ib_vector_set(trx->lock.autoinc_locks, i, &null_var);
        return;
      }
    }

    /* Must find the autoinc lock. */
    ut_error;
  }
}

/** Removes a table lock request from the queue and the trx list of locks;
this is a low-level function which does NOT check if waiting requests
can now be granted. */
UNIV_INLINE
void lock_table_remove_low(lock_t *lock) /*!< in/out: table lock */
{
  trx_t *trx;
  dict_table_t *table;

  trx = lock->trx;
  /* We will modify trx->lock.trx_locks so we need trx->mutex */
  ut_ad(trx_mutex_own(trx));
  table = lock->tab_lock.table;
  ut_ad(locksys::owns_table_shard(*table));
  const auto lock_mode = lock_get_mode(lock);
  /* Remove the table from the transaction's AUTOINC vector, if
  the lock that is being released is an AUTOINC lock. */
  if (lock_mode == LOCK_AUTO_INC) {
    /* The table's AUTOINC lock might not have been granted to us yet. */
    ut_ad(table->autoinc_trx == trx || lock->is_waiting());
    if (table->autoinc_trx == trx) {
      table->autoinc_trx = nullptr;
    }

    /* The locks must be freed in the reverse order from
    the one in which they were acquired. This is to avoid
    traversing the AUTOINC lock vector unnecessarily.

    We only store locks that were granted in the
    trx->autoinc_locks vector (see lock_table_create()
    and lock_grant()). */

    if (!lock_get_wait(lock)) {
      lock_table_remove_autoinc_lock(lock, trx);
    }
  }
  ut_a(0 < table->count_by_mode[lock_mode]);
  --table->count_by_mode[lock_mode];

  locksys::remove_from_trx_locks(lock);

  ut_list_remove(table->locks, lock, TableLockGetNode());

  MONITOR_INC(MONITOR_TABLELOCK_REMOVED);
  MONITOR_DEC(MONITOR_NUM_TABLELOCK);
}

/** Enqueues a waiting request for a table lock which cannot be granted
immediately. Checks for deadlocks.
@return DB_LOCK_WAIT or DB_DEADLOCK */
static dberr_t lock_table_enqueue_waiting(
    ulint mode,          /*!< in: lock mode this transaction is
                         requesting */
    dict_table_t *table, /*!< in/out: table */
    que_thr_t *thr)      /*!< in: query thread */
{
  trx_t *trx;

  ut_ad(locksys::owns_table_shard(*table));
  ut_ad(!srv_read_only_mode);

  trx = thr_get_trx(thr);
  ut_ad(trx_mutex_own(trx));

  /* Test if there already is some other reason to suspend thread:
  we do not enqueue a lock request if the query thread should be
  stopped anyway */

  if (que_thr_stop(thr)) {
    ut_error;
  }

  switch (trx_get_dict_operation(trx)) {
    case TRX_DICT_OP_NONE:
      break;
    case TRX_DICT_OP_TABLE:
    case TRX_DICT_OP_INDEX:
      ib::error(ER_IB_MSG_642) << "A table lock wait happens in a dictionary"
                                  " operation. Table "
                               << table->name << ". " << BUG_REPORT_MSG;
      ut_ad(0);
  }

  if (trx->in_innodb & TRX_FORCE_ROLLBACK_ASYNC) {
    return (DB_DEADLOCK);
  }

  /* Enqueue the lock request that will wait to be granted */
  lock_table_create(table, mode | LOCK_WAIT, trx);

  trx->lock.que_state = TRX_QUE_LOCK_WAIT;

  trx->lock.wait_started = ut_time();
  trx->lock.was_chosen_as_deadlock_victim = false;

  auto stopped = que_thr_stop(thr);
  ut_a(stopped);

  MONITOR_INC(MONITOR_TABLELOCK_WAIT);

  return (DB_LOCK_WAIT);
}

/** Checks if other transactions have an incompatible mode lock request in
the lock queue.
@return lock or NULL */
UNIV_INLINE
const lock_t *lock_table_other_has_incompatible(
    const trx_t *trx,          /*!< in: transaction, or NULL if all
                               transactions should be included */
    ulint wait,                /*!< in: LOCK_WAIT if also
                               waiting locks are taken into
                               account, or 0 if not */
    const dict_table_t *table, /*!< in: table */
    lock_mode mode)            /*!< in: lock mode */
{
  const lock_t *lock;

  ut_ad(locksys::owns_table_shard(*table));

  // According to lock_compatibility_matrix, an intention lock can wait only
  // for LOCK_S or LOCK_X. If there are no LOCK_S nor LOCK_X locks in the
  // queue, then we can avoid iterating through the list and return
  // immediately. This might help in OLTP scenarios, with no DDL queries,
  // as then there are almost no LOCK_S nor LOCK_X locks, but many DML queries
  // still need to get an intention lock to perform their action - while this
  // never causes them to wait for a "data lock", it might cause them to wait
  // for the lock_sys table shard latch for the duration of the table lock
  // queue operation.

  if ((mode == LOCK_IS || mode == LOCK_IX) &&
      table->count_by_mode[LOCK_S] == 0 && table->count_by_mode[LOCK_X] == 0) {
    return nullptr;
  }

  for (lock = UT_LIST_GET_LAST(table->locks); lock != nullptr;
       lock = UT_LIST_GET_PREV(tab_lock.locks, lock)) {
    if (lock->trx != trx && !lock_mode_compatible(lock_get_mode(lock), mode) &&
        (wait || !lock_get_wait(lock))) {
      return (lock);
    }
  }

  return (nullptr);
}
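
/* Reference (added comment): the table lock compatibility matrix that the
fast path above relies on (cf. lock_compatibility_matrix; "+" = compatible):

            IS   IX   S    X    AI
      IS    +    +    +    -    +
      IX    +    +    -    -    +
      S     +    -    +    -    -
      X     -    -    -    -    -
      AI    +    +    -    -    -

Reading the IS and IX rows: the only "-" entries are in the S and X columns,
so when count_by_mode[LOCK_S] == 0 and count_by_mode[LOCK_X] == 0 an intention
request can never conflict and the queue scan can be skipped entirely. */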

/** Locks the specified database table in the mode given. If the lock cannot
be granted immediately, the query thread is put to wait.
@return DB_SUCCESS, DB_LOCK_WAIT, or DB_DEADLOCK */
dberr_t lock_table(ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG bit is set,
                                does nothing */
                   dict_table_t *table, /*!< in/out: database table
                                        in dictionary cache */
                   lock_mode mode, /*!< in: lock mode */
                   que_thr_t *thr) /*!< in: query thread */
{
  trx_t *trx;
  dberr_t err;
  const lock_t *wait_for;

  ut_ad(table && thr);

  /* Given the limited visibility of temp-tables we can avoid
  locking overhead */
  if ((flags & BTR_NO_LOCKING_FLAG) || srv_read_only_mode ||
      table->is_temporary()) {
    return (DB_SUCCESS);
  }

  ut_a(flags == 0);

  trx = thr_get_trx(thr);

  /* Look for equal or stronger locks the same trx already has on the table.
  Even though lock_table_has() takes trx->mutex internally, it does not protect
  us at all from "higher-level" races - for instance the state could change in
  theory after we exit lock_table_has() and before we return DB_SUCCESS, or
  before somebody who called us reacts to the DB_SUCCESS.
  In theory trx_t::table_locks can be modified in
  lock_trx_table_locks_remove which is called from:
  lock_release_autoinc_last_lock
  lock_release_autoinc_locks
  lock_cancel_waiting_and_release
  (this one seems to be called only when trx is waiting and not running)
  lock_unlock_table_autoinc
  (this one seems to be called from the thread running the transaction)
  lock_remove_all_on_table_for_trx
  lock_remove_all_on_table
  row_drop_table_for_mysql
  (this one is mysterious, as it is not obvious why we expect
  that someone will drop a table while there are locks on it)
  row_mysql_table_id_reassign
  row_discard_tablespace
  (there is some long explanation starting with "How do we prevent
  crashes caused by ongoing operations...")
  lock_remove_recovered_trx_record_locks
  (this seems to be used to remove locks of recovered transactions from a
  table being dropped, and recovered transactions shouldn't call lock_table)
  Also the InnoDB Memcached plugin causes a callchain:
  innodb_store -> innodb_conn_init -> innodb_api_begin -> innodb_cb_cursor_lock
  -> ib_cursor_set_lock_mode -> ib_cursor_lock -> ib_trx_lock_table_with_retry
  -> lock_table_for_trx -> lock_table -> lock_table_has
  in which lock_table_has sees trx->mysqld_thd different than current_thd.
  In practice this call to lock_table_has was never protected in any way
  before, so the situation now, after protecting it with trx->mutex, can't be
  worse. */

  if (lock_table_has(trx, table, mode)) {
    /* In debug mode we assert the same condition again, to help catch cases of
    a race condition, if it is possible at all, for further analysis. */
    ut_ad(lock_table_has(trx, table, mode));
    return (DB_SUCCESS);
  }

  /* Read only transactions can write to temp tables, we don't want
  to promote them to RW transactions. Their updates cannot be visible
  to other transactions. Therefore we can keep them out
  of the read views. */

  if ((mode == LOCK_IX || mode == LOCK_X) && !trx->read_only &&
      trx->rsegs.m_redo.rseg == nullptr) {
    trx_set_rw_mode(trx);
  }

  locksys::Shard_latch_guard table_latch_guard{*table};

  /* We have to check if the new lock is compatible with any locks
  other transactions have in the table lock queue. */

  wait_for = lock_table_other_has_incompatible(trx, LOCK_WAIT, table, mode);

  trx_mutex_enter(trx);

  /* Another trx has a request on the table in an incompatible
  mode: this trx may have to wait */

  if (wait_for != nullptr) {
    err = lock_table_enqueue_waiting(mode | flags, table, thr);
    if (err == DB_LOCK_WAIT) {
      lock_create_wait_for_edge(trx, wait_for->trx);
    }
  } else {
    lock_table_create(table, mode | flags, trx);

    ut_a(!flags || mode == LOCK_S || mode == LOCK_X);

    err = DB_SUCCESS;
  }

  trx_mutex_exit(trx);

  ut_ad(err == DB_SUCCESS || err == DB_LOCK_WAIT || err == DB_DEADLOCK);
  return (err);
}

/** Creates a table IX lock object for a resurrected transaction. */
void lock_table_ix_resurrect(dict_table_t *table, /*!< in/out: table */
                             trx_t *trx)          /*!< in/out: transaction */
{
  ut_ad(trx->is_recovered);

  if (lock_table_has(trx, table, LOCK_IX)) {
    return;
  }
  locksys::Shard_latch_guard table_latch_guard{*table};
  /* We have to check if the new lock is compatible with any locks
  other transactions have in the table lock queue. */

  ut_ad(!lock_table_other_has_incompatible(trx, LOCK_WAIT, table, LOCK_IX));

  trx_mutex_enter(trx);
  lock_table_create(table, LOCK_IX, trx);
  trx_mutex_exit(trx);
}

/** Checks if a waiting table lock request still has to wait in a queue.
@param[in]  wait_lock     Waiting table lock
@param[in]  blocking_trx  If not nullptr, it restricts the search to only the
                          locks held by the blocking_trx, which is useful in
                          case when there might be multiple reasons for waiting
                          in queue, but we need to report the specific one.
                          Useful when reporting a deadlock cycle. (optional)
@return The conflicting lock which is the reason wait_lock has to wait
or nullptr if it can be granted now */
static const lock_t *lock_table_has_to_wait_in_queue(
    const lock_t *wait_lock, const trx_t *blocking_trx = nullptr) {
  const dict_table_t *table;
  const lock_t *lock;

  ut_ad(lock_get_wait(wait_lock));

  table = wait_lock->tab_lock.table;
  ut_ad(locksys::owns_table_shard(*table));

  const auto mode = lock_get_mode(wait_lock);

  // According to lock_compatibility_matrix, an intention lock can wait only
  // for LOCK_S or LOCK_X. If there are no LOCK_S nor LOCK_X locks in the
  // queue, then we can avoid iterating through the list and return
  // immediately. This might help in OLTP scenarios, with no DDL queries,
  // as then there are almost no LOCK_S nor LOCK_X locks, but many DML queries
  // still need to get an intention lock to perform their action. When an
  // occasional DDL finishes and releases the LOCK_S or LOCK_X, it has to scan
  // the queue and grant any locks which were blocked by it. This can take
  // Omega(n^2) time if each of the n intention locks has to verify its
  // compatibility with all the other locks in the queue.

  if ((mode == LOCK_IS || mode == LOCK_IX) &&
      table->count_by_mode[LOCK_S] == 0 && table->count_by_mode[LOCK_X] == 0) {
    return (nullptr);
  }

  for (lock = UT_LIST_GET_FIRST(table->locks); lock != wait_lock;
       lock = UT_LIST_GET_NEXT(tab_lock.locks, lock)) {
    if ((blocking_trx == nullptr || blocking_trx == lock->trx) &&
        lock_has_to_wait(wait_lock, lock)) {
      return (lock);
    }
  }

  return (nullptr);
}
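
/* Example (added comment): the scan above only considers locks *ahead* of
wait_lock in the queue, and waiting locks count as conflicts too. Given

      [ S granted by T1,  X waiting by T2,  IS waiting by T3 ]

T3's IS request is compatible with T1's granted S, but it conflicts with T2's
waiting X sitting ahead of it, so T3 keeps waiting. Granting T3 past T2 would
let a steady stream of intention locks starve the X request. */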

/** Checks if a waiting lock request still has to wait in a queue.
@param[in]  wait_lock     Waiting lock
@param[in]  blocking_trx  If not nullptr, it restricts the search to only the
                          locks held by the blocking_trx, which is useful in
                          case when there might be multiple reasons for waiting
                          in queue, but we need to report the specific one.
                          Useful when reporting a deadlock cycle.
@return The conflicting lock which is the reason wait_lock has to wait
or nullptr if it can be granted now */
static const lock_t *lock_has_to_wait_in_queue(const lock_t *wait_lock,
                                               const trx_t *blocking_trx) {
  if (lock_get_type_low(wait_lock) == LOCK_REC) {
    return lock_rec_has_to_wait_in_queue(wait_lock, blocking_trx);
  } else {
    return lock_table_has_to_wait_in_queue(wait_lock, blocking_trx);
  }
}

/** Removes a table lock request, waiting or granted, from the queue and grants
locks to other transactions in the queue, if they now are entitled to a
lock. */
static void lock_table_dequeue(
    lock_t *in_lock) /*!< in/out: table lock object; transactions waiting
                     behind will get their lock requests granted, if
                     they are now qualified to it */
{
  /* This is needed for lock_table_remove_low(), but it's easier to understand
  the code if we assert it here as well */
  ut_ad(trx_mutex_own(in_lock->trx));
  ut_ad(locksys::owns_table_shard(*in_lock->tab_lock.table));
  ut_a(lock_get_type_low(in_lock) == LOCK_TABLE);

  const auto mode = lock_get_mode(in_lock);
  const auto table = in_lock->tab_lock.table;

  lock_t *lock = UT_LIST_GET_NEXT(tab_lock.locks, in_lock);

  lock_table_remove_low(in_lock);

  // According to lock_compatibility_matrix, an intention lock can block only
  // LOCK_S or LOCK_X from being granted, and thus, releasing an intention
  // lock can help in granting only LOCK_S or LOCK_X. If there are no LOCK_S
  // nor LOCK_X locks in the queue, then we can avoid iterating through the
  // list and return immediately. This might help in OLTP scenarios, with no
  // DDL queries, as then there are almost no LOCK_S nor LOCK_X locks, but many
  // DML queries still need to get an intention lock to perform their action -
  // while this never causes them to wait for a "data lock", it might cause
  // them to wait for the lock_sys table shard latch for the duration of the
  // table lock queue operation.
  if ((mode == LOCK_IS || mode == LOCK_IX) &&
      table->count_by_mode[LOCK_S] == 0 && table->count_by_mode[LOCK_X] == 0) {
    return;
  }

  /* Check if waiting locks in the queue can now be granted: grant
  locks if there are no conflicting locks ahead. */

  for (/* No op */; lock != nullptr;
       lock = UT_LIST_GET_NEXT(tab_lock.locks, lock)) {
    lock_grant_or_update_wait_for_edge_if_waiting(lock, in_lock->trx);
  }
}

/** Sets a lock on a table based on the given mode.
@param[in]      table  table to lock
@param[in,out]  trx    transaction
@param[in]      mode   LOCK_X or LOCK_S
@return error code or DB_SUCCESS. */
dberr_t lock_table_for_trx(dict_table_t *table, trx_t *trx,
                           enum lock_mode mode) {
  mem_heap_t *heap;
  que_thr_t *thr;
  dberr_t err;
  sel_node_t *node;
  heap = mem_heap_create(512);

  node = sel_node_create(heap);
  thr = pars_complete_graph_for_exec(node, trx, heap, nullptr);
  thr->graph->state = QUE_FORK_ACTIVE;

  /* We use the select query graph as the dummy graph needed
  in the lock module call */

  thr = static_cast<que_thr_t *>(que_fork_get_first_thr(
      static_cast<que_fork_t *>(que_node_get_parent(thr))));

  que_thr_move_to_run_state_for_mysql(thr, trx);

run_again:
  thr->run_node = thr;
  thr->prev_node = thr->common.parent;

  err = lock_table(0, table, mode, thr);

  trx->error_state = err;

  if (err == DB_SUCCESS) {
    que_thr_stop_for_mysql_no_error(thr, trx);
  } else {
    que_thr_stop_for_mysql(thr);

    auto was_lock_wait = row_mysql_handle_errors(&err, trx, thr, nullptr);

    if (was_lock_wait) {
      goto run_again;
    }
  }

  que_graph_free(thr->graph);
  trx->op_info = "";

  return (err);
}
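
/* Usage sketch (added comment; a simplified, assumed caller - the real ones
are DDL-style paths such as table drop): take a table-level X lock and let the
run_again loop above retry transparently across lock waits via
row_mysql_handle_errors():

      dberr_t err = lock_table_for_trx(table, trx, LOCK_X);
      if (err != DB_SUCCESS) {
        // err here would be a terminal error such as DB_DEADLOCK;
        // plain DB_LOCK_WAIT never escapes, it is retried internally.
      }
*/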

/*=========================== LOCK RELEASE ==============================*/

/** Grant a lock to waiting transactions.
@param[in]  lock     Lock that was unlocked
@param[in]  heap_no  Heap no within the page for the lock. */
static void lock_rec_release(lock_t *lock, ulint heap_no) {
  ut_ad(locksys::owns_page_shard(lock->rec_lock.page_id));
  ut_ad(!lock_get_wait(lock));
  ut_ad(lock_get_type_low(lock) == LOCK_REC);
  ut_ad(lock_rec_get_nth_bit(lock, heap_no));
  lock_rec_reset_nth_bit(lock, heap_no);

  lock_rec_grant_by_heap_no(lock, heap_no);
  MONITOR_INC(MONITOR_RECLOCK_GRANT_ATTEMPTS);
}

/** Removes a granted record lock of a transaction from the queue and grants
locks to other transactions waiting in the queue if they now are entitled
to a lock.
This function is meant to be used only by row_unlock_for_mysql, and it assumes
that the lock we are looking for has the LOCK_REC_NOT_GAP flag. */
void lock_rec_unlock(
    trx_t *trx,               /*!< in/out: transaction that has
                              set a record lock */
    const buf_block_t *block, /*!< in: buffer block containing rec */
    const rec_t *rec,         /*!< in: record */
    lock_mode lock_mode)      /*!< in: LOCK_S or LOCK_X */
{
  ut_ad(block->frame == page_align(rec));
  ut_ad(trx_state_eq(trx, TRX_STATE_ACTIVE));
  ut_ad(lock_mode == LOCK_S || lock_mode == LOCK_X);

  ulint heap_no = page_rec_get_heap_no(rec);

  {
    locksys::Shard_latch_guard guard{block->get_page_id()};
    trx_mutex_enter_first_of_two(trx);
    ut_ad(!trx->lock.wait_lock);

    lock_t *first_lock;

    first_lock = lock_rec_get_first(lock_sys->rec_hash, block, heap_no);

    /* Find the last lock with the same lock_mode and transaction
    on the record. */

    for (auto lock = first_lock; lock != nullptr;
         lock = lock_rec_get_next(heap_no, lock)) {
      if (lock->trx == trx && lock_get_mode(lock) == lock_mode &&
          lock_rec_get_rec_not_gap(lock)) {
#ifdef UNIV_DEBUG
        /* Since we actually found the first, not the last lock, let's check
        that it is also the last one */
        for (auto lock2 = lock_rec_get_next(heap_no, lock); lock2 != nullptr;
             lock2 = lock_rec_get_next(heap_no, lock2)) {
          ut_ad(!(lock2->trx == trx && lock_get_mode(lock2) == lock_mode &&
                  lock_rec_get_rec_not_gap(lock2)));
        }
#endif
        lock_rec_release(lock, heap_no);

        trx_mutex_exit(trx);

        return;
      }
    }

    trx_mutex_exit(trx);
  } /* Shard_latch_guard */

  {
    size_t stmt_len;

    auto stmt = innobase_get_stmt_unsafe(trx->mysql_thd, &stmt_len);

    ib::error err(ER_IB_MSG_1228);

    err << "Unlock row could not find a " << lock_mode
        << " mode lock on the record. Current statement: ";

    err.write(stmt, stmt_len);
  }
}

/** Unlock the GAP Lock part of a Next Key Lock and grant it to waiters (if
any).
@param[in,out]  lock  lock object */
static void lock_release_gap_lock(lock_t *lock) {
  /* 1. Remove the GAP lock for all records */
  lock->unlock_gap_lock();

  /* 2. Grant locks for all records */
  lock_rec_grant(lock);

  /* 3. Release explicitly all locks on the supremum record. This is required
  because a supremum record lock is always considered a GAP Lock, but the lock
  mode can be set to Next Key Lock for sharing lock objects with other records.

  We cannot release all locks on the supremum record in steps [1] & [2],
  because currently lock_rec_grant accepts the `lock` object as input, which is
  also part of the lock queue. If we unlocked the supremum record (reset the
  BIT) in step 1, then step 2 would fail to grant locks because the SUPREMUM
  record would be missing from the input `lock` record bit set. */
  if (lock->includes_supremum()) {
    lock_rec_release(lock, PAGE_HEAP_NO_SUPREMUM);
  }
}

/** Used to release a lock during PREPARE. The lock is only
released if rules permit it.
@param[in]  lock      the lock that we consider releasing
@param[in]  only_gap  true if we don't want to release records,
                      just the gaps between them */
static void lock_release_read_lock(lock_t *lock, bool only_gap) {
  if (!lock->is_record_lock() || lock->is_insert_intention() ||
      lock->is_predicate()) {
    /* DO NOTHING */
  } else if (lock->is_gap()) {
    /* Release any GAP only lock. */
    lock_rec_dequeue_from_page(lock);
  } else if (lock->is_record_not_gap() && only_gap) {
    /* Don't release any non-GAP lock if not asked. */
  } else if (lock->mode() == LOCK_S && !only_gap) {
    /* Release Shared Next Key Lock (SH + GAP) if asked for */
    lock_rec_dequeue_from_page(lock);
  } else {
    /* Release the GAP lock from the Next Key lock */
    lock_release_gap_lock(lock);
  }
}
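
/* Summary (added comment, derived from the dispatch above):
   - table, insert-intention and predicate locks are never released here;
   - a GAP-only record lock is always released entirely;
   - a REC_NOT_GAP lock is kept when only_gap=true (it protects a record,
     not a gap);
   - a shared (LOCK_S) next-key lock is released entirely when
     only_gap=false;
   - in every remaining case only the gap component is released, via
     lock_release_gap_lock(). */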

namespace locksys {

/** A helper function which solves a chicken-and-egg problem occurring when one
needs to iterate over trx's locks and perform some actions on them. Iterating
over this list requires trx->mutex (or exclusive global lock_sys latch), and
operating on a lock requires lock_sys latches, yet the latching order requires
lock_sys latches to be taken before trx->mutex.
One way around it is to use the exclusive global lock_sys latch, which heavily
deteriorates concurrency. Another is to try to reacquire the latches in the
needed order, verifying that the list wasn't modified meanwhile.
This function performs the following steps:
1. releases trx->mutex,
2. acquires the proper lock_sys shard latch,
3. reacquires trx->mutex,
4. executes f unless trx's locks list has changed.
Before and after this function the following should hold:
- the shared global lock_sys latch is held
- the trx->mutex is held
@param[in]  trx    the trx, locks of which we are interested in
@param[in]  shard  description of the shard we want to latch
@param[in]  f      the function to execute when the shard is latched
@return true if f was called, false if it couldn't be called because trx locks
have changed while relatching trx->mutex
*/
template <typename S, typename F>
static bool try_relatch_trx_and_shard_and_do(const trx_t *const trx,
                                             const S &shard, F &&f) {
  ut_ad(locksys::owns_shared_global_latch());
  ut_ad(trx_mutex_own(trx));

  const auto expected_version = trx->lock.trx_locks_version;
  trx_mutex_exit(trx);
  DEBUG_SYNC_C("try_relatch_trx_and_shard_and_do_noted_expected_version");
  locksys::Shard_naked_latch_guard guard{shard};
  trx_mutex_enter_first_of_two(trx);

  /* Check that the list was not modified while we were reacquiring latches */
  if (expected_version != trx->lock.trx_locks_version) {
    /* Someone has modified the list while we were re-acquiring the latches, so
    it is unsafe to operate on the lock. It might have been released, or maybe
    even assigned to another transaction (in case of an AUTOINC lock). More
    importantly, we need to let the caller know that the list it is iterating
    over has been modified, which affects next/prev pointers. */
    return false;
  }

  std::forward<F>(f)();
  return true;
}

/** A helper function which solves a chicken-and-egg problem occurring when one
needs to iterate over trx's locks and perform some actions on them. Iterating
over this list requires trx->mutex (or exclusive global lock_sys latch), and
operating on a lock requires lock_sys latches, yet the latching order requires
lock_sys latches to be taken before trx->mutex.
One way around it is to use the exclusive global lock_sys latch, which heavily
deteriorates concurrency. Another is to try to reacquire the latches in the
needed order, verifying that the list wasn't modified meanwhile.
This function performs the following steps:
1. releases trx->mutex,
2. acquires the proper lock_sys shard latch for the given lock,
3. reacquires trx->mutex,
4. executes f unless trx's locks list has changed.
Before and after this function the following should hold:
- the shared global lock_sys latch is held
- the trx->mutex is held
@param[in]  lock  the lock we are interested in
@param[in]  f     the function to execute when the shard is latched
@return true if f was called, false if it couldn't be called because trx locks
have changed while relatching trx->mutex
*/
template <typename F>
static bool try_relatch_trx_and_shard_and_do(const lock_t *lock, F &&f) {
  if (lock_get_type_low(lock) == LOCK_REC) {
    return try_relatch_trx_and_shard_and_do(lock->trx, lock->rec_lock.page_id,
                                            std::forward<F>(f));
  }

  ut_ad(lock_get_type_low(lock) == LOCK_TABLE);
  return try_relatch_trx_and_shard_and_do(lock->trx, *lock->tab_lock.table,
                                          std::forward<F>(f));
}

/** Tries to release read locks of a transaction without latching the whole
lock sys. This may fail, if there are many concurrent threads editing the
list of locks of this transaction (for example due to B-tree pages being
merged or split, or due to implicit-to-explicit conversion).
It is called during XA prepare to release locks early.
@param[in,out]  trx       transaction
@param[in]      only_gap  release only GAP locks
@return true if and only if it succeeded to do the job */
static bool try_release_read_locks_in_s_mode(trx_t *trx, bool only_gap) {
  /* In order to access trx->lock.trx_locks safely we need to hold trx->mutex.
  So, conceptually we'd love to hold trx->mutex while iterating through
  trx->lock.trx_locks.
  However the latching order only allows us to obtain trx->mutex AFTER any
  lock_sys latch.
  One way around this problem is to simply latch the whole lock_sys in
  exclusive mode (which also prevents any changes to trx->lock.trx_locks),
  however this impacts performance in appliers (TPS drops by up to 10%).
  Here we use a different approach:
  1. we extract the lock from the list while holding the trx->mutex,
  2. identify the shard of lock_sys it belongs to,
  3. store the current version of trx->lock.trx_locks,
  4. release the trx->mutex,
  5. acquire the lock_sys shard's latch,
  6. and reacquire the trx->mutex,
  7. verify that the version of trx->lock.trx_locks has not changed,
  8. and only then perform any action on the lock.
  */
  ut_ad(trx_mutex_own(trx));
  ut_ad(locksys::owns_shared_global_latch());
  lock_t *lock = UT_LIST_GET_FIRST(trx->lock.trx_locks);

  while (lock != nullptr) {
    ut_ad(trx_mutex_own(trx));
    /* We didn't latch the lock_sys shard this `lock` is in, so we only read a
    bare minimum set of information from the `lock`, such as the type, space,
    page_no, and next pointer, which, as long as we hold trx->mutex, should be
    immutable.

    Store the pointer to the next lock in the list, because in some cases we
    are going to remove `lock` from the list, which clears the pointer to the
    next lock */
    auto next_lock = UT_LIST_GET_NEXT(trx_locks, lock);
    if (lock_get_type_low(lock) == LOCK_REC) {
      /* The following call temporarily releases trx->mutex */
      if (!try_relatch_trx_and_shard_and_do(
              lock, [=]() { lock_release_read_lock(lock, only_gap); })) {
        /* Someone has modified the list while we were re-acquiring the latches
        so we need to start over again. */
        return false;
      }
    }
    /* As we have verified that the version has not changed, it must be the
    case that the next_lock is still the next lock as well */
    lock = next_lock;
  }
  return true;
}
}  // namespace locksys

/** Releases read locks of a transaction latching the whole lock-sys in
exclusive mode, which is a bit too expensive to do by default.
It is called during XA prepare to release locks early.
@param[in,out]  trx       transaction
@param[in]      only_gap  release only GAP locks */
static void lock_trx_release_read_locks_in_x_mode(trx_t *trx, bool only_gap) {
  ut_ad(!trx_mutex_own(trx));

  /* We will iterate over locks from various shards. */
  locksys::Global_exclusive_latch_guard guard{};
  trx_mutex_enter_first_of_two(trx);

  lock_t *lock = UT_LIST_GET_FIRST(trx->lock.trx_locks);

  while (lock != nullptr) {
    DEBUG_SYNC_C("lock_trx_release_read_locks_in_x_mode_will_release");
    /* Store the pointer to the next lock in the list, because in some cases
    we are going to remove `lock` from the list, which clears the pointer to
    the next lock */
    lock_t *next_lock = UT_LIST_GET_NEXT(trx_locks, lock);

    lock_release_read_lock(lock, only_gap);

    lock = next_lock;
  }

  trx_mutex_exit(trx);
}

void lock_trx_release_read_locks(trx_t *trx, bool only_gap) {
  ut_ad(trx_can_be_handled_by_current_thread(trx));

  size_t failures;
  const size_t MAX_FAILURES = 5;

  {
    locksys::Global_shared_latch_guard shared_latch_guard{};
    trx_mutex_enter(trx);
    ut_ad(trx->lock.wait_lock == nullptr);

    for (failures = 0; failures < MAX_FAILURES; ++failures) {
      if (locksys::try_release_read_locks_in_s_mode(trx, only_gap)) {
        break;
      }
    }

    trx_mutex_exit(trx);
  }

  if (failures == MAX_FAILURES) {
    lock_trx_release_read_locks_in_x_mode(trx, only_gap);
  }
}
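
/* Note (added comment): this is an optimistic/pessimistic two-phase scheme.
Up to MAX_FAILURES (5) attempts are made under the cheap shared global latch;
each attempt aborts if a concurrent B-tree reorganization bumps
trx_locks_version mid-scan. Only if all attempts fail do we pay for the
exclusive global latch, under which the scan cannot be disturbed. A
hypothetical caller at XA PREPARE time that wants to keep record locks but
release the gaps between them would invoke:

      lock_trx_release_read_locks(trx, true);  // only_gap=true
*/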

/** Releases transaction locks, and releases possible other transactions
waiting because of these locks.
@param[in,out]  trx  transaction */
static void lock_release(trx_t *trx) {
  lock_t *lock;
  ut_ad(!locksys::owns_exclusive_global_latch());
  ut_ad(!trx_mutex_own(trx));
  ut_ad(!trx->is_dd_trx);

  locksys::Global_shared_latch_guard shared_latch_guard{};
  /* In order to access trx->lock.trx_locks safely we need to hold trx->mutex.
  The transaction is already in the TRX_STATE_COMMITTED_IN_MEMORY state and is
  no longer referenced, so we are not afraid of implicit-to-explicit
  conversions, nor a cancellation of a wait_lock (we are running, not waiting).
  Still, there might be some B-tree merge or split operations running in
  parallel which cause locks to be moved from one page to another, which at the
  low level means that a new lock is created (and added to trx->lock.trx_locks)
  and the old one is removed (also from trx->lock.trx_locks) in that specific
  order.
  So, conceptually we'd love to hold trx->mutex while iterating through
  trx->lock.trx_locks.
  However the latching order only allows us to obtain trx->mutex AFTER any
  lock_sys latch. One way around this problem is to simply latch the whole
  lock_sys in exclusive mode (which also prevents any changes to
  trx->lock.trx_locks), however this impacts performance (TPS drops on
  sysbench {pareto,uniform}-2S-{128,1024}-usrs tests by 3% to 11%). Here we
  use a different approach:
  1. we extract the lock from the list while holding the trx->mutex,
  2. identify the shard of lock_sys it belongs to,
  3. release the trx->mutex,
  4. acquire the lock_sys shard's latch,
  5. and reacquire the trx->mutex,
  6. verify that the lock pointer is still in trx->lock.trx_locks (so it is
  safe to access it),
  7. and only then perform any action on the lock.
  */
  trx_mutex_enter(trx);

  ut_ad(trx->lock.wait_lock == nullptr);
  while ((lock = UT_LIST_GET_LAST(trx->lock.trx_locks)) != nullptr) {
    /* The following call temporarily releases trx->mutex */
    locksys::try_relatch_trx_and_shard_and_do(lock, [=]() {
      if (lock_get_type_low(lock) == LOCK_REC) {
        lock_rec_dequeue_from_page(lock);
      } else {
        lock_table_dequeue(lock);
      }
    });
  }

  trx_mutex_exit(trx);
}

/* True if a lock mode is S or X */
#define IS_LOCK_S_OR_X(lock) \
  (lock_get_mode(lock) == LOCK_S || lock_get_mode(lock) == LOCK_X)

/** Removes lock_to_remove from lock_to_remove->trx->lock.table_locks.
@param[in]  lock_to_remove  lock to remove */
static void lock_trx_table_locks_remove(const lock_t *lock_to_remove) {
  trx_t *trx = lock_to_remove->trx;

  ut_ad(locksys::owns_table_shard(*lock_to_remove->tab_lock.table));
  /* We will modify trx->lock.table_locks so we need trx->mutex */
  ut_ad(trx_mutex_own(trx));

  typedef lock_pool_t::reverse_iterator iterator;

  iterator end = trx->lock.table_locks.rend();

  iterator it = std::find(trx->lock.table_locks.rbegin(), end, lock_to_remove);

  /* Lock must exist in the vector. */
  ut_a(it != end);
  /* To keep it O(1) replace the removed position with lock from the back */
  *it = trx->lock.table_locks.back();
  trx->lock.table_locks.pop_back();
}
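
/* Example (added comment): the classic swap-with-back removal. Removing B
from table_locks = [A, B, C, D] overwrites B with the back element and pops:

      [A, B, C, D]  ->  [A, D, C]

Order is not preserved, but nothing here relies on it, and the erase itself is
O(1) (the reverse std::find dominates the cost). When the element found is
itself the back element, the assignment is a harmless self-assignment and
pop_back() still does the right thing. */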

/** Removes locks of a transaction on a table to be dropped.
If remove_also_table_sx_locks is true then table-level S and X locks are
also removed in addition to other table-level and record-level locks.
No lock that is going to be removed is allowed to be a wait lock. */
static void lock_remove_all_on_table_for_trx(
    dict_table_t *table,              /*!< in: table to be dropped */
    trx_t *trx,                       /*!< in: a transaction */
    ibool remove_also_table_sx_locks) /*!< in: also removes
                                      table S and X locks */
{
  lock_t *lock;
  lock_t *prev_lock;

  /* This is used when we drop a table and indeed have exclusive lock_sys
  access. */
  ut_ad(locksys::owns_exclusive_global_latch());
  /* We need trx->mutex to iterate over trx->lock.trx_locks and it is needed by
  lock_trx_table_locks_remove() and lock_table_remove_low() but we haven't
  acquired it yet. */
  ut_ad(!trx_mutex_own(trx));
  trx_mutex_enter(trx);

  for (lock = UT_LIST_GET_LAST(trx->lock.trx_locks); lock != nullptr;
       lock = prev_lock) {
    prev_lock = UT_LIST_GET_PREV(trx_locks, lock);

    if (lock_get_type_low(lock) == LOCK_REC && lock->index->table == table) {
      ut_a(!lock_get_wait(lock));

      lock_rec_discard(lock);
    } else if (lock_get_type_low(lock) & LOCK_TABLE &&
               lock->tab_lock.table == table &&
               (remove_also_table_sx_locks || !IS_LOCK_S_OR_X(lock))) {
      ut_a(!lock_get_wait(lock));

      lock_trx_table_locks_remove(lock);
      lock_table_remove_low(lock);
    }
  }

  trx_mutex_exit(trx);
}

/** Removes any explicit record locks held by recovering transactions on
the table.
@return number of recovered transactions examined */
static ulint lock_remove_recovered_trx_record_locks(
    dict_table_t *table) /*!< in: check if there are any locks
                         held on records in this table or on the
                         table itself */
{
  ut_a(table != nullptr);
  /* We need the exclusive lock_sys latch, as we are about to iterate over
  locks held by multiple transactions while they might be operating. */
  ut_ad(locksys::owns_exclusive_global_latch());

  ulint n_recovered_trx = 0;

  mutex_enter(&trx_sys->mutex);

  for (trx_t *trx = UT_LIST_GET_FIRST(trx_sys->rw_trx_list); trx != nullptr;
       trx = UT_LIST_GET_NEXT(trx_list, trx)) {
    assert_trx_in_rw_list(trx);

    if (!trx->is_recovered) {
      continue;
    }
    /* We need trx->mutex to iterate over trx->lock.trx_locks and it is needed
    by lock_trx_table_locks_remove() and lock_table_remove_low() but we haven't
    acquired it yet. */
    ut_ad(!trx_mutex_own(trx));
    trx_mutex_enter(trx);
    /* Because we are holding the exclusive global lock_sys latch,
    implicit locks cannot be converted to explicit ones
    while we are scanning the explicit locks. */

    lock_t *next_lock;

    for (lock_t *lock = UT_LIST_GET_FIRST(trx->lock.trx_locks); lock != nullptr;
         lock = next_lock) {
      ut_a(lock->trx == trx);

      /* Recovered transactions can't wait on a lock. */

      ut_a(!lock_get_wait(lock));

      next_lock = UT_LIST_GET_NEXT(trx_locks, lock);

      switch (lock_get_type_low(lock)) {
        default:
          ut_error;
        case LOCK_TABLE:
          if (lock->tab_lock.table == table) {
            lock_trx_table_locks_remove(lock);
            lock_table_remove_low(lock);
          }
          break;
        case LOCK_REC:
          if (lock->index->table == table) {
            lock_rec_discard(lock);
          }
      }
    }

    trx_mutex_exit(trx);
    ++n_recovered_trx;
  }

  mutex_exit(&trx_sys->mutex);

  return (n_recovered_trx);
}
4336
4337 /** Removes locks on a table to be dropped.
4338 If remove_also_table_sx_locks is true then table-level S and X locks are
4339 also removed in addition to other table-level and record-level locks.
4340 No lock that is going to be removed is allowed to be a wait lock. */
4341 void lock_remove_all_on_table(
4342 dict_table_t *table, /*!< in: table to be dropped
4343 or discarded */
4344 ibool remove_also_table_sx_locks) /*!< in: also removes
4345 table S and X locks */
4346 {
4347 lock_t *lock;
4348
4349 /* We will iterate over locks (including record locks) from various shards */
4350 locksys::Global_exclusive_latch_guard guard{};
4351
4352 for (lock = UT_LIST_GET_FIRST(table->locks); lock != nullptr;
4353 /* No op */) {
4354 lock_t *prev_lock;
4355
4356 prev_lock = UT_LIST_GET_PREV(tab_lock.locks, lock);
4357
4358 /* If we should remove all locks (remove_also_table_sx_locks
4359 is true), or if the lock is not table-level S or X lock,
4360 then check we are not going to remove a wait lock. */
4361 if (remove_also_table_sx_locks ||
4362 !(lock_get_type(lock) == LOCK_TABLE && IS_LOCK_S_OR_X(lock))) {
4363 ut_a(!lock_get_wait(lock));
4364 }
4365
4366 lock_remove_all_on_table_for_trx(table, lock->trx,
4367 remove_also_table_sx_locks);
4368
4369 if (prev_lock == nullptr) {
4370 if (lock == UT_LIST_GET_FIRST(table->locks)) {
4371 /* lock was not removed, pick its successor */
4372 lock = UT_LIST_GET_NEXT(tab_lock.locks, lock);
4373 } else {
4374 /* lock was removed, pick the first one */
4375 lock = UT_LIST_GET_FIRST(table->locks);
4376 }
4377 } else if (UT_LIST_GET_NEXT(tab_lock.locks, prev_lock) != lock) {
4378 /* If lock was removed by
4379 lock_remove_all_on_table_for_trx() then pick the
4380 successor of prev_lock ... */
4381 lock = UT_LIST_GET_NEXT(tab_lock.locks, prev_lock);
4382 } else {
4383 /* ... otherwise pick the successor of lock. */
4384 lock = UT_LIST_GET_NEXT(tab_lock.locks, lock);
4385 }
4386 }
4387
4388 /* Note: Recovered transactions don't have table level IX or IS locks
4389 but can have implicit record locks that have been converted to explicit
4390 record locks. Such record locks cannot be freed by traversing the
4391 transaction lock list in dict_table_t (as above). */
4392
4393 if (!lock_sys->rollback_complete &&
4394 lock_remove_recovered_trx_record_locks(table) == 0) {
4395 lock_sys->rollback_complete = true;
4396 }
4397 }
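/* A worked example of the repositioning logic above (illustrative): suppose
table->locks is A -> B -> C and the loop stands at lock == B, so
prev_lock == A. lock_remove_all_on_table_for_trx() may have removed B (and
possibly other locks of B's transaction):
- if B was removed, A's successor is no longer B, so we resume from
  UT_LIST_GET_NEXT(tab_lock.locks, prev_lock), i.e. from C;
- if B survived, A's successor is still B, so we simply advance past B;
- if B was the list head (prev_lock == nullptr), we instead compare B with
  the current head of table->locks to tell the two cases apart. */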
4398
4399 /*===================== VALIDATION AND DEBUGGING ====================*/
4400
4401 /** Prints info of a table lock. */
4402 static void lock_table_print(FILE *file, /*!< in: file where to print */
4403 const lock_t *lock) /*!< in: table type lock */
4404 {
4405 ut_a(lock_get_type_low(lock) == LOCK_TABLE);
4406 /* We actually hold exclusive latch here, but we require just the shard */
4407 ut_ad(locksys::owns_table_shard(*lock->tab_lock.table));
4408
4409 fputs("TABLE LOCK table ", file);
4410 ut_print_name(file, lock->trx, lock->tab_lock.table->name.m_name);
4411 fprintf(file, " trx id " TRX_ID_FMT, trx_get_id_for_print(lock->trx));
4412
4413 if (lock_get_mode(lock) == LOCK_S) {
4414 fputs(" lock mode S", file);
4415 } else if (lock_get_mode(lock) == LOCK_X) {
4416 ut_ad(lock->trx->id != 0);
4417 fputs(" lock mode X", file);
4418 } else if (lock_get_mode(lock) == LOCK_IS) {
4419 fputs(" lock mode IS", file);
4420 } else if (lock_get_mode(lock) == LOCK_IX) {
4421 ut_ad(lock->trx->id != 0);
4422 fputs(" lock mode IX", file);
4423 } else if (lock_get_mode(lock) == LOCK_AUTO_INC) {
4424 fputs(" lock mode AUTO-INC", file);
4425 } else {
4426 fprintf(file, " unknown lock mode %lu", (ulong)lock_get_mode(lock));
4427 }
4428
4429 if (lock_get_wait(lock)) {
4430 fputs(" waiting", file);
4431 }
4432
4433 putc('\n', file);
4434 }
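/* For reference, a line produced by lock_table_print() looks like this
(identifiers and ids are made up for illustration):
TABLE LOCK table `test`.`t1` trx id 328765 lock mode IX
*/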
4435
4436 /** Prints info of a record lock. */
4437 static void lock_rec_print(FILE *file, /*!< in: file where to print */
4438 const lock_t *lock) /*!< in: record type lock */
4439 {
4440 mtr_t mtr;
4441 Rec_offsets offsets;
4442
4443 ut_a(lock_get_type_low(lock) == LOCK_REC);
4444 const auto page_id = lock->rec_lock.page_id;
4445 /* We actually hold exclusive latch here, but we require just the shard */
4446 ut_ad(locksys::owns_page_shard(page_id));
4447
4448 fprintf(file,
4449 "RECORD LOCKS space id %lu page no %lu n bits %llu "
4450 "index %s of table ",
4451 ulong{page_id.space()}, ulong{page_id.page_no()},
4452 ulonglong{lock_rec_get_n_bits(lock)}, lock->index->name());
4453 ut_print_name(file, lock->trx, lock->index->table_name);
4454 fprintf(file, " trx id " TRX_ID_FMT, trx_get_id_for_print(lock->trx));
4455
4456 if (lock_get_mode(lock) == LOCK_S) {
4457 fputs(" lock mode S", file);
4458 } else if (lock_get_mode(lock) == LOCK_X) {
4459 fputs(" lock_mode X", file);
4460 } else {
4461 ut_error;
4462 }
4463
4464 if (lock_rec_get_gap(lock)) {
4465 fputs(" locks gap before rec", file);
4466 }
4467
4468 if (lock_rec_get_rec_not_gap(lock)) {
4469 fputs(" locks rec but not gap", file);
4470 }
4471
4472 if (lock_rec_get_insert_intention(lock)) {
4473 fputs(" insert intention", file);
4474 }
4475
4476 if (lock_get_wait(lock)) {
4477 fputs(" waiting", file);
4478 }
4479
4480 mtr_start(&mtr);
4481
4482 putc('\n', file);
4483
4484 const buf_block_t *block;
4485
4486 block = buf_page_try_get(page_id, &mtr);
4487
4488 for (ulint i = 0; i < lock_rec_get_n_bits(lock); ++i) {
4489 if (!lock_rec_get_nth_bit(lock, i)) {
4490 continue;
4491 }
4492
4493 fprintf(file, "Record lock, heap no %lu", (ulong)i);
4494
4495 if (block) {
4496 const rec_t *rec;
4497
4498 rec = page_find_rec_with_heap_no(buf_block_get_frame(block), i);
4499
4500 putc(' ', file);
4501 rec_print_new(file, rec, offsets.compute(rec, lock->index));
4502 }
4503
4504 putc('\n', file);
4505 }
4506
4507 mtr_commit(&mtr);
4508 }
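/* For reference, output produced by lock_rec_print() looks like this
(identifiers, ids and record contents are made up for illustration; the
per-record lines appear only when the page is resident in the buffer pool):
RECORD LOCKS space id 58 page no 3 n bits 72 index PRIMARY of table
`test`.`t1` trx id 328765 lock mode S locks rec but not gap
Record lock, heap no 2 PHYSICAL RECORD: n_fields 3; ...
*/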
4509
4510 #ifdef UNIV_DEBUG
4511 /* Print the number of lock structs from lock_print_info_summary() only
4512 in non-production builds for performance reasons, see
4513 http://bugs.mysql.com/36942 */
4514 #define PRINT_NUM_OF_LOCK_STRUCTS
4515 #endif /* UNIV_DEBUG */
4516
4517 #ifdef PRINT_NUM_OF_LOCK_STRUCTS
4518 /** Calculates the number of record lock structs in the record lock hash table.
4519 @return number of record locks */
4520 static ulint lock_get_n_rec_locks(void) {
4521 ulint n_locks = 0;
4522 ulint i;
4523
4524 /* We need exclusive access to lock_sys to iterate over all buckets */
4525 ut_ad(locksys::owns_exclusive_global_latch());
4526
4527 for (i = 0; i < hash_get_n_cells(lock_sys->rec_hash); i++) {
4528 const lock_t *lock;
4529
4530 for (lock =
4531 static_cast<const lock_t *>(HASH_GET_FIRST(lock_sys->rec_hash, i));
4532 lock != nullptr;
4533 lock = static_cast<const lock_t *>(HASH_GET_NEXT(hash, lock))) {
4534 n_locks++;
4535 }
4536 }
4537
4538 return (n_locks);
4539 }
4540 #endif /* PRINT_NUM_OF_LOCK_STRUCTS */
4541
4542 void lock_print_info_summary(FILE *file) {
4543 ut_ad(locksys::owns_exclusive_global_latch());
4544
4545 if (lock_deadlock_found) {
4546 fputs(
4547 "------------------------\n"
4548 "LATEST DETECTED DEADLOCK\n"
4549 "------------------------\n",
4550 file);
4551
4552 if (!srv_read_only_mode) {
4553 ut_copy_file(file, lock_latest_err_file);
4554 }
4555 }
4556
4557 fputs(
4558 "------------\n"
4559 "TRANSACTIONS\n"
4560 "------------\n",
4561 file);
4562
4563 fprintf(file, "Trx id counter " TRX_ID_FMT "\n", trx_sys_get_max_trx_id());
4564
4565 fprintf(file,
4566 "Purge done for trx's n:o < " TRX_ID_FMT " undo n:o < " TRX_ID_FMT
4567 " state: ",
4568 purge_sys->iter.trx_no, purge_sys->iter.undo_no);
4569
4570 /* Note: We are reading the state without the latch, for two reasons: first,
4571 taking it here would violate the latching order, and second, we are merely
4572 querying the state of the variable for display. */
4573
4574 switch (purge_sys->state) {
4575 case PURGE_STATE_INIT:
4576 /* Should never be in this state while the system is running. */
4577 fprintf(file, "initializing");
4578 break;
4579
4580 case PURGE_STATE_EXIT:
4581 fprintf(file, "exited");
4582 break;
4583
4584 case PURGE_STATE_DISABLED:
4585 fprintf(file, "disabled");
4586 break;
4587
4588 case PURGE_STATE_RUN:
4589 fprintf(file, "running");
4590 /* Check if it is waiting for more data to arrive. */
4591 if (!purge_sys->running) {
4592 fprintf(file, " but idle");
4593 }
4594 break;
4595
4596 case PURGE_STATE_STOP:
4597 fprintf(file, "stopped");
4598 break;
4599 }
4600
4601 fprintf(file, "\n");
4602
4603 fprintf(file, "History list length %lu\n", (ulong)trx_sys->rseg_history_len);
4604
4605 #ifdef PRINT_NUM_OF_LOCK_STRUCTS
4606 fprintf(file, "Total number of lock structs in row lock hash table %lu\n",
4607 (ulong)lock_get_n_rec_locks());
4608 #endif /* PRINT_NUM_OF_LOCK_STRUCTS */
4609 }
4610
4611 /** Functor to print not-started transactions from the mysql_trx_list. */
4612
4613 struct PrintNotStarted {
4614 PrintNotStarted(FILE *file) : m_file(file) {}
4615
4616 void operator()(const trx_t *trx) {
4617 /* We require exclusive access to lock_sys */
4618 ut_ad(locksys::owns_exclusive_global_latch());
4619 ut_ad(trx->in_mysql_trx_list);
4620 ut_ad(mutex_own(&trx_sys->mutex));
4621
4622 /* See state transitions and locking rules in trx0trx.h */
4623
4624 trx_mutex_enter(trx);
4625 if (trx_state_eq(trx, TRX_STATE_NOT_STARTED)) {
4626 fputs("---", m_file);
4627 trx_print_latched(m_file, trx, 600);
4628 }
4629 trx_mutex_exit(trx);
4630 }
4631
4632 FILE *m_file;
4633 };
4634
4635 /** Iterate over a transaction's locks, keeping track of the
4636 iterator position using an ordinal value. */
4637
4638 class TrxLockIterator {
4639 public:
4640 TrxLockIterator() { rewind(); }
4641
4642 /** Get the m_index(th) lock of a transaction.
4643 @return current lock or 0 */
4644 const lock_t *current(const trx_t *trx) const {
4645 lock_t *lock;
4646 ulint i = 0;
4647 /* Writes to trx->lock.trx_locks are protected by trx->mutex combined with a
4648 shared lock_sys global latch, and we assume we have the exclusive latch on
4649 lock_sys here. */
4650 ut_ad(locksys::owns_exclusive_global_latch());
4651 for (lock = UT_LIST_GET_FIRST(trx->lock.trx_locks);
4652 lock != nullptr && i < m_index;
4653 lock = UT_LIST_GET_NEXT(trx_locks, lock), ++i) {
4654 /* No op */
4655 }
4656
4657 return (lock);
4658 }
4659
4660 /** Set the ordinal value to 0 */
4661 void rewind() { m_index = 0; }
4662
4663 /** Increment the ordinal value.
4664 @return the current index value */
4665 ulint next() { return (++m_index); }
4666
4667 private:
4668 /** Current iterator position */
4669 ulint m_index;
4670 };
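/* Note that current() above re-walks trx->lock.trx_locks from the head on
every call, so visiting a transaction's k locks through this iterator costs
O(k^2). This is deliberate: the latches protecting the list may be released
between calls (see lock_rec_fetch_page()), so a cached lock_t pointer could
dangle, while an ordinal position can always be re-resolved. The cost stays
small because lock_trx_print_locks() prints at most 10 locks per
transaction. */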
4671
4672 /** This iterates over both the RW and RO trx_sys lists. We need to keep
4673 track of where the iterator was up to, and we do that using an ordinal value. */
4674
4675 class TrxListIterator {
4676 public:
4677 TrxListIterator() : m_index() {
4678 /* We iterate over the RW trx list first. */
4679
4680 m_trx_list = &trx_sys->rw_trx_list;
4681 }
4682
4683 /** Get the current transaction whose ordinality is m_index.
4684 @return current transaction or 0 */
4685
4686 const trx_t *current() { return (reposition()); }
4687
4688 /** Advance the current transaction ordinal value and reset the
4689 transaction lock ordinal value. */
4690
4691 void next() {
4692 ++m_index;
4693 m_lock_iter.rewind();
4694 }
4695
4696 TrxLockIterator &lock_iter() { return (m_lock_iter); }
4697
4698 private:
4699 /** Reposition the "cursor" on the current transaction. If it
4700 is the first time then the "cursor" will be positioned on the
4701 first transaction.
4702
4703 @return transaction instance or 0 */
4704 const trx_t *reposition() const {
4705 ulint i;
4706 trx_t *trx;
4707
4708 /* Make the transaction at the ordinal value of m_index
4709 the current transaction, i.e. reposition/restore. */
4710
4711 for (i = 0, trx = UT_LIST_GET_FIRST(*m_trx_list);
4712 trx != nullptr && (i < m_index);
4713 trx = UT_LIST_GET_NEXT(trx_list, trx), ++i) {
4714 check_trx_state(trx);
4715 }
4716
4717 return (trx);
4718 }
4719
4720 /** Ordinal value of the transaction in the current transaction list */
4721 ulint m_index;
4722
4723 /** Current transaction list */
4724 trx_ut_list_t *m_trx_list;
4725
4726 /** For iterating over a transaction's locks */
4727 TrxLockIterator m_lock_iter;
4728 };
4729
4730 /** Prints transaction lock wait and MVCC state.
4731 @param[in,out] file file where to print
4732 @param[in] trx transaction */
4733 void lock_trx_print_wait_and_mvcc_state(FILE *file, const trx_t *trx) {
4734 /* We require exclusive lock_sys access so that trx->lock.wait_lock is
4735 not being modified, and to access trx->lock.wait_started without trx->mutex. */
4736 ut_ad(locksys::owns_exclusive_global_latch());
4737 fprintf(file, "---");
4738
4739 trx_print_latched(file, trx, 600);
4740
4741 const ReadView *read_view = trx_get_read_view(trx);
4742
4743 if (read_view != nullptr) {
4744 read_view->print_limits(file);
4745 }
4746
4747 if (trx->lock.que_state == TRX_QUE_LOCK_WAIT) {
4748 fprintf(file,
4749 "------- TRX HAS BEEN WAITING %lu SEC"
4750 " FOR THIS LOCK TO BE GRANTED:\n",
4751 (ulong)difftime(ut_time(), trx->lock.wait_started));
4752
4753 if (lock_get_type_low(trx->lock.wait_lock) == LOCK_REC) {
4754 lock_rec_print(file, trx->lock.wait_lock);
4755 } else {
4756 lock_table_print(file, trx->lock.wait_lock);
4757 }
4758
4759 fprintf(file, "------------------\n");
4760 }
4761 }
4762
4763 /** Reads the page containing the record protected by the given lock.
4764 This function will temporarily release the exclusive global latch and the
4765 trx_sys_t::mutex if the page was read from disk.
4766 @param[in] lock the record lock
4767 @return true if a page was successfully read from the tablespace */
4768 static bool lock_rec_fetch_page(const lock_t *lock) {
4769 ut_ad(lock_get_type_low(lock) == LOCK_REC);
4770
4771 const page_id_t page_id = lock->rec_lock.page_id;
4772 const space_id_t space_id = page_id.space();
4773 fil_space_t *space;
4774 bool found;
4775 const page_size_t &page_size = fil_space_get_page_size(space_id, &found);
4776
4777 /* Check if the .ibd file exists. */
4778 if (found) {
4779 mtr_t mtr;
4780
4781 locksys::Unsafe_global_latch_manipulator::exclusive_unlatch();
4782
4783 mutex_exit(&trx_sys->mutex);
4784
4785 DEBUG_SYNC_C("innodb_monitor_before_lock_page_read");
4786
4787 /* Check whether the space exists or not; only
4788 when the space is valid do we try to get the page. */
4789 space = fil_space_acquire(space_id);
4790 if (space) {
4791 mtr_start(&mtr);
4792 buf_page_get_gen(page_id, page_size, RW_NO_LATCH, nullptr,
4793 Page_fetch::POSSIBLY_FREED, __FILE__, __LINE__, &mtr);
4794 mtr_commit(&mtr);
4795 fil_space_release(space);
4796 }
4797
4798 locksys::Unsafe_global_latch_manipulator::exclusive_latch();
4799
4800 mutex_enter(&trx_sys->mutex);
4801
4802 return (true);
4803 }
4804
4805 return (false);
4806 }
4807
4808 /** Prints info of locks for a transaction.
4809 @return true if all printed, false if latches were released. */
4810 static bool lock_trx_print_locks(
4811 FILE *file, /*!< in/out: File to write */
4812 const trx_t *trx, /*!< in: current transaction */
4813 TrxLockIterator &iter, /*!< in: transaction lock iterator */
4814 bool load_block) /*!< in: if true then read block
4815 from disk */
4816 {
4817 const lock_t *lock;
4818 /* We require exclusive access to lock_sys */
4819 ut_ad(locksys::owns_exclusive_global_latch());
4820
4821 /* Iterate over the transaction's locks. */
4822 while ((lock = iter.current(trx)) != nullptr) {
4823 if (lock_get_type_low(lock) == LOCK_REC) {
4824 if (load_block) {
4825 /* Note: lock_rec_fetch_page() will release both the exclusive global
4826 latch and the trx_sys_t::mutex if it does a read from disk. */
4827
4828 if (lock_rec_fetch_page(lock)) {
4829 /* We need to resync the
4830 current transaction. */
4831 return (false);
4832 }
4833
4834 /* It is a single table tablespace
4835 and the .ibd file is missing
4836 (DISCARD TABLESPACE probably stole the
4837 locks): just print the lock without
4838 attempting to load the page in the
4839 buffer pool. */
4840
4841 fprintf(file,
4842 "RECORD LOCKS on non-existing"
4843 " space %u\n",
4844 lock->rec_lock.page_id.space());
4845 }
4846
4847 /* Print all the record locks on the page from
4848 the record lock bitmap */
4849
4850 lock_rec_print(file, lock);
4851
4852 load_block = true;
4853
4854 } else {
4855 ut_ad(lock_get_type_low(lock) & LOCK_TABLE);
4856
4857 lock_table_print(file, lock);
4858 }
4859
4860 if (iter.next() >= 10) {
4861 fprintf(file,
4862 "10 LOCKS PRINTED FOR THIS TRX:"
4863 " SUPPRESSING FURTHER PRINTS\n");
4864
4865 break;
4866 }
4867 }
4868
4869 return (true);
4870 }
4871
4872 void lock_print_info_all_transactions(FILE *file) {
4873 /* We require exclusive access to lock_sys */
4874 ut_ad(locksys::owns_exclusive_global_latch());
4875
4876 fprintf(file, "LIST OF TRANSACTIONS FOR EACH SESSION:\n");
4877
4878 mutex_enter(&trx_sys->mutex);
4879
4880 /* First print info on non-active transactions */
4881
4882 /* NOTE: information of auto-commit non-locking read-only
4883 transactions will be omitted here. The information will be
4884 available from INFORMATION_SCHEMA.INNODB_TRX. */
4885
4886 PrintNotStarted print_not_started(file);
4887 ut_list_map(trx_sys->mysql_trx_list, print_not_started);
4888
4889 const trx_t *trx;
4890 TrxListIterator trx_iter;
4891 const trx_t *prev_trx = nullptr;
4892
4893 /* Control whether a block should be fetched from the buffer pool. */
4894 bool load_block = true;
4895 bool monitor = srv_print_innodb_lock_monitor;
4896
4897 while ((trx = trx_iter.current()) != nullptr) {
4898 check_trx_state(trx);
4899
4900 if (trx != prev_trx) {
4901 lock_trx_print_wait_and_mvcc_state(file, trx);
4902 prev_trx = trx;
4903
4904 /* The transaction being printed is no longer the
4905 one for which the page was read in. We need to
4906 force a page read. */
4907 load_block = true;
4908 }
4909
4910 /* If we need to print the locked record contents then we
4911 need to fetch the containing block from the buffer pool. */
4912 if (monitor) {
4913 /* Print the locks owned by the current transaction. */
4914 TrxLockIterator &lock_iter = trx_iter.lock_iter();
4915
4916 if (!lock_trx_print_locks(file, trx, lock_iter, load_block)) {
4917 /* Resync trx_iter: the trx_sys->mutex and the exclusive global latch
4918 were temporarily released while a page was successfully read in, so the
4919 iterator position must be restored. On the next call to
4920 lock_trx_print_locks() we should simply print the contents of
4921 the page just read in. */
4922 load_block = false;
4923
4924 continue;
4925 }
4926 }
4927
4928 load_block = true;
4929
4930 /* All record lock details were printed without fetching
4931 a page from disk, or we didn't need to print the detail. */
4932 trx_iter.next();
4933 }
4934
4935 mutex_exit(&trx_sys->mutex);
4936 }
4937
4938 #ifdef UNIV_DEBUG
4939 /** Check if the lock exists in the trx_t::trx_lock_t::table_locks vector.
4940 @param[in] trx the trx to validate
4941 @param[in] find_lock lock to find
4942 @return true if found */
4943 static bool lock_trx_table_locks_find(const trx_t *trx,
4944 const lock_t *find_lock) {
4945 /* We will access trx->lock.table_locks so we need trx->mutex */
4946 trx_mutex_enter(trx);
4947
4948 typedef lock_pool_t::const_reverse_iterator iterator;
4949
4950 const iterator end = trx->lock.table_locks.rend();
4951 const iterator begin = trx->lock.table_locks.rbegin();
4952 const bool found = std::find(begin, end, find_lock) != end;
4953
4954 trx_mutex_exit(trx);
4955
4956 return (found);
4957 }
4958
4959 /** Validates the lock queue on a table.
4960 @return true if ok */
4961 static bool lock_table_queue_validate(
4962 const dict_table_t *table) /*!< in: table */
4963 {
4964 const lock_t *lock;
4965
4966 /* We actually hold exclusive latch here, but we require just the shard */
4967 ut_ad(locksys::owns_table_shard(*table));
4968 ut_ad(trx_sys_mutex_own());
4969
4970 for (lock = UT_LIST_GET_FIRST(table->locks); lock != nullptr;
4971 lock = UT_LIST_GET_NEXT(tab_lock.locks, lock)) {
4972 /* lock->trx->state cannot change from or to NOT_STARTED
4973 while we are holding the trx_sys->mutex. It may change
4974 from ACTIVE to PREPARED. It may become COMMITTED_IN_MEMORY even though we
4975 hold trx_sys->mutex in case it has trx->id==0, but even in this case it
4976 will not be freed until it can release the table lock, and we prevent
4977 this by latching its shard. */
4978 ut_ad(trx_assert_started(lock->trx));
4979
4980 if (!lock_get_wait(lock)) {
4981 ut_a(!lock_table_other_has_incompatible(lock->trx, 0, table,
4982 lock_get_mode(lock)));
4983 } else {
4984 ut_a(lock_table_has_to_wait_in_queue(lock));
4985 }
4986
4987 ut_a(lock_trx_table_locks_find(lock->trx, lock));
4988 }
4989
4990 return (true);
4991 }
4992 namespace locksys {
4993 /** Validates the lock queue on a single record.
4994 @param[in] block buffer block containing rec
4995 @param[in] rec record to look at
4996 @param[in] index index, or NULL if not known
4997 @param[in] offsets rec_get_offsets(rec, index) */
4998 static void rec_queue_validate_latched(const buf_block_t *block,
4999 const rec_t *rec,
5000 const dict_index_t *index,
5001 const ulint *offsets) {
5002 ut_ad(owns_page_shard(block->get_page_id()));
5003 ut_ad(mutex_own(&trx_sys->mutex));
5004 ut_a(rec);
5005 ut_a(block->frame == page_align(rec));
5006 ut_ad(rec_offs_validate(rec, index, offsets));
5007 ut_ad(!page_rec_is_comp(rec) == !rec_offs_comp(offsets));
5008 ut_ad(!index || index->is_clustered() || !dict_index_is_online_ddl(index));
5009
5010 ulint heap_no = page_rec_get_heap_no(rec);
5011 RecID rec_id{block, heap_no};
5012
5013 if (!page_rec_is_user_rec(rec)) {
5014 Lock_iter::for_each(rec_id, [&](lock_t *lock) {
5015 ut_ad(!trx_is_ac_nl_ro(lock->trx));
5016
5017 if (lock->is_waiting()) {
5018 ut_a(lock_rec_has_to_wait_in_queue(lock));
5019 }
5020
5021 if (index != nullptr) {
5022 ut_a(lock->index == index);
5023 }
5024
5025 return (true);
5026 });
5027
5028 return;
5029 }
5030
5031 if (index == nullptr) {
5032 /* Nothing we can do */
5033
5034 } else if (index->is_clustered()) {
5035 trx_id_t trx_id;
5036
5037 /* Unlike the non-debug code, this invariant can only succeed
5038 if the check and assertion are covered by the lock_sys latch. */
5039
5040 trx_id = lock_clust_rec_some_has_impl(rec, index, offsets);
5041
5042 const trx_t *impl_trx = trx_rw_is_active_low(trx_id, nullptr);
5043 if (impl_trx != nullptr) {
5044 ut_ad(owns_page_shard(block->get_page_id()));
5045 ut_ad(trx_sys_mutex_own());
5046 /* impl_trx cannot become TRX_STATE_COMMITTED_IN_MEMORY nor removed from
5047 rw_trx_set until we release trx_sys->mutex, which means that currently all
5048 other threads in the system consider this impl_trx active and thus should
5049 respect implicit locks held by impl_trx. */
5050
5051 const lock_t *other_lock =
5052 lock_rec_other_has_expl_req(LOCK_S, block, true, heap_no, impl_trx);
5053
5054 /* The impl_trx is holding an implicit lock on the
5055 given record 'rec'. So there cannot be another
5056 explicit granted lock. Also, there can be another
5057 explicit waiting lock only if the impl_trx has an
5058 explicit granted lock. */
5059
5060 if (other_lock != nullptr) {
5061 ut_a(lock_get_wait(other_lock));
5062 ut_a(lock_rec_has_expl(LOCK_X | LOCK_REC_NOT_GAP, block, heap_no,
5063 impl_trx));
5064 }
5065 }
5066 }
5067
5068 Lock_iter::for_each(rec_id, [&](lock_t *lock) {
5069 ut_ad(!trx_is_ac_nl_ro(lock->trx));
5070
5071 if (index != nullptr) {
5072 ut_a(lock->index == index);
5073 }
5074
5075 if (!lock->is_gap() && !lock->is_waiting()) {
5076 lock_mode mode;
5077
5078 if (lock_get_mode(lock) == LOCK_S) {
5079 mode = LOCK_X;
5080 } else {
5081 mode = LOCK_S;
5082 }
5083
5084 const lock_t *other_lock =
5085 lock_rec_other_has_expl_req(mode, block, false, heap_no, lock->trx);
5086
5087 ut_a(!other_lock);
5088
5089 } else if (lock->is_waiting() && !lock->is_gap()) {
5090 ut_a(lock_rec_has_to_wait_in_queue(lock));
5091 }
5092
5093 return (true);
5094 });
5095 }
5096
5097 /** Validates the lock queue on a single record.
5098 @param[in] block buffer block containing rec
5099 @param[in] rec record to look at
5100 @param[in] index index, or NULL if not known
5101 @param[in] offsets rec_get_offsets(rec, index) */
5102 static void rec_queue_latch_and_validate(const buf_block_t *block,
5103 const rec_t *rec,
5104 const dict_index_t *index,
5105 const ulint *offsets) {
5106 ut_ad(!owns_exclusive_global_latch());
5107 ut_ad(!mutex_own(&trx_sys->mutex));
5108
5109 Shard_latch_guard guard{block->get_page_id()};
5110 mutex_enter(&trx_sys->mutex);
5111 rec_queue_validate_latched(block, rec, index, offsets);
5112 mutex_exit(&trx_sys->mutex);
5113 }
5114
5115 /** Validates the lock queue on a single record.
5116 @param[in] block buffer block containing rec
5117 @param[in] rec record to look at
5118 @param[in] index index, or NULL if not known */
5119 static void rec_queue_latch_and_validate(const buf_block_t *block,
5120 const rec_t *rec,
5121 const dict_index_t *index) {
5122 rec_queue_latch_and_validate(block, rec, index,
5123 Rec_offsets().compute(rec, index));
5124 }
5125 } // namespace locksys
5126
5127 /** Validates the record lock queues on a page.
5128 @return true if ok */
5129 static bool lock_rec_validate_page(
5130 const buf_block_t *block) /*!< in: buffer block */
5131 {
5132 const lock_t *lock;
5133 const rec_t *rec;
5134 ulint nth_lock = 0;
5135 ulint nth_bit = 0;
5136 ulint i;
5137 Rec_offsets offsets;
5138
5139 ut_ad(!locksys::owns_exclusive_global_latch());
5140
5141 locksys::Shard_latch_guard guard{block->get_page_id()};
5142 mutex_enter(&trx_sys->mutex);
5143 loop:
5144 lock =
5145 lock_rec_get_first_on_page_addr(lock_sys->rec_hash, block->get_page_id());
5146
5147 if (!lock) {
5148 goto function_exit;
5149 }
5150
5151 ut_ad(!block->page.file_page_was_freed);
5152
5153 for (i = 0; i < nth_lock; i++) {
5154 lock = lock_rec_get_next_on_page_const(lock);
5155
5156 if (!lock) {
5157 goto function_exit;
5158 }
5159 }
5160
5161 ut_ad(!trx_is_ac_nl_ro(lock->trx));
5162
5163 if (!sync_check_find(SYNC_FSP))
5164 for (i = nth_bit; i < lock_rec_get_n_bits(lock); i++) {
5165 if (i == 1 || lock_rec_get_nth_bit(lock, i)) {
5166 rec = page_find_rec_with_heap_no(block->frame, i);
5167 ut_a(rec);
5168
5169 /* If this thread is holding the file space
5170 latch (fil_space_t::latch), the following
5171 check WILL break the latching order and may
5172 cause a deadlock of threads. */
5173
5174 locksys::rec_queue_validate_latched(block, rec, lock->index,
5175 offsets.compute(rec, lock->index));
5176
5177 nth_bit = i + 1;
5178
5179 goto loop;
5180 }
5181 }
5182
5183 nth_bit = 0;
5184 nth_lock++;
5185
5186 goto loop;
5187
5188 function_exit:
5189 mutex_exit(&trx_sys->mutex);
5190
5191 return (true);
5192 }
5193
5194 /** Validates the table locks.
5195 @return true if ok */
5196 static bool lock_validate_table_locks(
5197 const trx_ut_list_t *trx_list) /*!< in: trx list */
5198 {
5199 const trx_t *trx;
5200
5201 /* We need exclusive access to lock_sys to iterate over trxs' locks */
5202 ut_ad(locksys::owns_exclusive_global_latch());
5203 ut_ad(trx_sys_mutex_own());
5204
5205 ut_ad(trx_list == &trx_sys->rw_trx_list);
5206
5207 for (trx = UT_LIST_GET_FIRST(*trx_list); trx != nullptr;
5208 trx = UT_LIST_GET_NEXT(trx_list, trx)) {
5209 const lock_t *lock;
5210
5211 check_trx_state(trx);
5212
5213 for (lock = UT_LIST_GET_FIRST(trx->lock.trx_locks); lock != nullptr;
5214 lock = UT_LIST_GET_NEXT(trx_locks, lock)) {
5215 if (lock_get_type_low(lock) & LOCK_TABLE) {
5216 lock_table_queue_validate(lock->tab_lock.table);
5217 }
5218 }
5219 }
5220
5221 return (true);
5222 }
5223
5224 /** Validate a record lock's block */
5225 static void lock_rec_block_validate(const page_id_t &page_id) {
5226 /* The lock and the block that it is referring to may be freed at
5227 this point. We pass Page_fetch::POSSIBLY_FREED to skip a debug check.
5228 If the lock exists in lock_rec_validate_page() we assert
5229 !block->page.file_page_was_freed. */
5230
5231 buf_block_t *block;
5232 mtr_t mtr;
5233
5234 /* Make sure that the tablespace is not deleted while we are
5235 trying to access the page. */
5236 if (fil_space_t *space = fil_space_acquire(page_id.space())) {
5237 mtr_start(&mtr);
5238
5239 block = buf_page_get_gen(page_id, page_size_t(space->flags), RW_X_LATCH,
5240 nullptr, Page_fetch::POSSIBLY_FREED, __FILE__,
5241 __LINE__, &mtr);
5242
5243 buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK);
5244
5245 ut_ad(lock_rec_validate_page(block));
5246 mtr_commit(&mtr);
5247
5248 fil_space_release(space);
5249 }
5250 }
5251
5252 bool lock_validate() {
5253 typedef std::set<page_id_t, std::less<page_id_t>, ut_allocator<page_id_t>>
5254 page_addr_set;
5255
5256 page_addr_set pages;
5257 {
5258 /* lock_validate_table_locks() needs exclusive global latch, and we will
5259 inspect record locks from all shards */
5260 locksys::Global_exclusive_latch_guard guard{};
5261 mutex_enter(&trx_sys->mutex);
5262
5263 ut_a(lock_validate_table_locks(&trx_sys->rw_trx_list));
5264
5265 /* Iterate over all the record locks and validate the locks. We
5266 don't want to hog the lock_sys global latch and the trx_sys_t::mutex.
5267 Thus we release both latches before the validation check. */
5268
5269 for (ulint i = 0; i < hash_get_n_cells(lock_sys->rec_hash); i++) {
5270 for (const lock_t *lock = static_cast<const lock_t *>(
5271 HASH_GET_FIRST(lock_sys->rec_hash, i));
5272 lock != nullptr;
5273 lock = static_cast<const lock_t *>(HASH_GET_NEXT(hash, lock))) {
5274 ut_ad(!trx_is_ac_nl_ro(lock->trx));
5275 ut_ad(lock_get_type(lock) == LOCK_REC);
5276 pages.emplace(lock->rec_lock.page_id);
5277 }
5278 }
5279
5280 mutex_exit(&trx_sys->mutex);
5281 }
5282 std::for_each(pages.cbegin(), pages.cend(), lock_rec_block_validate);
5283
5284 return (true);
5285 }
5286 #endif /* UNIV_DEBUG */
5287 /*============ RECORD LOCK CHECKS FOR ROW OPERATIONS ====================*/
5288
5289 /** Checks if locks of other transactions prevent an immediate insert of
5290 a record. If they do, first tests if the query thread should anyway
5291 be suspended for some reason; if not, then puts the transaction and
5292 the query thread to the lock wait state and inserts a waiting request
5293 for a gap x-lock to the lock queue.
5294 @return DB_SUCCESS, DB_LOCK_WAIT, or DB_DEADLOCK */
5295 dberr_t lock_rec_insert_check_and_lock(
5296 ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG bit is
5297 set, does nothing */
5298 const rec_t *rec, /*!< in: record after which to insert */
5299 buf_block_t *block, /*!< in/out: buffer block of rec */
5300 dict_index_t *index, /*!< in: index */
5301 que_thr_t *thr, /*!< in: query thread */
5302 mtr_t *mtr, /*!< in/out: mini-transaction */
5303 ibool *inherit) /*!< out: set to true if the new
5304 inserted record maybe should inherit
5305 LOCK_GAP type locks from the successor
5306 record */
5307 {
5308 ut_ad(block->frame == page_align(rec));
5309 ut_ad(!dict_index_is_online_ddl(index) || index->is_clustered() ||
5310 (flags & BTR_CREATE_FLAG));
5311
5312 if (flags & BTR_NO_LOCKING_FLAG) {
5313 return (DB_SUCCESS);
5314 }
5315
5316 ut_ad(!index->table->is_temporary());
5317
5318 dberr_t err = DB_SUCCESS;
5319 lock_t *lock;
5320 ibool inherit_in = *inherit;
5321 trx_t *trx = thr_get_trx(thr);
5322 const rec_t *next_rec = page_rec_get_next_const(rec);
5323 ulint heap_no = page_rec_get_heap_no(next_rec);
5324
5325 {
5326 locksys::Shard_latch_guard guard{block->get_page_id()};
5327
5328 /* When inserting a record into an index, the table must be at
5329 least IX-locked. When we are building an index, we would pass
5330 BTR_NO_LOCKING_FLAG and skip the locking altogether. */
5331 ut_ad(lock_table_has(trx, index->table, LOCK_IX));
5332
5333 /* Spatial index does not use GAP lock protection. It uses
5334 "predicate lock" to protect the "range" */
5335 ut_ad(!dict_index_is_spatial(index));
5336
5337 lock = lock_rec_get_first(lock_sys->rec_hash, block, heap_no);
5338
5339 if (lock == nullptr) {
5340 *inherit = false;
5341 } else {
5342 *inherit = true;
5343
5344 /* If another transaction has an explicit lock request which locks
5345 the gap, waiting or granted, on the successor, the insert has to wait.
5346
5347 An exception is the case where the lock by the another transaction
5348 is a gap type lock which it placed to wait for its turn to insert. We
5349 do not consider that kind of a lock conflicting with our insert. This
5350 eliminates an unnecessary deadlock which resulted when 2 transactions
5351 had to wait for their insert. Both had waiting gap type lock requests
5352 on the successor, which produced an unnecessary deadlock. */
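/* A concrete illustration (hypothetical timeline): trx1 and trx2 each hold
a waiting LOCK_X | LOCK_GAP | LOCK_INSERT_INTENTION request on the same
successor record, both queued behind some conflicting lock. If each waiting
gap lock were treated as conflicting with the other transaction's insert,
trx1 would wait for trx2 and trx2 for trx1 even though the two inserts do
not actually block each other. Hence waiting insert-intention gap locks
are not considered conflicts here. */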
5353
5354 const ulint type_mode = LOCK_X | LOCK_GAP | LOCK_INSERT_INTENTION;
5355
5356 const lock_t *wait_for =
5357 lock_rec_other_has_conflicting(type_mode, block, heap_no, trx);
5358
5359 if (wait_for != nullptr) {
5360 RecLock rec_lock(thr, index, block, heap_no, type_mode);
5361
5362 trx_mutex_enter(trx);
5363
5364 err = rec_lock.add_to_waitq(wait_for);
5365
5366 trx_mutex_exit(trx);
5367 }
5368 }
5369 } /* Shard_latch_guard */
5370
5371 switch (err) {
5372 case DB_SUCCESS_LOCKED_REC:
5373 err = DB_SUCCESS;
5374 /* fall through */
5375 case DB_SUCCESS:
5376 if (!inherit_in || index->is_clustered()) {
5377 break;
5378 }
5379
5380 /* Update the page max trx id field */
5381 page_update_max_trx_id(block, buf_block_get_page_zip(block), trx->id,
5382 mtr);
5383 default:
5384 /* We only care about the two return values. */
5385 break;
5386 }
5387
5388 ut_d(locksys::rec_queue_latch_and_validate(block, next_rec, index));
5389 ut_ad(err == DB_SUCCESS || err == DB_LOCK_WAIT || err == DB_DEADLOCK);
5390
5391 return (err);
5392 }
5393
5394 /** Creates an explicit record lock for a running transaction that currently
5395 only has an implicit lock on the record. The transaction instance must have a
5396 reference count > 0 so that it can't be committed and freed before this
5397 function has completed. */
5398 static void lock_rec_convert_impl_to_expl_for_trx(
5399 const buf_block_t *block, /*!< in: buffer block of rec */
5400 const rec_t *rec, /*!< in: user record on page */
5401 dict_index_t *index, /*!< in: index of record */
5402 const ulint *offsets, /*!< in: rec_get_offsets(rec, index) */
5403 trx_t *trx, /*!< in/out: active transaction */
5404 ulint heap_no) /*!< in: rec heap number to lock */
5405 {
5406 ut_ad(trx_is_referenced(trx));
5407
5408 DEBUG_SYNC_C("before_lock_rec_convert_impl_to_expl_for_trx");
5409 {
5410 locksys::Shard_latch_guard guard{block->get_page_id()};
5411 /* This trx->mutex acquisition here is not really needed.
5412 Its purpose is to prevent a state transition between calls to trx_state_eq()
5413 and lock_rec_add_to_queue().
5414 But one can prove that even if the state did change, it is not
5415 a big problem, because we still keep the reference count from dropping
5416 to zero, so the trx object is still in use, and we hold the shard latched,
5417 so trx cannot release its explicit lock (if it has any), and thus we will
5418 notice the explicit lock in lock_rec_has_expl().
5419 On the other hand, if trx does not have an explicit lock, then we would create
5420 one on its behalf, which is wasteful, but does not cause a problem, as once
5421 the reference count drops to zero the trx will notice and remove this new
5422 explicit lock. Also, even if some other trx had observed that trx is already
5423 removed from the rw trxs list and thus ignored the implicit lock and decided to
5424 add its own lock, it would still have to wait for the shard latch before adding
5425 its lock. However, it does not cost us much to simply take the trx->mutex
5426 and avoid this whole shaky reasoning. */
5427 trx_mutex_enter(trx);
5428
5429 ut_ad(!index->is_clustered() ||
5430 trx->id ==
5431 lock_clust_rec_some_has_impl(
5432 rec, index,
5433 offsets ? offsets : Rec_offsets().compute(rec, index)));
5434
5435 ut_ad(!trx_state_eq(trx, TRX_STATE_NOT_STARTED));
5436
5437 if (!trx_state_eq(trx, TRX_STATE_COMMITTED_IN_MEMORY) &&
5438 !lock_rec_has_expl(LOCK_X | LOCK_REC_NOT_GAP, block, heap_no, trx)) {
5439 ulint type_mode;
5440
5441 type_mode = (LOCK_REC | LOCK_X | LOCK_REC_NOT_GAP);
5442
5443 lock_rec_add_to_queue(type_mode, block, heap_no, index, trx, true);
5444 }
5445
5446 trx_mutex_exit(trx);
5447 }
5448
5449 trx_release_reference(trx);
5450
5451 DEBUG_SYNC_C("after_lock_rec_convert_impl_to_expl_for_trx");
5452 }
5453
5454 /** If a transaction has an implicit x-lock on a record, but no explicit x-lock
5455 set on the record, sets one for it.
5456 @param[in] block buffer block of rec
5457 @param[in] rec user record on page
5458 @param[in] index index of record
5459 @param[in] offsets rec_get_offsets(rec, index) */
5460 static void lock_rec_convert_impl_to_expl(const buf_block_t *block,
5461 const rec_t *rec, dict_index_t *index,
5462 const ulint *offsets) {
5463 trx_t *trx;
5464
5465 ut_ad(!locksys::owns_exclusive_global_latch());
5466 ut_ad(page_rec_is_user_rec(rec));
5467 ut_ad(rec_offs_validate(rec, index, offsets));
5468 ut_ad(!page_rec_is_comp(rec) == !rec_offs_comp(offsets));
5469
5470 DEBUG_SYNC_C("lock_rec_convert_impl_to_expl");
5471
5472 if (index->is_clustered()) {
5473 trx_id_t trx_id;
5474
5475 trx_id = lock_clust_rec_some_has_impl(rec, index, offsets);
5476
5477 trx = trx_rw_is_active(trx_id, nullptr, true);
5478 } else {
5479 ut_ad(!dict_index_is_online_ddl(index));
5480
5481 trx = lock_sec_rec_some_has_impl(rec, index, offsets);
5482 if (trx) {
5483 DEBUG_SYNC_C("lock_rec_convert_impl_to_expl_will_validate");
5484 ut_ad(!lock_rec_other_trx_holds_expl(LOCK_S | LOCK_REC_NOT_GAP, trx, rec,
5485 block));
5486 }
5487 }
5488
5489 if (trx != nullptr) {
5490 ulint heap_no = page_rec_get_heap_no(rec);
5491
5492 ut_ad(trx_is_referenced(trx));
5493
5494 /* If the transaction is still active and has no
5495 explicit x-lock set on the record, set one for it.
5496 trx cannot be committed until the ref count is zero. */
5497
5498 lock_rec_convert_impl_to_expl_for_trx(block, rec, index, offsets, trx,
5499 heap_no);
5500 }
5501 }
5502
5503 void lock_rec_convert_active_impl_to_expl(const buf_block_t *block,
5504 const rec_t *rec, dict_index_t *index,
5505 const ulint *offsets, trx_t *trx,
5506 ulint heap_no) {
5507 trx_reference(trx, true);
5508 lock_rec_convert_impl_to_expl_for_trx(block, rec, index, offsets, trx,
5509 heap_no);
5510 }
5511
5512 /** Checks if locks of other transactions prevent an immediate modify (update,
5513 delete mark, or delete unmark) of a clustered index record. If they do,
5514 first tests if the query thread should anyway be suspended for some
5515 reason; if not, then puts the transaction and the query thread to the
5516 lock wait state and inserts a waiting request for a record x-lock to the
5517 lock queue.
5518 @return DB_SUCCESS, DB_LOCK_WAIT, or DB_DEADLOCK */
5519 dberr_t lock_clust_rec_modify_check_and_lock(
5520 ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG
5521 bit is set, does nothing */
5522 const buf_block_t *block, /*!< in: buffer block of rec */
5523 const rec_t *rec, /*!< in: record which should be
5524 modified */
5525 dict_index_t *index, /*!< in: clustered index */
5526 const ulint *offsets, /*!< in: rec_get_offsets(rec, index) */
5527 que_thr_t *thr) /*!< in: query thread */
5528 {
5529 dberr_t err;
5530 ulint heap_no;
5531
5532 ut_ad(rec_offs_validate(rec, index, offsets));
5533 ut_ad(index->is_clustered());
5534 ut_ad(block->frame == page_align(rec));
5535
5536 if (flags & BTR_NO_LOCKING_FLAG) {
5537 return (DB_SUCCESS);
5538 }
5539 ut_ad(!index->table->is_temporary());
5540
5541 heap_no = rec_offs_comp(offsets) ? rec_get_heap_no_new(rec)
5542 : rec_get_heap_no_old(rec);
5543
5544 /* If a transaction has no explicit x-lock set on the record, set one
5545 for it */
5546
5547 lock_rec_convert_impl_to_expl(block, rec, index, offsets);
5548
5549 {
5550 locksys::Shard_latch_guard guard{block->get_page_id()};
5551 ut_ad(lock_table_has(thr_get_trx(thr), index->table, LOCK_IX));
5552
5553 err = lock_rec_lock(true, SELECT_ORDINARY, LOCK_X | LOCK_REC_NOT_GAP, block,
5554 heap_no, index, thr);
5555
5556 MONITOR_INC(MONITOR_NUM_RECLOCK_REQ);
5557 }
5558
5559 ut_d(locksys::rec_queue_latch_and_validate(block, rec, index, offsets));
5560
5561 if (err == DB_SUCCESS_LOCKED_REC) {
5562 err = DB_SUCCESS;
5563 }
5564 ut_ad(err == DB_SUCCESS || err == DB_LOCK_WAIT || err == DB_DEADLOCK);
5565 return (err);
5566 }
5567
5568 /** Checks if locks of other transactions prevent an immediate modify (delete
5569 mark or delete unmark) of a secondary index record.
5570 @return DB_SUCCESS, DB_LOCK_WAIT, or DB_DEADLOCK */
5571 dberr_t lock_sec_rec_modify_check_and_lock(
5572 ulint flags, /*!< in: if BTR_NO_LOCKING_FLAG
5573 bit is set, does nothing */
5574 buf_block_t *block, /*!< in/out: buffer block of rec */
5575 const rec_t *rec, /*!< in: record which should be
5576 modified; NOTE: as this is a secondary
5577 index, we always have to modify the
5578 clustered index record first: see the
5579 comment below */
5580 dict_index_t *index, /*!< in: secondary index */
5581 que_thr_t *thr, /*!< in: query thread
5582 (can be NULL if BTR_NO_LOCKING_FLAG) */
5583 mtr_t *mtr) /*!< in/out: mini-transaction */
5584 {
5585 dberr_t err;
5586 ulint heap_no;
5587
5588 ut_ad(!index->is_clustered());
5589 ut_ad(!dict_index_is_online_ddl(index) || (flags & BTR_CREATE_FLAG));
5590 ut_ad(block->frame == page_align(rec));
5591
5592 if (flags & BTR_NO_LOCKING_FLAG) {
5593 return (DB_SUCCESS);
5594 }
5595 ut_ad(!index->table->is_temporary());
5596
5597 heap_no = page_rec_get_heap_no(rec);
5598
5599 /* Another transaction cannot have an implicit lock on the record,
5600 because when we come here, we already have modified the clustered
5601 index record, and this would not have been possible if another active
5602 transaction had modified this secondary index record. */
5603 {
5604 locksys::Shard_latch_guard guard{block->get_page_id()};
5605
5606 ut_ad(lock_table_has(thr_get_trx(thr), index->table, LOCK_IX));
5607
5608 err = lock_rec_lock(true, SELECT_ORDINARY, LOCK_X | LOCK_REC_NOT_GAP, block,
5609 heap_no, index, thr);
5610
5611 MONITOR_INC(MONITOR_NUM_RECLOCK_REQ);
5612 }
5613
5614 ut_d(locksys::rec_queue_latch_and_validate(block, rec, index));
5615
5616 if (err == DB_SUCCESS || err == DB_SUCCESS_LOCKED_REC) {
5617 /* Update the page max trx id field */
5618 /* It might not be necessary to do this if
5619 err == DB_SUCCESS (no new lock created),
5620 but it should not cost too much performance. */
5621 page_update_max_trx_id(block, buf_block_get_page_zip(block),
5622 thr_get_trx(thr)->id, mtr);
5623 err = DB_SUCCESS;
5624 }
5625 ut_ad(err == DB_SUCCESS || err == DB_LOCK_WAIT || err == DB_DEADLOCK);
5626 return (err);
5627 }
5628
5629 dberr_t lock_sec_rec_read_check_and_lock(
5630 const lock_duration_t duration, const buf_block_t *block, const rec_t *rec,
5631 dict_index_t *index, const ulint *offsets, const select_mode sel_mode,
5632 const lock_mode mode, const ulint gap_mode, que_thr_t *thr) {
5633 dberr_t err;
5634 ulint heap_no;
5635
5636 ut_ad(!index->is_clustered());
5637 ut_ad(!dict_index_is_online_ddl(index));
5638 ut_ad(block->frame == page_align(rec));
5639 ut_ad(page_rec_is_user_rec(rec) || page_rec_is_supremum(rec));
5640 ut_ad(rec_offs_validate(rec, index, offsets));
5641 ut_ad(mode == LOCK_X || mode == LOCK_S);
5642
5643 if (srv_read_only_mode || index->table->is_temporary()) {
5644 return (DB_SUCCESS);
5645 }
5646
5647 heap_no = page_rec_get_heap_no(rec);
5648
5649 /* Some transaction may have an implicit x-lock on the record only
5650 if the max trx id for the page >= min trx id for the trx list or a
5651 database recovery is running. */
5652
5653 if ((page_get_max_trx_id(block->frame) >= trx_rw_min_trx_id() ||
5654 recv_recovery_is_on()) &&
5655 !page_rec_is_supremum(rec)) {
5656 lock_rec_convert_impl_to_expl(block, rec, index, offsets);
5657 }
5658 {
5659 locksys::Shard_latch_guard guard{block->get_page_id()};
5660
5661 if (duration == lock_duration_t::AT_LEAST_STATEMENT) {
5662 lock_protect_locks_till_statement_end(thr);
5663 }
5664
5665 ut_ad(mode != LOCK_X ||
5666 lock_table_has(thr_get_trx(thr), index->table, LOCK_IX));
5667 ut_ad(mode != LOCK_S ||
5668 lock_table_has(thr_get_trx(thr), index->table, LOCK_IS));
5669
5670 err = lock_rec_lock(false, sel_mode, mode | gap_mode, block, heap_no, index,
5671 thr);
5672
5673 MONITOR_INC(MONITOR_NUM_RECLOCK_REQ);
5674 }
5675 DEBUG_SYNC_C("lock_sec_rec_read_check_and_lock_has_locked");
5676
5677 ut_d(locksys::rec_queue_latch_and_validate(block, rec, index, offsets));
5678 ut_ad(err == DB_SUCCESS || err == DB_SUCCESS_LOCKED_REC ||
5679 err == DB_LOCK_WAIT || err == DB_DEADLOCK || err == DB_SKIP_LOCKED ||
5680 err == DB_LOCK_NOWAIT);
5681 return (err);
5682 }
5683
5684 dberr_t lock_clust_rec_read_check_and_lock(
5685 const lock_duration_t duration, const buf_block_t *block, const rec_t *rec,
5686 dict_index_t *index, const ulint *offsets, const select_mode sel_mode,
5687 const lock_mode mode, const ulint gap_mode, que_thr_t *thr) {
5688 dberr_t err;
5689 ulint heap_no;
5690 DEBUG_SYNC_C("before_lock_clust_rec_read_check_and_lock");
5691 ut_ad(index->is_clustered());
5692 ut_ad(block->frame == page_align(rec));
5693 ut_ad(page_rec_is_user_rec(rec) || page_rec_is_supremum(rec));
5694 ut_ad(gap_mode == LOCK_ORDINARY || gap_mode == LOCK_GAP ||
5695 gap_mode == LOCK_REC_NOT_GAP);
5696 ut_ad(rec_offs_validate(rec, index, offsets));
5697
5698 if (srv_read_only_mode || index->table->is_temporary()) {
5699 return (DB_SUCCESS);
5700 }
5701
5702 heap_no = page_rec_get_heap_no(rec);
5703
5704 if (heap_no != PAGE_HEAP_NO_SUPREMUM) {
5705 lock_rec_convert_impl_to_expl(block, rec, index, offsets);
5706 }
5707
5708 DEBUG_SYNC_C("after_lock_clust_rec_read_check_and_lock_impl_to_expl");
5709 {
5710 locksys::Shard_latch_guard guard{block->get_page_id()};
5711
5712 if (duration == lock_duration_t::AT_LEAST_STATEMENT) {
5713 lock_protect_locks_till_statement_end(thr);
5714 }
5715
5716 ut_ad(mode != LOCK_X ||
5717 lock_table_has(thr_get_trx(thr), index->table, LOCK_IX));
5718 ut_ad(mode != LOCK_S ||
5719 lock_table_has(thr_get_trx(thr), index->table, LOCK_IS));
5720
5721 err = lock_rec_lock(false, sel_mode, mode | gap_mode, block, heap_no, index,
5722 thr);
5723
5724 MONITOR_INC(MONITOR_NUM_RECLOCK_REQ);
5725 }
5726
5727 ut_d(locksys::rec_queue_latch_and_validate(block, rec, index, offsets));
5728
5729 DEBUG_SYNC_C("after_lock_clust_rec_read_check_and_lock");
5730 ut_ad(err == DB_SUCCESS || err == DB_SUCCESS_LOCKED_REC ||
5731 err == DB_LOCK_WAIT || err == DB_DEADLOCK || err == DB_SKIP_LOCKED ||
5732 err == DB_LOCK_NOWAIT);
5733 return (err);
5734 }
5735 /** Checks if locks of other transactions prevent an immediate read, or passing
5736 over by a read cursor, of a clustered index record. If they do, first tests
5737 if the query thread should anyway be suspended for some reason; if not, then
5738 puts the transaction and the query thread to the lock wait state and inserts a
5739 waiting request for a record lock to the lock queue. Sets the requested mode
5740 lock on the record. This is an alternative version of
5741 lock_clust_rec_read_check_and_lock() that does not require the parameter
5742 "offsets".
5743 @return DB_SUCCESS, DB_LOCK_WAIT, or DB_DEADLOCK */
5744 dberr_t lock_clust_rec_read_check_and_lock_alt(
5745 const buf_block_t *block, /*!< in: buffer block of rec */
5746 const rec_t *rec, /*!< in: user record or page
5747 supremum record which should
5748 be read or passed over by a
5749 read cursor */
5750 dict_index_t *index, /*!< in: clustered index */
5751 lock_mode mode, /*!< in: mode of the lock which
5752 the read cursor should set on
5753 records: LOCK_S or LOCK_X; the
5754 latter is possible in
5755 SELECT FOR UPDATE */
5756 ulint gap_mode, /*!< in: LOCK_ORDINARY, LOCK_GAP, or
5757 LOCK_REC_NOT_GAP */
5758 que_thr_t *thr) /*!< in: query thread */
5759 {
5760 dberr_t err = lock_clust_rec_read_check_and_lock(
5761 lock_duration_t::REGULAR, block, rec, index,
5762 Rec_offsets().compute(rec, index), SELECT_ORDINARY, mode, gap_mode, thr);
5763
5764 if (err == DB_SUCCESS_LOCKED_REC) {
5765 err = DB_SUCCESS;
5766 }
5767 ut_ad(err == DB_SUCCESS || err == DB_LOCK_WAIT || err == DB_DEADLOCK);
5768 return (err);
5769 }
5770
5771 /** Release the last lock from the transaction's autoinc locks.
5772 @param[in] trx trx whose vector of AUTOINC locks to modify */
5773 UNIV_INLINE
5774 void lock_release_autoinc_last_lock(trx_t *trx) {
5775 ulint last;
5776 lock_t *lock;
5777
5778 /* We will access trx->lock.autoinc_locks which requires trx->mutex */
5779 ut_ad(trx_mutex_own(trx));
5780 ib_vector_t *autoinc_locks = trx->lock.autoinc_locks;
5781
5782 /* Since we do not know for which table the trx has created the last lock,
5783 we cannot narrow the required latch to any particular shard, and thus we
5784 require exclusive access to lock_sys here. */
5785 ut_ad(locksys::owns_exclusive_global_latch());
5786 ut_a(!ib_vector_is_empty(autoinc_locks));
5787
5788 /* The lock to be released must be the last lock acquired. */
5789 last = ib_vector_size(autoinc_locks) - 1;
5790 lock = *static_cast<lock_t **>(ib_vector_get(autoinc_locks, last));
5791
5792 /* Should have only AUTOINC locks in the vector. */
5793 ut_a(lock_get_mode(lock) == LOCK_AUTO_INC);
5794 ut_a(lock_get_type(lock) == LOCK_TABLE);
5795
5796 ut_a(lock->tab_lock.table != nullptr);
5797
5798 /* This will remove the lock from the trx autoinc_locks too. */
5799 lock_table_dequeue(lock);
5800
5801 /* Remove from the table vector too. */
5802 lock_trx_table_locks_remove(lock);
5803 }
5804
5805 /** Check if a transaction holds any autoinc locks.
5806 @return true if the transaction holds any AUTOINC locks. */
5807 static bool lock_trx_holds_autoinc_locks(
5808 const trx_t *trx) /*!< in: transaction */
5809 {
5810 /* We will access trx->lock.autoinc_locks which requires trx->mutex */
5811 ut_ad(trx_mutex_own(trx));
5812 ut_a(trx->lock.autoinc_locks != nullptr);
5813
5814 return (!ib_vector_is_empty(trx->lock.autoinc_locks));
5815 }
5816
5817 /** Release all the transaction's autoinc locks. */
5818 static void lock_release_autoinc_locks(trx_t *trx) /*!< in/out: transaction */
5819 {
5820 /* Since we do not know for which table(s) the trx has created the lock(s),
5821 we cannot narrow the required latch to any particular shard, and thus we
5822 require exclusive access to lock_sys here. */
5823 ut_ad(locksys::owns_exclusive_global_latch());
5824 ut_ad(trx_mutex_own(trx));
5825
5826 ut_a(trx->lock.autoinc_locks != nullptr);
5827
5828 /* We release the locks in reverse order. This is to
5829 avoid searching the vector for the element to delete at
5830 the lower level. See lock_table_remove_low() for details. */
5831 while (!ib_vector_is_empty(trx->lock.autoinc_locks)) {
5832 /* lock_table_remove_low() will also remove the lock from
5833 the transaction's autoinc_locks vector. */
5834 lock_release_autoinc_last_lock(trx);
5835 }
5836
5837 /* Should release all locks. */
5838 ut_a(ib_vector_is_empty(trx->lock.autoinc_locks));
5839 }
5840
5841 /** Gets the type of a lock. Non-inline version for using outside of the
5842 lock module.
5843 @return LOCK_TABLE or LOCK_REC */
5844 uint32_t lock_get_type(const lock_t *lock) /*!< in: lock */
5845 {
5846 return (lock_get_type_low(lock));
5847 }
5848
5849 uint64_t lock_get_trx_immutable_id(const lock_t *lock) {
5850 return (trx_immutable_id(lock->trx));
5851 }
5852
5853 trx_id_t lock_get_trx_id(const lock_t *lock) {
5854 return (trx_get_id_for_print(lock->trx));
5855 }
5856
5857 uint64_t lock_get_immutable_id(const lock_t *lock) {
5858 return (uint64_t{reinterpret_cast<uintptr_t>(lock)});
5859 }
5860
5861 /** Get the performance schema event (thread_id, event_id)
5862 that created the lock.
5863 @param[in] lock Lock
5864 @param[out] thread_id Thread ID that created the lock
5865 @param[out] event_id Event ID that created the lock
5866 */
5867 void lock_get_psi_event(const lock_t *lock, ulonglong *thread_id,
5868 ulonglong *event_id) {
5869 #if defined(HAVE_PSI_THREAD_INTERFACE) && defined(HAVE_PSI_DATA_LOCK_INTERFACE)
5870 *thread_id = lock->m_psi_internal_thread_id;
5871 *event_id = lock->m_psi_event_id;
5872 #else
5873 *thread_id = 0;
5874 *event_id = 0;
5875 #endif
5876 }
5877
5878 /** Get the first lock of a trx lock list.
5879 @param[in] trx_lock the trx lock
5880 @return The first lock
5881 */
5882 const lock_t *lock_get_first_trx_locks(const trx_lock_t *trx_lock) {
5883 /* Writes to trx->lock.trx_locks are protected by trx->mutex combined with a
5884 shared global lock_sys latch, and we assume we have the exclusive latch on
5885 lock_sys here */
5886 ut_ad(locksys::owns_exclusive_global_latch());
5887 const lock_t *result = UT_LIST_GET_FIRST(trx_lock->trx_locks);
5888 return (result);
5889 }
5890
5891 /** Get the next lock of a trx lock list.
5892 @param[in] lock the current lock
5893 @return The next lock
5894 */
5895 const lock_t *lock_get_next_trx_locks(const lock_t *lock) {
5896 /* Writes to trx->lock.trx_locks are protected by trx->mutex combined with a
5897 shared global lock_sys latch, and we assume we have the exclusive latch on
5898 lock_sys here */
5899 ut_ad(locksys::owns_exclusive_global_latch());
5900 const lock_t *result = UT_LIST_GET_NEXT(trx_locks, lock);
5901 return (result);
5902 }
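
/* Illustrative sketch (hypothetical caller, not part of the original source):
the two accessors above are meant to be combined into a traversal of a
transaction's lock list, which is only safe under the exclusive global
lock_sys latch that both of them assert:

  for (const lock_t *l = lock_get_first_trx_locks(&trx->lock); l != nullptr;
       l = lock_get_next_trx_locks(l)) {
    // inspect l, e.g. via lock_get_type_str(l)
  }
*/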

/** Gets the mode of a lock in a human readable string.
The string should not be free()'d or modified.
This function is a bit complex for the following reasons:
- the way it is used in performance schema requires that the memory pointed
to by the return value is accessible for a long time
- the caller never frees the memory
- so, we need to maintain a pool of these strings or use string literals
- there are many possible combinations of flags and thus it is impractical
to maintain the list of all possible literals and if/else logic
- moreover, sometimes performance_schema.data_locks is used precisely to
investigate some unexpected situation, thus limiting output of this function
only to expected combinations of flags might be misleading
@return lock mode */
const char *lock_get_mode_str(const lock_t *lock) /*!< in: lock */
{
  /* We use the exclusive global lock_sys latch to protect the global
  lock_cached_lock_mode_names mapping. */
  ut_ad(locksys::owns_exclusive_global_latch());

  const auto type_mode = lock->type_mode;
  const auto mode = lock->mode();
  const auto type = lock->type();
  /* type_mode is actually type + mode + flags.
  We are interested in the flags here,
  and we are not interested in LOCK_WAIT. */
  const auto flags = (type_mode & (~(uint)LOCK_WAIT)) - mode - type;

  /* Search for a cached string */
  const auto key = flags | mode;
  const auto found = lock_cached_lock_mode_names.find(key);
  if (found != lock_cached_lock_mode_names.end()) {
    return (found->second);
  }
  /* A new, not yet seen, mode of lock. We need to create a new string. */
  ut::ostringstream name_stream;
  /* lock_mode_string can be used to describe the mode, however the LOCK_
  prefix in the returned mode name makes the string a bit too verbose for our
  purpose, as performance_schema.data_locks LOCK_MODE is a varchar(32), so we
  strip the prefix */
  const char *mode_string = lock_mode_string(mode);
  const char *LOCK_PREFIX = "LOCK_";
  if (!strncmp(mode_string, LOCK_PREFIX, strlen(LOCK_PREFIX))) {
    mode_string = mode_string + strlen(LOCK_PREFIX);
  }
  name_stream << mode_string;
  /* We concatenate the constants in ascending order. */
  uint recognized_flags = 0;
  for (const auto &lock_constant : lock_constant_names) {
    const auto value = lock_constant.first;
    /* Constants have to be single bit only for this algorithm to work */
    ut_ad((value & (value - 1)) == 0);
    if (flags & value) {
      recognized_flags += value;
      name_stream << ',' << lock_constant.second;
    }
  }
  if (flags != recognized_flags) {
    return "UNKNOWN";
  }
  auto name_string = name_stream.str();
  char *name_buffer = (char *)ut_malloc_nokey(name_string.length() + 1);
  strcpy(name_buffer, name_string.c_str());
  lock_cached_lock_mode_names[key] = name_buffer;
  return (name_buffer);
}
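
/* Worked example (derived from the code above): for a lock whose type_mode
contains mode LOCK_X plus the flags LOCK_GAP and LOCK_INSERT_INTENTION, the
"LOCK_" prefix is stripped from the mode name, and the recognized single-bit
flags are appended in ascending numeric order, producing the cached string

  "X,GAP,INSERT_INTENTION"

whereas any flag bit missing from lock_constant_names makes the function
return "UNKNOWN" rather than a partial description. */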

/** Gets the type of a lock in a human readable string.
The string should not be free()'d or modified.
@return lock type */
const char *lock_get_type_str(const lock_t *lock) /*!< in: lock */
{
  switch (lock_get_type_low(lock)) {
    case LOCK_REC:
      return ("RECORD");
    case LOCK_TABLE:
      return ("TABLE");
    default:
      return ("UNKNOWN");
  }
}

/** Gets the table on which the lock is.
@return table */
UNIV_INLINE
dict_table_t *lock_get_table(const lock_t *lock) /*!< in: lock */
{
  switch (lock_get_type_low(lock)) {
    case LOCK_REC:
      ut_ad(lock->index->is_clustered() ||
            !dict_index_is_online_ddl(lock->index));
      return (lock->index->table);
    case LOCK_TABLE:
      return (lock->tab_lock.table);
    default:
      ut_error;
  }
}

/** Gets the id of the table on which the lock is.
@return id of the table */
table_id_t lock_get_table_id(const lock_t *lock) /*!< in: lock */
{
  dict_table_t *table;

  table = lock_get_table(lock);

  return (table->id);
}

/** Determine which table a lock is associated with.
@param[in] lock the lock
@return name of the table */
const table_name_t &lock_get_table_name(const lock_t *lock) {
  return (lock_get_table(lock)->name);
}

/** For a record lock, gets the index on which the lock is.
@return index */
const dict_index_t *lock_rec_get_index(const lock_t *lock) /*!< in: lock */
{
  ut_a(lock_get_type_low(lock) == LOCK_REC);
  ut_ad(lock->index->is_clustered() || !dict_index_is_online_ddl(lock->index));

  return (lock->index);
}

/** For a record lock, gets the name of the index on which the lock is.
The string should not be free()'d or modified.
@return name of the index */
const char *lock_rec_get_index_name(const lock_t *lock) /*!< in: lock */
{
  ut_a(lock_get_type_low(lock) == LOCK_REC);
  ut_ad(lock->index->is_clustered() || !dict_index_is_online_ddl(lock->index));

  return (lock->index->name);
}

page_id_t lock_rec_get_page_id(const lock_t *lock) {
  ut_a(lock_get_type_low(lock) == LOCK_REC);
  return lock->rec_lock.page_id;
}

/** Cancels a waiting lock request and releases possible other transactions
waiting behind it.
@param[in,out] lock Waiting lock request */
void lock_cancel_waiting_and_release(lock_t *lock) {
  /* Requiring the exclusive global latch serves several purposes here.

  1. In the case of a LOCK_TABLE lock we will call
  lock_release_autoinc_locks(), which iterates over locks held by this
  transaction, and it is not clear if these locks are from the same table.
  Frankly, it is not clear why we even release all of them here (note that
  none of them is our `lock` because we don't store waiting locks in the
  trx->autoinc_locks vector, only granted ones). Perhaps this is because this
  trx is going to be rolled back anyway, and this seemed to be a good moment
  to release them?

  2. During lock_rec_dequeue_from_page() and lock_table_dequeue() we might
  latch the trx mutex of another transaction to grant it a lock. The rules
  meant to avoid deadlocks between trx mutexes require us to either use an
  exclusive global latch, or to first latch a trx which has
  trx->lock.wait_lock == nullptr. As `lock == lock->trx->lock.wait_lock` and
  thus is not nullptr, we have to use the first approach, or complicate the
  proof of deadlock avoidance enormously. */
  ut_ad(locksys::owns_exclusive_global_latch());
  /* We will access lock->trx->lock.autoinc_locks which requires trx->mutex */
  ut_ad(trx_mutex_own(lock->trx));

  if (lock_get_type_low(lock) == LOCK_REC) {
    lock_rec_dequeue_from_page(lock);
  } else {
    ut_ad(lock_get_type_low(lock) & LOCK_TABLE);

    if (lock->trx->lock.autoinc_locks != nullptr) {
      lock_release_autoinc_locks(lock->trx);
    }

    lock_table_dequeue(lock);
  }

  lock_reset_wait_and_release_thread_if_suspended(lock);
}

/** Unlocks AUTO_INC type locks that were possibly reserved by a trx. This
function should be called at the end of an SQL statement, by the
connection thread that owns the transaction (trx->mysql_thd). */
void lock_unlock_table_autoinc(trx_t *trx) /*!< in/out: transaction */
{
  ut_ad(!locksys::owns_exclusive_global_latch());
  ut_ad(!trx_mutex_own(trx));

  /* This can be invoked on NOT_STARTED, ACTIVE, PREPARED,
  but not COMMITTED transactions. */

  ut_ad(trx_state_eq(trx, TRX_STATE_NOT_STARTED) ||
        trx_state_eq(trx, TRX_STATE_FORCED_ROLLBACK) ||
        !trx_state_eq(trx, TRX_STATE_COMMITTED_IN_MEMORY));

  /* The trx->lock.autoinc_locks are protected by trx->mutex and in principle
  can be modified by other threads:
  1. When another thread calls lock_grant on trx->lock.wait_lock.
  (This is impossible here, because we've verified !trx->lock.wait_lock)
  2. During recovery: lock_remove_recoverd_trx_record_locks ->
  lock_table_remove_low -> lock_table_remove_autoinc_lock ->
  lock_table_pop_autoinc_lock.
  (But AFAIK recovery is a single-threaded process)
  3. During DROP TABLE: lock_remove_all_on_table_for_trx ->
  lock_table_remove_low ...
  (It is unclear if this can happen in parallel to our trx)
  Please note that from this list only lock_grant tries to add something
  to the trx->lock.autoinc_locks (namely the granted AUTOINC lock); the
  others try to remove something. This means that we can treat the result of
  lock_trx_holds_autoinc_locks(trx) as a heuristic. If it returns true,
  then the trx might or (with small probability) might not hold locks, so we'd
  better call lock_release_autoinc_locks with proper latching.
  If it returns false, then it is guaranteed that the vector will remain empty.
  If we like risk, we could even call lock_trx_holds_autoinc_locks without
  trx->mutex protection, but:
  1. why risk? It is not obvious how thread-safe our vector implementation is
  2. trx->mutex is cheap */
  trx_mutex_enter(trx);
  ut_ad(!trx->lock.wait_lock);
  bool might_have_autoinc_locks = lock_trx_holds_autoinc_locks(trx);
  trx_mutex_exit(trx);

  if (might_have_autoinc_locks) {
    /* lock_release_autoinc_locks() requires the exclusive global latch as the
    AUTOINC locks might be on tables from different shards. Identifying and
    latching them in the correct order would complicate this rarely-taken
    path. */
    locksys::Global_exclusive_latch_guard guard{};
    trx_mutex_enter(trx);
    lock_release_autoinc_locks(trx);
    trx_mutex_exit(trx);
  }
}
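
/* A sketch of the control flow used above (an analogue under assumed names,
not part of the original source): a cheap, trx->mutex protected read decides
whether to take the expensive exclusive global latch, and the release path
itself tolerates the locks having been removed in the meantime:

  trx_mutex_enter(trx);
  bool maybe = heuristic_check(trx);  // may spuriously say "true", but per
  trx_mutex_exit(trx);                // the comment above, never "false"
  if (maybe) {
    // correctness is re-established under full protection
    locksys::Global_exclusive_latch_guard guard{};
    trx_mutex_enter(trx);
    release_all(trx);  // a no-op if the locks are already gone
    trx_mutex_exit(trx);
  }
*/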

/** Releases a transaction's locks, and releases possible other transactions
waiting because of these locks. Changes the state of the transaction to
TRX_STATE_COMMITTED_IN_MEMORY. */
void lock_trx_release_locks(trx_t *trx) /*!< in/out: transaction */
{
  DEBUG_SYNC_C("before_lock_trx_release_locks");

  trx_mutex_enter(trx);

  check_trx_state(trx);
  ut_ad(trx_state_eq(trx, TRX_STATE_COMMITTED_IN_MEMORY));

  if (trx_is_referenced(trx)) {
    while (trx_is_referenced(trx)) {
      trx_mutex_exit(trx);

      DEBUG_SYNC_C("waiting_trx_is_not_referenced");

      /* Doing an implicit to explicit conversion
      should not be expensive. */
      ut_delay(ut_rnd_interval(0, srv_spin_wait_delay));

      trx_mutex_enter(trx);
    }
  }

  ut_ad(!trx_is_referenced(trx));

  /* If the background thread trx_rollback_or_clean_recovered()
  is still active then there is a chance that the rollback
  thread may see this trx as COMMITTED_IN_MEMORY and go ahead
  to clean it up calling trx_cleanup_at_db_startup(). This can
  happen in the case we are committing a trx here that was left
  in PREPARED state during the crash. Note that the commit of the
  rollback of a PREPARED trx happens in the recovery thread
  while the rollback of other transactions happens in the
  background thread. To avoid this race we unconditionally unset
  the is_recovered flag. */

  trx->is_recovered = false;

  trx_mutex_exit(trx);

  lock_release(trx);

  /* We don't remove the locks one by one from the vector for
  efficiency reasons. We simply reset it because we would have
  released all the locks anyway.
  At this point there should be no one else interested in our trx's
  locks as we've released and removed all of them, and the trx is no longer
  referenced so nobody will attempt implicit to explicit conversion either.
  Please note that we are either the thread which runs the transaction, or we
  are the thread of a high priority transaction which decided to kill trx, in
  which case it had to first make sure that it is no longer running in InnoDB.
  So the race between lock_table() accessing table_locks and our clear() should
  not happen.
  All that being said, it does not cost us anything in terms of performance to
  protect these operations with trx->mutex, which makes some class of errors
  impossible even if the above reasoning was wrong. */
  trx_mutex_enter(trx);
  trx->lock.table_locks.clear();
  trx->lock.n_rec_locks.store(0);

  ut_a(UT_LIST_GET_LEN(trx->lock.trx_locks) == 0);
  ut_a(ib_vector_is_empty(trx->lock.autoinc_locks));
  ut_a(trx->lock.table_locks.empty());

  mem_heap_empty(trx->lock.lock_heap);
  trx_mutex_exit(trx);
}

/** Check whether the transaction has already been rolled back because it
was selected as a deadlock victim, or if it has to wait then cancel
the wait lock.
@return DB_DEADLOCK, DB_LOCK_WAIT or DB_SUCCESS */
dberr_t lock_trx_handle_wait(trx_t *trx) /*!< in/out: trx lock state */
{
  dberr_t err;

  /* lock_cancel_waiting_and_release() requires the exclusive global latch,
  and so does reading trx->lock.wait_lock, to prevent races with B-tree page
  reorganization */
  locksys::Global_exclusive_latch_guard guard{};

  trx_mutex_enter(trx);

  if (trx->lock.was_chosen_as_deadlock_victim) {
    err = DB_DEADLOCK;
  } else if (trx->lock.wait_lock != nullptr) {
    lock_cancel_waiting_and_release(trx->lock.wait_lock);
    err = DB_LOCK_WAIT;
  } else {
    /* The lock was probably granted before we got here. */
    err = DB_SUCCESS;
  }

  trx_mutex_exit(trx);

  return (err);
}
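
/* Illustrative sketch (hypothetical caller, not part of the original source):
code that resumes after a lock wait can dispatch on the three possible return
values documented above:

  switch (lock_trx_handle_wait(trx)) {
    case DB_DEADLOCK:  // already chosen as a victim: the trx must roll back
      break;
    case DB_LOCK_WAIT:  // the waiting lock request was cancelled
      break;
    case DB_SUCCESS:  // the lock was granted before we got here
      break;
    default:
      ut_error;
  }
*/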

#ifdef UNIV_DEBUG
/** Do an exhaustive check for any locks (table or rec) against the table.
@return lock if found */
static const lock_t *lock_table_locks_lookup(
    const dict_table_t *table,     /*!< in: check if there are
                                   any locks held on records in
                                   this table or on the table
                                   itself */
    const trx_ut_list_t *trx_list) /*!< in: trx list to check */
{
  const trx_t *trx;

  ut_a(table != nullptr);
  /* We are going to iterate over multiple transactions, so even though we know
  which table we are looking for we can not narrow the required latch to just
  the shard which contains the table, because accessing trx->lock.trx_locks
  would be unsafe */
  ut_ad(locksys::owns_exclusive_global_latch());
  ut_ad(trx_sys_mutex_own());

  for (trx = UT_LIST_GET_FIRST(*trx_list); trx != nullptr;
       trx = UT_LIST_GET_NEXT(trx_list, trx)) {
    const lock_t *lock;

    check_trx_state(trx);

    for (lock = UT_LIST_GET_FIRST(trx->lock.trx_locks); lock != nullptr;
         lock = UT_LIST_GET_NEXT(trx_locks, lock)) {
      ut_a(lock->trx == trx);

      if (lock_get_type_low(lock) == LOCK_REC) {
        ut_ad(!dict_index_is_online_ddl(lock->index) ||
              lock->index->is_clustered());
        if (lock->index->table == table) {
          return (lock);
        }
      } else if (lock->tab_lock.table == table) {
        return (lock);
      }
    }
  }

  return (nullptr);
}
#endif /* UNIV_DEBUG */

bool lock_table_has_locks(const dict_table_t *table) {
  /* The n_rec_locks field might be modified by operations on any page shard,
  so we need to latch everything. Note that the results of this function will
  be obsolete as soon as we release the latch. It is called in contexts where
  we believe that the number of locks should either be zero or decreasing. For
  such a usage scenario, we might perhaps read n_rec_locks without a latch and
  restrict latching to just the table's shard. But that would complicate the
  debug version of the code for no significant gain, as this is not a hot
  path. */
  locksys::Global_exclusive_latch_guard guard{};

  bool has_locks =
      UT_LIST_GET_LEN(table->locks) > 0 || table->n_rec_locks.load() > 0;

#ifdef UNIV_DEBUG
  if (!has_locks) {
    mutex_enter(&trx_sys->mutex);

    ut_ad(!lock_table_locks_lookup(table, &trx_sys->rw_trx_list));

    mutex_exit(&trx_sys->mutex);
  }
#endif /* UNIV_DEBUG */

  return (has_locks);
}

/** Initialise the table lock list. */
void lock_table_lock_list_init(
    table_lock_list_t *lock_list) /*!< List to initialise */
{
  UT_LIST_INIT(*lock_list, &lock_table_t::locks);
}

/** Initialise the trx lock list. */
void lock_trx_lock_list_init(
    trx_lock_list_t *lock_list) /*!< List to initialise */
{
  UT_LIST_INIT(*lock_list, &lock_t::trx_locks);
}

/** Set the lock system timeout event. */
void lock_set_timeout_event() { os_event_set(lock_sys->timeout_event); }

#ifdef UNIV_DEBUG

bool lock_trx_has_rec_x_lock(que_thr_t *thr, const dict_table_t *table,
                             const buf_block_t *block, ulint heap_no) {
  ut_ad(heap_no > PAGE_HEAP_NO_SUPREMUM);

  const trx_t *trx = thr_get_trx(thr);
  locksys::Shard_latch_guard guard{block->get_page_id()};
  ut_a(lock_table_has(trx, table, LOCK_IX) || table->is_temporary());
  ut_a(lock_rec_has_expl(LOCK_X | LOCK_REC_NOT_GAP, block, heap_no, trx) ||
       table->is_temporary());
  return (true);
}
#endif /* UNIV_DEBUG */

/** rewind(3) the file used for storing the latest detected deadlock and
print a heading message to stderr if printing of all deadlocks to stderr
is enabled. */
void Deadlock_notifier::start_print() {
  /* I/O operations on lock_latest_err_file require an exclusive latch on
  lock_sys */
  ut_ad(locksys::owns_exclusive_global_latch());

  rewind(lock_latest_err_file);
  ut_print_timestamp(lock_latest_err_file);

  if (srv_print_all_deadlocks) {
    ib::info(ER_IB_MSG_643) << "Transactions deadlock detected, dumping"
                            << " detailed information.";
  }
}

/** Print a message to the deadlock file and possibly to stderr.
@param msg message to print */
void Deadlock_notifier::print(const char *msg) {
  /* I/O operations on lock_latest_err_file require an exclusive latch on
  lock_sys */
  ut_ad(locksys::owns_exclusive_global_latch());
  fputs(msg, lock_latest_err_file);

  if (srv_print_all_deadlocks) {
    ib::info(ER_IB_MSG_644) << msg;
  }
}

/** Print transaction data to the deadlock file and possibly to stderr.
@param trx transaction
@param max_query_len max query length to print */
void Deadlock_notifier::print(const trx_t *trx, ulint max_query_len) {
  /* We need an exclusive latch on lock_sys because:
  1. I/O operations on lock_latest_err_file
  2. lock_number_of_rows_locked()
  3. Accessing trx->lock fields requires either holding trx->mutex or latching
  the lock sys. */
  ut_ad(locksys::owns_exclusive_global_latch());

  trx_mutex_enter(trx);
  ulint n_rec_locks = lock_number_of_rows_locked(&trx->lock);
  ulint n_trx_locks = UT_LIST_GET_LEN(trx->lock.trx_locks);
  ulint heap_size = mem_heap_get_size(trx->lock.lock_heap);
  trx_mutex_exit(trx);

  mutex_enter(&trx_sys->mutex);

  trx_print_low(lock_latest_err_file, trx, max_query_len, n_rec_locks,
                n_trx_locks, heap_size);

  if (srv_print_all_deadlocks) {
    trx_print_low(stderr, trx, max_query_len, n_rec_locks, n_trx_locks,
                  heap_size);
  }

  mutex_exit(&trx_sys->mutex);
}

/** Print lock data to the deadlock file and possibly to stderr.
@param lock record or table type lock */
void Deadlock_notifier::print(const lock_t *lock) {
  /* I/O operations on lock_latest_err_file require an exclusive latch on
  lock_sys. */
  ut_ad(locksys::owns_exclusive_global_latch());

  if (lock_get_type_low(lock) == LOCK_REC) {
    lock_rec_print(lock_latest_err_file, lock);

    if (srv_print_all_deadlocks) {
      lock_rec_print(stderr, lock);
    }
  } else {
    lock_table_print(lock_latest_err_file, lock);

    if (srv_print_all_deadlocks) {
      lock_table_print(stderr, lock);
    }
  }
}

void Deadlock_notifier::print_title(size_t pos_on_cycle, const char *title) {
  /* I/O operations on lock_latest_err_file require an exclusive latch on
  lock_sys */
  ut_ad(locksys::owns_exclusive_global_latch());
  ut::ostringstream buff;
  buff << "\n*** (" << (pos_on_cycle + 1) << ") " << title << ":\n";
  print(buff.str().c_str());
}

void Deadlock_notifier::notify(const ut::vector<const trx_t *> &trxs_on_cycle,
                               const trx_t *victim_trx) {
  ut_ad(locksys::owns_exclusive_global_latch());

  start_print();
  const auto n = trxs_on_cycle.size();
  for (size_t i = 0; i < n; ++i) {
    const trx_t *trx = trxs_on_cycle[i];
    const trx_t *blocked_trx = trxs_on_cycle[0 < i ? i - 1 : n - 1];
    const lock_t *blocking_lock =
        lock_has_to_wait_in_queue(blocked_trx->lock.wait_lock, trx);
    ut_a(blocking_lock);

    print_title(i, "TRANSACTION");
    print(trx, 3000);

    print_title(i, "HOLDS THE LOCK(S)");
    print(blocking_lock);

    print_title(i, "WAITING FOR THIS LOCK TO BE GRANTED");
    print(trx->lock.wait_lock);
  }
  const auto victim_it =
      std::find(trxs_on_cycle.begin(), trxs_on_cycle.end(), victim_trx);
  ut_ad(victim_it != trxs_on_cycle.end());
  const auto victim_pos = std::distance(trxs_on_cycle.begin(), victim_it);
  ut::ostringstream buff;
  buff << "*** WE ROLL BACK TRANSACTION (" << (victim_pos + 1) << ")\n";
  print(buff.str().c_str());
  DBUG_PRINT("ib_lock", ("deadlock detected"));

#ifdef UNIV_DEBUG
  /* We perform this check only after the information is output, to give a
  developer as much information as we can for debugging the problem */
  for (const trx_t *trx : trxs_on_cycle) {
    ut_ad(is_allowed_to_be_on_cycle(trx->lock.wait_lock));
  }
#endif /* UNIV_DEBUG */

  lock_deadlock_found = true;
}
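
/* Shape of the report produced by notify() above, reconstructed from the
print_title()/print() calls (transaction and lock details elided):

  *** (1) TRANSACTION:
  ...
  *** (1) HOLDS THE LOCK(S):
  ...
  *** (1) WAITING FOR THIS LOCK TO BE GRANTED:
  ...
  *** (2) TRANSACTION:
  ...
  *** WE ROLL BACK TRANSACTION (1)

Each position on the cycle gets the three sections; the victim index printed
at the end is 1-based, matching the numbering of the sections. */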

#ifdef UNIV_DEBUG

bool Deadlock_notifier::is_allowed_to_be_on_cycle(const lock_t *lock) {
  /* The original purpose of this validation is to check record locks from
  DD & SDI tables only, because we think a deadlock for these locks should be
  prevented by MDL and proper updating order, but later, some exemptions were
  introduced (for more context see the comment to this function).
  In particular, we don't check table locks here, since there never was any
  guarantee saying a deadlock is impossible for table locks. */
  if (!lock->is_record_lock()) {
    return (true);
  }
  /* The only places where we don't expect deadlocks are in handling DD
  tables, and since WL#9538 also in code handling SDI tables.
  Therefore the second condition is that we only pay attention to DD and SDI
  tables. */
  const bool is_dd_or_sdi = (lock->index->table->is_dd_table ||
                             dict_table_is_sdi(lock->index->table->id));
  if (!is_dd_or_sdi) {
    return (true);
  }

  /* If we are still here, the lock is a record lock on some DD or SDI table.
  There are some such tables though, for which a deadlock is somewhat expected,
  for various reasons specific to these particular tables.
  So, we have a list of exceptions here:

  innodb_table_stats and innodb_index_stats
    These two tables are visible to the end user, so they can take part in
    quite arbitrary queries and transactions, so a deadlock is possible.
    Therefore we need to allow such deadlocks, as otherwise a user
    could crash a debug build of a server by issuing a specific sequence of
    queries. A DB_DEADLOCK error in dict0stats is either handled (see for
    example dict_stats_rename_table), or ignored silently (for example in
    dict_stats_process_entry_from_recalc_pool), but I am not aware of any
    situation in which DB_DEADLOCK could cause a serious problem.
    Most such queries are performed via dict_stats_exec_sql() which logs an
    ERROR in case of a DB_DEADLOCK, and also returns the error code to the
    caller, so both the end user and a developer should be aware of a problem
    in case they want to do something about it.

  table_stats and index_stats
    These two tables take part in queries which are issued by background
    threads, and the code which performs these queries can handle failures
    such as deadlocks, because they were expected at the design phase. */

  const char *name = lock->index->table->name.m_name;
  return (!strcmp(name, "mysql/innodb_table_stats") ||
          !strcmp(name, "mysql/innodb_index_stats") ||
          !strcmp(name, "mysql/table_stats") ||
          !strcmp(name, "mysql/index_stats"));
}
#endif /* UNIV_DEBUG */

/**
Allocate cached locks for the transaction.
@param trx allocate cached record locks for this transaction */
void lock_trx_alloc_locks(trx_t *trx) {
  /* We will create trx->lock.table_pool and rec_pool which are protected by
  trx->mutex. In theory nobody else should use the trx object while it is
  being constructed, but how can we (the lock-sys) "know" about it, and why
  risk it? */
  trx_mutex_enter(trx);
  ulint sz = REC_LOCK_SIZE * REC_LOCK_CACHE;
  byte *ptr = reinterpret_cast<byte *>(ut_malloc_nokey(sz));

  /* We allocate one big chunk and then distribute it among
  the rest of the elements. The allocated chunk pointer is always
  at index 0. */

  for (ulint i = 0; i < REC_LOCK_CACHE; ++i, ptr += REC_LOCK_SIZE) {
    trx->lock.rec_pool.push_back(reinterpret_cast<ib_lock_t *>(ptr));
  }

  sz = TABLE_LOCK_SIZE * TABLE_LOCK_CACHE;
  ptr = reinterpret_cast<byte *>(ut_malloc_nokey(sz));

  for (ulint i = 0; i < TABLE_LOCK_CACHE; ++i, ptr += TABLE_LOCK_SIZE) {
    trx->lock.table_pool.push_back(reinterpret_cast<ib_lock_t *>(ptr));
  }
  trx_mutex_exit(trx);
}
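
/* A minimal sketch (not part of the original source) of the allocation
pattern used above: one contiguous chunk is carved into fixed-size slots, and
because the slot stored at index 0 is also the chunk's base pointer, the whole
pool can later be released by freeing pool[0] alone:

  #include <cstdlib>
  #include <vector>
  void build_pool(std::vector<unsigned char *> &pool, size_t slot_size,
                  size_t slot_count) {
    auto *chunk = static_cast<unsigned char *>(malloc(slot_size * slot_count));
    for (size_t i = 0; i < slot_count; ++i) {
      pool.push_back(chunk + i * slot_size);  // pool[0] == chunk
    }
  }
  // ... use the slots ...; later, free(pool[0]) releases the entire chunk
*/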

void lock_notify_about_deadlock(const ut::vector<const trx_t *> &trxs_on_cycle,
                                const trx_t *victim_trx) {
  Deadlock_notifier::notify(trxs_on_cycle, victim_trx);
}