1 /* Copyright (c) 2007, 2020, Oracle and/or its affiliates. All rights reserved.
2 
3    This program is free software; you can redistribute it and/or modify
4    it under the terms of the GNU General Public License, version 2.0,
5    as published by the Free Software Foundation.
6 
7    This program is also distributed with certain software (including
8    but not limited to OpenSSL) that is licensed under separate terms,
9    as designated in a particular file or component or in included license
10    documentation.  The authors of MySQL hereby grant you an additional
11    permission to link the program and your derivative works with the
12    separately licensed software that they have included with MySQL.
13 
14    This program is distributed in the hope that it will be useful,
15    but WITHOUT ANY WARRANTY; without even the implied warranty of
16    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17    GNU General Public License, version 2.0, for more details.
18 
19    You should have received a copy of the GNU General Public License
20    along with this program; if not, write to the Free Software
21    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301  USA */
22 
23 #include "sql/mdl.h"
24 
25 #include <time.h>
26 #include <algorithm>
27 #include <atomic>
28 #include <functional>
29 
30 #include "lf.h"
31 #include "m_ctype.h"
32 #include "my_dbug.h"
33 #include "my_macros.h"
34 #include "my_murmur3.h"
35 #include "my_sharedlib.h"
36 #include "my_sys.h"
37 #include "my_systime.h"
38 #include "my_thread.h"
39 #include "mysql/components/services/psi_cond_bits.h"
40 #include "mysql/components/services/psi_memory_bits.h"
41 #include "mysql/components/services/psi_mutex_bits.h"
42 #include "mysql/components/services/psi_rwlock_bits.h"
43 #include "mysql/psi/mysql_cond.h"
44 #include "mysql/psi/mysql_mdl.h"
45 #include "mysql/psi/mysql_memory.h"
46 #include "mysql/psi/mysql_mutex.h"
47 #include "mysql/psi/mysql_stage.h"
48 #include "mysql/psi/psi_base.h"
49 #include "mysql/psi/psi_mdl.h"
50 #include "mysql/service_thd_wait.h"
51 #include "mysqld_error.h"
52 #include "prealloced_array.h"
53 #include "sql/debug_sync.h"
54 #include "sql/thr_malloc.h"
55 
extern MYSQL_PLUGIN_IMPORT CHARSET_INFO *system_charset_info;

/**
  Instrumented memory key; registered below via all_mdl_memory under the
  name "MDL_context::acquire_locks".
*/
static PSI_memory_key key_memory_MDL_context_acquire_locks;

#ifdef HAVE_PSI_INTERFACE
/* PSI keys and registration tables for MDL mutexes, rwlocks, conds, memory. */
static PSI_mutex_key key_MDL_wait_LOCK_wait_status;

static PSI_mutex_info all_mdl_mutexes[] = {{&key_MDL_wait_LOCK_wait_status,
                                            "MDL_wait::LOCK_wait_status", 0, 0,
                                            PSI_DOCUMENT_ME}};

static PSI_rwlock_key key_MDL_lock_rwlock;
static PSI_rwlock_key key_MDL_context_LOCK_waiting_for;

/* Both rwlocks are registered with PSI_FLAG_RWLOCK_PR (prlock variant). */
static PSI_rwlock_info all_mdl_rwlocks[] = {
    {&key_MDL_lock_rwlock, "MDL_lock::rwlock", PSI_FLAG_RWLOCK_PR, 0,
     PSI_DOCUMENT_ME},
    {&key_MDL_context_LOCK_waiting_for, "MDL_context::LOCK_waiting_for",
     PSI_FLAG_RWLOCK_PR, 0, PSI_DOCUMENT_ME}};

static PSI_cond_key key_MDL_wait_COND_wait_status;

static PSI_cond_info all_mdl_conds[] = {{&key_MDL_wait_COND_wait_status,
                                         "MDL_context::COND_wait_status", 0, 0,
                                         PSI_DOCUMENT_ME}};

static PSI_memory_info all_mdl_memory[] = {
    {&key_memory_MDL_context_acquire_locks, "MDL_context::acquire_locks", 0, 0,
     PSI_DOCUMENT_ME}};
85 
86 /**
87   Initialise all the performance schema instrumentation points
88   used by the MDL subsystem.
89 */
init_mdl_psi_keys(void)90 static void init_mdl_psi_keys(void) {
91   int count;
92 
93   count = static_cast<int>(array_elements(all_mdl_mutexes));
94   mysql_mutex_register("sql", all_mdl_mutexes, count);
95 
96   count = static_cast<int>(array_elements(all_mdl_rwlocks));
97   mysql_rwlock_register("sql", all_mdl_rwlocks, count);
98 
99   count = static_cast<int>(array_elements(all_mdl_conds));
100   mysql_cond_register("sql", all_mdl_conds, count);
101 
102   count = static_cast<int>(array_elements(all_mdl_memory));
103   mysql_memory_register("sql", all_mdl_memory, count);
104 
105   MDL_key::init_psi_keys();
106 }
107 #endif /* HAVE_PSI_INTERFACE */
108 
/**
  Thread state names to be used in case when we have to wait on resource
  belonging to certain namespace.

  @note The array is sized by MDL_key::NAMESPACE_END; entries are presumably
        indexed by MDL_key::enum_mdl_namespace, so their order must match
        that enum's order — verify against mdl.h when adding a namespace.
*/

PSI_stage_info MDL_key::m_namespace_to_wait_state_name[NAMESPACE_END] = {
    {0, "Waiting for global read lock", 0, PSI_DOCUMENT_ME},
    {0, "Waiting for backup lock", 0, PSI_DOCUMENT_ME},
    {0, "Waiting for tablespace metadata lock", 0, PSI_DOCUMENT_ME},
    {0, "Waiting for schema metadata lock", 0, PSI_DOCUMENT_ME},
    {0, "Waiting for table metadata lock", 0, PSI_DOCUMENT_ME},
    {0, "Waiting for stored function metadata lock", 0, PSI_DOCUMENT_ME},
    {0, "Waiting for stored procedure metadata lock", 0, PSI_DOCUMENT_ME},
    {0, "Waiting for trigger metadata lock", 0, PSI_DOCUMENT_ME},
    {0, "Waiting for event metadata lock", 0, PSI_DOCUMENT_ME},
    {0, "Waiting for commit lock", 0, PSI_DOCUMENT_ME},
    {0, "User lock", 0, PSI_DOCUMENT_ME}, /* Be compatible with old status. */
    {0, "Waiting for locking service lock", 0, PSI_DOCUMENT_ME},
    {0, "Waiting for spatial reference system lock", 0, PSI_DOCUMENT_ME},
    {0, "Waiting for acl cache lock", 0, PSI_DOCUMENT_ME},
    {0, "Waiting for column statistics lock", 0, PSI_DOCUMENT_ME},
    {0, "Waiting for resource groups metadata lock", 0, PSI_DOCUMENT_ME},
    {0, "Waiting for foreign key metadata lock", 0, PSI_DOCUMENT_ME},
    {0, "Waiting for check constraint metadata lock", 0, PSI_DOCUMENT_ME}};
133 
134 #ifdef HAVE_PSI_INTERFACE
init_psi_keys()135 void MDL_key::init_psi_keys() {
136   int i;
137   int count;
138   PSI_stage_info *info MY_ATTRIBUTE((unused));
139 
140   count =
141       static_cast<int>(array_elements(MDL_key::m_namespace_to_wait_state_name));
142   for (i = 0; i < count; i++) {
143     /* mysql_stage_register wants an array of pointers, registering 1 by 1. */
144     info = &MDL_key::m_namespace_to_wait_state_name[i];
145     mysql_stage_register("sql", &info, 1);
146   }
147 }
148 #endif
149 
/*
  Whether the MDL subsystem is initialized. Presumably toggled by the
  subsystem's init/destroy routines, which are not in this chunk.
*/
static bool mdl_initialized = false;
151 
/**
  A collection of all MDL locks. A singleton,
  there is only one instance of the map in the server.
*/

class MDL_map {
 public:
  /** Initialize the map. Definition not in this chunk. */
  void init();
  /** Destroy the map. Definition not in this chunk. */
  void destroy();

  /*
    Look up the MDL_lock object for the key (find_or_insert also creates it
    if absent). Declarations only — definitions and the exact semantics of
    the "pinned" out-parameter are not in this chunk.
  */
  inline MDL_lock *find(LF_PINS *pins, const MDL_key *key, bool *pinned);
  inline MDL_lock *find_or_insert(LF_PINS *pins, const MDL_key *key,
                                  bool *pinned);

  /**
    Decrement unused MDL_lock objects counter.
  */
  void lock_object_used() { --m_unused_lock_objects; }

  /**
    Increment unused MDL_lock objects counter. If number of such objects
    exceeds threshold and unused/total objects ratio is high enough try
    to free some of them.
  */
  void lock_object_unused(MDL_context *ctx, LF_PINS *pins) {
    /*
      Use thread local copy of unused locks counter for performance/
      scalability reasons. It is updated on both successful and failed
      attempts to delete unused MDL_lock objects in order to avoid infinite
      loops.
    */
    int32 unused_locks = ++m_unused_lock_objects;

    while (unused_locks > mdl_locks_unused_locks_low_water &&
           (unused_locks > m_locks.count * MDL_LOCKS_UNUSED_LOCKS_MIN_RATIO)) {
      /*
        If number of unused lock objects exceeds low water threshold and
        unused/total objects ratio is high enough - try to do random dive
        into m_locks hash, find an unused object by iterating upwards
        through its split-ordered list and try to free it.
        If we fail to do this - update local copy of unused objects
        counter and retry if needed.

        Note that:
        *) It is not big deal if "m_unused_lock_objects" due to races becomes
           negative temporarily as we perform signed comparison.
        *) There is a good chance that we will find an unused object quickly
           because unused/total ratio is high enough.
        *) There is no possibility for infinite loop since our PRNG works
           in such way that we eventually cycle through all LF_HASH hash
           buckets (@sa MDL_context::get_random()).
        *) Thanks to the fact that we choose random object to expel -
           objects which are used more often will naturally stay
           in the cache and rarely used objects will be expelled from it.
        *) Non-atomic read of LF_HASH::count which happens above should be
           OK as LF_HASH code does them too + preceding atomic operation
           provides memory barrier.
      */
      remove_random_unused(ctx, pins, &unused_locks);
    }
  }

  /**
    Get number of unused MDL_lock objects in MDL_map cache.

    @note Does non-atomic read so can return stale results. This is OK since
          this method is used only in unit-tests. The latter employ means
          of thread synchronization which are external to MDL and prevent
          memory reordering/ensure that thread calling this method have
          up-to-date view on the memory. @sa m_unused_lock_objects.
  */
  int32 get_unused_locks_count() const { return m_unused_lock_objects.load(); }

  /**
    Allocate pins which are necessary for MDL_context/thread to be able
    to work with MDL_map container.
  */
  LF_PINS *get_pins() { return lf_hash_get_pins(&m_locks); }

  /**
    Check if MDL_lock object corresponding to the key is going to be
    singleton.

    @note The four singleton namespaces match the four pre-allocated
          MDL_lock members below.
  */
  bool is_lock_object_singleton(const MDL_key *mdl_key) const {
    return (mdl_key->mdl_namespace() == MDL_key::GLOBAL ||
            mdl_key->mdl_namespace() == MDL_key::COMMIT ||
            mdl_key->mdl_namespace() == MDL_key::ACL_CACHE ||
            mdl_key->mdl_namespace() == MDL_key::BACKUP_LOCK);
  }

 private:
  /*
    Try to expel one randomly-chosen unused MDL_lock object, updating
    *unused_locks with a fresh counter value. Definition not in this chunk;
    called from lock_object_unused() above.
  */
  void remove_random_unused(MDL_context *ctx, LF_PINS *pins,
                            int32 *unused_locks);

  /** LF_HASH with all locks in the server. */
  LF_HASH m_locks;
  /** Pre-allocated MDL_lock object for GLOBAL namespace. */
  MDL_lock *m_global_lock;
  /** Pre-allocated MDL_lock object for COMMIT namespace. */
  MDL_lock *m_commit_lock;
  /** Pre-allocated MDL_lock object for ACL_CACHE namespace. */
  MDL_lock *m_acl_cache_lock;
  /** Pre-allocated MDL_lock object for BACKUP_LOCK namespace. */
  MDL_lock *m_backup_lock;

  /**
    Number of unused MDL_lock objects in the server.

    Updated using atomic operations, read using both atomic and ordinary
    reads. We assume that ordinary reads of 32-bit words can't result in
    partial results, but may produce stale results thanks to memory
    reordering, LF_HASH seems to be using similar assumption.

    Note that due to fact that updates to this counter are not atomic with
    marking MDL_lock objects as used/unused it might easily get negative
    for some short period of time. Code which uses its value needs to take
    this into account.
  */
  std::atomic<int32> m_unused_lock_objects;
};
272 
/**
  Threshold for number of unused MDL_lock objects.

  We will start considering freeing some unused objects only after exceeding
  this value and if unused/total objects ratio is high enough.

  Normally this threshold is constant. It is exposed outside of MDL subsystem
  as a variable only in order to simplify unit testing.

  @sa MDL_map::lock_object_unused(), which compares against this value.
*/
int32 mdl_locks_unused_locks_low_water =
    MDL_LOCKS_UNUSED_LOCKS_LOW_WATER_DEFAULT;
284 
/**
  A context of the recursive traversal through all contexts
  in all sessions in search for deadlock.
*/

class Deadlock_detection_visitor : public MDL_wait_for_graph_visitor {
 public:
  Deadlock_detection_visitor(MDL_context *start_node_arg)
      : m_start_node(start_node_arg),
        m_victim(nullptr),
        m_current_search_depth(0),
        m_found_deadlock(false) {}
  /** Enter a wait-for graph node. @sa comment at the definition below. */
  virtual bool enter_node(MDL_context *node);
  /** Leave a wait-for graph node. @sa comment at the definition below. */
  virtual void leave_node(MDL_context *node);

  /** Inspect one wait-for edge. @sa comment at the definition below. */
  virtual bool inspect_edge(MDL_context *dest);

  /** @returns the context chosen as deadlock victim (NULL if none). */
  MDL_context *get_victim() const { return m_victim; }

 private:
  /**
    Change the deadlock victim to a new one if it has lower deadlock
    weight.
  */
  void opt_change_victim_to(MDL_context *new_victim);

 private:
  /**
    The context which has initiated the search. There
    can be multiple searches happening in parallel at the same time.
  */
  MDL_context *m_start_node;
  /** If a deadlock is found, the context that identifies the victim. */
  MDL_context *m_victim;
  /** Set to 0 at start. Increased whenever
    we descend into another MDL context (aka traverse to the next
    wait-for graph node). When MAX_SEARCH_DEPTH is reached, we
    assume that a deadlock is found, even if we have not found a
    loop.
  */
  uint m_current_search_depth;
  /** true if we found a deadlock. */
  bool m_found_deadlock;
  /**
    Maximum depth for deadlock searches. After this depth is
    achieved we will unconditionally declare that there is a
    deadlock.

    @note This depth should be small enough to avoid stack
          being exhausted by recursive search algorithm.

    TODO: Find out what is the optimal value for this parameter.
          Current value is safe, but probably sub-optimal,
          as there is an anecdotal evidence that real-life
          deadlocks are even shorter typically.
  */
  static const uint MAX_SEARCH_DEPTH = 32;
};
343 
344 /**
345   Enter a node of a wait-for graph. After
346   a node is entered, inspect_edge() will be called
347   for all wait-for destinations of this node. Then
348   leave_node() will be called.
349   We call "enter_node()" for all nodes we inspect,
350   including the starting node.
351 
352   @retval  true  Maximum search depth exceeded.
353   @retval  false OK.
354 */
355 
enter_node(MDL_context * node)356 bool Deadlock_detection_visitor::enter_node(MDL_context *node) {
357   m_found_deadlock = ++m_current_search_depth >= MAX_SEARCH_DEPTH;
358   if (m_found_deadlock) {
359     DBUG_ASSERT(!m_victim);
360     opt_change_victim_to(node);
361   }
362   return m_found_deadlock;
363 }
364 
365 /**
366   Done inspecting this node. Decrease the search
367   depth. If a deadlock is found, and we are
368   backtracking to the start node, optionally
369   change the deadlock victim to one with lower
370   deadlock weight.
371 */
372 
leave_node(MDL_context * node)373 void Deadlock_detection_visitor::leave_node(MDL_context *node) {
374   --m_current_search_depth;
375   if (m_found_deadlock) opt_change_victim_to(node);
376 }
377 
378 /**
379   Inspect a wait-for graph edge from one MDL context to another.
380 
381   @retval true   A loop is found.
382   @retval false  No loop is found.
383 */
384 
inspect_edge(MDL_context * node)385 bool Deadlock_detection_visitor::inspect_edge(MDL_context *node) {
386   m_found_deadlock = node == m_start_node;
387   return m_found_deadlock;
388 }
389 
390 /**
391   Change the deadlock victim to a new one if it has lower deadlock
392   weight.
393 
394   @param new_victim New candidate for deadlock victim.
395 */
396 
opt_change_victim_to(MDL_context * new_victim)397 void Deadlock_detection_visitor::opt_change_victim_to(MDL_context *new_victim) {
398   if (m_victim == nullptr ||
399       m_victim->get_deadlock_weight() >= new_victim->get_deadlock_weight()) {
400     /* Swap victims, unlock the old one. */
401     MDL_context *tmp = m_victim;
402     m_victim = new_victim;
403     m_victim->lock_deadlock_victim();
404     if (tmp) tmp->unlock_deadlock_victim();
405   }
406 }
407 
/**
  Get a bit corresponding to enum_mdl_type value in a granted/waiting bitmaps
  and compatibility matrices.

  @note The result is cast to MDL_lock::bitmap_t (an unsigned short), so
        every enum_mdl_type value must fit within that type's bit width.
*/

#define MDL_BIT(A) static_cast<MDL_lock::bitmap_t>(1U << A)
414 
415 /**
416   The lock context. Created internally for an acquired lock.
417   For a given name, there exists only one MDL_lock instance,
418   and it exists only when the lock has been granted.
419   Can be seen as an MDL subsystem's version of TABLE_SHARE.
420 
421   This is an abstract class which lacks information about
422   compatibility rules for lock types. They should be specified
423   in its descendants.
424 */
425 
426 class MDL_lock {
427  public:
428   typedef unsigned short bitmap_t;
429 
  /**
    List of MDL_tickets (granted or waiting) together with a bitmap
    summarizing the lock types present in the list.
  */
  class Ticket_list {
   public:
    typedef I_P_List<MDL_ticket,
                     I_P_List_adapter<MDL_ticket, &MDL_ticket::next_in_lock,
                                      &MDL_ticket::prev_in_lock>,
                     I_P_List_null_counter, I_P_List_fast_push_back<MDL_ticket>>
        List;
    /** Read-only access to the underlying intrusive list. */
    operator const List &() const { return m_list; }
    Ticket_list() : m_bitmap(0) {}

    /* Add/remove a ticket. Definitions not in this chunk; presumably they
       also keep m_bitmap in sync with the list contents. */
    void add_ticket(MDL_ticket *ticket);
    void remove_ticket(MDL_ticket *ticket);
    bool is_empty() const { return m_list.is_empty(); }
    /** Bitmap of lock types of tickets currently in this list. */
    bitmap_t bitmap() const { return m_bitmap; }

   private:
    /* Definition not in this chunk; presumably clears the type's bit in
       m_bitmap when no ticket of that type remains in m_list. */
    void clear_bit_if_not_in_list(enum_mdl_type type);

   private:
    /** List of tickets. */
    List m_list;
    /** Bitmap of types of tickets in this list. */
    bitmap_t m_bitmap;
  };
454 
455   typedef Ticket_list::List::Iterator Ticket_iterator;
456 
457   typedef longlong fast_path_state_t;
458 
459   /**
460     Helper struct which defines how different types of locks are handled
461     for a specific MDL_lock. In practice we use only two strategies: "scoped"
462     lock strategy for locks in GLOBAL, COMMIT, TABLESPACE and SCHEMA namespaces
463     and "object" lock strategy for all other namespaces.
464   */
465   struct MDL_lock_strategy {
466     /**
467       Compatibility (or rather "incompatibility") matrices for lock types.
468 
469       Array of bitmaps which elements specify which granted locks are
470       incompatible with the type of lock being requested.
471     */
472     bitmap_t m_granted_incompatible[MDL_TYPE_END];
473     /**
474       Arrays of bitmaps which elements specify which waiting locks are
475       incompatible with the type of lock being requested. Basically, each
476       array defines priorities between lock types.
477       We need 4 separate arrays since in order to prevent starvation for
478       some of lock request types, we use different priority matrices:
479       0) in "normal" situation.
480       1) in situation when the number of successively granted "piglet" requests
481          exceeds the max_write_lock_count limit.
482       2) in situation when the number of successively granted "hog" requests
483          exceeds the max_write_lock_count limit.
484       3) in situation when both "piglet" and "hog" counters exceed limit.
485     */
486     bitmap_t m_waiting_incompatible[4][MDL_TYPE_END];
487     /**
488       Array of increments for "unobtrusive" types of lock requests for locks.
489       @sa MDL_lock::get_unobtrusive_lock_increment().
490     */
491     fast_path_state_t m_unobtrusive_lock_increment[MDL_TYPE_END];
492     /**
493       Indicates that locks of this type are affected by
494       the max_write_lock_count limit.
495     */
496     bool m_is_affected_by_max_write_lock_count;
497 
498 #ifndef DBUG_OFF
499     /**
500      Indicate that a type is legal with this strategy. Only for asserts and
501      debug-only checks.
502      */
503     bool legal_type[MDL_TYPE_END];
504 #endif /* not defined DBUG_OFF */
505 
506     /**
507       Pointer to a static method which determines if the type of lock
508       requested requires notification of conflicting locks. NULL if there
509       are no lock types requiring notification.
510     */
511     bool (*m_needs_notification)(const MDL_ticket *ticket);
512     /**
513       Pointer to a static method which allows notification of owners of
514       conflicting locks about the fact that a type of lock requiring
515       notification was requested.
516     */
517     void (*m_notify_conflicting_locks)(MDL_context *ctx, MDL_lock *lock);
518     /**
519       Pointer to a static method which converts information about
520       locks granted using "fast" path from fast_path_state_t
521       representation to bitmap of lock types.
522     */
523     bitmap_t (*m_fast_path_granted_bitmap)(const MDL_lock &lock);
524     /**
525       Pointer to a static method which determines if waiting for the lock
      should be aborted when the connection is lost. NULL if locks of
527       this type don't require such aborts.
528     */
529     bool (*m_needs_connection_check)(const MDL_lock *lock);
530   };
531 
532  public:
533   /** The key of the object (data) being protected. */
534   MDL_key key;
535   /**
536     Read-write lock protecting this lock context.
537 
538     @note The fact that we use read-write lock prefers readers here is
539           important as deadlock detector won't work correctly otherwise.
540 
541           For example, imagine that we have following waiters graph:
542 
                       ctxA -> obj1 -> ctxB -> obj2 -|
544                         ^                            |
545                         |----------------------------|
546 
547           and both ctxA and ctxB start deadlock detection process:
548 
549             ctxA read-locks obj1             ctxB read-locks obj2
550             ctxA goes deeper                 ctxB goes deeper
551 
552           Now ctxC comes in who wants to start waiting on obj1, also
553           ctxD comes in who wants to start waiting on obj2.
554 
555             ctxC tries to write-lock obj1   ctxD tries to write-lock obj2
556             ctxC is blocked                 ctxD is blocked
557 
558           Now ctxA and ctxB resume their search:
559 
560             ctxA tries to read-lock obj2    ctxB tries to read-lock obj1
561 
562           If m_rwlock prefers writes (or fair) both ctxA and ctxB would be
563           blocked because of pending write locks from ctxD and ctxC
564           correspondingly. Thus we will get a deadlock in deadlock detector.
          If m_rwlock prefers readers (actually ignoring pending writers is
566           enough) ctxA and ctxB will continue and no deadlock will occur.
567   */
568   mysql_prlock_t m_rwlock;
569 
incompatible_granted_types_bitmap() const570   const bitmap_t *incompatible_granted_types_bitmap() const {
571     return m_strategy->m_granted_incompatible;
572   }
573 
incompatible_waiting_types_bitmap() const574   const bitmap_t *incompatible_waiting_types_bitmap() const {
575     return m_strategy
576         ->m_waiting_incompatible[m_current_waiting_incompatible_idx];
577   }
578 
579   /**
580     Get index of priority matrice in MDL_lock_strategy::m_waiting_incompatible
581     array which corresponds to current values of the m_piglet_lock_count and
582     m_hog_lock_count counters and the max_write_lock_count threshold.
583   */
get_incompatible_waiting_types_bitmap_idx() const584   uint get_incompatible_waiting_types_bitmap_idx() const {
585     mysql_prlock_assert_write_owner(&m_rwlock);
586     /*
587       To prevent starvation for lock types with lower priority use:
588 
589       *) MDL_lock_strategy::m_waiting_incompatible[0] matrice by default.
590       *) MDL_lock_strategy::m_waiting_incompatible[1] when the number of
591          successively granted "piglet" requests exceeds max_write_lock_count.
592       *) MDL_lock_strategy::m_waiting_incompatible[2] when the number of
593          successively granted "hog" requests exceeds max_write_lock_count.
594       *) MDL_lock_strategy::m_waiting_incompatible[3] when both "piglet" and
595          "hog" counters exceed this limit.
596     */
597     uint idx = 0;
598     if (m_piglet_lock_count >= max_write_lock_count) idx += 1;
599     if (m_hog_lock_count >= max_write_lock_count) idx += 2;
600     return idx;
601   }
602 
603   /**
604     Switch priority matrice for the MDL_lock object if m_piglet_lock_count or/
605     and m_hog_lock_count counters have crossed max_write_lock_count threshold.
606 
607     @returns true - if priority matrice has been changed, false - otherwise.
608   */
switch_incompatible_waiting_types_bitmap_if_needed()609   bool switch_incompatible_waiting_types_bitmap_if_needed() {
610     mysql_prlock_assert_write_owner(&m_rwlock);
611 
612     uint new_idx = get_incompatible_waiting_types_bitmap_idx();
613     if (m_current_waiting_incompatible_idx == new_idx) return false;
614     m_current_waiting_incompatible_idx = new_idx;
615     return true;
616   }
617 
618   bool has_pending_conflicting_lock(enum_mdl_type type);
619 
620   bool can_grant_lock(enum_mdl_type type,
621                       const MDL_context *requestor_ctx) const;
622 
623   void reschedule_waiters();
624 
625   void remove_ticket(MDL_context *ctx, LF_PINS *pins,
626                      Ticket_list MDL_lock::*queue, MDL_ticket *ticket);
627 
628   bool visit_subgraph(MDL_ticket *waiting_ticket,
629                       MDL_wait_for_graph_visitor *gvisitor);
630 
  /**
    Check if the ticket's lock type requires notifying owners of conflicting
    locks. Delegates to the strategy; false when no callback is configured.
  */
  bool needs_notification(const MDL_ticket *ticket) const {
    return m_strategy->m_needs_notification
               ? m_strategy->m_needs_notification(ticket)
               : false;
  }

  /**
    Notify owners of conflicting locks via the strategy's callback.
    No-op when the strategy has no such callback.
  */
  void notify_conflicting_locks(MDL_context *ctx) {
    if (m_strategy->m_notify_conflicting_locks)
      m_strategy->m_notify_conflicting_locks(ctx, this);
  }

  /**
    Check if waiting for this lock should be aborted when the connection
    is lost. Delegates to the strategy; false when no callback is configured.
  */
  bool needs_connection_check() const {
    return m_strategy->m_needs_connection_check
               ? m_strategy->m_needs_connection_check(this)
               : false;
  }

  /* Declaration only; definition not in this chunk. */
  inline static bool needs_hton_notification(
      MDL_key::enum_mdl_namespace mdl_namespace);

  /** @returns true if this lock's types are subject to max_write_lock_count. */
  bool is_affected_by_max_write_lock_count() const {
    return m_strategy->m_is_affected_by_max_write_lock_count;
  }
654 
655   /**
656     If we just have granted a lock of "piglet" or "hog" type and there are
657     pending lower priority locks, increase the appropriate counter. If this
658     counter now exceeds the max_write_lock_count threshold, switch priority
659     matrice for the MDL_lock object.
660 
661     @returns true - if priority matrice has been changed, false - otherwise.
662   */
count_piglets_and_hogs(enum_mdl_type type)663   bool count_piglets_and_hogs(enum_mdl_type type) {
664     mysql_prlock_assert_write_owner(&m_rwlock);
665 
666     if ((MDL_BIT(type) & MDL_OBJECT_HOG_LOCK_TYPES) != 0) {
667       if (m_waiting.bitmap() & ~MDL_OBJECT_HOG_LOCK_TYPES) {
668         m_hog_lock_count++;
669         if (switch_incompatible_waiting_types_bitmap_if_needed()) return true;
670       }
671     } else if (type == MDL_SHARED_WRITE) {
672       if (m_waiting.bitmap() & MDL_BIT(MDL_SHARED_READ_ONLY)) {
673         m_piglet_lock_count++;
674         if (switch_incompatible_waiting_types_bitmap_if_needed()) return true;
675       }
676     }
677     return false;
678   }
679 
680   /**
681     @returns "Fast path" increment for request for "unobtrusive" type
682               of lock, 0 - if it is request for "obtrusive" type of
683               lock.
684 
685     @note We split all lock types for each of MDL namespaces
686           in two sets:
687 
688           A) "unobtrusive" lock types
689             1) Each type from this set should be compatible with all other
690                types from the set (including itself).
691             2) These types should be common for DML operations
692 
693           Our goal is to optimize acquisition and release of locks of this
694           type by avoiding complex checks and manipulations on m_waiting/
695           m_granted bitmaps/lists. We replace them with a check of and
696           increment/decrement of integer counters.
697           We call the latter type of acquisition/release "fast path".
698           Use of "fast path" reduces the size of critical section associated
699           with MDL_lock::m_rwlock lock in the common case and thus increases
700           scalability.
701 
702           The amount by which acquisition/release of specific type
703           "unobtrusive" lock increases/decreases packed counter in
704           MDL_lock::m_fast_path_state is returned by this function.
705 
706           B) "obtrusive" lock types
707             1) Granted or pending lock of those type is incompatible with
708                some other types of locks or with itself.
709             2) Not common for DML operations
710 
711           These locks have to be always acquired involving manipulations on
712           m_waiting/m_granted bitmaps/lists, i.e. we have to use "slow path"
713           for them. Moreover in the presence of active/pending locks from
714           "obtrusive" set we have to acquire using "slow path" even locks of
715           "unobtrusive" type.
716 
717     @see MDL_scoped_lock::m_unobtrusive_lock_increment and
718     @see MDL_object_lock::m_unobtrusive_lock_increment for
719         definitions of these sets for scoped and per-object locks.
720   */
721   inline static fast_path_state_t get_unobtrusive_lock_increment(
722       const MDL_request *request);
723 
724   /**
725     @returns "Fast path" increment if type of lock is "unobtrusive" type,
726               0 - if it is "obtrusive" type of lock.
727   */
get_unobtrusive_lock_increment(enum_mdl_type type) const728   fast_path_state_t get_unobtrusive_lock_increment(enum_mdl_type type) const {
729     return m_strategy->m_unobtrusive_lock_increment[type];
730   }
731 
732   /**
733     Check if type of lock requested is "obtrusive" type of lock.
734 
735     @sa MDL_lock::get_unobtrusive_lock_increment() description.
736   */
is_obtrusive_lock(enum_mdl_type type) const737   bool is_obtrusive_lock(enum_mdl_type type) const {
738     return get_unobtrusive_lock_increment(type) == 0;
739   }
740 
741   /**
742     Return set of types of lock requests which were granted using
743     "fast path" algorithm in the bitmap_t form.
744 
745     This method is only called from MDL_lock::can_grant_lock() and its
746     return value is only important when we are trying to figure out if
747     we can grant an obtrusive lock. But this means that the HAS_OBTRUSIVE
748     flag is set so all changes to m_fast_path_state happen under protection
749     of MDL_lock::m_rwlock (see invariant [INV1]).
750     Since can_grant_lock() is called only when MDL_lock::m_rwlock is held,
751     it is safe to do an ordinary read of m_fast_path_state here.
752   */
fast_path_granted_bitmap() const753   bitmap_t fast_path_granted_bitmap() const {
754     return m_strategy->m_fast_path_granted_bitmap(*this);
755   }
756 
  /** List of granted tickets for this lock. */
  Ticket_list m_granted;
  /** Tickets for contexts waiting to acquire a lock. */
  Ticket_list m_waiting;

 private:
  /**
    Number of times high priority, "hog" lock requests (X, SNRW, SNW) have been
    granted while lower priority lock requests (all other types) were waiting.
    Currently used only for object locks. Protected by m_rwlock lock.
  */
  ulong m_hog_lock_count;
  /**
    Number of times high priority, "piglet" lock requests (SW) have been
    granted while locks requests with lower priority (SRO) were waiting.
    Currently used only for object locks. Protected by m_rwlock lock.
  */
  ulong m_piglet_lock_count;
  /**
    Index of one of the MDL_lock_strategy::m_waiting_incompatible
    arrays which represents the current priority matrix.
  */
  uint m_current_waiting_incompatible_idx;
780 
 public:
  /**
    Do "expensive" part of MDL_lock object initialization.
    Called by LF_ALLOCATOR for each newly malloc()'ed MDL_lock object, is not
    called in cases when LF_ALLOCATOR decides to reuse object which was
    returned to it earlier. "Full" initialization happens later by calling
    MDL_lock::reinit().
  */
  MDL_lock() : m_obtrusive_locks_granted_waiting_count(0) {
    mysql_prlock_init(key_MDL_lock_rwlock, &m_rwlock);
  }

  /** "Cheap" part of initialization: prepare the object for the given key. */
  inline void reinit(const MDL_key *mdl_key);

  ~MDL_lock() { mysql_prlock_destroy(&m_rwlock); }

  inline static MDL_lock *create(const MDL_key *key);
  inline static void destroy(MDL_lock *lock);

  inline MDL_context *get_lock_owner() const;
801 
802   /**
803     Get MDL lock strategy corresponding to MDL key.
804 
805     @param key Reference to MDL_key object
806 
807     @return the type of strategy scoped or object corresponding to MDL key.
808   */
809 
get_strategy(const MDL_key & key)810   inline static const MDL_lock_strategy *get_strategy(const MDL_key &key) {
811     switch (key.mdl_namespace()) {
812       case MDL_key::GLOBAL:
813       case MDL_key::TABLESPACE:
814       case MDL_key::SCHEMA:
815       case MDL_key::COMMIT:
816       case MDL_key::BACKUP_LOCK:
817       case MDL_key::RESOURCE_GROUPS:
818       case MDL_key::FOREIGN_KEY:
819       case MDL_key::CHECK_CONSTRAINT:
820         return &m_scoped_lock_strategy;
821       default:
822         return &m_object_lock_strategy;
823     }
824   }
825 
 public:
  /**
    Number of granted or waiting lock requests of "obtrusive" type.
    Also includes "obtrusive" lock requests for which we are about to check
    if they can be granted.

    @sa MDL_lock::get_unobtrusive_lock_increment() description.

    @note This number doesn't include "unobtrusive" locks which were acquired
          using "slow path".
  */
  uint m_obtrusive_locks_granted_waiting_count;
  /**
    Flag in MDL_lock::m_fast_path_state that indicates that the MDL_lock
    object was marked for destruction and will be destroyed once all threads
    referencing to it through hazard pointers have unpinned it.
    Set using atomic compare-and-swap AND under protection of
    MDL_lock::m_rwlock lock.
    Thanks to this can be read either by using atomic compare-and-swap OR
    using ordinary read under protection of MDL_lock::m_rwlock lock.
  */
  static const fast_path_state_t IS_DESTROYED = 1ULL << 62;
  /**
    Flag in MDL_lock::m_fast_path_state that indicates that there are
    "obtrusive" locks which are granted, waiting or for which we are
    about to check if they can be granted.
    Corresponds to "MDL_lock::m_obtrusive_locks_granted_waiting_count != 0"
    predicate.
    Set using atomic compare-and-swap AND under protection of
    MDL_lock::m_rwlock lock.
    Thanks to this can be read either by using atomic compare-and-swap OR
    using ordinary read under protection of MDL_lock::m_rwlock lock.

    Invariant [INV1]: When this flag is set all changes to m_fast_path_state
    member has to be done under protection of m_rwlock lock.
  */
  static const fast_path_state_t HAS_OBTRUSIVE = 1ULL << 61;
  /**
    Flag in MDL_lock::m_fast_path_state that indicates that there are
    "slow" path locks which are granted, waiting or for which we are
    about to check if they can be granted.
    Corresponds to MDL_lock::m_granted/m_waiting lists being non-empty
    (except special case in MDL_context::try_acquire_lock()).
    Set using atomic compare-and-swap AND under protection of m_rwlock
    lock. The latter is necessary because value of this flag needs to be
    synchronized with contents of MDL_lock::m_granted/m_waiting lists.
  */
  static const fast_path_state_t HAS_SLOW_PATH = 1ULL << 60;
  /**
    Combination of IS_DESTROYED/HAS_OBTRUSIVE/HAS_SLOW_PATH flags and packed
    counters of specific types of "unobtrusive" locks which were granted using
    "fast path".

    @see MDL_scoped_lock::m_unobtrusive_lock_increment and
        @see MDL_object_lock::m_unobtrusive_lock_increment for details about how
        counts of different types of locks are packed into this field.

    @note Doesn't include "unobtrusive" locks granted using "slow path".

    @note We use combination of atomic operations and protection by
          MDL_lock::m_rwlock lock to work with this member:

          * Write and Read-Modify-Write operations are always carried out
            atomically. This is necessary to avoid lost updates on 32-bit
            platforms among other things.
          * In some cases Reads can be done non-atomically because we don't
            really care about value which they will return (for example,
            if further down the line there will be an atomic compare-and-swap
            operation, which will validate this value and provide the correct
            value if the validation will fail).
          * In other cases Reads can be done non-atomically since they happen
            under protection of MDL_lock::m_rwlock and there is some invariant
            which ensures that concurrent updates of the m_fast_path_state
            member can't happen while  MDL_lock::m_rwlock is held
            (@sa IS_DESTROYED, HAS_OBTRUSIVE, HAS_SLOW_PATH).

    @note IMPORTANT!!!
          In order to enforce the above rules and other invariants,
          MDL_lock::m_fast_path_state should not be updated directly.
          Use fast_path_state_cas()/add()/reset() wrapper methods instead.
  */
  std::atomic<fast_path_state_t> m_fast_path_state;
909 
  /**
    Wrapper for atomic compare-and-swap operation on m_fast_path_state member
    which enforces locking and other invariants.

    @param[in,out] old_state  Expected current value of m_fast_path_state;
                              per std::atomic CAS semantics, on failure it is
                              updated to the actual current value.
    @param[in]     new_state  Value to install if *old_state matches.

    @returns true if the swap succeeded, false otherwise.
  */
  bool fast_path_state_cas(fast_path_state_t *old_state,
                           fast_path_state_t new_state) {
    /*
      IS_DESTROYED, HAS_OBTRUSIVE and HAS_SLOW_PATH flags can be set or
      cleared only while holding MDL_lock::m_rwlock lock.
      If HAS_SLOW_PATH flag is set all changes to m_fast_path_state
      should happen under protection of MDL_lock::m_rwlock ([INV1]).
    */
#if !defined(DBUG_OFF)
    if (((*old_state & (IS_DESTROYED | HAS_OBTRUSIVE | HAS_SLOW_PATH)) !=
         (new_state & (IS_DESTROYED | HAS_OBTRUSIVE | HAS_SLOW_PATH))) ||
        *old_state & HAS_OBTRUSIVE) {
      mysql_prlock_assert_write_owner(&m_rwlock);
    }
#endif
    /*
      We should not change state of destroyed object
      (fast_path_state_reset() being exception).
    */
    DBUG_ASSERT(!(*old_state & IS_DESTROYED));

    return atomic_compare_exchange_strong(&m_fast_path_state, old_state,
                                          new_state);
  }
938 
  /**
    Wrapper for atomic add operation on m_fast_path_state member
    which enforces locking and other invariants.

    @param value  Amount to add to m_fast_path_state (may carry packed
                  per-type counter increments and/or flag bits).

    @returns Value of m_fast_path_state BEFORE the addition.
  */
  fast_path_state_t fast_path_state_add(fast_path_state_t value) {
    /*
      Invariant [INV1] requires all changes to m_fast_path_state happen
      under protection of m_rwlock if HAS_OBTRUSIVE flag is set.
      Since this operation doesn't check this flag it can be called only
      under protection of m_rwlock.
    */
    mysql_prlock_assert_write_owner(&m_rwlock);

    fast_path_state_t old_state = m_fast_path_state.fetch_add(value);

    /*
      We should not change state of destroyed object
      (fast_path_state_reset() being exception).
    */
    DBUG_ASSERT(!(old_state & IS_DESTROYED));
    return old_state;
  }
961 
  /**
    Wrapper for resetting m_fast_path_state enforcing locking invariants.
    The only operation allowed to clear the IS_DESTROYED flag (e.g. when
    removal of the object from the hash fails due to OOM).
  */
  void fast_path_state_reset() {
    /* IS_DESTROYED flag can be cleared only under protection of m_rwlock. */
    mysql_prlock_assert_write_owner(&m_rwlock);
    m_fast_path_state.store(0);
  }
970 
  /**
    Pointer to strategy object which defines how different types of lock
    requests should be handled for the namespace to which this lock belongs.
    @sa MDL_lock::m_scoped_lock_strategy and MDL_lock:m_object_lock_strategy.
  */
  const MDL_lock_strategy *m_strategy;

  /**
    Get bitmap of "unobtrusive" locks granted using "fast path" algorithm
    for scoped locks.

    @sa MDL_lock::fast_path_granted_bitmap() for explanation about why it
        is safe to use non-atomic read of MDL_lock::m_fast_path_state here.
  */
  static bitmap_t scoped_lock_fast_path_granted_bitmap(const MDL_lock &lock) {
    /*
      The mask selects the low 60 bits, i.e. everything below the
      HAS_SLOW_PATH/HAS_OBTRUSIVE/IS_DESTROYED flags. A non-zero counter
      means at least one fast path IX lock is granted.
    */
    return (lock.m_fast_path_state.load() & 0xFFFFFFFFFFFFFFFULL)
               ? MDL_BIT(MDL_INTENTION_EXCLUSIVE)
               : 0;
  }
990 
  /**
    Check if we are requesting X lock on the object, so threads holding
    conflicting S/SH metadata locks on it need to be notified.

    @sa MDL_lock::object_lock_notify_conflicting_locks.
  */
  static bool object_lock_needs_notification(const MDL_ticket *ticket) {
    return (ticket->get_type() == MDL_EXCLUSIVE);
  }
  static void object_lock_notify_conflicting_locks(MDL_context *ctx,
                                                   MDL_lock *lock);
1002   /**
1003     Get bitmap of "unobtrusive" locks granted using "fast path" algorithm
1004     for per-object locks.
1005 
1006     @sa MDL_lock::fast_path_granted_bitmap() for explanation about why it
1007         is safe to use non-atomic read of MDL_lock::m_fast_path_state here.
1008   */
object_lock_fast_path_granted_bitmap(const MDL_lock & lock)1009   static bitmap_t object_lock_fast_path_granted_bitmap(const MDL_lock &lock) {
1010     bitmap_t result = 0;
1011     fast_path_state_t fps = lock.m_fast_path_state;
1012     if (fps & 0xFFFFFULL) result |= MDL_BIT(MDL_SHARED);
1013     if (fps & (0xFFFFFULL << 20)) result |= MDL_BIT(MDL_SHARED_READ);
1014     if (fps & (0xFFFFFULL << 40)) result |= MDL_BIT(MDL_SHARED_WRITE);
1015     return result;
1016   }
1017 
1018   /**
1019     Check if MDL_lock object represents user-level lock,locking service
1020     lock or acl cache lock, so threads waiting for it need to check if
1021     connection is lost and abort waiting when it is.
1022   */
object_lock_needs_connection_check(const MDL_lock * lock)1023   static bool object_lock_needs_connection_check(const MDL_lock *lock) {
1024     return (lock->key.mdl_namespace() == MDL_key::USER_LEVEL_LOCK ||
1025             lock->key.mdl_namespace() == MDL_key::LOCKING_SERVICE ||
1026             lock->key.mdl_namespace() == MDL_key::ACL_CACHE);
1027   }
1028 
  /**
    Bitmap with "hog" lock types for object locks.

    Locks of these types can easily starve out lower priority locks.
    To prevent this we only grant them max_write_lock_count times in
    a row while other lock types are waiting.
  */
  static const bitmap_t MDL_OBJECT_HOG_LOCK_TYPES =
      (MDL_BIT(MDL_SHARED_NO_WRITE) | MDL_BIT(MDL_SHARED_NO_READ_WRITE) |
       MDL_BIT(MDL_EXCLUSIVE));

  /** Strategy instances shared by all scoped / all per-object locks. */
  static const MDL_lock_strategy m_scoped_lock_strategy;
  static const MDL_lock_strategy m_object_lock_strategy;
1042 };
1043 
/** Global container mapping MDL keys to MDL_lock objects. */
static MDL_map mdl_locks;

/**
  Extract hash key from an MDL_lock element stored in LF_HASH.

  @param[in]  record  Pointer to MDL_lock object.
  @param[out] length  Length of the key in bytes.

  @return Pointer to the key bytes.
*/
static const uchar *mdl_locks_key(const uchar *record, size_t *length) {
  const MDL_lock *lock = pointer_cast<const MDL_lock *>(record);
  *length = lock->key.length();
  return lock->key.ptr();
}
1051 
/**
  Initialize the metadata locking subsystem.

  This function is called at server startup.

  Registers performance schema instrumentation (when enabled) and
  initializes the global MDL_map container for MDL_lock objects.
  These primitives are implementation details of the MDL subsystem
  and are private to it.
*/

void mdl_init() {
  DBUG_ASSERT(!mdl_initialized);
  mdl_initialized = true;

#ifdef HAVE_PSI_INTERFACE
  init_mdl_psi_keys();
#endif

  mdl_locks.init();
}
1073 
/**
  Release resources of metadata locking subsystem.

  Destroys the global MDL_map container.
  Called at server shutdown. Safe to call even if mdl_init() was
  never invoked (no-op in that case).
*/

void mdl_destroy() {
  if (mdl_initialized) {
    mdl_initialized = false;
    mdl_locks.destroy();
  }
}
1087 
/**
  Get number of unused MDL_lock objects in MDL_map cache.
  Mostly needed for unit-testing.
*/

int32 mdl_get_unused_locks_count() {
  return mdl_locks.get_unused_locks_count();
}
1096 
extern "C" {
/**
  LF_ALLOCATOR constructor callback: placement-construct an MDL_lock in
  freshly malloc()'ed memory (past the LF_HASH bookkeeping header).
*/
static void mdl_lock_cons(uchar *arg) {
  new (arg + LF_HASH_OVERHEAD) MDL_lock();
}

/** LF_ALLOCATOR destructor callback: destroy MDL_lock before memory release. */
static void mdl_lock_dtor(uchar *arg) {
  MDL_lock *lock = (MDL_lock *)(arg + LF_HASH_OVERHEAD);
  lock->~MDL_lock();
}

/**
  LF_HASH initializer callback: re-initialize a new or reused MDL_lock
  object for the key being inserted.
*/
static void mdl_lock_reinit(uchar *dst_arg, const uchar *src_arg) {
  MDL_lock *dst = (MDL_lock *)dst_arg;
  const MDL_key *src = (const MDL_key *)src_arg;
  dst->reinit(src);
}

/**
  Adapter function which allows to use murmur3 with LF_HASH implementation.
*/

static uint murmur3_adapter(const LF_HASH *, const uchar *key, size_t length) {
  return murmur3_32(key, length, 0);
}

} /* extern "C" */
1122 
/** Initialize the container for all MDL locks. */

void MDL_map::init() {
  /*
    Pre-allocate MDL_lock objects for the singleton namespaces so that
    MDL_map::find() can return them without a hash lookup.
  */
  MDL_key global_lock_key(MDL_key::GLOBAL, "", "");
  MDL_key commit_lock_key(MDL_key::COMMIT, "", "");
  MDL_key acl_cache_lock_key(MDL_key::ACL_CACHE, "", "");
  MDL_key backup_lock_key(MDL_key::BACKUP_LOCK, "", "");

  m_global_lock = MDL_lock::create(&global_lock_key);
  m_commit_lock = MDL_lock::create(&commit_lock_key);
  m_acl_cache_lock = MDL_lock::create(&acl_cache_lock_key);
  m_backup_lock = MDL_lock::create(&backup_lock_key);

  m_unused_lock_objects = 0;

  // Hash on key bytes using murmur3; element lifecycle is managed by the
  // cons/dtor/reinit callbacks defined above.
  lf_hash_init2(&m_locks, sizeof(MDL_lock), LF_HASH_UNIQUE, 0, 0, mdl_locks_key,
                &my_charset_bin, &murmur3_adapter, &mdl_lock_cons,
                &mdl_lock_dtor, &mdl_lock_reinit);
}
1142 
/**
  Destroy the container for all MDL locks.
  @pre It must be empty.
*/

void MDL_map::destroy() {
  /* Release the pre-allocated singleton locks first, then the hash. */
  MDL_lock::destroy(m_global_lock);
  MDL_lock::destroy(m_commit_lock);
  MDL_lock::destroy(m_acl_cache_lock);
  MDL_lock::destroy(m_backup_lock);

  lf_hash_destroy(&m_locks);
}
1156 
/**
  Find MDL_lock object corresponding to the key.

  @param[in,out]  pins     LF_PINS to be used for pinning pointers during
                           look-up and returned MDL_lock object.
  @param[in]      mdl_key  Key for which MDL_lock object needs to be found.
  @param[out]     pinned   true  - if MDL_lock object is pinned,
                           false - if MDL_lock object doesn't require pinning
                                   (i.e. it is an object for GLOBAL, COMMIT,
                                   ACL_CACHE or BACKUP_LOCK namespaces).

  @retval MY_LF_ERRPTR   - Failure (OOM)
  @retval other-non-NULL - MDL_lock object found.
  @retval NULL           - Object not found.
*/

MDL_lock *MDL_map::find(LF_PINS *pins, const MDL_key *mdl_key, bool *pinned) {
  MDL_lock *lock = nullptr;

  if (is_lock_object_singleton(mdl_key)) {
    /*
      Avoid look up in m_locks hash when lock for GLOBAL, COMMIT, ACL_CACHE
      or BACKUP_LOCK namespace is requested. Return pointer to pre-allocated
      MDL_lock instance instead. Such an optimization allows us to avoid a few
      atomic operations for any statement changing data.

      It works since these namespaces contain only one element so keys
      for them look like '<namespace-id>\0\0'.
    */
    DBUG_ASSERT(mdl_key->length() == 3);

    switch (mdl_key->mdl_namespace()) {
      case MDL_key::GLOBAL:
        lock = m_global_lock;
        break;
      case MDL_key::COMMIT:
        lock = m_commit_lock;
        break;
      case MDL_key::ACL_CACHE:
        lock = m_acl_cache_lock;
        break;
      case MDL_key::BACKUP_LOCK:
        lock = m_backup_lock;
        break;
      default:
        /* Must be kept in sync with is_lock_object_singleton(). */
        DBUG_ASSERT(false);
    }

    /* Singletons live for the lifetime of MDL_map so no pinning is needed. */
    *pinned = false;

    return lock;
  }

  lock = static_cast<MDL_lock *>(
      lf_hash_search(&m_locks, pins, mdl_key->ptr(), mdl_key->length()));

  if (lock == nullptr || lock == MY_LF_ERRPTR) {
    lf_hash_search_unpin(pins);
    *pinned = false;  // Avoid warnings on older compilers.
    return lock;
  }

  *pinned = true;

  return lock;
}
1223 
/**
  Find MDL_lock object corresponding to the key, create it
  if it does not exist.

  @param[in,out]  pins     LF_PINS to be used for pinning pointers during
                           look-up and returned MDL_lock object.
  @param[in]      mdl_key  Key for which MDL_lock object needs to be found.
  @param[out]     pinned   true  - if MDL_lock object is pinned,
                           false - if MDL_lock object doesn't require pinning
                                   (i.e. it is an object for GLOBAL, COMMIT,
                                   ACL_CACHE or BACKUP_LOCK namespaces).

  @retval non-NULL - Success. MDL_lock instance for the key with
                     locked MDL_lock::m_rwlock.
  @retval NULL     - Failure (OOM).
*/

MDL_lock *MDL_map::find_or_insert(LF_PINS *pins, const MDL_key *mdl_key,
                                  bool *pinned) {
  MDL_lock *lock = nullptr;

  while ((lock = find(pins, mdl_key, pinned)) == nullptr) {
    /*
      MDL_lock for key isn't present in hash, try to insert new object.
      This can fail due to concurrent inserts.
    */
    int rc = lf_hash_insert(&m_locks, pins, mdl_key);
    if (rc == -1) /* If OOM. */
      return nullptr;
    else if (rc == 0) {
      /*
        New MDL_lock object is not used yet. So we need to
        increment number of unused lock objects.
      */
      ++m_unused_lock_objects;
    }
    /*
      Otherwise (presumably a concurrent insert won the race) loop back
      and retry the lookup.
    */
  }
  if (lock == MY_LF_ERRPTR) {
    /* If OOM in lf_hash_search. */
    return nullptr;
  }

  return lock;
}
1268 
extern "C" {
/**
  Helper function which allows to check if MDL_lock object stored in LF_HASH
  is unused - i.e. doesn't have any locks on both "fast" and "slow" paths
  and is not marked as deleted.

  @param arg  Pointer to the MDL_lock object (as stored in the hash).

  @return Non-zero if the object is unused, 0 otherwise.
*/
static int mdl_lock_match_unused(const uchar *arg) {
  const MDL_lock *lock = (const MDL_lock *)arg;
  /*
    It is OK to check MDL_lock::m_fast_path_state non-atomically here
    since the fact that MDL_lock object is unused will be properly
    validated later anyway.
  */
  return (lock->m_fast_path_state.load() == 0);
}
} /* extern "C" */
1285 
/**
  Try to find random MDL_lock object in MDL_map for which there are no "fast"
  path nor "slow" path locks. If found - mark it as destroyed, remove object
  from MDL_map and return it back to allocator.

  @param[in]     ctx           Context on which behalf we are trying to remove
                               unused object. Primarily needed to generate
                               random value to be used for random dive into
                               the hash in MDL_map.
  @param[in,out] pins          Pins for the calling thread to be used for
                               hash lookup and deletion.
  @param[out]    unused_locks  Number of unused lock objects after operation.

  @note
    In reality MDL_lock object will be returned to allocator once it is no
    longer pinned by any threads.
*/

void MDL_map::remove_random_unused(MDL_context *ctx, LF_PINS *pins,
                                   int32 *unused_locks) {
  DEBUG_SYNC(ctx->get_thd(), "mdl_remove_random_unused_before_search");

  /*
    Try to find an unused MDL_lock object by doing random dive into the hash.
    Since this method is called only when unused/total lock objects ratio is
    high enough, there is a good chance for this technique to succeed.
  */
  MDL_lock *lock = static_cast<MDL_lock *>(lf_hash_random_match(
      &m_locks, pins, &mdl_lock_match_unused, ctx->get_random()));

  if (lock == nullptr || lock == MY_LF_ERRPTR) {
    /*
      We were unlucky and no unused objects were found. This can happen,
      for example, if our random dive into LF_HASH was close to the tail
      of split-ordered list used in its implementation or if some other
      thread managed to destroy or start re-using MDL_lock object
      concurrently.
     */
    lf_hash_search_unpin(pins);
    *unused_locks = m_unused_lock_objects;
    return;
  }

  DEBUG_SYNC(ctx->get_thd(), "mdl_remove_random_unused_after_search");

  /*
    Acquire MDL_lock::m_rwlock to ensure that IS_DESTROYED flag is set
    atomically AND under protection of MDL_lock::m_rwlock, so it can be
    safely read using both atomics and ordinary read under protection of
    m_rwlock. This also means that it is safe to unpin MDL_lock object
    after we have checked its IS_DESTROYED flag if we keep m_rwlock lock.
  */
  mysql_prlock_wrlock(&lock->m_rwlock);

  if (lock->m_fast_path_state & MDL_lock::IS_DESTROYED) {
    /*
      Somebody has managed to mark MDL_lock object as destroyed before
      we have acquired MDL_lock::m_rwlock.
    */
    mysql_prlock_unlock(&lock->m_rwlock);
    lf_hash_search_unpin(pins);
    *unused_locks = m_unused_lock_objects;
    return;
  }
  /*
    IS_DESTROYED is not set and we hold m_rwlock, so nobody can destroy
    the object under us - unpinning now is safe (see the comment above).
  */
  lf_hash_search_unpin(pins);

  /*
    Atomically check that number of "fast path" and "slow path" locks is 0 and
    set IS_DESTROYED flag.

    This is the only place where we rely on the fact that our compare-and-swap
    operation can't spuriously fail i.e. is of strong kind.
  */
  MDL_lock::fast_path_state_t old_state = 0;

  if (lock->fast_path_state_cas(&old_state, MDL_lock::IS_DESTROYED)) {
    /*
      There were no "fast path" or "slow path" references and we
      have successfully set IS_DESTROYED flag.
    */
    mysql_prlock_unlock(&lock->m_rwlock);

    DEBUG_SYNC(ctx->get_thd(),
               "mdl_remove_random_unused_after_is_destroyed_set");

    /*
      Even though other threads can't rely on the MDL_lock object being around
      once IS_DESTROYED flag is set, we know that it was not removed from
      the hash yet (as it is responsibility of the current thread, i.e. one
      which executes this MDL_map::remove_random_unused() call) and thus was
      not deallocated.
      And since lf_hash_delete() finds and pins the object for the key as its
      first step and keeps pins until its end it is safe to use MDL_lock::key
      as parameter to lf_hash_delete().
    */
    int rc =
        lf_hash_delete(&m_locks, pins, lock->key.ptr(), lock->key.length());

    /* The MDL_lock object must be present in the hash. */
    DBUG_ASSERT(rc != 1);

    if (rc == -1) {
      /*
        In unlikely case of OOM MDL_lock object stays in the hash. The best
        thing we can do is to reset IS_DESTROYED flag. The object will be
        destroyed either by further calls to lf_hash_delete() or by final
        call to lf_hash_destroy().
        Resetting needs to happen atomically AND under protection of
        MDL_lock::m_rwlock so it safe to read this flag both using atomics
        and ordinary reads under protection of m_rwlock lock.
      */
      mysql_prlock_wrlock(&lock->m_rwlock);
      lock->fast_path_state_reset();
      mysql_prlock_unlock(&lock->m_rwlock);
    } else {
      /* Success. */
      *unused_locks = --m_unused_lock_objects;
    }
  } else {
    /*
      Some other thread has managed to find and use this MDL_lock object after
      it has been found by the above call to lf_hash_random_match().
      There are/were "fast" or "slow path" references so MDL_lock object can't
      be deleted.

      Assert that compare-and-swap operation is of strong kind and can't
      fail spuriously.
    */
    DBUG_ASSERT(old_state != 0);
    mysql_prlock_unlock(&lock->m_rwlock);
    *unused_locks = m_unused_lock_objects;
  }
}
1419 
/**
  Initialize a metadata locking context.

  This is to be called when a new server connection is created.
*/

MDL_context::MDL_context()
    : m_owner(nullptr),
      m_needs_thr_lock_abort(false),
      m_force_dml_deadlock_weight(false),
      m_waiting_for(nullptr),
      m_pins(nullptr),  // LF_HASH pins are allocated lazily by fix_pins().
      m_rand_state(UINT_MAX32) {
  mysql_prlock_init(key_MDL_context_LOCK_waiting_for, &m_LOCK_waiting_for);
}
1435 
/**
  Destroy metadata locking context.

  Assumes and asserts that there are no active or pending locks
  associated with this context at the time of the destruction.

  Asserts that there are no pending or satisfied lock requests.
  The pending locks must be released prior to destruction. This is
  a way to express the assertion that all tables are closed before
  a connection is destroyed. Releases the context's synchronization
  primitives and LF_HASH pins.
*/

void MDL_context::destroy() {
  DBUG_ASSERT(m_ticket_store.is_empty());

  mysql_prlock_destroy(&m_LOCK_waiting_for);
  if (m_pins) lf_hash_put_pins(m_pins);
}
1454 
1455 /**
1456   Allocate pins which are necessary to work with MDL_map container
1457   if they are not allocated already.
1458 
1459   @return true with error in DA if pinbox is exhausted, false otherwise.
1460 */
1461 
fix_pins()1462 bool MDL_context::fix_pins() {
1463   if (!m_pins) m_pins = mdl_locks.get_pins();
1464   if (m_pins == nullptr) {
1465     my_error(ER_MDL_OUT_OF_RESOURCES, MYF(0));
1466     return true;
1467   }
1468   return false;
1469 }
1470 
/**
  Initialize a lock request.

  This is to be used for every lock request.

  Note that initialization and allocation are split into two
  calls. This is to allow flexible memory management of lock
  requests. Normally a lock request is stored in statement memory
  (e.g. is a member of struct TABLE_LIST), but we would also like
  to allow allocation of lock requests in other memory roots,
  for example in the grant subsystem, to lock privilege tables.

  The MDL subsystem does not own or manage memory of lock requests.

  @param  mdl_namespace  Id of namespace of object to be locked
  @param  db_arg         Name of database to which the object belongs
  @param  name_arg       Name of of the object
  @param  mdl_type_arg   The MDL lock type for the request.
  @param  mdl_duration_arg   The MDL duration for the request.
  @param  src_file       Source file name issuing the request.
  @param  src_line       Source line number issuing the request.
*/

void MDL_request::init_with_source(MDL_key::enum_mdl_namespace mdl_namespace,
                                   const char *db_arg, const char *name_arg,
                                   enum_mdl_type mdl_type_arg,
                                   enum_mdl_duration mdl_duration_arg,
                                   const char *src_file, uint src_line) {
#if !defined(DBUG_OFF)
  // Make sure all I_S tables (except ndb tables) are in CAPITAL letters.
  bool is_ndb_table = (name_arg && (strncmp(name_arg, "ndb", 3) == 0));
  DBUG_ASSERT(
      mdl_namespace != MDL_key::TABLE ||
      my_strcasecmp(system_charset_info, "information_schema", db_arg) ||
      is_ndb_table || !name_arg ||
      my_isupper(system_charset_info, name_arg[0]));
#endif

  key.mdl_key_init(mdl_namespace, db_arg, name_arg);
  type = mdl_type_arg;
  duration = mdl_duration_arg;
  ticket = nullptr;
  m_src_file = src_file;
  m_src_line = src_line;
}
1516 
1517 /**
1518   Initialize a lock request using pre-built MDL_key.
1519 
1520   @sa MDL_request::init(namespace, db, name, type).
1521 
1522   @param key_arg       The pre-built MDL key for the request.
1523   @param mdl_type_arg  The MDL lock type for the request.
1524   @param mdl_duration_arg   The MDL duration for the request.
1525   @param src_file      Source file name issuing the request.
1526   @param src_line      Source line number issuing the request.
1527 */
1528 
init_by_key_with_source(const MDL_key * key_arg,enum_mdl_type mdl_type_arg,enum_mdl_duration mdl_duration_arg,const char * src_file,uint src_line)1529 void MDL_request::init_by_key_with_source(const MDL_key *key_arg,
1530                                           enum_mdl_type mdl_type_arg,
1531                                           enum_mdl_duration mdl_duration_arg,
1532                                           const char *src_file, uint src_line) {
1533   key.mdl_key_init(key_arg);
1534   type = mdl_type_arg;
1535   duration = mdl_duration_arg;
1536   ticket = nullptr;
1537   m_src_file = src_file;
1538   m_src_line = src_line;
1539 }
1540 
1541 /**
1542   Initialize a lock request using partial MDL key.
1543 
1544   @sa MDL_request::init(namespace, db, name, type).
1545 
1546   @remark The partial key must be "<database>\0<name>\0".
1547 
1548   @param  namespace_arg       Id of namespace of object to be locked
1549   @param  part_key_arg        Partial key.
1550   @param  part_key_length_arg Partial key length
1551   @param  db_length_arg       Database name length.
1552   @param  mdl_type_arg        The MDL lock type for the request.
1553   @param  mdl_duration_arg    The MDL duration for the request.
1554   @param  src_file            Source file name issuing the request.
1555   @param  src_line            Source line number issuing the request.
1556 */
1557 
init_by_part_key_with_source(MDL_key::enum_mdl_namespace namespace_arg,const char * part_key_arg,size_t part_key_length_arg,size_t db_length_arg,enum_mdl_type mdl_type_arg,enum_mdl_duration mdl_duration_arg,const char * src_file,uint src_line)1558 void MDL_request::init_by_part_key_with_source(
1559     MDL_key::enum_mdl_namespace namespace_arg, const char *part_key_arg,
1560     size_t part_key_length_arg, size_t db_length_arg,
1561     enum_mdl_type mdl_type_arg, enum_mdl_duration mdl_duration_arg,
1562     const char *src_file, uint src_line) {
1563   key.mdl_key_init(namespace_arg, part_key_arg, part_key_length_arg,
1564                    db_length_arg);
1565   type = mdl_type_arg;
1566   duration = mdl_duration_arg;
1567   ticket = nullptr;
1568   m_src_file = src_file;
1569   m_src_line = src_line;
1570 }
1571 
1572 /**
1573   Auxiliary functions needed for creation/destruction of MDL_lock objects.
1574 */
1575 
create(const MDL_key * mdl_key)1576 inline MDL_lock *MDL_lock::create(const MDL_key *mdl_key) {
1577   MDL_lock *result = new (std::nothrow) MDL_lock();
1578   if (result) result->reinit(mdl_key);
1579   return result;
1580 }
1581 
destroy(MDL_lock * lock)1582 void MDL_lock::destroy(MDL_lock *lock) { delete lock; }
1583 
1584 /**
1585   Finalize initialization or re-initialize MDL_lock returned from
1586   LF_ALLOCATOR's cache to represent object identified by provided key.
1587 
1588   @note All non-static MDL_lock members:
1589         1) either have to be reinitialized here
1590            (like IS_DESTROYED flag in MDL_lock::m_fast_path_state).
1591         2) or need to be initialized in constructor AND returned to their
1592            pristine state once they are removed from MDL_map container
1593            (like MDL_lock::m_granted or MDL_lock::m_rwlock).
1594            Otherwise it is possible that we will end up in situation
1595            when "new" (actually reused) MDL_lock object inserted in
1596            LF_HASH will inherit some values from old object.
1597 */
1598 
inline void MDL_lock::reinit(const MDL_key *mdl_key) {
  // Set up the key and pick the lock strategy matching its namespace
  // (scoped vs per-object strategy instances defined below).
  key.mdl_key_init(mdl_key);
  m_strategy = MDL_lock::get_strategy(*mdl_key);
  // Reset the counters and priority-matrix index used by the
  // max_write_lock_count anti-starvation logic in reschedule_waiters().
  m_hog_lock_count = 0;
  m_piglet_lock_count = 0;
  m_current_waiting_incompatible_idx = 0;
  // Clearing m_fast_path_state also clears the IS_DESTROYED flag for
  // objects reused from LF_ALLOCATOR's cache.
  m_fast_path_state = 0;
  /*
    Check that we have clean "m_granted" and "m_waiting" sets/lists in both
    cases when we have fresh and re-used object.
  */
  DBUG_ASSERT(m_granted.is_empty() && m_waiting.is_empty());
  /* The same should be true for "m_obtrusive_locks_granted_waiting_count". */
  DBUG_ASSERT(m_obtrusive_locks_granted_waiting_count == 0);
}
1614 
1615 /**
1616   @returns "Fast path" increment for request for "unobtrusive" type
1617             of lock, 0 - if it is request for "obtrusive" type of
1618             lock.
1619 
1620   @sa Description at method declaration for more details.
1621 */
1622 
get_unobtrusive_lock_increment(const MDL_request * request)1623 MDL_lock::fast_path_state_t MDL_lock::get_unobtrusive_lock_increment(
1624     const MDL_request *request) {
1625   return MDL_lock::get_strategy(request->key)
1626       ->m_unobtrusive_lock_increment[request->type];
1627 }
1628 
1629 /**
1630   Indicates whether object belongs to namespace which requires storage engine
1631   to be notified before acquiring and after releasing exclusive lock.
1632 */
1633 
needs_hton_notification(MDL_key::enum_mdl_namespace mdl_namespace)1634 bool MDL_lock::needs_hton_notification(
1635     MDL_key::enum_mdl_namespace mdl_namespace) {
1636   switch (mdl_namespace) {
1637     case MDL_key::TABLESPACE:
1638     case MDL_key::SCHEMA:
1639     case MDL_key::TABLE:
1640     case MDL_key::FUNCTION:
1641     case MDL_key::PROCEDURE:
1642     case MDL_key::TRIGGER:
1643     case MDL_key::EVENT:
1644       return true;
1645     default:
1646       return false;
1647   }
1648 }
1649 
1650 /**
1651   Auxiliary functions needed for creation/destruction of MDL_ticket
1652   objects.
1653 
1654   @todo This naive implementation should be replaced with one that saves
1655         on memory allocation by reusing released objects.
1656 */
1657 
MDL_ticket *MDL_ticket::create(MDL_context *ctx_arg, enum_mdl_type type_arg
#ifndef DBUG_OFF
                               ,
                               enum_mdl_duration duration_arg
#endif
) {
  // Heap-allocate a ticket; returns nullptr on out-of-memory.
  // The duration argument exists in debug builds only.
  return new (std::nothrow) MDL_ticket(ctx_arg, type_arg
#ifndef DBUG_OFF
                                       ,
                                       duration_arg
#endif
  );
}
1671 
destroy(MDL_ticket * ticket)1672 void MDL_ticket::destroy(MDL_ticket *ticket) {
1673   mysql_mdl_destroy(ticket->m_psi);
1674   ticket->m_psi = nullptr;
1675 
1676   delete ticket;
1677 }
1678 
1679 /**
1680   Return the 'weight' of this ticket for the victim selection algorithm.
1681   Requests with lower weight are preferred to requests with higher weight
1682   when choosing a victim.
1683 
1684   @note When MDL_context::m_force_dml_deadlock_weight is set, the return value
1685         of this method is ignored and DEADLOCK_WEIGHT_DML is used for the
1686         context.
1687 */
1688 
get_deadlock_weight() const1689 uint MDL_ticket::get_deadlock_weight() const {
1690   /*
1691     Waits for user-level locks have lower weight than waits for locks
1692     typically acquired by DDL, so we don't abort DDL in case of deadlock
1693     involving user-level locks and DDL. Deadlock errors are not normally
1694     expected from DDL by users.
1695     Waits for user-level locks have higher weight than waits for locks
1696     acquired by DML, so we prefer to abort DML in case of deadlock involving
1697     user-level locks and DML. User-level locks are explicitly requested by
1698     user, so they are probably important for them. Plus users expect
1699     deadlocks from DML transactions and for DML statements executed in
1700     @@autocommit=1 mode back-off and retry algorithm hides deadlock errors.
1701   */
1702   if (m_lock->key.mdl_namespace() == MDL_key::USER_LEVEL_LOCK)
1703     return DEADLOCK_WEIGHT_ULL;
1704 
1705   /*
1706     Locks higher or equal to MDL_SHARED_UPGRADABLE:
1707     *) Are typically acquired for DDL and LOCK TABLES statements.
1708     *) Are often acquired in a way which doesn't allow simple release of
1709        locks and restart of lock acquisition process in case of deadlock
1710        (e.g. through lock_table_names() call).
1711 
1712     To avoid such statements getting aborted with ER_LOCK_DEADLOCK error
1713     we use the higher DEADLOCK_WEIGHT_DDL weight for them.
1714 
1715     Note that two DDL statements should not typically deadlock with each
1716     other since they normally acquire locks in the same order, thanks to
1717     to the fact that lock_table_names() uses MDL_context::acquire_locks()
1718     method which sorts lock requests before trying to acquire them.
1719 
1720     In cases when "strong" locks can be acquired out-of-order (e.g. for
1721     LOCK TABLES) we try to use DEADLOCK_WEIGHT_DML instead.
1722 
1723     TODO/FIXME: The below condition needs to be updated. The fact that a
1724                 lock from GLOBAL namespace is requested no longer means
1725                 that this is a DDL statement. There is a bug report about
1726                 this.
1727   */
1728   if (m_lock->key.mdl_namespace() == MDL_key::GLOBAL ||
1729       m_type >= MDL_SHARED_UPGRADABLE)
1730     return DEADLOCK_WEIGHT_DDL;
1731 
1732   return DEADLOCK_WEIGHT_DML;
1733 }
1734 
1735 /** Construct an empty wait slot. */
1736 
MDL_wait::MDL_wait() : m_wait_status(WS_EMPTY) {
  // The mutex protects m_wait_status; the condition variable is signalled
  // when a status is posted to the slot (see set_status()).
  mysql_mutex_init(key_MDL_wait_LOCK_wait_status, &m_LOCK_wait_status, nullptr);
  mysql_cond_init(key_MDL_wait_COND_wait_status, &m_COND_wait_status);
}
1741 
1742 /** Destroy system resources. */
1743 
MDL_wait::~MDL_wait() {
  // Release the mutex and condition variable created in the constructor.
  mysql_mutex_destroy(&m_LOCK_wait_status);
  mysql_cond_destroy(&m_COND_wait_status);
}
1748 
1749 /**
1750   Set the status unless it's already set. Return false if set,
1751   true otherwise.
1752 */
1753 
set_status(enum_wait_status status_arg)1754 bool MDL_wait::set_status(enum_wait_status status_arg) {
1755   bool was_occupied = true;
1756   mysql_mutex_lock(&m_LOCK_wait_status);
1757   if (m_wait_status == WS_EMPTY) {
1758     was_occupied = false;
1759     m_wait_status = status_arg;
1760     mysql_cond_signal(&m_COND_wait_status);
1761   }
1762   mysql_mutex_unlock(&m_LOCK_wait_status);
1763   return was_occupied;
1764 }
1765 
1766 /** Query the current value of the wait slot. */
1767 
get_status()1768 MDL_wait::enum_wait_status MDL_wait::get_status() {
1769   enum_wait_status result;
1770   mysql_mutex_lock(&m_LOCK_wait_status);
1771   result = m_wait_status;
1772   mysql_mutex_unlock(&m_LOCK_wait_status);
1773   return result;
1774 }
1775 
1776 /** Clear the current value of the wait slot. */
1777 
void MDL_wait::reset_status() {
  // Reopen the slot so a future set_status() call can succeed again.
  mysql_mutex_lock(&m_LOCK_wait_status);
  m_wait_status = WS_EMPTY;
  mysql_mutex_unlock(&m_LOCK_wait_status);
}
1783 
1784 /**
1785   Wait for the status to be assigned to this wait slot.
1786 
1787   @param owner           MDL context owner.
1788   @param abs_timeout     Absolute time after which waiting should stop.
1789   @param set_status_on_timeout true  - If in case of timeout waiting
1790                                        context should close the wait slot by
1791                                        sending TIMEOUT to itself.
1792                                false - Otherwise.
1793   @param wait_state_name  Thread state name to be set for duration of wait.
1794 
1795   @returns Signal posted.
1796 */
1797 
MDL_wait::enum_wait_status MDL_wait::timed_wait(
    MDL_context_owner *owner, struct timespec *abs_timeout,
    bool set_status_on_timeout, const PSI_stage_info *wait_state_name) {
  PSI_stage_info old_stage;
  enum_wait_status result;
  int wait_result = 0;

  mysql_mutex_lock(&m_LOCK_wait_status);

  // Switch the owner's stage to wait_state_name for the duration of the
  // wait; the previous stage is restored via EXIT_COND() below.
  owner->ENTER_COND(&m_COND_wait_status, &m_LOCK_wait_status, wait_state_name,
                    &old_stage);
  thd_wait_begin(nullptr, THD_WAIT_META_DATA_LOCK);
  // The loop guards against spurious wakeups: keep waiting until a status
  // is posted, the owner is killed, or the absolute deadline passes.
  while (!m_wait_status && !owner->is_killed() && !is_timeout(wait_result)) {
    wait_result = mysql_cond_timedwait(&m_COND_wait_status, &m_LOCK_wait_status,
                                       abs_timeout);
  }
  thd_wait_end(nullptr);

  if (m_wait_status == WS_EMPTY) {
    /*
      Wait has ended not due to a status being set from another
      thread but due to this connection/statement being killed or a
      time out.
      To avoid races, which may occur if another thread sets
      GRANTED status before the code which calls this method
      processes the abort/timeout, we assign the status under
      protection of the m_LOCK_wait_status, within the critical
      section. An exception is when set_status_on_timeout is
      false, which means that the caller intends to restart the
      wait.
    */
    if (owner->is_killed())
      m_wait_status = KILLED;
    else if (set_status_on_timeout)
      m_wait_status = TIMEOUT;
  }
  result = m_wait_status;

  mysql_mutex_unlock(&m_LOCK_wait_status);
  owner->EXIT_COND(&old_stage);

  return result;
}
1841 
1842 /**
1843   Clear bit corresponding to the type of metadata lock in bitmap representing
1844   set of such types if list of tickets does not contain ticket with such type.
1845 
1846   @param[in]      type    Type of metadata lock to look up in the list.
1847 */
1848 
clear_bit_if_not_in_list(enum_mdl_type type)1849 void MDL_lock::Ticket_list::clear_bit_if_not_in_list(enum_mdl_type type) {
1850   MDL_lock::Ticket_iterator it(m_list);
1851   const MDL_ticket *ticket;
1852 
1853   while ((ticket = it++))
1854     if (ticket->get_type() == type) return;
1855   m_bitmap &= ~MDL_BIT(type);
1856 }
1857 
1858 /**
1859   Add ticket to MDL_lock's list of waiting requests and
1860   update corresponding bitmap of lock types.
1861 */
1862 
add_ticket(MDL_ticket * ticket)1863 void MDL_lock::Ticket_list::add_ticket(MDL_ticket *ticket) {
1864   /*
1865     Ticket being added to the list must have MDL_ticket::m_lock set,
1866     since for such tickets methods accessing this member might be
1867     called by other threads.
1868   */
1869   DBUG_ASSERT(ticket->get_lock());
1870   /*
1871     Add ticket to the *back* of the queue to ensure fairness
1872     among requests with the same priority.
1873   */
1874   m_list.push_back(ticket);
1875   m_bitmap |= MDL_BIT(ticket->get_type());
1876 }
1877 
1878 /**
1879   Remove ticket from MDL_lock's list of requests and
1880   update corresponding bitmap of lock types.
1881 */
1882 
remove_ticket(MDL_ticket * ticket)1883 void MDL_lock::Ticket_list::remove_ticket(MDL_ticket *ticket) {
1884   m_list.remove(ticket);
1885   /*
1886     Check if waiting queue has another ticket with the same type as
1887     one which was removed. If there is no such ticket, i.e. we have
1888     removed last ticket of particular type, then we need to update
1889     bitmap of waiting ticket's types.
1890     Note that in most common case, i.e. when shared lock is removed
1891     from waiting queue, we are likely to find ticket of the same
1892     type early without performing full iteration through the list.
1893     So this method should not be too expensive.
1894   */
1895   clear_bit_if_not_in_list(ticket->get_type());
1896 }
1897 
1898 /**
1899   Determine waiting contexts which requests for the lock can be
1900   satisfied, grant lock to them and wake them up.
1901 
1902   @note Together with MDL_lock::add_ticket() this method implements
1903         fair scheduling among requests with the same priority.
1904         It tries to grant lock from the head of waiters list, while
1905         add_ticket() adds new requests to the back of this list.
1906 
1907 */
1908 
void MDL_lock::reschedule_waiters() {
  MDL_lock::Ticket_iterator it(m_waiting);
  MDL_ticket *ticket;

  /*
    Find the first (and hence the oldest) waiting request which
    can be satisfied (taking into account priority). Grant lock to it.
    Repeat the process for the remainder of waiters.
    Note we don't need to re-start iteration from the head of the
    list after satisfying the first suitable request as in our case
    all compatible types of requests have the same priority.

    TODO/FIXME: We should:
                - Either switch to scheduling without priorities
                  which will allow to stop iteration through the
                  list of waiters once we found the first ticket
                  which can't be satisfied
                - Or implement some check using bitmaps which will
                  allow to stop iteration in cases when, e.g., we
                  grant SNRW lock and there are no pending S or
                  SH locks.
  */
  while ((ticket = it++)) {
    if (can_grant_lock(ticket->get_type(), ticket->get_ctx())) {
      // set_status() returns false only if the waiter's slot was empty,
      // i.e. the waiter has actually been granted the lock.
      if (!ticket->get_ctx()->m_wait.set_status(MDL_wait::GRANTED)) {
        /*
          Satisfy the found request by updating lock structures.
          It is OK to do so even after waking up the waiter since any
          session which tries to get any information about the state of
          this lock has to acquire MDL_lock::m_rwlock first and thus,
          when manages to do so, already sees an updated state of the
          MDL_lock object.

          It doesn't matter if we are dealing with "obtrusive" lock here,
          we are moving lock request from waiting to granted lists,
          so m_obtrusive_locks_granted_waiting_count should stay the same.
        */
        m_waiting.remove_ticket(ticket);
        m_granted.add_ticket(ticket);

        if (is_affected_by_max_write_lock_count()) {
          /*
            Increase the counter of successively granted high priority "hog" or
            "piglet" locks, if we have granted one and there are pending
            lower priority locks.
          */
          if (count_piglets_and_hogs(ticket->get_type())) {
            /*
              Switch of priority matrix might have unblocked some lower-prio
              locks which are still compatible with the lock type we just have
              granted (for example, when we grant SNW lock and there are pending
              requests of SR type). Restart iteration to wake them up, otherwise
              we might get deadlocks.
            */
            it.rewind();
            continue;
          }
        }
      }
      /*
        If we could not update the wait slot of the waiter,
        it can be due to fact that its connection/statement was
        killed or it has timed out (i.e. the slot is not empty).
        Since in all such cases the waiter assumes that the lock was
        not been granted, we should keep the request in the waiting
        queue and look for another request to reschedule.
      */
    }
  }

  if (is_affected_by_max_write_lock_count()) {
    /*
      Reset number of successively granted higher-prio "hog"/"piglet" locks
      if there are no pending lower-prio conflicting locks.
      This ensures:
      - That m_hog_lock_count/m_piglet_lock_count is correctly reset after
        a strong lock is released and weak locks are granted (or there are
        no other lock requests).
      - That the situation when SNW lock is granted along with some SR/SRO
        locks, but SW locks are still blocked is handled correctly.
      - That m_hog_lock_count/m_piglet_lock_count is zero in all cases
        when there are no pending weak locks (e.g. when weak locks are
        removed due to deadlock, being killed or timeout).

      Also switch priority matrix accordingly. Note that switch in
      this particular place doesn't require reschedule since:

      1) We never switch to a matrix which prefers lower priority locks
         more than "hog"/"piglet" locks here (this might have happened if
         MDL_lock::switch_incompatible_waiting_types_bitmap_if_needed()
         was used instead and max_write_lock_count was decreased
         concurrently).
      2) When we switch from matrix #1 (which prefers SRO over SW) to
         default matrix #0 only the priority of SW vs SRO requests changes.
         Since the switch happens only when there are no pending SRO
         locks, no reschedule is required.
      3) When we switch from matrix #2 (which prefers all non-"hog" over
         "hog" requests) to default matrix #0 only the priority of "hog" vs
         non-"hog" requests changes. But since this happens when there are
         no non-"hog" requests, no reschedule is required.
      4) When we switch from matrix #3 (which prefers SRO over SW and
         non-"hog"-minus-SW over "hog" locks) to default matrix #0 only
         the priority of non-"hog"-minus-SW vs "hog" and SRO vs SW changes
         (see invariant [INV3]). Since the switch happens only when there
         are no pending non-"hog"-minus-SW/SRO requests, no reschedule is
         required.

      Note that we might be switching priority matrix in a situation when we
      had pending SRO/non-"hog"/non-"hog"-minus-SW requests at the start of
      the call but they were granted during the loop. If some "piglet"/"hog"
      requests are compatible with those lower priority locks, they were
      granted as well. Those which were not compatible were not granted and
      should stay waiting until lower priority locks are released (in other
      words, the fact that a lock moved from pending to granted doesn't unblock
      additional requests, see invariant [INV2]).
    */
    if (m_current_waiting_incompatible_idx == 3) {
      /*
        We can't simply switch from matrix #3 to matrix #2 when there are no
        pending SRO locks, as that would allow a stream of concurrent SW and SRO
        requests to starve out "hog" locks when max_write_lock_count is set
        (there will be always pending SW or/and SRO locks in this case, so no
        switch back to matrix #0 will ever happen).

        So we switch from matrix #3 to #0 directly and ignore pending SW/SWLP
        locks. This is OK as situation when matrix #3 is active can only
        occur when there were max_write_lock_count SW locks granted recently
        (before switch from #0 -> #1 which preceded switch #3 or before switch
        #2 -> #3).
      */
      if ((m_waiting.bitmap() &
           ~(MDL_OBJECT_HOG_LOCK_TYPES | MDL_BIT(MDL_SHARED_WRITE) |
             MDL_BIT(MDL_SHARED_WRITE_LOW_PRIO))) == 0) {
        m_piglet_lock_count = 0;
        m_hog_lock_count = 0;
        m_current_waiting_incompatible_idx = 0;
      }
    } else {
      // Clear the "hog" bit (2) when no non-"hog" requests are pending,
      // and the "piglet" bit (1) when no SRO requests are pending.
      if ((m_waiting.bitmap() & ~MDL_OBJECT_HOG_LOCK_TYPES) == 0) {
        m_hog_lock_count = 0;
        m_current_waiting_incompatible_idx &= ~2;
      }
      if ((m_waiting.bitmap() & MDL_BIT(MDL_SHARED_READ_ONLY)) == 0) {
        m_piglet_lock_count = 0;
        m_current_waiting_incompatible_idx &= ~1;
      }
    }
  }
}
2058 
2059 /**
2060   Strategy instances to be used with scoped metadata locks (i.e. locks
2061   from GLOBAL, COMMIT, TABLESPACE, BACKUP_LOCK and SCHEMA namespaces).
2062   The only locking modes which are supported at the moment are SHARED and
2063   INTENTION EXCLUSIVE and EXCLUSIVE.
2064 */
2065 
const MDL_lock::MDL_lock_strategy MDL_lock::m_scoped_lock_strategy = {
    /**
      Compatibility (or rather "incompatibility") matrices for scoped metadata
      lock. Arrays of bitmaps which elements specify which granted/waiting locks
      are incompatible with type of lock being requested.

      The first array specifies if particular type of request can be satisfied
      if there is granted scoped lock of certain type.

                 | Type of active   |
         Request |   scoped lock    |
          type   | IS(*)  IX   S  X |
        ---------+------------------+
        IS       |  +      +   +  + |
        IX       |  +      +   -  - |
        S        |  +      -   +  - |
        X        |  +      -   -  - |

      Here: "+" -- means that request can be satisfied
            "-" -- means that request can't be satisfied and should wait

      (*)  Since intention shared scoped locks are compatible with all other
           type of locks we don't even have any accounting for them.

      Note that relation between scoped locks and objects locks requested
      by statement is not straightforward and is therefore fully defined
      by SQL-layer.
      For example, in order to support global read lock implementation
      SQL-layer acquires IX lock in GLOBAL namespace for each statement
      that can modify metadata or data (i.e. for each statement that
      needs SW, SU, SNW, SNRW or X object locks). OTOH, to ensure that
      DROP DATABASE works correctly with concurrent DDL, IX metadata locks
      in SCHEMA namespace are acquired for DDL statements which can update
      metadata in the schema (i.e. which acquire SU, SNW, SNRW and X locks
      on schema objects) and aren't acquired for DML.
    */
    {MDL_BIT(MDL_EXCLUSIVE) | MDL_BIT(MDL_SHARED),
     MDL_BIT(MDL_EXCLUSIVE) | MDL_BIT(MDL_INTENTION_EXCLUSIVE), 0, 0, 0, 0, 0,
     0, 0, 0,
     MDL_BIT(MDL_EXCLUSIVE) | MDL_BIT(MDL_SHARED) |
         MDL_BIT(MDL_INTENTION_EXCLUSIVE)},
    /**
      Each array in the next group specifies if a particular type of request can
      be satisfied if there is already a waiting request for the scoped lock of
      a certain type. I.e. each array specifies a matrix with priorities for
      different lock types.

      Scoped locks only use the first array which represents the "default"
      priority matrix. The remaining 3 matrices are not relevant for them.

                 |    Pending      |
         Request |  scoped lock    |
          type   | IS(*)  IX  S  X |
        ---------+-----------------+
        IS       |  +      +  +  + |
        IX       |  +      +  -  - |
        S        |  +      +  +  - |
        X        |  +      +  +  + |

      Here the meaning of "+", "-" and (*) is the same as above.
    */
    {{MDL_BIT(MDL_EXCLUSIVE) | MDL_BIT(MDL_SHARED), MDL_BIT(MDL_EXCLUSIVE), 0,
      0, 0, 0, 0, 0, 0, 0, 0},
     {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
     {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
     {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}},
    /**
      Array of increments for "unobtrusive" types of lock requests for scoped
      locks.

      @sa MDL_lock::get_unobtrusive_lock_increment().

      For scoped locks:
      - "unobtrusive" types: IX
      - "obtrusive" types: X and S

      We encode number of IX locks acquired using "fast path" in bits 0 .. 59
      of MDL_lock::m_fast_path_state.
    */
    {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
    /*
      In scoped locks, only IX lock request would starve because of X/S.
      But that is practically a very rare case. So we don't apply the
      max_write_lock_count limit to them.
    */
    false,
#ifndef DBUG_OFF
    // In scoped locks, only IX, SHARED and X are allowed.
    {true, true, false, false, false, false, false, false, false, false, true},
#endif /* not defined DBUG_OFF */

    /*
      Scoped locks don't require notification of owners of conflicting
      locks for any type of requests. Hence 'm_needs_notification' is NULL.
    */
    nullptr,
    /*
      For the same reason, 'm_notify_conflicting_locks' is NULL for scoped
      locks.
    */
    nullptr,
    &MDL_lock::scoped_lock_fast_path_granted_bitmap,
    /* Scoped locks never require connection check. */
    nullptr};
2170 
2171 /**
2172   Strategy instance for per-object locks.
2173   Supports all locked modes except INTENTION EXCLUSIVE locks.
2174 */
2175 
2176 const MDL_lock::MDL_lock_strategy MDL_lock::m_object_lock_strategy = {
2177     /**
2178       Compatibility (or rather "incompatibility") matrices for per-object
2179       metadata lock. Arrays of bitmaps which elements specify which granted/
2180       waiting locks are incompatible with type of lock being requested.
2181 
2182       The first array specifies if particular type of request can be satisfied
2183       if there is granted lock of certain type.
2184 
2185          Request  |  Granted requests for lock            |
2186           type    | S  SH  SR  SW  SWLP  SU  SRO  SNW  SNRW  X  |
2187         ----------+---------------------------------------------+
2188         S         | +   +   +   +    +    +   +    +    +    -  |
2189         SH        | +   +   +   +    +    +   +    +    +    -  |
2190         SR        | +   +   +   +    +    +   +    +    -    -  |
2191         SW        | +   +   +   +    +    +   -    -    -    -  |
2192         SWLP      | +   +   +   +    +    +   -    -    -    -  |
2193         SU        | +   +   +   +    +    -   +    -    -    -  |
2194         SRO       | +   +   +   -    -    +   +    +    -    -  |
2195         SNW       | +   +   +   -    -    -   +    -    -    -  |
2196         SNRW      | +   +   -   -    -    -   -    -    -    -  |
2197         X         | -   -   -   -    -    -   -    -    -    -  |
2198 
2199       Here: "+" -- means that request can be satisfied
2200             "-" -- means that request can't be satisfied and should wait
2201 
2202       @note In cases then current context already has "stronger" type
2203             of lock on the object it will be automatically granted
2204             thanks to usage of the MDL_context::find_ticket() method.
2205 
2206       @note IX locks are excluded since they are not used for per-object
2207             metadata locks.
2208     */
2209     {0, MDL_BIT(MDL_EXCLUSIVE), MDL_BIT(MDL_EXCLUSIVE),
2210      MDL_BIT(MDL_EXCLUSIVE) | MDL_BIT(MDL_SHARED_NO_READ_WRITE),
2211      MDL_BIT(MDL_EXCLUSIVE) | MDL_BIT(MDL_SHARED_NO_READ_WRITE) |
2212          MDL_BIT(MDL_SHARED_NO_WRITE) | MDL_BIT(MDL_SHARED_READ_ONLY),
2213      MDL_BIT(MDL_EXCLUSIVE) | MDL_BIT(MDL_SHARED_NO_READ_WRITE) |
2214          MDL_BIT(MDL_SHARED_NO_WRITE) | MDL_BIT(MDL_SHARED_READ_ONLY),
2215      MDL_BIT(MDL_EXCLUSIVE) | MDL_BIT(MDL_SHARED_NO_READ_WRITE) |
2216          MDL_BIT(MDL_SHARED_NO_WRITE) | MDL_BIT(MDL_SHARED_UPGRADABLE),
2217      MDL_BIT(MDL_EXCLUSIVE) | MDL_BIT(MDL_SHARED_NO_READ_WRITE) |
2218          MDL_BIT(MDL_SHARED_WRITE_LOW_PRIO) | MDL_BIT(MDL_SHARED_WRITE),
2219      MDL_BIT(MDL_EXCLUSIVE) | MDL_BIT(MDL_SHARED_NO_READ_WRITE) |
2220          MDL_BIT(MDL_SHARED_NO_WRITE) | MDL_BIT(MDL_SHARED_UPGRADABLE) |
2221          MDL_BIT(MDL_SHARED_WRITE_LOW_PRIO) | MDL_BIT(MDL_SHARED_WRITE),
2222      MDL_BIT(MDL_EXCLUSIVE) | MDL_BIT(MDL_SHARED_NO_READ_WRITE) |
2223          MDL_BIT(MDL_SHARED_NO_WRITE) | MDL_BIT(MDL_SHARED_READ_ONLY) |
2224          MDL_BIT(MDL_SHARED_UPGRADABLE) | MDL_BIT(MDL_SHARED_WRITE_LOW_PRIO) |
2225          MDL_BIT(MDL_SHARED_WRITE) | MDL_BIT(MDL_SHARED_READ),
2226      MDL_BIT(MDL_EXCLUSIVE) | MDL_BIT(MDL_SHARED_NO_READ_WRITE) |
2227          MDL_BIT(MDL_SHARED_NO_WRITE) | MDL_BIT(MDL_SHARED_READ_ONLY) |
2228          MDL_BIT(MDL_SHARED_UPGRADABLE) | MDL_BIT(MDL_SHARED_WRITE_LOW_PRIO) |
2229          MDL_BIT(MDL_SHARED_WRITE) | MDL_BIT(MDL_SHARED_READ) |
2230          MDL_BIT(MDL_SHARED_HIGH_PRIO) | MDL_BIT(MDL_SHARED)},
2231     /**
2232       Each array in the next group specifies if a particular type of request can
2233       be satisfied if there is a waiting request for the same lock of a certain
2234       type. In other words each array specifies a priority matrix for different
2235       lock types.
2236 
2237       We use each of the arrays depending on whether the number of successively
2238       granted "piglet" and "hog" lock requests exceed the max_write_lock_count
2239       threshold. This is necessary to avoid high priority lock requests starving
2240       out requests with lower priority.
2241 
2242       The first array in the group is used in default situation when both
2243       MDL_lock::m_piglet_lock_count and MDL_lock::m_hog_lock_count don't exceed
2244       the threshold.
2245 
2246       The priority matrix it specifies looks like:
2247 
2248          Request  |         Pending requests for lock          |
2249           type    | S  SH  SR  SW  SWLP  SU  SRO  SNW  SNRW  X |
2250         ----------+--------------------------------------------+
2251         S         | +   +   +   +    +    +   +    +     +   - |
2252         SH        | +   +   +   +    +    +   +    +     +   + |
2253         SR        | +   +   +   +    +    +   +    +     -   - |
2254         SW        | +   +   +   +    +    +   +    -     -   - |
2255         SWLP      | +   +   +   +    +    +   -    -     -   - |
2256         SU        | +   +   +   +    +    +   +    +     +   - |
2257         SRO       | +   +   +   -    +    +   +    +     -   - |
2258         SNW       | +   +   +   +    +    +   +    +     +   - |
2259         SNRW      | +   +   +   +    +    +   +    +     +   - |
2260         X         | +   +   +   +    +    +   +    +     +   + |
2261 
2262       Invariant [INV2]: for all priority matrices, if A is the set of
2263       incompatible waiting requests for a given request and B is the set
2264       of incompatible granted requests for the same request, then A will
2265       always be a subset of B. This means that moving a lock from waiting
2266       to granted state doesn't unblock additional requests.
2267       MDL_lock::reschedule_waiters() code relies on this.
2268     */
2269     {{0, MDL_BIT(MDL_EXCLUSIVE), 0,
2270       MDL_BIT(MDL_EXCLUSIVE) | MDL_BIT(MDL_SHARED_NO_READ_WRITE),
2271       MDL_BIT(MDL_EXCLUSIVE) | MDL_BIT(MDL_SHARED_NO_READ_WRITE) |
2272           MDL_BIT(MDL_SHARED_NO_WRITE),
2273       MDL_BIT(MDL_EXCLUSIVE) | MDL_BIT(MDL_SHARED_NO_READ_WRITE) |
2274           MDL_BIT(MDL_SHARED_NO_WRITE) | MDL_BIT(MDL_SHARED_READ_ONLY),
2275       MDL_BIT(MDL_EXCLUSIVE),
2276       MDL_BIT(MDL_EXCLUSIVE) | MDL_BIT(MDL_SHARED_NO_READ_WRITE) |
2277           MDL_BIT(MDL_SHARED_WRITE),
2278       MDL_BIT(MDL_EXCLUSIVE), MDL_BIT(MDL_EXCLUSIVE), 0},
2279      /**
2280        The second array in the group is used when the number of successively
2281        granted "piglet" (SW) locks exceeds max_write_lock_count.
2282 
2283        It is the same matrix as in the first case but with the SW lock type
2284        having lower priority than the SRO lock type.
2285      */
2286      {0, MDL_BIT(MDL_EXCLUSIVE), 0,
2287       MDL_BIT(MDL_EXCLUSIVE) | MDL_BIT(MDL_SHARED_NO_READ_WRITE),
2288       MDL_BIT(MDL_EXCLUSIVE) | MDL_BIT(MDL_SHARED_NO_READ_WRITE) |
2289           MDL_BIT(MDL_SHARED_NO_WRITE) | MDL_BIT(MDL_SHARED_READ_ONLY),
2290       MDL_BIT(MDL_EXCLUSIVE) | MDL_BIT(MDL_SHARED_NO_READ_WRITE) |
2291           MDL_BIT(MDL_SHARED_NO_WRITE) | MDL_BIT(MDL_SHARED_READ_ONLY),
2292       MDL_BIT(MDL_EXCLUSIVE),
2293       MDL_BIT(MDL_EXCLUSIVE) | MDL_BIT(MDL_SHARED_NO_READ_WRITE),
2294       MDL_BIT(MDL_EXCLUSIVE), MDL_BIT(MDL_EXCLUSIVE), 0},
2295      /**
2296        The third array in the group is used when the number of successively
2297        granted "hog" (SNW, SNRW, X) locks exceeds max_write_lock_count.
2298 
2299       In this case S, SH, SR, SW, SWLP, SRO and SU lock types have
2300       priority over all "hog" types.
2301      */
2302      {0, 0, 0, 0, 0, MDL_BIT(MDL_SHARED_READ_ONLY), 0,
2303       MDL_BIT(MDL_SHARED_WRITE),
2304       MDL_BIT(MDL_EXCLUSIVE) | MDL_BIT(MDL_SHARED_UPGRADABLE) |
2305           MDL_BIT(MDL_SHARED_WRITE_LOW_PRIO) | MDL_BIT(MDL_SHARED_WRITE),
2306       MDL_BIT(MDL_EXCLUSIVE) | MDL_BIT(MDL_SHARED_READ_ONLY) |
2307           MDL_BIT(MDL_SHARED_UPGRADABLE) | MDL_BIT(MDL_SHARED_WRITE_LOW_PRIO) |
2308           MDL_BIT(MDL_SHARED_WRITE) | MDL_BIT(MDL_SHARED_READ),
2309       static_cast<bitmap_t>(~MDL_OBJECT_HOG_LOCK_TYPES)},
2310      /**
2311        The fourth array in the group is used when both the number of
2312        successively granted "piglet" (SW) and the number of successively granted
2313        "hog" (SNW, SNRW, X) locks exceed max_write_lock_count.
2314 
2315       This matrix prefers SRO locks over SW/SWLP locks. And non-"hog" locks
2316        other than SW/SWLP over "hog" locks.
2317 
2318        Note that the fact that "hog" locks have the same priority vs SW/SWLP
2319       locks as in the default matrix (#0) is important and is relied upon in
2320        MDL_lock::reschedule_waiters(). This is invariant [INV3].
2321      */
2322      {0, 0, 0, 0,
2323       MDL_BIT(MDL_EXCLUSIVE) | MDL_BIT(MDL_SHARED_NO_READ_WRITE) |
2324           MDL_BIT(MDL_SHARED_NO_WRITE) | MDL_BIT(MDL_SHARED_READ_ONLY),
2325       MDL_BIT(MDL_EXCLUSIVE) | MDL_BIT(MDL_SHARED_NO_READ_WRITE) |
2326           MDL_BIT(MDL_SHARED_NO_WRITE) | MDL_BIT(MDL_SHARED_READ_ONLY),
2327       0, 0, MDL_BIT(MDL_EXCLUSIVE) | MDL_BIT(MDL_SHARED_UPGRADABLE),
2328       MDL_BIT(MDL_EXCLUSIVE) | MDL_BIT(MDL_SHARED_READ_ONLY) |
2329           MDL_BIT(MDL_SHARED_UPGRADABLE) | MDL_BIT(MDL_SHARED_READ),
2330       static_cast<bitmap_t>(~(MDL_OBJECT_HOG_LOCK_TYPES |
2331                               MDL_BIT(MDL_SHARED_WRITE) |
2332                               MDL_BIT(MDL_SHARED_WRITE_LOW_PRIO)))}},
2333     /**
2334       Array of increments for "unobtrusive" types of lock requests for
2335       per-object locks.
2336 
2337       @sa MDL_lock::get_unobtrusive_lock_increment().
2338 
2339       For per-object locks:
2340       - "unobtrusive" types: S, SH, SR and SW
2341       - "obtrusive" types: SU, SRO, SNW, SNRW, X
2342 
2343       Number of locks acquired using "fast path" are encoded in the following
2344       bits of MDL_lock::m_fast_path_state:
2345 
2346       - bits 0 .. 19  - S and SH (we don't differentiate them once acquired)
2347       - bits 20 .. 39 - SR
2348       - bits 40 .. 59 - SW and SWLP (we don't differentiate them once acquired)
2349 
2350       Overflow is not an issue as we are unlikely to support more than 2^20 - 1
2351       concurrent connections in foreseeable future.
2352 
2353       This encoding defines the below contents of increment array.
2354     */
2355     {0, 1, 1, 1ULL << 20, 1ULL << 40, 1ULL << 40, 0, 0, 0, 0, 0},
2356     /*
2357       To prevent starvation, "hog" and "piglet" lock types are only granted
2358       max_write_lock_count times in a row while conflicting lock types are
2359       waiting.
2360     */
2361     true,
2362 #ifndef DBUG_OFF
2363     // For object locks all types, except IX, are permitted
2364     {false, true, true, true, true, true, true, true, true, true, true},
2365 #endif /* not defined DBUG_OFF */
2366 
2367     &MDL_lock::object_lock_needs_notification,
2368     &MDL_lock::object_lock_notify_conflicting_locks,
2369     &MDL_lock::object_lock_fast_path_granted_bitmap,
2370     &MDL_lock::object_lock_needs_connection_check};
2371 
2372 /**
2373   Check if request for the metadata lock can be satisfied given its
2374   current state.
2375 
2376   @param  type_arg             The requested lock type.
2377   @param  requestor_ctx        The MDL context of the requestor.
2378 
2379   @retval true   Lock request can be satisfied
2380   @retval false  There is some conflicting lock.
2381 
2382   @note In cases then current context already has "stronger" type
2383         of lock on the object it will be automatically granted
2384         thanks to usage of the MDL_context::find_ticket() method.
2385 */
2386 
can_grant_lock(enum_mdl_type type_arg,const MDL_context * requestor_ctx) const2387 bool MDL_lock::can_grant_lock(enum_mdl_type type_arg,
2388                               const MDL_context *requestor_ctx) const {
2389   bool can_grant = false;
2390   bitmap_t waiting_incompat_map = incompatible_waiting_types_bitmap()[type_arg];
2391   bitmap_t granted_incompat_map = incompatible_granted_types_bitmap()[type_arg];
2392 
2393   /*
2394     New lock request can be satisfied iff:
2395     - There are no incompatible types of satisfied requests
2396     in other contexts
2397     - There are no waiting requests which have higher priority
2398     than this request.
2399   */
2400   if (!(m_waiting.bitmap() & waiting_incompat_map)) {
2401     if (!(fast_path_granted_bitmap() & granted_incompat_map)) {
2402       if (!(m_granted.bitmap() & granted_incompat_map))
2403         can_grant = true;
2404       else {
2405         Ticket_iterator it(m_granted);
2406         MDL_ticket *ticket;
2407 
2408         /*
2409           There is an incompatible lock. Check that it belongs to some
2410           other context.
2411 
2412           If we are trying to acquire "unobtrusive" type of lock then the
2413           confliciting lock must be from "obtrusive" set, therefore it should
2414           have been acquired using "slow path" and should be present in
2415           m_granted list.
2416 
2417           If we are trying to acquire "obtrusive" type of lock then it can be
2418           either another "obtrusive" lock or "unobtrusive" type of lock
2419           acquired on "slow path" (can't be "unobtrusive" lock on fast path
2420           because of surrounding if-statement). In either case it should be
2421           present in m_granted list.
2422         */
2423         while ((ticket = it++)) {
2424           if (ticket->get_ctx() != requestor_ctx &&
2425               ticket->is_incompatible_when_granted(type_arg))
2426             break;
2427         }
2428         if (ticket == nullptr) /* Incompatible locks are our own. */
2429           can_grant = true;
2430       }
2431     } else {
2432       /*
2433         Our lock request conflicts with one of granted "fast path" locks:
2434 
2435         This means that we are trying to acquire "obtrusive" lock and:
2436         a) Either we are called from MDL_context::try_acquire_lock_impl()
2437            and then all "fast path" locks belonging to this context were
2438            materialized (as we do for "obtrusive" locks).
2439         b) Or we are called from MDL_lock::reschedule_waiters() then
2440            this context is waiting for this request and all its "fast
2441            path" locks were materialized before the wait.
2442 
2443         The above means that conflicting granted "fast path" lock cannot
2444         belong to us and our request cannot be satisfied.
2445       */
2446     }
2447   }
2448   return can_grant;
2449 }
2450 
2451 /**
2452   Return the first MDL_context which owns the lock.
2453 
2454   @return Pointer to the first MDL_context which has acquired the lock
2455           NULL if there are no such contexts.
2456 
2457   @note This method works properly only for locks acquired using
2458         "slow" path. It won't return context if it has used "fast"
2459         path to acquire the lock.
2460 */
2461 
get_lock_owner() const2462 inline MDL_context *MDL_lock::get_lock_owner() const {
2463   Ticket_iterator it(m_granted);
2464   MDL_ticket *ticket;
2465 
2466   if ((ticket = it++)) return ticket->get_ctx();
2467   return nullptr;
2468 }
2469 
2470 /** Remove a ticket from waiting or pending queue and wakeup up waiters. */
2471 
void MDL_lock::remove_ticket(MDL_context *ctx, LF_PINS *pins,
                             Ticket_list MDL_lock::*list, MDL_ticket *ticket) {
  // Evaluate these predicates up front; both are needed again after
  // m_rwlock has been released at the end of the function.
  bool is_obtrusive = is_obtrusive_lock(ticket->get_type());
  bool is_singleton = mdl_locks.is_lock_object_singleton(&key);

  mysql_prlock_wrlock(&m_rwlock);
  // "list" is a pointer-to-member selecting either m_granted or m_waiting.
  (this->*list).remove_ticket(ticket);

  /*
    If we are removing "obtrusive" type of request either from granted or
    waiting lists we need to decrement "obtrusive" requests counter.
    Once last ticket for "obtrusive" lock is removed we should clear
    HAS_OBTRUSIVE flag in m_fast_path_state as well.
  */
  bool last_obtrusive =
      is_obtrusive && ((--m_obtrusive_locks_granted_waiting_count) == 0);
  /*
    If both m_granted and m_waiting lists become empty as result we also
    need to clear HAS_SLOW_PATH flag in m_fast_path_state.
  */
  bool last_slow_path = m_granted.is_empty() && m_waiting.is_empty();
  bool last_use = false;

  if (last_slow_path || last_obtrusive) {
    fast_path_state_t old_state = m_fast_path_state;
    fast_path_state_t new_state;
    // CAS loop: concurrent "fast path" acquisitions/releases may change
    // other bits of m_fast_path_state while we clear our flags.
    do {
      new_state = old_state;
      if (last_slow_path) new_state &= ~MDL_lock::HAS_SLOW_PATH;
      if (last_obtrusive) new_state &= ~MDL_lock::HAS_OBTRUSIVE;
    } while (!fast_path_state_cas(&old_state, new_state));

    /*
      We don't have any "fast" or "slow" path locks. MDL_lock object becomes
      unused so unused objects counter needs to be incremented.
    */
    if (new_state == 0) last_use = true;
  }

  if (last_slow_path) {
    /*
      We might end-up with the last waiting ticket being removed and non-0
      m_hog_lock_count/m_piglet_lock_count in the following situation:

      1) There is a granted "hog"/"piglet" lock blocking a lower priority lock
         request.
      2) The lower priority lock request is timed out or killed. It is not yet
         removed from waiters list and bitmap.
      3) The "Hog"/"piglet" lock is released. Its reschedule_waiters() call
         will still see the pending lower priority lock so it won't reset
         the m_hog_lock_count/m_piglet_lock_count counters.
      4) MDL_lock::remove_ticket() is called for the timed out/killed
         lower priority ticket. Which turns out to be the last ticket
         for this lock.

      Hence we need to reset these counters here.
    */
    m_hog_lock_count = 0;
    m_piglet_lock_count = 0;
    m_current_waiting_incompatible_idx = 0;
  } else {
    /*
      There can be some contexts waiting to acquire a lock
      which now might be able to do it. Grant the lock to
      them and wake them up!

      We always try to reschedule locks, since there is no easy way
      (i.e. by looking at the bitmaps) to find out whether it is
      required or not.
      In a general case, even when the queue's bitmap is not changed
      after removal of the ticket, there is a chance that some request
      can be satisfied (due to the fact that a granted request
      reflected in the bitmap might belong to the same context as a
      pending request).
    */
    reschedule_waiters();
  }
  mysql_prlock_unlock(&m_rwlock);

  /* Don't count singleton MDL_lock objects as unused. */
  if (last_use && !is_singleton) mdl_locks.lock_object_unused(ctx, pins);
}
2554 
2555 /**
2556   Check if we have any pending locks which conflict with existing
2557   shared lock.
2558 
2559   @pre The ticket must match an acquired lock.
2560 
2561   @return true if there is a conflicting lock request, false otherwise.
2562 */
2563 
has_pending_conflicting_lock(enum_mdl_type type)2564 bool MDL_lock::has_pending_conflicting_lock(enum_mdl_type type) {
2565   bool result;
2566 
2567   mysql_mutex_assert_not_owner(&LOCK_open);
2568 
2569   mysql_prlock_rdlock(&m_rwlock);
2570   result = (m_waiting.bitmap() & incompatible_granted_types_bitmap()[type]);
2571   mysql_prlock_unlock(&m_rwlock);
2572   return result;
2573 }
2574 
~MDL_wait_for_graph_visitor()2575 MDL_wait_for_graph_visitor::~MDL_wait_for_graph_visitor() {}
2576 
~MDL_wait_for_subgraph()2577 MDL_wait_for_subgraph::~MDL_wait_for_subgraph() {}
2578 
2579 /**
2580   Check if ticket represents metadata lock of "stronger" or equal type
2581   than specified one. I.e. if metadata lock represented by ticket won't
2582   allow any of locks which are not allowed by specified type of lock.
2583 
2584   @return true  if ticket has stronger or equal type
2585           false otherwise.
2586 */
2587 
has_stronger_or_equal_type(enum_mdl_type type) const2588 bool MDL_ticket::has_stronger_or_equal_type(enum_mdl_type type) const {
2589   const MDL_lock::bitmap_t *granted_incompat_map =
2590       m_lock->incompatible_granted_types_bitmap();
2591 
2592   return !(granted_incompat_map[type] & ~(granted_incompat_map[m_type]));
2593 }
2594 
is_incompatible_when_granted(enum_mdl_type type) const2595 bool MDL_ticket::is_incompatible_when_granted(enum_mdl_type type) const {
2596   return (MDL_BIT(m_type) & m_lock->incompatible_granted_types_bitmap()[type]);
2597 }
2598 
is_incompatible_when_waiting(enum_mdl_type type) const2599 bool MDL_ticket::is_incompatible_when_waiting(enum_mdl_type type) const {
2600   return (MDL_BIT(m_type) & m_lock->incompatible_waiting_types_bitmap()[type]);
2601 }
2602 
2603 #ifndef DBUG_OFF
equivalent(const MDL_ticket * a,const MDL_ticket * b,enum_mdl_duration target_duration)2604 bool equivalent(const MDL_ticket *a, const MDL_ticket *b,
2605                 enum_mdl_duration target_duration) {
2606   if (a == b) {
2607     // Same pointer (incl. nullptr) are equivalent
2608     return true;
2609   }
2610 
2611   if (a->get_lock() != b->get_lock()) {
2612     // If they refer to different locks, they are never equivalent
2613     return false;
2614   }
2615   if (a->get_duration() == target_duration &&
2616       b->get_duration() == target_duration) {
2617     // Different objects, but which has the same lock, AND both refer to the
2618     // target duration are equivalent
2619     return true;
2620   }
2621   if (a->get_duration() != target_duration &&
2622       b->get_duration() != target_duration) {
2623     // Different objects with same lock are still equivalent, if neither has
2624     // the target duration
2625     return true;
2626   }
2627   return false;
2628 }
2629 #endif /* not defined DBUG_OFF */
2630 
2631 /**
2632   Check whether the context already holds a compatible lock ticket
2633   on an object.
2634   Start searching from list of locks for the same duration as lock
2635   being requested. If not look at lists for other durations.
2636 
2637   @param mdl_request  Lock request object for lock to be acquired
2638   @param[out] result_duration  Duration of lock which was found.
2639 
2640   @note Tickets which correspond to lock types "stronger" than one
2641         being requested are also considered compatible.
2642 
2643   @return A pointer to the lock ticket for the object or NULL otherwise.
2644 */
2645 
find_ticket(MDL_request * mdl_request,enum_mdl_duration * result_duration)2646 MDL_ticket *MDL_context::find_ticket(MDL_request *mdl_request,
2647                                      enum_mdl_duration *result_duration) {
2648   auto h = m_ticket_store.find(*mdl_request);
2649   *result_duration = h.m_dur;
2650   return h.m_ticket;
2651 }
2652 
2653 /**
2654   Try to acquire one lock.
2655 
2656   Unlike exclusive locks, shared locks are acquired one by
2657   one. This interface is chosen to simplify introduction of
2658   the new locking API to the system. MDL_context::try_acquire_lock()
2659   is currently used from open_table(), and there we have only one
2660   table to work with.
2661 
2662   This function may also be used to try to acquire an exclusive
2663   lock on a destination table, by ALTER TABLE ... RENAME.
2664 
2665   Returns immediately without any side effect if encounters a lock
2666   conflict. Otherwise takes the lock.
2667 
2668   @param [in,out] mdl_request Lock request object for lock to be acquired
2669 
2670   @retval  false   Success. The lock may have not been acquired.
2671                    Check the ticket, if it's NULL, a conflicting lock
2672                    exists.
2673   @retval  true    Out of resources, an error has been reported.
2674 */
2675 
bool MDL_context::try_acquire_lock(MDL_request *mdl_request) {
  MDL_ticket *ticket = nullptr;

  // "true" means out-of-resources; an error has already been reported.
  if (try_acquire_lock_impl(mdl_request, &ticket)) return true;

  if (!mdl_request->ticket) {
    /*
      Our attempt to acquire lock without waiting has failed.
      Let us release resources which were acquired in the process.

      At this point try_acquire_lock_impl() has left "ticket" pointing to
      a constructed MDL_ticket with MDL_ticket::m_lock set and the
      corresponding MDL_lock::m_rwlock write-locked.

      We don't need to count MDL_lock object as unused and possibly
      delete it here because:
      - Either there was a conflicting ticket in MDL_lock::m_granted/
        m_waiting lists during our call to MDL_lock::can_grant_lock().
        This ticket can't go away while MDL_lock::m_rwlock is held.
      - Or we have tried to acquire an "obtrusive" lock and there was
        a conflicting "fast path" lock in MDL_lock::m_fast_path_state
        counter during our call to MDL_lock::can_grant_lock().
        In this case HAS_OBTRUSIVE lock flag should have been set before
        call to MDL_lock::can_grant_lock() so release of this "fast path"
        lock will have to take slow path (see release_lock() and invariant
        [INV1]). This means that conflicting "fast path" lock can't go
        away until MDL_lock::m_rwlock is released or HAS_OBTRUSIVE flag
        is cleared. In the latter case counting MDL_lock object as unused
        is responsibility of thread which is decrementing "fast path" lock
        counter. MDL_lock object can't be deleted under our feet since
        thread doing deletion needs to acquire MDL_lock::m_rwlock first.
    */
    MDL_lock *lock = ticket->m_lock;

    // Undo the "obtrusive" accounting done for this failed attempt, and
    // clear the HAS_OBTRUSIVE/HAS_SLOW_PATH flags if the last reason for
    // them being set disappears (mirrors MDL_lock::remove_ticket()).
    bool last_obtrusive =
        lock->is_obtrusive_lock(mdl_request->type) &&
        ((--lock->m_obtrusive_locks_granted_waiting_count) == 0);
    bool last_slow_path =
        lock->m_granted.is_empty() && lock->m_waiting.is_empty();

    if (last_slow_path || last_obtrusive) {
      MDL_lock::fast_path_state_t old_state = lock->m_fast_path_state;
      MDL_lock::fast_path_state_t new_state;
      // CAS loop: other bits of m_fast_path_state can change concurrently.
      do {
        new_state = old_state;
        if (last_slow_path) new_state &= ~MDL_lock::HAS_SLOW_PATH;
        if (last_obtrusive) new_state &= ~MDL_lock::HAS_OBTRUSIVE;
      } while (!lock->fast_path_state_cas(&old_state, new_state));
    }

    mysql_prlock_unlock(&lock->m_rwlock);

    /*
      If SEs were notified about impending lock acquisition, the failure
      to acquire it requires the same notification as lock release.
    */
    if (ticket->m_hton_notified) {
      mysql_mdl_set_status(ticket->m_psi, MDL_ticket::POST_RELEASE_NOTIFY);
      m_owner->notify_hton_post_release_exclusive(&mdl_request->key);
    }

    MDL_ticket::destroy(ticket);
  }

  // Success: mdl_request->ticket is non-NULL if the lock was granted, and
  // NULL if a conflicting lock exists (no error in either case).
  return false;
}
2738 
2739 /**
2740   "Materialize" requests for locks which were satisfied using
2741   "fast path" by properly including them into corresponding
2742   MDL_lock::m_granted bitmaps/lists and removing it from
2743   packed counter in MDL_lock::m_fast_path_state.
2744 
2745   @note In future we might optimize this method if necessary,
2746         for example, by keeping pointer to first "fast path"
2747         ticket.
2748 */
2749 
void MDL_context::materialize_fast_path_locks() {
  int i;

  // Visit the ticket lists for every duration.
  for (i = 0; i < MDL_DURATION_END; i++) {
    MDL_ticket_store::List_iterator it = m_ticket_store.list_iterator(i);

    /*
      New tickets are added at the front of each list, so everything before
      the first already-materialized ticket ("matf") is exactly the set of
      tickets acquired since the previous materialization.
    */
    MDL_ticket *matf = m_ticket_store.materialized_front(i);
    for (MDL_ticket *ticket = it++; ticket != matf; ticket = it++) {
      if (ticket->m_is_fast_path) {
        MDL_lock *lock = ticket->m_lock;
        MDL_lock::fast_path_state_t unobtrusive_lock_increment =
            lock->get_unobtrusive_lock_increment(ticket->get_type());
        ticket->m_is_fast_path = false;
        mysql_prlock_wrlock(&lock->m_rwlock);
        lock->m_granted.add_ticket(ticket);
        /*
          Atomically decrement counter in MDL_lock::m_fast_path_state.
          This needs to happen under protection of MDL_lock::m_rwlock to make
          it atomic with addition of ticket to MDL_lock::m_granted list and
          to enforce invariant [INV1].
        */
        MDL_lock::fast_path_state_t old_state = lock->m_fast_path_state;
        while (!lock->fast_path_state_cas(
            &old_state, ((old_state - unobtrusive_lock_increment) |
                         MDL_lock::HAS_SLOW_PATH))) {
        }
        mysql_prlock_unlock(&lock->m_rwlock);
      }
    }
  }
  // Record that everything currently in the store is materialized, so
  // subsequent calls can stop scanning at the new front.
  m_ticket_store.set_materialized();
}
2782 
2783 /**
2784   Auxiliary method for acquiring lock without waiting.
2785 
2786   @param [in,out] mdl_request Lock request object for lock to be acquired
2787   @param [out] out_ticket     Ticket for the request in case when lock
2788                               has not been acquired.
2789 
2790   @retval  false   Success. The lock may have not been acquired.
2791                    Check MDL_request::ticket, if it's NULL, a conflicting
2792                    lock exists. In this case "out_ticket" out parameter
2793                    points to ticket which was constructed for the request.
2794                    MDL_ticket::m_lock points to the corresponding MDL_lock
2795                    object and MDL_lock::m_rwlock write-locked.
2796   @retval  true    Out of resources, an error has been reported.
2797 */
2798 
try_acquire_lock_impl(MDL_request * mdl_request,MDL_ticket ** out_ticket)2799 bool MDL_context::try_acquire_lock_impl(MDL_request *mdl_request,
2800                                         MDL_ticket **out_ticket) {
2801   MDL_lock *lock;
2802   MDL_key *key = &mdl_request->key;
2803   MDL_ticket *ticket;
2804   enum_mdl_duration found_duration;
2805   MDL_lock::fast_path_state_t unobtrusive_lock_increment;
2806   bool force_slow;
2807   bool pinned;
2808 
2809   DBUG_ASSERT(mdl_request->ticket == nullptr);
2810 
2811   /* Don't take chances in production. */
2812   mdl_request->ticket = nullptr;
2813   mysql_mutex_assert_not_owner(&LOCK_open);
2814 
2815   /*
2816     Check whether the context already holds a shared lock on the object,
2817     and if so, grant the request.
2818   */
2819   if ((ticket = find_ticket(mdl_request, &found_duration))) {
2820     DBUG_ASSERT(ticket->m_lock);
2821     DBUG_ASSERT(ticket->has_stronger_or_equal_type(mdl_request->type));
2822     /*
2823       If the request is for a transactional lock, and we found
2824       a transactional lock, just reuse the found ticket.
2825 
2826       It's possible that we found a transactional lock,
2827       but the request is for a HANDLER lock. In that case HANDLER
2828       code will clone the ticket (see below why it's needed).
2829 
2830       If the request is for a transactional lock, and we found
2831       a HANDLER lock, create a copy, to make sure that when user
2832       does HANDLER CLOSE, the transactional lock is not released.
2833 
2834       If the request is for a handler lock, and we found a
2835       HANDLER lock, also do the clone. HANDLER CLOSE for one alias
2836       should not release the lock on the table HANDLER opened through
2837       a different alias.
2838     */
2839     mdl_request->ticket = ticket;
2840     if ((found_duration != mdl_request->duration ||
2841          mdl_request->duration == MDL_EXPLICIT) &&
2842         clone_ticket(mdl_request)) {
2843       /* Clone failed. */
2844       mdl_request->ticket = nullptr;
2845       return true;
2846     }
2847     return false;
2848   }
2849 
2850   /*
2851     Prepare context for lookup in MDL_map container by allocating pins
2852     if necessary. This also ensures that this MDL_context has pins allocated
2853     and ready for future attempts to find elements in the MDL_map container
2854     (which might happen during lock release).
2855   */
2856   if (fix_pins()) return true;
2857 
2858   if (!(ticket = MDL_ticket::create(this, mdl_request->type
2859 #ifndef DBUG_OFF
2860                                     ,
2861                                     mdl_request->duration
2862 #endif
2863                                     )))
2864     return true;
2865 
2866   /*
2867     Get increment for "fast path" or indication that this is
2868     request for "obtrusive" type of lock outside of critical section.
2869   */
2870   unobtrusive_lock_increment =
2871       MDL_lock::get_unobtrusive_lock_increment(mdl_request);
2872 
2873   /*
2874     If this is an "obtrusive" type, we have to take the "slow path".
2875     If this context has open HANDLERs we have to take "slow path"
2876     as well for MDL_object_lock::notify_conflicting_locks() to work
2877     properly.
2878   */
2879   force_slow = !unobtrusive_lock_increment || m_needs_thr_lock_abort;
2880 
2881   /*
2882     If "obtrusive" lock is requested we need to "materialize" all fast
2883     path tickets, so MDL_lock::can_grant_lock() can safely assume
2884     that all granted "fast path" locks belong to different context.
2885   */
2886   if (!unobtrusive_lock_increment) materialize_fast_path_locks();
2887 
2888   DBUG_ASSERT(ticket->m_psi == nullptr);
2889   ticket->m_psi = mysql_mdl_create(
2890       ticket, key, mdl_request->type, mdl_request->duration,
2891       MDL_ticket::PENDING, mdl_request->m_src_file, mdl_request->m_src_line);
2892 
2893   /*
2894     We need to notify/get permission from storage engines before acquiring
2895     X lock if it is requested in one of namespaces interesting for SEs.
2896   */
2897   if (mdl_request->type == MDL_EXCLUSIVE &&
2898       MDL_lock::needs_hton_notification(key->mdl_namespace())) {
2899     mysql_mdl_set_status(ticket->m_psi, MDL_ticket::PRE_ACQUIRE_NOTIFY);
2900 
2901     bool victimized;
2902     if (m_owner->notify_hton_pre_acquire_exclusive(key, &victimized)) {
2903       MDL_ticket::destroy(ticket);
2904       my_error(victimized ? ER_LOCK_DEADLOCK : ER_LOCK_REFUSED_BY_ENGINE,
2905                MYF(0));
2906       return true;
2907     }
2908     ticket->m_hton_notified = true;
2909 
2910     mysql_mdl_set_status(ticket->m_psi, MDL_ticket::PENDING);
2911   }
2912 
2913 retry:
2914   /*
2915     The below call pins pointer to returned MDL_lock object (unless
2916     it is the singleton object for GLOBAL, COMMIT or ACL_CACHE namespaces).
2917   */
2918   if (!(lock = mdl_locks.find_or_insert(m_pins, key, &pinned))) {
2919     /*
2920       If SEs were notified about impending lock acquisition, the failure
2921       to acquire it requires the same notification as lock release.
2922     */
2923     if (ticket->m_hton_notified) {
2924       mysql_mdl_set_status(ticket->m_psi, MDL_ticket::POST_RELEASE_NOTIFY);
2925       m_owner->notify_hton_post_release_exclusive(key);
2926     }
2927     MDL_ticket::destroy(ticket);
2928     return true;
2929   }
2930 
2931   /*
2932     Code counting unused MDL_lock objects below assumes that object is not
2933     pinned iff it is a singleton.
2934   */
2935   DBUG_ASSERT(mdl_locks.is_lock_object_singleton(key) == !pinned);
2936 
2937   if (!force_slow) {
2938     /*
2939       "Fast path".
2940 
2941       Hurray! We are acquring "unobtrusive" type of lock and not forced
2942       to take "slow path" because of open HANDLERs.
2943 
2944       Let us do a few checks first to figure out if we really can acquire
2945       lock using "fast path".
2946 
2947       Since the MDL_lock object is pinned at this point (or it is the
2948       singleton) we can access its members without risk of it getting deleted
2949       under out feet.
2950 
2951       Ordinary read of MDL_lock::m_fast_path_state which we do here is OK as
2952       correctness of value returned by it will be anyway validated by atomic
2953       compare-and-swap which happens later.
2954       In theory, this algorithm will work correctly (but not very efficiently)
2955       if the read will return random values.
2956       In practice, it won't return values which are too out-of-date as the
2957       above call to MDL_map::find_or_insert() contains memory barrier.
2958     */
2959     MDL_lock::fast_path_state_t old_state = lock->m_fast_path_state;
2960     bool first_use;
2961 
2962     do {
2963       /*
2964         Check if hash look-up returned object marked as destroyed or
2965         it was marked as such while it was pinned by us. If yes we
2966         need to unpin it and retry look-up.
2967       */
2968       if (old_state & MDL_lock::IS_DESTROYED) {
2969         if (pinned) lf_hash_search_unpin(m_pins);
2970         DEBUG_SYNC(get_thd(), "mdl_acquire_lock_is_destroyed_fast_path");
2971         goto retry;
2972       }
2973 
2974       /*
2975         Check that there are no granted/pending "obtrusive" locks and nobody
2976         even is about to try to check if such lock can be acquired.
2977 
2978         In these cases we need to take "slow path".
2979       */
2980       if (old_state & MDL_lock::HAS_OBTRUSIVE) goto slow_path;
2981 
2982       /*
2983         If m_fast_path_state doesn't have HAS_SLOW_PATH set and all "fast"
2984         path counters are 0 then we are about to use an unused MDL_lock
2985         object. We need to decrement unused objects counter eventually.
2986       */
2987       first_use = (old_state == 0);
2988 
2989       /*
2990         Now we simply need to increment m_fast_path_state with a value which
2991         corresponds to type of our request (i.e. increment part this member
2992         which contains counter which corresponds to this type).
2993 
2994         This needs to be done as atomical operation with the above checks,
2995         which is achieved by using atomic compare-and-swap.
2996 
2997         @sa MDL_object_lock::m_unobtrusive_lock_increment for explanation
2998         why overflow is not an issue here.
2999       */
3000     } while (!lock->fast_path_state_cas(
3001         &old_state, old_state + unobtrusive_lock_increment));
3002 
3003     /*
3004       Lock has been acquired. Since this can only be an "unobtrusive" lock and
3005       there were no active/pending requests for "obtrusive" locks, we don't need
3006       to care about "hog" and "piglet" locks and the max_write_lock_count
3007       threshold.
3008     */
3009 
3010     if (pinned) lf_hash_search_unpin(m_pins);
3011 
3012     /*
3013       Don't count singleton MDL_lock objects as used, use "pinned == false"
3014       as an indication of such objects.
3015     */
3016     if (first_use && pinned) mdl_locks.lock_object_used();
3017 
3018     /*
3019       Since this MDL_ticket is not visible to any threads other than
3020       the current one, we can set MDL_ticket::m_lock member without
3021       protect of MDL_lock::m_rwlock. MDL_lock won't be deleted
3022       underneath our feet as MDL_lock::m_fast_path_state serves as
3023       reference counter in this case.
3024     */
3025     ticket->m_lock = lock;
3026     ticket->m_is_fast_path = true;
3027     m_ticket_store.push_front(mdl_request->duration, ticket);
3028     mdl_request->ticket = ticket;
3029 
3030     mysql_mdl_set_status(ticket->m_psi, MDL_ticket::GRANTED);
3031     return false;
3032   }
3033 
3034 slow_path:
3035 
3036   /*
3037    "Slow path".
3038 
3039     Do full-blown check and list manipulation if necessary.
3040   */
3041   mysql_prlock_wrlock(&lock->m_rwlock);
3042 
3043   /*
3044     First of all, let us check if hash look-up returned MDL_lock object which
3045     is marked as destroyed or was marked as such while it was pinned by us.
3046     If we have got such object we need to retry look-up.
3047 
3048     We can use ordinary non-atomic read in this case as this flag is set under
3049     protection of MDL_lock::m_rwlock (we can get inconsistent data for other
3050     parts of m_fast_path_state due to concurrent atomic updates, but we don't
3051     care about them yet).
3052   */
3053   MDL_lock::fast_path_state_t state = lock->m_fast_path_state;
3054 
3055   if (state & MDL_lock::IS_DESTROYED) {
3056     mysql_prlock_unlock(&lock->m_rwlock);
3057     /*
3058       We can't unpin object earlier as lf_hash_delete() might have been
3059       called for it already and so LF_ALLOCATOR is free to deallocate it
3060       once unpinned.
3061     */
3062     if (pinned) lf_hash_search_unpin(m_pins);
3063     DEBUG_SYNC(get_thd(), "mdl_acquire_lock_is_destroyed_slow_path");
3064     goto retry;
3065   }
3066 
3067   /*
3068     Object was not marked as destroyed. Since it can't be deleted from hash
3069     and deallocated until this happens we can unpin it and work with it safely
3070     while MDL_lock::m_rwlock is held.
3071   */
3072   if (pinned) lf_hash_search_unpin(m_pins);
3073 
3074   /*
3075     When we try to acquire the first obtrusive lock for MDL_lock object we
3076     need to atomically set the HAS_OBTRUSIVE flag in m_fast_path_state before
3077     we call the MDL_lock::can_grant_lock() method.
3078     This is necessary to prevent concurrent fast path acquisitions from
3079     invalidating the results of this method.
3080   */
3081   bool first_obtrusive_lock =
3082       (unobtrusive_lock_increment == 0) &&
3083       ((lock->m_obtrusive_locks_granted_waiting_count++) == 0);
3084   bool first_use = false;
3085 
3086   /*
3087     When we try to acquire the first "slow" path lock for MDL_lock object
3088     we also need to atomically set HAS_SLOW_PATH flag. It is OK to read
3089     HAS_SLOW_PATH non-atomically here since it can be only set under
3090     protection of MDL_lock::m_rwlock lock.
3091   */
3092   if (!(state & MDL_lock::HAS_SLOW_PATH) || first_obtrusive_lock) {
3093     do {
3094       /*
3095         If HAS_SLOW_PATH flag is not set and all "fast" path counters
3096         are zero we are about to use previously unused MDL_lock object.
3097         MDL_map::m_unused_lock_objects counter needs to be decremented
3098         eventually.
3099       */
3100       first_use = (state == 0);
3101     } while (!lock->fast_path_state_cas(
3102         &state, state | MDL_lock::HAS_SLOW_PATH |
3103                     (first_obtrusive_lock ? MDL_lock::HAS_OBTRUSIVE : 0)));
3104   }
3105 
3106   /*
3107     Don't count singleton MDL_lock objects as used, use "pinned == false"
3108     as an indication of such objects.
3109 
3110     If the fact that we do this atomic decrement under MDL_lock::m_rwlock
3111     ever becomes bottleneck (which is unlikely) it can be safely moved
3112     outside of critical section.
3113   */
3114   if (first_use && pinned) mdl_locks.lock_object_used();
3115 
3116   ticket->m_lock = lock;
3117 
3118   if (lock->can_grant_lock(mdl_request->type, this)) {
3119     lock->m_granted.add_ticket(ticket);
3120 
3121     if (lock->is_affected_by_max_write_lock_count()) {
3122       /*
3123         If we have acquired an higher-prio "piglet" or "hog" lock while there
3124         are pending lower priority locks, we need to increment the appropriate
3125         counter.
3126 
3127         If one or both of these counters exceed the max_write_lock_count
3128         threshold, change priority matrice for this MDL_lock object.
3129         Reschedule waiters to avoid problems when the change of matrice
3130         happens concurrently to other thread acquiring a lower priority
3131         lock between its call to MDL_lock::can_grant_lock() and
3132         MDL_context::find_deadlock().
3133       */
3134       if (lock->count_piglets_and_hogs(mdl_request->type))
3135         lock->reschedule_waiters();
3136     }
3137 
3138     mysql_prlock_unlock(&lock->m_rwlock);
3139 
3140     m_ticket_store.push_front(mdl_request->duration, ticket);
3141     mdl_request->ticket = ticket;
3142 
3143     mysql_mdl_set_status(ticket->m_psi, MDL_ticket::GRANTED);
3144   } else
3145     *out_ticket = ticket;
3146 
3147   return false;
3148 }
3149 
3150 /**
3151   Create a copy of a granted ticket.
3152   This is used to make sure that HANDLER ticket
3153   is never shared with a ticket that belongs to
3154   a transaction, so that when we HANDLER CLOSE,
3155   we don't release a transactional ticket, and
3156   vice versa -- when we COMMIT, we don't mistakenly
3157   release a ticket for an open HANDLER.
3158 
3159   @retval true   An error occurred.
3160   @retval false  Success.
3161 */
3162 
bool MDL_context::clone_ticket(MDL_request *mdl_request) {
  MDL_ticket *ticket;

  /* Cloning must not happen while holding LOCK_open (lock-order safety). */
  mysql_mutex_assert_not_owner(&LOCK_open);

  /*
    Since in theory we can clone ticket belonging to a different context
    we need to prepare target context for possible attempts to release
    lock and thus possible removal of MDL_lock from MDL_map container.
    So we allocate pins to be able to work with this container if they
    are not allocated already.
  */
  if (fix_pins()) return true;

  /*
    By submitting mdl_request->type to MDL_ticket::create()
    we effectively downgrade the cloned lock to the level of
    the request.
  */
  if (!(ticket = MDL_ticket::create(this, mdl_request->type
#ifndef DBUG_OFF
                                    ,
                                    mdl_request->duration
#endif
                                    )))
    return true;

  /* Register the new ticket with the Performance Schema instrumentation. */
  DBUG_ASSERT(ticket->m_psi == nullptr);
  ticket->m_psi = mysql_mdl_create(
      ticket, &mdl_request->key, mdl_request->type, mdl_request->duration,
      MDL_ticket::PENDING, mdl_request->m_src_file, mdl_request->m_src_line);

  /* clone() is not supposed to be used to get a stronger lock. */
  DBUG_ASSERT(mdl_request->ticket->has_stronger_or_equal_type(ticket->m_type));

  /*
    If we are to clone exclusive lock in namespace requiring notification
    of storage engines we need to notify/get permission from SEs similarly
    to situation when lock acquired.
  */
  if (mdl_request->type == MDL_EXCLUSIVE &&
      MDL_lock::needs_hton_notification(mdl_request->key.mdl_namespace())) {
    /* The source ticket must already have performed this notification. */
    DBUG_ASSERT(mdl_request->ticket->m_hton_notified);

    mysql_mdl_set_status(ticket->m_psi, MDL_ticket::PRE_ACQUIRE_NOTIFY);

    bool victimized;
    if (m_owner->notify_hton_pre_acquire_exclusive(&mdl_request->key,
                                                   &victimized)) {
      /* SE refused the clone: destroy the half-built ticket and report. */
      MDL_ticket::destroy(ticket);
      my_error(victimized ? ER_LOCK_DEADLOCK : ER_LOCK_REFUSED_BY_ENGINE,
               MYF(0));
      return true;
    }
    ticket->m_hton_notified = true;

    mysql_mdl_set_status(ticket->m_psi, MDL_ticket::PENDING);
  }

  /* The clone shares the MDL_lock object with the ticket being cloned. */
  ticket->m_lock = mdl_request->ticket->m_lock;

  if (mdl_request->ticket->m_is_fast_path) {
    /*
      We are cloning ticket which was acquired on "fast path".
      Let us use "fast path" to create clone as well.
    */
    MDL_lock::fast_path_state_t unobtrusive_lock_increment =
        ticket->m_lock->get_unobtrusive_lock_increment(ticket->get_type());

    /*
      "Obtrusive" type of lock can't be cloned from weaker, "unobtrusive"
      type of lock.
    */
    DBUG_ASSERT(unobtrusive_lock_increment != 0);

    /*
      Increment of counter in MDL_lock::m_fast_path_state needs to happen here
      atomically and under protection of MDL_lock::m_rwlock in order to enforce
      invariant [INV1].
    */
    mysql_prlock_wrlock(&ticket->m_lock->m_rwlock);
    ticket->m_lock->fast_path_state_add(unobtrusive_lock_increment);
    mysql_prlock_unlock(&ticket->m_lock->m_rwlock);
    ticket->m_is_fast_path = true;
  } else {
    /*
      We are cloning ticket which was acquired on "slow path".
      We will use "slow path" for new ticket as well. We also
      need to take into account if new ticket corresponds to
      "obtrusive" lock.
    */
    bool is_obtrusive = ticket->m_lock->is_obtrusive_lock(ticket->m_type);
    mysql_prlock_wrlock(&ticket->m_lock->m_rwlock);
    ticket->m_lock->m_granted.add_ticket(ticket);
    if (is_obtrusive) {
      /*
        We don't need to set HAS_OBTRUSIVE flag in MDL_lock::m_fast_path_state
        here as it is already set since the ticket being cloned already
        represents "obtrusive" lock for this MDL_lock object.
      */
      DBUG_ASSERT(ticket->m_lock->m_obtrusive_locks_granted_waiting_count != 0);
      ++ticket->m_lock->m_obtrusive_locks_granted_waiting_count;
    }
    mysql_prlock_unlock(&ticket->m_lock->m_rwlock);
  }

  /* Publish the clone to the request and this context's ticket store. */
  mdl_request->ticket = ticket;
  m_ticket_store.push_front(mdl_request->duration, ticket);
  mysql_mdl_set_status(ticket->m_psi, MDL_ticket::GRANTED);

  return false;
}
3275 
3276 /**
3277   Notify threads holding S/SH metadata locks on an object, which conflict
3278   with a pending X lock.
3279 
3280   @note Currently this method is guaranteed to notify shared lock
3281         owners which have MDL_context::m_needs_thr_lock_abort flag
3282         set (as for others conflicting locks might have been acquired
3283         on "fast path" and thus might be absent from list of granted
3284         locks).
3285         This is OK as notification for other contexts is anyway
3286         no-op now.
3287 
3288   @note We don't notify threads holding other than S/SH types of
3289         conflicting locks on the object since notification should
3290         not be needed and anyway will be no-op for them (unless
3291         they also hold S/SH locks on the object).
3292 
3293   @param  ctx  MDL_context for current thread.
3294   @param  lock MDL_lock object representing lock which is to be
3295                acquired.
3296 */
3297 
object_lock_notify_conflicting_locks(MDL_context * ctx,MDL_lock * lock)3298 void MDL_lock::object_lock_notify_conflicting_locks(MDL_context *ctx,
3299                                                     MDL_lock *lock) {
3300   Ticket_iterator it(lock->m_granted);
3301   MDL_ticket *conflicting_ticket;
3302 
3303   while ((conflicting_ticket = it++)) {
3304     if (conflicting_ticket->get_ctx() != ctx &&
3305         (conflicting_ticket->get_type() == MDL_SHARED ||
3306          conflicting_ticket->get_type() == MDL_SHARED_HIGH_PRIO) &&
3307         conflicting_ticket->get_ctx()->get_owner()->get_thd() != nullptr) {
3308       MDL_context *conflicting_ctx = conflicting_ticket->get_ctx();
3309 
3310       /*
3311         If thread which holds conflicting lock is waiting on table-level
3312         lock or some other non-MDL resource we might need to wake it up
3313         by calling code outside of MDL.
3314 
3315         The only scenario in which it is important now looks like:
3316         Thread 1: HANDLER t1 OPEN, acquires S metadata lock on t1.
3317         Thread 2: LOCK TABLES t1 WRITE, t2 READ LOCAL, acquire
3318                   SNRW lock on t1, SR lock on t2 and TL_READ THR_LOCK
3319                   lock on t2.
3320         Thread 1: Executes UPDATE t2 SET i = 1 or some other DML which
3321                   will directly or indirectly block on THR_LOCK for t2.
3322         Thread 2: Does ALTER TABLE t1 ... which tries to upgrade SNRW
3323                   metadata lock to X lock and blocks because of S lock
3324                   from open HANDLER.
3325 
3326         A similar scenario is possible when MERGE tables are used instead
3327         of the READ LOCAL clause.
3328         Once we will get rid of the support for READ LOCAL and MERGE clauses
3329         this code can be removed.
3330       */
3331       ctx->get_owner()->notify_shared_lock(
3332           conflicting_ctx->get_owner(),
3333           conflicting_ctx->get_needs_thr_lock_abort());
3334     }
3335   }
3336 }
3337 
3338 /**
3339   Acquire one lock with waiting for conflicting locks to go away if needed.
3340 
3341   @param [in,out] mdl_request Lock request object for lock to be acquired
3342 
3343   @param lock_wait_timeout Seconds to wait before timeout.
3344 
3345   @retval  false   Success. MDL_request::ticket points to the ticket
3346                    for the lock.
3347   @retval  true    Failure (Out of resources or waiting is aborted).
3348 */
3349 
bool MDL_context::acquire_lock(MDL_request *mdl_request,
                               Timeout_type lock_wait_timeout) {
  if (lock_wait_timeout == 0) {
    /*
      Resort to try_acquire_lock() in case of zero timeout.

      This allows to avoid unnecessary deadlock detection attempt and "fake"
      deadlocks which might result from it.
      In case of failure to acquire lock, try_acquire_lock() preserves
      invariants by updating MDL_lock::fast_path_state and obtrusive locks
      count. It also performs SE notification if needed.
    */
    if (try_acquire_lock(mdl_request)) return true;

    if (!mdl_request->ticket) {
      /* We have failed to acquire lock instantly. */
      DEBUG_SYNC(get_thd(), "mdl_acquire_lock_wait");
      my_error(ER_LOCK_WAIT_TIMEOUT, MYF(0));
      return true;
    }
    return false;
  }

  /* Normal, non-zero timeout case. */

  MDL_lock *lock;
  MDL_ticket *ticket = nullptr;
  struct timespec abs_timeout;
  MDL_wait::enum_wait_status wait_status;
  /* Do some work outside the critical section. */
  set_timespec(&abs_timeout, lock_wait_timeout);

  if (try_acquire_lock_impl(mdl_request, &ticket)) return true;

  if (mdl_request->ticket) {
    /*
      We have managed to acquire lock without waiting.
      MDL_lock, MDL_context and MDL_request were updated
      accordingly, so we can simply return success.
    */
    return false;
  }

  /*
    Our attempt to acquire lock without waiting has failed.
    As a result of this attempt we got MDL_ticket with m_lock
    member pointing to the corresponding MDL_lock object which
    has MDL_lock::m_rwlock write-locked.
  */
  lock = ticket->m_lock;

  /* Enqueue ourselves as a waiter while still holding m_rwlock. */
  lock->m_waiting.add_ticket(ticket);

  /*
    Once we added a pending ticket to the waiting queue,
    we must ensure that our wait slot is empty, so
    that our lock request can be scheduled. Do that in the
    critical section formed by the acquired write lock on MDL_lock.
  */
  m_wait.reset_status();

  if (lock->needs_notification(ticket)) lock->notify_conflicting_locks(this);

  mysql_prlock_unlock(&lock->m_rwlock);

#ifdef HAVE_PSI_METADATA_INTERFACE
  /* Instrument the upcoming wait through the P_S metadata interface. */
  PSI_metadata_locker_state state;
  PSI_metadata_locker *locker = nullptr;

  if (ticket->m_psi != nullptr) {
    locker = PSI_METADATA_CALL(start_metadata_wait)(&state, ticket->m_psi,
                                                    __FILE__, __LINE__);
  }
#endif

  /* Publish the wait-for edge for the deadlock detector. */
  will_wait_for(ticket);

  /* There is a shared or exclusive lock on the object. */
  DEBUG_SYNC(get_thd(), "mdl_acquire_lock_wait");

  find_deadlock();

  if (lock->needs_notification(ticket) || lock->needs_connection_check()) {
    /*
      Wait in one-second slices so that we can periodically re-notify
      conflicting lock owners and/or check that the client connection
      is still alive.
    */
    struct timespec abs_shortwait;
    set_timespec(&abs_shortwait, 1);
    wait_status = MDL_wait::WS_EMPTY;

    while (cmp_timespec(&abs_shortwait, &abs_timeout) <= 0) {
      /* abs_timeout is far away. Wait a short while and notify locks. */
      wait_status = m_wait.timed_wait(m_owner, &abs_shortwait, false,
                                      mdl_request->key.get_wait_state_name());

      if (wait_status != MDL_wait::WS_EMPTY) break;

      if (lock->needs_connection_check() && !m_owner->is_connected()) {
        /*
          If this is user-level lock and the client is disconnected don't wait
          forever: assume it's the same as statement being killed (this differs
          from pre-5.7 where we treat it as timeout, but is more logical).
          Using MDL_wait::set_status() to set status atomically wastes one
          condition variable wake up but should happen rarely.
          We don't want to do this check for all types of metadata locks since
          in general case we may want to complete wait/operation even when
          connection is lost (e.g. in case of logging into slow/general log).
        */
        if (!m_wait.set_status(MDL_wait::KILLED))
          wait_status = MDL_wait::KILLED;
        break;
      }

      if (lock->needs_notification(ticket)) {
        mysql_prlock_wrlock(&lock->m_rwlock);
        lock->notify_conflicting_locks(this);
        mysql_prlock_unlock(&lock->m_rwlock);
      }

      set_timespec(&abs_shortwait, 1);
    }
    /* Less than a second left until abs_timeout: do one final wait. */
    if (wait_status == MDL_wait::WS_EMPTY)
      wait_status = m_wait.timed_wait(m_owner, &abs_timeout, true,
                                      mdl_request->key.get_wait_state_name());
  } else {
    /* No periodic work needed: a single wait until the full timeout. */
    wait_status = m_wait.timed_wait(m_owner, &abs_timeout, true,
                                    mdl_request->key.get_wait_state_name());
  }

  /* Remove the wait-for edge published by will_wait_for(). */
  done_waiting_for();

#ifdef HAVE_PSI_METADATA_INTERFACE
  if (locker != nullptr) {
    PSI_METADATA_CALL(end_metadata_wait)(locker, 0);
  }
#endif

  if (wait_status != MDL_wait::GRANTED) {
    /* Wait failed: undo our registration in the waiting queue. */
    lock->remove_ticket(this, m_pins, &MDL_lock::m_waiting, ticket);

    /*
      If SEs were notified about impending lock acquisition, the failure
      to acquire it requires the same notification as lock release.
    */
    if (ticket->m_hton_notified) {
      mysql_mdl_set_status(ticket->m_psi, MDL_ticket::POST_RELEASE_NOTIFY);
      m_owner->notify_hton_post_release_exclusive(&mdl_request->key);
    }

    MDL_ticket::destroy(ticket);
    /* Map the wait outcome to the appropriate client error. */
    switch (wait_status) {
      case MDL_wait::VICTIM:
        my_error(ER_LOCK_DEADLOCK, MYF(0));
        break;
      case MDL_wait::TIMEOUT:
        my_error(ER_LOCK_WAIT_TIMEOUT, MYF(0));
        break;
      case MDL_wait::KILLED:
        if (get_owner()->is_killed() == ER_QUERY_TIMEOUT)
          my_error(ER_QUERY_TIMEOUT, MYF(0));
        else
          my_error(ER_QUERY_INTERRUPTED, MYF(0));
        break;
      default:
        DBUG_ASSERT(0);
        break;
    }
    return true;
  }

  /*
    We have been granted our request.
    State of MDL_lock object is already being appropriately updated by a
    concurrent thread (@sa MDL_lock:reschedule_waiters()).
    So all we need to do is to update MDL_context and MDL_request objects.
  */
  DBUG_ASSERT(wait_status == MDL_wait::GRANTED);

  m_ticket_store.push_front(mdl_request->duration, ticket);
  mdl_request->ticket = ticket;

  mysql_mdl_set_status(ticket->m_psi, MDL_ticket::GRANTED);

  return false;
}
3532 
3533 class MDL_request_cmp {
3534  public:
operator ()(const MDL_request * req1,const MDL_request * req2)3535   bool operator()(const MDL_request *req1, const MDL_request *req2) {
3536     int rc = req1->key.cmp(&req2->key);
3537     /*
3538       In cases when both requests correspond to the same key, we need to put
3539       the request for the stronger lock type first, to avoid extra deadlocks.
3540       We do this by simply comparing types of lock requests.
3541       This works OK since it is mostly needed to avoid the scenario when
3542       LOCK TABLES t1 WRITE, t1 READ acquires SRO lock before SNRW lock.
3543       It won't work in the general case (e.g. SRO is not stronger than SW).
3544     */
3545     if (rc == 0)
3546       rc = static_cast<int>(req2->type) - static_cast<int>(req1->type);
3547     return rc < 0;
3548   }
3549 };
3550 
3551 /**
3552   Acquire exclusive locks. There must be no granted locks in the
3553   context.
3554 
3555   This is a replacement of lock_table_names(). It is used in
3556   RENAME, DROP and other DDL SQL statements.
3557 
3558   @param  mdl_requests  List of requests for locks to be acquired.
3559 
3560   @param lock_wait_timeout  Seconds to wait before timeout.
3561 
3562   @note The list of requests should not contain non-exclusive lock requests.
3563         There should not be any acquired locks in the context.
3564 
3565   @note Assumes that one already owns scoped intention exclusive lock.
3566 
3567   @note If acquisition fails any locks with MDL_EXPLICIT duration that had
3568         already been taken, are released. Not just locks with MDL_STATEMENT
3569         and MDL_TRANSACTION duration. This makes acquisition of MDL_EXPLICIT
3570         locks atomic (all or nothing). This is needed for the locking
3571         service plugin API.
3572 
3573   @retval false  Success
3574   @retval true   Failure
3575 */
3576 
acquire_locks(MDL_request_list * mdl_requests,Timeout_type lock_wait_timeout)3577 bool MDL_context::acquire_locks(MDL_request_list *mdl_requests,
3578                                 Timeout_type lock_wait_timeout) {
3579   MDL_request_list::Iterator it(*mdl_requests);
3580   MDL_request **p_req;
3581   MDL_savepoint mdl_svp = mdl_savepoint();
3582   /*
3583     Remember the first MDL_EXPLICIT ticket so that we can release
3584     any new such locks taken if acquisition fails.
3585   */
3586   MDL_ticket *explicit_front = m_ticket_store.front(MDL_EXPLICIT);
3587   const size_t req_count = mdl_requests->elements();
3588 
3589   if (req_count == 0) return false;
3590 
3591   /* Sort requests according to MDL_key. */
3592   Prealloced_array<MDL_request *, 16> sort_buf(
3593       key_memory_MDL_context_acquire_locks);
3594   if (sort_buf.reserve(req_count)) return true;
3595 
3596   for (size_t ii = 0; ii < req_count; ++ii) {
3597     sort_buf.push_back(it++);
3598   }
3599 
3600   std::sort(sort_buf.begin(), sort_buf.end(), MDL_request_cmp());
3601 
3602   size_t num_acquired = 0;
3603   for (p_req = sort_buf.begin(); p_req != sort_buf.end(); p_req++) {
3604     if (acquire_lock(*p_req, lock_wait_timeout)) goto err;
3605     ++num_acquired;
3606   }
3607   return false;
3608 
3609 err:
3610   /*
3611     Release locks we have managed to acquire so far.
3612     Use rollback_to_savepoint() since there may be duplicate
3613     requests that got assigned the same ticket.
3614   */
3615   rollback_to_savepoint(mdl_svp);
3616   /*
3617     Also release the MDL_EXPLICIT locks taken in this call.
3618     The locking service plugin API needs acquisition of such
3619     locks to be atomic as well.
3620   */
3621   release_locks_stored_before(MDL_EXPLICIT, explicit_front);
3622 
3623   /* Reset lock requests back to its initial state. */
3624   for (p_req = sort_buf.begin(); p_req != sort_buf.begin() + num_acquired;
3625        p_req++) {
3626     (*p_req)->ticket = nullptr;
3627   }
3628   return true;
3629 }
3630 
clone_tickets(const MDL_context * ticket_owner,enum_mdl_duration duration)3631 bool MDL_context::clone_tickets(const MDL_context *ticket_owner,
3632                                 enum_mdl_duration duration) {
3633   MDL_ticket *ticket;
3634   MDL_ticket_store::List_iterator it_ticket =
3635       ticket_owner->m_ticket_store.list_iterator(duration);
3636   bool ret = false;
3637   MDL_request request;
3638 
3639   while ((ticket = it_ticket++)) {
3640     DBUG_ASSERT(ticket->m_lock);
3641 
3642     MDL_REQUEST_INIT_BY_KEY(&request, ticket->get_key(), ticket->get_type(),
3643                             duration);
3644     request.ticket = ticket;
3645     ret = clone_ticket(&request);
3646     if (ret) break;
3647   }
3648 
3649   if (ret) {
3650     Ticket_iterator it_ticket1 = m_ticket_store.list_iterator(duration);
3651     while ((ticket = it_ticket1++)) {
3652       release_lock(ticket);
3653     }
3654   }
3655 
3656   return ret;
3657 }
3658 
3659 /**
3660   Upgrade a shared metadata lock.
3661 
3662   Used in ALTER TABLE and CREATE TABLE.
3663 
3664   @param mdl_ticket         Lock to upgrade.
3665   @param new_type           Lock type to upgrade to.
3666   @param lock_wait_timeout  Seconds to wait before timeout.
3667 
3668   @note In case of failure to upgrade lock (e.g. because upgrader
3669         was killed) leaves lock in its original state (locked in
3670         shared mode).
3671 
3672   @note There can be only one upgrader for a lock or we will have deadlock.
3673         This invariant is ensured by the fact that upgradeable locks SU, SNW
3674         and SNRW are not compatible with each other and themselves in case
3675         of ALTER TABLE operation.
3676         In case of CREATE TABLE operation there is a chance of deadlock as
3677         'S' is compatible with 'S'. But the deadlock is recovered by backoff
3678         retry mechanism.
3679 
3680   @retval false  Success
3681   @retval true   Failure (thread was killed)
3682 */
3683 
bool MDL_context::upgrade_shared_lock(MDL_ticket *mdl_ticket,
                                      enum_mdl_type new_type,
                                      Timeout_type lock_wait_timeout) {
  MDL_request mdl_new_lock_request;
  /*
    Remember the current savepoint so that, after acquiring the stronger
    lock, we can tell whether acquire_lock() produced a brand-new ticket
    or the request was satisfied by a ticket we already owned.
  */
  MDL_savepoint mdl_svp = mdl_savepoint();
  bool is_new_ticket;
  MDL_lock *lock;

  DBUG_TRACE;
  DEBUG_SYNC(get_thd(), "mdl_upgrade_lock");

  /*
    Do nothing if already upgraded. Used when we FLUSH TABLE under
    LOCK TABLES and a table is listed twice in LOCK TABLES list.
  */
  if (mdl_ticket->has_stronger_or_equal_type(new_type)) return false;

  /* Request the stronger lock type on the same key as the existing ticket. */
  MDL_REQUEST_INIT_BY_KEY(&mdl_new_lock_request, &mdl_ticket->m_lock->key,
                          new_type, MDL_TRANSACTION);

  /* Failure here means timeout or the thread was killed; report it up. */
  if (acquire_lock(&mdl_new_lock_request, lock_wait_timeout)) return true;

  is_new_ticket = !has_lock(mdl_svp, mdl_new_lock_request.ticket);

  lock = mdl_ticket->m_lock;

  /* Code below assumes that we were upgrading to "obtrusive" type of lock. */
  DBUG_ASSERT(lock->is_obtrusive_lock(new_type));

  /* Merge the acquired and the original lock. @todo: move to a method. */
  mysql_prlock_wrlock(&lock->m_rwlock);
  if (is_new_ticket) {
    /* The freshly acquired ticket will be destroyed below; detach it first. */
    lock->m_granted.remove_ticket(mdl_new_lock_request.ticket);
    /*
      We should not clear HAS_OBTRUSIVE flag in this case as we will
      get "obtrusive' lock as result in any case.
    */
    --lock->m_obtrusive_locks_granted_waiting_count;
  }
  /*
    Set the new type of lock in the ticket. To update state of
    MDL_lock object correctly we need to temporarily exclude
    ticket from the granted queue or "fast path" counter and
    then include lock back into granted queue.
    Note that with current code at this point we can't have
    "fast path" tickets as upgrade to "obtrusive" locks
    materializes tickets normally. Still we cover this case
    for completeness.
  */
  if (mdl_ticket->m_is_fast_path) {
    /*
      Decrement of counter in MDL_lock::m_fast_path_state needs to be done
      under protection of MDL_lock::m_rwlock to ensure that it is atomic with
      changes to MDL_lock::m_granted list and to enforce invariant [INV1].
      Note that since we have HAS_OBTRUSIVE flag set at this point all
      concurrent lock acquisitions and releases will have to acquire
      MDL_lock::m_rwlock, so nobody will see results of this decrement until
      m_rwlock is released.
    */
    lock->fast_path_state_add(
        -lock->get_unobtrusive_lock_increment(mdl_ticket->m_type));
    mdl_ticket->m_is_fast_path = false;
  } else {
    lock->m_granted.remove_ticket(mdl_ticket);
    /*
      Also if we are upgrading from "obtrusive" lock we need to temporarily
      decrement m_obtrusive_locks_granted_waiting_count counter.
      We should not clear HAS_OBTRUSIVE flag in this case as we will get
      "obtrusive' lock as result in any case.
    */
    if (lock->is_obtrusive_lock(mdl_ticket->m_type))
      --lock->m_obtrusive_locks_granted_waiting_count;
  }

  mdl_ticket->m_type = new_type;

  /* Re-insert the (now stronger) original ticket into the granted queue. */
  lock->m_granted.add_ticket(mdl_ticket);
  /*
    Since we always upgrade to "obtrusive" type of lock we need to
    increment m_obtrusive_locks_granted_waiting_count counter.

    HAS_OBTRUSIVE flag has been already set by acquire_lock()
    and should not have been cleared since then.
  */
  DBUG_ASSERT(lock->m_fast_path_state & MDL_lock::HAS_OBTRUSIVE);
  ++lock->m_obtrusive_locks_granted_waiting_count;

  mysql_prlock_unlock(&lock->m_rwlock);

  /*
    The below code can't handle situation when we upgrade to lock requiring
    SE notification and it turns out that we already have lock of this type
    associated with different ticket.
  */
  DBUG_ASSERT(is_new_ticket || !mdl_new_lock_request.ticket->m_hton_notified);

  /* Carry over the SE-notification status to the surviving ticket. */
  mdl_ticket->m_hton_notified = mdl_new_lock_request.ticket->m_hton_notified;

  if (is_new_ticket) {
    /* The auxiliary ticket is no longer needed; the original ticket now
       represents the stronger lock. */
    m_ticket_store.remove(MDL_TRANSACTION, mdl_new_lock_request.ticket);
    MDL_ticket::destroy(mdl_new_lock_request.ticket);
  }

  return false;
}
3789 
3790 /**
3791   A fragment of recursive traversal of the wait-for graph
3792   in search for deadlocks. Direct the deadlock visitor to all
3793   contexts that own the lock the current node in the wait-for
3794   graph is waiting for.
3795   As long as the initial node is remembered in the visitor,
3796   a deadlock is found when the same node is seen twice.
3797 */
3798 
bool MDL_lock::visit_subgraph(MDL_ticket *waiting_ticket,
                              MDL_wait_for_graph_visitor *gvisitor) {
  MDL_ticket *ticket;
  MDL_context *src_ctx = waiting_ticket->get_ctx();
  /* Pessimistic default: assume a deadlock until the traversal rules it out. */
  bool result = true;

  mysql_prlock_rdlock(&m_rwlock);

  /*
    Iterators must be initialized after taking a read lock.

    Note that MDL_ticket's which correspond to lock requests satisfied
    on "fast path" are not present in m_granted list and thus
    corresponding edges are missing from wait-for graph.
    It is OK since contexts with "fast path" tickets are not allowed to
    wait for any resource (they have to convert "fast path" tickets to
    normal tickets first) and thus cannot participate in deadlock.
    @sa MDL_context::will_wait_for().
  */
  Ticket_iterator granted_it(m_granted);
  Ticket_iterator waiting_it(m_waiting);

  /*
    MDL_lock's waiting and granted queues and MDL_context::m_waiting_for
    member are updated by different threads when the lock is granted
    (see MDL_context::acquire_lock() and MDL_lock::reschedule_waiters()).
    As a result, here we may encounter a situation when MDL_lock data
    already reflects the fact that the lock was granted but
    m_waiting_for member has not been updated yet.

    For example, imagine that:

    thread1: Owns SNW lock on table t1.
    thread2: Attempts to acquire SW lock on t1,
             but sees an active SNW lock.
             Thus adds the ticket to the waiting queue and
             sets m_waiting_for to point to the ticket.
    thread1: Releases SNW lock, updates MDL_lock object to
             grant SW lock to thread2 (moves the ticket for
             SW from waiting to the active queue).
             Attempts to acquire a new SNW lock on t1,
             sees an active SW lock (since it is present in the
             active queue), adds ticket for SNW lock to the waiting
             queue, sets m_waiting_for to point to this ticket.

    At this point deadlock detection algorithm run by thread1 will see that:
    - Thread1 waits for SNW lock on t1 (since m_waiting_for is set).
    - SNW lock is not granted, because it conflicts with active SW lock
      owned by thread 2 (since ticket for SW is present in granted queue).
    - Thread2 waits for SW lock (since its m_waiting_for has not been
      updated yet!).
    - SW lock is not granted because there is pending SNW lock from thread1.
      Therefore deadlock should exist [sic!].

    To avoid detection of such false deadlocks we need to check the "actual"
    status of the ticket being waited for, before analyzing its blockers.
    We do this by checking the wait status of the context which is waiting
    for it. To avoid races this has to be done under protection of
    MDL_lock::m_rwlock lock.
  */
  if (src_ctx->m_wait.get_status() != MDL_wait::WS_EMPTY) {
    result = false;
    goto end;
  }

  /*
    To avoid visiting nodes which were already marked as victims of
    deadlock detection (or whose requests were already satisfied) we
    enter the node only after peeking at its wait status.
    This is necessary to avoid active waiting in a situation
    when previous searches for a deadlock already selected the
    node we're about to enter as a victim (see the comment
    in MDL_context::find_deadlock() for explanation why several searches
    can be performed for the same wait).
    There is no guarantee that the node isn't chosen a victim while we
    are visiting it but this is OK: in the worst case we might do some
    extra work and one more context might be chosen as a victim.
  */
  if (gvisitor->enter_node(src_ctx)) goto end;

  /*
    We do a breadth-first search first -- that is, inspect all
    edges of the current node, and only then follow up to the next
    node. In workloads that involve wait-for graph loops this
    has proven to be a more efficient strategy [citation missing].
  */
  while ((ticket = granted_it++)) {
    /* Filter out edges that point to the same node. */
    if (ticket->get_ctx() != src_ctx &&
        ticket->is_incompatible_when_granted(waiting_ticket->get_type()) &&
        gvisitor->inspect_edge(ticket->get_ctx())) {
      goto end_leave_node;
    }
  }

  while ((ticket = waiting_it++)) {
    /* Filter out edges that point to the same node. */
    if (ticket->get_ctx() != src_ctx &&
        ticket->is_incompatible_when_waiting(waiting_ticket->get_type()) &&
        gvisitor->inspect_edge(ticket->get_ctx())) {
      goto end_leave_node;
    }
  }

  /* Recurse and inspect all adjacent nodes. */
  granted_it.rewind();
  while ((ticket = granted_it++)) {
    if (ticket->get_ctx() != src_ctx &&
        ticket->is_incompatible_when_granted(waiting_ticket->get_type()) &&
        ticket->get_ctx()->visit_subgraph(gvisitor)) {
      goto end_leave_node;
    }
  }

  waiting_it.rewind();
  while ((ticket = waiting_it++)) {
    if (ticket->get_ctx() != src_ctx &&
        ticket->is_incompatible_when_waiting(waiting_ticket->get_type()) &&
        ticket->get_ctx()->visit_subgraph(gvisitor)) {
      goto end_leave_node;
    }
  }

  /* Both passes completed without the visitor reporting a cycle. */
  result = false;

end_leave_node:
  gvisitor->leave_node(src_ctx);

end:
  mysql_prlock_unlock(&m_rwlock);
  return result;
}
3931 
3932 /**
3933   Traverse a portion of wait-for graph which is reachable
3934   through the edge represented by this ticket and search
3935   for deadlocks.
3936 
3937   @retval true  A deadlock is found. A pointer to deadlock
3938                  victim is saved in the visitor.
3939   @retval false
3940 */
3941 
accept_visitor(MDL_wait_for_graph_visitor * gvisitor)3942 bool MDL_ticket::accept_visitor(MDL_wait_for_graph_visitor *gvisitor) {
3943   return m_lock->visit_subgraph(this, gvisitor);
3944 }
3945 
3946 /**
3947   A fragment of recursive traversal of the wait-for graph of
3948   MDL contexts in the server in search for deadlocks.
3949   Assume this MDL context is a node in the wait-for graph,
3950   and direct the visitor to all adjacent nodes. As long
3951   as the starting node is remembered in the visitor, a
3952   deadlock is found when the same node is visited twice.
3953   One MDL context is connected to another in the wait-for
3954   graph if it waits on a resource that is held by the other
3955   context.
3956 
3957   @retval true  A deadlock is found. A pointer to deadlock
3958                 victim is saved in the visitor.
3959   @retval false
3960 */
3961 
visit_subgraph(MDL_wait_for_graph_visitor * gvisitor)3962 bool MDL_context::visit_subgraph(MDL_wait_for_graph_visitor *gvisitor) {
3963   bool result = false;
3964 
3965   mysql_prlock_rdlock(&m_LOCK_waiting_for);
3966 
3967   if (m_waiting_for) result = m_waiting_for->accept_visitor(gvisitor);
3968 
3969   mysql_prlock_unlock(&m_LOCK_waiting_for);
3970 
3971   return result;
3972 }
3973 
3974 /**
3975   Try to find a deadlock. This function produces no errors.
3976 
3977   @note If during deadlock resolution context which performs deadlock
3978         detection is chosen as a victim it will be informed about the
3979         fact by setting VICTIM status to its wait slot.
3980 */
3981 
void MDL_context::find_deadlock() {
  /* Repeat the search until no deadlock involving our wait is left. */
  while (true) {
    /*
      The fact that we use fresh instance of gvisitor for each
      search performed by find_deadlock() below is important,
      the code responsible for victim selection relies on this.
    */
    Deadlock_detection_visitor dvisitor(this);
    MDL_context *victim;

    if (!visit_subgraph(&dvisitor)) {
      /* No deadlocks are found! */
      break;
    }

    /* A cycle was found; the visitor has already picked which context
       should be sacrificed to break it. */
    victim = dvisitor.get_victim();

    /*
      Failure to change status of the victim is OK as it means
      that the victim has received some other message and is
      about to stop its waiting/to break deadlock loop.
      Even when the initiator of the deadlock search is
      chosen the victim, we need to set the respective wait
      result in order to "close" it for any attempt to
      schedule the request.
      This is needed to avoid a possible race during
      cleanup in case when the lock request on which the
      context was waiting is concurrently satisfied.
    */
    (void)victim->m_wait.set_status(MDL_wait::VICTIM);
    victim->unlock_deadlock_victim();

    /* If we sacrificed ourselves there is nothing more to resolve here. */
    if (victim == this) break;
    /*
      After adding a new edge to the waiting graph we found that it
      creates a loop (i.e. there is a deadlock). We decided to destroy
      this loop by removing an edge, but not the one that we added.
      Since this doesn't guarantee that all loops created by addition
      of the new edge are destroyed, we have to repeat the search.
    */
  }
}
4024 
4025 /**
4026   Release lock.
4027 
4028   @param duration Lock duration.
4029   @param ticket   Ticket for lock to be released.
4030 
4031 */
4032 
void MDL_context::release_lock(enum_mdl_duration duration, MDL_ticket *ticket) {
  MDL_lock *lock = ticket->m_lock;
  MDL_key key_for_hton;
  DBUG_TRACE;
  DBUG_PRINT("enter", ("db=%s name=%s", lock->key.db_name(), lock->key.name()));

  DBUG_ASSERT(this == ticket->get_ctx());
  mysql_mutex_assert_not_owner(&LOCK_open);

  /*
    If lock we are about to release requires post-release notification
    of SEs, we need to save its MDL_key on stack. This is necessary to
    be able to pass this key to SEs after corresponding MDL_lock object
    might be freed as result of lock release.
  */
  if (ticket->m_hton_notified) key_for_hton.mdl_key_init(&lock->key);

  if (ticket->m_is_fast_path) {
    /*
      We are releasing ticket which represents lock request which was
      satisfied using "fast path". We can use "fast path" release
      algorithm of release for it as well.
    */
    MDL_lock::fast_path_state_t unobtrusive_lock_increment =
        lock->get_unobtrusive_lock_increment(ticket->get_type());
    bool is_singleton = mdl_locks.is_lock_object_singleton(&lock->key);

    /* We should not have "fast path" tickets for "obtrusive" lock types. */
    DBUG_ASSERT(unobtrusive_lock_increment != 0);

    /*
      We need decrement part of m_fast_path_state which holds number of
      acquired "fast path" locks of this type. This needs to be done
      by atomic compare-and-swap.

      The same atomic compare-and-swap needs to check:

      *) If HAS_OBTRUSIVE flag is set. In this case we need to acquire
         MDL_lock::m_rwlock before changing m_fast_path_state. This is
         needed to enforce invariant [INV1] and also because we might
         have to atomically wake-up some waiters for our "unobtrusive"
         lock to go away.
      *) If we are about to release last "fast path" lock and there
         are no "slow path" locks. In this case we need to count
         MDL_lock object as unused and maybe even delete some
         unused MDL_lock objects eventually.

      Similarly to the case with "fast path" acquisition it is OK to
      perform ordinary read of MDL_lock::m_fast_path_state as correctness
      of value returned by it will be validated by atomic compare-and-swap.
      Again, in theory, this algorithm will work correctly if the read will
      return random values.
    */
    MDL_lock::fast_path_state_t old_state = lock->m_fast_path_state;
    bool last_use;

    do {
      if (old_state & MDL_lock::HAS_OBTRUSIVE) {
        /* Slow route: an "obtrusive" lock is involved, so take m_rwlock
           and do the decrement under its protection instead of via CAS. */
        mysql_prlock_wrlock(&lock->m_rwlock);
        /*
          It is possible that obtrusive lock has gone away since we have
          read m_fast_path_state value. This means that there is possibility
          that there are no "slow path" locks (HAS_SLOW_PATH is not set) and
          we are about to release last "fast path" lock. In this case MDL_lock
          will become unused and needs to be counted as such eventually.
        */
        last_use = (lock->fast_path_state_add(-unobtrusive_lock_increment) ==
                    unobtrusive_lock_increment);
        /*
          There might be some lock requests waiting for ticket being released
          to go away. Since this is "fast path" ticket it represents
          "unobtrusive" type of lock. In this case if there are any waiters
          for it there should be "obtrusive" type of request among them.
        */
        if (lock->m_obtrusive_locks_granted_waiting_count)
          lock->reschedule_waiters();
        mysql_prlock_unlock(&lock->m_rwlock);
        goto end_fast_path;
      }
      /*
        If there are no "slow path" locks (HAS_SLOW_PATH is not set) and
        we are about to release last "fast path" lock - MDL_lock object
        will become unused and needs to be counted as such.
      */
      last_use = (old_state == unobtrusive_lock_increment);
    } while (!lock->fast_path_state_cas(
        &old_state, old_state - unobtrusive_lock_increment));

  end_fast_path:
    /* Don't count singleton MDL_lock objects as unused. */
    if (last_use && !is_singleton) mdl_locks.lock_object_unused(this, m_pins);
  } else {
    /*
      Lock request represented by ticket was acquired using "slow path"
      or ticket was materialized later. We need to use "slow path" release.
    */
    lock->remove_ticket(this, m_pins, &MDL_lock::m_granted, ticket);
  }
  /* Detach the ticket from this context's per-duration store. */
  m_ticket_store.remove(duration, ticket);
  if (ticket->m_hton_notified) {
    /* Tell storage engines that the (formerly exclusive) lock is gone,
       using the key saved on stack above. */
    mysql_mdl_set_status(ticket->m_psi, MDL_ticket::POST_RELEASE_NOTIFY);
    m_owner->notify_hton_post_release_exclusive(&key_for_hton);
  }

  MDL_ticket::destroy(ticket);
}
4139 
4140 /**
4141   Release lock with explicit duration.
4142 
4143   @param ticket   Ticket for lock to be released.
4144 
4145 */
4146 
release_lock(MDL_ticket * ticket)4147 void MDL_context::release_lock(MDL_ticket *ticket) {
4148   DBUG_ASSERT(ticket->m_duration == MDL_EXPLICIT);
4149 
4150   release_lock(MDL_EXPLICIT, ticket);
4151 }
4152 
4153 /**
4154   Release all locks associated with the context. If the sentinel
4155   is not NULL, do not release locks stored in the list after and
4156   including the sentinel.
4157 
4158   Statement and transactional locks are added to the beginning of
4159   the corresponding lists, i.e. stored in reverse temporal order.
4160   This allows to employ this function to:
4161   - back off in case of a lock conflict.
4162   - release all locks in the end of a statement or transaction
4163   - rollback to a savepoint.
4164 */
4165 
release_locks_stored_before(enum_mdl_duration duration,MDL_ticket * sentinel)4166 void MDL_context::release_locks_stored_before(enum_mdl_duration duration,
4167                                               MDL_ticket *sentinel) {
4168   DBUG_TRACE;
4169 
4170   MDL_ticket *ticket;
4171   MDL_ticket_store::List_iterator it = m_ticket_store.list_iterator(duration);
4172   if (m_ticket_store.is_empty(duration)) {
4173     return;
4174   }
4175 
4176   while ((ticket = it++) && ticket != sentinel) {
4177     DBUG_PRINT("info", ("found lock to release ticket=%p", ticket));
4178     release_lock(duration, ticket);
4179   }
4180 }
4181 
4182 /**
4183   Release all explicit locks in the context which correspond to the
4184   same name/object as this lock request.
4185 
4186   @param name      One of the locks for the name/object for which all
4187                    locks should be released.
4188 */
4189 
release_all_locks_for_name(MDL_ticket * name)4190 void MDL_context::release_all_locks_for_name(MDL_ticket *name) {
4191   /* Use MDL_ticket::m_lock to identify other locks for the same object. */
4192   MDL_lock *lock = name->m_lock;
4193 
4194   /* Remove matching lock tickets from the context. */
4195   MDL_ticket *ticket;
4196   MDL_ticket_store::List_iterator it_ticket =
4197       m_ticket_store.list_iterator(MDL_EXPLICIT);
4198 
4199   while ((ticket = it_ticket++)) {
4200     DBUG_ASSERT(ticket->m_lock);
4201     if (ticket->m_lock == lock) release_lock(MDL_EXPLICIT, ticket);
4202   }
4203 }
4204 
4205 /**
4206   Release all explicit locks in the context for which the release()
  method of the provided visitor evaluates to true.
4208 
4209   @param visitor   Object to ask if the lock should be released.
4210 */
4211 
release_locks(MDL_release_locks_visitor * visitor)4212 void MDL_context::release_locks(MDL_release_locks_visitor *visitor) {
4213   /* Remove matching lock tickets from the context. */
4214   MDL_ticket *ticket;
4215   MDL_ticket_store::List_iterator it_ticket =
4216       m_ticket_store.list_iterator(MDL_EXPLICIT);
4217 
4218   while ((ticket = it_ticket++)) {
4219     DBUG_ASSERT(ticket->m_lock);
4220     if (visitor->release(ticket)) release_lock(MDL_EXPLICIT, ticket);
4221   }
4222 }
4223 
4224 /**
4225   Downgrade an EXCLUSIVE or SHARED_NO_WRITE lock to shared metadata lock.
4226 
4227   @param new_type  Type of lock to which exclusive lock should be downgraded.
4228 */
4229 
void MDL_ticket::downgrade_lock(enum_mdl_type new_type) {
  bool new_type_is_unobtrusive;
  mysql_mutex_assert_not_owner(&LOCK_open);
  /* Target type must be legal for this lock's namespace strategy. */
  DBUG_ASSERT(m_lock->m_strategy->legal_type[new_type]);

  /*
    Do nothing if already downgraded. Used when we FLUSH TABLE under
    LOCK TABLES and a table is listed twice in LOCK TABLES list.
    Note that this code might even try to "downgrade" a weak lock
    (e.g. SW) to a stronger one (e.g SNRW). So we can't even assert
    here that target lock is weaker than existing lock.
  */
  if (m_type == new_type || !has_stronger_or_equal_type(new_type)) return;

  /* Only allow downgrade from EXCLUSIVE and SHARED_NO_WRITE. */
  DBUG_ASSERT(m_type == MDL_EXCLUSIVE || m_type == MDL_SHARED_NO_WRITE);

  /* Below we assume that we always downgrade "obtrusive" locks. */
  DBUG_ASSERT(m_lock->is_obtrusive_lock(m_type));

  new_type_is_unobtrusive = !m_lock->is_obtrusive_lock(new_type);

  mysql_prlock_wrlock(&m_lock->m_rwlock);
  /*
    To update state of MDL_lock object correctly we need to temporarily
    exclude ticket from the granted queue and then include it back.

    Since we downgrade only "obtrusive" locks we can always assume that the
    ticket for the lock being downgraded is a "slow path" ticket.
    If we are downgrading to non-"obtrusive" lock we also should decrement
    counter of waiting and granted "obtrusive" locks.
  */
  m_lock->m_granted.remove_ticket(this);
  if (new_type_is_unobtrusive) {
    if ((--m_lock->m_obtrusive_locks_granted_waiting_count) == 0) {
      /*
        We are downgrading the last "obtrusive" lock. So we need to clear
        HAS_OBTRUSIVE flag.
        Note that it doesn't matter that we do this before including ticket
        to MDL_lock::m_granted list. Threads requesting "obtrusive" locks
        won't see this until MDL_lock::m_rwlock is released. And threads
        requesting "unobtrusive" locks don't care about this ticket.
      */
      MDL_lock::fast_path_state_t old_state = m_lock->m_fast_path_state;
      /* CAS loop: retry until the flag clear lands on an unchanged state. */
      while (!m_lock->fast_path_state_cas(
          &old_state, old_state & ~MDL_lock::HAS_OBTRUSIVE)) {
      }
    }
  }
  m_type = new_type;
  m_lock->m_granted.add_ticket(this);
  /* Waiters blocked by the old, stronger type may now be grantable. */
  m_lock->reschedule_waiters();
  mysql_prlock_unlock(&m_lock->m_rwlock);

  /*
    When we downgrade X lock for which SEs were notified about lock
    acquisition we treat situation in the same way as lock release
    (from SEs notification point of view).
  */
  if (m_hton_notified) {
    mysql_mdl_set_status(m_psi, MDL_ticket::POST_RELEASE_NOTIFY);
    m_ctx->get_owner()->notify_hton_post_release_exclusive(&m_lock->key);
    m_hton_notified = false;
    mysql_mdl_set_status(m_psi, MDL_ticket::GRANTED);
  }
}
4296 
4297 /**
  Auxiliary function which allows us to check if we have some kind of lock on
  an object. Returns true if we have a lock of a type equal to or stronger
  than the given one.
4301 
4302   @param mdl_key       Key to check for ownership
4303   @param mdl_type      Lock type. Pass in the weakest type to find
4304                        out if there is at least some lock.
4305 
4306   @return TRUE if current context contains satisfied lock for the object,
4307           FALSE otherwise.
4308 */
4309 
owns_equal_or_stronger_lock(const MDL_key * mdl_key,enum_mdl_type mdl_type)4310 bool MDL_context::owns_equal_or_stronger_lock(const MDL_key *mdl_key,
4311                                               enum_mdl_type mdl_type) {
4312   MDL_request mdl_request;
4313   enum_mdl_duration not_used;
4314   /* We don't care about exact duration of lock here. */
4315   MDL_REQUEST_INIT_BY_KEY(&mdl_request, mdl_key, mdl_type, MDL_TRANSACTION);
4316   MDL_ticket *ticket = find_ticket(&mdl_request, &not_used);
4317 
4318   DBUG_ASSERT(ticket == nullptr || ticket->m_lock);
4319 
4320   return ticket;
4321 }
4322 
4323 /**
4324   Auxiliary function which allows to check if we have some kind of lock on
4325   a object. Returns TRUE if we have a lock of an equal to given or stronger
4326   type.
4327 
4328   @param mdl_namespace Id of object namespace
4329   @param db            Name of the database
4330   @param name          Name of the object
4331   @param mdl_type      Lock type. Pass in the weakest type to find
4332                        out if there is at least some lock.
4333 
4334   @return true if current context contains satisfied lock for the object,
4335           false otherwise.
4336 */
4337 
owns_equal_or_stronger_lock(MDL_key::enum_mdl_namespace mdl_namespace,const char * db,const char * name,enum_mdl_type mdl_type)4338 bool MDL_context::owns_equal_or_stronger_lock(
4339     MDL_key::enum_mdl_namespace mdl_namespace, const char *db, const char *name,
4340     enum_mdl_type mdl_type) {
4341   MDL_request mdl_request;
4342   enum_mdl_duration not_used;
4343   /* We don't care about exact duration of lock here. */
4344   MDL_REQUEST_INIT(&mdl_request, mdl_namespace, db, name, mdl_type,
4345                    MDL_TRANSACTION);
4346   MDL_ticket *ticket = find_ticket(&mdl_request, &not_used);
4347 
4348   DBUG_ASSERT(ticket == nullptr || ticket->m_lock);
4349 
4350   return ticket;
4351 }
4352 
4353 /**
4354   Find the first context which owns the lock and inspect it by
4355   calling MDL_context_visitor::visit_context() method.
4356 
4357   @return True in case error. False otherwise. There
4358           is no guarantee that owner was found in either case.
4359   @note This method only works properly for locks which were
4360         acquired using "slow" path.
4361 */
4362 
bool MDL_context::find_lock_owner(const MDL_key *mdl_key,
                                  MDL_context_visitor *visitor) {
  MDL_lock *lock = nullptr;
  MDL_context *owner;
  bool pinned;

  /* LF-hash pins are required before searching the lock-free hash. */
  if (fix_pins()) return true;

retry:
  /* MY_LF_ERRPTR signals an out-of-memory error inside the hash search. */
  if ((lock = mdl_locks.find(m_pins, mdl_key, &pinned)) == MY_LF_ERRPTR)
    return true;

  /* No MDL_lock object, no owner, nothing to visit. */
  if (lock == nullptr) return false;

  mysql_prlock_rdlock(&lock->m_rwlock);

  if (lock->m_fast_path_state & MDL_lock::IS_DESTROYED) {
    /* The object we found was concurrently destroyed; drop pins and
       search the hash again. */
    mysql_prlock_unlock(&lock->m_rwlock);
    if (pinned) lf_hash_search_unpin(m_pins);
    DEBUG_SYNC(get_thd(), "mdl_find_lock_owner_is_destroyed");
    goto retry;
  }

  /* Safe to unpin here: we hold m_rwlock on a non-destroyed lock. */
  if (pinned) lf_hash_search_unpin(m_pins);

  /* Inspect the first owner found, if any, while still under m_rwlock. */
  if ((owner = lock->get_lock_owner())) visitor->visit_context(owner);

  mysql_prlock_unlock(&lock->m_rwlock);

  return false;
}
4395 
4396 /**
4397   Check if we have any pending locks which conflict with existing shared lock.
4398 
4399   @pre The ticket must match an acquired lock.
4400 
4401   @return true if there is a conflicting lock request, false otherwise.
4402 */
4403 
has_pending_conflicting_lock() const4404 bool MDL_ticket::has_pending_conflicting_lock() const {
4405   return m_lock->has_pending_conflicting_lock(m_type);
4406 }
4407 
4408 /** Return a key identifying this lock. */
4409 
get_key() const4410 const MDL_key *MDL_ticket::get_key() const { return &m_lock->key; }
4411 
4412 /**
4413   Releases metadata locks that were acquired after a specific savepoint.
4414 
4415   @note Used to release tickets acquired during a savepoint unit.
  @note It's safe to iterate over and release any locks taken after this
        savepoint because other statements that take special locks
        cause an implicit commit (e.g. LOCK TABLES).
4419 */
4420 
rollback_to_savepoint(const MDL_savepoint & mdl_savepoint)4421 void MDL_context::rollback_to_savepoint(const MDL_savepoint &mdl_savepoint) {
4422   DBUG_TRACE;
4423 
4424   /* If savepoint is NULL, it is from the start of the transaction. */
4425   release_locks_stored_before(MDL_STATEMENT, mdl_savepoint.m_stmt_ticket);
4426   release_locks_stored_before(MDL_TRANSACTION, mdl_savepoint.m_trans_ticket);
4427 }
4428 
4429 /**
4430   Release locks acquired by normal statements (SELECT, UPDATE,
4431   DELETE, etc) in the course of a transaction. Do not release
4432   HANDLER locks, if there are any.
4433 
4434   This method is used at the end of a transaction, in
4435   implementation of COMMIT (implicit or explicit) and ROLLBACK.
4436 */
4437 
release_transactional_locks()4438 void MDL_context::release_transactional_locks() {
4439   DBUG_TRACE;
4440   release_locks_stored_before(MDL_STATEMENT, nullptr);
4441   release_locks_stored_before(MDL_TRANSACTION, nullptr);
4442 }
4443 
release_statement_locks()4444 void MDL_context::release_statement_locks() {
4445   DBUG_TRACE;
4446   release_locks_stored_before(MDL_STATEMENT, nullptr);
4447 }
4448 
4449 /**
4450   Does this savepoint have this lock?
4451 
4452   @retval true  The ticket is older than the savepoint or
4453                 is an LT, HA or GLR ticket. Thus it belongs
4454                 to the savepoint or has explicit duration.
4455   @retval false The ticket is newer than the savepoint.
4456                 and is not an LT, HA or GLR ticket.
4457 */
4458 
has_lock(const MDL_savepoint & mdl_savepoint,MDL_ticket * mdl_ticket)4459 bool MDL_context::has_lock(const MDL_savepoint &mdl_savepoint,
4460                            MDL_ticket *mdl_ticket) {
4461   MDL_ticket *ticket;
4462   /* Start from the beginning, most likely mdl_ticket's been just acquired. */
4463 
4464   MDL_ticket_store::List_iterator s_it =
4465       m_ticket_store.list_iterator(MDL_STATEMENT);
4466   MDL_ticket_store::List_iterator t_it =
4467       m_ticket_store.list_iterator(MDL_TRANSACTION);
4468 
4469   while ((ticket = s_it++) && ticket != mdl_savepoint.m_stmt_ticket) {
4470     if (ticket == mdl_ticket) return false;
4471   }
4472 
4473   while ((ticket = t_it++) && ticket != mdl_savepoint.m_trans_ticket) {
4474     if (ticket == mdl_ticket) return false;
4475   }
4476   return true;
4477 }
4478 
4479 /**
  Does this context hold a lock of the given namespace?
4481 
4482   @retval true    At least one lock of given namespace held
4483   @retval false   No locks of the given namespace held
4484 */
4485 
has_locks(MDL_key::enum_mdl_namespace mdl_namespace) const4486 bool MDL_context::has_locks(MDL_key::enum_mdl_namespace mdl_namespace) const {
4487   MDL_ticket *ticket;
4488 
4489   for (int i = 0; i < MDL_DURATION_END; i++) {
4490     enum_mdl_duration duration = static_cast<enum_mdl_duration>(i);
4491 
4492     MDL_ticket_store::List_iterator it = m_ticket_store.list_iterator(duration);
4493     while ((ticket = it++)) {
4494       if (ticket->get_key()->mdl_namespace() == mdl_namespace) return true;
4495     }
4496   }
4497   return false;
4498 }
4499 
4500 /**
4501   Do we hold any locks which are possibly being waited
4502   for by another MDL_context?
4503 
4504   @retval true  A lock being 'waited_for' was found.
4505   @retval false No one waits for the lock(s) we hold.
4506 
4507   @note Should only be called from the thread which
4508         owns the MDL_context
4509 */
4510 
has_locks_waited_for() const4511 bool MDL_context::has_locks_waited_for() const {
4512   MDL_ticket *ticket;
4513 
4514   for (int i = 0; i < MDL_DURATION_END; i++) {
4515     const enum_mdl_duration duration = static_cast<enum_mdl_duration>(i);
4516 
4517     MDL_ticket_store::List_iterator it = m_ticket_store.list_iterator(duration);
4518     while ((ticket = it++)) {
4519       MDL_lock *const lock = ticket->m_lock;
4520 
4521       mysql_prlock_rdlock(&lock->m_rwlock);
4522       const bool has_waiters = !lock->m_waiting.is_empty();
4523       mysql_prlock_unlock(&lock->m_rwlock);
4524 
4525       if (!has_waiters) return true;
4526     }
4527   }
4528   return false;
4529 }
4530 
4531 /**
4532   Change lock duration for transactional lock.
4533 
4534   @param mdl_ticket Ticket representing lock.
4535   @param duration Lock duration to be set.
4536 
4537   @note This method only supports changing duration of
4538         transactional lock to some other duration.
4539 */
4540 
void MDL_context::set_lock_duration(MDL_ticket *mdl_ticket,
                                    enum_mdl_duration duration) {
  // Only transitions *from* transactional duration are supported.
  DBUG_ASSERT(mdl_ticket->m_duration == MDL_TRANSACTION &&
              duration != MDL_TRANSACTION);
  /*
    Move the ticket between per-duration lists. The ticket store's
    remove()/push_front() also keep its internal hash map (used above
    the ticket-count threshold) in sync.
  */
  m_ticket_store.remove(MDL_TRANSACTION, mdl_ticket);
  m_ticket_store.push_front(duration, mdl_ticket);

#ifndef DBUG_OFF
  // MDL_ticket::m_duration exists only in debug builds, for assertions.
  mdl_ticket->m_duration = duration;
#endif
}
4552 
4553 /**
4554   Set explicit duration for all locks in the context.
4555 */
4556 
// Give all locks in this context explicit duration; delegates the list
// and map reshuffling to the ticket store.
void MDL_context::set_explicit_duration_for_all_locks() {
  m_ticket_store.move_all_to_explicit_duration();
}
4560 
4561 /**
4562   Set transactional duration for all locks in the context.
4563 */
4564 
// Give all explicit locks in this context transactional duration;
// delegates the list and map reshuffling to the ticket store.
void MDL_context::set_transaction_duration_for_all_locks() {
  m_ticket_store.move_explicit_to_transaction_duration();
}
4568 
operator ()(const MDL_key * k) const4569 size_t MDL_ticket_store::Hash::operator()(const MDL_key *k) const {
4570   return static_cast<size_t>(murmur3_32(k->ptr(), k->length(), 0));
4571 }
4572 
find_in_lists(const MDL_request & req) const4573 MDL_ticket_store::MDL_ticket_handle MDL_ticket_store::find_in_lists(
4574     const MDL_request &req) const {
4575   for (int i = 0; i < MDL_DURATION_END; i++) {
4576     int di = (req.duration + i) % MDL_DURATION_END;
4577 
4578     List_iterator it(m_durations[di].m_ticket_list);
4579 
4580     for (MDL_ticket *ticket = it++; ticket != nullptr; ticket = it++) {
4581       if (req.key.is_equal(ticket->get_key()) &&
4582           ticket->has_stronger_or_equal_type(req.type)) {
4583         return {ticket, static_cast<enum_mdl_duration>(di)};
4584       }
4585     }
4586   }
4587   return {nullptr, MDL_DURATION_END};
4588 }
4589 
find_in_hash(const MDL_request & req) const4590 MDL_ticket_store::MDL_ticket_handle MDL_ticket_store::find_in_hash(
4591     const MDL_request &req) const {
4592   auto foundrng = m_map->equal_range(&req.key);
4593 
4594   const MDL_ticket_handle *found_handle = nullptr;
4595   std::find_if(foundrng.first, foundrng.second,
4596                [&](const Ticket_map::value_type &vt) {
4597                  auto &th = vt.second;
4598                  if (!th.m_ticket->has_stronger_or_equal_type(req.type)) {
4599                    return false;
4600                  }
4601                  found_handle = &th;
4602                  return (th.m_dur == req.duration);
4603                });
4604 
4605   if (found_handle != nullptr) {
4606     return *found_handle;
4607   }
4608   return {nullptr, MDL_DURATION_END};
4609 }
4610 
is_empty() const4611 bool MDL_ticket_store::is_empty() const {
4612   return (std::all_of(
4613       std::begin(m_durations), std::end(m_durations), [](const Duration &d) {
4614         DBUG_ASSERT(!d.m_ticket_list.is_empty() || d.m_mat_front == nullptr);
4615         return d.m_ticket_list.is_empty();
4616       }));
4617 }
4618 
// True if the ticket list for duration index di holds no tickets.
bool MDL_ticket_store::is_empty(int di) const {
  return m_durations[di].m_ticket_list.is_empty();
}
4622 
// First ticket in the list for duration index di (null if the list is empty —
// presumably, per the list type's front() convention; confirm in mdl.h).
MDL_ticket *MDL_ticket_store::front(int di) {
  return m_durations[di].m_ticket_list.front();
}
4626 
/**
  Add a ticket to the front of the list for the given duration.

  Below THRESHOLD tickets only the per-duration lists are maintained;
  once the count reaches THRESHOLD, an unordered multimap keyed on the
  MDL key is (lazily) allocated, back-filled with the pre-existing
  tickets, and kept in sync from then on.
*/
void MDL_ticket_store::push_front(enum_mdl_duration dur, MDL_ticket *ticket) {
  ++m_count;
  m_durations[dur].m_ticket_list.push_front(ticket);

  /*
    Note that we do not update m_durations[dur].m_mat_front here. That
    is ok, since it is only to be used as an optimization for
    MDL_context::materialize_fast_path_locks(). So if the ticket being
    added is already materialized it does not break an invariant, and
    m_mat_front will be updated the next time
    MDL_context::materialize_fast_path_locks() runs.
  */

  // Still below the threshold: lists alone are authoritative.
  if (m_count < THRESHOLD) {
    return;
  }

  if (m_count == THRESHOLD) {
    // If this is the first time we cross the threshold, the map must
    // be allocated
    if (m_map == nullptr) {
      m_map.reset(new Ticket_map{INITIAL_BUCKET_COUNT, Hash{}, Key_equal{}});
    }
    // In any event, it should now be empty
    DBUG_ASSERT(m_map->empty());

    /*
       When the THRESHOLD value is reached, the unordered map must be
       populated with all the tickets added before reaching the
       threshold.
    */
    for_each_ticket_in_ticket_lists(
        [this](MDL_ticket *t, enum_mdl_duration da) {
          m_map->emplace(t->get_key(), MDL_ticket_handle{t, da});
        });
    DBUG_ASSERT(m_map->size() == m_count);
    return;
  }

  // Already above the threshold: just mirror the new ticket into the map.
  m_map->emplace(ticket->get_key(), MDL_ticket_handle{ticket, dur});
  DBUG_ASSERT(m_map->size() == m_count);
}
4669 
/**
  Remove a ticket from the list for the given duration, keeping the
  materialized-front marker and (when active) the hash map consistent.
*/
void MDL_ticket_store::remove(enum_mdl_duration dur, MDL_ticket *ticket) {
  --m_count;
  m_durations[dur].m_ticket_list.remove(ticket);

  // If the removed ticket was the materialized front, advance the marker.
  if (m_durations[dur].m_mat_front == ticket) {
    m_durations[dur].m_mat_front = ticket->next_in_context;
  }

  // Well below the threshold: the map is unused (absent or empty).
  if (m_count < THRESHOLD - 1) {
    DBUG_ASSERT(m_map == nullptr || m_map->empty());
    return;
  }
  DBUG_ASSERT(m_map != nullptr && m_map->size() == m_count + 1);

  if (m_count == THRESHOLD - 1) {
    // This remove dipped us below threshold
    m_map->clear();
    return;
  }
  DBUG_ASSERT(m_count >= THRESHOLD);

  // Still at/above the threshold: erase this exact ticket from the map.
  // The key may map to multiple tickets, so search the equal range for
  // the entry whose handle points at this ticket.
  auto foundrng = m_map->equal_range(ticket->get_key());

  auto foundit = std::find_if(
      foundrng.first, foundrng.second, [&](const Ticket_map::value_type &vt) {
        auto &th = vt.second;
        DBUG_ASSERT(th.m_ticket != ticket || th.m_dur == dur);
        return (th.m_ticket == ticket);
      });
  DBUG_ASSERT(foundit != foundrng.second);
  if (foundit != foundrng.second) {
    m_map->erase(foundit);
  }
  DBUG_ASSERT(m_map->size() == m_count);
}
4705 
/**
  Move all tickets (statement and transactional) to the explicit-duration
  list, updating the hash map's stored durations to match.
*/
void MDL_ticket_store::move_all_to_explicit_duration() {
  int i;
  MDL_ticket *ticket;

  /*
    It is assumed that at this point all the locks in the context are
    materialized. So the next call to MDL_context::materialize_fast_path_locks()
    will not be considering these locks. m_mat_front is valid for the current
    list of tickets with explicit duration and if we had added all the tickets
    which previously had transaction duration at the front, it would still be
    valid. But since we are using the swap-trick below, all the previously
    transactional tickets end up at the tail end of the list, and the order of
    the already explicit tickets is reversed when they are moved (remove pops
    from the front and push_front adds to the front). So unless all the tickets
    were already materialized we're guaranteed that m_mat_front is wrong after
    the swapping and moving. Hence we set m_mat_front of tickets with explicit
    duration to null so that the next call to
    MDL_context::materialize_fast_path_locks() will set it appropriately.
  */
  m_durations[MDL_EXPLICIT].m_mat_front = nullptr;

  /*
    In the most common case when this function is called list
    of transactional locks is bigger than list of locks with
    explicit duration. So we start by swapping these two lists
    and then move elements from new list of transactional
    locks and list of statement locks to list of locks with
    explicit duration.
  */

  m_durations[MDL_EXPLICIT].m_ticket_list.swap(
      m_durations[MDL_TRANSACTION].m_ticket_list);

  // Drain the statement and (post-swap) transactional lists into the
  // explicit list, one ticket at a time.
  for (i = 0; i < MDL_EXPLICIT; i++) {
    m_durations[i].m_mat_front = nullptr;
    List_iterator it_ticket(m_durations[i].m_ticket_list);

    while ((ticket = it_ticket++)) {
      m_durations[i].m_ticket_list.remove(ticket);
      m_durations[MDL_EXPLICIT].m_ticket_list.push_front(ticket);
    }
  }

#ifndef DBUG_OFF
  // Debug builds track per-ticket duration; keep it consistent.
  List_iterator exp_it(m_durations[MDL_EXPLICIT].m_ticket_list);

  while ((ticket = exp_it++)) {
    ticket->set_duration(MDL_EXPLICIT);
  }
#endif

  // Must also change duration in map
  if (m_map != nullptr) {
    for (auto &p : *m_map) {
      p.second.m_dur = MDL_EXPLICIT;
    }
  }
}
4764 
/**
  Move all explicit-duration tickets to the transactional list, updating
  the hash map's stored durations to match. The statement list must be
  empty when this is called (asserted below).
*/
void MDL_ticket_store::move_explicit_to_transaction_duration() {
  MDL_ticket *ticket;

  /*
    In the most common case when this function is called list
    of explicit locks is bigger than two other lists (in fact,
    list of statement locks is always empty). So we start by
    swapping list of explicit and transactional locks and then
    move contents of new list of explicit locks to list of
    locks with transactional duration.
  */

  DBUG_ASSERT(m_durations[MDL_STATEMENT].m_ticket_list.is_empty());
  DBUG_ASSERT(m_durations[MDL_STATEMENT].m_mat_front == nullptr);

  m_durations[MDL_TRANSACTION].m_ticket_list.swap(
      m_durations[MDL_EXPLICIT].m_ticket_list);

  m_durations[MDL_EXPLICIT].m_mat_front = nullptr;
  // Drain what remains in the (post-swap) explicit list into the
  // transactional list, one ticket at a time.
  List_iterator it_ticket(m_durations[MDL_EXPLICIT].m_ticket_list);

  while ((ticket = it_ticket++)) {
    m_durations[MDL_EXPLICIT].m_ticket_list.remove(ticket);
    m_durations[MDL_TRANSACTION].m_ticket_list.push_front(ticket);
  }
  /*
    Note that we do not update
    m_durations[MDL_TRANSACTION].m_mat_front here. That is ok, since
    it is only to be used as an optimization for
    MDL_context::materialize_fast_path_locks(). So if the tickets
    being added are already materialized it does not break an
    invariant, and m_mat_front will be updated the next time
    MDL_context::materialize_fast_path_locks() runs.
  */

#ifndef DBUG_OFF
  // Debug builds track per-ticket duration; keep it consistent.
  List_iterator trans_it(m_durations[MDL_TRANSACTION].m_ticket_list);

  while ((ticket = trans_it++)) {
    ticket->set_duration(MDL_TRANSACTION);
  }
#endif

  // Must also change duration in map
  if (m_map != nullptr) {
    for (auto &p : *m_map) {
      p.second.m_dur = MDL_TRANSACTION;
    }
  }
}
4815 
find(const MDL_request & req) const4816 MDL_ticket_store::MDL_ticket_handle MDL_ticket_store::find(
4817     const MDL_request &req) const {
4818 #ifndef DBUG_OFF
4819   if (m_count >= THRESHOLD) {
4820     MDL_ticket_handle list_h = find_in_lists(req);
4821     MDL_ticket_handle hash_h = find_in_hash(req);
4822 
4823     DBUG_ASSERT(equivalent(list_h.m_ticket, hash_h.m_ticket, req.duration));
4824   }
4825 #endif /*! DBUG_OFF */
4826   return (m_map == nullptr || m_count < THRESHOLD) ? find_in_lists(req)
4827                                                    : find_in_hash(req);
4828 }
4829 
set_materialized()4830 void MDL_ticket_store::set_materialized() {
4831   for (auto &d : m_durations) {
4832     d.m_mat_front = d.m_ticket_list.front();
4833   }
4834 }
4835 
// First materialized ticket for duration index di; null means the
// whole list still needs materialization.
MDL_ticket *MDL_ticket_store::materialized_front(int di) {
  return m_durations[di].m_mat_front;
}
4839