1 /* Copyright (c) 2007, 2016, Oracle and/or its affiliates. All rights reserved.
2 
3    This program is free software; you can redistribute it and/or modify
4    it under the terms of the GNU General Public License, version 2.0,
5    as published by the Free Software Foundation.
6 
7    This program is also distributed with certain software (including
8    but not limited to OpenSSL) that is licensed under separate terms,
9    as designated in a particular file or component or in included license
10    documentation.  The authors of MySQL hereby grant you an additional
11    permission to link the program and your derivative works with the
12    separately licensed software that they have included with MySQL.
13 
14    This program is distributed in the hope that it will be useful,
15    but WITHOUT ANY WARRANTY; without even the implied warranty of
16    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17    GNU General Public License, version 2.0, for more details.
18 
19    You should have received a copy of the GNU General Public License
20    along with this program; if not, write to the Free Software Foundation,
21    51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA */
22 
23 
24 #include "mdl.h"
25 #include "debug_sync.h"
26 #include "sql_array.h"
27 #include <hash.h>
28 #include <mysqld_error.h>
29 #include <mysql/plugin.h>
30 #include <mysql/service_thd_wait.h>
31 #include <mysql/psi/mysql_stage.h>
32 #include <my_murmur3.h>
33 
#ifdef HAVE_PSI_INTERFACE
static PSI_mutex_key key_MDL_map_mutex;
static PSI_mutex_key key_MDL_wait_LOCK_wait_status;

static PSI_mutex_info all_mdl_mutexes[]=
{
  { &key_MDL_map_mutex, "MDL_map::mutex", 0},
  { &key_MDL_wait_LOCK_wait_status, "MDL_wait::LOCK_wait_status", 0}
};

static PSI_rwlock_key key_MDL_lock_rwlock;
static PSI_rwlock_key key_MDL_context_LOCK_waiting_for;

static PSI_rwlock_info all_mdl_rwlocks[]=
{
  { &key_MDL_lock_rwlock, "MDL_lock::rwlock", 0},
  { &key_MDL_context_LOCK_waiting_for, "MDL_context::LOCK_waiting_for", 0}
};

static PSI_cond_key key_MDL_wait_COND_wait_status;

static PSI_cond_info all_mdl_conds[]=
{
  { &key_MDL_wait_COND_wait_status, "MDL_context::COND_wait_status", 0}
};

/**
  Register all performance schema instrumentation points
  used by the MDL subsystem: mutexes, rwlocks, condition
  variables and (via MDL_key) wait-stage names.
*/
static void init_mdl_psi_keys(void)
{
  int mutex_count= array_elements(all_mdl_mutexes);
  mysql_mutex_register("sql", all_mdl_mutexes, mutex_count);

  int rwlock_count= array_elements(all_mdl_rwlocks);
  mysql_rwlock_register("sql", all_mdl_rwlocks, rwlock_count);

  int cond_count= array_elements(all_mdl_conds);
  mysql_cond_register("sql", all_mdl_conds, cond_count);

  /* Stage (thread-state) names are registered by MDL_key itself. */
  MDL_key::init_psi_keys();
}
#endif /* HAVE_PSI_INTERFACE */
80 
81 
/**
  Thread state names to be used in case when we have to wait on resource
  belonging to certain namespace.

  NOTE(review): the array is sized by NAMESPACE_END, so entry order
  presumably must match MDL_key's namespace enumeration — keep the two
  in sync when adding a namespace (verify against mdl.h).
*/

PSI_stage_info MDL_key::m_namespace_to_wait_state_name[NAMESPACE_END]=
{
  {0, "Waiting for global read lock", 0},
  {0, "Waiting for schema metadata lock", 0},
  {0, "Waiting for table metadata lock", 0},
  {0, "Waiting for stored function metadata lock", 0},
  {0, "Waiting for stored procedure metadata lock", 0},
  {0, "Waiting for trigger metadata lock", 0},
  {0, "Waiting for event metadata lock", 0},
  {0, "Waiting for commit lock", 0}
};
98 
#ifdef HAVE_PSI_INTERFACE
/**
  Register the per-namespace wait-state (stage) names with the
  performance schema, one entry at a time.
*/
void MDL_key::init_psi_keys()
{
  const int count= array_elements(MDL_key::m_namespace_to_wait_state_name);

  for (int i= 0; i < count; i++)
  {
    /* mysql_stage_register expects an array of pointers; pass one pointer. */
    PSI_stage_info *info MY_ATTRIBUTE((unused))=
      & MDL_key::m_namespace_to_wait_state_name[i];
    mysql_stage_register("sql", &info, 1);
  }
}
#endif
115 
/** TRUE once mdl_init() has run and until mdl_destroy() tears down. */
static bool mdl_initialized= 0;


/* Forward declarations for the unused-lock-object cache (defined below). */
class MDL_object_lock;
class MDL_object_lock_cache_adapter;
121 
122 
/**
  A partition in a collection of all MDL locks.
  MDL_map is partitioned for scalability reasons.
  Maps MDL_key to MDL_lock instances.
*/

class MDL_map_partition
{
public:
  MDL_map_partition();
  ~MDL_map_partition();
  /** Find (or create) the MDL_lock for a key; returns it with its
      m_rwlock write-locked, or NULL on OOM. */
  inline MDL_lock *find_or_insert(const MDL_key *mdl_key,
                                  my_hash_value_type hash_value);
  inline void remove(MDL_lock *lock);
  /** Hash of the key; MDL_map also uses this value to pick the partition. */
  my_hash_value_type get_key_hash(const MDL_key *mdl_key) const
  {
    return my_calc_hash(&m_locks, mdl_key->ptr(), mdl_key->length());
  }
private:
  bool move_from_hash_to_lock_mutex(MDL_lock *lock);
  /** A partition of all acquired locks in the server. */
  HASH m_locks;
  /* Protects access to m_locks hash. */
  mysql_mutex_t m_mutex;
  /**
    Cache of (unused) MDL_lock objects available for re-use.

    On some systems (e.g. Windows XP) constructing/destructing
    MDL_lock objects can be fairly expensive. We use this cache
    to avoid these costs in scenarios in which they can have
    significant negative effect on performance. For example, when
    there is only one thread constantly executing statements in
    auto-commit mode and thus constantly causing creation/
    destruction of MDL_lock objects for the tables it uses.

    Note that this cache contains only MDL_object_lock objects.

    Protected by m_mutex mutex.
  */
  typedef I_P_List<MDL_object_lock, MDL_object_lock_cache_adapter,
                   I_P_List_counter>
          Lock_cache;
  Lock_cache m_unused_locks_cache;
};
167 
168 
/**
  Start-up parameter for the number of partitions of the MDL_lock hash.
  The partition index is derived as hash % mdl_locks_hash_partitions
  (see MDL_map::find_or_insert()).
*/
ulong mdl_locks_hash_partitions;
173 
/**
  A collection of all MDL locks. A singleton,
  there is only one instance of the map in the server.
  Contains instances of MDL_map_partition
*/

class MDL_map
{
public:
  void init();
  void destroy();
  /** Look up (or create) the lock for a key; returned with its
      m_rwlock write-locked, NULL on OOM. */
  MDL_lock *find_or_insert(const MDL_key *key);
  void remove(MDL_lock *lock);
private:
  /** Array of partitions where the locks are actually stored. */
  Dynamic_array<MDL_map_partition *> m_partitions;
  /** Pre-allocated MDL_lock object for GLOBAL namespace. */
  MDL_lock *m_global_lock;
  /** Pre-allocated MDL_lock object for COMMIT namespace. */
  MDL_lock *m_commit_lock;
};
195 
196 
/**
  A context of the recursive traversal through all contexts
  in all sessions in search for deadlock.
*/

class Deadlock_detection_visitor: public MDL_wait_for_graph_visitor
{
public:
  Deadlock_detection_visitor(MDL_context *start_node_arg)
    : m_start_node(start_node_arg),
      m_victim(NULL),
      m_current_search_depth(0),
      m_found_deadlock(FALSE)
  {}
  virtual bool enter_node(MDL_context *node);
  virtual void leave_node(MDL_context *node);

  virtual bool inspect_edge(MDL_context *dest);

  /** The chosen victim, or NULL if no deadlock was found. */
  MDL_context *get_victim() const { return m_victim; }
private:
  /**
    Change the deadlock victim to a new one if it has lower deadlock
    weight.
  */
  void opt_change_victim_to(MDL_context *new_victim);
private:
  /**
    The context which has initiated the search. There
    can be multiple searches happening in parallel at the same time.
  */
  MDL_context *m_start_node;
  /** If a deadlock is found, the context that identifies the victim. */
  MDL_context *m_victim;
  /** Set to 0 at start. Increased whenever
    we descend into another MDL context (aka traverse to the next
    wait-for graph node). When MAX_SEARCH_DEPTH is reached, we
    assume that a deadlock is found, even if we have not found a
    loop.
  */
  uint m_current_search_depth;
  /** TRUE if we found a deadlock. */
  bool m_found_deadlock;
  /**
    Maximum depth for deadlock searches. After this depth is
    achieved we will unconditionally declare that there is a
    deadlock.

    @note This depth should be small enough to avoid stack
          being exhausted by recursive search algorithm.

    TODO: Find out what is the optimal value for this parameter.
          Current value is safe, but probably sub-optimal,
          as there is an anecdotal evidence that real-life
          deadlocks are even shorter typically.
  */
  static const uint MAX_SEARCH_DEPTH= 32;
};
255 
256 
257 /**
258   Enter a node of a wait-for graph. After
259   a node is entered, inspect_edge() will be called
260   for all wait-for destinations of this node. Then
261   leave_node() will be called.
262   We call "enter_node()" for all nodes we inspect,
263   including the starting node.
264 
265   @retval  TRUE  Maximum search depth exceeded.
266   @retval  FALSE OK.
267 */
268 
enter_node(MDL_context * node)269 bool Deadlock_detection_visitor::enter_node(MDL_context *node)
270 {
271   m_found_deadlock= ++m_current_search_depth >= MAX_SEARCH_DEPTH;
272   if (m_found_deadlock)
273   {
274     DBUG_ASSERT(! m_victim);
275     opt_change_victim_to(node);
276   }
277   return m_found_deadlock;
278 }
279 
280 
281 /**
282   Done inspecting this node. Decrease the search
283   depth. If a deadlock is found, and we are
284   backtracking to the start node, optionally
285   change the deadlock victim to one with lower
286   deadlock weight.
287 */
288 
leave_node(MDL_context * node)289 void Deadlock_detection_visitor::leave_node(MDL_context *node)
290 {
291   --m_current_search_depth;
292   if (m_found_deadlock)
293     opt_change_victim_to(node);
294 }
295 
296 
297 /**
298   Inspect a wait-for graph edge from one MDL context to another.
299 
300   @retval TRUE   A loop is found.
301   @retval FALSE  No loop is found.
302 */
303 
inspect_edge(MDL_context * node)304 bool Deadlock_detection_visitor::inspect_edge(MDL_context *node)
305 {
306   m_found_deadlock= node == m_start_node;
307   return m_found_deadlock;
308 }
309 
310 
/**
  Change the deadlock victim to a new one if it has lower deadlock
  weight.

  @param new_victim  Candidate victim context.

  @note The ">=" comparison means that among equally-weighted
        candidates, the most recently offered one wins.
  @note The new victim is locked (lock_deadlock_victim()) before the
        previous one is released, so at most one extra context is
        held locked at any moment.
*/

void
Deadlock_detection_visitor::opt_change_victim_to(MDL_context *new_victim)
{
  if (m_victim == NULL ||
      m_victim->get_deadlock_weight() >= new_victim->get_deadlock_weight())
  {
    /* Swap victims, unlock the old one. */
    MDL_context *tmp= m_victim;
    m_victim= new_victim;
    m_victim->lock_deadlock_victim();
    if (tmp)
      tmp->unlock_deadlock_victim();
  }
}
333 
334 
/**
  Get a bit corresponding to enum_mdl_type value in a granted/waiting bitmaps
  and compatibility matrices. The cast keeps the result in
  MDL_lock::bitmap_t after the int-typed shift.
*/

#define MDL_BIT(A) static_cast<MDL_lock::bitmap_t>(1U << A)
341 
/**
  The lock context. Created internally for an acquired lock.
  For a given name, there exists only one MDL_lock instance,
  and it exists only when the lock has been granted.
  Can be seen as an MDL subsystem's version of TABLE_SHARE.

  This is an abstract class which lacks information about
  compatibility rules for lock types. They should be specified
  in its descendants.
*/

class MDL_lock
{
public:
  /** One bit per enum_mdl_type value; see MDL_BIT(). */
  typedef unsigned short bitmap_t;

  /**
    List of MDL_tickets plus a bitmap summarizing which lock types
    occur in the list, so type checks do not need a list walk.
  */
  class Ticket_list
  {
  public:
    typedef I_P_List<MDL_ticket,
                     I_P_List_adapter<MDL_ticket,
                                      &MDL_ticket::next_in_lock,
                                      &MDL_ticket::prev_in_lock>,
                     I_P_List_null_counter,
                     I_P_List_fast_push_back<MDL_ticket> >
            List;
    operator const List &() const { return m_list; }
    Ticket_list() :m_bitmap(0) {}

    void add_ticket(MDL_ticket *ticket);
    void remove_ticket(MDL_ticket *ticket);
    bool is_empty() const { return m_list.is_empty(); }
    bitmap_t bitmap() const { return m_bitmap; }
  private:
    /** Drop a type's bit from m_bitmap once no ticket of that type
        remains in the list. */
    void clear_bit_if_not_in_list(enum_mdl_type type);
  private:
    /** List of tickets. */
    List m_list;
    /** Bitmap of types of tickets in this list. */
    bitmap_t m_bitmap;
  };

  typedef Ticket_list::List::Iterator Ticket_iterator;

public:
  /** The key of the object (data) being protected. */
  MDL_key key;
  /**
    Read-write lock protecting this lock context.

    @note The fact that we use read-write lock prefers readers here is
          important as deadlock detector won't work correctly otherwise.

          For example, imagine that we have following waiters graph:

                       ctxA -> obj1 -> ctxB -> obj1 -|
                        ^                            |
                        |----------------------------|

          and both ctxA and ctxB start deadlock detection process:

            ctxA read-locks obj1             ctxB read-locks obj2
            ctxA goes deeper                 ctxB goes deeper

          Now ctxC comes in who wants to start waiting on obj1, also
          ctxD comes in who wants to start waiting on obj2.

            ctxC tries to write-lock obj1   ctxD tries to write-lock obj2
            ctxC is blocked                 ctxD is blocked

          Now ctxA and ctxB resume their search:

            ctxA tries to read-lock obj2    ctxB tries to read-lock obj1

          If m_rwlock prefers writes (or fair) both ctxA and ctxB would be
          blocked because of pending write locks from ctxD and ctxC
          correspondingly. Thus we will get a deadlock in deadlock detector.
          If m_rwlock prefers readers (actually ignoring pending writers is
          enough) ctxA and ctxB will continue and no deadlock will occur.
  */
  mysql_prlock_t m_rwlock;

  /** TRUE if no context holds or waits for this lock. */
  bool is_empty() const
  {
    return (m_granted.is_empty() && m_waiting.is_empty());
  }

  /* Per-subclass compatibility matrices, indexed by requested lock type. */
  virtual const bitmap_t *incompatible_granted_types_bitmap() const = 0;
  virtual const bitmap_t *incompatible_waiting_types_bitmap() const = 0;

  bool has_pending_conflicting_lock(enum_mdl_type type);

  bool can_grant_lock(enum_mdl_type type, MDL_context *requstor_ctx,
                      bool ignore_lock_priority) const;

  inline static MDL_lock *create(const MDL_key *key,
                                 MDL_map_partition *map_part);

  void reschedule_waiters();

  void remove_ticket(Ticket_list MDL_lock::*queue, MDL_ticket *ticket);

  bool visit_subgraph(MDL_ticket *waiting_ticket,
                      MDL_wait_for_graph_visitor *gvisitor);

  virtual bool needs_notification(const MDL_ticket *ticket) const = 0;
  virtual void notify_conflicting_locks(MDL_context *ctx) = 0;

  /** Lock types which may starve others; see m_hog_lock_count. */
  virtual bitmap_t hog_lock_types_bitmap() const = 0;

  /** List of granted tickets for this lock. */
  Ticket_list m_granted;
  /** Tickets for contexts waiting to acquire a lock. */
  Ticket_list m_waiting;

  /**
    Number of times high priority lock requests have been granted while
    low priority lock requests were waiting.
  */
  ulong m_hog_lock_count;

public:

  MDL_lock(const MDL_key *key_arg, MDL_map_partition *map_part)
  : key(key_arg),
    m_hog_lock_count(0),
    m_ref_usage(0),
    m_ref_release(0),
    m_is_destroyed(FALSE),
    m_version(0),
    m_map_part(map_part)
  {
    mysql_prlock_init(key_MDL_lock_rwlock, &m_rwlock);
  }

  virtual ~MDL_lock()
  {
    mysql_prlock_destroy(&m_rwlock);
  }
  inline static void destroy(MDL_lock *lock);
public:
  /**
    These three members are used to make it possible to separate
    the MDL_map_partition::m_mutex mutex and MDL_lock::m_rwlock in
    MDL_map::find_or_insert() for increased scalability.
    The 'm_is_destroyed' member is only set by destroyers that
    have both the MDL_map_partition::m_mutex and MDL_lock::m_rwlock, thus
    holding any of the mutexes is sufficient to read it.
    The 'm_ref_usage' is incremented under protection by
    MDL_map_partition::m_mutex, but when 'm_is_destroyed' is set to TRUE, this
    member is moved to be protected by the MDL_lock::m_rwlock.
    This means that the MDL_map::find_or_insert() which only
    holds the MDL_lock::m_rwlock can compare it to 'm_ref_release'
    without acquiring MDL_map_partition::m_mutex again and if equal
    it can also destroy the lock object safely.
    The 'm_ref_release' is incremented under protection by
    MDL_lock::m_rwlock.
    Note since we are only interested in equality of these two
    counters we don't have to worry about overflows as long as
    their size is big enough to hold maximum number of concurrent
    threads on the system.
  */
  uint m_ref_usage;
  uint m_ref_release;
  bool m_is_destroyed;
  /**
    We use the same idea and an additional version counter to support
    caching of unused MDL_lock object for further re-use.
    This counter is incremented while holding both MDL_map_partition::m_mutex
    and MDL_lock::m_rwlock locks each time when a MDL_lock is moved from
    the partitioned hash to the partitioned unused objects list (or destroyed).
    A thread, which has found a MDL_lock object for the key in the hash
    and then released the MDL_map_partition::m_mutex before acquiring the
    MDL_lock::m_rwlock, can determine that this object was moved to the
    unused objects list (or destroyed) while it held no locks by comparing
    the version value which it read while holding the MDL_map_partition::m_mutex
    with the value read after acquiring the MDL_lock::m_rwlock.
    Note that since it takes several years to overflow this counter such
    theoretically possible overflows should not have any practical effects.
  */
  ulonglong m_version;
  /**
    Partition of MDL_map where the lock is stored.
  */
  MDL_map_partition *m_map_part;
};
528 
529 
/**
  An implementation of the scoped metadata lock. The only locking modes
  which are supported at the moment are SHARED and INTENTION EXCLUSIVE
  and EXCLUSIVE
*/

class MDL_scoped_lock : public MDL_lock
{
public:
  MDL_scoped_lock(const MDL_key *key_arg, MDL_map_partition *map_part)
    : MDL_lock(key_arg, map_part)
  { }

  virtual const bitmap_t *incompatible_granted_types_bitmap() const
  {
    return m_granted_incompatible;
  }
  virtual const bitmap_t *incompatible_waiting_types_bitmap() const
  {
    return m_waiting_incompatible;
  }
  /** Only waiting S requests warrant notifying conflicting lock owners. */
  virtual bool needs_notification(const MDL_ticket *ticket) const
  {
    return (ticket->get_type() == MDL_SHARED);
  }
  virtual void notify_conflicting_locks(MDL_context *ctx);

  /*
    In scoped locks, only an IX lock request could starve because of X/S.
    But that is practically a very rare case. So just return 0 from
    this function.
  */
  virtual bitmap_t hog_lock_types_bitmap() const
  {
    return 0;
  }

private:
  /* Compatibility matrices, indexed by the type of a requested lock. */
  static const bitmap_t m_granted_incompatible[MDL_TYPE_END];
  static const bitmap_t m_waiting_incompatible[MDL_TYPE_END];
};
570 
571 
/**
  An implementation of a per-object lock. Supports SHARED, SHARED_UPGRADABLE,
  SHARED HIGH PRIORITY and EXCLUSIVE locks.
*/

class MDL_object_lock : public MDL_lock
{
public:
  MDL_object_lock(const MDL_key *key_arg, MDL_map_partition *map_part)
    : MDL_lock(key_arg, map_part)
  { }

  /**
    Reset unused MDL_object_lock object to represent the lock context for a
    different object.
  */
  void reset(const MDL_key *new_key)
  {
    /* We need to change only object's key. */
    key.mdl_key_init(new_key);
    /* m_granted and m_waiting should be already in the empty/initial state. */
    DBUG_ASSERT(is_empty());
    /* Object should not be marked as destroyed. */
    DBUG_ASSERT(! m_is_destroyed);
    /*
      Values of the rest of the fields should be preserved between old and
      new versions of the object. E.g., m_version and m_ref_usage/release
      should be kept intact to properly handle possible remaining references
      to the old version of the object.
    */
  }

  virtual const bitmap_t *incompatible_granted_types_bitmap() const
  {
    return m_granted_incompatible;
  }
  virtual const bitmap_t *incompatible_waiting_types_bitmap() const
  {
    return m_waiting_incompatible;
  }
  /** Owners of conflicting locks are notified for "strong" requests only. */
  virtual bool needs_notification(const MDL_ticket *ticket) const
  {
    return (ticket->get_type() >= MDL_SHARED_NO_WRITE);
  }
  virtual void notify_conflicting_locks(MDL_context *ctx);

  /*
    To prevent starvation, these are the lock types that are granted at
    most max_write_lock_count times in a row while other lock types are
    waiting.
  */
  virtual bitmap_t hog_lock_types_bitmap() const
  {
    return (MDL_BIT(MDL_SHARED_NO_WRITE) |
            MDL_BIT(MDL_SHARED_NO_READ_WRITE) |
            MDL_BIT(MDL_EXCLUSIVE));
  }

private:
  /* Compatibility matrices, indexed by the type of a requested lock. */
  static const bitmap_t m_granted_incompatible[MDL_TYPE_END];
  static const bitmap_t m_waiting_incompatible[MDL_TYPE_END];

public:
  /** Members for linking the object into the list of unused objects. */
  MDL_object_lock *next_in_cache, **prev_in_cache;
};
638 
639 
/**
  Helper class for linking MDL_object_lock objects into the unused objects
  list (MDL_map_partition::m_unused_locks_cache): binds the generic
  I_P_List machinery to the next_in_cache/prev_in_cache members.
*/
class MDL_object_lock_cache_adapter :
      public I_P_List_adapter<MDL_object_lock, &MDL_object_lock::next_in_cache,
                              &MDL_object_lock::prev_in_cache>
{
};
648 
649 
/** The singleton collection of all MDL locks in the server. */
static MDL_map mdl_locks;
/**
  Start-up parameter for the maximum size of the unused MDL_lock objects cache.
*/
ulong mdl_locks_cache_size;
655 
656 
657 extern "C"
658 {
659 static uchar *
mdl_locks_key(const uchar * record,size_t * length,my_bool not_used MY_ATTRIBUTE ((unused)))660 mdl_locks_key(const uchar *record, size_t *length,
661               my_bool not_used MY_ATTRIBUTE((unused)))
662 {
663   MDL_lock *lock=(MDL_lock*) record;
664   *length= lock->key.length();
665   return (uchar*) lock->key.ptr();
666 }
667 } /* extern "C" */
668 
669 
670 /**
671   Initialize the metadata locking subsystem.
672 
673   This function is called at server startup.
674 
675   In particular, initializes the new global mutex and
676   the associated condition variable: LOCK_mdl and COND_mdl.
677   These locking primitives are implementation details of the MDL
678   subsystem and are private to it.
679 */
680 
mdl_init()681 void mdl_init()
682 {
683   DBUG_ASSERT(! mdl_initialized);
684   mdl_initialized= TRUE;
685 
686 #ifdef HAVE_PSI_INTERFACE
687   init_mdl_psi_keys();
688 #endif
689 
690   mdl_locks.init();
691 }
692 
693 
694 /**
695   Release resources of metadata locking subsystem.
696 
697   Destroys the global mutex and the condition variable.
698   Called at server shutdown.
699 */
700 
mdl_destroy()701 void mdl_destroy()
702 {
703   if (mdl_initialized)
704   {
705     mdl_initialized= FALSE;
706     mdl_locks.destroy();
707   }
708 }
709 
710 
711 /** Initialize the container for all MDL locks. */
712 
init()713 void MDL_map::init()
714 {
715   MDL_key global_lock_key(MDL_key::GLOBAL, "", "");
716   MDL_key commit_lock_key(MDL_key::COMMIT, "", "");
717 
718   m_global_lock= MDL_lock::create(&global_lock_key, NULL);
719   m_commit_lock= MDL_lock::create(&commit_lock_key, NULL);
720 
721   for (uint i= 0; i < mdl_locks_hash_partitions; i++)
722   {
723     MDL_map_partition *part= new (std::nothrow) MDL_map_partition();
724     m_partitions.append(part);
725   }
726 }
727 
728 
/**
  Adapter function which allows to use murmur3 with our HASH implementation.

  The HASH* argument required by the callback signature is unused;
  a fixed seed of 0 is passed to murmur3_32().
*/

extern "C" my_hash_value_type murmur3_adapter(const HASH*, const uchar *key,
                                              size_t length)
{
  return murmur3_32(key, length, 0);
}
738 
739 
740 /** Initialize the partition in the container with all MDL locks. */
741 
MDL_map_partition()742 MDL_map_partition::MDL_map_partition()
743 {
744   mysql_mutex_init(key_MDL_map_mutex, &m_mutex, NULL);
745   /*
746     Lower bits of values produced by hash function which is used in 'm_locks'
747     HASH container are also to select specific MDL_map_partition instance.
748     This means that this hash function needs to hash key value in such
749     a way that lower bits in result are sufficiently random. Since standard
750     hash function from 'my_charset_bin' doesn't satisfy this criteria we use
751     MurmurHash3 instead.
752   */
753   my_hash_init3(&m_locks, 0, &my_charset_bin, murmur3_adapter,
754                 16 /* FIXME */, 0, 0, mdl_locks_key,
755                 0, 0);
756 };
757 
758 
759 /**
760   Destroy the container for all MDL locks.
761   @pre It must be empty.
762 */
763 
destroy()764 void MDL_map::destroy()
765 {
766   MDL_lock::destroy(m_global_lock);
767   MDL_lock::destroy(m_commit_lock);
768 
769   while (m_partitions.elements() > 0)
770   {
771     MDL_map_partition *part= m_partitions.pop();
772     delete part;
773   }
774 }
775 
776 
777 /**
778   Destroy the partition in container for all MDL locks.
779   @pre It must be empty.
780 */
781 
~MDL_map_partition()782 MDL_map_partition::~MDL_map_partition()
783 {
784   DBUG_ASSERT(!m_locks.records);
785   mysql_mutex_destroy(&m_mutex);
786   my_hash_free(&m_locks);
787 
788   MDL_object_lock *lock;
789   while ((lock= m_unused_locks_cache.pop_front()))
790     MDL_lock::destroy(lock);
791 }
792 
793 
794 /**
795   Find MDL_lock object corresponding to the key, create it
796   if it does not exist.
797 
798   @retval non-NULL - Success. MDL_lock instance for the key with
799                      locked MDL_lock::m_rwlock.
800   @retval NULL     - Failure (OOM).
801 */
802 
find_or_insert(const MDL_key * mdl_key)803 MDL_lock* MDL_map::find_or_insert(const MDL_key *mdl_key)
804 {
805   MDL_lock *lock;
806 
807   if (mdl_key->mdl_namespace() == MDL_key::GLOBAL ||
808       mdl_key->mdl_namespace() == MDL_key::COMMIT)
809   {
810     /*
811       Avoid locking any m_mutex when lock for GLOBAL or COMMIT namespace is
812       requested. Return pointer to pre-allocated MDL_lock instance instead.
813       Such an optimization allows to save one mutex lock/unlock for any
814       statement changing data.
815 
816       It works since these namespaces contain only one element so keys
817       for them look like '<namespace-id>\0\0'.
818     */
819     DBUG_ASSERT(mdl_key->length() == 3);
820 
821     lock= (mdl_key->mdl_namespace() == MDL_key::GLOBAL) ? m_global_lock :
822                                                           m_commit_lock;
823 
824     mysql_prlock_wrlock(&lock->m_rwlock);
825 
826     return lock;
827   }
828 
829   my_hash_value_type hash_value= m_partitions.at(0)->get_key_hash(mdl_key);
830   uint part_id= hash_value % mdl_locks_hash_partitions;
831   MDL_map_partition *part= m_partitions.at(part_id);
832 
833   return part->find_or_insert(mdl_key, hash_value);
834 }
835 
836 
/**
  Find MDL_lock object corresponding to the key and hash value in
  MDL_map partition, create it if it does not exist.

  @retval non-NULL - Success. MDL_lock instance for the key with
                     locked MDL_lock::m_rwlock.
  @retval NULL     - Failure (OOM).
*/

MDL_lock* MDL_map_partition::find_or_insert(const MDL_key *mdl_key,
                                            my_hash_value_type hash_value)
{
  MDL_lock *lock;

retry:
  mysql_mutex_lock(&m_mutex);
  if (!(lock= (MDL_lock*) my_hash_search_using_hash_value(&m_locks,
                                                          hash_value,
                                                          mdl_key->ptr(),
                                                          mdl_key->length())))
  {
    MDL_object_lock *unused_lock= NULL;

    /*
      No lock object found so we need to create a new one
      or reuse an existing unused object.
    */
    if (mdl_key->mdl_namespace() != MDL_key::SCHEMA &&
        m_unused_locks_cache.elements())
    {
      /*
        We need a MDL_object_lock type of object and the unused objects
        cache has some. Get the first object from the cache and set a new
        key for it.
      */
      DBUG_ASSERT(mdl_key->mdl_namespace() != MDL_key::GLOBAL &&
                  mdl_key->mdl_namespace() != MDL_key::COMMIT);

      unused_lock= m_unused_locks_cache.pop_front();
      unused_lock->reset(mdl_key);

      lock= unused_lock;
    }
    else
    {
      lock= MDL_lock::create(mdl_key, this);
    }

    /*
      NOTE(review): when MDL_lock::create() returned NULL (OOM) with no
      cached object, MDL_lock::destroy(NULL) is reached below — it must
      tolerate NULL; confirm against its definition.
    */
    if (!lock || my_hash_insert(&m_locks, (uchar*)lock))
    {
      if (unused_lock)
      {
        /*
          Note that we can't easily destroy an object from cache here as it
          still might be referenced by other threads. So we simply put it
          back into the cache.
        */
        m_unused_locks_cache.push_front(unused_lock);
      }
      else
      {
        MDL_lock::destroy(lock);
      }
      mysql_mutex_unlock(&m_mutex);
      return NULL;
    }
  }

  /*
    Trade the partition mutex for the lock's own rwlock. If the object
    was recycled or destroyed in the unlocked window, start over.
  */
  if (move_from_hash_to_lock_mutex(lock))
    goto retry;

  return lock;
}
910 
911 
912 /**
913   Release MDL_map_partition::m_mutex mutex and lock MDL_lock::m_rwlock for lock
914   object from the hash. Handle situation when object was released
915   while we held no locks.
916 
917   @retval FALSE - Success.
918   @retval TRUE  - Object was released while we held no mutex, caller
919                   should re-try looking up MDL_lock object in the hash.
920 */
921 
bool MDL_map_partition::move_from_hash_to_lock_mutex(MDL_lock *lock)
{
  ulonglong version;

  DBUG_ASSERT(! lock->m_is_destroyed);
  mysql_mutex_assert_owner(&m_mutex);

  /*
    We increment m_ref_usage which is a reference counter protected by
    MDL_map_partition::m_mutex under the condition it is present in the hash
    and m_is_destroyed is FALSE. This pins the object's memory while we
    hold no locks below.
  */
  lock->m_ref_usage++;
  /* Read value of the version counter under protection of m_mutex lock. */
  version= lock->m_version;
  mysql_mutex_unlock(&m_mutex);

  /*
    From here until we acquire MDL_lock::m_rwlock we hold no locks at all,
    so the object may concurrently be moved to the unused objects cache or
    marked as destroyed. The version comparison below detects both cases.
  */
  mysql_prlock_wrlock(&lock->m_rwlock);
  lock->m_ref_release++;

  if (unlikely(lock->m_version != version))
  {
    /*
      If the current value of version differs from one that was read while
      we held m_mutex mutex, this MDL_lock object was moved to the unused
      objects list or destroyed while we held no locks.
      We should retry our search. But first we should destroy the MDL_lock
      object if necessary.
    */
    if (unlikely(lock->m_is_destroyed))
    {
      /*
        Object was released while we held no locks, we need to
        release it if no others hold references to it, while our own
        reference count ensured that the object as such haven't got
        its memory released yet. We can also safely compare
        m_ref_usage and m_ref_release since the object is no longer
        present in the hash (or unused objects list) so no one will
        be able to find it and increment m_ref_usage anymore.
      */
      uint ref_usage= lock->m_ref_usage;
      uint ref_release= lock->m_ref_release;
      mysql_prlock_unlock(&lock->m_rwlock);
      /* We held the last reference: we are responsible for destruction. */
      if (ref_usage == ref_release)
        MDL_lock::destroy(lock);
    }
    else
    {
      /*
        Object was not destroyed but its version has changed.
        This means that it was moved to the unused objects list
        (and even might be already re-used). So now it might
        correspond to a different key, therefore we should simply
        retry our search.
      */
      mysql_prlock_unlock(&lock->m_rwlock);
    }
    return TRUE;
  }
  return FALSE;
}
983 
984 
985 /**
986   Destroy MDL_lock object or delegate this responsibility to
987   whatever thread that holds the last outstanding reference to
988   it.
989 */
990 
remove(MDL_lock * lock)991 void MDL_map::remove(MDL_lock *lock)
992 {
993   if (lock->key.mdl_namespace() == MDL_key::GLOBAL ||
994       lock->key.mdl_namespace() == MDL_key::COMMIT)
995   {
996     /*
997       Never destroy pre-allocated MDL_lock objects for GLOBAL and
998       COMMIT namespaces.
999     */
1000     mysql_prlock_unlock(&lock->m_rwlock);
1001     return;
1002   }
1003 
1004   lock->m_map_part->remove(lock);
1005 }
1006 
1007 
1008 /**
1009   Destroy MDL_lock object belonging to specific MDL_map
1010   partition or delegate this responsibility to whatever
1011   thread that holds the last outstanding reference to it.
1012 */
1013 
void MDL_map_partition::remove(MDL_lock *lock)
{
  mysql_mutex_lock(&m_mutex);
  /* Make the object unreachable for new lookups in this partition. */
  my_hash_delete(&m_locks, (uchar*) lock);
  /*
    To let threads holding references to the MDL_lock object know that it was
    moved to the list of unused objects or destroyed, we increment the version
    counter under protection of both MDL_map_partition::m_mutex and
    MDL_lock::m_rwlock locks. This allows us to read the version value while
    having either one of those locks.
  */
  lock->m_version++;

  if ((lock->key.mdl_namespace() != MDL_key::SCHEMA) &&
      (m_unused_locks_cache.elements() <
       mdl_locks_cache_size/mdl_locks_hash_partitions))
  {
    /*
      This is an object of MDL_object_lock type and the cache of unused
      objects has not reached its maximum size yet. So instead of destroying
      object we move it to the list of unused objects to allow its later
      re-use with possibly different key. Any threads holding references to
      this object (owning MDL_map_partition::m_mutex or MDL_lock::m_rwlock)
      will notice this thanks to the fact that we have changed the
      MDL_lock::m_version counter.
    */
    DBUG_ASSERT(lock->key.mdl_namespace() != MDL_key::GLOBAL &&
                lock->key.mdl_namespace() != MDL_key::COMMIT);

    m_unused_locks_cache.push_front((MDL_object_lock*)lock);
    mysql_mutex_unlock(&m_mutex);
    mysql_prlock_unlock(&lock->m_rwlock);
  }
  else
  {
    /*
      Destroy the MDL_lock object, unless some other thread still holds
      a reference to it. In that case the last thread to release its
      reference becomes responsible for the destruction (see
      MDL_map_partition::move_from_hash_to_lock_mutex()).

      Setting of m_is_destroyed to TRUE while holding _both_
      MDL_map_partition::m_mutex and MDL_lock::m_rwlock mutexes transfers
      the protection of m_ref_usage from MDL_map_partition::m_mutex to
      MDL_lock::m_rwlock while removal of the object from the hash
      (and cache of unused objects) makes it read-only. Therefore
      whoever acquires MDL_lock::m_rwlock next will see the most up
      to date version of m_ref_usage.

      This means that when m_is_destroyed is TRUE and we hold the
      MDL_lock::m_rwlock we can safely read the m_ref_usage
      member.
    */
    uint ref_usage, ref_release;

    lock->m_is_destroyed= TRUE;
    ref_usage= lock->m_ref_usage;
    ref_release= lock->m_ref_release;
    mysql_mutex_unlock(&m_mutex);
    mysql_prlock_unlock(&lock->m_rwlock);
    /* Equal counters mean no other thread holds a reference: destroy now. */
    if (ref_usage == ref_release)
      MDL_lock::destroy(lock);
  }
}
1077 
1078 
1079 /**
1080   Initialize a metadata locking context.
1081 
1082   This is to be called when a new server connection is created.
1083 */
1084 
MDL_context()1085 MDL_context::MDL_context()
1086   :
1087   m_owner(NULL),
1088   m_needs_thr_lock_abort(FALSE),
1089   m_waiting_for(NULL)
1090 {
1091   mysql_prlock_init(key_MDL_context_LOCK_waiting_for, &m_LOCK_waiting_for);
1092 }
1093 
1094 
1095 /**
1096   Destroy metadata locking context.
1097 
1098   Assumes and asserts that there are no active or pending locks
1099   associated with this context at the time of the destruction.
1100 
1101   Currently does nothing. Asserts that there are no pending
1102   or satisfied lock requests. The pending locks must be released
1103   prior to destruction. This is a new way to express the assertion
1104   that all tables are closed before a connection is destroyed.
1105 */
1106 
void MDL_context::destroy()
{
  /*
    All tickets, whatever their duration, must have been released before
    the context is destroyed (i.e. all tables closed for the connection).
  */
  DBUG_ASSERT(m_tickets[MDL_STATEMENT].is_empty());
  DBUG_ASSERT(m_tickets[MDL_TRANSACTION].is_empty());
  DBUG_ASSERT(m_tickets[MDL_EXPLICIT].is_empty());

  /* Release the prlock allocated by the constructor. */
  mysql_prlock_destroy(&m_LOCK_waiting_for);
}
1115 
1116 
1117 /**
1118   Initialize a lock request.
1119 
1120   This is to be used for every lock request.
1121 
1122   Note that initialization and allocation are split into two
1123   calls. This is to allow flexible memory management of lock
1124   requests. Normally a lock request is stored in statement memory
1125   (e.g. is a member of struct TABLE_LIST), but we would also like
1126   to allow allocation of lock requests in other memory roots,
1127   for example in the grant subsystem, to lock privilege tables.
1128 
1129   The MDL subsystem does not own or manage memory of lock requests.
1130 
1131   @param  mdl_namespace  Id of namespace of object to be locked
1132   @param  db             Name of database to which the object belongs
1133   @param  name           Name of of the object
1134   @param  mdl_type       The MDL lock type for the request.
1135 */
1136 
init(MDL_key::enum_mdl_namespace mdl_namespace,const char * db_arg,const char * name_arg,enum_mdl_type mdl_type_arg,enum_mdl_duration mdl_duration_arg)1137 void MDL_request::init(MDL_key::enum_mdl_namespace mdl_namespace,
1138                        const char *db_arg,
1139                        const char *name_arg,
1140                        enum_mdl_type mdl_type_arg,
1141                        enum_mdl_duration mdl_duration_arg)
1142 {
1143   key.mdl_key_init(mdl_namespace, db_arg, name_arg);
1144   type= mdl_type_arg;
1145   duration= mdl_duration_arg;
1146   ticket= NULL;
1147 }
1148 
1149 
1150 /**
1151   Initialize a lock request using pre-built MDL_key.
1152 
1153   @sa MDL_request::init(namespace, db, name, type).
1154 
1155   @param key_arg       The pre-built MDL key for the request.
1156   @param mdl_type_arg  The MDL lock type for the request.
1157 */
1158 
init(const MDL_key * key_arg,enum_mdl_type mdl_type_arg,enum_mdl_duration mdl_duration_arg)1159 void MDL_request::init(const MDL_key *key_arg,
1160                        enum_mdl_type mdl_type_arg,
1161                        enum_mdl_duration mdl_duration_arg)
1162 {
1163   key.mdl_key_init(key_arg);
1164   type= mdl_type_arg;
1165   duration= mdl_duration_arg;
1166   ticket= NULL;
1167 }
1168 
1169 
1170 /**
1171   Auxiliary functions needed for creation/destruction of MDL_lock objects.
1172 
1173   @note Also chooses an MDL_lock descendant appropriate for object namespace.
1174 */
1175 
create(const MDL_key * mdl_key,MDL_map_partition * map_part)1176 inline MDL_lock *MDL_lock::create(const MDL_key *mdl_key,
1177                                   MDL_map_partition *map_part)
1178 {
1179   switch (mdl_key->mdl_namespace())
1180   {
1181     case MDL_key::GLOBAL:
1182     case MDL_key::SCHEMA:
1183     case MDL_key::COMMIT:
1184       return new (std::nothrow) MDL_scoped_lock(mdl_key, map_part);
1185     default:
1186       return new (std::nothrow) MDL_object_lock(mdl_key, map_part);
1187   }
1188 }
1189 
1190 
void MDL_lock::destroy(MDL_lock *lock)
{
  /* Counterpart of MDL_lock::create(). */
  delete lock;
}
1195 
1196 
1197 /**
1198   Auxiliary functions needed for creation/destruction of MDL_ticket
1199   objects.
1200 
1201   @todo This naive implementation should be replaced with one that saves
1202         on memory allocation by reusing released objects.
1203 */
1204 
MDL_ticket *MDL_ticket::create(MDL_context *ctx_arg, enum_mdl_type type_arg
#ifndef DBUG_OFF
                               , enum_mdl_duration duration_arg
#endif
                               )
{
  /*
    Allocate a ticket on the heap; returns NULL on out-of-memory thanks
    to std::nothrow. The duration argument is compiled in only for debug
    builds (when DBUG_OFF is not defined).
  */
  return new (std::nothrow)
             MDL_ticket(ctx_arg, type_arg
#ifndef DBUG_OFF
                        , duration_arg
#endif
                        );
}
1218 
1219 
void MDL_ticket::destroy(MDL_ticket *ticket)
{
  /* Counterpart of MDL_ticket::create(). */
  delete ticket;
}
1224 
1225 
1226 /**
1227   Return the 'weight' of this ticket for the
1228   victim selection algorithm. Requests with
1229   lower weight are preferred to requests
1230   with higher weight when choosing a victim.
1231 */
1232 
get_deadlock_weight() const1233 uint MDL_ticket::get_deadlock_weight() const
1234 {
1235   return (m_lock->key.mdl_namespace() == MDL_key::GLOBAL ||
1236           m_type >= MDL_SHARED_UPGRADABLE ?
1237           DEADLOCK_WEIGHT_DDL : DEADLOCK_WEIGHT_DML);
1238 }
1239 
1240 
1241 /** Construct an empty wait slot. */
1242 
MDL_wait::MDL_wait()
  :m_wait_status(EMPTY)
{
  /* Mutex/condvar pair used to post and wait for the slot's status. */
  mysql_mutex_init(key_MDL_wait_LOCK_wait_status, &m_LOCK_wait_status, NULL);
  mysql_cond_init(key_MDL_wait_COND_wait_status, &m_COND_wait_status, NULL);
}
1249 
1250 
1251 /** Destroy system resources. */
1252 
MDL_wait::~MDL_wait()
{
  /* Release the synchronization primitives created by the constructor. */
  mysql_mutex_destroy(&m_LOCK_wait_status);
  mysql_cond_destroy(&m_COND_wait_status);
}
1258 
1259 
1260 /**
1261   Set the status unless it's already set. Return FALSE if set,
1262   TRUE otherwise.
1263 */
1264 
set_status(enum_wait_status status_arg)1265 bool MDL_wait::set_status(enum_wait_status status_arg)
1266 {
1267   bool was_occupied= TRUE;
1268   mysql_mutex_lock(&m_LOCK_wait_status);
1269   if (m_wait_status == EMPTY)
1270   {
1271     was_occupied= FALSE;
1272     m_wait_status= status_arg;
1273     mysql_cond_signal(&m_COND_wait_status);
1274   }
1275   mysql_mutex_unlock(&m_LOCK_wait_status);
1276   return was_occupied;
1277 }
1278 
1279 
1280 /** Query the current value of the wait slot. */
1281 
get_status()1282 MDL_wait::enum_wait_status MDL_wait::get_status()
1283 {
1284   enum_wait_status result;
1285   mysql_mutex_lock(&m_LOCK_wait_status);
1286   result= m_wait_status;
1287   mysql_mutex_unlock(&m_LOCK_wait_status);
1288   return result;
1289 }
1290 
1291 
1292 /** Clear the current value of the wait slot. */
1293 
reset_status()1294 void MDL_wait::reset_status()
1295 {
1296   mysql_mutex_lock(&m_LOCK_wait_status);
1297   m_wait_status= EMPTY;
1298   mysql_mutex_unlock(&m_LOCK_wait_status);
1299 }
1300 
1301 
1302 /**
1303   Wait for the status to be assigned to this wait slot.
1304 
1305   @param owner           MDL context owner.
1306   @param abs_timeout     Absolute time after which waiting should stop.
1307   @param set_status_on_timeout TRUE  - If in case of timeout waiting
1308                                        context should close the wait slot by
1309                                        sending TIMEOUT to itself.
1310                                FALSE - Otherwise.
1311   @param wait_state_name  Thread state name to be set for duration of wait.
1312 
1313   @returns Signal posted.
1314 */
1315 
MDL_wait::enum_wait_status
MDL_wait::timed_wait(MDL_context_owner *owner, struct timespec *abs_timeout,
                     bool set_status_on_timeout,
                     const PSI_stage_info *wait_state_name)
{
  PSI_stage_info old_stage;
  enum_wait_status result;
  int wait_result= 0;

  mysql_mutex_lock(&m_LOCK_wait_status);

  /* Register the condvar/stage so the owner reports itself as waiting. */
  owner->ENTER_COND(&m_COND_wait_status, &m_LOCK_wait_status,
                    wait_state_name, & old_stage);
  /* Notify the thread scheduler that this thread is about to block. */
  thd_wait_begin(NULL, THD_WAIT_META_DATA_LOCK);
  /*
    The loop protects against spurious wake-ups: keep waiting until a
    status is posted, the owner is killed, or the deadline passes
    (ETIME is the timeout code returned on some platforms).
  */
  while (!m_wait_status && !owner->is_killed() &&
         wait_result != ETIMEDOUT && wait_result != ETIME)
  {
    wait_result= mysql_cond_timedwait(&m_COND_wait_status, &m_LOCK_wait_status,
                                      abs_timeout);
  }
  thd_wait_end(NULL);

  if (m_wait_status == EMPTY)
  {
    /*
      Wait has ended not due to a status being set from another
      thread but due to this connection/statement being killed or a
      time out.
      To avoid races, which may occur if another thread sets
      GRANTED status before the code which calls this method
      processes the abort/timeout, we assign the status under
      protection of the m_LOCK_wait_status, within the critical
      section. An exception is when set_status_on_timeout is
      false, which means that the caller intends to restart the
      wait.
    */
    if (owner->is_killed())
      m_wait_status= KILLED;
    else if (set_status_on_timeout)
      m_wait_status= TIMEOUT;
  }
  result= m_wait_status;

  /* Restores the previous thread stage and releases m_LOCK_wait_status. */
  owner->EXIT_COND(& old_stage);

  return result;
}
1363 
1364 
1365 /**
1366   Clear bit corresponding to the type of metadata lock in bitmap representing
1367   set of such types if list of tickets does not contain ticket with such type.
1368 
1369   @param[in,out]  bitmap  Bitmap representing set of types of locks.
1370   @param[in]      list    List to inspect.
1371   @param[in]      type    Type of metadata lock to look up in the list.
1372 */
1373 
clear_bit_if_not_in_list(enum_mdl_type type)1374 void MDL_lock::Ticket_list::clear_bit_if_not_in_list(enum_mdl_type type)
1375 {
1376   MDL_lock::Ticket_iterator it(m_list);
1377   const MDL_ticket *ticket;
1378 
1379   while ((ticket= it++))
1380     if (ticket->get_type() == type)
1381       return;
1382   m_bitmap&= ~ MDL_BIT(type);
1383 }
1384 
1385 
1386 /**
1387   Add ticket to MDL_lock's list of waiting requests and
1388   update corresponding bitmap of lock types.
1389 */
1390 
add_ticket(MDL_ticket * ticket)1391 void MDL_lock::Ticket_list::add_ticket(MDL_ticket *ticket)
1392 {
1393   /*
1394     Ticket being added to the list must have MDL_ticket::m_lock set,
1395     since for such tickets methods accessing this member might be
1396     called by other threads.
1397   */
1398   DBUG_ASSERT(ticket->get_lock());
1399   /*
1400     Add ticket to the *back* of the queue to ensure fairness
1401     among requests with the same priority.
1402   */
1403   m_list.push_back(ticket);
1404   m_bitmap|= MDL_BIT(ticket->get_type());
1405 }
1406 
1407 
1408 /**
1409   Remove ticket from MDL_lock's list of requests and
1410   update corresponding bitmap of lock types.
1411 */
1412 
remove_ticket(MDL_ticket * ticket)1413 void MDL_lock::Ticket_list::remove_ticket(MDL_ticket *ticket)
1414 {
1415   m_list.remove(ticket);
1416   /*
1417     Check if waiting queue has another ticket with the same type as
1418     one which was removed. If there is no such ticket, i.e. we have
1419     removed last ticket of particular type, then we need to update
1420     bitmap of waiting ticket's types.
1421     Note that in most common case, i.e. when shared lock is removed
1422     from waiting queue, we are likely to find ticket of the same
1423     type early without performing full iteration through the list.
1424     So this method should not be too expensive.
1425   */
1426   clear_bit_if_not_in_list(ticket->get_type());
1427 }
1428 
1429 
1430 /**
1431   Determine waiting contexts which requests for the lock can be
1432   satisfied, grant lock to them and wake them up.
1433 
1434   @note Together with MDL_lock::add_ticket() this method implements
1435         fair scheduling among requests with the same priority.
1436         It tries to grant lock from the head of waiters list, while
1437         add_ticket() adds new requests to the back of this list.
1438 
1439 */
1440 
void MDL_lock::reschedule_waiters()
{
  MDL_lock::Ticket_iterator it(m_waiting);
  MDL_ticket *ticket;
  bool skip_high_priority= false;
  /* Bitmap of the high-priority, strong ("hog") lock types. */
  bitmap_t hog_lock_types= hog_lock_types_bitmap();

  if (m_hog_lock_count >= max_write_lock_count)
  {
    /*
      If number of successively granted high-prio, strong locks has exceeded
      max_write_lock_count give a way to low-prio, weak locks to avoid their
      starvation.
    */

    if ((m_waiting.bitmap() & ~hog_lock_types) != 0)
    {
      /*
        Even though normally when m_hog_lock_count is non-0 there is
        some pending low-prio lock, we still can encounter situation
        when m_hog_lock_count is non-0 and there are no pending low-prio
        locks. This, for example, can happen when a ticket for pending
        low-prio lock was removed from waiters list due to timeout,
        and reschedule_waiters() is called after that to update the
        waiters queue. m_hog_lock_count will be reset to 0 at the
        end of this call in such case.

        Note that it is not an issue if we fail to wake up any pending
        waiters for weak locks in the loop below. This would mean that
        all of them are either killed, timed out or chosen as a victim
        by deadlock resolver, but have not managed to remove ticket
        from the waiters list yet. After tickets will be removed from
        the waiters queue there will be another call to
        reschedule_waiters() with pending bitmap updated to reflect new
        state of waiters queue.
      */
      skip_high_priority= true;
    }
  }

  /*
    Find the first (and hence the oldest) waiting request which
    can be satisfied (taking into account priority). Grant lock to it.
    Repeat the process for the remainder of waiters.
    Note we don't need to re-start iteration from the head of the
    list after satisfying the first suitable request as in our case
    all compatible types of requests have the same priority.

    TODO/FIXME: We should:
                - Either switch to scheduling without priorities
                  which will allow to stop iteration through the
                  list of waiters once we found the first ticket
                  which can't be  satisfied
                - Or implement some check using bitmaps which will
                  allow to stop iteration in cases when, e.g., we
                  grant SNRW lock and there are no pending S or
                  SH locks.
  */
  while ((ticket= it++))
  {
    /*
      Skip high-prio, strong locks if earlier we have decided to give way to
      low-prio, weaker locks.
    */
    if (skip_high_priority &&
        ((MDL_BIT(ticket->get_type()) & hog_lock_types) != 0))
      continue;

    if (can_grant_lock(ticket->get_type(), ticket->get_ctx(),
                       skip_high_priority))
    {
      /* set_status() returns FALSE only if the slot was still empty. */
      if (! ticket->get_ctx()->m_wait.set_status(MDL_wait::GRANTED))
      {
        /*
          Satisfy the found request by updating lock structures.
          It is OK to do so even after waking up the waiter since any
          session which tries to get any information about the state of
          this lock has to acquire MDL_lock::m_rwlock first and thus,
          when manages to do so, already sees an updated state of the
          MDL_lock object.
        */
        m_waiting.remove_ticket(ticket);
        m_granted.add_ticket(ticket);

        /*
          Increase counter of successively granted high-priority strong locks,
          if we have granted one.
        */
        if ((MDL_BIT(ticket->get_type()) & hog_lock_types) != 0)
          m_hog_lock_count++;
      }
      /*
        If we could not update the wait slot of the waiter,
        it can be due to fact that its connection/statement was
        killed or it has timed out (i.e. the slot is not empty).
        Since in all such cases the waiter assumes that the lock was
        not been granted, we should keep the request in the waiting
        queue and look for another request to reschedule.
      */
    }
  }

  if ((m_waiting.bitmap() & ~hog_lock_types) == 0)
  {
    /*
      Reset number of successively granted high-prio, strong locks
      if there are no pending low-prio, weak locks.
      This ensures:
      - That m_hog_lock_count is correctly reset after strong lock
      is released and weak locks are granted (or there are no
      other lock requests).
      - That situation when SNW lock is granted along with some SR
      locks, but SW locks are still blocked are handled correctly.
      - That m_hog_lock_count is zero in most cases when there are no pending
      weak locks (see comment at the start of this method for example of
      exception). This allows to save on checks at the start of this method.
    */
    m_hog_lock_count= 0;
  }
}
1561 
1562 
1563 /**
1564   Compatibility (or rather "incompatibility") matrices for scoped metadata
1565   lock. Arrays of bitmaps which elements specify which granted/waiting locks
1566   are incompatible with type of lock being requested.
1567 
1568   The first array specifies if particular type of request can be satisfied
1569   if there is granted scoped lock of certain type.
1570 
1571              | Type of active   |
1572      Request |   scoped lock    |
1573       type   | IS(*)  IX   S  X |
1574     ---------+------------------+
1575     IS       |  +      +   +  + |
1576     IX       |  +      +   -  - |
1577     S        |  +      -   +  - |
1578     X        |  +      -   -  - |
1579 
1580   The second array specifies if particular type of request can be satisfied
1581   if there is already waiting request for the scoped lock of certain type.
1582   I.e. it specifies what is the priority of different lock types.
1583 
1584              |    Pending      |
1585      Request |  scoped lock    |
1586       type   | IS(*)  IX  S  X |
1587     ---------+-----------------+
1588     IS       |  +      +  +  + |
1589     IX       |  +      +  -  - |
1590     S        |  +      +  +  - |
1591     X        |  +      +  +  + |
1592 
1593   Here: "+" -- means that request can be satisfied
1594         "-" -- means that request can't be satisfied and should wait
1595 
1596   (*)  Since intention shared scoped locks are compatible with all other
1597        type of locks we don't even have any accounting for them.
1598 
1599   Note that relation between scoped locks and objects locks requested
1600   by statement is not straightforward and is therefore fully defined
1601   by SQL-layer.
1602   For example, in order to support global read lock implementation
1603   SQL-layer acquires IX lock in GLOBAL namespace for each statement
1604   that can modify metadata or data (i.e. for each statement that
1605   needs SW, SU, SNW, SNRW or X object locks). OTOH, to ensure that
1606   DROP DATABASE works correctly with concurrent DDL, IX metadata locks
1607   in SCHEMA namespace are acquired for DDL statements which can update
1608   metadata in the schema (i.e. which acquire SU, SNW, SNRW and X locks
1609   on schema objects) and aren't acquired for DML.
1610 */
1611 
const MDL_lock::bitmap_t MDL_scoped_lock::m_granted_incompatible[MDL_TYPE_END] =
{
  /* IX request: blocked by granted S and X (see matrix above). */
  MDL_BIT(MDL_EXCLUSIVE) | MDL_BIT(MDL_SHARED),
  /*
    S request: blocked by granted IX and X. The six zero entries that
    follow correspond to per-object lock types never used for scoped
    locks.
  */
  MDL_BIT(MDL_EXCLUSIVE) | MDL_BIT(MDL_INTENTION_EXCLUSIVE), 0, 0, 0, 0, 0, 0,
  /* X request: blocked by any granted scoped lock except IS. */
  MDL_BIT(MDL_EXCLUSIVE) | MDL_BIT(MDL_SHARED) | MDL_BIT(MDL_INTENTION_EXCLUSIVE)
};

const MDL_lock::bitmap_t MDL_scoped_lock::m_waiting_incompatible[MDL_TYPE_END] =
{
  /* IX request: must wait behind pending S and X. */
  MDL_BIT(MDL_EXCLUSIVE) | MDL_BIT(MDL_SHARED),
  /*
    S request: must wait behind pending X only. The trailing zeros
    include the X slot: a pending X request never yields priority.
  */
  MDL_BIT(MDL_EXCLUSIVE), 0, 0, 0, 0, 0, 0, 0
};
1624 
1625 
1626 /**
1627   Compatibility (or rather "incompatibility") matrices for per-object
1628   metadata lock. Arrays of bitmaps which elements specify which granted/
1629   waiting locks are incompatible with type of lock being requested.
1630 
1631   The first array specifies if particular type of request can be satisfied
1632   if there is granted lock of certain type.
1633 
1634      Request  |  Granted requests for lock       |
1635       type    | S  SH  SR  SW  SU  SNW  SNRW  X  |
1636     ----------+----------------------------------+
1637     S         | +   +   +   +   +   +    +    -  |
1638     SH        | +   +   +   +   +   +    +    -  |
1639     SR        | +   +   +   +   +   +    -    -  |
1640     SW        | +   +   +   +   +   -    -    -  |
1641     SU        | +   +   +   +   -   -    -    -  |
1642     SNW       | +   +   +   -   -   -    -    -  |
1643     SNRW      | +   +   -   -   -   -    -    -  |
1644     X         | -   -   -   -   -   -    -    -  |
1645     SU -> X   | -   -   -   -   0   0    0    0  |
1646     SNW -> X  | -   -   -   0   0   0    0    0  |
1647     SNRW -> X | -   -   0   0   0   0    0    0  |
1648 
1649   The second array specifies if particular type of request can be satisfied
1650   if there is waiting request for the same lock of certain type. In other
1651   words it specifies what is the priority of different lock types.
1652 
1653      Request  |  Pending requests for lock      |
1654       type    | S  SH  SR  SW  SU  SNW  SNRW  X |
1655     ----------+---------------------------------+
1656     S         | +   +   +   +   +   +     +   - |
1657     SH        | +   +   +   +   +   +     +   + |
1658     SR        | +   +   +   +   +   +     -   - |
1659     SW        | +   +   +   +   +   -     -   - |
1660     SU        | +   +   +   +   +   +     +   - |
1661     SNW       | +   +   +   +   +   +     +   - |
1662     SNRW      | +   +   +   +   +   +     +   - |
1663     X         | +   +   +   +   +   +     +   + |
1664     SU -> X   | +   +   +   +   +   +     +   + |
1665     SNW -> X  | +   +   +   +   +   +     +   + |
1666     SNRW -> X | +   +   +   +   +   +     +   + |
1667 
1668   Here: "+" -- means that request can be satisfied
1669         "-" -- means that request can't be satisfied and should wait
1670         "0" -- means impossible situation which will trigger assert
1671 
1672   @note In cases then current context already has "stronger" type
1673         of lock on the object it will be automatically granted
1674         thanks to usage of the MDL_context::find_ticket() method.
1675 
1676   @note IX locks are excluded since they are not used for per-object
1677         metadata locks.
1678 */
1679 
const MDL_lock::bitmap_t
MDL_object_lock::m_granted_incompatible[MDL_TYPE_END] =
{
  /* IX: not used for per-object locks, so no conflicts are recorded. */
  0,
  /* S: blocked only by granted X (see matrix above). */
  MDL_BIT(MDL_EXCLUSIVE),
  /* SH: blocked only by granted X. */
  MDL_BIT(MDL_EXCLUSIVE),
  /* SR: blocked by granted X and SNRW. */
  MDL_BIT(MDL_EXCLUSIVE) | MDL_BIT(MDL_SHARED_NO_READ_WRITE),
  /* SW: blocked by granted X, SNRW and SNW. */
  MDL_BIT(MDL_EXCLUSIVE) | MDL_BIT(MDL_SHARED_NO_READ_WRITE) |
    MDL_BIT(MDL_SHARED_NO_WRITE),
  /* SU: additionally blocked by another granted SU. */
  MDL_BIT(MDL_EXCLUSIVE) | MDL_BIT(MDL_SHARED_NO_READ_WRITE) |
    MDL_BIT(MDL_SHARED_NO_WRITE) | MDL_BIT(MDL_SHARED_UPGRADABLE),
  /* SNW: additionally blocked by granted SW. */
  MDL_BIT(MDL_EXCLUSIVE) | MDL_BIT(MDL_SHARED_NO_READ_WRITE) |
    MDL_BIT(MDL_SHARED_NO_WRITE) | MDL_BIT(MDL_SHARED_UPGRADABLE) |
    MDL_BIT(MDL_SHARED_WRITE),
  /* SNRW: additionally blocked by granted SR. */
  MDL_BIT(MDL_EXCLUSIVE) | MDL_BIT(MDL_SHARED_NO_READ_WRITE) |
    MDL_BIT(MDL_SHARED_NO_WRITE) | MDL_BIT(MDL_SHARED_UPGRADABLE) |
    MDL_BIT(MDL_SHARED_WRITE) | MDL_BIT(MDL_SHARED_READ),
  /* X: incompatible with every granted lock type. */
  MDL_BIT(MDL_EXCLUSIVE) | MDL_BIT(MDL_SHARED_NO_READ_WRITE) |
    MDL_BIT(MDL_SHARED_NO_WRITE) | MDL_BIT(MDL_SHARED_UPGRADABLE) |
    MDL_BIT(MDL_SHARED_WRITE) | MDL_BIT(MDL_SHARED_READ) |
    MDL_BIT(MDL_SHARED_HIGH_PRIO) | MDL_BIT(MDL_SHARED)
};


const MDL_lock::bitmap_t
MDL_object_lock::m_waiting_incompatible[MDL_TYPE_END] =
{
  /* IX: not used for per-object locks. */
  0,
  /* S: waits behind pending X. */
  MDL_BIT(MDL_EXCLUSIVE),
  /* SH: never waits behind pending requests (highest priority). */
  0,
  /* SR: waits behind pending X and SNRW. */
  MDL_BIT(MDL_EXCLUSIVE) | MDL_BIT(MDL_SHARED_NO_READ_WRITE),
  /* SW: waits behind pending X, SNRW and SNW. */
  MDL_BIT(MDL_EXCLUSIVE) | MDL_BIT(MDL_SHARED_NO_READ_WRITE) |
    MDL_BIT(MDL_SHARED_NO_WRITE),
  /* SU: waits behind pending X. */
  MDL_BIT(MDL_EXCLUSIVE),
  /* SNW: waits behind pending X. */
  MDL_BIT(MDL_EXCLUSIVE),
  /* SNRW: waits behind pending X. */
  MDL_BIT(MDL_EXCLUSIVE),
  /* X: never yields to pending requests. */
  0
};
1718 
1719 
1720 /**
1721   Check if request for the metadata lock can be satisfied given its
1722   current state.
1723 
1724   @param  type_arg             The requested lock type.
1725   @param  requestor_ctx        The MDL context of the requestor.
1726   @param  ignore_lock_priority Ignore lock priority.
1727 
1728   @retval TRUE   Lock request can be satisfied
1729   @retval FALSE  There is some conflicting lock.
1730 
1731   @note In cases then current context already has "stronger" type
1732         of lock on the object it will be automatically granted
1733         thanks to usage of the MDL_context::find_ticket() method.
1734 */
1735 
1736 bool
can_grant_lock(enum_mdl_type type_arg,MDL_context * requestor_ctx,bool ignore_lock_priority) const1737 MDL_lock::can_grant_lock(enum_mdl_type type_arg,
1738                          MDL_context *requestor_ctx,
1739                          bool ignore_lock_priority) const
1740 {
1741   bool can_grant= FALSE;
1742   bitmap_t waiting_incompat_map= incompatible_waiting_types_bitmap()[type_arg];
1743   bitmap_t granted_incompat_map= incompatible_granted_types_bitmap()[type_arg];
1744 
1745   /*
1746     New lock request can be satisfied iff:
1747     - There are no incompatible types of satisfied requests
1748     in other contexts
1749     - There are no waiting requests which have higher priority
1750     than this request when priority was not ignored.
1751   */
1752   if (ignore_lock_priority || !(m_waiting.bitmap() & waiting_incompat_map))
1753   {
1754     if (! (m_granted.bitmap() & granted_incompat_map))
1755       can_grant= TRUE;
1756     else
1757     {
1758       Ticket_iterator it(m_granted);
1759       MDL_ticket *ticket;
1760 
1761       /* Check that the incompatible lock belongs to some other context. */
1762       while ((ticket= it++))
1763       {
1764         if (ticket->get_ctx() != requestor_ctx &&
1765             ticket->is_incompatible_when_granted(type_arg))
1766           break;
1767       }
1768       if (ticket == NULL)             /* Incompatible locks are our own. */
1769         can_grant= TRUE;
1770     }
1771   }
1772   return can_grant;
1773 }
1774 
1775 
1776 /** Remove a ticket from waiting or pending queue and wakeup up waiters. */
1777 
remove_ticket(Ticket_list MDL_lock::* list,MDL_ticket * ticket)1778 void MDL_lock::remove_ticket(Ticket_list MDL_lock::*list, MDL_ticket *ticket)
1779 {
1780   mysql_prlock_wrlock(&m_rwlock);
1781   (this->*list).remove_ticket(ticket);
1782   if (is_empty())
1783     mdl_locks.remove(this);
1784   else
1785   {
1786     /*
1787       There can be some contexts waiting to acquire a lock
1788       which now might be able to do it. Grant the lock to
1789       them and wake them up!
1790 
1791       We always try to reschedule locks, since there is no easy way
1792       (i.e. by looking at the bitmaps) to find out whether it is
1793       required or not.
1794       In a general case, even when the queue's bitmap is not changed
1795       after removal of the ticket, there is a chance that some request
1796       can be satisfied (due to the fact that a granted request
1797       reflected in the bitmap might belong to the same context as a
1798       pending request).
1799     */
1800     reschedule_waiters();
1801     mysql_prlock_unlock(&m_rwlock);
1802   }
1803 }
1804 
1805 
1806 /**
1807   Check if we have any pending locks which conflict with existing
1808   shared lock.
1809 
1810   @pre The ticket must match an acquired lock.
1811 
1812   @return TRUE if there is a conflicting lock request, FALSE otherwise.
1813 */
1814 
has_pending_conflicting_lock(enum_mdl_type type)1815 bool MDL_lock::has_pending_conflicting_lock(enum_mdl_type type)
1816 {
1817   bool result;
1818 
1819   mysql_mutex_assert_not_owner(&LOCK_open);
1820 
1821   mysql_prlock_rdlock(&m_rwlock);
1822   result= (m_waiting.bitmap() & incompatible_granted_types_bitmap()[type]);
1823   mysql_prlock_unlock(&m_rwlock);
1824   return result;
1825 }
1826 
1827 
~MDL_wait_for_graph_visitor()1828 MDL_wait_for_graph_visitor::~MDL_wait_for_graph_visitor()
1829 {
1830 }
1831 
1832 
~MDL_wait_for_subgraph()1833 MDL_wait_for_subgraph::~MDL_wait_for_subgraph()
1834 {
1835 }
1836 
1837 /**
1838   Check if ticket represents metadata lock of "stronger" or equal type
1839   than specified one. I.e. if metadata lock represented by ticket won't
1840   allow any of locks which are not allowed by specified type of lock.
1841 
1842   @return TRUE  if ticket has stronger or equal type
1843           FALSE otherwise.
1844 */
1845 
has_stronger_or_equal_type(enum_mdl_type type) const1846 bool MDL_ticket::has_stronger_or_equal_type(enum_mdl_type type) const
1847 {
1848   const MDL_lock::bitmap_t *
1849     granted_incompat_map= m_lock->incompatible_granted_types_bitmap();
1850 
1851   return ! (granted_incompat_map[type] & ~(granted_incompat_map[m_type]));
1852 }
1853 
1854 
is_incompatible_when_granted(enum_mdl_type type) const1855 bool MDL_ticket::is_incompatible_when_granted(enum_mdl_type type) const
1856 {
1857   return (MDL_BIT(m_type) &
1858           m_lock->incompatible_granted_types_bitmap()[type]);
1859 }
1860 
1861 
is_incompatible_when_waiting(enum_mdl_type type) const1862 bool MDL_ticket::is_incompatible_when_waiting(enum_mdl_type type) const
1863 {
1864   return (MDL_BIT(m_type) &
1865           m_lock->incompatible_waiting_types_bitmap()[type]);
1866 }
1867 
1868 
1869 /**
1870   Check whether the context already holds a compatible lock ticket
1871   on an object.
1872   Start searching from list of locks for the same duration as lock
1873   being requested. If not look at lists for other durations.
1874 
1875   @param mdl_request  Lock request object for lock to be acquired
1876   @param[out] result_duration  Duration of lock which was found.
1877 
1878   @note Tickets which correspond to lock types "stronger" than one
1879         being requested are also considered compatible.
1880 
1881   @return A pointer to the lock ticket for the object or NULL otherwise.
1882 */
1883 
1884 MDL_ticket *
find_ticket(MDL_request * mdl_request,enum_mdl_duration * result_duration)1885 MDL_context::find_ticket(MDL_request *mdl_request,
1886                          enum_mdl_duration *result_duration)
1887 {
1888   MDL_ticket *ticket;
1889   int i;
1890 
1891   for (i= 0; i < MDL_DURATION_END; i++)
1892   {
1893     enum_mdl_duration duration= (enum_mdl_duration)((mdl_request->duration+i) %
1894                                                     MDL_DURATION_END);
1895     Ticket_iterator it(m_tickets[duration]);
1896 
1897     while ((ticket= it++))
1898     {
1899       if (mdl_request->key.is_equal(&ticket->m_lock->key) &&
1900           ticket->has_stronger_or_equal_type(mdl_request->type))
1901       {
1902         *result_duration= duration;
1903         return ticket;
1904       }
1905     }
1906   }
1907   return NULL;
1908 }
1909 
1910 
1911 /**
1912   Try to acquire one lock.
1913 
1914   Unlike exclusive locks, shared locks are acquired one by
1915   one. This is interface is chosen to simplify introduction of
1916   the new locking API to the system. MDL_context::try_acquire_lock()
1917   is currently used from open_table(), and there we have only one
1918   table to work with.
1919 
1920   This function may also be used to try to acquire an exclusive
1921   lock on a destination table, by ALTER TABLE ... RENAME.
1922 
1923   Returns immediately without any side effect if encounters a lock
1924   conflict. Otherwise takes the lock.
1925 
1926   FIXME: Compared to lock_table_name_if_not_cached() (from 5.1)
1927          it gives slightly more false negatives.
1928 
1929   @param mdl_request [in/out] Lock request object for lock to be acquired
1930 
1931   @retval  FALSE   Success. The lock may have not been acquired.
1932                    Check the ticket, if it's NULL, a conflicting lock
1933                    exists.
1934   @retval  TRUE    Out of resources, an error has been reported.
1935 */
1936 
1937 bool
try_acquire_lock(MDL_request * mdl_request)1938 MDL_context::try_acquire_lock(MDL_request *mdl_request)
1939 {
1940   MDL_ticket *ticket= NULL;
1941 
1942   if (try_acquire_lock_impl(mdl_request, &ticket))
1943     return TRUE;
1944 
1945   if (! mdl_request->ticket)
1946   {
1947     /*
1948       Our attempt to acquire lock without waiting has failed.
1949       Let us release resources which were acquired in the process.
1950       We can't get here if we allocated a new lock object so there
1951       is no need to release it.
1952     */
1953     DBUG_ASSERT(! ticket->m_lock->is_empty());
1954     mysql_prlock_unlock(&ticket->m_lock->m_rwlock);
1955     MDL_ticket::destroy(ticket);
1956   }
1957 
1958   return FALSE;
1959 }
1960 
1961 
1962 /**
1963   Auxiliary method for acquiring lock without waiting.
1964 
1965   @param mdl_request [in/out] Lock request object for lock to be acquired
1966   @param out_ticket  [out]    Ticket for the request in case when lock
1967                               has not been acquired.
1968 
1969   @retval  FALSE   Success. The lock may have not been acquired.
1970                    Check MDL_request::ticket, if it's NULL, a conflicting
1971                    lock exists. In this case "out_ticket" out parameter
1972                    points to ticket which was constructed for the request.
1973                    MDL_ticket::m_lock points to the corresponding MDL_lock
1974                    object and MDL_lock::m_rwlock write-locked.
1975   @retval  TRUE    Out of resources, an error has been reported.
1976 */
1977 
1978 bool
try_acquire_lock_impl(MDL_request * mdl_request,MDL_ticket ** out_ticket)1979 MDL_context::try_acquire_lock_impl(MDL_request *mdl_request,
1980                                    MDL_ticket **out_ticket)
1981 {
1982   MDL_lock *lock;
1983   MDL_key *key= &mdl_request->key;
1984   MDL_ticket *ticket;
1985   enum_mdl_duration found_duration;
1986 
1987   DBUG_ASSERT(mdl_request->type != MDL_EXCLUSIVE ||
1988               is_lock_owner(MDL_key::GLOBAL, "", "", MDL_INTENTION_EXCLUSIVE));
1989   DBUG_ASSERT(mdl_request->ticket == NULL);
1990 
1991   /* Don't take chances in production. */
1992   mdl_request->ticket= NULL;
1993   mysql_mutex_assert_not_owner(&LOCK_open);
1994 
1995   /*
1996     Check whether the context already holds a shared lock on the object,
1997     and if so, grant the request.
1998   */
1999   if ((ticket= find_ticket(mdl_request, &found_duration)))
2000   {
2001     DBUG_ASSERT(ticket->m_lock);
2002     DBUG_ASSERT(ticket->has_stronger_or_equal_type(mdl_request->type));
2003     /*
2004       If the request is for a transactional lock, and we found
2005       a transactional lock, just reuse the found ticket.
2006 
2007       It's possible that we found a transactional lock,
2008       but the request is for a HANDLER lock. In that case HANDLER
2009       code will clone the ticket (see below why it's needed).
2010 
2011       If the request is for a transactional lock, and we found
2012       a HANDLER lock, create a copy, to make sure that when user
2013       does HANDLER CLOSE, the transactional lock is not released.
2014 
2015       If the request is for a handler lock, and we found a
2016       HANDLER lock, also do the clone. HANDLER CLOSE for one alias
2017       should not release the lock on the table HANDLER opened through
2018       a different alias.
2019     */
2020     mdl_request->ticket= ticket;
2021     if ((found_duration != mdl_request->duration ||
2022          mdl_request->duration == MDL_EXPLICIT) &&
2023         clone_ticket(mdl_request))
2024     {
2025       /* Clone failed. */
2026       mdl_request->ticket= NULL;
2027       return TRUE;
2028     }
2029     return FALSE;
2030   }
2031 
2032   if (!(ticket= MDL_ticket::create(this, mdl_request->type
2033 #ifndef DBUG_OFF
2034                                    , mdl_request->duration
2035 #endif
2036                                    )))
2037     return TRUE;
2038 
2039   /* The below call implicitly locks MDL_lock::m_rwlock on success. */
2040   if (!(lock= mdl_locks.find_or_insert(key)))
2041   {
2042     MDL_ticket::destroy(ticket);
2043     return TRUE;
2044   }
2045 
2046   ticket->m_lock= lock;
2047 
2048   if (lock->can_grant_lock(mdl_request->type, this, false))
2049   {
2050     lock->m_granted.add_ticket(ticket);
2051 
2052     mysql_prlock_unlock(&lock->m_rwlock);
2053 
2054     m_tickets[mdl_request->duration].push_front(ticket);
2055 
2056     mdl_request->ticket= ticket;
2057   }
2058   else
2059     *out_ticket= ticket;
2060 
2061   return FALSE;
2062 }
2063 
2064 
2065 /**
2066   Create a copy of a granted ticket.
2067   This is used to make sure that HANDLER ticket
2068   is never shared with a ticket that belongs to
2069   a transaction, so that when we HANDLER CLOSE,
2070   we don't release a transactional ticket, and
2071   vice versa -- when we COMMIT, we don't mistakenly
2072   release a ticket for an open HANDLER.
2073 
2074   @retval TRUE   Out of memory.
2075   @retval FALSE  Success.
2076 */
2077 
2078 bool
clone_ticket(MDL_request * mdl_request)2079 MDL_context::clone_ticket(MDL_request *mdl_request)
2080 {
2081   MDL_ticket *ticket;
2082 
2083   mysql_mutex_assert_not_owner(&LOCK_open);
2084   /*
2085     By submitting mdl_request->type to MDL_ticket::create()
2086     we effectively downgrade the cloned lock to the level of
2087     the request.
2088   */
2089   if (!(ticket= MDL_ticket::create(this, mdl_request->type
2090 #ifndef DBUG_OFF
2091                                    , mdl_request->duration
2092 #endif
2093                                    )))
2094     return TRUE;
2095 
2096   /* clone() is not supposed to be used to get a stronger lock. */
2097   DBUG_ASSERT(mdl_request->ticket->has_stronger_or_equal_type(ticket->m_type));
2098 
2099   ticket->m_lock= mdl_request->ticket->m_lock;
2100   mdl_request->ticket= ticket;
2101 
2102   mysql_prlock_wrlock(&ticket->m_lock->m_rwlock);
2103   ticket->m_lock->m_granted.add_ticket(ticket);
2104   mysql_prlock_unlock(&ticket->m_lock->m_rwlock);
2105 
2106   m_tickets[mdl_request->duration].push_front(ticket);
2107 
2108   return FALSE;
2109 }
2110 
2111 
2112 /**
2113   Notify threads holding a shared metadata locks on object which
2114   conflict with a pending X, SNW or SNRW lock.
2115 
2116   @param  ctx  MDL_context for current thread.
2117 */
2118 
notify_conflicting_locks(MDL_context * ctx)2119 void MDL_object_lock::notify_conflicting_locks(MDL_context *ctx)
2120 {
2121   Ticket_iterator it(m_granted);
2122   MDL_ticket *conflicting_ticket;
2123 
2124   while ((conflicting_ticket= it++))
2125   {
2126     /* Only try to abort locks on which we back off. */
2127     if (conflicting_ticket->get_ctx() != ctx &&
2128         conflicting_ticket->get_type() < MDL_SHARED_UPGRADABLE)
2129 
2130     {
2131       MDL_context *conflicting_ctx= conflicting_ticket->get_ctx();
2132 
2133       /*
2134         If thread which holds conflicting lock is waiting on table-level
2135         lock or some other non-MDL resource we might need to wake it up
2136         by calling code outside of MDL.
2137       */
2138       ctx->get_owner()->
2139         notify_shared_lock(conflicting_ctx->get_owner(),
2140                            conflicting_ctx->get_needs_thr_lock_abort());
2141     }
2142   }
2143 }
2144 
2145 
2146 /**
2147   Notify threads holding scoped IX locks which conflict with a pending S lock.
2148 
2149   @param  ctx  MDL_context for current thread.
2150 */
2151 
notify_conflicting_locks(MDL_context * ctx)2152 void MDL_scoped_lock::notify_conflicting_locks(MDL_context *ctx)
2153 {
2154   Ticket_iterator it(m_granted);
2155   MDL_ticket *conflicting_ticket;
2156 
2157   while ((conflicting_ticket= it++))
2158   {
2159     if (conflicting_ticket->get_ctx() != ctx &&
2160         conflicting_ticket->get_type() == MDL_INTENTION_EXCLUSIVE)
2161 
2162     {
2163       MDL_context *conflicting_ctx= conflicting_ticket->get_ctx();
2164 
2165       /*
2166         Thread which holds global IX lock can be a handler thread for
2167         insert delayed. We need to kill such threads in order to get
2168         global shared lock. We do this my calling code outside of MDL.
2169       */
2170       ctx->get_owner()->
2171         notify_shared_lock(conflicting_ctx->get_owner(),
2172                            conflicting_ctx->get_needs_thr_lock_abort());
2173     }
2174   }
2175 }
2176 
2177 
2178 /**
2179   Acquire one lock with waiting for conflicting locks to go away if needed.
2180 
2181   @param mdl_request [in/out] Lock request object for lock to be acquired
2182 
2183   @param lock_wait_timeout [in] Seconds to wait before timeout.
2184 
2185   @retval  FALSE   Success. MDL_request::ticket points to the ticket
2186                    for the lock.
2187   @retval  TRUE    Failure (Out of resources or waiting is aborted),
2188 */
2189 
2190 bool
acquire_lock(MDL_request * mdl_request,ulong lock_wait_timeout)2191 MDL_context::acquire_lock(MDL_request *mdl_request, ulong lock_wait_timeout)
2192 {
2193   MDL_lock *lock;
2194   MDL_ticket *ticket= NULL;
2195   struct timespec abs_timeout;
2196   MDL_wait::enum_wait_status wait_status;
2197   /* Do some work outside the critical section. */
2198   set_timespec(abs_timeout, lock_wait_timeout);
2199 
2200   if (try_acquire_lock_impl(mdl_request, &ticket))
2201     return TRUE;
2202 
2203   if (mdl_request->ticket)
2204   {
2205     /*
2206       We have managed to acquire lock without waiting.
2207       MDL_lock, MDL_context and MDL_request were updated
2208       accordingly, so we can simply return success.
2209     */
2210     return FALSE;
2211   }
2212 
2213   /*
2214     Our attempt to acquire lock without waiting has failed.
2215     As a result of this attempt we got MDL_ticket with m_lock
2216     member pointing to the corresponding MDL_lock object which
2217     has MDL_lock::m_rwlock write-locked.
2218   */
2219   lock= ticket->m_lock;
2220 
2221   lock->m_waiting.add_ticket(ticket);
2222 
2223   /*
2224     Once we added a pending ticket to the waiting queue,
2225     we must ensure that our wait slot is empty, so
2226     that our lock request can be scheduled. Do that in the
2227     critical section formed by the acquired write lock on MDL_lock.
2228   */
2229   m_wait.reset_status();
2230 
2231   if (lock->needs_notification(ticket))
2232     lock->notify_conflicting_locks(this);
2233 
2234   mysql_prlock_unlock(&lock->m_rwlock);
2235 
2236   will_wait_for(ticket);
2237 
2238   /* There is a shared or exclusive lock on the object. */
2239   DEBUG_SYNC(get_thd(), "mdl_acquire_lock_wait");
2240 
2241   find_deadlock();
2242 
2243   if (lock->needs_notification(ticket))
2244   {
2245     struct timespec abs_shortwait;
2246     set_timespec(abs_shortwait, 1);
2247     wait_status= MDL_wait::EMPTY;
2248 
2249     while (cmp_timespec(abs_shortwait, abs_timeout) <= 0)
2250     {
2251       /* abs_timeout is far away. Wait a short while and notify locks. */
2252       wait_status= m_wait.timed_wait(m_owner, &abs_shortwait, FALSE,
2253                                      mdl_request->key.get_wait_state_name());
2254 
2255       if (wait_status != MDL_wait::EMPTY)
2256         break;
2257 
2258       mysql_prlock_wrlock(&lock->m_rwlock);
2259       lock->notify_conflicting_locks(this);
2260       mysql_prlock_unlock(&lock->m_rwlock);
2261       set_timespec(abs_shortwait, 1);
2262     }
2263     if (wait_status == MDL_wait::EMPTY)
2264       wait_status= m_wait.timed_wait(m_owner, &abs_timeout, TRUE,
2265                                      mdl_request->key.get_wait_state_name());
2266   }
2267   else
2268     wait_status= m_wait.timed_wait(m_owner, &abs_timeout, TRUE,
2269                                    mdl_request->key.get_wait_state_name());
2270 
2271   done_waiting_for();
2272 
2273   if (wait_status != MDL_wait::GRANTED)
2274   {
2275     lock->remove_ticket(&MDL_lock::m_waiting, ticket);
2276     MDL_ticket::destroy(ticket);
2277     switch (wait_status)
2278     {
2279     case MDL_wait::VICTIM:
2280       my_error(ER_LOCK_DEADLOCK, MYF(0));
2281       break;
2282     case MDL_wait::TIMEOUT:
2283       my_error(ER_LOCK_WAIT_TIMEOUT, MYF(0));
2284       break;
2285     case MDL_wait::KILLED:
2286       my_error(ER_QUERY_INTERRUPTED, MYF(0));
2287       break;
2288     default:
2289       DBUG_ASSERT(0);
2290       break;
2291     }
2292     return TRUE;
2293   }
2294 
2295   /*
2296     We have been granted our request.
2297     State of MDL_lock object is already being appropriately updated by a
2298     concurrent thread (@sa MDL_lock:reschedule_waiters()).
2299     So all we need to do is to update MDL_context and MDL_request objects.
2300   */
2301   DBUG_ASSERT(wait_status == MDL_wait::GRANTED);
2302 
2303   m_tickets[mdl_request->duration].push_front(ticket);
2304 
2305   mdl_request->ticket= ticket;
2306 
2307   return FALSE;
2308 }
2309 
2310 
mdl_request_ptr_cmp(const void * ptr1,const void * ptr2)2311 extern "C" int mdl_request_ptr_cmp(const void* ptr1, const void* ptr2)
2312 {
2313   MDL_request *req1= *(MDL_request**)ptr1;
2314   MDL_request *req2= *(MDL_request**)ptr2;
2315   return req1->key.cmp(&req2->key);
2316 }
2317 
2318 
2319 /**
2320   Acquire exclusive locks. There must be no granted locks in the
2321   context.
2322 
2323   This is a replacement of lock_table_names(). It is used in
2324   RENAME, DROP and other DDL SQL statements.
2325 
2326   @param  mdl_requests  List of requests for locks to be acquired.
2327 
2328   @param lock_wait_timeout  Seconds to wait before timeout.
2329 
2330   @note The list of requests should not contain non-exclusive lock requests.
2331         There should not be any acquired locks in the context.
2332 
2333   @note Assumes that one already owns scoped intention exclusive lock.
2334 
2335   @retval FALSE  Success
2336   @retval TRUE   Failure
2337 */
2338 
acquire_locks(MDL_request_list * mdl_requests,ulong lock_wait_timeout)2339 bool MDL_context::acquire_locks(MDL_request_list *mdl_requests,
2340                                 ulong lock_wait_timeout)
2341 {
2342   MDL_request_list::Iterator it(*mdl_requests);
2343   MDL_request **sort_buf, **p_req;
2344   MDL_savepoint mdl_svp= mdl_savepoint();
2345   ssize_t req_count= static_cast<ssize_t>(mdl_requests->elements());
2346 
2347   if (req_count == 0)
2348     return FALSE;
2349 
2350   /* Sort requests according to MDL_key. */
2351   if (! (sort_buf= (MDL_request **)my_malloc(req_count *
2352                                              sizeof(MDL_request*),
2353                                              MYF(MY_WME))))
2354     return TRUE;
2355 
2356   for (p_req= sort_buf; p_req < sort_buf + req_count; p_req++)
2357     *p_req= it++;
2358 
2359   my_qsort(sort_buf, req_count, sizeof(MDL_request*),
2360            mdl_request_ptr_cmp);
2361 
2362   for (p_req= sort_buf; p_req < sort_buf + req_count; p_req++)
2363   {
2364     if (acquire_lock(*p_req, lock_wait_timeout))
2365       goto err;
2366   }
2367   my_free(sort_buf);
2368   return FALSE;
2369 
2370 err:
2371   /*
2372     Release locks we have managed to acquire so far.
2373     Use rollback_to_savepoint() since there may be duplicate
2374     requests that got assigned the same ticket.
2375   */
2376   rollback_to_savepoint(mdl_svp);
2377   /* Reset lock requests back to its initial state. */
2378   for (req_count= p_req - sort_buf, p_req= sort_buf;
2379        p_req < sort_buf + req_count; p_req++)
2380   {
2381     (*p_req)->ticket= NULL;
2382   }
2383   my_free(sort_buf);
2384   return TRUE;
2385 }
2386 
2387 
2388 /**
2389   Upgrade a shared metadata lock.
2390 
2391   Used in ALTER TABLE and CREATE TABLE.
2392 
2393   @param mdl_ticket         Lock to upgrade.
2394   @param new_type           Lock type to upgrade to.
2395   @param lock_wait_timeout  Seconds to wait before timeout.
2396 
2397   @note In case of failure to upgrade lock (e.g. because upgrader
2398         was killed) leaves lock in its original state (locked in
2399         shared mode).
2400 
2401   @note There can be only one upgrader for a lock or we will have deadlock.
2402         This invariant is ensured by the fact that upgradeable locks SU, SNW
2403         and SNRW are not compatible with each other and themselves in case
2404         of ALTER TABLE operation.
2405         In case of CREATE TABLE operation there is chance of deadlock as 'S'
2406         is compatible with 'S'. But the deadlock is recovered by backoff and
2407         retry mechanism.
2408 
2409   @retval FALSE  Success
2410   @retval TRUE   Failure (thread was killed)
2411 */
2412 
2413 bool
upgrade_shared_lock(MDL_ticket * mdl_ticket,enum_mdl_type new_type,ulong lock_wait_timeout)2414 MDL_context::upgrade_shared_lock(MDL_ticket *mdl_ticket,
2415                                  enum_mdl_type new_type,
2416                                  ulong lock_wait_timeout)
2417 {
2418   MDL_request mdl_xlock_request;
2419   MDL_savepoint mdl_svp= mdl_savepoint();
2420   bool is_new_ticket;
2421 
2422   DBUG_ENTER("MDL_context::upgrade_shared_lock");
2423   DEBUG_SYNC(get_thd(), "mdl_upgrade_lock");
2424 
2425   /*
2426     Do nothing if already upgraded. Used when we FLUSH TABLE under
2427     LOCK TABLES and a table is listed twice in LOCK TABLES list.
2428   */
2429   if (mdl_ticket->has_stronger_or_equal_type(new_type))
2430     DBUG_RETURN(FALSE);
2431 
2432   mdl_xlock_request.init(&mdl_ticket->m_lock->key, new_type,
2433                          MDL_TRANSACTION);
2434 
2435   if (acquire_lock(&mdl_xlock_request, lock_wait_timeout))
2436     DBUG_RETURN(TRUE);
2437 
2438   is_new_ticket= ! has_lock(mdl_svp, mdl_xlock_request.ticket);
2439 
2440   /* Merge the acquired and the original lock. @todo: move to a method. */
2441   mysql_prlock_wrlock(&mdl_ticket->m_lock->m_rwlock);
2442   if (is_new_ticket)
2443     mdl_ticket->m_lock->m_granted.remove_ticket(mdl_xlock_request.ticket);
2444   /*
2445     Set the new type of lock in the ticket. To update state of
2446     MDL_lock object correctly we need to temporarily exclude
2447     ticket from the granted queue and then include it back.
2448   */
2449   mdl_ticket->m_lock->m_granted.remove_ticket(mdl_ticket);
2450   mdl_ticket->m_type= new_type;
2451   mdl_ticket->m_lock->m_granted.add_ticket(mdl_ticket);
2452 
2453   mysql_prlock_unlock(&mdl_ticket->m_lock->m_rwlock);
2454 
2455   if (is_new_ticket)
2456   {
2457     m_tickets[MDL_TRANSACTION].remove(mdl_xlock_request.ticket);
2458     MDL_ticket::destroy(mdl_xlock_request.ticket);
2459   }
2460 
2461   DBUG_RETURN(FALSE);
2462 }
2463 
2464 
2465 /**
2466   A fragment of recursive traversal of the wait-for graph
2467   in search for deadlocks. Direct the deadlock visitor to all
2468   contexts that own the lock the current node in the wait-for
2469   graph is waiting for.
2470   As long as the initial node is remembered in the visitor,
2471   a deadlock is found when the same node is seen twice.
2472 */
2473 
visit_subgraph(MDL_ticket * waiting_ticket,MDL_wait_for_graph_visitor * gvisitor)2474 bool MDL_lock::visit_subgraph(MDL_ticket *waiting_ticket,
2475                               MDL_wait_for_graph_visitor *gvisitor)
2476 {
2477   MDL_ticket *ticket;
2478   MDL_context *src_ctx= waiting_ticket->get_ctx();
2479   bool result= TRUE;
2480 
2481   mysql_prlock_rdlock(&m_rwlock);
2482 
2483   /* Must be initialized after taking a read lock. */
2484   Ticket_iterator granted_it(m_granted);
2485   Ticket_iterator waiting_it(m_waiting);
2486 
2487   /*
2488     MDL_lock's waiting and granted queues and MDL_context::m_waiting_for
2489     member are updated by different threads when the lock is granted
2490     (see MDL_context::acquire_lock() and MDL_lock::reschedule_waiters()).
2491     As a result, here we may encounter a situation when MDL_lock data
2492     already reflects the fact that the lock was granted but
2493     m_waiting_for member has not been updated yet.
2494 
2495     For example, imagine that:
2496 
2497     thread1: Owns SNW lock on table t1.
2498     thread2: Attempts to acquire SW lock on t1,
2499              but sees an active SNW lock.
2500              Thus adds the ticket to the waiting queue and
2501              sets m_waiting_for to point to the ticket.
2502     thread1: Releases SNW lock, updates MDL_lock object to
2503              grant SW lock to thread2 (moves the ticket for
2504              SW from waiting to the active queue).
2505              Attempts to acquire a new SNW lock on t1,
2506              sees an active SW lock (since it is present in the
2507              active queue), adds ticket for SNW lock to the waiting
2508              queue, sets m_waiting_for to point to this ticket.
2509 
2510     At this point deadlock detection algorithm run by thread1 will see that:
2511     - Thread1 waits for SNW lock on t1 (since m_waiting_for is set).
2512     - SNW lock is not granted, because it conflicts with active SW lock
2513       owned by thread 2 (since ticket for SW is present in granted queue).
2514     - Thread2 waits for SW lock (since its m_waiting_for has not been
2515       updated yet!).
2516     - SW lock is not granted because there is pending SNW lock from thread1.
2517       Therefore deadlock should exist [sic!].
2518 
2519     To avoid detection of such false deadlocks we need to check the "actual"
2520     status of the ticket being waited for, before analyzing its blockers.
2521     We do this by checking the wait status of the context which is waiting
2522     for it. To avoid races this has to be done under protection of
2523     MDL_lock::m_rwlock lock.
2524   */
2525   if (src_ctx->m_wait.get_status() != MDL_wait::EMPTY)
2526   {
2527     result= FALSE;
2528     goto end;
2529   }
2530 
2531   /*
2532     To avoid visiting nodes which were already marked as victims of
2533     deadlock detection (or whose requests were already satisfied) we
2534     enter the node only after peeking at its wait status.
2535     This is necessary to avoid active waiting in a situation
2536     when previous searches for a deadlock already selected the
2537     node we're about to enter as a victim (see the comment
2538     in MDL_context::find_deadlock() for explanation why several searches
2539     can be performed for the same wait).
2540     There is no guarantee that the node isn't chosen a victim while we
2541     are visiting it but this is OK: in the worst case we might do some
2542     extra work and one more context might be chosen as a victim.
2543   */
2544   if (gvisitor->enter_node(src_ctx))
2545     goto end;
2546 
2547   /*
2548     We do a breadth-first search first -- that is, inspect all
2549     edges of the current node, and only then follow up to the next
2550     node. In workloads that involve wait-for graph loops this
2551     has proven to be a more efficient strategy [citation missing].
2552   */
2553   while ((ticket= granted_it++))
2554   {
2555     /* Filter out edges that point to the same node. */
2556     if (ticket->get_ctx() != src_ctx &&
2557         ticket->is_incompatible_when_granted(waiting_ticket->get_type()) &&
2558         gvisitor->inspect_edge(ticket->get_ctx()))
2559     {
2560       goto end_leave_node;
2561     }
2562   }
2563 
2564   while ((ticket= waiting_it++))
2565   {
2566     /* Filter out edges that point to the same node. */
2567     if (ticket->get_ctx() != src_ctx &&
2568         ticket->is_incompatible_when_waiting(waiting_ticket->get_type()) &&
2569         gvisitor->inspect_edge(ticket->get_ctx()))
2570     {
2571       goto end_leave_node;
2572     }
2573   }
2574 
2575   /* Recurse and inspect all adjacent nodes. */
2576   granted_it.rewind();
2577   while ((ticket= granted_it++))
2578   {
2579     if (ticket->get_ctx() != src_ctx &&
2580         ticket->is_incompatible_when_granted(waiting_ticket->get_type()) &&
2581         ticket->get_ctx()->visit_subgraph(gvisitor))
2582     {
2583       goto end_leave_node;
2584     }
2585   }
2586 
2587   waiting_it.rewind();
2588   while ((ticket= waiting_it++))
2589   {
2590     if (ticket->get_ctx() != src_ctx &&
2591         ticket->is_incompatible_when_waiting(waiting_ticket->get_type()) &&
2592         ticket->get_ctx()->visit_subgraph(gvisitor))
2593     {
2594       goto end_leave_node;
2595     }
2596   }
2597 
2598   result= FALSE;
2599 
2600 end_leave_node:
2601   gvisitor->leave_node(src_ctx);
2602 
2603 end:
2604   mysql_prlock_unlock(&m_rwlock);
2605   return result;
2606 }
2607 
2608 
2609 /**
2610   Traverse a portion of wait-for graph which is reachable
2611   through the edge represented by this ticket and search
2612   for deadlocks.
2613 
2614   @retval TRUE  A deadlock is found. A pointer to deadlock
2615                  victim is saved in the visitor.
2616   @retval FALSE
2617 */
2618 
accept_visitor(MDL_wait_for_graph_visitor * gvisitor)2619 bool MDL_ticket::accept_visitor(MDL_wait_for_graph_visitor *gvisitor)
2620 {
2621   return m_lock->visit_subgraph(this, gvisitor);
2622 }
2623 
2624 
2625 /**
2626   A fragment of recursive traversal of the wait-for graph of
2627   MDL contexts in the server in search for deadlocks.
2628   Assume this MDL context is a node in the wait-for graph,
2629   and direct the visitor to all adjacent nodes. As long
2630   as the starting node is remembered in the visitor, a
2631   deadlock is found when the same node is visited twice.
2632   One MDL context is connected to another in the wait-for
2633   graph if it waits on a resource that is held by the other
2634   context.
2635 
2636   @retval TRUE  A deadlock is found. A pointer to deadlock
2637                 victim is saved in the visitor.
2638   @retval FALSE
2639 */
2640 
visit_subgraph(MDL_wait_for_graph_visitor * gvisitor)2641 bool MDL_context::visit_subgraph(MDL_wait_for_graph_visitor *gvisitor)
2642 {
2643   bool result= FALSE;
2644 
2645   mysql_prlock_rdlock(&m_LOCK_waiting_for);
2646 
2647   if (m_waiting_for)
2648     result= m_waiting_for->accept_visitor(gvisitor);
2649 
2650   mysql_prlock_unlock(&m_LOCK_waiting_for);
2651 
2652   return result;
2653 }
2654 
2655 
2656 /**
2657   Try to find a deadlock. This function produces no errors.
2658 
2659   @note If during deadlock resolution context which performs deadlock
2660         detection is chosen as a victim it will be informed about the
2661         fact by setting VICTIM status to its wait slot.
2662 
2663   @retval TRUE  A deadlock is found.
2664   @retval FALSE No deadlock found.
2665 */
2666 
find_deadlock()2667 void MDL_context::find_deadlock()
2668 {
2669   while (1)
2670   {
2671     /*
2672       The fact that we use fresh instance of gvisitor for each
2673       search performed by find_deadlock() below is important,
2674       the code responsible for victim selection relies on this.
2675     */
2676     Deadlock_detection_visitor dvisitor(this);
2677     MDL_context *victim;
2678 
2679     if (! visit_subgraph(&dvisitor))
2680     {
2681       /* No deadlocks are found! */
2682       break;
2683     }
2684 
2685     victim= dvisitor.get_victim();
2686 
2687     /*
2688       Failure to change status of the victim is OK as it means
2689       that the victim has received some other message and is
2690       about to stop its waiting/to break deadlock loop.
2691       Even when the initiator of the deadlock search is
2692       chosen the victim, we need to set the respective wait
2693       result in order to "close" it for any attempt to
2694       schedule the request.
2695       This is needed to avoid a possible race during
2696       cleanup in case when the lock request on which the
2697       context was waiting is concurrently satisfied.
2698     */
2699     (void) victim->m_wait.set_status(MDL_wait::VICTIM);
2700     victim->unlock_deadlock_victim();
2701 
2702     if (victim == this)
2703       break;
2704     /*
2705       After adding a new edge to the waiting graph we found that it
2706       creates a loop (i.e. there is a deadlock). We decided to destroy
2707       this loop by removing an edge, but not the one that we added.
2708       Since this doesn't guarantee that all loops created by addition
2709       of the new edge are destroyed, we have to repeat the search.
2710     */
2711   }
2712 }
2713 
2714 
2715 /**
2716   Release lock.
2717 
2718   @param duration Lock duration.
2719   @param ticket   Ticket for lock to be released.
2720 
2721 */
2722 
release_lock(enum_mdl_duration duration,MDL_ticket * ticket)2723 void MDL_context::release_lock(enum_mdl_duration duration, MDL_ticket *ticket)
2724 {
2725   MDL_lock *lock= ticket->m_lock;
2726   DBUG_ENTER("MDL_context::release_lock");
2727   DBUG_PRINT("enter", ("db=%s name=%s", lock->key.db_name(),
2728                                         lock->key.name()));
2729 
2730   DBUG_ASSERT(this == ticket->get_ctx());
2731   mysql_mutex_assert_not_owner(&LOCK_open);
2732 
2733   lock->remove_ticket(&MDL_lock::m_granted, ticket);
2734 
2735   m_tickets[duration].remove(ticket);
2736   MDL_ticket::destroy(ticket);
2737 
2738   DBUG_VOID_RETURN;
2739 }
2740 
2741 
2742 /**
2743   Release lock with explicit duration.
2744 
2745   @param ticket   Ticket for lock to be released.
2746 
2747 */
2748 
release_lock(MDL_ticket * ticket)2749 void MDL_context::release_lock(MDL_ticket *ticket)
2750 {
2751   DBUG_ASSERT(ticket->m_duration == MDL_EXPLICIT);
2752 
2753   release_lock(MDL_EXPLICIT, ticket);
2754 }
2755 
2756 
2757 /**
2758   Release all locks associated with the context. If the sentinel
2759   is not NULL, do not release locks stored in the list after and
2760   including the sentinel.
2761 
2762   Statement and transactional locks are added to the beginning of
2763   the corresponding lists, i.e. stored in reverse temporal order.
2764   This allows to employ this function to:
2765   - back off in case of a lock conflict.
  - release all locks at the end of a statement or transaction
2767   - rollback to a savepoint.
2768 */
2769 
release_locks_stored_before(enum_mdl_duration duration,MDL_ticket * sentinel)2770 void MDL_context::release_locks_stored_before(enum_mdl_duration duration,
2771                                               MDL_ticket *sentinel)
2772 {
2773   MDL_ticket *ticket;
2774   Ticket_iterator it(m_tickets[duration]);
2775   DBUG_ENTER("MDL_context::release_locks_stored_before");
2776 
2777   if (m_tickets[duration].is_empty())
2778     DBUG_VOID_RETURN;
2779 
2780   while ((ticket= it++) && ticket != sentinel)
2781   {
2782     DBUG_PRINT("info", ("found lock to release ticket=%p", ticket));
2783     release_lock(duration, ticket);
2784   }
2785 
2786   DBUG_VOID_RETURN;
2787 }
2788 
2789 
2790 /**
2791   Release all explicit locks in the context which correspond to the
2792   same name/object as this lock request.
2793 
  @param name      One of the locks for the name/object for which all
                   locks should be released.
2796 */
2797 
release_all_locks_for_name(MDL_ticket * name)2798 void MDL_context::release_all_locks_for_name(MDL_ticket *name)
2799 {
2800   /* Use MDL_ticket::m_lock to identify other locks for the same object. */
2801   MDL_lock *lock= name->m_lock;
2802 
2803   /* Remove matching lock tickets from the context. */
2804   MDL_ticket *ticket;
2805   Ticket_iterator it_ticket(m_tickets[MDL_EXPLICIT]);
2806 
2807   while ((ticket= it_ticket++))
2808   {
2809     DBUG_ASSERT(ticket->m_lock);
2810     if (ticket->m_lock == lock)
2811       release_lock(MDL_EXPLICIT, ticket);
2812   }
2813 }
2814 
2815 
2816 /**
2817   Downgrade an EXCLUSIVE or SHARED_NO_WRITE lock to shared metadata lock.
2818 
2819   @param type  Type of lock to which exclusive lock should be downgraded.
2820 */
2821 
downgrade_lock(enum_mdl_type type)2822 void MDL_ticket::downgrade_lock(enum_mdl_type type)
2823 {
2824   mysql_mutex_assert_not_owner(&LOCK_open);
2825 
2826   /*
2827     Do nothing if already downgraded. Used when we FLUSH TABLE under
2828     LOCK TABLES and a table is listed twice in LOCK TABLES list.
2829     Note that this code might even try to "downgrade" a weak lock
2830     (e.g. SW) to a stronger one (e.g SNRW). So we can't even assert
2831     here that target lock is weaker than existing lock.
2832   */
2833   if (m_type == type || !has_stronger_or_equal_type(type))
2834     return;
2835 
2836   /* Only allow downgrade from EXCLUSIVE and SHARED_NO_WRITE. */
2837   DBUG_ASSERT(m_type == MDL_EXCLUSIVE ||
2838               m_type == MDL_SHARED_NO_WRITE);
2839 
2840   mysql_prlock_wrlock(&m_lock->m_rwlock);
2841   /*
2842     To update state of MDL_lock object correctly we need to temporarily
2843     exclude ticket from the granted queue and then include it back.
2844   */
2845   m_lock->m_granted.remove_ticket(this);
2846   m_type= type;
2847   m_lock->m_granted.add_ticket(this);
2848   m_lock->reschedule_waiters();
2849   mysql_prlock_unlock(&m_lock->m_rwlock);
2850 }
2851 
2852 
2853 /**
2854   Auxiliary function which allows to check if we have some kind of lock on
  an object. Returns TRUE if we have a lock of a given or stronger type.
2856 
2857   @param mdl_namespace Id of object namespace
2858   @param db            Name of the database
2859   @param name          Name of the object
2860   @param mdl_type      Lock type. Pass in the weakest type to find
2861                        out if there is at least some lock.
2862 
2863   @return TRUE if current context contains satisfied lock for the object,
2864           FALSE otherwise.
2865 */
2866 
2867 bool
is_lock_owner(MDL_key::enum_mdl_namespace mdl_namespace,const char * db,const char * name,enum_mdl_type mdl_type)2868 MDL_context::is_lock_owner(MDL_key::enum_mdl_namespace mdl_namespace,
2869                            const char *db, const char *name,
2870                            enum_mdl_type mdl_type)
2871 {
2872   MDL_request mdl_request;
2873   enum_mdl_duration not_unused;
2874   /* We don't care about exact duration of lock here. */
2875   mdl_request.init(mdl_namespace, db, name, mdl_type, MDL_TRANSACTION);
2876   MDL_ticket *ticket= find_ticket(&mdl_request, &not_unused);
2877 
2878   DBUG_ASSERT(ticket == NULL || ticket->m_lock);
2879 
2880   return ticket;
2881 }
2882 
2883 
2884 /**
2885   Check if we have any pending locks which conflict with existing shared lock.
2886 
2887   @pre The ticket must match an acquired lock.
2888 
2889   @return TRUE if there is a conflicting lock request, FALSE otherwise.
2890 */
2891 
has_pending_conflicting_lock() const2892 bool MDL_ticket::has_pending_conflicting_lock() const
2893 {
2894   return m_lock->has_pending_conflicting_lock(m_type);
2895 }
2896 
2897 
2898 /**
2899   Releases metadata locks that were acquired after a specific savepoint.
2900 
2901   @note Used to release tickets acquired during a savepoint unit.
  @note It's safe to iterate over and unlock any locks taken after this
        savepoint because other statements that take other special locks
        cause an implicit commit (e.g. LOCK TABLES).
2905 */
2906 
rollback_to_savepoint(const MDL_savepoint & mdl_savepoint)2907 void MDL_context::rollback_to_savepoint(const MDL_savepoint &mdl_savepoint)
2908 {
2909   DBUG_ENTER("MDL_context::rollback_to_savepoint");
2910 
2911   /* If savepoint is NULL, it is from the start of the transaction. */
2912   release_locks_stored_before(MDL_STATEMENT, mdl_savepoint.m_stmt_ticket);
2913   release_locks_stored_before(MDL_TRANSACTION, mdl_savepoint.m_trans_ticket);
2914 
2915   DBUG_VOID_RETURN;
2916 }
2917 
2918 
2919 /**
2920   Release locks acquired by normal statements (SELECT, UPDATE,
2921   DELETE, etc) in the course of a transaction. Do not release
2922   HANDLER locks, if there are any.
2923 
2924   This method is used at the end of a transaction, in
2925   implementation of COMMIT (implicit or explicit) and ROLLBACK.
2926 */
2927 
release_transactional_locks()2928 void MDL_context::release_transactional_locks()
2929 {
2930   DBUG_ENTER("MDL_context::release_transactional_locks");
2931   release_locks_stored_before(MDL_STATEMENT, NULL);
2932   release_locks_stored_before(MDL_TRANSACTION, NULL);
2933   DBUG_VOID_RETURN;
2934 }
2935 
2936 
release_statement_locks()2937 void MDL_context::release_statement_locks()
2938 {
2939   DBUG_ENTER("MDL_context::release_transactional_locks");
2940   release_locks_stored_before(MDL_STATEMENT, NULL);
2941   DBUG_VOID_RETURN;
2942 }
2943 
2944 
2945 /**
2946   Does this savepoint have this lock?
2947 
2948   @retval TRUE  The ticket is older than the savepoint or
2949                 is an LT, HA or GLR ticket. Thus it belongs
2950                 to the savepoint or has explicit duration.
2951   @retval FALSE The ticket is newer than the savepoint.
2952                 and is not an LT, HA or GLR ticket.
2953 */
2954 
has_lock(const MDL_savepoint & mdl_savepoint,MDL_ticket * mdl_ticket)2955 bool MDL_context::has_lock(const MDL_savepoint &mdl_savepoint,
2956                            MDL_ticket *mdl_ticket)
2957 {
2958   MDL_ticket *ticket;
2959   /* Start from the beginning, most likely mdl_ticket's been just acquired. */
2960   MDL_context::Ticket_iterator s_it(m_tickets[MDL_STATEMENT]);
2961   MDL_context::Ticket_iterator t_it(m_tickets[MDL_TRANSACTION]);
2962 
2963   while ((ticket= s_it++) && ticket != mdl_savepoint.m_stmt_ticket)
2964   {
2965     if (ticket == mdl_ticket)
2966       return FALSE;
2967   }
2968 
2969   while ((ticket= t_it++) && ticket != mdl_savepoint.m_trans_ticket)
2970   {
2971     if (ticket == mdl_ticket)
2972       return FALSE;
2973   }
2974   return TRUE;
2975 }
2976 
2977 
2978 /**
2979   Change lock duration for transactional lock.
2980 
2981   @param ticket   Ticket representing lock.
2982   @param duration Lock duration to be set.
2983 
2984   @note This method only supports changing duration of
2985         transactional lock to some other duration.
2986 */
2987 
set_lock_duration(MDL_ticket * mdl_ticket,enum_mdl_duration duration)2988 void MDL_context::set_lock_duration(MDL_ticket *mdl_ticket,
2989                                     enum_mdl_duration duration)
2990 {
2991   DBUG_ASSERT(mdl_ticket->m_duration == MDL_TRANSACTION &&
2992               duration != MDL_TRANSACTION);
2993 
2994   m_tickets[MDL_TRANSACTION].remove(mdl_ticket);
2995   m_tickets[duration].push_front(mdl_ticket);
2996 #ifndef DBUG_OFF
2997   mdl_ticket->m_duration= duration;
2998 #endif
2999 }
3000 
3001 
3002 /**
3003   Set explicit duration for all locks in the context.
3004 */
3005 
set_explicit_duration_for_all_locks()3006 void MDL_context::set_explicit_duration_for_all_locks()
3007 {
3008   int i;
3009   MDL_ticket *ticket;
3010 
3011   /*
3012     In the most common case when this function is called list
3013     of transactional locks is bigger than list of locks with
3014     explicit duration. So we start by swapping these two lists
3015     and then move elements from new list of transactional
3016     locks and list of statement locks to list of locks with
3017     explicit duration.
3018   */
3019 
3020   m_tickets[MDL_EXPLICIT].swap(m_tickets[MDL_TRANSACTION]);
3021 
3022   for (i= 0; i < MDL_EXPLICIT; i++)
3023   {
3024     Ticket_iterator it_ticket(m_tickets[i]);
3025 
3026     while ((ticket= it_ticket++))
3027     {
3028       m_tickets[i].remove(ticket);
3029       m_tickets[MDL_EXPLICIT].push_front(ticket);
3030     }
3031   }
3032 
3033 #ifndef DBUG_OFF
3034   Ticket_iterator exp_it(m_tickets[MDL_EXPLICIT]);
3035 
3036   while ((ticket= exp_it++))
3037     ticket->m_duration= MDL_EXPLICIT;
3038 #endif
3039 }
3040 
3041 
3042 /**
3043   Set transactional duration for all locks in the context.
3044 */
3045 
set_transaction_duration_for_all_locks()3046 void MDL_context::set_transaction_duration_for_all_locks()
3047 {
3048   MDL_ticket *ticket;
3049 
3050   /*
3051     In the most common case when this function is called list
3052     of explicit locks is bigger than two other lists (in fact,
3053     list of statement locks is always empty). So we start by
3054     swapping list of explicit and transactional locks and then
3055     move contents of new list of explicit locks to list of
3056     locks with transactional duration.
3057   */
3058 
3059   DBUG_ASSERT(m_tickets[MDL_STATEMENT].is_empty());
3060 
3061   m_tickets[MDL_TRANSACTION].swap(m_tickets[MDL_EXPLICIT]);
3062 
3063   Ticket_iterator it_ticket(m_tickets[MDL_EXPLICIT]);
3064 
3065   while ((ticket= it_ticket++))
3066   {
3067     m_tickets[MDL_EXPLICIT].remove(ticket);
3068     m_tickets[MDL_TRANSACTION].push_front(ticket);
3069   }
3070 
3071 #ifndef DBUG_OFF
3072   Ticket_iterator trans_it(m_tickets[MDL_TRANSACTION]);
3073 
3074   while ((ticket= trans_it++))
3075     ticket->m_duration= MDL_TRANSACTION;
3076 #endif
3077 }
3078