1 /* Copyright (c) 2000, 2012, Oracle and/or its affiliates.
2    Copyright (c) 2010, 2011 Monty Program Ab
3    Copyright (C) 2013 Sergey Vojtovich and MariaDB Foundation
4 
5    This program is free software; you can redistribute it and/or modify
6    it under the terms of the GNU General Public License as published by
7    the Free Software Foundation; version 2 of the License.
8 
9    This program is distributed in the hope that it will be useful,
10    but WITHOUT ANY WARRANTY; without even the implied warranty of
11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12    GNU General Public License for more details.
13 
14    You should have received a copy of the GNU General Public License
15    along with this program; if not, write to the Free Software
16    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335  USA */
17 
18 /**
19   @file
20   Table definition cache and table cache implementation.
21 
22   Table definition cache actions:
23   - add new TABLE_SHARE object to cache (tdc_acquire_share())
24   - acquire TABLE_SHARE object from cache (tdc_acquire_share())
25   - release TABLE_SHARE object to cache (tdc_release_share())
26   - purge unused TABLE_SHARE objects from cache (tdc_purge())
27   - remove TABLE_SHARE object from cache (tdc_remove_table())
28   - get number of TABLE_SHARE objects in cache (tdc_records())
29 
30   Table cache actions:
31   - add new TABLE object to cache (tc_add_table())
32   - acquire TABLE object from cache (tc_acquire_table())
33   - release TABLE object to cache (tc_release_table())
34   - purge unused TABLE objects from cache (tc_purge())
35   - purge unused TABLE objects of a table from cache (tdc_remove_table())
36   - get number of TABLE objects in cache (tc_records())
37 
38   Dependencies:
39   - close_cached_tables(): flush tables on shutdown
40   - alloc_table_share()
41   - free_table_share()
42 
43   Table cache invariants:
44   - TABLE_SHARE::free_tables shall not contain objects with TABLE::in_use != 0
45   - TABLE_SHARE::free_tables shall not receive new objects if
46     TABLE_SHARE::tdc.flushed is true
47 */
48 
49 #include "mariadb.h"
50 #include "lf.h"
51 #include "table.h"
52 #include "sql_base.h"
53 
54 
55 /** Configuration. */
ulong tdc_size; /**< Table definition cache threshold for LRU eviction. */
ulong tc_size; /**< Table cache threshold for LRU eviction. */
/** Number of allocated table cache instances; upper bound for active ones. */
uint32 tc_instances;
/**
  Number of table cache instances threads are currently distributed over.
  Read and updated with atomic operations; incremented when contention is
  detected on an instance.
*/
uint32 tc_active_instances= 1;
/** Set to 1 once the "cannot activate more instances" warning was printed. */
static uint32 tc_contention_warning_reported;
61 
62 /** Data collections. */
/* Elements are TDC_element objects keyed by TDC_element::m_key. */
static LF_HASH tdc_hash; /**< Collection of TABLE_SHARE objects. */
/** Collection of unused TABLE_SHARE objects. */
static
I_P_List <TDC_element,
          I_P_List_adapter<TDC_element, &TDC_element::next, &TDC_element::prev>,
          I_P_List_null_counter,
          I_P_List_fast_push_back<TDC_element> > unused_shares;

static tdc_version_t tdc_version;  /* Increments on each reload */
/** True after successful tdc_init() until tdc_deinit() is called. */
static bool tdc_inited;
73 
74 
/**
  Protects the list of unused shares, that is the following members
  and objects:

  TDC_element::prev
  TDC_element::next
  unused_shares

  In this file it is always acquired before TDC_element::LOCK_table_share
  when both mutexes are held at the same time.
*/

static mysql_mutex_t LOCK_unused_shares;
84 
#ifdef HAVE_PSI_INTERFACE
/* Performance schema instrumentation keys for table cache mutexes. */
static PSI_mutex_key key_LOCK_unused_shares, key_TABLE_SHARE_LOCK_table_share,
                     key_LOCK_table_cache;
/* Mutex descriptors, registered by tdc_init(). */
static PSI_mutex_info all_tc_mutexes[]=
{
  { &key_LOCK_unused_shares, "LOCK_unused_shares", PSI_FLAG_GLOBAL },
  { &key_TABLE_SHARE_LOCK_table_share, "TABLE_SHARE::tdc.LOCK_table_share", 0 },
  { &key_LOCK_table_cache, "LOCK_table_cache", 0 }
};

/* Performance schema instrumentation key for the per-share condition. */
static PSI_cond_key key_TABLE_SHARE_COND_release;
/* Condition descriptors, registered by tdc_init(). */
static PSI_cond_info all_tc_conds[]=
{
  { &key_TABLE_SHARE_COND_release, "TABLE_SHARE::tdc.COND_release", 0 }
};
#endif
101 
102 
103 static int fix_thd_pins(THD *thd)
104 {
105   return thd->tdc_hash_pins ? 0 :
106          (thd->tdc_hash_pins= lf_hash_get_pins(&tdc_hash)) == 0;
107 }
108 
109 
110 /*
111   Auxiliary routines for manipulating with per-share all/unused lists
112   and tc_count counter.
113   Responsible for preserving invariants between those lists, counter
114   and TABLE::in_use member.
115   In fact those routines implement sort of implicit table cache as
116   part of table definition cache.
117 */
118 
/**
  One partition of the table cache: its own mutex, its own LRU list of
  unused TABLE objects and its own object counter.
*/
struct Table_cache_instance
{
  /**
    Protects free_tables (TABLE::global_free_next and TABLE::global_free_prev),
    records, Share_free_tables::List (TABLE::prev and TABLE::next),
    TABLE::in_use.
  */
  mysql_mutex_t LOCK_table_cache;
  /** Unused TABLE objects of this instance; eviction pops from the front. */
  I_P_List <TABLE, I_P_List_adapter<TABLE, &TABLE::global_free_next,
                                    &TABLE::global_free_prev>,
            I_P_List_null_counter, I_P_List_fast_push_back<TABLE> >
    free_tables;
  /** Number of TABLE objects (used and unused) accounted to this instance. */
  ulong records;
  /** Lock attempts that had to wait since the counters were last reset. */
  uint mutex_waits;
  /** Lock attempts served immediately since the counters were last reset. */
  uint mutex_nowaits;
  /** Avoid false sharing between instances */
  char pad[CPU_LEVEL1_DCACHE_LINESIZE];

  Table_cache_instance(): records(0), mutex_waits(0), mutex_nowaits(0)
  {
    mysql_mutex_init(key_LOCK_table_cache, &LOCK_table_cache,
                     MY_MUTEX_INIT_FAST);
  }

  /** The instance is expected to be empty by the time it is destroyed. */
  ~Table_cache_instance()
  {
    mysql_mutex_destroy(&LOCK_table_cache);
    DBUG_ASSERT(free_tables.is_empty());
    DBUG_ASSERT(records == 0);
  }

  /**
    Lock table cache mutex and check contention.

    Instance is considered contested if more than 20% of mutex acquisitions
    can't be served immediately. Up to 100 000 probes may be performed to avoid
    instance activation on short sporadic peaks. 100 000 is estimated maximum
    number of queries one instance can serve in one second.

    These numbers work well on a 2 socket / 20 core / 40 threads Intel Broadwell
    system, that is expected number of instances is activated within reasonable
    warmup time. It may have to be adjusted for other systems.

    Only TABLE object acquisition is instrumented. We intentionally avoid this
    overhead on TABLE object release. All other table cache mutex acquisitions
    are considered out of hot path and are not instrumented either.

    @param n_instances  number of active instances as seen by the caller
    @param instance     index of this instance, used in log messages only

    @post LOCK_table_cache is locked by the caller.
  */
  void lock_and_check_contention(uint32 n_instances, uint32 instance)
  {
    /* Non-zero trylock result: mutex is busy, fall back to a blocking lock. */
    if (mysql_mutex_trylock(&LOCK_table_cache))
    {
      mysql_mutex_lock(&LOCK_table_cache);
      /* 20000 waits were reached before 80000 immediate acquisitions. */
      if (++mutex_waits == 20000)
      {
        if (n_instances < tc_instances)
        {
          /*
            Activate one more instance, but only if nobody else has changed
            tc_active_instances since the caller read it.
          */
          if (my_atomic_cas32_weak_explicit((int32*) &tc_active_instances,
                                            (int32*) &n_instances,
                                            (int32) n_instances + 1,
                                            MY_MEMORY_ORDER_RELAXED,
                                            MY_MEMORY_ORDER_RELAXED))
          {
            sql_print_information("Detected table cache mutex contention at instance %d: "
                                  "%d%% waits. Additional table cache instance "
                                  "activated. Number of instances after "
                                  "activation: %d.",
                                  instance + 1,
                                  mutex_waits * 100 / (mutex_nowaits + mutex_waits),
                                  n_instances + 1);
          }
        }
        /* All instances are active already: warn, but only once. */
        else if (!my_atomic_fas32_explicit((int32*) &tc_contention_warning_reported,
                                           1, MY_MEMORY_ORDER_RELAXED))
        {
          sql_print_warning("Detected table cache mutex contention at instance %d: "
                            "%d%% waits. Additional table cache instance "
                            "cannot be activated: consider raising "
                            "table_open_cache_instances. Number of active "
                            "instances: %d.",
                            instance + 1,
                            mutex_waits * 100 / (mutex_nowaits + mutex_waits),
                            n_instances);
        }
        /* Start a new measurement window. */
        mutex_waits= 0;
        mutex_nowaits= 0;
      }
    }
    /* Enough uncontended acquisitions: start a new measurement window. */
    else if (++mutex_nowaits == 80000)
    {
      mutex_waits= 0;
      mutex_nowaits= 0;
    }
  }
};
213 
214 
/** Array of table cache instances: allocated by tdc_init(), freed by tdc_deinit(). */
static Table_cache_instance *tc;
216 
217 
/**
  Close a TABLE object and free its memory.

  Deletes the triggers object, closes the table with closefrm(), releases
  the reference to the table share and finally frees the TABLE itself.
*/
static void intern_close_table(TABLE *table)
{
  delete table->triggers;
  DBUG_ASSERT(table->file);
  closefrm(table);
  /* Drop the share reference held on behalf of this TABLE object. */
  tdc_release_share(table->s);
  my_free(table);
}
226 
227 
228 /**
229   Get number of TABLE objects (used and unused) in table cache.
230 */
231 
232 uint tc_records(void)
233 {
234   ulong total= 0;
235   for (ulong i= 0; i < tc_instances; i++)
236   {
237     mysql_mutex_lock(&tc[i].LOCK_table_cache);
238     total+= tc[i].records;
239     mysql_mutex_unlock(&tc[i].LOCK_table_cache);
240   }
241   return total;
242 }
243 
244 
245 /**
246   Remove TABLE object from table cache.
247 */
248 
249 static void tc_remove_table(TABLE *table)
250 {
251   TDC_element *element= table->s->tdc;
252 
253   mysql_mutex_lock(&element->LOCK_table_share);
254   /* Wait for MDL deadlock detector to complete traversing tdc.all_tables. */
255   while (element->all_tables_refs)
256     mysql_cond_wait(&element->COND_release, &element->LOCK_table_share);
257   element->all_tables.remove(table);
258   mysql_mutex_unlock(&element->LOCK_table_share);
259 
260   intern_close_table(table);
261 }
262 
263 
264 static void tc_remove_all_unused_tables(TDC_element *element,
265                                         Share_free_tables::List *purge_tables,
266                                         bool mark_flushed)
267 {
268   TABLE *table;
269 
270   /*
271     Mark share flushed in order to ensure that it gets
272     automatically deleted once it is no longer referenced.
273 
274     Note that code in TABLE_SHARE::wait_for_old_version() assumes that
275     marking share flushed is followed by purge of unused table
276     shares.
277   */
278   if (mark_flushed)
279     element->flushed= true;
280   for (ulong i= 0; i < tc_instances; i++)
281   {
282     mysql_mutex_lock(&tc[i].LOCK_table_cache);
283     while ((table= element->free_tables[i].list.pop_front()))
284     {
285       tc[i].records--;
286       tc[i].free_tables.remove(table);
287       DBUG_ASSERT(element->all_tables_refs == 0);
288       element->all_tables.remove(table);
289       purge_tables->push_front(table);
290     }
291     mysql_mutex_unlock(&tc[i].LOCK_table_cache);
292   }
293 }
294 
295 
296 /**
297   Free all unused TABLE objects.
298 
299   While locked:
300   - remove unused objects from TABLE_SHARE::tdc.free_tables and
301     TABLE_SHARE::tdc.all_tables
302   - decrement tc_count
303 
304   While unlocked:
305   - free resources related to unused objects
306 
  @note This is called by 'handle_manager' when one wants to
        periodically flush all unused tables.
309 */
310 
/** Argument passed from tc_purge() to tc_purge_callback(). */
struct tc_purge_arg
{
  /** Collects unused TABLE objects removed from the cache, to be closed later. */
  Share_free_tables::List purge_tables;
  /** Whether visited shares must also be marked flushed. */
  bool mark_flushed;
};
316 
317 
318 static my_bool tc_purge_callback(TDC_element *element, tc_purge_arg *arg)
319 {
320   mysql_mutex_lock(&element->LOCK_table_share);
321   tc_remove_all_unused_tables(element, &arg->purge_tables, arg->mark_flushed);
322   mysql_mutex_unlock(&element->LOCK_table_share);
323   return FALSE;
324 }
325 
326 
327 void tc_purge(bool mark_flushed)
328 {
329   tc_purge_arg argument;
330   TABLE *table;
331 
332   argument.mark_flushed= mark_flushed;
333   tdc_iterate(0, (my_hash_walk_action) tc_purge_callback, &argument);
334   while ((table= argument.purge_tables.pop_front()))
335     intern_close_table(table);
336 }
337 
338 
339 /**
340   Add new TABLE object to table cache.
341 
342   @pre TABLE object is used by caller.
343 
344   Added object cannot be evicted or acquired.
345 
346   While locked:
347   - add object to TABLE_SHARE::tdc.all_tables
348   - increment tc_count
349   - evict LRU object from table cache if we reached threshold
350 
351   While unlocked:
352   - free evicted object
353 */
354 
355 void tc_add_table(THD *thd, TABLE *table)
356 {
357   uint32 i= thd->thread_id % my_atomic_load32_explicit((int32*) &tc_active_instances,
358                                                        MY_MEMORY_ORDER_RELAXED);
359   TABLE *LRU_table= 0;
360   TDC_element *element= table->s->tdc;
361 
362   DBUG_ASSERT(table->in_use == thd);
363   table->instance= i;
364   mysql_mutex_lock(&element->LOCK_table_share);
365   /* Wait for MDL deadlock detector to complete traversing tdc.all_tables. */
366   while (element->all_tables_refs)
367     mysql_cond_wait(&element->COND_release, &element->LOCK_table_share);
368   element->all_tables.push_front(table);
369   mysql_mutex_unlock(&element->LOCK_table_share);
370 
371   mysql_mutex_lock(&tc[i].LOCK_table_cache);
372   if (tc[i].records == tc_size)
373   {
374     if ((LRU_table= tc[i].free_tables.pop_front()))
375     {
376       LRU_table->s->tdc->free_tables[i].list.remove(LRU_table);
377       /* Needed if MDL deadlock detector chimes in before tc_remove_table() */
378       LRU_table->in_use= thd;
379       mysql_mutex_unlock(&tc[i].LOCK_table_cache);
380       /* Keep out of locked LOCK_table_cache */
381       tc_remove_table(LRU_table);
382     }
383     else
384     {
385       tc[i].records++;
386       mysql_mutex_unlock(&tc[i].LOCK_table_cache);
387     }
388     /* Keep out of locked LOCK_table_cache */
389     status_var_increment(thd->status_var.table_open_cache_overflows);
390   }
391   else
392   {
393     tc[i].records++;
394     mysql_mutex_unlock(&tc[i].LOCK_table_cache);
395   }
396 }
397 
398 
399 /**
400   Acquire TABLE object from table cache.
401 
402   @pre share must be protected against removal.
403 
404   Acquired object cannot be evicted or acquired again.
405 
406   @return TABLE object, or NULL if no unused objects.
407 */
408 
409 static TABLE *tc_acquire_table(THD *thd, TDC_element *element)
410 {
411   uint32 n_instances=
412     my_atomic_load32_explicit((int32*) &tc_active_instances,
413                               MY_MEMORY_ORDER_RELAXED);
414   uint32 i= thd->thread_id % n_instances;
415   TABLE *table;
416 
417   tc[i].lock_and_check_contention(n_instances, i);
418   table= element->free_tables[i].list.pop_front();
419   if (table)
420   {
421     DBUG_ASSERT(!table->in_use);
422     table->in_use= thd;
423     /* The ex-unused table must be fully functional. */
424     DBUG_ASSERT(table->db_stat && table->file);
425     /* The children must be detached from the table. */
426     DBUG_ASSERT(!table->file->extra(HA_EXTRA_IS_ATTACHED_CHILDREN));
427     tc[i].free_tables.remove(table);
428   }
429   mysql_mutex_unlock(&tc[i].LOCK_table_cache);
430   return table;
431 }
432 
433 
434 /**
435   Release TABLE object to table cache.
436 
437   @pre object is used by caller.
438 
439   Released object may be evicted or acquired again.
440 
441   While locked:
442   - if object is marked for purge, decrement tc_count
443   - add object to TABLE_SHARE::tdc.free_tables
444   - evict LRU object from table cache if we reached threshold
445 
446   While unlocked:
447   - mark object not in use by any thread
448   - free evicted/purged object
449 
450   @note Another thread may mark share for purge any moment (even
451   after version check). It means to-be-purged object may go to
452   unused lists. This other thread is expected to call tc_purge(),
453   which is synchronized with us on TABLE_SHARE::tdc.LOCK_table_share.
454 
  @note The function returns no value: a purged object is closed via
        tc_remove_table(), any other object goes back to the free lists.
458 */
459 
460 void tc_release_table(TABLE *table)
461 {
462   uint32 i= table->instance;
463   DBUG_ENTER("tc_release_table");
464   DBUG_ASSERT(table->in_use);
465   DBUG_ASSERT(table->file);
466   DBUG_ASSERT(!table->pos_in_locked_tables);
467 
468   mysql_mutex_lock(&tc[i].LOCK_table_cache);
469   if (table->needs_reopen() || table->s->tdc->flushed ||
470       tc[i].records > tc_size)
471   {
472     tc[i].records--;
473     mysql_mutex_unlock(&tc[i].LOCK_table_cache);
474     tc_remove_table(table);
475   }
476   else
477   {
478     table->in_use= 0;
479     table->s->tdc->free_tables[i].list.push_front(table);
480     tc[i].free_tables.push_back(table);
481     mysql_mutex_unlock(&tc[i].LOCK_table_cache);
482   }
483   DBUG_VOID_RETURN;
484 }
485 
486 
/**
  Assert (debug builds only) that element is in its pristine state: no share
  attached, no references, all lists empty and not linked into unused_shares.
*/
static void tdc_assert_clean_share(TDC_element *element)
{
  DBUG_ASSERT(element->share == 0);
  DBUG_ASSERT(element->ref_count == 0);
  DBUG_ASSERT(element->m_flush_tickets.is_empty());
  DBUG_ASSERT(element->all_tables.is_empty());
  /* The loop is compiled only when assertions are enabled. */
#ifndef DBUG_OFF
  for (ulong i= 0; i < tc_instances; i++)
    DBUG_ASSERT(element->free_tables[i].list.is_empty());
#endif
  DBUG_ASSERT(element->all_tables_refs == 0);
  DBUG_ASSERT(element->next == 0);
  DBUG_ASSERT(element->prev == 0);
}
501 
502 
503 /**
504   Delete share from hash and free share object.
505 */
506 
/*
  Caller must own element->LOCK_table_share; the mutex is released by this
  function before the element is deleted from the hash.
*/
static void tdc_delete_share_from_hash(TDC_element *element)
{
  THD *thd= current_thd;
  LF_PINS *pins;
  TABLE_SHARE *share;
  DBUG_ENTER("tdc_delete_share_from_hash");

  mysql_mutex_assert_owner(&element->LOCK_table_share);
  /* Detach share from element, it is freed at the very end. */
  share= element->share;
  DBUG_ASSERT(share);
  element->share= 0;
  PSI_CALL_release_table_share(share->m_psi);
  share->m_psi= 0;

  if (!element->m_flush_tickets.is_empty())
  {
    /* Grant every pending flush ticket... */
    Wait_for_flush_list::Iterator it(element->m_flush_tickets);
    Wait_for_flush *ticket;
    while ((ticket= it++))
      (void) ticket->get_ctx()->m_wait.set_status(MDL_wait::GRANTED);

    /* ...and sleep on COND_release until the ticket list becomes empty. */
    do
    {
      mysql_cond_wait(&element->COND_release, &element->LOCK_table_share);
    } while (!element->m_flush_tickets.is_empty());
  }

  mysql_mutex_unlock(&element->LOCK_table_share);

  /* Use pins of current THD if there is one, temporary pins otherwise. */
  if (thd)
  {
    fix_thd_pins(thd);
    pins= thd->tdc_hash_pins;
  }
  else
    pins= lf_hash_get_pins(&tdc_hash);

  DBUG_ASSERT(pins); // What can we do about it?
  tdc_assert_clean_share(element);
  lf_hash_delete(&tdc_hash, pins, element->m_key, element->m_key_length);
  /* Temporary pins are given back, THD pins stay with THD. */
  if (!thd)
    lf_hash_put_pins(pins);
  free_table_share(share);
  DBUG_VOID_RETURN;
}
552 
553 
554 /**
  Prepare table share for use with table definition cache.
556 */
557 
558 static void lf_alloc_constructor(uchar *arg)
559 {
560   TDC_element *element= (TDC_element*) (arg + LF_HASH_OVERHEAD);
561   DBUG_ENTER("lf_alloc_constructor");
562   mysql_mutex_init(key_TABLE_SHARE_LOCK_table_share,
563                    &element->LOCK_table_share, MY_MUTEX_INIT_FAST);
564   mysql_cond_init(key_TABLE_SHARE_COND_release, &element->COND_release, 0);
565   element->m_flush_tickets.empty();
566   element->all_tables.empty();
567   for (ulong i= 0; i < tc_instances; i++)
568     element->free_tables[i].list.empty();
569   element->all_tables_refs= 0;
570   element->share= 0;
571   element->ref_count= 0;
572   element->next= 0;
573   element->prev= 0;
574   DBUG_VOID_RETURN;
575 }
576 
577 
578 /**
579   Release table definition cache specific resources of table share.
580 */
581 
static void lf_alloc_destructor(uchar *arg)
{
  /* TDC_element is located right after the hash node overhead. */
  TDC_element *element= (TDC_element*) (arg + LF_HASH_OVERHEAD);
  DBUG_ENTER("lf_alloc_destructor");
  /* Element must not be in use by the time it is destroyed. */
  tdc_assert_clean_share(element);
  /* Destroy what lf_alloc_constructor() initialized. */
  mysql_cond_destroy(&element->COND_release);
  mysql_mutex_destroy(&element->LOCK_table_share);
  DBUG_VOID_RETURN;
}
591 
592 
593 static void tdc_hash_initializer(LF_HASH *,
594                                  TDC_element *element, LEX_STRING *key)
595 {
596   memcpy(element->m_key, key->str, key->length);
597   element->m_key_length= (uint)key->length;
598   tdc_assert_clean_share(element);
599 }
600 
601 
602 static uchar *tdc_hash_key(const TDC_element *element, size_t *length,
603                            my_bool)
604 {
605   *length= element->m_key_length;
606   return (uchar*) element->m_key;
607 }
608 
609 
610 /**
611   Initialize table definition cache.
612 */
613 
/*
  Returns false on success, true if the array of table cache instances
  could not be allocated.
*/
bool tdc_init(void)
{
  DBUG_ENTER("tdc_init");
#ifdef HAVE_PSI_INTERFACE
  mysql_mutex_register("sql", all_tc_mutexes, array_elements(all_tc_mutexes));
  mysql_cond_register("sql", all_tc_conds, array_elements(all_tc_conds));
#endif
  /* Extra instance is allocated to avoid false sharing */
  if (!(tc= new Table_cache_instance[tc_instances + 1]))
    DBUG_RETURN(true);
  tdc_inited= true;
  mysql_mutex_init(key_LOCK_unused_shares, &LOCK_unused_shares,
                   MY_MUTEX_INIT_FAST);
  tdc_version= 1L;  /* Increments on each reload */
  /*
    Element size is extended by (tc_instances - 1) Share_free_tables
    entries; lf_alloc_constructor() and tdc_assert_clean_share() access
    free_tables[0 .. tc_instances - 1] of every element.
  */
  lf_hash_init(&tdc_hash, sizeof(TDC_element) +
                          sizeof(Share_free_tables) * (tc_instances - 1),
               LF_HASH_UNIQUE, 0, 0,
               (my_hash_get_key) tdc_hash_key,
               &my_charset_bin);
  /* Hooks invoked by the hash for element construction and destruction. */
  tdc_hash.alloc.constructor= lf_alloc_constructor;
  tdc_hash.alloc.destructor= lf_alloc_destructor;
  tdc_hash.initializer= (lf_hash_initializer) tdc_hash_initializer;
  DBUG_RETURN(false);
}
638 
639 
640 /**
641   Notify table definition cache that process of shutting down server
642   has started so it has to keep number of TABLE and TABLE_SHARE objects
643   minimal in order to reduce number of references to pluggable engines.
644 */
645 
646 void tdc_start_shutdown(void)
647 {
648   DBUG_ENTER("table_def_start_shutdown");
649   if (tdc_inited)
650   {
651     /*
652       Ensure that TABLE and TABLE_SHARE objects which are created for
653       tables that are open during process of plugins' shutdown are
654       immediately released. This keeps number of references to engine
655       plugins minimal and allows shutdown to proceed smoothly.
656     */
657     tdc_size= 0;
658     tc_size= 0;
659     /* Free all cached but unused TABLEs and TABLE_SHAREs. */
660     close_cached_tables(NULL, NULL, FALSE, LONG_TIMEOUT);
661   }
662   DBUG_VOID_RETURN;
663 }
664 
665 
666 /**
667   Deinitialize table definition cache.
668 */
669 
670 void tdc_deinit(void)
671 {
672   DBUG_ENTER("tdc_deinit");
673   if (tdc_inited)
674   {
675     tdc_inited= false;
676     lf_hash_destroy(&tdc_hash);
677     mysql_mutex_destroy(&LOCK_unused_shares);
678     delete [] tc;
679   }
680   DBUG_VOID_RETURN;
681 }
682 
683 
684 /**
685   Get number of cached table definitions.
686 
687   @return Number of cached table definitions
688 */
689 
690 ulong tdc_records(void)
691 {
692   return my_atomic_load32_explicit(&tdc_hash.count, MY_MEMORY_ORDER_RELAXED);
693 }
694 
695 
/**
  Purge unused TABLE_SHARE objects from table definition cache.

  @param all  if true, delete all unused shares; otherwise delete unused
              shares only while number of cached definitions exceeds tdc_size
*/
void tdc_purge(bool all)
{
  DBUG_ENTER("tdc_purge");
  while (all || tdc_records() > tdc_size)
  {
    TDC_element *element;

    mysql_mutex_lock(&LOCK_unused_shares);
    /* Take the share at the head of the unused list; stop if there is none. */
    if (!(element= unused_shares.pop_front()))
    {
      mysql_mutex_unlock(&LOCK_unused_shares);
      break;
    }

    /* Concurrent thread may start using share again, reset prev and next. */
    element->prev= 0;
    element->next= 0;
    mysql_mutex_lock(&element->LOCK_table_share);
    /* Share got referenced again: leave it alone and try the next one. */
    if (element->ref_count)
    {
      mysql_mutex_unlock(&element->LOCK_table_share);
      mysql_mutex_unlock(&LOCK_unused_shares);
      continue;
    }
    mysql_mutex_unlock(&LOCK_unused_shares);

    /* Called with LOCK_table_share held, which the callee releases. */
    tdc_delete_share_from_hash(element);
  }
  DBUG_VOID_RETURN;
}
726 
727 
728 /**
729   Lock table share.
730 
731   Find table share with given db.table_name in table definition cache. Return
732   locked table share if found.
733 
734   Locked table share means:
735   - table share is protected against removal from table definition cache
736   - no other thread can acquire/release table share
737 
738   Caller is expected to unlock table share with tdc_unlock_share().
739 
740   @retval 0 Share not found
741   @retval MY_ERRPTR OOM
742   @retval ptr Pointer to locked table share
743 */
744 
745 TDC_element *tdc_lock_share(THD *thd, const char *db, const char *table_name)
746 {
747   TDC_element *element;
748   char key[MAX_DBKEY_LENGTH];
749 
750   DBUG_ENTER("tdc_lock_share");
751   if (unlikely(fix_thd_pins(thd)))
752     DBUG_RETURN((TDC_element*) MY_ERRPTR);
753 
754   element= (TDC_element *) lf_hash_search(&tdc_hash, thd->tdc_hash_pins,
755                                           (uchar*) key,
756                                           tdc_create_key(key, db, table_name));
757   if (element)
758   {
759     mysql_mutex_lock(&element->LOCK_table_share);
760     if (unlikely(!element->share || element->share->error))
761     {
762       mysql_mutex_unlock(&element->LOCK_table_share);
763       element= 0;
764     }
765     lf_hash_search_unpin(thd->tdc_hash_pins);
766   }
767 
768   DBUG_RETURN(element);
769 }
770 
771 
772 /**
773   Unlock share locked by tdc_lock_share().
774 */
775 
void tdc_unlock_share(TDC_element *element)
{
  DBUG_ENTER("tdc_unlock_share");
  /* Release the mutex which tdc_lock_share() left locked. */
  mysql_mutex_unlock(&element->LOCK_table_share);
  DBUG_VOID_RETURN;
}
782 
783 
784 /*
785   Get TABLE_SHARE for a table.
786 
787   tdc_acquire_share()
788   thd                   Thread handle
789   tl                    Table that should be opened
  flags                 operation: what to open, a table or a view
791   out_table             TABLE for the requested table
792 
793   IMPLEMENTATION
794     Get a table definition from the table definition cache.
795     If it doesn't exist, create a new from the table definition file.
796 
797   RETURN
798    0  Error
799    #  Share for table
800 */
801 
TABLE_SHARE *tdc_acquire_share(THD *thd, TABLE_LIST *tl, uint flags,
                               TABLE **out_table)
{
  TABLE_SHARE *share;
  TDC_element *element;
  const char *key;
  uint key_length= get_table_def_key(tl, &key);
  my_hash_value_type hash_value= tl->mdl_request.key.tc_hash_value();
  bool was_unused;
  DBUG_ENTER("tdc_acquire_share");

  /* Hash pins of the thread are needed for all hash operations below. */
  if (fix_thd_pins(thd))
    DBUG_RETURN(0);

retry:
  /* Loop body runs only while there is no element for the key in the hash. */
  while (!(element= (TDC_element*) lf_hash_search_using_hash_value(&tdc_hash,
                    thd->tdc_hash_pins, hash_value, (uchar*) key, key_length)))
  {
    LEX_STRING tmp= { const_cast<char*>(key), key_length };
    int res= lf_hash_insert(&tdc_hash, thd->tdc_hash_pins, (uchar*) &tmp);

    /*
      -1 is treated as failure. On 1 the search is repeated; presumably it
      means an element with this key was inserted concurrently (TODO confirm
      against lf_hash_insert()).
    */
    if (res == -1)
      DBUG_RETURN(0);
    else if (res == 1)
      continue;

    /* Look up the element that was just inserted. */
    element= (TDC_element*) lf_hash_search_using_hash_value(&tdc_hash,
             thd->tdc_hash_pins, hash_value, (uchar*) key, key_length);
    lf_hash_search_unpin(thd->tdc_hash_pins);
    DBUG_ASSERT(element);

    if (!(share= alloc_table_share(tl->db.str, tl->table_name.str, key, key_length)))
    {
      /* Remove the element inserted above, it has no share. */
      lf_hash_delete(&tdc_hash, thd->tdc_hash_pins, key, key_length);
      DBUG_RETURN(0);
    }

    /* note that tdc_acquire_share() *always* uses discovery */
    open_table_def(thd, share, flags | GTS_USE_DISCOVERY);

    if (checked_unlikely(share->error))
    {
      free_table_share(share);
      lf_hash_delete(&tdc_hash, thd->tdc_hash_pins, key, key_length);
      DBUG_RETURN(0);
    }

    /* Attach the share to the element and take the first reference. */
    mysql_mutex_lock(&element->LOCK_table_share);
    element->share= share;
    share->tdc= element;
    element->ref_count++;
    element->version= tdc_refresh_version();
    element->flushed= false;
    mysql_mutex_unlock(&element->LOCK_table_share);

    /* A definition was added: evict unused shares above tdc_size. */
    tdc_purge(false);
    if (out_table)
    {
      /* A freshly created share has no cached TABLE objects. */
      status_var_increment(thd->status_var.table_open_cache_misses);
      *out_table= 0;
    }
    share->m_psi= PSI_CALL_get_table_share(false, share);
    goto end;
  }

  /* cannot force discovery of a cached share */
  DBUG_ASSERT(!(flags & GTS_FORCE_DISCOVERY));

  /* Try to reuse an unused TABLE object of this share first. */
  if (out_table && (flags & GTS_TABLE))
  {
    if ((*out_table= tc_acquire_table(thd, element)))
    {
      lf_hash_search_unpin(thd->tdc_hash_pins);
      DBUG_ASSERT(!(flags & GTS_NOLOCK));
      DBUG_ASSERT(element->share);
      DBUG_ASSERT(!element->share->error);
      DBUG_ASSERT(!element->share->is_view);
      status_var_increment(thd->status_var.table_open_cache_hits);
      DBUG_RETURN(element->share);
    }
    status_var_increment(thd->status_var.table_open_cache_misses);
  }

  mysql_mutex_lock(&element->LOCK_table_share);
  /* Element without share: start over from the hash search. */
  if (!(share= element->share))
  {
    mysql_mutex_unlock(&element->LOCK_table_share);
    lf_hash_search_unpin(thd->tdc_hash_pins);
    goto retry;
  }
  lf_hash_search_unpin(thd->tdc_hash_pins);

  /*
     We found an existing table definition. Return it if we didn't get
     an error when reading the table definition from file.
  */
  if (unlikely(share->error))
  {
    open_table_error(share, share->error, share->open_errno);
    goto err;
  }

  /* Object kind (view or table) must be among the kinds the caller asked for. */
  if (share->is_view && !(flags & GTS_VIEW))
  {
    open_table_error(share, OPEN_FRM_NOT_A_TABLE, ENOENT);
    goto err;
  }
  if (!share->is_view && !(flags & GTS_TABLE))
  {
    open_table_error(share, OPEN_FRM_NOT_A_VIEW, ENOENT);
    goto err;
  }

  was_unused= !element->ref_count;
  element->ref_count++;
  mysql_mutex_unlock(&element->LOCK_table_share);
  if (was_unused)
  {
    mysql_mutex_lock(&LOCK_unused_shares);
    if (element->prev)
    {
      /*
        Share was not used before and it was in the unused_shares list.
        Unlink share from this list.
      */
      DBUG_PRINT("info", ("Unlinking from not used list"));
      unused_shares.remove(element);
      element->next= 0;
      element->prev= 0;
    }
    mysql_mutex_unlock(&LOCK_unused_shares);
  }

end:
  DBUG_PRINT("exit", ("share: %p  ref_count: %u",
                      share, share->tdc->ref_count));
  if (flags & GTS_NOLOCK)
  {
    tdc_release_share(share);
    /*
      if GTS_NOLOCK is requested, the returned share pointer cannot be used,
      the share it points to may go away any moment.
      But perhaps the caller is only interested to know whether a share or
      table existed?
      Let's return an invalid pointer here to catch dereferencing attempts.
    */
    share= (TABLE_SHARE*) 1;
  }
  DBUG_RETURN(share);

err:
  /* Reached with LOCK_table_share held and no reference taken. */
  mysql_mutex_unlock(&element->LOCK_table_share);
  DBUG_RETURN(0);
}
956 
957 
958 /**
959   Release table share acquired by tdc_acquire_share().
960 */
961 
void tdc_release_share(TABLE_SHARE *share)
{
  DBUG_ENTER("tdc_release_share");

  mysql_mutex_lock(&share->tdc->LOCK_table_share);
  DBUG_PRINT("enter",
             ("share: %p  table: %s.%s  ref_count: %u  version: %lld",
              share, share->db.str, share->table_name.str,
              share->tdc->ref_count, share->tdc->version));
  DBUG_ASSERT(share->tdc->ref_count);

  /* Not the last reference: only LOCK_table_share is needed. */
  if (share->tdc->ref_count > 1)
  {
    share->tdc->ref_count--;
    if (!share->is_view)
      mysql_cond_broadcast(&share->tdc->COND_release);
    mysql_mutex_unlock(&share->tdc->LOCK_table_share);
    DBUG_VOID_RETURN;
  }
  mysql_mutex_unlock(&share->tdc->LOCK_table_share);

  /*
    Possibly the last reference: LOCK_unused_shares is taken first, then
    LOCK_table_share again. ref_count may have changed while no mutex was
    held, so it is re-checked after the decrement.
  */
  mysql_mutex_lock(&LOCK_unused_shares);
  mysql_mutex_lock(&share->tdc->LOCK_table_share);
  if (--share->tdc->ref_count)
  {
    if (!share->is_view)
      mysql_cond_broadcast(&share->tdc->COND_release);
    mysql_mutex_unlock(&share->tdc->LOCK_table_share);
    mysql_mutex_unlock(&LOCK_unused_shares);
    DBUG_VOID_RETURN;
  }
  /* Flushed shares and shares above the threshold are deleted right away. */
  if (share->tdc->flushed || tdc_records() > tdc_size)
  {
    mysql_mutex_unlock(&LOCK_unused_shares);
    /* Called with LOCK_table_share held, which the callee releases. */
    tdc_delete_share_from_hash(share->tdc);
    DBUG_VOID_RETURN;
  }
  /* Link share last in unused_shares list */
  DBUG_PRINT("info", ("moving share to unused list"));
  DBUG_ASSERT(share->tdc->next == 0);
  unused_shares.push_back(share->tdc);
  mysql_mutex_unlock(&share->tdc->LOCK_table_share);
  mysql_mutex_unlock(&LOCK_unused_shares);
  DBUG_VOID_RETURN;
}
1007 
1008 
/**
   Auxiliary function which kills delayed insert threads that use
   a particular table, identified by its table definition cache element.

   @param element  Table definition cache element of the table.

   @pre Caller should hold the TDC_element::LOCK_table_share mutex.
*/
1017 
1018 static void kill_delayed_threads_for_table(TDC_element *element)
1019 {
1020   All_share_tables_list::Iterator it(element->all_tables);
1021   TABLE *tab;
1022 
1023   mysql_mutex_assert_owner(&element->LOCK_table_share);
1024 
1025   if (!delayed_insert_threads)
1026     return;
1027 
1028   while ((tab= it++))
1029   {
1030     THD *in_use= tab->in_use;
1031 
1032     DBUG_ASSERT(in_use && tab->s->tdc->flushed);
1033     if ((in_use->system_thread & SYSTEM_THREAD_DELAYED_INSERT) &&
1034         ! in_use->killed)
1035     {
1036       in_use->killed= KILL_SYSTEM_THREAD;
1037       mysql_mutex_lock(&in_use->mysys_var->mutex);
1038       if (in_use->mysys_var->current_cond)
1039       {
1040         mysql_mutex_lock(in_use->mysys_var->current_mutex);
1041         mysql_cond_broadcast(in_use->mysys_var->current_cond);
1042         mysql_mutex_unlock(in_use->mysys_var->current_mutex);
1043       }
1044       mysql_mutex_unlock(&in_use->mysys_var->mutex);
1045     }
1046   }
1047 }
1048 
1049 
1050 /**
1051    Remove all or some (depending on parameter) instances of TABLE and
1052    TABLE_SHARE from the table definition cache.
1053 
1054    @param  thd          Thread context
1055    @param  remove_type  Type of removal:
1056                         TDC_RT_REMOVE_ALL     - remove all TABLE instances and
1057                                                 TABLE_SHARE instance. There
1058                                                 should be no used TABLE objects
1059                                                 and caller should have exclusive
1060                                                 metadata lock on the table.
1061                         TDC_RT_REMOVE_NOT_OWN - remove all TABLE instances
1062                                                 except those that belong to
1063                                                 this thread. There should be
1064                                                 no TABLE objects used by other
1065                                                 threads and caller should have
1066                                                 exclusive metadata lock on the
1067                                                 table.
1068                         TDC_RT_REMOVE_UNUSED  - remove all unused TABLE
1069                                                 instances (if there are no
1070                                                 used instances will also
1071                                                 remove TABLE_SHARE).
1072                         TDC_RT_REMOVE_NOT_OWN_KEEP_SHARE -
1073                                                 remove all TABLE instances
1074                                                 except those that belong to
1075                                                 this thread, but don't mark
1076                                                 TABLE_SHARE as old. There
1077                                                 should be no TABLE objects
1078                                                 used by other threads and
1079                                                 caller should have exclusive
1080                                                 metadata lock on the table.
1081    @param  db           Name of database
1082    @param  table_name   Name of table
1083    @param  kill_delayed_threads     If TRUE, kill INSERT DELAYED threads
1084 
1085    @note It assumes that table instances are already not used by any
1086    (other) thread (this should be achieved by using meta-data locks).
1087 */
1088 
1089 bool tdc_remove_table(THD *thd, enum_tdc_remove_table_type remove_type,
1090                       const char *db, const char *table_name,
1091                       bool kill_delayed_threads)
1092 {
1093   Share_free_tables::List purge_tables;
1094   TABLE *table;
1095   TDC_element *element;
1096   uint my_refs= 1;
1097   DBUG_ENTER("tdc_remove_table");
1098   DBUG_PRINT("enter",("name: %s  remove_type: %d", table_name, remove_type));
1099 
1100   DBUG_ASSERT(remove_type == TDC_RT_REMOVE_UNUSED ||
1101               thd->mdl_context.is_lock_owner(MDL_key::TABLE, db, table_name,
1102                                              MDL_EXCLUSIVE));
1103 
1104 
1105   mysql_mutex_lock(&LOCK_unused_shares);
1106   if (!(element= tdc_lock_share(thd, db, table_name)))
1107   {
1108     mysql_mutex_unlock(&LOCK_unused_shares);
1109     DBUG_ASSERT(remove_type != TDC_RT_REMOVE_NOT_OWN_KEEP_SHARE);
1110     DBUG_RETURN(false);
1111   }
1112 
1113   DBUG_ASSERT(element != MY_ERRPTR); // What can we do about it?
1114 
1115   if (!element->ref_count)
1116   {
1117     if (element->prev)
1118     {
1119       unused_shares.remove(element);
1120       element->prev= 0;
1121       element->next= 0;
1122     }
1123     mysql_mutex_unlock(&LOCK_unused_shares);
1124 
1125     tdc_delete_share_from_hash(element);
1126     DBUG_RETURN(true);
1127   }
1128   mysql_mutex_unlock(&LOCK_unused_shares);
1129 
1130   element->ref_count++;
1131 
1132   tc_remove_all_unused_tables(element, &purge_tables,
1133                               remove_type != TDC_RT_REMOVE_NOT_OWN_KEEP_SHARE);
1134 
1135   if (kill_delayed_threads)
1136     kill_delayed_threads_for_table(element);
1137 
1138   if (remove_type == TDC_RT_REMOVE_NOT_OWN ||
1139       remove_type == TDC_RT_REMOVE_NOT_OWN_KEEP_SHARE)
1140   {
1141     All_share_tables_list::Iterator it(element->all_tables);
1142     while ((table= it++))
1143     {
1144       if (table->in_use == thd)
1145         my_refs++;
1146     }
1147   }
1148   mysql_mutex_unlock(&element->LOCK_table_share);
1149 
1150   while ((table= purge_tables.pop_front()))
1151     intern_close_table(table);
1152 
1153   if (remove_type != TDC_RT_REMOVE_UNUSED)
1154   {
1155     /*
1156       Even though current thread holds exclusive metadata lock on this share
1157       (asserted above), concurrent FLUSH TABLES threads may be in process of
1158       closing unused table instances belonging to this share. E.g.:
1159       thr1 (FLUSH TABLES): table= share->tdc.free_tables.pop_front();
1160       thr1 (FLUSH TABLES): share->tdc.all_tables.remove(table);
1161       thr2 (ALTER TABLE): tdc_remove_table();
1162       thr1 (FLUSH TABLES): intern_close_table(table);
1163 
1164       Current remove type assumes that all table instances (except for those
1165       that are owned by current thread) must be closed before
1166       thd_remove_table() returns. Wait for such tables now.
1167 
1168       intern_close_table() decrements ref_count and signals COND_release. When
1169       ref_count drops down to number of references owned by current thread
1170       waiting is completed.
1171 
1172       Unfortunately TABLE_SHARE::wait_for_old_version() cannot be used here
1173       because it waits for all table instances, whereas we have to wait only
1174       for those that are not owned by current thread.
1175     */
1176     mysql_mutex_lock(&element->LOCK_table_share);
1177     while (element->ref_count > my_refs)
1178       mysql_cond_wait(&element->COND_release, &element->LOCK_table_share);
1179     DBUG_ASSERT(element->all_tables.is_empty() ||
1180                 remove_type != TDC_RT_REMOVE_ALL);
1181 #ifndef DBUG_OFF
1182     if (remove_type == TDC_RT_REMOVE_NOT_OWN ||
1183         remove_type == TDC_RT_REMOVE_NOT_OWN_KEEP_SHARE)
1184     {
1185       All_share_tables_list::Iterator it(element->all_tables);
1186       while ((table= it++))
1187         DBUG_ASSERT(table->in_use == thd);
1188     }
1189 #endif
1190     mysql_mutex_unlock(&element->LOCK_table_share);
1191   }
1192 
1193   tdc_release_share(element->share);
1194 
1195   DBUG_RETURN(true);
1196 }
1197 
1198 
/**
  Check if table's share is being removed from the table definition
  cache and, if yes, wait until the flush is complete.

  @param thd             Thread context.
  @param db              Database name of the table whose share is checked.
  @param table_name      Name of the table whose share is checked.
  @param wait_timeout    Timeout for waiting.
  @param deadlock_weight Weight of this wait for deadlock detector.
  @param refresh_version Version to compare the share against: the wait
                         happens only if the share is marked as flushed and
                         its version is lower than this value.

  @retval 0       Success. Share is up to date or has been flushed.
  @retval 1       Error (OOM, was killed, the wait resulted
                  in a deadlock or timeout). Reported.
*/
1212 
1213 int tdc_wait_for_old_version(THD *thd, const char *db, const char *table_name,
1214                              ulong wait_timeout, uint deadlock_weight, tdc_version_t refresh_version)
1215 {
1216   TDC_element *element;
1217 
1218   if (!(element= tdc_lock_share(thd, db, table_name)))
1219     return FALSE;
1220   else if (element == MY_ERRPTR)
1221     return TRUE;
1222   else if (element->flushed && refresh_version > element->version)
1223   {
1224     struct timespec abstime;
1225     set_timespec(abstime, wait_timeout);
1226     return element->share->wait_for_old_version(thd, &abstime, deadlock_weight);
1227   }
1228   tdc_unlock_share(element);
1229   return FALSE;
1230 }
1231 
1232 
1233 tdc_version_t tdc_refresh_version(void)
1234 {
1235   return (tdc_version_t)my_atomic_load64_explicit(&tdc_version, MY_MEMORY_ORDER_RELAXED);
1236 }
1237 
1238 
1239 tdc_version_t tdc_increment_refresh_version(void)
1240 {
1241   tdc_version_t v= (tdc_version_t)my_atomic_add64_explicit(&tdc_version, 1, MY_MEMORY_ORDER_RELAXED);
1242   DBUG_PRINT("tcache", ("incremented global refresh_version to: %lld", v));
1243   return v + 1;
1244 }
1245 
1246 
/**
  Iterate table definition cache.

  Object is protected against removal from table definition cache.

  @note Returned TABLE_SHARE is not guaranteed to be fully initialized:
  tdc_acquire_share() may have added a new share but not opened it yet.
  If the caller needs a fully initialized share, it must lock the table
  share mutex.
*/
1256 
/*
  State shared by tdc_iterate() and eliminate_duplicates() when iteration
  without duplicates is requested.
*/
struct eliminate_duplicates_arg
{
  HASH hash;                    /* keys of the elements already visited */
  MEM_ROOT root;                /* memory for the copies of those keys */
  my_hash_walk_action action;   /* callback supplied by the caller */
  void *argument;               /* argument to pass to the callback */
};
1264 
1265 
1266 static uchar *eliminate_duplicates_get_key(const uchar *element, size_t *length,
1267                                        my_bool not_used __attribute__((unused)))
1268 {
1269   LEX_STRING *key= (LEX_STRING *) element;
1270   *length= key->length;
1271   return (uchar *) key->str;
1272 }
1273 
1274 
1275 static my_bool eliminate_duplicates(TDC_element *element,
1276                                     eliminate_duplicates_arg *arg)
1277 {
1278   LEX_STRING *key= (LEX_STRING *) alloc_root(&arg->root, sizeof(LEX_STRING));
1279 
1280   if (!key || !(key->str= (char*) memdup_root(&arg->root, element->m_key,
1281                                               element->m_key_length)))
1282     return TRUE;
1283 
1284   key->length= element->m_key_length;
1285 
1286   if (my_hash_insert(&arg->hash, (uchar *) key))
1287     return FALSE;
1288 
1289   return arg->action(element, arg->argument);
1290 }
1291 
1292 
1293 int tdc_iterate(THD *thd, my_hash_walk_action action, void *argument,
1294                 bool no_dups)
1295 {
1296   eliminate_duplicates_arg no_dups_argument;
1297   LF_PINS *pins;
1298   myf alloc_flags= 0;
1299   uint hash_flags= HASH_UNIQUE;
1300   int res;
1301 
1302   if (thd)
1303   {
1304     fix_thd_pins(thd);
1305     pins= thd->tdc_hash_pins;
1306     alloc_flags= MY_THREAD_SPECIFIC;
1307     hash_flags|= HASH_THREAD_SPECIFIC;
1308   }
1309   else
1310     pins= lf_hash_get_pins(&tdc_hash);
1311 
1312   if (!pins)
1313     return ER_OUTOFMEMORY;
1314 
1315   if (no_dups)
1316   {
1317     init_alloc_root(&no_dups_argument.root, "no_dups", 4096, 4096,
1318                     MYF(alloc_flags));
1319     my_hash_init(&no_dups_argument.hash, &my_charset_bin, tdc_records(), 0, 0,
1320                  eliminate_duplicates_get_key, 0, hash_flags);
1321     no_dups_argument.action= action;
1322     no_dups_argument.argument= argument;
1323     action= (my_hash_walk_action) eliminate_duplicates;
1324     argument= &no_dups_argument;
1325   }
1326 
1327   res= lf_hash_iterate(&tdc_hash, pins, action, argument);
1328 
1329   if (!thd)
1330     lf_hash_put_pins(pins);
1331 
1332   if (no_dups)
1333   {
1334     my_hash_free(&no_dups_argument.hash);
1335     free_root(&no_dups_argument.root, MYF(0));
1336   }
1337   return res;
1338 }
1339