1 /* Copyright (c) 2000, 2012, Oracle and/or its affiliates.
2    Copyright (c) 2010, 2011 Monty Program Ab
3    Copyright (C) 2013 Sergey Vojtovich and MariaDB Foundation
4 
5    This program is free software; you can redistribute it and/or modify
6    it under the terms of the GNU General Public License as published by
7    the Free Software Foundation; version 2 of the License.
8 
9    This program is distributed in the hope that it will be useful,
10    but WITHOUT ANY WARRANTY; without even the implied warranty of
11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12    GNU General Public License for more details.
13 
14    You should have received a copy of the GNU General Public License
15    along with this program; if not, write to the Free Software
16    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335  USA */
17 
18 /**
19   @file
20   Table definition cache and table cache implementation.
21 
22   Table definition cache actions:
23   - add new TABLE_SHARE object to cache (tdc_acquire_share())
24   - acquire TABLE_SHARE object from cache (tdc_acquire_share())
25   - release TABLE_SHARE object to cache (tdc_release_share())
26   - purge unused TABLE_SHARE objects from cache (tdc_purge())
27   - remove TABLE_SHARE object from cache (tdc_remove_table())
28   - get number of TABLE_SHARE objects in cache (tdc_records())
29 
30   Table cache actions:
31   - add new TABLE object to cache (tc_add_table())
32   - acquire TABLE object from cache (tc_acquire_table())
33   - release TABLE object to cache (tc_release_table())
34   - purge unused TABLE objects from cache (tc_purge())
35   - purge unused TABLE objects of a table from cache (tdc_remove_table())
36   - get number of TABLE objects in cache (tc_records())
37 
38   Dependencies:
39   - close_cached_tables(): flush tables on shutdown
40   - alloc_table_share()
41   - free_table_share()
42 
43   Table cache invariants:
44   - TABLE_SHARE::free_tables shall not contain objects with TABLE::in_use != 0
45   - TABLE_SHARE::free_tables shall not receive new objects if
46     TABLE_SHARE::tdc.flushed is true
47 */
48 
#include "mariadb.h"
#include <new>
#include "lf.h"
#include "table.h"
#include "sql_base.h"
53 
54 
/** Configuration. */
ulong tdc_size; /**< Table definition cache threshold for LRU eviction. */
ulong tc_size; /**< Table cache threshold for LRU eviction. */
/**
  Number of table cache instances that may be used. tdc_init() sizes the
  instance array and the per-share free lists from this value, and it is the
  upper bound for tc_active_instances.
*/
uint32 tc_instances;
/**
  Number of table cache instances currently in use. Starts at 1 and is
  incremented by Table_cache_instance::lock_and_check_contention() when
  contention is detected and tc_instances permits.
*/
static std::atomic<uint32_t> tc_active_instances(1);
/**
  Set when the "additional instance cannot be activated" warning is printed,
  so that the warning is not repeated.
*/
static std::atomic<bool> tc_contention_warning_reported;
61 
/** Data collections. */
static LF_HASH tdc_hash; /**< Collection of TABLE_SHARE objects. */
/**
  Collection of unused TABLE_SHARE objects.

  Elements are linked through TDC_element::next and TDC_element::prev.
  tdc_release_share() appends at the back, tdc_purge() evicts from the front.
*/
static
I_P_List <TDC_element,
          I_P_List_adapter<TDC_element, &TDC_element::next, &TDC_element::prev>,
          I_P_List_null_counter,
          I_P_List_fast_push_back<TDC_element> > unused_shares;

/** Set by tdc_init() on success and cleared by tdc_deinit(). */
static bool tdc_inited;


/**
  Protects unused shares list.

  TDC_element::prev
  TDC_element::next
  unused_shares
*/

static mysql_mutex_t LOCK_unused_shares;
83 
#ifdef HAVE_PSI_INTERFACE
/*
  Performance schema instrumentation keys for the mutexes and the condition
  variable used in this file. They are registered by tdc_init().
*/
static PSI_mutex_key key_LOCK_unused_shares, key_TABLE_SHARE_LOCK_table_share,
                     key_LOCK_table_cache;
static PSI_mutex_info all_tc_mutexes[]=
{
  { &key_LOCK_unused_shares, "LOCK_unused_shares", PSI_FLAG_GLOBAL },
  { &key_TABLE_SHARE_LOCK_table_share, "TABLE_SHARE::tdc.LOCK_table_share", 0 },
  { &key_LOCK_table_cache, "LOCK_table_cache", 0 }
};

static PSI_cond_key key_TABLE_SHARE_COND_release;
static PSI_cond_info all_tc_conds[]=
{
  { &key_TABLE_SHARE_COND_release, "TABLE_SHARE::tdc.COND_release", 0 }
};
#endif
100 
101 
fix_thd_pins(THD * thd)102 static int fix_thd_pins(THD *thd)
103 {
104   return thd->tdc_hash_pins ? 0 :
105          (thd->tdc_hash_pins= lf_hash_get_pins(&tdc_hash)) == 0;
106 }
107 
108 
109 /*
  Auxiliary routines for manipulating per-share all/unused lists
  and tc_count counter.
112   Responsible for preserving invariants between those lists, counter
113   and TABLE::in_use member.
114   In fact those routines implement sort of implicit table cache as
115   part of table definition cache.
116 */
117 
118 struct Table_cache_instance
119 {
120   /**
121     Protects free_tables (TABLE::global_free_next and TABLE::global_free_prev),
122     records, Share_free_tables::List (TABLE::prev and TABLE::next),
123     TABLE::in_use.
124   */
125   mysql_mutex_t LOCK_table_cache;
126   I_P_List <TABLE, I_P_List_adapter<TABLE, &TABLE::global_free_next,
127                                     &TABLE::global_free_prev>,
128             I_P_List_null_counter, I_P_List_fast_push_back<TABLE> >
129     free_tables;
130   ulong records;
131   uint mutex_waits;
132   uint mutex_nowaits;
133   /** Avoid false sharing between instances */
134   char pad[CPU_LEVEL1_DCACHE_LINESIZE];
135 
Table_cache_instanceTable_cache_instance136   Table_cache_instance(): records(0), mutex_waits(0), mutex_nowaits(0)
137   {
138     mysql_mutex_init(key_LOCK_table_cache, &LOCK_table_cache,
139                      MY_MUTEX_INIT_FAST);
140   }
141 
~Table_cache_instanceTable_cache_instance142   ~Table_cache_instance()
143   {
144     mysql_mutex_destroy(&LOCK_table_cache);
145     DBUG_ASSERT(free_tables.is_empty());
146     DBUG_ASSERT(records == 0);
147   }
148 
149   /**
150     Lock table cache mutex and check contention.
151 
152     Instance is considered contested if more than 20% of mutex acquisiotions
153     can't be served immediately. Up to 100 000 probes may be performed to avoid
154     instance activation on short sporadic peaks. 100 000 is estimated maximum
155     number of queries one instance can serve in one second.
156 
157     These numbers work well on a 2 socket / 20 core / 40 threads Intel Broadwell
158     system, that is expected number of instances is activated within reasonable
159     warmup time. It may have to be adjusted for other systems.
160 
161     Only TABLE object acquistion is instrumented. We intentionally avoid this
162     overhead on TABLE object release. All other table cache mutex acquistions
163     are considered out of hot path and are not instrumented either.
164   */
lock_and_check_contentionTable_cache_instance165   void lock_and_check_contention(uint32_t n_instances, uint32_t instance)
166   {
167     if (mysql_mutex_trylock(&LOCK_table_cache))
168     {
169       mysql_mutex_lock(&LOCK_table_cache);
170       if (++mutex_waits == 20000)
171       {
172         if (n_instances < tc_instances)
173         {
174           if (tc_active_instances.
175               compare_exchange_weak(n_instances, n_instances + 1,
176                                     std::memory_order_relaxed,
177                                     std::memory_order_relaxed))
178           {
179             sql_print_information("Detected table cache mutex contention at instance %d: "
180                                   "%d%% waits. Additional table cache instance "
181                                   "activated. Number of instances after "
182                                   "activation: %d.",
183                                   instance + 1,
184                                   mutex_waits * 100 / (mutex_nowaits + mutex_waits),
185                                   n_instances + 1);
186           }
187         }
188         else if (!tc_contention_warning_reported.exchange(true,
189                                                  std::memory_order_relaxed))
190         {
191           sql_print_warning("Detected table cache mutex contention at instance %d: "
192                             "%d%% waits. Additional table cache instance "
193                             "cannot be activated: consider raising "
194                             "table_open_cache_instances. Number of active "
195                             "instances: %d.",
196                             instance + 1,
197                             mutex_waits * 100 / (mutex_nowaits + mutex_waits),
198                             n_instances);
199         }
200         mutex_waits= 0;
201         mutex_nowaits= 0;
202       }
203     }
204     else if (++mutex_nowaits == 80000)
205     {
206       mutex_waits= 0;
207       mutex_nowaits= 0;
208     }
209   }
210 };
211 
212 
/**
  Array of table cache instances. Allocated by tdc_init() with
  tc_instances + 1 elements and released by tdc_deinit().
*/
static Table_cache_instance *tc;
214 
215 
/**
  Destroy a TABLE object.

  Deletes the table's triggers, closes the table with closefrm(), releases
  the reference to its TABLE_SHARE with tdc_release_share() and finally frees
  the TABLE memory. Callers in this file unlink the object from the table
  cache lists before calling this function.
*/
static void intern_close_table(TABLE *table)
{
  delete table->triggers;
  DBUG_ASSERT(table->file);
  closefrm(table);
  /* table->s is still read here, so the TABLE is freed only afterwards. */
  tdc_release_share(table->s);
  my_free(table);
}
224 
225 
226 /**
227   Get number of TABLE objects (used and unused) in table cache.
228 */
229 
tc_records(void)230 uint tc_records(void)
231 {
232   ulong total= 0;
233   for (uint32 i= 0; i < tc_instances; i++)
234   {
235     mysql_mutex_lock(&tc[i].LOCK_table_cache);
236     total+= tc[i].records;
237     mysql_mutex_unlock(&tc[i].LOCK_table_cache);
238   }
239   return total;
240 }
241 
242 
243 /**
244   Remove TABLE object from table cache.
245 */
246 
tc_remove_table(TABLE * table)247 static void tc_remove_table(TABLE *table)
248 {
249   TDC_element *element= table->s->tdc;
250 
251   mysql_mutex_lock(&element->LOCK_table_share);
252   /* Wait for MDL deadlock detector to complete traversing tdc.all_tables. */
253   while (element->all_tables_refs)
254     mysql_cond_wait(&element->COND_release, &element->LOCK_table_share);
255   element->all_tables.remove(table);
256   mysql_mutex_unlock(&element->LOCK_table_share);
257 
258   intern_close_table(table);
259 }
260 
261 
tc_remove_all_unused_tables(TDC_element * element,Share_free_tables::List * purge_tables)262 static void tc_remove_all_unused_tables(TDC_element *element,
263                                         Share_free_tables::List *purge_tables)
264 {
265   for (uint32 i= 0; i < tc_instances; i++)
266   {
267     mysql_mutex_lock(&tc[i].LOCK_table_cache);
268     while (auto table= element->free_tables[i].list.pop_front())
269     {
270       tc[i].records--;
271       tc[i].free_tables.remove(table);
272       DBUG_ASSERT(element->all_tables_refs == 0);
273       element->all_tables.remove(table);
274       purge_tables->push_front(table);
275     }
276     mysql_mutex_unlock(&tc[i].LOCK_table_cache);
277   }
278 }
279 
280 
281 /**
282   Free all unused TABLE objects.
283 
284   While locked:
285   - remove unused objects from TABLE_SHARE::tdc.free_tables and
286     TABLE_SHARE::tdc.all_tables
287   - decrement tc_count
288 
289   While unlocked:
290   - free resources related to unused objects
291 
  @note This is called by 'handle_manager' when one wants to
        periodically flush all unused tables.
294 */
295 
tc_purge_callback(TDC_element * element,Share_free_tables::List * purge_tables)296 static my_bool tc_purge_callback(TDC_element *element,
297                                  Share_free_tables::List *purge_tables)
298 {
299   mysql_mutex_lock(&element->LOCK_table_share);
300   tc_remove_all_unused_tables(element, purge_tables);
301   mysql_mutex_unlock(&element->LOCK_table_share);
302   return FALSE;
303 }
304 
305 
tc_purge()306 void tc_purge()
307 {
308   Share_free_tables::List purge_tables;
309 
310   tdc_iterate(0, (my_hash_walk_action) tc_purge_callback, &purge_tables);
311   while (auto table= purge_tables.pop_front())
312     intern_close_table(table);
313 }
314 
315 
316 /**
317   Add new TABLE object to table cache.
318 
319   @pre TABLE object is used by caller.
320 
321   Added object cannot be evicted or acquired.
322 
323   While locked:
324   - add object to TABLE_SHARE::tdc.all_tables
325   - increment tc_count
326   - evict LRU object from table cache if we reached threshold
327 
328   While unlocked:
329   - free evicted object
330 */
331 
tc_add_table(THD * thd,TABLE * table)332 void tc_add_table(THD *thd, TABLE *table)
333 {
334   uint32_t i=
335     thd->thread_id % tc_active_instances.load(std::memory_order_relaxed);
336   TABLE *LRU_table= 0;
337   TDC_element *element= table->s->tdc;
338 
339   DBUG_ASSERT(table->in_use == thd);
340   table->instance= i;
341   mysql_mutex_lock(&element->LOCK_table_share);
342   /* Wait for MDL deadlock detector to complete traversing tdc.all_tables. */
343   while (element->all_tables_refs)
344     mysql_cond_wait(&element->COND_release, &element->LOCK_table_share);
345   element->all_tables.push_front(table);
346   mysql_mutex_unlock(&element->LOCK_table_share);
347 
348   mysql_mutex_lock(&tc[i].LOCK_table_cache);
349   if (tc[i].records == tc_size)
350   {
351     if ((LRU_table= tc[i].free_tables.pop_front()))
352     {
353       LRU_table->s->tdc->free_tables[i].list.remove(LRU_table);
354       /* Needed if MDL deadlock detector chimes in before tc_remove_table() */
355       LRU_table->in_use= thd;
356       mysql_mutex_unlock(&tc[i].LOCK_table_cache);
357       /* Keep out of locked LOCK_table_cache */
358       tc_remove_table(LRU_table);
359     }
360     else
361     {
362       tc[i].records++;
363       mysql_mutex_unlock(&tc[i].LOCK_table_cache);
364     }
365     /* Keep out of locked LOCK_table_cache */
366     status_var_increment(thd->status_var.table_open_cache_overflows);
367   }
368   else
369   {
370     tc[i].records++;
371     mysql_mutex_unlock(&tc[i].LOCK_table_cache);
372   }
373 }
374 
375 
376 /**
377   Acquire TABLE object from table cache.
378 
379   @pre share must be protected against removal.
380 
381   Acquired object cannot be evicted or acquired again.
382 
383   @return TABLE object, or NULL if no unused objects.
384 */
385 
tc_acquire_table(THD * thd,TDC_element * element)386 TABLE *tc_acquire_table(THD *thd, TDC_element *element)
387 {
388   uint32_t n_instances= tc_active_instances.load(std::memory_order_relaxed);
389   uint32_t i= thd->thread_id % n_instances;
390   TABLE *table;
391 
392   tc[i].lock_and_check_contention(n_instances, i);
393   table= element->free_tables[i].list.pop_front();
394   if (table)
395   {
396     DBUG_ASSERT(!table->in_use);
397     table->in_use= thd;
398     /* The ex-unused table must be fully functional. */
399     DBUG_ASSERT(table->db_stat && table->file);
400     /* The children must be detached from the table. */
401     DBUG_ASSERT(!table->file->extra(HA_EXTRA_IS_ATTACHED_CHILDREN));
402     tc[i].free_tables.remove(table);
403   }
404   mysql_mutex_unlock(&tc[i].LOCK_table_cache);
405   return table;
406 }
407 
408 
409 /**
410   Release TABLE object to table cache.
411 
412   @pre object is used by caller.
413 
414   Released object may be evicted or acquired again.
415 
416   While locked:
417   - if object is marked for purge, decrement tc_count
418   - add object to TABLE_SHARE::tdc.free_tables
419   - evict LRU object from table cache if we reached threshold
420 
421   While unlocked:
422   - mark object not in use by any thread
423   - free evicted/purged object
424 
425   @note Another thread may mark share for purge any moment (even
426   after version check). It means to-be-purged object may go to
427   unused lists. This other thread is expected to call tc_purge(),
428   which is synchronized with us on TABLE_SHARE::tdc.LOCK_table_share.
429 
430   @return
431     @retval true  object purged
432     @retval false object released
433 */
434 
tc_release_table(TABLE * table)435 void tc_release_table(TABLE *table)
436 {
437   uint32 i= table->instance;
438   DBUG_ENTER("tc_release_table");
439   DBUG_ASSERT(table->in_use);
440   DBUG_ASSERT(table->file);
441   DBUG_ASSERT(!table->pos_in_locked_tables);
442 
443   mysql_mutex_lock(&tc[i].LOCK_table_cache);
444   if (table->needs_reopen() || table->s->tdc->flushed ||
445       tc[i].records > tc_size)
446   {
447     tc[i].records--;
448     mysql_mutex_unlock(&tc[i].LOCK_table_cache);
449     tc_remove_table(table);
450   }
451   else
452   {
453     table->in_use= 0;
454     table->s->tdc->free_tables[i].list.push_front(table);
455     tc[i].free_tables.push_back(table);
456     mysql_mutex_unlock(&tc[i].LOCK_table_cache);
457   }
458   DBUG_VOID_RETURN;
459 }
460 
461 
/*
  Debug-only sanity check: assert that a TDC_element carries no share, no
  references, no flush tickets, no TABLE objects and is not linked into the
  unused shares list.
*/
static void tdc_assert_clean_share(TDC_element *element)
{
  DBUG_ASSERT(element->share == 0);
  DBUG_ASSERT(element->ref_count == 0);
  DBUG_ASSERT(element->m_flush_tickets.is_empty());
  DBUG_ASSERT(element->all_tables.is_empty());
  /* The loop is compiled only when assertions are enabled. */
#ifndef DBUG_OFF
  for (uint32 i= 0; i < tc_instances; i++)
    DBUG_ASSERT(element->free_tables[i].list.is_empty());
#endif
  DBUG_ASSERT(element->all_tables_refs == 0);
  DBUG_ASSERT(element->next == 0);
  DBUG_ASSERT(element->prev == 0);
}
476 
477 
/**
  Delete share from hash and free share object.

  @pre element->LOCK_table_share is held by the caller. It is released by
       this function before the element is deleted from the hash.

  Steps:
  - detach TABLE_SHARE from the element and release its performance schema
    share
  - if there are flush tickets, mark every waiter as granted and wait on
    COND_release until the ticket list is empty
  - unlock LOCK_table_share
  - delete the element from tdc_hash using the current THD's pins, or
    temporary pins when there is no current THD
  - free the TABLE_SHARE
*/

static void tdc_delete_share_from_hash(TDC_element *element)
{
  THD *thd= current_thd;
  LF_PINS *pins;
  TABLE_SHARE *share;
  DBUG_ENTER("tdc_delete_share_from_hash");

  mysql_mutex_assert_owner(&element->LOCK_table_share);
  share= element->share;
  DBUG_ASSERT(share);
  /* From now on lookups that find this element see it without a share. */
  element->share= 0;
  PSI_CALL_release_table_share(share->m_psi);
  share->m_psi= 0;

  if (!element->m_flush_tickets.is_empty())
  {
    Wait_for_flush_list::Iterator it(element->m_flush_tickets);
    Wait_for_flush *ticket;
    while ((ticket= it++))
      (void) ticket->get_ctx()->m_wait.set_status(MDL_wait::GRANTED);

    /* Wait until the flush ticket list has been emptied. */
    do
    {
      mysql_cond_wait(&element->COND_release, &element->LOCK_table_share);
    } while (!element->m_flush_tickets.is_empty());
  }

  mysql_mutex_unlock(&element->LOCK_table_share);

  if (thd)
  {
    /* NOTE(review): the result of fix_thd_pins() is not checked here. */
    fix_thd_pins(thd);
    pins= thd->tdc_hash_pins;
  }
  else
    pins= lf_hash_get_pins(&tdc_hash);

  DBUG_ASSERT(pins); // What can we do about it?
  tdc_assert_clean_share(element);
  lf_hash_delete(&tdc_hash, pins, element->m_key, element->m_key_length);
  /* Pins obtained just for this call are returned; THD pins are kept. */
  if (!thd)
    lf_hash_put_pins(pins);
  free_table_share(share);
  DBUG_VOID_RETURN;
}
527 
528 
529 /**
530   Prepeare table share for use with table definition cache.
531 */
532 
lf_alloc_constructor(uchar * arg)533 static void lf_alloc_constructor(uchar *arg)
534 {
535   TDC_element *element= (TDC_element*) (arg + LF_HASH_OVERHEAD);
536   DBUG_ENTER("lf_alloc_constructor");
537   mysql_mutex_init(key_TABLE_SHARE_LOCK_table_share,
538                    &element->LOCK_table_share, MY_MUTEX_INIT_FAST);
539   mysql_cond_init(key_TABLE_SHARE_COND_release, &element->COND_release, 0);
540   element->m_flush_tickets.empty();
541   element->all_tables.empty();
542   for (uint32 i= 0; i < tc_instances; i++)
543     element->free_tables[i].list.empty();
544   element->all_tables_refs= 0;
545   element->share= 0;
546   element->ref_count= 0;
547   element->next= 0;
548   element->prev= 0;
549   DBUG_VOID_RETURN;
550 }
551 
552 
553 /**
554   Release table definition cache specific resources of table share.
555 */
556 
lf_alloc_destructor(uchar * arg)557 static void lf_alloc_destructor(uchar *arg)
558 {
559   TDC_element *element= (TDC_element*) (arg + LF_HASH_OVERHEAD);
560   DBUG_ENTER("lf_alloc_destructor");
561   tdc_assert_clean_share(element);
562   mysql_cond_destroy(&element->COND_release);
563   mysql_mutex_destroy(&element->LOCK_table_share);
564   DBUG_VOID_RETURN;
565 }
566 
567 
tdc_hash_initializer(LF_HASH *,TDC_element * element,LEX_STRING * key)568 static void tdc_hash_initializer(LF_HASH *,
569                                  TDC_element *element, LEX_STRING *key)
570 {
571   memcpy(element->m_key, key->str, key->length);
572   element->m_key_length= (uint)key->length;
573   tdc_assert_clean_share(element);
574 }
575 
576 
tdc_hash_key(const TDC_element * element,size_t * length,my_bool)577 static uchar *tdc_hash_key(const TDC_element *element, size_t *length,
578                            my_bool)
579 {
580   *length= element->m_key_length;
581   return (uchar*) element->m_key;
582 }
583 
584 
585 /**
586   Initialize table definition cache.
587 */
588 
tdc_init(void)589 bool tdc_init(void)
590 {
591   DBUG_ENTER("tdc_init");
592 #ifdef HAVE_PSI_INTERFACE
593   mysql_mutex_register("sql", all_tc_mutexes, array_elements(all_tc_mutexes));
594   mysql_cond_register("sql", all_tc_conds, array_elements(all_tc_conds));
595 #endif
596   /* Extra instance is allocated to avoid false sharing */
597   if (!(tc= new Table_cache_instance[tc_instances + 1]))
598     DBUG_RETURN(true);
599   tdc_inited= true;
600   mysql_mutex_init(key_LOCK_unused_shares, &LOCK_unused_shares,
601                    MY_MUTEX_INIT_FAST);
602   lf_hash_init(&tdc_hash, sizeof(TDC_element) +
603                           sizeof(Share_free_tables) * (tc_instances - 1),
604                LF_HASH_UNIQUE, 0, 0,
605                (my_hash_get_key) tdc_hash_key,
606                &my_charset_bin);
607   tdc_hash.alloc.constructor= lf_alloc_constructor;
608   tdc_hash.alloc.destructor= lf_alloc_destructor;
609   tdc_hash.initializer= (lf_hash_initializer) tdc_hash_initializer;
610   DBUG_RETURN(false);
611 }
612 
613 
614 /**
615   Notify table definition cache that process of shutting down server
616   has started so it has to keep number of TABLE and TABLE_SHARE objects
617   minimal in order to reduce number of references to pluggable engines.
618 */
619 
tdc_start_shutdown(void)620 void tdc_start_shutdown(void)
621 {
622   DBUG_ENTER("tdc_start_shutdown");
623   if (tdc_inited)
624   {
625     /*
626       Ensure that TABLE and TABLE_SHARE objects which are created for
627       tables that are open during process of plugins' shutdown are
628       immediately released. This keeps number of references to engine
629       plugins minimal and allows shutdown to proceed smoothly.
630     */
631     tdc_size= 0;
632     tc_size= 0;
633     /* Free all cached but unused TABLEs and TABLE_SHAREs. */
634     purge_tables();
635   }
636   DBUG_VOID_RETURN;
637 }
638 
639 
640 /**
641   Deinitialize table definition cache.
642 */
643 
tdc_deinit(void)644 void tdc_deinit(void)
645 {
646   DBUG_ENTER("tdc_deinit");
647   if (tdc_inited)
648   {
649     tdc_inited= false;
650     lf_hash_destroy(&tdc_hash);
651     mysql_mutex_destroy(&LOCK_unused_shares);
652     delete [] tc;
653   }
654   DBUG_VOID_RETURN;
655 }
656 
657 
/**
  Get number of cached table definitions.

  The value is the current size of tdc_hash as reported by lf_hash_size().

  @return Number of cached table definitions
*/

ulong tdc_records(void)
{
  return lf_hash_size(&tdc_hash);
}
668 
669 
tdc_purge(bool all)670 void tdc_purge(bool all)
671 {
672   DBUG_ENTER("tdc_purge");
673   while (all || tdc_records() > tdc_size)
674   {
675     TDC_element *element;
676 
677     mysql_mutex_lock(&LOCK_unused_shares);
678     if (!(element= unused_shares.pop_front()))
679     {
680       mysql_mutex_unlock(&LOCK_unused_shares);
681       break;
682     }
683 
684     /* Concurrent thread may start using share again, reset prev and next. */
685     element->prev= 0;
686     element->next= 0;
687     mysql_mutex_lock(&element->LOCK_table_share);
688     if (element->ref_count)
689     {
690       mysql_mutex_unlock(&element->LOCK_table_share);
691       mysql_mutex_unlock(&LOCK_unused_shares);
692       continue;
693     }
694     mysql_mutex_unlock(&LOCK_unused_shares);
695 
696     tdc_delete_share_from_hash(element);
697   }
698   DBUG_VOID_RETURN;
699 }
700 
701 
702 /**
703   Lock table share.
704 
705   Find table share with given db.table_name in table definition cache. Return
706   locked table share if found.
707 
708   Locked table share means:
709   - table share is protected against removal from table definition cache
710   - no other thread can acquire/release table share
711 
712   Caller is expected to unlock table share with tdc_unlock_share().
713 
714   @retval 0 Share not found
715   @retval MY_ERRPTR OOM
716   @retval ptr Pointer to locked table share
717 */
718 
tdc_lock_share(THD * thd,const char * db,const char * table_name)719 TDC_element *tdc_lock_share(THD *thd, const char *db, const char *table_name)
720 {
721   TDC_element *element;
722   char key[MAX_DBKEY_LENGTH];
723 
724   DBUG_ENTER("tdc_lock_share");
725   if (unlikely(fix_thd_pins(thd)))
726     DBUG_RETURN((TDC_element*) MY_ERRPTR);
727 
728   element= (TDC_element *) lf_hash_search(&tdc_hash, thd->tdc_hash_pins,
729                                           (uchar*) key,
730                                           tdc_create_key(key, db, table_name));
731   if (element)
732   {
733     mysql_mutex_lock(&element->LOCK_table_share);
734     if (unlikely(!element->share || element->share->error))
735     {
736       mysql_mutex_unlock(&element->LOCK_table_share);
737       element= 0;
738     }
739     lf_hash_search_unpin(thd->tdc_hash_pins);
740   }
741 
742   DBUG_RETURN(element);
743 }
744 
745 
/**
  Unlock share locked by tdc_lock_share().

  Releases element->LOCK_table_share.
*/

void tdc_unlock_share(TDC_element *element)
{
  DBUG_ENTER("tdc_unlock_share");
  mysql_mutex_unlock(&element->LOCK_table_share);
  DBUG_VOID_RETURN;
}
756 
757 
tdc_share_is_cached(THD * thd,const char * db,const char * table_name)758 int tdc_share_is_cached(THD *thd, const char *db, const char *table_name)
759 {
760   char key[MAX_DBKEY_LENGTH];
761 
762   if (unlikely(fix_thd_pins(thd)))
763     return -1;
764 
765   if (lf_hash_search(&tdc_hash, thd->tdc_hash_pins, (uchar*) key,
766                      tdc_create_key(key, db, table_name)))
767   {
768     lf_hash_search_unpin(thd->tdc_hash_pins);
769     return 1;
770   }
771   return 0;
772 }
773 
774 
/*
  Get TABLE_SHARE for a table.

  tdc_acquire_share()
  thd                   Thread handle
  tl                    Table that should be opened
  flags                 operation: what to open table or view
  out_table             TABLE for the requested table

  IMPLEMENTATION
    Get a table definition from the table definition cache.
    If it doesn't exist, create a new from the table definition file.

    If out_table is given and GTS_TABLE is requested, an unused TABLE object
    of a cached share is looked for first. When one is found it is returned
    through *out_table together with its share, and the share's reference
    counter is not incremented by this function. Otherwise *out_table is set
    to 0 and a reference to the share is acquired.

    With GTS_NOLOCK the acquired reference is released again before
    returning and UNUSABLE_TABLE_SHARE is returned instead of the share.

  RETURN
   0  Error
   #  Share for table
*/

TABLE_SHARE *tdc_acquire_share(THD *thd, TABLE_LIST *tl, uint flags,
                               TABLE **out_table)
{
  TABLE_SHARE *share;
  TDC_element *element;
  const char *key;
  uint key_length= get_table_def_key(tl, &key);
  my_hash_value_type hash_value= tl->mdl_request.key.tc_hash_value();
  bool was_unused;
  DBUG_ENTER("tdc_acquire_share");

  if (fix_thd_pins(thd))
    DBUG_RETURN(0);

retry:
  /* The loop body runs only while there is no element for this key. */
  while (!(element= (TDC_element*) lf_hash_search_using_hash_value(&tdc_hash,
                    thd->tdc_hash_pins, hash_value, (uchar*) key, key_length)))
  {
    LEX_STRING tmp= { const_cast<char*>(key), key_length };
    int res= lf_hash_insert(&tdc_hash, thd->tdc_hash_pins, (uchar*) &tmp);

    /*
      -1 is treated as failure. On 1 the search is repeated; presumably
      another thread inserted the same key concurrently (verify against
      lf_hash_insert()).
    */
    if (res == -1)
      DBUG_RETURN(0);
    else if (res == 1)
      continue;

    /* Fetch the element that was just inserted. */
    element= (TDC_element*) lf_hash_search_using_hash_value(&tdc_hash,
             thd->tdc_hash_pins, hash_value, (uchar*) key, key_length);
    lf_hash_search_unpin(thd->tdc_hash_pins);
    DBUG_ASSERT(element);

    if (!(share= alloc_table_share(tl->db.str, tl->table_name.str, key, key_length)))
    {
      lf_hash_delete(&tdc_hash, thd->tdc_hash_pins, key, key_length);
      DBUG_RETURN(0);
    }

    /* note that tdc_acquire_share() *always* uses discovery */
    open_table_def(thd, share, flags | GTS_USE_DISCOVERY);

    if (checked_unlikely(share->error))
    {
      /* Definition could not be opened: undo the insert. */
      free_table_share(share);
      lf_hash_delete(&tdc_hash, thd->tdc_hash_pins, key, key_length);
      DBUG_RETURN(0);
    }

    /* Publish the share in the element and take the first reference. */
    mysql_mutex_lock(&element->LOCK_table_share);
    element->share= share;
    share->tdc= element;
    element->ref_count++;
    element->flushed= false;
    mysql_mutex_unlock(&element->LOCK_table_share);

    /* A definition was added: evict unused ones if above tdc_size. */
    tdc_purge(false);
    if (out_table)
    {
      status_var_increment(thd->status_var.table_open_cache_misses);
      *out_table= 0;
    }
    share->m_psi= PSI_CALL_get_table_share(false, share);
    goto end;
  }

  /* cannot force discovery of a cached share */
  DBUG_ASSERT(!(flags & GTS_FORCE_DISCOVERY));

  if (out_table && (flags & GTS_TABLE))
  {
    if ((*out_table= tc_acquire_table(thd, element)))
    {
      /* Cache hit: return the share without touching its ref_count. */
      lf_hash_search_unpin(thd->tdc_hash_pins);
      DBUG_ASSERT(!(flags & GTS_NOLOCK));
      DBUG_ASSERT(element->share);
      DBUG_ASSERT(!element->share->error);
      DBUG_ASSERT(!element->share->is_view);
      status_var_increment(thd->status_var.table_open_cache_hits);
      DBUG_RETURN(element->share);
    }
    status_var_increment(thd->status_var.table_open_cache_misses);
  }

  mysql_mutex_lock(&element->LOCK_table_share);
  if (!(share= element->share))
  {
    /* Element has no share attached: start over. */
    mysql_mutex_unlock(&element->LOCK_table_share);
    lf_hash_search_unpin(thd->tdc_hash_pins);
    goto retry;
  }
  lf_hash_search_unpin(thd->tdc_hash_pins);

  /*
     We found an existing table definition. Return it if we didn't get
     an error when reading the table definition from file.
  */
  if (unlikely(share->error))
  {
    open_table_error(share, share->error, share->open_errno);
    goto err;
  }

  /* The object kind must match what the caller asked for. */
  if (share->is_view && !(flags & GTS_VIEW))
  {
    open_table_error(share, OPEN_FRM_NOT_A_TABLE, ENOENT);
    goto err;
  }
  if (!share->is_view && !(flags & GTS_TABLE))
  {
    open_table_error(share, OPEN_FRM_NOT_A_VIEW, ENOENT);
    goto err;
  }

  was_unused= !element->ref_count;
  element->ref_count++;
  mysql_mutex_unlock(&element->LOCK_table_share);
  if (was_unused)
  {
    mysql_mutex_lock(&LOCK_unused_shares);
    if (element->prev)
    {
      /*
        Share was not used before and it was in the unused_shares list.
        Unlink share from this list
      */
      DBUG_PRINT("info", ("Unlinking from not used list"));
      unused_shares.remove(element);
      element->next= 0;
      element->prev= 0;
    }
    mysql_mutex_unlock(&LOCK_unused_shares);
  }

end:
  DBUG_PRINT("exit", ("share: %p  ref_count: %u",
                      share, share->tdc->ref_count));
  if (flags & GTS_NOLOCK)
  {
    tdc_release_share(share);
    /*
      if GTS_NOLOCK is requested, the returned share pointer cannot be used,
      the share it points to may go away any moment.
      But perhaps the caller is only interested to know whether a share or
      table existed?
      Let's return an invalid pointer here to catch dereferencing attempts.
    */
    share= UNUSABLE_TABLE_SHARE;
  }
  DBUG_RETURN(share);

err:
  /* Reached with LOCK_table_share held. */
  mysql_mutex_unlock(&element->LOCK_table_share);
  DBUG_RETURN(0);
}
946 
947 
/**
  Release table share acquired by tdc_acquire_share().

  If other references remain, the reference counter is decremented under
  LOCK_table_share only. Otherwise LOCK_table_share is dropped and taken
  again after LOCK_unused_shares, and the counter is decremented then. If it
  reaches zero, the share is either deleted from the hash (share is flushed
  or the cache holds more than tdc_size definitions) or appended to the
  unused shares list.

  For shares that are not views, COND_release is broadcast whenever the
  counter is decremented without reaching zero.
*/

void tdc_release_share(TABLE_SHARE *share)
{
  DBUG_ENTER("tdc_release_share");

  mysql_mutex_lock(&share->tdc->LOCK_table_share);
  DBUG_PRINT("enter",
             ("share: %p  table: %s.%s  ref_count: %u",
              share, share->db.str, share->table_name.str,
              share->tdc->ref_count));
  DBUG_ASSERT(share->tdc->ref_count);

  /* Not the last reference: no need for LOCK_unused_shares. */
  if (share->tdc->ref_count > 1)
  {
    share->tdc->ref_count--;
    if (!share->is_view)
      mysql_cond_broadcast(&share->tdc->COND_release);
    mysql_mutex_unlock(&share->tdc->LOCK_table_share);
    DBUG_VOID_RETURN;
  }
  mysql_mutex_unlock(&share->tdc->LOCK_table_share);

  /* LOCK_unused_shares is taken before LOCK_table_share. */
  mysql_mutex_lock(&LOCK_unused_shares);
  mysql_mutex_lock(&share->tdc->LOCK_table_share);
  /* The share may have been acquired again while no lock was held. */
  if (--share->tdc->ref_count)
  {
    if (!share->is_view)
      mysql_cond_broadcast(&share->tdc->COND_release);
    mysql_mutex_unlock(&share->tdc->LOCK_table_share);
    mysql_mutex_unlock(&LOCK_unused_shares);
    DBUG_VOID_RETURN;
  }
  if (share->tdc->flushed || tdc_records() > tdc_size)
  {
    mysql_mutex_unlock(&LOCK_unused_shares);
    /* Releases LOCK_table_share. */
    tdc_delete_share_from_hash(share->tdc);
    DBUG_VOID_RETURN;
  }
  /* Link share last in unused_shares list */
  DBUG_PRINT("info", ("moving share to unused list"));
  DBUG_ASSERT(share->tdc->next == 0);
  unused_shares.push_back(share->tdc);
  mysql_mutex_unlock(&share->tdc->LOCK_table_share);
  mysql_mutex_unlock(&LOCK_unused_shares);
  DBUG_VOID_RETURN;
}
997 
998 
/**
  Remove a share, to which the caller holds a reference, from the table
  definition cache.

  Closes the share's unused TABLE instances, waits until ref_count has
  dropped to 1, drops that last reference and deletes the element from the
  hash.

  @param  thd    Thread context. Must own MDL_EXCLUSIVE on the table
                 (asserted in debug builds).
  @param  share  Share to remove. The caller is expected to own one
                 reference to it (see wait_for_refs(1) below).
*/
void tdc_remove_referenced_share(THD *thd, TABLE_SHARE *share)
{
  DBUG_ASSERT(thd->mdl_context.is_lock_owner(MDL_key::TABLE, share->db.str,
                                             share->table_name.str,
                                             MDL_EXCLUSIVE));
  /* Close unused TABLE instances without marking the share as flushed. */
  share->tdc->flush_unused(false);
  mysql_mutex_lock(&share->tdc->LOCK_table_share);
  /* Block until the only remaining reference is the caller's own. */
  share->tdc->wait_for_refs(1);
  DBUG_ASSERT(share->tdc->all_tables.is_empty());
  share->tdc->ref_count--;
  /*
    NOTE(review): LOCK_table_share is not unlocked in this function;
    presumably tdc_delete_share_from_hash() releases it -- confirm.
  */
  tdc_delete_share_from_hash(share->tdc);
}
1011 
1012 
1013 /**
1014    Removes all TABLE instances and corresponding TABLE_SHARE
1015 
1016    @param  thd          Thread context
1017    @param  db           Name of database
1018    @param  table_name   Name of table
1019 
1020    @note It assumes that table instances are already not used by any
1021    (other) thread (this should be achieved by using meta-data locks).
1022 */
1023 
tdc_remove_table(THD * thd,const char * db,const char * table_name)1024 void tdc_remove_table(THD *thd, const char *db, const char *table_name)
1025 {
1026   TDC_element *element;
1027   DBUG_ENTER("tdc_remove_table");
1028   DBUG_PRINT("enter", ("name: %s", table_name));
1029 
1030   DBUG_ASSERT(thd->mdl_context.is_lock_owner(MDL_key::TABLE, db, table_name,
1031                                              MDL_EXCLUSIVE));
1032 
1033   mysql_mutex_lock(&LOCK_unused_shares);
1034   if (!(element= tdc_lock_share(thd, db, table_name)))
1035   {
1036     mysql_mutex_unlock(&LOCK_unused_shares);
1037     DBUG_VOID_RETURN;
1038   }
1039 
1040   DBUG_ASSERT(element != MY_ERRPTR); // What can we do about it?
1041 
1042   if (!element->ref_count)
1043   {
1044     if (element->prev)
1045     {
1046       unused_shares.remove(element);
1047       element->prev= 0;
1048       element->next= 0;
1049     }
1050     mysql_mutex_unlock(&LOCK_unused_shares);
1051 
1052     tdc_delete_share_from_hash(element);
1053     DBUG_VOID_RETURN;
1054   }
1055   mysql_mutex_unlock(&LOCK_unused_shares);
1056 
1057   element->ref_count++;
1058   mysql_mutex_unlock(&element->LOCK_table_share);
1059 
1060   /* We have to relock the mutex to avoid code duplication. Sigh. */
1061   tdc_remove_referenced_share(thd, element->share);
1062   DBUG_VOID_RETURN;
1063 }
1064 
1065 
1066 /**
1067   Check if table's share is being removed from the table definition
1068   cache and, if yes, wait until the flush is complete.
1069 
1070   @param thd             Thread context.
1071   @param table_list      Table which share should be checked.
1072   @param timeout         Timeout for waiting.
1073   @param deadlock_weight Weight of this wait for deadlock detector.
1074 
1075   @retval 0       Success. Share is up to date or has been flushed.
1076   @retval 1       Error (OOM, was killed, the wait resulted
1077                   in a deadlock or timeout). Reported.
1078 */
1079 
tdc_wait_for_old_version(THD * thd,const char * db,const char * table_name,ulong wait_timeout,uint deadlock_weight)1080 int tdc_wait_for_old_version(THD *thd, const char *db, const char *table_name,
1081                              ulong wait_timeout, uint deadlock_weight)
1082 {
1083   TDC_element *element;
1084 
1085   if (!(element= tdc_lock_share(thd, db, table_name)))
1086     return FALSE;
1087   else if (element == MY_ERRPTR)
1088     return TRUE;
1089   else if (element->flushed)
1090   {
1091     struct timespec abstime;
1092     set_timespec(abstime, wait_timeout);
1093     return element->share->wait_for_old_version(thd, &abstime, deadlock_weight);
1094   }
1095   tdc_unlock_share(element);
1096   return FALSE;
1097 }
1098 
1099 
/**
  Iterate table definition cache.

  Object is protected against removal from table definition cache.

  @note Returned TABLE_SHARE is not guaranteed to be fully initialized:
  tdc_acquire_share() may have added a new share but not opened it yet. If
  the caller needs a fully initialized share, it must lock the table share
  mutex.
*/
1109 
/**
  State used by tdc_iterate() when it is asked to suppress duplicates:
  wraps the caller's callback together with a hash of keys already seen.
*/
struct eliminate_duplicates_arg
{
  /* Keys (LEX_STRING) of elements already handed to the callback. */
  HASH hash;
  /* Memory root the key copies stored in hash are allocated from. */
  MEM_ROOT root;
  /* Callback originally supplied by the caller of tdc_iterate(). */
  my_hash_walk_action action;
  /* Opaque argument originally supplied for that callback. */
  void *argument;
};
1117 
1118 
/**
  Key extraction callback for the duplicate elimination hash.

  @param       element   Hash record; a LEX_STRING holding the key copy
  @param[out]  length    Set to the key length
  @param       not_used  Unused

  @return Pointer to the key bytes (the LEX_STRING's str member)
*/
static uchar *eliminate_duplicates_get_key(const uchar *element, size_t *length,
                                       my_bool not_used __attribute__((unused)))
{
  const LEX_STRING *record= reinterpret_cast<const LEX_STRING *>(element);

  *length= record->length;
  return (uchar *) record->str;
}
1126 
1127 
eliminate_duplicates(TDC_element * element,eliminate_duplicates_arg * arg)1128 static my_bool eliminate_duplicates(TDC_element *element,
1129                                     eliminate_duplicates_arg *arg)
1130 {
1131   LEX_STRING *key= (LEX_STRING *) alloc_root(&arg->root, sizeof(LEX_STRING));
1132 
1133   if (!key || !(key->str= (char*) memdup_root(&arg->root, element->m_key,
1134                                               element->m_key_length)))
1135     return TRUE;
1136 
1137   key->length= element->m_key_length;
1138 
1139   if (my_hash_insert(&arg->hash, (uchar *) key))
1140     return FALSE;
1141 
1142   return arg->action(element, arg->argument);
1143 }
1144 
1145 
tdc_iterate(THD * thd,my_hash_walk_action action,void * argument,bool no_dups)1146 int tdc_iterate(THD *thd, my_hash_walk_action action, void *argument,
1147                 bool no_dups)
1148 {
1149   eliminate_duplicates_arg no_dups_argument;
1150   LF_PINS *pins;
1151   myf alloc_flags= 0;
1152   uint hash_flags= HASH_UNIQUE;
1153   int res;
1154 
1155   if (thd)
1156   {
1157     fix_thd_pins(thd);
1158     pins= thd->tdc_hash_pins;
1159     alloc_flags= MY_THREAD_SPECIFIC;
1160     hash_flags|= HASH_THREAD_SPECIFIC;
1161   }
1162   else
1163     pins= lf_hash_get_pins(&tdc_hash);
1164 
1165   if (!pins)
1166     return ER_OUTOFMEMORY;
1167 
1168   if (no_dups)
1169   {
1170     init_alloc_root(PSI_INSTRUMENT_ME, &no_dups_argument.root, 4096, 4096, MYF(alloc_flags));
1171     my_hash_init(PSI_INSTRUMENT_ME, &no_dups_argument.hash, &my_charset_bin,
1172                  tdc_records(), 0, 0, eliminate_duplicates_get_key, 0,
1173                  hash_flags);
1174     no_dups_argument.action= action;
1175     no_dups_argument.argument= argument;
1176     action= (my_hash_walk_action) eliminate_duplicates;
1177     argument= &no_dups_argument;
1178   }
1179 
1180   res= lf_hash_iterate(&tdc_hash, pins, action, argument);
1181 
1182   if (!thd)
1183     lf_hash_put_pins(pins);
1184 
1185   if (no_dups)
1186   {
1187     my_hash_free(&no_dups_argument.hash);
1188     free_root(&no_dups_argument.root, MYF(0));
1189   }
1190   return res;
1191 }
1192 
1193 
show_tc_active_instances(THD * thd,SHOW_VAR * var,char * buff,enum enum_var_type scope)1194 int show_tc_active_instances(THD *thd, SHOW_VAR *var, char *buff,
1195                              enum enum_var_type scope)
1196 {
1197   var->type= SHOW_UINT;
1198   var->value= buff;
1199   *(reinterpret_cast<uint32_t*>(buff))=
1200     tc_active_instances.load(std::memory_order_relaxed);
1201   return 0;
1202 }
1203 
1204 
/**
  Waits until ref_count goes down to given number

  @param  my_refs  Number of references owned by the caller

  Caller must own at least one TABLE_SHARE reference.

  Even though current thread holds exclusive metadata lock on this share,
  concurrent FLUSH TABLES threads may be in process of closing unused table
  instances belonging to this share. E.g.:
  thr1 (FLUSH TABLES): table= share->tdc->free_tables.pop_front();
  thr1 (FLUSH TABLES): share->tdc->all_tables.remove(table);
  thr2 (ALTER TABLE): tdc_remove_table();
  thr1 (FLUSH TABLES): intern_close_table(table);

  Current remove type assumes that all table instances (except for those
  that are owned by current thread) must be closed before
  tdc_remove_table() returns. Wait for such tables now.

  intern_close_table() decrements ref_count and signals COND_release. When
  ref_count drops down to number of references owned by current thread
  waiting is completed.

  Unfortunately TABLE_SHARE::wait_for_old_version() cannot be used here
  because it waits for all table instances, whereas we have to wait only
  for those that are not owned by current thread.
*/
1232 
wait_for_refs(uint my_refs)1233 void TDC_element::wait_for_refs(uint my_refs)
1234 {
1235   while (ref_count > my_refs)
1236     mysql_cond_wait(&COND_release, &LOCK_table_share);
1237 }
1238 
1239 
/**
  Flushes unused TABLE instances and waits until only the caller's remain

  @param  thd          Thread context
  @param  mark_flushed Whether to destroy TABLE_SHARE when released

  Caller is allowed to own used TABLE instances.
  There must be no TABLE objects used by other threads and caller must own
  exclusive metadata lock on the table.

  After the unused instances have been closed, this waits until ref_count
  is no larger than the number of TABLE instances in all_tables whose
  in_use is thd.
*/

void TDC_element::flush(THD *thd, bool mark_flushed)
{
  DBUG_ASSERT(thd->mdl_context.is_lock_owner(MDL_key::TABLE, share->db.str,
                                             share->table_name.str,
                                             MDL_EXCLUSIVE));

  /* Close unused instances; this takes and releases LOCK_table_share. */
  flush_unused(mark_flushed);

  mysql_mutex_lock(&LOCK_table_share);
  All_share_tables_list::Iterator it(all_tables);
  /* Count the TABLE instances of this share used by the calling thread. */
  uint my_refs= 0;
  while (auto table= it++)
  {
    if (table->in_use == thd)
      my_refs++;
  }
  /* Blocks on COND_release until ref_count <= my_refs. */
  wait_for_refs(my_refs);
#ifndef DBUG_OFF
  /* Debug builds: every instance still linked must belong to the caller. */
  it.rewind();
  while (auto table= it++)
    DBUG_ASSERT(table->in_use == thd);
#endif
  mysql_mutex_unlock(&LOCK_table_share);
}
1275 
1276 
1277 /**
1278   Flushes unused TABLE instances
1279 */
1280 
flush_unused(bool mark_flushed)1281 void TDC_element::flush_unused(bool mark_flushed)
1282 {
1283   Share_free_tables::List purge_tables;
1284 
1285   mysql_mutex_lock(&LOCK_table_share);
1286   if (mark_flushed)
1287     flushed= true;
1288   tc_remove_all_unused_tables(this, &purge_tables);
1289   mysql_mutex_unlock(&LOCK_table_share);
1290 
1291   while (auto table= purge_tables.pop_front())
1292     intern_close_table(table);
1293 }
1294