1 /* Copyright (c) 2000, 2012, Oracle and/or its affiliates.
2 Copyright (c) 2010, 2011 Monty Program Ab
3 Copyright (C) 2013 Sergey Vojtovich and MariaDB Foundation
4
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; version 2 of the License.
8
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */
17
18 /**
19 @file
20 Table definition cache and table cache implementation.
21
22 Table definition cache actions:
23 - add new TABLE_SHARE object to cache (tdc_acquire_share())
24 - acquire TABLE_SHARE object from cache (tdc_acquire_share())
25 - release TABLE_SHARE object to cache (tdc_release_share())
26 - purge unused TABLE_SHARE objects from cache (tdc_purge())
27 - remove TABLE_SHARE object from cache (tdc_remove_table())
28 - get number of TABLE_SHARE objects in cache (tdc_records())
29
30 Table cache actions:
31 - add new TABLE object to cache (tc_add_table())
32 - acquire TABLE object from cache (tc_acquire_table())
33 - release TABLE object to cache (tc_release_table())
34 - purge unused TABLE objects from cache (tc_purge())
35 - purge unused TABLE objects of a table from cache (tdc_remove_table())
36 - get number of TABLE objects in cache (tc_records())
37
38 Dependencies:
39 - close_cached_tables(): flush tables on shutdown
40 - alloc_table_share()
41 - free_table_share()
42
43 Table cache invariants:
44 - TABLE_SHARE::free_tables shall not contain objects with TABLE::in_use != 0
45 - TABLE_SHARE::free_tables shall not receive new objects if
46 TABLE_SHARE::tdc.flushed is true
47 */
48
#include "mariadb.h"
#include "lf.h"
#include "table.h"
#include "sql_base.h"

#include <new>
53
54
55 /** Configuration. */
56 ulong tdc_size; /**< Table definition cache threshold for LRU eviction. */
57 ulong tc_size; /**< Table cache threshold for LRU eviction. */
58 uint32 tc_instances;
59 static std::atomic<uint32_t> tc_active_instances(1);
60 static std::atomic<bool> tc_contention_warning_reported;
61
62 /** Data collections. */
63 static LF_HASH tdc_hash; /**< Collection of TABLE_SHARE objects. */
64 /** Collection of unused TABLE_SHARE objects. */
65 static
66 I_P_List <TDC_element,
67 I_P_List_adapter<TDC_element, &TDC_element::next, &TDC_element::prev>,
68 I_P_List_null_counter,
69 I_P_List_fast_push_back<TDC_element> > unused_shares;
70
71 static bool tdc_inited;
72
73
74 /**
75 Protects unused shares list.
76
77 TDC_element::prev
78 TDC_element::next
79 unused_shares
80 */
81
82 static mysql_mutex_t LOCK_unused_shares;
83
#ifdef HAVE_PSI_INTERFACE
/* Performance schema keys of the mutexes created in this file. */
static PSI_mutex_key key_LOCK_unused_shares;
static PSI_mutex_key key_TABLE_SHARE_LOCK_table_share;
static PSI_mutex_key key_LOCK_table_cache;

/* Registered by tdc_init() through mysql_mutex_register(). */
static PSI_mutex_info all_tc_mutexes[]=
{
  { &key_LOCK_unused_shares, "LOCK_unused_shares", PSI_FLAG_GLOBAL },
  { &key_TABLE_SHARE_LOCK_table_share, "TABLE_SHARE::tdc.LOCK_table_share", 0 },
  { &key_LOCK_table_cache, "LOCK_table_cache", 0 }
};

/* Performance schema key of the condition created in this file. */
static PSI_cond_key key_TABLE_SHARE_COND_release;

/* Registered by tdc_init() through mysql_cond_register(). */
static PSI_cond_info all_tc_conds[]=
{
  { &key_TABLE_SHARE_COND_release, "TABLE_SHARE::tdc.COND_release", 0 }
};
#endif
100
101
fix_thd_pins(THD * thd)102 static int fix_thd_pins(THD *thd)
103 {
104 return thd->tdc_hash_pins ? 0 :
105 (thd->tdc_hash_pins= lf_hash_get_pins(&tdc_hash)) == 0;
106 }
107
108
109 /*
110 Auxiliary routines for manipulating with per-share all/unused lists
111 and tc_count counter.
112 Responsible for preserving invariants between those lists, counter
113 and TABLE::in_use member.
114 In fact those routines implement sort of implicit table cache as
115 part of table definition cache.
116 */
117
118 struct Table_cache_instance
119 {
120 /**
121 Protects free_tables (TABLE::global_free_next and TABLE::global_free_prev),
122 records, Share_free_tables::List (TABLE::prev and TABLE::next),
123 TABLE::in_use.
124 */
125 mysql_mutex_t LOCK_table_cache;
126 I_P_List <TABLE, I_P_List_adapter<TABLE, &TABLE::global_free_next,
127 &TABLE::global_free_prev>,
128 I_P_List_null_counter, I_P_List_fast_push_back<TABLE> >
129 free_tables;
130 ulong records;
131 uint mutex_waits;
132 uint mutex_nowaits;
133 /** Avoid false sharing between instances */
134 char pad[CPU_LEVEL1_DCACHE_LINESIZE];
135
Table_cache_instanceTable_cache_instance136 Table_cache_instance(): records(0), mutex_waits(0), mutex_nowaits(0)
137 {
138 mysql_mutex_init(key_LOCK_table_cache, &LOCK_table_cache,
139 MY_MUTEX_INIT_FAST);
140 }
141
~Table_cache_instanceTable_cache_instance142 ~Table_cache_instance()
143 {
144 mysql_mutex_destroy(&LOCK_table_cache);
145 DBUG_ASSERT(free_tables.is_empty());
146 DBUG_ASSERT(records == 0);
147 }
148
149 /**
150 Lock table cache mutex and check contention.
151
152 Instance is considered contested if more than 20% of mutex acquisiotions
153 can't be served immediately. Up to 100 000 probes may be performed to avoid
154 instance activation on short sporadic peaks. 100 000 is estimated maximum
155 number of queries one instance can serve in one second.
156
157 These numbers work well on a 2 socket / 20 core / 40 threads Intel Broadwell
158 system, that is expected number of instances is activated within reasonable
159 warmup time. It may have to be adjusted for other systems.
160
161 Only TABLE object acquistion is instrumented. We intentionally avoid this
162 overhead on TABLE object release. All other table cache mutex acquistions
163 are considered out of hot path and are not instrumented either.
164 */
lock_and_check_contentionTable_cache_instance165 void lock_and_check_contention(uint32_t n_instances, uint32_t instance)
166 {
167 if (mysql_mutex_trylock(&LOCK_table_cache))
168 {
169 mysql_mutex_lock(&LOCK_table_cache);
170 if (++mutex_waits == 20000)
171 {
172 if (n_instances < tc_instances)
173 {
174 if (tc_active_instances.
175 compare_exchange_weak(n_instances, n_instances + 1,
176 std::memory_order_relaxed,
177 std::memory_order_relaxed))
178 {
179 sql_print_information("Detected table cache mutex contention at instance %d: "
180 "%d%% waits. Additional table cache instance "
181 "activated. Number of instances after "
182 "activation: %d.",
183 instance + 1,
184 mutex_waits * 100 / (mutex_nowaits + mutex_waits),
185 n_instances + 1);
186 }
187 }
188 else if (!tc_contention_warning_reported.exchange(true,
189 std::memory_order_relaxed))
190 {
191 sql_print_warning("Detected table cache mutex contention at instance %d: "
192 "%d%% waits. Additional table cache instance "
193 "cannot be activated: consider raising "
194 "table_open_cache_instances. Number of active "
195 "instances: %d.",
196 instance + 1,
197 mutex_waits * 100 / (mutex_nowaits + mutex_waits),
198 n_instances);
199 }
200 mutex_waits= 0;
201 mutex_nowaits= 0;
202 }
203 }
204 else if (++mutex_nowaits == 80000)
205 {
206 mutex_waits= 0;
207 mutex_nowaits= 0;
208 }
209 }
210 };
211
212
213 static Table_cache_instance *tc;
214
215
intern_close_table(TABLE * table)216 static void intern_close_table(TABLE *table)
217 {
218 delete table->triggers;
219 DBUG_ASSERT(table->file);
220 closefrm(table);
221 tdc_release_share(table->s);
222 my_free(table);
223 }
224
225
226 /**
227 Get number of TABLE objects (used and unused) in table cache.
228 */
229
tc_records(void)230 uint tc_records(void)
231 {
232 ulong total= 0;
233 for (uint32 i= 0; i < tc_instances; i++)
234 {
235 mysql_mutex_lock(&tc[i].LOCK_table_cache);
236 total+= tc[i].records;
237 mysql_mutex_unlock(&tc[i].LOCK_table_cache);
238 }
239 return total;
240 }
241
242
243 /**
244 Remove TABLE object from table cache.
245 */
246
tc_remove_table(TABLE * table)247 static void tc_remove_table(TABLE *table)
248 {
249 TDC_element *element= table->s->tdc;
250
251 mysql_mutex_lock(&element->LOCK_table_share);
252 /* Wait for MDL deadlock detector to complete traversing tdc.all_tables. */
253 while (element->all_tables_refs)
254 mysql_cond_wait(&element->COND_release, &element->LOCK_table_share);
255 element->all_tables.remove(table);
256 mysql_mutex_unlock(&element->LOCK_table_share);
257
258 intern_close_table(table);
259 }
260
261
tc_remove_all_unused_tables(TDC_element * element,Share_free_tables::List * purge_tables)262 static void tc_remove_all_unused_tables(TDC_element *element,
263 Share_free_tables::List *purge_tables)
264 {
265 for (uint32 i= 0; i < tc_instances; i++)
266 {
267 mysql_mutex_lock(&tc[i].LOCK_table_cache);
268 while (auto table= element->free_tables[i].list.pop_front())
269 {
270 tc[i].records--;
271 tc[i].free_tables.remove(table);
272 DBUG_ASSERT(element->all_tables_refs == 0);
273 element->all_tables.remove(table);
274 purge_tables->push_front(table);
275 }
276 mysql_mutex_unlock(&tc[i].LOCK_table_cache);
277 }
278 }
279
280
/**
  Free all unused TABLE objects.

  While locked:
  - remove unused objects from TABLE_SHARE::tdc.free_tables and
    TABLE_SHARE::tdc.all_tables
  - decrement tc_count

  While unlocked:
  - free resources related to unused objects

  @note This is called by 'handle_manager' when one wants to
        periodically flush all unused tables.
*/
295
tc_purge_callback(TDC_element * element,Share_free_tables::List * purge_tables)296 static my_bool tc_purge_callback(TDC_element *element,
297 Share_free_tables::List *purge_tables)
298 {
299 mysql_mutex_lock(&element->LOCK_table_share);
300 tc_remove_all_unused_tables(element, purge_tables);
301 mysql_mutex_unlock(&element->LOCK_table_share);
302 return FALSE;
303 }
304
305
tc_purge()306 void tc_purge()
307 {
308 Share_free_tables::List purge_tables;
309
310 tdc_iterate(0, (my_hash_walk_action) tc_purge_callback, &purge_tables);
311 while (auto table= purge_tables.pop_front())
312 intern_close_table(table);
313 }
314
315
316 /**
317 Add new TABLE object to table cache.
318
319 @pre TABLE object is used by caller.
320
321 Added object cannot be evicted or acquired.
322
323 While locked:
324 - add object to TABLE_SHARE::tdc.all_tables
325 - increment tc_count
326 - evict LRU object from table cache if we reached threshold
327
328 While unlocked:
329 - free evicted object
330 */
331
tc_add_table(THD * thd,TABLE * table)332 void tc_add_table(THD *thd, TABLE *table)
333 {
334 uint32_t i=
335 thd->thread_id % tc_active_instances.load(std::memory_order_relaxed);
336 TABLE *LRU_table= 0;
337 TDC_element *element= table->s->tdc;
338
339 DBUG_ASSERT(table->in_use == thd);
340 table->instance= i;
341 mysql_mutex_lock(&element->LOCK_table_share);
342 /* Wait for MDL deadlock detector to complete traversing tdc.all_tables. */
343 while (element->all_tables_refs)
344 mysql_cond_wait(&element->COND_release, &element->LOCK_table_share);
345 element->all_tables.push_front(table);
346 mysql_mutex_unlock(&element->LOCK_table_share);
347
348 mysql_mutex_lock(&tc[i].LOCK_table_cache);
349 if (tc[i].records == tc_size)
350 {
351 if ((LRU_table= tc[i].free_tables.pop_front()))
352 {
353 LRU_table->s->tdc->free_tables[i].list.remove(LRU_table);
354 /* Needed if MDL deadlock detector chimes in before tc_remove_table() */
355 LRU_table->in_use= thd;
356 mysql_mutex_unlock(&tc[i].LOCK_table_cache);
357 /* Keep out of locked LOCK_table_cache */
358 tc_remove_table(LRU_table);
359 }
360 else
361 {
362 tc[i].records++;
363 mysql_mutex_unlock(&tc[i].LOCK_table_cache);
364 }
365 /* Keep out of locked LOCK_table_cache */
366 status_var_increment(thd->status_var.table_open_cache_overflows);
367 }
368 else
369 {
370 tc[i].records++;
371 mysql_mutex_unlock(&tc[i].LOCK_table_cache);
372 }
373 }
374
375
376 /**
377 Acquire TABLE object from table cache.
378
379 @pre share must be protected against removal.
380
381 Acquired object cannot be evicted or acquired again.
382
383 @return TABLE object, or NULL if no unused objects.
384 */
385
tc_acquire_table(THD * thd,TDC_element * element)386 TABLE *tc_acquire_table(THD *thd, TDC_element *element)
387 {
388 uint32_t n_instances= tc_active_instances.load(std::memory_order_relaxed);
389 uint32_t i= thd->thread_id % n_instances;
390 TABLE *table;
391
392 tc[i].lock_and_check_contention(n_instances, i);
393 table= element->free_tables[i].list.pop_front();
394 if (table)
395 {
396 DBUG_ASSERT(!table->in_use);
397 table->in_use= thd;
398 /* The ex-unused table must be fully functional. */
399 DBUG_ASSERT(table->db_stat && table->file);
400 /* The children must be detached from the table. */
401 DBUG_ASSERT(!table->file->extra(HA_EXTRA_IS_ATTACHED_CHILDREN));
402 tc[i].free_tables.remove(table);
403 }
404 mysql_mutex_unlock(&tc[i].LOCK_table_cache);
405 return table;
406 }
407
408
/**
  Release TABLE object to table cache.

  @pre object is used by caller.

  Released object may be evicted or acquired again.

  While locked:
  - if object is marked for purge, decrement tc_count
  - add object to TABLE_SHARE::tdc.free_tables
  - evict LRU object from table cache if we reached threshold

  While unlocked:
  - mark object not in use by any thread
  - free evicted/purged object

  @note Another thread may mark share for purge any moment (even
        after version check). It means to-be-purged object may go to
        unused lists. This other thread is expected to call tc_purge(),
        which is synchronized with us on TABLE_SHARE::tdc.LOCK_table_share.

  @note The function returns nothing: the object is either purged (removed
        from the cache and closed) or released (put on the free lists).
*/
434
tc_release_table(TABLE * table)435 void tc_release_table(TABLE *table)
436 {
437 uint32 i= table->instance;
438 DBUG_ENTER("tc_release_table");
439 DBUG_ASSERT(table->in_use);
440 DBUG_ASSERT(table->file);
441 DBUG_ASSERT(!table->pos_in_locked_tables);
442
443 mysql_mutex_lock(&tc[i].LOCK_table_cache);
444 if (table->needs_reopen() || table->s->tdc->flushed ||
445 tc[i].records > tc_size)
446 {
447 tc[i].records--;
448 mysql_mutex_unlock(&tc[i].LOCK_table_cache);
449 tc_remove_table(table);
450 }
451 else
452 {
453 table->in_use= 0;
454 table->s->tdc->free_tables[i].list.push_front(table);
455 tc[i].free_tables.push_back(table);
456 mysql_mutex_unlock(&tc[i].LOCK_table_cache);
457 }
458 DBUG_VOID_RETURN;
459 }
460
461
tdc_assert_clean_share(TDC_element * element)462 static void tdc_assert_clean_share(TDC_element *element)
463 {
464 DBUG_ASSERT(element->share == 0);
465 DBUG_ASSERT(element->ref_count == 0);
466 DBUG_ASSERT(element->m_flush_tickets.is_empty());
467 DBUG_ASSERT(element->all_tables.is_empty());
468 #ifndef DBUG_OFF
469 for (uint32 i= 0; i < tc_instances; i++)
470 DBUG_ASSERT(element->free_tables[i].list.is_empty());
471 #endif
472 DBUG_ASSERT(element->all_tables_refs == 0);
473 DBUG_ASSERT(element->next == 0);
474 DBUG_ASSERT(element->prev == 0);
475 }
476
477
478 /**
479 Delete share from hash and free share object.
480 */
481
tdc_delete_share_from_hash(TDC_element * element)482 static void tdc_delete_share_from_hash(TDC_element *element)
483 {
484 THD *thd= current_thd;
485 LF_PINS *pins;
486 TABLE_SHARE *share;
487 DBUG_ENTER("tdc_delete_share_from_hash");
488
489 mysql_mutex_assert_owner(&element->LOCK_table_share);
490 share= element->share;
491 DBUG_ASSERT(share);
492 element->share= 0;
493 PSI_CALL_release_table_share(share->m_psi);
494 share->m_psi= 0;
495
496 if (!element->m_flush_tickets.is_empty())
497 {
498 Wait_for_flush_list::Iterator it(element->m_flush_tickets);
499 Wait_for_flush *ticket;
500 while ((ticket= it++))
501 (void) ticket->get_ctx()->m_wait.set_status(MDL_wait::GRANTED);
502
503 do
504 {
505 mysql_cond_wait(&element->COND_release, &element->LOCK_table_share);
506 } while (!element->m_flush_tickets.is_empty());
507 }
508
509 mysql_mutex_unlock(&element->LOCK_table_share);
510
511 if (thd)
512 {
513 fix_thd_pins(thd);
514 pins= thd->tdc_hash_pins;
515 }
516 else
517 pins= lf_hash_get_pins(&tdc_hash);
518
519 DBUG_ASSERT(pins); // What can we do about it?
520 tdc_assert_clean_share(element);
521 lf_hash_delete(&tdc_hash, pins, element->m_key, element->m_key_length);
522 if (!thd)
523 lf_hash_put_pins(pins);
524 free_table_share(share);
525 DBUG_VOID_RETURN;
526 }
527
528
/**
  Prepare table share for use with table definition cache.
*/
532
lf_alloc_constructor(uchar * arg)533 static void lf_alloc_constructor(uchar *arg)
534 {
535 TDC_element *element= (TDC_element*) (arg + LF_HASH_OVERHEAD);
536 DBUG_ENTER("lf_alloc_constructor");
537 mysql_mutex_init(key_TABLE_SHARE_LOCK_table_share,
538 &element->LOCK_table_share, MY_MUTEX_INIT_FAST);
539 mysql_cond_init(key_TABLE_SHARE_COND_release, &element->COND_release, 0);
540 element->m_flush_tickets.empty();
541 element->all_tables.empty();
542 for (uint32 i= 0; i < tc_instances; i++)
543 element->free_tables[i].list.empty();
544 element->all_tables_refs= 0;
545 element->share= 0;
546 element->ref_count= 0;
547 element->next= 0;
548 element->prev= 0;
549 DBUG_VOID_RETURN;
550 }
551
552
553 /**
554 Release table definition cache specific resources of table share.
555 */
556
lf_alloc_destructor(uchar * arg)557 static void lf_alloc_destructor(uchar *arg)
558 {
559 TDC_element *element= (TDC_element*) (arg + LF_HASH_OVERHEAD);
560 DBUG_ENTER("lf_alloc_destructor");
561 tdc_assert_clean_share(element);
562 mysql_cond_destroy(&element->COND_release);
563 mysql_mutex_destroy(&element->LOCK_table_share);
564 DBUG_VOID_RETURN;
565 }
566
567
tdc_hash_initializer(LF_HASH *,TDC_element * element,LEX_STRING * key)568 static void tdc_hash_initializer(LF_HASH *,
569 TDC_element *element, LEX_STRING *key)
570 {
571 memcpy(element->m_key, key->str, key->length);
572 element->m_key_length= (uint)key->length;
573 tdc_assert_clean_share(element);
574 }
575
576
tdc_hash_key(const TDC_element * element,size_t * length,my_bool)577 static uchar *tdc_hash_key(const TDC_element *element, size_t *length,
578 my_bool)
579 {
580 *length= element->m_key_length;
581 return (uchar*) element->m_key;
582 }
583
584
585 /**
586 Initialize table definition cache.
587 */
588
tdc_init(void)589 bool tdc_init(void)
590 {
591 DBUG_ENTER("tdc_init");
592 #ifdef HAVE_PSI_INTERFACE
593 mysql_mutex_register("sql", all_tc_mutexes, array_elements(all_tc_mutexes));
594 mysql_cond_register("sql", all_tc_conds, array_elements(all_tc_conds));
595 #endif
596 /* Extra instance is allocated to avoid false sharing */
597 if (!(tc= new Table_cache_instance[tc_instances + 1]))
598 DBUG_RETURN(true);
599 tdc_inited= true;
600 mysql_mutex_init(key_LOCK_unused_shares, &LOCK_unused_shares,
601 MY_MUTEX_INIT_FAST);
602 lf_hash_init(&tdc_hash, sizeof(TDC_element) +
603 sizeof(Share_free_tables) * (tc_instances - 1),
604 LF_HASH_UNIQUE, 0, 0,
605 (my_hash_get_key) tdc_hash_key,
606 &my_charset_bin);
607 tdc_hash.alloc.constructor= lf_alloc_constructor;
608 tdc_hash.alloc.destructor= lf_alloc_destructor;
609 tdc_hash.initializer= (lf_hash_initializer) tdc_hash_initializer;
610 DBUG_RETURN(false);
611 }
612
613
614 /**
615 Notify table definition cache that process of shutting down server
616 has started so it has to keep number of TABLE and TABLE_SHARE objects
617 minimal in order to reduce number of references to pluggable engines.
618 */
619
tdc_start_shutdown(void)620 void tdc_start_shutdown(void)
621 {
622 DBUG_ENTER("tdc_start_shutdown");
623 if (tdc_inited)
624 {
625 /*
626 Ensure that TABLE and TABLE_SHARE objects which are created for
627 tables that are open during process of plugins' shutdown are
628 immediately released. This keeps number of references to engine
629 plugins minimal and allows shutdown to proceed smoothly.
630 */
631 tdc_size= 0;
632 tc_size= 0;
633 /* Free all cached but unused TABLEs and TABLE_SHAREs. */
634 purge_tables();
635 }
636 DBUG_VOID_RETURN;
637 }
638
639
640 /**
641 Deinitialize table definition cache.
642 */
643
tdc_deinit(void)644 void tdc_deinit(void)
645 {
646 DBUG_ENTER("tdc_deinit");
647 if (tdc_inited)
648 {
649 tdc_inited= false;
650 lf_hash_destroy(&tdc_hash);
651 mysql_mutex_destroy(&LOCK_unused_shares);
652 delete [] tc;
653 }
654 DBUG_VOID_RETURN;
655 }
656
657
658 /**
659 Get number of cached table definitions.
660
661 @return Number of cached table definitions
662 */
663
tdc_records(void)664 ulong tdc_records(void)
665 {
666 return lf_hash_size(&tdc_hash);
667 }
668
669
tdc_purge(bool all)670 void tdc_purge(bool all)
671 {
672 DBUG_ENTER("tdc_purge");
673 while (all || tdc_records() > tdc_size)
674 {
675 TDC_element *element;
676
677 mysql_mutex_lock(&LOCK_unused_shares);
678 if (!(element= unused_shares.pop_front()))
679 {
680 mysql_mutex_unlock(&LOCK_unused_shares);
681 break;
682 }
683
684 /* Concurrent thread may start using share again, reset prev and next. */
685 element->prev= 0;
686 element->next= 0;
687 mysql_mutex_lock(&element->LOCK_table_share);
688 if (element->ref_count)
689 {
690 mysql_mutex_unlock(&element->LOCK_table_share);
691 mysql_mutex_unlock(&LOCK_unused_shares);
692 continue;
693 }
694 mysql_mutex_unlock(&LOCK_unused_shares);
695
696 tdc_delete_share_from_hash(element);
697 }
698 DBUG_VOID_RETURN;
699 }
700
701
702 /**
703 Lock table share.
704
705 Find table share with given db.table_name in table definition cache. Return
706 locked table share if found.
707
708 Locked table share means:
709 - table share is protected against removal from table definition cache
710 - no other thread can acquire/release table share
711
712 Caller is expected to unlock table share with tdc_unlock_share().
713
714 @retval 0 Share not found
715 @retval MY_ERRPTR OOM
716 @retval ptr Pointer to locked table share
717 */
718
tdc_lock_share(THD * thd,const char * db,const char * table_name)719 TDC_element *tdc_lock_share(THD *thd, const char *db, const char *table_name)
720 {
721 TDC_element *element;
722 char key[MAX_DBKEY_LENGTH];
723
724 DBUG_ENTER("tdc_lock_share");
725 if (unlikely(fix_thd_pins(thd)))
726 DBUG_RETURN((TDC_element*) MY_ERRPTR);
727
728 element= (TDC_element *) lf_hash_search(&tdc_hash, thd->tdc_hash_pins,
729 (uchar*) key,
730 tdc_create_key(key, db, table_name));
731 if (element)
732 {
733 mysql_mutex_lock(&element->LOCK_table_share);
734 if (unlikely(!element->share || element->share->error))
735 {
736 mysql_mutex_unlock(&element->LOCK_table_share);
737 element= 0;
738 }
739 lf_hash_search_unpin(thd->tdc_hash_pins);
740 }
741
742 DBUG_RETURN(element);
743 }
744
745
746 /**
747 Unlock share locked by tdc_lock_share().
748 */
749
tdc_unlock_share(TDC_element * element)750 void tdc_unlock_share(TDC_element *element)
751 {
752 DBUG_ENTER("tdc_unlock_share");
753 mysql_mutex_unlock(&element->LOCK_table_share);
754 DBUG_VOID_RETURN;
755 }
756
757
tdc_share_is_cached(THD * thd,const char * db,const char * table_name)758 int tdc_share_is_cached(THD *thd, const char *db, const char *table_name)
759 {
760 char key[MAX_DBKEY_LENGTH];
761
762 if (unlikely(fix_thd_pins(thd)))
763 return -1;
764
765 if (lf_hash_search(&tdc_hash, thd->tdc_hash_pins, (uchar*) key,
766 tdc_create_key(key, db, table_name)))
767 {
768 lf_hash_search_unpin(thd->tdc_hash_pins);
769 return 1;
770 }
771 return 0;
772 }
773
774
775 /*
776 Get TABLE_SHARE for a table.
777
778 tdc_acquire_share()
779 thd Thread handle
780 tl Table that should be opened
781 flags operation: what to open table or view
782 out_table TABLE for the requested table
783
784 IMPLEMENTATION
785 Get a table definition from the table definition cache.
786 If it doesn't exist, create a new from the table definition file.
787
788 RETURN
789 0 Error
790 # Share for table
791 */
792
tdc_acquire_share(THD * thd,TABLE_LIST * tl,uint flags,TABLE ** out_table)793 TABLE_SHARE *tdc_acquire_share(THD *thd, TABLE_LIST *tl, uint flags,
794 TABLE **out_table)
795 {
796 TABLE_SHARE *share;
797 TDC_element *element;
798 const char *key;
799 uint key_length= get_table_def_key(tl, &key);
800 my_hash_value_type hash_value= tl->mdl_request.key.tc_hash_value();
801 bool was_unused;
802 DBUG_ENTER("tdc_acquire_share");
803
804 if (fix_thd_pins(thd))
805 DBUG_RETURN(0);
806
807 retry:
808 while (!(element= (TDC_element*) lf_hash_search_using_hash_value(&tdc_hash,
809 thd->tdc_hash_pins, hash_value, (uchar*) key, key_length)))
810 {
811 LEX_STRING tmp= { const_cast<char*>(key), key_length };
812 int res= lf_hash_insert(&tdc_hash, thd->tdc_hash_pins, (uchar*) &tmp);
813
814 if (res == -1)
815 DBUG_RETURN(0);
816 else if (res == 1)
817 continue;
818
819 element= (TDC_element*) lf_hash_search_using_hash_value(&tdc_hash,
820 thd->tdc_hash_pins, hash_value, (uchar*) key, key_length);
821 lf_hash_search_unpin(thd->tdc_hash_pins);
822 DBUG_ASSERT(element);
823
824 if (!(share= alloc_table_share(tl->db.str, tl->table_name.str, key, key_length)))
825 {
826 lf_hash_delete(&tdc_hash, thd->tdc_hash_pins, key, key_length);
827 DBUG_RETURN(0);
828 }
829
830 /* note that tdc_acquire_share() *always* uses discovery */
831 open_table_def(thd, share, flags | GTS_USE_DISCOVERY);
832
833 if (checked_unlikely(share->error))
834 {
835 free_table_share(share);
836 lf_hash_delete(&tdc_hash, thd->tdc_hash_pins, key, key_length);
837 DBUG_RETURN(0);
838 }
839
840 mysql_mutex_lock(&element->LOCK_table_share);
841 element->share= share;
842 share->tdc= element;
843 element->ref_count++;
844 element->flushed= false;
845 mysql_mutex_unlock(&element->LOCK_table_share);
846
847 tdc_purge(false);
848 if (out_table)
849 {
850 status_var_increment(thd->status_var.table_open_cache_misses);
851 *out_table= 0;
852 }
853 share->m_psi= PSI_CALL_get_table_share(false, share);
854 goto end;
855 }
856
857 /* cannot force discovery of a cached share */
858 DBUG_ASSERT(!(flags & GTS_FORCE_DISCOVERY));
859
860 if (out_table && (flags & GTS_TABLE))
861 {
862 if ((*out_table= tc_acquire_table(thd, element)))
863 {
864 lf_hash_search_unpin(thd->tdc_hash_pins);
865 DBUG_ASSERT(!(flags & GTS_NOLOCK));
866 DBUG_ASSERT(element->share);
867 DBUG_ASSERT(!element->share->error);
868 DBUG_ASSERT(!element->share->is_view);
869 status_var_increment(thd->status_var.table_open_cache_hits);
870 DBUG_RETURN(element->share);
871 }
872 status_var_increment(thd->status_var.table_open_cache_misses);
873 }
874
875 mysql_mutex_lock(&element->LOCK_table_share);
876 if (!(share= element->share))
877 {
878 mysql_mutex_unlock(&element->LOCK_table_share);
879 lf_hash_search_unpin(thd->tdc_hash_pins);
880 goto retry;
881 }
882 lf_hash_search_unpin(thd->tdc_hash_pins);
883
884 /*
885 We found an existing table definition. Return it if we didn't get
886 an error when reading the table definition from file.
887 */
888 if (unlikely(share->error))
889 {
890 open_table_error(share, share->error, share->open_errno);
891 goto err;
892 }
893
894 if (share->is_view && !(flags & GTS_VIEW))
895 {
896 open_table_error(share, OPEN_FRM_NOT_A_TABLE, ENOENT);
897 goto err;
898 }
899 if (!share->is_view && !(flags & GTS_TABLE))
900 {
901 open_table_error(share, OPEN_FRM_NOT_A_VIEW, ENOENT);
902 goto err;
903 }
904
905 was_unused= !element->ref_count;
906 element->ref_count++;
907 mysql_mutex_unlock(&element->LOCK_table_share);
908 if (was_unused)
909 {
910 mysql_mutex_lock(&LOCK_unused_shares);
911 if (element->prev)
912 {
913 /*
914 Share was not used before and it was in the old_unused_share list
915 Unlink share from this list
916 */
917 DBUG_PRINT("info", ("Unlinking from not used list"));
918 unused_shares.remove(element);
919 element->next= 0;
920 element->prev= 0;
921 }
922 mysql_mutex_unlock(&LOCK_unused_shares);
923 }
924
925 end:
926 DBUG_PRINT("exit", ("share: %p ref_count: %u",
927 share, share->tdc->ref_count));
928 if (flags & GTS_NOLOCK)
929 {
930 tdc_release_share(share);
931 /*
932 if GTS_NOLOCK is requested, the returned share pointer cannot be used,
933 the share it points to may go away any moment.
934 But perhaps the caller is only interested to know whether a share or
935 table existed?
936 Let's return an invalid pointer here to catch dereferencing attempts.
937 */
938 share= UNUSABLE_TABLE_SHARE;
939 }
940 DBUG_RETURN(share);
941
942 err:
943 mysql_mutex_unlock(&element->LOCK_table_share);
944 DBUG_RETURN(0);
945 }
946
947
948 /**
949 Release table share acquired by tdc_acquire_share().
950 */
951
tdc_release_share(TABLE_SHARE * share)952 void tdc_release_share(TABLE_SHARE *share)
953 {
954 DBUG_ENTER("tdc_release_share");
955
956 mysql_mutex_lock(&share->tdc->LOCK_table_share);
957 DBUG_PRINT("enter",
958 ("share: %p table: %s.%s ref_count: %u",
959 share, share->db.str, share->table_name.str,
960 share->tdc->ref_count));
961 DBUG_ASSERT(share->tdc->ref_count);
962
963 if (share->tdc->ref_count > 1)
964 {
965 share->tdc->ref_count--;
966 if (!share->is_view)
967 mysql_cond_broadcast(&share->tdc->COND_release);
968 mysql_mutex_unlock(&share->tdc->LOCK_table_share);
969 DBUG_VOID_RETURN;
970 }
971 mysql_mutex_unlock(&share->tdc->LOCK_table_share);
972
973 mysql_mutex_lock(&LOCK_unused_shares);
974 mysql_mutex_lock(&share->tdc->LOCK_table_share);
975 if (--share->tdc->ref_count)
976 {
977 if (!share->is_view)
978 mysql_cond_broadcast(&share->tdc->COND_release);
979 mysql_mutex_unlock(&share->tdc->LOCK_table_share);
980 mysql_mutex_unlock(&LOCK_unused_shares);
981 DBUG_VOID_RETURN;
982 }
983 if (share->tdc->flushed || tdc_records() > tdc_size)
984 {
985 mysql_mutex_unlock(&LOCK_unused_shares);
986 tdc_delete_share_from_hash(share->tdc);
987 DBUG_VOID_RETURN;
988 }
989 /* Link share last in used_table_share list */
990 DBUG_PRINT("info", ("moving share to unused list"));
991 DBUG_ASSERT(share->tdc->next == 0);
992 unused_shares.push_back(share->tdc);
993 mysql_mutex_unlock(&share->tdc->LOCK_table_share);
994 mysql_mutex_unlock(&LOCK_unused_shares);
995 DBUG_VOID_RETURN;
996 }
997
998
tdc_remove_referenced_share(THD * thd,TABLE_SHARE * share)999 void tdc_remove_referenced_share(THD *thd, TABLE_SHARE *share)
1000 {
1001 DBUG_ASSERT(thd->mdl_context.is_lock_owner(MDL_key::TABLE, share->db.str,
1002 share->table_name.str,
1003 MDL_EXCLUSIVE));
1004 share->tdc->flush_unused(false);
1005 mysql_mutex_lock(&share->tdc->LOCK_table_share);
1006 share->tdc->wait_for_refs(1);
1007 DBUG_ASSERT(share->tdc->all_tables.is_empty());
1008 share->tdc->ref_count--;
1009 tdc_delete_share_from_hash(share->tdc);
1010 }
1011
1012
1013 /**
1014 Removes all TABLE instances and corresponding TABLE_SHARE
1015
1016 @param thd Thread context
1017 @param db Name of database
1018 @param table_name Name of table
1019
1020 @note It assumes that table instances are already not used by any
1021 (other) thread (this should be achieved by using meta-data locks).
1022 */
1023
tdc_remove_table(THD * thd,const char * db,const char * table_name)1024 void tdc_remove_table(THD *thd, const char *db, const char *table_name)
1025 {
1026 TDC_element *element;
1027 DBUG_ENTER("tdc_remove_table");
1028 DBUG_PRINT("enter", ("name: %s", table_name));
1029
1030 DBUG_ASSERT(thd->mdl_context.is_lock_owner(MDL_key::TABLE, db, table_name,
1031 MDL_EXCLUSIVE));
1032
1033 mysql_mutex_lock(&LOCK_unused_shares);
1034 if (!(element= tdc_lock_share(thd, db, table_name)))
1035 {
1036 mysql_mutex_unlock(&LOCK_unused_shares);
1037 DBUG_VOID_RETURN;
1038 }
1039
1040 DBUG_ASSERT(element != MY_ERRPTR); // What can we do about it?
1041
1042 if (!element->ref_count)
1043 {
1044 if (element->prev)
1045 {
1046 unused_shares.remove(element);
1047 element->prev= 0;
1048 element->next= 0;
1049 }
1050 mysql_mutex_unlock(&LOCK_unused_shares);
1051
1052 tdc_delete_share_from_hash(element);
1053 DBUG_VOID_RETURN;
1054 }
1055 mysql_mutex_unlock(&LOCK_unused_shares);
1056
1057 element->ref_count++;
1058 mysql_mutex_unlock(&element->LOCK_table_share);
1059
1060 /* We have to relock the mutex to avoid code duplication. Sigh. */
1061 tdc_remove_referenced_share(thd, element->share);
1062 DBUG_VOID_RETURN;
1063 }
1064
1065
1066 /**
1067 Check if table's share is being removed from the table definition
1068 cache and, if yes, wait until the flush is complete.
1069
  @param thd             Thread context.
  @param db              Name of database.
  @param table_name      Name of table which share should be checked.
  @param wait_timeout    Timeout for waiting.
  @param deadlock_weight Weight of this wait for deadlock detector.
1074
1075 @retval 0 Success. Share is up to date or has been flushed.
1076 @retval 1 Error (OOM, was killed, the wait resulted
1077 in a deadlock or timeout). Reported.
1078 */
1079
tdc_wait_for_old_version(THD * thd,const char * db,const char * table_name,ulong wait_timeout,uint deadlock_weight)1080 int tdc_wait_for_old_version(THD *thd, const char *db, const char *table_name,
1081 ulong wait_timeout, uint deadlock_weight)
1082 {
1083 TDC_element *element;
1084
1085 if (!(element= tdc_lock_share(thd, db, table_name)))
1086 return FALSE;
1087 else if (element == MY_ERRPTR)
1088 return TRUE;
1089 else if (element->flushed)
1090 {
1091 struct timespec abstime;
1092 set_timespec(abstime, wait_timeout);
1093 return element->share->wait_for_old_version(thd, &abstime, deadlock_weight);
1094 }
1095 tdc_unlock_share(element);
1096 return FALSE;
1097 }
1098
1099
1100 /**
1101 Iterate table definition cache.
1102
1103 Object is protected against removal from table definition cache.
1104
1105 @note Returned TABLE_SHARE is not guaranteed to be fully initialized:
1106 tdc_acquire_share() added new share, but didn't open it yet. If caller
  needs fully initialized share, it must lock table share mutex.
1108 */
1109
/*
  State carried through one duplicate-free tdc_iterate() walk; passed to
  eliminate_duplicates() as its argument.
*/
struct eliminate_duplicates_arg
{
  HASH hash;                   /* keys of the elements seen so far */
  MEM_ROOT root;               /* memory root backing the key copies in hash */
  my_hash_walk_action action;  /* callback originally supplied by the caller */
  void *argument;              /* argument originally supplied by the caller */
};
1117
1118
static uchar *eliminate_duplicates_get_key(const uchar *element, size_t *length,
                                           my_bool not_used __attribute__((unused)))
{
  /*
    Key extraction callback for the duplicate-elimination hash: records are
    LEX_STRING objects, and the string itself is the key.
  */
  const LEX_STRING *name= reinterpret_cast<const LEX_STRING *>(element);

  *length= name->length;
  return reinterpret_cast<uchar *>(name->str);
}
1126
1127
eliminate_duplicates(TDC_element * element,eliminate_duplicates_arg * arg)1128 static my_bool eliminate_duplicates(TDC_element *element,
1129 eliminate_duplicates_arg *arg)
1130 {
1131 LEX_STRING *key= (LEX_STRING *) alloc_root(&arg->root, sizeof(LEX_STRING));
1132
1133 if (!key || !(key->str= (char*) memdup_root(&arg->root, element->m_key,
1134 element->m_key_length)))
1135 return TRUE;
1136
1137 key->length= element->m_key_length;
1138
1139 if (my_hash_insert(&arg->hash, (uchar *) key))
1140 return FALSE;
1141
1142 return arg->action(element, arg->argument);
1143 }
1144
1145
tdc_iterate(THD * thd,my_hash_walk_action action,void * argument,bool no_dups)1146 int tdc_iterate(THD *thd, my_hash_walk_action action, void *argument,
1147 bool no_dups)
1148 {
1149 eliminate_duplicates_arg no_dups_argument;
1150 LF_PINS *pins;
1151 myf alloc_flags= 0;
1152 uint hash_flags= HASH_UNIQUE;
1153 int res;
1154
1155 if (thd)
1156 {
1157 fix_thd_pins(thd);
1158 pins= thd->tdc_hash_pins;
1159 alloc_flags= MY_THREAD_SPECIFIC;
1160 hash_flags|= HASH_THREAD_SPECIFIC;
1161 }
1162 else
1163 pins= lf_hash_get_pins(&tdc_hash);
1164
1165 if (!pins)
1166 return ER_OUTOFMEMORY;
1167
1168 if (no_dups)
1169 {
1170 init_alloc_root(PSI_INSTRUMENT_ME, &no_dups_argument.root, 4096, 4096, MYF(alloc_flags));
1171 my_hash_init(PSI_INSTRUMENT_ME, &no_dups_argument.hash, &my_charset_bin,
1172 tdc_records(), 0, 0, eliminate_duplicates_get_key, 0,
1173 hash_flags);
1174 no_dups_argument.action= action;
1175 no_dups_argument.argument= argument;
1176 action= (my_hash_walk_action) eliminate_duplicates;
1177 argument= &no_dups_argument;
1178 }
1179
1180 res= lf_hash_iterate(&tdc_hash, pins, action, argument);
1181
1182 if (!thd)
1183 lf_hash_put_pins(pins);
1184
1185 if (no_dups)
1186 {
1187 my_hash_free(&no_dups_argument.hash);
1188 free_root(&no_dups_argument.root, MYF(0));
1189 }
1190 return res;
1191 }
1192
1193
show_tc_active_instances(THD * thd,SHOW_VAR * var,char * buff,enum enum_var_type scope)1194 int show_tc_active_instances(THD *thd, SHOW_VAR *var, char *buff,
1195 enum enum_var_type scope)
1196 {
1197 var->type= SHOW_UINT;
1198 var->value= buff;
1199 *(reinterpret_cast<uint32_t*>(buff))=
1200 tc_active_instances.load(std::memory_order_relaxed);
1201 return 0;
1202 }
1203
1204
1205 /**
1206 Waits until ref_count goes down to given number
1207
1208 @param my_refs Number of references owned by the caller
1209
1210 Caller must own at least one TABLE_SHARE reference.
1211
1212 Even though current thread holds exclusive metadata lock on this share,
1213 concurrent FLUSH TABLES threads may be in process of closing unused table
1214 instances belonging to this share. E.g.:
1215 thr1 (FLUSH TABLES): table= share->tdc.free_tables.pop_front();
1216 thr1 (FLUSH TABLES): share->tdc.all_tables.remove(table);
1217 thr2 (ALTER TABLE): tdc_remove_table();
1218 thr1 (FLUSH TABLES): intern_close_table(table);
1219
1220 Current remove type assumes that all table instances (except for those
1221 that are owned by current thread) must be closed before
  tdc_remove_table() returns. Wait for such tables now.
1223
1224 intern_close_table() decrements ref_count and signals COND_release. When
1225 ref_count drops down to number of references owned by current thread
1226 waiting is completed.
1227
1228 Unfortunately TABLE_SHARE::wait_for_old_version() cannot be used here
1229 because it waits for all table instances, whereas we have to wait only
1230 for those that are not owned by current thread.
1231 */
1232
wait_for_refs(uint my_refs)1233 void TDC_element::wait_for_refs(uint my_refs)
1234 {
1235 while (ref_count > my_refs)
1236 mysql_cond_wait(&COND_release, &LOCK_table_share);
1237 }
1238
1239
1240 /**
1241 Flushes unused TABLE instances
1242
1243 @param thd Thread context
1244 @param mark_flushed Whether to destroy TABLE_SHARE when released
1245
1246 Caller is allowed to own used TABLE instances.
1247 There must be no TABLE objects used by other threads and caller must own
1248 exclusive metadata lock on the table.
1249 */
1250
flush(THD * thd,bool mark_flushed)1251 void TDC_element::flush(THD *thd, bool mark_flushed)
1252 {
1253 DBUG_ASSERT(thd->mdl_context.is_lock_owner(MDL_key::TABLE, share->db.str,
1254 share->table_name.str,
1255 MDL_EXCLUSIVE));
1256
1257 flush_unused(mark_flushed);
1258
1259 mysql_mutex_lock(&LOCK_table_share);
1260 All_share_tables_list::Iterator it(all_tables);
1261 uint my_refs= 0;
1262 while (auto table= it++)
1263 {
1264 if (table->in_use == thd)
1265 my_refs++;
1266 }
1267 wait_for_refs(my_refs);
1268 #ifndef DBUG_OFF
1269 it.rewind();
1270 while (auto table= it++)
1271 DBUG_ASSERT(table->in_use == thd);
1272 #endif
1273 mysql_mutex_unlock(&LOCK_table_share);
1274 }
1275
1276
1277 /**
1278 Flushes unused TABLE instances
1279 */
1280
flush_unused(bool mark_flushed)1281 void TDC_element::flush_unused(bool mark_flushed)
1282 {
1283 Share_free_tables::List purge_tables;
1284
1285 mysql_mutex_lock(&LOCK_table_share);
1286 if (mark_flushed)
1287 flushed= true;
1288 tc_remove_all_unused_tables(this, &purge_tables);
1289 mysql_mutex_unlock(&LOCK_table_share);
1290
1291 while (auto table= purge_tables.pop_front())
1292 intern_close_table(table);
1293 }
1294