/* Copyright (c) 2000, 2012, Oracle and/or its affiliates.
   Copyright (c) 2010, 2011 Monty Program Ab
   Copyright (C) 2013 Sergey Vojtovich and MariaDB Foundation

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; version 2 of the License.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA */

/**
  @file
  Table definition cache and table cache implementation.

  Table definition cache actions:
  - add new TABLE_SHARE object to cache (tdc_acquire_share())
  - acquire TABLE_SHARE object from cache (tdc_acquire_share())
  - release TABLE_SHARE object to cache (tdc_release_share())
  - purge unused TABLE_SHARE objects from cache (tdc_purge())
  - remove TABLE_SHARE object from cache (tdc_remove_table())
  - get number of TABLE_SHARE objects in cache (tdc_records())

  Table cache actions:
  - add new TABLE object to cache (tc_add_table())
  - acquire TABLE object from cache (tc_acquire_table())
  - release TABLE object to cache (tc_release_table())
  - purge unused TABLE objects from cache (tc_purge())
  - purge unused TABLE objects of a table from cache (tdc_remove_table())
  - get number of TABLE objects in cache (tc_records())

  Dependencies:
  - close_cached_tables(): flush tables on shutdown
  - alloc_table_share()
  - free_table_share()

  Table cache invariants:
  - TABLE_SHARE::free_tables shall not contain objects with TABLE::in_use != 0
  - TABLE_SHARE::free_tables shall not receive new objects if
46 TABLE_SHARE::tdc.flushed is true 47 */ 48 49 #include "mariadb.h" 50 #include "lf.h" 51 #include "table.h" 52 #include "sql_base.h" 53 54 55 /** Configuration. */ 56 ulong tdc_size; /**< Table definition cache threshold for LRU eviction. */ 57 ulong tc_size; /**< Table cache threshold for LRU eviction. */ 58 uint32 tc_instances; 59 uint32 tc_active_instances= 1; 60 static uint32 tc_contention_warning_reported; 61 62 /** Data collections. */ 63 static LF_HASH tdc_hash; /**< Collection of TABLE_SHARE objects. */ 64 /** Collection of unused TABLE_SHARE objects. */ 65 static 66 I_P_List <TDC_element, 67 I_P_List_adapter<TDC_element, &TDC_element::next, &TDC_element::prev>, 68 I_P_List_null_counter, 69 I_P_List_fast_push_back<TDC_element> > unused_shares; 70 71 static tdc_version_t tdc_version; /* Increments on each reload */ 72 static bool tdc_inited; 73 74 75 /** 76 Protects unused shares list. 77 78 TDC_element::prev 79 TDC_element::next 80 unused_shares 81 */ 82 83 static mysql_mutex_t LOCK_unused_shares; 84 85 #ifdef HAVE_PSI_INTERFACE 86 static PSI_mutex_key key_LOCK_unused_shares, key_TABLE_SHARE_LOCK_table_share, 87 key_LOCK_table_cache; 88 static PSI_mutex_info all_tc_mutexes[]= 89 { 90 { &key_LOCK_unused_shares, "LOCK_unused_shares", PSI_FLAG_GLOBAL }, 91 { &key_TABLE_SHARE_LOCK_table_share, "TABLE_SHARE::tdc.LOCK_table_share", 0 }, 92 { &key_LOCK_table_cache, "LOCK_table_cache", 0 } 93 }; 94 95 static PSI_cond_key key_TABLE_SHARE_COND_release; 96 static PSI_cond_info all_tc_conds[]= 97 { 98 { &key_TABLE_SHARE_COND_release, "TABLE_SHARE::tdc.COND_release", 0 } 99 }; 100 #endif 101 102 103 static int fix_thd_pins(THD *thd) 104 { 105 return thd->tdc_hash_pins ? 0 : 106 (thd->tdc_hash_pins= lf_hash_get_pins(&tdc_hash)) == 0; 107 } 108 109 110 /* 111 Auxiliary routines for manipulating with per-share all/unused lists 112 and tc_count counter. 113 Responsible for preserving invariants between those lists, counter 114 and TABLE::in_use member. 
115 In fact those routines implement sort of implicit table cache as 116 part of table definition cache. 117 */ 118 119 struct Table_cache_instance 120 { 121 /** 122 Protects free_tables (TABLE::global_free_next and TABLE::global_free_prev), 123 records, Share_free_tables::List (TABLE::prev and TABLE::next), 124 TABLE::in_use. 125 */ 126 mysql_mutex_t LOCK_table_cache; 127 I_P_List <TABLE, I_P_List_adapter<TABLE, &TABLE::global_free_next, 128 &TABLE::global_free_prev>, 129 I_P_List_null_counter, I_P_List_fast_push_back<TABLE> > 130 free_tables; 131 ulong records; 132 uint mutex_waits; 133 uint mutex_nowaits; 134 /** Avoid false sharing between instances */ 135 char pad[CPU_LEVEL1_DCACHE_LINESIZE]; 136 137 Table_cache_instance(): records(0), mutex_waits(0), mutex_nowaits(0) 138 { 139 mysql_mutex_init(key_LOCK_table_cache, &LOCK_table_cache, 140 MY_MUTEX_INIT_FAST); 141 } 142 143 ~Table_cache_instance() 144 { 145 mysql_mutex_destroy(&LOCK_table_cache); 146 DBUG_ASSERT(free_tables.is_empty()); 147 DBUG_ASSERT(records == 0); 148 } 149 150 /** 151 Lock table cache mutex and check contention. 152 153 Instance is considered contested if more than 20% of mutex acquisiotions 154 can't be served immediately. Up to 100 000 probes may be performed to avoid 155 instance activation on short sporadic peaks. 100 000 is estimated maximum 156 number of queries one instance can serve in one second. 157 158 These numbers work well on a 2 socket / 20 core / 40 threads Intel Broadwell 159 system, that is expected number of instances is activated within reasonable 160 warmup time. It may have to be adjusted for other systems. 161 162 Only TABLE object acquistion is instrumented. We intentionally avoid this 163 overhead on TABLE object release. All other table cache mutex acquistions 164 are considered out of hot path and are not instrumented either. 
165 */ 166 void lock_and_check_contention(uint32 n_instances, uint32 instance) 167 { 168 if (mysql_mutex_trylock(&LOCK_table_cache)) 169 { 170 mysql_mutex_lock(&LOCK_table_cache); 171 if (++mutex_waits == 20000) 172 { 173 if (n_instances < tc_instances) 174 { 175 if (my_atomic_cas32_weak_explicit((int32*) &tc_active_instances, 176 (int32*) &n_instances, 177 (int32) n_instances + 1, 178 MY_MEMORY_ORDER_RELAXED, 179 MY_MEMORY_ORDER_RELAXED)) 180 { 181 sql_print_information("Detected table cache mutex contention at instance %d: " 182 "%d%% waits. Additional table cache instance " 183 "activated. Number of instances after " 184 "activation: %d.", 185 instance + 1, 186 mutex_waits * 100 / (mutex_nowaits + mutex_waits), 187 n_instances + 1); 188 } 189 } 190 else if (!my_atomic_fas32_explicit((int32*) &tc_contention_warning_reported, 191 1, MY_MEMORY_ORDER_RELAXED)) 192 { 193 sql_print_warning("Detected table cache mutex contention at instance %d: " 194 "%d%% waits. Additional table cache instance " 195 "cannot be activated: consider raising " 196 "table_open_cache_instances. Number of active " 197 "instances: %d.", 198 instance + 1, 199 mutex_waits * 100 / (mutex_nowaits + mutex_waits), 200 n_instances); 201 } 202 mutex_waits= 0; 203 mutex_nowaits= 0; 204 } 205 } 206 else if (++mutex_nowaits == 80000) 207 { 208 mutex_waits= 0; 209 mutex_nowaits= 0; 210 } 211 } 212 }; 213 214 215 static Table_cache_instance *tc; 216 217 218 static void intern_close_table(TABLE *table) 219 { 220 delete table->triggers; 221 DBUG_ASSERT(table->file); 222 closefrm(table); 223 tdc_release_share(table->s); 224 my_free(table); 225 } 226 227 228 /** 229 Get number of TABLE objects (used and unused) in table cache. 
230 */ 231 232 uint tc_records(void) 233 { 234 ulong total= 0; 235 for (ulong i= 0; i < tc_instances; i++) 236 { 237 mysql_mutex_lock(&tc[i].LOCK_table_cache); 238 total+= tc[i].records; 239 mysql_mutex_unlock(&tc[i].LOCK_table_cache); 240 } 241 return total; 242 } 243 244 245 /** 246 Remove TABLE object from table cache. 247 */ 248 249 static void tc_remove_table(TABLE *table) 250 { 251 TDC_element *element= table->s->tdc; 252 253 mysql_mutex_lock(&element->LOCK_table_share); 254 /* Wait for MDL deadlock detector to complete traversing tdc.all_tables. */ 255 while (element->all_tables_refs) 256 mysql_cond_wait(&element->COND_release, &element->LOCK_table_share); 257 element->all_tables.remove(table); 258 mysql_mutex_unlock(&element->LOCK_table_share); 259 260 intern_close_table(table); 261 } 262 263 264 static void tc_remove_all_unused_tables(TDC_element *element, 265 Share_free_tables::List *purge_tables, 266 bool mark_flushed) 267 { 268 TABLE *table; 269 270 /* 271 Mark share flushed in order to ensure that it gets 272 automatically deleted once it is no longer referenced. 273 274 Note that code in TABLE_SHARE::wait_for_old_version() assumes that 275 marking share flushed is followed by purge of unused table 276 shares. 277 */ 278 if (mark_flushed) 279 element->flushed= true; 280 for (ulong i= 0; i < tc_instances; i++) 281 { 282 mysql_mutex_lock(&tc[i].LOCK_table_cache); 283 while ((table= element->free_tables[i].list.pop_front())) 284 { 285 tc[i].records--; 286 tc[i].free_tables.remove(table); 287 DBUG_ASSERT(element->all_tables_refs == 0); 288 element->all_tables.remove(table); 289 purge_tables->push_front(table); 290 } 291 mysql_mutex_unlock(&tc[i].LOCK_table_cache); 292 } 293 } 294 295 296 /** 297 Free all unused TABLE objects. 
298 299 While locked: 300 - remove unused objects from TABLE_SHARE::tdc.free_tables and 301 TABLE_SHARE::tdc.all_tables 302 - decrement tc_count 303 304 While unlocked: 305 - free resources related to unused objects 306 307 @note This is called by 'handle_manager' when one wants to 308 periodicly flush all not used tables. 309 */ 310 311 struct tc_purge_arg 312 { 313 Share_free_tables::List purge_tables; 314 bool mark_flushed; 315 }; 316 317 318 static my_bool tc_purge_callback(TDC_element *element, tc_purge_arg *arg) 319 { 320 mysql_mutex_lock(&element->LOCK_table_share); 321 tc_remove_all_unused_tables(element, &arg->purge_tables, arg->mark_flushed); 322 mysql_mutex_unlock(&element->LOCK_table_share); 323 return FALSE; 324 } 325 326 327 void tc_purge(bool mark_flushed) 328 { 329 tc_purge_arg argument; 330 TABLE *table; 331 332 argument.mark_flushed= mark_flushed; 333 tdc_iterate(0, (my_hash_walk_action) tc_purge_callback, &argument); 334 while ((table= argument.purge_tables.pop_front())) 335 intern_close_table(table); 336 } 337 338 339 /** 340 Add new TABLE object to table cache. 341 342 @pre TABLE object is used by caller. 343 344 Added object cannot be evicted or acquired. 345 346 While locked: 347 - add object to TABLE_SHARE::tdc.all_tables 348 - increment tc_count 349 - evict LRU object from table cache if we reached threshold 350 351 While unlocked: 352 - free evicted object 353 */ 354 355 void tc_add_table(THD *thd, TABLE *table) 356 { 357 uint32 i= thd->thread_id % my_atomic_load32_explicit((int32*) &tc_active_instances, 358 MY_MEMORY_ORDER_RELAXED); 359 TABLE *LRU_table= 0; 360 TDC_element *element= table->s->tdc; 361 362 DBUG_ASSERT(table->in_use == thd); 363 table->instance= i; 364 mysql_mutex_lock(&element->LOCK_table_share); 365 /* Wait for MDL deadlock detector to complete traversing tdc.all_tables. 
*/ 366 while (element->all_tables_refs) 367 mysql_cond_wait(&element->COND_release, &element->LOCK_table_share); 368 element->all_tables.push_front(table); 369 mysql_mutex_unlock(&element->LOCK_table_share); 370 371 mysql_mutex_lock(&tc[i].LOCK_table_cache); 372 if (tc[i].records == tc_size) 373 { 374 if ((LRU_table= tc[i].free_tables.pop_front())) 375 { 376 LRU_table->s->tdc->free_tables[i].list.remove(LRU_table); 377 /* Needed if MDL deadlock detector chimes in before tc_remove_table() */ 378 LRU_table->in_use= thd; 379 mysql_mutex_unlock(&tc[i].LOCK_table_cache); 380 /* Keep out of locked LOCK_table_cache */ 381 tc_remove_table(LRU_table); 382 } 383 else 384 { 385 tc[i].records++; 386 mysql_mutex_unlock(&tc[i].LOCK_table_cache); 387 } 388 /* Keep out of locked LOCK_table_cache */ 389 status_var_increment(thd->status_var.table_open_cache_overflows); 390 } 391 else 392 { 393 tc[i].records++; 394 mysql_mutex_unlock(&tc[i].LOCK_table_cache); 395 } 396 } 397 398 399 /** 400 Acquire TABLE object from table cache. 401 402 @pre share must be protected against removal. 403 404 Acquired object cannot be evicted or acquired again. 405 406 @return TABLE object, or NULL if no unused objects. 407 */ 408 409 static TABLE *tc_acquire_table(THD *thd, TDC_element *element) 410 { 411 uint32 n_instances= 412 my_atomic_load32_explicit((int32*) &tc_active_instances, 413 MY_MEMORY_ORDER_RELAXED); 414 uint32 i= thd->thread_id % n_instances; 415 TABLE *table; 416 417 tc[i].lock_and_check_contention(n_instances, i); 418 table= element->free_tables[i].list.pop_front(); 419 if (table) 420 { 421 DBUG_ASSERT(!table->in_use); 422 table->in_use= thd; 423 /* The ex-unused table must be fully functional. */ 424 DBUG_ASSERT(table->db_stat && table->file); 425 /* The children must be detached from the table. 
*/ 426 DBUG_ASSERT(!table->file->extra(HA_EXTRA_IS_ATTACHED_CHILDREN)); 427 tc[i].free_tables.remove(table); 428 } 429 mysql_mutex_unlock(&tc[i].LOCK_table_cache); 430 return table; 431 } 432 433 434 /** 435 Release TABLE object to table cache. 436 437 @pre object is used by caller. 438 439 Released object may be evicted or acquired again. 440 441 While locked: 442 - if object is marked for purge, decrement tc_count 443 - add object to TABLE_SHARE::tdc.free_tables 444 - evict LRU object from table cache if we reached threshold 445 446 While unlocked: 447 - mark object not in use by any thread 448 - free evicted/purged object 449 450 @note Another thread may mark share for purge any moment (even 451 after version check). It means to-be-purged object may go to 452 unused lists. This other thread is expected to call tc_purge(), 453 which is synchronized with us on TABLE_SHARE::tdc.LOCK_table_share. 454 455 @return 456 @retval true object purged 457 @retval false object released 458 */ 459 460 void tc_release_table(TABLE *table) 461 { 462 uint32 i= table->instance; 463 DBUG_ENTER("tc_release_table"); 464 DBUG_ASSERT(table->in_use); 465 DBUG_ASSERT(table->file); 466 DBUG_ASSERT(!table->pos_in_locked_tables); 467 468 mysql_mutex_lock(&tc[i].LOCK_table_cache); 469 if (table->needs_reopen() || table->s->tdc->flushed || 470 tc[i].records > tc_size) 471 { 472 tc[i].records--; 473 mysql_mutex_unlock(&tc[i].LOCK_table_cache); 474 tc_remove_table(table); 475 } 476 else 477 { 478 table->in_use= 0; 479 table->s->tdc->free_tables[i].list.push_front(table); 480 tc[i].free_tables.push_back(table); 481 mysql_mutex_unlock(&tc[i].LOCK_table_cache); 482 } 483 DBUG_VOID_RETURN; 484 } 485 486 487 static void tdc_assert_clean_share(TDC_element *element) 488 { 489 DBUG_ASSERT(element->share == 0); 490 DBUG_ASSERT(element->ref_count == 0); 491 DBUG_ASSERT(element->m_flush_tickets.is_empty()); 492 DBUG_ASSERT(element->all_tables.is_empty()); 493 #ifndef DBUG_OFF 494 for (ulong i= 0; i < 
tc_instances; i++) 495 DBUG_ASSERT(element->free_tables[i].list.is_empty()); 496 #endif 497 DBUG_ASSERT(element->all_tables_refs == 0); 498 DBUG_ASSERT(element->next == 0); 499 DBUG_ASSERT(element->prev == 0); 500 } 501 502 503 /** 504 Delete share from hash and free share object. 505 */ 506 507 static void tdc_delete_share_from_hash(TDC_element *element) 508 { 509 THD *thd= current_thd; 510 LF_PINS *pins; 511 TABLE_SHARE *share; 512 DBUG_ENTER("tdc_delete_share_from_hash"); 513 514 mysql_mutex_assert_owner(&element->LOCK_table_share); 515 share= element->share; 516 DBUG_ASSERT(share); 517 element->share= 0; 518 PSI_CALL_release_table_share(share->m_psi); 519 share->m_psi= 0; 520 521 if (!element->m_flush_tickets.is_empty()) 522 { 523 Wait_for_flush_list::Iterator it(element->m_flush_tickets); 524 Wait_for_flush *ticket; 525 while ((ticket= it++)) 526 (void) ticket->get_ctx()->m_wait.set_status(MDL_wait::GRANTED); 527 528 do 529 { 530 mysql_cond_wait(&element->COND_release, &element->LOCK_table_share); 531 } while (!element->m_flush_tickets.is_empty()); 532 } 533 534 mysql_mutex_unlock(&element->LOCK_table_share); 535 536 if (thd) 537 { 538 fix_thd_pins(thd); 539 pins= thd->tdc_hash_pins; 540 } 541 else 542 pins= lf_hash_get_pins(&tdc_hash); 543 544 DBUG_ASSERT(pins); // What can we do about it? 545 tdc_assert_clean_share(element); 546 lf_hash_delete(&tdc_hash, pins, element->m_key, element->m_key_length); 547 if (!thd) 548 lf_hash_put_pins(pins); 549 free_table_share(share); 550 DBUG_VOID_RETURN; 551 } 552 553 554 /** 555 Prepeare table share for use with table definition cache. 
556 */ 557 558 static void lf_alloc_constructor(uchar *arg) 559 { 560 TDC_element *element= (TDC_element*) (arg + LF_HASH_OVERHEAD); 561 DBUG_ENTER("lf_alloc_constructor"); 562 mysql_mutex_init(key_TABLE_SHARE_LOCK_table_share, 563 &element->LOCK_table_share, MY_MUTEX_INIT_FAST); 564 mysql_cond_init(key_TABLE_SHARE_COND_release, &element->COND_release, 0); 565 element->m_flush_tickets.empty(); 566 element->all_tables.empty(); 567 for (ulong i= 0; i < tc_instances; i++) 568 element->free_tables[i].list.empty(); 569 element->all_tables_refs= 0; 570 element->share= 0; 571 element->ref_count= 0; 572 element->next= 0; 573 element->prev= 0; 574 DBUG_VOID_RETURN; 575 } 576 577 578 /** 579 Release table definition cache specific resources of table share. 580 */ 581 582 static void lf_alloc_destructor(uchar *arg) 583 { 584 TDC_element *element= (TDC_element*) (arg + LF_HASH_OVERHEAD); 585 DBUG_ENTER("lf_alloc_destructor"); 586 tdc_assert_clean_share(element); 587 mysql_cond_destroy(&element->COND_release); 588 mysql_mutex_destroy(&element->LOCK_table_share); 589 DBUG_VOID_RETURN; 590 } 591 592 593 static void tdc_hash_initializer(LF_HASH *, 594 TDC_element *element, LEX_STRING *key) 595 { 596 memcpy(element->m_key, key->str, key->length); 597 element->m_key_length= (uint)key->length; 598 tdc_assert_clean_share(element); 599 } 600 601 602 static uchar *tdc_hash_key(const TDC_element *element, size_t *length, 603 my_bool) 604 { 605 *length= element->m_key_length; 606 return (uchar*) element->m_key; 607 } 608 609 610 /** 611 Initialize table definition cache. 
612 */ 613 614 bool tdc_init(void) 615 { 616 DBUG_ENTER("tdc_init"); 617 #ifdef HAVE_PSI_INTERFACE 618 mysql_mutex_register("sql", all_tc_mutexes, array_elements(all_tc_mutexes)); 619 mysql_cond_register("sql", all_tc_conds, array_elements(all_tc_conds)); 620 #endif 621 /* Extra instance is allocated to avoid false sharing */ 622 if (!(tc= new Table_cache_instance[tc_instances + 1])) 623 DBUG_RETURN(true); 624 tdc_inited= true; 625 mysql_mutex_init(key_LOCK_unused_shares, &LOCK_unused_shares, 626 MY_MUTEX_INIT_FAST); 627 tdc_version= 1L; /* Increments on each reload */ 628 lf_hash_init(&tdc_hash, sizeof(TDC_element) + 629 sizeof(Share_free_tables) * (tc_instances - 1), 630 LF_HASH_UNIQUE, 0, 0, 631 (my_hash_get_key) tdc_hash_key, 632 &my_charset_bin); 633 tdc_hash.alloc.constructor= lf_alloc_constructor; 634 tdc_hash.alloc.destructor= lf_alloc_destructor; 635 tdc_hash.initializer= (lf_hash_initializer) tdc_hash_initializer; 636 DBUG_RETURN(false); 637 } 638 639 640 /** 641 Notify table definition cache that process of shutting down server 642 has started so it has to keep number of TABLE and TABLE_SHARE objects 643 minimal in order to reduce number of references to pluggable engines. 644 */ 645 646 void tdc_start_shutdown(void) 647 { 648 DBUG_ENTER("table_def_start_shutdown"); 649 if (tdc_inited) 650 { 651 /* 652 Ensure that TABLE and TABLE_SHARE objects which are created for 653 tables that are open during process of plugins' shutdown are 654 immediately released. This keeps number of references to engine 655 plugins minimal and allows shutdown to proceed smoothly. 656 */ 657 tdc_size= 0; 658 tc_size= 0; 659 /* Free all cached but unused TABLEs and TABLE_SHAREs. */ 660 close_cached_tables(NULL, NULL, FALSE, LONG_TIMEOUT); 661 } 662 DBUG_VOID_RETURN; 663 } 664 665 666 /** 667 Deinitialize table definition cache. 
668 */ 669 670 void tdc_deinit(void) 671 { 672 DBUG_ENTER("tdc_deinit"); 673 if (tdc_inited) 674 { 675 tdc_inited= false; 676 lf_hash_destroy(&tdc_hash); 677 mysql_mutex_destroy(&LOCK_unused_shares); 678 delete [] tc; 679 } 680 DBUG_VOID_RETURN; 681 } 682 683 684 /** 685 Get number of cached table definitions. 686 687 @return Number of cached table definitions 688 */ 689 690 ulong tdc_records(void) 691 { 692 return my_atomic_load32_explicit(&tdc_hash.count, MY_MEMORY_ORDER_RELAXED); 693 } 694 695 696 void tdc_purge(bool all) 697 { 698 DBUG_ENTER("tdc_purge"); 699 while (all || tdc_records() > tdc_size) 700 { 701 TDC_element *element; 702 703 mysql_mutex_lock(&LOCK_unused_shares); 704 if (!(element= unused_shares.pop_front())) 705 { 706 mysql_mutex_unlock(&LOCK_unused_shares); 707 break; 708 } 709 710 /* Concurrent thread may start using share again, reset prev and next. */ 711 element->prev= 0; 712 element->next= 0; 713 mysql_mutex_lock(&element->LOCK_table_share); 714 if (element->ref_count) 715 { 716 mysql_mutex_unlock(&element->LOCK_table_share); 717 mysql_mutex_unlock(&LOCK_unused_shares); 718 continue; 719 } 720 mysql_mutex_unlock(&LOCK_unused_shares); 721 722 tdc_delete_share_from_hash(element); 723 } 724 DBUG_VOID_RETURN; 725 } 726 727 728 /** 729 Lock table share. 730 731 Find table share with given db.table_name in table definition cache. Return 732 locked table share if found. 733 734 Locked table share means: 735 - table share is protected against removal from table definition cache 736 - no other thread can acquire/release table share 737 738 Caller is expected to unlock table share with tdc_unlock_share(). 
739 740 @retval 0 Share not found 741 @retval MY_ERRPTR OOM 742 @retval ptr Pointer to locked table share 743 */ 744 745 TDC_element *tdc_lock_share(THD *thd, const char *db, const char *table_name) 746 { 747 TDC_element *element; 748 char key[MAX_DBKEY_LENGTH]; 749 750 DBUG_ENTER("tdc_lock_share"); 751 if (unlikely(fix_thd_pins(thd))) 752 DBUG_RETURN((TDC_element*) MY_ERRPTR); 753 754 element= (TDC_element *) lf_hash_search(&tdc_hash, thd->tdc_hash_pins, 755 (uchar*) key, 756 tdc_create_key(key, db, table_name)); 757 if (element) 758 { 759 mysql_mutex_lock(&element->LOCK_table_share); 760 if (unlikely(!element->share || element->share->error)) 761 { 762 mysql_mutex_unlock(&element->LOCK_table_share); 763 element= 0; 764 } 765 lf_hash_search_unpin(thd->tdc_hash_pins); 766 } 767 768 DBUG_RETURN(element); 769 } 770 771 772 /** 773 Unlock share locked by tdc_lock_share(). 774 */ 775 776 void tdc_unlock_share(TDC_element *element) 777 { 778 DBUG_ENTER("tdc_unlock_share"); 779 mysql_mutex_unlock(&element->LOCK_table_share); 780 DBUG_VOID_RETURN; 781 } 782 783 784 /* 785 Get TABLE_SHARE for a table. 786 787 tdc_acquire_share() 788 thd Thread handle 789 tl Table that should be opened 790 flags operation: what to open table or view 791 out_table TABLE for the requested table 792 793 IMPLEMENTATION 794 Get a table definition from the table definition cache. 795 If it doesn't exist, create a new from the table definition file. 
796 797 RETURN 798 0 Error 799 # Share for table 800 */ 801 802 TABLE_SHARE *tdc_acquire_share(THD *thd, TABLE_LIST *tl, uint flags, 803 TABLE **out_table) 804 { 805 TABLE_SHARE *share; 806 TDC_element *element; 807 const char *key; 808 uint key_length= get_table_def_key(tl, &key); 809 my_hash_value_type hash_value= tl->mdl_request.key.tc_hash_value(); 810 bool was_unused; 811 DBUG_ENTER("tdc_acquire_share"); 812 813 if (fix_thd_pins(thd)) 814 DBUG_RETURN(0); 815 816 retry: 817 while (!(element= (TDC_element*) lf_hash_search_using_hash_value(&tdc_hash, 818 thd->tdc_hash_pins, hash_value, (uchar*) key, key_length))) 819 { 820 LEX_STRING tmp= { const_cast<char*>(key), key_length }; 821 int res= lf_hash_insert(&tdc_hash, thd->tdc_hash_pins, (uchar*) &tmp); 822 823 if (res == -1) 824 DBUG_RETURN(0); 825 else if (res == 1) 826 continue; 827 828 element= (TDC_element*) lf_hash_search_using_hash_value(&tdc_hash, 829 thd->tdc_hash_pins, hash_value, (uchar*) key, key_length); 830 lf_hash_search_unpin(thd->tdc_hash_pins); 831 DBUG_ASSERT(element); 832 833 if (!(share= alloc_table_share(tl->db.str, tl->table_name.str, key, key_length))) 834 { 835 lf_hash_delete(&tdc_hash, thd->tdc_hash_pins, key, key_length); 836 DBUG_RETURN(0); 837 } 838 839 /* note that tdc_acquire_share() *always* uses discovery */ 840 open_table_def(thd, share, flags | GTS_USE_DISCOVERY); 841 842 if (checked_unlikely(share->error)) 843 { 844 free_table_share(share); 845 lf_hash_delete(&tdc_hash, thd->tdc_hash_pins, key, key_length); 846 DBUG_RETURN(0); 847 } 848 849 mysql_mutex_lock(&element->LOCK_table_share); 850 element->share= share; 851 share->tdc= element; 852 element->ref_count++; 853 element->version= tdc_refresh_version(); 854 element->flushed= false; 855 mysql_mutex_unlock(&element->LOCK_table_share); 856 857 tdc_purge(false); 858 if (out_table) 859 { 860 status_var_increment(thd->status_var.table_open_cache_misses); 861 *out_table= 0; 862 } 863 share->m_psi= PSI_CALL_get_table_share(false, 
share); 864 goto end; 865 } 866 867 /* cannot force discovery of a cached share */ 868 DBUG_ASSERT(!(flags & GTS_FORCE_DISCOVERY)); 869 870 if (out_table && (flags & GTS_TABLE)) 871 { 872 if ((*out_table= tc_acquire_table(thd, element))) 873 { 874 lf_hash_search_unpin(thd->tdc_hash_pins); 875 DBUG_ASSERT(!(flags & GTS_NOLOCK)); 876 DBUG_ASSERT(element->share); 877 DBUG_ASSERT(!element->share->error); 878 DBUG_ASSERT(!element->share->is_view); 879 status_var_increment(thd->status_var.table_open_cache_hits); 880 DBUG_RETURN(element->share); 881 } 882 status_var_increment(thd->status_var.table_open_cache_misses); 883 } 884 885 mysql_mutex_lock(&element->LOCK_table_share); 886 if (!(share= element->share)) 887 { 888 mysql_mutex_unlock(&element->LOCK_table_share); 889 lf_hash_search_unpin(thd->tdc_hash_pins); 890 goto retry; 891 } 892 lf_hash_search_unpin(thd->tdc_hash_pins); 893 894 /* 895 We found an existing table definition. Return it if we didn't get 896 an error when reading the table definition from file. 
897 */ 898 if (unlikely(share->error)) 899 { 900 open_table_error(share, share->error, share->open_errno); 901 goto err; 902 } 903 904 if (share->is_view && !(flags & GTS_VIEW)) 905 { 906 open_table_error(share, OPEN_FRM_NOT_A_TABLE, ENOENT); 907 goto err; 908 } 909 if (!share->is_view && !(flags & GTS_TABLE)) 910 { 911 open_table_error(share, OPEN_FRM_NOT_A_VIEW, ENOENT); 912 goto err; 913 } 914 915 was_unused= !element->ref_count; 916 element->ref_count++; 917 mysql_mutex_unlock(&element->LOCK_table_share); 918 if (was_unused) 919 { 920 mysql_mutex_lock(&LOCK_unused_shares); 921 if (element->prev) 922 { 923 /* 924 Share was not used before and it was in the old_unused_share list 925 Unlink share from this list 926 */ 927 DBUG_PRINT("info", ("Unlinking from not used list")); 928 unused_shares.remove(element); 929 element->next= 0; 930 element->prev= 0; 931 } 932 mysql_mutex_unlock(&LOCK_unused_shares); 933 } 934 935 end: 936 DBUG_PRINT("exit", ("share: %p ref_count: %u", 937 share, share->tdc->ref_count)); 938 if (flags & GTS_NOLOCK) 939 { 940 tdc_release_share(share); 941 /* 942 if GTS_NOLOCK is requested, the returned share pointer cannot be used, 943 the share it points to may go away any moment. 944 But perhaps the caller is only interested to know whether a share or 945 table existed? 946 Let's return an invalid pointer here to catch dereferencing attempts. 947 */ 948 share= (TABLE_SHARE*) 1; 949 } 950 DBUG_RETURN(share); 951 952 err: 953 mysql_mutex_unlock(&element->LOCK_table_share); 954 DBUG_RETURN(0); 955 } 956 957 958 /** 959 Release table share acquired by tdc_acquire_share(). 
960 */ 961 962 void tdc_release_share(TABLE_SHARE *share) 963 { 964 DBUG_ENTER("tdc_release_share"); 965 966 mysql_mutex_lock(&share->tdc->LOCK_table_share); 967 DBUG_PRINT("enter", 968 ("share: %p table: %s.%s ref_count: %u version: %lld", 969 share, share->db.str, share->table_name.str, 970 share->tdc->ref_count, share->tdc->version)); 971 DBUG_ASSERT(share->tdc->ref_count); 972 973 if (share->tdc->ref_count > 1) 974 { 975 share->tdc->ref_count--; 976 if (!share->is_view) 977 mysql_cond_broadcast(&share->tdc->COND_release); 978 mysql_mutex_unlock(&share->tdc->LOCK_table_share); 979 DBUG_VOID_RETURN; 980 } 981 mysql_mutex_unlock(&share->tdc->LOCK_table_share); 982 983 mysql_mutex_lock(&LOCK_unused_shares); 984 mysql_mutex_lock(&share->tdc->LOCK_table_share); 985 if (--share->tdc->ref_count) 986 { 987 if (!share->is_view) 988 mysql_cond_broadcast(&share->tdc->COND_release); 989 mysql_mutex_unlock(&share->tdc->LOCK_table_share); 990 mysql_mutex_unlock(&LOCK_unused_shares); 991 DBUG_VOID_RETURN; 992 } 993 if (share->tdc->flushed || tdc_records() > tdc_size) 994 { 995 mysql_mutex_unlock(&LOCK_unused_shares); 996 tdc_delete_share_from_hash(share->tdc); 997 DBUG_VOID_RETURN; 998 } 999 /* Link share last in used_table_share list */ 1000 DBUG_PRINT("info", ("moving share to unused list")); 1001 DBUG_ASSERT(share->tdc->next == 0); 1002 unused_shares.push_back(share->tdc); 1003 mysql_mutex_unlock(&share->tdc->LOCK_table_share); 1004 mysql_mutex_unlock(&LOCK_unused_shares); 1005 DBUG_VOID_RETURN; 1006 } 1007 1008 1009 /** 1010 Auxiliary function which allows to kill delayed threads for 1011 particular table identified by its share. 1012 1013 @param share Table share. 1014 1015 @pre Caller should have TABLE_SHARE::tdc.LOCK_table_share mutex. 
1016 */ 1017 1018 static void kill_delayed_threads_for_table(TDC_element *element) 1019 { 1020 All_share_tables_list::Iterator it(element->all_tables); 1021 TABLE *tab; 1022 1023 mysql_mutex_assert_owner(&element->LOCK_table_share); 1024 1025 if (!delayed_insert_threads) 1026 return; 1027 1028 while ((tab= it++)) 1029 { 1030 THD *in_use= tab->in_use; 1031 1032 DBUG_ASSERT(in_use && tab->s->tdc->flushed); 1033 if ((in_use->system_thread & SYSTEM_THREAD_DELAYED_INSERT) && 1034 ! in_use->killed) 1035 { 1036 in_use->killed= KILL_SYSTEM_THREAD; 1037 mysql_mutex_lock(&in_use->mysys_var->mutex); 1038 if (in_use->mysys_var->current_cond) 1039 { 1040 mysql_mutex_lock(in_use->mysys_var->current_mutex); 1041 mysql_cond_broadcast(in_use->mysys_var->current_cond); 1042 mysql_mutex_unlock(in_use->mysys_var->current_mutex); 1043 } 1044 mysql_mutex_unlock(&in_use->mysys_var->mutex); 1045 } 1046 } 1047 } 1048 1049 1050 /** 1051 Remove all or some (depending on parameter) instances of TABLE and 1052 TABLE_SHARE from the table definition cache. 1053 1054 @param thd Thread context 1055 @param remove_type Type of removal: 1056 TDC_RT_REMOVE_ALL - remove all TABLE instances and 1057 TABLE_SHARE instance. There 1058 should be no used TABLE objects 1059 and caller should have exclusive 1060 metadata lock on the table. 1061 TDC_RT_REMOVE_NOT_OWN - remove all TABLE instances 1062 except those that belong to 1063 this thread. There should be 1064 no TABLE objects used by other 1065 threads and caller should have 1066 exclusive metadata lock on the 1067 table. 1068 TDC_RT_REMOVE_UNUSED - remove all unused TABLE 1069 instances (if there are no 1070 used instances will also 1071 remove TABLE_SHARE). 1072 TDC_RT_REMOVE_NOT_OWN_KEEP_SHARE - 1073 remove all TABLE instances 1074 except those that belong to 1075 this thread, but don't mark 1076 TABLE_SHARE as old. There 1077 should be no TABLE objects 1078 used by other threads and 1079 caller should have exclusive 1080 metadata lock on the table. 
1081 @param db Name of database 1082 @param table_name Name of table 1083 @param kill_delayed_threads If TRUE, kill INSERT DELAYED threads 1084 1085 @note It assumes that table instances are already not used by any 1086 (other) thread (this should be achieved by using meta-data locks). 1087 */ 1088 1089 bool tdc_remove_table(THD *thd, enum_tdc_remove_table_type remove_type, 1090 const char *db, const char *table_name, 1091 bool kill_delayed_threads) 1092 { 1093 Share_free_tables::List purge_tables; 1094 TABLE *table; 1095 TDC_element *element; 1096 uint my_refs= 1; 1097 DBUG_ENTER("tdc_remove_table"); 1098 DBUG_PRINT("enter",("name: %s remove_type: %d", table_name, remove_type)); 1099 1100 DBUG_ASSERT(remove_type == TDC_RT_REMOVE_UNUSED || 1101 thd->mdl_context.is_lock_owner(MDL_key::TABLE, db, table_name, 1102 MDL_EXCLUSIVE)); 1103 1104 1105 mysql_mutex_lock(&LOCK_unused_shares); 1106 if (!(element= tdc_lock_share(thd, db, table_name))) 1107 { 1108 mysql_mutex_unlock(&LOCK_unused_shares); 1109 DBUG_ASSERT(remove_type != TDC_RT_REMOVE_NOT_OWN_KEEP_SHARE); 1110 DBUG_RETURN(false); 1111 } 1112 1113 DBUG_ASSERT(element != MY_ERRPTR); // What can we do about it? 
1114 1115 if (!element->ref_count) 1116 { 1117 if (element->prev) 1118 { 1119 unused_shares.remove(element); 1120 element->prev= 0; 1121 element->next= 0; 1122 } 1123 mysql_mutex_unlock(&LOCK_unused_shares); 1124 1125 tdc_delete_share_from_hash(element); 1126 DBUG_RETURN(true); 1127 } 1128 mysql_mutex_unlock(&LOCK_unused_shares); 1129 1130 element->ref_count++; 1131 1132 tc_remove_all_unused_tables(element, &purge_tables, 1133 remove_type != TDC_RT_REMOVE_NOT_OWN_KEEP_SHARE); 1134 1135 if (kill_delayed_threads) 1136 kill_delayed_threads_for_table(element); 1137 1138 if (remove_type == TDC_RT_REMOVE_NOT_OWN || 1139 remove_type == TDC_RT_REMOVE_NOT_OWN_KEEP_SHARE) 1140 { 1141 All_share_tables_list::Iterator it(element->all_tables); 1142 while ((table= it++)) 1143 { 1144 if (table->in_use == thd) 1145 my_refs++; 1146 } 1147 } 1148 mysql_mutex_unlock(&element->LOCK_table_share); 1149 1150 while ((table= purge_tables.pop_front())) 1151 intern_close_table(table); 1152 1153 if (remove_type != TDC_RT_REMOVE_UNUSED) 1154 { 1155 /* 1156 Even though current thread holds exclusive metadata lock on this share 1157 (asserted above), concurrent FLUSH TABLES threads may be in process of 1158 closing unused table instances belonging to this share. E.g.: 1159 thr1 (FLUSH TABLES): table= share->tdc.free_tables.pop_front(); 1160 thr1 (FLUSH TABLES): share->tdc.all_tables.remove(table); 1161 thr2 (ALTER TABLE): tdc_remove_table(); 1162 thr1 (FLUSH TABLES): intern_close_table(table); 1163 1164 Current remove type assumes that all table instances (except for those 1165 that are owned by current thread) must be closed before 1166 thd_remove_table() returns. Wait for such tables now. 1167 1168 intern_close_table() decrements ref_count and signals COND_release. When 1169 ref_count drops down to number of references owned by current thread 1170 waiting is completed. 
1171 1172 Unfortunately TABLE_SHARE::wait_for_old_version() cannot be used here 1173 because it waits for all table instances, whereas we have to wait only 1174 for those that are not owned by current thread. 1175 */ 1176 mysql_mutex_lock(&element->LOCK_table_share); 1177 while (element->ref_count > my_refs) 1178 mysql_cond_wait(&element->COND_release, &element->LOCK_table_share); 1179 DBUG_ASSERT(element->all_tables.is_empty() || 1180 remove_type != TDC_RT_REMOVE_ALL); 1181 #ifndef DBUG_OFF 1182 if (remove_type == TDC_RT_REMOVE_NOT_OWN || 1183 remove_type == TDC_RT_REMOVE_NOT_OWN_KEEP_SHARE) 1184 { 1185 All_share_tables_list::Iterator it(element->all_tables); 1186 while ((table= it++)) 1187 DBUG_ASSERT(table->in_use == thd); 1188 } 1189 #endif 1190 mysql_mutex_unlock(&element->LOCK_table_share); 1191 } 1192 1193 tdc_release_share(element->share); 1194 1195 DBUG_RETURN(true); 1196 } 1197 1198 1199 /** 1200 Check if table's share is being removed from the table definition 1201 cache and, if yes, wait until the flush is complete. 1202 1203 @param thd Thread context. 1204 @param table_list Table which share should be checked. 1205 @param timeout Timeout for waiting. 1206 @param deadlock_weight Weight of this wait for deadlock detector. 1207 1208 @retval 0 Success. Share is up to date or has been flushed. 1209 @retval 1 Error (OOM, was killed, the wait resulted 1210 in a deadlock or timeout). Reported. 
1211 */ 1212 1213 int tdc_wait_for_old_version(THD *thd, const char *db, const char *table_name, 1214 ulong wait_timeout, uint deadlock_weight, tdc_version_t refresh_version) 1215 { 1216 TDC_element *element; 1217 1218 if (!(element= tdc_lock_share(thd, db, table_name))) 1219 return FALSE; 1220 else if (element == MY_ERRPTR) 1221 return TRUE; 1222 else if (element->flushed && refresh_version > element->version) 1223 { 1224 struct timespec abstime; 1225 set_timespec(abstime, wait_timeout); 1226 return element->share->wait_for_old_version(thd, &abstime, deadlock_weight); 1227 } 1228 tdc_unlock_share(element); 1229 return FALSE; 1230 } 1231 1232 1233 tdc_version_t tdc_refresh_version(void) 1234 { 1235 return (tdc_version_t)my_atomic_load64_explicit(&tdc_version, MY_MEMORY_ORDER_RELAXED); 1236 } 1237 1238 1239 tdc_version_t tdc_increment_refresh_version(void) 1240 { 1241 tdc_version_t v= (tdc_version_t)my_atomic_add64_explicit(&tdc_version, 1, MY_MEMORY_ORDER_RELAXED); 1242 DBUG_PRINT("tcache", ("incremented global refresh_version to: %lld", v)); 1243 return v + 1; 1244 } 1245 1246 1247 /** 1248 Iterate table definition cache. 1249 1250 Object is protected against removal from table definition cache. 1251 1252 @note Returned TABLE_SHARE is not guaranteed to be fully initialized: 1253 tdc_acquire_share() added new share, but didn't open it yet. If caller 1254 needs fully initializer share, it must lock table share mutex. 
1255 */ 1256 1257 struct eliminate_duplicates_arg 1258 { 1259 HASH hash; 1260 MEM_ROOT root; 1261 my_hash_walk_action action; 1262 void *argument; 1263 }; 1264 1265 1266 static uchar *eliminate_duplicates_get_key(const uchar *element, size_t *length, 1267 my_bool not_used __attribute__((unused))) 1268 { 1269 LEX_STRING *key= (LEX_STRING *) element; 1270 *length= key->length; 1271 return (uchar *) key->str; 1272 } 1273 1274 1275 static my_bool eliminate_duplicates(TDC_element *element, 1276 eliminate_duplicates_arg *arg) 1277 { 1278 LEX_STRING *key= (LEX_STRING *) alloc_root(&arg->root, sizeof(LEX_STRING)); 1279 1280 if (!key || !(key->str= (char*) memdup_root(&arg->root, element->m_key, 1281 element->m_key_length))) 1282 return TRUE; 1283 1284 key->length= element->m_key_length; 1285 1286 if (my_hash_insert(&arg->hash, (uchar *) key)) 1287 return FALSE; 1288 1289 return arg->action(element, arg->argument); 1290 } 1291 1292 1293 int tdc_iterate(THD *thd, my_hash_walk_action action, void *argument, 1294 bool no_dups) 1295 { 1296 eliminate_duplicates_arg no_dups_argument; 1297 LF_PINS *pins; 1298 myf alloc_flags= 0; 1299 uint hash_flags= HASH_UNIQUE; 1300 int res; 1301 1302 if (thd) 1303 { 1304 fix_thd_pins(thd); 1305 pins= thd->tdc_hash_pins; 1306 alloc_flags= MY_THREAD_SPECIFIC; 1307 hash_flags|= HASH_THREAD_SPECIFIC; 1308 } 1309 else 1310 pins= lf_hash_get_pins(&tdc_hash); 1311 1312 if (!pins) 1313 return ER_OUTOFMEMORY; 1314 1315 if (no_dups) 1316 { 1317 init_alloc_root(&no_dups_argument.root, "no_dups", 4096, 4096, 1318 MYF(alloc_flags)); 1319 my_hash_init(&no_dups_argument.hash, &my_charset_bin, tdc_records(), 0, 0, 1320 eliminate_duplicates_get_key, 0, hash_flags); 1321 no_dups_argument.action= action; 1322 no_dups_argument.argument= argument; 1323 action= (my_hash_walk_action) eliminate_duplicates; 1324 argument= &no_dups_argument; 1325 } 1326 1327 res= lf_hash_iterate(&tdc_hash, pins, action, argument); 1328 1329 if (!thd) 1330 lf_hash_put_pins(pins); 1331 
1332 if (no_dups) 1333 { 1334 my_hash_free(&no_dups_argument.hash); 1335 free_root(&no_dups_argument.root, MYF(0)); 1336 } 1337 return res; 1338 } 1339