/*****************************************************************************

Copyright (c) 1995, 2020, Oracle and/or its affiliates. All Rights Reserved.

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License, version 2.0, as published by the
Free Software Foundation.

This program is also distributed with certain software (including but not
limited to OpenSSL) that is licensed under separate terms, as designated in a
particular file or component or in included license documentation. The authors
of MySQL hereby grant you an additional permission to link the program and
your derivative works with the separately licensed software that they have
included with MySQL.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License, version 2.0,
for more details.

You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA

*****************************************************************************/

/** @file include/buf0buf.h
The database buffer pool high-level routines

Created 11/5/1995 Heikki Tuuri
*******************************************************/

#ifndef buf0buf_h
#define buf0buf_h

#include "buf0types.h"
#include "fil0fil.h"
#include "hash0hash.h"
#include "log0log.h"
#include "mtr0types.h"
#include "os0proc.h"
#include "page0types.h"
#include "srv0srv.h"
#include "univ.i"
#include "ut0byte.h"
#include "ut0rbt.h"

#include "buf/buf.h"

#include <ostream>

// Forward declaration
struct fil_addr_t;

/** @name Modes for buf_page_get_gen */
/* @{ */
enum class Page_fetch {
  /** Get always */
  NORMAL,

  /** Same as NORMAL, but hint that the fetch is part of a large scan.
  Try not to flood the buffer pool with pages that may not be accessed again
  any time soon. */
  SCAN,

  /** get if in pool */
  IF_IN_POOL,

  /** get if in pool, do not make the block young in the LRU list */
  PEEK_IF_IN_POOL,

  /** get and bufferfix, but set no latch; we have separated this case, because
  it is error-prone programming not to set a latch, and it should be used with
  care */
  NO_LATCH,

  /** Get the page only if it's in the buffer pool, if not then set a watch on
  the page. */
  IF_IN_POOL_OR_WATCH,

  /** Like Page_fetch::NORMAL, but do not mind if the file page has been
  freed. */
  POSSIBLY_FREED
};
/* @} */

/** @name Modes for buf_page_get_known_nowait */

/* @{ */
enum class Cache_hint {
  /** Move the block to the start of the LRU list if there is a danger that the
  block would drift out of the buffer pool */
  MAKE_YOUNG = 51,

  /** Preserve the current LRU position of the block. */
  KEEP_OLD = 52
};

/* @} */

/** Number of bits needed to represent a buffer pool ID */
constexpr ulint MAX_BUFFER_POOLS_BITS = 6;

/** The maximum number of buffer pools that can be defined */
constexpr ulint MAX_BUFFER_POOLS = (1 << MAX_BUFFER_POOLS_BITS);

/** Maximum number of concurrent buffer pool watches */
#define BUF_POOL_WATCH_SIZE (srv_n_purge_threads + 1)

/** The maximum number of page_hash locks */
constexpr ulint MAX_PAGE_HASH_LOCKS = 1024;

/** The buffer pools of the database */
extern buf_pool_t *buf_pool_ptr;

/** true when withdrawing buffer pool pages might cause page relocation */
extern volatile bool buf_pool_withdrawing;

/** the clock is incremented every time a pointer to a page may become
obsolete */
extern volatile ulint buf_withdraw_clock;

#ifdef UNIV_HOTBACKUP
/** first block, for --apply-log */
extern buf_block_t *back_block1;
/** second block, for page reorganize */
extern buf_block_t *back_block2;
#endif /* UNIV_HOTBACKUP */

/** @brief States of a control block
@see buf_page_t

The enumeration values must be 0..7.
*/
enum buf_page_state {
  BUF_BLOCK_POOL_WATCH,    /*!< a sentinel for the buffer pool
                           watch, element of buf_pool->watch[] */
  BUF_BLOCK_ZIP_PAGE,      /*!< contains a clean
                           compressed page */
  BUF_BLOCK_ZIP_DIRTY,     /*!< contains a compressed
                           page that is in the
                           buf_pool->flush_list */

  BUF_BLOCK_NOT_USED,      /*!< is in the free list;
                           must be after the BUF_BLOCK_ZIP_
                           constants for compressed-only pages
                           @see buf_block_state_valid() */
  BUF_BLOCK_READY_FOR_USE, /*!< when buf_LRU_get_free_block
                           returns a block, it is in this state */
  BUF_BLOCK_FILE_PAGE,     /*!< contains a buffered file page */
  BUF_BLOCK_MEMORY,        /*!< contains some main memory
                           object */
  BUF_BLOCK_REMOVE_HASH    /*!< hash index should be removed
                           before putting to the free list */
};

/** This structure defines information we will fetch from each buffer pool. It
will be used to print table IO stats */
struct buf_pool_info_t {
  /* General buffer pool info */
  ulint pool_unique_id;              /*!< Buffer Pool ID */
  ulint pool_size;                   /*!< Buffer Pool size in pages */
  ulint lru_len;                     /*!< Length of buf_pool->LRU */
  ulint old_lru_len;                 /*!< buf_pool->LRU_old_len */
  ulint free_list_len;               /*!< Length of buf_pool->free list */
  ulint flush_list_len;              /*!< Length of buf_pool->flush_list */
  ulint n_pend_unzip;                /*!< buf_pool->n_pend_unzip, pages
                                     pending decompress */
  ulint n_pend_reads;                /*!< buf_pool->n_pend_reads, pages
                                     pending read */
  ulint n_pending_flush_lru;         /*!< Pages pending flush in LRU */
  ulint n_pending_flush_single_page; /*!< Pages pending to be
                                     flushed as part of single page
                                     flushes issued by various user
                                     threads */
  ulint n_pending_flush_list;        /*!< Pages pending flush in FLUSH
                                     LIST */
  ulint n_pages_made_young;          /*!< number of pages made young */
  ulint n_pages_not_made_young;      /*!< number of pages not made young */
  ulint n_pages_read;                /*!< buf_pool->n_pages_read */
  ulint n_pages_created;             /*!< buf_pool->n_pages_created */
  ulint n_pages_written;             /*!< buf_pool->n_pages_written */
  ulint n_page_gets;                 /*!< buf_pool->n_page_gets */
  ulint n_ra_pages_read_rnd;         /*!< buf_pool->n_ra_pages_read_rnd,
                                     number of pages readahead */
  ulint n_ra_pages_read;             /*!< buf_pool->n_ra_pages_read, number
                                     of pages readahead */
  ulint n_ra_pages_evicted;          /*!< buf_pool->n_ra_pages_evicted,
                                     number of readahead pages evicted
                                     without access */
  ulint n_page_get_delta;            /*!< num of buffer pool page gets since
                                     last printout */

  /* Buffer pool access stats */
  double page_made_young_rate;     /*!< page made young rate in pages
                                   per second */
  double page_not_made_young_rate; /*!< page not made young rate
                                   in pages per second */
  double pages_read_rate;          /*!< num of pages read per second */
  double pages_created_rate;       /*!< num of pages create per second */
  double pages_written_rate;       /*!< num of pages written per second */
  ulint page_read_delta;           /*!< num of pages read since last
                                   printout */
  ulint young_making_delta;        /*!< num of pages made young since
                                   last printout */
  ulint not_young_making_delta;    /*!< num of pages not made young since
                                   last printout */

  /* Statistics about read ahead algorithm. */
  double pages_readahead_rnd_rate; /*!< random readahead rate in pages per
                                   second */
  double pages_readahead_rate;     /*!< readahead rate in pages per
                                   second */
  double pages_evicted_rate;       /*!< rate of readahead page evicted
                                   without access, in pages per second */

  /* Stats about LRU eviction */
  ulint unzip_lru_len; /*!< length of buf_pool->unzip_LRU
                       list */
  /* Counters for LRU policy */
  ulint io_sum;    /*!< buf_LRU_stat_sum.io */
  ulint io_cur;    /*!< buf_LRU_stat_cur.io, num of IO
                   for current interval */
  ulint unzip_sum; /*!< buf_LRU_stat_sum.unzip */
  ulint unzip_cur; /*!< buf_LRU_stat_cur.unzip, num
                   pages decompressed in current
                   interval */
};

/** The occupied bytes of lists in all buffer pools */
struct buf_pools_list_size_t {
  ulint LRU_bytes;        /*!< LRU size in bytes */
  ulint unzip_LRU_bytes;  /*!< unzip_LRU size in bytes */
  ulint flush_list_bytes; /*!< flush_list size in bytes */
};

#ifndef UNIV_HOTBACKUP
/** Creates the buffer pool.
@param[in]  total_size   Size of the total pool in bytes.
@param[in]  n_instances  Number of buffer pool instances to create.
@return DB_SUCCESS if success, DB_ERROR if not enough memory or error */
dberr_t buf_pool_init(ulint total_size, ulint n_instances);

/** Frees the buffer pool at shutdown. This must not be invoked before
freeing all mutexes. */
void buf_pool_free_all();

/** Determines if a block is intended to be withdrawn.
@param[in]  buf_pool  buffer pool instance
@param[in]  block     pointer to control block
@retval true if will be withdrawn */
bool buf_block_will_withdrawn(buf_pool_t *buf_pool, const buf_block_t *block);

/** Determines if a frame is intended to be withdrawn.
@param[in]  buf_pool  buffer pool instance
@param[in]  ptr       pointer to a frame
@retval true if will be withdrawn */
bool buf_frame_will_withdrawn(buf_pool_t *buf_pool, const byte *ptr);

/** This is the thread for resizing buffer pool. It waits for an event and
when woken up either performs a resizing and sleeps again. */
void buf_resize_thread();

/** Checks if innobase_should_madvise_buf_pool() value has changed since we've
last checked and if so, then updates buf_pool_should_madvise and calls madvise
for all chunks in all srv_buf_pool_instances.
@see buf_pool_should_madvise comment for a longer explanation. */
void buf_pool_update_madvise();

/** Clears the adaptive hash index on all pages in the buffer pool. */
void buf_pool_clear_hash_index(void);

/** Gets the current size of buffer buf_pool in bytes.
@return size in bytes */
UNIV_INLINE
ulint buf_pool_get_curr_size(void);
/** Gets the current size of buffer buf_pool in frames.
@return size in pages */
UNIV_INLINE
ulint buf_pool_get_n_pages(void);
#endif /* !UNIV_HOTBACKUP */

/** Gets the smallest oldest_modification lsn among all of the earliest
added pages in flush lists. In other words - takes the last dirty page
from each flush list, and calculates minimum oldest_modification among
all of them. Does not acquire global lock for the whole process, so the
result might come from inconsistent view on flush lists.

@note Note that because of the relaxed order in each flush list, this
function no longer returns the smallest oldest_modification among all
of the dirty pages. If you wanted to have a safe lsn, which is smaller
than every oldest_modification, you would need to use another function:
buf_pool_get_oldest_modification_lwm().

Returns zero if there were no dirty pages (flush lists were empty).

@return minimum oldest_modification of last pages from flush lists,
zero if flush lists were empty */
lsn_t buf_pool_get_oldest_modification_approx(void);

/** Gets a safe low watermark for oldest_modification. It's guaranteed
that there were no dirty pages with smaller oldest_modification in the
whole flush lists.

Returns zero if flush lists were empty, be careful in such case, because
taking the newest lsn is probably not a good idea. If you wanted to rely
on some lsn in such case, you would need to follow pattern:

        dpa_lsn = log_buffer_dirty_pages_added_up_to_lsn(*log_sys);

        lwm_lsn = buf_pool_get_oldest_modification_lwm();

        if (lwm_lsn == 0) lwm_lsn = dpa_lsn;

The order is important to avoid race conditions.

@remarks
It's guaranteed that the returned value will not be smaller than the
last checkpoint lsn. It's not guaranteed that the returned value is
the maximum possible. It's just the best effort for the low cost.
It basically takes result of buf_pool_get_oldest_modification_approx()
and subtracts maximum possible lag introduced by relaxed order in
flush lists (srv_log_recent_closed_size).

@return safe low watermark for oldest_modification of dirty pages,
or zero if flush lists were empty; if non-zero, it is then
guaranteed not to be at block boundary (and it points to lsn
inside data fragment of block) */
lsn_t buf_pool_get_oldest_modification_lwm(void);

#ifndef UNIV_HOTBACKUP

/** Allocates a buf_page_t descriptor. This function must succeed. In case
of failure we assert in this function. */
UNIV_INLINE
buf_page_t *buf_page_alloc_descriptor(void) MY_ATTRIBUTE((malloc));
/** Free a buf_page_t descriptor. */
UNIV_INLINE
void buf_page_free_descriptor(
    buf_page_t *bpage); /*!< in: bpage descriptor to free. */

/** Allocates a buffer block.
@return own: the allocated block, in state BUF_BLOCK_MEMORY */
buf_block_t *buf_block_alloc(
    buf_pool_t *buf_pool); /*!< in: buffer pool instance,
                           or NULL for round-robin selection
                           of the buffer pool */
/** Frees a buffer block which does not contain a file page. */
UNIV_INLINE
void buf_block_free(buf_block_t *block); /*!< in, own: block to be freed */
#endif /* !UNIV_HOTBACKUP */

/** Copies contents of a buffer frame to a given buffer.
@param[in]  buf    buffer to copy to
@param[in]  frame  buffer frame
@return buf */
UNIV_INLINE
byte *buf_frame_copy(byte *buf, const buf_frame_t *frame);

#ifndef UNIV_HOTBACKUP
/** NOTE! The following macros should be used instead of buf_page_get_gen,
to improve debugging. Only values RW_S_LATCH and RW_X_LATCH are allowed
in LA! */
#define buf_page_get(ID, SIZE, LA, MTR)                                        \
  buf_page_get_gen(ID, SIZE, LA, NULL, Page_fetch::NORMAL, __FILE__, __LINE__, \
                   MTR)
/** Use these macros to bufferfix a page with no latching. Remember not to
read the contents of the page unless you know it is safe. Do not modify
the contents of the page! We have separated this case, because it is
error-prone programming not to set a latch, and it should be used
with care. */
#define buf_page_get_with_no_latch(ID, SIZE, MTR)                     \
  buf_page_get_gen(ID, SIZE, RW_NO_LATCH, NULL, Page_fetch::NO_LATCH, \
                   __FILE__, __LINE__, MTR)

/** This is the general function used to get optimistic access to a database
page.
@param[in]      rw_latch      RW_S_LATCH, RW_X_LATCH
@param[in,out]  block         guessed block
@param[in]      modify_clock  modify clock value
@param[in]      fetch_mode    Fetch mode
@param[in]      file          file name
@param[in]      line          line where called
@param[in,out]  mtr           mini-transaction
@return true if success */
bool buf_page_optimistic_get(ulint rw_latch, buf_block_t *block,
                             uint64_t modify_clock, Page_fetch fetch_mode,
                             const char *file, ulint line, mtr_t *mtr);

/** This is used to get access to a known database page, when no waiting can be
done.
@param[in]      rw_latch  RW_S_LATCH or RW_X_LATCH.
@param[in]      block     The known page.
@param[in]      hint      Cache_hint::MAKE_YOUNG or Cache_hint::KEEP_OLD
@param[in]      file      File name from where it was called.
@param[in]      line      Line from where it was called.
@param[in,out]  mtr       Mini-transaction covering the fetch
@return true if success */
bool buf_page_get_known_nowait(ulint rw_latch, buf_block_t *block,
                               Cache_hint hint, const char *file, ulint line,
                               mtr_t *mtr);

/** Given a tablespace id and page number tries to get that page. If the
page is not in the buffer pool it is not loaded and NULL is returned.
Suitable for using when holding the lock_sys latches (as it avoids deadlock).
@param[in]  page_id  page id
@param[in]  file     file name
@param[in]  line     line where called
@param[in]  mtr      mini-transaction
@return pointer to a page or NULL */
const buf_block_t *buf_page_try_get_func(const page_id_t &page_id,
                                         const char *file, ulint line,
                                         mtr_t *mtr);

/** Given a tablespace id and page number tries to get that page. If the
page is not in the buffer pool it is not loaded and NULL is returned.
Suitable for using when holding the lock_sys latches (as it avoids deadlock).
@param[in]  page_id  page identifier
@param[in]  mtr      mini-transaction
@return the page if in buffer pool, NULL if not */
#define buf_page_try_get(page_id, mtr) \
  buf_page_try_get_func((page_id), __FILE__, __LINE__, mtr);

/** Get read access to a compressed page (usually of type
FIL_PAGE_TYPE_ZBLOB or FIL_PAGE_TYPE_ZBLOB2).
The page must be released with buf_page_release_zip().
NOTE: the page is not protected by any latch. Mutual exclusion has to
be implemented at a higher level. In other words, all possible
accesses to a given page through this function must be protected by
the same set of mutexes or latches.
@param[in]  page_id    page id
@param[in]  page_size  page size
@return pointer to the block */
buf_page_t *buf_page_get_zip(const page_id_t &page_id,
                             const page_size_t &page_size);

/** This is the general function used to get access to a database page.
@param[in]  page_id    page id
@param[in]  page_size  page size
@param[in]  rw_latch   RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH
@param[in]  guess      guessed block or NULL
@param[in]  mode       Fetch mode.
@param[in]  file       file name
@param[in]  line       line where called
@param[in]  mtr        mini-transaction
@param[in]  dirty_with_no_latch  mark page as dirty even if page is being
pinned without any latch
@return pointer to the block or NULL */
buf_block_t *buf_page_get_gen(const page_id_t &page_id,
                              const page_size_t &page_size, ulint rw_latch,
                              buf_block_t *guess, Page_fetch mode,
                              const char *file, ulint line, mtr_t *mtr,
                              bool dirty_with_no_latch = false);

/** Initializes a page to the buffer buf_pool. The page is usually not read
from a file even if it cannot be found in the buffer buf_pool. This is one
of the functions which perform to a block a state transition NOT_USED =>
FILE_PAGE (the other is buf_page_get_gen). The page is latched by passed mtr.
@param[in]  page_id    page id
@param[in]  page_size  page size
@param[in]  rw_latch   RW_SX_LATCH, RW_X_LATCH
@param[in]  mtr        mini-transaction
@return pointer to the block, page bufferfixed */
buf_block_t *buf_page_create(const page_id_t &page_id,
                             const page_size_t &page_size,
                             rw_lock_type_t rw_latch, mtr_t *mtr);

#else /* !UNIV_HOTBACKUP */

/** Inits a page to the buffer buf_pool, for use in mysqlbackup --restore.
@param[in]      page_id    page id
@param[in]      page_size  page size
@param[in,out]  block      block to init */
void meb_page_init(const page_id_t &page_id, const page_size_t &page_size,
                   buf_block_t *block);
#endif /* !UNIV_HOTBACKUP */

#ifndef UNIV_HOTBACKUP
/** Releases a compressed-only page acquired with buf_page_get_zip(). */
UNIV_INLINE
void buf_page_release_zip(buf_page_t *bpage); /*!< in: buffer block */

/** Releases a latch, if specified.
@param[in]  block     buffer block
@param[in]  rw_latch  RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH */
UNIV_INLINE
void buf_page_release_latch(buf_block_t *block, ulint rw_latch);

/** Moves a page to the start of the buffer pool LRU list. This high-level
function can be used to prevent an important page from slipping out of
the buffer pool.
@param[in,out]  bpage  buffer block of a file page */
void buf_page_make_young(buf_page_t *bpage);

/** Returns TRUE if the page can be found in the buffer pool hash table.
NOTE that it is possible that the page is not yet read from disk,
though.
@param[in]  page_id  page id
@return true if found in the page hash table */
UNIV_INLINE
ibool buf_page_peek(const page_id_t &page_id);

#ifdef UNIV_DEBUG

/** Sets file_page_was_freed TRUE if the page is found in the buffer pool.
This function should be called when we free a file page and want the
debug version to check that it is not accessed any more unless
reallocated.
@param[in]  page_id  page id
@return control block if found in page hash table, otherwise NULL */
buf_page_t *buf_page_set_file_page_was_freed(const page_id_t &page_id);

/** Sets file_page_was_freed FALSE if the page is found in the buffer pool.
This function should be called when we free a file page and want the
debug version to check that it is not accessed any more unless
reallocated.
@param[in]  page_id  page id
@return control block if found in page hash table, otherwise NULL */
buf_page_t *buf_page_reset_file_page_was_freed(const page_id_t &page_id);

#endif /* UNIV_DEBUG */
/** Reads the freed_page_clock of a buffer block.
@return freed_page_clock */
UNIV_INLINE
ulint buf_page_get_freed_page_clock(const buf_page_t *bpage) /*!< in: block */
    MY_ATTRIBUTE((warn_unused_result));
/** Reads the freed_page_clock of a buffer block.
@return freed_page_clock */
UNIV_INLINE
ulint buf_block_get_freed_page_clock(const buf_block_t *block) /*!< in: block */
    MY_ATTRIBUTE((warn_unused_result));

/** Tells, for heuristics, if a block is still close enough to the MRU end of
the LRU list meaning that it is not in danger of getting evicted and also
implying that it has been accessed recently.
The page must be either buffer-fixed, or its page hash must be locked.
@param[in]  bpage  block
@return true if block is close to MRU end of LRU */
UNIV_INLINE
ibool buf_page_peek_if_young(const buf_page_t *bpage);

/** Recommends a move of a block to the start of the LRU list if there is
danger of dropping from the buffer pool.
NOTE: does not reserve the LRU list mutex.
@param[in]  bpage  block to make younger
@return true if should be made younger */
UNIV_INLINE
ibool buf_page_peek_if_too_old(const buf_page_t *bpage);

/** Gets the youngest modification log sequence number for a frame.
Returns zero if not file page or no modification occurred yet.
@return newest modification to page */
UNIV_INLINE
lsn_t buf_page_get_newest_modification(
    const buf_page_t *bpage); /*!< in: block containing the
                              page frame */

/** Increment the modify clock.
The caller must
(1) own the buf_pool->mutex and block bufferfix count has to be zero,
(2) own X or SX latch on the block->lock, or
(3) operate on a thread-private temporary table
@param[in,out]  block  buffer block */
UNIV_INLINE
void buf_block_modify_clock_inc(buf_block_t *block);

/** Read the modify clock.
@param[in]  block  buffer block
@return modify_clock value */
UNIV_INLINE
uint64_t buf_block_get_modify_clock(const buf_block_t *block);

/** Increments the bufferfix count. */
#ifdef UNIV_DEBUG
/**
@param[in]      file   file name
@param[in]      line   line */
#endif /* UNIV_DEBUG */
/**
@param[in,out]  block  block to bufferfix */
UNIV_INLINE
void buf_block_buf_fix_inc_func(
#ifdef UNIV_DEBUG
    const char *file, ulint line,
#endif /* UNIV_DEBUG */
    buf_block_t *block);

/** Increments the bufferfix count.
@param[in,out]  bpage  block to bufferfix
@return the count */
UNIV_INLINE
ulint buf_block_fix(buf_page_t *bpage);

/** Increments the bufferfix count.
@param[in,out]  block  block to bufferfix
@return the count */
UNIV_INLINE
ulint buf_block_fix(buf_block_t *block);

/** Decrements the bufferfix count.
@param[in,out]  bpage  block to bufferunfix
@return the remaining buffer-fix count */
UNIV_INLINE
ulint buf_block_unfix(buf_page_t *bpage);
#endif /* !UNIV_HOTBACKUP */
/** Decrements the bufferfix count.
@param[in,out]  block  block to bufferunfix
@return the remaining buffer-fix count */
UNIV_INLINE
ulint buf_block_unfix(buf_block_t *block);

#ifndef UNIV_HOTBACKUP
/** Unfixes the page, unlatches the page,
removes it from page_hash and removes it from LRU.
@param[in,out]  bpage  pointer to the block */
void buf_read_page_handle_error(buf_page_t *bpage);

#ifdef UNIV_DEBUG
/** Increments the bufferfix count.
@param[in,out]  b  block to bufferfix
@param[in]      f  file name where requested
@param[in]      l  line number where requested */
#define buf_block_buf_fix_inc(b, f, l) buf_block_buf_fix_inc_func(f, l, b)
#else /* UNIV_DEBUG */
/** Increments the bufferfix count.
@param[in,out]  b  block to bufferfix
@param[in]      f  file name where requested
@param[in]      l  line number where requested */
#define buf_block_buf_fix_inc(b, f, l) buf_block_buf_fix_inc_func(b)
#endif /* UNIV_DEBUG */
#else /* !UNIV_HOTBACKUP */
#define buf_block_modify_clock_inc(block) ((void)0)
#endif /* !UNIV_HOTBACKUP */

#ifndef UNIV_HOTBACKUP

/** Gets the space id, page offset, and byte offset within page of a pointer
pointing to a buffer frame containing a file page.
@param[in]   ptr    pointer to a buffer frame
@param[out]  space  space id
@param[out]  addr   page offset and byte offset */
UNIV_INLINE
void buf_ptr_get_fsp_addr(const void *ptr, space_id_t *space, fil_addr_t *addr);

/** Gets the hash value of a block. This can be used in searches in the
lock hash table.
@return lock hash value */
UNIV_INLINE
ulint buf_block_get_lock_hash_val(const buf_block_t *block) /*!< in: block */
    MY_ATTRIBUTE((warn_unused_result));
#ifdef UNIV_DEBUG
/** Finds a block in the buffer pool that points to a
given compressed page. Used only to confirm that buffer pool does not contain a
given pointer, thus protected by zip_free_mutex.
@param[in]  buf_pool  buffer pool instance
@param[in]  data      pointer to compressed page
@return buffer block pointing to the compressed page, or NULL */
buf_block_t *buf_pool_contains_zip(buf_pool_t *buf_pool, const void *data);
#endif /* UNIV_DEBUG */

/***********************************************************************
FIXME_FTS: Gets the frame the pointer is pointing to. */
UNIV_INLINE
buf_frame_t *buf_frame_align(
    /* out: pointer to frame */
    byte *ptr); /* in: pointer to a frame */

#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
/** Validates the buffer pool data structure.
@return true */
ibool buf_validate(void);
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
#if defined UNIV_DEBUG_PRINT || defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
/** Prints info of the buffer pool data structure. */
void buf_print(void);
#endif /* UNIV_DEBUG_PRINT || UNIV_DEBUG || UNIV_BUF_DEBUG */
#endif /* !UNIV_HOTBACKUP */
enum buf_page_print_flags {
  /** Do not crash at the end of buf_page_print(). */
  BUF_PAGE_PRINT_NO_CRASH = 1,
  /** Do not print the full page dump. */
  BUF_PAGE_PRINT_NO_FULL = 2
};

/** Prints a page to stderr.
@param[in]  read_buf   a database page
@param[in]  page_size  page size
@param[in]  flags      0 or BUF_PAGE_PRINT_NO_CRASH or
BUF_PAGE_PRINT_NO_FULL */
void buf_page_print(const byte *read_buf, const page_size_t &page_size,
                    ulint flags);

/** Decompress a block.
@return true if successful */
ibool buf_zip_decompress(buf_block_t *block, /*!< in/out: block */
                         ibool check); /*!< in: TRUE=verify the page checksum */
#ifndef UNIV_HOTBACKUP
#ifdef UNIV_DEBUG
/** Returns the number of latched pages in the buffer pool.
@return number of latched pages */
ulint buf_get_latched_pages_number(void);
#endif /* UNIV_DEBUG */
/** Returns the number of pending buf pool read ios.
@return number of pending read I/O operations */
ulint buf_get_n_pending_read_ios(void);
/** Prints info of the buffer i/o. */
void buf_print_io(FILE *file); /*!< in: file where to print */
/** Collect buffer pool stats information for a buffer pool. Also
record aggregated stats if there are more than one buffer pool
in the server */
void buf_stats_get_pool_info(
    buf_pool_t *buf_pool,            /*!< in: buffer pool */
    ulint pool_id,                   /*!< in: buffer pool ID */
    buf_pool_info_t *all_pool_info); /*!< in/out: buffer pool info
                                     to fill */
/** Return the ratio in percents of modified pages in the buffer pool /
database pages in the buffer pool.
@return modified page percentage ratio */
double buf_get_modified_ratio_pct(void);
/** Refresh the statistics used to print per-second averages. */
void buf_refresh_io_stats_all();

/** Assert that all file pages in the buffer are in a replaceable state. */
void buf_must_be_all_freed(void);

/** Checks that there currently are no pending i/o-operations for the buffer
pool.
@return number of pending i/o */
ulint buf_pool_check_no_pending_io(void);

/** Invalidates the file pages in the buffer pool when an archive recovery is
completed. All the file pages buffered must be in a replaceable state when
this function is called: not latched and not modified. */
void buf_pool_invalidate(void);
#endif /* !UNIV_HOTBACKUP */

/*========================================================================
--------------------------- LOWER LEVEL ROUTINES -------------------------
=========================================================================*/

#ifdef UNIV_DEBUG
/** Adds latch level info for the rw-lock protecting the buffer frame. This
should be called in the debug version after a successful latching of a page if
we know the latching order level of the acquired latch.
@param[in]  block  buffer page where we have acquired latch
@param[in]  level  latching order level */
UNIV_INLINE
void buf_block_dbg_add_level(buf_block_t *block, latch_level_t level);
#else /* UNIV_DEBUG */
#define buf_block_dbg_add_level(block, level) /* nothing */
#endif /* UNIV_DEBUG */

/** Gets the state of a block.
@return state */
UNIV_INLINE
enum buf_page_state buf_page_get_state(
    const buf_page_t *bpage); /*!< in: pointer to the control block */
/** Gets the state of a block.
@return state */
UNIV_INLINE
enum buf_page_state buf_block_get_state(
    const buf_block_t *block) /*!< in: pointer to the control block */
    MY_ATTRIBUTE((warn_unused_result));

/** Sets the state of a block.
@param[in,out]  bpage  pointer to control block
@param[in]      state  state */
UNIV_INLINE
void buf_page_set_state(buf_page_t *bpage, enum buf_page_state state);

/** Sets the state of a block.
@param[in,out]  block  pointer to control block
@param[in]      state  state */
UNIV_INLINE
void buf_block_set_state(buf_block_t *block, enum buf_page_state state);

/** Determines if a block is mapped to a tablespace.
@return true if mapped */
UNIV_INLINE
ibool buf_page_in_file(
    const buf_page_t *bpage) /*!< in: pointer to control block */
    MY_ATTRIBUTE((warn_unused_result));
#ifndef UNIV_HOTBACKUP
/** Determines if a block should be on unzip_LRU list.
@return true if block belongs to unzip_LRU */
UNIV_INLINE
bool buf_page_belongs_to_unzip_LRU(
    const buf_page_t *bpage) /*!< in: pointer to control block */
    MY_ATTRIBUTE((warn_unused_result));

/** Gets the mutex of a block.
@return pointer to mutex protecting bpage */
UNIV_INLINE
BPageMutex *buf_page_get_mutex(
    const buf_page_t *bpage) /*!< in: pointer to control block */
    MY_ATTRIBUTE((warn_unused_result));

/** Get the flush type of a page.
@return flush type */
UNIV_INLINE
buf_flush_t buf_page_get_flush_type(
    const buf_page_t *bpage) /*!< in: buffer page */
    MY_ATTRIBUTE((warn_unused_result));

/** Set the flush type of a page.
@param[in]  bpage       buffer page
@param[in]  flush_type  flush type */
UNIV_INLINE
void buf_page_set_flush_type(buf_page_t *bpage, buf_flush_t flush_type);

/** Map a block to a file page.
@param[in,out]  block    pointer to control block
@param[in]      page_id  page id */
UNIV_INLINE
void buf_block_set_file_page(buf_block_t *block, const page_id_t &page_id);

/** Gets the io_fix state of a block.
@return io_fix state */
UNIV_INLINE
enum buf_io_fix buf_page_get_io_fix(
    const buf_page_t *bpage) /*!< in: pointer to the control block */
    MY_ATTRIBUTE((warn_unused_result));
/** Gets the io_fix state of a block.
@return io_fix state */
UNIV_INLINE
enum buf_io_fix buf_block_get_io_fix(
    const buf_block_t *block) /*!< in: pointer to the control block */
    MY_ATTRIBUTE((warn_unused_result));

/** Sets the io_fix state of a block.
@param[in,out]  bpage   control block
@param[in]      io_fix  io_fix state */
UNIV_INLINE
void buf_page_set_io_fix(buf_page_t *bpage, enum buf_io_fix io_fix);

/** Sets the io_fix state of a block.
@param[in,out]  block   control block
@param[in]      io_fix  io_fix state */
UNIV_INLINE
void buf_block_set_io_fix(buf_block_t *block, enum buf_io_fix io_fix);

/** Makes a block sticky. A sticky block implies that even after we release
the buf_pool->LRU_list_mutex and the block->mutex:
* it cannot be removed from the flush_list
* the block descriptor cannot be relocated
* it cannot be removed from the LRU list
Note that:
* the block can still change its position in the LRU list
* the next and previous pointers can change.
849 @param[in,out] bpage control block */ 850 UNIV_INLINE 851 void buf_page_set_sticky(buf_page_t *bpage); 852 853 /** Removes stickiness of a block. */ 854 UNIV_INLINE 855 void buf_page_unset_sticky(buf_page_t *bpage); /*!< in/out: control block */ 856 /** Determine if a buffer block can be relocated in memory. The block 857 can be dirty, but it must not be I/O-fixed or bufferfixed. */ 858 UNIV_INLINE 859 ibool buf_page_can_relocate( 860 const buf_page_t *bpage) /*!< control block being relocated */ 861 MY_ATTRIBUTE((warn_unused_result)); 862 863 /** Determine if a block has been flagged old. 864 @param[in] bpage control block 865 @return true if old */ 866 UNIV_INLINE 867 ibool buf_page_is_old(const buf_page_t *bpage) 868 MY_ATTRIBUTE((warn_unused_result)); 869 870 /** Flag a block old. 871 @param[in,out] bpage control block 872 @param[in] old old */ 873 UNIV_INLINE 874 void buf_page_set_old(buf_page_t *bpage, ibool old); 875 876 /** Determine the time of first access of a block in the buffer pool. 877 @return ut_time_monotonic_ms() at the time of first access, 0 if not accessed 878 */ 879 UNIV_INLINE 880 unsigned buf_page_is_accessed(const buf_page_t *bpage) /*!< in: control block */ 881 MY_ATTRIBUTE((warn_unused_result)); 882 /** Flag a block accessed. */ 883 UNIV_INLINE 884 void buf_page_set_accessed(buf_page_t *bpage); /*!< in/out: control block */ 885 886 /** Gets the buf_block_t handle of a buffered file block if an uncompressed 887 page frame exists, or NULL. page frame exists, or NULL. The caller must hold 888 either the appropriate hash lock in any mode, either the LRU list mutex. Note: 889 even though bpage is not declared a const we don't update its value. It is safe 890 to make this pure. 
@param[in]      bpage   control block, or NULL
@return control block, or NULL */
UNIV_INLINE
buf_block_t *buf_page_get_block(buf_page_t *bpage)
    MY_ATTRIBUTE((warn_unused_result));
#ifdef UNIV_DEBUG
/** Gets a pointer to the memory frame of a block.
@return pointer to the frame */
UNIV_INLINE
buf_frame_t *buf_block_get_frame(
    const buf_block_t *block) /*!< in: pointer to the control block */
    MY_ATTRIBUTE((warn_unused_result));
#else /* UNIV_DEBUG */
#define buf_block_get_frame(block) (block)->frame
#endif /* UNIV_DEBUG */
#else /* !UNIV_HOTBACKUP */
#define buf_block_get_frame(block) (block)->frame
#endif /* !UNIV_HOTBACKUP */

/** Gets the compressed page descriptor corresponding to an uncompressed
page if applicable. */
#define buf_block_get_page_zip(block) \
  ((block)->page.zip.data ? &(block)->page.zip : NULL)

/** Get a buffer block from an adaptive hash index pointer.
This function does not return if the block is not identified.
@param[in]      ptr     pointer to within a page frame
@return pointer to block, never NULL */
buf_block_t *buf_block_from_ahi(const byte *ptr);

#ifndef UNIV_HOTBACKUP
/** Find out if a pointer belongs to a buf_block_t. It can be a pointer to
the buf_block_t itself or a member of it
@return true if ptr belongs to a buf_block_t struct */
ibool buf_pointer_is_block_field(const void *ptr); /*!< in: pointer not
                                                   dereferenced */

/** Find out if a pointer corresponds to a buf_block_t::mutex.
@param m in: mutex candidate
@return true if m is a buf_block_t::mutex */
#define buf_pool_is_block_mutex(m) buf_pointer_is_block_field((const void *)(m))

/** Find out if a pointer corresponds to a buf_block_t::lock.
@param l in: rw-lock candidate
@return true if l is a buf_block_t::lock */
#define buf_pool_is_block_lock(l) buf_pointer_is_block_field((const void *)(l))

/** Inits a page for read to the buffer buf_pool. If the page is
(1) already in buf_pool, or
(2) if we specify to read only ibuf pages and the page is not an ibuf page, or
(3) if the space is deleted or being deleted,
then this function does nothing.
Sets the io_fix flag to BUF_IO_READ and sets a non-recursive exclusive lock
on the buffer frame. The io-handler must take care that the flag is cleared
and the lock released later.
@param[out]     err             DB_SUCCESS or DB_TABLESPACE_DELETED
@param[in]      mode            BUF_READ_IBUF_PAGES_ONLY, ...
@param[in]      page_id         page id
@param[in]      page_size       page size
@param[in]      unzip           TRUE=request uncompressed page
@return pointer to the block or NULL */
buf_page_t *buf_page_init_for_read(dberr_t *err, ulint mode,
                                   const page_id_t &page_id,
                                   const page_size_t &page_size, ibool unzip);

/** Completes an asynchronous read or write request of a file page to or from
the buffer pool.
@param[in]      bpage   pointer to the block in question
@param[in]      evict   whether or not to evict the page from LRU list
@return true if successful */
bool buf_page_io_complete(buf_page_t *bpage, bool evict);

/** Calculates the index of a buffer pool to the buf_pool[] array.
@return the position of the buffer pool in buf_pool[] */
UNIV_INLINE
ulint buf_pool_index(const buf_pool_t *buf_pool) /*!< in: buffer pool */
    MY_ATTRIBUTE((warn_unused_result));

/** Returns the buffer pool instance given a page instance
@return buf_pool */
UNIV_INLINE
buf_pool_t *buf_pool_from_bpage(
    const buf_page_t *bpage); /*!< in: buffer pool page */

/** Returns the buffer pool instance given a block instance
@return buf_pool */
UNIV_INLINE
buf_pool_t *buf_pool_from_block(const buf_block_t *block); /*!< in: block */

/** Returns the buffer pool instance given a page id.
@param[in]      page_id page id
@return buffer pool */
UNIV_INLINE
buf_pool_t *buf_pool_get(const page_id_t &page_id);

/** Returns the buffer pool instance given its array index
@return buffer pool */
UNIV_INLINE
buf_pool_t *buf_pool_from_array(ulint index); /*!< in: array index to get
                                              buffer pool instance from */

/** Returns the control block of a file page, NULL if not found.
@param[in]      buf_pool        buffer pool instance
@param[in]      page_id         page id
@return block, NULL if not found */
UNIV_INLINE
buf_page_t *buf_page_hash_get_low(buf_pool_t *buf_pool,
                                  const page_id_t &page_id);

/** Returns the control block of a file page, NULL if not found.
If the block is found and lock is not NULL then the appropriate
page_hash lock is acquired in the specified lock mode. Otherwise,
mode value is ignored. It is up to the caller to release the
lock. If the block is found and the lock is NULL then the page_hash
lock is released by this function.
@param[in]      buf_pool        buffer pool instance
@param[in]      page_id         page id
@param[in,out]  lock            lock of the page hash acquired if bpage is
found, NULL otherwise. If NULL is passed then the hash_lock is released by
this function.
@param[in]      lock_mode       RW_LOCK_X or RW_LOCK_S. Ignored if
lock == NULL
@param[in]      watch           if true, return watch sentinel also.
@return pointer to the bpage or NULL; if NULL, lock is also NULL or
a watch sentinel. */
UNIV_INLINE
buf_page_t *buf_page_hash_get_locked(buf_pool_t *buf_pool,
                                     const page_id_t &page_id, rw_lock_t **lock,
                                     ulint lock_mode, bool watch = false);

/** Returns the control block of a file page, NULL if not found.
If the block is found and lock is not NULL then the appropriate
page_hash lock is acquired in the specified lock mode. Otherwise,
mode value is ignored. It is up to the caller to release the
lock. If the block is found and the lock is NULL then the page_hash
lock is released by this function.
@param[in]      buf_pool        buffer pool instance
@param[in]      page_id         page id
@param[in,out]  lock            lock of the page hash acquired if bpage is
found, NULL otherwise. If NULL is passed then the hash_lock is released by
this function.
@param[in]      lock_mode       RW_LOCK_X or RW_LOCK_S. Ignored if
lock == NULL
@return pointer to the block or NULL; if NULL, lock is also NULL. */
UNIV_INLINE
buf_block_t *buf_block_hash_get_locked(buf_pool_t *buf_pool,
                                       const page_id_t &page_id,
                                       rw_lock_t **lock, ulint lock_mode);

/* There are four different ways we can try to get a bpage or block
from the page hash:
1) Caller already holds the appropriate page hash lock: in the case call
buf_page_hash_get_low() function.
2) Caller wants to hold page hash lock in x-mode
3) Caller wants to hold page hash lock in s-mode
4) Caller doesn't want to hold page hash lock */
#define buf_page_hash_get_s_locked(b, page_id, l) \
  buf_page_hash_get_locked(b, page_id, l, RW_LOCK_S)
#define buf_page_hash_get_x_locked(b, page_id, l) \
  buf_page_hash_get_locked(b, page_id, l, RW_LOCK_X)
#define buf_page_hash_get(b, page_id) \
  buf_page_hash_get_locked(b, page_id, NULL, 0)
#define buf_page_get_also_watch(b, page_id) \
  buf_page_hash_get_locked(b, page_id, NULL, 0, true)

#define buf_block_hash_get_s_locked(b, page_id, l) \
  buf_block_hash_get_locked(b, page_id, l, RW_LOCK_S)
#define buf_block_hash_get_x_locked(b, page_id, l) \
  buf_block_hash_get_locked(b, page_id, l, RW_LOCK_X)
#define buf_block_hash_get(b, page_id) \
  buf_block_hash_get_locked(b, page_id, NULL, 0)

/** Gets the current length of the free list of buffer blocks.
@return length of the free list */
ulint buf_get_free_list_len(void);

/** Determine if a block is a sentinel for a buffer pool watch.
@return true if a sentinel for a buffer pool watch, false if not */
ibool buf_pool_watch_is_sentinel(
    const buf_pool_t *buf_pool, /*!< buffer pool instance */
    const buf_page_t *bpage)    /*!< in: block */
    MY_ATTRIBUTE((warn_unused_result));

/** Stop watching if the page has been read in.
buf_pool_watch_set(space,offset) must have returned NULL before.
@param[in]      page_id page id */
void buf_pool_watch_unset(const page_id_t &page_id);

/** Check if the page has been read in.
This may only be called after buf_pool_watch_set(space,offset)
has returned NULL and before invoking buf_pool_watch_unset(space,offset).
@param[in]      page_id page id
@return false if the given page was not read in, true if it was */
ibool buf_pool_watch_occurred(const page_id_t &page_id)
    MY_ATTRIBUTE((warn_unused_result));

/** Get total buffer pool statistics. */
void buf_get_total_list_len(
    ulint *LRU_len,         /*!< out: length of all LRU lists */
    ulint *free_len,        /*!< out: length of all free lists */
    ulint *flush_list_len); /*!< out: length of all flush lists */

/** Get total list size in bytes from all buffer pools. */
void buf_get_total_list_size_in_bytes(
    buf_pools_list_size_t *buf_pools_list_size); /*!< out: list sizes
                                                 in all buffer pools */

/** Get total buffer pool statistics. */
void buf_get_total_stat(
    buf_pool_stat_t *tot_stat); /*!< out: buffer pool stats */

/** Get the nth chunk's buffer block in the specified buffer pool.
@param[in]      buf_pool        buffer pool instance
@param[in]      n               nth chunk in the buffer pool
@param[in]      chunk_size      chunk_size
@return the nth chunk's buffer block. */
UNIV_INLINE
buf_block_t *buf_get_nth_chunk_block(const buf_pool_t *buf_pool, ulint n,
                                     ulint *chunk_size);

/** Verify the possibility that a stored page is not in buffer pool.
@param[in]      withdraw_clock  withdraw clock when stored the page
@retval true if the page might be relocated */
UNIV_INLINE
bool buf_pool_is_obsolete(ulint withdraw_clock);

/** Calculate aligned buffer pool size based on srv_buf_pool_chunk_unit,
if needed.
@param[in]      size    size in bytes
@return aligned size */
UNIV_INLINE
ulint buf_pool_size_align(ulint size);

/** Adjust the proposed chunk unit size so that it satisfies all invariants
@param[in]      size    proposed size of buffer pool chunk unit in bytes
@return adjusted size which meets invariants */
ulonglong buf_pool_adjust_chunk_unit(ulonglong size);

/** Calculate the checksum of a page from compressed table and update the
page.
@param[in,out]  page            page to update
@param[in]      size            compressed page size
@param[in]      lsn             LSN to stamp on the page
@param[in]      skip_lsn_check  true to skip check for lsn (in DEBUG) */
void buf_flush_update_zip_checksum(buf_frame_t *page, ulint size, lsn_t lsn,
                                   bool skip_lsn_check);

#endif /* !UNIV_HOTBACKUP */

/** Return how many more pages must be added to the withdraw list to reach the
withdraw target of the currently ongoing buffer pool resize.
@param[in]      buf_pool        buffer pool instance
@return page count to be withdrawn or zero if the target is already achieved or
if the buffer pool is not currently being resized. */
UNIV_INLINE
ulint buf_get_withdraw_depth(buf_pool_t *buf_pool);

/** Gets the io_fix state of a buffer block. Does not assert that the
buf_page_get_mutex() mutex is held, to be used in the cases where it is safe
not to hold it.
@param[in]      block   pointer to the buffer block
@return page io_fix state */
UNIV_INLINE
buf_io_fix buf_block_get_io_fix_unlocked(const buf_block_t *block)
    MY_ATTRIBUTE((warn_unused_result));

/** Gets the io_fix state of a buffer page. Does not assert that the
buf_page_get_mutex() mutex is held, to be used in the cases where it is safe
not to hold it.
@param[in]      bpage   pointer to the buffer page
@return page io_fix state */
UNIV_INLINE
enum buf_io_fix buf_page_get_io_fix_unlocked(const buf_page_t *bpage)
    MY_ATTRIBUTE((warn_unused_result));

/** The common buffer control block structure
for compressed and uncompressed frames */

/** Number of bits used for buffer page states. */
#define BUF_PAGE_STATE_BITS 3

class buf_page_t {
 public:
  /** Set the doublewrite buffer ID.
  @param[in]    batch_id        Double write batch ID which flushed the page. */
  void set_dblwr_batch_id(uint16_t batch_id) { m_dblwr_id = batch_id; }

  /** @return the double write batch id, or uint16_t max if undefined. */
  uint16_t get_dblwr_batch_id() const MY_ATTRIBUTE((warn_unused_result)) {
    return (m_dblwr_id);
  }

  /** @name General fields
  None of these bit-fields must be modified without holding
  buf_page_get_mutex() [buf_block_t::mutex or
  buf_pool->zip_mutex], since they can be stored in the same
  machine word. */
  /* @{ */

  /** Page id. */
  page_id_t id;

  /** Page size. */
  page_size_t size;

  /** Count of how manyfold this block is currently bufferfixed. */
  uint32_t buf_fix_count;

  /** type of pending I/O operation. */
  buf_io_fix io_fix;

  /** Block state. @see buf_page_in_file */
  buf_page_state state;

  /** if this block is currently being flushed to disk, this tells
  the flush_type. @see buf_flush_t */
  unsigned flush_type : 2;

  /** index number of the buffer pool that this block belongs to */
  unsigned buf_pool_index : 6;

  static_assert(MAX_BUFFER_POOLS <= 64,
                "MAX_BUFFER_POOLS > 64; redefine buf_pool_index");

  /* @} */
  /** compressed page; zip.data (but not the data it points to) is
  protected by buf_pool->zip_mutex; state == BUF_BLOCK_ZIP_PAGE and
  zip.data == NULL means an active buf_pool->watch */
  page_zip_des_t zip;

#ifndef UNIV_HOTBACKUP
  /** node used in chaining to buf_pool->page_hash or buf_pool->zip_hash */
  buf_page_t *hash;
#endif /* !UNIV_HOTBACKUP */
#ifdef UNIV_DEBUG
  /** TRUE if in buf_pool->page_hash */
  bool in_page_hash;

  /** TRUE if in buf_pool->zip_hash */
  bool in_zip_hash;
#endif /* UNIV_DEBUG */

  /** @name Page flushing fields
  All these are protected by buf_pool->mutex. */
  /* @{ */

  /** Based on state, this is a list node, protected by the
  corresponding list mutex, in one of the following lists in buf_pool:

  - BUF_BLOCK_NOT_USED: free, withdraw
  - BUF_BLOCK_FILE_PAGE: flush_list
  - BUF_BLOCK_ZIP_DIRTY: flush_list
  - BUF_BLOCK_ZIP_PAGE: zip_clean

  The node pointers are protected by the corresponding list mutex.

  The contents of the list node is undefined if !in_flush_list
  && state == BUF_BLOCK_FILE_PAGE, or if state is one of
  BUF_BLOCK_MEMORY, BUF_BLOCK_REMOVE_HASH or BUF_BLOCK_READY_IN_USE. */
  UT_LIST_NODE_T(buf_page_t) list;

#ifdef UNIV_DEBUG
  /** TRUE if in buf_pool->flush_list; when buf_pool->flush_list_mutex is
  free, the following should hold:
  in_flush_list == (state == BUF_BLOCK_FILE_PAGE ||
                    state == BUF_BLOCK_ZIP_DIRTY)
  Writes to this field must be covered by both block->mutex and
  buf_pool->flush_list_mutex. Hence reads can happen while holding any one
  of the two mutexes */
  bool in_flush_list;

  /** TRUE if in buf_pool->free; when buf_pool->free_list_mutex is free, the
  following should hold: in_free_list == (state == BUF_BLOCK_NOT_USED) */
  bool in_free_list;
#endif /* UNIV_DEBUG */

  /** log sequence number of the youngest modification to this block, zero
  if not modified. Protected by block mutex */
  lsn_t newest_modification;

  /** log sequence number of the START of the log entry written of the oldest
  modification to this block which has not yet been flushed on disk; zero if
  all modifications are on disk. Writes to this field must be covered by both
  block->mutex and buf_pool->flush_list_mutex. Hence reads can happen while
  holding any one of the two mutexes */
  lsn_t oldest_modification;

  /* @} */

  /** @name LRU replacement algorithm fields
  These fields are protected by both buf_pool->LRU_list_mutex and the
  block mutex. */
  /* @{ */

  /** node of the LRU list */
  UT_LIST_NODE_T(buf_page_t) LRU;

#ifdef UNIV_DEBUG
  /** TRUE if the page is in the LRU list; used in debugging */
  bool in_LRU_list;
#endif /* UNIV_DEBUG */

#ifndef UNIV_HOTBACKUP

  /** true if the block is in the old blocks in buf_pool->LRU_old */
  unsigned old : 1;

  /** The value of buf_pool->freed_page_clock when this block was the last
  time put to the head of the LRU list; a thread is allowed to read this
  for heuristic purposes without holding any mutex or latch */
  unsigned freed_page_clock : 31;

  /* @} */
  /** Time of first access, or 0 if the block was never accessed in the
  buffer pool. Protected by block mutex */
  unsigned access_time;

#ifdef UNIV_DEBUG
  /** This is set to TRUE when fsp frees a page in buffer pool;
  protected by buf_pool->zip_mutex or buf_block_t::mutex. */
  bool file_page_was_freed;
#endif /* UNIV_DEBUG */

  /** Flush observer */
  FlushObserver *flush_observer;

#endif /* !UNIV_HOTBACKUP */

  /** Double write instance ordinal value during writes. This is used
  by IO completion (writes) to select the double write instance. */
  uint16_t m_dblwr_id{};
};

/** The buffer control block structure */

struct buf_block_t {
  /** @name General fields */
  /* @{ */

  /** page information; this must be the first field, so
  that buf_pool->page_hash can point to buf_page_t or buf_block_t */
  buf_page_t page;

  /** pointer to buffer frame which is of size UNIV_PAGE_SIZE, and aligned
  to an address divisible by UNIV_PAGE_SIZE */
  byte *frame;

#ifndef UNIV_HOTBACKUP
  /** read-write lock of the buffer frame */
  BPageLock lock;

#endif /* !UNIV_HOTBACKUP */

  /** node of the decompressed LRU list; a block is in the unzip_LRU list if
  page.state == BUF_BLOCK_FILE_PAGE and page.zip.data != NULL. Protected by
  both LRU_list_mutex and the block mutex.
*/
  UT_LIST_NODE_T(buf_block_t) unzip_LRU;
#ifdef UNIV_DEBUG

  /** TRUE if the page is in the decompressed LRU list; used in debugging */
  bool in_unzip_LRU_list;

  /* NOTE(review): presumably TRUE while the block sits on the buffer pool's
  withdraw list during a pool resize -- confirm against buf0lru/buf0buf.cc. */
  bool in_withdraw_list;
#endif /* UNIV_DEBUG */

  /** hashed value of the page address in the record lock hash table;
  protected by buf_block_t::lock (or buf_block_t::mutex in buf_page_get_gen(),
  buf_page_init_for_read() and buf_page_create()) */
  unsigned lock_hash_val : 32;
  /* @} */
  /** @name Optimistic search field */
  /* @{ */

  /** This clock is incremented every time a pointer to a record on the page
  may become obsolete; this is used in the optimistic cursor positioning: if
  the modify clock has not changed, we know that the pointer is still valid;
  this field may be changed if the thread (1) owns the LRU list mutex and the
  page is not bufferfixed, or (2) the thread has an x-latch on the block,
  or (3) the block must belong to an intrinsic table */
  uint64_t modify_clock;

  /* @} */
  /** @name Hash search fields (unprotected)
  NOTE that these fields are NOT protected by any semaphore! */
  /* @{ */

  /** counter which controls building of a new hash index for the page */
  ulint n_hash_helps;

  /** recommended prefix length for hash search: number of bytes in an
  incomplete last field */
  volatile ulint n_bytes;

  /** recommended prefix length for hash search: number of full fields */
  volatile ulint n_fields;

  /** true or false, depending on whether the leftmost record of several
  records with the same prefix should be indexed in the hash index */
  volatile bool left_side;
  /* @} */

  /** @name Hash search fields
  These 5 fields may only be modified when:
  we are holding the appropriate x-latch in btr_search_latches[], and
  one of the following holds:
  (1) the block state is BUF_BLOCK_FILE_PAGE, and
  we are holding an s-latch or x-latch on buf_block_t::lock, or
  (2) buf_block_t::buf_fix_count == 0, or
  (3) the block state is BUF_BLOCK_REMOVE_HASH.

  An exception to this is when we init or create a page
  in the buffer pool in buf0buf.cc.

  Another exception for buf_pool_clear_hash_index() is that
  assigning block->index = NULL (and block->n_pointers = 0)
  is allowed whenever btr_search_own_all(RW_LOCK_X).

  Another exception is that ha_insert_for_fold_func() may
  decrement n_pointers without holding the appropriate latch
  in btr_search_latches[]. Thus, n_pointers must be
  protected by atomic memory access.

  This implies that the fields may be read without race
  condition whenever any of the following hold:
  - the btr_search_latches[] s-latch or x-latch is being held, or
  - the block state is not BUF_BLOCK_FILE_PAGE or BUF_BLOCK_REMOVE_HASH,
  and holding some latch prevents the state from changing to that.

  Some use of assert_block_ahi_empty() or assert_block_ahi_valid()
  is prone to race conditions while buf_pool_clear_hash_index() is
  executing (the adaptive hash index is being disabled). Such use
  is explicitly commented. */

  /* @{ */

#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
  /** used in debugging: the number of pointers in the adaptive hash index
  pointing to this frame; protected by atomic memory access or
  btr_search_own_all(). */
  ulint n_pointers;

#define assert_block_ahi_empty(block) \
  ut_a(os_atomic_increment_ulint(&(block)->n_pointers, 0) == 0)
#define assert_block_ahi_empty_on_init(block)                        \
  do {                                                               \
    UNIV_MEM_VALID(&(block)->n_pointers, sizeof(block)->n_pointers); \
    assert_block_ahi_empty(block);                                   \
  } while (0)

#define assert_block_ahi_valid(block) \
  ut_a((block)->index ||              \
       os_atomic_increment_ulint(&(block)->n_pointers, 0) == 0)
#else /* UNIV_AHI_DEBUG || UNIV_DEBUG */
#define assert_block_ahi_empty(block)         /* nothing */
#define assert_block_ahi_empty_on_init(block) /* nothing */
#define assert_block_ahi_valid(block)         /* nothing */
#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */

  /** prefix length for hash indexing: number of full fields */
  unsigned curr_n_fields : 10;

  /** number of bytes in hash indexing */
  unsigned curr_n_bytes : 15;

  /** TRUE or FALSE in hash indexing */
  unsigned curr_left_side : 1;

  /** Index for which the adaptive hash index has been created, or NULL if
  the page does not exist in the index. Note that it does not guarantee that
  the index is complete, though: there may have been hash collisions, record
  deletions, etc. */
  dict_index_t *index;

  /* @} */
  /** true if block has been made dirty without acquiring X/SX latch as the
  block belongs to temporary tablespace and block is always accessed by a
  single thread. */
  bool made_dirty_with_no_latch;

#ifndef UNIV_HOTBACKUP
#ifdef UNIV_DEBUG
  /** @name Debug fields */
  /* @{ */
  /** In the debug version, each thread which bufferfixes the block acquires
  an s-latch here; so we can use the debug utilities in sync0rw */
  rw_lock_t debug_latch;
  /* @} */
#endif /* UNIV_DEBUG */
#endif /* !UNIV_HOTBACKUP */

  /** mutex protecting this block: state (also protected by the buffer
  pool mutex), io_fix, buf_fix_count, and accessed; we introduce this
  new mutex in InnoDB-5.1 to relieve contention on the buffer pool mutex */
  BPageMutex mutex;

  /** Get the page number and space id of the current buffer block.
  @return page number of the current buffer block. */
  const page_id_t &get_page_id() const { return page.id; }

  /** Get the page number of the current buffer block.
  @return page number of the current buffer block. */
  page_no_t get_page_no() const { return (page.id.page_no()); }

  /** Get the next page number of the current buffer block.
  @return next page number of the current buffer block. */
  page_no_t get_next_page_no() const {
    return (mach_read_from_4(frame + FIL_PAGE_NEXT));
  }

  /** Get the prev page number of the current buffer block.
  @return prev page number of the current buffer block. */
  page_no_t get_prev_page_no() const {
    return (mach_read_from_4(frame + FIL_PAGE_PREV));
  }

  /** Get the page type of the current buffer block.
  @return page type of the current buffer block.
*/
  page_type_t get_page_type() const {
    return (mach_read_from_2(frame + FIL_PAGE_TYPE));
  }

  /** Get the page type of the current buffer block as string.
  @return page type of the current buffer block as string. */
  const char *get_page_type_str() const;
};

/** Check if a buf_block_t object is in a valid state
@param block buffer block
@return true if valid */
#define buf_block_state_valid(block)                   \
  (buf_block_get_state(block) >= BUF_BLOCK_NOT_USED && \
   (buf_block_get_state(block) <= BUF_BLOCK_REMOVE_HASH))

/** Compute the hash fold value for blocks in buf_pool->zip_hash. */
/* @{ */
#define BUF_POOL_ZIP_FOLD_PTR(ptr) ((ulint)(ptr) / UNIV_PAGE_SIZE)
#define BUF_POOL_ZIP_FOLD(b) BUF_POOL_ZIP_FOLD_PTR((b)->frame)
#define BUF_POOL_ZIP_FOLD_BPAGE(b) BUF_POOL_ZIP_FOLD((buf_block_t *)(b))
/* @} */

/** A "Hazard Pointer" class used to iterate over page lists
inside the buffer pool. A hazard pointer is a buf_page_t pointer
which we intend to iterate over next and we want it to remain valid
even after we release the buffer pool mutex. */
class HazardPointer {
 public:
  /** Constructor
  @param buf_pool buffer pool instance
  @param mutex mutex that is protecting the hp. */
  HazardPointer(const buf_pool_t *buf_pool, const ib_mutex_t *mutex)
      : m_buf_pool(buf_pool)
#ifdef UNIV_DEBUG
        ,
        m_mutex(mutex)
#endif /* UNIV_DEBUG */
        ,
        m_hp() {
  }

  /** Destructor */
  virtual ~HazardPointer() {}

  /** Get current value */
  buf_page_t *get() const {
    ut_ad(mutex_own(m_mutex));
    return (m_hp);
  }

  /** Set current value
  @param bpage buffer block to be set as hp */
  void set(buf_page_t *bpage);

  /** Checks if a bpage is the hp
  @param bpage buffer block to be compared
  @return true if it is hp */
  bool is_hp(const buf_page_t *bpage);

  /** Adjust the value of hp. This happens when some
  other thread working on the same list attempts to
  remove the hp from the list. Must be implemented
  by the derived classes.
  @param bpage buffer block to be compared */
  virtual void adjust(const buf_page_t *bpage) = 0;

 protected:
  /** Disable copying */
  HazardPointer(const HazardPointer &);
  HazardPointer &operator=(const HazardPointer &);

  /** Buffer pool instance */
  const buf_pool_t *m_buf_pool;

#ifdef UNIV_DEBUG
  /** mutex that protects access to the m_hp. */
  const ib_mutex_t *m_mutex;
#endif /* UNIV_DEBUG */

  /** hazard pointer. */
  buf_page_t *m_hp;
};

/** Class implementing buf_pool->flush_list hazard pointer */
class FlushHp : public HazardPointer {
 public:
  /** Constructor
  @param buf_pool buffer pool instance
  @param mutex mutex that is protecting the hp. */
  FlushHp(const buf_pool_t *buf_pool, const ib_mutex_t *mutex)
      : HazardPointer(buf_pool, mutex) {}

  /** Destructor */
  virtual ~FlushHp() {}

  /** Adjust the value of hp. This happens when some
  other thread working on the same list attempts to
  remove the hp from the list.
  @param bpage buffer block to be compared */
  void adjust(const buf_page_t *bpage);
};

/** Class implementing buf_pool->LRU hazard pointer */
class LRUHp : public HazardPointer {
 public:
  /** Constructor
  @param buf_pool buffer pool instance
  @param mutex mutex that is protecting the hp. */
  LRUHp(const buf_pool_t *buf_pool, const ib_mutex_t *mutex)
      : HazardPointer(buf_pool, mutex) {}

  /** Destructor */
  virtual ~LRUHp() {}

  /** Adjust the value of hp. This happens when some
  other thread working on the same list attempts to
  remove the hp from the list.
  @param bpage buffer block to be compared */
  void adjust(const buf_page_t *bpage);
};

/** Special purpose iterators to be used when scanning the LRU list.
The idea is that when one thread finishes the scan it leaves the
itr in that position and the other thread can start scan from
there */
class LRUItr : public LRUHp {
 public:
  /** Constructor
  @param buf_pool buffer pool instance
  @param mutex mutex that is protecting the hp. */
  LRUItr(const buf_pool_t *buf_pool, const ib_mutex_t *mutex)
      : LRUHp(buf_pool, mutex) {}

  /** Destructor */
  virtual ~LRUItr() {}

  /** Selects from where to start a scan. If we have scanned
  too deep into the LRU list it resets the value to the tail
  of the LRU list.
1647 @return buf_page_t from where to start scan. */ 1648 buf_page_t *start(); 1649 }; 1650 1651 /** Struct that is embedded in the free zip blocks */ 1652 struct buf_buddy_free_t { 1653 union { 1654 ulint size; /*!< size of the block */ 1655 byte bytes[FIL_PAGE_DATA]; 1656 /*!< stamp[FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID] 1657 == BUF_BUDDY_FREE_STAMP denotes a free 1658 block. If the space_id field of buddy 1659 block != BUF_BUDDY_FREE_STAMP, the block 1660 is not in any zip_free list. If the 1661 space_id is BUF_BUDDY_FREE_STAMP then 1662 stamp[0] will contain the 1663 buddy block size. */ 1664 } stamp; 1665 1666 buf_page_t bpage; /*!< Embedded bpage descriptor */ 1667 UT_LIST_NODE_T(buf_buddy_free_t) list; 1668 /*!< Node of zip_free list */ 1669 }; 1670 1671 /** @brief The buffer pool statistics structure. */ 1672 struct buf_pool_stat_t { 1673 using Shards = Counter::Shards<64>; 1674 1675 /** Number of page gets performed; also successful searches through the 1676 adaptive hash index are counted as page gets; this field is NOT protected 1677 by the buffer pool mutex */ 1678 Shards m_n_page_gets; 1679 1680 /** Number of read operations. Accessed atomically. */ 1681 uint64_t n_pages_read; 1682 1683 /** Number of write operations. Accessed atomically. */ 1684 uint64_t n_pages_written; 1685 1686 /** number of pages created in the pool with no read. Accessed atomically. */ 1687 uint64_t n_pages_created; 1688 1689 /** Number of pages read in as part of random read ahead. Not protected. */ 1690 uint64_t n_ra_pages_read_rnd; 1691 1692 /** Number of pages read in as part of read ahead. Not protected. */ 1693 uint64_t n_ra_pages_read; 1694 1695 /** Number of read ahead pages that are evicted without being accessed. 1696 Protected by LRU_list_mutex. */ 1697 uint64_t n_ra_pages_evicted; 1698 1699 /** Number of pages made young, in calls to buf_LRU_make_block_young(). 1700 Protected by LRU_list_mutex. 
*/ 1701 uint64_t n_pages_made_young; 1702 1703 /** Number of pages not made young because the first access was not long 1704 enough ago, in buf_page_peek_if_too_old(). Not protected. */ 1705 uint64_t n_pages_not_made_young; 1706 1707 /** LRU size in bytes. Protected by LRU_list_mutex. */ 1708 uint64_t LRU_bytes; 1709 1710 /** Flush_list size in bytes. Protected by flush_list_mutex */ 1711 uint64_t flush_list_bytes; 1712 copybuf_pool_stat_t1713 static void copy(buf_pool_stat_t &dst, const buf_pool_stat_t &src) noexcept { 1714 Counter::copy(dst.m_n_page_gets, src.m_n_page_gets); 1715 1716 dst.n_pages_read = src.n_pages_read; 1717 1718 dst.n_pages_written = src.n_pages_written; 1719 1720 dst.n_pages_created = src.n_pages_created; 1721 1722 dst.n_ra_pages_read_rnd = src.n_ra_pages_read_rnd; 1723 1724 dst.n_ra_pages_read = src.n_ra_pages_read; 1725 1726 dst.n_ra_pages_evicted = src.n_ra_pages_evicted; 1727 1728 dst.n_pages_made_young = src.n_pages_made_young; 1729 1730 dst.n_pages_not_made_young = src.n_pages_not_made_young; 1731 1732 dst.LRU_bytes = src.LRU_bytes; 1733 1734 dst.flush_list_bytes = src.flush_list_bytes; 1735 } 1736 resetbuf_pool_stat_t1737 void reset() { 1738 Counter::clear(m_n_page_gets); 1739 1740 n_pages_read = 0; 1741 n_pages_written = 0; 1742 n_pages_created = 0; 1743 n_ra_pages_read_rnd = 0; 1744 n_ra_pages_read = 0; 1745 n_ra_pages_evicted = 0; 1746 n_pages_made_young = 0; 1747 n_pages_not_made_young = 0; 1748 LRU_bytes = 0; 1749 flush_list_bytes = 0; 1750 } 1751 }; 1752 1753 /** Statistics of buddy blocks of a given size. */ 1754 struct buf_buddy_stat_t { 1755 /** Number of blocks allocated from the buddy system. */ 1756 ulint used; 1757 /** Number of blocks relocated by the buddy system. */ 1758 uint64_t relocated; 1759 /** Total duration of block relocations, in microseconds. */ 1760 uint64_t relocated_usec; 1761 }; 1762 1763 /** @brief The buffer pool structure. 1764 1765 NOTE! 
The definition appears here only for other modules of this 1766 directory (buf) to see it. Do not use from outside! */ 1767 1768 struct buf_pool_t { 1769 /** @name General fields */ 1770 /* @{ */ 1771 BufListMutex chunks_mutex; /*!< protects (de)allocation of chunks: 1772 - changes to chunks, n_chunks are performed 1773 while holding this latch, 1774 - reading buf_pool_should_madvise requires 1775 holding this latch for any buf_pool_t 1776 - writing to buf_pool_should_madvise requires 1777 holding these latches for all buf_pool_t-s 1778 */ 1779 BufListMutex LRU_list_mutex; /*!< LRU list mutex */ 1780 BufListMutex free_list_mutex; /*!< free and withdraw list mutex */ 1781 BufListMutex zip_free_mutex; /*!< buddy allocator mutex */ 1782 BufListMutex zip_hash_mutex; /*!< zip_hash mutex */ 1783 ib_mutex_t flush_state_mutex; /*!< Flush state protection 1784 mutex */ 1785 BufPoolZipMutex zip_mutex; /*!< Zip mutex of this buffer 1786 pool instance, protects compressed 1787 only pages (of type buf_page_t, not 1788 buf_block_t */ 1789 ulint instance_no; /*!< Array index of this buffer 1790 pool instance */ 1791 ulint curr_pool_size; /*!< Current pool size in bytes */ 1792 ulint LRU_old_ratio; /*!< Reserve this much of the buffer 1793 pool for "old" blocks */ 1794 #ifdef UNIV_DEBUG 1795 ulint buddy_n_frames; /*!< Number of frames allocated from 1796 the buffer pool to the buddy system. 1797 Protected by zip_hash_mutex. */ 1798 #endif 1799 ut_allocator<unsigned char> allocator; /*!< Allocator used for 1800 allocating memory for the the "chunks" 1801 member. 
*/ 1802 volatile ulint n_chunks; /*!< number of buffer pool chunks */ 1803 volatile ulint n_chunks_new; /*!< new number of buffer pool chunks */ 1804 buf_chunk_t *chunks; /*!< buffer pool chunks */ 1805 buf_chunk_t *chunks_old; /*!< old buffer pool chunks to be freed 1806 after resizing buffer pool */ 1807 ulint curr_size; /*!< current pool size in pages */ 1808 ulint old_size; /*!< previous pool size in pages */ 1809 page_no_t read_ahead_area; /*!< size in pages of the area which 1810 the read-ahead algorithms read if 1811 invoked */ 1812 hash_table_t *page_hash; /*!< hash table of buf_page_t or 1813 buf_block_t file pages, 1814 buf_page_in_file() == TRUE, 1815 indexed by (space_id, offset). 1816 page_hash is protected by an 1817 array of mutexes. */ 1818 hash_table_t *page_hash_old; /*!< old pointer to page_hash to be 1819 freed after resizing buffer pool */ 1820 hash_table_t *zip_hash; /*!< hash table of buf_block_t blocks 1821 whose frames are allocated to the 1822 zip buddy system, 1823 indexed by block->frame */ 1824 ulint n_pend_reads; /*!< number of pending read 1825 operations. Accessed atomically */ 1826 ulint n_pend_unzip; /*!< number of pending decompressions. 1827 Accessed atomically. */ 1828 1829 ib_time_monotonic_t last_printout_time; 1830 /*!< when buf_print_io was last time 1831 called. Accesses not protected. */ 1832 buf_buddy_stat_t buddy_stat[BUF_BUDDY_SIZES_MAX + 1]; 1833 /*!< Statistics of buddy system, 1834 indexed by block size. Protected by 1835 zip_free mutex, except for the used 1836 field, which is also accessed 1837 atomically */ 1838 buf_pool_stat_t stat; /*!< current statistics */ 1839 buf_pool_stat_t old_stat; /*!< old statistics */ 1840 1841 /* @} */ 1842 1843 /** @name Page flushing algorithm fields */ 1844 1845 /* @{ */ 1846 1847 BufListMutex flush_list_mutex; /*!< mutex protecting the 1848 flush list access. This mutex 1849 protects flush_list, flush_rbt 1850 and bpage::list pointers when 1851 the bpage is on flush_list. 
It 1852 also protects writes to 1853 bpage::oldest_modification and 1854 flush_list_hp */ 1855 FlushHp flush_hp; /*!< "hazard pointer" 1856 used during scan of flush_list 1857 while doing flush list batch. 1858 Protected by flush_list_mutex */ 1859 UT_LIST_BASE_NODE_T(buf_page_t) flush_list; 1860 /*!< base node of the modified block 1861 list */ 1862 ibool init_flush[BUF_FLUSH_N_TYPES]; 1863 /*!< this is TRUE when a flush of the 1864 given type is being initialized. 1865 Protected by flush_state_mutex. */ 1866 ulint n_flush[BUF_FLUSH_N_TYPES]; 1867 /*!< this is the number of pending 1868 writes in the given flush type. 1869 Protected by flush_state_mutex. */ 1870 os_event_t no_flush[BUF_FLUSH_N_TYPES]; 1871 /*!< this is in the set state 1872 when there is no flush batch 1873 of the given type running. Protected by 1874 flush_state_mutex. */ 1875 ib_rbt_t *flush_rbt; /*!< a red-black tree is used 1876 exclusively during recovery to 1877 speed up insertions in the 1878 flush_list. This tree contains 1879 blocks in order of 1880 oldest_modification LSN and is 1881 kept in sync with the 1882 flush_list. 1883 Each member of the tree MUST 1884 also be on the flush_list. 1885 This tree is relevant only in 1886 recovery and is set to NULL 1887 once the recovery is over. 1888 Protected by flush_list_mutex */ 1889 ulint freed_page_clock; /*!< a sequence number used 1890 to count the number of buffer 1891 blocks removed from the end of 1892 the LRU list; NOTE that this 1893 counter may wrap around at 4 1894 billion! A thread is allowed 1895 to read this for heuristic 1896 purposes without holding any 1897 mutex or latch. For non-heuristic 1898 purposes protected by LRU_list_mutex */ 1899 ibool try_LRU_scan; /*!< Set to FALSE when an LRU 1900 scan for free block fails. This 1901 flag is used to avoid repeated 1902 scans of LRU list when we know 1903 that there is no free block 1904 available in the scan depth for 1905 eviction. 
Set to TRUE whenever 1906 we flush a batch from the 1907 buffer pool. Accessed protected by 1908 memory barriers. */ 1909 1910 lsn_t track_page_lsn; /* Pagge Tracking start LSN. */ 1911 1912 lsn_t max_lsn_io; /* Maximum LSN for which write io 1913 has already started. */ 1914 1915 /* @} */ 1916 1917 /** @name LRU replacement algorithm fields */ 1918 /* @{ */ 1919 1920 UT_LIST_BASE_NODE_T(buf_page_t) free; 1921 /*!< base node of the free 1922 block list */ 1923 1924 UT_LIST_BASE_NODE_T(buf_page_t) withdraw; 1925 /*!< base node of the withdraw 1926 block list. It is only used during 1927 shrinking buffer pool size, not to 1928 reuse the blocks will be removed. 1929 Protected by free_list_mutex */ 1930 1931 ulint withdraw_target; /*!< target length of withdraw 1932 block list, when withdrawing */ 1933 1934 /** "hazard pointer" used during scan of LRU while doing 1935 LRU list batch. Protected by buf_pool::LRU_list_mutex */ 1936 LRUHp lru_hp; 1937 1938 /** Iterator used to scan the LRU list when searching for 1939 replacable victim. Protected by buf_pool::LRU_list_mutex. */ 1940 LRUItr lru_scan_itr; 1941 1942 /** Iterator used to scan the LRU list when searching for 1943 single page flushing victim. Protected by buf_pool::LRU_list_mutex. */ 1944 LRUItr single_scan_itr; 1945 1946 UT_LIST_BASE_NODE_T(buf_page_t) LRU; 1947 /*!< base node of the LRU list */ 1948 1949 buf_page_t *LRU_old; /*!< pointer to the about 1950 LRU_old_ratio/BUF_LRU_OLD_RATIO_DIV 1951 oldest blocks in the LRU list; 1952 NULL if LRU length less than 1953 BUF_LRU_OLD_MIN_LEN; 1954 NOTE: when LRU_old != NULL, its length 1955 should always equal LRU_old_len */ 1956 ulint LRU_old_len; /*!< length of the LRU list from 1957 the block to which LRU_old points 1958 onward, including that block; 1959 see buf0lru.cc for the restrictions 1960 on this value; 0 if LRU_old == NULL; 1961 NOTE: LRU_old_len must be adjusted 1962 whenever LRU_old shrinks or grows! 
*/ 1963 1964 UT_LIST_BASE_NODE_T(buf_block_t) unzip_LRU; 1965 /*!< base node of the 1966 unzip_LRU list. The list is protected 1967 by LRU_list_mutex. */ 1968 1969 /* @} */ 1970 /** @name Buddy allocator fields 1971 The buddy allocator is used for allocating compressed page 1972 frames and buf_page_t descriptors of blocks that exist 1973 in the buffer pool only in compressed form. */ 1974 /* @{ */ 1975 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG 1976 UT_LIST_BASE_NODE_T(buf_page_t) zip_clean; 1977 /*!< unmodified compressed pages */ 1978 #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */ 1979 UT_LIST_BASE_NODE_T(buf_buddy_free_t) zip_free[BUF_BUDDY_SIZES_MAX]; 1980 /*!< buddy free lists */ 1981 1982 buf_page_t *watch; 1983 /*!< Sentinel records for buffer 1984 pool watches. Scanning the array is 1985 protected by taking all page_hash 1986 latches in X. Updating or reading an 1987 individual watch page is protected by 1988 a corresponding individual page_hash 1989 latch. */ 1990 1991 /** A wrapper for buf_pool_t::allocator.alocate_large which also advices the 1992 OS that this chunk should not be dumped to a core file if that was requested. 1993 Emits a warning to the log and disables @@global.core_file if advising was 1994 requested but could not be performed, but still return true as the allocation 1995 itself succeeded. 1996 @param[in] mem_size number of bytes to allocate 1997 @param[in,out] chunk mem and mem_pfx fields of this chunk will be updated 1998 to contain information about allocated memory region 1999 @return true iff allocated successfully */ 2000 bool allocate_chunk(ulonglong mem_size, buf_chunk_t *chunk); 2001 2002 /** A wrapper for buf_pool_t::allocator.deallocate_large which also advices 2003 the OS that this chunk can be dumped to a core file. 2004 Emits a warning to the log and disables @@global.core_file if advising was 2005 requested but could not be performed. 
2006 @param[in] chunk mem and mem_pfx fields of this chunk will be used to 2007 locate the memory region to free */ 2008 void deallocate_chunk(buf_chunk_t *chunk); 2009 2010 /** Advices the OS that all chunks in this buffer pool instance can be dumped 2011 to a core file. 2012 Emits a warning to the log if could not succeed. 2013 @return true iff succeeded, false if no OS support or failed */ 2014 bool madvise_dump(); 2015 2016 /** Advices the OS that all chunks in this buffer pool instance should not 2017 be dumped to a core file. 2018 Emits a warning to the log if could not succeed. 2019 @return true iff succeeded, false if no OS support or failed */ 2020 bool madvise_dont_dump(); 2021 2022 #if BUF_BUDDY_LOW > UNIV_ZIP_SIZE_MIN 2023 #error "BUF_BUDDY_LOW > UNIV_ZIP_SIZE_MIN" 2024 #endif 2025 /* @} */ 2026 }; 2027 2028 /** Print the given buf_pool_t object. 2029 @param[in,out] out the output stream 2030 @param[in] buf_pool the buf_pool_t object to be printed 2031 @return the output stream */ 2032 std::ostream &operator<<(std::ostream &out, const buf_pool_t &buf_pool); 2033 2034 /** @name Accessors for buffer pool mutexes 2035 Use these instead of accessing buffer pool mutexes directly. */ 2036 /* @{ */ 2037 2038 #ifndef UNIV_HOTBACKUP 2039 /** Test if flush list mutex is owned. */ 2040 #define buf_flush_list_mutex_own(b) mutex_own(&(b)->flush_list_mutex) 2041 2042 /** Acquire the flush list mutex. */ 2043 #define buf_flush_list_mutex_enter(b) \ 2044 do { \ 2045 mutex_enter(&(b)->flush_list_mutex); \ 2046 } while (0) 2047 /** Release the flush list mutex. */ 2048 #define buf_flush_list_mutex_exit(b) \ 2049 do { \ 2050 mutex_exit(&(b)->flush_list_mutex); \ 2051 } while (0) 2052 2053 /** Test if block->mutex is owned. */ 2054 #define buf_page_mutex_own(b) (b)->mutex.is_owned() 2055 2056 /** Acquire the block->mutex. */ 2057 #define buf_page_mutex_enter(b) \ 2058 do { \ 2059 mutex_enter(&(b)->mutex); \ 2060 } while (0) 2061 2062 /** Release the block->mutex. 
*/ 2063 #define buf_page_mutex_exit(b) \ 2064 do { \ 2065 (b)->mutex.exit(); \ 2066 } while (0) 2067 2068 /** Get appropriate page_hash_lock. */ 2069 #define buf_page_hash_lock_get(buf_pool, page_id) \ 2070 hash_get_lock((buf_pool)->page_hash, (page_id).fold()) 2071 2072 /** If not appropriate page_hash_lock, relock until appropriate. */ 2073 #define buf_page_hash_lock_s_confirm(hash_lock, buf_pool, page_id) \ 2074 hash_lock_s_confirm(hash_lock, (buf_pool)->page_hash, (page_id).fold()) 2075 2076 #define buf_page_hash_lock_x_confirm(hash_lock, buf_pool, page_id) \ 2077 hash_lock_x_confirm(hash_lock, (buf_pool)->page_hash, (page_id).fold()) 2078 #endif /* !UNIV_HOTBACKUP */ 2079 2080 #if defined(UNIV_DEBUG) && !defined(UNIV_HOTBACKUP) 2081 /** Test if page_hash lock is held in s-mode. */ 2082 #define buf_page_hash_lock_held_s(buf_pool, bpage) \ 2083 rw_lock_own(buf_page_hash_lock_get((buf_pool), (bpage)->id), RW_LOCK_S) 2084 2085 /** Test if page_hash lock is held in x-mode. */ 2086 #define buf_page_hash_lock_held_x(buf_pool, bpage) \ 2087 rw_lock_own(buf_page_hash_lock_get((buf_pool), (bpage)->id), RW_LOCK_X) 2088 2089 /** Test if page_hash lock is held in x or s-mode. 
*/ 2090 #define buf_page_hash_lock_held_s_or_x(buf_pool, bpage) \ 2091 (buf_page_hash_lock_held_s((buf_pool), (bpage)) || \ 2092 buf_page_hash_lock_held_x((buf_pool), (bpage))) 2093 2094 #define buf_block_hash_lock_held_s(buf_pool, block) \ 2095 buf_page_hash_lock_held_s((buf_pool), &(block)->page) 2096 2097 #define buf_block_hash_lock_held_x(buf_pool, block) \ 2098 buf_page_hash_lock_held_x((buf_pool), &(block)->page) 2099 2100 #define buf_block_hash_lock_held_s_or_x(buf_pool, block) \ 2101 buf_page_hash_lock_held_s_or_x((buf_pool), &(block)->page) 2102 #else /* UNIV_DEBUG && !UNIV_HOTBACKUP */ 2103 #define buf_page_hash_lock_held_s(b, p) (TRUE) 2104 #define buf_page_hash_lock_held_x(b, p) (TRUE) 2105 #define buf_page_hash_lock_held_s_or_x(b, p) (TRUE) 2106 #define buf_block_hash_lock_held_s(b, p) (TRUE) 2107 #define buf_block_hash_lock_held_x(b, p) (TRUE) 2108 #define buf_block_hash_lock_held_s_or_x(b, p) (TRUE) 2109 #endif /* UNIV_DEBUG && !UNIV_HOTBACKUP */ 2110 2111 /* @} */ 2112 2113 /********************************************************************** 2114 Let us list the consistency conditions for different control block states. 
2115 2116 NOT_USED: is in free list, not in LRU list, not in flush list, nor 2117 page hash table 2118 READY_FOR_USE: is not in free list, LRU list, or flush list, nor page 2119 hash table 2120 MEMORY: is not in free list, LRU list, or flush list, nor page 2121 hash table 2122 FILE_PAGE: space and offset are defined, is in page hash table 2123 if io_fix == BUF_IO_WRITE, 2124 pool: no_flush[flush_type] is in reset state, 2125 pool: n_flush[flush_type] > 0 2126 2127 (1) if buf_fix_count == 0, then 2128 is in LRU list, not in free list 2129 is in flush list, 2130 if and only if oldest_modification > 0 2131 is x-locked, 2132 if and only if io_fix == BUF_IO_READ 2133 is s-locked, 2134 if and only if io_fix == BUF_IO_WRITE 2135 2136 (2) if buf_fix_count > 0, then 2137 is not in LRU list, not in free list 2138 is in flush list, 2139 if and only if oldest_modification > 0 2140 if io_fix == BUF_IO_READ, 2141 is x-locked 2142 if io_fix == BUF_IO_WRITE, 2143 is s-locked 2144 2145 State transitions: 2146 2147 NOT_USED => READY_FOR_USE 2148 READY_FOR_USE => MEMORY 2149 READY_FOR_USE => FILE_PAGE 2150 MEMORY => NOT_USED 2151 FILE_PAGE => NOT_USED NOTE: This transition is allowed if and only if 2152 (1) buf_fix_count == 0, 2153 (2) oldest_modification == 0, and 2154 (3) io_fix == 0. 2155 */ 2156 2157 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG 2158 #ifndef UNIV_HOTBACKUP 2159 /** Functor to validate the LRU list. */ 2160 struct CheckInLRUList { operatorCheckInLRUList2161 void operator()(const buf_page_t *elem) const { ut_a(elem->in_LRU_list); } 2162 validateCheckInLRUList2163 static void validate(const buf_pool_t *buf_pool) { 2164 CheckInLRUList check; 2165 ut_list_validate(buf_pool->LRU, check); 2166 } 2167 }; 2168 2169 /** Functor to validate the LRU list. 
*/ 2170 struct CheckInFreeList { operatorCheckInFreeList2171 void operator()(const buf_page_t *elem) const { ut_a(elem->in_free_list); } 2172 validateCheckInFreeList2173 static void validate(const buf_pool_t *buf_pool) { 2174 CheckInFreeList check; 2175 ut_list_validate(buf_pool->free, check); 2176 } 2177 }; 2178 2179 struct CheckUnzipLRUAndLRUList { operatorCheckUnzipLRUAndLRUList2180 void operator()(const buf_block_t *elem) const { 2181 ut_a(elem->page.in_LRU_list); 2182 ut_a(elem->in_unzip_LRU_list); 2183 } 2184 validateCheckUnzipLRUAndLRUList2185 static void validate(const buf_pool_t *buf_pool) { 2186 CheckUnzipLRUAndLRUList check; 2187 ut_list_validate(buf_pool->unzip_LRU, check); 2188 } 2189 }; 2190 #endif /* !UNIV_HOTBACKUP */ 2191 #endif /* UNIV_DEBUG || defined UNIV_BUF_DEBUG */ 2192 2193 #include "buf0buf.ic" 2194 2195 #endif /* !buf0buf_h */ 2196