1 /***************************************************************************** 2 3 Copyright (c) 1995, 2021, Oracle and/or its affiliates. 4 Copyright (c) 2016, Percona Inc. All Rights Reserved. 5 6 This program is free software; you can redistribute it and/or modify 7 it under the terms of the GNU General Public License, version 2.0, 8 as published by the Free Software Foundation. 9 10 This program is also distributed with certain software (including 11 but not limited to OpenSSL) that is licensed under separate terms, 12 as designated in a particular file or component or in included license 13 documentation. The authors of MySQL hereby grant you an additional 14 permission to link the program and your derivative works with the 15 separately licensed software that they have included with MySQL. 16 17 This program is distributed in the hope that it will be useful, 18 but WITHOUT ANY WARRANTY; without even the implied warranty of 19 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 20 GNU General Public License, version 2.0, for more details. 21 22 You should have received a copy of the GNU General Public License along with 23 this program; if not, write to the Free Software Foundation, Inc., 24 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA 25 26 *****************************************************************************/ 27 28 /**************************************************//** 29 @file include/buf0flu.h 30 The database buffer pool flush algorithm 31 32 Created 11/5/1995 Heikki Tuuri 33 *******************************************************/ 34 35 #ifndef buf0flu_h 36 #define buf0flu_h 37 38 #include "univ.i" 39 #include "ut0byte.h" 40 #include "log0log.h" 41 #ifndef UNIV_HOTBACKUP 42 #include "buf0types.h" 43 44 /** Flag indicating if the page_cleaner is in active state. */ 45 extern bool buf_page_cleaner_is_active; 46 47 /** The number of running LRU manager threads. 0 if LRU manager is inactive. */ 48 extern ulint buf_lru_manager_running_threads; 49 50 #ifdef UNIV_DEBUG 51 52 /** Value of MySQL global variable used to disable page cleaner. */ 53 extern my_bool innodb_page_cleaner_disabled_debug; 54 55 #endif /* UNIV_DEBUG */ 56 57 /** Event to synchronise with the flushing. */ 58 extern os_event_t buf_flush_event; 59 60 class ut_stage_alter_t; 61 62 /********************************************************************//** 63 Remove a block from the flush list of modified blocks. */ 64 void 65 buf_flush_remove( 66 /*=============*/ 67 buf_page_t* bpage); /*!< in: pointer to the block in question */ 68 /*******************************************************************//** 69 Relocates a buffer control block on the flush_list. 70 Note that it is assumed that the contents of bpage has already been 71 copied to dpage. */ 72 void 73 buf_flush_relocate_on_flush_list( 74 /*=============================*/ 75 buf_page_t* bpage, /*!< in/out: control block being moved */ 76 buf_page_t* dpage); /*!< in/out: destination block */ 77 /********************************************************************//** 78 Updates the flush system data structures when a write is completed. */ 79 void 80 buf_flush_write_complete( 81 /*=====================*/ 82 buf_page_t* bpage); /*!< in: pointer to the block in question */ 83 #endif /* !UNIV_HOTBACKUP */ 84 /** Initialize a page for writing to the tablespace. 85 @param[in] block buffer block; NULL if bypassing the buffer pool 86 @param[in,out] page page frame 87 @param[in,out] page_zip_ compressed page, or NULL if uncompressed 88 @param[in] newest_lsn newest modification LSN to the page 89 @param[in] skip_checksum whether to disable the page checksum */ 90 void 91 buf_flush_init_for_writing( 92 const buf_block_t* block, 93 byte* page, 94 void* page_zip_, 95 lsn_t newest_lsn, 96 bool skip_checksum); 97 98 #ifndef UNIV_HOTBACKUP 99 # if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG 100 /********************************************************************//** 101 Writes a flushable page asynchronously from the buffer pool to a file. 102 NOTE: block and LRU list mutexes must be held upon entering this function, and 103 they will be released by this function after flushing. This is loosely based on 104 buf_flush_batch() and buf_flush_page(). 105 @return TRUE if the page was flushed and the mutexes released */ 106 107 ibool 108 buf_flush_page_try( 109 /*===============*/ 110 buf_pool_t* buf_pool, /*!< in/out: buffer pool instance */ 111 buf_block_t* block) /*!< in/out: buffer control block */ 112 MY_ATTRIBUTE((warn_unused_result)); 113 # endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */ 114 /** Do flushing batch of a given type. 115 NOTE: The calling thread is not allowed to own any latches on pages! 116 @param[in,out] buf_pool buffer pool instance 117 @param[in] type flush type 118 @param[in] min_n wished minimum mumber of blocks flushed 119 (it is not guaranteed that the actual number is that big, though) 120 @param[in] lsn_limit in the case BUF_FLUSH_LIST all blocks whose 121 oldest_modification is smaller than this should be flushed (if their number 122 does not exceed min_n), otherwise ignored 123 @param[out] n_processed the number of pages which were processed is 124 passed back to caller. Ignored if NULL 125 @retval true if a batch was queued successfully. 126 @retval false if another batch of same type was already running. */ 127 bool 128 buf_flush_do_batch( 129 buf_pool_t* buf_pool, 130 buf_flush_t type, 131 ulint min_n, 132 lsn_t lsn_limit, 133 ulint* n_processed); 134 135 /******************************************************************//** 136 This function picks up a single page from the tail of the LRU 137 list, flushes it (if it is dirty), removes it from page_hash and LRU 138 list and puts it on the free list. It is called from user threads when 139 they are unable to find a replaceable page at the tail of the LRU 140 list i.e.: when the background LRU flushing in the page_cleaner thread 141 is not fast enough to keep pace with the workload. 142 @return true if success. */ 143 bool 144 buf_flush_single_page_from_LRU( 145 /*===========================*/ 146 buf_pool_t* buf_pool); /*!< in/out: buffer pool instance */ 147 /******************************************************************//** 148 Waits until a flush batch of the given type ends */ 149 void 150 buf_flush_wait_batch_end( 151 /*=====================*/ 152 buf_pool_t* buf_pool, /*!< in: buffer pool instance */ 153 buf_flush_t type); /*!< in: BUF_FLUSH_LRU 154 or BUF_FLUSH_LIST */ 155 /** 156 Waits until a flush batch of the given lsn ends 157 @param[in] new_oldest target oldest_modified_lsn to wait for */ 158 159 void 160 buf_flush_wait_flushed( 161 lsn_t new_oldest); 162 163 /******************************************************************//** 164 Waits until a flush batch of the given type ends. This is called by 165 a thread that only wants to wait for a flush to end but doesn't do 166 any flushing itself. */ 167 void 168 buf_flush_wait_batch_end_wait_only( 169 /*===============================*/ 170 buf_pool_t* buf_pool, /*!< in: buffer pool instance */ 171 buf_flush_t type); /*!< in: BUF_FLUSH_LRU 172 or BUF_FLUSH_LIST */ 173 /********************************************************************//** 174 This function should be called at a mini-transaction commit, if a page was 175 modified in it. Puts the block to the list of modified blocks, if it not 176 already in it. */ 177 UNIV_INLINE 178 void 179 buf_flush_note_modification( 180 /*========================*/ 181 buf_block_t* block, /*!< in: block which is modified */ 182 lsn_t start_lsn, /*!< in: start lsn of the first mtr in a 183 set of mtr's */ 184 lsn_t end_lsn, /*!< in: end lsn of the last mtr in the 185 set of mtr's */ 186 FlushObserver* observer); /*!< in: flush observer */ 187 188 /********************************************************************//** 189 This function should be called when recovery has modified a buffer page. */ 190 UNIV_INLINE 191 void 192 buf_flush_recv_note_modification( 193 /*=============================*/ 194 buf_block_t* block, /*!< in: block which is modified */ 195 lsn_t start_lsn, /*!< in: start lsn of the first mtr in a 196 set of mtr's */ 197 lsn_t end_lsn); /*!< in: end lsn of the last mtr in the 198 set of mtr's */ 199 /********************************************************************//** 200 Returns TRUE if the file page block is immediately suitable for replacement, 201 i.e., the transition FILE_PAGE => NOT_USED allowed. The caller must hold the 202 LRU list and block mutexes. 203 @return TRUE if can replace immediately */ 204 ibool 205 buf_flush_ready_for_replace( 206 /*========================*/ 207 buf_page_t* bpage); /*!< in: buffer control block, must be 208 buf_page_in_file(bpage) and in the LRU list */ 209 210 #ifdef UNIV_DEBUG 211 /** Disables page cleaner threads (coordinator and workers). 212 It's used by: SET GLOBAL innodb_page_cleaner_disabled_debug = 1 (0). 213 @param[in] thd thread handle 214 @param[in] var pointer to system variable 215 @param[out] var_ptr where the formal string goes 216 @param[in] save immediate result from check function */ 217 void 218 buf_flush_page_cleaner_disabled_debug_update( 219 THD* thd, 220 struct st_mysql_sys_var* var, 221 void* var_ptr, 222 const void* save); 223 #endif /* UNIV_DEBUG */ 224 225 /******************************************************************//** 226 page_cleaner thread tasked with flushing dirty pages from the buffer 227 pools. As of now we'll have only one coordinator of this thread. 228 @return a dummy parameter */ 229 extern "C" 230 os_thread_ret_t 231 DECLARE_THREAD(buf_flush_page_cleaner_coordinator)( 232 /*===============================================*/ 233 void* arg); /*!< in: a dummy parameter required by 234 os_thread_create */ 235 /******************************************************************//** 236 Worker thread of page_cleaner. 237 @return a dummy parameter */ 238 extern "C" 239 os_thread_ret_t 240 DECLARE_THREAD(buf_flush_page_cleaner_worker)( 241 /*==========================================*/ 242 void* arg); /*!< in: a dummy parameter required by 243 os_thread_create */ 244 245 /** LRU manager thread 246 @param[in] arg buffer pool instance number for this thread 247 @return a dummy value */ 248 extern "C" 249 os_thread_ret_t 250 DECLARE_THREAD(buf_lru_manager)( 251 void* arg); 252 253 /******************************************************************//** 254 Initialize page_cleaner. */ 255 void 256 buf_flush_page_cleaner_init(void); 257 /*=============================*/ 258 /*********************************************************************//** 259 Clears up tail of the LRU lists: 260 * Put replaceable pages at the tail of LRU to the free list 261 * Flush dirty pages at the tail of LRU to the disk 262 The depth to which we scan each buffer pool is controlled by dynamic 263 config parameter innodb_LRU_scan_depth. 264 @return total pages flushed */ 265 ulint 266 buf_flush_LRU_lists(void); 267 /*=====================*/ 268 /*********************************************************************//** 269 Wait for any possible LRU flushes that are in progress to end. */ 270 void 271 buf_flush_wait_LRU_batch_end(void); 272 /*==============================*/ 273 274 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG 275 /******************************************************************//** 276 Validates the flush list. 277 @return TRUE if ok */ 278 ibool 279 buf_flush_validate( 280 /*===============*/ 281 buf_pool_t* buf_pool); 282 #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */ 283 284 /********************************************************************//** 285 Initialize the red-black tree to speed up insertions into the flush_list 286 during recovery process. Should be called at the start of recovery 287 process before any page has been read/written. */ 288 void 289 buf_flush_init_flush_rbt(void); 290 /*==========================*/ 291 292 /********************************************************************//** 293 Frees up the red-black tree. */ 294 void 295 buf_flush_free_flush_rbt(void); 296 /*==========================*/ 297 298 /********************************************************************//** 299 Writes a flushable page asynchronously from the buffer pool to a file. 300 NOTE: in simulated aio we must call 301 os_aio_simulated_wake_handler_threads after we have posted a batch of 302 writes! NOTE: buf_page_get_mutex(bpage) must be held upon entering this 303 function. The LRU list mutex must be held iff flush_type 304 == BUF_FLUSH_SINGLE_PAGE. Both mutexes will be released by this function if it 305 returns true. 306 @return TRUE if page was flushed */ 307 ibool 308 buf_flush_page( 309 /*===========*/ 310 buf_pool_t* buf_pool, /*!< in: buffer pool instance */ 311 buf_page_t* bpage, /*!< in: buffer control block */ 312 buf_flush_t flush_type, /*!< in: type of flush */ 313 bool sync); /*!< in: true if sync IO request */ 314 /********************************************************************//** 315 Returns true if the block is modified and ready for flushing. 316 @return true if can flush immediately */ 317 bool 318 buf_flush_ready_for_flush( 319 /*======================*/ 320 buf_page_t* bpage, /*!< in: buffer control block, must be 321 buf_page_in_file(bpage) */ 322 buf_flush_t flush_type)/*!< in: type of flush */ 323 MY_ATTRIBUTE((warn_unused_result)); 324 325 #ifdef UNIV_DEBUG 326 /******************************************************************//** 327 Check if there are any dirty pages that belong to a space id in the flush 328 list in a particular buffer pool. 329 @return number of dirty pages present in a single buffer pool */ 330 ulint 331 buf_pool_get_dirty_pages_count( 332 /*===========================*/ 333 buf_pool_t* buf_pool, /*!< in: buffer pool */ 334 ulint id, /*!< in: space id to check */ 335 FlushObserver* observer); /*!< in: flush observer to check */ 336 /******************************************************************//** 337 Check if there are any dirty pages that belong to a space id in the flush list. 338 @return count of dirty pages present in all the buffer pools */ 339 ulint 340 buf_flush_get_dirty_pages_count( 341 /*============================*/ 342 ulint id, /*!< in: space id to check */ 343 FlushObserver* observer); /*!< in: flush observer to check */ 344 #endif /* UNIV_DEBUG */ 345 346 /*******************************************************************//** 347 Signal the page cleaner to flush and wait until it and the LRU manager clean 348 the buffer pool. */ 349 void 350 buf_flush_sync_all_buf_pools(void); 351 /*==============================*/ 352 353 /** Request IO burst and wake page_cleaner up. 354 @param[in] lsn_limit upper limit of LSN to be flushed */ 355 void 356 buf_flush_request_force( 357 lsn_t lsn_limit); 358 359 /** We use FlushObserver to track flushing of non-redo logged pages in bulk 360 create index(BtrBulk.cc).Since we disable redo logging during a index build, 361 we need to make sure that all dirty pages modifed by the index build are 362 flushed to disk before any redo logged operations go to the index. */ 363 364 class FlushObserver { 365 public: 366 /** Constructor 367 @param[in] space_id table space id 368 @param[in] trx trx instance 369 @param[in] stage performance schema accounting object, 370 used by ALTER TABLE. It is passed to log_preflush_pool_modified_pages() 371 for accounting. */ 372 FlushObserver(ulint space_id, trx_t* trx, ut_stage_alter_t* stage); 373 374 /** Deconstructor */ 375 ~FlushObserver(); 376 377 /** Check pages have been flushed and removed from the flush list 378 in a buffer pool instance. 379 @pram[in] instance_no buffer pool instance no 380 @return true if the pages were removed from the flush list */ is_complete(ulint instance_no)381 bool is_complete(ulint instance_no) 382 { 383 return(m_flushed->at(instance_no) == m_removed->at(instance_no) 384 || m_interrupted); 385 } 386 387 /** Interrupt observer not to wait. */ interrupted()388 void interrupted() 389 { 390 m_interrupted = true; 391 } 392 393 /** Check whether trx is interrupted 394 @return true if trx is interrupted */ 395 bool check_interrupted(); 396 397 /** Flush dirty pages. */ 398 void flush(); 399 400 /** Notify observer of flushing a page 401 @param[in] buf_pool buffer pool instance 402 @param[in] bpage buffer page to flush */ 403 void notify_flush( 404 buf_pool_t* buf_pool, 405 buf_page_t* bpage); 406 407 /** Notify observer of removing a page from flush list 408 @param[in] buf_pool buffer pool instance 409 @param[in] bpage buffer page flushed */ 410 void notify_remove( 411 buf_pool_t* buf_pool, 412 buf_page_t* bpage); 413 414 /** Increase the estimate of dirty pages by this observer 415 @param[in] block buffer pool block */ 416 void inc_estimate(const buf_block_t* block); 417 418 /** @return estimate of dirty pages to be flushed */ get_estimate()419 ulint get_estimate() const { 420 os_rmb; 421 return(m_estimate); 422 } 423 get_number_of_pages_flushed()424 ulint get_number_of_pages_flushed() const { 425 return m_number_of_pages_flushed; 426 } 427 428 private: 429 /** Table space id */ 430 ulint m_space_id; 431 432 /** Trx instance */ 433 trx_t* m_trx; 434 435 /** Performance schema accounting object, used by ALTER TABLE. 436 If not NULL, then stage->begin_phase_flush() will be called initially, 437 specifying the number of pages to be attempted to be flushed and 438 subsequently, stage->inc() will be called for each page we attempt to 439 flush. */ 440 ut_stage_alter_t* m_stage; 441 442 /* Flush request sent */ 443 std::vector<ulint>* m_flushed; 444 445 /* Flush request finished */ 446 std::vector<ulint>* m_removed; 447 448 /* True if the operation was interrupted. */ 449 bool m_interrupted; 450 451 /* Estimate of pages to be flushed */ 452 ulint m_estimate; 453 454 /** LSN at which observer started observing. This is 455 used to find the dirty blocks that are dirtied before Observer */ 456 const lsn_t m_lsn; 457 458 ulint m_number_of_pages_flushed; 459 }; 460 461 #endif /* !UNIV_HOTBACKUP */ 462 463 /** If LRU list of a buf_pool is less than this size then LRU eviction 464 should not happen. This is because when we do LRU flushing we also put 465 the blocks on free list. If LRU list is very small then we can end up 466 in thrashing. */ 467 #define BUF_LRU_MIN_LEN 256 468 469 #ifndef UNIV_NONINL 470 #include "buf0flu.ic" 471 #endif 472 473 #endif 474