1 /*****************************************************************************
2 
3 Copyright (c) 1995, 2021, Oracle and/or its affiliates.
4 Copyright (c) 2016, Percona Inc. All Rights Reserved.
5 
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License, version 2.0,
8 as published by the Free Software Foundation.
9 
10 This program is also distributed with certain software (including
11 but not limited to OpenSSL) that is licensed under separate terms,
12 as designated in a particular file or component or in included license
13 documentation.  The authors of MySQL hereby grant you an additional
14 permission to link the program and your derivative works with the
15 separately licensed software that they have included with MySQL.
16 
17 This program is distributed in the hope that it will be useful,
18 but WITHOUT ANY WARRANTY; without even the implied warranty of
19 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20 GNU General Public License, version 2.0, for more details.
21 
22 You should have received a copy of the GNU General Public License along with
23 this program; if not, write to the Free Software Foundation, Inc.,
24 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
25 
26 *****************************************************************************/
27 
28 /**************************************************//**
29 @file include/buf0flu.h
30 The database buffer pool flush algorithm
31 
32 Created 11/5/1995 Heikki Tuuri
33 *******************************************************/
34 
35 #ifndef buf0flu_h
36 #define buf0flu_h
37 
38 #include "univ.i"
39 #include "ut0byte.h"
40 #include "log0log.h"
41 #ifndef UNIV_HOTBACKUP
42 #include "buf0types.h"
43 
44 /** Flag indicating if the page_cleaner is in active state. */
45 extern bool buf_page_cleaner_is_active;
46 
47 /** The number of running LRU manager threads. 0 if LRU manager is inactive. */
48 extern ulint buf_lru_manager_running_threads;
49 
50 #ifdef UNIV_DEBUG
51 
52 /** Value of MySQL global variable used to disable page cleaner. */
53 extern my_bool		innodb_page_cleaner_disabled_debug;
54 
55 #endif /* UNIV_DEBUG */
56 
57 /** Event to synchronise with the flushing. */
58 extern os_event_t	buf_flush_event;
59 
60 class ut_stage_alter_t;
61 
62 /********************************************************************//**
63 Remove a block from the flush list of modified blocks. */
64 void
65 buf_flush_remove(
66 /*=============*/
67 	buf_page_t*	bpage);	/*!< in: pointer to the block in question */
68 /*******************************************************************//**
69 Relocates a buffer control block on the flush_list.
Note that it is assumed that the contents of bpage have already been
71 copied to dpage. */
72 void
73 buf_flush_relocate_on_flush_list(
74 /*=============================*/
75 	buf_page_t*	bpage,	/*!< in/out: control block being moved */
76 	buf_page_t*	dpage);	/*!< in/out: destination block */
77 /********************************************************************//**
78 Updates the flush system data structures when a write is completed. */
79 void
80 buf_flush_write_complete(
81 /*=====================*/
82 	buf_page_t*	bpage);	/*!< in: pointer to the block in question */
83 #endif /* !UNIV_HOTBACKUP */
84 /** Initialize a page for writing to the tablespace.
85 @param[in]	block		buffer block; NULL if bypassing the buffer pool
86 @param[in,out]	page		page frame
87 @param[in,out]	page_zip_	compressed page, or NULL if uncompressed
88 @param[in]	newest_lsn	newest modification LSN to the page
89 @param[in]	skip_checksum	whether to disable the page checksum */
90 void
91 buf_flush_init_for_writing(
92 	const buf_block_t*	block,
93 	byte*			page,
94 	void*			page_zip_,
95 	lsn_t			newest_lsn,
96 	bool			skip_checksum);
97 
98 #ifndef UNIV_HOTBACKUP
99 # if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
100 /********************************************************************//**
101 Writes a flushable page asynchronously from the buffer pool to a file.
102 NOTE: block and LRU list mutexes must be held upon entering this function, and
103 they will be released by this function after flushing. This is loosely based on
104 buf_flush_batch() and buf_flush_page().
105 @return TRUE if the page was flushed and the mutexes released */
106 
107 ibool
108 buf_flush_page_try(
109 /*===============*/
110 	buf_pool_t*	buf_pool,	/*!< in/out: buffer pool instance */
111 	buf_block_t*	block)		/*!< in/out: buffer control block */
112 	MY_ATTRIBUTE((warn_unused_result));
113 # endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
114 /** Do flushing batch of a given type.
115 NOTE: The calling thread is not allowed to own any latches on pages!
116 @param[in,out]	buf_pool	buffer pool instance
117 @param[in]	type		flush type
@param[in]	min_n		wished minimum number of blocks flushed
119 (it is not guaranteed that the actual number is that big, though)
120 @param[in]	lsn_limit	in the case BUF_FLUSH_LIST all blocks whose
121 oldest_modification is smaller than this should be flushed (if their number
122 does not exceed min_n), otherwise ignored
123 @param[out]	n_processed	the number of pages which were processed is
124 passed back to caller. Ignored if NULL
125 @retval true	if a batch was queued successfully.
126 @retval false	if another batch of same type was already running. */
127 bool
128 buf_flush_do_batch(
129 	buf_pool_t*		buf_pool,
130 	buf_flush_t		type,
131 	ulint			min_n,
132 	lsn_t			lsn_limit,
133 	ulint*			n_processed);
134 
135 /******************************************************************//**
136 This function picks up a single page from the tail of the LRU
137 list, flushes it (if it is dirty), removes it from page_hash and LRU
138 list and puts it on the free list. It is called from user threads when
139 they are unable to find a replaceable page at the tail of the LRU
140 list i.e.: when the background LRU flushing in the page_cleaner thread
141 is not fast enough to keep pace with the workload.
142 @return true if success. */
143 bool
144 buf_flush_single_page_from_LRU(
145 /*===========================*/
146 	buf_pool_t*	buf_pool);	/*!< in/out: buffer pool instance */
147 /******************************************************************//**
148 Waits until a flush batch of the given type ends */
149 void
150 buf_flush_wait_batch_end(
151 /*=====================*/
152 	buf_pool_t*	buf_pool,	/*!< in: buffer pool instance */
153 	buf_flush_t	type);		/*!< in: BUF_FLUSH_LRU
154 					or BUF_FLUSH_LIST */
155 /**
156 Waits until a flush batch of the given lsn ends
157 @param[in]	new_oldest	target oldest_modified_lsn to wait for */
158 
159 void
160 buf_flush_wait_flushed(
161 	lsn_t		new_oldest);
162 
163 /******************************************************************//**
164 Waits until a flush batch of the given type ends. This is called by
165 a thread that only wants to wait for a flush to end but doesn't do
166 any flushing itself. */
167 void
168 buf_flush_wait_batch_end_wait_only(
169 /*===============================*/
170 	buf_pool_t*	buf_pool,	/*!< in: buffer pool instance */
171 	buf_flush_t	type);		/*!< in: BUF_FLUSH_LRU
172 					or BUF_FLUSH_LIST */
173 /********************************************************************//**
174 This function should be called at a mini-transaction commit, if a page was
modified in it. Puts the block to the list of modified blocks, if it is not
already in it. */
177 UNIV_INLINE
178 void
179 buf_flush_note_modification(
180 /*========================*/
181 	buf_block_t*	block,		/*!< in: block which is modified */
182 	lsn_t		start_lsn,	/*!< in: start lsn of the first mtr in a
183 					set of mtr's */
184 	lsn_t		end_lsn,	/*!< in: end lsn of the last mtr in the
185 					set of mtr's */
186 	FlushObserver*	observer);	/*!< in: flush observer */
187 
188 /********************************************************************//**
189 This function should be called when recovery has modified a buffer page. */
190 UNIV_INLINE
191 void
192 buf_flush_recv_note_modification(
193 /*=============================*/
194 	buf_block_t*	block,		/*!< in: block which is modified */
195 	lsn_t		start_lsn,	/*!< in: start lsn of the first mtr in a
196 					set of mtr's */
197 	lsn_t		end_lsn);	/*!< in: end lsn of the last mtr in the
198 					set of mtr's */
199 /********************************************************************//**
200 Returns TRUE if the file page block is immediately suitable for replacement,
i.e., the transition FILE_PAGE => NOT_USED is allowed. The caller must hold
the LRU list and block mutexes.
203 @return TRUE if can replace immediately */
204 ibool
205 buf_flush_ready_for_replace(
206 /*========================*/
207 	buf_page_t*	bpage);	/*!< in: buffer control block, must be
208 				buf_page_in_file(bpage) and in the LRU list */
209 
210 #ifdef UNIV_DEBUG
211 /** Disables page cleaner threads (coordinator and workers).
212 It's used by: SET GLOBAL innodb_page_cleaner_disabled_debug = 1 (0).
213 @param[in]	thd		thread handle
214 @param[in]	var		pointer to system variable
215 @param[out]	var_ptr		where the formal string goes
216 @param[in]	save		immediate result from check function */
217 void
218 buf_flush_page_cleaner_disabled_debug_update(
219 	THD*				thd,
220 	struct st_mysql_sys_var*	var,
221 	void*				var_ptr,
222 	const void*			save);
223 #endif /* UNIV_DEBUG */
224 
225 /******************************************************************//**
226 page_cleaner thread tasked with flushing dirty pages from the buffer
227 pools. As of now we'll have only one coordinator of this thread.
228 @return a dummy parameter */
229 extern "C"
230 os_thread_ret_t
231 DECLARE_THREAD(buf_flush_page_cleaner_coordinator)(
232 /*===============================================*/
233 	void*	arg);		/*!< in: a dummy parameter required by
234 				os_thread_create */
235 /******************************************************************//**
236 Worker thread of page_cleaner.
237 @return a dummy parameter */
238 extern "C"
239 os_thread_ret_t
240 DECLARE_THREAD(buf_flush_page_cleaner_worker)(
241 /*==========================================*/
242 	void*	arg);		/*!< in: a dummy parameter required by
243 				os_thread_create */
244 
245 /** LRU manager thread
246 @param[in]	arg	buffer pool instance number for this thread
247 @return a dummy value */
248 extern "C"
249 os_thread_ret_t
250 DECLARE_THREAD(buf_lru_manager)(
251 	void*	arg);
252 
253 /******************************************************************//**
254 Initialize page_cleaner. */
255 void
256 buf_flush_page_cleaner_init(void);
257 /*=============================*/
258 /*********************************************************************//**
259 Clears up tail of the LRU lists:
260 * Put replaceable pages at the tail of LRU to the free list
261 * Flush dirty pages at the tail of LRU to the disk
262 The depth to which we scan each buffer pool is controlled by dynamic
263 config parameter innodb_LRU_scan_depth.
264 @return total pages flushed */
265 ulint
266 buf_flush_LRU_lists(void);
267 /*=====================*/
268 /*********************************************************************//**
269 Wait for any possible LRU flushes that are in progress to end. */
270 void
271 buf_flush_wait_LRU_batch_end(void);
272 /*==============================*/
273 
274 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
/******************************************************************//**
Validates the flush list.
@return TRUE if ok */
ibool
buf_flush_validate(
/*===============*/
	buf_pool_t*	buf_pool);	/*!< buffer pool instance whose
					flush list is validated */
282 #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
283 
284 /********************************************************************//**
285 Initialize the red-black tree to speed up insertions into the flush_list
286 during recovery process. Should be called at the start of recovery
287 process before any page has been read/written. */
288 void
289 buf_flush_init_flush_rbt(void);
290 /*==========================*/
291 
292 /********************************************************************//**
293 Frees up the red-black tree. */
294 void
295 buf_flush_free_flush_rbt(void);
296 /*==========================*/
297 
298 /********************************************************************//**
299 Writes a flushable page asynchronously from the buffer pool to a file.
300 NOTE: in simulated aio we must call
301 os_aio_simulated_wake_handler_threads after we have posted a batch of
302 writes! NOTE: buf_page_get_mutex(bpage) must be held upon entering this
303 function.  The LRU list mutex must be held iff flush_type
304 == BUF_FLUSH_SINGLE_PAGE. Both mutexes will be released by this function if it
305 returns true.
306 @return TRUE if page was flushed */
307 ibool
308 buf_flush_page(
309 /*===========*/
310 	buf_pool_t*	buf_pool,	/*!< in: buffer pool instance */
311 	buf_page_t*	bpage,		/*!< in: buffer control block */
312 	buf_flush_t	flush_type,	/*!< in: type of flush */
313 	bool		sync);		/*!< in: true if sync IO request */
314 /********************************************************************//**
315 Returns true if the block is modified and ready for flushing.
316 @return true if can flush immediately */
317 bool
318 buf_flush_ready_for_flush(
319 /*======================*/
320 	buf_page_t*	bpage,	/*!< in: buffer control block, must be
321 				buf_page_in_file(bpage) */
322 	buf_flush_t	flush_type)/*!< in: type of flush */
323 	MY_ATTRIBUTE((warn_unused_result));
324 
325 #ifdef UNIV_DEBUG
326 /******************************************************************//**
327 Check if there are any dirty pages that belong to a space id in the flush
328 list in a particular buffer pool.
329 @return number of dirty pages present in a single buffer pool */
330 ulint
331 buf_pool_get_dirty_pages_count(
332 /*===========================*/
333 	buf_pool_t*	buf_pool,	/*!< in: buffer pool */
334 	ulint		id,		/*!< in: space id to check */
335 	FlushObserver*	observer);	/*!< in: flush observer to check */
336 /******************************************************************//**
337 Check if there are any dirty pages that belong to a space id in the flush list.
338 @return count of dirty pages present in all the buffer pools */
339 ulint
340 buf_flush_get_dirty_pages_count(
341 /*============================*/
342 	ulint		id,		/*!< in: space id to check */
343 	FlushObserver*	observer);	/*!< in: flush observer to check */
344 #endif /* UNIV_DEBUG */
345 
346 /*******************************************************************//**
347 Signal the page cleaner to flush and wait until it and the LRU manager clean
348 the buffer pool. */
349 void
350 buf_flush_sync_all_buf_pools(void);
351 /*==============================*/
352 
353 /** Request IO burst and wake page_cleaner up.
354 @param[in]	lsn_limit	upper limit of LSN to be flushed */
355 void
356 buf_flush_request_force(
357 	lsn_t	lsn_limit);
358 
359 /** We use FlushObserver to track flushing of non-redo logged pages in bulk
360 create index(BtrBulk.cc).Since we disable redo logging during a index build,
361 we need to make sure that all dirty pages modifed by the index build are
362 flushed to disk before any redo logged operations go to the index. */
363 
364 class FlushObserver {
365 public:
366 	/** Constructor
367 	@param[in]	space_id	table space id
368 	@param[in]	trx		trx instance
369 	@param[in]	stage		performance schema accounting object,
370 	used by ALTER TABLE. It is passed to log_preflush_pool_modified_pages()
371 	for accounting. */
372 	FlushObserver(ulint space_id, trx_t* trx, ut_stage_alter_t* stage);
373 
374 	/** Deconstructor */
375 	~FlushObserver();
376 
377 	/** Check pages have been flushed and removed from the flush list
378 	in a buffer pool instance.
379 	@pram[in]	instance_no	buffer pool instance no
380 	@return true if the pages were removed from the flush list */
is_complete(ulint instance_no)381 	bool is_complete(ulint	instance_no)
382 	{
383 		return(m_flushed->at(instance_no) == m_removed->at(instance_no)
384 		       || m_interrupted);
385 	}
386 
387 	/** Interrupt observer not to wait. */
interrupted()388 	void interrupted()
389 	{
390 		m_interrupted = true;
391 	}
392 
393 	/** Check whether trx is interrupted
394 	@return true if trx is interrupted */
395 	bool check_interrupted();
396 
397 	/** Flush dirty pages. */
398 	void flush();
399 
400 	/** Notify observer of flushing a page
401 	@param[in]	buf_pool	buffer pool instance
402 	@param[in]	bpage		buffer page to flush */
403 	void notify_flush(
404 		buf_pool_t*	buf_pool,
405 		buf_page_t*	bpage);
406 
407 	/** Notify observer of removing a page from flush list
408 	@param[in]	buf_pool	buffer pool instance
409 	@param[in]	bpage		buffer page flushed */
410 	void notify_remove(
411 		buf_pool_t*	buf_pool,
412 		buf_page_t*	bpage);
413 
414 	/** Increase the estimate of dirty pages by this observer
415 	@param[in]	block		buffer pool block */
416 	void inc_estimate(const buf_block_t*	block);
417 
418 	/** @return estimate of dirty pages to be flushed */
get_estimate()419 	ulint get_estimate() const {
420 		os_rmb;
421 		return(m_estimate);
422 	}
423 
get_number_of_pages_flushed()424         ulint get_number_of_pages_flushed() const {
425                 return m_number_of_pages_flushed;
426         }
427 
428 private:
429 	/** Table space id */
430 	ulint			m_space_id;
431 
432 	/** Trx instance */
433 	trx_t*			m_trx;
434 
435 	/** Performance schema accounting object, used by ALTER TABLE.
436 	If not NULL, then stage->begin_phase_flush() will be called initially,
437 	specifying the number of pages to be attempted to be flushed and
438 	subsequently, stage->inc() will be called for each page we attempt to
439 	flush. */
440 	ut_stage_alter_t*	m_stage;
441 
442 	/* Flush request sent */
443 	std::vector<ulint>*	m_flushed;
444 
445 	/* Flush request finished */
446 	std::vector<ulint>*	m_removed;
447 
448 	/* True if the operation was interrupted. */
449 	bool			m_interrupted;
450 
451 	/* Estimate of pages to be flushed */
452 	ulint			m_estimate;
453 
454 	/** LSN at which observer started observing. This is
455 	used to find the dirty blocks that are dirtied before Observer */
456 	const lsn_t		m_lsn;
457 
458         ulint m_number_of_pages_flushed;
459 };
460 
461 #endif /* !UNIV_HOTBACKUP */
462 
463 /** If LRU list of a buf_pool is less than this size then LRU eviction
464 should not happen. This is because when we do LRU flushing we also put
465 the blocks on free list. If LRU list is very small then we can end up
466 in thrashing. */
467 #define BUF_LRU_MIN_LEN		256
468 
469 #ifndef UNIV_NONINL
470 #include "buf0flu.ic"
471 #endif
472 
473 #endif
474