1 /*****************************************************************************
2 
3 Copyright (c) 1995, 2021, Oracle and/or its affiliates.
4 
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License, version 2.0,
7 as published by the Free Software Foundation.
8 
9 This program is also distributed with certain software (including
10 but not limited to OpenSSL) that is licensed under separate terms,
11 as designated in a particular file or component or in included license
12 documentation.  The authors of MySQL hereby grant you an additional
13 permission to link the program and your derivative works with the
14 separately licensed software that they have included with MySQL.
15 
16 This program is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19 GNU General Public License, version 2.0, for more details.
20 
21 You should have received a copy of the GNU General Public License along with
22 this program; if not, write to the Free Software Foundation, Inc.,
23 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
24 
25 *****************************************************************************/
26 
27 /**************************************************//**
28 @file include/buf0buf.h
29 The database buffer pool high-level routines
30 
31 Created 11/5/1995 Heikki Tuuri
32 *******************************************************/
33 
34 #ifndef buf0buf_h
35 #define buf0buf_h
36 
37 #include "univ.i"
38 #include "fil0fil.h"
39 #include "mtr0types.h"
40 #include "buf0types.h"
41 #ifndef UNIV_INNOCHECKSUM
42 #include "hash0hash.h"
43 #include "ut0byte.h"
44 #include "page0types.h"
45 #ifndef UNIV_HOTBACKUP
46 #include "ut0rbt.h"
47 #include "os0proc.h"
48 #include "log0log.h"
49 #include "srv0srv.h"
50 #include <ostream>
51 
52 // Forward declaration
53 struct fil_addr_t;
54 
55 /** @name Modes for buf_page_get_gen */
56 /* @{ */
57 #define BUF_GET			10	/*!< get always */
58 #define	BUF_GET_IF_IN_POOL	11	/*!< get if in pool */
59 #define BUF_PEEK_IF_IN_POOL	12	/*!< get if in pool, do not make
60 					the block young in the LRU list */
61 #define BUF_GET_NO_LATCH	14	/*!< get and bufferfix, but
62 					set no latch; we have
63 					separated this case, because
64 					it is error-prone programming
65 					not to set a latch, and it
66 					should be used with care */
67 #define BUF_GET_IF_IN_POOL_OR_WATCH	15
68 					/*!< Get the page only if it's in the
69 					buffer pool, if not then set a watch
70 					on the page. */
71 #define BUF_GET_POSSIBLY_FREED		16
72 					/*!< Like BUF_GET, but do not mind
73 					if the file page has been freed. */
74 /* @} */
75 /** @name Modes for buf_page_get_known_nowait */
76 /* @{ */
77 #define BUF_MAKE_YOUNG	51		/*!< Move the block to the
78 					start of the LRU list if there
79 					is a danger that the block
80 					would drift out of the buffer
81 					pool*/
82 #define BUF_KEEP_OLD	52		/*!< Preserve the current LRU
83 					position of the block. */
84 /* @} */
85 
86 #define MAX_BUFFER_POOLS_BITS	6	/*!< Number of bits used to represent
87 					a buffer pool ID */
88 
89 #define MAX_BUFFER_POOLS 	(1 << MAX_BUFFER_POOLS_BITS)
90 					/*!< The maximum number of buffer
91 					pools that can be defined */
92 
93 #define BUF_POOL_WATCH_SIZE		(srv_n_purge_threads + 1)
94 					/*!< Maximum number of concurrent
95 					buffer pool watches */
96 #define MAX_PAGE_HASH_LOCKS	1024	/*!< The maximum number of
97 					page_hash locks */
98 
99 extern	buf_pool_t*	buf_pool_ptr;	/*!< The buffer pools
100 					of the database */
101 
102 #ifdef UNIV_DEBUG
103 extern my_bool	buf_disable_resize_buffer_pool_debug; /*!< if TRUE, resizing
104 					buffer pool is not allowed. */
105 #endif /* UNIV_DEBUG */
106 #else /* !UNIV_HOTBACKUP */
107 extern buf_block_t*	back_block1;	/*!< first block, for --apply-log */
108 extern buf_block_t*	back_block2;	/*!< second block, for page reorganize */
109 #endif /* !UNIV_HOTBACKUP */
110 #endif /* !UNIV_INNOCHECKSUM */
111 
112 /** Magic value to use instead of checksums when they are disabled */
113 #define BUF_NO_CHECKSUM_MAGIC 0xDEADBEEFUL
114 
115 #ifndef UNIV_INNOCHECKSUM
116 /** @brief States of a control block
117 @see buf_page_t
118 
119 The enumeration values must be 0..7. */
120 enum buf_page_state {
121 	BUF_BLOCK_POOL_WATCH,		/*!< a sentinel for the buffer pool
122 					watch, element of buf_pool->watch[] */
123 	BUF_BLOCK_ZIP_PAGE,		/*!< contains a clean
124 					compressed page */
125 	BUF_BLOCK_ZIP_DIRTY,		/*!< contains a compressed
126 					page that is in the
127 					buf_pool->flush_list */
128 
129 	BUF_BLOCK_NOT_USED,		/*!< is in the free list;
130 					must be after the BUF_BLOCK_ZIP_
131 					constants for compressed-only pages
132 					@see buf_block_state_valid() */
133 	BUF_BLOCK_READY_FOR_USE,	/*!< when buf_LRU_get_free_block
134 					returns a block, it is in this state */
135 	BUF_BLOCK_FILE_PAGE,		/*!< contains a buffered file page */
136 	BUF_BLOCK_MEMORY,		/*!< contains some main memory
137 					object */
138 	BUF_BLOCK_REMOVE_HASH		/*!< hash index should be removed
139 					before putting to the free list */
140 };
141 
142 
143 /** This structure defines information we will fetch from each buffer pool. It
144 will be used to print table IO stats */
145 struct buf_pool_info_t{
146 	/* General buffer pool info */
147 	ulint	pool_unique_id;		/*!< Buffer Pool ID */
148 	ulint	pool_size;		/*!< Buffer Pool size in pages */
149 	ulint	pool_size_bytes;	/*!< Buffer Pool size in bytes */
150 	ulint	lru_len;		/*!< Length of buf_pool->LRU */
151 	ulint	old_lru_len;		/*!< buf_pool->LRU_old_len */
152 	ulint	free_list_len;		/*!< Length of buf_pool->free list */
153 	ulint	flush_list_len;		/*!< Length of buf_pool->flush_list */
154 	ulint	n_pend_unzip;		/*!< buf_pool->n_pend_unzip, pages
155 					pending decompress */
156 	ulint	n_pend_reads;		/*!< buf_pool->n_pend_reads, pages
157 					pending read */
158 	ulint	n_pending_flush_lru;	/*!< Pages pending flush in LRU */
159 	ulint	n_pending_flush_single_page;/*!< Pages pending to be
160 					flushed as part of single page
161 					flushes issued by various user
162 					threads */
163 	ulint	n_pending_flush_list;	/*!< Pages pending flush in FLUSH
164 					LIST */
165 	ulint	n_pages_made_young;	/*!< number of pages made young */
166 	ulint	n_pages_not_made_young;	/*!< number of pages not made young */
167 	ulint	n_pages_read;		/*!< buf_pool->n_pages_read */
168 	ulint	n_pages_created;	/*!< buf_pool->n_pages_created */
169 	ulint	n_pages_written;	/*!< buf_pool->n_pages_written */
170 	ulint	n_page_gets;		/*!< buf_pool->n_page_gets */
171 	ulint	n_ra_pages_read_rnd;	/*!< buf_pool->n_ra_pages_read_rnd,
172 					number of pages readahead */
173 	ulint	n_ra_pages_read;	/*!< buf_pool->n_ra_pages_read, number
174 					of pages readahead */
175 	ulint	n_ra_pages_evicted;	/*!< buf_pool->n_ra_pages_evicted,
176 					number of readahead pages evicted
177 					without access */
178 	ulint	n_page_get_delta;	/*!< num of buffer pool page gets since
179 					last printout */
180 
181 	/* Buffer pool access stats */
182 	double	page_made_young_rate;	/*!< page made young rate in pages
183 					per second */
184 	double	page_not_made_young_rate;/*!< page not made young rate
185 					in pages per second */
186 	double	pages_read_rate;	/*!< num of pages read per second */
187 	double	pages_created_rate;	/*!< num of pages created per second */
188 	double	pages_written_rate;	/*!< num of pages written per second */
189 	ulint	page_read_delta;	/*!< num of pages read since last
190 					printout */
191 	ulint	young_making_delta;	/*!< num of pages made young since
192 					last printout */
193 	ulint	not_young_making_delta;	/*!< num of pages not made young since
194 					last printout */
195 
196 	/* Statistics about read ahead algorithm.  */
197 	double	pages_readahead_rnd_rate;/*!< random readahead rate in pages per
198 					second */
199 	double	pages_readahead_rate;	/*!< readahead rate in pages per
200 					second */
201 	double	pages_evicted_rate;	/*!< rate of readahead page evicted
202 					without access, in pages per second */
203 
204 	/* Stats about LRU eviction */
205 	ulint	unzip_lru_len;		/*!< length of buf_pool->unzip_LRU
206 					list */
207 	/* Counters for LRU policy */
208 	ulint	io_sum;			/*!< buf_LRU_stat_sum.io */
209 	ulint	io_cur;			/*!< buf_LRU_stat_cur.io, num of IO
210 					for current interval */
211 	ulint	unzip_sum;		/*!< buf_LRU_stat_sum.unzip */
212 	ulint	unzip_cur;		/*!< buf_LRU_stat_cur.unzip, num
213 					pages decompressed in current
214 					interval */
215 };
216 
217 /** The occupied bytes of lists in all buffer pools */
218 struct buf_pools_list_size_t {
219 	ulint	LRU_bytes;		/*!< LRU size in bytes */
220 	ulint	unzip_LRU_bytes;	/*!< unzip_LRU size in bytes */
221 	ulint	flush_list_bytes;	/*!< flush_list size in bytes */
222 };
223 
224 /** Page identifier. */
225 class page_id_t {
226 public:
227 
228 	/** Constructor from (space, page_no).
229 	@param[in]	space	tablespace id
230 	@param[in]	page_no	page number */
231 	page_id_t(ulint space, ulint page_no)
232 		:
233 		m_space(static_cast<ib_uint32_t>(space)),
234 		m_page_no(static_cast<ib_uint32_t>(page_no)),
235 		m_fold(ULINT_UNDEFINED)
236 	{
237 		ut_ad(space <= 0xFFFFFFFFU);
238 		ut_ad(page_no <= 0xFFFFFFFFU);
239 	}
240 
241 	/** Retrieve the tablespace id.
242 	@return tablespace id */
243 	inline ib_uint32_t space() const
244 	{
245 		return(m_space);
246 	}
247 
248 	/** Retrieve the page number.
249 	@return page number */
250 	inline ib_uint32_t page_no() const
251 	{
252 		return(m_page_no);
253 	}
254 
255 	/** Retrieve the fold value.
256 	@return fold value */
257 	inline ulint fold() const
258 	{
259 		/* Initialize m_fold if it has not been initialized yet. */
260 		if (m_fold == ULINT_UNDEFINED) {
261 			m_fold = (m_space << 20) + m_space + m_page_no;
262 			ut_ad(m_fold != ULINT_UNDEFINED);
263 		}
264 
265 		return(m_fold);
266 	}
267 
268 	/** Copy the values from a given page_id_t object.
269 	@param[in]	src	page id object whose values to fetch */
270 	inline void copy_from(const page_id_t& src)
271 	{
272 		m_space = src.space();
273 		m_page_no = src.page_no();
274 		m_fold = src.fold();
275 	}
276 
277 	/** Reset the values from a (space, page_no).
278 	@param[in]	space	tablespace id
279 	@param[in]	page_no	page number */
280 	inline void reset(ulint space, ulint page_no)
281 	{
282 		m_space = static_cast<ib_uint32_t>(space);
283 		m_page_no = static_cast<ib_uint32_t>(page_no);
284 		m_fold = ULINT_UNDEFINED;
285 
286 		ut_ad(space <= 0xFFFFFFFFU);
287 		ut_ad(page_no <= 0xFFFFFFFFU);
288 	}
289 
290 	/** Reset the page number only.
291 	@param[in]	page_no	page number */
292 	inline void set_page_no(ulint page_no)
293 	{
294 		m_page_no = static_cast<ib_uint32_t>(page_no);
295 		m_fold = ULINT_UNDEFINED;
296 
297 		ut_ad(page_no <= 0xFFFFFFFFU);
298 	}
299 
300 	/** Check if a given page_id_t object is equal to the current one.
301 	@param[in]	a	page_id_t object to compare
302 	@return true if equal */
303 	inline bool equals_to(const page_id_t& a) const
304 	{
305 		return(a.space() == m_space && a.page_no() == m_page_no);
306 	}
307 
308 private:
309 
310 	/** Tablespace id. */
311 	ib_uint32_t	m_space;
312 
313 	/** Page number. */
314 	ib_uint32_t	m_page_no;
315 
316 	/** A fold value derived from m_space and m_page_no,
317 	used in hashing. */
318 	mutable ulint	m_fold;
319 
320 	/* Disable implicit copying. */
321 	void operator=(const page_id_t&);
322 
323 	/** Declare the overloaded global operator<< as a friend of this
324 	class. Refer to the global declaration for further details.  Print
325 	the given page_id_t object.
326 	@param[in,out]	out	the output stream
327 	@param[in]	page_id	the page_id_t object to be printed
328 	@return the output stream */
329         friend
330         std::ostream&
331         operator<<(
332                 std::ostream&           out,
333                 const page_id_t&        page_id);
334 };
335 
336 /** Print the given page_id_t object.
337 @param[in,out]	out	the output stream
338 @param[in]	page_id	the page_id_t object to be printed
339 @return the output stream */
340 std::ostream&
341 operator<<(
342 	std::ostream&		out,
343 	const page_id_t&	page_id);
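
/* Example (an illustrative sketch, not a prescribed usage; space_id and
page_no are assumed to be valid values supplied by the caller, and ib::info()
from ut0ut.h is assumed to be available):

	page_id_t	id(space_id, page_no);
	page_id_t	other(space_id, page_no + 1);

	ut_ad(!id.equals_to(other));

	// The fold value is computed lazily and cached; it is the key used
	// for buf_pool->page_hash lookups.
	ulint	fold = id.fold();

	// The overloaded operator<< (defined in buf0buf.cc) prints the id
	// in a human-readable form, e.g. for error messages.
	ib::info() << "looking up " << id << " (fold " << fold << ")";
*/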
344 
345 #ifndef UNIV_HOTBACKUP
346 /********************************************************************//**
347 Creates the buffer pool.
348 @return DB_SUCCESS if success, DB_ERROR if not enough memory or error */
349 dberr_t
350 buf_pool_init(
351 /*=========*/
352 	ulint	size,		/*!< in: Size of the total pool in bytes */
353 	bool	populate,	/*!< in: Force virtual page preallocation */
354 	ulint	n_instances);	/*!< in: Number of instances */
355 /********************************************************************//**
356 Frees the buffer pool at shutdown.  This must not be invoked before
357 freeing all mutexes. */
358 void
359 buf_pool_free(
360 /*==========*/
361 	ulint	n_instances);	/*!< in: number of instances to free */
362 
363 /** Determines if a block is intended to be withdrawn.
364 @param[in]	buf_pool	buffer pool instance
365 @param[in]	block		pointer to control block
366 @retval true	if will be withdrawn */
367 bool
368 buf_block_will_withdrawn(
369 	buf_pool_t*		buf_pool,
370 	const buf_block_t*	block);
371 
372 /** Determines if a frame is intended to be withdrawn.
373 @param[in]	buf_pool	buffer pool instance
374 @param[in]	ptr		pointer to a frame
375 @retval true	if will be withdrawn */
376 bool
377 buf_frame_will_withdrawn(
378 	buf_pool_t*	buf_pool,
379 	const byte*	ptr);
380 
381 /** Resize the buffer pool based on srv_buf_pool_size from
382 srv_buf_pool_old_size. */
383 void
384 buf_pool_resize();
385 
386 /** This is the thread task for resizing the buffer pool. It waits for an
387 event and, when woken up, performs a resize and then sleeps again.
388 @param[in]	arg	a dummy parameter required by os_thread_create.
389 @return	this function does not return, calls os_thread_exit()
390 */
391 extern "C"
392 os_thread_ret_t
393 DECLARE_THREAD(buf_resize_thread)(
394 /*==============================*/
395 	void*	arg);				/*!< in: a dummy parameter
396 						required by os_thread_create */
397 
398 /** Checks if the innobase_should_madvise_buf_pool() value has changed since
399 we last checked, and if so updates buf_pool_should_madvise and calls madvise
400 for all chunks in all srv_buf_pool_instances.
401 @see buf_pool_should_madvise comment for a longer explanation. */
402 void buf_pool_update_madvise();
403 
404 /********************************************************************//**
405 Clears the adaptive hash index on all pages in the buffer pool. */
406 void
407 buf_pool_clear_hash_index(void);
408 /*===========================*/
409 
410 /*********************************************************************//**
411 Gets the current size of buffer buf_pool in bytes.
412 @return size in bytes */
413 UNIV_INLINE
414 ulint
415 buf_pool_get_curr_size(void);
416 /*========================*/
417 /*********************************************************************//**
418 Gets the current size of buffer buf_pool in frames.
419 @return size in pages */
420 UNIV_INLINE
421 ulint
422 buf_pool_get_n_pages(void);
423 /*=======================*/
424 /********************************************************************//**
425 Gets the smallest oldest_modification lsn for any page in the pool. Returns
426 zero if all modified pages have been flushed to disk.
427 @return oldest modification in pool, zero if none */
428 lsn_t
429 buf_pool_get_oldest_modification(void);
430 /*==================================*/
431 
432 /********************************************************************//**
433 Allocates a buf_page_t descriptor. This function must succeed. In case
434 of failure we assert in this function. */
435 UNIV_INLINE
436 buf_page_t*
437 buf_page_alloc_descriptor(void)
438 /*===========================*/
439 	MY_ATTRIBUTE((malloc));
440 /********************************************************************//**
441 Free a buf_page_t descriptor. */
442 UNIV_INLINE
443 void
444 buf_page_free_descriptor(
445 /*=====================*/
446 	buf_page_t*	bpage)	/*!< in: bpage descriptor to free. */
447 	MY_ATTRIBUTE((nonnull));
448 
449 /********************************************************************//**
450 Allocates a buffer block.
451 @return own: the allocated block, in state BUF_BLOCK_MEMORY */
452 buf_block_t*
453 buf_block_alloc(
454 /*============*/
455 	buf_pool_t*	buf_pool);	/*!< in: buffer pool instance,
456 					or NULL for round-robin selection
457 					of the buffer pool */
458 /********************************************************************//**
459 Frees a buffer block which does not contain a file page. */
460 UNIV_INLINE
461 void
462 buf_block_free(
463 /*===========*/
464 	buf_block_t*	block);	/*!< in, own: block to be freed */
465 #endif /* !UNIV_HOTBACKUP */
466 /*********************************************************************//**
467 Copies contents of a buffer frame to a given buffer.
468 @return buf */
469 UNIV_INLINE
470 byte*
471 buf_frame_copy(
472 /*===========*/
473 	byte*			buf,	/*!< in: buffer to copy to */
474 	const buf_frame_t*	frame);	/*!< in: buffer frame */
475 #ifndef UNIV_HOTBACKUP
476 /**************************************************************//**
477 NOTE! The following macros should be used instead of buf_page_get_gen,
478 to improve debugging. Only the values RW_S_LATCH and RW_X_LATCH are allowed
479 for the LA (latch mode) argument! */
480 #define buf_page_get(ID, SIZE, LA, MTR)	\
481 	buf_page_get_gen(ID, SIZE, LA, NULL, BUF_GET, __FILE__, __LINE__, MTR)
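/* Example (an illustrative sketch of the usual latched access pattern;
space_id, page_no and page_size are assumed to be known to the caller):

	mtr_t	mtr;

	mtr_start(&mtr);

	buf_block_t*	block = buf_page_get(
		page_id_t(space_id, page_no), page_size,
		RW_S_LATCH, &mtr);

	// ... read the page contents via buf_block_get_frame(block) ...

	mtr_commit(&mtr);	// releases the latch and the buffer-fix
*/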
482 /**************************************************************//**
483 Use these macros to bufferfix a page with no latching. Remember not to
484 read the contents of the page unless you know it is safe. Do not modify
485 the contents of the page! We have separated this case, because it is
486 error-prone programming not to set a latch, and it should be used
487 with care. */
488 #define buf_page_get_with_no_latch(ID, SIZE, MTR)	\
489 	buf_page_get_gen(ID, SIZE, RW_NO_LATCH, NULL, BUF_GET_NO_LATCH, \
490 			 __FILE__, __LINE__, MTR)
491 /********************************************************************//**
492 This is the general function used to get optimistic access to a database
493 page.
494 @return TRUE if success */
495 ibool
496 buf_page_optimistic_get(
497 /*====================*/
498 	ulint		rw_latch,/*!< in: RW_S_LATCH, RW_X_LATCH */
499 	buf_block_t*	block,	/*!< in: guessed block */
500 	ib_uint64_t	modify_clock,/*!< in: modify clock value */
501 	const char*	file,	/*!< in: file name */
502 	ulint		line,	/*!< in: line where called */
503 	mtr_t*		mtr);	/*!< in: mini-transaction */
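/* Example (an illustrative sketch of the optimistic access pattern; "block"
is assumed to be a block that the caller latched earlier and whose modify
clock it saved before releasing the latch):

	ib_uint64_t	saved_clock = buf_block_get_modify_clock(block);

	// ... latch released, time passes ...

	mtr_t	mtr;
	mtr_start(&mtr);

	if (buf_page_optimistic_get(RW_S_LATCH, block, saved_clock,
				    __FILE__, __LINE__, &mtr)) {
		// The guess was still valid: the block is s-latched and has
		// not been modified or evicted in the meantime.
	} else {
		// Fall back to a pessimistic buf_page_get() lookup.
	}

	mtr_commit(&mtr);
*/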
504 /********************************************************************//**
505 This is used to get access to a known database page, when no waiting can be
506 done.
507 @return TRUE if success */
508 ibool
509 buf_page_get_known_nowait(
510 /*======================*/
511 	ulint		rw_latch,/*!< in: RW_S_LATCH, RW_X_LATCH */
512 	buf_block_t*	block,	/*!< in: the known page */
513 	ulint		mode,	/*!< in: BUF_MAKE_YOUNG or BUF_KEEP_OLD */
514 	const char*	file,	/*!< in: file name */
515 	ulint		line,	/*!< in: line where called */
516 	mtr_t*		mtr);	/*!< in: mini-transaction */
517 
518 /** Given a tablespace id and page number tries to get that page. If the
519 page is not in the buffer pool it is not loaded and NULL is returned.
520 Suitable for use while holding the lock_sys_t::mutex.
521 @param[in]	page_id	page id
522 @param[in]	file	file name
523 @param[in]	line	line where called
524 @param[in]	mtr	mini-transaction
525 @return pointer to a page or NULL */
526 const buf_block_t*
527 buf_page_try_get_func(
528 	const page_id_t&	page_id,
529 	const char*		file,
530 	ulint			line,
531 	mtr_t*			mtr);
532 
533 /** Tries to get a page.
534 If the page is not in the buffer pool it is not loaded. Suitable for use
535 while holding the lock_sys_t::mutex.
536 @param[in]	page_id	page identifier
537 @param[in]	mtr	mini-transaction
538 @return the page if in buffer pool, NULL if not */
539 #define buf_page_try_get(page_id, mtr)	\
540 	buf_page_try_get_func((page_id), __FILE__, __LINE__, mtr);
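
/* Example (an illustrative sketch; space_id and page_no are assumed values,
and the caller is assumed to hold lock_sys->mutex):

	mtr_t	mtr;

	mtr_start(&mtr);

	const buf_block_t*	block = buf_page_try_get(
		page_id_t(space_id, page_no), &mtr);

	if (block != NULL) {
		// The page was resident; it is now pinned by the
		// mini-transaction until mtr_commit().
	}

	mtr_commit(&mtr);
*/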
541 
542 /** Get read access to a compressed page (usually of type
543 FIL_PAGE_TYPE_ZBLOB or FIL_PAGE_TYPE_ZBLOB2).
544 The page must be released with buf_page_release_zip().
545 NOTE: the page is not protected by any latch.  Mutual exclusion has to
546 be implemented at a higher level.  In other words, all possible
547 accesses to a given page through this function must be protected by
548 the same set of mutexes or latches.
549 @param[in]	page_id		page id
550 @param[in]	page_size	page size
551 @return pointer to the block */
552 buf_page_t*
553 buf_page_get_zip(
554 	const page_id_t&	page_id,
555 	const page_size_t&	page_size);
556 
557 /** This is the general function used to get access to a database page.
558 @param[in]	page_id		page id
559 @param[in]	rw_latch	RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH
560 @param[in]	guess		guessed block or NULL
561 @param[in]	mode		BUF_GET, BUF_GET_IF_IN_POOL,
562 BUF_PEEK_IF_IN_POOL, BUF_GET_NO_LATCH, or BUF_GET_IF_IN_POOL_OR_WATCH
563 @param[in]	file		file name
564 @param[in]	line		line where called
565 @param[in]	mtr		mini-transaction
566 @param[in]	dirty_with_no_latch
567 				mark page as dirty even if page
568 				is being pinned without any latch
569 @return pointer to the block or NULL */
570 buf_block_t*
571 buf_page_get_gen(
572 	const page_id_t&	page_id,
573 	const page_size_t&	page_size,
574 	ulint			rw_latch,
575 	buf_block_t*		guess,
576 	ulint			mode,
577 	const char*		file,
578 	ulint			line,
579 	mtr_t*			mtr,
580 	bool			dirty_with_no_latch = false,
581 	dberr_t*		err = NULL);
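
/* Example (an illustrative sketch of a direct call with BUF_GET_IF_IN_POOL,
which does not read the page in from disk when it is not resident; the id and
size values are assumed):

	mtr_t	mtr;

	mtr_start(&mtr);

	buf_block_t*	block = buf_page_get_gen(
		page_id_t(space_id, page_no), page_size,
		RW_S_LATCH, NULL, BUF_GET_IF_IN_POOL,
		__FILE__, __LINE__, &mtr);

	if (block == NULL) {
		// The page is not resident; the caller decides whether to
		// schedule a read-ahead or simply skip it.
	}

	mtr_commit(&mtr);
*/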
582 
583 /** Initializes a page in the buffer pool. The page is usually not read
584 from a file, even if it cannot be found in the buffer pool. This is one
585 of the functions which perform the state transition NOT_USED =>
586 FILE_PAGE on a block (the other is buf_page_get_gen).
587 @param[in]	page_id		page id
588 @param[in]	page_size	page size
589 @param[in]	mtr		mini-transaction
590 @return pointer to the block, page bufferfixed */
591 buf_block_t*
592 buf_page_create(
593 	const page_id_t&	page_id,
594 	const page_size_t&	page_size,
595 	mtr_t*			mtr);
596 
597 #else /* !UNIV_HOTBACKUP */
598 
599 /** Inits a page in the buffer pool, for use in mysqlbackup --restore.
600 @param[in]	page_id		page id
601 @param[in]	page_size	page size
602 @param[in,out]	block		block to init */
603 void
604 buf_page_init_for_backup_restore(
605 	const page_id_t&	page_id,
606 	const page_size_t&	page_size,
607 	buf_block_t*		block);
608 
609 #endif /* !UNIV_HOTBACKUP */
610 
611 #ifndef UNIV_HOTBACKUP
612 /********************************************************************//**
613 Releases a compressed-only page acquired with buf_page_get_zip(). */
614 UNIV_INLINE
615 void
616 buf_page_release_zip(
617 /*=================*/
618 	buf_page_t*	bpage);		/*!< in: buffer block */
619 /********************************************************************//**
620 Releases a latch, if specified. */
621 UNIV_INLINE
622 void
623 buf_page_release_latch(
624 /*=====================*/
625 	buf_block_t*	block,		/*!< in: buffer block */
626 	ulint		rw_latch);	/*!< in: RW_S_LATCH, RW_X_LATCH,
627 					RW_NO_LATCH */
628 /********************************************************************//**
629 Moves a page to the start of the buffer pool LRU list. This high-level
630 function can be used to prevent an important page from slipping out of
631 the buffer pool. */
632 void
633 buf_page_make_young(
634 /*================*/
635 	buf_page_t*	bpage);	/*!< in: buffer block of a file page */
636 
637 /** Returns TRUE if the page can be found in the buffer pool hash table.
638 NOTE that it is possible that the page is not yet read from disk,
639 though.
640 @param[in]	page_id	page id
641 @return TRUE if found in the page hash table */
642 UNIV_INLINE
643 ibool
644 buf_page_peek(
645 	const page_id_t&	page_id);
646 
647 #ifdef UNIV_DEBUG
648 
649 /** Sets file_page_was_freed TRUE if the page is found in the buffer pool.
650 This function should be called when we free a file page and want the
651 debug version to check that it is not accessed any more unless
652 reallocated.
653 @param[in]	page_id	page id
654 @return control block if found in page hash table, otherwise NULL */
655 buf_page_t*
656 buf_page_set_file_page_was_freed(
657 	const page_id_t&	page_id);
658 
659 /** Sets file_page_was_freed FALSE if the page is found in the buffer pool.
660 This function should be called when we free a file page and want the
661 debug version to check that it is not accessed any more unless
662 reallocated.
663 @param[in]	page_id	page id
664 @return control block if found in page hash table, otherwise NULL */
665 buf_page_t*
666 buf_page_reset_file_page_was_freed(
667 	const page_id_t&	page_id);
668 
669 #endif /* UNIV_DEBUG */
670 /********************************************************************//**
671 Reads the freed_page_clock of a buffer block.
672 @return freed_page_clock */
673 UNIV_INLINE
674 ulint
675 buf_page_get_freed_page_clock(
676 /*==========================*/
677 	const buf_page_t*	bpage)	/*!< in: block */
678 	MY_ATTRIBUTE((warn_unused_result));
679 /********************************************************************//**
680 Reads the freed_page_clock of a buffer block.
681 @return freed_page_clock */
682 UNIV_INLINE
683 ulint
684 buf_block_get_freed_page_clock(
685 /*===========================*/
686 	const buf_block_t*	block)	/*!< in: block */
687 	MY_ATTRIBUTE((warn_unused_result));
688 
689 /********************************************************************//**
690 Tells, for heuristics, if a block is still close enough to the MRU end of the
691 LRU list meaning that it is not in danger of getting evicted and also implying
692 that it has been accessed recently.
693 The page must either be buffer-fixed, or its page hash must be locked.
694 @return TRUE if block is close to MRU end of LRU */
695 UNIV_INLINE
696 ibool
697 buf_page_peek_if_young(
698 /*===================*/
699 	const buf_page_t*	bpage);	/*!< in: block */
700 /********************************************************************//**
701 Gets the youngest modification log sequence number for a frame.
702 Returns zero if not file page or no modification occurred yet.
703 @return newest modification to page */
704 UNIV_INLINE
705 lsn_t
706 buf_page_get_newest_modification(
707 /*=============================*/
708 	const buf_page_t*	bpage);	/*!< in: block containing the
709 					page frame */
710 /********************************************************************//**
711 Increments the modify clock of a frame by 1. The caller must either (1) own
712 the buffer page mutex while the block's bufferfix count is zero, or (2) own
713 an x-lock on the block, or (3) the block must belong to an intrinsic table. */
714 UNIV_INLINE
715 void
716 buf_block_modify_clock_inc(
717 /*=======================*/
718 	buf_block_t*	block);	/*!< in: block */
719 /********************************************************************//**
720 Returns the value of the modify clock. The caller must have an s-lock
721 or x-lock on the block.
722 @return value */
723 UNIV_INLINE
724 ib_uint64_t
725 buf_block_get_modify_clock(
726 /*=======================*/
727 	buf_block_t*	block);	/*!< in: block */
728 /*******************************************************************//**
729 Increments the bufferfix count. */
730 UNIV_INLINE
731 void
732 buf_block_buf_fix_inc_func(
733 /*=======================*/
734 # ifdef UNIV_DEBUG
735 	const char*	file,	/*!< in: file name */
736 	ulint		line,	/*!< in: line */
737 # endif /* UNIV_DEBUG */
738 	buf_block_t*	block)	/*!< in/out: block to bufferfix */
739 	MY_ATTRIBUTE((nonnull));
740 
741 /** Increments the bufferfix count.
742 @param[in,out]	bpage	block to bufferfix
743 @return the count */
744 UNIV_INLINE
745 ulint
746 buf_block_fix(
747 	buf_page_t*	bpage);
748 
749 /** Increments the bufferfix count.
750 @param[in,out]	block	block to bufferfix
751 @return the count */
752 UNIV_INLINE
753 ulint
754 buf_block_fix(
755 	buf_block_t*	block);
756 
757 /** Decrements the bufferfix count.
758 @param[in,out]	bpage	block to bufferunfix
759 @return	the remaining buffer-fix count */
760 UNIV_INLINE
761 ulint
762 buf_block_unfix(
763 	buf_page_t*	bpage);
764 /** Decrements the bufferfix count.
765 @param[in,out]	block	block to bufferunfix
766 @return	the remaining buffer-fix count */
767 UNIV_INLINE
768 ulint
769 buf_block_unfix(
770 	buf_block_t*	block);
771 
772 # ifdef UNIV_DEBUG
773 /** Increments the bufferfix count.
774 @param[in,out]	b	block to bufferfix
775 @param[in]	f	file name where requested
776 @param[in]	l	line number where requested */
777 # define buf_block_buf_fix_inc(b,f,l) buf_block_buf_fix_inc_func(f,l,b)
778 # else /* UNIV_DEBUG */
779 /** Increments the bufferfix count.
780 @param[in,out]	b	block to bufferfix
781 @param[in]	f	file name where requested
782 @param[in]	l	line number where requested */
783 # define buf_block_buf_fix_inc(b,f,l) buf_block_buf_fix_inc_func(b)
784 # endif /* UNIV_DEBUG */
785 #else /* !UNIV_HOTBACKUP */
786 # define buf_block_modify_clock_inc(block) ((void) 0)
787 #endif /* !UNIV_HOTBACKUP */
788 #endif /* !UNIV_INNOCHECKSUM */
789 
790 bool
791 buf_page_is_checksum_valid_crc32(
792 	const byte*			read_buf,
793 	ulint				checksum_field1,
794 	ulint				checksum_field2,
795 #ifdef UNIV_INNOCHECKSUM
796 	uintmax_t			page_no,
797 	bool				is_log_enabled,
798 	FILE*				log_file,
799 	const srv_checksum_algorithm_t	curr_algo,
800 #endif /* UNIV_INNOCHECKSUM */
801 	bool				use_legacy_big_endian);
802 
803 bool
804 buf_page_is_checksum_valid_innodb(
805 	const byte*			read_buf,
806 	ulint				checksum_field1,
807 	ulint				checksum_field2
808 #ifdef UNIV_INNOCHECKSUM
809 	,uintmax_t			page_no,
810 	bool				is_log_enabled,
811 	FILE*				log_file,
812 	const srv_checksum_algorithm_t	curr_algo
813 #endif /* UNIV_INNOCHECKSUM */
814 );
815 
816 /** Checks if a page contains only zeroes.
817 @param[in]	read_buf	database page
818 @param[in]	page_size	page size
819 @return true if page is filled with zeroes */
820 bool
821 buf_page_is_zeroes(
822 	const byte*		read_buf,
823 	const page_size_t&	page_size);
824 
825 /** Checks if a page is corrupt.
826 @param[in]	check_lsn	true if we need to check and complain about
827 the LSN
828 @param[in]	read_buf	database page
829 @param[in]	page_size	page size
830 @param[in]	skip_checksum	if true, skip checksum
831 @param[in]	page_no		page number of given read_buf
832 @param[in]	strict_check	true if strict-check option is enabled
833 @param[in]	is_log_enabled	true if log option is enabled
834 @param[in]	log_file	file pointer to log_file
835 @return TRUE if corrupted */
836 ibool
837 buf_page_is_corrupted(
838 	bool			check_lsn,
839 	const byte*		read_buf,
840 	const page_size_t&	page_size,
841 	bool			skip_checksum
842 #ifdef UNIV_INNOCHECKSUM
843 	,uintmax_t		page_no,
844 	bool			strict_check,
845 	bool			is_log_enabled,
846 	FILE*			log_file
847 #endif /* UNIV_INNOCHECKSUM */
848 ) MY_ATTRIBUTE((warn_unused_result));
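
/* Example (an illustrative sketch for a server build, i.e. without
UNIV_INNOCHECKSUM; read_buf and page_size are assumed to describe a page
image that was read directly from a data file):

	if (buf_page_is_corrupted(true, read_buf, page_size, false)) {
		// The checksum or LSN validation failed; treat the page as
		// corrupted, e.g. refuse to import the tablespace.
	}
*/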
849 #ifndef UNIV_INNOCHECKSUM
850 #ifndef UNIV_HOTBACKUP
851 /**********************************************************************//**
852 Gets the space id, page offset, and byte offset within page of a
853 pointer pointing to a buffer frame containing a file page. */
854 UNIV_INLINE
855 void
856 buf_ptr_get_fsp_addr(
857 /*=================*/
858 	const void*	ptr,	/*!< in: pointer to a buffer frame */
859 	ulint*		space,	/*!< out: space id */
860 	fil_addr_t*	addr);	/*!< out: page offset and byte offset */
861 /**********************************************************************//**
862 Gets the hash value of a block. This can be used in searches in the
863 lock hash table.
864 @return lock hash value */
865 UNIV_INLINE
866 ulint
867 buf_block_get_lock_hash_val(
868 /*========================*/
869 	const buf_block_t*	block)	/*!< in: block */
870 	MY_ATTRIBUTE((warn_unused_result));
871 #ifdef UNIV_DEBUG
872 /*********************************************************************//**
873 Finds a block in the buffer pool that points to a
874 given compressed page.
875 @return buffer block pointing to the compressed page, or NULL */
876 buf_block_t*
877 buf_pool_contains_zip(
878 /*==================*/
879 	buf_pool_t*	buf_pool,	/*!< in: buffer pool instance */
880 	const void*	data);		/*!< in: pointer to compressed page */
881 #endif /* UNIV_DEBUG */
882 
883 /***********************************************************************
884 FIXME_FTS: Gets the frame the pointer is pointing to. */
885 UNIV_INLINE
886 buf_frame_t*
887 buf_frame_align(
888 /*============*/
889                         /* out: pointer to frame */
890         byte*   ptr);   /* in: pointer to a frame */
891 
892 
893 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
894 /*********************************************************************//**
895 Validates the buffer pool data structure.
896 @return TRUE */
897 ibool
898 buf_validate(void);
899 /*==============*/
900 #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
901 #if defined UNIV_DEBUG_PRINT || defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
902 /*********************************************************************//**
903 Prints info of the buffer pool data structure. */
904 void
905 buf_print(void);
906 /*============*/
907 #endif /* UNIV_DEBUG_PRINT || UNIV_DEBUG || UNIV_BUF_DEBUG */
908 #endif /* !UNIV_HOTBACKUP */
909 enum buf_page_print_flags {
910 	/** Do not crash at the end of buf_page_print(). */
911 	BUF_PAGE_PRINT_NO_CRASH	= 1,
912 	/** Do not print the full page dump. */
913 	BUF_PAGE_PRINT_NO_FULL = 2
914 };
915 
916 /** Prints a page to stderr.
917 @param[in]	read_buf	a database page
918 @param[in]	page_size	page size
919 @param[in]	flags		0 or BUF_PAGE_PRINT_NO_CRASH or
920 BUF_PAGE_PRINT_NO_FULL */
921 void
922 buf_page_print(
923 	const byte*		read_buf,
924 	const page_size_t&	page_size,
925 	ulint			flags);
926 
927 /********************************************************************//**
928 Decompress a block.
929 @return TRUE if successful */
930 ibool
931 buf_zip_decompress(
932 /*===============*/
933 	buf_block_t*	block,	/*!< in/out: block */
934 	ibool		check);	/*!< in: TRUE=verify the page checksum */
935 #ifndef UNIV_HOTBACKUP
936 #ifdef UNIV_DEBUG
937 /*********************************************************************//**
938 Returns the number of latched pages in the buffer pool.
939 @return number of latched pages */
940 ulint
941 buf_get_latched_pages_number(void);
942 /*==============================*/
943 #endif /* UNIV_DEBUG */
944 /*********************************************************************//**
945 Returns the number of pending buf pool read ios.
946 @return number of pending read I/O operations */
947 ulint
948 buf_get_n_pending_read_ios(void);
949 /*============================*/
950 /*********************************************************************//**
951 Prints info of the buffer i/o. */
952 void
953 buf_print_io(
954 /*=========*/
955 	FILE*	file);	/*!< in: file where to print */
956 /*******************************************************************//**
957 Collect buffer pool stats information for a buffer pool. Also
958 record aggregated stats if there is more than one buffer pool
959 in the server */
960 void
961 buf_stats_get_pool_info(
962 /*====================*/
963 	buf_pool_t*		buf_pool,	/*!< in: buffer pool */
964 	ulint			pool_id,	/*!< in: buffer pool ID */
965 	buf_pool_info_t*	all_pool_info);	/*!< in/out: buffer pool info
966 						to fill */
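
/* Example (an illustrative sketch in the style of the monitor output code;
the allocation helpers ut_zalloc_nokey()/ut_free() from ut0new.h are assumed
to be available):

	buf_pool_info_t*	pool_info = static_cast<buf_pool_info_t*>(
		ut_zalloc_nokey(srv_buf_pool_instances * sizeof(*pool_info)));

	for (ulint i = 0; i < srv_buf_pool_instances; i++) {
		buf_stats_get_pool_info(buf_pool_from_array(i), i, pool_info);
	}

	// ... print or aggregate pool_info[0 .. srv_buf_pool_instances - 1] ...

	ut_free(pool_info);
*/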
967 /*********************************************************************//**
968 Returns the ratio, as a percentage, of modified pages in the buffer pool to
969 database pages in the buffer pool.
970 @return modified page percentage ratio */
971 double
972 buf_get_modified_ratio_pct(void);
973 /*============================*/
974 /**********************************************************************//**
975 Refreshes the statistics used to print per-second averages. */
976 void
977 buf_refresh_io_stats(
978 /*=================*/
979 	buf_pool_t*	buf_pool);	/*!< buffer pool instance */
980 /**********************************************************************//**
981 Refreshes the statistics used to print per-second averages. */
982 void
983 buf_refresh_io_stats_all(void);
984 /*=================*/
985 /*********************************************************************//**
986 Asserts that all file pages in the buffer are in a replaceable state.
987 @return TRUE */
988 ibool
989 buf_all_freed(void);
990 /*===============*/
991 /*********************************************************************//**
992 Checks that there currently are no pending i/o-operations for the buffer
993 pool.
994 @return number of pending i/o operations */
995 ulint
996 buf_pool_check_no_pending_io(void);
997 /*==============================*/
998 /*********************************************************************//**
999 Invalidates the file pages in the buffer pool when an archive recovery is
1000 completed. All the file pages buffered must be in a replaceable state when
1001 this function is called: not latched and not modified. */
1002 void
1003 buf_pool_invalidate(void);
1004 /*=====================*/
1005 #endif /* !UNIV_HOTBACKUP */
1006 
1007 /*========================================================================
1008 --------------------------- LOWER LEVEL ROUTINES -------------------------
1009 =========================================================================*/
1010 
1011 #ifdef UNIV_DEBUG
1012 /*********************************************************************//**
1013 Adds latch level info for the rw-lock protecting the buffer frame. This
1014 should be called in the debug version after a successful latching of a
1015 page if we know the latching order level of the acquired latch. */
1016 UNIV_INLINE
1017 void
1018 buf_block_dbg_add_level(
1019 /*====================*/
1020 	buf_block_t*	block,	/*!< in: buffer page
1021 				where we have acquired latch */
1022 	latch_level_t	level);	/*!< in: latching order level */
1023 #else /* UNIV_DEBUG */
1024 # define buf_block_dbg_add_level(block, level) /* nothing */
1025 #endif /* UNIV_DEBUG */
1026 /*********************************************************************//**
1027 Gets the state of a block.
1028 @return state */
1029 UNIV_INLINE
1030 enum buf_page_state
1031 buf_page_get_state(
1032 /*===============*/
1033 	const buf_page_t*	bpage);	/*!< in: pointer to the control block */
1034 /*********************************************************************//**
1035 Gets the state of a block.
1036 @return state */
1037 UNIV_INLINE
1038 enum buf_page_state
1039 buf_block_get_state(
1040 /*================*/
1041 	const buf_block_t*	block)	/*!< in: pointer to the control block */
1042 	MY_ATTRIBUTE((warn_unused_result));
1043 /*********************************************************************//**
1044 Sets the state of a block. */
1045 UNIV_INLINE
1046 void
1047 buf_page_set_state(
1048 /*===============*/
1049 	buf_page_t*		bpage,	/*!< in/out: pointer to control block */
1050 	enum buf_page_state	state);	/*!< in: state */
1051 /*********************************************************************//**
1052 Sets the state of a block. */
1053 UNIV_INLINE
1054 void
1055 buf_block_set_state(
1056 /*================*/
1057 	buf_block_t*		block,	/*!< in/out: pointer to control block */
1058 	enum buf_page_state	state);	/*!< in: state */
1059 /*********************************************************************//**
1060 Determines if a block is mapped to a tablespace.
1061 @return TRUE if mapped */
1062 UNIV_INLINE
1063 ibool
1064 buf_page_in_file(
1065 /*=============*/
1066 	const buf_page_t*	bpage)	/*!< in: pointer to control block */
1067 	MY_ATTRIBUTE((warn_unused_result));
1068 #ifndef UNIV_HOTBACKUP
1069 /*********************************************************************//**
1070 Determines if a block should be on unzip_LRU list.
1071 @return TRUE if block belongs to unzip_LRU */
1072 UNIV_INLINE
1073 ibool
1074 buf_page_belongs_to_unzip_LRU(
1075 /*==========================*/
1076 	const buf_page_t*	bpage)	/*!< in: pointer to control block */
1077 	MY_ATTRIBUTE((warn_unused_result));
1078 
1079 /*********************************************************************//**
1080 Gets the mutex of a block.
1081 @return pointer to mutex protecting bpage */
1082 UNIV_INLINE
1083 BPageMutex*
1084 buf_page_get_mutex(
1085 /*===============*/
1086 	const buf_page_t*	bpage)	/*!< in: pointer to control block */
1087 	MY_ATTRIBUTE((warn_unused_result));
1088 
1089 /*********************************************************************//**
1090 Get the flush type of a page.
1091 @return flush type */
1092 UNIV_INLINE
1093 buf_flush_t
1094 buf_page_get_flush_type(
1095 /*====================*/
1096 	const buf_page_t*	bpage)	/*!< in: buffer page */
1097 	MY_ATTRIBUTE((warn_unused_result));
1098 /*********************************************************************//**
1099 Set the flush type of a page. */
1100 UNIV_INLINE
1101 void
1102 buf_page_set_flush_type(
1103 /*====================*/
1104 	buf_page_t*	bpage,		/*!< in: buffer page */
1105 	buf_flush_t	flush_type);	/*!< in: flush type */
1106 
1107 /** Map a block to a file page.
1108 @param[in,out]	block	pointer to control block
1109 @param[in]	page_id	page id */
1110 UNIV_INLINE
1111 void
1112 buf_block_set_file_page(
1113 	buf_block_t*		block,
1114 	const page_id_t&	page_id);
1115 
1116 /*********************************************************************//**
1117 Gets the io_fix state of a block.
1118 @return io_fix state */
1119 UNIV_INLINE
1120 enum buf_io_fix
1121 buf_page_get_io_fix(
1122 /*================*/
1123 	const buf_page_t*	bpage)	/*!< in: pointer to the control block */
1124 	MY_ATTRIBUTE((warn_unused_result));
1125 
1126 /** Gets the io_fix state of a buffer page. Does not assert that the
1127 buf_page_get_mutex() mutex is held, to be used in the cases where it is safe
1128 not to hold it.
1129 @param[in]	bpage	pointer to the buffer page
1130 @return page io_fix state */
1131 UNIV_INLINE
1132 enum buf_io_fix
1133 buf_page_get_io_fix_unlocked(
1134 /*=========================*/
1135 	const buf_page_t*	bpage)
1136 	MY_ATTRIBUTE((warn_unused_result));
1137 
1138 /*********************************************************************//**
1139 Gets the io_fix state of a block.
1140 @return io_fix state */
1141 UNIV_INLINE
1142 enum buf_io_fix
1143 buf_block_get_io_fix(
1144 /*================*/
1145 	const buf_block_t*	block)	/*!< in: pointer to the control block */
1146 	MY_ATTRIBUTE((warn_unused_result));
1147 
1148 /** Gets the io_fix state of a buffer block. Does not assert that the
1149 buf_page_get_mutex() mutex is held, to be used in the cases where it is safe
1150 not to hold it.
1151 @param[in]	block	pointer to the buffer block
1152 @return page io_fix state */
1153 UNIV_INLINE
1154 enum buf_io_fix
1155 buf_block_get_io_fix_unlocked(
1156 /*=========================*/
1157 	const buf_block_t*	block)
1158 	MY_ATTRIBUTE((warn_unused_result));
1159 /*********************************************************************//**
1160 Sets the io_fix state of a block. */
1161 UNIV_INLINE
1162 void
1163 buf_page_set_io_fix(
1164 /*================*/
1165 	buf_page_t*	bpage,	/*!< in/out: control block */
1166 	enum buf_io_fix	io_fix);/*!< in: io_fix state */
1167 /*********************************************************************//**
1168 Sets the io_fix state of a block. */
1169 UNIV_INLINE
1170 void
1171 buf_block_set_io_fix(
1172 /*=================*/
1173 	buf_block_t*	block,	/*!< in/out: control block */
1174 	enum buf_io_fix	io_fix);/*!< in: io_fix state */
1175 /*********************************************************************//**
1176 Makes a block sticky. A sticky block implies that even after we release
1177 the buf_pool->mutex and the block->mutex:
1178 * it cannot be removed from the flush_list
1179 * the block descriptor cannot be relocated
1180 * it cannot be removed from the LRU list
1181 Note that:
1182 * the block can still change its position in the LRU list
1183 * the next and previous pointers can change. */
1184 UNIV_INLINE
1185 void
1186 buf_page_set_sticky(
1187 /*================*/
1188 	buf_page_t*	bpage);	/*!< in/out: control block */
1189 /*********************************************************************//**
1190 Removes stickiness of a block. */
1191 UNIV_INLINE
1192 void
1193 buf_page_unset_sticky(
1194 /*==================*/
1195 	buf_page_t*	bpage);	/*!< in/out: control block */
1196 /********************************************************************//**
1197 Determine if a buffer block can be relocated in memory.  The block
1198 can be dirty, but it must not be I/O-fixed or bufferfixed. */
1199 UNIV_INLINE
1200 ibool
1201 buf_page_can_relocate(
1202 /*==================*/
1203 	const buf_page_t*	bpage)	/*!< control block being relocated */
1204 	MY_ATTRIBUTE((warn_unused_result));
1205 
1206 /*********************************************************************//**
1207 Determine if a block has been flagged old.
1208 @return TRUE if old */
1209 UNIV_INLINE
1210 ibool
1211 buf_page_is_old(
1212 /*============*/
1213 	const buf_page_t*	bpage)	/*!< in: control block */
1214 	MY_ATTRIBUTE((warn_unused_result));
1215 /*********************************************************************//**
1216 Flag a block old. */
1217 UNIV_INLINE
1218 void
1219 buf_page_set_old(
1220 /*=============*/
1221 	buf_page_t*	bpage,	/*!< in/out: control block */
1222 	ibool		old);	/*!< in: old */
1223 /*********************************************************************//**
1224 Determine the time of first access of a block in the buffer pool.
1225 @return ut_time_monotonic_ms() at the time of first access, 0 if not accessed */
1226 UNIV_INLINE
1227 unsigned
1228 buf_page_is_accessed(
1229 /*=================*/
1230 	const buf_page_t*	bpage)	/*!< in: control block */
1231 	MY_ATTRIBUTE((warn_unused_result));
1232 /*********************************************************************//**
1233 Flag a block accessed. */
1234 UNIV_INLINE
1235 void
1236 buf_page_set_accessed(
1237 /*==================*/
1238 	buf_page_t*	bpage)		/*!< in/out: control block */
1239 	MY_ATTRIBUTE((nonnull));
1240 /*********************************************************************//**
1241 Gets the buf_block_t handle of a buffered file block if an uncompressed
1242 page frame exists, or NULL. The caller must hold
1243 either the appropriate hash lock in any mode, or the LRU list mutex. Note:
1244 even though bpage is not declared a const we don't update its value.
1245 @return control block, or NULL */
1246 UNIV_INLINE
1247 buf_block_t*
1248 buf_page_get_block(
1249 /*===============*/
1250 	buf_page_t*	bpage)	/*!< in: control block, or NULL */
1251 	MY_ATTRIBUTE((warn_unused_result));
1252 #endif /* !UNIV_HOTBACKUP */
1253 #ifdef UNIV_DEBUG
1254 /*********************************************************************//**
1255 Gets a pointer to the memory frame of a block.
1256 @return pointer to the frame */
1257 UNIV_INLINE
1258 buf_frame_t*
1259 buf_block_get_frame(
1260 /*================*/
1261 	const buf_block_t*	block)	/*!< in: pointer to the control block */
1262 	MY_ATTRIBUTE((warn_unused_result));
1263 #else /* UNIV_DEBUG */
1264 # define buf_block_get_frame(block) (block)->frame
1265 #endif /* UNIV_DEBUG */
1266 /*********************************************************************//**
1267 Gets the compressed page descriptor corresponding to an uncompressed page
1268 if applicable. */
1269 #define buf_block_get_page_zip(block) \
1270 	((block)->page.zip.data ? &(block)->page.zip : NULL)
1271 #ifndef UNIV_HOTBACKUP
1272 
1273 /** Get a buffer block from an adaptive hash index pointer.
1274 This function does not return if the block is not identified.
1275 @param[in]	ptr	pointer to within a page frame
1276 @return pointer to block, never NULL */
1277 buf_block_t*
1278 buf_block_from_ahi(const byte* ptr);
1279 
1280 
1281 /********************************************************************//**
1282 Find out if a pointer belongs to a buf_block_t. It can be a pointer to
1283 the buf_block_t itself or a member of it. This functions checks one of
1284 the buffer pool instances.
1285 @return TRUE if ptr belongs to a buf_block_t struct */
1286 ibool
1287 buf_pointer_is_block_field_instance(
1288 /*================================*/
1289        const buf_pool_t*  buf_pool,   /*!< in: buffer pool instance */
1290        const void*        ptr);       /*!< in: pointer not dereferenced */
1291 
1292 
1293 /** Inits a page for reading into the buffer pool. If the page is
1294 (1) already in buf_pool, or
1295 (2) if we specify to read only ibuf pages and the page is not an ibuf page, or
1296 (3) if the space is deleted or being deleted,
1297 then this function does nothing.
1298 Sets the io_fix flag to BUF_IO_READ and sets a non-recursive exclusive lock
1299 on the buffer frame. The io-handler must take care that the flag is cleared
1300 and the lock released later.
1301 @param[out]	err			DB_SUCCESS or DB_TABLESPACE_DELETED
1302 @param[in]	mode			BUF_READ_IBUF_PAGES_ONLY, ...
1303 @param[in]	page_id			page id
1304 @param[in]	unzip			TRUE=request uncompressed page
1305 @return pointer to the block or NULL */
1306 buf_page_t*
1307 buf_page_init_for_read(
1308 	dberr_t*		err,
1309 	ulint			mode,
1310 	const page_id_t&	page_id,
1311 	const page_size_t&	page_size,
1312 	ibool			unzip);
1313 
1314 /********************************************************************//**
1315 Completes an asynchronous read or write request of a file page to or from
1316 the buffer pool.
1317 @return whether the operation succeeded
1318 @retval	DB_SUCCESS		always when writing, or if a read page was OK
1319 @retval	DB_PAGE_CORRUPTED	if the checksum fails on a page read
1320 @retval	DB_DECRYPTION_FAILED	if page post encryption checksum matches but
1321 				after decryption normal page checksum does
1322 				not match */
1323 dberr_t
1324 buf_page_io_complete(
1325 /*=================*/
1326 	buf_page_t*	bpage,	/*!< in: pointer to the block in question */
1327 	bool		evict = false);/*!< in: whether or not to evict
1328 				the page from LRU list. */
1329 /********************************************************************//**
1330 Calculates the index of a buffer pool in the buf_pool[] array.
1331 @return the position of the buffer pool in buf_pool[] */
1332 UNIV_INLINE
1333 ulint
1334 buf_pool_index(
1335 /*===========*/
1336 	const buf_pool_t*	buf_pool)	/*!< in: buffer pool */
1337 	MY_ATTRIBUTE((warn_unused_result));
1338 /******************************************************************//**
1339 Returns the buffer pool instance given a page instance
1340 @return buf_pool */
1341 UNIV_INLINE
1342 buf_pool_t*
1343 buf_pool_from_bpage(
1344 /*================*/
1345 	const buf_page_t*	bpage); /*!< in: buffer pool page */
1346 /******************************************************************//**
1347 Returns the buffer pool instance given a block instance
1348 @return buf_pool */
1349 UNIV_INLINE
1350 buf_pool_t*
1351 buf_pool_from_block(
1352 /*================*/
1353 	const buf_block_t*	block); /*!< in: block */
1354 
1355 /** Returns the buffer pool instance given a page id.
1356 @param[in]	page_id	page id
1357 @return buffer pool */
1358 UNIV_INLINE
1359 buf_pool_t*
1360 buf_pool_get(
1361 	const page_id_t&	page_id);
1362 
1363 /******************************************************************//**
1364 Returns the buffer pool instance given its array index
1365 @return buffer pool */
1366 UNIV_INLINE
1367 buf_pool_t*
1368 buf_pool_from_array(
1369 /*================*/
1370 	ulint	index);		/*!< in: array index to get
1371 				buffer pool instance from */
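
/* Example (an illustrative sketch; space_id and page_no are assumed values):

	const page_id_t	page_id(space_id, page_no);

	// Pages are distributed over the instances by their page id.
	buf_pool_t*	buf_pool = buf_pool_get(page_id);
	ulint		index = buf_pool_index(buf_pool);

	ut_ad(index < srv_buf_pool_instances);
	ut_ad(buf_pool == buf_pool_from_array(index));
*/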
1372 
1373 /** Returns the control block of a file page, NULL if not found.
1374 @param[in]	buf_pool	buffer pool instance
1375 @param[in]	page_id		page id
1376 @return block, NULL if not found */
1377 UNIV_INLINE
1378 buf_page_t*
1379 buf_page_hash_get_low(
1380 	buf_pool_t*		buf_pool,
1381 	const page_id_t&	page_id);
1382 
1383 /** Returns the control block of a file page, NULL if not found.
1384 If the block is found and lock is not NULL then the appropriate
1385 page_hash lock is acquired in the specified lock mode. Otherwise,
1386 mode value is ignored. It is up to the caller to release the
1387 lock. If the block is found and the lock is NULL then the page_hash
1388 lock is released by this function.
1389 @param[in]	buf_pool	buffer pool instance
1390 @param[in]	page_id		page id
1391 @param[in,out]	lock		lock of the page hash acquired if bpage is
1392 found, NULL otherwise. If NULL is passed then the hash_lock is released by
1393 this function.
1394 @param[in]	lock_mode	RW_LOCK_X or RW_LOCK_S. Ignored if
1395 lock == NULL
1396 @param[in]	watch		if true, return watch sentinel also.
1397 @return pointer to the bpage or NULL; if NULL, lock is also NULL or
1398 a watch sentinel. */
1399 UNIV_INLINE
1400 buf_page_t*
1401 buf_page_hash_get_locked(
1402 	buf_pool_t*		buf_pool,
1403 	const page_id_t&	page_id,
1404 	rw_lock_t**		lock,
1405 	ulint			lock_mode,
1406 	bool			watch = false);
1407 
1408 /** Returns the control block of a file page, NULL if not found.
1409 If the block is found and lock is not NULL then the appropriate
1410 page_hash lock is acquired in the specified lock mode. Otherwise,
1411 mode value is ignored. It is up to the caller to release the
1412 lock. If the block is found and the lock is NULL then the page_hash
1413 lock is released by this function.
1414 @param[in]	buf_pool	buffer pool instance
1415 @param[in]	page_id		page id
1416 @param[in,out]	lock		lock of the page hash acquired if bpage is
1417 found, NULL otherwise. If NULL is passed then the hash_lock is released by
1418 this function.
1419 @param[in]	lock_mode	RW_LOCK_X or RW_LOCK_S. Ignored if
1420 lock == NULL
1421 @return pointer to the block or NULL; if NULL, lock is also NULL. */
1422 UNIV_INLINE
1423 buf_block_t*
1424 buf_block_hash_get_locked(
1425 	buf_pool_t*		buf_pool,
1426 	const page_id_t&	page_id,
1427 	rw_lock_t**		lock,
1428 	ulint			lock_mode);
1429 
1430 /* There are four different ways we can try to get a bpage or block
1431 from the page hash:
1432 1) Caller already holds the appropriate page hash lock: in that case call
1433 the buf_page_hash_get_low() function.
1434 2) Caller wants to hold page hash lock in x-mode
1435 3) Caller wants to hold page hash lock in s-mode
1436 4) Caller doesn't want to hold page hash lock */
1437 #define buf_page_hash_get_s_locked(b, page_id, l)		\
1438 	buf_page_hash_get_locked(b, page_id, l, RW_LOCK_S)
1439 #define buf_page_hash_get_x_locked(b, page_id, l)		\
1440 	buf_page_hash_get_locked(b, page_id, l, RW_LOCK_X)
1441 #define buf_page_hash_get(b, page_id)				\
1442 	buf_page_hash_get_locked(b, page_id, NULL, 0)
1443 #define buf_page_get_also_watch(b, page_id)			\
1444 	buf_page_hash_get_locked(b, page_id, NULL, 0, true)
1445 
1446 #define buf_block_hash_get_s_locked(b, page_id, l)		\
1447 	buf_block_hash_get_locked(b, page_id, l, RW_LOCK_S)
1448 #define buf_block_hash_get_x_locked(b, page_id, l)		\
1449 	buf_block_hash_get_locked(b, page_id, l, RW_LOCK_X)
1450 #define buf_block_hash_get(b, page_id)				\
1451 	buf_block_hash_get_locked(b, page_id, NULL, 0)
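
/* Example (an illustrative sketch of case 3) above; buf_pool and page_id are
assumed to be supplied by the caller, and the caller must release the hash
lock itself):

	rw_lock_t*	hash_lock = NULL;

	buf_page_t*	bpage = buf_page_hash_get_s_locked(
		buf_pool, page_id, &hash_lock);

	if (bpage != NULL) {
		// ... inspect bpage while the s-mode hash lock is held ...

		rw_lock_s_unlock(hash_lock);
	}
*/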
1452 
1453 /*********************************************************************//**
1454 Gets the current length of the free list of buffer blocks.
1455 @return length of the free list */
1456 ulint
1457 buf_get_free_list_len(void);
1458 /*=======================*/
1459 
1460 /********************************************************************//**
1461 Determine if a block is a sentinel for a buffer pool watch.
1462 @return TRUE if a sentinel for a buffer pool watch, FALSE if not */
1463 ibool
1464 buf_pool_watch_is_sentinel(
1465 /*=======================*/
1466 	const buf_pool_t*	buf_pool,	/*!< buffer pool instance */
1467 	const buf_page_t*	bpage)		/*!< in: block */
1468 	MY_ATTRIBUTE((nonnull, warn_unused_result));
1469 
1470 /** Stop watching if the page has been read in.
1471 buf_pool_watch_set(space,offset) must have returned NULL before.
1472 @param[in]	page_id	page id */
1473 void
1474 buf_pool_watch_unset(
1475 	const page_id_t&	page_id);
1476 
1477 /** Check if the page has been read in.
1478 This may only be called after buf_pool_watch_set(space,offset)
1479 has returned NULL and before invoking buf_pool_watch_unset(space,offset).
1480 @param[in]	page_id	page id
1481 @return FALSE if the given page was not read in, TRUE if it was */
1482 ibool
1483 buf_pool_watch_occurred(
1484 	const page_id_t&	page_id)
1485 MY_ATTRIBUTE((warn_unused_result));
1486 
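/* Sketch of the watch protocol implied by the watch declarations above
(illustrative only; buf_pool_watch_set() is declared earlier in this header
and is assumed to return NULL when the page was absent and a watch was set):

	if (buf_pool_watch_set(...) == NULL) {
		// The page was not in the buffer pool; a watch sentinel
		// now stands in for it in the page hash.

		// ... do work that may trigger a read of the page ...

		if (buf_pool_watch_occurred(page_id)) {
			// The page has been read in meanwhile.
		}

		buf_pool_watch_unset(page_id);
	}
*/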
1487 /********************************************************************//**
1488 Get total buffer pool statistics. */
1489 void
1490 buf_get_total_list_len(
1491 /*===================*/
1492 	ulint*		LRU_len,	/*!< out: length of all LRU lists */
1493 	ulint*		free_len,	/*!< out: length of all free lists */
1494 	ulint*		flush_list_len);/*!< out: length of all flush lists */
1495 /********************************************************************//**
1496 Get total list size in bytes from all buffer pools. */
1497 void
1498 buf_get_total_list_size_in_bytes(
1499 /*=============================*/
1500 	buf_pools_list_size_t*	buf_pools_list_size);	/*!< out: list sizes
1501 							in all buffer pools */
1502 /********************************************************************//**
1503 Get total buffer pool statistics. */
1504 void
1505 buf_get_total_stat(
1506 /*===============*/
1507 	buf_pool_stat_t*tot_stat);	/*!< out: buffer pool stats */
1508 /*********************************************************************//**
1509 Get the nth chunk's buffer block in the specified buffer pool.
1510 @return the nth chunk's buffer block. */
1511 UNIV_INLINE
1512 buf_block_t*
1513 buf_get_nth_chunk_block(
1514 /*====================*/
1515 	const buf_pool_t* buf_pool,	/*!< in: buffer pool instance */
1516 	ulint		n,		/*!< in: nth chunk in the buffer pool */
1517 	ulint*		chunk_size);	/*!< in: chunk size */
1518 
1519 /** Calculate aligned buffer pool size based on srv_buf_pool_chunk_unit,
1520 if needed.
1521 @param[in]	size	size in bytes
1522 @return	aligned size */
1523 UNIV_INLINE
1524 ulint
1525 buf_pool_size_align(
1526 	ulint	size);
1527 
1528 /** Calculate the checksum of a page from a compressed table and update the
1529 page.
1530 @param[in,out]	page	page to update
1531 @param[in]	size	compressed page size
1532 @param[in]	lsn	LSN to stamp on the page */
1533 void
1534 buf_flush_update_zip_checksum(
1535 	buf_frame_t*	page,
1536 	ulint		size,
1537 	lsn_t		lsn);
1538 
1539 #endif /* !UNIV_HOTBACKUP */
1540 
1541 /** Return how many more pages must be added to the withdraw list to reach the
1542 withdraw target of the currently ongoing buffer pool resize.
1543 @param[in]	buf_pool	buffer pool instance
1544 @return page count to be withdrawn or zero if the target is already achieved or
1545 if the buffer pool is not currently being resized. */
1546 UNIV_INLINE
1547 ulint
1548 buf_get_withdraw_depth(
1549 	buf_pool_t* buf_pool);
1550 
1551 /** The common buffer control block structure
1552 for compressed and uncompressed frames */
1553 
1554 /** Number of bits used for buffer page states. */
1555 #define BUF_PAGE_STATE_BITS	3
1556 
1557 class buf_page_t {
1558 public:
1559 	/** @name General fields
1560 	None of these bit-fields must be modified without holding
1561 	buf_page_get_mutex() [buf_block_t::mutex or
1562 	buf_pool->zip_mutex], since they can be stored in the same
1563 	machine word.  */
1564 	/* @{ */
1565 
1566 	/** Page id. */
1567 	page_id_t	id;
1568 
1569 	/** Page size. */
1570 	page_size_t	size;
1571 
1572 	/** Count of how many times this block is currently bufferfixed. */
1573 	ib_uint32_t	buf_fix_count;
1574 
1575 	/** Type of pending I/O operation. */
1576 	buf_io_fix	io_fix;
1577 
1578 	/** Block state. @see buf_page_in_file */
1579 	buf_page_state	state;
1580 
1581 #ifndef UNIV_HOTBACKUP
1582 	unsigned	flush_type:2;	/*!< if this block is currently being
1583 					flushed to disk, this tells the
1584 					flush_type.
1585 					@see buf_flush_t */
1586 	unsigned	buf_pool_index:6;/*!< index number of the buffer pool
1587 					that this block belongs to */
1588 # if MAX_BUFFER_POOLS > 64
1589 #  error "MAX_BUFFER_POOLS > 64; redefine buf_pool_index:6"
1590 # endif
1591 	/* @} */
1592 #endif /* !UNIV_HOTBACKUP */
1593 	page_zip_des_t	zip;		/*!< compressed page; zip.data
1594 					(but not the data it points to) is
1595 					protected by buf_pool->zip_mutex;
1596 					state == BUF_BLOCK_ZIP_PAGE and
1597 					zip.data == NULL means an active
1598 					buf_pool->watch */
1599 #ifndef UNIV_HOTBACKUP
1600 	buf_page_t*	hash;		/*!< node used in chaining to
1601 					buf_pool->page_hash or
1602 					buf_pool->zip_hash */
1603 #ifdef UNIV_DEBUG
1604 	ibool		in_page_hash;	/*!< TRUE if in buf_pool->page_hash */
1605 	ibool		in_zip_hash;	/*!< TRUE if in buf_pool->zip_hash */
1606 #endif /* UNIV_DEBUG */
1607 
1608 	/** @name Page flushing fields
1609 	All these are protected by buf_pool->mutex. */
1610 	/* @{ */
1611 
1612 	UT_LIST_NODE_T(buf_page_t) list;
1613 					/*!< based on state, this is a
1614 					list node, protected by the
1615 					corresponding list mutex, in one of the
1616 					following lists in buf_pool:
1617 
1618 					- BUF_BLOCK_NOT_USED:	free, withdraw
1619 					- BUF_BLOCK_FILE_PAGE:	flush_list
1620 					- BUF_BLOCK_ZIP_DIRTY:	flush_list
1621 					- BUF_BLOCK_ZIP_PAGE:	zip_clean
1622 
1623 					The node pointers are protected by the
1624 					corresponding list mutex.
1625 
1626 					The contents of the list node
1627 					are undefined if !in_flush_list
1628 					&& state == BUF_BLOCK_FILE_PAGE,
1629 					or if state is one of
1630 					BUF_BLOCK_MEMORY,
1631 					BUF_BLOCK_REMOVE_HASH or
1632 					BUF_BLOCK_READY_IN_USE. */
1633 
1634 #ifdef UNIV_DEBUG
1635 	ibool		in_flush_list;	/*!< TRUE if in buf_pool->flush_list;
1636 					when buf_pool->flush_list_mutex is
1637 					free, the following should hold:
1638 					in_flush_list
1639 					== (state == BUF_BLOCK_FILE_PAGE
1640 					    || state == BUF_BLOCK_ZIP_DIRTY)
1641 					Writes to this field must be
1642 					covered by both block->mutex
1643 					and buf_pool->flush_list_mutex. Hence
1644 					reads can happen while holding
1645 					any one of the two mutexes */
1646 	ibool		in_free_list;	/*!< TRUE if in buf_pool->free; when
1647 					buf_pool->free_list_mutex is free, the
1648 					following should hold: in_free_list
1649 					== (state == BUF_BLOCK_NOT_USED) */
1650 #endif /* UNIV_DEBUG */
1651 
1652 	FlushObserver*	flush_observer;	/*!< flush observer */
1653 
1654 	lsn_t		newest_modification;
1655 					/*!< log sequence number of
1656 					the youngest modification to
1657 					this block, zero if not
1658 					modified. Protected by block
1659 					mutex */
1660 	lsn_t		oldest_modification;
1661 					/*!< log sequence number of
1662 					the START of the log entry
1663 					written of the oldest
1664 					modification to this block
1665 					which has not yet been flushed
1666 					on disk; zero if all
1667 					modifications are on disk.
1668 					Writes to this field must be
1669 					covered by both block->mutex
1670 					and buf_pool->flush_list_mutex. Hence
1671 					reads can happen while holding
1672 					any one of the two mutexes */
1673 	/* @} */
1674 	/** @name LRU replacement algorithm fields
1675 	These fields are protected by both buf_pool->LRU_list_mutex and the
1676 	block mutex. */
1677 	/* @{ */
1678 
1679 	UT_LIST_NODE_T(buf_page_t) LRU;
1680 					/*!< node of the LRU list */
1681 #ifdef UNIV_DEBUG
1682 	ibool		in_LRU_list;	/*!< TRUE if the page is in
1683 					the LRU list; used in
1684 					debugging */
1685 #endif /* UNIV_DEBUG */
1686 	unsigned	old:1;		/*!< TRUE if the block is in the old
1687 					blocks in buf_pool->LRU_old */
1688 	unsigned	freed_page_clock:31;/*!< the value of
1689 					buf_pool->freed_page_clock
1690 					when this block was last put
1691 					to the head of the
1692 					LRU list; a thread is allowed
1693 					to read this for heuristic
1694 					purposes without holding any
1695 					mutex or latch */
1696 	/* @} */
1697 	unsigned	access_time;	/*!< time of first access, or
1698 					0 if the block was never accessed
1699 					in the buffer pool. Protected by
1700 					block mutex */
1701 	bool		is_corrupt;	/*!< true if the page was found to be corrupt */
1702 	bool		encrypted;	/*!< page is still encrypted */
1703 # ifdef UNIV_DEBUG
1704 	ibool		file_page_was_freed;
1705 					/*!< this is set to TRUE when
1706 					fsp frees a page in buffer pool;
1707 					protected by buf_pool->zip_mutex
1708 					or buf_block_t::mutex. */
1709 # endif /* UNIV_DEBUG */
1710 #endif /* !UNIV_HOTBACKUP */
1711 };
1712 
1713 /** The buffer control block structure */
1714 
1715 struct buf_block_t{
1716 
1717 	/** @name General fields */
1718 	/* @{ */
1719 
1720 	buf_page_t	page;		/*!< page information; this must
1721 					be the first field, so that
1722 					buf_pool->page_hash can point
1723 					to buf_page_t or buf_block_t */
1724 	byte*		frame;		/*!< pointer to buffer frame which
1725 					is of size UNIV_PAGE_SIZE, and
1726 					aligned to an address divisible by
1727 					UNIV_PAGE_SIZE */
1728 #ifndef UNIV_HOTBACKUP
1729 	BPageLock	lock;		/*!< read-write lock of the buffer
1730 					frame */
1731 	UT_LIST_NODE_T(buf_block_t) unzip_LRU;
1732 					/*!< node of the decompressed LRU list;
1733 					a block is in the unzip_LRU list
1734 					if page.state == BUF_BLOCK_FILE_PAGE
1735 					and page.zip.data != NULL. Protected by
1736 					both LRU_list_mutex and the block
1737 					mutex. */
1738 #ifdef UNIV_DEBUG
1739 	ibool		in_unzip_LRU_list;/*!< TRUE if the page is in the
1740 					decompressed LRU list;
1741 					used in debugging */
1742 	ibool		in_withdraw_list;
1743 #endif /* UNIV_DEBUG */
1744 	unsigned	lock_hash_val:32;/*!< hashed value of the page address
1745 					in the record lock hash table;
1746 					protected by buf_block_t::lock
1747 					(or buf_block_t::mutex in
1748 					buf_page_get_gen(),
1749 					buf_page_init_for_read()
1750 					and buf_page_create()) */
1751 	/* @} */
1752 	/** @name Optimistic search field */
1753 	/* @{ */
1754 
1755 	ib_uint64_t	modify_clock;	/*!< this clock is incremented every
1756 					time a pointer to a record on the
1757 					page may become obsolete; this is
1758 					used in the optimistic cursor
1759 					positioning: if the modify clock has
1760 					not changed, we know that the pointer
1761 					is still valid; this field may be
1762 					changed if the thread (1) owns the LRU
1763 					list mutex and the page is not
1764 					bufferfixed, or (2) the thread has an
1765 					x-latch on the block, or (3) the block
1766 					belongs to an intrinsic table */
1767 	/* @} */
1768 	/** @name Hash search fields (unprotected)
1769 	NOTE that these fields are NOT protected by any semaphore! */
1770 	/* @{ */
1771 
1772 	ulint		n_hash_helps;	/*!< counter which controls building
1773 					of a new hash index for the page */
1774 	volatile ulint	n_bytes;	/*!< recommended prefix length for hash
1775 					search: number of bytes in
1776 					an incomplete last field */
1777 	volatile ulint	n_fields;	/*!< recommended prefix length for hash
1778 					search: number of full fields */
1779 	volatile bool	left_side;	/*!< true or false, depending on
1780 					whether the leftmost record of several
1781 					records with the same prefix should be
1782 					indexed in the hash index */
1783 	/* @} */
1784 
1785 	/** @name Hash search fields
1786 	These 5 fields may only be modified when:
1787 	we are holding the appropriate x-latch in btr_search_latches[], and
1788 	one of the following holds:
1789 	(1) the block state is BUF_BLOCK_FILE_PAGE, and
1790 	we are holding an s-latch or x-latch on buf_block_t::lock, or
1791 	(2) buf_block_t::buf_fix_count == 0, or
1792 	(3) the block state is BUF_BLOCK_REMOVE_HASH.
1793 
1794 	An exception to this is when we init or create a page
1795 	in the buffer pool in buf0buf.cc.
1796 
1797 	Another exception for buf_pool_clear_hash_index() is that
1798 	assigning block->index = NULL (and block->n_pointers = 0)
1799 	is allowed whenever btr_search_own_all(RW_LOCK_X).
1800 
1801 	Another exception is that ha_insert_for_fold_func() may
1802 	decrement n_pointers without holding the appropriate latch
1803 	in btr_search_latches[]. Thus, n_pointers must be
1804 	protected by atomic memory access.
1805 
1806 	This implies that the fields may be read without race
1807 	condition whenever any of the following hold:
1808 	- the btr_search_latches[] s-latch or x-latch is being held, or
1809 	- the block state is not BUF_BLOCK_FILE_PAGE or BUF_BLOCK_REMOVE_HASH,
1810 	and holding some latch prevents the state from changing to that.
1811 
1812 	Some use of assert_block_ahi_empty() or assert_block_ahi_valid()
1813 	is prone to race conditions while buf_pool_clear_hash_index() is
1814 	executing (the adaptive hash index is being disabled). Such use
1815 	is explicitly commented. */
1816 
1817 	/* @{ */
1818 
1819 #if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
1820 	ulint		n_pointers;	/*!< used in debugging: the number of
1821 					pointers in the adaptive hash index
1822 					pointing to this frame;
1823 					protected by atomic memory access
1824 					or btr_search_own_all(). */
1825 # define assert_block_ahi_empty(block)					\
1826 	ut_a(os_atomic_increment_ulint(&(block)->n_pointers, 0) == 0)
1827 # define assert_block_ahi_empty_on_init(block) do {			\
1828 	UNIV_MEM_VALID(&(block)->n_pointers, sizeof (block)->n_pointers); \
1829 	assert_block_ahi_empty(block);					\
1830 } while (0)
1831 # define assert_block_ahi_valid(block)					\
1832 	ut_a((block)->index						\
1833 	     || os_atomic_increment_ulint(&(block)->n_pointers, 0) == 0)
1834 #else /* UNIV_AHI_DEBUG || UNIV_DEBUG */
1835 # define assert_block_ahi_empty(block) /* nothing */
1836 # define assert_block_ahi_empty_on_init(block) /* nothing */
1837 # define assert_block_ahi_valid(block) /* nothing */
1838 #endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
1839 	unsigned	curr_n_fields:10;/*!< prefix length for hash indexing:
1840 					number of full fields */
1841 	unsigned	curr_n_bytes:15;/*!< number of bytes in hash
1842 					indexing */
1843 	unsigned	curr_left_side:1;/*!< TRUE or FALSE in hash indexing */
1844 	dict_index_t*	index;		/*!< Index for which the
1845 					adaptive hash index has been
1846 					created, or NULL if the page
1847 					does not exist in the
1848 					index. Note that it does not
1849 					guarantee that the index is
1850 					complete, though: there may
1851 					have been hash collisions,
1852 					record deletions, etc. */
1853 	/* @} */
1854 	bool		made_dirty_with_no_latch;
1855 					/*!< true if block has been made dirty
1856 					without acquiring X/SX latch as the
1857 					block belongs to temporary tablespace
1858 					and block is always accessed by a
1859 					single thread. */
1860 	bool		skip_flush_check;
1861 					/*!< Skip check in buf_dblwr_check_block
1862 					during bulk load, protected by lock.*/
1863 # ifdef UNIV_DEBUG
1864 	/** @name Debug fields */
1865 	/* @{ */
1866 	rw_lock_t	debug_latch;	/*!< in the debug version, each thread
1867 					which bufferfixes the block acquires
1868 					an s-latch here; so we can use the
1869 					debug utilities in sync0rw */
1870 	/* @} */
1871 # endif
1872 	BPageMutex	mutex;		/*!< mutex protecting this block:
1873 					state (also protected by the buffer
1874 					pool mutex), io_fix, buf_fix_count,
1875 					and accessed; we introduce this new
1876 					mutex in InnoDB-5.1 to relieve
1877 					contention on the buffer pool mutex */
1878 #endif /* !UNIV_HOTBACKUP */
1879 };
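/* Illustrative sketch of the optimistic pattern built on modify_clock
(the caller below is hypothetical; the real users are the optimistic
cursor-restore routines):

	// While holding an S- or X-latch on block->lock, remember the clock
	// together with the record pointer that is being cached.
	ib_uint64_t	saved_clock = block->modify_clock;

	// ... latch released, time passes ...

	// On restore, the cached pointer may be trusted only if the clock
	// has not advanced since it was saved (checked under the latch).
	bool	pointer_still_valid = (block->modify_clock == saved_clock);
*/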
1880 
1881 /** Check if a buf_block_t object is in a valid state
1882 @param block buffer block
1883 @return TRUE if valid */
1884 #define buf_block_state_valid(block)				\
1885 (buf_block_get_state(block) >= BUF_BLOCK_NOT_USED		\
1886  && (buf_block_get_state(block) <= BUF_BLOCK_REMOVE_HASH))
1887 
1888 #ifndef UNIV_HOTBACKUP
1889 /**********************************************************************//**
1890 Compute the hash fold value for blocks in buf_pool->zip_hash. */
1891 /* @{ */
1892 #define BUF_POOL_ZIP_FOLD_PTR(ptr) ((ulint) (ptr) / UNIV_PAGE_SIZE)
1893 #define BUF_POOL_ZIP_FOLD(b) BUF_POOL_ZIP_FOLD_PTR((b)->frame)
1894 #define BUF_POOL_ZIP_FOLD_BPAGE(b) BUF_POOL_ZIP_FOLD((buf_block_t*) (b))
1895 /* @} */
1896 
1897 /** A "Hazard Pointer" class used to iterate over page lists
1898 inside the buffer pool. A hazard pointer is a buf_page_t pointer
1899 which we intend to iterate over next, and which we want to remain valid
1900 even after we release the buffer pool mutex. */
1901 class HazardPointer {
1902 
1903 public:
1904 	/** Constructor
1905 	@param buf_pool buffer pool instance
1906 	@param mutex	mutex that is protecting the hp. */
1907 	HazardPointer(const buf_pool_t* buf_pool, const ib_mutex_t* mutex)
1908 		:
1909 		m_buf_pool(buf_pool)
1910 #ifdef UNIV_DEBUG
1911 		, m_mutex(mutex)
1912 #endif /* UNIV_DEBUG */
1913 		, m_hp() {}
1914 
1915 	/** Destructor */
1916 	virtual ~HazardPointer() {}
1917 
1918 	/** Get current value */
1919 	buf_page_t* get() const
1920 	{
1921 		ut_ad(mutex_own(m_mutex));
1922 		return(m_hp);
1923 	}
1924 
1925 	/** Set current value
1926 	@param bpage	buffer block to be set as hp */
1927 	void set(buf_page_t* bpage);
1928 
1929 	/** Checks if a bpage is the hp
1930 	@param bpage	buffer block to be compared
1931 	@return true if it is hp */
1932 	bool is_hp(const buf_page_t* bpage);
1933 
1934 	/** Adjust the value of hp. This happens when some
1935 	other thread working on the same list attempts to
1936 	remove the hp from the list. Must be implemented
1937 	by the derived classes.
1938 	@param bpage	buffer block to be compared */
1939 	virtual void adjust(const buf_page_t*) = 0;
1940 
1941 	/** Adjust the value of hp for moving. This happens
1942 	when some other thread working on the same list
1943 	attempts to relocate the hp of the page.
1944 	@param bpage	buffer block to be compared
1945 	@param dpage	buffer block to be moved to */
1946 	void move(const buf_page_t *bpage, buf_page_t *dpage);
1947 
1948 protected:
1949 	/** Disable copying */
1950 	HazardPointer(const HazardPointer&);
1951 	HazardPointer& operator=(const HazardPointer&);
1952 
1953 	/** Buffer pool instance */
1954 	const buf_pool_t*	m_buf_pool;
1955 
1956 #ifdef UNIV_DEBUG
1957 	/** mutex that protects access to the m_hp. */
1958 	const ib_mutex_t*	m_mutex;
1959 #endif /* UNIV_DEBUG */
1960 
1961 	/** hazard pointer. */
1962 	buf_page_t*		m_hp;
1963 };
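/* Illustrative sketch of how a backward list scan cooperates with a hazard
pointer (hypothetical loop; the concrete adjust() comes from the FlushHp and
LRUHp subclasses below):

	mutex_enter(&buf_pool->flush_list_mutex);

	for (buf_page_t* bpage = UT_LIST_GET_LAST(buf_pool->flush_list);
	     bpage != NULL;
	     bpage = buf_pool->flush_hp.get()) {

		// Publish the predecessor before dropping the mutex, so that
		// a concurrent removal of that page adjusts the pointer.
		buf_pool->flush_hp.set(UT_LIST_GET_PREV(list, bpage));

		mutex_exit(&buf_pool->flush_list_mutex);

		// ... process bpage without holding the list mutex ...

		mutex_enter(&buf_pool->flush_list_mutex);
	}

	buf_pool->flush_hp.set(NULL);
	mutex_exit(&buf_pool->flush_list_mutex);
*/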
1964 
1965 /** Class implementing buf_pool->flush_list hazard pointer */
1966 class FlushHp: public HazardPointer {
1967 
1968 public:
1969 	/** Constructor
1970 	@param buf_pool buffer pool instance
1971 	@param mutex	mutex that is protecting the hp. */
1972 	FlushHp(const buf_pool_t* buf_pool, const ib_mutex_t* mutex)
1973 		:
1974 		HazardPointer(buf_pool, mutex) {}
1975 
1976 	/** Destructor */
1977 	virtual ~FlushHp() {}
1978 
1979 	/** Adjust the value of hp. This happens when some
1980 	other thread working on the same list attempts to
1981 	remove the hp from the list.
1982 	@param bpage	buffer block to be compared */
1983 	void adjust(const buf_page_t* bpage);
1984 };
1985 
1986 /** Class implementing buf_pool->LRU hazard pointer */
1987 class LRUHp: public HazardPointer {
1988 
1989 public:
1990 	/** Constructor
1991 	@param buf_pool buffer pool instance
1992 	@param mutex	mutex that is protecting the hp. */
1993 	LRUHp(const buf_pool_t* buf_pool, const ib_mutex_t* mutex)
1994 		:
1995 		HazardPointer(buf_pool, mutex) {}
1996 
1997 	/** Destructor */
1998 	virtual ~LRUHp() {}
1999 
2000 	/** Adjust the value of hp. This happens when some
2001 	other thread working on the same list attempts to
2002 	remove the hp from the list.
2003 	@param bpage	buffer block to be compared */
2004 	void adjust(const buf_page_t* bpage);
2005 };
2006 
2007 /** Special purpose iterators to be used when scanning the LRU list.
2008 The idea is that when one thread finishes its scan, it leaves the
2009 iterator at that position so that another thread can resume the scan
2010 from there. */
2011 class LRUItr: public LRUHp {
2012 
2013 public:
2014 	/** Constructor
2015 	@param buf_pool buffer pool instance
2016 	@param mutex	mutex that is protecting the hp. */
2017 	LRUItr(const buf_pool_t* buf_pool, const ib_mutex_t* mutex)
2018 		:
2019 		LRUHp(buf_pool, mutex) {}
2020 
2021 	/** Destructor */
2022 	virtual ~LRUItr() {}
2023 
2024 	/** Selects from where to start a scan. If we have scanned
2025 	too deep into the LRU list it resets the value to the tail
2026 	of the LRU list.
2027 	@return buf_page_t from where to start scan. */
2028 	buf_page_t* start();
2029 };
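/* Illustrative use of the restartable iterator (hypothetical loop; the real
user is the LRU eviction scan in buf0lru.cc):

	mutex_enter(&buf_pool->LRU_list_mutex);

	for (buf_page_t* bpage = buf_pool->lru_scan_itr.start();
	     bpage != NULL;
	     bpage = buf_pool->lru_scan_itr.get()) {

		buf_pool->lru_scan_itr.set(UT_LIST_GET_PREV(LRU, bpage));

		// ... try to evict bpage; the mutex may be released and
		// re-acquired here, which is what the hazard pointer is for ...
	}

	mutex_exit(&buf_pool->LRU_list_mutex);
*/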
2030 
2031 /** Struct that is embedded in the free zip blocks */
2032 struct buf_buddy_free_t {
2033 	union {
2034 		ulint	size;	/*!< size of the block */
2035 		byte	bytes[FIL_PAGE_DATA];
2036 				/*!< stamp[FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID]
2037 				== BUF_BUDDY_FREE_STAMP denotes a free
2038 				block. If the space_id field of buddy
2039 				block != BUF_BUDDY_FREE_STAMP, the block
2040 				is not in any zip_free list. If the
2041 				space_id is BUF_BUDDY_FREE_STAMP then
2042 				stamp[0] will contain the
2043 				buddy block size. */
2044 	} stamp;
2045 
2046 	buf_page_t	bpage;	/*!< Embedded bpage descriptor */
2047 	UT_LIST_NODE_T(buf_buddy_free_t) list;
2048 				/*!< Node of zip_free list */
2049 };
2050 
2051 /** @brief The buffer pool statistics structure. */
2052 struct buf_pool_stat_t{
2053 	ulint	n_page_gets;	/*!< number of page gets performed;
2054 				also successful searches through
2055 				the adaptive hash index are
2056 				counted as page gets; this field
2057 				is NOT protected by the buffer
2058 				pool mutex */
2059 	ulint	n_pages_read;	/*!< number of read operations. Accessed
2060 				atomically. */
2061 	ulint	n_pages_written;/*!< number of write operations. Accessed
2062 				atomically. */
2063 	ulint	n_pages_created;/*!< number of pages created
2064 				in the pool with no read. Accessed
2065 				atomically. */
2066 	ulint	n_ra_pages_read_rnd;/*!< number of pages read in
2067 				as part of random read ahead. Not protected. */
2068 	ulint	n_ra_pages_read;/*!< number of pages read in
2069 				as part of read ahead. Not protected. */
2070 	ulint	n_ra_pages_evicted;/*!< number of read ahead
2071 				pages that are evicted without
2072 				being accessed. Protected by LRU_list_mutex. */
2073 	ulint	n_pages_made_young; /*!< number of pages made young, in
2074 				calls to buf_LRU_make_block_young(). Protected
2075 				by LRU_list_mutex. */
2076 	ulint	n_pages_not_made_young; /*!< number of pages not made
2077 				young because the first access
2078 				was not long enough ago, in
2079 				buf_page_peek_if_too_old(). Not protected. */
2080 	ulint	LRU_bytes;	/*!< LRU size in bytes. Protected by
2081 				LRU_list_mutex. */
2082 	ulint	flush_list_bytes;/*!< flush_list size in bytes.
2083 				Protected by flush_list_mutex */
2084 	ulint	buf_lru_flush_page_count;
2085 };
2086 
2087 /** Statistics of buddy blocks of a given size. */
2088 struct buf_buddy_stat_t {
2089 	/** Number of blocks allocated from the buddy system. */
2090 	ulint		used;
2091 	/** Number of blocks relocated by the buddy system. */
2092 	ib_uint64_t	relocated;
2093 	/** Total duration of block relocations, in microseconds. */
2094 	ib_uint64_t	relocated_usec;
2095 };
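/* Small worked example (hypothetical helper, not part of InnoDB): the average
relocation cost for one block size follows directly from the two counters.

	inline double
	buf_buddy_mean_relocate_usec(const buf_buddy_stat_t& stat)
	{
		return(stat.relocated > 0
		       ? static_cast<double>(stat.relocated_usec)
			 / static_cast<double>(stat.relocated)
		       : 0.0);
	}
*/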
2096 
2097 /** @brief The buffer pool structure.
2098 
2099 NOTE! The definition appears here only for other modules of this
2100 directory (buf) to see it. Do not use from outside! */
2101 
2102 struct buf_pool_t{
2103 
2104 	/** @name General fields */
2105 	/* @{ */
2106 	BufListMutex	chunks_mutex; /*!< protects (de)allocation of chunks:
2107 					- changes to chunks, n_chunks are performed
2108 					while holding this latch,
2109 					- reading buf_pool_should_madvise requires
2110 					holding this latch for any buf_pool_t
2111 					- writing to buf_pool_should_madvise requires
2112 					holding these latches for all buf_pool_t-s */
2113 	BufListMutex	LRU_list_mutex;	 /*!< LRU list mutex */
2114 	BufListMutex	free_list_mutex; /*!< free and withdraw list mutex */
2115 	BufListMutex	zip_free_mutex;	 /*!< buddy allocator mutex */
2116 	BufListMutex	zip_hash_mutex;	 /*!< zip_hash mutex */
2117 	ib_mutex_t	flush_state_mutex; /*!< Flush state protection
2118 					mutex */
2119 	BufPoolZipMutex	zip_mutex;	/*!< Zip mutex of this buffer
2120 					pool instance, protects compressed
2121 					only pages (of type buf_page_t, not
2122 					buf_block_t) */
2123 	ulint		instance_no;	/*!< Array index of this buffer
2124 					pool instance */
2125 	ulint		curr_pool_size;	/*!< Current pool size in bytes */
2126 	ulint		LRU_old_ratio;  /*!< Reserve this much of the buffer
2127 					pool for "old" blocks */
2128 #ifdef UNIV_DEBUG
2129 	ulint		buddy_n_frames; /*!< Number of frames allocated from
2130 					the buffer pool to the buddy system.
2131 					Protected by zip_hash_mutex. */
2132 #endif
2133 	ut_allocator<unsigned char>	allocator;	/*!< Allocator used for
2134 					allocating memory for the "chunks"
2135 					member. */
2136 	volatile ulint	n_chunks;	/*!< number of buffer pool chunks */
2137 	volatile ulint	n_chunks_new;	/*!< new number of buffer pool chunks */
2138 	buf_chunk_t*	chunks;		/*!< buffer pool chunks */
2139 	buf_chunk_t*	chunks_old;	/*!< old buffer pool chunks to be freed
2140 					after resizing buffer pool */
2141 	ulint		curr_size;	/*!< current pool size in pages */
2142 	ulint		old_size;	/*!< previous pool size in pages */
2143 	ulint		read_ahead_area;/*!< size in pages of the area which
2144 					the read-ahead algorithms read if
2145 					invoked */
2146 	hash_table_t*	page_hash;	/*!< hash table of buf_page_t or
2147 					buf_block_t file pages,
2148 					buf_page_in_file() == TRUE,
2149 					indexed by (space_id, offset).
2150 					page_hash is protected by an
2151 					array of mutexes. */
2152 	hash_table_t*	zip_hash;	/*!< hash table of buf_block_t blocks
2153 					whose frames are allocated to the
2154 					zip buddy system,
2155 					indexed by block->frame */
2156 	ulint		n_pend_reads;	/*!< number of pending read
2157 					operations. Accessed atomically */
2158 	ulint		n_pend_unzip;	/*!< number of pending decompressions.
2159                                         Accessed atomically. */
2160 
2161 	ib_time_monotonic_t		last_printout_time;
2162 					/*!< when buf_print_io was last time
2163 					called. Accesses not protected. */
2164 	buf_buddy_stat_t buddy_stat[BUF_BUDDY_SIZES_MAX + 1];
2165 					/*!< Statistics of buddy system,
2166 					indexed by block size. Protected by
2167 					zip_free mutex, except for the used
2168 					field, which is also accessed
2169 					atomically */
2170 	buf_pool_stat_t	stat;		/*!< current statistics */
2171 	buf_pool_stat_t	old_stat;	/*!< old statistics */
2172 
2173 	/* @} */
2174 
2175 	/** @name Page flushing algorithm fields */
2176 
2177 	/* @{ */
2178 
2179 	BufListMutex	flush_list_mutex;/*!< mutex protecting the
2180 					flush list access. This mutex
2181 					protects flush_list, flush_rbt
2182 					and bpage::list pointers when
2183 					the bpage is on flush_list. It
2184 					also protects writes to
2185 					bpage::oldest_modification and
2186 					flush_list_hp */
2187 	FlushHp			flush_hp;/*!< "hazard pointer"
2188 					used during scan of flush_list
2189 					while doing flush list batch.
2190 					Protected by flush_list_mutex */
2191 	FlushHp			oldest_hp;/*!< entry pointer to scan the oldest
2192 					page except for system temporary */
2193 	UT_LIST_BASE_NODE_T(buf_page_t) flush_list;
2194 					/*!< base node of the modified block
2195 					list */
2196 	ibool		init_flush[BUF_FLUSH_N_TYPES];
2197 					/*!< this is TRUE when a flush of the
2198 					given type is being initialized.
2199 					Protected by flush_state_mutex. */
2200 	ulint		n_flush[BUF_FLUSH_N_TYPES];
2201 					/*!< this is the number of pending
2202 					writes in the given flush type.
2203 					Protected by flush_state_mutex. */
2204 	os_event_t	no_flush[BUF_FLUSH_N_TYPES];
2205 					/*!< this is in the set state
2206 					when there is no flush batch
2207 					of the given type running. Protected by
2208 					flush_state_mutex. */
2209 	ib_rbt_t*	flush_rbt;	/*!< a red-black tree is used
2210 					exclusively during recovery to
2211 					speed up insertions in the
2212 					flush_list. This tree contains
2213 					blocks in order of
2214 					oldest_modification LSN and is
2215 					kept in sync with the
2216 					flush_list.
2217 					Each member of the tree MUST
2218 					also be on the flush_list.
2219 					This tree is relevant only in
2220 					recovery and is set to NULL
2221 					once the recovery is over.
2222 					Protected by flush_list_mutex */
2223 	ulint		freed_page_clock;/*!< a sequence number used
2224 					to count the number of buffer
2225 					blocks removed from the end of
2226 					the LRU list; NOTE that this
2227 					counter may wrap around at 4
2228 					billion! A thread is allowed
2229 					to read this for heuristic
2230 					purposes without holding any
2231 					mutex or latch. For non-heuristic
2232 					purposes protected by LRU_list_mutex */
2233 	ibool		try_LRU_scan;	/*!< Set to FALSE when an LRU
2234 					scan for free block fails. This
2235 					flag is used to avoid repeated
2236 					scans of LRU list when we know
2237 					that there is no free block
2238 					available in the scan depth for
2239 					eviction. Set to TRUE whenever
2240 					we flush a batch from the
2241 					buffer pool. Accesses are protected
2242 					by memory barriers. */
2243 	/* @} */
2244 
2245 	/** @name LRU replacement algorithm fields */
2246 	/* @{ */
2247 
2248 	UT_LIST_BASE_NODE_T(buf_page_t) free;
2249 					/*!< base node of the free
2250 					block list */
2251 
2252 	UT_LIST_BASE_NODE_T(buf_page_t) withdraw;
2253 					/*!< base node of the withdraw
2254 					block list. It is only used while
2255 					shrinking the buffer pool: blocks on
2256 					this list will be removed and must not
2257 					be reused. Protected by free_list_mutex */
2258 
2259 	ulint		withdraw_target;/*!< target length of withdraw
2260 					block list, when withdrawing */
2261 
2262 	/** "hazard pointer" used during scan of LRU while doing
2263 	LRU list batch.  Protected by buf_pool::LRU_list_mutex */
2264 	LRUHp		lru_hp;
2265 
2266 	/** Iterator used to scan the LRU list when searching for
2267 	replaceable victim. Protected by buf_pool::LRU_list_mutex. */
2268 	LRUItr		lru_scan_itr;
2269 
2270 	/** Iterator used to scan the LRU list when searching for
2271 	single page flushing victim.  Protected by buf_pool::LRU_list_mutex. */
2272 	LRUItr		single_scan_itr;
2273 
2274 	UT_LIST_BASE_NODE_T(buf_page_t) LRU;
2275 					/*!< base node of the LRU list */
2276 
2277 	buf_page_t*	LRU_old;	/*!< pointer to the about
2278 					LRU_old_ratio/BUF_LRU_OLD_RATIO_DIV
2279 					oldest blocks in the LRU list;
2280 					NULL if LRU length less than
2281 					BUF_LRU_OLD_MIN_LEN;
2282 					NOTE: when LRU_old != NULL, its length
2283 					should always equal LRU_old_len */
2284 	ulint		LRU_old_len;	/*!< length of the LRU list from
2285 					the block to which LRU_old points
2286 					onward, including that block;
2287 					see buf0lru.cc for the restrictions
2288 					on this value; 0 if LRU_old == NULL;
2289 					NOTE: LRU_old_len must be adjusted
2290 					whenever LRU_old shrinks or grows! */
2291 
2292 	UT_LIST_BASE_NODE_T(buf_block_t) unzip_LRU;
2293 					/*!< base node of the
2294 					unzip_LRU list. The list is protected
2295 					by LRU_list_mutex. */
2296 
2297 	/* @} */
2298 	/** @name Buddy allocator fields
2299 	The buddy allocator is used for allocating compressed page
2300 	frames and buf_page_t descriptors of blocks that exist
2301 	in the buffer pool only in compressed form. */
2302 	/* @{ */
2303 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
2304 	UT_LIST_BASE_NODE_T(buf_page_t)	zip_clean;
2305 					/*!< unmodified compressed pages */
2306 #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
2307 	UT_LIST_BASE_NODE_T(buf_buddy_free_t) zip_free[BUF_BUDDY_SIZES_MAX];
2308 					/*!< buddy free lists */
2309 
2310 	buf_page_t*			watch;
2311 					/*!< Sentinel records for buffer
2312 					pool watches. Scanning the array is
2313 					protected by taking all page_hash
2314 					latches in X. Updating or reading an
2315 					individual watch page is protected by
2316 					a corresponding individual page_hash
2317 					latch. */
2318 
2319 	/** A wrapper for buf_pool_t::allocator.allocate_large which also advises the
2320 	OS that this chunk should not be dumped to a core file if that was requested.
2321 	Emits a warning to the log and disables @@global.core_file if advising was
2322 	requested but could not be performed, but still returns true if the allocation
2323 	itself succeeded.
2324 	@param[in]	  mem_size  number of bytes to allocate
2325 	@param[in,out]	chunk     mem and mem_pfx fields of this chunk will be updated
2326 	to contain information about the allocated memory region
2327 	@return true if allocated successfully */
2328 	bool
2329 	allocate_chunk(ulonglong mem_size, buf_chunk_t *chunk, bool populate);
2330 
2331 	/** A wrapper for buf_pool_t::allocator.deallocate_large which also advises
2332 	the OS that this chunk can be dumped to a core file.
2333 	Emits a warning to the log and disables @@global.core_file if advising was
2334 	requested but could not be performed.
2335 	@param[in]  chunk   mem and mem_pfx fields of this chunk will be used to
2336 	locate the memory region to free */
2337 	void
2338 	deallocate_chunk(buf_chunk_t *chunk);
2339 
2340 	/** Advises the OS that all chunks in this buffer pool instance can be dumped
2341 	to a core file.
2342 	Emits a warning to the log if it could not succeed.
2343 	@return true if succeeded, false if no OS support or failed */
2344 	bool
2345 	madvise_dump();
2346 
2347 	/** Advises the OS that all chunks in this buffer pool instance should not
2348 	be dumped to a core file.
2349 	Emits a warning to the log if it could not succeed.
2350 	@return true if succeeded, false if no OS support or failed */
2351 	bool
2352 	madvise_dont_dump();
2353 
2354 #if BUF_BUDDY_LOW > UNIV_ZIP_SIZE_MIN
2355 # error "BUF_BUDDY_LOW > UNIV_ZIP_SIZE_MIN"
2356 #endif
2357 	/* @} */
2358 };
2359 
2360 /** Print the given buf_pool_t object.
2361 @param[in,out]	out		the output stream
2362 @param[in]	buf_pool	the buf_pool_t object to be printed
2363 @return the output stream */
2364 std::ostream&
2365 operator<<(
2366         std::ostream&		out,
2367         const buf_pool_t&	buf_pool);
2368 
2369 /** @name Accessors for buffer pool mutexes
2370 Use these instead of accessing buffer pool mutexes directly. */
2371 /* @{ */
2372 
2373 /** Test if flush list mutex is owned. */
2374 #define buf_flush_list_mutex_own(b) mutex_own(&(b)->flush_list_mutex)
2375 
2376 /** Acquire the flush list mutex. */
2377 #define buf_flush_list_mutex_enter(b) do {	\
2378 	mutex_enter(&(b)->flush_list_mutex);	\
2379 } while (0)
2380 /** Release the flush list mutex. */
2381 # define buf_flush_list_mutex_exit(b) do {	\
2382 	mutex_exit(&(b)->flush_list_mutex);	\
2383 } while (0)
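/* Typical usage of the flush list mutex accessors (illustrative):

	buf_flush_list_mutex_enter(buf_pool);

	ut_ad(buf_flush_list_mutex_own(buf_pool));
	// ... manipulate buf_pool->flush_list or oldest_modification ...

	buf_flush_list_mutex_exit(buf_pool);
*/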
2384 
2385 
2386 /** Test if block->mutex is owned. */
2387 #define buf_page_mutex_own(b)	(b)->mutex.is_owned()
2388 
2389 /** Acquire the block->mutex. */
2390 #define buf_page_mutex_enter(b) do {			\
2391 	mutex_enter(&(b)->mutex);			\
2392 } while (0)
2393 
2394 /** Release the block->mutex. */
2395 #define buf_page_mutex_exit(b) do {			\
2396 	(b)->mutex.exit();				\
2397 } while (0)
2398 
2399 
2400 /** Get appropriate page_hash_lock. */
2401 # define buf_page_hash_lock_get(buf_pool, page_id)	\
2402 	hash_get_lock((buf_pool)->page_hash, (page_id).fold())
2403 
2404 /** If not appropriate page_hash_lock, relock until appropriate. */
2405 # define buf_page_hash_lock_s_confirm(hash_lock, buf_pool, page_id)\
2406 	hash_lock_s_confirm(hash_lock, (buf_pool)->page_hash, (page_id).fold())
2407 
2408 # define buf_page_hash_lock_x_confirm(hash_lock, buf_pool, page_id)\
2409 	hash_lock_x_confirm(hash_lock, (buf_pool)->page_hash, (page_id).fold())
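/* Illustrative use of the confirm macros: after blocking on the latch, the
page_hash may have been resized, so the fold value may now map to a different
latch; the confirm step relocks until the held latch matches the current
table (sketch only):

	rw_lock_t*	hash_lock = buf_page_hash_lock_get(buf_pool, page_id);

	rw_lock_s_lock(hash_lock);

	hash_lock = buf_page_hash_lock_s_confirm(hash_lock, buf_pool, page_id);

	// hash_lock is now the latch that really covers page_id.fold().
*/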
2410 
2411 #ifdef UNIV_DEBUG
2412 /** Test if page_hash lock is held in s-mode. */
2413 # define buf_page_hash_lock_held_s(buf_pool, bpage)	\
2414 	rw_lock_own(buf_page_hash_lock_get((buf_pool), (bpage)->id), RW_LOCK_S)
2415 
2416 /** Test if page_hash lock is held in x-mode. */
2417 # define buf_page_hash_lock_held_x(buf_pool, bpage)	\
2418 	rw_lock_own(buf_page_hash_lock_get((buf_pool), (bpage)->id), RW_LOCK_X)
2419 
2420 /** Test if page_hash lock is held in x or s-mode. */
2421 # define buf_page_hash_lock_held_s_or_x(buf_pool, bpage)\
2422 	(buf_page_hash_lock_held_s((buf_pool), (bpage))	\
2423 	 || buf_page_hash_lock_held_x((buf_pool), (bpage)))
2424 
2425 # define buf_block_hash_lock_held_s(buf_pool, block)	\
2426 	buf_page_hash_lock_held_s((buf_pool), &(block)->page)
2427 
2428 # define buf_block_hash_lock_held_x(buf_pool, block)	\
2429 	buf_page_hash_lock_held_x((buf_pool), &(block)->page)
2430 
2431 # define buf_block_hash_lock_held_s_or_x(buf_pool, block)	\
2432 	buf_page_hash_lock_held_s_or_x((buf_pool), &(block)->page)
2433 #else /* UNIV_DEBUG */
2434 # define buf_page_hash_lock_held_s(b, p)	(TRUE)
2435 # define buf_page_hash_lock_held_x(b, p)	(TRUE)
2436 # define buf_page_hash_lock_held_s_or_x(b, p)	(TRUE)
2437 # define buf_block_hash_lock_held_s(b, p)	(TRUE)
2438 # define buf_block_hash_lock_held_x(b, p)	(TRUE)
2439 # define buf_block_hash_lock_held_s_or_x(b, p)	(TRUE)
2440 #endif /* UNIV_DEBUG */
2441 
2442 #endif /* !UNIV_HOTBACKUP */
2443 /* @} */
2444 
2445 /**********************************************************************
2446 Let us list the consistency conditions for different control block states.
2447 
2448 NOT_USED:	is in free list, not in LRU list, not in flush list, nor
2449 		page hash table
2450 READY_FOR_USE:	is not in free list, LRU list, or flush list, nor page
2451 		hash table
2452 MEMORY:		is not in free list, LRU list, or flush list, nor page
2453 		hash table
2454 FILE_PAGE:	space and offset are defined, is in page hash table
2455 		if io_fix == BUF_IO_WRITE,
2456 			pool: no_flush[flush_type] is in reset state,
2457 			pool: n_flush[flush_type] > 0
2458 
2459 		(1) if buf_fix_count == 0, then
2460 			is in LRU list, not in free list
2461 			is in flush list,
2462 				if and only if oldest_modification > 0
2463 			is x-locked,
2464 				if and only if io_fix == BUF_IO_READ
2465 			is s-locked,
2466 				if and only if io_fix == BUF_IO_WRITE
2467 
2468 		(2) if buf_fix_count > 0, then
2469 			is not in LRU list, not in free list
2470 			is in flush list,
2471 				if and only if oldest_modification > 0
2472 			if io_fix == BUF_IO_READ,
2473 				is x-locked
2474 			if io_fix == BUF_IO_WRITE,
2475 				is s-locked
2476 
2477 State transitions:
2478 
2479 NOT_USED => READY_FOR_USE
2480 READY_FOR_USE => MEMORY
2481 READY_FOR_USE => FILE_PAGE
2482 MEMORY => NOT_USED
2483 FILE_PAGE => NOT_USED	NOTE: This transition is allowed if and only if
2484 				(1) buf_fix_count == 0,
2485 				(2) oldest_modification == 0, and
2486 				(3) io_fix == 0.
2487 */
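/* A minimal debug sketch of the FILE_PAGE => NOT_USED precondition listed
above (hypothetical helper; the real checks live in buf0lru.cc and are done
under the block mutex):

	inline void
	assert_ready_for_eviction(const buf_page_t* bpage)
	{
		ut_ad(bpage->buf_fix_count == 0);
		ut_ad(bpage->oldest_modification == 0);
		ut_ad(bpage->io_fix == BUF_IO_NONE);
	}
*/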
2488 
2489 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
2490 /** Functor to validate the LRU list. */
2491 struct	CheckInLRUList {
2492 	void	operator()(const buf_page_t* elem) const
2493 	{
2494 		ut_a(elem->in_LRU_list);
2495 	}
2496 
2497 	static void validate(const buf_pool_t* buf_pool)
2498 	{
2499 		CheckInLRUList	check;
2500 		ut_list_validate(buf_pool->LRU, check);
2501 	}
2502 };
2503 
2504 /** Functor to validate the free list. */
2505 struct	CheckInFreeList {
2506 	void	operator()(const buf_page_t* elem) const
2507 	{
2508 		ut_a(elem->in_free_list);
2509 	}
2510 
2511 	static void validate(const buf_pool_t* buf_pool)
2512 	{
2513 		CheckInFreeList	check;
2514 		ut_list_validate(buf_pool->free, check);
2515 	}
2516 };
2517 
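/** Functor to check that blocks on the unzip_LRU list are also in the LRU
list. */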
2518 struct	CheckUnzipLRUAndLRUList {
2519 	void	operator()(const buf_block_t* elem) const
2520 	{
2521                 ut_a(elem->page.in_LRU_list);
2522                 ut_a(elem->in_unzip_LRU_list);
2523 	}
2524 
2525 	static void validate(const buf_pool_t* buf_pool)
2526 	{
2527 		CheckUnzipLRUAndLRUList	check;
2528 		ut_list_validate(buf_pool->unzip_LRU, check);
2529 	}
2530 };
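/* Illustrative debug-build usage of the functors above (the corresponding
list mutexes are assumed to be held by the caller):

	CheckInLRUList::validate(buf_pool);
	CheckInFreeList::validate(buf_pool);
	CheckUnzipLRUAndLRUList::validate(buf_pool);
*/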
2531 #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
2532 
2533 #ifndef UNIV_NONINL
2534 #include "buf0buf.ic"
2535 #endif
2536 #endif /* !UNIV_INNOCHECKSUM */
2537 
2538 #endif
2539