1 /*****************************************************************************
2 
3 Copyright (c) 1995, 2020, Oracle and/or its affiliates. All Rights Reserved.
4 
5 This program is free software; you can redistribute it and/or modify it under
6 the terms of the GNU General Public License, version 2.0, as published by the
7 Free Software Foundation.
8 
9 This program is also distributed with certain software (including but not
10 limited to OpenSSL) that is licensed under separate terms, as designated in a
11 particular file or component or in included license documentation. The authors
12 of MySQL hereby grant you an additional permission to link the program and
13 your derivative works with the separately licensed software that they have
14 included with MySQL.
15 
16 This program is distributed in the hope that it will be useful, but WITHOUT
17 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
18 FOR A PARTICULAR PURPOSE. See the GNU General Public License, version 2.0,
19 for more details.
20 
21 You should have received a copy of the GNU General Public License along with
22 this program; if not, write to the Free Software Foundation, Inc.,
23 51 Franklin St, Fifth Floor, Boston, MA 02110-1301  USA
24 
25 *****************************************************************************/
26 
27 /** @file include/buf0buf.h
28  The database buffer pool high-level routines
29 
30  Created 11/5/1995 Heikki Tuuri
31  *******************************************************/
32 
33 #ifndef buf0buf_h
34 #define buf0buf_h
35 
36 #include "buf0types.h"
37 #include "fil0fil.h"
38 #include "hash0hash.h"
39 #include "log0log.h"
40 #include "mtr0types.h"
41 #include "os0proc.h"
42 #include "page0types.h"
43 #include "srv0srv.h"
44 #include "univ.i"
45 #include "ut0byte.h"
46 #include "ut0rbt.h"
47 
48 #include "buf/buf.h"
49 
50 #include <ostream>
51 
52 // Forward declaration
53 struct fil_addr_t;
54 
55 /** @name Modes for buf_page_get_gen */
56 /* @{ */
57 enum class Page_fetch {
58   /** Get always */
59   NORMAL,
60 
61   /** Same as NORMAL, but hint that the fetch is part of a large scan.
62   Try not to flood the buffer pool with pages that may not be accessed again
63   any time soon. */
64   SCAN,
65 
66   /** Get if in pool. */
67   IF_IN_POOL,
68 
69   /** Get if in pool, do not make the block young in the LRU list. */
70   PEEK_IF_IN_POOL,
71 
72   /** Get and bufferfix, but set no latch; we have separated this case because
73   it is error-prone programming not to set a latch, and it should be used with
74   care. */
75   NO_LATCH,
76 
77   /** Get the page only if it's in the buffer pool, if not then set a watch on
78   the page. */
79   IF_IN_POOL_OR_WATCH,
80 
81   /** Like Page_fetch::NORMAL, but do not mind if the file page has been
82   freed. */
83   POSSIBLY_FREED
84 };
85 /* @} */
86 
87 /** @name Modes for buf_page_get_known_nowait */
88 
89 /* @{ */
90 enum class Cache_hint {
91   /** Move the block to the start of the LRU list if there is a danger that the
92   block would drift out of the buffer pool. */
93   MAKE_YOUNG = 51,
94 
95   /** Preserve the current LRU position of the block. */
96   KEEP_OLD = 52
97 };
98 
99 /* @} */
100 
101 /** Number of bits needed to represent a buffer pool ID */
102 constexpr ulint MAX_BUFFER_POOLS_BITS = 6;
103 
104 /** The maximum number of buffer pools that can be defined */
105 constexpr ulint MAX_BUFFER_POOLS = (1 << MAX_BUFFER_POOLS_BITS);
106 
107 /** Maximum number of concurrent buffer pool watches */
108 #define BUF_POOL_WATCH_SIZE (srv_n_purge_threads + 1)
109 
110 /** The maximum number of page_hash locks */
111 constexpr ulint MAX_PAGE_HASH_LOCKS = 1024;
112 
113 /** The buffer pools of the database */
114 extern buf_pool_t *buf_pool_ptr;
115 
116 /** true when withdrawing buffer pool pages might cause page relocation */
117 extern volatile bool buf_pool_withdrawing;
118 
119 /** the clock is incremented every time a pointer to a page may become
120 obsolete */
121 extern volatile ulint buf_withdraw_clock;
122 
123 #ifdef UNIV_HOTBACKUP
124 /** first block, for --apply-log */
125 extern buf_block_t *back_block1;
126 /** second block, for page reorganize */
127 extern buf_block_t *back_block2;
128 #endif /* UNIV_HOTBACKUP */
129 
130 /** @brief States of a control block
131 @see buf_page_t
132 
133 The enumeration values must be 0..7. */
134 enum buf_page_state {
135   BUF_BLOCK_POOL_WATCH, /*!< a sentinel for the buffer pool
136                         watch, element of buf_pool->watch[] */
137   BUF_BLOCK_ZIP_PAGE,   /*!< contains a clean
138                         compressed page */
139   BUF_BLOCK_ZIP_DIRTY,  /*!< contains a compressed
140                         page that is in the
141                         buf_pool->flush_list */
142 
143   BUF_BLOCK_NOT_USED,      /*!< is in the free list;
144                            must be after the BUF_BLOCK_ZIP_
145                            constants for compressed-only pages
146                            @see buf_block_state_valid() */
147   BUF_BLOCK_READY_FOR_USE, /*!< when buf_LRU_get_free_block
148                            returns a block, it is in this state */
149   BUF_BLOCK_FILE_PAGE,     /*!< contains a buffered file page */
150   BUF_BLOCK_MEMORY,        /*!< contains some main memory
151                            object */
152   BUF_BLOCK_REMOVE_HASH    /*!< hash index should be removed
153                            before putting to the free list */
154 };
155 
156 /** This structure defines information we will fetch from each buffer pool. It
157 will be used to print table IO stats */
158 struct buf_pool_info_t {
159   /* General buffer pool info */
160   ulint pool_unique_id;              /*!< Buffer Pool ID */
161   ulint pool_size;                   /*!< Buffer Pool size in pages */
162   ulint lru_len;                     /*!< Length of buf_pool->LRU */
163   ulint old_lru_len;                 /*!< buf_pool->LRU_old_len */
164   ulint free_list_len;               /*!< Length of buf_pool->free list */
165   ulint flush_list_len;              /*!< Length of buf_pool->flush_list */
166   ulint n_pend_unzip;                /*!< buf_pool->n_pend_unzip, pages
167                                      pending decompress */
168   ulint n_pend_reads;                /*!< buf_pool->n_pend_reads, pages
169                                      pending read */
170   ulint n_pending_flush_lru;         /*!< Pages pending flush in LRU */
171   ulint n_pending_flush_single_page; /*!< Pages pending to be
172                                  flushed as part of single page
173                                  flushes issued by various user
174                                  threads */
175   ulint n_pending_flush_list;        /*!< Pages pending flush in FLUSH
176                                      LIST */
177   ulint n_pages_made_young;          /*!< number of pages made young */
178   ulint n_pages_not_made_young;      /*!< number of pages not made young */
179   ulint n_pages_read;                /*!< buf_pool->n_pages_read */
180   ulint n_pages_created;             /*!< buf_pool->n_pages_created */
181   ulint n_pages_written;             /*!< buf_pool->n_pages_written */
182   ulint n_page_gets;                 /*!< buf_pool->n_page_gets */
183   ulint n_ra_pages_read_rnd;         /*!< buf_pool->n_ra_pages_read_rnd, pages
184                                      read via random read-ahead */
185   ulint n_ra_pages_read;             /*!< buf_pool->n_ra_pages_read, pages
186                                      read via linear read-ahead */
187   ulint n_ra_pages_evicted;          /*!< buf_pool->n_ra_pages_evicted,
188                                      number of readahead pages evicted
189                                      without access */
190   ulint n_page_get_delta;            /*!< num of buffer pool page gets since
191                                      last printout */
192 
193   /* Buffer pool access stats */
194   double page_made_young_rate;     /*!< page made young rate in pages
195                                    per second */
196   double page_not_made_young_rate; /*!< page not made young rate
197                                   in pages per second */
198   double pages_read_rate;          /*!< num of pages read per second */
199   double pages_created_rate;       /*!< num of pages created per second */
200   double pages_written_rate;       /*!< num of pages written per second */
201   ulint page_read_delta;           /*!< num of pages read since last
202                                    printout */
203   ulint young_making_delta;        /*!< num of pages made young since
204                                    last printout */
205   ulint not_young_making_delta;    /*!< num of pages not made young since
206                                    last printout */
207 
208   /* Statistics about read ahead algorithm.  */
209   double pages_readahead_rnd_rate; /*!< random readahead rate in pages per
210                                   second */
211   double pages_readahead_rate;     /*!< readahead rate in pages per
212                                    second */
213   double pages_evicted_rate;       /*!< rate of readahead page evicted
214                                    without access, in pages per second */
215 
216   /* Stats about LRU eviction */
217   ulint unzip_lru_len; /*!< length of buf_pool->unzip_LRU
218                        list */
219   /* Counters for LRU policy */
220   ulint io_sum;    /*!< buf_LRU_stat_sum.io */
221   ulint io_cur;    /*!< buf_LRU_stat_cur.io, num of IO
222                    for current interval */
223   ulint unzip_sum; /*!< buf_LRU_stat_sum.unzip */
224   ulint unzip_cur; /*!< buf_LRU_stat_cur.unzip, num
225                    pages decompressed in current
226                    interval */
227 };
228 
229 /** The occupied bytes of lists in all buffer pools */
230 struct buf_pools_list_size_t {
231   ulint LRU_bytes;        /*!< LRU size in bytes */
232   ulint unzip_LRU_bytes;  /*!< unzip_LRU size in bytes */
233   ulint flush_list_bytes; /*!< flush_list size in bytes */
234 };
235 
236 #ifndef UNIV_HOTBACKUP
237 /** Creates the buffer pool.
238 @param[in]  total_size    Size of the total pool in bytes.
239 @param[in]  n_instances   Number of buffer pool instances to create.
240 @return DB_SUCCESS if success, DB_ERROR if not enough memory or error */
241 dberr_t buf_pool_init(ulint total_size, ulint n_instances);
242 
243 /** Frees the buffer pool at shutdown.  This must not be invoked before
244  freeing all mutexes. */
245 void buf_pool_free_all();
246 
247 /** Determines if a block is intended to be withdrawn.
248 @param[in]	buf_pool	buffer pool instance
249 @param[in]	block		pointer to control block
250 @retval true	if will be withdrawn */
251 bool buf_block_will_withdrawn(buf_pool_t *buf_pool, const buf_block_t *block);
252 
253 /** Determines if a frame is intended to be withdrawn.
254 @param[in]	buf_pool	buffer pool instance
255 @param[in]	ptr		pointer to a frame
256 @retval true	if will be withdrawn */
257 bool buf_frame_will_withdrawn(buf_pool_t *buf_pool, const byte *ptr);
258 
259 /** This is the thread for resizing the buffer pool. It waits for an event
260 and, when woken up, performs a resizing and sleeps again. */
261 void buf_resize_thread();
262 
263 /** Checks if the innobase_should_madvise_buf_pool() value has changed since
264 we last checked and, if so, updates buf_pool_should_madvise and calls madvise
265 for all chunks in all srv_buf_pool_instances.
266 @see buf_pool_should_madvise comment for a longer explanation. */
267 void buf_pool_update_madvise();
268 
269 /** Clears the adaptive hash index on all pages in the buffer pool. */
270 void buf_pool_clear_hash_index(void);
271 
272 /** Gets the current size of the buffer pool in bytes.
273  @return size in bytes */
274 UNIV_INLINE
275 ulint buf_pool_get_curr_size(void);
276 /** Gets the current size of the buffer pool in frames.
277  @return size in pages */
278 UNIV_INLINE
279 ulint buf_pool_get_n_pages(void);
280 #endif /* !UNIV_HOTBACKUP */
281 
282 /** Gets the smallest oldest_modification lsn among all of the earliest
283 added pages in flush lists. In other words, it takes the last dirty page
284 from each flush list and calculates the minimum oldest_modification among
285 all of them. It does not acquire a global lock for the whole process, so
286 the result might come from an inconsistent view of the flush lists.
287 
288 @note Because of the relaxed order in each flush list, this function no
289 longer returns the smallest oldest_modification among all of the dirty
290 pages. If you want a safe lsn, which is smaller than every
291 oldest_modification, you need to use another function:
292         buf_pool_get_oldest_modification_lwm().
293 
294 Returns zero if there were no dirty pages (flush lists were empty).
295 
296 @return minimum oldest_modification of last pages from flush lists,
297         zero if flush lists were empty */
298 lsn_t buf_pool_get_oldest_modification_approx(void);
299 
300 /** Gets a safe low watermark for oldest_modification. It's guaranteed
301 that there were no dirty pages with a smaller oldest_modification in the
302 whole flush lists.
303 
304 Returns zero if the flush lists were empty; be careful in that case,
305 because taking the newest lsn is probably not a good idea. If you want to
306 rely on some lsn in that case, you need to follow this pattern:
307 
308         dpa_lsn = log_buffer_dirty_pages_added_up_to_lsn(*log_sys);
309 
310         lwm_lsn = buf_pool_get_oldest_modification_lwm();
311 
312         if (lwm_lsn == 0) lwm_lsn = dpa_lsn;
313 
314 The order is important to avoid race conditions.
315 
316 @remarks
317 It's guaranteed that the returned value will not be smaller than the
318 last checkpoint lsn. It's not guaranteed that the returned value is
319 the maximum possible. It's just a best effort at low cost.
320 It basically takes the result of buf_pool_get_oldest_modification_approx()
321 and subtracts the maximum possible lag introduced by the relaxed order in
322 flush lists (srv_log_recent_closed_size).
323 
324 @return	safe low watermark for oldest_modification of dirty pages,
325         or zero if flush lists were empty; if non-zero, it is then
326         guaranteed not to be at block boundary (and it points to lsn
327         inside data fragment of block) */
328 lsn_t buf_pool_get_oldest_modification_lwm(void);
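/* A minimal sketch of the fallback pattern above, written out with types
(illustrative only; it assumes a context where log_sys is available):

  lsn_t dpa_lsn = log_buffer_dirty_pages_added_up_to_lsn(*log_sys);

  lsn_t lwm_lsn = buf_pool_get_oldest_modification_lwm();

  if (lwm_lsn == 0) {
    lwm_lsn = dpa_lsn; // flush lists were empty - fall back to dpa_lsn
  }

As noted above, dpa_lsn must be read before lwm_lsn to avoid a race with
pages being added to the flush lists between the two reads. */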
329 
330 #ifndef UNIV_HOTBACKUP
331 
332 /** Allocates a buf_page_t descriptor. This function must succeed. In case
333  of failure we assert in this function. */
334 UNIV_INLINE
335 buf_page_t *buf_page_alloc_descriptor(void) MY_ATTRIBUTE((malloc));
336 /** Free a buf_page_t descriptor. */
337 UNIV_INLINE
338 void buf_page_free_descriptor(
339     buf_page_t *bpage); /*!< in: bpage descriptor to free. */
340 
341 /** Allocates a buffer block.
342  @return own: the allocated block, in state BUF_BLOCK_MEMORY */
343 buf_block_t *buf_block_alloc(
344     buf_pool_t *buf_pool); /*!< in: buffer pool instance,
345                            or NULL for round-robin selection
346                            of the buffer pool */
347 /** Frees a buffer block which does not contain a file page. */
348 UNIV_INLINE
349 void buf_block_free(buf_block_t *block); /*!< in, own: block to be freed */
350 #endif                                   /* !UNIV_HOTBACKUP */
351 
352 /** Copies contents of a buffer frame to a given buffer.
353 @param[out]	buf	buffer to copy to
354 @param[in]	frame	buffer frame
355 @return buf */
356 UNIV_INLINE
357 byte *buf_frame_copy(byte *buf, const buf_frame_t *frame);
358 
359 #ifndef UNIV_HOTBACKUP
360 /** NOTE! The following macros should be used instead of buf_page_get_gen,
361  to improve debugging. Only values RW_S_LATCH and RW_X_LATCH are allowed
362  in LA! */
363 #define buf_page_get(ID, SIZE, LA, MTR)                                        \
364   buf_page_get_gen(ID, SIZE, LA, NULL, Page_fetch::NORMAL, __FILE__, __LINE__, \
365                    MTR)
366 /** Use these macros to bufferfix a page with no latching. Remember not to
367  read the contents of the page unless you know it is safe. Do not modify
368  the contents of the page! We have separated this case, because it is
369  error-prone programming not to set a latch, and it should be used
370  with care. */
371 #define buf_page_get_with_no_latch(ID, SIZE, MTR)                     \
372   buf_page_get_gen(ID, SIZE, RW_NO_LATCH, NULL, Page_fetch::NO_LATCH, \
373                    __FILE__, __LINE__, MTR)
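/* A minimal usage sketch (illustrative, not normative; space_id, page_no and
page_size are assumed to come from the caller's context): a latched fetch
through buf_page_get() inside a mini-transaction. mtr_commit() releases both
the latch and the buffer fix.

  mtr_t mtr;
  mtr_start(&mtr);

  buf_block_t *block = buf_page_get(page_id_t(space_id, page_no), page_size,
                                    RW_S_LATCH, &mtr);

  const byte *frame = buf_block_get_frame(block); // read-only under S-latch

  mtr_commit(&mtr);
*/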
374 
375 /** This is the general function used to get optimistic access to a database
376 page.
377 @param[in]      rw_latch        RW_S_LATCH, RW_X_LATCH
378 @param[in,out]  block           guessed block
379 @param[in]      modify_clock    modify clock value
380 @param[in]      fetch_mode      Fetch mode
381 @param[in]      file            file name
382 @param[in]      line            line where called
383 @param[in,out]  mtr             mini-transaction
384 @return true if success */
385 bool buf_page_optimistic_get(ulint rw_latch, buf_block_t *block,
386                              uint64_t modify_clock, Page_fetch fetch_mode,
387                              const char *file, ulint line, mtr_t *mtr);
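/* A sketch of the optimistic pattern this function supports (illustrative;
the mini-transaction handling around it is assumed): save the modify clock
while the block is still latched, release the page, and later try to reclaim
the same block cheaply. On failure, refetch by page id with buf_page_get().

  uint64_t saved_clock = buf_block_get_modify_clock(block);
  // ... release the page and do other work ...

  if (!buf_page_optimistic_get(RW_S_LATCH, block, saved_clock,
                               Page_fetch::NORMAL, __FILE__, __LINE__, &mtr)) {
    // the block was evicted or modified meanwhile - fall back to a
    // normal fetch by page id
  }
*/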
388 
389 /** This is used to get access to a known database page, when no waiting can be
390 done.
391 @param[in] rw_latch RW_S_LATCH or RW_X_LATCH.
392 @param[in] block The known page.
393 @param[in] hint Cache_hint::MAKE_YOUNG or Cache_hint::KEEP_OLD
394 @param[in] file File name from where it was called.
395 @param[in] line Line from where it was called.
396 @param[in,out] mtr Mini-transaction covering the fetch
397 @return true if success */
398 bool buf_page_get_known_nowait(ulint rw_latch, buf_block_t *block,
399                                Cache_hint hint, const char *file, ulint line,
400                                mtr_t *mtr);
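/* A sketch (illustrative; the caller is assumed to already know that the
block contains a valid page, e.g. from an earlier buffer fix):

  if (buf_page_get_known_nowait(RW_S_LATCH, block, Cache_hint::MAKE_YOUNG,
                                __FILE__, __LINE__, &mtr)) {
    // ... use the page; mtr_commit() releases the latch ...
  }
*/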
401 
402 /** Given a tablespace id and page number tries to get that page. If the
403 page is not in the buffer pool it is not loaded and NULL is returned.
404 Suitable for use when holding the lock_sys latches (as it avoids deadlock).
405 @param[in]	page_id	page id
406 @param[in]	file	file name
407 @param[in]	line	line where called
408 @param[in]	mtr	mini-transaction
409 @return pointer to a page or NULL */
410 const buf_block_t *buf_page_try_get_func(const page_id_t &page_id,
411                                          const char *file, ulint line,
412                                          mtr_t *mtr);
413 
414 /** Given a tablespace id and page number tries to get that page. If the
415 page is not in the buffer pool it is not loaded and NULL is returned.
416 Suitable for use when holding the lock_sys latches (as it avoids deadlock).
417 @param[in]	page_id	page identifier
418 @param[in]	mtr	mini-transaction
419 @return the page if in buffer pool, NULL if not */
420 #define buf_page_try_get(page_id, mtr) \
421   buf_page_try_get_func((page_id), __FILE__, __LINE__, mtr)
422 
423 /** Get read access to a compressed page (usually of type
424 FIL_PAGE_TYPE_ZBLOB or FIL_PAGE_TYPE_ZBLOB2).
425 The page must be released with buf_page_release_zip().
426 NOTE: the page is not protected by any latch.  Mutual exclusion has to
427 be implemented at a higher level.  In other words, all possible
428 accesses to a given page through this function must be protected by
429 the same set of mutexes or latches.
430 @param[in]	page_id		page id
431 @param[in]	page_size	page size
432 @return pointer to the block */
433 buf_page_t *buf_page_get_zip(const page_id_t &page_id,
434                              const page_size_t &page_size);
435 
436 /** This is the general function used to get access to a database page.
437 @param[in]	page_id			page id
438 @param[in]	page_size		page size
439 @param[in]	rw_latch		RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH
440 @param[in]	guess			guessed block or NULL
441 @param[in]	mode			fetch mode
442 @param[in]	file			file name
443 @param[in]	line			line where called
444 @param[in]	mtr			mini-transaction
445 @param[in]	dirty_with_no_latch	mark page as dirty even if page is being
446                         pinned without any latch
447 @return pointer to the block or NULL */
448 buf_block_t *buf_page_get_gen(const page_id_t &page_id,
449                               const page_size_t &page_size, ulint rw_latch,
450                               buf_block_t *guess, Page_fetch mode,
451                               const char *file, ulint line, mtr_t *mtr,
452                               bool dirty_with_no_latch = false);
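/* A sketch of probing for residency without triggering a disk read
(illustrative; page_id, page_size and mtr are assumed to exist in the
caller): with Page_fetch::IF_IN_POOL a NULL result means the page is not
currently cached.

  buf_block_t *block =
      buf_page_get_gen(page_id, page_size, RW_S_LATCH, nullptr,
                       Page_fetch::IF_IN_POOL, __FILE__, __LINE__, &mtr);

  if (block == nullptr) {
    // not resident - decide whether to schedule a read or skip this page
  }
*/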
453 
454 /** Initializes a page in the buffer pool. The page is usually not read
455 from a file, even if it cannot be found in the buffer pool. This is one
456 of the functions which perform on a block the state transition NOT_USED =>
457 FILE_PAGE (the other is buf_page_get_gen). The page is latched by the passed mtr.
458 @param[in]	page_id		page id
459 @param[in]	page_size	page size
460 @param[in]	rw_latch	RW_SX_LATCH, RW_X_LATCH
461 @param[in]	mtr		mini-transaction
462 @return pointer to the block, page bufferfixed */
463 buf_block_t *buf_page_create(const page_id_t &page_id,
464                              const page_size_t &page_size,
465                              rw_lock_type_t rw_latch, mtr_t *mtr);
466 
467 #else  /* !UNIV_HOTBACKUP */
468 
469 /** Initializes a page in the buffer pool, for use in mysqlbackup --restore.
470 @param[in]	page_id		page id
471 @param[in]	page_size	page size
472 @param[in,out]	block		block to init */
473 void meb_page_init(const page_id_t &page_id, const page_size_t &page_size,
474                    buf_block_t *block);
475 #endif /* !UNIV_HOTBACKUP */
476 
477 #ifndef UNIV_HOTBACKUP
478 /** Releases a compressed-only page acquired with buf_page_get_zip(). */
479 UNIV_INLINE
480 void buf_page_release_zip(buf_page_t *bpage); /*!< in: buffer block */
481 
482 /** Releases a latch, if specified.
483 @param[in]	block		buffer block
484 @param[in]	rw_latch	RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH */
485 UNIV_INLINE
486 void buf_page_release_latch(buf_block_t *block, ulint rw_latch);
487 
488 /** Moves a page to the start of the buffer pool LRU list. This high-level
489 function can be used to prevent an important page from slipping out of
490 the buffer pool.
491 @param[in,out]	bpage	buffer block of a file page */
492 void buf_page_make_young(buf_page_t *bpage);
493 
494 /** Returns TRUE if the page can be found in the buffer pool hash table.
495 NOTE that it is possible that the page is not yet read from disk,
496 though.
497 @param[in]	page_id	page id
498 @return true if found in the page hash table */
499 UNIV_INLINE
500 ibool buf_page_peek(const page_id_t &page_id);
501 
502 #ifdef UNIV_DEBUG
503 
504 /** Sets file_page_was_freed TRUE if the page is found in the buffer pool.
505 This function should be called when we free a file page and want the
506 debug version to check that it is not accessed any more unless
507 reallocated.
508 @param[in]	page_id	page id
509 @return control block if found in page hash table, otherwise NULL */
510 buf_page_t *buf_page_set_file_page_was_freed(const page_id_t &page_id);
511 
512 /** Sets file_page_was_freed FALSE if the page is found in the buffer pool.
513 This function should be called when a previously freed file page is
514 reallocated, so that the debug version no longer treats accesses to the
515 page as errors.
516 @param[in]	page_id	page id
517 @return control block if found in page hash table, otherwise NULL */
518 buf_page_t *buf_page_reset_file_page_was_freed(const page_id_t &page_id);
519 
520 #endif /* UNIV_DEBUG */
521 /** Reads the freed_page_clock of a buffer block.
522  @return freed_page_clock */
523 UNIV_INLINE
524 ulint buf_page_get_freed_page_clock(const buf_page_t *bpage) /*!< in: block */
525     MY_ATTRIBUTE((warn_unused_result));
526 /** Reads the freed_page_clock of a buffer block.
527  @return freed_page_clock */
528 UNIV_INLINE
529 ulint buf_block_get_freed_page_clock(const buf_block_t *block) /*!< in: block */
530     MY_ATTRIBUTE((warn_unused_result));
531 
532 /** Tells, for heuristics, if a block is still close enough to the MRU end of
533 the LRU list, meaning that it is not in danger of getting evicted and also
534 implying that it has been accessed recently.
535 The page must be either buffer-fixed, or its page hash must be locked.
536 @param[in]	bpage	block
537 @return true if block is close to MRU end of LRU */
538 UNIV_INLINE
539 ibool buf_page_peek_if_young(const buf_page_t *bpage);
540 
541 /** Recommends a move of a block to the start of the LRU list if there is
542 danger of it dropping out of the buffer pool.
543 NOTE: does not reserve the LRU list mutex.
544 @param[in]	bpage	block to make younger
545 @return true if should be made younger */
546 UNIV_INLINE
547 ibool buf_page_peek_if_too_old(const buf_page_t *bpage);
548 
549 /** Gets the youngest modification log sequence number for a frame.
550  Returns zero if it is not a file page or no modification has occurred yet.
551  @return newest modification to page */
552 UNIV_INLINE
553 lsn_t buf_page_get_newest_modification(
554     const buf_page_t *bpage); /*!< in: block containing the
555                               page frame */
556 
557 /** Increment the modify clock.
558 The caller must either
559 (1) own the buf_pool->mutex while the block's bufferfix count is zero,
560 (2) own an X or SX latch on block->lock, or
561 (3) operate on a thread-private temporary table.
562 @param[in,out]	block	buffer block */
563 UNIV_INLINE
564 void buf_block_modify_clock_inc(buf_block_t *block);
565 
566 /** Read the modify clock.
567 @param[in]	block	buffer block
568 @return modify_clock value */
569 UNIV_INLINE
570 uint64_t buf_block_get_modify_clock(const buf_block_t *block);
571 
572 /** Increments the bufferfix count. */
573 #ifdef UNIV_DEBUG
574 /**
575 @param[in]	file	file name
576 @param[in]	line	line */
577 #endif /* UNIV_DEBUG */
578 /**
579 @param[in,out]	block	block to bufferfix */
580 UNIV_INLINE
581 void buf_block_buf_fix_inc_func(
582 #ifdef UNIV_DEBUG
583     const char *file, ulint line,
584 #endif /* UNIV_DEBUG */
585     buf_block_t *block);
586 
587 /** Increments the bufferfix count.
588 @param[in,out]	bpage	block to bufferfix
589 @return the count */
590 UNIV_INLINE
591 ulint buf_block_fix(buf_page_t *bpage);
592 
593 /** Increments the bufferfix count.
594 @param[in,out]	block	block to bufferfix
595 @return the count */
596 UNIV_INLINE
597 ulint buf_block_fix(buf_block_t *block);
598 
599 /** Decrements the bufferfix count.
600 @param[in,out]	bpage	block to bufferunfix
601 @return	the remaining buffer-fix count */
602 UNIV_INLINE
603 ulint buf_block_unfix(buf_page_t *bpage);
604 #endif /* !UNIV_HOTBACKUP */
605 /** Decrements the bufferfix count.
606 @param[in,out]	block	block to bufferunfix
607 @return	the remaining buffer-fix count */
608 UNIV_INLINE
609 ulint buf_block_unfix(buf_block_t *block);
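/* A sketch of a bare fix/unfix pair (illustrative). A buffer fix only pins
the block: it prevents eviction and relocation of the descriptor, but it is
not a latch, so page contents must not be read or written on its strength
alone.

  buf_block_fix(block);
  // ... the block cannot be evicted or relocated here ...
  buf_block_unfix(block);
*/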
610 
611 #ifndef UNIV_HOTBACKUP
612 /** Unfixes the page, unlatches the page,
613 removes it from page_hash and removes it from LRU.
614 @param[in,out]	bpage	pointer to the block */
615 void buf_read_page_handle_error(buf_page_t *bpage);
616 
617 #ifdef UNIV_DEBUG
618 /** Increments the bufferfix count.
619 @param[in,out]	b	block to bufferfix
620 @param[in]	f	file name where requested
621 @param[in]	l	line number where requested */
622 #define buf_block_buf_fix_inc(b, f, l) buf_block_buf_fix_inc_func(f, l, b)
623 #else /* UNIV_DEBUG */
624 /** Increments the bufferfix count.
625 @param[in,out]	b	block to bufferfix
626 @param[in]	f	file name where requested
627 @param[in]	l	line number where requested */
628 #define buf_block_buf_fix_inc(b, f, l) buf_block_buf_fix_inc_func(b)
629 #endif /* UNIV_DEBUG */
630 #else  /* !UNIV_HOTBACKUP */
631 #define buf_block_modify_clock_inc(block) ((void)0)
632 #endif /* !UNIV_HOTBACKUP */
633 
634 #ifndef UNIV_HOTBACKUP
635 
636 /** Gets the space id, page offset, and byte offset within page of a pointer
637 pointing to a buffer frame containing a file page.
638 @param[in]	ptr	pointer to a buffer frame
639 @param[out]	space	space id
640 @param[out]	addr	page offset and byte offset */
641 UNIV_INLINE
642 void buf_ptr_get_fsp_addr(const void *ptr, space_id_t *space, fil_addr_t *addr);
643 
644 /** Gets the hash value of a block. This can be used in searches in the
645  lock hash table.
646  @return lock hash value */
647 UNIV_INLINE
648 ulint buf_block_get_lock_hash_val(const buf_block_t *block) /*!< in: block */
649     MY_ATTRIBUTE((warn_unused_result));
650 #ifdef UNIV_DEBUG
651 /** Finds a block in the buffer pool that points to a
652 given compressed page. Used only to confirm that buffer pool does not contain a
653 given pointer, thus protected by zip_free_mutex.
654 @param[in]	buf_pool	buffer pool instance
655 @param[in]	data		pointer to compressed page
656 @return buffer block pointing to the compressed page, or NULL */
657 buf_block_t *buf_pool_contains_zip(buf_pool_t *buf_pool, const void *data);
658 #endif /* UNIV_DEBUG */
659 
660 /***********************************************************************
661 FIXME_FTS: Gets the frame the pointer is pointing to. */
662 UNIV_INLINE
663 buf_frame_t *buf_frame_align(
664     /* out: pointer to frame */
665     byte *ptr); /* in: pointer to a frame */
666 
667 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
668 /** Validates the buffer pool data structure.
669  @return true */
670 ibool buf_validate(void);
671 #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
672 #if defined UNIV_DEBUG_PRINT || defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
673 /** Prints info of the buffer pool data structure. */
674 void buf_print(void);
675 #endif /* UNIV_DEBUG_PRINT || UNIV_DEBUG || UNIV_BUF_DEBUG */
676 #endif /* !UNIV_HOTBACKUP */
677 enum buf_page_print_flags {
678   /** Do not crash at the end of buf_page_print(). */
679   BUF_PAGE_PRINT_NO_CRASH = 1,
680   /** Do not print the full page dump. */
681   BUF_PAGE_PRINT_NO_FULL = 2
682 };
683 
684 /** Prints a page to stderr.
685 @param[in]	read_buf	a database page
686 @param[in]	page_size	page size
687 @param[in]	flags		0 or BUF_PAGE_PRINT_NO_CRASH or
688 BUF_PAGE_PRINT_NO_FULL */
689 void buf_page_print(const byte *read_buf, const page_size_t &page_size,
690                     ulint flags);
691 
692 /** Decompress a block.
693  @return true if successful */
694 ibool buf_zip_decompress(buf_block_t *block, /*!< in/out: block */
695                          ibool check); /*!< in: TRUE=verify the page checksum */
696 #ifndef UNIV_HOTBACKUP
697 #ifdef UNIV_DEBUG
698 /** Returns the number of latched pages in the buffer pool.
699  @return number of latched pages */
700 ulint buf_get_latched_pages_number(void);
701 #endif /* UNIV_DEBUG */
702 /** Returns the number of pending buf pool read ios.
703  @return number of pending read I/O operations */
704 ulint buf_get_n_pending_read_ios(void);
705 /** Prints info of the buffer i/o. */
706 void buf_print_io(FILE *file); /*!< in: file where to print */
707 /** Collect buffer pool stats information for a buffer pool. Also
708  record aggregated stats if there is more than one buffer pool
709  in the server */
710 void buf_stats_get_pool_info(
711     buf_pool_t *buf_pool,            /*!< in: buffer pool */
712     ulint pool_id,                   /*!< in: buffer pool ID */
713     buf_pool_info_t *all_pool_info); /*!< in/out: buffer pool info
714                                      to fill */
715 /** Return the ratio, as a percentage, of modified (dirty) pages to all
716 database pages in the buffer pool.
717 @return modified page percentage ratio */
718 double buf_get_modified_ratio_pct(void);
719 /** Refresh the statistics used to print per-second averages. */
720 void buf_refresh_io_stats_all();
721 
722 /** Assert that all file pages in the buffer are in a replaceable state. */
723 void buf_must_be_all_freed(void);
724 
725 /** Checks whether there currently are any pending i/o operations for the
726 buffer pool.
727 @return number of pending i/o operations */
728 ulint buf_pool_check_no_pending_io(void);
729 
730 /** Invalidates the file pages in the buffer pool when an archive recovery is
731  completed. All the file pages buffered must be in a replaceable state when
732  this function is called: not latched and not modified. */
733 void buf_pool_invalidate(void);
734 #endif /* !UNIV_HOTBACKUP */
735 
736 /*========================================================================
737 --------------------------- LOWER LEVEL ROUTINES -------------------------
738 =========================================================================*/
739 
740 #ifdef UNIV_DEBUG
741 /** Adds latch level info for the rw-lock protecting the buffer frame. This
742 should be called in the debug version after a successful latching of a page if
743 we know the latching order level of the acquired latch.
744 @param[in]	block	buffer page where we have acquired latch
745 @param[in]	level	latching order level */
746 UNIV_INLINE
747 void buf_block_dbg_add_level(buf_block_t *block, latch_level_t level);
748 #else                                         /* UNIV_DEBUG */
749 #define buf_block_dbg_add_level(block, level) /* nothing */
750 #endif                                        /* UNIV_DEBUG */
751 
752 /** Gets the state of a block.
753  @return state */
754 UNIV_INLINE
755 enum buf_page_state buf_page_get_state(
756     const buf_page_t *bpage); /*!< in: pointer to the control block */
757 /** Gets the state of a block.
758  @return state */
759 UNIV_INLINE
760 enum buf_page_state buf_block_get_state(
761     const buf_block_t *block) /*!< in: pointer to the control block */
762     MY_ATTRIBUTE((warn_unused_result));
763 
764 /** Sets the state of a block.
765 @param[in,out]	bpage	pointer to control block
766 @param[in]	state	state */
767 UNIV_INLINE
768 void buf_page_set_state(buf_page_t *bpage, enum buf_page_state state);
769 
770 /** Sets the state of a block.
771 @param[in,out]	block	pointer to control block
772 @param[in]	state	state */
773 UNIV_INLINE
774 void buf_block_set_state(buf_block_t *block, enum buf_page_state state);
775 
776 /** Determines if a block is mapped to a tablespace.
777  @return true if mapped */
778 UNIV_INLINE
779 ibool buf_page_in_file(
780     const buf_page_t *bpage) /*!< in: pointer to control block */
781     MY_ATTRIBUTE((warn_unused_result));
782 #ifndef UNIV_HOTBACKUP
783 /** Determines if a block should be on unzip_LRU list.
784  @return true if block belongs to unzip_LRU */
785 UNIV_INLINE
786 bool buf_page_belongs_to_unzip_LRU(
787     const buf_page_t *bpage) /*!< in: pointer to control block */
788     MY_ATTRIBUTE((warn_unused_result));
789 
790 /** Gets the mutex of a block.
791  @return pointer to mutex protecting bpage */
792 UNIV_INLINE
793 BPageMutex *buf_page_get_mutex(
794     const buf_page_t *bpage) /*!< in: pointer to control block */
795     MY_ATTRIBUTE((warn_unused_result));
796 
797 /** Get the flush type of a page.
798  @return flush type */
799 UNIV_INLINE
800 buf_flush_t buf_page_get_flush_type(
801     const buf_page_t *bpage) /*!< in: buffer page */
802     MY_ATTRIBUTE((warn_unused_result));
803 
804 /** Set the flush type of a page.
805 @param[in]	bpage		buffer page
806 @param[in]	flush_type	flush type */
807 UNIV_INLINE
808 void buf_page_set_flush_type(buf_page_t *bpage, buf_flush_t flush_type);
809 
810 /** Map a block to a file page.
811 @param[in,out]	block	pointer to control block
812 @param[in]	page_id	page id */
813 UNIV_INLINE
814 void buf_block_set_file_page(buf_block_t *block, const page_id_t &page_id);
815 
816 /** Gets the io_fix state of a block.
817  @return io_fix state */
818 UNIV_INLINE
819 enum buf_io_fix buf_page_get_io_fix(
820     const buf_page_t *bpage) /*!< in: pointer to the control block */
821     MY_ATTRIBUTE((warn_unused_result));
822 /** Gets the io_fix state of a block.
823  @return io_fix state */
824 UNIV_INLINE
825 enum buf_io_fix buf_block_get_io_fix(
826     const buf_block_t *block) /*!< in: pointer to the control block */
827     MY_ATTRIBUTE((warn_unused_result));
828 
829 /** Sets the io_fix state of a block.
830 @param[in,out]	bpage	control block
831 @param[in]	io_fix	io_fix state */
832 UNIV_INLINE
833 void buf_page_set_io_fix(buf_page_t *bpage, enum buf_io_fix io_fix);
834 
835 /** Sets the io_fix state of a block.
836 @param[in,out]	block	control block
837 @param[in]	io_fix	io_fix state */
838 UNIV_INLINE
839 void buf_block_set_io_fix(buf_block_t *block, enum buf_io_fix io_fix);
840 
841 /** Makes a block sticky. A sticky block implies that even after we release
842 the buf_pool->LRU_list_mutex and the block->mutex:
843 * it cannot be removed from the flush_list
844 * the block descriptor cannot be relocated
845 * it cannot be removed from the LRU list
846 Note that:
847 * the block can still change its position in the LRU list
848 * the next and previous pointers can change.
849 @param[in,out]	bpage	control block */
850 UNIV_INLINE
851 void buf_page_set_sticky(buf_page_t *bpage);
852 
853 /** Removes stickiness of a block. */
854 UNIV_INLINE
855 void buf_page_unset_sticky(buf_page_t *bpage); /*!< in/out: control block */
856 /** Determine if a buffer block can be relocated in memory.  The block
857  can be dirty, but it must not be I/O-fixed or bufferfixed. */
858 UNIV_INLINE
859 ibool buf_page_can_relocate(
860     const buf_page_t *bpage) /*!< control block being relocated */
861     MY_ATTRIBUTE((warn_unused_result));
862 
863 /** Determine if a block has been flagged old.
864 @param[in]	bpage	control block
865 @return true if old */
866 UNIV_INLINE
867 ibool buf_page_is_old(const buf_page_t *bpage)
868     MY_ATTRIBUTE((warn_unused_result));
869 
870 /** Flag a block old.
871 @param[in,out]	bpage	control block
872 @param[in]	old	old */
873 UNIV_INLINE
874 void buf_page_set_old(buf_page_t *bpage, ibool old);
875 
876 /** Determine the time of first access of a block in the buffer pool.
877  @return ut_time_monotonic_ms() at the time of first access, 0 if not accessed
878  */
879 UNIV_INLINE
880 unsigned buf_page_is_accessed(const buf_page_t *bpage) /*!< in: control block */
881     MY_ATTRIBUTE((warn_unused_result));
882 /** Flag a block accessed. */
883 UNIV_INLINE
884 void buf_page_set_accessed(buf_page_t *bpage); /*!< in/out: control block */
885 
886 /** Gets the buf_block_t handle of a buffered file block if an uncompressed
887 page frame exists, or NULL. The caller must hold either the appropriate
888 hash lock in any mode, or the LRU list mutex. Note: even though bpage is
889 not declared a const we don't update its value. It is safe to make this
890 pure.
891 @param[in]	bpage	control block, or NULL
892 @return control block, or NULL */
893 UNIV_INLINE
894 buf_block_t *buf_page_get_block(buf_page_t *bpage)
895     MY_ATTRIBUTE((warn_unused_result));
896 #ifdef UNIV_DEBUG
897 /** Gets a pointer to the memory frame of a block.
898  @return pointer to the frame */
899 UNIV_INLINE
900 buf_frame_t *buf_block_get_frame(
901     const buf_block_t *block) /*!< in: pointer to the control block */
902     MY_ATTRIBUTE((warn_unused_result));
903 #else /* UNIV_DEBUG */
904 #define buf_block_get_frame(block) (block)->frame
905 #endif /* UNIV_DEBUG */
906 #else  /* !UNIV_HOTBACKUP */
907 #define buf_block_get_frame(block) (block)->frame
908 #endif /* !UNIV_HOTBACKUP */
909 /** Gets the compressed page descriptor corresponding to an uncompressed page
910  if applicable. */
911 #define buf_block_get_page_zip(block) \
912   ((block)->page.zip.data ? &(block)->page.zip : NULL)
913 
914 /** Get a buffer block from an adaptive hash index pointer.
915 This function does not return if the block is not identified.
916 @param[in]	ptr	pointer to within a page frame
917 @return pointer to block, never NULL */
918 buf_block_t *buf_block_from_ahi(const byte *ptr);
919 
920 #ifndef UNIV_HOTBACKUP
921 /** Find out if a pointer belongs to a buf_block_t. It can be a pointer to
922  the buf_block_t itself or a member of it
923  @return true if ptr belongs to a buf_block_t struct */
924 ibool buf_pointer_is_block_field(const void *ptr); /*!< in: pointer not
925                                                    dereferenced */
926 /** Find out if a pointer corresponds to a buf_block_t::mutex.
927 @param m in: mutex candidate
928 @return true if m is a buf_block_t::mutex */
929 #define buf_pool_is_block_mutex(m) buf_pointer_is_block_field((const void *)(m))
930 /** Find out if a pointer corresponds to a buf_block_t::lock.
931 @param l in: rw-lock candidate
932 @return true if l is a buf_block_t::lock */
933 #define buf_pool_is_block_lock(l) buf_pointer_is_block_field((const void *)(l))
934 
935 /** Initializes a page for reading into the buffer pool. If the page is
936 (1) already in buf_pool, or
937 (2) if we specify to read only ibuf pages and the page is not an ibuf page, or
938 (3) if the space is deleted or being deleted,
939 then this function does nothing.
940 Sets the io_fix flag to BUF_IO_READ and sets a non-recursive exclusive lock
941 on the buffer frame. The io-handler must take care that the flag is cleared
942 and the lock released later.
943 @param[out]	err			DB_SUCCESS or DB_TABLESPACE_DELETED
944 @param[in]	mode			BUF_READ_IBUF_PAGES_ONLY, ...
945 @param[in]	page_id			page id
946 @param[in]	page_size		page size
947 @param[in]	unzip			TRUE=request uncompressed page
948 @return pointer to the block or NULL */
949 buf_page_t *buf_page_init_for_read(dberr_t *err, ulint mode,
950                                    const page_id_t &page_id,
951                                    const page_size_t &page_size, ibool unzip);
952 
953 /** Completes an asynchronous read or write request of a file page to or from
954 the buffer pool.
955 @param[in]	bpage	pointer to the block in question
956 @param[in]	evict	whether or not to evict the page from LRU list
957 @return true if successful */
958 bool buf_page_io_complete(buf_page_t *bpage, bool evict);
959 
960 /** Calculates the index of a buffer pool in the buf_pool[] array.
961  @return the position of the buffer pool in buf_pool[] */
962 UNIV_INLINE
963 ulint buf_pool_index(const buf_pool_t *buf_pool) /*!< in: buffer pool */
964     MY_ATTRIBUTE((warn_unused_result));
965 /** Returns the buffer pool instance given a page instance
966  @return buf_pool */
967 UNIV_INLINE
968 buf_pool_t *buf_pool_from_bpage(
969     const buf_page_t *bpage); /*!< in: buffer pool page */
970 /** Returns the buffer pool instance given a block instance
971  @return buf_pool */
972 UNIV_INLINE
973 buf_pool_t *buf_pool_from_block(const buf_block_t *block); /*!< in: block */
974 
975 /** Returns the buffer pool instance given a page id.
976 @param[in]	page_id	page id
977 @return buffer pool */
978 UNIV_INLINE
979 buf_pool_t *buf_pool_get(const page_id_t &page_id);
980 
981 /** Returns the buffer pool instance given its array index
982  @return buffer pool */
983 UNIV_INLINE
984 buf_pool_t *buf_pool_from_array(ulint index); /*!< in: array index to get
985                                               buffer pool instance from */
986 
987 /** Returns the control block of a file page, NULL if not found.
988 @param[in]	buf_pool	buffer pool instance
989 @param[in]	page_id		page id
990 @return block, NULL if not found */
991 UNIV_INLINE
992 buf_page_t *buf_page_hash_get_low(buf_pool_t *buf_pool,
993                                   const page_id_t &page_id);
994 
995 /** Returns the control block of a file page, NULL if not found.
996 If the block is found and lock is not NULL then the appropriate
997 page_hash lock is acquired in the specified lock mode. Otherwise,
998 mode value is ignored. It is up to the caller to release the
999 lock. If the block is found and the lock is NULL then the page_hash
1000 lock is released by this function.
1001 @param[in]	buf_pool	buffer pool instance
1002 @param[in]	page_id		page id
1003 @param[in,out]	lock		lock of the page hash acquired if bpage is
1004 found, NULL otherwise. If NULL is passed then the hash_lock is released by
1005 this function.
1006 @param[in]	lock_mode	RW_LOCK_X or RW_LOCK_S. Ignored if
1007 lock == NULL
1008 @param[in]	watch		if true, return watch sentinel also.
1009 @return pointer to the bpage or NULL; if NULL, lock is also NULL or
1010 a watch sentinel. */
1011 UNIV_INLINE
1012 buf_page_t *buf_page_hash_get_locked(buf_pool_t *buf_pool,
1013                                      const page_id_t &page_id, rw_lock_t **lock,
1014                                      ulint lock_mode, bool watch = false);
1015 
1016 /** Returns the control block of a file page, NULL if not found.
1017 If the block is found and lock is not NULL then the appropriate
1018 page_hash lock is acquired in the specified lock mode. Otherwise,
1019 mode value is ignored. It is up to the caller to release the
1020 lock. If the block is found and the lock is NULL then the page_hash
1021 lock is released by this function.
1022 @param[in]	buf_pool	buffer pool instance
1023 @param[in]	page_id		page id
1024 @param[in,out]	lock		lock of the page hash acquired if bpage is
1025 found, NULL otherwise. If NULL is passed then the hash_lock is released by
1026 this function.
1027 @param[in]	lock_mode	RW_LOCK_X or RW_LOCK_S. Ignored if
1028 lock == NULL
1029 @return pointer to the block or NULL; if NULL, lock is also NULL. */
1030 UNIV_INLINE
1031 buf_block_t *buf_block_hash_get_locked(buf_pool_t *buf_pool,
1032                                        const page_id_t &page_id,
1033                                        rw_lock_t **lock, ulint lock_mode);
1034 
1035 /* There are four different ways we can try to get a bpage or block
1036 from the page hash:
1037 1) The caller already holds the appropriate page hash lock: in that case,
1038 call the buf_page_hash_get_low() function.
1039 2) The caller wants to hold the page hash lock in x-mode.
1040 3) The caller wants to hold the page hash lock in s-mode.
1041 4) The caller doesn't want to hold the page hash lock. A sketch of case 3 follows the macros below. */
1042 #define buf_page_hash_get_s_locked(b, page_id, l) \
1043   buf_page_hash_get_locked(b, page_id, l, RW_LOCK_S)
1044 #define buf_page_hash_get_x_locked(b, page_id, l) \
1045   buf_page_hash_get_locked(b, page_id, l, RW_LOCK_X)
1046 #define buf_page_hash_get(b, page_id) \
1047   buf_page_hash_get_locked(b, page_id, NULL, 0)
1048 #define buf_page_get_also_watch(b, page_id) \
1049   buf_page_hash_get_locked(b, page_id, NULL, 0, true)
1050 
1051 #define buf_block_hash_get_s_locked(b, page_id, l) \
1052   buf_block_hash_get_locked(b, page_id, l, RW_LOCK_S)
1053 #define buf_block_hash_get_x_locked(b, page_id, l) \
1054   buf_block_hash_get_locked(b, page_id, l, RW_LOCK_X)
1055 #define buf_block_hash_get(b, page_id) \
1056   buf_block_hash_get_locked(b, page_id, NULL, 0)
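/* A sketch of case 3 above, an s-locked lookup (illustrative; buf_pool and
page_id are assumed): on a hit, the page_hash lock is returned through the
lock parameter and must be released by the caller; on a miss it comes back
NULL.

  rw_lock_t *hash_lock = nullptr;

  buf_page_t *bpage =
      buf_page_hash_get_s_locked(buf_pool, page_id, &hash_lock);

  if (bpage != nullptr) {
    // ... inspect bpage under the shared page_hash lock ...
    rw_lock_s_unlock(hash_lock);
  }
*/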
1057 
1058 /** Gets the current length of the free list of buffer blocks.
1059  @return length of the free list */
1060 ulint buf_get_free_list_len(void);
1061 
1062 /** Determine if a block is a sentinel for a buffer pool watch.
1063  @return true if a sentinel for a buffer pool watch, false if not */
1064 ibool buf_pool_watch_is_sentinel(
1065     const buf_pool_t *buf_pool, /*!< buffer pool instance */
1066     const buf_page_t *bpage)    /*!< in: block */
1067     MY_ATTRIBUTE((warn_unused_result));
1068 
1069 /** Stop watching if the page has been read in.
1070 buf_pool_watch_set(space,offset) must have returned NULL before.
1071 @param[in]	page_id	page id */
1072 void buf_pool_watch_unset(const page_id_t &page_id);
1073 
1074 /** Check if the page has been read in.
1075 This may only be called after buf_pool_watch_set(space,offset)
1076 has returned NULL and before invoking buf_pool_watch_unset(space,offset).
1077 @param[in]	page_id	page id
1078 @return false if the given page was not read in, true if it was */
1079 ibool buf_pool_watch_occurred(const page_id_t &page_id)
1080     MY_ATTRIBUTE((warn_unused_result));
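/* A sketch of the watch protocol (illustrative; the watch is installed by
buf_page_get_gen() with Page_fetch::IF_IN_POOL_OR_WATCH returning NULL):

  if (buf_page_get_gen(page_id, page_size, RW_X_LATCH, nullptr,
                       Page_fetch::IF_IN_POOL_OR_WATCH, __FILE__, __LINE__,
                       &mtr) == nullptr) {
    // NULL => the page was absent and a watch sentinel is now set
    // ... do work that must notice a concurrent read of the page ...

    if (buf_pool_watch_occurred(page_id)) {
      // the page was read in meanwhile
    }
    buf_pool_watch_unset(page_id);
  }
*/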
1081 
1082 /** Get total buffer pool statistics. */
1083 void buf_get_total_list_len(
1084     ulint *LRU_len,         /*!< out: length of all LRU lists */
1085     ulint *free_len,        /*!< out: length of all free lists */
1086     ulint *flush_list_len); /*!< out: length of all flush lists */
1087 /** Get total list size in bytes from all buffer pools. */
1088 void buf_get_total_list_size_in_bytes(
1089     buf_pools_list_size_t *buf_pools_list_size); /*!< out: list sizes
1090                                                  in all buffer pools */
1091 /** Get total buffer pool statistics. */
1092 void buf_get_total_stat(
1093     buf_pool_stat_t *tot_stat); /*!< out: buffer pool stats */
1094 
1095 /** Get the nth chunk's buffer block in the specified buffer pool.
1096 @param[in]	buf_pool	buffer pool instance
1097 @param[in]	n		nth chunk in the buffer pool
1098 @param[out]	chunk_size	chunk size
1099 @return the nth chunk's buffer block. */
1100 UNIV_INLINE
1101 buf_block_t *buf_get_nth_chunk_block(const buf_pool_t *buf_pool, ulint n,
1102                                      ulint *chunk_size);
1103 
1104 /** Verify the possibility that a stored page is no longer in the buffer pool.
1105 @param[in]	withdraw_clock	withdraw clock at the time the page was stored
1106 @retval true	if the page might have been relocated */
1107 UNIV_INLINE
1108 bool buf_pool_is_obsolete(ulint withdraw_clock);
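/* A sketch of guarding a cached block pointer across a possible buffer pool
resize (illustrative): record buf_withdraw_clock when caching the pointer,
and re-validate with buf_pool_is_obsolete() before dereferencing it again.

  ulint saved_clock = buf_withdraw_clock; // at the time the pointer is cached

  // ... later, before reusing the cached pointer ...
  if (buf_pool_is_obsolete(saved_clock)) {
    // the block might have been withdrawn or relocated - look it up again
  }
*/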
1109 
1110 /** Calculate aligned buffer pool size based on srv_buf_pool_chunk_unit,
1111 if needed.
1112 @param[in]	size	size in bytes
1113 @return	aligned size */
1114 UNIV_INLINE
1115 ulint buf_pool_size_align(ulint size);
1116 
1117 /** Adjust the proposed chunk unit size so that it satisfies all invariants
1118 @param[in]      size    proposed size of buffer pool chunk unit in bytes
1119 @return adjusted size which meets invariants */
1120 ulonglong buf_pool_adjust_chunk_unit(ulonglong size);
1121 
1122 /** Calculate the checksum of a page from compressed table and update the
1123 page.
1124 @param[in,out]  page              page to update
1125 @param[in]      size              compressed page size
1126 @param[in]      lsn               LSN to stamp on the page
1127 @param[in]      skip_lsn_check    true to skip check for lsn (in DEBUG) */
1128 void buf_flush_update_zip_checksum(buf_frame_t *page, ulint size, lsn_t lsn,
1129                                    bool skip_lsn_check);
1130 
1131 #endif /* !UNIV_HOTBACKUP */
1132 
1133 /** Return how many more pages must be added to the withdraw list to reach the
1134 withdraw target of the currently ongoing buffer pool resize.
1135 @param[in]	buf_pool	buffer pool instance
1136 @return page count to be withdrawn or zero if the target is already achieved or
1137 if the buffer pool is not currently being resized. */
1138 UNIV_INLINE
1139 ulint buf_get_withdraw_depth(buf_pool_t *buf_pool);
1140 
1141 /** Gets the io_fix state of a buffer block. Does not assert that the
1142 buf_page_get_mutex() mutex is held, to be used in the cases where it is safe
1143 not to hold it.
1144 @param[in]	block	pointer to the buffer block
1145 @return page io_fix state */
1146 UNIV_INLINE
1147 buf_io_fix buf_block_get_io_fix_unlocked(const buf_block_t *block)
1148     MY_ATTRIBUTE((warn_unused_result));
1149 
1150 /** Gets the io_fix state of a buffer page. Does not assert that the
1151 buf_page_get_mutex() mutex is held, to be used in the cases where it is safe
1152 not to hold it.
1153 @param[in]	bpage	pointer to the buffer page
1154 @return page io_fix state */
1155 UNIV_INLINE
1156 enum buf_io_fix buf_page_get_io_fix_unlocked(const buf_page_t *bpage)
1157     MY_ATTRIBUTE((warn_unused_result));
1158 
1159 /** The common buffer control block structure
1160 for compressed and uncompressed frames */
1161 
1162 /** Number of bits used for buffer page states. */
1163 #define BUF_PAGE_STATE_BITS 3
1164 
1165 class buf_page_t {
1166  public:
1167   /** Set the doublewrite buffer ID.
1168   @param[in] batch_id           Double write batch ID which flushed the page. */
1169   void set_dblwr_batch_id(uint16_t batch_id) { m_dblwr_id = batch_id; }
1170 
1171   /** @return the double write batch id, or uint16_t max if undefined. */
1172   uint16_t get_dblwr_batch_id() const MY_ATTRIBUTE((warn_unused_result)) {
1173     return (m_dblwr_id);
1174   }
1175 
1176   /** @name General fields
1177   None of these bit-fields must be modified without holding
1178   buf_page_get_mutex() [buf_block_t::mutex or
1179   buf_pool->zip_mutex], since they can be stored in the same
1180   machine word.  */
1181   /* @{ */
1182 
1183   /** Page id. */
1184   page_id_t id;
1185 
1186   /** Page size. */
1187   page_size_t size;
1188 
1189   /** Count of how many times this block is currently bufferfixed. */
1190   uint32_t buf_fix_count;
1191 
1192   /** type of pending I/O operation. */
1193   buf_io_fix io_fix;
1194 
1195   /** Block state. @see buf_page_in_file */
1196   buf_page_state state;
1197 
1198   /** if this block is currently being flushed to disk, this tells
1199   the flush_type.  @see buf_flush_t */
1200   unsigned flush_type : 2;
1201 
1202   /** index number of the buffer pool that this block belongs to */
1203   unsigned buf_pool_index : 6;
1204 
1205   static_assert(MAX_BUFFER_POOLS <= 64,
1206                 "MAX_BUFFER_POOLS > 64; redefine buf_pool_index");
1207 
1208   /* @} */
1209   /** compressed page; zip.data (but not the data it points to) is
1210   protected by buf_pool->zip_mutex; state == BUF_BLOCK_ZIP_PAGE and
1211   zip.data == NULL means an active buf_pool->watch */
1212   page_zip_des_t zip;
1213 
1214 #ifndef UNIV_HOTBACKUP
1215   /** node used in chaining to buf_pool->page_hash or buf_pool->zip_hash */
1216   buf_page_t *hash;
1217 #endif /* !UNIV_HOTBACKUP */
1218 #ifdef UNIV_DEBUG
1219   /** TRUE if in buf_pool->page_hash */
1220   bool in_page_hash;
1221 
1222   /** TRUE if in buf_pool->zip_hash */
1223   bool in_zip_hash;
1224 #endif /* UNIV_DEBUG */
1225 
1226   /** @name Page flushing fields
1227   All these are protected by buf_pool->mutex. */
1228   /* @{ */
1229 
1230   /** Based on state, this is a
1231   list node, protected by the
1232   corresponding list mutex, in one of the
1233   following lists in buf_pool:
1234 
1235   - BUF_BLOCK_NOT_USED:	free, withdraw
1236   - BUF_BLOCK_FILE_PAGE:	flush_list
1237   - BUF_BLOCK_ZIP_DIRTY:	flush_list
1238   - BUF_BLOCK_ZIP_PAGE:	zip_clean
1239 
1240   The node pointers are protected by the
1241   corresponding list mutex.
1242 
1243   The contents of the list node
1244   are undefined if !in_flush_list
1245   && state == BUF_BLOCK_FILE_PAGE,
1246   or if state is one of
1247   BUF_BLOCK_MEMORY,
1248   BUF_BLOCK_REMOVE_HASH or
1249   BUF_BLOCK_READY_FOR_USE. */
1250   UT_LIST_NODE_T(buf_page_t) list;
1251 
1252 #ifdef UNIV_DEBUG
1253   /** TRUE if in buf_pool->flush_list; when buf_pool->flush_list_mutex is free,
1254   the following should hold:
1255   in_flush_list == (state == BUF_BLOCK_FILE_PAGE ||
1256                     state == BUF_BLOCK_ZIP_DIRTY)
1257   Writes to this field must be covered by both block->mutex and
1258   buf_pool->flush_list_mutex. Hence reads can happen while holding any one
1259   of the two mutexes */
1260   bool in_flush_list;
1261 
1262   /** TRUE if in buf_pool->free; when buf_pool->free_list_mutex is free, the
1263   following should hold: in_free_list == (state == BUF_BLOCK_NOT_USED) */
1264   bool in_free_list;
1265 #endif /* UNIV_DEBUG */
1266 
1267   /** log sequence number of the youngest modification to this block, zero
1268   if not modified. Protected by block mutex */
1269   lsn_t newest_modification;
1270 
1271   /** log sequence number of the START of the log entry written for the oldest
1272   modification to this block which has not yet been flushed to disk; zero if all
1273   modifications are on disk. Writes to this field must be covered by both
1274   block->mutex and buf_pool->flush_list_mutex. Hence reads can happen while
1275   holding any one of the two mutexes */
1276   lsn_t oldest_modification;
1277 
1278   /* @} */
1279 
1280   /** @name LRU replacement algorithm fields
1281   These fields are protected by both buf_pool->LRU_list_mutex and the
1282   block mutex. */
1283   /* @{ */
1284 
1285   /** node of the LRU list */
1286   UT_LIST_NODE_T(buf_page_t) LRU;
1287 #ifdef UNIV_DEBUG
1288   /** TRUE if the page is in the LRU list; used in debugging */
1289   bool in_LRU_list;
1290 #endif /* UNIV_DEBUG */
1291 
1292 #ifndef UNIV_HOTBACKUP
1293 
1294   /** true if the block is in the old blocks in buf_pool->LRU_old */
1295   unsigned old : 1;
1296 
1297   /** The value of buf_pool->freed_page_clock when this block was the last
1298   time put to the head of the LRU list; a thread is allowed to read this
1299   for heuristic purposes without holding any mutex or latch */
1300   unsigned freed_page_clock : 31;
1301 
1302   /* @} */
1303   /** Time of first access, or 0 if the block was never accessed in the
1304   buffer pool. Protected by block mutex */
1305   unsigned access_time;
1306 
1307 #ifdef UNIV_DEBUG
1308   /** This is set to TRUE when fsp frees a page in buffer pool;
1309   protected by buf_pool->zip_mutex or buf_block_t::mutex. */
1310   bool file_page_was_freed;
1311 #endif /* UNIV_DEBUG */
1312 
1313   /** Flush observer */
1314   FlushObserver *flush_observer;
1315 
1316 #endif /* !UNIV_HOTBACKUP */
1317 
1318   /** Double write instance ordinal value during writes. This is used
1319   by IO completion (writes) to select the double write instance.*/
1320   uint16_t m_dblwr_id{};
1321 };
1322 
/** The buffer control block structure */

struct buf_block_t {
  /** @name General fields */
  /* @{ */

  /** page information; this must be the first field, so
  that buf_pool->page_hash can point to buf_page_t or buf_block_t */
  buf_page_t page;

  /** pointer to buffer frame which is of size UNIV_PAGE_SIZE, and aligned
  to an address divisible by UNIV_PAGE_SIZE */
  byte *frame;

#ifndef UNIV_HOTBACKUP
  /** read-write lock of the buffer frame */
  BPageLock lock;

#endif /* !UNIV_HOTBACKUP */

  /** node of the decompressed LRU list; a block is in the unzip_LRU list if
  page.state == BUF_BLOCK_FILE_PAGE and page.zip.data != NULL. Protected by
  both LRU_list_mutex and the block mutex. */
  UT_LIST_NODE_T(buf_block_t) unzip_LRU;
#ifdef UNIV_DEBUG

  /** TRUE if the page is in the decompressed LRU list; used in debugging */
  bool in_unzip_LRU_list;

  /** TRUE if the block is in the withdraw list; used in debugging */
  bool in_withdraw_list;
#endif /* UNIV_DEBUG */

  /** hashed value of the page address in the record lock hash table;
  protected by buf_block_t::lock (or buf_block_t::mutex in buf_page_get_gen(),
  buf_page_init_for_read() and buf_page_create()) */
  unsigned lock_hash_val : 32;
  /* @} */
  /** @name Optimistic search field */
  /* @{ */

  /** This clock is incremented every time a pointer to a record on the page
  may become obsolete; this is used in the optimistic cursor positioning: if
  the modify clock has not changed, we know that the pointer is still valid;
  this field may be changed if the thread (1) owns the LRU list mutex and the
  page is not bufferfixed, or (2) the thread has an x-latch on the block,
  or (3) the block belongs to an intrinsic table */
  uint64_t modify_clock;

  /* @} */
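
  /* A minimal sketch of the optimistic positioning protocol built on
  modify_clock (illustrative only; the real logic lives in the cursor
  code, not in this header):
  @code
    // While latched: remember the clock together with the record pointer.
    uint64_t saved_clock = block->modify_clock;
    // ... release the latch, do unrelated work, re-latch the block ...
    if (block->modify_clock == saved_clock) {
      // No pointer to a record on this page can have become obsolete;
      // the saved cursor position may be reused.
    } else {
      // Fall back to a pessimistic search for the record.
    }
  @endcode */
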
  /** @name Hash search fields (unprotected)
  NOTE that these fields are NOT protected by any semaphore! */
  /* @{ */

  /** counter which controls building of a new hash index for the page */
  ulint n_hash_helps;

  /** recommended prefix length for hash search: number of bytes in an
  incomplete last field */
  volatile ulint n_bytes;

  /** recommended prefix length for hash search: number of full fields */
  volatile ulint n_fields;

  /** true or false, depending on whether the leftmost record of several
  records with the same prefix should be indexed in the hash index */
  volatile bool left_side;
  /* @} */

  /** @name Hash search fields
  These 5 fields may only be modified when:
  we are holding the appropriate x-latch in btr_search_latches[], and
  one of the following holds:
  (1) the block state is BUF_BLOCK_FILE_PAGE, and
  we are holding an s-latch or x-latch on buf_block_t::lock, or
  (2) buf_block_t::buf_fix_count == 0, or
  (3) the block state is BUF_BLOCK_REMOVE_HASH.

  An exception to this is when we init or create a page
  in the buffer pool in buf0buf.cc.

  Another exception for buf_pool_clear_hash_index() is that
  assigning block->index = NULL (and block->n_pointers = 0)
  is allowed whenever btr_search_own_all(RW_LOCK_X).

  Another exception is that ha_insert_for_fold_func() may
  decrement n_pointers without holding the appropriate latch
  in btr_search_latches[]. Thus, n_pointers must be
  protected by atomic memory access.

  This implies that the fields may be read without race
  condition whenever any of the following hold:
  - the btr_search_latches[] s-latch or x-latch is being held, or
  - the block state is not BUF_BLOCK_FILE_PAGE or BUF_BLOCK_REMOVE_HASH,
  and holding some latch prevents the state from changing to that.

  Some use of assert_block_ahi_empty() or assert_block_ahi_valid()
  is prone to race conditions while buf_pool_clear_hash_index() is
  executing (the adaptive hash index is being disabled). Such use
  is explicitly commented. */

  /* @{ */

#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
  /** used in debugging: the number of pointers in the adaptive hash index
  pointing to this frame; protected by atomic memory access or
  btr_search_own_all(). */
  ulint n_pointers;

#define assert_block_ahi_empty(block) \
  ut_a(os_atomic_increment_ulint(&(block)->n_pointers, 0) == 0)
#define assert_block_ahi_empty_on_init(block)                        \
  do {                                                               \
    UNIV_MEM_VALID(&(block)->n_pointers, sizeof(block)->n_pointers); \
    assert_block_ahi_empty(block);                                   \
  } while (0)

#define assert_block_ahi_valid(block) \
  ut_a((block)->index ||              \
       os_atomic_increment_ulint(&(block)->n_pointers, 0) == 0)
#else                                         /* UNIV_AHI_DEBUG || UNIV_DEBUG */
#define assert_block_ahi_empty(block)         /* nothing */
#define assert_block_ahi_empty_on_init(block) /* nothing */
#define assert_block_ahi_valid(block)         /* nothing */
#endif                                        /* UNIV_AHI_DEBUG || UNIV_DEBUG */

  /** prefix length for hash indexing: number of full fields */
  unsigned curr_n_fields : 10;

  /** number of bytes in hash indexing */
  unsigned curr_n_bytes : 15;

  /** TRUE or FALSE in hash indexing */
  unsigned curr_left_side : 1;

  /** Index for which the adaptive hash index has been created, or NULL if
  the page does not exist in the index. Note that it does not guarantee that
  the index is complete, though: there may have been hash collisions, record
  deletions, etc. */
  dict_index_t *index;

  /* @} */
  /** true if the block has been made dirty without acquiring an X/SX latch;
  this is allowed because the block belongs to a temporary tablespace and is
  always accessed by a single thread. */
  bool made_dirty_with_no_latch;

#ifndef UNIV_HOTBACKUP
#ifdef UNIV_DEBUG
  /** @name Debug fields */
  /* @{ */
  /** In the debug version, each thread which bufferfixes the block acquires
  an s-latch here; so we can use the debug utilities in sync0rw */
  rw_lock_t debug_latch;
  /* @} */
#endif /* UNIV_DEBUG */
#endif /* !UNIV_HOTBACKUP */

  /** mutex protecting this block: state (also protected by the buffer
  pool mutex), io_fix, buf_fix_count, and accessed; we introduced this
  mutex in InnoDB-5.1 to relieve contention on the buffer pool mutex */
  BPageMutex mutex;

  /** Get the page id (space id and page number) of the current buffer block.
  @return page id of the current buffer block. */
  const page_id_t &get_page_id() const { return page.id; }

  /** Get the page number of the current buffer block.
  @return page number of the current buffer block. */
  page_no_t get_page_no() const { return (page.id.page_no()); }

  /** Get the next page number of the current buffer block.
  @return next page number of the current buffer block. */
  page_no_t get_next_page_no() const {
    return (mach_read_from_4(frame + FIL_PAGE_NEXT));
  }

  /** Get the prev page number of the current buffer block.
  @return prev page number of the current buffer block. */
  page_no_t get_prev_page_no() const {
    return (mach_read_from_4(frame + FIL_PAGE_PREV));
  }

  /** Get the page type of the current buffer block.
  @return page type of the current buffer block. */
  page_type_t get_page_type() const {
    return (mach_read_from_2(frame + FIL_PAGE_TYPE));
  }

  /** Get the page type of the current buffer block as string.
  @return page type of the current buffer block as string. */
  const char *get_page_type_str() const;
};

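/* Illustrative use of the frame header accessors above (a sketch with a
hypothetical helper; assumes the caller holds an appropriate latch on the
block so the frame cannot be evicted):
@code
  void check_chain(const buf_block_t *block) {
    const page_no_t next = block->get_next_page_no();
    const page_no_t prev = block->get_prev_page_no();
    // FIL_NULL marks the end of the chain in either direction.
    ut_ad(next == FIL_NULL || next != block->get_page_no());
    ut_ad(prev == FIL_NULL || prev != block->get_page_no());
  }
@endcode */
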
/** Check if a buf_block_t object is in a valid state
@param block buffer block
@return true if valid */
#define buf_block_state_valid(block)                   \
  (buf_block_get_state(block) >= BUF_BLOCK_NOT_USED && \
   (buf_block_get_state(block) <= BUF_BLOCK_REMOVE_HASH))

/** Compute the hash fold value for blocks in buf_pool->zip_hash. */
/* @{ */
#define BUF_POOL_ZIP_FOLD_PTR(ptr) ((ulint)(ptr) / UNIV_PAGE_SIZE)
#define BUF_POOL_ZIP_FOLD(b) BUF_POOL_ZIP_FOLD_PTR((b)->frame)
#define BUF_POOL_ZIP_FOLD_BPAGE(b) BUF_POOL_ZIP_FOLD((buf_block_t *)(b))
/* @} */

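/* The fold is simply the frame address divided by the page size, so every
frame in the pool maps to a distinct hash value. A sketch (hypothetical
addresses; assumes a 16 KiB UNIV_PAGE_SIZE):
@code
  // A frame at address 0x7f0000004000 folds to 0x7f0000004000 / 16384;
  // all three macros reduce to the same computation on block->frame.
  ulint fold = BUF_POOL_ZIP_FOLD(block);
  ut_ad(fold == BUF_POOL_ZIP_FOLD_PTR(block->frame));
@endcode */
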
/** A "Hazard Pointer" class used to iterate over page lists
inside the buffer pool. A hazard pointer is a buf_page_t pointer
which we intend to iterate over next and which we want to remain
valid even after we release the buffer pool mutex. */
class HazardPointer {
 public:
  /** Constructor
  @param buf_pool buffer pool instance
  @param mutex	mutex that is protecting the hp. */
  HazardPointer(const buf_pool_t *buf_pool, const ib_mutex_t *mutex)
      : m_buf_pool(buf_pool)
#ifdef UNIV_DEBUG
        ,
        m_mutex(mutex)
#endif /* UNIV_DEBUG */
        ,
        m_hp() {
  }

  /** Destructor */
  virtual ~HazardPointer() {}

  /** Get current value */
  buf_page_t *get() const {
    ut_ad(mutex_own(m_mutex));
    return (m_hp);
  }

  /** Set current value
  @param bpage	buffer block to be set as hp */
  void set(buf_page_t *bpage);

  /** Checks if a bpage is the hp
  @param bpage	buffer block to be compared
  @return true if it is hp */
  bool is_hp(const buf_page_t *bpage);

  /** Adjust the value of hp. This happens when some
  other thread working on the same list attempts to
  remove the hp from the list. Must be implemented
  by the derived classes.
  @param bpage	buffer block to be compared */
  virtual void adjust(const buf_page_t *bpage) = 0;

 protected:
  /** Disable copying */
  HazardPointer(const HazardPointer &);
  HazardPointer &operator=(const HazardPointer &);

  /** Buffer pool instance */
  const buf_pool_t *m_buf_pool;

#ifdef UNIV_DEBUG
  /** mutex that protects access to the m_hp. */
  const ib_mutex_t *m_mutex;
#endif /* UNIV_DEBUG */

  /** hazard pointer. */
  buf_page_t *m_hp;
};

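/* Sketch of the scan pattern the hazard pointer enables (illustrative
only; the real batch scans live in the flush and LRU modules). The scanning
thread publishes the next victim in the hazard pointer before releasing the
list mutex; any thread that removes that page from the list calls adjust(),
so the scan can resume safely:
@code
  mutex_enter(mutex);
  for (buf_page_t *bpage = UT_LIST_GET_LAST(page_list); bpage != nullptr;
       bpage = hp.get()) {
    hp.set(UT_LIST_GET_PREV(list, bpage));  // publish the next element
    mutex_exit(mutex);  // concurrent removals now go through adjust()
    process(bpage);     // hypothetical per-page work
    mutex_enter(mutex);
  }
  mutex_exit(mutex);
@endcode */
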
/** Class implementing buf_pool->flush_list hazard pointer */
class FlushHp : public HazardPointer {
 public:
  /** Constructor
  @param buf_pool buffer pool instance
  @param mutex	mutex that is protecting the hp. */
  FlushHp(const buf_pool_t *buf_pool, const ib_mutex_t *mutex)
      : HazardPointer(buf_pool, mutex) {}

  /** Destructor */
  virtual ~FlushHp() {}

  /** Adjust the value of hp. This happens when some
  other thread working on the same list attempts to
  remove the hp from the list.
  @param bpage	buffer block to be compared */
  void adjust(const buf_page_t *bpage);
};

/** Class implementing buf_pool->LRU hazard pointer */
class LRUHp : public HazardPointer {
 public:
  /** Constructor
  @param buf_pool buffer pool instance
  @param mutex	mutex that is protecting the hp. */
  LRUHp(const buf_pool_t *buf_pool, const ib_mutex_t *mutex)
      : HazardPointer(buf_pool, mutex) {}

  /** Destructor */
  virtual ~LRUHp() {}

  /** Adjust the value of hp. This happens when some
  other thread working on the same list attempts to
  remove the hp from the list.
  @param bpage	buffer block to be compared */
  void adjust(const buf_page_t *bpage);
};

/** Special purpose iterators to be used when scanning the LRU list.
The idea is that when one thread finishes the scan it leaves the
iterator in that position and another thread can resume the scan
from there. */
class LRUItr : public LRUHp {
 public:
  /** Constructor
  @param buf_pool buffer pool instance
  @param mutex	mutex that is protecting the hp. */
  LRUItr(const buf_pool_t *buf_pool, const ib_mutex_t *mutex)
      : LRUHp(buf_pool, mutex) {}

  /** Destructor */
  virtual ~LRUItr() {}

  /** Selects from where to start a scan. If we have scanned
  too deep into the LRU list it resets the value to the tail
  of the LRU list.
  @return buf_page_t from where to start the scan. */
  buf_page_t *start();
};

/** Struct that is embedded in the free zip blocks */
struct buf_buddy_free_t {
  union {
    ulint size; /*!< size of the block */
    byte bytes[FIL_PAGE_DATA];
    /*!< stamp[FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID]
    == BUF_BUDDY_FREE_STAMP denotes a free
    block. If the space_id field of the buddy
    block != BUF_BUDDY_FREE_STAMP, the block
    is not in any zip_free list. If the
    space_id is BUF_BUDDY_FREE_STAMP then
    stamp[0] will contain the
    buddy block size. */
  } stamp;

  buf_page_t bpage; /*!< Embedded bpage descriptor */
  UT_LIST_NODE_T(buf_buddy_free_t) list;
  /*!< Node of zip_free list */
};

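/* Sketch of how the stamp discriminates free buddy blocks (illustrative
only; the authoritative stamping and checking code lives in buf0buddy.cc):
@code
  // Read the space_id slot of the stamp; per the comment above, only free
  // buddy blocks carry BUF_BUDDY_FREE_STAMP there, and stamp.size then
  // holds the buddy block size.
  bool is_free = mach_read_from_4(buf->stamp.bytes +
                                  FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID) ==
                 BUF_BUDDY_FREE_STAMP;
@endcode */
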
/** @brief The buffer pool statistics structure. */
struct buf_pool_stat_t {
  using Shards = Counter::Shards<64>;

  /** Number of page gets performed; also successful searches through the
  adaptive hash index are counted as page gets; this field is NOT protected
  by the buffer pool mutex */
  Shards m_n_page_gets;

  /** Number of read operations. Accessed atomically. */
  uint64_t n_pages_read;

  /** Number of write operations. Accessed atomically. */
  uint64_t n_pages_written;

  /** Number of pages created in the pool with no read. Accessed atomically. */
  uint64_t n_pages_created;

  /** Number of pages read in as part of random read ahead. Not protected. */
  uint64_t n_ra_pages_read_rnd;

  /** Number of pages read in as part of read ahead. Not protected. */
  uint64_t n_ra_pages_read;

  /** Number of read ahead pages that are evicted without being accessed.
  Protected by LRU_list_mutex. */
  uint64_t n_ra_pages_evicted;

  /** Number of pages made young, in calls to buf_LRU_make_block_young().
  Protected by LRU_list_mutex. */
  uint64_t n_pages_made_young;

  /** Number of pages not made young because the first access was not long
  enough ago, in buf_page_peek_if_too_old(). Not protected. */
  uint64_t n_pages_not_made_young;

  /** LRU size in bytes. Protected by LRU_list_mutex. */
  uint64_t LRU_bytes;

  /** Flush_list size in bytes. Protected by flush_list_mutex */
  uint64_t flush_list_bytes;

  static void copy(buf_pool_stat_t &dst, const buf_pool_stat_t &src) noexcept {
    Counter::copy(dst.m_n_page_gets, src.m_n_page_gets);

    dst.n_pages_read = src.n_pages_read;
    dst.n_pages_written = src.n_pages_written;
    dst.n_pages_created = src.n_pages_created;
    dst.n_ra_pages_read_rnd = src.n_ra_pages_read_rnd;
    dst.n_ra_pages_read = src.n_ra_pages_read;
    dst.n_ra_pages_evicted = src.n_ra_pages_evicted;
    dst.n_pages_made_young = src.n_pages_made_young;
    dst.n_pages_not_made_young = src.n_pages_not_made_young;
    dst.LRU_bytes = src.LRU_bytes;
    dst.flush_list_bytes = src.flush_list_bytes;
  }

  void reset() {
    Counter::clear(m_n_page_gets);

    n_pages_read = 0;
    n_pages_written = 0;
    n_pages_created = 0;
    n_ra_pages_read_rnd = 0;
    n_ra_pages_read = 0;
    n_ra_pages_evicted = 0;
    n_pages_made_young = 0;
    n_pages_not_made_young = 0;
    LRU_bytes = 0;
    flush_list_bytes = 0;
  }
};

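/* Sketch: computing a per-interval delta from two snapshots, the pattern
buf_print_io-style reporting needs (illustrative only; synchronization is
as documented per member above):
@code
  buf_pool_stat_t snap;
  buf_pool_stat_t::copy(snap, buf_pool->stat);  // snapshot at interval start
  // ... workload runs ...
  uint64_t reads_in_interval = buf_pool->stat.n_pages_read - snap.n_pages_read;
@endcode */
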
/** Statistics of buddy blocks of a given size. */
struct buf_buddy_stat_t {
  /** Number of blocks allocated from the buddy system. */
  ulint used;
  /** Number of blocks relocated by the buddy system. */
  uint64_t relocated;
  /** Total duration of block relocations, in microseconds. */
  uint64_t relocated_usec;
};

/** @brief The buffer pool structure.

NOTE! The definition appears here only for other modules of this
directory (buf) to see it. Do not use from outside! */

struct buf_pool_t {
  /** @name General fields */
  /* @{ */
  BufListMutex chunks_mutex;    /*!< protects (de)allocation of chunks:
                                - changes to chunks, n_chunks are performed
                                  while holding this latch,
                                - reading buf_pool_should_madvise requires
                                  holding this latch for any buf_pool_t
                                - writing to buf_pool_should_madvise requires
                                  holding these latches for all buf_pool_t-s
                                */
  BufListMutex LRU_list_mutex;  /*!< LRU list mutex */
  BufListMutex free_list_mutex; /*!< free and withdraw list mutex */
  BufListMutex zip_free_mutex;  /*!< buddy allocator mutex */
  BufListMutex zip_hash_mutex;  /*!< zip_hash mutex */
  ib_mutex_t flush_state_mutex; /*!< Flush state protection
                                mutex */
  BufPoolZipMutex zip_mutex;    /*!< Zip mutex of this buffer
                                pool instance, protects compressed
                                only pages (of type buf_page_t, not
                                buf_block_t) */
  ulint instance_no;            /*!< Array index of this buffer
                                pool instance */
  ulint curr_pool_size;         /*!< Current pool size in bytes */
  ulint LRU_old_ratio;          /*!< Reserve this much of the buffer
                                pool for "old" blocks */
#ifdef UNIV_DEBUG
  ulint buddy_n_frames; /*!< Number of frames allocated from
                        the buffer pool to the buddy system.
                        Protected by zip_hash_mutex. */
#endif
  ut_allocator<unsigned char> allocator; /*!< Allocator used for
                         allocating memory for the "chunks"
                         member. */
  volatile ulint n_chunks;               /*!< number of buffer pool chunks */
  volatile ulint n_chunks_new; /*!< new number of buffer pool chunks */
  buf_chunk_t *chunks;         /*!< buffer pool chunks */
  buf_chunk_t *chunks_old;     /*!< old buffer pool chunks to be freed
                               after resizing buffer pool */
  ulint curr_size;             /*!< current pool size in pages */
  ulint old_size;              /*!< previous pool size in pages */
  page_no_t read_ahead_area;   /*!< size in pages of the area which
                               the read-ahead algorithms read if
                               invoked */
  hash_table_t *page_hash;     /*!< hash table of buf_page_t or
                               buf_block_t file pages,
                               buf_page_in_file() == TRUE,
                               indexed by (space_id, offset).
                               page_hash is protected by an
                               array of mutexes. */
  hash_table_t *page_hash_old; /*!< old pointer to page_hash to be
                               freed after resizing buffer pool */
  hash_table_t *zip_hash;      /*!< hash table of buf_block_t blocks
                               whose frames are allocated to the
                               zip buddy system,
                               indexed by block->frame */
  ulint n_pend_reads;          /*!< number of pending read
                               operations. Accessed atomically */
  ulint n_pend_unzip;          /*!< number of pending decompressions.
                               Accessed atomically. */

  ib_time_monotonic_t last_printout_time;
  /*!< when buf_print_io was last
  called. Accesses not protected. */
  buf_buddy_stat_t buddy_stat[BUF_BUDDY_SIZES_MAX + 1];
  /*!< Statistics of buddy system,
  indexed by block size. Protected by
  zip_free mutex, except for the used
  field, which is also accessed
  atomically */
  buf_pool_stat_t stat;     /*!< current statistics */
  buf_pool_stat_t old_stat; /*!< old statistics */

  /* @} */

  /** @name Page flushing algorithm fields */

  /* @{ */

  BufListMutex flush_list_mutex; /*!< mutex protecting the
                                flush list access. This mutex
                                protects flush_list, flush_rbt
                                and bpage::list pointers when
                                the bpage is on flush_list. It
                                also protects writes to
                                bpage::oldest_modification and
                                flush_list_hp */
  FlushHp flush_hp;              /*!< "hazard pointer"
                                used during scan of flush_list
                                while doing flush list batch.
                                Protected by flush_list_mutex */
  UT_LIST_BASE_NODE_T(buf_page_t) flush_list;
  /*!< base node of the modified block
  list */
  ibool init_flush[BUF_FLUSH_N_TYPES];
  /*!< this is TRUE when a flush of the
  given type is being initialized.
  Protected by flush_state_mutex. */
  ulint n_flush[BUF_FLUSH_N_TYPES];
  /*!< this is the number of pending
  writes in the given flush type.
  Protected by flush_state_mutex. */
  os_event_t no_flush[BUF_FLUSH_N_TYPES];
  /*!< this is in the set state
  when there is no flush batch
  of the given type running. Protected by
  flush_state_mutex. */
  ib_rbt_t *flush_rbt;    /*!< a red-black tree is used
                          exclusively during recovery to
                          speed up insertions in the
                          flush_list. This tree contains
                          blocks in order of
                          oldest_modification LSN and is
                          kept in sync with the
                          flush_list.
                          Each member of the tree MUST
                          also be on the flush_list.
                          This tree is relevant only in
                          recovery and is set to NULL
                          once the recovery is over.
                          Protected by flush_list_mutex */
  ulint freed_page_clock; /*!< a sequence number used
                         to count the number of buffer
                         blocks removed from the end of
                         the LRU list; NOTE that this
                         counter may wrap around at 4
                         billion! A thread is allowed
                         to read this for heuristic
                         purposes without holding any
                         mutex or latch. For non-heuristic
                         purposes protected by LRU_list_mutex */
  ibool try_LRU_scan;     /*!< Set to FALSE when an LRU
                          scan for a free block fails. This
                          flag is used to avoid repeated
                          scans of the LRU list when we know
                          that there is no free block
                          available in the scan depth for
                          eviction. Set to TRUE whenever
                          we flush a batch from the
                          buffer pool. Access is protected
                          by memory barriers. */

  lsn_t track_page_lsn; /* Page Tracking start LSN. */

  lsn_t max_lsn_io; /* Maximum LSN for which write io
                    has already started. */

  /* @} */

  /** @name LRU replacement algorithm fields */
  /* @{ */

  UT_LIST_BASE_NODE_T(buf_page_t) free;
  /*!< base node of the free
  block list */

  UT_LIST_BASE_NODE_T(buf_page_t) withdraw;
  /*!< base node of the withdraw
  block list. It is only used while
  shrinking the buffer pool; the blocks
  on this list are to be removed and
  must not be reused.
  Protected by free_list_mutex */

  ulint withdraw_target; /*!< target length of withdraw
                         block list, when withdrawing */

  /** "hazard pointer" used during scan of LRU while doing
  LRU list batch.  Protected by buf_pool::LRU_list_mutex */
  LRUHp lru_hp;

  /** Iterator used to scan the LRU list when searching for
  a replaceable victim. Protected by buf_pool::LRU_list_mutex. */
  LRUItr lru_scan_itr;

  /** Iterator used to scan the LRU list when searching for
  a single page flushing victim.  Protected by buf_pool::LRU_list_mutex. */
  LRUItr single_scan_itr;

  UT_LIST_BASE_NODE_T(buf_page_t) LRU;
  /*!< base node of the LRU list */

  buf_page_t *LRU_old; /*!< pointer to the approximately
                       LRU_old_ratio/BUF_LRU_OLD_RATIO_DIV
                       oldest blocks in the LRU list;
                       NULL if LRU length less than
                       BUF_LRU_OLD_MIN_LEN;
                       NOTE: when LRU_old != NULL, its length
                       should always equal LRU_old_len */
  ulint LRU_old_len;   /*!< length of the LRU list from
                       the block to which LRU_old points
                       onward, including that block;
                       see buf0lru.cc for the restrictions
                       on this value; 0 if LRU_old == NULL;
                       NOTE: LRU_old_len must be adjusted
                       whenever LRU_old shrinks or grows! */

  UT_LIST_BASE_NODE_T(buf_block_t) unzip_LRU;
  /*!< base node of the
  unzip_LRU list. The list is protected
  by LRU_list_mutex. */

  /* @} */
  /** @name Buddy allocator fields
  The buddy allocator is used for allocating compressed page
  frames and buf_page_t descriptors of blocks that exist
  in the buffer pool only in compressed form. */
  /* @{ */
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
  UT_LIST_BASE_NODE_T(buf_page_t) zip_clean;
  /*!< unmodified compressed pages */
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
  UT_LIST_BASE_NODE_T(buf_buddy_free_t) zip_free[BUF_BUDDY_SIZES_MAX];
  /*!< buddy free lists */

  buf_page_t *watch;
  /*!< Sentinel records for buffer
  pool watches. Scanning the array is
  protected by taking all page_hash
  latches in X. Updating or reading an
  individual watch page is protected by
  a corresponding individual page_hash
  latch. */

  /** A wrapper for buf_pool_t::allocator.allocate_large which also advises the
  OS that this chunk should not be dumped to a core file if that was requested.
  Emits a warning to the log and disables @@global.core_file if advising was
  requested but could not be performed, but still returns true as the
  allocation itself succeeded.
  @param[in]	  mem_size  number of bytes to allocate
  @param[in,out]  chunk     mem and mem_pfx fields of this chunk will be updated
                            to contain information about allocated memory region
  @return true iff allocated successfully */
  bool allocate_chunk(ulonglong mem_size, buf_chunk_t *chunk);

  /** A wrapper for buf_pool_t::allocator.deallocate_large which also advises
  the OS that this chunk can be dumped to a core file.
  Emits a warning to the log and disables @@global.core_file if advising was
  requested but could not be performed.
  @param[in]  chunk   mem and mem_pfx fields of this chunk will be used to
                      locate the memory region to free */
  void deallocate_chunk(buf_chunk_t *chunk);

  /** Advises the OS that all chunks in this buffer pool instance can be dumped
  to a core file.
  Emits a warning to the log if it could not succeed.
  @return true iff succeeded, false if no OS support or failed */
  bool madvise_dump();

  /** Advises the OS that all chunks in this buffer pool instance should not
  be dumped to a core file.
  Emits a warning to the log if it could not succeed.
  @return true iff succeeded, false if no OS support or failed */
  bool madvise_dont_dump();

#if BUF_BUDDY_LOW > UNIV_ZIP_SIZE_MIN
#error "BUF_BUDDY_LOW > UNIV_ZIP_SIZE_MIN"
#endif
  /* @} */
};

/** Print the given buf_pool_t object.
@param[in,out]	out		the output stream
@param[in]	buf_pool	the buf_pool_t object to be printed
@return the output stream */
std::ostream &operator<<(std::ostream &out, const buf_pool_t &buf_pool);

/** @name Accessors for buffer pool mutexes
Use these instead of accessing buffer pool mutexes directly. */
/* @{ */

#ifndef UNIV_HOTBACKUP
/** Test if flush list mutex is owned. */
#define buf_flush_list_mutex_own(b) mutex_own(&(b)->flush_list_mutex)

/** Acquire the flush list mutex. */
#define buf_flush_list_mutex_enter(b)    \
  do {                                   \
    mutex_enter(&(b)->flush_list_mutex); \
  } while (0)
/** Release the flush list mutex. */
#define buf_flush_list_mutex_exit(b)    \
  do {                                  \
    mutex_exit(&(b)->flush_list_mutex); \
  } while (0)

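/* Typical usage of the flush list accessors (a sketch):
@code
  buf_flush_list_mutex_enter(buf_pool);
  ut_ad(buf_flush_list_mutex_own(buf_pool));
  // ... traverse or modify buf_pool->flush_list ...
  buf_flush_list_mutex_exit(buf_pool);
@endcode */
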
/** Test if block->mutex is owned. */
#define buf_page_mutex_own(b) (b)->mutex.is_owned()

/** Acquire the block->mutex. */
#define buf_page_mutex_enter(b) \
  do {                          \
    mutex_enter(&(b)->mutex);   \
  } while (0)

/** Release the block->mutex. */
#define buf_page_mutex_exit(b) \
  do {                         \
    (b)->mutex.exit();         \
  } while (0)

/** Get the appropriate page_hash_lock. */
#define buf_page_hash_lock_get(buf_pool, page_id) \
  hash_get_lock((buf_pool)->page_hash, (page_id).fold())

/** If not holding the appropriate page_hash_lock, relock until appropriate. */
#define buf_page_hash_lock_s_confirm(hash_lock, buf_pool, page_id) \
  hash_lock_s_confirm(hash_lock, (buf_pool)->page_hash, (page_id).fold())

#define buf_page_hash_lock_x_confirm(hash_lock, buf_pool, page_id) \
  hash_lock_x_confirm(hash_lock, (buf_pool)->page_hash, (page_id).fold())
#endif /* !UNIV_HOTBACKUP */

#if defined(UNIV_DEBUG) && !defined(UNIV_HOTBACKUP)
/** Test if page_hash lock is held in s-mode. */
#define buf_page_hash_lock_held_s(buf_pool, bpage) \
  rw_lock_own(buf_page_hash_lock_get((buf_pool), (bpage)->id), RW_LOCK_S)

/** Test if page_hash lock is held in x-mode. */
#define buf_page_hash_lock_held_x(buf_pool, bpage) \
  rw_lock_own(buf_page_hash_lock_get((buf_pool), (bpage)->id), RW_LOCK_X)

/** Test if page_hash lock is held in x- or s-mode. */
#define buf_page_hash_lock_held_s_or_x(buf_pool, bpage) \
  (buf_page_hash_lock_held_s((buf_pool), (bpage)) ||    \
   buf_page_hash_lock_held_x((buf_pool), (bpage)))

#define buf_block_hash_lock_held_s(buf_pool, block) \
  buf_page_hash_lock_held_s((buf_pool), &(block)->page)

#define buf_block_hash_lock_held_x(buf_pool, block) \
  buf_page_hash_lock_held_x((buf_pool), &(block)->page)

#define buf_block_hash_lock_held_s_or_x(buf_pool, block) \
  buf_page_hash_lock_held_s_or_x((buf_pool), &(block)->page)
#else /* UNIV_DEBUG && !UNIV_HOTBACKUP */
#define buf_page_hash_lock_held_s(b, p) (TRUE)
#define buf_page_hash_lock_held_x(b, p) (TRUE)
#define buf_page_hash_lock_held_s_or_x(b, p) (TRUE)
#define buf_block_hash_lock_held_s(b, p) (TRUE)
#define buf_block_hash_lock_held_x(b, p) (TRUE)
#define buf_block_hash_lock_held_s_or_x(b, p) (TRUE)
#endif /* UNIV_DEBUG && !UNIV_HOTBACKUP */

/* @} */

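/* Sketch of the lock-then-confirm pattern that the *_confirm macros
support (illustrative only; real lookups are in buf0buf.cc). The lock is
chosen by folding the page id, but the hash table may be resized between
choosing the lock and acquiring it, so the caller re-confirms afterwards:
@code
  rw_lock_t *hash_lock = buf_page_hash_lock_get(buf_pool, page_id);
  rw_lock_s_lock(hash_lock);
  hash_lock = buf_page_hash_lock_s_confirm(hash_lock, buf_pool, page_id);
  // ... look up the page in buf_pool->page_hash ...
  rw_lock_s_unlock(hash_lock);
@endcode */
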
/**********************************************************************
Let us list the consistency conditions for different control block states.

NOT_USED:	is in free list, not in LRU list, not in flush list, nor
                page hash table
READY_FOR_USE:	is not in free list, LRU list, or flush list, nor page
                hash table
MEMORY:		is not in free list, LRU list, or flush list, nor page
                hash table
FILE_PAGE:	space and offset are defined, is in page hash table
                if io_fix == BUF_IO_WRITE,
                        pool: no_flush[flush_type] is in reset state,
                        pool: n_flush[flush_type] > 0

                (1) if buf_fix_count == 0, then
                        is in LRU list, not in free list
                        is in flush list,
                                if and only if oldest_modification > 0
                        is x-locked,
                                if and only if io_fix == BUF_IO_READ
                        is s-locked,
                                if and only if io_fix == BUF_IO_WRITE

                (2) if buf_fix_count > 0, then
                        is not in LRU list, not in free list
                        is in flush list,
                                if and only if oldest_modification > 0
                        if io_fix == BUF_IO_READ,
                                is x-locked
                        if io_fix == BUF_IO_WRITE,
                                is s-locked

State transitions:

NOT_USED => READY_FOR_USE
READY_FOR_USE => MEMORY
READY_FOR_USE => FILE_PAGE
MEMORY => NOT_USED
FILE_PAGE => NOT_USED	NOTE: This transition is allowed if and only if
                                (1) buf_fix_count == 0,
                                (2) oldest_modification == 0, and
                                (3) io_fix == 0.
*/

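/* The FILE_PAGE => NOT_USED preconditions above, written out as a
hypothetical debug helper (illustrative only; the real checks are spread
across the LRU code):
@code
  bool can_move_to_free_list(const buf_page_t *bpage) {
    return bpage->buf_fix_count == 0 &&                /* (1) not fixed */
           bpage->oldest_modification == 0 &&          /* (2) not dirty */
           buf_page_get_io_fix(bpage) == BUF_IO_NONE;  /* (3) no pending I/O */
  }
@endcode */
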
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
#ifndef UNIV_HOTBACKUP
/** Functor to validate the LRU list. */
struct CheckInLRUList {
  void operator()(const buf_page_t *elem) const { ut_a(elem->in_LRU_list); }

  static void validate(const buf_pool_t *buf_pool) {
    CheckInLRUList check;
    ut_list_validate(buf_pool->LRU, check);
  }
};

/** Functor to validate the free list. */
struct CheckInFreeList {
  void operator()(const buf_page_t *elem) const { ut_a(elem->in_free_list); }

  static void validate(const buf_pool_t *buf_pool) {
    CheckInFreeList check;
    ut_list_validate(buf_pool->free, check);
  }
};

/** Functor to validate that every block on the unzip_LRU list is also on
the LRU list. */
struct CheckUnzipLRUAndLRUList {
  void operator()(const buf_block_t *elem) const {
    ut_a(elem->page.in_LRU_list);
    ut_a(elem->in_unzip_LRU_list);
  }

  static void validate(const buf_pool_t *buf_pool) {
    CheckUnzipLRUAndLRUList check;
    ut_list_validate(buf_pool->unzip_LRU, check);
  }
};
#endif /* !UNIV_HOTBACKUP */
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */

#include "buf0buf.ic"

#endif /* !buf0buf_h */