1/*****************************************************************************
2
3Copyright (c) 1995, 2020, Oracle and/or its affiliates. All Rights Reserved.
4Copyright (c) 2008, Google Inc.
5
6Portions of this file contain modifications contributed and copyrighted by
7Google, Inc. Those modifications are gratefully acknowledged and are described
8briefly in the InnoDB documentation. The contributions by Google are
9incorporated with their permission, and subject to the conditions contained in
10the file COPYING.Google.
11
12This program is free software; you can redistribute it and/or modify it under
13the terms of the GNU General Public License, version 2.0, as published by the
14Free Software Foundation.
15
16This program is also distributed with certain software (including but not
17limited to OpenSSL) that is licensed under separate terms, as designated in a
18particular file or component or in included license documentation. The authors
19of MySQL hereby grant you an additional permission to link the program and
20your derivative works with the separately licensed software that they have
21included with MySQL.
22
23This program is distributed in the hope that it will be useful, but WITHOUT
24ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
25FOR A PARTICULAR PURPOSE. See the GNU General Public License, version 2.0,
26for more details.
27
28You should have received a copy of the GNU General Public License along with
29this program; if not, write to the Free Software Foundation, Inc.,
3051 Franklin St, Fifth Floor, Boston, MA 02110-1301  USA
31
32*****************************************************************************/
33
34/** @file include/buf0buf.ic
35 The database buffer buf_pool
36
37 Created 11/5/1995 Heikki Tuuri
38 *******************************************************/
39
40#include "mtr0mtr.h"
41
42#ifndef UNIV_HOTBACKUP
43#include "buf0flu.h"
44#include "buf0lru.h"
45#include "buf0rea.h"
46#include "fsp0types.h"
47#include "sync0debug.h"
48#include "ut0new.h"
49#endif /* !UNIV_HOTBACKUP */
50
/** A chunk of buffers. The buffer pool is allocated in chunks. */
struct buf_chunk_t {
  ulint size;           /*!< size of frames[] and blocks[] */
  unsigned char *mem;   /*!< pointer to the memory area which
                        was allocated for the frames */
  ut_new_pfx_t mem_pfx; /*!< Auxiliary structure, describing
                        "mem". It is filled by the allocator's
                        alloc method and later passed to the
                        deallocate method. */
  buf_block_t *blocks;  /*!< array of buffer control blocks */

  /** Get the size of 'mem' in bytes. */
  size_t mem_size() const { return (mem_pfx.m_size); }

  /** Advises the OS that this chunk should be dumped to a core file.
  Emits a warning to the log if it could not succeed.
  @return true iff succeeded, false if no OS support or failed */
  bool madvise_dump();

  /** Advises the OS that this chunk should not be dumped to a core file.
  Emits a warning to the log if it could not succeed.
  @return true iff succeeded, false if no OS support or failed */
  bool madvise_dont_dump();
};
75
76/** Gets the current size of buffer buf_pool in bytes.
77 @return size in bytes */
78UNIV_INLINE
79ulint buf_pool_get_curr_size(void) { return (srv_buf_pool_curr_size); }
80
81#ifndef UNIV_HOTBACKUP
82/** Calculates the index of a buffer pool to the buf_pool[] array.
83 @return the position of the buffer pool in buf_pool[] */
84UNIV_INLINE
85ulint buf_pool_index(const buf_pool_t *buf_pool) /*!< in: buffer pool */
86{
87  ulint i = buf_pool - buf_pool_ptr;
88  ut_ad(i < MAX_BUFFER_POOLS);
89  ut_ad(i < srv_buf_pool_instances);
90  return (i);
91}
92
93/** Returns the buffer pool instance given a page instance
94 @return buf_pool */
95UNIV_INLINE
96buf_pool_t *buf_pool_from_bpage(
97    const buf_page_t *bpage) /*!< in: buffer pool page */
98{
99  ulint i;
100  i = bpage->buf_pool_index;
101  ut_ad(i < srv_buf_pool_instances);
102  return (&buf_pool_ptr[i]);
103}
104
105/** Returns the buffer pool instance given a block instance
106 @return buf_pool */
107UNIV_INLINE
108buf_pool_t *buf_pool_from_block(const buf_block_t *block) /*!< in: block */
109{
110  return (buf_pool_from_bpage(&block->page));
111}
112
113/** Gets the current size of buffer buf_pool in pages.
114 @return size in pages*/
115UNIV_INLINE
116ulint buf_pool_get_n_pages(void) {
117  return (buf_pool_get_curr_size() / UNIV_PAGE_SIZE);
118}
119
120/** Reads the freed_page_clock of a buffer block.
121 @return freed_page_clock */
122UNIV_INLINE
123ulint buf_page_get_freed_page_clock(const buf_page_t *bpage) /*!< in: block */
124{
125  /* This is sometimes read without holding any buffer pool mutex. */
126  return (bpage->freed_page_clock);
127}
128
129/** Reads the freed_page_clock of a buffer block.
130 @return freed_page_clock */
131UNIV_INLINE
132ulint buf_block_get_freed_page_clock(const buf_block_t *block) /*!< in: block */
133{
134  return (buf_page_get_freed_page_clock(&block->page));
135}
136
/** Tells, for heuristics, if a block is still close enough to the MRU end of
the LRU list meaning that it is not in danger of getting evicted and also
implying that it has been accessed recently.
The page must be either buffer-fixed, or its page hash must be locked.
@param[in]	bpage	block
@return true if block is close to MRU end of LRU */
UNIV_INLINE
ibool buf_page_peek_if_young(const buf_page_t *bpage) {
  buf_pool_t *buf_pool = buf_pool_from_bpage(bpage);

  /* The caller must pin the page or hold its page hash lock so the
  fields read below stay valid. */
  ut_ad(bpage->buf_fix_count > 0 ||
        buf_page_hash_lock_held_s_or_x(buf_pool, bpage));

  /* Compare the pool-wide eviction clock against the value recorded in
  the block plus a margin of curr_size * (non-old fraction) / 4 pages:
  the block counts as young while fewer than that many evictions have
  happened since its clock snapshot. */
  /* FIXME: bpage->freed_page_clock is 31 bits */
  return ((buf_pool->freed_page_clock & ((1UL << 31) - 1)) <
          ((ulint)bpage->freed_page_clock +
           (buf_pool->curr_size *
            (BUF_LRU_OLD_RATIO_DIV - buf_pool->LRU_old_ratio) /
            (BUF_LRU_OLD_RATIO_DIV * 4))));
}
157
/** Recommends a move of a block to the start of the LRU list if there is
danger of dropping from the buffer pool.
NOTE: does not reserve the LRU list mutex.
@param[in]	bpage	block to make younger
@return true if should be made younger */
UNIV_INLINE
ibool buf_page_peek_if_too_old(const buf_page_t *bpage) {
  buf_pool_t *buf_pool = buf_pool_from_bpage(bpage);

  if (buf_pool->freed_page_clock == 0) {
    /* If eviction has not started yet, do not update the
    statistics or move blocks in the LRU list.  This is
    either the warm-up phase or an in-memory workload. */
    return (FALSE);
  } else if (buf_LRU_old_threshold_ms && bpage->old) {
    unsigned access_time = buf_page_is_accessed(bpage);

    /* It is possible that the below comparison returns an
    unexpected result. 2^32 milliseconds pass in about 50 days,
    so if the difference between ut_time_monotonic_ms() and
    access_time is e.g. 50 days + 15 ms, then the below will
    behave as if it is 15 ms. This is known and fixing it would
    require to increase buf_page_t::access_time from 32 to 64 bits. */
    if (access_time > 0 &&
        ((ib_uint32_t)(ut_time_monotonic_ms() - access_time)) >=
            buf_LRU_old_threshold_ms) {
      /* First accessed at least buf_LRU_old_threshold_ms ago:
      recommend promotion. */
      return (TRUE);
    }

    /* Still within the time threshold: record the skipped
    promotion in the statistics. */
    buf_pool->stat.n_pages_not_made_young++;
    return (FALSE);
  } else {
    /* Time-based aging disabled, or the block is not flagged "old":
    fall back to the position-based heuristic. */
    return (!buf_page_peek_if_young(bpage));
  }
}
193#endif /* !UNIV_HOTBACKUP */
194
195/** Gets the state of a block.
196 @return state */
197UNIV_INLINE
198enum buf_page_state buf_page_get_state(
199    const buf_page_t *bpage) /*!< in: pointer to the control block */
200{
201  enum buf_page_state state = bpage->state;
202
203#ifdef UNIV_DEBUG
204  switch (state) {
205    case BUF_BLOCK_POOL_WATCH:
206    case BUF_BLOCK_ZIP_PAGE:
207    case BUF_BLOCK_ZIP_DIRTY:
208    case BUF_BLOCK_NOT_USED:
209    case BUF_BLOCK_READY_FOR_USE:
210    case BUF_BLOCK_FILE_PAGE:
211    case BUF_BLOCK_MEMORY:
212    case BUF_BLOCK_REMOVE_HASH:
213      break;
214    default:
215      ut_error;
216  }
217#endif /* UNIV_DEBUG */
218
219  return (state);
220}
221/** Gets the state of a block.
222 @return state */
223UNIV_INLINE
224enum buf_page_state buf_block_get_state(
225    const buf_block_t *block) /*!< in: pointer to the control block */
226{
227  return (buf_page_get_state(&block->page));
228}
229
230#ifndef UNIV_HOTBACKUP
231#ifdef UNIV_DEBUG
/** Assert that a given buffer pool page is private to the caller: no pointers
to it exist in any buffer pool list or hash table. Accessing pages by iterating
over buffer pool chunks is not considered here. Furthermore, assert that no
buffer pool locks except for LRU list mutex and page hash are held.
@param[in]	bpage			pointer to a buffer pool page
@param[in]	hold_block_mutex	flag whether holding block mutex
@param[in]	hold_zip_free_mutex	flag whether holding zip free mutex
@return always true; failures abort via the ut_a assertions */
UNIV_INLINE
bool buf_page_is_private(const buf_page_t *bpage, bool hold_block_mutex,
                         bool hold_zip_free_mutex) {
  buf_pool_t *buf_pool = buf_pool_from_bpage(bpage);

  /* The page must not be linked from any buffer pool list or hash. */
  ut_a(!bpage->in_page_hash);
  ut_a(!bpage->in_zip_hash);
  ut_a(!bpage->in_flush_list);
  ut_a(!bpage->in_free_list);
  ut_a(!bpage->in_LRU_list);
  /* Only the mutexes the caller declared may be held. */
  if (!hold_block_mutex) {
    ut_a(!mutex_own(buf_page_get_mutex(bpage)));
  }
  ut_a(!mutex_own(&buf_pool->free_list_mutex));
  if (!hold_zip_free_mutex) {
    ut_a(!mutex_own(&buf_pool->zip_free_mutex));
  }
  ut_a(!mutex_own(&buf_pool->zip_hash_mutex));

  return (true);
}
260#endif /* UNIV_DEBUG */
261#endif /* !UNIV_HOTBACKUP */
262
/** Sets the state of a block. In debug builds, validates that the
transition from the current state to the new one is legal, and that the
mutexes/latches required for that particular transition are held.
@param[in,out]	bpage	pointer to control block
@param[in]	state	state */
UNIV_INLINE
void buf_page_set_state(buf_page_t *bpage, enum buf_page_state state) {
#ifndef UNIV_HOTBACKUP
#ifdef UNIV_DEBUG
  enum buf_page_state old_state = buf_page_get_state(bpage);
  buf_pool_t *buf_pool = buf_pool_from_bpage(bpage);

  switch (old_state) {
    case BUF_BLOCK_POOL_WATCH:
      /* A watch sentinel must never change state through here. */
      ut_error;
      break;
    case BUF_BLOCK_ZIP_PAGE:
      /* A clean compressed-only page may only become dirty. */
      ut_a(state == BUF_BLOCK_ZIP_DIRTY);
      break;
    case BUF_BLOCK_ZIP_DIRTY:
      /* A dirty compressed-only page may only become clean; this
      requires the block mutex, the flush list mutex, and the page
      being on the flush list. */
      ut_a(state == BUF_BLOCK_ZIP_PAGE);
      ut_a(mutex_own(buf_page_get_mutex(bpage)));
      ut_a(buf_flush_list_mutex_own(buf_pool));
      ut_a(bpage->in_flush_list);
      break;
    case BUF_BLOCK_NOT_USED:
      ut_a(state == BUF_BLOCK_READY_FOR_USE);
      ut_a(buf_page_is_private(bpage, false, false));
      break;
    case BUF_BLOCK_READY_FOR_USE:
      ut_a(state == BUF_BLOCK_MEMORY || state == BUF_BLOCK_FILE_PAGE ||
           state == BUF_BLOCK_NOT_USED);
      ut_a(buf_page_is_private(bpage, state == BUF_BLOCK_FILE_PAGE,
                               state == BUF_BLOCK_NOT_USED));
      break;
    case BUF_BLOCK_MEMORY:
      ut_a(state == BUF_BLOCK_NOT_USED);
      ut_a(buf_page_is_private(bpage, false, true));
      break;
    case BUF_BLOCK_FILE_PAGE:
      /* Removing a file page from the page hash requires the block
      mutex, the LRU list mutex, and an X-latched page hash, and the
      page must already be delinked from all lists. */
      ut_a(state == BUF_BLOCK_NOT_USED || state == BUF_BLOCK_REMOVE_HASH);
      if (state == BUF_BLOCK_REMOVE_HASH) {
        ut_a(!bpage->in_page_hash);
        ut_a(!bpage->in_zip_hash);
        ut_a(!bpage->in_LRU_list);
        ut_a(!bpage->in_free_list);
        ut_a(mutex_own(buf_page_get_mutex(bpage)));
        ut_a(mutex_own(&buf_pool->LRU_list_mutex));
        ut_a(buf_page_hash_lock_held_x(buf_pool, bpage));
      }
      break;
    case BUF_BLOCK_REMOVE_HASH:
      ut_a(state == BUF_BLOCK_MEMORY);
      break;
  }
#endif /* UNIV_DEBUG */
#endif /* !UNIV_HOTBACKUP */
  bpage->state = state;
  ut_ad(buf_page_get_state(bpage) == state);
}
321
322/** Sets the state of a block. */
323UNIV_INLINE
324void buf_block_set_state(
325    buf_block_t *block,        /*!< in/out: pointer to control block */
326    enum buf_page_state state) /*!< in: state */
327{
328  buf_page_set_state(&block->page, state);
329}
330
331/** Determines if a block is mapped to a tablespace.
332 @return true if mapped */
333UNIV_INLINE
334ibool buf_page_in_file(
335    const buf_page_t *bpage) /*!< in: pointer to control block */
336{
337  switch (buf_page_get_state(bpage)) {
338    case BUF_BLOCK_POOL_WATCH:
339      ut_error;
340      break;
341    case BUF_BLOCK_ZIP_PAGE:
342    case BUF_BLOCK_ZIP_DIRTY:
343    case BUF_BLOCK_FILE_PAGE:
344      return (TRUE);
345    case BUF_BLOCK_NOT_USED:
346    case BUF_BLOCK_READY_FOR_USE:
347    case BUF_BLOCK_MEMORY:
348    case BUF_BLOCK_REMOVE_HASH:
349      break;
350  }
351
352  return (FALSE);
353}
354
355#ifndef UNIV_HOTBACKUP
356/** Determines if a block should be on unzip_LRU list.
357 @return true if block belongs to unzip_LRU */
358UNIV_INLINE
359bool buf_page_belongs_to_unzip_LRU(
360    const buf_page_t *bpage) /*!< in: pointer to control block */
361{
362  ut_ad(buf_page_in_file(bpage));
363
364  return (bpage->zip.data && buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE);
365}
366
367/** Gets the mutex of a block.
368 @return pointer to mutex protecting bpage */
369UNIV_INLINE
370BPageMutex *buf_page_get_mutex(
371    const buf_page_t *bpage) /*!< in: pointer to control block */
372{
373  buf_pool_t *buf_pool = buf_pool_from_bpage(bpage);
374
375  switch (buf_page_get_state(bpage)) {
376    case BUF_BLOCK_POOL_WATCH:
377      ut_error;
378    case BUF_BLOCK_ZIP_PAGE:
379    case BUF_BLOCK_ZIP_DIRTY:
380      return (&buf_pool->zip_mutex);
381    default:
382      return (&((buf_block_t *)bpage)->mutex);
383  }
384}
385
/** Get the flush type of a page.
 @return flush type */
UNIV_INLINE
buf_flush_t buf_page_get_flush_type(
    const buf_page_t *bpage) /*!< in: buffer page */
{
  buf_flush_t flush_type = (buf_flush_t)bpage->flush_type;

#ifdef UNIV_DEBUG
  /* In debug builds, validate that the stored value is one of the
  real flush types before returning it. */
  switch (flush_type) {
    case BUF_FLUSH_LRU:
    case BUF_FLUSH_LIST:
    case BUF_FLUSH_SINGLE_PAGE:
      return (flush_type);
    case BUF_FLUSH_N_TYPES:
      /* BUF_FLUSH_N_TYPES is a counter, never a valid value. */
      ut_error;
  }
  ut_error;
#else  /* UNIV_DEBUG */
  return (flush_type);
#endif /* UNIV_DEBUG */
}
408/** Set the flush type of a page. */
409UNIV_INLINE
410void buf_page_set_flush_type(buf_page_t *bpage,      /*!< in: buffer page */
411                             buf_flush_t flush_type) /*!< in: flush type */
412{
413  bpage->flush_type = flush_type;
414  ut_ad(buf_page_get_flush_type(bpage) == flush_type);
415}
416
417/** Map a block to a file page.
418@param[in,out]	block	pointer to control block
419@param[in]	page_id	page id */
420UNIV_INLINE
421void buf_block_set_file_page(buf_block_t *block, const page_id_t &page_id) {
422  buf_block_set_state(block, BUF_BLOCK_FILE_PAGE);
423  block->page.id = page_id;
424}
425
426/** Gets the io_fix state of a block.
427 @return io_fix state */
428UNIV_INLINE
429enum buf_io_fix buf_page_get_io_fix(
430    const buf_page_t *bpage) /*!< in: pointer to the control block */
431{
432  ut_ad(mutex_own(buf_page_get_mutex(bpage)));
433  return buf_page_get_io_fix_unlocked(bpage);
434}
435
/** Gets the io_fix state of a buffer page. Does not assert that the
buf_page_get_mutex() mutex is held, to be used in the cases where it is safe
not to hold it.
@param[in]	bpage	pointer to the buffer page
@return page io_fix state */
UNIV_INLINE
enum buf_io_fix buf_page_get_io_fix_unlocked(const buf_page_t *bpage) {
  ut_ad(bpage != nullptr);

  enum buf_io_fix io_fix = bpage->io_fix;

#ifdef UNIV_DEBUG
  /* In debug builds, validate that the stored value is one of the
  known io_fix states before returning it. */
  switch (io_fix) {
    case BUF_IO_NONE:
    case BUF_IO_READ:
    case BUF_IO_WRITE:
    case BUF_IO_PIN:
      return (io_fix);
  }
  ut_error;
#else  /* UNIV_DEBUG */
  return (io_fix);
#endif /* UNIV_DEBUG */
}
460
461/** Gets the io_fix state of a block.
462 @return io_fix state */
463UNIV_INLINE
464enum buf_io_fix buf_block_get_io_fix(
465    const buf_block_t *block) /*!< in: pointer to the control block */
466{
467  return (buf_page_get_io_fix(&block->page));
468}
469
470/** Gets the io_fix state of a buffer block. Does not assert that the
471buf_page_get_mutex() mutex is held, to be used in the cases where it is safe
472not to hold it.
473@param[in]	block	pointer to the buffer block
474@return page io_fix state */
475UNIV_INLINE
476enum buf_io_fix buf_block_get_io_fix_unlocked(const buf_block_t *block) {
477  return (buf_page_get_io_fix_unlocked(&block->page));
478}
479
480/** Sets the io_fix state of a block. */
481UNIV_INLINE
482void buf_page_set_io_fix(buf_page_t *bpage,      /*!< in/out: control block */
483                         enum buf_io_fix io_fix) /*!< in: io_fix state */
484{
485  ut_ad(mutex_own(buf_page_get_mutex(bpage)));
486
487  bpage->io_fix = io_fix;
488  ut_ad(buf_page_get_io_fix(bpage) == io_fix);
489}
490
491/** Sets the io_fix state of a block. */
492UNIV_INLINE
493void buf_block_set_io_fix(buf_block_t *block,     /*!< in/out: control block */
494                          enum buf_io_fix io_fix) /*!< in: io_fix state */
495{
496  buf_page_set_io_fix(&block->page, io_fix);
497}
498
/** Makes a block sticky. A sticky block implies that even after we release
the buf_pool->LRU_list_mutex and the block->mutex:
* it cannot be removed from the flush_list
* the block descriptor cannot be relocated
* it cannot be removed from the LRU list
Note that:
* the block can still change its position in the LRU list
* the next and previous pointers can change.
@param[in,out]	bpage	control block */
UNIV_INLINE
void buf_page_set_sticky(buf_page_t *bpage) {
#ifdef UNIV_DEBUG
  /* buf_pool is only needed for the LRU mutex assertion below. */
  buf_pool_t *buf_pool = buf_pool_from_bpage(bpage);
  ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
#endif /* UNIV_DEBUG */

  ut_ad(mutex_own(buf_page_get_mutex(bpage)));
  /* A block may only be pinned when no I/O is in progress on it. */
  ut_ad(buf_page_get_io_fix(bpage) == BUF_IO_NONE);
  ut_ad(bpage->in_LRU_list);

  /* Stickiness is implemented as the BUF_IO_PIN io_fix state. */
  bpage->io_fix = BUF_IO_PIN;
}
521
522/** Removes stickiness of a block. */
523UNIV_INLINE
524void buf_page_unset_sticky(buf_page_t *bpage) /*!< in/out: control block */
525{
526  ut_ad(mutex_own(buf_page_get_mutex(bpage)));
527  ut_ad(buf_page_get_io_fix(bpage) == BUF_IO_PIN);
528
529  bpage->io_fix = BUF_IO_NONE;
530}
531
532/** Determine if a buffer block can be relocated in memory.  The block
533 can be dirty, but it must not be I/O-fixed or bufferfixed. */
534UNIV_INLINE
535ibool buf_page_can_relocate(
536    const buf_page_t *bpage) /*!< control block being relocated */
537{
538  ut_ad(mutex_own(buf_page_get_mutex(bpage)));
539  ut_ad(buf_page_in_file(bpage));
540  ut_ad(bpage->in_LRU_list);
541
542  return (buf_page_get_io_fix(bpage) == BUF_IO_NONE &&
543          bpage->buf_fix_count == 0);
544}
545
/** Determine if a block has been flagged old.
@param[in]	bpage	control block
@return true if old */
UNIV_INLINE
ibool buf_page_is_old(const buf_page_t *bpage) {
#ifdef UNIV_DEBUG
  buf_pool_t *buf_pool = buf_pool_from_bpage(bpage);
  /* Buffer page mutex is not strictly required here for heuristic
  purposes even if LRU mutex is not being held.  Keep the assertion
  for now since all the callers hold it.  */
  ut_ad(mutex_own(buf_page_get_mutex(bpage)) ||
        mutex_own(&buf_pool->LRU_list_mutex));
#endif /* UNIV_DEBUG */
  ut_ad(buf_page_in_file(bpage));

  return (bpage->old);
}
563
/** Flag a block old.
@param[in]	bpage	control block
@param[in]	old	old */
UNIV_INLINE
void buf_page_set_old(buf_page_t *bpage, ibool old) {
#ifdef UNIV_DEBUG
  /* buf_pool is only referenced by the debug assertions below. */
  buf_pool_t *buf_pool = buf_pool_from_bpage(bpage);
#endif /* UNIV_DEBUG */
  ut_a(buf_page_in_file(bpage));
  ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
  ut_ad(bpage->in_LRU_list);

#ifdef UNIV_LRU_DEBUG
  ut_a((buf_pool->LRU_old_len == 0) == (buf_pool->LRU_old == NULL));
  /* If a block is flagged "old", the LRU_old list must exist. */
  ut_a(!old || buf_pool->LRU_old);

  /* Check that the old/new boundary of the LRU list stays consistent:
  within the interior of the list, the "old" flag may only flip at the
  LRU_old boundary block. */
  if (UT_LIST_GET_PREV(LRU, bpage) && UT_LIST_GET_NEXT(LRU, bpage)) {
    const buf_page_t *prev = UT_LIST_GET_PREV(LRU, bpage);
    const buf_page_t *next = UT_LIST_GET_NEXT(LRU, bpage);
    if (prev->old == next->old) {
      ut_a(prev->old == old);
    } else {
      ut_a(!prev->old);
      ut_a(buf_pool->LRU_old == (old ? bpage : next));
    }
  }
#endif /* UNIV_LRU_DEBUG */

  bpage->old = old;
}
595
596/** Determine the time of first access of a block in the buffer pool.
597 @return ut_time_monotonic_ms() at the time of first access, 0 if not accessed
598 */
599UNIV_INLINE
600unsigned buf_page_is_accessed(const buf_page_t *bpage) /*!< in: control block */
601{
602  ut_ad(buf_page_in_file(bpage));
603
604  return (bpage->access_time);
605}
606
607/** Flag a block accessed. */
608UNIV_INLINE
609void buf_page_set_accessed(buf_page_t *bpage) /*!< in/out: control block */
610{
611  ut_ad(mutex_own(buf_page_get_mutex(bpage)));
612
613  ut_a(buf_page_in_file(bpage));
614
615  if (bpage->access_time == 0) {
616    /* Make this the time of the first access. */
617    bpage->access_time = static_cast<uint>(ut_time_monotonic_ms());
618  }
619}
620#endif /* !UNIV_HOTBACKUP */
621
/** Gets the buf_block_t handle of a buffered file block if an uncompressed
page frame exists, or NULL. The caller must hold either the appropriate hash
lock in any mode, or the LRU list mutex. Note: even though bpage is not
declared a const we don't update its value. It is safe to make this pure.
@param[in]	bpage	control block, or NULL
@return control block, or NULL */
UNIV_INLINE
buf_block_t *buf_page_get_block(buf_page_t *bpage) {
  if (bpage != nullptr) {
#ifndef UNIV_HOTBACKUP
#ifdef UNIV_DEBUG
    buf_pool_t *buf_pool = buf_pool_from_bpage(bpage);
    ut_ad(buf_page_hash_lock_held_s_or_x(buf_pool, bpage) ||
          mutex_own(&buf_pool->LRU_list_mutex));
#endif /* UNIV_DEBUG */
#endif /* !UNIV_HOTBACKUP */
    ut_ad(buf_page_in_file(bpage));

    /* Only BUF_BLOCK_FILE_PAGE descriptors are embedded in a
    buf_block_t with an uncompressed frame. */
    if (buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE) {
      return ((buf_block_t *)bpage);
    }
  }

  return (nullptr);
}
648
649#ifndef UNIV_HOTBACKUP
650#ifdef UNIV_DEBUG
/** Gets a pointer to the memory frame of a block. This debug-build
variant first validates that the block is in a state in which its frame
may legitimately be accessed.
 @return pointer to the frame */
UNIV_INLINE
buf_frame_t *buf_block_get_frame(
    const buf_block_t *block) /*!< in: pointer to the control block */
{
  ut_ad(block);

  switch (buf_block_get_state(block)) {
    case BUF_BLOCK_POOL_WATCH:
    case BUF_BLOCK_ZIP_PAGE:
    case BUF_BLOCK_ZIP_DIRTY:
    case BUF_BLOCK_NOT_USED:
      /* These states have no (accessible) uncompressed frame. */
      ut_error;
      break;
    case BUF_BLOCK_FILE_PAGE:
      /* A file page frame may only be read while buffer-fixed. */
      ut_a(block->page.buf_fix_count > 0);
      /* fall through */
    case BUF_BLOCK_READY_FOR_USE:
    case BUF_BLOCK_MEMORY:
    case BUF_BLOCK_REMOVE_HASH:
      goto ok;
  }
  ut_error;
ok:
  return ((buf_frame_t *)block->frame);
}
678#endif /* UNIV_DEBUG */
679#endif /* !UNIV_HOTBACKUP */
680
681/***********************************************************************
682FIXME_FTS Gets the frame the pointer is pointing to. */
683UNIV_INLINE
684buf_frame_t *buf_frame_align(
685    /* out: pointer to frame */
686    byte *ptr) /* in: pointer to a frame */
687{
688  buf_frame_t *frame;
689
690  ut_ad(ptr);
691
692  frame = (buf_frame_t *)ut_align_down(ptr, UNIV_PAGE_SIZE);
693
694  return (frame);
695}
696
697/** Gets the space id, page offset, and byte offset within page of a
698 pointer pointing to a buffer frame containing a file page. */
699UNIV_INLINE
700void buf_ptr_get_fsp_addr(
701    const void *ptr,   /*!< in: pointer to a buffer frame */
702    space_id_t *space, /*!< out: space id */
703    fil_addr_t *addr)  /*!< out: page offset and byte offset */
704{
705  const page_t *page = (const page_t *)ut_align_down(ptr, UNIV_PAGE_SIZE);
706
707  *space = mach_read_from_4(page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
708  addr->page = mach_read_from_4(page + FIL_PAGE_OFFSET);
709  addr->boffset = static_cast<uint32_t>(ut_align_offset(ptr, UNIV_PAGE_SIZE));
710}
711
712#ifndef UNIV_HOTBACKUP
/** Gets the hash value of the page the pointer is pointing to. This can be
 used in searches in the lock hash table.
 @return lock hash value */
UNIV_INLINE
ulint buf_block_get_lock_hash_val(const buf_block_t *block) /*!< in: block */
{
  ut_ad(block);
  ut_ad(buf_page_in_file(&block->page));
  /* The caller must hold the block's rw-lock in some mode; the value
  is cached in the block, so no recomputation is needed here. */
  ut_ad(rw_lock_own(&(((buf_block_t *)block)->lock), RW_LOCK_X) ||
        rw_lock_own(&(((buf_block_t *)block)->lock), RW_LOCK_S));

  return (block->lock_hash_val);
}
726
727/** Allocates a buf_page_t descriptor. This function must succeed. In case
728 of failure we assert in this function.
729 @return: the allocated descriptor. */
730UNIV_INLINE
731buf_page_t *buf_page_alloc_descriptor(void) {
732  buf_page_t *bpage;
733
734  bpage = (buf_page_t *)ut_zalloc_nokey(sizeof *bpage);
735  ut_ad(bpage);
736  UNIV_MEM_ALLOC(bpage, sizeof *bpage);
737
738  return (bpage);
739}
740
741/** Free a buf_page_t descriptor. */
742UNIV_INLINE
743void buf_page_free_descriptor(
744    buf_page_t *bpage) /*!< in: bpage descriptor to free. */
745{
746  ut_free(bpage);
747}
748
749/** Frees a buffer block which does not contain a file page. */
750UNIV_INLINE
751void buf_block_free(buf_block_t *block) /*!< in, own: block to be freed */
752{
753  ut_a(buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE);
754
755  buf_LRU_block_free_non_file_page(block);
756}
757#endif /* !UNIV_HOTBACKUP */
758
759/** Copies contents of a buffer frame to a given buffer.
760 @return buf */
761UNIV_INLINE
762byte *buf_frame_copy(byte *buf,                /*!< in: buffer to copy to */
763                     const buf_frame_t *frame) /*!< in: buffer frame */
764{
765  ut_ad(buf && frame);
766
767  ut_memcpy(buf, frame, UNIV_PAGE_SIZE);
768
769  return (buf);
770}
771
772#ifndef UNIV_HOTBACKUP
/** Gets the youngest modification log sequence number for a frame.
 Returns zero if not file page or no modification occurred yet.
 @return newest modification to page */
UNIV_INLINE
lsn_t buf_page_get_newest_modification(
    const buf_page_t *bpage) /*!< in: block containing the
                             page frame */
{
  lsn_t lsn;
  BPageMutex *block_mutex = buf_page_get_mutex(bpage);

  /* The block mutex protects both the state and the LSN read below. */
  mutex_enter(block_mutex);

  if (buf_page_in_file(bpage)) {
    lsn = bpage->newest_modification;
  } else {
    /* Non-file pages have no meaningful modification LSN. */
    lsn = 0;
  }

  mutex_exit(block_mutex);

  return (lsn);
}
796
/** Increment the modify clock.
The caller must
(1) block bufferfix count has to be zero,
(2) own X or SX latch on the block->lock, or
(3) operate on a thread-private temporary table
@param[in,out]	block	buffer block */
UNIV_INLINE
void buf_block_modify_clock_inc(buf_block_t *block) {
#ifdef UNIV_DEBUG
  /* buf_pool is only needed for the debug assertion below. */
  buf_pool_t *buf_pool = buf_pool_from_bpage(&block->page);
#endif /* UNIV_DEBUG */
  assert_block_ahi_valid(block);

  /* No block latch is acquired for internal temporary tables. */
  ut_ad(fsp_is_system_temporary(block->page.id.space()) ||
        (block->page.buf_fix_count == 0 &&
         mutex_own(&buf_pool->LRU_list_mutex)) ||
        rw_lock_own_flagged(&block->lock, RW_LOCK_FLAG_X | RW_LOCK_FLAG_SX));

  block->modify_clock++;
}
818
/** Read the modify clock.
@param[in]	block	buffer block
@return modify_clock value */
UNIV_INLINE
ib_uint64_t buf_block_get_modify_clock(const buf_block_t *block) {
  /* No block latch is acquired for internal temporary tables;
  otherwise the caller must hold the block lock in some mode. */
  ut_ad(fsp_is_system_temporary(block->page.id.space()) ||
        rw_lock_own_flagged(&block->lock,
                            RW_LOCK_FLAG_X | RW_LOCK_FLAG_SX | RW_LOCK_FLAG_S));

  return (block->modify_clock);
}
831
832/** Increments the bufferfix count.
833@param[in,out]	bpage	block to bufferfix
834@return the count */
835UNIV_INLINE
836ulint buf_block_fix(buf_page_t *bpage) {
837  auto count = os_atomic_increment_uint32(&bpage->buf_fix_count, 1);
838  ut_ad(count > 0);
839  return (count);
840}
841
842/** Increments the bufferfix count.
843@param[in,out]	block	block to bufferfix
844@return the count */
845UNIV_INLINE
846ulint buf_block_fix(buf_block_t *block) {
847  return (buf_block_fix(&block->page));
848}
849
/** Increments the bufferfix count. In debug builds, additionally takes a
shared latch on the block's debug_latch to detect latching-order violations;
the file/line parameters record the caller for diagnostics. */
UNIV_INLINE
void buf_block_buf_fix_inc_func(
#ifdef UNIV_DEBUG
    const char *file,   /*!< in: file name */
    ulint line,         /*!< in: line */
#endif                  /* UNIV_DEBUG */
    buf_block_t *block) /*!< in/out: block to bufferfix */
{
#ifdef UNIV_DEBUG
  /* No debug latch is acquired if block belongs to system temporary.
  Debug latch is not of much help if access to block is single
  threaded. */
  if (!fsp_is_system_temporary(block->page.id.space())) {
    ibool ret;
    ret = rw_lock_s_lock_nowait(&block->debug_latch, file, line);
    ut_a(ret);
  }
#endif /* UNIV_DEBUG */

  buf_block_fix(block);
}
872
/** Decrements the bufferfix count.
@param[in,out]	bpage	block to bufferunfix
@return	the remaining buffer-fix count */
UNIV_INLINE
ulint buf_block_unfix(buf_page_t *bpage) {
  ut_ad(!mutex_own(buf_page_get_mutex(bpage)));
  const auto count = os_atomic_decrement_uint32(&bpage->buf_fix_count, 1);
  static_assert(std::is_unsigned<decltype(count)>::value, "Must be unsigned");
  /* Unfixing an already-zero count would wrap the unsigned counter
  around to its maximum value — catch that underflow here. */
  ut_ad(count != std::numeric_limits<decltype(count)>::max());
  return (count);
}
884
885/** Decrements the bufferfix count.
886@param[in,out]	block	block to bufferunfix
887@return the remaining buffer-fix count */
888UNIV_INLINE
889ulint buf_block_unfix(buf_block_t *block) {
890  return (buf_block_unfix(&block->page));
891}
892
/** Decrements the bufferfix count and, in debug builds, releases the
shared debug_latch taken by buf_block_buf_fix_inc_func(). */
UNIV_INLINE
void buf_block_buf_fix_dec(
    buf_block_t *block) /*!< in/out: block to bufferunfix */
{
  buf_block_unfix(block);

#ifdef UNIV_DEBUG
  /* No debug latch is acquired if block belongs to system temporary.
  Debug latch is not of much help if access to block is single
  threaded. */
  if (!fsp_is_system_temporary(block->page.id.space())) {
    rw_lock_s_unlock(&block->debug_latch);
  }
#endif /* UNIV_DEBUG */
}
909
910/** Returns the buffer pool instance given a page id.
911@param[in]	page_id	page id
912@return buffer pool */
913UNIV_INLINE
914buf_pool_t *buf_pool_get(const page_id_t &page_id) {
915  /* 2log of BUF_READ_AHEAD_AREA (64) */
916  page_no_t ignored_page_no = page_id.page_no() >> 6;
917
918  page_id_t id(page_id.space(), ignored_page_no);
919
920  ulint i = id.fold() % srv_buf_pool_instances;
921
922  return (&buf_pool_ptr[i]);
923}
924
925/** Returns the buffer pool instance given its array index
926 @return buffer pool */
927UNIV_INLINE
928buf_pool_t *buf_pool_from_array(ulint index) /*!< in: array index to get
929                                             buffer pool instance from */
930{
931  ut_ad(index < MAX_BUFFER_POOLS);
932  ut_ad(index < srv_buf_pool_instances);
933  return (&buf_pool_ptr[index]);
934}
935
/** Returns the control block of a file page, NULL if not found.
The caller must already hold the page_hash lock covering this page id
in S or X mode (asserted in debug builds).
@param[in]	buf_pool	buffer pool instance
@param[in]	page_id		page id
@return block, NULL if not found */
UNIV_INLINE
buf_page_t *buf_page_hash_get_low(buf_pool_t *buf_pool,
                                  const page_id_t &page_id) {
  buf_page_t *bpage;

#ifdef UNIV_DEBUG
  rw_lock_t *hash_lock;

  hash_lock = hash_get_lock(buf_pool->page_hash, page_id.fold());
  ut_ad(rw_lock_own(hash_lock, RW_LOCK_X) || rw_lock_own(hash_lock, RW_LOCK_S));
#endif /* UNIV_DEBUG */

  /* Look for the page in the hash table */

  HASH_SEARCH(hash, buf_pool->page_hash, page_id.fold(), buf_page_t *, bpage,
              ut_ad(bpage->in_page_hash && !bpage->in_zip_hash &&
                    buf_page_in_file(bpage)),
              page_id == bpage->id);
  if (bpage) {
    /* Validate the found descriptor's invariants. */
    ut_a(buf_page_in_file(bpage));
    ut_ad(bpage->in_page_hash);
    ut_ad(!bpage->in_zip_hash);
    ut_ad(buf_pool_from_bpage(bpage) == buf_pool);
  }

  return (bpage);
}
967
/** Returns the control block of a file page, NULL if not found.
If the block is found and lock is not NULL then the appropriate
page_hash lock is acquired in the specified lock mode. Otherwise,
mode value is ignored. It is up to the caller to release the
lock. If the block is found and the lock is NULL then the page_hash
lock is released by this function.
@param[in]	buf_pool	buffer pool instance
@param[in]	page_id		page id
@param[in,out]	lock		lock of the page hash acquired if bpage is
found, NULL otherwise. If NULL is passed then the hash_lock is released by
this function.
@param[in]	lock_mode	RW_LOCK_X or RW_LOCK_S. Ignored if
lock == NULL
@param[in]	watch		if true, return watch sentinel also.
@return pointer to the bpage or NULL; if NULL, lock is also NULL or
a watch sentinel. */
UNIV_INLINE
buf_page_t *buf_page_hash_get_locked(buf_pool_t *buf_pool,
                                     const page_id_t &page_id, rw_lock_t **lock,
                                     ulint lock_mode, bool watch) {
  buf_page_t *bpage = nullptr;
  rw_lock_t *hash_lock;
  /* When the caller does not want to keep the lock, a shared latch
  is sufficient for the lookup. */
  ulint mode = RW_LOCK_S;

  if (lock != nullptr) {
    *lock = nullptr;
    ut_ad(lock_mode == RW_LOCK_X || lock_mode == RW_LOCK_S);
    mode = lock_mode;
  }

  hash_lock = hash_get_lock(buf_pool->page_hash, page_id.fold());

  /* The hash latch must not already be held: we acquire it below. */
  ut_ad(!rw_lock_own(hash_lock, RW_LOCK_X) &&
        !rw_lock_own(hash_lock, RW_LOCK_S));

  if (mode == RW_LOCK_S) {
    rw_lock_s_lock(hash_lock);

    /* If not own LRU_list_mutex, page_hash can be changed. */
    hash_lock =
        hash_lock_s_confirm(hash_lock, buf_pool->page_hash, page_id.fold());
  } else {
    rw_lock_x_lock(hash_lock);
    /* If not own LRU_list_mutex, page_hash can be changed. */
    hash_lock =
        hash_lock_x_confirm(hash_lock, buf_pool->page_hash, page_id.fold());
  }

  bpage = buf_page_hash_get_low(buf_pool, page_id);

  if (!bpage || buf_pool_watch_is_sentinel(buf_pool, bpage)) {
    /* Not found, or found only a purge-watch sentinel. Sentinels
    are returned only when the caller asked for them via 'watch'. */
    if (!watch) {
      bpage = nullptr;
    }
    goto unlock_and_exit;
  }

  ut_ad(buf_page_in_file(bpage));
  ut_ad(page_id == bpage->id);

  if (lock == nullptr) {
    /* The caller wants us to release the page_hash lock */
    goto unlock_and_exit;
  } else {
    /* To be released by the caller */
    *lock = hash_lock;
    goto exit;
  }

unlock_and_exit:
  /* Release in the same mode the latch was acquired in. */
  if (mode == RW_LOCK_S) {
    rw_lock_s_unlock(hash_lock);
  } else {
    rw_lock_x_unlock(hash_lock);
  }
exit:
  return (bpage);
}
1046
1047/** Returns the control block of a file page, NULL if not found.
1048If the block is found and lock is not NULL then the appropriate
1049page_hash lock is acquired in the specified lock mode. Otherwise,
1050mode value is ignored. It is up to the caller to release the
1051lock. If the block is found and the lock is NULL then the page_hash
1052lock is released by this function.
1053@param[in]	buf_pool	buffer pool instance
1054@param[in]	page_id		page id
1055@param[in,out]	lock		lock of the page hash acquired if bpage is
1056found, NULL otherwise. If NULL is passed then the hash_lock is released by
1057this function.
1058@param[in]	lock_mode	RW_LOCK_X or RW_LOCK_S. Ignored if
1059lock == NULL
1060@return pointer to the block or NULL; if NULL, lock is also NULL. */
1061UNIV_INLINE
1062buf_block_t *buf_block_hash_get_locked(buf_pool_t *buf_pool,
1063                                       const page_id_t &page_id,
1064                                       rw_lock_t **lock, ulint lock_mode) {
1065  buf_page_t *bpage =
1066      buf_page_hash_get_locked(buf_pool, page_id, lock, lock_mode);
1067  buf_block_t *block = buf_page_get_block(bpage);
1068
1069  if (block != nullptr) {
1070    ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
1071    ut_ad(!lock || rw_lock_own(*lock, lock_mode));
1072
1073    return (block);
1074  } else if (bpage) {
1075    /* It is not a block. Just a bpage */
1076    ut_ad(buf_page_in_file(bpage));
1077
1078    if (lock) {
1079      if (lock_mode == RW_LOCK_S) {
1080        rw_lock_s_unlock(*lock);
1081      } else {
1082        rw_lock_x_unlock(*lock);
1083      }
1084    }
1085    *lock = nullptr;
1086    return (nullptr);
1087  }
1088
1089  ut_ad(!bpage);
1090  ut_ad(lock == nullptr || *lock == nullptr);
1091  return (nullptr);
1092}
1093
1094/** Returns TRUE if the page can be found in the buffer pool hash table.
1095NOTE that it is possible that the page is not yet read from disk,
1096though.
1097@param[in]	page_id	page id
1098@return true if found in the page hash table */
1099UNIV_INLINE
1100ibool buf_page_peek(const page_id_t &page_id) {
1101  buf_pool_t *buf_pool = buf_pool_get(page_id);
1102
1103  return (buf_page_hash_get(buf_pool, page_id) != nullptr);
1104}
1105
/** Releases a compressed-only page acquired with buf_page_get_zip().
Decrements the bufferfix count and, for uncompressed (FILE_PAGE)
blocks in debug builds, releases the debug S-latch first. */
UNIV_INLINE
void buf_page_release_zip(buf_page_t *bpage) /*!< in: buffer block */
{
  ut_ad(bpage);
  /* The page must be bufferfixed by the caller. */
  ut_a(bpage->buf_fix_count > 0);

  switch (buf_page_get_state(bpage)) {
    case BUF_BLOCK_FILE_PAGE:
#ifdef UNIV_DEBUG
    {
      /* No debug latch is acquired if block belongs to system
      temporary. Debug latch is not of much help if access to block
      is single threaded. */
      buf_block_t *block = reinterpret_cast<buf_block_t *>(bpage);
      if (!fsp_is_system_temporary(block->page.id.space())) {
        rw_lock_s_unlock(&block->debug_latch);
      }
    }
#endif /* UNIV_DEBUG */
       /* Fall through */

    case BUF_BLOCK_ZIP_PAGE:
    case BUF_BLOCK_ZIP_DIRTY:
      buf_block_unfix(reinterpret_cast<buf_block_t *>(bpage));
      return;

    /* These states cannot hold a bufferfix from buf_page_get_zip();
    reaching them here is a caller bug. */
    case BUF_BLOCK_POOL_WATCH:
    case BUF_BLOCK_NOT_USED:
    case BUF_BLOCK_READY_FOR_USE:
    case BUF_BLOCK_MEMORY:
    case BUF_BLOCK_REMOVE_HASH:
      break;
  }

  ut_error;
}
1143
1144/** Releases a latch, if specified. */
1145UNIV_INLINE
1146void buf_page_release_latch(buf_block_t *block, /*!< in: buffer block */
1147                            ulint rw_latch)     /*!< in: RW_S_LATCH, RW_X_LATCH,
1148                                                RW_NO_LATCH */
1149{
1150#ifdef UNIV_DEBUG
1151  /* No debug latch is acquired if block belongs to system
1152  temporary. Debug latch is not of much help if access to block
1153  is single threaded. */
1154  if (!fsp_is_system_temporary(block->page.id.space())) {
1155    rw_lock_s_unlock(&block->debug_latch);
1156  }
1157#endif /* UNIV_DEBUG */
1158
1159  if (rw_latch == RW_S_LATCH) {
1160    rw_lock_s_unlock(&block->lock);
1161  } else if (rw_latch == RW_SX_LATCH) {
1162    rw_lock_sx_unlock(&block->lock);
1163  } else if (rw_latch == RW_X_LATCH) {
1164    rw_lock_x_unlock(&block->lock);
1165  }
1166}
1167
#ifdef UNIV_DEBUG
/** Adds latch level info for the rw-lock protecting the buffer frame. This
 should be called in the debug version after a successful latching of a
 page if we know the latching order level of the acquired latch.
 Registers the frame lock with the sync-order checker only; no latch is
 taken or released here. Debug builds only. */
UNIV_INLINE
void buf_block_dbg_add_level(
    buf_block_t *block,  /*!< in: buffer page
                         where we have acquired latch */
    latch_level_t level) /*!< in: latching order level */
{
  sync_check_lock(&block->lock, level);
}
#endif /* UNIV_DEBUG */
1181
1182/** Get the nth chunk's buffer block in the specified buffer pool.
1183 @return the nth chunk's buffer block. */
1184UNIV_INLINE
1185buf_block_t *buf_get_nth_chunk_block(
1186    const buf_pool_t *buf_pool, /*!< in: buffer pool instance */
1187    ulint n,                    /*!< in: nth chunk in the buffer pool */
1188    ulint *chunk_size)          /*!< in: chunk size */
1189{
1190  const buf_chunk_t *chunk;
1191
1192  chunk = buf_pool->chunks + n;
1193  *chunk_size = chunk->size;
1194  return (chunk->blocks);
1195}
1196
1197/** Verify the possibility that a stored page is not in buffer pool.
1198@param[in]	withdraw_clock	withdraw clock when stored the page
1199@retval true	if the page might be relocated */
1200UNIV_INLINE
1201bool buf_pool_is_obsolete(ulint withdraw_clock) {
1202  return (buf_pool_withdrawing || buf_withdraw_clock != withdraw_clock);
1203}
1204
1205/** Calculate aligned buffer pool size based on srv_buf_pool_chunk_unit,
1206if needed.
1207@param[in]	size	size in bytes
1208@return	aligned size */
1209UNIV_INLINE
1210ulint buf_pool_size_align(ulint size) {
1211  const ulint m = srv_buf_pool_instances * srv_buf_pool_chunk_unit;
1212  size = ut_max(size, srv_buf_pool_min_size);
1213
1214  if (size % m == 0) {
1215    return (size);
1216  } else {
1217    return ((size / m + 1) * m);
1218  }
1219}
1220
/** Return how many more pages must be added to the withdraw list to reach the
withdraw target of the currently ongoing buffer pool resize.
@param[in]	buf_pool	buffer pool instance
@return page count to be withdrawn or zero if the target is already achieved or
if the buffer pool is not currently being resized. */
UNIV_INLINE
ulint buf_get_withdraw_depth(buf_pool_t *buf_pool) {
  /* Read barrier so the size fields checked below are not stale
  relative to a concurrent resize. */
  os_rmb;
  /* curr_size >= old_size means no shrink is in progress, so no
  withdrawing is needed. */
  if (buf_pool->curr_size >= buf_pool->old_size) return 0;
  /* The withdraw list is protected by the free list mutex; take it
  only long enough to sample the length. */
  mutex_enter(&buf_pool->free_list_mutex);
  ulint withdraw_len = UT_LIST_GET_LEN(buf_pool->withdraw);
  mutex_exit(&buf_pool->free_list_mutex);
  return (buf_pool->withdraw_target > withdraw_len
              ? buf_pool->withdraw_target - withdraw_len
              : 0);
}
1237
1238#endif /* !UNIV_HOTBACKUP */
1239