1 /*****************************************************************************
2
3 Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved.
4 Copyright (c) 2013, 2021, MariaDB Corporation.
5
6 This program is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free Software
8 Foundation; version 2 of the License.
9
10 This program is distributed in the hope that it will be useful, but WITHOUT
11 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12 FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License along with
15 this program; if not, write to the Free Software Foundation, Inc.,
16 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
17
18 *****************************************************************************/
19
20 /**************************************************//**
21 @file include/buf0buf.h
22 The database buffer pool high-level routines
23
24 Created 11/5/1995 Heikki Tuuri
25 *******************************************************/
26
27 #ifndef buf0buf_h
28 #define buf0buf_h
29
30 /** Magic value to use instead of checksums when they are disabled */
31 #define BUF_NO_CHECKSUM_MAGIC 0xDEADBEEFUL
32
33 #include "fil0fil.h"
34 #include "mtr0types.h"
35 #include "buf0types.h"
36 #include "span.h"
37 #include "assume_aligned.h"
38 #ifndef UNIV_INNOCHECKSUM
39 #include "hash0hash.h"
40 #include "ut0byte.h"
41 #include "page0types.h"
42 #include "log0log.h"
43 #include "srv0srv.h"
44 #include <ostream>
45
46 // Forward declaration
47 struct fil_addr_t;
48
49 /** @name Modes for buf_page_get_gen */
50 /* @{ */
51 #define BUF_GET 10 /*!< get always */
52 #define BUF_GET_IF_IN_POOL 11 /*!< get if in pool */
53 #define BUF_PEEK_IF_IN_POOL 12 /*!< get if in pool, do not make
54 the block young in the LRU list */
55 #define BUF_GET_NO_LATCH 14 /*!< get and bufferfix, but
56 set no latch; we have
57 separated this case, because
58 it is error-prone programming
59 not to set a latch, and it
60 should be used with care */
61 #define BUF_GET_IF_IN_POOL_OR_WATCH 15
62 /*!< Get the page only if it's in the
63 buffer pool, if not then set a watch
64 on the page. */
65 #define BUF_GET_POSSIBLY_FREED 16
66 /*!< Like BUF_GET, but do not mind
67 if the file page has been freed. */
68 #define BUF_EVICT_IF_IN_POOL 20 /*!< evict a clean block if found */
69 /* @} */
70
71 /** If LRU list of a buf_pool is less than this size then LRU eviction
72 should not happen. This is because when we do LRU flushing we also put
73 the blocks on free list. If LRU list is very small then we can end up
74 in thrashing. */
75 #define BUF_LRU_MIN_LEN 256
76
77 # ifdef UNIV_DEBUG
78 extern my_bool buf_disable_resize_buffer_pool_debug; /*!< if TRUE, resizing
79 buffer pool is not allowed. */
80 # endif /* UNIV_DEBUG */
81
82 /** buf_page_t::state() values, distinguishing buf_page_t and buf_block_t */
83 enum buf_page_state
84 {
85 /** available in buf_pool.free or buf_pool.watch */
86 BUF_BLOCK_NOT_USED,
87 /** allocated for something else than a file page */
88 BUF_BLOCK_MEMORY,
89 /** a previously allocated file page, in transit to NOT_USED */
90 BUF_BLOCK_REMOVE_HASH,
91 /** a buf_block_t that is also in buf_pool.LRU */
92 BUF_BLOCK_FILE_PAGE,
93 /** the buf_page_t of a ROW_FORMAT=COMPRESSED page
94 whose uncompressed page frame has been evicted */
95 BUF_BLOCK_ZIP_PAGE
96 };
97
98 /** This structure defines information we will fetch from each buffer pool. It
99 will be used to print table IO stats */
100 struct buf_pool_info_t
101 {
102 /* General buffer pool info */
103 ulint pool_size; /*!< Buffer Pool size in pages */
104 ulint lru_len; /*!< Length of buf_pool.LRU */
105 ulint old_lru_len; /*!< buf_pool.LRU_old_len */
106 ulint free_list_len; /*!< Length of buf_pool.free list */
107 ulint flush_list_len; /*!< Length of buf_pool.flush_list */
108 ulint n_pend_unzip; /*!< buf_pool.n_pend_unzip, pages
109 pending decompress */
110 ulint n_pend_reads; /*!< buf_pool.n_pend_reads, pages
111 pending read */
112 ulint n_pending_flush_lru; /*!< Pages pending flush in LRU */
113 ulint n_pending_flush_list; /*!< Pages pending flush in FLUSH
114 LIST */
115 ulint n_pages_made_young; /*!< number of pages made young */
116 ulint n_pages_not_made_young; /*!< number of pages not made young */
117 ulint n_pages_read; /*!< buf_pool.n_pages_read */
118 ulint n_pages_created; /*!< buf_pool.n_pages_created */
119 ulint n_pages_written; /*!< buf_pool.n_pages_written */
120 ulint n_page_gets; /*!< buf_pool.n_page_gets */
ulint	n_ra_pages_read_rnd;	/*!< buf_pool.n_ra_pages_read_rnd,
				number of pages read as part of
				random read-ahead */
ulint	n_ra_pages_read;	/*!< buf_pool.n_ra_pages_read, number
				of pages read as part of read-ahead */
125 ulint n_ra_pages_evicted; /*!< buf_pool.n_ra_pages_evicted,
126 number of readahead pages evicted
127 without access */
128 ulint n_page_get_delta; /*!< num of buffer pool page gets since
129 last printout */
130
131 /* Buffer pool access stats */
132 double page_made_young_rate; /*!< page made young rate in pages
133 per second */
134 double page_not_made_young_rate;/*!< page not made young rate
135 in pages per second */
136 double pages_read_rate; /*!< num of pages read per second */
double	pages_created_rate;	/*!< num of pages created per second */
138 double pages_written_rate; /*!< num of pages written per second */
139 ulint page_read_delta; /*!< num of pages read since last
140 printout */
141 ulint young_making_delta; /*!< num of pages made young since
142 last printout */
ulint	not_young_making_delta;	/*!< num of pages not made young since
144 last printout */
145
146 /* Statistics about read ahead algorithm. */
147 double pages_readahead_rnd_rate;/*!< random readahead rate in pages per
148 second */
149 double pages_readahead_rate; /*!< readahead rate in pages per
150 second */
151 double pages_evicted_rate; /*!< rate of readahead page evicted
152 without access, in pages per second */
153
154 /* Stats about LRU eviction */
155 ulint unzip_lru_len; /*!< length of buf_pool.unzip_LRU
156 list */
157 /* Counters for LRU policy */
158 ulint io_sum; /*!< buf_LRU_stat_sum.io */
159 ulint io_cur; /*!< buf_LRU_stat_cur.io, num of IO
160 for current interval */
161 ulint unzip_sum; /*!< buf_LRU_stat_sum.unzip */
162 ulint unzip_cur; /*!< buf_LRU_stat_cur.unzip, num
163 pages decompressed in current
164 interval */
165 };
166 #endif /* !UNIV_INNOCHECKSUM */
167
168 /** Print the given page_id_t object.
169 @param[in,out] out the output stream
170 @param[in] page_id the page_id_t object to be printed
171 @return the output stream */
172 std::ostream&
173 operator<<(
174 std::ostream& out,
175 const page_id_t page_id);
176
177 #ifndef UNIV_INNOCHECKSUM
178 /*********************************************************************//**
Gets the current size of the buffer pool in bytes.
180 @return size in bytes */
181 UNIV_INLINE
182 ulint
183 buf_pool_get_curr_size(void);
184 /*========================*/
185
186 /********************************************************************//**
187 Allocates a buf_page_t descriptor. This function must succeed. In case
188 of failure we assert in this function. */
189 UNIV_INLINE
190 buf_page_t*
191 buf_page_alloc_descriptor(void)
192 /*===========================*/
193 MY_ATTRIBUTE((malloc));
194 /********************************************************************//**
195 Free a buf_page_t descriptor. */
196 UNIV_INLINE
197 void
198 buf_page_free_descriptor(
199 /*=====================*/
200 buf_page_t* bpage) /*!< in: bpage descriptor to free. */
201 MY_ATTRIBUTE((nonnull));
202
203 /** Allocate a buffer block.
204 @return own: the allocated block, in state BUF_BLOCK_MEMORY */
205 inline buf_block_t *buf_block_alloc();
206 /********************************************************************//**
207 Frees a buffer block which does not contain a file page. */
208 UNIV_INLINE
209 void
210 buf_block_free(
211 /*===========*/
212 buf_block_t* block); /*!< in, own: block to be freed */
213
214 /**************************************************************//**
215 NOTE! The following macros should be used instead of buf_page_get_gen,
216 to improve debugging. Only values RW_S_LATCH and RW_X_LATCH are allowed
217 in LA! */
218 #define buf_page_get(ID, SIZE, LA, MTR) \
219 buf_page_get_gen(ID, SIZE, LA, NULL, BUF_GET, __FILE__, __LINE__, MTR)
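
/* Example (an illustrative sketch, not part of this header): fetch a page
with an S-latch inside a mini-transaction and release it by committing the
mini-transaction. The identifiers space_id, page_no and mtr are placeholders.

	mtr_t	mtr;
	mtr.start();
	if (buf_block_t* block = buf_page_get(page_id_t(space_id, page_no),
					      0, RW_S_LATCH, &mtr)) {
		// read from block->frame while the mini-transaction is active
	}
	mtr.commit();
*/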
220
221 /**************************************************************//**
222 Use these macros to bufferfix a page with no latching. Remember not to
223 read the contents of the page unless you know it is safe. Do not modify
224 the contents of the page! We have separated this case, because it is
225 error-prone programming not to set a latch, and it should be used
226 with care. */
227 #define buf_page_get_with_no_latch(ID, SIZE, MTR) \
228 buf_page_get_gen(ID, SIZE, RW_NO_LATCH, NULL, BUF_GET_NO_LATCH, \
229 __FILE__, __LINE__, MTR)
230 /********************************************************************//**
231 This is the general function used to get optimistic access to a database
232 page.
233 @return TRUE if success */
234 ibool
235 buf_page_optimistic_get(
236 /*====================*/
237 ulint rw_latch,/*!< in: RW_S_LATCH, RW_X_LATCH */
238 buf_block_t* block, /*!< in: guessed block */
239 ib_uint64_t modify_clock,/*!< in: modify clock value */
240 const char* file, /*!< in: file name */
241 unsigned line, /*!< in: line where called */
242 mtr_t* mtr); /*!< in: mini-transaction */
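
/* Example (an illustrative sketch of the optimistic access pattern):
remember the modify clock while the block is latched, release the latch,
and later try to regain access without a page_hash lookup. The identifiers
block and mtr are placeholders.

	const ib_uint64_t version = buf_block_get_modify_clock(block);
	// ... the latch on block is released, e.g. the mini-transaction commits ...
	if (buf_page_optimistic_get(RW_S_LATCH, block, version,
				    __FILE__, __LINE__, &mtr)) {
		// the guess was still valid; block is bufferfixed and s-latched again
	}
*/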
243
244 /** Given a tablespace id and page number tries to get that page. If the
245 page is not in the buffer pool it is not loaded and NULL is returned.
Suitable for use while holding lock_sys_t::mutex.
247 @param[in] page_id page id
248 @param[in] file file name
249 @param[in] line line where called
250 @param[in] mtr mini-transaction
251 @return pointer to a page or NULL */
252 buf_block_t*
253 buf_page_try_get_func(
254 const page_id_t page_id,
255 const char* file,
256 unsigned line,
257 mtr_t* mtr);
258
259 /** Tries to get a page.
If the page is not in the buffer pool, it is not loaded. Suitable for use
while holding lock_sys_t::mutex.
262 @param[in] page_id page identifier
263 @param[in] mtr mini-transaction
264 @return the page if in buffer pool, NULL if not */
265 #define buf_page_try_get(page_id, mtr) \
266 buf_page_try_get_func((page_id), __FILE__, __LINE__, mtr);
267
268 /** Get read access to a compressed page (usually of type
269 FIL_PAGE_TYPE_ZBLOB or FIL_PAGE_TYPE_ZBLOB2).
270 The page must be released with buf_page_release_zip().
271 NOTE: the page is not protected by any latch. Mutual exclusion has to
272 be implemented at a higher level. In other words, all possible
273 accesses to a given page through this function must be protected by
274 the same set of mutexes or latches.
275 @param[in] page_id page id
276 @param[in] zip_size ROW_FORMAT=COMPRESSED page size
277 @return pointer to the block */
278 buf_page_t* buf_page_get_zip(const page_id_t page_id, ulint zip_size);
279
280 /** Get access to a database page. Buffered redo log may be applied.
281 @param[in] page_id page id
282 @param[in] zip_size ROW_FORMAT=COMPRESSED page size, or 0
283 @param[in] rw_latch RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH
284 @param[in] guess guessed block or NULL
285 @param[in] mode BUF_GET, BUF_GET_IF_IN_POOL,
286 BUF_PEEK_IF_IN_POOL, BUF_GET_NO_LATCH, or BUF_GET_IF_IN_POOL_OR_WATCH
287 @param[in] file file name
288 @param[in] line line where called
289 @param[in] mtr mini-transaction
290 @param[out] err DB_SUCCESS or error code
291 @param[in] allow_ibuf_merge Allow change buffer merge while
292 reading the pages from file.
293 @return pointer to the block or NULL */
294 buf_block_t*
295 buf_page_get_gen(
296 const page_id_t page_id,
297 ulint zip_size,
298 ulint rw_latch,
299 buf_block_t* guess,
300 ulint mode,
301 const char* file,
302 unsigned line,
303 mtr_t* mtr,
304 dberr_t* err = NULL,
305 bool allow_ibuf_merge = false);
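
/* Example (an illustrative sketch): look up a page only if it is already
cached, without triggering a read from the data file. The identifiers
space_id, page_no and mtr are placeholders.

	dberr_t err = DB_SUCCESS;
	buf_block_t* block = buf_page_get_gen(page_id_t(space_id, page_no), 0,
					      RW_S_LATCH, nullptr,
					      BUF_GET_IF_IN_POOL,
					      __FILE__, __LINE__, &mtr, &err);
	if (block == nullptr) {
		// the page was not in the buffer pool (or err reports a failure)
	}
*/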
306
307 /** This is the low level function used to get access to a database page.
308 @param[in] page_id page id
309 @param[in] zip_size ROW_FORMAT=COMPRESSED page size, or 0
310 @param[in] rw_latch RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH
311 @param[in] guess guessed block or NULL
312 @param[in] mode BUF_GET, BUF_GET_IF_IN_POOL,
313 BUF_PEEK_IF_IN_POOL, BUF_GET_NO_LATCH, or BUF_GET_IF_IN_POOL_OR_WATCH
314 @param[in] file file name
315 @param[in] line line where called
316 @param[in] mtr mini-transaction
317 @param[out] err DB_SUCCESS or error code
@param[in]	allow_ibuf_merge	Allow change buffer merge to happen
while reading the page from the file; if set, any buffered changes
for the page are merged into it as part of the read.
322 @return pointer to the block or NULL */
323 buf_block_t*
324 buf_page_get_low(
325 const page_id_t page_id,
326 ulint zip_size,
327 ulint rw_latch,
328 buf_block_t* guess,
329 ulint mode,
330 const char* file,
331 unsigned line,
332 mtr_t* mtr,
333 dberr_t* err,
334 bool allow_ibuf_merge);
335
/** Initialize a page in the buffer pool. The page is usually not read
from a file, even if it cannot be found in the buffer pool. This is one
of the functions that perform the state transition NOT_USED =>
FILE_PAGE on a block (the other is buf_page_get_gen).
340 @param[in,out] space space object
@param[in]	offset		page number within the tablespace
342 @param[in] zip_size ROW_FORMAT=COMPRESSED page size, or 0
343 @param[in,out] mtr mini-transaction
344 @param[in,out] free_block pre-allocated buffer block
345 @return pointer to the block, page bufferfixed */
346 buf_block_t*
347 buf_page_create(fil_space_t *space, uint32_t offset,
348 ulint zip_size, mtr_t *mtr, buf_block_t *free_block);
349
350 /********************************************************************//**
351 Releases a compressed-only page acquired with buf_page_get_zip(). */
352 UNIV_INLINE
353 void
354 buf_page_release_zip(
355 /*=================*/
356 buf_page_t* bpage); /*!< in: buffer block */
357 /********************************************************************//**
358 Releases a latch, if specified. */
359 UNIV_INLINE
360 void
361 buf_page_release_latch(
362 /*=====================*/
363 buf_block_t* block, /*!< in: buffer block */
364 ulint rw_latch); /*!< in: RW_S_LATCH, RW_X_LATCH,
365 RW_NO_LATCH */
366 /** Move a block to the start of the LRU list. */
367 void buf_page_make_young(buf_page_t *bpage);
368 /** Mark the page status as FREED for the given tablespace id and
369 page number. If the page is not in buffer pool then ignore it.
370 @param[in,out] space tablespace
371 @param[in] page page number
372 @param[in,out] mtr mini-transaction
373 @param[in] file file name
374 @param[in] line line where called */
375 void buf_page_free(fil_space_t *space, uint32_t page, mtr_t *mtr,
376 const char *file, unsigned line);
377
378 /********************************************************************//**
379 Reads the freed_page_clock of a buffer block.
380 @return freed_page_clock */
381 UNIV_INLINE
382 unsigned
383 buf_page_get_freed_page_clock(
384 /*==========================*/
385 const buf_page_t* bpage) /*!< in: block */
386 MY_ATTRIBUTE((warn_unused_result));
387 /********************************************************************//**
388 Reads the freed_page_clock of a buffer block.
389 @return freed_page_clock */
390 UNIV_INLINE
391 unsigned
392 buf_block_get_freed_page_clock(
393 /*===========================*/
394 const buf_block_t* block) /*!< in: block */
395 MY_ATTRIBUTE((warn_unused_result));
396
397 /** Determine if a block is still close enough to the MRU end of the LRU list
398 meaning that it is not in danger of getting evicted and also implying
399 that it has been accessed recently.
400 Note that this is for heuristics only and does not reserve buffer pool
401 mutex.
402 @param[in] bpage buffer pool page
403 @return whether bpage is close to MRU end of LRU */
404 inline bool buf_page_peek_if_young(const buf_page_t *bpage);
405
406 /** Determine if a block should be moved to the start of the LRU list if
407 there is danger of dropping from the buffer pool.
408 @param[in] bpage buffer pool page
409 @return true if bpage should be made younger */
410 inline bool buf_page_peek_if_too_old(const buf_page_t *bpage);
411
412 /** Move a page to the start of the buffer pool LRU list if it is too old.
413 @param[in,out] bpage buffer pool page */
inline void buf_page_make_young_if_needed(buf_page_t *bpage)
415 {
416 if (UNIV_UNLIKELY(buf_page_peek_if_too_old(bpage))) {
417 buf_page_make_young(bpage);
418 }
419 }
420
421 /********************************************************************//**
422 Increments the modify clock of a frame by 1. The caller must (1) own the
423 buf_pool.mutex and block bufferfix count has to be zero, (2) or own an x-lock
424 on the block. */
425 UNIV_INLINE
426 void
427 buf_block_modify_clock_inc(
428 /*=======================*/
429 buf_block_t* block); /*!< in: block */
430 /********************************************************************//**
431 Returns the value of the modify clock. The caller must have an s-lock
432 or x-lock on the block.
433 @return value */
434 UNIV_INLINE
435 ib_uint64_t
436 buf_block_get_modify_clock(
437 /*=======================*/
438 buf_block_t* block); /*!< in: block */
439 /*******************************************************************//**
440 Increments the bufferfix count. */
441 UNIV_INLINE
442 void
443 buf_block_buf_fix_inc_func(
444 /*=======================*/
445 # ifdef UNIV_DEBUG
446 const char* file, /*!< in: file name */
447 unsigned line, /*!< in: line */
448 # endif /* UNIV_DEBUG */
449 buf_block_t* block) /*!< in/out: block to bufferfix */
450 MY_ATTRIBUTE((nonnull));
451
452 # ifdef UNIV_DEBUG
453 /** Increments the bufferfix count.
454 @param[in,out] b block to bufferfix
455 @param[in] f file name where requested
456 @param[in] l line number where requested */
457 # define buf_block_buf_fix_inc(b,f,l) buf_block_buf_fix_inc_func(f,l,b)
458 # else /* UNIV_DEBUG */
459 /** Increments the bufferfix count.
460 @param[in,out] b block to bufferfix
461 @param[in] f file name where requested
462 @param[in] l line number where requested */
463 # define buf_block_buf_fix_inc(b,f,l) buf_block_buf_fix_inc_func(b)
464 # endif /* UNIV_DEBUG */
465 #endif /* !UNIV_INNOCHECKSUM */
466
467 /** Check if a buffer is all zeroes.
468 @param[in] buf data to check
469 @return whether the buffer is all zeroes */
470 bool buf_is_zeroes(st_::span<const byte> buf);
471
472 /** Checks if the page is in crc32 checksum format.
473 @param[in] read_buf database page
474 @param[in] checksum_field1 new checksum field
475 @param[in] checksum_field2 old checksum field
476 @return true if the page is in crc32 checksum format. */
477 bool
478 buf_page_is_checksum_valid_crc32(
479 const byte* read_buf,
480 ulint checksum_field1,
481 ulint checksum_field2)
482 MY_ATTRIBUTE((nonnull(1), warn_unused_result));
483
484 /** Checks if the page is in innodb checksum format.
485 @param[in] read_buf database page
486 @param[in] checksum_field1 new checksum field
487 @param[in] checksum_field2 old checksum field
488 @return true if the page is in innodb checksum format. */
489 bool
490 buf_page_is_checksum_valid_innodb(
491 const byte* read_buf,
492 ulint checksum_field1,
493 ulint checksum_field2)
494 MY_ATTRIBUTE((nonnull(1), warn_unused_result));
495
496 /** Checks if the page is in none checksum format.
497 @param[in] read_buf database page
498 @param[in] checksum_field1 new checksum field
499 @param[in] checksum_field2 old checksum field
500 @return true if the page is in none checksum format. */
501 bool
502 buf_page_is_checksum_valid_none(
503 const byte* read_buf,
504 ulint checksum_field1,
505 ulint checksum_field2)
506 MY_ATTRIBUTE((nonnull(1), warn_unused_result));
507
508 /** Check if a page is corrupt.
509 @param[in] check_lsn whether the LSN should be checked
510 @param[in] read_buf database page
511 @param[in] fsp_flags tablespace flags
512 @return whether the page is corrupted */
513 bool
514 buf_page_is_corrupted(
515 bool check_lsn,
516 const byte* read_buf,
517 ulint fsp_flags)
518 MY_ATTRIBUTE((warn_unused_result));
519
inline void *aligned_malloc(size_t size, size_t align)
521 {
522 #ifdef _MSC_VER
523 return _aligned_malloc(size, align);
524 #else
525 void *result;
526 if (posix_memalign(&result, align, size))
527 result= NULL;
528 return result;
529 #endif
530 }
531
inline void aligned_free(void *ptr)
533 {
534 #ifdef _MSC_VER
535 _aligned_free(ptr);
536 #else
537 free(ptr);
538 #endif
539 }
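
/* Example (illustrative): allocate a page-aligned scratch buffer and release
it with the matching function. Mixing aligned_malloc() with plain free()
would break on Windows, where _aligned_malloc()/_aligned_free() are used.

	byte* scratch = static_cast<byte*>(aligned_malloc(srv_page_size,
							   srv_page_size));
	if (scratch != nullptr) {
		// ... use the buffer, e.g. as an I/O bounce buffer ...
		aligned_free(scratch);
	}
*/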
540
/** Read the key version from the page. In the full_crc32 format, the
key version is stored in bytes 0..3. In other formats, it is stored
at byte offset FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION (26).
544 @param[in] read_buf database page
545 @param[in] fsp_flags tablespace flags
546 @return key version of the page. */
inline uint32_t buf_page_get_key_version(const byte* read_buf, ulint fsp_flags)
548 {
549 static_assert(FIL_PAGE_FCRC32_KEY_VERSION == 0, "compatibility");
550 return fil_space_t::full_crc32(fsp_flags)
551 ? mach_read_from_4(my_assume_aligned<4>(read_buf))
552 : mach_read_from_4(my_assume_aligned<2>
553 (read_buf + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION));
554 }
555
/** Read the compression info from the page. In the full_crc32 format,
the compression marker is the most significant bit of the page type.
In other formats, the page type itself indicates compression.
559 @param[in] read_buf database page
560 @param[in] fsp_flags tablespace flags
561 @return true if page is compressed. */
inline bool buf_page_is_compressed(const byte* read_buf, ulint fsp_flags)
563 {
564 uint16_t page_type= fil_page_get_type(read_buf);
565 return fil_space_t::full_crc32(fsp_flags)
566 ? !!(page_type & 1U << FIL_PAGE_COMPRESS_FCRC32_MARKER)
567 : page_type == FIL_PAGE_PAGE_COMPRESSED;
568 }
569
570 /** Get the compressed or uncompressed size of a full_crc32 page.
571 @param[in] buf page_compressed or uncompressed page
572 @param[out] comp whether the page could be compressed
573 @param[out] cr whether the page could be corrupted
574 @return the payload size in the file page */
inline uint buf_page_full_crc32_size(const byte* buf, bool* comp, bool* cr)
576 {
577 uint t = fil_page_get_type(buf);
578 uint page_size = uint(srv_page_size);
579
580 if (!(t & 1U << FIL_PAGE_COMPRESS_FCRC32_MARKER)) {
581 return page_size;
582 }
583
584 t &= ~(1U << FIL_PAGE_COMPRESS_FCRC32_MARKER);
585 t <<= 8;
586
587 if (t < page_size) {
588 page_size = t;
589 if (comp) {
590 *comp = true;
591 }
592 } else if (cr) {
593 *cr = true;
594 }
595
596 return page_size;
597 }
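
/* Worked example (assuming FIL_PAGE_COMPRESS_FCRC32_MARKER denotes bit 15 of
the page type): a stored type value of 0x8010 has the compression marker set;
clearing the marker leaves 0x0010, and shifting left by 8 gives a payload of
0x1000 = 4096 bytes. With a 16KiB srv_page_size this is smaller than the page,
so *comp is set; a value of at least the page size would set *cr instead,
because it cannot be a valid compressed payload size. */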
598
599 #ifndef UNIV_INNOCHECKSUM
600 /** Dump a page to stderr.
601 @param[in] read_buf database page
602 @param[in] zip_size compressed page size, or 0 */
603 void buf_page_print(const byte* read_buf, ulint zip_size = 0)
604 ATTRIBUTE_COLD __attribute__((nonnull));
605 /********************************************************************//**
606 Decompress a block.
607 @return TRUE if successful */
608 ibool
609 buf_zip_decompress(
610 /*===============*/
611 buf_block_t* block, /*!< in/out: block */
612 ibool check); /*!< in: TRUE=verify the page checksum */
613
614 #ifdef UNIV_DEBUG
615 /** @return the number of latched pages in the buffer pool */
616 ulint buf_get_latched_pages_number();
617 #endif /* UNIV_DEBUG */
618 /*********************************************************************//**
619 Prints info of the buffer i/o. */
620 void
621 buf_print_io(
622 /*=========*/
623 FILE* file); /*!< in: file where to print */
624 /** Collect buffer pool metadata.
625 @param[out] pool_info buffer pool metadata */
626 void buf_stats_get_pool_info(buf_pool_info_t *pool_info);
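
/* Example (an illustrative sketch): collect a snapshot of the buffer pool
metadata and report a couple of fields; ULINTPF is the printf format for
ulint used elsewhere in InnoDB.

	buf_pool_info_t info;
	buf_stats_get_pool_info(&info);
	fprintf(stderr, "LRU length " ULINTPF ", free list length " ULINTPF "\n",
		info.lru_len, info.free_list_len);
*/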
627
628 /** Refresh the statistics used to print per-second averages. */
629 void buf_refresh_io_stats();
630
631 /** Invalidate all pages in the buffer pool.
632 All pages must be in a replaceable state (not modified or latched). */
633 void buf_pool_invalidate();
634
635 /*========================================================================
636 --------------------------- LOWER LEVEL ROUTINES -------------------------
637 =========================================================================*/
638
639 #ifdef UNIV_DEBUG
640 /*********************************************************************//**
641 Adds latch level info for the rw-lock protecting the buffer frame. This
642 should be called in the debug version after a successful latching of a
643 page if we know the latching order level of the acquired latch. */
644 UNIV_INLINE
645 void
646 buf_block_dbg_add_level(
647 /*====================*/
648 buf_block_t* block, /*!< in: buffer page
649 where we have acquired latch */
650 latch_level_t level); /*!< in: latching order level */
651 #else /* UNIV_DEBUG */
652 # define buf_block_dbg_add_level(block, level) /* nothing */
653 #endif /* UNIV_DEBUG */
654
655 #ifdef UNIV_DEBUG
656 /*********************************************************************//**
657 Gets a pointer to the memory frame of a block.
658 @return pointer to the frame */
659 UNIV_INLINE
660 buf_frame_t*
661 buf_block_get_frame(
662 /*================*/
663 const buf_block_t* block) /*!< in: pointer to the control block */
664 MY_ATTRIBUTE((warn_unused_result));
665 #else /* UNIV_DEBUG */
666 # define buf_block_get_frame(block) (block)->frame
667 #endif /* UNIV_DEBUG */
668
669 /*********************************************************************//**
670 Gets the compressed page descriptor corresponding to an uncompressed page
671 if applicable. */
672 #define buf_block_get_page_zip(block) \
673 (UNIV_LIKELY_NULL((block)->page.zip.data) ? &(block)->page.zip : NULL)
674 #define is_buf_block_get_page_zip(block) \
675 UNIV_LIKELY_NULL((block)->page.zip.data)
676
677 /** Monitor the buffer page read/write activity, and increment corresponding
678 counter value in MONITOR_MODULE_BUF_PAGE.
679 @param bpage buffer page whose read or write was completed
680 @param io_type BUF_IO_READ or BUF_IO_WRITE */
681 ATTRIBUTE_COLD __attribute__((nonnull))
682 void buf_page_monitor(const buf_page_t *bpage, buf_io_fix io_type);
683
684 /** Complete a read request of a file page to buf_pool.
685 @param bpage recently read page
686 @param node data file
@return whether the operation succeeded
@retval DB_SUCCESS if the page was read correctly and is not corrupted
689 @retval DB_PAGE_CORRUPTED if the checksum fails on a page read
690 @retval DB_DECRYPTION_FAILED if the page cannot be decrypted */
691 dberr_t buf_page_read_complete(buf_page_t *bpage, const fil_node_t &node);
692
693 /** Calculate aligned buffer pool size based on srv_buf_pool_chunk_unit,
694 if needed.
695 @param[in] size size in bytes
696 @return aligned size */
697 ulint
698 buf_pool_size_align(
699 ulint size);
700
/** Verify that the post-encryption checksum matches the calculated checksum.
This function should be called only if the tablespace contains crypt data metadata.
703 @param[in] page page frame
704 @param[in] fsp_flags tablespace flags
705 @return true if page is encrypted and OK, false otherwise */
706 bool buf_page_verify_crypt_checksum(
707 const byte* page,
708 ulint fsp_flags);
709
710 /** Calculate a ROW_FORMAT=COMPRESSED page checksum and update the page.
711 @param[in,out] page page to update
712 @param[in] size compressed page size */
713 void buf_flush_update_zip_checksum(buf_frame_t* page, ulint size);
714
715 /** @brief The temporary memory structure.
716
717 NOTE! The definition appears here only for other modules of this
718 directory (buf) to see it. Do not use from outside! */
719
720 class buf_tmp_buffer_t
721 {
722 /** whether this slot is reserved */
723 std::atomic<bool> reserved;
724 public:
725 /** For encryption, the data needs to be copied to a separate buffer
726 before it's encrypted&written. The buffer block itself can be replaced
727 while a write of crypt_buf to file is in progress. */
728 byte *crypt_buf;
729 /** buffer for fil_page_compress(), for flushing page_compressed pages */
730 byte *comp_buf;
731 /** pointer to resulting buffer after encryption or compression;
732 not separately allocated memory */
733 byte *out_buf;
734
735 /** Release the slot */
void release() { reserved.store(false, std::memory_order_relaxed); }
737
738 /** Acquire the slot
739 @return whether the slot was acquired */
bool acquire() { return !reserved.exchange(true, std::memory_order_relaxed); }
741
742 /** Allocate a buffer for encryption, decryption or decompression. */
void allocate()
744 {
745 if (!crypt_buf)
746 crypt_buf= static_cast<byte*>
747 (aligned_malloc(srv_page_size, srv_page_size));
748 }
749 };
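
/* Example (an illustrative sketch): typical slot usage around a page write.
The slot pointer would come from the buffer pool's array of temporary
buffers; here it is only a placeholder.

	buf_tmp_buffer_t* slot = ...;	// some slot owned by the buffer pool
	if (slot->acquire()) {
		slot->allocate();	// make sure crypt_buf exists
		// ... encrypt or compress the page into the slot ...
		slot->release();	// once the write has completed
	}
*/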
750
751 /** The common buffer control block structure
752 for compressed and uncompressed frames */
753
754 class buf_pool_t;
755
756 class buf_page_t
757 {
758 friend buf_pool_t;
759 friend buf_block_t;
760 /** @name General fields */
761 /* @{ */
762
763 public: // FIXME: fix fil_iterate()
764 /** Page id. Protected by buf_pool.hash_lock_get(id) when
765 the page is in buf_pool.page_hash. */
766 page_id_t id_;
767 private:
/** Count of how many times this block is currently bufferfixed. */
769 Atomic_counter<uint32_t> buf_fix_count_;
770
/** log sequence number of the START of the log record of the
oldest modification to this block which has not yet been written
to the data file;
774
775 0 if no modifications are pending;
776 1 if no modifications are pending, but the block is in buf_pool.flush_list;
777 2 if modifications are pending, but the block is not in buf_pool.flush_list
778 (because id().space() is the temporary tablespace). */
779 Atomic_relaxed<lsn_t> oldest_modification_;
780
781 /** type of pending I/O operation; protected by buf_pool.mutex
782 if in_LRU_list */
783 Atomic_relaxed<buf_io_fix> io_fix_;
784 /** Block state. @see in_file().
785 State transitions between in_file() states and to
786 BUF_BLOCK_REMOVE_HASH are protected by buf_pool.hash_lock_get(id)
787 when the block is in buf_pool.page_hash.
788 Other transitions when in_LRU_list are protected by buf_pool.mutex. */
789 buf_page_state state_;
790
791 public:
792 /** buf_pool.page_hash link; protected by buf_pool.hash_lock_get(id) */
793 buf_page_t *hash;
794 /* @} */
795 page_zip_des_t zip; /*!< compressed page; zip.data
796 (but not the data it points to) is
797 also protected by buf_pool.mutex;
798 state == BUF_BLOCK_ZIP_PAGE and
799 zip.data == NULL means an active
800 buf_pool.watch */
801
802 buf_tmp_buffer_t* slot; /*!< Slot for temporary memory
803 used for encryption/compression
804 or NULL */
805 #ifdef UNIV_DEBUG
806 /** whether this->list is in buf_pool.zip_hash; protected by buf_pool.mutex */
807 bool in_zip_hash;
808 /** whether this->LRU is in buf_pool.LRU (in_file() holds);
809 protected by buf_pool.mutex */
810 bool in_LRU_list;
811 /** whether this is in buf_pool.page_hash (in_file() holds);
812 protected by buf_pool.mutex */
813 bool in_page_hash;
814 /** whether this->list is in buf_pool.free (state() == BUF_BLOCK_NOT_USED);
815 protected by buf_pool.flush_list_mutex */
816 bool in_free_list;
817 #endif /* UNIV_DEBUG */
818 /** list member in one of the lists of buf_pool; protected by
819 buf_pool.mutex or buf_pool.flush_list_mutex
820
821 state() == BUF_BLOCK_NOT_USED: buf_pool.free or buf_pool.withdraw
822
823 in_file() && oldest_modification():
824 buf_pool.flush_list (protected by buf_pool.flush_list_mutex)
825
The contents are undefined if in_file() && !oldest_modification(),
827 or if state() is BUF_BLOCK_MEMORY or BUF_BLOCK_REMOVE_HASH. */
828 UT_LIST_NODE_T(buf_page_t) list;
829
830 /** @name LRU replacement algorithm fields.
831 Protected by buf_pool.mutex. */
832 /* @{ */
833
834 UT_LIST_NODE_T(buf_page_t) LRU;
835 /*!< node of the LRU list */
836 unsigned old:1; /*!< TRUE if the block is in the old
837 blocks in buf_pool.LRU_old */
838 unsigned freed_page_clock:31;/*!< the value of
839 buf_pool.freed_page_clock
840 when this block was the last
841 time put to the head of the
842 LRU list; a thread is allowed
843 to read this for heuristic
844 purposes without holding any
845 mutex or latch */
846 /* @} */
847 Atomic_counter<unsigned> access_time; /*!< time of first access, or
848 0 if the block was never accessed
849 in the buffer pool.
850
851 For state==BUF_BLOCK_MEMORY
852 blocks, this field can be repurposed
853 for something else.
854
855 When this field counts log records
856 and bytes allocated for recv_sys.pages,
857 the field is protected by
858 recv_sys_t::mutex. */
859 /** Change buffer entries for the page exist.
860 Protected by io_fix()==BUF_IO_READ or by buf_block_t::lock. */
861 bool ibuf_exist;
862
863 /** Block initialization status. Can be modified while holding io_fix()
864 or buf_block_t::lock X-latch */
865 enum {
866 /** the page was read normally and should be flushed normally */
867 NORMAL = 0,
868 /** the page was (re)initialized, and the doublewrite buffer can be
869 skipped on the next flush */
870 INIT_ON_FLUSH,
/** the page was freed and needs to be flushed.
872 For page_compressed, page flush will punch a hole to free space.
873 Else if innodb_immediate_scrub_data_uncompressed, the page will
874 be overwritten with zeroes. */
875 FREED
876 } status;
877
buf_page_t() : id_(0)
879 {
880 static_assert(BUF_BLOCK_NOT_USED == 0, "compatibility");
881 memset((void*) this, 0, sizeof *this);
882 }
883
884 /** Initialize some fields */
void init()
886 {
887 io_fix_= BUF_IO_NONE;
888 buf_fix_count_= 0;
889 old= 0;
890 freed_page_clock= 0;
891 access_time= 0;
892 oldest_modification_= 0;
893 slot= nullptr;
894 ibuf_exist= false;
895 status= NORMAL;
896 ut_d(in_zip_hash= false);
897 ut_d(in_free_list= false);
898 ut_d(in_LRU_list= false);
899 ut_d(in_page_hash= false);
900 HASH_INVALIDATE(this, hash);
901 }
902
903 /** Initialize some more fields */
904 void init(buf_page_state state, page_id_t id, uint32_t buf_fix_count= 0)
905 {
906 init();
907 state_= state;
908 id_= id;
909 buf_fix_count_= buf_fix_count;
910 }
911
912 /** Initialize some more fields */
913 void init(page_id_t id, uint32_t buf_fix_count= 0)
914 {
915 init();
916 id_= id;
917 buf_fix_count_= buf_fix_count;
918 }
919
920 public:
const page_id_t &id() const { return id_; }
buf_page_state state() const { return state_; }
uint32_t buf_fix_count() const { return buf_fix_count_; }
buf_io_fix io_fix() const { return io_fix_; }
void io_unfix()
926 {
927 ut_d(const auto old_io_fix= io_fix());
928 ut_ad(old_io_fix == BUF_IO_READ || old_io_fix == BUF_IO_PIN);
929 io_fix_= BUF_IO_NONE;
930 }
931
932 /** @return if this belongs to buf_pool.unzip_LRU */
bool belongs_to_unzip_LRU() const
934 {
935 return zip.data && state() != BUF_BLOCK_ZIP_PAGE;
936 }
937
938 inline void add_buf_fix_count(uint32_t count);
939 inline void set_buf_fix_count(uint32_t count);
940 inline void set_state(buf_page_state state);
941 inline void set_io_fix(buf_io_fix io_fix);
942 inline void set_corrupt_id();
943
944 /** @return the log sequence number of the oldest pending modification
945 @retval 0 if the block is being removed from (or not in) buf_pool.flush_list
946 @retval 1 if the block is in buf_pool.flush_list but not modified
947 @retval 2 if the block belongs to the temporary tablespace and
948 has unwritten changes */
lsn_t oldest_modification() const { return oldest_modification_; }
950 /** @return the log sequence number of the oldest pending modification,
951 @retval 0 if the block is definitely not in buf_pool.flush_list
952 @retval 1 if the block is in buf_pool.flush_list but not modified
953 @retval 2 if the block belongs to the temporary tablespace and
954 has unwritten changes */
lsn_t oldest_modification_acquire() const
956 { return oldest_modification_.load(std::memory_order_acquire); }
957 /** Set oldest_modification when adding to buf_pool.flush_list */
958 inline void set_oldest_modification(lsn_t lsn);
959 /** Clear oldest_modification after removing from buf_pool.flush_list */
960 inline void clear_oldest_modification();
961 /** Note that a block is no longer dirty, while not removing
962 it from buf_pool.flush_list */
963 inline void clear_oldest_modification(bool temporary);
964
965 /** Notify that a page in a temporary tablespace has been modified. */
void set_temp_modified()
967 {
968 ut_ad(fsp_is_system_temporary(id().space()));
969 ut_ad(state() == BUF_BLOCK_FILE_PAGE);
970 ut_ad(!oldest_modification());
971 oldest_modification_= 2;
972 }
973
974 /** Prepare to release a file page to buf_pool.free. */
void free_file_page()
976 {
977 ut_ad(state() == BUF_BLOCK_REMOVE_HASH);
978 /* buf_LRU_block_free_non_file_page() asserts !oldest_modification() */
979 ut_d(oldest_modification_= 0;)
980 set_corrupt_id();
981 ut_d(set_state(BUF_BLOCK_MEMORY));
982 }
983
void fix() { buf_fix_count_++; }
uint32_t unfix()
986 {
987 uint32_t count= buf_fix_count_--;
988 ut_ad(count != 0);
989 return count - 1;
990 }
991
992 /** @return the physical size, in bytes */
ulint physical_size() const
994 {
995 return zip.ssize ? (UNIV_ZIP_SIZE_MIN >> 1) << zip.ssize : srv_page_size;
996 }
997
998 /** @return the ROW_FORMAT=COMPRESSED physical size, in bytes
999 @retval 0 if not compressed */
ulint zip_size() const
1001 {
1002 return zip.ssize ? (UNIV_ZIP_SIZE_MIN >> 1) << zip.ssize : 0;
1003 }
1004
1005 /** @return the byte offset of the page within a file */
os_offset_t physical_offset() const
1007 {
1008 os_offset_t o= id().page_no();
1009 return zip.ssize
1010 ? o << (zip.ssize + (UNIV_ZIP_SIZE_SHIFT_MIN - 1))
1011 : o << srv_page_size_shift;
1012 }
1013
1014 /** @return whether the block is mapped to a data file */
bool in_file() const
1016 {
1017 switch (state_) {
1018 case BUF_BLOCK_ZIP_PAGE:
1019 case BUF_BLOCK_FILE_PAGE:
1020 return true;
1021 case BUF_BLOCK_NOT_USED:
1022 case BUF_BLOCK_MEMORY:
1023 case BUF_BLOCK_REMOVE_HASH:
1024 return false;
1025 }
1026
1027 ut_error;
1028 return false;
1029 }
1030
1031 /** @return whether the block is modified and ready for flushing */
1032 inline bool ready_for_flush() const;
1033 /** @return whether the state can be changed to BUF_BLOCK_NOT_USED */
bool ready_for_replace() const
1035 { return !oldest_modification() && can_relocate(); }
1036 /** @return whether the block can be relocated in memory.
1037 The block can be dirty, but it must not be I/O-fixed or bufferfixed. */
1038 inline bool can_relocate() const;
1039 /** @return whether the block has been flagged old in buf_pool.LRU */
1040 inline bool is_old() const;
1041 /** Set whether a block is old in buf_pool.LRU */
1042 inline void set_old(bool old);
1043 /** Flag a page accessed in buf_pool
1044 @return whether this is not the first access */
bool set_accessed()
1046 {
1047 if (is_accessed()) return true;
1048 access_time= static_cast<uint32_t>(ut_time_ms());
1049 return false;
1050 }
1051 /** @return ut_time_ms() at the time of first access of a block in buf_pool
1052 @retval 0 if not accessed */
unsigned is_accessed() const { ut_ad(in_file()); return access_time; }
1054 };
1055
1056 /** The buffer control block structure */
1057
1058 struct buf_block_t{
1059
1060 /** @name General fields */
1061 /* @{ */
1062
1063 buf_page_t page; /*!< page information; this must
1064 be the first field, so that
1065 buf_pool.page_hash can point
1066 to buf_page_t or buf_block_t */
1067 byte* frame; /*!< pointer to buffer frame which
1068 is of size srv_page_size, and
1069 aligned to an address divisible by
1070 srv_page_size */
1071 rw_lock_t lock; /*!< read-write lock of the buffer
1072 frame */
1073 #ifdef UNIV_DEBUG
1074 /** whether page.list is in buf_pool.withdraw
1075 ((state() == BUF_BLOCK_NOT_USED)) and the buffer pool is being shrunk;
1076 protected by buf_pool.mutex */
1077 bool in_withdraw_list;
1078 /** whether unzip_LRU is in buf_pool.unzip_LRU
1079 (state() == BUF_BLOCK_FILE_PAGE and zip.data != nullptr);
1080 protected by buf_pool.mutex */
1081 bool in_unzip_LRU_list;
1082 #endif
1083 UT_LIST_NODE_T(buf_block_t) unzip_LRU;
1084 /*!< node of the decompressed LRU list;
1085 a block is in the unzip_LRU list
1086 if page.state() == BUF_BLOCK_FILE_PAGE
1087 and page.zip.data != NULL */
1088 /* @} */
1089 /** @name Optimistic search field */
1090 /* @{ */
1091
1092 ib_uint64_t modify_clock; /*!< this clock is incremented every
1093 time a pointer to a record on the
1094 page may become obsolete; this is
1095 used in the optimistic cursor
1096 positioning: if the modify clock has
1097 not changed, we know that the pointer
1098 is still valid; this field may be
1099 changed if the thread (1) owns the
1100 pool mutex and the page is not
1101 bufferfixed, or (2) the thread has an
1102 x-latch on the block */
1103 /* @} */
1104 #ifdef BTR_CUR_HASH_ADAPT
1105 /** @name Hash search fields (unprotected)
1106 NOTE that these fields are NOT protected by any semaphore! */
1107 /* @{ */
1108
1109 volatile uint16_t n_bytes; /*!< recommended prefix length for hash
1110 search: number of bytes in
1111 an incomplete last field */
1112 volatile uint16_t n_fields; /*!< recommended prefix length for hash
1113 search: number of full fields */
1114 uint16_t n_hash_helps; /*!< counter which controls building
1115 of a new hash index for the page */
1116 volatile bool left_side; /*!< true or false, depending on
1117 whether the leftmost record of several
1118 records with the same prefix should be
1119 indexed in the hash index */
1120 /* @} */
1121
1122 /** @name Hash search fields
1123 These 5 fields may only be modified when:
1124 we are holding the appropriate x-latch in btr_search_latches[], and
1125 one of the following holds:
1126 (1) the block state is BUF_BLOCK_FILE_PAGE, and
1127 we are holding an s-latch or x-latch on buf_block_t::lock, or
1128 (2) buf_block_t::buf_fix_count == 0, or
1129 (3) the block state is BUF_BLOCK_REMOVE_HASH.
1130
1131 An exception to this is when we init or create a page
1132 in the buffer pool in buf0buf.cc.
1133
1134 Another exception for buf_pool_t::clear_hash_index() is that
1135 assigning block->index = NULL (and block->n_pointers = 0)
1136 is allowed whenever btr_search_own_all(RW_LOCK_X).
1137
1138 Another exception is that ha_insert_for_fold() may
1139 decrement n_pointers without holding the appropriate latch
1140 in btr_search_latches[]. Thus, n_pointers must be
1141 protected by atomic memory access.
1142
1143 This implies that the fields may be read without race
1144 condition whenever any of the following hold:
1145 - the btr_search_latches[] s-latch or x-latch is being held, or
1146 - the block state is not BUF_BLOCK_FILE_PAGE or BUF_BLOCK_REMOVE_HASH,
1147 and holding some latch prevents the state from changing to that.
1148
1149 Some use of assert_block_ahi_empty() or assert_block_ahi_valid()
1150 is prone to race conditions while buf_pool_t::clear_hash_index() is
1151 executing (the adaptive hash index is being disabled). Such use
1152 is explicitly commented. */
1153
1154 /* @{ */
1155
1156 # if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
1157 Atomic_counter<ulint>
1158 n_pointers; /*!< used in debugging: the number of
1159 pointers in the adaptive hash index
1160 pointing to this frame;
1161 protected by atomic memory access
1162 or btr_search_own_all(). */
1163 # define assert_block_ahi_empty(block) \
1164 ut_a((block)->n_pointers == 0)
1165 # define assert_block_ahi_empty_on_init(block) do { \
1166 MEM_MAKE_DEFINED(&(block)->n_pointers, sizeof (block)->n_pointers); \
1167 assert_block_ahi_empty(block); \
1168 } while (0)
1169 # define assert_block_ahi_valid(block) \
1170 ut_a((block)->index || (block)->n_pointers == 0)
1171 # else /* UNIV_AHI_DEBUG || UNIV_DEBUG */
1172 # define assert_block_ahi_empty(block) /* nothing */
1173 # define assert_block_ahi_empty_on_init(block) /* nothing */
1174 # define assert_block_ahi_valid(block) /* nothing */
1175 # endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
1176 unsigned curr_n_fields:10;/*!< prefix length for hash indexing:
1177 number of full fields */
1178 unsigned curr_n_bytes:15;/*!< number of bytes in hash
1179 indexing */
1180 unsigned curr_left_side:1;/*!< TRUE or FALSE in hash indexing */
1181 dict_index_t* index; /*!< Index for which the
1182 adaptive hash index has been
1183 created, or NULL if the page
1184 does not exist in the
1185 index. Note that it does not
1186 guarantee that the index is
1187 complete, though: there may
1188 have been hash collisions,
1189 record deletions, etc. */
1190 /* @} */
1191 #else /* BTR_CUR_HASH_ADAPT */
1192 # define assert_block_ahi_empty(block) /* nothing */
1193 # define assert_block_ahi_empty_on_init(block) /* nothing */
1194 # define assert_block_ahi_valid(block) /* nothing */
1195 #endif /* BTR_CUR_HASH_ADAPT */
1196 # ifdef UNIV_DEBUG
1197 /** @name Debug fields */
1198 /* @{ */
1199 rw_lock_t* debug_latch; /*!< in the debug version, each thread
1200 which bufferfixes the block acquires
1201 an s-latch here; so we can use the
1202 debug utilities in sync0rw */
1203 /* @} */
1204 # endif
void fix() { page.fix(); }
uint32_t unfix()
1207 {
1208 ut_ad(page.buf_fix_count() || page.io_fix() != BUF_IO_NONE ||
1209 page.state() == BUF_BLOCK_ZIP_PAGE ||
1210 !rw_lock_own_flagged(&lock, RW_LOCK_FLAG_X | RW_LOCK_FLAG_S |
1211 RW_LOCK_FLAG_SX));
1212 return page.unfix();
1213 }
1214
1215 /** @return the physical size, in bytes */
ulint physical_size() const { return page.physical_size(); }
1217
1218 /** @return the ROW_FORMAT=COMPRESSED physical size, in bytes
1219 @retval 0 if not compressed */
ulint zip_size() const { return page.zip_size(); }
1221
1222 /** Initialize the block.
1223 @param page_id page identifier
1224 @param zip_size ROW_FORMAT=COMPRESSED page size, or 0
1225 @param fix initial buf_fix_count() */
1226 void initialise(const page_id_t page_id, ulint zip_size, uint32_t fix= 0);
1227 };
1228
1229 /**********************************************************************//**
1230 Compute the hash fold value for blocks in buf_pool.zip_hash. */
1231 /* @{ */
1232 #define BUF_POOL_ZIP_FOLD_PTR(ptr) (ulint(ptr) >> srv_page_size_shift)
1233 #define BUF_POOL_ZIP_FOLD(b) BUF_POOL_ZIP_FOLD_PTR((b)->frame)
1234 #define BUF_POOL_ZIP_FOLD_BPAGE(b) BUF_POOL_ZIP_FOLD((buf_block_t*) (b))
1235 /* @} */
1236
1237 /** A "Hazard Pointer" class used to iterate over page lists
1238 inside the buffer pool. A hazard pointer is a buf_page_t pointer
which we intend to iterate over next and which we want to remain valid
1240 even after we release the buffer pool mutex. */
1241 class HazardPointer
1242 {
1243 public:
virtual ~HazardPointer() {}
1245
1246 /** @return current value */
buf_page_t *get() const { mysql_mutex_assert_owner(m_mutex); return m_hp; }
1248
1249 /** Set current value
1250 @param bpage buffer block to be set as hp */
void set(buf_page_t *bpage)
1252 {
1253 mysql_mutex_assert_owner(m_mutex);
1254 ut_ad(!bpage || bpage->in_file());
1255 m_hp= bpage;
1256 }
1257
1258 /** Checks if a bpage is the hp
1259 @param bpage buffer block to be compared
1260 @return true if it is hp */
bool is_hp(const buf_page_t *bpage) const
1262 { mysql_mutex_assert_owner(m_mutex); return bpage == m_hp; }
1263
1264 /** Adjust the value of hp. This happens when some
1265 other thread working on the same list attempts to
1266 remove the hp from the list. */
1267 virtual void adjust(const buf_page_t*) = 0;
1268
1269 #ifdef UNIV_DEBUG
1270 /** mutex that protects access to the m_hp. */
1271 const mysql_mutex_t *m_mutex= nullptr;
1272 #endif /* UNIV_DEBUG */
1273
1274 protected:
1275 /** hazard pointer */
1276 buf_page_t *m_hp= nullptr;
1277 };
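
/* Example (an illustrative sketch of the intended pattern): while scanning a
list backwards under its mutex, publish the position to resume from in the
hazard pointer before doing work that may release the mutex. If another
thread removes that page, it calls adjust(), so the saved position remains
valid. The names hp, mutex and bpage are placeholders.

	mysql_mutex_lock(&mutex);
	hp.set(UT_LIST_GET_PREV(LRU, bpage));	// remember where to resume
	// ... the mutex may be released and re-acquired around expensive work ...
	bpage = hp.get();			// still a valid list position
	mysql_mutex_unlock(&mutex);
*/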
1278
1279 /** Class implementing buf_pool.flush_list hazard pointer */
1280 class FlushHp : public HazardPointer
1281 {
1282 public:
~FlushHp() override {}
1284
1285 /** Adjust the value of hp. This happens when some
1286 other thread working on the same list attempts to
1287 remove the hp from the list.
1288 @param bpage buffer block to be compared */
void adjust(const buf_page_t *bpage) override
1290 {
1291 ut_ad(bpage != NULL);
1292
1293 /* We only support reverse traversal for now. */
1294 if (is_hp(bpage))
1295 m_hp= UT_LIST_GET_PREV(list, m_hp);
1296
1297 ut_ad(!m_hp || m_hp->oldest_modification());
1298 }
1299 };
1300
1301 /** Class implementing buf_pool.LRU hazard pointer */
1302 class LRUHp : public HazardPointer {
1303 public:
~LRUHp() override {}
1305
1306 /** Adjust the value of hp. This happens when some
1307 other thread working on the same list attempts to
1308 remove the hp from the list.
1309 @param bpage buffer block to be compared */
void adjust(const buf_page_t *bpage) override
1311 {
1312 ut_ad(bpage);
1313 /** We only support reverse traversal for now. */
1314 if (is_hp(bpage))
1315 m_hp= UT_LIST_GET_PREV(LRU, m_hp);
1316
1317 ut_ad(!m_hp || m_hp->in_LRU_list);
1318 }
1319 };
1320
1321 /** Special purpose iterators to be used when scanning the LRU list.
The idea is that when one thread finishes the scan it leaves the
iterator in that position and another thread can start the scan from
there. */
1325 class LRUItr : public LRUHp {
1326 public:
LRUItr() : LRUHp() {}
~LRUItr() override {}
1329
1330 /** Select from where to start a scan. If we have scanned
1331 too deep into the LRU list it resets the value to the tail
1332 of the LRU list.
1333 @return buf_page_t from where to start scan. */
1334 inline buf_page_t *start();
1335 };
1336
1337 /** Struct that is embedded in the free zip blocks */
1338 struct buf_buddy_free_t {
1339 union {
1340 ulint size; /*!< size of the block */
1341 byte bytes[FIL_PAGE_DATA];
1342 /*!< stamp[FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID]
1343 == BUF_BUDDY_FREE_STAMP denotes a free
1344 block. If the space_id field of buddy
1345 block != BUF_BUDDY_FREE_STAMP, the block
1346 is not in any zip_free list. If the
1347 space_id is BUF_BUDDY_FREE_STAMP then
1348 stamp[0] will contain the
1349 buddy block size. */
1350 } stamp;
1351
1352 buf_page_t bpage; /*!< Embedded bpage descriptor */
1353 UT_LIST_NODE_T(buf_buddy_free_t) list;
1354 /*!< Node of zip_free list */
1355 };
1356
1357 /** @brief The buffer pool statistics structure. */
1358 struct buf_pool_stat_t{
1359 ulint n_page_gets; /*!< number of page gets performed;
1360 also successful searches through
1361 the adaptive hash index are
1362 counted as page gets; this field
1363 is NOT protected by the buffer
1364 pool mutex */
ulint	n_pages_read;	/*!< number of read operations */
ulint	n_pages_written;/*!< number of write operations */
1367 ulint n_pages_created;/*!< number of pages created
1368 in the pool with no read */
1369 ulint n_ra_pages_read_rnd;/*!< number of pages read in
1370 as part of random read ahead */
1371 ulint n_ra_pages_read;/*!< number of pages read in
1372 as part of read ahead */
1373 ulint n_ra_pages_evicted;/*!< number of read ahead
1374 pages that are evicted without
1375 being accessed */
1376 ulint n_pages_made_young; /*!< number of pages made young, in
1377 buf_page_make_young() */
1378 ulint n_pages_not_made_young; /*!< number of pages not made
1379 young because the first access
1380 was not long enough ago, in
1381 buf_page_peek_if_too_old() */
1382 /** number of waits for eviction; writes protected by buf_pool.mutex */
1383 ulint LRU_waits;
1384 ulint LRU_bytes; /*!< LRU size in bytes */
1385 ulint flush_list_bytes;/*!< flush_list size in bytes */
1386 };
1387
1388 /** Statistics of buddy blocks of a given size. */
1389 struct buf_buddy_stat_t {
1390 /** Number of blocks allocated from the buddy system. */
1391 ulint used;
1392 /** Number of blocks relocated by the buddy system. */
1393 ib_uint64_t relocated;
1394 /** Total duration of block relocations, in microseconds. */
1395 ib_uint64_t relocated_usec;
1396 };
1397
1398 /** The buffer pool */
1399 class buf_pool_t
1400 {
1401 /** A chunk of buffers */
1402 struct chunk_t
1403 {
1404 /** number of elements in blocks[] */
1405 size_t size;
1406 /** memory allocated for the page frames */
1407 unsigned char *mem;
1408 /** descriptor of mem */
1409 ut_new_pfx_t mem_pfx;
1410 /** array of buffer control blocks */
1411 buf_block_t *blocks;
1412
1413 /** Map of first page frame address to chunks[] */
1414 using map= std::map<const void*, chunk_t*, std::less<const void*>,
1415 ut_allocator<std::pair<const void* const,chunk_t*>>>;
1416 /** Chunk map that may be under construction by buf_resize_thread() */
1417 static map *map_reg;
1418 /** Current chunk map for lookup only */
1419 static map *map_ref;
1420
/** @return the memory size in bytes. */
size_t mem_size() const { return mem_pfx.m_size; }
1423
1424 /** Register the chunk */
void reg() { map_reg->emplace(map::value_type(blocks->frame, this)); }
1426
1427 /** Allocate a chunk of buffer frames.
1428 @param bytes requested size
1429 @return whether the allocation succeeded */
1430 inline bool create(size_t bytes);
1431
1432 #ifdef UNIV_DEBUG
1433 /** Find a block that points to a ROW_FORMAT=COMPRESSED page
1434 @param data pointer to the start of a ROW_FORMAT=COMPRESSED page frame
1435 @return the block
1436 @retval nullptr if not found */
const buf_block_t *contains_zip(const void *data) const
1438 {
1439 const buf_block_t *block= blocks;
1440 for (auto i= size; i--; block++)
1441 if (block->page.zip.data == data)
1442 return block;
1443 return nullptr;
1444 }
1445
1446 /** Check that all blocks are in a replaceable state.
1447 @return address of a non-free block
1448 @retval nullptr if all freed */
1449 inline const buf_block_t *not_freed() const;
1450 #endif /* UNIV_DEBUG */
1451 };
1452
1453 /** Withdraw blocks from the buffer pool until meeting withdraw_target.
1454 @return whether retry is needed */
1455 inline bool withdraw_blocks();
1456
1457 /** Determine if a pointer belongs to a buf_block_t. It can be a pointer to
1458 the buf_block_t itself or a member of it.
1459 @param ptr a pointer that will not be dereferenced
1460 @return whether the ptr belongs to a buf_block_t struct */
bool is_block_field(const void *ptr) const
1462 {
1463 const chunk_t *chunk= chunks;
1464 const chunk_t *const echunk= chunk + ut_min(n_chunks, n_chunks_new);
1465
1466 /* TODO: protect chunks with a mutex (the older pointer will
1467 currently remain during resize()) */
1468 for (; chunk < echunk; chunk++)
1469 if (ptr >= reinterpret_cast<const void*>(chunk->blocks) &&
1470 ptr < reinterpret_cast<const void*>(chunk->blocks + chunk->size))
1471 return true;
1472 return false;
1473 }
1474
1475 /** Try to reallocate a control block.
1476 @param block control block to reallocate
1477 @return whether the reallocation succeeded */
1478 inline bool realloc(buf_block_t *block);
1479
1480 public:
bool is_initialised() const { return chunks != nullptr; }
1482
1483 /** Create the buffer pool.
1484 @return whether the creation failed */
1485 bool create();
1486
1487 /** Clean up after successful create() */
1488 void close();
1489
1490 /** Resize from srv_buf_pool_old_size to srv_buf_pool_size. */
1491 inline void resize();
1492
1493 /** @return whether resize() is in progress */
bool resize_in_progress() const
1495 {
1496 return UNIV_UNLIKELY(resizing.load(std::memory_order_relaxed));
1497 }
1498
1499 /** @return the current size in blocks */
size_t get_n_pages() const
1501 {
1502 ut_ad(is_initialised());
1503 size_t size= 0;
1504 for (auto j= n_chunks; j--; )
1505 size+= chunks[j].size;
1506 return size;
1507 }
1508
1509 /** Determine whether a frame is intended to be withdrawn during resize().
1510 @param ptr pointer within a buf_block_t::frame
1511 @return whether the frame will be withdrawn */
bool will_be_withdrawn(const byte *ptr) const
1513 {
1514 ut_ad(curr_size < old_size);
1515 #ifdef SAFE_MUTEX
1516 if (resizing.load(std::memory_order_relaxed))
1517 mysql_mutex_assert_owner(&mutex);
1518 #endif /* SAFE_MUTEX */
1519
1520 for (const chunk_t *chunk= chunks + n_chunks_new,
1521 * const echunk= chunks + n_chunks;
1522 chunk != echunk; chunk++)
1523 if (ptr >= chunk->blocks->frame &&
1524 ptr < (chunk->blocks + chunk->size - 1)->frame + srv_page_size)
1525 return true;
1526 return false;
1527 }
1528
1529 /** Determine whether a block is intended to be withdrawn during resize().
1530 @param bpage buffer pool block
1531 @return whether the block will be withdrawn */
1532 bool will_be_withdrawn(const buf_page_t &bpage) const
1533 {
1534 ut_ad(curr_size < old_size);
1535 #ifdef SAFE_MUTEX
1536 if (resizing.load(std::memory_order_relaxed))
1537 mysql_mutex_assert_owner(&mutex);
1538 #endif /* SAFE_MUTEX */
1539
1540 for (const chunk_t *chunk= chunks + n_chunks_new,
1541 * const echunk= chunks + n_chunks;
1542 chunk != echunk; chunk++)
1543 if (&bpage >= &chunk->blocks->page &&
1544 &bpage < &chunk->blocks[chunk->size].page)
1545 return true;
1546 return false;
1547 }
1548
1549 /** Release and evict a corrupted page.
1550 @param bpage page that was being read */
1551 ATTRIBUTE_COLD void corrupted_evict(buf_page_t *bpage);
1552
1553 /** Release a memory block to the buffer pool. */
1554 ATTRIBUTE_COLD void free_block(buf_block_t *block);
1555
1556 #ifdef UNIV_DEBUG
1557 /** Find a block that points to a ROW_FORMAT=COMPRESSED page
1558 @param data pointer to the start of a ROW_FORMAT=COMPRESSED page frame
1559 @return the block
1560 @retval nullptr if not found */
1561 const buf_block_t *contains_zip(const void *data) const
1562 {
1563 mysql_mutex_assert_owner(&mutex);
1564 for (const chunk_t *chunk= chunks, * const end= chunks + n_chunks;
1565 chunk != end; chunk++)
1566 if (const buf_block_t *block= chunk->contains_zip(data))
1567 return block;
1568 return nullptr;
1569 }
1570
1571 /** Assert that all buffer pool pages are in a replaceable state */
1572 void assert_all_freed();
1573 #endif /* UNIV_DEBUG */
1574
1575 #ifdef BTR_CUR_HASH_ADAPT
1576 /** Clear the adaptive hash index on all pages in the buffer pool. */
1577 inline void clear_hash_index();
1578
1579 /** Get a buffer block from an adaptive hash index pointer.
1580 This function does not return if the block is not identified.
1581 @param ptr pointer to within a page frame
1582 @return pointer to block, never NULL */
1583 inline buf_block_t *block_from_ahi(const byte *ptr) const;
1584 #endif /* BTR_CUR_HASH_ADAPT */
1585
1586 bool is_block_lock(const rw_lock_t *l) const
1587 { return is_block_field(static_cast<const void*>(l)); }
1588
1589 /**
1590 @return the smallest oldest_modification lsn for any page
1591 @retval empty_lsn if all modified persistent pages have been flushed */
1592 lsn_t get_oldest_modification(lsn_t empty_lsn)
1593 {
1594 mysql_mutex_assert_owner(&flush_list_mutex);
1595 while (buf_page_t *bpage= UT_LIST_GET_LAST(flush_list))
1596 {
1597 ut_ad(!fsp_is_system_temporary(bpage->id().space()));
1598 lsn_t lsn= bpage->oldest_modification();
1599 if (lsn != 1)
1600 {
1601 ut_ad(lsn > 2);
1602 return lsn;
1603 }
1604 delete_from_flush_list(bpage);
1605 }
1606 return empty_lsn;
1607 }
1608
1609 /** Determine if a buffer block was created by chunk_t::create().
1610 @param block block descriptor (not dereferenced)
1611 @return whether block has been created by chunk_t::create() */
1612 bool is_uncompressed(const buf_block_t *block) const
1613 {
1614 return is_block_field(reinterpret_cast<const void*>(block));
1615 }
1616
1617 /** Get the page_hash latch for a page */
1618 page_hash_latch *hash_lock_get(const page_id_t id) const
1619 {
1620 return page_hash.lock_get(id.fold());
1621 }
1622
1623 /** Look up a block descriptor.
1624 @param id page identifier
1625 @param fold id.fold()
1626 @return block descriptor, possibly in watch[]
1627 @retval nullptr if not found*/
1628 buf_page_t *page_hash_get_low(const page_id_t id, const ulint fold)
1629 {
1630 ut_ad(id.fold() == fold);
1631 #ifdef SAFE_MUTEX
1632 DBUG_ASSERT(mysql_mutex_is_owner(&mutex) ||
1633 page_hash.lock_get(fold)->is_locked());
1634 #endif /* SAFE_MUTEX */
1635 buf_page_t *bpage;
1636 /* Look for the page in the hash table */
1637 HASH_SEARCH(hash, &page_hash, fold, buf_page_t*, bpage,
1638 ut_ad(bpage->in_page_hash), id == bpage->id());
1639 return bpage;
1640 }
1641 private:
1642 /** Look up a block descriptor.
1643 @tparam exclusive whether the latch is to be acquired exclusively
1644 @tparam watch whether to allow watch_is_sentinel()
1645 @param page_id page identifier
1646 @param fold page_id.fold()
1647 @param hash_lock pointer to the acquired latch (to be released by caller)
1648 @return pointer to the block
1649 @retval nullptr if no block was found; !hash_lock || !*hash_lock will also hold */
1650 template<bool exclusive,bool watch>
1651 buf_page_t *page_hash_get_locked(const page_id_t page_id, ulint fold,
1652 page_hash_latch **hash_lock)
1653 {
1654 ut_ad(hash_lock || !exclusive);
1655 page_hash_latch *latch= page_hash.lock<exclusive>(fold);
1656 buf_page_t *bpage= page_hash_get_low(page_id, fold);
1657 if (!bpage || watch_is_sentinel(*bpage))
1658 {
1659 latch->release<exclusive>();
1660 if (hash_lock)
1661 *hash_lock= nullptr;
1662 return watch ? bpage : nullptr;
1663 }
1664
1665 ut_ad(bpage->in_file());
1666 ut_ad(page_id == bpage->id());
1667
1668 if (hash_lock)
1669 *hash_lock= latch; /* to be released by the caller */
1670 else
1671 latch->release<exclusive>();
1672 return bpage;
1673 }
1674 public:
1675 /** Look up a block descriptor.
1676 @tparam exclusive whether the latch is to be acquired exclusively
1677 @param page_id page identifier
1678 @param fold page_id.fold()
1679 @param hash_lock pointer to the acquired latch (to be released by caller)
1680 @return pointer to the block
1681 @retval nullptr if no block was found; !hash_lock || !*hash_lock will also hold */
1682 template<bool exclusive>
1683 buf_page_t *page_hash_get_locked(const page_id_t page_id, ulint fold,
1684 page_hash_latch **hash_lock)
1685 { return page_hash_get_locked<exclusive,false>(page_id, fold, hash_lock); }
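/* Example (an illustrative sketch, not an actual call site in the server;
the page_id_t value "id" is assumed to be supplied by the caller):

  page_hash_latch *latch;
  if (buf_page_t *bpage=
      buf_pool.page_hash_get_locked<false>(id, id.fold(), &latch))
  {
    // inspect *bpage while holding the shared page_hash latch
    latch->read_unlock();
  }
  // on a miss, the bucket latch was already released and latch == nullptr
*/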
1686
1687 /** @return whether the buffer pool contains a page
1688 @tparam watch whether to allow watch_is_sentinel()
1689 @param page_id page identifier */
1690 template<bool watch= false>
1691 bool page_hash_contains(const page_id_t page_id)
1692 {
1693 return page_hash_get_locked<false,watch>(page_id, page_id.fold(), nullptr);
1694 }
1695
1696 /** Determine if a block is a sentinel for a buffer pool watch.
1697 @param bpage page descriptor
1698 @return whether bpage is a sentinel for a buffer pool watch */
1699 bool watch_is_sentinel(const buf_page_t &bpage)
1700 {
1701 #ifdef SAFE_MUTEX
1702 DBUG_ASSERT(mysql_mutex_is_owner(&mutex) ||
1703 hash_lock_get(bpage.id())->is_locked());
1704 #endif /* SAFE_MUTEX */
1705 ut_ad(bpage.in_file());
1706
1707 if (&bpage < &watch[0] || &bpage >= &watch[UT_ARR_SIZE(watch)])
1708 {
1709 ut_ad(bpage.state() != BUF_BLOCK_ZIP_PAGE || bpage.zip.data);
1710 return false;
1711 }
1712
1713 ut_ad(bpage.state() == BUF_BLOCK_ZIP_PAGE);
1714 ut_ad(!bpage.in_zip_hash);
1715 ut_ad(!bpage.zip.data);
1716 return true;
1717 }
1718
1719 /** Check if a watched page has been read.
1720 This may only be called after !watch_set() and before invoking watch_unset().
1721 @param id page identifier
1722 @return whether the page was read to the buffer pool */
1723 bool watch_occurred(const page_id_t id)
1724 {
1725 const ulint fold= id.fold();
1726 page_hash_latch *hash_lock= page_hash.lock<false>(fold);
1727 /* The page must exist because watch_set() increments buf_fix_count. */
1728 buf_page_t *bpage= page_hash_get_low(id, fold);
1729 const bool is_sentinel= watch_is_sentinel(*bpage);
1730 hash_lock->read_unlock();
1731 return !is_sentinel;
1732 }
1733
1734 /** Register a watch for a page identifier. The caller must hold an
1735 exclusive page hash latch. The *hash_lock may be released,
1736 relocated, and reacquired.
1737 @param id page identifier
1738 @param hash_lock exclusively held page_hash latch
1739 @return a buffer pool block corresponding to id
1740 @retval nullptr if the block was not present, and a watch was installed */
1741 inline buf_page_t *watch_set(const page_id_t id,
1742 page_hash_latch **hash_lock);
1743
1744 /** Stop watching whether a page has been read in.
1745 watch_set(id) must have returned nullptr before.
1746 @param id page identifier */
1747 void watch_unset(const page_id_t id);
1748
1749 /** Remove the sentinel block for the watch before replacing it with a
1750 real block. watch_unset() or watch_occurred() will notice
1751 that the block has been replaced with the real block.
1752 @param watch sentinel */
1753 inline void watch_remove(buf_page_t *watch);
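/* Example (an illustrative sketch of the watch protocol above, not a
prescribed call site; the page_id_t value "id" is assumed, and the release
of the exclusive bucket latch is only indicated by a comment):

  page_hash_latch *hash_lock= buf_pool.page_hash.lock<true>(id.fold());
  if (buf_page_t *bpage= buf_pool.watch_set(id, &hash_lock))
  {
    // the page is already in the buffer pool; use bpage
  }
  else
  {
    // a sentinel was installed; hash_lock may have been released,
    // relocated and reacquired by watch_set()
    // ... release the exclusive hash latch, do the buffered work ...
    const bool got_read= buf_pool.watch_occurred(id);
    buf_pool.watch_unset(id);
    // got_read tells whether the page was read in meanwhile
  }
*/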
1754
1755 /** @return whether less than 1/4 of the buffer pool is available */
1756 bool running_out() const
1757 {
1758 return !recv_recovery_is_on() &&
1759 UNIV_UNLIKELY(UT_LIST_GET_LEN(free) + UT_LIST_GET_LEN(LRU) <
1760 std::min(curr_size, old_size) / 4);
1761 }
1762
1763 #ifdef UNIV_DEBUG
1764 /** Validate the buffer pool. */
1765 void validate();
1766 #endif /* UNIV_DEBUG */
1767 #if defined UNIV_DEBUG_PRINT || defined UNIV_DEBUG
1768 /** Write information of the buf_pool to the error log. */
1769 void print();
1770 #endif /* UNIV_DEBUG_PRINT || UNIV_DEBUG */
1771
1772 /** Remove a block from the LRU list.
1773 @return the predecessor in the LRU list */
1774 buf_page_t *LRU_remove(buf_page_t *bpage)
1775 {
1776 mysql_mutex_assert_owner(&mutex);
1777 ut_ad(bpage->in_LRU_list);
1778 ut_ad(bpage->in_page_hash);
1779 ut_ad(!bpage->in_zip_hash);
1780 ut_ad(bpage->in_file());
1781 lru_hp.adjust(bpage);
1782 lru_scan_itr.adjust(bpage);
1783 ut_d(bpage->in_LRU_list= false);
1784 buf_page_t *prev= UT_LIST_GET_PREV(LRU, bpage);
1785 UT_LIST_REMOVE(LRU, bpage);
1786 return prev;
1787 }
1788
1789 /** Number of pages to read ahead */
1790 static constexpr uint32_t READ_AHEAD_PAGES= 64;
1791
1792 /** Buffer pool mutex */
1793 MY_ALIGNED(CPU_LEVEL1_DCACHE_LINESIZE) mysql_mutex_t mutex;
1794 /** Number of pending LRU flush; protected by mutex. */
1795 ulint n_flush_LRU_;
1796 /** broadcast when n_flush_LRU reaches 0; protected by mutex */
1797 pthread_cond_t done_flush_LRU;
1798 /** Number of pending flush_list flush; protected by mutex */
1799 ulint n_flush_list_;
1800 /** broadcast when n_flush_list reaches 0; protected by mutex */
1801 pthread_cond_t done_flush_list;
1802
1803 TPOOL_SUPPRESS_TSAN ulint n_flush_LRU() const { return n_flush_LRU_; }
1804 TPOOL_SUPPRESS_TSAN ulint n_flush_list() const { return n_flush_list_; }
1805
1806 /** @name General fields */
1807 /* @{ */
1808 ulint curr_pool_size; /*!< Current pool size in bytes */
1809 ulint LRU_old_ratio; /*!< Reserve this much of the buffer
1810 pool for "old" blocks */
1811 #ifdef UNIV_DEBUG
1812 ulint buddy_n_frames; /*!< Number of frames allocated from
1813 the buffer pool to the buddy system */
1814 ulint mutex_exit_forbidden; /*!< Forbid release mutex */
1815 #endif
1816 ut_allocator<unsigned char> allocator; /*!< Allocator used for
1817 allocating memory for the "chunks"
1818 member. */
1819 volatile ulint n_chunks; /*!< number of buffer pool chunks */
1820 volatile ulint n_chunks_new; /*!< new number of buffer pool chunks */
1821 chunk_t* chunks; /*!< buffer pool chunks */
1822 chunk_t* chunks_old; /*!< old buffer pool chunks to be freed
1823 after resizing buffer pool */
1824 /** current pool size in pages */
1825 Atomic_counter<ulint> curr_size;
1826 /** previous pool size in pages */
1827 Atomic_counter<ulint> old_size;
1828 /** read-ahead request size in pages */
1829 Atomic_counter<uint32_t> read_ahead_area;
1830
1831 /** Hash table with singly-linked overflow lists. @see hash_table_t */
1832 struct page_hash_table
1833 {
1834 /** Number of array[] elements per page_hash_latch.
1835 Must be one less than a power of 2. */
1836 static constexpr size_t ELEMENTS_PER_LATCH= CPU_LEVEL1_DCACHE_LINESIZE /
1837 sizeof(void*) - 1;
1838
1839 /** number of payload elements in array[] */
1840 Atomic_relaxed<ulint> n_cells;
1841 /** the hash table, with pad(n_cells) elements, aligned to L1 cache size */
1842 hash_cell_t *array;
1843
1844 /** Create the hash table.
1845 @param n the lower bound of n_cells */
1846 void create(ulint n);
1847
1848 /** Free the hash table. */
1849 void free() { aligned_free(array); array= nullptr; }
1850
1851 /** @return the index of an array element */
1852 ulint calc_hash(ulint fold) const { return calc_hash(fold, n_cells); }
1853 /** @return raw array index converted to padded index */
1854 static ulint pad(ulint h) { return 1 + (h / ELEMENTS_PER_LATCH) + h; }
1855 private:
1856 /** @return the hash value before any ELEMENTS_PER_LATCH padding */
1857 static ulint hash(ulint fold, ulint n) { return ut_hash_ulint(fold, n); }
1858
1859 /** @return the index of an array element */
1860 static ulint calc_hash(ulint fold, ulint n_cells)
1861 {
1862 return pad(hash(fold, n_cells));
1863 }
1864 /** Get a page_hash latch. */
1865 page_hash_latch *lock_get(ulint fold, ulint n) const
1866 {
1867 static_assert(!((ELEMENTS_PER_LATCH + 1) & ELEMENTS_PER_LATCH),
1868 "must be one less than a power of 2");
1869 return reinterpret_cast<page_hash_latch*>
1870 (&array[calc_hash(fold, n) & ~ELEMENTS_PER_LATCH]);
1871 }
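/* Worked example of the padding scheme above (assuming a 64-byte cache
line and 8-byte pointers, so ELEMENTS_PER_LATCH == 7): every 8th slot of
array[] holds a page_hash_latch and the 7 slots after it hold hash cells.
pad() skips the latch slots: pad(0)==1, pad(6)==7, pad(7)==9, pad(13)==15,
pad(14)==17. lock_get() masks the padded index with ~ELEMENTS_PER_LATCH,
i.e. rounds it down to the latch slot at the start of its cache line. */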
1872 public:
1873 /** Get a page_hash latch. */
1874 page_hash_latch *lock_get(ulint fold) const
1875 { return lock_get(fold, n_cells); }
1876
1877 /** Acquire an array latch.
1878 @tparam exclusive whether the latch is to be acquired exclusively
1879 @param fold hash bucket key */
1880 template<bool exclusive> page_hash_latch *lock(ulint fold)
1881 {
1882 page_hash_latch *latch= lock_get(fold, n_cells);
1883 latch->acquire<exclusive>();
1884 return latch;
1885 }
1886
1887 /** Exclusively acquire all latches */
1888 inline void write_lock_all();
1889
1890 /** Release all latches */
1891 inline void write_unlock_all();
1892 };
1893
1894 /** Hash table of file pages (buf_page_t::in_file() holds),
1895 indexed by page_id_t. Protected by both mutex and page_hash.lock_get(). */
1896 page_hash_table page_hash;
1897
1898 /** map of block->frame to buf_block_t blocks that belong
1899 to buf_buddy_alloc(); protected by buf_pool.mutex */
1900 hash_table_t zip_hash;
1901 /** number of pending read operations */
1902 Atomic_counter<ulint> n_pend_reads;
1903 Atomic_counter<ulint>
1904 n_pend_unzip; /*!< number of pending decompressions */
1905
1906 time_t last_printout_time;
1907 /*!< when buf_print_io was last
1908 called */
1909 buf_buddy_stat_t buddy_stat[BUF_BUDDY_SIZES_MAX + 1];
1910 /*!< Statistics of buddy system,
1911 indexed by block size */
1912 buf_pool_stat_t stat; /*!< current statistics */
1913 buf_pool_stat_t old_stat; /*!< old statistics */
1914
1915 /* @} */
1916
1917 /** @name Page flushing algorithm fields */
1918 /* @{ */
1919
1920 /** mutex protecting flush_list, buf_page_t::set_oldest_modification()
1921 and buf_page_t::list pointers when !oldest_modification() */
1922 MY_ALIGNED(CPU_LEVEL1_DCACHE_LINESIZE) mysql_mutex_t flush_list_mutex;
1923 /** "hazard pointer" for flush_list scans; protected by flush_list_mutex */
1924 FlushHp flush_hp;
1925 /** modified blocks (a subset of LRU) */
1926 UT_LIST_BASE_NODE_T(buf_page_t) flush_list;
1927 private:
1928 /** whether the page cleaner needs wakeup from indefinite sleep */
1929 bool page_cleaner_is_idle;
1930 /** track server activity count for signaling idle flushing */
1931 ulint last_activity_count;
1932 public:
1933 /** signalled to wake up the page_cleaner; protected by flush_list_mutex */
1934 pthread_cond_t do_flush_list;
1935
1936 /** @return whether the page cleaner must sleep due to being idle */
1937 bool page_cleaner_idle() const
1938 {
1939 mysql_mutex_assert_owner(&flush_list_mutex);
1940 return page_cleaner_is_idle;
1941 }
1942 /** Wake up the page cleaner if needed */
1943 void page_cleaner_wakeup();
1944
1945 /** Register whether an explicit wakeup of the page cleaner is needed */
1946 void page_cleaner_set_idle(bool deep_sleep)
1947 {
1948 mysql_mutex_assert_owner(&flush_list_mutex);
1949 page_cleaner_is_idle= deep_sleep;
1950 }
1951
1952 /** Update server last activity count */
1953 void update_last_activity_count(ulint activity_count)
1954 {
1955 mysql_mutex_assert_owner(&flush_list_mutex);
1956 last_activity_count= activity_count;
1957 }
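/* Example (an illustrative sketch of the intended interplay of the
members above, not the actual page cleaner loop):

  // producer side, after adding work for the page cleaner
  mysql_mutex_lock(&buf_pool.flush_list_mutex);
  buf_pool.page_cleaner_wakeup();   // wakes the cleaner only if needed
  mysql_mutex_unlock(&buf_pool.flush_list_mutex);

  // page cleaner side, before an indefinite sleep
  mysql_mutex_lock(&buf_pool.flush_list_mutex);
  buf_pool.page_cleaner_set_idle(true);
  // ... wait on do_flush_list while holding flush_list_mutex ...
  mysql_mutex_unlock(&buf_pool.flush_list_mutex);
*/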
1958
1959 // n_flush_LRU() + n_flush_list()
1960 // is approximately COUNT(io_fix()==BUF_IO_WRITE) in flush_list
1961
1962 unsigned freed_page_clock;/*!< a sequence number used
1963 to count the number of buffer
1964 blocks removed from the end of
1965 the LRU list; NOTE that this
1966 counter may wrap around at 4
1967 billion! A thread is allowed
1968 to read this for heuristic
1969 purposes without holding any
1970 mutex or latch */
1971 bool try_LRU_scan; /*!< Cleared when an LRU
1972 scan for free block fails. This
1973 flag is used to avoid repeated
1974 scans of LRU list when we know
1975 that there is no free block
1976 available in the scan depth for
1977 eviction. Set whenever
1978 we flush a batch from the
1979 buffer pool. Protected by the
1980 buf_pool.mutex */
1981 /* @} */
1982
1983 /** @name LRU replacement algorithm fields */
1984 /* @{ */
1985
1986 UT_LIST_BASE_NODE_T(buf_page_t) free;
1987 /*!< base node of the free
1988 block list */
1989 /** signaled each time when the free list grows; protected by mutex */
1990 pthread_cond_t done_free;
1991
1992 UT_LIST_BASE_NODE_T(buf_page_t) withdraw;
1993 /*!< base node of the withdraw
1994 block list. It is only used during
1995 shrinking of the buffer pool; the
1996 withdrawn blocks are not reused */
1997
1998 ulint withdraw_target;/*!< target length of withdraw
1999 block list, when withdrawing */
2000
2001 /** "hazard pointer" used during scan of LRU while doing
2002 LRU list batch. Protected by buf_pool_t::mutex. */
2003 LRUHp lru_hp;
2004
2005 /** Iterator used to scan the LRU list when searching for
2006 replaceable victim. Protected by buf_pool_t::mutex. */
2007 LRUItr lru_scan_itr;
2008
2009 UT_LIST_BASE_NODE_T(buf_page_t) LRU;
2010 /*!< base node of the LRU list */
2011
2012 buf_page_t* LRU_old; /*!< pointer to the approximately
2013 LRU_old_ratio/BUF_LRU_OLD_RATIO_DIV
2014 oldest blocks in the LRU list;
2015 NULL if the LRU length is less than
2016 BUF_LRU_OLD_MIN_LEN;
2017 NOTE: when LRU_old != NULL, its length
2018 should always equal LRU_old_len */
2019 ulint LRU_old_len; /*!< length of the LRU list from
2020 the block to which LRU_old points
2021 onward, including that block;
2022 see buf0lru.cc for the restrictions
2023 on this value; 0 if LRU_old == NULL;
2024 NOTE: LRU_old_len must be adjusted
2025 whenever LRU_old shrinks or grows! */
2026
2027 UT_LIST_BASE_NODE_T(buf_block_t) unzip_LRU;
2028 /*!< base node of the
2029 unzip_LRU list */
2030
2031 /* @} */
2032 /** free ROW_FORMAT=COMPRESSED page frames */
2033 UT_LIST_BASE_NODE_T(buf_buddy_free_t) zip_free[BUF_BUDDY_SIZES_MAX];
2034 #if BUF_BUDDY_LOW > UNIV_ZIP_SIZE_MIN
2035 # error "BUF_BUDDY_LOW > UNIV_ZIP_SIZE_MIN"
2036 #endif
2037
2038 /** Sentinels to detect if pages are read into the buffer pool while
2039 a delete-buffering operation is pending. Protected by mutex. */
2040 buf_page_t watch[innodb_purge_threads_MAX + 1];
2041 /** Reserve a buffer. */
2042 buf_tmp_buffer_t *io_buf_reserve() { return io_buf.reserve(); }
2043
2044 /** @return whether any I/O is pending */
2045 bool any_io_pending() const
2046 {
2047 return n_pend_reads || n_flush_LRU() || n_flush_list();
2048 }
2049 /** @return total amount of pending I/O */
2050 ulint io_pending() const
2051 {
2052 return n_pend_reads + n_flush_LRU() + n_flush_list();
2053 }
2054
2055 private:
2056 /** Remove a block from the flush list. */
2057 inline void delete_from_flush_list_low(buf_page_t *bpage);
2058 /** Remove a block from flush_list.
2059 @param bpage buffer pool page
2060 @param clear whether to invoke buf_page_t::clear_oldest_modification() */
2061 void delete_from_flush_list(buf_page_t *bpage, bool clear);
2062 public:
2063 /** Remove a block from flush_list.
2064 @param bpage buffer pool page */
2065 void delete_from_flush_list(buf_page_t *bpage)
2066 { delete_from_flush_list(bpage, true); }
2067
2068 /** Insert a modified block into the flush list.
2069 @param block modified block
2070 @param lsn start LSN of the mini-transaction that modified the block */
2071 void insert_into_flush_list(buf_block_t *block, lsn_t lsn);
2072
2073 /** Free a page whose underlying file page has been freed. */
2074 inline void release_freed_page(buf_page_t *bpage);
2075
2076 private:
2077 /** Temporary memory for page_compressed and encrypted I/O */
2078 struct io_buf_t
2079 {
2080 /** number of elements in slots[] */
2081 ulint n_slots;
2082 /** array of slots */
2083 buf_tmp_buffer_t *slots;
2084
2085 void create(ulint n_slots)
2086 {
2087 this->n_slots= n_slots;
2088 slots= static_cast<buf_tmp_buffer_t*>
2089 (ut_malloc_nokey(n_slots * sizeof *slots));
2090 memset((void*) slots, 0, n_slots * sizeof *slots);
2091 }
2092
2093 void close()
2094 {
2095 for (buf_tmp_buffer_t *s= slots, *e= slots + n_slots; s != e; s++)
2096 {
2097 aligned_free(s->crypt_buf);
2098 aligned_free(s->comp_buf);
2099 }
2100 ut_free(slots);
2101 slots= nullptr;
2102 n_slots= 0;
2103 }
2104
2105 /** Reserve a buffer */
2106 buf_tmp_buffer_t *reserve()
2107 {
2108 for (buf_tmp_buffer_t *s= slots, *e= slots + n_slots; s != e; s++)
2109 if (s->acquire())
2110 return s;
2111 return nullptr;
2112 }
2113 } io_buf;
2114
2115 /** whether resize() is in the critical path */
2116 std::atomic<bool> resizing;
2117 };
2118
2119 /** The InnoDB buffer pool */
2120 extern buf_pool_t buf_pool;
2121
2122 inline void page_hash_latch::read_lock()
2123 {
2124 mysql_mutex_assert_not_owner(&buf_pool.mutex);
2125 if (!read_trylock())
2126 read_lock_wait();
2127 }
2128
2129 inline void page_hash_latch::write_lock()
2130 {
2131 if (!write_trylock())
2132 write_lock_wait();
2133 }
2134
2135 inline void buf_page_t::add_buf_fix_count(uint32_t count)
2136 {
2137 mysql_mutex_assert_owner(&buf_pool.mutex);
2138 buf_fix_count_+= count;
2139 }
2140
2141 inline void buf_page_t::set_buf_fix_count(uint32_t count)
2142 {
2143 mysql_mutex_assert_owner(&buf_pool.mutex);
2144 buf_fix_count_= count;
2145 }
2146
2147 inline void buf_page_t::set_state(buf_page_state state)
2148 {
2149 mysql_mutex_assert_owner(&buf_pool.mutex);
2150 #ifdef UNIV_DEBUG
2151 switch (state) {
2152 case BUF_BLOCK_REMOVE_HASH:
2153 /* buf_pool_t::corrupted_evict() invokes set_corrupt_id()
2154 before buf_LRU_free_one_page(), so we cannot assert that
2155 we are holding the hash_lock. */
2156 break;
2157 case BUF_BLOCK_MEMORY:
2158 if (!in_file()) break;
2159 /* fall through */
2160 case BUF_BLOCK_FILE_PAGE:
2161 ut_ad(buf_pool.hash_lock_get(id_)->is_write_locked());
2162 break;
2163 case BUF_BLOCK_NOT_USED:
2164 if (!in_file()) break;
2165 /* fall through */
2166 case BUF_BLOCK_ZIP_PAGE:
2167 ut_ad(buf_pool.hash_lock_get(id_)->is_write_locked() ||
2168 (this >= &buf_pool.watch[0] &&
2169 this <= &buf_pool.watch[UT_ARR_SIZE(buf_pool.watch)]));
2170 break;
2171 }
2172 #endif
2173 state_= state;
2174 }
2175
2176 inline void buf_page_t::set_io_fix(buf_io_fix io_fix)
2177 {
2178 mysql_mutex_assert_owner(&buf_pool.mutex);
2179 io_fix_= io_fix;
2180 }
2181
2182 inline void buf_page_t::set_corrupt_id()
2183 {
2184 #ifdef UNIV_DEBUG
2185 switch (oldest_modification()) {
2186 case 0:
2187 break;
2188 case 2:
2189 ut_ad(fsp_is_system_temporary(id().space()));
2190 /* buf_LRU_block_free_non_file_page() asserts !oldest_modification() */
2191 ut_d(oldest_modification_= 0;)
2192 break;
2193 default:
2194 ut_ad("block is dirty" == 0);
2195 }
2196 switch (state()) {
2197 case BUF_BLOCK_REMOVE_HASH:
2198 break;
2199 case BUF_BLOCK_ZIP_PAGE:
2200 case BUF_BLOCK_FILE_PAGE:
2201 ut_ad(buf_pool.hash_lock_get(id_)->is_write_locked());
2202 break;
2203 case BUF_BLOCK_NOT_USED:
2204 case BUF_BLOCK_MEMORY:
2205 ut_ad("invalid state" == 0);
2206 }
2207 #endif
2208 id_= page_id_t(~0ULL);
2209 }
2210
2211 /** Set oldest_modification when adding to buf_pool.flush_list */
2212 inline void buf_page_t::set_oldest_modification(lsn_t lsn)
2213 {
2214 mysql_mutex_assert_owner(&buf_pool.flush_list_mutex);
2215 ut_ad(oldest_modification() <= 1);
2216 ut_ad(lsn > 2);
2217 oldest_modification_= lsn;
2218 }
2219
2220 /** Clear oldest_modification after removing from buf_pool.flush_list */
2221 inline void buf_page_t::clear_oldest_modification()
2222 {
2223 mysql_mutex_assert_owner(&buf_pool.flush_list_mutex);
2224 ut_d(const auto state= state_);
2225 ut_ad(state == BUF_BLOCK_FILE_PAGE || state == BUF_BLOCK_ZIP_PAGE ||
2226 state == BUF_BLOCK_REMOVE_HASH);
2227 ut_ad(oldest_modification());
2228 ut_ad(!list.prev);
2229 ut_ad(!list.next);
2230 /* We must use release memory order to guarantee that callers of
2231 oldest_modification_acquire() will observe the block as
2232 being detached from buf_pool.flush_list, after reading the value 0. */
2233 oldest_modification_.store(0, std::memory_order_release);
2234 }
2235
2236 /** Note that a block is no longer dirty, while not removing
2237 it from buf_pool.flush_list */
2238 inline void buf_page_t::clear_oldest_modification(bool temporary)
2239 {
2240 ut_ad(temporary == fsp_is_system_temporary(id().space()));
2241 if (temporary)
2242 {
2243 ut_ad(oldest_modification() == 2);
2244 oldest_modification_= 0;
2245 }
2246 else
2247 {
2248 /* We use release memory order to guarantee that callers of
2249 oldest_modification_acquire() will observe the block as
2250 being detached from buf_pool.flush_list, after reading the value 0. */
2251 ut_ad(oldest_modification() > 2);
2252 oldest_modification_.store(1, std::memory_order_release);
2253 }
2254 }
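/* Summary of the oldest_modification_ encoding implied by the functions
above and by set_oldest_modification() (a reading aid, not an additional
invariant):
  0   clean page, not in buf_pool.flush_list
  1   persistent page that is no longer dirty but is still attached to
      buf_pool.flush_list until it is lazily removed
  2   dirty page of the temporary tablespace (never in flush_list)
  >2  start LSN of the oldest change to a dirty persistent page */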
2255
2256 /** @return whether the block is modified and ready for flushing */
2257 inline bool buf_page_t::ready_for_flush() const
2258 {
2259 mysql_mutex_assert_owner(&buf_pool.mutex);
2260 ut_ad(in_LRU_list);
2261 ut_a(in_file());
2262 ut_ad(fsp_is_system_temporary(id().space())
2263 ? oldest_modification() == 2
2264 : oldest_modification() > 2);
2265 return io_fix_ == BUF_IO_NONE;
2266 }
2267
2268 /** @return whether the block can be relocated in memory.
2269 The block can be dirty, but it must not be I/O-fixed or bufferfixed. */
2270 inline bool buf_page_t::can_relocate() const
2271 {
2272 mysql_mutex_assert_owner(&buf_pool.mutex);
2273 ut_ad(in_file());
2274 ut_ad(in_LRU_list);
2275 return io_fix_ == BUF_IO_NONE && !buf_fix_count_;
2276 }
2277
2278 /** @return whether the block has been flagged old in buf_pool.LRU */
2279 inline bool buf_page_t::is_old() const
2280 {
2281 mysql_mutex_assert_owner(&buf_pool.mutex);
2282 ut_ad(in_file());
2283 ut_ad(in_LRU_list);
2284 return old;
2285 }
2286
2287 /** Set whether a block is old in buf_pool.LRU */
2288 inline void buf_page_t::set_old(bool old)
2289 {
2290 mysql_mutex_assert_owner(&buf_pool.mutex);
2291 ut_ad(in_LRU_list);
2292
2293 #ifdef UNIV_LRU_DEBUG
2294 ut_a((buf_pool.LRU_old_len == 0) == (buf_pool.LRU_old == nullptr));
2295 /* If a block is flagged "old", the LRU_old list must exist. */
2296 ut_a(!old || buf_pool.LRU_old);
2297
2298 if (UT_LIST_GET_PREV(LRU, this) && UT_LIST_GET_NEXT(LRU, this))
2299 {
2300 const buf_page_t *prev= UT_LIST_GET_PREV(LRU, this);
2301 const buf_page_t *next = UT_LIST_GET_NEXT(LRU, this);
2302 if (prev->old == next->old)
2303 ut_a(prev->old == old);
2304 else
2305 {
2306 ut_a(!prev->old);
2307 ut_a(buf_pool.LRU_old == (old ? this : next));
2308 }
2309 }
2310 #endif /* UNIV_LRU_DEBUG */
2311
2312 this->old= old;
2313 }
2314
2315 #ifdef UNIV_DEBUG
2316 /** Forbid the release of the buffer pool mutex. */
2317 # define buf_pool_mutex_exit_forbid() do { \
2318 mysql_mutex_assert_owner(&buf_pool.mutex); \
2319 buf_pool.mutex_exit_forbidden++; \
2320 } while (0)
2321 /** Allow the release of the buffer pool mutex. */
2322 # define buf_pool_mutex_exit_allow() do { \
2323 mysql_mutex_assert_owner(&buf_pool.mutex); \
2324 ut_ad(buf_pool.mutex_exit_forbidden--); \
2325 } while (0)
2326 #else
2327 /** Forbid the release of the buffer pool mutex. */
2328 # define buf_pool_mutex_exit_forbid() ((void) 0)
2329 /** Allow the release of the buffer pool mutex. */
2330 # define buf_pool_mutex_exit_allow() ((void) 0)
2331 #endif
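/* Example (an illustrative sketch): the forbid/allow pair brackets code
that must not release buf_pool.mutex even temporarily:

  mysql_mutex_lock(&buf_pool.mutex);
  buf_pool_mutex_exit_forbid();
  // ... operations that rely on the mutex being held throughout ...
  buf_pool_mutex_exit_allow();
  mysql_mutex_unlock(&buf_pool.mutex);

In non-debug builds both macros expand to no-ops; in debug builds they
maintain the buf_pool.mutex_exit_forbidden counter. */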
2332
2333 /**********************************************************************
2334 Let us list the consistency conditions for different control block states.
2335
2336 NOT_USED: is in free list, not in LRU list, not in flush list, nor
2337 page hash table
2338 MEMORY: is not in free list, LRU list, or flush list, nor page
2339 hash table
2340 FILE_PAGE: space and offset are defined, is in page hash table
2341 if io_fix == BUF_IO_WRITE,
2342 buf_pool.n_flush_LRU() || buf_pool.n_flush_list()
2343
2344 (1) if buf_fix_count == 0, then
2345 is in LRU list, not in free list
2346 is in flush list,
2347 if and only if oldest_modification > 0
2348 is x-locked,
2349 if and only if io_fix == BUF_IO_READ
2350 is s-locked,
2351 if and only if io_fix == BUF_IO_WRITE
2352
2353 (2) if buf_fix_count > 0, then
2354 is not in LRU list, not in free list
2355 is in flush list,
2356 if and only if oldest_modification > 0
2357 if io_fix == BUF_IO_READ,
2358 is x-locked
2359 if io_fix == BUF_IO_WRITE,
2360 is s-locked
2361
2362 State transitions:
2363
2364 NOT_USED => MEMORY
2365 MEMORY => FILE_PAGE
2366 MEMORY => NOT_USED
2367 FILE_PAGE => NOT_USED NOTE: This transition is allowed if and only if
2368 (1) buf_fix_count == 0,
2369 (2) oldest_modification == 0, and
2370 (3) io_fix == 0.
2371 */
2372
2373 /** Select from where to start a scan. If we have scanned
2374 too deep into the LRU list, the scan is restarted from the
2375 tail of the LRU list.
2376 @return buf_page_t from where to start the scan. */
2377 inline buf_page_t *LRUItr::start()
2378 {
2379 mysql_mutex_assert_owner(m_mutex);
2380
2381 if (!m_hp || m_hp->old)
2382 m_hp= UT_LIST_GET_LAST(buf_pool.LRU);
2383
2384 return m_hp;
2385 }
2386
2387 #ifdef UNIV_DEBUG
2388 /** Functor to validate the LRU list. */
2389 struct CheckInLRUList {
2390 void operator()(const buf_page_t* elem) const
2391 {
2392 ut_a(elem->in_LRU_list);
2393 }
2394
2395 static void validate()
2396 {
2397 ut_list_validate(buf_pool.LRU, CheckInLRUList());
2398 }
2399 };
2400
2401 /** Functor to validate the free list. */
2402 struct CheckInFreeList {
2403 void operator()(const buf_page_t* elem) const
2404 {
2405 ut_a(elem->in_free_list);
2406 }
2407
2408 static void validate()
2409 {
2410 ut_list_validate(buf_pool.free, CheckInFreeList());
2411 }
2412 };
2413
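/** Functor to validate the unzip_LRU list (and that each of its blocks
is also in the LRU list). */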
2414 struct CheckUnzipLRUAndLRUList {
2415 void operator()(const buf_block_t* elem) const
2416 {
2417 ut_a(elem->page.in_LRU_list);
2418 ut_a(elem->in_unzip_LRU_list);
2419 }
2420
2421 static void validate()
2422 {
2423 ut_list_validate(buf_pool.unzip_LRU,
2424 CheckUnzipLRUAndLRUList());
2425 }
2426 };
2427 #endif /* UNIV_DEBUG */
2428
2429 #include "buf0buf.inl"
2430
2431 #endif /* !UNIV_INNOCHECKSUM */
2432
2433 #endif
2434