/*****************************************************************************

Copyright (c) 1995, 2017, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2015, 2021, MariaDB Corporation.

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; version 2 of the License.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA

*****************************************************************************/

/**************************************************//**
@file buf/buf0rea.cc
The database buffer read

Created 11/5/1995 Heikki Tuuri
*******************************************************/

#include "univ.i"
#include <mysql/service_thd_wait.h>

#include "buf0rea.h"
#include "fil0fil.h"
#include "mtr0mtr.h"
#include "buf0buf.h"
#include "buf0flu.h"
#include "buf0lru.h"
#include "buf0buddy.h"
#include "buf0dblwr.h"
#include "ibuf0ibuf.h"
#include "log0recv.h"
#include "trx0sys.h"
#include "os0file.h"
#include "srv0start.h"
#include "srv0srv.h"

/** If the number of pending read requests exceeds buf_pool.curr_size
divided by this constant, read-ahead is not done: this is to prevent
flooding the buffer pool with i/o-fixed buffer blocks */
#define BUF_READ_AHEAD_PEND_LIMIT 2
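
/* A worked example of the limit above, assuming a buffer pool of 8192
pages: read-ahead is skipped as soon as more than 8192 / 2 = 4096 page
reads are pending; see the checks in buf_read_ahead_random() and
buf_read_ahead_linear() below. */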

/** Remove the sentinel block for the watch before replacing it with a
real block. watch_unset() or watch_occurred() will notice
that the block has been replaced with the real block.
@param watch	sentinel */
inline void buf_pool_t::watch_remove(buf_page_t *watch)
{
  mysql_mutex_assert_owner(&buf_pool.mutex);
  ut_ad(hash_lock_get(watch->id())->is_write_locked());
  ut_a(watch_is_sentinel(*watch));
  if (watch->buf_fix_count())
  {
    ut_ad(watch->in_page_hash);
    ut_d(watch->in_page_hash= false);
    HASH_DELETE(buf_page_t, hash, &page_hash, watch->id().fold(), watch);
    watch->set_buf_fix_count(0);
  }
  ut_ad(!watch->in_page_hash);
  watch->set_state(BUF_BLOCK_NOT_USED);
  watch->id_= page_id_t(~0ULL);
}

/** Initialize a page for reading into the buffer pool. If the page is
(1) already in the buffer pool, or
(2) we are only supposed to read ibuf pages and the page is not an ibuf page, or
(3) the tablespace is deleted or being deleted,
then this function does nothing.
Sets the io_fix flag to BUF_IO_READ and sets a non-recursive exclusive lock
on the buffer frame. The io-handler must take care that the flag is cleared
and the lock released later.
@param[in]	mode		BUF_READ_IBUF_PAGES_ONLY, ...
@param[in]	page_id		page id
@param[in]	zip_size	ROW_FORMAT=COMPRESSED page size, or 0
@param[in]	unzip		whether the uncompressed page is
				requested (for ROW_FORMAT=COMPRESSED)
@return pointer to the block
@retval	NULL	in case of an error */
static buf_page_t* buf_page_init_for_read(ulint mode, const page_id_t page_id,
                                          ulint zip_size, bool unzip)
{
  mtr_t mtr;

  if (mode == BUF_READ_IBUF_PAGES_ONLY)
  {
    /* It is a read-ahead within an ibuf routine */
    ut_ad(!ibuf_bitmap_page(page_id, zip_size));
    ibuf_mtr_start(&mtr);

    if (!recv_no_ibuf_operations && !ibuf_page(page_id, zip_size, &mtr))
    {
      ibuf_mtr_commit(&mtr);
      return nullptr;
    }
  }
  else
    ut_ad(mode == BUF_READ_ANY_PAGE);

  buf_page_t *bpage= nullptr;
  buf_block_t *block= nullptr;
  if (!zip_size || unzip || recv_recovery_is_on())
  {
    block= buf_LRU_get_free_block(false);
    block->initialise(page_id, zip_size);
    /* We set a pass-type x-lock on the frame because then
    the same thread which called for the read operation
    (and is running now at this point of code) can wait
    for the read to complete by waiting for the x-lock on
    the frame; if the x-lock were recursive, the same
    thread would illegally get the x-lock before the page
    read is completed. The x-lock will be released
    in buf_page_read_complete() by the io-handler thread. */
    rw_lock_x_lock_gen(&block->lock, BUF_IO_READ);
  }

  const ulint fold= page_id.fold();

  mysql_mutex_lock(&buf_pool.mutex);

  buf_page_t *hash_page= buf_pool.page_hash_get_low(page_id, fold);
  if (hash_page && !buf_pool.watch_is_sentinel(*hash_page))
  {
    /* The page is already in the buffer pool. */
    if (block)
    {
      rw_lock_x_unlock_gen(&block->lock, BUF_IO_READ);
      buf_LRU_block_free_non_file_page(block);
    }
    goto func_exit;
  }

  if (UNIV_LIKELY(block != nullptr))
  {
    bpage= &block->page;

    /* Insert into the hash table of file pages */
    page_hash_latch *hash_lock= buf_pool.page_hash.lock_get(fold);
    hash_lock->write_lock();

    if (hash_page)
    {
      /* Preserve the reference count. */
      auto buf_fix_count= hash_page->buf_fix_count();
      ut_a(buf_fix_count > 0);
      block->page.add_buf_fix_count(buf_fix_count);
      buf_pool.watch_remove(hash_page);
    }

    block->page.set_io_fix(BUF_IO_READ);
    block->page.set_state(BUF_BLOCK_FILE_PAGE);
    ut_ad(!block->page.in_page_hash);
    ut_d(block->page.in_page_hash= true);
    HASH_INSERT(buf_page_t, hash, &buf_pool.page_hash, fold, bpage);
    hash_lock->write_unlock();

    /* The block must be put to the LRU list, to the old blocks */
    buf_LRU_add_block(bpage, true/* to old blocks */);

    if (UNIV_UNLIKELY(zip_size))
    {
      /* buf_pool.mutex may be released and reacquired by
      buf_buddy_alloc(). We must defer this operation until after the
      block descriptor has been added to buf_pool.LRU and
      buf_pool.page_hash. */
      block->page.zip.data= static_cast<page_zip_t*>
        (buf_buddy_alloc(zip_size));

      /* To maintain the invariant
      block->in_unzip_LRU_list == block->page.belongs_to_unzip_LRU()
      we have to add this block to unzip_LRU
      after block->page.zip.data is set. */
      ut_ad(block->page.belongs_to_unzip_LRU());
      buf_unzip_LRU_add_block(block, TRUE);
    }
  }
  else
  {
    /* The compressed page must be allocated before the
    control block (bpage), in order to avoid the
    invocation of buf_buddy_relocate_block() on
    uninitialized data. */
    bool lru= false;
    void *data= buf_buddy_alloc(zip_size, &lru);

    /* If buf_buddy_alloc() allocated storage from the LRU list,
    it released and reacquired buf_pool.mutex. Thus, we must
    check the page_hash again, as it may have been modified. */
    if (UNIV_UNLIKELY(lru))
    {
      hash_page= buf_pool.page_hash_get_low(page_id, fold);

      if (UNIV_UNLIKELY(hash_page && !buf_pool.watch_is_sentinel(*hash_page)))
      {
        /* The block was added by some other thread. */
        buf_buddy_free(data, zip_size);
        goto func_exit;
      }
    }

    bpage= buf_page_alloc_descriptor();

    page_zip_des_init(&bpage->zip);
    page_zip_set_size(&bpage->zip, zip_size);
    bpage->zip.data = (page_zip_t*) data;

    bpage->init(BUF_BLOCK_ZIP_PAGE, page_id);

    page_hash_latch *hash_lock= buf_pool.page_hash.lock_get(fold);
    hash_lock->write_lock();

    if (hash_page)
    {
      /* Preserve the reference count. It can be 0 if
      buf_pool_t::watch_unset() is executing concurrently,
      waiting for buf_pool.mutex, which we are holding. */
      bpage->add_buf_fix_count(hash_page->buf_fix_count());
      buf_pool.watch_remove(hash_page);
    }

    ut_ad(!bpage->in_page_hash);
    ut_d(bpage->in_page_hash= true);
    HASH_INSERT(buf_page_t, hash, &buf_pool.page_hash, fold, bpage);
    bpage->set_io_fix(BUF_IO_READ);
    hash_lock->write_unlock();

    /* The block must be put to the LRU list, to the old blocks.
    The zip size is already set into the page zip */
    buf_LRU_add_block(bpage, true/* to old blocks */);
  }

  mysql_mutex_unlock(&buf_pool.mutex);
  buf_pool.n_pend_reads++;
  goto func_exit_no_mutex;
func_exit:
  mysql_mutex_unlock(&buf_pool.mutex);
func_exit_no_mutex:
  if (mode == BUF_READ_IBUF_PAGES_ONLY)
    ibuf_mtr_commit(&mtr);

  ut_ad(!bpage || bpage->in_file());

  return bpage;
}

/** Low-level function which reads a page asynchronously from a file to the
buffer pool if it is not already there, in which case it does nothing.
Sets the io_fix flag and sets an exclusive lock on the buffer frame. The
flag is cleared and the x-lock released by an i/o-handler thread.

@param[out]	err		DB_SUCCESS or DB_TABLESPACE_DELETED
				if we are trying
				to read from a non-existent tablespace
@param[in,out]	space		tablespace
@param[in]	sync		true if synchronous aio is desired
@param[in]	mode		BUF_READ_IBUF_PAGES_ONLY, ...
@param[in]	page_id		page id
@param[in]	zip_size	ROW_FORMAT=COMPRESSED page size, or 0
@param[in]	unzip		true=request uncompressed page
@return whether a read request was queued */
static
bool
buf_read_page_low(
	dberr_t*		err,
	fil_space_t*		space,
	bool			sync,
	ulint			mode,
	const page_id_t		page_id,
	ulint			zip_size,
	bool			unzip)
{
	buf_page_t*	bpage;

	*err = DB_SUCCESS;

	if (buf_dblwr.is_inside(page_id)) {
		ib::error() << "Trying to read doublewrite buffer page "
			<< page_id;
		ut_ad(0);
nothing_read:
		space->release();
		return false;
	}

	if (sync) {
	} else if (trx_sys_hdr_page(page_id)
		   || ibuf_bitmap_page(page_id, zip_size)
		   || (!recv_no_ibuf_operations
		       && ibuf_page(page_id, zip_size, nullptr))) {

		/* The trx sys header page is so low in the latching
		order that we play safe and do not leave the
		i/o-completion to an asynchronous i/o-thread. Change
		buffer pages must always be read with synchronous
		i/o, to make sure they do not get involved in
		thread deadlocks. */
		sync = true;
	}
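
	/* Concrete instances of the pages forced to synchronous read
	above, assuming the usual system tablespace layout: the TRX_SYS
	header (page 5 of space 0) and the ibuf bitmap pages (with
	16KiB pages, page numbers 1, 16385, 32769, ...). */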

	/* The following call will also check if the tablespace does not
	exist or is being dropped; if we succeed in initializing the page
	in the buffer pool for read, then DISCARD cannot proceed until
	the read has completed */
	bpage = buf_page_init_for_read(mode, page_id, zip_size, unzip);

	if (bpage == NULL) {
		goto nothing_read;
	}

	ut_ad(bpage->in_file());

	if (sync) {
		thd_wait_begin(nullptr, THD_WAIT_DISKIO);
	}

	DBUG_LOG("ib_buf",
		 "read page " << page_id << " zip_size=" << zip_size
		 << " unzip=" << unzip << ',' << (sync ? "sync" : "async"));

	void*	dst;

	if (zip_size) {
		dst = bpage->zip.data;
	} else {
		ut_a(bpage->state() == BUF_BLOCK_FILE_PAGE);

		dst = ((buf_block_t*) bpage)->frame;
	}

	const ulint len = zip_size ? zip_size : srv_page_size;
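
	/* Example of the file offset computed below: with the default
	srv_page_size of 16384 bytes and no compression, page_no 5 maps
	to byte offset 5 * 16384 = 81920; for a ROW_FORMAT=COMPRESSED
	page with zip_size 8192, the same page_no maps to offset 40960. */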

	auto fio = space->io(IORequest(sync
				       ? IORequest::READ_SYNC
				       : IORequest::READ_ASYNC),
			     page_id.page_no() * len, len, dst, bpage);
	*err = fio.err;

	if (UNIV_UNLIKELY(fio.err != DB_SUCCESS)) {
		if (!sync || fio.err == DB_TABLESPACE_DELETED
		    || fio.err == DB_IO_ERROR) {
			buf_pool.corrupted_evict(bpage);
			return false;
		}

		ut_error;
	}

	if (sync) {
		thd_wait_end(NULL);

		/* The i/o was already completed in space->io() */
		*err = buf_page_read_complete(bpage, *fio.node);
		space->release();

		if (*err != DB_SUCCESS) {
			return false;
		}
	}

	return true;
}

/** Applies a random read-ahead in buf_pool if there are at least a threshold
number of accessed pages in the random read-ahead area. Does not read any
page, not even the one at the position (space, offset), if the read-ahead
mechanism is not activated. NOTE 1: the calling thread may own latches on
pages: to avoid deadlocks this function must be written such that it cannot
end up waiting for these latches! NOTE 2: the calling thread must want
access to the page given: this rule is set to prevent unintended read-aheads
performed by ibuf routines, a situation which could result in a deadlock if
the OS does not support asynchronous i/o.
@param[in]	page_id		page id of a page which the current thread
wants to access
@param[in]	zip_size	ROW_FORMAT=COMPRESSED page size, or 0
@param[in]	ibuf		whether we are inside an ibuf routine
@return number of page read requests issued; NOTE that if we read ibuf
pages, it may happen that the page at the given page number does not
get read even if we return a positive value! */
ulint
buf_read_ahead_random(const page_id_t page_id, ulint zip_size, bool ibuf)
{
  if (!srv_random_read_ahead)
    return 0;

  if (srv_startup_is_before_trx_rollback_phase)
    /* No read-ahead to avoid thread deadlocks */
    return 0;

  if (ibuf_bitmap_page(page_id, zip_size) || trx_sys_hdr_page(page_id))
    /* If it is an ibuf bitmap page or trx sys hdr, we do no
    read-ahead, as that could break the ibuf page access order */
    return 0;

  if (buf_pool.n_pend_reads > buf_pool.curr_size / BUF_READ_AHEAD_PEND_LIMIT)
    return 0;

  fil_space_t* space= fil_space_t::get(page_id.space());
  if (!space)
    return 0;

  const uint32_t buf_read_ahead_area= buf_pool.read_ahead_area;
  ulint count= 5 + buf_read_ahead_area / 8;
  const page_id_t low= page_id - (page_id.page_no() % buf_read_ahead_area);
  page_id_t high= low + buf_read_ahead_area;
  high.set_page_no(std::min(high.page_no(), space->last_page_number()));
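
  /* Worked example, assuming a 64-page read-ahead area: an access to
  page 70 yields low = 70 - (70 % 64) = 64 and high = 128 (capped to
  the last page of the tablespace), and count = 5 + 64 / 8 = 13 pages
  of [64, 128) must have been accessed recently for read-ahead to
  fire. */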

  /* Count how many blocks in the area have been recently accessed,
  that is, reside near the start of the LRU list. */

  for (page_id_t i= low; i < high; ++i)
  {
    const ulint fold= i.fold();
    page_hash_latch *hash_lock= buf_pool.page_hash.lock<false>(fold);
    const buf_page_t *bpage= buf_pool.page_hash_get_low(i, fold);
    bool found= bpage && bpage->is_accessed() && buf_page_peek_if_young(bpage);
    hash_lock->read_unlock();
    if (found && !--count)
      goto read_ahead;
  }

no_read_ahead:
  space->release();
  return 0;

read_ahead:
  if (space->is_stopping())
    goto no_read_ahead;

  /* Read all the suitable blocks within the area */
  const ulint ibuf_mode= ibuf ? BUF_READ_IBUF_PAGES_ONLY : BUF_READ_ANY_PAGE;

  for (page_id_t i= low; i < high; ++i)
  {
    if (ibuf_bitmap_page(i, zip_size))
      continue;
    if (space->is_stopping())
      break;
    dberr_t err;
    space->reacquire();
    if (buf_read_page_low(&err, space, false, ibuf_mode, i, zip_size, false))
      count++;
  }

  if (count)
    DBUG_PRINT("ib_buf", ("random read-ahead %zu pages from %s: %u",
                          count, space->chain.start->name,
                          low.page_no()));
  space->release();

  /* Read-ahead is considered one I/O operation for the purpose of
  LRU policy decision. */
  buf_LRU_stat_inc_io();

  buf_pool.stat.n_ra_pages_read_rnd+= count;
  srv_stats.buf_pool_reads.add(count);
  return count;
}

/** High-level function which reads a page from a file to buf_pool
if it is not already there. Sets the io_fix and an exclusive lock
on the buffer frame. The flag is cleared and the x-lock
released by the i/o-handler thread.
@param[in]	page_id		page id
@param[in]	zip_size	ROW_FORMAT=COMPRESSED page size, or 0
@retval DB_SUCCESS if the page was read and is not corrupted,
@retval DB_PAGE_CORRUPTED if the page is corrupted based on the checksum check,
@retval DB_DECRYPTION_FAILED if the post-encryption checksum matches but
the page fails the normal checksum check after decryption,
@retval DB_TABLESPACE_DELETED if the tablespace .ibd file is missing */
dberr_t buf_read_page(const page_id_t page_id, ulint zip_size)
{
  fil_space_t *space= fil_space_t::get(page_id.space());
  if (!space)
  {
    ib::info() << "trying to read page " << page_id
               << " in a non-existing or being-dropped tablespace";
    return DB_TABLESPACE_DELETED;
  }

  dberr_t err;
  if (buf_read_page_low(&err, space, true, BUF_READ_ANY_PAGE,
                        page_id, zip_size, false))
    srv_stats.buf_pool_reads.add(1);

  buf_LRU_stat_inc_io();
  return err;
}

/** High-level function which reads a page asynchronously from a file to the
buffer pool if it is not already there. Sets the io_fix flag and sets
an exclusive lock on the buffer frame. The flag is cleared and the x-lock
released by the i/o-handler thread.
@param[in,out]	space		tablespace
@param[in]	page_id		page id
@param[in]	zip_size	ROW_FORMAT=COMPRESSED page size, or 0 */
void buf_read_page_background(fil_space_t *space, const page_id_t page_id,
			      ulint zip_size)
{
	dberr_t err;

	if (buf_read_page_low(&err, space, false, BUF_READ_ANY_PAGE,
			      page_id, zip_size, false)) {
		srv_stats.buf_pool_reads.add(1);
	}

	switch (err) {
	case DB_SUCCESS:
	case DB_ERROR:
		break;
	case DB_TABLESPACE_DELETED:
		ib::info() << "trying to read page " << page_id
			<< " in the background"
			" in a non-existing or being-dropped tablespace";
		break;
	case DB_PAGE_CORRUPTED:
	case DB_DECRYPTION_FAILED:
		ib::error()
			<< "Background page read failed to "
			"read or decrypt " << page_id;
		break;
	default:
		ib::fatal() << "Error " << err << " in background read of "
			<< page_id;
	}

	/* We do not increment the number of I/O operations used for LRU
	policy here (buf_LRU_stat_inc_io()). We use it in heuristics to
	decide about evicting uncompressed versions of compressed pages
	from the buffer pool. Since this function is called from buffer
	pool load, these I/Os are deliberate and not part of the normal
	workload, so we can ignore them in our heuristics. */
}

/** Applies linear read-ahead if in the buf_pool the page is a border page of
a linear read-ahead area and all the pages in the area have been accessed.
Does not read any page if the read-ahead mechanism is not activated. Note
that the algorithm looks at the 'natural' adjacent successor and
predecessor of the page, which on the leaf level of a B-tree are the next
and previous page in the chain of leaves. To know these, the page specified
in (space, offset) must already be present in the buf_pool. Thus, the
natural way to use this function is to call it when a page in the buf_pool
is accessed the first time, calling this function just after it has been
buffer-fixed.
NOTE 1: as this function looks at the natural predecessor and successor
fields on the page, what happens if these are not initialized to any
sensible value? No problem: before applying read-ahead we check that the
area to read is within the span of the space; if it is not, read-ahead is
not applied. An uninitialized value may result in a useless read operation,
but only with very low probability.
NOTE 2: the calling thread may own latches on pages: to avoid deadlocks this
function must be written such that it cannot end up waiting for these
latches!
NOTE 3: the calling thread must want access to the page given: this rule is
set to prevent unintended read-aheads performed by ibuf routines, a situation
which could result in a deadlock if the OS does not support asynchronous io.
@param[in]	page_id		page id; see NOTE 3 above
@param[in]	zip_size	ROW_FORMAT=COMPRESSED page size, or 0
@param[in]	ibuf		whether we are inside an ibuf routine
@return number of page read requests issued */
ulint
buf_read_ahead_linear(const page_id_t page_id, ulint zip_size, bool ibuf)
{
  /* check if read-ahead is disabled */
  if (!srv_read_ahead_threshold)
    return 0;

  if (srv_startup_is_before_trx_rollback_phase)
    /* No read-ahead to avoid thread deadlocks */
    return 0;

  if (buf_pool.n_pend_reads > buf_pool.curr_size / BUF_READ_AHEAD_PEND_LIMIT)
    return 0;

  const uint32_t buf_read_ahead_area= buf_pool.read_ahead_area;
  const page_id_t low= page_id - (page_id.page_no() % buf_read_ahead_area);
  const page_id_t high_1= low + (buf_read_ahead_area - 1);

  /* We will check that almost all pages in the area have been accessed
  in the desired order. */
  const bool descending= page_id == low;

  if (!descending && page_id != high_1)
    /* This is not a border page of the area */
    return 0;
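
  /* Example of the border-page rule above, assuming a 64-page area:
  only an access to page 64*k (descending scan) or to page 64*k + 63
  (ascending scan) can trigger linear read-ahead; an access to, say,
  page 70 returns 0 here. */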

  if (ibuf_bitmap_page(page_id, zip_size) || trx_sys_hdr_page(page_id))
    /* If it is an ibuf bitmap page or trx sys hdr, we do no
    read-ahead, as that could break the ibuf page access order */
    return 0;

  fil_space_t *space= fil_space_t::get(page_id.space());
  if (!space)
    return 0;

  if (high_1.page_no() > space->last_page_number())
  {
    /* The area is not whole. */
fail:
    space->release();
    return 0;
  }

  /* How many out-of-order accessed pages we can ignore
  when working out the access pattern for linear read-ahead */
  ulint count= std::min<ulint>(buf_pool_t::READ_AHEAD_PAGES -
                               srv_read_ahead_threshold,
                               uint32_t{buf_pool.read_ahead_area});
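
  /* Example of the slack computed above, assuming
  buf_pool_t::READ_AHEAD_PAGES == 64 and the default
  srv_read_ahead_threshold of 56: up to 64 - 56 = 8 missing or
  out-of-order pages in the area are tolerated below. */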
  page_id_t new_low= low, new_high_1= high_1;
  unsigned prev_accessed= 0;
  for (page_id_t i= low; i != high_1; ++i)
  {
    const ulint fold= i.fold();
    page_hash_latch *hash_lock= buf_pool.page_hash.lock<false>(fold);
    const buf_page_t* bpage= buf_pool.page_hash_get_low(i, fold);
    if (i == page_id)
    {
      /* Read the natural predecessor and successor page addresses from
      the page; NOTE that because the calling thread may have an x-latch
      on the page, we do not acquire an s-latch on the page, this is to
      prevent deadlocks. The hash_lock is only protecting the
      buf_pool.page_hash for page i, not the bpage contents itself. */
      if (!bpage)
      {
hard_fail:
        hash_lock->read_unlock();
        goto fail;
      }
      const byte *f;
      switch (UNIV_EXPECT(bpage->state(), BUF_BLOCK_FILE_PAGE)) {
      case BUF_BLOCK_FILE_PAGE:
        f= reinterpret_cast<const buf_block_t*>(bpage)->frame;
        break;
      case BUF_BLOCK_ZIP_PAGE:
        f= bpage->zip.data;
        break;
      default:
        goto hard_fail;
      }

      uint32_t prev= mach_read_from_4(my_assume_aligned<4>(f + FIL_PAGE_PREV));
      uint32_t next= mach_read_from_4(my_assume_aligned<4>(f + FIL_PAGE_NEXT));
      if (prev == FIL_NULL || next == FIL_NULL)
        goto hard_fail;
      page_id_t id= page_id;
      if (descending && next - 1 == page_id.page_no())
        id.set_page_no(prev);
      else if (!descending && prev + 1 == page_id.page_no())
        id.set_page_no(next);
      else
        goto hard_fail; /* Successor or predecessor not in the right order */

      new_low= id - (id.page_no() % buf_read_ahead_area);
      new_high_1= new_low + (buf_read_ahead_area - 1);
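
      /* Example, assuming a 64-page area and an ascending scan at
      page 127 whose FIL_PAGE_NEXT is 128: id = 128, so
      new_low = 128 - (128 % 64) = 128 and new_high_1 = 191, i.e. the
      next whole area [128, 191] becomes the read-ahead target. */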

      if (id != new_low && id != new_high_1)
        /* This is not a border page of the area: return */
        goto hard_fail;
      if (new_high_1.page_no() > space->last_page_number())
        /* The area is not whole */
        goto hard_fail;
    }
    else if (!bpage)
    {
failed:
      hash_lock->read_unlock();
      if (--count)
        continue;
      goto fail;
    }

    const unsigned accessed= bpage->is_accessed();
    if (!accessed)
      goto failed;
    /* Note that buf_page_t::is_accessed() returns the time of the
    first access. If some blocks of the extent existed in the buffer
    pool at the time of a linear access pattern, the first access
    times may be nonmonotonic, even though the latest access times
    were linear. The threshold (srv_read_ahead_threshold) should help
    a little against this. */
    bool fail= prev_accessed &&
      (descending ? prev_accessed > accessed : prev_accessed < accessed);
    prev_accessed= accessed;
    if (fail)
      goto failed;
    hash_lock->read_unlock();
  }

  /* If we got this far, read-ahead can be sensible: do it */
  count= 0;
  for (ulint ibuf_mode= ibuf ? BUF_READ_IBUF_PAGES_ONLY : BUF_READ_ANY_PAGE;
       new_low != new_high_1; ++new_low)
  {
    if (ibuf_bitmap_page(new_low, zip_size))
      continue;
    if (space->is_stopping())
      break;
    dberr_t err;
    space->reacquire();
    count+= buf_read_page_low(&err, space, false, ibuf_mode, new_low, zip_size,
                              false);
  }

  if (count)
    DBUG_PRINT("ib_buf", ("linear read-ahead %zu pages from %s: %u",
                          count, space->chain.start->name,
                          new_low.page_no()));
  space->release();

  /* Read-ahead is considered one I/O operation for the purpose of
  LRU policy decision. */
  buf_LRU_stat_inc_io();

  buf_pool.stat.n_ra_pages_read+= count;
  return count;
}

/** Issues read requests for pages which recovery wants to read in.
@param[in]	space_id	tablespace id
@param[in]	page_nos	array of page numbers to read, with the
				highest page number the last in the array
@param[in]	n		number of page numbers in the array */
void buf_read_recv_pages(ulint space_id, const uint32_t* page_nos, ulint n)
{
	fil_space_t* space = fil_space_t::get(space_id);

	if (!space) {
		/* The tablespace is missing or unreadable: do nothing */
		return;
	}

	const ulint zip_size = space->zip_size();

	for (ulint i = 0; i < n; i++) {

		/* Ignore the page if it is already present in the
		freed ranges. */
		if (space->freed_ranges.contains(page_nos[i])) {
			continue;
		}

		const page_id_t	cur_page_id(space_id, page_nos[i]);

		ulint limit = 0;
		for (ulint j = 0; j < buf_pool.n_chunks; j++) {
			limit += buf_pool.chunks[j].size / 2;
		}
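
		/* The limit computed above is half of the buffer pool,
		in pages. Example, assuming a single chunk of 8192
		pages: recovery throttles below once 8192 / 2 = 4096
		reads are pending. */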

		for (ulint count = 0; buf_pool.n_pend_reads >= limit; ) {
			os_thread_sleep(10000);

			if (!(++count % 1000)) {

				ib::error()
					<< "Waited for " << count / 100
					<< " seconds for "
					<< buf_pool.n_pend_reads
					<< " pending reads";
			}
		}

		dberr_t err;
		space->reacquire();
		buf_read_page_low(&err, space, false,
				  BUF_READ_ANY_PAGE, cur_page_id, zip_size,
				  true);

		if (err == DB_DECRYPTION_FAILED || err == DB_PAGE_CORRUPTED) {
			ib::error() << "Recovery failed to read or decrypt "
				<< cur_page_id;
		}
	}

	DBUG_PRINT("ib_buf", ("recovery read (%u pages) for %s", n,
			      space->chain.start->name));
	space->release();
}