1 /*****************************************************************************
2
3 Copyright (c) 1995, 2017, Oracle and/or its affiliates. All Rights Reserved.
4 Copyright (c) 2015, 2021, MariaDB Corporation.
5
6 This program is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free Software
8 Foundation; version 2 of the License.
9
10 This program is distributed in the hope that it will be useful, but WITHOUT
11 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12 FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
13
14 You should have received a copy of the GNU General Public License along with
15 this program; if not, write to the Free Software Foundation, Inc.,
16 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
17
18 *****************************************************************************/
19
20 /**************************************************//**
21 @file buf/buf0rea.cc
22 The database buffer read
23
24 Created 11/5/1995 Heikki Tuuri
25 *******************************************************/
26
27 #include "univ.i"
28 #include <mysql/service_thd_wait.h>
29
30 #include "buf0rea.h"
31 #include "fil0fil.h"
32 #include "mtr0mtr.h"
33 #include "buf0buf.h"
34 #include "buf0flu.h"
35 #include "buf0lru.h"
36 #include "buf0dblwr.h"
37 #include "ibuf0ibuf.h"
38 #include "log0recv.h"
39 #include "trx0sys.h"
40 #include "os0file.h"
41 #include "srv0start.h"
42 #include "srv0srv.h"
43
44 /** There must be at least this many pages in buf_pool in the area to start
45 a random read-ahead */
46 #define BUF_READ_AHEAD_RANDOM_THRESHOLD(b) \
47 (5 + BUF_READ_AHEAD_AREA(b) / 8)
48
49 /** If there are buf_pool->curr_size per the number below pending reads, then
50 read-ahead is not done: this is to prevent flooding the buffer pool with
51 i/o-fixed buffer blocks */
52 #define BUF_READ_AHEAD_PEND_LIMIT 2
53
54 /********************************************************************//**
55 Unfixes the pages, unlatches the page,
56 removes it from page_hash and removes it from LRU. */
57 static
58 void
buf_read_page_handle_error(buf_page_t * bpage)59 buf_read_page_handle_error(
60 /*=======================*/
61 buf_page_t* bpage) /*!< in: pointer to the block */
62 {
63 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
64 const bool uncompressed = (buf_page_get_state(bpage)
65 == BUF_BLOCK_FILE_PAGE);
66 const page_id_t old_page_id = bpage->id;
67
68 /* First unfix and release lock on the bpage */
69 buf_pool_mutex_enter(buf_pool);
70 mutex_enter(buf_page_get_mutex(bpage));
71 ut_ad(buf_page_get_io_fix(bpage) == BUF_IO_READ);
72
73 bpage->id.set_corrupt_id();
74 /* Set BUF_IO_NONE before we remove the block from LRU list */
75 buf_page_set_io_fix(bpage, BUF_IO_NONE);
76
77 if (uncompressed) {
78 rw_lock_x_unlock_gen(
79 &((buf_block_t*) bpage)->lock,
80 BUF_IO_READ);
81 }
82
83 mutex_exit(buf_page_get_mutex(bpage));
84
85 /* remove the block from LRU list */
86 buf_LRU_free_one_page(bpage, old_page_id);
87
88 ut_ad(buf_pool->n_pend_reads > 0);
89 buf_pool->n_pend_reads--;
90
91 buf_pool_mutex_exit(buf_pool);
92 }
93
/** Low-level function which reads a page asynchronously from a file to the
buffer buf_pool if it is not already there, in which case does nothing.
Sets the io_fix flag and sets an exclusive lock on the buffer frame. The
flag is cleared and the x-lock released by an i/o-handler thread.

@param[out]	err		DB_SUCCESS, DB_TABLESPACE_DELETED or
				DB_TABLESPACE_TRUNCATED if we are trying
				to read from a non-existent tablespace, a
				tablespace which is just now being dropped,
				or a tablespace which is truncated
@param[in]	sync		true if synchronous aio is desired
@param[in]	type		IO type, SIMULATED, IGNORE_MISSING
@param[in]	mode		BUF_READ_IBUF_PAGES_ONLY, ...,
@param[in]	page_id		page id
@param[in]	page_size	page size
@param[in]	unzip		true=request uncompressed page
@param[in]	ignore_missing_space  true=ignore missing space when reading
@return 1 if a read request was queued, 0 if the page already resided
in buf_pool, or if the page is in the doublewrite buffer blocks in
which case it is never read into the pool, or if the tablespace does
not exist or is being dropped */
static
ulint
buf_read_page_low(
	dberr_t*		err,
	bool			sync,
	ulint			type,
	ulint			mode,
	const page_id_t		page_id,
	const page_size_t&	page_size,
	bool			unzip,
	bool			ignore_missing_space = false)
{
	buf_page_t*	bpage;

	*err = DB_SUCCESS;

	/* Doublewrite buffer pages are never read into the pool via
	this path; refuse and report the attempt. */
	if (page_id.space() == TRX_SYS_SPACE
	    && buf_dblwr_page_inside(page_id.page_no())) {

		ib::error() << "Trying to read doublewrite buffer page "
			<< page_id;
		return(0);
	}

	if (ibuf_bitmap_page(page_id, page_size) || trx_sys_hdr_page(page_id)) {

		/* Trx sys header is so low in the latching order that we play
		safe and do not leave the i/o-completion to an asynchronous
		i/o-thread. Ibuf bitmap pages must always be read with
		synchronous i/o, to make sure they do not get involved in
		thread deadlocks. */

		sync = true;
	}

	/* The following call will also check if the tablespace does not exist
	or is being dropped; if we succeed in initing the page in the buffer
	pool for read, then DISCARD cannot proceed until the read has
	completed */
	bpage = buf_page_init_for_read(err, mode, page_id, page_size, unzip);

	if (bpage == NULL) {
		/* Nothing was queued: the page is already buffered, or
		buf_page_init_for_read() refused (it may have set *err). */
		return(0);
	}

	DBUG_LOG("ib_buf",
		 "read page " << page_id << " size=" << page_size.physical()
		 << " unzip=" << unzip << ',' << (sync ? "sync" : "async"));

	ut_ad(buf_page_in_file(bpage));

	if (sync) {
		/* Tell the server layer this thread is about to block
		on disk i/o. */
		thd_wait_begin(NULL, THD_WAIT_DISKIO);
	}

	void*	dst;

	/* Read into the compressed frame when the page is compressed,
	otherwise into the uncompressed frame of the block. */
	if (page_size.is_compressed()) {
		dst = bpage->zip.data;
	} else {
		ut_a(buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE);

		dst = ((buf_block_t*) bpage)->frame;
	}

	IORequest	request(type | IORequest::READ);

	/* Read the whole physical page starting at offset 0. */
	*err = fil_io(
		request, sync, page_id, page_size, 0, page_size.physical(),
		dst, bpage, ignore_missing_space);

	if (sync) {
		thd_wait_end(NULL);
	}

	if (UNIV_UNLIKELY(*err != DB_SUCCESS)) {
		if (*err == DB_TABLESPACE_TRUNCATED) {
			/* Remove the page which is outside the
			truncated tablespace bounds when recovering
			from a crash happened during a truncation */
			buf_read_page_handle_error(bpage);
			if (recv_recovery_is_on()) {
				/* The read was abandoned: undo the
				pending-address bookkeeping kept by the
				recovery subsystem. */
				mutex_enter(&recv_sys->mutex);
				ut_ad(recv_sys->n_addrs > 0);
				recv_sys->n_addrs--;
				mutex_exit(&recv_sys->mutex);
			}
			return(0);
		} else if (IORequest::ignore_missing(type)
			   || *err == DB_TABLESPACE_DELETED
			   || *err == DB_IO_ERROR) {
			/* Tolerated failure: release the block that was
			initialized for this read and report 0 queued. */
			buf_read_page_handle_error(bpage);
			return(0);
		}

		/* Any other error here is unexpected. */
		ut_error;
	}

	if (sync) {
		/* The i/o is already completed when we arrive from
		fil_read */
		*err = buf_page_io_complete(bpage);

		if (*err != DB_SUCCESS) {
			return(0);
		}
	}

	return(1);
}
225
/** Applies a random read-ahead in buf_pool if there are at least a threshold
value of accessed pages from the random read-ahead area. Does not read any
page, not even the one at the position (space, offset), if the read-ahead
mechanism is not activated. NOTE 1: the calling thread may own latches on
pages: to avoid deadlocks this function must be written such that it cannot
end up waiting for these latches! NOTE 2: the calling thread must want
access to the page given: this rule is set to prevent unintended read-aheads
performed by ibuf routines, a situation which could result in a deadlock if
the OS does not support asynchronous i/o.
@param[in]	page_id		page id of a page which the current thread
wants to access
@param[in]	page_size	page size
@param[in]	inside_ibuf	TRUE if we are inside ibuf routine
@return number of page read requests issued; NOTE that if we read ibuf
pages, it may happen that the page at the given page number does not
get read even if we return a positive value! */
ulint
buf_read_ahead_random(
	const page_id_t		page_id,
	const page_size_t&	page_size,
	ibool			inside_ibuf)
{
	buf_pool_t*	buf_pool = buf_pool_get(page_id);
	ulint		recent_blocks = 0;
	ulint		ibuf_mode;
	ulint		count;
	ulint		low, high;
	dberr_t		err = DB_SUCCESS;
	ulint		i;
	const ulint	buf_read_ahead_random_area
		= BUF_READ_AHEAD_AREA(buf_pool);

	if (!srv_random_read_ahead) {
		/* Disabled by user */
		return(0);
	}

	if (srv_startup_is_before_trx_rollback_phase) {
		/* No read-ahead to avoid thread deadlocks */
		return(0);
	}

	if (ibuf_bitmap_page(page_id, page_size) || trx_sys_hdr_page(page_id)) {

		/* If it is an ibuf bitmap page or trx sys hdr, we do
		no read-ahead, as that could break the ibuf page access
		order */

		return(0);
	}

	/* [low, high) is the read-ahead area containing page_id,
	aligned down/up to a multiple of the area size. */
	low = (page_id.page_no() / buf_read_ahead_random_area)
		* buf_read_ahead_random_area;

	high = (page_id.page_no() / buf_read_ahead_random_area + 1)
		* buf_read_ahead_random_area;

	/* Clamp the upper bound to the pages for which i/o is possible
	in the tablespace; give up if the space is no longer there. */
	if (fil_space_t* space = fil_space_acquire(page_id.space())) {
		high = space->max_page_number_for_io(high);
		space->release();
	} else {
		return(0);
	}

	buf_pool_mutex_enter(buf_pool);

	if (buf_pool->n_pend_reads
	    > buf_pool->curr_size / BUF_READ_AHEAD_PEND_LIMIT) {
		/* Too many reads already pending: avoid flooding the
		pool with i/o-fixed blocks. */
		buf_pool_mutex_exit(buf_pool);

		return(0);
	}

	/* Count how many blocks in the area have been recently accessed,
	that is, reside near the start of the LRU list. */

	for (i = low; i < high; i++) {
		const buf_page_t*	bpage = buf_page_hash_get(
			buf_pool, page_id_t(page_id.space(), i));

		if (bpage != NULL
		    && buf_page_is_accessed(bpage)
		    && buf_page_peek_if_young(bpage)) {

			recent_blocks++;

			if (recent_blocks
			    >= BUF_READ_AHEAD_RANDOM_THRESHOLD(buf_pool)) {

				buf_pool_mutex_exit(buf_pool);
				goto read_ahead;
			}
		}
	}

	buf_pool_mutex_exit(buf_pool);
	/* Do nothing */
	return(0);

read_ahead:
	/* Read all the suitable blocks within the area */

	if (inside_ibuf) {
		ibuf_mode = BUF_READ_IBUF_PAGES_ONLY;
	} else {
		ibuf_mode = BUF_READ_ANY_PAGE;
	}

	count = 0;

	for (i = low; i < high; i++) {
		/* It is only sensible to do read-ahead in the non-sync aio
		mode: hence FALSE as the first parameter */

		const page_id_t	cur_page_id(page_id.space(), i);

		if (!ibuf_bitmap_page(cur_page_id, page_size)) {
			count += buf_read_page_low(
				&err, false,
				IORequest::DO_NOT_WAKE,
				ibuf_mode,
				cur_page_id, page_size, false);

			switch (err) {
			case DB_SUCCESS:
			case DB_TABLESPACE_TRUNCATED:
			case DB_ERROR:
				break;
			case DB_TABLESPACE_DELETED:
				ib::info() << "Random readahead trying to"
					" access page " << cur_page_id
					<< " in nonexisting or"
					" being-dropped tablespace";
				break;
			default:
				ut_error;
			}
		}
	}

	/* In simulated aio we wake the aio handler threads only after
	queuing all aio requests, in native aio the following call does
	nothing: */

	os_aio_simulated_wake_handler_threads();

	if (count) {
		DBUG_PRINT("ib_buf", ("random read-ahead %u pages, %u:%u",
				      (unsigned) count,
				      (unsigned) page_id.space(),
				      (unsigned) page_id.page_no()));
	}

	/* Read ahead is considered one I/O operation for the purpose of
	LRU policy decision. */
	buf_LRU_stat_inc_io();

	buf_pool->stat.n_ra_pages_read_rnd += count;
	srv_stats.buf_pool_reads.add(count);
	return(count);
}
387
388 /** High-level function which reads a page asynchronously from a file to the
389 buffer buf_pool if it is not already there. Sets the io_fix flag and sets
390 an exclusive lock on the buffer frame. The flag is cleared and the x-lock
391 released by the i/o-handler thread.
392 @param[in] page_id page id
393 @param[in] page_size page size
394 @retval DB_SUCCESS if the page was read and is not corrupted,
395 @retval DB_PAGE_CORRUPTED if page based on checksum check is corrupted,
396 @retval DB_DECRYPTION_FAILED if page post encryption checksum matches but
397 after decryption normal page checksum does not match.
398 @retval DB_TABLESPACE_DELETED if tablespace .ibd file is missing */
399 dberr_t
buf_read_page(const page_id_t page_id,const page_size_t & page_size)400 buf_read_page(
401 const page_id_t page_id,
402 const page_size_t& page_size)
403 {
404 ulint count;
405 dberr_t err = DB_SUCCESS;
406
407 /* We do synchronous IO because our AIO completion code
408 is sub-optimal. See buf_page_io_complete(), we have to
409 acquire the buffer pool mutex before acquiring the block
410 mutex, required for updating the page state. The acquire
411 of the buffer pool mutex becomes an expensive bottleneck. */
412
413 count = buf_read_page_low(
414 &err, true,
415 0, BUF_READ_ANY_PAGE, page_id, page_size, false);
416
417 srv_stats.buf_pool_reads.add(count);
418
419 if (err == DB_TABLESPACE_DELETED) {
420 ib::info() << "trying to read page " << page_id
421 << " in nonexisting or being-dropped tablespace";
422 }
423
424 /* Increment number of I/O operations used for LRU policy. */
425 buf_LRU_stat_inc_io();
426
427 return(err);
428 }
429
430 /** High-level function which reads a page asynchronously from a file to the
431 buffer buf_pool if it is not already there. Sets the io_fix flag and sets
432 an exclusive lock on the buffer frame. The flag is cleared and the x-lock
433 released by the i/o-handler thread.
434 @param[in] page_id page id
435 @param[in] page_size page size
436 @param[in] sync true if synchronous aio is desired */
437 void
buf_read_page_background(const page_id_t page_id,const page_size_t & page_size,bool sync)438 buf_read_page_background(
439 const page_id_t page_id,
440 const page_size_t& page_size,
441 bool sync)
442 {
443 ulint count;
444 dberr_t err;
445
446 count = buf_read_page_low(
447 &err, sync,
448 IORequest::DO_NOT_WAKE | IORequest::IGNORE_MISSING,
449 BUF_READ_ANY_PAGE,
450 page_id, page_size, false);
451
452 switch (err) {
453 case DB_SUCCESS:
454 case DB_TABLESPACE_TRUNCATED:
455 case DB_ERROR:
456 break;
457 case DB_TABLESPACE_DELETED:
458 ib::info() << "trying to read page " << page_id
459 << " in the background"
460 " in a non-existing or being-dropped tablespace";
461 break;
462 case DB_PAGE_CORRUPTED:
463 case DB_DECRYPTION_FAILED:
464 ib::error()
465 << "Background Page read failed to "
466 "read or decrypt " << page_id;
467 break;
468 default:
469 ib::fatal() << "Error " << err << " in background read of "
470 << page_id;
471 }
472
473 srv_stats.buf_pool_reads.add(count);
474
475 /* We do not increment number of I/O operations used for LRU policy
476 here (buf_LRU_stat_inc_io()). We use this in heuristics to decide
477 about evicting uncompressed version of compressed pages from the
478 buffer pool. Since this function is called from buffer pool load
479 these IOs are deliberate and are not part of normal workload we can
480 ignore these in our heuristics. */
481 }
482
/** Applies linear read-ahead if in the buf_pool the page is a border page of
a linear read-ahead area and all the pages in the area have been accessed.
Does not read any page if the read-ahead mechanism is not activated. Note
that the algorithm looks at the 'natural' adjacent successor and
predecessor of the page, which on the leaf level of a B-tree are the next
and previous page in the chain of leaves. To know these, the page specified
in (space, offset) must already be present in the buf_pool. Thus, the
natural way to use this function is to call it when a page in the buf_pool
is accessed the first time, calling this function just after it has been
bufferfixed.
NOTE 1: as this function looks at the natural predecessor and successor
fields on the page, what happens, if these are not initialized to any
sensible value? No problem, before applying read-ahead we check that the
area to read is within the span of the space, if not, read-ahead is not
applied. An uninitialized value may result in a useless read operation, but
only very improbably.
NOTE 2: the calling thread may own latches on pages: to avoid deadlocks this
function must be written such that it cannot end up waiting for these
latches!
NOTE 3: the calling thread must want access to the page given: this rule is
set to prevent unintended read-aheads performed by ibuf routines, a situation
which could result in a deadlock if the OS does not support asynchronous io.
@param[in]	page_id		page id; see NOTE 3 above
@param[in]	page_size	page size
@param[in]	inside_ibuf	TRUE if we are inside ibuf routine
@return number of page read requests issued */
ulint
buf_read_ahead_linear(
	const page_id_t		page_id,
	const page_size_t&	page_size,
	ibool			inside_ibuf)
{
	buf_pool_t*	buf_pool = buf_pool_get(page_id);
	buf_page_t*	bpage;
	buf_frame_t*	frame;
	buf_page_t*	pred_bpage = NULL;
	ulint		pred_offset;
	ulint		succ_offset;
	int		asc_or_desc;
	ulint		new_offset;
	ulint		fail_count;
	ulint		low, high;
	dberr_t		err = DB_SUCCESS;
	ulint		i;
	const ulint	buf_read_ahead_linear_area
		= BUF_READ_AHEAD_AREA(buf_pool);
	ulint		threshold;

	/* check if readahead is disabled */
	if (!srv_read_ahead_threshold) {
		return(0);
	}

	if (srv_startup_is_before_trx_rollback_phase) {
		/* No read-ahead to avoid thread deadlocks */
		return(0);
	}

	/* [low, high) is the read-ahead area containing page_id,
	aligned down/up to a multiple of the area size. */
	low = (page_id.page_no() / buf_read_ahead_linear_area)
		* buf_read_ahead_linear_area;
	high = (page_id.page_no() / buf_read_ahead_linear_area + 1)
		* buf_read_ahead_linear_area;

	if ((page_id.page_no() != low) && (page_id.page_no() != high - 1)) {
		/* This is not a border page of the area: return */

		return(0);
	}

	if (ibuf_bitmap_page(page_id, page_size) || trx_sys_hdr_page(page_id)) {

		/* If it is an ibuf bitmap page or trx sys hdr, we do
		no read-ahead, as that could break the ibuf page access
		order */

		return(0);
	}

	ulint	space_size;

	if (fil_space_t* space = fil_space_acquire(page_id.space())) {
		space_size = space->committed_size;
		space->release();

		if (high > space_size) {
			/* The area is not whole */
			return(0);
		}
	} else {
		return(0);
	}

	buf_pool_mutex_enter(buf_pool);

	if (buf_pool->n_pend_reads
	    > buf_pool->curr_size / BUF_READ_AHEAD_PEND_LIMIT) {
		/* Too many reads already pending: avoid flooding the
		pool with i/o-fixed blocks. */
		buf_pool_mutex_exit(buf_pool);

		return(0);
	}

	/* Check that almost all pages in the area have been accessed; if
	offset == low, the accesses must be in a descending order, otherwise,
	in an ascending order. */

	asc_or_desc = 1;

	if (page_id.page_no() == low) {
		asc_or_desc = -1;
	}

	/* How many out of order accessed pages can we ignore
	when working out the access pattern for linear readahead */
	threshold = ut_min(static_cast<ulint>(64 - srv_read_ahead_threshold),
			   BUF_READ_AHEAD_AREA(buf_pool));

	fail_count = 0;

	for (i = low; i < high; i++) {
		bpage = buf_page_hash_get(buf_pool,
					  page_id_t(page_id.space(), i));

		if (bpage == NULL || !buf_page_is_accessed(bpage)) {
			/* Not accessed */
			fail_count++;

		} else if (pred_bpage) {
			/* Note that buf_page_is_accessed() returns
			the time of the first access. If some blocks
			of the extent existed in the buffer pool at
			the time of a linear access pattern, the first
			access times may be nonmonotonic, even though
			the latest access times were linear. The
			threshold (srv_read_ahead_threshold) should help
			a little against this. */
			int res = ut_ulint_cmp(
				buf_page_is_accessed(bpage),
				buf_page_is_accessed(pred_bpage));
			/* Accesses not in the right order */
			if (res != 0 && res != asc_or_desc) {
				fail_count++;
			}
		}

		if (fail_count > threshold) {
			/* Too many failures: return */
			buf_pool_mutex_exit(buf_pool);
			return(0);
		}

		if (bpage && buf_page_is_accessed(bpage)) {
			/* Remember this page as the predecessor for the
			next access-order comparison. */
			pred_bpage = bpage;
		}
	}

	/* If we got this far, we know that enough pages in the area have
	been accessed in the right order: linear read-ahead can be sensible */

	bpage = buf_page_hash_get(buf_pool, page_id);

	if (bpage == NULL) {
		buf_pool_mutex_exit(buf_pool);

		return(0);
	}

	switch (buf_page_get_state(bpage)) {
	case BUF_BLOCK_ZIP_PAGE:
		frame = bpage->zip.data;
		break;
	case BUF_BLOCK_FILE_PAGE:
		frame = ((buf_block_t*) bpage)->frame;
		break;
	default:
		ut_error;
		break;
	}

	/* Read the natural predecessor and successor page addresses from
	the page; NOTE that because the calling thread may have an x-latch
	on the page, we do not acquire an s-latch on the page, this is to
	prevent deadlocks. Even if we read values which are nonsense, the
	algorithm will work. */

	pred_offset = fil_page_get_prev(frame);
	succ_offset = fil_page_get_next(frame);

	buf_pool_mutex_exit(buf_pool);

	if ((page_id.page_no() == low)
	    && (succ_offset == page_id.page_no() + 1)) {

		/* This is ok, we can continue */
		new_offset = pred_offset;

	} else if ((page_id.page_no() == high - 1)
		   && (pred_offset == page_id.page_no() - 1)) {

		/* This is ok, we can continue */
		new_offset = succ_offset;
	} else {
		/* Successor or predecessor not in the right order */

		return(0);
	}

	/* Recompute the read-ahead area around the page the detected
	access pattern points at. */
	low = (new_offset / buf_read_ahead_linear_area)
		* buf_read_ahead_linear_area;
	high = (new_offset / buf_read_ahead_linear_area + 1)
		* buf_read_ahead_linear_area;

	if ((new_offset != low) && (new_offset != high - 1)) {
		/* This is not a border page of the area: return */

		return(0);
	}

	if (high > space_size) {
		/* The area is not whole, return */

		return(0);
	}

	ulint	count = 0;

	/* If we got this far, read-ahead can be sensible: do it */

	ulint	ibuf_mode;

	ibuf_mode = inside_ibuf ? BUF_READ_IBUF_PAGES_ONLY : BUF_READ_ANY_PAGE;

	/* Since Windows XP seems to schedule the i/o handler thread
	very eagerly, and consequently it does not wait for the
	full read batch to be posted, we use special heuristics here */

	os_aio_simulated_put_read_threads_to_sleep();

	for (i = low; i < high; i++) {
		/* It is only sensible to do read-ahead in the non-sync
		aio mode: hence FALSE as the first parameter */

		const page_id_t	cur_page_id(page_id.space(), i);

		if (!ibuf_bitmap_page(cur_page_id, page_size)) {
			count += buf_read_page_low(
				&err, false,
				IORequest::DO_NOT_WAKE,
				ibuf_mode, cur_page_id, page_size, false);

			switch (err) {
			case DB_SUCCESS:
			case DB_TABLESPACE_TRUNCATED:
			case DB_TABLESPACE_DELETED:
			case DB_ERROR:
				break;
			case DB_PAGE_CORRUPTED:
			case DB_DECRYPTION_FAILED:
				ib::error() << "linear readahead failed to"
					" read or decrypt "
					<< page_id_t(page_id.space(), i);
				break;
			default:
				ut_error;
			}
		}
	}

	/* In simulated aio we wake the aio handler threads only after
	queuing all aio requests, in native aio the following call does
	nothing: */

	os_aio_simulated_wake_handler_threads();

	if (count) {
		DBUG_PRINT("ib_buf", ("linear read-ahead " ULINTPF " pages, "
				      "%u:%u",
				      count,
				      page_id.space(),
				      page_id.page_no()));
	}

	/* Read ahead is considered one I/O operation for the purpose of
	LRU policy decision. */
	buf_LRU_stat_inc_io();

	buf_pool->stat.n_ra_pages_read += count;
	return(count);
}
771
772 /********************************************************************//**
773 Issues read requests for pages which the ibuf module wants to read in, in
774 order to contract the insert buffer tree. Technically, this function is like
775 a read-ahead function. */
776 void
buf_read_ibuf_merge_pages(bool sync,const ulint * space_ids,const ulint * page_nos,ulint n_stored)777 buf_read_ibuf_merge_pages(
778 /*======================*/
779 bool sync, /*!< in: true if the caller
780 wants this function to wait
781 for the highest address page
782 to get read in, before this
783 function returns */
784 const ulint* space_ids, /*!< in: array of space ids */
785 const ulint* page_nos, /*!< in: array of page numbers
786 to read, with the highest page
787 number the last in the
788 array */
789 ulint n_stored) /*!< in: number of elements
790 in the arrays */
791 {
792 #ifdef UNIV_IBUF_DEBUG
793 ut_a(n_stored < srv_page_size);
794 #endif
795
796 for (ulint i = 0; i < n_stored; i++) {
797 fil_space_t* space = fil_space_acquire_silent(space_ids[i]);
798 if (!space) {
799 tablespace_deleted:
800 /* The tablespace was not found: remove all
801 entries for it */
802 ibuf_delete_for_discarded_space(space_ids[i]);
803 while (i + 1 < n_stored
804 && space_ids[i + 1] == space_ids[i]) {
805 i++;
806 }
807 continue;
808 }
809
810 ulint size = space->size;
811 if (!size) {
812 size = fil_space_get_size(space->id);
813 }
814
815 if (UNIV_UNLIKELY(page_nos[i] >= size)) {
816 do {
817 ibuf_delete_recs(page_id_t(space_ids[i],
818 page_nos[i]));
819 } while (++i < n_stored
820 && space_ids[i - 1] == space_ids[i]
821 && page_nos[i] >= size);
822 i--;
823 next:
824 space->release();
825 continue;
826 }
827
828 const page_id_t page_id(space_ids[i], page_nos[i]);
829
830 buf_pool_t* buf_pool = buf_pool_get(page_id);
831
832 while (buf_pool->n_pend_reads
833 > buf_pool->curr_size / BUF_READ_AHEAD_PEND_LIMIT) {
834 os_thread_sleep(500000);
835 }
836
837 dberr_t err;
838
839 buf_read_page_low(&err,
840 sync && (i + 1 == n_stored),
841 0,
842 BUF_READ_ANY_PAGE, page_id,
843 page_size_t(space->flags), true);
844
845 switch(err) {
846 case DB_SUCCESS:
847 case DB_TABLESPACE_TRUNCATED:
848 case DB_ERROR:
849 break;
850 case DB_TABLESPACE_DELETED:
851 space->release();
852 goto tablespace_deleted;
853 case DB_PAGE_CORRUPTED:
854 case DB_DECRYPTION_FAILED:
855 ib::error() << "Failed to read or decrypt page "
856 << page_nos[i]
857 << " of '" << space->chain.start->name
858 << "' for change buffer merge";
859 break;
860 default:
861 ut_error;
862 }
863
864 goto next;
865 }
866
867 os_aio_simulated_wake_handler_threads();
868
869 if (n_stored) {
870 DBUG_PRINT("ib_buf",
871 ("ibuf merge read-ahead %u pages, space %u",
872 unsigned(n_stored), unsigned(space_ids[0])));
873 }
874 }
875
876 /** Issues read requests for pages which recovery wants to read in.
877 @param[in] sync true if the caller wants this function to wait
878 for the highest address page to get read in, before this function returns
879 @param[in] space_id tablespace id
880 @param[in] page_nos array of page numbers to read, with the
881 highest page number the last in the array
882 @param[in] n_stored number of page numbers in the array */
883 void
buf_read_recv_pages(bool sync,ulint space_id,const ulint * page_nos,ulint n_stored)884 buf_read_recv_pages(
885 bool sync,
886 ulint space_id,
887 const ulint* page_nos,
888 ulint n_stored)
889 {
890 fil_space_t* space = fil_space_get(space_id);
891
892 if (space == NULL) {
893 /* The tablespace is missing: do nothing */
894 return;
895 }
896
897 fil_space_open_if_needed(space);
898
899 const page_size_t page_size(space->flags);
900
901 for (ulint i = 0; i < n_stored; i++) {
902 buf_pool_t* buf_pool;
903 const page_id_t cur_page_id(space_id, page_nos[i]);
904
905 ulint count = 0;
906
907 buf_pool = buf_pool_get(cur_page_id);
908 ulint limit = 0;
909 for (ulint j = 0; j < buf_pool->n_chunks; j++) {
910 limit += buf_pool->chunks[j].size / 2;
911 }
912
913 while (buf_pool->n_pend_reads >= limit) {
914 os_aio_simulated_wake_handler_threads();
915 os_thread_sleep(10000);
916
917 count++;
918
919 if (!(count % 1000)) {
920
921 ib::error()
922 << "Waited for " << count / 100
923 << " seconds for "
924 << buf_pool->n_pend_reads
925 << " pending reads";
926 }
927 }
928
929 dberr_t err;
930
931 if (sync && i + 1 == n_stored) {
932 buf_read_page_low(
933 &err, true,
934 0,
935 BUF_READ_ANY_PAGE,
936 cur_page_id, page_size, true);
937 } else {
938 buf_read_page_low(
939 &err, false,
940 IORequest::DO_NOT_WAKE,
941 BUF_READ_ANY_PAGE,
942 cur_page_id, page_size, true);
943 }
944
945 if (err == DB_DECRYPTION_FAILED || err == DB_PAGE_CORRUPTED) {
946 ib::error() << "Recovery failed to read or decrypt "
947 << cur_page_id;
948 }
949 }
950
951 os_aio_simulated_wake_handler_threads();
952
953 DBUG_PRINT("ib_buf", ("recovery read-ahead (%u pages)",
954 unsigned(n_stored)));
955 }
956