1 /*****************************************************************************
2
3 Copyright (c) 1995, 2017, Oracle and/or its affiliates. All Rights Reserved.
4
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License, version 2.0,
7 as published by the Free Software Foundation.
8
9 This program is also distributed with certain software (including
10 but not limited to OpenSSL) that is licensed under separate terms,
11 as designated in a particular file or component or in included license
12 documentation. The authors of MySQL hereby grant you an additional
13 permission to link the program and your derivative works with the
14 separately licensed software that they have included with MySQL.
15
16 This program is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 GNU General Public License, version 2.0, for more details.
20
21 You should have received a copy of the GNU General Public License along with
22 this program; if not, write to the Free Software Foundation, Inc.,
23 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
24
25 *****************************************************************************/
26
27 /**************************************************//**
28 @file buf/buf0rea.cc
29 The database buffer read
30
31 Created 11/5/1995 Heikki Tuuri
32 *******************************************************/
33
34 #include "ha_prototypes.h"
35 #include <mysql/service_thd_wait.h>
36
37 #include "buf0rea.h"
38 #include "fil0fil.h"
39 #include "mtr0mtr.h"
40 #include "buf0buf.h"
41 #include "buf0flu.h"
42 #include "buf0lru.h"
43 #include "buf0dblwr.h"
44 #include "ibuf0ibuf.h"
45 #include "log0recv.h"
46 #include "trx0sys.h"
47 #include "os0file.h"
48 #include "srv0start.h"
49 #include "srv0srv.h"
50
51 /** There must be at least this many pages in buf_pool in the area to start
52 a random read-ahead */
53 #define BUF_READ_AHEAD_RANDOM_THRESHOLD(b) \
54 (5 + BUF_READ_AHEAD_AREA(b) / 8)
55
56 /** If there are buf_pool->curr_size per the number below pending reads, then
57 read-ahead is not done: this is to prevent flooding the buffer pool with
58 i/o-fixed buffer blocks */
59 #define BUF_READ_AHEAD_PEND_LIMIT 2
60
61 /********************************************************************//**
62 Unfixes the pages, unlatches the page,
63 removes it from page_hash and removes it from LRU. */
64 static
65 void
buf_read_page_handle_error(buf_page_t * bpage)66 buf_read_page_handle_error(
67 /*=======================*/
68 buf_page_t* bpage) /*!< in: pointer to the block */
69 {
70 buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
71 const bool uncompressed = (buf_page_get_state(bpage)
72 == BUF_BLOCK_FILE_PAGE);
73
74 /* First unfix and release lock on the bpage */
75 buf_pool_mutex_enter(buf_pool);
76 mutex_enter(buf_page_get_mutex(bpage));
77 ut_ad(buf_page_get_io_fix(bpage) == BUF_IO_READ);
78 ut_ad(bpage->buf_fix_count == 0);
79
80 /* Set BUF_IO_NONE before we remove the block from LRU list */
81 buf_page_set_io_fix(bpage, BUF_IO_NONE);
82
83 if (uncompressed) {
84 rw_lock_x_unlock_gen(
85 &((buf_block_t*) bpage)->lock,
86 BUF_IO_READ);
87 }
88
89 mutex_exit(buf_page_get_mutex(bpage));
90
91 /* remove the block from LRU list */
92 buf_LRU_free_one_page(bpage);
93
94 ut_ad(buf_pool->n_pend_reads > 0);
95 ut_ad(mutex_own(&buf_pool->mutex));
96 buf_pool->n_pend_reads--;
97
98 buf_pool_mutex_exit(buf_pool);
99 }
100
/** Low-level function which reads a page asynchronously from a file to the
buffer buf_pool if it is not already there, in which case does nothing.
Sets the io_fix flag and sets an exclusive lock on the buffer frame. The
flag is cleared and the x-lock released by an i/o-handler thread.

@param[out]	err		DB_SUCCESS, DB_TABLESPACE_DELETED or
				DB_TABLESPACE_TRUNCATED if we are trying
				to read from a non-existent tablespace, a
				tablespace which is just now being dropped,
				or a tablespace which is truncated
@param[in]	sync		true if synchronous aio is desired
@param[in]	type		IO type, SIMULATED, IGNORE_MISSING
@param[in]	mode		BUF_READ_IBUF_PAGES_ONLY, ...,
@param[in]	page_id		page id
@param[in]	page_size	page size of the tablespace
@param[in]	unzip		true=request uncompressed page
@return 1 if a read request was queued, 0 if the page already resided
in buf_pool, or if the page is in the doublewrite buffer blocks in
which case it is never read into the pool, or if the tablespace does
not exist or is being dropped */
static
ulint
buf_read_page_low(
	dberr_t*		err,
	bool			sync,
	ulint			type,
	ulint			mode,
	const page_id_t&	page_id,
	const page_size_t&	page_size,
	bool			unzip)
{
	buf_page_t*	bpage;

	*err = DB_SUCCESS;

	/* Doublewrite buffer pages live inside the system tablespace but
	must never be read through the buffer pool. */
	if (page_id.space() == TRX_SYS_SPACE
	    && buf_dblwr_page_inside(page_id.page_no())) {

		ib::error() << "Trying to read doublewrite buffer page "
			<< page_id;
		return(0);
	}

	if (ibuf_bitmap_page(page_id, page_size) || trx_sys_hdr_page(page_id)) {

		/* Trx sys header is so low in the latching order that we play
		safe and do not leave the i/o-completion to an asynchronous
		i/o-thread. Ibuf bitmap pages must always be read with
		syncronous i/o, to make sure they do not get involved in
		thread deadlocks. */

		sync = true;
	}

	/* The following call will also check if the tablespace does not exist
	or is being dropped; if we succeed in initing the page in the buffer
	pool for read, then DISCARD cannot proceed until the read has
	completed */
	bpage = buf_page_init_for_read(err, mode, page_id, page_size, unzip);

	if (bpage == NULL) {
		/* Page already in the pool, or tablespace gone: nothing
		to queue. */
		return(0);
	}

	DBUG_PRINT("ib_buf", ("read page %u:%u size=%u unzip=%u,%s",
			      (unsigned) page_id.space(),
			      (unsigned) page_id.page_no(),
			      (unsigned) page_size.physical(),
			      (unsigned) unzip,
			      sync ? "sync" : "async"));

	ut_ad(buf_page_in_file(bpage));

	if (sync) {
		/* Tell the thread scheduler we are about to block on
		disk I/O. */
		thd_wait_begin(NULL, THD_WAIT_DISKIO);
	}

	void*	dst;

	/* Compressed pages are read into the zip frame; otherwise into
	the uncompressed block frame. */
	if (page_size.is_compressed()) {
		dst = bpage->zip.data;
	} else {
		ut_a(buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE);

		dst = ((buf_block_t*) bpage)->frame;
	}

	/* This debug code is only for 5.7. In trunk, with newDD,
	the space->name is no longer same as table name. */
	DBUG_EXECUTE_IF("innodb_invalid_read_after_truncate",
		fil_space_t*	space = fil_space_get(page_id.space());

		if (space != NULL && strcmp(space->name, "test/t1") == 0
		    && page_id.page_no() == space->size - 1) {
			type = IORequest::READ;
			sync = true;
		}
	);

	IORequest	request(type | IORequest::READ);

	*err = fil_io(
		request, sync, page_id, page_size, 0, page_size.physical(),
		dst, bpage);

	if (sync) {
		thd_wait_end(NULL);
	}

	if (*err != DB_SUCCESS) {
		if (*err == DB_TABLESPACE_TRUNCATED) {
			/* Remove the page which is outside the
			truncated tablespace bounds when recovering
			from a crash happened during a truncation */
			buf_read_page_handle_error(bpage);
			if (recv_recovery_on) {
				/* During recovery this page was also
				counted in recv_sys->n_addrs; undo that
				so recovery completion accounting stays
				consistent. */
				mutex_enter(&recv_sys->mutex);
				ut_ad(recv_sys->n_addrs > 0);
				recv_sys->n_addrs--;
				mutex_exit(&recv_sys->mutex);
			}
			return(0);
		} else if (IORequest::ignore_missing(type)
			   || *err == DB_TABLESPACE_DELETED) {
			buf_read_page_handle_error(bpage);
			return(0);
		}

		/* Any other I/O error is fatal. */
		ut_error;
	}

	if (sync) {
		/* The i/o is already completed when we arrive from
		fil_read */
		if (!buf_page_io_complete(bpage)) {
			return(0);
		}
	}

	return(1);
}
242
/** Applies a random read-ahead in buf_pool if there are at least a threshold
value of accessed pages from the random read-ahead area. Does not read any
page, not even the one at the position (space, offset), if the read-ahead
mechanism is not activated. NOTE 1: the calling thread may own latches on
pages: to avoid deadlocks this function must be written such that it cannot
end up waiting for these latches! NOTE 2: the calling thread must want
access to the page given: this rule is set to prevent unintended read-aheads
performed by ibuf routines, a situation which could result in a deadlock if
the OS does not support asynchronous i/o.
@param[in]	page_id		page id of a page which the current thread
wants to access
@param[in]	page_size	page size
@param[in]	inside_ibuf	TRUE if we are inside ibuf routine
@return number of page read requests issued; NOTE that if we read ibuf
pages, it may happen that the page at the given page number does not
get read even if we return a positive value! */
ulint
buf_read_ahead_random(
	const page_id_t&	page_id,
	const page_size_t&	page_size,
	ibool			inside_ibuf)
{
	buf_pool_t*	buf_pool = buf_pool_get(page_id);
	ulint		recent_blocks	= 0;
	ulint		ibuf_mode;
	ulint		count;
	ulint		low, high;
	dberr_t		err;
	ulint		i;
	const ulint	buf_read_ahead_random_area
				= BUF_READ_AHEAD_AREA(buf_pool);

	if (!srv_random_read_ahead) {
		/* Disabled by user */
		return(0);
	}

	if (srv_startup_is_before_trx_rollback_phase) {
		/* No read-ahead to avoid thread deadlocks */
		return(0);
	}

	if (ibuf_bitmap_page(page_id, page_size) || trx_sys_hdr_page(page_id)) {

		/* If it is an ibuf bitmap page or trx sys hdr, we do
		no read-ahead, as that could break the ibuf page access
		order */

		return(0);
	}

	/* [low, high) is the read-ahead area: the aligned block of
	buf_read_ahead_random_area pages containing page_id. */
	low  = (page_id.page_no() / buf_read_ahead_random_area)
		* buf_read_ahead_random_area;

	high = (page_id.page_no() / buf_read_ahead_random_area + 1)
		* buf_read_ahead_random_area;

	/* Remember the tablespace version before we ask the tablespace size
	below: if DISCARD + IMPORT changes the actual .ibd file meanwhile, we
	do not try to read outside the bounds of the tablespace! */
	if (fil_space_t* space = fil_space_acquire(page_id.space())) {

#ifdef UNIV_DEBUG
		if (srv_file_per_table) {
			ulint	size = 0;

			for (const fil_node_t*	node =
				UT_LIST_GET_FIRST(space->chain);
			     node != NULL;
			     node = UT_LIST_GET_NEXT(chain, node)) {

				size += os_file_get_size(node->handle)
					/ page_size.physical();
			}
		}
#endif /* UNIV_DEBUG */

		/* Clamp the area so we never read past the end of the
		tablespace. */
		if (high > space->size) {
			high = space->size;
		}
		fil_space_release(space);
	} else {
		/* Tablespace missing or being dropped: nothing to do. */
		return(0);
	}

	buf_pool_mutex_enter(buf_pool);

	if (buf_pool->n_pend_reads
	    > buf_pool->curr_size / BUF_READ_AHEAD_PEND_LIMIT) {
		/* Too many reads already pending: avoid flooding the
		pool with io-fixed blocks. */
		buf_pool_mutex_exit(buf_pool);

		return(0);
	}

	/* Count how many blocks in the area have been recently accessed,
	that is, reside near the start of the LRU list. */

	for (i = low; i < high; i++) {
		/* This debug code is only for 5.7. In trunk, with newDD,
		the space->name is no longer same as table name. */
		DBUG_EXECUTE_IF("innodb_invalid_read_after_truncate",
			fil_space_t*	space = fil_space_get(page_id.space());

			if (space != NULL
			    && strcmp(space->name, "test/t1") == 0) {
				high = space->size;
				buf_pool_mutex_exit(buf_pool);
				goto read_ahead;
			}
		);

		const buf_page_t*	bpage = buf_page_hash_get(
			buf_pool, page_id_t(page_id.space(), i));

		if (bpage != NULL
		    && buf_page_is_accessed(bpage)
		    && buf_page_peek_if_young(bpage)) {

			recent_blocks++;

			if (recent_blocks
			    >= BUF_READ_AHEAD_RANDOM_THRESHOLD(buf_pool)) {

				/* Threshold reached: trigger the
				read-ahead without scanning further. */
				buf_pool_mutex_exit(buf_pool);
				goto read_ahead;
			}
		}
	}

	buf_pool_mutex_exit(buf_pool);
	/* Do nothing */
	return(0);

read_ahead:
	/* Read all the suitable blocks within the area */

	if (inside_ibuf) {
		ibuf_mode = BUF_READ_IBUF_PAGES_ONLY;
	} else {
		ibuf_mode = BUF_READ_ANY_PAGE;
	}

	count = 0;

	for (i = low; i < high; i++) {
		/* It is only sensible to do read-ahead in the non-sync aio
		mode: hence FALSE as the first parameter */

		const page_id_t	cur_page_id(page_id.space(), i);

		if (!ibuf_bitmap_page(cur_page_id, page_size)) {

			count += buf_read_page_low(
				&err, false,
				IORequest::DO_NOT_WAKE,
				ibuf_mode,
				cur_page_id, page_size, false);

			if (err == DB_TABLESPACE_DELETED) {
				ib::warn() << "Random readahead trying to"
					" access page " << cur_page_id
					<< " in nonexisting or"
					" being-dropped tablespace";
				break;
			}
		}
	}

	/* In simulated aio we wake the aio handler threads only after
	queuing all aio requests, in native aio the following call does
	nothing: */

	os_aio_simulated_wake_handler_threads();

	if (count) {
		DBUG_PRINT("ib_buf", ("random read-ahead %u pages, %u:%u",
				      (unsigned) count,
				      (unsigned) page_id.space(),
				      (unsigned) page_id.page_no()));
	}

	/* Read ahead is considered one I/O operation for the purpose of
	LRU policy decision. */
	buf_LRU_stat_inc_io();

	buf_pool->stat.n_ra_pages_read_rnd += count;
	srv_stats.buf_pool_reads.add(count);
	return(count);
}
432
433 /** High-level function which reads a page asynchronously from a file to the
434 buffer buf_pool if it is not already there. Sets the io_fix flag and sets
435 an exclusive lock on the buffer frame. The flag is cleared and the x-lock
436 released by the i/o-handler thread.
437 @param[in] page_id page id
438 @param[in] page_size page size
439 @return TRUE if page has been read in, FALSE in case of failure */
440 ibool
buf_read_page(const page_id_t & page_id,const page_size_t & page_size)441 buf_read_page(
442 const page_id_t& page_id,
443 const page_size_t& page_size)
444 {
445 ulint count;
446 dberr_t err;
447
448 /* We do synchronous IO because our AIO completion code
449 is sub-optimal. See buf_page_io_complete(), we have to
450 acquire the buffer pool mutex before acquiring the block
451 mutex, required for updating the page state. The acquire
452 of the buffer pool mutex becomes an expensive bottleneck. */
453
454 count = buf_read_page_low(
455 &err, true,
456 0, BUF_READ_ANY_PAGE, page_id, page_size, false);
457
458 srv_stats.buf_pool_reads.add(count);
459
460 if (err == DB_TABLESPACE_DELETED) {
461 ib::error() << "trying to read page " << page_id
462 << " in nonexisting or being-dropped tablespace";
463 }
464
465 /* Increment number of I/O operations used for LRU policy. */
466 buf_LRU_stat_inc_io();
467
468 return(count > 0);
469 }
470
471 /** High-level function which reads a page asynchronously from a file to the
472 buffer buf_pool if it is not already there. Sets the io_fix flag and sets
473 an exclusive lock on the buffer frame. The flag is cleared and the x-lock
474 released by the i/o-handler thread.
475 @param[in] page_id page id
476 @param[in] page_size page size
477 @param[in] sync true if synchronous aio is desired
478 @return TRUE if page has been read in, FALSE in case of failure */
479 ibool
buf_read_page_background(const page_id_t & page_id,const page_size_t & page_size,bool sync)480 buf_read_page_background(
481 const page_id_t& page_id,
482 const page_size_t& page_size,
483 bool sync)
484 {
485 ulint count;
486 dberr_t err;
487
488 count = buf_read_page_low(
489 &err, sync,
490 IORequest::DO_NOT_WAKE | IORequest::IGNORE_MISSING,
491 BUF_READ_ANY_PAGE,
492 page_id, page_size, false);
493
494 srv_stats.buf_pool_reads.add(count);
495
496 /* We do not increment number of I/O operations used for LRU policy
497 here (buf_LRU_stat_inc_io()). We use this in heuristics to decide
498 about evicting uncompressed version of compressed pages from the
499 buffer pool. Since this function is called from buffer pool load
500 these IOs are deliberate and are not part of normal workload we can
501 ignore these in our heuristics. */
502
503 return(count > 0);
504 }
505
/** Applies linear read-ahead if in the buf_pool the page is a border page of
a linear read-ahead area and all the pages in the area have been accessed.
Does not read any page if the read-ahead mechanism is not activated. Note
that the algorithm looks at the 'natural' adjacent successor and
predecessor of the page, which on the leaf level of a B-tree are the next
and previous page in the chain of leaves. To know these, the page specified
in (space, offset) must already be present in the buf_pool. Thus, the
natural way to use this function is to call it when a page in the buf_pool
is accessed the first time, calling this function just after it has been
bufferfixed.
NOTE 1: as this function looks at the natural predecessor and successor
fields on the page, what happens, if these are not initialized to any
sensible value? No problem, before applying read-ahead we check that the
area to read is within the span of the space, if not, read-ahead is not
applied. An uninitialized value may result in a useless read operation, but
only very improbably.
NOTE 2: the calling thread may own latches on pages: to avoid deadlocks this
function must be written such that it cannot end up waiting for these
latches!
NOTE 3: the calling thread must want access to the page given: this rule is
set to prevent unintended read-aheads performed by ibuf routines, a situation
which could result in a deadlock if the OS does not support asynchronous io.
@param[in]	page_id		page id; see NOTE 3 above
@param[in]	page_size	page size
@param[in]	inside_ibuf	TRUE if we are inside ibuf routine
@return number of page read requests issued */
ulint
buf_read_ahead_linear(
	const page_id_t&	page_id,
	const page_size_t&	page_size,
	ibool			inside_ibuf)
{
	buf_pool_t*	buf_pool = buf_pool_get(page_id);
	buf_page_t*	bpage;
	buf_frame_t*	frame;
	buf_page_t*	pred_bpage	= NULL;
	ulint		pred_offset;
	ulint		succ_offset;
	int		asc_or_desc;
	ulint		new_offset;
	ulint		fail_count;
	ulint		low, high;
	dberr_t		err;
	ulint		i;
	const ulint	buf_read_ahead_linear_area
		= BUF_READ_AHEAD_AREA(buf_pool);
	ulint		threshold;

	/* check if readahead is disabled */
	if (!srv_read_ahead_threshold) {
		return(0);
	}

	if (srv_startup_is_before_trx_rollback_phase) {
		/* No read-ahead to avoid thread deadlocks */
		return(0);
	}

	/* [low, high) is the aligned linear read-ahead area containing
	page_id. */
	low  = (page_id.page_no() / buf_read_ahead_linear_area)
		* buf_read_ahead_linear_area;
	high = (page_id.page_no() / buf_read_ahead_linear_area + 1)
		* buf_read_ahead_linear_area;

	if ((page_id.page_no() != low) && (page_id.page_no() != high - 1)) {
		/* This is not a border page of the area: return */

		return(0);
	}

	if (ibuf_bitmap_page(page_id, page_size) || trx_sys_hdr_page(page_id)) {

		/* If it is an ibuf bitmap page or trx sys hdr, we do
		no read-ahead, as that could break the ibuf page access
		order */

		return(0);
	}

	/* Remember the tablespace version before we ask the tablespace size
	below: if DISCARD + IMPORT changes the actual .ibd file meanwhile, we
	do not try to read outside the bounds of the tablespace! */
	ulint	space_size;

	if (fil_space_t* space = fil_space_acquire(page_id.space())) {
		space_size = space->size;
		fil_space_release(space);

		if (high > space_size) {
			/* The area is not whole */
			return(0);
		}
	} else {
		/* Tablespace missing or being dropped. */
		return(0);
	}

	buf_pool_mutex_enter(buf_pool);

	if (buf_pool->n_pend_reads
	    > buf_pool->curr_size / BUF_READ_AHEAD_PEND_LIMIT) {
		/* Too many pending reads already: do not add more. */
		buf_pool_mutex_exit(buf_pool);

		return(0);
	}

	/* Check that almost all pages in the area have been accessed; if
	offset == low, the accesses must be in a descending order, otherwise,
	in an ascending order. */

	asc_or_desc = 1;

	if (page_id.page_no() == low) {
		asc_or_desc = -1;
	}

	/* How many out of order accessed pages can we ignore
	when working out the access pattern for linear readahead */
	threshold = ut_min(static_cast<ulint>(64 - srv_read_ahead_threshold),
			   BUF_READ_AHEAD_AREA(buf_pool));

	fail_count = 0;

	for (i = low; i < high; i++) {
		bpage = buf_page_hash_get(buf_pool,
					  page_id_t(page_id.space(), i));

		if (bpage == NULL || !buf_page_is_accessed(bpage)) {
			/* Not accessed */
			fail_count++;

		} else if (pred_bpage) {
			/* Note that buf_page_is_accessed() returns
			the time of the first access. If some blocks
			of the extent existed in the buffer pool at
			the time of a linear access pattern, the first
			access times may be nonmonotonic, even though
			the latest access times were linear. The
			threshold (srv_read_ahead_factor) should help
			a little against this. */
			int res = ut_ulint_cmp(
				buf_page_is_accessed(bpage),
				buf_page_is_accessed(pred_bpage));
			/* Accesses not in the right order */
			if (res != 0 && res != asc_or_desc) {
				fail_count++;
			}
		}

		if (fail_count > threshold) {
			/* Too many failures: return */
			buf_pool_mutex_exit(buf_pool);
			return(0);
		}

		/* Remember the last accessed page so the next iteration
		can compare access-time ordering against it. */
		if (bpage && buf_page_is_accessed(bpage)) {
			pred_bpage = bpage;
		}
	}

	/* If we got this far, we know that enough pages in the area have
	been accessed in the right order: linear read-ahead can be sensible */

	bpage = buf_page_hash_get(buf_pool, page_id);

	if (bpage == NULL) {
		buf_pool_mutex_exit(buf_pool);

		return(0);
	}

	switch (buf_page_get_state(bpage)) {
	case BUF_BLOCK_ZIP_PAGE:
		frame = bpage->zip.data;
		break;
	case BUF_BLOCK_FILE_PAGE:
		frame = ((buf_block_t*) bpage)->frame;
		break;
	default:
		ut_error;
		break;
	}

	/* Read the natural predecessor and successor page addresses from
	the page; NOTE that because the calling thread may have an x-latch
	on the page, we do not acquire an s-latch on the page, this is to
	prevent deadlocks. Even if we read values which are nonsense, the
	algorithm will work. */

	pred_offset = fil_page_get_prev(frame);
	succ_offset = fil_page_get_next(frame);

	buf_pool_mutex_exit(buf_pool);

	if ((page_id.page_no() == low)
	    && (succ_offset == page_id.page_no() + 1)) {

		/* This is ok, we can continue */
		new_offset = pred_offset;

	} else if ((page_id.page_no() == high - 1)
		   && (pred_offset == page_id.page_no() - 1)) {

		/* This is ok, we can continue */
		new_offset = succ_offset;
	} else {
		/* Successor or predecessor not in the right order */

		return(0);
	}

	/* Recompute the read-ahead area around the page the natural
	chain points to. */
	low  = (new_offset / buf_read_ahead_linear_area)
		* buf_read_ahead_linear_area;
	high = (new_offset / buf_read_ahead_linear_area + 1)
		* buf_read_ahead_linear_area;

	if ((new_offset != low) && (new_offset != high - 1)) {
		/* This is not a border page of the area: return */

		return(0);
	}

	if (high > space_size) {
		/* The area is not whole, return */

		return(0);
	}

	ulint	count = 0;

	/* If we got this far, read-ahead can be sensible: do it */

	ulint	ibuf_mode;

	ibuf_mode = inside_ibuf ? BUF_READ_IBUF_PAGES_ONLY : BUF_READ_ANY_PAGE;

	/* Since Windows XP seems to schedule the i/o handler thread
	very eagerly, and consequently it does not wait for the
	full read batch to be posted, we use special heuristics here */

	os_aio_simulated_put_read_threads_to_sleep();

	for (i = low; i < high; i++) {
		/* It is only sensible to do read-ahead in the non-sync
		aio mode: hence FALSE as the first parameter */

		const page_id_t	cur_page_id(page_id.space(), i);

		if (!ibuf_bitmap_page(cur_page_id, page_size)) {

			count += buf_read_page_low(
				&err, false,
				IORequest::DO_NOT_WAKE,
				ibuf_mode, cur_page_id, page_size, false);

			if (err == DB_TABLESPACE_DELETED) {
				ib::warn() << "linear readahead trying to"
					" access page "
					<< page_id_t(page_id.space(), i)
					<< " in nonexisting or being-dropped"
					" tablespace";
			}
		}
	}

	/* In simulated aio we wake the aio handler threads only after
	queuing all aio requests, in native aio the following call does
	nothing: */

	os_aio_simulated_wake_handler_threads();

	if (count) {
		DBUG_PRINT("ib_buf", ("linear read-ahead %lu pages, "
				      UINT32PF ":" UINT32PF,
				      count,
				      page_id.space(),
				      page_id.page_no()));
	}

	/* Read ahead is considered one I/O operation for the purpose of
	LRU policy decision. */
	buf_LRU_stat_inc_io();

	buf_pool->stat.n_ra_pages_read += count;
	return(count);
}
790
791 /********************************************************************//**
792 Issues read requests for pages which the ibuf module wants to read in, in
793 order to contract the insert buffer tree. Technically, this function is like
794 a read-ahead function. */
795 void
buf_read_ibuf_merge_pages(bool sync,const ulint * space_ids,const ulint * page_nos,ulint n_stored)796 buf_read_ibuf_merge_pages(
797 /*======================*/
798 bool sync, /*!< in: true if the caller
799 wants this function to wait
800 for the highest address page
801 to get read in, before this
802 function returns */
803 const ulint* space_ids, /*!< in: array of space ids */
804 const ulint* page_nos, /*!< in: array of page numbers
805 to read, with the highest page
806 number the last in the
807 array */
808 ulint n_stored) /*!< in: number of elements
809 in the arrays */
810 {
811 #ifdef UNIV_IBUF_DEBUG
812 ut_a(n_stored < UNIV_PAGE_SIZE);
813 #endif
814
815 for (ulint i = 0; i < n_stored; i++) {
816 const page_id_t page_id(space_ids[i], page_nos[i]);
817
818 buf_pool_t* buf_pool = buf_pool_get(page_id);
819
820 bool found;
821 const page_size_t page_size(fil_space_get_page_size(
822 space_ids[i], &found));
823
824 if (!found) {
825 /* The tablespace was not found, remove the
826 entries for that page */
827 ibuf_merge_or_delete_for_page(NULL, page_id,
828 NULL, FALSE);
829 continue;
830 }
831
832 while (buf_pool->n_pend_reads
833 > buf_pool->curr_size / BUF_READ_AHEAD_PEND_LIMIT) {
834 os_thread_sleep(500000);
835 }
836
837 dberr_t err;
838
839 buf_read_page_low(&err,
840 sync && (i + 1 == n_stored),
841 0,
842 BUF_READ_ANY_PAGE, page_id, page_size,
843 true);
844
845 if (err == DB_TABLESPACE_DELETED) {
846 /* We have deleted or are deleting the single-table
847 tablespace: remove the entries for the tablespace */
848
849 ibuf_delete_for_discarded_space(space_ids[i]);
850 }
851 }
852
853 os_aio_simulated_wake_handler_threads();
854
855 if (n_stored) {
856 DBUG_PRINT("ib_buf",
857 ("ibuf merge read-ahead %u pages, space %u",
858 unsigned(n_stored), unsigned(space_ids[0])));
859 }
860 }
861
/** Issues read requests for pages which recovery wants to read in.
@param[in]	sync		true if the caller wants this function to wait
for the highest address page to get read in, before this function returns
@param[in]	space_id	tablespace id
@param[in]	page_nos	array of page numbers to read, with the
highest page number the last in the array
@param[in]	n_stored	number of page numbers in the array */
void
buf_read_recv_pages(
	bool		sync,
	ulint		space_id,
	const ulint*	page_nos,
	ulint		n_stored)
{
	ulint		count;
	dberr_t		err;
	ulint		i;
	fil_space_t*	space	= fil_space_get(space_id);

	if (space == NULL) {
		/* The tablespace is missing: do nothing */
		return;
	}

	fil_space_open_if_needed(space);

	const page_size_t	page_size(space->flags);

	for (i = 0; i < n_stored; i++) {
		buf_pool_t*	buf_pool;
		const page_id_t	cur_page_id(space_id, page_nos[i]);

		/* count here is the number of 10 ms throttle sleeps,
		used only to emit a periodic warning below. */
		count = 0;

		/* Throttle: during recovery keep pending reads below
		half the reserved free frames. */
		buf_pool = buf_pool_get(cur_page_id);
		while (buf_pool->n_pend_reads >= recv_n_pool_free_frames / 2) {

			os_aio_simulated_wake_handler_threads();
			os_thread_sleep(10000);

			count++;

			/* 1000 sleeps of 10 ms each = 10 seconds:
			warn every 10 seconds of waiting. */
			if (!(count % 1000)) {

				ib::error()
					<< "Waited for " << count / 100
					<< " seconds for "
					<< buf_pool->n_pend_reads
					<< " pending reads";
			}
		}

		/* Read the last page synchronously if the caller wants
		to wait for it; queue the rest asynchronously. */
		if ((i + 1 == n_stored) && sync) {
			buf_read_page_low(
				&err, true,
				0,
				BUF_READ_ANY_PAGE,
				cur_page_id, page_size, true);
		} else {
			buf_read_page_low(
				&err, false,
				IORequest::DO_NOT_WAKE,
				BUF_READ_ANY_PAGE,
				cur_page_id, page_size, true);
		}
	}

	os_aio_simulated_wake_handler_threads();

	DBUG_PRINT("ib_buf", ("recovery read-ahead (%u pages)",
			      unsigned(n_stored)));
}
934
935