/*-
 * Copyright (c) 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * %sccs.include.redist.c%
 *
 *	@(#)vfs_cluster.c	8.6 (Berkeley) 02/05/94
 */

#include <sys/param.h>
#include <sys/proc.h>
#include <sys/buf.h>
#include <sys/vnode.h>
#include <sys/mount.h>
#include <sys/trace.h>
#include <sys/malloc.h>
#include <sys/resourcevar.h>
#include <libkern/libkern.h>

/*
 * Local declarations
 */
struct buf *cluster_newbuf __P((struct vnode *, struct buf *, long, daddr_t,
	    daddr_t, long, int));
struct buf *cluster_rbuild __P((struct vnode *, u_quad_t, struct buf *,
	    daddr_t, daddr_t, long, int, long));
void	cluster_wbuild __P((struct vnode *, struct buf *, long,
	    daddr_t, int, daddr_t));
struct cluster_save *cluster_collectbufs __P((struct vnode *, struct buf *));

#ifdef DIAGNOSTIC
/*
 * Set to 1 if reads of block zero should cause readahead to be done.
 * Set to 0 to treat a read of block zero as a non-sequential read.
 *
 * Setting to one assumes that most reads of block zero of files are due to
 * sequential passes over the files (e.g. cat, sum) where additional blocks
 * will soon be needed.  Setting to zero assumes that the majority are
 * surgical strikes to get particular info (e.g. size, file) where readahead
 * blocks will not be used and, in fact, push out other potentially useful
 * blocks from the cache.  The former seems intuitive, but some quick tests
 * showed that the latter performed better from a system-wide point of view.
 */
int	doclusterraz = 0;
#define ISSEQREAD(vp, blk) \
	(((blk) != 0 || doclusterraz) && \
	 ((blk) == (vp)->v_lastr + 1 || (blk) == (vp)->v_lastr))
#else
#define ISSEQREAD(vp, blk) \
	((blk) != 0 && ((blk) == (vp)->v_lastr + 1 || (blk) == (vp)->v_lastr))
#endif
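
/*
 * Example of how ISSEQREAD classifies accesses, using the non-DIAGNOSTIC
 * definition above: with v_lastr == 7, a read of block 8 (v_lastr + 1) or
 * a re-read of block 7 counts as sequential, a read of block 12 does not,
 * and a read of block 0 never does.  The (blk) == (vp)->v_lastr case
 * catches a process that reads with a buffer smaller than the filesystem
 * block size and so hits the same block several times in a row.
 */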

/*
 * This replaces bread.  If this is a bread at the beginning of a file and
 * lastr is 0, we assume this is the first read and we'll read up to two
 * blocks if they are sequential.  After that, we'll do regular read ahead
 * in clustered chunks.
 *
 * There are 4 or 5 cases depending on how you count:
 *	Desired block is in the cache:
 *	    1 Not sequential access (0 I/Os).
 *	    2 Access is sequential, do read-ahead (1 ASYNC).
 *	Desired block is not in cache:
 *	    3 Not sequential access (1 SYNC).
 *	    4 Sequential access, next block is contiguous (1 SYNC).
 *	    5 Sequential access, next block is not contiguous (1 SYNC, 1 ASYNC).
 *
 * There are potentially two buffers that require I/O.
 *	bp is the block requested.
 *	rbp is the read-ahead block.
 *	If either is NULL, then you don't have to do the I/O.
 */
cluster_read(vp, filesize, lblkno, size, cred, bpp)
	struct vnode *vp;
	u_quad_t filesize;
	daddr_t lblkno;
	long size;
	struct ucred *cred;
	struct buf **bpp;
{
	struct buf *bp, *rbp;
	daddr_t blkno, ioblkno;
	long flags;
	int error, num_ra, alreadyincore;

#ifdef DIAGNOSTIC
	if (size == 0)
		panic("cluster_read: size = 0");
#endif

	error = 0;
	flags = B_READ;
	*bpp = bp = getblk(vp, lblkno, size, 0, 0);
	if (bp->b_flags & B_CACHE) {
		/*
		 * Desired block is in cache; do any readahead ASYNC.
		 * Case 1, 2.
		 */
		trace(TR_BREADHIT, pack(vp, size), lblkno);
		flags |= B_ASYNC;
		ioblkno = lblkno + (vp->v_ralen ? vp->v_ralen : 1);
		alreadyincore = (int)incore(vp, ioblkno);
		bp = NULL;
	} else {
		/* Block wasn't in cache, case 3, 4, 5. */
		trace(TR_BREADMISS, pack(vp, size), lblkno);
		bp->b_flags |= B_READ;
		ioblkno = lblkno;
		alreadyincore = 0;
		curproc->p_stats->p_ru.ru_inblock++;		/* XXX */
	}
	/*
	 * XXX
	 * Replace 1 with a window size based on some permutation of
	 * maxcontig and rot_delay.  This will let you figure out how
	 * many blocks you should read-ahead (case 2, 4, 5).
	 *
	 * If the access isn't sequential, reset the window to 1.
	 * Note that a read to the same block is considered sequential.
	 * This catches the case where the file is being read sequentially,
	 * but at smaller than the filesystem block size.
	 */
	rbp = NULL;
	if (!ISSEQREAD(vp, lblkno)) {
		vp->v_ralen = 0;
		vp->v_maxra = lblkno;
	} else if ((ioblkno + 1) * size <= filesize && !alreadyincore &&
	    !(error = VOP_BMAP(vp, ioblkno, NULL, &blkno, &num_ra)) &&
	    blkno != -1) {
		/*
		 * Reading sequentially, and the next block is not in the
		 * cache.  We are going to try reading ahead.
		 */
		if (num_ra) {
			/*
			 * If our desired readahead block had been read
			 * in a previous readahead but is no longer in
			 * core, then we may be reading ahead too far
			 * or are not using our readahead very rapidly.
			 * In this case we scale back the window.
			 */
			if (!alreadyincore && ioblkno <= vp->v_maxra)
				vp->v_ralen = max(vp->v_ralen >> 1, 1);
			/*
			 * There are more sequential blocks than our current
			 * window allows, scale up.  Ideally we want to get
			 * in sync with the filesystem maxcontig value.
			 */
			else if (num_ra > vp->v_ralen && lblkno != vp->v_lastr)
				vp->v_ralen = vp->v_ralen ?
				    min(num_ra, vp->v_ralen << 1) : 1;

			if (num_ra > vp->v_ralen)
				num_ra = vp->v_ralen;
		}

		if (num_ra)				/* case 2, 4 */
			rbp = cluster_rbuild(vp, filesize,
			    bp, ioblkno, blkno, size, num_ra, flags);
		else if (ioblkno == lblkno) {
			bp->b_blkno = blkno;
			/* Case 5: check how many blocks to read ahead */
			++ioblkno;
			if ((ioblkno + 1) * size > filesize ||
			    incore(vp, ioblkno) || (error = VOP_BMAP(vp,
			    ioblkno, NULL, &blkno, &num_ra)) || blkno == -1)
				goto skip_readahead;
			/*
			 * Adjust readahead as above.
			 */
			if (num_ra) {
				if (!alreadyincore && ioblkno <= vp->v_maxra)
					vp->v_ralen = max(vp->v_ralen >> 1, 1);
				else if (num_ra > vp->v_ralen &&
				    lblkno != vp->v_lastr)
					vp->v_ralen = vp->v_ralen ?
					    min(num_ra, vp->v_ralen << 1) : 1;
				if (num_ra > vp->v_ralen)
					num_ra = vp->v_ralen;
			}
			flags |= B_ASYNC;
			if (num_ra)
				rbp = cluster_rbuild(vp, filesize,
				    NULL, ioblkno, blkno, size, num_ra, flags);
			else {
				rbp = getblk(vp, ioblkno, size, 0, 0);
				rbp->b_flags |= flags;
				rbp->b_blkno = blkno;
			}
		} else {
			/* case 2; read ahead single block */
			rbp = getblk(vp, ioblkno, size, 0, 0);
			rbp->b_flags |= flags;
			rbp->b_blkno = blkno;
		}

		if (rbp == bp)				/* case 4 */
			rbp = NULL;
		else if (rbp) {				/* case 2, 5 */
			trace(TR_BREADMISSRA,
			    pack(vp, (num_ra + 1) * size), ioblkno);
			curproc->p_stats->p_ru.ru_inblock++;	/* XXX */
		}
	}

	/* XXX Kirk, do we need to make sure the bp has creds? */
skip_readahead:
	if (bp)
		if (bp->b_flags & (B_DONE | B_DELWRI))
			panic("cluster_read: DONE bp");
		else
			error = VOP_STRATEGY(bp);

	if (rbp)
		if (error || rbp->b_flags & (B_DONE | B_DELWRI)) {
			rbp->b_flags &= ~(B_ASYNC | B_READ);
			brelse(rbp);
		} else
			(void) VOP_STRATEGY(rbp);

	/*
	 * Recalculate our maximum readahead.
	 */
	if (rbp == NULL)
		rbp = bp;
	if (rbp)
		vp->v_maxra = rbp->b_lblkno + (rbp->b_bufsize / size) - 1;

	if (bp)
		return(biowait(bp));
	return(error);
}
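
/*
 * A sketch of how the window scaling above behaves, assuming VOP_BMAP
 * keeps reporting a long contiguous run (num_ra large): a process reading
 * blocks 1, 2, 3, ... grows v_ralen 1, 2, 4, 8, ... up to num_ra, so each
 * ASYNC readahead covers roughly twice the span of the previous one.  If
 * a block inside the previously read-ahead range (ioblkno <= v_maxra) has
 * already been recycled before it is wanted, the window is halved instead,
 * so a consumer that cannot keep up with its own readahead shrinks the
 * window back toward one block.
 */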

/*
 * If blocks are contiguous on disk, use this to provide clustered
 * read ahead.  We will read as many blocks as possible sequentially
 * and then parcel them up into logical blocks in the buffer hash table.
 */
struct buf *
cluster_rbuild(vp, filesize, bp, lbn, blkno, size, run, flags)
	struct vnode *vp;
	u_quad_t filesize;
	struct buf *bp;
	daddr_t lbn;
	daddr_t blkno;
	long size;
	int run;
	long flags;
{
	struct cluster_save *b_save;
	struct buf *tbp;
	daddr_t bn;
	int i, inc;

#ifdef DIAGNOSTIC
	if (size != vp->v_mount->mnt_stat.f_iosize)
		panic("cluster_rbuild: size %d != filesize %d\n",
		    size, vp->v_mount->mnt_stat.f_iosize);
#endif
	if (size * (lbn + run + 1) > filesize)
		--run;
	if (run == 0) {
		if (!bp) {
			bp = getblk(vp, lbn, size, 0, 0);
			bp->b_blkno = blkno;
			bp->b_flags |= flags;
		}
		return(bp);
	}

	bp = cluster_newbuf(vp, bp, flags, blkno, lbn, size, run + 1);
	if (bp->b_flags & (B_DONE | B_DELWRI))
		return (bp);

	b_save = malloc(sizeof(struct buf *) * run + sizeof(struct cluster_save),
	    M_SEGMENT, M_WAITOK);
	b_save->bs_bufsize = b_save->bs_bcount = size;
	b_save->bs_nchildren = 0;
	b_save->bs_children = (struct buf **)(b_save + 1);
	b_save->bs_saveaddr = bp->b_saveaddr;
	bp->b_saveaddr = (caddr_t) b_save;

	inc = btodb(size);
	for (bn = blkno + inc, i = 1; i <= run; ++i, bn += inc) {
		if (incore(vp, lbn + i)) {
			if (i == 1) {
				bp->b_saveaddr = b_save->bs_saveaddr;
				bp->b_flags &= ~B_CALL;
				bp->b_iodone = NULL;
				allocbuf(bp, size);
				free(b_save, M_SEGMENT);
			} else
				allocbuf(bp, size * i);
			break;
		}
		tbp = getblk(vp, lbn + i, 0, 0, 0);
		/*
		 * getblk may return some memory in the buffer if there were
		 * no empty buffers to shed it to.  If there is currently
		 * memory in the buffer, we move it down size bytes to make
		 * room for the valid pages that cluster_callback will insert.
		 * We do this now so we don't have to do it at interrupt time
		 * in the callback routine.
		 */
		if (tbp->b_bufsize != 0) {
			caddr_t bdata = (char *)tbp->b_data;

			if (tbp->b_bufsize + size > MAXBSIZE)
				panic("cluster_rbuild: too much memory");
			if (tbp->b_bufsize > size) {
				/*
				 * XXX if the source and destination regions
				 * overlap we have to copy backward to avoid
				 * clobbering any valid pages (i.e. pagemove
				 * implementations typically can't handle
				 * overlap).
				 */
				bdata += tbp->b_bufsize;
				while (bdata > (char *)tbp->b_data) {
					bdata -= CLBYTES;
					pagemove(bdata, bdata + size, CLBYTES);
				}
			} else
				pagemove(bdata, bdata + size, tbp->b_bufsize);
		}
		tbp->b_blkno = bn;
		tbp->b_flags |= flags | B_READ | B_ASYNC;
		++b_save->bs_nchildren;
		b_save->bs_children[i - 1] = tbp;
	}
	return(bp);
}
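
/*
 * Layout of the b_save allocation made in cluster_rbuild above: the header
 * and the child pointer array share one malloc'ed block,
 *
 *	+---------------------+----------------------------+
 *	| struct cluster_save | run * sizeof(struct buf *) |
 *	+---------------------+----------------------------+
 *	                      ^-- bs_children = (struct buf **)(b_save + 1)
 *
 * so the single free(b_save, M_SEGMENT) in cluster_callback releases both
 * the bookkeeping and the array.
 */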

/*
 * Either get a new buffer or grow the existing one.
 */
struct buf *
cluster_newbuf(vp, bp, flags, blkno, lblkno, size, run)
	struct vnode *vp;
	struct buf *bp;
	long flags;
	daddr_t blkno;
	daddr_t lblkno;
	long size;
	int run;
{
	if (!bp) {
		bp = getblk(vp, lblkno, size, 0, 0);
		if (bp->b_flags & (B_DONE | B_DELWRI)) {
			bp->b_blkno = blkno;
			return(bp);
		}
	}
	allocbuf(bp, run * size);
	bp->b_blkno = blkno;
	bp->b_iodone = cluster_callback;
	bp->b_flags |= flags | B_CALL;
	return(bp);
}

/*
 * Cleanup after a clustered read or write.
 * This is complicated by the fact that any of the buffers might have
 * extra memory (if there were no empty buffer headers at allocbuf time)
 * that we will need to shift around.
 */
void
cluster_callback(bp)
	struct buf *bp;
{
	struct cluster_save *b_save;
	struct buf **bpp, *tbp;
	long bsize;
	caddr_t cp;
	int error = 0;

	/*
	 * Must propagate errors to all the components.
	 */
	if (bp->b_flags & B_ERROR)
		error = bp->b_error;

	b_save = (struct cluster_save *)(bp->b_saveaddr);
	bp->b_saveaddr = b_save->bs_saveaddr;

	bsize = b_save->bs_bufsize;
	cp = (char *)bp->b_data + bsize;
	/*
	 * Move memory from the large cluster buffer into the component
	 * buffers and mark IO as done on these.
	 */
	for (bpp = b_save->bs_children; b_save->bs_nchildren--; ++bpp) {
		tbp = *bpp;
		pagemove(cp, tbp->b_data, bsize);
		tbp->b_bufsize += bsize;
		tbp->b_bcount = bsize;
		if (error) {
			tbp->b_flags |= B_ERROR;
			tbp->b_error = error;
		}
		biodone(tbp);
		bp->b_bufsize -= bsize;
		cp += bsize;
	}
	/*
	 * If there was excess memory in the cluster buffer,
	 * slide it up adjacent to the remaining valid data.
	 */
	if (bp->b_bufsize != bsize) {
		if (bp->b_bufsize < bsize)
			panic("cluster_callback: too little memory");
		pagemove(cp, (char *)bp->b_data + bsize, bp->b_bufsize - bsize);
	}
	bp->b_bcount = bsize;
	bp->b_iodone = NULL;
	free(b_save, M_SEGMENT);
	if (bp->b_flags & B_ASYNC)
		brelse(bp);
	else {
		bp->b_flags &= ~B_WANTED;
		wakeup((caddr_t)bp);
	}
}
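
/*
 * Worked example of the accounting in cluster_callback, assuming a
 * four-block cluster of 8K blocks (bs_bufsize == 8192): bp comes back from
 * the driver holding 32K; the loop peels 8K off for each of the three
 * children with pagemove, leaving bp->b_bufsize == bsize, so the trailing
 * pagemove is skipped.  Had the cluster buffer been left with extra pages
 * (no empty headers at allocbuf time), bp->b_bufsize would still exceed
 * bsize here and the excess is slid up adjacent to bp's remaining valid
 * data.
 */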

/*
 * Do clustered write for FFS.
 *
 * Four cases:
 *	1. Write is not sequential (write asynchronously).
 *	Write is sequential:
 *	2. beginning of cluster - begin cluster.
 *	3. middle of a cluster - add to cluster.
 *	4. end of a cluster - asynchronously write cluster.
 */
void
cluster_write(bp, filesize)
	struct buf *bp;
	u_quad_t filesize;
{
	struct vnode *vp;
	daddr_t lbn;
	int maxclen, cursize;

	vp = bp->b_vp;
	lbn = bp->b_lblkno;

	/* Initialize vnode to beginning of file. */
	if (lbn == 0)
		vp->v_lasta = vp->v_clen = vp->v_cstart = vp->v_lastw = 0;

	if (vp->v_clen == 0 || lbn != vp->v_lastw + 1 ||
	    (bp->b_blkno != vp->v_lasta + btodb(bp->b_bcount))) {
		maxclen = MAXBSIZE / vp->v_mount->mnt_stat.f_iosize - 1;
		if (vp->v_clen != 0) {
			/*
			 * Next block is not sequential.
			 *
			 * If we are not writing at end of file, the process
			 * seeked to another point in the file since its
			 * last write, or we have reached our maximum
			 * cluster size, then push the previous cluster.
			 * Otherwise try reallocating to make it sequential.
			 */
			cursize = vp->v_lastw - vp->v_cstart + 1;
			if ((lbn + 1) * bp->b_bcount != filesize ||
			    lbn != vp->v_lastw + 1 || vp->v_clen <= cursize) {
				cluster_wbuild(vp, NULL, bp->b_bcount,
				    vp->v_cstart, cursize, lbn);
			} else {
				struct buf **bpp, **endbp;
				struct cluster_save *buflist;

				buflist = cluster_collectbufs(vp, bp);
				endbp = &buflist->bs_children
				    [buflist->bs_nchildren - 1];
				if (VOP_REALLOCBLKS(vp, buflist)) {
					/*
					 * Failed, push the previous cluster.
					 */
					for (bpp = buflist->bs_children;
					    bpp < endbp; bpp++)
						brelse(*bpp);
					free(buflist, M_SEGMENT);
					cluster_wbuild(vp, NULL, bp->b_bcount,
					    vp->v_cstart, cursize, lbn);
				} else {
					/*
					 * Succeeded, keep building cluster.
					 */
					for (bpp = buflist->bs_children;
					    bpp <= endbp; bpp++)
						bdwrite(*bpp);
					free(buflist, M_SEGMENT);
					vp->v_lastw = lbn;
					vp->v_lasta = bp->b_blkno;
					return;
				}
			}
		}
		/*
		 * Consider beginning a cluster.
		 * If at end of file, make cluster as large as possible,
		 * otherwise find size of existing cluster.
		 */
		if ((lbn + 1) * bp->b_bcount != filesize &&
		    (VOP_BMAP(vp, lbn, NULL, &bp->b_blkno, &maxclen) ||
		    bp->b_blkno == -1)) {
			bawrite(bp);
			vp->v_clen = 0;
			vp->v_lasta = bp->b_blkno;
			vp->v_cstart = lbn + 1;
			vp->v_lastw = lbn;
			return;
		}
		vp->v_clen = maxclen;
		if (maxclen == 0) {			/* I/O not contiguous */
			vp->v_cstart = lbn + 1;
			bawrite(bp);
		} else {				/* Wait for rest of cluster */
			vp->v_cstart = lbn;
			bdwrite(bp);
		}
	} else if (lbn == vp->v_cstart + vp->v_clen) {
		/*
		 * At end of cluster, write it out.
		 */
		cluster_wbuild(vp, bp, bp->b_bcount, vp->v_cstart,
		    vp->v_clen + 1, lbn);
		vp->v_clen = 0;
		vp->v_cstart = lbn + 1;
	} else
		/*
		 * In the middle of a cluster, so just delay the
		 * I/O for now.
		 */
		bdwrite(bp);
	vp->v_lastw = lbn;
	vp->v_lasta = bp->b_blkno;
}
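
/*
 * Example of the cluster state above, assuming 8K filesystem blocks, 64K
 * MAXBSIZE (so maxclen == 7), contiguous allocation, and a file being
 * extended one block at a time: block 0 begins a cluster (v_cstart = 0,
 * v_clen = 7) and is simply delayed; blocks 1-6 hit the middle-of-cluster
 * case and are delayed as well; block 7 == v_cstart + v_clen completes the
 * cluster, so all eight blocks go to disk in a single cluster_wbuild and
 * v_cstart advances to 8.
 */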

/*
 * This is an awful lot like cluster_rbuild...wish they could be combined.
 * The last lbn argument is the current block on which I/O is being
 * performed.  Check to see that it doesn't fall in the middle of
 * the current block (if last_bp == NULL).
 */
void
cluster_wbuild(vp, last_bp, size, start_lbn, len, lbn)
	struct vnode *vp;
	struct buf *last_bp;
	long size;
	daddr_t start_lbn;
	int len;
	daddr_t lbn;
{
	struct cluster_save *b_save;
	struct buf *bp, *tbp;
	caddr_t cp;
	int i, s;

#ifdef DIAGNOSTIC
	if (size != vp->v_mount->mnt_stat.f_iosize)
		panic("cluster_wbuild: size %d != filesize %d\n",
		    size, vp->v_mount->mnt_stat.f_iosize);
#endif
redo:
	while ((!incore(vp, start_lbn) || start_lbn == lbn) && len) {
		++start_lbn;
		--len;
	}

	/* Get more memory for current buffer */
	if (len <= 1) {
		if (last_bp) {
			bawrite(last_bp);
		} else if (len) {
			bp = getblk(vp, start_lbn, size, 0, 0);
			bawrite(bp);
		}
		return;
	}

	bp = getblk(vp, start_lbn, size, 0, 0);
	if (!(bp->b_flags & B_DELWRI)) {
		++start_lbn;
		--len;
		brelse(bp);
		goto redo;
	}

	/*
	 * Extra memory in the buffer, punt on this buffer.
	 * XXX we could handle this in most cases, but we would have to
	 * push the extra memory down to after our max possible cluster
	 * size and then potentially pull it back up if the cluster was
	 * terminated prematurely--too much hassle.
	 */
	if (bp->b_bcount != bp->b_bufsize) {
		++start_lbn;
		--len;
		bawrite(bp);
		goto redo;
	}

	--len;
	b_save = malloc(sizeof(struct buf *) * len + sizeof(struct cluster_save),
	    M_SEGMENT, M_WAITOK);
	b_save->bs_bcount = bp->b_bcount;
	b_save->bs_bufsize = bp->b_bufsize;
	b_save->bs_nchildren = 0;
	b_save->bs_children = (struct buf **)(b_save + 1);
	b_save->bs_saveaddr = bp->b_saveaddr;
	bp->b_saveaddr = (caddr_t) b_save;

	bp->b_flags |= B_CALL;
	bp->b_iodone = cluster_callback;
	cp = (char *)bp->b_data + size;
	for (++start_lbn, i = 0; i < len; ++i, ++start_lbn) {
		/*
		 * Block is not in core or the non-sequential block
		 * ending our cluster was part of the cluster (in which
		 * case we don't want to write it twice).
		 */
		if (!incore(vp, start_lbn) ||
		    last_bp == NULL && start_lbn == lbn)
			break;

		/*
		 * Get the desired block buffer (unless it is the final
		 * sequential block whose buffer was passed in explicitly
		 * as last_bp).
		 */
		if (last_bp == NULL || start_lbn != lbn) {
			tbp = getblk(vp, start_lbn, size, 0, 0);
			if (!(tbp->b_flags & B_DELWRI)) {
				brelse(tbp);
				break;
			}
		} else
			tbp = last_bp;

		++b_save->bs_nchildren;

		/* Move memory from children to parent */
		if (tbp->b_blkno != (bp->b_blkno + btodb(bp->b_bufsize))) {
			printf("Clustered Block: %d addr %x bufsize: %d\n",
			    bp->b_lblkno, bp->b_blkno, bp->b_bufsize);
			printf("Child Block: %d addr: %x\n", tbp->b_lblkno,
			    tbp->b_blkno);
			panic("Clustered write to wrong blocks");
		}

		pagemove(tbp->b_data, cp, size);
		bp->b_bcount += size;
		bp->b_bufsize += size;

		tbp->b_bufsize -= size;
		tbp->b_flags &= ~(B_READ | B_DONE | B_ERROR | B_DELWRI);
		tbp->b_flags |= (B_ASYNC | B_AGE);
		s = splbio();
		reassignbuf(tbp, tbp->b_vp);		/* put on clean list */
		++tbp->b_vp->v_numoutput;
		splx(s);
		b_save->bs_children[i] = tbp;

		cp += size;
	}

	if (i == 0) {
		/* None to cluster */
		bp->b_saveaddr = b_save->bs_saveaddr;
		bp->b_flags &= ~B_CALL;
		bp->b_iodone = NULL;
		free(b_save, M_SEGMENT);
	}
	bawrite(bp);
	if (i < len) {
		len -= i + 1;
		start_lbn += 1;
		goto redo;
	}
}
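
/*
 * Example of the scan above, assuming dirty (B_DELWRI) blocks 10-13 in
 * core and block 14 absent: cluster_wbuild(vp, NULL, size, 10, 5, 14)
 * makes block 10 the parent, absorbs 11-13 as children with pagemove
 * (each child header goes on the clean list with B_ASYNC set, to be
 * released by biodone from cluster_callback), writes the four-block
 * parent with bawrite, and then takes the goto redo to rescan what is
 * left of the range.
 */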

/*
 * Collect together all the buffers in a cluster,
 * plus add one additional buffer.
 */
struct cluster_save *
cluster_collectbufs(vp, last_bp)
	struct vnode *vp;
	struct buf *last_bp;
{
	struct cluster_save *buflist;
	daddr_t lbn;
	int i, len;

	len = vp->v_lastw - vp->v_cstart + 1;
	buflist = malloc(sizeof(struct buf *) * (len + 1) + sizeof(*buflist),
	    M_SEGMENT, M_WAITOK);
	buflist->bs_nchildren = 0;
	buflist->bs_children = (struct buf **)(buflist + 1);
	for (lbn = vp->v_cstart, i = 0; i < len; lbn++, i++)
		(void)bread(vp, lbn, last_bp->b_bcount, NOCRED,
		    &buflist->bs_children[i]);
	buflist->bs_children[i] = last_bp;
	buflist->bs_nchildren = i + 1;
	return (buflist);
}
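
/*
 * A typical caller is a filesystem read or write routine; roughly (a
 * sketch of the ufs_readwrite.c usage, not part of this file):
 *
 *	if (doclusterread)
 *		error = cluster_read(vp, ip->i_size, lbn, size,
 *		    NOCRED, &bp);
 *	else
 *		error = bread(vp, lbn, size, NOCRED, &bp);
 *
 * with cluster_write similarly standing in for bdwrite/bawrite on the
 * write side when doclusterwrite is set.
 */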