/*-
 * Copyright (c) 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * %sccs.include.redist.c%
 *
 *	@(#)vfs_cluster.c	8.10 (Berkeley) 03/28/95
 */

#include <sys/param.h>
#include <sys/proc.h>
#include <sys/buf.h>
#include <sys/vnode.h>
#include <sys/mount.h>
#include <sys/trace.h>
#include <sys/malloc.h>
#include <sys/resourcevar.h>
#include <libkern/libkern.h>

/*
 * Local declarations
 */
struct buf *cluster_newbuf __P((struct vnode *, struct buf *, long, daddr_t,
	    daddr_t, long, int));
struct buf *cluster_rbuild __P((struct vnode *, u_quad_t, struct buf *,
	    daddr_t, daddr_t, long, int, long));
void	    cluster_wbuild __P((struct vnode *, struct buf *, long,
	    daddr_t, int, daddr_t));
struct cluster_save *cluster_collectbufs __P((struct vnode *, struct buf *));

#ifdef DIAGNOSTIC
/*
 * Set to 1 if reads of block zero should cause readahead to be done.
 * Set to 0 treats a read of block zero as a non-sequential read.
 *
 * Setting to one assumes that most reads of block zero of files are due to
 * sequential passes over the files (e.g. cat, sum) where additional blocks
 * will soon be needed.  Setting to zero assumes that the majority are
 * surgical strikes to get particular info (e.g. size, file) where readahead
 * blocks will not be used and, in fact, push out other potentially useful
 * blocks from the cache.  The former seems intuitive, but some quick tests
 * showed that the latter performed better from a system-wide point of view.
 */
int	doclusterraz = 0;
#define ISSEQREAD(vp, blk) \
	(((blk) != 0 || doclusterraz) && \
	 ((blk) == (vp)->v_lastr + 1 || (blk) == (vp)->v_lastr))
#else
#define ISSEQREAD(vp, blk) \
	((blk) != 0 && ((blk) == (vp)->v_lastr + 1 || (blk) == (vp)->v_lastr))
#endif

/*
 * This replaces bread.  If this is a bread at the beginning of a file and
 * lastr is 0, we assume this is the first read and we'll read up to two
 * blocks if they are sequential.  After that, we'll do regular read ahead
 * in clustered chunks.
 *
 * There are 4 or 5 cases depending on how you count:
 *	Desired block is in the cache:
 *	    1 Not sequential access (0 I/Os).
 *	    2 Access is sequential, do read-ahead (1 ASYNC).
 *	Desired block is not in cache:
 *	    3 Not sequential access (1 SYNC).
 *	    4 Sequential access, next block is contiguous (1 SYNC).
 *	    5 Sequential access, next block is not contiguous (1 SYNC, 1 ASYNC)
 *
 * There are potentially two buffers that require I/O.
 *	bp is the block requested.
 *	rbp is the read-ahead block.
 *	If either is NULL, then you don't have to do the I/O.
 */
cluster_read(vp, filesize, lblkno, size, cred, bpp)
	struct vnode *vp;
	u_quad_t filesize;
	daddr_t lblkno;
	long size;
	struct ucred *cred;
	struct buf **bpp;
{
	struct buf *bp, *rbp;
	daddr_t blkno, ioblkno;
	long flags;
	int error, num_ra, alreadyincore;

#ifdef DIAGNOSTIC
	if (size == 0)
		panic("cluster_read: size = 0");
#endif

	error = 0;
	flags = B_READ;
	*bpp = bp = getblk(vp, lblkno, size, 0, 0);
	if (bp->b_flags & B_CACHE) {
		/*
		 * Desired block is in cache; do any readahead ASYNC.
		 * Case 1, 2.
		 */
		trace(TR_BREADHIT, pack(vp, size), lblkno);
		flags |= B_ASYNC;
		ioblkno = lblkno + (vp->v_ralen ? vp->v_ralen : 1);
		alreadyincore = incore(vp, ioblkno) != NULL;
		bp = NULL;
	} else {
		/* Block wasn't in cache, case 3, 4, 5. */
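		/*
		 * On a cache miss the readahead window starts at the
		 * requested block itself (ioblkno == lblkno), so a
		 * contiguous readahead (case 4) is folded into the same
		 * synchronous I/O as bp rather than issued separately.
		 */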
		trace(TR_BREADMISS, pack(vp, size), lblkno);
		bp->b_flags |= B_READ;
		ioblkno = lblkno;
		alreadyincore = 0;
		curproc->p_stats->p_ru.ru_inblock++;		/* XXX */
	}
	/*
	 * XXX
	 * Replace 1 with a window size based on some permutation of
	 * maxcontig and rot_delay.  This will let you figure out how
	 * many blocks you should read-ahead (case 2, 4, 5).
	 *
	 * If the access isn't sequential, reset the window to 1.
	 * Note that a read to the same block is considered sequential.
	 * This catches the case where the file is being read sequentially,
	 * but at smaller than the filesystem block size.
	 */
	rbp = NULL;
	if (!ISSEQREAD(vp, lblkno)) {
		vp->v_ralen = 0;
		vp->v_maxra = lblkno;
	} else if ((ioblkno + 1) * size <= filesize && !alreadyincore &&
	    !(error = VOP_BMAP(vp, ioblkno, NULL, &blkno, &num_ra)) &&
	    blkno != -1) {
		/*
		 * Reading sequentially, and the next block is not in the
		 * cache.  We are going to try reading ahead.
		 */
		if (num_ra) {
			/*
			 * If our desired readahead block had been read
			 * in a previous readahead but is no longer in
			 * core, then we may be reading ahead too far
			 * or are not using our readahead very rapidly.
			 * In this case we scale back the window.
			 */
			if (!alreadyincore && ioblkno <= vp->v_maxra)
				vp->v_ralen = max(vp->v_ralen >> 1, 1);
			/*
			 * There are more sequential blocks than our current
			 * window allows, scale up.  Ideally we want to get
			 * in sync with the filesystem maxcontig value.
			 */
			else if (num_ra > vp->v_ralen && lblkno != vp->v_lastr)
				vp->v_ralen = vp->v_ralen ?
				    min(num_ra, vp->v_ralen << 1) : 1;

			if (num_ra > vp->v_ralen)
				num_ra = vp->v_ralen;
		}

		if (num_ra)				/* case 2, 4 */
			rbp = cluster_rbuild(vp, filesize,
			    bp, ioblkno, blkno, size, num_ra, flags);
		else if (ioblkno == lblkno) {
			bp->b_blkno = blkno;
			/* Case 5: check how many blocks to read ahead */
			++ioblkno;
			if ((ioblkno + 1) * size > filesize ||
			    incore(vp, ioblkno) || (error = VOP_BMAP(vp,
			    ioblkno, NULL, &blkno, &num_ra)) || blkno == -1)
				goto skip_readahead;
			/*
			 * Adjust readahead as above.
			 * Don't check alreadyincore, we know it is 0 from
			 * the previous conditional.
			 */
			if (num_ra) {
				if (ioblkno <= vp->v_maxra)
					vp->v_ralen = max(vp->v_ralen >> 1, 1);
				else if (num_ra > vp->v_ralen &&
				    lblkno != vp->v_lastr)
					vp->v_ralen = vp->v_ralen ?
					    min(num_ra, vp->v_ralen << 1) : 1;
				if (num_ra > vp->v_ralen)
					num_ra = vp->v_ralen;
			}
			flags |= B_ASYNC;
			if (num_ra)
				rbp = cluster_rbuild(vp, filesize,
				    NULL, ioblkno, blkno, size, num_ra, flags);
			else {
				rbp = getblk(vp, ioblkno, size, 0, 0);
				rbp->b_flags |= flags;
				rbp->b_blkno = blkno;
			}
		} else {
			/* case 2; read ahead single block */
			rbp = getblk(vp, ioblkno, size, 0, 0);
			rbp->b_flags |= flags;
			rbp->b_blkno = blkno;
		}

		if (rbp == bp)				/* case 4 */
			rbp = NULL;
		else if (rbp) {				/* case 2, 5 */
			trace(TR_BREADMISSRA,
			    pack(vp, (num_ra + 1) * size), ioblkno);
			curproc->p_stats->p_ru.ru_inblock++;	/* XXX */
		}
	}

	/* XXX Kirk, do we need to make sure the bp has creds? */
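	/*
	 * Start the I/O: bp (if any) is read synchronously and waited
	 * for via biowait() below, while rbp (if any) was set up with
	 * B_ASYNC and is simply handed to VOP_STRATEGY().
	 */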
skip_readahead:
	if (bp)
		if (bp->b_flags & (B_DONE | B_DELWRI))
			panic("cluster_read: DONE bp");
		else
			error = VOP_STRATEGY(bp);

	if (rbp)
		if (error || rbp->b_flags & (B_DONE | B_DELWRI)) {
			rbp->b_flags &= ~(B_ASYNC | B_READ);
			brelse(rbp);
		} else
			(void) VOP_STRATEGY(rbp);

	/*
	 * Recalculate our maximum readahead
	 */
	if (rbp == NULL)
		rbp = bp;
	if (rbp)
		vp->v_maxra = rbp->b_lblkno + (rbp->b_bufsize / size) - 1;

	if (bp)
		return(biowait(bp));
	return(error);
}

/*
 * If blocks are contiguous on disk, use this to provide clustered
 * read ahead.  We will read as many blocks as possible sequentially
 * and then parcel them up into logical blocks in the buffer hash table.
 */
struct buf *
cluster_rbuild(vp, filesize, bp, lbn, blkno, size, run, flags)
	struct vnode *vp;
	u_quad_t filesize;
	struct buf *bp;
	daddr_t lbn;
	daddr_t blkno;
	long size;
	int run;
	long flags;
{
	struct cluster_save *b_save;
	struct buf *tbp;
	daddr_t bn;
	int i, inc;

#ifdef DIAGNOSTIC
	if (size != vp->v_mount->mnt_stat.f_iosize)
		panic("cluster_rbuild: size %d != filesize %d\n",
		    size, vp->v_mount->mnt_stat.f_iosize);
#endif
	if (size * (lbn + run + 1) > filesize)
		--run;
	if (run == 0) {
		if (!bp) {
			bp = getblk(vp, lbn, size, 0, 0);
			bp->b_blkno = blkno;
			bp->b_flags |= flags;
		}
		return(bp);
	}

	bp = cluster_newbuf(vp, bp, flags, blkno, lbn, size, run + 1);
	if (bp->b_flags & (B_DONE | B_DELWRI))
		return (bp);

	b_save = malloc(sizeof(struct buf *) * run + sizeof(struct cluster_save),
	    M_SEGMENT, M_WAITOK);
	b_save->bs_bufsize = b_save->bs_bcount = size;
	b_save->bs_nchildren = 0;
	b_save->bs_children = (struct buf **)(b_save + 1);
	b_save->bs_saveaddr = bp->b_saveaddr;
	bp->b_saveaddr = (caddr_t) b_save;

	inc = btodb(size);
	for (bn = blkno + inc, i = 1; i <= run; ++i, bn += inc) {
		/*
		 * A component of the cluster is already in core,
		 * terminate the cluster early.
		 */
		if (incore(vp, lbn + i))
			break;
		tbp = getblk(vp, lbn + i, 0, 0, 0);
		/*
		 * getblk may return some memory in the buffer if there were
		 * no empty buffers to shed it to.  If there is currently
		 * memory in the buffer, we move it down size bytes to make
		 * room for the valid pages that cluster_callback will insert.
		 * We do this now so we don't have to do it at interrupt time
		 * in the callback routine.
		 */
		if (tbp->b_bufsize != 0) {
			caddr_t bdata = (char *)tbp->b_data;

			/*
			 * No room in the buffer to add another page,
			 * terminate the cluster early.
			 */
			if (tbp->b_bufsize + size > MAXBSIZE) {
#ifdef DIAGNOSTIC
				if (tbp->b_bufsize != MAXBSIZE)
					panic("cluster_rbuild: too much memory");
#endif
				brelse(tbp);
				break;
			}
			if (tbp->b_bufsize > size) {
				/*
				 * XXX if the source and destination regions
				 * overlap we have to copy backward to avoid
				 * clobbering any valid pages (i.e. pagemove
				 * implementations typically can't handle
				 * overlap).
				 */
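				/*
				 * Walk from the end of the existing data
				 * back toward the start, moving one CLBYTES
				 * chunk up by size bytes at a time, so no
				 * chunk is overwritten before it is moved.
				 */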
				bdata += tbp->b_bufsize;
				while (bdata > (char *)tbp->b_data) {
					bdata -= CLBYTES;
					pagemove(bdata, bdata + size, CLBYTES);
				}
			} else
				pagemove(bdata, bdata + size, tbp->b_bufsize);
		}
		tbp->b_blkno = bn;
		tbp->b_flags |= flags | B_READ | B_ASYNC;
		++b_save->bs_nchildren;
		b_save->bs_children[i - 1] = tbp;
	}
	/*
	 * The cluster may have been terminated early, adjust the cluster
	 * buffer size accordingly.  If no cluster could be formed,
	 * deallocate the cluster save info.
	 */
	if (i <= run) {
		if (i == 1) {
			bp->b_saveaddr = b_save->bs_saveaddr;
			bp->b_flags &= ~B_CALL;
			bp->b_iodone = NULL;
			free(b_save, M_SEGMENT);
		}
		allocbuf(bp, size * i);
	}
	return(bp);
}

/*
 * Either get a new buffer or grow the existing one.
 */
struct buf *
cluster_newbuf(vp, bp, flags, blkno, lblkno, size, run)
	struct vnode *vp;
	struct buf *bp;
	long flags;
	daddr_t blkno;
	daddr_t lblkno;
	long size;
	int run;
{
	if (!bp) {
		bp = getblk(vp, lblkno, size, 0, 0);
		if (bp->b_flags & (B_DONE | B_DELWRI)) {
			bp->b_blkno = blkno;
			return(bp);
		}
	}
	allocbuf(bp, run * size);
	bp->b_blkno = blkno;
	bp->b_iodone = cluster_callback;
	bp->b_flags |= flags | B_CALL;
	return(bp);
}

/*
 * Cleanup after a clustered read or write.
 * This is complicated by the fact that any of the buffers might have
 * extra memory (if there were no empty buffer headers at allocbuf time)
 * that we will need to shift around.
 */
void
cluster_callback(bp)
	struct buf *bp;
{
	struct cluster_save *b_save;
	struct buf **bpp, *tbp;
	long bsize;
	caddr_t cp;
	int error = 0;

	/*
	 * Must propagate errors to all the components.
	 */
	if (bp->b_flags & B_ERROR)
		error = bp->b_error;

	b_save = (struct cluster_save *)(bp->b_saveaddr);
	bp->b_saveaddr = b_save->bs_saveaddr;

	bsize = b_save->bs_bufsize;
	cp = (char *)bp->b_data + bsize;
	/*
	 * Move memory from the large cluster buffer into the component
	 * buffers and mark IO as done on these.
	 */
	for (bpp = b_save->bs_children; b_save->bs_nchildren--; ++bpp) {
		tbp = *bpp;
		pagemove(cp, tbp->b_data, bsize);
		tbp->b_bufsize += bsize;
		tbp->b_bcount = bsize;
		if (error) {
			tbp->b_flags |= B_ERROR;
			tbp->b_error = error;
		}
		biodone(tbp);
		bp->b_bufsize -= bsize;
		cp += bsize;
	}
	/*
	 * If there was excess memory in the cluster buffer,
	 * slide it up adjacent to the remaining valid data.
	 */
	if (bp->b_bufsize != bsize) {
		if (bp->b_bufsize < bsize)
			panic("cluster_callback: too little memory");
		pagemove(cp, (char *)bp->b_data + bsize, bp->b_bufsize - bsize);
	}
	bp->b_bcount = bsize;
	bp->b_iodone = NULL;
	free(b_save, M_SEGMENT);
	if (bp->b_flags & B_ASYNC)
		brelse(bp);
	else {
		bp->b_flags &= ~B_WANTED;
		wakeup((caddr_t)bp);
	}
}

/*
 * Do clustered write for FFS.
 *
 * Four cases:
 *	1. Write is not sequential (write asynchronously)
 *	Write is sequential:
 *	2. beginning of cluster - begin cluster
 *	3. middle of a cluster - add to cluster
 *	4. end of a cluster - asynchronously write cluster
 */
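/*
 * Per-vnode cluster state: v_cstart is the first logical block of the
 * cluster being built, v_clen is the number of blocks beyond v_cstart
 * that the cluster may grow to cover, v_lastw is the last logical block
 * written, and v_lasta is its disk address (used to detect writes that
 * are not physically contiguous).
 */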
void
cluster_write(bp, filesize)
	struct buf *bp;
	u_quad_t filesize;
{
	struct vnode *vp;
	daddr_t lbn;
	int maxclen, cursize;

	vp = bp->b_vp;
	lbn = bp->b_lblkno;

	/* Initialize vnode to beginning of file. */
	if (lbn == 0)
		vp->v_lasta = vp->v_clen = vp->v_cstart = vp->v_lastw = 0;

	if (vp->v_clen == 0 || lbn != vp->v_lastw + 1 ||
	    (bp->b_blkno != vp->v_lasta + btodb(bp->b_bcount))) {
		maxclen = MAXBSIZE / vp->v_mount->mnt_stat.f_iosize - 1;
		if (vp->v_clen != 0) {
			/*
			 * Next block is not sequential.
			 *
			 * If we are not writing at end of file, the process
			 * seeked to another point in the file since its
			 * last write, or we have reached our maximum
			 * cluster size, then push the previous cluster.
			 * Otherwise try reallocating to make it sequential.
			 */
			cursize = vp->v_lastw - vp->v_cstart + 1;
			if ((lbn + 1) * bp->b_bcount != filesize ||
			    lbn != vp->v_lastw + 1 || vp->v_clen <= cursize) {
				cluster_wbuild(vp, NULL, bp->b_bcount,
				    vp->v_cstart, cursize, lbn);
			} else {
				struct buf **bpp, **endbp;
				struct cluster_save *buflist;

				buflist = cluster_collectbufs(vp, bp);
				endbp = &buflist->bs_children
				    [buflist->bs_nchildren - 1];
				if (VOP_REALLOCBLKS(vp, buflist)) {
					/*
					 * Failed, push the previous cluster.
					 */
					for (bpp = buflist->bs_children;
					    bpp < endbp; bpp++)
						brelse(*bpp);
					free(buflist, M_SEGMENT);
					cluster_wbuild(vp, NULL, bp->b_bcount,
					    vp->v_cstart, cursize, lbn);
				} else {
					/*
					 * Succeeded, keep building cluster.
					 */
					for (bpp = buflist->bs_children;
					    bpp <= endbp; bpp++)
						bdwrite(*bpp);
					free(buflist, M_SEGMENT);
					vp->v_lastw = lbn;
					vp->v_lasta = bp->b_blkno;
					return;
				}
			}
		}
		/*
		 * Consider beginning a cluster.
		 * If at end of file, make cluster as large as possible,
		 * otherwise find size of existing cluster.
		 */
		if ((lbn + 1) * bp->b_bcount != filesize &&
		    (VOP_BMAP(vp, lbn, NULL, &bp->b_blkno, &maxclen) ||
		    bp->b_blkno == -1)) {
			bawrite(bp);
			vp->v_clen = 0;
			vp->v_lasta = bp->b_blkno;
			vp->v_cstart = lbn + 1;
			vp->v_lastw = lbn;
			return;
		}
		vp->v_clen = maxclen;
		if (maxclen == 0) {		/* I/O not contiguous */
			vp->v_cstart = lbn + 1;
			bawrite(bp);
		} else {			/* Wait for rest of cluster */
			vp->v_cstart = lbn;
			bdwrite(bp);
		}
	} else if (lbn == vp->v_cstart + vp->v_clen) {
		/*
		 * At end of cluster, write it out.
		 */
		cluster_wbuild(vp, bp, bp->b_bcount, vp->v_cstart,
		    vp->v_clen + 1, lbn);
		vp->v_clen = 0;
		vp->v_cstart = lbn + 1;
	} else
		/*
		 * In the middle of a cluster, so just delay the
		 * I/O for now.
		 */
		bdwrite(bp);
	vp->v_lastw = lbn;
	vp->v_lasta = bp->b_blkno;
}

/*
 * This is an awful lot like cluster_rbuild...wish they could be combined.
 * The last lbn argument is the current block on which I/O is being
 * performed.  Check to see that it doesn't fall in the middle of
 * the current block (if last_bp == NULL).
 */
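/*
 * Starting at start_lbn, gather up to len delayed-write buffers, merge
 * their memory into one large buffer with pagemove(), and issue a single
 * asynchronous write for the whole cluster.  A block that is not in core
 * or is no longer dirty ends the current pass; any remaining blocks in
 * the range are retried from the top.
 */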
void
cluster_wbuild(vp, last_bp, size, start_lbn, len, lbn)
	struct vnode *vp;
	struct buf *last_bp;
	long size;
	daddr_t start_lbn;
	int len;
	daddr_t lbn;
{
	struct cluster_save *b_save;
	struct buf *bp, *tbp;
	caddr_t cp;
	int i, s;

#ifdef DIAGNOSTIC
	if (size != vp->v_mount->mnt_stat.f_iosize)
		panic("cluster_wbuild: size %d != filesize %d\n",
		    size, vp->v_mount->mnt_stat.f_iosize);
#endif
redo:
	while ((!incore(vp, start_lbn) || start_lbn == lbn) && len) {
		++start_lbn;
		--len;
	}

	/* Get more memory for current buffer */
	if (len <= 1) {
		if (last_bp) {
			bawrite(last_bp);
		} else if (len) {
			bp = getblk(vp, start_lbn, size, 0, 0);
			bawrite(bp);
		}
		return;
	}

	bp = getblk(vp, start_lbn, size, 0, 0);
	if (!(bp->b_flags & B_DELWRI)) {
		++start_lbn;
		--len;
		brelse(bp);
		goto redo;
	}

	/*
	 * Extra memory in the buffer, punt on this buffer.
	 * XXX we could handle this in most cases, but we would have to
	 * push the extra memory down to after our max possible cluster
	 * size and then potentially pull it back up if the cluster was
	 * terminated prematurely--too much hassle.
	 */
	if (bp->b_bcount != bp->b_bufsize) {
		++start_lbn;
		--len;
		bawrite(bp);
		goto redo;
	}

	--len;
	b_save = malloc(sizeof(struct buf *) * len + sizeof(struct cluster_save),
	    M_SEGMENT, M_WAITOK);
	b_save->bs_bcount = bp->b_bcount;
	b_save->bs_bufsize = bp->b_bufsize;
	b_save->bs_nchildren = 0;
	b_save->bs_children = (struct buf **)(b_save + 1);
	b_save->bs_saveaddr = bp->b_saveaddr;
	bp->b_saveaddr = (caddr_t) b_save;

	bp->b_flags |= B_CALL;
	bp->b_iodone = cluster_callback;
	cp = (char *)bp->b_data + size;
	for (++start_lbn, i = 0; i < len; ++i, ++start_lbn) {
		/*
		 * Block is not in core or the non-sequential block
		 * ending our cluster was part of the cluster (in which
		 * case we don't want to write it twice).
		 */
		if (!incore(vp, start_lbn) ||
		    last_bp == NULL && start_lbn == lbn)
			break;

		/*
		 * Get the desired block buffer (unless it is the final
		 * sequential block whose buffer was passed in explicitly
		 * as last_bp).
		 */
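		/*
		 * Only a dirty (B_DELWRI) buffer may join the cluster;
		 * finding a clean one terminates it.
		 */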
		if (last_bp == NULL || start_lbn != lbn) {
			tbp = getblk(vp, start_lbn, size, 0, 0);
			if (!(tbp->b_flags & B_DELWRI)) {
				brelse(tbp);
				break;
			}
		} else
			tbp = last_bp;

		++b_save->bs_nchildren;

		/* Move memory from children to parent */
		if (tbp->b_blkno != (bp->b_blkno + btodb(bp->b_bufsize))) {
			printf("Clustered Block: %d addr %x bufsize: %d\n",
			    bp->b_lblkno, bp->b_blkno, bp->b_bufsize);
			printf("Child Block: %d addr: %x\n", tbp->b_lblkno,
			    tbp->b_blkno);
			panic("Clustered write to wrong blocks");
		}

		pagemove(tbp->b_data, cp, size);
		bp->b_bcount += size;
		bp->b_bufsize += size;

		tbp->b_bufsize -= size;
		tbp->b_flags &= ~(B_READ | B_DONE | B_ERROR | B_DELWRI);
		tbp->b_flags |= (B_ASYNC | B_AGE);
		s = splbio();
		reassignbuf(tbp, tbp->b_vp);		/* put on clean list */
		++tbp->b_vp->v_numoutput;
		splx(s);
		b_save->bs_children[i] = tbp;

		cp += size;
	}

	if (i == 0) {
		/* None to cluster */
		bp->b_saveaddr = b_save->bs_saveaddr;
		bp->b_flags &= ~B_CALL;
		bp->b_iodone = NULL;
		free(b_save, M_SEGMENT);
	}
	bawrite(bp);
	if (i < len) {
		len -= i + 1;
		start_lbn += 1;
		goto redo;
	}
}

/*
 * Collect together all the buffers in a cluster.
 * Plus add one additional buffer.
 */
struct cluster_save *
cluster_collectbufs(vp, last_bp)
	struct vnode *vp;
	struct buf *last_bp;
{
	struct cluster_save *buflist;
	daddr_t lbn;
	int i, len;

	len = vp->v_lastw - vp->v_cstart + 1;
	buflist = malloc(sizeof(struct buf *) * (len + 1) + sizeof(*buflist),
	    M_SEGMENT, M_WAITOK);
	buflist->bs_nchildren = 0;
	buflist->bs_children = (struct buf **)(buflist + 1);
	for (lbn = vp->v_cstart, i = 0; i < len; lbn++, i++)
		(void)bread(vp, lbn, last_bp->b_bcount, NOCRED,
		    &buflist->bs_children[i]);
	buflist->bs_children[i] = last_bp;
	buflist->bs_nchildren = i + 1;
	return (buflist);
}