/*-
 * Copyright (c) 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * %sccs.include.redist.c%
 *
 *	@(#)vfs_cluster.c	8.8 (Berkeley) 07/28/94
 */

#include <sys/param.h>
#include <sys/proc.h>
#include <sys/buf.h>
#include <sys/vnode.h>
#include <sys/mount.h>
#include <sys/trace.h>
#include <sys/malloc.h>
#include <sys/resourcevar.h>
#include <libkern/libkern.h>

#ifdef DEBUG
#include <vm/vm.h>
#include <sys/sysctl.h>
int doreallocblks = 1;
struct ctldebug debug13 = { "doreallocblks", &doreallocblks };
#else
/* XXX for cluster_write */
#define doreallocblks 1
#endif

/*
 * Local declarations
 */
struct buf *cluster_newbuf __P((struct vnode *, struct buf *, long, daddr_t,
	    daddr_t, long, int));
struct buf *cluster_rbuild __P((struct vnode *, u_quad_t, struct buf *,
	    daddr_t, daddr_t, long, int, long));
void	    cluster_wbuild __P((struct vnode *, struct buf *, long,
	    daddr_t, int, daddr_t));
struct cluster_save *cluster_collectbufs __P((struct vnode *, struct buf *));

#ifdef DIAGNOSTIC
/*
 * Set to 1 if reads of block zero should cause readahead to be done.
 * Set to 0 treats a read of block zero as a non-sequential read.
 *
 * Setting to one assumes that most reads of block zero of files are due to
 * sequential passes over the files (e.g. cat, sum) where additional blocks
 * will soon be needed.  Setting to zero assumes that the majority are
 * surgical strikes to get particular info (e.g. size, file) where readahead
 * blocks will not be used and, in fact, push out other potentially useful
 * blocks from the cache.  The former seems intuitive, but some quick tests
 * showed that the latter performed better from a system-wide point of view.
 */
int	doclusterraz = 0;
#define ISSEQREAD(vp, blk) \
	(((blk) != 0 || doclusterraz) && \
	 ((blk) == (vp)->v_lastr + 1 || (blk) == (vp)->v_lastr))
#else
#define ISSEQREAD(vp, blk) \
	((blk) != 0 && ((blk) == (vp)->v_lastr + 1 || (blk) == (vp)->v_lastr))
#endif

/*
 * This replaces bread.  If this is a bread at the beginning of a file and
 * lastr is 0, we assume this is the first read and we'll read up to two
 * blocks if they are sequential.  After that, we'll do regular read ahead
 * in clustered chunks.
 *
 * There are 4 or 5 cases depending on how you count:
 *	Desired block is in the cache:
 *	    1 Not sequential access (0 I/Os).
 *	    2 Access is sequential, do read-ahead (1 ASYNC).
 *	Desired block is not in cache:
 *	    3 Not sequential access (1 SYNC).
 *	    4 Sequential access, next block is contiguous (1 SYNC).
 *	    5 Sequential access, next block is not contiguous (1 SYNC, 1 ASYNC)
 *
 * There are potentially two buffers that require I/O.
 *	bp is the block requested.
 *	rbp is the read-ahead block.
 *	If either is NULL, then you don't have to do the I/O.
 */
cluster_read(vp, filesize, lblkno, size, cred, bpp)
	struct vnode *vp;
	u_quad_t filesize;
	daddr_t lblkno;
	long size;
	struct ucred *cred;
	struct buf **bpp;
{
	struct buf *bp, *rbp;
	daddr_t blkno, ioblkno;
	long flags;
	int error, num_ra, alreadyincore;

#ifdef DIAGNOSTIC
	if (size == 0)
		panic("cluster_read: size = 0");
#endif

	error = 0;
	flags = B_READ;
	*bpp = bp = getblk(vp, lblkno, size, 0, 0);
	if (bp->b_flags & B_CACHE) {
		/*
		 * Desired block is in cache; do any readahead ASYNC.
		 * Case 1, 2.
		 */
		trace(TR_BREADHIT, pack(vp, size), lblkno);
		flags |= B_ASYNC;
		ioblkno = lblkno + (vp->v_ralen ? vp->v_ralen : 1);
		alreadyincore = (int)incore(vp, ioblkno);
		bp = NULL;
	} else {
		/* Block wasn't in cache, case 3, 4, 5. */
		trace(TR_BREADMISS, pack(vp, size), lblkno);
		bp->b_flags |= B_READ;
		ioblkno = lblkno;
		alreadyincore = 0;
		curproc->p_stats->p_ru.ru_inblock++;		/* XXX */
	}
	/*
	 * XXX
	 * Replace 1 with a window size based on some permutation of
	 * maxcontig and rot_delay.  This will let you figure out how
	 * many blocks you should read-ahead (case 2, 4, 5).
	 *
	 * If the access isn't sequential, reset the window to 1.
	 * Note that a read to the same block is considered sequential.
	 * This catches the case where the file is being read sequentially,
	 * but at smaller than the filesystem block size.
	 */
	rbp = NULL;
	if (!ISSEQREAD(vp, lblkno)) {
		vp->v_ralen = 0;
		vp->v_maxra = lblkno;
	} else if ((ioblkno + 1) * size <= filesize && !alreadyincore &&
	    !(error = VOP_BMAP(vp, ioblkno, NULL, &blkno, &num_ra)) &&
	    blkno != -1) {
		/*
		 * Reading sequentially, and the next block is not in the
		 * cache.  We are going to try reading ahead.
		 */
		if (num_ra) {
			/*
			 * If our desired readahead block had been read
			 * in a previous readahead but is no longer in
			 * core, then we may be reading ahead too far
			 * or are not using our readahead very rapidly.
			 * In this case we scale back the window.
			 */
			if (!alreadyincore && ioblkno <= vp->v_maxra)
				vp->v_ralen = max(vp->v_ralen >> 1, 1);
			/*
			 * There are more sequential blocks than our current
			 * window allows, scale up.  Ideally we want to get
			 * in sync with the filesystem maxcontig value.
			 */
			else if (num_ra > vp->v_ralen && lblkno != vp->v_lastr)
				vp->v_ralen = vp->v_ralen ?
					min(num_ra, vp->v_ralen << 1) : 1;

			if (num_ra > vp->v_ralen)
				num_ra = vp->v_ralen;
		}

		if (num_ra)				/* case 2, 4 */
			rbp = cluster_rbuild(vp, filesize,
			    bp, ioblkno, blkno, size, num_ra, flags);
		else if (ioblkno == lblkno) {
			bp->b_blkno = blkno;
			/* Case 5: check how many blocks to read ahead */
			++ioblkno;
			if ((ioblkno + 1) * size > filesize ||
			    incore(vp, ioblkno) || (error = VOP_BMAP(vp,
			     ioblkno, NULL, &blkno, &num_ra)) || blkno == -1)
				goto skip_readahead;
			/*
			 * Adjust readahead as above.
			 * Don't check alreadyincore, we know it is 0 from
			 * the previous conditional.
			 */
			if (num_ra) {
				if (ioblkno <= vp->v_maxra)
					vp->v_ralen = max(vp->v_ralen >> 1, 1);
				else if (num_ra > vp->v_ralen &&
				    lblkno != vp->v_lastr)
					vp->v_ralen = vp->v_ralen ?
					    min(num_ra,vp->v_ralen<<1) : 1;
				if (num_ra > vp->v_ralen)
					num_ra = vp->v_ralen;
			}
			flags |= B_ASYNC;
			if (num_ra)
				rbp = cluster_rbuild(vp, filesize,
				    NULL, ioblkno, blkno, size, num_ra, flags);
			else {
				rbp = getblk(vp, ioblkno, size, 0, 0);
				rbp->b_flags |= flags;
				rbp->b_blkno = blkno;
			}
		} else {
			/* case 2; read ahead single block */
			rbp = getblk(vp, ioblkno, size, 0, 0);
			rbp->b_flags |= flags;
			rbp->b_blkno = blkno;
		}

		if (rbp == bp)			/* case 4 */
			rbp = NULL;
		else if (rbp) {			/* case 2, 5 */
			trace(TR_BREADMISSRA,
			    pack(vp, (num_ra + 1) * size), ioblkno);
			curproc->p_stats->p_ru.ru_inblock++;	/* XXX */
		}
	}

	/* XXX Kirk, do we need to make sure the bp has creds? */
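	/*
	 * Start the I/O.  If the requested block was not found in the
	 * cache, bp is read here and waited for below via biowait().
	 * The read-ahead block (rbp), if any, is started asynchronously;
	 * if the synchronous read fails, the read-ahead buffer is
	 * released rather than started.
	 */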
skip_readahead:
	if (bp)
		if (bp->b_flags & (B_DONE | B_DELWRI))
			panic("cluster_read: DONE bp");
		else
			error = VOP_STRATEGY(bp);

	if (rbp)
		if (error || rbp->b_flags & (B_DONE | B_DELWRI)) {
			rbp->b_flags &= ~(B_ASYNC | B_READ);
			brelse(rbp);
		} else
			(void) VOP_STRATEGY(rbp);

	/*
	 * Recalculate our maximum readahead
	 */
	if (rbp == NULL)
		rbp = bp;
	if (rbp)
		vp->v_maxra = rbp->b_lblkno + (rbp->b_bufsize / size) - 1;

	if (bp)
		return(biowait(bp));
	return(error);
}

/*
 * If blocks are contiguous on disk, use this to provide clustered
 * read ahead.  We will read as many blocks as possible sequentially
 * and then parcel them up into logical blocks in the buffer hash table.
 */
struct buf *
cluster_rbuild(vp, filesize, bp, lbn, blkno, size, run, flags)
	struct vnode *vp;
	u_quad_t filesize;
	struct buf *bp;
	daddr_t lbn;
	daddr_t blkno;
	long size;
	int run;
	long flags;
{
	struct cluster_save *b_save;
	struct buf *tbp;
	daddr_t bn;
	int i, inc;

#ifdef DIAGNOSTIC
	if (size != vp->v_mount->mnt_stat.f_iosize)
		panic("cluster_rbuild: size %d != filesize %d\n",
			size, vp->v_mount->mnt_stat.f_iosize);
#endif
	if (size * (lbn + run + 1) > filesize)
		--run;
	if (run == 0) {
		if (!bp) {
			bp = getblk(vp, lbn, size, 0, 0);
			bp->b_blkno = blkno;
			bp->b_flags |= flags;
		}
		return(bp);
	}

	bp = cluster_newbuf(vp, bp, flags, blkno, lbn, size, run + 1);
	if (bp->b_flags & (B_DONE | B_DELWRI))
		return (bp);

	b_save = malloc(sizeof(struct buf *) * run + sizeof(struct cluster_save),
	    M_SEGMENT, M_WAITOK);
	b_save->bs_bufsize = b_save->bs_bcount = size;
	b_save->bs_nchildren = 0;
	b_save->bs_children = (struct buf **)(b_save + 1);
	b_save->bs_saveaddr = bp->b_saveaddr;
	bp->b_saveaddr = (caddr_t) b_save;

	inc = btodb(size);
	for (bn = blkno + inc, i = 1; i <= run; ++i, bn += inc) {
		/*
		 * A component of the cluster is already in core,
		 * terminate the cluster early.
		 */
		if (incore(vp, lbn + i))
			break;
		tbp = getblk(vp, lbn + i, 0, 0, 0);
		/*
		 * getblk may return some memory in the buffer if there were
		 * no empty buffers to shed it to.  If there is currently
		 * memory in the buffer, we move it down size bytes to make
		 * room for the valid pages that cluster_callback will insert.
		 * We do this now so we don't have to do it at interrupt time
		 * in the callback routine.
		 */
		if (tbp->b_bufsize != 0) {
			caddr_t bdata = (char *)tbp->b_data;

			/*
			 * No room in the buffer to add another page,
			 * terminate the cluster early.
			 */
			if (tbp->b_bufsize + size > MAXBSIZE) {
#ifdef DIAGNOSTIC
				if (tbp->b_bufsize != MAXBSIZE)
					panic("cluster_rbuild: too much memory");
#endif
				brelse(tbp);
				break;
			}
			if (tbp->b_bufsize > size) {
				/*
				 * XXX if the source and destination regions
				 * overlap we have to copy backward to avoid
				 * clobbering any valid pages (i.e. pagemove
				 * implementations typically can't handle
				 * overlap).
334 */ 335 bdata += tbp->b_bufsize; 336 while (bdata > (char *)tbp->b_data) { 337 bdata -= CLBYTES; 338 pagemove(bdata, bdata + size, CLBYTES); 339 } 340 } else 341 pagemove(bdata, bdata + size, tbp->b_bufsize); 342 } 343 tbp->b_blkno = bn; 344 tbp->b_flags |= flags | B_READ | B_ASYNC; 345 ++b_save->bs_nchildren; 346 b_save->bs_children[i - 1] = tbp; 347 } 348 /* 349 * The cluster may have been terminated early, adjust the cluster 350 * buffer size accordingly. If no cluster could be formed, 351 * deallocate the cluster save info. 352 */ 353 if (i <= run) { 354 if (i == 1) { 355 bp->b_saveaddr = b_save->bs_saveaddr; 356 bp->b_flags &= ~B_CALL; 357 bp->b_iodone = NULL; 358 free(b_save, M_SEGMENT); 359 } 360 allocbuf(bp, size * i); 361 } 362 return(bp); 363 } 364 365 /* 366 * Either get a new buffer or grow the existing one. 367 */ 368 struct buf * 369 cluster_newbuf(vp, bp, flags, blkno, lblkno, size, run) 370 struct vnode *vp; 371 struct buf *bp; 372 long flags; 373 daddr_t blkno; 374 daddr_t lblkno; 375 long size; 376 int run; 377 { 378 if (!bp) { 379 bp = getblk(vp, lblkno, size, 0, 0); 380 if (bp->b_flags & (B_DONE | B_DELWRI)) { 381 bp->b_blkno = blkno; 382 return(bp); 383 } 384 } 385 allocbuf(bp, run * size); 386 bp->b_blkno = blkno; 387 bp->b_iodone = cluster_callback; 388 bp->b_flags |= flags | B_CALL; 389 return(bp); 390 } 391 392 /* 393 * Cleanup after a clustered read or write. 394 * This is complicated by the fact that any of the buffers might have 395 * extra memory (if there were no empty buffer headers at allocbuf time) 396 * that we will need to shift around. 397 */ 398 void 399 cluster_callback(bp) 400 struct buf *bp; 401 { 402 struct cluster_save *b_save; 403 struct buf **bpp, *tbp; 404 long bsize; 405 caddr_t cp; 406 int error = 0; 407 408 /* 409 * Must propogate errors to all the components. 410 */ 411 if (bp->b_flags & B_ERROR) 412 error = bp->b_error; 413 414 b_save = (struct cluster_save *)(bp->b_saveaddr); 415 bp->b_saveaddr = b_save->bs_saveaddr; 416 417 bsize = b_save->bs_bufsize; 418 cp = (char *)bp->b_data + bsize; 419 /* 420 * Move memory from the large cluster buffer into the component 421 * buffers and mark IO as done on these. 422 */ 423 for (bpp = b_save->bs_children; b_save->bs_nchildren--; ++bpp) { 424 tbp = *bpp; 425 pagemove(cp, tbp->b_data, bsize); 426 tbp->b_bufsize += bsize; 427 tbp->b_bcount = bsize; 428 if (error) { 429 tbp->b_flags |= B_ERROR; 430 tbp->b_error = error; 431 } 432 biodone(tbp); 433 bp->b_bufsize -= bsize; 434 cp += bsize; 435 } 436 /* 437 * If there was excess memory in the cluster buffer, 438 * slide it up adjacent to the remaining valid data. 439 */ 440 if (bp->b_bufsize != bsize) { 441 if (bp->b_bufsize < bsize) 442 panic("cluster_callback: too little memory"); 443 pagemove(cp, (char *)bp->b_data + bsize, bp->b_bufsize - bsize); 444 } 445 bp->b_bcount = bsize; 446 bp->b_iodone = NULL; 447 free(b_save, M_SEGMENT); 448 if (bp->b_flags & B_ASYNC) 449 brelse(bp); 450 else { 451 bp->b_flags &= ~B_WANTED; 452 wakeup((caddr_t)bp); 453 } 454 } 455 456 /* 457 * Do clustered write for FFS. 458 * 459 * Three cases: 460 * 1. Write is not sequential (write asynchronously) 461 * Write is sequential: 462 * 2. beginning of cluster - begin cluster 463 * 3. middle of a cluster - add to cluster 464 * 4. 
 */
void
cluster_write(bp, filesize)
	struct buf *bp;
	u_quad_t filesize;
{
	struct vnode *vp;
	daddr_t lbn;
	int maxclen, cursize;

	vp = bp->b_vp;
	lbn = bp->b_lblkno;

	/* Initialize vnode to beginning of file. */
	if (lbn == 0)
		vp->v_lasta = vp->v_clen = vp->v_cstart = vp->v_lastw = 0;

	if (vp->v_clen == 0 || lbn != vp->v_lastw + 1 ||
	    (bp->b_blkno != vp->v_lasta + btodb(bp->b_bcount))) {
		maxclen = MAXBSIZE / vp->v_mount->mnt_stat.f_iosize - 1;
		if (vp->v_clen != 0) {
			/*
			 * Next block is not sequential.
			 *
			 * If we are not writing at end of file, the process
			 * seeked to another point in the file since its
			 * last write, or we have reached our maximum
			 * cluster size, then push the previous cluster.
			 * Otherwise try reallocating to make it sequential.
			 */
			cursize = vp->v_lastw - vp->v_cstart + 1;
			if (!doreallocblks ||
			    (lbn + 1) * bp->b_bcount != filesize ||
			    lbn != vp->v_lastw + 1 || vp->v_clen <= cursize) {
				cluster_wbuild(vp, NULL, bp->b_bcount,
				    vp->v_cstart, cursize, lbn);
			} else {
				struct buf **bpp, **endbp;
				struct cluster_save *buflist;

				buflist = cluster_collectbufs(vp, bp);
				endbp = &buflist->bs_children
				    [buflist->bs_nchildren - 1];
				if (VOP_REALLOCBLKS(vp, buflist)) {
					/*
					 * Failed, push the previous cluster.
					 */
					for (bpp = buflist->bs_children;
					     bpp < endbp; bpp++)
						brelse(*bpp);
					free(buflist, M_SEGMENT);
					cluster_wbuild(vp, NULL, bp->b_bcount,
					    vp->v_cstart, cursize, lbn);
				} else {
					/*
					 * Succeeded, keep building cluster.
					 */
					for (bpp = buflist->bs_children;
					     bpp <= endbp; bpp++)
						bdwrite(*bpp);
					free(buflist, M_SEGMENT);
					vp->v_lastw = lbn;
					vp->v_lasta = bp->b_blkno;
					return;
				}
			}
		}
		/*
		 * Consider beginning a cluster.
		 * If at end of file, make cluster as large as possible,
		 * otherwise find size of existing cluster.
		 */
		if ((lbn + 1) * bp->b_bcount != filesize &&
		    (VOP_BMAP(vp, lbn, NULL, &bp->b_blkno, &maxclen) ||
		     bp->b_blkno == -1)) {
			bawrite(bp);
			vp->v_clen = 0;
			vp->v_lasta = bp->b_blkno;
			vp->v_cstart = lbn + 1;
			vp->v_lastw = lbn;
			return;
		}
		vp->v_clen = maxclen;
		if (maxclen == 0) {		/* I/O not contiguous */
			vp->v_cstart = lbn + 1;
			bawrite(bp);
		} else {			/* Wait for rest of cluster */
			vp->v_cstart = lbn;
			bdwrite(bp);
		}
	} else if (lbn == vp->v_cstart + vp->v_clen) {
		/*
		 * At end of cluster, write it out.
		 */
		cluster_wbuild(vp, bp, bp->b_bcount, vp->v_cstart,
		    vp->v_clen + 1, lbn);
		vp->v_clen = 0;
		vp->v_cstart = lbn + 1;
	} else
		/*
		 * In the middle of a cluster, so just delay the
		 * I/O for now.
		 */
		bdwrite(bp);
	vp->v_lastw = lbn;
	vp->v_lasta = bp->b_blkno;
}


/*
 * This is an awful lot like cluster_rbuild...wish they could be combined.
 * The last lbn argument is the current block on which I/O is being
 * performed.  Check to see that it doesn't fall in the middle of
 * the current block (if last_bp == NULL).
579 */ 580 void 581 cluster_wbuild(vp, last_bp, size, start_lbn, len, lbn) 582 struct vnode *vp; 583 struct buf *last_bp; 584 long size; 585 daddr_t start_lbn; 586 int len; 587 daddr_t lbn; 588 { 589 struct cluster_save *b_save; 590 struct buf *bp, *tbp; 591 caddr_t cp; 592 int i, s; 593 594 #ifdef DIAGNOSTIC 595 if (size != vp->v_mount->mnt_stat.f_iosize) 596 panic("cluster_wbuild: size %d != filesize %d\n", 597 size, vp->v_mount->mnt_stat.f_iosize); 598 #endif 599 redo: 600 while ((!incore(vp, start_lbn) || start_lbn == lbn) && len) { 601 ++start_lbn; 602 --len; 603 } 604 605 /* Get more memory for current buffer */ 606 if (len <= 1) { 607 if (last_bp) { 608 bawrite(last_bp); 609 } else if (len) { 610 bp = getblk(vp, start_lbn, size, 0, 0); 611 bawrite(bp); 612 } 613 return; 614 } 615 616 bp = getblk(vp, start_lbn, size, 0, 0); 617 if (!(bp->b_flags & B_DELWRI)) { 618 ++start_lbn; 619 --len; 620 brelse(bp); 621 goto redo; 622 } 623 624 /* 625 * Extra memory in the buffer, punt on this buffer. 626 * XXX we could handle this in most cases, but we would have to 627 * push the extra memory down to after our max possible cluster 628 * size and then potentially pull it back up if the cluster was 629 * terminated prematurely--too much hassle. 630 */ 631 if (bp->b_bcount != bp->b_bufsize) { 632 ++start_lbn; 633 --len; 634 bawrite(bp); 635 goto redo; 636 } 637 638 --len; 639 b_save = malloc(sizeof(struct buf *) * len + sizeof(struct cluster_save), 640 M_SEGMENT, M_WAITOK); 641 b_save->bs_bcount = bp->b_bcount; 642 b_save->bs_bufsize = bp->b_bufsize; 643 b_save->bs_nchildren = 0; 644 b_save->bs_children = (struct buf **)(b_save + 1); 645 b_save->bs_saveaddr = bp->b_saveaddr; 646 bp->b_saveaddr = (caddr_t) b_save; 647 648 bp->b_flags |= B_CALL; 649 bp->b_iodone = cluster_callback; 650 cp = (char *)bp->b_data + size; 651 for (++start_lbn, i = 0; i < len; ++i, ++start_lbn) { 652 /* 653 * Block is not in core or the non-sequential block 654 * ending our cluster was part of the cluster (in which 655 * case we don't want to write it twice). 656 */ 657 if (!incore(vp, start_lbn) || 658 last_bp == NULL && start_lbn == lbn) 659 break; 660 661 /* 662 * Get the desired block buffer (unless it is the final 663 * sequential block whose buffer was passed in explictly 664 * as last_bp). 
665 */ 666 if (last_bp == NULL || start_lbn != lbn) { 667 tbp = getblk(vp, start_lbn, size, 0, 0); 668 if (!(tbp->b_flags & B_DELWRI)) { 669 brelse(tbp); 670 break; 671 } 672 } else 673 tbp = last_bp; 674 675 ++b_save->bs_nchildren; 676 677 /* Move memory from children to parent */ 678 if (tbp->b_blkno != (bp->b_blkno + btodb(bp->b_bufsize))) { 679 printf("Clustered Block: %d addr %x bufsize: %d\n", 680 bp->b_lblkno, bp->b_blkno, bp->b_bufsize); 681 printf("Child Block: %d addr: %x\n", tbp->b_lblkno, 682 tbp->b_blkno); 683 panic("Clustered write to wrong blocks"); 684 } 685 686 pagemove(tbp->b_data, cp, size); 687 bp->b_bcount += size; 688 bp->b_bufsize += size; 689 690 tbp->b_bufsize -= size; 691 tbp->b_flags &= ~(B_READ | B_DONE | B_ERROR | B_DELWRI); 692 tbp->b_flags |= (B_ASYNC | B_AGE); 693 s = splbio(); 694 reassignbuf(tbp, tbp->b_vp); /* put on clean list */ 695 ++tbp->b_vp->v_numoutput; 696 splx(s); 697 b_save->bs_children[i] = tbp; 698 699 cp += size; 700 } 701 702 if (i == 0) { 703 /* None to cluster */ 704 bp->b_saveaddr = b_save->bs_saveaddr; 705 bp->b_flags &= ~B_CALL; 706 bp->b_iodone = NULL; 707 free(b_save, M_SEGMENT); 708 } 709 bawrite(bp); 710 if (i < len) { 711 len -= i + 1; 712 start_lbn += 1; 713 goto redo; 714 } 715 } 716 717 /* 718 * Collect together all the buffers in a cluster. 719 * Plus add one additional buffer. 720 */ 721 struct cluster_save * 722 cluster_collectbufs(vp, last_bp) 723 struct vnode *vp; 724 struct buf *last_bp; 725 { 726 struct cluster_save *buflist; 727 daddr_t lbn; 728 int i, len; 729 730 len = vp->v_lastw - vp->v_cstart + 1; 731 buflist = malloc(sizeof(struct buf *) * (len + 1) + sizeof(*buflist), 732 M_SEGMENT, M_WAITOK); 733 buflist->bs_nchildren = 0; 734 buflist->bs_children = (struct buf **)(buflist + 1); 735 for (lbn = vp->v_cstart, i = 0; i < len; lbn++, i++) 736 (void)bread(vp, lbn, last_bp->b_bcount, NOCRED, 737 &buflist->bs_children[i]); 738 buflist->bs_children[i] = last_bp; 739 buflist->bs_nchildren = i + 1; 740 return (buflist); 741 } 742