1 /*- 2 * Copyright (c) 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * %sccs.include.redist.c% 6 * 7 * @(#)vfs_cluster.c 8.7 (Berkeley) 02/13/94 8 */ 9 10 #include <sys/param.h> 11 #include <sys/proc.h> 12 #include <sys/buf.h> 13 #include <sys/vnode.h> 14 #include <sys/mount.h> 15 #include <sys/trace.h> 16 #include <sys/malloc.h> 17 #include <sys/resourcevar.h> 18 #include <libkern/libkern.h> 19 20 #ifdef DEBUG 21 #include <vm/vm.h> 22 #include <sys/sysctl.h> 23 int doreallocblks = 1; 24 struct ctldebug debug13 = { "doreallocblks", &doreallocblks }; 25 #else 26 /* XXX for cluster_write */ 27 #define doreallocblks 1 28 #endif 29 30 /* 31 * Local declarations 32 */ 33 struct buf *cluster_newbuf __P((struct vnode *, struct buf *, long, daddr_t, 34 daddr_t, long, int)); 35 struct buf *cluster_rbuild __P((struct vnode *, u_quad_t, struct buf *, 36 daddr_t, daddr_t, long, int, long)); 37 void cluster_wbuild __P((struct vnode *, struct buf *, long, 38 daddr_t, int, daddr_t)); 39 struct cluster_save *cluster_collectbufs __P((struct vnode *, struct buf *)); 40 41 #ifdef DIAGNOSTIC 42 /* 43 * Set to 1 if reads of block zero should cause readahead to be done. 44 * Set to 0 treats a read of block zero as a non-sequential read. 45 * 46 * Setting to one assumes that most reads of block zero of files are due to 47 * sequential passes over the files (e.g. cat, sum) where additional blocks 48 * will soon be needed. Setting to zero assumes that the majority are 49 * surgical strikes to get particular info (e.g. size, file) where readahead 50 * blocks will not be used and, in fact, push out other potentially useful 51 * blocks from the cache. The former seems intuitive, but some quick tests 52 * showed that the latter performed better from a system-wide point of view. 53 */ 54 int doclusterraz = 0; 55 #define ISSEQREAD(vp, blk) \ 56 (((blk) != 0 || doclusterraz) && \ 57 ((blk) == (vp)->v_lastr + 1 || (blk) == (vp)->v_lastr)) 58 #else 59 #define ISSEQREAD(vp, blk) \ 60 ((blk) != 0 && ((blk) == (vp)->v_lastr + 1 || (blk) == (vp)->v_lastr)) 61 #endif 62 63 /* 64 * This replaces bread. If this is a bread at the beginning of a file and 65 * lastr is 0, we assume this is the first read and we'll read up to two 66 * blocks if they are sequential. After that, we'll do regular read ahead 67 * in clustered chunks. 68 * 69 * There are 4 or 5 cases depending on how you count: 70 * Desired block is in the cache: 71 * 1 Not sequential access (0 I/Os). 72 * 2 Access is sequential, do read-ahead (1 ASYNC). 73 * Desired block is not in cache: 74 * 3 Not sequential access (1 SYNC). 75 * 4 Sequential access, next block is contiguous (1 SYNC). 76 * 5 Sequential access, next block is not contiguous (1 SYNC, 1 ASYNC) 77 * 78 * There are potentially two buffers that require I/O. 79 * bp is the block requested. 80 * rbp is the read-ahead block. 81 * If either is NULL, then you don't have to do the I/O. 82 */ 83 cluster_read(vp, filesize, lblkno, size, cred, bpp) 84 struct vnode *vp; 85 u_quad_t filesize; 86 daddr_t lblkno; 87 long size; 88 struct ucred *cred; 89 struct buf **bpp; 90 { 91 struct buf *bp, *rbp; 92 daddr_t blkno, ioblkno; 93 long flags; 94 int error, num_ra, alreadyincore; 95 96 #ifdef DIAGNOSTIC 97 if (size == 0) 98 panic("cluster_read: size = 0"); 99 #endif 100 101 error = 0; 102 flags = B_READ; 103 *bpp = bp = getblk(vp, lblkno, size, 0, 0); 104 if (bp->b_flags & B_CACHE) { 105 /* 106 * Desired block is in cache; do any readahead ASYNC. 107 * Case 1, 2. 108 */ 109 trace(TR_BREADHIT, pack(vp, size), lblkno); 110 flags |= B_ASYNC; 111 ioblkno = lblkno + (vp->v_ralen ? vp->v_ralen : 1); 112 alreadyincore = (int)incore(vp, ioblkno); 113 bp = NULL; 114 } else { 115 /* Block wasn't in cache, case 3, 4, 5. */ 116 trace(TR_BREADMISS, pack(vp, size), lblkno); 117 bp->b_flags |= B_READ; 118 ioblkno = lblkno; 119 alreadyincore = 0; 120 curproc->p_stats->p_ru.ru_inblock++; /* XXX */ 121 } 122 /* 123 * XXX 124 * Replace 1 with a window size based on some permutation of 125 * maxcontig and rot_delay. This will let you figure out how 126 * many blocks you should read-ahead (case 2, 4, 5). 127 * 128 * If the access isn't sequential, reset the window to 1. 129 * Note that a read to the same block is considered sequential. 130 * This catches the case where the file is being read sequentially, 131 * but at smaller than the filesystem block size. 132 */ 133 rbp = NULL; 134 if (!ISSEQREAD(vp, lblkno)) { 135 vp->v_ralen = 0; 136 vp->v_maxra = lblkno; 137 } else if ((ioblkno + 1) * size <= filesize && !alreadyincore && 138 !(error = VOP_BMAP(vp, ioblkno, NULL, &blkno, &num_ra)) && 139 blkno != -1) { 140 /* 141 * Reading sequentially, and the next block is not in the 142 * cache. We are going to try reading ahead. 143 */ 144 if (num_ra) { 145 /* 146 * If our desired readahead block had been read 147 * in a previous readahead but is no longer in 148 * core, then we may be reading ahead too far 149 * or are not using our readahead very rapidly. 150 * In this case we scale back the window. 151 */ 152 if (!alreadyincore && ioblkno <= vp->v_maxra) 153 vp->v_ralen = max(vp->v_ralen >> 1, 1); 154 /* 155 * There are more sequential blocks than our current 156 * window allows, scale up. Ideally we want to get 157 * in sync with the filesystem maxcontig value. 158 */ 159 else if (num_ra > vp->v_ralen && lblkno != vp->v_lastr) 160 vp->v_ralen = vp->v_ralen ? 161 min(num_ra, vp->v_ralen << 1) : 1; 162 163 if (num_ra > vp->v_ralen) 164 num_ra = vp->v_ralen; 165 } 166 167 if (num_ra) /* case 2, 4 */ 168 rbp = cluster_rbuild(vp, filesize, 169 bp, ioblkno, blkno, size, num_ra, flags); 170 else if (ioblkno == lblkno) { 171 bp->b_blkno = blkno; 172 /* Case 5: check how many blocks to read ahead */ 173 ++ioblkno; 174 if ((ioblkno + 1) * size > filesize || 175 incore(vp, ioblkno) || (error = VOP_BMAP(vp, 176 ioblkno, NULL, &blkno, &num_ra)) || blkno == -1) 177 goto skip_readahead; 178 /* 179 * Adjust readahead as above 180 */ 181 if (num_ra) { 182 if (!alreadyincore && ioblkno <= vp->v_maxra) 183 vp->v_ralen = max(vp->v_ralen >> 1, 1); 184 else if (num_ra > vp->v_ralen && 185 lblkno != vp->v_lastr) 186 vp->v_ralen = vp->v_ralen ? 187 min(num_ra,vp->v_ralen<<1) : 1; 188 if (num_ra > vp->v_ralen) 189 num_ra = vp->v_ralen; 190 } 191 flags |= B_ASYNC; 192 if (num_ra) 193 rbp = cluster_rbuild(vp, filesize, 194 NULL, ioblkno, blkno, size, num_ra, flags); 195 else { 196 rbp = getblk(vp, ioblkno, size, 0, 0); 197 rbp->b_flags |= flags; 198 rbp->b_blkno = blkno; 199 } 200 } else { 201 /* case 2; read ahead single block */ 202 rbp = getblk(vp, ioblkno, size, 0, 0); 203 rbp->b_flags |= flags; 204 rbp->b_blkno = blkno; 205 } 206 207 if (rbp == bp) /* case 4 */ 208 rbp = NULL; 209 else if (rbp) { /* case 2, 5 */ 210 trace(TR_BREADMISSRA, 211 pack(vp, (num_ra + 1) * size), ioblkno); 212 curproc->p_stats->p_ru.ru_inblock++; /* XXX */ 213 } 214 } 215 216 /* XXX Kirk, do we need to make sure the bp has creds? */ 217 skip_readahead: 218 if (bp) 219 if (bp->b_flags & (B_DONE | B_DELWRI)) 220 panic("cluster_read: DONE bp"); 221 else 222 error = VOP_STRATEGY(bp); 223 224 if (rbp) 225 if (error || rbp->b_flags & (B_DONE | B_DELWRI)) { 226 rbp->b_flags &= ~(B_ASYNC | B_READ); 227 brelse(rbp); 228 } else 229 (void) VOP_STRATEGY(rbp); 230 231 /* 232 * Recalculate our maximum readahead 233 */ 234 if (rbp == NULL) 235 rbp = bp; 236 if (rbp) 237 vp->v_maxra = rbp->b_lblkno + (rbp->b_bufsize / size) - 1; 238 239 if (bp) 240 return(biowait(bp)); 241 return(error); 242 } 243 244 /* 245 * If blocks are contiguous on disk, use this to provide clustered 246 * read ahead. We will read as many blocks as possible sequentially 247 * and then parcel them up into logical blocks in the buffer hash table. 248 */ 249 struct buf * 250 cluster_rbuild(vp, filesize, bp, lbn, blkno, size, run, flags) 251 struct vnode *vp; 252 u_quad_t filesize; 253 struct buf *bp; 254 daddr_t lbn; 255 daddr_t blkno; 256 long size; 257 int run; 258 long flags; 259 { 260 struct cluster_save *b_save; 261 struct buf *tbp; 262 daddr_t bn; 263 int i, inc; 264 265 #ifdef DIAGNOSTIC 266 if (size != vp->v_mount->mnt_stat.f_iosize) 267 panic("cluster_rbuild: size %d != filesize %d\n", 268 size, vp->v_mount->mnt_stat.f_iosize); 269 #endif 270 if (size * (lbn + run + 1) > filesize) 271 --run; 272 if (run == 0) { 273 if (!bp) { 274 bp = getblk(vp, lbn, size, 0, 0); 275 bp->b_blkno = blkno; 276 bp->b_flags |= flags; 277 } 278 return(bp); 279 } 280 281 bp = cluster_newbuf(vp, bp, flags, blkno, lbn, size, run + 1); 282 if (bp->b_flags & (B_DONE | B_DELWRI)) 283 return (bp); 284 285 b_save = malloc(sizeof(struct buf *) * run + sizeof(struct cluster_save), 286 M_SEGMENT, M_WAITOK); 287 b_save->bs_bufsize = b_save->bs_bcount = size; 288 b_save->bs_nchildren = 0; 289 b_save->bs_children = (struct buf **)(b_save + 1); 290 b_save->bs_saveaddr = bp->b_saveaddr; 291 bp->b_saveaddr = (caddr_t) b_save; 292 293 inc = btodb(size); 294 for (bn = blkno + inc, i = 1; i <= run; ++i, bn += inc) { 295 if (incore(vp, lbn + i)) { 296 if (i == 1) { 297 bp->b_saveaddr = b_save->bs_saveaddr; 298 bp->b_flags &= ~B_CALL; 299 bp->b_iodone = NULL; 300 allocbuf(bp, size); 301 free(b_save, M_SEGMENT); 302 } else 303 allocbuf(bp, size * i); 304 break; 305 } 306 tbp = getblk(vp, lbn + i, 0, 0, 0); 307 /* 308 * getblk may return some memory in the buffer if there were 309 * no empty buffers to shed it to. If there is currently 310 * memory in the buffer, we move it down size bytes to make 311 * room for the valid pages that cluster_callback will insert. 312 * We do this now so we don't have to do it at interrupt time 313 * in the callback routine. 314 */ 315 if (tbp->b_bufsize != 0) { 316 caddr_t bdata = (char *)tbp->b_data; 317 318 if (tbp->b_bufsize + size > MAXBSIZE) 319 panic("cluster_rbuild: too much memory"); 320 if (tbp->b_bufsize > size) { 321 /* 322 * XXX if the source and destination regions 323 * overlap we have to copy backward to avoid 324 * clobbering any valid pages (i.e. pagemove 325 * implementations typically can't handle 326 * overlap). 327 */ 328 bdata += tbp->b_bufsize; 329 while (bdata > (char *)tbp->b_data) { 330 bdata -= CLBYTES; 331 pagemove(bdata, bdata + size, CLBYTES); 332 } 333 } else 334 pagemove(bdata, bdata + size, tbp->b_bufsize); 335 } 336 tbp->b_blkno = bn; 337 tbp->b_flags |= flags | B_READ | B_ASYNC; 338 ++b_save->bs_nchildren; 339 b_save->bs_children[i - 1] = tbp; 340 } 341 return(bp); 342 } 343 344 /* 345 * Either get a new buffer or grow the existing one. 346 */ 347 struct buf * 348 cluster_newbuf(vp, bp, flags, blkno, lblkno, size, run) 349 struct vnode *vp; 350 struct buf *bp; 351 long flags; 352 daddr_t blkno; 353 daddr_t lblkno; 354 long size; 355 int run; 356 { 357 if (!bp) { 358 bp = getblk(vp, lblkno, size, 0, 0); 359 if (bp->b_flags & (B_DONE | B_DELWRI)) { 360 bp->b_blkno = blkno; 361 return(bp); 362 } 363 } 364 allocbuf(bp, run * size); 365 bp->b_blkno = blkno; 366 bp->b_iodone = cluster_callback; 367 bp->b_flags |= flags | B_CALL; 368 return(bp); 369 } 370 371 /* 372 * Cleanup after a clustered read or write. 373 * This is complicated by the fact that any of the buffers might have 374 * extra memory (if there were no empty buffer headers at allocbuf time) 375 * that we will need to shift around. 376 */ 377 void 378 cluster_callback(bp) 379 struct buf *bp; 380 { 381 struct cluster_save *b_save; 382 struct buf **bpp, *tbp; 383 long bsize; 384 caddr_t cp; 385 int error = 0; 386 387 /* 388 * Must propogate errors to all the components. 389 */ 390 if (bp->b_flags & B_ERROR) 391 error = bp->b_error; 392 393 b_save = (struct cluster_save *)(bp->b_saveaddr); 394 bp->b_saveaddr = b_save->bs_saveaddr; 395 396 bsize = b_save->bs_bufsize; 397 cp = (char *)bp->b_data + bsize; 398 /* 399 * Move memory from the large cluster buffer into the component 400 * buffers and mark IO as done on these. 401 */ 402 for (bpp = b_save->bs_children; b_save->bs_nchildren--; ++bpp) { 403 tbp = *bpp; 404 pagemove(cp, tbp->b_data, bsize); 405 tbp->b_bufsize += bsize; 406 tbp->b_bcount = bsize; 407 if (error) { 408 tbp->b_flags |= B_ERROR; 409 tbp->b_error = error; 410 } 411 biodone(tbp); 412 bp->b_bufsize -= bsize; 413 cp += bsize; 414 } 415 /* 416 * If there was excess memory in the cluster buffer, 417 * slide it up adjacent to the remaining valid data. 418 */ 419 if (bp->b_bufsize != bsize) { 420 if (bp->b_bufsize < bsize) 421 panic("cluster_callback: too little memory"); 422 pagemove(cp, (char *)bp->b_data + bsize, bp->b_bufsize - bsize); 423 } 424 bp->b_bcount = bsize; 425 bp->b_iodone = NULL; 426 free(b_save, M_SEGMENT); 427 if (bp->b_flags & B_ASYNC) 428 brelse(bp); 429 else { 430 bp->b_flags &= ~B_WANTED; 431 wakeup((caddr_t)bp); 432 } 433 } 434 435 /* 436 * Do clustered write for FFS. 437 * 438 * Three cases: 439 * 1. Write is not sequential (write asynchronously) 440 * Write is sequential: 441 * 2. beginning of cluster - begin cluster 442 * 3. middle of a cluster - add to cluster 443 * 4. end of a cluster - asynchronously write cluster 444 */ 445 void 446 cluster_write(bp, filesize) 447 struct buf *bp; 448 u_quad_t filesize; 449 { 450 struct vnode *vp; 451 daddr_t lbn; 452 int maxclen, cursize; 453 454 vp = bp->b_vp; 455 lbn = bp->b_lblkno; 456 457 /* Initialize vnode to beginning of file. */ 458 if (lbn == 0) 459 vp->v_lasta = vp->v_clen = vp->v_cstart = vp->v_lastw = 0; 460 461 if (vp->v_clen == 0 || lbn != vp->v_lastw + 1 || 462 (bp->b_blkno != vp->v_lasta + btodb(bp->b_bcount))) { 463 maxclen = MAXBSIZE / vp->v_mount->mnt_stat.f_iosize - 1; 464 if (vp->v_clen != 0) { 465 /* 466 * Next block is not sequential. 467 * 468 * If we are not writing at end of file, the process 469 * seeked to another point in the file since its 470 * last write, or we have reached our maximum 471 * cluster size, then push the previous cluster. 472 * Otherwise try reallocating to make it sequential. 473 */ 474 cursize = vp->v_lastw - vp->v_cstart + 1; 475 if (!doreallocblks || 476 (lbn + 1) * bp->b_bcount != filesize || 477 lbn != vp->v_lastw + 1 || vp->v_clen <= cursize) { 478 cluster_wbuild(vp, NULL, bp->b_bcount, 479 vp->v_cstart, cursize, lbn); 480 } else { 481 struct buf **bpp, **endbp; 482 struct cluster_save *buflist; 483 484 buflist = cluster_collectbufs(vp, bp); 485 endbp = &buflist->bs_children 486 [buflist->bs_nchildren - 1]; 487 if (VOP_REALLOCBLKS(vp, buflist)) { 488 /* 489 * Failed, push the previous cluster. 490 */ 491 for (bpp = buflist->bs_children; 492 bpp < endbp; bpp++) 493 brelse(*bpp); 494 free(buflist, M_SEGMENT); 495 cluster_wbuild(vp, NULL, bp->b_bcount, 496 vp->v_cstart, cursize, lbn); 497 } else { 498 /* 499 * Succeeded, keep building cluster. 500 */ 501 for (bpp = buflist->bs_children; 502 bpp <= endbp; bpp++) 503 bdwrite(*bpp); 504 free(buflist, M_SEGMENT); 505 vp->v_lastw = lbn; 506 vp->v_lasta = bp->b_blkno; 507 return; 508 } 509 } 510 } 511 /* 512 * Consider beginning a cluster. 513 * If at end of file, make cluster as large as possible, 514 * otherwise find size of existing cluster. 515 */ 516 if ((lbn + 1) * bp->b_bcount != filesize && 517 (VOP_BMAP(vp, lbn, NULL, &bp->b_blkno, &maxclen) || 518 bp->b_blkno == -1)) { 519 bawrite(bp); 520 vp->v_clen = 0; 521 vp->v_lasta = bp->b_blkno; 522 vp->v_cstart = lbn + 1; 523 vp->v_lastw = lbn; 524 return; 525 } 526 vp->v_clen = maxclen; 527 if (maxclen == 0) { /* I/O not contiguous */ 528 vp->v_cstart = lbn + 1; 529 bawrite(bp); 530 } else { /* Wait for rest of cluster */ 531 vp->v_cstart = lbn; 532 bdwrite(bp); 533 } 534 } else if (lbn == vp->v_cstart + vp->v_clen) { 535 /* 536 * At end of cluster, write it out. 537 */ 538 cluster_wbuild(vp, bp, bp->b_bcount, vp->v_cstart, 539 vp->v_clen + 1, lbn); 540 vp->v_clen = 0; 541 vp->v_cstart = lbn + 1; 542 } else 543 /* 544 * In the middle of a cluster, so just delay the 545 * I/O for now. 546 */ 547 bdwrite(bp); 548 vp->v_lastw = lbn; 549 vp->v_lasta = bp->b_blkno; 550 } 551 552 553 /* 554 * This is an awful lot like cluster_rbuild...wish they could be combined. 555 * The last lbn argument is the current block on which I/O is being 556 * performed. Check to see that it doesn't fall in the middle of 557 * the current block (if last_bp == NULL). 558 */ 559 void 560 cluster_wbuild(vp, last_bp, size, start_lbn, len, lbn) 561 struct vnode *vp; 562 struct buf *last_bp; 563 long size; 564 daddr_t start_lbn; 565 int len; 566 daddr_t lbn; 567 { 568 struct cluster_save *b_save; 569 struct buf *bp, *tbp; 570 caddr_t cp; 571 int i, s; 572 573 #ifdef DIAGNOSTIC 574 if (size != vp->v_mount->mnt_stat.f_iosize) 575 panic("cluster_wbuild: size %d != filesize %d\n", 576 size, vp->v_mount->mnt_stat.f_iosize); 577 #endif 578 redo: 579 while ((!incore(vp, start_lbn) || start_lbn == lbn) && len) { 580 ++start_lbn; 581 --len; 582 } 583 584 /* Get more memory for current buffer */ 585 if (len <= 1) { 586 if (last_bp) { 587 bawrite(last_bp); 588 } else if (len) { 589 bp = getblk(vp, start_lbn, size, 0, 0); 590 bawrite(bp); 591 } 592 return; 593 } 594 595 bp = getblk(vp, start_lbn, size, 0, 0); 596 if (!(bp->b_flags & B_DELWRI)) { 597 ++start_lbn; 598 --len; 599 brelse(bp); 600 goto redo; 601 } 602 603 /* 604 * Extra memory in the buffer, punt on this buffer. 605 * XXX we could handle this in most cases, but we would have to 606 * push the extra memory down to after our max possible cluster 607 * size and then potentially pull it back up if the cluster was 608 * terminated prematurely--too much hassle. 609 */ 610 if (bp->b_bcount != bp->b_bufsize) { 611 ++start_lbn; 612 --len; 613 bawrite(bp); 614 goto redo; 615 } 616 617 --len; 618 b_save = malloc(sizeof(struct buf *) * len + sizeof(struct cluster_save), 619 M_SEGMENT, M_WAITOK); 620 b_save->bs_bcount = bp->b_bcount; 621 b_save->bs_bufsize = bp->b_bufsize; 622 b_save->bs_nchildren = 0; 623 b_save->bs_children = (struct buf **)(b_save + 1); 624 b_save->bs_saveaddr = bp->b_saveaddr; 625 bp->b_saveaddr = (caddr_t) b_save; 626 627 bp->b_flags |= B_CALL; 628 bp->b_iodone = cluster_callback; 629 cp = (char *)bp->b_data + size; 630 for (++start_lbn, i = 0; i < len; ++i, ++start_lbn) { 631 /* 632 * Block is not in core or the non-sequential block 633 * ending our cluster was part of the cluster (in which 634 * case we don't want to write it twice). 635 */ 636 if (!incore(vp, start_lbn) || 637 last_bp == NULL && start_lbn == lbn) 638 break; 639 640 /* 641 * Get the desired block buffer (unless it is the final 642 * sequential block whose buffer was passed in explictly 643 * as last_bp). 644 */ 645 if (last_bp == NULL || start_lbn != lbn) { 646 tbp = getblk(vp, start_lbn, size, 0, 0); 647 if (!(tbp->b_flags & B_DELWRI)) { 648 brelse(tbp); 649 break; 650 } 651 } else 652 tbp = last_bp; 653 654 ++b_save->bs_nchildren; 655 656 /* Move memory from children to parent */ 657 if (tbp->b_blkno != (bp->b_blkno + btodb(bp->b_bufsize))) { 658 printf("Clustered Block: %d addr %x bufsize: %d\n", 659 bp->b_lblkno, bp->b_blkno, bp->b_bufsize); 660 printf("Child Block: %d addr: %x\n", tbp->b_lblkno, 661 tbp->b_blkno); 662 panic("Clustered write to wrong blocks"); 663 } 664 665 pagemove(tbp->b_data, cp, size); 666 bp->b_bcount += size; 667 bp->b_bufsize += size; 668 669 tbp->b_bufsize -= size; 670 tbp->b_flags &= ~(B_READ | B_DONE | B_ERROR | B_DELWRI); 671 tbp->b_flags |= (B_ASYNC | B_AGE); 672 s = splbio(); 673 reassignbuf(tbp, tbp->b_vp); /* put on clean list */ 674 ++tbp->b_vp->v_numoutput; 675 splx(s); 676 b_save->bs_children[i] = tbp; 677 678 cp += size; 679 } 680 681 if (i == 0) { 682 /* None to cluster */ 683 bp->b_saveaddr = b_save->bs_saveaddr; 684 bp->b_flags &= ~B_CALL; 685 bp->b_iodone = NULL; 686 free(b_save, M_SEGMENT); 687 } 688 bawrite(bp); 689 if (i < len) { 690 len -= i + 1; 691 start_lbn += 1; 692 goto redo; 693 } 694 } 695 696 /* 697 * Collect together all the buffers in a cluster. 698 * Plus add one additional buffer. 699 */ 700 struct cluster_save * 701 cluster_collectbufs(vp, last_bp) 702 struct vnode *vp; 703 struct buf *last_bp; 704 { 705 struct cluster_save *buflist; 706 daddr_t lbn; 707 int i, len; 708 709 len = vp->v_lastw - vp->v_cstart + 1; 710 buflist = malloc(sizeof(struct buf *) * (len + 1) + sizeof(*buflist), 711 M_SEGMENT, M_WAITOK); 712 buflist->bs_nchildren = 0; 713 buflist->bs_children = (struct buf **)(buflist + 1); 714 for (lbn = vp->v_cstart, i = 0; i < len; lbn++, i++) 715 (void)bread(vp, lbn, last_bp->b_bcount, NOCRED, 716 &buflist->bs_children[i]); 717 buflist->bs_children[i] = last_bp; 718 buflist->bs_nchildren = i + 1; 719 return (buflist); 720 } 721