/*-
 * Copyright (c) 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * %sccs.include.redist.c%
 *
 *	@(#)vfs_cluster.c	8.10 (Berkeley) 03/28/95
 */

#include <sys/param.h>
#include <sys/proc.h>
#include <sys/buf.h>
#include <sys/vnode.h>
#include <sys/mount.h>
#include <sys/trace.h>
#include <sys/malloc.h>
#include <sys/resourcevar.h>
#include <libkern/libkern.h>

/*
 * Local declarations
 */
struct buf *cluster_newbuf __P((struct vnode *, struct buf *, long, daddr_t,
	    daddr_t, long, int));
struct buf *cluster_rbuild __P((struct vnode *, u_quad_t, struct buf *,
	    daddr_t, daddr_t, long, int, long));
void	    cluster_wbuild __P((struct vnode *, struct buf *, long,
	    daddr_t, int, daddr_t));
struct cluster_save *cluster_collectbufs __P((struct vnode *, struct buf *));

#ifdef DIAGNOSTIC
/*
 * Set to 1 if reads of block zero should cause readahead to be done.
 * Set to 0 to treat a read of block zero as a non-sequential read.
 *
 * Setting to one assumes that most reads of block zero of files are due to
 * sequential passes over the files (e.g. cat, sum) where additional blocks
 * will soon be needed.  Setting to zero assumes that the majority are
 * surgical strikes to get particular info (e.g. size, file) where readahead
 * blocks will not be used and, in fact, push out other potentially useful
 * blocks from the cache.  The former seems intuitive, but some quick tests
 * showed that the latter performed better from a system-wide point of view.
 */
int	doclusterraz = 0;
#define ISSEQREAD(vp, blk) \
	(((blk) != 0 || doclusterraz) && \
	 ((blk) == (vp)->v_lastr + 1 || (blk) == (vp)->v_lastr))
#else
#define ISSEQREAD(vp, blk) \
	((blk) != 0 && ((blk) == (vp)->v_lastr + 1 || (blk) == (vp)->v_lastr))
#endif
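/*
 * For example, with v_lastr == 7, a read of logical block 7 (a re-read,
 * as happens when a file is read sequentially in pieces smaller than the
 * filesystem block size) or of block 8 counts as sequential; a read of
 * block 12 does not, and causes the readahead window to be reset.
 */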

/*
 * This replaces bread.  If this is a bread at the beginning of a file and
 * lastr is 0, we assume this is the first read and we'll read up to two
 * blocks if they are sequential.  After that, we'll do regular read ahead
 * in clustered chunks.
 *
 * There are 4 or 5 cases depending on how you count:
 *	Desired block is in the cache:
 *	    1 Not sequential access (0 I/Os).
 *	    2 Access is sequential, do read-ahead (1 ASYNC).
 *	Desired block is not in cache:
 *	    3 Not sequential access (1 SYNC).
 *	    4 Sequential access, next block is contiguous (1 SYNC).
 *	    5 Sequential access, next block is not contiguous (1 SYNC, 1 ASYNC)
 *
 * There are potentially two buffers that require I/O.
 *	bp is the block requested.
 *	rbp is the read-ahead block.
 *	If either is NULL, then you don't have to do the I/O.
 */
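/*
 * A typical call site (sketched here only for illustration; the exact code
 * lives in the per-filesystem read routine, e.g. the UFS READ() path)
 * simply substitutes cluster_read() for bread():
 *
 *	if (doclusterread)
 *		error = cluster_read(vp, ip->i_size, lbn, size, NOCRED, &bp);
 *	else
 *		error = bread(vp, lbn, size, NOCRED, &bp);
 */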
cluster_read(vp, filesize, lblkno, size, cred, bpp)
	struct vnode *vp;
	u_quad_t filesize;
	daddr_t lblkno;
	long size;
	struct ucred *cred;
	struct buf **bpp;
{
	struct buf *bp, *rbp;
	daddr_t blkno, ioblkno;
	long flags;
	int error, num_ra, alreadyincore;

#ifdef DIAGNOSTIC
	if (size == 0)
		panic("cluster_read: size = 0");
#endif

	error = 0;
	flags = B_READ;
	*bpp = bp = getblk(vp, lblkno, size, 0, 0);
	if (bp->b_flags & B_CACHE) {
		/*
		 * Desired block is in cache; do any readahead ASYNC.
		 * Case 1, 2.
		 */
		trace(TR_BREADHIT, pack(vp, size), lblkno);
		flags |= B_ASYNC;
		ioblkno = lblkno + (vp->v_ralen ? vp->v_ralen : 1);
		alreadyincore = incore(vp, ioblkno) != NULL;
		bp = NULL;
	} else {
		/* Block wasn't in cache, case 3, 4, 5. */
		trace(TR_BREADMISS, pack(vp, size), lblkno);
		bp->b_flags |= B_READ;
		ioblkno = lblkno;
		alreadyincore = 0;
		curproc->p_stats->p_ru.ru_inblock++;		/* XXX */
	}
	/*
	 * XXX
	 * Replace 1 with a window size based on some permutation of
	 * maxcontig and rot_delay.  This will let you figure out how
	 * many blocks you should read-ahead (case 2, 4, 5).
	 *
	 * If the access isn't sequential, reset the window to 1.
	 * Note that a read to the same block is considered sequential.
	 * This catches the case where the file is being read sequentially,
	 * but at smaller than the filesystem block size.
	 */
	rbp = NULL;
	if (!ISSEQREAD(vp, lblkno)) {
		vp->v_ralen = 0;
		vp->v_maxra = lblkno;
	} else if ((ioblkno + 1) * size <= filesize && !alreadyincore &&
	    !(error = VOP_BMAP(vp, ioblkno, NULL, &blkno, &num_ra)) &&
	    blkno != -1) {
		/*
		 * Reading sequentially, and the next block is not in the
		 * cache.  We are going to try reading ahead.
		 */
		if (num_ra) {
			/*
			 * If our desired readahead block had been read
			 * in a previous readahead but is no longer in
			 * core, then we may be reading ahead too far
			 * or are not using our readahead very rapidly.
			 * In this case we scale back the window.
			 */
			if (!alreadyincore && ioblkno <= vp->v_maxra)
				vp->v_ralen = max(vp->v_ralen >> 1, 1);
			/*
			 * There are more sequential blocks than our current
			 * window allows, scale up.  Ideally we want to get
			 * in sync with the filesystem maxcontig value.
			 */
			else if (num_ra > vp->v_ralen && lblkno != vp->v_lastr)
				vp->v_ralen = vp->v_ralen ?
				    min(num_ra, vp->v_ralen << 1) : 1;

			if (num_ra > vp->v_ralen)
				num_ra = vp->v_ralen;
		}

		if (num_ra)				/* case 2, 4 */
			rbp = cluster_rbuild(vp, filesize,
			    bp, ioblkno, blkno, size, num_ra, flags);
		else if (ioblkno == lblkno) {
			bp->b_blkno = blkno;
			/* Case 5: check how many blocks to read ahead */
			++ioblkno;
			if ((ioblkno + 1) * size > filesize ||
			    incore(vp, ioblkno) || (error = VOP_BMAP(vp,
			    ioblkno, NULL, &blkno, &num_ra)) || blkno == -1)
				goto skip_readahead;
			/*
			 * Adjust readahead as above.
			 * Don't check alreadyincore, we know it is 0 from
			 * the previous conditional.
			 */
			if (num_ra) {
				if (ioblkno <= vp->v_maxra)
					vp->v_ralen = max(vp->v_ralen >> 1, 1);
				else if (num_ra > vp->v_ralen &&
				    lblkno != vp->v_lastr)
					vp->v_ralen = vp->v_ralen ?
					    min(num_ra, vp->v_ralen << 1) : 1;
				if (num_ra > vp->v_ralen)
					num_ra = vp->v_ralen;
			}
			flags |= B_ASYNC;
			if (num_ra)
				rbp = cluster_rbuild(vp, filesize,
				    NULL, ioblkno, blkno, size, num_ra, flags);
			else {
				rbp = getblk(vp, ioblkno, size, 0, 0);
				rbp->b_flags |= flags;
				rbp->b_blkno = blkno;
			}
		} else {
			/* case 2; read ahead single block */
			rbp = getblk(vp, ioblkno, size, 0, 0);
			rbp->b_flags |= flags;
			rbp->b_blkno = blkno;
		}

		if (rbp == bp)			/* case 4 */
			rbp = NULL;
		else if (rbp) {			/* case 2, 5 */
			trace(TR_BREADMISSRA,
			    pack(vp, (num_ra + 1) * size), ioblkno);
			curproc->p_stats->p_ru.ru_inblock++;	/* XXX */
		}
	}

	/* XXX Kirk, do we need to make sure the bp has creds? */
skip_readahead:
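	/*
	 * Start the I/O: a synchronous read for the requested block if it
	 * was not found in the cache, and (unless an error has already been
	 * detected or the buffer is somehow already valid) an asynchronous
	 * read for the readahead buffer built above.
	 */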
	if (bp)
		if (bp->b_flags & (B_DONE | B_DELWRI))
			panic("cluster_read: DONE bp");
		else
			error = VOP_STRATEGY(bp);

	if (rbp)
		if (error || rbp->b_flags & (B_DONE | B_DELWRI)) {
			rbp->b_flags &= ~(B_ASYNC | B_READ);
			brelse(rbp);
		} else
			(void) VOP_STRATEGY(rbp);

	/*
	 * Recalculate our maximum readahead, i.e. the last logical block
	 * covered by the buffer we just initiated I/O on.
	 */
	if (rbp == NULL)
		rbp = bp;
	if (rbp)
		vp->v_maxra = rbp->b_lblkno + (rbp->b_bufsize / size) - 1;

	if (bp)
		return(biowait(bp));
	return(error);
}

/*
 * If blocks are contiguous on disk, use this to provide clustered
 * read ahead.  We will read as many blocks as possible sequentially
 * and then parcel them up into logical blocks in the buffer hash table.
 */
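/*
 * Here lbn is the logical block being read, blkno is the corresponding
 * starting disk block, size is the filesystem block size, run is the
 * number of additional contiguous blocks available beyond lbn, and bp
 * (possibly NULL) is the buffer already obtained for lbn itself.
 */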
struct buf *
cluster_rbuild(vp, filesize, bp, lbn, blkno, size, run, flags)
	struct vnode *vp;
	u_quad_t filesize;
	struct buf *bp;
	daddr_t lbn;
	daddr_t blkno;
	long size;
	int run;
	long flags;
{
	struct cluster_save *b_save;
	struct buf *tbp;
	daddr_t bn;
	int i, inc;

#ifdef DIAGNOSTIC
	if (size != vp->v_mount->mnt_stat.f_iosize)
		panic("cluster_rbuild: size %ld != filesystem iosize %ld\n",
		    size, vp->v_mount->mnt_stat.f_iosize);
#endif
	if (size * (lbn + run + 1) > filesize)
		--run;
	if (run == 0) {
		if (!bp) {
			bp = getblk(vp, lbn, size, 0, 0);
			bp->b_blkno = blkno;
			bp->b_flags |= flags;
		}
		return(bp);
	}

	bp = cluster_newbuf(vp, bp, flags, blkno, lbn, size, run + 1);
	if (bp->b_flags & (B_DONE | B_DELWRI))
		return (bp);

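	/*
	 * Allocate the cluster bookkeeping record.  The array of child
	 * buffer pointers lives immediately after the cluster_save
	 * structure itself, and the record is chained through b_saveaddr
	 * so that cluster_callback can find it (and restore the previous
	 * value) when the I/O completes.
	 */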
	b_save = malloc(sizeof(struct buf *) * run + sizeof(struct cluster_save),
	    M_SEGMENT, M_WAITOK);
	b_save->bs_bufsize = b_save->bs_bcount = size;
	b_save->bs_nchildren = 0;
	b_save->bs_children = (struct buf **)(b_save + 1);
	b_save->bs_saveaddr = bp->b_saveaddr;
	bp->b_saveaddr = (caddr_t) b_save;

	inc = btodb(size);
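	/*
	 * btodb() converts the filesystem block size into DEV_BSIZE disk
	 * blocks, so bn advances by exactly one filesystem block per
	 * iteration of the loop below.
	 */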
	for (bn = blkno + inc, i = 1; i <= run; ++i, bn += inc) {
		/*
		 * A component of the cluster is already in core,
		 * terminate the cluster early.
		 */
		if (incore(vp, lbn + i))
			break;
		tbp = getblk(vp, lbn + i, 0, 0, 0);
		/*
		 * getblk may return some memory in the buffer if there were
		 * no empty buffers to shed it to.  If there is currently
		 * memory in the buffer, we move it down size bytes to make
		 * room for the valid pages that cluster_callback will insert.
		 * We do this now so we don't have to do it at interrupt time
		 * in the callback routine.
		 */
		if (tbp->b_bufsize != 0) {
			caddr_t bdata = (char *)tbp->b_data;

			/*
			 * No room in the buffer to add another page,
			 * terminate the cluster early.
			 */
			if (tbp->b_bufsize + size > MAXBSIZE) {
#ifdef DIAGNOSTIC
				if (tbp->b_bufsize != MAXBSIZE)
					panic("cluster_rbuild: too much memory");
#endif
				brelse(tbp);
				break;
			}
			if (tbp->b_bufsize > size) {
				/*
				 * XXX if the source and destination regions
				 * overlap we have to copy backward to avoid
				 * clobbering any valid pages (i.e. pagemove
				 * implementations typically can't handle
				 * overlap).
				 */
				bdata += tbp->b_bufsize;
				while (bdata > (char *)tbp->b_data) {
					bdata -= CLBYTES;
					pagemove(bdata, bdata + size, CLBYTES);
				}
			} else
				pagemove(bdata, bdata + size, tbp->b_bufsize);
		}
		tbp->b_blkno = bn;
		tbp->b_flags |= flags | B_READ | B_ASYNC;
		++b_save->bs_nchildren;
		b_save->bs_children[i - 1] = tbp;
	}
	/*
	 * The cluster may have been terminated early, adjust the cluster
	 * buffer size accordingly.  If no cluster could be formed,
	 * deallocate the cluster save info.
	 */
	if (i <= run) {
		if (i == 1) {
			bp->b_saveaddr = b_save->bs_saveaddr;
			bp->b_flags &= ~B_CALL;
			bp->b_iodone = NULL;
			free(b_save, M_SEGMENT);
		}
		allocbuf(bp, size * i);
	}
	return(bp);
}

/*
 * Either get a new buffer or grow the existing one.
 */
struct buf *
cluster_newbuf(vp, bp, flags, blkno, lblkno, size, run)
	struct vnode *vp;
	struct buf *bp;
	long flags;
	daddr_t blkno;
	daddr_t lblkno;
	long size;
	int run;
{
	if (!bp) {
		bp = getblk(vp, lblkno, size, 0, 0);
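		/*
		 * If the buffer already holds valid data (it is done or has
		 * a delayed write pending), just record the disk address and
		 * hand it back; no cluster I/O needs to be set up for it.
		 */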
		if (bp->b_flags & (B_DONE | B_DELWRI)) {
			bp->b_blkno = blkno;
			return(bp);
		}
	}
	allocbuf(bp, run * size);
	bp->b_blkno = blkno;
	bp->b_iodone = cluster_callback;
	bp->b_flags |= flags | B_CALL;
	return(bp);
}

/*
 * Cleanup after a clustered read or write.
 * This is complicated by the fact that any of the buffers might have
 * extra memory (if there were no empty buffer headers at allocbuf time)
 * that we will need to shift around.
 */
void
cluster_callback(bp)
	struct buf *bp;
{
	struct cluster_save *b_save;
	struct buf **bpp, *tbp;
	long bsize;
	caddr_t cp;
	int error = 0;

	/*
	 * Must propagate errors to all the components.
	 */
	if (bp->b_flags & B_ERROR)
		error = bp->b_error;

	b_save = (struct cluster_save *)(bp->b_saveaddr);
	bp->b_saveaddr = b_save->bs_saveaddr;

	bsize = b_save->bs_bufsize;
	cp = (char *)bp->b_data + bsize;
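	/*
	 * The first bsize bytes of the cluster buffer belong to bp itself;
	 * cp walks the remaining data, handing bsize bytes to each child
	 * buffer in turn.
	 */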
	/*
	 * Move memory from the large cluster buffer into the component
	 * buffers and mark IO as done on these.
	 */
	for (bpp = b_save->bs_children; b_save->bs_nchildren--; ++bpp) {
		tbp = *bpp;
		pagemove(cp, tbp->b_data, bsize);
		tbp->b_bufsize += bsize;
		tbp->b_bcount = bsize;
		if (error) {
			tbp->b_flags |= B_ERROR;
			tbp->b_error = error;
		}
		biodone(tbp);
		bp->b_bufsize -= bsize;
		cp += bsize;
	}
	/*
	 * If there was excess memory in the cluster buffer,
	 * slide it up adjacent to the remaining valid data.
	 */
	if (bp->b_bufsize != bsize) {
		if (bp->b_bufsize < bsize)
			panic("cluster_callback: too little memory");
		pagemove(cp, (char *)bp->b_data + bsize, bp->b_bufsize - bsize);
	}
	bp->b_bcount = bsize;
	bp->b_iodone = NULL;
	free(b_save, M_SEGMENT);
	if (bp->b_flags & B_ASYNC)
		brelse(bp);
	else {
		bp->b_flags &= ~B_WANTED;
		wakeup((caddr_t)bp);
	}
}

/*
 * Do clustered write for FFS.
 *
 * Four cases:
 *	1. Write is not sequential (write asynchronously)
 *	Write is sequential:
 *	2.	beginning of cluster - begin cluster
 *	3.	middle of a cluster - add to cluster
 *	4.	end of a cluster - asynchronously write cluster
 */
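/*
 * Illustrative call site (the exact code lives in the per-filesystem
 * write routine, e.g. the UFS WRITE() path): when clustered writes are
 * enabled, cluster_write() is used in place of bawrite()/bdwrite() once
 * a full block has been written:
 *
 *	if (doclusterwrite)
 *		cluster_write(bp, ip->i_size);
 *	else
 *		bawrite(bp);
 */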
void
cluster_write(bp, filesize)
	struct buf *bp;
	u_quad_t filesize;
{
	struct vnode *vp;
	daddr_t lbn;
	int maxclen, cursize;

	vp = bp->b_vp;
	lbn = bp->b_lblkno;

	/* Initialize vnode to beginning of file. */
	if (lbn == 0)
		vp->v_lasta = vp->v_clen = vp->v_cstart = vp->v_lastw = 0;

	if (vp->v_clen == 0 || lbn != vp->v_lastw + 1 ||
	    (bp->b_blkno != vp->v_lasta + btodb(bp->b_bcount))) {
		maxclen = MAXBSIZE / vp->v_mount->mnt_stat.f_iosize - 1;
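		/*
		 * maxclen is the largest number of additional blocks that
		 * can follow this one in a cluster: MAXBSIZE divided by the
		 * filesystem block size, less the block itself.
		 */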
		if (vp->v_clen != 0) {
			/*
			 * Next block is not sequential.
			 *
			 * If we are not writing at end of file, if the
			 * process has seeked to another point in the file
			 * since its last write, or if we have reached our
			 * maximum cluster size, then push the previous
			 * cluster.  Otherwise try reallocating to make it
			 * sequential.
			 */
			cursize = vp->v_lastw - vp->v_cstart + 1;
			if ((lbn + 1) * bp->b_bcount != filesize ||
			    lbn != vp->v_lastw + 1 || vp->v_clen <= cursize) {
				cluster_wbuild(vp, NULL, bp->b_bcount,
				    vp->v_cstart, cursize, lbn);
			} else {
				struct buf **bpp, **endbp;
				struct cluster_save *buflist;

				buflist = cluster_collectbufs(vp, bp);
				endbp = &buflist->bs_children
				    [buflist->bs_nchildren - 1];
				if (VOP_REALLOCBLKS(vp, buflist)) {
					/*
					 * Failed, push the previous cluster.
					 */
					for (bpp = buflist->bs_children;
					    bpp < endbp; bpp++)
						brelse(*bpp);
					free(buflist, M_SEGMENT);
					cluster_wbuild(vp, NULL, bp->b_bcount,
					    vp->v_cstart, cursize, lbn);
				} else {
					/*
					 * Succeeded, keep building cluster.
					 */
					for (bpp = buflist->bs_children;
					    bpp <= endbp; bpp++)
						bdwrite(*bpp);
					free(buflist, M_SEGMENT);
					vp->v_lastw = lbn;
					vp->v_lasta = bp->b_blkno;
					return;
				}
			}
		}
		/*
		 * Consider beginning a cluster.
		 * If at end of file, make cluster as large as possible,
		 * otherwise find size of existing cluster.
		 */
		if ((lbn + 1) * bp->b_bcount != filesize &&
		    (VOP_BMAP(vp, lbn, NULL, &bp->b_blkno, &maxclen) ||
		    bp->b_blkno == -1)) {
			bawrite(bp);
			vp->v_clen = 0;
			vp->v_lasta = bp->b_blkno;
			vp->v_cstart = lbn + 1;
			vp->v_lastw = lbn;
			return;
		}
		vp->v_clen = maxclen;
		if (maxclen == 0) {		/* I/O not contiguous */
			vp->v_cstart = lbn + 1;
			bawrite(bp);
		} else {			/* Wait for rest of cluster */
			vp->v_cstart = lbn;
			bdwrite(bp);
		}
	} else if (lbn == vp->v_cstart + vp->v_clen) {
		/*
		 * At end of cluster, write it out.
		 */
		cluster_wbuild(vp, bp, bp->b_bcount, vp->v_cstart,
		    vp->v_clen + 1, lbn);
		vp->v_clen = 0;
		vp->v_cstart = lbn + 1;
	} else
		/*
		 * In the middle of a cluster, so just delay the
		 * I/O for now.
		 */
		bdwrite(bp);
	vp->v_lastw = lbn;
	vp->v_lasta = bp->b_blkno;
}

/*
 * This is an awful lot like cluster_rbuild...wish they could be combined.
 * The last lbn argument is the current block on which I/O is being
 * performed.  Check to see that it doesn't fall in the middle of
 * the current cluster (if last_bp == NULL).
 */
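/*
 * Here size is the filesystem block size, start_lbn is the first logical
 * block of the cluster, len is the number of blocks in the cluster, and
 * last_bp (possibly NULL) is the buffer for lbn itself, passed in so it
 * can be folded into the cluster rather than written separately.
 */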
void
cluster_wbuild(vp, last_bp, size, start_lbn, len, lbn)
	struct vnode *vp;
	struct buf *last_bp;
	long size;
	daddr_t start_lbn;
	int len;
	daddr_t lbn;
{
	struct cluster_save *b_save;
	struct buf *bp, *tbp;
	caddr_t cp;
	int i, s;

#ifdef DIAGNOSTIC
	if (size != vp->v_mount->mnt_stat.f_iosize)
		panic("cluster_wbuild: size %ld != filesystem iosize %ld\n",
		    size, vp->v_mount->mnt_stat.f_iosize);
#endif
redo:
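	/*
	 * Skip over any leading blocks that are not in core (and so cannot
	 * be clustered) or that are the block (lbn) on which the caller is
	 * currently doing I/O, so that it is not written twice.
	 */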
	while ((!incore(vp, start_lbn) || start_lbn == lbn) && len) {
		++start_lbn;
		--len;
	}

	/* Get more memory for current buffer */
	if (len <= 1) {
		if (last_bp) {
			bawrite(last_bp);
		} else if (len) {
			bp = getblk(vp, start_lbn, size, 0, 0);
			bawrite(bp);
		}
		return;
	}

	bp = getblk(vp, start_lbn, size, 0, 0);
	if (!(bp->b_flags & B_DELWRI)) {
		++start_lbn;
		--len;
		brelse(bp);
		goto redo;
	}

	/*
	 * Extra memory in the buffer, punt on this buffer.
	 * XXX we could handle this in most cases, but we would have to
	 * push the extra memory down to after our max possible cluster
	 * size and then potentially pull it back up if the cluster was
	 * terminated prematurely--too much hassle.
	 */
	if (bp->b_bcount != bp->b_bufsize) {
		++start_lbn;
		--len;
		bawrite(bp);
		goto redo;
	}
626
627 --len;
628 b_save = malloc(sizeof(struct buf *) * len + sizeof(struct cluster_save),
629 M_SEGMENT, M_WAITOK);
630 b_save->bs_bcount = bp->b_bcount;
631 b_save->bs_bufsize = bp->b_bufsize;
632 b_save->bs_nchildren = 0;
633 b_save->bs_children = (struct buf **)(b_save + 1);
634 b_save->bs_saveaddr = bp->b_saveaddr;
635 bp->b_saveaddr = (caddr_t) b_save;
636
637 bp->b_flags |= B_CALL;
638 bp->b_iodone = cluster_callback;
639 cp = (char *)bp->b_data + size;
640 for (++start_lbn, i = 0; i < len; ++i, ++start_lbn) {
641 /*
642 * Block is not in core or the non-sequential block
643 * ending our cluster was part of the cluster (in which
644 * case we don't want to write it twice).
645 */
646 if (!incore(vp, start_lbn) ||
647 last_bp == NULL && start_lbn == lbn)
648 break;
649
650 /*
651 * Get the desired block buffer (unless it is the final
652 * sequential block whose buffer was passed in explictly
653 * as last_bp).
654 */
655 if (last_bp == NULL || start_lbn != lbn) {
656 tbp = getblk(vp, start_lbn, size, 0, 0);
657 if (!(tbp->b_flags & B_DELWRI)) {
658 brelse(tbp);
659 break;
660 }
661 } else
662 tbp = last_bp;

		++b_save->bs_nchildren;

		/* Move memory from children to parent */
		if (tbp->b_blkno != (bp->b_blkno + btodb(bp->b_bufsize))) {
			printf("Clustered Block: %d addr %x bufsize: %d\n",
			    bp->b_lblkno, bp->b_blkno, bp->b_bufsize);
			printf("Child Block: %d addr: %x\n", tbp->b_lblkno,
			    tbp->b_blkno);
			panic("Clustered write to wrong blocks");
		}

		pagemove(tbp->b_data, cp, size);
		bp->b_bcount += size;
		bp->b_bufsize += size;

		tbp->b_bufsize -= size;
		tbp->b_flags &= ~(B_READ | B_DONE | B_ERROR | B_DELWRI);
		tbp->b_flags |= (B_ASYNC | B_AGE);
		s = splbio();
		reassignbuf(tbp, tbp->b_vp);		/* put on clean list */
		++tbp->b_vp->v_numoutput;
		splx(s);
		b_save->bs_children[i] = tbp;

		cp += size;
	}

	if (i == 0) {
		/* None to cluster */
		bp->b_saveaddr = b_save->bs_saveaddr;
		bp->b_flags &= ~B_CALL;
		bp->b_iodone = NULL;
		free(b_save, M_SEGMENT);
	}
	bawrite(bp);
	if (i < len) {
		len -= i + 1;
		start_lbn += 1;
		goto redo;
	}
}

/*
 * Collect together all the buffers in a cluster, plus the buffer
 * (last_bp) currently being written.
 */
struct cluster_save *
cluster_collectbufs(vp, last_bp)
	struct vnode *vp;
	struct buf *last_bp;
{
	struct cluster_save *buflist;
	daddr_t lbn;
	int i, len;

	len = vp->v_lastw - vp->v_cstart + 1;
	buflist = malloc(sizeof(struct buf *) * (len + 1) + sizeof(*buflist),
	    M_SEGMENT, M_WAITOK);
	buflist->bs_nchildren = 0;
	buflist->bs_children = (struct buf **)(buflist + 1);
	for (lbn = vp->v_cstart, i = 0; i < len; lbn++, i++)
		(void)bread(vp, lbn, last_bp->b_bcount, NOCRED,
		    &buflist->bs_children[i]);
	buflist->bs_children[i] = last_bp;
	buflist->bs_nchildren = i + 1;
	return (buflist);
}