xref: /original-bsd/sys/ufs/lfs/lfs_segment.c (revision e59fb703)
1 /*
2  * Copyright (c) 1991 Regents of the University of California.
3  * All rights reserved.
4  *
5  * %sccs.include.redist.c%
6  *
7  *	@(#)lfs_segment.c	7.9 (Berkeley) 12/31/91
8  */
9 
10 #include <sys/param.h>
11 #include <sys/systm.h>
12 #include <sys/namei.h>
13 #include <sys/kernel.h>
14 #include <sys/resourcevar.h>
15 #include <sys/file.h>
16 #include <sys/stat.h>
17 #include <sys/buf.h>
18 #include <sys/proc.h>
19 #include <sys/conf.h>
20 #include <sys/vnode.h>
21 #include <sys/specdev.h>
22 #include <sys/fifo.h>
23 #include <sys/malloc.h>
24 #include <sys/mount.h>
25 
26 #include <ufs/ufs/quota.h>
27 #include <ufs/ufs/inode.h>
28 #include <ufs/ufs/dir.h>
29 #include <ufs/ufs/ufsmount.h>
30 
31 #include <ufs/lfs/lfs.h>
32 #include <ufs/lfs/lfs_extern.h>
33 
/*
 * In-memory description of a segment about to be written.  One is
 * allocated per lfs_segwrite() call; lfs_initseg() re-initializes it
 * each time a partial segment is started.
 */
struct segment {
	struct buf	**bpp;		/* pointer to buffer array */
	struct buf	**cbpp;		/* pointer to next available bp */
	struct buf	*ibp;		/* buffer pointer to inode page */
	struct finfo	*fip;		/* current fileinfo pointer */
	void	*segsum;		/* segment summary info */
	u_long	ninodes;		/* number of inodes in this segment */
	u_long	seg_bytes_left;		/* bytes left in segment */
	u_long	sum_bytes_left;		/* bytes left in summary block */
	u_long	seg_number;		/* number of this segment */
#define	SEGM_CKP	0x01		/* doing a checkpoint */
	u_long	seg_flags;		/* run-time flags for this segment */
};
48 
/*
 * Determine if it's OK to start a partial in this segment, or if we need
 * to go on to a new segment.  True when the disk blocks remaining in the
 * current segment (total minus those already consumed past lfs_curseg)
 * exceed one file-system block's worth of disk blocks.
 */
#define	LFS_PARTIAL_FITS(fs) \
	((fs)->lfs_dbpseg - ((fs)->lfs_offset - (fs)->lfs_curseg) > \
	1 << (fs)->lfs_fsbtodb)
56 
57 int	 lfs_callback __P((struct buf *));
58 void	 lfs_gather __P((struct lfs *, struct segment *,
59 	     struct vnode *, int (*) __P((struct lfs *, struct buf *))));
60 void	 lfs_initseg __P((struct lfs *, struct segment *));
61 void	 lfs_iset __P((struct inode *, daddr_t, time_t));
62 int	 lfs_match_data __P((struct lfs *, struct buf *));
63 int	 lfs_match_dindir __P((struct lfs *, struct buf *));
64 int	 lfs_match_indir __P((struct lfs *, struct buf *));
65 int	 lfs_match_tindir __P((struct lfs *, struct buf *));
66 struct buf *
67 	 lfs_newbuf __P((struct lfs *, struct segment *, daddr_t, size_t));
68 void	 lfs_newseg __P((struct lfs *));
69 void	 lfs_shellsort __P((struct buf **, daddr_t *, register int));
70 void	 lfs_updatemeta __P((struct lfs *,
71 	    struct segment *, struct vnode *, daddr_t *, struct buf **, int));
72 void	 lfs_writefile __P((struct lfs *, struct segment *, struct vnode *));
73 void	 lfs_writeinode __P((struct lfs *, struct segment *, struct inode *));
74 void	 lfs_writeseg __P((struct lfs *, struct segment *));
75 void	 lfs_writesuper __P((struct lfs *, struct segment *));
76 
77 int	lfs_allclean_wakeup;		/* Cleaner wakeup address. */
78 
79 int
80 lfs_segwrite(mp, do_ckp)
81 	struct mount *mp;
82 	int do_ckp;			/* Do a checkpoint. */
83 {
84 	struct inode *ip;
85 	struct lfs *fs;
86 	struct segment *sp;
87 	struct vnode *vp;
88 	int s, error;
89 
90 	/*
91 	 * Ifile and meta data blocks are not marked busy, so segment writes
92 	 * must be single threaded.  Currently, there are two paths into this
93 	 * code, sync() and getnewbuf().  They both mark the file system busy,
94 	 * so lfs_segwrite is safe.  I think.
95 	 */
96 #ifdef VERBOSE
97 	printf("lfs_segwrite\n");
98 #endif
99 
100 	/*
101 	 * If doing a checkpoint, we keep a cumulative count of the outstanding
102 	 * I/O operations.  If the disk drive catches up with us it could go to
103 	 * zero before we finish, so we artificially increment it by one until
104 	 * we've scheduled all of the writes we intend to do.
105 	 */
106 	fs = VFSTOUFS(mp)->um_lfs;
107 	if (do_ckp) {
108 		s = splbio();
109 		fs->lfs_iocount = 1;
110 		splx(s);
111 	}
112 
113 	/*
114 	 * Allocate a segment structure and enough space to hold pointers to
115 	 * the maximum possible number of buffers which can be described in a
116 	 * single summary block.
117 	 */
118 	sp = malloc(sizeof(struct segment), M_SEGMENT, M_WAITOK);
119 	sp->bpp = malloc(((LFS_SUMMARY_SIZE - sizeof(SEGSUM)) /
120 	    sizeof(daddr_t) + 1) * sizeof(struct buf *), M_SEGMENT, M_WAITOK);
121 	sp->seg_flags = do_ckp ? SEGM_CKP : 0;
122 	lfs_initseg(fs, sp);
123 loop:
124 	for (vp = mp->mnt_mounth; vp; vp = vp->v_mountf) {
125 		/*
126 		 * If the vnode that we are about to sync is no longer
127 		 * associated with this mount point, start over.
128 		 */
129 		if (vp->v_mount != mp)
130 			goto loop;
131 		if (VOP_ISLOCKED(vp))
132 			continue;
133 
134 		/*
135 		 * Write the inode/file if dirty and it's not the
136 		 * the IFILE.
137 		 */
138 		ip = VTOI(vp);
139 		if (ip->i_flag & (IMOD | IACC | IUPD | ICHG) == 0 &&
140 		    vp->v_dirtyblkhd == NULL ||
141 		    ip->i_number == LFS_IFILE_INUM)
142 			continue;
143 
144 		/*
145 		 * XXX
146 		 * This is wrong, I think -- we should just wait until we
147 		 * get the vnode and go on.  Probably going to reschedule
148 		 * all of the writes we already scheduled...
149 		 */
150 		if (vget(vp))
151 {
152 printf("lfs_segment: failed to get vnode (tell Keith)!\n");
153 			goto loop;
154 }
155 
156 		if (vp->v_dirtyblkhd != NULL)
157 			lfs_writefile(fs, sp, vp);
158 		lfs_writeinode(fs, sp, ip);
159 		ip->i_flags &= ~(IMOD | IACC | IUPD | ICHG);
160 		vput(vp);
161 	}
162 	if (do_ckp) {
163 		vp = fs->lfs_ivnode;
164 		while (vget(vp));
165 		ip = VTOI(vp);
166 		if (vp->v_dirtyblkhd != NULL)
167 			lfs_writefile(fs, sp, vp);
168 		lfs_writeinode(fs, sp, ip);
169 		ip->i_flags &= ~(IMOD | IACC | IUPD | ICHG);
170 		vput(vp);
171 	}
172 	lfs_writeseg(fs, sp);
173 
174 	/*
175 	 * If the I/O count is non-zero, sleep until it reaches zero.  At the
176 	 * moment, the user's process hangs around so we can sleep.
177 	 */
178 	if (do_ckp) {
179 		s = splbio();
180 		if (--fs->lfs_iocount && (error =
181 		    tsleep(&fs->lfs_iocount, PRIBIO + 1, "lfs sync", 0)))
182 			return (error);
183 		splx(s);
184 		lfs_writesuper(fs, sp);
185 	}
186 
187 	free(sp->bpp, M_SEGMENT);
188 	free(sp, M_SEGMENT);
189 
190 	/* Wake up any cleaning processes waiting on this file system. */
191 	wakeup(&fs->lfs_nextseg);
192 	wakeup(&lfs_allclean_wakeup);
193 
194 	return (0);
195 }
196 
/*
 * Write the dirty blocks associated with a vnode.
 *
 * Starts a FINFO entry for the file in the current summary block, then
 * gathers the file's dirty data and indirect blocks into the segment.
 */
void
lfs_writefile(fs, sp, vp)
	struct lfs *fs;
	struct segment *sp;
	struct vnode *vp;
{
	struct buf *bp;
	struct finfo *fip;
	IFILE *ifp;

#ifdef VERBOSE
	printf("lfs_writefile\n");
#endif
	/*
	 * If there is no room for another data block or FINFO entry,
	 * push the current partial segment and start a new one.
	 */
	if (sp->seg_bytes_left < fs->lfs_bsize ||
	    sp->sum_bytes_left < sizeof(struct finfo)) {
		lfs_writeseg(fs, sp);
		lfs_initseg(fs, sp);
	}
	/*
	 * Charge the summary only for the FINFO header; struct finfo
	 * apparently embeds one block entry (the sp->fip advance below
	 * adds nblocks - 1 extra daddr_t's), and per-block entries are
	 * charged as they are gathered.
	 */
	sp->sum_bytes_left -= sizeof(struct finfo) - sizeof(daddr_t);

	/* Begin the FINFO entry; fetch the file's version from the ifile. */
	fip = sp->fip;
	fip->fi_nblocks = 0;
	fip->fi_ino = VTOI(vp)->i_number;
	LFS_IENTRY(ifp, fs, fip->fi_ino, bp);
	fip->fi_version = ifp->if_version;
	brelse(bp);

	/*
	 * It may not be necessary to write the meta-data blocks at this point,
	 * as the roll-forward recovery code should be able to reconstruct the
	 * list.
	 */
	lfs_gather(fs, sp, vp, lfs_match_data);
	lfs_gather(fs, sp, vp, lfs_match_indir);
	lfs_gather(fs, sp, vp, lfs_match_dindir);
#ifdef TRIPLE
	lfs_gather(fs, sp, vp, lfs_match_tindir);
#endif

	/*
	 * Reload fip: lfs_gather may have pushed the segment and started a
	 * fresh one, replacing sp->fip.  If any blocks were gathered, count
	 * this FINFO in the summary and advance fip past its block list.
	 */
	fip = sp->fip;
#ifdef META
	printf("lfs_writefile: adding %d blocks\n", fip->fi_nblocks);
#endif
	if (fip->fi_nblocks != 0) {
		++((SEGSUM *)(sp->segsum))->ss_nfinfo;
		sp->fip =
		    (struct finfo *)((caddr_t)fip + sizeof(struct finfo) +
		    sizeof(daddr_t) * (fip->fi_nblocks - 1));
	}
}
250 
/*
 * Copy an inode into the current inode block (allocating a new inode
 * block, and possibly a new segment, if needed) and record the inode's
 * new disk address in the ifile.
 */
void
lfs_writeinode(fs, sp, ip)
	struct lfs *fs;
	struct segment *sp;
	struct inode *ip;
{
	struct buf *bp, *ibp;
	IFILE *ifp;
	daddr_t next_addr;
	ino_t ino;
	int ndx;

#ifdef VERBOSE
	printf("lfs_writeinode\n");
#endif
	/* Allocate a new inode block if necessary. */
	if (sp->ibp == NULL) {
		/* Allocate a new segment if necessary. */
		if (sp->seg_bytes_left < fs->lfs_bsize ||
		    sp->sum_bytes_left < sizeof(daddr_t)) {
			lfs_writeseg(fs, sp);
			lfs_initseg(fs, sp);
		}

		/* Get next inode block. */
		next_addr = fs->lfs_offset;
		fs->lfs_offset += fsbtodb(fs, 1);
		sp->ibp = *sp->cbpp++ =
		    lfs_newbuf(fs, sp, next_addr, fs->lfs_bsize);

		/* Set remaining space counter. */
		sp->seg_bytes_left -= fs->lfs_bsize;
		sp->sum_bytes_left -= sizeof(daddr_t);
		/*
		 * Record the inode block's disk address in the summary
		 * block; these addresses are stored from the tail of the
		 * summary growing backward, one slot per inode block.
		 */
		ndx = LFS_SUMMARY_SIZE / sizeof(daddr_t) -
		    sp->ninodes / INOPB(fs) - 1;
		((daddr_t *)(sp->segsum))[ndx] = next_addr;
	}

	/* Update the inode times and copy the inode onto the inode page. */
	ITIMES(ip, &time, &time);
	bp = sp->ibp;
	bp->b_un.b_dino[sp->ninodes % INOPB(fs)] = ip->i_din;

	/* Increment inode count in segment summary block. */
	++((SEGSUM *)(sp->segsum))->ss_ninos;

	/* If this page is full, set flag to allocate a new page. */
	if (++sp->ninodes % INOPB(fs) == 0)
		sp->ibp = NULL;

	/*
	 * If updating the ifile, update the super-block.  Update the disk
	 * address and access times for this inode in the ifile.
	 */
	ino = ip->i_number;
	if (ino == LFS_IFILE_INUM)
		fs->lfs_idaddr = bp->b_blkno;

	LFS_IENTRY(ifp, fs, ino, ibp);
	ifp->if_daddr = bp->b_blkno;
	LFS_UBWRITE(ibp);
}
313 
/*
 * Walk a vnode's dirty-block list, adding each buffer accepted by the
 * match function to the current partial segment and recording its
 * logical block number in the current FINFO entry.  May push a full
 * segment to disk and begin a new one mid-walk.
 */
void
lfs_gather(fs, sp, vp, match)
	struct lfs *fs;
	struct segment *sp;
	struct vnode *vp;
	int (*match) __P((struct lfs *, struct buf *));
{
	struct buf **bpp, *bp, *nbp;
	struct finfo *fip;
	struct inode *ip;
	daddr_t *lbp, *start_lbp;
	u_long version;
	int s;

#ifdef VERBOSE
	printf("lfs_gather\n");
#endif
	ip = VTOI(vp);
	bpp = sp->cbpp;
	fip = sp->fip;
	/* New logical block numbers go after any already gathered. */
	start_lbp = lbp = &fip->fi_blocks[fip->fi_nblocks];

	/* Block I/O interrupts while traversing the dirty list. */
	s = splbio();
	for (bp = vp->v_dirtyblkhd; bp; bp = nbp) {
		nbp = bp->b_blockf;
		/*
		 * XXX
		 * Should probably sleep on any BUSY buffer if
		 * doing an fsync?
		 */
		if (bp->b_flags & B_BUSY)
			continue;
#ifdef DIAGNOSTIC
		if (!(bp->b_flags & B_DELWRI))
			panic("lfs_gather: bp not B_DELWRI");
		if (!(bp->b_flags & B_LOCKED))
			panic("lfs_gather: bp not B_LOCKED");
#endif
		if (!match(fs, bp))
			continue;

		/* Insert into the buffer list, update the FINFO block. */
		*sp->cbpp++ = bp;
		++fip->fi_nblocks;
		*lbp++ = bp->b_lblkno;

		/*
		 * If full, finish this segment.  We may be doing I/O, so
		 * release and reacquire the splbio().
		 */
		sp->sum_bytes_left -= sizeof(daddr_t);
		sp->seg_bytes_left -= bp->b_bufsize;
		if (sp->sum_bytes_left < sizeof(daddr_t) ||
		    sp->seg_bytes_left < fs->lfs_bsize) {
			splx(s);
			/* Assign disk addresses to the blocks so far. */
			lfs_updatemeta(fs,
			    sp, vp, start_lbp, bpp, lbp - start_lbp);

			/* Add the current file to the segment summary. */
			++((SEGSUM *)(sp->segsum))->ss_nfinfo;

			/*
			 * Push the segment and start a fresh FINFO entry
			 * for this file in the new one, carrying the
			 * version over.
			 */
			version = fip->fi_version;
			lfs_writeseg(fs, sp);
			lfs_initseg(fs, sp);

			fip = sp->fip;
			fip->fi_version = version;
			fip->fi_ino = ip->i_number;
			start_lbp = lbp = fip->fi_blocks;

			bpp = sp->cbpp;
			s = splbio();
		}
	}
	splx(s);
	/* Assign disk addresses to whatever remains gathered. */
	lfs_updatemeta(fs, sp, vp, start_lbp, bpp, lbp - start_lbp);
}
391 
/*
 * Update the metadata that points to the blocks listed in the FINFO
 * array.
 *
 * Assigns each of the nblocks buffers a disk address at the head of the
 * log, repoints the referencing meta-data (inode direct/indirect
 * pointers, or a parent indirect block) at the new address, and charges
 * the block's previous segment for the space given up.
 */
void
lfs_updatemeta(fs, sp, vp, lbp, bpp, nblocks)
	struct lfs *fs;
	struct segment *sp;
	struct vnode *vp;
	daddr_t *lbp;
	struct buf **bpp;
	int nblocks;
{
	SEGUSE *sup;
	struct buf *bp;
	INDIR a[NIADDR], *ap;
	struct inode *ip;
	daddr_t daddr, lbn, off;
	int db_per_fsb, error, i, num;

#ifdef VERBOSE
	printf("lfs_updatemeta\n");
#endif
	if (nblocks == 0)
		return;

	/* Sort the blocks (both parallel arrays, by logical block). */
	lfs_shellsort(bpp, lbp, nblocks);

	/*
	 * Assign disk addresses, and update references to the logical
	 * block and the segment usage information.
	 */
	db_per_fsb = fsbtodb(fs, 1);
	for (i = nblocks; i--; ++bpp) {
		lbn = *lbp++;
		(*bpp)->b_blkno = off = fs->lfs_offset;
		fs->lfs_offset += db_per_fsb;

		/* Find the block's old address and its indirect path. */
		if (error = lfs_bmaparray(vp, lbn, &daddr, a, &num))
			panic("lfs_updatemeta: lfs_bmaparray %d", error);
		ip = VTOI(vp);
		switch (num) {
		case 0:
			/* Direct block: pointer lives in the inode. */
			ip->i_db[lbn] = off;
			break;
		case 1:
			/* Top-level indirect pointer in the inode. */
			ip->i_ib[a[0].in_off] = off;
			break;
		default:
			/* Deeper: update the parent indirect block. */
			ap = &a[num - 1];
			if (bread(vp, ap->in_lbn, fs->lfs_bsize, NOCRED, &bp))
				panic("lfs_updatemeta: bread bno %d",
				    ap->in_lbn);
			bp->b_un.b_daddr[ap->in_off] = off;
			lfs_bwrite(bp);
		}

		/* Update segment usage information. */
		if (daddr != UNASSIGNED) {
			/* The segment holding the old copy loses a block. */
			LFS_SEGENTRY(sup, fs, datosn(fs, daddr), bp);
			sup->su_lastmod = time.tv_sec;
#ifdef DIAGNOSTIC
			if (sup->su_nbytes < fs->lfs_bsize)
				panic("lfs: negative bytes (segment %d)\n",
				    datosn(fs, daddr));
#endif
			sup->su_nbytes -= fs->lfs_bsize;
			LFS_UBWRITE(bp);
		}
	}
}
464 
465 /*
466  * Start a new segment.
467  */
468 void
469 lfs_initseg(fs, sp)
470 	struct lfs *fs;
471 	struct segment *sp;
472 {
473 	SEGUSE *sup;
474 	SEGSUM *ssp;
475 	struct buf *bp;
476 	daddr_t lbn, *lbnp;
477 
478 #ifdef VERBOSE
479 	printf("lfs_initseg\n");
480 #endif
481 	/* Advance to the next segment. */
482 	if (!LFS_PARTIAL_FITS(fs)) {
483 		lfs_newseg(fs);
484 		fs->lfs_offset = fs->lfs_curseg;
485 		sp->seg_number = datosn(fs, fs->lfs_curseg);
486 		sp->seg_bytes_left = fs->lfs_dbpseg * DEV_BSIZE;
487 
488 		/*
489 		 * If the segment contains a superblock, update the offset
490 		 * and summary address to skip over it.
491 		 */
492 		LFS_SEGENTRY(sup, fs, sp->seg_number, bp);
493 		if (sup->su_flags & SEGUSE_SUPERBLOCK) {
494 			fs->lfs_offset += LFS_SBPAD / DEV_BSIZE;
495 			sp->seg_bytes_left -= LFS_SBPAD;
496 		}
497 		brelse(bp);
498 	} else {
499 		sp->seg_number = datosn(fs, fs->lfs_curseg);
500 		sp->seg_bytes_left = (fs->lfs_dbpseg -
501 		    (fs->lfs_offset - fs->lfs_curseg)) * DEV_BSIZE;
502 	}
503 
504 	sp->ibp = NULL;
505 	sp->ninodes = 0;
506 
507 	/* Get a new buffer for SEGSUM and enter it into the buffer list. */
508 	sp->cbpp = sp->bpp;
509 	*sp->cbpp = lfs_newbuf(fs, sp, fs->lfs_offset, LFS_SUMMARY_SIZE);
510 	sp->segsum = (*sp->cbpp)->b_un.b_addr;
511 	++sp->cbpp;
512 	fs->lfs_offset += LFS_SUMMARY_SIZE / DEV_BSIZE;
513 
514 	/* Set point to SEGSUM, initialize it. */
515 	ssp = sp->segsum;
516 	ssp->ss_next = fs->lfs_nextseg;
517 	ssp->ss_nfinfo = ssp->ss_ninos = 0;
518 
519 	/* Set pointer to first FINFO, initialize it. */
520 	sp->fip = (struct finfo *)(sp->segsum + sizeof(SEGSUM));
521 	sp->fip->fi_nblocks = 0;
522 
523 	sp->seg_bytes_left -= LFS_SUMMARY_SIZE;
524 	sp->sum_bytes_left = LFS_SUMMARY_SIZE - sizeof(SEGSUM);
525 }
526 
/*
 * Return the next segment to write.
 */
void
lfs_newseg(fs)
	struct lfs *fs;
{
	CLEANERINFO *cip;
	SEGUSE *sup;
	struct buf *bp;
	int curseg, isdirty, sn;

#ifdef VERBOSE
	printf("lfs_newseg\n");
#endif
	/*
	 * Turn off the active bit for the current segment, turn on the
	 * active and dirty bits for the next segment, update the cleaner
	 * info.  Set the current segment to the next segment, get a new
	 * next segment.
	 */
	LFS_SEGENTRY(sup, fs, datosn(fs, fs->lfs_curseg), bp);
	sup->su_flags &= ~SEGUSE_ACTIVE;
	LFS_UBWRITE(bp);

	LFS_SEGENTRY(sup, fs, datosn(fs, fs->lfs_nextseg), bp);
	sup->su_flags |= SEGUSE_ACTIVE | SEGUSE_DIRTY;
	LFS_UBWRITE(bp);

	/* One fewer clean segment, one more dirty. */
	LFS_CLEANERINFO(cip, fs, bp);
	--cip->clean;
	++cip->dirty;
	LFS_UBWRITE(bp);

	fs->lfs_lastseg = fs->lfs_curseg;
	fs->lfs_curseg = fs->lfs_nextseg;
	/*
	 * Scan forward (with wraparound) from the new current segment for
	 * the first non-dirty segment; panic if the scan comes all the
	 * way around without finding one.
	 */
	for (sn = curseg = datosn(fs, fs->lfs_curseg);;) {
		sn = (sn + 1) % fs->lfs_nseg;
		if (sn == curseg)
			panic("lfs_nextseg: no clean segments");
		LFS_SEGENTRY(sup, fs, sn, bp);
		isdirty = sup->su_flags & SEGUSE_DIRTY;
		brelse(bp);
		if (!isdirty)
			break;
	}
	fs->lfs_nextseg = sntoda(fs, sn);
}
575 
576 void
577 lfs_writeseg(fs, sp)
578 	struct lfs *fs;
579 	struct segment *sp;
580 {
581 	struct buf **bpp, *bp;
582 	SEGUSE *sup;
583 	SEGSUM *ssp;
584 	dev_t i_dev;
585 	u_long *datap, *dp;
586 	void *pmeta;
587 	int flags, i, nblocks, s, (*strategy)__P((struct buf *));
588 
589 #ifdef VERBOSE
590 	printf("lfs_writeseg\n");
591 #endif
592 	if ((nblocks = sp->cbpp - sp->bpp) == 0)
593 		return;
594 
595 	/* Update the segment usage information. */
596 	LFS_SEGENTRY(sup, fs, sp->seg_number, bp);
597 	sup->su_nbytes += nblocks - 1 << fs->lfs_bshift;
598 	sup->su_lastmod = time.tv_sec;
599 	LFS_UBWRITE(bp);
600 
601 	/*
602 	 * Compute checksum across data and then across summary; the first
603 	 * block (the summary block) is skipped.  Set the create time here
604 	 * so that it's guaranteed to be later than the inode mod times.
605 	 *
606 	 * XXX
607 	 * Fix this to do it inline, instead of malloc/copy.
608 	 */
609 	datap = dp = malloc(nblocks * sizeof(u_long), M_SEGMENT, M_WAITOK);
610 	for (bpp = sp->bpp, i = nblocks - 1; i--;)
611 		*dp++ = (*++bpp)->b_un.b_words[0];
612 	ssp = (SEGSUM *)sp->segsum;
613 	ssp->ss_create = time.tv_sec;
614 	ssp->ss_datasum = cksum(datap, nblocks * sizeof(u_long));
615 	ssp->ss_sumsum =
616 	    cksum(&ssp->ss_datasum, LFS_SUMMARY_SIZE - sizeof(ssp->ss_sumsum));
617 	free(datap, M_SEGMENT);
618 
619 	/*
620 	 * When we gathered the blocks for I/O we did not mark them busy or
621 	 * remove them from the freelist.  As we do this, turn off the B_LOCKED
622 	 * bit so the future brelse will put them on the LRU list, and add the
623 	 * B_CALL flags if we're doing a checkpoint so we can count I/O's.  LFS
624 	 * requires that the super blocks (on checkpoint) be written after all
625 	 * the segment data.
626 	 */
627 	i_dev = VTOI(fs->lfs_ivnode)->i_dev;
628 	strategy = VTOI(fs->lfs_ivnode)->i_devvp->v_op->vop_strategy;
629 
630 	s = splbio();
631 	if (sp->seg_flags & SEGM_CKP) {
632 		fs->lfs_iocount += nblocks;
633  		flags = B_ASYNC | B_BUSY | B_CALL;
634 	} else
635 		flags = B_ASYNC | B_BUSY;
636 	for (bpp = sp->bpp, i = nblocks; i--;) {
637 		bp = *bpp++;
638 		bp->b_flags |= flags;
639 		bp->b_flags &=
640 		    ~(B_DONE | B_ERROR | B_READ | B_DELWRI | B_LOCKED);
641 		bp->b_dev = i_dev;
642 		bp->b_iodone = lfs_callback;
643 		if (!(bp->b_flags & B_NOCACHE)) {
644 			bremfree(bp);
645 			reassignbuf(bp, bp->b_vp);
646 		}
647 	}
648 	splx(s);
649 
650 	for (bpp = sp->bpp, i = nblocks; i--;)
651 		(strategy)(*bpp++);
652 }
653 
/*
 * Write the superblock to disk: synchronously to the primary location,
 * then asynchronously to the first alternate.
 */
void
lfs_writesuper(fs, sp)
	struct lfs *fs;
	struct segment *sp;
{
	struct buf *bp;
	dev_t i_dev;
	int (*strategy) __P((struct buf *));

#ifdef VERBOSE
	printf("lfs_writesuper\n");
#endif
	/* Write raw through the device underlying the ifile vnode. */
	i_dev = VTOI(fs->lfs_ivnode)->i_dev;
	strategy = VTOI(fs->lfs_ivnode)->i_devvp->v_op->vop_strategy;

	/*
	 * Checksum the superblock and copy it into a buffer.  The checksum
	 * covers sizeof(struct lfs) minus the lfs_cksum field -- this is
	 * only right if lfs_cksum is the struct's last field; verify
	 * against the struct lfs declaration.
	 */
	fs->lfs_cksum = cksum(fs, sizeof(struct lfs) - sizeof(fs->lfs_cksum));
	bp = lfs_newbuf(fs, sp, fs->lfs_sboffs[0], LFS_SBPAD);
	*bp->b_un.b_lfs = *fs;

	/* Write the first superblock (wait). */
	bp->b_dev = i_dev;
	bp->b_flags |= B_BUSY;
	bp->b_flags &= ~(B_DONE | B_ERROR | B_READ | B_DELWRI);
	(strategy)(bp);
	biowait(bp);

	/* Write the second superblock (don't wait). */
	bp->b_blkno = bp->b_lblkno = fs->lfs_sboffs[1];
	bp->b_flags |= B_ASYNC | B_BUSY;
	bp->b_flags &= ~(B_DONE | B_ERROR | B_READ | B_DELWRI);
	(strategy)(bp);
}
687 
688 /*
689  * Logical block number match routines used when traversing the dirty block
690  * chain.
691  */
692 int
693 lfs_match_data(fs, bp)
694 	struct lfs *fs;
695 	struct buf *bp;
696 {
697 	return (bp->b_lblkno >= 0);
698 }
699 
700 int
701 lfs_match_indir(fs, bp)
702 	struct lfs *fs;
703 	struct buf *bp;
704 {
705 	int lbn;
706 
707 	lbn = bp->b_lblkno;
708 	return (lbn < 0 && (-lbn - NDADDR) % NINDIR(fs) == 0);
709 }
710 
711 int
712 lfs_match_dindir(fs, bp)
713 	struct lfs *fs;
714 	struct buf *bp;
715 {
716 	int lbn;
717 
718 	lbn = bp->b_lblkno;
719 	return (lbn < 0 && (-lbn - NDADDR) % NINDIR(fs) == 1);
720 }
721 
722 int
723 lfs_match_tindir(fs, bp)
724 	struct lfs *fs;
725 	struct buf *bp;
726 {
727 	int lbn;
728 
729 	lbn = bp->b_lblkno;
730 	return (lbn < 0 && (-lbn - NDADDR) % NINDIR(fs) == 2);
731 }
732 
/*
 * Allocate a new buffer header.
 *
 * The buffer is attached to the ifile vnode, given the disk address as
 * both its logical and physical block number, and marked B_NOCACHE --
 * lfs_writeseg uses that flag to avoid bremfree'ing buffers created
 * here.
 */
struct buf *
lfs_newbuf(fs, sp, daddr, size)
	struct lfs *fs;
	struct segment *sp;
	daddr_t daddr;
	size_t size;
{
	struct buf *bp;

#ifdef VERBOSE
	printf("lfs_newbuf\n");
#endif
	bp = getnewbuf();
	bremhash(bp);
	bgetvp(fs->lfs_ivnode, bp);
	bp->b_bcount = 0;
	bp->b_lblkno = daddr;
	bp->b_blkno = daddr;
	bp->b_error = 0;
	bp->b_resid = 0;
	allocbuf(bp, size);
	bp->b_flags |= B_NOCACHE;
	/* Hash onto the AGE queue so the header is reclaimed promptly. */
	binshash(bp, &bfreelist[BQ_AGE]);
	return (bp);
}
761 
/*
 * I/O completion callback: decrement the file system's outstanding-I/O
 * count and wake the checkpointing process (sleeping on lfs_iocount in
 * lfs_segwrite) when it reaches zero, then release the buffer.
 */
int						/* XXX should be void */
lfs_callback(bp)
	struct buf *bp;
{
	struct lfs *fs;

	fs = VFSTOUFS(bp->b_vp->v_mount)->um_lfs;
#ifdef DIAGNOSTIC
	if (fs->lfs_iocount == 0)
		panic("lfs_callback: zero iocount\n");
#endif
	if (--fs->lfs_iocount == 0)
		wakeup(&fs->lfs_iocount);

	brelse(bp);
}
778 
779 /*
780  * Shellsort (diminishing increment sort) from Data Structures and
781  * Algorithms, Aho, Hopcraft and Ullman, 1983 Edition, page 290;
782  * see also Knuth Vol. 3, page 84.  The increments are selected from
783  * formula (8), page 95.  Roughly O(N^3/2).
784  */
785 /*
786  * This is our own private copy of shellsort because we want to sort
787  * two parallel arrays (the array of buffer pointers and the array of
788  * logical block numbers) simultaneously.  Note that we cast the array
789  * of logical block numbers to a unsigned in this routine so that the
790  * negative block numbers (meta data blocks) sort AFTER the data blocks.
791  */
792 void
793 lfs_shellsort(bp_array, lb_array, nmemb)
794 	struct buf **bp_array;
795 	daddr_t *lb_array;
796 	register int nmemb;
797 {
798 	static int __rsshell_increments[] = { 4, 1, 0 };
799 	register int incr, *incrp, t1, t2;
800 	struct buf *bp_temp;
801 	u_long lb_temp;
802 
803 	for (incrp = __rsshell_increments; incr = *incrp++;)
804 		for (t1 = incr; t1 < nmemb; ++t1)
805 			for (t2 = t1 - incr; t2 >= 0;)
806 				if (lb_array[t2] > lb_array[t2 + incr]) {
807 					lb_temp = lb_array[t2];
808 					lb_array[t2] = lb_array[t2 + incr];
809 					lb_array[t2 + incr] = lb_temp;
810 					bp_temp = bp_array[t2];
811 					bp_array[t2] = bp_array[t2 + incr];
812 					bp_array[t2 + incr] = bp_temp;
813 					t2 -= incr;
814 				} else
815 					break;
816 }
817