xref: /freebsd/sys/ufs/ffs/ffs_balloc.c (revision 206b73d0)
1 /*-
2  * SPDX-License-Identifier: (BSD-2-Clause-FreeBSD AND BSD-3-Clause)
3  *
4  * Copyright (c) 2002 Networks Associates Technology, Inc.
5  * All rights reserved.
6  *
7  * This software was developed for the FreeBSD Project by Marshall
8  * Kirk McKusick and Network Associates Laboratories, the Security
9  * Research Division of Network Associates, Inc. under DARPA/SPAWAR
10  * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS
11  * research program
12  *
13  * Redistribution and use in source and binary forms, with or without
14  * modification, are permitted provided that the following conditions
15  * are met:
16  * 1. Redistributions of source code must retain the above copyright
17  *    notice, this list of conditions and the following disclaimer.
18  * 2. Redistributions in binary form must reproduce the above copyright
19  *    notice, this list of conditions and the following disclaimer in the
20  *    documentation and/or other materials provided with the distribution.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  *
34  * Copyright (c) 1982, 1986, 1989, 1993
35  *	The Regents of the University of California.  All rights reserved.
36  *
37  * Redistribution and use in source and binary forms, with or without
38  * modification, are permitted provided that the following conditions
39  * are met:
40  * 1. Redistributions of source code must retain the above copyright
41  *    notice, this list of conditions and the following disclaimer.
42  * 2. Redistributions in binary form must reproduce the above copyright
43  *    notice, this list of conditions and the following disclaimer in the
44  *    documentation and/or other materials provided with the distribution.
45  * 3. Neither the name of the University nor the names of its contributors
46  *    may be used to endorse or promote products derived from this software
47  *    without specific prior written permission.
48  *
49  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
50  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
53  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59  * SUCH DAMAGE.
60  *
61  *	@(#)ffs_balloc.c	8.8 (Berkeley) 6/16/95
62  */
63 
64 #include <sys/cdefs.h>
65 __FBSDID("$FreeBSD$");
66 
67 #include <sys/param.h>
68 #include <sys/systm.h>
69 #include <sys/bio.h>
70 #include <sys/buf.h>
71 #include <sys/lock.h>
72 #include <sys/mount.h>
73 #include <sys/vnode.h>
74 #include <sys/vmmeter.h>
75 
76 #include <ufs/ufs/quota.h>
77 #include <ufs/ufs/inode.h>
78 #include <ufs/ufs/ufs_extern.h>
79 #include <ufs/ufs/extattr.h>
80 #include <ufs/ufs/ufsmount.h>
81 
82 #include <ufs/ffs/fs.h>
83 #include <ufs/ffs/ffs_extern.h>
84 
85 /*
86  * Balloc defines the structure of filesystem storage
87  * by allocating the physical blocks on a device given
88  * the inode and the logical block number in a file.
89  * This is the allocation strategy for UFS1. Below is
90  * the allocation strategy for UFS2.
 *
 * Arguments:
 *	vp		vnode of the file being allocated into
 *	startoffset	byte offset of the write; the target logical block
 *			is lblkno(fs, startoffset)
 *	size		bytes needed at that offset (blkoff + size must not
 *			exceed one filesystem block, else panic)
 *	cred		credentials charged for the allocation
 *	flags		BA_* / IO_* allocation control flags
 *	bpp		on success *bpp is a locked buffer for the data
 *			block, or for the final indirect block when
 *			BA_METAONLY is set
 *
 * Returns 0 on success or an errno value (EOPNOTSUPP for IO_EXT, which
 * UFS1 does not support; EFBIG for a negative block number; otherwise
 * an error from the block allocator or buffer I/O).  On failure, any
 * blocks allocated before the error are unwound and freed again via
 * the "fail:" path below.
91  */
92 int
93 ffs_balloc_ufs1(struct vnode *vp, off_t startoffset, int size,
94     struct ucred *cred, int flags, struct buf **bpp)
95 {
96 	struct inode *ip;
97 	struct ufs1_dinode *dp;
98 	ufs_lbn_t lbn, lastlbn;
99 	struct fs *fs;
100 	ufs1_daddr_t nb;
101 	struct buf *bp, *nbp;
102 	struct mount *mp;
103 	struct ufsmount *ump;
104 	struct indir indirs[UFS_NIADDR + 2];
105 	int deallocated, osize, nsize, num, i, error;
106 	ufs2_daddr_t newb;
107 	ufs1_daddr_t *bap, pref;
	/* allociblk[]/lbns[] record newly allocated blocks for unwinding. */
108 	ufs1_daddr_t *allocib, *blkp, *allocblk, allociblk[UFS_NIADDR + 1];
109 	ufs2_daddr_t *lbns_remfree, lbns[UFS_NIADDR + 1];
110 	int unwindidx = -1;
111 	int saved_inbdflush;
112 	int gbflags, reclaimed;
113 
114 	ip = VTOI(vp);
115 	dp = ip->i_din1;
116 	fs = ITOFS(ip);
117 	mp = ITOVFS(ip);
118 	ump = ITOUMP(ip);
119 	lbn = lblkno(fs, startoffset);
	/* Bytes needed within the target block; must fit in one block. */
120 	size = blkoff(fs, startoffset) + size;
121 	reclaimed = 0;
122 	if (size > fs->fs_bsize)
123 		panic("ffs_balloc_ufs1: blk too big");
124 	*bpp = NULL;
	/* UFS1 has no extended-attribute area. */
125 	if (flags & IO_EXT)
126 		return (EOPNOTSUPP);
127 	if (lbn < 0)
128 		return (EFBIG);
129 	gbflags = (flags & BA_UNMAPPED) != 0 ? GB_UNMAPPED : 0;
130 
131 	if (DOINGSOFTDEP(vp))
132 		softdep_prealloc(vp, MNT_WAIT);
133 	/*
134 	 * If the next write will extend the file into a new block,
135 	 * and the file is currently composed of a fragment
136 	 * this fragment has to be extended to be a full block.
137 	 */
138 	lastlbn = lblkno(fs, ip->i_size);
139 	if (lastlbn < UFS_NDADDR && lastlbn < lbn) {
140 		nb = lastlbn;
141 		osize = blksize(fs, ip, nb);
142 		if (osize < fs->fs_bsize && osize > 0) {
143 			UFS_LOCK(ump);
144 			error = ffs_realloccg(ip, nb, dp->di_db[nb],
145 			   ffs_blkpref_ufs1(ip, lastlbn, (int)nb,
146 			   &dp->di_db[0]), osize, (int)fs->fs_bsize, flags,
147 			   cred, &bp);
148 			if (error)
149 				return (error);
150 			if (DOINGSOFTDEP(vp))
151 				softdep_setup_allocdirect(ip, nb,
152 				    dbtofsb(fs, bp->b_blkno), dp->di_db[nb],
153 				    fs->fs_bsize, osize, bp);
154 			ip->i_size = smalllblktosize(fs, nb + 1);
155 			dp->di_size = ip->i_size;
156 			dp->di_db[nb] = dbtofsb(fs, bp->b_blkno);
157 			ip->i_flag |= IN_CHANGE | IN_UPDATE;
158 			if (flags & IO_SYNC)
159 				bwrite(bp);
160 			else if (DOINGASYNC(vp))
161 				bdwrite(bp);
162 			else
163 				bawrite(bp);
164 		}
165 	}
166 	/*
167 	 * The first UFS_NDADDR blocks are direct blocks
168 	 */
169 	if (lbn < UFS_NDADDR) {
170 		if (flags & BA_METAONLY)
171 			panic("ffs_balloc_ufs1: BA_METAONLY for direct block");
172 		nb = dp->di_db[lbn];
		/* Fully-allocated interior block: just read and return it. */
173 		if (nb != 0 && ip->i_size >= smalllblktosize(fs, lbn + 1)) {
174 			error = bread(vp, lbn, fs->fs_bsize, NOCRED, &bp);
175 			if (error) {
176 				brelse(bp);
177 				return (error);
178 			}
179 			bp->b_blkno = fsbtodb(fs, nb);
180 			*bpp = bp;
181 			return (0);
182 		}
183 		if (nb != 0) {
184 			/*
185 			 * Consider need to reallocate a fragment.
186 			 */
187 			osize = fragroundup(fs, blkoff(fs, ip->i_size));
188 			nsize = fragroundup(fs, size);
189 			if (nsize <= osize) {
				/* Existing fragment is already large enough. */
190 				error = bread(vp, lbn, osize, NOCRED, &bp);
191 				if (error) {
192 					brelse(bp);
193 					return (error);
194 				}
195 				bp->b_blkno = fsbtodb(fs, nb);
196 			} else {
197 				UFS_LOCK(ump);
198 				error = ffs_realloccg(ip, lbn, dp->di_db[lbn],
199 				    ffs_blkpref_ufs1(ip, lbn, (int)lbn,
200 				    &dp->di_db[0]), osize, nsize, flags,
201 				    cred, &bp);
202 				if (error)
203 					return (error);
204 				if (DOINGSOFTDEP(vp))
205 					softdep_setup_allocdirect(ip, lbn,
206 					    dbtofsb(fs, bp->b_blkno), nb,
207 					    nsize, osize, bp);
208 			}
209 		} else {
			/* No block yet: allocate a fragment or a full block. */
210 			if (ip->i_size < smalllblktosize(fs, lbn + 1))
211 				nsize = fragroundup(fs, size);
212 			else
213 				nsize = fs->fs_bsize;
214 			UFS_LOCK(ump);
215 			error = ffs_alloc(ip, lbn,
216 			    ffs_blkpref_ufs1(ip, lbn, (int)lbn, &dp->di_db[0]),
217 			    nsize, flags, cred, &newb);
218 			if (error)
219 				return (error);
220 			bp = getblk(vp, lbn, nsize, 0, 0, gbflags);
221 			bp->b_blkno = fsbtodb(fs, newb);
222 			if (flags & BA_CLRBUF)
223 				vfs_bio_clrbuf(bp);
224 			if (DOINGSOFTDEP(vp))
225 				softdep_setup_allocdirect(ip, lbn, newb, 0,
226 				    nsize, 0, bp);
227 		}
228 		dp->di_db[lbn] = dbtofsb(fs, bp->b_blkno);
229 		ip->i_flag |= IN_CHANGE | IN_UPDATE;
230 		*bpp = bp;
231 		return (0);
232 	}
233 	/*
234 	 * Determine the number of levels of indirection.
235 	 */
236 	pref = 0;
237 	if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
238 		return(error);
239 #ifdef INVARIANTS
240 	if (num < 1)
241 		panic ("ffs_balloc_ufs1: ufs_getlbns returned indirect block");
242 #endif
243 	saved_inbdflush = curthread_pflags_set(TDP_INBDFLUSH);
244 	/*
245 	 * Fetch the first indirect block allocating if necessary.
246 	 */
247 	--num;
248 	nb = dp->di_ib[indirs[0].in_off];
249 	allocib = NULL;
250 	allocblk = allociblk;
251 	lbns_remfree = lbns;
252 	if (nb == 0) {
253 		UFS_LOCK(ump);
254 		pref = ffs_blkpref_ufs1(ip, lbn, -indirs[0].in_off - 1,
255 		    (ufs1_daddr_t *)0);
		/*
		 * ffs_alloc() is entered with the UFS lock held and
		 * returns with it dropped on both success and failure.
		 */
256 		if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
257 		    flags, cred, &newb)) != 0) {
258 			curthread_pflags_restore(saved_inbdflush);
259 			return (error);
260 		}
		/* Prefer placing the next allocation right after this block. */
261 		pref = newb + fs->fs_frag;
262 		nb = newb;
263 		MPASS(allocblk < allociblk + nitems(allociblk));
264 		MPASS(lbns_remfree < lbns + nitems(lbns));
		/* Record block and lbn so a later failure can unwind them. */
265 		*allocblk++ = nb;
266 		*lbns_remfree++ = indirs[1].in_lbn;
267 		bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0, gbflags);
268 		bp->b_blkno = fsbtodb(fs, nb);
269 		vfs_bio_clrbuf(bp);
270 		if (DOINGSOFTDEP(vp)) {
271 			softdep_setup_allocdirect(ip,
272 			    UFS_NDADDR + indirs[0].in_off, newb, 0,
273 			    fs->fs_bsize, 0, bp);
274 			bdwrite(bp);
275 		} else if ((flags & IO_SYNC) == 0 && DOINGASYNC(vp)) {
276 			if (bp->b_bufsize == fs->fs_bsize)
277 				bp->b_flags |= B_CLUSTEROK;
278 			bdwrite(bp);
279 		} else {
280 			if ((error = bwrite(bp)) != 0)
281 				goto fail;
282 		}
283 		allocib = &dp->di_ib[indirs[0].in_off];
284 		*allocib = nb;
285 		ip->i_flag |= IN_CHANGE | IN_UPDATE;
286 	}
287 	/*
288 	 * Fetch through the indirect blocks, allocating as necessary.
289 	 */
290 retry:
291 	for (i = 1;;) {
292 		error = bread(vp,
293 		    indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp);
294 		if (error) {
295 			brelse(bp);
296 			goto fail;
297 		}
298 		bap = (ufs1_daddr_t *)bp->b_data;
299 		nb = bap[indirs[i].in_off];
		/* Sanity-check the on-disk block pointer before using it. */
300 		if ((error = UFS_CHECK_BLKNO(mp, ip->i_number, nb,
301 		    fs->fs_bsize)) != 0) {
302 			brelse(bp);
303 			goto fail;
304 		}
305 		if (i == num)
306 			break;
307 		i += 1;
308 		if (nb != 0) {
309 			bqrelse(bp);
310 			continue;
311 		}
312 		UFS_LOCK(ump);
313 		/*
314 		 * If parent indirect has just been allocated, try to cluster
315 		 * immediately following it.
316 		 */
317 		if (pref == 0)
318 			pref = ffs_blkpref_ufs1(ip, lbn, i - num - 1,
319 			    (ufs1_daddr_t *)0);
320 		if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
321 		    flags | IO_BUFLOCKED, cred, &newb)) != 0) {
321 			brelse(bp);
322 			UFS_LOCK(ump);
			/*
			 * With soft updates, make a single attempt to
			 * reclaim space and retry before reporting the
			 * filesystem full.
			 */
324 			if (DOINGSOFTDEP(vp) && ++reclaimed == 1) {
325 				softdep_request_cleanup(fs, vp, cred,
326 				    FLUSH_BLOCKS_WAIT);
327 				UFS_UNLOCK(ump);
328 				goto retry;
329 			}
330 			if (ppsratecheck(&ump->um_last_fullmsg,
331 			    &ump->um_secs_fullmsg, 1)) {
332 				UFS_UNLOCK(ump);
333 				ffs_fserr(fs, ip->i_number, "filesystem full");
334 				uprintf("\n%s: write failed, filesystem "
335 				    "is full\n", fs->fs_fsmnt);
336 			} else {
337 				UFS_UNLOCK(ump);
338 			}
339 			goto fail;
340 		}
341 		pref = newb + fs->fs_frag;
342 		nb = newb;
343 		MPASS(allocblk < allociblk + nitems(allociblk));
344 		MPASS(lbns_remfree < lbns + nitems(lbns));
345 		*allocblk++ = nb;
346 		*lbns_remfree++ = indirs[i].in_lbn;
347 		nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0, 0);
348 		nbp->b_blkno = fsbtodb(fs, nb);
349 		vfs_bio_clrbuf(nbp);
350 		if (DOINGSOFTDEP(vp)) {
351 			softdep_setup_allocindir_meta(nbp, ip, bp,
352 			    indirs[i - 1].in_off, nb);
353 			bdwrite(nbp);
354 		} else if ((flags & IO_SYNC) == 0 && DOINGASYNC(vp)) {
355 			if (nbp->b_bufsize == fs->fs_bsize)
356 				nbp->b_flags |= B_CLUSTEROK;
357 			bdwrite(nbp);
358 		} else {
359 			if ((error = bwrite(nbp)) != 0) {
360 				brelse(bp);
361 				goto fail;
362 			}
363 		}
		/* Hook the new indirect block into its parent. */
364 		bap[indirs[i - 1].in_off] = nb;
		/* Remember the first modified parent for unwinding on failure. */
365 		if (allocib == NULL && unwindidx < 0)
366 			unwindidx = i - 1;
367 		/*
368 		 * If required, write synchronously, otherwise use
369 		 * delayed write.
370 		 */
371 		if (flags & IO_SYNC) {
372 			bwrite(bp);
373 		} else {
374 			if (bp->b_bufsize == fs->fs_bsize)
375 				bp->b_flags |= B_CLUSTEROK;
376 			bdwrite(bp);
377 		}
378 	}
379 	/*
380 	 * If asked only for the indirect block, then return it.
381 	 */
382 	if (flags & BA_METAONLY) {
383 		curthread_pflags_restore(saved_inbdflush);
384 		*bpp = bp;
385 		return (0);
386 	}
387 	/*
388 	 * Get the data block, allocating if necessary.
389 	 */
390 	if (nb == 0) {
391 		UFS_LOCK(ump);
392 		/*
393 		 * If allocating metadata at the front of the cylinder
394 		 * group and parent indirect block has just been allocated,
395 		 * then cluster next to it if it is the first indirect in
396 		 * the file. Otherwise it has been allocated in the metadata
397 		 * area, so we want to find our own place out in the data area.
398 		 */
399 		if (pref == 0 || (lbn > UFS_NDADDR && fs->fs_metaspace != 0))
400 			pref = ffs_blkpref_ufs1(ip, lbn, indirs[i].in_off,
401 			    &bap[0]);
402 		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
403 		    flags | IO_BUFLOCKED, cred, &newb);
404 		if (error) {
405 			brelse(bp);
406 			UFS_LOCK(ump);
			/* Same reclaim-once-then-report-full policy as above. */
407 			if (DOINGSOFTDEP(vp) && ++reclaimed == 1) {
408 				softdep_request_cleanup(fs, vp, cred,
409 				    FLUSH_BLOCKS_WAIT);
410 				UFS_UNLOCK(ump);
411 				goto retry;
412 			}
413 			if (ppsratecheck(&ump->um_last_fullmsg,
414 			    &ump->um_secs_fullmsg, 1)) {
415 				UFS_UNLOCK(ump);
416 				ffs_fserr(fs, ip->i_number, "filesystem full");
417 				uprintf("\n%s: write failed, filesystem "
418 				    "is full\n", fs->fs_fsmnt);
419 			} else {
420 				UFS_UNLOCK(ump);
421 			}
422 			goto fail;
423 		}
424 		nb = newb;
425 		MPASS(allocblk < allociblk + nitems(allociblk));
426 		MPASS(lbns_remfree < lbns + nitems(lbns));
427 		*allocblk++ = nb;
428 		*lbns_remfree++ = lbn;
429 		nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, gbflags);
430 		nbp->b_blkno = fsbtodb(fs, nb);
431 		if (flags & BA_CLRBUF)
432 			vfs_bio_clrbuf(nbp);
433 		if (DOINGSOFTDEP(vp))
434 			softdep_setup_allocindir_page(ip, lbn, bp,
435 			    indirs[i].in_off, nb, 0, nbp);
436 		bap[indirs[i].in_off] = nb;
437 		/*
438 		 * If required, write synchronously, otherwise use
439 		 * delayed write.
440 		 */
441 		if (flags & IO_SYNC) {
442 			bwrite(bp);
443 		} else {
444 			if (bp->b_bufsize == fs->fs_bsize)
445 				bp->b_flags |= B_CLUSTEROK;
446 			bdwrite(bp);
447 		}
448 		curthread_pflags_restore(saved_inbdflush);
449 		*bpp = nbp;
450 		return (0);
451 	}
452 	brelse(bp);
	/* Data block already exists on disk: read it, clustering if useful. */
453 	if (flags & BA_CLRBUF) {
454 		int seqcount = (flags & BA_SEQMASK) >> BA_SEQSHIFT;
455 		if (seqcount != 0 &&
456 		    (vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0 &&
457 		    !(vm_page_count_severe() || buf_dirty_count_severe())) {
458 			error = cluster_read(vp, ip->i_size, lbn,
459 			    (int)fs->fs_bsize, NOCRED,
460 			    MAXBSIZE, seqcount, gbflags, &nbp);
461 		} else {
462 			error = bread_gb(vp, lbn, (int)fs->fs_bsize, NOCRED,
463 			    gbflags, &nbp);
464 		}
465 		if (error) {
466 			brelse(nbp);
467 			goto fail;
468 		}
469 	} else {
470 		nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, gbflags);
471 		nbp->b_blkno = fsbtodb(fs, nb);
472 	}
473 	curthread_pflags_restore(saved_inbdflush);
474 	*bpp = nbp;
475 	return (0);
476 fail:
477 	curthread_pflags_restore(saved_inbdflush);
478 	/*
479 	 * If we have failed to allocate any blocks, simply return the error.
480 	 * This is the usual case and avoids the need to fsync the file.
481 	 */
482 	if (allocblk == allociblk && allocib == NULL && unwindidx == -1)
483 		return (error);
484 	/*
485 	 * If we have failed part way through block allocation, we
486 	 * have to deallocate any indirect blocks that we have allocated.
487 	 * We have to fsync the file before we start to get rid of all
488 	 * of its dependencies so that we do not leave them dangling.
489 	 * We have to sync it at the end so that the soft updates code
490 	 * does not find any untracked changes. Although this is really
491 	 * slow, running out of disk space is not expected to be a common
492 	 * occurrence. The error return from fsync is ignored as we already
493 	 * have an error to return to the user.
494 	 *
495 	 * XXX Still have to journal the free below
496 	 */
497 	(void) ffs_syncvnode(vp, MNT_WAIT, 0);
498 	for (deallocated = 0, blkp = allociblk, lbns_remfree = lbns;
499 	     blkp < allocblk; blkp++, lbns_remfree++) {
500 		/*
501 		 * We shall not leave the freed blocks on the vnode
502 		 * buffer object lists.
503 		 */
504 		bp = getblk(vp, *lbns_remfree, fs->fs_bsize, 0, 0,
505 		    GB_NOCREAT | GB_UNMAPPED);
506 		if (bp != NULL) {
507 			KASSERT(bp->b_blkno == fsbtodb(fs, *blkp),
508 			    ("mismatch1 l %jd %jd b %ju %ju",
509 			    (intmax_t)bp->b_lblkno, (uintmax_t)*lbns_remfree,
510 			    (uintmax_t)bp->b_blkno,
511 			    (uintmax_t)fsbtodb(fs, *blkp)));
512 			bp->b_flags |= B_INVAL | B_RELBUF | B_NOCACHE;
513 			bp->b_flags &= ~(B_ASYNC | B_CACHE);
514 			brelse(bp);
515 		}
516 		deallocated += fs->fs_bsize;
517 	}
518 	if (allocib != NULL) {
		/* Remove the inode's pointer to the first new indirect block. */
519 		*allocib = 0;
520 	} else if (unwindidx >= 0) {
521 		int r;
522 
		/* Zero the parent-indirect entry recorded in unwindidx. */
523 		r = bread(vp, indirs[unwindidx].in_lbn,
524 		    (int)fs->fs_bsize, NOCRED, &bp);
525 		if (r) {
526 			panic("Could not unwind indirect block, error %d", r);
527 			brelse(bp);
528 		} else {
529 			bap = (ufs1_daddr_t *)bp->b_data;
530 			bap[indirs[unwindidx].in_off] = 0;
531 			if (flags & IO_SYNC) {
532 				bwrite(bp);
533 			} else {
534 				if (bp->b_bufsize == fs->fs_bsize)
535 					bp->b_flags |= B_CLUSTEROK;
536 				bdwrite(bp);
537 			}
538 		}
539 	}
540 	if (deallocated) {
541 #ifdef QUOTA
542 		/*
543 		 * Restore user's disk quota because allocation failed.
544 		 */
545 		(void) chkdq(ip, -btodb(deallocated), cred, FORCE);
546 #endif
547 		dp->di_blocks -= btodb(deallocated);
548 		ip->i_flag |= IN_CHANGE | IN_UPDATE;
549 	}
550 	(void) ffs_syncvnode(vp, MNT_WAIT, 0);
551 	/*
552 	 * After the buffers are invalidated and on-disk pointers are
553 	 * cleared, free the blocks.
554 	 */
555 	for (blkp = allociblk; blkp < allocblk; blkp++) {
556 #ifdef INVARIANTS
557 		if (blkp == allociblk)
558 			lbns_remfree = lbns;
559 		bp = getblk(vp, *lbns_remfree, fs->fs_bsize, 0, 0,
560 		    GB_NOCREAT | GB_UNMAPPED);
561 		if (bp != NULL) {
562 			panic("zombie1 %jd %ju %ju",
563 			    (intmax_t)bp->b_lblkno, (uintmax_t)bp->b_blkno,
564 			    (uintmax_t)fsbtodb(fs, *blkp));
565 		}
566 		lbns_remfree++;
567 #endif
568 		ffs_blkfree(ump, fs, ump->um_devvp, *blkp, fs->fs_bsize,
569 		    ip->i_number, vp->v_type, NULL, SINGLETON_KEY);
570 	}
571 	return (error);
572 }
573 
574 /*
575  * Balloc defines the structure of file system storage
576  * by allocating the physical blocks on a device given
577  * the inode and the logical block number in a file.
578  * This is the allocation strategy for UFS2. Above is
579  * the allocation strategy for UFS1.
580  */
581 int
582 ffs_balloc_ufs2(struct vnode *vp, off_t startoffset, int size,
583     struct ucred *cred, int flags, struct buf **bpp)
584 {
585 	struct inode *ip;
586 	struct ufs2_dinode *dp;
587 	ufs_lbn_t lbn, lastlbn;
588 	struct fs *fs;
589 	struct buf *bp, *nbp;
590 	struct mount *mp;
591 	struct ufsmount *ump;
592 	struct indir indirs[UFS_NIADDR + 2];
593 	ufs2_daddr_t nb, newb, *bap, pref;
594 	ufs2_daddr_t *allocib, *blkp, *allocblk, allociblk[UFS_NIADDR + 1];
595 	ufs2_daddr_t *lbns_remfree, lbns[UFS_NIADDR + 1];
596 	int deallocated, osize, nsize, num, i, error;
597 	int unwindidx = -1;
598 	int saved_inbdflush;
599 	int gbflags, reclaimed;
600 
601 	ip = VTOI(vp);
602 	dp = ip->i_din2;
603 	fs = ITOFS(ip);
604 	mp = ITOVFS(ip);
605 	ump = ITOUMP(ip);
606 	lbn = lblkno(fs, startoffset);
607 	size = blkoff(fs, startoffset) + size;
608 	reclaimed = 0;
609 	if (size > fs->fs_bsize)
610 		panic("ffs_balloc_ufs2: blk too big");
611 	*bpp = NULL;
612 	if (lbn < 0)
613 		return (EFBIG);
614 	gbflags = (flags & BA_UNMAPPED) != 0 ? GB_UNMAPPED : 0;
615 
616 	if (DOINGSOFTDEP(vp))
617 		softdep_prealloc(vp, MNT_WAIT);
618 
619 	/*
620 	 * Check for allocating external data.
621 	 */
622 	if (flags & IO_EXT) {
623 		if (lbn >= UFS_NXADDR)
624 			return (EFBIG);
625 		/*
626 		 * If the next write will extend the data into a new block,
627 		 * and the data is currently composed of a fragment
628 		 * this fragment has to be extended to be a full block.
629 		 */
630 		lastlbn = lblkno(fs, dp->di_extsize);
631 		if (lastlbn < lbn) {
632 			nb = lastlbn;
633 			osize = sblksize(fs, dp->di_extsize, nb);
634 			if (osize < fs->fs_bsize && osize > 0) {
635 				UFS_LOCK(ump);
636 				error = ffs_realloccg(ip, -1 - nb,
637 				    dp->di_extb[nb],
638 				    ffs_blkpref_ufs2(ip, lastlbn, (int)nb,
639 				    &dp->di_extb[0]), osize,
640 				    (int)fs->fs_bsize, flags, cred, &bp);
641 				if (error)
642 					return (error);
643 				if (DOINGSOFTDEP(vp))
644 					softdep_setup_allocext(ip, nb,
645 					    dbtofsb(fs, bp->b_blkno),
646 					    dp->di_extb[nb],
647 					    fs->fs_bsize, osize, bp);
648 				dp->di_extsize = smalllblktosize(fs, nb + 1);
649 				dp->di_extb[nb] = dbtofsb(fs, bp->b_blkno);
650 				bp->b_xflags |= BX_ALTDATA;
651 				ip->i_flag |= IN_CHANGE;
652 				if (flags & IO_SYNC)
653 					bwrite(bp);
654 				else
655 					bawrite(bp);
656 			}
657 		}
658 		/*
659 		 * All blocks are direct blocks
660 		 */
661 		if (flags & BA_METAONLY)
662 			panic("ffs_balloc_ufs2: BA_METAONLY for ext block");
663 		nb = dp->di_extb[lbn];
664 		if (nb != 0 && dp->di_extsize >= smalllblktosize(fs, lbn + 1)) {
665 			error = bread_gb(vp, -1 - lbn, fs->fs_bsize, NOCRED,
666 			    gbflags, &bp);
667 			if (error) {
668 				brelse(bp);
669 				return (error);
670 			}
671 			bp->b_blkno = fsbtodb(fs, nb);
672 			bp->b_xflags |= BX_ALTDATA;
673 			*bpp = bp;
674 			return (0);
675 		}
676 		if (nb != 0) {
677 			/*
678 			 * Consider need to reallocate a fragment.
679 			 */
680 			osize = fragroundup(fs, blkoff(fs, dp->di_extsize));
681 			nsize = fragroundup(fs, size);
682 			if (nsize <= osize) {
683 				error = bread_gb(vp, -1 - lbn, osize, NOCRED,
684 				    gbflags, &bp);
685 				if (error) {
686 					brelse(bp);
687 					return (error);
688 				}
689 				bp->b_blkno = fsbtodb(fs, nb);
690 				bp->b_xflags |= BX_ALTDATA;
691 			} else {
692 				UFS_LOCK(ump);
693 				error = ffs_realloccg(ip, -1 - lbn,
694 				    dp->di_extb[lbn],
695 				    ffs_blkpref_ufs2(ip, lbn, (int)lbn,
696 				    &dp->di_extb[0]), osize, nsize, flags,
697 				    cred, &bp);
698 				if (error)
699 					return (error);
700 				bp->b_xflags |= BX_ALTDATA;
701 				if (DOINGSOFTDEP(vp))
702 					softdep_setup_allocext(ip, lbn,
703 					    dbtofsb(fs, bp->b_blkno), nb,
704 					    nsize, osize, bp);
705 			}
706 		} else {
707 			if (dp->di_extsize < smalllblktosize(fs, lbn + 1))
708 				nsize = fragroundup(fs, size);
709 			else
710 				nsize = fs->fs_bsize;
711 			UFS_LOCK(ump);
712 			error = ffs_alloc(ip, lbn,
713 			   ffs_blkpref_ufs2(ip, lbn, (int)lbn, &dp->di_extb[0]),
714 			   nsize, flags, cred, &newb);
715 			if (error)
716 				return (error);
717 			bp = getblk(vp, -1 - lbn, nsize, 0, 0, gbflags);
718 			bp->b_blkno = fsbtodb(fs, newb);
719 			bp->b_xflags |= BX_ALTDATA;
720 			if (flags & BA_CLRBUF)
721 				vfs_bio_clrbuf(bp);
722 			if (DOINGSOFTDEP(vp))
723 				softdep_setup_allocext(ip, lbn, newb, 0,
724 				    nsize, 0, bp);
725 		}
726 		dp->di_extb[lbn] = dbtofsb(fs, bp->b_blkno);
727 		ip->i_flag |= IN_CHANGE;
728 		*bpp = bp;
729 		return (0);
730 	}
731 	/*
732 	 * If the next write will extend the file into a new block,
733 	 * and the file is currently composed of a fragment
734 	 * this fragment has to be extended to be a full block.
735 	 */
736 	lastlbn = lblkno(fs, ip->i_size);
737 	if (lastlbn < UFS_NDADDR && lastlbn < lbn) {
738 		nb = lastlbn;
739 		osize = blksize(fs, ip, nb);
740 		if (osize < fs->fs_bsize && osize > 0) {
741 			UFS_LOCK(ump);
742 			error = ffs_realloccg(ip, nb, dp->di_db[nb],
743 			    ffs_blkpref_ufs2(ip, lastlbn, (int)nb,
744 			    &dp->di_db[0]), osize, (int)fs->fs_bsize,
745 			    flags, cred, &bp);
746 			if (error)
747 				return (error);
748 			if (DOINGSOFTDEP(vp))
749 				softdep_setup_allocdirect(ip, nb,
750 				    dbtofsb(fs, bp->b_blkno),
751 				    dp->di_db[nb],
752 				    fs->fs_bsize, osize, bp);
753 			ip->i_size = smalllblktosize(fs, nb + 1);
754 			dp->di_size = ip->i_size;
755 			dp->di_db[nb] = dbtofsb(fs, bp->b_blkno);
756 			ip->i_flag |= IN_CHANGE | IN_UPDATE;
757 			if (flags & IO_SYNC)
758 				bwrite(bp);
759 			else
760 				bawrite(bp);
761 		}
762 	}
763 	/*
764 	 * The first UFS_NDADDR blocks are direct blocks
765 	 */
766 	if (lbn < UFS_NDADDR) {
767 		if (flags & BA_METAONLY)
768 			panic("ffs_balloc_ufs2: BA_METAONLY for direct block");
769 		nb = dp->di_db[lbn];
770 		if (nb != 0 && ip->i_size >= smalllblktosize(fs, lbn + 1)) {
771 			error = bread_gb(vp, lbn, fs->fs_bsize, NOCRED,
772 			    gbflags, &bp);
773 			if (error) {
774 				brelse(bp);
775 				return (error);
776 			}
777 			bp->b_blkno = fsbtodb(fs, nb);
778 			*bpp = bp;
779 			return (0);
780 		}
781 		if (nb != 0) {
782 			/*
783 			 * Consider need to reallocate a fragment.
784 			 */
785 			osize = fragroundup(fs, blkoff(fs, ip->i_size));
786 			nsize = fragroundup(fs, size);
787 			if (nsize <= osize) {
788 				error = bread_gb(vp, lbn, osize, NOCRED,
789 				    gbflags, &bp);
790 				if (error) {
791 					brelse(bp);
792 					return (error);
793 				}
794 				bp->b_blkno = fsbtodb(fs, nb);
795 			} else {
796 				UFS_LOCK(ump);
797 				error = ffs_realloccg(ip, lbn, dp->di_db[lbn],
798 				    ffs_blkpref_ufs2(ip, lbn, (int)lbn,
799 				    &dp->di_db[0]), osize, nsize, flags,
800 				    cred, &bp);
801 				if (error)
802 					return (error);
803 				if (DOINGSOFTDEP(vp))
804 					softdep_setup_allocdirect(ip, lbn,
805 					    dbtofsb(fs, bp->b_blkno), nb,
806 					    nsize, osize, bp);
807 			}
808 		} else {
809 			if (ip->i_size < smalllblktosize(fs, lbn + 1))
810 				nsize = fragroundup(fs, size);
811 			else
812 				nsize = fs->fs_bsize;
813 			UFS_LOCK(ump);
814 			error = ffs_alloc(ip, lbn,
815 			    ffs_blkpref_ufs2(ip, lbn, (int)lbn,
816 				&dp->di_db[0]), nsize, flags, cred, &newb);
817 			if (error)
818 				return (error);
819 			bp = getblk(vp, lbn, nsize, 0, 0, gbflags);
820 			bp->b_blkno = fsbtodb(fs, newb);
821 			if (flags & BA_CLRBUF)
822 				vfs_bio_clrbuf(bp);
823 			if (DOINGSOFTDEP(vp))
824 				softdep_setup_allocdirect(ip, lbn, newb, 0,
825 				    nsize, 0, bp);
826 		}
827 		dp->di_db[lbn] = dbtofsb(fs, bp->b_blkno);
828 		ip->i_flag |= IN_CHANGE | IN_UPDATE;
829 		*bpp = bp;
830 		return (0);
831 	}
832 	/*
833 	 * Determine the number of levels of indirection.
834 	 */
835 	pref = 0;
836 	if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
837 		return(error);
838 #ifdef INVARIANTS
839 	if (num < 1)
840 		panic ("ffs_balloc_ufs2: ufs_getlbns returned indirect block");
841 #endif
842 	saved_inbdflush = curthread_pflags_set(TDP_INBDFLUSH);
843 	/*
844 	 * Fetch the first indirect block allocating if necessary.
845 	 */
846 	--num;
847 	nb = dp->di_ib[indirs[0].in_off];
848 	allocib = NULL;
849 	allocblk = allociblk;
850 	lbns_remfree = lbns;
851 	if (nb == 0) {
852 		UFS_LOCK(ump);
853 		pref = ffs_blkpref_ufs2(ip, lbn, -indirs[0].in_off - 1,
854 		    (ufs2_daddr_t *)0);
855 		if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
856 		    flags, cred, &newb)) != 0) {
857 			curthread_pflags_restore(saved_inbdflush);
858 			return (error);
859 		}
860 		pref = newb + fs->fs_frag;
861 		nb = newb;
862 		MPASS(allocblk < allociblk + nitems(allociblk));
863 		MPASS(lbns_remfree < lbns + nitems(lbns));
864 		*allocblk++ = nb;
865 		*lbns_remfree++ = indirs[1].in_lbn;
866 		bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0,
867 		    GB_UNMAPPED);
868 		bp->b_blkno = fsbtodb(fs, nb);
869 		vfs_bio_clrbuf(bp);
870 		if (DOINGSOFTDEP(vp)) {
871 			softdep_setup_allocdirect(ip,
872 			    UFS_NDADDR + indirs[0].in_off, newb, 0,
873 			    fs->fs_bsize, 0, bp);
874 			bdwrite(bp);
875 		} else if ((flags & IO_SYNC) == 0 && DOINGASYNC(vp)) {
876 			if (bp->b_bufsize == fs->fs_bsize)
877 				bp->b_flags |= B_CLUSTEROK;
878 			bdwrite(bp);
879 		} else {
880 			if ((error = bwrite(bp)) != 0)
881 				goto fail;
882 		}
883 		allocib = &dp->di_ib[indirs[0].in_off];
884 		*allocib = nb;
885 		ip->i_flag |= IN_CHANGE | IN_UPDATE;
886 	}
887 	/*
888 	 * Fetch through the indirect blocks, allocating as necessary.
889 	 */
890 retry:
891 	for (i = 1;;) {
892 		error = bread(vp,
893 		    indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp);
894 		if (error) {
895 			brelse(bp);
896 			goto fail;
897 		}
898 		bap = (ufs2_daddr_t *)bp->b_data;
899 		nb = bap[indirs[i].in_off];
900 		if ((error = UFS_CHECK_BLKNO(mp, ip->i_number, nb,
901 		    fs->fs_bsize)) != 0) {
902 			brelse(bp);
903 			goto fail;
		}
		/*
		 * All levels of the indirect chain have been traversed;
		 * bp now holds the last-level indirect block and nb the
		 * data block number (0 if not yet allocated).
		 */
		if (i == num)
			break;
		i += 1;
		/*
		 * The next-level indirect block already exists: release
		 * the (unmodified) parent buffer and descend to it.
		 */
		if (nb != 0) {
			bqrelse(bp);
			continue;
		}
		UFS_LOCK(ump);
		/*
		 * If parent indirect has just been allocated, try to cluster
		 * immediately following it.
		 */
		if (pref == 0)
			pref = ffs_blkpref_ufs2(ip, lbn, i - num - 1,
			    (ufs2_daddr_t *)0);
		/*
		 * NOTE(review): ffs_alloc() is entered with the UFS lock
		 * held and appears to return with it released -- the error
		 * path below retakes it before touching ump state, and the
		 * success path proceeds unlocked. Confirm against
		 * ffs_alloc().
		 */
		if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
		    flags | IO_BUFLOCKED, cred, &newb)) != 0) {
			brelse(bp);
			UFS_LOCK(ump);
			/*
			 * First allocation failure with soft updates
			 * active: ask softdep to flush dependencies to
			 * reclaim space, then retry the whole allocation
			 * once.
			 */
			if (DOINGSOFTDEP(vp) && ++reclaimed == 1) {
				softdep_request_cleanup(fs, vp, cred,
				    FLUSH_BLOCKS_WAIT);
				UFS_UNLOCK(ump);
				goto retry;
			}
			/* Rate-limit the "filesystem full" complaint. */
			if (ppsratecheck(&ump->um_last_fullmsg,
			    &ump->um_secs_fullmsg, 1)) {
				UFS_UNLOCK(ump);
				ffs_fserr(fs, ip->i_number, "filesystem full");
				uprintf("\n%s: write failed, filesystem "
				    "is full\n", fs->fs_fsmnt);
			} else {
				UFS_UNLOCK(ump);
			}
			goto fail;
		}
		/* Prefer to cluster the next allocation right after this one. */
		pref = newb + fs->fs_frag;
		nb = newb;
		/*
		 * Record the new indirect block in the unwind lists so the
		 * fail: path can find and free it if a later step fails.
		 */
		MPASS(allocblk < allociblk + nitems(allociblk));
		MPASS(lbns_remfree < lbns + nitems(lbns));
		*allocblk++ = nb;
		*lbns_remfree++ = indirs[i].in_lbn;
		nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0,
		    GB_UNMAPPED);
		nbp->b_blkno = fsbtodb(fs, nb);
		vfs_bio_clrbuf(nbp);
		if (DOINGSOFTDEP(vp)) {
			/*
			 * Soft updates tracks the child-before-parent write
			 * ordering as a dependency, so a delayed write of
			 * the new indirect block is safe here.
			 */
			softdep_setup_allocindir_meta(nbp, ip, bp,
			    indirs[i - 1].in_off, nb);
			bdwrite(nbp);
		} else if ((flags & IO_SYNC) == 0 && DOINGASYNC(vp)) {
			if (nbp->b_bufsize == fs->fs_bsize)
				nbp->b_flags |= B_CLUSTEROK;
			bdwrite(nbp);
		} else {
			/*
			 * Without soft updates, write the zeroed child
			 * synchronously before linking it into the parent
			 * below, so the parent never points at garbage
			 * on disk.
			 */
			if ((error = bwrite(nbp)) != 0) {
				brelse(bp);
				goto fail;
			}
		}
		/* Link the new indirect block into its parent. */
		bap[indirs[i - 1].in_off] = nb;
		/*
		 * Remember the first parent slot modified via a buffer (as
		 * opposed to via allocib in the inode) so the fail: path
		 * knows which on-disk pointer to clear when unwinding.
		 */
		if (allocib == NULL && unwindidx < 0)
			unwindidx = i - 1;
		/*
		 * If required, write synchronously, otherwise use
		 * delayed write.
		 */
		if (flags & IO_SYNC) {
			bwrite(bp);
		} else {
			if (bp->b_bufsize == fs->fs_bsize)
				bp->b_flags |= B_CLUSTEROK;
			bdwrite(bp);
		}
	}
	/*
	 * If asked only for the indirect block, then return it.
	 */
	if (flags & BA_METAONLY) {
		curthread_pflags_restore(saved_inbdflush);
		*bpp = bp;
		return (0);
	}
	/*
	 * Get the data block, allocating if necessary.
	 */
	if (nb == 0) {
		UFS_LOCK(ump);
		/*
		 * If allocating metadata at the front of the cylinder
		 * group and parent indirect block has just been allocated,
		 * then cluster next to it if it is the first indirect in
		 * the file. Otherwise it has been allocated in the metadata
		 * area, so we want to find our own place out in the data area.
		 */
		if (pref == 0 || (lbn > UFS_NDADDR && fs->fs_metaspace != 0))
			pref = ffs_blkpref_ufs2(ip, lbn, indirs[i].in_off,
			    &bap[0]);
		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
		    flags | IO_BUFLOCKED, cred, &newb);
		if (error) {
			brelse(bp);
			UFS_LOCK(ump);
			/*
			 * Same ENOSPC recovery as for the indirect blocks:
			 * flush soft-updates work once and retry, otherwise
			 * report (rate-limited) and fail.
			 */
			if (DOINGSOFTDEP(vp) && ++reclaimed == 1) {
				softdep_request_cleanup(fs, vp, cred,
				    FLUSH_BLOCKS_WAIT);
				UFS_UNLOCK(ump);
				goto retry;
			}
			if (ppsratecheck(&ump->um_last_fullmsg,
			    &ump->um_secs_fullmsg, 1)) {
				UFS_UNLOCK(ump);
				ffs_fserr(fs, ip->i_number, "filesystem full");
				uprintf("\n%s: write failed, filesystem "
				    "is full\n", fs->fs_fsmnt);
			} else {
				UFS_UNLOCK(ump);
			}
			goto fail;
		}
		nb = newb;
		/* Track the new data block for unwinding on later failure. */
		MPASS(allocblk < allociblk + nitems(allociblk));
		MPASS(lbns_remfree < lbns + nitems(lbns));
		*allocblk++ = nb;
		*lbns_remfree++ = lbn;
		nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, gbflags);
		nbp->b_blkno = fsbtodb(fs, nb);
		if (flags & BA_CLRBUF)
			vfs_bio_clrbuf(nbp);
		if (DOINGSOFTDEP(vp))
			softdep_setup_allocindir_page(ip, lbn, bp,
			    indirs[i].in_off, nb, 0, nbp);
		/* Link the data block into the last-level indirect. */
		bap[indirs[i].in_off] = nb;
		/*
		 * If required, write synchronously, otherwise use
		 * delayed write.
		 */
		if (flags & IO_SYNC) {
			bwrite(bp);
		} else {
			if (bp->b_bufsize == fs->fs_bsize)
				bp->b_flags |= B_CLUSTEROK;
			bdwrite(bp);
		}
		curthread_pflags_restore(saved_inbdflush);
		*bpp = nbp;
		return (0);
	}
	/* Data block already exists; done with the indirect buffer. */
	brelse(bp);
	/*
	 * If requested clear invalid portions of the buffer.  If we
	 * have to do a read-before-write (typical if BA_CLRBUF is set),
	 * try to do some read-ahead in the sequential case to reduce
	 * the number of I/O transactions.
	 */
	if (flags & BA_CLRBUF) {
		int seqcount = (flags & BA_SEQMASK) >> BA_SEQSHIFT;
		if (seqcount != 0 &&
		    (vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0 &&
		    !(vm_page_count_severe() || buf_dirty_count_severe())) {
			error = cluster_read(vp, ip->i_size, lbn,
			    (int)fs->fs_bsize, NOCRED,
			    MAXBSIZE, seqcount, gbflags, &nbp);
		} else {
			error = bread_gb(vp, lbn, (int)fs->fs_bsize,
			    NOCRED, gbflags, &nbp);
		}
		if (error) {
			brelse(nbp);
			goto fail;
		}
	} else {
		nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, gbflags);
		nbp->b_blkno = fsbtodb(fs, nb);
	}
	curthread_pflags_restore(saved_inbdflush);
	*bpp = nbp;
	return (0);
fail:
	curthread_pflags_restore(saved_inbdflush);
	/*
	 * If we have failed to allocate any blocks, simply return the error.
	 * This is the usual case and avoids the need to fsync the file.
	 */
	if (allocblk == allociblk && allocib == NULL && unwindidx == -1)
		return (error);
	/*
	 * If we have failed part way through block allocation, we
	 * have to deallocate any indirect blocks that we have allocated.
	 * We have to fsync the file before we start to get rid of all
	 * of its dependencies so that we do not leave them dangling.
	 * We have to sync it at the end so that the soft updates code
	 * does not find any untracked changes. Although this is really
	 * slow, running out of disk space is not expected to be a common
	 * occurrence. The error return from fsync is ignored as we already
	 * have an error to return to the user.
	 *
	 * XXX Still have to journal the free below
	 */
	(void) ffs_syncvnode(vp, MNT_WAIT, 0);
	for (deallocated = 0, blkp = allociblk, lbns_remfree = lbns;
	     blkp < allocblk; blkp++, lbns_remfree++) {
		/*
		 * We shall not leave the freed blocks on the vnode
		 * buffer object lists.
		 */
		bp = getblk(vp, *lbns_remfree, fs->fs_bsize, 0, 0,
		    GB_NOCREAT | GB_UNMAPPED);
		if (bp != NULL) {
			KASSERT(bp->b_blkno == fsbtodb(fs, *blkp),
			    ("mismatch2 l %jd %jd b %ju %ju",
			    (intmax_t)bp->b_lblkno, (uintmax_t)*lbns_remfree,
			    (uintmax_t)bp->b_blkno,
			    (uintmax_t)fsbtodb(fs, *blkp)));
			/* Discard the buffer without writing it back. */
			bp->b_flags |= B_INVAL | B_RELBUF | B_NOCACHE;
			bp->b_flags &= ~(B_ASYNC | B_CACHE);
			brelse(bp);
		}
		deallocated += fs->fs_bsize;
	}
	/*
	 * Clear the on-disk pointer to the abandoned chain: either the
	 * inode's own pointer (allocib) or the slot in the surviving
	 * parent indirect block recorded in unwindidx.
	 */
	if (allocib != NULL) {
		*allocib = 0;
	} else if (unwindidx >= 0) {
		int r;

		r = bread(vp, indirs[unwindidx].in_lbn,
		    (int)fs->fs_bsize, NOCRED, &bp);
		if (r) {
			/*
			 * NOTE(review): the brelse() below is unreachable
			 * after panic(); it appears to be retained from a
			 * pre-panic version of this error handling.
			 */
			panic("Could not unwind indirect block, error %d", r);
			brelse(bp);
		} else {
			bap = (ufs2_daddr_t *)bp->b_data;
			bap[indirs[unwindidx].in_off] = 0;
			if (flags & IO_SYNC) {
				bwrite(bp);
			} else {
				if (bp->b_bufsize == fs->fs_bsize)
					bp->b_flags |= B_CLUSTEROK;
				bdwrite(bp);
			}
		}
	}
	if (deallocated) {
#ifdef QUOTA
		/*
		 * Restore user's disk quota because allocation failed.
		 */
		(void) chkdq(ip, -btodb(deallocated), cred, FORCE);
#endif
		dp->di_blocks -= btodb(deallocated);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
	}
	(void) ffs_syncvnode(vp, MNT_WAIT, 0);
	/*
	 * After the buffers are invalidated and on-disk pointers are
	 * cleared, free the blocks.
	 */
	for (blkp = allociblk; blkp < allocblk; blkp++) {
#ifdef INVARIANTS
		/*
		 * Assert that no stale buffer for a freed block survived
		 * the invalidation pass above.
		 */
		if (blkp == allociblk)
			lbns_remfree = lbns;
		bp = getblk(vp, *lbns_remfree, fs->fs_bsize, 0, 0,
		    GB_NOCREAT | GB_UNMAPPED);
		if (bp != NULL) {
			panic("zombie2 %jd %ju %ju",
			    (intmax_t)bp->b_lblkno, (uintmax_t)bp->b_blkno,
			    (uintmax_t)fsbtodb(fs, *blkp));
		}
		lbns_remfree++;
#endif
		ffs_blkfree(ump, fs, ump->um_devvp, *blkp, fs->fs_bsize,
		    ip->i_number, vp->v_type, NULL, SINGLETON_KEY);
	}
	return (error);
}
1180