xref: /freebsd/sys/ufs/ffs/ffs_balloc.c (revision c03c5b1c)
1 /*-
2  * SPDX-License-Identifier: (BSD-2-Clause-FreeBSD AND BSD-3-Clause)
3  *
4  * Copyright (c) 2002 Networks Associates Technology, Inc.
5  * All rights reserved.
6  *
7  * This software was developed for the FreeBSD Project by Marshall
8  * Kirk McKusick and Network Associates Laboratories, the Security
9  * Research Division of Network Associates, Inc. under DARPA/SPAWAR
10  * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS
11  * research program
12  *
13  * Redistribution and use in source and binary forms, with or without
14  * modification, are permitted provided that the following conditions
15  * are met:
16  * 1. Redistributions of source code must retain the above copyright
17  *    notice, this list of conditions and the following disclaimer.
18  * 2. Redistributions in binary form must reproduce the above copyright
19  *    notice, this list of conditions and the following disclaimer in the
20  *    documentation and/or other materials provided with the distribution.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  *
34  * Copyright (c) 1982, 1986, 1989, 1993
35  *	The Regents of the University of California.  All rights reserved.
36  *
37  * Redistribution and use in source and binary forms, with or without
38  * modification, are permitted provided that the following conditions
39  * are met:
40  * 1. Redistributions of source code must retain the above copyright
41  *    notice, this list of conditions and the following disclaimer.
42  * 2. Redistributions in binary form must reproduce the above copyright
43  *    notice, this list of conditions and the following disclaimer in the
44  *    documentation and/or other materials provided with the distribution.
45  * 3. Neither the name of the University nor the names of its contributors
46  *    may be used to endorse or promote products derived from this software
47  *    without specific prior written permission.
48  *
49  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
50  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
53  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
54  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
55  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
56  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
57  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
58  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
59  * SUCH DAMAGE.
60  *
61  *	@(#)ffs_balloc.c	8.8 (Berkeley) 6/16/95
62  */
63 
64 #include <sys/cdefs.h>
65 __FBSDID("$FreeBSD$");
66 
67 #include <sys/param.h>
68 #include <sys/systm.h>
69 #include <sys/bio.h>
70 #include <sys/buf.h>
71 #include <sys/lock.h>
72 #include <sys/mount.h>
73 #include <sys/stat.h>
74 #include <sys/vnode.h>
75 #include <sys/vmmeter.h>
76 
77 #include <ufs/ufs/quota.h>
78 #include <ufs/ufs/inode.h>
79 #include <ufs/ufs/ufs_extern.h>
80 #include <ufs/ufs/extattr.h>
81 #include <ufs/ufs/ufsmount.h>
82 
83 #include <ufs/ffs/fs.h>
84 #include <ufs/ffs/ffs_extern.h>
85 
86 /*
87  * Balloc defines the structure of filesystem storage
88  * by allocating the physical blocks on a device given
89  * the inode and the logical block number in a file.
90  * This is the allocation strategy for UFS1. Below is
91  * the allocation strategy for UFS2.
 *
 * Arguments:
 *	vp		vnode of the file; its inode is obtained via VTOI().
 *	startoffset	byte offset in the file; selects the logical block
 *			(lblkno) and the starting offset within it (blkoff).
 *	size		byte count starting at startoffset.  The in-block
 *			offset plus size must not exceed fs_bsize, otherwise
 *			the function panics.
 *	cred		credential handed to ffs_alloc()/ffs_realloccg()
 *			and, under QUOTA, to chkdq() when unwinding.
 *	flags		IO_ and BA_ flags.  Those examined here are IO_EXT
 *			(rejected with EOPNOTSUPP), IO_SYNC, BA_CLRBUF,
 *			BA_METAONLY, BA_UNMAPPED and the BA_SEQMASK field.
 *	bpp		set to NULL on entry; on success receives the buffer
 *			for the requested block, or with BA_METAONLY the
 *			buffer of the last-level indirect block.
 *
 * Returns 0 on success.  Errors generated here are EOPNOTSUPP (IO_EXT),
 * EFBIG (negative logical block) and EIO (getblk() returned NULL);
 * any other error is passed through from the routines called.
92  */
93 int
94 ffs_balloc_ufs1(struct vnode *vp, off_t startoffset, int size,
95     struct ucred *cred, int flags, struct buf **bpp)
96 {
97 	struct inode *ip;
98 	struct ufs1_dinode *dp;
99 	ufs_lbn_t lbn, lastlbn;
100 	struct fs *fs;
101 	ufs1_daddr_t nb;
102 	struct buf *bp, *nbp;
103 	struct mount *mp;
104 	struct ufsmount *ump;
105 	struct indir indirs[UFS_NIADDR + 2];
106 	int deallocated, osize, nsize, num, i, error;
107 	ufs2_daddr_t newb;
108 	ufs1_daddr_t *bap, pref;
109 	ufs1_daddr_t *allocib, *blkp, *allocblk, allociblk[UFS_NIADDR + 1];
110 	ufs2_daddr_t *lbns_remfree, lbns[UFS_NIADDR + 1];
111 	int unwindidx = -1;
112 	int saved_inbdflush;
113 	int gbflags, reclaimed;
114 
115 	ip = VTOI(vp);
116 	dp = ip->i_din1;
117 	fs = ITOFS(ip);
118 	mp = ITOVFS(ip);
119 	ump = ITOUMP(ip);
120 	lbn = lblkno(fs, startoffset);
	/* From here on size is measured from the start of block lbn. */
121 	size = blkoff(fs, startoffset) + size;
122 	reclaimed = 0;
123 	if (size > fs->fs_bsize)
124 		panic("ffs_balloc_ufs1: blk too big");
125 	*bpp = NULL;
	/*
	 * These two rejections return directly instead of jumping to
	 * "done" because vn_seqc_write_begin() has not been called yet.
	 */
126 	if (flags & IO_EXT)
127 		return (EOPNOTSUPP);
128 	if (lbn < 0)
129 		return (EFBIG);
	/* Translate BA_UNMAPPED into the flag given to getblk()/bread_gb(). */
130 	gbflags = (flags & BA_UNMAPPED) != 0 ? GB_UNMAPPED : 0;
131 
132 	vn_seqc_write_begin(vp);
133 
134 	/*
135 	 * If the next write will extend the file into a new block,
136 	 * and the file is currently composed of a fragment
137 	 * this fragment has to be extended to be a full block.
138 	 */
139 	lastlbn = lblkno(fs, ip->i_size);
140 	if (lastlbn < UFS_NDADDR && lastlbn < lbn) {
141 		nb = lastlbn;
142 		osize = blksize(fs, ip, nb);
143 		if (osize < fs->fs_bsize && osize > 0) {
			/*
			 * NOTE(review): no UFS_UNLOCK() follows on this
			 * path; presumably ffs_realloccg() releases the
			 * lock before returning -- confirm.
			 */
144 			UFS_LOCK(ump);
145 			error = ffs_realloccg(ip, nb, dp->di_db[nb],
146 			   ffs_blkpref_ufs1(ip, lastlbn, (int)nb,
147 			   &dp->di_db[0]), osize, (int)fs->fs_bsize, flags,
148 			   cred, &bp);
149 			if (error)
150 				goto done;
151 			if (DOINGSOFTDEP(vp))
152 				softdep_setup_allocdirect(ip, nb,
153 				    dbtofsb(fs, bp->b_blkno), dp->di_db[nb],
154 				    fs->fs_bsize, osize, bp);
155 			ip->i_size = smalllblktosize(fs, nb + 1);
156 			dp->di_size = ip->i_size;
157 			dp->di_db[nb] = dbtofsb(fs, bp->b_blkno);
158 			UFS_INODE_SET_FLAG(ip,
159 			    IN_SIZEMOD | IN_CHANGE | IN_UPDATE | IN_IBLKDATA);
			/*
			 * Push the extended block out: bwrite() for IO_SYNC,
			 * bdwrite() on DOINGASYNC vnodes, bawrite() otherwise.
			 */
160 			if (flags & IO_SYNC)
161 				bwrite(bp);
162 			else if (DOINGASYNC(vp))
163 				bdwrite(bp);
164 			else
165 				bawrite(bp);
166 		}
167 	}
168 	/*
169 	 * The first UFS_NDADDR blocks are direct blocks
170 	 */
171 	if (lbn < UFS_NDADDR) {
172 		if (flags & BA_METAONLY)
173 			panic("ffs_balloc_ufs1: BA_METAONLY for direct block");
174 		nb = dp->di_db[lbn];
		/*
		 * Block is allocated and the file size covers all of it:
		 * no allocation needed, just hand back a buffer.
		 */
175 		if (nb != 0 && ip->i_size >= smalllblktosize(fs, lbn + 1)) {
			/*
			 * BA_CLRBUF: bread() the block.  Otherwise take the
			 * buffer with getblk() and run vfs_bio_clrbuf() on it.
			 */
176 			if ((flags & BA_CLRBUF) != 0) {
177 				error = bread(vp, lbn, fs->fs_bsize, NOCRED,
178 				    &bp);
179 				if (error != 0)
180 					goto done;
181 			} else {
182 				bp = getblk(vp, lbn, fs->fs_bsize, 0, 0,
183 				    gbflags);
184 				if (bp == NULL) {
185 					error = EIO;
186 					goto done;
187 				}
188 				vfs_bio_clrbuf(bp);
189 			}
190 			bp->b_blkno = fsbtodb(fs, nb);
191 			*bpp = bp;
192 			error = 0;
193 			goto done;
194 		}
195 		if (nb != 0) {
196 			/*
197 			 * Consider need to reallocate a fragment.
198 			 */
199 			osize = fragroundup(fs, blkoff(fs, ip->i_size));
200 			nsize = fragroundup(fs, size);
			/*
			 * The existing fragment (osize) already holds the
			 * requested range: read it.  Otherwise grow it to
			 * nsize with ffs_realloccg().
			 */
201 			if (nsize <= osize) {
202 				error = bread(vp, lbn, osize, NOCRED, &bp);
203 				if (error)
204 					goto done;
205 				bp->b_blkno = fsbtodb(fs, nb);
206 			} else {
207 				UFS_LOCK(ump);
208 				error = ffs_realloccg(ip, lbn, dp->di_db[lbn],
209 				    ffs_blkpref_ufs1(ip, lbn, (int)lbn,
210 				    &dp->di_db[0]), osize, nsize, flags,
211 				    cred, &bp);
212 				if (error)
213 					goto done;
214 				if (DOINGSOFTDEP(vp))
215 					softdep_setup_allocdirect(ip, lbn,
216 					    dbtofsb(fs, bp->b_blkno), nb,
217 					    nsize, osize, bp);
218 			}
219 		} else {
			/*
			 * No block yet.  Allocate only the needed fragments
			 * when the file currently ends before the end of
			 * this block, else a full block.
			 */
220 			if (ip->i_size < smalllblktosize(fs, lbn + 1))
221 				nsize = fragroundup(fs, size);
222 			else
223 				nsize = fs->fs_bsize;
224 			UFS_LOCK(ump);
225 			error = ffs_alloc(ip, lbn,
226 			    ffs_blkpref_ufs1(ip, lbn, (int)lbn, &dp->di_db[0]),
227 			    nsize, flags, cred, &newb);
228 			if (error)
229 				goto done;
230 			bp = getblk(vp, lbn, nsize, 0, 0, gbflags);
231 			bp->b_blkno = fsbtodb(fs, newb);
232 			if (flags & BA_CLRBUF)
233 				vfs_bio_clrbuf(bp);
234 			if (DOINGSOFTDEP(vp))
235 				softdep_setup_allocdirect(ip, lbn, newb, 0,
236 				    nsize, 0, bp);
237 		}
		/* Record the (possibly new) block address in the inode. */
238 		dp->di_db[lbn] = dbtofsb(fs, bp->b_blkno);
239 		UFS_INODE_SET_FLAG(ip, IN_CHANGE | IN_UPDATE | IN_IBLKDATA);
240 		*bpp = bp;
241 		error = 0;
242 		goto done;
243 	}
244 	/*
245 	 * Determine the number of levels of indirection.
246 	 */
247 	pref = 0;
248 	if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
249 		goto done;
250 #ifdef INVARIANTS
251 	if (num < 1)
252 		panic ("ffs_balloc_ufs1: ufs_getlbns returned indirect block");
253 #endif
	/*
	 * TDP_INBDFLUSH stays set on the thread while indirect blocks
	 * are processed; each exit below restores the saved state.
	 */
254 	saved_inbdflush = curthread_pflags_set(TDP_INBDFLUSH);
255 	/*
256 	 * Fetch the first indirect block allocating if necessary.
257 	 */
	/*
	 * After the decrement, num is the index in indirs[] at which the
	 * loop below stops, i.e. the entry pointing at the data block.
	 */
258 	--num;
259 	nb = dp->di_ib[indirs[0].in_off];
	/*
	 * allociblk[]/lbns[] collect the block numbers and the logical
	 * block numbers of every block allocated from here on, so that
	 * the failure path can invalidate their buffers and free them.
	 * allocib remembers the inode slot to clear if the top-level
	 * indirect block is allocated here.
	 */
260 	allocib = NULL;
261 	allocblk = allociblk;
262 	lbns_remfree = lbns;
263 	if (nb == 0) {
264 		UFS_LOCK(ump);
265 		pref = ffs_blkpref_ufs1(ip, lbn, -indirs[0].in_off - 1,
266 		    (ufs1_daddr_t *)0);
267 		if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
268 		    flags, cred, &newb)) != 0) {
269 			curthread_pflags_restore(saved_inbdflush);
270 			goto done;
271 		}
		/* Aim the next allocation right behind this block. */
272 		pref = newb + fs->fs_frag;
273 		nb = newb;
274 		MPASS(allocblk < allociblk + nitems(allociblk));
275 		MPASS(lbns_remfree < lbns + nitems(lbns));
276 		*allocblk++ = nb;
277 		*lbns_remfree++ = indirs[1].in_lbn;
278 		bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0, gbflags);
279 		bp->b_blkno = fsbtodb(fs, nb);
280 		vfs_bio_clrbuf(bp);
281 		if (DOINGSOFTDEP(vp)) {
282 			softdep_setup_allocdirect(ip,
283 			    UFS_NDADDR + indirs[0].in_off, newb, 0,
284 			    fs->fs_bsize, 0, bp);
285 			bdwrite(bp);
286 		} else if ((flags & IO_SYNC) == 0 && DOINGASYNC(vp)) {
287 			if (bp->b_bufsize == fs->fs_bsize)
288 				bp->b_flags |= B_CLUSTEROK;
289 			bdwrite(bp);
290 		} else {
291 			if ((error = bwrite(bp)) != 0)
292 				goto fail;
293 		}
294 		allocib = &dp->di_ib[indirs[0].in_off];
295 		*allocib = nb;
296 		UFS_INODE_SET_FLAG(ip, IN_CHANGE | IN_UPDATE | IN_IBLKDATA);
297 	}
298 	/*
299 	 * Fetch through the indirect blocks, allocating as necessary.
300 	 */
	/*
	 * "retry" is re-entered at most once: after an allocation failure
	 * with soft updates active, following softdep_request_cleanup().
	 */
301 retry:
302 	for (i = 1;;) {
303 		error = bread(vp,
304 		    indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp);
305 		if (error) {
306 			goto fail;
307 		}
308 		bap = (ufs1_daddr_t *)bp->b_data;
309 		nb = bap[indirs[i].in_off];
310 		if ((error = UFS_CHECK_BLKNO(mp, ip->i_number, nb,
311 		    fs->fs_bsize)) != 0) {
312 			brelse(bp);
313 			goto fail;
314 		}
315 		if (i == num)
316 			break;
		/*
		 * i is advanced before allocating: below, indirs[i] names
		 * the child block being created and indirs[i - 1] the slot
		 * in the current block (bap) that will point to it.
		 */
317 		i += 1;
318 		if (nb != 0) {
319 			bqrelse(bp);
320 			continue;
321 		}
322 		UFS_LOCK(ump);
323 		/*
324 		 * If parent indirect has just been allocated, try to cluster
325 		 * immediately following it.
326 		 */
327 		if (pref == 0)
328 			pref = ffs_blkpref_ufs1(ip, lbn, i - num - 1,
329 			    (ufs1_daddr_t *)0);
330 		if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
331 		    flags | IO_BUFLOCKED, cred, &newb)) != 0) {
332 			brelse(bp);
			/*
			 * NOTE(review): UFS_LOCK() is taken again here,
			 * which suggests ffs_alloc() returns with the lock
			 * dropped -- confirm.
			 */
333 			UFS_LOCK(ump);
334 			if (DOINGSOFTDEP(vp) && ++reclaimed == 1) {
335 				softdep_request_cleanup(fs, vp, cred,
336 				    FLUSH_BLOCKS_WAIT);
337 				UFS_UNLOCK(ump);
338 				goto retry;
339 			}
340 			if (!ffs_fsfail_cleanup_locked(ump, error) &&
341 			    ppsratecheck(&ump->um_last_fullmsg,
342 			    &ump->um_secs_fullmsg, 1)) {
343 				UFS_UNLOCK(ump);
344 				ffs_fserr(fs, ip->i_number, "filesystem full");
345 				uprintf("\n%s: write failed, filesystem "
346 				    "is full\n", fs->fs_fsmnt);
347 			} else {
348 				UFS_UNLOCK(ump);
349 			}
350 			goto fail;
351 		}
352 		pref = newb + fs->fs_frag;
353 		nb = newb;
354 		MPASS(allocblk < allociblk + nitems(allociblk));
355 		MPASS(lbns_remfree < lbns + nitems(lbns));
356 		*allocblk++ = nb;
357 		*lbns_remfree++ = indirs[i].in_lbn;
358 		nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0, 0);
359 		nbp->b_blkno = fsbtodb(fs, nb);
360 		vfs_bio_clrbuf(nbp);
361 		if (DOINGSOFTDEP(vp)) {
362 			softdep_setup_allocindir_meta(nbp, ip, bp,
363 			    indirs[i - 1].in_off, nb);
364 			bdwrite(nbp);
365 		} else if ((flags & IO_SYNC) == 0 && DOINGASYNC(vp)) {
366 			if (nbp->b_bufsize == fs->fs_bsize)
367 				nbp->b_flags |= B_CLUSTEROK;
368 			bdwrite(nbp);
369 		} else {
370 			if ((error = bwrite(nbp)) != 0) {
371 				brelse(bp);
372 				goto fail;
373 			}
374 		}
375 		bap[indirs[i - 1].in_off] = nb;
		/*
		 * When the top-level indirect was not allocated by this
		 * call, remember the first slot written so the failure
		 * path can zero it again.
		 */
376 		if (allocib == NULL && unwindidx < 0)
377 			unwindidx = i - 1;
378 		/*
379 		 * If required, write synchronously, otherwise use
380 		 * delayed write.
381 		 */
382 		if (flags & IO_SYNC) {
383 			bwrite(bp);
384 		} else {
385 			if (bp->b_bufsize == fs->fs_bsize)
386 				bp->b_flags |= B_CLUSTEROK;
387 			bdwrite(bp);
388 		}
389 	}
390 	/*
391 	 * If asked only for the indirect block, then return it.
392 	 */
393 	if (flags & BA_METAONLY) {
394 		curthread_pflags_restore(saved_inbdflush);
395 		*bpp = bp;
396 		error = 0;
397 		goto done;
398 	}
399 	/*
400 	 * Get the data block, allocating if necessary.
401 	 */
402 	if (nb == 0) {
403 		UFS_LOCK(ump);
404 		/*
405 		 * If allocating metadata at the front of the cylinder
406 		 * group and parent indirect block has just been allocated,
407 		 * then cluster next to it if it is the first indirect in
408 		 * the file. Otherwise it has been allocated in the metadata
409 		 * area, so we want to find our own place out in the data area.
410 		 */
411 		if (pref == 0 || (lbn > UFS_NDADDR && fs->fs_metaspace != 0))
412 			pref = ffs_blkpref_ufs1(ip, lbn, indirs[i].in_off,
413 			    &bap[0]);
414 		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
415 		    flags | IO_BUFLOCKED, cred, &newb);
416 		if (error) {
			/* Same failure handling as in the indirect loop above. */
417 			brelse(bp);
418 			UFS_LOCK(ump);
419 			if (DOINGSOFTDEP(vp) && ++reclaimed == 1) {
420 				softdep_request_cleanup(fs, vp, cred,
421 				    FLUSH_BLOCKS_WAIT);
422 				UFS_UNLOCK(ump);
423 				goto retry;
424 			}
425 			if (!ffs_fsfail_cleanup_locked(ump, error) &&
426 			    ppsratecheck(&ump->um_last_fullmsg,
427 			    &ump->um_secs_fullmsg, 1)) {
428 				UFS_UNLOCK(ump);
429 				ffs_fserr(fs, ip->i_number, "filesystem full");
430 				uprintf("\n%s: write failed, filesystem "
431 				    "is full\n", fs->fs_fsmnt);
432 			} else {
433 				UFS_UNLOCK(ump);
434 			}
435 			goto fail;
436 		}
437 		nb = newb;
438 		MPASS(allocblk < allociblk + nitems(allociblk));
439 		MPASS(lbns_remfree < lbns + nitems(lbns));
440 		*allocblk++ = nb;
441 		*lbns_remfree++ = lbn;
442 		nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, gbflags);
443 		nbp->b_blkno = fsbtodb(fs, nb);
444 		if (flags & BA_CLRBUF)
445 			vfs_bio_clrbuf(nbp);
446 		if (DOINGSOFTDEP(vp))
447 			softdep_setup_allocindir_page(ip, lbn, bp,
448 			    indirs[i].in_off, nb, 0, nbp);
449 		bap[indirs[i].in_off] = nb;
450 		/*
451 		 * If required, write synchronously, otherwise use
452 		 * delayed write.
453 		 */
454 		if (flags & IO_SYNC) {
455 			bwrite(bp);
456 		} else {
457 			if (bp->b_bufsize == fs->fs_bsize)
458 				bp->b_flags |= B_CLUSTEROK;
459 			bdwrite(bp);
460 		}
461 		curthread_pflags_restore(saved_inbdflush);
462 		*bpp = nbp;
463 		error = 0;
464 		goto done;
465 	}
	/*
	 * The data block already exists; the indirect block buffer is
	 * no longer needed.
	 */
466 	brelse(bp);
467 	if (flags & BA_CLRBUF) {
		/*
		 * BA_CLRBUF: read the block.  cluster_read() is used when
		 * the BA_SEQMASK field of flags (seqcount) is non-zero,
		 * the mount lacks MNT_NOCLUSTERR, and neither
		 * vm_page_count_severe() nor buf_dirty_count_severe()
		 * holds; otherwise bread_gb().
		 */
468 		int seqcount = (flags & BA_SEQMASK) >> BA_SEQSHIFT;
469 		if (seqcount != 0 &&
470 		    (vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0 &&
471 		    !(vm_page_count_severe() || buf_dirty_count_severe())) {
472 			error = cluster_read(vp, ip->i_size, lbn,
473 			    (int)fs->fs_bsize, NOCRED,
474 			    MAXBSIZE, seqcount, gbflags, &nbp);
475 		} else {
476 			error = bread_gb(vp, lbn, (int)fs->fs_bsize, NOCRED,
477 			    gbflags, &nbp);
478 		}
479 		if (error) {
480 			brelse(nbp);
481 			goto fail;
482 		}
483 	} else {
		/* Without BA_CLRBUF no read is issued; only b_blkno is set. */
484 		nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, gbflags);
485 		nbp->b_blkno = fsbtodb(fs, nb);
486 	}
487 	curthread_pflags_restore(saved_inbdflush);
488 	*bpp = nbp;
489 	error = 0;
490 	goto done;
491 fail:
492 	curthread_pflags_restore(saved_inbdflush);
493 	/*
494 	 * If we have failed to allocate any blocks, simply return the error.
495 	 * This is the usual case and avoids the need to fsync the file.
496 	 */
497 	if (allocblk == allociblk && allocib == NULL && unwindidx == -1)
498 		goto done;
499 	/*
500 	 * If we have failed part way through block allocation, we
501 	 * have to deallocate any indirect blocks that we have allocated.
502 	 * We have to fsync the file before we start to get rid of all
503 	 * of its dependencies so that we do not leave them dangling.
504 	 * We have to sync it at the end so that the soft updates code
505 	 * does not find any untracked changes. Although this is really
506 	 * slow, running out of disk space is not expected to be a common
507 	 * occurrence. The error return from fsync is ignored as we already
508 	 * have an error to return to the user.
509 	 *
510 	 * XXX Still have to journal the free below
511 	 */
512 	(void) ffs_syncvnode(vp, MNT_WAIT, 0);
513 	for (deallocated = 0, blkp = allociblk, lbns_remfree = lbns;
514 	     blkp < allocblk; blkp++, lbns_remfree++) {
515 		/*
516 		 * We shall not leave the freed blocks on the vnode
517 		 * buffer object lists.
518 		 */
519 		bp = getblk(vp, *lbns_remfree, fs->fs_bsize, 0, 0,
520 		    GB_NOCREAT | GB_UNMAPPED);
521 		if (bp != NULL) {
522 			KASSERT(bp->b_blkno == fsbtodb(fs, *blkp),
523 			    ("mismatch1 l %jd %jd b %ju %ju",
524 			    (intmax_t)bp->b_lblkno, (uintmax_t)*lbns_remfree,
525 			    (uintmax_t)bp->b_blkno,
526 			    (uintmax_t)fsbtodb(fs, *blkp)));
527 			bp->b_flags |= B_INVAL | B_RELBUF | B_NOCACHE;
528 			bp->b_flags &= ~(B_ASYNC | B_CACHE);
529 			brelse(bp);
530 		}
531 		deallocated += fs->fs_bsize;
532 	}
	/*
	 * Clear the pointer that linked the new blocks into the file:
	 * either the inode's indirect slot, or the slot recorded in
	 * unwindidx inside a pre-existing indirect block.
	 */
533 	if (allocib != NULL) {
534 		*allocib = 0;
535 	} else if (unwindidx >= 0) {
536 		int r;
537 
538 		r = bread(vp, indirs[unwindidx].in_lbn,
539 		    (int)fs->fs_bsize, NOCRED, &bp);
540 		if (r) {
541 			panic("Could not unwind indirect block, error %d", r);
			/*
			 * NOTE(review): panic() presumably does not
			 * return, which would make this brelse()
			 * unreachable -- confirm.
			 */
542 			brelse(bp);
543 		} else {
544 			bap = (ufs1_daddr_t *)bp->b_data;
545 			bap[indirs[unwindidx].in_off] = 0;
546 			if (flags & IO_SYNC) {
547 				bwrite(bp);
548 			} else {
549 				if (bp->b_bufsize == fs->fs_bsize)
550 					bp->b_flags |= B_CLUSTEROK;
551 				bdwrite(bp);
552 			}
553 		}
554 	}
	/* Undo the di_blocks (and quota) accounting for the freed blocks. */
555 	if (deallocated) {
556 #ifdef QUOTA
557 		/*
558 		 * Restore user's disk quota because allocation failed.
559 		 */
560 		(void) chkdq(ip, -btodb(deallocated), cred, FORCE);
561 #endif
562 		dp->di_blocks -= btodb(deallocated);
563 		UFS_INODE_SET_FLAG(ip, IN_CHANGE | IN_UPDATE);
564 	}
565 	(void) ffs_syncvnode(vp, MNT_WAIT, 0);
566 	/*
567 	 * After the buffers are invalidated and on-disk pointers are
568 	 * cleared, free the blocks.
569 	 */
570 	for (blkp = allociblk; blkp < allocblk; blkp++) {
571 #ifdef INVARIANTS
		/*
		 * Debug check: no buffer may remain for a block that is
		 * about to be freed.  lbns_remfree is rewound on the
		 * first iteration and walks lbns[] in step with blkp.
		 */
572 		if (blkp == allociblk)
573 			lbns_remfree = lbns;
574 		bp = getblk(vp, *lbns_remfree, fs->fs_bsize, 0, 0,
575 		    GB_NOCREAT | GB_UNMAPPED);
576 		if (bp != NULL) {
577 			panic("zombie1 %jd %ju %ju",
578 			    (intmax_t)bp->b_lblkno, (uintmax_t)bp->b_blkno,
579 			    (uintmax_t)fsbtodb(fs, *blkp));
580 		}
581 		lbns_remfree++;
582 #endif
583 		ffs_blkfree(ump, fs, ump->um_devvp, *blkp, fs->fs_bsize,
584 		    ip->i_number, vp->v_type, NULL, SINGLETON_KEY);
585 	}
586 done:
	/* Pairs with the vn_seqc_write_begin() call near the top. */
587 	vn_seqc_write_end(vp);
588 	return (error);
589 }
590 
591 /*
592  * Balloc defines the structure of file system storage
593  * by allocating the physical blocks on a device given
594  * the inode and the logical block number in a file.
595  * This is the allocation strategy for UFS2. Above is
596  * the allocation strategy for UFS1.
597  */
598 int
599 ffs_balloc_ufs2(struct vnode *vp, off_t startoffset, int size,
600     struct ucred *cred, int flags, struct buf **bpp)
601 {
602 	struct inode *ip;
603 	struct ufs2_dinode *dp;
604 	ufs_lbn_t lbn, lastlbn;
605 	struct fs *fs;
606 	struct buf *bp, *nbp;
607 	struct mount *mp;
608 	struct ufsmount *ump;
609 	struct indir indirs[UFS_NIADDR + 2];
610 	ufs2_daddr_t nb, newb, *bap, pref;
611 	ufs2_daddr_t *allocib, *blkp, *allocblk, allociblk[UFS_NIADDR + 1];
612 	ufs2_daddr_t *lbns_remfree, lbns[UFS_NIADDR + 1];
613 	int deallocated, osize, nsize, num, i, error;
614 	int unwindidx = -1;
615 	int saved_inbdflush;
616 	int gbflags, gbwflag, reclaimed;
617 
618 	ip = VTOI(vp);
619 	dp = ip->i_din2;
620 	fs = ITOFS(ip);
621 	mp = ITOVFS(ip);
622 	ump = ITOUMP(ip);
623 	lbn = lblkno(fs, startoffset);
624 	size = blkoff(fs, startoffset) + size;
625 	reclaimed = 0;
626 	if (size > fs->fs_bsize)
627 		panic("ffs_balloc_ufs2: blk too big");
628 	*bpp = NULL;
629 	if (lbn < 0)
630 		return (EFBIG);
631 	gbflags = (flags & BA_UNMAPPED) != 0 ? GB_UNMAPPED : 0;
632 #ifdef WITNESS
633 	gbwflag = IS_SNAPSHOT(ip) ? GB_NOWITNESS : 0;
634 	gbflags |= gbwflag;
635 #else
636 	gbwflag = 0;
637 #endif
638 
639 	vn_seqc_write_begin(vp);
640 
641 	/*
642 	 * Check for allocating external data.
643 	 */
644 	if (flags & IO_EXT) {
645 		if (lbn >= UFS_NXADDR) {
646 			error = EFBIG;
647 			goto done;
648 		}
649 
650 		/*
651 		 * If the next write will extend the data into a new block,
652 		 * and the data is currently composed of a fragment
653 		 * this fragment has to be extended to be a full block.
654 		 */
655 		lastlbn = lblkno(fs, dp->di_extsize);
656 		if (lastlbn < lbn) {
657 			nb = lastlbn;
658 			osize = sblksize(fs, dp->di_extsize, nb);
659 			if (osize < fs->fs_bsize && osize > 0) {
660 				UFS_LOCK(ump);
661 				error = ffs_realloccg(ip, -1 - nb,
662 				    dp->di_extb[nb],
663 				    ffs_blkpref_ufs2(ip, lastlbn, (int)nb,
664 				    &dp->di_extb[0]), osize,
665 				    (int)fs->fs_bsize, flags, cred, &bp);
666 				if (error)
667 					goto done;
668 				if (DOINGSOFTDEP(vp))
669 					softdep_setup_allocext(ip, nb,
670 					    dbtofsb(fs, bp->b_blkno),
671 					    dp->di_extb[nb],
672 					    fs->fs_bsize, osize, bp);
673 				dp->di_extsize = smalllblktosize(fs, nb + 1);
674 				dp->di_extb[nb] = dbtofsb(fs, bp->b_blkno);
675 				bp->b_xflags |= BX_ALTDATA;
676 				UFS_INODE_SET_FLAG(ip,
677 				    IN_SIZEMOD | IN_CHANGE | IN_IBLKDATA);
678 				if (flags & IO_SYNC)
679 					bwrite(bp);
680 				else
681 					bawrite(bp);
682 			}
683 		}
684 		/*
685 		 * All blocks are direct blocks
686 		 */
687 		if (flags & BA_METAONLY)
688 			panic("ffs_balloc_ufs2: BA_METAONLY for ext block");
689 		nb = dp->di_extb[lbn];
690 		if (nb != 0 && dp->di_extsize >= smalllblktosize(fs, lbn + 1)) {
691 			error = bread_gb(vp, -1 - lbn, fs->fs_bsize, NOCRED,
692 			    gbflags, &bp);
693 			if (error)
694 				goto done;
695 			bp->b_blkno = fsbtodb(fs, nb);
696 			bp->b_xflags |= BX_ALTDATA;
697 			*bpp = bp;
698 			goto done;
699 		}
700 		if (nb != 0) {
701 			/*
702 			 * Consider need to reallocate a fragment.
703 			 */
704 			osize = fragroundup(fs, blkoff(fs, dp->di_extsize));
705 			nsize = fragroundup(fs, size);
706 			if (nsize <= osize) {
707 				error = bread_gb(vp, -1 - lbn, osize, NOCRED,
708 				    gbflags, &bp);
709 				if (error)
710 					goto done;
711 				bp->b_blkno = fsbtodb(fs, nb);
712 				bp->b_xflags |= BX_ALTDATA;
713 			} else {
714 				UFS_LOCK(ump);
715 				error = ffs_realloccg(ip, -1 - lbn,
716 				    dp->di_extb[lbn],
717 				    ffs_blkpref_ufs2(ip, lbn, (int)lbn,
718 				    &dp->di_extb[0]), osize, nsize, flags,
719 				    cred, &bp);
720 				if (error)
721 					goto done;
722 				bp->b_xflags |= BX_ALTDATA;
723 				if (DOINGSOFTDEP(vp))
724 					softdep_setup_allocext(ip, lbn,
725 					    dbtofsb(fs, bp->b_blkno), nb,
726 					    nsize, osize, bp);
727 			}
728 		} else {
729 			if (dp->di_extsize < smalllblktosize(fs, lbn + 1))
730 				nsize = fragroundup(fs, size);
731 			else
732 				nsize = fs->fs_bsize;
733 			UFS_LOCK(ump);
734 			error = ffs_alloc(ip, lbn,
735 			   ffs_blkpref_ufs2(ip, lbn, (int)lbn, &dp->di_extb[0]),
736 			   nsize, flags, cred, &newb);
737 			if (error)
738 				goto done;
739 			bp = getblk(vp, -1 - lbn, nsize, 0, 0, gbflags);
740 			bp->b_blkno = fsbtodb(fs, newb);
741 			bp->b_xflags |= BX_ALTDATA;
742 			if (flags & BA_CLRBUF)
743 				vfs_bio_clrbuf(bp);
744 			if (DOINGSOFTDEP(vp))
745 				softdep_setup_allocext(ip, lbn, newb, 0,
746 				    nsize, 0, bp);
747 		}
748 		dp->di_extb[lbn] = dbtofsb(fs, bp->b_blkno);
749 		UFS_INODE_SET_FLAG(ip, IN_CHANGE | IN_IBLKDATA);
750 		*bpp = bp;
751 		error = 0;
752 		goto done;
753 	}
754 	/*
755 	 * If the next write will extend the file into a new block,
756 	 * and the file is currently composed of a fragment
757 	 * this fragment has to be extended to be a full block.
758 	 */
759 	lastlbn = lblkno(fs, ip->i_size);
760 	if (lastlbn < UFS_NDADDR && lastlbn < lbn) {
761 		nb = lastlbn;
762 		osize = blksize(fs, ip, nb);
763 		if (osize < fs->fs_bsize && osize > 0) {
764 			UFS_LOCK(ump);
765 			error = ffs_realloccg(ip, nb, dp->di_db[nb],
766 			    ffs_blkpref_ufs2(ip, lastlbn, (int)nb,
767 			    &dp->di_db[0]), osize, (int)fs->fs_bsize,
768 			    flags, cred, &bp);
769 			if (error)
770 				goto done;
771 			if (DOINGSOFTDEP(vp))
772 				softdep_setup_allocdirect(ip, nb,
773 				    dbtofsb(fs, bp->b_blkno),
774 				    dp->di_db[nb],
775 				    fs->fs_bsize, osize, bp);
776 			ip->i_size = smalllblktosize(fs, nb + 1);
777 			dp->di_size = ip->i_size;
778 			dp->di_db[nb] = dbtofsb(fs, bp->b_blkno);
779 			UFS_INODE_SET_FLAG(ip,
780 			    IN_SIZEMOD |IN_CHANGE | IN_UPDATE | IN_IBLKDATA);
781 			if (flags & IO_SYNC)
782 				bwrite(bp);
783 			else
784 				bawrite(bp);
785 		}
786 	}
787 	/*
788 	 * The first UFS_NDADDR blocks are direct blocks
789 	 */
790 	if (lbn < UFS_NDADDR) {
791 		if (flags & BA_METAONLY)
792 			panic("ffs_balloc_ufs2: BA_METAONLY for direct block");
793 		nb = dp->di_db[lbn];
794 		if (nb != 0 && ip->i_size >= smalllblktosize(fs, lbn + 1)) {
795 			if ((flags & BA_CLRBUF) != 0) {
796 				error = bread_gb(vp, lbn, fs->fs_bsize, NOCRED,
797 				    gbflags, &bp);
798 				if (error != 0)
799 					goto done;
800 			} else {
801 				bp = getblk(vp, lbn, fs->fs_bsize, 0, 0,
802 				    gbflags);
803 				if (bp == NULL) {
804 					error = EIO;
805 					goto done;
806 				}
807 				vfs_bio_clrbuf(bp);
808 			}
809 			bp->b_blkno = fsbtodb(fs, nb);
810 			*bpp = bp;
811 			error = 0;
812 			goto done;
813 		}
814 		if (nb != 0) {
815 			/*
816 			 * Consider need to reallocate a fragment.
817 			 */
818 			osize = fragroundup(fs, blkoff(fs, ip->i_size));
819 			nsize = fragroundup(fs, size);
820 			if (nsize <= osize) {
821 				error = bread_gb(vp, lbn, osize, NOCRED,
822 				    gbflags, &bp);
823 				if (error)
824 					goto done;
825 				bp->b_blkno = fsbtodb(fs, nb);
826 			} else {
827 				UFS_LOCK(ump);
828 				error = ffs_realloccg(ip, lbn, dp->di_db[lbn],
829 				    ffs_blkpref_ufs2(ip, lbn, (int)lbn,
830 				    &dp->di_db[0]), osize, nsize, flags,
831 				    cred, &bp);
832 				if (error)
833 					goto done;
834 				if (DOINGSOFTDEP(vp))
835 					softdep_setup_allocdirect(ip, lbn,
836 					    dbtofsb(fs, bp->b_blkno), nb,
837 					    nsize, osize, bp);
838 			}
839 		} else {
840 			if (ip->i_size < smalllblktosize(fs, lbn + 1))
841 				nsize = fragroundup(fs, size);
842 			else
843 				nsize = fs->fs_bsize;
844 			UFS_LOCK(ump);
845 			error = ffs_alloc(ip, lbn,
846 			    ffs_blkpref_ufs2(ip, lbn, (int)lbn,
847 				&dp->di_db[0]), nsize, flags, cred, &newb);
848 			if (error)
849 				goto done;
850 			bp = getblk(vp, lbn, nsize, 0, 0, gbflags);
851 			bp->b_blkno = fsbtodb(fs, newb);
852 			if (flags & BA_CLRBUF)
853 				vfs_bio_clrbuf(bp);
854 			if (DOINGSOFTDEP(vp))
855 				softdep_setup_allocdirect(ip, lbn, newb, 0,
856 				    nsize, 0, bp);
857 		}
858 		dp->di_db[lbn] = dbtofsb(fs, bp->b_blkno);
859 		UFS_INODE_SET_FLAG(ip, IN_CHANGE | IN_UPDATE | IN_IBLKDATA);
860 		*bpp = bp;
861 		error = 0;
862 		goto done;
863 	}
864 	/*
865 	 * Determine the number of levels of indirection.
866 	 */
867 	pref = 0;
868 	if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
869 		goto done;
870 #ifdef INVARIANTS
871 	if (num < 1)
872 		panic ("ffs_balloc_ufs2: ufs_getlbns returned indirect block");
873 #endif
874 	saved_inbdflush = curthread_pflags_set(TDP_INBDFLUSH);
875 	/*
876 	 * Fetch the first indirect block allocating if necessary.
877 	 */
878 	--num;
879 	nb = dp->di_ib[indirs[0].in_off];
880 	allocib = NULL;
881 	allocblk = allociblk;
882 	lbns_remfree = lbns;
883 	if (nb == 0) {
884 		UFS_LOCK(ump);
885 		pref = ffs_blkpref_ufs2(ip, lbn, -indirs[0].in_off - 1,
886 		    (ufs2_daddr_t *)0);
887 		if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
888 		    flags, cred, &newb)) != 0) {
889 			curthread_pflags_restore(saved_inbdflush);
890 			goto done;
891 		}
892 		pref = newb + fs->fs_frag;
893 		nb = newb;
894 		MPASS(allocblk < allociblk + nitems(allociblk));
895 		MPASS(lbns_remfree < lbns + nitems(lbns));
896 		*allocblk++ = nb;
897 		*lbns_remfree++ = indirs[1].in_lbn;
898 		bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0,
899 		    GB_UNMAPPED | gbwflag);
900 		bp->b_blkno = fsbtodb(fs, nb);
901 		vfs_bio_clrbuf(bp);
902 		if (DOINGSOFTDEP(vp)) {
903 			softdep_setup_allocdirect(ip,
904 			    UFS_NDADDR + indirs[0].in_off, newb, 0,
905 			    fs->fs_bsize, 0, bp);
906 			bdwrite(bp);
907 		} else if ((flags & IO_SYNC) == 0 && DOINGASYNC(vp)) {
908 			if (bp->b_bufsize == fs->fs_bsize)
909 				bp->b_flags |= B_CLUSTEROK;
910 			bdwrite(bp);
911 		} else {
912 			if ((error = bwrite(bp)) != 0)
913 				goto fail;
914 		}
915 		allocib = &dp->di_ib[indirs[0].in_off];
916 		*allocib = nb;
917 		UFS_INODE_SET_FLAG(ip, IN_CHANGE | IN_UPDATE | IN_IBLKDATA);
918 	}
919 	/*
920 	 * Fetch through the indirect blocks, allocating as necessary.
921 	 */
922 retry:
923 	for (i = 1;;) {
924 		error = bread_gb(vp, indirs[i].in_lbn, (int)fs->fs_bsize,
925 		    NOCRED, gbwflag, &bp);
926 		if (error) {
927 			goto fail;
928 		}
929 		bap = (ufs2_daddr_t *)bp->b_data;
930 		nb = bap[indirs[i].in_off];
931 		if ((error = UFS_CHECK_BLKNO(mp, ip->i_number, nb,
932 		    fs->fs_bsize)) != 0) {
933 			brelse(bp);
934 			goto fail;
935 		}
936 		if (i == num)
937 			break;
938 		i += 1;
939 		if (nb != 0) {
940 			bqrelse(bp);
941 			continue;
942 		}
943 		UFS_LOCK(ump);
944 		/*
945 		 * If parent indirect has just been allocated, try to cluster
946 		 * immediately following it.
947 		 */
948 		if (pref == 0)
949 			pref = ffs_blkpref_ufs2(ip, lbn, i - num - 1,
950 			    (ufs2_daddr_t *)0);
951 		if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
952 		    flags | IO_BUFLOCKED, cred, &newb)) != 0) {
953 			brelse(bp);
954 			UFS_LOCK(ump);
955 			if (DOINGSOFTDEP(vp) && ++reclaimed == 1) {
956 				softdep_request_cleanup(fs, vp, cred,
957 				    FLUSH_BLOCKS_WAIT);
958 				UFS_UNLOCK(ump);
959 				goto retry;
960 			}
961 			if (!ffs_fsfail_cleanup_locked(ump, error) &&
962 			    ppsratecheck(&ump->um_last_fullmsg,
963 			    &ump->um_secs_fullmsg, 1)) {
964 				UFS_UNLOCK(ump);
965 				ffs_fserr(fs, ip->i_number, "filesystem full");
966 				uprintf("\n%s: write failed, filesystem "
967 				    "is full\n", fs->fs_fsmnt);
968 			} else {
969 				UFS_UNLOCK(ump);
970 			}
971 			goto fail;
972 		}
973 		pref = newb + fs->fs_frag;
974 		nb = newb;
975 		MPASS(allocblk < allociblk + nitems(allociblk));
976 		MPASS(lbns_remfree < lbns + nitems(lbns));
977 		*allocblk++ = nb;
978 		*lbns_remfree++ = indirs[i].in_lbn;
979 		nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0,
980 		    GB_UNMAPPED);
981 		nbp->b_blkno = fsbtodb(fs, nb);
982 		vfs_bio_clrbuf(nbp);
983 		if (DOINGSOFTDEP(vp)) {
984 			softdep_setup_allocindir_meta(nbp, ip, bp,
985 			    indirs[i - 1].in_off, nb);
986 			bdwrite(nbp);
987 		} else if ((flags & IO_SYNC) == 0 && DOINGASYNC(vp)) {
988 			if (nbp->b_bufsize == fs->fs_bsize)
989 				nbp->b_flags |= B_CLUSTEROK;
990 			bdwrite(nbp);
991 		} else {
992 			if ((error = bwrite(nbp)) != 0) {
993 				brelse(bp);
994 				goto fail;
995 			}
996 		}
997 		bap[indirs[i - 1].in_off] = nb;
998 		if (allocib == NULL && unwindidx < 0)
999 			unwindidx = i - 1;
1000 		/*
1001 		 * If required, write synchronously, otherwise use
1002 		 * delayed write.
1003 		 */
1004 		if (flags & IO_SYNC) {
1005 			bwrite(bp);
1006 		} else {
1007 			if (bp->b_bufsize == fs->fs_bsize)
1008 				bp->b_flags |= B_CLUSTEROK;
1009 			bdwrite(bp);
1010 		}
1011 	}
1012 	/*
1013 	 * If asked only for the indirect block, then return it.
1014 	 */
1015 	if (flags & BA_METAONLY) {
1016 		curthread_pflags_restore(saved_inbdflush);
1017 		*bpp = bp;
1018 		error = 0;
1019 		goto done;
1020 	}
1021 	/*
1022 	 * Get the data block, allocating if necessary.
1023 	 */
1024 	if (nb == 0) {
1025 		UFS_LOCK(ump);
1026 		/*
1027 		 * If allocating metadata at the front of the cylinder
1028 		 * group and parent indirect block has just been allocated,
1029 		 * then cluster next to it if it is the first indirect in
1030 		 * the file. Otherwise it has been allocated in the metadata
1031 		 * area, so we want to find our own place out in the data area.
1032 		 */
1033 		if (pref == 0 || (lbn > UFS_NDADDR && fs->fs_metaspace != 0))
1034 			pref = ffs_blkpref_ufs2(ip, lbn, indirs[i].in_off,
1035 			    &bap[0]);
1036 		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
1037 		    flags | IO_BUFLOCKED, cred, &newb);
1038 		if (error) {
1039 			brelse(bp);
1040 			UFS_LOCK(ump);
1041 			if (DOINGSOFTDEP(vp) && ++reclaimed == 1) {
1042 				softdep_request_cleanup(fs, vp, cred,
1043 				    FLUSH_BLOCKS_WAIT);
1044 				UFS_UNLOCK(ump);
1045 				goto retry;
1046 			}
1047 			if (!ffs_fsfail_cleanup_locked(ump, error) &&
1048 			    ppsratecheck(&ump->um_last_fullmsg,
1049 			    &ump->um_secs_fullmsg, 1)) {
1050 				UFS_UNLOCK(ump);
1051 				ffs_fserr(fs, ip->i_number, "filesystem full");
1052 				uprintf("\n%s: write failed, filesystem "
1053 				    "is full\n", fs->fs_fsmnt);
1054 			} else {
1055 				UFS_UNLOCK(ump);
1056 			}
1057 			goto fail;
1058 		}
1059 		nb = newb;
1060 		MPASS(allocblk < allociblk + nitems(allociblk));
1061 		MPASS(lbns_remfree < lbns + nitems(lbns));
1062 		*allocblk++ = nb;
1063 		*lbns_remfree++ = lbn;
1064 		nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, gbflags);
1065 		nbp->b_blkno = fsbtodb(fs, nb);
1066 		if (flags & BA_CLRBUF)
1067 			vfs_bio_clrbuf(nbp);
1068 		if (DOINGSOFTDEP(vp))
1069 			softdep_setup_allocindir_page(ip, lbn, bp,
1070 			    indirs[i].in_off, nb, 0, nbp);
1071 		bap[indirs[i].in_off] = nb;
1072 		/*
1073 		 * If required, write synchronously, otherwise use
1074 		 * delayed write.
1075 		 */
1076 		if (flags & IO_SYNC) {
1077 			bwrite(bp);
1078 		} else {
1079 			if (bp->b_bufsize == fs->fs_bsize)
1080 				bp->b_flags |= B_CLUSTEROK;
1081 			bdwrite(bp);
1082 		}
1083 		curthread_pflags_restore(saved_inbdflush);
1084 		*bpp = nbp;
1085 		error = 0;
1086 		goto done;
1087 	}
1088 	brelse(bp);
1089 	/*
1090 	 * If requested clear invalid portions of the buffer.  If we
1091 	 * have to do a read-before-write (typical if BA_CLRBUF is set),
1092 	 * try to do some read-ahead in the sequential case to reduce
1093 	 * the number of I/O transactions.
1094 	 */
1095 	if (flags & BA_CLRBUF) {
1096 		int seqcount = (flags & BA_SEQMASK) >> BA_SEQSHIFT;
1097 		if (seqcount != 0 &&
1098 		    (vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0 &&
1099 		    !(vm_page_count_severe() || buf_dirty_count_severe())) {
1100 			error = cluster_read(vp, ip->i_size, lbn,
1101 			    (int)fs->fs_bsize, NOCRED,
1102 			    MAXBSIZE, seqcount, gbflags, &nbp);
1103 		} else {
1104 			error = bread_gb(vp, lbn, (int)fs->fs_bsize,
1105 			    NOCRED, gbflags, &nbp);
1106 		}
1107 		if (error) {
1108 			brelse(nbp);
1109 			goto fail;
1110 		}
1111 	} else {
1112 		nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, gbflags);
1113 		nbp->b_blkno = fsbtodb(fs, nb);
1114 	}
1115 	curthread_pflags_restore(saved_inbdflush);
1116 	*bpp = nbp;
1117 	error = 0;
1118 	goto done;
1119 fail:
1120 	curthread_pflags_restore(saved_inbdflush);
1121 	/*
1122 	 * If we have failed to allocate any blocks, simply return the error.
1123 	 * This is the usual case and avoids the need to fsync the file.
1124 	 */
1125 	if (allocblk == allociblk && allocib == NULL && unwindidx == -1)
1126 		goto done;
1127 	/*
1128 	 * If we have failed part way through block allocation, we
1129 	 * have to deallocate any indirect blocks that we have allocated.
1130 	 * We have to fsync the file before we start to get rid of all
1131 	 * of its dependencies so that we do not leave them dangling.
1132 	 * We have to sync it at the end so that the soft updates code
1133 	 * does not find any untracked changes. Although this is really
1134 	 * slow, running out of disk space is not expected to be a common
1135 	 * occurrence. The error return from fsync is ignored as we already
1136 	 * have an error to return to the user.
1137 	 *
1138 	 * XXX Still have to journal the free below
1139 	 */
1140 	(void) ffs_syncvnode(vp, MNT_WAIT, 0);
1141 	for (deallocated = 0, blkp = allociblk, lbns_remfree = lbns;
1142 	     blkp < allocblk; blkp++, lbns_remfree++) {
1143 		/*
1144 		 * We shall not leave the freed blocks on the vnode
1145 		 * buffer object lists.
1146 		 */
1147 		bp = getblk(vp, *lbns_remfree, fs->fs_bsize, 0, 0,
1148 		    GB_NOCREAT | GB_UNMAPPED | gbwflag);
1149 		if (bp != NULL) {
1150 			KASSERT(bp->b_blkno == fsbtodb(fs, *blkp),
1151 			    ("mismatch2 l %jd %jd b %ju %ju",
1152 			    (intmax_t)bp->b_lblkno, (uintmax_t)*lbns_remfree,
1153 			    (uintmax_t)bp->b_blkno,
1154 			    (uintmax_t)fsbtodb(fs, *blkp)));
1155 			bp->b_flags |= B_INVAL | B_RELBUF | B_NOCACHE;
1156 			bp->b_flags &= ~(B_ASYNC | B_CACHE);
1157 			brelse(bp);
1158 		}
1159 		deallocated += fs->fs_bsize;
1160 	}
1161 	if (allocib != NULL) {
1162 		*allocib = 0;
1163 	} else if (unwindidx >= 0) {
1164 		int r;
1165 
1166 		r = bread_gb(vp, indirs[unwindidx].in_lbn,
1167 		    (int)fs->fs_bsize, NOCRED, gbwflag, &bp);
1168 		if (r) {
1169 			panic("Could not unwind indirect block, error %d", r);
1170 			brelse(bp);
1171 		} else {
1172 			bap = (ufs2_daddr_t *)bp->b_data;
1173 			bap[indirs[unwindidx].in_off] = 0;
1174 			if (flags & IO_SYNC) {
1175 				bwrite(bp);
1176 			} else {
1177 				if (bp->b_bufsize == fs->fs_bsize)
1178 					bp->b_flags |= B_CLUSTEROK;
1179 				bdwrite(bp);
1180 			}
1181 		}
1182 	}
1183 	if (deallocated) {
1184 #ifdef QUOTA
1185 		/*
1186 		 * Restore user's disk quota because allocation failed.
1187 		 */
1188 		(void) chkdq(ip, -btodb(deallocated), cred, FORCE);
1189 #endif
1190 		dp->di_blocks -= btodb(deallocated);
1191 		UFS_INODE_SET_FLAG(ip, IN_CHANGE | IN_UPDATE);
1192 	}
1193 	(void) ffs_syncvnode(vp, MNT_WAIT, 0);
1194 	/*
1195 	 * After the buffers are invalidated and on-disk pointers are
1196 	 * cleared, free the blocks.
1197 	 */
1198 	for (blkp = allociblk; blkp < allocblk; blkp++) {
1199 #ifdef INVARIANTS
1200 		if (blkp == allociblk)
1201 			lbns_remfree = lbns;
1202 		bp = getblk(vp, *lbns_remfree, fs->fs_bsize, 0, 0,
1203 		    GB_NOCREAT | GB_UNMAPPED | gbwflag);
1204 		if (bp != NULL) {
1205 			panic("zombie2 %jd %ju %ju",
1206 			    (intmax_t)bp->b_lblkno, (uintmax_t)bp->b_blkno,
1207 			    (uintmax_t)fsbtodb(fs, *blkp));
1208 		}
1209 		lbns_remfree++;
1210 #endif
1211 		ffs_blkfree(ump, fs, ump->um_devvp, *blkp, fs->fs_bsize,
1212 		    ip->i_number, vp->v_type, NULL, SINGLETON_KEY);
1213 	}
1214 done:
1215 	vn_seqc_write_end(vp);
1216 	return (error);
1217 }
1218