/*	$NetBSD: ffs_balloc.c,v 1.61 2015/03/28 19:24:04 maxv Exp $	*/

/*
 * Copyright (c) 2002 Networks Associates Technology, Inc.
 * All rights reserved.
 *
 * This software was developed for the FreeBSD Project by Marshall
 * Kirk McKusick and Network Associates Laboratories, the Security
 * Research Division of Network Associates, Inc. under DARPA/SPAWAR
 * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS
 * research program
 *
 * Copyright (c) 1982, 1986, 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)ffs_balloc.c	8.8 (Berkeley) 6/16/95
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: ffs_balloc.c,v 1.61 2015/03/28 19:24:04 maxv Exp $");

#if defined(_KERNEL_OPT)
#include "opt_quota.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/buf.h>
#include <sys/file.h>
#include <sys/mount.h>
#include <sys/vnode.h>
#include <sys/kauth.h>
#include <sys/fstrans.h>

#include <ufs/ufs/quota.h>
#include <ufs/ufs/ufsmount.h>
#include <ufs/ufs/inode.h>
#include <ufs/ufs/ufs_extern.h>
#include <ufs/ufs/ufs_bswap.h>

#include <ufs/ffs/fs.h>
#include <ufs/ffs/ffs_extern.h>

#include <uvm/uvm.h>

static int ffs_balloc_ufs1(struct vnode *, off_t, int, kauth_cred_t, int,
    struct buf **);
static int ffs_balloc_ufs2(struct vnode *, off_t, int, kauth_cred_t, int,
    struct buf **);

/*
 * Balloc defines the structure of file system storage
 * by allocating the physical blocks on a device given
 * the inode and the logical block number in a file.
 */
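/*
 * On success the buffer covering the requested block is returned
 * through "bpp" when "bpp" is non-NULL; callers that only need the
 * backing store allocated may pass a NULL "bpp".
 */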

int
ffs_balloc(struct vnode *vp, off_t off, int size, kauth_cred_t cred, int flags,
    struct buf **bpp)
{
	int error;

	if (VTOI(vp)->i_fs->fs_magic == FS_UFS2_MAGIC)
		error = ffs_balloc_ufs2(vp, off, size, cred, flags, bpp);
	else
		error = ffs_balloc_ufs1(vp, off, size, cred, flags, bpp);

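	/*
	 * Run any registered file system copy-on-write hooks on the
	 * buffer being returned; if a hook fails, release the buffer
	 * and hand the error back to the caller.
	 */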
	if (error == 0 && bpp != NULL && (error = fscow_run(*bpp, false)) != 0)
		brelse(*bpp, 0);

	return error;
}

static int
ffs_balloc_ufs1(struct vnode *vp, off_t off, int size, kauth_cred_t cred,
    int flags, struct buf **bpp)
{
	daddr_t lbn, lastlbn;
	struct buf *bp, *nbp;
	struct inode *ip = VTOI(vp);
	struct fs *fs = ip->i_fs;
	struct ufsmount *ump = ip->i_ump;
	struct indir indirs[UFS_NIADDR + 2];
	daddr_t newb, pref, nb;
	int32_t *bap;	/* XXX ondisk32 */
	int deallocated, osize, nsize, num, i, error;
	int32_t *blkp, *allocblk, allociblk[UFS_NIADDR + 1];
	int32_t *allocib;
	int unwindidx = -1;
	const int needswap = UFS_FSNEEDSWAP(fs);
	UVMHIST_FUNC("ffs_balloc"); UVMHIST_CALLED(ubchist);

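	/*
	 * Convert the byte offset to a logical block number and widen
	 * "size" to count from the start of that block to the end of
	 * the request.
	 */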
	lbn = ffs_lblkno(fs, off);
	size = ffs_blkoff(fs, off) + size;
	if (size > fs->fs_bsize)
		panic("ffs_balloc: blk too big");
	if (bpp != NULL) {
		*bpp = NULL;
	}
	UVMHIST_LOG(ubchist, "vp %p lbn 0x%x size 0x%x", vp, lbn, size, 0);

	if (lbn < 0)
		return (EFBIG);

	/*
	 * If the next write will extend the file into a new block,
	 * and the file is currently composed of a fragment,
	 * this fragment has to be extended to be a full block.
	 */

	lastlbn = ffs_lblkno(fs, ip->i_size);
	if (lastlbn < UFS_NDADDR && lastlbn < lbn) {
		nb = lastlbn;
		osize = ffs_blksize(fs, ip, nb);
		if (osize < fs->fs_bsize && osize > 0) {
			mutex_enter(&ump->um_lock);
			error = ffs_realloccg(ip, nb,
				    ffs_blkpref_ufs1(ip, lastlbn, nb, flags,
					&ip->i_ffs1_db[0]),
				    osize, (int)fs->fs_bsize, cred, bpp, &newb);
			if (error)
				return (error);
			ip->i_size = ffs_lblktosize(fs, nb + 1);
			ip->i_ffs1_size = ip->i_size;
			uvm_vnp_setsize(vp, ip->i_ffs1_size);
			ip->i_ffs1_db[nb] = ufs_rw32((u_int32_t)newb, needswap);
			ip->i_flag |= IN_CHANGE | IN_UPDATE;
			if (bpp && *bpp) {
				if (flags & B_SYNC)
					bwrite(*bpp);
				else
					bawrite(*bpp);
			}
		}
	}

	/*
	 * The first UFS_NDADDR blocks are direct blocks
	 */

	if (lbn < UFS_NDADDR) {
		nb = ufs_rw32(ip->i_ffs1_db[lbn], needswap);
		if (nb != 0 && ip->i_size >= ffs_lblktosize(fs, lbn + 1)) {

			/*
			 * The block is an already-allocated direct block
			 * and the file already extends past this block,
			 * thus this must be a whole block.
			 * Just read the block (if requested).
			 */

			if (bpp != NULL) {
				error = bread(vp, lbn, fs->fs_bsize,
					      B_MODIFY, bpp);
				if (error) {
					return (error);
				}
			}
			return (0);
		}
		if (nb != 0) {

			/*
			 * Consider the need to reallocate a fragment.
			 */

			osize = ffs_fragroundup(fs, ffs_blkoff(fs, ip->i_size));
			nsize = ffs_fragroundup(fs, size);
			if (nsize <= osize) {

				/*
				 * The existing block is already
				 * at least as big as we want.
				 * Just read the block (if requested).
				 */

				if (bpp != NULL) {
					error = bread(vp, lbn, osize,
						      B_MODIFY, bpp);
					if (error) {
						return (error);
					}
				}
				return 0;
			} else {

				/*
				 * The existing block is smaller than we want,
				 * grow it.
				 */
				mutex_enter(&ump->um_lock);
				error = ffs_realloccg(ip, lbn,
				    ffs_blkpref_ufs1(ip, lbn, (int)lbn, flags,
					&ip->i_ffs1_db[0]),
				    osize, nsize, cred, bpp, &newb);
				if (error)
					return (error);
			}
		} else {

			/*
			 * The block was not previously allocated;
			 * allocate a new block or fragment.
			 */

			if (ip->i_size < ffs_lblktosize(fs, lbn + 1))
				nsize = ffs_fragroundup(fs, size);
			else
				nsize = fs->fs_bsize;
			mutex_enter(&ump->um_lock);
			error = ffs_alloc(ip, lbn,
			    ffs_blkpref_ufs1(ip, lbn, (int)lbn, flags,
				&ip->i_ffs1_db[0]),
			    nsize, flags, cred, &newb);
			if (error)
				return (error);
			if (bpp != NULL) {
				error = ffs_getblk(vp, lbn, FFS_FSBTODB(fs, newb),
				    nsize, (flags & B_CLRBUF) != 0, bpp);
				if (error)
					return error;
			}
		}
		ip->i_ffs1_db[lbn] = ufs_rw32((u_int32_t)newb, needswap);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
		return (0);
	}

	/*
	 * Determine the number of levels of indirection.
	 */

	pref = 0;
	if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
		return (error);

	/*
	 * Fetch the first indirect block, allocating if necessary.
	 */

	--num;
	nb = ufs_rw32(ip->i_ffs1_ib[indirs[0].in_off], needswap);
	allocib = NULL;
	allocblk = allociblk;
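	/*
	 * Each block allocated below is recorded in allociblk so that
	 * the "fail" path can free them all if a later step fails.
	 */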
	if (nb == 0) {
		mutex_enter(&ump->um_lock);
		pref = ffs_blkpref_ufs1(ip, lbn, 0, flags | B_METAONLY, NULL);
		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
		    flags | B_METAONLY, cred, &newb);
		if (error)
			goto fail;
		nb = newb;
		*allocblk++ = nb;
		error = ffs_getblk(vp, indirs[1].in_lbn, FFS_FSBTODB(fs, nb),
		    fs->fs_bsize, true, &bp);
		if (error)
			goto fail;
		/*
		 * Write synchronously so that indirect blocks
		 * never point at garbage.
		 */
		if ((error = bwrite(bp)) != 0)
			goto fail;
		unwindidx = 0;
		allocib = &ip->i_ffs1_ib[indirs[0].in_off];
		*allocib = ufs_rw32(nb, needswap);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
	}

	/*
	 * Fetch through the indirect blocks, allocating as necessary.
	 */

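	/*
	 * Each pass reads the level-i indirect block; when i == num its
	 * entry addresses the data block itself and the loop exits with
	 * that indirect block still held in bp.
	 */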
	for (i = 1;;) {
		error = bread(vp,
		    indirs[i].in_lbn, (int)fs->fs_bsize, 0, &bp);
		if (error) {
			goto fail;
		}
		bap = (int32_t *)bp->b_data;	/* XXX ondisk32 */
		nb = ufs_rw32(bap[indirs[i].in_off], needswap);
		if (i == num)
			break;
		i++;
		if (nb != 0) {
			brelse(bp, 0);
			continue;
		}
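		/*
		 * About to modify this indirect block: give any
		 * registered copy-on-write hooks (e.g. snapshots) a
		 * chance to process it first.
		 */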
		if (fscow_run(bp, true) != 0) {
			brelse(bp, 0);
			goto fail;
		}
		mutex_enter(&ump->um_lock);
		/* Try to keep snapshot indirect blocks contiguous. */
		if (i == num && (ip->i_flags & SF_SNAPSHOT) != 0)
			pref = ffs_blkpref_ufs1(ip, lbn, indirs[i-1].in_off,
			    flags | B_METAONLY, &bap[0]);
		if (pref == 0)
			pref = ffs_blkpref_ufs1(ip, lbn, 0, flags | B_METAONLY,
			    NULL);
		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
		    flags | B_METAONLY, cred, &newb);
		if (error) {
			brelse(bp, 0);
			goto fail;
		}
		nb = newb;
		*allocblk++ = nb;
		error = ffs_getblk(vp, indirs[i].in_lbn, FFS_FSBTODB(fs, nb),
		    fs->fs_bsize, true, &nbp);
		if (error) {
			brelse(bp, 0);
			goto fail;
		}
		/*
		 * Write synchronously so that indirect blocks
		 * never point at garbage.
		 */
		if ((error = bwrite(nbp)) != 0) {
			brelse(bp, 0);
			goto fail;
		}
		if (unwindidx < 0)
			unwindidx = i - 1;
		bap[indirs[i - 1].in_off] = ufs_rw32(nb, needswap);

		/*
		 * If required, write synchronously, otherwise use
		 * delayed write.
		 */

		if (flags & B_SYNC) {
			bwrite(bp);
		} else {
			bdwrite(bp);
		}
	}

	if (flags & B_METAONLY) {
		KASSERT(bpp != NULL);
		*bpp = bp;
		return (0);
	}

	/*
	 * Get the data block, allocating if necessary.
	 */

	if (nb == 0) {
		if (fscow_run(bp, true) != 0) {
			brelse(bp, 0);
			goto fail;
		}
		mutex_enter(&ump->um_lock);
		pref = ffs_blkpref_ufs1(ip, lbn, indirs[num].in_off, flags,
		    &bap[0]);
		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, flags, cred,
		    &newb);
		if (error) {
			brelse(bp, 0);
			goto fail;
		}
		nb = newb;
		*allocblk++ = nb;
		if (bpp != NULL) {
			error = ffs_getblk(vp, lbn, FFS_FSBTODB(fs, nb),
			    fs->fs_bsize, (flags & B_CLRBUF) != 0, bpp);
			if (error) {
				brelse(bp, 0);
				goto fail;
			}
		}
		bap[indirs[num].in_off] = ufs_rw32(nb, needswap);
		if (allocib == NULL && unwindidx < 0) {
			unwindidx = i - 1;
		}

		/*
		 * If required, write synchronously, otherwise use
		 * delayed write.
		 */

		if (flags & B_SYNC) {
			bwrite(bp);
		} else {
			bdwrite(bp);
		}
		return (0);
	}
	brelse(bp, 0);
	if (bpp != NULL) {
		if (flags & B_CLRBUF) {
			error = bread(vp, lbn, (int)fs->fs_bsize,
			    B_MODIFY, &nbp);
			if (error) {
				goto fail;
			}
		} else {
			error = ffs_getblk(vp, lbn, FFS_FSBTODB(fs, nb),
			    fs->fs_bsize, true, &nbp);
			if (error)
				goto fail;
		}
		*bpp = nbp;
	}
	return (0);

fail:
	/*
	 * If we have failed part way through block allocation, we
	 * have to deallocate any indirect blocks that we have allocated.
	 */

	if (unwindidx >= 0) {

		/*
		 * First write out any buffers we've created to resolve their
		 * softdeps.  This must be done in reverse order of creation
		 * so that we resolve the dependencies in one pass.
		 * Write the cylinder group buffers for these buffers too.
		 */

		for (i = num; i >= unwindidx; i--) {
			if (i == 0) {
				break;
			}
			if (ffs_getblk(vp, indirs[i].in_lbn, FFS_NOBLK,
			    fs->fs_bsize, false, &bp) != 0)
				continue;
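			/*
			 * A dirty buffer is written out, together with
			 * the cylinder group block that covers it; a
			 * clean one is simply invalidated.
			 */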
			if (bp->b_oflags & BO_DELWRI) {
				nb = FFS_FSBTODB(fs, cgtod(fs, dtog(fs,
				    FFS_DBTOFSB(fs, bp->b_blkno))));
				bwrite(bp);
				if (ffs_getblk(ip->i_devvp, nb, FFS_NOBLK,
				    fs->fs_cgsize, false, &bp) != 0)
					continue;
				if (bp->b_oflags & BO_DELWRI) {
					bwrite(bp);
				} else {
					brelse(bp, BC_INVAL);
				}
			} else {
				brelse(bp, BC_INVAL);
			}
		}

		/*
		 * Undo the partial allocation.
		 */
		if (unwindidx == 0) {
			*allocib = 0;
			ip->i_flag |= IN_CHANGE | IN_UPDATE;
		} else {
			int r;

			r = bread(vp, indirs[unwindidx].in_lbn,
			    (int)fs->fs_bsize, 0, &bp);
			if (r) {
				panic("Could not unwind indirect block, error %d", r);
			} else {
				bap = (int32_t *)bp->b_data; /* XXX ondisk32 */
				bap[indirs[unwindidx].in_off] = 0;
				bwrite(bp);
			}
		}
		for (i = unwindidx + 1; i <= num; i++) {
			if (ffs_getblk(vp, indirs[i].in_lbn, FFS_NOBLK,
			    fs->fs_bsize, false, &bp) == 0)
				brelse(bp, BC_INVAL);
		}
	}
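	/*
	 * Free every block allocated by this call and back out the
	 * inode's block count (and, under QUOTA, the quota charge).
	 */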
	for (deallocated = 0, blkp = allociblk; blkp < allocblk; blkp++) {
		ffs_blkfree(fs, ip->i_devvp, *blkp, fs->fs_bsize, ip->i_number);
		deallocated += fs->fs_bsize;
	}
	if (deallocated) {
#if defined(QUOTA) || defined(QUOTA2)
		/*
		 * Restore user's disk quota because allocation failed.
		 */
		(void)chkdq(ip, -btodb(deallocated), cred, FORCE);
#endif
		ip->i_ffs1_blocks -= btodb(deallocated);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
	}
	return (error);
}

static int
ffs_balloc_ufs2(struct vnode *vp, off_t off, int size, kauth_cred_t cred,
    int flags, struct buf **bpp)
{
	daddr_t lbn, lastlbn;
	struct buf *bp, *nbp;
	struct inode *ip = VTOI(vp);
	struct fs *fs = ip->i_fs;
	struct ufsmount *ump = ip->i_ump;
	struct indir indirs[UFS_NIADDR + 2];
	daddr_t newb, pref, nb;
	int64_t *bap;
	int deallocated, osize, nsize, num, i, error;
	daddr_t *blkp, *allocblk, allociblk[UFS_NIADDR + 1];
	int64_t *allocib;
	int unwindidx = -1;
	const int needswap = UFS_FSNEEDSWAP(fs);
	UVMHIST_FUNC("ffs_balloc"); UVMHIST_CALLED(ubchist);

	lbn = ffs_lblkno(fs, off);
	size = ffs_blkoff(fs, off) + size;
	if (size > fs->fs_bsize)
		panic("ffs_balloc: blk too big");
	if (bpp != NULL) {
		*bpp = NULL;
	}
	UVMHIST_LOG(ubchist, "vp %p lbn 0x%x size 0x%x", vp, lbn, size, 0);

	if (lbn < 0)
		return (EFBIG);

#ifdef notyet
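	/*
	 * Disabled draft of UFS2 extended attribute ("external data")
	 * allocation, apparently carried over from FreeBSD; it
	 * references names (dp, di_extb, BA_* flags) not defined here.
	 */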
	/*
	 * Check for allocating external data.
	 */
	if (flags & IO_EXT) {
		if (lbn >= UFS_NXADDR)
			return (EFBIG);
		/*
		 * If the next write will extend the data into a new block,
		 * and the data is currently composed of a fragment,
		 * this fragment has to be extended to be a full block.
		 */
		lastlbn = ffs_lblkno(fs, dp->di_extsize);
		if (lastlbn < lbn) {
			nb = lastlbn;
			osize = ffs_sblksize(fs, dp->di_extsize, nb);
			if (osize < fs->fs_bsize && osize > 0) {
				mutex_enter(&ump->um_lock);
				error = ffs_realloccg(ip, -1 - nb,
				    dp->di_extb[nb],
				    ffs_blkpref_ufs2(ip, lastlbn, (int)nb,
					flags, &dp->di_extb[0]),
				    osize,
				    (int)fs->fs_bsize, cred, &bp);
				if (error)
					return (error);
				dp->di_extsize = smalllblktosize(fs, nb + 1);
				dp->di_extb[nb] = FFS_DBTOFSB(fs, bp->b_blkno);
				bp->b_xflags |= BX_ALTDATA;
				ip->i_flag |= IN_CHANGE | IN_UPDATE;
				if (flags & IO_SYNC)
					bwrite(bp);
				else
					bawrite(bp);
			}
		}
		/*
		 * All blocks are direct blocks
		 */
		if (flags & BA_METAONLY)
			panic("ffs_balloc_ufs2: BA_METAONLY for ext block");
		nb = dp->di_extb[lbn];
		if (nb != 0 && dp->di_extsize >= smalllblktosize(fs, lbn + 1)) {
			error = bread(vp, -1 - lbn, fs->fs_bsize,
			    0, &bp);
			if (error) {
				return (error);
			}
			mutex_enter(&bp->b_interlock);
			bp->b_blkno = FFS_FSBTODB(fs, nb);
			bp->b_xflags |= BX_ALTDATA;
			mutex_exit(&bp->b_interlock);
			*bpp = bp;
			return (0);
		}
		if (nb != 0) {
			/*
			 * Consider the need to reallocate a fragment.
			 */
			osize = ffs_fragroundup(fs, ffs_blkoff(fs, dp->di_extsize));
			nsize = ffs_fragroundup(fs, size);
			if (nsize <= osize) {
				error = bread(vp, -1 - lbn, osize,
				    0, &bp);
				if (error) {
					return (error);
				}
				mutex_enter(&bp->b_interlock);
				bp->b_blkno = FFS_FSBTODB(fs, nb);
				bp->b_xflags |= BX_ALTDATA;
				mutex_exit(&bp->b_interlock);
			} else {
				mutex_enter(&ump->um_lock);
				error = ffs_realloccg(ip, -1 - lbn,
				    dp->di_extb[lbn],
				    ffs_blkpref_ufs2(ip, lbn, (int)lbn, flags,
				        &dp->di_extb[0]),
				    osize, nsize, cred, &bp);
				if (error)
					return (error);
				bp->b_xflags |= BX_ALTDATA;
			}
		} else {
			if (dp->di_extsize < smalllblktosize(fs, lbn + 1))
				nsize = ffs_fragroundup(fs, size);
			else
				nsize = fs->fs_bsize;
			mutex_enter(&ump->um_lock);
			error = ffs_alloc(ip, lbn,
			   ffs_blkpref_ufs2(ip, lbn, (int)lbn, flags,
			       &dp->di_extb[0]),
			   nsize, flags, cred, &newb);
			if (error)
				return (error);
			error = ffs_getblk(vp, -1 - lbn, FFS_FSBTODB(fs, newb),
			    nsize, (flags & BA_CLRBUF) != 0, &bp);
			if (error)
				return error;
			bp->b_xflags |= BX_ALTDATA;
		}
		dp->di_extb[lbn] = FFS_DBTOFSB(fs, bp->b_blkno);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
		*bpp = bp;
		return (0);
	}
#endif
	/*
	 * If the next write will extend the file into a new block,
	 * and the file is currently composed of a fragment,
	 * this fragment has to be extended to be a full block.
	 */

	lastlbn = ffs_lblkno(fs, ip->i_size);
	if (lastlbn < UFS_NDADDR && lastlbn < lbn) {
		nb = lastlbn;
		osize = ffs_blksize(fs, ip, nb);
		if (osize < fs->fs_bsize && osize > 0) {
			mutex_enter(&ump->um_lock);
			error = ffs_realloccg(ip, nb,
				    ffs_blkpref_ufs2(ip, lastlbn, nb, flags,
					&ip->i_ffs2_db[0]),
				    osize, (int)fs->fs_bsize, cred, bpp, &newb);
			if (error)
				return (error);
			ip->i_size = ffs_lblktosize(fs, nb + 1);
			ip->i_ffs2_size = ip->i_size;
			uvm_vnp_setsize(vp, ip->i_size);
			ip->i_ffs2_db[nb] = ufs_rw64(newb, needswap);
			ip->i_flag |= IN_CHANGE | IN_UPDATE;
			if (bpp) {
				if (flags & B_SYNC)
					bwrite(*bpp);
				else
					bawrite(*bpp);
			}
		}
	}

	/*
	 * The first UFS_NDADDR blocks are direct blocks
	 */

	if (lbn < UFS_NDADDR) {
		nb = ufs_rw64(ip->i_ffs2_db[lbn], needswap);
		if (nb != 0 && ip->i_size >= ffs_lblktosize(fs, lbn + 1)) {

			/*
			 * The block is an already-allocated direct block
			 * and the file already extends past this block,
			 * thus this must be a whole block.
			 * Just read the block (if requested).
			 */

			if (bpp != NULL) {
				error = bread(vp, lbn, fs->fs_bsize,
					      B_MODIFY, bpp);
				if (error) {
					return (error);
				}
			}
			return (0);
		}
		if (nb != 0) {

			/*
			 * Consider the need to reallocate a fragment.
			 */

			osize = ffs_fragroundup(fs, ffs_blkoff(fs, ip->i_size));
			nsize = ffs_fragroundup(fs, size);
			if (nsize <= osize) {

				/*
				 * The existing block is already
				 * at least as big as we want.
				 * Just read the block (if requested).
				 */

				if (bpp != NULL) {
					error = bread(vp, lbn, osize,
						      B_MODIFY, bpp);
					if (error) {
						return (error);
					}
				}
				return 0;
			} else {

				/*
				 * The existing block is smaller than we want,
				 * grow it.
				 */
				mutex_enter(&ump->um_lock);
				error = ffs_realloccg(ip, lbn,
				    ffs_blkpref_ufs2(ip, lbn, (int)lbn, flags,
					&ip->i_ffs2_db[0]),
				    osize, nsize, cred, bpp, &newb);
				if (error)
					return (error);
			}
		} else {

			/*
			 * The block was not previously allocated;
			 * allocate a new block or fragment.
			 */

			if (ip->i_size < ffs_lblktosize(fs, lbn + 1))
				nsize = ffs_fragroundup(fs, size);
			else
				nsize = fs->fs_bsize;
			mutex_enter(&ump->um_lock);
			error = ffs_alloc(ip, lbn,
			    ffs_blkpref_ufs2(ip, lbn, (int)lbn, flags,
				&ip->i_ffs2_db[0]),
			    nsize, flags, cred, &newb);
			if (error)
				return (error);
			if (bpp != NULL) {
				error = ffs_getblk(vp, lbn, FFS_FSBTODB(fs, newb),
				    nsize, (flags & B_CLRBUF) != 0, bpp);
				if (error)
					return error;
			}
		}
		ip->i_ffs2_db[lbn] = ufs_rw64(newb, needswap);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
		return (0);
	}

	/*
	 * Determine the number of levels of indirection.
	 */

	pref = 0;
	if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0)
		return (error);

	/*
	 * Fetch the first indirect block, allocating if necessary.
	 */

	--num;
	nb = ufs_rw64(ip->i_ffs2_ib[indirs[0].in_off], needswap);
	allocib = NULL;
	allocblk = allociblk;
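	/* As in the ufs1 case, allociblk records every block allocated
	 * below so the "fail" path can free them. */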
	if (nb == 0) {
		mutex_enter(&ump->um_lock);
		pref = ffs_blkpref_ufs2(ip, lbn, 0, flags | B_METAONLY, NULL);
		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
		    flags | B_METAONLY, cred, &newb);
		if (error)
			goto fail;
		nb = newb;
		*allocblk++ = nb;
		error = ffs_getblk(vp, indirs[1].in_lbn, FFS_FSBTODB(fs, nb),
		    fs->fs_bsize, true, &bp);
		if (error)
			goto fail;
		/*
		 * Write synchronously so that indirect blocks
		 * never point at garbage.
		 */
		if ((error = bwrite(bp)) != 0)
			goto fail;
		unwindidx = 0;
		allocib = &ip->i_ffs2_ib[indirs[0].in_off];
		*allocib = ufs_rw64(nb, needswap);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
	}

	/*
	 * Fetch through the indirect blocks, allocating as necessary.
	 */

	for (i = 1;;) {
		error = bread(vp,
		    indirs[i].in_lbn, (int)fs->fs_bsize, 0, &bp);
		if (error) {
			goto fail;
		}
		bap = (int64_t *)bp->b_data;
		nb = ufs_rw64(bap[indirs[i].in_off], needswap);
		if (i == num)
			break;
		i++;
		if (nb != 0) {
			brelse(bp, 0);
			continue;
		}
		if (fscow_run(bp, true) != 0) {
			brelse(bp, 0);
			goto fail;
		}
		mutex_enter(&ump->um_lock);
		/* Try to keep snapshot indirect blocks contiguous. */
		if (i == num && (ip->i_flags & SF_SNAPSHOT) != 0)
			pref = ffs_blkpref_ufs2(ip, lbn, indirs[i-1].in_off,
			    flags | B_METAONLY, &bap[0]);
		if (pref == 0)
			pref = ffs_blkpref_ufs2(ip, lbn, 0, flags | B_METAONLY,
			    NULL);
		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
		    flags | B_METAONLY, cred, &newb);
		if (error) {
			brelse(bp, 0);
			goto fail;
		}
		nb = newb;
		*allocblk++ = nb;
		error = ffs_getblk(vp, indirs[i].in_lbn, FFS_FSBTODB(fs, nb),
		    fs->fs_bsize, true, &nbp);
		if (error) {
			brelse(bp, 0);
			goto fail;
		}
		/*
		 * Write synchronously so that indirect blocks
		 * never point at garbage.
		 */
		if ((error = bwrite(nbp)) != 0) {
			brelse(bp, 0);
			goto fail;
		}
		if (unwindidx < 0)
			unwindidx = i - 1;
		bap[indirs[i - 1].in_off] = ufs_rw64(nb, needswap);

		/*
		 * If required, write synchronously, otherwise use
		 * delayed write.
		 */

		if (flags & B_SYNC) {
			bwrite(bp);
		} else {
			bdwrite(bp);
		}
	}

	if (flags & B_METAONLY) {
		KASSERT(bpp != NULL);
		*bpp = bp;
		return (0);
	}

	/*
	 * Get the data block, allocating if necessary.
	 */

	if (nb == 0) {
		if (fscow_run(bp, true) != 0) {
			brelse(bp, 0);
			goto fail;
		}
		mutex_enter(&ump->um_lock);
		pref = ffs_blkpref_ufs2(ip, lbn, indirs[num].in_off, flags,
		    &bap[0]);
		error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, flags, cred,
		    &newb);
		if (error) {
			brelse(bp, 0);
			goto fail;
		}
		nb = newb;
		*allocblk++ = nb;
		if (bpp != NULL) {
			error = ffs_getblk(vp, lbn, FFS_FSBTODB(fs, nb),
			    fs->fs_bsize, (flags & B_CLRBUF) != 0, bpp);
			if (error) {
				brelse(bp, 0);
				goto fail;
			}
		}
		bap[indirs[num].in_off] = ufs_rw64(nb, needswap);
		if (allocib == NULL && unwindidx < 0) {
			unwindidx = i - 1;
		}

		/*
		 * If required, write synchronously, otherwise use
		 * delayed write.
		 */

		if (flags & B_SYNC) {
			bwrite(bp);
		} else {
			bdwrite(bp);
		}
		return (0);
	}
	brelse(bp, 0);
	if (bpp != NULL) {
		if (flags & B_CLRBUF) {
			error = bread(vp, lbn, (int)fs->fs_bsize,
			    B_MODIFY, &nbp);
			if (error) {
				goto fail;
			}
		} else {
			error = ffs_getblk(vp, lbn, FFS_FSBTODB(fs, nb),
			    fs->fs_bsize, true, &nbp);
			if (error)
				goto fail;
		}
		*bpp = nbp;
	}
	return (0);

fail:
	/*
	 * If we have failed part way through block allocation, we
	 * have to deallocate any indirect blocks that we have allocated.
	 */

	if (unwindidx >= 0) {

		/*
		 * First write out any buffers we've created to resolve their
		 * softdeps.  This must be done in reverse order of creation
		 * so that we resolve the dependencies in one pass.
		 * Write the cylinder group buffers for these buffers too.
		 */

		for (i = num; i >= unwindidx; i--) {
			if (i == 0) {
				break;
			}
			if (ffs_getblk(vp, indirs[i].in_lbn, FFS_NOBLK,
			    fs->fs_bsize, false, &bp) != 0)
				continue;
			if (bp->b_oflags & BO_DELWRI) {
				nb = FFS_FSBTODB(fs, cgtod(fs, dtog(fs,
				    FFS_DBTOFSB(fs, bp->b_blkno))));
				bwrite(bp);
				if (ffs_getblk(ip->i_devvp, nb, FFS_NOBLK,
				    fs->fs_cgsize, false, &bp) != 0)
					continue;
				if (bp->b_oflags & BO_DELWRI) {
					bwrite(bp);
				} else {
					brelse(bp, BC_INVAL);
				}
			} else {
				brelse(bp, BC_INVAL);
			}
		}

		/*
		 * Now that any dependencies that we created have been
		 * resolved, we can undo the partial allocation.
		 */

		if (unwindidx == 0) {
			*allocib = 0;
			ip->i_flag |= IN_CHANGE | IN_UPDATE;
		} else {
			int r;

			r = bread(vp, indirs[unwindidx].in_lbn,
			    (int)fs->fs_bsize, 0, &bp);
			if (r) {
				panic("Could not unwind indirect block, error %d", r);
			} else {
				bap = (int64_t *)bp->b_data;
				bap[indirs[unwindidx].in_off] = 0;
				bwrite(bp);
			}
		}
		for (i = unwindidx + 1; i <= num; i++) {
			if (ffs_getblk(vp, indirs[i].in_lbn, FFS_NOBLK,
			    fs->fs_bsize, false, &bp) == 0)
				brelse(bp, BC_INVAL);
		}
	}
	for (deallocated = 0, blkp = allociblk; blkp < allocblk; blkp++) {
		ffs_blkfree(fs, ip->i_devvp, *blkp, fs->fs_bsize, ip->i_number);
		deallocated += fs->fs_bsize;
	}
	if (deallocated) {
#if defined(QUOTA) || defined(QUOTA2)
		/*
		 * Restore user's disk quota because allocation failed.
		 */
		(void)chkdq(ip, -btodb(deallocated), cred, FORCE);
#endif
		ip->i_ffs2_blocks -= btodb(deallocated);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
	}

	return (error);
}