xref: /freebsd/sys/ufs/ffs/ffs_inode.c (revision 61e21613)
1 /*-
2  * SPDX-License-Identifier: BSD-3-Clause
3  *
4  * Copyright (c) 1982, 1986, 1989, 1993
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. Neither the name of the University nor the names of its contributors
16  *    may be used to endorse or promote products derived from this software
17  *    without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  */
31 
32 #include <sys/cdefs.h>
33 #include "opt_ufs.h"
34 #include "opt_quota.h"
35 
36 #include <sys/param.h>
37 #include <sys/systm.h>
38 #include <sys/bio.h>
39 #include <sys/buf.h>
40 #include <sys/malloc.h>
41 #include <sys/mount.h>
42 #include <sys/proc.h>
43 #include <sys/racct.h>
44 #include <sys/random.h>
45 #include <sys/resourcevar.h>
46 #include <sys/rwlock.h>
47 #include <sys/stat.h>
48 #include <sys/vmmeter.h>
49 #include <sys/vnode.h>
50 
51 #include <vm/vm.h>
52 #include <vm/vm_extern.h>
53 #include <vm/vm_object.h>
54 
55 #include <ufs/ufs/extattr.h>
56 #include <ufs/ufs/quota.h>
57 #include <ufs/ufs/ufsmount.h>
58 #include <ufs/ufs/inode.h>
59 #include <ufs/ufs/dir.h>
60 #ifdef UFS_DIRHASH
61 #include <ufs/ufs/dirhash.h>
62 #endif
63 #include <ufs/ufs/ufs_extern.h>
64 
65 #include <ufs/ffs/fs.h>
66 #include <ufs/ffs/ffs_extern.h>
67 
68 static int ffs_indirtrunc(struct inode *, ufs2_daddr_t, ufs2_daddr_t,
69 	    ufs2_daddr_t, int, ufs2_daddr_t *);
70 
71 static void
72 ffs_inode_bwrite(struct vnode *vp, struct buf *bp, int flags)
73 {
74 	if ((flags & IO_SYNC) != 0)
75 		bwrite(bp);
76 	else if (DOINGASYNC(vp))
77 		bdwrite(bp);
78 	else
79 		bawrite(bp);
80 }
81 
82 /*
83  * Update the access, modified, and inode change times as specified by the
84  * IN_ACCESS, IN_UPDATE, and IN_CHANGE flags respectively.  Write the inode
85  * to disk if the IN_MODIFIED flag is set (it may be set initially, or by
86  * the timestamp update).  The IN_LAZYMOD flag is set to force a write
87  * later if not now.  The IN_LAZYACCESS is set instead of IN_MODIFIED if the fs
88  * is currently being suspended (or is suspended) and vnode has been accessed.
89  * If we write now, then clear IN_MODIFIED, IN_LAZYACCESS and IN_LAZYMOD to
90  * reflect the presumably successful write, and if waitfor is set, then wait
91  * for the write to complete.
92  */
93 int
94 ffs_update(struct vnode *vp, int waitfor)
95 {
96 	struct fs *fs;
97 	struct buf *bp;
98 	struct inode *ip;
99 	daddr_t bn;
100 	int flags, error;
101 
102 	ASSERT_VOP_ELOCKED(vp, "ffs_update");
103 	ufs_itimes(vp);
104 	ip = VTOI(vp);
105 	if ((ip->i_flag & IN_MODIFIED) == 0 && waitfor == 0)
106 		return (0);
107 	ip->i_flag &= ~(IN_LAZYACCESS | IN_LAZYMOD | IN_MODIFIED);
108 	/*
109 	 * The IN_SIZEMOD and IN_IBLKDATA flags indicate changes to the
110 	 * file size and block pointer fields in the inode. When these
111 	 * fields have been changed, the fsync() and fsyncdata() system
112 	 * calls must write the inode to ensure their semantics that the
113 	 * file is on stable store.
114 	 *
115 	 * The IN_SIZEMOD and IN_IBLKDATA flags cannot be cleared until
116 	 * a synchronous write of the inode is done. If they are cleared
117 	 * on an asynchronous write, then the inode may not yet have been
118 	 * written to the disk when an fsync() or fsyncdata() call is done.
119 	 * Absent these flags, these calls would not know that they needed
120 	 * to write the inode. Thus, these flags only can be cleared on
121 	 * synchronous writes of the inode. Since the inode will be locked
122 	 * for the duration of the I/O that writes it to disk, no fsync()
123 	 * or fsyncdata() will be able to run before the on-disk inode
124 	 * is complete.
125 	 */
126 	if (waitfor)
127 		ip->i_flag &= ~(IN_SIZEMOD | IN_IBLKDATA);
128 	fs = ITOFS(ip);
129 	if (fs->fs_ronly)
130 		return (0);
131 	/*
132 	 * If we are updating a snapshot and another process is currently
133 	 * writing the buffer containing the inode for this snapshot then
134 	 * a deadlock can occur when it tries to check the snapshot to see
135 	 * if that block needs to be copied. Thus when updating a snapshot
136 	 * we check to see if the buffer is already locked, and if it is
137 	 * we drop the snapshot lock until the buffer has been written
138 	 * and is available to us. We have to grab a reference to the
139 	 * snapshot vnode to prevent it from being removed while we are
140 	 * waiting for the buffer.
141 	 */
142 loop:
143 	flags = 0;
144 	if (IS_SNAPSHOT(ip))
145 		flags = GB_LOCK_NOWAIT;
146 	bn = fsbtodb(fs, ino_to_fsba(fs, ip->i_number));
147 	error = ffs_breadz(VFSTOUFS(vp->v_mount), ITODEVVP(ip), bn, bn,
148 	     (int) fs->fs_bsize, NULL, NULL, 0, NOCRED, flags, NULL, &bp);
149 	if (error != 0) {
150 		/*
151 		 * If EBUSY was returned without GB_LOCK_NOWAIT (which
152 		 * requests trylock for buffer lock), it is for some
153 		 * other reason and we should not handle it specially.
154 		 */
155 		if (error != EBUSY || (flags & GB_LOCK_NOWAIT) == 0)
156 			return (error);
157 
158 		/*
159 		 * Wait for our inode block to become available.
160 		 *
161 		 * Hold a reference to the vnode to protect against
162 		 * ffs_snapgone(). Since we hold a reference, it can only
163 		 * get reclaimed (VIRF_DOOMED flag) in a forcible downgrade
164 		 * or unmount. For an unmount, the entire filesystem will be
165 		 * gone, so we cannot attempt to touch anything associated
166 		 * with it while the vnode is unlocked; all we can do is
167 		 * pause briefly and try again. If when we relock the vnode
168 		 * we discover that it has been reclaimed, updating it is no
169 		 * longer necessary and we can just return an error.
170 		 */
171 		vref(vp);
172 		VOP_UNLOCK(vp);
173 		pause("ffsupd", 1);
174 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
175 		vrele(vp);
176 		if (!IS_UFS(vp))
177 			return (ENOENT);
178 
179 		/*
180 		 * Recalculate flags, because the vnode was relocked and
181 		 * could no longer be a snapshot.
182 		 */
183 		goto loop;
184 	}
185 	if (DOINGSOFTDEP(vp))
186 		softdep_update_inodeblock(ip, bp, waitfor);
187 	else if (ip->i_effnlink != ip->i_nlink)
188 		panic("ffs_update: bad link cnt");
189 	if (I_IS_UFS1(ip)) {
190 		*((struct ufs1_dinode *)bp->b_data +
191 		    ino_to_fsbo(fs, ip->i_number)) = *ip->i_din1;
192 		/*
193 		 * XXX: FIX? The entropy here is desirable,
194 		 * but the harvesting may be expensive
195 		 */
196 		random_harvest_queue(&(ip->i_din1), sizeof(ip->i_din1), RANDOM_FS_ATIME);
197 	} else {
198 		ffs_update_dinode_ckhash(fs, ip->i_din2);
199 		*((struct ufs2_dinode *)bp->b_data +
200 		    ino_to_fsbo(fs, ip->i_number)) = *ip->i_din2;
201 		/*
202 		 * XXX: FIX? The entropy here is desirable,
203 		 * but the harvesting may be expensive
204 		 */
205 		random_harvest_queue(&(ip->i_din2), sizeof(ip->i_din2), RANDOM_FS_ATIME);
206 	}
207 	if (waitfor) {
208 		error = bwrite(bp);
209 		if (ffs_fsfail_cleanup(VFSTOUFS(vp->v_mount), error))
210 			error = 0;
211 	} else if (vm_page_count_severe() || buf_dirty_count_severe()) {
212 		bawrite(bp);
213 		error = 0;
214 	} else {
215 		if (bp->b_bufsize == fs->fs_bsize)
216 			bp->b_flags |= B_CLUSTEROK;
217 		bdwrite(bp);
218 		error = 0;
219 	}
220 	return (error);
221 }
222 
223 #define	SINGLE	0	/* index of single indirect block */
224 #define	DOUBLE	1	/* index of double indirect block */
225 #define	TRIPLE	2	/* index of triple indirect block */
226 /*
227  * Truncate the inode ip to at most length size, freeing the
228  * disk blocks.
229  */
230 int
231 ffs_truncate(struct vnode *vp,
232 	off_t length,
233 	int flags,
234 	struct ucred *cred)
235 {
236 	struct inode *ip;
237 	ufs2_daddr_t bn, lbn, lastblock, lastiblock[UFS_NIADDR];
238 	ufs2_daddr_t indir_lbn[UFS_NIADDR], oldblks[UFS_NDADDR + UFS_NIADDR];
239 #ifdef INVARIANTS
240 	ufs2_daddr_t newblks[UFS_NDADDR + UFS_NIADDR];
241 #endif
242 	ufs2_daddr_t count, blocksreleased = 0, blkno;
243 	struct bufobj *bo __diagused;
244 	struct fs *fs;
245 	struct buf *bp;
246 	struct ufsmount *ump;
247 	int softdeptrunc, journaltrunc;
248 	int needextclean, extblocks;
249 	int offset, size, level, nblocks;
250 	int i, error, allerror, indiroff, waitforupdate;
251 	uint64_t key;
252 	off_t osize;
253 
254 	ip = VTOI(vp);
255 	ump = VFSTOUFS(vp->v_mount);
256 	fs = ump->um_fs;
257 	bo = &vp->v_bufobj;
258 
259 	ASSERT_VOP_LOCKED(vp, "ffs_truncate");
260 
261 	if (length < 0)
262 		return (EINVAL);
263 	if (length > fs->fs_maxfilesize)
264 		return (EFBIG);
265 #ifdef QUOTA
266 	error = getinoquota(ip);
267 	if (error)
268 		return (error);
269 #endif
270 	/*
271 	 * Historically clients did not have to specify which data
272 	 * they were truncating. So, if not specified, we assume
273 	 * traditional behavior, e.g., just the normal data.
274 	 */
275 	if ((flags & (IO_EXT | IO_NORMAL)) == 0)
276 		flags |= IO_NORMAL;
277 	if (!DOINGSOFTDEP(vp) && !DOINGASYNC(vp))
278 		flags |= IO_SYNC;
279 	waitforupdate = (flags & IO_SYNC) != 0 || !DOINGASYNC(vp);
280 	/*
281 	 * If we are truncating the extended-attributes, and cannot
282 	 * do it with soft updates, then do it slowly here. If we are
283 	 * truncating both the extended attributes and the file contents
284 	 * (e.g., the file is being unlinked), then pick it off with
285 	 * soft updates below.
286 	 */
287 	allerror = 0;
288 	needextclean = 0;
289 	softdeptrunc = 0;
290 	journaltrunc = DOINGSUJ(vp);
291 	journaltrunc = 0;	/* XXX temp patch until bug found */
292 	if (journaltrunc == 0 && DOINGSOFTDEP(vp) && length == 0)
293 		softdeptrunc = !softdep_slowdown(vp);
294 	extblocks = 0;
295 	if (fs->fs_magic == FS_UFS2_MAGIC && ip->i_din2->di_extsize > 0) {
296 		extblocks = btodb(fragroundup(fs, ip->i_din2->di_extsize));
297 	}
298 	if ((flags & IO_EXT) && extblocks > 0) {
299 		if (length != 0)
300 			panic("ffs_truncate: partial trunc of extdata");
301 		if (softdeptrunc || journaltrunc) {
302 			if ((flags & IO_NORMAL) == 0)
303 				goto extclean;
304 			needextclean = 1;
305 		} else {
306 			if ((error = ffs_syncvnode(vp, MNT_WAIT, 0)) != 0)
307 				return (error);
308 #ifdef QUOTA
309 			(void) chkdq(ip, -extblocks, NOCRED, FORCE);
310 #endif
311 			vinvalbuf(vp, V_ALT, 0, 0);
312 			vn_pages_remove(vp,
313 			    OFF_TO_IDX(lblktosize(fs, -extblocks)), 0);
314 			osize = ip->i_din2->di_extsize;
315 			ip->i_din2->di_blocks -= extblocks;
316 			ip->i_din2->di_extsize = 0;
317 			for (i = 0; i < UFS_NXADDR; i++) {
318 				oldblks[i] = ip->i_din2->di_extb[i];
319 				ip->i_din2->di_extb[i] = 0;
320 			}
321 			UFS_INODE_SET_FLAG(ip, IN_SIZEMOD | IN_CHANGE);
322 			if ((error = ffs_update(vp, waitforupdate)))
323 				return (error);
324 			for (i = 0; i < UFS_NXADDR; i++) {
325 				if (oldblks[i] == 0)
326 					continue;
327 				ffs_blkfree(ump, fs, ITODEVVP(ip), oldblks[i],
328 				    sblksize(fs, osize, i), ip->i_number,
329 				    vp->v_type, NULL, SINGLETON_KEY);
330 			}
331 		}
332 	}
333 	if ((flags & IO_NORMAL) == 0)
334 		return (0);
335 	if (vp->v_type == VLNK && ip->i_size < ump->um_maxsymlinklen) {
336 #ifdef INVARIANTS
337 		if (length != 0)
338 			panic("ffs_truncate: partial truncate of symlink");
339 #endif
340 		bzero(DIP(ip, i_shortlink), (uint64_t)ip->i_size);
341 		ip->i_size = 0;
342 		DIP_SET(ip, i_size, 0);
343 		UFS_INODE_SET_FLAG(ip, IN_SIZEMOD | IN_CHANGE | IN_UPDATE);
344 		if (needextclean)
345 			goto extclean;
346 		return (ffs_update(vp, waitforupdate));
347 	}
348 	if (ip->i_size == length) {
349 		UFS_INODE_SET_FLAG(ip, IN_CHANGE | IN_UPDATE);
350 		if (needextclean)
351 			goto extclean;
352 		return (ffs_update(vp, 0));
353 	}
354 	if (fs->fs_ronly)
355 		panic("ffs_truncate: read-only filesystem");
356 	if (IS_SNAPSHOT(ip))
357 		ffs_snapremove(vp);
358 	cluster_init_vn(&ip->i_clusterw);
359 	osize = ip->i_size;
360 	/*
361 	 * Lengthen the size of the file. We must ensure that the
362 	 * last byte of the file is allocated. Since the smallest
363 	 * value of osize is 0, length will be at least 1.
364 	 */
365 	if (osize < length) {
366 		vnode_pager_setsize(vp, length);
367 		flags |= BA_CLRBUF;
368 		error = UFS_BALLOC(vp, length - 1, 1, cred, flags, &bp);
369 		if (error) {
370 			vnode_pager_setsize(vp, osize);
371 			return (error);
372 		}
373 		ip->i_size = length;
374 		DIP_SET(ip, i_size, length);
375 		if (bp->b_bufsize == fs->fs_bsize)
376 			bp->b_flags |= B_CLUSTEROK;
377 		ffs_inode_bwrite(vp, bp, flags);
378 		UFS_INODE_SET_FLAG(ip, IN_SIZEMOD | IN_CHANGE | IN_UPDATE);
379 		return (ffs_update(vp, waitforupdate));
380 	}
381 	/*
382 	 * Lookup block number for a given offset. Zero length files
383 	 * have no blocks, so return a blkno of -1.
384 	 */
385 	lbn = lblkno(fs, length - 1);
386 	if (length == 0) {
387 		blkno = -1;
388 	} else if (lbn < UFS_NDADDR) {
389 		blkno = DIP(ip, i_db[lbn]);
390 	} else {
391 		error = UFS_BALLOC(vp, lblktosize(fs, (off_t)lbn), fs->fs_bsize,
392 		    cred, BA_METAONLY, &bp);
393 		if (error)
394 			return (error);
395 		indiroff = (lbn - UFS_NDADDR) % NINDIR(fs);
396 		if (I_IS_UFS1(ip))
397 			blkno = ((ufs1_daddr_t *)(bp->b_data))[indiroff];
398 		else
399 			blkno = ((ufs2_daddr_t *)(bp->b_data))[indiroff];
400 		/*
401 		 * If the block number is non-zero, then the indirect block
402 		 * must have been previously allocated and need not be written.
403 		 * If the block number is zero, then we may have allocated
404 		 * the indirect block and hence need to write it out.
405 		 */
406 		if (blkno != 0)
407 			brelse(bp);
408 		else if (flags & IO_SYNC)
409 			bwrite(bp);
410 		else
411 			bdwrite(bp);
412 	}
413 	/*
414 	 * If the block number at the new end of the file is zero,
415 	 * then we must allocate it to ensure that the last block of
416 	 * the file is allocated. Soft updates does not handle this
417 	 * case, so here we have to clean up the soft updates data
418 	 * structures describing the allocation past the truncation
419 	 * point. Finding and deallocating those structures is a lot of
420 	 * work. Since partial truncation with a hole at the end occurs
421 	 * rarely, we solve the problem by syncing the file so that it
422 	 * will have no soft updates data structures left.
423 	 */
424 	if (blkno == 0 && (error = ffs_syncvnode(vp, MNT_WAIT, 0)) != 0)
425 		return (error);
426 	if (blkno != 0 && DOINGSOFTDEP(vp)) {
427 		if (softdeptrunc == 0 && journaltrunc == 0) {
428 			/*
429 			 * If soft updates cannot handle this truncation,
430 			 * clean up soft dependency data structures and
431 			 * fall through to the synchronous truncation.
432 			 */
433 			if ((error = ffs_syncvnode(vp, MNT_WAIT, 0)) != 0)
434 				return (error);
435 		} else {
436 			flags = IO_NORMAL | (needextclean ? IO_EXT: 0);
437 			if (journaltrunc)
438 				softdep_journal_freeblocks(ip, cred, length,
439 				    flags);
440 			else
441 				softdep_setup_freeblocks(ip, length, flags);
442 			ASSERT_VOP_LOCKED(vp, "ffs_truncate1");
443 			if (journaltrunc == 0) {
444 				UFS_INODE_SET_FLAG(ip, IN_CHANGE | IN_UPDATE);
445 				error = ffs_update(vp, 0);
446 			}
447 			return (error);
448 		}
449 	}
450 	/*
451 	 * Shorten the size of the file. If the last block of the
452 	 * shortened file is unallocated, we must allocate it.
453 	 * Additionally, if the file is not being truncated to a
454 	 * block boundary, the contents of the partial block
455 	 * following the end of the file must be zero'ed in
456 	 * case it ever becomes accessible again because of
457 	 * subsequent file growth. Directories however are not
458 	 * zero'ed as they should grow back initialized to empty.
459 	 */
460 	offset = blkoff(fs, length);
461 	if (blkno != 0 && offset == 0) {
462 		ip->i_size = length;
463 		DIP_SET(ip, i_size, length);
464 		UFS_INODE_SET_FLAG(ip, IN_SIZEMOD | IN_CHANGE | IN_UPDATE);
465 #ifdef UFS_DIRHASH
466 		if (vp->v_type == VDIR && ip->i_dirhash != NULL)
467 			ufsdirhash_dirtrunc(ip, length);
468 #endif
469 	} else {
470 		lbn = lblkno(fs, length);
471 		flags |= BA_CLRBUF;
472 		error = UFS_BALLOC(vp, length - 1, 1, cred, flags, &bp);
473 		if (error)
474 			return (error);
475 		ffs_inode_bwrite(vp, bp, flags);
476 
477 		/*
478 		 * When we are doing soft updates and the UFS_BALLOC
479 		 * above fills in a direct block hole with a full sized
480 		 * block that will be truncated down to a fragment below,
481 		 * we must flush out the block dependency with an FSYNC
482 		 * so that we do not get a soft updates inconsistency
483 		 * when we create the fragment below.
484 		 */
485 		if (DOINGSOFTDEP(vp) && lbn < UFS_NDADDR &&
486 		    fragroundup(fs, blkoff(fs, length)) < fs->fs_bsize &&
487 		    (error = ffs_syncvnode(vp, MNT_WAIT, 0)) != 0)
488 			return (error);
489 
490 		error = UFS_BALLOC(vp, length - 1, 1, cred, flags, &bp);
491 		if (error)
492 			return (error);
493 		ip->i_size = length;
494 		DIP_SET(ip, i_size, length);
495 #ifdef UFS_DIRHASH
496 		if (vp->v_type == VDIR && ip->i_dirhash != NULL)
497 			ufsdirhash_dirtrunc(ip, length);
498 #endif
499 		size = blksize(fs, ip, lbn);
500 		if (vp->v_type != VDIR && offset != 0)
501 			bzero((char *)bp->b_data + offset,
502 			    (uint64_t)(size - offset));
503 		/* Kirk's code has reallocbuf(bp, size, 1) here */
504 		allocbuf(bp, size);
505 		if (bp->b_bufsize == fs->fs_bsize)
506 			bp->b_flags |= B_CLUSTEROK;
507 		ffs_inode_bwrite(vp, bp, flags);
508 		UFS_INODE_SET_FLAG(ip, IN_SIZEMOD | IN_CHANGE | IN_UPDATE);
509 	}
510 	/*
511 	 * Calculate index into inode's block list of
512 	 * last direct and indirect blocks (if any)
513 	 * which we want to keep.  Lastblock is -1 when
514 	 * the file is truncated to 0.
515 	 */
516 	lastblock = lblkno(fs, length + fs->fs_bsize - 1) - 1;
517 	lastiblock[SINGLE] = lastblock - UFS_NDADDR;
518 	lastiblock[DOUBLE] = lastiblock[SINGLE] - NINDIR(fs);
519 	lastiblock[TRIPLE] = lastiblock[DOUBLE] - NINDIR(fs) * NINDIR(fs);
520 	nblocks = btodb(fs->fs_bsize);
521 	/*
522 	 * Update file and block pointers on disk before we start freeing
523 	 * blocks.  If we crash before free'ing blocks below, the blocks
524 	 * will be returned to the free list.  lastiblock values are also
525 	 * normalized to -1 for calls to ffs_indirtrunc below.
526 	 */
527 	for (level = TRIPLE; level >= SINGLE; level--) {
528 		oldblks[UFS_NDADDR + level] = DIP(ip, i_ib[level]);
529 		if (lastiblock[level] < 0) {
530 			DIP_SET(ip, i_ib[level], 0);
531 			lastiblock[level] = -1;
532 		}
533 	}
534 	for (i = 0; i < UFS_NDADDR; i++) {
535 		oldblks[i] = DIP(ip, i_db[i]);
536 		if (i > lastblock)
537 			DIP_SET(ip, i_db[i], 0);
538 	}
539 	UFS_INODE_SET_FLAG(ip, IN_CHANGE | IN_UPDATE);
540 	allerror = ffs_update(vp, waitforupdate);
541 
542 	/*
543 	 * Having written the new inode to disk, save its new configuration
544 	 * and put back the old block pointers long enough to process them.
545 	 * Note that we save the new block configuration so we can check it
546 	 * when we are done.
547 	 */
548 	for (i = 0; i < UFS_NDADDR; i++) {
549 #ifdef INVARIANTS
550 		newblks[i] = DIP(ip, i_db[i]);
551 #endif
552 		DIP_SET(ip, i_db[i], oldblks[i]);
553 	}
554 	for (i = 0; i < UFS_NIADDR; i++) {
555 #ifdef INVARIANTS
556 		newblks[UFS_NDADDR + i] = DIP(ip, i_ib[i]);
557 #endif
558 		DIP_SET(ip, i_ib[i], oldblks[UFS_NDADDR + i]);
559 	}
560 	ip->i_size = osize;
561 	DIP_SET(ip, i_size, osize);
562 	UFS_INODE_SET_FLAG(ip, IN_SIZEMOD | IN_CHANGE | IN_UPDATE);
563 
564 	error = vtruncbuf(vp, length, fs->fs_bsize);
565 	if (error && (allerror == 0))
566 		allerror = error;
567 
568 	/*
569 	 * Indirect blocks first.
570 	 */
571 	indir_lbn[SINGLE] = -UFS_NDADDR;
572 	indir_lbn[DOUBLE] = indir_lbn[SINGLE] - NINDIR(fs) - 1;
573 	indir_lbn[TRIPLE] = indir_lbn[DOUBLE] - NINDIR(fs) * NINDIR(fs) - 1;
574 	for (level = TRIPLE; level >= SINGLE; level--) {
575 		bn = DIP(ip, i_ib[level]);
576 		if (bn != 0) {
577 			error = ffs_indirtrunc(ip, indir_lbn[level],
578 			    fsbtodb(fs, bn), lastiblock[level], level, &count);
579 			if (error)
580 				allerror = error;
581 			blocksreleased += count;
582 			if (lastiblock[level] < 0) {
583 				DIP_SET(ip, i_ib[level], 0);
584 				ffs_blkfree(ump, fs, ump->um_devvp, bn,
585 				    fs->fs_bsize, ip->i_number,
586 				    vp->v_type, NULL, SINGLETON_KEY);
587 				blocksreleased += nblocks;
588 			}
589 		}
590 		if (lastiblock[level] >= 0)
591 			goto done;
592 	}
593 
594 	/*
595 	 * All whole direct blocks or frags.
596 	 */
597 	key = ffs_blkrelease_start(ump, ump->um_devvp, ip->i_number);
598 	for (i = UFS_NDADDR - 1; i > lastblock; i--) {
599 		long bsize;
600 
601 		bn = DIP(ip, i_db[i]);
602 		if (bn == 0)
603 			continue;
604 		DIP_SET(ip, i_db[i], 0);
605 		bsize = blksize(fs, ip, i);
606 		ffs_blkfree(ump, fs, ump->um_devvp, bn, bsize, ip->i_number,
607 		    vp->v_type, NULL, key);
608 		blocksreleased += btodb(bsize);
609 	}
610 	ffs_blkrelease_finish(ump, key);
611 	if (lastblock < 0)
612 		goto done;
613 
614 	/*
615 	 * Finally, look for a change in size of the
616 	 * last direct block; release any frags.
617 	 */
618 	bn = DIP(ip, i_db[lastblock]);
619 	if (bn != 0) {
620 		long oldspace, newspace;
621 
622 		/*
623 		 * Calculate amount of space we're giving
624 		 * back as old block size minus new block size.
625 		 */
626 		oldspace = blksize(fs, ip, lastblock);
627 		ip->i_size = length;
628 		DIP_SET(ip, i_size, length);
629 		UFS_INODE_SET_FLAG(ip, IN_SIZEMOD | IN_CHANGE | IN_UPDATE);
630 		newspace = blksize(fs, ip, lastblock);
631 		if (newspace == 0)
632 			panic("ffs_truncate: newspace");
633 		if (oldspace - newspace > 0) {
634 			/*
635 			 * Block number of space to be free'd is
636 			 * the old block # plus the number of frags
637 			 * required for the storage we're keeping.
638 			 */
639 			bn += numfrags(fs, newspace);
640 			ffs_blkfree(ump, fs, ump->um_devvp, bn,
641 			   oldspace - newspace, ip->i_number, vp->v_type,
642 			   NULL, SINGLETON_KEY);
643 			blocksreleased += btodb(oldspace - newspace);
644 		}
645 	}
646 done:
647 #ifdef INVARIANTS
648 	for (level = SINGLE; level <= TRIPLE; level++)
649 		if (newblks[UFS_NDADDR + level] != DIP(ip, i_ib[level]))
650 			panic("ffs_truncate1: level %d newblks %jd != i_ib %jd",
651 			    level, (intmax_t)newblks[UFS_NDADDR + level],
652 			    (intmax_t)DIP(ip, i_ib[level]));
653 	for (i = 0; i < UFS_NDADDR; i++)
654 		if (newblks[i] != DIP(ip, i_db[i]))
655 			panic("ffs_truncate2: blkno %d newblks %jd != i_db %jd",
656 			    i, (intmax_t)newblks[UFS_NDADDR + level],
657 			    (intmax_t)DIP(ip, i_ib[level]));
658 	BO_LOCK(bo);
659 	if (length == 0 &&
660 	    (fs->fs_magic != FS_UFS2_MAGIC || ip->i_din2->di_extsize == 0) &&
661 	    (bo->bo_dirty.bv_cnt > 0 || bo->bo_clean.bv_cnt > 0))
662 		panic("ffs_truncate3: vp = %p, buffers: dirty = %d, clean = %d",
663 			vp, bo->bo_dirty.bv_cnt, bo->bo_clean.bv_cnt);
664 	BO_UNLOCK(bo);
665 #endif /* INVARIANTS */
666 	/*
667 	 * Put back the real size.
668 	 */
669 	ip->i_size = length;
670 	DIP_SET(ip, i_size, length);
671 	if (DIP(ip, i_blocks) >= blocksreleased)
672 		DIP_SET(ip, i_blocks, DIP(ip, i_blocks) - blocksreleased);
673 	else	/* sanity */
674 		DIP_SET(ip, i_blocks, 0);
675 	UFS_INODE_SET_FLAG(ip, IN_SIZEMOD | IN_CHANGE);
676 #ifdef QUOTA
677 	(void) chkdq(ip, -blocksreleased, NOCRED, FORCE);
678 #endif
679 	return (allerror);
680 
681 extclean:
682 	if (journaltrunc)
683 		softdep_journal_freeblocks(ip, cred, length, IO_EXT);
684 	else
685 		softdep_setup_freeblocks(ip, length, IO_EXT);
686 	return (ffs_update(vp, waitforupdate));
687 }
688 
689 /*
690  * Release blocks associated with the inode ip and stored in the indirect
691  * block bn.  Blocks are free'd in LIFO order up to (but not including)
692  * lastbn.  If level is greater than SINGLE, the block is an indirect block
693  * and recursive calls to indirtrunc must be used to cleanse other indirect
694  * blocks.
695  */
696 static int
697 ffs_indirtrunc(struct inode *ip,
698 	ufs2_daddr_t lbn,
699 	ufs2_daddr_t dbn,
700 	ufs2_daddr_t lastbn,
701 	int level,
702 	ufs2_daddr_t *countp)
703 {
704 	struct buf *bp;
705 	struct fs *fs;
706 	struct ufsmount *ump;
707 	struct vnode *vp;
708 	caddr_t copy = NULL;
709 	uint64_t key;
710 	int i, nblocks, error = 0, allerror = 0;
711 	ufs2_daddr_t nb, nlbn, last;
712 	ufs2_daddr_t blkcount, factor, blocksreleased = 0;
713 	ufs1_daddr_t *bap1 = NULL;
714 	ufs2_daddr_t *bap2 = NULL;
715 #define BAP(ip, i) (I_IS_UFS1(ip) ? bap1[i] : bap2[i])
716 
717 	fs = ITOFS(ip);
718 	ump = ITOUMP(ip);
719 
720 	/*
721 	 * Calculate index in current block of last
722 	 * block to be kept.  -1 indicates the entire
723 	 * block so we need not calculate the index.
724 	 */
725 	factor = lbn_offset(fs, level);
726 	last = lastbn;
727 	if (lastbn > 0)
728 		last /= factor;
729 	nblocks = btodb(fs->fs_bsize);
730 	/*
731 	 * Get buffer of block pointers, zero those entries corresponding
732 	 * to blocks to be free'd, and update on disk copy first.  Since
733 	 * double(triple) indirect before single(double) indirect, calls
734 	 * to VOP_BMAP() on these blocks will fail.  However, we already
735 	 * have the on-disk address, so we just pass it to bread() instead
736 	 * of having bread() attempt to calculate it using VOP_BMAP().
737 	 */
738 	vp = ITOV(ip);
739 	error = ffs_breadz(ump, vp, lbn, dbn, (int)fs->fs_bsize, NULL, NULL, 0,
740 	    NOCRED, 0, NULL, &bp);
741 	if (error) {
742 		*countp = 0;
743 		return (error);
744 	}
745 
746 	if (I_IS_UFS1(ip))
747 		bap1 = (ufs1_daddr_t *)bp->b_data;
748 	else
749 		bap2 = (ufs2_daddr_t *)bp->b_data;
750 	if (lastbn != -1) {
751 		copy = malloc(fs->fs_bsize, M_TEMP, M_WAITOK);
752 		bcopy((caddr_t)bp->b_data, copy, (uint64_t)fs->fs_bsize);
753 		for (i = last + 1; i < NINDIR(fs); i++)
754 			if (I_IS_UFS1(ip))
755 				bap1[i] = 0;
756 			else
757 				bap2[i] = 0;
758 		if (DOINGASYNC(vp)) {
759 			bdwrite(bp);
760 		} else {
761 			error = bwrite(bp);
762 			if (error)
763 				allerror = error;
764 		}
765 		if (I_IS_UFS1(ip))
766 			bap1 = (ufs1_daddr_t *)copy;
767 		else
768 			bap2 = (ufs2_daddr_t *)copy;
769 	}
770 
771 	/*
772 	 * Recursively free totally unused blocks.
773 	 */
774 	key = ffs_blkrelease_start(ump, ITODEVVP(ip), ip->i_number);
775 	for (i = NINDIR(fs) - 1, nlbn = lbn + 1 - i * factor; i > last;
776 	    i--, nlbn += factor) {
777 		nb = BAP(ip, i);
778 		if (nb == 0)
779 			continue;
780 		if (level > SINGLE) {
781 			if ((error = ffs_indirtrunc(ip, nlbn, fsbtodb(fs, nb),
782 			    (ufs2_daddr_t)-1, level - 1, &blkcount)) != 0)
783 				allerror = error;
784 			blocksreleased += blkcount;
785 		}
786 		ffs_blkfree(ump, fs, ITODEVVP(ip), nb, fs->fs_bsize,
787 		    ip->i_number, vp->v_type, NULL, key);
788 		blocksreleased += nblocks;
789 	}
790 	ffs_blkrelease_finish(ump, key);
791 
792 	/*
793 	 * Recursively free last partial block.
794 	 */
795 	if (level > SINGLE && lastbn >= 0) {
796 		last = lastbn % factor;
797 		nb = BAP(ip, i);
798 		if (nb != 0) {
799 			error = ffs_indirtrunc(ip, nlbn, fsbtodb(fs, nb),
800 			    last, level - 1, &blkcount);
801 			if (error)
802 				allerror = error;
803 			blocksreleased += blkcount;
804 		}
805 	}
806 	if (copy != NULL) {
807 		free(copy, M_TEMP);
808 	} else {
809 		bp->b_flags |= B_INVAL | B_NOCACHE;
810 		brelse(bp);
811 	}
812 
813 	*countp = blocksreleased;
814 	return (allerror);
815 }
816 
817 int
818 ffs_rdonly(struct inode *ip)
819 {
820 
821 	return (ITOFS(ip)->fs_ronly != 0);
822 }
823