1 /* $OpenBSD: ffs_inode.c,v 1.82 2024/01/09 03:15:59 guenther Exp $ */ 2 /* $NetBSD: ffs_inode.c,v 1.10 1996/05/11 18:27:19 mycroft Exp $ */ 3 4 /* 5 * Copyright (c) 1982, 1986, 1989, 1993 6 * The Regents of the University of California. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of the University nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 * 32 * @(#)ffs_inode.c 8.8 (Berkeley) 10/19/94 33 */ 34 35 #include <sys/param.h> 36 #include <sys/systm.h> 37 #include <sys/mount.h> 38 #include <sys/proc.h> 39 #include <sys/buf.h> 40 #include <sys/vnode.h> 41 #include <sys/kernel.h> 42 #include <sys/malloc.h> 43 #include <sys/resourcevar.h> 44 45 #include <ufs/ufs/quota.h> 46 #include <ufs/ufs/inode.h> 47 #include <ufs/ufs/ufsmount.h> 48 #include <ufs/ufs/ufs_extern.h> 49 50 #include <ufs/ffs/fs.h> 51 #include <ufs/ffs/ffs_extern.h> 52 53 int ffs_indirtrunc(struct inode *, daddr_t, daddr_t, daddr_t, int, long *); 54 55 /* 56 * Update the access, modified, and inode change times as specified by the 57 * IN_ACCESS, IN_UPDATE, and IN_CHANGE flags respectively. The IN_MODIFIED 58 * flag is used to specify that the inode needs to be updated but that the 59 * times have already been set. The IN_LAZYMOD flag is used to specify 60 * that the inode needs to be updated at some point, by reclaim if not 61 * in the course of other changes; this is used to defer writes just to 62 * update device timestamps. If waitfor is set, then wait for the disk 63 * write of the inode to complete. 64 */ 65 int 66 ffs_update(struct inode *ip, int waitfor) 67 { 68 struct vnode *vp; 69 struct fs *fs; 70 struct buf *bp; 71 int error; 72 73 vp = ITOV(ip); 74 ufs_itimes(vp); 75 76 if ((ip->i_flag & IN_MODIFIED) == 0 && waitfor == 0) 77 return (0); 78 79 ip->i_flag &= ~(IN_MODIFIED | IN_LAZYMOD); 80 fs = ip->i_fs; 81 82 /* 83 * Ensure that uid and gid are correct. This is a temporary 84 * fix until fsck has been changed to do the update. 85 */ 86 if (fs->fs_magic == FS_UFS1_MAGIC && fs->fs_inodefmt < FS_44INODEFMT) { 87 ip->i_din1->di_ouid = ip->i_ffs1_uid; 88 ip->i_din1->di_ogid = ip->i_ffs1_gid; 89 } 90 91 error = bread(ip->i_devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)), 92 (int)fs->fs_bsize, &bp); 93 if (error) { 94 brelse(bp); 95 return (error); 96 } 97 98 if (DOINGSOFTDEP(vp)) 99 softdep_update_inodeblock(ip, bp, waitfor); 100 else if (ip->i_effnlink != DIP(ip, nlink)) 101 panic("ffs_update: bad link cnt"); 102 103 #ifdef FFS2 104 if (ip->i_ump->um_fstype == UM_UFS2) 105 *((struct ufs2_dinode *)bp->b_data + 106 ino_to_fsbo(fs, ip->i_number)) = *ip->i_din2; 107 else 108 #endif 109 *((struct ufs1_dinode *)bp->b_data + 110 ino_to_fsbo(fs, ip->i_number)) = *ip->i_din1; 111 112 if (waitfor && !DOINGASYNC(vp)) { 113 return (bwrite(bp)); 114 } else { 115 bdwrite(bp); 116 return (0); 117 } 118 } 119 120 #define SINGLE 0 /* index of single indirect block */ 121 #define DOUBLE 1 /* index of double indirect block */ 122 #define TRIPLE 2 /* index of triple indirect block */ 123 124 /* 125 * Truncate the inode oip to at most length size, freeing the 126 * disk blocks. 127 */ 128 int 129 ffs_truncate(struct inode *oip, off_t length, int flags, struct ucred *cred) 130 { 131 struct vnode *ovp; 132 daddr_t lastblock; 133 daddr_t bn, lbn, lastiblock[NIADDR], indir_lbn[NIADDR]; 134 daddr_t oldblks[NDADDR + NIADDR], newblks[NDADDR + NIADDR]; 135 struct fs *fs; 136 struct buf *bp; 137 int offset, size, level; 138 long count, nblocks, vflags, blocksreleased = 0; 139 int i, aflags, error, allerror; 140 off_t osize; 141 142 if (length < 0) 143 return (EINVAL); 144 ovp = ITOV(oip); 145 146 if (ovp->v_type != VREG && 147 ovp->v_type != VDIR && 148 ovp->v_type != VLNK) 149 return (0); 150 151 if (DIP(oip, size) == length) 152 return (0); 153 154 if (ovp->v_type == VLNK && 155 DIP(oip, size) < oip->i_ump->um_maxsymlinklen) { 156 #ifdef DIAGNOSTIC 157 if (length != 0) 158 panic("ffs_truncate: partial truncate of symlink"); 159 #endif 160 memset(SHORTLINK(oip), 0, (size_t) DIP(oip, size)); 161 DIP_ASSIGN(oip, size, 0); 162 oip->i_flag |= IN_CHANGE | IN_UPDATE; 163 return (UFS_UPDATE(oip, 1)); 164 } 165 166 if ((error = getinoquota(oip)) != 0) 167 return (error); 168 169 fs = oip->i_fs; 170 if (length > fs->fs_maxfilesize) 171 return (EFBIG); 172 173 uvm_vnp_setsize(ovp, length); 174 oip->i_ci.ci_lasta = oip->i_ci.ci_clen 175 = oip->i_ci.ci_cstart = oip->i_ci.ci_lastw = 0; 176 177 if (DOINGSOFTDEP(ovp)) { 178 if (length > 0 || softdep_slowdown(ovp)) { 179 /* 180 * If a file is only partially truncated, then 181 * we have to clean up the data structures 182 * describing the allocation past the truncation 183 * point. Finding and deallocating those structures 184 * is a lot of work. Since partial truncation occurs 185 * rarely, we solve the problem by syncing the file 186 * so that it will have no data structures left. 187 */ 188 if ((error = VOP_FSYNC(ovp, cred, MNT_WAIT, 189 curproc)) != 0) 190 return (error); 191 } else { 192 (void)ufs_quota_free_blocks(oip, DIP(oip, blocks), 193 NOCRED); 194 softdep_setup_freeblocks(oip, length); 195 vinvalbuf(ovp, 0, cred, curproc, 0, INFSLP); 196 oip->i_flag |= IN_CHANGE | IN_UPDATE; 197 return (UFS_UPDATE(oip, 0)); 198 } 199 } 200 201 osize = DIP(oip, size); 202 /* 203 * Lengthen the size of the file. We must ensure that the 204 * last byte of the file is allocated. Since the smallest 205 * value of osize is 0, length will be at least 1. 206 */ 207 if (osize < length) { 208 aflags = B_CLRBUF; 209 if (flags & IO_SYNC) 210 aflags |= B_SYNC; 211 error = UFS_BUF_ALLOC(oip, length - 1, 1, 212 cred, aflags, &bp); 213 if (error) 214 return (error); 215 DIP_ASSIGN(oip, size, length); 216 uvm_vnp_setsize(ovp, length); 217 (void) uvm_vnp_uncache(ovp); 218 if (aflags & B_SYNC) 219 bwrite(bp); 220 else 221 bawrite(bp); 222 oip->i_flag |= IN_CHANGE | IN_UPDATE; 223 return (UFS_UPDATE(oip, 1)); 224 } 225 uvm_vnp_setsize(ovp, length); 226 227 /* 228 * Shorten the size of the file. If the file is not being 229 * truncated to a block boundary, the contents of the 230 * partial block following the end of the file must be 231 * zero'ed in case it ever becomes accessible again because 232 * of subsequent file growth. Directories however are not 233 * zero'ed as they should grow back initialized to empty. 234 */ 235 offset = blkoff(fs, length); 236 if (offset == 0) { 237 DIP_ASSIGN(oip, size, length); 238 } else { 239 lbn = lblkno(fs, length); 240 aflags = B_CLRBUF; 241 if (flags & IO_SYNC) 242 aflags |= B_SYNC; 243 error = UFS_BUF_ALLOC(oip, length - 1, 1, 244 cred, aflags, &bp); 245 if (error) 246 return (error); 247 /* 248 * When we are doing soft updates and the UFS_BALLOC 249 * above fills in a direct block hole with a full sized 250 * block that will be truncated down to a fragment below, 251 * we must flush out the block dependency with an FSYNC 252 * so that we do not get a soft updates inconsistency 253 * when we create the fragment below. 254 */ 255 if (DOINGSOFTDEP(ovp) && lbn < NDADDR && 256 fragroundup(fs, blkoff(fs, length)) < fs->fs_bsize && 257 (error = VOP_FSYNC(ovp, cred, MNT_WAIT, curproc)) != 0) 258 return (error); 259 DIP_ASSIGN(oip, size, length); 260 size = blksize(fs, oip, lbn); 261 (void) uvm_vnp_uncache(ovp); 262 if (ovp->v_type != VDIR) 263 memset(bp->b_data + offset, 0, size - offset); 264 buf_adjcnt(bp, size); 265 if (aflags & B_SYNC) 266 bwrite(bp); 267 else 268 bawrite(bp); 269 } 270 /* 271 * Calculate index into inode's block list of 272 * last direct and indirect blocks (if any) 273 * which we want to keep. Lastblock is -1 when 274 * the file is truncated to 0. 275 */ 276 lastblock = lblkno(fs, length + fs->fs_bsize - 1) - 1; 277 lastiblock[SINGLE] = lastblock - NDADDR; 278 lastiblock[DOUBLE] = lastiblock[SINGLE] - NINDIR(fs); 279 lastiblock[TRIPLE] = lastiblock[DOUBLE] - NINDIR(fs) * NINDIR(fs); 280 nblocks = btodb(fs->fs_bsize); 281 282 /* 283 * Update file and block pointers on disk before we start freeing 284 * blocks. If we crash before free'ing blocks below, the blocks 285 * will be returned to the free list. lastiblock values are also 286 * normalized to -1 for calls to ffs_indirtrunc below. 287 */ 288 for (level = TRIPLE; level >= SINGLE; level--) { 289 oldblks[NDADDR + level] = DIP(oip, ib[level]); 290 if (lastiblock[level] < 0) { 291 DIP_ASSIGN(oip, ib[level], 0); 292 lastiblock[level] = -1; 293 } 294 } 295 296 for (i = 0; i < NDADDR; i++) { 297 oldblks[i] = DIP(oip, db[i]); 298 if (i > lastblock) 299 DIP_ASSIGN(oip, db[i], 0); 300 } 301 302 oip->i_flag |= IN_CHANGE | IN_UPDATE; 303 if ((error = UFS_UPDATE(oip, 1)) != 0) 304 allerror = error; 305 306 /* 307 * Having written the new inode to disk, save its new configuration 308 * and put back the old block pointers long enough to process them. 309 * Note that we save the new block configuration so we can check it 310 * when we are done. 311 */ 312 for (i = 0; i < NDADDR; i++) { 313 newblks[i] = DIP(oip, db[i]); 314 DIP_ASSIGN(oip, db[i], oldblks[i]); 315 } 316 317 for (i = 0; i < NIADDR; i++) { 318 newblks[NDADDR + i] = DIP(oip, ib[i]); 319 DIP_ASSIGN(oip, ib[i], oldblks[NDADDR + i]); 320 } 321 322 DIP_ASSIGN(oip, size, osize); 323 vflags = ((length > 0) ? V_SAVE : 0) | V_SAVEMETA; 324 allerror = vinvalbuf(ovp, vflags, cred, curproc, 0, INFSLP); 325 326 /* 327 * Indirect blocks first. 328 */ 329 indir_lbn[SINGLE] = -NDADDR; 330 indir_lbn[DOUBLE] = indir_lbn[SINGLE] - NINDIR(fs) - 1; 331 indir_lbn[TRIPLE] = indir_lbn[DOUBLE] - NINDIR(fs) * NINDIR(fs) - 1; 332 for (level = TRIPLE; level >= SINGLE; level--) { 333 bn = DIP(oip, ib[level]); 334 if (bn != 0) { 335 error = ffs_indirtrunc(oip, indir_lbn[level], 336 fsbtodb(fs, bn), lastiblock[level], level, &count); 337 if (error) 338 allerror = error; 339 blocksreleased += count; 340 if (lastiblock[level] < 0) { 341 DIP_ASSIGN(oip, ib[level], 0); 342 ffs_blkfree(oip, bn, fs->fs_bsize); 343 blocksreleased += nblocks; 344 } 345 } 346 if (lastiblock[level] >= 0) 347 goto done; 348 } 349 350 /* 351 * All whole direct blocks or frags. 352 */ 353 for (i = NDADDR - 1; i > lastblock; i--) { 354 long bsize; 355 356 bn = DIP(oip, db[i]); 357 if (bn == 0) 358 continue; 359 360 DIP_ASSIGN(oip, db[i], 0); 361 bsize = blksize(fs, oip, i); 362 ffs_blkfree(oip, bn, bsize); 363 blocksreleased += btodb(bsize); 364 } 365 if (lastblock < 0) 366 goto done; 367 368 /* 369 * Finally, look for a change in size of the 370 * last direct block; release any frags. 371 */ 372 bn = DIP(oip, db[lastblock]); 373 if (bn != 0) { 374 long oldspace, newspace; 375 376 /* 377 * Calculate amount of space we're giving 378 * back as old block size minus new block size. 379 */ 380 oldspace = blksize(fs, oip, lastblock); 381 DIP_ASSIGN(oip, size, length); 382 newspace = blksize(fs, oip, lastblock); 383 if (newspace == 0) 384 panic("ffs_truncate: newspace"); 385 if (oldspace - newspace > 0) { 386 /* 387 * Block number of space to be free'd is 388 * the old block # plus the number of frags 389 * required for the storage we're keeping. 390 */ 391 bn += numfrags(fs, newspace); 392 ffs_blkfree(oip, bn, oldspace - newspace); 393 blocksreleased += btodb(oldspace - newspace); 394 } 395 } 396 done: 397 #ifdef DIAGNOSTIC 398 for (level = SINGLE; level <= TRIPLE; level++) 399 if (newblks[NDADDR + level] != DIP(oip, ib[level])) 400 panic("ffs_truncate1"); 401 for (i = 0; i < NDADDR; i++) 402 if (newblks[i] != DIP(oip, db[i])) 403 panic("ffs_truncate2"); 404 #endif /* DIAGNOSTIC */ 405 /* 406 * Put back the real size. 407 */ 408 DIP_ASSIGN(oip, size, length); 409 if (DIP(oip, blocks) >= blocksreleased) 410 DIP_ADD(oip, blocks, -blocksreleased); 411 else /* sanity */ 412 DIP_ASSIGN(oip, blocks, 0); 413 oip->i_flag |= IN_CHANGE; 414 (void)ufs_quota_free_blocks(oip, blocksreleased, NOCRED); 415 return (allerror); 416 } 417 418 #ifdef FFS2 419 #define BAP(ip, i) (((ip)->i_ump->um_fstype == UM_UFS2) ? bap2[i] : bap1[i]) 420 #define BAP_ASSIGN(ip, i, value) \ 421 do { \ 422 if ((ip)->i_ump->um_fstype == UM_UFS2) \ 423 bap2[i] = (value); \ 424 else \ 425 bap1[i] = (value); \ 426 } while (0) 427 #else 428 #define BAP(ip, i) bap1[i] 429 #define BAP_ASSIGN(ip, i, value) do { bap1[i] = (value); } while (0) 430 #endif /* FFS2 */ 431 432 /* 433 * Release blocks associated with the inode ip and stored in the indirect 434 * block bn. Blocks are free'd in LIFO order up to (but not including) 435 * lastbn. If level is greater than SINGLE, the block is an indirect block 436 * and recursive calls to indirtrunc must be used to cleanse other indirect 437 * blocks. 438 * 439 * NB: triple indirect blocks are untested. 440 */ 441 int 442 ffs_indirtrunc(struct inode *ip, daddr_t lbn, daddr_t dbn, 443 daddr_t lastbn, int level, long *countp) 444 { 445 int i; 446 struct buf *bp; 447 struct fs *fs = ip->i_fs; 448 struct vnode *vp; 449 void *copy = NULL; 450 daddr_t nb, nlbn, last; 451 long blkcount, factor; 452 int nblocks, blocksreleased = 0; 453 int error = 0, allerror = 0; 454 int32_t *bap1 = NULL; 455 #ifdef FFS2 456 int64_t *bap2 = NULL; 457 #endif 458 459 /* 460 * Calculate index in current block of last 461 * block to be kept. -1 indicates the entire 462 * block so we need not calculate the index. 463 */ 464 factor = 1; 465 for (i = SINGLE; i < level; i++) 466 factor *= NINDIR(fs); 467 last = lastbn; 468 if (lastbn > 0) 469 last /= factor; 470 nblocks = btodb(fs->fs_bsize); 471 /* 472 * Get buffer of block pointers, zero those entries corresponding 473 * to blocks to be free'd, and update on disk copy first. Since 474 * double(triple) indirect before single(double) indirect, calls 475 * to bmap on these blocks will fail. However, we already have 476 * the on disk address, so we have to set the b_blkno field 477 * explicitly instead of letting bread do everything for us. 478 */ 479 vp = ITOV(ip); 480 bp = getblk(vp, lbn, (int)fs->fs_bsize, 0, INFSLP); 481 if (!(bp->b_flags & (B_DONE | B_DELWRI))) { 482 curproc->p_ru.ru_inblock++; /* pay for read */ 483 bcstats.pendingreads++; 484 bcstats.numreads++; 485 bp->b_flags |= B_READ; 486 if (bp->b_bcount > bp->b_bufsize) 487 panic("ffs_indirtrunc: bad buffer size"); 488 bp->b_blkno = dbn; 489 VOP_STRATEGY(bp->b_vp, bp); 490 error = biowait(bp); 491 } 492 if (error) { 493 brelse(bp); 494 *countp = 0; 495 return (error); 496 } 497 498 #ifdef FFS2 499 if (ip->i_ump->um_fstype == UM_UFS2) 500 bap2 = (int64_t *)bp->b_data; 501 else 502 #endif 503 bap1 = (int32_t *)bp->b_data; 504 505 if (lastbn != -1) { 506 copy = malloc(fs->fs_bsize, M_TEMP, M_WAITOK); 507 memcpy(copy, bp->b_data, fs->fs_bsize); 508 509 for (i = last + 1; i < NINDIR(fs); i++) 510 BAP_ASSIGN(ip, i, 0); 511 512 if (!DOINGASYNC(vp)) { 513 error = bwrite(bp); 514 if (error) 515 allerror = error; 516 } else { 517 bawrite(bp); 518 } 519 520 #ifdef FFS2 521 if (ip->i_ump->um_fstype == UM_UFS2) 522 bap2 = (int64_t *)copy; 523 else 524 #endif 525 bap1 = (int32_t *)copy; 526 } 527 528 /* 529 * Recursively free totally unused blocks. 530 */ 531 for (i = NINDIR(fs) - 1, nlbn = lbn + 1 - i * factor; i > last; 532 i--, nlbn += factor) { 533 nb = BAP(ip, i); 534 if (nb == 0) 535 continue; 536 if (level > SINGLE) { 537 error = ffs_indirtrunc(ip, nlbn, fsbtodb(fs, nb), 538 -1, level - 1, &blkcount); 539 if (error) 540 allerror = error; 541 blocksreleased += blkcount; 542 } 543 ffs_blkfree(ip, nb, fs->fs_bsize); 544 blocksreleased += nblocks; 545 } 546 547 /* 548 * Recursively free last partial block. 549 */ 550 if (level > SINGLE && lastbn >= 0) { 551 last = lastbn % factor; 552 nb = BAP(ip, i); 553 if (nb != 0) { 554 error = ffs_indirtrunc(ip, nlbn, fsbtodb(fs, nb), 555 last, level - 1, &blkcount); 556 if (error) 557 allerror = error; 558 blocksreleased += blkcount; 559 } 560 } 561 if (copy != NULL) { 562 free(copy, M_TEMP, fs->fs_bsize); 563 } else { 564 bp->b_flags |= B_INVAL; 565 brelse(bp); 566 } 567 568 *countp = blocksreleased; 569 return (allerror); 570 } 571