1 /* $OpenBSD: ffs_inode.c,v 1.80 2019/07/25 01:43:21 cheloha Exp $ */ 2 /* $NetBSD: ffs_inode.c,v 1.10 1996/05/11 18:27:19 mycroft Exp $ */ 3 4 /* 5 * Copyright (c) 1982, 1986, 1989, 1993 6 * The Regents of the University of California. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of the University nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 * 32 * @(#)ffs_inode.c 8.8 (Berkeley) 10/19/94 33 */ 34 35 #include <sys/param.h> 36 #include <sys/systm.h> 37 #include <sys/mount.h> 38 #include <sys/proc.h> 39 #include <sys/buf.h> 40 #include <sys/vnode.h> 41 #include <sys/kernel.h> 42 #include <sys/malloc.h> 43 #include <sys/resourcevar.h> 44 45 #include <ufs/ufs/quota.h> 46 #include <ufs/ufs/inode.h> 47 #include <ufs/ufs/ufsmount.h> 48 #include <ufs/ufs/ufs_extern.h> 49 50 #include <ufs/ffs/fs.h> 51 #include <ufs/ffs/ffs_extern.h> 52 53 int ffs_indirtrunc(struct inode *, daddr_t, daddr_t, daddr_t, int, long *); 54 55 /* 56 * Update the access, modified, and inode change times as specified by the 57 * IN_ACCESS, IN_UPDATE, and IN_CHANGE flags respectively. The IN_MODIFIED 58 * flag is used to specify that the inode needs to be updated but that the 59 * times have already been set. The IN_LAZYMOD flag is used to specify 60 * that the inode needs to be updated at some point, by reclaim if not 61 * in the course of other changes; this is used to defer writes just to 62 * update device timestamps. If waitfor is set, then wait for the disk 63 * write of the inode to complete. 64 */ 65 int 66 ffs_update(struct inode *ip, int waitfor) 67 { 68 struct vnode *vp; 69 struct fs *fs; 70 struct buf *bp; 71 int error; 72 73 vp = ITOV(ip); 74 ufs_itimes(vp); 75 76 if ((ip->i_flag & IN_MODIFIED) == 0 && waitfor == 0) 77 return (0); 78 79 ip->i_flag &= ~(IN_MODIFIED | IN_LAZYMOD); 80 fs = ip->i_fs; 81 82 /* 83 * Ensure that uid and gid are correct. This is a temporary 84 * fix until fsck has been changed to do the update. 85 */ 86 if (fs->fs_magic == FS_UFS1_MAGIC && fs->fs_inodefmt < FS_44INODEFMT) { 87 ip->i_din1->di_ouid = ip->i_ffs1_uid; 88 ip->i_din1->di_ogid = ip->i_ffs1_gid; 89 } 90 91 error = bread(ip->i_devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)), 92 (int)fs->fs_bsize, &bp); 93 if (error) { 94 brelse(bp); 95 return (error); 96 } 97 98 if (DOINGSOFTDEP(vp)) 99 softdep_update_inodeblock(ip, bp, waitfor); 100 else if (ip->i_effnlink != DIP(ip, nlink)) 101 panic("ffs_update: bad link cnt"); 102 103 #ifdef FFS2 104 if (ip->i_ump->um_fstype == UM_UFS2) 105 *((struct ufs2_dinode *)bp->b_data + 106 ino_to_fsbo(fs, ip->i_number)) = *ip->i_din2; 107 else 108 #endif 109 *((struct ufs1_dinode *)bp->b_data + 110 ino_to_fsbo(fs, ip->i_number)) = *ip->i_din1; 111 112 if (waitfor && !DOINGASYNC(vp)) { 113 return (bwrite(bp)); 114 } else { 115 bdwrite(bp); 116 return (0); 117 } 118 } 119 120 #define SINGLE 0 /* index of single indirect block */ 121 #define DOUBLE 1 /* index of double indirect block */ 122 #define TRIPLE 2 /* index of triple indirect block */ 123 124 /* 125 * Truncate the inode oip to at most length size, freeing the 126 * disk blocks. 127 */ 128 int 129 ffs_truncate(struct inode *oip, off_t length, int flags, struct ucred *cred) 130 { 131 struct vnode *ovp; 132 daddr_t lastblock; 133 daddr_t bn, lbn, lastiblock[NIADDR], indir_lbn[NIADDR]; 134 daddr_t oldblks[NDADDR + NIADDR], newblks[NDADDR + NIADDR]; 135 struct fs *fs; 136 struct buf *bp; 137 int offset, size, level; 138 long count, nblocks, vflags, blocksreleased = 0; 139 int i, aflags, error, allerror; 140 off_t osize; 141 142 if (length < 0) 143 return (EINVAL); 144 ovp = ITOV(oip); 145 146 if (ovp->v_type != VREG && 147 ovp->v_type != VDIR && 148 ovp->v_type != VLNK) 149 return (0); 150 151 if (DIP(oip, size) == length) 152 return (0); 153 154 if (ovp->v_type == VLNK && 155 (DIP(oip, size) < oip->i_ump->um_maxsymlinklen || 156 (oip->i_ump->um_maxsymlinklen == 0 && 157 oip->i_din1->di_blocks == 0))) { 158 #ifdef DIAGNOSTIC 159 if (length != 0) 160 panic("ffs_truncate: partial truncate of symlink"); 161 #endif 162 memset(SHORTLINK(oip), 0, (size_t) DIP(oip, size)); 163 DIP_ASSIGN(oip, size, 0); 164 oip->i_flag |= IN_CHANGE | IN_UPDATE; 165 return (UFS_UPDATE(oip, 1)); 166 } 167 168 if ((error = getinoquota(oip)) != 0) 169 return (error); 170 171 fs = oip->i_fs; 172 if (length > fs->fs_maxfilesize) 173 return (EFBIG); 174 175 uvm_vnp_setsize(ovp, length); 176 oip->i_ci.ci_lasta = oip->i_ci.ci_clen 177 = oip->i_ci.ci_cstart = oip->i_ci.ci_lastw = 0; 178 179 if (DOINGSOFTDEP(ovp)) { 180 if (length > 0 || softdep_slowdown(ovp)) { 181 /* 182 * If a file is only partially truncated, then 183 * we have to clean up the data structures 184 * describing the allocation past the truncation 185 * point. Finding and deallocating those structures 186 * is a lot of work. Since partial truncation occurs 187 * rarely, we solve the problem by syncing the file 188 * so that it will have no data structures left. 189 */ 190 if ((error = VOP_FSYNC(ovp, cred, MNT_WAIT, 191 curproc)) != 0) 192 return (error); 193 } else { 194 (void)ufs_quota_free_blocks(oip, DIP(oip, blocks), 195 NOCRED); 196 softdep_setup_freeblocks(oip, length); 197 vinvalbuf(ovp, 0, cred, curproc, 0, INFSLP); 198 oip->i_flag |= IN_CHANGE | IN_UPDATE; 199 return (UFS_UPDATE(oip, 0)); 200 } 201 } 202 203 osize = DIP(oip, size); 204 /* 205 * Lengthen the size of the file. We must ensure that the 206 * last byte of the file is allocated. Since the smallest 207 * value of osize is 0, length will be at least 1. 208 */ 209 if (osize < length) { 210 aflags = B_CLRBUF; 211 if (flags & IO_SYNC) 212 aflags |= B_SYNC; 213 error = UFS_BUF_ALLOC(oip, length - 1, 1, 214 cred, aflags, &bp); 215 if (error) 216 return (error); 217 DIP_ASSIGN(oip, size, length); 218 uvm_vnp_setsize(ovp, length); 219 (void) uvm_vnp_uncache(ovp); 220 if (aflags & B_SYNC) 221 bwrite(bp); 222 else 223 bawrite(bp); 224 oip->i_flag |= IN_CHANGE | IN_UPDATE; 225 return (UFS_UPDATE(oip, 1)); 226 } 227 uvm_vnp_setsize(ovp, length); 228 229 /* 230 * Shorten the size of the file. If the file is not being 231 * truncated to a block boundary, the contents of the 232 * partial block following the end of the file must be 233 * zero'ed in case it ever becomes accessible again because 234 * of subsequent file growth. Directories however are not 235 * zero'ed as they should grow back initialized to empty. 236 */ 237 offset = blkoff(fs, length); 238 if (offset == 0) { 239 DIP_ASSIGN(oip, size, length); 240 } else { 241 lbn = lblkno(fs, length); 242 aflags = B_CLRBUF; 243 if (flags & IO_SYNC) 244 aflags |= B_SYNC; 245 error = UFS_BUF_ALLOC(oip, length - 1, 1, 246 cred, aflags, &bp); 247 if (error) 248 return (error); 249 /* 250 * When we are doing soft updates and the UFS_BALLOC 251 * above fills in a direct block hole with a full sized 252 * block that will be truncated down to a fragment below, 253 * we must flush out the block dependency with an FSYNC 254 * so that we do not get a soft updates inconsistency 255 * when we create the fragment below. 256 */ 257 if (DOINGSOFTDEP(ovp) && lbn < NDADDR && 258 fragroundup(fs, blkoff(fs, length)) < fs->fs_bsize && 259 (error = VOP_FSYNC(ovp, cred, MNT_WAIT, curproc)) != 0) 260 return (error); 261 DIP_ASSIGN(oip, size, length); 262 size = blksize(fs, oip, lbn); 263 (void) uvm_vnp_uncache(ovp); 264 if (ovp->v_type != VDIR) 265 memset(bp->b_data + offset, 0, size - offset); 266 buf_adjcnt(bp, size); 267 if (aflags & B_SYNC) 268 bwrite(bp); 269 else 270 bawrite(bp); 271 } 272 /* 273 * Calculate index into inode's block list of 274 * last direct and indirect blocks (if any) 275 * which we want to keep. Lastblock is -1 when 276 * the file is truncated to 0. 277 */ 278 lastblock = lblkno(fs, length + fs->fs_bsize - 1) - 1; 279 lastiblock[SINGLE] = lastblock - NDADDR; 280 lastiblock[DOUBLE] = lastiblock[SINGLE] - NINDIR(fs); 281 lastiblock[TRIPLE] = lastiblock[DOUBLE] - NINDIR(fs) * NINDIR(fs); 282 nblocks = btodb(fs->fs_bsize); 283 284 /* 285 * Update file and block pointers on disk before we start freeing 286 * blocks. If we crash before free'ing blocks below, the blocks 287 * will be returned to the free list. lastiblock values are also 288 * normalized to -1 for calls to ffs_indirtrunc below. 289 */ 290 for (level = TRIPLE; level >= SINGLE; level--) { 291 oldblks[NDADDR + level] = DIP(oip, ib[level]); 292 if (lastiblock[level] < 0) { 293 DIP_ASSIGN(oip, ib[level], 0); 294 lastiblock[level] = -1; 295 } 296 } 297 298 for (i = 0; i < NDADDR; i++) { 299 oldblks[i] = DIP(oip, db[i]); 300 if (i > lastblock) 301 DIP_ASSIGN(oip, db[i], 0); 302 } 303 304 oip->i_flag |= IN_CHANGE | IN_UPDATE; 305 if ((error = UFS_UPDATE(oip, 1)) != 0) 306 allerror = error; 307 308 /* 309 * Having written the new inode to disk, save its new configuration 310 * and put back the old block pointers long enough to process them. 311 * Note that we save the new block configuration so we can check it 312 * when we are done. 313 */ 314 for (i = 0; i < NDADDR; i++) { 315 newblks[i] = DIP(oip, db[i]); 316 DIP_ASSIGN(oip, db[i], oldblks[i]); 317 } 318 319 for (i = 0; i < NIADDR; i++) { 320 newblks[NDADDR + i] = DIP(oip, ib[i]); 321 DIP_ASSIGN(oip, ib[i], oldblks[NDADDR + i]); 322 } 323 324 DIP_ASSIGN(oip, size, osize); 325 vflags = ((length > 0) ? V_SAVE : 0) | V_SAVEMETA; 326 allerror = vinvalbuf(ovp, vflags, cred, curproc, 0, INFSLP); 327 328 /* 329 * Indirect blocks first. 330 */ 331 indir_lbn[SINGLE] = -NDADDR; 332 indir_lbn[DOUBLE] = indir_lbn[SINGLE] - NINDIR(fs) - 1; 333 indir_lbn[TRIPLE] = indir_lbn[DOUBLE] - NINDIR(fs) * NINDIR(fs) - 1; 334 for (level = TRIPLE; level >= SINGLE; level--) { 335 bn = DIP(oip, ib[level]); 336 if (bn != 0) { 337 error = ffs_indirtrunc(oip, indir_lbn[level], 338 fsbtodb(fs, bn), lastiblock[level], level, &count); 339 if (error) 340 allerror = error; 341 blocksreleased += count; 342 if (lastiblock[level] < 0) { 343 DIP_ASSIGN(oip, ib[level], 0); 344 ffs_blkfree(oip, bn, fs->fs_bsize); 345 blocksreleased += nblocks; 346 } 347 } 348 if (lastiblock[level] >= 0) 349 goto done; 350 } 351 352 /* 353 * All whole direct blocks or frags. 354 */ 355 for (i = NDADDR - 1; i > lastblock; i--) { 356 long bsize; 357 358 bn = DIP(oip, db[i]); 359 if (bn == 0) 360 continue; 361 362 DIP_ASSIGN(oip, db[i], 0); 363 bsize = blksize(fs, oip, i); 364 ffs_blkfree(oip, bn, bsize); 365 blocksreleased += btodb(bsize); 366 } 367 if (lastblock < 0) 368 goto done; 369 370 /* 371 * Finally, look for a change in size of the 372 * last direct block; release any frags. 373 */ 374 bn = DIP(oip, db[lastblock]); 375 if (bn != 0) { 376 long oldspace, newspace; 377 378 /* 379 * Calculate amount of space we're giving 380 * back as old block size minus new block size. 381 */ 382 oldspace = blksize(fs, oip, lastblock); 383 DIP_ASSIGN(oip, size, length); 384 newspace = blksize(fs, oip, lastblock); 385 if (newspace == 0) 386 panic("ffs_truncate: newspace"); 387 if (oldspace - newspace > 0) { 388 /* 389 * Block number of space to be free'd is 390 * the old block # plus the number of frags 391 * required for the storage we're keeping. 392 */ 393 bn += numfrags(fs, newspace); 394 ffs_blkfree(oip, bn, oldspace - newspace); 395 blocksreleased += btodb(oldspace - newspace); 396 } 397 } 398 done: 399 #ifdef DIAGNOSTIC 400 for (level = SINGLE; level <= TRIPLE; level++) 401 if (newblks[NDADDR + level] != DIP(oip, ib[level])) 402 panic("ffs_truncate1"); 403 for (i = 0; i < NDADDR; i++) 404 if (newblks[i] != DIP(oip, db[i])) 405 panic("ffs_truncate2"); 406 #endif /* DIAGNOSTIC */ 407 /* 408 * Put back the real size. 409 */ 410 DIP_ASSIGN(oip, size, length); 411 if (DIP(oip, blocks) >= blocksreleased) 412 DIP_ADD(oip, blocks, -blocksreleased); 413 else /* sanity */ 414 DIP_ASSIGN(oip, blocks, 0); 415 oip->i_flag |= IN_CHANGE; 416 (void)ufs_quota_free_blocks(oip, blocksreleased, NOCRED); 417 return (allerror); 418 } 419 420 #ifdef FFS2 421 #define BAP(ip, i) (((ip)->i_ump->um_fstype == UM_UFS2) ? bap2[i] : bap1[i]) 422 #define BAP_ASSIGN(ip, i, value) \ 423 do { \ 424 if ((ip)->i_ump->um_fstype == UM_UFS2) \ 425 bap2[i] = (value); \ 426 else \ 427 bap1[i] = (value); \ 428 } while (0) 429 #else 430 #define BAP(ip, i) bap1[i] 431 #define BAP_ASSIGN(ip, i, value) do { bap1[i] = (value); } while (0) 432 #endif /* FFS2 */ 433 434 /* 435 * Release blocks associated with the inode ip and stored in the indirect 436 * block bn. Blocks are free'd in LIFO order up to (but not including) 437 * lastbn. If level is greater than SINGLE, the block is an indirect block 438 * and recursive calls to indirtrunc must be used to cleanse other indirect 439 * blocks. 440 * 441 * NB: triple indirect blocks are untested. 442 */ 443 int 444 ffs_indirtrunc(struct inode *ip, daddr_t lbn, daddr_t dbn, 445 daddr_t lastbn, int level, long *countp) 446 { 447 int i; 448 struct buf *bp; 449 struct fs *fs = ip->i_fs; 450 struct vnode *vp; 451 void *copy = NULL; 452 daddr_t nb, nlbn, last; 453 long blkcount, factor; 454 int nblocks, blocksreleased = 0; 455 int error = 0, allerror = 0; 456 int32_t *bap1 = NULL; 457 #ifdef FFS2 458 int64_t *bap2 = NULL; 459 #endif 460 461 /* 462 * Calculate index in current block of last 463 * block to be kept. -1 indicates the entire 464 * block so we need not calculate the index. 465 */ 466 factor = 1; 467 for (i = SINGLE; i < level; i++) 468 factor *= NINDIR(fs); 469 last = lastbn; 470 if (lastbn > 0) 471 last /= factor; 472 nblocks = btodb(fs->fs_bsize); 473 /* 474 * Get buffer of block pointers, zero those entries corresponding 475 * to blocks to be free'd, and update on disk copy first. Since 476 * double(triple) indirect before single(double) indirect, calls 477 * to bmap on these blocks will fail. However, we already have 478 * the on disk address, so we have to set the b_blkno field 479 * explicitly instead of letting bread do everything for us. 480 */ 481 vp = ITOV(ip); 482 bp = getblk(vp, lbn, (int)fs->fs_bsize, 0, INFSLP); 483 if (!(bp->b_flags & (B_DONE | B_DELWRI))) { 484 curproc->p_ru.ru_inblock++; /* pay for read */ 485 bcstats.pendingreads++; 486 bcstats.numreads++; 487 bp->b_flags |= B_READ; 488 if (bp->b_bcount > bp->b_bufsize) 489 panic("ffs_indirtrunc: bad buffer size"); 490 bp->b_blkno = dbn; 491 VOP_STRATEGY(bp); 492 error = biowait(bp); 493 } 494 if (error) { 495 brelse(bp); 496 *countp = 0; 497 return (error); 498 } 499 500 #ifdef FFS2 501 if (ip->i_ump->um_fstype == UM_UFS2) 502 bap2 = (int64_t *)bp->b_data; 503 else 504 #endif 505 bap1 = (int32_t *)bp->b_data; 506 507 if (lastbn != -1) { 508 copy = malloc(fs->fs_bsize, M_TEMP, M_WAITOK); 509 memcpy(copy, bp->b_data, fs->fs_bsize); 510 511 for (i = last + 1; i < NINDIR(fs); i++) 512 BAP_ASSIGN(ip, i, 0); 513 514 if (!DOINGASYNC(vp)) { 515 error = bwrite(bp); 516 if (error) 517 allerror = error; 518 } else { 519 bawrite(bp); 520 } 521 522 #ifdef FFS2 523 if (ip->i_ump->um_fstype == UM_UFS2) 524 bap2 = (int64_t *)copy; 525 else 526 #endif 527 bap1 = (int32_t *)copy; 528 } 529 530 /* 531 * Recursively free totally unused blocks. 532 */ 533 for (i = NINDIR(fs) - 1, nlbn = lbn + 1 - i * factor; i > last; 534 i--, nlbn += factor) { 535 nb = BAP(ip, i); 536 if (nb == 0) 537 continue; 538 if (level > SINGLE) { 539 error = ffs_indirtrunc(ip, nlbn, fsbtodb(fs, nb), 540 -1, level - 1, &blkcount); 541 if (error) 542 allerror = error; 543 blocksreleased += blkcount; 544 } 545 ffs_blkfree(ip, nb, fs->fs_bsize); 546 blocksreleased += nblocks; 547 } 548 549 /* 550 * Recursively free last partial block. 551 */ 552 if (level > SINGLE && lastbn >= 0) { 553 last = lastbn % factor; 554 nb = BAP(ip, i); 555 if (nb != 0) { 556 error = ffs_indirtrunc(ip, nlbn, fsbtodb(fs, nb), 557 last, level - 1, &blkcount); 558 if (error) 559 allerror = error; 560 blocksreleased += blkcount; 561 } 562 } 563 if (copy != NULL) { 564 free(copy, M_TEMP, fs->fs_bsize); 565 } else { 566 bp->b_flags |= B_INVAL; 567 brelse(bp); 568 } 569 570 *countp = blocksreleased; 571 return (allerror); 572 } 573