/*	$OpenBSD: ffs_inode.c,v 1.68 2014/03/19 04:17:33 guenther Exp $	*/
/*	$NetBSD: ffs_inode.c,v 1.10 1996/05/11 18:27:19 mycroft Exp $	*/

/*
 * Copyright (c) 1982, 1986, 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
31 * 32 * @(#)ffs_inode.c 8.8 (Berkeley) 10/19/94 33 */ 34 35 #include <sys/param.h> 36 #include <sys/systm.h> 37 #include <sys/mount.h> 38 #include <sys/proc.h> 39 #include <sys/file.h> 40 #include <sys/buf.h> 41 #include <sys/vnode.h> 42 #include <sys/kernel.h> 43 #include <sys/malloc.h> 44 #include <sys/resourcevar.h> 45 46 #include <uvm/uvm_extern.h> 47 48 #include <ufs/ufs/quota.h> 49 #include <ufs/ufs/inode.h> 50 #include <ufs/ufs/ufsmount.h> 51 #include <ufs/ufs/ufs_extern.h> 52 53 #include <ufs/ffs/fs.h> 54 #include <ufs/ffs/ffs_extern.h> 55 56 int ffs_indirtrunc(struct inode *, daddr_t, daddr_t, daddr_t, int, long *); 57 58 /* 59 * Update the access, modified, and inode change times as specified by the 60 * IN_ACCESS, IN_UPDATE, and IN_CHANGE flags respectively. The IN_MODIFIED 61 * flag is used to specify that the inode needs to be updated but that the 62 * times have already been set. The IN_LAZYMOD flag is used to specify 63 * that the inode needs to be updated at some point, by reclaim if not 64 * in the course of other changes; this is used to defer writes just to 65 * update device timestamps. If waitfor is set, then wait for the disk 66 * write of the inode to complete. 67 */ 68 int 69 ffs_update(struct inode *ip, int waitfor) 70 { 71 struct vnode *vp; 72 struct fs *fs; 73 struct buf *bp; 74 int error; 75 76 vp = ITOV(ip); 77 ufs_itimes(vp); 78 79 if ((ip->i_flag & IN_MODIFIED) == 0 && waitfor != MNT_WAIT) 80 return (0); 81 82 ip->i_flag &= ~(IN_MODIFIED | IN_LAZYMOD); 83 fs = ip->i_fs; 84 85 /* 86 * Ensure that uid and gid are correct. This is a temporary 87 * fix until fsck has been changed to do the update. 
88 */ 89 if (fs->fs_magic == FS_UFS1_MAGIC && fs->fs_inodefmt < FS_44INODEFMT) { 90 ip->i_din1->di_ouid = ip->i_ffs1_uid; 91 ip->i_din1->di_ogid = ip->i_ffs1_gid; 92 } 93 94 error = bread(ip->i_devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)), 95 (int)fs->fs_bsize, &bp); 96 if (error) { 97 brelse(bp); 98 return (error); 99 } 100 101 if (DOINGSOFTDEP(vp)) 102 softdep_update_inodeblock(ip, bp, waitfor); 103 else if (ip->i_effnlink != DIP(ip, nlink)) 104 panic("ffs_update: bad link cnt"); 105 106 #ifdef FFS2 107 if (ip->i_ump->um_fstype == UM_UFS2) 108 *((struct ufs2_dinode *)bp->b_data + 109 ino_to_fsbo(fs, ip->i_number)) = *ip->i_din2; 110 else 111 #endif 112 *((struct ufs1_dinode *)bp->b_data + 113 ino_to_fsbo(fs, ip->i_number)) = *ip->i_din1; 114 115 if (waitfor && !DOINGASYNC(vp)) { 116 return (bwrite(bp)); 117 } else { 118 bdwrite(bp); 119 return (0); 120 } 121 } 122 123 #define SINGLE 0 /* index of single indirect block */ 124 #define DOUBLE 1 /* index of double indirect block */ 125 #define TRIPLE 2 /* index of triple indirect block */ 126 127 /* 128 * Truncate the inode oip to at most length size, freeing the 129 * disk blocks. 
130 */ 131 int 132 ffs_truncate(struct inode *oip, off_t length, int flags, struct ucred *cred) 133 { 134 struct vnode *ovp; 135 daddr_t lastblock; 136 daddr_t bn, lbn, lastiblock[NIADDR], indir_lbn[NIADDR]; 137 daddr_t oldblks[NDADDR + NIADDR], newblks[NDADDR + NIADDR]; 138 struct fs *fs; 139 struct buf *bp; 140 int offset, size, level; 141 long count, nblocks, vflags, blocksreleased = 0; 142 int i, aflags, error, allerror; 143 off_t osize; 144 145 if (length < 0) 146 return (EINVAL); 147 ovp = ITOV(oip); 148 149 if (ovp->v_type != VREG && 150 ovp->v_type != VDIR && 151 ovp->v_type != VLNK) 152 return (0); 153 154 if (DIP(oip, size) == length) 155 return (0); 156 157 if (ovp->v_type == VLNK && 158 (DIP(oip, size) < ovp->v_mount->mnt_maxsymlinklen || 159 (ovp->v_mount->mnt_maxsymlinklen == 0 && 160 oip->i_din1->di_blocks == 0))) { 161 #ifdef DIAGNOSTIC 162 if (length != 0) 163 panic("ffs_truncate: partial truncate of symlink"); 164 #endif 165 memset(SHORTLINK(oip), 0, (size_t) DIP(oip, size)); 166 DIP_ASSIGN(oip, size, 0); 167 oip->i_flag |= IN_CHANGE | IN_UPDATE; 168 return (UFS_UPDATE(oip, MNT_WAIT)); 169 } 170 171 if ((error = getinoquota(oip)) != 0) 172 return (error); 173 174 uvm_vnp_setsize(ovp, length); 175 oip->i_ci.ci_lasta = oip->i_ci.ci_clen 176 = oip->i_ci.ci_cstart = oip->i_ci.ci_lastw = 0; 177 178 if (DOINGSOFTDEP(ovp)) { 179 if (length > 0 || softdep_slowdown(ovp)) { 180 /* 181 * If a file is only partially truncated, then 182 * we have to clean up the data structures 183 * describing the allocation past the truncation 184 * point. Finding and deallocating those structures 185 * is a lot of work. Since partial truncation occurs 186 * rarely, we solve the problem by syncing the file 187 * so that it will have no data structures left. 
188 */ 189 if ((error = VOP_FSYNC(ovp, cred, MNT_WAIT, 190 curproc)) != 0) 191 return (error); 192 } else { 193 (void)ufs_quota_free_blocks(oip, DIP(oip, blocks), 194 NOCRED); 195 softdep_setup_freeblocks(oip, length); 196 (void) vinvalbuf(ovp, 0, cred, curproc, 0, 0); 197 oip->i_flag |= IN_CHANGE | IN_UPDATE; 198 return (UFS_UPDATE(oip, 0)); 199 } 200 } 201 202 fs = oip->i_fs; 203 osize = DIP(oip, size); 204 /* 205 * Lengthen the size of the file. We must ensure that the 206 * last byte of the file is allocated. Since the smallest 207 * value of osize is 0, length will be at least 1. 208 */ 209 if (osize < length) { 210 if (length > fs->fs_maxfilesize) 211 return (EFBIG); 212 aflags = B_CLRBUF; 213 if (flags & IO_SYNC) 214 aflags |= B_SYNC; 215 error = UFS_BUF_ALLOC(oip, length - 1, 1, 216 cred, aflags, &bp); 217 if (error) 218 return (error); 219 DIP_ASSIGN(oip, size, length); 220 uvm_vnp_setsize(ovp, length); 221 (void) uvm_vnp_uncache(ovp); 222 if (aflags & B_SYNC) 223 bwrite(bp); 224 else 225 bawrite(bp); 226 oip->i_flag |= IN_CHANGE | IN_UPDATE; 227 return (UFS_UPDATE(oip, MNT_WAIT)); 228 } 229 uvm_vnp_setsize(ovp, length); 230 231 /* 232 * Shorten the size of the file. If the file is not being 233 * truncated to a block boundary, the contents of the 234 * partial block following the end of the file must be 235 * zero'ed in case it ever becomes accessible again because 236 * of subsequent file growth. Directories however are not 237 * zero'ed as they should grow back initialized to empty. 
238 */ 239 offset = blkoff(fs, length); 240 if (offset == 0) { 241 DIP_ASSIGN(oip, size, length); 242 } else { 243 lbn = lblkno(fs, length); 244 aflags = B_CLRBUF; 245 if (flags & IO_SYNC) 246 aflags |= B_SYNC; 247 error = UFS_BUF_ALLOC(oip, length - 1, 1, 248 cred, aflags, &bp); 249 if (error) 250 return (error); 251 /* 252 * When we are doing soft updates and the UFS_BALLOC 253 * above fills in a direct block hole with a full sized 254 * block that will be truncated down to a fragment below, 255 * we must flush out the block dependency with an FSYNC 256 * so that we do not get a soft updates inconsistency 257 * when we create the fragment below. 258 */ 259 if (DOINGSOFTDEP(ovp) && lbn < NDADDR && 260 fragroundup(fs, blkoff(fs, length)) < fs->fs_bsize && 261 (error = VOP_FSYNC(ovp, cred, MNT_WAIT, curproc)) != 0) 262 return (error); 263 DIP_ASSIGN(oip, size, length); 264 size = blksize(fs, oip, lbn); 265 (void) uvm_vnp_uncache(ovp); 266 if (ovp->v_type != VDIR) 267 memset(bp->b_data + offset, 0, size - offset); 268 bp->b_bcount = size; 269 if (aflags & B_SYNC) 270 bwrite(bp); 271 else 272 bawrite(bp); 273 } 274 /* 275 * Calculate index into inode's block list of 276 * last direct and indirect blocks (if any) 277 * which we want to keep. Lastblock is -1 when 278 * the file is truncated to 0. 279 */ 280 lastblock = lblkno(fs, length + fs->fs_bsize - 1) - 1; 281 lastiblock[SINGLE] = lastblock - NDADDR; 282 lastiblock[DOUBLE] = lastiblock[SINGLE] - NINDIR(fs); 283 lastiblock[TRIPLE] = lastiblock[DOUBLE] - NINDIR(fs) * NINDIR(fs); 284 nblocks = btodb(fs->fs_bsize); 285 286 /* 287 * Update file and block pointers on disk before we start freeing 288 * blocks. If we crash before free'ing blocks below, the blocks 289 * will be returned to the free list. lastiblock values are also 290 * normalized to -1 for calls to ffs_indirtrunc below. 
291 */ 292 for (level = TRIPLE; level >= SINGLE; level--) { 293 oldblks[NDADDR + level] = DIP(oip, ib[level]); 294 if (lastiblock[level] < 0) { 295 DIP_ASSIGN(oip, ib[level], 0); 296 lastiblock[level] = -1; 297 } 298 } 299 300 for (i = 0; i < NDADDR; i++) { 301 oldblks[i] = DIP(oip, db[i]); 302 if (i > lastblock) 303 DIP_ASSIGN(oip, db[i], 0); 304 } 305 306 oip->i_flag |= IN_CHANGE | IN_UPDATE; 307 if ((error = UFS_UPDATE(oip, MNT_WAIT)) != 0) 308 allerror = error; 309 310 /* 311 * Having written the new inode to disk, save its new configuration 312 * and put back the old block pointers long enough to process them. 313 * Note that we save the new block configuration so we can check it 314 * when we are done. 315 */ 316 for (i = 0; i < NDADDR; i++) { 317 newblks[i] = DIP(oip, db[i]); 318 DIP_ASSIGN(oip, db[i], oldblks[i]); 319 } 320 321 for (i = 0; i < NIADDR; i++) { 322 newblks[NDADDR + i] = DIP(oip, ib[i]); 323 DIP_ASSIGN(oip, ib[i], oldblks[NDADDR + i]); 324 } 325 326 DIP_ASSIGN(oip, size, osize); 327 vflags = ((length > 0) ? V_SAVE : 0) | V_SAVEMETA; 328 allerror = vinvalbuf(ovp, vflags, cred, curproc, 0, 0); 329 330 /* 331 * Indirect blocks first. 332 */ 333 indir_lbn[SINGLE] = -NDADDR; 334 indir_lbn[DOUBLE] = indir_lbn[SINGLE] - NINDIR(fs) - 1; 335 indir_lbn[TRIPLE] = indir_lbn[DOUBLE] - NINDIR(fs) * NINDIR(fs) - 1; 336 for (level = TRIPLE; level >= SINGLE; level--) { 337 bn = DIP(oip, ib[level]); 338 if (bn != 0) { 339 error = ffs_indirtrunc(oip, indir_lbn[level], 340 fsbtodb(fs, bn), lastiblock[level], level, &count); 341 if (error) 342 allerror = error; 343 blocksreleased += count; 344 if (lastiblock[level] < 0) { 345 DIP_ASSIGN(oip, ib[level], 0); 346 ffs_blkfree(oip, bn, fs->fs_bsize); 347 blocksreleased += nblocks; 348 } 349 } 350 if (lastiblock[level] >= 0) 351 goto done; 352 } 353 354 /* 355 * All whole direct blocks or frags. 
356 */ 357 for (i = NDADDR - 1; i > lastblock; i--) { 358 long bsize; 359 360 bn = DIP(oip, db[i]); 361 if (bn == 0) 362 continue; 363 364 DIP_ASSIGN(oip, db[i], 0); 365 bsize = blksize(fs, oip, i); 366 ffs_blkfree(oip, bn, bsize); 367 blocksreleased += btodb(bsize); 368 } 369 if (lastblock < 0) 370 goto done; 371 372 /* 373 * Finally, look for a change in size of the 374 * last direct block; release any frags. 375 */ 376 bn = DIP(oip, db[lastblock]); 377 if (bn != 0) { 378 long oldspace, newspace; 379 380 /* 381 * Calculate amount of space we're giving 382 * back as old block size minus new block size. 383 */ 384 oldspace = blksize(fs, oip, lastblock); 385 DIP_ASSIGN(oip, size, length); 386 newspace = blksize(fs, oip, lastblock); 387 if (newspace == 0) 388 panic("ffs_truncate: newspace"); 389 if (oldspace - newspace > 0) { 390 /* 391 * Block number of space to be free'd is 392 * the old block # plus the number of frags 393 * required for the storage we're keeping. 394 */ 395 bn += numfrags(fs, newspace); 396 ffs_blkfree(oip, bn, oldspace - newspace); 397 blocksreleased += btodb(oldspace - newspace); 398 } 399 } 400 done: 401 #ifdef DIAGNOSTIC 402 for (level = SINGLE; level <= TRIPLE; level++) 403 if (newblks[NDADDR + level] != DIP(oip, ib[level])) 404 panic("ffs_truncate1"); 405 for (i = 0; i < NDADDR; i++) 406 if (newblks[i] != DIP(oip, db[i])) 407 panic("ffs_truncate2"); 408 #endif /* DIAGNOSTIC */ 409 /* 410 * Put back the real size. 411 */ 412 DIP_ASSIGN(oip, size, length); 413 if (DIP(oip, blocks) >= blocksreleased) 414 DIP_ADD(oip, blocks, -blocksreleased); 415 else /* sanity */ 416 DIP_ASSIGN(oip, blocks, 0); 417 oip->i_flag |= IN_CHANGE; 418 (void)ufs_quota_free_blocks(oip, blocksreleased, NOCRED); 419 return (allerror); 420 } 421 422 #ifdef FFS2 423 #define BAP(ip, i) (((ip)->i_ump->um_fstype == UM_UFS2) ? 
bap2[i] : bap1[i]) 424 #define BAP_ASSIGN(ip, i, value) \ 425 do { \ 426 if ((ip)->i_ump->um_fstype == UM_UFS2) \ 427 bap2[i] = (value); \ 428 else \ 429 bap1[i] = (value); \ 430 } while (0) 431 #else 432 #define BAP(ip, i) bap1[i] 433 #define BAP_ASSIGN(ip, i, value) do { bap1[i] = (value); } while (0) 434 #endif /* FFS2 */ 435 436 /* 437 * Release blocks associated with the inode ip and stored in the indirect 438 * block bn. Blocks are free'd in LIFO order up to (but not including) 439 * lastbn. If level is greater than SINGLE, the block is an indirect block 440 * and recursive calls to indirtrunc must be used to cleanse other indirect 441 * blocks. 442 * 443 * NB: triple indirect blocks are untested. 444 */ 445 int 446 ffs_indirtrunc(struct inode *ip, daddr_t lbn, daddr_t dbn, 447 daddr_t lastbn, int level, long *countp) 448 { 449 int i; 450 struct buf *bp; 451 struct fs *fs = ip->i_fs; 452 struct vnode *vp; 453 void *copy = NULL; 454 daddr_t nb, nlbn, last; 455 long blkcount, factor; 456 int nblocks, blocksreleased = 0; 457 int error = 0, allerror = 0; 458 int32_t *bap1 = NULL; 459 #ifdef FFS2 460 int64_t *bap2 = NULL; 461 #endif 462 463 /* 464 * Calculate index in current block of last 465 * block to be kept. -1 indicates the entire 466 * block so we need not calculate the index. 467 */ 468 factor = 1; 469 for (i = SINGLE; i < level; i++) 470 factor *= NINDIR(fs); 471 last = lastbn; 472 if (lastbn > 0) 473 last /= factor; 474 nblocks = btodb(fs->fs_bsize); 475 /* 476 * Get buffer of block pointers, zero those entries corresponding 477 * to blocks to be free'd, and update on disk copy first. Since 478 * double(triple) indirect before single(double) indirect, calls 479 * to bmap on these blocks will fail. However, we already have 480 * the on disk address, so we have to set the b_blkno field 481 * explicitly instead of letting bread do everything for us. 
482 */ 483 vp = ITOV(ip); 484 bp = getblk(vp, lbn, (int)fs->fs_bsize, 0, 0); 485 if (!(bp->b_flags & (B_DONE | B_DELWRI))) { 486 curproc->p_ru.ru_inblock++; /* pay for read */ 487 bcstats.pendingreads++; 488 bcstats.numreads++; 489 bp->b_flags |= B_READ; 490 if (bp->b_bcount > bp->b_bufsize) 491 panic("ffs_indirtrunc: bad buffer size"); 492 bp->b_blkno = dbn; 493 VOP_STRATEGY(bp); 494 error = biowait(bp); 495 } 496 if (error) { 497 brelse(bp); 498 *countp = 0; 499 return (error); 500 } 501 502 #ifdef FFS2 503 if (ip->i_ump->um_fstype == UM_UFS2) 504 bap2 = (int64_t *)bp->b_data; 505 else 506 #endif 507 bap1 = (int32_t *)bp->b_data; 508 509 if (lastbn != -1) { 510 copy = malloc(fs->fs_bsize, M_TEMP, M_WAITOK); 511 memcpy(copy, bp->b_data, fs->fs_bsize); 512 513 for (i = last + 1; i < NINDIR(fs); i++) 514 BAP_ASSIGN(ip, i, 0); 515 516 if (!DOINGASYNC(vp)) { 517 error = bwrite(bp); 518 if (error) 519 allerror = error; 520 } else { 521 bawrite(bp); 522 } 523 524 #ifdef FFS2 525 if (ip->i_ump->um_fstype == UM_UFS2) 526 bap2 = (int64_t *)copy; 527 else 528 #endif 529 bap1 = (int32_t *)copy; 530 } 531 532 /* 533 * Recursively free totally unused blocks. 534 */ 535 for (i = NINDIR(fs) - 1, nlbn = lbn + 1 - i * factor; i > last; 536 i--, nlbn += factor) { 537 nb = BAP(ip, i); 538 if (nb == 0) 539 continue; 540 if (level > SINGLE) { 541 error = ffs_indirtrunc(ip, nlbn, fsbtodb(fs, nb), 542 -1, level - 1, &blkcount); 543 if (error) 544 allerror = error; 545 blocksreleased += blkcount; 546 } 547 ffs_blkfree(ip, nb, fs->fs_bsize); 548 blocksreleased += nblocks; 549 } 550 551 /* 552 * Recursively free last partial block. 
553 */ 554 if (level > SINGLE && lastbn >= 0) { 555 last = lastbn % factor; 556 nb = BAP(ip, i); 557 if (nb != 0) { 558 error = ffs_indirtrunc(ip, nlbn, fsbtodb(fs, nb), 559 last, level - 1, &blkcount); 560 if (error) 561 allerror = error; 562 blocksreleased += blkcount; 563 } 564 } 565 if (copy != NULL) { 566 free(copy, M_TEMP); 567 } else { 568 bp->b_flags |= B_INVAL; 569 brelse(bp); 570 } 571 572 *countp = blocksreleased; 573 return (allerror); 574 } 575