/*
 * Copyright (c) 1982, 1986, 1989 Regents of the University of California.
 * All rights reserved.
 *
 * %sccs.include.redist.c%
 *
 *	@(#)ufs_inode.c	7.40.1.1 (Berkeley) 06/03/91
 */

#include "param.h"
#include "systm.h"
#include "mount.h"
#include "proc.h"
#include "file.h"
#include "buf.h"
#include "vnode.h"
#include "kernel.h"
#include "malloc.h"

#include "quota.h"
#include "inode.h"
#include "fs.h"
#include "ufsmount.h"

/*
 * Size of the inode hash table.  When INOHSZ is a power of two the
 * hash can be computed with a mask instead of a modulus; the #if
 * below selects the cheap form automatically.
 */
#define	INOHSZ	512
#if	((INOHSZ&(INOHSZ-1)) == 0)
#define	INOHASH(dev,ino)	(((dev)+(ino))&(INOHSZ-1))
#else
#define	INOHASH(dev,ino)	(((unsigned)((dev)+(ino)))%INOHSZ)
#endif

/*
 * Inode hash chain heads.  Each bucket is a doubly linked list whose
 * head overlays the inode's forward/back pointers, so an empty bucket
 * points at itself and insque()/remque() work uniformly on heads and
 * inodes alike.
 */
union ihead {
	union ihead *ih_head[2];
	struct inode *ih_chain[2];
} ihead[INOHSZ];

int prtactive;	/* 1 => print out reclaim of active vnodes */

/*
 * Initialize hash links for inodes:
 * make every bucket an empty, self-referencing list.
 * Also initializes the quota machinery when QUOTA is configured.
 */
ufs_init()
{
	register int i;
	register union ihead *ih = ihead;

#ifndef lint
	/* An inode must fit in the private area of a vnode. */
	if (VN_MAXPRIVATE < sizeof(struct inode))
		panic("ihinit: too small");
#endif /* not lint */
	for (i = INOHSZ; --i >= 0; ih++) {
		ih->ih_head[0] = ih;
		ih->ih_head[1] = ih;
	}
#ifdef QUOTA
	dqinit();
#endif /* QUOTA */
}

/*
 * Look up a UFS dinode number to find its incore vnode.
 * If it is not in core, read it in from the specified device.
 * If it is in core, wait for the lock bit to clear, then
 * return the inode locked. Detection and handling of mount
 * points must be done by the calling routine.
 */
iget(xp, ino, ipp)
	struct inode *xp;
	ino_t ino;
	struct inode **ipp;
{
	dev_t dev = xp->i_dev;
	struct mount *mntp = ITOV(xp)->v_mount;
	register struct fs *fs = VFSTOUFS(mntp)->um_fs;
	extern struct vnodeops ufs_vnodeops, spec_inodeops;
	register struct inode *ip, *iq;
	register struct vnode *vp;
	struct vnode *nvp;
	struct buf *bp;
	struct dinode *dp;
	union ihead *ih;
	int i, error;

	/*
	 * First look for the inode on its hash chain.
	 */
	ih = &ihead[INOHASH(dev, ino)];
loop:
	for (ip = ih->ih_chain[0]; ip != (struct inode *)ih; ip = ip->i_forw) {
		if (ino != ip->i_number || dev != ip->i_dev)
			continue;
		/*
		 * If it is locked, wait for the lock to clear and then
		 * rescan the whole chain: the inode may have been freed
		 * or reused while we slept.
		 */
		if ((ip->i_flag&ILOCKED) != 0) {
			ip->i_flag |= IWANT;
			sleep((caddr_t)ip, PINOD);
			goto loop;
		}
		/* vget() fails if the vnode is being reclaimed; retry. */
		if (vget(ITOV(ip)))
			goto loop;
		*ipp = ip;
		return(0);
	}
	/*
	 * Allocate a new inode.
	 */
	if (error = getnewvnode(VT_UFS, mntp, &ufs_vnodeops, &nvp)) {
		*ipp = 0;
		return (error);
	}
	ip = VTOI(nvp);
	ip->i_vnode = nvp;
	ip->i_flag = 0;
	ip->i_devvp = 0;
	ip->i_mode = 0;
	ip->i_diroff = 0;
	ip->i_lockf = 0;
#ifdef QUOTA
	for (i = 0; i < MAXQUOTAS; i++)
		ip->i_dquot[i] = NODQUOT;
#endif
	/*
	 * Put it onto its hash chain and lock it so that other requests for
	 * this inode will block if they arrive while we are sleeping waiting
	 * for old data structures to be purged or for the contents of the
	 * disk portion of this inode to be read.
	 */
	ip->i_dev = dev;
	ip->i_number = ino;
	insque(ip, ih);
	ILOCK(ip);
	/*
	 * Read in the disk contents for the inode.
	 */
	if (error = bread(VFSTOUFS(mntp)->um_devvp, fsbtodb(fs, itod(fs, ino)),
	    (int)fs->fs_bsize, NOCRED, &bp)) {
		/*
		 * The inode does not contain anything useful, so it would
		 * be misleading to leave it on its hash chain.
		 * Iput() will take care of putting it back on the free list.
		 */
		remque(ip);
		ip->i_forw = ip;
		ip->i_back = ip;
		/*
		 * Unlock and discard unneeded inode.
		 */
		iput(ip);
		/* bread() returns a buffer even on failure; release it. */
		brelse(bp);
		*ipp = 0;
		return (error);
	}
	/* Copy this inode's dinode out of the filesystem block. */
	dp = bp->b_un.b_dino;
	dp += itoo(fs, ino);
	ip->i_din = *dp;
	brelse(bp);
	/*
	 * Initialize the associated vnode
	 */
	vp = ITOV(ip);
	vp->v_type = IFTOVT(ip->i_mode);
	if (vp->v_type == VFIFO) {
#ifdef FIFO
		extern struct vnodeops fifo_inodeops;
		vp->v_op = &fifo_inodeops;
#else
		/* FIFOs not configured into this kernel. */
		iput(ip);
		*ipp = 0;
		return (EOPNOTSUPP);
#endif /* FIFO */
	}
	if (vp->v_type == VCHR || vp->v_type == VBLK) {
		vp->v_op = &spec_inodeops;
		/*
		 * checkalias() returns an existing vnode for this device,
		 * if any; when it does, move our identity onto that vnode
		 * and discard the one just allocated.
		 */
		if (nvp = checkalias(vp, ip->i_rdev, mntp)) {
			/*
			 * Reinitialize aliased inode.
			 */
			vp = nvp;
			iq = VTOI(vp);
			iq->i_vnode = vp;
			iq->i_flag = 0;
			ILOCK(iq);
			iq->i_din = ip->i_din;
			iq->i_dev = dev;
			iq->i_number = ino;
			insque(iq, ih);
			/*
			 * Discard unneeded vnode
			 * (i_mode == 0 marks it stale for ufs_inactive).
			 */
			ip->i_mode = 0;
			iput(ip);
			ip = iq;
		}
	}
	if (ino == ROOTINO)
		vp->v_flag |= VROOT;
	/*
	 * Finish inode initialization.
	 */
	ip->i_fs = fs;
	ip->i_devvp = VFSTOUFS(mntp)->um_devvp;
	VREF(ip->i_devvp);
	/*
	 * Set up a generation number for this inode if it does not
	 * already have one. This should only happen on old filesystems.
	 */
	if (ip->i_gen == 0) {
		if (++nextgennumber < (u_long)time.tv_sec)
			nextgennumber = time.tv_sec;
		ip->i_gen = nextgennumber;
		if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0)
			ip->i_flag |= IMOD;
	}
	*ipp = ip;
	return (0);
}

/*
 * Unlock and decrement the reference count of an inode structure.
 */
iput(ip)
	register struct inode *ip;
{

	/* The caller must hold the inode lock. */
	if ((ip->i_flag & ILOCKED) == 0)
		panic("iput");
	IUNLOCK(ip);
	/* Drop the vnode reference; the last release invokes
	 * ufs_inactive() below. */
	vrele(ITOV(ip));
}

/*
 * Last reference to an inode, write the inode out and if necessary,
 * truncate and deallocate the file.
 */
ufs_inactive(vp, p)
	struct vnode *vp;
	struct proc *p;
{
	register struct inode *ip = VTOI(vp);
	int mode, error = 0;

	if (prtactive && vp->v_usecount != 0)
		vprint("ufs_inactive: pushing active", vp);
	/*
	 * Get rid of inodes related to stale file handles.
	 * (i_mode == 0 marks a dead in-core inode; see iget/itrunc.)
	 */
	if (ip->i_mode == 0) {
		if ((vp->v_flag & VXLOCK) == 0)
			vgone(vp);
		return (0);
	}
	ILOCK(ip);
	/*
	 * A link count of zero on a writable filesystem means the file
	 * has been deleted: release its blocks and free the inode.
	 */
	if (ip->i_nlink <= 0 && (vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
#ifdef QUOTA
		if (!getinoquota(ip))
			(void) chkiq(ip, -1, NOCRED, 0);
#endif
		error = itrunc(ip, (u_long)0, 0);
		/* Save the mode for ifree(); clearing i_mode marks the
		 * in-core inode dead. */
		mode = ip->i_mode;
		ip->i_mode = 0;
		ip->i_rdev = 0;
		ip->i_flag |= IUPD|ICHG;
		ifree(ip, ip->i_number, mode);
	}
	IUPDAT(ip, &time, &time, 0);
	IUNLOCK(ip);
	ip->i_flag = 0;
	/*
	 * If we are done with the inode, reclaim it
	 * so that it can be reused immediately.
	 */
	if (vp->v_usecount == 0 && ip->i_mode == 0)
		vgone(vp);
	return (error);
}

/*
 * Reclaim an inode so that it can be used for other purposes.
 */
ufs_reclaim(vp)
	register struct vnode *vp;
{
	register struct inode *ip = VTOI(vp);
	int i;

	if (prtactive && vp->v_usecount != 0)
		vprint("ufs_reclaim: pushing active", vp);
	/*
	 * Remove the inode from its hash chain.
	 */
	remque(ip);
	/* Make the links self-referencing so a later remque() is harmless. */
	ip->i_forw = ip;
	ip->i_back = ip;
	/*
	 * Purge old data structures associated with the inode.
	 */
	cache_purge(vp);
	/* Release the hold on the device vnode taken in iget(). */
	if (ip->i_devvp) {
		vrele(ip->i_devvp);
		ip->i_devvp = 0;
	}
#ifdef QUOTA
	for (i = 0; i < MAXQUOTAS; i++) {
		if (ip->i_dquot[i] != NODQUOT) {
			dqrele(vp, ip->i_dquot[i]);
			ip->i_dquot[i] = NODQUOT;
		}
	}
#endif
	ip->i_flag = 0;
	return (0);
}

/*
 * Update the access, modified, and inode change times as specified
 * by the IACC, IMOD, and ICHG flags respectively. The IUPD flag
 * is used to specify that the inode needs to be updated but that
 * the times have already been set. The access and modified times
 * are taken from the second and third parameters; the inode change
 * time is always taken from the current time. If waitfor is set,
 * then wait for the disk write of the inode to complete.
 */
iupdat(ip, ta, tm, waitfor)
	register struct inode *ip;
	struct timeval *ta, *tm;
	int waitfor;
{
	struct buf *bp;
	struct vnode *vp = ITOV(ip);
	struct dinode *dp;
	register struct fs *fs;
	int error;

	fs = ip->i_fs;
	/* Nothing to do if the inode is clean or the filesystem
	 * is mounted read-only. */
	if ((ip->i_flag & (IUPD|IACC|ICHG|IMOD)) == 0)
		return (0);
	if (vp->v_mount->mnt_flag & MNT_RDONLY)
		return (0);
	/* Read the filesystem block holding this dinode. */
	error = bread(ip->i_devvp, fsbtodb(fs, itod(fs, ip->i_number)),
		(int)fs->fs_bsize, NOCRED, &bp);
	if (error) {
		brelse(bp);
		return (error);
	}
	if (ip->i_flag&IACC)
		ip->i_atime = ta->tv_sec;
	if (ip->i_flag&IUPD)
		ip->i_mtime = tm->tv_sec;
	if (ip->i_flag&ICHG)
		ip->i_ctime = time.tv_sec;
	ip->i_flag &= ~(IUPD|IACC|ICHG|IMOD);
	/* Copy the in-core dinode back into its slot in the buffer. */
	dp = bp->b_un.b_dino + itoo(fs, ip->i_number);
	*dp = ip->i_din;
	if (waitfor) {
		return (bwrite(bp));
	} else {
		bdwrite(bp);
		return (0);
	}
}

#define	SINGLE	0	/* index of single indirect block */
#define	DOUBLE	1	/* index of double indirect block */
#define	TRIPLE	2	/* index of triple indirect block */
/*
 * Truncate the inode ip to at most length size.
Free affected disk
 * blocks -- the blocks of the file are removed in reverse order.
 *
 * NB: triple indirect blocks are untested.
 */
itrunc(oip, length, flags)
	register struct inode *oip;
	u_long length;
	int flags;
{
	register daddr_t lastblock;
	daddr_t bn, lbn, lastiblock[NIADDR];
	register struct fs *fs;
	register struct inode *ip;
	struct buf *bp;
	int offset, osize, size, level;
	long count, nblocks, blocksreleased = 0;
	register int i;
	int aflags, error, allerror;
	struct inode tip;

	/* Tell the VM pager about the new size first. */
	vnode_pager_setsize(ITOV(oip), length);
	/*
	 * itrunc never extends a file; if it is already short enough,
	 * just push the (time-stamped) inode to disk synchronously.
	 */
	if (oip->i_size <= length) {
		oip->i_flag |= ICHG|IUPD;
		error = iupdat(oip, &time, &time, 1);
		return (error);
	}
	/*
	 * Calculate index into inode's block list of
	 * last direct and indirect blocks (if any)
	 * which we want to keep. Lastblock is -1 when
	 * the file is truncated to 0.
	 */
	fs = oip->i_fs;
	lastblock = lblkno(fs, length + fs->fs_bsize - 1) - 1;
	/* A negative lastiblock[level] means that entire indirect
	 * tree is to be freed. */
	lastiblock[SINGLE] = lastblock - NDADDR;
	lastiblock[DOUBLE] = lastiblock[SINGLE] - NINDIR(fs);
	lastiblock[TRIPLE] = lastiblock[DOUBLE] - NINDIR(fs) * NINDIR(fs);
	/* Device blocks per filesystem block, for i_blocks accounting. */
	nblocks = btodb(fs->fs_bsize);
	/*
	 * Update the size of the file. If the file is not being
	 * truncated to a block boundry, the contents of the
	 * partial block following the end of the file must be
	 * zero'ed in case it ever become accessable again because
	 * of subsequent file growth.
	 */
	osize = oip->i_size;
	offset = blkoff(fs, length);
	if (offset == 0) {
		oip->i_size = length;
	} else {
		lbn = lblkno(fs, length);
		aflags = B_CLRBUF;
		if (flags & IO_SYNC)
			aflags |= B_SYNC;
#ifdef QUOTA
		if (error = getinoquota(oip))
			return (error);
#endif
		/* Fetch the block containing the new end of file. */
		if (error = balloc(oip, lbn, offset, &bp, aflags))
			return (error);
		oip->i_size = length;
		size = blksize(fs, oip, lbn);
		(void) vnode_pager_uncache(ITOV(oip));
		/* Zero the tail of the partial block beyond the new EOF. */
		bzero(bp->b_un.b_addr + offset, (unsigned)(size - offset));
		allocbuf(bp, size);
		if (flags & IO_SYNC)
			bwrite(bp);
		else
			bdwrite(bp);
	}
	/*
	 * Update file and block pointers
	 * on disk before we start freeing blocks.
	 * If we crash before free'ing blocks below,
	 * the blocks will be returned to the free list.
	 * lastiblock values are also normalized to -1
	 * for calls to indirtrunc below.
	 *
	 * tip keeps the old block pointers (and the old size) so the
	 * blocks can still be found and freed after oip is cleared.
	 */
	tip = *oip;
	tip.i_size = osize;
	for (level = TRIPLE; level >= SINGLE; level--)
		if (lastiblock[level] < 0) {
			oip->i_ib[level] = 0;
			lastiblock[level] = -1;
		}
	for (i = NDADDR - 1; i > lastblock; i--)
		oip->i_db[i] = 0;
	oip->i_flag |= ICHG|IUPD;
	vinvalbuf(ITOV(oip), (length > 0));
	allerror = iupdat(oip, &time, &time, MNT_WAIT);

	/*
	 * Indirect blocks first.
	 */
	ip = &tip;
	for (level = TRIPLE; level >= SINGLE; level--) {
		bn = ip->i_ib[level];
		if (bn != 0) {
			error = indirtrunc(ip, bn, lastiblock[level], level,
				&count);
			if (error)
				allerror = error;
			blocksreleased += count;
			if (lastiblock[level] < 0) {
				/* Entire tree gone; free the indirect
				 * block itself as well. */
				ip->i_ib[level] = 0;
				blkfree(ip, bn, (off_t)fs->fs_bsize);
				blocksreleased += nblocks;
			}
		}
		/* A partially retained tree implies nothing below
		 * it (direct blocks included) is to be freed. */
		if (lastiblock[level] >= 0)
			goto done;
	}

	/*
	 * All whole direct blocks or frags.
	 */
	for (i = NDADDR - 1; i > lastblock; i--) {
		register off_t bsize;

		bn = ip->i_db[i];
		if (bn == 0)
			continue;
		ip->i_db[i] = 0;
		bsize = (off_t)blksize(fs, ip, i);
		blkfree(ip, bn, bsize);
		blocksreleased += btodb(bsize);
	}
	if (lastblock < 0)
		goto done;

	/*
	 * Finally, look for a change in size of the
	 * last direct block; release any frags.
	 */
	bn = ip->i_db[lastblock];
	if (bn != 0) {
		off_t oldspace, newspace;

		/*
		 * Calculate amount of space we're giving
		 * back as old block size minus new block size.
		 */
		oldspace = blksize(fs, ip, lastblock);
		ip->i_size = length;
		newspace = blksize(fs, ip, lastblock);
		if (newspace == 0)
			panic("itrunc: newspace");
		if (oldspace - newspace > 0) {
			/*
			 * Block number of space to be free'd is
			 * the old block # plus the number of frags
			 * required for the storage we're keeping.
			 */
			bn += numfrags(fs, newspace);
			blkfree(ip, bn, oldspace - newspace);
			blocksreleased += btodb(oldspace - newspace);
		}
	}
done:
/* BEGIN PARANOIA */
	/* The freeing loops above should only have cleared pointers
	 * already cleared in oip before the iupdat() went to disk. */
	for (level = SINGLE; level <= TRIPLE; level++)
		if (ip->i_ib[level] != oip->i_ib[level])
			panic("itrunc1");
	for (i = 0; i < NDADDR; i++)
		if (ip->i_db[i] != oip->i_db[i])
			panic("itrunc2");
/* END PARANOIA */
	oip->i_blocks -= blocksreleased;
	if (oip->i_blocks < 0)			/* sanity */
		oip->i_blocks = 0;
	oip->i_flag |= ICHG;
#ifdef QUOTA
	if (!getinoquota(oip))
		(void) chkdq(oip, -blocksreleased, NOCRED, 0);
#endif
	return (allerror);
}

/*
 * Release blocks associated with the inode ip and
 * stored in the indirect block bn.  Blocks are free'd
 * in LIFO order up to (but not including) lastbn.  If
 * level is greater than SINGLE, the block is an indirect
 * block and recursive calls to indirtrunc must be used to
 * cleanse other indirect blocks.
 *
 * NB: triple indirect blocks are untested.
 */
indirtrunc(ip, bn, lastbn, level, countp)
	register struct inode *ip;
	daddr_t bn, lastbn;
	int level;
	long *countp;
{
	register int i;
	struct buf *bp;
	register struct fs *fs = ip->i_fs;
	register daddr_t *bap;
	daddr_t *copy, nb, last;
	long blkcount, factor;
	int nblocks, blocksreleased = 0;
	int error, allerror = 0;

	/*
	 * Calculate index in current block of last
	 * block to be kept. -1 indicates the entire
	 * block so we need not calculate the index.
	 */
	/* factor = file blocks mapped per entry at this level. */
	factor = 1;
	for (i = SINGLE; i < level; i++)
		factor *= NINDIR(fs);
	last = lastbn;
	if (lastbn > 0)
		last /= factor;
	nblocks = btodb(fs->fs_bsize);
	/*
	 * Get buffer of block pointers, zero those
	 * entries corresponding to blocks to be free'd,
	 * and update on disk copy first.
	 */
	error = bread(ip->i_devvp, fsbtodb(fs, bn), (int)fs->fs_bsize,
		NOCRED, &bp);
	if (error) {
		brelse(bp);
		*countp = 0;
		return (error);
	}
	bap = bp->b_un.b_daddr;
	/*
	 * Preserve the old pointers in a private copy; the freeing below
	 * works from the copy only after the zeroed block is on disk, so
	 * a crash cannot leave the indirect block referencing freed blocks.
	 */
	MALLOC(copy, daddr_t *, fs->fs_bsize, M_TEMP, M_WAITOK);
	bcopy((caddr_t)bap, (caddr_t)copy, (u_int)fs->fs_bsize);
	bzero((caddr_t)&bap[last + 1],
		(u_int)(NINDIR(fs) - (last + 1)) * sizeof (daddr_t));
	/* If nothing is kept, the buffer need not linger in the cache. */
	if (last == -1)
		bp->b_flags |= B_INVAL;
	error = bwrite(bp);
	if (error)
		allerror = error;
	bap = copy;

	/*
	 * Recursively free totally unused blocks.
	 */
	for (i = NINDIR(fs) - 1; i > last; i--) {
		nb = bap[i];
		if (nb == 0)
			continue;
		if (level > SINGLE) {
			error = indirtrunc(ip, nb, (daddr_t)-1, level - 1,
				&blkcount);
			if (error)
				allerror = error;
			blocksreleased += blkcount;
		}
		blkfree(ip, nb, (off_t)fs->fs_bsize);
		blocksreleased += nblocks;
	}

	/*
	 * Recursively free last partial block.
	 */
	if (level > SINGLE && lastbn >= 0) {
		last = lastbn % factor;
		/* The loop above exited with i == last, so bap[i] is the
		 * partially retained indirect block. */
		nb = bap[i];
		if (nb != 0) {
			error = indirtrunc(ip, nb, last, level - 1, &blkcount);
			if (error)
				allerror = error;
			blocksreleased += blkcount;
		}
	}
	FREE(copy, M_TEMP);
	*countp = blocksreleased;
	return (allerror);
}

/*
 * Lock an inode. If its already locked, set the WANT bit and sleep.
 */
ilock(ip)
	register struct inode *ip;
{

	while (ip->i_flag & ILOCKED) {
		ip->i_flag |= IWANT;
		/* i_spare0 holds the pid of the lock holder (set below);
		 * relocking by the same process would sleep forever. */
		if (ip->i_spare0 == curproc->p_pid)
			panic("locking against myself");
		/* NOTE(review): i_spare1 appears to record the most recent
		 * waiter's pid as a debugging aid -- confirm. */
		ip->i_spare1 = curproc->p_pid;
		(void) sleep((caddr_t)ip, PINOD);
	}
	ip->i_spare1 = 0;
	ip->i_spare0 = curproc->p_pid;
	ip->i_flag |= ILOCKED;
	/* Per-process count of inode locks held (see iunlock). */
	curproc->p_spare[2]++;
}

/*
 * Unlock an inode. If WANT bit is on, wakeup.
 */
iunlock(ip)
	register struct inode *ip;
{

	/* Unlocking an unlocked inode is reported but not fatal. */
	if ((ip->i_flag & ILOCKED) == 0)
		vprint("iunlock: unlocked inode", ITOV(ip));
	ip->i_spare0 = 0;
	ip->i_flag &= ~ILOCKED;
	curproc->p_spare[2]--;
	if (ip->i_flag&IWANT) {
		ip->i_flag &= ~IWANT;
		wakeup((caddr_t)ip);
	}
}