1 /* 2 * Copyright (c) 1982, 1986, 1989 Regents of the University of California. 3 * All rights reserved. 4 * 5 * %sccs.include.redist.c% 6 * 7 * @(#)lfs_inode.c 7.34 (Berkeley) 07/03/90 8 */ 9 10 #include "param.h" 11 #include "systm.h" 12 #include "mount.h" 13 #include "user.h" 14 #include "proc.h" 15 #include "file.h" 16 #include "buf.h" 17 #include "cmap.h" 18 #include "vnode.h" 19 #include "../ufs/quota.h" 20 #include "../ufs/inode.h" 21 #include "../ufs/fs.h" 22 #include "../ufs/ufsmount.h" 23 #include "kernel.h" 24 #include "malloc.h" 25 26 #define INOHSZ 512 27 #if ((INOHSZ&(INOHSZ-1)) == 0) 28 #define INOHASH(dev,ino) (((dev)+(ino))&(INOHSZ-1)) 29 #else 30 #define INOHASH(dev,ino) (((unsigned)((dev)+(ino)))%INOHSZ) 31 #endif 32 33 union ihead { 34 union ihead *ih_head[2]; 35 struct inode *ih_chain[2]; 36 } ihead[INOHSZ]; 37 38 int prtactive; /* 1 => print out reclaim of active vnodes */ 39 40 /* 41 * Initialize hash links for inodes. 42 */ 43 ufs_init() 44 { 45 register int i; 46 register union ihead *ih = ihead; 47 48 #ifndef lint 49 if (VN_MAXPRIVATE < sizeof(struct inode)) 50 panic("ihinit: too small"); 51 #endif /* not lint */ 52 for (i = INOHSZ; --i >= 0; ih++) { 53 ih->ih_head[0] = ih; 54 ih->ih_head[1] = ih; 55 } 56 #ifdef QUOTA 57 dqinit(); 58 #endif /* QUOTA */ 59 } 60 61 /* 62 * Look up an vnode/inode by device,inumber. 63 * If it is in core (in the inode structure), 64 * honor the locking protocol. 65 * If it is not in core, read it in from the 66 * specified device. 67 * Callers must check for mount points!! 68 * In all cases, a pointer to a locked 69 * inode structure is returned. 70 */ 71 iget(xp, ino, ipp) 72 struct inode *xp; 73 ino_t ino; 74 struct inode **ipp; 75 { 76 dev_t dev = xp->i_dev; 77 struct mount *mntp = ITOV(xp)->v_mount; 78 register struct fs *fs = VFSTOUFS(mntp)->um_fs; 79 extern struct vnodeops ufs_vnodeops, spec_inodeops; 80 register struct inode *ip, *iq; 81 register struct vnode *vp; 82 struct vnode *nvp; 83 struct buf *bp; 84 struct dinode *dp; 85 union ihead *ih; 86 int i, error; 87 88 ih = &ihead[INOHASH(dev, ino)]; 89 loop: 90 for (ip = ih->ih_chain[0]; ip != (struct inode *)ih; ip = ip->i_forw) { 91 if (ino != ip->i_number || dev != ip->i_dev) 92 continue; 93 if ((ip->i_flag&ILOCKED) != 0) { 94 ip->i_flag |= IWANT; 95 sleep((caddr_t)ip, PINOD); 96 goto loop; 97 } 98 if (vget(ITOV(ip))) 99 goto loop; 100 *ipp = ip; 101 return(0); 102 } 103 /* 104 * Allocate a new inode. 105 */ 106 if (error = getnewvnode(VT_UFS, mntp, &ufs_vnodeops, &nvp)) { 107 *ipp = 0; 108 return (error); 109 } 110 ip = VTOI(nvp); 111 ip->i_vnode = nvp; 112 ip->i_flag = 0; 113 ip->i_devvp = 0; 114 ip->i_mode = 0; 115 ip->i_diroff = 0; 116 #ifdef QUOTA 117 for (i = 0; i < MAXQUOTAS; i++) 118 ip->i_dquot[i] = NODQUOT; 119 #endif 120 /* 121 * Put it onto its hash chain and lock it so that other requests for 122 * this inode will block if they arrive while we are sleeping waiting 123 * for old data structures to be purged or for the contents of the 124 * disk portion of this inode to be read. 125 */ 126 ip->i_dev = dev; 127 ip->i_number = ino; 128 insque(ip, ih); 129 ILOCK(ip); 130 /* 131 * Read in the disk contents for the inode. 132 */ 133 if (error = bread(VFSTOUFS(mntp)->um_devvp, fsbtodb(fs, itod(fs, ino)), 134 (int)fs->fs_bsize, NOCRED, &bp)) { 135 /* 136 * The inode does not contain anything useful, so it would 137 * be misleading to leave it on its hash chain. 138 * Iput() will take care of putting it back on the free list. 139 */ 140 remque(ip); 141 ip->i_forw = ip; 142 ip->i_back = ip; 143 /* 144 * Unlock and discard unneeded inode. 145 */ 146 iput(ip); 147 brelse(bp); 148 *ipp = 0; 149 return (error); 150 } 151 dp = bp->b_un.b_dino; 152 dp += itoo(fs, ino); 153 ip->i_din = *dp; 154 brelse(bp); 155 /* 156 * Initialize the associated vnode 157 */ 158 vp = ITOV(ip); 159 vp->v_type = IFTOVT(ip->i_mode); 160 if (vp->v_type == VFIFO) { 161 #ifdef FIFO 162 extern struct vnodeops fifo_inodeops; 163 vp->v_op = &fifo_inodeops; 164 #else 165 iput(ip); 166 *ipp = 0; 167 return (EOPNOTSUPP); 168 #endif /* FIFO */ 169 } 170 if (vp->v_type == VCHR || vp->v_type == VBLK) { 171 vp->v_op = &spec_inodeops; 172 if (nvp = checkalias(vp, ip->i_rdev, mntp)) { 173 /* 174 * Reinitialize aliased inode. 175 */ 176 vp = nvp; 177 iq = VTOI(vp); 178 iq->i_vnode = vp; 179 iq->i_flag = 0; 180 ILOCK(iq); 181 iq->i_din = ip->i_din; 182 iq->i_dev = dev; 183 iq->i_number = ino; 184 insque(iq, ih); 185 /* 186 * Discard unneeded vnode 187 */ 188 ip->i_mode = 0; 189 iput(ip); 190 ip = iq; 191 } 192 } 193 if (ino == ROOTINO) 194 vp->v_flag |= VROOT; 195 /* 196 * Finish inode initialization. 197 */ 198 ip->i_fs = fs; 199 ip->i_devvp = VFSTOUFS(mntp)->um_devvp; 200 VREF(ip->i_devvp); 201 /* 202 * Set up a generation number for this inode if it does not 203 * already have one. This should only happen on old filesystems. 204 */ 205 if (ip->i_gen == 0) { 206 if (++nextgennumber < (u_long)time.tv_sec) 207 nextgennumber = time.tv_sec; 208 ip->i_gen = nextgennumber; 209 if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0) 210 ip->i_flag |= IMOD; 211 } 212 *ipp = ip; 213 return (0); 214 } 215 216 /* 217 * Unlock and decrement the reference count of an inode structure. 218 */ 219 iput(ip) 220 register struct inode *ip; 221 { 222 223 if ((ip->i_flag & ILOCKED) == 0) 224 panic("iput"); 225 IUNLOCK(ip); 226 vrele(ITOV(ip)); 227 } 228 229 /* 230 * Last reference to an inode, write the inode out and if necessary, 231 * truncate and deallocate the file. 232 */ 233 ufs_inactive(vp) 234 struct vnode *vp; 235 { 236 register struct inode *ip = VTOI(vp); 237 int mode, error = 0; 238 239 if (prtactive && vp->v_usecount != 0) 240 vprint("ufs_inactive: pushing active", vp); 241 /* 242 * Get rid of inodes related to stale file handles. 243 */ 244 if (ip->i_mode == 0) { 245 if ((vp->v_flag & VXLOCK) == 0) 246 vgone(vp); 247 return (0); 248 } 249 ILOCK(ip); 250 if (ip->i_nlink <= 0 && (vp->v_mount->mnt_flag & MNT_RDONLY) == 0) { 251 #ifdef QUOTA 252 if (!getinoquota(ip)) 253 (void) chkiq(ip, -1, NOCRED, 0); 254 #endif 255 error = itrunc(ip, (u_long)0, 0); 256 mode = ip->i_mode; 257 ip->i_mode = 0; 258 ip->i_rdev = 0; 259 ip->i_flag |= IUPD|ICHG; 260 ifree(ip, ip->i_number, mode); 261 } 262 IUPDAT(ip, &time, &time, 0); 263 IUNLOCK(ip); 264 ip->i_flag = 0; 265 /* 266 * If we are done with the inode, reclaim it 267 * so that it can be reused immediately. 268 */ 269 if (vp->v_usecount == 0 && ip->i_mode == 0) 270 vgone(vp); 271 return (error); 272 } 273 274 /* 275 * Reclaim an inode so that it can be used for other purposes. 276 */ 277 ufs_reclaim(vp) 278 register struct vnode *vp; 279 { 280 register struct inode *ip = VTOI(vp); 281 int i; 282 283 if (prtactive && vp->v_usecount != 0) 284 vprint("ufs_reclaim: pushing active", vp); 285 /* 286 * Remove the inode from its hash chain. 287 */ 288 remque(ip); 289 ip->i_forw = ip; 290 ip->i_back = ip; 291 /* 292 * Purge old data structures associated with the inode. 293 */ 294 cache_purge(vp); 295 if (ip->i_devvp) { 296 vrele(ip->i_devvp); 297 ip->i_devvp = 0; 298 } 299 #ifdef QUOTA 300 for (i = 0; i < MAXQUOTAS; i++) { 301 if (ip->i_dquot[i] != NODQUOT) { 302 dqrele(vp, ip->i_dquot[i]); 303 ip->i_dquot[i] = NODQUOT; 304 } 305 } 306 #endif 307 ip->i_flag = 0; 308 return (0); 309 } 310 311 /* 312 * Check accessed and update flags on an inode structure. 313 * If any is on, update the inode with the current time. 314 * If waitfor is given, then must ensure I/O order, 315 * so wait for write to complete. 316 */ 317 iupdat(ip, ta, tm, waitfor) 318 register struct inode *ip; 319 struct timeval *ta, *tm; 320 int waitfor; 321 { 322 struct buf *bp; 323 struct vnode *vp = ITOV(ip); 324 struct dinode *dp; 325 register struct fs *fs; 326 int error; 327 328 fs = ip->i_fs; 329 if ((ip->i_flag & (IUPD|IACC|ICHG|IMOD)) == 0) 330 return (0); 331 if (vp->v_mount->mnt_flag & MNT_RDONLY) 332 return (0); 333 error = bread(ip->i_devvp, fsbtodb(fs, itod(fs, ip->i_number)), 334 (int)fs->fs_bsize, NOCRED, &bp); 335 if (error) { 336 brelse(bp); 337 return (error); 338 } 339 if (ip->i_flag&IACC) 340 ip->i_atime = ta->tv_sec; 341 if (ip->i_flag&IUPD) 342 ip->i_mtime = tm->tv_sec; 343 if (ip->i_flag&ICHG) 344 ip->i_ctime = time.tv_sec; 345 ip->i_flag &= ~(IUPD|IACC|ICHG|IMOD); 346 dp = bp->b_un.b_dino + itoo(fs, ip->i_number); 347 *dp = ip->i_din; 348 if (waitfor) { 349 return (bwrite(bp)); 350 } else { 351 bdwrite(bp); 352 return (0); 353 } 354 } 355 356 #define SINGLE 0 /* index of single indirect block */ 357 #define DOUBLE 1 /* index of double indirect block */ 358 #define TRIPLE 2 /* index of triple indirect block */ 359 /* 360 * Truncate the inode ip to at most length size. Free affected disk 361 * blocks -- the blocks of the file are removed in reverse order. 362 * 363 * NB: triple indirect blocks are untested. 364 */ 365 itrunc(oip, length, flags) 366 register struct inode *oip; 367 u_long length; 368 int flags; 369 { 370 register daddr_t lastblock; 371 daddr_t bn, lbn, lastiblock[NIADDR]; 372 register struct fs *fs; 373 register struct inode *ip; 374 struct buf *bp; 375 int offset, osize, size, level; 376 long count, nblocks, blocksreleased = 0; 377 register int i; 378 int aflags, error, allerror; 379 struct inode tip; 380 381 if (oip->i_size <= length) { 382 oip->i_flag |= ICHG|IUPD; 383 error = iupdat(oip, &time, &time, 1); 384 return (error); 385 } 386 /* 387 * Calculate index into inode's block list of 388 * last direct and indirect blocks (if any) 389 * which we want to keep. Lastblock is -1 when 390 * the file is truncated to 0. 391 */ 392 fs = oip->i_fs; 393 lastblock = lblkno(fs, length + fs->fs_bsize - 1) - 1; 394 lastiblock[SINGLE] = lastblock - NDADDR; 395 lastiblock[DOUBLE] = lastiblock[SINGLE] - NINDIR(fs); 396 lastiblock[TRIPLE] = lastiblock[DOUBLE] - NINDIR(fs) * NINDIR(fs); 397 nblocks = btodb(fs->fs_bsize); 398 /* 399 * Update the size of the file. If the file is not being 400 * truncated to a block boundry, the contents of the 401 * partial block following the end of the file must be 402 * zero'ed in case it ever become accessable again because 403 * of subsequent file growth. 404 */ 405 osize = oip->i_size; 406 offset = blkoff(fs, length); 407 if (offset == 0) { 408 oip->i_size = length; 409 } else { 410 lbn = lblkno(fs, length); 411 aflags = B_CLRBUF; 412 if (flags & IO_SYNC) 413 aflags |= B_SYNC; 414 #ifdef QUOTA 415 if (error = getinoquota(oip)) 416 return (error); 417 #endif 418 if (error = balloc(oip, lbn, offset, &bp, aflags)) 419 return (error); 420 oip->i_size = length; 421 size = blksize(fs, oip, lbn); 422 bn = bp->b_blkno; 423 count = howmany(size, CLBYTES); 424 for (i = 0; i < count; i++) 425 munhash(oip->i_devvp, bn + i * CLBYTES / DEV_BSIZE); 426 bzero(bp->b_un.b_addr + offset, (unsigned)(size - offset)); 427 brealloc(bp, size); 428 if (flags & IO_SYNC) 429 bwrite(bp); 430 else 431 bdwrite(bp); 432 } 433 /* 434 * Update file and block pointers 435 * on disk before we start freeing blocks. 436 * If we crash before free'ing blocks below, 437 * the blocks will be returned to the free list. 438 * lastiblock values are also normalized to -1 439 * for calls to indirtrunc below. 440 */ 441 tip = *oip; 442 tip.i_size = osize; 443 for (level = TRIPLE; level >= SINGLE; level--) 444 if (lastiblock[level] < 0) { 445 oip->i_ib[level] = 0; 446 lastiblock[level] = -1; 447 } 448 for (i = NDADDR - 1; i > lastblock; i--) 449 oip->i_db[i] = 0; 450 oip->i_flag |= ICHG|IUPD; 451 vinvalbuf(ITOV(oip), (length > 0)); 452 allerror = iupdat(oip, &time, &time, MNT_WAIT); 453 454 /* 455 * Indirect blocks first. 456 */ 457 ip = &tip; 458 for (level = TRIPLE; level >= SINGLE; level--) { 459 bn = ip->i_ib[level]; 460 if (bn != 0) { 461 error = indirtrunc(ip, bn, lastiblock[level], level, 462 &count); 463 if (error) 464 allerror = error; 465 blocksreleased += count; 466 if (lastiblock[level] < 0) { 467 ip->i_ib[level] = 0; 468 blkfree(ip, bn, (off_t)fs->fs_bsize); 469 blocksreleased += nblocks; 470 } 471 } 472 if (lastiblock[level] >= 0) 473 goto done; 474 } 475 476 /* 477 * All whole direct blocks or frags. 478 */ 479 for (i = NDADDR - 1; i > lastblock; i--) { 480 register off_t bsize; 481 482 bn = ip->i_db[i]; 483 if (bn == 0) 484 continue; 485 ip->i_db[i] = 0; 486 bsize = (off_t)blksize(fs, ip, i); 487 blkfree(ip, bn, bsize); 488 blocksreleased += btodb(bsize); 489 } 490 if (lastblock < 0) 491 goto done; 492 493 /* 494 * Finally, look for a change in size of the 495 * last direct block; release any frags. 496 */ 497 bn = ip->i_db[lastblock]; 498 if (bn != 0) { 499 off_t oldspace, newspace; 500 501 /* 502 * Calculate amount of space we're giving 503 * back as old block size minus new block size. 504 */ 505 oldspace = blksize(fs, ip, lastblock); 506 ip->i_size = length; 507 newspace = blksize(fs, ip, lastblock); 508 if (newspace == 0) 509 panic("itrunc: newspace"); 510 if (oldspace - newspace > 0) { 511 /* 512 * Block number of space to be free'd is 513 * the old block # plus the number of frags 514 * required for the storage we're keeping. 515 */ 516 bn += numfrags(fs, newspace); 517 blkfree(ip, bn, oldspace - newspace); 518 blocksreleased += btodb(oldspace - newspace); 519 } 520 } 521 done: 522 /* BEGIN PARANOIA */ 523 for (level = SINGLE; level <= TRIPLE; level++) 524 if (ip->i_ib[level] != oip->i_ib[level]) 525 panic("itrunc1"); 526 for (i = 0; i < NDADDR; i++) 527 if (ip->i_db[i] != oip->i_db[i]) 528 panic("itrunc2"); 529 /* END PARANOIA */ 530 oip->i_blocks -= blocksreleased; 531 if (oip->i_blocks < 0) /* sanity */ 532 oip->i_blocks = 0; 533 oip->i_flag |= ICHG; 534 #ifdef QUOTA 535 if (!getinoquota(oip)) 536 (void) chkdq(oip, -blocksreleased, NOCRED, 0); 537 #endif 538 return (allerror); 539 } 540 541 /* 542 * Release blocks associated with the inode ip and 543 * stored in the indirect block bn. Blocks are free'd 544 * in LIFO order up to (but not including) lastbn. If 545 * level is greater than SINGLE, the block is an indirect 546 * block and recursive calls to indirtrunc must be used to 547 * cleanse other indirect blocks. 548 * 549 * NB: triple indirect blocks are untested. 550 */ 551 indirtrunc(ip, bn, lastbn, level, countp) 552 register struct inode *ip; 553 daddr_t bn, lastbn; 554 int level; 555 long *countp; 556 { 557 register int i; 558 struct buf *bp; 559 register struct fs *fs = ip->i_fs; 560 register daddr_t *bap; 561 daddr_t *copy, nb, last; 562 long blkcount, factor; 563 int nblocks, blocksreleased = 0; 564 int error, allerror = 0; 565 566 /* 567 * Calculate index in current block of last 568 * block to be kept. -1 indicates the entire 569 * block so we need not calculate the index. 570 */ 571 factor = 1; 572 for (i = SINGLE; i < level; i++) 573 factor *= NINDIR(fs); 574 last = lastbn; 575 if (lastbn > 0) 576 last /= factor; 577 nblocks = btodb(fs->fs_bsize); 578 /* 579 * Get buffer of block pointers, zero those 580 * entries corresponding to blocks to be free'd, 581 * and update on disk copy first. 582 */ 583 error = bread(ip->i_devvp, fsbtodb(fs, bn), (int)fs->fs_bsize, 584 NOCRED, &bp); 585 if (error) { 586 brelse(bp); 587 *countp = 0; 588 return (error); 589 } 590 bap = bp->b_un.b_daddr; 591 MALLOC(copy, daddr_t *, fs->fs_bsize, M_TEMP, M_WAITOK); 592 bcopy((caddr_t)bap, (caddr_t)copy, (u_int)fs->fs_bsize); 593 bzero((caddr_t)&bap[last + 1], 594 (u_int)(NINDIR(fs) - (last + 1)) * sizeof (daddr_t)); 595 if (last == -1) 596 bp->b_flags |= B_INVAL; 597 error = bwrite(bp); 598 if (error) 599 allerror = error; 600 bap = copy; 601 602 /* 603 * Recursively free totally unused blocks. 604 */ 605 for (i = NINDIR(fs) - 1; i > last; i--) { 606 nb = bap[i]; 607 if (nb == 0) 608 continue; 609 if (level > SINGLE) { 610 error = indirtrunc(ip, nb, (daddr_t)-1, level - 1, 611 &blkcount); 612 if (error) 613 allerror = error; 614 blocksreleased += blkcount; 615 } 616 blkfree(ip, nb, (off_t)fs->fs_bsize); 617 blocksreleased += nblocks; 618 } 619 620 /* 621 * Recursively free last partial block. 622 */ 623 if (level > SINGLE && lastbn >= 0) { 624 last = lastbn % factor; 625 nb = bap[i]; 626 if (nb != 0) { 627 error = indirtrunc(ip, nb, last, level - 1, &blkcount); 628 if (error) 629 allerror = error; 630 blocksreleased += blkcount; 631 } 632 } 633 FREE(copy, M_TEMP); 634 *countp = blocksreleased; 635 return (allerror); 636 } 637 638 /* 639 * Lock an inode. If its already locked, set the WANT bit and sleep. 640 */ 641 ilock(ip) 642 register struct inode *ip; 643 { 644 645 while (ip->i_flag & ILOCKED) { 646 ip->i_flag |= IWANT; 647 if (ip->i_spare0 == u.u_procp->p_pid) 648 panic("locking against myself"); 649 ip->i_spare1 = u.u_procp->p_pid; 650 (void) sleep((caddr_t)ip, PINOD); 651 } 652 ip->i_spare1 = 0; 653 ip->i_spare0 = u.u_procp->p_pid; 654 u.u_spare[0]++; 655 ip->i_flag |= ILOCKED; 656 } 657 658 /* 659 * Unlock an inode. If WANT bit is on, wakeup. 660 */ 661 iunlock(ip) 662 register struct inode *ip; 663 { 664 665 if ((ip->i_flag & ILOCKED) == 0) 666 vprint("iunlock: unlocked inode", ITOV(ip)); 667 ip->i_spare0 = 0; 668 u.u_spare[0]--; 669 ip->i_flag &= ~ILOCKED; 670 if (ip->i_flag&IWANT) { 671 ip->i_flag &= ~IWANT; 672 wakeup((caddr_t)ip); 673 } 674 } 675