1 /* ufs_inode.c 6.15 85/03/20 */ 2 3 #include "param.h" 4 #include "systm.h" 5 #include "mount.h" 6 #include "dir.h" 7 #include "user.h" 8 #include "inode.h" 9 #include "fs.h" 10 #include "buf.h" 11 #ifdef QUOTA 12 #include "quota.h" 13 #endif 14 #include "kernel.h" 15 16 #define INOHSZ 512 17 #if ((INOHSZ&(INOHSZ-1)) == 0) 18 #define INOHASH(dev,ino) (((dev)+(ino))&(INOHSZ-1)) 19 #else 20 #define INOHASH(dev,ino) (((unsigned)((dev)+(ino)))%INOHSZ) 21 #endif 22 23 union ihead { /* inode LRU cache, Chris Maltby */ 24 union ihead *ih_head[2]; 25 struct inode *ih_chain[2]; 26 } ihead[INOHSZ]; 27 28 struct inode *ifreeh, **ifreet; 29 30 /* 31 * Initialize hash links for inodes 32 * and build inode free list. 33 */ 34 ihinit() 35 { 36 register int i; 37 register struct inode *ip = inode; 38 register union ihead *ih = ihead; 39 40 for (i = INOHSZ; --i >= 0; ih++) { 41 ih->ih_head[0] = ih; 42 ih->ih_head[1] = ih; 43 } 44 ifreeh = ip; 45 ifreet = &ip->i_freef; 46 ip->i_freeb = &ifreeh; 47 ip->i_forw = ip; 48 ip->i_back = ip; 49 for (i = ninode; --i > 0; ) { 50 ++ip; 51 ip->i_forw = ip; 52 ip->i_back = ip; 53 *ifreet = ip; 54 ip->i_freeb = ifreet; 55 ifreet = &ip->i_freef; 56 } 57 ip->i_freef = NULL; 58 } 59 60 #ifdef notdef 61 /* 62 * Find an inode if it is incore. 63 * This is the equivalent, for inodes, 64 * of ``incore'' in bio.c or ``pfind'' in subr.c. 65 */ 66 struct inode * 67 ifind(dev, ino) 68 dev_t dev; 69 ino_t ino; 70 { 71 register struct inode *ip; 72 register union ihead *ih; 73 74 ih = &ihead[INOHASH(dev, ino)]; 75 for (ip = ih->ih_chain[0]; ip != (struct inode *)ih; ip = ip->i_forw) 76 if (ino==ip->i_number && dev==ip->i_dev) 77 return (ip); 78 return ((struct inode *)0); 79 } 80 #endif notdef 81 82 /* 83 * Look up an inode by device,inumber. 84 * If it is in core (in the inode structure), 85 * honor the locking protocol. 86 * If it is not in core, read it in from the 87 * specified device. 88 * If the inode is mounted on, perform 89 * the indicated indirection. 90 * In all cases, a pointer to a locked 91 * inode structure is returned. 92 * 93 * panic: no imt -- if the mounted file 94 * system is not in the mount table. 95 * "cannot happen" 96 */ 97 struct inode * 98 iget(dev, fs, ino) 99 dev_t dev; 100 register struct fs *fs; 101 ino_t ino; 102 { 103 register struct inode *ip; 104 register union ihead *ih; 105 register struct mount *mp; 106 register struct buf *bp; 107 register struct dinode *dp; 108 register struct inode *iq; 109 110 111 loop: 112 ih = &ihead[INOHASH(dev, ino)]; 113 for (ip = ih->ih_chain[0]; ip != (struct inode *)ih; ip = ip->i_forw) 114 if (ino == ip->i_number && dev == ip->i_dev) { 115 /* 116 * Following is essentially an inline expanded 117 * copy of igrab(), expanded inline for speed, 118 * and so that the test for a mounted on inode 119 * can be deferred until after we are sure that 120 * the inode isn't busy. 121 */ 122 if ((ip->i_flag&ILOCKED) != 0) { 123 ip->i_flag |= IWANT; 124 sleep((caddr_t)ip, PINOD); 125 goto loop; 126 } 127 if ((ip->i_flag&IMOUNT) != 0) { 128 for (mp = &mount[0]; mp < &mount[NMOUNT]; mp++) 129 if(mp->m_inodp == ip) { 130 dev = mp->m_dev; 131 fs = mp->m_bufp->b_un.b_fs; 132 ino = ROOTINO; 133 goto loop; 134 } 135 panic("no imt"); 136 } 137 if (ip->i_count == 0) { /* ino on free list */ 138 if (iq = ip->i_freef) 139 iq->i_freeb = ip->i_freeb; 140 else 141 ifreet = ip->i_freeb; 142 *ip->i_freeb = iq; 143 ip->i_freef = NULL; 144 ip->i_freeb = NULL; 145 } 146 ip->i_count++; 147 ip->i_flag |= ILOCKED; 148 return(ip); 149 } 150 151 if ((ip = ifreeh) == NULL) { 152 tablefull("inode"); 153 u.u_error = ENFILE; 154 return(NULL); 155 } 156 if (ip->i_count) 157 panic("free inode isn't"); 158 if (iq = ip->i_freef) 159 iq->i_freeb = &ifreeh; 160 ifreeh = iq; 161 ip->i_freef = NULL; 162 ip->i_freeb = NULL; 163 /* 164 * Now to take inode off the hash chain it was on 165 * (initially, or after an iflush, it is on a "hash chain" 166 * consisting entirely of itself, and pointed to by no-one, 167 * but that doesn't matter), and put it on the chain for 168 * its new (ino, dev) pair 169 */ 170 remque(ip); 171 insque(ip, ih); 172 ip->i_dev = dev; 173 ip->i_fs = fs; 174 ip->i_number = ino; 175 cacheinval(ip); 176 ip->i_flag = ILOCKED; 177 ip->i_count++; 178 ip->i_lastr = 0; 179 #ifdef QUOTA 180 dqrele(ip->i_dquot); 181 #endif 182 bp = bread(dev, fsbtodb(fs, itod(fs, ino)), (int)fs->fs_bsize); 183 /* 184 * Check I/O errors 185 */ 186 if ((bp->b_flags&B_ERROR) != 0) { 187 brelse(bp); 188 /* 189 * the inode doesn't contain anything useful, so it would 190 * be misleading to leave it on its hash chain. 191 * 'iput' will take care of putting it back on the free list. 192 */ 193 remque(ip); 194 ip->i_forw = ip; 195 ip->i_back = ip; 196 /* 197 * we also loose its inumber, just in case (as iput 198 * doesn't do that any more) - but as it isn't on its 199 * hash chain, I doubt if this is really necessary .. kre 200 * (probably the two methods are interchangable) 201 */ 202 ip->i_number = 0; 203 #ifdef QUOTA 204 ip->i_dquot = NODQUOT; 205 #endif 206 iput(ip); 207 return(NULL); 208 } 209 dp = bp->b_un.b_dino; 210 dp += itoo(fs, ino); 211 ip->i_ic = dp->di_ic; 212 brelse(bp); 213 #ifdef QUOTA 214 if (ip->i_mode == 0) 215 ip->i_dquot = NODQUOT; 216 else 217 ip->i_dquot = inoquota(ip); 218 #endif 219 return (ip); 220 } 221 222 /* 223 * Convert a pointer to an inode into a reference to an inode. 224 * 225 * This is basically the internal piece of iget (after the 226 * inode pointer is located) but without the test for mounted 227 * filesystems. It is caller's responsibility to check that 228 * the inode pointer is valid. 229 */ 230 igrab(ip) 231 register struct inode *ip; 232 { 233 while ((ip->i_flag&ILOCKED) != 0) { 234 ip->i_flag |= IWANT; 235 sleep((caddr_t)ip, PINOD); 236 } 237 if (ip->i_count == 0) { /* ino on free list */ 238 register struct inode *iq; 239 240 if (iq = ip->i_freef) 241 iq->i_freeb = ip->i_freeb; 242 else 243 ifreet = ip->i_freeb; 244 *ip->i_freeb = iq; 245 ip->i_freef = NULL; 246 ip->i_freeb = NULL; 247 } 248 ip->i_count++; 249 ip->i_flag |= ILOCKED; 250 } 251 252 /* 253 * Decrement reference count of 254 * an inode structure. 255 * On the last reference, 256 * write the inode out and if necessary, 257 * truncate and deallocate the file. 258 */ 259 iput(ip) 260 register struct inode *ip; 261 { 262 263 if ((ip->i_flag & ILOCKED) == 0) 264 panic("iput"); 265 IUNLOCK(ip); 266 irele(ip); 267 } 268 269 irele(ip) 270 register struct inode *ip; 271 { 272 int mode; 273 274 if (ip->i_count == 1) { 275 ip->i_flag |= ILOCKED; 276 if (ip->i_nlink <= 0 && ip->i_fs->fs_ronly == 0) { 277 itrunc(ip, (u_long)0); 278 mode = ip->i_mode; 279 ip->i_mode = 0; 280 ip->i_rdev = 0; 281 ip->i_flag |= IUPD|ICHG; 282 ifree(ip, ip->i_number, mode); 283 #ifdef QUOTA 284 (void) chkiq(ip->i_dev, ip, ip->i_uid, 0); 285 dqrele(ip->i_dquot); 286 ip->i_dquot = NODQUOT; 287 #endif 288 } 289 IUPDAT(ip, &time, &time, 0); 290 IUNLOCK(ip); 291 ip->i_flag = 0; 292 /* 293 * Put the inode on the end of the free list. 294 * Possibly in some cases it would be better to 295 * put the inode at the head of the free list, 296 * (eg: where i_mode == 0 || i_number == 0) 297 * but I will think about that later .. kre 298 * (i_number is rarely 0 - only after an i/o error in iget, 299 * where i_mode == 0, the inode will probably be wanted 300 * again soon for an ialloc, so possibly we should keep it) 301 */ 302 if (ifreeh) { 303 *ifreet = ip; 304 ip->i_freeb = ifreet; 305 } else { 306 ifreeh = ip; 307 ip->i_freeb = &ifreeh; 308 } 309 ip->i_freef = NULL; 310 ifreet = &ip->i_freef; 311 } else if (!(ip->i_flag & ILOCKED)) 312 ITIMES(ip, &time, &time); 313 ip->i_count--; 314 } 315 316 /* 317 * Check accessed and update flags on 318 * an inode structure. 319 * If any is on, update the inode 320 * with the current time. 321 * If waitfor is given, then must insure 322 * i/o order so wait for write to complete. 323 */ 324 iupdat(ip, ta, tm, waitfor) 325 register struct inode *ip; 326 struct timeval *ta, *tm; 327 int waitfor; 328 { 329 register struct buf *bp; 330 struct dinode *dp; 331 register struct fs *fp; 332 333 fp = ip->i_fs; 334 if ((ip->i_flag & (IUPD|IACC|ICHG|IMOD)) != 0) { 335 if (fp->fs_ronly) 336 return; 337 bp = bread(ip->i_dev, fsbtodb(fp, itod(fp, ip->i_number)), 338 (int)fp->fs_bsize); 339 if (bp->b_flags & B_ERROR) { 340 brelse(bp); 341 return; 342 } 343 if (ip->i_flag&IACC) 344 ip->i_atime = ta->tv_sec; 345 if (ip->i_flag&IUPD) 346 ip->i_mtime = tm->tv_sec; 347 if (ip->i_flag&ICHG) 348 ip->i_ctime = time.tv_sec; 349 ip->i_flag &= ~(IUPD|IACC|ICHG|IMOD); 350 dp = bp->b_un.b_dino + itoo(fp, ip->i_number); 351 dp->di_ic = ip->i_ic; 352 if (waitfor) 353 bwrite(bp); 354 else 355 bdwrite(bp); 356 } 357 } 358 359 #define SINGLE 0 /* index of single indirect block */ 360 #define DOUBLE 1 /* index of double indirect block */ 361 #define TRIPLE 2 /* index of triple indirect block */ 362 /* 363 * Truncate the inode ip to at most 364 * length size. Free affected disk 365 * blocks -- the blocks of the file 366 * are removed in reverse order. 367 * 368 * NB: triple indirect blocks are untested. 369 */ 370 itrunc(oip, length) 371 register struct inode *oip; 372 u_long length; 373 { 374 register daddr_t lastblock; 375 daddr_t bn, lastiblock[NIADDR]; 376 register struct fs *fs; 377 register struct inode *ip; 378 struct buf *bp; 379 int offset, lbn, osize, size, error, count, level, s; 380 long nblocks, blocksreleased = 0; 381 register int i; 382 dev_t dev; 383 struct inode tip; 384 extern long indirtrunc(); 385 extern struct cmap *mfind(); 386 387 if (oip->i_size <= length) { 388 oip->i_flag |= ICHG|IUPD; 389 iupdat(oip, &time, &time, 1); 390 return; 391 } 392 /* 393 * Calculate index into inode's block list of 394 * last direct and indirect blocks (if any) 395 * which we want to keep. Lastblock is -1 when 396 * the file is truncated to 0. 397 */ 398 fs = oip->i_fs; 399 lastblock = lblkno(fs, length + fs->fs_bsize - 1) - 1; 400 lastiblock[SINGLE] = lastblock - NDADDR; 401 lastiblock[DOUBLE] = lastiblock[SINGLE] - NINDIR(fs); 402 lastiblock[TRIPLE] = lastiblock[DOUBLE] - NINDIR(fs) * NINDIR(fs); 403 nblocks = btodb(fs->fs_bsize); 404 /* 405 * Update the size of the file. If the file is not being 406 * truncated to a block boundry, the contents of the 407 * partial block following the end of the file must be 408 * zero'ed in case it ever become accessable again because 409 * of subsequent file growth. 410 */ 411 osize = oip->i_size; 412 offset = blkoff(fs, length); 413 if (offset == 0) { 414 oip->i_size = length; 415 } else { 416 lbn = lblkno(fs, length); 417 bn = fsbtodb(fs, bmap(oip, lbn, B_WRITE, offset)); 418 if (u.u_error || (long)bn < 0) 419 return; 420 oip->i_size = length; 421 size = blksize(fs, oip, lbn); 422 count = howmany(size, DEV_BSIZE); 423 dev = oip->i_dev; 424 s = splimp(); 425 for (i = 0; i < count; i += CLSIZE) 426 if (mfind(dev, bn + i)) 427 munhash(dev, bn + i); 428 splx(s); 429 bp = bread(dev, bn, size); 430 if (bp->b_flags & B_ERROR) { 431 u.u_error = EIO; 432 oip->i_size = osize; 433 brelse(bp); 434 return; 435 } 436 bzero(bp->b_un.b_addr + offset, size - offset); 437 bdwrite(bp); 438 } 439 /* 440 * Update file and block pointers 441 * on disk before we start freeing blocks. 442 * If we crash before free'ing blocks below, 443 * the blocks will be returned to the free list. 444 * lastiblock values are also normalized to -1 445 * for calls to indirtrunc below. 446 */ 447 tip = *oip; 448 tip.i_size = osize; 449 for (level = TRIPLE; level >= SINGLE; level--) 450 if (lastiblock[level] < 0) { 451 oip->i_ib[level] = 0; 452 lastiblock[level] = -1; 453 } 454 for (i = NDADDR - 1; i > lastblock; i--) 455 oip->i_db[i] = 0; 456 oip->i_flag |= ICHG|IUPD; 457 syncip(oip); 458 459 /* 460 * Indirect blocks first. 461 */ 462 ip = &tip; 463 for (level = TRIPLE; level >= SINGLE; level--) { 464 bn = ip->i_ib[level]; 465 if (bn != 0) { 466 blocksreleased += 467 indirtrunc(ip, bn, lastiblock[level], level); 468 if (lastiblock[level] < 0) { 469 ip->i_ib[level] = 0; 470 free(ip, bn, (off_t)fs->fs_bsize); 471 blocksreleased += nblocks; 472 } 473 } 474 if (lastiblock[level] >= 0) 475 goto done; 476 } 477 478 /* 479 * All whole direct blocks or frags. 480 */ 481 for (i = NDADDR - 1; i > lastblock; i--) { 482 register int size; 483 484 bn = ip->i_db[i]; 485 if (bn == 0) 486 continue; 487 ip->i_db[i] = 0; 488 size = (off_t)blksize(fs, ip, i); 489 free(ip, bn, size); 490 blocksreleased += btodb(size); 491 } 492 if (lastblock < 0) 493 goto done; 494 495 /* 496 * Finally, look for a change in size of the 497 * last direct block; release any frags. 498 */ 499 bn = ip->i_db[lastblock]; 500 if (bn != 0) { 501 int oldspace, newspace; 502 503 /* 504 * Calculate amount of space we're giving 505 * back as old block size minus new block size. 506 */ 507 oldspace = blksize(fs, ip, lastblock); 508 ip->i_size = length; 509 newspace = blksize(fs, ip, lastblock); 510 if (newspace == 0) 511 panic("itrunc: newspace"); 512 if (oldspace - newspace > 0) { 513 /* 514 * Block number of space to be free'd is 515 * the old block # plus the number of frags 516 * required for the storage we're keeping. 517 */ 518 bn += numfrags(fs, newspace); 519 free(ip, bn, oldspace - newspace); 520 blocksreleased += btodb(oldspace - newspace); 521 } 522 } 523 done: 524 /* BEGIN PARANOIA */ 525 for (level = SINGLE; level <= TRIPLE; level++) 526 if (ip->i_ib[level] != oip->i_ib[level]) 527 panic("itrunc1"); 528 for (i = 0; i < NDADDR; i++) 529 if (ip->i_db[i] != oip->i_db[i]) 530 panic("itrunc2"); 531 /* END PARANOIA */ 532 oip->i_blocks -= blocksreleased; 533 if (oip->i_blocks < 0) /* sanity */ 534 oip->i_blocks = 0; 535 oip->i_flag |= ICHG; 536 #ifdef QUOTA 537 (void) chkdq(oip, -blocksreleased, 0); 538 #endif 539 } 540 541 /* 542 * Release blocks associated with the inode ip and 543 * stored in the indirect block bn. Blocks are free'd 544 * in LIFO order up to (but not including) lastbn. If 545 * level is greater than SINGLE, the block is an indirect 546 * block and recursive calls to indirtrunc must be used to 547 * cleanse other indirect blocks. 548 * 549 * NB: triple indirect blocks are untested. 550 */ 551 long 552 indirtrunc(ip, bn, lastbn, level) 553 register struct inode *ip; 554 daddr_t bn, lastbn; 555 int level; 556 { 557 register int i; 558 struct buf *bp, *copy; 559 register daddr_t *bap; 560 register struct fs *fs = ip->i_fs; 561 daddr_t nb, last; 562 long factor; 563 int blocksreleased = 0, nblocks; 564 565 /* 566 * Calculate index in current block of last 567 * block to be kept. -1 indicates the entire 568 * block so we need not calculate the index. 569 */ 570 factor = 1; 571 for (i = SINGLE; i < level; i++) 572 factor *= NINDIR(fs); 573 last = lastbn; 574 if (lastbn > 0) 575 last /= factor; 576 nblocks = btodb(fs->fs_bsize); 577 /* 578 * Get buffer of block pointers, zero those 579 * entries corresponding to blocks to be free'd, 580 * and update on disk copy first. 581 */ 582 copy = geteblk((int)fs->fs_bsize); 583 bp = bread(ip->i_dev, fsbtodb(fs, bn), (int)fs->fs_bsize); 584 if (bp->b_flags&B_ERROR) { 585 brelse(copy); 586 brelse(bp); 587 return (0); 588 } 589 bap = bp->b_un.b_daddr; 590 bcopy((caddr_t)bap, (caddr_t)copy->b_un.b_daddr, (u_int)fs->fs_bsize); 591 bzero((caddr_t)&bap[last + 1], 592 (u_int)(NINDIR(fs) - (last + 1)) * sizeof (daddr_t)); 593 bwrite(bp); 594 bp = copy, bap = bp->b_un.b_daddr; 595 596 /* 597 * Recursively free totally unused blocks. 598 */ 599 for (i = NINDIR(fs) - 1; i > last; i--) { 600 nb = bap[i]; 601 if (nb == 0) 602 continue; 603 if (level > SINGLE) 604 blocksreleased += 605 indirtrunc(ip, nb, (daddr_t)-1, level - 1); 606 free(ip, nb, (int)fs->fs_bsize); 607 blocksreleased += nblocks; 608 } 609 610 /* 611 * Recursively free last partial block. 612 */ 613 if (level > SINGLE && lastbn >= 0) { 614 last = lastbn % factor; 615 nb = bap[i]; 616 if (nb != 0) 617 blocksreleased += indirtrunc(ip, nb, last, level - 1); 618 } 619 brelse(bp); 620 return (blocksreleased); 621 } 622 623 /* 624 * remove any inodes in the inode cache belonging to dev 625 * 626 * There should not be any active ones, return error if any are found 627 * (nb: this is a user error, not a system err) 628 * 629 * Also, count the references to dev by block devices - this really 630 * has nothing to do with the object of the procedure, but as we have 631 * to scan the inode table here anyway, we might as well get the 632 * extra benefit. 633 * 634 * this is called from sumount()/sys3.c when dev is being unmounted 635 */ 636 #ifdef QUOTA 637 iflush(dev, iq) 638 dev_t dev; 639 struct inode *iq; 640 #else 641 iflush(dev) 642 dev_t dev; 643 #endif 644 { 645 register struct inode *ip; 646 register open = 0; 647 648 for (ip = inode; ip < inodeNINODE; ip++) { 649 #ifdef QUOTA 650 if (ip != iq && ip->i_dev == dev) 651 #else 652 if (ip->i_dev == dev) 653 #endif 654 if (ip->i_count) 655 return(-1); 656 else { 657 remque(ip); 658 ip->i_forw = ip; 659 ip->i_back = ip; 660 /* 661 * as i_count == 0, the inode was on the free 662 * list already, just leave it there, it will 663 * fall off the bottom eventually. We could 664 * perhaps move it to the head of the free 665 * list, but as umounts are done so 666 * infrequently, we would gain very little, 667 * while making the code bigger. 668 */ 669 #ifdef QUOTA 670 dqrele(ip->i_dquot); 671 ip->i_dquot = NODQUOT; 672 #endif 673 } 674 else if (ip->i_count && (ip->i_mode&IFMT)==IFBLK && 675 ip->i_rdev == dev) 676 open++; 677 } 678 return (open); 679 } 680 681 /* 682 * Lock an inode. If its already locked, set the WANT bit and sleep. 683 */ 684 ilock(ip) 685 register struct inode *ip; 686 { 687 688 ILOCK(ip); 689 } 690 691 /* 692 * Unlock an inode. If WANT bit is on, wakeup. 693 */ 694 iunlock(ip) 695 register struct inode *ip; 696 { 697 698 IUNLOCK(ip); 699 } 700