1 /* 2 * Copyright (c) 1982, 1986 Regents of the University of California. 3 * All rights reserved. The Berkeley software License Agreement 4 * specifies the terms and conditions for redistribution. 5 * 6 * @(#)lfs_inode.c 7.5 (Berkeley) 06/21/87 7 */ 8 9 #include "param.h" 10 #include "systm.h" 11 #include "mount.h" 12 #include "dir.h" 13 #include "user.h" 14 #include "inode.h" 15 #include "fs.h" 16 #include "buf.h" 17 #include "cmap.h" 18 #ifdef QUOTA 19 #include "quota.h" 20 #endif 21 #include "kernel.h" 22 #include "malloc.h" 23 24 #define INOHSZ 512 25 #if ((INOHSZ&(INOHSZ-1)) == 0) 26 #define INOHASH(dev,ino) (((dev)+(ino))&(INOHSZ-1)) 27 #else 28 #define INOHASH(dev,ino) (((unsigned)((dev)+(ino)))%INOHSZ) 29 #endif 30 31 union ihead { /* inode LRU cache, Chris Maltby */ 32 union ihead *ih_head[2]; 33 struct inode *ih_chain[2]; 34 } ihead[INOHSZ]; 35 36 struct inode *ifreeh, **ifreet; 37 38 /* 39 * Initialize hash links for inodes 40 * and build inode free list. 41 */ 42 ihinit() 43 { 44 register int i; 45 register struct inode *ip = inode; 46 register union ihead *ih = ihead; 47 48 for (i = INOHSZ; --i >= 0; ih++) { 49 ih->ih_head[0] = ih; 50 ih->ih_head[1] = ih; 51 } 52 ifreeh = ip; 53 ifreet = &ip->i_freef; 54 ip->i_freeb = &ifreeh; 55 ip->i_forw = ip; 56 ip->i_back = ip; 57 for (i = ninode; --i > 0; ) { 58 ++ip; 59 ip->i_forw = ip; 60 ip->i_back = ip; 61 *ifreet = ip; 62 ip->i_freeb = ifreet; 63 ifreet = &ip->i_freef; 64 } 65 ip->i_freef = NULL; 66 } 67 68 #ifdef notdef 69 /* 70 * Find an inode if it is incore. 71 * This is the equivalent, for inodes, 72 * of ``incore'' in bio.c or ``pfind'' in subr.c. 73 */ 74 struct inode * 75 ifind(dev, ino) 76 dev_t dev; 77 ino_t ino; 78 { 79 register struct inode *ip; 80 register union ihead *ih; 81 82 ih = &ihead[INOHASH(dev, ino)]; 83 for (ip = ih->ih_chain[0]; ip != (struct inode *)ih; ip = ip->i_forw) 84 if (ino==ip->i_number && dev==ip->i_dev) 85 return (ip); 86 return ((struct inode *)0); 87 } 88 #endif notdef 89 90 /* 91 * Look up an inode by device,inumber. 92 * If it is in core (in the inode structure), 93 * honor the locking protocol. 94 * If it is not in core, read it in from the 95 * specified device. 96 * If the inode is mounted on, perform 97 * the indicated indirection. 98 * In all cases, a pointer to a locked 99 * inode structure is returned. 100 * 101 * panic: no imt -- if the mounted file 102 * system is not in the mount table. 103 * "cannot happen" 104 */ 105 struct inode * 106 iget(dev, fs, ino) 107 dev_t dev; 108 register struct fs *fs; 109 ino_t ino; 110 { 111 register struct inode *ip; 112 register union ihead *ih; 113 register struct mount *mp; 114 register struct buf *bp; 115 register struct dinode *dp; 116 register struct inode *iq; 117 118 loop: 119 ih = &ihead[INOHASH(dev, ino)]; 120 for (ip = ih->ih_chain[0]; ip != (struct inode *)ih; ip = ip->i_forw) 121 if (ino == ip->i_number && dev == ip->i_dev) { 122 /* 123 * Following is essentially an inline expanded 124 * copy of igrab(), expanded inline for speed, 125 * and so that the test for a mounted on inode 126 * can be deferred until after we are sure that 127 * the inode isn't busy. 128 */ 129 if ((ip->i_flag&ILOCKED) != 0) { 130 ip->i_flag |= IWANT; 131 sleep((caddr_t)ip, PINOD); 132 goto loop; 133 } 134 if ((ip->i_flag&IMOUNT) != 0) { 135 for (mp = &mount[0]; mp < &mount[NMOUNT]; mp++) 136 if(mp->m_inodp == ip) { 137 dev = mp->m_dev; 138 fs = mp->m_fs; 139 ino = ROOTINO; 140 goto loop; 141 } 142 panic("no imt"); 143 } 144 if (ip->i_count == 0) { /* ino on free list */ 145 if (iq = ip->i_freef) 146 iq->i_freeb = ip->i_freeb; 147 else 148 ifreet = ip->i_freeb; 149 *ip->i_freeb = iq; 150 ip->i_freef = NULL; 151 ip->i_freeb = NULL; 152 } 153 ip->i_count++; 154 ip->i_flag |= ILOCKED; 155 return(ip); 156 } 157 158 if ((ip = ifreeh) == NULL) { 159 tablefull("inode"); 160 u.u_error = ENFILE; 161 return(NULL); 162 } 163 if (ip->i_count) 164 panic("free inode isn't"); 165 if (iq = ip->i_freef) 166 iq->i_freeb = &ifreeh; 167 ifreeh = iq; 168 ip->i_freef = NULL; 169 ip->i_freeb = NULL; 170 /* 171 * Now to take inode off the hash chain it was on 172 * (initially, or after an iflush, it is on a "hash chain" 173 * consisting entirely of itself, and pointed to by no-one, 174 * but that doesn't matter), and put it on the chain for 175 * its new (ino, dev) pair 176 */ 177 remque(ip); 178 insque(ip, ih); 179 ip->i_dev = dev; 180 ip->i_fs = fs; 181 ip->i_number = ino; 182 cacheinval(ip); 183 ip->i_flag = ILOCKED; 184 ip->i_count++; 185 ip->i_lastr = 0; 186 #ifdef QUOTA 187 dqrele(ip->i_dquot); 188 #endif 189 bp = bread(dev, fsbtodb(fs, itod(fs, ino)), (int)fs->fs_bsize); 190 /* 191 * Check I/O errors 192 */ 193 if ((bp->b_flags&B_ERROR) != 0) { 194 brelse(bp); 195 /* 196 * the inode doesn't contain anything useful, so it would 197 * be misleading to leave it on its hash chain. 198 * 'iput' will take care of putting it back on the free list. 199 */ 200 remque(ip); 201 ip->i_forw = ip; 202 ip->i_back = ip; 203 /* 204 * we also loose its inumber, just in case (as iput 205 * doesn't do that any more) - but as it isn't on its 206 * hash chain, I doubt if this is really necessary .. kre 207 * (probably the two methods are interchangable) 208 */ 209 ip->i_number = 0; 210 #ifdef QUOTA 211 ip->i_dquot = NODQUOT; 212 #endif 213 iput(ip); 214 return(NULL); 215 } 216 dp = bp->b_un.b_dino; 217 dp += itoo(fs, ino); 218 ip->i_ic = dp->di_ic; 219 brelse(bp); 220 #ifdef QUOTA 221 if (ip->i_mode == 0) 222 ip->i_dquot = NODQUOT; 223 else 224 ip->i_dquot = inoquota(ip); 225 #endif 226 return (ip); 227 } 228 229 /* 230 * Convert a pointer to an inode into a reference to an inode. 231 * 232 * This is basically the internal piece of iget (after the 233 * inode pointer is located) but without the test for mounted 234 * filesystems. It is caller's responsibility to check that 235 * the inode pointer is valid. 236 */ 237 igrab(ip) 238 register struct inode *ip; 239 { 240 while ((ip->i_flag&ILOCKED) != 0) { 241 ip->i_flag |= IWANT; 242 sleep((caddr_t)ip, PINOD); 243 } 244 if (ip->i_count == 0) { /* ino on free list */ 245 register struct inode *iq; 246 247 if (iq = ip->i_freef) 248 iq->i_freeb = ip->i_freeb; 249 else 250 ifreet = ip->i_freeb; 251 *ip->i_freeb = iq; 252 ip->i_freef = NULL; 253 ip->i_freeb = NULL; 254 } 255 ip->i_count++; 256 ip->i_flag |= ILOCKED; 257 } 258 259 /* 260 * Decrement reference count of 261 * an inode structure. 262 * On the last reference, 263 * write the inode out and if necessary, 264 * truncate and deallocate the file. 265 */ 266 iput(ip) 267 register struct inode *ip; 268 { 269 270 if ((ip->i_flag & ILOCKED) == 0) 271 panic("iput"); 272 IUNLOCK(ip); 273 irele(ip); 274 } 275 276 irele(ip) 277 register struct inode *ip; 278 { 279 int mode; 280 281 if (ip->i_count == 1) { 282 ip->i_flag |= ILOCKED; 283 if (ip->i_nlink <= 0 && ip->i_fs->fs_ronly == 0) { 284 itrunc(ip, (u_long)0); 285 mode = ip->i_mode; 286 ip->i_mode = 0; 287 ip->i_rdev = 0; 288 ip->i_flag |= IUPD|ICHG; 289 ifree(ip, ip->i_number, mode); 290 #ifdef QUOTA 291 (void) chkiq(ip->i_dev, ip, ip->i_uid, 0); 292 dqrele(ip->i_dquot); 293 ip->i_dquot = NODQUOT; 294 #endif 295 } 296 IUPDAT(ip, &time, &time, 0); 297 IUNLOCK(ip); 298 ip->i_flag = 0; 299 /* 300 * Put the inode on the end of the free list. 301 * Possibly in some cases it would be better to 302 * put the inode at the head of the free list, 303 * (eg: where i_mode == 0 || i_number == 0) 304 * but I will think about that later .. kre 305 * (i_number is rarely 0 - only after an i/o error in iget, 306 * where i_mode == 0, the inode will probably be wanted 307 * again soon for an ialloc, so possibly we should keep it) 308 */ 309 if (ifreeh) { 310 *ifreet = ip; 311 ip->i_freeb = ifreet; 312 } else { 313 ifreeh = ip; 314 ip->i_freeb = &ifreeh; 315 } 316 ip->i_freef = NULL; 317 ifreet = &ip->i_freef; 318 } else if (!(ip->i_flag & ILOCKED)) 319 ITIMES(ip, &time, &time); 320 ip->i_count--; 321 } 322 323 /* 324 * Check accessed and update flags on 325 * an inode structure. 326 * If any is on, update the inode 327 * with the current time. 328 * If waitfor is given, then must insure 329 * i/o order so wait for write to complete. 330 */ 331 iupdat(ip, ta, tm, waitfor) 332 register struct inode *ip; 333 struct timeval *ta, *tm; 334 int waitfor; 335 { 336 register struct buf *bp; 337 struct dinode *dp; 338 register struct fs *fs; 339 340 fs = ip->i_fs; 341 if ((ip->i_flag & (IUPD|IACC|ICHG|IMOD)) != 0) { 342 if (fs->fs_ronly) 343 return; 344 bp = bread(ip->i_dev, fsbtodb(fs, itod(fs, ip->i_number)), 345 (int)fs->fs_bsize); 346 if (bp->b_flags & B_ERROR) { 347 brelse(bp); 348 return; 349 } 350 if (ip->i_flag&IACC) 351 ip->i_atime = ta->tv_sec; 352 if (ip->i_flag&IUPD) 353 ip->i_mtime = tm->tv_sec; 354 if (ip->i_flag&ICHG) 355 ip->i_ctime = time.tv_sec; 356 ip->i_flag &= ~(IUPD|IACC|ICHG|IMOD); 357 dp = bp->b_un.b_dino + itoo(fs, ip->i_number); 358 dp->di_ic = ip->i_ic; 359 if (waitfor) 360 bwrite(bp); 361 else 362 bdwrite(bp); 363 } 364 } 365 366 #define SINGLE 0 /* index of single indirect block */ 367 #define DOUBLE 1 /* index of double indirect block */ 368 #define TRIPLE 2 /* index of triple indirect block */ 369 /* 370 * Truncate the inode ip to at most 371 * length size. Free affected disk 372 * blocks -- the blocks of the file 373 * are removed in reverse order. 374 * 375 * NB: triple indirect blocks are untested. 376 */ 377 itrunc(oip, length) 378 register struct inode *oip; 379 u_long length; 380 { 381 register daddr_t lastblock; 382 daddr_t bn, lbn, lastiblock[NIADDR]; 383 register struct fs *fs; 384 register struct inode *ip; 385 struct buf *bp; 386 int offset, osize, size, count, level; 387 long nblocks, blocksreleased = 0; 388 register int i; 389 dev_t dev; 390 struct inode tip; 391 extern long indirtrunc(); 392 393 if (oip->i_size <= length) { 394 oip->i_flag |= ICHG|IUPD; 395 iupdat(oip, &time, &time, 1); 396 return; 397 } 398 /* 399 * Calculate index into inode's block list of 400 * last direct and indirect blocks (if any) 401 * which we want to keep. Lastblock is -1 when 402 * the file is truncated to 0. 403 */ 404 fs = oip->i_fs; 405 lastblock = lblkno(fs, length + fs->fs_bsize - 1) - 1; 406 lastiblock[SINGLE] = lastblock - NDADDR; 407 lastiblock[DOUBLE] = lastiblock[SINGLE] - NINDIR(fs); 408 lastiblock[TRIPLE] = lastiblock[DOUBLE] - NINDIR(fs) * NINDIR(fs); 409 nblocks = btodb(fs->fs_bsize); 410 /* 411 * Update the size of the file. If the file is not being 412 * truncated to a block boundry, the contents of the 413 * partial block following the end of the file must be 414 * zero'ed in case it ever become accessable again because 415 * of subsequent file growth. 416 */ 417 osize = oip->i_size; 418 offset = blkoff(fs, length); 419 if (offset == 0) { 420 oip->i_size = length; 421 } else { 422 lbn = lblkno(fs, length); 423 bn = fsbtodb(fs, bmap(oip, lbn, B_WRITE, offset)); 424 if (u.u_error || (long)bn < 0) 425 return; 426 oip->i_size = length; 427 size = blksize(fs, oip, lbn); 428 count = howmany(size, CLBYTES); 429 dev = oip->i_dev; 430 for (i = 0; i < count; i++) 431 munhash(dev, bn + i * CLBYTES / DEV_BSIZE); 432 bp = bread(dev, bn, size); 433 if (bp->b_flags & B_ERROR) { 434 u.u_error = EIO; 435 oip->i_size = osize; 436 brelse(bp); 437 return; 438 } 439 bzero(bp->b_un.b_addr + offset, (unsigned)(size - offset)); 440 bdwrite(bp); 441 } 442 /* 443 * Update file and block pointers 444 * on disk before we start freeing blocks. 445 * If we crash before free'ing blocks below, 446 * the blocks will be returned to the free list. 447 * lastiblock values are also normalized to -1 448 * for calls to indirtrunc below. 449 */ 450 tip = *oip; 451 tip.i_size = osize; 452 for (level = TRIPLE; level >= SINGLE; level--) 453 if (lastiblock[level] < 0) { 454 oip->i_ib[level] = 0; 455 lastiblock[level] = -1; 456 } 457 for (i = NDADDR - 1; i > lastblock; i--) 458 oip->i_db[i] = 0; 459 oip->i_flag |= ICHG|IUPD; 460 syncip(oip); 461 462 /* 463 * Indirect blocks first. 464 */ 465 ip = &tip; 466 for (level = TRIPLE; level >= SINGLE; level--) { 467 bn = ip->i_ib[level]; 468 if (bn != 0) { 469 blocksreleased += 470 indirtrunc(ip, bn, lastiblock[level], level); 471 if (lastiblock[level] < 0) { 472 ip->i_ib[level] = 0; 473 blkfree(ip, bn, (off_t)fs->fs_bsize); 474 blocksreleased += nblocks; 475 } 476 } 477 if (lastiblock[level] >= 0) 478 goto done; 479 } 480 481 /* 482 * All whole direct blocks or frags. 483 */ 484 for (i = NDADDR - 1; i > lastblock; i--) { 485 register off_t bsize; 486 487 bn = ip->i_db[i]; 488 if (bn == 0) 489 continue; 490 ip->i_db[i] = 0; 491 bsize = (off_t)blksize(fs, ip, i); 492 blkfree(ip, bn, bsize); 493 blocksreleased += btodb(bsize); 494 } 495 if (lastblock < 0) 496 goto done; 497 498 /* 499 * Finally, look for a change in size of the 500 * last direct block; release any frags. 501 */ 502 bn = ip->i_db[lastblock]; 503 if (bn != 0) { 504 off_t oldspace, newspace; 505 506 /* 507 * Calculate amount of space we're giving 508 * back as old block size minus new block size. 509 */ 510 oldspace = blksize(fs, ip, lastblock); 511 ip->i_size = length; 512 newspace = blksize(fs, ip, lastblock); 513 if (newspace == 0) 514 panic("itrunc: newspace"); 515 if (oldspace - newspace > 0) { 516 /* 517 * Block number of space to be free'd is 518 * the old block # plus the number of frags 519 * required for the storage we're keeping. 520 */ 521 bn += numfrags(fs, newspace); 522 blkfree(ip, bn, oldspace - newspace); 523 blocksreleased += btodb(oldspace - newspace); 524 } 525 } 526 done: 527 /* BEGIN PARANOIA */ 528 for (level = SINGLE; level <= TRIPLE; level++) 529 if (ip->i_ib[level] != oip->i_ib[level]) 530 panic("itrunc1"); 531 for (i = 0; i < NDADDR; i++) 532 if (ip->i_db[i] != oip->i_db[i]) 533 panic("itrunc2"); 534 /* END PARANOIA */ 535 oip->i_blocks -= blocksreleased; 536 if (oip->i_blocks < 0) /* sanity */ 537 oip->i_blocks = 0; 538 oip->i_flag |= ICHG; 539 #ifdef QUOTA 540 (void) chkdq(oip, -blocksreleased, 0); 541 #endif 542 } 543 544 /* 545 * Release blocks associated with the inode ip and 546 * stored in the indirect block bn. Blocks are free'd 547 * in LIFO order up to (but not including) lastbn. If 548 * level is greater than SINGLE, the block is an indirect 549 * block and recursive calls to indirtrunc must be used to 550 * cleanse other indirect blocks. 551 * 552 * NB: triple indirect blocks are untested. 553 */ 554 long 555 indirtrunc(ip, bn, lastbn, level) 556 register struct inode *ip; 557 daddr_t bn, lastbn; 558 int level; 559 { 560 register int i; 561 struct buf *bp; 562 register struct fs *fs = ip->i_fs; 563 register daddr_t *bap; 564 daddr_t *copy, nb, last; 565 long factor; 566 int blocksreleased = 0, nblocks; 567 568 /* 569 * Calculate index in current block of last 570 * block to be kept. -1 indicates the entire 571 * block so we need not calculate the index. 572 */ 573 factor = 1; 574 for (i = SINGLE; i < level; i++) 575 factor *= NINDIR(fs); 576 last = lastbn; 577 if (lastbn > 0) 578 last /= factor; 579 nblocks = btodb(fs->fs_bsize); 580 /* 581 * Get buffer of block pointers, zero those 582 * entries corresponding to blocks to be free'd, 583 * and update on disk copy first. 584 */ 585 bp = bread(ip->i_dev, fsbtodb(fs, bn), (int)fs->fs_bsize); 586 if (bp->b_flags&B_ERROR) { 587 brelse(bp); 588 return (0); 589 } 590 bap = bp->b_un.b_daddr; 591 MALLOC(copy, daddr_t *, fs->fs_bsize, M_TEMP, M_WAITOK); 592 bcopy((caddr_t)bap, (caddr_t)copy, (u_int)fs->fs_bsize); 593 bzero((caddr_t)&bap[last + 1], 594 (u_int)(NINDIR(fs) - (last + 1)) * sizeof (daddr_t)); 595 bwrite(bp); 596 bap = copy; 597 598 /* 599 * Recursively free totally unused blocks. 600 */ 601 for (i = NINDIR(fs) - 1; i > last; i--) { 602 nb = bap[i]; 603 if (nb == 0) 604 continue; 605 if (level > SINGLE) 606 blocksreleased += 607 indirtrunc(ip, nb, (daddr_t)-1, level - 1); 608 blkfree(ip, nb, (off_t)fs->fs_bsize); 609 blocksreleased += nblocks; 610 } 611 612 /* 613 * Recursively free last partial block. 614 */ 615 if (level > SINGLE && lastbn >= 0) { 616 last = lastbn % factor; 617 nb = bap[i]; 618 if (nb != 0) 619 blocksreleased += indirtrunc(ip, nb, last, level - 1); 620 } 621 FREE(copy, M_TEMP); 622 return (blocksreleased); 623 } 624 625 /* 626 * Remove any inodes in the inode cache belonging to dev. 627 * 628 * There should not be any active ones, return error if any are found 629 * (nb: this is a user error, not a system err). 630 */ 631 #ifdef QUOTA 632 iflush(dev, iq) 633 dev_t dev; 634 struct inode *iq; 635 #else 636 iflush(dev) 637 dev_t dev; 638 #endif 639 { 640 register struct inode *ip; 641 642 for (ip = inode; ip < inodeNINODE; ip++) { 643 #ifdef QUOTA 644 if (ip != iq && ip->i_dev == dev) 645 #else 646 if (ip->i_dev == dev) 647 #endif 648 if (ip->i_count) 649 return (EBUSY); 650 else { 651 remque(ip); 652 ip->i_forw = ip; 653 ip->i_back = ip; 654 /* 655 * as i_count == 0, the inode was on the free 656 * list already, just leave it there, it will 657 * fall off the bottom eventually. We could 658 * perhaps move it to the head of the free 659 * list, but as umounts are done so 660 * infrequently, we would gain very little, 661 * while making the code bigger. 662 */ 663 #ifdef QUOTA 664 dqrele(ip->i_dquot); 665 ip->i_dquot = NODQUOT; 666 #endif 667 } 668 } 669 return (0); 670 } 671 672 /* 673 * Lock an inode. If its already locked, set the WANT bit and sleep. 674 */ 675 ilock(ip) 676 register struct inode *ip; 677 { 678 679 ILOCK(ip); 680 } 681 682 /* 683 * Unlock an inode. If WANT bit is on, wakeup. 684 */ 685 iunlock(ip) 686 register struct inode *ip; 687 { 688 689 IUNLOCK(ip); 690 } 691