1 // File system implementation. Five layers: 2 // + Blocks: allocator for raw disk blocks. 3 // + Log: crash recovery for multi-step updates. 4 // + Files: inode allocator, reading, writing, metadata. 5 // + Directories: inode with special contents (list of other inodes!) 6 // + Names: paths like /usr/rtm/xv6/fs.c for convenient naming. 7 // 8 // This file contains the low-level file system manipulation 9 // routines. The (higher-level) system call implementations 10 // are in sysfile.c. 11 12 #include "types.h" 13 #include "defs.h" 14 #include "param.h" 15 #include "stat.h" 16 #include "mmu.h" 17 #include "proc.h" 18 #include "spinlock.h" 19 #include "fs.h" 20 #include "buf.h" 21 #include "file.h" 22 23 #define min(a, b) ((a) < (b) ? (a) : (b)) 24 static void itrunc(struct inode*); 25 // there should be one superblock per disk device, but we run with 26 // only one device 27 struct superblock sb; 28 29 // Read the super block. 30 void 31 readsb(int dev, struct superblock *sb) 32 { 33 struct buf *bp; 34 35 bp = bread(dev, 1); 36 memmove(sb, bp->data, sizeof(*sb)); 37 brelse(bp); 38 } 39 40 // Zero a block. 41 static void 42 bzero(int dev, int bno) 43 { 44 struct buf *bp; 45 46 bp = bread(dev, bno); 47 memset(bp->data, 0, BSIZE); 48 log_write(bp); 49 brelse(bp); 50 } 51 52 // Blocks. 53 54 // Allocate a zeroed disk block. 55 static uint 56 balloc(uint dev) 57 { 58 int b, bi, m; 59 struct buf *bp; 60 61 bp = 0; 62 for(b = 0; b < sb.size; b += BPB){ 63 bp = bread(dev, BBLOCK(b, sb)); 64 for(bi = 0; bi < BPB && b + bi < sb.size; bi++){ 65 m = 1 << (bi % 8); 66 if((bp->data[bi/8] & m) == 0){ // Is block free? 67 bp->data[bi/8] |= m; // Mark block in use. 68 log_write(bp); 69 brelse(bp); 70 bzero(dev, b + bi); 71 return b + bi; 72 } 73 } 74 brelse(bp); 75 } 76 panic("balloc: out of blocks"); 77 } 78 79 // Free a disk block. 80 static void 81 bfree(int dev, uint b) 82 { 83 struct buf *bp; 84 int bi, m; 85 86 readsb(dev, &sb); 87 bp = bread(dev, BBLOCK(b, sb)); 88 bi = b % BPB; 89 m = 1 << (bi % 8); 90 if((bp->data[bi/8] & m) == 0) 91 panic("freeing free block"); 92 bp->data[bi/8] &= ~m; 93 log_write(bp); 94 brelse(bp); 95 } 96 97 // Inodes. 98 // 99 // An inode describes a single unnamed file. 100 // The inode disk structure holds metadata: the file's type, 101 // its size, the number of links referring to it, and the 102 // list of blocks holding the file's content. 103 // 104 // The inodes are laid out sequentially on disk at 105 // sb.startinode. Each inode has a number, indicating its 106 // position on the disk. 107 // 108 // The kernel keeps a cache of in-use inodes in memory 109 // to provide a place for synchronizing access 110 // to inodes used by multiple processes. The cached 111 // inodes include book-keeping information that is 112 // not stored on disk: ip->ref and ip->flags. 113 // 114 // An inode and its in-memory represtative go through a 115 // sequence of states before they can be used by the 116 // rest of the file system code. 117 // 118 // * Allocation: an inode is allocated if its type (on disk) 119 // is non-zero. ialloc() allocates, iput() frees if 120 // the link count has fallen to zero. 121 // 122 // * Referencing in cache: an entry in the inode cache 123 // is free if ip->ref is zero. Otherwise ip->ref tracks 124 // the number of in-memory pointers to the entry (open 125 // files and current directories). iget() to find or 126 // create a cache entry and increment its ref, iput() 127 // to decrement ref. 128 // 129 // * Valid: the information (type, size, &c) in an inode 130 // cache entry is only correct when the I_VALID bit 131 // is set in ip->flags. ilock() reads the inode from 132 // the disk and sets I_VALID, while iput() clears 133 // I_VALID if ip->ref has fallen to zero. 134 // 135 // * Locked: file system code may only examine and modify 136 // the information in an inode and its content if it 137 // has first locked the inode. The I_BUSY flag indicates 138 // that the inode is locked. ilock() sets I_BUSY, 139 // while iunlock clears it. 140 // 141 // Thus a typical sequence is: 142 // ip = iget(dev, inum) 143 // ilock(ip) 144 // ... examine and modify ip->xxx ... 145 // iunlock(ip) 146 // iput(ip) 147 // 148 // ilock() is separate from iget() so that system calls can 149 // get a long-term reference to an inode (as for an open file) 150 // and only lock it for short periods (e.g., in read()). 151 // The separation also helps avoid deadlock and races during 152 // pathname lookup. iget() increments ip->ref so that the inode 153 // stays cached and pointers to it remain valid. 154 // 155 // Many internal file system functions expect the caller to 156 // have locked the inodes involved; this lets callers create 157 // multi-step atomic operations. 158 159 struct { 160 struct spinlock lock; 161 struct inode inode[NINODE]; 162 } icache; 163 164 void 165 iinit(int dev) 166 { 167 initlock(&icache.lock, "icache"); 168 readsb(dev, &sb); 169 cprintf("sb: size %d nblocks %d ninodes %d nlog %d logstart %d\ 170 inodestart %d bmap start %d\n", sb.size, sb.nblocks, 171 sb.ninodes, sb.nlog, sb.logstart, sb.inodestart, 172 sb.bmapstart); 173 } 174 175 static struct inode* iget(uint dev, uint inum); 176 177 //PAGEBREAK! 178 // Allocate a new inode with the given type on device dev. 179 // A free inode has a type of zero. 180 struct inode* 181 ialloc(uint dev, short type) 182 { 183 int inum; 184 struct buf *bp; 185 struct dinode *dip; 186 187 for(inum = 1; inum < sb.ninodes; inum++){ 188 bp = bread(dev, IBLOCK(inum, sb)); 189 dip = (struct dinode*)bp->data + inum%IPB; 190 if(dip->type == 0){ // a free inode 191 memset(dip, 0, sizeof(*dip)); 192 dip->type = type; 193 log_write(bp); // mark it allocated on the disk 194 brelse(bp); 195 return iget(dev, inum); 196 } 197 brelse(bp); 198 } 199 panic("ialloc: no inodes"); 200 } 201 202 // Copy a modified in-memory inode to disk. 203 void 204 iupdate(struct inode *ip) 205 { 206 struct buf *bp; 207 struct dinode *dip; 208 209 bp = bread(ip->dev, IBLOCK(ip->inum, sb)); 210 dip = (struct dinode*)bp->data + ip->inum%IPB; 211 dip->type = ip->type; 212 dip->major = ip->major; 213 dip->minor = ip->minor; 214 dip->nlink = ip->nlink; 215 dip->size = ip->size; 216 memmove(dip->addrs, ip->addrs, sizeof(ip->addrs)); 217 log_write(bp); 218 brelse(bp); 219 } 220 221 // Find the inode with number inum on device dev 222 // and return the in-memory copy. Does not lock 223 // the inode and does not read it from disk. 224 static struct inode* 225 iget(uint dev, uint inum) 226 { 227 struct inode *ip, *empty; 228 229 acquire(&icache.lock); 230 231 // Is the inode already cached? 232 empty = 0; 233 for(ip = &icache.inode[0]; ip < &icache.inode[NINODE]; ip++){ 234 if(ip->ref > 0 && ip->dev == dev && ip->inum == inum){ 235 ip->ref++; 236 release(&icache.lock); 237 return ip; 238 } 239 if(empty == 0 && ip->ref == 0) // Remember empty slot. 240 empty = ip; 241 } 242 243 // Recycle an inode cache entry. 244 if(empty == 0) 245 panic("iget: no inodes"); 246 247 ip = empty; 248 ip->dev = dev; 249 ip->inum = inum; 250 ip->ref = 1; 251 ip->flags = 0; 252 release(&icache.lock); 253 254 return ip; 255 } 256 257 // Increment reference count for ip. 258 // Returns ip to enable ip = idup(ip1) idiom. 259 struct inode* 260 idup(struct inode *ip) 261 { 262 acquire(&icache.lock); 263 ip->ref++; 264 release(&icache.lock); 265 return ip; 266 } 267 268 // Lock the given inode. 269 // Reads the inode from disk if necessary. 270 void 271 ilock(struct inode *ip) 272 { 273 struct buf *bp; 274 struct dinode *dip; 275 276 if(ip == 0 || ip->ref < 1) 277 panic("ilock"); 278 279 acquire(&icache.lock); 280 while(ip->flags & I_BUSY) 281 sleep(ip, &icache.lock); 282 ip->flags |= I_BUSY; 283 release(&icache.lock); 284 285 if(!(ip->flags & I_VALID)){ 286 bp = bread(ip->dev, IBLOCK(ip->inum, sb)); 287 dip = (struct dinode*)bp->data + ip->inum%IPB; 288 ip->type = dip->type; 289 ip->major = dip->major; 290 ip->minor = dip->minor; 291 ip->nlink = dip->nlink; 292 ip->size = dip->size; 293 memmove(ip->addrs, dip->addrs, sizeof(ip->addrs)); 294 brelse(bp); 295 ip->flags |= I_VALID; 296 if(ip->type == 0) 297 panic("ilock: no type"); 298 } 299 } 300 301 // Unlock the given inode. 302 void 303 iunlock(struct inode *ip) 304 { 305 if(ip == 0 || !(ip->flags & I_BUSY) || ip->ref < 1) 306 panic("iunlock"); 307 308 acquire(&icache.lock); 309 ip->flags &= ~I_BUSY; 310 wakeup(ip); 311 release(&icache.lock); 312 } 313 314 // Drop a reference to an in-memory inode. 315 // If that was the last reference, the inode cache entry can 316 // be recycled. 317 // If that was the last reference and the inode has no links 318 // to it, free the inode (and its content) on disk. 319 // All calls to iput() must be inside a transaction in 320 // case it has to free the inode. 321 void 322 iput(struct inode *ip) 323 { 324 acquire(&icache.lock); 325 if(ip->ref == 1 && (ip->flags & I_VALID) && ip->nlink == 0){ 326 // inode has no links and no other references: truncate and free. 327 if(ip->flags & I_BUSY) 328 panic("iput busy"); 329 ip->flags |= I_BUSY; 330 release(&icache.lock); 331 itrunc(ip); 332 ip->type = 0; 333 iupdate(ip); 334 acquire(&icache.lock); 335 ip->flags = 0; 336 wakeup(ip); 337 } 338 ip->ref--; 339 release(&icache.lock); 340 } 341 342 // Common idiom: unlock, then put. 343 void 344 iunlockput(struct inode *ip) 345 { 346 iunlock(ip); 347 iput(ip); 348 } 349 350 //PAGEBREAK! 351 // Inode content 352 // 353 // The content (data) associated with each inode is stored 354 // in blocks on the disk. The first NDIRECT block numbers 355 // are listed in ip->addrs[]. The next NINDIRECT blocks are 356 // listed in block ip->addrs[NDIRECT]. 357 358 // Return the disk block address of the nth block in inode ip. 359 // If there is no such block, bmap allocates one. 360 static uint 361 bmap(struct inode *ip, uint bn) 362 { 363 uint addr, *a; 364 struct buf *bp; 365 366 if(bn < NDIRECT){ 367 if((addr = ip->addrs[bn]) == 0) 368 ip->addrs[bn] = addr = balloc(ip->dev); 369 return addr; 370 } 371 bn -= NDIRECT; 372 373 if(bn < NINDIRECT){ 374 // Load indirect block, allocating if necessary. 375 if((addr = ip->addrs[NDIRECT]) == 0) 376 ip->addrs[NDIRECT] = addr = balloc(ip->dev); 377 bp = bread(ip->dev, addr); 378 a = (uint*)bp->data; 379 if((addr = a[bn]) == 0){ 380 a[bn] = addr = balloc(ip->dev); 381 log_write(bp); 382 } 383 brelse(bp); 384 return addr; 385 } 386 387 panic("bmap: out of range"); 388 } 389 390 // Truncate inode (discard contents). 391 // Only called when the inode has no links 392 // to it (no directory entries referring to it) 393 // and has no in-memory reference to it (is 394 // not an open file or current directory). 395 static void 396 itrunc(struct inode *ip) 397 { 398 int i, j; 399 struct buf *bp; 400 uint *a; 401 402 for(i = 0; i < NDIRECT; i++){ 403 if(ip->addrs[i]){ 404 bfree(ip->dev, ip->addrs[i]); 405 ip->addrs[i] = 0; 406 } 407 } 408 409 if(ip->addrs[NDIRECT]){ 410 bp = bread(ip->dev, ip->addrs[NDIRECT]); 411 a = (uint*)bp->data; 412 for(j = 0; j < NINDIRECT; j++){ 413 if(a[j]) 414 bfree(ip->dev, a[j]); 415 } 416 brelse(bp); 417 bfree(ip->dev, ip->addrs[NDIRECT]); 418 ip->addrs[NDIRECT] = 0; 419 } 420 421 ip->size = 0; 422 iupdate(ip); 423 } 424 425 // Copy stat information from inode. 426 void 427 stati(struct inode *ip, struct stat *st) 428 { 429 st->dev = ip->dev; 430 st->ino = ip->inum; 431 st->type = ip->type; 432 st->nlink = ip->nlink; 433 st->size = ip->size; 434 } 435 436 //PAGEBREAK! 437 // Read data from inode. 438 int 439 readi(struct inode *ip, char *dst, uint off, uint n) 440 { 441 uint tot, m; 442 struct buf *bp; 443 444 if(ip->type == T_DEV){ 445 if(ip->major < 0 || ip->major >= NDEV || !devsw[ip->major].read) 446 return -1; 447 return devsw[ip->major].read(ip, dst, n); 448 } 449 450 if(off > ip->size || off + n < off) 451 return -1; 452 if(off + n > ip->size) 453 n = ip->size - off; 454 455 for(tot=0; tot<n; tot+=m, off+=m, dst+=m){ 456 bp = bread(ip->dev, bmap(ip, off/BSIZE)); 457 m = min(n - tot, BSIZE - off%BSIZE); 458 memmove(dst, bp->data + off%BSIZE, m); 459 brelse(bp); 460 } 461 return n; 462 } 463 464 // PAGEBREAK! 465 // Write data to inode. 466 int 467 writei(struct inode *ip, char *src, uint off, uint n) 468 { 469 uint tot, m; 470 struct buf *bp; 471 472 if(ip->type == T_DEV){ 473 if(ip->major < 0 || ip->major >= NDEV || !devsw[ip->major].write) 474 return -1; 475 return devsw[ip->major].write(ip, src, n); 476 } 477 478 if(off > ip->size || off + n < off) 479 return -1; 480 if(off + n > MAXFILE*BSIZE) 481 return -1; 482 483 for(tot=0; tot<n; tot+=m, off+=m, src+=m){ 484 bp = bread(ip->dev, bmap(ip, off/BSIZE)); 485 m = min(n - tot, BSIZE - off%BSIZE); 486 memmove(bp->data + off%BSIZE, src, m); 487 log_write(bp); 488 brelse(bp); 489 } 490 491 if(n > 0 && off > ip->size){ 492 ip->size = off; 493 iupdate(ip); 494 } 495 return n; 496 } 497 498 //PAGEBREAK! 499 // Directories 500 501 int 502 namecmp(const char *s, const char *t) 503 { 504 return strncmp(s, t, DIRSIZ); 505 } 506 507 // Look for a directory entry in a directory. 508 // If found, set *poff to byte offset of entry. 509 struct inode* 510 dirlookup(struct inode *dp, char *name, uint *poff) 511 { 512 uint off, inum; 513 struct dirent de; 514 515 if(dp->type != T_DIR) 516 panic("dirlookup not DIR"); 517 518 for(off = 0; off < dp->size; off += sizeof(de)){ 519 if(readi(dp, (char*)&de, off, sizeof(de)) != sizeof(de)) 520 panic("dirlink read"); 521 if(de.inum == 0) 522 continue; 523 if(namecmp(name, de.name) == 0){ 524 // entry matches path element 525 if(poff) 526 *poff = off; 527 inum = de.inum; 528 return iget(dp->dev, inum); 529 } 530 } 531 532 return 0; 533 } 534 535 // Write a new directory entry (name, inum) into the directory dp. 536 int 537 dirlink(struct inode *dp, char *name, uint inum) 538 { 539 int off; 540 struct dirent de; 541 struct inode *ip; 542 543 // Check that name is not present. 544 if((ip = dirlookup(dp, name, 0)) != 0){ 545 iput(ip); 546 return -1; 547 } 548 549 // Look for an empty dirent. 550 for(off = 0; off < dp->size; off += sizeof(de)){ 551 if(readi(dp, (char*)&de, off, sizeof(de)) != sizeof(de)) 552 panic("dirlink read"); 553 if(de.inum == 0) 554 break; 555 } 556 557 strncpy(de.name, name, DIRSIZ); 558 de.inum = inum; 559 if(writei(dp, (char*)&de, off, sizeof(de)) != sizeof(de)) 560 panic("dirlink"); 561 562 return 0; 563 } 564 565 //PAGEBREAK! 566 // Paths 567 568 // Copy the next path element from path into name. 569 // Return a pointer to the element following the copied one. 570 // The returned path has no leading slashes, 571 // so the caller can check *path=='\0' to see if the name is the last one. 572 // If no name to remove, return 0. 573 // 574 // Examples: 575 // skipelem("a/bb/c", name) = "bb/c", setting name = "a" 576 // skipelem("///a//bb", name) = "bb", setting name = "a" 577 // skipelem("a", name) = "", setting name = "a" 578 // skipelem("", name) = skipelem("////", name) = 0 579 // 580 static char* 581 skipelem(char *path, char *name) 582 { 583 char *s; 584 int len; 585 586 while(*path == '/') 587 path++; 588 if(*path == 0) 589 return 0; 590 s = path; 591 while(*path != '/' && *path != 0) 592 path++; 593 len = path - s; 594 if(len >= DIRSIZ) 595 memmove(name, s, DIRSIZ); 596 else { 597 memmove(name, s, len); 598 name[len] = 0; 599 } 600 while(*path == '/') 601 path++; 602 return path; 603 } 604 605 // Look up and return the inode for a path name. 606 // If parent != 0, return the inode for the parent and copy the final 607 // path element into name, which must have room for DIRSIZ bytes. 608 // Must be called inside a transaction since it calls iput(). 609 static struct inode* 610 namex(char *path, int nameiparent, char *name) 611 { 612 struct inode *ip, *next; 613 614 if(*path == '/') 615 ip = iget(ROOTDEV, ROOTINO); 616 else 617 ip = idup(proc->cwd); 618 619 while((path = skipelem(path, name)) != 0){ 620 ilock(ip); 621 if(ip->type != T_DIR){ 622 iunlockput(ip); 623 return 0; 624 } 625 if(nameiparent && *path == '\0'){ 626 // Stop one level early. 627 iunlock(ip); 628 return ip; 629 } 630 if((next = dirlookup(ip, name, 0)) == 0){ 631 iunlockput(ip); 632 return 0; 633 } 634 iunlockput(ip); 635 ip = next; 636 } 637 if(nameiparent){ 638 iput(ip); 639 return 0; 640 } 641 return ip; 642 } 643 644 struct inode* 645 namei(char *path) 646 { 647 char name[DIRSIZ]; 648 return namex(path, 0, name); 649 } 650 651 struct inode* 652 nameiparent(char *path, char *name) 653 { 654 return namex(path, 1, name); 655 } 656