1 // File system implementation. Five layers: 2 // + Blocks: allocator for raw disk blocks. 3 // + Log: crash recovery for multi-step updates. 4 // + Files: inode allocator, reading, writing, metadata. 5 // + Directories: inode with special contents (list of other inodes!) 6 // + Names: paths like /usr/rtm/xv6/fs.c for convenient naming. 7 // 8 // This file contains the low-level file system manipulation 9 // routines. The (higher-level) system call implementations 10 // are in sysfile.c. 11 12 #include "types.h" 13 #include "defs.h" 14 #include "param.h" 15 #include "stat.h" 16 #include "mmu.h" 17 #include "proc.h" 18 #include "spinlock.h" 19 #include "buf.h" 20 #include "fs.h" 21 #include "file.h" 22 23 #define min(a, b) ((a) < (b) ? (a) : (b)) 24 static void itrunc(struct inode*); 25 26 // Read the super block. 27 void 28 readsb(int dev, struct superblock *sb) 29 { 30 struct buf *bp; 31 32 bp = bread(dev, 1); 33 memmove(sb, bp->data, sizeof(*sb)); 34 brelse(bp); 35 } 36 37 // Zero a block. 38 static void 39 bzero(int dev, int bno) 40 { 41 struct buf *bp; 42 43 bp = bread(dev, bno); 44 memset(bp->data, 0, BSIZE); 45 log_write(bp); 46 brelse(bp); 47 } 48 49 // Blocks. 50 51 // Allocate a zeroed disk block. 52 static uint 53 balloc(uint dev) 54 { 55 int b, bi, m; 56 struct buf *bp; 57 struct superblock sb; 58 59 bp = 0; 60 readsb(dev, &sb); 61 for(b = 0; b < sb.size; b += BPB){ 62 bp = bread(dev, BBLOCK(b, sb.ninodes)); 63 for(bi = 0; bi < BPB && b + bi < sb.size; bi++){ 64 m = 1 << (bi % 8); 65 if((bp->data[bi/8] & m) == 0){ // Is block free? 66 bp->data[bi/8] |= m; // Mark block in use. 67 log_write(bp); 68 brelse(bp); 69 bzero(dev, b + bi); 70 return b + bi; 71 } 72 } 73 brelse(bp); 74 } 75 panic("balloc: out of blocks"); 76 } 77 78 // Free a disk block. 79 static void 80 bfree(int dev, uint b) 81 { 82 struct buf *bp; 83 struct superblock sb; 84 int bi, m; 85 86 readsb(dev, &sb); 87 bp = bread(dev, BBLOCK(b, sb.ninodes)); 88 bi = b % BPB; 89 m = 1 << (bi % 8); 90 if((bp->data[bi/8] & m) == 0) 91 panic("freeing free block"); 92 bp->data[bi/8] &= ~m; 93 log_write(bp); 94 brelse(bp); 95 } 96 97 // Inodes. 98 // 99 // An inode describes a single unnamed file. 100 // The inode disk structure holds metadata: the file's type, 101 // its size, the number of links referring to it, and the 102 // list of blocks holding the file's content. 103 // 104 // The inodes are laid out sequentially on disk immediately after 105 // the superblock. Each inode has a number, indicating its 106 // position on the disk. 107 // 108 // The kernel keeps a cache of in-use inodes in memory 109 // to provide a place for synchronizing access 110 // to inodes used by multiple processes. The cached 111 // inodes include book-keeping information that is 112 // not stored on disk: ip->ref and ip->flags. 113 // 114 // ip->ref counts the number of pointer references to this cached 115 // inode; references are typically kept in struct file and in proc->cwd. 116 // When ip->ref falls to zero, the inode is no longer cached. 117 // It is an error to use an inode without holding a reference to it. 118 // 119 // Processes are only allowed to read and write inode 120 // metadata and contents when holding the inode's lock, 121 // represented by the I_BUSY bit in ip->flags. 122 // Because inode locks are held during disk accesses, 123 // they are implemented using a flag rather than with 124 // spin locks. ilock() and iunlock() manipulate an 125 // inode's I_BUSY flag. Many routines in this file expect 126 // the caller to have already locked the inode; leaving 127 // this responsibility with the caller makes it possible for them 128 // to create arbitrarily-sized atomic operations. 129 // 130 // To give maximum control over locking to the callers, 131 // the routines in this file that return inode pointers 132 // return pointers to *unlocked* inodes. It is the callers' 133 // responsibility to lock them before using them. A non-zero 134 // ip->ref keeps these unlocked inodes in the cache. 135 // 136 // In order for the file system code to look at an inode, the inode 137 // must pass through a number of states, with transitions 138 // driven by the indicated functions: 139 // 140 // * Allocated on disk, indicated by a non-zero type. 141 // ialloc() and iput(). 142 // * Referenced in the cache, indicated by ip->ref > 0. 143 // iget() and iput(). 144 // * Cached inode is valid, indicated by I_VALID. 145 // ilock() and iput(). 146 // * Locked, indicated by I_BUSY. 147 // ilock() and iunlock(). 148 149 struct { 150 struct spinlock lock; 151 struct inode inode[NINODE]; 152 } icache; 153 154 void 155 iinit(void) 156 { 157 initlock(&icache.lock, "icache"); 158 } 159 160 static struct inode* iget(uint dev, uint inum); 161 162 //PAGEBREAK! 163 // Allocate a new inode with the given type on device dev. 164 // A free inode has a type of zero. 165 struct inode* 166 ialloc(uint dev, short type) 167 { 168 int inum; 169 struct buf *bp; 170 struct dinode *dip; 171 struct superblock sb; 172 173 readsb(dev, &sb); 174 175 for(inum = 1; inum < sb.ninodes; inum++){ 176 bp = bread(dev, IBLOCK(inum)); 177 dip = (struct dinode*)bp->data + inum%IPB; 178 if(dip->type == 0){ // a free inode 179 memset(dip, 0, sizeof(*dip)); 180 dip->type = type; 181 log_write(bp); // mark it allocated on the disk 182 brelse(bp); 183 return iget(dev, inum); 184 } 185 brelse(bp); 186 } 187 panic("ialloc: no inodes"); 188 } 189 190 // Copy inode, which has changed, from memory to disk. 191 void 192 iupdate(struct inode *ip) 193 { 194 struct buf *bp; 195 struct dinode *dip; 196 197 bp = bread(ip->dev, IBLOCK(ip->inum)); 198 dip = (struct dinode*)bp->data + ip->inum%IPB; 199 dip->type = ip->type; 200 dip->major = ip->major; 201 dip->minor = ip->minor; 202 dip->nlink = ip->nlink; 203 dip->size = ip->size; 204 memmove(dip->addrs, ip->addrs, sizeof(ip->addrs)); 205 log_write(bp); 206 brelse(bp); 207 } 208 209 // Find the inode with number inum on device dev 210 // and return the in-memory copy. 211 static struct inode* 212 iget(uint dev, uint inum) 213 { 214 struct inode *ip, *empty; 215 216 acquire(&icache.lock); 217 218 // Try for cached inode. 219 empty = 0; 220 for(ip = &icache.inode[0]; ip < &icache.inode[NINODE]; ip++){ 221 if(ip->ref > 0 && ip->dev == dev && ip->inum == inum){ 222 ip->ref++; 223 release(&icache.lock); 224 return ip; 225 } 226 if(empty == 0 && ip->ref == 0) // Remember empty slot. 227 empty = ip; 228 } 229 230 // Allocate fresh inode. 231 if(empty == 0) 232 panic("iget: no inodes"); 233 234 ip = empty; 235 ip->dev = dev; 236 ip->inum = inum; 237 ip->ref = 1; 238 ip->flags = 0; 239 release(&icache.lock); 240 241 return ip; 242 } 243 244 // Increment reference count for ip. 245 // Returns ip to enable ip = idup(ip1) idiom. 246 struct inode* 247 idup(struct inode *ip) 248 { 249 acquire(&icache.lock); 250 ip->ref++; 251 release(&icache.lock); 252 return ip; 253 } 254 255 // Lock the given inode. 256 void 257 ilock(struct inode *ip) 258 { 259 struct buf *bp; 260 struct dinode *dip; 261 262 if(ip == 0 || ip->ref < 1) 263 panic("ilock"); 264 265 acquire(&icache.lock); 266 while(ip->flags & I_BUSY) 267 sleep(ip, &icache.lock); 268 ip->flags |= I_BUSY; 269 release(&icache.lock); 270 271 if(!(ip->flags & I_VALID)){ 272 bp = bread(ip->dev, IBLOCK(ip->inum)); 273 dip = (struct dinode*)bp->data + ip->inum%IPB; 274 ip->type = dip->type; 275 ip->major = dip->major; 276 ip->minor = dip->minor; 277 ip->nlink = dip->nlink; 278 ip->size = dip->size; 279 memmove(ip->addrs, dip->addrs, sizeof(ip->addrs)); 280 brelse(bp); 281 ip->flags |= I_VALID; 282 if(ip->type == 0) 283 panic("ilock: no type"); 284 } 285 } 286 287 // Unlock the given inode. 288 void 289 iunlock(struct inode *ip) 290 { 291 if(ip == 0 || !(ip->flags & I_BUSY) || ip->ref < 1) 292 panic("iunlock"); 293 294 acquire(&icache.lock); 295 ip->flags &= ~I_BUSY; 296 wakeup(ip); 297 release(&icache.lock); 298 } 299 300 // Caller holds reference to unlocked ip. Drop reference. 301 void 302 iput(struct inode *ip) 303 { 304 acquire(&icache.lock); 305 if(ip->ref == 1 && (ip->flags & I_VALID) && ip->nlink == 0){ 306 // inode is no longer used: truncate and free inode. 307 if(ip->flags & I_BUSY) 308 panic("iput busy"); 309 ip->flags |= I_BUSY; 310 release(&icache.lock); 311 itrunc(ip); 312 ip->type = 0; 313 iupdate(ip); 314 acquire(&icache.lock); 315 ip->flags = 0; 316 wakeup(ip); 317 } 318 ip->ref--; 319 release(&icache.lock); 320 } 321 322 // Common idiom: unlock, then put. 323 void 324 iunlockput(struct inode *ip) 325 { 326 iunlock(ip); 327 iput(ip); 328 } 329 330 //PAGEBREAK! 331 // Inode contents 332 // 333 // The contents (data) associated with each inode is stored 334 // in a sequence of blocks on the disk. The first NDIRECT blocks 335 // are listed in ip->addrs[]. The next NINDIRECT blocks are 336 // listed in the block ip->addrs[NDIRECT]. 337 338 // Return the disk block address of the nth block in inode ip. 339 // If there is no such block, bmap allocates one. 340 static uint 341 bmap(struct inode *ip, uint bn) 342 { 343 uint addr, *a; 344 struct buf *bp; 345 346 if(bn < NDIRECT){ 347 if((addr = ip->addrs[bn]) == 0) 348 ip->addrs[bn] = addr = balloc(ip->dev); 349 return addr; 350 } 351 bn -= NDIRECT; 352 353 if(bn < NINDIRECT){ 354 // Load indirect block, allocating if necessary. 355 if((addr = ip->addrs[NDIRECT]) == 0) 356 ip->addrs[NDIRECT] = addr = balloc(ip->dev); 357 bp = bread(ip->dev, addr); 358 a = (uint*)bp->data; 359 if((addr = a[bn]) == 0){ 360 a[bn] = addr = balloc(ip->dev); 361 log_write(bp); 362 } 363 brelse(bp); 364 return addr; 365 } 366 367 panic("bmap: out of range"); 368 } 369 370 // Truncate inode (discard contents). 371 // Only called after the last dirent referring 372 // to this inode has been erased on disk. 373 static void 374 itrunc(struct inode *ip) 375 { 376 int i, j; 377 struct buf *bp; 378 uint *a; 379 380 for(i = 0; i < NDIRECT; i++){ 381 if(ip->addrs[i]){ 382 bfree(ip->dev, ip->addrs[i]); 383 ip->addrs[i] = 0; 384 } 385 } 386 387 if(ip->addrs[NDIRECT]){ 388 bp = bread(ip->dev, ip->addrs[NDIRECT]); 389 a = (uint*)bp->data; 390 for(j = 0; j < NINDIRECT; j++){ 391 if(a[j]) 392 bfree(ip->dev, a[j]); 393 } 394 brelse(bp); 395 bfree(ip->dev, ip->addrs[NDIRECT]); 396 ip->addrs[NDIRECT] = 0; 397 } 398 399 ip->size = 0; 400 iupdate(ip); 401 } 402 403 // Copy stat information from inode. 404 void 405 stati(struct inode *ip, struct stat *st) 406 { 407 st->dev = ip->dev; 408 st->ino = ip->inum; 409 st->type = ip->type; 410 st->nlink = ip->nlink; 411 st->size = ip->size; 412 } 413 414 //PAGEBREAK! 415 // Read data from inode. 416 int 417 readi(struct inode *ip, char *dst, uint off, uint n) 418 { 419 uint tot, m; 420 struct buf *bp; 421 422 if(ip->type == T_DEV){ 423 if(ip->major < 0 || ip->major >= NDEV || !devsw[ip->major].read) 424 return -1; 425 return devsw[ip->major].read(ip, dst, n); 426 } 427 428 if(off > ip->size || off + n < off) 429 return -1; 430 if(off + n > ip->size) 431 n = ip->size - off; 432 433 for(tot=0; tot<n; tot+=m, off+=m, dst+=m){ 434 bp = bread(ip->dev, bmap(ip, off/BSIZE)); 435 m = min(n - tot, BSIZE - off%BSIZE); 436 memmove(dst, bp->data + off%BSIZE, m); 437 brelse(bp); 438 } 439 return n; 440 } 441 442 // PAGEBREAK! 443 // Write data to inode. 444 int 445 writei(struct inode *ip, char *src, uint off, uint n) 446 { 447 uint tot, m; 448 struct buf *bp; 449 450 if(ip->type == T_DEV){ 451 if(ip->major < 0 || ip->major >= NDEV || !devsw[ip->major].write) 452 return -1; 453 return devsw[ip->major].write(ip, src, n); 454 } 455 456 if(off > ip->size || off + n < off) 457 return -1; 458 if(off + n > MAXFILE*BSIZE) 459 return -1; 460 461 for(tot=0; tot<n; tot+=m, off+=m, src+=m){ 462 bp = bread(ip->dev, bmap(ip, off/BSIZE)); 463 m = min(n - tot, BSIZE - off%BSIZE); 464 memmove(bp->data + off%BSIZE, src, m); 465 log_write(bp); 466 brelse(bp); 467 } 468 469 if(n > 0 && off > ip->size){ 470 ip->size = off; 471 iupdate(ip); 472 } 473 return n; 474 } 475 476 //PAGEBREAK! 477 // Directories 478 479 int 480 namecmp(const char *s, const char *t) 481 { 482 return strncmp(s, t, DIRSIZ); 483 } 484 485 // Look for a directory entry in a directory. 486 // If found, set *poff to byte offset of entry. 487 // Caller must have already locked dp. 488 struct inode* 489 dirlookup(struct inode *dp, char *name, uint *poff) 490 { 491 uint off, inum; 492 struct dirent de; 493 494 if(dp->type != T_DIR) 495 panic("dirlookup not DIR"); 496 497 for(off = 0; off < dp->size; off += sizeof(de)){ 498 if(readi(dp, (char*)&de, off, sizeof(de)) != sizeof(de)) 499 panic("dirlink read"); 500 if(de.inum == 0) 501 continue; 502 if(namecmp(name, de.name) == 0){ 503 // entry matches path element 504 if(poff) 505 *poff = off; 506 inum = de.inum; 507 return iget(dp->dev, inum); 508 } 509 } 510 511 return 0; 512 } 513 514 // Write a new directory entry (name, inum) into the directory dp. 515 int 516 dirlink(struct inode *dp, char *name, uint inum) 517 { 518 int off; 519 struct dirent de; 520 struct inode *ip; 521 522 // Check that name is not present. 523 if((ip = dirlookup(dp, name, 0)) != 0){ 524 iput(ip); 525 return -1; 526 } 527 528 // Look for an empty dirent. 529 for(off = 0; off < dp->size; off += sizeof(de)){ 530 if(readi(dp, (char*)&de, off, sizeof(de)) != sizeof(de)) 531 panic("dirlink read"); 532 if(de.inum == 0) 533 break; 534 } 535 536 strncpy(de.name, name, DIRSIZ); 537 de.inum = inum; 538 if(writei(dp, (char*)&de, off, sizeof(de)) != sizeof(de)) 539 panic("dirlink"); 540 541 return 0; 542 } 543 544 //PAGEBREAK! 545 // Paths 546 547 // Copy the next path element from path into name. 548 // Return a pointer to the element following the copied one. 549 // The returned path has no leading slashes, 550 // so the caller can check *path=='\0' to see if the name is the last one. 551 // If no name to remove, return 0. 552 // 553 // Examples: 554 // skipelem("a/bb/c", name) = "bb/c", setting name = "a" 555 // skipelem("///a//bb", name) = "bb", setting name = "a" 556 // skipelem("a", name) = "", setting name = "a" 557 // skipelem("", name) = skipelem("////", name) = 0 558 // 559 static char* 560 skipelem(char *path, char *name) 561 { 562 char *s; 563 int len; 564 565 while(*path == '/') 566 path++; 567 if(*path == 0) 568 return 0; 569 s = path; 570 while(*path != '/' && *path != 0) 571 path++; 572 len = path - s; 573 if(len >= DIRSIZ) 574 memmove(name, s, DIRSIZ); 575 else { 576 memmove(name, s, len); 577 name[len] = 0; 578 } 579 while(*path == '/') 580 path++; 581 return path; 582 } 583 584 // Look up and return the inode for a path name. 585 // If parent != 0, return the inode for the parent and copy the final 586 // path element into name, which must have room for DIRSIZ bytes. 587 static struct inode* 588 namex(char *path, int nameiparent, char *name) 589 { 590 struct inode *ip, *next; 591 592 if(*path == '/') 593 ip = iget(ROOTDEV, ROOTINO); 594 else 595 ip = idup(proc->cwd); 596 597 while((path = skipelem(path, name)) != 0){ 598 ilock(ip); 599 if(ip->type != T_DIR){ 600 iunlockput(ip); 601 return 0; 602 } 603 if(nameiparent && *path == '\0'){ 604 // Stop one level early. 605 iunlock(ip); 606 return ip; 607 } 608 if((next = dirlookup(ip, name, 0)) == 0){ 609 iunlockput(ip); 610 return 0; 611 } 612 iunlockput(ip); 613 ip = next; 614 } 615 if(nameiparent){ 616 iput(ip); 617 return 0; 618 } 619 return ip; 620 } 621 622 struct inode* 623 namei(char *path) 624 { 625 char name[DIRSIZ]; 626 return namex(path, 0, name); 627 } 628 629 struct inode* 630 nameiparent(char *path, char *name) 631 { 632 return namex(path, 1, name); 633 } 634