1 // File system implementation. Four layers: 2 // + Blocks: allocator for raw disk blocks. 3 // + Files: inode allocator, reading, writing, metadata. 4 // + Directories: inode with special contents (list of other inodes!) 5 // + Names: paths like /usr/rtm/xv6/fs.c for convenient naming. 6 // 7 // Disk layout is: superblock, inodes, block in-use bitmap, data blocks. 8 // 9 // This file contains the low-level file system manipulation 10 // routines. The (higher-level) system call implementations 11 // are in sysfile.c. 12 13 #include "types.h" 14 #include "defs.h" 15 #include "param.h" 16 #include "stat.h" 17 #include "mmu.h" 18 #include "proc.h" 19 #include "spinlock.h" 20 #include "buf.h" 21 #include "fs.h" 22 #include "fsvar.h" 23 #include "dev.h" 24 25 #define min(a, b) ((a) < (b) ? (a) : (b)) 26 static void itrunc(struct inode*); 27 28 // Read the super block. 29 static void 30 readsb(int dev, struct superblock *sb) 31 { 32 struct buf *bp; 33 34 bp = bread(dev, 1); 35 memmove(sb, bp->data, sizeof(*sb)); 36 brelse(bp); 37 } 38 39 // Zero a block. 40 static void 41 bzero(int dev, int bno) 42 { 43 struct buf *bp; 44 45 bp = bread(dev, bno); 46 memset(bp->data, 0, BSIZE); 47 bwrite(bp); 48 brelse(bp); 49 } 50 51 // Blocks. 52 53 // Allocate a disk block. 54 static uint 55 balloc(uint dev) 56 { 57 int b, bi, m; 58 struct buf *bp; 59 struct superblock sb; 60 61 bp = 0; 62 readsb(dev, &sb); 63 for(b = 0; b < sb.size; b += BPB){ 64 bp = bread(dev, BBLOCK(b, sb.ninodes)); 65 for(bi = 0; bi < BPB; bi++){ 66 m = 1 << (bi % 8); 67 if((bp->data[bi/8] & m) == 0){ // Is block free? 68 bp->data[bi/8] |= m; // Mark block in use on disk. 69 bwrite(bp); 70 brelse(bp); 71 return b + bi; 72 } 73 } 74 brelse(bp); 75 } 76 panic("balloc: out of blocks"); 77 } 78 79 // Free a disk block. 80 static void 81 bfree(int dev, uint b) 82 { 83 struct buf *bp; 84 struct superblock sb; 85 int bi, m; 86 87 bzero(dev, b); 88 89 readsb(dev, &sb); 90 bp = bread(dev, BBLOCK(b, sb.ninodes)); 91 bi = b % BPB; 92 m = 1 << (bi % 8); 93 if((bp->data[bi/8] & m) == 0) 94 panic("freeing free block"); 95 bp->data[bi/8] &= ~m; // Mark block free on disk. 96 bwrite(bp); 97 brelse(bp); 98 } 99 100 // Inodes. 101 // 102 // An inode is a single, unnamed file in the file system. 103 // The inode disk structure holds metadata (the type, device numbers, 104 // and data size) along with a list of blocks where the associated 105 // data can be found. 106 // 107 // The inodes are laid out sequentially on disk immediately after 108 // the superblock. The kernel keeps a cache of the in-use 109 // on-disk structures to provide a place for synchronizing access 110 // to inodes shared between multiple processes. 111 // 112 // ip->ref counts the number of pointer references to this cached 113 // inode; references are typically kept in struct file and in cp->cwd. 114 // When ip->ref falls to zero, the inode is no longer cached. 115 // It is an error to use an inode without holding a reference to it. 116 // 117 // Processes are only allowed to read and write inode 118 // metadata and contents when holding the inode's lock, 119 // represented by the I_BUSY flag in the in-memory copy. 120 // Because inode locks are held during disk accesses, 121 // they are implemented using a flag rather than with 122 // spin locks. Callers are responsible for locking 123 // inodes before passing them to routines in this file; leaving 124 // this responsibility with the caller makes it possible for them 125 // to create arbitrarily-sized atomic operations. 126 // 127 // To give maximum control over locking to the callers, 128 // the routines in this file that return inode pointers 129 // return pointers to *unlocked* inodes. It is the callers' 130 // responsibility to lock them before using them. A non-zero 131 // ip->ref keeps these unlocked inodes in the cache. 132 133 struct { 134 struct spinlock lock; 135 struct inode inode[NINODE]; 136 } icache; 137 138 void 139 iinit(void) 140 { 141 initlock(&icache.lock, "icache"); 142 } 143 144 // Find the inode with number inum on device dev 145 // and return the in-memory copy. 146 static struct inode* 147 iget(uint dev, uint inum) 148 { 149 struct inode *ip, *empty; 150 151 acquire(&icache.lock); 152 153 // Try for cached inode. 154 empty = 0; 155 for(ip = &icache.inode[0]; ip < &icache.inode[NINODE]; ip++){ 156 if(ip->ref > 0 && ip->dev == dev && ip->inum == inum){ 157 ip->ref++; 158 release(&icache.lock); 159 return ip; 160 } 161 if(empty == 0 && ip->ref == 0) // Remember empty slot. 162 empty = ip; 163 } 164 165 // Allocate fresh inode. 166 if(empty == 0) 167 panic("iget: no inodes"); 168 169 ip = empty; 170 ip->dev = dev; 171 ip->inum = inum; 172 ip->ref = 1; 173 ip->flags = 0; 174 release(&icache.lock); 175 176 return ip; 177 } 178 179 // Increment reference count for ip. 180 // Returns ip to enable ip = idup(ip1) idiom. 181 struct inode* 182 idup(struct inode *ip) 183 { 184 acquire(&icache.lock); 185 ip->ref++; 186 release(&icache.lock); 187 return ip; 188 } 189 190 // Lock the given inode. 191 void 192 ilock(struct inode *ip) 193 { 194 struct buf *bp; 195 struct dinode *dip; 196 197 if(ip == 0 || ip->ref < 1) 198 panic("ilock"); 199 200 acquire(&icache.lock); 201 while(ip->flags & I_BUSY) 202 sleep(ip, &icache.lock); 203 ip->flags |= I_BUSY; 204 release(&icache.lock); 205 206 if(!(ip->flags & I_VALID)){ 207 bp = bread(ip->dev, IBLOCK(ip->inum)); 208 dip = (struct dinode*)bp->data + ip->inum%IPB; 209 ip->type = dip->type; 210 ip->major = dip->major; 211 ip->minor = dip->minor; 212 ip->nlink = dip->nlink; 213 ip->size = dip->size; 214 memmove(ip->addrs, dip->addrs, sizeof(ip->addrs)); 215 brelse(bp); 216 ip->flags |= I_VALID; 217 if(ip->type == 0) 218 panic("ilock: no type"); 219 } 220 } 221 222 // Unlock the given inode. 223 void 224 iunlock(struct inode *ip) 225 { 226 if(ip == 0 || !(ip->flags & I_BUSY) || ip->ref < 1) 227 panic("iunlock"); 228 229 acquire(&icache.lock); 230 ip->flags &= ~I_BUSY; 231 wakeup(ip); 232 release(&icache.lock); 233 } 234 235 // Caller holds reference to unlocked ip. Drop reference. 236 void 237 iput(struct inode *ip) 238 { 239 acquire(&icache.lock); 240 if(ip->ref == 1 && (ip->flags & I_VALID) && ip->nlink == 0){ 241 // inode is no longer used: truncate and free inode. 242 if(ip->flags & I_BUSY) 243 panic("iput busy"); 244 ip->flags |= I_BUSY; 245 release(&icache.lock); 246 itrunc(ip); 247 ip->type = 0; 248 iupdate(ip); 249 acquire(&icache.lock); 250 ip->flags = 0; 251 wakeup(ip); 252 } 253 ip->ref--; 254 release(&icache.lock); 255 } 256 257 // Common idiom: unlock, then put. 258 void 259 iunlockput(struct inode *ip) 260 { 261 iunlock(ip); 262 iput(ip); 263 } 264 265 //PAGEBREAK! 266 // Allocate a new inode with the given type on device dev. 267 struct inode* 268 ialloc(uint dev, short type) 269 { 270 int inum; 271 struct buf *bp; 272 struct dinode *dip; 273 struct superblock sb; 274 275 readsb(dev, &sb); 276 for(inum = 1; inum < sb.ninodes; inum++){ // loop over inode blocks 277 bp = bread(dev, IBLOCK(inum)); 278 dip = (struct dinode*)bp->data + inum%IPB; 279 if(dip->type == 0){ // a free inode 280 memset(dip, 0, sizeof(*dip)); 281 dip->type = type; 282 bwrite(bp); // mark it allocated on the disk 283 brelse(bp); 284 return iget(dev, inum); 285 } 286 brelse(bp); 287 } 288 panic("ialloc: no inodes"); 289 } 290 291 // Copy inode, which has changed, from memory to disk. 292 void 293 iupdate(struct inode *ip) 294 { 295 struct buf *bp; 296 struct dinode *dip; 297 298 bp = bread(ip->dev, IBLOCK(ip->inum)); 299 dip = (struct dinode*)bp->data + ip->inum%IPB; 300 dip->type = ip->type; 301 dip->major = ip->major; 302 dip->minor = ip->minor; 303 dip->nlink = ip->nlink; 304 dip->size = ip->size; 305 memmove(dip->addrs, ip->addrs, sizeof(ip->addrs)); 306 bwrite(bp); 307 brelse(bp); 308 } 309 310 //PAGEBREAK! 311 // Inode contents 312 // 313 // The contents (data) associated with each inode is stored 314 // in a sequence of blocks on the disk. The first NDIRECT blocks 315 // are listed in ip->addrs[]. The next NINDIRECT blocks are 316 // listed in the block ip->addrs[INDIRECT]. 317 318 // Return the disk block address of the nth block in inode ip. 319 // If there is no such block, bmap allocates one. 320 static uint 321 bmap(struct inode *ip, uint bn) 322 { 323 uint addr, *a; 324 struct buf *bp; 325 326 if(bn < NDIRECT){ 327 if((addr = ip->addrs[bn]) == 0) 328 ip->addrs[bn] = addr = balloc(ip->dev); 329 return addr; 330 } 331 bn -= NDIRECT; 332 333 if(bn < NINDIRECT){ 334 // Load indirect block, allocating if necessary. 335 if((addr = ip->addrs[NDIRECT]) == 0) 336 ip->addrs[NDIRECT] = addr = balloc(ip->dev); 337 bp = bread(ip->dev, addr); 338 a = (uint*)bp->data; 339 340 if((addr = a[bn]) == 0){ 341 a[bn] = addr = balloc(ip->dev); 342 bwrite(bp); 343 } 344 brelse(bp); 345 return addr; 346 } 347 348 panic("bmap: out of range"); 349 } 350 351 // Truncate inode (discard contents). 352 // Only called after the last dirent referring 353 // to this inode has been erased on disk. 354 static void 355 itrunc(struct inode *ip) 356 { 357 int i, j; 358 struct buf *bp; 359 uint *a; 360 361 for(i = 0; i < NDIRECT; i++){ 362 if(ip->addrs[i]){ 363 bfree(ip->dev, ip->addrs[i]); 364 ip->addrs[i] = 0; 365 } 366 } 367 368 if(ip->addrs[NDIRECT]){ 369 bp = bread(ip->dev, ip->addrs[NDIRECT]); 370 a = (uint*)bp->data; 371 for(j = 0; j < NINDIRECT; j++){ 372 if(a[j]) 373 bfree(ip->dev, a[j]); 374 } 375 brelse(bp); 376 bfree(ip->dev, ip->addrs[NDIRECT]); 377 ip->addrs[NDIRECT] = 0; 378 } 379 380 ip->size = 0; 381 iupdate(ip); 382 } 383 384 // Copy stat information from inode. 385 void 386 stati(struct inode *ip, struct stat *st) 387 { 388 st->dev = ip->dev; 389 st->ino = ip->inum; 390 st->type = ip->type; 391 st->nlink = ip->nlink; 392 st->size = ip->size; 393 } 394 395 //PAGEBREAK! 396 // Read data from inode. 397 int 398 readi(struct inode *ip, char *dst, uint off, uint n) 399 { 400 uint tot, m; 401 struct buf *bp; 402 403 if(ip->type == T_DEV){ 404 if(ip->major < 0 || ip->major >= NDEV || !devsw[ip->major].read) 405 return -1; 406 return devsw[ip->major].read(ip, dst, n); 407 } 408 409 if(off > ip->size || off + n < off) 410 return -1; 411 if(off + n > ip->size) 412 n = ip->size - off; 413 414 for(tot=0; tot<n; tot+=m, off+=m, dst+=m){ 415 bp = bread(ip->dev, bmap(ip, off/BSIZE)); 416 m = min(n - tot, BSIZE - off%BSIZE); 417 memmove(dst, bp->data + off%BSIZE, m); 418 brelse(bp); 419 } 420 return n; 421 } 422 423 // PAGEBREAK! 424 // Write data to inode. 425 int 426 writei(struct inode *ip, char *src, uint off, uint n) 427 { 428 uint tot, m; 429 struct buf *bp; 430 431 if(ip->type == T_DEV){ 432 if(ip->major < 0 || ip->major >= NDEV || !devsw[ip->major].write) 433 return -1; 434 return devsw[ip->major].write(ip, src, n); 435 } 436 437 if(off > ip->size || off + n < off) 438 return -1; 439 if(off + n > MAXFILE*BSIZE) 440 n = MAXFILE*BSIZE - off; 441 442 for(tot=0; tot<n; tot+=m, off+=m, src+=m){ 443 bp = bread(ip->dev, bmap(ip, off/BSIZE)); 444 m = min(n - tot, BSIZE - off%BSIZE); 445 memmove(bp->data + off%BSIZE, src, m); 446 bwrite(bp); 447 brelse(bp); 448 } 449 450 if(n > 0 && off > ip->size){ 451 ip->size = off; 452 iupdate(ip); 453 } 454 return n; 455 } 456 457 //PAGEBREAK! 458 // Directories 459 460 int 461 namecmp(const char *s, const char *t) 462 { 463 return strncmp(s, t, DIRSIZ); 464 } 465 466 // Look for a directory entry in a directory. 467 // If found, set *poff to byte offset of entry. 468 // Caller must have already locked dp. 469 struct inode* 470 dirlookup(struct inode *dp, char *name, uint *poff) 471 { 472 uint off, inum; 473 struct buf *bp; 474 struct dirent *de; 475 476 if(dp->type != T_DIR) 477 panic("dirlookup not DIR"); 478 479 for(off = 0; off < dp->size; off += BSIZE){ 480 bp = bread(dp->dev, bmap(dp, off / BSIZE)); 481 for(de = (struct dirent*)bp->data; 482 de < (struct dirent*)(bp->data + BSIZE); 483 de++){ 484 if(de->inum == 0) 485 continue; 486 if(namecmp(name, de->name) == 0){ 487 // entry matches path element 488 if(poff) 489 *poff = off + (uchar*)de - bp->data; 490 inum = de->inum; 491 brelse(bp); 492 return iget(dp->dev, inum); 493 } 494 } 495 brelse(bp); 496 } 497 return 0; 498 } 499 500 // Write a new directory entry (name, inum) into the directory dp. 501 int 502 dirlink(struct inode *dp, char *name, uint inum) 503 { 504 int off; 505 struct dirent de; 506 struct inode *ip; 507 508 // Check that name is not present. 509 if((ip = dirlookup(dp, name, 0)) != 0){ 510 iput(ip); 511 return -1; 512 } 513 514 // Look for an empty dirent. 515 for(off = 0; off < dp->size; off += sizeof(de)){ 516 if(readi(dp, (char*)&de, off, sizeof(de)) != sizeof(de)) 517 panic("dirlink read"); 518 if(de.inum == 0) 519 break; 520 } 521 522 strncpy(de.name, name, DIRSIZ); 523 de.inum = inum; 524 if(writei(dp, (char*)&de, off, sizeof(de)) != sizeof(de)) 525 panic("dirlink"); 526 527 return 0; 528 } 529 530 //PAGEBREAK! 531 // Paths 532 533 // Copy the next path element from path into name. 534 // Return a pointer to the element following the copied one. 535 // The returned path has no leading slashes, 536 // so the caller can check *path=='\0' to see if the name is the last one. 537 // If no name to remove, return 0. 538 // 539 // Examples: 540 // skipelem("a/bb/c", name) = "bb/c", setting name = "a" 541 // skipelem("///a//bb", name) = "bb", setting name = "a" 542 // skipelem("a", name) = "", setting name = "a" 543 // skipelem("", name) = skipelem("////", name) = 0 544 // 545 static char* 546 skipelem(char *path, char *name) 547 { 548 char *s; 549 int len; 550 551 while(*path == '/') 552 path++; 553 if(*path == 0) 554 return 0; 555 s = path; 556 while(*path != '/' && *path != 0) 557 path++; 558 len = path - s; 559 if(len >= DIRSIZ) 560 memmove(name, s, DIRSIZ); 561 else { 562 memmove(name, s, len); 563 name[len] = 0; 564 } 565 while(*path == '/') 566 path++; 567 return path; 568 } 569 570 // Look up and return the inode for a path name. 571 // If parent != 0, return the inode for the parent and copy the final 572 // path element into name, which must have room for DIRSIZ bytes. 573 static struct inode* 574 namex(char *path, int parent, char *name) 575 { 576 struct inode *ip, *next; 577 578 if(*path == '/') 579 ip = iget(ROOTDEV, ROOTINO); 580 else 581 ip = idup(cp->cwd); 582 583 while((path = skipelem(path, name)) != 0){ 584 ilock(ip); 585 if(ip->type != T_DIR){ 586 iunlockput(ip); 587 return 0; 588 } 589 if(parent && *path == '\0'){ 590 // Stop one level early. 591 iunlock(ip); 592 return ip; 593 } 594 if((next = dirlookup(ip, name, 0)) == 0){ 595 iunlockput(ip); 596 return 0; 597 } 598 iunlockput(ip); 599 ip = next; 600 } 601 if(parent){ 602 iput(ip); 603 return 0; 604 } 605 return ip; 606 } 607 608 struct inode* 609 namei(char *path) 610 { 611 char name[DIRSIZ]; 612 return namex(path, 0, name); 613 } 614 615 struct inode* 616 nameiparent(char *path, char *name) 617 { 618 return namex(path, 1, name); 619 } 620