1 // File system implementation. Four layers: 2 // + Blocks: allocator for raw disk blocks. 3 // + Files: inode allocator, reading, writing, metadata. 4 // + Directories: inode with special contents (list of other inodes!) 5 // + Names: paths like /usr/rtm/xv6/fs.c for convenient naming. 6 // 7 // Disk layout is: superblock, inodes, block in-use bitmap, data blocks. 8 // 9 // This file contains the low-level file system manipulation 10 // routines. The (higher-level) system call implementations 11 // are in sysfile.c. 12 13 #include "types.h" 14 #include "defs.h" 15 #include "param.h" 16 #include "stat.h" 17 #include "mmu.h" 18 #include "proc.h" 19 #include "spinlock.h" 20 #include "buf.h" 21 #include "fs.h" 22 #include "fsvar.h" 23 #include "dev.h" 24 25 #define min(a, b) ((a) < (b) ? (a) : (b)) 26 static void itrunc(struct inode*); 27 28 // Read the super block. 29 static void 30 readsb(int dev, struct superblock *sb) 31 { 32 struct buf *bp; 33 34 bp = bread(dev, 1); 35 memmove(sb, bp->data, sizeof(*sb)); 36 brelse(bp); 37 } 38 39 // Zero a block. 40 static void 41 bzero(int dev, int bno) 42 { 43 struct buf *bp; 44 45 bp = bread(dev, bno); 46 memset(bp->data, 0, BSIZE); 47 bwrite(bp); 48 brelse(bp); 49 } 50 51 // Blocks. 52 53 // Allocate a disk block. 54 static uint 55 balloc(uint dev) 56 { 57 int b, bi, m; 58 struct buf *bp; 59 struct superblock sb; 60 61 bp = 0; 62 readsb(dev, &sb); 63 for(b = 0; b < sb.size; b += BPB){ 64 bp = bread(dev, BBLOCK(b, sb.ninodes)); 65 for(bi = 0; bi < BPB; bi++){ 66 m = 1 << (bi % 8); 67 if((bp->data[bi/8] & m) == 0){ // Is block free? 68 bp->data[bi/8] |= m; // Mark block in use on disk. 69 bwrite(bp); 70 brelse(bp); 71 return b + bi; 72 } 73 } 74 brelse(bp); 75 } 76 panic("balloc: out of blocks"); 77 } 78 79 // Free a disk block. 80 static void 81 bfree(int dev, uint b) 82 { 83 struct buf *bp; 84 struct superblock sb; 85 int bi, m; 86 87 bzero(dev, b); 88 89 readsb(dev, &sb); 90 bp = bread(dev, BBLOCK(b, sb.ninodes)); 91 bi = b % BPB; 92 m = 1 << (bi % 8); 93 if((bp->data[bi/8] & m) == 0) 94 panic("freeing free block"); 95 bp->data[bi/8] &= ~m; // Mark block free on disk. 96 bwrite(bp); 97 brelse(bp); 98 } 99 100 // Inodes. 101 // 102 // An inode is a single, unnamed file in the file system. 103 // The inode disk structure holds metadata (the type, device numbers, 104 // and data size) along with a list of blocks where the associated 105 // data can be found. 106 // 107 // The inodes are laid out sequentially on disk immediately after 108 // the superblock. The kernel keeps a cache of the in-use 109 // on-disk structures to provide a place for synchronizing access 110 // to inodes shared between multiple processes. 111 // 112 // ip->ref counts the number of pointer references to this cached 113 // inode; references are typically kept in struct file and in cp->cwd. 114 // When ip->ref falls to zero, the inode is no longer cached. 115 // It is an error to use an inode without holding a reference to it. 116 // 117 // Processes are only allowed to read and write inode 118 // metadata and contents when holding the inode's lock, 119 // represented by the I_BUSY flag in the in-memory copy. 120 // Because inode locks are held during disk accesses, 121 // they are implemented using a flag rather than with 122 // spin locks. Callers are responsible for locking 123 // inodes before passing them to routines in this file; leaving 124 // this responsibility with the caller makes it possible for them 125 // to create arbitrarily-sized atomic operations. 126 // 127 // To give maximum control over locking to the callers, 128 // the routines in this file that return inode pointers 129 // return pointers to *unlocked* inodes. It is the callers' 130 // responsibility to lock them before using them. A non-zero 131 // ip->ref keeps these unlocked inodes in the cache. 132 133 struct { 134 struct spinlock lock; 135 struct inode inode[NINODE]; 136 } icache; 137 138 void 139 iinit(void) 140 { 141 initlock(&icache.lock, "icache.lock"); 142 } 143 144 // Find the inode with number inum on device dev 145 // and return the in-memory copy. 146 static struct inode* 147 iget(uint dev, uint inum) 148 { 149 struct inode *ip, *empty; 150 151 acquire(&icache.lock); 152 153 // Try for cached inode. 154 empty = 0; 155 for(ip = &icache.inode[0]; ip < &icache.inode[NINODE]; ip++){ 156 if(ip->ref > 0 && ip->dev == dev && ip->inum == inum){ 157 ip->ref++; 158 release(&icache.lock); 159 return ip; 160 } 161 if(empty == 0 && ip->ref == 0) // Remember empty slot. 162 empty = ip; 163 } 164 165 // Allocate fresh inode. 166 if(empty == 0) 167 panic("iget: no inodes"); 168 169 ip = empty; 170 ip->dev = dev; 171 ip->inum = inum; 172 ip->ref = 1; 173 ip->flags = 0; 174 release(&icache.lock); 175 176 return ip; 177 } 178 179 // Increment reference count for ip. 180 // Returns ip to enable ip = idup(ip1) idiom. 181 struct inode* 182 idup(struct inode *ip) 183 { 184 acquire(&icache.lock); 185 ip->ref++; 186 release(&icache.lock); 187 return ip; 188 } 189 190 // Lock the given inode. 191 void 192 ilock(struct inode *ip) 193 { 194 struct buf *bp; 195 struct dinode *dip; 196 197 if(ip == 0 || ip->ref < 1) 198 panic("ilock"); 199 200 acquire(&icache.lock); 201 while(ip->flags & I_BUSY) 202 sleep(ip, &icache.lock); 203 ip->flags |= I_BUSY; 204 release(&icache.lock); 205 206 if(!(ip->flags & I_VALID)){ 207 bp = bread(ip->dev, IBLOCK(ip->inum)); 208 dip = (struct dinode*)bp->data + ip->inum%IPB; 209 ip->type = dip->type; 210 ip->major = dip->major; 211 ip->minor = dip->minor; 212 ip->nlink = dip->nlink; 213 ip->size = dip->size; 214 memmove(ip->addrs, dip->addrs, sizeof(ip->addrs)); 215 brelse(bp); 216 ip->flags |= I_VALID; 217 if(ip->type == 0) 218 panic("ilock: no type"); 219 } 220 } 221 222 // Unlock the given inode. 223 void 224 iunlock(struct inode *ip) 225 { 226 if(ip == 0 || !(ip->flags & I_BUSY) || ip->ref < 1) 227 panic("iunlock"); 228 229 acquire(&icache.lock); 230 ip->flags &= ~I_BUSY; 231 wakeup(ip); 232 release(&icache.lock); 233 } 234 235 // Caller holds reference to unlocked ip. Drop reference. 236 void 237 iput(struct inode *ip) 238 { 239 acquire(&icache.lock); 240 if(ip->ref == 1 && (ip->flags & I_VALID) && ip->nlink == 0) { 241 // inode is no longer used: truncate and free inode. 242 if(ip->flags & I_BUSY) 243 panic("iput busy"); 244 ip->flags |= I_BUSY; 245 release(&icache.lock); 246 itrunc(ip); 247 ip->type = 0; 248 iupdate(ip); 249 acquire(&icache.lock); 250 ip->flags &= ~I_BUSY; 251 wakeup(ip); 252 } 253 ip->ref--; 254 release(&icache.lock); 255 } 256 257 // Common idiom: unlock, then put. 258 void 259 iunlockput(struct inode *ip) 260 { 261 iunlock(ip); 262 iput(ip); 263 } 264 265 //PAGEBREAK! 266 // Allocate a new inode with the given type on device dev. 267 struct inode* 268 ialloc(uint dev, short type) 269 { 270 int inum; 271 struct buf *bp; 272 struct dinode *dip; 273 struct superblock sb; 274 275 readsb(dev, &sb); 276 for(inum = 1; inum < sb.ninodes; inum++) { // loop over inode blocks 277 bp = bread(dev, IBLOCK(inum)); 278 dip = (struct dinode*)bp->data + inum%IPB; 279 if(dip->type == 0) { // a free inode 280 memset(dip, 0, sizeof(*dip)); 281 dip->type = type; 282 bwrite(bp); // mark it allocated on the disk 283 brelse(bp); 284 return iget(dev, inum); 285 } 286 brelse(bp); 287 } 288 panic("ialloc: no inodes"); 289 } 290 291 // Copy inode, which has changed, from memory to disk. 292 void 293 iupdate(struct inode *ip) 294 { 295 struct buf *bp; 296 struct dinode *dip; 297 298 bp = bread(ip->dev, IBLOCK(ip->inum)); 299 dip = (struct dinode*)bp->data + ip->inum%IPB; 300 dip->type = ip->type; 301 dip->major = ip->major; 302 dip->minor = ip->minor; 303 dip->nlink = ip->nlink; 304 dip->size = ip->size; 305 memmove(dip->addrs, ip->addrs, sizeof(ip->addrs)); 306 bwrite(bp); 307 brelse(bp); 308 } 309 310 //PAGEBREAK! 311 // Inode contents 312 // 313 // The contents (data) associated with each inode is stored 314 // in a sequence of blocks on the disk. The first NDIRECT blocks 315 // are listed in ip->addrs[]. The next NINDIRECT blocks are 316 // listed in the block ip->addrs[INDIRECT]. 317 318 // Return the disk block address of the nth block in inode ip. 319 // If there is no such block, alloc controls whether one is allocated. 320 static uint 321 bmap(struct inode *ip, uint bn, int alloc) 322 { 323 uint addr, *a; 324 struct buf *bp; 325 326 if(bn < NDIRECT) { 327 if((addr = ip->addrs[bn]) == 0) { 328 if(!alloc) 329 return -1; 330 ip->addrs[bn] = addr = balloc(ip->dev); 331 } 332 return addr; 333 } 334 bn -= NDIRECT; 335 336 if(bn < NINDIRECT) { 337 // Load indirect block, allocating if necessary. 338 if((addr = ip->addrs[INDIRECT]) == 0) { 339 if(!alloc) 340 return -1; 341 ip->addrs[INDIRECT] = addr = balloc(ip->dev); 342 } 343 bp = bread(ip->dev, addr); 344 a = (uint*)bp->data; 345 346 if((addr = a[bn]) == 0) { 347 if(!alloc) { 348 brelse(bp); 349 return -1; 350 } 351 a[bn] = addr = balloc(ip->dev); 352 bwrite(bp); 353 } 354 brelse(bp); 355 return addr; 356 } 357 358 panic("bmap: out of range"); 359 } 360 361 // PAGEBREAK: 30 362 // Truncate inode (discard contents). 363 static void 364 itrunc(struct inode *ip) 365 { 366 int i, j; 367 struct buf *bp; 368 uint *a; 369 370 for(i = 0; i < NDIRECT; i++) { 371 if(ip->addrs[i]) { 372 bfree(ip->dev, ip->addrs[i]); 373 ip->addrs[i] = 0; 374 } 375 } 376 377 if(ip->addrs[INDIRECT]) { 378 bp = bread(ip->dev, ip->addrs[INDIRECT]); 379 a = (uint*)bp->data; 380 for(j = 0; j < NINDIRECT; j++) { 381 if(a[j]) 382 bfree(ip->dev, a[j]); 383 } 384 brelse(bp); 385 ip->addrs[INDIRECT] = 0; 386 } 387 388 ip->size = 0; 389 iupdate(ip); 390 } 391 392 // Copy stat information from inode. 393 void 394 stati(struct inode *ip, struct stat *st) 395 { 396 st->dev = ip->dev; 397 st->ino = ip->inum; 398 st->type = ip->type; 399 st->nlink = ip->nlink; 400 st->size = ip->size; 401 } 402 403 //PAGEBREAK! 404 // Read data from inode. 405 int 406 readi(struct inode *ip, char *dst, uint off, uint n) 407 { 408 uint tot, m; 409 struct buf *bp; 410 411 if(ip->type == T_DEV) { 412 if(ip->major < 0 || ip->major >= NDEV || !devsw[ip->major].read) 413 return -1; 414 return devsw[ip->major].read(ip->minor, dst, n); 415 } 416 417 if(off > ip->size || off + n < off) 418 return -1; 419 if(off + n > ip->size) 420 n = ip->size - off; 421 422 for(tot=0; tot<n; tot+=m, off+=m, dst+=m) { 423 bp = bread(ip->dev, bmap(ip, off/BSIZE, 0)); 424 m = min(n - tot, BSIZE - off%BSIZE); 425 memmove(dst, bp->data + off%BSIZE, m); 426 brelse(bp); 427 } 428 return n; 429 } 430 431 // PAGEBREAK! 432 // Write data to inode. 433 int 434 writei(struct inode *ip, char *src, uint off, uint n) 435 { 436 uint tot, m; 437 struct buf *bp; 438 439 if(ip->type == T_DEV) { 440 if(ip->major < 0 || ip->major >= NDEV || !devsw[ip->major].write) 441 return -1; 442 return devsw[ip->major].write(ip->minor, src, n); 443 } 444 445 if(off + n < off) 446 return -1; 447 if(off + n > MAXFILE*BSIZE) 448 n = MAXFILE*BSIZE - off; 449 450 for(tot=0; tot<n; tot+=m, off+=m, src+=m) { 451 bp = bread(ip->dev, bmap(ip, off/BSIZE, 1)); 452 m = min(n - tot, BSIZE - off%BSIZE); 453 memmove(bp->data + off%BSIZE, src, m); 454 bwrite(bp); 455 brelse(bp); 456 } 457 458 if(n > 0 && off > ip->size) { 459 ip->size = off; 460 iupdate(ip); 461 } 462 return n; 463 } 464 465 //PAGEBREAK! 466 // Directories 467 468 int 469 namecmp(const char *s, const char *t) 470 { 471 return strncmp(s, t, DIRSIZ); 472 } 473 474 // Look for a directory entry in a directory. 475 // If found, set *poff to byte offset of entry. 476 // Caller must have already locked dp. 477 struct inode* 478 dirlookup(struct inode *dp, char *name, uint *poff) 479 { 480 uint off, inum; 481 struct buf *bp; 482 struct dirent *de; 483 484 if(dp->type != T_DIR) 485 panic("dirlookup not DIR"); 486 487 for(off = 0; off < dp->size; off += BSIZE){ 488 bp = bread(dp->dev, bmap(dp, off / BSIZE, 0)); 489 for(de = (struct dirent*)bp->data; 490 de < (struct dirent*)(bp->data + BSIZE); 491 de++){ 492 if(de->inum == 0) 493 continue; 494 if(namecmp(name, de->name) == 0){ 495 // entry matches path element 496 if(poff) 497 *poff = off + (uchar*)de - bp->data; 498 inum = de->inum; 499 brelse(bp); 500 return iget(dp->dev, inum); 501 } 502 } 503 brelse(bp); 504 } 505 return 0; 506 } 507 508 // Write a new directory entry (name, ino) into the directory dp. 509 int 510 dirlink(struct inode *dp, char *name, uint ino) 511 { 512 int off; 513 struct dirent de; 514 struct inode *ip; 515 516 // Check that name is not present. 517 if((ip = dirlookup(dp, name, 0)) != 0){ 518 iput(ip); 519 return -1; 520 } 521 522 // Look for an empty dirent. 523 for(off = 0; off < dp->size; off += sizeof(de)){ 524 if(readi(dp, (char*)&de, off, sizeof(de)) != sizeof(de)) 525 panic("dirlink read"); 526 if(de.inum == 0) 527 break; 528 } 529 530 strncpy(de.name, name, DIRSIZ); 531 de.inum = ino; 532 if(writei(dp, (char*)&de, off, sizeof(de)) != sizeof(de)) 533 panic("dirlink"); 534 535 return 0; 536 } 537 538 //PAGEBREAK! 539 // Paths 540 541 // Copy the next path element from path into name. 542 // Return a pointer to the element following the copied one. 543 // The returned path has no leading slashes, 544 // so the caller can check *path=='\0' to see if the name is the last one. 545 // If no name to remove, return 0. 546 // 547 // Examples: 548 // skipelem("a/bb/c", name) = "bb/c", setting name = "a" 549 // skipelem("///a//bb", name) = "bb", setting name = "a" 550 // skipelem("", name) = skipelem("////", name) = 0 551 // 552 static char* 553 skipelem(char *path, char *name) 554 { 555 char *s; 556 int len; 557 558 while(*path == '/') 559 path++; 560 if(*path == 0) 561 return 0; 562 s = path; 563 while(*path != '/' && *path != 0) 564 path++; 565 len = path - s; 566 if(len >= DIRSIZ) 567 memmove(name, s, DIRSIZ); 568 else{ 569 memmove(name, s, len); 570 name[len] = 0; 571 } 572 while(*path == '/') 573 path++; 574 return path; 575 } 576 577 // Look up and return the inode for a path name. 578 // If parent is set, return the inode for the parent 579 // and write the final path element to name, which 580 // should have room for DIRSIZ bytes. 581 static struct inode* 582 _namei(char *path, int parent, char *name) 583 { 584 struct inode *ip, *next; 585 586 if(*path == '/') 587 ip = iget(ROOTDEV, 1); 588 else 589 ip = idup(cp->cwd); 590 591 while((path = skipelem(path, name)) != 0){ 592 ilock(ip); 593 if(ip->type != T_DIR){ 594 iunlockput(ip); 595 return 0; 596 } 597 if(parent && *path == '\0'){ 598 // Stop one level early. 599 iunlock(ip); 600 return ip; 601 } 602 if((next = dirlookup(ip, name, 0)) == 0){ 603 iunlockput(ip); 604 return 0; 605 } 606 iunlockput(ip); 607 ip = next; 608 } 609 if(parent){ 610 iput(ip); 611 return 0; 612 } 613 return ip; 614 } 615 616 struct inode* 617 namei(char *path) 618 { 619 char name[DIRSIZ]; 620 return _namei(path, 0, name); 621 } 622 struct inode* 623 nameiparent(char *path, char *name) 624 { 625 return _namei(path, 1, name); 626 } 627