1 // File system implementation. Four layers: 2 // + Blocks: allocator for raw disk blocks. 3 // + Files: inode allocator, reading, writing, metadata. 4 // + Directories: inode with special contents (list of other inodes!) 5 // + Names: paths like /usr/rtm/xv6/fs.c for convenient naming. 6 // 7 // Disk layout is: superblock, inodes, block in-use bitmap, data blocks. 8 // 9 // This file contains the low-level file system manipulation 10 // routines. The (higher-level) system call implementations 11 // are in sysfile.c. 12 13 #include "types.h" 14 #include "defs.h" 15 #include "param.h" 16 #include "stat.h" 17 #include "mmu.h" 18 #include "proc.h" 19 #include "spinlock.h" 20 #include "buf.h" 21 #include "fs.h" 22 #include "fsvar.h" 23 #include "dev.h" 24 25 #define min(a, b) ((a) < (b) ? (a) : (b)) 26 static void itrunc(struct inode*); 27 28 // Read the super block. 29 static void 30 readsb(int dev, struct superblock *sb) 31 { 32 struct buf *bp; 33 34 bp = bread(dev, 1); 35 memmove(sb, bp->data, sizeof(*sb)); 36 brelse(bp); 37 } 38 39 // Zero a block. 40 static void 41 bzero(int dev, int bno) 42 { 43 struct buf *bp; 44 45 bp = bread(dev, bno); 46 memset(bp->data, 0, BSIZE); 47 bwrite(bp); 48 brelse(bp); 49 } 50 51 // Blocks. 52 53 // Allocate a disk block. 54 static uint 55 balloc(uint dev) 56 { 57 int b, bi, m; 58 struct buf *bp; 59 struct superblock sb; 60 61 bp = 0; 62 readsb(dev, &sb); 63 for(b = 0; b < sb.size; b += BPB){ 64 bp = bread(dev, BBLOCK(b, sb.ninodes)); 65 for(bi = 0; bi < BPB; bi++){ 66 m = 1 << (bi % 8); 67 if((bp->data[bi/8] & m) == 0){ // Is block free? 68 bp->data[bi/8] |= m; // Mark block in use on disk. 69 bwrite(bp); 70 brelse(bp); 71 return b + bi; 72 } 73 } 74 brelse(bp); 75 } 76 panic("balloc: out of blocks"); 77 } 78 79 // Free a disk block. 80 static void 81 bfree(int dev, uint b) 82 { 83 struct buf *bp; 84 struct superblock sb; 85 int bi, m; 86 87 bzero(dev, b); 88 89 readsb(dev, &sb); 90 bp = bread(dev, BBLOCK(b, sb.ninodes)); 91 bi = b % BPB; 92 m = 1 << (bi % 8); 93 if((bp->data[bi/8] & m) == 0) 94 panic("freeing free block"); 95 bp->data[bi/8] &= ~m; // Mark block free on disk. 96 bwrite(bp); 97 brelse(bp); 98 } 99 100 // Inodes. 101 // 102 // An inode is a single, unnamed file in the file system. 103 // The inode disk structure holds metadata (the type, device numbers, 104 // and data size) along with a list of blocks where the associated 105 // data can be found. 106 // 107 // The inodes are laid out sequentially on disk immediately after 108 // the superblock. The kernel keeps a cache of the in-use 109 // on-disk structures to provide a place for synchronizing access 110 // to inodes shared between multiple processes. 111 // 112 // ip->ref counts the number of pointer references to this cached 113 // inode; references are typically kept in struct file and in cp->cwd. 114 // When ip->ref falls to zero, the inode is no longer cached. 115 // It is an error to use an inode without holding a reference to it. 116 // 117 // Processes are only allowed to read and write inode 118 // metadata and contents when holding the inode's lock, 119 // represented by the I_BUSY flag in the in-memory copy. 120 // Because inode locks are held during disk accesses, 121 // they are implemented using a flag rather than with 122 // spin locks. Callers are responsible for locking 123 // inodes before passing them to routines in this file; leaving 124 // this responsibility with the caller makes it possible for them 125 // to create arbitrarily-sized atomic operations. 126 // 127 // To give maximum control over locking to the callers, 128 // the routines in this file that return inode pointers 129 // return pointers to *unlocked* inodes. It is the callers' 130 // responsibility to lock them before using them. A non-zero 131 // ip->ref keeps these unlocked inodes in the cache. 132 133 struct { 134 struct spinlock lock; 135 struct inode inode[NINODE]; 136 } icache; 137 138 void 139 iinit(void) 140 { 141 initlock(&icache.lock, "icache.lock"); 142 } 143 144 // Find the inode with number inum on device dev 145 // and return the in-memory copy. 146 static struct inode* 147 iget(uint dev, uint inum) 148 { 149 struct inode *ip, *empty; 150 151 acquire(&icache.lock); 152 153 // Try for cached inode. 154 empty = 0; 155 for(ip = &icache.inode[0]; ip < &icache.inode[NINODE]; ip++){ 156 if(ip->ref > 0 && ip->dev == dev && ip->inum == inum){ 157 ip->ref++; 158 release(&icache.lock); 159 return ip; 160 } 161 if(empty == 0 && ip->ref == 0) // Remember empty slot. 162 empty = ip; 163 } 164 165 // Allocate fresh inode. 166 if(empty == 0) 167 panic("iget: no inodes"); 168 169 ip = empty; 170 ip->dev = dev; 171 ip->inum = inum; 172 ip->ref = 1; 173 ip->flags = 0; 174 release(&icache.lock); 175 176 return ip; 177 } 178 179 // Increment reference count for ip. 180 // Returns ip to enable ip = idup(ip1) idiom. 181 struct inode* 182 idup(struct inode *ip) 183 { 184 acquire(&icache.lock); 185 ip->ref++; 186 release(&icache.lock); 187 return ip; 188 } 189 190 // Lock the given inode. 191 void 192 ilock(struct inode *ip) 193 { 194 struct buf *bp; 195 struct dinode *dip; 196 197 if(ip == 0 || ip->ref < 1) 198 panic("ilock"); 199 200 acquire(&icache.lock); 201 while(ip->flags & I_BUSY) 202 sleep(ip, &icache.lock); 203 ip->flags |= I_BUSY; 204 release(&icache.lock); 205 206 if(!(ip->flags & I_VALID)){ 207 bp = bread(ip->dev, IBLOCK(ip->inum)); 208 dip = (struct dinode*)bp->data + ip->inum%IPB; 209 ip->type = dip->type; 210 ip->major = dip->major; 211 ip->minor = dip->minor; 212 ip->nlink = dip->nlink; 213 ip->size = dip->size; 214 memmove(ip->addrs, dip->addrs, sizeof(ip->addrs)); 215 brelse(bp); 216 ip->flags |= I_VALID; 217 if(ip->type == 0) 218 panic("ilock: no type"); 219 } 220 } 221 222 // Unlock the given inode. 223 void 224 iunlock(struct inode *ip) 225 { 226 if(ip == 0 || !(ip->flags & I_BUSY) || ip->ref < 1) 227 panic("iunlock"); 228 229 acquire(&icache.lock); 230 ip->flags &= ~I_BUSY; 231 wakeup(ip); 232 release(&icache.lock); 233 } 234 235 // Caller holds reference to unlocked ip. Drop reference. 236 void 237 iput(struct inode *ip) 238 { 239 acquire(&icache.lock); 240 if(ip->ref == 1 && (ip->flags & I_VALID) && ip->nlink == 0){ 241 // inode is no longer used: truncate and free inode. 242 if(ip->flags & I_BUSY) 243 panic("iput busy"); 244 ip->flags |= I_BUSY; 245 release(&icache.lock); 246 itrunc(ip); 247 ip->type = 0; 248 iupdate(ip); 249 acquire(&icache.lock); 250 ip->flags &= ~I_BUSY; 251 wakeup(ip); 252 } 253 ip->ref--; 254 release(&icache.lock); 255 } 256 257 // Common idiom: unlock, then put. 258 void 259 iunlockput(struct inode *ip) 260 { 261 iunlock(ip); 262 iput(ip); 263 } 264 265 //PAGEBREAK! 266 // Allocate a new inode with the given type on device dev. 267 struct inode* 268 ialloc(uint dev, short type) 269 { 270 int inum; 271 struct buf *bp; 272 struct dinode *dip; 273 struct superblock sb; 274 275 readsb(dev, &sb); 276 for(inum = 1; inum < sb.ninodes; inum++){ // loop over inode blocks 277 bp = bread(dev, IBLOCK(inum)); 278 dip = (struct dinode*)bp->data + inum%IPB; 279 if(dip->type == 0){ // a free inode 280 memset(dip, 0, sizeof(*dip)); 281 dip->type = type; 282 bwrite(bp); // mark it allocated on the disk 283 brelse(bp); 284 return iget(dev, inum); 285 } 286 brelse(bp); 287 } 288 panic("ialloc: no inodes"); 289 } 290 291 // Copy inode, which has changed, from memory to disk. 292 void 293 iupdate(struct inode *ip) 294 { 295 struct buf *bp; 296 struct dinode *dip; 297 298 bp = bread(ip->dev, IBLOCK(ip->inum)); 299 dip = (struct dinode*)bp->data + ip->inum%IPB; 300 dip->type = ip->type; 301 dip->major = ip->major; 302 dip->minor = ip->minor; 303 dip->nlink = ip->nlink; 304 dip->size = ip->size; 305 memmove(dip->addrs, ip->addrs, sizeof(ip->addrs)); 306 bwrite(bp); 307 brelse(bp); 308 } 309 310 //PAGEBREAK! 311 // Inode contents 312 // 313 // The contents (data) associated with each inode is stored 314 // in a sequence of blocks on the disk. The first NDIRECT blocks 315 // are listed in ip->addrs[]. The next NINDIRECT blocks are 316 // listed in the block ip->addrs[INDIRECT]. 317 318 // Return the disk block address of the nth block in inode ip. 319 // If there is no such block, alloc controls whether one is allocated. 320 static uint 321 bmap(struct inode *ip, uint bn, int alloc) 322 { 323 uint addr, *a; 324 struct buf *bp; 325 326 if(bn < NDIRECT){ 327 if((addr = ip->addrs[bn]) == 0){ 328 if(!alloc) 329 return -1; 330 ip->addrs[bn] = addr = balloc(ip->dev); 331 } 332 return addr; 333 } 334 bn -= NDIRECT; 335 336 if(bn < NINDIRECT){ 337 // Load indirect block, allocating if necessary. 338 if((addr = ip->addrs[INDIRECT]) == 0){ 339 if(!alloc) 340 return -1; 341 ip->addrs[INDIRECT] = addr = balloc(ip->dev); 342 } 343 bp = bread(ip->dev, addr); 344 a = (uint*)bp->data; 345 346 if((addr = a[bn]) == 0){ 347 if(!alloc){ 348 brelse(bp); 349 return -1; 350 } 351 a[bn] = addr = balloc(ip->dev); 352 bwrite(bp); 353 } 354 brelse(bp); 355 return addr; 356 } 357 358 panic("bmap: out of range"); 359 } 360 361 // Truncate inode (discard contents). 362 static void 363 itrunc(struct inode *ip) 364 { 365 int i, j; 366 struct buf *bp; 367 uint *a; 368 369 for(i = 0; i < NDIRECT; i++){ 370 if(ip->addrs[i]){ 371 bfree(ip->dev, ip->addrs[i]); 372 ip->addrs[i] = 0; 373 } 374 } 375 376 if(ip->addrs[INDIRECT]){ 377 bp = bread(ip->dev, ip->addrs[INDIRECT]); 378 a = (uint*)bp->data; 379 for(j = 0; j < NINDIRECT; j++){ 380 if(a[j]) 381 bfree(ip->dev, a[j]); 382 } 383 brelse(bp); 384 ip->addrs[INDIRECT] = 0; 385 } 386 387 ip->size = 0; 388 iupdate(ip); 389 } 390 391 // Copy stat information from inode. 392 void 393 stati(struct inode *ip, struct stat *st) 394 { 395 st->dev = ip->dev; 396 st->ino = ip->inum; 397 st->type = ip->type; 398 st->nlink = ip->nlink; 399 st->size = ip->size; 400 } 401 402 //PAGEBREAK! 403 // Read data from inode. 404 int 405 readi(struct inode *ip, char *dst, uint off, uint n) 406 { 407 uint tot, m; 408 struct buf *bp; 409 410 if(ip->type == T_DEV){ 411 if(ip->major < 0 || ip->major >= NDEV || !devsw[ip->major].read) 412 return -1; 413 return devsw[ip->major].read(ip, dst, n); 414 } 415 416 if(off > ip->size || off + n < off) 417 return -1; 418 if(off + n > ip->size) 419 n = ip->size - off; 420 421 for(tot=0; tot<n; tot+=m, off+=m, dst+=m){ 422 bp = bread(ip->dev, bmap(ip, off/BSIZE, 0)); 423 m = min(n - tot, BSIZE - off%BSIZE); 424 memmove(dst, bp->data + off%BSIZE, m); 425 brelse(bp); 426 } 427 return n; 428 } 429 430 // PAGEBREAK! 431 // Write data to inode. 432 int 433 writei(struct inode *ip, char *src, uint off, uint n) 434 { 435 uint tot, m; 436 struct buf *bp; 437 438 if(ip->type == T_DEV){ 439 if(ip->major < 0 || ip->major >= NDEV || !devsw[ip->major].write) 440 return -1; 441 return devsw[ip->major].write(ip, src, n); 442 } 443 444 if(off + n < off) 445 return -1; 446 if(off + n > MAXFILE*BSIZE) 447 n = MAXFILE*BSIZE - off; 448 449 for(tot=0; tot<n; tot+=m, off+=m, src+=m){ 450 bp = bread(ip->dev, bmap(ip, off/BSIZE, 1)); 451 m = min(n - tot, BSIZE - off%BSIZE); 452 memmove(bp->data + off%BSIZE, src, m); 453 bwrite(bp); 454 brelse(bp); 455 } 456 457 if(n > 0 && off > ip->size){ 458 ip->size = off; 459 iupdate(ip); 460 } 461 return n; 462 } 463 464 //PAGEBREAK! 465 // Directories 466 467 int 468 namecmp(const char *s, const char *t) 469 { 470 return strncmp(s, t, DIRSIZ); 471 } 472 473 // Look for a directory entry in a directory. 474 // If found, set *poff to byte offset of entry. 475 // Caller must have already locked dp. 476 struct inode* 477 dirlookup(struct inode *dp, char *name, uint *poff) 478 { 479 uint off, inum; 480 struct buf *bp; 481 struct dirent *de; 482 483 if(dp->type != T_DIR) 484 panic("dirlookup not DIR"); 485 486 for(off = 0; off < dp->size; off += BSIZE){ 487 bp = bread(dp->dev, bmap(dp, off / BSIZE, 0)); 488 for(de = (struct dirent*)bp->data; 489 de < (struct dirent*)(bp->data + BSIZE); 490 de++){ 491 if(de->inum == 0) 492 continue; 493 if(namecmp(name, de->name) == 0){ 494 // entry matches path element 495 if(poff) 496 *poff = off + (uchar*)de - bp->data; 497 inum = de->inum; 498 brelse(bp); 499 return iget(dp->dev, inum); 500 } 501 } 502 brelse(bp); 503 } 504 return 0; 505 } 506 507 // Write a new directory entry (name, ino) into the directory dp. 508 int 509 dirlink(struct inode *dp, char *name, uint ino) 510 { 511 int off; 512 struct dirent de; 513 struct inode *ip; 514 515 // Check that name is not present. 516 if((ip = dirlookup(dp, name, 0)) != 0){ 517 iput(ip); 518 return -1; 519 } 520 521 // Look for an empty dirent. 522 for(off = 0; off < dp->size; off += sizeof(de)){ 523 if(readi(dp, (char*)&de, off, sizeof(de)) != sizeof(de)) 524 panic("dirlink read"); 525 if(de.inum == 0) 526 break; 527 } 528 529 strncpy(de.name, name, DIRSIZ); 530 de.inum = ino; 531 if(writei(dp, (char*)&de, off, sizeof(de)) != sizeof(de)) 532 panic("dirlink"); 533 534 return 0; 535 } 536 537 //PAGEBREAK! 538 // Paths 539 540 // Copy the next path element from path into name. 541 // Return a pointer to the element following the copied one. 542 // The returned path has no leading slashes, 543 // so the caller can check *path=='\0' to see if the name is the last one. 544 // If no name to remove, return 0. 545 // 546 // Examples: 547 // skipelem("a/bb/c", name) = "bb/c", setting name = "a" 548 // skipelem("///a//bb", name) = "bb", setting name = "a" 549 // skipelem("", name) = skipelem("////", name) = 0 550 // 551 static char* 552 skipelem(char *path, char *name) 553 { 554 char *s; 555 int len; 556 557 while(*path == '/') 558 path++; 559 if(*path == 0) 560 return 0; 561 s = path; 562 while(*path != '/' && *path != 0) 563 path++; 564 len = path - s; 565 if(len >= DIRSIZ) 566 memmove(name, s, DIRSIZ); 567 else { 568 memmove(name, s, len); 569 name[len] = 0; 570 } 571 while(*path == '/') 572 path++; 573 return path; 574 } 575 576 // Look up and return the inode for a path name. 577 // If parent != 0, return the inode for the parent and copy the final 578 // path element into name, which must have room for DIRSIZ bytes. 579 static struct inode* 580 _namei(char *path, int parent, char *name) 581 { 582 struct inode *ip, *next; 583 584 if(*path == '/') 585 ip = iget(ROOTDEV, 1); 586 else 587 ip = idup(cp->cwd); 588 589 while((path = skipelem(path, name)) != 0){ 590 ilock(ip); 591 if(ip->type != T_DIR){ 592 iunlockput(ip); 593 return 0; 594 } 595 if(parent && *path == '\0'){ 596 // Stop one level early. 597 iunlock(ip); 598 return ip; 599 } 600 if((next = dirlookup(ip, name, 0)) == 0){ 601 iunlockput(ip); 602 return 0; 603 } 604 iunlockput(ip); 605 ip = next; 606 } 607 if(parent){ 608 iput(ip); 609 return 0; 610 } 611 return ip; 612 } 613 614 struct inode* 615 namei(char *path) 616 { 617 char name[DIRSIZ]; 618 return _namei(path, 0, name); 619 } 620 621 struct inode* 622 nameiparent(char *path, char *name) 623 { 624 return _namei(path, 1, name); 625 } 626