1 // File system implementation. Four layers: 2 // + Blocks: allocator for raw disk blocks. 3 // + Files: inode allocator, reading, writing, metadata. 4 // + Directories: inode with special contents (list of other inodes!) 5 // + Names: paths like /usr/rtm/xv6/fs.c for convenient naming. 6 // 7 // Disk layout is: superblock, inodes, block in-use bitmap, data blocks. 8 // 9 // This file contains the low-level file system manipulation 10 // routines. The (higher-level) system call implementations 11 // are in sysfile.c. 12 13 #include "types.h" 14 #include "defs.h" 15 #include "param.h" 16 #include "stat.h" 17 #include "mmu.h" 18 #include "proc.h" 19 #include "spinlock.h" 20 #include "buf.h" 21 #include "fs.h" 22 #include "file.h" 23 24 #define min(a, b) ((a) < (b) ? (a) : (b)) 25 static void itrunc(struct inode*); 26 27 // Read the super block. 28 void 29 readsb(int dev, struct superblock *sb) 30 { 31 struct buf *bp; 32 33 bp = bread(dev, 1); 34 memmove(sb, bp->data, sizeof(*sb)); 35 brelse(bp); 36 } 37 38 // Zero a block. 39 static void 40 bzero(int dev, int bno) 41 { 42 struct buf *bp; 43 44 bp = bread(dev, bno); 45 memset(bp->data, 0, BSIZE); 46 log_write(bp); 47 brelse(bp); 48 } 49 50 // Blocks. 51 52 // Allocate a zeroed disk block. 53 static uint 54 balloc(uint dev) 55 { 56 int b, bi, m; 57 struct buf *bp; 58 struct superblock sb; 59 60 bp = 0; 61 readsb(dev, &sb); 62 for(b = 0; b < sb.size; b += BPB){ 63 bp = bread(dev, BBLOCK(b, sb.ninodes)); 64 for(bi = 0; bi < BPB && bi < (sb.size - b); bi++){ 65 m = 1 << (bi % 8); 66 if((bp->data[bi/8] & m) == 0){ // Is block free? 67 bp->data[bi/8] |= m; // Mark block in use on disk. 68 log_write(bp); 69 brelse(bp); 70 bzero(dev, b + bi); 71 return b + bi; 72 } 73 } 74 brelse(bp); 75 } 76 panic("balloc: out of blocks"); 77 } 78 79 // Free a disk block. 80 static void 81 bfree(int dev, uint b) 82 { 83 struct buf *bp; 84 struct superblock sb; 85 int bi, m; 86 87 readsb(dev, &sb); 88 bp = bread(dev, BBLOCK(b, sb.ninodes)); 89 bi = b % BPB; 90 m = 1 << (bi % 8); 91 if((bp->data[bi/8] & m) == 0) 92 panic("freeing free block"); 93 bp->data[bi/8] &= ~m; // Mark block free on disk. 94 log_write(bp); 95 brelse(bp); 96 } 97 98 // Inodes. 99 // 100 // An inode is a single, unnamed file in the file system. 101 // The inode disk structure holds metadata (the type, device numbers, 102 // and data size) along with a list of blocks where the associated 103 // data can be found. 104 // 105 // The inodes are laid out sequentially on disk immediately after 106 // the superblock. The kernel keeps a cache of the in-use 107 // on-disk structures to provide a place for synchronizing access 108 // to inodes shared between multiple processes. 109 // 110 // ip->ref counts the number of pointer references to this cached 111 // inode; references are typically kept in struct file and in proc->cwd. 112 // When ip->ref falls to zero, the inode is no longer cached. 113 // It is an error to use an inode without holding a reference to it. 114 // 115 // Processes are only allowed to read and write inode 116 // metadata and contents when holding the inode's lock, 117 // represented by the I_BUSY flag in the in-memory copy. 118 // Because inode locks are held during disk accesses, 119 // they are implemented using a flag rather than with 120 // spin locks. Callers are responsible for locking 121 // inodes before passing them to routines in this file; leaving 122 // this responsibility with the caller makes it possible for them 123 // to create arbitrarily-sized atomic operations. 124 // 125 // To give maximum control over locking to the callers, 126 // the routines in this file that return inode pointers 127 // return pointers to *unlocked* inodes. It is the callers' 128 // responsibility to lock them before using them. A non-zero 129 // ip->ref keeps these unlocked inodes in the cache. 130 131 struct { 132 struct spinlock lock; 133 struct inode inode[NINODE]; 134 } icache; 135 136 void 137 iinit(void) 138 { 139 initlock(&icache.lock, "icache"); 140 } 141 142 static struct inode* iget(uint dev, uint inum); 143 144 //PAGEBREAK! 145 // Allocate a new inode with the given type on device dev. 146 struct inode* 147 ialloc(uint dev, short type) 148 { 149 int inum; 150 struct buf *bp; 151 struct dinode *dip; 152 struct superblock sb; 153 154 readsb(dev, &sb); 155 for(inum = 1; inum < sb.ninodes; inum++){ // loop over inode blocks 156 bp = bread(dev, IBLOCK(inum)); 157 dip = (struct dinode*)bp->data + inum%IPB; 158 if(dip->type == 0){ // a free inode 159 memset(dip, 0, sizeof(*dip)); 160 dip->type = type; 161 log_write(bp); // mark it allocated on the disk 162 brelse(bp); 163 return iget(dev, inum); 164 } 165 brelse(bp); 166 } 167 panic("ialloc: no inodes"); 168 } 169 170 // Copy inode, which has changed, from memory to disk. 171 void 172 iupdate(struct inode *ip) 173 { 174 struct buf *bp; 175 struct dinode *dip; 176 177 bp = bread(ip->dev, IBLOCK(ip->inum)); 178 dip = (struct dinode*)bp->data + ip->inum%IPB; 179 dip->type = ip->type; 180 dip->major = ip->major; 181 dip->minor = ip->minor; 182 dip->nlink = ip->nlink; 183 dip->size = ip->size; 184 memmove(dip->addrs, ip->addrs, sizeof(ip->addrs)); 185 log_write(bp); 186 brelse(bp); 187 } 188 189 // Find the inode with number inum on device dev 190 // and return the in-memory copy. 191 static struct inode* 192 iget(uint dev, uint inum) 193 { 194 struct inode *ip, *empty; 195 196 acquire(&icache.lock); 197 198 // Try for cached inode. 199 empty = 0; 200 for(ip = &icache.inode[0]; ip < &icache.inode[NINODE]; ip++){ 201 if(ip->ref > 0 && ip->dev == dev && ip->inum == inum){ 202 ip->ref++; 203 release(&icache.lock); 204 return ip; 205 } 206 if(empty == 0 && ip->ref == 0) // Remember empty slot. 207 empty = ip; 208 } 209 210 // Allocate fresh inode. 211 if(empty == 0) 212 panic("iget: no inodes"); 213 214 ip = empty; 215 ip->dev = dev; 216 ip->inum = inum; 217 ip->ref = 1; 218 ip->flags = 0; 219 release(&icache.lock); 220 221 return ip; 222 } 223 224 // Increment reference count for ip. 225 // Returns ip to enable ip = idup(ip1) idiom. 226 struct inode* 227 idup(struct inode *ip) 228 { 229 acquire(&icache.lock); 230 ip->ref++; 231 release(&icache.lock); 232 return ip; 233 } 234 235 // Lock the given inode. 236 void 237 ilock(struct inode *ip) 238 { 239 struct buf *bp; 240 struct dinode *dip; 241 242 if(ip == 0 || ip->ref < 1) 243 panic("ilock"); 244 245 acquire(&icache.lock); 246 while(ip->flags & I_BUSY) 247 sleep(ip, &icache.lock); 248 ip->flags |= I_BUSY; 249 release(&icache.lock); 250 251 if(!(ip->flags & I_VALID)){ 252 bp = bread(ip->dev, IBLOCK(ip->inum)); 253 dip = (struct dinode*)bp->data + ip->inum%IPB; 254 ip->type = dip->type; 255 ip->major = dip->major; 256 ip->minor = dip->minor; 257 ip->nlink = dip->nlink; 258 ip->size = dip->size; 259 memmove(ip->addrs, dip->addrs, sizeof(ip->addrs)); 260 brelse(bp); 261 ip->flags |= I_VALID; 262 if(ip->type == 0) 263 panic("ilock: no type"); 264 } 265 } 266 267 // Unlock the given inode. 268 void 269 iunlock(struct inode *ip) 270 { 271 if(ip == 0 || !(ip->flags & I_BUSY) || ip->ref < 1) 272 panic("iunlock"); 273 274 acquire(&icache.lock); 275 ip->flags &= ~I_BUSY; 276 wakeup(ip); 277 release(&icache.lock); 278 } 279 280 // Caller holds reference to unlocked ip. Drop reference. 281 void 282 iput(struct inode *ip) 283 { 284 acquire(&icache.lock); 285 if(ip->ref == 1 && (ip->flags & I_VALID) && ip->nlink == 0){ 286 // inode is no longer used: truncate and free inode. 287 if(ip->flags & I_BUSY) 288 panic("iput busy"); 289 ip->flags |= I_BUSY; 290 release(&icache.lock); 291 itrunc(ip); 292 ip->type = 0; 293 iupdate(ip); 294 acquire(&icache.lock); 295 ip->flags = 0; 296 wakeup(ip); 297 } 298 ip->ref--; 299 release(&icache.lock); 300 } 301 302 // Common idiom: unlock, then put. 303 void 304 iunlockput(struct inode *ip) 305 { 306 iunlock(ip); 307 iput(ip); 308 } 309 310 //PAGEBREAK! 311 // Inode contents 312 // 313 // The contents (data) associated with each inode is stored 314 // in a sequence of blocks on the disk. The first NDIRECT blocks 315 // are listed in ip->addrs[]. The next NINDIRECT blocks are 316 // listed in the block ip->addrs[NDIRECT]. 317 318 // Return the disk block address of the nth block in inode ip. 319 // If there is no such block, bmap allocates one. 320 static uint 321 bmap(struct inode *ip, uint bn) 322 { 323 uint addr, *a; 324 struct buf *bp; 325 326 if(bn < NDIRECT){ 327 if((addr = ip->addrs[bn]) == 0) 328 ip->addrs[bn] = addr = balloc(ip->dev); 329 return addr; 330 } 331 bn -= NDIRECT; 332 333 if(bn < NINDIRECT){ 334 // Load indirect block, allocating if necessary. 335 if((addr = ip->addrs[NDIRECT]) == 0) 336 ip->addrs[NDIRECT] = addr = balloc(ip->dev); 337 bp = bread(ip->dev, addr); 338 a = (uint*)bp->data; 339 if((addr = a[bn]) == 0){ 340 a[bn] = addr = balloc(ip->dev); 341 log_write(bp); 342 } 343 brelse(bp); 344 return addr; 345 } 346 347 panic("bmap: out of range"); 348 } 349 350 // Truncate inode (discard contents). 351 // Only called after the last dirent referring 352 // to this inode has been erased on disk. 353 static void 354 itrunc(struct inode *ip) 355 { 356 int i, j; 357 struct buf *bp; 358 uint *a; 359 360 for(i = 0; i < NDIRECT; i++){ 361 if(ip->addrs[i]){ 362 bfree(ip->dev, ip->addrs[i]); 363 ip->addrs[i] = 0; 364 } 365 } 366 367 if(ip->addrs[NDIRECT]){ 368 bp = bread(ip->dev, ip->addrs[NDIRECT]); 369 a = (uint*)bp->data; 370 for(j = 0; j < NINDIRECT; j++){ 371 if(a[j]) 372 bfree(ip->dev, a[j]); 373 } 374 brelse(bp); 375 bfree(ip->dev, ip->addrs[NDIRECT]); 376 ip->addrs[NDIRECT] = 0; 377 } 378 379 ip->size = 0; 380 iupdate(ip); 381 } 382 383 // Copy stat information from inode. 384 void 385 stati(struct inode *ip, struct stat *st) 386 { 387 st->dev = ip->dev; 388 st->ino = ip->inum; 389 st->type = ip->type; 390 st->nlink = ip->nlink; 391 st->size = ip->size; 392 } 393 394 //PAGEBREAK! 395 // Read data from inode. 396 int 397 readi(struct inode *ip, char *dst, uint off, uint n) 398 { 399 uint tot, m; 400 struct buf *bp; 401 402 if(ip->type == T_DEV){ 403 if(ip->major < 0 || ip->major >= NDEV || !devsw[ip->major].read) 404 return -1; 405 return devsw[ip->major].read(ip, dst, n); 406 } 407 408 if(off > ip->size || off + n < off) 409 return -1; 410 if(off + n > ip->size) 411 n = ip->size - off; 412 413 for(tot=0; tot<n; tot+=m, off+=m, dst+=m){ 414 bp = bread(ip->dev, bmap(ip, off/BSIZE)); 415 m = min(n - tot, BSIZE - off%BSIZE); 416 memmove(dst, bp->data + off%BSIZE, m); 417 brelse(bp); 418 } 419 return n; 420 } 421 422 // PAGEBREAK! 423 // Write data to inode. 424 int 425 writei(struct inode *ip, char *src, uint off, uint n) 426 { 427 uint tot, m; 428 struct buf *bp; 429 430 if(ip->type == T_DEV){ 431 if(ip->major < 0 || ip->major >= NDEV || !devsw[ip->major].write) 432 return -1; 433 return devsw[ip->major].write(ip, src, n); 434 } 435 436 if(off > ip->size || off + n < off) 437 return -1; 438 if(off + n > MAXFILE*BSIZE) 439 return -1; 440 441 for(tot=0; tot<n; tot+=m, off+=m, src+=m){ 442 bp = bread(ip->dev, bmap(ip, off/BSIZE)); 443 m = min(n - tot, BSIZE - off%BSIZE); 444 memmove(bp->data + off%BSIZE, src, m); 445 log_write(bp); 446 brelse(bp); 447 } 448 449 if(n > 0 && off > ip->size){ 450 ip->size = off; 451 iupdate(ip); 452 } 453 return n; 454 } 455 456 //PAGEBREAK! 457 // Directories 458 459 int 460 namecmp(const char *s, const char *t) 461 { 462 return strncmp(s, t, DIRSIZ); 463 } 464 465 // Look for a directory entry in a directory. 466 // If found, set *poff to byte offset of entry. 467 // Caller must have already locked dp. 468 struct inode* 469 dirlookup(struct inode *dp, char *name, uint *poff) 470 { 471 uint off, inum; 472 struct dirent de; 473 474 if(dp->type != T_DIR) 475 panic("dirlookup not DIR"); 476 477 for(off = 0; off < dp->size; off += sizeof(de)){ 478 if(readi(dp, (char*)&de, off, sizeof(de)) != sizeof(de)) 479 panic("dirlink read"); 480 if(de.inum == 0) 481 continue; 482 if(namecmp(name, de.name) == 0){ 483 // entry matches path element 484 if(poff) 485 *poff = off; 486 inum = de.inum; 487 return iget(dp->dev, inum); 488 } 489 } 490 491 return 0; 492 } 493 494 // Write a new directory entry (name, inum) into the directory dp. 495 int 496 dirlink(struct inode *dp, char *name, uint inum) 497 { 498 int off; 499 struct dirent de; 500 struct inode *ip; 501 502 // Check that name is not present. 503 if((ip = dirlookup(dp, name, 0)) != 0){ 504 iput(ip); 505 return -1; 506 } 507 508 // Look for an empty dirent. 509 for(off = 0; off < dp->size; off += sizeof(de)){ 510 if(readi(dp, (char*)&de, off, sizeof(de)) != sizeof(de)) 511 panic("dirlink read"); 512 if(de.inum == 0) 513 break; 514 } 515 516 strncpy(de.name, name, DIRSIZ); 517 de.inum = inum; 518 if(writei(dp, (char*)&de, off, sizeof(de)) != sizeof(de)) 519 panic("dirlink"); 520 521 return 0; 522 } 523 524 //PAGEBREAK! 525 // Paths 526 527 // Copy the next path element from path into name. 528 // Return a pointer to the element following the copied one. 529 // The returned path has no leading slashes, 530 // so the caller can check *path=='\0' to see if the name is the last one. 531 // If no name to remove, return 0. 532 // 533 // Examples: 534 // skipelem("a/bb/c", name) = "bb/c", setting name = "a" 535 // skipelem("///a//bb", name) = "bb", setting name = "a" 536 // skipelem("a", name) = "", setting name = "a" 537 // skipelem("", name) = skipelem("////", name) = 0 538 // 539 static char* 540 skipelem(char *path, char *name) 541 { 542 char *s; 543 int len; 544 545 while(*path == '/') 546 path++; 547 if(*path == 0) 548 return 0; 549 s = path; 550 while(*path != '/' && *path != 0) 551 path++; 552 len = path - s; 553 if(len >= DIRSIZ) 554 memmove(name, s, DIRSIZ); 555 else { 556 memmove(name, s, len); 557 name[len] = 0; 558 } 559 while(*path == '/') 560 path++; 561 return path; 562 } 563 564 // Look up and return the inode for a path name. 565 // If parent != 0, return the inode for the parent and copy the final 566 // path element into name, which must have room for DIRSIZ bytes. 567 static struct inode* 568 namex(char *path, int nameiparent, char *name) 569 { 570 struct inode *ip, *next; 571 572 if(*path == '/') 573 ip = iget(ROOTDEV, ROOTINO); 574 else 575 ip = idup(proc->cwd); 576 577 while((path = skipelem(path, name)) != 0){ 578 ilock(ip); 579 if(ip->type != T_DIR){ 580 iunlockput(ip); 581 return 0; 582 } 583 if(nameiparent && *path == '\0'){ 584 // Stop one level early. 585 iunlock(ip); 586 return ip; 587 } 588 if((next = dirlookup(ip, name, 0)) == 0){ 589 iunlockput(ip); 590 return 0; 591 } 592 iunlockput(ip); 593 ip = next; 594 } 595 if(nameiparent){ 596 iput(ip); 597 return 0; 598 } 599 return ip; 600 } 601 602 struct inode* 603 namei(char *path) 604 { 605 char name[DIRSIZ]; 606 return namex(path, 0, name); 607 } 608 609 struct inode* 610 nameiparent(char *path, char *name) 611 { 612 return namex(path, 1, name); 613 } 614