xref: /xv6-public/fs.c (revision a5fbfe41)
1 // File system implementation.  Five layers:
2 //   + Blocks: allocator for raw disk blocks.
3 //   + Log: crash recovery for multi-step updates.
4 //   + Files: inode allocator, reading, writing, metadata.
5 //   + Directories: inode with special contents (list of other inodes!)
6 //   + Names: paths like /usr/rtm/xv6/fs.c for convenient naming.
7 //
8 // This file contains the low-level file system manipulation
9 // routines.  The (higher-level) system call implementations
10 // are in sysfile.c.
11 
12 #include "types.h"
13 #include "defs.h"
14 #include "param.h"
15 #include "stat.h"
16 #include "mmu.h"
17 #include "proc.h"
18 #include "spinlock.h"
19 #include "buf.h"
20 #include "fs.h"
21 #include "file.h"
22 
23 #define min(a, b) ((a) < (b) ? (a) : (b))
24 static void itrunc(struct inode*);
25 
26 // Read the super block.
27 void
28 readsb(int dev, struct superblock *sb)
29 {
30   struct buf *bp;
31 
32   bp = bread(dev, 1);
33   memmove(sb, bp->data, sizeof(*sb));
34   brelse(bp);
35 }
36 
37 // Zero a block.
38 static void
39 bzero(int dev, int bno)
40 {
41   struct buf *bp;
42 
43   bp = bread(dev, bno);
44   memset(bp->data, 0, BSIZE);
45   log_write(bp);
46   brelse(bp);
47 }
48 
49 // Blocks.
50 
51 // Allocate a zeroed disk block.
52 static uint
53 balloc(uint dev)
54 {
55   int b, bi, m;
56   struct buf *bp;
57   struct superblock sb;
58 
59   bp = 0;
60   readsb(dev, &sb);
61   for(b = 0; b < sb.size; b += BPB){
62     bp = bread(dev, BBLOCK(b, sb.ninodes));
63     for(bi = 0; bi < BPB && b + bi < sb.size; bi++){
64       m = 1 << (bi % 8);
65       if((bp->data[bi/8] & m) == 0){  // Is block free?
66         bp->data[bi/8] |= m;  // Mark block in use.
67         log_write(bp);
68         brelse(bp);
69         bzero(dev, b + bi);
70         return b + bi;
71       }
72     }
73     brelse(bp);
74   }
75   panic("balloc: out of blocks");
76 }
77 
78 // Free a disk block.
79 static void
80 bfree(int dev, uint b)
81 {
82   struct buf *bp;
83   struct superblock sb;
84   int bi, m;
85 
86   readsb(dev, &sb);
87   bp = bread(dev, BBLOCK(b, sb.ninodes));
88   bi = b % BPB;
89   m = 1 << (bi % 8);
90   if((bp->data[bi/8] & m) == 0)
91     panic("freeing free block");
92   bp->data[bi/8] &= ~m;
93   log_write(bp);
94   brelse(bp);
95 }
96 
97 // Inodes.
98 //
99 // An inode describes a single unnamed file.
100 // The inode disk structure holds metadata: the file's type,
101 // its size, the number of links referring to it, and the
102 // list of blocks holding the file's content.
103 //
104 // The inodes are laid out sequentially on disk immediately after
105 // the superblock. Each inode has a number, indicating its
106 // position on the disk.
107 //
108 // The kernel keeps a cache of in-use inodes in memory
109 // to provide a place for synchronizing access
110 // to inodes used by multiple processes. The cached
111 // inodes include book-keeping information that is
112 // not stored on disk: ip->ref and ip->flags.
113 //
114 // ip->ref counts the number of pointer references to this cached
115 // inode; references are typically kept in struct file and in proc->cwd.
116 // When ip->ref falls to zero, the inode is no longer cached.
117 // It is an error to use an inode without holding a reference to it.
118 //
119 // Processes are only allowed to read and write inode
120 // metadata and contents when holding the inode's lock,
121 // represented by the I_BUSY bit in ip->flags.
122 // Because inode locks are held during disk accesses,
123 // they are implemented using a flag rather than with
124 // spin locks.  ilock() and iunlock() manipulate an
125 // inode's I_BUSY flag. Many routines in this file expect
126 // the caller to have already locked the inode; leaving
127 // this responsibility with the caller makes it possible for them
128 // to create arbitrarily-sized atomic operations.
129 //
130 // To give maximum control over locking to the callers,
131 // the routines in this file that return inode pointers
132 // return pointers to *unlocked* inodes.  It is the callers'
133 // responsibility to lock them before using them.  A non-zero
134 // ip->ref keeps these unlocked inodes in the cache.
135 //
136 // In order for the file system code to look at an inode, the inode
137 // must pass through a number of states, with transitions
138 // driven by the indicated functions:
139 //
140 // * Allocated on disk, indicated by a non-zero type.
141 //   ialloc() and iput().
142 // * Referenced in the cache, indicated by ip->ref > 0.
143 //   iget() and iput().
144 // * Cached inode is valid, indicated by I_VALID.
145 //   ilock() and iput().
146 // * Locked, indicated by I_BUSY.
147 //   ilock() and iunlock().
148 
149 struct {
150   struct spinlock lock;
151   struct inode inode[NINODE];
152 } icache;
153 
154 void
155 iinit(void)
156 {
157   initlock(&icache.lock, "icache");
158 }
159 
160 static struct inode* iget(uint dev, uint inum);
161 
162 //PAGEBREAK!
163 // Allocate a new inode with the given type on device dev.
164 // A free inode has a type of zero.
165 struct inode*
166 ialloc(uint dev, short type)
167 {
168   int inum;
169   struct buf *bp;
170   struct dinode *dip;
171   struct superblock sb;
172 
173   readsb(dev, &sb);
174 
175   for(inum = 1; inum < sb.ninodes; inum++){
176     bp = bread(dev, IBLOCK(inum));
177     dip = (struct dinode*)bp->data + inum%IPB;
178     if(dip->type == 0){  // a free inode
179       memset(dip, 0, sizeof(*dip));
180       dip->type = type;
181       log_write(bp);   // mark it allocated on the disk
182       brelse(bp);
183       return iget(dev, inum);
184     }
185     brelse(bp);
186   }
187   panic("ialloc: no inodes");
188 }
189 
190 // Copy inode, which has changed, from memory to disk.
191 void
192 iupdate(struct inode *ip)
193 {
194   struct buf *bp;
195   struct dinode *dip;
196 
197   bp = bread(ip->dev, IBLOCK(ip->inum));
198   dip = (struct dinode*)bp->data + ip->inum%IPB;
199   dip->type = ip->type;
200   dip->major = ip->major;
201   dip->minor = ip->minor;
202   dip->nlink = ip->nlink;
203   dip->size = ip->size;
204   memmove(dip->addrs, ip->addrs, sizeof(ip->addrs));
205   log_write(bp);
206   brelse(bp);
207 }
208 
209 // Find the inode with number inum on device dev
210 // and return the in-memory copy.
211 static struct inode*
212 iget(uint dev, uint inum)
213 {
214   struct inode *ip, *empty;
215 
216   acquire(&icache.lock);
217 
218   // Try for cached inode.
219   empty = 0;
220   for(ip = &icache.inode[0]; ip < &icache.inode[NINODE]; ip++){
221     if(ip->ref > 0 && ip->dev == dev && ip->inum == inum){
222       ip->ref++;
223       release(&icache.lock);
224       return ip;
225     }
226     if(empty == 0 && ip->ref == 0)    // Remember empty slot.
227       empty = ip;
228   }
229 
230   // Allocate fresh inode.
231   if(empty == 0)
232     panic("iget: no inodes");
233 
234   ip = empty;
235   ip->dev = dev;
236   ip->inum = inum;
237   ip->ref = 1;
238   ip->flags = 0;
239   release(&icache.lock);
240 
241   return ip;
242 }
243 
244 // Increment reference count for ip.
245 // Returns ip to enable ip = idup(ip1) idiom.
246 struct inode*
247 idup(struct inode *ip)
248 {
249   acquire(&icache.lock);
250   ip->ref++;
251   release(&icache.lock);
252   return ip;
253 }
254 
255 // Lock the given inode.
256 void
257 ilock(struct inode *ip)
258 {
259   struct buf *bp;
260   struct dinode *dip;
261 
262   if(ip == 0 || ip->ref < 1)
263     panic("ilock");
264 
265   acquire(&icache.lock);
266   while(ip->flags & I_BUSY)
267     sleep(ip, &icache.lock);
268   ip->flags |= I_BUSY;
269   release(&icache.lock);
270 
271   if(!(ip->flags & I_VALID)){
272     bp = bread(ip->dev, IBLOCK(ip->inum));
273     dip = (struct dinode*)bp->data + ip->inum%IPB;
274     ip->type = dip->type;
275     ip->major = dip->major;
276     ip->minor = dip->minor;
277     ip->nlink = dip->nlink;
278     ip->size = dip->size;
279     memmove(ip->addrs, dip->addrs, sizeof(ip->addrs));
280     brelse(bp);
281     ip->flags |= I_VALID;
282     if(ip->type == 0)
283       panic("ilock: no type");
284   }
285 }
286 
287 // Unlock the given inode.
288 void
289 iunlock(struct inode *ip)
290 {
291   if(ip == 0 || !(ip->flags & I_BUSY) || ip->ref < 1)
292     panic("iunlock");
293 
294   acquire(&icache.lock);
295   ip->flags &= ~I_BUSY;
296   wakeup(ip);
297   release(&icache.lock);
298 }
299 
300 // Caller holds reference to unlocked ip.  Drop reference.
301 void
302 iput(struct inode *ip)
303 {
304   acquire(&icache.lock);
305   if(ip->ref == 1 && (ip->flags & I_VALID) && ip->nlink == 0){
306     // inode is no longer used: truncate and free inode.
307     if(ip->flags & I_BUSY)
308       panic("iput busy");
309     ip->flags |= I_BUSY;
310     release(&icache.lock);
311     itrunc(ip);
312     ip->type = 0;
313     iupdate(ip);
314     acquire(&icache.lock);
315     ip->flags = 0;
316     wakeup(ip);
317   }
318   ip->ref--;
319   release(&icache.lock);
320 }
321 
322 // Common idiom: unlock, then put.
323 void
324 iunlockput(struct inode *ip)
325 {
326   iunlock(ip);
327   iput(ip);
328 }
329 
330 //PAGEBREAK!
331 // Inode contents
332 //
333 // The contents (data) associated with each inode is stored
334 // in a sequence of blocks on the disk.  The first NDIRECT blocks
335 // are listed in ip->addrs[].  The next NINDIRECT blocks are
336 // listed in the block ip->addrs[NDIRECT].
337 
338 // Return the disk block address of the nth block in inode ip.
339 // If there is no such block, bmap allocates one.
340 static uint
341 bmap(struct inode *ip, uint bn)
342 {
343   uint addr, *a;
344   struct buf *bp;
345 
346   if(bn < NDIRECT){
347     if((addr = ip->addrs[bn]) == 0)
348       ip->addrs[bn] = addr = balloc(ip->dev);
349     return addr;
350   }
351   bn -= NDIRECT;
352 
353   if(bn < NINDIRECT){
354     // Load indirect block, allocating if necessary.
355     if((addr = ip->addrs[NDIRECT]) == 0)
356       ip->addrs[NDIRECT] = addr = balloc(ip->dev);
357     bp = bread(ip->dev, addr);
358     a = (uint*)bp->data;
359     if((addr = a[bn]) == 0){
360       a[bn] = addr = balloc(ip->dev);
361       log_write(bp);
362     }
363     brelse(bp);
364     return addr;
365   }
366 
367   panic("bmap: out of range");
368 }
369 
370 // Truncate inode (discard contents).
371 // Only called after the last dirent referring
372 // to this inode has been erased on disk.
373 static void
374 itrunc(struct inode *ip)
375 {
376   int i, j;
377   struct buf *bp;
378   uint *a;
379 
380   for(i = 0; i < NDIRECT; i++){
381     if(ip->addrs[i]){
382       bfree(ip->dev, ip->addrs[i]);
383       ip->addrs[i] = 0;
384     }
385   }
386 
387   if(ip->addrs[NDIRECT]){
388     bp = bread(ip->dev, ip->addrs[NDIRECT]);
389     a = (uint*)bp->data;
390     for(j = 0; j < NINDIRECT; j++){
391       if(a[j])
392         bfree(ip->dev, a[j]);
393     }
394     brelse(bp);
395     bfree(ip->dev, ip->addrs[NDIRECT]);
396     ip->addrs[NDIRECT] = 0;
397   }
398 
399   ip->size = 0;
400   iupdate(ip);
401 }
402 
403 // Copy stat information from inode.
404 void
405 stati(struct inode *ip, struct stat *st)
406 {
407   st->dev = ip->dev;
408   st->ino = ip->inum;
409   st->type = ip->type;
410   st->nlink = ip->nlink;
411   st->size = ip->size;
412 }
413 
414 //PAGEBREAK!
415 // Read data from inode.
416 int
417 readi(struct inode *ip, char *dst, uint off, uint n)
418 {
419   uint tot, m;
420   struct buf *bp;
421 
422   if(ip->type == T_DEV){
423     if(ip->major < 0 || ip->major >= NDEV || !devsw[ip->major].read)
424       return -1;
425     return devsw[ip->major].read(ip, dst, n);
426   }
427 
428   if(off > ip->size || off + n < off)
429     return -1;
430   if(off + n > ip->size)
431     n = ip->size - off;
432 
433   for(tot=0; tot<n; tot+=m, off+=m, dst+=m){
434     bp = bread(ip->dev, bmap(ip, off/BSIZE));
435     m = min(n - tot, BSIZE - off%BSIZE);
436     memmove(dst, bp->data + off%BSIZE, m);
437     brelse(bp);
438   }
439   return n;
440 }
441 
442 // PAGEBREAK!
443 // Write data to inode.
444 int
445 writei(struct inode *ip, char *src, uint off, uint n)
446 {
447   uint tot, m;
448   struct buf *bp;
449 
450   if(ip->type == T_DEV){
451     if(ip->major < 0 || ip->major >= NDEV || !devsw[ip->major].write)
452       return -1;
453     return devsw[ip->major].write(ip, src, n);
454   }
455 
456   if(off > ip->size || off + n < off)
457     return -1;
458   if(off + n > MAXFILE*BSIZE)
459     return -1;
460 
461   for(tot=0; tot<n; tot+=m, off+=m, src+=m){
462     bp = bread(ip->dev, bmap(ip, off/BSIZE));
463     m = min(n - tot, BSIZE - off%BSIZE);
464     memmove(bp->data + off%BSIZE, src, m);
465     log_write(bp);
466     brelse(bp);
467   }
468 
469   if(n > 0 && off > ip->size){
470     ip->size = off;
471     iupdate(ip);
472   }
473   return n;
474 }
475 
476 //PAGEBREAK!
477 // Directories
478 
479 int
480 namecmp(const char *s, const char *t)
481 {
482   return strncmp(s, t, DIRSIZ);
483 }
484 
485 // Look for a directory entry in a directory.
486 // If found, set *poff to byte offset of entry.
487 // Caller must have already locked dp.
488 struct inode*
489 dirlookup(struct inode *dp, char *name, uint *poff)
490 {
491   uint off, inum;
492   struct dirent de;
493 
494   if(dp->type != T_DIR)
495     panic("dirlookup not DIR");
496 
497   for(off = 0; off < dp->size; off += sizeof(de)){
498     if(readi(dp, (char*)&de, off, sizeof(de)) != sizeof(de))
499       panic("dirlink read");
500     if(de.inum == 0)
501       continue;
502     if(namecmp(name, de.name) == 0){
503       // entry matches path element
504       if(poff)
505         *poff = off;
506       inum = de.inum;
507       return iget(dp->dev, inum);
508     }
509   }
510 
511   return 0;
512 }
513 
514 // Write a new directory entry (name, inum) into the directory dp.
515 int
516 dirlink(struct inode *dp, char *name, uint inum)
517 {
518   int off;
519   struct dirent de;
520   struct inode *ip;
521 
522   // Check that name is not present.
523   if((ip = dirlookup(dp, name, 0)) != 0){
524     iput(ip);
525     return -1;
526   }
527 
528   // Look for an empty dirent.
529   for(off = 0; off < dp->size; off += sizeof(de)){
530     if(readi(dp, (char*)&de, off, sizeof(de)) != sizeof(de))
531       panic("dirlink read");
532     if(de.inum == 0)
533       break;
534   }
535 
536   strncpy(de.name, name, DIRSIZ);
537   de.inum = inum;
538   if(writei(dp, (char*)&de, off, sizeof(de)) != sizeof(de))
539     panic("dirlink");
540 
541   return 0;
542 }
543 
544 //PAGEBREAK!
545 // Paths
546 
547 // Copy the next path element from path into name.
548 // Return a pointer to the element following the copied one.
549 // The returned path has no leading slashes,
550 // so the caller can check *path=='\0' to see if the name is the last one.
551 // If no name to remove, return 0.
552 //
553 // Examples:
554 //   skipelem("a/bb/c", name) = "bb/c", setting name = "a"
555 //   skipelem("///a//bb", name) = "bb", setting name = "a"
556 //   skipelem("a", name) = "", setting name = "a"
557 //   skipelem("", name) = skipelem("////", name) = 0
558 //
559 static char*
560 skipelem(char *path, char *name)
561 {
562   char *s;
563   int len;
564 
565   while(*path == '/')
566     path++;
567   if(*path == 0)
568     return 0;
569   s = path;
570   while(*path != '/' && *path != 0)
571     path++;
572   len = path - s;
573   if(len >= DIRSIZ)
574     memmove(name, s, DIRSIZ);
575   else {
576     memmove(name, s, len);
577     name[len] = 0;
578   }
579   while(*path == '/')
580     path++;
581   return path;
582 }
583 
584 // Look up and return the inode for a path name.
585 // If parent != 0, return the inode for the parent and copy the final
586 // path element into name, which must have room for DIRSIZ bytes.
587 static struct inode*
588 namex(char *path, int nameiparent, char *name)
589 {
590   struct inode *ip, *next;
591 
592   if(*path == '/')
593     ip = iget(ROOTDEV, ROOTINO);
594   else
595     ip = idup(proc->cwd);
596 
597   while((path = skipelem(path, name)) != 0){
598     ilock(ip);
599     if(ip->type != T_DIR){
600       iunlockput(ip);
601       return 0;
602     }
603     if(nameiparent && *path == '\0'){
604       // Stop one level early.
605       iunlock(ip);
606       return ip;
607     }
608     if((next = dirlookup(ip, name, 0)) == 0){
609       iunlockput(ip);
610       return 0;
611     }
612     iunlockput(ip);
613     ip = next;
614   }
615   if(nameiparent){
616     iput(ip);
617     return 0;
618   }
619   return ip;
620 }
621 
622 struct inode*
623 namei(char *path)
624 {
625   char name[DIRSIZ];
626   return namex(path, 0, name);
627 }
628 
629 struct inode*
630 nameiparent(char *path, char *name)
631 {
632   return namex(path, 1, name);
633 }
634