xref: /xv6-public/fs.c (revision 3341e30f)
1 // File system implementation.  Four layers:
2 //   + Blocks: allocator for raw disk blocks.
3 //   + Files: inode allocator, reading, writing, metadata.
4 //   + Directories: inode with special contents (list of other inodes!)
5 //   + Names: paths like /usr/rtm/xv6/fs.c for convenient naming.
6 //
7 // Disk layout is: superblock, inodes, block in-use bitmap, data blocks.
8 //
9 // This file contains the low-level file system manipulation
10 // routines.  The (higher-level) system call implementations
11 // are in sysfile.c.
12 
13 #include "types.h"
14 #include "defs.h"
15 #include "param.h"
16 #include "stat.h"
17 #include "mmu.h"
18 #include "proc.h"
19 #include "spinlock.h"
20 #include "buf.h"
21 #include "fs.h"
22 #include "fsvar.h"
23 #include "dev.h"
24 
25 #define min(a, b) ((a) < (b) ? (a) : (b))
26 static void itrunc(struct inode*);
27 
28 // Read the super block.
29 static void
30 readsb(int dev, struct superblock *sb)
31 {
32   struct buf *bp;
33 
34   bp = bread(dev, 1);
35   memmove(sb, bp->data, sizeof(*sb));
36   brelse(bp);
37 }
38 
39 // Zero a block.
40 static void
41 bzero(int dev, int bno)
42 {
43   struct buf *bp;
44 
45   bp = bread(dev, bno);
46   memset(bp->data, 0, BSIZE);
47   bwrite(bp);
48   brelse(bp);
49 }
50 
51 // Blocks.
52 
53 // Allocate a disk block.
54 static uint
55 balloc(uint dev)
56 {
57   int b, bi, m;
58   struct buf *bp;
59   struct superblock sb;
60 
61   bp = 0;
62   readsb(dev, &sb);
63   for(b = 0; b < sb.size; b += BPB){
64     bp = bread(dev, BBLOCK(b, sb.ninodes));
65     for(bi = 0; bi < BPB; bi++){
66       m = 1 << (bi % 8);
67       if((bp->data[bi/8] & m) == 0){  // Is block free?
68         bp->data[bi/8] |= m;  // Mark block in use on disk.
69         bwrite(bp);
70         brelse(bp);
71         return b + bi;
72       }
73     }
74     brelse(bp);
75   }
76   panic("balloc: out of blocks");
77 }
78 
79 // Free a disk block.
80 static void
81 bfree(int dev, uint b)
82 {
83   struct buf *bp;
84   struct superblock sb;
85   int bi, m;
86 
87   bzero(dev, b);
88 
89   readsb(dev, &sb);
90   bp = bread(dev, BBLOCK(b, sb.ninodes));
91   bi = b % BPB;
92   m = 1 << (bi % 8);
93   if((bp->data[bi/8] & m) == 0)
94     panic("freeing free block");
95   bp->data[bi/8] &= ~m;  // Mark block free on disk.
96   bwrite(bp);
97   brelse(bp);
98 }
99 
100 // Inodes.
101 //
102 // An inode is a single, unnamed file in the file system.
103 // The inode disk structure holds metadata (the type, device numbers,
104 // and data size) along with a list of blocks where the associated
105 // data can be found.
106 //
107 // The inodes are laid out sequentially on disk immediately after
108 // the superblock.  The kernel keeps a cache of the in-use
109 // on-disk structures to provide a place for synchronizing access
110 // to inodes shared between multiple processes.
111 //
112 // ip->ref counts the number of pointer references to this cached
113 // inode; references are typically kept in struct file and in cp->cwd.
114 // When ip->ref falls to zero, the inode is no longer cached.
115 // It is an error to use an inode without holding a reference to it.
116 //
117 // Processes are only allowed to read and write inode
118 // metadata and contents when holding the inode's lock,
119 // represented by the I_BUSY flag in the in-memory copy.
120 // Because inode locks are held during disk accesses,
121 // they are implemented using a flag rather than with
122 // spin locks.  Callers are responsible for locking
123 // inodes before passing them to routines in this file; leaving
124 // this responsibility with the caller makes it possible for them
125 // to create arbitrarily-sized atomic operations.
126 //
127 // To give maximum control over locking to the callers,
128 // the routines in this file that return inode pointers
129 // return pointers to *unlocked* inodes.  It is the callers'
130 // responsibility to lock them before using them.  A non-zero
131 // ip->ref keeps these unlocked inodes in the cache.
132 
133 struct {
134   struct spinlock lock;
135   struct inode inode[NINODE];
136 } icache;
137 
138 void
139 iinit(void)
140 {
141   initlock(&icache.lock, "icache.lock");
142 }
143 
144 // Find the inode with number inum on device dev
145 // and return the in-memory copy.
146 static struct inode*
147 iget(uint dev, uint inum)
148 {
149   struct inode *ip, *empty;
150 
151   acquire(&icache.lock);
152 
153   // Try for cached inode.
154   empty = 0;
155   for(ip = &icache.inode[0]; ip < &icache.inode[NINODE]; ip++){
156     if(ip->ref > 0 && ip->dev == dev && ip->inum == inum){
157       ip->ref++;
158       release(&icache.lock);
159       return ip;
160     }
161     if(empty == 0 && ip->ref == 0)    // Remember empty slot.
162       empty = ip;
163   }
164 
165   // Allocate fresh inode.
166   if(empty == 0)
167     panic("iget: no inodes");
168 
169   ip = empty;
170   ip->dev = dev;
171   ip->inum = inum;
172   ip->ref = 1;
173   ip->flags = 0;
174   release(&icache.lock);
175 
176   return ip;
177 }
178 
179 // Increment reference count for ip.
180 // Returns ip to enable ip = idup(ip1) idiom.
181 struct inode*
182 idup(struct inode *ip)
183 {
184   acquire(&icache.lock);
185   ip->ref++;
186   release(&icache.lock);
187   return ip;
188 }
189 
190 // Lock the given inode.
191 void
192 ilock(struct inode *ip)
193 {
194   struct buf *bp;
195   struct dinode *dip;
196 
197   if(ip == 0 || ip->ref < 1)
198     panic("ilock");
199 
200   acquire(&icache.lock);
201   while(ip->flags & I_BUSY)
202     sleep(ip, &icache.lock);
203   ip->flags |= I_BUSY;
204   release(&icache.lock);
205 
206   if(!(ip->flags & I_VALID)){
207     bp = bread(ip->dev, IBLOCK(ip->inum));
208     dip = (struct dinode*)bp->data + ip->inum%IPB;
209     ip->type = dip->type;
210     ip->major = dip->major;
211     ip->minor = dip->minor;
212     ip->nlink = dip->nlink;
213     ip->size = dip->size;
214     memmove(ip->addrs, dip->addrs, sizeof(ip->addrs));
215     brelse(bp);
216     ip->flags |= I_VALID;
217     if(ip->type == 0)
218       panic("ilock: no type");
219   }
220 }
221 
222 // Unlock the given inode.
223 void
224 iunlock(struct inode *ip)
225 {
226   if(ip == 0 || !(ip->flags & I_BUSY) || ip->ref < 1)
227     panic("iunlock");
228 
229   acquire(&icache.lock);
230   ip->flags &= ~I_BUSY;
231   wakeup(ip);
232   release(&icache.lock);
233 }
234 
235 // Caller holds reference to unlocked ip.  Drop reference.
236 void
237 iput(struct inode *ip)
238 {
239   acquire(&icache.lock);
240   if(ip->ref == 1 && (ip->flags & I_VALID) && ip->nlink == 0) {
241     // inode is no longer used: truncate and free inode.
242     if(ip->flags & I_BUSY)
243       panic("iput busy");
244     ip->flags |= I_BUSY;
245     release(&icache.lock);
246     itrunc(ip);
247     ip->type = 0;
248     iupdate(ip);
249     acquire(&icache.lock);
250     ip->flags &= ~I_BUSY;
251     wakeup(ip);
252   }
253   ip->ref--;
254   release(&icache.lock);
255 }
256 
257 // Common idiom: unlock, then put.
258 void
259 iunlockput(struct inode *ip)
260 {
261   iunlock(ip);
262   iput(ip);
263 }
264 
265 //PAGEBREAK!
266 // Allocate a new inode with the given type on device dev.
267 struct inode*
268 ialloc(uint dev, short type)
269 {
270   int inum;
271   struct buf *bp;
272   struct dinode *dip;
273   struct superblock sb;
274 
275   readsb(dev, &sb);
276   for(inum = 1; inum < sb.ninodes; inum++) {  // loop over inode blocks
277     bp = bread(dev, IBLOCK(inum));
278     dip = (struct dinode*)bp->data + inum%IPB;
279     if(dip->type == 0) {  // a free inode
280       memset(dip, 0, sizeof(*dip));
281       dip->type = type;
282       bwrite(bp);   // mark it allocated on the disk
283       brelse(bp);
284       return iget(dev, inum);
285     }
286     brelse(bp);
287   }
288   panic("ialloc: no inodes");
289 }
290 
291 // Copy inode, which has changed, from memory to disk.
292 void
293 iupdate(struct inode *ip)
294 {
295   struct buf *bp;
296   struct dinode *dip;
297 
298   bp = bread(ip->dev, IBLOCK(ip->inum));
299   dip = (struct dinode*)bp->data + ip->inum%IPB;
300   dip->type = ip->type;
301   dip->major = ip->major;
302   dip->minor = ip->minor;
303   dip->nlink = ip->nlink;
304   dip->size = ip->size;
305   memmove(dip->addrs, ip->addrs, sizeof(ip->addrs));
306   bwrite(bp);
307   brelse(bp);
308 }
309 
310 //PAGEBREAK!
311 // Inode contents
312 //
313 // The contents (data) associated with each inode is stored
314 // in a sequence of blocks on the disk.  The first NDIRECT blocks
315 // are listed in ip->addrs[].  The next NINDIRECT blocks are
316 // listed in the block ip->addrs[INDIRECT].
317 
318 // Return the disk block address of the nth block in inode ip.
319 // If there is no such block, alloc controls whether one is allocated.
320 static uint
321 bmap(struct inode *ip, uint bn, int alloc)
322 {
323   uint addr, *a;
324   struct buf *bp;
325 
326   if(bn < NDIRECT) {
327     if((addr = ip->addrs[bn]) == 0) {
328       if(!alloc)
329         return -1;
330       ip->addrs[bn] = addr = balloc(ip->dev);
331     }
332     return addr;
333   }
334   bn -= NDIRECT;
335 
336   if(bn < NINDIRECT) {
337     // Load indirect block, allocating if necessary.
338     if((addr = ip->addrs[INDIRECT]) == 0) {
339       if(!alloc)
340         return -1;
341       ip->addrs[INDIRECT] = addr = balloc(ip->dev);
342     }
343     bp = bread(ip->dev, addr);
344     a = (uint*)bp->data;
345 
346     if((addr = a[bn]) == 0) {
347       if(!alloc) {
348         brelse(bp);
349         return -1;
350       }
351       a[bn] = addr = balloc(ip->dev);
352       bwrite(bp);
353     }
354     brelse(bp);
355     return addr;
356   }
357 
358   panic("bmap: out of range");
359 }
360 
361 // PAGEBREAK: 30
362 // Truncate inode (discard contents).
363 static void
364 itrunc(struct inode *ip)
365 {
366   int i, j;
367   struct buf *bp;
368   uint *a;
369 
370   for(i = 0; i < NDIRECT; i++) {
371     if(ip->addrs[i]) {
372       bfree(ip->dev, ip->addrs[i]);
373       ip->addrs[i] = 0;
374     }
375   }
376 
377   if(ip->addrs[INDIRECT]) {
378     bp = bread(ip->dev, ip->addrs[INDIRECT]);
379     a = (uint*)bp->data;
380     for(j = 0; j < NINDIRECT; j++) {
381       if(a[j])
382         bfree(ip->dev, a[j]);
383     }
384     brelse(bp);
385     ip->addrs[INDIRECT] = 0;
386   }
387 
388   ip->size = 0;
389   iupdate(ip);
390 }
391 
392 // Copy stat information from inode.
393 void
394 stati(struct inode *ip, struct stat *st)
395 {
396   st->dev = ip->dev;
397   st->ino = ip->inum;
398   st->type = ip->type;
399   st->nlink = ip->nlink;
400   st->size = ip->size;
401 }
402 
403 //PAGEBREAK!
404 // Read data from inode.
405 int
406 readi(struct inode *ip, char *dst, uint off, uint n)
407 {
408   uint tot, m;
409   struct buf *bp;
410 
411   if(ip->type == T_DEV) {
412     if(ip->major < 0 || ip->major >= NDEV || !devsw[ip->major].read)
413       return -1;
414     return devsw[ip->major].read(ip->minor, dst, n);
415   }
416 
417   if(off > ip->size || off + n < off)
418     return -1;
419   if(off + n > ip->size)
420     n = ip->size - off;
421 
422   for(tot=0; tot<n; tot+=m, off+=m, dst+=m) {
423     bp = bread(ip->dev, bmap(ip, off/BSIZE, 0));
424     m = min(n - tot, BSIZE - off%BSIZE);
425     memmove(dst, bp->data + off%BSIZE, m);
426     brelse(bp);
427   }
428   return n;
429 }
430 
431 // PAGEBREAK!
432 // Write data to inode.
433 int
434 writei(struct inode *ip, char *src, uint off, uint n)
435 {
436   uint tot, m;
437   struct buf *bp;
438 
439   if(ip->type == T_DEV) {
440     if(ip->major < 0 || ip->major >= NDEV || !devsw[ip->major].write)
441       return -1;
442     return devsw[ip->major].write(ip->minor, src, n);
443   }
444 
445   if(off + n < off)
446     return -1;
447   if(off + n > MAXFILE*BSIZE)
448     n = MAXFILE*BSIZE - off;
449 
450   for(tot=0; tot<n; tot+=m, off+=m, src+=m) {
451     bp = bread(ip->dev, bmap(ip, off/BSIZE, 1));
452     m = min(n - tot, BSIZE - off%BSIZE);
453     memmove(bp->data + off%BSIZE, src, m);
454     bwrite(bp);
455     brelse(bp);
456   }
457 
458   if(n > 0 && off > ip->size) {
459     ip->size = off;
460     iupdate(ip);
461   }
462   return n;
463 }
464 
465 //PAGEBREAK!
466 // Directories
467 
468 int
469 namecmp(const char *s, const char *t)
470 {
471   return strncmp(s, t, DIRSIZ);
472 }
473 
474 // Look for a directory entry in a directory.
475 // If found, set *poff to byte offset of entry.
476 // Caller must have already locked dp.
477 struct inode*
478 dirlookup(struct inode *dp, char *name, uint *poff)
479 {
480   uint off, inum;
481   struct buf *bp;
482   struct dirent *de;
483 
484   if(dp->type != T_DIR)
485     panic("dirlookup not DIR");
486 
487   for(off = 0; off < dp->size; off += BSIZE){
488     bp = bread(dp->dev, bmap(dp, off / BSIZE, 0));
489     for(de = (struct dirent*)bp->data;
490         de < (struct dirent*)(bp->data + BSIZE);
491         de++){
492       if(de->inum == 0)
493         continue;
494       if(namecmp(name, de->name) == 0){
495         // entry matches path element
496         if(poff)
497           *poff = off + (uchar*)de - bp->data;
498         inum = de->inum;
499         brelse(bp);
500         return iget(dp->dev, inum);
501       }
502     }
503     brelse(bp);
504   }
505   return 0;
506 }
507 
508 // Write a new directory entry (name, ino) into the directory dp.
509 int
510 dirlink(struct inode *dp, char *name, uint ino)
511 {
512   int off;
513   struct dirent de;
514   struct inode *ip;
515 
516   // Check that name is not present.
517   if((ip = dirlookup(dp, name, 0)) != 0){
518     iput(ip);
519     return -1;
520   }
521 
522   // Look for an empty dirent.
523   for(off = 0; off < dp->size; off += sizeof(de)){
524     if(readi(dp, (char*)&de, off, sizeof(de)) != sizeof(de))
525       panic("dirlink read");
526     if(de.inum == 0)
527       break;
528   }
529 
530   strncpy(de.name, name, DIRSIZ);
531   de.inum = ino;
532   if(writei(dp, (char*)&de, off, sizeof(de)) != sizeof(de))
533     panic("dirlink");
534 
535   return 0;
536 }
537 
538 //PAGEBREAK!
539 // Paths
540 
541 // Copy the next path element from path into name.
542 // Return a pointer to the element following the copied one.
543 // The returned path has no leading slashes,
544 // so the caller can check *path=='\0' to see if the name is the last one.
545 // If no name to remove, return 0.
546 //
547 // Examples:
548 //   skipelem("a/bb/c", name) = "bb/c", setting name = "a"
549 //   skipelem("///a//bb", name) = "bb", setting name = "a"
550 //   skipelem("", name) = skipelem("////", name) = 0
551 //
552 static char*
553 skipelem(char *path, char *name)
554 {
555   char *s;
556   int len;
557 
558   while(*path == '/')
559     path++;
560   if(*path == 0)
561     return 0;
562   s = path;
563   while(*path != '/' && *path != 0)
564     path++;
565   len = path - s;
566   if(len >= DIRSIZ)
567     memmove(name, s, DIRSIZ);
568   else{
569     memmove(name, s, len);
570     name[len] = 0;
571   }
572   while(*path == '/')
573     path++;
574   return path;
575 }
576 
577 // Look up and return the inode for a path name.
578 // If parent is set, return the inode for the parent
579 // and write the final path element to name, which
580 // should have room for DIRSIZ bytes.
581 static struct inode*
582 _namei(char *path, int parent, char *name)
583 {
584   struct inode *ip, *next;
585 
586   if(*path == '/')
587     ip = iget(ROOTDEV, 1);
588   else
589     ip = idup(cp->cwd);
590 
591   while((path = skipelem(path, name)) != 0){
592     ilock(ip);
593     if(ip->type != T_DIR){
594       iunlockput(ip);
595       return 0;
596     }
597     if(parent && *path == '\0'){
598       // Stop one level early.
599       iunlock(ip);
600       return ip;
601     }
602     if((next = dirlookup(ip, name, 0)) == 0){
603       iunlockput(ip);
604       return 0;
605     }
606     iunlockput(ip);
607     ip = next;
608   }
609   if(parent){
610     iput(ip);
611     return 0;
612   }
613   return ip;
614 }
615 
616 struct inode*
617 namei(char *path)
618 {
619   char name[DIRSIZ];
620   return _namei(path, 0, name);
621 }
622 struct inode*
623 nameiparent(char *path, char *name)
624 {
625   return _namei(path, 1, name);
626 }
627