xref: /xv6-public/fs.c (revision 8eb20827)
1 // File system implementation.  Four layers:
2 //   + Blocks: allocator for raw disk blocks.
3 //   + Files: inode allocator, reading, writing, metadata.
4 //   + Directories: inode with special contents (list of other inodes!)
5 //   + Names: paths like /usr/rtm/xv6/fs.c for convenient naming.
6 //
7 // Disk layout is: superblock, inodes, block in-use bitmap, data blocks.
8 //
9 // This file contains the low-level file system manipulation
10 // routines.  The (higher-level) system call implementations
11 // are in sysfile.c.
12 
13 #include "types.h"
14 #include "defs.h"
15 #include "param.h"
16 #include "stat.h"
17 #include "mmu.h"
18 #include "proc.h"
19 #include "spinlock.h"
20 #include "buf.h"
21 #include "fs.h"
22 #include "fsvar.h"
23 #include "dev.h"
24 
25 #define min(a, b) ((a) < (b) ? (a) : (b))
26 static void itrunc(struct inode*);
27 
28 // Read the super block.
29 static void
30 readsb(int dev, struct superblock *sb)
31 {
32   struct buf *bp;
33 
34   bp = bread(dev, 1);
35   memmove(sb, bp->data, sizeof(*sb));
36   brelse(bp);
37 }
38 
39 // Zero a block.
40 static void
41 bzero(int dev, int bno)
42 {
43   struct buf *bp;
44 
45   bp = bread(dev, bno);
46   memset(bp->data, 0, BSIZE);
47   bwrite(bp);
48   brelse(bp);
49 }
50 
51 // Blocks.
52 
53 // Allocate a disk block.
54 static uint
55 balloc(uint dev)
56 {
57   int b, bi, m;
58   struct buf *bp;
59   struct superblock sb;
60 
61   bp = 0;
62   readsb(dev, &sb);
63   for(b = 0; b < sb.size; b += BPB){
64     bp = bread(dev, BBLOCK(b, sb.ninodes));
65     for(bi = 0; bi < BPB; bi++){
66       m = 1 << (bi % 8);
67       if((bp->data[bi/8] & m) == 0){  // Is block free?
68         bp->data[bi/8] |= m;  // Mark block in use on disk.
69         bwrite(bp);
70         brelse(bp);
71         return b + bi;
72       }
73     }
74     brelse(bp);
75   }
76   panic("balloc: out of blocks");
77 }
78 
79 // Free a disk block.
80 static void
81 bfree(int dev, uint b)
82 {
83   struct buf *bp;
84   struct superblock sb;
85   int bi, m;
86 
87   bzero(dev, b);
88 
89   readsb(dev, &sb);
90   bp = bread(dev, BBLOCK(b, sb.ninodes));
91   bi = b % BPB;
92   m = 1 << (bi % 8);
93   if((bp->data[bi/8] & m) == 0)
94     panic("freeing free block");
95   bp->data[bi/8] &= ~m;  // Mark block free on disk.
96   bwrite(bp);
97   brelse(bp);
98 }
99 
100 // Inodes.
101 //
102 // An inode is a single, unnamed file in the file system.
103 // The inode disk structure holds metadata (the type, device numbers,
104 // and data size) along with a list of blocks where the associated
105 // data can be found.
106 //
107 // The inodes are laid out sequentially on disk immediately after
108 // the superblock.  The kernel keeps a cache of the in-use
109 // on-disk structures to provide a place for synchronizing access
110 // to inodes shared between multiple processes.
111 //
112 // ip->ref counts the number of pointer references to this cached
113 // inode; references are typically kept in struct file and in cp->cwd.
114 // When ip->ref falls to zero, the inode is no longer cached.
115 // It is an error to use an inode without holding a reference to it.
116 //
117 // Processes are only allowed to read and write inode
118 // metadata and contents when holding the inode's lock,
119 // represented by the I_BUSY flag in the in-memory copy.
120 // Because inode locks are held during disk accesses,
121 // they are implemented using a flag rather than with
122 // spin locks.  Callers are responsible for locking
123 // inodes before passing them to routines in this file; leaving
124 // this responsibility with the caller makes it possible for them
125 // to create arbitrarily-sized atomic operations.
126 //
127 // To give maximum control over locking to the callers,
128 // the routines in this file that return inode pointers
129 // return pointers to *unlocked* inodes.  It is the callers'
130 // responsibility to lock them before using them.  A non-zero
131 // ip->ref keeps these unlocked inodes in the cache.
132 
133 struct {
134   struct spinlock lock;
135   struct inode inode[NINODE];
136 } icache;
137 
138 void
139 iinit(void)
140 {
141   initlock(&icache.lock, "icache.lock");
142 }
143 
144 // Find the inode with number inum on device dev
145 // and return the in-memory copy.
146 static struct inode*
147 iget(uint dev, uint inum)
148 {
149   struct inode *ip, *empty;
150 
151   acquire(&icache.lock);
152 
153   // Try for cached inode.
154   empty = 0;
155   for(ip = &icache.inode[0]; ip < &icache.inode[NINODE]; ip++){
156     if(ip->ref > 0 && ip->dev == dev && ip->inum == inum){
157       ip->ref++;
158       release(&icache.lock);
159       return ip;
160     }
161     if(empty == 0 && ip->ref == 0)    // Remember empty slot.
162       empty = ip;
163   }
164 
165   // Allocate fresh inode.
166   if(empty == 0)
167     panic("iget: no inodes");
168 
169   ip = empty;
170   ip->dev = dev;
171   ip->inum = inum;
172   ip->ref = 1;
173   ip->flags = 0;
174   release(&icache.lock);
175 
176   return ip;
177 }
178 
179 // Increment reference count for ip.
180 // Returns ip to enable ip = idup(ip1) idiom.
181 struct inode*
182 idup(struct inode *ip)
183 {
184   acquire(&icache.lock);
185   ip->ref++;
186   release(&icache.lock);
187   return ip;
188 }
189 
190 // Lock the given inode.
191 void
192 ilock(struct inode *ip)
193 {
194   struct buf *bp;
195   struct dinode *dip;
196 
197   if(ip == 0 || ip->ref < 1)
198     panic("ilock");
199 
200   acquire(&icache.lock);
201   while(ip->flags & I_BUSY)
202     sleep(ip, &icache.lock);
203   ip->flags |= I_BUSY;
204   release(&icache.lock);
205 
206   if(!(ip->flags & I_VALID)){
207     bp = bread(ip->dev, IBLOCK(ip->inum));
208     dip = (struct dinode*)bp->data + ip->inum%IPB;
209     ip->type = dip->type;
210     ip->major = dip->major;
211     ip->minor = dip->minor;
212     ip->nlink = dip->nlink;
213     ip->size = dip->size;
214     memmove(ip->addrs, dip->addrs, sizeof(ip->addrs));
215     brelse(bp);
216     ip->flags |= I_VALID;
217     if(ip->type == 0)
218       panic("ilock: no type");
219   }
220 }
221 
222 // Unlock the given inode.
223 void
224 iunlock(struct inode *ip)
225 {
226   if(ip == 0 || !(ip->flags & I_BUSY) || ip->ref < 1)
227     panic("iunlock");
228 
229   acquire(&icache.lock);
230   ip->flags &= ~I_BUSY;
231   wakeup(ip);
232   release(&icache.lock);
233 }
234 
235 // Caller holds reference to unlocked ip.  Drop reference.
236 void
237 iput(struct inode *ip)
238 {
239   acquire(&icache.lock);
240   if(ip->ref == 1 && (ip->flags & I_VALID) && ip->nlink == 0){
241     // inode is no longer used: truncate and free inode.
242     if(ip->flags & I_BUSY)
243       panic("iput busy");
244     ip->flags |= I_BUSY;
245     release(&icache.lock);
246     itrunc(ip);
247     ip->type = 0;
248     iupdate(ip);
249     acquire(&icache.lock);
250     ip->flags &= ~I_BUSY;
251     wakeup(ip);
252   }
253   ip->ref--;
254   release(&icache.lock);
255 }
256 
257 // Common idiom: unlock, then put.
258 void
259 iunlockput(struct inode *ip)
260 {
261   iunlock(ip);
262   iput(ip);
263 }
264 
265 //PAGEBREAK!
266 // Allocate a new inode with the given type on device dev.
267 struct inode*
268 ialloc(uint dev, short type)
269 {
270   int inum;
271   struct buf *bp;
272   struct dinode *dip;
273   struct superblock sb;
274 
275   readsb(dev, &sb);
276   for(inum = 1; inum < sb.ninodes; inum++){  // loop over inode blocks
277     bp = bread(dev, IBLOCK(inum));
278     dip = (struct dinode*)bp->data + inum%IPB;
279     if(dip->type == 0){  // a free inode
280       memset(dip, 0, sizeof(*dip));
281       dip->type = type;
282       bwrite(bp);   // mark it allocated on the disk
283       brelse(bp);
284       return iget(dev, inum);
285     }
286     brelse(bp);
287   }
288   panic("ialloc: no inodes");
289 }
290 
291 // Copy inode, which has changed, from memory to disk.
292 void
293 iupdate(struct inode *ip)
294 {
295   struct buf *bp;
296   struct dinode *dip;
297 
298   bp = bread(ip->dev, IBLOCK(ip->inum));
299   dip = (struct dinode*)bp->data + ip->inum%IPB;
300   dip->type = ip->type;
301   dip->major = ip->major;
302   dip->minor = ip->minor;
303   dip->nlink = ip->nlink;
304   dip->size = ip->size;
305   memmove(dip->addrs, ip->addrs, sizeof(ip->addrs));
306   bwrite(bp);
307   brelse(bp);
308 }
309 
310 //PAGEBREAK!
311 // Inode contents
312 //
313 // The contents (data) associated with each inode is stored
314 // in a sequence of blocks on the disk.  The first NDIRECT blocks
315 // are listed in ip->addrs[].  The next NINDIRECT blocks are
316 // listed in the block ip->addrs[INDIRECT].
317 
318 // Return the disk block address of the nth block in inode ip.
319 // If there is no such block, alloc controls whether one is allocated.
320 static uint
321 bmap(struct inode *ip, uint bn, int alloc)
322 {
323   uint addr, *a;
324   struct buf *bp;
325 
326   if(bn < NDIRECT){
327     if((addr = ip->addrs[bn]) == 0){
328       if(!alloc)
329         return -1;
330       ip->addrs[bn] = addr = balloc(ip->dev);
331     }
332     return addr;
333   }
334   bn -= NDIRECT;
335 
336   if(bn < NINDIRECT){
337     // Load indirect block, allocating if necessary.
338     if((addr = ip->addrs[INDIRECT]) == 0){
339       if(!alloc)
340         return -1;
341       ip->addrs[INDIRECT] = addr = balloc(ip->dev);
342     }
343     bp = bread(ip->dev, addr);
344     a = (uint*)bp->data;
345 
346     if((addr = a[bn]) == 0){
347       if(!alloc){
348         brelse(bp);
349         return -1;
350       }
351       a[bn] = addr = balloc(ip->dev);
352       bwrite(bp);
353     }
354     brelse(bp);
355     return addr;
356   }
357 
358   panic("bmap: out of range");
359 }
360 
361 // Truncate inode (discard contents).
362 // Only called after the last dirent referring
363 // to this inode has been erased on disk.
364 static void
365 itrunc(struct inode *ip)
366 {
367   int i, j;
368   struct buf *bp;
369   uint *a;
370 
371   for(i = 0; i < NDIRECT; i++){
372     if(ip->addrs[i]){
373       bfree(ip->dev, ip->addrs[i]);
374       ip->addrs[i] = 0;
375     }
376   }
377 
378   if(ip->addrs[INDIRECT]){
379     bp = bread(ip->dev, ip->addrs[INDIRECT]);
380     a = (uint*)bp->data;
381     for(j = 0; j < NINDIRECT; j++){
382       if(a[j])
383         bfree(ip->dev, a[j]);
384     }
385     brelse(bp);
386     bfree(ip->dev, ip->addrs[INDIRECT]);
387     ip->addrs[INDIRECT] = 0;
388   }
389 
390   ip->size = 0;
391   iupdate(ip);
392 }
393 
394 // Copy stat information from inode.
395 void
396 stati(struct inode *ip, struct stat *st)
397 {
398   st->dev = ip->dev;
399   st->ino = ip->inum;
400   st->type = ip->type;
401   st->nlink = ip->nlink;
402   st->size = ip->size;
403 }
404 
405 //PAGEBREAK!
406 // Read data from inode.
407 int
408 readi(struct inode *ip, char *dst, uint off, uint n)
409 {
410   uint tot, m;
411   struct buf *bp;
412 
413   if(ip->type == T_DEV){
414     if(ip->major < 0 || ip->major >= NDEV || !devsw[ip->major].read)
415       return -1;
416     return devsw[ip->major].read(ip, dst, n);
417   }
418 
419   if(off > ip->size || off + n < off)
420     return -1;
421   if(off + n > ip->size)
422     n = ip->size - off;
423 
424   for(tot=0; tot<n; tot+=m, off+=m, dst+=m){
425     bp = bread(ip->dev, bmap(ip, off/BSIZE, 0));
426     m = min(n - tot, BSIZE - off%BSIZE);
427     memmove(dst, bp->data + off%BSIZE, m);
428     brelse(bp);
429   }
430   return n;
431 }
432 
433 // PAGEBREAK!
434 // Write data to inode.
435 int
436 writei(struct inode *ip, char *src, uint off, uint n)
437 {
438   uint tot, m;
439   struct buf *bp;
440 
441   if(ip->type == T_DEV){
442     if(ip->major < 0 || ip->major >= NDEV || !devsw[ip->major].write)
443       return -1;
444     return devsw[ip->major].write(ip, src, n);
445   }
446 
447   if(off + n < off)
448     return -1;
449   if(off + n > MAXFILE*BSIZE)
450     n = MAXFILE*BSIZE - off;
451 
452   for(tot=0; tot<n; tot+=m, off+=m, src+=m){
453     bp = bread(ip->dev, bmap(ip, off/BSIZE, 1));
454     m = min(n - tot, BSIZE - off%BSIZE);
455     memmove(bp->data + off%BSIZE, src, m);
456     bwrite(bp);
457     brelse(bp);
458   }
459 
460   if(n > 0 && off > ip->size){
461     ip->size = off;
462     iupdate(ip);
463   }
464   return n;
465 }
466 
467 //PAGEBREAK!
468 // Directories
469 
470 int
471 namecmp(const char *s, const char *t)
472 {
473   return strncmp(s, t, DIRSIZ);
474 }
475 
476 // Look for a directory entry in a directory.
477 // If found, set *poff to byte offset of entry.
478 // Caller must have already locked dp.
479 struct inode*
480 dirlookup(struct inode *dp, char *name, uint *poff)
481 {
482   uint off, inum;
483   struct buf *bp;
484   struct dirent *de;
485 
486   if(dp->type != T_DIR)
487     panic("dirlookup not DIR");
488 
489   for(off = 0; off < dp->size; off += BSIZE){
490     bp = bread(dp->dev, bmap(dp, off / BSIZE, 0));
491     for(de = (struct dirent*)bp->data;
492         de < (struct dirent*)(bp->data + BSIZE);
493         de++){
494       if(de->inum == 0)
495         continue;
496       if(namecmp(name, de->name) == 0){
497         // entry matches path element
498         if(poff)
499           *poff = off + (uchar*)de - bp->data;
500         inum = de->inum;
501         brelse(bp);
502         return iget(dp->dev, inum);
503       }
504     }
505     brelse(bp);
506   }
507   return 0;
508 }
509 
510 // Write a new directory entry (name, ino) into the directory dp.
511 int
512 dirlink(struct inode *dp, char *name, uint ino)
513 {
514   int off;
515   struct dirent de;
516   struct inode *ip;
517 
518   // Check that name is not present.
519   if((ip = dirlookup(dp, name, 0)) != 0){
520     iput(ip);
521     return -1;
522   }
523 
524   // Look for an empty dirent.
525   for(off = 0; off < dp->size; off += sizeof(de)){
526     if(readi(dp, (char*)&de, off, sizeof(de)) != sizeof(de))
527       panic("dirlink read");
528     if(de.inum == 0)
529       break;
530   }
531 
532   strncpy(de.name, name, DIRSIZ);
533   de.inum = ino;
534   if(writei(dp, (char*)&de, off, sizeof(de)) != sizeof(de))
535     panic("dirlink");
536 
537   return 0;
538 }
539 
540 //PAGEBREAK!
541 // Paths
542 
543 // Copy the next path element from path into name.
544 // Return a pointer to the element following the copied one.
545 // The returned path has no leading slashes,
546 // so the caller can check *path=='\0' to see if the name is the last one.
547 // If no name to remove, return 0.
548 //
549 // Examples:
550 //   skipelem("a/bb/c", name) = "bb/c", setting name = "a"
551 //   skipelem("///a//bb", name) = "bb", setting name = "a"
552 //   skipelem("", name) = skipelem("////", name) = 0
553 //
554 static char*
555 skipelem(char *path, char *name)
556 {
557   char *s;
558   int len;
559 
560   while(*path == '/')
561     path++;
562   if(*path == 0)
563     return 0;
564   s = path;
565   while(*path != '/' && *path != 0)
566     path++;
567   len = path - s;
568   if(len >= DIRSIZ)
569     memmove(name, s, DIRSIZ);
570   else {
571     memmove(name, s, len);
572     name[len] = 0;
573   }
574   while(*path == '/')
575     path++;
576   return path;
577 }
578 
579 // Look up and return the inode for a path name.
580 // If parent != 0, return the inode for the parent and copy the final
581 // path element into name, which must have room for DIRSIZ bytes.
582 static struct inode*
583 _namei(char *path, int parent, char *name)
584 {
585   struct inode *ip, *next;
586 
587   if(*path == '/')
588     ip = iget(ROOTDEV, ROOTINO);
589   else
590     ip = idup(cp->cwd);
591 
592   while((path = skipelem(path, name)) != 0){
593     ilock(ip);
594     if(ip->type != T_DIR){
595       iunlockput(ip);
596       return 0;
597     }
598     if(parent && *path == '\0'){
599       // Stop one level early.
600       iunlock(ip);
601       return ip;
602     }
603     if((next = dirlookup(ip, name, 0)) == 0){
604       iunlockput(ip);
605       return 0;
606     }
607     iunlockput(ip);
608     ip = next;
609   }
610   if(parent){
611     iput(ip);
612     return 0;
613   }
614   return ip;
615 }
616 
617 struct inode*
618 namei(char *path)
619 {
620   char name[DIRSIZ];
621   return _namei(path, 0, name);
622 }
623 
624 struct inode*
625 nameiparent(char *path, char *name)
626 {
627   return _namei(path, 1, name);
628 }
629