xref: /xv6-public/fs.c (revision 1ddfbbb1)
1 // File system implementation.  Four layers:
2 //   + Blocks: allocator for raw disk blocks.
3 //   + Files: inode allocator, reading, writing, metadata.
4 //   + Directories: inode with special contents (list of other inodes!)
5 //   + Names: paths like /usr/rtm/xv6/fs.c for convenient naming.
6 //
7 // Disk layout is: superblock, inodes, block in-use bitmap, data blocks.
8 //
9 // This file contains the low-level file system manipulation
10 // routines.  The (higher-level) system call implementations
11 // are in sysfile.c.
12 
13 #include "types.h"
14 #include "defs.h"
15 #include "param.h"
16 #include "stat.h"
17 #include "mmu.h"
18 #include "proc.h"
19 #include "spinlock.h"
20 #include "buf.h"
21 #include "fs.h"
22 #include "file.h"
23 
24 #define min(a, b) ((a) < (b) ? (a) : (b))
25 static void itrunc(struct inode*);
26 
27 // Read the super block.
28 void
29 readsb(int dev, struct superblock *sb)
30 {
31   struct buf *bp;
32 
33   bp = bread(dev, 1);
34   memmove(sb, bp->data, sizeof(*sb));
35   brelse(bp);
36 }
37 
38 // Zero a block.
39 static void
40 bzero(int dev, int bno)
41 {
42   struct buf *bp;
43 
44   bp = bread(dev, bno);
45   memset(bp->data, 0, BSIZE);
46   log_write(bp);
47   brelse(bp);
48 }
49 
50 // Blocks.
51 
52 // Allocate a zeroed disk block.
53 static uint
54 balloc(uint dev)
55 {
56   int b, bi, m;
57   struct buf *bp;
58   struct superblock sb;
59 
60   bp = 0;
61   readsb(dev, &sb);
62   for(b = 0; b < sb.size; b += BPB){
63     bp = bread(dev, BBLOCK(b, sb.ninodes));
64     for(bi = 0; bi < BPB && bi < (sb.size - b); bi++){
65       m = 1 << (bi % 8);
66       if((bp->data[bi/8] & m) == 0){  // Is block free?
67         bp->data[bi/8] |= m;  // Mark block in use on disk.
68         log_write(bp);
69         brelse(bp);
70         bzero(dev, b + bi);
71         return b + bi;
72       }
73     }
74     brelse(bp);
75   }
76   panic("balloc: out of blocks");
77 }
78 
79 // Free a disk block.
80 static void
81 bfree(int dev, uint b)
82 {
83   struct buf *bp;
84   struct superblock sb;
85   int bi, m;
86 
87   readsb(dev, &sb);
88   bp = bread(dev, BBLOCK(b, sb.ninodes));
89   bi = b % BPB;
90   m = 1 << (bi % 8);
91   if((bp->data[bi/8] & m) == 0)
92     panic("freeing free block");
93   bp->data[bi/8] &= ~m;  // Mark block free on disk.
94   log_write(bp);
95   brelse(bp);
96 }
97 
98 // Inodes.
99 //
100 // An inode is a single, unnamed file in the file system.
101 // The inode disk structure holds metadata (the type, device numbers,
102 // and data size) along with a list of blocks where the associated
103 // data can be found.
104 //
105 // The inodes are laid out sequentially on disk immediately after
106 // the superblock.  The kernel keeps a cache of the in-use
107 // on-disk structures to provide a place for synchronizing access
108 // to inodes shared between multiple processes.
109 //
110 // ip->ref counts the number of pointer references to this cached
111 // inode; references are typically kept in struct file and in proc->cwd.
112 // When ip->ref falls to zero, the inode is no longer cached.
113 // It is an error to use an inode without holding a reference to it.
114 //
115 // Processes are only allowed to read and write inode
116 // metadata and contents when holding the inode's lock,
117 // represented by the I_BUSY flag in the in-memory copy.
118 // Because inode locks are held during disk accesses,
119 // they are implemented using a flag rather than with
120 // spin locks.  Callers are responsible for locking
121 // inodes before passing them to routines in this file; leaving
122 // this responsibility with the caller makes it possible for them
123 // to create arbitrarily-sized atomic operations.
124 //
125 // To give maximum control over locking to the callers,
126 // the routines in this file that return inode pointers
127 // return pointers to *unlocked* inodes.  It is the callers'
128 // responsibility to lock them before using them.  A non-zero
129 // ip->ref keeps these unlocked inodes in the cache.
130 
131 struct {
132   struct spinlock lock;
133   struct inode inode[NINODE];
134 } icache;
135 
136 void
137 iinit(void)
138 {
139   initlock(&icache.lock, "icache");
140 }
141 
142 static struct inode* iget(uint dev, uint inum);
143 
144 //PAGEBREAK!
145 // Allocate a new inode with the given type on device dev.
146 struct inode*
147 ialloc(uint dev, short type)
148 {
149   int inum;
150   struct buf *bp;
151   struct dinode *dip;
152   struct superblock sb;
153 
154   readsb(dev, &sb);
155   for(inum = 1; inum < sb.ninodes; inum++){  // loop over inode blocks
156     bp = bread(dev, IBLOCK(inum));
157     dip = (struct dinode*)bp->data + inum%IPB;
158     if(dip->type == 0){  // a free inode
159       memset(dip, 0, sizeof(*dip));
160       dip->type = type;
161       log_write(bp);   // mark it allocated on the disk
162       brelse(bp);
163       return iget(dev, inum);
164     }
165     brelse(bp);
166   }
167   panic("ialloc: no inodes");
168 }
169 
170 // Copy inode, which has changed, from memory to disk.
171 void
172 iupdate(struct inode *ip)
173 {
174   struct buf *bp;
175   struct dinode *dip;
176 
177   bp = bread(ip->dev, IBLOCK(ip->inum));
178   dip = (struct dinode*)bp->data + ip->inum%IPB;
179   dip->type = ip->type;
180   dip->major = ip->major;
181   dip->minor = ip->minor;
182   dip->nlink = ip->nlink;
183   dip->size = ip->size;
184   memmove(dip->addrs, ip->addrs, sizeof(ip->addrs));
185   log_write(bp);
186   brelse(bp);
187 }
188 
189 // Find the inode with number inum on device dev
190 // and return the in-memory copy.
191 static struct inode*
192 iget(uint dev, uint inum)
193 {
194   struct inode *ip, *empty;
195 
196   acquire(&icache.lock);
197 
198   // Try for cached inode.
199   empty = 0;
200   for(ip = &icache.inode[0]; ip < &icache.inode[NINODE]; ip++){
201     if(ip->ref > 0 && ip->dev == dev && ip->inum == inum){
202       ip->ref++;
203       release(&icache.lock);
204       return ip;
205     }
206     if(empty == 0 && ip->ref == 0)    // Remember empty slot.
207       empty = ip;
208   }
209 
210   // Allocate fresh inode.
211   if(empty == 0)
212     panic("iget: no inodes");
213 
214   ip = empty;
215   ip->dev = dev;
216   ip->inum = inum;
217   ip->ref = 1;
218   ip->flags = 0;
219   release(&icache.lock);
220 
221   return ip;
222 }
223 
224 // Increment reference count for ip.
225 // Returns ip to enable ip = idup(ip1) idiom.
226 struct inode*
227 idup(struct inode *ip)
228 {
229   acquire(&icache.lock);
230   ip->ref++;
231   release(&icache.lock);
232   return ip;
233 }
234 
235 // Lock the given inode.
236 void
237 ilock(struct inode *ip)
238 {
239   struct buf *bp;
240   struct dinode *dip;
241 
242   if(ip == 0 || ip->ref < 1)
243     panic("ilock");
244 
245   acquire(&icache.lock);
246   while(ip->flags & I_BUSY)
247     sleep(ip, &icache.lock);
248   ip->flags |= I_BUSY;
249   release(&icache.lock);
250 
251   if(!(ip->flags & I_VALID)){
252     bp = bread(ip->dev, IBLOCK(ip->inum));
253     dip = (struct dinode*)bp->data + ip->inum%IPB;
254     ip->type = dip->type;
255     ip->major = dip->major;
256     ip->minor = dip->minor;
257     ip->nlink = dip->nlink;
258     ip->size = dip->size;
259     memmove(ip->addrs, dip->addrs, sizeof(ip->addrs));
260     brelse(bp);
261     ip->flags |= I_VALID;
262     if(ip->type == 0)
263       panic("ilock: no type");
264   }
265 }
266 
267 // Unlock the given inode.
268 void
269 iunlock(struct inode *ip)
270 {
271   if(ip == 0 || !(ip->flags & I_BUSY) || ip->ref < 1)
272     panic("iunlock");
273 
274   acquire(&icache.lock);
275   ip->flags &= ~I_BUSY;
276   wakeup(ip);
277   release(&icache.lock);
278 }
279 
280 // Caller holds reference to unlocked ip.  Drop reference.
281 void
282 iput(struct inode *ip)
283 {
284   acquire(&icache.lock);
285   if(ip->ref == 1 && (ip->flags & I_VALID) && ip->nlink == 0){
286     // inode is no longer used: truncate and free inode.
287     if(ip->flags & I_BUSY)
288       panic("iput busy");
289     ip->flags |= I_BUSY;
290     release(&icache.lock);
291     itrunc(ip);
292     ip->type = 0;
293     iupdate(ip);
294     acquire(&icache.lock);
295     ip->flags = 0;
296     wakeup(ip);
297   }
298   ip->ref--;
299   release(&icache.lock);
300 }
301 
302 // Common idiom: unlock, then put.
303 void
304 iunlockput(struct inode *ip)
305 {
306   iunlock(ip);
307   iput(ip);
308 }
309 
310 //PAGEBREAK!
311 // Inode contents
312 //
313 // The contents (data) associated with each inode is stored
314 // in a sequence of blocks on the disk.  The first NDIRECT blocks
315 // are listed in ip->addrs[].  The next NINDIRECT blocks are
316 // listed in the block ip->addrs[NDIRECT].
317 
318 // Return the disk block address of the nth block in inode ip.
319 // If there is no such block, bmap allocates one.
320 static uint
321 bmap(struct inode *ip, uint bn)
322 {
323   uint addr, *a;
324   struct buf *bp;
325 
326   if(bn < NDIRECT){
327     if((addr = ip->addrs[bn]) == 0)
328       ip->addrs[bn] = addr = balloc(ip->dev);
329     return addr;
330   }
331   bn -= NDIRECT;
332 
333   if(bn < NINDIRECT){
334     // Load indirect block, allocating if necessary.
335     if((addr = ip->addrs[NDIRECT]) == 0)
336       ip->addrs[NDIRECT] = addr = balloc(ip->dev);
337     bp = bread(ip->dev, addr);
338     a = (uint*)bp->data;
339     if((addr = a[bn]) == 0){
340       a[bn] = addr = balloc(ip->dev);
341       log_write(bp);
342     }
343     brelse(bp);
344     return addr;
345   }
346 
347   panic("bmap: out of range");
348 }
349 
350 // Truncate inode (discard contents).
351 // Only called after the last dirent referring
352 // to this inode has been erased on disk.
353 static void
354 itrunc(struct inode *ip)
355 {
356   int i, j;
357   struct buf *bp;
358   uint *a;
359 
360   for(i = 0; i < NDIRECT; i++){
361     if(ip->addrs[i]){
362       bfree(ip->dev, ip->addrs[i]);
363       ip->addrs[i] = 0;
364     }
365   }
366 
367   if(ip->addrs[NDIRECT]){
368     bp = bread(ip->dev, ip->addrs[NDIRECT]);
369     a = (uint*)bp->data;
370     for(j = 0; j < NINDIRECT; j++){
371       if(a[j])
372         bfree(ip->dev, a[j]);
373     }
374     brelse(bp);
375     bfree(ip->dev, ip->addrs[NDIRECT]);
376     ip->addrs[NDIRECT] = 0;
377   }
378 
379   ip->size = 0;
380   iupdate(ip);
381 }
382 
383 // Copy stat information from inode.
384 void
385 stati(struct inode *ip, struct stat *st)
386 {
387   st->dev = ip->dev;
388   st->ino = ip->inum;
389   st->type = ip->type;
390   st->nlink = ip->nlink;
391   st->size = ip->size;
392 }
393 
394 //PAGEBREAK!
395 // Read data from inode.
396 int
397 readi(struct inode *ip, char *dst, uint off, uint n)
398 {
399   uint tot, m;
400   struct buf *bp;
401 
402   if(ip->type == T_DEV){
403     if(ip->major < 0 || ip->major >= NDEV || !devsw[ip->major].read)
404       return -1;
405     return devsw[ip->major].read(ip, dst, n);
406   }
407 
408   if(off > ip->size || off + n < off)
409     return -1;
410   if(off + n > ip->size)
411     n = ip->size - off;
412 
413   for(tot=0; tot<n; tot+=m, off+=m, dst+=m){
414     bp = bread(ip->dev, bmap(ip, off/BSIZE));
415     m = min(n - tot, BSIZE - off%BSIZE);
416     memmove(dst, bp->data + off%BSIZE, m);
417     brelse(bp);
418   }
419   return n;
420 }
421 
422 // PAGEBREAK!
423 // Write data to inode.
424 int
425 writei(struct inode *ip, char *src, uint off, uint n)
426 {
427   uint tot, m;
428   struct buf *bp;
429 
430   if(ip->type == T_DEV){
431     if(ip->major < 0 || ip->major >= NDEV || !devsw[ip->major].write)
432       return -1;
433     return devsw[ip->major].write(ip, src, n);
434   }
435 
436   if(off > ip->size || off + n < off)
437     return -1;
438   if(off + n > MAXFILE*BSIZE)
439     return -1;
440 
441   for(tot=0; tot<n; tot+=m, off+=m, src+=m){
442     bp = bread(ip->dev, bmap(ip, off/BSIZE));
443     m = min(n - tot, BSIZE - off%BSIZE);
444     memmove(bp->data + off%BSIZE, src, m);
445     log_write(bp);
446     brelse(bp);
447   }
448 
449   if(n > 0 && off > ip->size){
450     ip->size = off;
451     iupdate(ip);
452   }
453   return n;
454 }
455 
456 //PAGEBREAK!
457 // Directories
458 
459 int
460 namecmp(const char *s, const char *t)
461 {
462   return strncmp(s, t, DIRSIZ);
463 }
464 
465 // Look for a directory entry in a directory.
466 // If found, set *poff to byte offset of entry.
467 // Caller must have already locked dp.
468 struct inode*
469 dirlookup(struct inode *dp, char *name, uint *poff)
470 {
471   uint off, inum;
472   struct dirent de;
473 
474   if(dp->type != T_DIR)
475     panic("dirlookup not DIR");
476 
477   for(off = 0; off < dp->size; off += sizeof(de)){
478     if(readi(dp, (char*)&de, off, sizeof(de)) != sizeof(de))
479       panic("dirlink read");
480     if(de.inum == 0)
481       continue;
482     if(namecmp(name, de.name) == 0){
483       // entry matches path element
484       if(poff)
485         *poff = off;
486       inum = de.inum;
487       return iget(dp->dev, inum);
488     }
489   }
490 
491   return 0;
492 }
493 
494 // Write a new directory entry (name, inum) into the directory dp.
495 int
496 dirlink(struct inode *dp, char *name, uint inum)
497 {
498   int off;
499   struct dirent de;
500   struct inode *ip;
501 
502   // Check that name is not present.
503   if((ip = dirlookup(dp, name, 0)) != 0){
504     iput(ip);
505     return -1;
506   }
507 
508   // Look for an empty dirent.
509   for(off = 0; off < dp->size; off += sizeof(de)){
510     if(readi(dp, (char*)&de, off, sizeof(de)) != sizeof(de))
511       panic("dirlink read");
512     if(de.inum == 0)
513       break;
514   }
515 
516   strncpy(de.name, name, DIRSIZ);
517   de.inum = inum;
518   if(writei(dp, (char*)&de, off, sizeof(de)) != sizeof(de))
519     panic("dirlink");
520 
521   return 0;
522 }
523 
524 //PAGEBREAK!
525 // Paths
526 
527 // Copy the next path element from path into name.
528 // Return a pointer to the element following the copied one.
529 // The returned path has no leading slashes,
530 // so the caller can check *path=='\0' to see if the name is the last one.
531 // If no name to remove, return 0.
532 //
533 // Examples:
534 //   skipelem("a/bb/c", name) = "bb/c", setting name = "a"
535 //   skipelem("///a//bb", name) = "bb", setting name = "a"
536 //   skipelem("a", name) = "", setting name = "a"
537 //   skipelem("", name) = skipelem("////", name) = 0
538 //
539 static char*
540 skipelem(char *path, char *name)
541 {
542   char *s;
543   int len;
544 
545   while(*path == '/')
546     path++;
547   if(*path == 0)
548     return 0;
549   s = path;
550   while(*path != '/' && *path != 0)
551     path++;
552   len = path - s;
553   if(len >= DIRSIZ)
554     memmove(name, s, DIRSIZ);
555   else {
556     memmove(name, s, len);
557     name[len] = 0;
558   }
559   while(*path == '/')
560     path++;
561   return path;
562 }
563 
564 // Look up and return the inode for a path name.
565 // If parent != 0, return the inode for the parent and copy the final
566 // path element into name, which must have room for DIRSIZ bytes.
567 static struct inode*
568 namex(char *path, int nameiparent, char *name)
569 {
570   struct inode *ip, *next;
571 
572   if(*path == '/')
573     ip = iget(ROOTDEV, ROOTINO);
574   else
575     ip = idup(proc->cwd);
576 
577   while((path = skipelem(path, name)) != 0){
578     ilock(ip);
579     if(ip->type != T_DIR){
580       iunlockput(ip);
581       return 0;
582     }
583     if(nameiparent && *path == '\0'){
584       // Stop one level early.
585       iunlock(ip);
586       return ip;
587     }
588     if((next = dirlookup(ip, name, 0)) == 0){
589       iunlockput(ip);
590       return 0;
591     }
592     iunlockput(ip);
593     ip = next;
594   }
595   if(nameiparent){
596     iput(ip);
597     return 0;
598   }
599   return ip;
600 }
601 
602 struct inode*
603 namei(char *path)
604 {
605   char name[DIRSIZ];
606   return namex(path, 0, name);
607 }
608 
609 struct inode*
610 nameiparent(char *path, char *name)
611 {
612   return namex(path, 1, name);
613 }
614