xref: /xv6-public/fs.c (revision bf2932a6)
1 // File system implementation.  Four layers:
2 //   + Blocks: allocator for raw disk blocks.
3 //   + Files: inode allocator, reading, writing, metadata.
4 //   + Directories: inode with special contents (list of other inodes!)
5 //   + Names: paths like /usr/rtm/xv6/fs.c for convenient naming.
6 //
7 // Disk layout is: superblock, inodes, block in-use bitmap, data blocks.
8 //
9 // This file contains the low-level file system manipulation
10 // routines.  The (higher-level) system call implementations
11 // are in sysfile.c.
12 
13 #include "types.h"
14 #include "defs.h"
15 #include "param.h"
16 #include "stat.h"
17 #include "mmu.h"
18 #include "proc.h"
19 #include "spinlock.h"
20 #include "buf.h"
21 #include "fs.h"
22 #include "fsvar.h"
23 #include "dev.h"
24 
25 #define min(a, b) ((a) < (b) ? (a) : (b))
26 static void itrunc(struct inode*);
27 
28 // Read the super block.
29 static void
30 readsb(int dev, struct superblock *sb)
31 {
32   struct buf *bp;
33 
34   bp = bread(dev, 1);
35   memmove(sb, bp->data, sizeof(*sb));
36   brelse(bp);
37 }
38 
39 // Zero a block.
40 static void
41 bzero(int dev, int bno)
42 {
43   struct buf *bp;
44 
45   bp = bread(dev, bno);
46   memset(bp->data, 0, BSIZE);
47   bwrite(bp);
48   brelse(bp);
49 }
50 
51 // Blocks.
52 
53 // Allocate a disk block.
54 static uint
55 balloc(uint dev)
56 {
57   int b, bi, m;
58   struct buf *bp;
59   struct superblock sb;
60 
61   bp = 0;
62   readsb(dev, &sb);
63   for(b = 0; b < sb.size; b += BPB){
64     bp = bread(dev, BBLOCK(b, sb.ninodes));
65     for(bi = 0; bi < BPB; bi++){
66       m = 1 << (bi % 8);
67       if((bp->data[bi/8] & m) == 0){  // Is block free?
68         bp->data[bi/8] |= m;  // Mark block in use on disk.
69         bwrite(bp);
70         brelse(bp);
71         return b + bi;
72       }
73     }
74     brelse(bp);
75   }
76   panic("balloc: out of blocks");
77 }
78 
79 // Free a disk block.
80 static void
81 bfree(int dev, uint b)
82 {
83   struct buf *bp;
84   struct superblock sb;
85   int bi, m;
86 
87   bzero(dev, b);
88 
89   readsb(dev, &sb);
90   bp = bread(dev, BBLOCK(b, sb.ninodes));
91   bi = b % BPB;
92   m = 1 << (bi % 8);
93   if((bp->data[bi/8] & m) == 0)
94     panic("freeing free block");
95   bp->data[bi/8] &= ~m;  // Mark block free on disk.
96   bwrite(bp);
97   brelse(bp);
98 }
99 
100 // Inodes.
101 //
102 // An inode is a single, unnamed file in the file system.
103 // The inode disk structure holds metadata (the type, device numbers,
104 // and data size) along with a list of blocks where the associated
105 // data can be found.
106 //
107 // The inodes are laid out sequentially on disk immediately after
108 // the superblock.  The kernel keeps a cache of the in-use
109 // on-disk structures to provide a place for synchronizing access
110 // to inodes shared between multiple processes.
111 //
112 // ip->ref counts the number of pointer references to this cached
113 // inode; references are typically kept in struct file and in cp->cwd.
114 // When ip->ref falls to zero, the inode is no longer cached.
115 // It is an error to use an inode without holding a reference to it.
116 //
117 // Processes are only allowed to read and write inode
118 // metadata and contents when holding the inode's lock,
119 // represented by the I_BUSY flag in the in-memory copy.
120 // Because inode locks are held during disk accesses,
121 // they are implemented using a flag rather than with
122 // spin locks.  Callers are responsible for locking
123 // inodes before passing them to routines in this file; leaving
124 // this responsibility with the caller makes it possible for them
125 // to create arbitrarily-sized atomic operations.
126 //
127 // To give maximum control over locking to the callers,
128 // the routines in this file that return inode pointers
129 // return pointers to *unlocked* inodes.  It is the callers'
130 // responsibility to lock them before using them.  A non-zero
131 // ip->ref keeps these unlocked inodes in the cache.
132 
133 struct {
134   struct spinlock lock;
135   struct inode inode[NINODE];
136 } icache;
137 
138 void
139 iinit(void)
140 {
141   initlock(&icache.lock, "icache.lock");
142 }
143 
144 // Find the inode with number inum on device dev
145 // and return the in-memory copy.
146 static struct inode*
147 iget(uint dev, uint inum)
148 {
149   struct inode *ip, *empty;
150 
151   acquire(&icache.lock);
152 
153   // Try for cached inode.
154   empty = 0;
155   for(ip = &icache.inode[0]; ip < &icache.inode[NINODE]; ip++){
156     if(ip->ref > 0 && ip->dev == dev && ip->inum == inum){
157       ip->ref++;
158       release(&icache.lock);
159       return ip;
160     }
161     if(empty == 0 && ip->ref == 0)    // Remember empty slot.
162       empty = ip;
163   }
164 
165   // Allocate fresh inode.
166   if(empty == 0)
167     panic("iget: no inodes");
168 
169   ip = empty;
170   ip->dev = dev;
171   ip->inum = inum;
172   ip->ref = 1;
173   ip->flags = 0;
174   release(&icache.lock);
175 
176   return ip;
177 }
178 
179 // Increment reference count for ip.
180 // Returns ip to enable ip = idup(ip1) idiom.
181 struct inode*
182 idup(struct inode *ip)
183 {
184   acquire(&icache.lock);
185   ip->ref++;
186   release(&icache.lock);
187   return ip;
188 }
189 
190 // Lock the given inode.
191 void
192 ilock(struct inode *ip)
193 {
194   struct buf *bp;
195   struct dinode *dip;
196 
197   if(ip == 0 || ip->ref < 1)
198     panic("ilock");
199 
200   acquire(&icache.lock);
201   while(ip->flags & I_BUSY)
202     sleep(ip, &icache.lock);
203   ip->flags |= I_BUSY;
204   release(&icache.lock);
205 
206   if(!(ip->flags & I_VALID)){
207     bp = bread(ip->dev, IBLOCK(ip->inum));
208     dip = (struct dinode*)bp->data + ip->inum%IPB;
209     ip->type = dip->type;
210     ip->major = dip->major;
211     ip->minor = dip->minor;
212     ip->nlink = dip->nlink;
213     ip->size = dip->size;
214     memmove(ip->addrs, dip->addrs, sizeof(ip->addrs));
215     brelse(bp);
216     ip->flags |= I_VALID;
217     if(ip->type == 0)
218       panic("ilock: no type");
219   }
220 }
221 
222 // Unlock the given inode.
223 void
224 iunlock(struct inode *ip)
225 {
226   if(ip == 0 || !(ip->flags & I_BUSY) || ip->ref < 1)
227     panic("iunlock");
228 
229   acquire(&icache.lock);
230   ip->flags &= ~I_BUSY;
231   wakeup(ip);
232   release(&icache.lock);
233 }
234 
235 // Caller holds reference to unlocked ip.  Drop reference.
236 void
237 iput(struct inode *ip)
238 {
239   acquire(&icache.lock);
240   if(ip->ref == 1 && (ip->flags & I_VALID) && ip->nlink == 0){
241     // inode is no longer used: truncate and free inode.
242     if(ip->flags & I_BUSY)
243       panic("iput busy");
244     ip->flags |= I_BUSY;
245     release(&icache.lock);
246     itrunc(ip);
247     ip->type = 0;
248     iupdate(ip);
249     acquire(&icache.lock);
250     ip->flags &= ~I_BUSY;
251     wakeup(ip);
252   }
253   ip->ref--;
254   release(&icache.lock);
255 }
256 
257 // Common idiom: unlock, then put.
258 void
259 iunlockput(struct inode *ip)
260 {
261   iunlock(ip);
262   iput(ip);
263 }
264 
265 //PAGEBREAK!
266 // Allocate a new inode with the given type on device dev.
267 struct inode*
268 ialloc(uint dev, short type)
269 {
270   int inum;
271   struct buf *bp;
272   struct dinode *dip;
273   struct superblock sb;
274 
275   readsb(dev, &sb);
276   for(inum = 1; inum < sb.ninodes; inum++){  // loop over inode blocks
277     bp = bread(dev, IBLOCK(inum));
278     dip = (struct dinode*)bp->data + inum%IPB;
279     if(dip->type == 0){  // a free inode
280       memset(dip, 0, sizeof(*dip));
281       dip->type = type;
282       bwrite(bp);   // mark it allocated on the disk
283       brelse(bp);
284       return iget(dev, inum);
285     }
286     brelse(bp);
287   }
288   panic("ialloc: no inodes");
289 }
290 
291 // Copy inode, which has changed, from memory to disk.
292 void
293 iupdate(struct inode *ip)
294 {
295   struct buf *bp;
296   struct dinode *dip;
297 
298   bp = bread(ip->dev, IBLOCK(ip->inum));
299   dip = (struct dinode*)bp->data + ip->inum%IPB;
300   dip->type = ip->type;
301   dip->major = ip->major;
302   dip->minor = ip->minor;
303   dip->nlink = ip->nlink;
304   dip->size = ip->size;
305   memmove(dip->addrs, ip->addrs, sizeof(ip->addrs));
306   bwrite(bp);
307   brelse(bp);
308 }
309 
310 //PAGEBREAK!
311 // Inode contents
312 //
313 // The contents (data) associated with each inode is stored
314 // in a sequence of blocks on the disk.  The first NDIRECT blocks
315 // are listed in ip->addrs[].  The next NINDIRECT blocks are
316 // listed in the block ip->addrs[INDIRECT].
317 
318 // Return the disk block address of the nth block in inode ip.
319 // If there is no such block, alloc controls whether one is allocated.
320 static uint
321 bmap(struct inode *ip, uint bn, int alloc)
322 {
323   uint addr, *a;
324   struct buf *bp;
325 
326   if(bn < NDIRECT){
327     if((addr = ip->addrs[bn]) == 0){
328       if(!alloc)
329         return -1;
330       ip->addrs[bn] = addr = balloc(ip->dev);
331     }
332     return addr;
333   }
334   bn -= NDIRECT;
335 
336   if(bn < NINDIRECT){
337     // Load indirect block, allocating if necessary.
338     if((addr = ip->addrs[INDIRECT]) == 0){
339       if(!alloc)
340         return -1;
341       ip->addrs[INDIRECT] = addr = balloc(ip->dev);
342     }
343     bp = bread(ip->dev, addr);
344     a = (uint*)bp->data;
345 
346     if((addr = a[bn]) == 0){
347       if(!alloc){
348         brelse(bp);
349         return -1;
350       }
351       a[bn] = addr = balloc(ip->dev);
352       bwrite(bp);
353     }
354     brelse(bp);
355     return addr;
356   }
357 
358   panic("bmap: out of range");
359 }
360 
361 // Truncate inode (discard contents).
362 static void
363 itrunc(struct inode *ip)
364 {
365   int i, j;
366   struct buf *bp;
367   uint *a;
368 
369   for(i = 0; i < NDIRECT; i++){
370     if(ip->addrs[i]){
371       bfree(ip->dev, ip->addrs[i]);
372       ip->addrs[i] = 0;
373     }
374   }
375 
376   if(ip->addrs[INDIRECT]){
377     bp = bread(ip->dev, ip->addrs[INDIRECT]);
378     a = (uint*)bp->data;
379     for(j = 0; j < NINDIRECT; j++){
380       if(a[j])
381         bfree(ip->dev, a[j]);
382     }
383     brelse(bp);
384     ip->addrs[INDIRECT] = 0;
385   }
386 
387   ip->size = 0;
388   iupdate(ip);
389 }
390 
391 // Copy stat information from inode.
392 void
393 stati(struct inode *ip, struct stat *st)
394 {
395   st->dev = ip->dev;
396   st->ino = ip->inum;
397   st->type = ip->type;
398   st->nlink = ip->nlink;
399   st->size = ip->size;
400 }
401 
402 //PAGEBREAK!
403 // Read data from inode.
404 int
405 readi(struct inode *ip, char *dst, uint off, uint n)
406 {
407   uint tot, m;
408   struct buf *bp;
409 
410   if(ip->type == T_DEV){
411     if(ip->major < 0 || ip->major >= NDEV || !devsw[ip->major].read)
412       return -1;
413     return devsw[ip->major].read(ip, dst, n);
414   }
415 
416   if(off > ip->size || off + n < off)
417     return -1;
418   if(off + n > ip->size)
419     n = ip->size - off;
420 
421   for(tot=0; tot<n; tot+=m, off+=m, dst+=m){
422     bp = bread(ip->dev, bmap(ip, off/BSIZE, 0));
423     m = min(n - tot, BSIZE - off%BSIZE);
424     memmove(dst, bp->data + off%BSIZE, m);
425     brelse(bp);
426   }
427   return n;
428 }
429 
430 // PAGEBREAK!
431 // Write data to inode.
432 int
433 writei(struct inode *ip, char *src, uint off, uint n)
434 {
435   uint tot, m;
436   struct buf *bp;
437 
438   if(ip->type == T_DEV){
439     if(ip->major < 0 || ip->major >= NDEV || !devsw[ip->major].write)
440       return -1;
441     return devsw[ip->major].write(ip, src, n);
442   }
443 
444   if(off + n < off)
445     return -1;
446   if(off + n > MAXFILE*BSIZE)
447     n = MAXFILE*BSIZE - off;
448 
449   for(tot=0; tot<n; tot+=m, off+=m, src+=m){
450     bp = bread(ip->dev, bmap(ip, off/BSIZE, 1));
451     m = min(n - tot, BSIZE - off%BSIZE);
452     memmove(bp->data + off%BSIZE, src, m);
453     bwrite(bp);
454     brelse(bp);
455   }
456 
457   if(n > 0 && off > ip->size){
458     ip->size = off;
459     iupdate(ip);
460   }
461   return n;
462 }
463 
464 //PAGEBREAK!
465 // Directories
466 
467 int
468 namecmp(const char *s, const char *t)
469 {
470   return strncmp(s, t, DIRSIZ);
471 }
472 
473 // Look for a directory entry in a directory.
474 // If found, set *poff to byte offset of entry.
475 // Caller must have already locked dp.
476 struct inode*
477 dirlookup(struct inode *dp, char *name, uint *poff)
478 {
479   uint off, inum;
480   struct buf *bp;
481   struct dirent *de;
482 
483   if(dp->type != T_DIR)
484     panic("dirlookup not DIR");
485 
486   for(off = 0; off < dp->size; off += BSIZE){
487     bp = bread(dp->dev, bmap(dp, off / BSIZE, 0));
488     for(de = (struct dirent*)bp->data;
489         de < (struct dirent*)(bp->data + BSIZE);
490         de++){
491       if(de->inum == 0)
492         continue;
493       if(namecmp(name, de->name) == 0){
494         // entry matches path element
495         if(poff)
496           *poff = off + (uchar*)de - bp->data;
497         inum = de->inum;
498         brelse(bp);
499         return iget(dp->dev, inum);
500       }
501     }
502     brelse(bp);
503   }
504   return 0;
505 }
506 
507 // Write a new directory entry (name, ino) into the directory dp.
508 int
509 dirlink(struct inode *dp, char *name, uint ino)
510 {
511   int off;
512   struct dirent de;
513   struct inode *ip;
514 
515   // Check that name is not present.
516   if((ip = dirlookup(dp, name, 0)) != 0){
517     iput(ip);
518     return -1;
519   }
520 
521   // Look for an empty dirent.
522   for(off = 0; off < dp->size; off += sizeof(de)){
523     if(readi(dp, (char*)&de, off, sizeof(de)) != sizeof(de))
524       panic("dirlink read");
525     if(de.inum == 0)
526       break;
527   }
528 
529   strncpy(de.name, name, DIRSIZ);
530   de.inum = ino;
531   if(writei(dp, (char*)&de, off, sizeof(de)) != sizeof(de))
532     panic("dirlink");
533 
534   return 0;
535 }
536 
537 //PAGEBREAK!
538 // Paths
539 
540 // Copy the next path element from path into name.
541 // Return a pointer to the element following the copied one.
542 // The returned path has no leading slashes,
543 // so the caller can check *path=='\0' to see if the name is the last one.
544 // If no name to remove, return 0.
545 //
546 // Examples:
547 //   skipelem("a/bb/c", name) = "bb/c", setting name = "a"
548 //   skipelem("///a//bb", name) = "bb", setting name = "a"
549 //   skipelem("", name) = skipelem("////", name) = 0
550 //
551 static char*
552 skipelem(char *path, char *name)
553 {
554   char *s;
555   int len;
556 
557   while(*path == '/')
558     path++;
559   if(*path == 0)
560     return 0;
561   s = path;
562   while(*path != '/' && *path != 0)
563     path++;
564   len = path - s;
565   if(len >= DIRSIZ)
566     memmove(name, s, DIRSIZ);
567   else {
568     memmove(name, s, len);
569     name[len] = 0;
570   }
571   while(*path == '/')
572     path++;
573   return path;
574 }
575 
576 // Look up and return the inode for a path name.
577 // If parent != 0, return the inode for the parent and copy the final
578 // path element into name, which must have room for DIRSIZ bytes.
579 static struct inode*
580 _namei(char *path, int parent, char *name)
581 {
582   struct inode *ip, *next;
583 
584   if(*path == '/')
585     ip = iget(ROOTDEV, 1);
586   else
587     ip = idup(cp->cwd);
588 
589   while((path = skipelem(path, name)) != 0){
590     ilock(ip);
591     if(ip->type != T_DIR){
592       iunlockput(ip);
593       return 0;
594     }
595     if(parent && *path == '\0'){
596       // Stop one level early.
597       iunlock(ip);
598       return ip;
599     }
600     if((next = dirlookup(ip, name, 0)) == 0){
601       iunlockput(ip);
602       return 0;
603     }
604     iunlockput(ip);
605     ip = next;
606   }
607   if(parent){
608     iput(ip);
609     return 0;
610   }
611   return ip;
612 }
613 
614 struct inode*
615 namei(char *path)
616 {
617   char name[DIRSIZ];
618   return _namei(path, 0, name);
619 }
620 
621 struct inode*
622 nameiparent(char *path, char *name)
623 {
624   return _namei(path, 1, name);
625 }
626