xref: /xv6-public/fs.c (revision 34295f46)
1 // File system implementation.  Four layers:
2 //   + Blocks: allocator for raw disk blocks.
3 //   + Files: inode allocator, reading, writing, metadata.
4 //   + Directories: inode with special contents (list of other inodes!)
5 //   + Names: paths like /usr/rtm/xv6/fs.c for convenient naming.
6 //
7 // Disk layout is: superblock, inodes, block in-use bitmap, data blocks.
8 //
9 // This file contains the low-level file system manipulation
10 // routines.  The (higher-level) system call implementations
11 // are in sysfile.c.
12 
13 #include "types.h"
14 #include "defs.h"
15 #include "param.h"
16 #include "stat.h"
17 #include "mmu.h"
18 #include "proc.h"
19 #include "spinlock.h"
20 #include "buf.h"
21 #include "fs.h"
22 #include "fsvar.h"
23 #include "dev.h"
24 
25 #define min(a, b) ((a) < (b) ? (a) : (b))
26 static void itrunc(struct inode*);
27 
28 // Read the super block.
29 static void
30 readsb(int dev, struct superblock *sb)
31 {
32   struct buf *bp;
33 
34   bp = bread(dev, 1);
35   memmove(sb, bp->data, sizeof(*sb));
36   brelse(bp);
37 }
38 
39 // Zero a block.
40 static void
41 bzero(int dev, int bno)
42 {
43   struct buf *bp;
44 
45   bp = bread(dev, bno);
46   memset(bp->data, 0, BSIZE);
47   bwrite(bp);
48   brelse(bp);
49 }
50 
51 // Blocks.
52 
53 // Allocate a disk block.
54 static uint
55 balloc(uint dev)
56 {
57   int b, bi, m;
58   struct buf *bp;
59   struct superblock sb;
60 
61   bp = 0;
62   readsb(dev, &sb);
63   for(b = 0; b < sb.size; b += BPB){
64     bp = bread(dev, BBLOCK(b, sb.ninodes));
65     for(bi = 0; bi < BPB; bi++){
66       m = 1 << (bi % 8);
67       if((bp->data[bi/8] & m) == 0){  // Is block free?
68         bp->data[bi/8] |= m;  // Mark block in use on disk.
69         bwrite(bp);
70         brelse(bp);
71         return b + bi;
72       }
73     }
74     brelse(bp);
75   }
76   panic("balloc: out of blocks");
77 }
78 
79 // Free a disk block.
80 static void
81 bfree(int dev, uint b)
82 {
83   struct buf *bp;
84   struct superblock sb;
85   int bi, m;
86 
87   bzero(dev, b);
88 
89   readsb(dev, &sb);
90   bp = bread(dev, BBLOCK(b, sb.ninodes));
91   bi = b % BPB;
92   m = 1 << (bi % 8);
93   if((bp->data[bi/8] & m) == 0)
94     panic("freeing free block");
95   bp->data[bi/8] &= ~m;  // Mark block free on disk.
96   bwrite(bp);
97   brelse(bp);
98 }
99 
100 // Inodes.
101 //
102 // An inode is a single, unnamed file in the file system.
103 // The inode disk structure holds metadata (the type, device numbers,
104 // and data size) along with a list of blocks where the associated
105 // data can be found.
106 //
107 // The inodes are laid out sequentially on disk immediately after
108 // the superblock.  The kernel keeps a cache of the in-use
109 // on-disk structures to provide a place for synchronizing access
110 // to inodes shared between multiple processes.
111 //
112 // ip->ref counts the number of pointer references to this cached
113 // inode; references are typically kept in struct file and in cp->cwd.
114 // When ip->ref falls to zero, the inode is no longer cached.
115 // It is an error to use an inode without holding a reference to it.
116 //
117 // Processes are only allowed to read and write inode
118 // metadata and contents when holding the inode's lock,
119 // represented by the I_BUSY flag in the in-memory copy.
120 // Because inode locks are held during disk accesses,
121 // they are implemented using a flag rather than with
122 // spin locks.  Callers are responsible for locking
123 // inodes before passing them to routines in this file; leaving
124 // this responsibility with the caller makes it possible for them
125 // to create arbitrarily-sized atomic operations.
126 //
127 // To give maximum control over locking to the callers,
128 // the routines in this file that return inode pointers
129 // return pointers to *unlocked* inodes.  It is the callers'
130 // responsibility to lock them before using them.  A non-zero
131 // ip->ref keeps these unlocked inodes in the cache.
132 
133 struct {
134   struct spinlock lock;
135   struct inode inode[NINODE];
136 } icache;
137 
138 void
139 iinit(void)
140 {
141   initlock(&icache.lock, "icache");
142 }
143 
144 // Find the inode with number inum on device dev
145 // and return the in-memory copy.
146 static struct inode*
147 iget(uint dev, uint inum)
148 {
149   struct inode *ip, *empty;
150 
151   acquire(&icache.lock);
152 
153   // Try for cached inode.
154   empty = 0;
155   for(ip = &icache.inode[0]; ip < &icache.inode[NINODE]; ip++){
156     if(ip->ref > 0 && ip->dev == dev && ip->inum == inum){
157       ip->ref++;
158       release(&icache.lock);
159       return ip;
160     }
161     if(empty == 0 && ip->ref == 0)    // Remember empty slot.
162       empty = ip;
163   }
164 
165   // Allocate fresh inode.
166   if(empty == 0)
167     panic("iget: no inodes");
168 
169   ip = empty;
170   ip->dev = dev;
171   ip->inum = inum;
172   ip->ref = 1;
173   ip->flags = 0;
174   release(&icache.lock);
175 
176   return ip;
177 }
178 
179 // Increment reference count for ip.
180 // Returns ip to enable ip = idup(ip1) idiom.
181 struct inode*
182 idup(struct inode *ip)
183 {
184   acquire(&icache.lock);
185   ip->ref++;
186   release(&icache.lock);
187   return ip;
188 }
189 
190 // Lock the given inode.
191 void
192 ilock(struct inode *ip)
193 {
194   struct buf *bp;
195   struct dinode *dip;
196 
197   if(ip == 0 || ip->ref < 1)
198     panic("ilock");
199 
200   acquire(&icache.lock);
201   while(ip->flags & I_BUSY)
202     sleep(ip, &icache.lock);
203   ip->flags |= I_BUSY;
204   release(&icache.lock);
205 
206   if(!(ip->flags & I_VALID)){
207     bp = bread(ip->dev, IBLOCK(ip->inum));
208     dip = (struct dinode*)bp->data + ip->inum%IPB;
209     ip->type = dip->type;
210     ip->major = dip->major;
211     ip->minor = dip->minor;
212     ip->nlink = dip->nlink;
213     ip->size = dip->size;
214     memmove(ip->addrs, dip->addrs, sizeof(ip->addrs));
215     brelse(bp);
216     ip->flags |= I_VALID;
217     if(ip->type == 0)
218       panic("ilock: no type");
219   }
220 }
221 
222 // Unlock the given inode.
223 void
224 iunlock(struct inode *ip)
225 {
226   if(ip == 0 || !(ip->flags & I_BUSY) || ip->ref < 1)
227     panic("iunlock");
228 
229   acquire(&icache.lock);
230   ip->flags &= ~I_BUSY;
231   wakeup(ip);
232   release(&icache.lock);
233 }
234 
235 // Caller holds reference to unlocked ip.  Drop reference.
236 void
237 iput(struct inode *ip)
238 {
239   acquire(&icache.lock);
240   if(ip->ref == 1 && (ip->flags & I_VALID) && ip->nlink == 0){
241     // inode is no longer used: truncate and free inode.
242     if(ip->flags & I_BUSY)
243       panic("iput busy");
244     ip->flags |= I_BUSY;
245     release(&icache.lock);
246     itrunc(ip);
247     ip->type = 0;
248     iupdate(ip);
249     acquire(&icache.lock);
250     ip->flags = 0;
251     wakeup(ip);
252   }
253   ip->ref--;
254   release(&icache.lock);
255 }
256 
257 // Common idiom: unlock, then put.
258 void
259 iunlockput(struct inode *ip)
260 {
261   iunlock(ip);
262   iput(ip);
263 }
264 
265 //PAGEBREAK!
266 // Allocate a new inode with the given type on device dev.
267 struct inode*
268 ialloc(uint dev, short type)
269 {
270   int inum;
271   struct buf *bp;
272   struct dinode *dip;
273   struct superblock sb;
274 
275   readsb(dev, &sb);
276   for(inum = 1; inum < sb.ninodes; inum++){  // loop over inode blocks
277     bp = bread(dev, IBLOCK(inum));
278     dip = (struct dinode*)bp->data + inum%IPB;
279     if(dip->type == 0){  // a free inode
280       memset(dip, 0, sizeof(*dip));
281       dip->type = type;
282       bwrite(bp);   // mark it allocated on the disk
283       brelse(bp);
284       return iget(dev, inum);
285     }
286     brelse(bp);
287   }
288   panic("ialloc: no inodes");
289 }
290 
291 // Copy inode, which has changed, from memory to disk.
292 void
293 iupdate(struct inode *ip)
294 {
295   struct buf *bp;
296   struct dinode *dip;
297 
298   bp = bread(ip->dev, IBLOCK(ip->inum));
299   dip = (struct dinode*)bp->data + ip->inum%IPB;
300   dip->type = ip->type;
301   dip->major = ip->major;
302   dip->minor = ip->minor;
303   dip->nlink = ip->nlink;
304   dip->size = ip->size;
305   memmove(dip->addrs, ip->addrs, sizeof(ip->addrs));
306   bwrite(bp);
307   brelse(bp);
308 }
309 
310 //PAGEBREAK!
311 // Inode contents
312 //
313 // The contents (data) associated with each inode is stored
314 // in a sequence of blocks on the disk.  The first NDIRECT blocks
315 // are listed in ip->addrs[].  The next NINDIRECT blocks are
316 // listed in the block ip->addrs[INDIRECT].
317 
318 // Return the disk block address of the nth block in inode ip.
319 // If there is no such block, bmap allocates one.
320 static uint
321 bmap(struct inode *ip, uint bn)
322 {
323   uint addr, *a;
324   struct buf *bp;
325 
326   if(bn < NDIRECT){
327     if((addr = ip->addrs[bn]) == 0)
328       ip->addrs[bn] = addr = balloc(ip->dev);
329     return addr;
330   }
331   bn -= NDIRECT;
332 
333   if(bn < NINDIRECT){
334     // Load indirect block, allocating if necessary.
335     if((addr = ip->addrs[NDIRECT]) == 0)
336       ip->addrs[NDIRECT] = addr = balloc(ip->dev);
337     bp = bread(ip->dev, addr);
338     a = (uint*)bp->data;
339 
340     if((addr = a[bn]) == 0){
341       a[bn] = addr = balloc(ip->dev);
342       bwrite(bp);
343     }
344     brelse(bp);
345     return addr;
346   }
347 
348   panic("bmap: out of range");
349 }
350 
351 // Truncate inode (discard contents).
352 // Only called after the last dirent referring
353 // to this inode has been erased on disk.
354 static void
355 itrunc(struct inode *ip)
356 {
357   int i, j;
358   struct buf *bp;
359   uint *a;
360 
361   for(i = 0; i < NDIRECT; i++){
362     if(ip->addrs[i]){
363       bfree(ip->dev, ip->addrs[i]);
364       ip->addrs[i] = 0;
365     }
366   }
367 
368   if(ip->addrs[NDIRECT]){
369     bp = bread(ip->dev, ip->addrs[NDIRECT]);
370     a = (uint*)bp->data;
371     for(j = 0; j < NINDIRECT; j++){
372       if(a[j])
373         bfree(ip->dev, a[j]);
374     }
375     brelse(bp);
376     bfree(ip->dev, ip->addrs[NDIRECT]);
377     ip->addrs[NDIRECT] = 0;
378   }
379 
380   ip->size = 0;
381   iupdate(ip);
382 }
383 
384 // Copy stat information from inode.
385 void
386 stati(struct inode *ip, struct stat *st)
387 {
388   st->dev = ip->dev;
389   st->ino = ip->inum;
390   st->type = ip->type;
391   st->nlink = ip->nlink;
392   st->size = ip->size;
393 }
394 
395 //PAGEBREAK!
396 // Read data from inode.
397 int
398 readi(struct inode *ip, char *dst, uint off, uint n)
399 {
400   uint tot, m;
401   struct buf *bp;
402 
403   if(ip->type == T_DEV){
404     if(ip->major < 0 || ip->major >= NDEV || !devsw[ip->major].read)
405       return -1;
406     return devsw[ip->major].read(ip, dst, n);
407   }
408 
409   if(off > ip->size || off + n < off)
410     return -1;
411   if(off + n > ip->size)
412     n = ip->size - off;
413 
414   for(tot=0; tot<n; tot+=m, off+=m, dst+=m){
415     bp = bread(ip->dev, bmap(ip, off/BSIZE));
416     m = min(n - tot, BSIZE - off%BSIZE);
417     memmove(dst, bp->data + off%BSIZE, m);
418     brelse(bp);
419   }
420   return n;
421 }
422 
423 // PAGEBREAK!
424 // Write data to inode.
425 int
426 writei(struct inode *ip, char *src, uint off, uint n)
427 {
428   uint tot, m;
429   struct buf *bp;
430 
431   if(ip->type == T_DEV){
432     if(ip->major < 0 || ip->major >= NDEV || !devsw[ip->major].write)
433       return -1;
434     return devsw[ip->major].write(ip, src, n);
435   }
436 
437   if(off > ip->size || off + n < off)
438     return -1;
439   if(off + n > MAXFILE*BSIZE)
440     n = MAXFILE*BSIZE - off;
441 
442   for(tot=0; tot<n; tot+=m, off+=m, src+=m){
443     bp = bread(ip->dev, bmap(ip, off/BSIZE));
444     m = min(n - tot, BSIZE - off%BSIZE);
445     memmove(bp->data + off%BSIZE, src, m);
446     bwrite(bp);
447     brelse(bp);
448   }
449 
450   if(n > 0 && off > ip->size){
451     ip->size = off;
452     iupdate(ip);
453   }
454   return n;
455 }
456 
457 //PAGEBREAK!
458 // Directories
459 
460 int
461 namecmp(const char *s, const char *t)
462 {
463   return strncmp(s, t, DIRSIZ);
464 }
465 
466 // Look for a directory entry in a directory.
467 // If found, set *poff to byte offset of entry.
468 // Caller must have already locked dp.
469 struct inode*
470 dirlookup(struct inode *dp, char *name, uint *poff)
471 {
472   uint off, inum;
473   struct buf *bp;
474   struct dirent *de;
475 
476   if(dp->type != T_DIR)
477     panic("dirlookup not DIR");
478 
479   for(off = 0; off < dp->size; off += BSIZE){
480     bp = bread(dp->dev, bmap(dp, off / BSIZE));
481     for(de = (struct dirent*)bp->data;
482         de < (struct dirent*)(bp->data + BSIZE);
483         de++){
484       if(de->inum == 0)
485         continue;
486       if(namecmp(name, de->name) == 0){
487         // entry matches path element
488         if(poff)
489           *poff = off + (uchar*)de - bp->data;
490         inum = de->inum;
491         brelse(bp);
492         return iget(dp->dev, inum);
493       }
494     }
495     brelse(bp);
496   }
497   return 0;
498 }
499 
500 // Write a new directory entry (name, inum) into the directory dp.
501 int
502 dirlink(struct inode *dp, char *name, uint inum)
503 {
504   int off;
505   struct dirent de;
506   struct inode *ip;
507 
508   // Check that name is not present.
509   if((ip = dirlookup(dp, name, 0)) != 0){
510     iput(ip);
511     return -1;
512   }
513 
514   // Look for an empty dirent.
515   for(off = 0; off < dp->size; off += sizeof(de)){
516     if(readi(dp, (char*)&de, off, sizeof(de)) != sizeof(de))
517       panic("dirlink read");
518     if(de.inum == 0)
519       break;
520   }
521 
522   strncpy(de.name, name, DIRSIZ);
523   de.inum = inum;
524   if(writei(dp, (char*)&de, off, sizeof(de)) != sizeof(de))
525     panic("dirlink");
526 
527   return 0;
528 }
529 
530 //PAGEBREAK!
531 // Paths
532 
533 // Copy the next path element from path into name.
534 // Return a pointer to the element following the copied one.
535 // The returned path has no leading slashes,
536 // so the caller can check *path=='\0' to see if the name is the last one.
537 // If no name to remove, return 0.
538 //
539 // Examples:
540 //   skipelem("a/bb/c", name) = "bb/c", setting name = "a"
541 //   skipelem("///a//bb", name) = "bb", setting name = "a"
542 //   skipelem("a", name) = "", setting name = "a"
543 //   skipelem("", name) = skipelem("////", name) = 0
544 //
545 static char*
546 skipelem(char *path, char *name)
547 {
548   char *s;
549   int len;
550 
551   while(*path == '/')
552     path++;
553   if(*path == 0)
554     return 0;
555   s = path;
556   while(*path != '/' && *path != 0)
557     path++;
558   len = path - s;
559   if(len >= DIRSIZ)
560     memmove(name, s, DIRSIZ);
561   else {
562     memmove(name, s, len);
563     name[len] = 0;
564   }
565   while(*path == '/')
566     path++;
567   return path;
568 }
569 
570 // Look up and return the inode for a path name.
571 // If parent != 0, return the inode for the parent and copy the final
572 // path element into name, which must have room for DIRSIZ bytes.
573 static struct inode*
574 namex(char *path, int parent, char *name)
575 {
576   struct inode *ip, *next;
577 
578   if(*path == '/')
579     ip = iget(ROOTDEV, ROOTINO);
580   else
581     ip = idup(cp->cwd);
582 
583   while((path = skipelem(path, name)) != 0){
584     ilock(ip);
585     if(ip->type != T_DIR){
586       iunlockput(ip);
587       return 0;
588     }
589     if(parent && *path == '\0'){
590       // Stop one level early.
591       iunlock(ip);
592       return ip;
593     }
594     if((next = dirlookup(ip, name, 0)) == 0){
595       iunlockput(ip);
596       return 0;
597     }
598     iunlockput(ip);
599     ip = next;
600   }
601   if(parent){
602     iput(ip);
603     return 0;
604   }
605   return ip;
606 }
607 
608 struct inode*
609 namei(char *path)
610 {
611   char name[DIRSIZ];
612   return namex(path, 0, name);
613 }
614 
615 struct inode*
616 nameiparent(char *path, char *name)
617 {
618   return namex(path, 1, name);
619 }
620