xref: /original-bsd/sys/ufs/ffs/ffs_inode.c (revision 883fb6be)
1 /*
2  * Copyright (c) 1982, 1986 Regents of the University of California.
3  * All rights reserved.  The Berkeley software License Agreement
4  * specifies the terms and conditions for redistribution.
5  *
6  *	@(#)ffs_inode.c	7.2 (Berkeley) 04/02/87
7  */
8 
9 #include "param.h"
10 #include "systm.h"
11 #include "mount.h"
12 #include "dir.h"
13 #include "user.h"
14 #include "inode.h"
15 #include "fs.h"
16 #include "buf.h"
17 #include "cmap.h"
18 #ifdef QUOTA
19 #include "quota.h"
20 #endif
21 #include "kernel.h"
22 
23 #define	INOHSZ	512
24 #if	((INOHSZ&(INOHSZ-1)) == 0)
25 #define	INOHASH(dev,ino)	(((dev)+(ino))&(INOHSZ-1))
26 #else
27 #define	INOHASH(dev,ino)	(((unsigned)((dev)+(ino)))%INOHSZ)
28 #endif
29 
30 union ihead {				/* inode LRU cache, Chris Maltby */
31 	union  ihead *ih_head[2];
32 	struct inode *ih_chain[2];
33 } ihead[INOHSZ];
34 
35 struct inode *ifreeh, **ifreet;
36 
37 /*
38  * Initialize hash links for inodes
39  * and build inode free list.
40  */
41 ihinit()
42 {
43 	register int i;
44 	register struct inode *ip = inode;
45 	register union  ihead *ih = ihead;
46 
47 	for (i = INOHSZ; --i >= 0; ih++) {
48 		ih->ih_head[0] = ih;
49 		ih->ih_head[1] = ih;
50 	}
51 	ifreeh = ip;
52 	ifreet = &ip->i_freef;
53 	ip->i_freeb = &ifreeh;
54 	ip->i_forw = ip;
55 	ip->i_back = ip;
56 	for (i = ninode; --i > 0; ) {
57 		++ip;
58 		ip->i_forw = ip;
59 		ip->i_back = ip;
60 		*ifreet = ip;
61 		ip->i_freeb = ifreet;
62 		ifreet = &ip->i_freef;
63 	}
64 	ip->i_freef = NULL;
65 }
66 
67 #ifdef notdef
68 /*
69  * Find an inode if it is incore.
70  * This is the equivalent, for inodes,
71  * of ``incore'' in bio.c or ``pfind'' in subr.c.
72  */
73 struct inode *
74 ifind(dev, ino)
75 	dev_t dev;
76 	ino_t ino;
77 {
78 	register struct inode *ip;
79 	register union  ihead *ih;
80 
81 	ih = &ihead[INOHASH(dev, ino)];
82 	for (ip = ih->ih_chain[0]; ip != (struct inode *)ih; ip = ip->i_forw)
83 		if (ino==ip->i_number && dev==ip->i_dev)
84 			return (ip);
85 	return ((struct inode *)0);
86 }
87 #endif notdef
88 
89 /*
90  * Look up an inode by device,inumber.
91  * If it is in core (in the inode structure),
92  * honor the locking protocol.
93  * If it is not in core, read it in from the
94  * specified device.
95  * If the inode is mounted on, perform
96  * the indicated indirection.
97  * In all cases, a pointer to a locked
98  * inode structure is returned.
99  *
100  * panic: no imt -- if the mounted file
101  *	system is not in the mount table.
102  *	"cannot happen"
103  */
104 struct inode *
105 iget(dev, fs, ino)
106 	dev_t dev;
107 	register struct fs *fs;
108 	ino_t ino;
109 {
110 	register struct inode *ip;
111 	register union  ihead *ih;
112 	register struct mount *mp;
113 	register struct buf *bp;
114 	register struct dinode *dp;
115 	register struct inode *iq;
116 
117 loop:
118 	ih = &ihead[INOHASH(dev, ino)];
119 	for (ip = ih->ih_chain[0]; ip != (struct inode *)ih; ip = ip->i_forw)
120 		if (ino == ip->i_number && dev == ip->i_dev) {
121 			/*
122 			 * Following is essentially an inline expanded
123 			 * copy of igrab(), expanded inline for speed,
124 			 * and so that the test for a mounted on inode
125 			 * can be deferred until after we are sure that
126 			 * the inode isn't busy.
127 			 */
128 			if ((ip->i_flag&ILOCKED) != 0) {
129 				ip->i_flag |= IWANT;
130 				sleep((caddr_t)ip, PINOD);
131 				goto loop;
132 			}
133 			if ((ip->i_flag&IMOUNT) != 0) {
134 				for (mp = &mount[0]; mp < &mount[NMOUNT]; mp++)
135 					if(mp->m_inodp == ip) {
136 						dev = mp->m_dev;
137 						fs = mp->m_bufp->b_un.b_fs;
138 						ino = ROOTINO;
139 						goto loop;
140 					}
141 				panic("no imt");
142 			}
143 			if (ip->i_count == 0) {		/* ino on free list */
144 				if (iq = ip->i_freef)
145 					iq->i_freeb = ip->i_freeb;
146 				else
147 					ifreet = ip->i_freeb;
148 				*ip->i_freeb = iq;
149 				ip->i_freef = NULL;
150 				ip->i_freeb = NULL;
151 			}
152 			ip->i_count++;
153 			ip->i_flag |= ILOCKED;
154 			return(ip);
155 		}
156 
157 	if ((ip = ifreeh) == NULL) {
158 		tablefull("inode");
159 		u.u_error = ENFILE;
160 		return(NULL);
161 	}
162 	if (ip->i_count)
163 		panic("free inode isn't");
164 	if (iq = ip->i_freef)
165 		iq->i_freeb = &ifreeh;
166 	ifreeh = iq;
167 	ip->i_freef = NULL;
168 	ip->i_freeb = NULL;
169 	/*
170 	 * Now to take inode off the hash chain it was on
171 	 * (initially, or after an iflush, it is on a "hash chain"
172 	 * consisting entirely of itself, and pointed to by no-one,
173 	 * but that doesn't matter), and put it on the chain for
174 	 * its new (ino, dev) pair
175 	 */
176 	remque(ip);
177 	insque(ip, ih);
178 	ip->i_dev = dev;
179 	ip->i_fs = fs;
180 	ip->i_number = ino;
181 	cacheinval(ip);
182 	ip->i_flag = ILOCKED;
183 	ip->i_count++;
184 	ip->i_lastr = 0;
185 #ifdef QUOTA
186 	dqrele(ip->i_dquot);
187 #endif
188 	bp = bread(dev, fsbtodb(fs, itod(fs, ino)), (int)fs->fs_bsize);
189 	/*
190 	 * Check I/O errors
191 	 */
192 	if ((bp->b_flags&B_ERROR) != 0) {
193 		brelse(bp);
194 		/*
195 		 * the inode doesn't contain anything useful, so it would
196 		 * be misleading to leave it on its hash chain.
197 		 * 'iput' will take care of putting it back on the free list.
198 		 */
199 		remque(ip);
200 		ip->i_forw = ip;
201 		ip->i_back = ip;
202 		/*
203 		 * we also loose its inumber, just in case (as iput
204 		 * doesn't do that any more) - but as it isn't on its
205 		 * hash chain, I doubt if this is really necessary .. kre
206 		 * (probably the two methods are interchangable)
207 		 */
208 		ip->i_number = 0;
209 #ifdef QUOTA
210 		ip->i_dquot = NODQUOT;
211 #endif
212 		iput(ip);
213 		return(NULL);
214 	}
215 	dp = bp->b_un.b_dino;
216 	dp += itoo(fs, ino);
217 	ip->i_ic = dp->di_ic;
218 	brelse(bp);
219 #ifdef QUOTA
220 	if (ip->i_mode == 0)
221 		ip->i_dquot = NODQUOT;
222 	else
223 		ip->i_dquot = inoquota(ip);
224 #endif
225 	return (ip);
226 }
227 
228 /*
229  * Convert a pointer to an inode into a reference to an inode.
230  *
231  * This is basically the internal piece of iget (after the
232  * inode pointer is located) but without the test for mounted
233  * filesystems.  It is caller's responsibility to check that
234  * the inode pointer is valid.
235  */
236 igrab(ip)
237 	register struct inode *ip;
238 {
239 	while ((ip->i_flag&ILOCKED) != 0) {
240 		ip->i_flag |= IWANT;
241 		sleep((caddr_t)ip, PINOD);
242 	}
243 	if (ip->i_count == 0) {		/* ino on free list */
244 		register struct inode *iq;
245 
246 		if (iq = ip->i_freef)
247 			iq->i_freeb = ip->i_freeb;
248 		else
249 			ifreet = ip->i_freeb;
250 		*ip->i_freeb = iq;
251 		ip->i_freef = NULL;
252 		ip->i_freeb = NULL;
253 	}
254 	ip->i_count++;
255 	ip->i_flag |= ILOCKED;
256 }
257 
258 /*
259  * Decrement reference count of
260  * an inode structure.
261  * On the last reference,
262  * write the inode out and if necessary,
263  * truncate and deallocate the file.
264  */
265 iput(ip)
266 	register struct inode *ip;
267 {
268 
269 	if ((ip->i_flag & ILOCKED) == 0)
270 		panic("iput");
271 	IUNLOCK(ip);
272 	irele(ip);
273 }
274 
275 irele(ip)
276 	register struct inode *ip;
277 {
278 	int mode;
279 
280 	if (ip->i_count == 1) {
281 		ip->i_flag |= ILOCKED;
282 		if (ip->i_nlink <= 0 && ip->i_fs->fs_ronly == 0) {
283 			itrunc(ip, (u_long)0);
284 			mode = ip->i_mode;
285 			ip->i_mode = 0;
286 			ip->i_rdev = 0;
287 			ip->i_flag |= IUPD|ICHG;
288 			ifree(ip, ip->i_number, mode);
289 #ifdef QUOTA
290 			(void) chkiq(ip->i_dev, ip, ip->i_uid, 0);
291 			dqrele(ip->i_dquot);
292 			ip->i_dquot = NODQUOT;
293 #endif
294 		}
295 		IUPDAT(ip, &time, &time, 0);
296 		IUNLOCK(ip);
297 		ip->i_flag = 0;
298 		/*
299 		 * Put the inode on the end of the free list.
300 		 * Possibly in some cases it would be better to
301 		 * put the inode at the head of the free list,
302 		 * (eg: where i_mode == 0 || i_number == 0)
303 		 * but I will think about that later .. kre
304 		 * (i_number is rarely 0 - only after an i/o error in iget,
305 		 * where i_mode == 0, the inode will probably be wanted
306 		 * again soon for an ialloc, so possibly we should keep it)
307 		 */
308 		if (ifreeh) {
309 			*ifreet = ip;
310 			ip->i_freeb = ifreet;
311 		} else {
312 			ifreeh = ip;
313 			ip->i_freeb = &ifreeh;
314 		}
315 		ip->i_freef = NULL;
316 		ifreet = &ip->i_freef;
317 	} else if (!(ip->i_flag & ILOCKED))
318 		ITIMES(ip, &time, &time);
319 	ip->i_count--;
320 }
321 
322 /*
323  * Check accessed and update flags on
324  * an inode structure.
325  * If any is on, update the inode
326  * with the current time.
327  * If waitfor is given, then must insure
328  * i/o order so wait for write to complete.
329  */
330 iupdat(ip, ta, tm, waitfor)
331 	register struct inode *ip;
332 	struct timeval *ta, *tm;
333 	int waitfor;
334 {
335 	register struct buf *bp;
336 	struct dinode *dp;
337 	register struct fs *fs;
338 
339 	fs = ip->i_fs;
340 	if ((ip->i_flag & (IUPD|IACC|ICHG|IMOD)) != 0) {
341 		if (fs->fs_ronly)
342 			return;
343 		bp = bread(ip->i_dev, fsbtodb(fs, itod(fs, ip->i_number)),
344 			(int)fs->fs_bsize);
345 		if (bp->b_flags & B_ERROR) {
346 			brelse(bp);
347 			return;
348 		}
349 		if (ip->i_flag&IACC)
350 			ip->i_atime = ta->tv_sec;
351 		if (ip->i_flag&IUPD)
352 			ip->i_mtime = tm->tv_sec;
353 		if (ip->i_flag&ICHG)
354 			ip->i_ctime = time.tv_sec;
355 		ip->i_flag &= ~(IUPD|IACC|ICHG|IMOD);
356 		dp = bp->b_un.b_dino + itoo(fs, ip->i_number);
357 		dp->di_ic = ip->i_ic;
358 		if (waitfor)
359 			bwrite(bp);
360 		else
361 			bdwrite(bp);
362 	}
363 }
364 
365 #define	SINGLE	0	/* index of single indirect block */
366 #define	DOUBLE	1	/* index of double indirect block */
367 #define	TRIPLE	2	/* index of triple indirect block */
368 /*
369  * Truncate the inode ip to at most
370  * length size.  Free affected disk
371  * blocks -- the blocks of the file
372  * are removed in reverse order.
373  *
374  * NB: triple indirect blocks are untested.
375  */
376 itrunc(oip, length)
377 	register struct inode *oip;
378 	u_long length;
379 {
380 	register daddr_t lastblock;
381 	daddr_t bn, lbn, lastiblock[NIADDR];
382 	register struct fs *fs;
383 	register struct inode *ip;
384 	struct buf *bp;
385 	int offset, osize, size, count, level;
386 	long nblocks, blocksreleased = 0;
387 	register int i;
388 	dev_t dev;
389 	struct inode tip;
390 	extern long indirtrunc();
391 
392 	if (oip->i_size <= length) {
393 		oip->i_flag |= ICHG|IUPD;
394 		iupdat(oip, &time, &time, 1);
395 		return;
396 	}
397 	/*
398 	 * Calculate index into inode's block list of
399 	 * last direct and indirect blocks (if any)
400 	 * which we want to keep.  Lastblock is -1 when
401 	 * the file is truncated to 0.
402 	 */
403 	fs = oip->i_fs;
404 	lastblock = lblkno(fs, length + fs->fs_bsize - 1) - 1;
405 	lastiblock[SINGLE] = lastblock - NDADDR;
406 	lastiblock[DOUBLE] = lastiblock[SINGLE] - NINDIR(fs);
407 	lastiblock[TRIPLE] = lastiblock[DOUBLE] - NINDIR(fs) * NINDIR(fs);
408 	nblocks = btodb(fs->fs_bsize);
409 	/*
410 	 * Update the size of the file. If the file is not being
411 	 * truncated to a block boundry, the contents of the
412 	 * partial block following the end of the file must be
413 	 * zero'ed in case it ever become accessable again because
414 	 * of subsequent file growth.
415 	 */
416 	osize = oip->i_size;
417 	offset = blkoff(fs, length);
418 	if (offset == 0) {
419 		oip->i_size = length;
420 	} else {
421 		lbn = lblkno(fs, length);
422 		bn = fsbtodb(fs, bmap(oip, lbn, B_WRITE, offset));
423 		if (u.u_error || (long)bn < 0)
424 			return;
425 		oip->i_size = length;
426 		size = blksize(fs, oip, lbn);
427 		count = howmany(size, CLBYTES);
428 		dev = oip->i_dev;
429 		for (i = 0; i < count; i++)
430 			munhash(dev, bn + i * CLBYTES / DEV_BSIZE);
431 		bp = bread(dev, bn, size);
432 		if (bp->b_flags & B_ERROR) {
433 			u.u_error = EIO;
434 			oip->i_size = osize;
435 			brelse(bp);
436 			return;
437 		}
438 		bzero(bp->b_un.b_addr + offset, (unsigned)(size - offset));
439 		bdwrite(bp);
440 	}
441 	/*
442 	 * Update file and block pointers
443 	 * on disk before we start freeing blocks.
444 	 * If we crash before free'ing blocks below,
445 	 * the blocks will be returned to the free list.
446 	 * lastiblock values are also normalized to -1
447 	 * for calls to indirtrunc below.
448 	 */
449 	tip = *oip;
450 	tip.i_size = osize;
451 	for (level = TRIPLE; level >= SINGLE; level--)
452 		if (lastiblock[level] < 0) {
453 			oip->i_ib[level] = 0;
454 			lastiblock[level] = -1;
455 		}
456 	for (i = NDADDR - 1; i > lastblock; i--)
457 		oip->i_db[i] = 0;
458 	oip->i_flag |= ICHG|IUPD;
459 	syncip(oip);
460 
461 	/*
462 	 * Indirect blocks first.
463 	 */
464 	ip = &tip;
465 	for (level = TRIPLE; level >= SINGLE; level--) {
466 		bn = ip->i_ib[level];
467 		if (bn != 0) {
468 			blocksreleased +=
469 			    indirtrunc(ip, bn, lastiblock[level], level);
470 			if (lastiblock[level] < 0) {
471 				ip->i_ib[level] = 0;
472 				free(ip, bn, (off_t)fs->fs_bsize);
473 				blocksreleased += nblocks;
474 			}
475 		}
476 		if (lastiblock[level] >= 0)
477 			goto done;
478 	}
479 
480 	/*
481 	 * All whole direct blocks or frags.
482 	 */
483 	for (i = NDADDR - 1; i > lastblock; i--) {
484 		register off_t bsize;
485 
486 		bn = ip->i_db[i];
487 		if (bn == 0)
488 			continue;
489 		ip->i_db[i] = 0;
490 		bsize = (off_t)blksize(fs, ip, i);
491 		free(ip, bn, bsize);
492 		blocksreleased += btodb(bsize);
493 	}
494 	if (lastblock < 0)
495 		goto done;
496 
497 	/*
498 	 * Finally, look for a change in size of the
499 	 * last direct block; release any frags.
500 	 */
501 	bn = ip->i_db[lastblock];
502 	if (bn != 0) {
503 		off_t oldspace, newspace;
504 
505 		/*
506 		 * Calculate amount of space we're giving
507 		 * back as old block size minus new block size.
508 		 */
509 		oldspace = blksize(fs, ip, lastblock);
510 		ip->i_size = length;
511 		newspace = blksize(fs, ip, lastblock);
512 		if (newspace == 0)
513 			panic("itrunc: newspace");
514 		if (oldspace - newspace > 0) {
515 			/*
516 			 * Block number of space to be free'd is
517 			 * the old block # plus the number of frags
518 			 * required for the storage we're keeping.
519 			 */
520 			bn += numfrags(fs, newspace);
521 			free(ip, bn, oldspace - newspace);
522 			blocksreleased += btodb(oldspace - newspace);
523 		}
524 	}
525 done:
526 /* BEGIN PARANOIA */
527 	for (level = SINGLE; level <= TRIPLE; level++)
528 		if (ip->i_ib[level] != oip->i_ib[level])
529 			panic("itrunc1");
530 	for (i = 0; i < NDADDR; i++)
531 		if (ip->i_db[i] != oip->i_db[i])
532 			panic("itrunc2");
533 /* END PARANOIA */
534 	oip->i_blocks -= blocksreleased;
535 	if (oip->i_blocks < 0)			/* sanity */
536 		oip->i_blocks = 0;
537 	oip->i_flag |= ICHG;
538 #ifdef QUOTA
539 	(void) chkdq(oip, -blocksreleased, 0);
540 #endif
541 }
542 
543 /*
544  * Release blocks associated with the inode ip and
545  * stored in the indirect block bn.  Blocks are free'd
546  * in LIFO order up to (but not including) lastbn.  If
547  * level is greater than SINGLE, the block is an indirect
548  * block and recursive calls to indirtrunc must be used to
549  * cleanse other indirect blocks.
550  *
551  * NB: triple indirect blocks are untested.
552  */
553 long
554 indirtrunc(ip, bn, lastbn, level)
555 	register struct inode *ip;
556 	daddr_t bn, lastbn;
557 	int level;
558 {
559 	register int i;
560 	struct buf *bp, *copy;
561 	register daddr_t *bap;
562 	register struct fs *fs = ip->i_fs;
563 	daddr_t nb, last;
564 	long factor;
565 	int blocksreleased = 0, nblocks;
566 
567 	/*
568 	 * Calculate index in current block of last
569 	 * block to be kept.  -1 indicates the entire
570 	 * block so we need not calculate the index.
571 	 */
572 	factor = 1;
573 	for (i = SINGLE; i < level; i++)
574 		factor *= NINDIR(fs);
575 	last = lastbn;
576 	if (lastbn > 0)
577 		last /= factor;
578 	nblocks = btodb(fs->fs_bsize);
579 	/*
580 	 * Get buffer of block pointers, zero those
581 	 * entries corresponding to blocks to be free'd,
582 	 * and update on disk copy first.
583 	 */
584 	copy = geteblk((int)fs->fs_bsize);
585 	bp = bread(ip->i_dev, fsbtodb(fs, bn), (int)fs->fs_bsize);
586 	if (bp->b_flags&B_ERROR) {
587 		brelse(copy);
588 		brelse(bp);
589 		return (0);
590 	}
591 	bap = bp->b_un.b_daddr;
592 	bcopy((caddr_t)bap, (caddr_t)copy->b_un.b_daddr, (u_int)fs->fs_bsize);
593 	bzero((caddr_t)&bap[last + 1],
594 	  (u_int)(NINDIR(fs) - (last + 1)) * sizeof (daddr_t));
595 	bwrite(bp);
596 	bp = copy, bap = bp->b_un.b_daddr;
597 
598 	/*
599 	 * Recursively free totally unused blocks.
600 	 */
601 	for (i = NINDIR(fs) - 1; i > last; i--) {
602 		nb = bap[i];
603 		if (nb == 0)
604 			continue;
605 		if (level > SINGLE)
606 			blocksreleased +=
607 			    indirtrunc(ip, nb, (daddr_t)-1, level - 1);
608 		free(ip, nb, (off_t)fs->fs_bsize);
609 		blocksreleased += nblocks;
610 	}
611 
612 	/*
613 	 * Recursively free last partial block.
614 	 */
615 	if (level > SINGLE && lastbn >= 0) {
616 		last = lastbn % factor;
617 		nb = bap[i];
618 		if (nb != 0)
619 			blocksreleased += indirtrunc(ip, nb, last, level - 1);
620 	}
621 	brelse(bp);
622 	return (blocksreleased);
623 }
624 
625 /*
626  * Remove any inodes in the inode cache belonging to dev.
627  *
628  * There should not be any active ones, return error if any are found
629  * (nb: this is a user error, not a system err).
630  */
631 #ifdef QUOTA
632 iflush(dev, iq)
633 	dev_t dev;
634 	struct inode *iq;
635 #else
636 iflush(dev)
637 	dev_t dev;
638 #endif
639 {
640 	register struct inode *ip;
641 
642 	for (ip = inode; ip < inodeNINODE; ip++) {
643 #ifdef QUOTA
644 		if (ip != iq && ip->i_dev == dev)
645 #else
646 		if (ip->i_dev == dev)
647 #endif
648 			if (ip->i_count)
649 				return (EBUSY);
650 			else {
651 				remque(ip);
652 				ip->i_forw = ip;
653 				ip->i_back = ip;
654 				/*
655 				 * as i_count == 0, the inode was on the free
656 				 * list already, just leave it there, it will
657 				 * fall off the bottom eventually. We could
658 				 * perhaps move it to the head of the free
659 				 * list, but as umounts are done so
660 				 * infrequently, we would gain very little,
661 				 * while making the code bigger.
662 				 */
663 #ifdef QUOTA
664 				dqrele(ip->i_dquot);
665 				ip->i_dquot = NODQUOT;
666 #endif
667 			}
668 	}
669 	return (0);
670 }
671 
672 /*
673  * Lock an inode. If its already locked, set the WANT bit and sleep.
674  */
675 ilock(ip)
676 	register struct inode *ip;
677 {
678 
679 	ILOCK(ip);
680 }
681 
682 /*
683  * Unlock an inode.  If WANT bit is on, wakeup.
684  */
685 iunlock(ip)
686 	register struct inode *ip;
687 {
688 
689 	IUNLOCK(ip);
690 }
691