xref: /original-bsd/sys/ufs/lfs/lfs_inode.c (revision 1a56dd2c)
1 /*
2  * Copyright (c) 1982, 1986 Regents of the University of California.
3  * All rights reserved.  The Berkeley software License Agreement
4  * specifies the terms and conditions for redistribution.
5  *
6  *	@(#)lfs_inode.c	7.5 (Berkeley) 06/21/87
7  */
8 
9 #include "param.h"
10 #include "systm.h"
11 #include "mount.h"
12 #include "dir.h"
13 #include "user.h"
14 #include "inode.h"
15 #include "fs.h"
16 #include "buf.h"
17 #include "cmap.h"
18 #ifdef QUOTA
19 #include "quota.h"
20 #endif
21 #include "kernel.h"
22 #include "malloc.h"
23 
24 #define	INOHSZ	512
25 #if	((INOHSZ&(INOHSZ-1)) == 0)
26 #define	INOHASH(dev,ino)	(((dev)+(ino))&(INOHSZ-1))
27 #else
28 #define	INOHASH(dev,ino)	(((unsigned)((dev)+(ino)))%INOHSZ)
29 #endif
30 
31 union ihead {				/* inode LRU cache, Chris Maltby */
32 	union  ihead *ih_head[2];
33 	struct inode *ih_chain[2];
34 } ihead[INOHSZ];
35 
36 struct inode *ifreeh, **ifreet;
37 
38 /*
39  * Initialize hash links for inodes
40  * and build inode free list.
41  */
42 ihinit()
43 {
44 	register int i;
45 	register struct inode *ip = inode;
46 	register union  ihead *ih = ihead;
47 
48 	for (i = INOHSZ; --i >= 0; ih++) {
49 		ih->ih_head[0] = ih;
50 		ih->ih_head[1] = ih;
51 	}
52 	ifreeh = ip;
53 	ifreet = &ip->i_freef;
54 	ip->i_freeb = &ifreeh;
55 	ip->i_forw = ip;
56 	ip->i_back = ip;
57 	for (i = ninode; --i > 0; ) {
58 		++ip;
59 		ip->i_forw = ip;
60 		ip->i_back = ip;
61 		*ifreet = ip;
62 		ip->i_freeb = ifreet;
63 		ifreet = &ip->i_freef;
64 	}
65 	ip->i_freef = NULL;
66 }
67 
68 #ifdef notdef
69 /*
70  * Find an inode if it is incore.
71  * This is the equivalent, for inodes,
72  * of ``incore'' in bio.c or ``pfind'' in subr.c.
73  */
74 struct inode *
75 ifind(dev, ino)
76 	dev_t dev;
77 	ino_t ino;
78 {
79 	register struct inode *ip;
80 	register union  ihead *ih;
81 
82 	ih = &ihead[INOHASH(dev, ino)];
83 	for (ip = ih->ih_chain[0]; ip != (struct inode *)ih; ip = ip->i_forw)
84 		if (ino==ip->i_number && dev==ip->i_dev)
85 			return (ip);
86 	return ((struct inode *)0);
87 }
88 #endif notdef
89 
90 /*
91  * Look up an inode by device,inumber.
92  * If it is in core (in the inode structure),
93  * honor the locking protocol.
94  * If it is not in core, read it in from the
95  * specified device.
96  * If the inode is mounted on, perform
97  * the indicated indirection.
98  * In all cases, a pointer to a locked
99  * inode structure is returned.
100  *
101  * panic: no imt -- if the mounted file
102  *	system is not in the mount table.
103  *	"cannot happen"
104  */
105 struct inode *
106 iget(dev, fs, ino)
107 	dev_t dev;
108 	register struct fs *fs;
109 	ino_t ino;
110 {
111 	register struct inode *ip;
112 	register union  ihead *ih;
113 	register struct mount *mp;
114 	register struct buf *bp;
115 	register struct dinode *dp;
116 	register struct inode *iq;
117 
118 loop:
119 	ih = &ihead[INOHASH(dev, ino)];
120 	for (ip = ih->ih_chain[0]; ip != (struct inode *)ih; ip = ip->i_forw)
121 		if (ino == ip->i_number && dev == ip->i_dev) {
122 			/*
123 			 * Following is essentially an inline expanded
124 			 * copy of igrab(), expanded inline for speed,
125 			 * and so that the test for a mounted on inode
126 			 * can be deferred until after we are sure that
127 			 * the inode isn't busy.
128 			 */
129 			if ((ip->i_flag&ILOCKED) != 0) {
130 				ip->i_flag |= IWANT;
131 				sleep((caddr_t)ip, PINOD);
132 				goto loop;
133 			}
134 			if ((ip->i_flag&IMOUNT) != 0) {
135 				for (mp = &mount[0]; mp < &mount[NMOUNT]; mp++)
136 					if(mp->m_inodp == ip) {
137 						dev = mp->m_dev;
138 						fs = mp->m_fs;
139 						ino = ROOTINO;
140 						goto loop;
141 					}
142 				panic("no imt");
143 			}
144 			if (ip->i_count == 0) {		/* ino on free list */
145 				if (iq = ip->i_freef)
146 					iq->i_freeb = ip->i_freeb;
147 				else
148 					ifreet = ip->i_freeb;
149 				*ip->i_freeb = iq;
150 				ip->i_freef = NULL;
151 				ip->i_freeb = NULL;
152 			}
153 			ip->i_count++;
154 			ip->i_flag |= ILOCKED;
155 			return(ip);
156 		}
157 
158 	if ((ip = ifreeh) == NULL) {
159 		tablefull("inode");
160 		u.u_error = ENFILE;
161 		return(NULL);
162 	}
163 	if (ip->i_count)
164 		panic("free inode isn't");
165 	if (iq = ip->i_freef)
166 		iq->i_freeb = &ifreeh;
167 	ifreeh = iq;
168 	ip->i_freef = NULL;
169 	ip->i_freeb = NULL;
170 	/*
171 	 * Now to take inode off the hash chain it was on
172 	 * (initially, or after an iflush, it is on a "hash chain"
173 	 * consisting entirely of itself, and pointed to by no-one,
174 	 * but that doesn't matter), and put it on the chain for
175 	 * its new (ino, dev) pair
176 	 */
177 	remque(ip);
178 	insque(ip, ih);
179 	ip->i_dev = dev;
180 	ip->i_fs = fs;
181 	ip->i_number = ino;
182 	cacheinval(ip);
183 	ip->i_flag = ILOCKED;
184 	ip->i_count++;
185 	ip->i_lastr = 0;
186 #ifdef QUOTA
187 	dqrele(ip->i_dquot);
188 #endif
189 	bp = bread(dev, fsbtodb(fs, itod(fs, ino)), (int)fs->fs_bsize);
190 	/*
191 	 * Check I/O errors
192 	 */
193 	if ((bp->b_flags&B_ERROR) != 0) {
194 		brelse(bp);
195 		/*
196 		 * the inode doesn't contain anything useful, so it would
197 		 * be misleading to leave it on its hash chain.
198 		 * 'iput' will take care of putting it back on the free list.
199 		 */
200 		remque(ip);
201 		ip->i_forw = ip;
202 		ip->i_back = ip;
203 		/*
204 		 * we also loose its inumber, just in case (as iput
205 		 * doesn't do that any more) - but as it isn't on its
206 		 * hash chain, I doubt if this is really necessary .. kre
207 		 * (probably the two methods are interchangable)
208 		 */
209 		ip->i_number = 0;
210 #ifdef QUOTA
211 		ip->i_dquot = NODQUOT;
212 #endif
213 		iput(ip);
214 		return(NULL);
215 	}
216 	dp = bp->b_un.b_dino;
217 	dp += itoo(fs, ino);
218 	ip->i_ic = dp->di_ic;
219 	brelse(bp);
220 #ifdef QUOTA
221 	if (ip->i_mode == 0)
222 		ip->i_dquot = NODQUOT;
223 	else
224 		ip->i_dquot = inoquota(ip);
225 #endif
226 	return (ip);
227 }
228 
229 /*
230  * Convert a pointer to an inode into a reference to an inode.
231  *
232  * This is basically the internal piece of iget (after the
233  * inode pointer is located) but without the test for mounted
234  * filesystems.  It is caller's responsibility to check that
235  * the inode pointer is valid.
236  */
237 igrab(ip)
238 	register struct inode *ip;
239 {
240 	while ((ip->i_flag&ILOCKED) != 0) {
241 		ip->i_flag |= IWANT;
242 		sleep((caddr_t)ip, PINOD);
243 	}
244 	if (ip->i_count == 0) {		/* ino on free list */
245 		register struct inode *iq;
246 
247 		if (iq = ip->i_freef)
248 			iq->i_freeb = ip->i_freeb;
249 		else
250 			ifreet = ip->i_freeb;
251 		*ip->i_freeb = iq;
252 		ip->i_freef = NULL;
253 		ip->i_freeb = NULL;
254 	}
255 	ip->i_count++;
256 	ip->i_flag |= ILOCKED;
257 }
258 
259 /*
260  * Decrement reference count of
261  * an inode structure.
262  * On the last reference,
263  * write the inode out and if necessary,
264  * truncate and deallocate the file.
265  */
266 iput(ip)
267 	register struct inode *ip;
268 {
269 
270 	if ((ip->i_flag & ILOCKED) == 0)
271 		panic("iput");
272 	IUNLOCK(ip);
273 	irele(ip);
274 }
275 
276 irele(ip)
277 	register struct inode *ip;
278 {
279 	int mode;
280 
281 	if (ip->i_count == 1) {
282 		ip->i_flag |= ILOCKED;
283 		if (ip->i_nlink <= 0 && ip->i_fs->fs_ronly == 0) {
284 			itrunc(ip, (u_long)0);
285 			mode = ip->i_mode;
286 			ip->i_mode = 0;
287 			ip->i_rdev = 0;
288 			ip->i_flag |= IUPD|ICHG;
289 			ifree(ip, ip->i_number, mode);
290 #ifdef QUOTA
291 			(void) chkiq(ip->i_dev, ip, ip->i_uid, 0);
292 			dqrele(ip->i_dquot);
293 			ip->i_dquot = NODQUOT;
294 #endif
295 		}
296 		IUPDAT(ip, &time, &time, 0);
297 		IUNLOCK(ip);
298 		ip->i_flag = 0;
299 		/*
300 		 * Put the inode on the end of the free list.
301 		 * Possibly in some cases it would be better to
302 		 * put the inode at the head of the free list,
303 		 * (eg: where i_mode == 0 || i_number == 0)
304 		 * but I will think about that later .. kre
305 		 * (i_number is rarely 0 - only after an i/o error in iget,
306 		 * where i_mode == 0, the inode will probably be wanted
307 		 * again soon for an ialloc, so possibly we should keep it)
308 		 */
309 		if (ifreeh) {
310 			*ifreet = ip;
311 			ip->i_freeb = ifreet;
312 		} else {
313 			ifreeh = ip;
314 			ip->i_freeb = &ifreeh;
315 		}
316 		ip->i_freef = NULL;
317 		ifreet = &ip->i_freef;
318 	} else if (!(ip->i_flag & ILOCKED))
319 		ITIMES(ip, &time, &time);
320 	ip->i_count--;
321 }
322 
323 /*
324  * Check accessed and update flags on
325  * an inode structure.
326  * If any is on, update the inode
327  * with the current time.
328  * If waitfor is given, then must insure
329  * i/o order so wait for write to complete.
330  */
331 iupdat(ip, ta, tm, waitfor)
332 	register struct inode *ip;
333 	struct timeval *ta, *tm;
334 	int waitfor;
335 {
336 	register struct buf *bp;
337 	struct dinode *dp;
338 	register struct fs *fs;
339 
340 	fs = ip->i_fs;
341 	if ((ip->i_flag & (IUPD|IACC|ICHG|IMOD)) != 0) {
342 		if (fs->fs_ronly)
343 			return;
344 		bp = bread(ip->i_dev, fsbtodb(fs, itod(fs, ip->i_number)),
345 			(int)fs->fs_bsize);
346 		if (bp->b_flags & B_ERROR) {
347 			brelse(bp);
348 			return;
349 		}
350 		if (ip->i_flag&IACC)
351 			ip->i_atime = ta->tv_sec;
352 		if (ip->i_flag&IUPD)
353 			ip->i_mtime = tm->tv_sec;
354 		if (ip->i_flag&ICHG)
355 			ip->i_ctime = time.tv_sec;
356 		ip->i_flag &= ~(IUPD|IACC|ICHG|IMOD);
357 		dp = bp->b_un.b_dino + itoo(fs, ip->i_number);
358 		dp->di_ic = ip->i_ic;
359 		if (waitfor)
360 			bwrite(bp);
361 		else
362 			bdwrite(bp);
363 	}
364 }
365 
366 #define	SINGLE	0	/* index of single indirect block */
367 #define	DOUBLE	1	/* index of double indirect block */
368 #define	TRIPLE	2	/* index of triple indirect block */
369 /*
370  * Truncate the inode ip to at most
371  * length size.  Free affected disk
372  * blocks -- the blocks of the file
373  * are removed in reverse order.
374  *
375  * NB: triple indirect blocks are untested.
376  */
377 itrunc(oip, length)
378 	register struct inode *oip;
379 	u_long length;
380 {
381 	register daddr_t lastblock;
382 	daddr_t bn, lbn, lastiblock[NIADDR];
383 	register struct fs *fs;
384 	register struct inode *ip;
385 	struct buf *bp;
386 	int offset, osize, size, count, level;
387 	long nblocks, blocksreleased = 0;
388 	register int i;
389 	dev_t dev;
390 	struct inode tip;
391 	extern long indirtrunc();
392 
393 	if (oip->i_size <= length) {
394 		oip->i_flag |= ICHG|IUPD;
395 		iupdat(oip, &time, &time, 1);
396 		return;
397 	}
398 	/*
399 	 * Calculate index into inode's block list of
400 	 * last direct and indirect blocks (if any)
401 	 * which we want to keep.  Lastblock is -1 when
402 	 * the file is truncated to 0.
403 	 */
404 	fs = oip->i_fs;
405 	lastblock = lblkno(fs, length + fs->fs_bsize - 1) - 1;
406 	lastiblock[SINGLE] = lastblock - NDADDR;
407 	lastiblock[DOUBLE] = lastiblock[SINGLE] - NINDIR(fs);
408 	lastiblock[TRIPLE] = lastiblock[DOUBLE] - NINDIR(fs) * NINDIR(fs);
409 	nblocks = btodb(fs->fs_bsize);
410 	/*
411 	 * Update the size of the file. If the file is not being
412 	 * truncated to a block boundry, the contents of the
413 	 * partial block following the end of the file must be
414 	 * zero'ed in case it ever become accessable again because
415 	 * of subsequent file growth.
416 	 */
417 	osize = oip->i_size;
418 	offset = blkoff(fs, length);
419 	if (offset == 0) {
420 		oip->i_size = length;
421 	} else {
422 		lbn = lblkno(fs, length);
423 		bn = fsbtodb(fs, bmap(oip, lbn, B_WRITE, offset));
424 		if (u.u_error || (long)bn < 0)
425 			return;
426 		oip->i_size = length;
427 		size = blksize(fs, oip, lbn);
428 		count = howmany(size, CLBYTES);
429 		dev = oip->i_dev;
430 		for (i = 0; i < count; i++)
431 			munhash(dev, bn + i * CLBYTES / DEV_BSIZE);
432 		bp = bread(dev, bn, size);
433 		if (bp->b_flags & B_ERROR) {
434 			u.u_error = EIO;
435 			oip->i_size = osize;
436 			brelse(bp);
437 			return;
438 		}
439 		bzero(bp->b_un.b_addr + offset, (unsigned)(size - offset));
440 		bdwrite(bp);
441 	}
442 	/*
443 	 * Update file and block pointers
444 	 * on disk before we start freeing blocks.
445 	 * If we crash before free'ing blocks below,
446 	 * the blocks will be returned to the free list.
447 	 * lastiblock values are also normalized to -1
448 	 * for calls to indirtrunc below.
449 	 */
450 	tip = *oip;
451 	tip.i_size = osize;
452 	for (level = TRIPLE; level >= SINGLE; level--)
453 		if (lastiblock[level] < 0) {
454 			oip->i_ib[level] = 0;
455 			lastiblock[level] = -1;
456 		}
457 	for (i = NDADDR - 1; i > lastblock; i--)
458 		oip->i_db[i] = 0;
459 	oip->i_flag |= ICHG|IUPD;
460 	syncip(oip);
461 
462 	/*
463 	 * Indirect blocks first.
464 	 */
465 	ip = &tip;
466 	for (level = TRIPLE; level >= SINGLE; level--) {
467 		bn = ip->i_ib[level];
468 		if (bn != 0) {
469 			blocksreleased +=
470 			    indirtrunc(ip, bn, lastiblock[level], level);
471 			if (lastiblock[level] < 0) {
472 				ip->i_ib[level] = 0;
473 				blkfree(ip, bn, (off_t)fs->fs_bsize);
474 				blocksreleased += nblocks;
475 			}
476 		}
477 		if (lastiblock[level] >= 0)
478 			goto done;
479 	}
480 
481 	/*
482 	 * All whole direct blocks or frags.
483 	 */
484 	for (i = NDADDR - 1; i > lastblock; i--) {
485 		register off_t bsize;
486 
487 		bn = ip->i_db[i];
488 		if (bn == 0)
489 			continue;
490 		ip->i_db[i] = 0;
491 		bsize = (off_t)blksize(fs, ip, i);
492 		blkfree(ip, bn, bsize);
493 		blocksreleased += btodb(bsize);
494 	}
495 	if (lastblock < 0)
496 		goto done;
497 
498 	/*
499 	 * Finally, look for a change in size of the
500 	 * last direct block; release any frags.
501 	 */
502 	bn = ip->i_db[lastblock];
503 	if (bn != 0) {
504 		off_t oldspace, newspace;
505 
506 		/*
507 		 * Calculate amount of space we're giving
508 		 * back as old block size minus new block size.
509 		 */
510 		oldspace = blksize(fs, ip, lastblock);
511 		ip->i_size = length;
512 		newspace = blksize(fs, ip, lastblock);
513 		if (newspace == 0)
514 			panic("itrunc: newspace");
515 		if (oldspace - newspace > 0) {
516 			/*
517 			 * Block number of space to be free'd is
518 			 * the old block # plus the number of frags
519 			 * required for the storage we're keeping.
520 			 */
521 			bn += numfrags(fs, newspace);
522 			blkfree(ip, bn, oldspace - newspace);
523 			blocksreleased += btodb(oldspace - newspace);
524 		}
525 	}
526 done:
527 /* BEGIN PARANOIA */
528 	for (level = SINGLE; level <= TRIPLE; level++)
529 		if (ip->i_ib[level] != oip->i_ib[level])
530 			panic("itrunc1");
531 	for (i = 0; i < NDADDR; i++)
532 		if (ip->i_db[i] != oip->i_db[i])
533 			panic("itrunc2");
534 /* END PARANOIA */
535 	oip->i_blocks -= blocksreleased;
536 	if (oip->i_blocks < 0)			/* sanity */
537 		oip->i_blocks = 0;
538 	oip->i_flag |= ICHG;
539 #ifdef QUOTA
540 	(void) chkdq(oip, -blocksreleased, 0);
541 #endif
542 }
543 
544 /*
545  * Release blocks associated with the inode ip and
546  * stored in the indirect block bn.  Blocks are free'd
547  * in LIFO order up to (but not including) lastbn.  If
548  * level is greater than SINGLE, the block is an indirect
549  * block and recursive calls to indirtrunc must be used to
550  * cleanse other indirect blocks.
551  *
552  * NB: triple indirect blocks are untested.
553  */
554 long
555 indirtrunc(ip, bn, lastbn, level)
556 	register struct inode *ip;
557 	daddr_t bn, lastbn;
558 	int level;
559 {
560 	register int i;
561 	struct buf *bp;
562 	register struct fs *fs = ip->i_fs;
563 	register daddr_t *bap;
564 	daddr_t *copy, nb, last;
565 	long factor;
566 	int blocksreleased = 0, nblocks;
567 
568 	/*
569 	 * Calculate index in current block of last
570 	 * block to be kept.  -1 indicates the entire
571 	 * block so we need not calculate the index.
572 	 */
573 	factor = 1;
574 	for (i = SINGLE; i < level; i++)
575 		factor *= NINDIR(fs);
576 	last = lastbn;
577 	if (lastbn > 0)
578 		last /= factor;
579 	nblocks = btodb(fs->fs_bsize);
580 	/*
581 	 * Get buffer of block pointers, zero those
582 	 * entries corresponding to blocks to be free'd,
583 	 * and update on disk copy first.
584 	 */
585 	bp = bread(ip->i_dev, fsbtodb(fs, bn), (int)fs->fs_bsize);
586 	if (bp->b_flags&B_ERROR) {
587 		brelse(bp);
588 		return (0);
589 	}
590 	bap = bp->b_un.b_daddr;
591 	MALLOC(copy, daddr_t *, fs->fs_bsize, M_TEMP, M_WAITOK);
592 	bcopy((caddr_t)bap, (caddr_t)copy, (u_int)fs->fs_bsize);
593 	bzero((caddr_t)&bap[last + 1],
594 	  (u_int)(NINDIR(fs) - (last + 1)) * sizeof (daddr_t));
595 	bwrite(bp);
596 	bap = copy;
597 
598 	/*
599 	 * Recursively free totally unused blocks.
600 	 */
601 	for (i = NINDIR(fs) - 1; i > last; i--) {
602 		nb = bap[i];
603 		if (nb == 0)
604 			continue;
605 		if (level > SINGLE)
606 			blocksreleased +=
607 			    indirtrunc(ip, nb, (daddr_t)-1, level - 1);
608 		blkfree(ip, nb, (off_t)fs->fs_bsize);
609 		blocksreleased += nblocks;
610 	}
611 
612 	/*
613 	 * Recursively free last partial block.
614 	 */
615 	if (level > SINGLE && lastbn >= 0) {
616 		last = lastbn % factor;
617 		nb = bap[i];
618 		if (nb != 0)
619 			blocksreleased += indirtrunc(ip, nb, last, level - 1);
620 	}
621 	FREE(copy, M_TEMP);
622 	return (blocksreleased);
623 }
624 
625 /*
626  * Remove any inodes in the inode cache belonging to dev.
627  *
628  * There should not be any active ones, return error if any are found
629  * (nb: this is a user error, not a system err).
630  */
631 #ifdef QUOTA
632 iflush(dev, iq)
633 	dev_t dev;
634 	struct inode *iq;
635 #else
636 iflush(dev)
637 	dev_t dev;
638 #endif
639 {
640 	register struct inode *ip;
641 
642 	for (ip = inode; ip < inodeNINODE; ip++) {
643 #ifdef QUOTA
644 		if (ip != iq && ip->i_dev == dev)
645 #else
646 		if (ip->i_dev == dev)
647 #endif
648 			if (ip->i_count)
649 				return (EBUSY);
650 			else {
651 				remque(ip);
652 				ip->i_forw = ip;
653 				ip->i_back = ip;
654 				/*
655 				 * as i_count == 0, the inode was on the free
656 				 * list already, just leave it there, it will
657 				 * fall off the bottom eventually. We could
658 				 * perhaps move it to the head of the free
659 				 * list, but as umounts are done so
660 				 * infrequently, we would gain very little,
661 				 * while making the code bigger.
662 				 */
663 #ifdef QUOTA
664 				dqrele(ip->i_dquot);
665 				ip->i_dquot = NODQUOT;
666 #endif
667 			}
668 	}
669 	return (0);
670 }
671 
672 /*
673  * Lock an inode. If its already locked, set the WANT bit and sleep.
674  */
675 ilock(ip)
676 	register struct inode *ip;
677 {
678 
679 	ILOCK(ip);
680 }
681 
682 /*
683  * Unlock an inode.  If WANT bit is on, wakeup.
684  */
685 iunlock(ip)
686 	register struct inode *ip;
687 {
688 
689 	IUNLOCK(ip);
690 }
691