xref: /original-bsd/sys/ufs/ufs/ufs_inode.c (revision 3109f15a)
1 /*	ufs_inode.c	6.15	85/03/20	*/
2 
3 #include "param.h"
4 #include "systm.h"
5 #include "mount.h"
6 #include "dir.h"
7 #include "user.h"
8 #include "inode.h"
9 #include "fs.h"
10 #include "buf.h"
11 #ifdef QUOTA
12 #include "quota.h"
13 #endif
14 #include "kernel.h"
15 
16 #define	INOHSZ	512
17 #if	((INOHSZ&(INOHSZ-1)) == 0)
18 #define	INOHASH(dev,ino)	(((dev)+(ino))&(INOHSZ-1))
19 #else
20 #define	INOHASH(dev,ino)	(((unsigned)((dev)+(ino)))%INOHSZ)
21 #endif
22 
23 union ihead {				/* inode LRU cache, Chris Maltby */
24 	union  ihead *ih_head[2];
25 	struct inode *ih_chain[2];
26 } ihead[INOHSZ];
27 
28 struct inode *ifreeh, **ifreet;
29 
30 /*
31  * Initialize hash links for inodes
32  * and build inode free list.
33  */
34 ihinit()
35 {
36 	register int i;
37 	register struct inode *ip = inode;
38 	register union  ihead *ih = ihead;
39 
40 	for (i = INOHSZ; --i >= 0; ih++) {
41 		ih->ih_head[0] = ih;
42 		ih->ih_head[1] = ih;
43 	}
44 	ifreeh = ip;
45 	ifreet = &ip->i_freef;
46 	ip->i_freeb = &ifreeh;
47 	ip->i_forw = ip;
48 	ip->i_back = ip;
49 	for (i = ninode; --i > 0; ) {
50 		++ip;
51 		ip->i_forw = ip;
52 		ip->i_back = ip;
53 		*ifreet = ip;
54 		ip->i_freeb = ifreet;
55 		ifreet = &ip->i_freef;
56 	}
57 	ip->i_freef = NULL;
58 }
59 
60 #ifdef notdef
61 /*
62  * Find an inode if it is incore.
63  * This is the equivalent, for inodes,
64  * of ``incore'' in bio.c or ``pfind'' in subr.c.
65  */
66 struct inode *
67 ifind(dev, ino)
68 	dev_t dev;
69 	ino_t ino;
70 {
71 	register struct inode *ip;
72 	register union  ihead *ih;
73 
74 	ih = &ihead[INOHASH(dev, ino)];
75 	for (ip = ih->ih_chain[0]; ip != (struct inode *)ih; ip = ip->i_forw)
76 		if (ino==ip->i_number && dev==ip->i_dev)
77 			return (ip);
78 	return ((struct inode *)0);
79 }
80 #endif notdef
81 
82 /*
83  * Look up an inode by device,inumber.
84  * If it is in core (in the inode structure),
85  * honor the locking protocol.
86  * If it is not in core, read it in from the
87  * specified device.
88  * If the inode is mounted on, perform
89  * the indicated indirection.
90  * In all cases, a pointer to a locked
91  * inode structure is returned.
92  *
93  * panic: no imt -- if the mounted file
94  *	system is not in the mount table.
95  *	"cannot happen"
96  */
97 struct inode *
98 iget(dev, fs, ino)
99 	dev_t dev;
100 	register struct fs *fs;
101 	ino_t ino;
102 {
103 	register struct inode *ip;
104 	register union  ihead *ih;
105 	register struct mount *mp;
106 	register struct buf *bp;
107 	register struct dinode *dp;
108 	register struct inode *iq;
109 
110 
111 loop:
112 	ih = &ihead[INOHASH(dev, ino)];
113 	for (ip = ih->ih_chain[0]; ip != (struct inode *)ih; ip = ip->i_forw)
114 		if (ino == ip->i_number && dev == ip->i_dev) {
115 			/*
116 			 * Following is essentially an inline expanded
117 			 * copy of igrab(), expanded inline for speed,
118 			 * and so that the test for a mounted on inode
119 			 * can be deferred until after we are sure that
120 			 * the inode isn't busy.
121 			 */
122 			if ((ip->i_flag&ILOCKED) != 0) {
123 				ip->i_flag |= IWANT;
124 				sleep((caddr_t)ip, PINOD);
125 				goto loop;
126 			}
127 			if ((ip->i_flag&IMOUNT) != 0) {
128 				for (mp = &mount[0]; mp < &mount[NMOUNT]; mp++)
129 					if(mp->m_inodp == ip) {
130 						dev = mp->m_dev;
131 						fs = mp->m_bufp->b_un.b_fs;
132 						ino = ROOTINO;
133 						goto loop;
134 					}
135 				panic("no imt");
136 			}
137 			if (ip->i_count == 0) {		/* ino on free list */
138 				if (iq = ip->i_freef)
139 					iq->i_freeb = ip->i_freeb;
140 				else
141 					ifreet = ip->i_freeb;
142 				*ip->i_freeb = iq;
143 				ip->i_freef = NULL;
144 				ip->i_freeb = NULL;
145 			}
146 			ip->i_count++;
147 			ip->i_flag |= ILOCKED;
148 			return(ip);
149 		}
150 
151 	if ((ip = ifreeh) == NULL) {
152 		tablefull("inode");
153 		u.u_error = ENFILE;
154 		return(NULL);
155 	}
156 	if (ip->i_count)
157 		panic("free inode isn't");
158 	if (iq = ip->i_freef)
159 		iq->i_freeb = &ifreeh;
160 	ifreeh = iq;
161 	ip->i_freef = NULL;
162 	ip->i_freeb = NULL;
163 	/*
164 	 * Now to take inode off the hash chain it was on
165 	 * (initially, or after an iflush, it is on a "hash chain"
166 	 * consisting entirely of itself, and pointed to by no-one,
167 	 * but that doesn't matter), and put it on the chain for
168 	 * its new (ino, dev) pair
169 	 */
170 	remque(ip);
171 	insque(ip, ih);
172 	ip->i_dev = dev;
173 	ip->i_fs = fs;
174 	ip->i_number = ino;
175 	cacheinval(ip);
176 	ip->i_flag = ILOCKED;
177 	ip->i_count++;
178 	ip->i_lastr = 0;
179 #ifdef QUOTA
180 	dqrele(ip->i_dquot);
181 #endif
182 	bp = bread(dev, fsbtodb(fs, itod(fs, ino)), (int)fs->fs_bsize);
183 	/*
184 	 * Check I/O errors
185 	 */
186 	if ((bp->b_flags&B_ERROR) != 0) {
187 		brelse(bp);
188 		/*
189 		 * the inode doesn't contain anything useful, so it would
190 		 * be misleading to leave it on its hash chain.
191 		 * 'iput' will take care of putting it back on the free list.
192 		 */
193 		remque(ip);
194 		ip->i_forw = ip;
195 		ip->i_back = ip;
196 		/*
197 		 * we also loose its inumber, just in case (as iput
198 		 * doesn't do that any more) - but as it isn't on its
199 		 * hash chain, I doubt if this is really necessary .. kre
200 		 * (probably the two methods are interchangable)
201 		 */
202 		ip->i_number = 0;
203 #ifdef QUOTA
204 		ip->i_dquot = NODQUOT;
205 #endif
206 		iput(ip);
207 		return(NULL);
208 	}
209 	dp = bp->b_un.b_dino;
210 	dp += itoo(fs, ino);
211 	ip->i_ic = dp->di_ic;
212 	brelse(bp);
213 #ifdef QUOTA
214 	if (ip->i_mode == 0)
215 		ip->i_dquot = NODQUOT;
216 	else
217 		ip->i_dquot = inoquota(ip);
218 #endif
219 	return (ip);
220 }
221 
222 /*
223  * Convert a pointer to an inode into a reference to an inode.
224  *
225  * This is basically the internal piece of iget (after the
226  * inode pointer is located) but without the test for mounted
227  * filesystems.  It is caller's responsibility to check that
228  * the inode pointer is valid.
229  */
230 igrab(ip)
231 	register struct inode *ip;
232 {
233 	while ((ip->i_flag&ILOCKED) != 0) {
234 		ip->i_flag |= IWANT;
235 		sleep((caddr_t)ip, PINOD);
236 	}
237 	if (ip->i_count == 0) {		/* ino on free list */
238 		register struct inode *iq;
239 
240 		if (iq = ip->i_freef)
241 			iq->i_freeb = ip->i_freeb;
242 		else
243 			ifreet = ip->i_freeb;
244 		*ip->i_freeb = iq;
245 		ip->i_freef = NULL;
246 		ip->i_freeb = NULL;
247 	}
248 	ip->i_count++;
249 	ip->i_flag |= ILOCKED;
250 }
251 
252 /*
253  * Decrement reference count of
254  * an inode structure.
255  * On the last reference,
256  * write the inode out and if necessary,
257  * truncate and deallocate the file.
258  */
259 iput(ip)
260 	register struct inode *ip;
261 {
262 
263 	if ((ip->i_flag & ILOCKED) == 0)
264 		panic("iput");
265 	IUNLOCK(ip);
266 	irele(ip);
267 }
268 
269 irele(ip)
270 	register struct inode *ip;
271 {
272 	int mode;
273 
274 	if (ip->i_count == 1) {
275 		ip->i_flag |= ILOCKED;
276 		if (ip->i_nlink <= 0 && ip->i_fs->fs_ronly == 0) {
277 			itrunc(ip, (u_long)0);
278 			mode = ip->i_mode;
279 			ip->i_mode = 0;
280 			ip->i_rdev = 0;
281 			ip->i_flag |= IUPD|ICHG;
282 			ifree(ip, ip->i_number, mode);
283 #ifdef QUOTA
284 			(void) chkiq(ip->i_dev, ip, ip->i_uid, 0);
285 			dqrele(ip->i_dquot);
286 			ip->i_dquot = NODQUOT;
287 #endif
288 		}
289 		IUPDAT(ip, &time, &time, 0);
290 		IUNLOCK(ip);
291 		ip->i_flag = 0;
292 		/*
293 		 * Put the inode on the end of the free list.
294 		 * Possibly in some cases it would be better to
295 		 * put the inode at the head of the free list,
296 		 * (eg: where i_mode == 0 || i_number == 0)
297 		 * but I will think about that later .. kre
298 		 * (i_number is rarely 0 - only after an i/o error in iget,
299 		 * where i_mode == 0, the inode will probably be wanted
300 		 * again soon for an ialloc, so possibly we should keep it)
301 		 */
302 		if (ifreeh) {
303 			*ifreet = ip;
304 			ip->i_freeb = ifreet;
305 		} else {
306 			ifreeh = ip;
307 			ip->i_freeb = &ifreeh;
308 		}
309 		ip->i_freef = NULL;
310 		ifreet = &ip->i_freef;
311 	} else if (!(ip->i_flag & ILOCKED))
312 		ITIMES(ip, &time, &time);
313 	ip->i_count--;
314 }
315 
316 /*
317  * Check accessed and update flags on
318  * an inode structure.
319  * If any is on, update the inode
320  * with the current time.
321  * If waitfor is given, then must insure
322  * i/o order so wait for write to complete.
323  */
324 iupdat(ip, ta, tm, waitfor)
325 	register struct inode *ip;
326 	struct timeval *ta, *tm;
327 	int waitfor;
328 {
329 	register struct buf *bp;
330 	struct dinode *dp;
331 	register struct fs *fp;
332 
333 	fp = ip->i_fs;
334 	if ((ip->i_flag & (IUPD|IACC|ICHG|IMOD)) != 0) {
335 		if (fp->fs_ronly)
336 			return;
337 		bp = bread(ip->i_dev, fsbtodb(fp, itod(fp, ip->i_number)),
338 			(int)fp->fs_bsize);
339 		if (bp->b_flags & B_ERROR) {
340 			brelse(bp);
341 			return;
342 		}
343 		if (ip->i_flag&IACC)
344 			ip->i_atime = ta->tv_sec;
345 		if (ip->i_flag&IUPD)
346 			ip->i_mtime = tm->tv_sec;
347 		if (ip->i_flag&ICHG)
348 			ip->i_ctime = time.tv_sec;
349 		ip->i_flag &= ~(IUPD|IACC|ICHG|IMOD);
350 		dp = bp->b_un.b_dino + itoo(fp, ip->i_number);
351 		dp->di_ic = ip->i_ic;
352 		if (waitfor)
353 			bwrite(bp);
354 		else
355 			bdwrite(bp);
356 	}
357 }
358 
359 #define	SINGLE	0	/* index of single indirect block */
360 #define	DOUBLE	1	/* index of double indirect block */
361 #define	TRIPLE	2	/* index of triple indirect block */
362 /*
363  * Truncate the inode ip to at most
364  * length size.  Free affected disk
365  * blocks -- the blocks of the file
366  * are removed in reverse order.
367  *
368  * NB: triple indirect blocks are untested.
369  */
370 itrunc(oip, length)
371 	register struct inode *oip;
372 	u_long length;
373 {
374 	register daddr_t lastblock;
375 	daddr_t bn, lastiblock[NIADDR];
376 	register struct fs *fs;
377 	register struct inode *ip;
378 	struct buf *bp;
379 	int offset, lbn, osize, size, error, count, level, s;
380 	long nblocks, blocksreleased = 0;
381 	register int i;
382 	dev_t dev;
383 	struct inode tip;
384 	extern long indirtrunc();
385 	extern struct cmap *mfind();
386 
387 	if (oip->i_size <= length) {
388 		oip->i_flag |= ICHG|IUPD;
389 		iupdat(oip, &time, &time, 1);
390 		return;
391 	}
392 	/*
393 	 * Calculate index into inode's block list of
394 	 * last direct and indirect blocks (if any)
395 	 * which we want to keep.  Lastblock is -1 when
396 	 * the file is truncated to 0.
397 	 */
398 	fs = oip->i_fs;
399 	lastblock = lblkno(fs, length + fs->fs_bsize - 1) - 1;
400 	lastiblock[SINGLE] = lastblock - NDADDR;
401 	lastiblock[DOUBLE] = lastiblock[SINGLE] - NINDIR(fs);
402 	lastiblock[TRIPLE] = lastiblock[DOUBLE] - NINDIR(fs) * NINDIR(fs);
403 	nblocks = btodb(fs->fs_bsize);
404 	/*
405 	 * Update the size of the file. If the file is not being
406 	 * truncated to a block boundry, the contents of the
407 	 * partial block following the end of the file must be
408 	 * zero'ed in case it ever become accessable again because
409 	 * of subsequent file growth.
410 	 */
411 	osize = oip->i_size;
412 	offset = blkoff(fs, length);
413 	if (offset == 0) {
414 		oip->i_size = length;
415 	} else {
416 		lbn = lblkno(fs, length);
417 		bn = fsbtodb(fs, bmap(oip, lbn, B_WRITE, offset));
418 		if (u.u_error || (long)bn < 0)
419 			return;
420 		oip->i_size = length;
421 		size = blksize(fs, oip, lbn);
422 		count = howmany(size, DEV_BSIZE);
423 		dev = oip->i_dev;
424 		s = splimp();
425 		for (i = 0; i < count; i += CLSIZE)
426 			if (mfind(dev, bn + i))
427 				munhash(dev, bn + i);
428 		splx(s);
429 		bp = bread(dev, bn, size);
430 		if (bp->b_flags & B_ERROR) {
431 			u.u_error = EIO;
432 			oip->i_size = osize;
433 			brelse(bp);
434 			return;
435 		}
436 		bzero(bp->b_un.b_addr + offset, size - offset);
437 		bdwrite(bp);
438 	}
439 	/*
440 	 * Update file and block pointers
441 	 * on disk before we start freeing blocks.
442 	 * If we crash before free'ing blocks below,
443 	 * the blocks will be returned to the free list.
444 	 * lastiblock values are also normalized to -1
445 	 * for calls to indirtrunc below.
446 	 */
447 	tip = *oip;
448 	tip.i_size = osize;
449 	for (level = TRIPLE; level >= SINGLE; level--)
450 		if (lastiblock[level] < 0) {
451 			oip->i_ib[level] = 0;
452 			lastiblock[level] = -1;
453 		}
454 	for (i = NDADDR - 1; i > lastblock; i--)
455 		oip->i_db[i] = 0;
456 	oip->i_flag |= ICHG|IUPD;
457 	syncip(oip);
458 
459 	/*
460 	 * Indirect blocks first.
461 	 */
462 	ip = &tip;
463 	for (level = TRIPLE; level >= SINGLE; level--) {
464 		bn = ip->i_ib[level];
465 		if (bn != 0) {
466 			blocksreleased +=
467 			    indirtrunc(ip, bn, lastiblock[level], level);
468 			if (lastiblock[level] < 0) {
469 				ip->i_ib[level] = 0;
470 				free(ip, bn, (off_t)fs->fs_bsize);
471 				blocksreleased += nblocks;
472 			}
473 		}
474 		if (lastiblock[level] >= 0)
475 			goto done;
476 	}
477 
478 	/*
479 	 * All whole direct blocks or frags.
480 	 */
481 	for (i = NDADDR - 1; i > lastblock; i--) {
482 		register int size;
483 
484 		bn = ip->i_db[i];
485 		if (bn == 0)
486 			continue;
487 		ip->i_db[i] = 0;
488 		size = (off_t)blksize(fs, ip, i);
489 		free(ip, bn, size);
490 		blocksreleased += btodb(size);
491 	}
492 	if (lastblock < 0)
493 		goto done;
494 
495 	/*
496 	 * Finally, look for a change in size of the
497 	 * last direct block; release any frags.
498 	 */
499 	bn = ip->i_db[lastblock];
500 	if (bn != 0) {
501 		int oldspace, newspace;
502 
503 		/*
504 		 * Calculate amount of space we're giving
505 		 * back as old block size minus new block size.
506 		 */
507 		oldspace = blksize(fs, ip, lastblock);
508 		ip->i_size = length;
509 		newspace = blksize(fs, ip, lastblock);
510 		if (newspace == 0)
511 			panic("itrunc: newspace");
512 		if (oldspace - newspace > 0) {
513 			/*
514 			 * Block number of space to be free'd is
515 			 * the old block # plus the number of frags
516 			 * required for the storage we're keeping.
517 			 */
518 			bn += numfrags(fs, newspace);
519 			free(ip, bn, oldspace - newspace);
520 			blocksreleased += btodb(oldspace - newspace);
521 		}
522 	}
523 done:
524 /* BEGIN PARANOIA */
525 	for (level = SINGLE; level <= TRIPLE; level++)
526 		if (ip->i_ib[level] != oip->i_ib[level])
527 			panic("itrunc1");
528 	for (i = 0; i < NDADDR; i++)
529 		if (ip->i_db[i] != oip->i_db[i])
530 			panic("itrunc2");
531 /* END PARANOIA */
532 	oip->i_blocks -= blocksreleased;
533 	if (oip->i_blocks < 0)			/* sanity */
534 		oip->i_blocks = 0;
535 	oip->i_flag |= ICHG;
536 #ifdef QUOTA
537 	(void) chkdq(oip, -blocksreleased, 0);
538 #endif
539 }
540 
541 /*
542  * Release blocks associated with the inode ip and
543  * stored in the indirect block bn.  Blocks are free'd
544  * in LIFO order up to (but not including) lastbn.  If
545  * level is greater than SINGLE, the block is an indirect
546  * block and recursive calls to indirtrunc must be used to
547  * cleanse other indirect blocks.
548  *
549  * NB: triple indirect blocks are untested.
550  */
551 long
552 indirtrunc(ip, bn, lastbn, level)
553 	register struct inode *ip;
554 	daddr_t bn, lastbn;
555 	int level;
556 {
557 	register int i;
558 	struct buf *bp, *copy;
559 	register daddr_t *bap;
560 	register struct fs *fs = ip->i_fs;
561 	daddr_t nb, last;
562 	long factor;
563 	int blocksreleased = 0, nblocks;
564 
565 	/*
566 	 * Calculate index in current block of last
567 	 * block to be kept.  -1 indicates the entire
568 	 * block so we need not calculate the index.
569 	 */
570 	factor = 1;
571 	for (i = SINGLE; i < level; i++)
572 		factor *= NINDIR(fs);
573 	last = lastbn;
574 	if (lastbn > 0)
575 		last /= factor;
576 	nblocks = btodb(fs->fs_bsize);
577 	/*
578 	 * Get buffer of block pointers, zero those
579 	 * entries corresponding to blocks to be free'd,
580 	 * and update on disk copy first.
581 	 */
582 	copy = geteblk((int)fs->fs_bsize);
583 	bp = bread(ip->i_dev, fsbtodb(fs, bn), (int)fs->fs_bsize);
584 	if (bp->b_flags&B_ERROR) {
585 		brelse(copy);
586 		brelse(bp);
587 		return (0);
588 	}
589 	bap = bp->b_un.b_daddr;
590 	bcopy((caddr_t)bap, (caddr_t)copy->b_un.b_daddr, (u_int)fs->fs_bsize);
591 	bzero((caddr_t)&bap[last + 1],
592 	  (u_int)(NINDIR(fs) - (last + 1)) * sizeof (daddr_t));
593 	bwrite(bp);
594 	bp = copy, bap = bp->b_un.b_daddr;
595 
596 	/*
597 	 * Recursively free totally unused blocks.
598 	 */
599 	for (i = NINDIR(fs) - 1; i > last; i--) {
600 		nb = bap[i];
601 		if (nb == 0)
602 			continue;
603 		if (level > SINGLE)
604 			blocksreleased +=
605 			    indirtrunc(ip, nb, (daddr_t)-1, level - 1);
606 		free(ip, nb, (int)fs->fs_bsize);
607 		blocksreleased += nblocks;
608 	}
609 
610 	/*
611 	 * Recursively free last partial block.
612 	 */
613 	if (level > SINGLE && lastbn >= 0) {
614 		last = lastbn % factor;
615 		nb = bap[i];
616 		if (nb != 0)
617 			blocksreleased += indirtrunc(ip, nb, last, level - 1);
618 	}
619 	brelse(bp);
620 	return (blocksreleased);
621 }
622 
623 /*
624  * remove any inodes in the inode cache belonging to dev
625  *
626  * There should not be any active ones, return error if any are found
627  * (nb: this is a user error, not a system err)
628  *
629  * Also, count the references to dev by block devices - this really
630  * has nothing to do with the object of the procedure, but as we have
631  * to scan the inode table here anyway, we might as well get the
632  * extra benefit.
633  *
634  * this is called from sumount()/sys3.c when dev is being unmounted
635  */
636 #ifdef QUOTA
637 iflush(dev, iq)
638 	dev_t dev;
639 	struct inode *iq;
640 #else
641 iflush(dev)
642 	dev_t dev;
643 #endif
644 {
645 	register struct inode *ip;
646 	register open = 0;
647 
648 	for (ip = inode; ip < inodeNINODE; ip++) {
649 #ifdef QUOTA
650 		if (ip != iq && ip->i_dev == dev)
651 #else
652 		if (ip->i_dev == dev)
653 #endif
654 			if (ip->i_count)
655 				return(-1);
656 			else {
657 				remque(ip);
658 				ip->i_forw = ip;
659 				ip->i_back = ip;
660 				/*
661 				 * as i_count == 0, the inode was on the free
662 				 * list already, just leave it there, it will
663 				 * fall off the bottom eventually. We could
664 				 * perhaps move it to the head of the free
665 				 * list, but as umounts are done so
666 				 * infrequently, we would gain very little,
667 				 * while making the code bigger.
668 				 */
669 #ifdef QUOTA
670 				dqrele(ip->i_dquot);
671 				ip->i_dquot = NODQUOT;
672 #endif
673 			}
674 		else if (ip->i_count && (ip->i_mode&IFMT)==IFBLK &&
675 		    ip->i_rdev == dev)
676 			open++;
677 	}
678 	return (open);
679 }
680 
681 /*
682  * Lock an inode. If its already locked, set the WANT bit and sleep.
683  */
684 ilock(ip)
685 	register struct inode *ip;
686 {
687 
688 	ILOCK(ip);
689 }
690 
691 /*
692  * Unlock an inode.  If WANT bit is on, wakeup.
693  */
694 iunlock(ip)
695 	register struct inode *ip;
696 {
697 
698 	IUNLOCK(ip);
699 }
700