xref: /original-bsd/sys/ufs/ufs/ufs_inode.c (revision a6d4d8bb)
1 /*
2  * Copyright (c) 1982, 1986, 1989 Regents of the University of California.
3  * All rights reserved.
4  *
5  * %sccs.include.redist.c%
6  *
7  *	@(#)ufs_inode.c	7.40.1.1 (Berkeley) 06/03/91
8  */
9 
10 #include "param.h"
11 #include "systm.h"
12 #include "mount.h"
13 #include "proc.h"
14 #include "file.h"
15 #include "buf.h"
16 #include "vnode.h"
17 #include "kernel.h"
18 #include "malloc.h"
19 
20 #include "quota.h"
21 #include "inode.h"
22 #include "fs.h"
23 #include "ufsmount.h"
24 
25 #define	INOHSZ	512
26 #if	((INOHSZ&(INOHSZ-1)) == 0)
27 #define	INOHASH(dev,ino)	(((dev)+(ino))&(INOHSZ-1))
28 #else
29 #define	INOHASH(dev,ino)	(((unsigned)((dev)+(ino)))%INOHSZ)
30 #endif
31 
32 union ihead {
33 	union  ihead *ih_head[2];
34 	struct inode *ih_chain[2];
35 } ihead[INOHSZ];
36 
37 int prtactive;	/* 1 => print out reclaim of active vnodes */
38 
39 /*
40  * Initialize hash links for inodes.
41  */
42 ufs_init()
43 {
44 	register int i;
45 	register union ihead *ih = ihead;
46 
47 #ifndef lint
48 	if (VN_MAXPRIVATE < sizeof(struct inode))
49 		panic("ihinit: too small");
50 #endif /* not lint */
51 	for (i = INOHSZ; --i >= 0; ih++) {
52 		ih->ih_head[0] = ih;
53 		ih->ih_head[1] = ih;
54 	}
55 #ifdef QUOTA
56 	dqinit();
57 #endif /* QUOTA */
58 }
59 
60 /*
61  * Look up a UFS dinode number to find its incore vnode.
62  * If it is not in core, read it in from the specified device.
63  * If it is in core, wait for the lock bit to clear, then
64  * return the inode locked. Detection and handling of mount
65  * points must be done by the calling routine.
66  */
iget(xp, ino, ipp)
	struct inode *xp;
	ino_t ino;
	struct inode **ipp;
{
	dev_t dev = xp->i_dev;
	struct mount *mntp = ITOV(xp)->v_mount;
	register struct fs *fs = VFSTOUFS(mntp)->um_fs;
	extern struct vnodeops ufs_vnodeops, spec_inodeops;
	register struct inode *ip, *iq;
	register struct vnode *vp;
	struct vnode *nvp;
	struct buf *bp;
	struct dinode *dp;
	union ihead *ih;
	int i, error;

	ih = &ihead[INOHASH(dev, ino)];
loop:
	/*
	 * Scan the hash chain for an incore inode with the same
	 * (dev, ino) identity.
	 */
	for (ip = ih->ih_chain[0]; ip != (struct inode *)ih; ip = ip->i_forw) {
		if (ino != ip->i_number || dev != ip->i_dev)
			continue;
		/*
		 * Locked by someone else: note our interest and sleep.
		 * The chain may have changed while we slept, so the
		 * whole scan is restarted.
		 */
		if ((ip->i_flag&ILOCKED) != 0) {
			ip->i_flag |= IWANT;
			sleep((caddr_t)ip, PINOD);
			goto loop;
		}
		/* vget() fails if the vnode is being discarded; rescan. */
		if (vget(ITOV(ip)))
			goto loop;
		*ipp = ip;
		return(0);
	}
	/*
	 * Allocate a new inode.
	 */
	if (error = getnewvnode(VT_UFS, mntp, &ufs_vnodeops, &nvp)) {
		*ipp = 0;
		return (error);
	}
	ip = VTOI(nvp);
	ip->i_vnode = nvp;
	ip->i_flag = 0;
	ip->i_devvp = 0;
	ip->i_mode = 0;
	ip->i_diroff = 0;
	ip->i_lockf = 0;
#ifdef QUOTA
	for (i = 0; i < MAXQUOTAS; i++)
		ip->i_dquot[i] = NODQUOT;
#endif
	/*
	 * Put it onto its hash chain and lock it so that other requests for
	 * this inode will block if they arrive while we are sleeping waiting
	 * for old data structures to be purged or for the contents of the
	 * disk portion of this inode to be read.
	 */
	ip->i_dev = dev;
	ip->i_number = ino;
	insque(ip, ih);
	ILOCK(ip);
	/*
	 * Read in the disk contents for the inode.
	 */
	if (error = bread(VFSTOUFS(mntp)->um_devvp, fsbtodb(fs, itod(fs, ino)),
	    (int)fs->fs_bsize, NOCRED, &bp)) {
		/*
		 * The inode does not contain anything useful, so it would
		 * be misleading to leave it on its hash chain.
		 * Iput() will take care of putting it back on the free list.
		 */
		remque(ip);
		ip->i_forw = ip;
		ip->i_back = ip;
		/*
		 * Unlock and discard unneeded inode.
		 * NB: bread() returns a buffer even on failure, so it
		 * must still be released here.
		 */
		iput(ip);
		brelse(bp);
		*ipp = 0;
		return (error);
	}
	/* Copy the dinode for this slot out of the filesystem block. */
	dp = bp->b_un.b_dino;
	dp += itoo(fs, ino);
	ip->i_din = *dp;
	brelse(bp);
	/*
	 * Initialize the associated vnode
	 */
	vp = ITOV(ip);
	vp->v_type = IFTOVT(ip->i_mode);
	if (vp->v_type == VFIFO) {
#ifdef FIFO
		extern struct vnodeops fifo_inodeops;
		vp->v_op = &fifo_inodeops;
#else
		/* FIFOs not configured into this kernel. */
		iput(ip);
		*ipp = 0;
		return (EOPNOTSUPP);
#endif /* FIFO */
	}
	if (vp->v_type == VCHR || vp->v_type == VBLK) {
		vp->v_op = &spec_inodeops;
		/*
		 * checkalias() returns a pre-existing vnode for the same
		 * device if one exists; if so we move our identity onto
		 * it and discard the vnode we just set up.
		 */
		if (nvp = checkalias(vp, ip->i_rdev, mntp)) {
			/*
			 * Reinitialize aliased inode.
			 */
			vp = nvp;
			iq = VTOI(vp);
			iq->i_vnode = vp;
			iq->i_flag = 0;
			ILOCK(iq);
			iq->i_din = ip->i_din;
			iq->i_dev = dev;
			iq->i_number = ino;
			insque(iq, ih);
			/*
			 * Discard unneeded vnode
			 * (i_mode = 0 marks it as stale for ufs_inactive).
			 */
			ip->i_mode = 0;
			iput(ip);
			ip = iq;
		}
	}
	if (ino == ROOTINO)
		vp->v_flag |= VROOT;
	/*
	 * Finish inode initialization.
	 */
	ip->i_fs = fs;
	ip->i_devvp = VFSTOUFS(mntp)->um_devvp;
	VREF(ip->i_devvp);
	/*
	 * Set up a generation number for this inode if it does not
	 * already have one. This should only happen on old filesystems.
	 */
	if (ip->i_gen == 0) {
		if (++nextgennumber < (u_long)time.tv_sec)
			nextgennumber = time.tv_sec;
		ip->i_gen = nextgennumber;
		if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0)
			ip->i_flag |= IMOD;
	}
	*ipp = ip;
	return (0);
}
212 
213 /*
214  * Unlock and decrement the reference count of an inode structure.
215  */
216 iput(ip)
217 	register struct inode *ip;
218 {
219 
220 	if ((ip->i_flag & ILOCKED) == 0)
221 		panic("iput");
222 	IUNLOCK(ip);
223 	vrele(ITOV(ip));
224 }
225 
226 /*
227  * Last reference to an inode, write the inode out and if necessary,
228  * truncate and deallocate the file.
229  */
ufs_inactive(vp, p)
	struct vnode *vp;
	struct proc *p;
{
	register struct inode *ip = VTOI(vp);
	int mode, error = 0;

	if (prtactive && vp->v_usecount != 0)
		vprint("ufs_inactive: pushing active", vp);
	/*
	 * Get rid of inodes related to stale file handles.
	 */
	if (ip->i_mode == 0) {
		if ((vp->v_flag & VXLOCK) == 0)
			vgone(vp);
		return (0);
	}
	ILOCK(ip);
	/*
	 * The file has been unlinked (link count zero) and this was
	 * the last reference: release its blocks and free the
	 * on-disk inode, unless the filesystem is read-only.
	 */
	if (ip->i_nlink <= 0 && (vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
#ifdef QUOTA
		if (!getinoquota(ip))
			(void) chkiq(ip, -1, NOCRED, 0);
#endif
		error = itrunc(ip, (u_long)0, 0);
		mode = ip->i_mode;	/* ifree() needs the old mode */
		ip->i_mode = 0;
		ip->i_rdev = 0;
		ip->i_flag |= IUPD|ICHG;
		ifree(ip, ip->i_number, mode);
	}
	/* Flush any pending time stamps, then drop the lock. */
	IUPDAT(ip, &time, &time, 0);
	IUNLOCK(ip);
	ip->i_flag = 0;
	/*
	 * If we are done with the inode, reclaim it
	 * so that it can be reused immediately.
	 */
	if (vp->v_usecount == 0 && ip->i_mode == 0)
		vgone(vp);
	return (error);
}
271 
272 /*
273  * Reclaim an inode so that it can be used for other purposes.
274  */
275 ufs_reclaim(vp)
276 	register struct vnode *vp;
277 {
278 	register struct inode *ip = VTOI(vp);
279 	int i;
280 
281 	if (prtactive && vp->v_usecount != 0)
282 		vprint("ufs_reclaim: pushing active", vp);
283 	/*
284 	 * Remove the inode from its hash chain.
285 	 */
286 	remque(ip);
287 	ip->i_forw = ip;
288 	ip->i_back = ip;
289 	/*
290 	 * Purge old data structures associated with the inode.
291 	 */
292 	cache_purge(vp);
293 	if (ip->i_devvp) {
294 		vrele(ip->i_devvp);
295 		ip->i_devvp = 0;
296 	}
297 #ifdef QUOTA
298 	for (i = 0; i < MAXQUOTAS; i++) {
299 		if (ip->i_dquot[i] != NODQUOT) {
300 			dqrele(vp, ip->i_dquot[i]);
301 			ip->i_dquot[i] = NODQUOT;
302 		}
303 	}
304 #endif
305 	ip->i_flag = 0;
306 	return (0);
307 }
308 
309 /*
310  * Update the access, modified, and inode change times as specified
311  * by the IACC, IMOD, and ICHG flags respectively. The IUPD flag
312  * is used to specify that the inode needs to be updated but that
313  * the times have already been set. The access and modified times
314  * are taken from the second and third parameters; the inode change
315  * time is always taken from the current time. If waitfor is set,
316  * then wait for the disk write of the inode to complete.
317  */
318 iupdat(ip, ta, tm, waitfor)
319 	register struct inode *ip;
320 	struct timeval *ta, *tm;
321 	int waitfor;
322 {
323 	struct buf *bp;
324 	struct vnode *vp = ITOV(ip);
325 	struct dinode *dp;
326 	register struct fs *fs;
327 	int error;
328 
329 	fs = ip->i_fs;
330 	if ((ip->i_flag & (IUPD|IACC|ICHG|IMOD)) == 0)
331 		return (0);
332 	if (vp->v_mount->mnt_flag & MNT_RDONLY)
333 		return (0);
334 	error = bread(ip->i_devvp, fsbtodb(fs, itod(fs, ip->i_number)),
335 		(int)fs->fs_bsize, NOCRED, &bp);
336 	if (error) {
337 		brelse(bp);
338 		return (error);
339 	}
340 	if (ip->i_flag&IACC)
341 		ip->i_atime = ta->tv_sec;
342 	if (ip->i_flag&IUPD)
343 		ip->i_mtime = tm->tv_sec;
344 	if (ip->i_flag&ICHG)
345 		ip->i_ctime = time.tv_sec;
346 	ip->i_flag &= ~(IUPD|IACC|ICHG|IMOD);
347 	dp = bp->b_un.b_dino + itoo(fs, ip->i_number);
348 	*dp = ip->i_din;
349 	if (waitfor) {
350 		return (bwrite(bp));
351 	} else {
352 		bdwrite(bp);
353 		return (0);
354 	}
355 }
356 
357 #define	SINGLE	0	/* index of single indirect block */
358 #define	DOUBLE	1	/* index of double indirect block */
359 #define	TRIPLE	2	/* index of triple indirect block */
360 /*
361  * Truncate the inode ip to at most length size.  Free affected disk
362  * blocks -- the blocks of the file are removed in reverse order.
363  *
364  * NB: triple indirect blocks are untested.
365  */
itrunc(oip, length, flags)
	register struct inode *oip;
	u_long length;
	int flags;
{
	register daddr_t lastblock;
	daddr_t bn, lbn, lastiblock[NIADDR];
	register struct fs *fs;
	register struct inode *ip;
	struct buf *bp;
	int offset, osize, size, level;
	long count, nblocks, blocksreleased = 0;
	register int i;
	int aflags, error, allerror;
	struct inode tip;

	/* Tell the pager the new size before touching the inode. */
	vnode_pager_setsize(ITOV(oip), length);
	if (oip->i_size <= length) {
		/* Not shrinking: just time-stamp the inode synchronously. */
		oip->i_flag |= ICHG|IUPD;
		error = iupdat(oip, &time, &time, 1);
		return (error);
	}
	/*
	 * Calculate index into inode's block list of
	 * last direct and indirect blocks (if any)
	 * which we want to keep.  Lastblock is -1 when
	 * the file is truncated to 0.
	 */
	fs = oip->i_fs;
	lastblock = lblkno(fs, length + fs->fs_bsize - 1) - 1;
	lastiblock[SINGLE] = lastblock - NDADDR;
	lastiblock[DOUBLE] = lastiblock[SINGLE] - NINDIR(fs);
	lastiblock[TRIPLE] = lastiblock[DOUBLE] - NINDIR(fs) * NINDIR(fs);
	nblocks = btodb(fs->fs_bsize);
	/*
	 * Update the size of the file. If the file is not being
	 * truncated to a block boundary, the contents of the
	 * partial block following the end of the file must be
	 * zero'ed in case it ever becomes accessible again because
	 * of subsequent file growth.
	 */
	osize = oip->i_size;
	offset = blkoff(fs, length);
	if (offset == 0) {
		oip->i_size = length;
	} else {
		lbn = lblkno(fs, length);
		aflags = B_CLRBUF;
		if (flags & IO_SYNC)
			aflags |= B_SYNC;
#ifdef QUOTA
		if (error = getinoquota(oip))
			return (error);
#endif
		if (error = balloc(oip, lbn, offset, &bp, aflags))
			return (error);
		oip->i_size = length;
		size = blksize(fs, oip, lbn);
		(void) vnode_pager_uncache(ITOV(oip));
		/* Zero from the new EOF to the end of the fragment. */
		bzero(bp->b_un.b_addr + offset, (unsigned)(size - offset));
		allocbuf(bp, size);
		if (flags & IO_SYNC)
			bwrite(bp);
		else
			bdwrite(bp);
	}
	/*
	 * Update file and block pointers
	 * on disk before we start freeing blocks.
	 * If we crash before free'ing blocks below,
	 * the blocks will be returned to the free list.
	 * lastiblock values are also normalized to -1
	 * for calls to indirtrunc below.
	 */
	tip = *oip;	/* snapshot of the old block pointers for the frees */
	tip.i_size = osize;
	for (level = TRIPLE; level >= SINGLE; level--)
		if (lastiblock[level] < 0) {
			oip->i_ib[level] = 0;
			lastiblock[level] = -1;
		}
	for (i = NDADDR - 1; i > lastblock; i--)
		oip->i_db[i] = 0;
	oip->i_flag |= ICHG|IUPD;
	vinvalbuf(ITOV(oip), (length > 0));
	allerror = iupdat(oip, &time, &time, MNT_WAIT);

	/*
	 * Indirect blocks first.
	 */
	ip = &tip;
	for (level = TRIPLE; level >= SINGLE; level--) {
		bn = ip->i_ib[level];
		if (bn != 0) {
			error = indirtrunc(ip, bn, lastiblock[level], level,
				&count);
			if (error)
				allerror = error;
			blocksreleased += count;
			/* Whole indirect tree gone: free the block itself. */
			if (lastiblock[level] < 0) {
				ip->i_ib[level] = 0;
				blkfree(ip, bn, (off_t)fs->fs_bsize);
				blocksreleased += nblocks;
			}
		}
		/* A partially-kept level means nothing below it is freed. */
		if (lastiblock[level] >= 0)
			goto done;
	}

	/*
	 * All whole direct blocks or frags.
	 */
	for (i = NDADDR - 1; i > lastblock; i--) {
		register off_t bsize;

		bn = ip->i_db[i];
		if (bn == 0)
			continue;
		ip->i_db[i] = 0;
		bsize = (off_t)blksize(fs, ip, i);
		blkfree(ip, bn, bsize);
		blocksreleased += btodb(bsize);
	}
	if (lastblock < 0)
		goto done;

	/*
	 * Finally, look for a change in size of the
	 * last direct block; release any frags.
	 */
	bn = ip->i_db[lastblock];
	if (bn != 0) {
		off_t oldspace, newspace;

		/*
		 * Calculate amount of space we're giving
		 * back as old block size minus new block size.
		 */
		oldspace = blksize(fs, ip, lastblock);
		ip->i_size = length;
		newspace = blksize(fs, ip, lastblock);
		if (newspace == 0)
			panic("itrunc: newspace");
		if (oldspace - newspace > 0) {
			/*
			 * Block number of space to be free'd is
			 * the old block # plus the number of frags
			 * required for the storage we're keeping.
			 */
			bn += numfrags(fs, newspace);
			blkfree(ip, bn, oldspace - newspace);
			blocksreleased += btodb(oldspace - newspace);
		}
	}
done:
/* BEGIN PARANOIA */
	/* The scratch copy and the real inode must agree on all pointers. */
	for (level = SINGLE; level <= TRIPLE; level++)
		if (ip->i_ib[level] != oip->i_ib[level])
			panic("itrunc1");
	for (i = 0; i < NDADDR; i++)
		if (ip->i_db[i] != oip->i_db[i])
			panic("itrunc2");
/* END PARANOIA */
	oip->i_blocks -= blocksreleased;
	if (oip->i_blocks < 0)			/* sanity */
		oip->i_blocks = 0;
	oip->i_flag |= ICHG;
#ifdef QUOTA
	if (!getinoquota(oip))
		(void) chkdq(oip, -blocksreleased, NOCRED, 0);
#endif
	return (allerror);
}
539 
540 /*
541  * Release blocks associated with the inode ip and
542  * stored in the indirect block bn.  Blocks are free'd
543  * in LIFO order up to (but not including) lastbn.  If
544  * level is greater than SINGLE, the block is an indirect
545  * block and recursive calls to indirtrunc must be used to
546  * cleanse other indirect blocks.
547  *
548  * NB: triple indirect blocks are untested.
549  */
indirtrunc(ip, bn, lastbn, level, countp)
	register struct inode *ip;
	daddr_t bn, lastbn;
	int level;
	long *countp;
{
	register int i;
	struct buf *bp;
	register struct fs *fs = ip->i_fs;
	register daddr_t *bap;
	daddr_t *copy, nb, last;
	long blkcount, factor;
	int nblocks, blocksreleased = 0;
	int error, allerror = 0;

	/*
	 * Calculate index in current block of last
	 * block to be kept.  -1 indicates the entire
	 * block so we need not calculate the index.
	 * (factor is the number of data blocks mapped by one
	 * entry at this indirection level.)
	 */
	factor = 1;
	for (i = SINGLE; i < level; i++)
		factor *= NINDIR(fs);
	last = lastbn;
	if (lastbn > 0)
		last /= factor;
	nblocks = btodb(fs->fs_bsize);
	/*
	 * Get buffer of block pointers, zero those
	 * entries corresponding to blocks to be free'd,
	 * and update on disk copy first.
	 */
	error = bread(ip->i_devvp, fsbtodb(fs, bn), (int)fs->fs_bsize,
		NOCRED, &bp);
	if (error) {
		/* bread() returns a buffer even on failure; release it. */
		brelse(bp);
		*countp = 0;
		return (error);
	}
	bap = bp->b_un.b_daddr;
	/*
	 * Free from a private copy so the zeroed on-disk image can be
	 * written out before the blocks themselves are released.
	 */
	MALLOC(copy, daddr_t *, fs->fs_bsize, M_TEMP, M_WAITOK);
	bcopy((caddr_t)bap, (caddr_t)copy, (u_int)fs->fs_bsize);
	bzero((caddr_t)&bap[last + 1],
	  (u_int)(NINDIR(fs) - (last + 1)) * sizeof (daddr_t));
	/* The whole block is going away; no need to keep it cached. */
	if (last == -1)
		bp->b_flags |= B_INVAL;
	error = bwrite(bp);
	if (error)
		allerror = error;
	bap = copy;

	/*
	 * Recursively free totally unused blocks.
	 */
	for (i = NINDIR(fs) - 1; i > last; i--) {
		nb = bap[i];
		if (nb == 0)
			continue;
		if (level > SINGLE) {
			error = indirtrunc(ip, nb, (daddr_t)-1, level - 1,
				&blkcount);
			if (error)
				allerror = error;
			blocksreleased += blkcount;
		}
		blkfree(ip, nb, (off_t)fs->fs_bsize);
		blocksreleased += nblocks;
	}

	/*
	 * Recursively free last partial block.
	 * NB: on exit from the loop above, i == last, i.e. the
	 * index of the partially-retained indirect block.
	 */
	if (level > SINGLE && lastbn >= 0) {
		last = lastbn % factor;
		nb = bap[i];
		if (nb != 0) {
			error = indirtrunc(ip, nb, last, level - 1, &blkcount);
			if (error)
				allerror = error;
			blocksreleased += blkcount;
		}
	}
	FREE(copy, M_TEMP);
	*countp = blocksreleased;
	return (allerror);
}
636 
637 /*
638  * Lock an inode. If its already locked, set the WANT bit and sleep.
639  */
640 ilock(ip)
641 	register struct inode *ip;
642 {
643 
644 	while (ip->i_flag & ILOCKED) {
645 		ip->i_flag |= IWANT;
646 		if (ip->i_spare0 == curproc->p_pid)
647 			panic("locking against myself");
648 		ip->i_spare1 = curproc->p_pid;
649 		(void) sleep((caddr_t)ip, PINOD);
650 	}
651 	ip->i_spare1 = 0;
652 	ip->i_spare0 = curproc->p_pid;
653 	ip->i_flag |= ILOCKED;
654 	curproc->p_spare[2]++;
655 }
656 
657 /*
658  * Unlock an inode.  If WANT bit is on, wakeup.
659  */
660 iunlock(ip)
661 	register struct inode *ip;
662 {
663 
664 	if ((ip->i_flag & ILOCKED) == 0)
665 		vprint("iunlock: unlocked inode", ITOV(ip));
666 	ip->i_spare0 = 0;
667 	ip->i_flag &= ~ILOCKED;
668 	curproc->p_spare[2]--;
669 	if (ip->i_flag&IWANT) {
670 		ip->i_flag &= ~IWANT;
671 		wakeup((caddr_t)ip);
672 	}
673 }
674