/*
 * Copyright (c) 1982, 1986, 1989 Regents of the University of California.
 * All rights reserved.
 *
 * %sccs.include.redist.c%
 *
 *	@(#)lfs_inode.c	7.35 (Berkeley) 08/24/90
 */

#include "param.h"
#include "systm.h"
#include "mount.h"
#include "user.h"
#include "proc.h"
#include "file.h"
#include "buf.h"
#include "cmap.h"
#include "vnode.h"
#include "../ufs/quota.h"
#include "../ufs/inode.h"
#include "../ufs/fs.h"
#include "../ufs/ufsmount.h"
#include "kernel.h"
#include "malloc.h"

#define	INOHSZ	512
#if	((INOHSZ&(INOHSZ-1)) == 0)
#define	INOHASH(dev,ino)	(((dev)+(ino))&(INOHSZ-1))
#else
#define	INOHASH(dev,ino)	(((unsigned)((dev)+(ino)))%INOHSZ)
#endif
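
/*
 * INOHSZ is 512, a power of two, so the masking form of INOHASH
 * applies: e.g. dev 1, inode 1027 hashes to (1 + 1027) & 511 == 4.
 * The modulo form is only a fallback should INOHSZ ever be tuned
 * to a value that is not a power of two.
 */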

union ihead {
	union  ihead *ih_head[2];
	struct inode *ih_chain[2];
} ihead[INOHSZ];

int prtactive;	/* 1 => print out reclaim of active vnodes */

/*
 * Initialize hash links for inodes.
 */
ufs_init()
{
	register int i;
	register union ihead *ih = ihead;

#ifndef lint
	if (VN_MAXPRIVATE < sizeof(struct inode))
		panic("ihinit: too small");
#endif /* not lint */
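	/*
	 * Each bucket starts out as an empty circular list pointing at
	 * itself.  The ih_head/ih_chain union lets the bucket head stand
	 * in for the i_forw/i_back links of a chained inode, so the scan
	 * in iget() can stop when it wraps back around to the head.
	 */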
	for (i = INOHSZ; --i >= 0; ih++) {
		ih->ih_head[0] = ih;
		ih->ih_head[1] = ih;
	}
#ifdef QUOTA
	dqinit();
#endif /* QUOTA */
}

/*
 * Look up a vnode/inode by device, inumber.
 * If it is in core (in the inode structure),
 * honor the locking protocol.
 * If it is not in core, read it in from the
 * specified device.
 * Callers must check for mount points!!
 * In all cases, a pointer to a locked
 * inode structure is returned.
 */
iget(xp, ino, ipp)
	struct inode *xp;
	ino_t ino;
	struct inode **ipp;
{
	dev_t dev = xp->i_dev;
	struct mount *mntp = ITOV(xp)->v_mount;
	register struct fs *fs = VFSTOUFS(mntp)->um_fs;
	extern struct vnodeops ufs_vnodeops, spec_inodeops;
	register struct inode *ip, *iq;
	register struct vnode *vp;
	struct vnode *nvp;
	struct buf *bp;
	struct dinode *dp;
	union ihead *ih;
	int i, error;

	ih = &ihead[INOHASH(dev, ino)];
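	/*
	 * If we sleep waiting for the inode lock, or if vget() fails
	 * because the vnode is being reclaimed, the hash chain may have
	 * changed underneath us, so the search restarts from the head
	 * of the bucket rather than resuming mid-chain.
	 */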
loop:
	for (ip = ih->ih_chain[0]; ip != (struct inode *)ih; ip = ip->i_forw) {
		if (ino != ip->i_number || dev != ip->i_dev)
			continue;
		if ((ip->i_flag&ILOCKED) != 0) {
			ip->i_flag |= IWANT;
			sleep((caddr_t)ip, PINOD);
			goto loop;
		}
		if (vget(ITOV(ip)))
			goto loop;
		*ipp = ip;
		return(0);
	}
	/*
	 * Allocate a new inode.
	 */
	if (error = getnewvnode(VT_UFS, mntp, &ufs_vnodeops, &nvp)) {
		*ipp = 0;
		return (error);
	}
	ip = VTOI(nvp);
	ip->i_vnode = nvp;
	ip->i_flag = 0;
	ip->i_devvp = 0;
	ip->i_mode = 0;
	ip->i_diroff = 0;
#ifdef QUOTA
	for (i = 0; i < MAXQUOTAS; i++)
		ip->i_dquot[i] = NODQUOT;
#endif
	/*
	 * Put it onto its hash chain and lock it so that other requests for
	 * this inode will block if they arrive while we are sleeping waiting
	 * for old data structures to be purged or for the contents of the
	 * disk portion of this inode to be read.
	 */
	ip->i_dev = dev;
	ip->i_number = ino;
	insque(ip, ih);
	ILOCK(ip);
	/*
	 * Read in the disk contents for the inode.
	 */
	if (error = bread(VFSTOUFS(mntp)->um_devvp, fsbtodb(fs, itod(fs, ino)),
	    (int)fs->fs_bsize, NOCRED, &bp)) {
		/*
		 * The inode does not contain anything useful, so it would
		 * be misleading to leave it on its hash chain.
		 * Iput() will take care of putting it back on the free list.
		 */
		remque(ip);
		ip->i_forw = ip;
		ip->i_back = ip;
		/*
		 * Unlock and discard unneeded inode.
		 */
		iput(ip);
		brelse(bp);
		*ipp = 0;
		return (error);
	}
	dp = bp->b_un.b_dino;
	dp += itoo(fs, ino);
	ip->i_din = *dp;
	brelse(bp);
	/*
	 * Initialize the associated vnode
	 */
	vp = ITOV(ip);
	vp->v_type = IFTOVT(ip->i_mode);
	if (vp->v_type == VFIFO) {
#ifdef FIFO
		extern struct vnodeops fifo_inodeops;
		vp->v_op = &fifo_inodeops;
#else
		iput(ip);
		*ipp = 0;
		return (EOPNOTSUPP);
#endif /* FIFO */
	}
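	/*
	 * For block and character devices, checkalias() may hand back
	 * an existing vnode that already represents this rdev; in that
	 * case the freshly read disk inode is copied into the alias
	 * and the vnode we just allocated is discarded.
	 */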
	if (vp->v_type == VCHR || vp->v_type == VBLK) {
		vp->v_op = &spec_inodeops;
		if (nvp = checkalias(vp, ip->i_rdev, mntp)) {
			/*
			 * Reinitialize aliased inode.
			 */
			vp = nvp;
			iq = VTOI(vp);
			iq->i_vnode = vp;
			iq->i_flag = 0;
			ILOCK(iq);
			iq->i_din = ip->i_din;
			iq->i_dev = dev;
			iq->i_number = ino;
			insque(iq, ih);
			/*
			 * Discard unneeded vnode
			 */
			ip->i_mode = 0;
			iput(ip);
			ip = iq;
		}
	}
	if (ino == ROOTINO)
		vp->v_flag |= VROOT;
	/*
	 * Finish inode initialization.
	 */
	ip->i_fs = fs;
	ip->i_devvp = VFSTOUFS(mntp)->um_devvp;
	VREF(ip->i_devvp);
	/*
	 * Set up a generation number for this inode if it does not
	 * already have one.  The generation number distinguishes
	 * successive uses of the same inode number (NFS relies on it
	 * to detect stale file handles); a missing generation number
	 * should only happen on old filesystems.
	 */
	if (ip->i_gen == 0) {
		if (++nextgennumber < (u_long)time.tv_sec)
			nextgennumber = time.tv_sec;
		ip->i_gen = nextgennumber;
		if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0)
			ip->i_flag |= IMOD;
	}
	*ipp = ip;
	return (0);
}

/*
 * Unlock and decrement the reference count of an inode structure.
 */
iput(ip)
	register struct inode *ip;
{

	if ((ip->i_flag & ILOCKED) == 0)
		panic("iput");
	IUNLOCK(ip);
	vrele(ITOV(ip));
}

/*
 * Last reference to an inode: write the inode out and, if necessary,
 * truncate and deallocate the file.
 */
ufs_inactive(vp)
	struct vnode *vp;
{
	register struct inode *ip = VTOI(vp);
	int mode, error = 0;

	if (prtactive && vp->v_usecount != 0)
		vprint("ufs_inactive: pushing active", vp);
	/*
	 * Get rid of inodes related to stale file handles.
	 */
	if (ip->i_mode == 0) {
		if ((vp->v_flag & VXLOCK) == 0)
			vgone(vp);
		return (0);
	}
	ILOCK(ip);
	if (ip->i_nlink <= 0 && (vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
#ifdef QUOTA
		if (!getinoquota(ip))
			(void) chkiq(ip, -1, NOCRED, 0);
#endif
		error = itrunc(ip, (u_long)0, 0);
		mode = ip->i_mode;
		ip->i_mode = 0;
		ip->i_rdev = 0;
		ip->i_flag |= IUPD|ICHG;
		ifree(ip, ip->i_number, mode);
	}
	IUPDAT(ip, &time, &time, 0);
	IUNLOCK(ip);
	ip->i_flag = 0;
	/*
	 * If we are done with the inode, reclaim it
	 * so that it can be reused immediately.
	 */
	if (vp->v_usecount == 0 && ip->i_mode == 0)
		vgone(vp);
	return (error);
}

/*
 * Reclaim an inode so that it can be used for other purposes.
 */
ufs_reclaim(vp)
	register struct vnode *vp;
{
	register struct inode *ip = VTOI(vp);
	int i;

	if (prtactive && vp->v_usecount != 0)
		vprint("ufs_reclaim: pushing active", vp);
	/*
	 * Remove the inode from its hash chain.
	 */
	remque(ip);
	ip->i_forw = ip;
	ip->i_back = ip;
	/*
	 * Purge old data structures associated with the inode.
	 */
	cache_purge(vp);
	if (ip->i_devvp) {
		vrele(ip->i_devvp);
		ip->i_devvp = 0;
	}
#ifdef QUOTA
	for (i = 0; i < MAXQUOTAS; i++) {
		if (ip->i_dquot[i] != NODQUOT) {
			dqrele(vp, ip->i_dquot[i]);
			ip->i_dquot[i] = NODQUOT;
		}
	}
#endif
	ip->i_flag = 0;
	return (0);
}

/*
 * Check accessed and update flags on an inode structure.
 * If any is on, update the inode with the current time.
 * If waitfor is given, then must ensure I/O order,
 * so wait for write to complete.
 */
iupdat(ip, ta, tm, waitfor)
	register struct inode *ip;
	struct timeval *ta, *tm;
	int waitfor;
{
	struct buf *bp;
	struct vnode *vp = ITOV(ip);
	struct dinode *dp;
	register struct fs *fs;
	int error;

	fs = ip->i_fs;
	if ((ip->i_flag & (IUPD|IACC|ICHG|IMOD)) == 0)
		return (0);
	if (vp->v_mount->mnt_flag & MNT_RDONLY)
		return (0);
	error = bread(ip->i_devvp, fsbtodb(fs, itod(fs, ip->i_number)),
		(int)fs->fs_bsize, NOCRED, &bp);
	if (error) {
		brelse(bp);
		return (error);
	}
	if (ip->i_flag&IACC)
		ip->i_atime = ta->tv_sec;
	if (ip->i_flag&IUPD)
		ip->i_mtime = tm->tv_sec;
	if (ip->i_flag&ICHG)
		ip->i_ctime = time.tv_sec;
	ip->i_flag &= ~(IUPD|IACC|ICHG|IMOD);
	dp = bp->b_un.b_dino + itoo(fs, ip->i_number);
	*dp = ip->i_din;
	if (waitfor) {
		return (bwrite(bp));
	} else {
		bdwrite(bp);
		return (0);
	}
}

#define	SINGLE	0	/* index of single indirect block */
#define	DOUBLE	1	/* index of double indirect block */
#define	TRIPLE	2	/* index of triple indirect block */
/*
 * Truncate the inode oip to at most length size.  Free affected disk
 * blocks -- the blocks of the file are removed in reverse order.
 *
 * NB: triple indirect blocks are untested.
 */
itrunc(oip, length, flags)
	register struct inode *oip;
	u_long length;
	int flags;
{
	register daddr_t lastblock;
	daddr_t bn, lbn, lastiblock[NIADDR];
	register struct fs *fs;
	register struct inode *ip;
	struct buf *bp;
	int offset, osize, size, level;
	long count, nblocks, blocksreleased = 0;
	register int i;
	int aflags, error, allerror;
	struct inode tip;

	if (oip->i_size <= length) {
		oip->i_flag |= ICHG|IUPD;
		error = iupdat(oip, &time, &time, 1);
		return (error);
	}
	/*
	 * Calculate index into inode's block list of
	 * last direct and indirect blocks (if any)
	 * which we want to keep.  Lastblock is -1 when
	 * the file is truncated to 0.
	 */
	fs = oip->i_fs;
	lastblock = lblkno(fs, length + fs->fs_bsize - 1) - 1;
	lastiblock[SINGLE] = lastblock - NDADDR;
	lastiblock[DOUBLE] = lastiblock[SINGLE] - NINDIR(fs);
	lastiblock[TRIPLE] = lastiblock[DOUBLE] - NINDIR(fs) * NINDIR(fs);
	nblocks = btodb(fs->fs_bsize);
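	/*
	 * For example, with 8K blocks (NDADDR == 12): truncating to
	 * length 0 gives lastblock == -1, so every direct and indirect
	 * block is released; truncating to length 8192 gives
	 * lastblock == 0 and lastiblock[SINGLE] == -12, so direct
	 * block 0 is kept and all indirect blocks are released.
	 */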
	/*
	 * Update the size of the file. If the file is not being
	 * truncated to a block boundary, the contents of the
	 * partial block following the end of the file must be
	 * zeroed in case it ever becomes accessible again because
	 * of subsequent file growth.
	 */
	osize = oip->i_size;
	offset = blkoff(fs, length);
	if (offset == 0) {
		oip->i_size = length;
	} else {
		lbn = lblkno(fs, length);
		aflags = B_CLRBUF;
		if (flags & IO_SYNC)
			aflags |= B_SYNC;
#ifdef QUOTA
		if (error = getinoquota(oip))
			return (error);
#endif
		if (error = balloc(oip, lbn, offset, &bp, aflags))
			return (error);
		oip->i_size = length;
		size = blksize(fs, oip, lbn);
		bn = bp->b_blkno;
		count = howmany(size, CLBYTES);
		for (i = 0; i < count; i++)
			munhash(oip->i_devvp, bn + i * CLBYTES / DEV_BSIZE);
		bzero(bp->b_un.b_addr + offset, (unsigned)(size - offset));
		allocbuf(bp, size);
		if (flags & IO_SYNC)
			bwrite(bp);
		else
			bdwrite(bp);
	}
	/*
	 * Update file and block pointers
	 * on disk before we start freeing blocks.
	 * If we crash before free'ing blocks below,
	 * the blocks will be returned to the free list.
	 * lastiblock values are also normalized to -1
	 * for calls to indirtrunc below.
	 */
	tip = *oip;
	tip.i_size = osize;
	for (level = TRIPLE; level >= SINGLE; level--)
		if (lastiblock[level] < 0) {
			oip->i_ib[level] = 0;
			lastiblock[level] = -1;
		}
	for (i = NDADDR - 1; i > lastblock; i--)
		oip->i_db[i] = 0;
	oip->i_flag |= ICHG|IUPD;
	vinvalbuf(ITOV(oip), (length > 0));
	allerror = iupdat(oip, &time, &time, MNT_WAIT);

	/*
	 * Indirect blocks first.
	 */
	ip = &tip;
	for (level = TRIPLE; level >= SINGLE; level--) {
		bn = ip->i_ib[level];
		if (bn != 0) {
			error = indirtrunc(ip, bn, lastiblock[level], level,
				&count);
			if (error)
				allerror = error;
			blocksreleased += count;
			if (lastiblock[level] < 0) {
				ip->i_ib[level] = 0;
				blkfree(ip, bn, (off_t)fs->fs_bsize);
				blocksreleased += nblocks;
			}
		}
		if (lastiblock[level] >= 0)
			goto done;
	}

	/*
	 * All whole direct blocks or frags.
	 */
	for (i = NDADDR - 1; i > lastblock; i--) {
		register off_t bsize;

		bn = ip->i_db[i];
		if (bn == 0)
			continue;
		ip->i_db[i] = 0;
		bsize = (off_t)blksize(fs, ip, i);
		blkfree(ip, bn, bsize);
		blocksreleased += btodb(bsize);
	}
	if (lastblock < 0)
		goto done;

	/*
	 * Finally, look for a change in size of the
	 * last direct block; release any frags.
	 */
	bn = ip->i_db[lastblock];
	if (bn != 0) {
		off_t oldspace, newspace;

		/*
		 * Calculate amount of space we're giving
		 * back as old block size minus new block size.
		 */
		oldspace = blksize(fs, ip, lastblock);
		ip->i_size = length;
		newspace = blksize(fs, ip, lastblock);
		if (newspace == 0)
			panic("itrunc: newspace");
		if (oldspace - newspace > 0) {
			/*
			 * Block number of space to be free'd is
			 * the old block # plus the number of frags
			 * required for the storage we're keeping.
			 */
			bn += numfrags(fs, newspace);
			blkfree(ip, bn, oldspace - newspace);
			blocksreleased += btodb(oldspace - newspace);
		}
	}
done:
/* BEGIN PARANOIA */
	for (level = SINGLE; level <= TRIPLE; level++)
		if (ip->i_ib[level] != oip->i_ib[level])
			panic("itrunc1");
	for (i = 0; i < NDADDR; i++)
		if (ip->i_db[i] != oip->i_db[i])
			panic("itrunc2");
/* END PARANOIA */
	oip->i_blocks -= blocksreleased;
	if (oip->i_blocks < 0)			/* sanity */
		oip->i_blocks = 0;
	oip->i_flag |= ICHG;
#ifdef QUOTA
	if (!getinoquota(oip))
		(void) chkdq(oip, -blocksreleased, NOCRED, 0);
#endif
	return (allerror);
}

/*
 * Release blocks associated with the inode ip and
 * stored in the indirect block bn.  Blocks are free'd
 * in LIFO order up to (but not including) lastbn.  If
 * level is greater than SINGLE, the block is an indirect
 * block and recursive calls to indirtrunc must be used to
 * cleanse other indirect blocks.
 *
 * NB: triple indirect blocks are untested.
 */
indirtrunc(ip, bn, lastbn, level, countp)
	register struct inode *ip;
	daddr_t bn, lastbn;
	int level;
	long *countp;
{
	register int i;
	struct buf *bp;
	register struct fs *fs = ip->i_fs;
	register daddr_t *bap;
	daddr_t *copy, nb, last;
	long blkcount, factor;
	int nblocks, blocksreleased = 0;
	int error, allerror = 0;

	/*
	 * Calculate index in current block of last
	 * block to be kept.  -1 indicates the entire
	 * block so we need not calculate the index.
	 */
	factor = 1;
	for (i = SINGLE; i < level; i++)
		factor *= NINDIR(fs);
	last = lastbn;
	if (lastbn > 0)
		last /= factor;
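	/*
	 * factor is the number of data blocks mapped by each entry at
	 * this level: 1 for SINGLE, NINDIR(fs) for DOUBLE, and
	 * NINDIR(fs) squared for TRIPLE.  Dividing lastbn by it turns
	 * a data-block index into an index within this block.
	 */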
	nblocks = btodb(fs->fs_bsize);
	/*
	 * Get buffer of block pointers, zero those
	 * entries corresponding to blocks to be free'd,
	 * and update on disk copy first.
	 */
	error = bread(ip->i_devvp, fsbtodb(fs, bn), (int)fs->fs_bsize,
		NOCRED, &bp);
	if (error) {
		brelse(bp);
		*countp = 0;
		return (error);
	}
	bap = bp->b_un.b_daddr;
	MALLOC(copy, daddr_t *, fs->fs_bsize, M_TEMP, M_WAITOK);
	bcopy((caddr_t)bap, (caddr_t)copy, (u_int)fs->fs_bsize);
	bzero((caddr_t)&bap[last + 1],
	  (u_int)(NINDIR(fs) - (last + 1)) * sizeof (daddr_t));
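	/*
	 * If nothing in this block is being kept, there is no point
	 * in leaving it in the buffer cache, so mark it invalid.
	 */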
	if (last == -1)
		bp->b_flags |= B_INVAL;
	error = bwrite(bp);
	if (error)
		allerror = error;
	bap = copy;

	/*
	 * Recursively free totally unused blocks.
	 */
	for (i = NINDIR(fs) - 1; i > last; i--) {
		nb = bap[i];
		if (nb == 0)
			continue;
		if (level > SINGLE) {
			error = indirtrunc(ip, nb, (daddr_t)-1, level - 1,
				&blkcount);
			if (error)
				allerror = error;
			blocksreleased += blkcount;
		}
		blkfree(ip, nb, (off_t)fs->fs_bsize);
		blocksreleased += nblocks;
	}

	/*
	 * Recursively free last partial block.
	 */
	if (level > SINGLE && lastbn >= 0) {
		last = lastbn % factor;
		nb = bap[i];
		if (nb != 0) {
			error = indirtrunc(ip, nb, last, level - 1, &blkcount);
			if (error)
				allerror = error;
			blocksreleased += blkcount;
		}
	}
	FREE(copy, M_TEMP);
	*countp = blocksreleased;
	return (allerror);
}

/*
 * Lock an inode. If it's already locked, set the WANT bit and sleep.
 */
ilock(ip)
	register struct inode *ip;
{
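	/*
	 * i_spare0 records the pid of the current lock holder (used by
	 * the deadlock check below) and i_spare1 the pid of the most
	 * recent waiter; u.u_spare[0] counts the inode locks held by
	 * the process.  These spare fields serve as lock-debugging aids.
	 */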

	while (ip->i_flag & ILOCKED) {
		ip->i_flag |= IWANT;
		if (ip->i_spare0 == u.u_procp->p_pid)
			panic("locking against myself");
		ip->i_spare1 = u.u_procp->p_pid;
		(void) sleep((caddr_t)ip, PINOD);
	}
	ip->i_spare1 = 0;
	ip->i_spare0 = u.u_procp->p_pid;
	u.u_spare[0]++;
	ip->i_flag |= ILOCKED;
}

/*
 * Unlock an inode.  If WANT bit is on, wakeup.
 */
iunlock(ip)
	register struct inode *ip;
{

	if ((ip->i_flag & ILOCKED) == 0)
		vprint("iunlock: unlocked inode", ITOV(ip));
	ip->i_spare0 = 0;
	u.u_spare[0]--;
	ip->i_flag &= ~ILOCKED;
	if (ip->i_flag&IWANT) {
		ip->i_flag &= ~IWANT;
		wakeup((caddr_t)ip);
	}
}
675