/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * %sccs.include.redist.c%
 *
 *	@(#)vfs_subr.c	8.1 (Berkeley) 06/10/93
 */

/*
 * External virtual filesystem routines
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/mount.h>
#include <sys/time.h>
#include <sys/vnode.h>
#include <sys/stat.h>
#include <sys/namei.h>
#include <sys/ucred.h>
#include <sys/buf.h>
#include <sys/errno.h>
#include <sys/malloc.h>

#include <vm/vm.h>
#include <sys/sysctl.h>

#include <miscfs/specfs/specdev.h>

enum vtype iftovt_tab[16] = {
	VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
	VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
};
int	vttoif_tab[9] = {
	0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
	S_IFSOCK, S_IFIFO, S_IFMT,
};

/*
 * Insq/Remq for the vnode usage lists.
 */
#define	bufinsvn(bp, dp)	list_enter_head(dp, bp, struct buf *, b_vnbufs)
#define	bufremvn(bp)		list_remove(bp, struct buf *, b_vnbufs)

/*
 * Remove a mount point from the list of mounted filesystems.
 * Unmount of the root is illegal.
 */
void
vfs_remove(mp)
	register struct mount *mp;
{

	if (mp == rootfs)
		panic("vfs_remove: unmounting root");
	mp->mnt_prev->mnt_next = mp->mnt_next;
	mp->mnt_next->mnt_prev = mp->mnt_prev;
	mp->mnt_vnodecovered->v_mountedhere = (struct mount *)0;
	vfs_unlock(mp);
}

/*
 * Lock a filesystem.
 * Used to prevent access to it while mounting and unmounting.
 */
vfs_lock(mp)
	register struct mount *mp;
{

	while (mp->mnt_flag & MNT_MLOCK) {
		mp->mnt_flag |= MNT_MWAIT;
		sleep((caddr_t)mp, PVFS);
	}
	mp->mnt_flag |= MNT_MLOCK;
	return (0);
}

/*
 * Unlock a locked filesystem.
 * Panic if filesystem is not locked.
 */
void
vfs_unlock(mp)
	register struct mount *mp;
{

	if ((mp->mnt_flag & MNT_MLOCK) == 0)
		panic("vfs_unlock: not locked");
	mp->mnt_flag &= ~MNT_MLOCK;
	if (mp->mnt_flag & MNT_MWAIT) {
		mp->mnt_flag &= ~MNT_MWAIT;
		wakeup((caddr_t)mp);
	}
}

/*
 * Mark a mount point as busy.
 * Used to synchronize access and to delay unmounting.
 */
vfs_busy(mp)
	register struct mount *mp;
{

	while (mp->mnt_flag & MNT_MPBUSY) {
		mp->mnt_flag |= MNT_MPWANT;
		sleep((caddr_t)&mp->mnt_flag, PVFS);
	}
	if (mp->mnt_flag & MNT_UNMOUNT)
		return (1);
	mp->mnt_flag |= MNT_MPBUSY;
	return (0);
}

/*
 * Free a busy filesystem.
 * Panic if filesystem is not busy.
 */
vfs_unbusy(mp)
	register struct mount *mp;
{

	if ((mp->mnt_flag & MNT_MPBUSY) == 0)
		panic("vfs_unbusy: not busy");
	mp->mnt_flag &= ~MNT_MPBUSY;
	if (mp->mnt_flag & MNT_MPWANT) {
		mp->mnt_flag &= ~MNT_MPWANT;
		wakeup((caddr_t)&mp->mnt_flag);
	}
}
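
/*
 * Usage sketch (illustrative only, not part of the original file):
 * code that walks a mount point's vnode list first marks the mount
 * busy so that an unmount cannot sweep the list out from under it.
 * A non-zero return from vfs_busy() means the filesystem is being
 * unmounted and must be skipped.
 */
static int
example_with_mount_busy(mp)
	struct mount *mp;
{

	if (vfs_busy(mp))
		return (1);		/* unmount in progress; skip */
	/* ... safe to traverse mp->mnt_mounth here ... */
	vfs_unbusy(mp);
	return (0);
}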

/*
 * Lookup a mount point by filesystem identifier.
 */
struct mount *
getvfs(fsid)
	fsid_t *fsid;
{
	register struct mount *mp;

	mp = rootfs;
	do {
		if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] &&
		    mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) {
			return (mp);
		}
		mp = mp->mnt_next;
	} while (mp != rootfs);
	return ((struct mount *)0);
}

/*
 * Get a new unique fsid.
 */
void
getnewfsid(mp, mtype)
	struct mount *mp;
	int mtype;
{
	static u_short xxxfs_mntid;
	fsid_t tfsid;

	mp->mnt_stat.f_fsid.val[0] = makedev(nblkdev + 11, 0);	/* XXX */
	mp->mnt_stat.f_fsid.val[1] = mtype;
	if (xxxfs_mntid == 0)
		++xxxfs_mntid;
	tfsid.val[0] = makedev(nblkdev, xxxfs_mntid);
	tfsid.val[1] = mtype;
	if (rootfs) {
		while (getvfs(&tfsid)) {
			tfsid.val[0]++;
			xxxfs_mntid++;
		}
	}
	mp->mnt_stat.f_fsid.val[0] = tfsid.val[0];
}

/*
 * Set vnode attributes to VNOVAL.
 */
void
vattr_null(vap)
	register struct vattr *vap;
{

	vap->va_type = VNON;
	vap->va_size = vap->va_bytes = VNOVAL;
	vap->va_mode = vap->va_nlink = vap->va_uid = vap->va_gid =
		vap->va_fsid = vap->va_fileid =
		vap->va_blocksize = vap->va_rdev =
		vap->va_atime.ts_sec = vap->va_atime.ts_nsec =
		vap->va_mtime.ts_sec = vap->va_mtime.ts_nsec =
		vap->va_ctime.ts_sec = vap->va_ctime.ts_nsec =
		vap->va_flags = vap->va_gen = VNOVAL;
	vap->va_vaflags = 0;
}
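
/*
 * Usage sketch (illustrative only): a filesystem's getattr routine
 * typically clears the whole structure with vattr_null() and then
 * fills in just the fields it supports, so callers can distinguish
 * real values from VNOVAL.
 */
static void
example_getattr(vap)
	struct vattr *vap;
{

	vattr_null(vap);
	vap->va_type = VREG;
	vap->va_nlink = 1;
	/* ... any field not set here remains VNOVAL ... */
}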

/*
 * Routines having to do with the management of the vnode table.
 */
struct vnode *vfreeh, **vfreet = &vfreeh;
extern int (**dead_vnodeop_p)();
extern void vclean();
long numvnodes;
extern struct vattr va_null;

/*
 * Return the next vnode from the free list.
 */
getnewvnode(tag, mp, vops, vpp)
	enum vtagtype tag;
	struct mount *mp;
	int (**vops)();
	struct vnode **vpp;
{
	register struct vnode *vp, *vq;
	int s;

	if ((vfreeh == NULL && numvnodes < 2 * desiredvnodes) ||
	    numvnodes < desiredvnodes) {
		vp = (struct vnode *)malloc((u_long)sizeof *vp,
		    M_VNODE, M_WAITOK);
		bzero((char *)vp, sizeof *vp);
		numvnodes++;
	} else {
		if ((vp = vfreeh) == NULL) {
			tablefull("vnode");
			*vpp = 0;
			return (ENFILE);
		}
		if (vp->v_usecount)
			panic("free vnode isn't");
		if (vq = vp->v_freef)
			vq->v_freeb = &vfreeh;
		else
			vfreet = &vfreeh;
		vfreeh = vq;
		vp->v_freef = NULL;
		vp->v_freeb = NULL;
		vp->v_lease = NULL;
		if (vp->v_type != VBAD)
			vgone(vp);
#ifdef DIAGNOSTIC
		if (vp->v_data)
			panic("cleaned vnode isn't");
		s = splbio();
		if (vp->v_numoutput)
			panic("Clean vnode has pending I/O's");
		splx(s);
#endif
		vp->v_flag = 0;
		vp->v_lastr = 0;
		vp->v_lastw = 0;
		vp->v_lasta = 0;
		vp->v_cstart = 0;
		vp->v_clen = 0;
		vp->v_socket = 0;
	}
	vp->v_ralen = 1;
	vp->v_type = VNON;
	cache_purge(vp);
	vp->v_tag = tag;
	vp->v_op = vops;
	insmntque(vp, mp);
	vp->v_usecount++;
	*vpp = vp;
	return (0);
}
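
/*
 * Usage sketch (illustrative only): this is roughly how a filesystem
 * allocates a vnode for a newly read in-core inode; the operations
 * vector is whatever the caller's filesystem provides.
 */
static int
example_alloc_vnode(mp, vops, vpp)
	struct mount *mp;
	int (**vops)();
	struct vnode **vpp;
{
	int error;

	if (error = getnewvnode(VT_UFS, mp, vops, vpp))
		return (error);
	(*vpp)->v_type = VREG;	/* caller fills in the real type */
	return (0);
}
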
/*
 * Move a vnode from one mount queue to another.
 */
insmntque(vp, mp)
	register struct vnode *vp;
	register struct mount *mp;
{
	register struct vnode *vq;

	/*
	 * Delete from old mount point vnode list, if on one.
	 */
	if (vp->v_mountb) {
		if (vq = vp->v_mountf)
			vq->v_mountb = vp->v_mountb;
		*vp->v_mountb = vq;
	}
	/*
	 * Insert into list of vnodes for the new mount point, if available.
	 */
	vp->v_mount = mp;
	if (mp == NULL) {
		vp->v_mountf = NULL;
		vp->v_mountb = NULL;
		return;
	}
	if (vq = mp->mnt_mounth)
		vq->v_mountb = &vp->v_mountf;
	vp->v_mountf = vq;
	vp->v_mountb = &mp->mnt_mounth;
	mp->mnt_mounth = vp;
}

/*
 * Update outstanding I/O count and do wakeup if requested.
 */
vwakeup(bp)
	register struct buf *bp;
{
	register struct vnode *vp;

	bp->b_flags &= ~B_WRITEINPROG;
	if (vp = bp->b_vp) {
		vp->v_numoutput--;
		if (vp->v_numoutput < 0)
			panic("vwakeup: neg numoutput");
		if ((vp->v_flag & VBWAIT) && vp->v_numoutput <= 0) {
			vp->v_flag &= ~VBWAIT;
			wakeup((caddr_t)&vp->v_numoutput);
		}
	}
}
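
/*
 * Usage sketch (illustrative only): the other half of the protocol.
 * Code that starts a write on a vnode increments v_numoutput before
 * issuing the I/O; the completion path calls vwakeup() to drop the
 * count and wake any process sleeping on it with VBWAIT set.
 */
static void
example_start_write(bp)
	struct buf *bp;
{
	int s;

	s = splbio();
	bp->b_vp->v_numoutput++;	/* balanced by vwakeup() */
	splx(s);
	/* ... the buffer would now be handed to VOP_STRATEGY() ... */
}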

/*
 * Flush out and invalidate all buffers associated with a vnode.
 * Called with the underlying object locked.
 */
int
vinvalbuf(vp, flags, cred, p, slpflag, slptimeo)
	register struct vnode *vp;
	int flags;
	struct ucred *cred;
	struct proc *p;
	int slpflag, slptimeo;
{
	register struct buf *bp;
	struct buf *nbp, *blist;
	int s, error;

	if (flags & V_SAVE) {
		if (error = VOP_FSYNC(vp, cred, MNT_WAIT, p))
			return (error);
		if (vp->v_dirtyblkhd.le_next != NULL)
			panic("vinvalbuf: dirty bufs");
	}
	for (;;) {
		if ((blist = vp->v_cleanblkhd.le_next) && flags & V_SAVEMETA)
			while (blist && blist->b_lblkno < 0)
				blist = blist->b_vnbufs.qe_next;
		if (!blist && (blist = vp->v_dirtyblkhd.le_next) &&
		    (flags & V_SAVEMETA))
			while (blist && blist->b_lblkno < 0)
				blist = blist->b_vnbufs.qe_next;
		if (!blist)
			break;

		for (bp = blist; bp; bp = nbp) {
			nbp = bp->b_vnbufs.qe_next;
			if (flags & V_SAVEMETA && bp->b_lblkno < 0)
				continue;
			s = splbio();
			if (bp->b_flags & B_BUSY) {
				bp->b_flags |= B_WANTED;
				error = tsleep((caddr_t)bp,
					slpflag | (PRIBIO + 1), "vinvalbuf",
					slptimeo);
				splx(s);
				if (error)
					return (error);
				break;
			}
			bremfree(bp);
			bp->b_flags |= B_BUSY;
			splx(s);
			/*
			 * XXX Since there are no node locks for NFS, I believe
			 * there is a slight chance that a delayed write will
			 * occur while sleeping just above, so check for it.
			 */
			if ((bp->b_flags & B_DELWRI) && (flags & V_SAVE)) {
				(void) VOP_BWRITE(bp);
				break;
			}
			bp->b_flags |= B_INVAL;
			brelse(bp);
		}
	}
	if (!(flags & V_SAVEMETA) &&
	    (vp->v_dirtyblkhd.le_next || vp->v_cleanblkhd.le_next))
		panic("vinvalbuf: flush failed");
	return (0);
}
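
/*
 * Usage sketch (illustrative only): a caller revoking a vnode flushes
 * everything after saving dirty data (V_SAVE); a truncate-to-zero
 * path could instead pass V_SAVEMETA to keep indirect blocks
 * (b_lblkno < 0) while junking all data blocks.
 */
static int
example_purge_data(vp, cred, p)
	struct vnode *vp;
	struct ucred *cred;
	struct proc *p;
{

	return (vinvalbuf(vp, V_SAVEMETA, cred, p, 0, 0));
}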

/*
 * Associate a buffer with a vnode.
 */
bgetvp(vp, bp)
	register struct vnode *vp;
	register struct buf *bp;
{
	register struct vnode *vq;

	if (bp->b_vp)
		panic("bgetvp: not free");
	VHOLD(vp);
	bp->b_vp = vp;
	if (vp->v_type == VBLK || vp->v_type == VCHR)
		bp->b_dev = vp->v_rdev;
	else
		bp->b_dev = NODEV;
	/*
	 * Insert onto list for new vnode.
	 */
	bufinsvn(bp, &vp->v_cleanblkhd);
}

/*
 * Disassociate a buffer from a vnode.
 */
brelvp(bp)
	register struct buf *bp;
{
	struct vnode *vp;

	if (bp->b_vp == (struct vnode *) 0)
		panic("brelvp: NULL");
	/*
	 * Delete from old vnode list, if on one.
	 */
	if (bp->b_vnbufs.qe_next != NOLIST)
		bufremvn(bp);
	vp = bp->b_vp;
	bp->b_vp = (struct vnode *) 0;
	HOLDRELE(vp);
}

/*
 * Reassign a buffer from one vnode to another.
 * Used to assign file specific control information
 * (indirect blocks) to the vnode to which they belong.
 */
reassignbuf(bp, newvp)
	register struct buf *bp;
	register struct vnode *newvp;
{
	register struct list_entry *listheadp;

	if (newvp == NULL) {
		printf("reassignbuf: NULL\n");
		return;
	}
	/*
	 * Delete from old vnode list, if on one.
	 */
	if (bp->b_vnbufs.qe_next != NOLIST)
		bufremvn(bp);
	/*
	 * If dirty, put on list of dirty buffers;
	 * otherwise insert onto list of clean buffers.
	 */
	if (bp->b_flags & B_DELWRI)
		listheadp = &newvp->v_dirtyblkhd;
	else
		listheadp = &newvp->v_cleanblkhd;
	bufinsvn(bp, listheadp);
}

/*
 * Create a vnode for a block device.
 * Used for root filesystem, argdev, and swap areas.
 * Also used for memory file system special devices.
 */
bdevvp(dev, vpp)
	dev_t dev;
	struct vnode **vpp;
{
	register struct vnode *vp;
	struct vnode *nvp;
	int error;

	if (dev == NODEV)
		return (0);
	error = getnewvnode(VT_NON, (struct mount *)0, spec_vnodeop_p, &nvp);
	if (error) {
		*vpp = 0;
		return (error);
	}
	vp = nvp;
	vp->v_type = VBLK;
	if (nvp = checkalias(vp, dev, (struct mount *)0)) {
		vput(vp);
		vp = nvp;
	}
	*vpp = vp;
	return (0);
}
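
/*
 * Usage sketch (illustrative only): configuration code obtains the
 * vnode for a device this way, e.g. bdevvp(rootdev, &rootvp) when
 * mounting the root filesystem.
 */
static int
example_get_devvp(dev, vpp)
	dev_t dev;
	struct vnode **vpp;
{

	return (bdevvp(dev, vpp));
}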

/*
 * Check to see if the new vnode represents a special device
 * for which we already have a vnode (either because of
 * bdevvp() or because of a different vnode representing
 * the same block device). If such an alias exists, deallocate
 * the existing contents and return the aliased vnode. The
 * caller is responsible for filling it with its new contents.
 */
struct vnode *
checkalias(nvp, nvp_rdev, mp)
	register struct vnode *nvp;
	dev_t nvp_rdev;
	struct mount *mp;
{
	register struct vnode *vp;
	struct vnode **vpp;

	if (nvp->v_type != VBLK && nvp->v_type != VCHR)
		return (NULLVP);

	vpp = &speclisth[SPECHASH(nvp_rdev)];
loop:
	for (vp = *vpp; vp; vp = vp->v_specnext) {
		if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type)
			continue;
		/*
		 * Alias, but not in use, so flush it out.
		 */
		if (vp->v_usecount == 0) {
			vgone(vp);
			goto loop;
		}
		if (vget(vp))
			goto loop;
		break;
	}
	if (vp == NULL || vp->v_tag != VT_NON) {
		MALLOC(nvp->v_specinfo, struct specinfo *,
			sizeof(struct specinfo), M_VNODE, M_WAITOK);
		nvp->v_rdev = nvp_rdev;
		nvp->v_hashchain = vpp;
		nvp->v_specnext = *vpp;
		nvp->v_specflags = 0;
		*vpp = nvp;
		if (vp != NULL) {
			nvp->v_flag |= VALIASED;
			vp->v_flag |= VALIASED;
			vput(vp);
		}
		return (NULLVP);
	}
	VOP_UNLOCK(vp);
	vclean(vp, 0);
	vp->v_op = nvp->v_op;
	vp->v_tag = nvp->v_tag;
	nvp->v_type = VNON;
	insmntque(vp, mp);
	return (vp);
}
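
/*
 * Usage sketch (illustrative only): after setting up a fresh device
 * vnode, a caller folds it into the alias list the way bdevvp()
 * does above; if checkalias() hands back an existing alias, the new
 * vnode is released and the alias used in its place.
 */
static struct vnode *
example_fold_alias(vp, dev, mp)
	struct vnode *vp;
	dev_t dev;
	struct mount *mp;
{
	struct vnode *nvp;

	vp->v_type = VBLK;
	if (nvp = checkalias(vp, dev, mp)) {
		vput(vp);	/* drop the new vnode */
		vp = nvp;	/* reuse the pre-existing alias */
	}
	return (vp);
}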

/*
 * Grab a particular vnode from the free list, increment its
 * reference count and lock it. The vnode lock bit is set if the
 * vnode is being eliminated in vgone. The process is awakened
 * when the transition is completed, and an error returned to
 * indicate that the vnode is no longer usable (possibly having
 * been changed to a new file system type).
 */
vget(vp)
	register struct vnode *vp;
{
	register struct vnode *vq;

	if (vp->v_flag & VXLOCK) {
		vp->v_flag |= VXWANT;
		sleep((caddr_t)vp, PINOD);
		return (1);
	}
	if (vp->v_usecount == 0) {
		if (vq = vp->v_freef)
			vq->v_freeb = vp->v_freeb;
		else
			vfreet = vp->v_freeb;
		*vp->v_freeb = vq;
		vp->v_freef = NULL;
		vp->v_freeb = NULL;
	}
	vp->v_usecount++;
	VOP_LOCK(vp);
	return (0);
}

/*
 * Vnode reference, just increment the count.
 */
void
vref(vp)
	struct vnode *vp;
{

	if (vp->v_usecount <= 0)
		panic("vref used where vget required");
	vp->v_usecount++;
}

/*
 * vput(), just unlock and vrele().
 */
void
vput(vp)
	register struct vnode *vp;
{

	VOP_UNLOCK(vp);
	vrele(vp);
}

/*
 * Vnode release.
 * If count drops to zero, call inactive routine and return to freelist.
 */
void
vrele(vp)
	register struct vnode *vp;
{

#ifdef DIAGNOSTIC
	if (vp == NULL)
		panic("vrele: null vp");
#endif
	vp->v_usecount--;
	if (vp->v_usecount > 0)
		return;
#ifdef DIAGNOSTIC
	if (vp->v_usecount != 0 || vp->v_writecount != 0) {
		vprint("vrele: bad ref count", vp);
		panic("vrele: ref cnt");
	}
#endif
	/*
	 * Insert at tail of LRU list.
	 */
	*vfreet = vp;
	vp->v_freeb = vfreet;
	vp->v_freef = NULL;
	vfreet = &vp->v_freef;
	VOP_INACTIVE(vp);
}
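
/*
 * Usage sketch (illustrative only): the reference discipline in
 * brief.  vget() references and locks a vnode (failing if it is
 * being reclaimed), vput() is the matching unlock-and-release, and
 * vref() may only be applied to an already-referenced vnode.
 */
static int
example_use_vnode(vp)
	struct vnode *vp;
{

	if (vget(vp))
		return (1);	/* being reclaimed; caller should retry lookup */
	/* ... vp is locked and referenced here ... */
	vput(vp);
	return (0);
}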

/*
 * Page or buffer structure gets a reference.
 */
void
vhold(vp)
	register struct vnode *vp;
{

	vp->v_holdcnt++;
}

/*
 * Page or buffer structure frees a reference.
 */
void
holdrele(vp)
	register struct vnode *vp;
{

	if (vp->v_holdcnt <= 0)
		panic("holdrele: holdcnt");
	vp->v_holdcnt--;
}

/*
 * Remove any vnodes in the vnode table belonging to mount point mp.
 *
 * If MNT_NOFORCE is specified, there should not be any active ones,
 * return error if any are found (nb: this is a user error, not a
 * system error). If MNT_FORCE is specified, detach any active vnodes
 * that are found.
 */
int busyprt = 0;	/* print out busy vnodes */
struct ctldebug debug1 = { "busyprt", &busyprt };

vflush(mp, skipvp, flags)
	struct mount *mp;
	struct vnode *skipvp;
	int flags;
{
	register struct vnode *vp, *nvp;
	int busy = 0;

	if ((mp->mnt_flag & MNT_MPBUSY) == 0)
		panic("vflush: not busy");
loop:
	for (vp = mp->mnt_mounth; vp; vp = nvp) {
		if (vp->v_mount != mp)
			goto loop;
		nvp = vp->v_mountf;
		/*
		 * Skip over a selected vnode.
		 */
		if (vp == skipvp)
			continue;
		/*
		 * Skip over any vnodes marked VSYSTEM.
		 */
		if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM))
			continue;
		/*
		 * If WRITECLOSE is set, only flush out regular file
		 * vnodes open for writing.
		 */
		if ((flags & WRITECLOSE) &&
		    (vp->v_writecount == 0 || vp->v_type != VREG))
			continue;
		/*
		 * With v_usecount == 0, all we need to do is clear
		 * out the vnode data structures and we are done.
		 */
		if (vp->v_usecount == 0) {
			vgone(vp);
			continue;
		}
		/*
		 * If FORCECLOSE is set, forcibly close the vnode.
		 * For block or character devices, revert to an
		 * anonymous device. For all other files, just kill them.
		 */
		if (flags & FORCECLOSE) {
			if (vp->v_type != VBLK && vp->v_type != VCHR) {
				vgone(vp);
			} else {
				vclean(vp, 0);
				vp->v_op = spec_vnodeop_p;
				insmntque(vp, (struct mount *)0);
			}
			continue;
		}
		if (busyprt)
			vprint("vflush: busy vnode", vp);
		busy++;
	}
	if (busy)
		return (EBUSY);
	return (0);
}
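
/*
 * Usage sketch (illustrative only): an unmount path flushes every
 * vnode except the root, which it still holds; a forced unmount
 * would pass FORCECLOSE as well.  The mount must already have been
 * marked busy, since vflush() checks MNT_MPBUSY.
 */
static int
example_unmount_flush(mp, rootvp)
	struct mount *mp;
	struct vnode *rootvp;
{

	return (vflush(mp, rootvp, SKIPSYSTEM));
}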

/*
 * Disassociate the underlying file system from a vnode.
 */
void
vclean(vp, flags)
	register struct vnode *vp;
	int flags;
{
	int active;

	/*
	 * Check to see if the vnode is in use.
	 * If so we have to reference it before we clean it out
	 * so that its count cannot fall to zero and generate a
	 * race against ourselves to recycle it.
	 */
	if (active = vp->v_usecount)
		VREF(vp);
	/*
	 * Even if the count is zero, the VOP_INACTIVE routine may still
	 * have the object locked while it cleans it out. The VOP_LOCK
	 * ensures that the VOP_INACTIVE routine is done with its work.
	 * For active vnodes, it ensures that no other activity can
	 * occur while the underlying object is being cleaned out.
	 */
	VOP_LOCK(vp);
	/*
	 * Prevent the vnode from being recycled or
	 * brought into use while we clean it out.
	 */
	if (vp->v_flag & VXLOCK)
		panic("vclean: deadlock");
	vp->v_flag |= VXLOCK;
	/*
	 * Clean out any buffers associated with the vnode.
	 */
	if (flags & DOCLOSE)
		vinvalbuf(vp, V_SAVE, NOCRED, NULL, 0, 0);
	/*
	 * Any other processes trying to obtain this lock must first
	 * wait for VXLOCK to clear, then call the new lock operation.
	 */
	VOP_UNLOCK(vp);
	/*
	 * If purging an active vnode, it must be closed and
	 * deactivated before being reclaimed.
	 */
	if (active) {
		if (flags & DOCLOSE)
			VOP_CLOSE(vp, IO_NDELAY, NOCRED, NULL);
		VOP_INACTIVE(vp);
	}
	/*
	 * Reclaim the vnode.
	 */
	if (VOP_RECLAIM(vp))
		panic("vclean: cannot reclaim");
	if (active)
		vrele(vp);

	/*
	 * Done with purge, notify sleepers of the grim news.
	 */
	vp->v_op = dead_vnodeop_p;
	vp->v_tag = VT_NON;
	vp->v_flag &= ~VXLOCK;
	if (vp->v_flag & VXWANT) {
		vp->v_flag &= ~VXWANT;
		wakeup((caddr_t)vp);
	}
}

/*
 * Eliminate all activity associated with the requested vnode
 * and with all vnodes aliased to the requested vnode.
 */
void
vgoneall(vp)
	register struct vnode *vp;
{
	register struct vnode *vq;

	if (vp->v_flag & VALIASED) {
		/*
		 * If a vgone (or vclean) is already in progress,
		 * wait until it is done and return.
		 */
		if (vp->v_flag & VXLOCK) {
			vp->v_flag |= VXWANT;
			sleep((caddr_t)vp, PINOD);
			return;
		}
		/*
		 * Ensure that vp will not be vgone'd while we
		 * are eliminating its aliases.
		 */
		vp->v_flag |= VXLOCK;
		while (vp->v_flag & VALIASED) {
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_rdev != vp->v_rdev ||
				    vq->v_type != vp->v_type || vp == vq)
					continue;
				vgone(vq);
				break;
			}
		}
		/*
		 * Remove the lock so that vgone below will really
		 * eliminate the vnode, after which time vgone will
		 * awaken any sleepers.
		 */
		vp->v_flag &= ~VXLOCK;
	}
	vgone(vp);
}

/*
 * Eliminate all activity associated with a vnode
 * in preparation for reuse.
 */
void
vgone(vp)
	register struct vnode *vp;
{
	register struct vnode *vq;
	struct vnode *vx;

	/*
	 * If a vgone (or vclean) is already in progress,
	 * wait until it is done and return.
	 */
	if (vp->v_flag & VXLOCK) {
		vp->v_flag |= VXWANT;
		sleep((caddr_t)vp, PINOD);
		return;
	}
	/*
	 * Clean out the filesystem specific data.
	 */
	vclean(vp, DOCLOSE);
	/*
	 * Delete from old mount point vnode list, if on one.
	 */
	if (vp->v_mountb) {
		if (vq = vp->v_mountf)
			vq->v_mountb = vp->v_mountb;
		*vp->v_mountb = vq;
		vp->v_mountf = NULL;
		vp->v_mountb = NULL;
		vp->v_mount = NULL;
	}
	/*
	 * If special device, remove it from special device alias list.
	 */
	if (vp->v_type == VBLK || vp->v_type == VCHR) {
		if (*vp->v_hashchain == vp) {
			*vp->v_hashchain = vp->v_specnext;
		} else {
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_specnext != vp)
					continue;
				vq->v_specnext = vp->v_specnext;
				break;
			}
			if (vq == NULL)
				panic("missing bdev");
		}
		if (vp->v_flag & VALIASED) {
			vx = NULL;
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_rdev != vp->v_rdev ||
				    vq->v_type != vp->v_type)
					continue;
				if (vx)
					break;
				vx = vq;
			}
			if (vx == NULL)
				panic("missing alias");
			if (vq == NULL)
				vx->v_flag &= ~VALIASED;
			vp->v_flag &= ~VALIASED;
		}
		FREE(vp->v_specinfo, M_VNODE);
		vp->v_specinfo = NULL;
	}
	/*
	 * If it is on the freelist and not already at the head,
	 * move it to the head of the list.
	 */
	if (vp->v_freeb && vfreeh != vp) {
		if (vq = vp->v_freef)
			vq->v_freeb = vp->v_freeb;
		else
			vfreet = vp->v_freeb;
		*vp->v_freeb = vq;
		vp->v_freef = vfreeh;
		vp->v_freeb = &vfreeh;
		vfreeh->v_freeb = &vp->v_freef;
		vfreeh = vp;
	}
	vp->v_type = VBAD;
}

/*
 * Lookup a vnode by device number.
 */
vfinddev(dev, type, vpp)
	dev_t dev;
	enum vtype type;
	struct vnode **vpp;
{
	register struct vnode *vp;

	for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) {
		if (dev != vp->v_rdev || type != vp->v_type)
			continue;
		*vpp = vp;
		return (1);
	}
	return (0);
}

/*
 * Calculate the total number of references to a special device.
 */
vcount(vp)
	register struct vnode *vp;
{
	register struct vnode *vq;
	int count;

	if ((vp->v_flag & VALIASED) == 0)
		return (vp->v_usecount);
loop:
	for (count = 0, vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
		if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type)
			continue;
		/*
		 * Alias, but not in use, so flush it out.
		 */
		if (vq->v_usecount == 0) {
			vgone(vq);
			goto loop;
		}
		count += vq->v_usecount;
	}
	return (count);
}
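
/*
 * Usage sketch (illustrative only): a device close routine decides
 * whether this is the last close with vcount() rather than
 * v_usecount, since aliased vnodes for the same device each carry
 * their own reference counts.
 */
static int
example_is_last_close(vp)
	struct vnode *vp;
{

	return (vcount(vp) == 1);
}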

/*
 * Print out a description of a vnode.
 */
static char *typename[] =
   { "VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD" };

vprint(label, vp)
	char *label;
	register struct vnode *vp;
{
	char buf[64];

	if (label != NULL)
		printf("%s: ", label);
	printf("type %s, usecount %d, writecount %d, refcount %d,",
		typename[vp->v_type], vp->v_usecount, vp->v_writecount,
		vp->v_holdcnt);
	buf[0] = '\0';
	if (vp->v_flag & VROOT)
		strcat(buf, "|VROOT");
	if (vp->v_flag & VTEXT)
		strcat(buf, "|VTEXT");
	if (vp->v_flag & VSYSTEM)
		strcat(buf, "|VSYSTEM");
	if (vp->v_flag & VXLOCK)
		strcat(buf, "|VXLOCK");
	if (vp->v_flag & VXWANT)
		strcat(buf, "|VXWANT");
	if (vp->v_flag & VBWAIT)
		strcat(buf, "|VBWAIT");
	if (vp->v_flag & VALIASED)
		strcat(buf, "|VALIASED");
	if (buf[0] != '\0')
		printf(" flags (%s)", &buf[1]);
	printf("\n\t");
	VOP_PRINT(vp);
}

#ifdef DEBUG
/*
 * List all of the locked vnodes in the system.
 * Called when debugging the kernel.
 */
printlockedvnodes()
{
	register struct mount *mp;
	register struct vnode *vp;

	printf("Locked vnodes\n");
	mp = rootfs;
	do {
		for (vp = mp->mnt_mounth; vp; vp = vp->v_mountf)
			if (VOP_ISLOCKED(vp))
				vprint((char *)0, vp);
		mp = mp->mnt_next;
	} while (mp != rootfs);
}
#endif

int kinfo_vdebug = 1;
int kinfo_vgetfailed;
#define KINFO_VNODESLOP	10
/*
 * Dump vnode list (via sysctl).
 * Copyout address of vnode followed by vnode.
 */
/* ARGSUSED */
sysctl_vnode(where, sizep)
	char *where;
	size_t *sizep;
{
	register struct mount *mp = rootfs;
	struct mount *omp;
	struct vnode *vp;
	register char *bp = where, *savebp;
	char *ewhere;
	int error;

#define VPTRSZ	sizeof (struct vnode *)
#define VNODESZ	sizeof (struct vnode)
	if (where == NULL) {
		*sizep = (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ);
		return (0);
	}
	ewhere = where + *sizep;

	do {
		if (vfs_busy(mp)) {
			mp = mp->mnt_next;
			continue;
		}
		savebp = bp;
again:
		for (vp = mp->mnt_mounth; vp; vp = vp->v_mountf) {
			/*
			 * Check that the vp is still associated with
			 * this filesystem.  RACE: could have been
			 * recycled onto the same filesystem.
			 */
			if (vp->v_mount != mp) {
				if (kinfo_vdebug)
					printf("kinfo: vp changed\n");
				bp = savebp;
				goto again;
			}
			if (bp + VPTRSZ + VNODESZ > ewhere) {
				*sizep = bp - where;
				return (ENOMEM);
			}
			if ((error = copyout((caddr_t)&vp, bp, VPTRSZ)) ||
			   (error = copyout((caddr_t)vp, bp + VPTRSZ, VNODESZ)))
				return (error);
			bp += VPTRSZ + VNODESZ;
		}
		omp = mp;
		mp = mp->mnt_next;
		vfs_unbusy(omp);
	} while (mp != rootfs);

	*sizep = bp - where;
	return (0);
}
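
/*
 * Usage sketch (illustrative only): consumers use the standard
 * two-call sysctl protocol.  A first call with a null buffer
 * returns the (slop-padded) size needed; the caller then allocates
 * that much and calls again to fill it.
 */
static int
example_dump_vnodes(bufp, sizep)
	char **bufp;
	size_t *sizep;
{
	int error;

	if (error = sysctl_vnode((char *)NULL, sizep))
		return (error);
	*bufp = (char *)malloc((u_long)*sizep, M_TEMP, M_WAITOK);
	return (sysctl_vnode(*bufp, sizep));
}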