/*
 * Copyright (c) 1989 The Regents of the University of California.
 * All rights reserved.
 *
 * %sccs.include.redist.c%
 *
 *	@(#)vfs_subr.c	7.86 (Berkeley) 10/07/92
 */

/*
 * External virtual filesystem routines
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/mount.h>
#include <sys/time.h>
#include <sys/vnode.h>
#include <sys/stat.h>
#include <sys/namei.h>
#include <sys/ucred.h>
#include <sys/buf.h>
#include <sys/errno.h>
#include <sys/malloc.h>

#include <miscfs/specfs/specdev.h>

enum vtype iftovt_tab[16] = {
	VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
	VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
};
int	vttoif_tab[9] = {
	0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
	S_IFSOCK, S_IFIFO, S_IFMT,
};

/*
 * Remove a mount point from the list of mounted filesystems.
 * Unmount of the root is illegal.
 */
void
vfs_remove(mp)
	register struct mount *mp;
{

	if (mp == rootfs)
		panic("vfs_remove: unmounting root");
	mp->mnt_prev->mnt_next = mp->mnt_next;
	mp->mnt_next->mnt_prev = mp->mnt_prev;
	mp->mnt_vnodecovered->v_mountedhere = (struct mount *)0;
	vfs_unlock(mp);
}

/*
 * Lock a filesystem.
 * Used to prevent access to it while mounting and unmounting.
 */
vfs_lock(mp)
	register struct mount *mp;
{

	while (mp->mnt_flag & MNT_MLOCK) {
		mp->mnt_flag |= MNT_MWAIT;
		sleep((caddr_t)mp, PVFS);
	}
	mp->mnt_flag |= MNT_MLOCK;
	return (0);
}

/*
 * Unlock a locked filesystem.
 * Panic if filesystem is not locked.
 */
void
vfs_unlock(mp)
	register struct mount *mp;
{

	if ((mp->mnt_flag & MNT_MLOCK) == 0)
		panic("vfs_unlock: not locked");
	mp->mnt_flag &= ~MNT_MLOCK;
	if (mp->mnt_flag & MNT_MWAIT) {
		mp->mnt_flag &= ~MNT_MWAIT;
		wakeup((caddr_t)mp);
	}
}

/*
 * Mark a mount point as busy.
 * Used to synchronize access and to delay unmounting.
 */
vfs_busy(mp)
	register struct mount *mp;
{

	while (mp->mnt_flag & MNT_MPBUSY) {
		mp->mnt_flag |= MNT_MPWANT;
		sleep((caddr_t)&mp->mnt_flag, PVFS);
	}
	if (mp->mnt_flag & MNT_UNMOUNT)
		return (1);
	mp->mnt_flag |= MNT_MPBUSY;
	return (0);
}

/*
 * Free a busy filesystem.
 * Panic if filesystem is not busy.
 */
vfs_unbusy(mp)
	register struct mount *mp;
{

	if ((mp->mnt_flag & MNT_MPBUSY) == 0)
		panic("vfs_unbusy: not busy");
	mp->mnt_flag &= ~MNT_MPBUSY;
	if (mp->mnt_flag & MNT_MPWANT) {
		mp->mnt_flag &= ~MNT_MPWANT;
		wakeup((caddr_t)&mp->mnt_flag);
	}
}
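
/*
 * Illustrative sketch, not part of the original file: the busy/unbusy
 * protocol as a caller walking a mount point's vnode list would use
 * it, in the manner of kinfo_vnode() below.  The function name is
 * hypothetical.
 */
#ifdef notdef
xxx_walk_mount(mp)
	register struct mount *mp;
{
	register struct vnode *vp;

	if (vfs_busy(mp))	/* returns 1 if an unmount is in progress */
		return (EBUSY);
	for (vp = mp->mnt_mounth; vp; vp = vp->v_mountf)
		;		/* mp cannot be unmounted meanwhile */
	vfs_unbusy(mp);		/* wakes anyone asleep in vfs_busy() */
	return (0);
}
#endif /* notdef */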

/*
 * Lookup a mount point by filesystem identifier.
 */
struct mount *
getvfs(fsid)
	fsid_t *fsid;
{
	register struct mount *mp;

	mp = rootfs;
	do {
		if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] &&
		    mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) {
			return (mp);
		}
		mp = mp->mnt_next;
	} while (mp != rootfs);
	return ((struct mount *)0);
}

/*
 * Get a new unique fsid.
 */
void
getnewfsid(mp, mtype)
	struct mount *mp;
	int mtype;
{
	static u_short xxxfs_mntid;
	fsid_t tfsid;

	mp->mnt_stat.f_fsid.val[0] = makedev(nblkdev + 11, 0);	/* XXX */
	mp->mnt_stat.f_fsid.val[1] = mtype;
	if (xxxfs_mntid == 0)
		++xxxfs_mntid;
	tfsid.val[0] = makedev(nblkdev, xxxfs_mntid);
	tfsid.val[1] = mtype;
	if (rootfs) {
		while (getvfs(&tfsid)) {
			tfsid.val[0]++;
			xxxfs_mntid++;
		}
	}
	mp->mnt_stat.f_fsid.val[0] = tfsid.val[0];
}
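
/*
 * Illustrative sketch, not part of the original file: a filesystem's
 * mount routine stamps the mount point with getnewfsid(), and file
 * handle code can later map the fsid back with getvfs().  MOUNT_XXX
 * and the fhandle use are stand-ins, in the manner of an NFS
 * fhtovp-style lookup.
 */
#ifdef notdef
	getnewfsid(mp, MOUNT_XXX);			/* in xxx_mount() */
	/* ... */
	if ((mp = getvfs(&fhp->fh_fsid)) == NULL)	/* in xxx_fhtovp() */
		return (ESTALE);
#endif /* notdef */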

/*
 * Set vnode attributes to VNOVAL
 */
void vattr_null(vap)
	register struct vattr *vap;
{

	vap->va_type = VNON;
	vap->va_size = vap->va_bytes = VNOVAL;
	vap->va_mode = vap->va_nlink = vap->va_uid = vap->va_gid =
		vap->va_fsid = vap->va_fileid =
		vap->va_blocksize = vap->va_rdev =
		vap->va_atime.ts_sec = vap->va_atime.ts_nsec =
		vap->va_mtime.ts_sec = vap->va_mtime.ts_nsec =
		vap->va_ctime.ts_sec = vap->va_ctime.ts_nsec =
		vap->va_flags = vap->va_gen = VNOVAL;
}

/*
 * Routines having to do with the management of the vnode table.
 */
struct vnode *vfreeh, **vfreet = &vfreeh;
extern int (**dead_vnodeop_p)();
extern void vclean();
long numvnodes;
extern struct vattr va_null;

/*
 * Return the next vnode from the free list.
 */
getnewvnode(tag, mp, vops, vpp)
	enum vtagtype tag;
	struct mount *mp;
	int (**vops)();
	struct vnode **vpp;
{
	register struct vnode *vp, *vq;

	if ((vfreeh == NULL && numvnodes < 2 * desiredvnodes) ||
	    numvnodes < desiredvnodes) {
		vp = (struct vnode *)malloc((u_long)sizeof *vp,
		    M_VNODE, M_WAITOK);
		bzero((char *)vp, sizeof *vp);
		numvnodes++;
	} else {
		if ((vp = vfreeh) == NULL) {
			tablefull("vnode");
			*vpp = 0;
			return (ENFILE);
		}
		if (vp->v_usecount)
			panic("free vnode isn't");
		if (vq = vp->v_freef)
			vq->v_freeb = &vfreeh;
		else
			vfreet = &vfreeh;
		vfreeh = vq;
		vp->v_freef = NULL;
		vp->v_freeb = NULL;
		vp->v_lease = NULL;
		if (vp->v_type != VBAD)
			vgone(vp);
		if (vp->v_data)
			panic("cleaned vnode isn't");
		vp->v_flag = 0;
		vp->v_lastr = 0;
		vp->v_socket = 0;
	}
	vp->v_type = VNON;
	cache_purge(vp);
	vp->v_tag = tag;
	vp->v_op = vops;
	insmntque(vp, mp);
	VREF(vp);
	*vpp = vp;
	return (0);
}
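
/*
 * Illustrative sketch, not part of the original file: how a filesystem
 * obtains a fresh vnode for an in-core inode, in the manner of
 * ffs_vget().  The ops vector name is a stand-in.
 */
#ifdef notdef
	struct vnode *vp;
	int error;

	if (error = getnewvnode(VT_UFS, mp, xxx_vnodeop_p, &vp))
		return (error);
	/* ... then hang the filesystem's private inode off vp->v_data ... */
#endif /* notdef */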

/*
 * Move a vnode from one mount queue to another.
 */
insmntque(vp, mp)
	register struct vnode *vp;
	register struct mount *mp;
{
	register struct vnode *vq;

	/*
	 * Delete from old mount point vnode list, if on one.
	 */
	if (vp->v_mountb) {
		if (vq = vp->v_mountf)
			vq->v_mountb = vp->v_mountb;
		*vp->v_mountb = vq;
	}
	/*
	 * Insert into list of vnodes for the new mount point, if available.
	 */
	vp->v_mount = mp;
	if (mp == NULL) {
		vp->v_mountf = NULL;
		vp->v_mountb = NULL;
		return;
	}
	if (vq = mp->mnt_mounth)
		vq->v_mountb = &vp->v_mountf;
	vp->v_mountf = vq;
	vp->v_mountb = &mp->mnt_mounth;
	mp->mnt_mounth = vp;
}

/*
 * Update outstanding I/O count and do wakeup if requested.
 */
vwakeup(bp)
	register struct buf *bp;
{
	register struct vnode *vp;

	bp->b_dirtyoff = bp->b_dirtyend = 0;
	if (vp = bp->b_vp) {
		vp->v_numoutput--;
		if ((vp->v_flag & VBWAIT) && vp->v_numoutput <= 0) {
			if (vp->v_numoutput < 0)
				panic("vwakeup: neg numoutput");
			vp->v_flag &= ~VBWAIT;
			wakeup((caddr_t)&vp->v_numoutput);
		}
	}
}

/*
 * Flush out and invalidate all buffers associated with a vnode.
 * Called with the underlying object locked.
 */
int
vinvalbuf(vp, flags, cred, p)
	register struct vnode *vp;
	int flags;
	struct ucred *cred;
	struct proc *p;
{
	register struct buf *bp;
	struct buf *nbp, *blist;
	int s, error;

	if (flags & V_SAVE) {
		if (error = VOP_FSYNC(vp, cred, MNT_WAIT, p))
			return (error);
		if (vp->v_dirtyblkhd != NULL)
			panic("vinvalbuf: dirty bufs");
	}
	for (;;) {
		if ((blist = vp->v_cleanblkhd) && flags & V_SAVEMETA)
			while (blist && blist->b_lblkno < 0)
				blist = blist->b_blockf;
		if (!blist && (blist = vp->v_dirtyblkhd) && flags & V_SAVEMETA)
			while (blist && blist->b_lblkno < 0)
				blist = blist->b_blockf;
		if (!blist)
			break;

		for (bp = blist; bp; bp = nbp) {
			nbp = bp->b_blockf;
			if (flags & V_SAVEMETA && bp->b_lblkno < 0)
				continue;
			s = splbio();
			if (bp->b_flags & B_BUSY) {
				bp->b_flags |= B_WANTED;
				sleep((caddr_t)bp, PRIBIO + 1);
				splx(s);
				break;
			}
			bremfree(bp);
			bp->b_flags |= B_BUSY;
			splx(s);
			bp->b_flags |= B_INVAL;
			brelse(bp);
		}
	}
	if (!(flags & V_SAVEMETA) && (vp->v_dirtyblkhd || vp->v_cleanblkhd))
		panic("vinvalbuf: flush failed");
	return (0);
}
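
/*
 * Illustrative sketch, not part of the original file: typical calls.
 * V_SAVE pushes dirty buffers to disk before the purge; with no flags
 * everything is simply discarded.
 */
#ifdef notdef
	/* truncation-style flush: write dirty data first, then discard */
	if (error = vinvalbuf(vp, V_SAVE, cred, p))
		return (error);
	/* revocation-style flush: throw all buffers away */
	(void) vinvalbuf(vp, 0, NOCRED, NULL);
#endif /* notdef */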

/*
 * Associate a buffer with a vnode.
 */
bgetvp(vp, bp)
	register struct vnode *vp;
	register struct buf *bp;
{
	register struct vnode *vq;
	register struct buf *bq;

	if (bp->b_vp)
		panic("bgetvp: not free");
	VHOLD(vp);
	bp->b_vp = vp;
	if (vp->v_type == VBLK || vp->v_type == VCHR)
		bp->b_dev = vp->v_rdev;
	else
		bp->b_dev = NODEV;
	/*
	 * Insert onto list for new vnode.
	 */
	if (bq = vp->v_cleanblkhd)
		bq->b_blockb = &bp->b_blockf;
	bp->b_blockf = bq;
	bp->b_blockb = &vp->v_cleanblkhd;
	vp->v_cleanblkhd = bp;
}

/*
 * Disassociate a buffer from a vnode.
 */
brelvp(bp)
	register struct buf *bp;
{
	struct buf *bq;
	struct vnode *vp;

	if (bp->b_vp == (struct vnode *) 0)
		panic("brelvp: NULL");
	/*
	 * Delete from old vnode list, if on one.
	 */
	if (bp->b_blockb) {
		if (bq = bp->b_blockf)
			bq->b_blockb = bp->b_blockb;
		*bp->b_blockb = bq;
		bp->b_blockf = NULL;
		bp->b_blockb = NULL;
	}
	vp = bp->b_vp;
	bp->b_vp = (struct vnode *) 0;
	HOLDRELE(vp);
}

/*
 * Reassign a buffer from one vnode to another.
 * Used to assign file specific control information
 * (indirect blocks) to the vnode to which they belong.
 */
reassignbuf(bp, newvp)
	register struct buf *bp;
	register struct vnode *newvp;
{
	register struct buf *bq, **listheadp;

	if (newvp == NULL) {
		printf("reassignbuf: NULL\n");
		return;
	}
	/*
	 * Delete from old vnode list, if on one.
	 */
	if (bp->b_blockb) {
		if (bq = bp->b_blockf)
			bq->b_blockb = bp->b_blockb;
		*bp->b_blockb = bq;
	}
	/*
	 * If dirty, put on list of dirty buffers;
	 * otherwise insert onto list of clean buffers.
	 */
	if (bp->b_flags & B_DELWRI)
		listheadp = &newvp->v_dirtyblkhd;
	else
		listheadp = &newvp->v_cleanblkhd;
	if (bq = *listheadp)
		bq->b_blockb = &bp->b_blockf;
	bp->b_blockf = bq;
	bp->b_blockb = listheadp;
	*listheadp = bp;
}

/*
 * Create a vnode for a block device.
 * Used for root filesystem, argdev, and swap areas.
 * Also used for memory file system special devices.
 */
bdevvp(dev, vpp)
	dev_t dev;
	struct vnode **vpp;
{
	register struct vnode *vp;
	struct vnode *nvp;
	int error;

	if (dev == NODEV)
		return (0);
	error = getnewvnode(VT_NON, (struct mount *)0, spec_vnodeop_p, &nvp);
	if (error) {
		*vpp = 0;
		return (error);
	}
	vp = nvp;
	vp->v_type = VBLK;
	if (nvp = checkalias(vp, dev, (struct mount *)0)) {
		vput(vp);
		vp = nvp;
	}
	*vpp = vp;
	return (0);
}
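
/*
 * Illustrative sketch, not part of the original file: machine start-up
 * code wraps the root and swap devices in vnodes roughly this way (the
 * variable names follow the usual kernel globals).
 */
#ifdef notdef
	if (bdevvp(rootdev, &rootvp))
		panic("bdevvp: root");
	if (bdevvp(swapdev, &swapdev_vp))
		panic("bdevvp: swap");
#endif /* notdef */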

/*
 * Check to see if the new vnode represents a special device
 * for which we already have a vnode (either because of
 * bdevvp() or because of a different vnode representing
 * the same block device). If such an alias exists, deallocate
 * the existing contents and return the aliased vnode. The
 * caller is responsible for filling it with its new contents.
 */
struct vnode *
checkalias(nvp, nvp_rdev, mp)
	register struct vnode *nvp;
	dev_t nvp_rdev;
	struct mount *mp;
{
	register struct vnode *vp;
	struct vnode **vpp;

	if (nvp->v_type != VBLK && nvp->v_type != VCHR)
		return (NULLVP);

	vpp = &speclisth[SPECHASH(nvp_rdev)];
loop:
	for (vp = *vpp; vp; vp = vp->v_specnext) {
		if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type)
			continue;
		/*
		 * Alias, but not in use, so flush it out.
		 */
		if (vp->v_usecount == 0) {
			vgone(vp);
			goto loop;
		}
		if (vget(vp))
			goto loop;
		break;
	}
	if (vp == NULL || vp->v_tag != VT_NON) {
		MALLOC(nvp->v_specinfo, struct specinfo *,
			sizeof(struct specinfo), M_VNODE, M_WAITOK);
		nvp->v_rdev = nvp_rdev;
		nvp->v_hashchain = vpp;
		nvp->v_specnext = *vpp;
		nvp->v_specflags = 0;
		*vpp = nvp;
		if (vp != NULL) {
			nvp->v_flag |= VALIASED;
			vp->v_flag |= VALIASED;
			vput(vp);
		}
		return (NULLVP);
	}
	VOP_UNLOCK(vp);
	vclean(vp, 0);
	vp->v_op = nvp->v_op;
	vp->v_tag = nvp->v_tag;
	nvp->v_type = VNON;
	insmntque(vp, mp);
	return (vp);
}
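
/*
 * Illustrative sketch, not part of the original file: how an inode-
 * initialization path uses checkalias(), in the manner of ufs_vinit().
 * If an existing alias is returned, the caller adopts it in place of
 * the vnode it was preparing.
 */
#ifdef notdef
	struct vnode *vp, *nvp;

	vp->v_op = spec_vnodeop_p;
	if (nvp = checkalias(vp, rdev, mp)) {
		/* reassociate the private data with nvp, then: */
		vp = nvp;
	}
#endif /* notdef */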

/*
 * Grab a particular vnode from the free list, increment its
 * reference count and lock it. The vnode lock bit is set if the
 * vnode is being eliminated in vgone. The process is awakened
 * when the transition is completed, and an error returned to
 * indicate that the vnode is no longer usable (possibly having
 * been changed to a new file system type).
 */
vget(vp)
	register struct vnode *vp;
{
	register struct vnode *vq;

	if (vp->v_flag & VXLOCK) {
		vp->v_flag |= VXWANT;
		sleep((caddr_t)vp, PINOD);
		return (1);
	}
	if (vp->v_usecount == 0) {
		if (vq = vp->v_freef)
			vq->v_freeb = vp->v_freeb;
		else
			vfreet = vp->v_freeb;
		*vp->v_freeb = vq;
		vp->v_freef = NULL;
		vp->v_freeb = NULL;
	}
	VREF(vp);
	VOP_LOCK(vp);
	return (0);
}
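
/*
 * Illustrative sketch, not part of the original file: the canonical
 * vget() retry idiom.  A nonzero return means the vnode was torn down
 * while we slept, so any list or hash scan must restart from the top;
 * checkalias() above uses exactly this pattern.
 */
#ifdef notdef
loop:
	for (vp = *vpp; vp; vp = vp->v_specnext) {
		if (vp->v_rdev != dev)		/* hypothetical match test */
			continue;
		if (vget(vp))
			goto loop;		/* vnode went away; rescan */
		break;
	}
#endif /* notdef */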

/*
 * Vnode reference, just increment the count.
 */
void vref(vp)
	struct vnode *vp;
{

	vp->v_usecount++;
}

/*
 * vput(), just unlock and vrele().
 */
void vput(vp)
	register struct vnode *vp;
{

	VOP_UNLOCK(vp);
	vrele(vp);
}

/*
 * Vnode release.
 * If count drops to zero, call inactive routine and return to freelist.
 */
void vrele(vp)
	register struct vnode *vp;
{

#ifdef DIAGNOSTIC
	if (vp == NULL)
		panic("vrele: null vp");
#endif
	vp->v_usecount--;
	if (vp->v_usecount > 0)
		return;
#ifdef DIAGNOSTIC
	if (vp->v_usecount != 0 || vp->v_writecount != 0) {
		vprint("vrele: bad ref count", vp);
		panic("vrele: ref cnt");
	}
#endif
	/*
	 * Insert at tail of LRU list.
	 */
	*vfreet = vp;
	vp->v_freeb = vfreet;
	vp->v_freef = NULL;
	vfreet = &vp->v_freef;
	VOP_INACTIVE(vp);
}
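
/*
 * Illustrative sketch, not part of the original file: the reference
 * discipline these routines implement.  A holder that also has the
 * vnode locked drops both with vput(); one that has already unlocked
 * the vnode drops the reference alone with vrele().
 */
#ifdef notdef
	VREF(vp);		/* take a reference */
	VOP_LOCK(vp);
	/* ... operate on the vnode ... */
	vput(vp);		/* == VOP_UNLOCK(vp) + vrele(vp) */
#endif /* notdef */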

/*
 * Page or buffer structure gets a reference.
 */
void vhold(vp)
	register struct vnode *vp;
{

	vp->v_holdcnt++;
}

/*
 * Page or buffer structure frees a reference.
 */
void holdrele(vp)
	register struct vnode *vp;
{

	if (vp->v_holdcnt <= 0)
		panic("holdrele: holdcnt");
	vp->v_holdcnt--;
}

/*
 * Remove any vnodes in the vnode table belonging to mount point mp.
 *
 * If MNT_NOFORCE is specified, there should not be any active vnodes;
 * an error is returned if any are found (nb: this is a user error,
 * not a system error). If MNT_FORCE is specified, detach any active
 * vnodes that are found.
 */
int busyprt = 0;	/* patch to print out busy vnodes */

vflush(mp, skipvp, flags)
	struct mount *mp;
	struct vnode *skipvp;
	int flags;
{
	register struct vnode *vp, *nvp;
	int busy = 0;

	if ((mp->mnt_flag & MNT_MPBUSY) == 0)
		panic("vflush: not busy");
loop:
	for (vp = mp->mnt_mounth; vp; vp = nvp) {
		if (vp->v_mount != mp)
			goto loop;
		nvp = vp->v_mountf;
		/*
		 * Skip over a selected vnode.
		 */
		if (vp == skipvp)
			continue;
		/*
		 * Skip over vnodes marked VSYSTEM.
		 */
		if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM))
			continue;
		/*
		 * With v_usecount == 0, all we need to do is clear
		 * out the vnode data structures and we are done.
		 */
		if (vp->v_usecount == 0) {
			vgone(vp);
			continue;
		}
		/*
		 * For block or character devices, revert to an
		 * anonymous device. For all other files, just kill them.
		 */
		if (flags & FORCECLOSE) {
			if (vp->v_type != VBLK && vp->v_type != VCHR) {
				vgone(vp);
			} else {
				vclean(vp, 0);
				vp->v_op = spec_vnodeop_p;
				insmntque(vp, (struct mount *)0);
			}
			continue;
		}
		if (busyprt)
			vprint("vflush: busy vnode", vp);
		busy++;
	}
	if (busy)
		return (EBUSY);
	return (0);
}
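
/*
 * Illustrative sketch, not part of the original file: an unmount path
 * in the manner of ffs_unmount(), sparing the device vnode and forcing
 * the issue only if MNT_FORCE was given.
 */
#ifdef notdef
	flags = 0;
	if (mntflags & MNT_FORCE)
		flags |= FORCECLOSE;
	if (error = vflush(mp, devvp, flags))	/* devvp is skipped */
		return (error);
#endif /* notdef */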

/*
 * Disassociate the underlying file system from a vnode.
 */
void
vclean(vp, flags)
	register struct vnode *vp;
	int flags;
{
	struct vop_inactive_args vop_inactive_a;
	struct vop_reclaim_args vop_reclaim_a;
	struct vop_unlock_args vop_unlock_a;
	struct vop_close_args vop_close_a;
	int (**origops)();
	int active;

	/*
	 * Check to see if the vnode is in use.
	 * If so we have to reference it before we clean it out
	 * so that its count cannot fall to zero and generate a
	 * race against ourselves to recycle it.
	 */
	if (active = vp->v_usecount)
		VREF(vp);
	/*
	 * Prevent the vnode from being recycled or
	 * brought into use while we clean it out.
	 */
	if (vp->v_flag & VXLOCK)
		panic("vclean: deadlock");
	vp->v_flag |= VXLOCK;
	/*
	 * Even if the count is zero, the VOP_INACTIVE routine may still
	 * have the object locked while it cleans it out. The VOP_LOCK
	 * ensures that the VOP_INACTIVE routine is done with its work.
	 * For active vnodes, it ensures that no other activity can
	 * occur while the buffer list is being cleaned out.
	 */
	VOP_LOCK(vp);
	if (flags & DOCLOSE)
		vinvalbuf(vp, 1, NOCRED, NULL);
	/*
	 * Prevent any further operations on the vnode from
	 * being passed through to the old file system.
	 */
	origops = vp->v_op;
	vp->v_op = dead_vnodeop_p;
	vp->v_tag = VT_NON;
	/*
	 * If purging an active vnode, it must be unlocked, closed,
	 * and deactivated before being reclaimed.
	 */
	vop_unlock_a.a_desc = VDESC(vop_unlock);
	vop_unlock_a.a_vp = vp;
	VOCALL(origops, VOFFSET(vop_unlock), &vop_unlock_a);
	if (active) {
		/*
		 * Note: these next two calls imply
		 * that vop_close and vop_inactive implementations
		 * cannot count on the ops vector being correctly
		 * set.
		 */
		if (flags & DOCLOSE) {
			vop_close_a.a_desc = VDESC(vop_close);
			vop_close_a.a_vp = vp;
			vop_close_a.a_fflag = IO_NDELAY;
			vop_close_a.a_p = NULL;
			VOCALL(origops, VOFFSET(vop_close), &vop_close_a);
		}
		vop_inactive_a.a_desc = VDESC(vop_inactive);
		vop_inactive_a.a_vp = vp;
		VOCALL(origops, VOFFSET(vop_inactive), &vop_inactive_a);
	}
	/*
	 * Reclaim the vnode by emulating VOP_RECLAIM on the
	 * original ops vector.
	 */
	vop_reclaim_a.a_desc = VDESC(vop_reclaim);
	vop_reclaim_a.a_vp = vp;
	if (VOCALL(origops, VOFFSET(vop_reclaim), &vop_reclaim_a))
		panic("vclean: cannot reclaim");
	if (active)
		vrele(vp);

	/*
	 * Done with purge, notify sleepers in vget of the grim news.
	 */
	vp->v_flag &= ~VXLOCK;
	if (vp->v_flag & VXWANT) {
		vp->v_flag &= ~VXWANT;
		wakeup((caddr_t)vp);
	}
}

/*
 * Eliminate all activity associated with the requested vnode
 * and with all vnodes aliased to the requested vnode.
 */
void vgoneall(vp)
	register struct vnode *vp;
{
	register struct vnode *vq;

	if (vp->v_flag & VALIASED) {
		/*
		 * If a vgone (or vclean) is already in progress,
		 * wait until it is done and return.
		 */
		if (vp->v_flag & VXLOCK) {
			vp->v_flag |= VXWANT;
			sleep((caddr_t)vp, PINOD);
			return;
		}
		/*
		 * Ensure that vp will not be vgone'd while we
		 * are eliminating its aliases.
		 */
		vp->v_flag |= VXLOCK;
		while (vp->v_flag & VALIASED) {
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_rdev != vp->v_rdev ||
				    vq->v_type != vp->v_type || vp == vq)
					continue;
				vgone(vq);
				break;
			}
		}
		/*
		 * Remove the lock so that vgone below will
		 * really eliminate the vnode after which time
		 * vgone will awaken any sleepers.
		 */
		vp->v_flag &= ~VXLOCK;
	}
	vgone(vp);
}

/*
 * Eliminate all activity associated with a vnode
 * in preparation for reuse.
 */
void vgone(vp)
	register struct vnode *vp;
{
	register struct vnode *vq;
	struct vnode *vx;

	/*
	 * If a vgone (or vclean) is already in progress,
	 * wait until it is done and return.
	 */
	if (vp->v_flag & VXLOCK) {
		vp->v_flag |= VXWANT;
		sleep((caddr_t)vp, PINOD);
		return;
	}
	/*
	 * Clean out the filesystem specific data.
	 */
	vclean(vp, DOCLOSE);
	/*
	 * Delete from old mount point vnode list, if on one.
	 */
	if (vp->v_mountb) {
		if (vq = vp->v_mountf)
			vq->v_mountb = vp->v_mountb;
		*vp->v_mountb = vq;
		vp->v_mountf = NULL;
		vp->v_mountb = NULL;
		vp->v_mount = NULL;
	}
	/*
	 * If special device, remove it from special device alias list.
	 */
	if (vp->v_type == VBLK || vp->v_type == VCHR) {
		if (*vp->v_hashchain == vp) {
			*vp->v_hashchain = vp->v_specnext;
		} else {
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_specnext != vp)
					continue;
				vq->v_specnext = vp->v_specnext;
				break;
			}
			if (vq == NULL)
				panic("missing bdev");
		}
		if (vp->v_flag & VALIASED) {
			vx = NULL;
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_rdev != vp->v_rdev ||
				    vq->v_type != vp->v_type)
					continue;
				if (vx)
					break;
				vx = vq;
			}
			if (vx == NULL)
				panic("missing alias");
			if (vq == NULL)
				vx->v_flag &= ~VALIASED;
			vp->v_flag &= ~VALIASED;
		}
		FREE(vp->v_specinfo, M_VNODE);
		vp->v_specinfo = NULL;
	}
	/*
	 * If it is on the freelist, move it to the head of the list.
	 */
	if (vp->v_freeb) {
		if (vq = vp->v_freef)
			vq->v_freeb = vp->v_freeb;
		else
			vfreet = vp->v_freeb;
		*vp->v_freeb = vq;
		vp->v_freef = vfreeh;
		vp->v_freeb = &vfreeh;
		vfreeh->v_freeb = &vp->v_freef;
		vfreeh = vp;
	}
	vp->v_type = VBAD;
}

/*
 * Lookup a vnode by device number.
 */
vfinddev(dev, type, vpp)
	dev_t dev;
	enum vtype type;
	struct vnode **vpp;
{
	register struct vnode *vp;

	for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) {
		if (dev != vp->v_rdev || type != vp->v_type)
			continue;
		*vpp = vp;
		return (0);
	}
	return (1);
}
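
/*
 * Illustrative sketch, not part of the original file: note that, as
 * written above, vfinddev() returns 0 when a vnode for the device
 * exists (leaving it in *vpp) and 1 when none does.
 */
#ifdef notdef
	struct vnode *vp;

	if (vfinddev(dev, VBLK, &vp) == 0)
		vprint("device active", vp);
#endif /* notdef */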

/*
 * Calculate the total number of references to a special device.
 */
vcount(vp)
	register struct vnode *vp;
{
	register struct vnode *vq;
	int count;

	if ((vp->v_flag & VALIASED) == 0)
		return (vp->v_usecount);
loop:
	for (count = 0, vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
		if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type)
			continue;
		/*
		 * Alias, but not in use, so flush it out.
		 */
		if (vq->v_usecount == 0) {
			vgone(vq);
			goto loop;
		}
		count += vq->v_usecount;
	}
	return (count);
}

/*
 * Print out a description of a vnode.
 */
static char *typename[] =
   { "VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD" };

vprint(label, vp)
	char *label;
	register struct vnode *vp;
{
	char buf[64];

	if (label != NULL)
		printf("%s: ", label);
	printf("type %s, usecount %d, writecount %d, refcount %d,",
		typename[vp->v_type], vp->v_usecount, vp->v_writecount,
		vp->v_holdcnt);
	buf[0] = '\0';
	if (vp->v_flag & VROOT)
		strcat(buf, "|VROOT");
	if (vp->v_flag & VTEXT)
		strcat(buf, "|VTEXT");
	if (vp->v_flag & VSYSTEM)
		strcat(buf, "|VSYSTEM");
	if (vp->v_flag & VXLOCK)
		strcat(buf, "|VXLOCK");
	if (vp->v_flag & VXWANT)
		strcat(buf, "|VXWANT");
	if (vp->v_flag & VBWAIT)
		strcat(buf, "|VBWAIT");
	if (vp->v_flag & VALIASED)
		strcat(buf, "|VALIASED");
	if (buf[0] != '\0')
		printf(" flags (%s)", &buf[1]);
	printf("\n\t");
	VOP_PRINT(vp);
}

#ifdef DEBUG
/*
 * List all of the locked vnodes in the system.
 * Called when debugging the kernel.
 */
printlockedvnodes()
{
	register struct mount *mp;
	register struct vnode *vp;

	printf("Locked vnodes\n");
	mp = rootfs;
	do {
		for (vp = mp->mnt_mounth; vp; vp = vp->v_mountf)
			if (VOP_ISLOCKED(vp))
				vprint((char *)0, vp);
		mp = mp->mnt_next;
	} while (mp != rootfs);
}
#endif

int kinfo_vdebug = 1;
int kinfo_vgetfailed;
#define KINFO_VNODESLOP	10
/*
 * Dump vnode list (via kinfo).
 * Copyout address of vnode followed by vnode.
 */
/* ARGSUSED */
kinfo_vnode(op, where, acopysize, arg, aneeded)
	int op;
	char *where;
	int *acopysize, arg, *aneeded;
{
	register struct mount *mp = rootfs;
	struct mount *omp;
	struct vnode *vp;
	register char *bp = where, *savebp;
	char *ewhere;
	int error;

#define VPTRSZ	sizeof (struct vnode *)
#define VNODESZ	sizeof (struct vnode)
	if (where == NULL) {
		*aneeded = (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ);
		return (0);
	}
	ewhere = where + *acopysize;

	do {
		if (vfs_busy(mp)) {
			mp = mp->mnt_next;
			continue;
		}
		savebp = bp;
again:
		for (vp = mp->mnt_mounth; vp; vp = vp->v_mountf) {
			/*
			 * Check that the vp is still associated with
			 * this filesystem.  RACE: could have been
			 * recycled onto the same filesystem.
			 */
			if (vp->v_mount != mp) {
				if (kinfo_vdebug)
					printf("kinfo: vp changed\n");
				bp = savebp;
				goto again;
			}
			if ((bp + VPTRSZ + VNODESZ <= ewhere) &&
			    ((error = copyout((caddr_t)&vp, bp, VPTRSZ)) ||
			     (error = copyout((caddr_t)vp, bp + VPTRSZ,
			      VNODESZ))))
				return (error);
			bp += VPTRSZ + VNODESZ;
		}
		omp = mp;
		mp = mp->mnt_next;
		vfs_unbusy(omp);
	} while (mp != rootfs);

	*aneeded = bp - where;
	if (bp > ewhere)
		*acopysize = ewhere - where;
	else
		*acopysize = bp - where;
	return (0);
}