xref: /original-bsd/sys/kern/vfs_subr.c (revision 753853ba)
1 /*
2  * Copyright (c) 1989 The Regents of the University of California.
3  * All rights reserved.
4  *
5  * %sccs.include.redist.c%
6  *
7  *	@(#)vfs_subr.c	7.70 (Berkeley) 02/25/92
8  */
9 
10 /*
11  * External virtual filesystem routines
12  */
13 
14 #include <sys/param.h>
15 #include <sys/proc.h>
16 #include <sys/mount.h>
17 #include <sys/time.h>
18 #include <sys/vnode.h>
19 #include <sys/stat.h>
20 #include <sys/specdev.h>
21 #include <sys/namei.h>
22 #include <sys/ucred.h>
23 #include <sys/buf.h>
24 #include <sys/errno.h>
25 #include <sys/malloc.h>
26 
27 enum vtype iftovt_tab[16] = {
28 	VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
29 	VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
30 };
31 int	vttoif_tab[9] = {
32 	0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
33 	S_IFSOCK, S_IFIFO, S_IFMT,
34 };
35 
/*
 * Remove a mount point from the list of mounted filesystems.
 * Unmount of the root is illegal.
 * Called with the mount point locked; the lock is released here.
 */
void
vfs_remove(mp)
	register struct mount *mp;
{

	if (mp == rootfs)
		panic("vfs_remove: unmounting root");
	/* Unlink mp from the circular, doubly-linked mount list. */
	mp->mnt_prev->mnt_next = mp->mnt_next;
	mp->mnt_next->mnt_prev = mp->mnt_prev;
	/* The covered vnode no longer has anything mounted on it. */
	mp->mnt_vnodecovered->v_mountedhere = (struct mount *)0;
	/* Release the lock taken by the caller (see vfs_lock()). */
	vfs_unlock(mp);
}
52 
53 /*
54  * Lock a filesystem.
55  * Used to prevent access to it while mounting and unmounting.
56  */
57 vfs_lock(mp)
58 	register struct mount *mp;
59 {
60 
61 	while(mp->mnt_flag & MNT_MLOCK) {
62 		mp->mnt_flag |= MNT_MWAIT;
63 		sleep((caddr_t)mp, PVFS);
64 	}
65 	mp->mnt_flag |= MNT_MLOCK;
66 	return (0);
67 }
68 
69 /*
70  * Unlock a locked filesystem.
71  * Panic if filesystem is not locked.
72  */
73 void
74 vfs_unlock(mp)
75 	register struct mount *mp;
76 {
77 
78 	if ((mp->mnt_flag & MNT_MLOCK) == 0)
79 		panic("vfs_unlock: not locked");
80 	mp->mnt_flag &= ~MNT_MLOCK;
81 	if (mp->mnt_flag & MNT_MWAIT) {
82 		mp->mnt_flag &= ~MNT_MWAIT;
83 		wakeup((caddr_t)mp);
84 	}
85 }
86 
/*
 * Mark a mount point as busy.
 * Used to synchronize access and to delay unmounting.
 * Returns 1 if an unmount is in progress, 0 after acquiring busy.
 */
vfs_busy(mp)
	register struct mount *mp;
{

	/* Sleep until any current holder of the busy flag drops it. */
	while(mp->mnt_flag & MNT_MPBUSY) {
		mp->mnt_flag |= MNT_MPWANT;
		sleep((caddr_t)&mp->mnt_flag, PVFS);
	}
	/* An unmount may have started while we slept; tell the caller. */
	if (mp->mnt_flag & MNT_UNMOUNT)
		return (1);
	mp->mnt_flag |= MNT_MPBUSY;
	return (0);
}
104 
105 /*
106  * Free a busy filesystem.
107  * Panic if filesystem is not busy.
108  */
109 vfs_unbusy(mp)
110 	register struct mount *mp;
111 {
112 
113 	if ((mp->mnt_flag & MNT_MPBUSY) == 0)
114 		panic("vfs_unbusy: not busy");
115 	mp->mnt_flag &= ~MNT_MPBUSY;
116 	if (mp->mnt_flag & MNT_MPWANT) {
117 		mp->mnt_flag &= ~MNT_MPWANT;
118 		wakeup((caddr_t)&mp->mnt_flag);
119 	}
120 }
121 
122 /*
123  * Lookup a mount point by filesystem identifier.
124  */
125 struct mount *
126 getvfs(fsid)
127 	fsid_t *fsid;
128 {
129 	register struct mount *mp;
130 
131 	mp = rootfs;
132 	do {
133 		if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] &&
134 		    mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) {
135 			return (mp);
136 		}
137 		mp = mp->mnt_next;
138 	} while (mp != rootfs);
139 	return ((struct mount *)0);
140 }
141 
/*
 * Set vnode attributes to VNOVAL
 * Marks every field of *vap as "no value supplied" so callers can
 * fill in only the attributes they intend to change.
 */
void vattr_null(vap)
	register struct vattr *vap;
{

	vap->va_type = VNON;
	vap->va_size = vap->va_bytes = VNOVAL;
#ifdef _NOQUAD
	/* Without quad arithmetic the reserved halves are cleared too. */
	vap->va_size_rsv = vap->va_bytes_rsv = VNOVAL;
#endif
	/* Every remaining scalar attribute gets the "no value" sentinel. */
	vap->va_mode = vap->va_nlink = vap->va_uid = vap->va_gid =
		vap->va_fsid = vap->va_fileid =
		vap->va_blocksize = vap->va_rdev =
		vap->va_atime.tv_sec = vap->va_atime.tv_usec =
		vap->va_mtime.tv_sec = vap->va_mtime.tv_usec =
		vap->va_ctime.tv_sec = vap->va_ctime.tv_usec =
		vap->va_flags = vap->va_gen = VNOVAL;
}
162 
163 /*
164  * Routines having to do with the management of the vnode table.
165  */
166 struct vnode *vfreeh, **vfreet;
167 extern struct vnodeops dead_vnodeops, spec_vnodeops;
168 extern void vclean();
169 long numvnodes;
170 struct vattr va_null;
171 
172 /*
173  * Initialize the vnode structures and initialize each file system type.
174  */
175 vfsinit()
176 {
177 	struct vfsops **vfsp;
178 
179 	/*
180 	 * Initialize the vnode name cache
181 	 */
182 	nchinit();
183 	/*
184 	 * Initialize each file system type.
185 	 */
186 	vattr_null(&va_null);
187 	for (vfsp = &vfssw[0]; vfsp <= &vfssw[MOUNT_MAXTYPE]; vfsp++) {
188 		if (*vfsp == NULL)
189 			continue;
190 		(*(*vfsp)->vfs_init)();
191 	}
192 }
193 
/*
 * Return the next vnode from the free list.
 * While below the desiredvnodes target a fresh vnode is allocated;
 * otherwise the head of the free list is recycled.  On success the
 * vnode is referenced, tagged, and queued on mount point mp, and 0
 * is returned; ENFILE is returned when the table is exhausted.
 */
getnewvnode(tag, mp, vops, vpp)
	enum vtagtype tag;
	struct mount *mp;
	struct vnodeops *vops;
	struct vnode **vpp;
{
	register struct vnode *vp, *vq;

	if (numvnodes < desiredvnodes) {
		vp = (struct vnode *)malloc((u_long)sizeof *vp,
		    M_VNODE, M_WAITOK);
		bzero((char *)vp, sizeof *vp);
		numvnodes++;
	} else {
		if ((vp = vfreeh) == NULL) {
			tablefull("vnode");
			*vpp = 0;
			return (ENFILE);
		}
		if (vp->v_usecount)
			panic("free vnode isn't");
		/* Unhook vp from the head of the free list. */
		if (vq = vp->v_freef)
			vq->v_freeb = &vfreeh;
		else
			vfreet = &vfreeh;
		vfreeh = vq;
		vp->v_freef = NULL;
		vp->v_freeb = NULL;
		vp->v_lease = NULL;
		/* Dissociate the recycled vnode from its old file system. */
		if (vp->v_type != VBAD)
			vgone(vp);
		if (vp->v_data)
			panic("cleaned vnode isn't");
		vp->v_flag = 0;
		vp->v_lastr = 0;
		vp->v_socket = 0;
	}
	vp->v_type = VNON;
	cache_purge(vp);
	vp->v_tag = tag;
	vp->v_op = vops;
	/* Queue on the new mount point and take the caller's reference. */
	insmntque(vp, mp);
	VREF(vp);
	*vpp = vp;
	return (0);
}
243 
/*
 * Move a vnode from one mount queue to another.
 * A null mp simply removes the vnode from any mount queue.
 */
insmntque(vp, mp)
	register struct vnode *vp;
	register struct mount *mp;
{
	register struct vnode *vq;

	/*
	 * Delete from old mount point vnode list, if on one.
	 */
	if (vp->v_mountb) {
		if (vq = vp->v_mountf)
			vq->v_mountb = vp->v_mountb;
		*vp->v_mountb = vq;
	}
	/*
	 * Insert into list of vnodes for the new mount point, if available.
	 */
	vp->v_mount = mp;
	if (mp == NULL) {
		vp->v_mountf = NULL;
		vp->v_mountb = NULL;
		return;
	}
	/* Push vp onto the head of the new mount's vnode list. */
	if (vq = mp->mnt_mounth)
		vq->v_mountb = &vp->v_mountf;
	vp->v_mountf = vq;
	vp->v_mountb = &mp->mnt_mounth;
	mp->mnt_mounth = vp;
}
276 
/*
 * Make sure all write-behind blocks associated
 * with mount point are flushed out (from sync).
 * The mount point must already be marked busy by the caller.
 */
mntflushbuf(mountp, flags)
	struct mount *mountp;
	int flags;
{
	register struct vnode *vp;

	if ((mountp->mnt_flag & MNT_MPBUSY) == 0)
		panic("mntflushbuf: not busy");
loop:
	for (vp = mountp->mnt_mounth; vp; vp = vp->v_mountf) {
		if (VOP_ISLOCKED(vp))
			continue;
		/* vget may sleep; on failure the list may have changed. */
		if (vget(vp))
			goto loop;
		vflushbuf(vp, flags);
		vput(vp);
		/* If the vnode moved off this mount, restart the scan. */
		if (vp->v_mount != mountp)
			goto loop;
	}
}
301 
/*
 * Flush all dirty buffers associated with a vnode.
 * If B_SYNC is set in flags, waits for all writes to complete and
 * rescans until the dirty list is empty.
 */
vflushbuf(vp, flags)
	register struct vnode *vp;
	int flags;
{
	register struct buf *bp;
	struct buf *nbp;
	int s;

loop:
	/* Raise to splbio while walking the buffer list. */
	s = splbio();
	for (bp = vp->v_dirtyblkhd; bp; bp = nbp) {
		nbp = bp->b_blockf;
		if ((bp->b_flags & B_BUSY))
			continue;
		if ((bp->b_flags & B_DELWRI) == 0)
			panic("vflushbuf: not dirty");
		bremfree(bp);
		bp->b_flags |= B_BUSY;
		splx(s);
		/*
		 * Wait for I/O associated with indirect blocks to complete,
		 * since there is no way to quickly wait for them below.
		 * NB: This is really specific to ufs, but is done here
		 * as it is easier and quicker.
		 */
		if (bp->b_vp == vp || (flags & B_SYNC) == 0)
			(void) bawrite(bp);
		else
			(void) bwrite(bp);
		/* The write may have slept; rescan the list from the top. */
		goto loop;
	}
	splx(s);
	if ((flags & B_SYNC) == 0)
		return;
	/* Synchronous flush: wait for all outstanding writes to drain. */
	s = splbio();
	while (vp->v_numoutput) {
		vp->v_flag |= VBWAIT;
		sleep((caddr_t)&vp->v_numoutput, PRIBIO + 1);
	}
	splx(s);
	/* New dirty buffers may have appeared while we slept. */
	if (vp->v_dirtyblkhd) {
		vprint("vflushbuf: dirty", vp);
		goto loop;
	}
}
350 
351 /*
352  * Update outstanding I/O count and do wakeup if requested.
353  */
354 vwakeup(bp)
355 	register struct buf *bp;
356 {
357 	register struct vnode *vp;
358 
359 	bp->b_dirtyoff = bp->b_dirtyend = 0;
360 	if (vp = bp->b_vp) {
361 		vp->v_numoutput--;
362 		if ((vp->v_flag & VBWAIT) && vp->v_numoutput <= 0) {
363 			if (vp->v_numoutput < 0)
364 				panic("vwakeup: neg numoutput");
365 			vp->v_flag &= ~VBWAIT;
366 			wakeup((caddr_t)&vp->v_numoutput);
367 		}
368 	}
369 }
370 
/*
 * Invalidate in core blocks belonging to closed or umounted filesystem
 *
 * Go through the list of vnodes associated with the file system;
 * for each vnode invalidate any buffers that it holds. Normally
 * this routine is preceeded by a bflush call, so that on a quiescent
 * filesystem there will be no dirty buffers when we are done. Binval
 * returns the count of dirty buffers when it is finished.
 * The mount point must already be marked busy by the caller.
 */
mntinvalbuf(mountp)
	struct mount *mountp;
{
	register struct vnode *vp;
	int dirty = 0;

	if ((mountp->mnt_flag & MNT_MPBUSY) == 0)
		panic("mntinvalbuf: not busy");
loop:
	for (vp = mountp->mnt_mounth; vp; vp = vp->v_mountf) {
		/* vget may sleep; on failure restart the list scan. */
		if (vget(vp))
			goto loop;
		dirty += vinvalbuf(vp, 1);
		vput(vp);
		/* If the vnode was recycled off this mount, rescan. */
		if (vp->v_mount != mountp)
			goto loop;
	}
	return (dirty);
}
399 
/*
 * Flush out and invalidate all buffers associated with a vnode.
 * Called with the underlying object locked.
 * If "save" is set, delayed-write buffers are written (and counted)
 * rather than discarded; returns the number of such dirty buffers.
 */
vinvalbuf(vp, save)
	register struct vnode *vp;
	int save;
{
	register struct buf *bp;
	struct buf *nbp, *blist;
	int s, dirty = 0;

	/* Drain the dirty list first, then the clean list, until empty. */
	for (;;) {
		if (blist = vp->v_dirtyblkhd)
			/* void */;
		else if (blist = vp->v_cleanblkhd)
			/* void */;
		else
			break;
		for (bp = blist; bp; bp = nbp) {
			nbp = bp->b_blockf;
			s = splbio();
			if (bp->b_flags & B_BUSY) {
				/* Wait for the buffer, then restart this list. */
				bp->b_flags |= B_WANTED;
				sleep((caddr_t)bp, PRIBIO + 1);
				splx(s);
				break;
			}
			bremfree(bp);
			bp->b_flags |= B_BUSY;
			splx(s);
			if (save && (bp->b_flags & B_DELWRI)) {
				dirty++;
				/* bwrite sleeps; restart the outer scan. */
				(void) VOP_BWRITE(bp);
				break;
			}
			/* Indirect blocks belong to another vnode; reassign. */
			if (bp->b_vp != vp)
				reassignbuf(bp, bp->b_vp);
			else
				bp->b_flags |= B_INVAL;
			brelse(bp);
		}
	}
	if (vp->v_dirtyblkhd || vp->v_cleanblkhd)
		panic("vinvalbuf: flush failed");
	return (dirty);
}
447 
/*
 * Associate a buffer with a vnode.
 * Takes a hold on the vnode and pushes the buffer onto the head of
 * the vnode's clean buffer list.
 */
bgetvp(vp, bp)
	register struct vnode *vp;
	register struct buf *bp;
{
	register struct vnode *vq;
	register struct buf *bq;

	if (bp->b_vp)
		panic("bgetvp: not free");
	VHOLD(vp);
	bp->b_vp = vp;
	if (vp->v_type == VBLK || vp->v_type == VCHR)
		bp->b_dev = vp->v_rdev;
	else
		bp->b_dev = NODEV;
	/*
	 * Insert onto list for new vnode.
	 */
	if (bq = vp->v_cleanblkhd)
		bq->b_blockb = &bp->b_blockf;
	bp->b_blockf = bq;
	bp->b_blockb = &vp->v_cleanblkhd;
	vp->v_cleanblkhd = bp;
}
475 
/*
 * Disassociate a buffer from a vnode.
 * Removes the buffer from the vnode's buffer list and releases the
 * hold taken in bgetvp().
 */
brelvp(bp)
	register struct buf *bp;
{
	struct buf *bq;
	struct vnode *vp;

	if (bp->b_vp == (struct vnode *) 0)
		panic("brelvp: NULL");
	/*
	 * Delete from old vnode list, if on one.
	 */
	if (bp->b_blockb) {
		if (bq = bp->b_blockf)
			bq->b_blockb = bp->b_blockb;
		*bp->b_blockb = bq;
		bp->b_blockf = NULL;
		bp->b_blockb = NULL;
	}
	vp = bp->b_vp;
	bp->b_vp = (struct vnode *) 0;
	HOLDRELE(vp);
}
501 
/*
 * Reassign a buffer from one vnode to another.
 * Used to assign file specific control information
 * (indirect blocks) to the vnode to which they belong.
 */
reassignbuf(bp, newvp)
	register struct buf *bp;
	register struct vnode *newvp;
{
	register struct buf *bq, **listheadp;

	if (newvp == NULL) {
		printf("reassignbuf: NULL");
		return;
	}
	/*
	 * Delete from old vnode list, if on one.
	 */
	if (bp->b_blockb) {
		if (bq = bp->b_blockf)
			bq->b_blockb = bp->b_blockb;
		*bp->b_blockb = bq;
	}
	/*
	 * If dirty, put on list of dirty buffers;
	 * otherwise insert onto list of clean buffers.
	 */
	if (bp->b_flags & B_DELWRI)
		listheadp = &newvp->v_dirtyblkhd;
	else
		listheadp = &newvp->v_cleanblkhd;
	/* Push onto the head of the chosen list. */
	if (bq = *listheadp)
		bq->b_blockb = &bp->b_blockf;
	bp->b_blockf = bq;
	bp->b_blockb = listheadp;
	*listheadp = bp;
}
539 
/*
 * Create a vnode for a block device.
 * Used for root filesystem, argdev, and swap areas.
 * Also used for memory file system special devices.
 * Returns 0 with the vnode in *vpp, or the error from getnewvnode().
 */
bdevvp(dev, vpp)
	dev_t dev;
	struct vnode **vpp;
{
	register struct vnode *vp;
	struct vnode *nvp;
	int error;

	if (dev == NODEV)
		return (0);
	error = getnewvnode(VT_NON, (struct mount *)0, &spec_vnodeops, &nvp);
	if (error) {
		*vpp = 0;
		return (error);
	}
	vp = nvp;
	vp->v_type = VBLK;
	/* If an alias for this device already exists, use it instead. */
	if (nvp = checkalias(vp, dev, (struct mount *)0)) {
		vput(vp);
		vp = nvp;
	}
	*vpp = vp;
	return (0);
}
569 
/*
 * Check to see if the new vnode represents a special device
 * for which we already have a vnode (either because of
 * bdevvp() or because of a different vnode representing
 * the same block device). If such an alias exists, deallocate
 * the existing contents and return the aliased vnode. The
 * caller is responsible for filling it with its new contents.
 */
struct vnode *
checkalias(nvp, nvp_rdev, mp)
	register struct vnode *nvp;
	dev_t nvp_rdev;
	struct mount *mp;
{
	register struct vnode *vp;
	struct vnode **vpp;

	/* Only block and character devices can have aliases. */
	if (nvp->v_type != VBLK && nvp->v_type != VCHR)
		return (NULLVP);

	vpp = &speclisth[SPECHASH(nvp_rdev)];
loop:
	for (vp = *vpp; vp; vp = vp->v_specnext) {
		if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type)
			continue;
		/*
		 * Alias, but not in use, so flush it out.
		 */
		if (vp->v_usecount == 0) {
			vgone(vp);
			goto loop;
		}
		/* vget may sleep; the hash chain may change under us. */
		if (vget(vp))
			goto loop;
		break;
	}
	/* No reusable alias: set up specinfo and enter the hash chain. */
	if (vp == NULL || vp->v_tag != VT_NON) {
		MALLOC(nvp->v_specinfo, struct specinfo *,
			sizeof(struct specinfo), M_VNODE, M_WAITOK);
		nvp->v_rdev = nvp_rdev;
		nvp->v_hashchain = vpp;
		nvp->v_specnext = *vpp;
		nvp->v_specflags = 0;
		*vpp = nvp;
		if (vp != NULL) {
			nvp->v_flag |= VALIASED;
			vp->v_flag |= VALIASED;
			vput(vp);
		}
		return (NULLVP);
	}
	/* Reuse the existing vnode: clean it and take over its identity. */
	VOP_UNLOCK(vp);
	vclean(vp, 0);
	vp->v_op = nvp->v_op;
	vp->v_tag = nvp->v_tag;
	nvp->v_type = VNON;
	insmntque(vp, mp);
	return (vp);
}
629 
/*
 * Grab a particular vnode from the free list, increment its
 * reference count and lock it. The vnode lock bit is set the
 * vnode is being eliminated in vgone. The process is awakened
 * when the transition is completed, and an error returned to
 * indicate that the vnode is no longer usable (possibly having
 * been changed to a new file system type).
 */
vget(vp)
	register struct vnode *vp;
{
	register struct vnode *vq;

	/* Being cleaned by vgone/vclean: wait and tell the caller. */
	if (vp->v_flag & VXLOCK) {
		vp->v_flag |= VXWANT;
		sleep((caddr_t)vp, PINOD);
		return (1);
	}
	/* Unreferenced vnodes sit on the free list: remove first. */
	if (vp->v_usecount == 0) {
		if (vq = vp->v_freef)
			vq->v_freeb = vp->v_freeb;
		else
			vfreet = vp->v_freeb;
		*vp->v_freeb = vq;
		vp->v_freef = NULL;
		vp->v_freeb = NULL;
	}
	VREF(vp);
	VOP_LOCK(vp);
	return (0);
}
661 
662 /*
663  * Vnode reference, just increment the count
664  */
665 void vref(vp)
666 	struct vnode *vp;
667 {
668 
669 	vp->v_usecount++;
670 }
671 
/*
 * vput(), just unlock and vrele()
 * Convenience wrapper: releases the vnode lock, then drops the
 * caller's reference.
 */
void vput(vp)
	register struct vnode *vp;
{
	VOP_UNLOCK(vp);
	vrele(vp);
}
682 
/*
 * Vnode release.
 * If count drops to zero, call inactive routine and return to freelist.
 */
void vrele(vp)
	register struct vnode *vp;
{
	struct proc *p = curproc;		/* XXX */

#ifdef DIAGNOSTIC
	if (vp == NULL)
		panic("vrele: null vp");
#endif
	vp->v_usecount--;
	if (vp->v_usecount > 0)
		return;
#ifdef DIAGNOSTIC
	/* A negative usecount or lingering writers means a leaked ref. */
	if (vp->v_usecount != 0 || vp->v_writecount != 0) {
		vprint("vrele: bad ref count", vp);
		panic("vrele: ref cnt");
	}
#endif
	/* Last reference gone: append the vnode to the free list. */
	if (vfreeh == NULLVP) {
		/*
		 * insert into empty list
		 */
		vfreeh = vp;
		vp->v_freeb = &vfreeh;
	} else {
		/*
		 * insert at tail of list
		 */
		*vfreet = vp;
		vp->v_freeb = vfreet;
	}
	vp->v_freef = NULL;
	vfreet = &vp->v_freef;
	/* Let the file system deactivate the now-unreferenced vnode. */
	VOP_INACTIVE(vp, p);
}
722 
723 /*
724  * Page or buffer structure gets a reference.
725  */
726 vhold(vp)
727 	register struct vnode *vp;
728 {
729 
730 	vp->v_holdcnt++;
731 }
732 
733 /*
734  * Page or buffer structure frees a reference.
735  */
736 holdrele(vp)
737 	register struct vnode *vp;
738 {
739 
740 	if (vp->v_holdcnt <= 0)
741 		panic("holdrele: holdcnt");
742 	vp->v_holdcnt--;
743 }
744 
/*
 * Remove any vnodes in the vnode table belonging to mount point mp.
 *
 * If MNT_NOFORCE is specified, there should not be any active ones,
 * return error if any are found (nb: this is a user error, not a
 * system error). If MNT_FORCE is specified, detach any active vnodes
 * that are found.
 */
int busyprt = 0;	/* patch to print out busy vnodes */

vflush(mp, skipvp, flags)
	struct mount *mp;
	struct vnode *skipvp;
	int flags;
{
	register struct vnode *vp, *nvp;
	int busy = 0;

	if ((mp->mnt_flag & MNT_MPBUSY) == 0)
		panic("vflush: not busy");
loop:
	for (vp = mp->mnt_mounth; vp; vp = nvp) {
		/* vgone below may sleep; if the list changed, rescan. */
		if (vp->v_mount != mp)
			goto loop;
		nvp = vp->v_mountf;
		/*
		 * Skip over a selected vnode.
		 */
		if (vp == skipvp)
			continue;
		/*
		 * Skip over a vnodes marked VSYSTEM.
		 */
		if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM))
			continue;
		/*
		 * With v_usecount == 0, all we need to do is clear
		 * out the vnode data structures and we are done.
		 */
		if (vp->v_usecount == 0) {
			vgone(vp);
			continue;
		}
		/*
		 * For block or character devices, revert to an
		 * anonymous device. For all other files, just kill them.
		 */
		if (flags & FORCECLOSE) {
			if (vp->v_type != VBLK && vp->v_type != VCHR) {
				vgone(vp);
			} else {
				vclean(vp, 0);
				vp->v_op = &spec_vnodeops;
				insmntque(vp, (struct mount *)0);
			}
			continue;
		}
		/* Active vnode and no FORCECLOSE: count it as busy. */
		if (busyprt)
			vprint("vflush: busy vnode", vp);
		busy++;
	}
	if (busy)
		return (EBUSY);
	return (0);
}
810 
/*
 * Disassociate the underlying file system from a vnode.
 * With DOCLOSE in flags, the vnode's buffers are invalidated and,
 * if active, its close routine is invoked.
 */
void vclean(vp, flags)
	register struct vnode *vp;
	int flags;
{
	struct vnodeops *origops;
	int active;
	struct proc *p = curproc;	/* XXX */

	/*
	 * Check to see if the vnode is in use.
	 * If so we have to reference it before we clean it out
	 * so that its count cannot fall to zero and generate a
	 * race against ourselves to recycle it.
	 */
	if (active = vp->v_usecount)
		VREF(vp);
	/*
	 * Prevent the vnode from being recycled or
	 * brought into use while we clean it out.
	 */
	if (vp->v_flag & VXLOCK)
		panic("vclean: deadlock");
	vp->v_flag |= VXLOCK;
	/*
	 * Even if the count is zero, the VOP_INACTIVE routine may still
	 * have the object locked while it cleans it out. The VOP_LOCK
	 * ensures that the VOP_INACTIVE routine is done with its work.
	 * For active vnodes, it ensures that no other activity can
	 * occur while the buffer list is being cleaned out.
	 */
	VOP_LOCK(vp);
	if (flags & DOCLOSE)
		vinvalbuf(vp, 1);
	/*
	 * Prevent any further operations on the vnode from
	 * being passed through to the old file system.
	 */
	origops = vp->v_op;
	vp->v_op = &dead_vnodeops;
	vp->v_tag = VT_NON;
	/*
	 * If purging an active vnode, it must be unlocked, closed,
	 * and deactivated before being reclaimed.
	 * Note: the old file system's operations must be used here,
	 * since v_op now points at dead_vnodeops.
	 */
	(*(origops->vop_unlock))(vp);
	if (active) {
		if (flags & DOCLOSE)
			(*(origops->vop_close))(vp, IO_NDELAY, NOCRED, p);
		(*(origops->vop_inactive))(vp, p);
	}
	/*
	 * Reclaim the vnode.
	 */
	if ((*(origops->vop_reclaim))(vp))
		panic("vclean: cannot reclaim");
	if (active)
		vrele(vp);
	/*
	 * Done with purge, notify sleepers in vget of the grim news.
	 */
	vp->v_flag &= ~VXLOCK;
	if (vp->v_flag & VXWANT) {
		vp->v_flag &= ~VXWANT;
		wakeup((caddr_t)vp);
	}
}
880 
/*
 * Eliminate all activity associated with  the requested vnode
 * and with all vnodes aliased to the requested vnode.
 */
void vgoneall(vp)
	register struct vnode *vp;
{
	register struct vnode *vq;

	if (vp->v_flag & VALIASED) {
		/*
		 * If a vgone (or vclean) is already in progress,
		 * wait until it is done and return.
		 */
		if (vp->v_flag & VXLOCK) {
			vp->v_flag |= VXWANT;
			sleep((caddr_t)vp, PINOD);
			return;
		}
		/*
		 * Ensure that vp will not be vgone'd while we
		 * are eliminating its aliases.
		 */
		vp->v_flag |= VXLOCK;
		/* Keep killing aliases until vp's VALIASED flag clears. */
		while (vp->v_flag & VALIASED) {
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_rdev != vp->v_rdev ||
				    vq->v_type != vp->v_type || vp == vq)
					continue;
				vgone(vq);
				break;
			}
		}
		/*
		 * Remove the lock so that vgone below will
		 * really eliminate the vnode after which time
		 * vgone will awaken any sleepers.
		 */
		vp->v_flag &= ~VXLOCK;
	}
	vgone(vp);
}
923 
/*
 * Eliminate all activity associated with a vnode
 * in preparation for reuse.
 */
void vgone(vp)
	register struct vnode *vp;
{
	register struct vnode *vq;
	struct vnode *vx;

	/*
	 * If a vgone (or vclean) is already in progress,
	 * wait until it is done and return.
	 */
	if (vp->v_flag & VXLOCK) {
		vp->v_flag |= VXWANT;
		sleep((caddr_t)vp, PINOD);
		return;
	}
	/*
	 * Clean out the filesystem specific data.
	 */
	vclean(vp, DOCLOSE);
	/*
	 * Delete from old mount point vnode list, if on one.
	 */
	if (vp->v_mountb) {
		if (vq = vp->v_mountf)
			vq->v_mountb = vp->v_mountb;
		*vp->v_mountb = vq;
		vp->v_mountf = NULL;
		vp->v_mountb = NULL;
		vp->v_mount = NULL;
	}
	/*
	 * If special device, remove it from special device alias list.
	 */
	if (vp->v_type == VBLK || vp->v_type == VCHR) {
		if (*vp->v_hashchain == vp) {
			*vp->v_hashchain = vp->v_specnext;
		} else {
			/* Find vp's predecessor on the hash chain. */
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_specnext != vp)
					continue;
				vq->v_specnext = vp->v_specnext;
				break;
			}
			if (vq == NULL)
				panic("missing bdev");
		}
		if (vp->v_flag & VALIASED) {
			/*
			 * Look for remaining aliases of this device.  If
			 * exactly one remains (vx set, vq NULL at loop end),
			 * it is no longer aliased.
			 */
			vx = NULL;
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_rdev != vp->v_rdev ||
				    vq->v_type != vp->v_type)
					continue;
				if (vx)
					break;
				vx = vq;
			}
			if (vx == NULL)
				panic("missing alias");
			if (vq == NULL)
				vx->v_flag &= ~VALIASED;
			vp->v_flag &= ~VALIASED;
		}
		FREE(vp->v_specinfo, M_VNODE);
		vp->v_specinfo = NULL;
	}
	/*
	 * If it is on the freelist, move it to the head of the list.
	 */
	if (vp->v_freeb) {
		if (vq = vp->v_freef)
			vq->v_freeb = vp->v_freeb;
		else
			vfreet = vp->v_freeb;
		*vp->v_freeb = vq;
		vp->v_freef = vfreeh;
		vp->v_freeb = &vfreeh;
		vfreeh->v_freeb = &vp->v_freef;
		vfreeh = vp;
	}
	/* Mark the vnode unusable until it is recycled. */
	vp->v_type = VBAD;
}
1009 
1010 /*
1011  * Lookup a vnode by device number.
1012  */
1013 vfinddev(dev, type, vpp)
1014 	dev_t dev;
1015 	enum vtype type;
1016 	struct vnode **vpp;
1017 {
1018 	register struct vnode *vp;
1019 
1020 	for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) {
1021 		if (dev != vp->v_rdev || type != vp->v_type)
1022 			continue;
1023 		*vpp = vp;
1024 		return (0);
1025 	}
1026 	return (1);
1027 }
1028 
/*
 * Calculate the total number of references to a special device.
 * Sums the usecounts of all aliases; unused aliases are flushed
 * out along the way.
 */
vcount(vp)
	register struct vnode *vp;
{
	register struct vnode *vq;
	int count;

	if ((vp->v_flag & VALIASED) == 0)
		return (vp->v_usecount);
loop:
	for (count = 0, vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
		if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type)
			continue;
		/*
		 * Alias, but not in use, so flush it out.
		 */
		if (vq->v_usecount == 0) {
			vgone(vq);
			/* vgone changed the chain; restart the count. */
			goto loop;
		}
		count += vq->v_usecount;
	}
	return (count);
}
1055 
/*
 * Print out a description of a vnode.
 */
static char *typename[] =
   { "VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD" };

/*
 * Print a one-line summary of a vnode: label, type, reference
 * counts, symbolic flags, and the file system's own description.
 */
vprint(label, vp)
	char *label;
	register struct vnode *vp;
{
	char buf[64];

	if (label != NULL)
		printf("%s: ", label);
	printf("type %s, usecount %d, writecount %d, refcount %d,",
		typename[vp->v_type], vp->v_usecount, vp->v_writecount,
		vp->v_holdcnt);
	/* Build a "|FLAG|FLAG" string, then print it without the lead '|'. */
	buf[0] = '\0';
	if (vp->v_flag & VROOT)
		strcat(buf, "|VROOT");
	if (vp->v_flag & VTEXT)
		strcat(buf, "|VTEXT");
	if (vp->v_flag & VSYSTEM)
		strcat(buf, "|VSYSTEM");
	if (vp->v_flag & VXLOCK)
		strcat(buf, "|VXLOCK");
	if (vp->v_flag & VXWANT)
		strcat(buf, "|VXWANT");
	if (vp->v_flag & VBWAIT)
		strcat(buf, "|VBWAIT");
	if (vp->v_flag & VALIASED)
		strcat(buf, "|VALIASED");
	if (buf[0] != '\0')
		printf(" flags (%s)", &buf[1]);
	printf("\n\t");
	VOP_PRINT(vp);
}
1093 
#ifdef DEBUG
/*
 * List all of the locked vnodes in the system.
 * Called when debugging the kernel.
 */
printlockedvnodes()
{
	register struct mount *mp;
	register struct vnode *vp;

	printf("Locked vnodes\n");
	/* Walk the circular mount list, printing each locked vnode. */
	mp = rootfs;
	do {
		for (vp = mp->mnt_mounth; vp; vp = vp->v_mountf)
			if (VOP_ISLOCKED(vp))
				vprint((char *)0, vp);
		mp = mp->mnt_next;
	} while (mp != rootfs);
}
#endif
1114 
int kinfo_vdebug = 1;		/* log restarts caused by list races */
int kinfo_vgetfailed;
#define KINFO_VNODESLOP	10	/* slack added to the size estimate */
/*
 * Dump vnode list (via kinfo).
 * Copyout address of vnode followed by vnode.
 * With a NULL buffer, only the estimated size is reported via
 * *aneeded; otherwise *aneeded/*acopysize reflect the bytes written.
 */
/* ARGSUSED */
kinfo_vnode(op, where, acopysize, arg, aneeded)
	int op;
	char *where;
	int *acopysize, arg, *aneeded;
{
	register struct mount *mp = rootfs;
	struct mount *omp;
	struct vnode *vp;
	register char *bp = where, *savebp;
	char *ewhere = where + *acopysize;
	int error;

#define VPTRSZ	sizeof (struct vnode *)
#define VNODESZ	sizeof (struct vnode)
	if (where == NULL) {
		*aneeded = (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ);
		return (0);
	}

	do {
		/* Skip mount points that are being unmounted. */
		if (vfs_busy(mp)) {
			mp = mp->mnt_next;
			continue;
		}
		savebp = bp;
again:
		for (vp = mp->mnt_mounth; vp; vp = vp->v_mountf) {
			/*
			 * Check that the vp is still associated with
			 * this filesystem.  RACE: could have been
			 * recycled onto the same filesystem.
			 */
			if (vp->v_mount != mp) {
				if (kinfo_vdebug)
					printf("kinfo: vp changed\n");
				bp = savebp;
				goto again;
			}
			/* Copy out (pointer, vnode) pairs while room remains. */
			if ((bp + VPTRSZ + VNODESZ <= ewhere) &&
			    ((error = copyout((caddr_t)&vp, bp, VPTRSZ)) ||
			     (error = copyout((caddr_t)vp, bp + VPTRSZ,
			      VNODESZ))))
				return (error);
			bp += VPTRSZ + VNODESZ;
		}
		omp = mp;
		mp = mp->mnt_next;
		vfs_unbusy(omp);
	} while (mp != rootfs);

	*aneeded = bp - where;
	if (bp > ewhere)
		*acopysize = ewhere - where;
	else
		*acopysize = bp - where;
	return (0);
}
1180