/*
 * Copyright (c) 1989 The Regents of the University of California.
 * All rights reserved.
 *
 * %sccs.include.redist.c%
 *
 *	@(#)vfs_subr.c	7.76 (Berkeley) 05/15/92
 */

/*
 * External virtual filesystem routines
 */

#include <sys/param.h>
#include <sys/proc.h>
#include <sys/mount.h>
#include <sys/time.h>
#include <sys/vnode.h>
#include <sys/stat.h>
#include <sys/specdev.h>
#include <sys/namei.h>
#include <sys/ucred.h>
#include <sys/buf.h>
#include <sys/errno.h>
#include <sys/malloc.h>

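/*
 * Conversion tables between inode file type bits and vnode types:
 * a mode maps to a vnode type as iftovt_tab[((mode) & S_IFMT) >> 12],
 * and a vnode type maps back as vttoif_tab[(int)type] (typically via
 * the IFTOVT and VTTOIF macros).
 */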
enum vtype iftovt_tab[16] = {
	VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
	VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
};
int	vttoif_tab[9] = {
	0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
	S_IFSOCK, S_IFIFO, S_IFMT,
};

/*
 * Remove a mount point from the list of mounted filesystems.
 * Unmount of the root is illegal.
 */
void
vfs_remove(mp)
	register struct mount *mp;
{

	if (mp == rootfs)
		panic("vfs_remove: unmounting root");
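	/*
	 * The mount list is a circular, doubly linked list rooted at
	 * rootfs, so removal is a simple unlink of mp from its
	 * neighbors.
	 */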
	mp->mnt_prev->mnt_next = mp->mnt_next;
	mp->mnt_next->mnt_prev = mp->mnt_prev;
	mp->mnt_vnodecovered->v_mountedhere = (struct mount *)0;
	vfs_unlock(mp);
}

/*
 * Lock a filesystem.
 * Used to prevent access to it while mounting and unmounting.
 */
vfs_lock(mp)
	register struct mount *mp;
{

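	/*
	 * MNT_MWAIT records that someone is sleeping on the lock so
	 * that vfs_unlock knows a wakeup is needed.
	 */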
	while(mp->mnt_flag & MNT_MLOCK) {
		mp->mnt_flag |= MNT_MWAIT;
		sleep((caddr_t)mp, PVFS);
	}
	mp->mnt_flag |= MNT_MLOCK;
	return (0);
}

/*
 * Unlock a locked filesystem.
 * Panic if filesystem is not locked.
 */
void
vfs_unlock(mp)
	register struct mount *mp;
{

	if ((mp->mnt_flag & MNT_MLOCK) == 0)
		panic("vfs_unlock: not locked");
	mp->mnt_flag &= ~MNT_MLOCK;
	if (mp->mnt_flag & MNT_MWAIT) {
		mp->mnt_flag &= ~MNT_MWAIT;
		wakeup((caddr_t)mp);
	}
}

/*
 * Mark a mount point as busy.
 * Used to synchronize access and to delay unmounting.
 */
vfs_busy(mp)
	register struct mount *mp;
{

	while(mp->mnt_flag & MNT_MPBUSY) {
		mp->mnt_flag |= MNT_MPWANT;
		sleep((caddr_t)&mp->mnt_flag, PVFS);
	}
	if (mp->mnt_flag & MNT_UNMOUNT)
		return (1);
	mp->mnt_flag |= MNT_MPBUSY;
	return (0);
}

/*
 * Free a busy filesystem.
 * Panic if filesystem is not busy.
 */
vfs_unbusy(mp)
	register struct mount *mp;
{

	if ((mp->mnt_flag & MNT_MPBUSY) == 0)
		panic("vfs_unbusy: not busy");
	mp->mnt_flag &= ~MNT_MPBUSY;
	if (mp->mnt_flag & MNT_MPWANT) {
		mp->mnt_flag &= ~MNT_MPWANT;
		wakeup((caddr_t)&mp->mnt_flag);
	}
}

/*
 * Look up a mount point by filesystem identifier.
 */
struct mount *
getvfs(fsid)
	fsid_t *fsid;
{
	register struct mount *mp;

	mp = rootfs;
	do {
		if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] &&
		    mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) {
			return (mp);
		}
		mp = mp->mnt_next;
	} while (mp != rootfs);
	return ((struct mount *)0);
}

/*
 * Set vnode attributes to VNOVAL
 */
void vattr_null(vap)
	register struct vattr *vap;
{

	vap->va_type = VNON;
	vap->va_size = vap->va_bytes = VNOVAL;
#ifdef _NOQUAD
	vap->va_size_rsv = vap->va_bytes_rsv = VNOVAL;
#endif
	vap->va_mode = vap->va_nlink = vap->va_uid = vap->va_gid =
		vap->va_fsid = vap->va_fileid =
		vap->va_blocksize = vap->va_rdev =
		vap->va_atime.tv_sec = vap->va_atime.tv_usec =
		vap->va_mtime.tv_sec = vap->va_mtime.tv_usec =
		vap->va_ctime.tv_sec = vap->va_ctime.tv_usec =
		vap->va_flags = vap->va_gen = VNOVAL;
}

/*
 * Routines having to do with the management of the vnode table.
 */
extern struct vnode *vfreeh, **vfreet;
extern int (**dead_vnodeop_p)();
extern int (**spec_vnodeop_p)();
extern void vclean();
long numvnodes;
extern struct vattr va_null;

/*
 * Return the next vnode from the free list.
 */
getnewvnode(tag, mp, vops, vpp)
	enum vtagtype tag;
	struct mount *mp;
	int (**vops)();
	struct vnode **vpp;
{
	register struct vnode *vp, *vq;

	if (numvnodes < desiredvnodes) {
		vp = (struct vnode *)malloc((u_long)sizeof *vp,
		    M_VNODE, M_WAITOK);
		bzero((char *)vp, sizeof *vp);
		numvnodes++;
	} else {
		if ((vp = vfreeh) == NULL) {
			tablefull("vnode");
			*vpp = 0;
			return (ENFILE);
		}
		if (vp->v_usecount)
			panic("free vnode isn't");
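		/*
		 * Unlink the vnode from the head of the free list,
		 * then scrub any identity left over from its last use.
		 */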
		if (vq = vp->v_freef)
			vq->v_freeb = &vfreeh;
		else
			vfreet = &vfreeh;
		vfreeh = vq;
		vp->v_freef = NULL;
		vp->v_freeb = NULL;
		vp->v_lease = NULL;
		if (vp->v_type != VBAD)
			vgone(vp);
		if (vp->v_data)
			panic("cleaned vnode isn't");
		vp->v_flag = 0;
		vp->v_lastr = 0;
		vp->v_socket = 0;
	}
	vp->v_type = VNON;
	cache_purge(vp);
	vp->v_tag = tag;
	vp->v_op = vops;
	insmntque(vp, mp);
	VREF(vp);
	*vpp = vp;
	return (0);
}

/*
 * Move a vnode from one mount queue to another.
 */
insmntque(vp, mp)
	register struct vnode *vp;
	register struct mount *mp;
{
	register struct vnode *vq;

	/*
	 * Delete from old mount point vnode list, if on one.
	 */
	if (vp->v_mountb) {
		if (vq = vp->v_mountf)
			vq->v_mountb = vp->v_mountb;
		*vp->v_mountb = vq;
	}
	/*
	 * Insert into list of vnodes for the new mount point, if available.
	 */
	vp->v_mount = mp;
	if (mp == NULL) {
		vp->v_mountf = NULL;
		vp->v_mountb = NULL;
		return;
	}
	if (vq = mp->mnt_mounth)
		vq->v_mountb = &vp->v_mountf;
	vp->v_mountf = vq;
	vp->v_mountb = &mp->mnt_mounth;
	mp->mnt_mounth = vp;
}

/*
 * Make sure all write-behind blocks associated
 * with mount point are flushed out (from sync).
 */
mntflushbuf(mountp, flags)
	struct mount *mountp;
	int flags;
{
	USES_VOP_ISLOCKED;
	register struct vnode *vp;

	if ((mountp->mnt_flag & MNT_MPBUSY) == 0)
		panic("mntflushbuf: not busy");
loop:
	for (vp = mountp->mnt_mounth; vp; vp = vp->v_mountf) {
		if (VOP_ISLOCKED(vp))
			continue;
		if (vget(vp))
			goto loop;
		vflushbuf(vp, flags);
		vput(vp);
		if (vp->v_mount != mountp)
			goto loop;
	}
}

/*
 * Flush all dirty buffers associated with a vnode.
 */
vflushbuf(vp, flags)
	register struct vnode *vp;
	int flags;
{
	register struct buf *bp;
	struct buf *nbp;
	int s;

loop:
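	/*
	 * Walk the dirty list at splbio(), since the buffer queues are
	 * also manipulated at interrupt level by I/O completion.
	 */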
	s = splbio();
	for (bp = vp->v_dirtyblkhd; bp; bp = nbp) {
		nbp = bp->b_blockf;
		if ((bp->b_flags & B_BUSY))
			continue;
		if ((bp->b_flags & B_DELWRI) == 0)
			panic("vflushbuf: not dirty");
		bremfree(bp);
		bp->b_flags |= B_BUSY;
		splx(s);
		/*
		 * Wait for I/O associated with indirect blocks to complete,
		 * since there is no way to quickly wait for them below.
		 * NB: This is really specific to ufs, but is done here
		 * as it is easier and quicker.
		 */
		if (bp->b_vp == vp || (flags & B_SYNC) == 0)
			(void) bawrite(bp);
		else
			(void) bwrite(bp);
		goto loop;
	}
	splx(s);
	if ((flags & B_SYNC) == 0)
		return;
	s = splbio();
	while (vp->v_numoutput) {
		vp->v_flag |= VBWAIT;
		sleep((caddr_t)&vp->v_numoutput, PRIBIO + 1);
	}
	splx(s);
	if (vp->v_dirtyblkhd) {
		vprint("vflushbuf: dirty", vp);
		goto loop;
	}
}

/*
 * Update outstanding I/O count and do wakeup if requested.
 */
vwakeup(bp)
	register struct buf *bp;
{
	register struct vnode *vp;

	bp->b_dirtyoff = bp->b_dirtyend = 0;
	if (vp = bp->b_vp) {
		vp->v_numoutput--;
		if ((vp->v_flag & VBWAIT) && vp->v_numoutput <= 0) {
			if (vp->v_numoutput < 0)
				panic("vwakeup: neg numoutput");
			vp->v_flag &= ~VBWAIT;
			wakeup((caddr_t)&vp->v_numoutput);
		}
	}
}

/*
 * Invalidate in-core blocks belonging to a closed or unmounted filesystem.
 *
 * Go through the list of vnodes associated with the file system;
 * for each vnode invalidate any buffers that it holds. Normally
 * this routine is preceded by a bflush call, so that on a quiescent
 * filesystem there will be no dirty buffers when we are done.
 * mntinvalbuf returns the count of dirty buffers when it is finished.
 */
mntinvalbuf(mountp)
	struct mount *mountp;
{
	register struct vnode *vp;
	int dirty = 0;

	if ((mountp->mnt_flag & MNT_MPBUSY) == 0)
		panic("mntinvalbuf: not busy");
loop:
	for (vp = mountp->mnt_mounth; vp; vp = vp->v_mountf) {
		if (vget(vp))
			goto loop;
		dirty += vinvalbuf(vp, 1);
		vput(vp);
		if (vp->v_mount != mountp)
			goto loop;
	}
	return (dirty);
}

/*
 * Flush out and invalidate all buffers associated with a vnode.
 * Called with the underlying object locked.
 */
vinvalbuf(vp, save)
	register struct vnode *vp;
	int save;
{
	USES_VOP_BWRITE;
	register struct buf *bp;
	struct buf *nbp, *blist;
	int s, dirty = 0;

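	/*
	 * Drain the dirty list before the clean list; whenever a sleep
	 * or a write may have let the lists change, restart the scan
	 * from the appropriate list head.
	 */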
	for (;;) {
		if (blist = vp->v_dirtyblkhd)
			/* void */;
		else if (blist = vp->v_cleanblkhd)
			/* void */;
		else
			break;
		for (bp = blist; bp; bp = nbp) {
			nbp = bp->b_blockf;
			s = splbio();
			if (bp->b_flags & B_BUSY) {
				bp->b_flags |= B_WANTED;
				sleep((caddr_t)bp, PRIBIO + 1);
				splx(s);
				break;
			}
			bremfree(bp);
			bp->b_flags |= B_BUSY;
			splx(s);
			if (save && (bp->b_flags & B_DELWRI)) {
				dirty++;
				(void) VOP_BWRITE(bp);
				break;
			}
			if (bp->b_vp != vp)
				reassignbuf(bp, bp->b_vp);
			else
				bp->b_flags |= B_INVAL;
			brelse(bp);
		}
	}
	if (vp->v_dirtyblkhd || vp->v_cleanblkhd)
		panic("vinvalbuf: flush failed");
	return (dirty);
}

/*
 * Associate a buffer with a vnode.
 */
bgetvp(vp, bp)
	register struct vnode *vp;
	register struct buf *bp;
{
	register struct vnode *vq;
	register struct buf *bq;

	if (bp->b_vp)
		panic("bgetvp: not free");
	VHOLD(vp);
	bp->b_vp = vp;
	if (vp->v_type == VBLK || vp->v_type == VCHR)
		bp->b_dev = vp->v_rdev;
	else
		bp->b_dev = NODEV;
	/*
	 * Insert onto list for new vnode.
	 */
	if (bq = vp->v_cleanblkhd)
		bq->b_blockb = &bp->b_blockf;
	bp->b_blockf = bq;
	bp->b_blockb = &vp->v_cleanblkhd;
	vp->v_cleanblkhd = bp;
}

/*
 * Disassociate a buffer from a vnode.
 */
brelvp(bp)
	register struct buf *bp;
{
	struct buf *bq;
	struct vnode *vp;

	if (bp->b_vp == (struct vnode *) 0)
		panic("brelvp: NULL");
	/*
	 * Delete from old vnode list, if on one.
	 */
	if (bp->b_blockb) {
		if (bq = bp->b_blockf)
			bq->b_blockb = bp->b_blockb;
		*bp->b_blockb = bq;
		bp->b_blockf = NULL;
		bp->b_blockb = NULL;
	}
	vp = bp->b_vp;
	bp->b_vp = (struct vnode *) 0;
	HOLDRELE(vp);
}

/*
 * Reassign a buffer from one vnode to another.
 * Used to assign file specific control information
 * (indirect blocks) to the vnode to which they belong.
 */
reassignbuf(bp, newvp)
	register struct buf *bp;
	register struct vnode *newvp;
{
	register struct buf *bq, **listheadp;

	if (newvp == NULL) {
		printf("reassignbuf: NULL");
		return;
	}
	/*
	 * Delete from old vnode list, if on one.
	 */
	if (bp->b_blockb) {
		if (bq = bp->b_blockf)
			bq->b_blockb = bp->b_blockb;
		*bp->b_blockb = bq;
	}
	/*
	 * If dirty, put on list of dirty buffers;
	 * otherwise insert onto list of clean buffers.
	 */
	if (bp->b_flags & B_DELWRI)
		listheadp = &newvp->v_dirtyblkhd;
	else
		listheadp = &newvp->v_cleanblkhd;
	if (bq = *listheadp)
		bq->b_blockb = &bp->b_blockf;
	bp->b_blockf = bq;
	bp->b_blockb = listheadp;
	*listheadp = bp;
}

/*
 * Create a vnode for a block device.
 * Used for root filesystem, argdev, and swap areas.
 * Also used for memory file system special devices.
 */
bdevvp(dev, vpp)
	dev_t dev;
	struct vnode **vpp;
{
	register struct vnode *vp;
	struct vnode *nvp;
	int error;

	if (dev == NODEV)
		return (0);
	error = getnewvnode(VT_NON, (struct mount *)0, spec_vnodeop_p, &nvp);
	if (error) {
		*vpp = 0;
		return (error);
	}
	vp = nvp;
	vp->v_type = VBLK;
	if (nvp = checkalias(vp, dev, (struct mount *)0)) {
		vput(vp);
		vp = nvp;
	}
	*vpp = vp;
	return (0);
}

/*
 * Check to see if the new vnode represents a special device
 * for which we already have a vnode (either because of
 * bdevvp() or because of a different vnode representing
 * the same block device). If such an alias exists, deallocate
 * the existing contents and return the aliased vnode. The
 * caller is responsible for filling it with its new contents.
 */
struct vnode *
checkalias(nvp, nvp_rdev, mp)
	register struct vnode *nvp;
	dev_t nvp_rdev;
	struct mount *mp;
{
	USES_VOP_UNLOCK;
	register struct vnode *vp;
	struct vnode **vpp;

	if (nvp->v_type != VBLK && nvp->v_type != VCHR)
		return (NULLVP);

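	/*
	 * Search the hash chain of special-device vnodes for one with
	 * the same device number and type as the new vnode.
	 */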
	vpp = &speclisth[SPECHASH(nvp_rdev)];
loop:
	for (vp = *vpp; vp; vp = vp->v_specnext) {
		if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type)
			continue;
		/*
		 * Alias, but not in use, so flush it out.
		 */
		if (vp->v_usecount == 0) {
			vgone(vp);
			goto loop;
		}
		if (vget(vp))
			goto loop;
		break;
	}
	if (vp == NULL || vp->v_tag != VT_NON) {
		MALLOC(nvp->v_specinfo, struct specinfo *,
			sizeof(struct specinfo), M_VNODE, M_WAITOK);
		nvp->v_rdev = nvp_rdev;
		nvp->v_hashchain = vpp;
		nvp->v_specnext = *vpp;
		nvp->v_specflags = 0;
		*vpp = nvp;
		if (vp != NULL) {
			nvp->v_flag |= VALIASED;
			vp->v_flag |= VALIASED;
			vput(vp);
		}
		return (NULLVP);
	}
	VOP_UNLOCK(vp);
	vclean(vp, 0);
	vp->v_op = nvp->v_op;
	vp->v_tag = nvp->v_tag;
	nvp->v_type = VNON;
	insmntque(vp, mp);
	return (vp);
}

/*
 * Grab a particular vnode from the free list, increment its
 * reference count and lock it. The vnode lock bit is set while
 * the vnode is being eliminated in vgone. The process is awakened
 * when the transition is completed, and an error returned to
 * indicate that the vnode is no longer usable (possibly having
 * been changed to a new file system type).
 */
vget(vp)
	register struct vnode *vp;
{
	USES_VOP_LOCK;
	register struct vnode *vq;

	if (vp->v_flag & VXLOCK) {
		vp->v_flag |= VXWANT;
		sleep((caddr_t)vp, PINOD);
		return (1);
	}
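	/*
	 * If the vnode is sitting on the free list, unlink it; a
	 * referenced vnode must not remain there.
	 */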
	if (vp->v_usecount == 0) {
		if (vq = vp->v_freef)
			vq->v_freeb = vp->v_freeb;
		else
			vfreet = vp->v_freeb;
		*vp->v_freeb = vq;
		vp->v_freef = NULL;
		vp->v_freeb = NULL;
	}
	VREF(vp);
	VOP_LOCK(vp);
	return (0);
}

/*
 * Vnode reference, just increment the count
 */
void vref(vp)
	struct vnode *vp;
{

	vp->v_usecount++;
}

/*
 * vput(), just unlock and vrele()
 */
void vput(vp)
	register struct vnode *vp;
{
	USES_VOP_UNLOCK;

	VOP_UNLOCK(vp);
	vrele(vp);
}

/*
 * Vnode release.
 * If count drops to zero, call inactive routine and return to freelist.
 */
void vrele(vp)
	register struct vnode *vp;
{
	USES_VOP_INACTIVE;
	struct proc *p = curproc;		/* XXX */

#ifdef DIAGNOSTIC
	if (vp == NULL)
		panic("vrele: null vp");
#endif
	vp->v_usecount--;
	if (vp->v_usecount > 0)
		return;
#ifdef DIAGNOSTIC
	if (vp->v_usecount != 0 || vp->v_writecount != 0) {
		vprint("vrele: bad ref count", vp);
		panic("vrele: ref cnt");
	}
#endif
	if (vfreeh == NULLVP) {
		/*
		 * insert into empty list
		 */
		vfreeh = vp;
		vp->v_freeb = &vfreeh;
	} else {
		/*
		 * insert at tail of list
		 */
		*vfreet = vp;
		vp->v_freeb = vfreet;
	}
	vp->v_freef = NULL;
	vfreet = &vp->v_freef;
	VOP_INACTIVE(vp, p);
}

/*
 * Page or buffer structure gets a reference.
 */
void vhold(vp)
	register struct vnode *vp;
{

	vp->v_holdcnt++;
}

/*
 * Page or buffer structure frees a reference.
 */
void holdrele(vp)
	register struct vnode *vp;
{

	if (vp->v_holdcnt <= 0)
		panic("holdrele: holdcnt");
	vp->v_holdcnt--;
}

/*
 * Remove any vnodes in the vnode table belonging to mount point mp.
 *
 * If MNT_NOFORCE is specified, there should not be any active ones,
 * return error if any are found (nb: this is a user error, not a
 * system error). If MNT_FORCE is specified, detach any active vnodes
 * that are found.
 */
int busyprt = 0;	/* patch to print out busy vnodes */

vflush(mp, skipvp, flags)
	struct mount *mp;
	struct vnode *skipvp;
	int flags;
{
	register struct vnode *vp, *nvp;
	int busy = 0;

	if ((mp->mnt_flag & MNT_MPBUSY) == 0)
		panic("vflush: not busy");
loop:
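	/*
	 * vgone() and vclean() may sleep and let the vnode list change
	 * underneath us, so restart the scan whenever vp turns out to
	 * have left this mount point.
	 */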
	for (vp = mp->mnt_mounth; vp; vp = nvp) {
		if (vp->v_mount != mp)
			goto loop;
		nvp = vp->v_mountf;
		/*
		 * Skip over a selected vnode.
		 */
		if (vp == skipvp)
			continue;
		/*
		 * Skip over vnodes marked VSYSTEM.
		 */
		if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM))
			continue;
		/*
		 * With v_usecount == 0, all we need to do is clear
		 * out the vnode data structures and we are done.
		 */
		if (vp->v_usecount == 0) {
			vgone(vp);
			continue;
		}
		/*
		 * For block or character devices, revert to an
		 * anonymous device. For all other files, just kill them.
		 */
		if (flags & FORCECLOSE) {
			if (vp->v_type != VBLK && vp->v_type != VCHR) {
				vgone(vp);
			} else {
				vclean(vp, 0);
				vp->v_op = spec_vnodeop_p;
				insmntque(vp, (struct mount *)0);
			}
			continue;
		}
		if (busyprt)
			vprint("vflush: busy vnode", vp);
		busy++;
	}
	if (busy)
		return (EBUSY);
	return (0);
}

/*
 * Disassociate the underlying file system from a vnode.
 */
void vclean(vp, flags)
	register struct vnode *vp;
	int flags;
{
	USES_VOP_LOCK;
	USES_VOP_UNLOCK;
	USES_VOP_CLOSE;
	USES_VOP_INACTIVE;
	USES_VOP_RECLAIM;
	int (**origops)();
	int active;
	struct proc *p = curproc;	/* XXX */

	/*
	 * Check to see if the vnode is in use.
	 * If so we have to reference it before we clean it out
	 * so that its count cannot fall to zero and generate a
	 * race against ourselves to recycle it.
	 */
	if (active = vp->v_usecount)
		VREF(vp);
	/*
	 * Prevent the vnode from being recycled or
	 * brought into use while we clean it out.
	 */
	if (vp->v_flag & VXLOCK)
		panic("vclean: deadlock");
	vp->v_flag |= VXLOCK;
	/*
	 * Even if the count is zero, the VOP_INACTIVE routine may still
	 * have the object locked while it cleans it out. The VOP_LOCK
	 * ensures that the VOP_INACTIVE routine is done with its work.
	 * For active vnodes, it ensures that no other activity can
	 * occur while the buffer list is being cleaned out.
	 */
	VOP_LOCK(vp);
	if (flags & DOCLOSE)
		vinvalbuf(vp, 1);
	/*
	 * Prevent any further operations on the vnode from
	 * being passed through to the old file system.
	 */
	origops = vp->v_op;
	vp->v_op = dead_vnodeop_p;
	vp->v_tag = VT_NON;
	/*
	 * If purging an active vnode, it must be unlocked, closed,
	 * and deactivated before being reclaimed.
	 */
	vop_unlock_a.a_desc = VDESC(vop_unlock);
	vop_unlock_a.a_vp = vp;
	VOCALL(origops,VOFFSET(vop_unlock),&vop_unlock_a);
	if (active) {
		/*
		 * Note: these next two calls imply
		 * that vop_close and vop_inactive implementations
		 * cannot count on the ops vector being correctly
		 * set.
		 */
		if (flags & DOCLOSE) {
			vop_close_a.a_desc = VDESC(vop_close);
			vop_close_a.a_vp = vp;
			vop_close_a.a_fflag = IO_NDELAY;
			vop_close_a.a_p = p;
			VOCALL(origops,VOFFSET(vop_close),&vop_close_a);
		}
		vop_inactive_a.a_desc = VDESC(vop_inactive);
		vop_inactive_a.a_vp = vp;
		vop_inactive_a.a_p = p;
		VOCALL(origops,VOFFSET(vop_inactive),&vop_inactive_a);
	}
	/*
	 * Reclaim the vnode (emulate VOP_RECLAIM).
	 */
	vop_reclaim_a.a_desc = VDESC(vop_reclaim);
	vop_reclaim_a.a_vp = vp;
	if (VOCALL(origops,VOFFSET(vop_reclaim),&vop_reclaim_a))
		panic("vclean: cannot reclaim");
	if (active)
		vrele(vp);

	/*
	 * Done with purge, notify sleepers in vget of the grim news.
	 */
	vp->v_flag &= ~VXLOCK;
	if (vp->v_flag & VXWANT) {
		vp->v_flag &= ~VXWANT;
		wakeup((caddr_t)vp);
	}
}

/*
 * Eliminate all activity associated with the requested vnode
 * and with all vnodes aliased to the requested vnode.
 */
void vgoneall(vp)
	register struct vnode *vp;
{
	register struct vnode *vq;

	if (vp->v_flag & VALIASED) {
		/*
		 * If a vgone (or vclean) is already in progress,
		 * wait until it is done and return.
		 */
		if (vp->v_flag & VXLOCK) {
			vp->v_flag |= VXWANT;
			sleep((caddr_t)vp, PINOD);
			return;
		}
		/*
		 * Ensure that vp will not be vgone'd while we
		 * are eliminating its aliases.
		 */
		vp->v_flag |= VXLOCK;
		while (vp->v_flag & VALIASED) {
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_rdev != vp->v_rdev ||
				    vq->v_type != vp->v_type || vp == vq)
					continue;
				vgone(vq);
				break;
			}
		}
		/*
		 * Remove the lock so that vgone below will
		 * really eliminate the vnode after which time
		 * vgone will awaken any sleepers.
		 */
		vp->v_flag &= ~VXLOCK;
	}
	vgone(vp);
}

/*
 * Eliminate all activity associated with a vnode
 * in preparation for reuse.
 */
void vgone(vp)
	register struct vnode *vp;
{
	register struct vnode *vq;
	struct vnode *vx;

	/*
	 * If a vgone (or vclean) is already in progress,
	 * wait until it is done and return.
	 */
	if (vp->v_flag & VXLOCK) {
		vp->v_flag |= VXWANT;
		sleep((caddr_t)vp, PINOD);
		return;
	}
	/*
	 * Clean out the filesystem specific data.
	 */
	vclean(vp, DOCLOSE);
	/*
	 * Delete from old mount point vnode list, if on one.
	 */
	if (vp->v_mountb) {
		if (vq = vp->v_mountf)
			vq->v_mountb = vp->v_mountb;
		*vp->v_mountb = vq;
		vp->v_mountf = NULL;
		vp->v_mountb = NULL;
		vp->v_mount = NULL;
	}
	/*
	 * If special device, remove it from special device alias list.
	 */
	if (vp->v_type == VBLK || vp->v_type == VCHR) {
		if (*vp->v_hashchain == vp) {
			*vp->v_hashchain = vp->v_specnext;
		} else {
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_specnext != vp)
					continue;
				vq->v_specnext = vp->v_specnext;
				break;
			}
			if (vq == NULL)
				panic("missing bdev");
		}
		if (vp->v_flag & VALIASED) {
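			/*
			 * Count the remaining aliases; if exactly one is
			 * left (vx found, no second match), it is no
			 * longer aliased and loses the VALIASED flag.
			 */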
			vx = NULL;
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_rdev != vp->v_rdev ||
				    vq->v_type != vp->v_type)
					continue;
				if (vx)
					break;
				vx = vq;
			}
			if (vx == NULL)
				panic("missing alias");
			if (vq == NULL)
				vx->v_flag &= ~VALIASED;
			vp->v_flag &= ~VALIASED;
		}
		FREE(vp->v_specinfo, M_VNODE);
		vp->v_specinfo = NULL;
	}
	/*
	 * If it is on the freelist, move it to the head of the list.
	 */
	if (vp->v_freeb) {
		if (vq = vp->v_freef)
			vq->v_freeb = vp->v_freeb;
		else
			vfreet = vp->v_freeb;
		*vp->v_freeb = vq;
		vp->v_freef = vfreeh;
		vp->v_freeb = &vfreeh;
		vfreeh->v_freeb = &vp->v_freef;
		vfreeh = vp;
	}
	vp->v_type = VBAD;
}

/*
 * Look up a vnode by device number.
 */
vfinddev(dev, type, vpp)
	dev_t dev;
	enum vtype type;
	struct vnode **vpp;
{
	register struct vnode *vp;

	for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) {
		if (dev != vp->v_rdev || type != vp->v_type)
			continue;
		*vpp = vp;
		return (0);
	}
	return (1);
}

/*
 * Calculate the total number of references to a special device.
 */
vcount(vp)
	register struct vnode *vp;
{
	register struct vnode *vq;
	int count;

	if ((vp->v_flag & VALIASED) == 0)
		return (vp->v_usecount);
loop:
	for (count = 0, vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
		if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type)
			continue;
		/*
		 * Alias, but not in use, so flush it out.
		 */
		if (vq->v_usecount == 0) {
			vgone(vq);
			goto loop;
		}
		count += vq->v_usecount;
	}
	return (count);
}

/*
 * Print out a description of a vnode.
 */
static char *typename[] =
   { "VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD" };

vprint(label, vp)
	char *label;
	register struct vnode *vp;
{
	USES_VOP_PRINT;
	char buf[64];

	if (label != NULL)
		printf("%s: ", label);
	printf("type %s, usecount %d, writecount %d, refcount %d,",
		typename[vp->v_type], vp->v_usecount, vp->v_writecount,
		vp->v_holdcnt);
	buf[0] = '\0';
	if (vp->v_flag & VROOT)
		strcat(buf, "|VROOT");
	if (vp->v_flag & VTEXT)
		strcat(buf, "|VTEXT");
	if (vp->v_flag & VSYSTEM)
		strcat(buf, "|VSYSTEM");
	if (vp->v_flag & VXLOCK)
		strcat(buf, "|VXLOCK");
	if (vp->v_flag & VXWANT)
		strcat(buf, "|VXWANT");
	if (vp->v_flag & VBWAIT)
		strcat(buf, "|VBWAIT");
	if (vp->v_flag & VALIASED)
		strcat(buf, "|VALIASED");
	if (buf[0] != '\0')
		printf(" flags (%s)", &buf[1]);
	printf("\n\t");
	VOP_PRINT(vp);
}

#ifdef DEBUG
/*
 * List all of the locked vnodes in the system.
 * Called when debugging the kernel.
 */
printlockedvnodes()
{
	USES_VOP_ISLOCKED;
	register struct mount *mp;
	register struct vnode *vp;

	printf("Locked vnodes\n");
	mp = rootfs;
	do {
		for (vp = mp->mnt_mounth; vp; vp = vp->v_mountf)
			if (VOP_ISLOCKED(vp))
				vprint((char *)0, vp);
		mp = mp->mnt_next;
	} while (mp != rootfs);
}
#endif

int kinfo_vdebug = 1;
int kinfo_vgetfailed;
#define KINFO_VNODESLOP	10
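/*
 * The slop presumably leaves room for vnodes created between the
 * size estimate and the actual copyout pass.
 */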
/*
 * Dump vnode list (via kinfo).
 * Copyout address of vnode followed by vnode.
 */
/* ARGSUSED */
kinfo_vnode(op, where, acopysize, arg, aneeded)
	int op;
	char *where;
	int *acopysize, arg, *aneeded;
{
	register struct mount *mp = rootfs;
	struct mount *omp;
	struct vnode *vp;
	register char *bp = where, *savebp;
	char *ewhere = where + *acopysize;
	int error;

#define VPTRSZ	sizeof (struct vnode *)
#define VNODESZ	sizeof (struct vnode)
	if (where == NULL) {
		*aneeded = (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ);
		return (0);
	}

	do {
		if (vfs_busy(mp)) {
			mp = mp->mnt_next;
			continue;
		}
		savebp = bp;
again:
		for (vp = mp->mnt_mounth; vp; vp = vp->v_mountf) {
			/*
			 * Check that the vp is still associated with
			 * this filesystem.  RACE: could have been
			 * recycled onto the same filesystem.
			 */
			if (vp->v_mount != mp) {
				if (kinfo_vdebug)
					printf("kinfo: vp changed\n");
				bp = savebp;
				goto again;
			}
			if ((bp + VPTRSZ + VNODESZ <= ewhere) &&
			    ((error = copyout((caddr_t)&vp, bp, VPTRSZ)) ||
			     (error = copyout((caddr_t)vp, bp + VPTRSZ,
			      VNODESZ))))
				return (error);
			bp += VPTRSZ + VNODESZ;
		}
		omp = mp;
		mp = mp->mnt_next;
		vfs_unbusy(omp);
	} while (mp != rootfs);

	*aneeded = bp - where;
	if (bp > ewhere)
		*acopysize = ewhere - where;
	else
		*acopysize = bp - where;
	return (0);
}
1194