/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * %sccs.include.redist.c%
 *
 *	@(#)vfs_subr.c	8.29 (Berkeley) 05/20/95
 */

/*
 * External virtual filesystem routines
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/mount.h>
#include <sys/time.h>
#include <sys/vnode.h>
#include <sys/stat.h>
#include <sys/namei.h>
#include <sys/ucred.h>
#include <sys/buf.h>
#include <sys/errno.h>
#include <sys/malloc.h>
#include <sys/domain.h>
#include <sys/mbuf.h>

#include <vm/vm.h>
#include <sys/sysctl.h>

#include <miscfs/specfs/specdev.h>

enum vtype iftovt_tab[16] = {
	VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
	VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
};
int	vttoif_tab[9] = {
	0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
	S_IFSOCK, S_IFIFO, S_IFMT,
};
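
/*
 * Illustrative example (editor's sketch, not part of the original
 * source): these tables back the IFTOVT() and VTTOIF() macros in
 * <sys/vnode.h>, which map between stat-style mode bits and vnode
 * types:
 *
 *	enum vtype vt = IFTOVT(ip->i_mode);	S_IFDIR yields VDIR
 *	mode_t ifmt = VTTOIF(vp->v_type);	VDIR yields S_IFDIR
 *
 * iftovt_tab is indexed by (mode & S_IFMT) >> 12; vttoif_tab by the
 * enum vtype value.
 */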

/*
 * Insq/Remq for the vnode usage lists.
 */
#define	bufinsvn(bp, dp)	LIST_INSERT_HEAD(dp, bp, b_vnbufs)
#define	bufremvn(bp) {							\
	LIST_REMOVE(bp, b_vnbufs);					\
	(bp)->b_vnbufs.le_next = NOLIST;				\
}
TAILQ_HEAD(freelst, vnode) vnode_free_list;	/* vnode free list */
struct mntlist mountlist;			/* mounted filesystem list */
struct simplelock mountlist_slock;
static struct simplelock mntid_slock;
struct simplelock mntvnode_slock;
static struct simplelock spechash_slock;
static struct simplelock vnode_free_list_slock;

/*
 * Initialize the vnode management data structures.
 */
void
vntblinit()
{

	simple_lock_init(&mntvnode_slock);
	simple_lock_init(&mntid_slock);
	simple_lock_init(&spechash_slock);
	TAILQ_INIT(&vnode_free_list);
	simple_lock_init(&vnode_free_list_slock);
	CIRCLEQ_INIT(&mountlist);
}

/*
 * Mark a mount point as busy. Used to synchronize access and to delay
 * unmounting. Interlock is not released on failure.
 */
int
vfs_busy(mp, flags, interlkp, p)
	struct mount *mp;
	int flags;
	struct simplelock *interlkp;
	struct proc *p;
{
	int lkflags;

	if (mp->mnt_flag & MNT_UNMOUNT) {
		if (flags & LK_NOWAIT)
			return (ENOENT);
		mp->mnt_flag |= MNT_MWAIT;
		sleep((caddr_t)mp, PVFS);
		return (ENOENT);
	}
	lkflags = LK_SHARED;
	if (interlkp)
		lkflags |= LK_INTERLOCK;
	if (lockmgr(&mp->mnt_lock, lkflags, interlkp, p))
		panic("vfs_busy: unexpected lock failure");
	return (0);
}

/*
 * Free a busy filesystem.
 * Panic if filesystem is not busy.
 */
void
vfs_unbusy(mp, p)
	struct mount *mp;
	struct proc *p;
{

	lockmgr(&mp->mnt_lock, LK_RELEASE, NULL, p);
}
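
/*
 * Illustrative example (editor's sketch, not part of the original
 * source): a typical mountlist traversal brackets each mount point
 * with vfs_busy() and vfs_unbusy(), handing vfs_busy() the mountlist
 * interlock so that it is dropped once the filesystem is busied
 * (compare printlockedvnodes() and sysctl_vnode() below):
 *
 *	simple_lock(&mountlist_slock);
 *	for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
 *		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) {
 *			nmp = mp->mnt_list.cqe_next;
 *			continue;
 *		}
 *		... operate on mp ...
 *		simple_lock(&mountlist_slock);
 *		nmp = mp->mnt_list.cqe_next;
 *		vfs_unbusy(mp, p);
 *	}
 *	simple_unlock(&mountlist_slock);
 */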

/*
 * Lookup a filesystem type, and if found allocate and initialize
 * a mount structure for it.
 *
 * Devname is usually updated by mount(8) after booting.
 */
int
vfs_rootmountalloc(fstypename, devname, mpp)
	char *fstypename;
	char *devname;
	struct mount **mpp;
{
	struct proc *p = curproc;	/* XXX */
	struct vfsconf *vfsp;
	struct mount *mp;

	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
		if (!strcmp(vfsp->vfc_name, fstypename))
			break;
	if (vfsp == NULL)
		return (ENODEV);
	mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK);
	bzero((char *)mp, (u_long)sizeof(struct mount));
	lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, 0);
	(void)vfs_busy(mp, LK_NOWAIT, 0, p);
	LIST_INIT(&mp->mnt_vnodelist);
	mp->mnt_vfc = vfsp;
	mp->mnt_op = vfsp->vfc_vfsops;
	mp->mnt_flag = MNT_RDONLY;
	mp->mnt_vnodecovered = NULLVP;
	vfsp->vfc_refcount++;
	mp->mnt_stat.f_type = vfsp->vfc_typenum;
	mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
	strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN);
	mp->mnt_stat.f_mntonname[0] = '/';
	(void) copystr(devname, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 0);
	*mpp = mp;
	return (0);
}

/*
 * Find an appropriate filesystem to use for the root. If a filesystem
 * has not been preselected, walk through the list of known filesystems
 * trying those that have mountroot routines, and try them until one
 * works or we have tried them all.
 */
int
vfs_mountroot()
{
	struct vfsconf *vfsp;
	extern int (*mountroot)(void);
	int error;

	if (mountroot != NULL)
		return ((*mountroot)());
	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) {
		if (vfsp->vfc_mountroot == NULL)
			continue;
		if ((error = (*vfsp->vfc_mountroot)()) == 0)
			return (0);
		printf("%s_mountroot failed: %d\n", vfsp->vfc_name, error);
	}
	return (ENODEV);
}

/*
 * Lookup a mount point by filesystem identifier.
 */
struct mount *
vfs_getvfs(fsid)
	fsid_t *fsid;
{
	register struct mount *mp;

	simple_lock(&mountlist_slock);
	for (mp = mountlist.cqh_first; mp != (void *)&mountlist;
	     mp = mp->mnt_list.cqe_next) {
		if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] &&
		    mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) {
			simple_unlock(&mountlist_slock);
			return (mp);
		}
	}
	simple_unlock(&mountlist_slock);
	return ((struct mount *)0);
}
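
/*
 * Illustrative example (editor's sketch, not part of the original
 * source): a hypothetical caller turning a filesystem identifier,
 * such as the fsid embedded in an NFS file handle, back into its
 * mount point:
 *
 *	struct mount *mp;
 *
 *	if ((mp = vfs_getvfs(&fhp->fh_fsid)) == NULL)
 *		return (ESTALE);
 *
 * fhp is assumed to point at a struct fhandle; only the fsid lookup
 * itself is shown.
 */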

/*
 * Get a new unique fsid
 */
void
vfs_getnewfsid(mp)
	struct mount *mp;
{
	static u_short xxxfs_mntid;

	fsid_t tfsid;
	int mtype;

	simple_lock(&mntid_slock);
	mtype = mp->mnt_vfc->vfc_typenum;
	mp->mnt_stat.f_fsid.val[0] = makedev(nblkdev + mtype, 0);
	mp->mnt_stat.f_fsid.val[1] = mtype;
	if (xxxfs_mntid == 0)
		++xxxfs_mntid;
	tfsid.val[0] = makedev(nblkdev + mtype, xxxfs_mntid);
	tfsid.val[1] = mtype;
	if (mountlist.cqh_first != (void *)&mountlist) {
		while (vfs_getvfs(&tfsid)) {
			tfsid.val[0]++;
			xxxfs_mntid++;
		}
	}
	mp->mnt_stat.f_fsid.val[0] = tfsid.val[0];
	simple_unlock(&mntid_slock);
}

/*
 * Set vnode attributes to VNOVAL
 */
void
vattr_null(vap)
	register struct vattr *vap;
{

	vap->va_type = VNON;
	vap->va_size = vap->va_bytes = VNOVAL;
	vap->va_mode = vap->va_nlink = vap->va_uid = vap->va_gid =
		vap->va_fsid = vap->va_fileid =
		vap->va_blocksize = vap->va_rdev =
		vap->va_atime.ts_sec = vap->va_atime.ts_nsec =
		vap->va_mtime.ts_sec = vap->va_mtime.ts_nsec =
		vap->va_ctime.ts_sec = vap->va_ctime.ts_nsec =
		vap->va_flags = vap->va_gen = VNOVAL;
	vap->va_vaflags = 0;
}
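
/*
 * Illustrative example (editor's sketch, not part of the original
 * source): callers initialize a struct vattr with vattr_null()
 * (commonly via the VATTR_NULL() macro) and then set only the fields
 * they intend to change, leaving the rest as VNOVAL so the filesystem
 * ignores them, e.g. to truncate a file to zero length:
 *
 *	struct vattr va;
 *
 *	vattr_null(&va);
 *	va.va_size = 0;
 *	error = VOP_SETATTR(vp, &va, cred, p);
 */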

/*
 * Routines having to do with the management of the vnode table.
 */
extern int (**dead_vnodeop_p)();
static void vclean __P((struct vnode *vp, int flag, struct proc *p));
extern void vgonel __P((struct vnode *vp, struct proc *p));
long numvnodes;
extern struct vattr va_null;

/*
 * Return the next vnode from the free list.
 */
int
getnewvnode(tag, mp, vops, vpp)
	enum vtagtype tag;
	struct mount *mp;
	int (**vops)();
	struct vnode **vpp;
{
	struct proc *p = curproc;	/* XXX */
	struct vnode *vp;
	int s;
	int cnt;

top:
	simple_lock(&vnode_free_list_slock);
	if ((vnode_free_list.tqh_first == NULL &&
	     numvnodes < 2 * desiredvnodes) ||
	    numvnodes < desiredvnodes) {
		simple_unlock(&vnode_free_list_slock);
		vp = (struct vnode *)malloc((u_long)sizeof *vp,
		    M_VNODE, M_WAITOK);
		bzero((char *)vp, sizeof *vp);
		numvnodes++;
	} else {
		for (vp = vnode_free_list.tqh_first;
				vp != NULLVP; vp = vp->v_freelist.tqe_next) {
			if (simple_lock_try(&vp->v_interlock))
				break;
		}
		/*
		 * Unless this is a bad time of the month, at most
		 * the first NCPUS items on the free list are
		 * locked, so this is close enough to being empty.
		 */
		if (vp == NULLVP) {
			simple_unlock(&vnode_free_list_slock);
			tablefull("vnode");
			*vpp = 0;
			return (ENFILE);
		}
		if (vp->v_usecount)
			panic("free vnode isn't");
		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
		/* see comment on why 0xdeadb is set at end of vgone (below) */
		vp->v_freelist.tqe_prev = (struct vnode **)0xdeadb;
		simple_unlock(&vnode_free_list_slock);
		vp->v_lease = NULL;
		if (vp->v_type != VBAD)
			vgonel(vp, p);
		else
			simple_unlock(&vp->v_interlock);
#ifdef DIAGNOSTIC
		if (vp->v_data)
			panic("cleaned vnode isn't");
		s = splbio();
		if (vp->v_numoutput)
			panic("Clean vnode has pending I/O's");
		splx(s);
#endif
		vp->v_flag = 0;
		vp->v_lastr = 0;
		vp->v_ralen = 0;
		vp->v_maxra = 0;
		vp->v_lastw = 0;
		vp->v_lasta = 0;
		vp->v_cstart = 0;
		vp->v_clen = 0;
		vp->v_socket = 0;
	}
	vp->v_type = VNON;
	cache_purge(vp);
	vp->v_tag = tag;
	vp->v_op = vops;
	insmntque(vp, mp);
	*vpp = vp;
	vp->v_usecount = 1;
	vp->v_data = 0;
	return (0);
}
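
/*
 * Illustrative example (editor's sketch, not part of the original
 * source): a filesystem's vget-style routine typically allocates its
 * vnode here, then hangs its own per-file data off v_data, roughly:
 *
 *	struct vnode *vp;
 *	int error;
 *
 *	if (error = getnewvnode(VT_UFS, mp, ffs_vnodeop_p, &vp))
 *		return (error);
 *	vp->v_data = ip;	hypothetical in-core inode
 *
 * VT_UFS and ffs_vnodeop_p follow the UFS code; neither name is
 * defined in this file.
 */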

/*
 * Move a vnode from one mount queue to another.
 */
void
insmntque(vp, mp)
	struct vnode *vp;
	struct mount *mp;
{

	simple_lock(&mntvnode_slock);
	/*
	 * Delete from old mount point vnode list, if on one.
	 */
	if (vp->v_mount != NULL)
		LIST_REMOVE(vp, v_mntvnodes);
	/*
	 * Insert into list of vnodes for the new mount point, if available.
	 */
	if ((vp->v_mount = mp) != NULL)
		LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes);
	simple_unlock(&mntvnode_slock);
}

/*
 * Update outstanding I/O count and do wakeup if requested.
 */
void
vwakeup(bp)
	register struct buf *bp;
{
	register struct vnode *vp;

	bp->b_flags &= ~B_WRITEINPROG;
	if (vp = bp->b_vp) {
		if (--vp->v_numoutput < 0)
			panic("vwakeup: neg numoutput");
		if ((vp->v_flag & VBWAIT) && vp->v_numoutput <= 0) {
			if (vp->v_numoutput < 0)
				panic("vwakeup: neg numoutput 2");
			vp->v_flag &= ~VBWAIT;
			wakeup((caddr_t)&vp->v_numoutput);
		}
	}
}

/*
 * Flush out and invalidate all buffers associated with a vnode.
 * Called with the underlying object locked.
 */
int
vinvalbuf(vp, flags, cred, p, slpflag, slptimeo)
	register struct vnode *vp;
	int flags;
	struct ucred *cred;
	struct proc *p;
	int slpflag, slptimeo;
{
	register struct buf *bp;
	struct buf *nbp, *blist;
	int s, error;

	if (flags & V_SAVE) {
		if (error = VOP_FSYNC(vp, cred, MNT_WAIT, p))
			return (error);
		if (vp->v_dirtyblkhd.lh_first != NULL)
			panic("vinvalbuf: dirty bufs");
	}
	for (;;) {
		if ((blist = vp->v_cleanblkhd.lh_first) && flags & V_SAVEMETA)
			while (blist && blist->b_lblkno < 0)
				blist = blist->b_vnbufs.le_next;
		if (!blist && (blist = vp->v_dirtyblkhd.lh_first) &&
		    (flags & V_SAVEMETA))
			while (blist && blist->b_lblkno < 0)
				blist = blist->b_vnbufs.le_next;
		if (!blist)
			break;

		for (bp = blist; bp; bp = nbp) {
			nbp = bp->b_vnbufs.le_next;
			if (flags & V_SAVEMETA && bp->b_lblkno < 0)
				continue;
			s = splbio();
			if (bp->b_flags & B_BUSY) {
				bp->b_flags |= B_WANTED;
				error = tsleep((caddr_t)bp,
					slpflag | (PRIBIO + 1), "vinvalbuf",
					slptimeo);
				splx(s);
				if (error)
					return (error);
				break;
			}
			bremfree(bp);
			bp->b_flags |= B_BUSY;
			splx(s);
			/*
			 * XXX Since there are no node locks for NFS, I believe
			 * there is a slight chance that a delayed write will
			 * occur while sleeping just above, so check for it.
			 */
			if ((bp->b_flags & B_DELWRI) && (flags & V_SAVE)) {
				(void) VOP_BWRITE(bp);
				break;
			}
			bp->b_flags |= B_INVAL;
			brelse(bp);
		}
	}
	if (!(flags & V_SAVEMETA) &&
	    (vp->v_dirtyblkhd.lh_first || vp->v_cleanblkhd.lh_first))
		panic("vinvalbuf: flush failed");
	return (0);
}
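
/*
 * Illustrative example (editor's sketch, not part of the original
 * source): vclean() below passes V_SAVE to write dirty data back
 * before invalidating; a caller discarding a file's contents passes
 * 0 instead, and a truncation path can keep the indirect blocks
 * (those with b_lblkno < 0) with V_SAVEMETA:
 *
 *	error = vinvalbuf(vp, V_SAVE, cred, p, 0, 0);	   write, then toss
 *	error = vinvalbuf(vp, 0, cred, p, 0, 0);	   toss everything
 *	error = vinvalbuf(vp, V_SAVEMETA, cred, p, 0, 0);  keep metadata
 */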

/*
 * Associate a buffer with a vnode.
 */
void
bgetvp(vp, bp)
	register struct vnode *vp;
	register struct buf *bp;
{

	if (bp->b_vp)
		panic("bgetvp: not free");
	VHOLD(vp);
	bp->b_vp = vp;
	if (vp->v_type == VBLK || vp->v_type == VCHR)
		bp->b_dev = vp->v_rdev;
	else
		bp->b_dev = NODEV;
	/*
	 * Insert onto list for new vnode.
	 */
	bufinsvn(bp, &vp->v_cleanblkhd);
}

/*
 * Disassociate a buffer from a vnode.
 */
void
brelvp(bp)
	register struct buf *bp;
{
	struct vnode *vp;

	if (bp->b_vp == (struct vnode *) 0)
		panic("brelvp: NULL");
	/*
	 * Delete from old vnode list, if on one.
	 */
	if (bp->b_vnbufs.le_next != NOLIST)
		bufremvn(bp);
	vp = bp->b_vp;
	bp->b_vp = (struct vnode *) 0;
	HOLDRELE(vp);
}

/*
 * Reassign a buffer from one vnode to another.
 * Used to assign file specific control information
 * (indirect blocks) to the vnode to which they belong.
 */
void
reassignbuf(bp, newvp)
	register struct buf *bp;
	register struct vnode *newvp;
{
	register struct buflists *listheadp;

	if (newvp == NULL) {
		printf("reassignbuf: NULL\n");
		return;
	}
	/*
	 * Delete from old vnode list, if on one.
	 */
	if (bp->b_vnbufs.le_next != NOLIST)
		bufremvn(bp);
	/*
	 * If dirty, put on list of dirty buffers;
	 * otherwise insert onto list of clean buffers.
	 */
	if (bp->b_flags & B_DELWRI)
		listheadp = &newvp->v_dirtyblkhd;
	else
		listheadp = &newvp->v_cleanblkhd;
	bufinsvn(bp, listheadp);
}

/*
 * Create a vnode for a block device.
 * Used for root filesystem, argdev, and swap areas.
 * Also used for memory file system special devices.
 */
int
bdevvp(dev, vpp)
	dev_t dev;
	struct vnode **vpp;
{
	register struct vnode *vp;
	struct vnode *nvp;
	int error;

	if (dev == NODEV) {
		*vpp = NULLVP;
		return (ENODEV);
	}
	error = getnewvnode(VT_NON, (struct mount *)0, spec_vnodeop_p, &nvp);
	if (error) {
		*vpp = NULLVP;
		return (error);
	}
	vp = nvp;
	vp->v_type = VBLK;
	if (nvp = checkalias(vp, dev, (struct mount *)0)) {
		vput(vp);
		vp = nvp;
	}
	*vpp = vp;
	return (0);
}
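
/*
 * Illustrative example (editor's sketch, not part of the original
 * source): machine-dependent startup code uses bdevvp() to obtain
 * vnodes for the root and swap devices before any filesystem is
 * mounted, e.g.:
 *
 *	if (bdevvp(rootdev, &rootvp))
 *		panic("cannot set up root device vnode");
 *	if (bdevvp(swapdev, &swapdev_vp))
 *		panic("cannot set up swap device vnode");
 *
 * rootdev/swapdev and the *vp names follow conventional kernel
 * globals; exact names vary by port.
 */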

/*
 * Check to see if the new vnode represents a special device
 * for which we already have a vnode (either because of
 * bdevvp() or because of a different vnode representing
 * the same block device). If such an alias exists, deallocate
 * the existing contents and return the aliased vnode. The
 * caller is responsible for filling it with its new contents.
 */
struct vnode *
checkalias(nvp, nvp_rdev, mp)
	register struct vnode *nvp;
	dev_t nvp_rdev;
	struct mount *mp;
{
	struct proc *p = curproc;	/* XXX */
	struct vnode *vp;
	struct vnode **vpp;

	if (nvp->v_type != VBLK && nvp->v_type != VCHR)
		return (NULLVP);

	vpp = &speclisth[SPECHASH(nvp_rdev)];
loop:
	simple_lock(&spechash_slock);
	for (vp = *vpp; vp; vp = vp->v_specnext) {
		if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type)
			continue;
		/*
		 * Alias, but not in use, so flush it out.
		 */
		simple_lock(&vp->v_interlock);
		if (vp->v_usecount == 0) {
			simple_unlock(&spechash_slock);
			vgonel(vp, p);
			goto loop;
		}
		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, p)) {
			simple_unlock(&spechash_slock);
			goto loop;
		}
		break;
	}
	if (vp == NULL || vp->v_tag != VT_NON) {
		MALLOC(nvp->v_specinfo, struct specinfo *,
			sizeof(struct specinfo), M_VNODE, M_WAITOK);
		nvp->v_rdev = nvp_rdev;
		nvp->v_hashchain = vpp;
		nvp->v_specnext = *vpp;
		nvp->v_specflags = 0;
		simple_unlock(&spechash_slock);
		*vpp = nvp;
		if (vp != NULLVP) {
			nvp->v_flag |= VALIASED;
			vp->v_flag |= VALIASED;
			vput(vp);
		}
		return (NULLVP);
	}
	simple_unlock(&spechash_slock);
	VOP_UNLOCK(vp, 0, p);
	simple_lock(&vp->v_interlock);
	vclean(vp, 0, p);
	vp->v_op = nvp->v_op;
	vp->v_tag = nvp->v_tag;
	nvp->v_type = VNON;
	insmntque(vp, mp);
	return (vp);
}

/*
 * Grab a particular vnode from the free list, increment its
 * reference count and lock it. The vnode lock bit is set if the
 * vnode is being eliminated in vgone. The process is awakened
 * when the transition is completed, and an error returned to
 * indicate that the vnode is no longer usable (possibly having
 * been changed to a new file system type).
 */
int
vget(vp, flags, p)
	struct vnode *vp;
	int flags;
	struct proc *p;
{
	int error;

	/*
	 * If the vnode is in the process of being cleaned out for
	 * another use, we wait for the cleaning to finish and then
	 * return failure. Cleaning is determined by checking that
	 * the VXLOCK flag is set.
	 */
	if ((flags & LK_INTERLOCK) == 0)
		simple_lock(&vp->v_interlock);
	if (vp->v_flag & VXLOCK) {
		vp->v_flag |= VXWANT;
		simple_unlock(&vp->v_interlock);
		tsleep((caddr_t)vp, PINOD, "vget", 0);
		return (ENOENT);
	}
	if (vp->v_usecount == 0) {
		simple_lock(&vnode_free_list_slock);
		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
		simple_unlock(&vnode_free_list_slock);
	}
	vp->v_usecount++;
	if (flags & LK_TYPE_MASK) {
		if (error = vn_lock(vp, flags | LK_INTERLOCK, p))
			vrele(vp);
		return (error);
	}
	simple_unlock(&vp->v_interlock);
	return (0);
}
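
/*
 * Illustrative example (editor's sketch, not part of the original
 * source): the usual reference protocol around a cached vnode is to
 * vget() it, use it, and then vput() or vrele() it; a nonzero return
 * from vget() means the vnode was reclaimed while we slept and must
 * be looked up again:
 *
 *	if (vget(vp, LK_EXCLUSIVE, p))
 *		goto lookup_again;	hypothetical retry label
 *	... use the locked vnode ...
 *	vput(vp);			unlock and drop the reference
 */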

/*
 * Stubs to use when there is no locking to be done on the underlying object.
 *
 * Getting a lock just clears the interlock if necessary.
 */
int
vop_nolock(ap)
	struct vop_lock_args /* {
		struct vnode *a_vp;
		int a_flags;
		struct proc *a_p;
	} */ *ap;
{
	struct vnode *vp = ap->a_vp;

	/*
	 * Since we are not using the lock manager, we must clear
	 * the interlock here.
	 */
	if (ap->a_flags & LK_INTERLOCK)
		simple_unlock(&vp->v_interlock);
	return (0);
}

/*
 * Unlock has nothing to do.
 */
int
vop_nounlock(ap)
	struct vop_unlock_args /* {
		struct vnode *a_vp;
		int a_flags;
		struct proc *a_p;
	} */ *ap;
{

	return (0);
}

/*
 * Nothing is ever locked.
 */
int
vop_noislocked(ap)
	struct vop_islocked_args /* {
		struct vnode *a_vp;
	} */ *ap;
{

	return (0);
}

/*
 * Vnode reference.
 */
void
vref(vp)
	struct vnode *vp;
{

	simple_lock(&vp->v_interlock);
	if (vp->v_usecount <= 0)
		panic("vref used where vget required");
	vp->v_usecount++;
	simple_unlock(&vp->v_interlock);
}

/*
 * vput(), just unlock and vrele()
 */
void
vput(vp)
	struct vnode *vp;
{
	struct proc *p = curproc;	/* XXX */

#ifdef DIAGNOSTIC
	if (vp == NULL)
		panic("vput: null vp");
#endif
	simple_lock(&vp->v_interlock);
	vp->v_usecount--;
	if (vp->v_usecount > 0) {
		simple_unlock(&vp->v_interlock);
		VOP_UNLOCK(vp, 0, p);
		return;
	}
#ifdef DIAGNOSTIC
	if (vp->v_usecount < 0 || vp->v_writecount != 0) {
		vprint("vput: bad ref count", vp);
		panic("vput: ref cnt");
	}
#endif
	/*
	 * insert at tail of LRU list
	 */
	simple_lock(&vnode_free_list_slock);
	TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
	simple_unlock(&vnode_free_list_slock);
	simple_unlock(&vp->v_interlock);
	VOP_INACTIVE(vp, p);
}

/*
 * Vnode release.
 * If count drops to zero, call inactive routine and return to freelist.
 */
void
vrele(vp)
	struct vnode *vp;
{
	struct proc *p = curproc;	/* XXX */

#ifdef DIAGNOSTIC
	if (vp == NULL)
		panic("vrele: null vp");
#endif
	simple_lock(&vp->v_interlock);
	vp->v_usecount--;
	if (vp->v_usecount > 0) {
		simple_unlock(&vp->v_interlock);
		return;
	}
#ifdef DIAGNOSTIC
	if (vp->v_usecount < 0 || vp->v_writecount != 0) {
		vprint("vrele: bad ref count", vp);
		panic("vrele: ref cnt");
	}
#endif
	/*
	 * insert at tail of LRU list
	 */
	simple_lock(&vnode_free_list_slock);
	TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
	simple_unlock(&vnode_free_list_slock);
	if (vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK, p) == 0)
		VOP_INACTIVE(vp, p);
}
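
/*
 * Illustrative summary (editor's sketch, not part of the original
 * source): the three reference routines differ only in their locking
 * expectations:
 *
 *	vref(vp);	add a reference; vp must already be referenced
 *	vput(vp);	unlock a locked vnode and drop one reference
 *	vrele(vp);	drop one reference on an unlocked vnode
 *
 * So after a successful vn_lock(vp, LK_EXCLUSIVE, p) following vget(),
 * the matching release is vput(vp); after a bare vref(vp), it is
 * vrele(vp).
 */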

#ifdef DIAGNOSTIC
/*
 * Page or buffer structure gets a reference.
 */
void
vhold(vp)
	register struct vnode *vp;
{

	simple_lock(&vp->v_interlock);
	vp->v_holdcnt++;
	simple_unlock(&vp->v_interlock);
}

/*
 * Page or buffer structure frees a reference.
 */
void
holdrele(vp)
	register struct vnode *vp;
{

	simple_lock(&vp->v_interlock);
	if (vp->v_holdcnt <= 0)
		panic("holdrele: holdcnt");
	vp->v_holdcnt--;
	simple_unlock(&vp->v_interlock);
}
#endif /* DIAGNOSTIC */

/*
 * Remove any vnodes in the vnode table belonging to mount point mp.
 *
 * If MNT_NOFORCE is specified, there should not be any active ones,
 * return error if any are found (nb: this is a user error, not a
 * system error). If MNT_FORCE is specified, detach any active vnodes
 * that are found.
 */
#ifdef DIAGNOSTIC
int busyprt = 0;	/* print out busy vnodes */
struct ctldebug debug1 = { "busyprt", &busyprt };
#endif

int
vflush(mp, skipvp, flags)
	struct mount *mp;
	struct vnode *skipvp;
	int flags;
{
	struct proc *p = curproc;	/* XXX */
	struct vnode *vp, *nvp;
	int busy = 0;

	simple_lock(&mntvnode_slock);
loop:
	for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) {
		if (vp->v_mount != mp)
			goto loop;
		nvp = vp->v_mntvnodes.le_next;
		/*
		 * Skip over a selected vnode.
		 */
		if (vp == skipvp)
			continue;

		simple_lock(&vp->v_interlock);
		/*
		 * Skip over vnodes marked VSYSTEM.
		 */
		if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM)) {
			simple_unlock(&vp->v_interlock);
			continue;
		}
		/*
		 * If WRITECLOSE is set, only flush out regular file
		 * vnodes open for writing.
		 */
		if ((flags & WRITECLOSE) &&
		    (vp->v_writecount == 0 || vp->v_type != VREG)) {
			simple_unlock(&vp->v_interlock);
			continue;
		}
		/*
		 * With v_usecount == 0, all we need to do is clear
		 * out the vnode data structures and we are done.
		 */
		if (vp->v_usecount == 0) {
			simple_unlock(&mntvnode_slock);
			vgonel(vp, p);
			simple_lock(&mntvnode_slock);
			continue;
		}
		/*
		 * If FORCECLOSE is set, forcibly close the vnode.
		 * For block or character devices, revert to an
		 * anonymous device. For all other files, just kill them.
		 */
		if (flags & FORCECLOSE) {
			simple_unlock(&mntvnode_slock);
			if (vp->v_type != VBLK && vp->v_type != VCHR) {
				vgonel(vp, p);
			} else {
				vclean(vp, 0, p);
				vp->v_op = spec_vnodeop_p;
				insmntque(vp, (struct mount *)0);
			}
			simple_lock(&mntvnode_slock);
			continue;
		}
#ifdef DIAGNOSTIC
		if (busyprt)
			vprint("vflush: busy vnode", vp);
#endif
		simple_unlock(&vp->v_interlock);
		busy++;
	}
	simple_unlock(&mntvnode_slock);
	if (busy)
		return (EBUSY);
	return (0);
}
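
/*
 * Illustrative example (editor's sketch, not part of the original
 * source): a filesystem's unmount routine typically calls vflush()
 * after flushing its own metadata, mapping MNT_FORCE to FORCECLOSE
 * and skipping any vnode it still needs, roughly:
 *
 *	flags = (mntflags & MNT_FORCE) ? FORCECLOSE : 0;
 *	if (error = vflush(mp, skipvp, flags))
 *		return (error);
 *
 * skipvp stands for a filesystem-private vnode that must survive the
 * flush, such as UFS's device vnode (um_devvp).
 */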

/*
 * Disassociate the underlying file system from a vnode.
 * The vnode interlock is held on entry.
 */
static void
vclean(vp, flags, p)
	struct vnode *vp;
	int flags;
	struct proc *p;
{
	int active;

	/*
	 * Check to see if the vnode is in use.
	 * If so we have to reference it before we clean it out
	 * so that its count cannot fall to zero and generate a
	 * race against ourselves to recycle it.
	 */
	if (active = vp->v_usecount)
		vp->v_usecount++;
	/*
	 * Prevent the vnode from being recycled or
	 * brought into use while we clean it out.
	 */
	if (vp->v_flag & VXLOCK)
		panic("vclean: deadlock");
	vp->v_flag |= VXLOCK;
	/*
	 * Even if the count is zero, the VOP_INACTIVE routine may still
	 * have the object locked while it cleans it out. The VOP_LOCK
	 * ensures that the VOP_INACTIVE routine is done with its work.
	 * For active vnodes, it ensures that no other activity can
	 * occur while the underlying object is being cleaned out.
	 */
	VOP_LOCK(vp, LK_DRAIN | LK_INTERLOCK, p);
	/*
	 * Clean out any buffers associated with the vnode.
	 */
	if (flags & DOCLOSE)
		vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0);
	/*
	 * If purging an active vnode, it must be closed and
	 * deactivated before being reclaimed. Note that the
	 * VOP_INACTIVE will unlock the vnode.
	 */
	if (active) {
		if (flags & DOCLOSE)
			VOP_CLOSE(vp, IO_NDELAY, NOCRED, p);
		VOP_INACTIVE(vp, p);
	} else {
		/*
		 * Any other processes trying to obtain this lock must first
		 * wait for VXLOCK to clear, then call the new lock operation.
		 */
		VOP_UNLOCK(vp, 0, p);
	}
	/*
	 * Reclaim the vnode.
	 */
	if (VOP_RECLAIM(vp, p))
		panic("vclean: cannot reclaim");
	if (active)
		vrele(vp);
	cache_purge(vp);

	/*
	 * Done with purge, notify sleepers of the grim news.
	 */
	vp->v_op = dead_vnodeop_p;
	vp->v_tag = VT_NON;
	vp->v_flag &= ~VXLOCK;
	if (vp->v_flag & VXWANT) {
		vp->v_flag &= ~VXWANT;
		wakeup((caddr_t)vp);
	}
}

/*
 * Eliminate all activity associated with the requested vnode
 * and with all vnodes aliased to the requested vnode.
 */
int
vop_revoke(ap)
	struct vop_revoke_args /* {
		struct vnode *a_vp;
		int a_flags;
	} */ *ap;
{
	struct vnode *vp, *vq;
	struct proc *p = curproc;	/* XXX */

#ifdef DIAGNOSTIC
	if ((ap->a_flags & REVOKEALL) == 0)
		panic("vop_revoke");
#endif

	vp = ap->a_vp;
	simple_lock(&vp->v_interlock);

	if (vp->v_flag & VALIASED) {
		/*
		 * If a vgone (or vclean) is already in progress,
		 * wait until it is done and return.
		 */
		if (vp->v_flag & VXLOCK) {
			vp->v_flag |= VXWANT;
			simple_unlock(&vp->v_interlock);
			tsleep((caddr_t)vp, PINOD, "vop_revokeall", 0);
			return (0);
		}
		/*
		 * Ensure that vp will not be vgone'd while we
		 * are eliminating its aliases.
		 */
		vp->v_flag |= VXLOCK;
		simple_unlock(&vp->v_interlock);
		while (vp->v_flag & VALIASED) {
			simple_lock(&spechash_slock);
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_rdev != vp->v_rdev ||
				    vq->v_type != vp->v_type || vp == vq)
					continue;
				simple_unlock(&spechash_slock);
				vgone(vq);
				break;
			}
			if (vq == NULLVP)
				simple_unlock(&spechash_slock);
		}
		/*
		 * Remove the lock so that vgone below will
		 * really eliminate the vnode after which time
		 * vgone will awaken any sleepers.
		 */
		simple_lock(&vp->v_interlock);
		vp->v_flag &= ~VXLOCK;
	}
	vgonel(vp, p);
	return (0);
}

/*
 * Recycle an unused vnode to the front of the free list.
 * Release the passed interlock if the vnode will be recycled.
 */
int
vrecycle(vp, inter_lkp, p)
	struct vnode *vp;
	struct simplelock *inter_lkp;
	struct proc *p;
{

	simple_lock(&vp->v_interlock);
	if (vp->v_usecount == 0) {
		if (inter_lkp)
			simple_unlock(inter_lkp);
		vgonel(vp, p);
		return (1);
	}
	simple_unlock(&vp->v_interlock);
	return (0);
}

/*
 * Eliminate all activity associated with a vnode
 * in preparation for reuse.
 */
void
vgone(vp)
	struct vnode *vp;
{
	struct proc *p = curproc;	/* XXX */

	simple_lock(&vp->v_interlock);
	vgonel(vp, p);
}

/*
 * vgone, with the vp interlock held.
 */
void
vgonel(vp, p)
	struct vnode *vp;
	struct proc *p;
{
	struct vnode *vq;
	struct vnode *vx;

	/*
	 * If a vgone (or vclean) is already in progress,
	 * wait until it is done and return.
	 */
	if (vp->v_flag & VXLOCK) {
		vp->v_flag |= VXWANT;
		simple_unlock(&vp->v_interlock);
		tsleep((caddr_t)vp, PINOD, "vgone", 0);
		return;
	}
	/*
	 * Clean out the filesystem specific data.
	 */
	vclean(vp, DOCLOSE, p);
	/*
	 * Delete from old mount point vnode list, if on one.
	 */
	if (vp->v_mount != NULL)
		insmntque(vp, (struct mount *)0);
	/*
	 * If special device, remove it from special device alias list
	 * if it is on one.
	 */
	if ((vp->v_type == VBLK || vp->v_type == VCHR) && vp->v_specinfo != 0) {
		simple_lock(&spechash_slock);
		if (*vp->v_hashchain == vp) {
			*vp->v_hashchain = vp->v_specnext;
		} else {
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_specnext != vp)
					continue;
				vq->v_specnext = vp->v_specnext;
				break;
			}
			if (vq == NULL)
				panic("missing bdev");
		}
		if (vp->v_flag & VALIASED) {
			vx = NULL;
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_rdev != vp->v_rdev ||
				    vq->v_type != vp->v_type)
					continue;
				if (vx)
					break;
				vx = vq;
			}
			if (vx == NULL)
				panic("missing alias");
			if (vq == NULL)
				vx->v_flag &= ~VALIASED;
			vp->v_flag &= ~VALIASED;
		}
		simple_unlock(&spechash_slock);
		FREE(vp->v_specinfo, M_VNODE);
		vp->v_specinfo = NULL;
	}
	/*
	 * If it is on the freelist and not already at the head,
	 * move it to the head of the list. The test of the back
	 * pointer and the reference count of zero is because
	 * it will be removed from the free list by getnewvnode,
	 * but will not have its reference count incremented until
	 * after calling vgone. If the reference count were
	 * incremented first, vgone would (incorrectly) try to
	 * close the previous instance of the underlying object.
	 * So, the back pointer is explicitly set to `0xdeadb' in
	 * getnewvnode after removing it from the freelist to ensure
	 * that we do not try to move it here.
	 */
	if (vp->v_usecount == 0) {
		simple_lock(&vnode_free_list_slock);
		if ((vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb) &&
		    vnode_free_list.tqh_first != vp) {
			TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
			TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
		}
		simple_unlock(&vnode_free_list_slock);
	}
	vp->v_type = VBAD;
}

/*
 * Lookup a vnode by device number.
 */
int
vfinddev(dev, type, vpp)
	dev_t dev;
	enum vtype type;
	struct vnode **vpp;
{
	struct vnode *vp;
	int rc = 0;

	simple_lock(&spechash_slock);
	for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) {
		if (dev != vp->v_rdev || type != vp->v_type)
			continue;
		*vpp = vp;
		rc = 1;
		break;
	}
	simple_unlock(&spechash_slock);
	return (rc);
}

/*
 * Calculate the total number of references to a special device.
 */
int
vcount(vp)
	struct vnode *vp;
{
	struct vnode *vq, *vnext;
	int count;

loop:
	if ((vp->v_flag & VALIASED) == 0)
		return (vp->v_usecount);
	simple_lock(&spechash_slock);
	for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) {
		vnext = vq->v_specnext;
		if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type)
			continue;
		/*
		 * Alias, but not in use, so flush it out.
		 */
		if (vq->v_usecount == 0 && vq != vp) {
			simple_unlock(&spechash_slock);
			vgone(vq);
			goto loop;
		}
		count += vq->v_usecount;
	}
	simple_unlock(&spechash_slock);
	return (count);
}
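
/*
 * Illustrative example (editor's sketch, not part of the original
 * source): device close routines use vcount() to decide whether this
 * is the last close across all aliases of a device before shutting
 * the hardware down, in the manner of spec_close() in miscfs/specfs:
 *
 *	if (vcount(vp) > 1 && (vp->v_flag & VXLOCK) == 0)
 *		return (0);	not the last close; leave device open
 *	error = (*cdevsw[major(dev)].d_close)(dev, flag, mode, p);
 *
 * The cdevsw dispatch shown is the conventional character-device
 * form; block devices go through bdevsw instead.
 */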

/*
 * Print out a description of a vnode.
 */
static char *typename[] =
   { "VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD" };

void
vprint(label, vp)
	char *label;
	register struct vnode *vp;
{
	char buf[64];

	if (label != NULL)
		printf("%s: ", label);
	printf("type %s, usecount %d, writecount %d, refcount %d,",
		typename[vp->v_type], vp->v_usecount, vp->v_writecount,
		vp->v_holdcnt);
	buf[0] = '\0';
	if (vp->v_flag & VROOT)
		strcat(buf, "|VROOT");
	if (vp->v_flag & VTEXT)
		strcat(buf, "|VTEXT");
	if (vp->v_flag & VSYSTEM)
		strcat(buf, "|VSYSTEM");
	if (vp->v_flag & VXLOCK)
		strcat(buf, "|VXLOCK");
	if (vp->v_flag & VXWANT)
		strcat(buf, "|VXWANT");
	if (vp->v_flag & VBWAIT)
		strcat(buf, "|VBWAIT");
	if (vp->v_flag & VALIASED)
		strcat(buf, "|VALIASED");
	if (buf[0] != '\0')
		printf(" flags (%s)", &buf[1]);
	if (vp->v_data == NULL) {
		printf("\n");
	} else {
		printf("\n\t");
		VOP_PRINT(vp);
	}
}

#ifdef DEBUG
/*
 * List all of the locked vnodes in the system.
 * Called when debugging the kernel.
 */
void
printlockedvnodes()
{
	struct proc *p = curproc;	/* XXX */
	struct mount *mp, *nmp;
	struct vnode *vp;

	printf("Locked vnodes\n");
	simple_lock(&mountlist_slock);
	for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) {
			nmp = mp->mnt_list.cqe_next;
			continue;
		}
		for (vp = mp->mnt_vnodelist.lh_first;
		     vp != NULL;
		     vp = vp->v_mntvnodes.le_next) {
			if (VOP_ISLOCKED(vp))
				vprint((char *)0, vp);
		}
		simple_lock(&mountlist_slock);
		nmp = mp->mnt_list.cqe_next;
		vfs_unbusy(mp, p);
	}
	simple_unlock(&mountlist_slock);
}
#endif

/*
 * Top level filesystem related information gathering.
 */
int
vfs_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p)
	int *name;
	u_int namelen;
	void *oldp;
	size_t *oldlenp;
	void *newp;
	size_t newlen;
	struct proc *p;
{
	struct ctldebug *cdp;
	struct vfsconf *vfsp;

	/* all sysctl names at this level are at least name and field */
	if (namelen < 2)
		return (ENOTDIR);		/* overloaded */
	if (name[0] != VFS_GENERIC) {
		for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
			if (vfsp->vfc_typenum == name[0])
				break;
		if (vfsp == NULL)
			return (EOPNOTSUPP);
		return ((*vfsp->vfc_vfsops->vfs_sysctl)(&name[1], namelen - 1,
		    oldp, oldlenp, newp, newlen, p));
	}
	switch (name[1]) {
	case VFS_MAXTYPENUM:
		return (sysctl_rdint(oldp, oldlenp, newp, maxvfsconf));
	case VFS_CONF:
		if (namelen < 3)
			return (ENOTDIR);	/* overloaded */
		for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
			if (vfsp->vfc_typenum == name[2])
				break;
		if (vfsp == NULL)
			return (EOPNOTSUPP);
		return (sysctl_rdstruct(oldp, oldlenp, newp, vfsp,
		    sizeof(struct vfsconf)));
	}
	return (EOPNOTSUPP);
}

int kinfo_vdebug = 1;
int kinfo_vgetfailed;
#define KINFO_VNODESLOP	10
/*
 * Dump vnode list (via sysctl).
 * Copyout address of vnode followed by vnode.
 */
/* ARGSUSED */
int
sysctl_vnode(where, sizep, p)
	char *where;
	size_t *sizep;
	struct proc *p;
{
	struct mount *mp, *nmp;
	struct vnode *nvp, *vp;
	char *bp = where, *savebp;
	char *ewhere;
	int error;

#define VPTRSZ	sizeof (struct vnode *)
#define VNODESZ	sizeof (struct vnode)
	if (where == NULL) {
		*sizep = (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ);
		return (0);
	}
	ewhere = where + *sizep;

	simple_lock(&mountlist_slock);
	for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) {
			nmp = mp->mnt_list.cqe_next;
			continue;
		}
		savebp = bp;
again:
		simple_lock(&mntvnode_slock);
		for (vp = mp->mnt_vnodelist.lh_first;
		     vp != NULL;
		     vp = nvp) {
			/*
			 * Check that the vp is still associated with
			 * this filesystem.  RACE: could have been
			 * recycled onto the same filesystem.
			 */
			if (vp->v_mount != mp) {
				simple_unlock(&mntvnode_slock);
				if (kinfo_vdebug)
					printf("kinfo: vp changed\n");
				bp = savebp;
				goto again;
			}
			nvp = vp->v_mntvnodes.le_next;
			if (bp + VPTRSZ + VNODESZ > ewhere) {
				simple_unlock(&mntvnode_slock);
				*sizep = bp - where;
				return (ENOMEM);
			}
			simple_unlock(&mntvnode_slock);
			if ((error = copyout((caddr_t)&vp, bp, VPTRSZ)) ||
			   (error = copyout((caddr_t)vp, bp + VPTRSZ, VNODESZ)))
				return (error);
			bp += VPTRSZ + VNODESZ;
			simple_lock(&mntvnode_slock);
		}
		simple_unlock(&mntvnode_slock);
		simple_lock(&mountlist_slock);
		nmp = mp->mnt_list.cqe_next;
		vfs_unbusy(mp, p);
	}
	simple_unlock(&mountlist_slock);

	*sizep = bp - where;
	return (0);
}

/*
 * Check to see if a filesystem is mounted on a block device.
 */
int
vfs_mountedon(vp)
	struct vnode *vp;
{
	struct vnode *vq;
	int error = 0;

	if (vp->v_specflags & SI_MOUNTEDON)
		return (EBUSY);
	if (vp->v_flag & VALIASED) {
		simple_lock(&spechash_slock);
		for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
			if (vq->v_rdev != vp->v_rdev ||
			    vq->v_type != vp->v_type)
				continue;
			if (vq->v_specflags & SI_MOUNTEDON) {
				error = EBUSY;
				break;
			}
		}
		simple_unlock(&spechash_slock);
	}
	return (error);
}
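
/*
 * Illustrative example (editor's sketch, not part of the original
 * source): a mount routine rejects a device that already carries a
 * filesystem before marking it mounted-on itself, roughly as the UFS
 * code does:
 *
 *	if (error = vfs_mountedon(devvp))
 *		return (error);
 *	if (vcount(devvp) > 1 && devvp != rootvp)
 *		return (EBUSY);
 *	...
 *	devvp->v_specflags |= SI_MOUNTEDON;
 */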

/*
 * Unmount all filesystems. The list is traversed in reverse order
 * of mounting to avoid dependencies.
 */
void
vfs_unmountall()
{
	struct mount *mp, *nmp;
	struct proc *p = curproc;	/* XXX */

	/*
	 * Since this only runs when rebooting, it is not interlocked.
	 */
	for (mp = mountlist.cqh_last; mp != (void *)&mountlist; mp = nmp) {
		nmp = mp->mnt_list.cqe_prev;
		(void) dounmount(mp, MNT_FORCE, p);
	}
}

/*
 * Build hash lists of net addresses and hang them off the mount point.
 * Called by ufs_mount() to set up the lists of export addresses.
 */
static int
vfs_hang_addrlist(mp, nep, argp)
	struct mount *mp;
	struct netexport *nep;
	struct export_args *argp;
{
	register struct netcred *np;
	register struct radix_node_head *rnh;
	register int i;
	struct radix_node *rn;
	struct sockaddr *saddr, *smask = 0;
	struct domain *dom;
	int error;

	if (argp->ex_addrlen == 0) {
		if (mp->mnt_flag & MNT_DEFEXPORTED)
			return (EPERM);
		np = &nep->ne_defexported;
		np->netc_exflags = argp->ex_flags;
		np->netc_anon = argp->ex_anon;
		np->netc_anon.cr_ref = 1;
		mp->mnt_flag |= MNT_DEFEXPORTED;
		return (0);
	}
	i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen;
	np = (struct netcred *)malloc(i, M_NETADDR, M_WAITOK);
	bzero((caddr_t)np, i);
	saddr = (struct sockaddr *)(np + 1);
	if (error = copyin(argp->ex_addr, (caddr_t)saddr, argp->ex_addrlen))
		goto out;
	if (saddr->sa_len > argp->ex_addrlen)
		saddr->sa_len = argp->ex_addrlen;
	if (argp->ex_masklen) {
		smask = (struct sockaddr *)((caddr_t)saddr + argp->ex_addrlen);
		error = copyin(argp->ex_mask, (caddr_t)smask, argp->ex_masklen);
		if (error)
			goto out;
		if (smask->sa_len > argp->ex_masklen)
			smask->sa_len = argp->ex_masklen;
	}
	i = saddr->sa_family;
	if ((rnh = nep->ne_rtable[i]) == 0) {
		/*
		 * Seems silly to initialize every AF when most are not
		 * used, do so on demand here
		 */
		for (dom = domains; dom; dom = dom->dom_next)
			if (dom->dom_family == i && dom->dom_rtattach) {
				dom->dom_rtattach((void **)&nep->ne_rtable[i],
					dom->dom_rtoffset);
				break;
			}
		if ((rnh = nep->ne_rtable[i]) == 0) {
			error = ENOBUFS;
			goto out;
		}
	}
	rn = (*rnh->rnh_addaddr)((caddr_t)saddr, (caddr_t)smask, rnh,
		np->netc_rnodes);
	if (rn == 0) {
		/*
		 * One of the reasons that rnh_addaddr may fail is that
		 * the entry already exists. To check for this case, we
		 * look up the entry to see if it is there. If so, we
		 * do not need to make a new entry but do return success.
		 */
		free(np, M_NETADDR);
		rn = (*rnh->rnh_matchaddr)((caddr_t)saddr, rnh);
		if (rn != 0 && (rn->rn_flags & RNF_ROOT) == 0 &&
		    ((struct netcred *)rn)->netc_exflags == argp->ex_flags &&
		    !bcmp((caddr_t)&((struct netcred *)rn)->netc_anon,
			    (caddr_t)&argp->ex_anon, sizeof(struct ucred)))
			return (0);
		return (EPERM);
	}
	np->netc_exflags = argp->ex_flags;
	np->netc_anon = argp->ex_anon;
	np->netc_anon.cr_ref = 1;
	return (0);
out:
	free(np, M_NETADDR);
	return (error);
}

/* ARGSUSED */
static int
vfs_free_netcred(rn, w)
	struct radix_node *rn;
	caddr_t w;
{
	register struct radix_node_head *rnh = (struct radix_node_head *)w;

	(*rnh->rnh_deladdr)(rn->rn_key, rn->rn_mask, rnh);
	free((caddr_t)rn, M_NETADDR);
	return (0);
}

/*
 * Free the net address hash lists that are hanging off the mount points.
 */
static void
vfs_free_addrlist(nep)
	struct netexport *nep;
{
	register int i;
	register struct radix_node_head *rnh;

	for (i = 0; i <= AF_MAX; i++)
		if (rnh = nep->ne_rtable[i]) {
			(*rnh->rnh_walktree)(rnh, vfs_free_netcred,
			    (caddr_t)rnh);
			free((caddr_t)rnh, M_RTABLE);
			nep->ne_rtable[i] = 0;
		}
}

int
vfs_export(mp, nep, argp)
	struct mount *mp;
	struct netexport *nep;
	struct export_args *argp;
{
	int error;

	if (argp->ex_flags & MNT_DELEXPORT) {
		vfs_free_addrlist(nep);
		mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED);
	}
	if (argp->ex_flags & MNT_EXPORTED) {
		if (error = vfs_hang_addrlist(mp, nep, argp))
			return (error);
		mp->mnt_flag |= MNT_EXPORTED;
	}
	return (0);
}
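
/*
 * Illustrative example (editor's sketch, not part of the original
 * source): a filesystem mount routine passes the export_args from
 * mount(2) through to vfs_export(); in ufs_mount(), an update mount
 * with no special file named means an export change, roughly:
 *
 *	if (args.fspec == 0)
 *		return (vfs_export(mp, &ump->um_export, &args.export));
 *
 * um_export here is the struct netexport that UFS keeps in its
 * ufsmount; other filesystems keep their own.
 */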

struct netcred *
vfs_export_lookup(mp, nep, nam)
	register struct mount *mp;
	struct netexport *nep;
	struct mbuf *nam;
{
	register struct netcred *np;
	register struct radix_node_head *rnh;
	struct sockaddr *saddr;

	np = NULL;
	if (mp->mnt_flag & MNT_EXPORTED) {
		/*
		 * Lookup in the export list first.
		 */
		if (nam != NULL) {
			saddr = mtod(nam, struct sockaddr *);
			rnh = nep->ne_rtable[saddr->sa_family];
			if (rnh != NULL) {
				np = (struct netcred *)
					(*rnh->rnh_matchaddr)((caddr_t)saddr,
							      rnh);
				if (np && np->netc_rnodes->rn_flags & RNF_ROOT)
					np = NULL;
			}
		}
		/*
		 * If no address match, use the default if it exists.
		 */
		if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED)
			np = &nep->ne_defexported;
	}
	return (np);
}
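
/*
 * Illustrative example (editor's sketch, not part of the original
 * source): the NFS server's fhtovp path checks the client's address
 * against the export list and substitutes the anonymous credential
 * when the export calls for it, roughly:
 *
 *	np = vfs_export_lookup(mp, &ump->um_export, nam);
 *	if (np == NULL)
 *		return (EACCES);
 *	if (np->netc_exflags & MNT_EXRDONLY)
 *		... treat the mount as read-only for this client ...
 *	cred = &np->netc_anon;	if mapping to the anonymous user
 *
 * Field names follow the netcred/netexport usage above.
 */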
1691