/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * %sccs.include.redist.c%
 *
 *	@(#)vfs_subr.c	8.31 (Berkeley) 05/26/95
 */

/*
 * External virtual filesystem routines
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/mount.h>
#include <sys/time.h>
#include <sys/vnode.h>
#include <sys/stat.h>
#include <sys/namei.h>
#include <sys/ucred.h>
#include <sys/buf.h>
#include <sys/errno.h>
#include <sys/malloc.h>
#include <sys/domain.h>
#include <sys/mbuf.h>

#include <vm/vm.h>
#include <sys/sysctl.h>

#include <miscfs/specfs/specdev.h>

enum vtype iftovt_tab[16] = {
	VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
	VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
};
int	vttoif_tab[9] = {
	0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
	S_IFSOCK, S_IFIFO, S_IFMT,
};
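
/*
 * These tables back the IFTOVT() and VTTOIF() macros in <sys/vnode.h>,
 * which map between stat-style S_IF* mode bits and vnode types.
 * A minimal sketch of their use:
 *
 *	enum vtype vt = IFTOVT(S_IFDIR);	-- yields VDIR
 *	int mode = VTTOIF(VREG);		-- yields S_IFREG
 */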

/*
 * Insq/Remq for the vnode usage lists.
 */
#define	bufinsvn(bp, dp)	LIST_INSERT_HEAD(dp, bp, b_vnbufs)
#define	bufremvn(bp) {							\
	LIST_REMOVE(bp, b_vnbufs);					\
	(bp)->b_vnbufs.le_next = NOLIST;				\
}
TAILQ_HEAD(freelst, vnode) vnode_free_list;	/* vnode free list */
struct mntlist mountlist;			/* mounted filesystem list */
struct simplelock mountlist_slock;
static struct simplelock mntid_slock;
struct simplelock mntvnode_slock;
struct simplelock vnode_free_list_slock;
static struct simplelock spechash_slock;

/*
 * Initialize the vnode management data structures.
 */
void
vntblinit()
{

	simple_lock_init(&mntvnode_slock);
	simple_lock_init(&mntid_slock);
	simple_lock_init(&spechash_slock);
	TAILQ_INIT(&vnode_free_list);
	simple_lock_init(&vnode_free_list_slock);
	CIRCLEQ_INIT(&mountlist);
}

/*
 * Mark a mount point as busy. Used to synchronize access and to delay
 * unmounting. Interlock is not released on failure.
 */
int
vfs_busy(mp, flags, interlkp, p)
	struct mount *mp;
	int flags;
	struct simplelock *interlkp;
	struct proc *p;
{
	int lkflags;

	if (mp->mnt_flag & MNT_UNMOUNT) {
		if (flags & LK_NOWAIT)
			return (ENOENT);
		mp->mnt_flag |= MNT_MWAIT;
		if (interlkp)
			simple_unlock(interlkp);
		/*
		 * Since all busy locks are shared except the exclusive
		 * lock granted when unmounting, the only place that a
		 * wakeup needs to be done is at the release of the
		 * exclusive lock at the end of dounmount.
		 */
		sleep((caddr_t)mp, PVFS);
		if (interlkp)
			simple_lock(interlkp);
		return (ENOENT);
	}
	lkflags = LK_SHARED;
	if (interlkp)
		lkflags |= LK_INTERLOCK;
	if (lockmgr(&mp->mnt_lock, lkflags, interlkp, p))
		panic("vfs_busy: unexpected lock failure");
	return (0);
}

/*
 * Free a busy filesystem.
 */
void
vfs_unbusy(mp, p)
	struct mount *mp;
	struct proc *p;
{

	lockmgr(&mp->mnt_lock, LK_RELEASE, NULL, p);
}
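
/*
 * A minimal usage sketch: callers that must keep a mount point from
 * being unmounted while they work on it bracket the work with
 * vfs_busy()/vfs_unbusy(), as printlockedvnodes() below does:
 *
 *	if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p))
 *		continue;		-- being unmounted, skip it
 *	... operate on mp ...
 *	vfs_unbusy(mp, p);
 */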

/*
 * Lookup a filesystem type, and if found allocate and initialize
 * a mount structure for it.
 *
 * Devname is usually updated by mount(8) after booting.
 */
int
vfs_rootmountalloc(fstypename, devname, mpp)
	char *fstypename;
	char *devname;
	struct mount **mpp;
{
	struct proc *p = curproc;	/* XXX */
	struct vfsconf *vfsp;
	struct mount *mp;

	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
		if (!strcmp(vfsp->vfc_name, fstypename))
			break;
	if (vfsp == NULL)
		return (ENODEV);
	mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK);
	bzero((char *)mp, (u_long)sizeof(struct mount));
	lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, 0);
	(void)vfs_busy(mp, LK_NOWAIT, 0, p);
	LIST_INIT(&mp->mnt_vnodelist);
	mp->mnt_vfc = vfsp;
	mp->mnt_op = vfsp->vfc_vfsops;
	mp->mnt_flag = MNT_RDONLY;
	mp->mnt_vnodecovered = NULLVP;
	vfsp->vfc_refcount++;
	mp->mnt_stat.f_type = vfsp->vfc_typenum;
	mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
	strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN);
	mp->mnt_stat.f_mntonname[0] = '/';
	(void) copystr(devname, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 0);
	*mpp = mp;
	return (0);
}

/*
 * Find an appropriate filesystem to use for the root. If a filesystem
 * has not been preselected, walk through the list of known filesystems
 * trying those that have mountroot routines, and try them until one
 * works or we have tried them all.
 */
int
vfs_mountroot()
{
	struct vfsconf *vfsp;
	extern int (*mountroot)(void);
	int error;

	if (mountroot != NULL)
		return ((*mountroot)());
	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) {
		if (vfsp->vfc_mountroot == NULL)
			continue;
		if ((error = (*vfsp->vfc_mountroot)()) == 0)
			return (0);
		printf("%s_mountroot failed: %d\n", vfsp->vfc_name, error);
	}
	return (ENODEV);
}

/*
 * Lookup a mount point by filesystem identifier.
 */
struct mount *
vfs_getvfs(fsid)
	fsid_t *fsid;
{
	register struct mount *mp;

	simple_lock(&mountlist_slock);
	for (mp = mountlist.cqh_first; mp != (void *)&mountlist;
	     mp = mp->mnt_list.cqe_next) {
		if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] &&
		    mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) {
			simple_unlock(&mountlist_slock);
			return (mp);
		}
	}
	simple_unlock(&mountlist_slock);
	return ((struct mount *)0);
}

/*
 * Get a new unique fsid
 */
void
vfs_getnewfsid(mp)
	struct mount *mp;
{
	static u_short xxxfs_mntid;

	fsid_t tfsid;
	int mtype;

	simple_lock(&mntid_slock);
	mtype = mp->mnt_vfc->vfc_typenum;
	mp->mnt_stat.f_fsid.val[0] = makedev(nblkdev + mtype, 0);
	mp->mnt_stat.f_fsid.val[1] = mtype;
	if (xxxfs_mntid == 0)
		++xxxfs_mntid;
	tfsid.val[0] = makedev(nblkdev + mtype, xxxfs_mntid);
	tfsid.val[1] = mtype;
	if (mountlist.cqh_first != (void *)&mountlist) {
		while (vfs_getvfs(&tfsid)) {
			tfsid.val[0]++;
			xxxfs_mntid++;
		}
	}
	mp->mnt_stat.f_fsid.val[0] = tfsid.val[0];
	simple_unlock(&mntid_slock);
}
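
/*
 * Sketch of the fsid layout produced above: val[0] is a synthetic dev_t
 * whose major number is nblkdev plus the filesystem type number (so it
 * cannot collide with a real block device) and whose minor number is a
 * per-type generation count; val[1] is the filesystem type number.
 * vfs_getvfs() is used to probe candidates until an unused pair is found.
 */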

/*
 * Set vnode attributes to VNOVAL
 */
void
vattr_null(vap)
	register struct vattr *vap;
{

	vap->va_type = VNON;
	vap->va_size = vap->va_bytes = VNOVAL;
	vap->va_mode = vap->va_nlink = vap->va_uid = vap->va_gid =
		vap->va_fsid = vap->va_fileid =
		vap->va_blocksize = vap->va_rdev =
		vap->va_atime.ts_sec = vap->va_atime.ts_nsec =
		vap->va_mtime.ts_sec = vap->va_mtime.ts_nsec =
		vap->va_ctime.ts_sec = vap->va_ctime.ts_nsec =
		vap->va_flags = vap->va_gen = VNOVAL;
	vap->va_vaflags = 0;
}

/*
 * Routines having to do with the management of the vnode table.
 */
extern int (**dead_vnodeop_p)();
static void vclean __P((struct vnode *vp, int flag, struct proc *p));
extern void vgonel __P((struct vnode *vp, struct proc *p));
long numvnodes;
extern struct vattr va_null;

/*
 * Return the next vnode from the free list.
 */
int
getnewvnode(tag, mp, vops, vpp)
	enum vtagtype tag;
	struct mount *mp;
	int (**vops)();
	struct vnode **vpp;
{
	struct proc *p = curproc;	/* XXX */
	struct vnode *vp;
	int s;
	int cnt;

top:
	simple_lock(&vnode_free_list_slock);
	if ((vnode_free_list.tqh_first == NULL &&
	     numvnodes < 2 * desiredvnodes) ||
	    numvnodes < desiredvnodes) {
		simple_unlock(&vnode_free_list_slock);
		vp = (struct vnode *)malloc((u_long)sizeof *vp,
		    M_VNODE, M_WAITOK);
		bzero((char *)vp, sizeof *vp);
		numvnodes++;
	} else {
		for (vp = vnode_free_list.tqh_first;
		     vp != NULLVP; vp = vp->v_freelist.tqe_next) {
			if (simple_lock_try(&vp->v_interlock))
				break;
		}
		/*
		 * Unless this is a bad time of the month, at most
		 * the first NCPUS items on the free list are
		 * locked, so this is close enough to being empty.
		 */
		if (vp == NULLVP) {
			simple_unlock(&vnode_free_list_slock);
			tablefull("vnode");
			*vpp = 0;
			return (ENFILE);
		}
		if (vp->v_usecount)
			panic("free vnode isn't");
		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
		/* see comment on why 0xdeadb is set at end of vgone (below) */
		vp->v_freelist.tqe_prev = (struct vnode **)0xdeadb;
		simple_unlock(&vnode_free_list_slock);
		vp->v_lease = NULL;
		if (vp->v_type != VBAD)
			vgonel(vp, p);
		else
			simple_unlock(&vp->v_interlock);
#ifdef DIAGNOSTIC
		if (vp->v_data)
			panic("cleaned vnode isn't");
		s = splbio();
		if (vp->v_numoutput)
			panic("Clean vnode has pending I/O's");
		splx(s);
#endif
		vp->v_flag = 0;
		vp->v_lastr = 0;
		vp->v_ralen = 0;
		vp->v_maxra = 0;
		vp->v_lastw = 0;
		vp->v_lasta = 0;
		vp->v_cstart = 0;
		vp->v_clen = 0;
		vp->v_socket = 0;
	}
	vp->v_type = VNON;
	cache_purge(vp);
	vp->v_tag = tag;
	vp->v_op = vops;
	insmntque(vp, mp);
	*vpp = vp;
	vp->v_usecount = 1;
	vp->v_data = 0;
	return (0);
}
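
/*
 * A minimal usage sketch (patterned on the way a UFS-style vget routine
 * does it; ffs_vnodeop_p per the FFS code): a filesystem allocates a
 * fresh vnode for an inode with
 *
 *	error = getnewvnode(VT_UFS, mp, ffs_vnodeop_p, &vp);
 *
 * then attaches its private data to vp->v_data and sets vp->v_type.
 */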

/*
 * Move a vnode from one mount queue to another.
 */
void
insmntque(vp, mp)
	struct vnode *vp;
	struct mount *mp;
{

	simple_lock(&mntvnode_slock);
	/*
	 * Delete from old mount point vnode list, if on one.
	 */
	if (vp->v_mount != NULL)
		LIST_REMOVE(vp, v_mntvnodes);
	/*
	 * Insert into list of vnodes for the new mount point, if available.
	 */
	if ((vp->v_mount = mp) != NULL)
		LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes);
	simple_unlock(&mntvnode_slock);
}

/*
 * Update outstanding I/O count and do wakeup if requested.
 */
void
vwakeup(bp)
	register struct buf *bp;
{
	register struct vnode *vp;

	bp->b_flags &= ~B_WRITEINPROG;
	if (vp = bp->b_vp) {
		if (--vp->v_numoutput < 0)
			panic("vwakeup: neg numoutput");
		if ((vp->v_flag & VBWAIT) && vp->v_numoutput <= 0) {
			if (vp->v_numoutput < 0)
				panic("vwakeup: neg numoutput 2");
			vp->v_flag &= ~VBWAIT;
			wakeup((caddr_t)&vp->v_numoutput);
		}
	}
}

/*
 * Flush out and invalidate all buffers associated with a vnode.
 * Called with the underlying object locked.
 */
int
vinvalbuf(vp, flags, cred, p, slpflag, slptimeo)
	register struct vnode *vp;
	int flags;
	struct ucred *cred;
	struct proc *p;
	int slpflag, slptimeo;
{
	register struct buf *bp;
	struct buf *nbp, *blist;
	int s, error;

	if (flags & V_SAVE) {
		if (error = VOP_FSYNC(vp, cred, MNT_WAIT, p))
			return (error);
		if (vp->v_dirtyblkhd.lh_first != NULL)
			panic("vinvalbuf: dirty bufs");
	}
	for (;;) {
		if ((blist = vp->v_cleanblkhd.lh_first) && flags & V_SAVEMETA)
			while (blist && blist->b_lblkno < 0)
				blist = blist->b_vnbufs.le_next;
		if (!blist && (blist = vp->v_dirtyblkhd.lh_first) &&
		    (flags & V_SAVEMETA))
			while (blist && blist->b_lblkno < 0)
				blist = blist->b_vnbufs.le_next;
		if (!blist)
			break;

		for (bp = blist; bp; bp = nbp) {
			nbp = bp->b_vnbufs.le_next;
			if (flags & V_SAVEMETA && bp->b_lblkno < 0)
				continue;
			s = splbio();
			if (bp->b_flags & B_BUSY) {
				bp->b_flags |= B_WANTED;
				error = tsleep((caddr_t)bp,
					slpflag | (PRIBIO + 1), "vinvalbuf",
					slptimeo);
				splx(s);
				if (error)
					return (error);
				break;
			}
			bremfree(bp);
			bp->b_flags |= B_BUSY;
			splx(s);
			/*
			 * XXX Since there are no node locks for NFS, I believe
			 * there is a slight chance that a delayed write will
			 * occur while sleeping just above, so check for it.
			 */
			if ((bp->b_flags & B_DELWRI) && (flags & V_SAVE)) {
				(void) VOP_BWRITE(bp);
				break;
			}
			bp->b_flags |= B_INVAL;
			brelse(bp);
		}
	}
	if (!(flags & V_SAVEMETA) &&
	    (vp->v_dirtyblkhd.lh_first || vp->v_cleanblkhd.lh_first))
		panic("vinvalbuf: flush failed");
	return (0);
}
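
/*
 * A minimal usage sketch: callers that must discard a vnode's cached
 * buffers while preserving dirty data first write it back with V_SAVE,
 * as vclean() does below:
 *
 *	if (error = vinvalbuf(vp, V_SAVE, cred, p, 0, 0))
 *		return (error);
 */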

/*
 * Associate a buffer with a vnode.
 */
void
bgetvp(vp, bp)
	register struct vnode *vp;
	register struct buf *bp;
{

	if (bp->b_vp)
		panic("bgetvp: not free");
	VHOLD(vp);
	bp->b_vp = vp;
	if (vp->v_type == VBLK || vp->v_type == VCHR)
		bp->b_dev = vp->v_rdev;
	else
		bp->b_dev = NODEV;
	/*
	 * Insert onto list for new vnode.
	 */
	bufinsvn(bp, &vp->v_cleanblkhd);
}

/*
 * Disassociate a buffer from a vnode.
 */
void
brelvp(bp)
	register struct buf *bp;
{
	struct vnode *vp;

	if (bp->b_vp == (struct vnode *) 0)
		panic("brelvp: NULL");
	/*
	 * Delete from old vnode list, if on one.
	 */
	if (bp->b_vnbufs.le_next != NOLIST)
		bufremvn(bp);
	vp = bp->b_vp;
	bp->b_vp = (struct vnode *) 0;
	HOLDRELE(vp);
}

/*
 * Reassign a buffer from one vnode to another.
 * Used to assign file specific control information
 * (indirect blocks) to the vnode to which they belong.
 */
void
reassignbuf(bp, newvp)
	register struct buf *bp;
	register struct vnode *newvp;
{
	register struct buflists *listheadp;

	if (newvp == NULL) {
		printf("reassignbuf: NULL");
		return;
	}
	/*
	 * Delete from old vnode list, if on one.
	 */
	if (bp->b_vnbufs.le_next != NOLIST)
		bufremvn(bp);
	/*
	 * If dirty, put on list of dirty buffers;
	 * otherwise insert onto list of clean buffers.
	 */
	if (bp->b_flags & B_DELWRI)
		listheadp = &newvp->v_dirtyblkhd;
	else
		listheadp = &newvp->v_cleanblkhd;
	bufinsvn(bp, listheadp);
}

/*
 * Create a vnode for a block device.
 * Used for root filesystem, argdev, and swap areas.
 * Also used for memory file system special devices.
 */
int
bdevvp(dev, vpp)
	dev_t dev;
	struct vnode **vpp;
{
	register struct vnode *vp;
	struct vnode *nvp;
	int error;

	if (dev == NODEV) {
		*vpp = NULLVP;
		return (ENODEV);
	}
	error = getnewvnode(VT_NON, (struct mount *)0, spec_vnodeop_p, &nvp);
	if (error) {
		*vpp = NULLVP;
		return (error);
	}
	vp = nvp;
	vp->v_type = VBLK;
	if (nvp = checkalias(vp, dev, (struct mount *)0)) {
		vput(vp);
		vp = nvp;
	}
	*vpp = vp;
	return (0);
}
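
/*
 * A minimal usage sketch: machine-dependent startup and mountroot code
 * creates the vnode for the root device roughly as
 *
 *	if (bdevvp(rootdev, &rootvp))
 *		panic("cannot set up rootvp");
 *
 * (rootdev/rootvp as used by the root and swap configuration code.)
 */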

/*
 * Check to see if the new vnode represents a special device
 * for which we already have a vnode (either because of
 * bdevvp() or because of a different vnode representing
 * the same block device). If such an alias exists, deallocate
 * the existing contents and return the aliased vnode. The
 * caller is responsible for filling it with its new contents.
 */
struct vnode *
checkalias(nvp, nvp_rdev, mp)
	register struct vnode *nvp;
	dev_t nvp_rdev;
	struct mount *mp;
{
	struct proc *p = curproc;	/* XXX */
	struct vnode *vp;
	struct vnode **vpp;

	if (nvp->v_type != VBLK && nvp->v_type != VCHR)
		return (NULLVP);

	vpp = &speclisth[SPECHASH(nvp_rdev)];
loop:
	simple_lock(&spechash_slock);
	for (vp = *vpp; vp; vp = vp->v_specnext) {
		if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type)
			continue;
		/*
		 * Alias, but not in use, so flush it out.
		 */
		simple_lock(&vp->v_interlock);
		if (vp->v_usecount == 0) {
			simple_unlock(&spechash_slock);
			vgonel(vp, p);
			goto loop;
		}
		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, p)) {
			simple_unlock(&spechash_slock);
			goto loop;
		}
		break;
	}
	if (vp == NULL || vp->v_tag != VT_NON) {
		MALLOC(nvp->v_specinfo, struct specinfo *,
		    sizeof(struct specinfo), M_VNODE, M_WAITOK);
		nvp->v_rdev = nvp_rdev;
		nvp->v_hashchain = vpp;
		nvp->v_specnext = *vpp;
		nvp->v_specflags = 0;
		simple_unlock(&spechash_slock);
		*vpp = nvp;
		if (vp != NULLVP) {
			nvp->v_flag |= VALIASED;
			vp->v_flag |= VALIASED;
			vput(vp);
		}
		return (NULLVP);
	}
	simple_unlock(&spechash_slock);
	VOP_UNLOCK(vp, 0, p);
	simple_lock(&vp->v_interlock);
	vclean(vp, 0, p);
	vp->v_op = nvp->v_op;
	vp->v_tag = nvp->v_tag;
	nvp->v_type = VNON;
	insmntque(vp, mp);
	return (vp);
}

/*
 * Grab a particular vnode from the free list, increment its
 * reference count and lock it. The vnode lock bit is set if the
 * vnode is being eliminated in vgone. The process is awakened
 * when the transition is completed, and an error is returned to
 * indicate that the vnode is no longer usable (possibly having
 * been changed to a new file system type).
 */
int
vget(vp, flags, p)
	struct vnode *vp;
	int flags;
	struct proc *p;
{
	int error;

	/*
	 * If the vnode is in the process of being cleaned out for
	 * another use, we wait for the cleaning to finish and then
	 * return failure. Cleaning is determined by checking that
	 * the VXLOCK flag is set.
	 */
	if ((flags & LK_INTERLOCK) == 0)
		simple_lock(&vp->v_interlock);
	if (vp->v_flag & VXLOCK) {
		vp->v_flag |= VXWANT;
		simple_unlock(&vp->v_interlock);
		tsleep((caddr_t)vp, PINOD, "vget", 0);
		return (ENOENT);
	}
	if (vp->v_usecount == 0) {
		simple_lock(&vnode_free_list_slock);
		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
		simple_unlock(&vnode_free_list_slock);
	}
	vp->v_usecount++;
	if (flags & LK_TYPE_MASK) {
		if (error = vn_lock(vp, flags | LK_INTERLOCK, p))
			vrele(vp);
		return (error);
	}
	simple_unlock(&vp->v_interlock);
	return (0);
}
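
/*
 * A minimal usage sketch: code holding a soft reference to a vnode (a
 * hash chain entry, for example) revalidates it with vget() and must
 * retry on failure, since ENOENT here means the vnode was reclaimed:
 *
 *	if (vget(vp, LK_EXCLUSIVE, p))
 *		goto loop;		-- vnode was recycled, look again
 *
 * (compare the loop in checkalias() above).
 */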

/*
 * Stubs to use when there is no locking to be done on the underlying object.
 * A minimal shared lock is necessary to ensure that the underlying object
 * is not revoked while an operation is in progress. So, an active shared
 * count is maintained in an auxiliary vnode lock structure.
 */
int
vop_nolock(ap)
	struct vop_lock_args /* {
		struct vnode *a_vp;
		int a_flags;
		struct proc *a_p;
	} */ *ap;
{
#ifdef notyet
	/*
	 * This code cannot be used until all the non-locking filesystems
	 * (notably NFS) are converted to properly lock and release nodes.
	 * Also, certain vnode operations change the locking state within
	 * the operation (create, mknod, remove, link, rename, mkdir, rmdir,
	 * and symlink). Ideally these operations should not change the
	 * lock state, but should be changed to let the caller of the
	 * function unlock them. Otherwise all intermediate vnode layers
	 * (such as union, umapfs, etc) must catch these functions to do
	 * the necessary locking at their layer. Note that the inactive
	 * and lookup operations also change their lock state, but this
	 * cannot be avoided, so these two operations will always need
	 * to be handled in intermediate layers.
	 */
	struct vnode *vp = ap->a_vp;
	int vnflags, flags = ap->a_flags;

	if (vp->v_vnlock == NULL) {
		if ((flags & LK_TYPE_MASK) == LK_DRAIN)
			return (0);
		MALLOC(vp->v_vnlock, struct lock *, sizeof(struct lock),
		    M_VNODE, M_WAITOK);
		lockinit(vp->v_vnlock, PVFS, "vnlock", 0, 0);
	}
	switch (flags & LK_TYPE_MASK) {
	case LK_DRAIN:
		vnflags = LK_DRAIN;
		break;
	case LK_EXCLUSIVE:
	case LK_SHARED:
		vnflags = LK_SHARED;
		break;
	case LK_UPGRADE:
	case LK_EXCLUPGRADE:
	case LK_DOWNGRADE:
		return (0);
	case LK_RELEASE:
	default:
		panic("vop_nolock: bad operation %d", flags & LK_TYPE_MASK);
	}
	if (flags & LK_INTERLOCK)
		vnflags |= LK_INTERLOCK;
	return (lockmgr(vp->v_vnlock, vnflags, &vp->v_interlock, ap->a_p));
#else /* for now */
	/*
	 * Since we are not using the lock manager, we must clear
	 * the interlock here.
	 */
	if (ap->a_flags & LK_INTERLOCK)
		simple_unlock(&ap->a_vp->v_interlock);
	return (0);
#endif
}

/*
 * Decrement the active use count.
 */
int
vop_nounlock(ap)
	struct vop_unlock_args /* {
		struct vnode *a_vp;
		int a_flags;
		struct proc *a_p;
	} */ *ap;
{
	struct vnode *vp = ap->a_vp;

	if (vp->v_vnlock == NULL)
		return (0);
	return (lockmgr(vp->v_vnlock, LK_RELEASE, NULL, ap->a_p));
}

/*
 * Return whether or not the node is in use.
 */
int
vop_noislocked(ap)
	struct vop_islocked_args /* {
		struct vnode *a_vp;
	} */ *ap;
{
	struct vnode *vp = ap->a_vp;

	if (vp->v_vnlock == NULL)
		return (0);
	return (lockstatus(vp->v_vnlock));
}

/*
 * Vnode reference.
 */
void
vref(vp)
	struct vnode *vp;
{

	simple_lock(&vp->v_interlock);
	if (vp->v_usecount <= 0)
		panic("vref used where vget required");
	vp->v_usecount++;
	simple_unlock(&vp->v_interlock);
}

/*
 * vput(), just unlock and vrele()
 */
void
vput(vp)
	struct vnode *vp;
{
	struct proc *p = curproc;	/* XXX */

#ifdef DIAGNOSTIC
	if (vp == NULL)
		panic("vput: null vp");
#endif
	simple_lock(&vp->v_interlock);
	vp->v_usecount--;
	if (vp->v_usecount > 0) {
		simple_unlock(&vp->v_interlock);
		VOP_UNLOCK(vp, 0, p);
		return;
	}
#ifdef DIAGNOSTIC
	if (vp->v_usecount < 0 || vp->v_writecount != 0) {
		vprint("vput: bad ref count", vp);
		panic("vput: ref cnt");
	}
#endif
	/*
	 * insert at tail of LRU list
	 */
	simple_lock(&vnode_free_list_slock);
	TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
	simple_unlock(&vnode_free_list_slock);
	simple_unlock(&vp->v_interlock);
	VOP_INACTIVE(vp, p);
}

/*
 * Vnode release.
 * If count drops to zero, call inactive routine and return to freelist.
 */
void
vrele(vp)
	struct vnode *vp;
{
	struct proc *p = curproc;	/* XXX */

#ifdef DIAGNOSTIC
	if (vp == NULL)
		panic("vrele: null vp");
#endif
	simple_lock(&vp->v_interlock);
	vp->v_usecount--;
	if (vp->v_usecount > 0) {
		simple_unlock(&vp->v_interlock);
		return;
	}
#ifdef DIAGNOSTIC
	if (vp->v_usecount < 0 || vp->v_writecount != 0) {
		vprint("vrele: bad ref count", vp);
		panic("vrele: ref cnt");
	}
#endif
	/*
	 * insert at tail of LRU list
	 */
	simple_lock(&vnode_free_list_slock);
	TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
	simple_unlock(&vnode_free_list_slock);
	if (vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK, p) == 0)
		VOP_INACTIVE(vp, p);
}

#ifdef DIAGNOSTIC
/*
 * Page or buffer structure gets a reference.
 */
void
vhold(vp)
	register struct vnode *vp;
{

	simple_lock(&vp->v_interlock);
	vp->v_holdcnt++;
	simple_unlock(&vp->v_interlock);
}

/*
 * Page or buffer structure frees a reference.
 */
void
holdrele(vp)
	register struct vnode *vp;
{

	simple_lock(&vp->v_interlock);
	if (vp->v_holdcnt <= 0)
		panic("holdrele: holdcnt");
	vp->v_holdcnt--;
	simple_unlock(&vp->v_interlock);
}
#endif /* DIAGNOSTIC */

/*
 * Remove any vnodes in the vnode table belonging to mount point mp.
 *
 * If MNT_NOFORCE is specified, there should not be any active ones;
 * return an error if any are found (nb: this is a user error, not a
 * system error). If MNT_FORCE is specified, detach any active vnodes
 * that are found.
 */
#ifdef DIAGNOSTIC
int busyprt = 0;	/* print out busy vnodes */
struct ctldebug debug1 = { "busyprt", &busyprt };
#endif

int
vflush(mp, skipvp, flags)
	struct mount *mp;
	struct vnode *skipvp;
	int flags;
{
	struct proc *p = curproc;	/* XXX */
	struct vnode *vp, *nvp;
	int busy = 0;

	simple_lock(&mntvnode_slock);
loop:
	for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) {
		if (vp->v_mount != mp)
			goto loop;
		nvp = vp->v_mntvnodes.le_next;
		/*
		 * Skip over a selected vnode.
		 */
		if (vp == skipvp)
			continue;

		simple_lock(&vp->v_interlock);
		/*
		 * Skip over vnodes marked VSYSTEM.
		 */
		if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM)) {
			simple_unlock(&vp->v_interlock);
			continue;
		}
		/*
		 * If WRITECLOSE is set, only flush out regular file
		 * vnodes open for writing.
		 */
		if ((flags & WRITECLOSE) &&
		    (vp->v_writecount == 0 || vp->v_type != VREG)) {
			simple_unlock(&vp->v_interlock);
			continue;
		}
		/*
		 * With v_usecount == 0, all we need to do is clear
		 * out the vnode data structures and we are done.
		 */
		if (vp->v_usecount == 0) {
			simple_unlock(&mntvnode_slock);
			vgonel(vp, p);
			simple_lock(&mntvnode_slock);
			continue;
		}
		/*
		 * If FORCECLOSE is set, forcibly close the vnode.
		 * For block or character devices, revert to an
		 * anonymous device. For all other files, just kill them.
		 */
		if (flags & FORCECLOSE) {
			simple_unlock(&mntvnode_slock);
			if (vp->v_type != VBLK && vp->v_type != VCHR) {
				vgonel(vp, p);
			} else {
				vclean(vp, 0, p);
				vp->v_op = spec_vnodeop_p;
				insmntque(vp, (struct mount *)0);
			}
			simple_lock(&mntvnode_slock);
			continue;
		}
#ifdef DIAGNOSTIC
		if (busyprt)
			vprint("vflush: busy vnode", vp);
#endif
		simple_unlock(&vp->v_interlock);
		busy++;
	}
	simple_unlock(&mntvnode_slock);
	if (busy)
		return (EBUSY);
	return (0);
}
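
/*
 * A minimal usage sketch (patterned on how an unmount path uses this):
 * dounmount-style code flushes every vnode on the mount, forcing the
 * issue only when MNT_FORCE was requested:
 *
 *	flags = (mntflags & MNT_FORCE) ? FORCECLOSE : 0;
 *	error = vflush(mp, NULLVP, flags);
 */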

/*
 * Disassociate the underlying file system from a vnode.
 * The vnode interlock is held on entry.
 */
static void
vclean(vp, flags, p)
	struct vnode *vp;
	int flags;
	struct proc *p;
{
	int active;

	/*
	 * Check to see if the vnode is in use.
	 * If so we have to reference it before we clean it out
	 * so that its count cannot fall to zero and generate a
	 * race against ourselves to recycle it.
	 */
	if (active = vp->v_usecount)
		vp->v_usecount++;
	/*
	 * Prevent the vnode from being recycled or
	 * brought into use while we clean it out.
	 */
	if (vp->v_flag & VXLOCK)
		panic("vclean: deadlock");
	vp->v_flag |= VXLOCK;
	/*
	 * Even if the count is zero, the VOP_INACTIVE routine may still
	 * have the object locked while it cleans it out. The VOP_LOCK
	 * ensures that the VOP_INACTIVE routine is done with its work.
	 * For active vnodes, it ensures that no other activity can
	 * occur while the underlying object is being cleaned out.
	 */
	VOP_LOCK(vp, LK_DRAIN | LK_INTERLOCK, p);
	/*
	 * Clean out any buffers associated with the vnode.
	 */
	if (flags & DOCLOSE)
		vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0);
	/*
	 * If purging an active vnode, it must be closed and
	 * deactivated before being reclaimed. Note that the
	 * VOP_INACTIVE will unlock the vnode.
	 */
	if (active) {
		if (flags & DOCLOSE)
			VOP_CLOSE(vp, IO_NDELAY, NOCRED, p);
		VOP_INACTIVE(vp, p);
	} else {
		/*
		 * Any other processes trying to obtain this lock must first
		 * wait for VXLOCK to clear, then call the new lock operation.
		 */
		VOP_UNLOCK(vp, 0, p);
	}
	/*
	 * Reclaim the vnode.
	 */
	if (VOP_RECLAIM(vp, p))
		panic("vclean: cannot reclaim");
	if (active)
		vrele(vp);
	cache_purge(vp);
	if (vp->v_vnlock) {
		if ((vp->v_vnlock->lk_flags & LK_DRAINED) == 0)
			vprint("vclean: lock not drained", vp);
		FREE(vp->v_vnlock, M_VNODE);
		vp->v_vnlock = NULL;
	}

	/*
	 * Done with purge, notify sleepers of the grim news.
	 */
	vp->v_op = dead_vnodeop_p;
	vp->v_tag = VT_NON;
	vp->v_flag &= ~VXLOCK;
	if (vp->v_flag & VXWANT) {
		vp->v_flag &= ~VXWANT;
		wakeup((caddr_t)vp);
	}
}

/*
 * Eliminate all activity associated with the requested vnode
 * and with all vnodes aliased to the requested vnode.
 */
int
vop_revoke(ap)
	struct vop_revoke_args /* {
		struct vnode *a_vp;
		int a_flags;
	} */ *ap;
{
	struct vnode *vp, *vq;
	struct proc *p = curproc;	/* XXX */

#ifdef DIAGNOSTIC
	if ((ap->a_flags & REVOKEALL) == 0)
		panic("vop_revoke");
#endif

	vp = ap->a_vp;
	simple_lock(&vp->v_interlock);

	if (vp->v_flag & VALIASED) {
		/*
		 * If a vgone (or vclean) is already in progress,
		 * wait until it is done and return.
		 */
		if (vp->v_flag & VXLOCK) {
			vp->v_flag |= VXWANT;
			simple_unlock(&vp->v_interlock);
			tsleep((caddr_t)vp, PINOD, "vop_revokeall", 0);
			return (0);
		}
		/*
		 * Ensure that vp will not be vgone'd while we
		 * are eliminating its aliases.
		 */
		vp->v_flag |= VXLOCK;
		simple_unlock(&vp->v_interlock);
		while (vp->v_flag & VALIASED) {
			simple_lock(&spechash_slock);
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_rdev != vp->v_rdev ||
				    vq->v_type != vp->v_type || vp == vq)
					continue;
				simple_unlock(&spechash_slock);
				vgone(vq);
				break;
			}
			if (vq == NULLVP)
				simple_unlock(&spechash_slock);
		}
		/*
		 * Remove the lock so that vgone below will
		 * really eliminate the vnode after which time
		 * vgone will awaken any sleepers.
		 */
		simple_lock(&vp->v_interlock);
		vp->v_flag &= ~VXLOCK;
	}
	vgonel(vp, p);
	return (0);
}

/*
 * Recycle an unused vnode to the front of the free list.
 * Release the passed interlock if the vnode will be recycled.
 */
int
vrecycle(vp, inter_lkp, p)
	struct vnode *vp;
	struct simplelock *inter_lkp;
	struct proc *p;
{

	simple_lock(&vp->v_interlock);
	if (vp->v_usecount == 0) {
		if (inter_lkp)
			simple_unlock(inter_lkp);
		vgonel(vp, p);
		return (1);
	}
	simple_unlock(&vp->v_interlock);
	return (0);
}

/*
 * Eliminate all activity associated with a vnode
 * in preparation for reuse.
 */
void
vgone(vp)
	struct vnode *vp;
{
	struct proc *p = curproc;	/* XXX */

	simple_lock(&vp->v_interlock);
	vgonel(vp, p);
}

/*
 * vgone, with the vp interlock held.
 */
void
vgonel(vp, p)
	struct vnode *vp;
	struct proc *p;
{
	struct vnode *vq;
	struct vnode *vx;

	/*
	 * If a vgone (or vclean) is already in progress,
	 * wait until it is done and return.
	 */
	if (vp->v_flag & VXLOCK) {
		vp->v_flag |= VXWANT;
		simple_unlock(&vp->v_interlock);
		tsleep((caddr_t)vp, PINOD, "vgone", 0);
		return;
	}
	/*
	 * Clean out the filesystem specific data.
	 */
	vclean(vp, DOCLOSE, p);
	/*
	 * Delete from old mount point vnode list, if on one.
	 */
	if (vp->v_mount != NULL)
		insmntque(vp, (struct mount *)0);
	/*
	 * If special device, remove it from special device alias list
	 * if it is on one.
	 */
	if ((vp->v_type == VBLK || vp->v_type == VCHR) && vp->v_specinfo != 0) {
		simple_lock(&spechash_slock);
		if (*vp->v_hashchain == vp) {
			*vp->v_hashchain = vp->v_specnext;
		} else {
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_specnext != vp)
					continue;
				vq->v_specnext = vp->v_specnext;
				break;
			}
			if (vq == NULL)
				panic("missing bdev");
		}
		if (vp->v_flag & VALIASED) {
			vx = NULL;
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_rdev != vp->v_rdev ||
				    vq->v_type != vp->v_type)
					continue;
				if (vx)
					break;
				vx = vq;
			}
			if (vx == NULL)
				panic("missing alias");
			if (vq == NULL)
				vx->v_flag &= ~VALIASED;
			vp->v_flag &= ~VALIASED;
		}
		simple_unlock(&spechash_slock);
		FREE(vp->v_specinfo, M_VNODE);
		vp->v_specinfo = NULL;
	}
	/*
	 * If it is on the freelist and not already at the head,
	 * move it to the head of the list. The test of the back
	 * pointer and the reference count of zero is because
	 * it will be removed from the free list by getnewvnode,
	 * but will not have its reference count incremented until
	 * after calling vgone. If the reference count were
	 * incremented first, vgone would (incorrectly) try to
	 * close the previous instance of the underlying object.
	 * So, the back pointer is explicitly set to `0xdeadb' in
	 * getnewvnode after removing it from the freelist to ensure
	 * that we do not try to move it here.
	 */
	if (vp->v_usecount == 0) {
		simple_lock(&vnode_free_list_slock);
		if ((vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb) &&
		    vnode_free_list.tqh_first != vp) {
			TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
			TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
		}
		simple_unlock(&vnode_free_list_slock);
	}
	vp->v_type = VBAD;
}

/*
 * Lookup a vnode by device number.
 */
int
vfinddev(dev, type, vpp)
	dev_t dev;
	enum vtype type;
	struct vnode **vpp;
{
	struct vnode *vp;
	int rc = 0;

	simple_lock(&spechash_slock);
	for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) {
		if (dev != vp->v_rdev || type != vp->v_type)
			continue;
		*vpp = vp;
		rc = 1;
		break;
	}
	simple_unlock(&spechash_slock);
	return (rc);
}

/*
 * Calculate the total number of references to a special device.
 */
int
vcount(vp)
	struct vnode *vp;
{
	struct vnode *vq, *vnext;
	int count;

loop:
	if ((vp->v_flag & VALIASED) == 0)
		return (vp->v_usecount);
	simple_lock(&spechash_slock);
	for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) {
		vnext = vq->v_specnext;
		if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type)
			continue;
		/*
		 * Alias, but not in use, so flush it out.
		 */
		if (vq->v_usecount == 0 && vq != vp) {
			simple_unlock(&spechash_slock);
			vgone(vq);
			goto loop;
		}
		count += vq->v_usecount;
	}
	simple_unlock(&spechash_slock);
	return (count);
}
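
/*
 * A minimal usage sketch: device close routines use vcount() to decide
 * whether the last reference to a device is going away, e.g.:
 *
 *	if (vcount(vp) > 1)
 *		return (0);	-- other aliases still have it open
 *	... perform the real device close ...
 */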

/*
 * Print out a description of a vnode.
 */
static char *typename[] =
   { "VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD" };

void
vprint(label, vp)
	char *label;
	register struct vnode *vp;
{
	char buf[64];

	if (label != NULL)
		printf("%s: ", label);
	printf("type %s, usecount %d, writecount %d, refcount %d,",
	    typename[vp->v_type], vp->v_usecount, vp->v_writecount,
	    vp->v_holdcnt);
	buf[0] = '\0';
	if (vp->v_flag & VROOT)
		strcat(buf, "|VROOT");
	if (vp->v_flag & VTEXT)
		strcat(buf, "|VTEXT");
	if (vp->v_flag & VSYSTEM)
		strcat(buf, "|VSYSTEM");
	if (vp->v_flag & VXLOCK)
		strcat(buf, "|VXLOCK");
	if (vp->v_flag & VXWANT)
		strcat(buf, "|VXWANT");
	if (vp->v_flag & VBWAIT)
		strcat(buf, "|VBWAIT");
	if (vp->v_flag & VALIASED)
		strcat(buf, "|VALIASED");
	if (buf[0] != '\0')
		printf(" flags (%s)", &buf[1]);
	if (vp->v_data == NULL) {
		printf("\n");
	} else {
		printf("\n\t");
		VOP_PRINT(vp);
	}
}

#ifdef DEBUG
/*
 * List all of the locked vnodes in the system.
 * Called when debugging the kernel.
 */
void
printlockedvnodes()
{
	struct proc *p = curproc;	/* XXX */
	struct mount *mp, *nmp;
	struct vnode *vp;

	printf("Locked vnodes\n");
	simple_lock(&mountlist_slock);
	for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) {
			nmp = mp->mnt_list.cqe_next;
			continue;
		}
		for (vp = mp->mnt_vnodelist.lh_first;
		     vp != NULL;
		     vp = vp->v_mntvnodes.le_next) {
			if (VOP_ISLOCKED(vp))
				vprint((char *)0, vp);
		}
		simple_lock(&mountlist_slock);
		nmp = mp->mnt_list.cqe_next;
		vfs_unbusy(mp, p);
	}
	simple_unlock(&mountlist_slock);
}
#endif

/*
 * Top level filesystem related information gathering.
 */
int
vfs_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p)
	int *name;
	u_int namelen;
	void *oldp;
	size_t *oldlenp;
	void *newp;
	size_t newlen;
	struct proc *p;
{
	struct ctldebug *cdp;
	struct vfsconf *vfsp;

	/* all sysctl names at this level are at least name and field */
	if (namelen < 2)
		return (ENOTDIR);		/* overloaded */
	if (name[0] != VFS_GENERIC) {
		for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
			if (vfsp->vfc_typenum == name[0])
				break;
		if (vfsp == NULL)
			return (EOPNOTSUPP);
		return ((*vfsp->vfc_vfsops->vfs_sysctl)(&name[1], namelen - 1,
		    oldp, oldlenp, newp, newlen, p));
	}
	switch (name[1]) {
	case VFS_MAXTYPENUM:
		return (sysctl_rdint(oldp, oldlenp, newp, maxvfsconf));
	case VFS_CONF:
		if (namelen < 3)
			return (ENOTDIR);	/* overloaded */
		for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
			if (vfsp->vfc_typenum == name[2])
				break;
		if (vfsp == NULL)
			return (EOPNOTSUPP);
		return (sysctl_rdstruct(oldp, oldlenp, newp, vfsp,
		    sizeof(struct vfsconf)));
	}
	return (EOPNOTSUPP);
}

int kinfo_vdebug = 1;
int kinfo_vgetfailed;
#define KINFO_VNODESLOP	10
/*
 * Dump vnode list (via sysctl).
 * Copyout address of vnode followed by vnode.
 */
/* ARGSUSED */
int
sysctl_vnode(where, sizep, p)
	char *where;
	size_t *sizep;
	struct proc *p;
{
	struct mount *mp, *nmp;
	struct vnode *nvp, *vp;
	char *bp = where, *savebp;
	char *ewhere;
	int error;

#define VPTRSZ	sizeof (struct vnode *)
#define VNODESZ	sizeof (struct vnode)
	if (where == NULL) {
		*sizep = (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ);
		return (0);
	}
	ewhere = where + *sizep;

	simple_lock(&mountlist_slock);
	for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) {
			nmp = mp->mnt_list.cqe_next;
			continue;
		}
		savebp = bp;
again:
		simple_lock(&mntvnode_slock);
		for (vp = mp->mnt_vnodelist.lh_first;
		     vp != NULL;
		     vp = nvp) {
			/*
			 * Check that the vp is still associated with
			 * this filesystem.  RACE: could have been
			 * recycled onto the same filesystem.
			 */
			if (vp->v_mount != mp) {
				simple_unlock(&mntvnode_slock);
				if (kinfo_vdebug)
					printf("kinfo: vp changed\n");
				bp = savebp;
				goto again;
			}
			nvp = vp->v_mntvnodes.le_next;
			if (bp + VPTRSZ + VNODESZ > ewhere) {
				simple_unlock(&mntvnode_slock);
				*sizep = bp - where;
				return (ENOMEM);
			}
			simple_unlock(&mntvnode_slock);
			if ((error = copyout((caddr_t)&vp, bp, VPTRSZ)) ||
			    (error = copyout((caddr_t)vp, bp + VPTRSZ, VNODESZ)))
				return (error);
			bp += VPTRSZ + VNODESZ;
			simple_lock(&mntvnode_slock);
		}
		simple_unlock(&mntvnode_slock);
		simple_lock(&mountlist_slock);
		nmp = mp->mnt_list.cqe_next;
		vfs_unbusy(mp, p);
	}
	simple_unlock(&mountlist_slock);

	*sizep = bp - where;
	return (0);
}

/*
 * Check to see if a filesystem is mounted on a block device.
 */
int
vfs_mountedon(vp)
	struct vnode *vp;
{
	struct vnode *vq;
	int error = 0;

	if (vp->v_specflags & SI_MOUNTEDON)
		return (EBUSY);
	if (vp->v_flag & VALIASED) {
		simple_lock(&spechash_slock);
		for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
			if (vq->v_rdev != vp->v_rdev ||
			    vq->v_type != vp->v_type)
				continue;
			if (vq->v_specflags & SI_MOUNTEDON) {
				error = EBUSY;
				break;
			}
		}
		simple_unlock(&spechash_slock);
	}
	return (error);
}

/*
 * Unmount all filesystems. The list is traversed in reverse order
 * of mounting to avoid dependencies.
 */
void
vfs_unmountall()
{
	struct mount *mp, *nmp;
	struct proc *p = curproc;	/* XXX */

	/*
	 * Since this only runs when rebooting, it is not interlocked.
	 */
	for (mp = mountlist.cqh_last; mp != (void *)&mountlist; mp = nmp) {
		nmp = mp->mnt_list.cqe_prev;
		(void) dounmount(mp, MNT_FORCE, p);
	}
}

/*
 * Build hash lists of net addresses and hang them off the mount point.
 * Called by ufs_mount() to set up the lists of export addresses.
 */
static int
vfs_hang_addrlist(mp, nep, argp)
	struct mount *mp;
	struct netexport *nep;
	struct export_args *argp;
{
	register struct netcred *np;
	register struct radix_node_head *rnh;
	register int i;
	struct radix_node *rn;
	struct sockaddr *saddr, *smask = 0;
	struct domain *dom;
	int error;

	if (argp->ex_addrlen == 0) {
		if (mp->mnt_flag & MNT_DEFEXPORTED)
			return (EPERM);
		np = &nep->ne_defexported;
		np->netc_exflags = argp->ex_flags;
		np->netc_anon = argp->ex_anon;
		np->netc_anon.cr_ref = 1;
		mp->mnt_flag |= MNT_DEFEXPORTED;
		return (0);
	}
	i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen;
	np = (struct netcred *)malloc(i, M_NETADDR, M_WAITOK);
	bzero((caddr_t)np, i);
	saddr = (struct sockaddr *)(np + 1);
	if (error = copyin(argp->ex_addr, (caddr_t)saddr, argp->ex_addrlen))
		goto out;
	if (saddr->sa_len > argp->ex_addrlen)
		saddr->sa_len = argp->ex_addrlen;
	if (argp->ex_masklen) {
		smask = (struct sockaddr *)((caddr_t)saddr + argp->ex_addrlen);
		error = copyin(argp->ex_mask, (caddr_t)smask, argp->ex_masklen);
		if (error)
			goto out;
		if (smask->sa_len > argp->ex_masklen)
			smask->sa_len = argp->ex_masklen;
	}
	i = saddr->sa_family;
	if ((rnh = nep->ne_rtable[i]) == 0) {
		/*
		 * Seems silly to initialize every AF when most are not
		 * used, do so on demand here
		 */
		for (dom = domains; dom; dom = dom->dom_next)
			if (dom->dom_family == i && dom->dom_rtattach) {
				dom->dom_rtattach((void **)&nep->ne_rtable[i],
				    dom->dom_rtoffset);
				break;
			}
		if ((rnh = nep->ne_rtable[i]) == 0) {
			error = ENOBUFS;
			goto out;
		}
	}
	rn = (*rnh->rnh_addaddr)((caddr_t)saddr, (caddr_t)smask, rnh,
	    np->netc_rnodes);
	if (rn == 0) {
		/*
		 * One of the reasons that rnh_addaddr may fail is that
		 * the entry already exists. To check for this case, we
		 * look up the entry to see if it is there. If so, we
		 * do not need to make a new entry but do return success.
		 */
		free(np, M_NETADDR);
		rn = (*rnh->rnh_matchaddr)((caddr_t)saddr, rnh);
		if (rn != 0 && (rn->rn_flags & RNF_ROOT) == 0 &&
		    ((struct netcred *)rn)->netc_exflags == argp->ex_flags &&
		    !bcmp((caddr_t)&((struct netcred *)rn)->netc_anon,
			(caddr_t)&argp->ex_anon, sizeof(struct ucred)))
			return (0);
		return (EPERM);
	}
	np->netc_exflags = argp->ex_flags;
	np->netc_anon = argp->ex_anon;
	np->netc_anon.cr_ref = 1;
	return (0);
out:
	free(np, M_NETADDR);
	return (error);
}

/* ARGSUSED */
static int
vfs_free_netcred(rn, w)
	struct radix_node *rn;
	caddr_t w;
{
	register struct radix_node_head *rnh = (struct radix_node_head *)w;

	(*rnh->rnh_deladdr)(rn->rn_key, rn->rn_mask, rnh);
	free((caddr_t)rn, M_NETADDR);
	return (0);
}

/*
 * Free the net address hash lists that are hanging off the mount points.
 */
static void
vfs_free_addrlist(nep)
	struct netexport *nep;
{
	register int i;
	register struct radix_node_head *rnh;

	for (i = 0; i <= AF_MAX; i++)
		if (rnh = nep->ne_rtable[i]) {
			(*rnh->rnh_walktree)(rnh, vfs_free_netcred,
			    (caddr_t)rnh);
			free((caddr_t)rnh, M_RTABLE);
			nep->ne_rtable[i] = 0;
		}
}

int
vfs_export(mp, nep, argp)
	struct mount *mp;
	struct netexport *nep;
	struct export_args *argp;
{
	int error;

	if (argp->ex_flags & MNT_DELEXPORT) {
		vfs_free_addrlist(nep);
		mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED);
	}
	if (argp->ex_flags & MNT_EXPORTED) {
		if (error = vfs_hang_addrlist(mp, nep, argp))
			return (error);
		mp->mnt_flag |= MNT_EXPORTED;
	}
	return (0);
}
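
/*
 * A minimal usage sketch (patterned on ufs_mount()): a filesystem's
 * mount routine forwards the export_args from its mount arguments to
 * vfs_export(), keeping the netexport structure in its private mount
 * data:
 *
 *	if (args.fspec == 0)
 *		return (vfs_export(mp, &ump->um_export, &args.export));
 *
 * (ump->um_export per the UFS mount structure.)
 */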

struct netcred *
vfs_export_lookup(mp, nep, nam)
	register struct mount *mp;
	struct netexport *nep;
	struct mbuf *nam;
{
	register struct netcred *np;
	register struct radix_node_head *rnh;
	struct sockaddr *saddr;

	np = NULL;
	if (mp->mnt_flag & MNT_EXPORTED) {
		/*
		 * Lookup in the export list first.
		 */
		if (nam != NULL) {
			saddr = mtod(nam, struct sockaddr *);
			rnh = nep->ne_rtable[saddr->sa_family];
			if (rnh != NULL) {
				np = (struct netcred *)
					(*rnh->rnh_matchaddr)((caddr_t)saddr,
					    rnh);
				if (np && np->netc_rnodes->rn_flags & RNF_ROOT)
					np = NULL;
			}
		}
		/*
		 * If no address match, use the default if it exists.
		 */
		if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED)
			np = &nep->ne_defexported;
	}
	return (np);
}