/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * %sccs.include.redist.c%
 *
 *	@(#)vfs_subr.c	8.31 (Berkeley) 05/26/95
 */

/*
 * External virtual filesystem routines
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/mount.h>
#include <sys/time.h>
#include <sys/vnode.h>
#include <sys/stat.h>
#include <sys/namei.h>
#include <sys/ucred.h>
#include <sys/buf.h>
#include <sys/errno.h>
#include <sys/malloc.h>
#include <sys/domain.h>
#include <sys/mbuf.h>

#include <vm/vm.h>
#include <sys/sysctl.h>

#include <miscfs/specfs/specdev.h>

/* Map IFMT file-type bits (mode >> 12) to vnode types, and back. */
enum vtype iftovt_tab[16] = {
	VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
	VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
};
int	vttoif_tab[9] = {
	0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
	S_IFSOCK, S_IFIFO, S_IFMT,
};

/*
 * Insq/Remq for the vnode usage lists.
 */
#define	bufinsvn(bp, dp)	LIST_INSERT_HEAD(dp, bp, b_vnbufs)
#define	bufremvn(bp) {							\
	LIST_REMOVE(bp, b_vnbufs);					\
	(bp)->b_vnbufs.le_next = NOLIST;				\
}
TAILQ_HEAD(freelst, vnode) vnode_free_list;	/* vnode free list */
struct mntlist mountlist;			/* mounted filesystem list */
struct simplelock mountlist_slock;		/* guards mountlist */
static struct simplelock mntid_slock;		/* guards fsid generation */
struct simplelock mntvnode_slock;		/* guards per-mount vnode lists */
struct simplelock vnode_free_list_slock;	/* guards vnode_free_list */
static struct simplelock spechash_slock;	/* guards speclisth alias hash */

/*
 * Initialize the vnode management data structures.
 * Called once at boot before any filesystem activity.
 */
void
vntblinit()
{

	simple_lock_init(&mntvnode_slock);
	simple_lock_init(&mntid_slock);
	simple_lock_init(&spechash_slock);
	TAILQ_INIT(&vnode_free_list);
	simple_lock_init(&vnode_free_list_slock);
	CIRCLEQ_INIT(&mountlist);
}

/*
 * Mark a mount point as busy. Used to synchronize access and to delay
 * unmounting. Interlock is not released on failure.
 * Returns 0 with a shared busy lock held, or ENOENT if the mount is
 * being unmounted (after optionally sleeping for the unmount).
 */
int
vfs_busy(mp, flags, interlkp, p)
	struct mount *mp;
	int flags;
	struct simplelock *interlkp;
	struct proc *p;
{
	int lkflags;

	if (mp->mnt_flag & MNT_UNMOUNT) {
		if (flags & LK_NOWAIT)
			return (ENOENT);
		mp->mnt_flag |= MNT_MWAIT;
		if (interlkp)
			simple_unlock(interlkp);
		/*
		 * Since all busy locks are shared except the exclusive
		 * lock granted when unmounting, the only place that a
		 * wakeup needs to be done is at the release of the
		 * exclusive lock at the end of dounmount.
		 */
		sleep((caddr_t)mp, PVFS);
		if (interlkp)
			simple_lock(interlkp);
		return (ENOENT);
	}
	lkflags = LK_SHARED;
	if (interlkp)
		lkflags |= LK_INTERLOCK;
	if (lockmgr(&mp->mnt_lock, lkflags, interlkp, p))
		panic("vfs_busy: unexpected lock failure");
	return (0);
}

/*
 * Free a busy filesystem.
 * Releases the shared busy lock acquired by vfs_busy().
 */
void
vfs_unbusy(mp, p)
	struct mount *mp;
	struct proc *p;
{

	lockmgr(&mp->mnt_lock, LK_RELEASE, NULL, p);
}

/*
 * Lookup a filesystem type, and if found allocate and initialize
 * a mount structure for it.
 *
 * Devname is usually updated by mount(8) after booting.
 */
int
vfs_rootmountalloc(fstypename, devname, mpp)
	char *fstypename;
	char *devname;
	struct mount **mpp;
{
	struct proc *p = curproc;	/* XXX */
	struct vfsconf *vfsp;
	struct mount *mp;

	/* Find the named filesystem type in the configured vfs list. */
	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
		if (!strcmp(vfsp->vfc_name, fstypename))
			break;
	if (vfsp == NULL)
		return (ENODEV);
	mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK);
	bzero((char *)mp, (u_long)sizeof(struct mount));
	lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, 0);
	/* Cannot fail: the freshly allocated mount cannot be unmounting. */
	(void)vfs_busy(mp, LK_NOWAIT, 0, p);
	LIST_INIT(&mp->mnt_vnodelist);
	mp->mnt_vfc = vfsp;
	mp->mnt_op = vfsp->vfc_vfsops;
	mp->mnt_flag = MNT_RDONLY;
	mp->mnt_vnodecovered = NULLVP;
	vfsp->vfc_refcount++;
	mp->mnt_stat.f_type = vfsp->vfc_typenum;
	mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
	strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN);
	mp->mnt_stat.f_mntonname[0] = '/';
	(void) copystr(devname, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 0);
	*mpp = mp;
	return (0);
}

/*
 * Find an appropriate filesystem to use for the root. If a filesystem
 * has not been preselected, walk through the list of known filesystems
 * trying those that have mountroot routines, and try them until one
 * works or we have tried them all.
 */
int
vfs_mountroot()
{
	struct vfsconf *vfsp;
	extern int (*mountroot)(void);
	int error;

	if (mountroot != NULL)
		return ((*mountroot)());
	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) {
		if (vfsp->vfc_mountroot == NULL)
			continue;
		if ((error = (*vfsp->vfc_mountroot)()) == 0)
			return (0);
		printf("%s_mountroot failed: %d\n", vfsp->vfc_name, error);
	}
	return (ENODEV);
}

/*
 * Lookup a mount point by filesystem identifier.
 */
struct mount *
vfs_getvfs(fsid)
	fsid_t *fsid;
{
	register struct mount *mp;

	simple_lock(&mountlist_slock);
	for (mp = mountlist.cqh_first; mp != (void *)&mountlist;
	     mp = mp->mnt_list.cqe_next) {
		if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] &&
		    mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) {
			simple_unlock(&mountlist_slock);
			return (mp);
		}
	}
	simple_unlock(&mountlist_slock);
	return ((struct mount *)0);
}

/*
 * Get a new unique fsid
 */
void
vfs_getnewfsid(mp)
	struct mount *mp;
{
	static u_short xxxfs_mntid;	/* last minor number handed out */

	fsid_t tfsid;
	int mtype;

	simple_lock(&mntid_slock);
	mtype = mp->mnt_vfc->vfc_typenum;
	mp->mnt_stat.f_fsid.val[0] = makedev(nblkdev + mtype, 0);
	mp->mnt_stat.f_fsid.val[1] = mtype;
	if (xxxfs_mntid == 0)
		++xxxfs_mntid;
	tfsid.val[0] = makedev(nblkdev + mtype, xxxfs_mntid);
	tfsid.val[1] = mtype;
	/* Probe candidate ids until one is not used by a mounted fs. */
	if (mountlist.cqh_first != (void *)&mountlist) {
		while (vfs_getvfs(&tfsid)) {
			tfsid.val[0]++;
			xxxfs_mntid++;
		}
	}
	mp->mnt_stat.f_fsid.val[0] = tfsid.val[0];
	simple_unlock(&mntid_slock);
}

/*
 * Set vnode attributes to VNOVAL
 */
void
vattr_null(vap)
	register struct vattr *vap;
{

	vap->va_type = VNON;
	vap->va_size = vap->va_bytes = VNOVAL;
	vap->va_mode = vap->va_nlink = vap->va_uid = vap->va_gid =
		vap->va_fsid = vap->va_fileid =
		vap->va_blocksize = vap->va_rdev =
		vap->va_atime.ts_sec = vap->va_atime.ts_nsec =
		vap->va_mtime.ts_sec = vap->va_mtime.ts_nsec =
		vap->va_ctime.ts_sec = vap->va_ctime.ts_nsec =
		vap->va_flags = vap->va_gen = VNOVAL;
	vap->va_vaflags = 0;
}

/*
 * Routines having to do with the management of the vnode table.
 */
extern int (**dead_vnodeop_p)();
static void vclean __P((struct vnode *vp, int flag, struct proc *p));
extern void vgonel __P((struct vnode *vp, struct proc *p));
long numvnodes;			/* current count of allocated vnodes */
extern struct vattr va_null;

/*
 * Return the next vnode from the free list.
 * Allocates a fresh vnode while under the desiredvnodes quota,
 * otherwise recycles one from the head of the free list.
 */
int
getnewvnode(tag, mp, vops, vpp)
	enum vtagtype tag;
	struct mount *mp;
	int (**vops)();
	struct vnode **vpp;
{
	struct proc *p = curproc;	/* XXX */
	struct vnode *vp;
	int s;
	int cnt;

top:
	simple_lock(&vnode_free_list_slock);
	if ((vnode_free_list.tqh_first == NULL &&
	     numvnodes < 2 * desiredvnodes) ||
	    numvnodes < desiredvnodes) {
		/* Under quota (or free list empty): allocate a new vnode. */
		simple_unlock(&vnode_free_list_slock);
		vp = (struct vnode *)malloc((u_long)sizeof *vp,
		    M_VNODE, M_WAITOK);
		bzero((char *)vp, sizeof *vp);
		numvnodes++;
	} else {
		/* Recycle the first free vnode whose interlock we can get. */
		for (vp = vnode_free_list.tqh_first;
		    vp != NULLVP; vp = vp->v_freelist.tqe_next) {
			if (simple_lock_try(&vp->v_interlock))
				break;
		}
		/*
		 * Unless this is a bad time of the month, at most
		 * the first NCPUS items on the free list are
		 * locked, so this is close enough to being empty.
		 */
		if (vp == NULLVP) {
			simple_unlock(&vnode_free_list_slock);
			tablefull("vnode");
			*vpp = 0;
			return (ENFILE);
		}
		if (vp->v_usecount)
			panic("free vnode isn't");
		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
		/* see comment on why 0xdeadb is set at end of vgone (below) */
		vp->v_freelist.tqe_prev = (struct vnode **)0xdeadb;
		simple_unlock(&vnode_free_list_slock);
		vp->v_lease = NULL;
		/* Disassociate the old filesystem state, if any remains. */
		if (vp->v_type != VBAD)
			vgonel(vp, p);
		else
			simple_unlock(&vp->v_interlock);
#ifdef DIAGNOSTIC
		if (vp->v_data)
			panic("cleaned vnode isn't");
		s = splbio();
		if (vp->v_numoutput)
			panic("Clean vnode has pending I/O's");
		splx(s);
#endif
		/* Reset per-vnode state for reuse. */
		vp->v_flag = 0;
		vp->v_lastr = 0;
		vp->v_ralen = 0;
		vp->v_maxra = 0;
		vp->v_lastw = 0;
		vp->v_lasta = 0;
		vp->v_cstart = 0;
		vp->v_clen = 0;
		vp->v_socket = 0;
	}
	vp->v_type = VNON;
	cache_purge(vp);
	vp->v_tag = tag;
	vp->v_op = vops;
	insmntque(vp, mp);
	*vpp = vp;
	vp->v_usecount = 1;
	vp->v_data = 0;
	return (0);
}

/*
 * Move a vnode from one mount queue to another.
 * A NULL mp just removes the vnode from its current list.
 */
void
insmntque(vp, mp)
	struct vnode *vp;
	struct mount *mp;
{

	simple_lock(&mntvnode_slock);
	/*
	 * Delete from old mount point vnode list, if on one.
	 */
	if (vp->v_mount != NULL)
		LIST_REMOVE(vp, v_mntvnodes);
	/*
	 * Insert into list of vnodes for the new mount point, if available.
	 */
	if ((vp->v_mount = mp) != NULL)
		LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes);
	simple_unlock(&mntvnode_slock);
}

/*
 * Update outstanding I/O count and do wakeup if requested.
 */
void
vwakeup(bp)
	register struct buf *bp;
{
	register struct vnode *vp;

	bp->b_flags &= ~B_WRITEINPROG;
	if (vp = bp->b_vp) {
		if (--vp->v_numoutput < 0)
			panic("vwakeup: neg numoutput");
		/* Wake anyone in vinvalbuf-style waits for I/O drain. */
		if ((vp->v_flag & VBWAIT) && vp->v_numoutput <= 0) {
			if (vp->v_numoutput < 0)
				panic("vwakeup: neg numoutput 2");
			vp->v_flag &= ~VBWAIT;
			wakeup((caddr_t)&vp->v_numoutput);
		}
	}
}

/*
 * Flush out and invalidate all buffers associated with a vnode.
 * Called with the underlying object locked.
 * V_SAVE forces dirty data to disk first; V_SAVEMETA preserves
 * indirect blocks (buffers with negative logical block numbers).
 */
int
vinvalbuf(vp, flags, cred, p, slpflag, slptimeo)
	register struct vnode *vp;
	int flags;
	struct ucred *cred;
	struct proc *p;
	int slpflag, slptimeo;
{
	register struct buf *bp;
	struct buf *nbp, *blist;
	int s, error;

	if (flags & V_SAVE) {
		if (error = VOP_FSYNC(vp, cred, MNT_WAIT, p))
			return (error);
		if (vp->v_dirtyblkhd.lh_first != NULL)
			panic("vinvalbuf: dirty bufs");
	}
	for (;;) {
		/* Choose a starting buffer, skipping metadata if saving it. */
		if ((blist = vp->v_cleanblkhd.lh_first) && flags & V_SAVEMETA)
			while (blist && blist->b_lblkno < 0)
				blist = blist->b_vnbufs.le_next;
		if (!blist && (blist = vp->v_dirtyblkhd.lh_first) &&
		    (flags & V_SAVEMETA))
			while (blist && blist->b_lblkno < 0)
				blist = blist->b_vnbufs.le_next;
		if (!blist)
			break;

		for (bp = blist; bp; bp = nbp) {
			nbp = bp->b_vnbufs.le_next;
			if (flags & V_SAVEMETA && bp->b_lblkno < 0)
				continue;
			s = splbio();
			if (bp->b_flags & B_BUSY) {
				/* Wait for the buffer, then rescan lists. */
				bp->b_flags |= B_WANTED;
				error = tsleep((caddr_t)bp,
					slpflag | (PRIBIO + 1), "vinvalbuf",
					slptimeo);
				splx(s);
				if (error)
					return (error);
				break;
			}
			bremfree(bp);
			bp->b_flags |= B_BUSY;
			splx(s);
			/*
			 * XXX Since there are no node locks for NFS, I believe
			 * there is a slight chance that a delayed write will
			 * occur while sleeping just above, so check for it.
			 */
			if ((bp->b_flags & B_DELWRI) && (flags & V_SAVE)) {
				(void) VOP_BWRITE(bp);
				break;
			}
			bp->b_flags |= B_INVAL;
			brelse(bp);
		}
	}
	if (!(flags & V_SAVEMETA) &&
	    (vp->v_dirtyblkhd.lh_first || vp->v_cleanblkhd.lh_first))
		panic("vinvalbuf: flush failed");
	return (0);
}

/*
 * Associate a buffer with a vnode.
 */
void
bgetvp(vp, bp)
	register struct vnode *vp;
	register struct buf *bp;
{

	if (bp->b_vp)
		panic("bgetvp: not free");
	VHOLD(vp);
	bp->b_vp = vp;
	if (vp->v_type == VBLK || vp->v_type == VCHR)
		bp->b_dev = vp->v_rdev;
	else
		bp->b_dev = NODEV;
	/*
	 * Insert onto list for new vnode.
	 */
	bufinsvn(bp, &vp->v_cleanblkhd);
}

/*
 * Disassociate a buffer from a vnode.
 */
void
brelvp(bp)
	register struct buf *bp;
{
	struct vnode *vp;

	if (bp->b_vp == (struct vnode *) 0)
		panic("brelvp: NULL");
	/*
	 * Delete from old vnode list, if on one.
	 */
	if (bp->b_vnbufs.le_next != NOLIST)
		bufremvn(bp);
	vp = bp->b_vp;
	bp->b_vp = (struct vnode *) 0;
	HOLDRELE(vp);
}

/*
 * Reassign a buffer from one vnode to another.
 * Used to assign file specific control information
 * (indirect blocks) to the vnode to which they belong.
 */
void
reassignbuf(bp, newvp)
	register struct buf *bp;
	register struct vnode *newvp;
{
	register struct buflists *listheadp;

	if (newvp == NULL) {
		printf("reassignbuf: NULL");
		return;
	}
	/*
	 * Delete from old vnode list, if on one.
	 */
	if (bp->b_vnbufs.le_next != NOLIST)
		bufremvn(bp);
	/*
	 * If dirty, put on list of dirty buffers;
	 * otherwise insert onto list of clean buffers.
	 */
	if (bp->b_flags & B_DELWRI)
		listheadp = &newvp->v_dirtyblkhd;
	else
		listheadp = &newvp->v_cleanblkhd;
	bufinsvn(bp, listheadp);
}

/*
 * Create a vnode for a block device.
 * Used for root filesystem, argdev, and swap areas.
 * Also used for memory file system special devices.
 */
int
bdevvp(dev, vpp)
	dev_t dev;
	struct vnode **vpp;
{
	register struct vnode *vp;
	struct vnode *nvp;
	int error;

	if (dev == NODEV) {
		*vpp = NULLVP;
		return (ENODEV);
	}
	error = getnewvnode(VT_NON, (struct mount *)0, spec_vnodeop_p, &nvp);
	if (error) {
		*vpp = NULLVP;
		return (error);
	}
	vp = nvp;
	vp->v_type = VBLK;
	/* If the device is already aliased, use the existing vnode. */
	if (nvp = checkalias(vp, dev, (struct mount *)0)) {
		vput(vp);
		vp = nvp;
	}
	*vpp = vp;
	return (0);
}

/*
 * Check to see if the new vnode represents a special device
 * for which we already have a vnode (either because of
 * bdevvp() or because of a different vnode representing
 * the same block device). If such an alias exists, deallocate
 * the existing contents and return the aliased vnode. The
 * caller is responsible for filling it with its new contents.
 */
struct vnode *
checkalias(nvp, nvp_rdev, mp)
	register struct vnode *nvp;
	dev_t nvp_rdev;
	struct mount *mp;
{
	struct proc *p = curproc;	/* XXX */
	struct vnode *vp;
	struct vnode **vpp;

	if (nvp->v_type != VBLK && nvp->v_type != VCHR)
		return (NULLVP);

	vpp = &speclisth[SPECHASH(nvp_rdev)];
loop:
	simple_lock(&spechash_slock);
	for (vp = *vpp; vp; vp = vp->v_specnext) {
		if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type)
			continue;
		/*
		 * Alias, but not in use, so flush it out.
		 */
		simple_lock(&vp->v_interlock);
		if (vp->v_usecount == 0) {
			simple_unlock(&spechash_slock);
			vgonel(vp, p);
			goto loop;
		}
		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, p)) {
			simple_unlock(&spechash_slock);
			goto loop;
		}
		break;
	}
	if (vp == NULL || vp->v_tag != VT_NON) {
		/* No live alias: enter nvp on the hash chain as the alias. */
		MALLOC(nvp->v_specinfo, struct specinfo *,
			sizeof(struct specinfo), M_VNODE, M_WAITOK);
		nvp->v_rdev = nvp_rdev;
		nvp->v_hashchain = vpp;
		nvp->v_specnext = *vpp;
		nvp->v_specflags = 0;
		simple_unlock(&spechash_slock);
		*vpp = nvp;
		if (vp != NULLVP) {
			nvp->v_flag |= VALIASED;
			vp->v_flag |= VALIASED;
			vput(vp);
		}
		return (NULLVP);
	}
	/* Existing alias: discard its old identity and hand it back. */
	simple_unlock(&spechash_slock);
	VOP_UNLOCK(vp, 0, p);
	simple_lock(&vp->v_interlock);
	vclean(vp, 0, p);
	vp->v_op = nvp->v_op;
	vp->v_tag = nvp->v_tag;
	nvp->v_type = VNON;
	insmntque(vp, mp);
	return (vp);
}

/*
 * Grab a particular vnode from the free list, increment its
 * reference count and lock it. The vnode lock bit is set the
 * vnode is being eliminated in vgone. The process is awakened
 * when the transition is completed, and an error returned to
 * indicate that the vnode is no longer usable (possibly having
 * been changed to a new file system type).
 */
int
vget(vp, flags, p)
	struct vnode *vp;
	int flags;
	struct proc *p;
{
	int error;

	/*
	 * If the vnode is in the process of being cleaned out for
	 * another use, we wait for the cleaning to finish and then
	 * return failure. Cleaning is determined by checking that
	 * the VXLOCK flag is set.
	 */
	if ((flags & LK_INTERLOCK) == 0)
		simple_lock(&vp->v_interlock);
	if (vp->v_flag & VXLOCK) {
		vp->v_flag |= VXWANT;
		simple_unlock(&vp->v_interlock);
		tsleep((caddr_t)vp, PINOD, "vget", 0);
		return (ENOENT);
	}
	/* Transitioning from unreferenced: take it off the free list. */
	if (vp->v_usecount == 0) {
		simple_lock(&vnode_free_list_slock);
		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
		simple_unlock(&vnode_free_list_slock);
	}
	vp->v_usecount++;
	if (flags & LK_TYPE_MASK) {
		if (error = vn_lock(vp, flags | LK_INTERLOCK, p))
			vrele(vp);
		return (error);
	}
	simple_unlock(&vp->v_interlock);
	return (0);
}

/*
 * Stubs to use when there is no locking to be done on the underlying object.
 * A minimal shared lock is necessary to ensure that the underlying object
 * is not revoked while an operation is in progress. So, an active shared
 * count is maintained in an auxillary vnode lock structure.
 */
int
vop_nolock(ap)
	struct vop_lock_args /* {
		struct vnode *a_vp;
		int a_flags;
		struct proc *a_p;
	} */ *ap;
{
#ifdef notyet
	/*
	 * This code cannot be used until all the non-locking filesystems
	 * (notably NFS) are converted to properly lock and release nodes.
	 * Also, certain vnode operations change the locking state within
	 * the operation (create, mknod, remove, link, rename, mkdir, rmdir,
	 * and symlink). Ideally these operations should not change the
	 * lock state, but should be changed to let the caller of the
	 * function unlock them. Otherwise all intermediate vnode layers
	 * (such as union, umapfs, etc) must catch these functions to do
	 * the necessary locking at their layer. Note that the inactive
	 * and lookup operations also change their lock state, but this
	 * cannot be avoided, so these two operations will always need
	 * to be handled in intermediate layers.
	 */
	struct vnode *vp = ap->a_vp;
	int vnflags, flags = ap->a_flags;

	if (vp->v_vnlock == NULL) {
		if ((flags & LK_TYPE_MASK) == LK_DRAIN)
			return (0);
		MALLOC(vp->v_vnlock, struct lock *, sizeof(struct lock),
		    M_VNODE, M_WAITOK);
		lockinit(vp->v_vnlock, PVFS, "vnlock", 0, 0);
	}
	switch (flags & LK_TYPE_MASK) {
	case LK_DRAIN:
		vnflags = LK_DRAIN;
		break;
	case LK_EXCLUSIVE:
	case LK_SHARED:
		vnflags = LK_SHARED;
		break;
	case LK_UPGRADE:
	case LK_EXCLUPGRADE:
	case LK_DOWNGRADE:
		return (0);
	case LK_RELEASE:
	default:
		panic("vop_nolock: bad operation %d", flags & LK_TYPE_MASK);
	}
	if (flags & LK_INTERLOCK)
		vnflags |= LK_INTERLOCK;
	return(lockmgr(vp->v_vnlock, vnflags, &vp->v_interlock, ap->a_p));
#else /* for now */
	/*
	 * Since we are not using the lock manager, we must clear
	 * the interlock here.
	 */
	if (ap->a_flags & LK_INTERLOCK)
		simple_unlock(&ap->a_vp->v_interlock);
	return (0);
#endif
}

/*
 * Decrement the active use count.
 */
int
vop_nounlock(ap)
	struct vop_unlock_args /* {
		struct vnode *a_vp;
		int a_flags;
		struct proc *a_p;
	} */ *ap;
{
	struct vnode *vp = ap->a_vp;

	if (vp->v_vnlock == NULL)
		return (0);
	return (lockmgr(vp->v_vnlock, LK_RELEASE, NULL, ap->a_p));
}

/*
 * Return whether or not the node is in use.
 */
int
vop_noislocked(ap)
	struct vop_islocked_args /* {
		struct vnode *a_vp;
	} */ *ap;
{
	struct vnode *vp = ap->a_vp;

	if (vp->v_vnlock == NULL)
		return (0);
	return (lockstatus(vp->v_vnlock));
}

/*
 * Vnode reference.
800 */ 801 void 802 vref(vp) 803 struct vnode *vp; 804 { 805 806 simple_lock(&vp->v_interlock); 807 if (vp->v_usecount <= 0) 808 panic("vref used where vget required"); 809 vp->v_usecount++; 810 simple_unlock(&vp->v_interlock); 811 } 812 813 /* 814 * vput(), just unlock and vrele() 815 */ 816 void 817 vput(vp) 818 struct vnode *vp; 819 { 820 struct proc *p = curproc; /* XXX */ 821 822 #ifdef DIGANOSTIC 823 if (vp == NULL) 824 panic("vput: null vp"); 825 #endif 826 simple_lock(&vp->v_interlock); 827 vp->v_usecount--; 828 if (vp->v_usecount > 0) { 829 simple_unlock(&vp->v_interlock); 830 VOP_UNLOCK(vp, 0, p); 831 return; 832 } 833 #ifdef DIAGNOSTIC 834 if (vp->v_usecount < 0 || vp->v_writecount != 0) { 835 vprint("vput: bad ref count", vp); 836 panic("vput: ref cnt"); 837 } 838 #endif 839 /* 840 * insert at tail of LRU list 841 */ 842 simple_lock(&vnode_free_list_slock); 843 TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist); 844 simple_unlock(&vnode_free_list_slock); 845 simple_unlock(&vp->v_interlock); 846 VOP_INACTIVE(vp, p); 847 } 848 849 /* 850 * Vnode release. 851 * If count drops to zero, call inactive routine and return to freelist. 
852 */ 853 void 854 vrele(vp) 855 struct vnode *vp; 856 { 857 struct proc *p = curproc; /* XXX */ 858 859 #ifdef DIAGNOSTIC 860 if (vp == NULL) 861 panic("vrele: null vp"); 862 #endif 863 simple_lock(&vp->v_interlock); 864 vp->v_usecount--; 865 if (vp->v_usecount > 0) { 866 simple_unlock(&vp->v_interlock); 867 return; 868 } 869 #ifdef DIAGNOSTIC 870 if (vp->v_usecount < 0 || vp->v_writecount != 0) { 871 vprint("vrele: bad ref count", vp); 872 panic("vrele: ref cnt"); 873 } 874 #endif 875 /* 876 * insert at tail of LRU list 877 */ 878 simple_lock(&vnode_free_list_slock); 879 TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist); 880 simple_unlock(&vnode_free_list_slock); 881 if (vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK, p) == 0) 882 VOP_INACTIVE(vp, p); 883 } 884 885 #ifdef DIAGNOSTIC 886 /* 887 * Page or buffer structure gets a reference. 888 */ 889 void 890 vhold(vp) 891 register struct vnode *vp; 892 { 893 894 simple_lock(&vp->v_interlock); 895 vp->v_holdcnt++; 896 simple_unlock(&vp->v_interlock); 897 } 898 899 /* 900 * Page or buffer structure frees a reference. 901 */ 902 void 903 holdrele(vp) 904 register struct vnode *vp; 905 { 906 907 simple_lock(&vp->v_interlock); 908 if (vp->v_holdcnt <= 0) 909 panic("holdrele: holdcnt"); 910 vp->v_holdcnt--; 911 simple_unlock(&vp->v_interlock); 912 } 913 #endif /* DIAGNOSTIC */ 914 915 /* 916 * Remove any vnodes in the vnode table belonging to mount point mp. 917 * 918 * If MNT_NOFORCE is specified, there should not be any active ones, 919 * return error if any are found (nb: this is a user error, not a 920 * system error). If MNT_FORCE is specified, detach any active vnodes 921 * that are found. 
 */
#ifdef DIAGNOSTIC
int busyprt = 0;	/* print out busy vnodes */
struct ctldebug debug1 = { "busyprt", &busyprt };
#endif

int
vflush(mp, skipvp, flags)
	struct mount *mp;
	struct vnode *skipvp;
	int flags;
{
	struct proc *p = curproc;	/* XXX */
	struct vnode *vp, *nvp;
	int busy = 0;

	simple_lock(&mntvnode_slock);
loop:
	for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) {
		/* List changed under us (vgonel moved the vnode): rescan. */
		if (vp->v_mount != mp)
			goto loop;
		nvp = vp->v_mntvnodes.le_next;
		/*
		 * Skip over a selected vnode.
		 */
		if (vp == skipvp)
			continue;

		simple_lock(&vp->v_interlock);
		/*
		 * Skip over a vnodes marked VSYSTEM.
		 */
		if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM)) {
			simple_unlock(&vp->v_interlock);
			continue;
		}
		/*
		 * If WRITECLOSE is set, only flush out regular file
		 * vnodes open for writing.
		 */
		if ((flags & WRITECLOSE) &&
		    (vp->v_writecount == 0 || vp->v_type != VREG)) {
			simple_unlock(&vp->v_interlock);
			continue;
		}
		/*
		 * With v_usecount == 0, all we need to do is clear
		 * out the vnode data structures and we are done.
		 */
		if (vp->v_usecount == 0) {
			simple_unlock(&mntvnode_slock);
			vgonel(vp, p);
			simple_lock(&mntvnode_slock);
			continue;
		}
		/*
		 * If FORCECLOSE is set, forcibly close the vnode.
		 * For block or character devices, revert to an
		 * anonymous device. For all other files, just kill them.
		 */
		if (flags & FORCECLOSE) {
			simple_unlock(&mntvnode_slock);
			if (vp->v_type != VBLK && vp->v_type != VCHR) {
				vgonel(vp, p);
			} else {
				vclean(vp, 0, p);
				vp->v_op = spec_vnodeop_p;
				insmntque(vp, (struct mount *)0);
			}
			simple_lock(&mntvnode_slock);
			continue;
		}
#ifdef DIAGNOSTIC
		if (busyprt)
			vprint("vflush: busy vnode", vp);
#endif
		simple_unlock(&vp->v_interlock);
		busy++;
	}
	simple_unlock(&mntvnode_slock);
	if (busy)
		return (EBUSY);
	return (0);
}

/*
 * Disassociate the underlying file system from a vnode.
 * The vnode interlock is held on entry.
 */
static void
vclean(vp, flags, p)
	struct vnode *vp;
	int flags;
	struct proc *p;
{
	int active;

	/*
	 * Check to see if the vnode is in use.
	 * If so we have to reference it before we clean it out
	 * so that its count cannot fall to zero and generate a
	 * race against ourselves to recycle it.
	 */
	if (active = vp->v_usecount)
		vp->v_usecount++;
	/*
	 * Prevent the vnode from being recycled or
	 * brought into use while we clean it out.
	 */
	if (vp->v_flag & VXLOCK)
		panic("vclean: deadlock");
	vp->v_flag |= VXLOCK;
	/*
	 * Even if the count is zero, the VOP_INACTIVE routine may still
	 * have the object locked while it cleans it out. The VOP_LOCK
	 * ensures that the VOP_INACTIVE routine is done with its work.
	 * For active vnodes, it ensures that no other activity can
	 * occur while the underlying object is being cleaned out.
	 */
	VOP_LOCK(vp, LK_DRAIN | LK_INTERLOCK, p);
	/*
	 * Clean out any buffers associated with the vnode.
	 */
	if (flags & DOCLOSE)
		vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0);
	/*
	 * If purging an active vnode, it must be closed and
	 * deactivated before being reclaimed. Note that the
	 * VOP_INACTIVE will unlock the vnode.
	 */
	if (active) {
		if (flags & DOCLOSE)
			VOP_CLOSE(vp, IO_NDELAY, NOCRED, p);
		VOP_INACTIVE(vp, p);
	} else {
		/*
		 * Any other processes trying to obtain this lock must first
		 * wait for VXLOCK to clear, then call the new lock operation.
		 */
		VOP_UNLOCK(vp, 0, p);
	}
	/*
	 * Reclaim the vnode.
	 */
	if (VOP_RECLAIM(vp, p))
		panic("vclean: cannot reclaim");
	if (active)
		vrele(vp);
	cache_purge(vp);
	/* Free the auxiliary lock structure set up by the filesystem. */
	if (vp->v_vnlock) {
		if ((vp->v_vnlock->lk_flags & LK_DRAINED) == 0)
			vprint("vclean: lock not drained", vp);
		FREE(vp->v_vnlock, M_VNODE);
		vp->v_vnlock = NULL;
	}

	/*
	 * Done with purge, notify sleepers of the grim news.
	 */
	vp->v_op = dead_vnodeop_p;
	vp->v_tag = VT_NON;
	vp->v_flag &= ~VXLOCK;
	if (vp->v_flag & VXWANT) {
		vp->v_flag &= ~VXWANT;
		wakeup((caddr_t)vp);
	}
}

/*
 * Eliminate all activity associated with the requested vnode
 * and with all vnodes aliased to the requested vnode.
 */
int
vop_revoke(ap)
	struct vop_revoke_args /* {
		struct vnode *a_vp;
		int a_flags;
	} */ *ap;
{
	struct vnode *vp, *vq;
	struct proc *p = curproc;	/* XXX */

#ifdef DIAGNOSTIC
	if ((ap->a_flags & REVOKEALL) == 0)
		panic("vop_revoke");
#endif

	vp = ap->a_vp;
	simple_lock(&vp->v_interlock);

	if (vp->v_flag & VALIASED) {
		/*
		 * If a vgone (or vclean) is already in progress,
		 * wait until it is done and return.
		 */
		if (vp->v_flag & VXLOCK) {
			vp->v_flag |= VXWANT;
			simple_unlock(&vp->v_interlock);
			tsleep((caddr_t)vp, PINOD, "vop_revokeall", 0);
			return (0);
		}
		/*
		 * Ensure that vp will not be vgone'd while we
		 * are eliminating its aliases.
		 */
		vp->v_flag |= VXLOCK;
		simple_unlock(&vp->v_interlock);
		while (vp->v_flag & VALIASED) {
			simple_lock(&spechash_slock);
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_rdev != vp->v_rdev ||
				    vq->v_type != vp->v_type || vp == vq)
					continue;
				simple_unlock(&spechash_slock);
				vgone(vq);
				break;
			}
			if (vq == NULLVP)
				simple_unlock(&spechash_slock);
		}
		/*
		 * Remove the lock so that vgone below will
		 * really eliminate the vnode after which time
		 * vgone will awaken any sleepers.
		 */
		simple_lock(&vp->v_interlock);
		vp->v_flag &= ~VXLOCK;
	}
	vgonel(vp, p);
	return (0);
}

/*
 * Recycle an unused vnode to the front of the free list.
 * Release the passed interlock if the vnode will be recycled.
 * Returns 1 if the vnode was recycled, 0 if it was in use.
 */
int
vrecycle(vp, inter_lkp, p)
	struct vnode *vp;
	struct simplelock *inter_lkp;
	struct proc *p;
{

	simple_lock(&vp->v_interlock);
	if (vp->v_usecount == 0) {
		if (inter_lkp)
			simple_unlock(inter_lkp);
		vgonel(vp, p);
		return (1);
	}
	simple_unlock(&vp->v_interlock);
	return (0);
}

/*
 * Eliminate all activity associated with a vnode
 * in preparation for reuse.
 */
void
vgone(vp)
	struct vnode *vp;
{
	struct proc *p = curproc;	/* XXX */

	simple_lock(&vp->v_interlock);
	vgonel(vp, p);
}

/*
 * vgone, with the vp interlock held.
 */
void
vgonel(vp, p)
	struct vnode *vp;
	struct proc *p;
{
	struct vnode *vq;
	struct vnode *vx;

	/*
	 * If a vgone (or vclean) is already in progress,
	 * wait until it is done and return.
	 */
	if (vp->v_flag & VXLOCK) {
		vp->v_flag |= VXWANT;
		simple_unlock(&vp->v_interlock);
		tsleep((caddr_t)vp, PINOD, "vgone", 0);
		return;
	}
	/*
	 * Clean out the filesystem specific data.
	 */
	vclean(vp, DOCLOSE, p);
	/*
	 * Delete from old mount point vnode list, if on one.
	 */
	if (vp->v_mount != NULL)
		insmntque(vp, (struct mount *)0);
	/*
	 * If special device, remove it from special device alias list
	 * if it is on one.
	 */
	if ((vp->v_type == VBLK || vp->v_type == VCHR) && vp->v_specinfo != 0) {
		simple_lock(&spechash_slock);
		/* Unlink vp from its device hash chain. */
		if (*vp->v_hashchain == vp) {
			*vp->v_hashchain = vp->v_specnext;
		} else {
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_specnext != vp)
					continue;
				vq->v_specnext = vp->v_specnext;
				break;
			}
			if (vq == NULL)
				panic("missing bdev");
		}
		if (vp->v_flag & VALIASED) {
			/*
			 * If only one alias remains, clear its VALIASED
			 * flag as well; vx is the first surviving alias.
			 */
			vx = NULL;
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_rdev != vp->v_rdev ||
				    vq->v_type != vp->v_type)
					continue;
				if (vx)
					break;
				vx = vq;
			}
			if (vx == NULL)
				panic("missing alias");
			if (vq == NULL)
				vx->v_flag &= ~VALIASED;
			vp->v_flag &= ~VALIASED;
		}
		simple_unlock(&spechash_slock);
		FREE(vp->v_specinfo, M_VNODE);
		vp->v_specinfo = NULL;
	}
	/*
	 * If it is on the freelist and not already at the head,
	 * move it to the head of the list. The test of the back
	 * pointer and the reference count of zero is because
	 * it will be removed from the free list by getnewvnode,
	 * but will not have its reference count incremented until
	 * after calling vgone. If the reference count were
	 * incremented first, vgone would (incorrectly) try to
	 * close the previous instance of the underlying object.
	 * So, the back pointer is explicitly set to `0xdeadb' in
	 * getnewvnode after removing it from the freelist to ensure
	 * that we do not try to move it here.
	 */
	if (vp->v_usecount == 0) {
		simple_lock(&vnode_free_list_slock);
		if ((vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb) &&
		    vnode_free_list.tqh_first != vp) {
			TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
			TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
		}
		simple_unlock(&vnode_free_list_slock);
	}
	vp->v_type = VBAD;
}

/*
 * Lookup a vnode by device number.
 * Returns 1 and sets *vpp on success, 0 if not found.
 */
int
vfinddev(dev, type, vpp)
	dev_t dev;
	enum vtype type;
	struct vnode **vpp;
{
	struct vnode *vp;
	int rc = 0;

	simple_lock(&spechash_slock);
	for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) {
		if (dev != vp->v_rdev || type != vp->v_type)
			continue;
		*vpp = vp;
		rc = 1;
		break;
	}
	simple_unlock(&spechash_slock);
	return (rc);
}

/*
 * Calculate the total number of references to a special device.
 */
int
vcount(vp)
	struct vnode *vp;
{
	struct vnode *vq, *vnext;
	int count;

loop:
	if ((vp->v_flag & VALIASED) == 0)
		return (vp->v_usecount);
	simple_lock(&spechash_slock);
	for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) {
		vnext = vq->v_specnext;
		if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type)
			continue;
		/*
		 * Alias, but not in use, so flush it out.
		 */
		if (vq->v_usecount == 0 && vq != vp) {
			simple_unlock(&spechash_slock);
			vgone(vq);
			goto loop;
		}
		count += vq->v_usecount;
	}
	simple_unlock(&spechash_slock);
	return (count);
}

/*
 * Print out a description of a vnode.
1341 */ 1342 static char *typename[] = 1343 { "VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD" }; 1344 1345 void 1346 vprint(label, vp) 1347 char *label; 1348 register struct vnode *vp; 1349 { 1350 char buf[64]; 1351 1352 if (label != NULL) 1353 printf("%s: ", label); 1354 printf("type %s, usecount %d, writecount %d, refcount %d,", 1355 typename[vp->v_type], vp->v_usecount, vp->v_writecount, 1356 vp->v_holdcnt); 1357 buf[0] = '\0'; 1358 if (vp->v_flag & VROOT) 1359 strcat(buf, "|VROOT"); 1360 if (vp->v_flag & VTEXT) 1361 strcat(buf, "|VTEXT"); 1362 if (vp->v_flag & VSYSTEM) 1363 strcat(buf, "|VSYSTEM"); 1364 if (vp->v_flag & VXLOCK) 1365 strcat(buf, "|VXLOCK"); 1366 if (vp->v_flag & VXWANT) 1367 strcat(buf, "|VXWANT"); 1368 if (vp->v_flag & VBWAIT) 1369 strcat(buf, "|VBWAIT"); 1370 if (vp->v_flag & VALIASED) 1371 strcat(buf, "|VALIASED"); 1372 if (buf[0] != '\0') 1373 printf(" flags (%s)", &buf[1]); 1374 if (vp->v_data == NULL) { 1375 printf("\n"); 1376 } else { 1377 printf("\n\t"); 1378 VOP_PRINT(vp); 1379 } 1380 } 1381 1382 #ifdef DEBUG 1383 /* 1384 * List all of the locked vnodes in the system. 1385 * Called when debugging the kernel. 1386 */ 1387 void 1388 printlockedvnodes() 1389 { 1390 struct proc *p = curproc; /* XXX */ 1391 struct mount *mp, *nmp; 1392 struct vnode *vp; 1393 1394 printf("Locked vnodes\n"); 1395 simple_lock(&mountlist_slock); 1396 for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) { 1397 if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) { 1398 nmp = mp->mnt_list.cqe_next; 1399 continue; 1400 } 1401 for (vp = mp->mnt_vnodelist.lh_first; 1402 vp != NULL; 1403 vp = vp->v_mntvnodes.le_next) { 1404 if (VOP_ISLOCKED(vp)) 1405 vprint((char *)0, vp); 1406 } 1407 simple_lock(&mountlist_slock); 1408 nmp = mp->mnt_list.cqe_next; 1409 vfs_unbusy(mp, p); 1410 } 1411 simple_unlock(&mountlist_slock); 1412 } 1413 #endif 1414 1415 /* 1416 * Top level filesystem related information gathering. 
1417 */ 1418 int 1419 vfs_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p) 1420 int *name; 1421 u_int namelen; 1422 void *oldp; 1423 size_t *oldlenp; 1424 void *newp; 1425 size_t newlen; 1426 struct proc *p; 1427 { 1428 struct ctldebug *cdp; 1429 struct vfsconf *vfsp; 1430 1431 /* all sysctl names at this level are at least name and field */ 1432 if (namelen < 2) 1433 return (ENOTDIR); /* overloaded */ 1434 if (name[0] != VFS_GENERIC) { 1435 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) 1436 if (vfsp->vfc_typenum == name[0]) 1437 break; 1438 if (vfsp == NULL) 1439 return (EOPNOTSUPP); 1440 return ((*vfsp->vfc_vfsops->vfs_sysctl)(&name[1], namelen - 1, 1441 oldp, oldlenp, newp, newlen, p)); 1442 } 1443 switch (name[1]) { 1444 case VFS_MAXTYPENUM: 1445 return (sysctl_rdint(oldp, oldlenp, newp, maxvfsconf)); 1446 case VFS_CONF: 1447 if (namelen < 3) 1448 return (ENOTDIR); /* overloaded */ 1449 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) 1450 if (vfsp->vfc_typenum == name[2]) 1451 break; 1452 if (vfsp == NULL) 1453 return (EOPNOTSUPP); 1454 return (sysctl_rdstruct(oldp, oldlenp, newp, vfsp, 1455 sizeof(struct vfsconf))); 1456 } 1457 return (EOPNOTSUPP); 1458 } 1459 1460 int kinfo_vdebug = 1; 1461 int kinfo_vgetfailed; 1462 #define KINFO_VNODESLOP 10 1463 /* 1464 * Dump vnode list (via sysctl). 1465 * Copyout address of vnode followed by vnode. 
1466 */ 1467 /* ARGSUSED */ 1468 int 1469 sysctl_vnode(where, sizep, p) 1470 char *where; 1471 size_t *sizep; 1472 struct proc *p; 1473 { 1474 struct mount *mp, *nmp; 1475 struct vnode *nvp, *vp; 1476 char *bp = where, *savebp; 1477 char *ewhere; 1478 int error; 1479 1480 #define VPTRSZ sizeof (struct vnode *) 1481 #define VNODESZ sizeof (struct vnode) 1482 if (where == NULL) { 1483 *sizep = (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ); 1484 return (0); 1485 } 1486 ewhere = where + *sizep; 1487 1488 simple_lock(&mountlist_slock); 1489 for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) { 1490 if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) { 1491 nmp = mp->mnt_list.cqe_next; 1492 continue; 1493 } 1494 savebp = bp; 1495 again: 1496 simple_lock(&mntvnode_slock); 1497 for (vp = mp->mnt_vnodelist.lh_first; 1498 vp != NULL; 1499 vp = nvp) { 1500 /* 1501 * Check that the vp is still associated with 1502 * this filesystem. RACE: could have been 1503 * recycled onto the same filesystem. 1504 */ 1505 if (vp->v_mount != mp) { 1506 simple_unlock(&mntvnode_slock); 1507 if (kinfo_vdebug) 1508 printf("kinfo: vp changed\n"); 1509 bp = savebp; 1510 goto again; 1511 } 1512 nvp = vp->v_mntvnodes.le_next; 1513 if (bp + VPTRSZ + VNODESZ > ewhere) { 1514 simple_unlock(&mntvnode_slock); 1515 *sizep = bp - where; 1516 return (ENOMEM); 1517 } 1518 simple_unlock(&mntvnode_slock); 1519 if ((error = copyout((caddr_t)&vp, bp, VPTRSZ)) || 1520 (error = copyout((caddr_t)vp, bp + VPTRSZ, VNODESZ))) 1521 return (error); 1522 bp += VPTRSZ + VNODESZ; 1523 simple_lock(&mntvnode_slock); 1524 } 1525 simple_unlock(&mntvnode_slock); 1526 simple_lock(&mountlist_slock); 1527 nmp = mp->mnt_list.cqe_next; 1528 vfs_unbusy(mp, p); 1529 } 1530 simple_unlock(&mountlist_slock); 1531 1532 *sizep = bp - where; 1533 return (0); 1534 } 1535 1536 /* 1537 * Check to see if a filesystem is mounted on a block device. 
1538 */ 1539 int 1540 vfs_mountedon(vp) 1541 struct vnode *vp; 1542 { 1543 struct vnode *vq; 1544 int error = 0; 1545 1546 if (vp->v_specflags & SI_MOUNTEDON) 1547 return (EBUSY); 1548 if (vp->v_flag & VALIASED) { 1549 simple_lock(&spechash_slock); 1550 for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) { 1551 if (vq->v_rdev != vp->v_rdev || 1552 vq->v_type != vp->v_type) 1553 continue; 1554 if (vq->v_specflags & SI_MOUNTEDON) { 1555 error = EBUSY; 1556 break; 1557 } 1558 } 1559 simple_unlock(&spechash_slock); 1560 } 1561 return (error); 1562 } 1563 1564 /* 1565 * Unmount all filesystems. The list is traversed in reverse order 1566 * of mounting to avoid dependencies. 1567 */ 1568 void 1569 vfs_unmountall() 1570 { 1571 struct mount *mp, *nmp; 1572 struct proc *p = curproc; /* XXX */ 1573 1574 /* 1575 * Since this only runs when rebooting, it is not interlocked. 1576 */ 1577 for (mp = mountlist.cqh_last; mp != (void *)&mountlist; mp = nmp) { 1578 nmp = mp->mnt_list.cqe_prev; 1579 (void) dounmount(mp, MNT_FORCE, p); 1580 } 1581 } 1582 1583 /* 1584 * Build hash lists of net addresses and hang them off the mount point. 1585 * Called by ufs_mount() to set up the lists of export addresses. 
1586 */ 1587 static int 1588 vfs_hang_addrlist(mp, nep, argp) 1589 struct mount *mp; 1590 struct netexport *nep; 1591 struct export_args *argp; 1592 { 1593 register struct netcred *np; 1594 register struct radix_node_head *rnh; 1595 register int i; 1596 struct radix_node *rn; 1597 struct sockaddr *saddr, *smask = 0; 1598 struct domain *dom; 1599 int error; 1600 1601 if (argp->ex_addrlen == 0) { 1602 if (mp->mnt_flag & MNT_DEFEXPORTED) 1603 return (EPERM); 1604 np = &nep->ne_defexported; 1605 np->netc_exflags = argp->ex_flags; 1606 np->netc_anon = argp->ex_anon; 1607 np->netc_anon.cr_ref = 1; 1608 mp->mnt_flag |= MNT_DEFEXPORTED; 1609 return (0); 1610 } 1611 i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen; 1612 np = (struct netcred *)malloc(i, M_NETADDR, M_WAITOK); 1613 bzero((caddr_t)np, i); 1614 saddr = (struct sockaddr *)(np + 1); 1615 if (error = copyin(argp->ex_addr, (caddr_t)saddr, argp->ex_addrlen)) 1616 goto out; 1617 if (saddr->sa_len > argp->ex_addrlen) 1618 saddr->sa_len = argp->ex_addrlen; 1619 if (argp->ex_masklen) { 1620 smask = (struct sockaddr *)((caddr_t)saddr + argp->ex_addrlen); 1621 error = copyin(argp->ex_addr, (caddr_t)smask, argp->ex_masklen); 1622 if (error) 1623 goto out; 1624 if (smask->sa_len > argp->ex_masklen) 1625 smask->sa_len = argp->ex_masklen; 1626 } 1627 i = saddr->sa_family; 1628 if ((rnh = nep->ne_rtable[i]) == 0) { 1629 /* 1630 * Seems silly to initialize every AF when most are not 1631 * used, do so on demand here 1632 */ 1633 for (dom = domains; dom; dom = dom->dom_next) 1634 if (dom->dom_family == i && dom->dom_rtattach) { 1635 dom->dom_rtattach((void **)&nep->ne_rtable[i], 1636 dom->dom_rtoffset); 1637 break; 1638 } 1639 if ((rnh = nep->ne_rtable[i]) == 0) { 1640 error = ENOBUFS; 1641 goto out; 1642 } 1643 } 1644 rn = (*rnh->rnh_addaddr)((caddr_t)saddr, (caddr_t)smask, rnh, 1645 np->netc_rnodes); 1646 if (rn == 0) { 1647 /* 1648 * One of the reasons that rnh_addaddr may fail is that 1649 * the entry already 
exists. To check for this case, we 1650 * look up the entry to see if it is there. If so, we 1651 * do not need to make a new entry but do return success. 1652 */ 1653 free(np, M_NETADDR); 1654 rn = (*rnh->rnh_matchaddr)((caddr_t)saddr, rnh); 1655 if (rn != 0 && (rn->rn_flags & RNF_ROOT) == 0 && 1656 ((struct netcred *)rn)->netc_exflags == argp->ex_flags && 1657 !bcmp((caddr_t)&((struct netcred *)rn)->netc_anon, 1658 (caddr_t)&argp->ex_anon, sizeof(struct ucred))) 1659 return (0); 1660 return (EPERM); 1661 } 1662 np->netc_exflags = argp->ex_flags; 1663 np->netc_anon = argp->ex_anon; 1664 np->netc_anon.cr_ref = 1; 1665 return (0); 1666 out: 1667 free(np, M_NETADDR); 1668 return (error); 1669 } 1670 1671 /* ARGSUSED */ 1672 static int 1673 vfs_free_netcred(rn, w) 1674 struct radix_node *rn; 1675 caddr_t w; 1676 { 1677 register struct radix_node_head *rnh = (struct radix_node_head *)w; 1678 1679 (*rnh->rnh_deladdr)(rn->rn_key, rn->rn_mask, rnh); 1680 free((caddr_t)rn, M_NETADDR); 1681 return (0); 1682 } 1683 1684 /* 1685 * Free the net address hash lists that are hanging off the mount points. 
1686 */ 1687 static void 1688 vfs_free_addrlist(nep) 1689 struct netexport *nep; 1690 { 1691 register int i; 1692 register struct radix_node_head *rnh; 1693 1694 for (i = 0; i <= AF_MAX; i++) 1695 if (rnh = nep->ne_rtable[i]) { 1696 (*rnh->rnh_walktree)(rnh, vfs_free_netcred, 1697 (caddr_t)rnh); 1698 free((caddr_t)rnh, M_RTABLE); 1699 nep->ne_rtable[i] = 0; 1700 } 1701 } 1702 1703 int 1704 vfs_export(mp, nep, argp) 1705 struct mount *mp; 1706 struct netexport *nep; 1707 struct export_args *argp; 1708 { 1709 int error; 1710 1711 if (argp->ex_flags & MNT_DELEXPORT) { 1712 vfs_free_addrlist(nep); 1713 mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED); 1714 } 1715 if (argp->ex_flags & MNT_EXPORTED) { 1716 if (error = vfs_hang_addrlist(mp, nep, argp)) 1717 return (error); 1718 mp->mnt_flag |= MNT_EXPORTED; 1719 } 1720 return (0); 1721 } 1722 1723 struct netcred * 1724 vfs_export_lookup(mp, nep, nam) 1725 register struct mount *mp; 1726 struct netexport *nep; 1727 struct mbuf *nam; 1728 { 1729 register struct netcred *np; 1730 register struct radix_node_head *rnh; 1731 struct sockaddr *saddr; 1732 1733 np = NULL; 1734 if (mp->mnt_flag & MNT_EXPORTED) { 1735 /* 1736 * Lookup in the export list first. 1737 */ 1738 if (nam != NULL) { 1739 saddr = mtod(nam, struct sockaddr *); 1740 rnh = nep->ne_rtable[saddr->sa_family]; 1741 if (rnh != NULL) { 1742 np = (struct netcred *) 1743 (*rnh->rnh_matchaddr)((caddr_t)saddr, 1744 rnh); 1745 if (np && np->netc_rnodes->rn_flags & RNF_ROOT) 1746 np = NULL; 1747 } 1748 } 1749 /* 1750 * If no address match, use the default if it exists. 1751 */ 1752 if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED) 1753 np = &nep->ne_defexported; 1754 } 1755 return (np); 1756 } 1757