/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * %sccs.include.redist.c%
 *
 *	@(#)vfs_subr.c	8.29 (Berkeley) 05/20/95
 */

/*
 * External virtual filesystem routines
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/mount.h>
#include <sys/time.h>
#include <sys/vnode.h>
#include <sys/stat.h>
#include <sys/namei.h>
#include <sys/ucred.h>
#include <sys/buf.h>
#include <sys/errno.h>
#include <sys/malloc.h>
#include <sys/domain.h>
#include <sys/mbuf.h>

#include <vm/vm.h>
#include <sys/sysctl.h>

#include <miscfs/specfs/specdev.h>

/*
 * Translation tables between the stat-style S_IFMT file-type codes and
 * the vnode vtype enumeration.  iftovt_tab is indexed by the IFMT bits
 * shifted down 12; vttoif_tab is indexed by enum vtype.  Entries with
 * no corresponding type map to VNON / S_IFMT respectively.
 */
enum vtype iftovt_tab[16] = {
	VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
	VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
};
int	vttoif_tab[9] = {
	0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
	S_IFSOCK, S_IFIFO, S_IFMT,
};

/*
 * Insq/Remq for the vnode usage lists.
 * bufremvn also poisons the link with NOLIST so brelvp/reassignbuf can
 * tell whether a buffer is currently on a vnode's buffer list.
 */
#define	bufinsvn(bp, dp)	LIST_INSERT_HEAD(dp, bp, b_vnbufs)
#define	bufremvn(bp) {							\
	LIST_REMOVE(bp, b_vnbufs);					\
	(bp)->b_vnbufs.le_next = NOLIST;				\
}

TAILQ_HEAD(freelst, vnode) vnode_free_list;	/* vnode free list (LRU order) */
struct mntlist mountlist;			/* mounted filesystem list */
struct simplelock mountlist_slock;		/* protects mountlist */
static struct simplelock mntid_slock;		/* protects fsid generation */
struct simplelock mntvnode_slock;		/* protects per-mount vnode lists */
static struct simplelock spechash_slock;	/* protects special-device hash */
static struct simplelock vnode_free_list_slock;	/* protects vnode_free_list */

/*
 * Initialize the vnode management data structures.
 */
void
vntblinit()
{

	simple_lock_init(&mntvnode_slock);
	simple_lock_init(&mntid_slock);
	simple_lock_init(&spechash_slock);
	TAILQ_INIT(&vnode_free_list);
	simple_lock_init(&vnode_free_list_slock);
	CIRCLEQ_INIT(&mountlist);
}

/*
 * Mark a mount point as busy. Used to synchronize access and to delay
 * unmounting. Interlock is not released on failure.
 *
 * mp       - mount point to busy
 * flags    - LK_NOWAIT to fail rather than sleep when an unmount is
 *            in progress
 * interlkp - optional simple lock held by the caller; passed through to
 *            lockmgr() (released on success), left held on failure
 * p        - current process
 *
 * Returns 0 on success, ENOENT if the filesystem is being unmounted.
 */
int
vfs_busy(mp, flags, interlkp, p)
	struct mount *mp;
	int flags;
	struct simplelock *interlkp;
	struct proc *p;
{
	int lkflags;

	if (mp->mnt_flag & MNT_UNMOUNT) {
		if (flags & LK_NOWAIT)
			return (ENOENT);
		/*
		 * Wait for the unmount to finish, then report the mount
		 * gone.  The caller must not touch *mp after this sleep:
		 * the unmounting process may have freed it on wakeup.
		 */
		mp->mnt_flag |= MNT_MWAIT;
		sleep((caddr_t)mp, PVFS);
		return (ENOENT);
	}
	/* Take a shared hold; the interlock (if any) is dropped by lockmgr. */
	lkflags = LK_SHARED;
	if (interlkp)
		lkflags |= LK_INTERLOCK;
	if (lockmgr(&mp->mnt_lock, lkflags, interlkp, p))
		panic("vfs_busy: unexpected lock failure");
	return (0);
}

/*
 * Free a busy filesystem (release the hold taken by vfs_busy).
 * NOTE(review): the lockmgr call is expected to catch a release of an
 * un-busied filesystem -- confirm against the lockmgr implementation.
 */
void
vfs_unbusy(mp, p)
	struct mount *mp;
	struct proc *p;
{

	lockmgr(&mp->mnt_lock, LK_RELEASE, NULL, p);
}

/*
 * Lookup a filesystem type, and if found allocate and initialize
 * a mount structure for it.
 *
 * Devname is usually updated by mount(8) after booting.
 */
int
vfs_rootmountalloc(fstypename, devname, mpp)
	char *fstypename;	/* filesystem type name to look up in vfsconf */
	char *devname;		/* device name recorded in f_mntfromname */
	struct mount **mpp;	/* out: newly allocated, busied mount */
{
	struct proc *p = curproc;	/* XXX */
	struct vfsconf *vfsp;
	struct mount *mp;

	/* Find the named filesystem type on the vfsconf list. */
	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
		if (!strcmp(vfsp->vfc_name, fstypename))
			break;
	if (vfsp == NULL)
		return (ENODEV);
	mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK);
	bzero((char *)mp, (u_long)sizeof(struct mount));
	lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, 0);
	/* Mark it busy immediately so nothing else can touch the new mount. */
	(void)vfs_busy(mp, LK_NOWAIT, 0, p);
	LIST_INIT(&mp->mnt_vnodelist);
	mp->mnt_vfc = vfsp;
	mp->mnt_op = vfsp->vfc_vfsops;
	mp->mnt_flag = MNT_RDONLY;	/* root starts read-only */
	mp->mnt_vnodecovered = NULLVP;
	vfsp->vfc_refcount++;
	mp->mnt_stat.f_type = vfsp->vfc_typenum;
	mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
	strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN);
	mp->mnt_stat.f_mntonname[0] = '/';
	(void) copystr(devname, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 0);
	*mpp = mp;
	return (0);
}

/*
 * Find an appropriate filesystem to use for the root. If a filesystem
 * has not been preselected, walk through the list of known filesystems
 * trying those that have mountroot routines, and try them until one
 * works or we have tried them all.
 *
 * Returns 0 on success, ENODEV if no filesystem could mount root.
 */
int
vfs_mountroot()
{
	struct vfsconf *vfsp;
	extern int (*mountroot)(void);
	int error;

	/* A preselected root filesystem takes precedence. */
	if (mountroot != NULL)
		return ((*mountroot)());
	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) {
		if (vfsp->vfc_mountroot == NULL)
			continue;
		if ((error = (*vfsp->vfc_mountroot)()) == 0)
			return (0);
		printf("%s_mountroot failed: %d\n", vfsp->vfc_name, error);
	}
	return (ENODEV);
}

/*
 * Lookup a mount point by filesystem identifier.
 */
struct mount *
vfs_getvfs(fsid)
	fsid_t *fsid;
{
	register struct mount *mp;

	/* Walk the global mount list under its lock; return first match. */
	simple_lock(&mountlist_slock);
	for (mp = mountlist.cqh_first; mp != (void *)&mountlist;
	     mp = mp->mnt_list.cqe_next) {
		if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] &&
		    mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) {
			simple_unlock(&mountlist_slock);
			return (mp);
		}
	}
	simple_unlock(&mountlist_slock);
	/* Not found. */
	return ((struct mount *)0);
}

/*
 * Get a new unique fsid for the given mount point and store it in
 * mp->mnt_stat.f_fsid.  val[0] is built from a pseudo device number
 * (nblkdev + filesystem type, minor = generation counter) and val[1]
 * is the filesystem type number; uniqueness is ensured by probing
 * with vfs_getvfs().
 */
void
vfs_getnewfsid(mp)
	struct mount *mp;
{
	static u_short xxxfs_mntid;	/* generation counter, never 0 once used */

	fsid_t tfsid;
	int mtype;

	simple_lock(&mntid_slock);
	mtype = mp->mnt_vfc->vfc_typenum;
	mp->mnt_stat.f_fsid.val[0] = makedev(nblkdev + mtype, 0);
	mp->mnt_stat.f_fsid.val[1] = mtype;
	if (xxxfs_mntid == 0)
		++xxxfs_mntid;
	tfsid.val[0] = makedev(nblkdev + mtype, xxxfs_mntid);
	tfsid.val[1] = mtype;
	/* Probe candidate fsids until one is not in use by any mount. */
	if (mountlist.cqh_first != (void *)&mountlist) {
		while (vfs_getvfs(&tfsid)) {
			tfsid.val[0]++;
			xxxfs_mntid++;
		}
	}
	mp->mnt_stat.f_fsid.val[0] = tfsid.val[0];
	simple_unlock(&mntid_slock);
}

/*
 * Set vnode attributes to VNOVAL ("not specified") so that callers of
 * VOP_SETATTR can fill in only the fields they intend to change.
 */
void
vattr_null(vap)
	register struct vattr *vap;
{

	vap->va_type = VNON;
	vap->va_size = vap->va_bytes = VNOVAL;
	vap->va_mode = vap->va_nlink = vap->va_uid = vap->va_gid =
		vap->va_fsid = vap->va_fileid =
		vap->va_blocksize = vap->va_rdev =
		vap->va_atime.ts_sec = vap->va_atime.ts_nsec =
		vap->va_mtime.ts_sec = vap->va_mtime.ts_nsec =
		vap->va_ctime.ts_sec = vap->va_ctime.ts_nsec =
		vap->va_flags = vap->va_gen = VNOVAL;
	vap->va_vaflags = 0;
}

/*
 * Routines having to do with the management of the vnode table.
 */
extern int (**dead_vnodeop_p)();
static void	vclean __P((struct vnode *vp, int flag, struct proc *p));
extern void	vgonel __P((struct vnode *vp, struct proc *p));
long numvnodes;			/* current number of allocated vnodes */
extern struct vattr va_null;

/*
 * Return the next vnode from the free list.
 *
 * Allocates a fresh vnode while the pool is below desiredvnodes (or
 * when the free list is empty and the pool is below twice that);
 * otherwise recycles the least-recently-used unlocked vnode from the
 * free list.  The returned vnode has usecount 1 and is inserted on
 * mp's vnode list.
 *
 * Returns 0 on success, ENFILE if the table is full and nothing on the
 * free list could be locked.
 */
int
getnewvnode(tag, mp, vops, vpp)
	enum vtagtype tag;	/* filesystem tag for the new vnode */
	struct mount *mp;	/* mount to attach the vnode to (may be NULL) */
	int (**vops)();		/* vnode operations vector */
	struct vnode **vpp;	/* out: the new vnode, or 0 on failure */
{
	struct proc *p = curproc;	/* XXX */
	struct vnode *vp;
	int s;
	int cnt;		/* NOTE(review): unused, as is label "top" below */

top:
	simple_lock(&vnode_free_list_slock);
	if ((vnode_free_list.tqh_first == NULL &&
	     numvnodes < 2 * desiredvnodes) ||
	    numvnodes < desiredvnodes) {
		/* Below quota: allocate a brand new, zeroed vnode. */
		simple_unlock(&vnode_free_list_slock);
		vp = (struct vnode *)malloc((u_long)sizeof *vp,
		    M_VNODE, M_WAITOK);
		bzero((char *)vp, sizeof *vp);
		numvnodes++;
	} else {
		/*
		 * Recycle: take the first free-list vnode whose interlock
		 * we can acquire without blocking.
		 */
		for (vp = vnode_free_list.tqh_first;
		    vp != NULLVP; vp = vp->v_freelist.tqe_next) {
			if (simple_lock_try(&vp->v_interlock))
				break;
		}
		/*
		 * Unless this is a bad time of the month, at most
		 * the first NCPUS items on the free list are
		 * locked, so this is close enough to being empty.
		 */
		if (vp == NULLVP) {
			simple_unlock(&vnode_free_list_slock);
			tablefull("vnode");
			*vpp = 0;
			return (ENFILE);
		}
		if (vp->v_usecount)
			panic("free vnode isn't");
		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
		/* see comment on why 0xdeadb is set at end of vgone (below) */
		vp->v_freelist.tqe_prev = (struct vnode **)0xdeadb;
		simple_unlock(&vnode_free_list_slock);
		vp->v_lease = NULL;
		/* Scrub the old identity; vgonel also drops the interlock. */
		if (vp->v_type != VBAD)
			vgonel(vp, p);
		else
			simple_unlock(&vp->v_interlock);
#ifdef DIAGNOSTIC
		if (vp->v_data)
			panic("cleaned vnode isn't");
		s = splbio();
		if (vp->v_numoutput)
			panic("Clean vnode has pending I/O's");
		splx(s);
#endif
		/* Reset per-vnode bookkeeping (read-ahead/clustering state). */
		vp->v_flag = 0;
		vp->v_lastr = 0;
		vp->v_ralen = 0;
		vp->v_maxra = 0;
		vp->v_lastw = 0;
		vp->v_lasta = 0;
		vp->v_cstart = 0;
		vp->v_clen = 0;
		vp->v_socket = 0;
	}
	vp->v_type = VNON;
	cache_purge(vp);
	vp->v_tag = tag;
	vp->v_op = vops;
	insmntque(vp, mp);
	*vpp = vp;
	vp->v_usecount = 1;
	vp->v_data = 0;
	return (0);
}

/*
 * Move a vnode from one mount queue to another.
 * A NULL mp removes the vnode from any mount list.
 */
void
insmntque(vp, mp)
	struct vnode *vp;
	struct mount *mp;
{

	simple_lock(&mntvnode_slock);
	/*
	 * Delete from old mount point vnode list, if on one.
	 */
	if (vp->v_mount != NULL)
		LIST_REMOVE(vp, v_mntvnodes);
	/*
	 * Insert into list of vnodes for the new mount point, if available.
	 */
	if ((vp->v_mount = mp) != NULL)
		LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes);
	simple_unlock(&mntvnode_slock);
}

/*
 * Update outstanding I/O count and do wakeup if requested.
 */
void
vwakeup(bp)
	register struct buf *bp;
{
	register struct vnode *vp;

	bp->b_flags &= ~B_WRITEINPROG;
	if (vp = bp->b_vp) {
		if (--vp->v_numoutput < 0)
			panic("vwakeup: neg numoutput");
		/* Wake anyone in vinvalbuf/fsync waiting for writes to drain. */
		if ((vp->v_flag & VBWAIT) && vp->v_numoutput <= 0) {
			if (vp->v_numoutput < 0)
				panic("vwakeup: neg numoutput 2");
			vp->v_flag &= ~VBWAIT;
			wakeup((caddr_t)&vp->v_numoutput);
		}
	}
}

/*
 * Flush out and invalidate all buffers associated with a vnode.
 * Called with the underlying object locked.
 *
 * flags: V_SAVE writes back dirty data first (via VOP_FSYNC);
 *        V_SAVEMETA preserves indirect blocks (negative b_lblkno).
 * slpflag/slptimeo are passed to tsleep when waiting on busy buffers.
 * Returns 0 on success or an error from VOP_FSYNC/tsleep.
 */
int
vinvalbuf(vp, flags, cred, p, slpflag, slptimeo)
	register struct vnode *vp;
	int flags;
	struct ucred *cred;
	struct proc *p;
	int slpflag, slptimeo;
{
	register struct buf *bp;
	struct buf *nbp, *blist;
	int s, error;

	if (flags & V_SAVE) {
		if (error = VOP_FSYNC(vp, cred, MNT_WAIT, p))
			return (error);
		if (vp->v_dirtyblkhd.lh_first != NULL)
			panic("vinvalbuf: dirty bufs");
	}
	for (;;) {
		/*
		 * Pick a starting point: the clean list first, then the
		 * dirty list; with V_SAVEMETA, skip leading metadata
		 * buffers (negative logical block numbers).
		 */
		if ((blist = vp->v_cleanblkhd.lh_first) && flags & V_SAVEMETA)
			while (blist && blist->b_lblkno < 0)
				blist = blist->b_vnbufs.le_next;
		if (!blist && (blist = vp->v_dirtyblkhd.lh_first) &&
		    (flags & V_SAVEMETA))
			while (blist && blist->b_lblkno < 0)
				blist = blist->b_vnbufs.le_next;
		if (!blist)
			break;

		for (bp = blist; bp; bp = nbp) {
			nbp = bp->b_vnbufs.le_next;
			if (flags & V_SAVEMETA && bp->b_lblkno < 0)
				continue;
			s = splbio();
			if (bp->b_flags & B_BUSY) {
				/*
				 * Buffer is in use: wait for it, then
				 * restart the scan (the lists may have
				 * changed while we slept).
				 */
				bp->b_flags |= B_WANTED;
				error = tsleep((caddr_t)bp,
					slpflag | (PRIBIO + 1), "vinvalbuf",
					slptimeo);
				splx(s);
				if (error)
					return (error);
				break;
			}
			bremfree(bp);
			bp->b_flags |= B_BUSY;
			splx(s);
			/*
			 * XXX Since there are no node locks for NFS, I believe
			 * there is a slight chance that a delayed write will
			 * occur while sleeping just above, so check for it.
			 */
			if ((bp->b_flags & B_DELWRI) && (flags & V_SAVE)) {
				(void) VOP_BWRITE(bp);
				break;
			}
			bp->b_flags |= B_INVAL;
			brelse(bp);
		}
	}
	if (!(flags & V_SAVEMETA) &&
	    (vp->v_dirtyblkhd.lh_first || vp->v_cleanblkhd.lh_first))
		panic("vinvalbuf: flush failed");
	return (0);
}

/*
 * Associate a buffer with a vnode.
 * Takes a hold reference on the vnode and puts the buffer on its
 * clean-buffer list.
 */
void
bgetvp(vp, bp)
	register struct vnode *vp;
	register struct buf *bp;
{

	if (bp->b_vp)
		panic("bgetvp: not free");
	VHOLD(vp);
	bp->b_vp = vp;
	if (vp->v_type == VBLK || vp->v_type == VCHR)
		bp->b_dev = vp->v_rdev;
	else
		bp->b_dev = NODEV;
	/*
	 * Insert onto list for new vnode.
	 */
	bufinsvn(bp, &vp->v_cleanblkhd);
}

/*
 * Disassociate a buffer from a vnode.
 * Drops the hold reference taken by bgetvp.
 */
void
brelvp(bp)
	register struct buf *bp;
{
	struct vnode *vp;

	if (bp->b_vp == (struct vnode *) 0)
		panic("brelvp: NULL");
	/*
	 * Delete from old vnode list, if on one.
	 */
	if (bp->b_vnbufs.le_next != NOLIST)
		bufremvn(bp);
	vp = bp->b_vp;
	bp->b_vp = (struct vnode *) 0;
	HOLDRELE(vp);
}

/*
 * Reassign a buffer from one vnode to another.
 * Used to assign file specific control information
 * (indirect blocks) to the vnode to which they belong.
 */
void
reassignbuf(bp, newvp)
	register struct buf *bp;
	register struct vnode *newvp;
{
	register struct buflists *listheadp;

	if (newvp == NULL) {
		/* NOTE(review): message lacks a trailing newline. */
		printf("reassignbuf: NULL");
		return;
	}
	/*
	 * Delete from old vnode list, if on one.
	 */
	if (bp->b_vnbufs.le_next != NOLIST)
		bufremvn(bp);
	/*
	 * If dirty, put on list of dirty buffers;
	 * otherwise insert onto list of clean buffers.
	 */
	if (bp->b_flags & B_DELWRI)
		listheadp = &newvp->v_dirtyblkhd;
	else
		listheadp = &newvp->v_cleanblkhd;
	bufinsvn(bp, listheadp);
}

/*
 * Create a vnode for a block device.
 * Used for root filesystem, argdev, and swap areas.
 * Also used for memory file system special devices.
 *
 * Returns 0 with *vpp set, or ENODEV/getnewvnode error with *vpp NULL.
 */
int
bdevvp(dev, vpp)
	dev_t dev;
	struct vnode **vpp;
{
	register struct vnode *vp;
	struct vnode *nvp;
	int error;

	if (dev == NODEV) {
		*vpp = NULLVP;
		return (ENODEV);
	}
	error = getnewvnode(VT_NON, (struct mount *)0, spec_vnodeop_p, &nvp);
	if (error) {
		*vpp = NULLVP;
		return (error);
	}
	vp = nvp;
	vp->v_type = VBLK;
	/* If an alias already exists for this device, use it instead. */
	if (nvp = checkalias(vp, dev, (struct mount *)0)) {
		vput(vp);
		vp = nvp;
	}
	*vpp = vp;
	return (0);
}

/*
 * Check to see if the new vnode represents a special device
 * for which we already have a vnode (either because of
 * bdevvp() or because of a different vnode representing
 * the same block device). If such an alias exists, deallocate
 * the existing contents and return the aliased vnode. The
 * caller is responsible for filling it with its new contents.
 */
struct vnode *
checkalias(nvp, nvp_rdev, mp)
	register struct vnode *nvp;	/* candidate new vnode */
	dev_t nvp_rdev;			/* device number it represents */
	struct mount *mp;		/* mount for a reused alias */
{
	struct proc *p = curproc;	/* XXX */
	struct vnode *vp;
	struct vnode **vpp;

	/* Only block and character devices can have aliases. */
	if (nvp->v_type != VBLK && nvp->v_type != VCHR)
		return (NULLVP);

	vpp = &speclisth[SPECHASH(nvp_rdev)];
loop:
	simple_lock(&spechash_slock);
	for (vp = *vpp; vp; vp = vp->v_specnext) {
		if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type)
			continue;
		/*
		 * Alias, but not in use, so flush it out.
		 */
		simple_lock(&vp->v_interlock);
		if (vp->v_usecount == 0) {
			simple_unlock(&spechash_slock);
			vgonel(vp, p);
			goto loop;
		}
		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, p)) {
			simple_unlock(&spechash_slock);
			goto loop;
		}
		break;
	}
	if (vp == NULL || vp->v_tag != VT_NON) {
		/*
		 * No reusable alias: give nvp its own specinfo and link
		 * it into the hash chain; mark both vnodes aliased when
		 * another vnode for the same device exists.
		 */
		MALLOC(nvp->v_specinfo, struct specinfo *,
			sizeof(struct specinfo), M_VNODE, M_WAITOK);
		nvp->v_rdev = nvp_rdev;
		nvp->v_hashchain = vpp;
		nvp->v_specnext = *vpp;
		nvp->v_specflags = 0;
		simple_unlock(&spechash_slock);
		*vpp = nvp;
		if (vp != NULLVP) {
			nvp->v_flag |= VALIASED;
			vp->v_flag |= VALIASED;
			vput(vp);
		}
		return (NULLVP);
	}
	/*
	 * Reuse the existing VT_NON alias: clean it out and hand it to
	 * the caller, neutralizing nvp (its type becomes VNON).
	 */
	simple_unlock(&spechash_slock);
	VOP_UNLOCK(vp, 0, p);
	simple_lock(&vp->v_interlock);
	vclean(vp, 0, p);
	vp->v_op = nvp->v_op;
	vp->v_tag = nvp->v_tag;
	nvp->v_type = VNON;
	insmntque(vp, mp);
	return (vp);
}

/*
 * Grab a particular vnode from the free list, increment its
 * reference count and lock it. The vnode lock bit is set the
 * vnode is being eliminated in vgone. The process is awakened
 * when the transition is completed, and an error returned to
 * indicate that the vnode is no longer usable (possibly having
 * been changed to a new file system type).
 */
int
vget(vp, flags, p)
	struct vnode *vp;
	int flags;	/* LK_INTERLOCK if interlock already held; LK_TYPE_MASK bits to lock */
	struct proc *p;
{
	int error;

	/*
	 * If the vnode is in the process of being cleaned out for
	 * another use, we wait for the cleaning to finish and then
	 * return failure. Cleaning is determined by checking that
	 * the VXLOCK flag is set.
	 */
	if ((flags & LK_INTERLOCK) == 0)
		simple_lock(&vp->v_interlock);
	if (vp->v_flag & VXLOCK) {
		vp->v_flag |= VXWANT;
		simple_unlock(&vp->v_interlock);
		tsleep((caddr_t)vp, PINOD, "vget", 0);
		return (ENOENT);
	}
	/* First reference pulls the vnode off the free list. */
	if (vp->v_usecount == 0) {
		simple_lock(&vnode_free_list_slock);
		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
		simple_unlock(&vnode_free_list_slock);
	}
	vp->v_usecount++;
	if (flags & LK_TYPE_MASK) {
		if (error = vn_lock(vp, flags | LK_INTERLOCK, p))
			vrele(vp);
		return (error);
	}
	simple_unlock(&vp->v_interlock);
	return (0);
}

/*
 * Stubs to use when there is no locking to be done on the underlying object.
 *
 * Getting a lock just clears the interlock if necessary.
 */
int
vop_nolock(ap)
	struct vop_lock_args /* {
		struct vnode *a_vp;
		int a_flags;
		struct proc *a_p;
	} */ *ap;
{
	struct vnode *vp = ap->a_vp;

	/*
	 * Since we are not using the lock manager, we must clear
	 * the interlock here.
	 */
	if (ap->a_flags & LK_INTERLOCK)
		simple_unlock(&vp->v_interlock);
	return (0);
}

/*
 * Unlock has nothing to do.
 */
int
vop_nounlock(ap)
	struct vop_unlock_args /* {
		struct vnode *a_vp;
		int a_flags;
		struct proc *a_p;
	} */ *ap;
{

	return (0);
}

/*
 * Nothing is ever locked.
 */
int
vop_noislocked(ap)
	struct vop_islocked_args /* {
		struct vnode *a_vp;
	} */ *ap;
{

	return (0);
}

/*
 * Vnode reference.
740 */ 741 void 742 vref(vp) 743 struct vnode *vp; 744 { 745 746 simple_lock(&vp->v_interlock); 747 if (vp->v_usecount <= 0) 748 panic("vref used where vget required"); 749 vp->v_usecount++; 750 simple_unlock(&vp->v_interlock); 751 } 752 753 /* 754 * vput(), just unlock and vrele() 755 */ 756 void 757 vput(vp) 758 struct vnode *vp; 759 { 760 struct proc *p = curproc; /* XXX */ 761 762 #ifdef DIGANOSTIC 763 if (vp == NULL) 764 panic("vput: null vp"); 765 #endif 766 simple_lock(&vp->v_interlock); 767 vp->v_usecount--; 768 if (vp->v_usecount > 0) { 769 simple_unlock(&vp->v_interlock); 770 VOP_UNLOCK(vp, 0, p); 771 return; 772 } 773 #ifdef DIAGNOSTIC 774 if (vp->v_usecount < 0 || vp->v_writecount != 0) { 775 vprint("vput: bad ref count", vp); 776 panic("vput: ref cnt"); 777 } 778 #endif 779 /* 780 * insert at tail of LRU list 781 */ 782 simple_lock(&vnode_free_list_slock); 783 TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist); 784 simple_unlock(&vnode_free_list_slock); 785 simple_unlock(&vp->v_interlock); 786 VOP_INACTIVE(vp, p); 787 } 788 789 /* 790 * Vnode release. 791 * If count drops to zero, call inactive routine and return to freelist. 
 */
void
vrele(vp)
	struct vnode *vp;
{
	struct proc *p = curproc;	/* XXX */

#ifdef DIAGNOSTIC
	if (vp == NULL)
		panic("vrele: null vp");
#endif
	simple_lock(&vp->v_interlock);
	vp->v_usecount--;
	if (vp->v_usecount > 0) {
		/* Still referenced elsewhere; nothing more to do. */
		simple_unlock(&vp->v_interlock);
		return;
	}
#ifdef DIAGNOSTIC
	if (vp->v_usecount < 0 || vp->v_writecount != 0) {
		vprint("vrele: bad ref count", vp);
		panic("vrele: ref cnt");
	}
#endif
	/*
	 * insert at tail of LRU list
	 */
	simple_lock(&vnode_free_list_slock);
	TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
	simple_unlock(&vnode_free_list_slock);
	/*
	 * Unlike vput, the vnode is unlocked on entry: take the vnode
	 * lock (consuming the interlock) before deactivating it.
	 */
	if (vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK, p) == 0)
		VOP_INACTIVE(vp, p);
}

#ifdef DIAGNOSTIC
/*
 * Page or buffer structure gets a reference.
 * (Non-DIAGNOSTIC kernels use the VHOLD/HOLDRELE macros instead.)
 */
void
vhold(vp)
	register struct vnode *vp;
{

	simple_lock(&vp->v_interlock);
	vp->v_holdcnt++;
	simple_unlock(&vp->v_interlock);
}

/*
 * Page or buffer structure frees a reference.
 */
void
holdrele(vp)
	register struct vnode *vp;
{

	simple_lock(&vp->v_interlock);
	if (vp->v_holdcnt <= 0)
		panic("holdrele: holdcnt");
	vp->v_holdcnt--;
	simple_unlock(&vp->v_interlock);
}
#endif /* DIAGNOSTIC */

/*
 * Remove any vnodes in the vnode table belonging to mount point mp.
 *
 * If MNT_NOFORCE is specified, there should not be any active ones,
 * return error if any are found (nb: this is a user error, not a
 * system error). If MNT_FORCE is specified, detach any active vnodes
 * that are found.
 */
#ifdef DIAGNOSTIC
int busyprt = 0;	/* print out busy vnodes */
struct ctldebug debug1 = { "busyprt", &busyprt };
#endif

/*
 * Flush vnodes belonging to mount point mp (see comment above).
 * skipvp, if non-NULL, is left untouched.  flags may include
 * SKIPSYSTEM, WRITECLOSE and FORCECLOSE.
 * Returns 0 on success, EBUSY if busy vnodes remain.
 */
int
vflush(mp, skipvp, flags)
	struct mount *mp;
	struct vnode *skipvp;
	int flags;
{
	struct proc *p = curproc;	/* XXX */
	struct vnode *vp, *nvp;
	int busy = 0;

	simple_lock(&mntvnode_slock);
loop:
	for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) {
		/* Restart if the vnode moved off this mount while unlocked. */
		if (vp->v_mount != mp)
			goto loop;
		nvp = vp->v_mntvnodes.le_next;
		/*
		 * Skip over a selected vnode.
		 */
		if (vp == skipvp)
			continue;

		simple_lock(&vp->v_interlock);
		/*
		 * Skip over a vnodes marked VSYSTEM.
		 */
		if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM)) {
			simple_unlock(&vp->v_interlock);
			continue;
		}
		/*
		 * If WRITECLOSE is set, only flush out regular file
		 * vnodes open for writing.
		 */
		if ((flags & WRITECLOSE) &&
		    (vp->v_writecount == 0 || vp->v_type != VREG)) {
			simple_unlock(&vp->v_interlock);
			continue;
		}
		/*
		 * With v_usecount == 0, all we need to do is clear
		 * out the vnode data structures and we are done.
		 */
		if (vp->v_usecount == 0) {
			simple_unlock(&mntvnode_slock);
			vgonel(vp, p);
			simple_lock(&mntvnode_slock);
			continue;
		}
		/*
		 * If FORCECLOSE is set, forcibly close the vnode.
		 * For block or character devices, revert to an
		 * anonymous device. For all other files, just kill them.
		 */
		if (flags & FORCECLOSE) {
			simple_unlock(&mntvnode_slock);
			if (vp->v_type != VBLK && vp->v_type != VCHR) {
				vgonel(vp, p);
			} else {
				vclean(vp, 0, p);
				vp->v_op = spec_vnodeop_p;
				insmntque(vp, (struct mount *)0);
			}
			simple_lock(&mntvnode_slock);
			continue;
		}
#ifdef DIAGNOSTIC
		if (busyprt)
			vprint("vflush: busy vnode", vp);
#endif
		simple_unlock(&vp->v_interlock);
		busy++;
	}
	simple_unlock(&mntvnode_slock);
	if (busy)
		return (EBUSY);
	return (0);
}

/*
 * Disassociate the underlying file system from a vnode.
 * The vnode interlock is held on entry.
 * flags may include DOCLOSE to flush buffers and close the object.
 */
static void
vclean(vp, flags, p)
	struct vnode *vp;
	int flags;
	struct proc *p;
{
	int active;

	/*
	 * Check to see if the vnode is in use.
	 * If so we have to reference it before we clean it out
	 * so that its count cannot fall to zero and generate a
	 * race against ourselves to recycle it.
	 */
	if (active = vp->v_usecount)
		vp->v_usecount++;
	/*
	 * Prevent the vnode from being recycled or
	 * brought into use while we clean it out.
	 */
	if (vp->v_flag & VXLOCK)
		panic("vclean: deadlock");
	vp->v_flag |= VXLOCK;
	/*
	 * Even if the count is zero, the VOP_INACTIVE routine may still
	 * have the object locked while it cleans it out. The VOP_LOCK
	 * ensures that the VOP_INACTIVE routine is done with its work.
	 * For active vnodes, it ensures that no other activity can
	 * occur while the underlying object is being cleaned out.
	 */
	VOP_LOCK(vp, LK_DRAIN | LK_INTERLOCK, p);
	/*
	 * Clean out any buffers associated with the vnode.
	 */
	if (flags & DOCLOSE)
		vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0);
	/*
	 * If purging an active vnode, it must be closed and
	 * deactivated before being reclaimed. Note that the
	 * VOP_INACTIVE will unlock the vnode.
	 */
	if (active) {
		if (flags & DOCLOSE)
			VOP_CLOSE(vp, IO_NDELAY, NOCRED, p);
		VOP_INACTIVE(vp, p);
	} else {
		/*
		 * Any other processes trying to obtain this lock must first
		 * wait for VXLOCK to clear, then call the new lock operation.
		 */
		VOP_UNLOCK(vp, 0, p);
	}
	/*
	 * Reclaim the vnode.
	 */
	if (VOP_RECLAIM(vp, p))
		panic("vclean: cannot reclaim");
	if (active)
		vrele(vp);
	cache_purge(vp);

	/*
	 * Done with purge, notify sleepers of the grim news.
	 */
	vp->v_op = dead_vnodeop_p;
	vp->v_tag = VT_NON;
	vp->v_flag &= ~VXLOCK;
	if (vp->v_flag & VXWANT) {
		vp->v_flag &= ~VXWANT;
		wakeup((caddr_t)vp);
	}
}

/*
 * Eliminate all activity associated with the requested vnode
 * and with all vnodes aliased to the requested vnode.
 */
int
vop_revoke(ap)
	struct vop_revoke_args /* {
		struct vnode *a_vp;
		int a_flags;
	} */ *ap;
{
	struct vnode *vp, *vq;
	struct proc *p = curproc;	/* XXX */

#ifdef DIAGNOSTIC
	if ((ap->a_flags & REVOKEALL) == 0)
		panic("vop_revoke");
#endif

	vp = ap->a_vp;
	simple_lock(&vp->v_interlock);

	if (vp->v_flag & VALIASED) {
		/*
		 * If a vgone (or vclean) is already in progress,
		 * wait until it is done and return.
		 */
		if (vp->v_flag & VXLOCK) {
			vp->v_flag |= VXWANT;
			simple_unlock(&vp->v_interlock);
			tsleep((caddr_t)vp, PINOD, "vop_revokeall", 0);
			return (0);
		}
		/*
		 * Ensure that vp will not be vgone'd while we
		 * are eliminating its aliases.
		 */
		vp->v_flag |= VXLOCK;
		simple_unlock(&vp->v_interlock);
		/* vgone each alias of vp until none remain. */
		while (vp->v_flag & VALIASED) {
			simple_lock(&spechash_slock);
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_rdev != vp->v_rdev ||
				    vq->v_type != vp->v_type || vp == vq)
					continue;
				simple_unlock(&spechash_slock);
				vgone(vq);
				break;
			}
			if (vq == NULLVP)
				simple_unlock(&spechash_slock);
		}
		/*
		 * Remove the lock so that vgone below will
		 * really eliminate the vnode after which time
		 * vgone will awaken any sleepers.
		 */
		simple_lock(&vp->v_interlock);
		vp->v_flag &= ~VXLOCK;
	}
	vgonel(vp, p);
	return (0);
}

/*
 * Recycle an unused vnode to the front of the free list.
 * Release the passed interlock if the vnode will be recycled.
 * Returns 1 if the vnode was recycled, 0 if it was in use.
 */
int
vrecycle(vp, inter_lkp, p)
	struct vnode *vp;
	struct simplelock *inter_lkp;
	struct proc *p;
{

	simple_lock(&vp->v_interlock);
	if (vp->v_usecount == 0) {
		if (inter_lkp)
			simple_unlock(inter_lkp);
		vgonel(vp, p);
		return (1);
	}
	simple_unlock(&vp->v_interlock);
	return (0);
}

/*
 * Eliminate all activity associated with a vnode
 * in preparation for reuse.
 */
void
vgone(vp)
	struct vnode *vp;
{
	struct proc *p = curproc;	/* XXX */

	simple_lock(&vp->v_interlock);
	vgonel(vp, p);
}

/*
 * vgone, with the vp interlock held.
 * On return the vnode is cleaned (dead vnodeops), off any mount list
 * and special-device alias chain, and has type VBAD.
 */
void
vgonel(vp, p)
	struct vnode *vp;
	struct proc *p;
{
	struct vnode *vq;
	struct vnode *vx;

	/*
	 * If a vgone (or vclean) is already in progress,
	 * wait until it is done and return.
	 */
	if (vp->v_flag & VXLOCK) {
		vp->v_flag |= VXWANT;
		simple_unlock(&vp->v_interlock);
		tsleep((caddr_t)vp, PINOD, "vgone", 0);
		return;
	}
	/*
	 * Clean out the filesystem specific data.
	 */
	vclean(vp, DOCLOSE, p);
	/*
	 * Delete from old mount point vnode list, if on one.
	 */
	if (vp->v_mount != NULL)
		insmntque(vp, (struct mount *)0);
	/*
	 * If special device, remove it from special device alias list
	 * if it is on one.
	 */
	if ((vp->v_type == VBLK || vp->v_type == VCHR) && vp->v_specinfo != 0) {
		simple_lock(&spechash_slock);
		/* Unlink vp from its hash chain. */
		if (*vp->v_hashchain == vp) {
			*vp->v_hashchain = vp->v_specnext;
		} else {
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_specnext != vp)
					continue;
				vq->v_specnext = vp->v_specnext;
				break;
			}
			if (vq == NULL)
				panic("missing bdev");
		}
		if (vp->v_flag & VALIASED) {
			/*
			 * If exactly one alias remains, it is no longer
			 * aliased; vx is the first remaining alias, vq
			 * non-NULL means a second one was found.
			 */
			vx = NULL;
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_rdev != vp->v_rdev ||
				    vq->v_type != vp->v_type)
					continue;
				if (vx)
					break;
				vx = vq;
			}
			if (vx == NULL)
				panic("missing alias");
			if (vq == NULL)
				vx->v_flag &= ~VALIASED;
			vp->v_flag &= ~VALIASED;
		}
		simple_unlock(&spechash_slock);
		FREE(vp->v_specinfo, M_VNODE);
		vp->v_specinfo = NULL;
	}
	/*
	 * If it is on the freelist and not already at the head,
	 * move it to the head of the list. The test of the back
	 * pointer and the reference count of zero is because
	 * it will be removed from the free list by getnewvnode,
	 * but will not have its reference count incremented until
	 * after calling vgone. If the reference count were
	 * incremented first, vgone would (incorrectly) try to
	 * close the previous instance of the underlying object.
	 * So, the back pointer is explicitly set to `0xdeadb' in
	 * getnewvnode after removing it from the freelist to ensure
	 * that we do not try to move it here.
	 */
	if (vp->v_usecount == 0) {
		simple_lock(&vnode_free_list_slock);
		if ((vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb) &&
		    vnode_free_list.tqh_first != vp) {
			TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
			TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
		}
		simple_unlock(&vnode_free_list_slock);
	}
	vp->v_type = VBAD;
}

/*
 * Lookup a vnode by device number.
 * Returns 1 with *vpp set if found, 0 otherwise.
 */
int
vfinddev(dev, type, vpp)
	dev_t dev;
	enum vtype type;
	struct vnode **vpp;
{
	struct vnode *vp;
	int rc = 0;

	simple_lock(&spechash_slock);
	for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) {
		if (dev != vp->v_rdev || type != vp->v_type)
			continue;
		*vpp = vp;
		rc = 1;
		break;
	}
	simple_unlock(&spechash_slock);
	return (rc);
}

/*
 * Calculate the total number of references to a special device,
 * summing the use counts of all aliases of vp.  Unused aliases
 * encountered along the way are flushed out.
 */
int
vcount(vp)
	struct vnode *vp;
{
	struct vnode *vq, *vnext;
	int count;

loop:
	if ((vp->v_flag & VALIASED) == 0)
		return (vp->v_usecount);
	simple_lock(&spechash_slock);
	for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) {
		vnext = vq->v_specnext;
		if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type)
			continue;
		/*
		 * Alias, but not in use, so flush it out.
		 */
		if (vq->v_usecount == 0 && vq != vp) {
			simple_unlock(&spechash_slock);
			vgone(vq);
			goto loop;
		}
		count += vq->v_usecount;
	}
	simple_unlock(&spechash_slock);
	return (count);
}

/*
 * Print out a description of a vnode.
1275 */ 1276 static char *typename[] = 1277 { "VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD" }; 1278 1279 void 1280 vprint(label, vp) 1281 char *label; 1282 register struct vnode *vp; 1283 { 1284 char buf[64]; 1285 1286 if (label != NULL) 1287 printf("%s: ", label); 1288 printf("type %s, usecount %d, writecount %d, refcount %d,", 1289 typename[vp->v_type], vp->v_usecount, vp->v_writecount, 1290 vp->v_holdcnt); 1291 buf[0] = '\0'; 1292 if (vp->v_flag & VROOT) 1293 strcat(buf, "|VROOT"); 1294 if (vp->v_flag & VTEXT) 1295 strcat(buf, "|VTEXT"); 1296 if (vp->v_flag & VSYSTEM) 1297 strcat(buf, "|VSYSTEM"); 1298 if (vp->v_flag & VXLOCK) 1299 strcat(buf, "|VXLOCK"); 1300 if (vp->v_flag & VXWANT) 1301 strcat(buf, "|VXWANT"); 1302 if (vp->v_flag & VBWAIT) 1303 strcat(buf, "|VBWAIT"); 1304 if (vp->v_flag & VALIASED) 1305 strcat(buf, "|VALIASED"); 1306 if (buf[0] != '\0') 1307 printf(" flags (%s)", &buf[1]); 1308 if (vp->v_data == NULL) { 1309 printf("\n"); 1310 } else { 1311 printf("\n\t"); 1312 VOP_PRINT(vp); 1313 } 1314 } 1315 1316 #ifdef DEBUG 1317 /* 1318 * List all of the locked vnodes in the system. 1319 * Called when debugging the kernel. 1320 */ 1321 void 1322 printlockedvnodes() 1323 { 1324 struct proc *p = curproc; /* XXX */ 1325 struct mount *mp, *nmp; 1326 struct vnode *vp; 1327 1328 printf("Locked vnodes\n"); 1329 simple_lock(&mountlist_slock); 1330 for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) { 1331 if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) { 1332 nmp = mp->mnt_list.cqe_next; 1333 continue; 1334 } 1335 for (vp = mp->mnt_vnodelist.lh_first; 1336 vp != NULL; 1337 vp = vp->v_mntvnodes.le_next) { 1338 if (VOP_ISLOCKED(vp)) 1339 vprint((char *)0, vp); 1340 } 1341 simple_lock(&mountlist_slock); 1342 nmp = mp->mnt_list.cqe_next; 1343 vfs_unbusy(mp, p); 1344 } 1345 simple_unlock(&mountlist_slock); 1346 } 1347 #endif 1348 1349 /* 1350 * Top level filesystem related information gathering. 
1351 */ 1352 int 1353 vfs_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p) 1354 int *name; 1355 u_int namelen; 1356 void *oldp; 1357 size_t *oldlenp; 1358 void *newp; 1359 size_t newlen; 1360 struct proc *p; 1361 { 1362 struct ctldebug *cdp; 1363 struct vfsconf *vfsp; 1364 1365 /* all sysctl names at this level are at least name and field */ 1366 if (namelen < 2) 1367 return (ENOTDIR); /* overloaded */ 1368 if (name[0] != VFS_GENERIC) { 1369 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) 1370 if (vfsp->vfc_typenum == name[0]) 1371 break; 1372 if (vfsp == NULL) 1373 return (EOPNOTSUPP); 1374 return ((*vfsp->vfc_vfsops->vfs_sysctl)(&name[1], namelen - 1, 1375 oldp, oldlenp, newp, newlen, p)); 1376 } 1377 switch (name[1]) { 1378 case VFS_MAXTYPENUM: 1379 return (sysctl_rdint(oldp, oldlenp, newp, maxvfsconf)); 1380 case VFS_CONF: 1381 if (namelen < 3) 1382 return (ENOTDIR); /* overloaded */ 1383 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) 1384 if (vfsp->vfc_typenum == name[2]) 1385 break; 1386 if (vfsp == NULL) 1387 return (EOPNOTSUPP); 1388 return (sysctl_rdstruct(oldp, oldlenp, newp, vfsp, 1389 sizeof(struct vfsconf))); 1390 } 1391 return (EOPNOTSUPP); 1392 } 1393 1394 int kinfo_vdebug = 1; 1395 int kinfo_vgetfailed; 1396 #define KINFO_VNODESLOP 10 1397 /* 1398 * Dump vnode list (via sysctl). 1399 * Copyout address of vnode followed by vnode. 
1400 */ 1401 /* ARGSUSED */ 1402 int 1403 sysctl_vnode(where, sizep, p) 1404 char *where; 1405 size_t *sizep; 1406 struct proc *p; 1407 { 1408 struct mount *mp, *nmp; 1409 struct vnode *nvp, *vp; 1410 char *bp = where, *savebp; 1411 char *ewhere; 1412 int error; 1413 1414 #define VPTRSZ sizeof (struct vnode *) 1415 #define VNODESZ sizeof (struct vnode) 1416 if (where == NULL) { 1417 *sizep = (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ); 1418 return (0); 1419 } 1420 ewhere = where + *sizep; 1421 1422 simple_lock(&mountlist_slock); 1423 for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) { 1424 if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock, p)) { 1425 nmp = mp->mnt_list.cqe_next; 1426 continue; 1427 } 1428 savebp = bp; 1429 again: 1430 simple_lock(&mntvnode_slock); 1431 for (vp = mp->mnt_vnodelist.lh_first; 1432 vp != NULL; 1433 vp = nvp) { 1434 /* 1435 * Check that the vp is still associated with 1436 * this filesystem. RACE: could have been 1437 * recycled onto the same filesystem. 1438 */ 1439 if (vp->v_mount != mp) { 1440 simple_unlock(&mntvnode_slock); 1441 if (kinfo_vdebug) 1442 printf("kinfo: vp changed\n"); 1443 bp = savebp; 1444 goto again; 1445 } 1446 nvp = vp->v_mntvnodes.le_next; 1447 if (bp + VPTRSZ + VNODESZ > ewhere) { 1448 simple_unlock(&mntvnode_slock); 1449 *sizep = bp - where; 1450 return (ENOMEM); 1451 } 1452 simple_unlock(&mntvnode_slock); 1453 if ((error = copyout((caddr_t)&vp, bp, VPTRSZ)) || 1454 (error = copyout((caddr_t)vp, bp + VPTRSZ, VNODESZ))) 1455 return (error); 1456 bp += VPTRSZ + VNODESZ; 1457 simple_lock(&mntvnode_slock); 1458 } 1459 simple_unlock(&mntvnode_slock); 1460 simple_lock(&mountlist_slock); 1461 nmp = mp->mnt_list.cqe_next; 1462 vfs_unbusy(mp, p); 1463 } 1464 simple_unlock(&mountlist_slock); 1465 1466 *sizep = bp - where; 1467 return (0); 1468 } 1469 1470 /* 1471 * Check to see if a filesystem is mounted on a block device. 
1472 */ 1473 int 1474 vfs_mountedon(vp) 1475 struct vnode *vp; 1476 { 1477 struct vnode *vq; 1478 int error = 0; 1479 1480 if (vp->v_specflags & SI_MOUNTEDON) 1481 return (EBUSY); 1482 if (vp->v_flag & VALIASED) { 1483 simple_lock(&spechash_slock); 1484 for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) { 1485 if (vq->v_rdev != vp->v_rdev || 1486 vq->v_type != vp->v_type) 1487 continue; 1488 if (vq->v_specflags & SI_MOUNTEDON) { 1489 error = EBUSY; 1490 break; 1491 } 1492 } 1493 simple_unlock(&spechash_slock); 1494 } 1495 return (error); 1496 } 1497 1498 /* 1499 * Unmount all filesystems. The list is traversed in reverse order 1500 * of mounting to avoid dependencies. 1501 */ 1502 void 1503 vfs_unmountall() 1504 { 1505 struct mount *mp, *nmp; 1506 struct proc *p = curproc; /* XXX */ 1507 1508 /* 1509 * Since this only runs when rebooting, it is not interlocked. 1510 */ 1511 for (mp = mountlist.cqh_last; mp != (void *)&mountlist; mp = nmp) { 1512 nmp = mp->mnt_list.cqe_prev; 1513 (void) dounmount(mp, MNT_FORCE, p); 1514 } 1515 } 1516 1517 /* 1518 * Build hash lists of net addresses and hang them off the mount point. 1519 * Called by ufs_mount() to set up the lists of export addresses. 
1520 */ 1521 static int 1522 vfs_hang_addrlist(mp, nep, argp) 1523 struct mount *mp; 1524 struct netexport *nep; 1525 struct export_args *argp; 1526 { 1527 register struct netcred *np; 1528 register struct radix_node_head *rnh; 1529 register int i; 1530 struct radix_node *rn; 1531 struct sockaddr *saddr, *smask = 0; 1532 struct domain *dom; 1533 int error; 1534 1535 if (argp->ex_addrlen == 0) { 1536 if (mp->mnt_flag & MNT_DEFEXPORTED) 1537 return (EPERM); 1538 np = &nep->ne_defexported; 1539 np->netc_exflags = argp->ex_flags; 1540 np->netc_anon = argp->ex_anon; 1541 np->netc_anon.cr_ref = 1; 1542 mp->mnt_flag |= MNT_DEFEXPORTED; 1543 return (0); 1544 } 1545 i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen; 1546 np = (struct netcred *)malloc(i, M_NETADDR, M_WAITOK); 1547 bzero((caddr_t)np, i); 1548 saddr = (struct sockaddr *)(np + 1); 1549 if (error = copyin(argp->ex_addr, (caddr_t)saddr, argp->ex_addrlen)) 1550 goto out; 1551 if (saddr->sa_len > argp->ex_addrlen) 1552 saddr->sa_len = argp->ex_addrlen; 1553 if (argp->ex_masklen) { 1554 smask = (struct sockaddr *)((caddr_t)saddr + argp->ex_addrlen); 1555 error = copyin(argp->ex_addr, (caddr_t)smask, argp->ex_masklen); 1556 if (error) 1557 goto out; 1558 if (smask->sa_len > argp->ex_masklen) 1559 smask->sa_len = argp->ex_masklen; 1560 } 1561 i = saddr->sa_family; 1562 if ((rnh = nep->ne_rtable[i]) == 0) { 1563 /* 1564 * Seems silly to initialize every AF when most are not 1565 * used, do so on demand here 1566 */ 1567 for (dom = domains; dom; dom = dom->dom_next) 1568 if (dom->dom_family == i && dom->dom_rtattach) { 1569 dom->dom_rtattach((void **)&nep->ne_rtable[i], 1570 dom->dom_rtoffset); 1571 break; 1572 } 1573 if ((rnh = nep->ne_rtable[i]) == 0) { 1574 error = ENOBUFS; 1575 goto out; 1576 } 1577 } 1578 rn = (*rnh->rnh_addaddr)((caddr_t)saddr, (caddr_t)smask, rnh, 1579 np->netc_rnodes); 1580 if (rn == 0) { 1581 /* 1582 * One of the reasons that rnh_addaddr may fail is that 1583 * the entry already 
exists. To check for this case, we 1584 * look up the entry to see if it is there. If so, we 1585 * do not need to make a new entry but do return success. 1586 */ 1587 free(np, M_NETADDR); 1588 rn = (*rnh->rnh_matchaddr)((caddr_t)saddr, rnh); 1589 if (rn != 0 && (rn->rn_flags & RNF_ROOT) == 0 && 1590 ((struct netcred *)rn)->netc_exflags == argp->ex_flags && 1591 !bcmp((caddr_t)&((struct netcred *)rn)->netc_anon, 1592 (caddr_t)&argp->ex_anon, sizeof(struct ucred))) 1593 return (0); 1594 return (EPERM); 1595 } 1596 np->netc_exflags = argp->ex_flags; 1597 np->netc_anon = argp->ex_anon; 1598 np->netc_anon.cr_ref = 1; 1599 return (0); 1600 out: 1601 free(np, M_NETADDR); 1602 return (error); 1603 } 1604 1605 /* ARGSUSED */ 1606 static int 1607 vfs_free_netcred(rn, w) 1608 struct radix_node *rn; 1609 caddr_t w; 1610 { 1611 register struct radix_node_head *rnh = (struct radix_node_head *)w; 1612 1613 (*rnh->rnh_deladdr)(rn->rn_key, rn->rn_mask, rnh); 1614 free((caddr_t)rn, M_NETADDR); 1615 return (0); 1616 } 1617 1618 /* 1619 * Free the net address hash lists that are hanging off the mount points. 
1620 */ 1621 static void 1622 vfs_free_addrlist(nep) 1623 struct netexport *nep; 1624 { 1625 register int i; 1626 register struct radix_node_head *rnh; 1627 1628 for (i = 0; i <= AF_MAX; i++) 1629 if (rnh = nep->ne_rtable[i]) { 1630 (*rnh->rnh_walktree)(rnh, vfs_free_netcred, 1631 (caddr_t)rnh); 1632 free((caddr_t)rnh, M_RTABLE); 1633 nep->ne_rtable[i] = 0; 1634 } 1635 } 1636 1637 int 1638 vfs_export(mp, nep, argp) 1639 struct mount *mp; 1640 struct netexport *nep; 1641 struct export_args *argp; 1642 { 1643 int error; 1644 1645 if (argp->ex_flags & MNT_DELEXPORT) { 1646 vfs_free_addrlist(nep); 1647 mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED); 1648 } 1649 if (argp->ex_flags & MNT_EXPORTED) { 1650 if (error = vfs_hang_addrlist(mp, nep, argp)) 1651 return (error); 1652 mp->mnt_flag |= MNT_EXPORTED; 1653 } 1654 return (0); 1655 } 1656 1657 struct netcred * 1658 vfs_export_lookup(mp, nep, nam) 1659 register struct mount *mp; 1660 struct netexport *nep; 1661 struct mbuf *nam; 1662 { 1663 register struct netcred *np; 1664 register struct radix_node_head *rnh; 1665 struct sockaddr *saddr; 1666 1667 np = NULL; 1668 if (mp->mnt_flag & MNT_EXPORTED) { 1669 /* 1670 * Lookup in the export list first. 1671 */ 1672 if (nam != NULL) { 1673 saddr = mtod(nam, struct sockaddr *); 1674 rnh = nep->ne_rtable[saddr->sa_family]; 1675 if (rnh != NULL) { 1676 np = (struct netcred *) 1677 (*rnh->rnh_matchaddr)((caddr_t)saddr, 1678 rnh); 1679 if (np && np->netc_rnodes->rn_flags & RNF_ROOT) 1680 np = NULL; 1681 } 1682 } 1683 /* 1684 * If no address match, use the default if it exists. 1685 */ 1686 if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED) 1687 np = &nep->ne_defexported; 1688 } 1689 return (np); 1690 } 1691