1 /* 2 * Copyright (c) 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * (c) UNIX System Laboratories, Inc. 5 * All or some portions of this file are derived from material licensed 6 * to the University of California by American Telephone and Telegraph 7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8 * the permission of UNIX System Laboratories, Inc. 9 * 10 * %sccs.include.redist.c% 11 * 12 * @(#)vfs_subr.c 8.24 (Berkeley) 05/14/95 13 */ 14 15 /* 16 * External virtual filesystem routines 17 */ 18 19 #include <sys/param.h> 20 #include <sys/systm.h> 21 #include <sys/proc.h> 22 #include <sys/mount.h> 23 #include <sys/time.h> 24 #include <sys/vnode.h> 25 #include <sys/stat.h> 26 #include <sys/namei.h> 27 #include <sys/ucred.h> 28 #include <sys/buf.h> 29 #include <sys/errno.h> 30 #include <sys/malloc.h> 31 #include <sys/domain.h> 32 #include <sys/mbuf.h> 33 34 #include <vm/vm.h> 35 #include <sys/sysctl.h> 36 37 #include <miscfs/specfs/specdev.h> 38 39 enum vtype iftovt_tab[16] = { 40 VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON, 41 VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD, 42 }; 43 int vttoif_tab[9] = { 44 0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK, 45 S_IFSOCK, S_IFIFO, S_IFMT, 46 }; 47 48 /* 49 * Insq/Remq for the vnode usage lists. 50 */ 51 #define bufinsvn(bp, dp) LIST_INSERT_HEAD(dp, bp, b_vnbufs) 52 #define bufremvn(bp) { \ 53 LIST_REMOVE(bp, b_vnbufs); \ 54 (bp)->b_vnbufs.le_next = NOLIST; \ 55 } 56 TAILQ_HEAD(freelst, vnode) vnode_free_list; /* vnode free list */ 57 struct mntlist mountlist; /* mounted filesystem list */ 58 static struct simplelock mntid_slock; 59 struct simplelock mntvnode_slock; 60 static struct simplelock spechash_slock; 61 static struct simplelock vnode_free_list_slock; 62 63 /* 64 * Initialize the vnode management data structures. 
65 */ 66 void 67 vntblinit() 68 { 69 70 simple_lock_init(&mntvnode_slock); 71 simple_lock_init(&mntid_slock); 72 simple_lock_init(&spechash_slock); 73 TAILQ_INIT(&vnode_free_list); 74 simple_lock_init(&vnode_free_list_slock); 75 CIRCLEQ_INIT(&mountlist); 76 } 77 78 /* 79 * Lock a filesystem. 80 * Used to prevent access to it while mounting and unmounting. 81 */ 82 int 83 vfs_lock(mp) 84 register struct mount *mp; 85 { 86 87 while (mp->mnt_flag & MNT_MLOCK) { 88 mp->mnt_flag |= MNT_MWAIT; 89 tsleep((caddr_t)mp, PVFS, "vfslock", 0); 90 } 91 mp->mnt_flag |= MNT_MLOCK; 92 return (0); 93 } 94 95 /* 96 * Unlock a locked filesystem. 97 * Panic if filesystem is not locked. 98 */ 99 void 100 vfs_unlock(mp) 101 register struct mount *mp; 102 { 103 104 if ((mp->mnt_flag & MNT_MLOCK) == 0) 105 panic("vfs_unlock: not locked"); 106 mp->mnt_flag &= ~MNT_MLOCK; 107 if (mp->mnt_flag & MNT_MWAIT) { 108 mp->mnt_flag &= ~MNT_MWAIT; 109 wakeup((caddr_t)mp); 110 } 111 } 112 113 /* 114 * Mark a mount point as busy. 115 * Used to synchronize access and to delay unmounting. 116 */ 117 int 118 vfs_busy(mp) 119 register struct mount *mp; 120 { 121 122 while (mp->mnt_flag & MNT_MPBUSY) { 123 mp->mnt_flag |= MNT_MPWANT; 124 tsleep((caddr_t)&mp->mnt_flag, PVFS, "vfsbusy", 0); 125 } 126 if (mp->mnt_flag & MNT_UNMOUNT) 127 return (1); 128 mp->mnt_flag |= MNT_MPBUSY; 129 return (0); 130 } 131 132 /* 133 * Free a busy filesystem. 134 * Panic if filesystem is not busy. 135 */ 136 void 137 vfs_unbusy(mp) 138 register struct mount *mp; 139 { 140 141 if ((mp->mnt_flag & MNT_MPBUSY) == 0) 142 panic("vfs_unbusy: not busy"); 143 mp->mnt_flag &= ~MNT_MPBUSY; 144 if (mp->mnt_flag & MNT_MPWANT) { 145 mp->mnt_flag &= ~MNT_MPWANT; 146 wakeup((caddr_t)&mp->mnt_flag); 147 } 148 } 149 150 /* 151 * Lookup a filesystem type, and if found allocate and initialize 152 * a mount structure for it. 153 * 154 * Devname is usually updated by mount(8) after booting. 
155 */ 156 int 157 vfs_rootmountalloc(fstypename, devname, mpp) 158 char *fstypename; 159 char *devname; 160 struct mount **mpp; 161 { 162 struct vfsconf *vfsp; 163 struct mount *mp; 164 165 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) 166 if (!strcmp(vfsp->vfc_name, fstypename)) 167 break; 168 if (vfsp == NULL) 169 return (ENODEV); 170 mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK); 171 bzero((char *)mp, (u_long)sizeof(struct mount)); 172 LIST_INIT(&mp->mnt_vnodelist); 173 mp->mnt_vfc = vfsp; 174 mp->mnt_op = vfsp->vfc_vfsops; 175 mp->mnt_flag = MNT_RDONLY; 176 mp->mnt_vnodecovered = NULLVP; 177 vfsp->vfc_refcount++; 178 mp->mnt_stat.f_type = vfsp->vfc_typenum; 179 mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK; 180 strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN); 181 mp->mnt_stat.f_mntonname[0] = '/'; 182 (void) copystr(devname, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 0); 183 *mpp = mp; 184 return (0); 185 } 186 187 /* 188 * Find an appropriate filesystem to use for the root. If a filesystem 189 * has not been preselected, walk through the list of known filesystems 190 * trying those that have mountroot routines, and try them until one 191 * works or we have tried them all. 192 */ 193 int 194 vfs_mountroot() 195 { 196 struct vfsconf *vfsp; 197 extern int (*mountroot)(void); 198 int error; 199 200 if (mountroot != NULL) 201 return ((*vfsp->vfc_mountroot)()); 202 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) { 203 if (vfsp->vfc_mountroot == NULL) 204 continue; 205 if ((error = (*vfsp->vfc_mountroot)()) == 0) 206 return (0); 207 printf("%s_mountroot failed: %d\n", vfsp->vfc_name, error); 208 } 209 return (ENODEV); 210 } 211 212 /* 213 * Lookup a mount point by filesystem identifier. 
214 */ 215 struct mount * 216 vfs_getvfs(fsid) 217 fsid_t *fsid; 218 { 219 register struct mount *mp; 220 221 for (mp = mountlist.cqh_first; mp != (void *)&mountlist; 222 mp = mp->mnt_list.cqe_next) { 223 if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] && 224 mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) 225 return (mp); 226 } 227 return ((struct mount *)0); 228 } 229 230 /* 231 * Get a new unique fsid 232 */ 233 void 234 vfs_getnewfsid(mp) 235 struct mount *mp; 236 { 237 static u_short xxxfs_mntid; 238 239 fsid_t tfsid; 240 int mtype; 241 242 simple_lock(&mntid_slock); 243 mtype = mp->mnt_vfc->vfc_typenum; 244 mp->mnt_stat.f_fsid.val[0] = makedev(nblkdev + mtype, 0); 245 mp->mnt_stat.f_fsid.val[1] = mtype; 246 if (xxxfs_mntid == 0) 247 ++xxxfs_mntid; 248 tfsid.val[0] = makedev(nblkdev + mtype, xxxfs_mntid); 249 tfsid.val[1] = mtype; 250 if (mountlist.cqh_first != (void *)&mountlist) { 251 while (vfs_getvfs(&tfsid)) { 252 tfsid.val[0]++; 253 xxxfs_mntid++; 254 } 255 } 256 mp->mnt_stat.f_fsid.val[0] = tfsid.val[0]; 257 simple_unlock(&mntid_slock); 258 } 259 260 /* 261 * Set vnode attributes to VNOVAL 262 */ 263 void 264 vattr_null(vap) 265 register struct vattr *vap; 266 { 267 268 vap->va_type = VNON; 269 vap->va_size = vap->va_bytes = VNOVAL; 270 vap->va_mode = vap->va_nlink = vap->va_uid = vap->va_gid = 271 vap->va_fsid = vap->va_fileid = 272 vap->va_blocksize = vap->va_rdev = 273 vap->va_atime.ts_sec = vap->va_atime.ts_nsec = 274 vap->va_mtime.ts_sec = vap->va_mtime.ts_nsec = 275 vap->va_ctime.ts_sec = vap->va_ctime.ts_nsec = 276 vap->va_flags = vap->va_gen = VNOVAL; 277 vap->va_vaflags = 0; 278 } 279 280 /* 281 * Routines having to do with the management of the vnode table. 282 */ 283 extern int (**dead_vnodeop_p)(); 284 static void vclean __P((struct vnode *vp, int flag, struct proc *p)); 285 extern void vgonel __P((struct vnode *vp, struct proc *p)); 286 long numvnodes; 287 extern struct vattr va_null; 288 289 /* 290 * Return the next vnode from the free list. 
291 */ 292 int 293 getnewvnode(tag, mp, vops, vpp) 294 enum vtagtype tag; 295 struct mount *mp; 296 int (**vops)(); 297 struct vnode **vpp; 298 { 299 struct proc *p = curproc; /* XXX */ 300 struct vnode *vp; 301 int s; 302 int cnt; 303 304 top: 305 simple_lock(&vnode_free_list_slock); 306 if ((vnode_free_list.tqh_first == NULL && 307 numvnodes < 2 * desiredvnodes) || 308 numvnodes < desiredvnodes) { 309 simple_unlock(&vnode_free_list_slock); 310 vp = (struct vnode *)malloc((u_long)sizeof *vp, 311 M_VNODE, M_WAITOK); 312 bzero((char *)vp, sizeof *vp); 313 numvnodes++; 314 } else { 315 for (vp = vnode_free_list.tqh_first; 316 vp != NULLVP; vp = vp->v_freelist.tqe_next) { 317 if (simple_lock_try(&vp->v_interlock)) 318 break; 319 } 320 /* 321 * Unless this is a bad time of the month, at most 322 * the first NCPUS items on the free list are 323 * locked, so this is close enough to being empty. 324 */ 325 if (vp == NULLVP) { 326 simple_unlock(&vnode_free_list_slock); 327 tablefull("vnode"); 328 *vpp = 0; 329 return (ENFILE); 330 } 331 if (vp->v_usecount) 332 panic("free vnode isn't"); 333 TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); 334 /* see comment on why 0xdeadb is set at end of vgone (below) */ 335 vp->v_freelist.tqe_prev = (struct vnode **)0xdeadb; 336 simple_unlock(&vnode_free_list_slock); 337 vp->v_lease = NULL; 338 if (vp->v_type != VBAD) 339 vgonel(vp, p); 340 else 341 simple_unlock(&vp->v_interlock); 342 #ifdef DIAGNOSTIC 343 if (vp->v_data) 344 panic("cleaned vnode isn't"); 345 s = splbio(); 346 if (vp->v_numoutput) 347 panic("Clean vnode has pending I/O's"); 348 splx(s); 349 #endif 350 vp->v_flag = 0; 351 vp->v_lastr = 0; 352 vp->v_ralen = 0; 353 vp->v_maxra = 0; 354 vp->v_lastw = 0; 355 vp->v_lasta = 0; 356 vp->v_cstart = 0; 357 vp->v_clen = 0; 358 vp->v_socket = 0; 359 } 360 vp->v_type = VNON; 361 cache_purge(vp); 362 vp->v_tag = tag; 363 vp->v_op = vops; 364 insmntque(vp, mp); 365 *vpp = vp; 366 vp->v_usecount = 1; 367 vp->v_data = 0; 368 return (0); 
369 } 370 371 /* 372 * Move a vnode from one mount queue to another. 373 */ 374 void 375 insmntque(vp, mp) 376 struct vnode *vp; 377 struct mount *mp; 378 { 379 380 simple_lock(&mntvnode_slock); 381 /* 382 * Delete from old mount point vnode list, if on one. 383 */ 384 if (vp->v_mount != NULL) 385 LIST_REMOVE(vp, v_mntvnodes); 386 /* 387 * Insert into list of vnodes for the new mount point, if available. 388 */ 389 if ((vp->v_mount = mp) != NULL) 390 LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes); 391 simple_unlock(&mntvnode_slock); 392 } 393 394 /* 395 * Update outstanding I/O count and do wakeup if requested. 396 */ 397 void 398 vwakeup(bp) 399 register struct buf *bp; 400 { 401 register struct vnode *vp; 402 403 bp->b_flags &= ~B_WRITEINPROG; 404 if (vp = bp->b_vp) { 405 if (--vp->v_numoutput < 0) 406 panic("vwakeup: neg numoutput"); 407 if ((vp->v_flag & VBWAIT) && vp->v_numoutput <= 0) { 408 if (vp->v_numoutput < 0) 409 panic("vwakeup: neg numoutput 2"); 410 vp->v_flag &= ~VBWAIT; 411 wakeup((caddr_t)&vp->v_numoutput); 412 } 413 } 414 } 415 416 /* 417 * Flush out and invalidate all buffers associated with a vnode. 418 * Called with the underlying object locked. 
419 */ 420 int 421 vinvalbuf(vp, flags, cred, p, slpflag, slptimeo) 422 register struct vnode *vp; 423 int flags; 424 struct ucred *cred; 425 struct proc *p; 426 int slpflag, slptimeo; 427 { 428 register struct buf *bp; 429 struct buf *nbp, *blist; 430 int s, error; 431 432 if (flags & V_SAVE) { 433 if (error = VOP_FSYNC(vp, cred, MNT_WAIT, p)) 434 return (error); 435 if (vp->v_dirtyblkhd.lh_first != NULL) 436 panic("vinvalbuf: dirty bufs"); 437 } 438 for (;;) { 439 if ((blist = vp->v_cleanblkhd.lh_first) && flags & V_SAVEMETA) 440 while (blist && blist->b_lblkno < 0) 441 blist = blist->b_vnbufs.le_next; 442 if (!blist && (blist = vp->v_dirtyblkhd.lh_first) && 443 (flags & V_SAVEMETA)) 444 while (blist && blist->b_lblkno < 0) 445 blist = blist->b_vnbufs.le_next; 446 if (!blist) 447 break; 448 449 for (bp = blist; bp; bp = nbp) { 450 nbp = bp->b_vnbufs.le_next; 451 if (flags & V_SAVEMETA && bp->b_lblkno < 0) 452 continue; 453 s = splbio(); 454 if (bp->b_flags & B_BUSY) { 455 bp->b_flags |= B_WANTED; 456 error = tsleep((caddr_t)bp, 457 slpflag | (PRIBIO + 1), "vinvalbuf", 458 slptimeo); 459 splx(s); 460 if (error) 461 return (error); 462 break; 463 } 464 bremfree(bp); 465 bp->b_flags |= B_BUSY; 466 splx(s); 467 /* 468 * XXX Since there are no node locks for NFS, I believe 469 * there is a slight chance that a delayed write will 470 * occur while sleeping just above, so check for it. 471 */ 472 if ((bp->b_flags & B_DELWRI) && (flags & V_SAVE)) { 473 (void) VOP_BWRITE(bp); 474 break; 475 } 476 bp->b_flags |= B_INVAL; 477 brelse(bp); 478 } 479 } 480 if (!(flags & V_SAVEMETA) && 481 (vp->v_dirtyblkhd.lh_first || vp->v_cleanblkhd.lh_first)) 482 panic("vinvalbuf: flush failed"); 483 return (0); 484 } 485 486 /* 487 * Associate a buffer with a vnode. 
488 */ 489 void 490 bgetvp(vp, bp) 491 register struct vnode *vp; 492 register struct buf *bp; 493 { 494 495 if (bp->b_vp) 496 panic("bgetvp: not free"); 497 VHOLD(vp); 498 bp->b_vp = vp; 499 if (vp->v_type == VBLK || vp->v_type == VCHR) 500 bp->b_dev = vp->v_rdev; 501 else 502 bp->b_dev = NODEV; 503 /* 504 * Insert onto list for new vnode. 505 */ 506 bufinsvn(bp, &vp->v_cleanblkhd); 507 } 508 509 /* 510 * Disassociate a buffer from a vnode. 511 */ 512 void 513 brelvp(bp) 514 register struct buf *bp; 515 { 516 struct vnode *vp; 517 518 if (bp->b_vp == (struct vnode *) 0) 519 panic("brelvp: NULL"); 520 /* 521 * Delete from old vnode list, if on one. 522 */ 523 if (bp->b_vnbufs.le_next != NOLIST) 524 bufremvn(bp); 525 vp = bp->b_vp; 526 bp->b_vp = (struct vnode *) 0; 527 HOLDRELE(vp); 528 } 529 530 /* 531 * Reassign a buffer from one vnode to another. 532 * Used to assign file specific control information 533 * (indirect blocks) to the vnode to which they belong. 534 */ 535 void 536 reassignbuf(bp, newvp) 537 register struct buf *bp; 538 register struct vnode *newvp; 539 { 540 register struct buflists *listheadp; 541 542 if (newvp == NULL) { 543 printf("reassignbuf: NULL"); 544 return; 545 } 546 /* 547 * Delete from old vnode list, if on one. 548 */ 549 if (bp->b_vnbufs.le_next != NOLIST) 550 bufremvn(bp); 551 /* 552 * If dirty, put on list of dirty buffers; 553 * otherwise insert onto list of clean buffers. 554 */ 555 if (bp->b_flags & B_DELWRI) 556 listheadp = &newvp->v_dirtyblkhd; 557 else 558 listheadp = &newvp->v_cleanblkhd; 559 bufinsvn(bp, listheadp); 560 } 561 562 /* 563 * Create a vnode for a block device. 564 * Used for root filesystem, argdev, and swap areas. 565 * Also used for memory file system special devices. 
566 */ 567 int 568 bdevvp(dev, vpp) 569 dev_t dev; 570 struct vnode **vpp; 571 { 572 register struct vnode *vp; 573 struct vnode *nvp; 574 int error; 575 576 if (dev == NODEV) { 577 *vpp = NULLVP; 578 return (ENODEV); 579 } 580 error = getnewvnode(VT_NON, (struct mount *)0, spec_vnodeop_p, &nvp); 581 if (error) { 582 *vpp = NULLVP; 583 return (error); 584 } 585 vp = nvp; 586 vp->v_type = VBLK; 587 if (nvp = checkalias(vp, dev, (struct mount *)0)) { 588 vput(vp); 589 vp = nvp; 590 } 591 *vpp = vp; 592 return (0); 593 } 594 595 /* 596 * Check to see if the new vnode represents a special device 597 * for which we already have a vnode (either because of 598 * bdevvp() or because of a different vnode representing 599 * the same block device). If such an alias exists, deallocate 600 * the existing contents and return the aliased vnode. The 601 * caller is responsible for filling it with its new contents. 602 */ 603 struct vnode * 604 checkalias(nvp, nvp_rdev, mp) 605 register struct vnode *nvp; 606 dev_t nvp_rdev; 607 struct mount *mp; 608 { 609 struct proc *p = curproc; /* XXX */ 610 struct vnode *vp; 611 struct vnode **vpp; 612 613 if (nvp->v_type != VBLK && nvp->v_type != VCHR) 614 return (NULLVP); 615 616 vpp = &speclisth[SPECHASH(nvp_rdev)]; 617 loop: 618 simple_lock(&spechash_slock); 619 for (vp = *vpp; vp; vp = vp->v_specnext) { 620 if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type) 621 continue; 622 /* 623 * Alias, but not in use, so flush it out. 
624 */ 625 simple_lock(&vp->v_interlock); 626 if (vp->v_usecount == 0) { 627 simple_unlock(&spechash_slock); 628 vgonel(vp, p); 629 goto loop; 630 } 631 if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, p)) { 632 simple_unlock(&spechash_slock); 633 goto loop; 634 } 635 break; 636 } 637 if (vp == NULL || vp->v_tag != VT_NON) { 638 MALLOC(nvp->v_specinfo, struct specinfo *, 639 sizeof(struct specinfo), M_VNODE, M_WAITOK); 640 nvp->v_rdev = nvp_rdev; 641 nvp->v_hashchain = vpp; 642 nvp->v_specnext = *vpp; 643 nvp->v_specflags = 0; 644 simple_unlock(&spechash_slock); 645 *vpp = nvp; 646 if (vp != NULLVP) { 647 nvp->v_flag |= VALIASED; 648 vp->v_flag |= VALIASED; 649 vput(vp); 650 } 651 return (NULLVP); 652 } 653 simple_unlock(&spechash_slock); 654 VOP_UNLOCK(vp, 0, p); 655 simple_lock(&vp->v_interlock); 656 vclean(vp, 0, p); 657 vp->v_op = nvp->v_op; 658 vp->v_tag = nvp->v_tag; 659 nvp->v_type = VNON; 660 insmntque(vp, mp); 661 return (vp); 662 } 663 664 /* 665 * Grab a particular vnode from the free list, increment its 666 * reference count and lock it. The vnode lock bit is set the 667 * vnode is being eliminated in vgone. The process is awakened 668 * when the transition is completed, and an error returned to 669 * indicate that the vnode is no longer usable (possibly having 670 * been changed to a new file system type). 671 */ 672 int 673 vget(vp, flags, p) 674 struct vnode *vp; 675 int flags; 676 struct proc *p; 677 { 678 679 /* 680 * If the vnode is in the process of being cleaned out for 681 * another use, we wait for the cleaning to finish and then 682 * return failure. Cleaning is determined by checking that 683 * the VXLOCK flag is set. 
684 */ 685 if ((flags & LK_INTERLOCK) == 0) 686 simple_lock(&vp->v_interlock); 687 if (vp->v_flag & VXLOCK) { 688 vp->v_flag |= VXWANT; 689 simple_unlock(&vp->v_interlock); 690 tsleep((caddr_t)vp, PINOD, "vget", 0); 691 return (ENOENT); 692 } 693 if (vp->v_usecount == 0) { 694 simple_lock(&vnode_free_list_slock); 695 TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); 696 simple_unlock(&vnode_free_list_slock); 697 } 698 vp->v_usecount++; 699 if (flags & LK_TYPE_MASK) 700 return (vn_lock(vp, flags | LK_INTERLOCK, p)); 701 simple_unlock(&vp->v_interlock); 702 return (0); 703 } 704 705 /* 706 * Stubs to use when there is no locking to be done on the underlying object. 707 * 708 * Getting a lock just clears the interlock if necessary. 709 */ 710 int 711 vop_nolock(ap) 712 struct vop_lock_args /* { 713 struct vnode *a_vp; 714 int a_flags; 715 struct proc *a_p; 716 } */ *ap; 717 { 718 struct vnode *vp = ap->a_vp; 719 720 /* 721 * Since we are not using the lock manager, we must clear 722 * the interlock here. 723 */ 724 if (ap->a_flags & LK_INTERLOCK) 725 simple_unlock(&vp->v_interlock); 726 return (0); 727 } 728 729 /* 730 * Unlock has nothing to do. 731 */ 732 int 733 vop_nounlock(ap) 734 struct vop_unlock_args /* { 735 struct vnode *a_vp; 736 int a_flags; 737 struct proc *a_p; 738 } */ *ap; 739 { 740 741 return (0); 742 } 743 744 /* 745 * Nothing is ever locked. 746 */ 747 int 748 vop_noislocked(ap) 749 struct vop_islocked_args /* { 750 struct vnode *a_vp; 751 } */ *ap; 752 { 753 754 return (0); 755 } 756 757 /* 758 * Vnode reference. 
759 */ 760 void 761 vref(vp) 762 struct vnode *vp; 763 { 764 765 simple_lock(&vp->v_interlock); 766 if (vp->v_usecount <= 0) 767 panic("vref used where vget required"); 768 vp->v_usecount++; 769 simple_unlock(&vp->v_interlock); 770 } 771 772 /* 773 * vput(), just unlock and vrele() 774 */ 775 void 776 vput(vp) 777 struct vnode *vp; 778 { 779 struct proc *p = curproc; /* XXX */ 780 781 VOP_UNLOCK(vp, 0, p); 782 vrele(vp); 783 } 784 785 /* 786 * Vnode release. 787 * If count drops to zero, call inactive routine and return to freelist. 788 */ 789 void 790 vrele(vp) 791 struct vnode *vp; 792 { 793 struct proc *p = curproc; /* XXX */ 794 795 #ifdef DIAGNOSTIC 796 if (vp == NULL) 797 panic("vrele: null vp"); 798 #endif 799 simple_lock(&vp->v_interlock); 800 vp->v_usecount--; 801 if (vp->v_usecount > 0) { 802 simple_unlock(&vp->v_interlock); 803 return; 804 } 805 #ifdef DIAGNOSTIC 806 if (vp->v_usecount < 0 || vp->v_writecount != 0) { 807 vprint("vrele: bad ref count", vp); 808 panic("vrele: ref cnt"); 809 } 810 #endif 811 /* 812 * insert at tail of LRU list 813 */ 814 simple_lock(&vnode_free_list_slock); 815 TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist); 816 simple_unlock(&vnode_free_list_slock); 817 if (vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK, p) == 0) 818 VOP_INACTIVE(vp, p); 819 } 820 821 #ifdef DIAGNOSTIC 822 /* 823 * Page or buffer structure gets a reference. 824 */ 825 void 826 vhold(vp) 827 register struct vnode *vp; 828 { 829 830 simple_lock(&vp->v_interlock); 831 vp->v_holdcnt++; 832 simple_unlock(&vp->v_interlock); 833 } 834 835 /* 836 * Page or buffer structure frees a reference. 837 */ 838 void 839 holdrele(vp) 840 register struct vnode *vp; 841 { 842 843 simple_lock(&vp->v_interlock); 844 if (vp->v_holdcnt <= 0) 845 panic("holdrele: holdcnt"); 846 vp->v_holdcnt--; 847 simple_unlock(&vp->v_interlock); 848 } 849 #endif /* DIAGNOSTIC */ 850 851 /* 852 * Remove any vnodes in the vnode table belonging to mount point mp. 
853 * 854 * If MNT_NOFORCE is specified, there should not be any active ones, 855 * return error if any are found (nb: this is a user error, not a 856 * system error). If MNT_FORCE is specified, detach any active vnodes 857 * that are found. 858 */ 859 #ifdef DIAGNOSTIC 860 int busyprt = 0; /* print out busy vnodes */ 861 struct ctldebug debug1 = { "busyprt", &busyprt }; 862 #endif 863 864 int 865 vflush(mp, skipvp, flags) 866 struct mount *mp; 867 struct vnode *skipvp; 868 int flags; 869 { 870 struct proc *p = curproc; /* XXX */ 871 struct vnode *vp, *nvp; 872 int busy = 0; 873 874 #ifdef DIAGNOSTIC 875 if ((mp->mnt_flag & MNT_MPBUSY) == 0) 876 panic("vflush: not busy"); 877 #endif 878 879 simple_lock(&mntvnode_slock); 880 loop: 881 for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) { 882 if (vp->v_mount != mp) 883 goto loop; 884 nvp = vp->v_mntvnodes.le_next; 885 /* 886 * Skip over a selected vnode. 887 */ 888 if (vp == skipvp) 889 continue; 890 891 simple_lock(&vp->v_interlock); 892 /* 893 * Skip over a vnodes marked VSYSTEM. 894 */ 895 if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM)) { 896 simple_unlock(&vp->v_interlock); 897 continue; 898 } 899 /* 900 * If WRITECLOSE is set, only flush out regular file 901 * vnodes open for writing. 902 */ 903 if ((flags & WRITECLOSE) && 904 (vp->v_writecount == 0 || vp->v_type != VREG)) { 905 simple_unlock(&vp->v_interlock); 906 continue; 907 } 908 /* 909 * With v_usecount == 0, all we need to do is clear 910 * out the vnode data structures and we are done. 911 */ 912 if (vp->v_usecount == 0) { 913 simple_unlock(&mntvnode_slock); 914 vgonel(vp, p); 915 simple_lock(&mntvnode_slock); 916 continue; 917 } 918 /* 919 * If FORCECLOSE is set, forcibly close the vnode. 920 * For block or character devices, revert to an 921 * anonymous device. For all other files, just kill them. 
922 */ 923 if (flags & FORCECLOSE) { 924 simple_unlock(&mntvnode_slock); 925 if (vp->v_type != VBLK && vp->v_type != VCHR) { 926 vgonel(vp, p); 927 } else { 928 vclean(vp, 0, p); 929 vp->v_op = spec_vnodeop_p; 930 insmntque(vp, (struct mount *)0); 931 } 932 simple_lock(&mntvnode_slock); 933 continue; 934 } 935 #ifdef DIAGNOSTIC 936 if (busyprt) 937 vprint("vflush: busy vnode", vp); 938 #endif 939 simple_unlock(&vp->v_interlock); 940 busy++; 941 } 942 simple_unlock(&mntvnode_slock); 943 if (busy) 944 return (EBUSY); 945 return (0); 946 } 947 948 /* 949 * Disassociate the underlying file system from a vnode. 950 * The vnode interlock is held on entry. 951 */ 952 static void 953 vclean(vp, flags, p) 954 struct vnode *vp; 955 int flags; 956 struct proc *p; 957 { 958 int active; 959 960 /* 961 * Check to see if the vnode is in use. 962 * If so we have to reference it before we clean it out 963 * so that its count cannot fall to zero and generate a 964 * race against ourselves to recycle it. 965 */ 966 if (active = vp->v_usecount) 967 vp->v_usecount++; 968 /* 969 * Prevent the vnode from being recycled or 970 * brought into use while we clean it out. 971 */ 972 if (vp->v_flag & VXLOCK) 973 panic("vclean: deadlock"); 974 vp->v_flag |= VXLOCK; 975 /* 976 * Even if the count is zero, the VOP_INACTIVE routine may still 977 * have the object locked while it cleans it out. The VOP_LOCK 978 * ensures that the VOP_INACTIVE routine is done with its work. 979 * For active vnodes, it ensures that no other activity can 980 * occur while the underlying object is being cleaned out. 981 */ 982 VOP_LOCK(vp, LK_DRAIN | LK_INTERLOCK, p); 983 /* 984 * Clean out any buffers associated with the vnode. 985 */ 986 if (flags & DOCLOSE) 987 vinvalbuf(vp, V_SAVE, NOCRED, NULL, 0, 0); 988 /* 989 * If purging an active vnode, it must be closed and 990 * deactivated before being reclaimed. Note that the 991 * VOP_INACTIVE will unlock the vnode. 
992 */ 993 if (active) { 994 if (flags & DOCLOSE) 995 VOP_CLOSE(vp, IO_NDELAY, NOCRED, p); 996 VOP_INACTIVE(vp, p); 997 } else { 998 /* 999 * Any other processes trying to obtain this lock must first 1000 * wait for VXLOCK to clear, then call the new lock operation. 1001 */ 1002 VOP_UNLOCK(vp, 0, p); 1003 } 1004 /* 1005 * Reclaim the vnode. 1006 */ 1007 if (VOP_RECLAIM(vp, p)) 1008 panic("vclean: cannot reclaim"); 1009 if (active) 1010 vrele(vp); 1011 cache_purge(vp); 1012 1013 /* 1014 * Done with purge, notify sleepers of the grim news. 1015 */ 1016 vp->v_op = dead_vnodeop_p; 1017 vp->v_tag = VT_NON; 1018 vp->v_flag &= ~VXLOCK; 1019 if (vp->v_flag & VXWANT) { 1020 vp->v_flag &= ~VXWANT; 1021 wakeup((caddr_t)vp); 1022 } 1023 } 1024 1025 /* 1026 * Eliminate all activity associated with the requested vnode 1027 * and with all vnodes aliased to the requested vnode. 1028 */ 1029 int 1030 vop_revoke(ap) 1031 struct vop_revoke_args /* { 1032 struct vnode *a_vp; 1033 int a_flags; 1034 } */ *ap; 1035 { 1036 struct vnode *vp, *vq; 1037 struct proc *p = curproc; /* XXX */ 1038 1039 #ifdef DIAGNOSTIC 1040 if ((ap->a_flags & REVOKEALL) == 0) 1041 panic("vop_revoke"); 1042 #endif 1043 1044 vp = ap->a_vp; 1045 simple_lock(&vp->v_interlock); 1046 1047 if (vp->v_flag & VALIASED) { 1048 /* 1049 * If a vgone (or vclean) is already in progress, 1050 * wait until it is done and return. 1051 */ 1052 if (vp->v_flag & VXLOCK) { 1053 vp->v_flag |= VXWANT; 1054 simple_unlock(&vp->v_interlock); 1055 tsleep((caddr_t)vp, PINOD, "vop_revokeall", 0); 1056 return (0); 1057 } 1058 /* 1059 * Ensure that vp will not be vgone'd while we 1060 * are eliminating its aliases. 
1061 */ 1062 vp->v_flag |= VXLOCK; 1063 simple_unlock(&vp->v_interlock); 1064 while (vp->v_flag & VALIASED) { 1065 simple_lock(&spechash_slock); 1066 for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) { 1067 if (vq->v_rdev != vp->v_rdev || 1068 vq->v_type != vp->v_type || vp == vq) 1069 continue; 1070 simple_unlock(&spechash_slock); 1071 vgone(vq); 1072 break; 1073 } 1074 if (vq == NULLVP) 1075 simple_unlock(&spechash_slock); 1076 } 1077 /* 1078 * Remove the lock so that vgone below will 1079 * really eliminate the vnode after which time 1080 * vgone will awaken any sleepers. 1081 */ 1082 simple_lock(&vp->v_interlock); 1083 vp->v_flag &= ~VXLOCK; 1084 } 1085 vgonel(vp, p); 1086 return (0); 1087 } 1088 1089 /* 1090 * Recycle an unused vnode to the front of the free list. 1091 * Release the passed interlock if the vnode will be recycled. 1092 */ 1093 int 1094 vrecycle(vp, inter_lkp, p) 1095 struct vnode *vp; 1096 struct simplelock *inter_lkp; 1097 struct proc *p; 1098 { 1099 1100 simple_lock(&vp->v_interlock); 1101 if (vp->v_usecount == 0) { 1102 if (inter_lkp) 1103 simple_unlock(inter_lkp); 1104 vgonel(vp, p); 1105 return (1); 1106 } 1107 simple_unlock(&vp->v_interlock); 1108 return (0); 1109 } 1110 1111 /* 1112 * Eliminate all activity associated with a vnode 1113 * in preparation for reuse. 1114 */ 1115 void 1116 vgone(vp) 1117 struct vnode *vp; 1118 { 1119 struct proc *p = curproc; /* XXX */ 1120 1121 simple_lock(&vp->v_interlock); 1122 vgonel(vp, p); 1123 } 1124 1125 /* 1126 * vgone, with the vp interlock held. 1127 */ 1128 void 1129 vgonel(vp, p) 1130 struct vnode *vp; 1131 struct proc *p; 1132 { 1133 struct vnode *vq; 1134 struct vnode *vx; 1135 1136 /* 1137 * If a vgone (or vclean) is already in progress, 1138 * wait until it is done and return. 
1139 */ 1140 if (vp->v_flag & VXLOCK) { 1141 vp->v_flag |= VXWANT; 1142 simple_unlock(&vp->v_interlock); 1143 tsleep((caddr_t)vp, PINOD, "vgone", 0); 1144 return; 1145 } 1146 /* 1147 * Clean out the filesystem specific data. 1148 */ 1149 vclean(vp, DOCLOSE, p); 1150 /* 1151 * Delete from old mount point vnode list, if on one. 1152 */ 1153 if (vp->v_mount != NULL) 1154 insmntque(vp, (struct mount *)0); 1155 /* 1156 * If special device, remove it from special device alias list 1157 * if it is on one. 1158 */ 1159 if ((vp->v_type == VBLK || vp->v_type == VCHR) && vp->v_specinfo != 0) { 1160 simple_lock(&spechash_slock); 1161 if (*vp->v_hashchain == vp) { 1162 *vp->v_hashchain = vp->v_specnext; 1163 } else { 1164 for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) { 1165 if (vq->v_specnext != vp) 1166 continue; 1167 vq->v_specnext = vp->v_specnext; 1168 break; 1169 } 1170 if (vq == NULL) 1171 panic("missing bdev"); 1172 } 1173 if (vp->v_flag & VALIASED) { 1174 vx = NULL; 1175 for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) { 1176 if (vq->v_rdev != vp->v_rdev || 1177 vq->v_type != vp->v_type) 1178 continue; 1179 if (vx) 1180 break; 1181 vx = vq; 1182 } 1183 if (vx == NULL) 1184 panic("missing alias"); 1185 if (vq == NULL) 1186 vx->v_flag &= ~VALIASED; 1187 vp->v_flag &= ~VALIASED; 1188 } 1189 simple_unlock(&spechash_slock); 1190 FREE(vp->v_specinfo, M_VNODE); 1191 vp->v_specinfo = NULL; 1192 } 1193 /* 1194 * If it is on the freelist and not already at the head, 1195 * move it to the head of the list. The test of the back 1196 * pointer and the reference count of zero is because 1197 * it will be removed from the free list by getnewvnode, 1198 * but will not have its reference count incremented until 1199 * after calling vgone. If the reference count were 1200 * incremented first, vgone would (incorrectly) try to 1201 * close the previous instance of the underlying object. 
1202 * So, the back pointer is explicitly set to `0xdeadb' in 1203 * getnewvnode after removing it from the freelist to ensure 1204 * that we do not try to move it here. 1205 */ 1206 if (vp->v_usecount == 0) { 1207 simple_lock(&vnode_free_list_slock); 1208 if ((vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb) && 1209 vnode_free_list.tqh_first != vp) { 1210 TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); 1211 TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist); 1212 } 1213 simple_unlock(&vnode_free_list_slock); 1214 } 1215 vp->v_type = VBAD; 1216 } 1217 1218 /* 1219 * Lookup a vnode by device number. 1220 */ 1221 int 1222 vfinddev(dev, type, vpp) 1223 dev_t dev; 1224 enum vtype type; 1225 struct vnode **vpp; 1226 { 1227 struct vnode *vp; 1228 int rc = 0; 1229 1230 simple_lock(&spechash_slock); 1231 for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) { 1232 if (dev != vp->v_rdev || type != vp->v_type) 1233 continue; 1234 *vpp = vp; 1235 rc = 1; 1236 break; 1237 } 1238 simple_unlock(&spechash_slock); 1239 return (rc); 1240 } 1241 1242 /* 1243 * Calculate the total number of references to a special device. 1244 */ 1245 int 1246 vcount(vp) 1247 struct vnode *vp; 1248 { 1249 struct vnode *vq, *vnext; 1250 int count; 1251 1252 loop: 1253 if ((vp->v_flag & VALIASED) == 0) 1254 return (vp->v_usecount); 1255 simple_lock(&spechash_slock); 1256 for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) { 1257 vnext = vq->v_specnext; 1258 if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type) 1259 continue; 1260 /* 1261 * Alias, but not in use, so flush it out. 1262 */ 1263 if (vq->v_usecount == 0 && vq != vp) { 1264 simple_unlock(&spechash_slock); 1265 vgone(vq); 1266 goto loop; 1267 } 1268 count += vq->v_usecount; 1269 } 1270 simple_unlock(&spechash_slock); 1271 return (count); 1272 } 1273 1274 /* 1275 * Print out a description of a vnode. 
1276 */ 1277 static char *typename[] = 1278 { "VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD" }; 1279 1280 void 1281 vprint(label, vp) 1282 char *label; 1283 register struct vnode *vp; 1284 { 1285 char buf[64]; 1286 1287 if (label != NULL) 1288 printf("%s: ", label); 1289 printf("type %s, usecount %d, writecount %d, refcount %d,", 1290 typename[vp->v_type], vp->v_usecount, vp->v_writecount, 1291 vp->v_holdcnt); 1292 buf[0] = '\0'; 1293 if (vp->v_flag & VROOT) 1294 strcat(buf, "|VROOT"); 1295 if (vp->v_flag & VTEXT) 1296 strcat(buf, "|VTEXT"); 1297 if (vp->v_flag & VSYSTEM) 1298 strcat(buf, "|VSYSTEM"); 1299 if (vp->v_flag & VXLOCK) 1300 strcat(buf, "|VXLOCK"); 1301 if (vp->v_flag & VXWANT) 1302 strcat(buf, "|VXWANT"); 1303 if (vp->v_flag & VBWAIT) 1304 strcat(buf, "|VBWAIT"); 1305 if (vp->v_flag & VALIASED) 1306 strcat(buf, "|VALIASED"); 1307 if (buf[0] != '\0') 1308 printf(" flags (%s)", &buf[1]); 1309 if (vp->v_data == NULL) { 1310 printf("\n"); 1311 } else { 1312 printf("\n\t"); 1313 VOP_PRINT(vp); 1314 } 1315 } 1316 1317 #ifdef DEBUG 1318 /* 1319 * List all of the locked vnodes in the system. 1320 * Called when debugging the kernel. 1321 */ 1322 void 1323 printlockedvnodes() 1324 { 1325 register struct mount *mp; 1326 register struct vnode *vp; 1327 1328 printf("Locked vnodes\n"); 1329 for (mp = mountlist.cqh_first; mp != (void *)&mountlist; 1330 mp = mp->mnt_list.cqe_next) { 1331 for (vp = mp->mnt_vnodelist.lh_first; 1332 vp != NULL; 1333 vp = vp->v_mntvnodes.le_next) { 1334 if (VOP_ISLOCKED(vp)) 1335 vprint((char *)0, vp); 1336 } 1337 } 1338 } 1339 #endif 1340 1341 /* 1342 * Top level filesystem related information gathering. 
1343 */ 1344 int 1345 vfs_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p) 1346 int *name; 1347 u_int namelen; 1348 void *oldp; 1349 size_t *oldlenp; 1350 void *newp; 1351 size_t newlen; 1352 struct proc *p; 1353 { 1354 struct ctldebug *cdp; 1355 struct vfsconf *vfsp; 1356 1357 /* all sysctl names at this level are at least name and field */ 1358 if (namelen < 2) 1359 return (ENOTDIR); /* overloaded */ 1360 if (name[0] != VFS_GENERIC) { 1361 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) 1362 if (vfsp->vfc_typenum == name[0]) 1363 break; 1364 if (vfsp == NULL) 1365 return (EOPNOTSUPP); 1366 return ((*vfsp->vfc_vfsops->vfs_sysctl)(&name[1], namelen - 1, 1367 oldp, oldlenp, newp, newlen, p)); 1368 } 1369 switch (name[1]) { 1370 case VFS_MAXTYPENUM: 1371 return (sysctl_rdint(oldp, oldlenp, newp, maxvfsconf)); 1372 case VFS_CONF: 1373 if (namelen < 3) 1374 return (ENOTDIR); /* overloaded */ 1375 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) 1376 if (vfsp->vfc_typenum == name[2]) 1377 break; 1378 if (vfsp == NULL) 1379 return (EOPNOTSUPP); 1380 return (sysctl_rdstruct(oldp, oldlenp, newp, vfsp, 1381 sizeof(struct vfsconf))); 1382 } 1383 return (EOPNOTSUPP); 1384 } 1385 1386 int kinfo_vdebug = 1; 1387 int kinfo_vgetfailed; 1388 #define KINFO_VNODESLOP 10 1389 /* 1390 * Dump vnode list (via sysctl). 1391 * Copyout address of vnode followed by vnode. 
 */
/* ARGSUSED */
/*
 * Dump the system vnode table into a user buffer for sysctl.
 *
 * For each vnode, copies out the kernel address of the vnode
 * followed by the vnode structure itself.  If `where' is NULL,
 * only an upper-bound size estimate (with KINFO_VNODESLOP slack)
 * is returned through sizep.  Returns ENOMEM if the supplied
 * buffer fills up, otherwise 0 with *sizep set to the bytes used.
 */
int
sysctl_vnode(where, sizep)
	char *where;
	size_t *sizep;
{
	register struct mount *mp, *nmp;
	struct vnode *nvp, *vp;
	register char *bp = where, *savebp;
	char *ewhere;
	int error;

#define VPTRSZ	sizeof (struct vnode *)
#define VNODESZ	sizeof (struct vnode)
	if (where == NULL) {
		/* Size-probe only: report an over-estimate. */
		*sizep = (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ);
		return (0);
	}
	ewhere = where + *sizep;

	for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
		/* Save successor first; skip mounts we cannot busy. */
		nmp = mp->mnt_list.cqe_next;
		if (vfs_busy(mp))
			continue;
		savebp = bp;
again:
		simple_lock(&mntvnode_slock);
		for (vp = mp->mnt_vnodelist.lh_first;
		     vp != NULL;
		     vp = nvp) {
			/*
			 * Check that the vp is still associated with
			 * this filesystem.  RACE: could have been
			 * recycled onto the same filesystem.
			 */
			if (vp->v_mount != mp) {
				simple_unlock(&mntvnode_slock);
				if (kinfo_vdebug)
					printf("kinfo: vp changed\n");
				/* Rewind this mount's output and rescan. */
				bp = savebp;
				goto again;
			}
			nvp = vp->v_mntvnodes.le_next;
			if (bp + VPTRSZ + VNODESZ > ewhere) {
				simple_unlock(&mntvnode_slock);
				*sizep = bp - where;
				return (ENOMEM);
			}
			/*
			 * The lock must be dropped across copyout (it may
			 * fault); the v_mount check above catches vnodes
			 * recycled meanwhile.
			 */
			simple_unlock(&mntvnode_slock);
			if ((error = copyout((caddr_t)&vp, bp, VPTRSZ)) ||
			    (error = copyout((caddr_t)vp, bp + VPTRSZ, VNODESZ)))
				return (error);
			bp += VPTRSZ + VNODESZ;
			simple_lock(&mntvnode_slock);
		}
		simple_unlock(&mntvnode_slock);
		vfs_unbusy(mp);
	}

	*sizep = bp - where;
	return (0);
}

/*
 * Check to see if a filesystem is mounted on a block device.
1458 */ 1459 int 1460 vfs_mountedon(vp) 1461 struct vnode *vp; 1462 { 1463 struct vnode *vq; 1464 int error = 0; 1465 1466 if (vp->v_specflags & SI_MOUNTEDON) 1467 return (EBUSY); 1468 if (vp->v_flag & VALIASED) { 1469 simple_lock(&spechash_slock); 1470 for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) { 1471 if (vq->v_rdev != vp->v_rdev || 1472 vq->v_type != vp->v_type) 1473 continue; 1474 if (vq->v_specflags & SI_MOUNTEDON) { 1475 error = EBUSY; 1476 break; 1477 } 1478 } 1479 simple_unlock(&spechash_slock); 1480 } 1481 return (error); 1482 } 1483 1484 /* 1485 * Unmount all filesystems. The list is traversed in reverse order 1486 * of mounting to avoid dependencies. 1487 */ 1488 void 1489 vfs_unmountall() 1490 { 1491 struct mount *mp, *nmp; 1492 1493 for (mp = mountlist.cqh_last; mp != (void *)&mountlist; mp = nmp) { 1494 nmp = mp->mnt_list.cqe_prev; 1495 (void) dounmount(mp, MNT_FORCE, &proc0); 1496 } 1497 } 1498 1499 /* 1500 * Build hash lists of net addresses and hang them off the mount point. 1501 * Called by ufs_mount() to set up the lists of export addresses. 
1502 */ 1503 static int 1504 vfs_hang_addrlist(mp, nep, argp) 1505 struct mount *mp; 1506 struct netexport *nep; 1507 struct export_args *argp; 1508 { 1509 register struct netcred *np; 1510 register struct radix_node_head *rnh; 1511 register int i; 1512 struct radix_node *rn; 1513 struct sockaddr *saddr, *smask = 0; 1514 struct domain *dom; 1515 int error; 1516 1517 if (argp->ex_addrlen == 0) { 1518 if (mp->mnt_flag & MNT_DEFEXPORTED) 1519 return (EPERM); 1520 np = &nep->ne_defexported; 1521 np->netc_exflags = argp->ex_flags; 1522 np->netc_anon = argp->ex_anon; 1523 np->netc_anon.cr_ref = 1; 1524 mp->mnt_flag |= MNT_DEFEXPORTED; 1525 return (0); 1526 } 1527 i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen; 1528 np = (struct netcred *)malloc(i, M_NETADDR, M_WAITOK); 1529 bzero((caddr_t)np, i); 1530 saddr = (struct sockaddr *)(np + 1); 1531 if (error = copyin(argp->ex_addr, (caddr_t)saddr, argp->ex_addrlen)) 1532 goto out; 1533 if (saddr->sa_len > argp->ex_addrlen) 1534 saddr->sa_len = argp->ex_addrlen; 1535 if (argp->ex_masklen) { 1536 smask = (struct sockaddr *)((caddr_t)saddr + argp->ex_addrlen); 1537 error = copyin(argp->ex_addr, (caddr_t)smask, argp->ex_masklen); 1538 if (error) 1539 goto out; 1540 if (smask->sa_len > argp->ex_masklen) 1541 smask->sa_len = argp->ex_masklen; 1542 } 1543 i = saddr->sa_family; 1544 if ((rnh = nep->ne_rtable[i]) == 0) { 1545 /* 1546 * Seems silly to initialize every AF when most are not 1547 * used, do so on demand here 1548 */ 1549 for (dom = domains; dom; dom = dom->dom_next) 1550 if (dom->dom_family == i && dom->dom_rtattach) { 1551 dom->dom_rtattach((void **)&nep->ne_rtable[i], 1552 dom->dom_rtoffset); 1553 break; 1554 } 1555 if ((rnh = nep->ne_rtable[i]) == 0) { 1556 error = ENOBUFS; 1557 goto out; 1558 } 1559 } 1560 rn = (*rnh->rnh_addaddr)((caddr_t)saddr, (caddr_t)smask, rnh, 1561 np->netc_rnodes); 1562 if (rn == 0) { 1563 /* 1564 * One of the reasons that rnh_addaddr may fail is that 1565 * the entry already 
exists. To check for this case, we 1566 * look up the entry to see if it is there. If so, we 1567 * do not need to make a new entry but do return success. 1568 */ 1569 free(np, M_NETADDR); 1570 rn = (*rnh->rnh_matchaddr)((caddr_t)saddr, rnh); 1571 if (rn != 0 && (rn->rn_flags & RNF_ROOT) == 0 && 1572 ((struct netcred *)rn)->netc_exflags == argp->ex_flags && 1573 !bcmp((caddr_t)&((struct netcred *)rn)->netc_anon, 1574 (caddr_t)&argp->ex_anon, sizeof(struct ucred))) 1575 return (0); 1576 return (EPERM); 1577 } 1578 np->netc_exflags = argp->ex_flags; 1579 np->netc_anon = argp->ex_anon; 1580 np->netc_anon.cr_ref = 1; 1581 return (0); 1582 out: 1583 free(np, M_NETADDR); 1584 return (error); 1585 } 1586 1587 /* ARGSUSED */ 1588 static int 1589 vfs_free_netcred(rn, w) 1590 struct radix_node *rn; 1591 caddr_t w; 1592 { 1593 register struct radix_node_head *rnh = (struct radix_node_head *)w; 1594 1595 (*rnh->rnh_deladdr)(rn->rn_key, rn->rn_mask, rnh); 1596 free((caddr_t)rn, M_NETADDR); 1597 return (0); 1598 } 1599 1600 /* 1601 * Free the net address hash lists that are hanging off the mount points. 
1602 */ 1603 static void 1604 vfs_free_addrlist(nep) 1605 struct netexport *nep; 1606 { 1607 register int i; 1608 register struct radix_node_head *rnh; 1609 1610 for (i = 0; i <= AF_MAX; i++) 1611 if (rnh = nep->ne_rtable[i]) { 1612 (*rnh->rnh_walktree)(rnh, vfs_free_netcred, 1613 (caddr_t)rnh); 1614 free((caddr_t)rnh, M_RTABLE); 1615 nep->ne_rtable[i] = 0; 1616 } 1617 } 1618 1619 int 1620 vfs_export(mp, nep, argp) 1621 struct mount *mp; 1622 struct netexport *nep; 1623 struct export_args *argp; 1624 { 1625 int error; 1626 1627 if (argp->ex_flags & MNT_DELEXPORT) { 1628 vfs_free_addrlist(nep); 1629 mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED); 1630 } 1631 if (argp->ex_flags & MNT_EXPORTED) { 1632 if (error = vfs_hang_addrlist(mp, nep, argp)) 1633 return (error); 1634 mp->mnt_flag |= MNT_EXPORTED; 1635 } 1636 return (0); 1637 } 1638 1639 struct netcred * 1640 vfs_export_lookup(mp, nep, nam) 1641 register struct mount *mp; 1642 struct netexport *nep; 1643 struct mbuf *nam; 1644 { 1645 register struct netcred *np; 1646 register struct radix_node_head *rnh; 1647 struct sockaddr *saddr; 1648 1649 np = NULL; 1650 if (mp->mnt_flag & MNT_EXPORTED) { 1651 /* 1652 * Lookup in the export list first. 1653 */ 1654 if (nam != NULL) { 1655 saddr = mtod(nam, struct sockaddr *); 1656 rnh = nep->ne_rtable[saddr->sa_family]; 1657 if (rnh != NULL) { 1658 np = (struct netcred *) 1659 (*rnh->rnh_matchaddr)((caddr_t)saddr, 1660 rnh); 1661 if (np && np->netc_rnodes->rn_flags & RNF_ROOT) 1662 np = NULL; 1663 } 1664 } 1665 /* 1666 * If no address match, use the default if it exists. 1667 */ 1668 if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED) 1669 np = &nep->ne_defexported; 1670 } 1671 return (np); 1672 } 1673