/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * %sccs.include.redist.c%
 *
 *	@(#)vfs_subr.c	8.23 (Berkeley) 05/10/95
 */

/*
 * External virtual filesystem routines
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/mount.h>
#include <sys/time.h>
#include <sys/vnode.h>
#include <sys/stat.h>
#include <sys/namei.h>
#include <sys/ucred.h>
#include <sys/buf.h>
#include <sys/errno.h>
#include <sys/malloc.h>
#include <sys/domain.h>
#include <sys/mbuf.h>

#include <vm/vm.h>
#include <sys/sysctl.h>

#include <miscfs/specfs/specdev.h>

enum vtype iftovt_tab[16] = {
	VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
	VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
};
int	vttoif_tab[9] = {
	0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
	S_IFSOCK, S_IFIFO, S_IFMT,
};

/*
 * Insq/Remq for the vnode usage lists.
 */
#define	bufinsvn(bp, dp)	LIST_INSERT_HEAD(dp, bp, b_vnbufs)
#define	bufremvn(bp) {						\
	LIST_REMOVE(bp, b_vnbufs);				\
	(bp)->b_vnbufs.le_next = NOLIST;			\
}
TAILQ_HEAD(freelst, vnode) vnode_free_list;	/* vnode free list */
struct mntlist mountlist;			/* mounted filesystem list */

/*
 * Initialize the vnode management data structures.
 */
void
vntblinit()
{

	TAILQ_INIT(&vnode_free_list);
	CIRCLEQ_INIT(&mountlist);
}

/*
 * Lock a filesystem.
 * Used to prevent access to it while mounting and unmounting.
 */
int
vfs_lock(mp)
	register struct mount *mp;
{

	while (mp->mnt_flag & MNT_MLOCK) {
		mp->mnt_flag |= MNT_MWAIT;
		tsleep((caddr_t)mp, PVFS, "vfslock", 0);
	}
	mp->mnt_flag |= MNT_MLOCK;
	return (0);
}

/*
 * Unlock a locked filesystem.
 * Panic if filesystem is not locked.
 */
void
vfs_unlock(mp)
	register struct mount *mp;
{

	if ((mp->mnt_flag & MNT_MLOCK) == 0)
		panic("vfs_unlock: not locked");
	mp->mnt_flag &= ~MNT_MLOCK;
	if (mp->mnt_flag & MNT_MWAIT) {
		mp->mnt_flag &= ~MNT_MWAIT;
		wakeup((caddr_t)mp);
	}
}

/*
 * Mark a mount point as busy.
 * Used to synchronize access and to delay unmounting.
 */
int
vfs_busy(mp)
	register struct mount *mp;
{

	while (mp->mnt_flag & MNT_MPBUSY) {
		mp->mnt_flag |= MNT_MPWANT;
		tsleep((caddr_t)&mp->mnt_flag, PVFS, "vfsbusy", 0);
	}
	if (mp->mnt_flag & MNT_UNMOUNT)
		return (1);
	mp->mnt_flag |= MNT_MPBUSY;
	return (0);
}

/*
 * Free a busy filesystem.
 * Panic if filesystem is not busy.
 */
void
vfs_unbusy(mp)
	register struct mount *mp;
{

	if ((mp->mnt_flag & MNT_MPBUSY) == 0)
		panic("vfs_unbusy: not busy");
	mp->mnt_flag &= ~MNT_MPBUSY;
	if (mp->mnt_flag & MNT_MPWANT) {
		mp->mnt_flag &= ~MNT_MPWANT;
		wakeup((caddr_t)&mp->mnt_flag);
	}
}

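/*
 * Illustrative sketch (an editor's addition, not part of the original
 * source): callers normally walk the mount list with each filesystem
 * held busy, the same pattern sysctl_vnode() uses later in this file.
 * A mount point whose vfs_busy() fails is being unmounted and is
 * simply skipped.
 */
#ifdef notdef
static void
example_foreach_mount()
{
	struct mount *mp, *nmp;

	for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
		nmp = mp->mnt_list.cqe_next;
		if (vfs_busy(mp))
			continue;	/* being unmounted; skip it */
		/* ... operate on the busied mount point here ... */
		vfs_unbusy(mp);
	}
}
#endif /* notdef */
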
/*
 * Lookup a filesystem type, and if found allocate and initialize
 * a mount structure for it.
 *
 * Devname is usually updated by mount(8) after booting.
 */
int
vfs_rootmountalloc(fstypename, devname, mpp)
	char *fstypename;
	char *devname;
	struct mount **mpp;
{
	struct vfsconf *vfsp;
	struct mount *mp;

	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
		if (!strcmp(vfsp->vfc_name, fstypename))
			break;
	if (vfsp == NULL)
		return (ENODEV);
	mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK);
	bzero((char *)mp, (u_long)sizeof(struct mount));
	LIST_INIT(&mp->mnt_vnodelist);
	mp->mnt_vfc = vfsp;
	mp->mnt_op = vfsp->vfc_vfsops;
	mp->mnt_flag = MNT_RDONLY;
	mp->mnt_vnodecovered = NULLVP;
	vfsp->vfc_refcount++;
	mp->mnt_stat.f_type = vfsp->vfc_typenum;
	mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
	strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN);
	mp->mnt_stat.f_mntonname[0] = '/';
	(void) copystr(devname, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 0);
	*mpp = mp;
	return (0);
}

/*
 * Find an appropriate filesystem to use for the root. If a filesystem
 * has not been preselected, walk through the list of known filesystems
 * trying those that have mountroot routines, and try them until one
 * works or we have tried them all.
 */
int
vfs_mountroot()
{
	struct vfsconf *vfsp;
	extern int (*mountroot)(void);
	int error;

	if (mountroot != NULL)
		return ((*mountroot)());
	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) {
		if (vfsp->vfc_mountroot == NULL)
			continue;
		if ((error = (*vfsp->vfc_mountroot)()) == 0)
			return (0);
		printf("%s_mountroot failed: %d\n", vfsp->vfc_name, error);
	}
	return (ENODEV);
}

/*
 * Lookup a mount point by filesystem identifier.
 */
struct mount *
vfs_getvfs(fsid)
	fsid_t *fsid;
{
	register struct mount *mp;

	for (mp = mountlist.cqh_first; mp != (void *)&mountlist;
	     mp = mp->mnt_list.cqe_next) {
		if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] &&
		    mp->mnt_stat.f_fsid.val[1] == fsid->val[1])
			return (mp);
	}
	return ((struct mount *)0);
}

/*
 * Get a new unique fsid
 */
void
vfs_getnewfsid(mp)
	struct mount *mp;
{
	static u_short xxxfs_mntid;

	fsid_t tfsid;
	int mtype;

	mtype = mp->mnt_vfc->vfc_typenum;
	mp->mnt_stat.f_fsid.val[0] = makedev(nblkdev + mtype, 0);
	mp->mnt_stat.f_fsid.val[1] = mtype;
	if (xxxfs_mntid == 0)
		++xxxfs_mntid;
	tfsid.val[0] = makedev(nblkdev + mtype, xxxfs_mntid);
	tfsid.val[1] = mtype;
	if (mountlist.cqh_first != (void *)&mountlist) {
		while (vfs_getvfs(&tfsid)) {
			tfsid.val[0]++;
			xxxfs_mntid++;
		}
	}
	mp->mnt_stat.f_fsid.val[0] = tfsid.val[0];
}

/*
 * Set vnode attributes to VNOVAL
 */
void
vattr_null(vap)
	register struct vattr *vap;
{

	vap->va_type = VNON;
	vap->va_size = vap->va_bytes = VNOVAL;
	vap->va_mode = vap->va_nlink = vap->va_uid = vap->va_gid =
		vap->va_fsid = vap->va_fileid =
		vap->va_blocksize = vap->va_rdev =
		vap->va_atime.ts_sec = vap->va_atime.ts_nsec =
		vap->va_mtime.ts_sec = vap->va_mtime.ts_nsec =
		vap->va_ctime.ts_sec = vap->va_ctime.ts_nsec =
		vap->va_flags = vap->va_gen = VNOVAL;
	vap->va_vaflags = 0;
}

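/*
 * Illustrative sketch (an editor's addition, not part of the original
 * source): vattr_null() is normally called before VOP_SETATTR so that
 * every attribute the caller does not set explicitly stays at VNOVAL
 * and is ignored by the filesystem.  The vnode, credentials, and proc
 * below are assumed to be supplied by the caller.
 */
#ifdef notdef
static int
example_chmod(vp, mode, cred, p)
	struct vnode *vp;
	mode_t mode;
	struct ucred *cred;
	struct proc *p;
{
	struct vattr va;

	vattr_null(&va);		/* everything starts out as VNOVAL */
	va.va_mode = mode;		/* change only the mode */
	return (VOP_SETATTR(vp, &va, cred, p));
}
#endif /* notdef */
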
/*
 * Routines having to do with the management of the vnode table.
 */
extern int (**dead_vnodeop_p)();
extern void vclean();
long numvnodes;
extern struct vattr va_null;

/*
 * Return the next vnode from the free list.
 */
int
getnewvnode(tag, mp, vops, vpp)
	enum vtagtype tag;
	struct mount *mp;
	int (**vops)();
	struct vnode **vpp;
{
	register struct vnode *vp;
	int s;

	if ((vnode_free_list.tqh_first == NULL &&
	     numvnodes < 2 * desiredvnodes) ||
	    numvnodes < desiredvnodes) {
		vp = (struct vnode *)malloc((u_long)sizeof *vp,
		    M_VNODE, M_WAITOK);
		bzero((char *)vp, sizeof *vp);
		numvnodes++;
	} else {
		if ((vp = vnode_free_list.tqh_first) == NULL) {
			tablefull("vnode");
			*vpp = 0;
			return (ENFILE);
		}
		if (vp->v_usecount)
			panic("free vnode isn't");
		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
		/* see comment on why 0xdeadb is set at end of vgone (below) */
		vp->v_freelist.tqe_prev = (struct vnode **)0xdeadb;
		vp->v_lease = NULL;
		if (vp->v_type != VBAD)
			vgone(vp);
#ifdef DIAGNOSTIC
		if (vp->v_data)
			panic("cleaned vnode isn't");
		s = splbio();
		if (vp->v_numoutput)
			panic("Clean vnode has pending I/O's");
		splx(s);
#endif
		vp->v_flag = 0;
		vp->v_lastr = 0;
		vp->v_ralen = 0;
		vp->v_maxra = 0;
		vp->v_lastw = 0;
		vp->v_lasta = 0;
		vp->v_cstart = 0;
		vp->v_clen = 0;
		vp->v_socket = 0;
	}
	vp->v_type = VNON;
	cache_purge(vp);
	vp->v_tag = tag;
	vp->v_op = vops;
	insmntque(vp, mp);
	*vpp = vp;
	vp->v_usecount = 1;
	vp->v_data = 0;
	return (0);
}

/*
 * Move a vnode from one mount queue to another.
 */
void
insmntque(vp, mp)
	register struct vnode *vp;
	register struct mount *mp;
{

	/*
	 * Delete from old mount point vnode list, if on one.
	 */
	if (vp->v_mount != NULL)
		LIST_REMOVE(vp, v_mntvnodes);
	/*
	 * Insert into list of vnodes for the new mount point, if available.
	 */
	if ((vp->v_mount = mp) == NULL)
		return;
	LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes);
}

/*
 * Update outstanding I/O count and do wakeup if requested.
 */
void
vwakeup(bp)
	register struct buf *bp;
{
	register struct vnode *vp;

	bp->b_flags &= ~B_WRITEINPROG;
	if (vp = bp->b_vp) {
		if (--vp->v_numoutput < 0)
			panic("vwakeup: neg numoutput");
		if ((vp->v_flag & VBWAIT) && vp->v_numoutput <= 0) {
			if (vp->v_numoutput < 0)
				panic("vwakeup: neg numoutput 2");
			vp->v_flag &= ~VBWAIT;
			wakeup((caddr_t)&vp->v_numoutput);
		}
	}
}

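/*
 * Illustrative sketch (an editor's addition, not part of the original
 * source): vwakeup() above is the producer side of the VBWAIT
 * handshake.  A filesystem's fsync routine typically waits for the
 * outstanding writes on a vnode with a loop along these lines; the
 * "vbwait" wait message is an arbitrary choice here.
 */
#ifdef notdef
static void
example_waitoutput(vp)
	register struct vnode *vp;
{
	int s;

	s = splbio();
	while (vp->v_numoutput) {
		vp->v_flag |= VBWAIT;
		tsleep((caddr_t)&vp->v_numoutput, PRIBIO + 1, "vbwait", 0);
	}
	splx(s);
}
#endif /* notdef */
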
/*
 * Flush out and invalidate all buffers associated with a vnode.
 * Called with the underlying object locked.
 */
int
vinvalbuf(vp, flags, cred, p, slpflag, slptimeo)
	register struct vnode *vp;
	int flags;
	struct ucred *cred;
	struct proc *p;
	int slpflag, slptimeo;
{
	register struct buf *bp;
	struct buf *nbp, *blist;
	int s, error;

	if (flags & V_SAVE) {
		if (error = VOP_FSYNC(vp, cred, MNT_WAIT, p))
			return (error);
		if (vp->v_dirtyblkhd.lh_first != NULL)
			panic("vinvalbuf: dirty bufs");
	}
	for (;;) {
		if ((blist = vp->v_cleanblkhd.lh_first) && flags & V_SAVEMETA)
			while (blist && blist->b_lblkno < 0)
				blist = blist->b_vnbufs.le_next;
		if (!blist && (blist = vp->v_dirtyblkhd.lh_first) &&
		    (flags & V_SAVEMETA))
			while (blist && blist->b_lblkno < 0)
				blist = blist->b_vnbufs.le_next;
		if (!blist)
			break;

		for (bp = blist; bp; bp = nbp) {
			nbp = bp->b_vnbufs.le_next;
			if (flags & V_SAVEMETA && bp->b_lblkno < 0)
				continue;
			s = splbio();
			if (bp->b_flags & B_BUSY) {
				bp->b_flags |= B_WANTED;
				error = tsleep((caddr_t)bp,
					slpflag | (PRIBIO + 1), "vinvalbuf",
					slptimeo);
				splx(s);
				if (error)
					return (error);
				break;
			}
			bremfree(bp);
			bp->b_flags |= B_BUSY;
			splx(s);
			/*
			 * XXX Since there are no node locks for NFS, I believe
			 * there is a slight chance that a delayed write will
			 * occur while sleeping just above, so check for it.
			 */
			if ((bp->b_flags & B_DELWRI) && (flags & V_SAVE)) {
				(void) VOP_BWRITE(bp);
				break;
			}
			bp->b_flags |= B_INVAL;
			brelse(bp);
		}
	}
	if (!(flags & V_SAVEMETA) &&
	    (vp->v_dirtyblkhd.lh_first || vp->v_cleanblkhd.lh_first))
		panic("vinvalbuf: flush failed");
	return (0);
}

/*
 * Associate a buffer with a vnode.
 */
void
bgetvp(vp, bp)
	register struct vnode *vp;
	register struct buf *bp;
{

	if (bp->b_vp)
		panic("bgetvp: not free");
	VHOLD(vp);
	bp->b_vp = vp;
	if (vp->v_type == VBLK || vp->v_type == VCHR)
		bp->b_dev = vp->v_rdev;
	else
		bp->b_dev = NODEV;
	/*
	 * Insert onto list for new vnode.
	 */
	bufinsvn(bp, &vp->v_cleanblkhd);
}

/*
 * Disassociate a buffer from a vnode.
 */
void
brelvp(bp)
	register struct buf *bp;
{
	struct vnode *vp;

	if (bp->b_vp == (struct vnode *) 0)
		panic("brelvp: NULL");
	/*
	 * Delete from old vnode list, if on one.
	 */
	if (bp->b_vnbufs.le_next != NOLIST)
		bufremvn(bp);
	vp = bp->b_vp;
	bp->b_vp = (struct vnode *) 0;
	HOLDRELE(vp);
}

/*
 * Reassign a buffer from one vnode to another.
 * Used to assign file specific control information
 * (indirect blocks) to the vnode to which they belong.
 */
void
reassignbuf(bp, newvp)
	register struct buf *bp;
	register struct vnode *newvp;
{
	register struct buflists *listheadp;

	if (newvp == NULL) {
		printf("reassignbuf: NULL");
		return;
	}
	/*
	 * Delete from old vnode list, if on one.
	 */
	if (bp->b_vnbufs.le_next != NOLIST)
		bufremvn(bp);
	/*
	 * If dirty, put on list of dirty buffers;
	 * otherwise insert onto list of clean buffers.
	 */
	if (bp->b_flags & B_DELWRI)
		listheadp = &newvp->v_dirtyblkhd;
	else
		listheadp = &newvp->v_cleanblkhd;
	bufinsvn(bp, listheadp);
}

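/*
 * Illustrative sketch (an editor's addition, not part of the original
 * source): the buffer cache's bdwrite() is the usual caller of
 * reassignbuf(), moving a buffer onto its vnode's dirty list when the
 * buffer is first marked for delayed write, roughly as follows.
 */
#ifdef notdef
	if ((bp->b_flags & B_DELWRI) == 0) {
		bp->b_flags |= B_DELWRI;
		reassignbuf(bp, bp->b_vp);
	}
#endif /* notdef */
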
/*
 * Create a vnode for a block device.
 * Used for root filesystem, argdev, and swap areas.
 * Also used for memory file system special devices.
 */
int
bdevvp(dev, vpp)
	dev_t dev;
	struct vnode **vpp;
{
	register struct vnode *vp;
	struct vnode *nvp;
	int error;

	if (dev == NODEV) {
		*vpp = NULLVP;
		return (ENODEV);
	}
	error = getnewvnode(VT_NON, (struct mount *)0, spec_vnodeop_p, &nvp);
	if (error) {
		*vpp = NULLVP;
		return (error);
	}
	vp = nvp;
	vp->v_type = VBLK;
	if (nvp = checkalias(vp, dev, (struct mount *)0)) {
		vput(vp);
		vp = nvp;
	}
	*vpp = vp;
	return (0);
}

/*
 * Check to see if the new vnode represents a special device
 * for which we already have a vnode (either because of
 * bdevvp() or because of a different vnode representing
 * the same block device). If such an alias exists, deallocate
 * the existing contents and return the aliased vnode. The
 * caller is responsible for filling it with its new contents.
 */
struct vnode *
checkalias(nvp, nvp_rdev, mp)
	register struct vnode *nvp;
	dev_t nvp_rdev;
	struct mount *mp;
{
	register struct vnode *vp;
	struct vnode **vpp;

	if (nvp->v_type != VBLK && nvp->v_type != VCHR)
		return (NULLVP);

	vpp = &speclisth[SPECHASH(nvp_rdev)];
loop:
	for (vp = *vpp; vp; vp = vp->v_specnext) {
		if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type)
			continue;
		/*
		 * Alias, but not in use, so flush it out.
		 */
		if (vp->v_usecount == 0) {
			vgone(vp);
			goto loop;
		}
		if (vget(vp, 1))
			goto loop;
		break;
	}
	if (vp == NULL || vp->v_tag != VT_NON) {
		MALLOC(nvp->v_specinfo, struct specinfo *,
			sizeof(struct specinfo), M_VNODE, M_WAITOK);
		nvp->v_rdev = nvp_rdev;
		nvp->v_hashchain = vpp;
		nvp->v_specnext = *vpp;
		nvp->v_specflags = 0;
		*vpp = nvp;
		if (vp != NULL) {
			nvp->v_flag |= VALIASED;
			vp->v_flag |= VALIASED;
			vput(vp);
		}
		return (NULLVP);
	}
	VOP_UNLOCK(vp);
	vclean(vp, 0);
	vp->v_op = nvp->v_op;
	vp->v_tag = nvp->v_tag;
	nvp->v_type = VNON;
	insmntque(vp, mp);
	return (vp);
}

/*
 * Grab a particular vnode from the free list, increment its
 * reference count and lock it. The vnode lock bit is set while
 * the vnode is being eliminated in vgone. The process is awakened
 * when the transition is completed, and an error returned to
 * indicate that the vnode is no longer usable (possibly having
 * been changed to a new file system type).
 */
int
vget(vp, lockflag)
	register struct vnode *vp;
	int lockflag;
{

	/*
	 * If the vnode is in the process of being cleaned out for
	 * another use, we wait for the cleaning to finish and then
	 * return failure. Cleaning is determined either by checking
	 * that the VXLOCK flag is set, or that the use count is
	 * zero with the back pointer set to show that it has been
	 * removed from the free list by getnewvnode. The VXLOCK
	 * flag may not have been set yet because vclean is blocked in
	 * the VOP_LOCK call waiting for the VOP_INACTIVE to complete.
	 */
	if ((vp->v_flag & VXLOCK) ||
	    (vp->v_usecount == 0 &&
	     vp->v_freelist.tqe_prev == (struct vnode **)0xdeadb)) {
		vp->v_flag |= VXWANT;
		tsleep((caddr_t)vp, PINOD, "vget", 0);
		return (1);
	}
	if (vp->v_usecount == 0)
		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
	vp->v_usecount++;
	if (lockflag)
		VOP_LOCK(vp);
	return (0);
}

/*
 * Vnode reference, just increment the count
 */
void
vref(vp)
	struct vnode *vp;
{

	if (vp->v_usecount <= 0)
		panic("vref used where vget required");
	vp->v_usecount++;
}

/*
 * vput(), just unlock and vrele()
 */
void
vput(vp)
	register struct vnode *vp;
{

	VOP_UNLOCK(vp);
	vrele(vp);
}

/*
 * Vnode release.
 * If count drops to zero, call inactive routine and return to freelist.
 */
void
vrele(vp)
	register struct vnode *vp;
{

#ifdef DIAGNOSTIC
	if (vp == NULL)
		panic("vrele: null vp");
#endif
	vp->v_usecount--;
	if (vp->v_usecount > 0)
		return;
#ifdef DIAGNOSTIC
	if (vp->v_usecount != 0 || vp->v_writecount != 0) {
		vprint("vrele: bad ref count", vp);
		panic("vrele: ref cnt");
	}
#endif
	/*
	 * insert at tail of LRU list
	 */
	TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
	VOP_INACTIVE(vp);
}

/*
 * Page or buffer structure gets a reference.
 */
void
vhold(vp)
	register struct vnode *vp;
{

	vp->v_holdcnt++;
}

/*
 * Page or buffer structure frees a reference.
 */
void
holdrele(vp)
	register struct vnode *vp;
{

	if (vp->v_holdcnt <= 0)
		panic("holdrele: holdcnt");
	vp->v_holdcnt--;
}

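/*
 * Illustrative sketch (an editor's addition, not part of the original
 * source): the reference lifecycle most lookups follow, mirroring
 * checkalias() above.  A failed vget() means the vnode was being
 * cleaned out, so the caller redoes its lookup; vput() then unlocks
 * and releases the reference in one step.
 */
#ifdef notdef
loop:
	/* ... locate vp via a hash chain or name cache lookup ... */
	if (vget(vp, 1))
		goto loop;		/* being reclaimed; look it up again */
	/* ... use the locked, referenced vnode ... */
	vput(vp);
#endif /* notdef */
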
/*
 * Remove any vnodes in the vnode table belonging to mount point mp.
 *
 * If MNT_NOFORCE is specified, there should not be any active ones,
 * return error if any are found (nb: this is a user error, not a
 * system error). If MNT_FORCE is specified, detach any active vnodes
 * that are found.
 */
#ifdef DIAGNOSTIC
int busyprt = 0;	/* print out busy vnodes */
struct ctldebug debug1 = { "busyprt", &busyprt };
#endif

int
vflush(mp, skipvp, flags)
	struct mount *mp;
	struct vnode *skipvp;
	int flags;
{
	register struct vnode *vp, *nvp;
	int busy = 0;

	if ((mp->mnt_flag & MNT_MPBUSY) == 0)
		panic("vflush: not busy");
loop:
	for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) {
		if (vp->v_mount != mp)
			goto loop;
		nvp = vp->v_mntvnodes.le_next;
		/*
		 * Skip over a selected vnode.
		 */
		if (vp == skipvp)
			continue;
		/*
		 * Skip over vnodes marked VSYSTEM.
		 */
		if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM))
			continue;
		/*
		 * If WRITECLOSE is set, only flush out regular file
		 * vnodes open for writing.
		 */
		if ((flags & WRITECLOSE) &&
		    (vp->v_writecount == 0 || vp->v_type != VREG))
			continue;
		/*
		 * With v_usecount == 0, all we need to do is clear
		 * out the vnode data structures and we are done.
		 */
		if (vp->v_usecount == 0) {
			vgone(vp);
			continue;
		}
		/*
		 * If FORCECLOSE is set, forcibly close the vnode.
		 * For block or character devices, revert to an
		 * anonymous device. For all other files, just kill them.
		 */
		if (flags & FORCECLOSE) {
			if (vp->v_type != VBLK && vp->v_type != VCHR) {
				vgone(vp);
			} else {
				vclean(vp, 0);
				vp->v_op = spec_vnodeop_p;
				insmntque(vp, (struct mount *)0);
			}
			continue;
		}
#ifdef DIAGNOSTIC
		if (busyprt)
			vprint("vflush: busy vnode", vp);
#endif
		busy++;
	}
	if (busy)
		return (EBUSY);
	return (0);
}

/*
 * Disassociate the underlying file system from a vnode.
 */
void
vclean(vp, flags)
	register struct vnode *vp;
	int flags;
{
	int active;

	/*
	 * Check to see if the vnode is in use.
	 * If so we have to reference it before we clean it out
	 * so that its count cannot fall to zero and generate a
	 * race against ourselves to recycle it.
	 */
	if (active = vp->v_usecount)
		VREF(vp);
	/*
	 * Even if the count is zero, the VOP_INACTIVE routine may still
	 * have the object locked while it cleans it out. The VOP_LOCK
	 * ensures that the VOP_INACTIVE routine is done with its work.
	 * For active vnodes, it ensures that no other activity can
	 * occur while the underlying object is being cleaned out.
	 */
	VOP_LOCK(vp);
	/*
	 * Prevent the vnode from being recycled or
	 * brought into use while we clean it out.
	 */
	if (vp->v_flag & VXLOCK)
		panic("vclean: deadlock");
	vp->v_flag |= VXLOCK;
	/*
	 * Clean out any buffers associated with the vnode.
	 */
	if (flags & DOCLOSE)
		vinvalbuf(vp, V_SAVE, NOCRED, NULL, 0, 0);
	/*
	 * Any other processes trying to obtain this lock must first
	 * wait for VXLOCK to clear, then call the new lock operation.
	 */
	VOP_UNLOCK(vp);
	/*
	 * If purging an active vnode, it must be closed and
	 * deactivated before being reclaimed.
	 */
	if (active) {
		if (flags & DOCLOSE)
			VOP_CLOSE(vp, IO_NDELAY, NOCRED, NULL);
		VOP_INACTIVE(vp);
	}
	/*
	 * Reclaim the vnode.
	 */
	if (VOP_RECLAIM(vp))
		panic("vclean: cannot reclaim");
	if (active)
		vrele(vp);
	cache_purge(vp);

	/*
	 * Done with purge, notify sleepers of the grim news.
	 */
	vp->v_op = dead_vnodeop_p;
	vp->v_tag = VT_NON;
	vp->v_flag &= ~VXLOCK;
	if (vp->v_flag & VXWANT) {
		vp->v_flag &= ~VXWANT;
		wakeup((caddr_t)vp);
	}
}

/*
 * Eliminate all activity associated with the requested vnode
 * and with all vnodes aliased to the requested vnode.
 */
int
vop_revoke(ap)
	struct vop_revoke_args /* {
		struct vnode *a_vp;
		int a_flags;
	} */ *ap;
{
	register struct vnode *vp, *vq;

	vp = ap->a_vp;
	if ((ap->a_flags & REVOKEALL) && (vp->v_flag & VALIASED)) {
		/*
		 * If a vgone (or vclean) is already in progress,
		 * wait until it is done and return.
		 */
		if (vp->v_flag & VXLOCK) {
			vp->v_flag |= VXWANT;
			tsleep((caddr_t)vp, PINOD, "vop_revokeall", 0);
			return (0);
		}
		/*
		 * Ensure that vp will not be vgone'd while we
		 * are eliminating its aliases.
		 */
		vp->v_flag |= VXLOCK;
		while (vp->v_flag & VALIASED) {
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_rdev != vp->v_rdev ||
				    vq->v_type != vp->v_type || vp == vq)
					continue;
				vgone(vq);
				break;
			}
		}
		/*
		 * Remove the lock so that vgone below will
		 * really eliminate the vnode after which time
		 * vgone will awaken any sleepers.
		 */
		vp->v_flag &= ~VXLOCK;
	}
	vgone(vp);
	return (0);
}

/*
 * Eliminate all activity associated with a vnode
 * in preparation for reuse.
 */
void
vgone(vp)
	register struct vnode *vp;
{
	register struct vnode *vq;
	struct vnode *vx;

	/*
	 * If a vgone (or vclean) is already in progress,
	 * wait until it is done and return.
	 */
	if (vp->v_flag & VXLOCK) {
		vp->v_flag |= VXWANT;
		tsleep((caddr_t)vp, PINOD, "vgone", 0);
		return;
	}
	/*
	 * Clean out the filesystem specific data.
	 */
	vclean(vp, DOCLOSE);
	/*
	 * Delete from old mount point vnode list, if on one.
	 */
	if (vp->v_mount != NULL) {
		LIST_REMOVE(vp, v_mntvnodes);
		vp->v_mount = NULL;
	}
	/*
	 * If special device, remove it from special device alias list
	 * if it is on one.
	 */
	if ((vp->v_type == VBLK || vp->v_type == VCHR) && vp->v_specinfo != 0) {
		if (*vp->v_hashchain == vp) {
			*vp->v_hashchain = vp->v_specnext;
		} else {
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_specnext != vp)
					continue;
				vq->v_specnext = vp->v_specnext;
				break;
			}
			if (vq == NULL)
				panic("missing bdev");
		}
		if (vp->v_flag & VALIASED) {
			vx = NULL;
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_rdev != vp->v_rdev ||
				    vq->v_type != vp->v_type)
					continue;
				if (vx)
					break;
				vx = vq;
			}
			if (vx == NULL)
				panic("missing alias");
			if (vq == NULL)
				vx->v_flag &= ~VALIASED;
			vp->v_flag &= ~VALIASED;
		}
		FREE(vp->v_specinfo, M_VNODE);
		vp->v_specinfo = NULL;
	}
	/*
	 * If it is on the freelist and not already at the head,
	 * move it to the head of the list. The test of the back
	 * pointer and the reference count of zero is because
	 * it will be removed from the free list by getnewvnode,
	 * but will not have its reference count incremented until
	 * after calling vgone. If the reference count were
	 * incremented first, vgone would (incorrectly) try to
	 * close the previous instance of the underlying object.
	 * So, the back pointer is explicitly set to `0xdeadb' in
	 * getnewvnode after removing it from the freelist to ensure
	 * that we do not try to move it here.
	 */
	if (vp->v_usecount == 0 &&
	    vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb &&
	    vnode_free_list.tqh_first != vp) {
		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
		TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
	}
	vp->v_type = VBAD;
}

/*
 * Lookup a vnode by device number.
 */
int
vfinddev(dev, type, vpp)
	dev_t dev;
	enum vtype type;
	struct vnode **vpp;
{
	register struct vnode *vp;

	for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) {
		if (dev != vp->v_rdev || type != vp->v_type)
			continue;
		*vpp = vp;
		return (1);
	}
	return (0);
}

/*
 * Calculate the total number of references to a special device.
 */
int
vcount(vp)
	register struct vnode *vp;
{
	register struct vnode *vq, *vnext;
	int count;

loop:
	if ((vp->v_flag & VALIASED) == 0)
		return (vp->v_usecount);
	for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) {
		vnext = vq->v_specnext;
		if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type)
			continue;
		/*
		 * Alias, but not in use, so flush it out.
		 */
		if (vq->v_usecount == 0 && vq != vp) {
			vgone(vq);
			goto loop;
		}
		count += vq->v_usecount;
	}
	return (count);
}

/*
 * Print out a description of a vnode.
 */
static char *typename[] =
   { "VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD" };

void
vprint(label, vp)
	char *label;
	register struct vnode *vp;
{
	char buf[64];

	if (label != NULL)
		printf("%s: ", label);
	printf("type %s, usecount %d, writecount %d, refcount %d,",
		typename[vp->v_type], vp->v_usecount, vp->v_writecount,
		vp->v_holdcnt);
	buf[0] = '\0';
	if (vp->v_flag & VROOT)
		strcat(buf, "|VROOT");
	if (vp->v_flag & VTEXT)
		strcat(buf, "|VTEXT");
	if (vp->v_flag & VSYSTEM)
		strcat(buf, "|VSYSTEM");
	if (vp->v_flag & VXLOCK)
		strcat(buf, "|VXLOCK");
	if (vp->v_flag & VXWANT)
		strcat(buf, "|VXWANT");
	if (vp->v_flag & VBWAIT)
		strcat(buf, "|VBWAIT");
	if (vp->v_flag & VALIASED)
		strcat(buf, "|VALIASED");
	if (buf[0] != '\0')
		printf(" flags (%s)", &buf[1]);
	if (vp->v_data == NULL) {
		printf("\n");
	} else {
		printf("\n\t");
		VOP_PRINT(vp);
	}
}

#ifdef DEBUG
/*
 * List all of the locked vnodes in the system.
 * Called when debugging the kernel.
 */
void
printlockedvnodes()
{
	register struct mount *mp;
	register struct vnode *vp;

	printf("Locked vnodes\n");
	for (mp = mountlist.cqh_first; mp != (void *)&mountlist;
	     mp = mp->mnt_list.cqe_next) {
		for (vp = mp->mnt_vnodelist.lh_first;
		     vp != NULL;
		     vp = vp->v_mntvnodes.le_next) {
			if (VOP_ISLOCKED(vp))
				vprint((char *)0, vp);
		}
	}
}
#endif

/*
 * Top level filesystem related information gathering.
 */
int
vfs_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p)
	int *name;
	u_int namelen;
	void *oldp;
	size_t *oldlenp;
	void *newp;
	size_t newlen;
	struct proc *p;
{
	struct ctldebug *cdp;
	struct vfsconf *vfsp;

	/* all sysctl names at this level are at least name and field */
	if (namelen < 2)
		return (ENOTDIR);		/* overloaded */
	if (name[0] != VFS_GENERIC) {
		for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
			if (vfsp->vfc_typenum == name[0])
				break;
		if (vfsp == NULL)
			return (EOPNOTSUPP);
		return ((*vfsp->vfc_vfsops->vfs_sysctl)(&name[1], namelen - 1,
		    oldp, oldlenp, newp, newlen, p));
	}
	switch (name[1]) {
	case VFS_MAXTYPENUM:
		return (sysctl_rdint(oldp, oldlenp, newp, maxvfsconf));
	case VFS_CONF:
		if (namelen < 3)
			return (ENOTDIR);	/* overloaded */
		for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
			if (vfsp->vfc_typenum == name[2])
				break;
		if (vfsp == NULL)
			return (EOPNOTSUPP);
		return (sysctl_rdstruct(oldp, oldlenp, newp, vfsp,
		    sizeof(struct vfsconf)));
	}
	return (EOPNOTSUPP);
}

int kinfo_vdebug = 1;
int kinfo_vgetfailed;
#define KINFO_VNODESLOP	10
/*
 * Dump vnode list (via sysctl).
 * Copyout address of vnode followed by vnode.
 */
/* ARGSUSED */
int
sysctl_vnode(where, sizep)
	char *where;
	size_t *sizep;
{
	register struct mount *mp, *nmp;
	struct vnode *vp;
	register char *bp = where, *savebp;
	char *ewhere;
	int error;

#define VPTRSZ	sizeof (struct vnode *)
#define VNODESZ	sizeof (struct vnode)
	if (where == NULL) {
		*sizep = (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ);
		return (0);
	}
	ewhere = where + *sizep;

	for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
		nmp = mp->mnt_list.cqe_next;
		if (vfs_busy(mp))
			continue;
		savebp = bp;
again:
		for (vp = mp->mnt_vnodelist.lh_first;
		     vp != NULL;
		     vp = vp->v_mntvnodes.le_next) {
			/*
			 * Check that the vp is still associated with
			 * this filesystem.  RACE: could have been
			 * recycled onto the same filesystem.
			 */
			if (vp->v_mount != mp) {
				if (kinfo_vdebug)
					printf("kinfo: vp changed\n");
				bp = savebp;
				goto again;
			}
			if (bp + VPTRSZ + VNODESZ > ewhere) {
				*sizep = bp - where;
				return (ENOMEM);
			}
			if ((error = copyout((caddr_t)&vp, bp, VPTRSZ)) ||
			    (error = copyout((caddr_t)vp, bp + VPTRSZ, VNODESZ)))
				return (error);
			bp += VPTRSZ + VNODESZ;
		}
		vfs_unbusy(mp);
	}

	*sizep = bp - where;
	return (0);
}

/*
 * Check to see if a filesystem is mounted on a block device.
 */
int
vfs_mountedon(vp)
	register struct vnode *vp;
{
	register struct vnode *vq;

	if (vp->v_specflags & SI_MOUNTEDON)
		return (EBUSY);
	if (vp->v_flag & VALIASED) {
		for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
			if (vq->v_rdev != vp->v_rdev ||
			    vq->v_type != vp->v_type)
				continue;
			if (vq->v_specflags & SI_MOUNTEDON)
				return (EBUSY);
		}
	}
	return (0);
}

/*
 * Unmount all filesystems. The list is traversed in reverse order
 * of mounting to avoid dependencies.
 */
void
vfs_unmountall()
{
	struct mount *mp, *nmp;

	for (mp = mountlist.cqh_last; mp != (void *)&mountlist; mp = nmp) {
		nmp = mp->mnt_list.cqe_prev;
		(void) dounmount(mp, MNT_FORCE, &proc0);
	}
}

/*
 * Build hash lists of net addresses and hang them off the mount point.
 * Called by ufs_mount() to set up the lists of export addresses.
 */
static int
vfs_hang_addrlist(mp, nep, argp)
	struct mount *mp;
	struct netexport *nep;
	struct export_args *argp;
{
	register struct netcred *np;
	register struct radix_node_head *rnh;
	register int i;
	struct radix_node *rn;
	struct sockaddr *saddr, *smask = 0;
	struct domain *dom;
	int error;

	if (argp->ex_addrlen == 0) {
		if (mp->mnt_flag & MNT_DEFEXPORTED)
			return (EPERM);
		np = &nep->ne_defexported;
		np->netc_exflags = argp->ex_flags;
		np->netc_anon = argp->ex_anon;
		np->netc_anon.cr_ref = 1;
		mp->mnt_flag |= MNT_DEFEXPORTED;
		return (0);
	}
	i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen;
	np = (struct netcred *)malloc(i, M_NETADDR, M_WAITOK);
	bzero((caddr_t)np, i);
	saddr = (struct sockaddr *)(np + 1);
	if (error = copyin(argp->ex_addr, (caddr_t)saddr, argp->ex_addrlen))
		goto out;
	if (saddr->sa_len > argp->ex_addrlen)
		saddr->sa_len = argp->ex_addrlen;
	if (argp->ex_masklen) {
		smask = (struct sockaddr *)((caddr_t)saddr + argp->ex_addrlen);
		error = copyin(argp->ex_mask, (caddr_t)smask, argp->ex_masklen);
		if (error)
			goto out;
		if (smask->sa_len > argp->ex_masklen)
			smask->sa_len = argp->ex_masklen;
	}
	i = saddr->sa_family;
	if ((rnh = nep->ne_rtable[i]) == 0) {
		/*
		 * Seems silly to initialize every AF when most are not
		 * used, do so on demand here
		 */
		for (dom = domains; dom; dom = dom->dom_next)
			if (dom->dom_family == i && dom->dom_rtattach) {
				dom->dom_rtattach((void **)&nep->ne_rtable[i],
					dom->dom_rtoffset);
				break;
			}
		if ((rnh = nep->ne_rtable[i]) == 0) {
			error = ENOBUFS;
			goto out;
		}
	}
	rn = (*rnh->rnh_addaddr)((caddr_t)saddr, (caddr_t)smask, rnh,
		np->netc_rnodes);
	if (rn == 0) {
		/*
		 * One of the reasons that rnh_addaddr may fail is that
		 * the entry already exists. To check for this case, we
		 * look up the entry to see if it is there. If so, we
		 * do not need to make a new entry but do return success.
		 */
		free(np, M_NETADDR);
		rn = (*rnh->rnh_matchaddr)((caddr_t)saddr, rnh);
		if (rn != 0 && (rn->rn_flags & RNF_ROOT) == 0 &&
		    ((struct netcred *)rn)->netc_exflags == argp->ex_flags &&
		    !bcmp((caddr_t)&((struct netcred *)rn)->netc_anon,
			(caddr_t)&argp->ex_anon, sizeof(struct ucred)))
			return (0);
		return (EPERM);
	}
	np->netc_exflags = argp->ex_flags;
	np->netc_anon = argp->ex_anon;
	np->netc_anon.cr_ref = 1;
	return (0);
out:
	free(np, M_NETADDR);
	return (error);
}

/* ARGSUSED */
static int
vfs_free_netcred(rn, w)
	struct radix_node *rn;
	caddr_t w;
{
	register struct radix_node_head *rnh = (struct radix_node_head *)w;

	(*rnh->rnh_deladdr)(rn->rn_key, rn->rn_mask, rnh);
	free((caddr_t)rn, M_NETADDR);
	return (0);
}

/*
 * Free the net address hash lists that are hanging off the mount points.
 */
static void
vfs_free_addrlist(nep)
	struct netexport *nep;
{
	register int i;
	register struct radix_node_head *rnh;

	for (i = 0; i <= AF_MAX; i++)
		if (rnh = nep->ne_rtable[i]) {
			(*rnh->rnh_walktree)(rnh, vfs_free_netcred,
			    (caddr_t)rnh);
			free((caddr_t)rnh, M_RTABLE);
			nep->ne_rtable[i] = 0;
		}
}

int
vfs_export(mp, nep, argp)
	struct mount *mp;
	struct netexport *nep;
	struct export_args *argp;
{
	int error;

	if (argp->ex_flags & MNT_DELEXPORT) {
		vfs_free_addrlist(nep);
		mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED);
	}
	if (argp->ex_flags & MNT_EXPORTED) {
		if (error = vfs_hang_addrlist(mp, nep, argp))
			return (error);
		mp->mnt_flag |= MNT_EXPORTED;
	}
	return (0);
}

struct netcred *
vfs_export_lookup(mp, nep, nam)
	register struct mount *mp;
	struct netexport *nep;
	struct mbuf *nam;
{
	register struct netcred *np;
	register struct radix_node_head *rnh;
	struct sockaddr *saddr;

	np = NULL;
	if (mp->mnt_flag & MNT_EXPORTED) {
		/*
		 * Lookup in the export list first.
		 */
		if (nam != NULL) {
			saddr = mtod(nam, struct sockaddr *);
			rnh = nep->ne_rtable[saddr->sa_family];
			if (rnh != NULL) {
				np = (struct netcred *)
					(*rnh->rnh_matchaddr)((caddr_t)saddr,
					    rnh);
				if (np && np->netc_rnodes->rn_flags & RNF_ROOT)
					np = NULL;
			}
		}
		/*
		 * If no address match, use the default if it exists.
		 */
		if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED)
			np = &nep->ne_defexported;
	}
	return (np);
}

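/*
 * Illustrative sketch (an editor's addition, not part of the original
 * source): a filesystem's fhtovp routine typically uses
 * vfs_export_lookup() to decide whether the client whose address is in
 * "nam" may reach the filesystem and which anonymous credentials apply;
 * "ump->um_export" stands in here for that filesystem's own
 * struct netexport.
 */
#ifdef notdef
	np = vfs_export_lookup(mp, &ump->um_export, nam);
	if (np == NULL)
		return (EACCES);	/* client is not in the export list */
	*exflagsp = np->netc_exflags;
	*credanonp = &np->netc_anon;
#endif /* notdef */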