/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * %sccs.include.redist.c%
 *
 *	@(#)vfs_subr.c	8.20 (Berkeley) 05/01/95
 */

/*
 * External virtual filesystem routines
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/mount.h>
#include <sys/time.h>
#include <sys/vnode.h>
#include <sys/stat.h>
#include <sys/namei.h>
#include <sys/ucred.h>
#include <sys/buf.h>
#include <sys/errno.h>
#include <sys/malloc.h>
#include <sys/domain.h>
#include <sys/mbuf.h>

#include <vm/vm.h>
#include <sys/sysctl.h>

#include <miscfs/specfs/specdev.h>

/*
 * Conversion tables between file type bits ((mode & S_IFMT) >> 12) and
 * vnode types; these back the IFTOVT() and VTTOIF() macros.
 */
enum vtype iftovt_tab[16] = {
	VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
	VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
};
int	vttoif_tab[9] = {
	0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
	S_IFSOCK, S_IFIFO, S_IFMT,
};

/*
 * Insq/Remq for the vnode usage lists.
 */
#define	bufinsvn(bp, dp)	LIST_INSERT_HEAD(dp, bp, b_vnbufs)
#define	bufremvn(bp) {					\
	LIST_REMOVE(bp, b_vnbufs);			\
	(bp)->b_vnbufs.le_next = NOLIST;		\
}
TAILQ_HEAD(freelst, vnode) vnode_free_list;	/* vnode free list */
struct mntlist mountlist;			/* mounted filesystem list */

/*
 * Initialize the vnode management data structures.
 */
void
vntblinit()
{

	TAILQ_INIT(&vnode_free_list);
	TAILQ_INIT(&mountlist);
}

/*
 * Lock a filesystem.
 * Used to prevent access to it while mounting and unmounting.
 */
int
vfs_lock(mp)
	register struct mount *mp;
{

	while (mp->mnt_flag & MNT_MLOCK) {
		mp->mnt_flag |= MNT_MWAIT;
		tsleep((caddr_t)mp, PVFS, "vfslock", 0);
	}
	mp->mnt_flag |= MNT_MLOCK;
	return (0);
}

/*
 * Unlock a locked filesystem.
 * Panic if filesystem is not locked.
 */
void
vfs_unlock(mp)
	register struct mount *mp;
{

	if ((mp->mnt_flag & MNT_MLOCK) == 0)
		panic("vfs_unlock: not locked");
	mp->mnt_flag &= ~MNT_MLOCK;
	if (mp->mnt_flag & MNT_MWAIT) {
		mp->mnt_flag &= ~MNT_MWAIT;
		wakeup((caddr_t)mp);
	}
}

/*
 * Mark a mount point as busy.
 * Used to synchronize access and to delay unmounting.
 */
int
vfs_busy(mp)
	register struct mount *mp;
{

	while (mp->mnt_flag & MNT_MPBUSY) {
		mp->mnt_flag |= MNT_MPWANT;
		tsleep((caddr_t)&mp->mnt_flag, PVFS, "vfsbusy", 0);
	}
	if (mp->mnt_flag & MNT_UNMOUNT)
		return (1);
	mp->mnt_flag |= MNT_MPBUSY;
	return (0);
}

/*
 * Free a busy filesystem.
 * Panic if filesystem is not busy.
 */
void
vfs_unbusy(mp)
	register struct mount *mp;
{

	if ((mp->mnt_flag & MNT_MPBUSY) == 0)
		panic("vfs_unbusy: not busy");
	mp->mnt_flag &= ~MNT_MPBUSY;
	if (mp->mnt_flag & MNT_MPWANT) {
		mp->mnt_flag &= ~MNT_MPWANT;
		wakeup((caddr_t)&mp->mnt_flag);
	}
}
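/*
 * Usage note: callers bracket traversals of a mount point's vnode list
 * with vfs_busy() and vfs_unbusy(), skipping mount points that are in
 * the middle of being unmounted, as sysctl_vnode() below does:
 *
 *	if (vfs_busy(mp))
 *		continue;
 *	... walk mp->mnt_vnodelist ...
 *	vfs_unbusy(mp);
 */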
/*
 * Lookup a mount point by filesystem identifier.
 */
struct mount *
vfs_getvfs(fsid)
	fsid_t *fsid;
{
	register struct mount *mp;

	for (mp = mountlist.tqh_first; mp != NULL; mp = mp->mnt_list.tqe_next) {
		if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] &&
		    mp->mnt_stat.f_fsid.val[1] == fsid->val[1])
			return (mp);
	}
	return ((struct mount *)0);
}

/*
 * Get a new unique fsid
 */
void
vfs_getnewfsid(mp)
	struct mount *mp;
{
	static u_short xxxfs_mntid;

	fsid_t tfsid;
	int mtype;

	mtype = mp->mnt_vfc->vfc_typenum;
	mp->mnt_stat.f_fsid.val[0] = makedev(nblkdev + mtype, 0);
	mp->mnt_stat.f_fsid.val[1] = mtype;
	if (xxxfs_mntid == 0)
		++xxxfs_mntid;
	tfsid.val[0] = makedev(nblkdev + mtype, xxxfs_mntid);
	tfsid.val[1] = mtype;
	if (mountlist.tqh_first != NULL) {
		while (vfs_getvfs(&tfsid)) {
			tfsid.val[0]++;
			xxxfs_mntid++;
		}
	}
	mp->mnt_stat.f_fsid.val[0] = tfsid.val[0];
}

/*
 * Set vnode attributes to VNOVAL
 */
void
vattr_null(vap)
	register struct vattr *vap;
{

	vap->va_type = VNON;
	vap->va_size = vap->va_bytes = VNOVAL;
	vap->va_mode = vap->va_nlink = vap->va_uid = vap->va_gid =
		vap->va_fsid = vap->va_fileid =
		vap->va_blocksize = vap->va_rdev =
		vap->va_atime.ts_sec = vap->va_atime.ts_nsec =
		vap->va_mtime.ts_sec = vap->va_mtime.ts_nsec =
		vap->va_ctime.ts_sec = vap->va_ctime.ts_nsec =
		vap->va_flags = vap->va_gen = VNOVAL;
	vap->va_vaflags = 0;
}

/*
 * Routines having to do with the management of the vnode table.
 */
extern int (**dead_vnodeop_p)();
extern void vclean();
long numvnodes;
extern struct vattr va_null;

/*
 * Return the next vnode from the free list.
 */
int
getnewvnode(tag, mp, vops, vpp)
	enum vtagtype tag;
	struct mount *mp;
	int (**vops)();
	struct vnode **vpp;
{
	register struct vnode *vp;
	int s;

	if ((vnode_free_list.tqh_first == NULL &&
	     numvnodes < 2 * desiredvnodes) ||
	    numvnodes < desiredvnodes) {
		vp = (struct vnode *)malloc((u_long)sizeof *vp,
		    M_VNODE, M_WAITOK);
		bzero((char *)vp, sizeof *vp);
		numvnodes++;
	} else {
		if ((vp = vnode_free_list.tqh_first) == NULL) {
			tablefull("vnode");
			*vpp = 0;
			return (ENFILE);
		}
		if (vp->v_usecount)
			panic("free vnode isn't");
		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
		/* see comment on why 0xdeadb is set at end of vgone (below) */
		vp->v_freelist.tqe_prev = (struct vnode **)0xdeadb;
		vp->v_lease = NULL;
		if (vp->v_type != VBAD)
			VOP_REVOKE(vp, 0);
#ifdef DIAGNOSTIC
		if (vp->v_data)
			panic("cleaned vnode isn't");
		s = splbio();
		if (vp->v_numoutput)
			panic("Clean vnode has pending I/O's");
		splx(s);
#endif
		vp->v_flag = 0;
		vp->v_lastr = 0;
		vp->v_ralen = 0;
		vp->v_maxra = 0;
		vp->v_lastw = 0;
		vp->v_lasta = 0;
		vp->v_cstart = 0;
		vp->v_clen = 0;
		vp->v_socket = 0;
	}
	vp->v_type = VNON;
	cache_purge(vp);
	vp->v_tag = tag;
	vp->v_op = vops;
	insmntque(vp, mp);
	*vpp = vp;
	vp->v_usecount = 1;
	vp->v_data = 0;
	return (0);
}

/*
 * Move a vnode from one mount queue to another.
 */
void
insmntque(vp, mp)
	register struct vnode *vp;
	register struct mount *mp;
{
	/*
	 * Delete from old mount point vnode list, if on one.
	 */
	if (vp->v_mount != NULL)
		LIST_REMOVE(vp, v_mntvnodes);
	/*
	 * Insert into list of vnodes for the new mount point, if available.
	 */
	if ((vp->v_mount = mp) == NULL)
		return;
	LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes);
}

/*
 * Update outstanding I/O count and do wakeup if requested.
 */
void
vwakeup(bp)
	register struct buf *bp;
{
	register struct vnode *vp;

	bp->b_flags &= ~B_WRITEINPROG;
	if (vp = bp->b_vp) {
		if (--vp->v_numoutput < 0)
			panic("vwakeup: neg numoutput");
		if ((vp->v_flag & VBWAIT) && vp->v_numoutput <= 0) {
			if (vp->v_numoutput < 0)
				panic("vwakeup: neg numoutput 2");
			vp->v_flag &= ~VBWAIT;
			wakeup((caddr_t)&vp->v_numoutput);
		}
	}
}

/*
 * Flush out and invalidate all buffers associated with a vnode.
 * Called with the underlying object locked.
 */
int
vinvalbuf(vp, flags, cred, p, slpflag, slptimeo)
	register struct vnode *vp;
	int flags;
	struct ucred *cred;
	struct proc *p;
	int slpflag, slptimeo;
{
	register struct buf *bp;
	struct buf *nbp, *blist;
	int s, error;

	if (flags & V_SAVE) {
		if (error = VOP_FSYNC(vp, cred, MNT_WAIT, p))
			return (error);
		if (vp->v_dirtyblkhd.lh_first != NULL)
			panic("vinvalbuf: dirty bufs");
	}
	for (;;) {
		if ((blist = vp->v_cleanblkhd.lh_first) && flags & V_SAVEMETA)
			while (blist && blist->b_lblkno < 0)
				blist = blist->b_vnbufs.le_next;
		if (!blist && (blist = vp->v_dirtyblkhd.lh_first) &&
		    (flags & V_SAVEMETA))
			while (blist && blist->b_lblkno < 0)
				blist = blist->b_vnbufs.le_next;
		if (!blist)
			break;

		for (bp = blist; bp; bp = nbp) {
			nbp = bp->b_vnbufs.le_next;
			if (flags & V_SAVEMETA && bp->b_lblkno < 0)
				continue;
			s = splbio();
			if (bp->b_flags & B_BUSY) {
				bp->b_flags |= B_WANTED;
				error = tsleep((caddr_t)bp,
					slpflag | (PRIBIO + 1), "vinvalbuf",
					slptimeo);
				splx(s);
				if (error)
					return (error);
				break;
			}
			bremfree(bp);
			bp->b_flags |= B_BUSY;
			splx(s);
			/*
			 * XXX Since there are no node locks for NFS, I believe
			 * there is a slight chance that a delayed write will
			 * occur while sleeping just above, so check for it.
			 */
			if ((bp->b_flags & B_DELWRI) && (flags & V_SAVE)) {
				(void) VOP_BWRITE(bp);
				break;
			}
			bp->b_flags |= B_INVAL;
			brelse(bp);
		}
	}
	if (!(flags & V_SAVEMETA) &&
	    (vp->v_dirtyblkhd.lh_first || vp->v_cleanblkhd.lh_first))
		panic("vinvalbuf: flush failed");
	return (0);
}

/*
 * Associate a buffer with a vnode.
 */
void
bgetvp(vp, bp)
	register struct vnode *vp;
	register struct buf *bp;
{

	if (bp->b_vp)
		panic("bgetvp: not free");
	VHOLD(vp);
	bp->b_vp = vp;
	if (vp->v_type == VBLK || vp->v_type == VCHR)
		bp->b_dev = vp->v_rdev;
	else
		bp->b_dev = NODEV;
	/*
	 * Insert onto list for new vnode.
	 */
	bufinsvn(bp, &vp->v_cleanblkhd);
}

/*
 * Disassociate a buffer from a vnode.
 */
void
brelvp(bp)
	register struct buf *bp;
{
	struct vnode *vp;

	if (bp->b_vp == (struct vnode *) 0)
		panic("brelvp: NULL");
	/*
	 * Delete from old vnode list, if on one.
	 */
	if (bp->b_vnbufs.le_next != NOLIST)
		bufremvn(bp);
	vp = bp->b_vp;
	bp->b_vp = (struct vnode *) 0;
	HOLDRELE(vp);
}
/*
 * Reassign a buffer from one vnode to another.
 * Used to assign file specific control information
 * (indirect blocks) to the vnode to which they belong.
 */
void
reassignbuf(bp, newvp)
	register struct buf *bp;
	register struct vnode *newvp;
{
	register struct buflists *listheadp;

	if (newvp == NULL) {
		printf("reassignbuf: NULL");
		return;
	}
	/*
	 * Delete from old vnode list, if on one.
	 */
	if (bp->b_vnbufs.le_next != NOLIST)
		bufremvn(bp);
	/*
	 * If dirty, put on list of dirty buffers;
	 * otherwise insert onto list of clean buffers.
	 */
	if (bp->b_flags & B_DELWRI)
		listheadp = &newvp->v_dirtyblkhd;
	else
		listheadp = &newvp->v_cleanblkhd;
	bufinsvn(bp, listheadp);
}

/*
 * Create a vnode for a block device.
 * Used for root filesystem, argdev, and swap areas.
 * Also used for memory file system special devices.
 */
int
bdevvp(dev, vpp)
	dev_t dev;
	struct vnode **vpp;
{
	register struct vnode *vp;
	struct vnode *nvp;
	int error;

	if (dev == NODEV)
		return (0);
	error = getnewvnode(VT_NON, (struct mount *)0, spec_vnodeop_p, &nvp);
	if (error) {
		*vpp = NULLVP;
		return (error);
	}
	vp = nvp;
	vp->v_type = VBLK;
	if (nvp = checkalias(vp, dev, (struct mount *)0)) {
		vput(vp);
		vp = nvp;
	}
	*vpp = vp;
	return (0);
}

/*
 * Check to see if the new vnode represents a special device
 * for which we already have a vnode (either because of
 * bdevvp() or because of a different vnode representing
 * the same block device). If such an alias exists, deallocate
 * the existing contents and return the aliased vnode. The
 * caller is responsible for filling it with its new contents.
 */
struct vnode *
checkalias(nvp, nvp_rdev, mp)
	register struct vnode *nvp;
	dev_t nvp_rdev;
	struct mount *mp;
{
	register struct vnode *vp;
	struct vnode **vpp;

	if (nvp->v_type != VBLK && nvp->v_type != VCHR)
		return (NULLVP);

	vpp = &speclisth[SPECHASH(nvp_rdev)];
loop:
	for (vp = *vpp; vp; vp = vp->v_specnext) {
		if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type)
			continue;
		/*
		 * Alias, but not in use, so flush it out.
		 */
		if (vp->v_usecount == 0) {
			vgone(vp);
			goto loop;
		}
		if (vget(vp, 1))
			goto loop;
		break;
	}
	if (vp == NULL || vp->v_tag != VT_NON) {
		MALLOC(nvp->v_specinfo, struct specinfo *,
			sizeof(struct specinfo), M_VNODE, M_WAITOK);
		nvp->v_rdev = nvp_rdev;
		nvp->v_hashchain = vpp;
		nvp->v_specnext = *vpp;
		nvp->v_specflags = 0;
		*vpp = nvp;
		if (vp != NULL) {
			nvp->v_flag |= VALIASED;
			vp->v_flag |= VALIASED;
			vput(vp);
		}
		return (NULLVP);
	}
	VOP_UNLOCK(vp);
	vclean(vp, 0);
	vp->v_op = nvp->v_op;
	vp->v_tag = nvp->v_tag;
	nvp->v_type = VNON;
	insmntque(vp, mp);
	return (vp);
}
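/*
 * Note on special-device aliases: every VBLK/VCHR vnode is linked through
 * v_specnext onto the hash chain speclisth[SPECHASH(rdev)], with
 * v_hashchain pointing back at the chain head.  checkalias() above
 * installs new entries and marks coexisting vnodes for the same device
 * VALIASED; vfinddev(), vcount(), and vgone() below walk the same chains.
 */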
/*
 * Grab a particular vnode from the free list, increment its
 * reference count and lock it. The vnode lock bit is set if the
 * vnode is being eliminated in vgone. The process is awakened
 * when the transition is completed, and an error returned to
 * indicate that the vnode is no longer usable (possibly having
 * been changed to a new file system type).
 */
int
vget(vp, lockflag)
	register struct vnode *vp;
	int lockflag;
{

	/*
	 * If the vnode is in the process of being cleaned out for
	 * another use, we wait for the cleaning to finish and then
	 * return failure. Cleaning is determined either by checking
	 * that the VXLOCK flag is set, or that the use count is
	 * zero with the back pointer set to show that it has been
	 * removed from the free list by getnewvnode. The VXLOCK
	 * flag may not have been set yet because vclean is blocked in
	 * the VOP_LOCK call waiting for the VOP_INACTIVE to complete.
	 */
	if ((vp->v_flag & VXLOCK) ||
	    (vp->v_usecount == 0 &&
	     vp->v_freelist.tqe_prev == (struct vnode **)0xdeadb)) {
		vp->v_flag |= VXWANT;
		tsleep((caddr_t)vp, PINOD, "vget", 0);
		return (1);
	}
	if (vp->v_usecount == 0)
		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
	vp->v_usecount++;
	if (lockflag)
		VOP_LOCK(vp);
	return (0);
}

/*
 * Vnode reference, just increment the count
 */
void
vref(vp)
	struct vnode *vp;
{

	if (vp->v_usecount <= 0)
		panic("vref used where vget required");
	vp->v_usecount++;
}

/*
 * vput(), just unlock and vrele()
 */
void
vput(vp)
	register struct vnode *vp;
{

	VOP_UNLOCK(vp);
	vrele(vp);
}

/*
 * Vnode release.
 * If count drops to zero, call inactive routine and return to freelist.
 */
void
vrele(vp)
	register struct vnode *vp;
{

#ifdef DIAGNOSTIC
	if (vp == NULL)
		panic("vrele: null vp");
#endif
	vp->v_usecount--;
	if (vp->v_usecount > 0)
		return;
#ifdef DIAGNOSTIC
	if (vp->v_usecount != 0 || vp->v_writecount != 0) {
		vprint("vrele: bad ref count", vp);
		panic("vrele: ref cnt");
	}
#endif
	/*
	 * insert at tail of LRU list
	 */
	TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
	VOP_INACTIVE(vp);
}

/*
 * Page or buffer structure gets a reference.
 */
void
vhold(vp)
	register struct vnode *vp;
{

	vp->v_holdcnt++;
}

/*
 * Page or buffer structure frees a reference.
 */
void
holdrele(vp)
	register struct vnode *vp;
{

	if (vp->v_holdcnt <= 0)
		panic("holdrele: holdcnt");
	vp->v_holdcnt--;
}
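/*
 * Reference counting summary: vget() takes a vnode off the free list and
 * establishes a new reference (optionally locking the vnode); vref() adds
 * a reference and may only be used on a vnode that is already referenced;
 * vrele() drops a reference, calling VOP_INACTIVE() and returning the
 * vnode to the free list when the count reaches zero; vput() is shorthand
 * for VOP_UNLOCK() followed by vrele().  vhold()/holdrele() (and the
 * VHOLD/HOLDRELE macros used by the buffer routines above) maintain the
 * separate v_holdcnt taken by pages and buffers.
 */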
/*
 * Remove any vnodes in the vnode table belonging to mount point mp.
 *
 * If MNT_NOFORCE is specified, there should not be any active ones,
 * return error if any are found (nb: this is a user error, not a
 * system error). If MNT_FORCE is specified, detach any active vnodes
 * that are found.
 */
#ifdef DIAGNOSTIC
int busyprt = 0;	/* print out busy vnodes */
struct ctldebug debug1 = { "busyprt", &busyprt };
#endif

int
vflush(mp, skipvp, flags)
	struct mount *mp;
	struct vnode *skipvp;
	int flags;
{
	register struct vnode *vp, *nvp;
	int busy = 0;

	if ((mp->mnt_flag & MNT_MPBUSY) == 0)
		panic("vflush: not busy");
loop:
	for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) {
		if (vp->v_mount != mp)
			goto loop;
		nvp = vp->v_mntvnodes.le_next;
		/*
		 * Skip over a selected vnode.
		 */
		if (vp == skipvp)
			continue;
		/*
		 * Skip over vnodes marked VSYSTEM.
		 */
		if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM))
			continue;
		/*
		 * If WRITECLOSE is set, only flush out regular file
		 * vnodes open for writing.
		 */
		if ((flags & WRITECLOSE) &&
		    (vp->v_writecount == 0 || vp->v_type != VREG))
			continue;
		/*
		 * With v_usecount == 0, all we need to do is clear
		 * out the vnode data structures and we are done.
		 */
		if (vp->v_usecount == 0) {
			VOP_REVOKE(vp, 0);
			continue;
		}
		/*
		 * If FORCECLOSE is set, forcibly close the vnode.
		 * For block or character devices, revert to an
		 * anonymous device. For all other files, just kill them.
		 */
		if (flags & FORCECLOSE) {
			if (vp->v_type != VBLK && vp->v_type != VCHR) {
				VOP_REVOKE(vp, 0);
			} else {
				vclean(vp, 0);
				vp->v_op = spec_vnodeop_p;
				insmntque(vp, (struct mount *)0);
			}
			continue;
		}
#ifdef DIAGNOSTIC
		if (busyprt)
			vprint("vflush: busy vnode", vp);
#endif
		busy++;
	}
	if (busy)
		return (EBUSY);
	return (0);
}

/*
 * Disassociate the underlying file system from a vnode.
 */
void
vclean(vp, flags)
	register struct vnode *vp;
	int flags;
{
	int active;

	/*
	 * Check to see if the vnode is in use.
	 * If so we have to reference it before we clean it out
	 * so that its count cannot fall to zero and generate a
	 * race against ourselves to recycle it.
	 */
	if (active = vp->v_usecount)
		VREF(vp);
	/*
	 * Even if the count is zero, the VOP_INACTIVE routine may still
	 * have the object locked while it cleans it out. The VOP_LOCK
	 * ensures that the VOP_INACTIVE routine is done with its work.
	 * For active vnodes, it ensures that no other activity can
	 * occur while the underlying object is being cleaned out.
	 */
	VOP_LOCK(vp);
	/*
	 * Prevent the vnode from being recycled or
	 * brought into use while we clean it out.
	 */
	if (vp->v_flag & VXLOCK)
		panic("vclean: deadlock");
	vp->v_flag |= VXLOCK;
	/*
	 * Clean out any buffers associated with the vnode.
	 */
	if (flags & DOCLOSE)
		vinvalbuf(vp, V_SAVE, NOCRED, NULL, 0, 0);
	/*
	 * Any other processes trying to obtain this lock must first
	 * wait for VXLOCK to clear, then call the new lock operation.
	 */
	VOP_UNLOCK(vp);
	/*
	 * If purging an active vnode, it must be closed and
	 * deactivated before being reclaimed.
	 */
	if (active) {
		if (flags & DOCLOSE)
			VOP_CLOSE(vp, IO_NDELAY, NOCRED, NULL);
		VOP_INACTIVE(vp);
	}
	/*
	 * Reclaim the vnode.
	 */
	if (VOP_RECLAIM(vp))
		panic("vclean: cannot reclaim");
	if (active)
		vrele(vp);
	cache_purge(vp);

	/*
	 * Done with purge, notify sleepers of the grim news.
	 */
	vp->v_op = dead_vnodeop_p;
	vp->v_tag = VT_NON;
	vp->v_flag &= ~VXLOCK;
	if (vp->v_flag & VXWANT) {
		vp->v_flag &= ~VXWANT;
		wakeup((caddr_t)vp);
	}
}

/*
 * Eliminate all activity associated with the requested vnode
 * and with all vnodes aliased to the requested vnode.
 */
int
vop_revoke(ap)
	struct vop_revoke_args /* {
		struct vnode *a_vp;
		int a_flags;
	} */ *ap;
{
	register struct vnode *vp, *vq;

	vp = ap->a_vp;
	if ((ap->a_flags & REVOKEALL) && (vp->v_flag & VALIASED)) {
		/*
		 * If a vgone (or vclean) is already in progress,
		 * wait until it is done and return.
		 */
		if (vp->v_flag & VXLOCK) {
			vp->v_flag |= VXWANT;
			tsleep((caddr_t)vp, PINOD, "vop_revokeall", 0);
			return (0);
		}
		/*
		 * Ensure that vp will not be vgone'd while we
		 * are eliminating its aliases.
		 */
		vp->v_flag |= VXLOCK;
		while (vp->v_flag & VALIASED) {
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_rdev != vp->v_rdev ||
				    vq->v_type != vp->v_type || vp == vq)
					continue;
				vgone(vq);
				break;
			}
		}
		/*
		 * Remove the lock so that vgone below will
		 * really eliminate the vnode after which time
		 * vgone will awaken any sleepers.
		 */
		vp->v_flag &= ~VXLOCK;
	}
	vgone(vp);
	return (0);
}

/*
 * Eliminate all activity associated with a vnode
 * in preparation for reuse.
 */
void
vgone(vp)
	register struct vnode *vp;
{
	register struct vnode *vq;
	struct vnode *vx;

	/*
	 * If a vgone (or vclean) is already in progress,
	 * wait until it is done and return.
	 */
	if (vp->v_flag & VXLOCK) {
		vp->v_flag |= VXWANT;
		tsleep((caddr_t)vp, PINOD, "vgone", 0);
		return;
	}
	/*
	 * Clean out the filesystem specific data.
	 */
	vclean(vp, DOCLOSE);
	/*
	 * Delete from old mount point vnode list, if on one.
	 */
	if (vp->v_mount != NULL) {
		LIST_REMOVE(vp, v_mntvnodes);
		vp->v_mount = NULL;
	}
	/*
	 * If special device, remove it from special device alias list
	 * if it is on one.
	 */
	if ((vp->v_type == VBLK || vp->v_type == VCHR) && vp->v_specinfo != 0) {
		if (*vp->v_hashchain == vp) {
			*vp->v_hashchain = vp->v_specnext;
		} else {
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_specnext != vp)
					continue;
				vq->v_specnext = vp->v_specnext;
				break;
			}
			if (vq == NULL)
				panic("missing bdev");
		}
		if (vp->v_flag & VALIASED) {
			vx = NULL;
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_rdev != vp->v_rdev ||
				    vq->v_type != vp->v_type)
					continue;
				if (vx)
					break;
				vx = vq;
			}
			if (vx == NULL)
				panic("missing alias");
			if (vq == NULL)
				vx->v_flag &= ~VALIASED;
			vp->v_flag &= ~VALIASED;
		}
		FREE(vp->v_specinfo, M_VNODE);
		vp->v_specinfo = NULL;
	}
	/*
	 * If it is on the freelist and not already at the head,
	 * move it to the head of the list. The test of the back
	 * pointer and the reference count of zero is because
	 * it will be removed from the free list by getnewvnode,
	 * but will not have its reference count incremented until
	 * after calling vgone. If the reference count were
	 * incremented first, vgone would (incorrectly) try to
	 * close the previous instance of the underlying object.
	 * So, the back pointer is explicitly set to `0xdeadb' in
	 * getnewvnode after removing it from the freelist to ensure
	 * that we do not try to move it here.
	 */
	if (vp->v_usecount == 0 &&
	    vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb &&
	    vnode_free_list.tqh_first != vp) {
		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
		TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
	}
	vp->v_type = VBAD;
}

/*
 * Lookup a vnode by device number.
 */
int
vfinddev(dev, type, vpp)
	dev_t dev;
	enum vtype type;
	struct vnode **vpp;
{
	register struct vnode *vp;

	for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) {
		if (dev != vp->v_rdev || type != vp->v_type)
			continue;
		*vpp = vp;
		return (1);
	}
	return (0);
}
/*
 * Calculate the total number of references to a special device.
 */
int
vcount(vp)
	register struct vnode *vp;
{
	register struct vnode *vq, *vnext;
	int count;

loop:
	if ((vp->v_flag & VALIASED) == 0)
		return (vp->v_usecount);
	for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) {
		vnext = vq->v_specnext;
		if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type)
			continue;
		/*
		 * Alias, but not in use, so flush it out.
		 */
		if (vq->v_usecount == 0 && vq != vp) {
			vgone(vq);
			goto loop;
		}
		count += vq->v_usecount;
	}
	return (count);
}

/*
 * Print out a description of a vnode.
 */
static char *typename[] =
   { "VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD" };

void
vprint(label, vp)
	char *label;
	register struct vnode *vp;
{
	char buf[64];

	if (label != NULL)
		printf("%s: ", label);
	printf("type %s, usecount %d, writecount %d, refcount %d,",
		typename[vp->v_type], vp->v_usecount, vp->v_writecount,
		vp->v_holdcnt);
	buf[0] = '\0';
	if (vp->v_flag & VROOT)
		strcat(buf, "|VROOT");
	if (vp->v_flag & VTEXT)
		strcat(buf, "|VTEXT");
	if (vp->v_flag & VSYSTEM)
		strcat(buf, "|VSYSTEM");
	if (vp->v_flag & VXLOCK)
		strcat(buf, "|VXLOCK");
	if (vp->v_flag & VXWANT)
		strcat(buf, "|VXWANT");
	if (vp->v_flag & VBWAIT)
		strcat(buf, "|VBWAIT");
	if (vp->v_flag & VALIASED)
		strcat(buf, "|VALIASED");
	if (buf[0] != '\0')
		printf(" flags (%s)", &buf[1]);
	if (vp->v_data == NULL) {
		printf("\n");
	} else {
		printf("\n\t");
		VOP_PRINT(vp);
	}
}

#ifdef DEBUG
/*
 * List all of the locked vnodes in the system.
 * Called when debugging the kernel.
 */
void
printlockedvnodes()
{
	register struct mount *mp;
	register struct vnode *vp;

	printf("Locked vnodes\n");
	for (mp = mountlist.tqh_first; mp != NULL; mp = mp->mnt_list.tqe_next) {
		for (vp = mp->mnt_vnodelist.lh_first;
		     vp != NULL;
		     vp = vp->v_mntvnodes.le_next)
			if (VOP_ISLOCKED(vp))
				vprint((char *)0, vp);
	}
}
#endif
/*
 * Top level filesystem related information gathering.
 */
int
vfs_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p)
	int *name;
	u_int namelen;
	void *oldp;
	size_t *oldlenp;
	void *newp;
	size_t newlen;
	struct proc *p;
{
	struct ctldebug *cdp;
	struct vfsconf *vfsp;

	/* all sysctl names at this level are at least name and field */
	if (namelen < 2)
		return (ENOTDIR);		/* overloaded */
	if (name[0] != VFS_GENERIC) {
		for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
			if (vfsp->vfc_typenum == name[0])
				break;
		if (vfsp == NULL)
			return (EOPNOTSUPP);
		return ((*vfsp->vfc_vfsops->vfs_sysctl)(&name[1], namelen - 1,
		    oldp, oldlenp, newp, newlen, p));
	}
	switch (name[1]) {
	case VFS_MAXTYPENUM:
		return (sysctl_rdint(oldp, oldlenp, newp, maxvfsconf));
	case VFS_CONF:
		if (namelen < 3)
			return (ENOTDIR);	/* overloaded */
		for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
			if (vfsp->vfc_typenum == name[2])
				break;
		if (vfsp == NULL)
			return (EOPNOTSUPP);
		return (sysctl_rdstruct(oldp, oldlenp, newp, vfsp,
		    sizeof(struct vfsconf)));
	}
	return (EOPNOTSUPP);
}

int kinfo_vdebug = 1;
int kinfo_vgetfailed;
#define KINFO_VNODESLOP	10
/*
 * Dump vnode list (via sysctl).
 * Copyout address of vnode followed by vnode.
 */
/* ARGSUSED */
int
sysctl_vnode(where, sizep)
	char *where;
	size_t *sizep;
{
	register struct mount *mp, *nmp;
	struct vnode *vp;
	register char *bp = where, *savebp;
	char *ewhere;
	int error;

#define VPTRSZ	sizeof (struct vnode *)
#define VNODESZ	sizeof (struct vnode)
	if (where == NULL) {
		*sizep = (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ);
		return (0);
	}
	ewhere = where + *sizep;

	for (mp = mountlist.tqh_first; mp != NULL; mp = nmp) {
		nmp = mp->mnt_list.tqe_next;
		if (vfs_busy(mp))
			continue;
		savebp = bp;
again:
		for (vp = mp->mnt_vnodelist.lh_first;
		     vp != NULL;
		     vp = vp->v_mntvnodes.le_next) {
			/*
			 * Check that the vp is still associated with
			 * this filesystem.  RACE: could have been
			 * recycled onto the same filesystem.
			 */
			if (vp->v_mount != mp) {
				if (kinfo_vdebug)
					printf("kinfo: vp changed\n");
				bp = savebp;
				goto again;
			}
			if (bp + VPTRSZ + VNODESZ > ewhere) {
				*sizep = bp - where;
				return (ENOMEM);
			}
			if ((error = copyout((caddr_t)&vp, bp, VPTRSZ)) ||
			    (error = copyout((caddr_t)vp, bp + VPTRSZ, VNODESZ)))
				return (error);
			bp += VPTRSZ + VNODESZ;
		}
		vfs_unbusy(mp);
	}

	*sizep = bp - where;
	return (0);
}

/*
 * Check to see if a filesystem is mounted on a block device.
 */
int
vfs_mountedon(vp)
	register struct vnode *vp;
{
	register struct vnode *vq;

	if (vp->v_specflags & SI_MOUNTEDON)
		return (EBUSY);
	if (vp->v_flag & VALIASED) {
		for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
			if (vq->v_rdev != vp->v_rdev ||
			    vq->v_type != vp->v_type)
				continue;
			if (vq->v_specflags & SI_MOUNTEDON)
				return (EBUSY);
		}
	}
	return (0);
}
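/*
 * Export handling overview: vfs_export() below is handed an export_args
 * structure and either tears down the existing lists (MNT_DELEXPORT) or
 * calls vfs_hang_addrlist() to add a network address (and optional mask)
 * to the per-mount radix tree of exported hosts.  At request time,
 * vfs_export_lookup() matches a client address against that tree, falling
 * back to the default export entry if one was set up.
 */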
/*
 * Build hash lists of net addresses and hang them off the mount point.
 * Called by ufs_mount() to set up the lists of export addresses.
 */
static int
vfs_hang_addrlist(mp, nep, argp)
	struct mount *mp;
	struct netexport *nep;
	struct export_args *argp;
{
	register struct netcred *np;
	register struct radix_node_head *rnh;
	register int i;
	struct radix_node *rn;
	struct sockaddr *saddr, *smask = 0;
	struct domain *dom;
	int error;

	if (argp->ex_addrlen == 0) {
		if (mp->mnt_flag & MNT_DEFEXPORTED)
			return (EPERM);
		np = &nep->ne_defexported;
		np->netc_exflags = argp->ex_flags;
		np->netc_anon = argp->ex_anon;
		np->netc_anon.cr_ref = 1;
		mp->mnt_flag |= MNT_DEFEXPORTED;
		return (0);
	}
	i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen;
	np = (struct netcred *)malloc(i, M_NETADDR, M_WAITOK);
	bzero((caddr_t)np, i);
	saddr = (struct sockaddr *)(np + 1);
	if (error = copyin(argp->ex_addr, (caddr_t)saddr, argp->ex_addrlen))
		goto out;
	if (saddr->sa_len > argp->ex_addrlen)
		saddr->sa_len = argp->ex_addrlen;
	if (argp->ex_masklen) {
		smask = (struct sockaddr *)((caddr_t)saddr + argp->ex_addrlen);
		error = copyin(argp->ex_mask, (caddr_t)smask, argp->ex_masklen);
		if (error)
			goto out;
		if (smask->sa_len > argp->ex_masklen)
			smask->sa_len = argp->ex_masklen;
	}
	i = saddr->sa_family;
	if ((rnh = nep->ne_rtable[i]) == 0) {
		/*
		 * Seems silly to initialize every AF when most are not
		 * used, do so on demand here
		 */
		for (dom = domains; dom; dom = dom->dom_next)
			if (dom->dom_family == i && dom->dom_rtattach) {
				dom->dom_rtattach((void **)&nep->ne_rtable[i],
					dom->dom_rtoffset);
				break;
			}
		if ((rnh = nep->ne_rtable[i]) == 0) {
			error = ENOBUFS;
			goto out;
		}
	}
	rn = (*rnh->rnh_addaddr)((caddr_t)saddr, (caddr_t)smask, rnh,
		np->netc_rnodes);
	if (rn == 0) {
		/*
		 * One of the reasons that rnh_addaddr may fail is that
		 * the entry already exists. To check for this case, we
		 * look up the entry to see if it is there. If so, we
		 * do not need to make a new entry but do return success.
		 */
		free(np, M_NETADDR);
		rn = (*rnh->rnh_matchaddr)((caddr_t)saddr, rnh);
		if (rn != 0 && (rn->rn_flags & RNF_ROOT) == 0 &&
		    ((struct netcred *)rn)->netc_exflags == argp->ex_flags &&
		    !bcmp((caddr_t)&((struct netcred *)rn)->netc_anon,
			(caddr_t)&argp->ex_anon, sizeof(struct ucred)))
			return (0);
		return (EPERM);
	}
	np->netc_exflags = argp->ex_flags;
	np->netc_anon = argp->ex_anon;
	np->netc_anon.cr_ref = 1;
	return (0);
out:
	free(np, M_NETADDR);
	return (error);
}

/* ARGSUSED */
static int
vfs_free_netcred(rn, w)
	struct radix_node *rn;
	caddr_t w;
{
	register struct radix_node_head *rnh = (struct radix_node_head *)w;

	(*rnh->rnh_deladdr)(rn->rn_key, rn->rn_mask, rnh);
	free((caddr_t)rn, M_NETADDR);
	return (0);
}
/*
 * Free the net address hash lists that are hanging off the mount points.
 */
static void
vfs_free_addrlist(nep)
	struct netexport *nep;
{
	register int i;
	register struct radix_node_head *rnh;

	for (i = 0; i <= AF_MAX; i++)
		if (rnh = nep->ne_rtable[i]) {
			(*rnh->rnh_walktree)(rnh, vfs_free_netcred,
			    (caddr_t)rnh);
			free((caddr_t)rnh, M_RTABLE);
			nep->ne_rtable[i] = 0;
		}
}

int
vfs_export(mp, nep, argp)
	struct mount *mp;
	struct netexport *nep;
	struct export_args *argp;
{
	int error;

	if (argp->ex_flags & MNT_DELEXPORT) {
		vfs_free_addrlist(nep);
		mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED);
	}
	if (argp->ex_flags & MNT_EXPORTED) {
		if (error = vfs_hang_addrlist(mp, nep, argp))
			return (error);
		mp->mnt_flag |= MNT_EXPORTED;
	}
	return (0);
}

struct netcred *
vfs_export_lookup(mp, nep, nam)
	register struct mount *mp;
	struct netexport *nep;
	struct mbuf *nam;
{
	register struct netcred *np;
	register struct radix_node_head *rnh;
	struct sockaddr *saddr;

	np = NULL;
	if (mp->mnt_flag & MNT_EXPORTED) {
		/*
		 * Lookup in the export list first.
		 */
		if (nam != NULL) {
			saddr = mtod(nam, struct sockaddr *);
			rnh = nep->ne_rtable[saddr->sa_family];
			if (rnh != NULL) {
				np = (struct netcred *)
					(*rnh->rnh_matchaddr)((caddr_t)saddr,
					    rnh);
				if (np && np->netc_rnodes->rn_flags & RNF_ROOT)
					np = NULL;
			}
		}
		/*
		 * If no address match, use the default if it exists.
		 */
		if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED)
			np = &nep->ne_defexported;
	}
	return (np);
}