/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * %sccs.include.redist.c%
 *
 *	@(#)vfs_subr.c	8.16 (Berkeley) 02/23/95
 */

/*
 * External virtual filesystem routines
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/mount.h>
#include <sys/time.h>
#include <sys/vnode.h>
#include <sys/stat.h>
#include <sys/namei.h>
#include <sys/ucred.h>
#include <sys/buf.h>
#include <sys/errno.h>
#include <sys/malloc.h>
#include <sys/domain.h>
#include <sys/mbuf.h>

#include <vm/vm.h>
#include <sys/sysctl.h>

#include <miscfs/specfs/specdev.h>

/*
 * Conversion table: S_IFMT file-type code (mode >> 12) to vnode type.
 * Unused slots map to VNON; slot 15 maps to VBAD.
 */
enum vtype iftovt_tab[16] = {
	VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
	VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
};
/*
 * Inverse conversion table: vnode type (enum vtype) to S_IFMT bits.
 */
int	vttoif_tab[9] = {
	0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
	S_IFSOCK, S_IFIFO, S_IFMT,
};

/*
 * Insq/Remq for the vnode usage lists.
 * bufremvn resets le_next to NOLIST so brelvp/reassignbuf can tell
 * whether a buffer is currently on a vnode's buffer list.
 */
#define	bufinsvn(bp, dp)	LIST_INSERT_HEAD(dp, bp, b_vnbufs)
#define	bufremvn(bp) {							\
	LIST_REMOVE(bp, b_vnbufs);					\
	(bp)->b_vnbufs.le_next = NOLIST;				\
}
TAILQ_HEAD(freelst, vnode) vnode_free_list;	/* vnode free list */
struct mntlist mountlist;			/* mounted filesystem list */

/*
 * Initialize the vnode management data structures.
 * Called once at boot before any vnodes or mounts exist.
 */
void
vntblinit()
{

	TAILQ_INIT(&vnode_free_list);
	TAILQ_INIT(&mountlist);
}
/*
 * Lock a filesystem.
 * Used to prevent access to it while mounting and unmounting.
 * Sleeps until the lock is available; always returns 0.
 */
int
vfs_lock(mp)
	register struct mount *mp;
{

	while (mp->mnt_flag & MNT_MLOCK) {
		/*
		 * MNT_MWAIT must be set before sleeping so that
		 * vfs_unlock knows to issue the wakeup.
		 */
		mp->mnt_flag |= MNT_MWAIT;
		tsleep((caddr_t)mp, PVFS, "vfslock", 0);
	}
	mp->mnt_flag |= MNT_MLOCK;
	return (0);
}

/*
 * Unlock a locked filesystem.
 * Panic if filesystem is not locked.
 * Wakes up any process sleeping in vfs_lock (sleep channel is mp).
 */
void
vfs_unlock(mp)
	register struct mount *mp;
{

	if ((mp->mnt_flag & MNT_MLOCK) == 0)
		panic("vfs_unlock: not locked");
	mp->mnt_flag &= ~MNT_MLOCK;
	if (mp->mnt_flag & MNT_MWAIT) {
		mp->mnt_flag &= ~MNT_MWAIT;
		wakeup((caddr_t)mp);
	}
}

/*
 * Mark a mount point as busy.
 * Used to synchronize access and to delay unmounting.
 * Returns 1 (without acquiring busy state) if the filesystem is
 * being unmounted, 0 on success.  Note the sleep channel here is
 * &mp->mnt_flag, distinct from vfs_lock's channel mp.
 */
int
vfs_busy(mp)
	register struct mount *mp;
{

	while (mp->mnt_flag & MNT_MPBUSY) {
		mp->mnt_flag |= MNT_MPWANT;
		tsleep((caddr_t)&mp->mnt_flag, PVFS, "vfsbusy", 0);
	}
	if (mp->mnt_flag & MNT_UNMOUNT)
		return (1);
	mp->mnt_flag |= MNT_MPBUSY;
	return (0);
}

/*
 * Free a busy filesystem.
 * Panic if filesystem is not busy.
 */
void
vfs_unbusy(mp)
	register struct mount *mp;
{

	if ((mp->mnt_flag & MNT_MPBUSY) == 0)
		panic("vfs_unbusy: not busy");
	mp->mnt_flag &= ~MNT_MPBUSY;
	if (mp->mnt_flag & MNT_MPWANT) {
		mp->mnt_flag &= ~MNT_MPWANT;
		wakeup((caddr_t)&mp->mnt_flag);
	}
}
/*
 * Lookup a mount point by filesystem identifier.
 * Walks the global mount list comparing both fsid words;
 * returns NULL if no match is found.
 */
struct mount *
getvfs(fsid)
	fsid_t *fsid;
{
	register struct mount *mp;

	for (mp = mountlist.tqh_first; mp != NULL; mp = mp->mnt_list.tqe_next) {
		if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] &&
		    mp->mnt_stat.f_fsid.val[1] == fsid->val[1])
			return (mp);
	}
	return ((struct mount *)0);
}

/*
 * Get a new unique fsid for mount point mp of filesystem type mtype.
 * val[0] is synthesized as a fake device number (major beyond the real
 * block devices, minor from a rolling counter); val[1] is the fs type.
 * getvfs() is used to probe candidates until an unused fsid is found.
 */
void
getnewfsid(mp, mtype)
	struct mount *mp;
	int mtype;
{
	static u_short xxxfs_mntid;	/* rolling minor-number source */

	fsid_t tfsid;

	mp->mnt_stat.f_fsid.val[0] = makedev(nblkdev + mtype, 0);
	mp->mnt_stat.f_fsid.val[1] = mtype;
	/* Skip minor 0: it is the provisional value assigned above. */
	if (xxxfs_mntid == 0)
		++xxxfs_mntid;
	tfsid.val[0] = makedev(nblkdev + mtype, xxxfs_mntid);
	tfsid.val[1] = mtype;
	if (mountlist.tqh_first != NULL) {
		while (getvfs(&tfsid)) {
			tfsid.val[0]++;
			xxxfs_mntid++;
		}
	}
	mp->mnt_stat.f_fsid.val[0] = tfsid.val[0];
}

/*
 * Set vnode attributes to VNOVAL ("not specified"), so that callers
 * of VOP_SETATTR can tell which fields the user actually supplied.
 * va_vaflags is cleared rather than set to VNOVAL.
 */
void
vattr_null(vap)
	register struct vattr *vap;
{

	vap->va_type = VNON;
	vap->va_size = vap->va_bytes = VNOVAL;
	vap->va_mode = vap->va_nlink = vap->va_uid = vap->va_gid =
		vap->va_fsid = vap->va_fileid =
		vap->va_blocksize = vap->va_rdev =
		vap->va_atime.ts_sec = vap->va_atime.ts_nsec =
		vap->va_mtime.ts_sec = vap->va_mtime.ts_nsec =
		vap->va_ctime.ts_sec = vap->va_ctime.ts_nsec =
		vap->va_flags = vap->va_gen = VNOVAL;
	vap->va_vaflags = 0;
}

/*
 * Routines having to do with the management of the vnode table.
 */
extern int (**dead_vnodeop_p)();
extern void vclean();
long numvnodes;			/* current count of allocated vnodes */
extern struct vattr va_null;
/*
 * Return the next vnode from the free list.
 *
 * A fresh vnode is malloc'd while the table is below desiredvnodes
 * (or below twice that when the free list is empty); otherwise the
 * head of the free list is recycled.  Returns ENFILE and sets *vpp
 * to NULL when the table is full and nothing is free.
 */
int
getnewvnode(tag, mp, vops, vpp)
	enum vtagtype tag;
	struct mount *mp;
	int (**vops)();
	struct vnode **vpp;
{
	register struct vnode *vp;
	int s;

	if ((vnode_free_list.tqh_first == NULL &&
	     numvnodes < 2 * desiredvnodes) ||
	    numvnodes < desiredvnodes) {
		vp = (struct vnode *)malloc((u_long)sizeof *vp,
		    M_VNODE, M_WAITOK);
		bzero((char *)vp, sizeof *vp);
		numvnodes++;
	} else {
		if ((vp = vnode_free_list.tqh_first) == NULL) {
			tablefull("vnode");
			*vpp = 0;
			return (ENFILE);
		}
		if (vp->v_usecount)
			panic("free vnode isn't");
		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
		/* see comment on why 0xdeadb is set at end of vgone (below) */
		vp->v_freelist.tqe_prev = (struct vnode **)0xdeadb;
		vp->v_lease = NULL;
		/* Detach the old filesystem instance, if any. */
		if (vp->v_type != VBAD)
			VOP_REVOKE(vp, 0);
#ifdef DIAGNOSTIC
		if (vp->v_data)
			panic("cleaned vnode isn't");
		s = splbio();
		if (vp->v_numoutput)
			panic("Clean vnode has pending I/O's");
		splx(s);
#endif
		/* Reset per-vnode read-ahead/cluster bookkeeping. */
		vp->v_flag = 0;
		vp->v_lastr = 0;
		vp->v_ralen = 0;
		vp->v_maxra = 0;
		vp->v_lastw = 0;
		vp->v_lasta = 0;
		vp->v_cstart = 0;
		vp->v_clen = 0;
		vp->v_socket = 0;
	}
	vp->v_type = VNON;
	cache_purge(vp);
	vp->v_tag = tag;
	vp->v_op = vops;
	insmntque(vp, mp);
	*vpp = vp;
	vp->v_usecount = 1;
	vp->v_data = 0;
	return (0);
}

/*
 * Move a vnode from one mount queue to another.
 * A NULL mp simply removes the vnode from its current mount list.
 */
void
insmntque(vp, mp)
	register struct vnode *vp;
	register struct mount *mp;
{

	/*
	 * Delete from old mount point vnode list, if on one.
	 */
	if (vp->v_mount != NULL)
		LIST_REMOVE(vp, v_mntvnodes);
	/*
	 * Insert into list of vnodes for the new mount point, if available.
	 */
	if ((vp->v_mount = mp) == NULL)
		return;
	LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes);
}

/*
 * Update outstanding I/O count and do wakeup if requested.
 * Called on write completion for buffer bp; wakes any process
 * sleeping in VBWAIT once the vnode's output count drains to zero.
 */
void
vwakeup(bp)
	register struct buf *bp;
{
	register struct vnode *vp;

	bp->b_flags &= ~B_WRITEINPROG;
	if (vp = bp->b_vp) {
		if (--vp->v_numoutput < 0)
			panic("vwakeup: neg numoutput");
		if ((vp->v_flag & VBWAIT) && vp->v_numoutput <= 0) {
			if (vp->v_numoutput < 0)
				panic("vwakeup: neg numoutput 2");
			vp->v_flag &= ~VBWAIT;
			wakeup((caddr_t)&vp->v_numoutput);
		}
	}
}

/*
 * Flush out and invalidate all buffers associated with a vnode.
 * Called with the underlying object locked.
 *
 * V_SAVE syncs dirty data first; V_SAVEMETA preserves indirect
 * blocks (negative logical block numbers).  Returns 0 on success
 * or the tsleep error if interrupted while waiting on a busy buffer.
 */
int
vinvalbuf(vp, flags, cred, p, slpflag, slptimeo)
	register struct vnode *vp;
	int flags;
	struct ucred *cred;
	struct proc *p;
	int slpflag, slptimeo;
{
	register struct buf *bp;
	struct buf *nbp, *blist;
	int s, error;

	if (flags & V_SAVE) {
		if (error = VOP_FSYNC(vp, cred, MNT_WAIT, p))
			return (error);
		if (vp->v_dirtyblkhd.lh_first != NULL)
			panic("vinvalbuf: dirty bufs");
	}
	for (;;) {
		/* With V_SAVEMETA, skip leading metadata (b_lblkno < 0). */
		if ((blist = vp->v_cleanblkhd.lh_first) && flags & V_SAVEMETA)
			while (blist && blist->b_lblkno < 0)
				blist = blist->b_vnbufs.le_next;
		if (!blist && (blist = vp->v_dirtyblkhd.lh_first) &&
		    (flags & V_SAVEMETA))
			while (blist && blist->b_lblkno < 0)
				blist = blist->b_vnbufs.le_next;
		if (!blist)
			break;

		for (bp = blist; bp; bp = nbp) {
			nbp = bp->b_vnbufs.le_next;
			if (flags & V_SAVEMETA && bp->b_lblkno < 0)
				continue;
			s = splbio();
			if (bp->b_flags & B_BUSY) {
				bp->b_flags |= B_WANTED;
				error = tsleep((caddr_t)bp,
					slpflag | (PRIBIO + 1), "vinvalbuf",
					slptimeo);
				splx(s);
				if (error)
					return (error);
				/* Lists may have changed; rescan. */
				break;
			}
			bremfree(bp);
			bp->b_flags |= B_BUSY;
			splx(s);
			/*
			 * XXX Since there are no node locks for NFS, I believe
			 * there is a slight chance that a delayed write will
			 * occur while sleeping just above, so check for it.
			 */
			if ((bp->b_flags & B_DELWRI) && (flags & V_SAVE)) {
				(void) VOP_BWRITE(bp);
				break;
			}
			bp->b_flags |= B_INVAL;
			brelse(bp);
		}
	}
	if (!(flags & V_SAVEMETA) &&
	    (vp->v_dirtyblkhd.lh_first || vp->v_cleanblkhd.lh_first))
		panic("vinvalbuf: flush failed");
	return (0);
}

/*
 * Associate a buffer with a vnode.
 * Takes a hold reference on the vnode and places the buffer on
 * the vnode's clean list.
 */
void
bgetvp(vp, bp)
	register struct vnode *vp;
	register struct buf *bp;
{

	if (bp->b_vp)
		panic("bgetvp: not free");
	VHOLD(vp);
	bp->b_vp = vp;
	if (vp->v_type == VBLK || vp->v_type == VCHR)
		bp->b_dev = vp->v_rdev;
	else
		bp->b_dev = NODEV;
	/*
	 * Insert onto list for new vnode.
	 */
	bufinsvn(bp, &vp->v_cleanblkhd);
}

/*
 * Disassociate a buffer from a vnode.
 * Drops the hold reference taken in bgetvp.
 */
void
brelvp(bp)
	register struct buf *bp;
{
	struct vnode *vp;

	if (bp->b_vp == (struct vnode *) 0)
		panic("brelvp: NULL");
	/*
	 * Delete from old vnode list, if on one.
	 */
	if (bp->b_vnbufs.le_next != NOLIST)
		bufremvn(bp);
	vp = bp->b_vp;
	bp->b_vp = (struct vnode *) 0;
	HOLDRELE(vp);
}

/*
 * Reassign a buffer from one vnode to another.
 * Used to assign file specific control information
 * (indirect blocks) to the vnode to which they belong.
 */
void
reassignbuf(bp, newvp)
	register struct buf *bp;
	register struct vnode *newvp;
{
	register struct buflists *listheadp;

	if (newvp == NULL) {
		printf("reassignbuf: NULL");
		return;
	}
	/*
	 * Delete from old vnode list, if on one.
	 */
	if (bp->b_vnbufs.le_next != NOLIST)
		bufremvn(bp);
	/*
	 * If dirty, put on list of dirty buffers;
	 * otherwise insert onto list of clean buffers.
	 */
	if (bp->b_flags & B_DELWRI)
		listheadp = &newvp->v_dirtyblkhd;
	else
		listheadp = &newvp->v_cleanblkhd;
	bufinsvn(bp, listheadp);
}

/*
 * Create a vnode for a block device.
 * Used for root filesystem, argdev, and swap areas.
 * Also used for memory file system special devices.
 * Returns 0 with *vpp set, or the getnewvnode error with *vpp NULL.
 */
int
bdevvp(dev, vpp)
	dev_t dev;
	struct vnode **vpp;
{
	register struct vnode *vp;
	struct vnode *nvp;
	int error;

	if (dev == NODEV)
		return (0);
	error = getnewvnode(VT_NON, (struct mount *)0, spec_vnodeop_p, &nvp);
	if (error) {
		*vpp = NULLVP;
		return (error);
	}
	vp = nvp;
	vp->v_type = VBLK;
	/* If an alias already exists, use it and discard the new vnode. */
	if (nvp = checkalias(vp, dev, (struct mount *)0)) {
		vput(vp);
		vp = nvp;
	}
	*vpp = vp;
	return (0);
}

/*
 * Check to see if the new vnode represents a special device
 * for which we already have a vnode (either because of
 * bdevvp() or because of a different vnode representing
 * the same block device). If such an alias exists, deallocate
 * the existing contents and return the aliased vnode. The
 * caller is responsible for filling it with its new contents.
 */
struct vnode *
checkalias(nvp, nvp_rdev, mp)
	register struct vnode *nvp;
	dev_t nvp_rdev;
	struct mount *mp;
{
	register struct vnode *vp;
	struct vnode **vpp;

	if (nvp->v_type != VBLK && nvp->v_type != VCHR)
		return (NULLVP);

	vpp = &speclisth[SPECHASH(nvp_rdev)];
loop:
	for (vp = *vpp; vp; vp = vp->v_specnext) {
		if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type)
			continue;
		/*
		 * Alias, but not in use, so flush it out.
		 */
		if (vp->v_usecount == 0) {
			vgone(vp);
			goto loop;
		}
		/* vget may sleep; restart the scan if it fails. */
		if (vget(vp, 1))
			goto loop;
		break;
	}
	if (vp == NULL || vp->v_tag != VT_NON) {
		/*
		 * No reusable alias: give the new vnode its own specinfo
		 * and link it onto the hash chain.
		 */
		MALLOC(nvp->v_specinfo, struct specinfo *,
			sizeof(struct specinfo), M_VNODE, M_WAITOK);
		nvp->v_rdev = nvp_rdev;
		nvp->v_hashchain = vpp;
		nvp->v_specnext = *vpp;
		nvp->v_specflags = 0;
		*vpp = nvp;
		if (vp != NULL) {
			nvp->v_flag |= VALIASED;
			vp->v_flag |= VALIASED;
			vput(vp);
		}
		return (NULLVP);
	}
	/*
	 * Found an unclaimed (VT_NON) alias: clean it out and hand it
	 * over to the caller's filesystem, retiring nvp.
	 */
	VOP_UNLOCK(vp);
	vclean(vp, 0);
	vp->v_op = nvp->v_op;
	vp->v_tag = nvp->v_tag;
	nvp->v_type = VNON;
	insmntque(vp, mp);
	return (vp);
}

/*
 * Grab a particular vnode from the free list, increment its
 * reference count and lock it. The vnode lock bit is set the
 * vnode is being eliminated in vgone. The process is awakened
 * when the transition is completed, and an error returned to
 * indicate that the vnode is no longer usable (possibly having
 * been changed to a new file system type).
 */
int
vget(vp, lockflag)
	register struct vnode *vp;
	int lockflag;
{

	/*
	 * If the vnode is in the process of being cleaned out for
	 * another use, we wait for the cleaning to finish and then
	 * return failure. Cleaning is determined either by checking
	 * that the VXLOCK flag is set, or that the use count is
	 * zero with the back pointer set to show that it has been
	 * removed from the free list by getnewvnode. The VXLOCK
	 * flag may not have been set yet because vclean is blocked in
	 * the VOP_LOCK call waiting for the VOP_INACTIVE to complete.
	 */
	if ((vp->v_flag & VXLOCK) ||
	    (vp->v_usecount == 0 &&
	     vp->v_freelist.tqe_prev == (struct vnode **)0xdeadb)) {
		vp->v_flag |= VXWANT;
		tsleep((caddr_t)vp, PINOD, "vget", 0);
		return (1);
	}
	if (vp->v_usecount == 0)
		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
	vp->v_usecount++;
	if (lockflag)
		VOP_LOCK(vp);
	return (0);
}

/*
 * Vnode reference, just increment the count.
 * Unlike vget, requires that the caller already holds a reference.
 */
void
vref(vp)
	struct vnode *vp;
{

	if (vp->v_usecount <= 0)
		panic("vref used where vget required");
	vp->v_usecount++;
}

/*
 * vput(), just unlock and vrele()
 */
void
vput(vp)
	register struct vnode *vp;
{

	VOP_UNLOCK(vp);
	vrele(vp);
}

/*
 * Vnode release.
 * If count drops to zero, call inactive routine and return to freelist.
 */
void
vrele(vp)
	register struct vnode *vp;
{

#ifdef DIAGNOSTIC
	if (vp == NULL)
		panic("vrele: null vp");
#endif
	vp->v_usecount--;
	if (vp->v_usecount > 0)
		return;
#ifdef DIAGNOSTIC
	if (vp->v_usecount != 0 || vp->v_writecount != 0) {
		vprint("vrele: bad ref count", vp);
		panic("vrele: ref cnt");
	}
#endif
	/*
	 * insert at tail of LRU list
	 */
	TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
	VOP_INACTIVE(vp);
}

/*
 * Page or buffer structure gets a reference.
 * Hold references keep the vnode's identity but, unlike v_usecount,
 * do not keep it off the free list.
 */
void
vhold(vp)
	register struct vnode *vp;
{

	vp->v_holdcnt++;
}

/*
 * Page or buffer structure frees a reference.
 */
void
holdrele(vp)
	register struct vnode *vp;
{

	if (vp->v_holdcnt <= 0)
		panic("holdrele: holdcnt");
	vp->v_holdcnt--;
}
/*
 * Remove any vnodes in the vnode table belonging to mount point mp.
 *
 * If MNT_NOFORCE is specified, there should not be any active ones,
 * return error if any are found (nb: this is a user error, not a
 * system error). If MNT_FORCE is specified, detach any active vnodes
 * that are found.
 */
#ifdef DIAGNOSTIC
int busyprt = 0;	/* print out busy vnodes */
struct ctldebug debug1 = { "busyprt", &busyprt };
#endif

int
vflush(mp, skipvp, flags)
	struct mount *mp;
	struct vnode *skipvp;
	int flags;
{
	register struct vnode *vp, *nvp;
	int busy = 0;

	if ((mp->mnt_flag & MNT_MPBUSY) == 0)
		panic("vflush: not busy");
loop:
	for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) {
		/* Restart if vp migrated off this mount while we slept. */
		if (vp->v_mount != mp)
			goto loop;
		nvp = vp->v_mntvnodes.le_next;
		/*
		 * Skip over a selected vnode.
		 */
		if (vp == skipvp)
			continue;
		/*
		 * Skip over a vnodes marked VSYSTEM.
		 */
		if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM))
			continue;
		/*
		 * If WRITECLOSE is set, only flush out regular file
		 * vnodes open for writing.
		 */
		if ((flags & WRITECLOSE) &&
		    (vp->v_writecount == 0 || vp->v_type != VREG))
			continue;
		/*
		 * With v_usecount == 0, all we need to do is clear
		 * out the vnode data structures and we are done.
		 */
		if (vp->v_usecount == 0) {
			VOP_REVOKE(vp, 0);
			continue;
		}
		/*
		 * If FORCECLOSE is set, forcibly close the vnode.
		 * For block or character devices, revert to an
		 * anonymous device. For all other files, just kill them.
		 */
		if (flags & FORCECLOSE) {
			if (vp->v_type != VBLK && vp->v_type != VCHR) {
				VOP_REVOKE(vp, 0);
			} else {
				vclean(vp, 0);
				vp->v_op = spec_vnodeop_p;
				insmntque(vp, (struct mount *)0);
			}
			continue;
		}
#ifdef DIAGNOSTIC
		if (busyprt)
			vprint("vflush: busy vnode", vp);
#endif
		busy++;
	}
	if (busy)
		return (EBUSY);
	return (0);
}

/*
 * Disassociate the underlying file system from a vnode.
 * On return the vnode runs the dead filesystem's operations.
 */
void
vclean(vp, flags)
	register struct vnode *vp;
	int flags;
{
	int active;

	/*
	 * Check to see if the vnode is in use.
	 * If so we have to reference it before we clean it out
	 * so that its count cannot fall to zero and generate a
	 * race against ourselves to recycle it.
	 */
	if (active = vp->v_usecount)
		VREF(vp);
	/*
	 * Even if the count is zero, the VOP_INACTIVE routine may still
	 * have the object locked while it cleans it out. The VOP_LOCK
	 * ensures that the VOP_INACTIVE routine is done with its work.
	 * For active vnodes, it ensures that no other activity can
	 * occur while the underlying object is being cleaned out.
	 */
	VOP_LOCK(vp);
	/*
	 * Prevent the vnode from being recycled or
	 * brought into use while we clean it out.
	 */
	if (vp->v_flag & VXLOCK)
		panic("vclean: deadlock");
	vp->v_flag |= VXLOCK;
	/*
	 * Clean out any buffers associated with the vnode.
	 */
	if (flags & DOCLOSE)
		vinvalbuf(vp, V_SAVE, NOCRED, NULL, 0, 0);
	/*
	 * Any other processes trying to obtain this lock must first
	 * wait for VXLOCK to clear, then call the new lock operation.
	 */
	VOP_UNLOCK(vp);
	/*
	 * If purging an active vnode, it must be closed and
	 * deactivated before being reclaimed.
	 */
	if (active) {
		if (flags & DOCLOSE)
			VOP_CLOSE(vp, IO_NDELAY, NOCRED, NULL);
		VOP_INACTIVE(vp);
	}
	/*
	 * Reclaim the vnode.
	 */
	if (VOP_RECLAIM(vp))
		panic("vclean: cannot reclaim");
	if (active)
		vrele(vp);

	/*
	 * Done with purge, notify sleepers of the grim news.
	 */
	vp->v_op = dead_vnodeop_p;
	vp->v_tag = VT_NON;
	vp->v_flag &= ~VXLOCK;
	if (vp->v_flag & VXWANT) {
		vp->v_flag &= ~VXWANT;
		wakeup((caddr_t)vp);
	}
}

/*
 * Eliminate all activity associated with the requested vnode
 * and with all vnodes aliased to the requested vnode.
 */
int
vop_revoke(ap)
	struct vop_revoke_args /* {
		struct vnode *a_vp;
		int a_flags;
	} */ *ap;
{
	register struct vnode *vp, *vq;

	vp = ap->a_vp;
	if ((ap->a_flags & REVOKEALL) && (vp->v_flag & VALIASED)) {
		/*
		 * If a vgone (or vclean) is already in progress,
		 * wait until it is done and return.
		 */
		if (vp->v_flag & VXLOCK) {
			vp->v_flag |= VXWANT;
			tsleep((caddr_t)vp, PINOD, "vop_revokeall", 0);
			return (0);
		}
		/*
		 * Ensure that vp will not be vgone'd while we
		 * are eliminating its aliases.
		 */
		vp->v_flag |= VXLOCK;
		while (vp->v_flag & VALIASED) {
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_rdev != vp->v_rdev ||
				    vq->v_type != vp->v_type || vp == vq)
					continue;
				vgone(vq);
				break;
			}
		}
		/*
		 * Remove the lock so that vgone below will
		 * really eliminate the vnode after which time
		 * vgone will awaken any sleepers.
		 */
		vp->v_flag &= ~VXLOCK;
	}
	vgone(vp);
	return (0);
}

/*
 * Eliminate all activity associated with a vnode
 * in preparation for reuse.
 */
void
vgone(vp)
	register struct vnode *vp;
{
	register struct vnode *vq;
	struct vnode *vx;

	/*
	 * If a vgone (or vclean) is already in progress,
	 * wait until it is done and return.
	 */
	if (vp->v_flag & VXLOCK) {
		vp->v_flag |= VXWANT;
		tsleep((caddr_t)vp, PINOD, "vgone", 0);
		return;
	}
	/*
	 * Clean out the filesystem specific data.
	 */
	vclean(vp, DOCLOSE);
	/*
	 * Delete from old mount point vnode list, if on one.
	 */
	if (vp->v_mount != NULL) {
		LIST_REMOVE(vp, v_mntvnodes);
		vp->v_mount = NULL;
	}
	/*
	 * If special device, remove it from special device alias list.
	 */
	if (vp->v_type == VBLK || vp->v_type == VCHR) {
		if (*vp->v_hashchain == vp) {
			*vp->v_hashchain = vp->v_specnext;
		} else {
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_specnext != vp)
					continue;
				vq->v_specnext = vp->v_specnext;
				break;
			}
			if (vq == NULL)
				panic("missing bdev");
		}
		if (vp->v_flag & VALIASED) {
			/*
			 * If only one other alias remains (vx found and
			 * the scan ran off the end), it is no longer
			 * aliased either.
			 */
			vx = NULL;
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_rdev != vp->v_rdev ||
				    vq->v_type != vp->v_type)
					continue;
				if (vx)
					break;
				vx = vq;
			}
			if (vx == NULL)
				panic("missing alias");
			if (vq == NULL)
				vx->v_flag &= ~VALIASED;
			vp->v_flag &= ~VALIASED;
		}
		FREE(vp->v_specinfo, M_VNODE);
		vp->v_specinfo = NULL;
	}
	/*
	 * If it is on the freelist and not already at the head,
	 * move it to the head of the list. The test of the back
	 * pointer and the reference count of zero is because
	 * it will be removed from the free list by getnewvnode,
	 * but will not have its reference count incremented until
	 * after calling vgone. If the reference count were
	 * incremented first, vgone would (incorrectly) try to
	 * close the previous instance of the underlying object.
	 * So, the back pointer is explicitly set to `0xdeadb' in
	 * getnewvnode after removing it from the freelist to ensure
	 * that we do not try to move it here.
	 */
	if (vp->v_usecount == 0 &&
	    vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb &&
	    vnode_free_list.tqh_first != vp) {
		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
		TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
	}
	vp->v_type = VBAD;
}

/*
 * Lookup a vnode by device number.
 * Returns 1 with *vpp set on success, 0 if no match.
 */
int
vfinddev(dev, type, vpp)
	dev_t dev;
	enum vtype type;
	struct vnode **vpp;
{
	register struct vnode *vp;

	for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) {
		if (dev != vp->v_rdev || type != vp->v_type)
			continue;
		*vpp = vp;
		return (1);
	}
	return (0);
}

/*
 * Calculate the total number of references to a special device,
 * summing v_usecount over all aliases.  Unreferenced aliases
 * (other than vp itself) are flushed out along the way, which
 * forces a rescan of the hash chain.
 */
int
vcount(vp)
	register struct vnode *vp;
{
	register struct vnode *vq, *vnext;
	int count;

loop:
	if ((vp->v_flag & VALIASED) == 0)
		return (vp->v_usecount);
	for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) {
		vnext = vq->v_specnext;
		if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type)
			continue;
		/*
		 * Alias, but not in use, so flush it out.
		 */
		if (vq->v_usecount == 0 && vq != vp) {
			vgone(vq);
			goto loop;
		}
		count += vq->v_usecount;
	}
	return (count);
}
/*
 * Print out a description of a vnode.
 */
static char *typename[] =
   { "VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD" };

void
vprint(label, vp)
	char *label;
	register struct vnode *vp;
{
	char buf[64];	/* holds "|FLAG" strings; current flag set fits */

	if (label != NULL)
		printf("%s: ", label);
	printf("type %s, usecount %d, writecount %d, refcount %d,",
		typename[vp->v_type], vp->v_usecount, vp->v_writecount,
		vp->v_holdcnt);
	buf[0] = '\0';
	if (vp->v_flag & VROOT)
		strcat(buf, "|VROOT");
	if (vp->v_flag & VTEXT)
		strcat(buf, "|VTEXT");
	if (vp->v_flag & VSYSTEM)
		strcat(buf, "|VSYSTEM");
	if (vp->v_flag & VXLOCK)
		strcat(buf, "|VXLOCK");
	if (vp->v_flag & VXWANT)
		strcat(buf, "|VXWANT");
	if (vp->v_flag & VBWAIT)
		strcat(buf, "|VBWAIT");
	if (vp->v_flag & VALIASED)
		strcat(buf, "|VALIASED");
	/* &buf[1] skips the leading '|' of the first flag name. */
	if (buf[0] != '\0')
		printf(" flags (%s)", &buf[1]);
	if (vp->v_data == NULL) {
		printf("\n");
	} else {
		printf("\n\t");
		VOP_PRINT(vp);
	}
}

#ifdef DEBUG
/*
 * List all of the locked vnodes in the system.
 * Called when debugging the kernel.
 */
void
printlockedvnodes()
{
	register struct mount *mp;
	register struct vnode *vp;

	printf("Locked vnodes\n");
	for (mp = mountlist.tqh_first; mp != NULL; mp = mp->mnt_list.tqe_next) {
		for (vp = mp->mnt_vnodelist.lh_first;
		     vp != NULL;
		     vp = vp->v_mntvnodes.le_next)
			if (VOP_ISLOCKED(vp))
				vprint((char *)0, vp);
	}
}
#endif

int kinfo_vdebug = 1;		/* log races seen during sysctl dump */
int kinfo_vgetfailed;
#define KINFO_VNODESLOP	10	/* size-estimate slack, in vnodes */
/*
 * Dump vnode list (via sysctl).
 * Copyout address of vnode followed by vnode.
 * With a NULL buffer, returns the space estimate (with slop) in *sizep.
 * Returns ENOMEM if the supplied buffer fills up mid-dump.
 */
/* ARGSUSED */
int
sysctl_vnode(where, sizep)
	char *where;
	size_t *sizep;
{
	register struct mount *mp, *nmp;
	struct vnode *vp;
	register char *bp = where, *savebp;
	char *ewhere;
	int error;

#define VPTRSZ	sizeof (struct vnode *)
#define VNODESZ	sizeof (struct vnode)
	if (where == NULL) {
		*sizep = (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ);
		return (0);
	}
	ewhere = where + *sizep;

	for (mp = mountlist.tqh_first; mp != NULL; mp = nmp) {
		nmp = mp->mnt_list.tqe_next;
		/* Skip filesystems that are being unmounted. */
		if (vfs_busy(mp))
			continue;
		savebp = bp;
again:
		for (vp = mp->mnt_vnodelist.lh_first;
		     vp != NULL;
		     vp = vp->v_mntvnodes.le_next) {
			/*
			 * Check that the vp is still associated with
			 * this filesystem.  RACE: could have been
			 * recycled onto the same filesystem.
			 */
			if (vp->v_mount != mp) {
				if (kinfo_vdebug)
					printf("kinfo: vp changed\n");
				/* Rewind this mount's output and rescan. */
				bp = savebp;
				goto again;
			}
			if (bp + VPTRSZ + VNODESZ > ewhere) {
				*sizep = bp - where;
				return (ENOMEM);
			}
			if ((error = copyout((caddr_t)&vp, bp, VPTRSZ)) ||
			   (error = copyout((caddr_t)vp, bp + VPTRSZ, VNODESZ)))
				return (error);
			bp += VPTRSZ + VNODESZ;
		}
		vfs_unbusy(mp);
	}

	*sizep = bp - where;
	return (0);
}
/*
 * Check to see if a filesystem is mounted on a block device.
 * Checks the vnode itself and all of its aliases; returns EBUSY
 * if any carries SI_MOUNTEDON, else 0.
 */
int
vfs_mountedon(vp)
	register struct vnode *vp;
{
	register struct vnode *vq;

	if (vp->v_specflags & SI_MOUNTEDON)
		return (EBUSY);
	if (vp->v_flag & VALIASED) {
		for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
			if (vq->v_rdev != vp->v_rdev ||
			    vq->v_type != vp->v_type)
				continue;
			if (vq->v_specflags & SI_MOUNTEDON)
				return (EBUSY);
		}
	}
	return (0);
}

/*
 * Build hash lists of net addresses and hang them off the mount point.
 * Called by ufs_mount() to set up the lists of export addresses.
 * A zero ex_addrlen installs the default export entry instead.
 */
static int
vfs_hang_addrlist(mp, nep, argp)
	struct mount *mp;
	struct netexport *nep;
	struct export_args *argp;
{
	register struct netcred *np;
	register struct radix_node_head *rnh;
	register int i;
	struct radix_node *rn;
	struct sockaddr *saddr, *smask = 0;
	struct domain *dom;
	int error;

	if (argp->ex_addrlen == 0) {
		if (mp->mnt_flag & MNT_DEFEXPORTED)
			return (EPERM);
		np = &nep->ne_defexported;
		np->netc_exflags = argp->ex_flags;
		np->netc_anon = argp->ex_anon;
		np->netc_anon.cr_ref = 1;
		mp->mnt_flag |= MNT_DEFEXPORTED;
		return (0);
	}
	/* One allocation: netcred followed by the address and mask bytes. */
	i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen;
	np = (struct netcred *)malloc(i, M_NETADDR, M_WAITOK);
	bzero((caddr_t)np, i);
	saddr = (struct sockaddr *)(np + 1);
	if (error = copyin(argp->ex_addr, (caddr_t)saddr, argp->ex_addrlen))
		goto out;
	if (saddr->sa_len > argp->ex_addrlen)
		saddr->sa_len = argp->ex_addrlen;
	if (argp->ex_masklen) {
		smask = (struct sockaddr *)((caddr_t)saddr + argp->ex_addrlen);
		/*
		 * NOTE(review): the mask bytes are copied in from
		 * argp->ex_addr again; if export_args carries a separate
		 * mask pointer (as later BSDs' ex_mask), this looks like
		 * the wrong source -- verify against the struct definition.
		 */
		error = copyin(argp->ex_addr, (caddr_t)smask, argp->ex_masklen);
		if (error)
			goto out;
		if (smask->sa_len > argp->ex_masklen)
			smask->sa_len = argp->ex_masklen;
	}
	i = saddr->sa_family;
	if ((rnh = nep->ne_rtable[i]) == 0) {
		/*
		 * Seems silly to initialize every AF when most are not
		 * used, do so on demand here
		 */
		for (dom = domains; dom; dom = dom->dom_next)
			if (dom->dom_family == i && dom->dom_rtattach) {
				dom->dom_rtattach((void **)&nep->ne_rtable[i],
					dom->dom_rtoffset);
				break;
			}
		if ((rnh = nep->ne_rtable[i]) == 0) {
			error = ENOBUFS;
			goto out;
		}
	}
	rn = (*rnh->rnh_addaddr)((caddr_t)saddr, (caddr_t)smask, rnh,
		np->netc_rnodes);
	if (rn == 0 || np != (struct netcred *)rn) { /* already exists */
		error = EPERM;
		goto out;
	}
	np->netc_exflags = argp->ex_flags;
	np->netc_anon = argp->ex_anon;
	np->netc_anon.cr_ref = 1;
	return (0);
out:
	free(np, M_NETADDR);
	return (error);
}

/*
 * Radix-tree walker callback: delete one export entry from the tree
 * and free it.  The walk argument w is the radix_node_head itself.
 */
/* ARGSUSED */
static int
vfs_free_netcred(rn, w)
	struct radix_node *rn;
	caddr_t w;
{
	register struct radix_node_head *rnh = (struct radix_node_head *)w;

	(*rnh->rnh_deladdr)(rn->rn_key, rn->rn_mask, rnh);
	free((caddr_t)rn, M_NETADDR);
	return (0);
}
/*
 * Free the net address hash lists that are hanging off the mount points.
 * Walks every per-family radix tree, freeing each netcred, then the
 * tree head itself.
 */
static void
vfs_free_addrlist(nep)
	struct netexport *nep;
{
	register int i;
	register struct radix_node_head *rnh;

	for (i = 0; i <= AF_MAX; i++)
		if (rnh = nep->ne_rtable[i]) {
			(*rnh->rnh_walktree)(rnh, vfs_free_netcred,
			    (caddr_t)rnh);
			free((caddr_t)rnh, M_RTABLE);
			nep->ne_rtable[i] = 0;
		}
}

/*
 * Update the export information for a mount point: MNT_DELEXPORT
 * tears down all existing export entries; MNT_EXPORTED installs
 * the entry described by argp.  Both flags may be given together.
 */
int
vfs_export(mp, nep, argp)
	struct mount *mp;
	struct netexport *nep;
	struct export_args *argp;
{
	int error;

	if (argp->ex_flags & MNT_DELEXPORT) {
		vfs_free_addrlist(nep);
		mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED);
	}
	if (argp->ex_flags & MNT_EXPORTED) {
		if (error = vfs_hang_addrlist(mp, nep, argp))
			return (error);
		mp->mnt_flag |= MNT_EXPORTED;
	}
	return (0);
}

/*
 * Look up the export credentials for a client address (in mbuf nam)
 * against mount point mp.  Falls back to the default export entry if
 * no address-specific match exists; returns NULL if not exported to
 * this client.
 */
struct netcred *
vfs_export_lookup(mp, nep, nam)
	register struct mount *mp;
	struct netexport *nep;
	struct mbuf *nam;
{
	register struct netcred *np;
	register struct radix_node_head *rnh;
	struct sockaddr *saddr;

	np = NULL;
	if (mp->mnt_flag & MNT_EXPORTED) {
		/*
		 * Lookup in the export list first.
		 */
		if (nam != NULL) {
			saddr = mtod(nam, struct sockaddr *);
			rnh = nep->ne_rtable[saddr->sa_family];
			if (rnh != NULL) {
				np = (struct netcred *)
					(*rnh->rnh_matchaddr)((caddr_t)saddr,
					    rnh);
				/* The tree's root node is not a real entry. */
				if (np && np->netc_rnodes->rn_flags & RNF_ROOT)
					np = NULL;
			}
		}
		/*
		 * If no address match, use the default if it exists.
		 */
		if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED)
			np = &nep->ne_defexported;
	}
	return (np);
}