1 /* 2 * Copyright (c) 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * (c) UNIX System Laboratories, Inc. 5 * All or some portions of this file are derived from material licensed 6 * to the University of California by American Telephone and Telegraph 7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8 * the permission of UNIX System Laboratories, Inc. 9 * 10 * %sccs.include.redist.c% 11 * 12 * @(#)vfs_subr.c 8.21 (Berkeley) 05/09/95 13 */ 14 15 /* 16 * External virtual filesystem routines 17 */ 18 19 #include <sys/param.h> 20 #include <sys/systm.h> 21 #include <sys/proc.h> 22 #include <sys/mount.h> 23 #include <sys/time.h> 24 #include <sys/vnode.h> 25 #include <sys/stat.h> 26 #include <sys/namei.h> 27 #include <sys/ucred.h> 28 #include <sys/buf.h> 29 #include <sys/errno.h> 30 #include <sys/malloc.h> 31 #include <sys/domain.h> 32 #include <sys/mbuf.h> 33 34 #include <vm/vm.h> 35 #include <sys/sysctl.h> 36 37 #include <miscfs/specfs/specdev.h> 38 39 enum vtype iftovt_tab[16] = { 40 VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON, 41 VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD, 42 }; 43 int vttoif_tab[9] = { 44 0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK, 45 S_IFSOCK, S_IFIFO, S_IFMT, 46 }; 47 48 /* 49 * Insq/Remq for the vnode usage lists. 50 */ 51 #define bufinsvn(bp, dp) LIST_INSERT_HEAD(dp, bp, b_vnbufs) 52 #define bufremvn(bp) { \ 53 LIST_REMOVE(bp, b_vnbufs); \ 54 (bp)->b_vnbufs.le_next = NOLIST; \ 55 } 56 TAILQ_HEAD(freelst, vnode) vnode_free_list; /* vnode free list */ 57 struct mntlist mountlist; /* mounted filesystem list */ 58 59 /* 60 * Initialize the vnode management data structures. 61 */ 62 void 63 vntblinit() 64 { 65 66 TAILQ_INIT(&vnode_free_list); 67 CIRCLEQ_INIT(&mountlist); 68 } 69 70 /* 71 * Lock a filesystem. 72 * Used to prevent access to it while mounting and unmounting. 73 */ 74 int 75 vfs_lock(mp) 76 register struct mount *mp; 77 { 78 79 while (mp->mnt_flag & MNT_MLOCK) { 80 mp->mnt_flag |= MNT_MWAIT; 81 tsleep((caddr_t)mp, PVFS, "vfslock", 0); 82 } 83 mp->mnt_flag |= MNT_MLOCK; 84 return (0); 85 } 86 87 /* 88 * Unlock a locked filesystem. 89 * Panic if filesystem is not locked. 90 */ 91 void 92 vfs_unlock(mp) 93 register struct mount *mp; 94 { 95 96 if ((mp->mnt_flag & MNT_MLOCK) == 0) 97 panic("vfs_unlock: not locked"); 98 mp->mnt_flag &= ~MNT_MLOCK; 99 if (mp->mnt_flag & MNT_MWAIT) { 100 mp->mnt_flag &= ~MNT_MWAIT; 101 wakeup((caddr_t)mp); 102 } 103 } 104 105 /* 106 * Mark a mount point as busy. 107 * Used to synchronize access and to delay unmounting. 108 */ 109 int 110 vfs_busy(mp) 111 register struct mount *mp; 112 { 113 114 while (mp->mnt_flag & MNT_MPBUSY) { 115 mp->mnt_flag |= MNT_MPWANT; 116 tsleep((caddr_t)&mp->mnt_flag, PVFS, "vfsbusy", 0); 117 } 118 if (mp->mnt_flag & MNT_UNMOUNT) 119 return (1); 120 mp->mnt_flag |= MNT_MPBUSY; 121 return (0); 122 } 123 124 /* 125 * Free a busy filesystem. 126 * Panic if filesystem is not busy. 127 */ 128 void 129 vfs_unbusy(mp) 130 register struct mount *mp; 131 { 132 133 if ((mp->mnt_flag & MNT_MPBUSY) == 0) 134 panic("vfs_unbusy: not busy"); 135 mp->mnt_flag &= ~MNT_MPBUSY; 136 if (mp->mnt_flag & MNT_MPWANT) { 137 mp->mnt_flag &= ~MNT_MPWANT; 138 wakeup((caddr_t)&mp->mnt_flag); 139 } 140 } 141 142 /* 143 * Lookup a mount point by filesystem identifier. 144 */ 145 struct mount * 146 vfs_getvfs(fsid) 147 fsid_t *fsid; 148 { 149 register struct mount *mp; 150 151 for (mp = mountlist.cqh_first; mp != (void *)&mountlist; 152 mp = mp->mnt_list.cqe_next) { 153 if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] && 154 mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) 155 return (mp); 156 } 157 return ((struct mount *)0); 158 } 159 160 /* 161 * Get a new unique fsid 162 */ 163 void 164 vfs_getnewfsid(mp) 165 struct mount *mp; 166 { 167 static u_short xxxfs_mntid; 168 169 fsid_t tfsid; 170 int mtype; 171 172 mtype = mp->mnt_vfc->vfc_typenum; 173 mp->mnt_stat.f_fsid.val[0] = makedev(nblkdev + mtype, 0); 174 mp->mnt_stat.f_fsid.val[1] = mtype; 175 if (xxxfs_mntid == 0) 176 ++xxxfs_mntid; 177 tfsid.val[0] = makedev(nblkdev + mtype, xxxfs_mntid); 178 tfsid.val[1] = mtype; 179 if (mountlist.cqh_first != (void *)&mountlist) { 180 while (vfs_getvfs(&tfsid)) { 181 tfsid.val[0]++; 182 xxxfs_mntid++; 183 } 184 } 185 mp->mnt_stat.f_fsid.val[0] = tfsid.val[0]; 186 } 187 188 /* 189 * Set vnode attributes to VNOVAL 190 */ 191 void 192 vattr_null(vap) 193 register struct vattr *vap; 194 { 195 196 vap->va_type = VNON; 197 vap->va_size = vap->va_bytes = VNOVAL; 198 vap->va_mode = vap->va_nlink = vap->va_uid = vap->va_gid = 199 vap->va_fsid = vap->va_fileid = 200 vap->va_blocksize = vap->va_rdev = 201 vap->va_atime.ts_sec = vap->va_atime.ts_nsec = 202 vap->va_mtime.ts_sec = vap->va_mtime.ts_nsec = 203 vap->va_ctime.ts_sec = vap->va_ctime.ts_nsec = 204 vap->va_flags = vap->va_gen = VNOVAL; 205 vap->va_vaflags = 0; 206 } 207 208 /* 209 * Routines having to do with the management of the vnode table. 210 */ 211 extern int (**dead_vnodeop_p)(); 212 extern void vclean(); 213 long numvnodes; 214 extern struct vattr va_null; 215 216 /* 217 * Return the next vnode from the free list. 218 */ 219 int 220 getnewvnode(tag, mp, vops, vpp) 221 enum vtagtype tag; 222 struct mount *mp; 223 int (**vops)(); 224 struct vnode **vpp; 225 { 226 register struct vnode *vp; 227 int s; 228 229 if ((vnode_free_list.tqh_first == NULL && 230 numvnodes < 2 * desiredvnodes) || 231 numvnodes < desiredvnodes) { 232 vp = (struct vnode *)malloc((u_long)sizeof *vp, 233 M_VNODE, M_WAITOK); 234 bzero((char *)vp, sizeof *vp); 235 numvnodes++; 236 } else { 237 if ((vp = vnode_free_list.tqh_first) == NULL) { 238 tablefull("vnode"); 239 *vpp = 0; 240 return (ENFILE); 241 } 242 if (vp->v_usecount) 243 panic("free vnode isn't"); 244 TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); 245 /* see comment on why 0xdeadb is set at end of vgone (below) */ 246 vp->v_freelist.tqe_prev = (struct vnode **)0xdeadb; 247 vp->v_lease = NULL; 248 if (vp->v_type != VBAD) 249 VOP_REVOKE(vp, 0); 250 #ifdef DIAGNOSTIC 251 if (vp->v_data) 252 panic("cleaned vnode isn't"); 253 s = splbio(); 254 if (vp->v_numoutput) 255 panic("Clean vnode has pending I/O's"); 256 splx(s); 257 #endif 258 vp->v_flag = 0; 259 vp->v_lastr = 0; 260 vp->v_ralen = 0; 261 vp->v_maxra = 0; 262 vp->v_lastw = 0; 263 vp->v_lasta = 0; 264 vp->v_cstart = 0; 265 vp->v_clen = 0; 266 vp->v_socket = 0; 267 } 268 vp->v_type = VNON; 269 cache_purge(vp); 270 vp->v_tag = tag; 271 vp->v_op = vops; 272 insmntque(vp, mp); 273 *vpp = vp; 274 vp->v_usecount = 1; 275 vp->v_data = 0; 276 return (0); 277 } 278 279 /* 280 * Move a vnode from one mount queue to another. 281 */ 282 void 283 insmntque(vp, mp) 284 register struct vnode *vp; 285 register struct mount *mp; 286 { 287 288 /* 289 * Delete from old mount point vnode list, if on one. 290 */ 291 if (vp->v_mount != NULL) 292 LIST_REMOVE(vp, v_mntvnodes); 293 /* 294 * Insert into list of vnodes for the new mount point, if available. 295 */ 296 if ((vp->v_mount = mp) == NULL) 297 return; 298 LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes); 299 } 300 301 /* 302 * Update outstanding I/O count and do wakeup if requested. 303 */ 304 void 305 vwakeup(bp) 306 register struct buf *bp; 307 { 308 register struct vnode *vp; 309 310 bp->b_flags &= ~B_WRITEINPROG; 311 if (vp = bp->b_vp) { 312 if (--vp->v_numoutput < 0) 313 panic("vwakeup: neg numoutput"); 314 if ((vp->v_flag & VBWAIT) && vp->v_numoutput <= 0) { 315 if (vp->v_numoutput < 0) 316 panic("vwakeup: neg numoutput 2"); 317 vp->v_flag &= ~VBWAIT; 318 wakeup((caddr_t)&vp->v_numoutput); 319 } 320 } 321 } 322 323 /* 324 * Flush out and invalidate all buffers associated with a vnode. 325 * Called with the underlying object locked. 326 */ 327 int 328 vinvalbuf(vp, flags, cred, p, slpflag, slptimeo) 329 register struct vnode *vp; 330 int flags; 331 struct ucred *cred; 332 struct proc *p; 333 int slpflag, slptimeo; 334 { 335 register struct buf *bp; 336 struct buf *nbp, *blist; 337 int s, error; 338 339 if (flags & V_SAVE) { 340 if (error = VOP_FSYNC(vp, cred, MNT_WAIT, p)) 341 return (error); 342 if (vp->v_dirtyblkhd.lh_first != NULL) 343 panic("vinvalbuf: dirty bufs"); 344 } 345 for (;;) { 346 if ((blist = vp->v_cleanblkhd.lh_first) && flags & V_SAVEMETA) 347 while (blist && blist->b_lblkno < 0) 348 blist = blist->b_vnbufs.le_next; 349 if (!blist && (blist = vp->v_dirtyblkhd.lh_first) && 350 (flags & V_SAVEMETA)) 351 while (blist && blist->b_lblkno < 0) 352 blist = blist->b_vnbufs.le_next; 353 if (!blist) 354 break; 355 356 for (bp = blist; bp; bp = nbp) { 357 nbp = bp->b_vnbufs.le_next; 358 if (flags & V_SAVEMETA && bp->b_lblkno < 0) 359 continue; 360 s = splbio(); 361 if (bp->b_flags & B_BUSY) { 362 bp->b_flags |= B_WANTED; 363 error = tsleep((caddr_t)bp, 364 slpflag | (PRIBIO + 1), "vinvalbuf", 365 slptimeo); 366 splx(s); 367 if (error) 368 return (error); 369 break; 370 } 371 bremfree(bp); 372 bp->b_flags |= B_BUSY; 373 splx(s); 374 /* 375 * XXX Since there are no node locks for NFS, I believe 376 * there is a slight chance that a delayed write will 377 * occur while sleeping just above, so check for it. 378 */ 379 if ((bp->b_flags & B_DELWRI) && (flags & V_SAVE)) { 380 (void) VOP_BWRITE(bp); 381 break; 382 } 383 bp->b_flags |= B_INVAL; 384 brelse(bp); 385 } 386 } 387 if (!(flags & V_SAVEMETA) && 388 (vp->v_dirtyblkhd.lh_first || vp->v_cleanblkhd.lh_first)) 389 panic("vinvalbuf: flush failed"); 390 return (0); 391 } 392 393 /* 394 * Associate a buffer with a vnode. 395 */ 396 void 397 bgetvp(vp, bp) 398 register struct vnode *vp; 399 register struct buf *bp; 400 { 401 402 if (bp->b_vp) 403 panic("bgetvp: not free"); 404 VHOLD(vp); 405 bp->b_vp = vp; 406 if (vp->v_type == VBLK || vp->v_type == VCHR) 407 bp->b_dev = vp->v_rdev; 408 else 409 bp->b_dev = NODEV; 410 /* 411 * Insert onto list for new vnode. 412 */ 413 bufinsvn(bp, &vp->v_cleanblkhd); 414 } 415 416 /* 417 * Disassociate a buffer from a vnode. 418 */ 419 void 420 brelvp(bp) 421 register struct buf *bp; 422 { 423 struct vnode *vp; 424 425 if (bp->b_vp == (struct vnode *) 0) 426 panic("brelvp: NULL"); 427 /* 428 * Delete from old vnode list, if on one. 429 */ 430 if (bp->b_vnbufs.le_next != NOLIST) 431 bufremvn(bp); 432 vp = bp->b_vp; 433 bp->b_vp = (struct vnode *) 0; 434 HOLDRELE(vp); 435 } 436 437 /* 438 * Reassign a buffer from one vnode to another. 439 * Used to assign file specific control information 440 * (indirect blocks) to the vnode to which they belong. 441 */ 442 void 443 reassignbuf(bp, newvp) 444 register struct buf *bp; 445 register struct vnode *newvp; 446 { 447 register struct buflists *listheadp; 448 449 if (newvp == NULL) { 450 printf("reassignbuf: NULL"); 451 return; 452 } 453 /* 454 * Delete from old vnode list, if on one. 455 */ 456 if (bp->b_vnbufs.le_next != NOLIST) 457 bufremvn(bp); 458 /* 459 * If dirty, put on list of dirty buffers; 460 * otherwise insert onto list of clean buffers. 461 */ 462 if (bp->b_flags & B_DELWRI) 463 listheadp = &newvp->v_dirtyblkhd; 464 else 465 listheadp = &newvp->v_cleanblkhd; 466 bufinsvn(bp, listheadp); 467 } 468 469 /* 470 * Create a vnode for a block device. 471 * Used for root filesystem, argdev, and swap areas. 472 * Also used for memory file system special devices. 473 */ 474 int 475 bdevvp(dev, vpp) 476 dev_t dev; 477 struct vnode **vpp; 478 { 479 register struct vnode *vp; 480 struct vnode *nvp; 481 int error; 482 483 if (dev == NODEV) 484 return (0); 485 error = getnewvnode(VT_NON, (struct mount *)0, spec_vnodeop_p, &nvp); 486 if (error) { 487 *vpp = NULLVP; 488 return (error); 489 } 490 vp = nvp; 491 vp->v_type = VBLK; 492 if (nvp = checkalias(vp, dev, (struct mount *)0)) { 493 vput(vp); 494 vp = nvp; 495 } 496 *vpp = vp; 497 return (0); 498 } 499 500 /* 501 * Check to see if the new vnode represents a special device 502 * for which we already have a vnode (either because of 503 * bdevvp() or because of a different vnode representing 504 * the same block device). If such an alias exists, deallocate 505 * the existing contents and return the aliased vnode. The 506 * caller is responsible for filling it with its new contents. 507 */ 508 struct vnode * 509 checkalias(nvp, nvp_rdev, mp) 510 register struct vnode *nvp; 511 dev_t nvp_rdev; 512 struct mount *mp; 513 { 514 register struct vnode *vp; 515 struct vnode **vpp; 516 517 if (nvp->v_type != VBLK && nvp->v_type != VCHR) 518 return (NULLVP); 519 520 vpp = &speclisth[SPECHASH(nvp_rdev)]; 521 loop: 522 for (vp = *vpp; vp; vp = vp->v_specnext) { 523 if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type) 524 continue; 525 /* 526 * Alias, but not in use, so flush it out. 527 */ 528 if (vp->v_usecount == 0) { 529 vgone(vp); 530 goto loop; 531 } 532 if (vget(vp, 1)) 533 goto loop; 534 break; 535 } 536 if (vp == NULL || vp->v_tag != VT_NON) { 537 MALLOC(nvp->v_specinfo, struct specinfo *, 538 sizeof(struct specinfo), M_VNODE, M_WAITOK); 539 nvp->v_rdev = nvp_rdev; 540 nvp->v_hashchain = vpp; 541 nvp->v_specnext = *vpp; 542 nvp->v_specflags = 0; 543 *vpp = nvp; 544 if (vp != NULL) { 545 nvp->v_flag |= VALIASED; 546 vp->v_flag |= VALIASED; 547 vput(vp); 548 } 549 return (NULLVP); 550 } 551 VOP_UNLOCK(vp); 552 vclean(vp, 0); 553 vp->v_op = nvp->v_op; 554 vp->v_tag = nvp->v_tag; 555 nvp->v_type = VNON; 556 insmntque(vp, mp); 557 return (vp); 558 } 559 560 /* 561 * Grab a particular vnode from the free list, increment its 562 * reference count and lock it. The vnode lock bit is set the 563 * vnode is being eliminated in vgone. The process is awakened 564 * when the transition is completed, and an error returned to 565 * indicate that the vnode is no longer usable (possibly having 566 * been changed to a new file system type). 567 */ 568 int 569 vget(vp, lockflag) 570 register struct vnode *vp; 571 int lockflag; 572 { 573 574 /* 575 * If the vnode is in the process of being cleaned out for 576 * another use, we wait for the cleaning to finish and then 577 * return failure. Cleaning is determined either by checking 578 * that the VXLOCK flag is set, or that the use count is 579 * zero with the back pointer set to show that it has been 580 * removed from the free list by getnewvnode. The VXLOCK 581 * flag may not have been set yet because vclean is blocked in 582 * the VOP_LOCK call waiting for the VOP_INACTIVE to complete. 583 */ 584 if ((vp->v_flag & VXLOCK) || 585 (vp->v_usecount == 0 && 586 vp->v_freelist.tqe_prev == (struct vnode **)0xdeadb)) { 587 vp->v_flag |= VXWANT; 588 tsleep((caddr_t)vp, PINOD, "vget", 0); 589 return (1); 590 } 591 if (vp->v_usecount == 0) 592 TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); 593 vp->v_usecount++; 594 if (lockflag) 595 VOP_LOCK(vp); 596 return (0); 597 } 598 599 /* 600 * Vnode reference, just increment the count 601 */ 602 void 603 vref(vp) 604 struct vnode *vp; 605 { 606 607 if (vp->v_usecount <= 0) 608 panic("vref used where vget required"); 609 vp->v_usecount++; 610 } 611 612 /* 613 * vput(), just unlock and vrele() 614 */ 615 void 616 vput(vp) 617 register struct vnode *vp; 618 { 619 620 VOP_UNLOCK(vp); 621 vrele(vp); 622 } 623 624 /* 625 * Vnode release. 626 * If count drops to zero, call inactive routine and return to freelist. 627 */ 628 void 629 vrele(vp) 630 register struct vnode *vp; 631 { 632 633 #ifdef DIAGNOSTIC 634 if (vp == NULL) 635 panic("vrele: null vp"); 636 #endif 637 vp->v_usecount--; 638 if (vp->v_usecount > 0) 639 return; 640 #ifdef DIAGNOSTIC 641 if (vp->v_usecount != 0 || vp->v_writecount != 0) { 642 vprint("vrele: bad ref count", vp); 643 panic("vrele: ref cnt"); 644 } 645 #endif 646 /* 647 * insert at tail of LRU list 648 */ 649 TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist); 650 VOP_INACTIVE(vp); 651 } 652 653 /* 654 * Page or buffer structure gets a reference. 655 */ 656 void 657 vhold(vp) 658 register struct vnode *vp; 659 { 660 661 vp->v_holdcnt++; 662 } 663 664 /* 665 * Page or buffer structure frees a reference. 666 */ 667 void 668 holdrele(vp) 669 register struct vnode *vp; 670 { 671 672 if (vp->v_holdcnt <= 0) 673 panic("holdrele: holdcnt"); 674 vp->v_holdcnt--; 675 } 676 677 /* 678 * Remove any vnodes in the vnode table belonging to mount point mp. 679 * 680 * If MNT_NOFORCE is specified, there should not be any active ones, 681 * return error if any are found (nb: this is a user error, not a 682 * system error). If MNT_FORCE is specified, detach any active vnodes 683 * that are found. 684 */ 685 #ifdef DIAGNOSTIC 686 int busyprt = 0; /* print out busy vnodes */ 687 struct ctldebug debug1 = { "busyprt", &busyprt }; 688 #endif 689 690 int 691 vflush(mp, skipvp, flags) 692 struct mount *mp; 693 struct vnode *skipvp; 694 int flags; 695 { 696 register struct vnode *vp, *nvp; 697 int busy = 0; 698 699 if ((mp->mnt_flag & MNT_MPBUSY) == 0) 700 panic("vflush: not busy"); 701 loop: 702 for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) { 703 if (vp->v_mount != mp) 704 goto loop; 705 nvp = vp->v_mntvnodes.le_next; 706 /* 707 * Skip over a selected vnode. 708 */ 709 if (vp == skipvp) 710 continue; 711 /* 712 * Skip over a vnodes marked VSYSTEM. 713 */ 714 if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM)) 715 continue; 716 /* 717 * If WRITECLOSE is set, only flush out regular file 718 * vnodes open for writing. 719 */ 720 if ((flags & WRITECLOSE) && 721 (vp->v_writecount == 0 || vp->v_type != VREG)) 722 continue; 723 /* 724 * With v_usecount == 0, all we need to do is clear 725 * out the vnode data structures and we are done. 726 */ 727 if (vp->v_usecount == 0) { 728 VOP_REVOKE(vp, 0); 729 continue; 730 } 731 /* 732 * If FORCECLOSE is set, forcibly close the vnode. 733 * For block or character devices, revert to an 734 * anonymous device. For all other files, just kill them. 735 */ 736 if (flags & FORCECLOSE) { 737 if (vp->v_type != VBLK && vp->v_type != VCHR) { 738 VOP_REVOKE(vp, 0); 739 } else { 740 vclean(vp, 0); 741 vp->v_op = spec_vnodeop_p; 742 insmntque(vp, (struct mount *)0); 743 } 744 continue; 745 } 746 #ifdef DIAGNOSTIC 747 if (busyprt) 748 vprint("vflush: busy vnode", vp); 749 #endif 750 busy++; 751 } 752 if (busy) 753 return (EBUSY); 754 return (0); 755 } 756 757 /* 758 * Disassociate the underlying file system from a vnode. 759 */ 760 void 761 vclean(vp, flags) 762 register struct vnode *vp; 763 int flags; 764 { 765 int active; 766 767 /* 768 * Check to see if the vnode is in use. 769 * If so we have to reference it before we clean it out 770 * so that its count cannot fall to zero and generate a 771 * race against ourselves to recycle it. 772 */ 773 if (active = vp->v_usecount) 774 VREF(vp); 775 /* 776 * Even if the count is zero, the VOP_INACTIVE routine may still 777 * have the object locked while it cleans it out. The VOP_LOCK 778 * ensures that the VOP_INACTIVE routine is done with its work. 779 * For active vnodes, it ensures that no other activity can 780 * occur while the underlying object is being cleaned out. 781 */ 782 VOP_LOCK(vp); 783 /* 784 * Prevent the vnode from being recycled or 785 * brought into use while we clean it out. 786 */ 787 if (vp->v_flag & VXLOCK) 788 panic("vclean: deadlock"); 789 vp->v_flag |= VXLOCK; 790 /* 791 * Clean out any buffers associated with the vnode. 792 */ 793 if (flags & DOCLOSE) 794 vinvalbuf(vp, V_SAVE, NOCRED, NULL, 0, 0); 795 /* 796 * Any other processes trying to obtain this lock must first 797 * wait for VXLOCK to clear, then call the new lock operation. 798 */ 799 VOP_UNLOCK(vp); 800 /* 801 * If purging an active vnode, it must be closed and 802 * deactivated before being reclaimed. 803 */ 804 if (active) { 805 if (flags & DOCLOSE) 806 VOP_CLOSE(vp, IO_NDELAY, NOCRED, NULL); 807 VOP_INACTIVE(vp); 808 } 809 /* 810 * Reclaim the vnode. 811 */ 812 if (VOP_RECLAIM(vp)) 813 panic("vclean: cannot reclaim"); 814 if (active) 815 vrele(vp); 816 cache_purge(vp); 817 818 /* 819 * Done with purge, notify sleepers of the grim news. 820 */ 821 vp->v_op = dead_vnodeop_p; 822 vp->v_tag = VT_NON; 823 vp->v_flag &= ~VXLOCK; 824 if (vp->v_flag & VXWANT) { 825 vp->v_flag &= ~VXWANT; 826 wakeup((caddr_t)vp); 827 } 828 } 829 830 /* 831 * Eliminate all activity associated with the requested vnode 832 * and with all vnodes aliased to the requested vnode. 833 */ 834 int 835 vop_revoke(ap) 836 struct vop_revoke_args /* { 837 struct vnode *a_vp; 838 int a_flags; 839 } */ *ap; 840 { 841 register struct vnode *vp, *vq; 842 843 vp = ap->a_vp; 844 if ((ap->a_flags & REVOKEALL) && (vp->v_flag & VALIASED)) { 845 /* 846 * If a vgone (or vclean) is already in progress, 847 * wait until it is done and return. 848 */ 849 if (vp->v_flag & VXLOCK) { 850 vp->v_flag |= VXWANT; 851 tsleep((caddr_t)vp, PINOD, "vop_revokeall", 0); 852 return (0); 853 } 854 /* 855 * Ensure that vp will not be vgone'd while we 856 * are eliminating its aliases. 857 */ 858 vp->v_flag |= VXLOCK; 859 while (vp->v_flag & VALIASED) { 860 for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) { 861 if (vq->v_rdev != vp->v_rdev || 862 vq->v_type != vp->v_type || vp == vq) 863 continue; 864 vgone(vq); 865 break; 866 } 867 } 868 /* 869 * Remove the lock so that vgone below will 870 * really eliminate the vnode after which time 871 * vgone will awaken any sleepers. 872 */ 873 vp->v_flag &= ~VXLOCK; 874 } 875 vgone(vp); 876 return (0); 877 } 878 879 /* 880 * Eliminate all activity associated with a vnode 881 * in preparation for reuse. 882 */ 883 void 884 vgone(vp) 885 register struct vnode *vp; 886 { 887 register struct vnode *vq; 888 struct vnode *vx; 889 890 /* 891 * If a vgone (or vclean) is already in progress, 892 * wait until it is done and return. 893 */ 894 if (vp->v_flag & VXLOCK) { 895 vp->v_flag |= VXWANT; 896 tsleep((caddr_t)vp, PINOD, "vgone", 0); 897 return; 898 } 899 /* 900 * Clean out the filesystem specific data. 901 */ 902 vclean(vp, DOCLOSE); 903 /* 904 * Delete from old mount point vnode list, if on one. 905 */ 906 if (vp->v_mount != NULL) { 907 LIST_REMOVE(vp, v_mntvnodes); 908 vp->v_mount = NULL; 909 } 910 /* 911 * If special device, remove it from special device alias list 912 * if it is on one. 913 */ 914 if ((vp->v_type == VBLK || vp->v_type == VCHR) && vp->v_specinfo != 0) { 915 if (*vp->v_hashchain == vp) { 916 *vp->v_hashchain = vp->v_specnext; 917 } else { 918 for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) { 919 if (vq->v_specnext != vp) 920 continue; 921 vq->v_specnext = vp->v_specnext; 922 break; 923 } 924 if (vq == NULL) 925 panic("missing bdev"); 926 } 927 if (vp->v_flag & VALIASED) { 928 vx = NULL; 929 for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) { 930 if (vq->v_rdev != vp->v_rdev || 931 vq->v_type != vp->v_type) 932 continue; 933 if (vx) 934 break; 935 vx = vq; 936 } 937 if (vx == NULL) 938 panic("missing alias"); 939 if (vq == NULL) 940 vx->v_flag &= ~VALIASED; 941 vp->v_flag &= ~VALIASED; 942 } 943 FREE(vp->v_specinfo, M_VNODE); 944 vp->v_specinfo = NULL; 945 } 946 /* 947 * If it is on the freelist and not already at the head, 948 * move it to the head of the list. The test of the back 949 * pointer and the reference count of zero is because 950 * it will be removed from the free list by getnewvnode, 951 * but will not have its reference count incremented until 952 * after calling vgone. If the reference count were 953 * incremented first, vgone would (incorrectly) try to 954 * close the previous instance of the underlying object. 955 * So, the back pointer is explicitly set to `0xdeadb' in 956 * getnewvnode after removing it from the freelist to ensure 957 * that we do not try to move it here. 958 */ 959 if (vp->v_usecount == 0 && 960 vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb && 961 vnode_free_list.tqh_first != vp) { 962 TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); 963 TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist); 964 } 965 vp->v_type = VBAD; 966 } 967 968 /* 969 * Lookup a vnode by device number. 970 */ 971 int 972 vfinddev(dev, type, vpp) 973 dev_t dev; 974 enum vtype type; 975 struct vnode **vpp; 976 { 977 register struct vnode *vp; 978 979 for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) { 980 if (dev != vp->v_rdev || type != vp->v_type) 981 continue; 982 *vpp = vp; 983 return (1); 984 } 985 return (0); 986 } 987 988 /* 989 * Calculate the total number of references to a special device. 990 */ 991 int 992 vcount(vp) 993 register struct vnode *vp; 994 { 995 register struct vnode *vq, *vnext; 996 int count; 997 998 loop: 999 if ((vp->v_flag & VALIASED) == 0) 1000 return (vp->v_usecount); 1001 for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) { 1002 vnext = vq->v_specnext; 1003 if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type) 1004 continue; 1005 /* 1006 * Alias, but not in use, so flush it out. 1007 */ 1008 if (vq->v_usecount == 0 && vq != vp) { 1009 vgone(vq); 1010 goto loop; 1011 } 1012 count += vq->v_usecount; 1013 } 1014 return (count); 1015 } 1016 1017 /* 1018 * Print out a description of a vnode. 1019 */ 1020 static char *typename[] = 1021 { "VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD" }; 1022 1023 void 1024 vprint(label, vp) 1025 char *label; 1026 register struct vnode *vp; 1027 { 1028 char buf[64]; 1029 1030 if (label != NULL) 1031 printf("%s: ", label); 1032 printf("type %s, usecount %d, writecount %d, refcount %d,", 1033 typename[vp->v_type], vp->v_usecount, vp->v_writecount, 1034 vp->v_holdcnt); 1035 buf[0] = '\0'; 1036 if (vp->v_flag & VROOT) 1037 strcat(buf, "|VROOT"); 1038 if (vp->v_flag & VTEXT) 1039 strcat(buf, "|VTEXT"); 1040 if (vp->v_flag & VSYSTEM) 1041 strcat(buf, "|VSYSTEM"); 1042 if (vp->v_flag & VXLOCK) 1043 strcat(buf, "|VXLOCK"); 1044 if (vp->v_flag & VXWANT) 1045 strcat(buf, "|VXWANT"); 1046 if (vp->v_flag & VBWAIT) 1047 strcat(buf, "|VBWAIT"); 1048 if (vp->v_flag & VALIASED) 1049 strcat(buf, "|VALIASED"); 1050 if (buf[0] != '\0') 1051 printf(" flags (%s)", &buf[1]); 1052 if (vp->v_data == NULL) { 1053 printf("\n"); 1054 } else { 1055 printf("\n\t"); 1056 VOP_PRINT(vp); 1057 } 1058 } 1059 1060 #ifdef DEBUG 1061 /* 1062 * List all of the locked vnodes in the system. 1063 * Called when debugging the kernel. 1064 */ 1065 void 1066 printlockedvnodes() 1067 { 1068 register struct mount *mp; 1069 register struct vnode *vp; 1070 1071 printf("Locked vnodes\n"); 1072 for (mp = mountlist.cqh_first; mp != (void *)&mountlist; 1073 mp = mp->mnt_list.cqe_next) { 1074 for (vp = mp->mnt_vnodelist.lh_first; 1075 vp != NULL; 1076 vp = vp->v_mntvnodes.le_next) { 1077 if (VOP_ISLOCKED(vp)) 1078 vprint((char *)0, vp); 1079 } 1080 } 1081 } 1082 #endif 1083 1084 /* 1085 * Top level filesystem related information gathering. 1086 */ 1087 int 1088 vfs_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p) 1089 int *name; 1090 u_int namelen; 1091 void *oldp; 1092 size_t *oldlenp; 1093 void *newp; 1094 size_t newlen; 1095 struct proc *p; 1096 { 1097 struct ctldebug *cdp; 1098 struct vfsconf *vfsp; 1099 1100 /* all sysctl names at this level are at least name and field */ 1101 if (namelen < 2) 1102 return (ENOTDIR); /* overloaded */ 1103 if (name[0] != VFS_GENERIC) { 1104 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) 1105 if (vfsp->vfc_typenum == name[0]) 1106 break; 1107 if (vfsp == NULL) 1108 return (EOPNOTSUPP); 1109 return ((*vfsp->vfc_vfsops->vfs_sysctl)(&name[1], namelen - 1, 1110 oldp, oldlenp, newp, newlen, p)); 1111 } 1112 switch (name[1]) { 1113 case VFS_MAXTYPENUM: 1114 return (sysctl_rdint(oldp, oldlenp, newp, maxvfsconf)); 1115 case VFS_CONF: 1116 if (namelen < 3) 1117 return (ENOTDIR); /* overloaded */ 1118 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) 1119 if (vfsp->vfc_typenum == name[2]) 1120 break; 1121 if (vfsp == NULL) 1122 return (EOPNOTSUPP); 1123 return (sysctl_rdstruct(oldp, oldlenp, newp, vfsp, 1124 sizeof(struct vfsconf))); 1125 } 1126 return (EOPNOTSUPP); 1127 } 1128 1129 int kinfo_vdebug = 1; 1130 int kinfo_vgetfailed; 1131 #define KINFO_VNODESLOP 10 1132 /* 1133 * Dump vnode list (via sysctl). 1134 * Copyout address of vnode followed by vnode. 1135 */ 1136 /* ARGSUSED */ 1137 int 1138 sysctl_vnode(where, sizep) 1139 char *where; 1140 size_t *sizep; 1141 { 1142 register struct mount *mp, *nmp; 1143 struct vnode *vp; 1144 register char *bp = where, *savebp; 1145 char *ewhere; 1146 int error; 1147 1148 #define VPTRSZ sizeof (struct vnode *) 1149 #define VNODESZ sizeof (struct vnode) 1150 if (where == NULL) { 1151 *sizep = (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ); 1152 return (0); 1153 } 1154 ewhere = where + *sizep; 1155 1156 for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) { 1157 nmp = mp->mnt_list.cqe_next; 1158 if (vfs_busy(mp)) 1159 continue; 1160 savebp = bp; 1161 again: 1162 for (vp = mp->mnt_vnodelist.lh_first; 1163 vp != NULL; 1164 vp = vp->v_mntvnodes.le_next) { 1165 /* 1166 * Check that the vp is still associated with 1167 * this filesystem. RACE: could have been 1168 * recycled onto the same filesystem. 1169 */ 1170 if (vp->v_mount != mp) { 1171 if (kinfo_vdebug) 1172 printf("kinfo: vp changed\n"); 1173 bp = savebp; 1174 goto again; 1175 } 1176 if (bp + VPTRSZ + VNODESZ > ewhere) { 1177 *sizep = bp - where; 1178 return (ENOMEM); 1179 } 1180 if ((error = copyout((caddr_t)&vp, bp, VPTRSZ)) || 1181 (error = copyout((caddr_t)vp, bp + VPTRSZ, VNODESZ))) 1182 return (error); 1183 bp += VPTRSZ + VNODESZ; 1184 } 1185 vfs_unbusy(mp); 1186 } 1187 1188 *sizep = bp - where; 1189 return (0); 1190 } 1191 1192 /* 1193 * Check to see if a filesystem is mounted on a block device. 1194 */ 1195 int 1196 vfs_mountedon(vp) 1197 register struct vnode *vp; 1198 { 1199 register struct vnode *vq; 1200 1201 if (vp->v_specflags & SI_MOUNTEDON) 1202 return (EBUSY); 1203 if (vp->v_flag & VALIASED) { 1204 for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) { 1205 if (vq->v_rdev != vp->v_rdev || 1206 vq->v_type != vp->v_type) 1207 continue; 1208 if (vq->v_specflags & SI_MOUNTEDON) 1209 return (EBUSY); 1210 } 1211 } 1212 return (0); 1213 } 1214 1215 /* 1216 * Unmount all filesystems. The list is traversed in reverse order 1217 * of mounting to avoid dependencies. 1218 */ 1219 void 1220 vfs_unmountall() 1221 { 1222 struct mount *mp, *nmp; 1223 1224 for (mp = mountlist.cqh_last; mp != (void *)&mountlist; mp = nmp) { 1225 nmp = mp->mnt_list.cqe_prev; 1226 (void) dounmount(mp, MNT_FORCE, &proc0); 1227 } 1228 } 1229 1230 /* 1231 * Build hash lists of net addresses and hang them off the mount point. 1232 * Called by ufs_mount() to set up the lists of export addresses. 1233 */ 1234 static int 1235 vfs_hang_addrlist(mp, nep, argp) 1236 struct mount *mp; 1237 struct netexport *nep; 1238 struct export_args *argp; 1239 { 1240 register struct netcred *np; 1241 register struct radix_node_head *rnh; 1242 register int i; 1243 struct radix_node *rn; 1244 struct sockaddr *saddr, *smask = 0; 1245 struct domain *dom; 1246 int error; 1247 1248 if (argp->ex_addrlen == 0) { 1249 if (mp->mnt_flag & MNT_DEFEXPORTED) 1250 return (EPERM); 1251 np = &nep->ne_defexported; 1252 np->netc_exflags = argp->ex_flags; 1253 np->netc_anon = argp->ex_anon; 1254 np->netc_anon.cr_ref = 1; 1255 mp->mnt_flag |= MNT_DEFEXPORTED; 1256 return (0); 1257 } 1258 i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen; 1259 np = (struct netcred *)malloc(i, M_NETADDR, M_WAITOK); 1260 bzero((caddr_t)np, i); 1261 saddr = (struct sockaddr *)(np + 1); 1262 if (error = copyin(argp->ex_addr, (caddr_t)saddr, argp->ex_addrlen)) 1263 goto out; 1264 if (saddr->sa_len > argp->ex_addrlen) 1265 saddr->sa_len = argp->ex_addrlen; 1266 if (argp->ex_masklen) { 1267 smask = (struct sockaddr *)((caddr_t)saddr + argp->ex_addrlen); 1268 error = copyin(argp->ex_addr, (caddr_t)smask, argp->ex_masklen); 1269 if (error) 1270 goto out; 1271 if (smask->sa_len > argp->ex_masklen) 1272 smask->sa_len = argp->ex_masklen; 1273 } 1274 i = saddr->sa_family; 1275 if ((rnh = nep->ne_rtable[i]) == 0) { 1276 /* 1277 * Seems silly to initialize every AF when most are not 1278 * used, do so on demand here 1279 */ 1280 for (dom = domains; dom; dom = dom->dom_next) 1281 if (dom->dom_family == i && dom->dom_rtattach) { 1282 dom->dom_rtattach((void **)&nep->ne_rtable[i], 1283 dom->dom_rtoffset); 1284 break; 1285 } 1286 if ((rnh = nep->ne_rtable[i]) == 0) { 1287 error = ENOBUFS; 1288 goto out; 1289 } 1290 } 1291 rn = (*rnh->rnh_addaddr)((caddr_t)saddr, (caddr_t)smask, rnh, 1292 np->netc_rnodes); 1293 if (rn == 0) { 1294 /* 1295 * One of the reasons that rnh_addaddr may fail is that 1296 * the entry already exists. To check for this case, we 1297 * look up the entry to see if it is there. If so, we 1298 * do not need to make a new entry but do return success. 1299 */ 1300 free(np, M_NETADDR); 1301 rn = (*rnh->rnh_matchaddr)((caddr_t)saddr, rnh); 1302 if (rn != 0 && (rn->rn_flags & RNF_ROOT) == 0 && 1303 ((struct netcred *)rn)->netc_exflags == argp->ex_flags && 1304 !bcmp((caddr_t)&((struct netcred *)rn)->netc_anon, 1305 (caddr_t)&argp->ex_anon, sizeof(struct ucred))) 1306 return (0); 1307 return (EPERM); 1308 } 1309 np->netc_exflags = argp->ex_flags; 1310 np->netc_anon = argp->ex_anon; 1311 np->netc_anon.cr_ref = 1; 1312 return (0); 1313 out: 1314 free(np, M_NETADDR); 1315 return (error); 1316 } 1317 1318 /* ARGSUSED */ 1319 static int 1320 vfs_free_netcred(rn, w) 1321 struct radix_node *rn; 1322 caddr_t w; 1323 { 1324 register struct radix_node_head *rnh = (struct radix_node_head *)w; 1325 1326 (*rnh->rnh_deladdr)(rn->rn_key, rn->rn_mask, rnh); 1327 free((caddr_t)rn, M_NETADDR); 1328 return (0); 1329 } 1330 1331 /* 1332 * Free the net address hash lists that are hanging off the mount points. 1333 */ 1334 static void 1335 vfs_free_addrlist(nep) 1336 struct netexport *nep; 1337 { 1338 register int i; 1339 register struct radix_node_head *rnh; 1340 1341 for (i = 0; i <= AF_MAX; i++) 1342 if (rnh = nep->ne_rtable[i]) { 1343 (*rnh->rnh_walktree)(rnh, vfs_free_netcred, 1344 (caddr_t)rnh); 1345 free((caddr_t)rnh, M_RTABLE); 1346 nep->ne_rtable[i] = 0; 1347 } 1348 } 1349 1350 int 1351 vfs_export(mp, nep, argp) 1352 struct mount *mp; 1353 struct netexport *nep; 1354 struct export_args *argp; 1355 { 1356 int error; 1357 1358 if (argp->ex_flags & MNT_DELEXPORT) { 1359 vfs_free_addrlist(nep); 1360 mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED); 1361 } 1362 if (argp->ex_flags & MNT_EXPORTED) { 1363 if (error = vfs_hang_addrlist(mp, nep, argp)) 1364 return (error); 1365 mp->mnt_flag |= MNT_EXPORTED; 1366 } 1367 return (0); 1368 } 1369 1370 struct netcred * 1371 vfs_export_lookup(mp, nep, nam) 1372 register struct mount *mp; 1373 struct netexport *nep; 1374 struct mbuf *nam; 1375 { 1376 register struct netcred *np; 1377 register struct radix_node_head *rnh; 1378 struct sockaddr *saddr; 1379 1380 np = NULL; 1381 if (mp->mnt_flag & MNT_EXPORTED) { 1382 /* 1383 * Lookup in the export list first. 1384 */ 1385 if (nam != NULL) { 1386 saddr = mtod(nam, struct sockaddr *); 1387 rnh = nep->ne_rtable[saddr->sa_family]; 1388 if (rnh != NULL) { 1389 np = (struct netcred *) 1390 (*rnh->rnh_matchaddr)((caddr_t)saddr, 1391 rnh); 1392 if (np && np->netc_rnodes->rn_flags & RNF_ROOT) 1393 np = NULL; 1394 } 1395 } 1396 /* 1397 * If no address match, use the default if it exists. 1398 */ 1399 if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED) 1400 np = &nep->ne_defexported; 1401 } 1402 return (np); 1403 } 1404