/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * %sccs.include.redist.c%
 *
 *	@(#)vfs_subr.c	8.8 (Berkeley) 01/14/94
 */

/*
 * External virtual filesystem routines
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/mount.h>
#include <sys/time.h>
#include <sys/vnode.h>
#include <sys/stat.h>
#include <sys/namei.h>
#include <sys/ucred.h>
#include <sys/buf.h>
#include <sys/errno.h>
#include <sys/malloc.h>
#include <sys/domain.h>
#include <sys/mbuf.h>

#include <vm/vm.h>
#include <sys/sysctl.h>

#include <miscfs/specfs/specdev.h>

/* Map the IFMT bits of an inode mode (mode >> 12) to a vnode type. */
enum vtype iftovt_tab[16] = {
	VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
	VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
};
/* Map a vnode type back to the corresponding IFMT mode bits. */
int	vttoif_tab[9] = {
	0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
	S_IFSOCK, S_IFIFO, S_IFMT,
};

/*
 * Insq/Remq for the vnode usage lists.
 * bufremvn resets le_next to NOLIST so that brelvp()/reassignbuf()
 * can tell whether a buffer is currently on a vnode's buffer list.
 */
#define	bufinsvn(bp, dp)	LIST_INSERT_HEAD(dp, bp, b_vnbufs)
#define	bufremvn(bp) {						\
	LIST_REMOVE(bp, b_vnbufs);				\
	(bp)->b_vnbufs.le_next = NOLIST;			\
}

TAILQ_HEAD(freelst, vnode) vnode_free_list;	/* vnode free list */
struct mntlist mountlist;			/* mounted filesystem list */

/*
 * Initialize the vnode management data structures.
 * Called once at boot, before any vnodes are created or any
 * filesystems mounted.
 */
vntblinit()
{

	TAILQ_INIT(&vnode_free_list);
	TAILQ_INIT(&mountlist);
}

/*
 * Lock a filesystem.
 * Used to prevent access to it while mounting and unmounting.
 * Sleeps until the current holder of MNT_MLOCK releases it;
 * always returns 0.
 */
vfs_lock(mp)
	register struct mount *mp;
{

	while(mp->mnt_flag & MNT_MLOCK) {
		/* Note that we are waiting so vfs_unlock() wakes us. */
		mp->mnt_flag |= MNT_MWAIT;
		sleep((caddr_t)mp, PVFS);
	}
	mp->mnt_flag |= MNT_MLOCK;
	return (0);
}

/*
 * Unlock a locked filesystem.
 * Panic if filesystem is not locked.
 */
void
vfs_unlock(mp)
	register struct mount *mp;
{

	if ((mp->mnt_flag & MNT_MLOCK) == 0)
		panic("vfs_unlock: not locked");
	mp->mnt_flag &= ~MNT_MLOCK;
	/* Wake anyone who registered interest in vfs_lock(). */
	if (mp->mnt_flag & MNT_MWAIT) {
		mp->mnt_flag &= ~MNT_MWAIT;
		wakeup((caddr_t)mp);
	}
}

/*
 * Mark a mount point as busy.
 * Used to synchronize access and to delay unmounting.
 * Returns 1 if the filesystem is being unmounted (caller must
 * back off), 0 once the mount point is successfully marked busy.
 */
vfs_busy(mp)
	register struct mount *mp;
{

	while(mp->mnt_flag & MNT_MPBUSY) {
		mp->mnt_flag |= MNT_MPWANT;
		sleep((caddr_t)&mp->mnt_flag, PVFS);
	}
	/* Re-check after sleeping: an unmount may have begun meanwhile. */
	if (mp->mnt_flag & MNT_UNMOUNT)
		return (1);
	mp->mnt_flag |= MNT_MPBUSY;
	return (0);
}

/*
 * Free a busy filesystem.
 * Panic if filesystem is not busy.
 */
vfs_unbusy(mp)
	register struct mount *mp;
{

	if ((mp->mnt_flag & MNT_MPBUSY) == 0)
		panic("vfs_unbusy: not busy");
	mp->mnt_flag &= ~MNT_MPBUSY;
	/* Wake any vfs_busy() sleepers. */
	if (mp->mnt_flag & MNT_MPWANT) {
		mp->mnt_flag &= ~MNT_MPWANT;
		wakeup((caddr_t)&mp->mnt_flag);
	}
}

/*
 * Lookup a mount point by filesystem identifier.
136 */ 137 struct mount * 138 getvfs(fsid) 139 fsid_t *fsid; 140 { 141 register struct mount *mp; 142 143 for (mp = mountlist.tqh_first; mp != NULL; mp = mp->mnt_list.tqe_next) { 144 if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] && 145 mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) 146 return (mp); 147 } 148 return ((struct mount *)0); 149 } 150 151 /* 152 * Get a new unique fsid 153 */ 154 void 155 getnewfsid(mp, mtype) 156 struct mount *mp; 157 int mtype; 158 { 159 static u_short xxxfs_mntid; 160 161 fsid_t tfsid; 162 163 mp->mnt_stat.f_fsid.val[0] = makedev(nblkdev + mtype, 0); 164 mp->mnt_stat.f_fsid.val[1] = mtype; 165 if (xxxfs_mntid == 0) 166 ++xxxfs_mntid; 167 tfsid.val[0] = makedev(nblkdev + mtype, xxxfs_mntid); 168 tfsid.val[1] = mtype; 169 if (mountlist.tqh_first != NULL) { 170 while (getvfs(&tfsid)) { 171 tfsid.val[0]++; 172 xxxfs_mntid++; 173 } 174 } 175 mp->mnt_stat.f_fsid.val[0] = tfsid.val[0]; 176 } 177 178 /* 179 * Set vnode attributes to VNOVAL 180 */ 181 void vattr_null(vap) 182 register struct vattr *vap; 183 { 184 185 vap->va_type = VNON; 186 vap->va_size = vap->va_bytes = VNOVAL; 187 vap->va_mode = vap->va_nlink = vap->va_uid = vap->va_gid = 188 vap->va_fsid = vap->va_fileid = 189 vap->va_blocksize = vap->va_rdev = 190 vap->va_atime.ts_sec = vap->va_atime.ts_nsec = 191 vap->va_mtime.ts_sec = vap->va_mtime.ts_nsec = 192 vap->va_ctime.ts_sec = vap->va_ctime.ts_nsec = 193 vap->va_flags = vap->va_gen = VNOVAL; 194 vap->va_vaflags = 0; 195 } 196 197 /* 198 * Routines having to do with the management of the vnode table. 199 */ 200 extern int (**dead_vnodeop_p)(); 201 extern void vclean(); 202 long numvnodes; 203 extern struct vattr va_null; 204 205 /* 206 * Return the next vnode from the free list. 
 */
getnewvnode(tag, mp, vops, vpp)
	enum vtagtype tag;
	struct mount *mp;
	int (**vops)();
	struct vnode **vpp;
{
	register struct vnode *vp;
	int s;

	/*
	 * Allocate a fresh vnode while the table is still allowed to
	 * grow (or the free list is empty); otherwise recycle the
	 * least-recently-used vnode from the front of the free list.
	 */
	if ((vnode_free_list.tqh_first == NULL &&
	     numvnodes < 2 * desiredvnodes) ||
	    numvnodes < desiredvnodes) {
		vp = (struct vnode *)malloc((u_long)sizeof *vp,
		    M_VNODE, M_WAITOK);
		bzero((char *)vp, sizeof *vp);
		numvnodes++;
	} else {
		if ((vp = vnode_free_list.tqh_first) == NULL) {
			tablefull("vnode");
			*vpp = 0;
			return (ENFILE);
		}
		if (vp->v_usecount)
			panic("free vnode isn't");
		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
		/* see comment on why 0xdeadb is set at end of vgone (below) */
		vp->v_freelist.tqe_prev = (struct vnode **)0xdeadb;
		vp->v_lease = NULL;
		/* Dissociate the old filesystem instance, if any. */
		if (vp->v_type != VBAD)
			vgone(vp);
#ifdef DIAGNOSTIC
		if (vp->v_data)
			panic("cleaned vnode isn't");
		s = splbio();
		if (vp->v_numoutput)
			panic("Clean vnode has pending I/O's");
		splx(s);
#endif
		/* Reset per-vnode state carried over from its old identity. */
		vp->v_flag = 0;
		vp->v_lastr = 0;
		vp->v_ralen = 0;
		vp->v_maxra = 0;
		vp->v_lastw = 0;
		vp->v_lasta = 0;
		vp->v_cstart = 0;
		vp->v_clen = 0;
		vp->v_socket = 0;
	}
	vp->v_type = VNON;
	cache_purge(vp);
	vp->v_tag = tag;
	vp->v_op = vops;
	insmntque(vp, mp);
	*vpp = vp;
	/* Hand back the vnode with a single reference held. */
	vp->v_usecount = 1;
	vp->v_data = 0;
	return (0);
}

/*
 * Move a vnode from one mount queue to another.
 */
insmntque(vp, mp)
	register struct vnode *vp;
	register struct mount *mp;
{

	/*
	 * Delete from old mount point vnode list, if on one.
	 */
	if (vp->v_mount != NULL)
		LIST_REMOVE(vp, v_mntvnodes);
	/*
	 * Insert into list of vnodes for the new mount point, if available.
	 */
	if ((vp->v_mount = mp) == NULL)
		return;
	LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes);
}

/*
 * Update outstanding I/O count and do wakeup if requested.
290 */ 291 vwakeup(bp) 292 register struct buf *bp; 293 { 294 register struct vnode *vp; 295 296 bp->b_flags &= ~B_WRITEINPROG; 297 if (vp = bp->b_vp) { 298 vp->v_numoutput--; 299 if (vp->v_numoutput < 0) 300 panic("vwakeup: neg numoutput"); 301 if ((vp->v_flag & VBWAIT) && vp->v_numoutput <= 0) { 302 if (vp->v_numoutput < 0) 303 panic("vwakeup: neg numoutput"); 304 vp->v_flag &= ~VBWAIT; 305 wakeup((caddr_t)&vp->v_numoutput); 306 } 307 } 308 } 309 310 /* 311 * Flush out and invalidate all buffers associated with a vnode. 312 * Called with the underlying object locked. 313 */ 314 int 315 vinvalbuf(vp, flags, cred, p, slpflag, slptimeo) 316 register struct vnode *vp; 317 int flags; 318 struct ucred *cred; 319 struct proc *p; 320 int slpflag, slptimeo; 321 { 322 register struct buf *bp; 323 struct buf *nbp, *blist; 324 int s, error; 325 326 if (flags & V_SAVE) { 327 if (error = VOP_FSYNC(vp, cred, MNT_WAIT, p)) 328 return (error); 329 if (vp->v_dirtyblkhd.lh_first != NULL) 330 panic("vinvalbuf: dirty bufs"); 331 } 332 for (;;) { 333 if ((blist = vp->v_cleanblkhd.lh_first) && flags & V_SAVEMETA) 334 while (blist && blist->b_lblkno < 0) 335 blist = blist->b_vnbufs.le_next; 336 if (!blist && (blist = vp->v_dirtyblkhd.lh_first) && 337 (flags & V_SAVEMETA)) 338 while (blist && blist->b_lblkno < 0) 339 blist = blist->b_vnbufs.le_next; 340 if (!blist) 341 break; 342 343 for (bp = blist; bp; bp = nbp) { 344 nbp = bp->b_vnbufs.le_next; 345 if (flags & V_SAVEMETA && bp->b_lblkno < 0) 346 continue; 347 s = splbio(); 348 if (bp->b_flags & B_BUSY) { 349 bp->b_flags |= B_WANTED; 350 error = tsleep((caddr_t)bp, 351 slpflag | (PRIBIO + 1), "vinvalbuf", 352 slptimeo); 353 splx(s); 354 if (error) 355 return (error); 356 break; 357 } 358 bremfree(bp); 359 bp->b_flags |= B_BUSY; 360 splx(s); 361 /* 362 * XXX Since there are no node locks for NFS, I believe 363 * there is a slight chance that a delayed write will 364 * occur while sleeping just above, so check for it. 
365 */ 366 if ((bp->b_flags & B_DELWRI) && (flags & V_SAVE)) { 367 (void) VOP_BWRITE(bp); 368 break; 369 } 370 bp->b_flags |= B_INVAL; 371 brelse(bp); 372 } 373 } 374 if (!(flags & V_SAVEMETA) && 375 (vp->v_dirtyblkhd.lh_first || vp->v_cleanblkhd.lh_first)) 376 panic("vinvalbuf: flush failed"); 377 return (0); 378 } 379 380 /* 381 * Associate a buffer with a vnode. 382 */ 383 bgetvp(vp, bp) 384 register struct vnode *vp; 385 register struct buf *bp; 386 { 387 388 if (bp->b_vp) 389 panic("bgetvp: not free"); 390 VHOLD(vp); 391 bp->b_vp = vp; 392 if (vp->v_type == VBLK || vp->v_type == VCHR) 393 bp->b_dev = vp->v_rdev; 394 else 395 bp->b_dev = NODEV; 396 /* 397 * Insert onto list for new vnode. 398 */ 399 bufinsvn(bp, &vp->v_cleanblkhd); 400 } 401 402 /* 403 * Disassociate a buffer from a vnode. 404 */ 405 brelvp(bp) 406 register struct buf *bp; 407 { 408 struct vnode *vp; 409 410 if (bp->b_vp == (struct vnode *) 0) 411 panic("brelvp: NULL"); 412 /* 413 * Delete from old vnode list, if on one. 414 */ 415 if (bp->b_vnbufs.le_next != NOLIST) 416 bufremvn(bp); 417 vp = bp->b_vp; 418 bp->b_vp = (struct vnode *) 0; 419 HOLDRELE(vp); 420 } 421 422 /* 423 * Reassign a buffer from one vnode to another. 424 * Used to assign file specific control information 425 * (indirect blocks) to the vnode to which they belong. 426 */ 427 reassignbuf(bp, newvp) 428 register struct buf *bp; 429 register struct vnode *newvp; 430 { 431 register struct buflists *listheadp; 432 433 if (newvp == NULL) { 434 printf("reassignbuf: NULL"); 435 return; 436 } 437 /* 438 * Delete from old vnode list, if on one. 439 */ 440 if (bp->b_vnbufs.le_next != NOLIST) 441 bufremvn(bp); 442 /* 443 * If dirty, put on list of dirty buffers; 444 * otherwise insert onto list of clean buffers. 445 */ 446 if (bp->b_flags & B_DELWRI) 447 listheadp = &newvp->v_dirtyblkhd; 448 else 449 listheadp = &newvp->v_cleanblkhd; 450 bufinsvn(bp, listheadp); 451 } 452 453 /* 454 * Create a vnode for a block device. 
 * Used for root filesystem, argdev, and swap areas.
 * Also used for memory file system special devices.
 */
bdevvp(dev, vpp)
	dev_t dev;
	struct vnode **vpp;
{
	register struct vnode *vp;
	struct vnode *nvp;
	int error;

	if (dev == NODEV)
		return (0);
	error = getnewvnode(VT_NON, (struct mount *)0, spec_vnodeop_p, &nvp);
	if (error) {
		*vpp = 0;
		return (error);
	}
	vp = nvp;
	vp->v_type = VBLK;
	/* If an alias for this device already exists, use it instead. */
	if (nvp = checkalias(vp, dev, (struct mount *)0)) {
		vput(vp);
		vp = nvp;
	}
	*vpp = vp;
	return (0);
}

/*
 * Check to see if the new vnode represents a special device
 * for which we already have a vnode (either because of
 * bdevvp() or because of a different vnode representing
 * the same block device). If such an alias exists, deallocate
 * the existing contents and return the aliased vnode. The
 * caller is responsible for filling it with its new contents.
 */
struct vnode *
checkalias(nvp, nvp_rdev, mp)
	register struct vnode *nvp;
	dev_t nvp_rdev;
	struct mount *mp;
{
	register struct vnode *vp;
	struct vnode **vpp;

	if (nvp->v_type != VBLK && nvp->v_type != VCHR)
		return (NULLVP);

	vpp = &speclisth[SPECHASH(nvp_rdev)];
loop:
	for (vp = *vpp; vp; vp = vp->v_specnext) {
		if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type)
			continue;
		/*
		 * Alias, but not in use, so flush it out.
		 */
		if (vp->v_usecount == 0) {
			vgone(vp);
			goto loop;
		}
		/* vget may sleep; on failure the chain may have changed. */
		if (vget(vp, 1))
			goto loop;
		break;
	}
	if (vp == NULL || vp->v_tag != VT_NON) {
		/*
		 * No reusable alias: link the new vnode into the device
		 * hash chain, and mark both vnodes aliased if an in-use
		 * alias was found.
		 */
		MALLOC(nvp->v_specinfo, struct specinfo *,
			sizeof(struct specinfo), M_VNODE, M_WAITOK);
		nvp->v_rdev = nvp_rdev;
		nvp->v_hashchain = vpp;
		nvp->v_specnext = *vpp;
		nvp->v_specflags = 0;
		*vpp = nvp;
		if (vp != NULL) {
			nvp->v_flag |= VALIASED;
			vp->v_flag |= VALIASED;
			vput(vp);
		}
		return (NULLVP);
	}
	/*
	 * Reuse the existing device vnode: discard its old contents
	 * and give it the new vnode's operations vector and tag.
	 */
	VOP_UNLOCK(vp);
	vclean(vp, 0);
	vp->v_op = nvp->v_op;
	vp->v_tag = nvp->v_tag;
	nvp->v_type = VNON;
	insmntque(vp, mp);
	return (vp);
}

/*
 * Grab a particular vnode from the free list, increment its
 * reference count and lock it. The vnode lock bit is set the
 * vnode is being eliminated in vgone. The process is awakened
 * when the transition is completed, and an error returned to
 * indicate that the vnode is no longer usable (possibly having
 * been changed to a new file system type).
 */
vget(vp, lockflag)
	register struct vnode *vp;
	int lockflag;
{

	/* If the vnode is being cleaned out, wait and report failure. */
	if (vp->v_flag & VXLOCK) {
		vp->v_flag |= VXWANT;
		sleep((caddr_t)vp, PINOD);
		return (1);
	}
	if (vp->v_usecount == 0)
		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
	vp->v_usecount++;
	if (lockflag)
		VOP_LOCK(vp);
	return (0);
}

/*
 * Vnode reference, just increment the count
 */
void vref(vp)
	struct vnode *vp;
{

	if (vp->v_usecount <= 0)
		panic("vref used where vget required");
	vp->v_usecount++;
}

/*
 * vput(), just unlock and vrele()
 */
void vput(vp)
	register struct vnode *vp;
{

	VOP_UNLOCK(vp);
	vrele(vp);
}

/*
 * Vnode release.
 * If count drops to zero, call inactive routine and return to freelist.
 */
void vrele(vp)
	register struct vnode *vp;
{

#ifdef DIAGNOSTIC
	if (vp == NULL)
		panic("vrele: null vp");
#endif
	vp->v_usecount--;
	if (vp->v_usecount > 0)
		return;
#ifdef DIAGNOSTIC
	if (vp->v_usecount != 0 || vp->v_writecount != 0) {
		vprint("vrele: bad ref count", vp);
		panic("vrele: ref cnt");
	}
#endif
	/*
	 * insert at tail of LRU list
	 *
	 * NOTE(review): the vnode goes on the free list before
	 * VOP_INACTIVE runs; see the comment at the end of vgone()
	 * for the interlock with getnewvnode's 0xdeadb sentinel.
	 */
	TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
	VOP_INACTIVE(vp);
}

/*
 * Page or buffer structure gets a reference.
 */
void vhold(vp)
	register struct vnode *vp;
{

	vp->v_holdcnt++;
}

/*
 * Page or buffer structure frees a reference.
 */
void holdrele(vp)
	register struct vnode *vp;
{

	if (vp->v_holdcnt <= 0)
		panic("holdrele: holdcnt");
	vp->v_holdcnt--;
}

/*
 * Remove any vnodes in the vnode table belonging to mount point mp.
 *
 * If MNT_NOFORCE is specified, there should not be any active ones,
 * return error if any are found (nb: this is a user error, not a
 * system error). If MNT_FORCE is specified, detach any active vnodes
 * that are found.
 */
#ifdef DIAGNOSTIC
int busyprt = 0;	/* print out busy vnodes */
struct ctldebug debug1 = { "busyprt", &busyprt };
#endif

vflush(mp, skipvp, flags)
	struct mount *mp;
	struct vnode *skipvp;
	int flags;
{
	register struct vnode *vp, *nvp;
	int busy = 0;

	if ((mp->mnt_flag & MNT_MPBUSY) == 0)
		panic("vflush: not busy");
loop:
	for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) {
		/* Restart if the vnode left this mount while we slept. */
		if (vp->v_mount != mp)
			goto loop;
		nvp = vp->v_mntvnodes.le_next;
		/*
		 * Skip over a selected vnode.
		 */
		if (vp == skipvp)
			continue;
		/*
		 * Skip over a vnodes marked VSYSTEM.
		 */
		if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM))
			continue;
		/*
		 * If WRITECLOSE is set, only flush out regular file
		 * vnodes open for writing.
		 */
		if ((flags & WRITECLOSE) &&
		    (vp->v_writecount == 0 || vp->v_type != VREG))
			continue;
		/*
		 * With v_usecount == 0, all we need to do is clear
		 * out the vnode data structures and we are done.
		 */
		if (vp->v_usecount == 0) {
			vgone(vp);
			continue;
		}
		/*
		 * If FORCECLOSE is set, forcibly close the vnode.
		 * For block or character devices, revert to an
		 * anonymous device. For all other files, just kill them.
		 */
		if (flags & FORCECLOSE) {
			if (vp->v_type != VBLK && vp->v_type != VCHR) {
				vgone(vp);
			} else {
				vclean(vp, 0);
				vp->v_op = spec_vnodeop_p;
				insmntque(vp, (struct mount *)0);
			}
			continue;
		}
#ifdef DIAGNOSTIC
		if (busyprt)
			vprint("vflush: busy vnode", vp);
#endif
		busy++;
	}
	if (busy)
		return (EBUSY);
	return (0);
}

/*
 * Disassociate the underlying file system from a vnode.
 */
void
vclean(vp, flags)
	register struct vnode *vp;
	int flags;
{
	int active;

	/*
	 * Check to see if the vnode is in use.
	 * If so we have to reference it before we clean it out
	 * so that its count cannot fall to zero and generate a
	 * race against ourselves to recycle it.
	 */
	if (active = vp->v_usecount)
		VREF(vp);
	/*
	 * Even if the count is zero, the VOP_INACTIVE routine may still
	 * have the object locked while it cleans it out. The VOP_LOCK
	 * ensures that the VOP_INACTIVE routine is done with its work.
	 * For active vnodes, it ensures that no other activity can
	 * occur while the underlying object is being cleaned out.
	 */
	VOP_LOCK(vp);
	/*
	 * Prevent the vnode from being recycled or
	 * brought into use while we clean it out.
	 */
	if (vp->v_flag & VXLOCK)
		panic("vclean: deadlock");
	vp->v_flag |= VXLOCK;
	/*
	 * Clean out any buffers associated with the vnode.
	 */
	if (flags & DOCLOSE)
		vinvalbuf(vp, V_SAVE, NOCRED, NULL, 0, 0);
	/*
	 * Any other processes trying to obtain this lock must first
	 * wait for VXLOCK to clear, then call the new lock operation.
	 */
	VOP_UNLOCK(vp);
	/*
	 * If purging an active vnode, it must be closed and
	 * deactivated before being reclaimed.
	 */
	if (active) {
		if (flags & DOCLOSE)
			VOP_CLOSE(vp, IO_NDELAY, NOCRED, NULL);
		VOP_INACTIVE(vp);
	}
	/*
	 * Reclaim the vnode.
	 */
	if (VOP_RECLAIM(vp))
		panic("vclean: cannot reclaim");
	if (active)
		vrele(vp);

	/*
	 * Done with purge, notify sleepers of the grim news.
	 */
	vp->v_op = dead_vnodeop_p;
	vp->v_tag = VT_NON;
	vp->v_flag &= ~VXLOCK;
	if (vp->v_flag & VXWANT) {
		vp->v_flag &= ~VXWANT;
		wakeup((caddr_t)vp);
	}
}

/*
 * Eliminate all activity associated with the requested vnode
 * and with all vnodes aliased to the requested vnode.
 */
void vgoneall(vp)
	register struct vnode *vp;
{
	register struct vnode *vq;

	if (vp->v_flag & VALIASED) {
		/*
		 * If a vgone (or vclean) is already in progress,
		 * wait until it is done and return.
		 */
		if (vp->v_flag & VXLOCK) {
			vp->v_flag |= VXWANT;
			sleep((caddr_t)vp, PINOD);
			return;
		}
		/*
		 * Ensure that vp will not be vgone'd while we
		 * are eliminating its aliases.
		 */
		vp->v_flag |= VXLOCK;
		/* vgone() on the last alias clears VALIASED on vp. */
		while (vp->v_flag & VALIASED) {
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_rdev != vp->v_rdev ||
				    vq->v_type != vp->v_type || vp == vq)
					continue;
				vgone(vq);
				break;
			}
		}
		/*
		 * Remove the lock so that vgone below will
		 * really eliminate the vnode after which time
		 * vgone will awaken any sleepers.
		 */
		vp->v_flag &= ~VXLOCK;
	}
	vgone(vp);
}

/*
 * Eliminate all activity associated with a vnode
 * in preparation for reuse.
 */
void vgone(vp)
	register struct vnode *vp;
{
	register struct vnode *vq;
	struct vnode *vx;

	/*
	 * If a vgone (or vclean) is already in progress,
	 * wait until it is done and return.
	 */
	if (vp->v_flag & VXLOCK) {
		vp->v_flag |= VXWANT;
		sleep((caddr_t)vp, PINOD);
		return;
	}
	/*
	 * Clean out the filesystem specific data.
	 */
	vclean(vp, DOCLOSE);
	/*
	 * Delete from old mount point vnode list, if on one.
	 */
	if (vp->v_mount != NULL) {
		LIST_REMOVE(vp, v_mntvnodes);
		vp->v_mount = NULL;
	}
	/*
	 * If special device, remove it from special device alias list.
	 */
	if (vp->v_type == VBLK || vp->v_type == VCHR) {
		if (*vp->v_hashchain == vp) {
			*vp->v_hashchain = vp->v_specnext;
		} else {
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_specnext != vp)
					continue;
				vq->v_specnext = vp->v_specnext;
				break;
			}
			if (vq == NULL)
				panic("missing bdev");
		}
		if (vp->v_flag & VALIASED) {
			/*
			 * vx is the first surviving alias; vq is non-NULL
			 * only if a second one exists. With a single
			 * survivor left, its VALIASED flag is cleared too.
			 */
			vx = NULL;
			for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
				if (vq->v_rdev != vp->v_rdev ||
				    vq->v_type != vp->v_type)
					continue;
				if (vx)
					break;
				vx = vq;
			}
			if (vx == NULL)
				panic("missing alias");
			if (vq == NULL)
				vx->v_flag &= ~VALIASED;
			vp->v_flag &= ~VALIASED;
		}
		FREE(vp->v_specinfo, M_VNODE);
		vp->v_specinfo = NULL;
	}
	/*
	 * If it is on the freelist and not already at the head,
	 * move it to the head of the list. The test of the back
	 * pointer and the reference count of zero is because
	 * it will be removed from the free list by getnewvnode,
	 * but will not have its reference count incremented until
	 * after calling vgone. If the reference count were
	 * incremented first, vgone would (incorrectly) try to
	 * close the previous instance of the underlying object.
	 * So, the back pointer is explicitly set to `0xdeadb' in
	 * getnewvnode after removing it from the freelist to ensure
	 * that we do not try to move it here.
	 */
	if (vp->v_usecount == 0 &&
	    vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb &&
	    vnode_free_list.tqh_first != vp) {
		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
		TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
	}
	vp->v_type = VBAD;
}

/*
 * Lookup a vnode by device number.
 * Returns 1 with *vpp set on success, 0 if no match.
 */
vfinddev(dev, type, vpp)
	dev_t dev;
	enum vtype type;
	struct vnode **vpp;
{
	register struct vnode *vp;

	for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) {
		if (dev != vp->v_rdev || type != vp->v_type)
			continue;
		*vpp = vp;
		return (1);
	}
	return (0);
}

/*
 * Calculate the total number of references to a special device.
 */
vcount(vp)
	register struct vnode *vp;
{
	register struct vnode *vq;
	int count;

	if ((vp->v_flag & VALIASED) == 0)
		return (vp->v_usecount);
loop:
	for (count = 0, vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
		if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type)
			continue;
		/*
		 * Alias, but not in use, so flush it out.
		 */
		if (vq->v_usecount == 0) {
			vgone(vq);
			goto loop;
		}
		count += vq->v_usecount;
	}
	return (count);
}

/*
 * Print out a description of a vnode.
 */
static char *typename[] =
   { "VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD" };

vprint(label, vp)
	char *label;
	register struct vnode *vp;
{
	char buf[64];

	if (label != NULL)
		printf("%s: ", label);
	printf("type %s, usecount %d, writecount %d, refcount %d,",
		typename[vp->v_type], vp->v_usecount, vp->v_writecount,
		vp->v_holdcnt);
	buf[0] = '\0';
	/* Accumulate "|FLAG" names; the leading '|' is skipped on output. */
	if (vp->v_flag & VROOT)
		strcat(buf, "|VROOT");
	if (vp->v_flag & VTEXT)
		strcat(buf, "|VTEXT");
	if (vp->v_flag & VSYSTEM)
		strcat(buf, "|VSYSTEM");
	if (vp->v_flag & VXLOCK)
		strcat(buf, "|VXLOCK");
	if (vp->v_flag & VXWANT)
		strcat(buf, "|VXWANT");
	if (vp->v_flag & VBWAIT)
		strcat(buf, "|VBWAIT");
	if (vp->v_flag & VALIASED)
		strcat(buf, "|VALIASED");
	if (buf[0] != '\0')
		printf(" flags (%s)", &buf[1]);
	if (vp->v_data == NULL) {
		printf("\n");
	} else {
		/* Let the filesystem print its own per-vnode details. */
		printf("\n\t");
		VOP_PRINT(vp);
	}
}

#ifdef DEBUG
/*
 * List all of the locked vnodes in the system.
 * Called when debugging the kernel.
 */
printlockedvnodes()
{
	register struct mount *mp;
	register struct vnode *vp;

	printf("Locked vnodes\n");
	for (mp = mountlist.tqh_first; mp != NULL; mp = mp->mnt_list.tqe_next) {
		for (vp = mp->mnt_vnodelist.lh_first;
		     vp != NULL;
		     vp = vp->v_mntvnodes.le_next)
			if (VOP_ISLOCKED(vp))
				vprint((char *)0, vp);
	}
}
#endif

int kinfo_vdebug = 1;		/* enable "vp changed" chatter in sysctl_vnode */
int kinfo_vgetfailed;
#define KINFO_VNODESLOP	10

/*
 * Dump vnode list (via sysctl).
 * Copyout address of vnode followed by vnode.
1038 */ 1039 /* ARGSUSED */ 1040 sysctl_vnode(where, sizep) 1041 char *where; 1042 size_t *sizep; 1043 { 1044 register struct mount *mp, *nmp; 1045 struct vnode *vp; 1046 register char *bp = where, *savebp; 1047 char *ewhere; 1048 int error; 1049 1050 #define VPTRSZ sizeof (struct vnode *) 1051 #define VNODESZ sizeof (struct vnode) 1052 if (where == NULL) { 1053 *sizep = (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ); 1054 return (0); 1055 } 1056 ewhere = where + *sizep; 1057 1058 for (mp = mountlist.tqh_first; mp != NULL; mp = nmp) { 1059 nmp = mp->mnt_list.tqe_next; 1060 if (vfs_busy(mp)) 1061 continue; 1062 savebp = bp; 1063 again: 1064 for (vp = mp->mnt_vnodelist.lh_first; 1065 vp != NULL; 1066 vp = vp->v_mntvnodes.le_next) { 1067 /* 1068 * Check that the vp is still associated with 1069 * this filesystem. RACE: could have been 1070 * recycled onto the same filesystem. 1071 */ 1072 if (vp->v_mount != mp) { 1073 if (kinfo_vdebug) 1074 printf("kinfo: vp changed\n"); 1075 bp = savebp; 1076 goto again; 1077 } 1078 if (bp + VPTRSZ + VNODESZ > ewhere) { 1079 *sizep = bp - where; 1080 return (ENOMEM); 1081 } 1082 if ((error = copyout((caddr_t)&vp, bp, VPTRSZ)) || 1083 (error = copyout((caddr_t)vp, bp + VPTRSZ, VNODESZ))) 1084 return (error); 1085 bp += VPTRSZ + VNODESZ; 1086 } 1087 vfs_unbusy(mp); 1088 } 1089 1090 *sizep = bp - where; 1091 return (0); 1092 } 1093 1094 /* 1095 * Check to see if a filesystem is mounted on a block device. 
1096 */ 1097 int 1098 vfs_mountedon(vp) 1099 register struct vnode *vp; 1100 { 1101 register struct vnode *vq; 1102 1103 if (vp->v_specflags & SI_MOUNTEDON) 1104 return (EBUSY); 1105 if (vp->v_flag & VALIASED) { 1106 for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) { 1107 if (vq->v_rdev != vp->v_rdev || 1108 vq->v_type != vp->v_type) 1109 continue; 1110 if (vq->v_specflags & SI_MOUNTEDON) 1111 return (EBUSY); 1112 } 1113 } 1114 return (0); 1115 } 1116 1117 /* 1118 * Build hash lists of net addresses and hang them off the mount point. 1119 * Called by ufs_mount() to set up the lists of export addresses. 1120 */ 1121 static int 1122 vfs_hang_addrlist(mp, nep, argp) 1123 struct mount *mp; 1124 struct netexport *nep; 1125 struct export_args *argp; 1126 { 1127 register struct netcred *np; 1128 register struct radix_node_head *rnh; 1129 register int i; 1130 struct radix_node *rn; 1131 struct sockaddr *saddr, *smask = 0; 1132 struct domain *dom; 1133 int error; 1134 1135 if (argp->ex_addrlen == 0) { 1136 if (mp->mnt_flag & MNT_DEFEXPORTED) 1137 return (EPERM); 1138 np = &nep->ne_defexported; 1139 np->netc_exflags = argp->ex_flags; 1140 np->netc_anon = argp->ex_anon; 1141 np->netc_anon.cr_ref = 1; 1142 mp->mnt_flag |= MNT_DEFEXPORTED; 1143 return (0); 1144 } 1145 i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen; 1146 np = (struct netcred *)malloc(i, M_NETADDR, M_WAITOK); 1147 bzero((caddr_t)np, i); 1148 saddr = (struct sockaddr *)(np + 1); 1149 if (error = copyin(argp->ex_addr, (caddr_t)saddr, argp->ex_addrlen)) 1150 goto out; 1151 if (saddr->sa_len > argp->ex_addrlen) 1152 saddr->sa_len = argp->ex_addrlen; 1153 if (argp->ex_masklen) { 1154 smask = (struct sockaddr *)((caddr_t)saddr + argp->ex_addrlen); 1155 error = copyin(argp->ex_addr, (caddr_t)smask, argp->ex_masklen); 1156 if (error) 1157 goto out; 1158 if (smask->sa_len > argp->ex_masklen) 1159 smask->sa_len = argp->ex_masklen; 1160 } 1161 i = saddr->sa_family; 1162 if ((rnh = nep->ne_rtable[i]) == 
0) { 1163 /* 1164 * Seems silly to initialize every AF when most are not 1165 * used, do so on demand here 1166 */ 1167 for (dom = domains; dom; dom = dom->dom_next) 1168 if (dom->dom_family == i && dom->dom_rtattach) { 1169 dom->dom_rtattach((void **)&nep->ne_rtable[i], 1170 dom->dom_rtoffset); 1171 break; 1172 } 1173 if ((rnh = nep->ne_rtable[i]) == 0) { 1174 error = ENOBUFS; 1175 goto out; 1176 } 1177 } 1178 rn = (*rnh->rnh_addaddr)((caddr_t)saddr, (caddr_t)smask, rnh, 1179 np->netc_rnodes); 1180 if (rn == 0 || np != (struct netcred *)rn) { /* already exists */ 1181 error = EPERM; 1182 goto out; 1183 } 1184 np->netc_exflags = argp->ex_flags; 1185 np->netc_anon = argp->ex_anon; 1186 np->netc_anon.cr_ref = 1; 1187 return (0); 1188 out: 1189 free(np, M_NETADDR); 1190 return (error); 1191 } 1192 1193 /* ARGSUSED */ 1194 static int 1195 vfs_free_netcred(rn, w) 1196 struct radix_node *rn; 1197 caddr_t w; 1198 { 1199 register struct radix_node_head *rnh = (struct radix_node_head *)w; 1200 1201 (*rnh->rnh_deladdr)(rn->rn_key, rn->rn_mask, rnh); 1202 free((caddr_t)rn, M_NETADDR); 1203 return (0); 1204 } 1205 1206 /* 1207 * Free the net address hash lists that are hanging off the mount points. 
1208 */ 1209 static void 1210 vfs_free_addrlist(nep) 1211 struct netexport *nep; 1212 { 1213 register int i; 1214 register struct radix_node_head *rnh; 1215 1216 for (i = 0; i <= AF_MAX; i++) 1217 if (rnh = nep->ne_rtable[i]) { 1218 (*rnh->rnh_walktree)(rnh, vfs_free_netcred, 1219 (caddr_t)rnh); 1220 free((caddr_t)rnh, M_RTABLE); 1221 nep->ne_rtable[i] = 0; 1222 } 1223 } 1224 1225 int 1226 vfs_export(mp, nep, argp) 1227 struct mount *mp; 1228 struct netexport *nep; 1229 struct export_args *argp; 1230 { 1231 int error; 1232 1233 if (argp->ex_flags & MNT_DELEXPORT) { 1234 vfs_free_addrlist(nep); 1235 mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED); 1236 } 1237 if (argp->ex_flags & MNT_EXPORTED) { 1238 if (error = vfs_hang_addrlist(mp, nep, argp)) 1239 return (error); 1240 mp->mnt_flag |= MNT_EXPORTED; 1241 } 1242 return (0); 1243 } 1244 1245 struct netcred * 1246 vfs_export_lookup(mp, nep, nam) 1247 register struct mount *mp; 1248 struct netexport *nep; 1249 struct mbuf *nam; 1250 { 1251 register struct netcred *np; 1252 register struct radix_node_head *rnh; 1253 struct sockaddr *saddr; 1254 1255 np = NULL; 1256 if (mp->mnt_flag & MNT_EXPORTED) { 1257 /* 1258 * Lookup in the export list first. 1259 */ 1260 if (nam != NULL) { 1261 saddr = mtod(nam, struct sockaddr *); 1262 rnh = nep->ne_rtable[saddr->sa_family]; 1263 if (rnh != NULL) { 1264 np = (struct netcred *) 1265 (*rnh->rnh_matchaddr)((caddr_t)saddr, 1266 rnh); 1267 if (np && np->netc_rnodes->rn_flags & RNF_ROOT) 1268 np = NULL; 1269 } 1270 } 1271 /* 1272 * If no address match, use the default if it exists. 1273 */ 1274 if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED) 1275 np = &nep->ne_defexported; 1276 } 1277 return (np); 1278 } 1279