1 /* 2 * Copyright (c) 1989 The Regents of the University of California. 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms are permitted 6 * provided that the above copyright notice and this paragraph are 7 * duplicated in all such forms and that any documentation, 8 * advertising materials, and other materials related to such 9 * distribution and use acknowledge that the software was developed 10 * by the University of California, Berkeley. The name of the 11 * University may not be used to endorse or promote products derived 12 * from this software without specific prior written permission. 13 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR 14 * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED 15 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. 16 * 17 * @(#)vfs_subr.c 7.22 (Berkeley) 12/31/89 18 */ 19 20 /* 21 * External virtual filesystem routines 22 */ 23 24 #include "param.h" 25 #include "mount.h" 26 #include "time.h" 27 #include "vnode.h" 28 #include "namei.h" 29 #include "ucred.h" 30 #include "errno.h" 31 #include "malloc.h" 32 33 /* 34 * Remove a mount point from the list of mounted filesystems. 35 * Unmount of the root is illegal. 36 */ 37 void 38 vfs_remove(mp) 39 register struct mount *mp; 40 { 41 42 if (mp == rootfs) 43 panic("vfs_remove: unmounting root"); 44 mp->m_prev->m_next = mp->m_next; 45 mp->m_next->m_prev = mp->m_prev; 46 mp->m_vnodecovered->v_mountedhere = (struct mount *)0; 47 vfs_unlock(mp); 48 } 49 50 /* 51 * Lock a filesystem. 52 * Used to prevent access to it while mounting and unmounting. 53 */ 54 vfs_lock(mp) 55 register struct mount *mp; 56 { 57 58 while(mp->m_flag & M_MLOCK) { 59 mp->m_flag |= M_MWAIT; 60 sleep((caddr_t)mp, PVFS); 61 } 62 mp->m_flag |= M_MLOCK; 63 return (0); 64 } 65 66 /* 67 * Unlock a locked filesystem. 68 * Panic if filesystem is not locked. 69 */ 70 void 71 vfs_unlock(mp) 72 register struct mount *mp; 73 { 74 75 if ((mp->m_flag & M_MLOCK) == 0) 76 panic("vfs_unlock: locked fs"); 77 mp->m_flag &= ~M_MLOCK; 78 if (mp->m_flag & M_MWAIT) { 79 mp->m_flag &= ~M_MWAIT; 80 wakeup((caddr_t)mp); 81 } 82 } 83 84 /* 85 * Lookup a mount point by filesystem identifier. 86 */ 87 struct mount * 88 getvfs(fsid) 89 fsid_t *fsid; 90 { 91 register struct mount *mp; 92 93 mp = rootfs; 94 do { 95 if (mp->m_fsid.val[0] == fsid->val[0] && 96 mp->m_fsid.val[1] == fsid->val[1]) { 97 return (mp); 98 } 99 mp = mp->m_next; 100 } while (mp != rootfs); 101 return ((struct mount *)0); 102 } 103 104 /* 105 * Set vnode attributes to VNOVAL 106 */ 107 void vattr_null(vap) 108 register struct vattr *vap; 109 { 110 111 vap->va_type = VNON; 112 vap->va_mode = vap->va_nlink = vap->va_uid = vap->va_gid = 113 vap->va_fsid = vap->va_fileid = vap->va_size = 114 vap->va_size1 = vap->va_blocksize = vap->va_rdev = 115 vap->va_bytes = vap->va_bytes1 = 116 vap->va_atime.tv_sec = vap->va_atime.tv_usec = 117 vap->va_mtime.tv_sec = vap->va_mtime.tv_usec = 118 vap->va_ctime.tv_sec = vap->va_ctime.tv_usec = 119 vap->va_flags = vap->va_gen = VNOVAL; 120 } 121 122 /* 123 * Initialize a nameidata structure 124 */ 125 ndinit(ndp) 126 register struct nameidata *ndp; 127 { 128 129 bzero((caddr_t)ndp, sizeof(struct nameidata)); 130 ndp->ni_iov = &ndp->ni_nd.nd_iovec; 131 ndp->ni_iovcnt = 1; 132 ndp->ni_base = (caddr_t)&ndp->ni_dent; 133 ndp->ni_rw = UIO_WRITE; 134 ndp->ni_uioseg = UIO_SYSSPACE; 135 } 136 137 /* 138 * Duplicate a nameidata structure 139 */ 140 nddup(ndp, newndp) 141 register struct nameidata *ndp, *newndp; 142 { 143 144 ndinit(newndp); 145 newndp->ni_cdir = ndp->ni_cdir; 146 VREF(newndp->ni_cdir); 147 newndp->ni_rdir = ndp->ni_rdir; 148 if (newndp->ni_rdir) 149 VREF(newndp->ni_rdir); 150 newndp->ni_cred = ndp->ni_cred; 151 crhold(newndp->ni_cred); 152 } 153 154 /* 155 * Release a nameidata structure 156 */ 157 ndrele(ndp) 158 register struct nameidata *ndp; 159 { 160 161 vrele(ndp->ni_cdir); 162 if (ndp->ni_rdir) 163 vrele(ndp->ni_rdir); 164 crfree(ndp->ni_cred); 165 } 166 167 /* 168 * Routines having to do with the management of the vnode table. 169 */ 170 struct vnode *vfreeh, **vfreet; 171 extern struct vnodeops dead_vnodeops, spec_vnodeops; 172 extern void vclean(); 173 174 #define SPECHSZ 64 175 #if ((SPECHSZ&(SPECHSZ-1)) == 0) 176 #define SPECHASH(rdev) (((rdev>>5)+(rdev))&(SPECHSZ-1)) 177 #else 178 #define SPECHASH(rdev) (((unsigned)((rdev>>5)+(rdev)))%SPECHSZ) 179 #endif 180 struct vnode *speclisth[SPECHSZ]; 181 182 /* 183 * Initialize the vnode structures and initialize each file system type. 184 */ 185 vfsinit() 186 { 187 register struct vnode *vp = vnode; 188 struct vfsops **vfsp; 189 190 /* 191 * Build vnode free list. 192 */ 193 vfreeh = vp; 194 vfreet = &vp->v_freef; 195 vp->v_freeb = &vfreeh; 196 vp->v_op = &dead_vnodeops; 197 for (vp++; vp < vnodeNVNODE; vp++) { 198 *vfreet = vp; 199 vp->v_freeb = vfreet; 200 vfreet = &vp->v_freef; 201 vp->v_op = &dead_vnodeops; 202 } 203 vp--; 204 vp->v_freef = NULL; 205 /* 206 * Initialize the vnode name cache 207 */ 208 nchinit(); 209 /* 210 * Initialize each file system type. 211 */ 212 for (vfsp = &vfssw[0]; vfsp <= &vfssw[MOUNT_MAXTYPE]; vfsp++) { 213 if (*vfsp == NULL) 214 continue; 215 (*(*vfsp)->vfs_init)(); 216 } 217 } 218 219 /* 220 * Return the next vnode from the free list. 221 */ 222 getnewvnode(tag, mp, vops, vpp) 223 enum vtagtype tag; 224 struct mount *mp; 225 struct vnodeops *vops; 226 struct vnode **vpp; 227 { 228 register struct vnode *vp, *vq; 229 230 if ((vp = vfreeh) == NULL) { 231 tablefull("vnode"); 232 *vpp = 0; 233 return (ENFILE); 234 } 235 if (vp->v_usecount) 236 panic("free vnode isn't"); 237 if (vq = vp->v_freef) 238 vq->v_freeb = &vfreeh; 239 vfreeh = vq; 240 vp->v_freef = NULL; 241 vp->v_freeb = NULL; 242 if (vp->v_type != VNON && vp->v_type != VBAD) 243 vgone(vp); 244 vp->v_type = VNON; 245 vp->v_flag = 0; 246 vp->v_shlockc = 0; 247 vp->v_exlockc = 0; 248 vp->v_lastr = 0; 249 vp->v_socket = 0; 250 cache_purge(vp); 251 vp->v_tag = tag; 252 vp->v_op = vops; 253 insmntque(vp, mp); 254 VREF(vp); 255 *vpp = vp; 256 return (0); 257 } 258 259 /* 260 * Move a vnode from one mount queue to another. 261 */ 262 insmntque(vp, mp) 263 register struct vnode *vp; 264 register struct mount *mp; 265 { 266 struct vnode *vq; 267 268 /* 269 * Delete from old mount point vnode list, if on one. 270 */ 271 if (vp->v_mountb) { 272 if (vq = vp->v_mountf) 273 vq->v_mountb = vp->v_mountb; 274 *vp->v_mountb = vq; 275 } 276 /* 277 * Insert into list of vnodes for the new mount point, if available. 278 */ 279 vp->v_mount = mp; 280 if (mp == NULL) { 281 vp->v_mountf = NULL; 282 vp->v_mountb = NULL; 283 return; 284 } 285 if (mp->m_mounth) { 286 vp->v_mountf = mp->m_mounth; 287 vp->v_mountb = &mp->m_mounth; 288 mp->m_mounth->v_mountb = &vp->v_mountf; 289 mp->m_mounth = vp; 290 } else { 291 mp->m_mounth = vp; 292 vp->v_mountb = &mp->m_mounth; 293 vp->v_mountf = NULL; 294 } 295 } 296 297 /* 298 * Create a vnode for a block device. 299 * Used for root filesystem, argdev, and swap areas. 300 * Also used for memory file system special devices. 301 */ 302 bdevvp(dev, vpp) 303 dev_t dev; 304 struct vnode **vpp; 305 { 306 register struct vnode *vp; 307 struct vnode *nvp; 308 int error; 309 310 error = getnewvnode(VT_NON, (struct mount *)0, &spec_vnodeops, &nvp); 311 if (error) { 312 *vpp = 0; 313 return (error); 314 } 315 vp = nvp; 316 vp->v_type = VBLK; 317 if (nvp = checkalias(vp, dev, (struct mount *)0)) { 318 vput(vp); 319 vp = nvp; 320 } 321 *vpp = vp; 322 return (0); 323 } 324 325 /* 326 * Check to see if the new vnode represents a special device 327 * for which we already have a vnode (either because of 328 * bdevvp() or because of a different vnode representing 329 * the same block device). If such an alias exists, deallocate 330 * the existing contents and return the aliased vnode. The 331 * caller is responsible for filling it with its new contents. 332 */ 333 struct vnode * 334 checkalias(nvp, nvp_rdev, mp) 335 register struct vnode *nvp; 336 dev_t nvp_rdev; 337 struct mount *mp; 338 { 339 register struct vnode *vp; 340 struct vnode **vpp; 341 342 if (nvp->v_type != VBLK && nvp->v_type != VCHR) 343 return ((struct vnode *)0); 344 345 vpp = &speclisth[SPECHASH(nvp_rdev)]; 346 loop: 347 for (vp = *vpp; vp; vp = vp->v_specnext) { 348 if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type) 349 continue; 350 /* 351 * Alias, but not in use, so flush it out. 352 */ 353 if (vp->v_usecount == 0) { 354 vgone(vp); 355 goto loop; 356 } 357 if (vget(vp)) 358 goto loop; 359 break; 360 } 361 if (vp == NULL || vp->v_tag != VT_NON) { 362 if (vp != NULL) { 363 nvp->v_flag |= VALIASED; 364 vp->v_flag |= VALIASED; 365 vput(vp); 366 } 367 MALLOC(nvp->v_specinfo, struct specinfo *, 368 sizeof(struct specinfo), M_VNODE, M_WAITOK); 369 nvp->v_rdev = nvp_rdev; 370 nvp->v_hashchain = vpp; 371 nvp->v_specnext = *vpp; 372 *vpp = nvp; 373 return ((struct vnode *)0); 374 } 375 VOP_UNLOCK(vp); 376 vclean(vp, 0); 377 vp->v_op = nvp->v_op; 378 vp->v_tag = nvp->v_tag; 379 nvp->v_type = VNON; 380 insmntque(vp, mp); 381 return (vp); 382 } 383 384 /* 385 * Grab a particular vnode from the free list, increment its 386 * reference count and lock it. The vnode lock bit is set the 387 * vnode is being eliminated in vgone. The process is awakened 388 * when the transition is completed, and an error returned to 389 * indicate that the vnode is no longer usable (possibly having 390 * been changed to a new file system type). 391 */ 392 vget(vp) 393 register struct vnode *vp; 394 { 395 register struct vnode *vq; 396 397 if (vp->v_flag & VXLOCK) { 398 vp->v_flag |= VXWANT; 399 sleep((caddr_t)vp, PINOD); 400 return (1); 401 } 402 if (vp->v_usecount == 0) { 403 if (vq = vp->v_freef) 404 vq->v_freeb = vp->v_freeb; 405 else 406 vfreet = vp->v_freeb; 407 *vp->v_freeb = vq; 408 vp->v_freef = NULL; 409 vp->v_freeb = NULL; 410 } 411 VREF(vp); 412 VOP_LOCK(vp); 413 return (0); 414 } 415 416 /* 417 * Vnode reference, just increment the count 418 */ 419 void vref(vp) 420 struct vnode *vp; 421 { 422 423 vp->v_usecount++; 424 } 425 426 /* 427 * vput(), just unlock and vrele() 428 */ 429 void vput(vp) 430 register struct vnode *vp; 431 { 432 VOP_UNLOCK(vp); 433 vrele(vp); 434 } 435 436 /* 437 * Vnode release. 438 * If count drops to zero, call inactive routine and return to freelist. 439 */ 440 void vrele(vp) 441 register struct vnode *vp; 442 { 443 444 if (vp == NULL) 445 panic("vrele: null vp"); 446 vp->v_usecount--; 447 if (vp->v_usecount < 0) 448 vprint("vrele: bad ref count", vp); 449 if (vp->v_usecount > 0) 450 return; 451 if (vfreeh == (struct vnode *)0) { 452 /* 453 * insert into empty list 454 */ 455 vfreeh = vp; 456 vp->v_freeb = &vfreeh; 457 } else { 458 /* 459 * insert at tail of list 460 */ 461 *vfreet = vp; 462 vp->v_freeb = vfreet; 463 } 464 vp->v_freef = NULL; 465 vfreet = &vp->v_freef; 466 VOP_INACTIVE(vp); 467 } 468 469 /* 470 * Page or buffer structure gets a reference. 471 */ 472 vhold(vp) 473 register struct vnode *vp; 474 { 475 476 vp->v_holdcnt++; 477 } 478 479 /* 480 * Page or buffer structure frees a reference. 481 */ 482 holdrele(vp) 483 register struct vnode *vp; 484 { 485 486 if (vp->v_holdcnt <= 0) 487 panic("holdrele: holdcnt"); 488 vp->v_holdcnt--; 489 } 490 491 /* 492 * Remove any vnodes in the vnode table belonging to mount point mp. 493 * 494 * If MNT_NOFORCE is specified, there should not be any active ones, 495 * return error if any are found (nb: this is a user error, not a 496 * system error). If MNT_FORCE is specified, detach any active vnodes 497 * that are found. 498 */ 499 int busyprt = 0; /* patch to print out busy vnodes */ 500 501 vflush(mp, skipvp, flags) 502 struct mount *mp; 503 struct vnode *skipvp; 504 int flags; 505 { 506 register struct vnode *vp, *nvp; 507 int busy = 0; 508 509 for (vp = mp->m_mounth; vp; vp = nvp) { 510 nvp = vp->v_mountf; 511 /* 512 * Skip over a selected vnode. 513 * Used by ufs to skip over the quota structure inode. 514 */ 515 if (vp == skipvp) 516 continue; 517 /* 518 * With v_usecount == 0, all we need to do is clear 519 * out the vnode data structures and we are done. 520 */ 521 if (vp->v_usecount == 0) { 522 vgone(vp); 523 continue; 524 } 525 /* 526 * For block or character devices, revert to an 527 * anonymous device. For all other files, just kill them. 528 */ 529 if (flags & MNT_FORCE) { 530 if (vp->v_type != VBLK && vp->v_type != VCHR) { 531 vgone(vp); 532 } else { 533 vclean(vp, 0); 534 vp->v_op = &spec_vnodeops; 535 insmntque(vp, (struct mount *)0); 536 } 537 continue; 538 } 539 if (busyprt) 540 vprint("vflush: busy vnode", vp); 541 busy++; 542 } 543 if (busy) 544 return (EBUSY); 545 return (0); 546 } 547 548 /* 549 * Disassociate the underlying file system from a vnode. 550 */ 551 void vclean(vp, doclose) 552 register struct vnode *vp; 553 long doclose; 554 { 555 struct vnodeops *origops; 556 int active; 557 558 /* 559 * Check to see if the vnode is in use. 560 * If so we have to reference it before we clean it out 561 * so that its count cannot fall to zero and generate a 562 * race against ourselves to recycle it. 563 */ 564 if (active = vp->v_usecount) 565 VREF(vp); 566 /* 567 * Prevent the vnode from being recycled or 568 * brought into use while we clean it out. 569 */ 570 if (vp->v_flag & VXLOCK) 571 panic("vclean: deadlock"); 572 vp->v_flag |= VXLOCK; 573 /* 574 * Even if the count is zero, the VOP_INACTIVE routine may still 575 * have the object locked while it cleans it out. The VOP_LOCK 576 * ensures that the VOP_INACTIVE routine is done with its work. 577 * For active vnodes, it ensures that no other activity can 578 * occur while the buffer list is being cleaned out. 579 */ 580 VOP_LOCK(vp); 581 if (doclose) 582 vinvalbuf(vp, 1); 583 /* 584 * Prevent any further operations on the vnode from 585 * being passed through to the old file system. 586 */ 587 origops = vp->v_op; 588 vp->v_op = &dead_vnodeops; 589 vp->v_tag = VT_NON; 590 /* 591 * If purging an active vnode, it must be unlocked, closed, 592 * and deactivated before being reclaimed. 593 */ 594 (*(origops->vn_unlock))(vp); 595 if (active) { 596 if (doclose) 597 (*(origops->vn_close))(vp, 0, NOCRED); 598 (*(origops->vn_inactive))(vp); 599 } 600 /* 601 * Reclaim the vnode. 602 */ 603 if ((*(origops->vn_reclaim))(vp)) 604 panic("vclean: cannot reclaim"); 605 if (active) 606 vrele(vp); 607 /* 608 * Done with purge, notify sleepers in vget of the grim news. 609 */ 610 vp->v_flag &= ~VXLOCK; 611 if (vp->v_flag & VXWANT) { 612 vp->v_flag &= ~VXWANT; 613 wakeup((caddr_t)vp); 614 } 615 } 616 617 /* 618 * Eliminate all activity associated with the requested vnode 619 * and with all vnodes aliased to the requested vnode. 620 */ 621 void vgoneall(vp) 622 register struct vnode *vp; 623 { 624 register struct vnode *vq; 625 626 while (vp->v_flag & VALIASED) { 627 for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) { 628 if (vq->v_rdev != vp->v_rdev || vp == vq) 629 continue; 630 vgone(vq); 631 break; 632 } 633 } 634 vgone(vp); 635 } 636 637 /* 638 * Eliminate all activity associated with a vnode 639 * in preparation for reuse. 640 */ 641 void vgone(vp) 642 register struct vnode *vp; 643 { 644 register struct vnode *vq; 645 struct vnode *vx; 646 long count; 647 648 /* 649 * Clean out the filesystem specific data. 650 */ 651 vclean(vp, 1); 652 /* 653 * Delete from old mount point vnode list, if on one. 654 */ 655 if (vp->v_mountb) { 656 if (vq = vp->v_mountf) 657 vq->v_mountb = vp->v_mountb; 658 *vp->v_mountb = vq; 659 vp->v_mountf = NULL; 660 vp->v_mountb = NULL; 661 } 662 /* 663 * If special device, remove it from special device alias list. 664 */ 665 if (vp->v_type == VBLK || vp->v_type == VCHR) { 666 if (*vp->v_hashchain == vp) { 667 *vp->v_hashchain = vp->v_specnext; 668 } else { 669 for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) { 670 if (vq->v_specnext != vp) 671 continue; 672 vq->v_specnext = vp->v_specnext; 673 break; 674 } 675 if (vq == NULL) 676 panic("missing bdev"); 677 } 678 if (vp->v_flag & VALIASED) { 679 count = 0; 680 for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) { 681 if (vq->v_rdev != vp->v_rdev) 682 continue; 683 count++; 684 vx = vq; 685 } 686 if (count == 0) 687 panic("missing alias"); 688 if (count == 1) 689 vx->v_flag &= ~VALIASED; 690 vp->v_flag &= ~VALIASED; 691 } 692 FREE(vp->v_specinfo, M_VNODE); 693 vp->v_specinfo = NULL; 694 } 695 /* 696 * If it is on the freelist, move it to the head of the list. 697 */ 698 if (vp->v_freeb) { 699 if (vq = vp->v_freef) 700 vq->v_freeb = vp->v_freeb; 701 else 702 vfreet = vp->v_freeb; 703 *vp->v_freeb = vq; 704 vp->v_freef = vfreeh; 705 vp->v_freeb = &vfreeh; 706 vfreeh->v_freeb = &vp->v_freef; 707 vfreeh = vp; 708 } 709 vp->v_type = VBAD; 710 } 711 712 /* 713 * Lookup a vnode by device number. 714 */ 715 vfinddev(dev, type, vpp) 716 dev_t dev; 717 enum vtype type; 718 struct vnode **vpp; 719 { 720 register struct vnode *vp; 721 722 for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) { 723 if (dev != vp->v_rdev || type != vp->v_type) 724 continue; 725 *vpp = vp; 726 return (0); 727 } 728 return (1); 729 } 730 731 /* 732 * Calculate the total number of references to a special device. 733 */ 734 vcount(vp) 735 register struct vnode *vp; 736 { 737 register struct vnode *vq; 738 int count; 739 740 if ((vp->v_flag & VALIASED) == 0) 741 return (vp->v_usecount); 742 loop: 743 for (count = 0, vq = *vp->v_hashchain; vq; vq = vq->v_specnext) { 744 if (vq->v_rdev != vp->v_rdev) 745 continue; 746 /* 747 * Alias, but not in use, so flush it out. 748 */ 749 if (vq->v_usecount == 0) { 750 vgone(vq); 751 goto loop; 752 } 753 count += vq->v_usecount; 754 } 755 return (count); 756 } 757 758 /* 759 * Print out a description of a vnode. 760 */ 761 static char *typename[] = 762 { "VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VBAD" }; 763 764 vprint(label, vp) 765 char *label; 766 register struct vnode *vp; 767 { 768 769 if (label != NULL) 770 printf("%s: ", label); 771 printf("type %s, usecount %d, refcount %d,\n\t", typename[vp->v_type], 772 vp->v_usecount, vp->v_holdcnt); 773 VOP_PRINT(vp); 774 } 775