1 /* $NetBSD: vfs_subr.c,v 1.175 2002/05/23 23:05:25 perseant Exp $ */ 2 3 /*- 4 * Copyright (c) 1997, 1998 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, 9 * NASA Ames Research Center. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 3. All advertising materials mentioning features or use of this software 20 * must display the following acknowledgement: 21 * This product includes software developed by the NetBSD 22 * Foundation, Inc. and its contributors. 23 * 4. Neither the name of The NetBSD Foundation nor the names of its 24 * contributors may be used to endorse or promote products derived 25 * from this software without specific prior written permission. 26 * 27 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 28 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 29 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 30 * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 31 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 32 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 33 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 34 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 35 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 36 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 37 * POSSIBILITY OF SUCH DAMAGE. 38 */ 39 40 /* 41 * Copyright (c) 1989, 1993 42 * The Regents of the University of California. All rights reserved. 43 * (c) UNIX System Laboratories, Inc. 44 * All or some portions of this file are derived from material licensed 45 * to the University of California by American Telephone and Telegraph 46 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 47 * the permission of UNIX System Laboratories, Inc. 48 * 49 * Redistribution and use in source and binary forms, with or without 50 * modification, are permitted provided that the following conditions 51 * are met: 52 * 1. Redistributions of source code must retain the above copyright 53 * notice, this list of conditions and the following disclaimer. 54 * 2. Redistributions in binary form must reproduce the above copyright 55 * notice, this list of conditions and the following disclaimer in the 56 * documentation and/or other materials provided with the distribution. 57 * 3. All advertising materials mentioning features or use of this software 58 * must display the following acknowledgement: 59 * This product includes software developed by the University of 60 * California, Berkeley and its contributors. 61 * 4. Neither the name of the University nor the names of its contributors 62 * may be used to endorse or promote products derived from this software 63 * without specific prior written permission. 
64 * 65 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 66 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 67 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 68 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 69 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 70 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 71 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 72 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 73 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 74 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 75 * SUCH DAMAGE. 76 * 77 * @(#)vfs_subr.c 8.13 (Berkeley) 4/18/94 78 */ 79 80 /* 81 * External virtual filesystem routines 82 */ 83 84 #include <sys/cdefs.h> 85 __KERNEL_RCSID(0, "$NetBSD: vfs_subr.c,v 1.175 2002/05/23 23:05:25 perseant Exp $"); 86 87 #include "opt_ddb.h" 88 #include "opt_compat_netbsd.h" 89 #include "opt_compat_43.h" 90 91 #include <sys/param.h> 92 #include <sys/systm.h> 93 #include <sys/proc.h> 94 #include <sys/kernel.h> 95 #include <sys/mount.h> 96 #include <sys/time.h> 97 #include <sys/fcntl.h> 98 #include <sys/vnode.h> 99 #include <sys/stat.h> 100 #include <sys/namei.h> 101 #include <sys/ucred.h> 102 #include <sys/buf.h> 103 #include <sys/errno.h> 104 #include <sys/malloc.h> 105 #include <sys/domain.h> 106 #include <sys/mbuf.h> 107 #include <sys/syscallargs.h> 108 #include <sys/device.h> 109 #include <sys/dirent.h> 110 111 #include <miscfs/specfs/specdev.h> 112 #include <miscfs/genfs/genfs.h> 113 #include <miscfs/syncfs/syncfs.h> 114 115 #include <uvm/uvm.h> 116 #include <uvm/uvm_ddb.h> 117 118 #include <sys/sysctl.h> 119 120 enum vtype iftovt_tab[16] = { 121 VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON, 122 VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD, 123 }; 124 const 
int vttoif_tab[9] = {
	0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
	S_IFSOCK, S_IFIFO, S_IFMT,
};
/* vttoif_tab (above): maps enum vtype to S_IF* inode format bits. */

int doforce = 1;		/* 1 => permit forcible unmounting */
int prtactive = 0;		/* 1 => print out reclaim of active vnodes */

extern int dovfsusermount;	/* 1 => permit any user to mount filesystems */

/*
 * Insq/Remq for the vnode usage lists.
 * bufremvn resets le_next to NOLIST so "is this buf on a vnode list?"
 * can be tested cheaply (see brelvp/reassignbuf).
 */
#define	bufinsvn(bp, dp)	LIST_INSERT_HEAD(dp, bp, b_vnbufs)
#define	bufremvn(bp) {							\
	LIST_REMOVE(bp, b_vnbufs);					\
	(bp)->b_vnbufs.le_next = NOLIST;				\
}
/* TAILQ_HEAD(freelst, vnode) vnode_free_list = vnode free list (in vnode.h) */
struct freelst vnode_free_list =	/* recyclable vnodes, no held buffers */
    TAILQ_HEAD_INITIALIZER(vnode_free_list);
struct freelst vnode_hold_list =	/* recyclable vnodes with held buffers */
    TAILQ_HEAD_INITIALIZER(vnode_hold_list);

struct mntlist mountlist =			/* mounted filesystem list */
    CIRCLEQ_HEAD_INITIALIZER(mountlist);
struct vfs_list_head vfs_list =			/* vfs list */
    LIST_HEAD_INITIALIZER(vfs_list);

struct nfs_public nfs_pub;		/* publicly exported FS */

/* Lock ordering/ownership:
 *   mountlist_slock protects mountlist (see vfs_getvfs).
 *   mntid_slock protects fsid generation state (see vfs_getnewfsid).
 *   mntvnode_slock protects per-mount vnode lists (see insmntque).
 *   vnode_free_list_slock protects both free lists above.
 *   spechash_slock protects the special-device alias hash (see checkalias).
 */
struct simplelock mountlist_slock = SIMPLELOCK_INITIALIZER;
static struct simplelock mntid_slock = SIMPLELOCK_INITIALIZER;
struct simplelock mntvnode_slock = SIMPLELOCK_INITIALIZER;
struct simplelock vnode_free_list_slock = SIMPLELOCK_INITIALIZER;
struct simplelock spechash_slock = SIMPLELOCK_INITIALIZER;

/*
 * These define the root filesystem and device.
 */
struct mount *rootfs;
struct vnode *rootvnode;
struct device *root_device;		/* root device */

struct pool vnode_pool;			/* memory pool for vnodes */

/*
 * Local declarations.
 */
void	insmntque __P((struct vnode *, struct mount *));
int	getdevvp __P((dev_t, struct vnode **, enum vtype));
void	vgoneall __P((struct vnode *));

static	int vfs_hang_addrlist __P((struct mount *, struct netexport *,
				   struct export_args *));
static	int vfs_free_netcred __P((struct radix_node *, void *));
static	void vfs_free_addrlist __P((struct netexport *));

#ifdef DEBUG
void printlockedvnodes __P((void));
#endif

/*
 * Initialize the vnode management data structures.
 * Sets up the vnode pool and starts the filesystem syncer.
 */
void
vntblinit()
{

	pool_init(&vnode_pool, sizeof(struct vnode), 0, 0, 0, "vnodepl",
	    &pool_allocator_nointr);

	/*
	 * Initialize the filesystem syncer.
	 */
	vn_initialize_syncerd();
}

/*
 * Mark a mount point as busy. Used to synchronize access and to delay
 * unmounting. Interlock is not released on failure.
 *
 * Returns 0 on success (shared busy lock held on return); ENOENT if the
 * filesystem was unmounted while we waited (MNT_GONE) or LK_NOWAIT was
 * given during an unmount; EDEADLK if LK_RECURSEFAIL is set and the
 * caller is itself the unmounting process.
 */
int
vfs_busy(mp, flags, interlkp)
	struct mount *mp;
	int flags;
	struct simplelock *interlkp;
{
	int lkflags;

	while (mp->mnt_flag & MNT_UNMOUNT) {
		int gone;

		if (flags & LK_NOWAIT)
			return (ENOENT);
		if ((flags & LK_RECURSEFAIL) && mp->mnt_unmounter != NULL
		    && mp->mnt_unmounter == curproc)
			return (EDEADLK);
		if (interlkp)
			simple_unlock(interlkp);
		/*
		 * Since all busy locks are shared except the exclusive
		 * lock granted when unmounting, the only place that a
		 * wakeup needs to be done is at the release of the
		 * exclusive lock at the end of dounmount.
		 *
		 * XXX MP: add spinlock protecting mnt_wcnt here once you
		 * can atomically unlock-and-sleep.
		 */
		mp->mnt_wcnt++;
		tsleep((caddr_t)mp, PVFS, "vfs_busy", 0);
		mp->mnt_wcnt--;
		/* Latch MNT_GONE before waking the unmounter: the struct
		 * mount may be freed once the last waiter drops mnt_wcnt. */
		gone = mp->mnt_flag & MNT_GONE;

		if (mp->mnt_wcnt == 0)
			wakeup(&mp->mnt_wcnt);
		if (interlkp)
			simple_lock(interlkp);
		if (gone)
			return (ENOENT);
	}
	lkflags = LK_SHARED;
	if (interlkp)
		lkflags |= LK_INTERLOCK;
	if (lockmgr(&mp->mnt_lock, lkflags, interlkp))
		panic("vfs_busy: unexpected lock failure");
	return (0);
}

/*
 * Free a busy filesystem.
 * Releases the shared busy lock taken by vfs_busy().
 */
void
vfs_unbusy(mp)
	struct mount *mp;
{

	lockmgr(&mp->mnt_lock, LK_RELEASE, NULL);
}

/*
 * Lookup a filesystem type, and if found allocate and initialize
 * a mount structure for it.
 *
 * Devname is usually updated by mount(8) after booting.
 *
 * Returns 0 with *mpp pointing at a busy, read-only mount structure,
 * or ENODEV if fstypename does not match any configured filesystem.
 */
int
vfs_rootmountalloc(fstypename, devname, mpp)
	char *fstypename;
	char *devname;
	struct mount **mpp;
{
	struct vfsops *vfsp = NULL;
	struct mount *mp;

	LIST_FOREACH(vfsp, &vfs_list, vfs_list)
		if (!strncmp(vfsp->vfs_name, fstypename, MFSNAMELEN))
			break;

	if (vfsp == NULL)
		return (ENODEV);
	mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK);
	memset((char *)mp, 0, (u_long)sizeof(struct mount));
	lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, 0);
	/* Cannot fail: the lock was just initialized and no unmount can
	 * be in progress on a brand-new mount. */
	(void)vfs_busy(mp, LK_NOWAIT, 0);
	LIST_INIT(&mp->mnt_vnodelist);
	mp->mnt_op = vfsp;
	mp->mnt_flag = MNT_RDONLY;
	mp->mnt_vnodecovered = NULLVP;
	vfsp->vfs_refcount++;
	strncpy(mp->mnt_stat.f_fstypename, vfsp->vfs_name, MFSNAMELEN);
	mp->mnt_stat.f_mntonname[0] = '/';
	(void) copystr(devname, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, 0);
	*mpp = mp;
	return (0);
}

/*
 * Lookup a mount point by filesystem identifier.
 */
struct mount *
vfs_getvfs(fsid)
	fsid_t *fsid;
{
	struct mount *mp;

	/* Takes and releases mountlist_slock internally; the returned
	 * mount is NOT referenced or busied for the caller. */
	simple_lock(&mountlist_slock);
	for (mp = mountlist.cqh_first; mp != (void *)&mountlist;
	     mp = mp->mnt_list.cqe_next) {
		if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] &&
		    mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) {
			simple_unlock(&mountlist_slock);
			return (mp);
		}
	}
	simple_unlock(&mountlist_slock);
	return ((struct mount *)0);
}

/*
 * Get a new unique fsid for mp, derived from the filesystem type and a
 * rolling per-boot counter.  Probes with vfs_getvfs() until the candidate
 * fsid collides with no currently-mounted filesystem.
 */
void
vfs_getnewfsid(mp)
	struct mount *mp;
{
	static u_short xxxfs_mntid;
	fsid_t tfsid;
	int mtype;

	simple_lock(&mntid_slock);
	mtype = makefstype(mp->mnt_op->vfs_name);
	mp->mnt_stat.f_fsid.val[0] = makedev(nblkdev + mtype, 0);
	mp->mnt_stat.f_fsid.val[1] = mtype;
	if (xxxfs_mntid == 0)
		++xxxfs_mntid;
	tfsid.val[0] = makedev((nblkdev + mtype) & 0xff, xxxfs_mntid);
	tfsid.val[1] = mtype;
	if (mountlist.cqh_first != (void *)&mountlist) {
		while (vfs_getvfs(&tfsid)) {
			tfsid.val[0]++;
			xxxfs_mntid++;
		}
	}
	mp->mnt_stat.f_fsid.val[0] = tfsid.val[0];
	simple_unlock(&mntid_slock);
}

/*
 * Make a 'unique' number from a mount type name.
 * Simple hash: rv = (rv << 2) ^ c over the bytes of the name.
 * Not guaranteed collision-free ("unique" is in quotes advisedly).
 */
long
makefstype(type)
	const char *type;
{
	long rv;

	for (rv = 0; *type; type++) {
		rv <<= 2;
		rv ^= *type;
	}
	return rv;
}


/*
 * Set vnode attributes to VNOVAL (and va_type to VNON, va_vaflags to 0),
 * marking every field "not specified" before a VOP_SETATTR/GETATTR.
 */
void
vattr_null(vap)
	struct vattr *vap;
{

	vap->va_type = VNON;

	/*
	 * Assign individually so that it is safe even if size and
	 * sign of each member are varied.
	 */
	vap->va_mode = VNOVAL;
	vap->va_nlink = VNOVAL;
	vap->va_uid = VNOVAL;
	vap->va_gid = VNOVAL;
	vap->va_fsid = VNOVAL;
	vap->va_fileid = VNOVAL;
	vap->va_size = VNOVAL;
	vap->va_blocksize = VNOVAL;
	vap->va_atime.tv_sec =
	    vap->va_mtime.tv_sec =
	    vap->va_ctime.tv_sec = VNOVAL;
	vap->va_atime.tv_nsec =
	    vap->va_mtime.tv_nsec =
	    vap->va_ctime.tv_nsec = VNOVAL;
	vap->va_gen = VNOVAL;
	vap->va_flags = VNOVAL;
	vap->va_rdev = VNOVAL;
	vap->va_bytes = VNOVAL;
	vap->va_vaflags = 0;
}

/*
 * Routines having to do with the management of the vnode table.
 */
extern int (**dead_vnodeop_p) __P((void *));
long numvnodes;			/* current number of allocated vnodes */

/*
 * Return the next vnode from the free list.
 *
 * On success returns 0 with *vpp pointing at a clean vnode with
 * v_usecount == 1, typed VNON, inserted on mp's vnode list.  Returns
 * ENFILE if the vnode table is full, or an error from vfs_busy() if
 * mp is being forcibly unmounted.
 */
int
getnewvnode(tag, mp, vops, vpp)
	enum vtagtype tag;
	struct mount *mp;
	int (**vops) __P((void *));
	struct vnode **vpp;
{
	extern struct uvm_pagerops uvm_vnodeops;
	struct uvm_object *uobj;
	struct proc *p = curproc;	/* XXX */
	struct freelst *listhd;
	static int toggle;
	struct vnode *vp;
	int error = 0, tryalloc;

 try_again:
	if (mp) {
		/*
		 * Mark filesystem busy while we're creating a vnode.
		 * If unmount is in progress, this will wait; if the
		 * unmount succeeds (only if umount -f), this will
		 * return an error.  If the unmount fails, we'll keep
		 * going afterwards.
		 * (This puts the per-mount vnode list logically under
		 * the protection of the vfs_busy lock).
		 */
		error = vfs_busy(mp, LK_RECURSEFAIL, 0);
		if (error && error != EDEADLK)
			return error;
	}

	/*
	 * We must choose whether to allocate a new vnode or recycle an
	 * existing one. The criterion for allocating a new one is that
	 * the total number of vnodes is less than the number desired or
	 * there are no vnodes on either free list.  Generally we only
	 * want to recycle vnodes that have no buffers associated with
	 * them, so we look first on the vnode_free_list.  If it is empty,
	 * we next consider vnodes with referencing buffers on the
	 * vnode_hold_list.  The toggle ensures that half the time we
	 * will use a buffer from the vnode_hold_list, and half the time
	 * we will allocate a new one unless the list has grown to twice
	 * the desired size.  We are reticent to recycle vnodes from the
	 * vnode_hold_list because we will lose the identity of all its
	 * referencing buffers.
	 */

	vp = NULL;

	simple_lock(&vnode_free_list_slock);

	toggle ^= 1;
	if (numvnodes > 2 * desiredvnodes)
		toggle = 0;

	tryalloc = numvnodes < desiredvnodes ||
	    (TAILQ_FIRST(&vnode_free_list) == NULL &&
	     (TAILQ_FIRST(&vnode_hold_list) == NULL || toggle));

	if (tryalloc &&
	    (vp = pool_get(&vnode_pool, PR_NOWAIT)) != NULL) {
		/* Fresh vnode from the pool: zero it and set up the
		 * embedded uvm_object by hand. */
		simple_unlock(&vnode_free_list_slock);
		memset(vp, 0, sizeof(*vp));
		simple_lock_init(&vp->v_interlock);
		uobj = &vp->v_uobj;
		uobj->pgops = &uvm_vnodeops;
		uobj->uo_npages = 0;
		TAILQ_INIT(&uobj->memq);
		numvnodes++;
	} else {
		/* Recycle: scan free list first, then hold list, for a
		 * vnode whose interlock we can take without blocking. */
		if ((vp = TAILQ_FIRST(listhd = &vnode_free_list)) == NULL)
			vp = TAILQ_FIRST(listhd = &vnode_hold_list);
		for (; vp != NULL; vp = TAILQ_NEXT(vp, v_freelist)) {
			if (simple_lock_try(&vp->v_interlock)) {
				if ((vp->v_flag & VLAYER) == 0) {
					break;
				}
				/* Layered (e.g. nullfs) vnodes are only
				 * taken when their lock is free. */
				if (VOP_ISLOCKED(vp) == 0)
					break;
				else
					simple_unlock(&vp->v_interlock);
			}
		}
		/*
		 * Unless this is a bad time of the month, at most
		 * the first NCPUS items on the free list are
		 * locked, so this is close enough to being empty.
		 */
		if (vp == NULLVP) {
			simple_unlock(&vnode_free_list_slock);
			if (mp && error != EDEADLK)
				vfs_unbusy(mp);
			if (tryalloc) {
				/* Pool allocation failed above; wait a tick
				 * for memory and start over. */
				printf("WARNING: unable to allocate new "
				    "vnode, retrying...\n");
				(void) tsleep(&lbolt, PRIBIO, "newvn", hz);
				goto try_again;
			}
			tablefull("vnode", "increase kern.maxvnodes or NVNODE");
			*vpp = 0;
			return (ENFILE);
		}
		if (vp->v_usecount)
			panic("free vnode isn't, vp %p", vp);
		TAILQ_REMOVE(listhd, vp, v_freelist);
		/* see comment on why 0xdeadb is set at end of vgone (below) */
		vp->v_freelist.tqe_prev = (struct vnode **)0xdeadb;
		simple_unlock(&vnode_free_list_slock);
		vp->v_lease = NULL;

		/* vgonel() consumes the interlock taken above. */
		if (vp->v_type != VBAD)
			vgonel(vp, p);
		else
			simple_unlock(&vp->v_interlock);
#ifdef DIAGNOSTIC
		if (vp->v_data || vp->v_uobj.uo_npages ||
		    TAILQ_FIRST(&vp->v_uobj.memq))
			panic("cleaned vnode isn't, vp %p", vp);
		if (vp->v_numoutput)
			panic("clean vnode has pending I/O's, vp %p", vp);
#endif
		KASSERT((vp->v_flag & VONWORKLST) == 0);
		vp->v_flag = 0;
		vp->v_socket = NULL;
	}
	/* Common (re)initialization for both fresh and recycled vnodes. */
	vp->v_type = VNON;
	vp->v_vnlock = &vp->v_lock;
	lockinit(vp->v_vnlock, PVFS, "vnlock", 0, 0);
	cache_purge(vp);
	vp->v_tag = tag;
	vp->v_op = vops;
	insmntque(vp, mp);
	*vpp = vp;
	vp->v_usecount = 1;
	vp->v_data = 0;
	simple_lock_init(&vp->v_uobj.vmobjlock);

	/*
	 * initialize uvm_object within vnode.
	 */

	uobj = &vp->v_uobj;
	KASSERT(uobj->pgops == &uvm_vnodeops);
	KASSERT(uobj->uo_npages == 0);
	KASSERT(TAILQ_FIRST(&uobj->memq) == NULL);
	vp->v_size = VSIZENOTSET;

	if (mp && error != EDEADLK)
		vfs_unbusy(mp);
	return (0);
}

/*
 * This is really just the reverse of getnewvnode().  Needed for
 * VFS_VGET functions who may need to push back a vnode in case
 * of a locking race.
 */
void
ungetnewvnode(vp)
	struct vnode *vp;
{
#ifdef DIAGNOSTIC
	if (vp->v_usecount != 1)
		panic("ungetnewvnode: busy vnode");
#endif
	vp->v_usecount--;
	insmntque(vp, NULL);
	vp->v_type = VBAD;

	simple_lock(&vp->v_interlock);
	/*
	 * Insert at head of LRU list
	 * (head, not tail: this vnode is clean and should be the first
	 * candidate for reuse).
	 */
	simple_lock(&vnode_free_list_slock);
	if (vp->v_holdcnt > 0)
		TAILQ_INSERT_HEAD(&vnode_hold_list, vp, v_freelist);
	else
		TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
	simple_unlock(&vnode_free_list_slock);
	simple_unlock(&vp->v_interlock);
}

/*
 * Move a vnode from one mount queue to another.
 * mp may be NULL to remove the vnode from any mount queue.
 */
void
insmntque(vp, mp)
	struct vnode *vp;
	struct mount *mp;
{

#ifdef DIAGNOSTIC
	if ((mp != NULL) &&
	    (mp->mnt_flag & MNT_UNMOUNT) &&
	    !(mp->mnt_flag & MNT_SOFTDEP) &&
	    vp->v_tag != VT_VFS) {
		panic("insmntque into dying filesystem");
	}
#endif

	simple_lock(&mntvnode_slock);
	/*
	 * Delete from old mount point vnode list, if on one.
	 */
	if (vp->v_mount != NULL)
		LIST_REMOVE(vp, v_mntvnodes);
	/*
	 * Insert into list of vnodes for the new mount point, if available.
	 */
	if ((vp->v_mount = mp) != NULL)
		LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes);
	simple_unlock(&mntvnode_slock);
}

/*
 * Update outstanding I/O count and do wakeup if requested.
 * Called when a write on bp completes; wakes sleepers in vflushbuf
 * et al. once the vnode's last write has drained.
 */
void
vwakeup(bp)
	struct buf *bp;
{
	struct vnode *vp;

	if ((vp = bp->b_vp) != NULL) {
		if (--vp->v_numoutput < 0)
			panic("vwakeup: neg numoutput, vp %p", vp);
		if ((vp->v_flag & VBWAIT) && vp->v_numoutput <= 0) {
			vp->v_flag &= ~VBWAIT;
			wakeup((caddr_t)&vp->v_numoutput);
		}
	}
}

/*
 * Flush out and invalidate all buffers associated with a vnode.
 * Called with the underlying vnode locked, which should prevent new dirty
 * buffers from being queued.
 *
 * If V_SAVE is set in flags, dirty data is written out first via
 * VOP_PUTPAGES/VOP_FSYNC; otherwise it is discarded.  slpflag/slptimeo
 * are passed to tsleep() when waiting on busy buffers.  Returns 0 or an
 * error from VOP_PUTPAGES, VOP_FSYNC, or an interrupted sleep.
 */
int
vinvalbuf(vp, flags, cred, p, slpflag, slptimeo)
	struct vnode *vp;
	int flags;
	struct ucred *cred;
	struct proc *p;
	int slpflag, slptimeo;
{
	struct buf *bp, *nbp;
	int s, error;
	int flushflags = PGO_ALLPAGES | PGO_FREE | PGO_SYNCIO |
	    (flags & V_SAVE ? PGO_CLEANIT : 0);

	/* XXXUBC this doesn't look at flags or slp* */
	/* NOTE(review): v_interlock is taken here and not released on the
	 * error path — presumably VOP_PUTPAGES consumes/releases it per the
	 * UBC pager convention; confirm against genfs_putpages. */
	simple_lock(&vp->v_interlock);
	error = VOP_PUTPAGES(vp, 0, 0, flushflags);
	if (error) {
		return error;
	}

	if (flags & V_SAVE) {
		error = VOP_FSYNC(vp, cred, FSYNC_WAIT|FSYNC_RECLAIM, 0, 0, p);
		if (error)
			return (error);
#ifdef DIAGNOSTIC
		s = splbio();
		if (vp->v_numoutput > 0 || !LIST_EMPTY(&vp->v_dirtyblkhd))
			panic("vinvalbuf: dirty bufs, vp %p", vp);
		splx(s);
#endif
	}

	s = splbio();

restart:
	for (bp = LIST_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) {
		nbp = LIST_NEXT(bp, b_vnbufs);
		if (bp->b_flags & B_BUSY) {
			/* Sleeping may invalidate nbp; rescan from the top. */
			bp->b_flags |= B_WANTED;
			error = tsleep((caddr_t)bp, slpflag | (PRIBIO + 1),
			    "vinvalbuf", slptimeo);
			if (error) {
				splx(s);
				return (error);
			}
			goto restart;
		}
		bp->b_flags |= B_BUSY | B_INVAL | B_VFLUSH;
		brelse(bp);
	}

	for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
		nbp = LIST_NEXT(bp, b_vnbufs);
		if (bp->b_flags & B_BUSY) {
			bp->b_flags |= B_WANTED;
			error = tsleep((caddr_t)bp, slpflag | (PRIBIO + 1),
			    "vinvalbuf", slptimeo);
			if (error) {
				splx(s);
				return (error);
			}
			goto restart;
		}
		/*
		 * XXX Since there are no node locks for NFS, I believe
		 * there is a slight chance that a delayed write will
		 * occur while sleeping just above, so check for it.
		 */
		if ((bp->b_flags & B_DELWRI) && (flags & V_SAVE)) {
#ifdef DEBUG
			printf("buffer still DELWRI\n");
#endif
			bp->b_flags |= B_BUSY | B_VFLUSH;
			VOP_BWRITE(bp);
			goto restart;
		}
		bp->b_flags |= B_BUSY | B_INVAL | B_VFLUSH;
		brelse(bp);
	}

#ifdef DIAGNOSTIC
	if (!LIST_EMPTY(&vp->v_cleanblkhd) || !LIST_EMPTY(&vp->v_dirtyblkhd))
		panic("vinvalbuf: flush failed, vp %p", vp);
#endif

	splx(s);

	return (0);
}

/*
 * Destroy any in core blocks past the truncation length.
 * Called with the underlying vnode locked, which should prevent new dirty
 * buffers from being queued.
 *
 * Buffers with b_lblkno < lbn are left alone.  Returns 0 or an error from
 * VOP_PUTPAGES or an interrupted tsleep.
 */
int
vtruncbuf(vp, lbn, slpflag, slptimeo)
	struct vnode *vp;
	daddr_t lbn;
	int slpflag, slptimeo;
{
	struct buf *bp, *nbp;
	int s, error;
	voff_t off;

	off = round_page((voff_t)lbn << vp->v_mount->mnt_fs_bshift);
	/* NOTE(review): as in vinvalbuf, v_interlock appears to be handed
	 * to VOP_PUTPAGES — no unlock on the error return. */
	simple_lock(&vp->v_interlock);
	error = VOP_PUTPAGES(vp, off, 0, PGO_FREE | PGO_SYNCIO);
	if (error) {
		return error;
	}

	s = splbio();

restart:
	for (bp = LIST_FIRST(&vp->v_cleanblkhd); bp; bp = nbp) {
		nbp = LIST_NEXT(bp, b_vnbufs);
		if (bp->b_lblkno < lbn)
			continue;
		if (bp->b_flags & B_BUSY) {
			bp->b_flags |= B_WANTED;
			error = tsleep(bp, slpflag | (PRIBIO + 1),
			    "vtruncbuf", slptimeo);
			if (error) {
				splx(s);
				return (error);
			}
			goto restart;
		}
		bp->b_flags |= B_BUSY | B_INVAL | B_VFLUSH;
		brelse(bp);
	}

	for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
		nbp = LIST_NEXT(bp, b_vnbufs);
		if (bp->b_lblkno < lbn)
			continue;
		if (bp->b_flags & B_BUSY) {
			bp->b_flags |= B_WANTED;
			error = tsleep(bp, slpflag | (PRIBIO + 1),
			    "vtruncbuf", slptimeo);
			if (error) {
				splx(s);
				return (error);
			}
			goto restart;
		}
		bp->b_flags |= B_BUSY | B_INVAL | B_VFLUSH;
		brelse(bp);
	}

	splx(s);

	return (0);
}

/*
 * Flush all dirty buffers associated with a vnode; if sync is nonzero,
 * wait for all writes (including pending v_numoutput I/O) to complete
 * and loop until the dirty list is empty.
 */
void
vflushbuf(vp, sync)
	struct vnode *vp;
	int sync;
{
	struct buf *bp, *nbp;
	int flags = PGO_CLEANIT | PGO_ALLPAGES | (sync ? PGO_SYNCIO : 0);
	int s;

	/* NOTE(review): interlock handed to VOP_PUTPAGES, as elsewhere. */
	simple_lock(&vp->v_interlock);
	(void) VOP_PUTPAGES(vp, 0, 0, flags);

loop:
	s = splbio();
	for (bp = LIST_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
		nbp = LIST_NEXT(bp, b_vnbufs);
		if ((bp->b_flags & B_BUSY))
			continue;
		if ((bp->b_flags & B_DELWRI) == 0)
			panic("vflushbuf: not dirty, bp %p", bp);
		bp->b_flags |= B_BUSY | B_VFLUSH;
		splx(s);
		/*
		 * Wait for I/O associated with indirect blocks to complete,
		 * since there is no way to quickly wait for them below.
		 */
		if (bp->b_vp == vp || sync == 0)
			(void) bawrite(bp);
		else
			(void) bwrite(bp);
		goto loop;
	}
	if (sync == 0) {
		splx(s);
		return;
	}
	/* Wait for writes started above to drain; vwakeup() clears
	 * VBWAIT and wakes us when v_numoutput reaches zero. */
	while (vp->v_numoutput) {
		vp->v_flag |= VBWAIT;
		tsleep((caddr_t)&vp->v_numoutput, PRIBIO + 1, "vflushbuf", 0);
	}
	splx(s);
	if (!LIST_EMPTY(&vp->v_dirtyblkhd)) {
		vprint("vflushbuf: dirty", vp);
		goto loop;
	}
}

/*
 * Associate a buffer with a vnode.
 * Takes a hold reference on the vnode and puts bp on its clean list.
 */
void
bgetvp(vp, bp)
	struct vnode *vp;
	struct buf *bp;
{
	int s;

	if (bp->b_vp)
		panic("bgetvp: not free, bp %p", bp);
	VHOLD(vp);
	s = splbio();
	bp->b_vp = vp;
	if (vp->v_type == VBLK || vp->v_type == VCHR)
		bp->b_dev = vp->v_rdev;
	else
		bp->b_dev = NODEV;
	/*
	 * Insert onto list for new vnode.
	 */
	bufinsvn(bp, &vp->v_cleanblkhd);
	splx(s);
}

/*
 * Disassociate a buffer from a vnode.
 * Drops the hold reference taken by bgetvp() and, if this was the last
 * dirty state keeping the vnode on the syncer worklist, removes it.
 */
void
brelvp(bp)
	struct buf *bp;
{
	struct vnode *vp;
	int s;

	if (bp->b_vp == NULL)
		panic("brelvp: vp NULL, bp %p", bp);

	s = splbio();
	vp = bp->b_vp;
	/*
	 * Delete from old vnode list, if on one.
	 */
	if (bp->b_vnbufs.le_next != NOLIST)
		bufremvn(bp);

	if (TAILQ_EMPTY(&vp->v_uobj.memq) && (vp->v_flag & VONWORKLST) &&
	    LIST_FIRST(&vp->v_dirtyblkhd) == NULL) {
		vp->v_flag &= ~VONWORKLST;
		LIST_REMOVE(vp, v_synclist);
	}

	bp->b_vp = NULL;
	HOLDRELE(vp);
	splx(s);
}

/*
 * Reassign a buffer from one vnode to another.
 * Used to assign file specific control information
 * (indirect blocks) to the vnode to which they belong.
 *
 * This function must be called at splbio().
 */
void
reassignbuf(bp, newvp)
	struct buf *bp;
	struct vnode *newvp;
{
	struct buflists *listheadp;
	int delay;

	/*
	 * Delete from old vnode list, if on one.
	 */
	if (bp->b_vnbufs.le_next != NOLIST)
		bufremvn(bp);
	/*
	 * If dirty, put on list of dirty buffers;
	 * otherwise insert onto list of clean buffers.
	 */
	if ((bp->b_flags & B_DELWRI) == 0) {
		listheadp = &newvp->v_cleanblkhd;
		if (TAILQ_EMPTY(&newvp->v_uobj.memq) &&
		    (newvp->v_flag & VONWORKLST) &&
		    LIST_FIRST(&newvp->v_dirtyblkhd) == NULL) {
			newvp->v_flag &= ~VONWORKLST;
			LIST_REMOVE(newvp, v_synclist);
		}
	} else {
		listheadp = &newvp->v_dirtyblkhd;
		if ((newvp->v_flag & VONWORKLST) == 0) {
			/* Pick the syncer delay by vnode kind: directories,
			 * filesystem metadata, and regular files each get
			 * their own write-behind interval. */
			switch (newvp->v_type) {
			case VDIR:
				delay = dirdelay;
				break;
			case VBLK:
				if (newvp->v_specmountpoint != NULL) {
					delay = metadelay;
					break;
				}
				/* fall through */
			default:
				delay = filedelay;
				break;
			}
			if (!newvp->v_mount ||
			    (newvp->v_mount->mnt_flag & MNT_ASYNC) == 0)
				vn_syncer_add_to_worklist(newvp, delay);
		}
	}
	bufinsvn(bp, listheadp);
}

/*
 * Create a vnode for a block device.
 * Used for root filesystem and swap areas.
 * Also used for memory file system special devices.
 */
int
bdevvp(dev, vpp)
	dev_t dev;
	struct vnode **vpp;
{

	return (getdevvp(dev, vpp, VBLK));
}

/*
 * Create a vnode for a character device.
 * Used for kernfs and some console handling.
 */
int
cdevvp(dev, vpp)
	dev_t dev;
	struct vnode **vpp;
{

	return (getdevvp(dev, vpp, VCHR));
}

/*
 * Create a vnode for a device.
 * Used by bdevvp (block device) for root file system etc.,
 * and by cdevvp (character device) for console and kernfs.
 *
 * Returns 0 with *vpp set to the (possibly aliased) device vnode, or
 * NULLVP when dev == NODEV; otherwise an error from getnewvnode().
 */
int
getdevvp(dev, vpp, type)
	dev_t dev;
	struct vnode **vpp;
	enum vtype type;
{
	struct vnode *vp;
	struct vnode *nvp;
	int error;

	if (dev == NODEV) {
		*vpp = NULLVP;
		return (0);
	}
	error = getnewvnode(VT_NON, NULL, spec_vnodeop_p, &nvp);
	if (error) {
		*vpp = NULLVP;
		return (error);
	}
	vp = nvp;
	vp->v_type = type;
	/* If an alias for this device already exists, use it instead of
	 * the freshly allocated vnode. */
	if ((nvp = checkalias(vp, dev, NULL)) != 0) {
		vput(vp);
		vp = nvp;
	}
	*vpp = vp;
	return (0);
}

/*
 * Check to see if the new vnode represents a special device
 * for which we already have a vnode (either because of
 * bdevvp() or because of a different vnode representing
 * the same block device). If such an alias exists, deallocate
 * the existing contents and return the aliased vnode. The
 * caller is responsible for filling it with its new contents.
 *
 * Returns NULLVP when nvp is not a device, or when nvp was entered
 * into the alias chain itself; otherwise returns the reused alias.
 */
struct vnode *
checkalias(nvp, nvp_rdev, mp)
	struct vnode *nvp;
	dev_t nvp_rdev;
	struct mount *mp;
{
	struct proc *p = curproc;	/* XXX */
	struct vnode *vp;
	struct vnode **vpp;

	if (nvp->v_type != VBLK && nvp->v_type != VCHR)
		return (NULLVP);

	vpp = &speclisth[SPECHASH(nvp_rdev)];
loop:
	simple_lock(&spechash_slock);
	for (vp = *vpp; vp; vp = vp->v_specnext) {
		if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type)
			continue;
		/*
		 * Alias, but not in use, so flush it out.
		 */
		simple_lock(&vp->v_interlock);
		if (vp->v_usecount == 0) {
			simple_unlock(&spechash_slock);
			vgonel(vp, p);
			goto loop;
		}
		/* vget can fail if the vnode is being cleaned; the hash
		 * chain may have changed, so rescan. */
		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK)) {
			simple_unlock(&spechash_slock);
			goto loop;
		}
		break;
	}
	if (vp == NULL || vp->v_tag != VT_NON || vp->v_type != VBLK) {
		/* No reusable alias: give nvp its own specinfo and link it
		 * onto the hash chain. */
		MALLOC(nvp->v_specinfo, struct specinfo *,
		    sizeof(struct specinfo), M_VNODE, M_NOWAIT);
		/* XXX Erg. */
		if (nvp->v_specinfo == NULL) {
			simple_unlock(&spechash_slock);
			uvm_wait("checkalias");
			goto loop;
		}

		nvp->v_rdev = nvp_rdev;
		nvp->v_hashchain = vpp;
		nvp->v_specnext = *vpp;
		nvp->v_specmountpoint = NULL;
		simple_unlock(&spechash_slock);
		nvp->v_speclockf = NULL;
		*vpp = nvp;
		if (vp != NULLVP) {
			nvp->v_flag |= VALIASED;
			vp->v_flag |= VALIASED;
			vput(vp);
		}
		return (NULLVP);
	}
	/* Reuse the existing VT_NON block-device vnode: strip its old
	 * identity and hand it to the caller to refill. */
	simple_unlock(&spechash_slock);
	VOP_UNLOCK(vp, 0);
	simple_lock(&vp->v_interlock);
	vclean(vp, 0, p);
	vp->v_op = nvp->v_op;
	vp->v_tag = nvp->v_tag;
	vp->v_vnlock = &vp->v_lock;
	lockinit(vp->v_vnlock, PVFS, "vnlock", 0, 0);
	nvp->v_type = VNON;
	insmntque(vp, mp);
	return (vp);
}

/*
 * Grab a particular vnode from the free list, increment its
 * reference count and lock it.
If the vnode lock bit is set the
 * vnode is being eliminated in vgone. In that case, we can not
 * grab the vnode, so the process is awakened when the transition is
 * completed, and an error returned to indicate that the vnode is no
 * longer usable (possibly having been changed to a new file system type).
 *
 * Returns 0 on success; EBUSY if LK_NOWAIT was given and the vnode is
 * being cleaned (VXLOCK); ENOENT after waiting out a clean; or an error
 * from vn_lock() when a lock type was requested.
 */
int
vget(vp, flags)
	struct vnode *vp;
	int flags;
{
	int error;

	/*
	 * If the vnode is in the process of being cleaned out for
	 * another use, we wait for the cleaning to finish and then
	 * return failure. Cleaning is determined by checking that
	 * the VXLOCK flag is set.
	 */

	if ((flags & LK_INTERLOCK) == 0)
		simple_lock(&vp->v_interlock);
	if (vp->v_flag & VXLOCK) {
		if (flags & LK_NOWAIT) {
			simple_unlock(&vp->v_interlock);
			return EBUSY;
		}
		/* PNORELOCK: ltsleep drops the interlock and does not
		 * retake it before returning. */
		vp->v_flag |= VXWANT;
		ltsleep(vp, PINOD|PNORELOCK, "vget", 0, &vp->v_interlock);
		return (ENOENT);
	}
	/* First reference to a free vnode: take it off the LRU. */
	if (vp->v_usecount == 0) {
		simple_lock(&vnode_free_list_slock);
		if (vp->v_holdcnt > 0)
			TAILQ_REMOVE(&vnode_hold_list, vp, v_freelist);
		else
			TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
		simple_unlock(&vnode_free_list_slock);
	}
	vp->v_usecount++;
#ifdef DIAGNOSTIC
	if (vp->v_usecount == 0) {
		vprint("vget", vp);
		panic("vget: usecount overflow, vp %p", vp);
	}
#endif
	if (flags & LK_TYPE_MASK) {
		if ((error = vn_lock(vp, flags | LK_INTERLOCK))) {
			/*
			 * must expand vrele here because we do not want
			 * to call VOP_INACTIVE if the reference count
			 * drops back to zero since it was never really
			 * active. We must remove it from the free list
			 * before sleeping so that multiple processes do
			 * not try to recycle it.
			 */
			simple_lock(&vp->v_interlock);
			vp->v_usecount--;
			if (vp->v_usecount > 0) {
				simple_unlock(&vp->v_interlock);
				return (error);
			}
			/*
			 * insert at tail of LRU list
			 */
			simple_lock(&vnode_free_list_slock);
			if (vp->v_holdcnt > 0)
				TAILQ_INSERT_TAIL(&vnode_hold_list, vp,
				    v_freelist);
			else
				TAILQ_INSERT_TAIL(&vnode_free_list, vp,
				    v_freelist);
			simple_unlock(&vnode_free_list_slock);
			simple_unlock(&vp->v_interlock);
		}
		return (error);
	}
	simple_unlock(&vp->v_interlock);
	return (0);
}

/*
 * vput(), just unlock and vrele()
 * (caller holds the vnode lock; it is released here in all paths).
 */
void
vput(vp)
	struct vnode *vp;
{
	struct proc *p = curproc;	/* XXX */

#ifdef DIAGNOSTIC
	if (vp == NULL)
		panic("vput: null vp");
#endif
	simple_lock(&vp->v_interlock);
	vp->v_usecount--;
	if (vp->v_usecount > 0) {
		simple_unlock(&vp->v_interlock);
		VOP_UNLOCK(vp, 0);
		return;
	}
#ifdef DIAGNOSTIC
	if (vp->v_usecount < 0 || vp->v_writecount != 0) {
		vprint("vput: bad ref count", vp);
		panic("vput: ref cnt");
	}
#endif
	/*
	 * Insert at tail of LRU list.
	 */
	simple_lock(&vnode_free_list_slock);
	if (vp->v_holdcnt > 0)
		TAILQ_INSERT_TAIL(&vnode_hold_list, vp, v_freelist);
	else
		TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
	simple_unlock(&vnode_free_list_slock);
	/* Pages of a no-longer-executable mapping count as file pages
	 * again for the page-replacement accounting. */
	if (vp->v_flag & VEXECMAP) {
		uvmexp.execpages -= vp->v_uobj.uo_npages;
		uvmexp.filepages += vp->v_uobj.uo_npages;
	}
	vp->v_flag &= ~(VTEXT|VEXECMAP);
	simple_unlock(&vp->v_interlock);
	/* The vnode lock held by the caller is passed to VOP_INACTIVE,
	 * which is responsible for releasing it. */
	VOP_INACTIVE(vp, p);
}

/*
 * Vnode release.
 * If count drops to zero, call inactive routine and return to freelist.
 */
void
vrele(vp)
	struct vnode *vp;
{
	struct proc *p = curproc;	/* XXX */

#ifdef DIAGNOSTIC
	if (vp == NULL)
		panic("vrele: null vp");
#endif
	simple_lock(&vp->v_interlock);
	vp->v_usecount--;
	if (vp->v_usecount > 0) {
		/* Still referenced by someone else; nothing more to do. */
		simple_unlock(&vp->v_interlock);
		return;
	}
#ifdef DIAGNOSTIC
	if (vp->v_usecount < 0 || vp->v_writecount != 0) {
		vprint("vrele: bad ref count", vp);
		panic("vrele: ref cnt vp %p", vp);
	}
#endif
	/*
	 * Insert at tail of LRU list.
	 */
	simple_lock(&vnode_free_list_slock);
	if (vp->v_holdcnt > 0)
		TAILQ_INSERT_TAIL(&vnode_hold_list, vp, v_freelist);
	else
		TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
	simple_unlock(&vnode_free_list_slock);
	if (vp->v_flag & VEXECMAP) {
		/* Re-account executable pages as ordinary file pages. */
		uvmexp.execpages -= vp->v_uobj.uo_npages;
		uvmexp.filepages += vp->v_uobj.uo_npages;
	}
	vp->v_flag &= ~(VTEXT|VEXECMAP);
	/*
	 * Unlike vput(), the vnode is not yet locked here, so take the
	 * vnode lock (consuming the interlock) before deactivating.
	 */
	if (vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK) == 0)
		VOP_INACTIVE(vp, p);
}

#ifdef DIAGNOSTIC
/*
 * Page or buffer structure gets a reference.
 */
void
vhold(vp)
	struct vnode *vp;
{

	/*
	 * If it is on the freelist and the hold count is currently
	 * zero, move it to the hold list.  The test of the back
	 * pointer and the use reference count of zero is because
	 * it will be removed from a free list by getnewvnode,
	 * but will not have its reference count incremented until
	 * after calling vgone.  If the reference count were
	 * incremented first, vgone would (incorrectly) try to
	 * close the previous instance of the underlying object.
	 * So, the back pointer is explicitly set to `0xdeadb' in
	 * getnewvnode after removing it from a freelist to ensure
	 * that we do not try to move it here.
	 */
	simple_lock(&vp->v_interlock);
	if ((vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb) &&
	    vp->v_holdcnt == 0 && vp->v_usecount == 0) {
		simple_lock(&vnode_free_list_slock);
		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
		TAILQ_INSERT_TAIL(&vnode_hold_list, vp, v_freelist);
		simple_unlock(&vnode_free_list_slock);
	}
	vp->v_holdcnt++;
	simple_unlock(&vp->v_interlock);
}

/*
 * Page or buffer structure frees a reference.
 */
void
holdrele(vp)
	struct vnode *vp;
{

	simple_lock(&vp->v_interlock);
	if (vp->v_holdcnt <= 0)
		panic("holdrele: holdcnt vp %p", vp);
	vp->v_holdcnt--;

	/*
	 * If it is on the holdlist and the hold count drops to
	 * zero, move it to the free list.  The test of the back
	 * pointer and the use reference count of zero is because
	 * it will be removed from a free list by getnewvnode,
	 * but will not have its reference count incremented until
	 * after calling vgone.  If the reference count were
	 * incremented first, vgone would (incorrectly) try to
	 * close the previous instance of the underlying object.
	 * So, the back pointer is explicitly set to `0xdeadb' in
	 * getnewvnode after removing it from a freelist to ensure
	 * that we do not try to move it here.
	 */

	if ((vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb) &&
	    vp->v_holdcnt == 0 && vp->v_usecount == 0) {
		simple_lock(&vnode_free_list_slock);
		TAILQ_REMOVE(&vnode_hold_list, vp, v_freelist);
		TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
		simple_unlock(&vnode_free_list_slock);
	}
	simple_unlock(&vp->v_interlock);
}

/*
 * Vnode reference.
 */
void
vref(vp)
	struct vnode *vp;
{

	simple_lock(&vp->v_interlock);
	/* Caller must already hold a reference; vget() handles usecount 0. */
	if (vp->v_usecount <= 0)
		panic("vref used where vget required, vp %p", vp);
	vp->v_usecount++;
#ifdef DIAGNOSTIC
	if (vp->v_usecount == 0) {
		vprint("vref", vp);
		panic("vref: usecount overflow, vp %p", vp);
	}
#endif
	simple_unlock(&vp->v_interlock);
}
#endif /* DIAGNOSTIC */

/*
 * Remove any vnodes in the vnode table belonging to mount point mp.
 *
 * If MNT_NOFORCE is specified, there should not be any active ones,
 * return error if any are found (nb: this is a user error, not a
 * system error).  If MNT_FORCE is specified, detach any active vnodes
 * that are found.
 */
#ifdef DEBUG
int busyprt = 0;	/* print out busy vnodes */
struct ctldebug debug1 = { "busyprt", &busyprt };
#endif

int
vflush(mp, skipvp, flags)
	struct mount *mp;
	struct vnode *skipvp;
	int flags;
{
	struct proc *p = curproc;	/* XXX */
	struct vnode *vp, *nvp;
	int busy = 0;

	simple_lock(&mntvnode_slock);
loop:
	for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) {
		/* Vnode was recycled onto another mount: restart the scan. */
		if (vp->v_mount != mp)
			goto loop;
		nvp = vp->v_mntvnodes.le_next;
		/*
		 * Skip over a selected vnode.
		 */
		if (vp == skipvp)
			continue;
		simple_lock(&vp->v_interlock);
		/*
		 * Skip over a vnodes marked VSYSTEM.
		 */
		if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM)) {
			simple_unlock(&vp->v_interlock);
			continue;
		}
		/*
		 * If WRITECLOSE is set, only flush out regular file
		 * vnodes open for writing.
		 */
		if ((flags & WRITECLOSE) &&
		    (vp->v_writecount == 0 || vp->v_type != VREG)) {
			simple_unlock(&vp->v_interlock);
			continue;
		}
		/*
		 * With v_usecount == 0, all we need to do is clear
		 * out the vnode data structures and we are done.
		 */
		if (vp->v_usecount == 0) {
			/* vgonel() is entered with v_interlock held. */
			simple_unlock(&mntvnode_slock);
			vgonel(vp, p);
			simple_lock(&mntvnode_slock);
			continue;
		}
		/*
		 * If FORCECLOSE is set, forcibly close the vnode.
		 * For block or character devices, revert to an
		 * anonymous device.  For all other files, just kill them.
		 */
		if (flags & FORCECLOSE) {
			simple_unlock(&mntvnode_slock);
			if (vp->v_type != VBLK && vp->v_type != VCHR) {
				vgonel(vp, p);
			} else {
				vclean(vp, 0, p);
				vp->v_op = spec_vnodeop_p;
				insmntque(vp, (struct mount *)0);
			}
			simple_lock(&mntvnode_slock);
			continue;
		}
#ifdef DEBUG
		if (busyprt)
			vprint("vflush: busy vnode", vp);
#endif
		simple_unlock(&vp->v_interlock);
		busy++;
	}
	simple_unlock(&mntvnode_slock);
	if (busy)
		return (EBUSY);
	return (0);
}

/*
 * Disassociate the underlying file system from a vnode.
 * Called with v_interlock held; the interlock is consumed.
 */
void
vclean(vp, flags, p)
	struct vnode *vp;
	int flags;
	struct proc *p;
{
	int active;

	LOCK_ASSERT(simple_lock_held(&vp->v_interlock));

	/*
	 * Check to see if the vnode is in use.
	 * If so we have to reference it before we clean it out
	 * so that its count cannot fall to zero and generate a
	 * race against ourselves to recycle it.
	 */

	if ((active = vp->v_usecount) != 0) {
		vp->v_usecount++;
#ifdef DIAGNOSTIC
		if (vp->v_usecount == 0) {
			vprint("vclean", vp);
			panic("vclean: usecount overflow");
		}
#endif
	}

	/*
	 * Prevent the vnode from being recycled or
	 * brought into use while we clean it out.
	 */
	if (vp->v_flag & VXLOCK)
		panic("vclean: deadlock, vp %p", vp);
	vp->v_flag |= VXLOCK;
	if (vp->v_flag & VEXECMAP) {
		/* Re-account executable pages as ordinary file pages. */
		uvmexp.execpages -= vp->v_uobj.uo_npages;
		uvmexp.filepages += vp->v_uobj.uo_npages;
	}
	vp->v_flag &= ~(VTEXT|VEXECMAP);

	/*
	 * Even if the count is zero, the VOP_INACTIVE routine may still
	 * have the object locked while it cleans it out.  The VOP_LOCK
	 * ensures that the VOP_INACTIVE routine is done with its work.
	 * For active vnodes, it ensures that no other activity can
	 * occur while the underlying object is being cleaned out.
	 */
	VOP_LOCK(vp, LK_DRAIN | LK_INTERLOCK);

	/*
	 * Clean out any cached data associated with the vnode.
	 */
	if (flags & DOCLOSE) {
		vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0);
		KASSERT((vp->v_flag & VONWORKLST) == 0);
	}
	LOCK_ASSERT(!simple_lock_held(&vp->v_interlock));

	/*
	 * If purging an active vnode, it must be closed and
	 * deactivated before being reclaimed.  Note that the
	 * VOP_INACTIVE will unlock the vnode.
	 */
	if (active) {
		if (flags & DOCLOSE)
			VOP_CLOSE(vp, FNONBLOCK, NOCRED, NULL);
		VOP_INACTIVE(vp, p);
	} else {
		/*
		 * Any other processes trying to obtain this lock must first
		 * wait for VXLOCK to clear, then call the new lock operation.
		 */
		VOP_UNLOCK(vp, 0);
	}
	/*
	 * Reclaim the vnode.
	 */
	if (VOP_RECLAIM(vp, p))
		panic("vclean: cannot reclaim, vp %p", vp);
	if (active) {
		/*
		 * Inline copy of vrele() since VOP_INACTIVE
		 * has already been called.
		 */
		simple_lock(&vp->v_interlock);
		if (--vp->v_usecount <= 0) {
#ifdef DIAGNOSTIC
			if (vp->v_usecount < 0 || vp->v_writecount != 0) {
				vprint("vclean: bad ref count", vp);
				panic("vclean: ref cnt");
			}
#endif
			/*
			 * Insert at tail of LRU list.
			 */

			simple_unlock(&vp->v_interlock);
			simple_lock(&vnode_free_list_slock);
#ifdef DIAGNOSTIC
			if (vp->v_holdcnt > 0)
				panic("vclean: not clean, vp %p", vp);
#endif
			TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
			simple_unlock(&vnode_free_list_slock);
		} else
			simple_unlock(&vp->v_interlock);
	}

	KASSERT(vp->v_uobj.uo_npages == 0);
	cache_purge(vp);

	/*
	 * Done with purge, notify sleepers of the grim news.
	 */
	vp->v_op = dead_vnodeop_p;
	vp->v_tag = VT_NON;
	simple_lock(&vp->v_interlock);
	vp->v_flag &= ~VXLOCK;
	if (vp->v_flag & VXWANT) {
		vp->v_flag &= ~VXWANT;
		simple_unlock(&vp->v_interlock);
		wakeup((caddr_t)vp);
	} else
		simple_unlock(&vp->v_interlock);
}

/*
 * Recycle an unused vnode to the front of the free list.
 * Release the passed interlock if the vnode will be recycled.
 * Returns 1 if the vnode was recycled, 0 if it was still in use.
 */
int
vrecycle(vp, inter_lkp, p)
	struct vnode *vp;
	struct simplelock *inter_lkp;
	struct proc *p;
{

	simple_lock(&vp->v_interlock);
	if (vp->v_usecount == 0) {
		if (inter_lkp)
			simple_unlock(inter_lkp);
		vgonel(vp, p);
		return (1);
	}
	simple_unlock(&vp->v_interlock);
	return (0);
}

/*
 * Eliminate all activity associated with a vnode
 * in preparation for reuse.
 */
void
vgone(vp)
	struct vnode *vp;
{
	struct proc *p = curproc;	/* XXX */

	simple_lock(&vp->v_interlock);
	vgonel(vp, p);
}

/*
 * vgone, with the vp interlock held.
 */
void
vgonel(vp, p)
	struct vnode *vp;
	struct proc *p;
{
	struct vnode *vq;
	struct vnode *vx;

	LOCK_ASSERT(simple_lock_held(&vp->v_interlock));

	/*
	 * If a vgone (or vclean) is already in progress,
	 * wait until it is done and return.
	 */

	if (vp->v_flag & VXLOCK) {
		vp->v_flag |= VXWANT;
		ltsleep(vp, PINOD | PNORELOCK, "vgone", 0, &vp->v_interlock);
		return;
	}

	/*
	 * Clean out the filesystem specific data.
	 */

	vclean(vp, DOCLOSE, p);
	KASSERT((vp->v_flag & VONWORKLST) == 0);

	/*
	 * Delete from old mount point vnode list, if on one.
	 */

	if (vp->v_mount != NULL)
		insmntque(vp, (struct mount *)0);

	/*
	 * If special device, remove it from special device alias list.
	 * if it is on one.
	 */

	if ((vp->v_type == VBLK || vp->v_type == VCHR) && vp->v_specinfo != 0) {
		simple_lock(&spechash_slock);
		if (vp->v_hashchain != NULL) {
			/* Unlink vp from its device hash chain. */
			if (*vp->v_hashchain == vp) {
				*vp->v_hashchain = vp->v_specnext;
			} else {
				for (vq = *vp->v_hashchain; vq;
				     vq = vq->v_specnext) {
					if (vq->v_specnext != vp)
						continue;
					vq->v_specnext = vp->v_specnext;
					break;
				}
				if (vq == NULL)
					panic("missing bdev");
			}
			if (vp->v_flag & VALIASED) {
				/*
				 * If only one alias remains after vp is
				 * removed, it is no longer aliased.
				 */
				vx = NULL;
				for (vq = *vp->v_hashchain; vq;
				     vq = vq->v_specnext) {
					if (vq->v_rdev != vp->v_rdev ||
					    vq->v_type != vp->v_type)
						continue;
					if (vx)
						break;
					vx = vq;
				}
				if (vx == NULL)
					panic("missing alias");
				if (vq == NULL)
					vx->v_flag &= ~VALIASED;
				vp->v_flag &= ~VALIASED;
			}
		}
		simple_unlock(&spechash_slock);
		FREE(vp->v_specinfo, M_VNODE);
		vp->v_specinfo = NULL;
	}

	/*
	 * If it is on the freelist and not already at the head,
	 * move it to the head of the list.  The test of the back
	 * pointer and the reference count of zero is because
	 * it will be removed from the free list by getnewvnode,
	 * but will not have its reference count incremented until
	 * after calling vgone.  If the reference count were
	 * incremented first, vgone would (incorrectly) try to
	 * close the previous instance of the underlying object.
	 * So, the back pointer is explicitly set to `0xdeadb' in
	 * getnewvnode after removing it from the freelist to ensure
	 * that we do not try to move it here.
	 */

	if (vp->v_usecount == 0) {
		simple_lock(&vnode_free_list_slock);
		if (vp->v_holdcnt > 0)
			panic("vgonel: not clean, vp %p", vp);
		if (vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb &&
		    TAILQ_FIRST(&vnode_free_list) != vp) {
			TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
			TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
		}
		simple_unlock(&vnode_free_list_slock);
	}
	vp->v_type = VBAD;
}

/*
 * Lookup a vnode by device number.
 * Returns 1 and stores the vnode in *vpp if found, else 0.
 */
int
vfinddev(dev, type, vpp)
	dev_t dev;
	enum vtype type;
	struct vnode **vpp;
{
	struct vnode *vp;
	int rc = 0;

	simple_lock(&spechash_slock);
	for (vp = speclisth[SPECHASH(dev)]; vp; vp = vp->v_specnext) {
		if (dev != vp->v_rdev || type != vp->v_type)
			continue;
		*vpp = vp;
		rc = 1;
		break;
	}
	simple_unlock(&spechash_slock);
	return (rc);
}

/*
 * Revoke all the vnodes corresponding to the specified minor number
 * range (endpoints inclusive) of the specified major.
 */
void
vdevgone(maj, minl, minh, type)
	int maj, minl, minh;
	enum vtype type;
{
	struct vnode *vp;
	int mn;

	for (mn = minl; mn <= minh; mn++)
		if (vfinddev(makedev(maj, mn), type, &vp))
			VOP_REVOKE(vp, REVOKEALL);
}

/*
 * Calculate the total number of references to a special device.
 */
int
vcount(vp)
	struct vnode *vp;
{
	struct vnode *vq, *vnext;
	int count;

loop:
	if ((vp->v_flag & VALIASED) == 0)
		return (vp->v_usecount);
	simple_lock(&spechash_slock);
	for (count = 0, vq = *vp->v_hashchain; vq; vq = vnext) {
		vnext = vq->v_specnext;
		if (vq->v_rdev != vp->v_rdev || vq->v_type != vp->v_type)
			continue;
		/*
		 * Alias, but not in use, so flush it out.
		 */
		if (vq->v_usecount == 0 && vq != vp &&
		    (vq->v_flag & VXLOCK) == 0) {
			simple_unlock(&spechash_slock);
			vgone(vq);
			goto loop;
		}
		count += vq->v_usecount;
	}
	simple_unlock(&spechash_slock);
	return (count);
}

/*
 * Print out a description of a vnode.
 */
static const char * const typename[] =
   { "VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD" };

void
vprint(label, vp)
	char *label;
	struct vnode *vp;
{
	char buf[96];

	if (label != NULL)
		printf("%s: ", label);
	printf("tag %d type %s, usecount %d, writecount %ld, refcount %ld,",
	    vp->v_tag, typename[vp->v_type], vp->v_usecount, vp->v_writecount,
	    vp->v_holdcnt);
	buf[0] = '\0';
	if (vp->v_flag & VROOT)
		strcat(buf, "|VROOT");
	if (vp->v_flag & VTEXT)
		strcat(buf, "|VTEXT");
	if (vp->v_flag & VEXECMAP)
		strcat(buf, "|VEXECMAP");
	if (vp->v_flag & VSYSTEM)
		strcat(buf, "|VSYSTEM");
	if (vp->v_flag & VXLOCK)
		strcat(buf, "|VXLOCK");
	if (vp->v_flag & VXWANT)
		strcat(buf, "|VXWANT");
	if (vp->v_flag & VBWAIT)
		strcat(buf, "|VBWAIT");
	if (vp->v_flag & VALIASED)
		strcat(buf, "|VALIASED");
	if (buf[0] != '\0')
		printf(" flags (%s)", &buf[1]);
	if (vp->v_data == NULL) {
		printf("\n");
	} else {
		printf("\n\t");
		VOP_PRINT(vp);
	}
}

#ifdef DEBUG
/*
 * List all of the locked vnodes in the system.
 * Called when debugging the kernel.
 */
void
printlockedvnodes()
{
	struct mount *mp, *nmp;
	struct vnode *vp;

	printf("Locked vnodes\n");
	simple_lock(&mountlist_slock);
	for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock)) {
			nmp = mp->mnt_list.cqe_next;
			continue;
		}
		LIST_FOREACH(vp, &mp->mnt_vnodelist, v_mntvnodes) {
			if (VOP_ISLOCKED(vp))
				vprint(NULL, vp);
		}
		simple_lock(&mountlist_slock);
		nmp = mp->mnt_list.cqe_next;
		vfs_unbusy(mp);
	}
	simple_unlock(&mountlist_slock);
}
#endif

/*
 * Top level filesystem related information gathering.
 */
int
vfs_sysctl(name, namelen, oldp, oldlenp, newp, newlen, p)
	int *name;
	u_int namelen;
	void *oldp;
	size_t *oldlenp;
	void *newp;
	size_t newlen;
	struct proc *p;
{
#if defined(COMPAT_09) || defined(COMPAT_43) || defined(COMPAT_44)
	struct vfsconf vfc;
	extern const char * const mountcompatnames[];
	extern int nmountcompatnames;
#endif
	struct vfsops *vfsp;

	/* all sysctl names at this level are at least name and field */
	if (namelen < 2)
		return (ENOTDIR);		/* overloaded */

	/* Not generic: goes to file system. */
	if (name[0] != VFS_GENERIC) {
		static const struct ctlname vfsnames[VFS_MAXID+1]=CTL_VFS_NAMES;
		const char *vfsname;

		if (name[0] < 0 || name[0] > VFS_MAXID
		    || (vfsname = vfsnames[name[0]].ctl_name) == NULL)
			return (EOPNOTSUPP);

		vfsp = vfs_getopsbyname(vfsname);
		if (vfsp == NULL || vfsp->vfs_sysctl == NULL)
			return (EOPNOTSUPP);
		/* Delegate the remainder of the name to the file system. */
		return ((*vfsp->vfs_sysctl)(&name[1], namelen - 1,
		    oldp, oldlenp, newp, newlen, p));
	}

	/* The rest are generic vfs sysctls. */
	switch (name[1]) {
	case VFS_USERMOUNT:
		return sysctl_int(oldp, oldlenp, newp, newlen, &dovfsusermount);
#if defined(COMPAT_09) || defined(COMPAT_43) || defined(COMPAT_44)
	case VFS_MAXTYPENUM:
		/*
		 * Provided for 4.4BSD-Lite2 compatibility.
		 */
		return (sysctl_rdint(oldp, oldlenp, newp, nmountcompatnames));
	case VFS_CONF:
		/*
		 * Special: a node, next is a file system name.
		 * Provided for 4.4BSD-Lite2 compatibility.
		 */
		if (namelen < 3)
			return (ENOTDIR);	/* overloaded */
		if (name[2] >= nmountcompatnames || name[2] < 0 ||
		    mountcompatnames[name[2]] == NULL)
			return (EOPNOTSUPP);
		vfsp = vfs_getopsbyname(mountcompatnames[name[2]]);
		if (vfsp == NULL)
			return (EOPNOTSUPP);
		vfc.vfc_vfsops = vfsp;
		strncpy(vfc.vfc_name, vfsp->vfs_name, MFSNAMELEN);
		vfc.vfc_typenum = name[2];
		vfc.vfc_refcount = vfsp->vfs_refcount;
		vfc.vfc_flags = 0;
		vfc.vfc_mountroot = vfsp->vfs_mountroot;
		vfc.vfc_next = NULL;
		return (sysctl_rdstruct(oldp, oldlenp, newp, &vfc,
		    sizeof(struct vfsconf)));
#endif
	default:
		break;
	}
	return (EOPNOTSUPP);
}

int kinfo_vdebug = 1;
int kinfo_vgetfailed;
#define KINFO_VNODESLOP	10
/*
 * Dump vnode list (via sysctl).
 * Copyout address of vnode followed by vnode.
 */
/* ARGSUSED */
int
sysctl_vnode(where, sizep, p)
	char *where;
	size_t *sizep;
	struct proc *p;
{
	struct mount *mp, *nmp;
	struct vnode *nvp, *vp;
	char *bp = where, *savebp;
	char *ewhere;
	int error;

#define VPTRSZ	sizeof(struct vnode *)
#define VNODESZ	sizeof(struct vnode)
	/* Size probe: report how much space a full dump would need. */
	if (where == NULL) {
		*sizep = (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ);
		return (0);
	}
	ewhere = where + *sizep;

	simple_lock(&mountlist_slock);
	for (mp = mountlist.cqh_first; mp != (void *)&mountlist; mp = nmp) {
		if (vfs_busy(mp, LK_NOWAIT, &mountlist_slock)) {
			nmp = mp->mnt_list.cqe_next;
			continue;
		}
		savebp = bp;
again:
		simple_lock(&mntvnode_slock);
		for (vp = mp->mnt_vnodelist.lh_first;
		     vp != NULL;
		     vp = nvp) {
			/*
			 * Check that the vp is still associated with
			 * this filesystem.  RACE: could have been
			 * recycled onto the same filesystem.
			 */
			if (vp->v_mount != mp) {
				simple_unlock(&mntvnode_slock);
				if (kinfo_vdebug)
					printf("kinfo: vp changed\n");
				/* Rewind output and rescan this mount. */
				bp = savebp;
				goto again;
			}
			nvp = vp->v_mntvnodes.le_next;
			if (bp + VPTRSZ + VNODESZ > ewhere) {
				simple_unlock(&mntvnode_slock);
				*sizep = bp - where;
				return (ENOMEM);
			}
			/* Drop the list lock across the copyouts (may fault). */
			simple_unlock(&mntvnode_slock);
			if ((error = copyout((caddr_t)&vp, bp, VPTRSZ)) ||
			    (error = copyout((caddr_t)vp, bp + VPTRSZ, VNODESZ)))
				return (error);
			bp += VPTRSZ + VNODESZ;
			simple_lock(&mntvnode_slock);
		}
		simple_unlock(&mntvnode_slock);
		simple_lock(&mountlist_slock);
		nmp = mp->mnt_list.cqe_next;
		vfs_unbusy(mp);
	}
	simple_unlock(&mountlist_slock);

	*sizep = bp - where;
	return (0);
}

/*
 * Check to see if a filesystem is mounted on a block device.
 */
int
vfs_mountedon(vp)
	struct vnode *vp;
{
	struct vnode *vq;
	int error = 0;

	if (vp->v_specmountpoint != NULL)
		return (EBUSY);
	if (vp->v_flag & VALIASED) {
		/* Check every alias of the device as well. */
		simple_lock(&spechash_slock);
		for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
			if (vq->v_rdev != vp->v_rdev ||
			    vq->v_type != vp->v_type)
				continue;
			if (vq->v_specmountpoint != NULL) {
				error = EBUSY;
				break;
			}
		}
		simple_unlock(&spechash_slock);
	}
	return (error);
}

/*
 * Build hash lists of net addresses and hang them off the mount point.
 * Called by ufs_mount() to set up the lists of export addresses.
 */
static int
vfs_hang_addrlist(mp, nep, argp)
	struct mount *mp;
	struct netexport *nep;
	struct export_args *argp;
{
	struct netcred *np, *enp;
	struct radix_node_head *rnh;
	int i;
	struct radix_node *rn;
	struct sockaddr *saddr, *smask = 0;
	struct domain *dom;
	int error;

	/* Zero-length address means "set the default export". */
	if (argp->ex_addrlen == 0) {
		if (mp->mnt_flag & MNT_DEFEXPORTED)
			return (EPERM);
		np = &nep->ne_defexported;
		np->netc_exflags = argp->ex_flags;
		crcvt(&np->netc_anon, &argp->ex_anon);
		np->netc_anon.cr_ref = 1;
		mp->mnt_flag |= MNT_DEFEXPORTED;
		return (0);
	}

	if (argp->ex_addrlen > MLEN)
		return (EINVAL);

	/* One allocation holds the netcred plus the address and mask. */
	i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen;
	np = (struct netcred *)malloc(i, M_NETADDR, M_WAITOK);
	memset((caddr_t)np, 0, i);
	saddr = (struct sockaddr *)(np + 1);
	error = copyin(argp->ex_addr, (caddr_t)saddr, argp->ex_addrlen);
	if (error)
		goto out;
	if (saddr->sa_len > argp->ex_addrlen)
		saddr->sa_len = argp->ex_addrlen;
	if (argp->ex_masklen) {
		smask = (struct sockaddr *)((caddr_t)saddr + argp->ex_addrlen);
		error = copyin(argp->ex_mask, (caddr_t)smask, argp->ex_masklen);
		if (error)
			goto out;
		if (smask->sa_len > argp->ex_masklen)
			smask->sa_len = argp->ex_masklen;
	}
	i = saddr->sa_family;
	if ((rnh = nep->ne_rtable[i]) == 0) {
		/*
		 * Seems silly to initialize every AF when most are not
		 * used, do so on demand here
		 */
		for (dom = domains; dom; dom = dom->dom_next)
			if (dom->dom_family == i && dom->dom_rtattach) {
				dom->dom_rtattach((void **)&nep->ne_rtable[i],
					dom->dom_rtoffset);
				break;
			}
		if ((rnh = nep->ne_rtable[i]) == 0) {
			error = ENOBUFS;
			goto out;
		}
	}
	rn = (*rnh->rnh_addaddr)((caddr_t)saddr, (caddr_t)smask, rnh,
		np->netc_rnodes);
	if (rn == 0 || np != (struct netcred *)rn) { /* already exists */
		/*
		 * An entry for this address already exists; it is only
		 * acceptable if its export flags and anon credentials
		 * match the new request exactly.
		 */
		if (rn == 0) {
			enp = (struct netcred *)(*rnh->rnh_lookup)(saddr,
			    smask, rnh);
			if (enp == 0) {
				error = EPERM;
				goto out;
			}
		} else
			enp = (struct netcred *)rn;

		if (enp->netc_exflags != argp->ex_flags ||
		    enp->netc_anon.cr_uid != argp->ex_anon.cr_uid ||
		    enp->netc_anon.cr_gid != argp->ex_anon.cr_gid ||
		    enp->netc_anon.cr_ngroups != argp->ex_anon.cr_ngroups ||
		    memcmp(&enp->netc_anon.cr_groups, &argp->ex_anon.cr_groups,
			enp->netc_anon.cr_ngroups))
			error = EPERM;
		else
			error = 0;
		goto out;
	}
	np->netc_exflags = argp->ex_flags;
	crcvt(&np->netc_anon, &argp->ex_anon);
	np->netc_anon.cr_ref = 1;
	return (0);
out:
	free(np, M_NETADDR);
	return (error);
}

/* ARGSUSED */
static int
vfs_free_netcred(rn, w)
	struct radix_node *rn;
	void *w;
{
	struct radix_node_head *rnh = (struct radix_node_head *)w;

	/* Walktree callback: delete and free one export entry. */
	(*rnh->rnh_deladdr)(rn->rn_key, rn->rn_mask, rnh);
	free((caddr_t)rn, M_NETADDR);
	return (0);
}

/*
 * Free the net address hash lists that are hanging off the mount points.
2183 */ 2184 static void 2185 vfs_free_addrlist(nep) 2186 struct netexport *nep; 2187 { 2188 int i; 2189 struct radix_node_head *rnh; 2190 2191 for (i = 0; i <= AF_MAX; i++) 2192 if ((rnh = nep->ne_rtable[i]) != NULL) { 2193 (*rnh->rnh_walktree)(rnh, vfs_free_netcred, rnh); 2194 free((caddr_t)rnh, M_RTABLE); 2195 nep->ne_rtable[i] = 0; 2196 } 2197 } 2198 2199 int 2200 vfs_export(mp, nep, argp) 2201 struct mount *mp; 2202 struct netexport *nep; 2203 struct export_args *argp; 2204 { 2205 int error; 2206 2207 if (argp->ex_flags & MNT_DELEXPORT) { 2208 if (mp->mnt_flag & MNT_EXPUBLIC) { 2209 vfs_setpublicfs(NULL, NULL, NULL); 2210 mp->mnt_flag &= ~MNT_EXPUBLIC; 2211 } 2212 vfs_free_addrlist(nep); 2213 mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED); 2214 } 2215 if (argp->ex_flags & MNT_EXPORTED) { 2216 if (argp->ex_flags & MNT_EXPUBLIC) { 2217 if ((error = vfs_setpublicfs(mp, nep, argp)) != 0) 2218 return (error); 2219 mp->mnt_flag |= MNT_EXPUBLIC; 2220 } 2221 if ((error = vfs_hang_addrlist(mp, nep, argp)) != 0) 2222 return (error); 2223 mp->mnt_flag |= MNT_EXPORTED; 2224 } 2225 return (0); 2226 } 2227 2228 /* 2229 * Set the publicly exported filesystem (WebNFS). Currently, only 2230 * one public filesystem is possible in the spec (RFC 2054 and 2055) 2231 */ 2232 int 2233 vfs_setpublicfs(mp, nep, argp) 2234 struct mount *mp; 2235 struct netexport *nep; 2236 struct export_args *argp; 2237 { 2238 int error; 2239 struct vnode *rvp; 2240 char *cp; 2241 2242 /* 2243 * mp == NULL -> invalidate the current info, the FS is 2244 * no longer exported. May be called from either vfs_export 2245 * or unmount, so check if it hasn't already been done. 2246 */ 2247 if (mp == NULL) { 2248 if (nfs_pub.np_valid) { 2249 nfs_pub.np_valid = 0; 2250 if (nfs_pub.np_index != NULL) { 2251 FREE(nfs_pub.np_index, M_TEMP); 2252 nfs_pub.np_index = NULL; 2253 } 2254 } 2255 return (0); 2256 } 2257 2258 /* 2259 * Only one allowed at a time. 
2260 */ 2261 if (nfs_pub.np_valid != 0 && mp != nfs_pub.np_mount) 2262 return (EBUSY); 2263 2264 /* 2265 * Get real filehandle for root of exported FS. 2266 */ 2267 memset((caddr_t)&nfs_pub.np_handle, 0, sizeof(nfs_pub.np_handle)); 2268 nfs_pub.np_handle.fh_fsid = mp->mnt_stat.f_fsid; 2269 2270 if ((error = VFS_ROOT(mp, &rvp))) 2271 return (error); 2272 2273 if ((error = VFS_VPTOFH(rvp, &nfs_pub.np_handle.fh_fid))) 2274 return (error); 2275 2276 vput(rvp); 2277 2278 /* 2279 * If an indexfile was specified, pull it in. 2280 */ 2281 if (argp->ex_indexfile != NULL) { 2282 MALLOC(nfs_pub.np_index, char *, MAXNAMLEN + 1, M_TEMP, 2283 M_WAITOK); 2284 error = copyinstr(argp->ex_indexfile, nfs_pub.np_index, 2285 MAXNAMLEN, (size_t *)0); 2286 if (!error) { 2287 /* 2288 * Check for illegal filenames. 2289 */ 2290 for (cp = nfs_pub.np_index; *cp; cp++) { 2291 if (*cp == '/') { 2292 error = EINVAL; 2293 break; 2294 } 2295 } 2296 } 2297 if (error) { 2298 FREE(nfs_pub.np_index, M_TEMP); 2299 return (error); 2300 } 2301 } 2302 2303 nfs_pub.np_mount = mp; 2304 nfs_pub.np_valid = 1; 2305 return (0); 2306 } 2307 2308 struct netcred * 2309 vfs_export_lookup(mp, nep, nam) 2310 struct mount *mp; 2311 struct netexport *nep; 2312 struct mbuf *nam; 2313 { 2314 struct netcred *np; 2315 struct radix_node_head *rnh; 2316 struct sockaddr *saddr; 2317 2318 np = NULL; 2319 if (mp->mnt_flag & MNT_EXPORTED) { 2320 /* 2321 * Lookup in the export list first. 2322 */ 2323 if (nam != NULL) { 2324 saddr = mtod(nam, struct sockaddr *); 2325 rnh = nep->ne_rtable[saddr->sa_family]; 2326 if (rnh != NULL) { 2327 np = (struct netcred *) 2328 (*rnh->rnh_matchaddr)((caddr_t)saddr, 2329 rnh); 2330 if (np && np->netc_rnodes->rn_flags & RNF_ROOT) 2331 np = NULL; 2332 } 2333 } 2334 /* 2335 * If no address match, use the default if it exists. 
2336 */ 2337 if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED) 2338 np = &nep->ne_defexported; 2339 } 2340 return (np); 2341 } 2342 2343 /* 2344 * Do the usual access checking. 2345 * file_mode, uid and gid are from the vnode in question, 2346 * while acc_mode and cred are from the VOP_ACCESS parameter list 2347 */ 2348 int 2349 vaccess(type, file_mode, uid, gid, acc_mode, cred) 2350 enum vtype type; 2351 mode_t file_mode; 2352 uid_t uid; 2353 gid_t gid; 2354 mode_t acc_mode; 2355 struct ucred *cred; 2356 { 2357 mode_t mask; 2358 2359 /* 2360 * Super-user always gets read/write access, but execute access depends 2361 * on at least one execute bit being set. 2362 */ 2363 if (cred->cr_uid == 0) { 2364 if ((acc_mode & VEXEC) && type != VDIR && 2365 (file_mode & (S_IXUSR|S_IXGRP|S_IXOTH)) == 0) 2366 return (EACCES); 2367 return (0); 2368 } 2369 2370 mask = 0; 2371 2372 /* Otherwise, check the owner. */ 2373 if (cred->cr_uid == uid) { 2374 if (acc_mode & VEXEC) 2375 mask |= S_IXUSR; 2376 if (acc_mode & VREAD) 2377 mask |= S_IRUSR; 2378 if (acc_mode & VWRITE) 2379 mask |= S_IWUSR; 2380 return ((file_mode & mask) == mask ? 0 : EACCES); 2381 } 2382 2383 /* Otherwise, check the groups. */ 2384 if (cred->cr_gid == gid || groupmember(gid, cred)) { 2385 if (acc_mode & VEXEC) 2386 mask |= S_IXGRP; 2387 if (acc_mode & VREAD) 2388 mask |= S_IRGRP; 2389 if (acc_mode & VWRITE) 2390 mask |= S_IWGRP; 2391 return ((file_mode & mask) == mask ? 0 : EACCES); 2392 } 2393 2394 /* Otherwise, check everyone else. */ 2395 if (acc_mode & VEXEC) 2396 mask |= S_IXOTH; 2397 if (acc_mode & VREAD) 2398 mask |= S_IROTH; 2399 if (acc_mode & VWRITE) 2400 mask |= S_IWOTH; 2401 return ((file_mode & mask) == mask ? 0 : EACCES); 2402 } 2403 2404 /* 2405 * Unmount all file systems. 2406 * We traverse the list in reverse order under the assumption that doing so 2407 * will avoid needing to worry about dependencies. 
 */
void
vfs_unmountall(p)
	struct proc *p;		/* process context used for the unmounts */
{
	struct mount *mp, *nmp;
	int allerror, error;

	/* Walk the mount list tail-to-head; nmp is saved before mp goes. */
	for (allerror = 0,
	    mp = mountlist.cqh_last; mp != (void *)&mountlist; mp = nmp) {
		nmp = mp->mnt_list.cqe_prev;
#ifdef DEBUG
		printf("unmounting %s (%s)...\n",
		    mp->mnt_stat.f_mntonname, mp->mnt_stat.f_mntfromname);
#endif
		/*
		 * XXX Freeze syncer. Must do this before locking the
		 * mount point. See dounmount() for details.
		 */
		lockmgr(&syncer_lock, LK_EXCLUSIVE, NULL);
		/* If the mount point is busy, skip it (but report later). */
		if (vfs_busy(mp, 0, 0)) {
			lockmgr(&syncer_lock, LK_RELEASE, NULL);
			continue;
		}
		if ((error = dounmount(mp, MNT_FORCE, p)) != 0) {
			printf("unmount of %s failed with error %d\n",
			    mp->mnt_stat.f_mntonname, error);
			allerror = 1;
		}
	}
	if (allerror)
		printf("WARNING: some file systems would not unmount\n");
}

/*
 * Sync and unmount file systems before shutting down.
 */
void
vfs_shutdown()
{
	struct buf *bp;
	int iter, nbusy, nbusy_prev = 0, dcount, s;
	struct proc *p = curproc;

	/* XXX we're certainly not running in proc0's context! */
	if (p == NULL)
		p = &proc0;

	printf("syncing disks... ");

	/* remove user process from run queue */
	suspendsched();
	(void) spl0();

	/* avoid coming back this way again if we panic. */
	doing_shutdown = 1;

	sys_sync(p, NULL, NULL);

	/*
	 * Wait for sync to finish.  dcount bounds the number of
	 * softdep-triggered writes we will issue before giving up.
	 */
	dcount = 10000;
	for (iter = 0; iter < 20;) {
		/* Count buffers that are still busy being written. */
		nbusy = 0;
		for (bp = &buf[nbuf]; --bp >= buf; ) {
			if ((bp->b_flags & (B_BUSY|B_INVAL|B_READ)) == B_BUSY)
				nbusy++;
			/*
			 * With soft updates, some buffers that are
			 * written will be remarked as dirty until other
			 * buffers are written.
			 */
			if (bp->b_vp && bp->b_vp->v_mount
			    && (bp->b_vp->v_mount->mnt_flag & MNT_SOFTDEP)
			    && (bp->b_flags & B_DELWRI)) {
				/* Claim the buffer and push it out async. */
				s = splbio();
				bremfree(bp);
				bp->b_flags |= B_BUSY;
				splx(s);
				nbusy++;
				bawrite(bp);
				if (dcount-- <= 0) {
					printf("softdep ");
					goto fail;
				}
			}
		}
		if (nbusy == 0)
			break;
		if (nbusy_prev == 0)
			nbusy_prev = nbusy;
		printf("%d ", nbusy);
		/* Back off progressively; iter only advances on stalls. */
		tsleep(&nbusy, PRIBIO, "bflush",
		    (iter == 0) ? 1 : hz / 25 * iter);
		if (nbusy >= nbusy_prev) /* we didn't flush anything */
			iter++;
		else
			nbusy_prev = nbusy;
	}
	if (nbusy) {
fail:
#if defined(DEBUG) || defined(DEBUG_HALT_BUSY)
		printf("giving up\nPrinting vnodes for busy buffers\n");
		for (bp = &buf[nbuf]; --bp >= buf; )
			if ((bp->b_flags & (B_BUSY|B_INVAL|B_READ)) == B_BUSY)
				vprint(NULL, bp->b_vp);

#if defined(DDB) && defined(DEBUG_HALT_BUSY)
		Debugger();
#endif

#else /* defined(DEBUG) || defined(DEBUG_HALT_BUSY) */
		printf("giving up\n");
#endif /* defined(DEBUG) || defined(DEBUG_HALT_BUSY) */
		return;
	} else
		printf("done\n");

	/*
	 * If we've panic'd, don't make the situation potentially
	 * worse by unmounting the file systems.
	 */
	if (panicstr != NULL)
		return;

	/* Release inodes held by texts before update. */
#ifdef notdef
	vnshutdown();
#endif
	/* Unmount file systems. */
	vfs_unmountall(p);
}

/*
 * Mount the root file system. If the operator didn't specify a
 * file system to use, try all possible file systems until one
 * succeeds.
2544 */ 2545 int 2546 vfs_mountroot() 2547 { 2548 struct vfsops *v; 2549 2550 if (root_device == NULL) 2551 panic("vfs_mountroot: root device unknown"); 2552 2553 switch (root_device->dv_class) { 2554 case DV_IFNET: 2555 if (rootdev != NODEV) 2556 panic("vfs_mountroot: rootdev set for DV_IFNET " 2557 "(0x%08x -> %d,%d)", rootdev, 2558 major(rootdev), minor(rootdev)); 2559 break; 2560 2561 case DV_DISK: 2562 if (rootdev == NODEV) 2563 panic("vfs_mountroot: rootdev not set for DV_DISK"); 2564 break; 2565 2566 default: 2567 printf("%s: inappropriate for root file system\n", 2568 root_device->dv_xname); 2569 return (ENODEV); 2570 } 2571 2572 /* 2573 * If user specified a file system, use it. 2574 */ 2575 if (mountroot != NULL) 2576 return ((*mountroot)()); 2577 2578 /* 2579 * Try each file system currently configured into the kernel. 2580 */ 2581 for (v = LIST_FIRST(&vfs_list); v != NULL; v = LIST_NEXT(v, vfs_list)) { 2582 if (v->vfs_mountroot == NULL) 2583 continue; 2584 #ifdef DEBUG 2585 printf("mountroot: trying %s...\n", v->vfs_name); 2586 #endif 2587 if ((*v->vfs_mountroot)() == 0) { 2588 printf("root file system type: %s\n", v->vfs_name); 2589 break; 2590 } 2591 } 2592 2593 if (v == NULL) { 2594 printf("no file system for %s", root_device->dv_xname); 2595 if (root_device->dv_class == DV_DISK) 2596 printf(" (dev 0x%x)", rootdev); 2597 printf("\n"); 2598 return (EFTYPE); 2599 } 2600 return (0); 2601 } 2602 2603 /* 2604 * Given a file system name, look up the vfsops for that 2605 * file system, or return NULL if file system isn't present 2606 * in the kernel. 2607 */ 2608 struct vfsops * 2609 vfs_getopsbyname(name) 2610 const char *name; 2611 { 2612 struct vfsops *v; 2613 2614 for (v = LIST_FIRST(&vfs_list); v != NULL; v = LIST_NEXT(v, vfs_list)) { 2615 if (strcmp(v->vfs_name, name) == 0) 2616 break; 2617 } 2618 2619 return (v); 2620 } 2621 2622 /* 2623 * Establish a file system and initialize it. 
 */
int
vfs_attach(vfs)
	struct vfsops *vfs;	/* file system operations vector to register */
{
	struct vfsops *v;
	int error = 0;


	/*
	 * Make sure this file system doesn't already exist.
	 */
	LIST_FOREACH(v, &vfs_list, vfs_list) {
		if (strcmp(vfs->vfs_name, v->vfs_name) == 0) {
			error = EEXIST;
			goto out;
		}
	}

	/*
	 * Initialize the vnode operations for this file system.
	 */
	vfs_opv_init(vfs->vfs_opv_descs);

	/*
	 * Now initialize the file system itself.
	 */
	(*vfs->vfs_init)();

	/*
	 * ...and link it into the kernel's list.
	 */
	LIST_INSERT_HEAD(&vfs_list, vfs, vfs_list);

	/*
	 * Sanity: make sure the reference count is 0.
	 */
	vfs->vfs_refcount = 0;

 out:
	return (error);
}

/*
 * Remove a file system from the kernel.
 * Returns EBUSY if the file system is still referenced, ESRCH if it
 * was never attached, 0 on success.
 */
int
vfs_detach(vfs)
	struct vfsops *vfs;	/* file system previously passed to vfs_attach() */
{
	struct vfsops *v;

	/*
	 * Make sure no one is using the filesystem.
	 */
	if (vfs->vfs_refcount != 0)
		return (EBUSY);

	/*
	 * ...and remove it from the kernel's list.
	 */
	LIST_FOREACH(v, &vfs_list, vfs_list) {
		if (v == vfs) {
			LIST_REMOVE(v, vfs_list);
			break;
		}
	}

	/* v is NULL when the loop exhausted the list without a match. */
	if (v == NULL)
		return (ESRCH);

	/*
	 * Now run the file system-specific cleanups.
	 */
	(*vfs->vfs_done)();

	/*
	 * Free the vnode operations vector.
	 */
	vfs_opv_free(vfs->vfs_opv_descs);
	return (0);
}

/*
 * Re-run the per-file-system reinit hook (where provided) for every
 * attached file system.
 */
void
vfs_reinit(void)
{
	struct vfsops *vfs;

	LIST_FOREACH(vfs, &vfs_list, vfs_list) {
		if (vfs->vfs_reinit) {
			(*vfs->vfs_reinit)();
		}
	}
}

#ifdef DDB
/* Bit names for buf b_flags, in bitmask_snprintf() "new style" format. */
const char buf_flagbits[] =
	"\20\1AGE\2NEEDCOMMIT\3ASYNC\4BAD\5BUSY\6SCANNED\7CALL\10DELWRI"
	"\11DIRTY\12DONE\13EINTR\14ERROR\15GATHERED\16INVAL\17LOCKED\20NOCACHE"
	"\21ORDERED\22CACHE\23PHYS\24RAW\25READ\26TAPE\30WANTED"
	"\32XXX\33VFLUSH";

/*
 * DDB helper: print the interesting fields of a struct buf through
 * the caller-supplied printf-like function.  "full" is accepted for
 * symmetry with vfs_vnode_print() but not consulted here.
 */
void
vfs_buf_print(bp, full, pr)
	struct buf *bp;
	int full;
	void (*pr) __P((const char *, ...));
{
	char buf[1024];

	(*pr)(" vp %p lblkno 0x%x blkno 0x%x dev 0x%x\n",
	    bp->b_vp, bp->b_lblkno, bp->b_blkno, bp->b_dev);

	/* Decode b_flags into symbolic form using buf_flagbits above. */
	bitmask_snprintf(bp->b_flags, buf_flagbits, buf, sizeof(buf));
	(*pr)(" error %d flags 0x%s\n", bp->b_error, buf);

	(*pr)(" bufsize 0x%lx bcount 0x%lx resid 0x%lx\n",
	    bp->b_bufsize, bp->b_bcount, bp->b_resid);
	(*pr)(" data %p saveaddr %p dep %p\n",
	    bp->b_data, bp->b_saveaddr, LIST_FIRST(&bp->b_dep));
	(*pr)(" iodone %p\n", bp->b_iodone);
}


/* Bit names for vnode v_flag, in bitmask_snprintf() format. */
const char vnode_flagbits[] =
	"\20\1ROOT\2TEXT\3SYSTEM\4ISTTY\5EXECMAP"
	"\11XLOCK\12XWANT\13BWAIT\14ALIASED"
	"\15DIROP\16LAYER\17ONWORKLIST\20DIRTY";

/* Printable names indexed by enum vtype (v_type). */
const char *vnode_types[] = {
	"VNON",
	"VREG",
	"VDIR",
	"VBLK",
	"VCHR",
	"VLNK",
	"VSOCK",
	"VFIFO",
	"VBAD",
};

/* Printable names indexed by enum vtagtype (v_tag). */
const char *vnode_tags[] = {
	"VT_NON",
	"VT_UFS",
	"VT_NFS",
	"VT_MFS",
	"VT_MSDOSFS",
	"VT_LFS",
	"VT_LOFS",
	"VT_FDESC",
	"VT_PORTAL",
	"VT_NULL",
	"VT_UMAP",
	"VT_KERNFS",
	"VT_PROCFS",
	"VT_AFS",
	"VT_ISOFS",
	"VT_UNION",
	"VT_ADOSFS",
	"VT_EXT2FS",
	"VT_CODA",
	"VT_FILECORE",
	"VT_NTFS",
	"VT_VFS",
	"VT_OVERLAY"
};


/*
 * DDB helper: print the state of a vnode through the caller-supplied
 * printf-like function.  When "full" is non-zero, also walk the clean
 * and dirty buffer lists and print each buffer via vfs_buf_print().
 */
void
vfs_vnode_print(vp, full, pr)
	struct vnode *vp;
	int full;
	void (*pr) __P((const char *, ...));
{
	char buf[256];
	const char *vtype, *vtag;
	int tmp;

	/* First dump the embedded uvm_object, then the vnode fields. */
	uvm_object_printit(&vp->v_uobj, full, pr);
	bitmask_snprintf(vp->v_flag, vnode_flagbits, buf, sizeof(buf));
	(*pr)("\nVNODE flags %s\n", buf);
	(*pr)("mp %p numoutput %d size 0x%llx\n",
	    vp->v_mount, vp->v_numoutput, vp->v_size);

	(*pr)("data %p usecount %d writecount %ld holdcnt %ld numoutput %d\n",
	    vp->v_data, vp->v_usecount, vp->v_writecount,
	    vp->v_holdcnt, vp->v_numoutput);

	/*
	 * Map v_type/v_tag to symbolic names, guarding against values
	 * outside the name tables.  NOTE(review): "tmp" is assigned here
	 * but only its >= 0 test is used; the upper-bound checks reread
	 * vp->v_type / vp->v_tag directly.  Behavior is unchanged, but
	 * the variable is largely redundant.
	 */
	vtype = ((tmp = vp->v_type) >= 0 &&
	    vp->v_type < sizeof(vnode_types) / sizeof(vnode_types[0])) ?
	    vnode_types[vp->v_type] : "UNKNOWN";
	vtag = ((tmp = vp->v_tag) >= 0 &&
	    vp->v_tag < sizeof(vnode_tags) / sizeof(vnode_tags[0])) ?
	    vnode_tags[vp->v_tag] : "UNKNOWN";

	(*pr)("type %s(%d) tag %s(%d) id 0x%lx mount %p typedata %p\n",
	    vtype, vp->v_type, vtag, vp->v_tag,
	    vp->v_id, vp->v_mount, vp->v_mountedhere);

	if (full) {
		struct buf *bp;

		(*pr)("clean bufs:\n");
		LIST_FOREACH(bp, &vp->v_cleanblkhd, b_vnbufs) {
			(*pr)(" bp %p\n", bp);
			vfs_buf_print(bp, full, pr);
		}

		(*pr)("dirty bufs:\n");
		LIST_FOREACH(bp, &vp->v_dirtyblkhd, b_vnbufs) {
			(*pr)(" bp %p\n", bp);
			vfs_buf_print(bp, full, pr);
		}
	}
}
#endif