/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * @(#)vfs_subr.c	8.31 (Berkeley) 5/26/95
 * $FreeBSD: src/sys/kern/vfs_subr.c,v 1.249.2.30 2003/04/04 20:35:57 tegge Exp $
 * $DragonFly: src/sys/kern/vfs_subr.c,v 1.101 2006/12/28 18:29:03 dillon Exp $
 */

/*
 * External virtual filesystem routines
 */
#include "opt_ddb.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/buf.h>
#include <sys/conf.h>
#include <sys/dirent.h>
#include <sys/domain.h>
#include <sys/eventhandler.h>
#include <sys/fcntl.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/mount.h>
#include <sys/proc.h>
#include <sys/reboot.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/unistd.h>
#include <sys/vmmeter.h>
#include <sys/vnode.h>

#include <machine/limits.h>

#include <vm/vm.h>
#include <vm/vm_object.h>
#include <vm/vm_extern.h>
#include <vm/vm_kern.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_page.h>
#include <vm/vm_pager.h>
#include <vm/vnode_pager.h>
#include <vm/vm_zone.h>

#include <sys/buf2.h>
#include <sys/thread2.h>

static MALLOC_DEFINE(M_NETADDR, "Export Host", "Export host address structure");

int numvnodes;
SYSCTL_INT(_debug, OID_AUTO, numvnodes, CTLFLAG_RD, &numvnodes, 0, "");
int vfs_fastdev = 1;
SYSCTL_INT(_vfs, OID_AUTO, fastdev, CTLFLAG_RW, &vfs_fastdev, 0, "");

enum vtype iftovt_tab[16] = {
	VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
	VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
};
int vttoif_tab[9] = {
	0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
	S_IFSOCK, S_IFIFO, S_IFMT,
};

static int reassignbufcalls;
SYSCTL_INT(_vfs, OID_AUTO, reassignbufcalls, CTLFLAG_RW,
	&reassignbufcalls, 0, "");
static int reassignbufloops;
SYSCTL_INT(_vfs, OID_AUTO, reassignbufloops, CTLFLAG_RW,
	&reassignbufloops, 0, "");
static int reassignbufsortgood;
SYSCTL_INT(_vfs, OID_AUTO, reassignbufsortgood, CTLFLAG_RW,
	&reassignbufsortgood, 0, "");
static int reassignbufsortbad;
SYSCTL_INT(_vfs, OID_AUTO, reassignbufsortbad, CTLFLAG_RW,
	&reassignbufsortbad, 0, "");
static int reassignbufmethod = 1;
SYSCTL_INT(_vfs, OID_AUTO, reassignbufmethod, CTLFLAG_RW,
	&reassignbufmethod, 0, "");

int nfs_mount_type = -1;
static struct lwkt_token spechash_token;
struct nfs_public nfs_pub;	/* publicly exported FS */

int desiredvnodes;
SYSCTL_INT(_kern, KERN_MAXVNODES, maxvnodes, CTLFLAG_RW,
	&desiredvnodes, 0, "Maximum number of vnodes");

static void	vfs_free_addrlist (struct netexport *nep);
static int	vfs_free_netcred (struct radix_node *rn, void *w);
static int	vfs_hang_addrlist (struct mount *mp, struct netexport *nep,
				   struct export_args *argp);

extern int dev_ref_debug;

/*
 * Red black tree functions
 */
static int rb_buf_compare(struct buf *b1, struct buf *b2);
RB_GENERATE2(buf_rb_tree, buf, b_rbnode, rb_buf_compare, off_t, b_loffset);
RB_GENERATE2(buf_rb_hash, buf, b_rbhash, rb_buf_compare, off_t, b_loffset);

static int
rb_buf_compare(struct buf *b1, struct buf *b2)
{
	if (b1->b_loffset < b2->b_loffset)
		return(-1);
	if (b1->b_loffset > b2->b_loffset)
		return(1);
	return(0);
}
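
/*
 * Illustrative sketch (not part of the original source): RB_GENERATE2
 * keys both trees on b_loffset, so the generated helpers can, if the
 * tree headers provide a keyed lookup, find a buffer directly by
 * logical offset.  Something like the following, with "vp" and
 * "loffset" supplied by the caller:
 *
 *	struct buf *bp;
 *
 *	bp = buf_rb_hash_RB_LOOKUP(&vp->v_rbhash_tree, loffset);
 *	if (bp != NULL)
 *		...		(a buffer exists at loffset)
 */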

/*
 * Returns non-zero if the vnode is a candidate for lazy msyncing.
 */
static __inline int
vshouldmsync(struct vnode *vp, int usecount)
{
	if (vp->v_holdcnt != 0 || vp->v_usecount != usecount)
		return (0);		/* other holders */
	if (vp->v_object &&
	    (vp->v_object->ref_count || vp->v_object->resident_page_count)) {
		return (0);
	}
	return (1);
}

/*
 * Initialize the vnode management data structures.
 *
 * Called from vfsinit()
 */
void
vfs_subr_init(void)
{
	/*
	 * The desired number of vnodes is a function of the physical page
	 * count and the size of the kernel's heap.  It scales in proportion
	 * to the amount of available physical memory.  This can cause
	 * trouble on 64-bit and large-memory platforms.
	 */
	/* desiredvnodes = maxproc + vmstats.v_page_count / 4; */
	desiredvnodes =
		min(maxproc + vmstats.v_page_count / 4,
		    2 * KvaSize /
		    (5 * (sizeof(struct vm_object) + sizeof(struct vnode))));

	lwkt_token_init(&spechash_token);
}

/*
 * Knob to control the precision of file timestamps:
 *
 *   0 = seconds only; nanoseconds zeroed.
 *   1 = seconds and nanoseconds, accurate within 1/HZ.
 *   2 = seconds and nanoseconds, truncated to microseconds.
 * >=3 = seconds and nanoseconds, maximum precision.
 */
enum { TSP_SEC, TSP_HZ, TSP_USEC, TSP_NSEC };

static int timestamp_precision = TSP_SEC;
SYSCTL_INT(_vfs, OID_AUTO, timestamp_precision, CTLFLAG_RW,
	&timestamp_precision, 0, "");

/*
 * Get a current timestamp.
 */
void
vfs_timestamp(struct timespec *tsp)
{
	struct timeval tv;

	switch (timestamp_precision) {
	case TSP_SEC:
		tsp->tv_sec = time_second;
		tsp->tv_nsec = 0;
		break;
	case TSP_HZ:
		getnanotime(tsp);
		break;
	case TSP_USEC:
		microtime(&tv);
		TIMEVAL_TO_TIMESPEC(&tv, tsp);
		break;
	case TSP_NSEC:
	default:
		nanotime(tsp);
		break;
	}
}
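
/*
 * Illustrative note (not part of the original source): the knob above
 * is a writable sysctl, so timestamp precision can be changed at run
 * time from userland, e.g.:
 *
 *	sysctl vfs.timestamp_precision=3	(maximum precision)
 */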

/*
 * Set vnode attributes to VNOVAL
 */
void
vattr_null(struct vattr *vap)
{
	vap->va_type = VNON;
	vap->va_size = VNOVAL;
	vap->va_bytes = VNOVAL;
	vap->va_mode = VNOVAL;
	vap->va_nlink = VNOVAL;
	vap->va_uid = VNOVAL;
	vap->va_gid = VNOVAL;
	vap->va_fsid = VNOVAL;
	vap->va_fileid = VNOVAL;
	vap->va_blocksize = VNOVAL;
	vap->va_rdev = VNOVAL;
	vap->va_atime.tv_sec = VNOVAL;
	vap->va_atime.tv_nsec = VNOVAL;
	vap->va_mtime.tv_sec = VNOVAL;
	vap->va_mtime.tv_nsec = VNOVAL;
	vap->va_ctime.tv_sec = VNOVAL;
	vap->va_ctime.tv_nsec = VNOVAL;
	vap->va_flags = VNOVAL;
	vap->va_gen = VNOVAL;
	vap->va_vaflags = 0;
	vap->va_fsmid = VNOVAL;
}

/*
 * Flush out and invalidate all buffers associated with a vnode.
 *
 * vp must be locked.
 */
static int vinvalbuf_bp(struct buf *bp, void *data);

struct vinvalbuf_bp_info {
	struct vnode *vp;
	int slptimeo;
	int lkflags;
	int flags;
};

void
vupdatefsmid(struct vnode *vp)
{
	atomic_set_int(&vp->v_flag, VFSMID);
}

int
vinvalbuf(struct vnode *vp, int flags, int slpflag, int slptimeo)
{
	struct vinvalbuf_bp_info info;
	int error;
	vm_object_t object;

	/*
	 * If we are being asked to save, call fsync to ensure that the inode
	 * is updated.
	 */
	if (flags & V_SAVE) {
		crit_enter();
		while (vp->v_track_write.bk_active) {
			vp->v_track_write.bk_waitflag = 1;
			error = tsleep(&vp->v_track_write, slpflag,
					"vinvlbuf", slptimeo);
			if (error) {
				crit_exit();
				return (error);
			}
		}
		if (!RB_EMPTY(&vp->v_rbdirty_tree)) {
			crit_exit();
			if ((error = VOP_FSYNC(vp, MNT_WAIT)) != 0)
				return (error);
			crit_enter();
			if (vp->v_track_write.bk_active > 0 ||
			    !RB_EMPTY(&vp->v_rbdirty_tree))
				panic("vinvalbuf: dirty bufs");
		}
		crit_exit();
	}
	crit_enter();
	info.slptimeo = slptimeo;
	info.lkflags = LK_EXCLUSIVE | LK_SLEEPFAIL;
	if (slpflag & PCATCH)
		info.lkflags |= LK_PCATCH;
	info.flags = flags;
	info.vp = vp;

	/*
	 * Flush the buffer cache until nothing is left.
	 */
	while (!RB_EMPTY(&vp->v_rbclean_tree) ||
	    !RB_EMPTY(&vp->v_rbdirty_tree)) {
		error = RB_SCAN(buf_rb_tree, &vp->v_rbclean_tree, NULL,
				vinvalbuf_bp, &info);
		if (error == 0) {
			error = RB_SCAN(buf_rb_tree, &vp->v_rbdirty_tree, NULL,
					vinvalbuf_bp, &info);
		}
	}

	/*
	 * Wait for I/O to complete.  XXX needs cleaning up.  The vnode can
	 * have write I/O in-progress but if there is a VM object then the
	 * VM object can also have read-I/O in-progress.
	 */
	do {
		while (vp->v_track_write.bk_active > 0) {
			vp->v_track_write.bk_waitflag = 1;
			tsleep(&vp->v_track_write, 0, "vnvlbv", 0);
		}
		if ((object = vp->v_object) != NULL) {
			while (object->paging_in_progress)
				vm_object_pip_sleep(object, "vnvlbx");
		}
	} while (vp->v_track_write.bk_active > 0);

	crit_exit();

	/*
	 * Destroy the copy in the VM cache, too.
	 */
	if ((object = vp->v_object) != NULL) {
		vm_object_page_remove(object, 0, 0,
			(flags & V_SAVE) ? TRUE : FALSE);
	}

	if (!RB_EMPTY(&vp->v_rbdirty_tree) || !RB_EMPTY(&vp->v_rbclean_tree))
		panic("vinvalbuf: flush failed");
	if (!RB_EMPTY(&vp->v_rbhash_tree))
		panic("vinvalbuf: flush failed, buffers still present");
	return (0);
}
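
/*
 * Illustrative sketch (not part of the original source): a typical
 * caller, e.g. a filesystem tearing a vnode down, first writes dirty
 * buffers out and then discards everything:
 *
 *	error = vinvalbuf(vp, V_SAVE, 0, 0);	(flush, then invalidate)
 *	...
 *	vinvalbuf(vp, 0, 0, 0);			(just toss the buffers)
 *
 * vclean_interlocked() below uses exactly the V_SAVE form.
 */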

static int
vinvalbuf_bp(struct buf *bp, void *data)
{
	struct vinvalbuf_bp_info *info = data;
	int error;

	if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT)) {
		error = BUF_TIMELOCK(bp, info->lkflags,
				"vinvalbuf", info->slptimeo);
		if (error == 0) {
			BUF_UNLOCK(bp);
			error = ENOLCK;
		}
		if (error == ENOLCK)
			return(0);
		return (-error);
	}

	KKASSERT(bp->b_vp == info->vp);

	/*
	 * XXX Since there are no node locks for NFS, I
	 * believe there is a slight chance that a delayed
	 * write will occur while sleeping just above, so
	 * check for it.  Note that vfs_bio_awrite expects
	 * buffers to reside on a queue, while bwrite() and
	 * brelse() do not.
	 */
	if (((bp->b_flags & (B_DELWRI | B_INVAL)) == B_DELWRI) &&
	    (info->flags & V_SAVE)) {
		if (bp->b_vp == info->vp) {
			if (bp->b_flags & B_CLUSTEROK) {
				vfs_bio_awrite(bp);
			} else {
				bremfree(bp);
				bp->b_flags |= B_ASYNC;
				bwrite(bp);
			}
		} else {
			bremfree(bp);
			bwrite(bp);
		}
	} else if (info->flags & V_SAVE) {
		/*
		 * Cannot set B_NOCACHE on a clean buffer as this will
		 * destroy the VM backing store which might actually
		 * be dirty (and unsynchronized).
		 */
		bremfree(bp);
		bp->b_flags |= (B_INVAL | B_RELBUF);
		bp->b_flags &= ~B_ASYNC;
		brelse(bp);
	} else {
		bremfree(bp);
		bp->b_flags |= (B_INVAL | B_NOCACHE | B_RELBUF);
		bp->b_flags &= ~B_ASYNC;
		brelse(bp);
	}
	return(0);
}

/*
 * Truncate a file's buffer and pages to a specified length.  This
 * is in lieu of the old vinvalbuf mechanism, which performed unneeded
 * sync activity.
 *
 * The vnode must be locked.
 */
static int vtruncbuf_bp_trunc_cmp(struct buf *bp, void *data);
static int vtruncbuf_bp_trunc(struct buf *bp, void *data);
static int vtruncbuf_bp_metasync_cmp(struct buf *bp, void *data);
static int vtruncbuf_bp_metasync(struct buf *bp, void *data);

int
vtruncbuf(struct vnode *vp, off_t length, int blksize)
{
	off_t truncloffset;
	int count;
	const char *filename;

	/*
	 * Round up to the *next* block, then destroy the buffers in question.
	 * Since we are only removing some of the buffers we must rely on the
	 * scan count to determine whether a loop is necessary.
	 */
	if ((count = (int)(length % blksize)) != 0)
		truncloffset = length + (blksize - count);
	else
		truncloffset = length;

	crit_enter();
	do {
		count = RB_SCAN(buf_rb_tree, &vp->v_rbclean_tree,
				vtruncbuf_bp_trunc_cmp,
				vtruncbuf_bp_trunc, &truncloffset);
		count += RB_SCAN(buf_rb_tree, &vp->v_rbdirty_tree,
				vtruncbuf_bp_trunc_cmp,
				vtruncbuf_bp_trunc, &truncloffset);
	} while(count);

	/*
	 * For safety, fsync any remaining metadata if the file is not being
	 * truncated to 0.  Since the metadata does not represent the entire
	 * dirty list we have to rely on the hit count to ensure that we get
	 * all of it.
	 */
	if (length > 0) {
		do {
			count = RB_SCAN(buf_rb_tree, &vp->v_rbdirty_tree,
					vtruncbuf_bp_metasync_cmp,
					vtruncbuf_bp_metasync, vp);
		} while (count);
	}

	/*
	 * Clean out any left over VM backing store.
	 */
	crit_exit();

	vnode_pager_setsize(vp, length);

	crit_enter();

	/*
	 * It is possible to have in-progress I/O from buffers that were
	 * not part of the truncation.  This should not happen if we
	 * are truncating to 0-length.
	 */
	filename = TAILQ_FIRST(&vp->v_namecache) ?
		   TAILQ_FIRST(&vp->v_namecache)->nc_name : "?";

	while ((count = vp->v_track_write.bk_active) > 0) {
		vp->v_track_write.bk_waitflag = 1;
		tsleep(&vp->v_track_write, 0, "vbtrunc", 0);
		if (length == 0) {
			kprintf("Warning: vtruncbuf(): Had to wait for "
				"%d buffer I/Os to finish in %s\n",
				count, filename);
		}
	}

	/*
	 * Make sure no buffers were instantiated while we were trying
	 * to clean out the remaining VM pages.  This could occur due
	 * to busy dirty VM pages being flushed out to disk.
	 */
	do {
		count = RB_SCAN(buf_rb_tree, &vp->v_rbclean_tree,
				vtruncbuf_bp_trunc_cmp,
				vtruncbuf_bp_trunc, &truncloffset);
		count += RB_SCAN(buf_rb_tree, &vp->v_rbdirty_tree,
				vtruncbuf_bp_trunc_cmp,
				vtruncbuf_bp_trunc, &truncloffset);
		if (count) {
			kprintf("Warning: vtruncbuf(): Had to re-clean %d "
				"left over buffers in %s\n", count, filename);
		}
	} while(count);

	crit_exit();

	return (0);
}
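
/*
 * Illustrative sketch (not part of the original source): a filesystem
 * shrinking a file, e.g. in its VOP_SETATTR/truncate path, passes the
 * new length and its block size ("bsize" here stands in for whatever
 * the filesystem uses, such as its fragment or block size):
 *
 *	error = vtruncbuf(vp, vap->va_size, bsize);
 *
 * vtruncbuf() itself calls vnode_pager_setsize(), so the VM object is
 * resized as a side effect.
 */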

/*
 * The callback buffer is beyond the new file EOF and must be destroyed.
 * Note that the compare function must conform to the RB_SCAN's
 * requirements: return 0 for a buffer inside the range the scan should
 * visit, a negative value for a buffer below the range, and a positive
 * value for a buffer above it.
 */
static
int
vtruncbuf_bp_trunc_cmp(struct buf *bp, void *data)
{
	if (bp->b_loffset >= *(off_t *)data)
		return(0);
	return(-1);
}

static
int
vtruncbuf_bp_trunc(struct buf *bp, void *data)
{
	/*
	 * Do not try to use a buffer we cannot immediately lock, but sleep
	 * anyway to prevent a livelock.  The code will loop until all buffers
	 * can be acted upon.
	 */
	if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT)) {
		if (BUF_LOCK(bp, LK_EXCLUSIVE|LK_SLEEPFAIL) == 0)
			BUF_UNLOCK(bp);
	} else {
		bremfree(bp);
		bp->b_flags |= (B_INVAL | B_RELBUF | B_NOCACHE);
		bp->b_flags &= ~B_ASYNC;
		brelse(bp);
	}
	return(1);
}

/*
 * Fsync all meta-data after truncating a file to be non-zero.  Only metadata
 * blocks (with a negative loffset) are scanned.
 * Note that the compare function must conform to the RB_SCAN's requirements.
 */
static int
vtruncbuf_bp_metasync_cmp(struct buf *bp, void *data)
{
	if (bp->b_loffset < 0)
		return(0);
	return(1);
}

static int
vtruncbuf_bp_metasync(struct buf *bp, void *data)
{
	struct vnode *vp = data;

	if (bp->b_flags & B_DELWRI) {
		/*
		 * Do not try to use a buffer we cannot immediately lock,
		 * but sleep anyway to prevent a livelock.  The code will
		 * loop until all buffers can be acted upon.
		 */
		if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT)) {
			if (BUF_LOCK(bp, LK_EXCLUSIVE|LK_SLEEPFAIL) == 0)
				BUF_UNLOCK(bp);
		} else {
			bremfree(bp);
			if (bp->b_vp == vp) {
				bp->b_flags |= B_ASYNC;
			} else {
				bp->b_flags &= ~B_ASYNC;
			}
			bwrite(bp);
		}
		return(1);
	} else {
		return(0);
	}
}

/*
 * vfsync - implements a multipass fsync on a file which understands
 * dependencies and meta-data.  The passed vnode must be locked.  The
 * waitfor argument may be MNT_WAIT, MNT_NOWAIT, or MNT_LAZY.
 *
 * When fsyncing data asynchronously just do one consolidated pass starting
 * with the most negative block number.  This may not get all the data due
 * to dependencies.
 *
 * When fsyncing data synchronously do a data pass, then a metadata pass,
 * then do additional data+metadata passes to try to get all the data out.
 */
static int vfsync_wait_output(struct vnode *vp,
			int (*waitoutput)(struct vnode *, struct thread *));
static int vfsync_data_only_cmp(struct buf *bp, void *data);
static int vfsync_meta_only_cmp(struct buf *bp, void *data);
static int vfsync_lazy_range_cmp(struct buf *bp, void *data);
static int vfsync_bp(struct buf *bp, void *data);

struct vfsync_info {
	struct vnode *vp;
	int synchronous;
	int syncdeps;
	int lazycount;
	int lazylimit;
	int skippedbufs;
	int (*checkdef)(struct buf *);
};

int
vfsync(struct vnode *vp, int waitfor, int passes,
	int (*checkdef)(struct buf *),
	int (*waitoutput)(struct vnode *, struct thread *))
{
	struct vfsync_info info;
	int error;

	bzero(&info, sizeof(info));
	info.vp = vp;
	if ((info.checkdef = checkdef) == NULL)
		info.syncdeps = 1;

	crit_enter_id("vfsync");

	switch(waitfor) {
	case MNT_LAZY:
		/*
		 * Lazy (filesystem syncer type) operation: asynchronous,
		 * plus limit the number of data (not meta) pages we try
		 * to flush to 1MB.  A non-zero return means that the lazy
		 * limit was reached.
		 */
		info.lazylimit = 1024 * 1024;
		info.syncdeps = 1;
		error = RB_SCAN(buf_rb_tree, &vp->v_rbdirty_tree,
				vfsync_lazy_range_cmp, vfsync_bp, &info);
		RB_SCAN(buf_rb_tree, &vp->v_rbdirty_tree,
			vfsync_meta_only_cmp, vfsync_bp, &info);
		if (error == 0)
			vp->v_lazyw = 0;
		else if (!RB_EMPTY(&vp->v_rbdirty_tree))
			vn_syncer_add_to_worklist(vp, 1);
		error = 0;
		break;
	case MNT_NOWAIT:
		/*
		 * Asynchronous.  Do a data-only pass and a meta-only pass.
		 */
		info.syncdeps = 1;
		RB_SCAN(buf_rb_tree, &vp->v_rbdirty_tree, vfsync_data_only_cmp,
			vfsync_bp, &info);
		RB_SCAN(buf_rb_tree, &vp->v_rbdirty_tree, vfsync_meta_only_cmp,
			vfsync_bp, &info);
		error = 0;
		break;
	default:
		/*
		 * Synchronous.  Do a data-only pass, then a meta-data+data
		 * pass, then additional integrated passes to try to get
		 * all the dependencies flushed.
		 */
		RB_SCAN(buf_rb_tree, &vp->v_rbdirty_tree, vfsync_data_only_cmp,
			vfsync_bp, &info);
		error = vfsync_wait_output(vp, waitoutput);
		if (error == 0) {
			info.skippedbufs = 0;
			RB_SCAN(buf_rb_tree, &vp->v_rbdirty_tree, NULL,
				vfsync_bp, &info);
			error = vfsync_wait_output(vp, waitoutput);
			if (info.skippedbufs)
				kprintf("Warning: vfsync skipped %d dirty bufs in pass2!\n", info.skippedbufs);
		}
		while (error == 0 && passes > 0 &&
		    !RB_EMPTY(&vp->v_rbdirty_tree)) {
			if (--passes == 0) {
				info.synchronous = 1;
				info.syncdeps = 1;
			}
			error = RB_SCAN(buf_rb_tree, &vp->v_rbdirty_tree, NULL,
				vfsync_bp, &info);
			if (error < 0)
				error = -error;
			info.syncdeps = 1;
			if (error == 0)
				error = vfsync_wait_output(vp, waitoutput);
		}
		break;
	}
	crit_exit_id("vfsync");
	return(error);
}
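
/*
 * Illustrative sketch (not part of the original source): a filesystem's
 * VOP_FSYNC implementation would typically hand vfsync() its own
 * dependency and wait callbacks.  The names here are hypothetical:
 *
 *	static int
 *	myfs_fsync(struct vop_fsync_args *ap)
 *	{
 *		return (vfsync(ap->a_vp, ap->a_waitfor, 1,
 *			       myfs_checkdef, myfs_waitoutput));
 *	}
 *
 * Passing NULL for checkdef, as simple filesystems do, makes vfsync()
 * ignore dependencies entirely (info.syncdeps is forced to 1).
 */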
XXX 772 */ 773 if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT)) { 774 kprintf("Warning: vfsync_bp skipping dirty buffer %p\n", bp); 775 ++info->skippedbufs; 776 return(0); 777 } 778 if ((bp->b_flags & B_DELWRI) == 0) 779 panic("vfsync_bp: buffer not dirty"); 780 if (vp != bp->b_vp) 781 panic("vfsync_bp: buffer vp mismatch"); 782 783 /* 784 * B_NEEDCOMMIT (primarily used by NFS) is a state where the buffer 785 * has been written but an additional handshake with the device 786 * is required before we can dispose of the buffer. We have no idea 787 * how to do this so we have to skip these buffers. 788 */ 789 if (bp->b_flags & B_NEEDCOMMIT) { 790 BUF_UNLOCK(bp); 791 return(0); 792 } 793 794 if (info->synchronous) { 795 /* 796 * Synchronous flushing. An error may be returned. 797 */ 798 bremfree(bp); 799 crit_exit_id("vfsync"); 800 error = bwrite(bp); 801 crit_enter_id("vfsync"); 802 } else { 803 /* 804 * Asynchronous flushing. A negative return value simply 805 * stops the scan and is not considered an error. We use 806 * this to support limited MNT_LAZY flushes. 807 */ 808 vp->v_lazyw = bp->b_loffset; 809 if ((vp->v_flag & VOBJBUF) && (bp->b_flags & B_CLUSTEROK)) { 810 info->lazycount += vfs_bio_awrite(bp); 811 } else { 812 info->lazycount += bp->b_bufsize; 813 bremfree(bp); 814 crit_exit_id("vfsync"); 815 bawrite(bp); 816 crit_enter_id("vfsync"); 817 } 818 if (info->lazylimit && info->lazycount >= info->lazylimit) 819 error = 1; 820 else 821 error = 0; 822 } 823 return(-error); 824 } 825 826 /* 827 * Associate a buffer with a vnode. 828 */ 829 void 830 bgetvp(struct vnode *vp, struct buf *bp) 831 { 832 KASSERT(bp->b_vp == NULL, ("bgetvp: not free")); 833 KKASSERT((bp->b_flags & (B_HASHED|B_DELWRI|B_VNCLEAN|B_VNDIRTY)) == 0); 834 835 vhold(vp); 836 /* 837 * Insert onto list for new vnode. 838 */ 839 crit_enter(); 840 bp->b_vp = vp; 841 bp->b_flags |= B_HASHED; 842 if (buf_rb_hash_RB_INSERT(&vp->v_rbhash_tree, bp)) 843 panic("reassignbuf: dup lblk vp %p bp %p", vp, bp); 844 845 bp->b_flags |= B_VNCLEAN; 846 if (buf_rb_tree_RB_INSERT(&vp->v_rbclean_tree, bp)) 847 panic("reassignbuf: dup lblk/clean vp %p bp %p", vp, bp); 848 crit_exit(); 849 } 850 851 /* 852 * Disassociate a buffer from a vnode. 853 */ 854 void 855 brelvp(struct buf *bp) 856 { 857 struct vnode *vp; 858 859 KASSERT(bp->b_vp != NULL, ("brelvp: NULL")); 860 861 /* 862 * Delete from old vnode list, if on one. 863 */ 864 vp = bp->b_vp; 865 crit_enter(); 866 if (bp->b_flags & (B_VNDIRTY | B_VNCLEAN)) { 867 if (bp->b_flags & B_VNDIRTY) 868 buf_rb_tree_RB_REMOVE(&vp->v_rbdirty_tree, bp); 869 else 870 buf_rb_tree_RB_REMOVE(&vp->v_rbclean_tree, bp); 871 bp->b_flags &= ~(B_VNDIRTY | B_VNCLEAN); 872 } 873 if (bp->b_flags & B_HASHED) { 874 buf_rb_hash_RB_REMOVE(&vp->v_rbhash_tree, bp); 875 bp->b_flags &= ~B_HASHED; 876 } 877 if ((vp->v_flag & VONWORKLST) && RB_EMPTY(&vp->v_rbdirty_tree)) { 878 vp->v_flag &= ~VONWORKLST; 879 LIST_REMOVE(vp, v_synclist); 880 } 881 crit_exit(); 882 bp->b_vp = NULL; 883 vdrop(vp); 884 } 885 886 /* 887 * Reassign the buffer to the proper clean/dirty list based on B_DELWRI. 888 * This routine is called when the state of the B_DELWRI bit is changed. 889 */ 890 void 891 reassignbuf(struct buf *bp) 892 { 893 struct vnode *vp = bp->b_vp; 894 int delay; 895 896 KKASSERT(vp != NULL); 897 ++reassignbufcalls; 898 899 /* 900 * B_PAGING flagged buffers cannot be reassigned because their vp 901 * is not fully linked in. 

/*
 * Associate a buffer with a vnode.
 */
void
bgetvp(struct vnode *vp, struct buf *bp)
{
	KASSERT(bp->b_vp == NULL, ("bgetvp: not free"));
	KKASSERT((bp->b_flags & (B_HASHED|B_DELWRI|B_VNCLEAN|B_VNDIRTY)) == 0);

	vhold(vp);
	/*
	 * Insert onto list for new vnode.
	 */
	crit_enter();
	bp->b_vp = vp;
	bp->b_flags |= B_HASHED;
	if (buf_rb_hash_RB_INSERT(&vp->v_rbhash_tree, bp))
		panic("reassignbuf: dup lblk vp %p bp %p", vp, bp);

	bp->b_flags |= B_VNCLEAN;
	if (buf_rb_tree_RB_INSERT(&vp->v_rbclean_tree, bp))
		panic("reassignbuf: dup lblk/clean vp %p bp %p", vp, bp);
	crit_exit();
}

/*
 * Disassociate a buffer from a vnode.
 */
void
brelvp(struct buf *bp)
{
	struct vnode *vp;

	KASSERT(bp->b_vp != NULL, ("brelvp: NULL"));

	/*
	 * Delete from old vnode list, if on one.
	 */
	vp = bp->b_vp;
	crit_enter();
	if (bp->b_flags & (B_VNDIRTY | B_VNCLEAN)) {
		if (bp->b_flags & B_VNDIRTY)
			buf_rb_tree_RB_REMOVE(&vp->v_rbdirty_tree, bp);
		else
			buf_rb_tree_RB_REMOVE(&vp->v_rbclean_tree, bp);
		bp->b_flags &= ~(B_VNDIRTY | B_VNCLEAN);
	}
	if (bp->b_flags & B_HASHED) {
		buf_rb_hash_RB_REMOVE(&vp->v_rbhash_tree, bp);
		bp->b_flags &= ~B_HASHED;
	}
	if ((vp->v_flag & VONWORKLST) && RB_EMPTY(&vp->v_rbdirty_tree)) {
		vp->v_flag &= ~VONWORKLST;
		LIST_REMOVE(vp, v_synclist);
	}
	crit_exit();
	bp->b_vp = NULL;
	vdrop(vp);
}

/*
 * Reassign the buffer to the proper clean/dirty list based on B_DELWRI.
 * This routine is called when the state of the B_DELWRI bit is changed.
 */
void
reassignbuf(struct buf *bp)
{
	struct vnode *vp = bp->b_vp;
	int delay;

	KKASSERT(vp != NULL);
	++reassignbufcalls;

	/*
	 * B_PAGING flagged buffers cannot be reassigned because their vp
	 * is not fully linked in.
	 */
	if (bp->b_flags & B_PAGING)
		panic("cannot reassign paging buffer");

	crit_enter();
	if (bp->b_flags & B_DELWRI) {
		/*
		 * Move to the dirty list, add the vnode to the worklist
		 */
		if (bp->b_flags & B_VNCLEAN) {
			buf_rb_tree_RB_REMOVE(&vp->v_rbclean_tree, bp);
			bp->b_flags &= ~B_VNCLEAN;
		}
		if ((bp->b_flags & B_VNDIRTY) == 0) {
			if (buf_rb_tree_RB_INSERT(&vp->v_rbdirty_tree, bp)) {
				panic("reassignbuf: dup lblk vp %p bp %p",
				      vp, bp);
			}
			bp->b_flags |= B_VNDIRTY;
		}
		if ((vp->v_flag & VONWORKLST) == 0) {
			switch (vp->v_type) {
			case VDIR:
				delay = dirdelay;
				break;
			case VCHR:
			case VBLK:
				if (vp->v_rdev &&
				    vp->v_rdev->si_mountpoint != NULL) {
					delay = metadelay;
					break;
				}
				/* fall through */
			default:
				delay = filedelay;
			}
			vn_syncer_add_to_worklist(vp, delay);
		}
	} else {
		/*
		 * Move to the clean list, remove the vnode from the worklist
		 * if no dirty blocks remain.
		 */
		if (bp->b_flags & B_VNDIRTY) {
			buf_rb_tree_RB_REMOVE(&vp->v_rbdirty_tree, bp);
			bp->b_flags &= ~B_VNDIRTY;
		}
		if ((bp->b_flags & B_VNCLEAN) == 0) {
			if (buf_rb_tree_RB_INSERT(&vp->v_rbclean_tree, bp)) {
				panic("reassignbuf: dup lblk vp %p bp %p",
				      vp, bp);
			}
			bp->b_flags |= B_VNCLEAN;
		}
		if ((vp->v_flag & VONWORKLST) &&
		    RB_EMPTY(&vp->v_rbdirty_tree)) {
			vp->v_flag &= ~VONWORKLST;
			LIST_REMOVE(vp, v_synclist);
		}
	}
	crit_exit();
}

/*
 * Create a vnode for a block device.
 * Used for mounting the root file system.
 */
int
bdevvp(cdev_t dev, struct vnode **vpp)
{
	struct vnode *vp;
	struct vnode *nvp;
	int error;

	if (dev == NOCDEV) {
		*vpp = NULLVP;
		return (ENXIO);
	}
	error = getspecialvnode(VT_NON, NULL, &spec_vnode_vops_p, &nvp, 0, 0);
	if (error) {
		*vpp = NULLVP;
		return (error);
	}
	vp = nvp;
	vp->v_type = VCHR;
	vp->v_udev = dev->si_udev;
	vx_unlock(vp);
	*vpp = vp;
	return (0);
}
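
/*
 * Illustrative sketch (not part of the original source): bdevvp() is
 * the usual way root-mount code obtains a vnode for the root device,
 * e.g.:
 *
 *	if ((error = bdevvp(rootdev, &rootvp)) != 0)
 *		panic("cannot get root vnode");
 *
 * "rootdev"/"rootvp" stand in for whatever the caller uses.
 */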

int
v_associate_rdev(struct vnode *vp, cdev_t dev)
{
	lwkt_tokref ilock;

	if (dev == NULL || dev == NOCDEV)
		return(ENXIO);
	if (dev_is_good(dev) == 0)
		return(ENXIO);
	KKASSERT(vp->v_rdev == NULL);
	if (dev_ref_debug)
		kprintf("Z1");
	vp->v_rdev = reference_dev(dev);
	lwkt_gettoken(&ilock, &spechash_token);
	SLIST_INSERT_HEAD(&dev->si_hlist, vp, v_cdevnext);
	lwkt_reltoken(&ilock);
	return(0);
}

void
v_release_rdev(struct vnode *vp)
{
	lwkt_tokref ilock;
	cdev_t dev;

	if ((dev = vp->v_rdev) != NULL) {
		lwkt_gettoken(&ilock, &spechash_token);
		SLIST_REMOVE(&dev->si_hlist, vp, vnode, v_cdevnext);
		vp->v_rdev = NULL;
		release_dev(dev);
		lwkt_reltoken(&ilock);
	}
}

/*
 * Add a vnode to the alias list hung off the cdev_t.  We only associate
 * the device number with the vnode.  The actual device is not associated
 * until the vnode is opened (usually in spec_open()), and will be
 * disassociated on last close.
 */
void
addaliasu(struct vnode *nvp, udev_t nvp_udev)
{
	if (nvp->v_type != VBLK && nvp->v_type != VCHR)
		panic("addaliasu on non-special vnode");
	nvp->v_udev = nvp_udev;
}

/*
 * Disassociate a vnode from its underlying filesystem.
 *
 * The vnode must be VX locked, referenced, and v_spinlock must be held.
 * This routine releases v_spinlock.
 *
 * If there are v_usecount references to the vnode other than ours we have
 * to VOP_CLOSE the vnode before we can deactivate and reclaim it.
 */
void
vclean_interlocked(struct vnode *vp, int flags)
{
	int active;
	int n;
	vm_object_t object;

	/*
	 * If the vnode has already been reclaimed we have nothing to do.
	 * VRECLAIMED must be interlocked with the vnode's spinlock.
	 */
	if (vp->v_flag & VRECLAIMED) {
		spin_unlock_wr(&vp->v_spinlock);
		return;
	}
	vp->v_flag |= VRECLAIMED;
	spin_unlock_wr(&vp->v_spinlock);

	/*
	 * Scrap the vfs cache
	 */
	while (cache_inval_vp(vp, 0) != 0) {
		kprintf("Warning: vnode %p clean/cache_resolution race detected\n", vp);
		tsleep(vp, 0, "vclninv", 2);
	}

	/*
	 * Check to see if the vnode is in use.  If so we have to reference it
	 * before we clean it out so that its count cannot fall to zero and
	 * generate a race against ourselves to recycle it.
	 */
	active = (vp->v_usecount > 1);

	/*
	 * Clean out any buffers associated with the vnode and destroy its
	 * object, if it has one.
	 */
	vinvalbuf(vp, V_SAVE, 0, 0);

	/*
	 * If purging an active vnode (typically during a forced unmount
	 * or reboot), it must be closed and deactivated before being
	 * reclaimed.  This isn't really all that safe, but what can
	 * we do? XXX.
	 *
	 * Note that neither of these routines unlocks the vnode.
	 */
	if (active && (flags & DOCLOSE)) {
		while ((n = vp->v_opencount) != 0) {
			if (vp->v_writecount)
				VOP_CLOSE(vp, FWRITE|FNONBLOCK);
			else
				VOP_CLOSE(vp, FNONBLOCK);
			if (vp->v_opencount == n) {
				kprintf("Warning: unable to force-close"
					" vnode %p\n", vp);
				break;
			}
		}
	}

	/*
	 * If the vnode has not been deactivated, deactivate it.  Deactivation
	 * can create new buffers and VM pages so we have to call vinvalbuf()
	 * again to make sure they all get flushed.
	 *
	 * This can occur if a file with a link count of 0 needs to be
	 * truncated.
	 */
	if ((vp->v_flag & VINACTIVE) == 0) {
		vp->v_flag |= VINACTIVE;
		VOP_INACTIVE(vp);
		vinvalbuf(vp, V_SAVE, 0, 0);
	}

	/*
	 * If the vnode has an object, destroy it.
	 */
	if ((object = vp->v_object) != NULL) {
		if (object->ref_count == 0) {
			if ((object->flags & OBJ_DEAD) == 0)
				vm_object_terminate(object);
		} else {
			vm_pager_deallocate(object);
		}
		vp->v_flag &= ~VOBJBUF;
	}
	KKASSERT((vp->v_flag & VOBJBUF) == 0);

	/*
	 * Reclaim the vnode.
	 */
	if (VOP_RECLAIM(vp))
		panic("vclean: cannot reclaim");

	/*
	 * Done with purge, notify sleepers of the grim news.
	 */
	vp->v_ops = &dead_vnode_vops_p;
	vn_pollgone(vp);
	vp->v_tag = VT_NON;
}

/*
 * Eliminate all activity associated with the requested vnode
 * and with all vnodes aliased to the requested vnode.
 *
 * The vnode must be referenced and vx_lock()'d
 *
 * revoke { struct vnode *a_vp, int a_flags }
 */
int
vop_stdrevoke(struct vop_revoke_args *ap)
{
	struct vnode *vp, *vq;
	lwkt_tokref ilock;
	cdev_t dev;

	KASSERT((ap->a_flags & REVOKEALL) != 0, ("vop_revoke"));

	vp = ap->a_vp;

	/*
	 * If the vnode is already dead don't try to revoke it
	 */
	if (vp->v_flag & VRECLAIMED)
		return (0);

	/*
	 * If the vnode has a device association, scrap all vnodes associated
	 * with the device.  Don't let the device disappear on us while we
	 * are scrapping the vnodes.
	 *
	 * The passed vp will probably show up in the list, do not VX lock
	 * it twice!
	 */
	if (vp->v_type != VCHR && vp->v_type != VBLK)
		return(0);
	if ((dev = vp->v_rdev) == NULL) {
		if ((dev = udev2dev(vp->v_udev, vp->v_type == VBLK)) == NOCDEV)
			return(0);
	}
	reference_dev(dev);
	lwkt_gettoken(&ilock, &spechash_token);
	while ((vq = SLIST_FIRST(&dev->si_hlist)) != NULL) {
		if (vp != vq)
			vx_get(vq);
		if (vq == SLIST_FIRST(&dev->si_hlist))
			vgone(vq);
		if (vp != vq)
			vx_put(vq);
	}
	lwkt_reltoken(&ilock);
	release_dev(dev);
	return (0);
}

/*
 * Recycle an unused vnode to the front of the free list.
 *
 * Returns 1 if we were successfully able to recycle the vnode,
 * 0 otherwise.
 */
int
vrecycle(struct vnode *vp)
{
	if (vp->v_usecount == 1) {
		vgone(vp);
		return (1);
	}
	return (0);
}

/*
 * Eliminate all activity associated with a vnode in preparation for reuse.
 *
 * The vnode must be VX locked and refd and will remain VX locked and refd
 * on return.  This routine may be called with the vnode in any state, as
 * long as it is VX locked.  The vnode will be cleaned out and marked
 * VRECLAIMED but will not actually be reused until all existing refs and
 * holds go away.
 *
 * NOTE: This routine may be called on a vnode which has not yet been
 * deactivated (VOP_INACTIVE), or on a vnode which has already been
 * reclaimed.
 *
 * This routine is not responsible for placing us back on the freelist.
 * Instead, it happens automatically when the caller releases the VX lock
 * (assuming there aren't any other references).
 */
void
vgone(struct vnode *vp)
{
	spin_lock_wr(&vp->v_spinlock);
	vgone_interlocked(vp);
}

void
vgone_interlocked(struct vnode *vp)
{
	/*
	 * Assert that the VX lock is held.  This is an absolute requirement
	 * now for vgone() to be called.
	 */
	KKASSERT(vp->v_lock.lk_exclusivecount == 1);

	/*
	 * Clean out the filesystem specific data and set the VRECLAIMED
	 * bit.  Also deactivate the vnode if necessary.
	 */
	vclean_interlocked(vp, DOCLOSE);
	/* spinlock unlocked */

	/*
	 * Delete from old mount point vnode list, if on one.
	 */
	if (vp->v_mount != NULL)
		insmntque(vp, NULL);

	/*
	 * If special device, remove it from special device alias list
	 * if it is on one.  This should normally only occur if a vnode is
	 * being revoked as the device should otherwise have been released
	 * naturally.
	 */
	if ((vp->v_type == VBLK || vp->v_type == VCHR) && vp->v_rdev != NULL) {
		v_release_rdev(vp);
	}

	/*
	 * Set us to VBAD
	 */
	vp->v_type = VBAD;
}

/*
 * Lookup a vnode by device number.
 */
int
vfinddev(cdev_t dev, enum vtype type, struct vnode **vpp)
{
	lwkt_tokref ilock;
	struct vnode *vp;

	lwkt_gettoken(&ilock, &spechash_token);
	SLIST_FOREACH(vp, &dev->si_hlist, v_cdevnext) {
		if (type == vp->v_type) {
			*vpp = vp;
			lwkt_reltoken(&ilock);
			return (1);
		}
	}
	lwkt_reltoken(&ilock);
	return (0);
}

/*
 * Calculate the total number of references to a special device.  This
 * routine may only be called for VBLK and VCHR vnodes since v_rdev is
 * an overloaded field.  Since udev2dev can now return NOCDEV, we have
 * to check for a NULL v_rdev.
 */
int
count_dev(cdev_t dev)
{
	lwkt_tokref ilock;
	struct vnode *vp;
	int count = 0;

	if (SLIST_FIRST(&dev->si_hlist)) {
		lwkt_gettoken(&ilock, &spechash_token);
		SLIST_FOREACH(vp, &dev->si_hlist, v_cdevnext) {
			count += vp->v_usecount;
		}
		lwkt_reltoken(&ilock);
	}
	return(count);
}

int
count_udev(udev_t udev)
{
	cdev_t dev;

	if ((dev = udev2dev(udev, 0)) == NOCDEV)
		return(0);
	return(count_dev(dev));
}

int
vcount(struct vnode *vp)
{
	if (vp->v_rdev == NULL)
		return(0);
	return(count_dev(vp->v_rdev));
}

/*
 * Initialize VMIO for a vnode.  This routine MUST be called before a
 * VFS can issue buffer cache ops on a vnode.  It is typically called
 * when a vnode is initialized from its inode.
 */
int
vinitvmio(struct vnode *vp, off_t filesize)
{
	vm_object_t object;
	int error = 0;

retry:
	if ((object = vp->v_object) == NULL) {
		object = vnode_pager_alloc(vp, filesize, 0, 0);
		/*
		 * Dereference the reference we just created.  This assumes
		 * that the object is associated with the vp.
		 */
		object->ref_count--;
		vp->v_usecount--;
	} else {
		if (object->flags & OBJ_DEAD) {
			vn_unlock(vp);
			tsleep(object, 0, "vodead", 0);
			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
			goto retry;
		}
	}
	KASSERT(vp->v_object != NULL, ("vinitvmio: NULL object"));
	vp->v_flag |= VOBJBUF;
	return (error);
}
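
/*
 * Illustrative sketch (not part of the original source): a filesystem
 * bringing a regular-file vnode to life from its on-disk inode would
 * enable VMIO before doing any buffer cache I/O.  "ip" is a
 * hypothetical in-core inode:
 *
 *	if (vp->v_type == VREG) {
 *		if ((error = vinitvmio(vp, ip->i_size)) != 0)
 *			return (error);
 *	}
 */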

/*
 * Print out a description of a vnode.
 */
static char *typename[] =
{"VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD"};

void
vprint(char *label, struct vnode *vp)
{
	char buf[96];

	if (label != NULL)
		kprintf("%s: %p: ", label, (void *)vp);
	else
		kprintf("%p: ", (void *)vp);
	kprintf("type %s, usecount %d, writecount %d, refcount %d,",
		typename[vp->v_type], vp->v_usecount, vp->v_writecount,
		vp->v_holdcnt);
	buf[0] = '\0';
	if (vp->v_flag & VROOT)
		strcat(buf, "|VROOT");
	if (vp->v_flag & VTEXT)
		strcat(buf, "|VTEXT");
	if (vp->v_flag & VSYSTEM)
		strcat(buf, "|VSYSTEM");
	if (vp->v_flag & VFREE)
		strcat(buf, "|VFREE");
	if (vp->v_flag & VOBJBUF)
		strcat(buf, "|VOBJBUF");
	if (buf[0] != '\0')
		kprintf(" flags (%s)", &buf[1]);
	if (vp->v_data == NULL) {
		kprintf("\n");
	} else {
		kprintf("\n\t");
		VOP_PRINT(vp);
	}
}

#ifdef DDB
#include <ddb/ddb.h>

static int db_show_locked_vnodes(struct mount *mp, void *data);

/*
 * List all of the locked vnodes in the system.
 * Called when debugging the kernel.
 */
DB_SHOW_COMMAND(lockedvnodes, lockedvnodes)
{
	kprintf("Locked vnodes\n");
	mountlist_scan(db_show_locked_vnodes, NULL,
		MNTSCAN_FORWARD|MNTSCAN_NOBUSY);
}

static int
db_show_locked_vnodes(struct mount *mp, void *data __unused)
{
	struct vnode *vp;

	TAILQ_FOREACH(vp, &mp->mnt_nvnodelist, v_nmntvnodes) {
		if (vn_islocked(vp))
			vprint((char *)0, vp);
	}
	return(0);
}
#endif
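
/*
 * Illustrative note (not part of the original source): DB_SHOW_COMMAND
 * hooks the routine above into the kernel debugger, so from a ddb
 * prompt the listing can be invoked with:
 *
 *	db> show lockedvnodes
 */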

/*
 * Top level filesystem related information gathering.
 */
static int sysctl_ovfs_conf (SYSCTL_HANDLER_ARGS);

static int
vfs_sysctl(SYSCTL_HANDLER_ARGS)
{
	int *name = (int *)arg1 - 1;	/* XXX */
	u_int namelen = arg2 + 1;	/* XXX */
	struct vfsconf *vfsp;

#if 1 || defined(COMPAT_PRELITE2)
	/* Resolve ambiguity between VFS_VFSCONF and VFS_GENERIC. */
	if (namelen == 1)
		return (sysctl_ovfs_conf(oidp, arg1, arg2, req));
#endif

#ifdef notyet
	/* all sysctl names at this level are at least name and field */
	if (namelen < 2)
		return (ENOTDIR);		/* overloaded */
	if (name[0] != VFS_GENERIC) {
		for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
			if (vfsp->vfc_typenum == name[0])
				break;
		if (vfsp == NULL)
			return (EOPNOTSUPP);
		return ((*vfsp->vfc_vfsops->vfs_sysctl)(&name[1], namelen - 1,
		    oldp, oldlenp, newp, newlen, p));
	}
#endif
	switch (name[1]) {
	case VFS_MAXTYPENUM:
		if (namelen != 2)
			return (ENOTDIR);
		return (SYSCTL_OUT(req, &maxvfsconf, sizeof(int)));
	case VFS_CONF:
		if (namelen != 3)
			return (ENOTDIR);	/* overloaded */
		for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
			if (vfsp->vfc_typenum == name[2])
				break;
		if (vfsp == NULL)
			return (EOPNOTSUPP);
		return (SYSCTL_OUT(req, vfsp, sizeof *vfsp));
	}
	return (EOPNOTSUPP);
}

SYSCTL_NODE(_vfs, VFS_GENERIC, generic, CTLFLAG_RD, vfs_sysctl,
	"Generic filesystem");

#if 1 || defined(COMPAT_PRELITE2)

static int
sysctl_ovfs_conf(SYSCTL_HANDLER_ARGS)
{
	int error;
	struct vfsconf *vfsp;
	struct ovfsconf ovfs;

	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) {
		bzero(&ovfs, sizeof(ovfs));
		ovfs.vfc_vfsops = vfsp->vfc_vfsops;	/* XXX used as flag */
		strcpy(ovfs.vfc_name, vfsp->vfc_name);
		ovfs.vfc_index = vfsp->vfc_typenum;
		ovfs.vfc_refcount = vfsp->vfc_refcount;
		ovfs.vfc_flags = vfsp->vfc_flags;
		error = SYSCTL_OUT(req, &ovfs, sizeof ovfs);
		if (error)
			return error;
	}
	return 0;
}

#endif /* 1 || COMPAT_PRELITE2 */

/*
 * Check to see if a filesystem is mounted on a block device.
 */
int
vfs_mountedon(struct vnode *vp)
{
	cdev_t dev;

	if ((dev = vp->v_rdev) == NULL)
		dev = udev2dev(vp->v_udev, (vp->v_type == VBLK));
	if (dev != NOCDEV && dev->si_mountpoint)
		return (EBUSY);
	return (0);
}
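
/*
 * Illustrative sketch (not part of the original source): mount code
 * uses vfs_mountedon() as a guard before taking over a device vnode,
 * e.g.:
 *
 *	if ((error = vfs_mountedon(devvp)) != 0)
 *		return (error);		(typically EBUSY)
 */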

/*
 * Unmount all filesystems.  The list is traversed in reverse order
 * of mounting to avoid dependencies.
 */

static int vfs_umountall_callback(struct mount *mp, void *data);

void
vfs_unmountall(void)
{
	int count;

	do {
		count = mountlist_scan(vfs_umountall_callback,
					NULL, MNTSCAN_REVERSE|MNTSCAN_NOBUSY);
	} while (count);
}

static
int
vfs_umountall_callback(struct mount *mp, void *data)
{
	int error;

	error = dounmount(mp, MNT_FORCE);
	if (error) {
		mountlist_remove(mp);
		kprintf("unmount of filesystem mounted from %s failed (",
			mp->mnt_stat.f_mntfromname);
		if (error == EBUSY)
			kprintf("BUSY)\n");
		else
			kprintf("%d)\n", error);
	}
	return(1);
}

/*
 * Build hash lists of net addresses and hang them off the mount point.
 * Called by ufs_mount() to set up the lists of export addresses.
 */
static int
vfs_hang_addrlist(struct mount *mp, struct netexport *nep,
		  struct export_args *argp)
{
	struct netcred *np;
	struct radix_node_head *rnh;
	int i;
	struct radix_node *rn;
	struct sockaddr *saddr, *smask = 0;
	struct domain *dom;
	int error;

	if (argp->ex_addrlen == 0) {
		if (mp->mnt_flag & MNT_DEFEXPORTED)
			return (EPERM);
		np = &nep->ne_defexported;
		np->netc_exflags = argp->ex_flags;
		np->netc_anon = argp->ex_anon;
		np->netc_anon.cr_ref = 1;
		mp->mnt_flag |= MNT_DEFEXPORTED;
		return (0);
	}

	if (argp->ex_addrlen < 0 || argp->ex_addrlen > MLEN)
		return (EINVAL);
	if (argp->ex_masklen < 0 || argp->ex_masklen > MLEN)
		return (EINVAL);

	i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen;
	np = (struct netcred *) kmalloc(i, M_NETADDR, M_WAITOK);
	bzero((caddr_t) np, i);
	saddr = (struct sockaddr *) (np + 1);
	if ((error = copyin(argp->ex_addr, (caddr_t) saddr, argp->ex_addrlen)))
		goto out;
	if (saddr->sa_len > argp->ex_addrlen)
		saddr->sa_len = argp->ex_addrlen;
	if (argp->ex_masklen) {
		smask = (struct sockaddr *)((caddr_t)saddr + argp->ex_addrlen);
		error = copyin(argp->ex_mask, (caddr_t)smask, argp->ex_masklen);
		if (error)
			goto out;
		if (smask->sa_len > argp->ex_masklen)
			smask->sa_len = argp->ex_masklen;
	}
	i = saddr->sa_family;
	if ((rnh = nep->ne_rtable[i]) == 0) {
		/*
		 * Seems silly to initialize every AF when most are not used,
		 * do so on demand here
		 */
		SLIST_FOREACH(dom, &domains, dom_next)
			if (dom->dom_family == i && dom->dom_rtattach) {
				dom->dom_rtattach((void **) &nep->ne_rtable[i],
				    dom->dom_rtoffset);
				break;
			}
		if ((rnh = nep->ne_rtable[i]) == 0) {
			error = ENOBUFS;
			goto out;
		}
	}
	rn = (*rnh->rnh_addaddr) ((char *) saddr, (char *) smask, rnh,
	    np->netc_rnodes);
	if (rn == 0 || np != (struct netcred *) rn) {	/* already exists */
		error = EPERM;
		goto out;
	}
	np->netc_exflags = argp->ex_flags;
	np->netc_anon = argp->ex_anon;
	np->netc_anon.cr_ref = 1;
	return (0);
out:
	kfree(np, M_NETADDR);
	return (error);
}

/* ARGSUSED */
static int
vfs_free_netcred(struct radix_node *rn, void *w)
{
	struct radix_node_head *rnh = (struct radix_node_head *) w;

	(*rnh->rnh_deladdr) (rn->rn_key, rn->rn_mask, rnh);
	kfree((caddr_t) rn, M_NETADDR);
	return (0);
}

/*
 * Free the net address hash lists that are hanging off the mount points.
 */
static void
vfs_free_addrlist(struct netexport *nep)
{
	int i;
	struct radix_node_head *rnh;

	for (i = 0; i <= AF_MAX; i++)
		if ((rnh = nep->ne_rtable[i])) {
			(*rnh->rnh_walktree) (rnh, vfs_free_netcred,
			    (caddr_t) rnh);
			kfree((caddr_t) rnh, M_RTABLE);
			nep->ne_rtable[i] = 0;
		}
}

int
vfs_export(struct mount *mp, struct netexport *nep, struct export_args *argp)
{
	int error;

	if (argp->ex_flags & MNT_DELEXPORT) {
		if (mp->mnt_flag & MNT_EXPUBLIC) {
			vfs_setpublicfs(NULL, NULL, NULL);
			mp->mnt_flag &= ~MNT_EXPUBLIC;
		}
		vfs_free_addrlist(nep);
		mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED);
	}
	if (argp->ex_flags & MNT_EXPORTED) {
		if (argp->ex_flags & MNT_EXPUBLIC) {
			if ((error = vfs_setpublicfs(mp, nep, argp)) != 0)
				return (error);
			mp->mnt_flag |= MNT_EXPUBLIC;
		}
		if ((error = vfs_hang_addrlist(mp, nep, argp)))
			return (error);
		mp->mnt_flag |= MNT_EXPORTED;
	}
	return (0);
}


/*
 * Set the publicly exported filesystem (WebNFS).  Currently, only
 * one public filesystem is possible in the spec (RFC 2054 and 2055)
 */
int
vfs_setpublicfs(struct mount *mp, struct netexport *nep,
		struct export_args *argp)
{
	int error;
	struct vnode *rvp;
	char *cp;

	/*
	 * mp == NULL -> invalidate the current info, the FS is
	 * no longer exported.  May be called from either vfs_export
	 * or unmount, so check if it hasn't already been done.
	 */
	if (mp == NULL) {
		if (nfs_pub.np_valid) {
			nfs_pub.np_valid = 0;
			if (nfs_pub.np_index != NULL) {
				FREE(nfs_pub.np_index, M_TEMP);
				nfs_pub.np_index = NULL;
			}
		}
		return (0);
	}

	/*
	 * Only one allowed at a time.
	 */
	if (nfs_pub.np_valid != 0 && mp != nfs_pub.np_mount)
		return (EBUSY);

	/*
	 * Get real filehandle for root of exported FS.
	 */
	bzero((caddr_t)&nfs_pub.np_handle, sizeof(nfs_pub.np_handle));
	nfs_pub.np_handle.fh_fsid = mp->mnt_stat.f_fsid;

	if ((error = VFS_ROOT(mp, &rvp)))
		return (error);

	if ((error = VFS_VPTOFH(rvp, &nfs_pub.np_handle.fh_fid))) {
		vput(rvp);
		return (error);
	}

	/*
	 * If an indexfile was specified, pull it in.  Keep the root
	 * vnode referenced and locked until we are done with it.
	 */
	if (argp->ex_indexfile != NULL) {
		int namelen;

		error = vn_get_namelen(rvp, &namelen);
		if (error) {
			vput(rvp);
			return (error);
		}
		MALLOC(nfs_pub.np_index, char *, namelen, M_TEMP,
		    M_WAITOK);
		error = copyinstr(argp->ex_indexfile, nfs_pub.np_index,
		    namelen, (size_t *)0);
		if (!error) {
			/*
			 * Check for illegal filenames.
			 */
			for (cp = nfs_pub.np_index; *cp; cp++) {
				if (*cp == '/') {
					error = EINVAL;
					break;
				}
			}
		}
		if (error) {
			FREE(nfs_pub.np_index, M_TEMP);
			vput(rvp);
			return (error);
		}
	}
	vput(rvp);

	nfs_pub.np_mount = mp;
	nfs_pub.np_valid = 1;
	return (0);
}

struct netcred *
vfs_export_lookup(struct mount *mp, struct netexport *nep,
		struct sockaddr *nam)
{
	struct netcred *np;
	struct radix_node_head *rnh;
	struct sockaddr *saddr;

	np = NULL;
	if (mp->mnt_flag & MNT_EXPORTED) {
		/*
		 * Lookup in the export list first.
		 */
		if (nam != NULL) {
			saddr = nam;
			rnh = nep->ne_rtable[saddr->sa_family];
			if (rnh != NULL) {
				np = (struct netcred *)
					(*rnh->rnh_matchaddr)((char *)saddr,
							      rnh);
				if (np && np->netc_rnodes->rn_flags & RNF_ROOT)
					np = NULL;
			}
		}
		/*
		 * If no address match, use the default if it exists.
		 */
		if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED)
			np = &nep->ne_defexported;
	}
	return (np);
}

/*
 * Perform msync on all vnodes under a mount point.  The mount point must
 * be locked.  This code is also responsible for lazy-freeing unreferenced
 * vnodes whose VM objects no longer contain pages.
 *
 * NOTE: MNT_WAIT still skips vnodes in the VXLOCK state.
 *
 * NOTE: XXX VOP_PUTPAGES and friends require that the vnode be locked,
 * but vnode_pager_putpages() doesn't lock the vnode.  We have to do it
 * way up in this high level function.
 */
static int vfs_msync_scan1(struct mount *mp, struct vnode *vp, void *data);
static int vfs_msync_scan2(struct mount *mp, struct vnode *vp, void *data);

void
vfs_msync(struct mount *mp, int flags)
{
	int vmsc_flags;

	vmsc_flags = VMSC_GETVP;
	if (flags != MNT_WAIT)
		vmsc_flags |= VMSC_NOWAIT;
	vmntvnodescan(mp, vmsc_flags, vfs_msync_scan1, vfs_msync_scan2,
			(void *)flags);
}

/*
 * scan1 is a fast pre-check.  There could be hundreds of thousands of
 * vnodes, we cannot afford to do anything heavy weight until we have a
 * fairly good indication that there is work to do.
 */
static
int
vfs_msync_scan1(struct mount *mp, struct vnode *vp, void *data)
{
	int flags = (int)data;

	if ((vp->v_flag & VRECLAIMED) == 0) {
		if (vshouldmsync(vp, 0))
			return(0);	/* call scan2 */
		if ((mp->mnt_flag & MNT_RDONLY) == 0 &&
		    (vp->v_flag & VOBJDIRTY) &&
		    (flags == MNT_WAIT || vn_islocked(vp) == 0)) {
			return(0);	/* call scan2 */
		}
	}

	/*
	 * do not call scan2, continue the loop
	 */
	return(-1);
}

/*
 * This callback is handed a locked vnode.
 */
static
int
vfs_msync_scan2(struct mount *mp, struct vnode *vp, void *data)
{
	vm_object_t obj;
	int flags = (int)data;

	if (vp->v_flag & VRECLAIMED)
		return(0);

	if ((mp->mnt_flag & MNT_RDONLY) == 0 && (vp->v_flag & VOBJDIRTY)) {
		if ((obj = vp->v_object) != NULL) {
			vm_object_page_clean(obj, 0, 0,
			    flags == MNT_WAIT ? OBJPC_SYNC : OBJPC_NOSYNC);
		}
	}
	return(0);
}
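
/*
 * Illustrative note (not part of the original source): sync-type
 * callers flush dirty mmap'd pages without stalling, e.g.:
 *
 *	vfs_msync(mp, MNT_NOWAIT);
 *
 * while unmount paths use the MNT_WAIT form to guarantee completion.
 */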

/*
 * Record a process's interest in events which might happen to
 * a vnode.  Because poll uses the historic select-style interface
 * internally, this routine serves as both the ``check for any
 * pending events'' and the ``record my interest in future events''
 * functions.  (These are done together, while the lock is held,
 * to avoid race conditions.)
 */
int
vn_pollrecord(struct vnode *vp, int events)
{
	lwkt_tokref ilock;

	KKASSERT(curthread->td_proc != NULL);

	lwkt_gettoken(&ilock, &vp->v_pollinfo.vpi_token);
	if (vp->v_pollinfo.vpi_revents & events) {
		/*
		 * This leaves events we are not interested
		 * in available for the other process which
		 * presumably had requested them
		 * (otherwise they would never have been
		 * recorded).
		 */
		events &= vp->v_pollinfo.vpi_revents;
		vp->v_pollinfo.vpi_revents &= ~events;

		lwkt_reltoken(&ilock);
		return events;
	}
	vp->v_pollinfo.vpi_events |= events;
	selrecord(curthread, &vp->v_pollinfo.vpi_selinfo);
	lwkt_reltoken(&ilock);
	return 0;
}

/*
 * Note the occurrence of an event.  If the VN_POLLEVENT macro is used,
 * it is possible for us to miss an event due to race conditions, but
 * that condition is expected to be rare, so for the moment it is the
 * preferred interface.
 */
void
vn_pollevent(struct vnode *vp, int events)
{
	lwkt_tokref ilock;

	lwkt_gettoken(&ilock, &vp->v_pollinfo.vpi_token);
	if (vp->v_pollinfo.vpi_events & events) {
		/*
		 * We clear vpi_events so that we don't
		 * call selwakeup() twice if two events are
		 * posted before the polling process(es) is
		 * awakened.  This also ensures that we take at
		 * most one selwakeup() if the polling process
		 * is no longer interested.  However, it does
		 * mean that only one event can be noticed at
		 * a time.  (Perhaps we should only clear those
		 * event bits which we note?) XXX
		 */
		vp->v_pollinfo.vpi_events = 0;	/* &= ~events ??? */
		vp->v_pollinfo.vpi_revents |= events;
		selwakeup(&vp->v_pollinfo.vpi_selinfo);
	}
	lwkt_reltoken(&ilock);
}

/*
 * Wake up anyone polling on vp because it is being revoked.
 * This depends on dead_poll() returning POLLHUP for correct
 * behavior.
 */
void
vn_pollgone(struct vnode *vp)
{
	lwkt_tokref ilock;

	lwkt_gettoken(&ilock, &vp->v_pollinfo.vpi_token);
	if (vp->v_pollinfo.vpi_events) {
		vp->v_pollinfo.vpi_events = 0;
		selwakeup(&vp->v_pollinfo.vpi_selinfo);
	}
	lwkt_reltoken(&ilock);
}
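
/*
 * Illustrative sketch (not part of the original source): a filesystem
 * posting "data arrived" to pollers would typically go through the
 * VN_POLLEVENT macro mentioned above, which checks for registered
 * interest before calling vn_pollevent():
 *
 *	VN_POLLEVENT(vp, POLLIN | POLLRDNORM);
 */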

/*
 * Extract the cdev_t from a VBLK or VCHR.  The vnode must have been opened
 * (or v_rdev might be NULL).
 */
cdev_t
vn_todev(struct vnode *vp)
{
	if (vp->v_type != VBLK && vp->v_type != VCHR)
		return (NOCDEV);
	KKASSERT(vp->v_rdev != NULL);
	return (vp->v_rdev);
}

/*
 * Check if vnode represents a disk device.  The vnode does not need to be
 * opened.
 */
int
vn_isdisk(struct vnode *vp, int *errp)
{
	cdev_t dev;

	if (vp->v_type != VBLK && vp->v_type != VCHR) {
		if (errp != NULL)
			*errp = ENOTBLK;
		return (0);
	}

	if ((dev = vp->v_rdev) == NULL)
		dev = udev2dev(vp->v_udev, (vp->v_type == VBLK));
	if (dev == NULL || dev == NOCDEV) {
		if (errp != NULL)
			*errp = ENXIO;
		return (0);
	}
	if (dev_is_good(dev) == 0) {
		if (errp != NULL)
			*errp = ENXIO;
		return (0);
	}
	if ((dev_dflags(dev) & D_DISK) == 0) {
		if (errp != NULL)
			*errp = ENOTBLK;
		return (0);
	}
	if (errp != NULL)
		*errp = 0;
	return (1);
}

int
vn_get_namelen(struct vnode *vp, int *namelen)
{
	int error, retval[2];

	error = VOP_PATHCONF(vp, _PC_NAME_MAX, retval);
	if (error)
		return (error);
	*namelen = *retval;
	return (0);
}

int
vop_write_dirent(int *error, struct uio *uio, ino_t d_ino, uint8_t d_type,
		uint16_t d_namlen, const char *d_name)
{
	struct dirent *dp;
	size_t len;

	len = _DIRENT_RECLEN(d_namlen);
	if (len > uio->uio_resid)
		return(1);

	dp = kmalloc(len, M_TEMP, M_WAITOK | M_ZERO);

	dp->d_ino = d_ino;
	dp->d_namlen = d_namlen;
	dp->d_type = d_type;
	bcopy(d_name, dp->d_name, d_namlen);

	*error = uiomove((caddr_t)dp, len, uio);

	kfree(dp, M_TEMP);

	return(0);
}
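
/*
 * Illustrative sketch (not part of the original source): a VOP_READDIR
 * implementation emits entries until vop_write_dirent() reports the
 * uio is full (non-zero return).  The "de" names here are hypothetical:
 *
 *	while (have_more_entries) {
 *		if (vop_write_dirent(&error, uio, de->inum, DT_REG,
 *				     de->namelen, de->name))
 *			break;		(out of user buffer space)
 *		if (error)
 *			return (error);
 *	}
 */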