/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * @(#)vfs_subr.c	8.31 (Berkeley) 5/26/95
 * $FreeBSD: src/sys/kern/vfs_subr.c,v 1.249.2.30 2003/04/04 20:35:57 tegge Exp $
 * $DragonFly: src/sys/kern/vfs_subr.c,v 1.71 2006/03/24 18:35:33 dillon Exp $
 */

/*
 * External virtual filesystem routines
 */
#include "opt_ddb.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/buf.h>
#include <sys/conf.h>
#include <sys/dirent.h>
#include <sys/domain.h>
#include <sys/eventhandler.h>
#include <sys/fcntl.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/mount.h>
#include <sys/proc.h>
#include <sys/reboot.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/unistd.h>
#include <sys/vmmeter.h>
#include <sys/vnode.h>

#include <machine/limits.h>

#include <vm/vm.h>
#include <vm/vm_object.h>
#include <vm/vm_extern.h>
#include <vm/vm_kern.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_page.h>
#include <vm/vm_pager.h>
#include <vm/vnode_pager.h>
#include <vm/vm_zone.h>

#include <sys/buf2.h>
#include <sys/thread2.h>

static MALLOC_DEFINE(M_NETADDR, "Export Host", "Export host address structure");

int numvnodes;
SYSCTL_INT(_debug, OID_AUTO, numvnodes, CTLFLAG_RD, &numvnodes, 0, "");
int vfs_fastdev = 1;
SYSCTL_INT(_vfs, OID_AUTO, fastdev, CTLFLAG_RW, &vfs_fastdev, 0, "");

enum vtype iftovt_tab[16] = {
        VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
        VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
};
int vttoif_tab[9] = {
        0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
        S_IFSOCK, S_IFIFO, S_IFMT,
};

static int reassignbufcalls;
SYSCTL_INT(_vfs, OID_AUTO, reassignbufcalls, CTLFLAG_RW,
        &reassignbufcalls, 0, "");
static int reassignbufloops;
SYSCTL_INT(_vfs, OID_AUTO, reassignbufloops, CTLFLAG_RW,
        &reassignbufloops, 0, "");
static int reassignbufsortgood;
SYSCTL_INT(_vfs, OID_AUTO, reassignbufsortgood, CTLFLAG_RW,
        &reassignbufsortgood, 0, "");
static int reassignbufsortbad;
SYSCTL_INT(_vfs, OID_AUTO, reassignbufsortbad, CTLFLAG_RW,
        &reassignbufsortbad, 0, "");
static int reassignbufmethod = 1;
SYSCTL_INT(_vfs, OID_AUTO, reassignbufmethod, CTLFLAG_RW,
        &reassignbufmethod, 0, "");

int nfs_mount_type = -1;
static struct lwkt_token spechash_token;
struct nfs_public nfs_pub;      /* publicly exported FS */

int desiredvnodes;
SYSCTL_INT(_kern, KERN_MAXVNODES, maxvnodes, CTLFLAG_RW,
        &desiredvnodes, 0, "Maximum number of vnodes");

static void     vfs_free_addrlist (struct netexport *nep);
static int      vfs_free_netcred (struct radix_node *rn, void *w);
static int      vfs_hang_addrlist (struct mount *mp, struct netexport *nep,
                                struct export_args *argp);

extern int dev_ref_debug;
extern struct vnodeopv_entry_desc spec_vnodeop_entries[];

/*
 * Red black tree functions
 */
static int rb_buf_compare(struct buf *b1, struct buf *b2);
RB_GENERATE2(buf_rb_tree, buf, b_rbnode, rb_buf_compare, off_t, b_loffset);
RB_GENERATE2(buf_rb_hash, buf, b_rbhash, rb_buf_compare, off_t, b_loffset);

static int
rb_buf_compare(struct buf *b1, struct buf *b2)
{
        if (b1->b_loffset < b2->b_loffset)
                return(-1);
        if (b1->b_loffset > b2->b_loffset)
                return(1);
        return(0);
}
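
/*
 * Illustrative sketch (not from the original source): RB_GENERATE2 emits
 * typed tree operations keyed on b_loffset, which the rest of this file
 * uses for exact-offset lookups and ranged scans, e.g.:
 *
 *	bp = buf_rb_hash_RB_LOOKUP(&vp->v_rbhash_tree, loffset);
 *	RB_SCAN(buf_rb_tree, &vp->v_rbdirty_tree, NULL, callback, &info);
 *
 * A scan callback returns 0 to continue; a negative return stops the scan
 * (see vfsync_bp() below).
 */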

/*
 * Return 0 if the vnode is already on the free list or cannot be placed
 * on the free list.  Return 1 if the vnode can be placed on the free list.
 */
static __inline int
vshouldfree(struct vnode *vp, int usecount)
{
        if (vp->v_flag & VFREE)
                return (0);             /* already free */
        if (vp->v_holdcnt != 0 || vp->v_usecount != usecount)
                return (0);             /* other holders */
        if (vp->v_object &&
            (vp->v_object->ref_count || vp->v_object->resident_page_count)) {
                return (0);
        }
        return (1);
}

/*
 * Initialize the vnode management data structures.
 *
 * Called from vfsinit()
 */
void
vfs_subr_init(void)
{
        /*
         * Desired vnodes is a result of the physical page count
         * and the size of kernel's heap.  It scales in proportion
         * to the amount of available physical memory.  This can
         * cause trouble on 64-bit and large memory platforms.
         */
        /* desiredvnodes = maxproc + vmstats.v_page_count / 4; */
        desiredvnodes =
                min(maxproc + vmstats.v_page_count / 4,
                    2 * (VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS) /
                    (5 * (sizeof(struct vm_object) + sizeof(struct vnode))));

        lwkt_token_init(&spechash_token);
}

/*
 * Knob to control the precision of file timestamps:
 *
 *   0 = seconds only; nanoseconds zeroed.
 *   1 = seconds and nanoseconds, accurate within 1/HZ.
 *   2 = seconds and nanoseconds, truncated to microseconds.
 * >=3 = seconds and nanoseconds, maximum precision.
 */
enum { TSP_SEC, TSP_HZ, TSP_USEC, TSP_NSEC };

static int timestamp_precision = TSP_SEC;
SYSCTL_INT(_vfs, OID_AUTO, timestamp_precision, CTLFLAG_RW,
        &timestamp_precision, 0, "");

/*
 * Get a current timestamp.
 */
void
vfs_timestamp(struct timespec *tsp)
{
        struct timeval tv;

        switch (timestamp_precision) {
        case TSP_SEC:
                tsp->tv_sec = time_second;
                tsp->tv_nsec = 0;
                break;
        case TSP_HZ:
                getnanotime(tsp);
                break;
        case TSP_USEC:
                microtime(&tv);
                TIMEVAL_TO_TIMESPEC(&tv, tsp);
                break;
        case TSP_NSEC:
        default:
                nanotime(tsp);
                break;
        }
}

/*
 * Set vnode attributes to VNOVAL
 */
void
vattr_null(struct vattr *vap)
{
        vap->va_type = VNON;
        vap->va_size = VNOVAL;
        vap->va_bytes = VNOVAL;
        vap->va_mode = VNOVAL;
        vap->va_nlink = VNOVAL;
        vap->va_uid = VNOVAL;
        vap->va_gid = VNOVAL;
        vap->va_fsid = VNOVAL;
        vap->va_fileid = VNOVAL;
        vap->va_blocksize = VNOVAL;
        vap->va_rdev = VNOVAL;
        vap->va_atime.tv_sec = VNOVAL;
        vap->va_atime.tv_nsec = VNOVAL;
        vap->va_mtime.tv_sec = VNOVAL;
        vap->va_mtime.tv_nsec = VNOVAL;
        vap->va_ctime.tv_sec = VNOVAL;
        vap->va_ctime.tv_nsec = VNOVAL;
        vap->va_flags = VNOVAL;
        vap->va_gen = VNOVAL;
        vap->va_vaflags = 0;
        vap->va_fsmid = VNOVAL;
}
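
/*
 * Example (hypothetical caller, assuming the usual VOP_SETATTR(vp, vap,
 * cred, td) signature): setattr-style code zeroes the vattr first so only
 * the fields actually being changed are valid, e.g. truncating to zero:
 *
 *	struct vattr va;
 *
 *	vattr_null(&va);
 *	va.va_size = 0;
 *	error = VOP_SETATTR(vp, &va, cred, td);
 */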

/*
 * Flush out and invalidate all buffers associated with a vnode.
 *
 * vp must be locked.
 */
static int vinvalbuf_bp(struct buf *bp, void *data);

struct vinvalbuf_bp_info {
        struct vnode *vp;
        int slptimeo;
        int lkflags;
        int flags;
};

int
vinvalbuf(struct vnode *vp, int flags, struct thread *td,
        int slpflag, int slptimeo)
{
        struct vinvalbuf_bp_info info;
        int error;
        vm_object_t object;

        /*
         * If we are being asked to save, call fsync to ensure that the inode
         * is updated.
         */
        if (flags & V_SAVE) {
                crit_enter();
                while (vp->v_track_write.bk_active) {
                        vp->v_track_write.bk_waitflag = 1;
                        error = tsleep(&vp->v_track_write, slpflag,
                                        "vinvlbuf", slptimeo);
                        if (error) {
                                crit_exit();
                                return (error);
                        }
                }
                if (!RB_EMPTY(&vp->v_rbdirty_tree)) {
                        crit_exit();
                        if ((error = VOP_FSYNC(vp, MNT_WAIT, td)) != 0)
                                return (error);
                        crit_enter();
                        if (vp->v_track_write.bk_active > 0 ||
                            !RB_EMPTY(&vp->v_rbdirty_tree))
                                panic("vinvalbuf: dirty bufs");
                }
                crit_exit();
        }
        crit_enter();
        info.slptimeo = slptimeo;
        info.lkflags = LK_EXCLUSIVE | LK_SLEEPFAIL;
        if (slpflag & PCATCH)
                info.lkflags |= LK_PCATCH;
        info.flags = flags;
        info.vp = vp;

        /*
         * Flush the buffer cache until nothing is left.
         */
        while (!RB_EMPTY(&vp->v_rbclean_tree) ||
            !RB_EMPTY(&vp->v_rbdirty_tree)) {
                error = RB_SCAN(buf_rb_tree, &vp->v_rbclean_tree, NULL,
                                vinvalbuf_bp, &info);
                if (error == 0) {
                        error = RB_SCAN(buf_rb_tree, &vp->v_rbdirty_tree, NULL,
                                        vinvalbuf_bp, &info);
                }
        }

        /*
         * Wait for I/O to complete.  XXX needs cleaning up.  The vnode can
         * have write I/O in-progress but if there is a VM object then the
         * VM object can also have read-I/O in-progress.
         */
        do {
                while (vp->v_track_write.bk_active > 0) {
                        vp->v_track_write.bk_waitflag = 1;
                        tsleep(&vp->v_track_write, 0, "vnvlbv", 0);
                }
                if (VOP_GETVOBJECT(vp, &object) == 0) {
                        while (object->paging_in_progress)
                                vm_object_pip_sleep(object, "vnvlbx");
                }
        } while (vp->v_track_write.bk_active > 0);

        crit_exit();

        /*
         * Destroy the copy in the VM cache, too.
         */
        if (VOP_GETVOBJECT(vp, &object) == 0) {
                vm_object_page_remove(object, 0, 0,
                        (flags & V_SAVE) ? TRUE : FALSE);
        }

        if (!RB_EMPTY(&vp->v_rbdirty_tree) || !RB_EMPTY(&vp->v_rbclean_tree))
                panic("vinvalbuf: flush failed");
        if (!RB_EMPTY(&vp->v_rbhash_tree))
                panic("vinvalbuf: flush failed, buffers still present");
        return (0);
}

static int
vinvalbuf_bp(struct buf *bp, void *data)
{
        struct vinvalbuf_bp_info *info = data;
        int error;

        if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT)) {
                error = BUF_TIMELOCK(bp, info->lkflags,
                                "vinvalbuf", info->slptimeo);
                if (error == 0) {
                        BUF_UNLOCK(bp);
                        error = ENOLCK;
                }
                if (error == ENOLCK)
                        return(0);
                return (-error);
        }

        KKASSERT(bp->b_vp == info->vp);

        /*
         * XXX Since there are no node locks for NFS, I
         * believe there is a slight chance that a delayed
         * write will occur while sleeping just above, so
         * check for it.  Note that vfs_bio_awrite expects
         * buffers to reside on a queue, while VOP_BWRITE and
         * brelse do not.
         */
        if (((bp->b_flags & (B_DELWRI | B_INVAL)) == B_DELWRI) &&
            (info->flags & V_SAVE)) {
                if (bp->b_vp == info->vp) {
                        if (bp->b_flags & B_CLUSTEROK) {
                                vfs_bio_awrite(bp);
                        } else {
                                bremfree(bp);
                                bp->b_flags |= B_ASYNC;
                                VOP_BWRITE(bp->b_vp, bp);
                        }
                } else {
                        bremfree(bp);
                        VOP_BWRITE(bp->b_vp, bp);
                }
        } else {
                bremfree(bp);
                bp->b_flags |= (B_INVAL | B_NOCACHE | B_RELBUF);
                bp->b_flags &= ~B_ASYNC;
                brelse(bp);
        }
        return(0);
}
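
/*
 * Example (mirrors the call made by vclean() below): flush everything but
 * first write out dirty data by passing V_SAVE:
 *
 *	error = vinvalbuf(vp, V_SAVE, td, 0, 0);
 *
 * Passing flags == 0 instead throws dirty buffers away, which is only
 * appropriate when the underlying data is being destroyed.
 */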

/*
 * Truncate a file's buffer and pages to a specified length.  This
 * is in lieu of the old vinvalbuf mechanism, which performed unneeded
 * sync activity.
 *
 * The vnode must be locked.
 */
static int vtruncbuf_bp_trunc_cmp(struct buf *bp, void *data);
static int vtruncbuf_bp_trunc(struct buf *bp, void *data);
static int vtruncbuf_bp_metasync_cmp(struct buf *bp, void *data);
static int vtruncbuf_bp_metasync(struct buf *bp, void *data);

int
vtruncbuf(struct vnode *vp, struct thread *td, off_t length, int blksize)
{
        off_t truncloffset;
        int count;

        /*
         * Round up to the *next* block, then destroy the buffers in question.
         * Since we are only removing some of the buffers we must rely on the
         * scan count to determine whether a loop is necessary.
         */
        if ((count = (int)(length % blksize)) != 0)
                truncloffset = length + (blksize - count);
        else
                truncloffset = length;

        crit_enter();
        do {
                count = RB_SCAN(buf_rb_tree, &vp->v_rbclean_tree,
                                vtruncbuf_bp_trunc_cmp,
                                vtruncbuf_bp_trunc, &truncloffset);
                count += RB_SCAN(buf_rb_tree, &vp->v_rbdirty_tree,
                                vtruncbuf_bp_trunc_cmp,
                                vtruncbuf_bp_trunc, &truncloffset);
        } while(count);

        /*
         * For safety, fsync any remaining metadata if the file is not being
         * truncated to 0.  Since the metadata does not represent the entire
         * dirty list we have to rely on the hit count to ensure that we get
         * all of it.
         */
        if (length > 0) {
                do {
                        count = RB_SCAN(buf_rb_tree, &vp->v_rbdirty_tree,
                                        vtruncbuf_bp_metasync_cmp,
                                        vtruncbuf_bp_metasync, vp);
                } while (count);
        }

        /*
         * Wait for any in-progress I/O to complete before returning (why?)
         */
        while (vp->v_track_write.bk_active > 0) {
                vp->v_track_write.bk_waitflag = 1;
                tsleep(&vp->v_track_write, 0, "vbtrunc", 0);
        }

        crit_exit();

        vnode_pager_setsize(vp, length);

        return (0);
}

/*
 * The callback buffer is beyond the new file EOF and must be destroyed.
 * Note that the compare function must conform to the RB_SCAN's requirements.
 */
static
int
vtruncbuf_bp_trunc_cmp(struct buf *bp, void *data)
{
        if (bp->b_loffset >= *(off_t *)data)
                return(0);
        return(-1);
}

static
int
vtruncbuf_bp_trunc(struct buf *bp, void *data)
{
        /*
         * Do not try to use a buffer we cannot immediately lock, but sleep
         * anyway to prevent a livelock.  The code will loop until all buffers
         * can be acted upon.
         */
        if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT)) {
                if (BUF_LOCK(bp, LK_EXCLUSIVE|LK_SLEEPFAIL) == 0)
                        BUF_UNLOCK(bp);
        } else {
                bremfree(bp);
                bp->b_flags |= (B_INVAL | B_RELBUF);
                bp->b_flags &= ~B_ASYNC;
                brelse(bp);
        }
        return(1);
}

/*
 * Fsync all meta-data after truncating a file to be non-zero.  Only metadata
 * blocks (with a negative loffset) are scanned.
 * Note that the compare function must conform to the RB_SCAN's requirements.
 */
static int
vtruncbuf_bp_metasync_cmp(struct buf *bp, void *data)
{
        if (bp->b_loffset < 0)
                return(0);
        return(1);
}

static int
vtruncbuf_bp_metasync(struct buf *bp, void *data)
{
        struct vnode *vp = data;

        if (bp->b_flags & B_DELWRI) {
                /*
                 * Do not try to use a buffer we cannot immediately lock,
                 * but sleep anyway to prevent a livelock.  The code will
                 * loop until all buffers can be acted upon.
                 */
                if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT)) {
                        if (BUF_LOCK(bp, LK_EXCLUSIVE|LK_SLEEPFAIL) == 0)
                                BUF_UNLOCK(bp);
                } else {
                        bremfree(bp);
                        if (bp->b_vp == vp) {
                                bp->b_flags |= B_ASYNC;
                        } else {
                                bp->b_flags &= ~B_ASYNC;
                        }
                        VOP_BWRITE(bp->b_vp, bp);
                }
                return(1);
        } else {
                return(0);
        }
}

/*
 * vfsync - implements a multipass fsync on a file which understands
 * dependencies and meta-data.  The passed vnode must be locked.  The
 * waitfor argument may be MNT_WAIT or MNT_NOWAIT, or MNT_LAZY.
 *
 * When fsyncing data asynchronously just do one consolidated pass starting
 * with the most negative block number.  This may not get all the data due
 * to dependencies.
 *
 * When fsyncing data synchronously do a data pass, then a metadata pass,
 * then do additional data+metadata passes to try to get all the data out.
 */
static int vfsync_wait_output(struct vnode *vp,
                    int (*waitoutput)(struct vnode *, struct thread *));
static int vfsync_data_only_cmp(struct buf *bp, void *data);
static int vfsync_meta_only_cmp(struct buf *bp, void *data);
static int vfsync_lazy_range_cmp(struct buf *bp, void *data);
static int vfsync_bp(struct buf *bp, void *data);

struct vfsync_info {
        struct vnode *vp;
        int synchronous;
        int syncdeps;
        int lazycount;
        int lazylimit;
        int skippedbufs;
        off_t loffset;
        int (*checkdef)(struct buf *);
};

int
vfsync(struct vnode *vp, int waitfor, int passes, off_t loffset,
        int (*checkdef)(struct buf *),
        int (*waitoutput)(struct vnode *, struct thread *))
{
        struct vfsync_info info;
        int error;

        bzero(&info, sizeof(info));
        info.vp = vp;
        info.loffset = loffset;
        if ((info.checkdef = checkdef) == NULL)
                info.syncdeps = 1;

        crit_enter();

        switch(waitfor) {
        case MNT_LAZY:
                /*
                 * Lazy (filesystem syncer typically).  Asynchronous, plus
                 * limit the number of data (not meta) pages we try to
                 * flush to 1MB.  A non-zero return means the lazy limit
                 * was reached.
                 */
                info.lazylimit = 1024 * 1024;
                info.syncdeps = 1;
                error = RB_SCAN(buf_rb_tree, &vp->v_rbdirty_tree,
                                vfsync_lazy_range_cmp, vfsync_bp, &info);
                RB_SCAN(buf_rb_tree, &vp->v_rbdirty_tree,
                        vfsync_meta_only_cmp, vfsync_bp, &info);
                if (error == 0)
                        vp->v_lazyw = 0;
                else if (!RB_EMPTY(&vp->v_rbdirty_tree))
                        vn_syncer_add_to_worklist(vp, 1);
                error = 0;
                break;
        case MNT_NOWAIT:
                /*
                 * Asynchronous.  Do a data-only pass and a meta-only pass.
                 */
                info.syncdeps = 1;
                RB_SCAN(buf_rb_tree, &vp->v_rbdirty_tree, vfsync_data_only_cmp,
                        vfsync_bp, &info);
                RB_SCAN(buf_rb_tree, &vp->v_rbdirty_tree, vfsync_meta_only_cmp,
                        vfsync_bp, &info);
                error = 0;
                break;
        default:
                /*
                 * Synchronous.  Do a data-only pass, then a meta-data+data
                 * pass, then additional integrated passes to try to get
                 * all the dependencies flushed.
                 */
                RB_SCAN(buf_rb_tree, &vp->v_rbdirty_tree, vfsync_data_only_cmp,
                        vfsync_bp, &info);
                error = vfsync_wait_output(vp, waitoutput);
                if (error == 0) {
                        info.skippedbufs = 0;
                        RB_SCAN(buf_rb_tree, &vp->v_rbdirty_tree, NULL,
                                vfsync_bp, &info);
                        error = vfsync_wait_output(vp, waitoutput);
                        if (info.skippedbufs)
                                printf("Warning: vfsync skipped %d dirty bufs in pass2!\n", info.skippedbufs);
                }
                while (error == 0 && passes > 0 &&
                    !RB_EMPTY(&vp->v_rbdirty_tree)) {
                        if (--passes == 0) {
                                info.synchronous = 1;
                                info.syncdeps = 1;
                        }
                        error = RB_SCAN(buf_rb_tree, &vp->v_rbdirty_tree, NULL,
                                vfsync_bp, &info);
                        if (error < 0)
                                error = -error;
                        info.syncdeps = 1;
                        if (error == 0)
                                error = vfsync_wait_output(vp, waitoutput);
                }
                break;
        }
        crit_exit();
        return(error);
}
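
/*
 * Example (hypothetical caller): a filesystem's VOP_FSYNC implementation
 * with no dependency tracking can delegate the whole multipass flush to
 * vfsync(); both callbacks may be NULL and NOOFFSET disables the EOF
 * invalidation check:
 *
 *	return (vfsync(vp, waitfor, 1, NOOFFSET, NULL, NULL));
 */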

static int
vfsync_wait_output(struct vnode *vp, int (*waitoutput)(struct vnode *, struct thread *))
{
        int error = 0;

        while (vp->v_track_write.bk_active) {
                vp->v_track_write.bk_waitflag = 1;
                tsleep(&vp->v_track_write, 0, "fsfsn", 0);
        }
        if (waitoutput)
                error = waitoutput(vp, curthread);
        return(error);
}

static int
vfsync_data_only_cmp(struct buf *bp, void *data)
{
        if (bp->b_loffset < 0)
                return(-1);
        return(0);
}

static int
vfsync_meta_only_cmp(struct buf *bp, void *data)
{
        if (bp->b_loffset < 0)
                return(0);
        return(1);
}

static int
vfsync_lazy_range_cmp(struct buf *bp, void *data)
{
        struct vfsync_info *info = data;
        if (bp->b_loffset < info->vp->v_lazyw)
                return(-1);
        return(0);
}

static int
vfsync_bp(struct buf *bp, void *data)
{
        struct vfsync_info *info = data;
        struct vnode *vp = info->vp;
        int error;

        /*
         * If syncdeps is not set we do not try to write buffers which have
         * dependencies.
         */
        if (!info->synchronous && info->syncdeps == 0 && info->checkdef(bp))
                return(0);

        /*
         * Ignore buffers that we cannot immediately lock.  XXX
         */
        if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT)) {
                printf("Warning: vfsync_bp skipping dirty buffer %p\n", bp);
                ++info->skippedbufs;
                return(0);
        }
        if ((bp->b_flags & B_DELWRI) == 0)
                panic("vfsync_bp: buffer not dirty");
        if (vp != bp->b_vp)
                panic("vfsync_bp: buffer vp mismatch");

        /*
         * B_NEEDCOMMIT (primarily used by NFS) is a state where the buffer
         * has been written but an additional handshake with the device
         * is required before we can dispose of the buffer.  We have no idea
         * how to do this so we have to skip these buffers.
         */
        if (bp->b_flags & B_NEEDCOMMIT) {
                BUF_UNLOCK(bp);
                return(0);
        }

        /*
         * (LEGACY FROM UFS, REMOVE WHEN POSSIBLE) - invalidate any dirty
         * buffers beyond the file EOF.
         */
        if (info->loffset != NOOFFSET && vp->v_type == VREG &&
            bp->b_loffset >= info->loffset) {
                bremfree(bp);
                bp->b_flags |= B_INVAL | B_NOCACHE;
                crit_exit();
                brelse(bp);
                crit_enter();
        }

        if (info->synchronous) {
                /*
                 * Synchronous flushing.  An error may be returned.
                 */
                bremfree(bp);
                crit_exit();
                error = bwrite(bp);
                crit_enter();
        } else {
                /*
                 * Asynchronous flushing.  A negative return value simply
                 * stops the scan and is not considered an error.  We use
                 * this to support limited MNT_LAZY flushes.
                 */
                vp->v_lazyw = bp->b_loffset;
                if ((vp->v_flag & VOBJBUF) && (bp->b_flags & B_CLUSTEROK)) {
                        info->lazycount += vfs_bio_awrite(bp);
                } else {
                        info->lazycount += bp->b_bufsize;
                        bremfree(bp);
                        crit_exit();
                        bawrite(bp);
                        crit_enter();
                }
                if (info->lazylimit && info->lazycount >= info->lazylimit)
                        error = 1;
                else
                        error = 0;
        }
        return(-error);
}

/*
 * Associate a buffer with a vnode.
 */
void
bgetvp(struct vnode *vp, struct buf *bp)
{
        KASSERT(bp->b_vp == NULL, ("bgetvp: not free"));
        KKASSERT((bp->b_flags & (B_HASHED|B_DELWRI)) == 0);
        KKASSERT((bp->b_xflags & (BX_VNCLEAN|BX_VNDIRTY)) == 0);

        vhold(vp);
        /*
         * Insert onto list for new vnode.
         */
        crit_enter();
        bp->b_vp = vp;
        bp->b_flags |= B_HASHED;
        if (buf_rb_hash_RB_INSERT(&vp->v_rbhash_tree, bp))
                panic("reassignbuf: dup lblk vp %p bp %p", vp, bp);

        bp->b_xflags |= BX_VNCLEAN;
        if (buf_rb_tree_RB_INSERT(&vp->v_rbclean_tree, bp))
                panic("reassignbuf: dup lblk/clean vp %p bp %p", vp, bp);
        crit_exit();
}

/*
 * Disassociate a buffer from a vnode.
 */
void
brelvp(struct buf *bp)
{
        struct vnode *vp;

        KASSERT(bp->b_vp != NULL, ("brelvp: NULL"));

        /*
         * Delete from old vnode list, if on one.
         */
        vp = bp->b_vp;
        crit_enter();
        if (bp->b_xflags & (BX_VNDIRTY | BX_VNCLEAN)) {
                if (bp->b_xflags & BX_VNDIRTY)
                        buf_rb_tree_RB_REMOVE(&vp->v_rbdirty_tree, bp);
                else
                        buf_rb_tree_RB_REMOVE(&vp->v_rbclean_tree, bp);
                bp->b_xflags &= ~(BX_VNDIRTY | BX_VNCLEAN);
        }
        if (bp->b_flags & B_HASHED) {
                buf_rb_hash_RB_REMOVE(&vp->v_rbhash_tree, bp);
                bp->b_flags &= ~B_HASHED;
        }
        if ((vp->v_flag & VONWORKLST) && RB_EMPTY(&vp->v_rbdirty_tree)) {
                vp->v_flag &= ~VONWORKLST;
                LIST_REMOVE(vp, v_synclist);
        }
        crit_exit();
        bp->b_vp = NULL;
        vdrop(vp);
}

/*
 * Associate a p-buffer with a vnode.
 *
 * Also sets B_PAGING flag to indicate that vnode is not fully associated
 * with the buffer.  i.e. the bp has not been linked into the vnode or
 * ref-counted.
 */
void
pbgetvp(struct vnode *vp, struct buf *bp)
{
        KASSERT(bp->b_vp == NULL, ("pbgetvp: not free"));
        KKASSERT((bp->b_flags & B_HASHED) == 0);

        bp->b_vp = vp;
        bp->b_flags |= B_PAGING;
}

/*
 * Disassociate a p-buffer from a vnode.
 */
void
pbrelvp(struct buf *bp)
{
        KASSERT(bp->b_vp != NULL, ("pbrelvp: NULL"));
        KKASSERT((bp->b_flags & B_HASHED) == 0);

        bp->b_vp = NULL;
        bp->b_flags &= ~B_PAGING;
}
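
/*
 * Illustrative sketch (not from the original source): a p-buffer is
 * associated for the duration of a single I/O only and carries no vnode
 * ref or hash/tree linkage, so the pairing is simply:
 *
 *	pbgetvp(vp, bp);
 *	... issue the I/O and wait for it to complete ...
 *	pbrelvp(bp);
 */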

/*
 * Reassign the buffer to the proper clean/dirty list based on B_DELWRI.
 * This routine is called when the state of the B_DELWRI bit is changed.
 *
 * B_PAGING flagged buffers cannot be reassigned because their vp
 * is not fully linked in.
 */
void
reassignbuf(struct buf *bp)
{
        struct vnode *vp = bp->b_vp;
        int delay;

        KKASSERT(vp != NULL);
        ++reassignbufcalls;

        if (bp->b_flags & B_PAGING)
                panic("cannot reassign paging buffer");

        crit_enter();
        if (bp->b_flags & B_DELWRI) {
                /*
                 * Move to the dirty list, add the vnode to the worklist
                 */
                if (bp->b_xflags & BX_VNCLEAN) {
                        buf_rb_tree_RB_REMOVE(&vp->v_rbclean_tree, bp);
                        bp->b_xflags &= ~BX_VNCLEAN;
                }
                if ((bp->b_xflags & BX_VNDIRTY) == 0) {
                        if (buf_rb_tree_RB_INSERT(&vp->v_rbdirty_tree, bp)) {
                                panic("reassignbuf: dup lblk vp %p bp %p",
                                      vp, bp);
                        }
                        bp->b_xflags |= BX_VNDIRTY;
                }
                if ((vp->v_flag & VONWORKLST) == 0) {
                        switch (vp->v_type) {
                        case VDIR:
                                delay = dirdelay;
                                break;
                        case VCHR:
                        case VBLK:
                                if (vp->v_rdev &&
                                    vp->v_rdev->si_mountpoint != NULL) {
                                        delay = metadelay;
                                        break;
                                }
                                /* fall through */
                        default:
                                delay = filedelay;
                        }
                        vn_syncer_add_to_worklist(vp, delay);
                }
        } else {
                /*
                 * Move to the clean list, remove the vnode from the worklist
                 * if no dirty blocks remain.
                 */
                if (bp->b_xflags & BX_VNDIRTY) {
                        buf_rb_tree_RB_REMOVE(&vp->v_rbdirty_tree, bp);
                        bp->b_xflags &= ~BX_VNDIRTY;
                }
                if ((bp->b_xflags & BX_VNCLEAN) == 0) {
                        if (buf_rb_tree_RB_INSERT(&vp->v_rbclean_tree, bp)) {
                                panic("reassignbuf: dup lblk vp %p bp %p",
                                      vp, bp);
                        }
                        bp->b_xflags |= BX_VNCLEAN;
                }
                if ((vp->v_flag & VONWORKLST) &&
                    RB_EMPTY(&vp->v_rbdirty_tree)) {
                        vp->v_flag &= ~VONWORKLST;
                        LIST_REMOVE(vp, v_synclist);
                }
        }
        crit_exit();
}

/*
 * Create a vnode for a block device.
 * Used for mounting the root file system.
 */
int
bdevvp(dev_t dev, struct vnode **vpp)
{
        struct vnode *vp;
        struct vnode *nvp;
        int error;

        if (dev == NODEV) {
                *vpp = NULLVP;
                return (ENXIO);
        }
        error = getspecialvnode(VT_NON, NULL, &spec_vnode_vops, &nvp, 0, 0);
        if (error) {
                *vpp = NULLVP;
                return (error);
        }
        vp = nvp;
        vp->v_type = VCHR;
        vp->v_udev = dev->si_udev;
        vx_unlock(vp);
        *vpp = vp;
        return (0);
}

int
v_associate_rdev(struct vnode *vp, dev_t dev)
{
        lwkt_tokref ilock;

        if (dev == NULL || dev == NODEV)
                return(ENXIO);
        if (dev_is_good(dev) == 0)
                return(ENXIO);
        KKASSERT(vp->v_rdev == NULL);
        if (dev_ref_debug)
                printf("Z1");
        vp->v_rdev = reference_dev(dev);
        lwkt_gettoken(&ilock, &spechash_token);
        SLIST_INSERT_HEAD(&dev->si_hlist, vp, v_specnext);
        lwkt_reltoken(&ilock);
        return(0);
}

void
v_release_rdev(struct vnode *vp)
{
        lwkt_tokref ilock;
        dev_t dev;

        if ((dev = vp->v_rdev) != NULL) {
                lwkt_gettoken(&ilock, &spechash_token);
                SLIST_REMOVE(&dev->si_hlist, vp, vnode, v_specnext);
                if (dev_ref_debug && vp->v_opencount != 0) {
                        printf("releasing rdev with non-0 "
                                "v_opencount(%d) (revoked?)\n",
                                vp->v_opencount);
                }
                vp->v_rdev = NULL;
                vp->v_opencount = 0;
                release_dev(dev);
                lwkt_reltoken(&ilock);
        }
}
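
/*
 * Example (hypothetical boot-time caller): mounting the root filesystem
 * needs a vnode for the root device before any filesystem is up:
 *
 *	if (bdevvp(rootdev, &rootvp))
 *		panic("bdevvp: cannot get root device vnode");
 */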

/*
 * Add a vnode to the alias list hung off the dev_t.  We only associate
 * the device number with the vnode.  The actual device is not associated
 * until the vnode is opened (usually in spec_open()), and will be
 * disassociated on last close.
 */
void
addaliasu(struct vnode *nvp, udev_t nvp_udev)
{
        if (nvp->v_type != VBLK && nvp->v_type != VCHR)
                panic("addaliasu on non-special vnode");
        nvp->v_udev = nvp_udev;
}

/*
 * Disassociate a vnode from its underlying filesystem.
 *
 * The vnode must be VX locked and refd
 *
 * If there are v_usecount references to the vnode other than ours we have
 * to VOP_CLOSE the vnode before we can deactivate and reclaim it.
 */
void
vclean(struct vnode *vp, int flags, struct thread *td)
{
        int active;
        int retflags = 0;

        /*
         * If the vnode has already been reclaimed we have nothing to do.
         */
        if (vp->v_flag & VRECLAIMED)
                return;
        vp->v_flag |= VRECLAIMED;

        /*
         * Scrap the vfs cache
         */
        while (cache_inval_vp(vp, 0, &retflags) != 0) {
                printf("Warning: vnode %p clean/cache_resolution race detected\n", vp);
                tsleep(vp, 0, "vclninv", 2);
        }

        /*
         * Check to see if the vnode is in use.  If so we have to reference it
         * before we clean it out so that its count cannot fall to zero and
         * generate a race against ourselves to recycle it.
         */
        active = (vp->v_usecount > 1);

        /*
         * Clean out any buffers associated with the vnode and destroy its
         * object, if it has one.
         */
        vinvalbuf(vp, V_SAVE, td, 0, 0);
        VOP_DESTROYVOBJECT(vp);

        /*
         * If purging an active vnode, it must be closed and
         * deactivated before being reclaimed.  XXX
         *
         * Note that neither of these routines unlocks the vnode.
         */
        if (active) {
                if (flags & DOCLOSE)
                        VOP_CLOSE(vp, FNONBLOCK, td);
        }

        /*
         * If the vnode has not been deactivated, deactivate it.
         */
        if ((vp->v_flag & VINACTIVE) == 0) {
                vp->v_flag |= VINACTIVE;
                VOP_INACTIVE(vp, td);
        }

        /*
         * Reclaim the vnode.
         */
        if (VOP_RECLAIM(vp, retflags, td))
                panic("vclean: cannot reclaim");

        /*
         * Done with purge, notify sleepers of the grim news.
         */
        vp->v_ops = &dead_vnode_vops;
        vn_pollgone(vp);
        vp->v_tag = VT_NON;
}

/*
 * Eliminate all activity associated with the requested vnode
 * and with all vnodes aliased to the requested vnode.
 *
 * The vnode must be referenced and vx_lock()'d
 *
 * revoke { struct vnode *a_vp, int a_flags }
 */
int
vop_stdrevoke(struct vop_revoke_args *ap)
{
        struct vnode *vp, *vq;
        lwkt_tokref ilock;
        dev_t dev;

        KASSERT((ap->a_flags & REVOKEALL) != 0, ("vop_revoke"));

        vp = ap->a_vp;

        /*
         * If the vnode is already dead don't try to revoke it
         */
        if (vp->v_flag & VRECLAIMED)
                return (0);

        /*
         * If the vnode has a device association, scrap all vnodes associated
         * with the device.  Don't let the device disappear on us while we
         * are scrapping the vnodes.
         *
         * The passed vp will probably show up in the list, do not VX lock
         * it twice!
         */
        if (vp->v_type != VCHR && vp->v_type != VBLK)
                return(0);
        if ((dev = vp->v_rdev) == NULL) {
                if ((dev = udev2dev(vp->v_udev, vp->v_type == VBLK)) == NODEV)
                        return(0);
        }
        reference_dev(dev);
        lwkt_gettoken(&ilock, &spechash_token);
        while ((vq = SLIST_FIRST(&dev->si_hlist)) != NULL) {
                if (vp == vq || vx_get(vq) == 0) {
                        if (vq == SLIST_FIRST(&dev->si_hlist))
                                vgone(vq);
                        if (vp != vq)
                                vx_put(vq);
                }
        }
        lwkt_reltoken(&ilock);
        release_dev(dev);
        return (0);
}

/*
 * Recycle an unused vnode to the front of the free list.
 *
 * Returns 1 if we were successfully able to recycle the vnode,
 * 0 otherwise.
 */
int
vrecycle(struct vnode *vp, struct thread *td)
{
        if (vp->v_usecount == 1) {
                vgone(vp);
                return (1);
        }
        return (0);
}

/*
 * Eliminate all activity associated with a vnode in preparation for reuse.
 *
 * The vnode must be VX locked and refd and will remain VX locked and refd
 * on return.  This routine may be called with the vnode in any state, as
 * long as it is VX locked.  The vnode will be cleaned out and marked
 * VRECLAIMED but will not actually be reused until all existing refs and
 * holds go away.
 *
 * NOTE: This routine may be called on a vnode which has not yet been
 * deactivated (VOP_INACTIVE), or on a vnode which has already been
 * reclaimed.
 *
 * This routine is not responsible for placing us back on the freelist.
 * Instead, it happens automatically when the caller releases the VX lock
 * (assuming there aren't any other references).
 */
void
vgone(struct vnode *vp)
{
        /*
         * Assert that the VX lock is held.  This is an absolute requirement
         * now for vgone() to be called.
         */
        KKASSERT(vp->v_lock.lk_exclusivecount == 1);

        /*
         * Clean out the filesystem specific data and set the VRECLAIMED
         * bit.  Also deactivate the vnode if necessary.
         */
        vclean(vp, DOCLOSE, curthread);

        /*
         * Delete from old mount point vnode list, if on one.
         */
        if (vp->v_mount != NULL)
                insmntque(vp, NULL);

        /*
         * If special device, remove it from special device alias list
         * if it is on one.  This should normally only occur if a vnode is
         * being revoked as the device should otherwise have been released
         * naturally.
         */
        if ((vp->v_type == VBLK || vp->v_type == VCHR) && vp->v_rdev != NULL) {
                v_release_rdev(vp);
        }

        /*
         * Set us to VBAD
         */
        vp->v_type = VBAD;
}

/*
 * Lookup a vnode by device number.
 */
int
vfinddev(dev_t dev, enum vtype type, struct vnode **vpp)
{
        lwkt_tokref ilock;
        struct vnode *vp;

        lwkt_gettoken(&ilock, &spechash_token);
        SLIST_FOREACH(vp, &dev->si_hlist, v_specnext) {
                if (type == vp->v_type) {
                        *vpp = vp;
                        lwkt_reltoken(&ilock);
                        return (1);
                }
        }
        lwkt_reltoken(&ilock);
        return (0);
}

/*
 * Calculate the total number of references to a special device.  This
 * routine may only be called for VBLK and VCHR vnodes since v_rdev is
 * an overloaded field.  Since udev2dev can now return NODEV, we have
 * to check for a NULL v_rdev.
 */
int
count_dev(dev_t dev)
{
        lwkt_tokref ilock;
        struct vnode *vp;
        int count = 0;

        if (SLIST_FIRST(&dev->si_hlist)) {
                lwkt_gettoken(&ilock, &spechash_token);
                SLIST_FOREACH(vp, &dev->si_hlist, v_specnext) {
                        count += vp->v_usecount;
                }
                lwkt_reltoken(&ilock);
        }
        return(count);
}

int
count_udev(udev_t udev)
{
        dev_t dev;

        if ((dev = udev2dev(udev, 0)) == NODEV)
                return(0);
        return(count_dev(dev));
}

int
vcount(struct vnode *vp)
{
        if (vp->v_rdev == NULL)
                return(0);
        return(count_dev(vp->v_rdev));
}

/*
 * Print out a description of a vnode.
 */
static char *typename[] =
{"VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD"};

void
vprint(char *label, struct vnode *vp)
{
        char buf[96];

        if (label != NULL)
                printf("%s: %p: ", label, (void *)vp);
        else
                printf("%p: ", (void *)vp);
        printf("type %s, usecount %d, writecount %d, refcount %d,",
            typename[vp->v_type], vp->v_usecount, vp->v_writecount,
            vp->v_holdcnt);
        buf[0] = '\0';
        if (vp->v_flag & VROOT)
                strcat(buf, "|VROOT");
        if (vp->v_flag & VTEXT)
                strcat(buf, "|VTEXT");
        if (vp->v_flag & VSYSTEM)
                strcat(buf, "|VSYSTEM");
        if (vp->v_flag & VFREE)
                strcat(buf, "|VFREE");
        if (vp->v_flag & VOBJBUF)
                strcat(buf, "|VOBJBUF");
        if (buf[0] != '\0')
                printf(" flags (%s)", &buf[1]);
        if (vp->v_data == NULL) {
                printf("\n");
        } else {
                printf("\n\t");
                VOP_PRINT(vp);
        }
}

#ifdef DDB
#include <ddb/ddb.h>

static int db_show_locked_vnodes(struct mount *mp, void *data);

/*
 * List all of the locked vnodes in the system.
 * Called when debugging the kernel.
 */
DB_SHOW_COMMAND(lockedvnodes, lockedvnodes)
{
        printf("Locked vnodes\n");
        mountlist_scan(db_show_locked_vnodes, NULL,
                MNTSCAN_FORWARD|MNTSCAN_NOBUSY);
}

static int
db_show_locked_vnodes(struct mount *mp, void *data __unused)
{
        struct vnode *vp;

        TAILQ_FOREACH(vp, &mp->mnt_nvnodelist, v_nmntvnodes) {
                if (VOP_ISLOCKED(vp, NULL))
                        vprint((char *)0, vp);
        }
        return(0);
}
#endif

/*
 * Top level filesystem related information gathering.
 */
static int sysctl_ovfs_conf (SYSCTL_HANDLER_ARGS);

static int
vfs_sysctl(SYSCTL_HANDLER_ARGS)
{
        int *name = (int *)arg1 - 1;    /* XXX */
        u_int namelen = arg2 + 1;       /* XXX */
        struct vfsconf *vfsp;

#if 1 || defined(COMPAT_PRELITE2)
        /* Resolve ambiguity between VFS_VFSCONF and VFS_GENERIC. */
        if (namelen == 1)
                return (sysctl_ovfs_conf(oidp, arg1, arg2, req));
#endif

#ifdef notyet
        /* all sysctl names at this level are at least name and field */
        if (namelen < 2)
                return (ENOTDIR);               /* overloaded */
        if (name[0] != VFS_GENERIC) {
                for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
                        if (vfsp->vfc_typenum == name[0])
                                break;
                if (vfsp == NULL)
                        return (EOPNOTSUPP);
                return ((*vfsp->vfc_vfsops->vfs_sysctl)(&name[1], namelen - 1,
                    oldp, oldlenp, newp, newlen, p));
        }
#endif
        switch (name[1]) {
        case VFS_MAXTYPENUM:
                if (namelen != 2)
                        return (ENOTDIR);
                return (SYSCTL_OUT(req, &maxvfsconf, sizeof(int)));
        case VFS_CONF:
                if (namelen != 3)
                        return (ENOTDIR);       /* overloaded */
                for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next)
                        if (vfsp->vfc_typenum == name[2])
                                break;
                if (vfsp == NULL)
                        return (EOPNOTSUPP);
                return (SYSCTL_OUT(req, vfsp, sizeof *vfsp));
        }
        return (EOPNOTSUPP);
}

SYSCTL_NODE(_vfs, VFS_GENERIC, generic, CTLFLAG_RD, vfs_sysctl,
        "Generic filesystem");

#if 1 || defined(COMPAT_PRELITE2)

static int
sysctl_ovfs_conf(SYSCTL_HANDLER_ARGS)
{
        int error;
        struct vfsconf *vfsp;
        struct ovfsconf ovfs;

        for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) {
                bzero(&ovfs, sizeof(ovfs));
                ovfs.vfc_vfsops = vfsp->vfc_vfsops;     /* XXX used as flag */
                strcpy(ovfs.vfc_name, vfsp->vfc_name);
                ovfs.vfc_index = vfsp->vfc_typenum;
                ovfs.vfc_refcount = vfsp->vfc_refcount;
                ovfs.vfc_flags = vfsp->vfc_flags;
                error = SYSCTL_OUT(req, &ovfs, sizeof ovfs);
                if (error)
                        return error;
        }
        return 0;
}

#endif /* 1 || COMPAT_PRELITE2 */

/*
 * Check to see if a filesystem is mounted on a block device.
 */
int
vfs_mountedon(struct vnode *vp)
{
        dev_t dev;

        if ((dev = vp->v_rdev) == NULL)
                dev = udev2dev(vp->v_udev, (vp->v_type == VBLK));
        if (dev != NODEV && dev->si_mountpoint)
                return (EBUSY);
        return (0);
}

/*
 * Unmount all filesystems.  The list is traversed in reverse order
 * of mounting to avoid dependencies.
 */

static int vfs_umountall_callback(struct mount *mp, void *data);

void
vfs_unmountall(void)
{
        struct thread *td = curthread;
        int count;

        if (td->td_proc == NULL)
                td = initproc->p_thread;        /* XXX XXX use proc0 instead? */

        do {
                count = mountlist_scan(vfs_umountall_callback,
                                        &td, MNTSCAN_REVERSE|MNTSCAN_NOBUSY);
        } while (count);
}

static
int
vfs_umountall_callback(struct mount *mp, void *data)
{
        struct thread *td = *(struct thread **)data;
        int error;

        error = dounmount(mp, MNT_FORCE, td);
        if (error) {
                mountlist_remove(mp);
                printf("unmount of filesystem mounted from %s failed (",
                        mp->mnt_stat.f_mntfromname);
                if (error == EBUSY)
                        printf("BUSY)\n");
                else
                        printf("%d)\n", error);
        }
        return(1);
}

/*
 * Build hash lists of net addresses and hang them off the mount point.
 * Called by ufs_mount() to set up the lists of export addresses.
 */
static int
vfs_hang_addrlist(struct mount *mp, struct netexport *nep,
                struct export_args *argp)
{
        struct netcred *np;
        struct radix_node_head *rnh;
        int i;
        struct radix_node *rn;
        struct sockaddr *saddr, *smask = 0;
        struct domain *dom;
        int error;

        if (argp->ex_addrlen == 0) {
                if (mp->mnt_flag & MNT_DEFEXPORTED)
                        return (EPERM);
                np = &nep->ne_defexported;
                np->netc_exflags = argp->ex_flags;
                np->netc_anon = argp->ex_anon;
                np->netc_anon.cr_ref = 1;
                mp->mnt_flag |= MNT_DEFEXPORTED;
                return (0);
        }

        if (argp->ex_addrlen < 0 || argp->ex_addrlen > MLEN)
                return (EINVAL);
        if (argp->ex_masklen < 0 || argp->ex_masklen > MLEN)
                return (EINVAL);

        i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen;
        np = (struct netcred *) malloc(i, M_NETADDR, M_WAITOK);
        bzero((caddr_t) np, i);
        saddr = (struct sockaddr *) (np + 1);
        if ((error = copyin(argp->ex_addr, (caddr_t) saddr, argp->ex_addrlen)))
                goto out;
        if (saddr->sa_len > argp->ex_addrlen)
                saddr->sa_len = argp->ex_addrlen;
        if (argp->ex_masklen) {
                smask = (struct sockaddr *)((caddr_t)saddr + argp->ex_addrlen);
                error = copyin(argp->ex_mask, (caddr_t)smask, argp->ex_masklen);
                if (error)
                        goto out;
                if (smask->sa_len > argp->ex_masklen)
                        smask->sa_len = argp->ex_masklen;
        }
        i = saddr->sa_family;
        if ((rnh = nep->ne_rtable[i]) == 0) {
                /*
                 * Seems silly to initialize every AF when most are not used,
                 * do so on demand here
                 */
                SLIST_FOREACH(dom, &domains, dom_next)
                        if (dom->dom_family == i && dom->dom_rtattach) {
                                dom->dom_rtattach((void **) &nep->ne_rtable[i],
                                    dom->dom_rtoffset);
                                break;
                        }
                if ((rnh = nep->ne_rtable[i]) == 0) {
                        error = ENOBUFS;
                        goto out;
                }
        }
        rn = (*rnh->rnh_addaddr) ((char *) saddr, (char *) smask, rnh,
            np->netc_rnodes);
        if (rn == 0 || np != (struct netcred *) rn) {   /* already exists */
                error = EPERM;
                goto out;
        }
        np->netc_exflags = argp->ex_flags;
        np->netc_anon = argp->ex_anon;
        np->netc_anon.cr_ref = 1;
        return (0);
out:
        free(np, M_NETADDR);
        return (error);
}

/* ARGSUSED */
static int
vfs_free_netcred(struct radix_node *rn, void *w)
{
        struct radix_node_head *rnh = (struct radix_node_head *) w;

        (*rnh->rnh_deladdr) (rn->rn_key, rn->rn_mask, rnh);
        free((caddr_t) rn, M_NETADDR);
        return (0);
}

/*
 * Free the net address hash lists that are hanging off the mount points.
 */
static void
vfs_free_addrlist(struct netexport *nep)
{
        int i;
        struct radix_node_head *rnh;

        for (i = 0; i <= AF_MAX; i++)
                if ((rnh = nep->ne_rtable[i])) {
                        (*rnh->rnh_walktree) (rnh, vfs_free_netcred,
                            (caddr_t) rnh);
                        free((caddr_t) rnh, M_RTABLE);
                        nep->ne_rtable[i] = 0;
                }
}

int
vfs_export(struct mount *mp, struct netexport *nep, struct export_args *argp)
{
        int error;

        if (argp->ex_flags & MNT_DELEXPORT) {
                if (mp->mnt_flag & MNT_EXPUBLIC) {
                        vfs_setpublicfs(NULL, NULL, NULL);
                        mp->mnt_flag &= ~MNT_EXPUBLIC;
                }
                vfs_free_addrlist(nep);
                mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED);
        }
        if (argp->ex_flags & MNT_EXPORTED) {
                if (argp->ex_flags & MNT_EXPUBLIC) {
                        if ((error = vfs_setpublicfs(mp, nep, argp)) != 0)
                                return (error);
                        mp->mnt_flag |= MNT_EXPUBLIC;
                }
                if ((error = vfs_hang_addrlist(mp, nep, argp)))
                        return (error);
                mp->mnt_flag |= MNT_EXPORTED;
        }
        return (0);
}


/*
 * Set the publicly exported filesystem (WebNFS).  Currently, only
 * one public filesystem is possible in the spec (RFC 2054 and 2055)
 */
int
vfs_setpublicfs(struct mount *mp, struct netexport *nep,
                struct export_args *argp)
{
        int error;
        struct vnode *rvp;
        char *cp;

        /*
         * mp == NULL -> invalidate the current info, the FS is
         * no longer exported. May be called from either vfs_export
         * or unmount, so check if it hasn't already been done.
         */
        if (mp == NULL) {
                if (nfs_pub.np_valid) {
                        nfs_pub.np_valid = 0;
                        if (nfs_pub.np_index != NULL) {
                                FREE(nfs_pub.np_index, M_TEMP);
                                nfs_pub.np_index = NULL;
                        }
                }
                return (0);
        }

        /*
         * Only one allowed at a time.
         */
        if (nfs_pub.np_valid != 0 && mp != nfs_pub.np_mount)
                return (EBUSY);

        /*
         * Get real filehandle for root of exported FS.
         */
        bzero((caddr_t)&nfs_pub.np_handle, sizeof(nfs_pub.np_handle));
        nfs_pub.np_handle.fh_fsid = mp->mnt_stat.f_fsid;

        if ((error = VFS_ROOT(mp, &rvp)))
                return (error);

        if ((error = VFS_VPTOFH(rvp, &nfs_pub.np_handle.fh_fid)))
                return (error);

        vput(rvp);

        /*
         * If an indexfile was specified, pull it in.
         */
        if (argp->ex_indexfile != NULL) {
                int namelen;

                error = vn_get_namelen(rvp, &namelen);
                if (error)
                        return (error);
                MALLOC(nfs_pub.np_index, char *, namelen, M_TEMP,
                    M_WAITOK);
                error = copyinstr(argp->ex_indexfile, nfs_pub.np_index,
                    namelen, (size_t *)0);
                if (!error) {
                        /*
                         * Check for illegal filenames.
                         */
                        for (cp = nfs_pub.np_index; *cp; cp++) {
                                if (*cp == '/') {
                                        error = EINVAL;
                                        break;
                                }
                        }
                }
                if (error) {
                        FREE(nfs_pub.np_index, M_TEMP);
                        return (error);
                }
        }

        nfs_pub.np_mount = mp;
        nfs_pub.np_valid = 1;
        return (0);
}
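
/*
 * Example (hypothetical caller; 'ump' and 'args' stand in for a
 * filesystem's private mount data and its mount-syscall arguments): a
 * mount update path installs or tears down an export via vfs_export():
 *
 *	error = vfs_export(mp, &ump->um_export, &args.export);
 */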

struct netcred *
vfs_export_lookup(struct mount *mp, struct netexport *nep,
                struct sockaddr *nam)
{
        struct netcred *np;
        struct radix_node_head *rnh;
        struct sockaddr *saddr;

        np = NULL;
        if (mp->mnt_flag & MNT_EXPORTED) {
                /*
                 * Lookup in the export list first.
                 */
                if (nam != NULL) {
                        saddr = nam;
                        rnh = nep->ne_rtable[saddr->sa_family];
                        if (rnh != NULL) {
                                np = (struct netcred *)
                                        (*rnh->rnh_matchaddr)((char *)saddr,
                                                              rnh);
                                if (np && np->netc_rnodes->rn_flags & RNF_ROOT)
                                        np = NULL;
                        }
                }
                /*
                 * If no address match, use the default if it exists.
                 */
                if (np == NULL && mp->mnt_flag & MNT_DEFEXPORTED)
                        np = &nep->ne_defexported;
        }
        return (np);
}

/*
 * Perform msync on all vnodes under a mount point.  The mount point must
 * be locked.  This code is also responsible for lazy-freeing unreferenced
 * vnodes whose VM objects no longer contain pages.
 *
 * NOTE: MNT_WAIT still skips vnodes in the VXLOCK state.
 *
 * NOTE: XXX VOP_PUTPAGES and friends require that the vnode be locked,
 * but vnode_pager_putpages() doesn't lock the vnode.  We have to do it
 * way up in this high level function.
 */
static int vfs_msync_scan1(struct mount *mp, struct vnode *vp, void *data);
static int vfs_msync_scan2(struct mount *mp, struct vnode *vp, void *data);

void
vfs_msync(struct mount *mp, int flags)
{
        int vmsc_flags;

        vmsc_flags = VMSC_GETVP;
        if (flags != MNT_WAIT)
                vmsc_flags |= VMSC_NOWAIT;
        vmntvnodescan(mp, vmsc_flags, vfs_msync_scan1, vfs_msync_scan2,
                        (void *)flags);
}

/*
 * scan1 is a fast pre-check.  There could be hundreds of thousands of
 * vnodes, we cannot afford to do anything heavy weight until we have a
 * fairly good indication that there is work to do.
 */
static
int
vfs_msync_scan1(struct mount *mp, struct vnode *vp, void *data)
{
        int flags = (int)data;

        if ((vp->v_flag & VRECLAIMED) == 0) {
                if (vshouldfree(vp, 0))
                        return(0);      /* call scan2 */
                if ((mp->mnt_flag & MNT_RDONLY) == 0 &&
                    (vp->v_flag & VOBJDIRTY) &&
                    (flags == MNT_WAIT || VOP_ISLOCKED(vp, NULL) == 0)) {
                        return(0);      /* call scan2 */
                }
        }

        /*
         * do not call scan2, continue the loop
         */
        return(-1);
}

/*
 * This callback is handed a locked vnode.
 */
static
int
vfs_msync_scan2(struct mount *mp, struct vnode *vp, void *data)
{
        vm_object_t obj;
        int flags = (int)data;

        if (vp->v_flag & VRECLAIMED)
                return(0);

        if ((mp->mnt_flag & MNT_RDONLY) == 0 &&
            (vp->v_flag & VOBJDIRTY)) {
                if (VOP_GETVOBJECT(vp, &obj) == 0) {
                        vm_object_page_clean(obj, 0, 0,
                            flags == MNT_WAIT ? OBJPC_SYNC : OBJPC_NOSYNC);
                }
        }
        return(0);
}

/*
 * Create the VM object needed for VMIO and mmap support.  This
 * is done for all VREG files in the system.  Some filesystems might
 * afford the additional metadata buffering capability of the
 * VMIO code by making the device node be VMIO mode also.
 *
 * vp must be locked when vfs_object_create is called.
 */
int
vfs_object_create(struct vnode *vp, struct thread *td)
{
        return (VOP_CREATEVOBJECT(vp, td));
}

/*
 * Record a process's interest in events which might happen to
 * a vnode.  Because poll uses the historic select-style interface
 * internally, this routine serves as both the ``check for any
 * pending events'' and the ``record my interest in future events''
 * functions.  (These are done together, while the lock is held,
 * to avoid race conditions.)
 */
int
vn_pollrecord(struct vnode *vp, struct thread *td, int events)
{
        lwkt_tokref ilock;

        lwkt_gettoken(&ilock, &vp->v_pollinfo.vpi_token);
        if (vp->v_pollinfo.vpi_revents & events) {
                /*
                 * This leaves events we are not interested
                 * in available for the other process which
                 * presumably had requested them
                 * (otherwise they would never have been
                 * recorded).
                 */
                events &= vp->v_pollinfo.vpi_revents;
                vp->v_pollinfo.vpi_revents &= ~events;

                lwkt_reltoken(&ilock);
                return events;
        }
        vp->v_pollinfo.vpi_events |= events;
        selrecord(td, &vp->v_pollinfo.vpi_selinfo);
        lwkt_reltoken(&ilock);
        return 0;
}

/*
 * Note the occurrence of an event.  If the VN_POLLEVENT macro is used,
 * it is possible for us to miss an event due to race conditions, but
 * that condition is expected to be rare, so for the moment it is the
 * preferred interface.
 */
void
vn_pollevent(struct vnode *vp, int events)
{
        lwkt_tokref ilock;

        lwkt_gettoken(&ilock, &vp->v_pollinfo.vpi_token);
        if (vp->v_pollinfo.vpi_events & events) {
                /*
                 * We clear vpi_events so that we don't
                 * call selwakeup() twice if two events are
                 * posted before the polling process(es) is
                 * awakened.  This also ensures that we take at
                 * most one selwakeup() if the polling process
                 * is no longer interested.  However, it does
                 * mean that only one event can be noticed at
                 * a time.  (Perhaps we should only clear those
                 * event bits which we note?) XXX
                 */
                vp->v_pollinfo.vpi_events = 0;  /* &= ~events ??? */
                vp->v_pollinfo.vpi_revents |= events;
                selwakeup(&vp->v_pollinfo.vpi_selinfo);
        }
        lwkt_reltoken(&ilock);
}

/*
 * Wake up anyone polling on vp because it is being revoked.
 * This depends on dead_poll() returning POLLHUP for correct
 * behavior.
 */
void
vn_pollgone(struct vnode *vp)
{
        lwkt_tokref ilock;

        lwkt_gettoken(&ilock, &vp->v_pollinfo.vpi_token);
        if (vp->v_pollinfo.vpi_events) {
                vp->v_pollinfo.vpi_events = 0;
                selwakeup(&vp->v_pollinfo.vpi_selinfo);
        }
        lwkt_reltoken(&ilock);
}
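
/*
 * Illustrative sketch (hypothetical VOP_POLL implementation): a driver
 * records interest via vn_pollrecord() and later reports activity with
 * vn_pollevent():
 *
 *	static int
 *	xxx_poll(struct vop_poll_args *ap)
 *	{
 *		return (vn_pollrecord(ap->a_vp, curthread, ap->a_events));
 *	}
 *
 * with vn_pollevent(vp, POLLIN) called when data becomes available.
 */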

/*
 * Extract the dev_t from a VBLK or VCHR.  The vnode must have been opened
 * (or v_rdev might be NULL).
 */
dev_t
vn_todev(struct vnode *vp)
{
        if (vp->v_type != VBLK && vp->v_type != VCHR)
                return (NODEV);
        KKASSERT(vp->v_rdev != NULL);
        return (vp->v_rdev);
}

/*
 * Check if vnode represents a disk device.  The vnode does not need to be
 * opened.
 */
int
vn_isdisk(struct vnode *vp, int *errp)
{
        dev_t dev;

        if (vp->v_type != VBLK && vp->v_type != VCHR) {
                if (errp != NULL)
                        *errp = ENOTBLK;
                return (0);
        }

        if ((dev = vp->v_rdev) == NULL)
                dev = udev2dev(vp->v_udev, (vp->v_type == VBLK));
        if (dev == NULL || dev == NODEV) {
                if (errp != NULL)
                        *errp = ENXIO;
                return (0);
        }
        if (dev_is_good(dev) == 0) {
                if (errp != NULL)
                        *errp = ENXIO;
                return (0);
        }
        if ((dev_dflags(dev) & D_DISK) == 0) {
                if (errp != NULL)
                        *errp = ENOTBLK;
                return (0);
        }
        if (errp != NULL)
                *errp = 0;
        return (1);
}

#ifdef DEBUG_VFS_LOCKS

void
assert_vop_locked(struct vnode *vp, const char *str)
{
        if (vp && IS_LOCKING_VFS(vp) && !VOP_ISLOCKED(vp, NULL)) {
                panic("%s: %p is not locked shared but should be", str, vp);
        }
}

void
assert_vop_unlocked(struct vnode *vp, const char *str)
{
        if (vp && IS_LOCKING_VFS(vp)) {
                if (VOP_ISLOCKED(vp, curthread) == LK_EXCLUSIVE) {
                        panic("%s: %p is locked but should not be", str, vp);
                }
        }
}

#endif

int
vn_get_namelen(struct vnode *vp, int *namelen)
{
        int error, retval[2];

        error = VOP_PATHCONF(vp, _PC_NAME_MAX, retval);
        if (error)
                return (error);
        *namelen = *retval;
        return (0);
}

int
vop_write_dirent(int *error, struct uio *uio, ino_t d_ino, uint8_t d_type,
                uint16_t d_namlen, const char *d_name)
{
        struct dirent *dp;
        size_t len;

        len = _DIRENT_RECLEN(d_namlen);
        if (len > uio->uio_resid)
                return(1);

        dp = malloc(len, M_TEMP, M_WAITOK | M_ZERO);

        dp->d_ino = d_ino;
        dp->d_namlen = d_namlen;
        dp->d_type = d_type;
        bcopy(d_name, dp->d_name, d_namlen);

        *error = uiomove((caddr_t)dp, len, uio);

        free(dp, M_TEMP);

        return(0);
}
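
/*
 * Example (hypothetical VOP_READDIR loop): a filesystem emits entries
 * until the uio fills, stopping when vop_write_dirent() returns non-zero:
 *
 *	while (more entries) {
 *		if (vop_write_dirent(&error, uio, ino, type, namlen, name))
 *			break;		-- out of space in the uio
 *		if (error)
 *			return (error);
 *	}
 */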