1 /*- 2 * Copyright (c) 2005, 2006 The NetBSD Foundation, Inc. 3 * All rights reserved. 4 * 5 * This code is derived from software contributed to The NetBSD Foundation 6 * by Julio M. Merino Vidal, developed as part of Google's Summer of Code 7 * 2005 program. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 19 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 20 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 21 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 22 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 23 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 24 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 25 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 26 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 27 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 28 * POSSIBILITY OF SUCH DAMAGE. 29 * 30 * $NetBSD: tmpfs_vnops.c,v 1.39 2007/07/23 15:41:01 jmmv Exp $ 31 */ 32 33 /* 34 * tmpfs vnode interface. 
35 */ 36 37 #include <sys/kernel.h> 38 #include <sys/kern_syscall.h> 39 #include <sys/param.h> 40 #include <sys/uio.h> 41 #include <sys/fcntl.h> 42 #include <sys/lockf.h> 43 #include <sys/priv.h> 44 #include <sys/proc.h> 45 #include <sys/resourcevar.h> 46 #include <sys/sched.h> 47 #include <sys/stat.h> 48 #include <sys/systm.h> 49 #include <sys/sysctl.h> 50 #include <sys/unistd.h> 51 #include <sys/vfsops.h> 52 #include <sys/vnode.h> 53 #include <sys/mountctl.h> 54 55 #include <vm/vm.h> 56 #include <vm/vm_extern.h> 57 #include <vm/vm_object.h> 58 #include <vm/vm_page.h> 59 #include <vm/vm_pageout.h> 60 #include <vm/vm_pager.h> 61 #include <vm/swap_pager.h> 62 63 #include <sys/buf2.h> 64 #include <vm/vm_page2.h> 65 66 #include <vfs/fifofs/fifo.h> 67 #include <vfs/tmpfs/tmpfs_vnops.h> 68 #include "tmpfs.h" 69 70 static void tmpfs_strategy_done(struct bio *bio); 71 static void tmpfs_move_pages(vm_object_t src, vm_object_t dst, int movflags); 72 73 /* 74 * bufcache_mode: 75 * 0 Normal page queue operation on flush. Try to keep in memory. 76 * 1 Try to cache on flush to swap (default). 77 * 2 Always page to swap (not recommended). 
78 */ 79 __read_mostly static int tmpfs_cluster_rd_enable = 1; 80 __read_mostly static int tmpfs_cluster_wr_enable = 1; 81 __read_mostly int tmpfs_bufcache_mode = 1; 82 SYSCTL_NODE(_vfs, OID_AUTO, tmpfs, CTLFLAG_RW, 0, "TMPFS filesystem"); 83 SYSCTL_INT(_vfs_tmpfs, OID_AUTO, cluster_rd_enable, CTLFLAG_RW, 84 &tmpfs_cluster_rd_enable, 0, ""); 85 SYSCTL_INT(_vfs_tmpfs, OID_AUTO, cluster_wr_enable, CTLFLAG_RW, 86 &tmpfs_cluster_wr_enable, 0, ""); 87 SYSCTL_INT(_vfs_tmpfs, OID_AUTO, bufcache_mode, CTLFLAG_RW, 88 &tmpfs_bufcache_mode, 0, ""); 89 90 #define TMPFS_MOVF_FROMBACKING 0x0001 91 #define TMPFS_MOVF_DEACTIVATE 0x0002 92 93 94 static __inline 95 void 96 tmpfs_knote(struct vnode *vp, int flags) 97 { 98 if (flags) 99 KNOTE(&vp->v_pollinfo.vpi_kqinfo.ki_note, flags); 100 } 101 102 103 /* --------------------------------------------------------------------- */ 104 105 static int 106 tmpfs_nresolve(struct vop_nresolve_args *ap) 107 { 108 struct vnode *dvp = ap->a_dvp; 109 struct vnode *vp = NULL; 110 struct namecache *ncp = ap->a_nch->ncp; 111 struct tmpfs_node *tnode; 112 struct tmpfs_dirent *de; 113 struct tmpfs_node *dnode; 114 int error; 115 116 dnode = VP_TO_TMPFS_DIR(dvp); 117 118 TMPFS_NODE_LOCK_SH(dnode); 119 loop: 120 de = tmpfs_dir_lookup(dnode, NULL, ncp); 121 if (de == NULL) { 122 error = ENOENT; 123 } else { 124 /* 125 * Allocate a vnode for the node we found. Use 126 * tmpfs_alloc_vp()'s deadlock handling mode. 127 */ 128 tnode = de->td_node; 129 error = tmpfs_alloc_vp(dvp->v_mount, dnode, tnode, 130 LK_EXCLUSIVE | LK_RETRY, &vp); 131 if (error == EAGAIN) 132 goto loop; 133 if (error) 134 goto out; 135 KKASSERT(vp); 136 } 137 138 out: 139 TMPFS_NODE_UNLOCK(dnode); 140 141 if ((dnode->tn_status & TMPFS_NODE_ACCESSED) == 0) { 142 TMPFS_NODE_LOCK(dnode); 143 dnode->tn_status |= TMPFS_NODE_ACCESSED; 144 TMPFS_NODE_UNLOCK(dnode); 145 } 146 147 /* 148 * Store the result of this lookup in the cache. 
Avoid this if the 149 * request was for creation, as it does not improve timings on 150 * emprical tests. 151 */ 152 if (vp) { 153 vn_unlock(vp); 154 cache_setvp(ap->a_nch, vp); 155 vrele(vp); 156 } else if (error == ENOENT) { 157 cache_setvp(ap->a_nch, NULL); 158 } 159 return (error); 160 } 161 162 static int 163 tmpfs_nlookupdotdot(struct vop_nlookupdotdot_args *ap) 164 { 165 struct vnode *dvp = ap->a_dvp; 166 struct vnode **vpp = ap->a_vpp; 167 struct tmpfs_node *dnode = VP_TO_TMPFS_NODE(dvp); 168 struct ucred *cred = ap->a_cred; 169 int error; 170 171 *vpp = NULL; 172 173 /* Check accessibility of requested node as a first step. */ 174 error = VOP_ACCESS(dvp, VEXEC, cred); 175 if (error != 0) 176 return error; 177 178 if (dnode->tn_dir.tn_parent != NULL) { 179 /* Allocate a new vnode on the matching entry. */ 180 error = tmpfs_alloc_vp(dvp->v_mount, 181 NULL, dnode->tn_dir.tn_parent, 182 LK_EXCLUSIVE | LK_RETRY, vpp); 183 184 if (*vpp) 185 vn_unlock(*vpp); 186 } 187 return (*vpp == NULL) ? 
ENOENT : 0; 188 } 189 190 /* --------------------------------------------------------------------- */ 191 192 static int 193 tmpfs_ncreate(struct vop_ncreate_args *ap) 194 { 195 struct vnode *dvp = ap->a_dvp; 196 struct vnode **vpp = ap->a_vpp; 197 struct namecache *ncp = ap->a_nch->ncp; 198 struct vattr *vap = ap->a_vap; 199 struct ucred *cred = ap->a_cred; 200 int error; 201 202 KKASSERT(vap->va_type == VREG || vap->va_type == VSOCK); 203 204 error = tmpfs_alloc_file(dvp, vpp, vap, ncp, cred, NULL); 205 if (error == 0) { 206 cache_setunresolved(ap->a_nch); 207 cache_setvp(ap->a_nch, *vpp); 208 tmpfs_knote(dvp, NOTE_WRITE); 209 } 210 return (error); 211 } 212 /* --------------------------------------------------------------------- */ 213 214 static int 215 tmpfs_nmknod(struct vop_nmknod_args *ap) 216 { 217 struct vnode *dvp = ap->a_dvp; 218 struct vnode **vpp = ap->a_vpp; 219 struct namecache *ncp = ap->a_nch->ncp; 220 struct vattr *vap = ap->a_vap; 221 struct ucred *cred = ap->a_cred; 222 int error; 223 224 if (vap->va_type != VBLK && vap->va_type != VCHR && 225 vap->va_type != VFIFO) { 226 return (EINVAL); 227 } 228 229 error = tmpfs_alloc_file(dvp, vpp, vap, ncp, cred, NULL); 230 if (error == 0) { 231 cache_setunresolved(ap->a_nch); 232 cache_setvp(ap->a_nch, *vpp); 233 tmpfs_knote(dvp, NOTE_WRITE); 234 } 235 return error; 236 } 237 238 /* --------------------------------------------------------------------- */ 239 240 static int 241 tmpfs_open(struct vop_open_args *ap) 242 { 243 struct vnode *vp = ap->a_vp; 244 int mode = ap->a_mode; 245 struct tmpfs_node *node; 246 int error; 247 248 node = VP_TO_TMPFS_NODE(vp); 249 250 #if 0 251 /* The file is still active but all its names have been removed 252 * (e.g. by a "rmdir $(pwd)"). It cannot be opened any more as 253 * it is about to die. */ 254 if (node->tn_links < 1) 255 return (ENOENT); 256 #endif 257 258 /* If the file is marked append-only, deny write requests. 
*/ 259 if ((node->tn_flags & APPEND) && 260 (mode & (FWRITE | O_APPEND)) == FWRITE) { 261 error = EPERM; 262 } else { 263 if (node->tn_reg.tn_pages_in_aobj) { 264 TMPFS_NODE_LOCK(node); 265 if (node->tn_reg.tn_pages_in_aobj) { 266 tmpfs_move_pages(node->tn_reg.tn_aobj, 267 vp->v_object, 268 TMPFS_MOVF_FROMBACKING); 269 node->tn_reg.tn_pages_in_aobj = 0; 270 } 271 TMPFS_NODE_UNLOCK(node); 272 } 273 error = vop_stdopen(ap); 274 } 275 276 return (error); 277 } 278 279 /* --------------------------------------------------------------------- */ 280 281 static int 282 tmpfs_close(struct vop_close_args *ap) 283 { 284 struct vnode *vp = ap->a_vp; 285 struct tmpfs_node *node; 286 int error; 287 288 node = VP_TO_TMPFS_NODE(vp); 289 290 if (node->tn_links > 0) { 291 /* 292 * Update node times. No need to do it if the node has 293 * been deleted, because it will vanish after we return. 294 */ 295 tmpfs_update(vp); 296 } 297 298 error = vop_stdclose(ap); 299 300 return (error); 301 } 302 303 /* --------------------------------------------------------------------- */ 304 305 int 306 tmpfs_access(struct vop_access_args *ap) 307 { 308 struct vnode *vp = ap->a_vp; 309 int error; 310 struct tmpfs_node *node; 311 312 node = VP_TO_TMPFS_NODE(vp); 313 314 switch (vp->v_type) { 315 case VDIR: 316 /* FALLTHROUGH */ 317 case VLNK: 318 /* FALLTHROUGH */ 319 case VREG: 320 if ((ap->a_mode & VWRITE) && 321 (vp->v_mount->mnt_flag & MNT_RDONLY)) { 322 error = EROFS; 323 goto out; 324 } 325 break; 326 327 case VBLK: 328 /* FALLTHROUGH */ 329 case VCHR: 330 /* FALLTHROUGH */ 331 case VSOCK: 332 /* FALLTHROUGH */ 333 case VFIFO: 334 break; 335 336 default: 337 error = EINVAL; 338 goto out; 339 } 340 341 if ((ap->a_mode & VWRITE) && (node->tn_flags & IMMUTABLE)) { 342 error = EPERM; 343 goto out; 344 } 345 346 error = vop_helper_access(ap, node->tn_uid, node->tn_gid, 347 node->tn_mode, 0); 348 out: 349 return error; 350 } 351 352 /* 
--------------------------------------------------------------------- */ 353 354 int 355 tmpfs_getattr(struct vop_getattr_args *ap) 356 { 357 struct vnode *vp = ap->a_vp; 358 struct vattr *vap = ap->a_vap; 359 struct tmpfs_node *node; 360 361 node = VP_TO_TMPFS_NODE(vp); 362 363 tmpfs_update(vp); 364 365 vap->va_type = vp->v_type; 366 vap->va_mode = node->tn_mode; 367 vap->va_nlink = node->tn_links; 368 vap->va_uid = node->tn_uid; 369 vap->va_gid = node->tn_gid; 370 vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0]; 371 vap->va_fileid = node->tn_id; 372 vap->va_size = node->tn_size; 373 vap->va_blocksize = PAGE_SIZE; 374 vap->va_atime.tv_sec = node->tn_atime; 375 vap->va_atime.tv_nsec = node->tn_atimensec; 376 vap->va_mtime.tv_sec = node->tn_mtime; 377 vap->va_mtime.tv_nsec = node->tn_mtimensec; 378 vap->va_ctime.tv_sec = node->tn_ctime; 379 vap->va_ctime.tv_nsec = node->tn_ctimensec; 380 vap->va_gen = node->tn_gen; 381 vap->va_flags = node->tn_flags; 382 if (vp->v_type == VBLK || vp->v_type == VCHR) { 383 vap->va_rmajor = umajor(node->tn_rdev); 384 vap->va_rminor = uminor(node->tn_rdev); 385 } 386 vap->va_bytes = round_page(node->tn_size); 387 vap->va_filerev = 0; 388 389 return 0; 390 } 391 392 /* --------------------------------------------------------------------- */ 393 394 int 395 tmpfs_getattr_quick(struct vop_getattr_args *ap) 396 { 397 struct vnode *vp = ap->a_vp; 398 struct vattr *vap = ap->a_vap; 399 struct tmpfs_node *node; 400 401 node = VP_TO_TMPFS_NODE(vp); 402 403 tmpfs_update(vp); 404 405 vap->va_type = vp->v_type; 406 vap->va_mode = node->tn_mode; 407 vap->va_nlink = node->tn_links; 408 vap->va_uid = node->tn_uid; 409 vap->va_gid = node->tn_gid; 410 vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0]; 411 vap->va_fileid = node->tn_id; 412 vap->va_size = node->tn_size; 413 vap->va_blocksize = PAGE_SIZE; 414 vap->va_gen = node->tn_gen; 415 vap->va_flags = node->tn_flags; 416 if (vp->v_type == VBLK || vp->v_type == VCHR) { 417 vap->va_rmajor = 
umajor(node->tn_rdev); 418 vap->va_rminor = uminor(node->tn_rdev); 419 } 420 vap->va_bytes = -1; 421 vap->va_filerev = 0; 422 423 return 0; 424 } 425 426 427 /* --------------------------------------------------------------------- */ 428 429 int 430 tmpfs_setattr(struct vop_setattr_args *ap) 431 { 432 struct vnode *vp = ap->a_vp; 433 struct vattr *vap = ap->a_vap; 434 struct ucred *cred = ap->a_cred; 435 struct tmpfs_node *node = VP_TO_TMPFS_NODE(vp); 436 int error = 0; 437 int kflags = 0; 438 439 TMPFS_NODE_LOCK(node); 440 if (error == 0 && (vap->va_flags != VNOVAL)) { 441 error = tmpfs_chflags(vp, vap->va_flags, cred); 442 kflags |= NOTE_ATTRIB; 443 } 444 445 if (error == 0 && (vap->va_size != VNOVAL)) { 446 /* restore any saved pages before proceeding */ 447 if (node->tn_reg.tn_pages_in_aobj) { 448 tmpfs_move_pages(node->tn_reg.tn_aobj, vp->v_object, 449 TMPFS_MOVF_FROMBACKING | 450 TMPFS_MOVF_DEACTIVATE); 451 node->tn_reg.tn_pages_in_aobj = 0; 452 } 453 if (vap->va_size > node->tn_size) 454 kflags |= NOTE_WRITE | NOTE_EXTEND; 455 else 456 kflags |= NOTE_WRITE; 457 error = tmpfs_chsize(vp, vap->va_size, cred); 458 } 459 460 if (error == 0 && (vap->va_uid != (uid_t)VNOVAL || 461 vap->va_gid != (gid_t)VNOVAL)) { 462 error = tmpfs_chown(vp, vap->va_uid, vap->va_gid, cred); 463 kflags |= NOTE_ATTRIB; 464 } 465 466 if (error == 0 && (vap->va_mode != (mode_t)VNOVAL)) { 467 error = tmpfs_chmod(vp, vap->va_mode, cred); 468 kflags |= NOTE_ATTRIB; 469 } 470 471 if (error == 0 && ((vap->va_atime.tv_sec != VNOVAL && 472 vap->va_atime.tv_nsec != VNOVAL) || 473 (vap->va_mtime.tv_sec != VNOVAL && 474 vap->va_mtime.tv_nsec != VNOVAL) )) { 475 error = tmpfs_chtimes(vp, &vap->va_atime, &vap->va_mtime, 476 vap->va_vaflags, cred); 477 kflags |= NOTE_ATTRIB; 478 } 479 480 /* 481 * Update the node times. We give preference to the error codes 482 * generated by this function rather than the ones that may arise 483 * from tmpfs_update. 
484 */ 485 tmpfs_update(vp); 486 TMPFS_NODE_UNLOCK(node); 487 tmpfs_knote(vp, kflags); 488 489 return (error); 490 } 491 492 /* --------------------------------------------------------------------- */ 493 494 /* 495 * fsync is usually a NOP, but we must take action when unmounting or 496 * when recycling. 497 */ 498 static int 499 tmpfs_fsync(struct vop_fsync_args *ap) 500 { 501 struct tmpfs_node *node; 502 struct vnode *vp = ap->a_vp; 503 504 node = VP_TO_TMPFS_NODE(vp); 505 506 /* 507 * tmpfs vnodes typically remain dirty, avoid long syncer scans 508 * by forcing removal from the syncer list. 509 */ 510 vn_syncer_remove(vp, 1); 511 512 tmpfs_update(vp); 513 if (vp->v_type == VREG) { 514 if (vp->v_flag & VRECLAIMED) { 515 if (node->tn_links == 0) 516 tmpfs_truncate(vp, 0); 517 else 518 vfsync(ap->a_vp, ap->a_waitfor, 1, NULL, NULL); 519 } 520 } 521 522 return 0; 523 } 524 525 /* --------------------------------------------------------------------- */ 526 527 static int 528 tmpfs_read(struct vop_read_args *ap) 529 { 530 struct buf *bp; 531 struct vnode *vp = ap->a_vp; 532 struct uio *uio = ap->a_uio; 533 struct tmpfs_node *node; 534 off_t base_offset; 535 size_t offset; 536 size_t len; 537 size_t resid; 538 int error; 539 int seqcount; 540 541 /* 542 * Check the basics 543 */ 544 if (uio->uio_offset < 0) 545 return (EINVAL); 546 if (vp->v_type != VREG) 547 return (EINVAL); 548 549 /* 550 * Extract node, try to shortcut the operation through 551 * the VM page cache, allowing us to avoid buffer cache 552 * overheads. 
553 */ 554 node = VP_TO_TMPFS_NODE(vp); 555 resid = uio->uio_resid; 556 seqcount = ap->a_ioflag >> IO_SEQSHIFT; 557 error = vop_helper_read_shortcut(ap); 558 if (error) 559 return error; 560 if (uio->uio_resid == 0) { 561 if (resid) 562 goto finished; 563 return error; 564 } 565 566 /* 567 * restore any saved pages before proceeding 568 */ 569 if (node->tn_reg.tn_pages_in_aobj) { 570 TMPFS_NODE_LOCK(node); 571 if (node->tn_reg.tn_pages_in_aobj) { 572 tmpfs_move_pages(node->tn_reg.tn_aobj, vp->v_object, 573 TMPFS_MOVF_FROMBACKING); 574 node->tn_reg.tn_pages_in_aobj = 0; 575 } 576 TMPFS_NODE_UNLOCK(node); 577 } 578 579 /* 580 * Fall-through to our normal read code. 581 */ 582 while (uio->uio_resid > 0 && uio->uio_offset < node->tn_size) { 583 /* 584 * Use buffer cache I/O (via tmpfs_strategy) 585 */ 586 offset = (size_t)uio->uio_offset & TMPFS_BLKMASK64; 587 base_offset = (off_t)uio->uio_offset - offset; 588 bp = getcacheblk(vp, base_offset, 589 node->tn_blksize, GETBLK_KVABIO); 590 if (bp == NULL) { 591 if (tmpfs_cluster_rd_enable) { 592 error = cluster_readx(vp, node->tn_size, 593 base_offset, 594 node->tn_blksize, 595 B_NOTMETA | B_KVABIO, 596 uio->uio_resid, 597 seqcount * MAXBSIZE, 598 &bp); 599 } else { 600 error = bread_kvabio(vp, base_offset, 601 node->tn_blksize, &bp); 602 } 603 if (error) { 604 brelse(bp); 605 kprintf("tmpfs_read bread error %d\n", error); 606 break; 607 } 608 609 /* 610 * tmpfs pretty much fiddles directly with the VM 611 * system, don't let it exhaust it or we won't play 612 * nice with other processes. 613 * 614 * Only do this if the VOP is coming from a normal 615 * read/write. The VM system handles the case for 616 * UIO_NOCOPY. 617 */ 618 if (uio->uio_segflg != UIO_NOCOPY) 619 vm_wait_nominal(); 620 } 621 bp->b_flags |= B_CLUSTEROK; 622 bkvasync(bp); 623 624 /* 625 * Figure out how many bytes we can actually copy this loop. 
626 */ 627 len = node->tn_blksize - offset; 628 if (len > uio->uio_resid) 629 len = uio->uio_resid; 630 if (len > node->tn_size - uio->uio_offset) 631 len = (size_t)(node->tn_size - uio->uio_offset); 632 633 error = uiomovebp(bp, (char *)bp->b_data + offset, len, uio); 634 bqrelse(bp); 635 if (error) { 636 kprintf("tmpfs_read uiomove error %d\n", error); 637 break; 638 } 639 } 640 641 finished: 642 if ((node->tn_status & TMPFS_NODE_ACCESSED) == 0) { 643 TMPFS_NODE_LOCK(node); 644 node->tn_status |= TMPFS_NODE_ACCESSED; 645 TMPFS_NODE_UNLOCK(node); 646 } 647 return (error); 648 } 649 650 static int 651 tmpfs_write(struct vop_write_args *ap) 652 { 653 struct buf *bp; 654 struct vnode *vp = ap->a_vp; 655 struct uio *uio = ap->a_uio; 656 struct thread *td = uio->uio_td; 657 struct tmpfs_node *node; 658 boolean_t extended; 659 off_t oldsize; 660 int error; 661 off_t base_offset; 662 size_t offset; 663 size_t len; 664 struct rlimit limit; 665 int trivial = 0; 666 int kflags = 0; 667 int seqcount; 668 669 error = 0; 670 if (uio->uio_resid == 0) { 671 return error; 672 } 673 674 node = VP_TO_TMPFS_NODE(vp); 675 676 if (vp->v_type != VREG) 677 return (EINVAL); 678 seqcount = ap->a_ioflag >> IO_SEQSHIFT; 679 680 TMPFS_NODE_LOCK(node); 681 682 /* 683 * restore any saved pages before proceeding 684 */ 685 if (node->tn_reg.tn_pages_in_aobj) { 686 tmpfs_move_pages(node->tn_reg.tn_aobj, vp->v_object, 687 TMPFS_MOVF_FROMBACKING); 688 node->tn_reg.tn_pages_in_aobj = 0; 689 } 690 691 oldsize = node->tn_size; 692 if (ap->a_ioflag & IO_APPEND) 693 uio->uio_offset = node->tn_size; 694 695 /* 696 * Check for illegal write offsets. 697 */ 698 if (uio->uio_offset + uio->uio_resid > 699 VFS_TO_TMPFS(vp->v_mount)->tm_maxfilesize) { 700 error = EFBIG; 701 goto done; 702 } 703 704 /* 705 * NOTE: Ignore if UIO does not come from a user thread (e.g. VN). 
706 */ 707 if (vp->v_type == VREG && td != NULL && td->td_lwp != NULL) { 708 error = kern_getrlimit(RLIMIT_FSIZE, &limit); 709 if (error) 710 goto done; 711 if (uio->uio_offset + uio->uio_resid > limit.rlim_cur) { 712 ksignal(td->td_proc, SIGXFSZ); 713 error = EFBIG; 714 goto done; 715 } 716 } 717 718 /* 719 * Extend the file's size if necessary 720 */ 721 extended = ((uio->uio_offset + uio->uio_resid) > node->tn_size); 722 723 while (uio->uio_resid > 0) { 724 /* 725 * Don't completely blow out running buffer I/O 726 * when being hit from the pageout daemon. 727 */ 728 if (uio->uio_segflg == UIO_NOCOPY && 729 (ap->a_ioflag & IO_RECURSE) == 0) { 730 bwillwrite(node->tn_blksize); 731 } 732 733 /* 734 * Use buffer cache I/O (via tmpfs_strategy) 735 * 736 * Calculate the maximum bytes we can write to the buffer at 737 * this offset (after resizing). 738 */ 739 offset = (size_t)uio->uio_offset & TMPFS_BLKMASK64; 740 base_offset = (off_t)uio->uio_offset - offset; 741 len = uio->uio_resid; 742 if (len > TMPFS_BLKSIZE - offset) 743 len = TMPFS_BLKSIZE - offset; 744 745 if ((uio->uio_offset + len) > node->tn_size) { 746 trivial = (uio->uio_offset <= node->tn_size); 747 error = tmpfs_reg_resize(vp, uio->uio_offset + len, 748 trivial); 749 if (error) 750 break; 751 } 752 753 /* 754 * Read to fill in any gaps. Theoretically we could 755 * optimize this if the write covers the entire buffer 756 * and is not a UIO_NOCOPY write, however this can lead 757 * to a security violation exposing random kernel memory 758 * (whatever junk was in the backing VM pages before). 759 * 760 * So just use bread() to do the right thing. 
761 */ 762 error = bread_kvabio(vp, base_offset, node->tn_blksize, &bp); 763 bkvasync(bp); 764 error = uiomovebp(bp, (char *)bp->b_data + offset, len, uio); 765 if (error) { 766 kprintf("tmpfs_write uiomove error %d\n", error); 767 brelse(bp); 768 break; 769 } 770 771 if (uio->uio_offset > node->tn_size) { 772 node->tn_size = uio->uio_offset; 773 kflags |= NOTE_EXTEND; 774 } 775 kflags |= NOTE_WRITE; 776 777 /* 778 * UIO_NOCOPY is a sensitive state due to potentially being 779 * issued from the pageout daemon while in a low-memory 780 * situation. However, in order to cluster the I/O nicely 781 * (e.g. 64KB+ writes instead of 16KB writes), we still try 782 * to follow the same semantics that any other filesystem 783 * might use. 784 * 785 * For the normal case we buwrite(), dirtying the underlying 786 * VM pages instead of dirtying the buffer and releasing the 787 * buffer as a clean buffer. This allows tmpfs to use 788 * essentially all available memory to cache file data. 789 * If we used bdwrite() the buffer cache would wind up 790 * flushing the data to swap too quickly. 791 * 792 * But because tmpfs can seriously load the VM system we 793 * fall-back to using bdwrite() when free memory starts 794 * to get low. This shifts the load away from the VM system 795 * and makes tmpfs act more like a normal filesystem with 796 * regards to disk activity. 797 * 798 * tmpfs pretty much fiddles directly with the VM 799 * system, don't let it exhaust it or we won't play 800 * nice with other processes. Only do this if the 801 * VOP is coming from a normal read/write. The VM system 802 * handles the case for UIO_NOCOPY. 803 */ 804 bp->b_flags |= B_CLUSTEROK; 805 if (uio->uio_segflg == UIO_NOCOPY) { 806 /* 807 * Flush from the pageout daemon, deal with 808 * potentially very heavy tmpfs write activity 809 * causing long stalls in the pageout daemon 810 * before pages get to free/cache. 
811 * 812 * (a) Under severe pressure setting B_DIRECT will 813 * cause a buffer release to try to free the 814 * underlying pages. 815 * 816 * (b) Under modest memory pressure the B_RELBUF 817 * alone is sufficient to get the pages moved 818 * to the cache. We could also force this by 819 * setting B_NOTMETA but that might have other 820 * unintended side-effects (e.g. setting 821 * PG_NOTMETA on the VM page). 822 * 823 * (c) For the pageout->putpages->generic_putpages-> 824 * UIO_NOCOPY-write (here), issuing an immediate 825 * write prevents any real clustering from 826 * happening because the buffers probably aren't 827 * (yet) marked dirty, or lost due to prior use 828 * of buwrite(). Try to use the normal 829 * cluster_write() mechanism for performance. 830 * 831 * Hopefully this will unblock the VM system more 832 * quickly under extreme tmpfs write load. 833 */ 834 if (vm_page_count_min(vm_page_free_hysteresis)) 835 bp->b_flags |= B_DIRECT; 836 bp->b_flags |= B_AGE | B_RELBUF | B_TTC; 837 bp->b_act_count = 0; /* buffer->deactivate pgs */ 838 if (tmpfs_cluster_wr_enable && 839 (ap->a_ioflag & (IO_SYNC | IO_DIRECT)) == 0) { 840 cluster_write(bp, node->tn_size, 841 node->tn_blksize, seqcount); 842 } else { 843 cluster_awrite(bp); 844 } 845 } else if (vm_pages_needed || vm_paging_needed(0) || 846 tmpfs_bufcache_mode >= 2) { 847 /* 848 * If the pageout daemon is running we cycle the 849 * write through the buffer cache normally to 850 * pipeline the flush, thus avoiding adding any 851 * more memory pressure to the pageout daemon. 852 */ 853 bp->b_act_count = 0; /* buffer->deactivate pgs */ 854 if (tmpfs_cluster_wr_enable) { 855 cluster_write(bp, node->tn_size, 856 node->tn_blksize, seqcount); 857 } else { 858 bdwrite(bp); 859 } 860 } else { 861 /* 862 * Otherwise run the buffer directly through to the 863 * backing VM store, leaving the buffer clean so 864 * buffer limits do not force early flushes to swap. 
865 */ 866 buwrite(bp); 867 /*vm_wait_nominal();*/ 868 } 869 870 if (bp->b_error) { 871 kprintf("tmpfs_write bwrite error %d\n", bp->b_error); 872 break; 873 } 874 } 875 876 if (error) { 877 if (extended) { 878 (void)tmpfs_reg_resize(vp, oldsize, trivial); 879 kflags &= ~NOTE_EXTEND; 880 } 881 goto done; 882 } 883 884 /* 885 * Currently we don't set the mtime on files modified via mmap() 886 * because we can't tell the difference between those modifications 887 * and an attempt by the pageout daemon to flush tmpfs pages to 888 * swap. 889 * 890 * This is because in order to defer flushes as long as possible 891 * buwrite() works by marking the underlying VM pages dirty in 892 * order to be able to dispose of the buffer cache buffer without 893 * flushing it. 894 */ 895 if (uio->uio_segflg == UIO_NOCOPY) { 896 if (vp->v_flag & VLASTWRITETS) { 897 node->tn_mtime = vp->v_lastwrite_ts.tv_sec; 898 node->tn_mtimensec = vp->v_lastwrite_ts.tv_nsec; 899 } 900 } else { 901 node->tn_status |= TMPFS_NODE_MODIFIED; 902 vclrflags(vp, VLASTWRITETS); 903 } 904 905 if (extended) 906 node->tn_status |= TMPFS_NODE_CHANGED; 907 908 if (node->tn_mode & (S_ISUID | S_ISGID)) { 909 if (priv_check_cred(ap->a_cred, PRIV_VFS_RETAINSUGID, 0)) 910 node->tn_mode &= ~(S_ISUID | S_ISGID); 911 } 912 done: 913 TMPFS_NODE_UNLOCK(node); 914 if (kflags) 915 tmpfs_knote(vp, kflags); 916 917 return(error); 918 } 919 920 static int 921 tmpfs_advlock(struct vop_advlock_args *ap) 922 { 923 struct tmpfs_node *node; 924 struct vnode *vp = ap->a_vp; 925 int error; 926 927 node = VP_TO_TMPFS_NODE(vp); 928 error = (lf_advlock(ap, &node->tn_advlock, node->tn_size)); 929 930 return (error); 931 } 932 933 /* 934 * The strategy function is typically only called when memory pressure 935 * forces the system to attempt to pageout pages. It can also be called 936 * by [n]vtruncbuf() when a truncation cuts a page in half. 
Normal write 937 * operations 938 * 939 * We set VKVABIO for VREG files so bp->b_data may not be synchronized to 940 * our cpu. swap_pager_strategy() is all we really use, and it directly 941 * supports this. 942 */ 943 static int 944 tmpfs_strategy(struct vop_strategy_args *ap) 945 { 946 struct bio *bio = ap->a_bio; 947 struct bio *nbio; 948 struct buf *bp = bio->bio_buf; 949 struct vnode *vp = ap->a_vp; 950 struct tmpfs_node *node; 951 vm_object_t uobj; 952 vm_page_t m; 953 int i; 954 955 if (vp->v_type != VREG) { 956 bp->b_resid = bp->b_bcount; 957 bp->b_flags |= B_ERROR | B_INVAL; 958 bp->b_error = EINVAL; 959 biodone(bio); 960 return(0); 961 } 962 963 node = VP_TO_TMPFS_NODE(vp); 964 965 uobj = node->tn_reg.tn_aobj; 966 967 /* 968 * Don't bother flushing to swap if there is no swap, just 969 * ensure that the pages are marked as needing a commit (still). 970 */ 971 if (bp->b_cmd == BUF_CMD_WRITE && vm_swap_size == 0) { 972 for (i = 0; i < bp->b_xio.xio_npages; ++i) { 973 m = bp->b_xio.xio_pages[i]; 974 vm_page_need_commit(m); 975 } 976 bp->b_resid = 0; 977 bp->b_error = 0; 978 biodone(bio); 979 } else { 980 /* 981 * Tell the buffer cache to try to recycle the pages 982 * to PQ_CACHE on release. 983 */ 984 if (tmpfs_bufcache_mode >= 2 || 985 (tmpfs_bufcache_mode == 1 && vm_paging_needed(0))) { 986 bp->b_flags |= B_TTC; 987 } 988 nbio = push_bio(bio); 989 nbio->bio_done = tmpfs_strategy_done; 990 nbio->bio_offset = bio->bio_offset; 991 swap_pager_strategy(uobj, nbio); 992 } 993 return 0; 994 } 995 996 /* 997 * If we were unable to commit the pages to swap make sure they are marked 998 * as needing a commit (again). If we were, clear the flag to allow the 999 * pages to be freed. 1000 * 1001 * Do not error-out the buffer. In particular, vinvalbuf() needs to 1002 * always work. 
1003 */ 1004 static void 1005 tmpfs_strategy_done(struct bio *bio) 1006 { 1007 struct buf *bp; 1008 vm_page_t m; 1009 int i; 1010 1011 bp = bio->bio_buf; 1012 1013 if (bp->b_flags & B_ERROR) { 1014 bp->b_flags &= ~B_ERROR; 1015 bp->b_error = 0; 1016 bp->b_resid = 0; 1017 for (i = 0; i < bp->b_xio.xio_npages; ++i) { 1018 m = bp->b_xio.xio_pages[i]; 1019 vm_page_need_commit(m); 1020 } 1021 } else { 1022 for (i = 0; i < bp->b_xio.xio_npages; ++i) { 1023 m = bp->b_xio.xio_pages[i]; 1024 vm_page_clear_commit(m); 1025 } 1026 } 1027 bio = pop_bio(bio); 1028 biodone(bio); 1029 } 1030 1031 /* 1032 * To make write clustering work well make the backing store look 1033 * contiguous to the cluster_*() code. The swap_strategy() function 1034 * will take it from there. 1035 * 1036 * Use MAXBSIZE-sized chunks as a micro-optimization to make random 1037 * flushes leave full-sized gaps. 1038 */ 1039 static int 1040 tmpfs_bmap(struct vop_bmap_args *ap) 1041 { 1042 if (ap->a_doffsetp != NULL) 1043 *ap->a_doffsetp = ap->a_loffset; 1044 if (ap->a_runp != NULL) 1045 *ap->a_runp = MAXBSIZE - (ap->a_loffset & (MAXBSIZE - 1)); 1046 if (ap->a_runb != NULL) 1047 *ap->a_runb = ap->a_loffset & (MAXBSIZE - 1); 1048 1049 return 0; 1050 } 1051 1052 /* --------------------------------------------------------------------- */ 1053 1054 static int 1055 tmpfs_nremove(struct vop_nremove_args *ap) 1056 { 1057 struct vnode *dvp = ap->a_dvp; 1058 struct namecache *ncp = ap->a_nch->ncp; 1059 struct vnode *vp; 1060 int error; 1061 struct tmpfs_dirent *de; 1062 struct tmpfs_mount *tmp; 1063 struct tmpfs_node *dnode; 1064 struct tmpfs_node *node; 1065 1066 /* 1067 * We have to acquire the vp from ap->a_nch because we will likely 1068 * unresolve the namecache entry, and a vrele/vput is needed to 1069 * trigger the tmpfs_inactive/tmpfs_reclaim sequence. 
1070 * 1071 * We have to use vget to clear any inactive state on the vnode, 1072 * otherwise the vnode may remain inactive and thus tmpfs_inactive 1073 * will not get called when we release it. 1074 */ 1075 error = cache_vget(ap->a_nch, ap->a_cred, LK_SHARED, &vp); 1076 KKASSERT(vp->v_mount == dvp->v_mount); 1077 KKASSERT(error == 0); 1078 vn_unlock(vp); 1079 1080 if (vp->v_type == VDIR) { 1081 error = EISDIR; 1082 goto out2; 1083 } 1084 1085 dnode = VP_TO_TMPFS_DIR(dvp); 1086 node = VP_TO_TMPFS_NODE(vp); 1087 tmp = VFS_TO_TMPFS(vp->v_mount); 1088 1089 TMPFS_NODE_LOCK(dnode); 1090 TMPFS_NODE_LOCK(node); 1091 de = tmpfs_dir_lookup(dnode, node, ncp); 1092 if (de == NULL) { 1093 error = ENOENT; 1094 TMPFS_NODE_UNLOCK(node); 1095 TMPFS_NODE_UNLOCK(dnode); 1096 goto out; 1097 } 1098 1099 /* Files marked as immutable or append-only cannot be deleted. */ 1100 if ((node->tn_flags & (IMMUTABLE | APPEND | NOUNLINK)) || 1101 (dnode->tn_flags & APPEND)) { 1102 error = EPERM; 1103 TMPFS_NODE_UNLOCK(node); 1104 TMPFS_NODE_UNLOCK(dnode); 1105 goto out; 1106 } 1107 1108 /* Remove the entry from the directory; as it is a file, we do not 1109 * have to change the number of hard links of the directory. */ 1110 tmpfs_dir_detach(dnode, de); 1111 TMPFS_NODE_UNLOCK(dnode); 1112 1113 /* Free the directory entry we just deleted. Note that the node 1114 * referred by it will not be removed until the vnode is really 1115 * reclaimed. 
*/ 1116 tmpfs_free_dirent(tmp, de); 1117 1118 if (node->tn_links > 0) 1119 node->tn_status |= TMPFS_NODE_CHANGED; 1120 TMPFS_NODE_UNLOCK(node); 1121 1122 cache_unlink(ap->a_nch); 1123 tmpfs_knote(vp, NOTE_DELETE); 1124 error = 0; 1125 1126 out: 1127 if (error == 0) 1128 tmpfs_knote(dvp, NOTE_WRITE); 1129 out2: 1130 vrele(vp); 1131 1132 return error; 1133 } 1134 1135 /* --------------------------------------------------------------------- */ 1136 1137 static int 1138 tmpfs_nlink(struct vop_nlink_args *ap) 1139 { 1140 struct vnode *dvp = ap->a_dvp; 1141 struct vnode *vp = ap->a_vp; 1142 struct namecache *ncp = ap->a_nch->ncp; 1143 struct tmpfs_dirent *de; 1144 struct tmpfs_node *node; 1145 struct tmpfs_node *dnode; 1146 int error; 1147 1148 KKASSERT(dvp != vp); /* XXX When can this be false? */ 1149 1150 node = VP_TO_TMPFS_NODE(vp); 1151 dnode = VP_TO_TMPFS_NODE(dvp); 1152 TMPFS_NODE_LOCK(dnode); 1153 1154 /* XXX: Why aren't the following two tests done by the caller? */ 1155 1156 /* Hard links of directories are forbidden. */ 1157 if (vp->v_type == VDIR) { 1158 error = EPERM; 1159 goto out; 1160 } 1161 1162 /* Cannot create cross-device links. */ 1163 if (dvp->v_mount != vp->v_mount) { 1164 error = EXDEV; 1165 goto out; 1166 } 1167 1168 /* Ensure that we do not overflow the maximum number of links imposed 1169 * by the system. */ 1170 KKASSERT(node->tn_links <= LINK_MAX); 1171 if (node->tn_links >= LINK_MAX) { 1172 error = EMLINK; 1173 goto out; 1174 } 1175 1176 /* We cannot create links of files marked immutable or append-only. */ 1177 if (node->tn_flags & (IMMUTABLE | APPEND)) { 1178 error = EPERM; 1179 goto out; 1180 } 1181 1182 /* Allocate a new directory entry to represent the node. */ 1183 error = tmpfs_alloc_dirent(VFS_TO_TMPFS(vp->v_mount), node, 1184 ncp->nc_name, ncp->nc_nlen, &de); 1185 if (error != 0) 1186 goto out; 1187 1188 /* Insert the new directory entry into the appropriate directory. 
*/ 1189 tmpfs_dir_attach(dnode, de); 1190 1191 /* vp link count has changed, so update node times. */ 1192 1193 TMPFS_NODE_LOCK(node); 1194 node->tn_status |= TMPFS_NODE_CHANGED; 1195 TMPFS_NODE_UNLOCK(node); 1196 tmpfs_update(vp); 1197 1198 tmpfs_knote(vp, NOTE_LINK); 1199 cache_setunresolved(ap->a_nch); 1200 cache_setvp(ap->a_nch, vp); 1201 error = 0; 1202 1203 out: 1204 TMPFS_NODE_UNLOCK(dnode); 1205 if (error == 0) 1206 tmpfs_knote(dvp, NOTE_WRITE); 1207 return error; 1208 } 1209 1210 /* --------------------------------------------------------------------- */ 1211 1212 static int 1213 tmpfs_nrename(struct vop_nrename_args *ap) 1214 { 1215 struct vnode *fdvp = ap->a_fdvp; 1216 struct namecache *fncp = ap->a_fnch->ncp; 1217 struct vnode *fvp = fncp->nc_vp; 1218 struct vnode *tdvp = ap->a_tdvp; 1219 struct namecache *tncp = ap->a_tnch->ncp; 1220 struct vnode *tvp; 1221 struct tmpfs_dirent *de, *tde; 1222 struct tmpfs_mount *tmp; 1223 struct tmpfs_node *fdnode; 1224 struct tmpfs_node *tdnode; 1225 struct tmpfs_node *fnode; 1226 struct tmpfs_node *tnode; 1227 char *newname; 1228 char *oldname; 1229 int error; 1230 1231 KKASSERT(fdvp->v_mount == fvp->v_mount); 1232 1233 /* 1234 * Because tvp can get overwritten we have to vget it instead of 1235 * just vref or use it, otherwise it's VINACTIVE flag may not get 1236 * cleared and the node won't get destroyed. 1237 */ 1238 error = cache_vget(ap->a_tnch, ap->a_cred, LK_SHARED, &tvp); 1239 if (error == 0) { 1240 tnode = VP_TO_TMPFS_NODE(tvp); 1241 vn_unlock(tvp); 1242 } else { 1243 tnode = NULL; 1244 } 1245 1246 /* Disallow cross-device renames. 1247 * XXX Why isn't this done by the caller? */ 1248 if (fvp->v_mount != tdvp->v_mount || 1249 (tvp != NULL && fvp->v_mount != tvp->v_mount)) { 1250 error = EXDEV; 1251 goto out; 1252 } 1253 1254 tmp = VFS_TO_TMPFS(tdvp->v_mount); 1255 tdnode = VP_TO_TMPFS_DIR(tdvp); 1256 1257 /* If source and target are the same file, there is nothing to do. 
*/ 1258 if (fvp == tvp) { 1259 error = 0; 1260 goto out; 1261 } 1262 1263 fdnode = VP_TO_TMPFS_DIR(fdvp); 1264 fnode = VP_TO_TMPFS_NODE(fvp); 1265 1266 tmpfs_lock4(fdnode, tdnode, fnode, tnode); 1267 1268 de = tmpfs_dir_lookup(fdnode, fnode, fncp); 1269 1270 /* Avoid manipulating '.' and '..' entries. */ 1271 if (de == NULL) { 1272 error = ENOENT; 1273 goto out_locked; 1274 } 1275 KKASSERT(de->td_node == fnode); 1276 1277 /* 1278 * If replacing an entry in the target directory and that entry 1279 * is a directory, it must be empty. 1280 * 1281 * Kern_rename gurantees the destination to be a directory 1282 * if the source is one (it does?). 1283 */ 1284 if (tvp != NULL) { 1285 KKASSERT(tnode != NULL); 1286 1287 if ((tnode->tn_flags & (NOUNLINK | IMMUTABLE | APPEND)) || 1288 (tdnode->tn_flags & (APPEND | IMMUTABLE))) { 1289 error = EPERM; 1290 goto out_locked; 1291 } 1292 1293 if (fnode->tn_type == VDIR && tnode->tn_type == VDIR) { 1294 if (tnode->tn_size > 0) { 1295 error = ENOTEMPTY; 1296 goto out_locked; 1297 } 1298 } else if (fnode->tn_type == VDIR && tnode->tn_type != VDIR) { 1299 error = ENOTDIR; 1300 goto out_locked; 1301 } else if (fnode->tn_type != VDIR && tnode->tn_type == VDIR) { 1302 error = EISDIR; 1303 goto out_locked; 1304 } else { 1305 KKASSERT(fnode->tn_type != VDIR && 1306 tnode->tn_type != VDIR); 1307 } 1308 } 1309 1310 if ((fnode->tn_flags & (NOUNLINK | IMMUTABLE | APPEND)) || 1311 (fdnode->tn_flags & (APPEND | IMMUTABLE))) { 1312 error = EPERM; 1313 goto out_locked; 1314 } 1315 1316 /* 1317 * Ensure that we have enough memory to hold the new name, if it 1318 * has to be changed. 
1319 */ 1320 if (fncp->nc_nlen != tncp->nc_nlen || 1321 bcmp(fncp->nc_name, tncp->nc_name, fncp->nc_nlen) != 0) { 1322 newname = kmalloc(tncp->nc_nlen + 1, tmp->tm_name_zone, 1323 M_WAITOK | M_NULLOK); 1324 if (newname == NULL) { 1325 error = ENOSPC; 1326 goto out_locked; 1327 } 1328 bcopy(tncp->nc_name, newname, tncp->nc_nlen); 1329 newname[tncp->nc_nlen] = '\0'; 1330 } else { 1331 newname = NULL; 1332 } 1333 1334 /* 1335 * Unlink entry from source directory. Note that the kernel has 1336 * already checked for illegal recursion cases (renaming a directory 1337 * into a subdirectory of itself). 1338 */ 1339 if (fdnode != tdnode) { 1340 tmpfs_dir_detach(fdnode, de); 1341 } else { 1342 /* XXX depend on namecache lock */ 1343 KKASSERT(de == tmpfs_dir_lookup(fdnode, fnode, fncp)); 1344 RB_REMOVE(tmpfs_dirtree, &fdnode->tn_dir.tn_dirtree, de); 1345 RB_REMOVE(tmpfs_dirtree_cookie, 1346 &fdnode->tn_dir.tn_cookietree, de); 1347 } 1348 1349 /* 1350 * Handle any name change. Swap with newname, we will 1351 * deallocate it at the end. 1352 */ 1353 if (newname != NULL) { 1354 oldname = de->td_name; 1355 de->td_name = newname; 1356 de->td_namelen = (uint16_t)tncp->nc_nlen; 1357 newname = oldname; 1358 } 1359 1360 /* 1361 * If we are overwriting an entry, we have to remove the old one 1362 * from the target directory. 1363 */ 1364 if (tvp != NULL) { 1365 /* Remove the old entry from the target directory. */ 1366 tde = tmpfs_dir_lookup(tdnode, tnode, tncp); 1367 tmpfs_dir_detach(tdnode, tde); 1368 tmpfs_knote(tdnode->tn_vnode, NOTE_DELETE); 1369 1370 /* 1371 * Free the directory entry we just deleted. Note that the 1372 * node referred by it will not be removed until the vnode is 1373 * really reclaimed. 1374 */ 1375 tmpfs_free_dirent(VFS_TO_TMPFS(tvp->v_mount), tde); 1376 /*cache_inval_vp(tvp, CINV_DESTROY);*/ 1377 } 1378 1379 /* 1380 * Link entry to target directory. If the entry 1381 * represents a directory move the parent linkage 1382 * as well. 
1383 */ 1384 if (fdnode != tdnode) { 1385 if (de->td_node->tn_type == VDIR) { 1386 TMPFS_VALIDATE_DIR(fnode); 1387 } 1388 tmpfs_dir_attach(tdnode, de); 1389 } else { 1390 tdnode->tn_status |= TMPFS_NODE_MODIFIED; 1391 RB_INSERT(tmpfs_dirtree, &tdnode->tn_dir.tn_dirtree, de); 1392 RB_INSERT(tmpfs_dirtree_cookie, 1393 &tdnode->tn_dir.tn_cookietree, de); 1394 } 1395 tmpfs_unlock4(fdnode, tdnode, fnode, tnode); 1396 1397 /* 1398 * Finish up 1399 */ 1400 if (newname) { 1401 kfree(newname, tmp->tm_name_zone); 1402 newname = NULL; 1403 } 1404 cache_rename(ap->a_fnch, ap->a_tnch); 1405 tmpfs_knote(ap->a_fdvp, NOTE_WRITE); 1406 tmpfs_knote(ap->a_tdvp, NOTE_WRITE); 1407 if (fnode->tn_vnode) 1408 tmpfs_knote(fnode->tn_vnode, NOTE_RENAME); 1409 if (tvp) 1410 vrele(tvp); 1411 return 0; 1412 1413 out_locked: 1414 tmpfs_unlock4(fdnode, tdnode, fnode, tnode); 1415 out: 1416 if (tvp) 1417 vrele(tvp); 1418 return error; 1419 } 1420 1421 /* --------------------------------------------------------------------- */ 1422 1423 static int 1424 tmpfs_nmkdir(struct vop_nmkdir_args *ap) 1425 { 1426 struct vnode *dvp = ap->a_dvp; 1427 struct vnode **vpp = ap->a_vpp; 1428 struct namecache *ncp = ap->a_nch->ncp; 1429 struct vattr *vap = ap->a_vap; 1430 struct ucred *cred = ap->a_cred; 1431 int error; 1432 1433 KKASSERT(vap->va_type == VDIR); 1434 1435 error = tmpfs_alloc_file(dvp, vpp, vap, ncp, cred, NULL); 1436 if (error == 0) { 1437 cache_setunresolved(ap->a_nch); 1438 cache_setvp(ap->a_nch, *vpp); 1439 tmpfs_knote(dvp, NOTE_WRITE | NOTE_LINK); 1440 } 1441 return error; 1442 } 1443 1444 /* --------------------------------------------------------------------- */ 1445 1446 static int 1447 tmpfs_nrmdir(struct vop_nrmdir_args *ap) 1448 { 1449 struct vnode *dvp = ap->a_dvp; 1450 struct namecache *ncp = ap->a_nch->ncp; 1451 struct vnode *vp; 1452 struct tmpfs_dirent *de; 1453 struct tmpfs_mount *tmp; 1454 struct tmpfs_node *dnode; 1455 struct tmpfs_node *node; 1456 int error; 1457 1458 /* 1459 * We 
have to acquire the vp from ap->a_nch because we will likely 1460 * unresolve the namecache entry, and a vrele/vput is needed to 1461 * trigger the tmpfs_inactive/tmpfs_reclaim sequence. 1462 * 1463 * We have to use vget to clear any inactive state on the vnode, 1464 * otherwise the vnode may remain inactive and thus tmpfs_inactive 1465 * will not get called when we release it. 1466 */ 1467 error = cache_vget(ap->a_nch, ap->a_cred, LK_SHARED, &vp); 1468 KKASSERT(error == 0); 1469 vn_unlock(vp); 1470 1471 /* 1472 * Prevalidate so we don't hit an assertion later 1473 */ 1474 if (vp->v_type != VDIR) { 1475 error = ENOTDIR; 1476 goto out; 1477 } 1478 1479 tmp = VFS_TO_TMPFS(dvp->v_mount); 1480 dnode = VP_TO_TMPFS_DIR(dvp); 1481 node = VP_TO_TMPFS_DIR(vp); 1482 1483 /* 1484 * 1485 */ 1486 TMPFS_NODE_LOCK(dnode); 1487 TMPFS_NODE_LOCK(node); 1488 1489 /* 1490 * Only empty directories can be removed. 1491 */ 1492 if (node->tn_size > 0) { 1493 error = ENOTEMPTY; 1494 goto out_locked; 1495 } 1496 1497 if ((dnode->tn_flags & APPEND) 1498 || (node->tn_flags & (NOUNLINK | IMMUTABLE | APPEND))) { 1499 error = EPERM; 1500 goto out_locked; 1501 } 1502 1503 /* 1504 * This invariant holds only if we are not trying to 1505 * remove "..". We checked for that above so this is safe now. 1506 */ 1507 KKASSERT(node->tn_dir.tn_parent == dnode); 1508 1509 /* 1510 * Get the directory entry associated with node (vp) 1511 */ 1512 de = tmpfs_dir_lookup(dnode, node, ncp); 1513 KKASSERT(TMPFS_DIRENT_MATCHES(de, ncp->nc_name, ncp->nc_nlen)); 1514 1515 /* Check flags to see if we are allowed to remove the directory. */ 1516 if ((dnode->tn_flags & APPEND) || 1517 node->tn_flags & (NOUNLINK | IMMUTABLE | APPEND)) { 1518 error = EPERM; 1519 goto out_locked; 1520 } 1521 1522 /* Detach the directory entry from the directory (dnode). 
*/ 1523 tmpfs_dir_detach(dnode, de); 1524 1525 /* 1526 * Must set parent linkage to NULL (tested by ncreate to disallow 1527 * the creation of new files/dirs in a deleted directory) 1528 */ 1529 node->tn_status |= TMPFS_NODE_CHANGED; 1530 1531 dnode->tn_status |= TMPFS_NODE_ACCESSED | TMPFS_NODE_CHANGED | 1532 TMPFS_NODE_MODIFIED; 1533 1534 /* Free the directory entry we just deleted. Note that the node 1535 * referred by it will not be removed until the vnode is really 1536 * reclaimed. */ 1537 tmpfs_free_dirent(tmp, de); 1538 1539 /* Release the deleted vnode (will destroy the node, notify 1540 * interested parties and clean it from the cache). */ 1541 1542 dnode->tn_status |= TMPFS_NODE_CHANGED; 1543 1544 TMPFS_NODE_UNLOCK(node); 1545 TMPFS_NODE_UNLOCK(dnode); 1546 1547 tmpfs_update(dvp); 1548 cache_unlink(ap->a_nch); 1549 tmpfs_knote(dvp, NOTE_WRITE | NOTE_LINK); 1550 vrele(vp); 1551 return 0; 1552 1553 out_locked: 1554 TMPFS_NODE_UNLOCK(node); 1555 TMPFS_NODE_UNLOCK(dnode); 1556 1557 out: 1558 vrele(vp); 1559 1560 return error; 1561 } 1562 1563 /* --------------------------------------------------------------------- */ 1564 1565 static int 1566 tmpfs_nsymlink(struct vop_nsymlink_args *ap) 1567 { 1568 struct vnode *dvp = ap->a_dvp; 1569 struct vnode **vpp = ap->a_vpp; 1570 struct namecache *ncp = ap->a_nch->ncp; 1571 struct vattr *vap = ap->a_vap; 1572 struct ucred *cred = ap->a_cred; 1573 char *target = ap->a_target; 1574 int error; 1575 1576 vap->va_type = VLNK; 1577 error = tmpfs_alloc_file(dvp, vpp, vap, ncp, cred, target); 1578 if (error == 0) { 1579 tmpfs_knote(*vpp, NOTE_WRITE); 1580 cache_setunresolved(ap->a_nch); 1581 cache_setvp(ap->a_nch, *vpp); 1582 } 1583 return error; 1584 } 1585 1586 /* --------------------------------------------------------------------- */ 1587 1588 static int 1589 tmpfs_readdir(struct vop_readdir_args *ap) 1590 { 1591 struct vnode *vp = ap->a_vp; 1592 struct uio *uio = ap->a_uio; 1593 int *eofflag = ap->a_eofflag; 1594 off_t 
**cookies = ap->a_cookies; 1595 int *ncookies = ap->a_ncookies; 1596 struct tmpfs_mount *tmp; 1597 int error; 1598 off_t startoff; 1599 off_t cnt = 0; 1600 struct tmpfs_node *node; 1601 1602 /* This operation only makes sense on directory nodes. */ 1603 if (vp->v_type != VDIR) { 1604 return ENOTDIR; 1605 } 1606 1607 tmp = VFS_TO_TMPFS(vp->v_mount); 1608 node = VP_TO_TMPFS_DIR(vp); 1609 startoff = uio->uio_offset; 1610 1611 if (uio->uio_offset == TMPFS_DIRCOOKIE_DOT) { 1612 error = tmpfs_dir_getdotdent(node, uio); 1613 if (error != 0) { 1614 TMPFS_NODE_LOCK_SH(node); 1615 goto outok; 1616 } 1617 cnt++; 1618 } 1619 1620 if (uio->uio_offset == TMPFS_DIRCOOKIE_DOTDOT) { 1621 /* may lock parent, cannot hold node lock */ 1622 error = tmpfs_dir_getdotdotdent(tmp, node, uio); 1623 if (error != 0) { 1624 TMPFS_NODE_LOCK_SH(node); 1625 goto outok; 1626 } 1627 cnt++; 1628 } 1629 1630 TMPFS_NODE_LOCK_SH(node); 1631 error = tmpfs_dir_getdents(node, uio, &cnt); 1632 1633 outok: 1634 KKASSERT(error >= -1); 1635 1636 if (error == -1) 1637 error = 0; 1638 1639 if (eofflag != NULL) 1640 *eofflag = 1641 (error == 0 && uio->uio_offset == TMPFS_DIRCOOKIE_EOF); 1642 1643 /* Update NFS-related variables. 
*/ 1644 if (error == 0 && cookies != NULL && ncookies != NULL) { 1645 off_t i; 1646 off_t off = startoff; 1647 struct tmpfs_dirent *de = NULL; 1648 1649 *ncookies = cnt; 1650 *cookies = kmalloc(cnt * sizeof(off_t), M_TEMP, M_WAITOK); 1651 1652 for (i = 0; i < cnt; i++) { 1653 KKASSERT(off != TMPFS_DIRCOOKIE_EOF); 1654 if (off == TMPFS_DIRCOOKIE_DOT) { 1655 off = TMPFS_DIRCOOKIE_DOTDOT; 1656 } else { 1657 if (off == TMPFS_DIRCOOKIE_DOTDOT) { 1658 de = RB_MIN(tmpfs_dirtree_cookie, 1659 &node->tn_dir.tn_cookietree); 1660 } else if (de != NULL) { 1661 de = RB_NEXT(tmpfs_dirtree_cookie, 1662 &node->tn_dir.tn_cookietree, de); 1663 } else { 1664 de = tmpfs_dir_lookupbycookie(node, 1665 off); 1666 KKASSERT(de != NULL); 1667 de = RB_NEXT(tmpfs_dirtree_cookie, 1668 &node->tn_dir.tn_cookietree, de); 1669 } 1670 if (de == NULL) 1671 off = TMPFS_DIRCOOKIE_EOF; 1672 else 1673 off = tmpfs_dircookie(de); 1674 } 1675 (*cookies)[i] = off; 1676 } 1677 KKASSERT(uio->uio_offset == off); 1678 } 1679 TMPFS_NODE_UNLOCK(node); 1680 1681 if ((node->tn_status & TMPFS_NODE_ACCESSED) == 0) { 1682 TMPFS_NODE_LOCK(node); 1683 node->tn_status |= TMPFS_NODE_ACCESSED; 1684 TMPFS_NODE_UNLOCK(node); 1685 } 1686 return error; 1687 } 1688 1689 /* --------------------------------------------------------------------- */ 1690 1691 static int 1692 tmpfs_readlink(struct vop_readlink_args *ap) 1693 { 1694 struct vnode *vp = ap->a_vp; 1695 struct uio *uio = ap->a_uio; 1696 int error; 1697 struct tmpfs_node *node; 1698 1699 KKASSERT(uio->uio_offset == 0); 1700 KKASSERT(vp->v_type == VLNK); 1701 1702 node = VP_TO_TMPFS_NODE(vp); 1703 TMPFS_NODE_LOCK_SH(node); 1704 error = uiomove(node->tn_link, 1705 MIN(node->tn_size, uio->uio_resid), uio); 1706 TMPFS_NODE_UNLOCK(node); 1707 if ((node->tn_status & TMPFS_NODE_ACCESSED) == 0) { 1708 TMPFS_NODE_LOCK(node); 1709 node->tn_status |= TMPFS_NODE_ACCESSED; 1710 TMPFS_NODE_UNLOCK(node); 1711 } 1712 return error; 1713 } 1714 1715 /* 
--------------------------------------------------------------------- */ 1716 1717 static int 1718 tmpfs_inactive(struct vop_inactive_args *ap) 1719 { 1720 struct vnode *vp = ap->a_vp; 1721 struct tmpfs_node *node; 1722 struct mount *mp; 1723 1724 mp = vp->v_mount; 1725 lwkt_gettoken(&mp->mnt_token); 1726 node = VP_TO_TMPFS_NODE(vp); 1727 1728 /* 1729 * Degenerate case 1730 */ 1731 if (node == NULL) { 1732 vrecycle(vp); 1733 lwkt_reltoken(&mp->mnt_token); 1734 return(0); 1735 } 1736 1737 /* 1738 * Get rid of unreferenced deleted vnodes sooner rather than 1739 * later so the data memory can be recovered immediately. 1740 * 1741 * We must truncate the vnode to prevent the normal reclamation 1742 * path from flushing the data for the removed file to disk. 1743 */ 1744 TMPFS_NODE_LOCK(node); 1745 if (node->tn_links == 0) { 1746 node->tn_vpstate = TMPFS_VNODE_DOOMED; 1747 TMPFS_NODE_UNLOCK(node); 1748 if (node->tn_type == VREG) 1749 tmpfs_truncate(vp, 0); 1750 vrecycle(vp); 1751 } else { 1752 /* 1753 * We must retain any VM pages belonging to the vnode's 1754 * object as the vnode will destroy the object during a 1755 * later reclaim. We call vinvalbuf(V_SAVE) to clean 1756 * out the buffer cache. 1757 * 1758 * On DragonFlyBSD, vnodes are not immediately deactivated 1759 * on the 1->0 refs, so this is a relatively optimal 1760 * operation. We have to do this in tmpfs_inactive() 1761 * because the pages will have already been thrown away 1762 * at the time tmpfs_reclaim() is called. 
1763 */ 1764 if (node->tn_type == VREG && 1765 node->tn_reg.tn_pages_in_aobj == 0) { 1766 vinvalbuf(vp, V_SAVE, 0, 0); 1767 KKASSERT(RB_EMPTY(&vp->v_rbdirty_tree)); 1768 KKASSERT(RB_EMPTY(&vp->v_rbclean_tree)); 1769 tmpfs_move_pages(vp->v_object, node->tn_reg.tn_aobj, 1770 TMPFS_MOVF_DEACTIVATE); 1771 node->tn_reg.tn_pages_in_aobj = 1; 1772 } 1773 1774 TMPFS_NODE_UNLOCK(node); 1775 } 1776 lwkt_reltoken(&mp->mnt_token); 1777 1778 return 0; 1779 } 1780 1781 /* --------------------------------------------------------------------- */ 1782 1783 int 1784 tmpfs_reclaim(struct vop_reclaim_args *ap) 1785 { 1786 struct vnode *vp = ap->a_vp; 1787 struct tmpfs_mount *tmp; 1788 struct tmpfs_node *node; 1789 struct mount *mp; 1790 1791 mp = vp->v_mount; 1792 lwkt_gettoken(&mp->mnt_token); 1793 1794 node = VP_TO_TMPFS_NODE(vp); 1795 tmp = VFS_TO_TMPFS(vp->v_mount); 1796 KKASSERT(mp == tmp->tm_mount); 1797 1798 TMPFS_NODE_LOCK(node); 1799 KKASSERT(node->tn_vnode == vp); 1800 node->tn_vnode = NULL; 1801 vp->v_data = NULL; 1802 1803 /* 1804 * If the node referenced by this vnode was deleted by the 1805 * user, we must free its associated data structures now that 1806 * the vnode is being reclaimed. 1807 * 1808 * Directories have an extra link ref. 
1809 */ 1810 if (node->tn_links == 0) { 1811 node->tn_vpstate = TMPFS_VNODE_DOOMED; 1812 tmpfs_free_node(tmp, node); 1813 /* eats the lock */ 1814 } else { 1815 TMPFS_NODE_UNLOCK(node); 1816 } 1817 lwkt_reltoken(&mp->mnt_token); 1818 1819 KKASSERT(vp->v_data == NULL); 1820 return 0; 1821 } 1822 1823 /* --------------------------------------------------------------------- */ 1824 1825 static int 1826 tmpfs_mountctl(struct vop_mountctl_args *ap) 1827 { 1828 struct tmpfs_mount *tmp; 1829 struct mount *mp; 1830 int rc; 1831 1832 mp = ap->a_head.a_ops->head.vv_mount; 1833 lwkt_gettoken(&mp->mnt_token); 1834 1835 switch (ap->a_op) { 1836 case (MOUNTCTL_SET_EXPORT): 1837 tmp = (struct tmpfs_mount *) mp->mnt_data; 1838 1839 if (ap->a_ctllen != sizeof(struct export_args)) 1840 rc = (EINVAL); 1841 else 1842 rc = vfs_export(mp, &tmp->tm_export, 1843 (const struct export_args *) ap->a_ctl); 1844 break; 1845 default: 1846 rc = vop_stdmountctl(ap); 1847 break; 1848 } 1849 1850 lwkt_reltoken(&mp->mnt_token); 1851 return (rc); 1852 } 1853 1854 /* --------------------------------------------------------------------- */ 1855 1856 static int 1857 tmpfs_print(struct vop_print_args *ap) 1858 { 1859 struct vnode *vp = ap->a_vp; 1860 1861 struct tmpfs_node *node; 1862 1863 node = VP_TO_TMPFS_NODE(vp); 1864 1865 kprintf("tag VT_TMPFS, tmpfs_node %p, flags 0x%x, links %d\n", 1866 node, node->tn_flags, node->tn_links); 1867 kprintf("\tmode 0%o, owner %d, group %d, size %ju, status 0x%x\n", 1868 node->tn_mode, node->tn_uid, node->tn_gid, 1869 (uintmax_t)node->tn_size, node->tn_status); 1870 1871 if (vp->v_type == VFIFO) 1872 fifo_printinfo(vp); 1873 1874 kprintf("\n"); 1875 1876 return 0; 1877 } 1878 1879 /* --------------------------------------------------------------------- */ 1880 1881 static int 1882 tmpfs_pathconf(struct vop_pathconf_args *ap) 1883 { 1884 struct vnode *vp = ap->a_vp; 1885 int name = ap->a_name; 1886 register_t *retval = ap->a_retval; 1887 struct tmpfs_mount *tmp; 1888 
int error; 1889 1890 error = 0; 1891 1892 switch (name) { 1893 case _PC_CHOWN_RESTRICTED: 1894 *retval = 1; 1895 break; 1896 1897 case _PC_FILESIZEBITS: 1898 tmp = VFS_TO_TMPFS(vp->v_mount); 1899 *retval = max(32, flsll(tmp->tm_pages_max * PAGE_SIZE) + 1); 1900 break; 1901 1902 case _PC_LINK_MAX: 1903 *retval = LINK_MAX; 1904 break; 1905 1906 case _PC_NAME_MAX: 1907 *retval = NAME_MAX; 1908 break; 1909 1910 case _PC_NO_TRUNC: 1911 *retval = 1; 1912 break; 1913 1914 case _PC_PATH_MAX: 1915 *retval = PATH_MAX; 1916 break; 1917 1918 case _PC_PIPE_BUF: 1919 *retval = PIPE_BUF; 1920 break; 1921 1922 case _PC_SYNC_IO: 1923 *retval = 1; 1924 break; 1925 1926 case _PC_2_SYMLINKS: 1927 *retval = 1; 1928 break; 1929 1930 default: 1931 error = EINVAL; 1932 } 1933 1934 return error; 1935 } 1936 1937 /************************************************************************ 1938 * KQFILTER OPS * 1939 ************************************************************************/ 1940 1941 static void filt_tmpfsdetach(struct knote *kn); 1942 static int filt_tmpfsread(struct knote *kn, long hint); 1943 static int filt_tmpfswrite(struct knote *kn, long hint); 1944 static int filt_tmpfsvnode(struct knote *kn, long hint); 1945 1946 static struct filterops tmpfsread_filtops = 1947 { FILTEROP_ISFD | FILTEROP_MPSAFE, 1948 NULL, filt_tmpfsdetach, filt_tmpfsread }; 1949 static struct filterops tmpfswrite_filtops = 1950 { FILTEROP_ISFD | FILTEROP_MPSAFE, 1951 NULL, filt_tmpfsdetach, filt_tmpfswrite }; 1952 static struct filterops tmpfsvnode_filtops = 1953 { FILTEROP_ISFD | FILTEROP_MPSAFE, 1954 NULL, filt_tmpfsdetach, filt_tmpfsvnode }; 1955 1956 static int 1957 tmpfs_kqfilter (struct vop_kqfilter_args *ap) 1958 { 1959 struct vnode *vp = ap->a_vp; 1960 struct knote *kn = ap->a_kn; 1961 1962 switch (kn->kn_filter) { 1963 case EVFILT_READ: 1964 kn->kn_fop = &tmpfsread_filtops; 1965 break; 1966 case EVFILT_WRITE: 1967 kn->kn_fop = &tmpfswrite_filtops; 1968 break; 1969 case EVFILT_VNODE: 1970 
kn->kn_fop = &tmpfsvnode_filtops; 1971 break; 1972 default: 1973 return (EOPNOTSUPP); 1974 } 1975 1976 kn->kn_hook = (caddr_t)vp; 1977 1978 knote_insert(&vp->v_pollinfo.vpi_kqinfo.ki_note, kn); 1979 1980 return(0); 1981 } 1982 1983 static void 1984 filt_tmpfsdetach(struct knote *kn) 1985 { 1986 struct vnode *vp = (void *)kn->kn_hook; 1987 1988 knote_remove(&vp->v_pollinfo.vpi_kqinfo.ki_note, kn); 1989 } 1990 1991 static int 1992 filt_tmpfsread(struct knote *kn, long hint) 1993 { 1994 struct vnode *vp = (void *)kn->kn_hook; 1995 struct tmpfs_node *node = VP_TO_TMPFS_NODE(vp); 1996 off_t off; 1997 1998 if (hint == NOTE_REVOKE) { 1999 kn->kn_flags |= (EV_EOF | EV_NODATA | EV_ONESHOT); 2000 return(1); 2001 } 2002 2003 /* 2004 * Interlock against MP races when performing this function. 2005 */ 2006 TMPFS_NODE_LOCK_SH(node); 2007 off = node->tn_size - kn->kn_fp->f_offset; 2008 kn->kn_data = (off < INTPTR_MAX) ? off : INTPTR_MAX; 2009 if (kn->kn_sfflags & NOTE_OLDAPI) { 2010 TMPFS_NODE_UNLOCK(node); 2011 return(1); 2012 } 2013 if (kn->kn_data == 0) { 2014 kn->kn_data = (off < INTPTR_MAX) ? off : INTPTR_MAX; 2015 } 2016 TMPFS_NODE_UNLOCK(node); 2017 return (kn->kn_data != 0); 2018 } 2019 2020 static int 2021 filt_tmpfswrite(struct knote *kn, long hint) 2022 { 2023 if (hint == NOTE_REVOKE) 2024 kn->kn_flags |= (EV_EOF | EV_NODATA | EV_ONESHOT); 2025 kn->kn_data = 0; 2026 return (1); 2027 } 2028 2029 static int 2030 filt_tmpfsvnode(struct knote *kn, long hint) 2031 { 2032 if (kn->kn_sfflags & hint) 2033 kn->kn_fflags |= hint; 2034 if (hint == NOTE_REVOKE) { 2035 kn->kn_flags |= (EV_EOF | EV_NODATA); 2036 return (1); 2037 } 2038 return (kn->kn_fflags != 0); 2039 } 2040 2041 /* 2042 * Helper to move VM pages between objects 2043 * 2044 * NOTE: The vm_page_rename() dirties the page, so we can clear the 2045 * PG_NEED_COMMIT flag. If the pages are being moved into tn_aobj, 2046 * the pageout daemon will be able to page them out. 
*/
/*
 * Per-page callback used by tmpfs_move_pages().  'data' is the
 * struct rb_vm_page_scan_info set up by the caller (source object,
 * destination object and TMPFS_MOVF_* flags in pagerflags).
 *
 * Returns 0 after the page has been freed or moved.  If the page could
 * not be busied, or changed identity while we waited, info->error is set
 * to -1 and -1 is returned; tmpfs_move_pages() repeats its scan while
 * info->error is negative.  (Presumably the negative return also stops
 * the current scan -- confirm against the RB_SCAN implementation.)
 */
static int
tmpfs_move_pages_callback(vm_page_t p, void *data)
{
	struct rb_vm_page_scan_info *info = data;
	vm_pindex_t pindex;

	/* Remember the index so we can detect the page changing under us. */
	pindex = p->pindex;
	if (vm_page_busy_try(p, TRUE)) {
		vm_page_sleep_busy(p, TRUE, "tpgmov");
		info->error = -1;
		return -1;
	}
	/* Recheck identity now that we hold the page busy. */
	if (p->object != info->object || p->pindex != pindex) {
		vm_page_wakeup(p);
		info->error = -1;
		return -1;
	}

	if ((info->pagerflags & TMPFS_MOVF_FROMBACKING) &&
	    (p->flags & PG_SWAPPED) &&
	    (p->flags & PG_NEED_COMMIT) == 0 &&
	    p->dirty == 0) {
		/*
		 * If the page in the backing aobj was paged out to swap
		 * it will be clean and it is better to free it rather
		 * than re-dirty it.  We will assume that the page was
		 * paged out to swap for a reason!
		 *
		 * This helps avoid unnecessary swap thrashing on the page.
		 */
		vm_page_free(p);
	} else if ((info->pagerflags & TMPFS_MOVF_FROMBACKING) == 0 &&
		   (p->flags & PG_NEED_COMMIT) == 0 &&
		   p->dirty == 0) {
		/*
		 * If the page associated with the vnode was cleaned via
		 * a tmpfs_strategy() call, it exists as a swap block in
		 * aobj and it is again better to free it rather than
		 * re-dirty it.  We will assume that the page was
		 * paged out to swap for a reason!
		 *
		 * This helps avoid unnecessary swap thrashing on the page.
		 */
		vm_page_free(p);
	} else {
		/*
		 * Rename the page, which will also ensure that it is flagged
		 * as dirty and check whether a swap block association exists
		 * in the target object or not, setting appropriate flags if
		 * it does.
		 */
		vm_page_rename(p, info->dest_object, pindex);
		vm_page_clear_commit(p);
		if (info->pagerflags & TMPFS_MOVF_DEACTIVATE)
			vm_page_deactivate(p);
		vm_page_wakeup(p);
		/* page automatically made dirty */
	}

	return 0;
}

/*
 * Move every page of 'src' into 'dst', or free it, as decided per page by
 * tmpfs_move_pages_callback().  'movflags' is a mask of TMPFS_MOVF_*
 * flags passed through to the callback.
 *
 * Both objects are held for the duration.  The scan is repeated until a
 * pass completes without the callback reporting a retry, the source
 * object's page tree is empty and no paging is in progress on it.
 */
static
void
tmpfs_move_pages(vm_object_t src, vm_object_t dst, int movflags)
{
	struct rb_vm_page_scan_info info;

	vm_object_hold(src);
	vm_object_hold(dst);
	info.object = src;
	info.dest_object = dst;
	info.pagerflags = movflags;
	do {
		/* Let any in-flight paging on the source finish first. */
		if (src->paging_in_progress)
			vm_object_pip_wait(src, "objtfs");
		/* Reset the retry indicator; the callback sets it to -1. */
		info.error = 1;
		vm_page_rb_tree_RB_SCAN(&src->rb_memq, NULL,
					tmpfs_move_pages_callback, &info);
	} while (info.error < 0 || !RB_EMPTY(&src->rb_memq) ||
		 src->paging_in_progress);
	vm_object_drop(dst);
	vm_object_drop(src);
}

/* --------------------------------------------------------------------- */

/*
 * vnode operations vector used for files stored in a tmpfs file system.
2137 */ 2138 struct vop_ops tmpfs_vnode_vops = { 2139 .vop_default = vop_defaultop, 2140 .vop_getpages = vop_stdgetpages, 2141 .vop_putpages = vop_stdputpages, 2142 .vop_ncreate = tmpfs_ncreate, 2143 .vop_nresolve = tmpfs_nresolve, 2144 .vop_nlookupdotdot = tmpfs_nlookupdotdot, 2145 .vop_nmknod = tmpfs_nmknod, 2146 .vop_open = tmpfs_open, 2147 .vop_close = tmpfs_close, 2148 .vop_access = tmpfs_access, 2149 .vop_getattr = tmpfs_getattr, 2150 .vop_getattr_quick = tmpfs_getattr_quick, 2151 .vop_setattr = tmpfs_setattr, 2152 .vop_read = tmpfs_read, 2153 .vop_write = tmpfs_write, 2154 .vop_fsync = tmpfs_fsync, 2155 .vop_mountctl = tmpfs_mountctl, 2156 .vop_nremove = tmpfs_nremove, 2157 .vop_nlink = tmpfs_nlink, 2158 .vop_nrename = tmpfs_nrename, 2159 .vop_nmkdir = tmpfs_nmkdir, 2160 .vop_nrmdir = tmpfs_nrmdir, 2161 .vop_nsymlink = tmpfs_nsymlink, 2162 .vop_readdir = tmpfs_readdir, 2163 .vop_readlink = tmpfs_readlink, 2164 .vop_inactive = tmpfs_inactive, 2165 .vop_reclaim = tmpfs_reclaim, 2166 .vop_print = tmpfs_print, 2167 .vop_pathconf = tmpfs_pathconf, 2168 .vop_bmap = tmpfs_bmap, 2169 .vop_strategy = tmpfs_strategy, 2170 .vop_advlock = tmpfs_advlock, 2171 .vop_kqfilter = tmpfs_kqfilter 2172 }; 2173