1 /*-
2  * Copyright (c) 2005, 2006 The NetBSD Foundation, Inc.
3  * All rights reserved.
4  *
5  * This code is derived from software contributed to The NetBSD Foundation
6  * by Julio M. Merino Vidal, developed as part of Google's Summer of Code
7  * 2005 program.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
19  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
20  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
21  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
22  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
26  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28  * POSSIBILITY OF SUCH DAMAGE.
29  *
30  * $NetBSD: tmpfs_vnops.c,v 1.39 2007/07/23 15:41:01 jmmv Exp $
31  */
32 
33 /*
34  * tmpfs vnode interface.
35  */
36 
37 #include <sys/kernel.h>
38 #include <sys/kern_syscall.h>
39 #include <sys/param.h>
40 #include <sys/uio.h>
41 #include <sys/fcntl.h>
42 #include <sys/lockf.h>
43 #include <sys/priv.h>
44 #include <sys/proc.h>
45 #include <sys/resourcevar.h>
46 #include <sys/sched.h>
47 #include <sys/stat.h>
48 #include <sys/systm.h>
49 #include <sys/sysctl.h>
50 #include <sys/unistd.h>
51 #include <sys/vfsops.h>
52 #include <sys/vnode.h>
53 #include <sys/mountctl.h>
54 
55 #include <vm/vm.h>
56 #include <vm/vm_extern.h>
57 #include <vm/vm_object.h>
58 #include <vm/vm_page.h>
59 #include <vm/vm_pageout.h>
60 #include <vm/vm_pager.h>
61 #include <vm/swap_pager.h>
62 
63 #include <sys/buf2.h>
64 #include <vm/vm_page2.h>
65 
66 #include <vfs/fifofs/fifo.h>
67 #include <vfs/tmpfs/tmpfs_vnops.h>
68 #include "tmpfs.h"
69 
70 static void tmpfs_strategy_done(struct bio *bio);
71 static void tmpfs_move_pages(vm_object_t src, vm_object_t dst);
72 
73 static int tmpfs_cluster_enable = 1;
74 SYSCTL_NODE(_vfs, OID_AUTO, tmpfs, CTLFLAG_RW, 0, "TMPFS filesystem");
75 SYSCTL_INT(_vfs_tmpfs, OID_AUTO, cluster_enable, CTLFLAG_RW,
76 		&tmpfs_cluster_enable, 0, "");
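
/*
 * The knob above is exported as vfs.tmpfs.cluster_enable and can be
 * inspected or toggled at runtime from userland, e.g.:
 *
 *	sysctl vfs.tmpfs.cluster_enable=0
 */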
77 
78 static __inline
79 void
80 tmpfs_knote(struct vnode *vp, int flags)
81 {
82 	if (flags)
83 		KNOTE(&vp->v_pollinfo.vpi_kqinfo.ki_note, flags);
84 }
85 
86 
87 /* --------------------------------------------------------------------- */
88 
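/*
 * Resolve ncp within the directory dvp.  tmpfs_alloc_vp() is invoked in
 * its deadlock-handling mode and may return EAGAIN, in which case the
 * lookup is simply retried (the "loop" label below).  Both positive and
 * negative (ENOENT) results are entered into the namecache.
 */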
89 static int
90 tmpfs_nresolve(struct vop_nresolve_args *ap)
91 {
92 	struct vnode *dvp = ap->a_dvp;
93 	struct vnode *vp = NULL;
94 	struct namecache *ncp = ap->a_nch->ncp;
95 	struct tmpfs_node *tnode;
96 	struct tmpfs_dirent *de;
97 	struct tmpfs_node *dnode;
98 	int error;
99 
100 	dnode = VP_TO_TMPFS_DIR(dvp);
101 
102 	TMPFS_NODE_LOCK_SH(dnode);
103 loop:
104 	de = tmpfs_dir_lookup(dnode, NULL, ncp);
105 	if (de == NULL) {
106 		error = ENOENT;
107 	} else {
108 		/*
109 		 * Allocate a vnode for the node we found.  Use
110 		 * tmpfs_alloc_vp()'s deadlock handling mode.
111 		 */
112 		tnode = de->td_node;
113 		error = tmpfs_alloc_vp(dvp->v_mount, dnode, tnode,
114 				       LK_EXCLUSIVE | LK_RETRY, &vp);
115 		if (error == EAGAIN)
116 			goto loop;
117 		if (error)
118 			goto out;
119 		KKASSERT(vp);
120 	}
121 
122 out:
123 	TMPFS_NODE_UNLOCK(dnode);
124 
125 	if ((dnode->tn_status & TMPFS_NODE_ACCESSED) == 0) {
126 		TMPFS_NODE_LOCK(dnode);
127 		dnode->tn_status |= TMPFS_NODE_ACCESSED;
128 		TMPFS_NODE_UNLOCK(dnode);
129 	}
130 
131 	/*
132 	 * Store the result of this lookup in the cache.  Avoid this if the
133 	 * request was for creation, as doing so did not improve timings in
134 	 * empirical tests.
135 	 */
136 	if (vp) {
137 		vn_unlock(vp);
138 		cache_setvp(ap->a_nch, vp);
139 		vrele(vp);
140 	} else if (error == ENOENT) {
141 		cache_setvp(ap->a_nch, NULL);
142 	}
143 	return (error);
144 }
145 
146 static int
147 tmpfs_nlookupdotdot(struct vop_nlookupdotdot_args *ap)
148 {
149 	struct vnode *dvp = ap->a_dvp;
150 	struct vnode **vpp = ap->a_vpp;
151 	struct tmpfs_node *dnode = VP_TO_TMPFS_NODE(dvp);
152 	struct ucred *cred = ap->a_cred;
153 	int error;
154 
155 	*vpp = NULL;
156 
157 	/* Check accessibility of requested node as a first step. */
158 	error = VOP_ACCESS(dvp, VEXEC, cred);
159 	if (error != 0)
160 		return error;
161 
162 	if (dnode->tn_dir.tn_parent != NULL) {
163 		/* Allocate a new vnode on the matching entry. */
164 		error = tmpfs_alloc_vp(dvp->v_mount,
165 				       NULL, dnode->tn_dir.tn_parent,
166 				       LK_EXCLUSIVE | LK_RETRY, vpp);
167 
168 		if (*vpp)
169 			vn_unlock(*vpp);
170 	}
171 	return (*vpp == NULL) ? ENOENT : 0;
172 }
173 
174 /* --------------------------------------------------------------------- */
175 
176 static int
177 tmpfs_ncreate(struct vop_ncreate_args *ap)
178 {
179 	struct vnode *dvp = ap->a_dvp;
180 	struct vnode **vpp = ap->a_vpp;
181 	struct namecache *ncp = ap->a_nch->ncp;
182 	struct vattr *vap = ap->a_vap;
183 	struct ucred *cred = ap->a_cred;
184 	int error;
185 
186 	KKASSERT(vap->va_type == VREG || vap->va_type == VSOCK);
187 
188 	error = tmpfs_alloc_file(dvp, vpp, vap, ncp, cred, NULL);
189 	if (error == 0) {
190 		cache_setunresolved(ap->a_nch);
191 		cache_setvp(ap->a_nch, *vpp);
192 		tmpfs_knote(dvp, NOTE_WRITE);
193 	}
194 	return (error);
195 }
196 /* --------------------------------------------------------------------- */
197 
198 static int
199 tmpfs_nmknod(struct vop_nmknod_args *ap)
200 {
201 	struct vnode *dvp = ap->a_dvp;
202 	struct vnode **vpp = ap->a_vpp;
203 	struct namecache *ncp = ap->a_nch->ncp;
204 	struct vattr *vap = ap->a_vap;
205 	struct ucred *cred = ap->a_cred;
206 	int error;
207 
208 	if (vap->va_type != VBLK && vap->va_type != VCHR &&
209 	    vap->va_type != VFIFO) {
210 		return (EINVAL);
211 	}
212 
213 	error = tmpfs_alloc_file(dvp, vpp, vap, ncp, cred, NULL);
214 	if (error == 0) {
215 		cache_setunresolved(ap->a_nch);
216 		cache_setvp(ap->a_nch, *vpp);
217 		tmpfs_knote(dvp, NOTE_WRITE);
218 	}
219 	return error;
220 }
221 
222 /* --------------------------------------------------------------------- */
223 
224 static int
225 tmpfs_open(struct vop_open_args *ap)
226 {
227 	struct vnode *vp = ap->a_vp;
228 	int mode = ap->a_mode;
229 	struct tmpfs_node *node;
230 	int error;
231 
232 	node = VP_TO_TMPFS_NODE(vp);
233 
234 #if 0
235 	/* The file is still active but all its names have been removed
236 	 * (e.g. by a "rmdir $(pwd)").  It cannot be opened any more as
237 	 * it is about to die. */
238 	if (node->tn_links < 1)
239 		return (ENOENT);
240 #endif
241 
242 	/* If the file is marked append-only, deny write requests. */
243 	if ((node->tn_flags & APPEND) &&
244 	    (mode & (FWRITE | O_APPEND)) == FWRITE) {
245 		error = EPERM;
246 	} else {
247 		if (node->tn_reg.tn_pages_in_aobj) {
248 			TMPFS_NODE_LOCK(node);
249 			if (node->tn_reg.tn_pages_in_aobj) {
250 				tmpfs_move_pages(node->tn_reg.tn_aobj,
251 						 vp->v_object);
252 				node->tn_reg.tn_pages_in_aobj = 0;
253 			}
254 			TMPFS_NODE_UNLOCK(node);
255 		}
256 		error = vop_stdopen(ap);
257 	}
258 
259 	return (error);
260 }
261 
262 /* --------------------------------------------------------------------- */
263 
264 static int
265 tmpfs_close(struct vop_close_args *ap)
266 {
267 	struct vnode *vp = ap->a_vp;
268 	struct tmpfs_node *node;
269 	int error;
270 
271 	node = VP_TO_TMPFS_NODE(vp);
272 
273 	if (node->tn_links > 0) {
274 		/*
275 		 * Update node times.  No need to do it if the node has
276 		 * been deleted, because it will vanish after we return.
277 		 */
278 		tmpfs_update(vp);
279 	}
280 
281 	error = vop_stdclose(ap);
282 
283 	return (error);
284 }
285 
286 /* --------------------------------------------------------------------- */
287 
288 int
289 tmpfs_access(struct vop_access_args *ap)
290 {
291 	struct vnode *vp = ap->a_vp;
292 	int error;
293 	struct tmpfs_node *node;
294 
295 	node = VP_TO_TMPFS_NODE(vp);
296 
297 	switch (vp->v_type) {
298 	case VDIR:
299 		/* FALLTHROUGH */
300 	case VLNK:
301 		/* FALLTHROUGH */
302 	case VREG:
303 		if ((ap->a_mode & VWRITE) &&
304 	            (vp->v_mount->mnt_flag & MNT_RDONLY)) {
305 			error = EROFS;
306 			goto out;
307 		}
308 		break;
309 
310 	case VBLK:
311 		/* FALLTHROUGH */
312 	case VCHR:
313 		/* FALLTHROUGH */
314 	case VSOCK:
315 		/* FALLTHROUGH */
316 	case VFIFO:
317 		break;
318 
319 	default:
320 		error = EINVAL;
321 		goto out;
322 	}
323 
324 	if ((ap->a_mode & VWRITE) && (node->tn_flags & IMMUTABLE)) {
325 		error = EPERM;
326 		goto out;
327 	}
328 
329 	error = vop_helper_access(ap, node->tn_uid, node->tn_gid,
330 			          node->tn_mode, 0);
331 out:
332 	return error;
333 }
334 
335 /* --------------------------------------------------------------------- */
336 
337 int
338 tmpfs_getattr(struct vop_getattr_args *ap)
339 {
340 	struct vnode *vp = ap->a_vp;
341 	struct vattr *vap = ap->a_vap;
342 	struct tmpfs_node *node;
343 
344 	node = VP_TO_TMPFS_NODE(vp);
345 
346 	tmpfs_update(vp);
347 
348 	TMPFS_NODE_LOCK_SH(node);
349 	vap->va_type = vp->v_type;
350 	vap->va_mode = node->tn_mode;
351 	vap->va_nlink = node->tn_links;
352 	vap->va_uid = node->tn_uid;
353 	vap->va_gid = node->tn_gid;
354 	vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0];
355 	vap->va_fileid = node->tn_id;
356 	vap->va_size = node->tn_size;
357 	vap->va_blocksize = PAGE_SIZE;
358 	vap->va_atime.tv_sec = node->tn_atime;
359 	vap->va_atime.tv_nsec = node->tn_atimensec;
360 	vap->va_mtime.tv_sec = node->tn_mtime;
361 	vap->va_mtime.tv_nsec = node->tn_mtimensec;
362 	vap->va_ctime.tv_sec = node->tn_ctime;
363 	vap->va_ctime.tv_nsec = node->tn_ctimensec;
364 	vap->va_gen = node->tn_gen;
365 	vap->va_flags = node->tn_flags;
366 	if (vp->v_type == VBLK || vp->v_type == VCHR) {
367 		vap->va_rmajor = umajor(node->tn_rdev);
368 		vap->va_rminor = uminor(node->tn_rdev);
369 	}
370 	vap->va_bytes = round_page(node->tn_size);
371 	vap->va_filerev = 0;
372 	TMPFS_NODE_UNLOCK(node);
373 
374 	return 0;
375 }
376 
377 /* --------------------------------------------------------------------- */
378 
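/*
 * Each settable attribute class below is applied independently; kflags
 * accumulates the matching kqueue notes (NOTE_ATTRIB, NOTE_WRITE,
 * NOTE_EXTEND) and is posted once at the end via tmpfs_knote().
 */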
379 int
380 tmpfs_setattr(struct vop_setattr_args *ap)
381 {
382 	struct vnode *vp = ap->a_vp;
383 	struct vattr *vap = ap->a_vap;
384 	struct ucred *cred = ap->a_cred;
385 	struct tmpfs_node *node = VP_TO_TMPFS_NODE(vp);
386 	int error = 0;
387 	int kflags = 0;
388 
389 	TMPFS_NODE_LOCK(node);
390 	if (error == 0 && (vap->va_flags != VNOVAL)) {
391 		error = tmpfs_chflags(vp, vap->va_flags, cred);
392 		kflags |= NOTE_ATTRIB;
393 	}
394 
395 	if (error == 0 && (vap->va_size != VNOVAL)) {
396 		/* restore any saved pages before proceeding */
397 		if (node->tn_reg.tn_pages_in_aobj) {
398 			tmpfs_move_pages(node->tn_reg.tn_aobj, vp->v_object);
399 			node->tn_reg.tn_pages_in_aobj = 0;
400 		}
401 		if (vap->va_size > node->tn_size)
402 			kflags |= NOTE_WRITE | NOTE_EXTEND;
403 		else
404 			kflags |= NOTE_WRITE;
405 		error = tmpfs_chsize(vp, vap->va_size, cred);
406 	}
407 
408 	if (error == 0 && (vap->va_uid != (uid_t)VNOVAL ||
409 			   vap->va_gid != (gid_t)VNOVAL)) {
410 		error = tmpfs_chown(vp, vap->va_uid, vap->va_gid, cred);
411 		kflags |= NOTE_ATTRIB;
412 	}
413 
414 	if (error == 0 && (vap->va_mode != (mode_t)VNOVAL)) {
415 		error = tmpfs_chmod(vp, vap->va_mode, cred);
416 		kflags |= NOTE_ATTRIB;
417 	}
418 
419 	if (error == 0 && ((vap->va_atime.tv_sec != VNOVAL &&
420 	    vap->va_atime.tv_nsec != VNOVAL) ||
421 	    (vap->va_mtime.tv_sec != VNOVAL &&
422 	    vap->va_mtime.tv_nsec != VNOVAL) )) {
423 		error = tmpfs_chtimes(vp, &vap->va_atime, &vap->va_mtime,
424 				      vap->va_vaflags, cred);
425 		kflags |= NOTE_ATTRIB;
426 	}
427 
428 	/*
429 	 * Update the node times.  We give preference to the error codes
430 	 * generated by this function rather than the ones that may arise
431 	 * from tmpfs_update.
432 	 */
433 	tmpfs_update(vp);
434 	TMPFS_NODE_UNLOCK(node);
435 	tmpfs_knote(vp, kflags);
436 
437 	return (error);
438 }
439 
440 /* --------------------------------------------------------------------- */
441 
442 /*
443  * fsync is usually a NOP, but we must take action when unmounting or
444  * when recycling.
445  */
446 static int
447 tmpfs_fsync(struct vop_fsync_args *ap)
448 {
449 	struct tmpfs_node *node;
450 	struct vnode *vp = ap->a_vp;
451 
452 	node = VP_TO_TMPFS_NODE(vp);
453 
454 	/*
455 	 * tmpfs vnodes typically remain dirty, avoid long syncer scans
456 	 * by forcing removal from the syncer list.
457 	 */
458 	vn_syncer_remove(vp, 1);
459 
460 	tmpfs_update(vp);
461 	if (vp->v_type == VREG) {
462 		if (vp->v_flag & VRECLAIMED) {
463 			if (node->tn_links == 0)
464 				tmpfs_truncate(vp, 0);
465 			else
466 				vfsync(ap->a_vp, ap->a_waitfor, 1, NULL, NULL);
467 		}
468 	}
469 
470 	return 0;
471 }
472 
473 /* --------------------------------------------------------------------- */
474 
475 static int
476 tmpfs_read(struct vop_read_args *ap)
477 {
478 	struct buf *bp;
479 	struct vnode *vp = ap->a_vp;
480 	struct uio *uio = ap->a_uio;
481 	struct tmpfs_node *node;
482 	off_t base_offset;
483 	size_t offset;
484 	size_t len;
485 	size_t resid;
486 	int error;
487 	int seqcount;
488 
489 	/*
490 	 * Check the basics
491 	 */
492 	if (uio->uio_offset < 0)
493 		return (EINVAL);
494 	if (vp->v_type != VREG)
495 		return (EINVAL);
496 
497 	/*
498 	 * Extract node, try to shortcut the operation through
499 	 * the VM page cache, allowing us to avoid buffer cache
500 	 * overheads.
501 	 */
502 	node = VP_TO_TMPFS_NODE(vp);
503 	resid = uio->uio_resid;
504 	seqcount = ap->a_ioflag >> IO_SEQSHIFT;
505 	error = vop_helper_read_shortcut(ap);
506 	if (error)
507 		return error;
508 	if (uio->uio_resid == 0) {
509 		if (resid)
510 			goto finished;
511 		return error;
512 	}
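
    	/*
    	 * The shortcut above copies straight out of the VM page cache;
    	 * whatever residual remains is satisfied through the buffer
    	 * cache below.
    	 */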
513 
514 	/*
515 	 * restore any saved pages before proceeding
516 	 */
517 	if (node->tn_reg.tn_pages_in_aobj) {
518 		TMPFS_NODE_LOCK(node);
519 		if (node->tn_reg.tn_pages_in_aobj) {
520 			tmpfs_move_pages(node->tn_reg.tn_aobj, vp->v_object);
521 			node->tn_reg.tn_pages_in_aobj = 0;
522 		}
523 		TMPFS_NODE_UNLOCK(node);
524 	}
525 
526 	/*
527 	 * Fall-through to our normal read code.
528 	 */
529 	while (uio->uio_resid > 0 && uio->uio_offset < node->tn_size) {
530 		/*
531 		 * Use buffer cache I/O (via tmpfs_strategy)
532 		 */
533 		offset = (size_t)uio->uio_offset & TMPFS_BLKMASK64;
534 		base_offset = (off_t)uio->uio_offset - offset;
535 		bp = getcacheblk(vp, base_offset, TMPFS_BLKSIZE, GETBLK_KVABIO);
536 		if (bp == NULL) {
537 			if (tmpfs_cluster_enable) {
538 				error = cluster_readx(vp, node->tn_size,
539 						     base_offset,
540 						     TMPFS_BLKSIZE,
541 						     B_NOTMETA | B_KVABIO,
542 						     uio->uio_resid,
543 						     seqcount * MAXBSIZE,
544 						     &bp);
545 			} else {
546 				error = bread_kvabio(vp, base_offset,
547 						     TMPFS_BLKSIZE, &bp);
548 			}
549 			if (error) {
550 				brelse(bp);
551 				kprintf("tmpfs_read bread error %d\n", error);
552 				break;
553 			}
554 
555 			/*
556 			 * tmpfs pretty much fiddles directly with the VM
557 			 * system, don't let it exhaust it or we won't play
558 			 * nice with other processes.
559 			 *
560 			 * Only do this if the VOP is coming from a normal
561 			 * read/write.  The VM system handles the case for
562 			 * UIO_NOCOPY.
563 			 */
564 			if (uio->uio_segflg != UIO_NOCOPY)
565 				vm_wait_nominal();
566 		}
567 		bp->b_flags |= B_CLUSTEROK;
568 		bkvasync(bp);
569 
570 		/*
571 		 * Figure out how many bytes we can actually copy this loop.
572 		 */
573 		len = TMPFS_BLKSIZE - offset;
574 		if (len > uio->uio_resid)
575 			len = uio->uio_resid;
576 		if (len > node->tn_size - uio->uio_offset)
577 			len = (size_t)(node->tn_size - uio->uio_offset);
578 
579 		error = uiomovebp(bp, (char *)bp->b_data + offset, len, uio);
580 		bqrelse(bp);
581 		if (error) {
582 			kprintf("tmpfs_read uiomove error %d\n", error);
583 			break;
584 		}
585 	}
586 
587 finished:
588 	if ((node->tn_status & TMPFS_NODE_ACCESSED) == 0) {
589 		TMPFS_NODE_LOCK(node);
590 		node->tn_status |= TMPFS_NODE_ACCESSED;
591 		TMPFS_NODE_UNLOCK(node);
592 	}
593 	return (error);
594 }
595 
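/*
 * Write path, roughly: extend the file if needed (tmpfs_reg_resize),
 * bread() the underlying block so no stale kernel memory is exposed,
 * copy the data in with uiomovebp(), then dispose of the buffer via
 * buwrite(), bdwrite() or cluster_awrite() depending on memory pressure
 * and on whether the write came from the pageout daemon (UIO_NOCOPY).
 */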
596 static int
597 tmpfs_write(struct vop_write_args *ap)
598 {
599 	struct buf *bp;
600 	struct vnode *vp = ap->a_vp;
601 	struct uio *uio = ap->a_uio;
602 	struct thread *td = uio->uio_td;
603 	struct tmpfs_node *node;
604 	boolean_t extended;
605 	off_t oldsize;
606 	int error;
607 	off_t base_offset;
608 	size_t offset;
609 	size_t len;
610 	struct rlimit limit;
611 	int trivial = 0;
612 	int kflags = 0;
613 	int seqcount;
614 
615 	error = 0;
616 	if (uio->uio_resid == 0) {
617 		return error;
618 	}
619 
620 	node = VP_TO_TMPFS_NODE(vp);
621 
622 	if (vp->v_type != VREG)
623 		return (EINVAL);
624 	seqcount = ap->a_ioflag >> IO_SEQSHIFT;
625 
626 	TMPFS_NODE_LOCK(node);
627 
628 	/*
629 	 * restore any saved pages before proceeding
630 	 */
631 	if (node->tn_reg.tn_pages_in_aobj) {
632 		tmpfs_move_pages(node->tn_reg.tn_aobj, vp->v_object);
633 		node->tn_reg.tn_pages_in_aobj = 0;
634 	}
635 
636 	oldsize = node->tn_size;
637 	if (ap->a_ioflag & IO_APPEND)
638 		uio->uio_offset = node->tn_size;
639 
640 	/*
641 	 * Check for illegal write offsets.
642 	 */
643 	if (uio->uio_offset + uio->uio_resid >
644 	  VFS_TO_TMPFS(vp->v_mount)->tm_maxfilesize) {
645 		error = EFBIG;
646 		goto done;
647 	}
648 
649 	/*
650 	 * NOTE: Ignore if UIO does not come from a user thread (e.g. VN).
651 	 */
652 	if (vp->v_type == VREG && td != NULL && td->td_lwp != NULL) {
653 		error = kern_getrlimit(RLIMIT_FSIZE, &limit);
654 		if (error)
655 			goto done;
656 		if (uio->uio_offset + uio->uio_resid > limit.rlim_cur) {
657 			ksignal(td->td_proc, SIGXFSZ);
658 			error = EFBIG;
659 			goto done;
660 		}
661 	}
662 
663 	/*
664 	 * Extend the file's size if necessary
665 	 */
666 	extended = ((uio->uio_offset + uio->uio_resid) > node->tn_size);
667 
668 	while (uio->uio_resid > 0) {
669 		/*
670 		 * Don't completely blow out running buffer I/O
671 		 * when being hit from the pageout daemon.
672 		 */
673 		if (uio->uio_segflg == UIO_NOCOPY &&
674 		    (ap->a_ioflag & IO_RECURSE) == 0) {
675 			bwillwrite(TMPFS_BLKSIZE);
676 		}
677 
678 		/*
679 		 * Use buffer cache I/O (via tmpfs_strategy)
680 		 */
681 		offset = (size_t)uio->uio_offset & TMPFS_BLKMASK64;
682 		base_offset = (off_t)uio->uio_offset - offset;
683 		len = TMPFS_BLKSIZE - offset;
684 		if (len > uio->uio_resid)
685 			len = uio->uio_resid;
686 
687 		if ((uio->uio_offset + len) > node->tn_size) {
688 			trivial = (uio->uio_offset <= node->tn_size);
689 			error = tmpfs_reg_resize(vp, uio->uio_offset + len,
690 						 trivial);
691 			if (error)
692 				break;
693 		}
694 
695 		/*
696 		 * Read to fill in any gaps.  Theoretically we could
697 		 * optimize this if the write covers the entire buffer
698 		 * and is not a UIO_NOCOPY write, however this can lead
699 		 * to a security violation exposing random kernel memory
700 		 * (whatever junk was in the backing VM pages before).
701 		 *
702 		 * So just use bread() to do the right thing.
703 		 */
704 		error = bread_kvabio(vp, base_offset, TMPFS_BLKSIZE, &bp);
    		if (error) {
    			kprintf("tmpfs_write bread error %d\n", error);
    			brelse(bp);
    			break;
    		}
705 		bkvasync(bp);
706 		error = uiomovebp(bp, (char *)bp->b_data + offset, len, uio);
707 		if (error) {
708 			kprintf("tmpfs_write uiomove error %d\n", error);
709 			brelse(bp);
710 			break;
711 		}
712 
713 		if (uio->uio_offset > node->tn_size) {
714 			node->tn_size = uio->uio_offset;
715 			kflags |= NOTE_EXTEND;
716 		}
717 		kflags |= NOTE_WRITE;
718 
719 		/*
720 		 * Always try to flush the page in the UIO_NOCOPY case.  This
721 		 * can come from the pageout daemon or during vnode eviction.
722 		 * It is not necessarily going to be marked IO_ASYNC/IO_SYNC.
723 		 *
724 		 * For the normal case we buwrite(), dirtying the underlying
725 		 * VM pages instead of dirtying the buffer and releasing the
726 		 * buffer as a clean buffer.  This allows tmpfs to use
727 		 * essentially all available memory to cache file data.
728 		 * If we used bdwrite() the buffer cache would wind up
729 		 * flushing the data to swap too quickly.
730 		 *
731 		 * But because tmpfs can seriously load the VM system we
732 		 * fall-back to using bdwrite() when free memory starts
733 		 * to get low.  This shifts the load away from the VM system
734 		 * and makes tmpfs act more like a normal filesystem with
735 		 * regards to disk activity.
736 		 *
737 		 * tmpfs pretty much fiddles directly with the VM
738 		 * system, don't let it exhaust it or we won't play
739 		 * nice with other processes.  Only do this if the
740 		 * VOP is coming from a normal read/write.  The VM system
741 		 * handles the case for UIO_NOCOPY.
742 		 */
743 		bp->b_flags |= B_CLUSTEROK;
744 		if (uio->uio_segflg == UIO_NOCOPY) {
745 			/*
746 			 * Flush from the pageout daemon, deal with
747 			 * potentially very heavy tmpfs write activity
748 			 * causing long stalls in the pageout daemon
749 			 * before pages get to free/cache.
750 			 *
751 			 * (a) Under severe pressure setting B_DIRECT will
752 			 *     cause a buffer release to try to free the
753 			 *     underlying pages.
754 			 *
755 			 * (b) Under modest memory pressure the B_RELBUF
756 			 *     alone is sufficient to get the pages moved
757 			 *     to the cache.  We could also force this by
758 			 *     setting B_NOTMETA but that might have other
759 			 *     unintended side-effects (e.g. setting
760 			 *     PG_NOTMETA on the VM page).
761 			 *
762 			 * Hopefully this will unblock the VM system more
763 			 * quickly under extreme tmpfs write load.
764 			 */
765 			if (vm_page_count_min(vm_page_free_hysteresis))
766 				bp->b_flags |= B_DIRECT;
767 			bp->b_flags |= B_AGE | B_RELBUF;
768 			bp->b_act_count = 0;	/* buffer->deactivate pgs */
769 			cluster_awrite(bp);
770 		} else if (vm_page_count_target()) {
771 			/*
772 			 * Normal (userland) write but we are low on memory,
773 			 * run the buffer the buffer cache.
774 			 */
775 			bp->b_act_count = 0;	/* buffer->deactivate pgs */
776 			bdwrite(bp);
777 		} else {
778 			/*
779 			 * Otherwise run the buffer directly through to the
780 			 * backing VM store.
781 			 */
782 			buwrite(bp);
783 			/*vm_wait_nominal();*/
784 		}
785 
786 		if (bp->b_error) {
787 			kprintf("tmpfs_write bwrite error %d\n", bp->b_error);
788 			break;
789 		}
790 	}
791 
792 	if (error) {
793 		if (extended) {
794 			(void)tmpfs_reg_resize(vp, oldsize, trivial);
795 			kflags &= ~NOTE_EXTEND;
796 		}
797 		goto done;
798 	}
799 
800 	/*
801 	 * Currently we don't set the mtime on files modified via mmap()
802 	 * because we can't tell the difference between those modifications
803 	 * and an attempt by the pageout daemon to flush tmpfs pages to
804 	 * swap.
805 	 *
806 	 * This is because in order to defer flushes as long as possible
807 	 * buwrite() works by marking the underlying VM pages dirty in
808 	 * order to be able to dispose of the buffer cache buffer without
809 	 * flushing it.
810 	 */
811 	if (uio->uio_segflg == UIO_NOCOPY) {
812 		if (vp->v_flag & VLASTWRITETS) {
813 			node->tn_mtime = vp->v_lastwrite_ts.tv_sec;
814 			node->tn_mtimensec = vp->v_lastwrite_ts.tv_nsec;
815 		}
816 	} else {
817 		node->tn_status |= TMPFS_NODE_MODIFIED;
818 		vclrflags(vp, VLASTWRITETS);
819 	}
820 
821 	if (extended)
822 		node->tn_status |= TMPFS_NODE_CHANGED;
823 
824 	if (node->tn_mode & (S_ISUID | S_ISGID)) {
825 		if (priv_check_cred(ap->a_cred, PRIV_VFS_RETAINSUGID, 0))
826 			node->tn_mode &= ~(S_ISUID | S_ISGID);
827 	}
828 done:
829 	TMPFS_NODE_UNLOCK(node);
830 	if (kflags)
831 		tmpfs_knote(vp, kflags);
832 
833 	return(error);
834 }
835 
836 static int
837 tmpfs_advlock(struct vop_advlock_args *ap)
838 {
839 	struct tmpfs_node *node;
840 	struct vnode *vp = ap->a_vp;
841 	int error;
842 
843 	node = VP_TO_TMPFS_NODE(vp);
844 	error = (lf_advlock(ap, &node->tn_advlock, node->tn_size));
845 
846 	return (error);
847 }
848 
849 /*
850  * The strategy function is typically only called when memory pressure
851  * forces the system to attempt to pageout pages.  It can also be called
852  * by [n]vtruncbuf() when a truncation cuts a page in half.  Normal writes
853  * are buffered and only reach this function when those buffers are flushed.
854  *
855  * We set VKVABIO for VREG files so bp->b_data may not be synchronized to
856  * our cpu.  swap_pager_strategy() is all we really use, and it directly
857  * supports this.
858  */
859 static int
860 tmpfs_strategy(struct vop_strategy_args *ap)
861 {
862 	struct bio *bio = ap->a_bio;
863 	struct bio *nbio;
864 	struct buf *bp = bio->bio_buf;
865 	struct vnode *vp = ap->a_vp;
866 	struct tmpfs_node *node;
867 	vm_object_t uobj;
868 	vm_page_t m;
869 	int i;
870 
871 	if (vp->v_type != VREG) {
872 		bp->b_resid = bp->b_bcount;
873 		bp->b_flags |= B_ERROR | B_INVAL;
874 		bp->b_error = EINVAL;
875 		biodone(bio);
876 		return(0);
877 	}
878 
879 	node = VP_TO_TMPFS_NODE(vp);
880 
881 	uobj = node->tn_reg.tn_aobj;
882 
883 	/*
884 	 * Don't bother flushing to swap if there is no swap, just
885 	 * ensure that the pages are marked as needing a commit (still).
886 	 */
887 	if (bp->b_cmd == BUF_CMD_WRITE && vm_swap_size == 0) {
888 		for (i = 0; i < bp->b_xio.xio_npages; ++i) {
889 			m = bp->b_xio.xio_pages[i];
890 			vm_page_need_commit(m);
891 		}
892 		bp->b_resid = 0;
893 		bp->b_error = 0;
894 		biodone(bio);
895 	} else {
896 		nbio = push_bio(bio);
897 		nbio->bio_done = tmpfs_strategy_done;
898 		nbio->bio_offset = bio->bio_offset;
899 		swap_pager_strategy(uobj, nbio);
900 	}
901 	return 0;
902 }
903 
904 /*
905  * If we were unable to commit the pages to swap make sure they are marked
906  * as needing a commit (again).  If we were, clear the flag to allow the
907  * pages to be freed.
908  *
909  * Do not error-out the buffer.  In particular, vinvalbuf() needs to
910  * always work.
911  */
912 static void
913 tmpfs_strategy_done(struct bio *bio)
914 {
915 	struct buf *bp;
916 	vm_page_t m;
917 	int i;
918 
919 	bp = bio->bio_buf;
920 
921 	if (bp->b_flags & B_ERROR) {
922 		bp->b_flags &= ~B_ERROR;
923 		bp->b_error = 0;
924 		bp->b_resid = 0;
925 		for (i = 0; i < bp->b_xio.xio_npages; ++i) {
926 			m = bp->b_xio.xio_pages[i];
927 			vm_page_need_commit(m);
928 		}
929 	} else {
930 		for (i = 0; i < bp->b_xio.xio_npages; ++i) {
931 			m = bp->b_xio.xio_pages[i];
932 			vm_page_clear_commit(m);
933 		}
934 	}
935 	bio = pop_bio(bio);
936 	biodone(bio);
937 }
938 
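/*
 * tmpfs is backed by anonymous memory, so a logical offset maps 1:1 to
 * the "physical" offset handed to the strategy routine, with no
 * read-ahead/read-behind runs.
 */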
939 static int
940 tmpfs_bmap(struct vop_bmap_args *ap)
941 {
942 	if (ap->a_doffsetp != NULL)
943 		*ap->a_doffsetp = ap->a_loffset;
944 	if (ap->a_runp != NULL)
945 		*ap->a_runp = 0;
946 	if (ap->a_runb != NULL)
947 		*ap->a_runb = 0;
948 
949 	return 0;
950 }
951 
952 /* --------------------------------------------------------------------- */
953 
954 static int
955 tmpfs_nremove(struct vop_nremove_args *ap)
956 {
957 	struct vnode *dvp = ap->a_dvp;
958 	struct namecache *ncp = ap->a_nch->ncp;
959 	struct vnode *vp;
960 	int error;
961 	struct tmpfs_dirent *de;
962 	struct tmpfs_mount *tmp;
963 	struct tmpfs_node *dnode;
964 	struct tmpfs_node *node;
965 
966 	/*
967 	 * We have to acquire the vp from ap->a_nch because we will likely
968 	 * unresolve the namecache entry, and a vrele/vput is needed to
969 	 * trigger the tmpfs_inactive/tmpfs_reclaim sequence.
970 	 *
971 	 * We have to use vget to clear any inactive state on the vnode,
972 	 * otherwise the vnode may remain inactive and thus tmpfs_inactive
973 	 * will not get called when we release it.
974 	 */
975 	error = cache_vget(ap->a_nch, ap->a_cred, LK_SHARED, &vp);
976 	KKASSERT(error == 0);
977 	KKASSERT(vp->v_mount == dvp->v_mount);
978 	vn_unlock(vp);
979 
980 	if (vp->v_type == VDIR) {
981 		error = EISDIR;
982 		goto out2;
983 	}
984 
985 	dnode = VP_TO_TMPFS_DIR(dvp);
986 	node = VP_TO_TMPFS_NODE(vp);
987 	tmp = VFS_TO_TMPFS(vp->v_mount);
988 
989 	TMPFS_NODE_LOCK(dnode);
990 	de = tmpfs_dir_lookup(dnode, node, ncp);
991 	if (de == NULL) {
992 		error = ENOENT;
993 		TMPFS_NODE_UNLOCK(dnode);
994 		goto out;
995 	}
996 
997 	/* Files marked as immutable or append-only cannot be deleted. */
998 	if ((node->tn_flags & (IMMUTABLE | APPEND | NOUNLINK)) ||
999 	    (dnode->tn_flags & APPEND)) {
1000 		error = EPERM;
1001 		TMPFS_NODE_UNLOCK(dnode);
1002 		goto out;
1003 	}
1004 
1005 	/* Remove the entry from the directory; as it is a file, we do not
1006 	 * have to change the number of hard links of the directory. */
1007 	tmpfs_dir_detach(dnode, de);
1008 	TMPFS_NODE_UNLOCK(dnode);
1009 
1010 	/* Free the directory entry we just deleted.  Note that the node
1011 	 * referred by it will not be removed until the vnode is really
1012 	 * reclaimed. */
1013 	tmpfs_free_dirent(tmp, de);
1014 
1015 	if (node->tn_links > 0) {
1016 		TMPFS_NODE_LOCK(node);
1017 		node->tn_status |= TMPFS_NODE_CHANGED;
1018 		TMPFS_NODE_UNLOCK(node);
1019 	}
1020 
1021 	cache_unlink(ap->a_nch);
1022 	tmpfs_knote(vp, NOTE_DELETE);
1023 	error = 0;
1024 
1025 out:
1026 	if (error == 0)
1027 		tmpfs_knote(dvp, NOTE_WRITE);
1028 out2:
1029 	vrele(vp);
1030 
1031 	return error;
1032 }
1033 
1034 /* --------------------------------------------------------------------- */
1035 
1036 static int
1037 tmpfs_nlink(struct vop_nlink_args *ap)
1038 {
1039 	struct vnode *dvp = ap->a_dvp;
1040 	struct vnode *vp = ap->a_vp;
1041 	struct namecache *ncp = ap->a_nch->ncp;
1042 	struct tmpfs_dirent *de;
1043 	struct tmpfs_node *node;
1044 	struct tmpfs_node *dnode;
1045 	int error;
1046 
1047 	KKASSERT(dvp != vp); /* XXX When can this be false? */
1048 
1049 	node = VP_TO_TMPFS_NODE(vp);
1050 	dnode = VP_TO_TMPFS_NODE(dvp);
1051 	TMPFS_NODE_LOCK(dnode);
1052 
1053 	/* XXX: Why aren't the following two tests done by the caller? */
1054 
1055 	/* Hard links of directories are forbidden. */
1056 	if (vp->v_type == VDIR) {
1057 		error = EPERM;
1058 		goto out;
1059 	}
1060 
1061 	/* Cannot create cross-device links. */
1062 	if (dvp->v_mount != vp->v_mount) {
1063 		error = EXDEV;
1064 		goto out;
1065 	}
1066 
1067 	/* Ensure that we do not overflow the maximum number of links imposed
1068 	 * by the system. */
1069 	KKASSERT(node->tn_links <= LINK_MAX);
1070 	if (node->tn_links >= LINK_MAX) {
1071 		error = EMLINK;
1072 		goto out;
1073 	}
1074 
1075 	/* We cannot create links of files marked immutable or append-only. */
1076 	if (node->tn_flags & (IMMUTABLE | APPEND)) {
1077 		error = EPERM;
1078 		goto out;
1079 	}
1080 
1081 	/* Allocate a new directory entry to represent the node. */
1082 	error = tmpfs_alloc_dirent(VFS_TO_TMPFS(vp->v_mount), node,
1083 				   ncp->nc_name, ncp->nc_nlen, &de);
1084 	if (error != 0)
1085 		goto out;
1086 
1087 	/* Insert the new directory entry into the appropriate directory. */
1088 	tmpfs_dir_attach(dnode, de);
1089 
1090 	/* vp link count has changed, so update node times. */
1091 
1092 	TMPFS_NODE_LOCK(node);
1093 	node->tn_status |= TMPFS_NODE_CHANGED;
1094 	TMPFS_NODE_UNLOCK(node);
1095 	tmpfs_update(vp);
1096 
1097 	tmpfs_knote(vp, NOTE_LINK);
1098 	cache_setunresolved(ap->a_nch);
1099 	cache_setvp(ap->a_nch, vp);
1100 	error = 0;
1101 
1102 out:
1103 	TMPFS_NODE_UNLOCK(dnode);
1104 	if (error == 0)
1105 		tmpfs_knote(dvp, NOTE_WRITE);
1106 	return error;
1107 }
1108 
1109 /* --------------------------------------------------------------------- */
1110 
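/*
 * Rename flow, roughly: vget the target vnode if one exists, perform
 * sanity checks, detach the entry from the source directory, swap in
 * the new name if it differs, detach and free any entry being
 * overwritten in the target directory, attach the entry there, and
 * finally fix up the namecache with cache_rename().
 */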
1111 static int
1112 tmpfs_nrename(struct vop_nrename_args *ap)
1113 {
1114 	struct vnode *fdvp = ap->a_fdvp;
1115 	struct namecache *fncp = ap->a_fnch->ncp;
1116 	struct vnode *fvp = fncp->nc_vp;
1117 	struct vnode *tdvp = ap->a_tdvp;
1118 	struct namecache *tncp = ap->a_tnch->ncp;
1119 	struct vnode *tvp;
1120 	struct tmpfs_dirent *de, *tde;
1121 	struct tmpfs_mount *tmp;
1122 	struct tmpfs_node *fdnode;
1123 	struct tmpfs_node *fnode;
1124 	struct tmpfs_node *tnode;
1125 	struct tmpfs_node *tdnode;
1126 	char *newname;
1127 	char *oldname;
1128 	int error;
1129 
1130 	KKASSERT(fdvp->v_mount == fvp->v_mount);
1131 
1132 	/*
1133 	 * Because tvp can get overwritten we have to vget it instead of
1134 	 * just vref()ing it, otherwise its VINACTIVE flag may not get
1135 	 * cleared and the node won't get destroyed.
1136 	 */
1137 	error = cache_vget(ap->a_tnch, ap->a_cred, LK_SHARED, &tvp);
1138 	if (error == 0) {
1139 		tnode = VP_TO_TMPFS_NODE(tvp);
1140 		vn_unlock(tvp);
1141 	} else {
1142 		tnode = NULL;
1143 	}
1144 
1145 	/* Disallow cross-device renames.
1146 	 * XXX Why isn't this done by the caller? */
1147 	if (fvp->v_mount != tdvp->v_mount ||
1148 	    (tvp != NULL && fvp->v_mount != tvp->v_mount)) {
1149 		error = EXDEV;
1150 		goto out;
1151 	}
1152 
1153 	tmp = VFS_TO_TMPFS(tdvp->v_mount);
1154 	tdnode = VP_TO_TMPFS_DIR(tdvp);
1155 
1156 	/* If source and target are the same file, there is nothing to do. */
1157 	if (fvp == tvp) {
1158 		error = 0;
1159 		goto out;
1160 	}
1161 
1162 	fdnode = VP_TO_TMPFS_DIR(fdvp);
1163 	fnode = VP_TO_TMPFS_NODE(fvp);
1164 	TMPFS_NODE_LOCK(fdnode);
1165 	de = tmpfs_dir_lookup(fdnode, fnode, fncp);
1166 	TMPFS_NODE_UNLOCK(fdnode);	/* XXX depend on namecache lock */
1167 
1168 	/* Avoid manipulating '.' and '..' entries. */
1169 	if (de == NULL) {
1170 		error = ENOENT;
1171 		goto out_locked;
1172 	}
1173 	KKASSERT(de->td_node == fnode);
1174 
1175 	/*
1176 	 * If replacing an entry in the target directory and that entry
1177 	 * is a directory, it must be empty.
1178 	 *
1179 	 * kern_rename() guarantees the destination to be a directory
1180 	 * if the source is one (does it?).
1181 	 */
1182 	if (tvp != NULL) {
1183 		KKASSERT(tnode != NULL);
1184 
1185 		if ((tnode->tn_flags & (NOUNLINK | IMMUTABLE | APPEND)) ||
1186 		    (tdnode->tn_flags & (APPEND | IMMUTABLE))) {
1187 			error = EPERM;
1188 			goto out_locked;
1189 		}
1190 
1191 		if (fnode->tn_type == VDIR && tnode->tn_type == VDIR) {
1192 			if (tnode->tn_size > 0) {
1193 				error = ENOTEMPTY;
1194 				goto out_locked;
1195 			}
1196 		} else if (fnode->tn_type == VDIR && tnode->tn_type != VDIR) {
1197 			error = ENOTDIR;
1198 			goto out_locked;
1199 		} else if (fnode->tn_type != VDIR && tnode->tn_type == VDIR) {
1200 			error = EISDIR;
1201 			goto out_locked;
1202 		} else {
1203 			KKASSERT(fnode->tn_type != VDIR &&
1204 				tnode->tn_type != VDIR);
1205 		}
1206 	}
1207 
1208 	if ((fnode->tn_flags & (NOUNLINK | IMMUTABLE | APPEND)) ||
1209 	    (fdnode->tn_flags & (APPEND | IMMUTABLE))) {
1210 		error = EPERM;
1211 		goto out_locked;
1212 	}
1213 
1214 	/*
1215 	 * Ensure that we have enough memory to hold the new name, if it
1216 	 * has to be changed.
1217 	 */
1218 	if (fncp->nc_nlen != tncp->nc_nlen ||
1219 	    bcmp(fncp->nc_name, tncp->nc_name, fncp->nc_nlen) != 0) {
1220 		newname = kmalloc(tncp->nc_nlen + 1, tmp->tm_name_zone,
1221 				  M_WAITOK | M_NULLOK);
1222 		if (newname == NULL) {
1223 			error = ENOSPC;
1224 			goto out_locked;
1225 		}
1226 		bcopy(tncp->nc_name, newname, tncp->nc_nlen);
1227 		newname[tncp->nc_nlen] = '\0';
1228 	} else {
1229 		newname = NULL;
1230 	}
1231 
1232 	/*
1233 	 * Unlink entry from source directory.  Note that the kernel has
1234 	 * already checked for illegal recursion cases (renaming a directory
1235 	 * into a subdirectory of itself).
1236 	 */
1237 	if (fdnode != tdnode) {
1238 		tmpfs_dir_detach(fdnode, de);
1239 	} else {
1240 		/* XXX depend on namecache lock */
1241 		TMPFS_NODE_LOCK(fdnode);
1242 		KKASSERT(de == tmpfs_dir_lookup(fdnode, fnode, fncp));
1243 		RB_REMOVE(tmpfs_dirtree, &fdnode->tn_dir.tn_dirtree, de);
1244 		RB_REMOVE(tmpfs_dirtree_cookie,
1245 			  &fdnode->tn_dir.tn_cookietree, de);
1246 		TMPFS_NODE_UNLOCK(fdnode);
1247 	}
1248 
1249 	/*
1250 	 * Handle any name change.  Swap with newname, we will
1251 	 * deallocate it at the end.
1252 	 */
1253 	if (newname != NULL) {
1254 #if 0
1255 		TMPFS_NODE_LOCK(fnode);
1256 		fnode->tn_status |= TMPFS_NODE_CHANGED;
1257 		TMPFS_NODE_UNLOCK(fnode);
1258 #endif
1259 		oldname = de->td_name;
1260 		de->td_name = newname;
1261 		de->td_namelen = (uint16_t)tncp->nc_nlen;
1262 		newname = oldname;
1263 	}
1264 
1265 	/*
1266 	 * If we are overwriting an entry, we have to remove the old one
1267 	 * from the target directory.
1268 	 */
1269 	if (tvp != NULL) {
1270 		/* Remove the old entry from the target directory. */
1271 		TMPFS_NODE_LOCK(tdnode);
1272 		tde = tmpfs_dir_lookup(tdnode, tnode, tncp);
1273 		tmpfs_dir_detach(tdnode, tde);
1274 		TMPFS_NODE_UNLOCK(tdnode);
1275 		tmpfs_knote(tdnode->tn_vnode, NOTE_DELETE);
1276 
1277 		/*
1278 		 * Free the directory entry we just deleted.  Note that the
1279 		 * node referred by it will not be removed until the vnode is
1280 		 * really reclaimed.
1281 		 */
1282 		tmpfs_free_dirent(VFS_TO_TMPFS(tvp->v_mount), tde);
1283 		/*cache_inval_vp(tvp, CINV_DESTROY);*/
1284 	}
1285 
1286 	/*
1287 	 * Link entry to target directory.  If the entry
1288 	 * represents a directory move the parent linkage
1289 	 * as well.
1290 	 */
1291 	if (fdnode != tdnode) {
1292 		if (de->td_node->tn_type == VDIR) {
1293 			TMPFS_VALIDATE_DIR(fnode);
1294 		}
1295 		tmpfs_dir_attach(tdnode, de);
1296 	} else {
1297 		TMPFS_NODE_LOCK(tdnode);
1298 		tdnode->tn_status |= TMPFS_NODE_MODIFIED;
1299 		RB_INSERT(tmpfs_dirtree, &tdnode->tn_dir.tn_dirtree, de);
1300 		RB_INSERT(tmpfs_dirtree_cookie,
1301 			  &tdnode->tn_dir.tn_cookietree, de);
1302 		TMPFS_NODE_UNLOCK(tdnode);
1303 	}
1304 
1305 	/*
1306 	 * Finish up
1307 	 */
1308 	if (newname) {
1309 		kfree(newname, tmp->tm_name_zone);
1310 		newname = NULL;
1311 	}
1312 	cache_rename(ap->a_fnch, ap->a_tnch);
1313 	tmpfs_knote(ap->a_fdvp, NOTE_WRITE);
1314 	tmpfs_knote(ap->a_tdvp, NOTE_WRITE);
1315 	if (fnode->tn_vnode)
1316 		tmpfs_knote(fnode->tn_vnode, NOTE_RENAME);
1317 	error = 0;
1318 
1319 out_locked:
1320 	;
1321 out:
1322 	if (tvp)
1323 		vrele(tvp);
1324 	return error;
1325 }
1326 
1327 /* --------------------------------------------------------------------- */
1328 
1329 static int
1330 tmpfs_nmkdir(struct vop_nmkdir_args *ap)
1331 {
1332 	struct vnode *dvp = ap->a_dvp;
1333 	struct vnode **vpp = ap->a_vpp;
1334 	struct namecache *ncp = ap->a_nch->ncp;
1335 	struct vattr *vap = ap->a_vap;
1336 	struct ucred *cred = ap->a_cred;
1337 	int error;
1338 
1339 	KKASSERT(vap->va_type == VDIR);
1340 
1341 	error = tmpfs_alloc_file(dvp, vpp, vap, ncp, cred, NULL);
1342 	if (error == 0) {
1343 		cache_setunresolved(ap->a_nch);
1344 		cache_setvp(ap->a_nch, *vpp);
1345 		tmpfs_knote(dvp, NOTE_WRITE | NOTE_LINK);
1346 	}
1347 	return error;
1348 }
1349 
1350 /* --------------------------------------------------------------------- */
1351 
1352 static int
1353 tmpfs_nrmdir(struct vop_nrmdir_args *ap)
1354 {
1355 	struct vnode *dvp = ap->a_dvp;
1356 	struct namecache *ncp = ap->a_nch->ncp;
1357 	struct vnode *vp;
1358 	struct tmpfs_dirent *de;
1359 	struct tmpfs_mount *tmp;
1360 	struct tmpfs_node *dnode;
1361 	struct tmpfs_node *node;
1362 	int error;
1363 
1364 	/*
1365 	 * We have to acquire the vp from ap->a_nch because we will likely
1366 	 * unresolve the namecache entry, and a vrele/vput is needed to
1367 	 * trigger the tmpfs_inactive/tmpfs_reclaim sequence.
1368 	 *
1369 	 * We have to use vget to clear any inactive state on the vnode,
1370 	 * otherwise the vnode may remain inactive and thus tmpfs_inactive
1371 	 * will not get called when we release it.
1372 	 */
1373 	error = cache_vget(ap->a_nch, ap->a_cred, LK_SHARED, &vp);
1374 	KKASSERT(error == 0);
1375 	vn_unlock(vp);
1376 
1377 	/*
1378 	 * Prevalidate so we don't hit an assertion later
1379 	 */
1380 	if (vp->v_type != VDIR) {
1381 		error = ENOTDIR;
1382 		goto out;
1383 	}
1384 
1385 	tmp = VFS_TO_TMPFS(dvp->v_mount);
1386 	dnode = VP_TO_TMPFS_DIR(dvp);
1387 	node = VP_TO_TMPFS_DIR(vp);
1388 
1389 	/*
1390 	 * Directories with more than two entries ('.' and '..') cannot
1391 	 * be removed.
1392 	 */
1393 	if (node->tn_size > 0) {
1394 		error = ENOTEMPTY;
1395 		goto out;
1396 	}
1397 
1398 	if ((dnode->tn_flags & APPEND)
1399 	    || (node->tn_flags & (NOUNLINK | IMMUTABLE | APPEND))) {
1400 		error = EPERM;
1401 		goto out;
1402 	}
1403 
1404 	/*
1405 	 * This invariant holds only if we are not trying to
1406 	 * remove "..".  We checked for that above so this is safe now.
1407 	 */
1408 	KKASSERT(node->tn_dir.tn_parent == dnode);
1409 
1410 	/*
1411 	 * Get the directory entry associated with node (vp).  This
1412 	 * was filled by tmpfs_lookup while looking up the entry.
1413 	 */
1414 	TMPFS_NODE_LOCK(dnode);
1415 	de = tmpfs_dir_lookup(dnode, node, ncp);
1416 	KKASSERT(TMPFS_DIRENT_MATCHES(de, ncp->nc_name, ncp->nc_nlen));
1417 
1418 	/* Check flags to see if we are allowed to remove the directory. */
1419 	if ((dnode->tn_flags & APPEND) ||
1420 	    node->tn_flags & (NOUNLINK | IMMUTABLE | APPEND)) {
1421 		error = EPERM;
1422 		TMPFS_NODE_UNLOCK(dnode);
1423 		goto out;
1424 	}
1425 
1426 	/* Detach the directory entry from the directory (dnode). */
1427 	tmpfs_dir_detach(dnode, de);
1428 	TMPFS_NODE_UNLOCK(dnode);
1429 
1430 	/* No vnode should be allocated for this entry from this point */
1431 	TMPFS_NODE_LOCK(dnode);
1432 	TMPFS_ASSERT_ELOCKED(dnode);
1433 	TMPFS_NODE_LOCK(node);
1434 	TMPFS_ASSERT_ELOCKED(node);
1435 
1436 	/*
1437 	 * Must set parent linkage to NULL (tested by ncreate to disallow
1438 	 * the creation of new files/dirs in a deleted directory)
1439 	 */
1440 	node->tn_status |= TMPFS_NODE_CHANGED;
1441 
1442 	dnode->tn_status |= TMPFS_NODE_ACCESSED | TMPFS_NODE_CHANGED |
1443 			    TMPFS_NODE_MODIFIED;
1444 
1445 	TMPFS_NODE_UNLOCK(node);
1446 	TMPFS_NODE_UNLOCK(dnode);
1447 
1448 	/* Free the directory entry we just deleted.  Note that the node
1449 	 * referred by it will not be removed until the vnode is really
1450 	 * reclaimed. */
1451 	tmpfs_free_dirent(tmp, de);
1452 
1453 	/* Release the deleted vnode (will destroy the node, notify
1454 	 * interested parties and clean it from the cache). */
1455 
1456 	TMPFS_NODE_LOCK(dnode);
1457 	dnode->tn_status |= TMPFS_NODE_CHANGED;
1458 	TMPFS_NODE_UNLOCK(dnode);
1459 	tmpfs_update(dvp);
1460 
1461 	cache_unlink(ap->a_nch);
1462 	tmpfs_knote(dvp, NOTE_WRITE | NOTE_LINK);
1463 	error = 0;
1464 
1465 out:
1466 	vrele(vp);
1467 
1468 	return error;
1469 }
1470 
1471 /* --------------------------------------------------------------------- */
1472 
1473 static int
1474 tmpfs_nsymlink(struct vop_nsymlink_args *ap)
1475 {
1476 	struct vnode *dvp = ap->a_dvp;
1477 	struct vnode **vpp = ap->a_vpp;
1478 	struct namecache *ncp = ap->a_nch->ncp;
1479 	struct vattr *vap = ap->a_vap;
1480 	struct ucred *cred = ap->a_cred;
1481 	char *target = ap->a_target;
1482 	int error;
1483 
1484 	vap->va_type = VLNK;
1485 	error = tmpfs_alloc_file(dvp, vpp, vap, ncp, cred, target);
1486 	if (error == 0) {
1487 		tmpfs_knote(*vpp, NOTE_WRITE);
1488 		cache_setunresolved(ap->a_nch);
1489 		cache_setvp(ap->a_nch, *vpp);
1490 	}
1491 	return error;
1492 }
1493 
1494 /* --------------------------------------------------------------------- */
1495 
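/*
 * '.' and '..' live at the fixed cookies TMPFS_DIRCOOKIE_DOT and
 * TMPFS_DIRCOOKIE_DOTDOT; real entries are iterated via the cookie
 * RB tree.  The cookie array handed back for NFS is rebuilt below by
 * replaying that same iteration from the starting offset.
 */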
1496 static int
1497 tmpfs_readdir(struct vop_readdir_args *ap)
1498 {
1499 	struct vnode *vp = ap->a_vp;
1500 	struct uio *uio = ap->a_uio;
1501 	int *eofflag = ap->a_eofflag;
1502 	off_t **cookies = ap->a_cookies;
1503 	int *ncookies = ap->a_ncookies;
1504 	struct tmpfs_mount *tmp;
1505 	int error;
1506 	off_t startoff;
1507 	off_t cnt = 0;
1508 	struct tmpfs_node *node;
1509 
1510 	/* This operation only makes sense on directory nodes. */
1511 	if (vp->v_type != VDIR) {
1512 		return ENOTDIR;
1513 	}
1514 
1515 	tmp = VFS_TO_TMPFS(vp->v_mount);
1516 	node = VP_TO_TMPFS_DIR(vp);
1517 	startoff = uio->uio_offset;
1518 
1519 	if (uio->uio_offset == TMPFS_DIRCOOKIE_DOT) {
1520 		error = tmpfs_dir_getdotdent(node, uio);
1521 		if (error != 0) {
1522 			TMPFS_NODE_LOCK_SH(node);
1523 			goto outok;
1524 		}
1525 		cnt++;
1526 	}
1527 
1528 	if (uio->uio_offset == TMPFS_DIRCOOKIE_DOTDOT) {
1529 		/* may lock parent, cannot hold node lock */
1530 		error = tmpfs_dir_getdotdotdent(tmp, node, uio);
1531 		if (error != 0) {
1532 			TMPFS_NODE_LOCK_SH(node);
1533 			goto outok;
1534 		}
1535 		cnt++;
1536 	}
1537 
1538 	TMPFS_NODE_LOCK_SH(node);
1539 	error = tmpfs_dir_getdents(node, uio, &cnt);
1540 
1541 outok:
1542 	KKASSERT(error >= -1);
1543 
1544 	if (error == -1)
1545 		error = 0;
1546 
1547 	if (eofflag != NULL)
1548 		*eofflag =
1549 		    (error == 0 && uio->uio_offset == TMPFS_DIRCOOKIE_EOF);
1550 
1551 	/* Update NFS-related variables. */
1552 	if (error == 0 && cookies != NULL && ncookies != NULL) {
1553 		off_t i;
1554 		off_t off = startoff;
1555 		struct tmpfs_dirent *de = NULL;
1556 
1557 		*ncookies = cnt;
1558 		*cookies = kmalloc(cnt * sizeof(off_t), M_TEMP, M_WAITOK);
1559 
1560 		for (i = 0; i < cnt; i++) {
1561 			KKASSERT(off != TMPFS_DIRCOOKIE_EOF);
1562 			if (off == TMPFS_DIRCOOKIE_DOT) {
1563 				off = TMPFS_DIRCOOKIE_DOTDOT;
1564 			} else {
1565 				if (off == TMPFS_DIRCOOKIE_DOTDOT) {
1566 					de = RB_MIN(tmpfs_dirtree_cookie,
1567 						&node->tn_dir.tn_cookietree);
1568 				} else if (de != NULL) {
1569 					de = RB_NEXT(tmpfs_dirtree_cookie,
1570 					       &node->tn_dir.tn_cookietree, de);
1571 				} else {
1572 					de = tmpfs_dir_lookupbycookie(node,
1573 								      off);
1574 					KKASSERT(de != NULL);
1575 					de = RB_NEXT(tmpfs_dirtree_cookie,
1576 					       &node->tn_dir.tn_cookietree, de);
1577 				}
1578 				if (de == NULL)
1579 					off = TMPFS_DIRCOOKIE_EOF;
1580 				else
1581 					off = tmpfs_dircookie(de);
1582 			}
1583 			(*cookies)[i] = off;
1584 		}
1585 		KKASSERT(uio->uio_offset == off);
1586 	}
1587 	TMPFS_NODE_UNLOCK(node);
1588 
1589 	if ((node->tn_status & TMPFS_NODE_ACCESSED) == 0) {
1590 		TMPFS_NODE_LOCK(node);
1591 		node->tn_status |= TMPFS_NODE_ACCESSED;
1592 		TMPFS_NODE_UNLOCK(node);
1593 	}
1594 	return error;
1595 }
1596 
1597 /* --------------------------------------------------------------------- */
1598 
1599 static int
1600 tmpfs_readlink(struct vop_readlink_args *ap)
1601 {
1602 	struct vnode *vp = ap->a_vp;
1603 	struct uio *uio = ap->a_uio;
1604 	int error;
1605 	struct tmpfs_node *node;
1606 
1607 	KKASSERT(uio->uio_offset == 0);
1608 	KKASSERT(vp->v_type == VLNK);
1609 
1610 	node = VP_TO_TMPFS_NODE(vp);
1611 	TMPFS_NODE_LOCK_SH(node);
1612 	error = uiomove(node->tn_link,
1613 			MIN(node->tn_size, uio->uio_resid), uio);
1614 	TMPFS_NODE_UNLOCK(node);
1615 	if ((node->tn_status & TMPFS_NODE_ACCESSED) == 0) {
1616 		TMPFS_NODE_LOCK(node);
1617 		node->tn_status |= TMPFS_NODE_ACCESSED;
1618 		TMPFS_NODE_UNLOCK(node);
1619 	}
1620 	return error;
1621 }
1622 
1623 /* --------------------------------------------------------------------- */
1624 
1625 static int
1626 tmpfs_inactive(struct vop_inactive_args *ap)
1627 {
1628 	struct vnode *vp = ap->a_vp;
1629 	struct tmpfs_node *node;
1630 	struct mount *mp;
1631 
1632 	mp = vp->v_mount;
1633 	lwkt_gettoken(&mp->mnt_token);
1634 	node = VP_TO_TMPFS_NODE(vp);
1635 
1636 	/*
1637 	 * Degenerate case
1638 	 */
1639 	if (node == NULL) {
1640 		vrecycle(vp);
1641 		lwkt_reltoken(&mp->mnt_token);
1642 		return(0);
1643 	}
1644 
1645 	/*
1646 	 * Get rid of unreferenced deleted vnodes sooner rather than
1647 	 * later so the data memory can be recovered immediately.
1648 	 *
1649 	 * We must truncate the vnode to prevent the normal reclamation
1650 	 * path from flushing the data for the removed file to disk.
1651 	 */
1652 	TMPFS_NODE_LOCK(node);
1653 	if ((node->tn_vpstate & TMPFS_VNODE_ALLOCATING) == 0 &&
1654 	    node->tn_links == 0)
1655 	{
1656 		node->tn_vpstate = TMPFS_VNODE_DOOMED;
1657 		TMPFS_NODE_UNLOCK(node);
1658 		if (node->tn_type == VREG)
1659 			tmpfs_truncate(vp, 0);
1660 		vrecycle(vp);
1661 	} else {
1662 		/*
1663 		 * We must retain any VM pages belonging to the vnode's
1664 		 * object as the vnode will destroy the object during a
1665 		 * later reclaim.  We call vinvalbuf(V_SAVE) to clean
1666 		 * out the buffer cache.
1667 		 *
1668 		 * On DragonFlyBSD, vnodes are not immediately deactivated
1669 		 * on the 1->0 refs, so this is a relatively optimal
1670 		 * operation.  We have to do this in tmpfs_inactive()
1671 		 * because the pages will have already been thrown away
1672 		 * at the time tmpfs_reclaim() is called.
1673 		 */
1674 		if (node->tn_type == VREG &&
1675 		    node->tn_reg.tn_pages_in_aobj == 0) {
1676 			vinvalbuf(vp, V_SAVE, 0, 0);
1677 			KKASSERT(RB_EMPTY(&vp->v_rbdirty_tree));
1678 			KKASSERT(RB_EMPTY(&vp->v_rbclean_tree));
1679 			tmpfs_move_pages(vp->v_object, node->tn_reg.tn_aobj);
1680 			node->tn_reg.tn_pages_in_aobj = 1;
1681 		}
1682 
1683 		TMPFS_NODE_UNLOCK(node);
1684 	}
1685 	lwkt_reltoken(&mp->mnt_token);
1686 
1687 	return 0;
1688 }
1689 
1690 /* --------------------------------------------------------------------- */
1691 
1692 int
1693 tmpfs_reclaim(struct vop_reclaim_args *ap)
1694 {
1695 	struct vnode *vp = ap->a_vp;
1696 	struct tmpfs_mount *tmp;
1697 	struct tmpfs_node *node;
1698 	struct mount *mp;
1699 
1700 	mp = vp->v_mount;
1701 	lwkt_gettoken(&mp->mnt_token);
1702 
1703 	node = VP_TO_TMPFS_NODE(vp);
1704 	tmp = VFS_TO_TMPFS(vp->v_mount);
1705 	KKASSERT(mp == tmp->tm_mount);
1706 
1707 	TMPFS_NODE_LOCK(node);
1708 	KKASSERT(node->tn_vnode == vp);
1709 	node->tn_vnode = NULL;
1710 	vp->v_data = NULL;
1711 
1712 	/*
1713 	 * If the node referenced by this vnode was deleted by the
1714 	 * user, we must free its associated data structures now that
1715 	 * the vnode is being reclaimed.
1716 	 *
1717 	 * Directories have an extra link ref.
1718 	 */
1719 	if ((node->tn_vpstate & TMPFS_VNODE_ALLOCATING) == 0 &&
1720 	    node->tn_links == 0) {
1721 		node->tn_vpstate = TMPFS_VNODE_DOOMED;
1722 		tmpfs_free_node(tmp, node);
1723 		/* eats the lock */
1724 	} else {
1725 		TMPFS_NODE_UNLOCK(node);
1726 	}
1727 	lwkt_reltoken(&mp->mnt_token);
1728 
1729 	KKASSERT(vp->v_data == NULL);
1730 	return 0;
1731 }
1732 
1733 /* --------------------------------------------------------------------- */
1734 
1735 static int
1736 tmpfs_mountctl(struct vop_mountctl_args *ap)
1737 {
1738 	struct tmpfs_mount *tmp;
1739 	struct mount *mp;
1740 	int rc;
1741 
1742 	mp = ap->a_head.a_ops->head.vv_mount;
1743 	lwkt_gettoken(&mp->mnt_token);
1744 
1745 	switch (ap->a_op) {
1746 	case (MOUNTCTL_SET_EXPORT):
1747 		tmp = (struct tmpfs_mount *) mp->mnt_data;
1748 
1749 		if (ap->a_ctllen != sizeof(struct export_args))
1750 			rc = (EINVAL);
1751 		else
1752 			rc = vfs_export(mp, &tmp->tm_export,
1753 					(const struct export_args *) ap->a_ctl);
1754 		break;
1755 	default:
1756 		rc = vop_stdmountctl(ap);
1757 		break;
1758 	}
1759 
1760 	lwkt_reltoken(&mp->mnt_token);
1761 	return (rc);
1762 }
1763 
1764 /* --------------------------------------------------------------------- */
1765 
1766 static int
1767 tmpfs_print(struct vop_print_args *ap)
1768 {
1769 	struct vnode *vp = ap->a_vp;
1770 
1771 	struct tmpfs_node *node;
1772 
1773 	node = VP_TO_TMPFS_NODE(vp);
1774 
1775 	kprintf("tag VT_TMPFS, tmpfs_node %p, flags 0x%x, links %d\n",
1776 	    node, node->tn_flags, node->tn_links);
1777 	kprintf("\tmode 0%o, owner %d, group %d, size %ju, status 0x%x\n",
1778 	    node->tn_mode, node->tn_uid, node->tn_gid,
1779 	    (uintmax_t)node->tn_size, node->tn_status);
1780 
1781 	if (vp->v_type == VFIFO)
1782 		fifo_printinfo(vp);
1783 
1784 	kprintf("\n");
1785 
1786 	return 0;
1787 }
1788 
1789 /* --------------------------------------------------------------------- */
1790 
1791 static int
1792 tmpfs_pathconf(struct vop_pathconf_args *ap)
1793 {
1794 	struct vnode *vp = ap->a_vp;
1795 	int name = ap->a_name;
1796 	register_t *retval = ap->a_retval;
1797 	struct tmpfs_mount *tmp;
1798 	int error;
1799 
1800 	error = 0;
1801 
1802 	switch (name) {
1803 	case _PC_CHOWN_RESTRICTED:
1804 		*retval = 1;
1805 		break;
1806 
1807 	case _PC_FILESIZEBITS:
1808 		tmp = VFS_TO_TMPFS(vp->v_mount);
1809 		*retval = max(32, flsll(tmp->tm_pages_max * PAGE_SIZE) + 1);
1810 		break;
1811 
1812 	case _PC_LINK_MAX:
1813 		*retval = LINK_MAX;
1814 		break;
1815 
1816 	case _PC_NAME_MAX:
1817 		*retval = NAME_MAX;
1818 		break;
1819 
1820 	case _PC_NO_TRUNC:
1821 		*retval = 1;
1822 		break;
1823 
1824 	case _PC_PATH_MAX:
1825 		*retval = PATH_MAX;
1826 		break;
1827 
1828 	case _PC_PIPE_BUF:
1829 		*retval = PIPE_BUF;
1830 		break;
1831 
1832 	case _PC_SYNC_IO:
1833 		*retval = 1;
1834 		break;
1835 
1836 	case _PC_2_SYMLINKS:
1837 		*retval = 1;
1838 		break;
1839 
1840 	default:
1841 		error = EINVAL;
1842 	}
1843 
1844 	return error;
1845 }
1846 
1847 /************************************************************************
1848  *                          KQFILTER OPS                                *
1849  ************************************************************************/
1850 
1851 static void filt_tmpfsdetach(struct knote *kn);
1852 static int filt_tmpfsread(struct knote *kn, long hint);
1853 static int filt_tmpfswrite(struct knote *kn, long hint);
1854 static int filt_tmpfsvnode(struct knote *kn, long hint);
1855 
1856 static struct filterops tmpfsread_filtops =
1857 	{ FILTEROP_ISFD | FILTEROP_MPSAFE,
1858 	  NULL, filt_tmpfsdetach, filt_tmpfsread };
1859 static struct filterops tmpfswrite_filtops =
1860 	{ FILTEROP_ISFD | FILTEROP_MPSAFE,
1861 	  NULL, filt_tmpfsdetach, filt_tmpfswrite };
1862 static struct filterops tmpfsvnode_filtops =
1863 	{ FILTEROP_ISFD | FILTEROP_MPSAFE,
1864 	  NULL, filt_tmpfsdetach, filt_tmpfsvnode };
1865 
1866 static int
1867 tmpfs_kqfilter (struct vop_kqfilter_args *ap)
1868 {
1869 	struct vnode *vp = ap->a_vp;
1870 	struct knote *kn = ap->a_kn;
1871 
1872 	switch (kn->kn_filter) {
1873 	case EVFILT_READ:
1874 		kn->kn_fop = &tmpfsread_filtops;
1875 		break;
1876 	case EVFILT_WRITE:
1877 		kn->kn_fop = &tmpfswrite_filtops;
1878 		break;
1879 	case EVFILT_VNODE:
1880 		kn->kn_fop = &tmpfsvnode_filtops;
1881 		break;
1882 	default:
1883 		return (EOPNOTSUPP);
1884 	}
1885 
1886 	kn->kn_hook = (caddr_t)vp;
1887 
1888 	knote_insert(&vp->v_pollinfo.vpi_kqinfo.ki_note, kn);
1889 
1890 	return(0);
1891 }
1892 
1893 static void
1894 filt_tmpfsdetach(struct knote *kn)
1895 {
1896 	struct vnode *vp = (void *)kn->kn_hook;
1897 
1898 	knote_remove(&vp->v_pollinfo.vpi_kqinfo.ki_note, kn);
1899 }
1900 
1901 static int
1902 filt_tmpfsread(struct knote *kn, long hint)
1903 {
1904 	struct vnode *vp = (void *)kn->kn_hook;
1905 	struct tmpfs_node *node = VP_TO_TMPFS_NODE(vp);
1906 	off_t off;
1907 
1908 	if (hint == NOTE_REVOKE) {
1909 		kn->kn_flags |= (EV_EOF | EV_NODATA | EV_ONESHOT);
1910 		return(1);
1911 	}
1912 
1913 	/*
1914 	 * Interlock against MP races when performing this function.
1915 	 */
1916 	TMPFS_NODE_LOCK_SH(node);
1917 	off = node->tn_size - kn->kn_fp->f_offset;
1918 	kn->kn_data = (off < INTPTR_MAX) ? off : INTPTR_MAX;
1919 	if (kn->kn_sfflags & NOTE_OLDAPI) {
1920 		TMPFS_NODE_UNLOCK(node);
1921 		return(1);
1922 	}
1923 	if (kn->kn_data == 0) {
1924 		kn->kn_data = (off < INTPTR_MAX) ? off : INTPTR_MAX;
1925 	}
1926 	TMPFS_NODE_UNLOCK(node);
1927 	return (kn->kn_data != 0);
1928 }
1929 
1930 static int
1931 filt_tmpfswrite(struct knote *kn, long hint)
1932 {
1933 	if (hint == NOTE_REVOKE)
1934 		kn->kn_flags |= (EV_EOF | EV_NODATA | EV_ONESHOT);
1935 	kn->kn_data = 0;
1936 	return (1);
1937 }
1938 
1939 static int
1940 filt_tmpfsvnode(struct knote *kn, long hint)
1941 {
1942 	if (kn->kn_sfflags & hint)
1943 		kn->kn_fflags |= hint;
1944 	if (hint == NOTE_REVOKE) {
1945 		kn->kn_flags |= (EV_EOF | EV_NODATA);
1946 		return (1);
1947 	}
1948 	return (kn->kn_fflags != 0);
1949 }
1950 
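/*
 * For reference, a minimal userland sketch that exercises the filters
 * above (path and error handling are illustrative only):
 *
 *	int fd = open("/tmp/somefile", O_RDONLY);
 *	int kq = kqueue();
 *	struct kevent kev, ev;
 *
 *	EV_SET(&kev, fd, EVFILT_VNODE, EV_ADD | EV_CLEAR,
 *	       NOTE_DELETE | NOTE_RENAME | NOTE_WRITE, 0, NULL);
 *	kevent(kq, &kev, 1, NULL, 0, NULL);	(register the filter)
 *	kevent(kq, NULL, 0, &ev, 1, NULL);	(block for one event)
 *
 * ev.fflags then carries the accumulated NOTE_* bits posted by
 * filt_tmpfsvnode().
 */
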
1951 /*
1952  * Helper to move VM pages between objects
1953  *
1954  * NOTE: vm_page_rename() dirties the page, so we can clear the
1955  *	 PG_NEED_COMMIT flag.  If the pages are being moved into tn_aobj,
1956  *	 the pageout daemon will be able to page them out.
1957  */
1958 static int
1959 tmpfs_move_pages_callback(vm_page_t p, void *data)
1960 {
1961 	struct rb_vm_page_scan_info *info = data;
1962 	vm_pindex_t pindex;
1963 
1964 	pindex = p->pindex;
1965 	if (vm_page_busy_try(p, TRUE)) {
1966 		vm_page_sleep_busy(p, TRUE, "tpgmov");
1967 		info->error = -1;
1968 		return -1;
1969 	}
1970 	if (p->object != info->object || p->pindex != pindex) {
1971 		vm_page_wakeup(p);
1972 		info->error = -1;
1973 		return -1;
1974 	}
1975 	vm_page_rename(p, info->dest_object, pindex);
1976 	vm_page_clear_commit(p);
1977 	vm_page_wakeup(p);
1978 	/* page automatically made dirty */
1979 
1980 	return 0;
1981 }
1982 
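/*
 * The scan below restarts (info.error < 0) whenever a page was busy or
 * changed identity underneath us, and paging-in-progress is waited out,
 * so no page can be left behind in src.
 */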
1983 static
1984 void
1985 tmpfs_move_pages(vm_object_t src, vm_object_t dst)
1986 {
1987 	struct rb_vm_page_scan_info info;
1988 
1989 	vm_object_hold(src);
1990 	vm_object_hold(dst);
1991 	info.object = src;
1992 	info.dest_object = dst;
1993 	do {
1994 		if (src->paging_in_progress)
1995 			vm_object_pip_wait(src, "objtfs");
1996 		info.error = 1;
1997 		vm_page_rb_tree_RB_SCAN(&src->rb_memq, NULL,
1998 					tmpfs_move_pages_callback, &info);
1999 	} while (info.error < 0 || !RB_EMPTY(&src->rb_memq) ||
2000 		 src->paging_in_progress);
2001 	vm_object_drop(dst);
2002 	vm_object_drop(src);
2003 }
2004 
2005 /* --------------------------------------------------------------------- */
2006 
2007 /*
2008  * vnode operations vector used for files stored in a tmpfs file system.
2009  */
2010 struct vop_ops tmpfs_vnode_vops = {
2011 	.vop_default =			vop_defaultop,
2012 	.vop_getpages = 		vop_stdgetpages,
2013 	.vop_putpages = 		vop_stdputpages,
2014 	.vop_ncreate =			tmpfs_ncreate,
2015 	.vop_nresolve =			tmpfs_nresolve,
2016 	.vop_nlookupdotdot =		tmpfs_nlookupdotdot,
2017 	.vop_nmknod =			tmpfs_nmknod,
2018 	.vop_open =			tmpfs_open,
2019 	.vop_close =			tmpfs_close,
2020 	.vop_access =			tmpfs_access,
2021 	.vop_getattr =			tmpfs_getattr,
2022 	.vop_setattr =			tmpfs_setattr,
2023 	.vop_read =			tmpfs_read,
2024 	.vop_write =			tmpfs_write,
2025 	.vop_fsync =			tmpfs_fsync,
2026 	.vop_mountctl =			tmpfs_mountctl,
2027 	.vop_nremove =			tmpfs_nremove,
2028 	.vop_nlink =			tmpfs_nlink,
2029 	.vop_nrename =			tmpfs_nrename,
2030 	.vop_nmkdir =			tmpfs_nmkdir,
2031 	.vop_nrmdir =			tmpfs_nrmdir,
2032 	.vop_nsymlink =			tmpfs_nsymlink,
2033 	.vop_readdir =			tmpfs_readdir,
2034 	.vop_readlink =			tmpfs_readlink,
2035 	.vop_inactive =			tmpfs_inactive,
2036 	.vop_reclaim =			tmpfs_reclaim,
2037 	.vop_print =			tmpfs_print,
2038 	.vop_pathconf =			tmpfs_pathconf,
2039 	.vop_bmap =			tmpfs_bmap,
2040 	.vop_strategy =			tmpfs_strategy,
2041 	.vop_advlock =			tmpfs_advlock,
2042 	.vop_kqfilter =			tmpfs_kqfilter
2043 };
2044