/*-
 * Copyright (c) 2005, 2006 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Julio M. Merino Vidal, developed as part of Google's Summer of Code
 * 2005 program.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 * $NetBSD: tmpfs_vnops.c,v 1.39 2007/07/23 15:41:01 jmmv Exp $
 */

/*
 * tmpfs vnode interface.
 */

#include <sys/kernel.h>
#include <sys/kern_syscall.h>
#include <sys/param.h>
#include <sys/fcntl.h>
#include <sys/lockf.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
#include <sys/sched.h>
#include <sys/stat.h>
#include <sys/systm.h>
#include <sys/sysctl.h>
#include <sys/unistd.h>
#include <sys/vfsops.h>
#include <sys/vnode.h>
#include <sys/mountctl.h>

#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/vm_pager.h>
#include <vm/swap_pager.h>

#include <sys/buf2.h>
#include <vm/vm_page2.h>

#include <vfs/fifofs/fifo.h>
#include <vfs/tmpfs/tmpfs_vnops.h>
#include "tmpfs.h"

static void tmpfs_strategy_done(struct bio *bio);
static void tmpfs_move_pages(vm_object_t src, vm_object_t dst);

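/*
 * vfs.tmpfs.cluster_enable selects clustered reads (cluster_readx) in
 * tmpfs_read(); setting it to 0 falls back to plain bread_kvabio()
 * calls, e.g. "sysctl vfs.tmpfs.cluster_enable=0".
 */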
static int tmpfs_cluster_enable = 1;
SYSCTL_NODE(_vfs, OID_AUTO, tmpfs, CTLFLAG_RW, 0, "TMPFS filesystem");
SYSCTL_INT(_vfs_tmpfs, OID_AUTO, cluster_enable, CTLFLAG_RW,
		&tmpfs_cluster_enable, 0, "");

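/* Post a kqueue vnode event on vp if any event bits are set. */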
static __inline
void
tmpfs_knote(struct vnode *vp, int flags)
{
	if (flags)
		KNOTE(&vp->v_pollinfo.vpi_kqinfo.ki_note, flags);
}


/* --------------------------------------------------------------------- */

static int
tmpfs_nresolve(struct vop_nresolve_args *ap)
{
	struct vnode *dvp = ap->a_dvp;
	struct vnode *vp = NULL;
	struct namecache *ncp = ap->a_nch->ncp;
	struct tmpfs_node *tnode;
	struct tmpfs_dirent *de;
	struct tmpfs_node *dnode;
	int error;

	dnode = VP_TO_TMPFS_DIR(dvp);

	TMPFS_NODE_LOCK_SH(dnode);
loop:
	de = tmpfs_dir_lookup(dnode, NULL, ncp);
	if (de == NULL) {
		error = ENOENT;
	} else {
		/*
		 * Allocate a vnode for the node we found.  Use
		 * tmpfs_alloc_vp()'s deadlock handling mode.
		 */
		tnode = de->td_node;
		error = tmpfs_alloc_vp(dvp->v_mount, dnode, tnode,
				       LK_EXCLUSIVE | LK_RETRY, &vp);
		if (error == EAGAIN)
			goto loop;
		if (error)
			goto out;
		KKASSERT(vp);
	}

out:
	TMPFS_NODE_UNLOCK(dnode);

	if ((dnode->tn_status & TMPFS_NODE_ACCESSED) == 0) {
		TMPFS_NODE_LOCK(dnode);
		dnode->tn_status |= TMPFS_NODE_ACCESSED;
		TMPFS_NODE_UNLOCK(dnode);
	}

	/*
	 * Store the result of this lookup in the cache.  Avoid this if the
	 * request was for creation, as it does not improve timings on
	 * empirical tests.
	 */
	if (vp) {
		vn_unlock(vp);
		cache_setvp(ap->a_nch, vp);
		vrele(vp);
	} else if (error == ENOENT) {
		cache_setvp(ap->a_nch, NULL);
	}
	return (error);
}

static int
tmpfs_nlookupdotdot(struct vop_nlookupdotdot_args *ap)
{
	struct vnode *dvp = ap->a_dvp;
	struct vnode **vpp = ap->a_vpp;
	struct tmpfs_node *dnode = VP_TO_TMPFS_NODE(dvp);
	struct ucred *cred = ap->a_cred;
	int error;

	*vpp = NULL;

	/* Check accessibility of requested node as a first step. */
	error = VOP_ACCESS(dvp, VEXEC, cred);
	if (error != 0)
		return error;

	if (dnode->tn_dir.tn_parent != NULL) {
		/* Allocate a new vnode on the matching entry. */
		error = tmpfs_alloc_vp(dvp->v_mount,
				       NULL, dnode->tn_dir.tn_parent,
				       LK_EXCLUSIVE | LK_RETRY, vpp);

		if (*vpp)
			vn_unlock(*vpp);
	}
	return (*vpp == NULL) ? ENOENT : 0;
}

/* --------------------------------------------------------------------- */

static int
tmpfs_ncreate(struct vop_ncreate_args *ap)
{
	struct vnode *dvp = ap->a_dvp;
	struct vnode **vpp = ap->a_vpp;
	struct namecache *ncp = ap->a_nch->ncp;
	struct vattr *vap = ap->a_vap;
	struct ucred *cred = ap->a_cred;
	int error;

	KKASSERT(vap->va_type == VREG || vap->va_type == VSOCK);

	error = tmpfs_alloc_file(dvp, vpp, vap, ncp, cred, NULL);
	if (error == 0) {
		cache_setunresolved(ap->a_nch);
		cache_setvp(ap->a_nch, *vpp);
		tmpfs_knote(dvp, NOTE_WRITE);
	}
	return (error);
}
/* --------------------------------------------------------------------- */

static int
tmpfs_nmknod(struct vop_nmknod_args *ap)
{
	struct vnode *dvp = ap->a_dvp;
	struct vnode **vpp = ap->a_vpp;
	struct namecache *ncp = ap->a_nch->ncp;
	struct vattr *vap = ap->a_vap;
	struct ucred *cred = ap->a_cred;
	int error;

	if (vap->va_type != VBLK && vap->va_type != VCHR &&
	    vap->va_type != VFIFO) {
		return (EINVAL);
	}

	error = tmpfs_alloc_file(dvp, vpp, vap, ncp, cred, NULL);
	if (error == 0) {
		cache_setunresolved(ap->a_nch);
		cache_setvp(ap->a_nch, *vpp);
		tmpfs_knote(dvp, NOTE_WRITE);
	}
	return error;
}

/* --------------------------------------------------------------------- */

static int
tmpfs_open(struct vop_open_args *ap)
{
	struct vnode *vp = ap->a_vp;
	int mode = ap->a_mode;
	struct tmpfs_node *node;
	int error;

	node = VP_TO_TMPFS_NODE(vp);

#if 0
	/* The file is still active but all its names have been removed
	 * (e.g. by a "rmdir $(pwd)").  It cannot be opened any more as
	 * it is about to die. */
	if (node->tn_links < 1)
		return (ENOENT);
#endif

	/* If the file is marked append-only, deny write requests. */
	if ((node->tn_flags & APPEND) &&
	    (mode & (FWRITE | O_APPEND)) == FWRITE) {
		error = EPERM;
	} else {
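		/*
		 * Pages may have been parked in the backing anonymous
		 * object while the vnode was inactive (see tmpfs_inactive);
		 * re-check under the node lock before pulling them back.
		 */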
		if (node->tn_reg.tn_pages_in_aobj) {
			TMPFS_NODE_LOCK(node);
			if (node->tn_reg.tn_pages_in_aobj) {
				tmpfs_move_pages(node->tn_reg.tn_aobj,
						 vp->v_object);
				node->tn_reg.tn_pages_in_aobj = 0;
			}
			TMPFS_NODE_UNLOCK(node);
		}
		error = vop_stdopen(ap);
	}

	return (error);
}

/* --------------------------------------------------------------------- */

static int
tmpfs_close(struct vop_close_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct tmpfs_node *node;
	int error;

	node = VP_TO_TMPFS_NODE(vp);

	if (node->tn_links > 0) {
		/*
		 * Update node times.  No need to do it if the node has
		 * been deleted, because it will vanish after we return.
		 */
		tmpfs_update(vp);
	}

	error = vop_stdclose(ap);

	return (error);
}

/* --------------------------------------------------------------------- */

int
tmpfs_access(struct vop_access_args *ap)
{
	struct vnode *vp = ap->a_vp;
	int error;
	struct tmpfs_node *node;

	node = VP_TO_TMPFS_NODE(vp);

	switch (vp->v_type) {
	case VDIR:
		/* FALLTHROUGH */
	case VLNK:
		/* FALLTHROUGH */
	case VREG:
		if ((ap->a_mode & VWRITE) &&
		    (vp->v_mount->mnt_flag & MNT_RDONLY)) {
			error = EROFS;
			goto out;
		}
		break;

	case VBLK:
		/* FALLTHROUGH */
	case VCHR:
		/* FALLTHROUGH */
	case VSOCK:
		/* FALLTHROUGH */
	case VFIFO:
		break;

	default:
		error = EINVAL;
		goto out;
	}

	if ((ap->a_mode & VWRITE) && (node->tn_flags & IMMUTABLE)) {
		error = EPERM;
		goto out;
	}

	error = vop_helper_access(ap, node->tn_uid, node->tn_gid,
				  node->tn_mode, 0);
out:
	return error;
}

/* --------------------------------------------------------------------- */

int
tmpfs_getattr(struct vop_getattr_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct vattr *vap = ap->a_vap;
	struct tmpfs_node *node;

	node = VP_TO_TMPFS_NODE(vp);

	tmpfs_update(vp);

	TMPFS_NODE_LOCK_SH(node);
	vap->va_type = vp->v_type;
	vap->va_mode = node->tn_mode;
	vap->va_nlink = node->tn_links;
	vap->va_uid = node->tn_uid;
	vap->va_gid = node->tn_gid;
	vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0];
	vap->va_fileid = node->tn_id;
	vap->va_size = node->tn_size;
	vap->va_blocksize = PAGE_SIZE;
	vap->va_atime.tv_sec = node->tn_atime;
	vap->va_atime.tv_nsec = node->tn_atimensec;
	vap->va_mtime.tv_sec = node->tn_mtime;
	vap->va_mtime.tv_nsec = node->tn_mtimensec;
	vap->va_ctime.tv_sec = node->tn_ctime;
	vap->va_ctime.tv_nsec = node->tn_ctimensec;
	vap->va_gen = node->tn_gen;
	vap->va_flags = node->tn_flags;
	if (vp->v_type == VBLK || vp->v_type == VCHR) {
		vap->va_rmajor = umajor(node->tn_rdev);
		vap->va_rminor = uminor(node->tn_rdev);
	}
	vap->va_bytes = round_page(node->tn_size);
	vap->va_filerev = 0;
	TMPFS_NODE_UNLOCK(node);

	return 0;
}

/* --------------------------------------------------------------------- */

int
tmpfs_setattr(struct vop_setattr_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct vattr *vap = ap->a_vap;
	struct ucred *cred = ap->a_cred;
	struct tmpfs_node *node = VP_TO_TMPFS_NODE(vp);
	int error = 0;
	int kflags = 0;

	TMPFS_NODE_LOCK(node);
	if (error == 0 && (vap->va_flags != VNOVAL)) {
		error = tmpfs_chflags(vp, vap->va_flags, cred);
		kflags |= NOTE_ATTRIB;
	}

	if (error == 0 && (vap->va_size != VNOVAL)) {
		/* restore any saved pages before proceeding */
		if (node->tn_reg.tn_pages_in_aobj) {
			tmpfs_move_pages(node->tn_reg.tn_aobj, vp->v_object);
			node->tn_reg.tn_pages_in_aobj = 0;
		}
		if (vap->va_size > node->tn_size)
			kflags |= NOTE_WRITE | NOTE_EXTEND;
		else
			kflags |= NOTE_WRITE;
		error = tmpfs_chsize(vp, vap->va_size, cred);
	}

	if (error == 0 && (vap->va_uid != (uid_t)VNOVAL ||
			   vap->va_gid != (gid_t)VNOVAL)) {
		error = tmpfs_chown(vp, vap->va_uid, vap->va_gid, cred);
		kflags |= NOTE_ATTRIB;
	}

	if (error == 0 && (vap->va_mode != (mode_t)VNOVAL)) {
		error = tmpfs_chmod(vp, vap->va_mode, cred);
		kflags |= NOTE_ATTRIB;
	}

	if (error == 0 && ((vap->va_atime.tv_sec != VNOVAL &&
	    vap->va_atime.tv_nsec != VNOVAL) ||
	    (vap->va_mtime.tv_sec != VNOVAL &&
	    vap->va_mtime.tv_nsec != VNOVAL))) {
		error = tmpfs_chtimes(vp, &vap->va_atime, &vap->va_mtime,
				      vap->va_vaflags, cred);
		kflags |= NOTE_ATTRIB;
	}

	/*
	 * Update the node times.  We give preference to the error codes
	 * generated by this function rather than the ones that may arise
	 * from tmpfs_update.
	 */
	tmpfs_update(vp);
	TMPFS_NODE_UNLOCK(node);
	tmpfs_knote(vp, kflags);

	return (error);
}

/* --------------------------------------------------------------------- */

/*
 * fsync is usually a NOP, but we must take action when unmounting or
 * when recycling.
 */
static int
tmpfs_fsync(struct vop_fsync_args *ap)
{
	struct tmpfs_node *node;
	struct vnode *vp = ap->a_vp;

	node = VP_TO_TMPFS_NODE(vp);

	tmpfs_update(vp);
	if (vp->v_type == VREG) {
		if (vp->v_flag & VRECLAIMED) {
			if (node->tn_links == 0)
				tmpfs_truncate(vp, 0);
			else
				vfsync(ap->a_vp, ap->a_waitfor, 1, NULL, NULL);
		}
	}
	return 0;
}

/* --------------------------------------------------------------------- */

static int
tmpfs_read(struct vop_read_args *ap)
{
	struct buf *bp;
	struct vnode *vp = ap->a_vp;
	struct uio *uio = ap->a_uio;
	struct tmpfs_node *node;
	off_t base_offset;
	size_t offset;
	size_t len;
	size_t resid;
	int error;
	int seqcount;

	/*
	 * Check the basics
	 */
	if (uio->uio_offset < 0)
		return (EINVAL);
	if (vp->v_type != VREG)
		return (EINVAL);

	/*
	 * Extract node, try to shortcut the operation through
	 * the VM page cache, allowing us to avoid buffer cache
	 * overheads.
	 */
	node = VP_TO_TMPFS_NODE(vp);
	resid = uio->uio_resid;
	seqcount = ap->a_ioflag >> 16;
	error = vop_helper_read_shortcut(ap);
	if (error)
		return error;
	if (uio->uio_resid == 0) {
		if (resid)
			goto finished;
		return error;
	}

	/*
	 * restore any saved pages before proceeding
	 */
	if (node->tn_reg.tn_pages_in_aobj) {
		TMPFS_NODE_LOCK(node);
		if (node->tn_reg.tn_pages_in_aobj) {
			tmpfs_move_pages(node->tn_reg.tn_aobj, vp->v_object);
			node->tn_reg.tn_pages_in_aobj = 0;
		}
		TMPFS_NODE_UNLOCK(node);
	}

	/*
	 * Fall-through to our normal read code.
	 */
	while (uio->uio_resid > 0 && uio->uio_offset < node->tn_size) {
		/*
		 * Use buffer cache I/O (via tmpfs_strategy)
		 */
		offset = (size_t)uio->uio_offset & TMPFS_BLKMASK64;
		base_offset = (off_t)uio->uio_offset - offset;
		bp = getcacheblk(vp, base_offset, TMPFS_BLKSIZE, GETBLK_KVABIO);
		if (bp == NULL) {
			if (tmpfs_cluster_enable) {
				error = cluster_readx(vp, node->tn_size,
						     base_offset,
						     TMPFS_BLKSIZE,
						     B_NOTMETA | B_KVABIO,
						     uio->uio_resid,
						     seqcount * MAXBSIZE,
						     &bp);
			} else {
				error = bread_kvabio(vp, base_offset,
						     TMPFS_BLKSIZE, &bp);
			}
			if (error) {
				brelse(bp);
				kprintf("tmpfs_read bread error %d\n", error);
				break;
			}

			/*
			 * tmpfs pretty much fiddles directly with the VM
			 * system, don't let it exhaust it or we won't play
			 * nice with other processes.
			 *
			 * Only do this if the VOP is coming from a normal
			 * read/write.  The VM system handles the case for
			 * UIO_NOCOPY.
			 */
			if (uio->uio_segflg != UIO_NOCOPY)
				vm_wait_nominal();
		}
		bp->b_flags |= B_CLUSTEROK;
		bkvasync(bp);

		/*
		 * Figure out how many bytes we can actually copy this loop.
		 */
		len = TMPFS_BLKSIZE - offset;
		if (len > uio->uio_resid)
			len = uio->uio_resid;
		if (len > node->tn_size - uio->uio_offset)
			len = (size_t)(node->tn_size - uio->uio_offset);

		error = uiomovebp(bp, (char *)bp->b_data + offset, len, uio);
		bqrelse(bp);
		if (error) {
			kprintf("tmpfs_read uiomove error %d\n", error);
			break;
		}
	}

finished:
	if ((node->tn_status & TMPFS_NODE_ACCESSED) == 0) {
		TMPFS_NODE_LOCK(node);
		node->tn_status |= TMPFS_NODE_ACCESSED;
		TMPFS_NODE_UNLOCK(node);
	}
	return (error);
}

static int
tmpfs_write(struct vop_write_args *ap)
{
	struct buf *bp;
	struct vnode *vp = ap->a_vp;
	struct uio *uio = ap->a_uio;
	struct thread *td = uio->uio_td;
	struct tmpfs_node *node;
	boolean_t extended;
	off_t oldsize;
	int error;
	off_t base_offset;
	size_t offset;
	size_t len;
	struct rlimit limit;
	int trivial = 0;
	int kflags = 0;
	int seqcount;

	error = 0;
	if (uio->uio_resid == 0) {
		return error;
	}

	node = VP_TO_TMPFS_NODE(vp);

	if (vp->v_type != VREG)
		return (EINVAL);
	seqcount = ap->a_ioflag >> 16;

	TMPFS_NODE_LOCK(node);

	/*
	 * restore any saved pages before proceeding
	 */
	if (node->tn_reg.tn_pages_in_aobj) {
		tmpfs_move_pages(node->tn_reg.tn_aobj, vp->v_object);
		node->tn_reg.tn_pages_in_aobj = 0;
	}

	oldsize = node->tn_size;
	if (ap->a_ioflag & IO_APPEND)
		uio->uio_offset = node->tn_size;

	/*
	 * Check for illegal write offsets.
	 */
	if (uio->uio_offset + uio->uio_resid >
	    VFS_TO_TMPFS(vp->v_mount)->tm_maxfilesize) {
		error = EFBIG;
		goto done;
	}

	/*
	 * NOTE: Ignore if UIO does not come from a user thread (e.g. VN).
	 */
	if (vp->v_type == VREG && td != NULL && td->td_lwp != NULL) {
		error = kern_getrlimit(RLIMIT_FSIZE, &limit);
		if (error)
			goto done;
		if (uio->uio_offset + uio->uio_resid > limit.rlim_cur) {
			ksignal(td->td_proc, SIGXFSZ);
			error = EFBIG;
			goto done;
		}
	}

	/*
	 * Extend the file's size if necessary
	 */
	extended = ((uio->uio_offset + uio->uio_resid) > node->tn_size);

	while (uio->uio_resid > 0) {
		/*
		 * Don't completely blow out running buffer I/O
		 * when being hit from the pageout daemon.
		 */
		if (uio->uio_segflg == UIO_NOCOPY &&
		    (ap->a_ioflag & IO_RECURSE) == 0) {
			bwillwrite(TMPFS_BLKSIZE);
		}

		/*
		 * Use buffer cache I/O (via tmpfs_strategy)
		 */
		offset = (size_t)uio->uio_offset & TMPFS_BLKMASK64;
		base_offset = (off_t)uio->uio_offset - offset;
		len = TMPFS_BLKSIZE - offset;
		if (len > uio->uio_resid)
			len = uio->uio_resid;

		if ((uio->uio_offset + len) > node->tn_size) {
			trivial = (uio->uio_offset <= node->tn_size);
			error = tmpfs_reg_resize(vp, uio->uio_offset + len,
						 trivial);
			if (error)
				break;
		}

		/*
		 * Read to fill in any gaps.  Theoretically we could
		 * optimize this if the write covers the entire buffer
		 * and is not a UIO_NOCOPY write, however this can lead
		 * to a security violation exposing random kernel memory
		 * (whatever junk was in the backing VM pages before).
		 *
		 * So just use bread() to do the right thing.
		 */
		error = bread_kvabio(vp, base_offset, TMPFS_BLKSIZE, &bp);
		bkvasync(bp);
		error = uiomovebp(bp, (char *)bp->b_data + offset, len, uio);
		if (error) {
			kprintf("tmpfs_write uiomove error %d\n", error);
			brelse(bp);
			break;
		}

		if (uio->uio_offset > node->tn_size) {
			node->tn_size = uio->uio_offset;
			kflags |= NOTE_EXTEND;
		}
		kflags |= NOTE_WRITE;

		/*
		 * Always try to flush the page in the UIO_NOCOPY case.  This
		 * can come from the pageout daemon or during vnode eviction.
		 * It is not necessarily going to be marked IO_ASYNC/IO_SYNC.
		 *
		 * For the normal case we buwrite(), dirtying the underlying
		 * VM pages instead of dirtying the buffer and releasing the
		 * buffer as a clean buffer.  This allows tmpfs to use
		 * essentially all available memory to cache file data.
		 * If we used bdwrite() the buffer cache would wind up
		 * flushing the data to swap too quickly.
		 *
		 * But because tmpfs can seriously load the VM system we
		 * fall-back to using bdwrite() when free memory starts
		 * to get low.  This shifts the load away from the VM system
		 * and makes tmpfs act more like a normal filesystem with
		 * regards to disk activity.
		 *
		 * tmpfs pretty much fiddles directly with the VM
		 * system, don't let it exhaust it or we won't play
		 * nice with other processes.  Only do this if the
		 * VOP is coming from a normal read/write.  The VM system
		 * handles the case for UIO_NOCOPY.
		 */
		bp->b_flags |= B_CLUSTEROK;
		if (uio->uio_segflg == UIO_NOCOPY) {
			/*
			 * Flush from the pageout daemon, deal with
			 * potentially very heavy tmpfs write activity
			 * causing long stalls in the pageout daemon
			 * before pages get to free/cache.
			 *
			 * (a) Under severe pressure setting B_DIRECT will
			 *     cause a buffer release to try to free the
			 *     underlying pages.
			 *
			 * (b) Under modest memory pressure the B_RELBUF
			 *     alone is sufficient to get the pages moved
			 *     to the cache.  We could also force this by
			 *     setting B_NOTMETA but that might have other
			 *     unintended side-effects (e.g. setting
			 *     PG_NOTMETA on the VM page).
			 *
			 * Hopefully this will unblock the VM system more
			 * quickly under extreme tmpfs write load.
			 */
			if (vm_page_count_min(vm_page_free_hysteresis))
				bp->b_flags |= B_DIRECT;
			bp->b_flags |= B_AGE | B_RELBUF;
			bp->b_act_count = 0;	/* buffer->deactivate pgs */
			cluster_awrite(bp);
		} else if (vm_page_count_target()) {
			/*
			 * Normal (userland) write but we are low on memory;
			 * run the buffer through the buffer cache.
			 */
			bp->b_act_count = 0;	/* buffer->deactivate pgs */
			bdwrite(bp);
		} else {
			/*
			 * Otherwise run the buffer directly through to the
			 * backing VM store.
			 */
			buwrite(bp);
			/*vm_wait_nominal();*/
		}

		if (bp->b_error) {
			kprintf("tmpfs_write bwrite error %d\n", bp->b_error);
			break;
		}
	}

	if (error) {
		if (extended) {
			(void)tmpfs_reg_resize(vp, oldsize, trivial);
			kflags &= ~NOTE_EXTEND;
		}
		goto done;
	}

	/*
	 * Currently we don't set the mtime on files modified via mmap()
	 * because we can't tell the difference between those modifications
	 * and an attempt by the pageout daemon to flush tmpfs pages to
	 * swap.
	 *
	 * This is because in order to defer flushes as long as possible
	 * buwrite() works by marking the underlying VM pages dirty in
	 * order to be able to dispose of the buffer cache buffer without
	 * flushing it.
	 */
	if (uio->uio_segflg == UIO_NOCOPY) {
		if (vp->v_flag & VLASTWRITETS) {
			node->tn_mtime = vp->v_lastwrite_ts.tv_sec;
			node->tn_mtimensec = vp->v_lastwrite_ts.tv_nsec;
		}
	} else {
		node->tn_status |= TMPFS_NODE_MODIFIED;
		vclrflags(vp, VLASTWRITETS);
	}

	if (extended)
		node->tn_status |= TMPFS_NODE_CHANGED;

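	/*
	 * A successful write by an unprivileged user clears the setuid
	 * and setgid bits, matching conventional filesystem semantics.
	 */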
	if (node->tn_mode & (S_ISUID | S_ISGID)) {
		if (priv_check_cred(ap->a_cred, PRIV_VFS_RETAINSUGID, 0))
			node->tn_mode &= ~(S_ISUID | S_ISGID);
	}
done:
	TMPFS_NODE_UNLOCK(node);
	if (kflags)
		tmpfs_knote(vp, kflags);

	return(error);
}

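/*
 * Advisory byte-range locks are delegated to the generic lf_advlock()
 * code, with the lock list anchored on the tmpfs node.
 */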
static int
tmpfs_advlock(struct vop_advlock_args *ap)
{
	struct tmpfs_node *node;
	struct vnode *vp = ap->a_vp;
	int error;

	node = VP_TO_TMPFS_NODE(vp);
	error = (lf_advlock(ap, &node->tn_advlock, node->tn_size));

	return (error);
}

/*
 * The strategy function is typically only called when memory pressure
 * forces the system to attempt to pageout pages.  It can also be called
 * by [n]vtruncbuf() when a truncation cuts a page in half.  Normal write
 * operations do not come through here directly; tmpfs_write() dirties the
 * backing VM pages via buwrite(), and buffers queued with bdwrite() or
 * cluster_awrite() only reach this function when the buffer cache flushes
 * them.
 *
 * We set VKVABIO for VREG files so bp->b_data may not be synchronized to
 * our cpu.  swap_pager_strategy() is all we really use, and it directly
 * supports this.
 */
static int
tmpfs_strategy(struct vop_strategy_args *ap)
{
	struct bio *bio = ap->a_bio;
	struct bio *nbio;
	struct buf *bp = bio->bio_buf;
	struct vnode *vp = ap->a_vp;
	struct tmpfs_node *node;
	vm_object_t uobj;
	vm_page_t m;
	int i;

	if (vp->v_type != VREG) {
		bp->b_resid = bp->b_bcount;
		bp->b_flags |= B_ERROR | B_INVAL;
		bp->b_error = EINVAL;
		biodone(bio);
		return(0);
	}

	node = VP_TO_TMPFS_NODE(vp);

	uobj = node->tn_reg.tn_aobj;

	/*
	 * Don't bother flushing to swap if there is no swap, just
	 * ensure that the pages are marked as needing a commit (still).
	 */
	if (bp->b_cmd == BUF_CMD_WRITE && vm_swap_size == 0) {
		for (i = 0; i < bp->b_xio.xio_npages; ++i) {
			m = bp->b_xio.xio_pages[i];
			vm_page_need_commit(m);
		}
		bp->b_resid = 0;
		bp->b_error = 0;
		biodone(bio);
	} else {
		nbio = push_bio(bio);
		nbio->bio_done = tmpfs_strategy_done;
		nbio->bio_offset = bio->bio_offset;
		swap_pager_strategy(uobj, nbio);
	}
	return 0;
}

/*
 * If we were unable to commit the pages to swap make sure they are marked
 * as needing a commit (again).  If we were, clear the flag to allow the
 * pages to be freed.
 *
 * Do not error-out the buffer.  In particular, vinvalbuf() needs to
 * always work.
 */
static void
tmpfs_strategy_done(struct bio *bio)
{
	struct buf *bp;
	vm_page_t m;
	int i;

	bp = bio->bio_buf;

	if (bp->b_flags & B_ERROR) {
		bp->b_flags &= ~B_ERROR;
		bp->b_error = 0;
		bp->b_resid = 0;
		for (i = 0; i < bp->b_xio.xio_npages; ++i) {
			m = bp->b_xio.xio_pages[i];
			vm_page_need_commit(m);
		}
	} else {
		for (i = 0; i < bp->b_xio.xio_npages; ++i) {
			m = bp->b_xio.xio_pages[i];
			vm_page_clear_commit(m);
		}
	}
	bio = pop_bio(bio);
	biodone(bio);
}

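/*
 * tmpfs does no block remapping: the "disk" offset equals the logical
 * offset and there are no read-ahead/read-behind runs to report.
 */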
static int
tmpfs_bmap(struct vop_bmap_args *ap)
{
	if (ap->a_doffsetp != NULL)
		*ap->a_doffsetp = ap->a_loffset;
	if (ap->a_runp != NULL)
		*ap->a_runp = 0;
	if (ap->a_runb != NULL)
		*ap->a_runb = 0;

	return 0;
}

/* --------------------------------------------------------------------- */

static int
tmpfs_nremove(struct vop_nremove_args *ap)
{
	struct vnode *dvp = ap->a_dvp;
	struct namecache *ncp = ap->a_nch->ncp;
	struct vnode *vp;
	int error;
	struct tmpfs_dirent *de;
	struct tmpfs_mount *tmp;
	struct tmpfs_node *dnode;
	struct tmpfs_node *node;

	/*
	 * We have to acquire the vp from ap->a_nch because we will likely
	 * unresolve the namecache entry, and a vrele/vput is needed to
	 * trigger the tmpfs_inactive/tmpfs_reclaim sequence.
	 *
	 * We have to use vget to clear any inactive state on the vnode,
	 * otherwise the vnode may remain inactive and thus tmpfs_inactive
	 * will not get called when we release it.
	 */
	error = cache_vget(ap->a_nch, ap->a_cred, LK_SHARED, &vp);
	KKASSERT(vp->v_mount == dvp->v_mount);
	KKASSERT(error == 0);
	vn_unlock(vp);

	if (vp->v_type == VDIR) {
		error = EISDIR;
		goto out2;
	}

	dnode = VP_TO_TMPFS_DIR(dvp);
	node = VP_TO_TMPFS_NODE(vp);
	tmp = VFS_TO_TMPFS(vp->v_mount);

	TMPFS_NODE_LOCK(dnode);
	de = tmpfs_dir_lookup(dnode, node, ncp);
	if (de == NULL) {
		error = ENOENT;
		TMPFS_NODE_UNLOCK(dnode);
		goto out;
	}

	/* Files marked as immutable or append-only cannot be deleted. */
	if ((node->tn_flags & (IMMUTABLE | APPEND | NOUNLINK)) ||
	    (dnode->tn_flags & APPEND)) {
		error = EPERM;
		TMPFS_NODE_UNLOCK(dnode);
		goto out;
	}

	/* Remove the entry from the directory; as it is a file, we do not
	 * have to change the number of hard links of the directory. */
	tmpfs_dir_detach(dnode, de);
	TMPFS_NODE_UNLOCK(dnode);

	/* Free the directory entry we just deleted.  Note that the node
	 * referred by it will not be removed until the vnode is really
	 * reclaimed. */
	tmpfs_free_dirent(tmp, de);

	if (node->tn_links > 0) {
		TMPFS_NODE_LOCK(node);
		node->tn_status |= TMPFS_NODE_CHANGED;
		TMPFS_NODE_UNLOCK(node);
	}

	cache_unlink(ap->a_nch);
	tmpfs_knote(vp, NOTE_DELETE);
	error = 0;

out:
	if (error == 0)
		tmpfs_knote(dvp, NOTE_WRITE);
out2:
	vrele(vp);

	return error;
}

/* --------------------------------------------------------------------- */

static int
tmpfs_nlink(struct vop_nlink_args *ap)
{
	struct vnode *dvp = ap->a_dvp;
	struct vnode *vp = ap->a_vp;
	struct namecache *ncp = ap->a_nch->ncp;
	struct tmpfs_dirent *de;
	struct tmpfs_node *node;
	struct tmpfs_node *dnode;
	int error;

	KKASSERT(dvp != vp); /* XXX When can this be false? */

	node = VP_TO_TMPFS_NODE(vp);
	dnode = VP_TO_TMPFS_NODE(dvp);
	TMPFS_NODE_LOCK(dnode);

	/* XXX: Why aren't the following two tests done by the caller? */

	/* Hard links of directories are forbidden. */
	if (vp->v_type == VDIR) {
		error = EPERM;
		goto out;
	}

	/* Cannot create cross-device links. */
	if (dvp->v_mount != vp->v_mount) {
		error = EXDEV;
		goto out;
	}

	/* Ensure that we do not overflow the maximum number of links imposed
	 * by the system. */
	KKASSERT(node->tn_links <= LINK_MAX);
	if (node->tn_links >= LINK_MAX) {
		error = EMLINK;
		goto out;
	}

	/* We cannot create links of files marked immutable or append-only. */
	if (node->tn_flags & (IMMUTABLE | APPEND)) {
		error = EPERM;
		goto out;
	}

	/* Allocate a new directory entry to represent the node. */
	error = tmpfs_alloc_dirent(VFS_TO_TMPFS(vp->v_mount), node,
				   ncp->nc_name, ncp->nc_nlen, &de);
	if (error != 0)
		goto out;

	/* Insert the new directory entry into the appropriate directory. */
	tmpfs_dir_attach(dnode, de);

	/* vp link count has changed, so update node times. */

	TMPFS_NODE_LOCK(node);
	node->tn_status |= TMPFS_NODE_CHANGED;
	TMPFS_NODE_UNLOCK(node);
	tmpfs_update(vp);

	tmpfs_knote(vp, NOTE_LINK);
	cache_setunresolved(ap->a_nch);
	cache_setvp(ap->a_nch, vp);
	error = 0;

out:
	TMPFS_NODE_UNLOCK(dnode);
	if (error == 0)
		tmpfs_knote(dvp, NOTE_WRITE);
	return error;
}

/* --------------------------------------------------------------------- */

static int
tmpfs_nrename(struct vop_nrename_args *ap)
{
	struct vnode *fdvp = ap->a_fdvp;
	struct namecache *fncp = ap->a_fnch->ncp;
	struct vnode *fvp = fncp->nc_vp;
	struct vnode *tdvp = ap->a_tdvp;
	struct namecache *tncp = ap->a_tnch->ncp;
	struct vnode *tvp;
	struct tmpfs_dirent *de, *tde;
	struct tmpfs_mount *tmp;
	struct tmpfs_node *fdnode;
	struct tmpfs_node *fnode;
	struct tmpfs_node *tnode;
	struct tmpfs_node *tdnode;
	char *newname;
	char *oldname;
	int error;

	KKASSERT(fdvp->v_mount == fvp->v_mount);

	/*
	 * Because tvp can get overwritten we have to vget it instead of
	 * just vref or use it, otherwise its VINACTIVE flag may not get
	 * cleared and the node won't get destroyed.
	 */
	error = cache_vget(ap->a_tnch, ap->a_cred, LK_SHARED, &tvp);
	if (error == 0) {
		tnode = VP_TO_TMPFS_NODE(tvp);
		vn_unlock(tvp);
	} else {
		tnode = NULL;
	}

	/* Disallow cross-device renames.
	 * XXX Why isn't this done by the caller? */
	if (fvp->v_mount != tdvp->v_mount ||
	    (tvp != NULL && fvp->v_mount != tvp->v_mount)) {
		error = EXDEV;
		goto out;
	}

	tmp = VFS_TO_TMPFS(tdvp->v_mount);
	tdnode = VP_TO_TMPFS_DIR(tdvp);

	/* If source and target are the same file, there is nothing to do. */
	if (fvp == tvp) {
		error = 0;
		goto out;
	}

	fdnode = VP_TO_TMPFS_DIR(fdvp);
	fnode = VP_TO_TMPFS_NODE(fvp);
	TMPFS_NODE_LOCK(fdnode);
	de = tmpfs_dir_lookup(fdnode, fnode, fncp);
	TMPFS_NODE_UNLOCK(fdnode);	/* XXX depend on namecache lock */

	/* Avoid manipulating '.' and '..' entries. */
	if (de == NULL) {
		error = ENOENT;
		goto out_locked;
	}
	KKASSERT(de->td_node == fnode);

	/*
	 * If replacing an entry in the target directory and that entry
	 * is a directory, it must be empty.
	 *
	 * Kern_rename guarantees the destination to be a directory
	 * if the source is one (it does?).
	 */
	if (tvp != NULL) {
		KKASSERT(tnode != NULL);

		if ((tnode->tn_flags & (NOUNLINK | IMMUTABLE | APPEND)) ||
		    (tdnode->tn_flags & (APPEND | IMMUTABLE))) {
			error = EPERM;
			goto out_locked;
		}

		if (fnode->tn_type == VDIR && tnode->tn_type == VDIR) {
			if (tnode->tn_size > 0) {
				error = ENOTEMPTY;
				goto out_locked;
			}
		} else if (fnode->tn_type == VDIR && tnode->tn_type != VDIR) {
			error = ENOTDIR;
			goto out_locked;
		} else if (fnode->tn_type != VDIR && tnode->tn_type == VDIR) {
			error = EISDIR;
			goto out_locked;
		} else {
			KKASSERT(fnode->tn_type != VDIR &&
				tnode->tn_type != VDIR);
		}
	}

	if ((fnode->tn_flags & (NOUNLINK | IMMUTABLE | APPEND)) ||
	    (fdnode->tn_flags & (APPEND | IMMUTABLE))) {
		error = EPERM;
		goto out_locked;
	}

	/*
	 * Ensure that we have enough memory to hold the new name, if it
	 * has to be changed.
	 */
	if (fncp->nc_nlen != tncp->nc_nlen ||
	    bcmp(fncp->nc_name, tncp->nc_name, fncp->nc_nlen) != 0) {
		newname = kmalloc(tncp->nc_nlen + 1, tmp->tm_name_zone,
				  M_WAITOK | M_NULLOK);
		if (newname == NULL) {
			error = ENOSPC;
			goto out_locked;
		}
		bcopy(tncp->nc_name, newname, tncp->nc_nlen);
		newname[tncp->nc_nlen] = '\0';
	} else {
		newname = NULL;
	}

	/*
	 * Unlink entry from source directory.  Note that the kernel has
	 * already checked for illegal recursion cases (renaming a directory
	 * into a subdirectory of itself).
	 */
	if (fdnode != tdnode) {
		tmpfs_dir_detach(fdnode, de);
	} else {
		/* XXX depend on namecache lock */
		TMPFS_NODE_LOCK(fdnode);
		KKASSERT(de == tmpfs_dir_lookup(fdnode, fnode, fncp));
		RB_REMOVE(tmpfs_dirtree, &fdnode->tn_dir.tn_dirtree, de);
		RB_REMOVE(tmpfs_dirtree_cookie,
			  &fdnode->tn_dir.tn_cookietree, de);
		TMPFS_NODE_UNLOCK(fdnode);
	}

	/*
	 * Handle any name change.  Swap with newname, we will
	 * deallocate it at the end.
	 */
	if (newname != NULL) {
#if 0
		TMPFS_NODE_LOCK(fnode);
		fnode->tn_status |= TMPFS_NODE_CHANGED;
		TMPFS_NODE_UNLOCK(fnode);
#endif
		oldname = de->td_name;
		de->td_name = newname;
		de->td_namelen = (uint16_t)tncp->nc_nlen;
		newname = oldname;
	}

	/*
	 * If we are overwriting an entry, we have to remove the old one
	 * from the target directory.
	 */
	if (tvp != NULL) {
		/* Remove the old entry from the target directory. */
		TMPFS_NODE_LOCK(tdnode);
		tde = tmpfs_dir_lookup(tdnode, tnode, tncp);
		tmpfs_dir_detach(tdnode, tde);
		TMPFS_NODE_UNLOCK(tdnode);
		tmpfs_knote(tdnode->tn_vnode, NOTE_DELETE);

		/*
		 * Free the directory entry we just deleted.  Note that the
		 * node referred by it will not be removed until the vnode is
		 * really reclaimed.
		 */
		tmpfs_free_dirent(VFS_TO_TMPFS(tvp->v_mount), tde);
		/*cache_inval_vp(tvp, CINV_DESTROY);*/
	}

	/*
	 * Link entry to target directory.  If the entry
	 * represents a directory move the parent linkage
	 * as well.
	 */
	if (fdnode != tdnode) {
		if (de->td_node->tn_type == VDIR) {
			TMPFS_VALIDATE_DIR(fnode);
		}
		tmpfs_dir_attach(tdnode, de);
	} else {
		TMPFS_NODE_LOCK(tdnode);
		tdnode->tn_status |= TMPFS_NODE_MODIFIED;
		RB_INSERT(tmpfs_dirtree, &tdnode->tn_dir.tn_dirtree, de);
		RB_INSERT(tmpfs_dirtree_cookie,
			  &tdnode->tn_dir.tn_cookietree, de);
		TMPFS_NODE_UNLOCK(tdnode);
	}

	/*
	 * Finish up
	 */
	if (newname) {
		kfree(newname, tmp->tm_name_zone);
		newname = NULL;
	}
	cache_rename(ap->a_fnch, ap->a_tnch);
	tmpfs_knote(ap->a_fdvp, NOTE_WRITE);
	tmpfs_knote(ap->a_tdvp, NOTE_WRITE);
	if (fnode->tn_vnode)
		tmpfs_knote(fnode->tn_vnode, NOTE_RENAME);
	error = 0;

out_locked:
	;
out:
	if (tvp)
		vrele(tvp);
	return error;
}

/* --------------------------------------------------------------------- */

static int
tmpfs_nmkdir(struct vop_nmkdir_args *ap)
{
	struct vnode *dvp = ap->a_dvp;
	struct vnode **vpp = ap->a_vpp;
	struct namecache *ncp = ap->a_nch->ncp;
	struct vattr *vap = ap->a_vap;
	struct ucred *cred = ap->a_cred;
	int error;

	KKASSERT(vap->va_type == VDIR);

	error = tmpfs_alloc_file(dvp, vpp, vap, ncp, cred, NULL);
	if (error == 0) {
		cache_setunresolved(ap->a_nch);
		cache_setvp(ap->a_nch, *vpp);
		tmpfs_knote(dvp, NOTE_WRITE | NOTE_LINK);
	}
	return error;
}

/* --------------------------------------------------------------------- */

static int
tmpfs_nrmdir(struct vop_nrmdir_args *ap)
{
	struct vnode *dvp = ap->a_dvp;
	struct namecache *ncp = ap->a_nch->ncp;
	struct vnode *vp;
	struct tmpfs_dirent *de;
	struct tmpfs_mount *tmp;
	struct tmpfs_node *dnode;
	struct tmpfs_node *node;
	int error;

	/*
	 * We have to acquire the vp from ap->a_nch because we will likely
	 * unresolve the namecache entry, and a vrele/vput is needed to
	 * trigger the tmpfs_inactive/tmpfs_reclaim sequence.
	 *
	 * We have to use vget to clear any inactive state on the vnode,
	 * otherwise the vnode may remain inactive and thus tmpfs_inactive
	 * will not get called when we release it.
	 */
	error = cache_vget(ap->a_nch, ap->a_cred, LK_SHARED, &vp);
	KKASSERT(error == 0);
	vn_unlock(vp);

	/*
	 * Prevalidate so we don't hit an assertion later
	 */
	if (vp->v_type != VDIR) {
		error = ENOTDIR;
		goto out;
	}

	tmp = VFS_TO_TMPFS(dvp->v_mount);
	dnode = VP_TO_TMPFS_DIR(dvp);
	node = VP_TO_TMPFS_DIR(vp);

	/*
	 * Directories with more than two entries ('.' and '..') cannot
	 * be removed.
	 */
	if (node->tn_size > 0) {
		error = ENOTEMPTY;
		goto out;
	}

	if ((dnode->tn_flags & APPEND)
	    || (node->tn_flags & (NOUNLINK | IMMUTABLE | APPEND))) {
		error = EPERM;
		goto out;
	}

	/*
	 * This invariant holds only if we are not trying to
	 * remove "..".  We checked for that above so this is safe now.
	 */
	KKASSERT(node->tn_dir.tn_parent == dnode);

	/*
	 * Get the directory entry associated with node (vp).  This
	 * was filled by tmpfs_lookup while looking up the entry.
	 */
	TMPFS_NODE_LOCK(dnode);
	de = tmpfs_dir_lookup(dnode, node, ncp);
	KKASSERT(TMPFS_DIRENT_MATCHES(de, ncp->nc_name, ncp->nc_nlen));

	/* Check flags to see if we are allowed to remove the directory. */
	if ((dnode->tn_flags & APPEND) ||
	    node->tn_flags & (NOUNLINK | IMMUTABLE | APPEND)) {
		error = EPERM;
		TMPFS_NODE_UNLOCK(dnode);
		goto out;
	}

	/* Detach the directory entry from the directory (dnode). */
	tmpfs_dir_detach(dnode, de);
	TMPFS_NODE_UNLOCK(dnode);

	/* No vnode should be allocated for this entry from this point */
	TMPFS_NODE_LOCK(dnode);
	TMPFS_ASSERT_ELOCKED(dnode);
	TMPFS_NODE_LOCK(node);
	TMPFS_ASSERT_ELOCKED(node);

	/*
	 * Must set parent linkage to NULL (tested by ncreate to disallow
	 * the creation of new files/dirs in a deleted directory)
	 */
	node->tn_status |= TMPFS_NODE_CHANGED;

	dnode->tn_status |= TMPFS_NODE_ACCESSED | TMPFS_NODE_CHANGED |
			    TMPFS_NODE_MODIFIED;

	TMPFS_NODE_UNLOCK(node);
	TMPFS_NODE_UNLOCK(dnode);

	/* Free the directory entry we just deleted.  Note that the node
	 * referred by it will not be removed until the vnode is really
	 * reclaimed. */
	tmpfs_free_dirent(tmp, de);

	/* Release the deleted vnode (will destroy the node, notify
	 * interested parties and clean it from the cache). */

	TMPFS_NODE_LOCK(dnode);
	dnode->tn_status |= TMPFS_NODE_CHANGED;
	TMPFS_NODE_UNLOCK(dnode);
	tmpfs_update(dvp);

	cache_unlink(ap->a_nch);
	tmpfs_knote(dvp, NOTE_WRITE | NOTE_LINK);
	error = 0;

out:
	vrele(vp);

	return error;
}

/* --------------------------------------------------------------------- */

static int
tmpfs_nsymlink(struct vop_nsymlink_args *ap)
{
	struct vnode *dvp = ap->a_dvp;
	struct vnode **vpp = ap->a_vpp;
	struct namecache *ncp = ap->a_nch->ncp;
	struct vattr *vap = ap->a_vap;
	struct ucred *cred = ap->a_cred;
	char *target = ap->a_target;
	int error;

	vap->va_type = VLNK;
	error = tmpfs_alloc_file(dvp, vpp, vap, ncp, cred, target);
	if (error == 0) {
		tmpfs_knote(*vpp, NOTE_WRITE);
		cache_setunresolved(ap->a_nch);
		cache_setvp(ap->a_nch, *vpp);
	}
	return error;
}

/* --------------------------------------------------------------------- */

static int
tmpfs_readdir(struct vop_readdir_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct uio *uio = ap->a_uio;
	int *eofflag = ap->a_eofflag;
	off_t **cookies = ap->a_cookies;
	int *ncookies = ap->a_ncookies;
	struct tmpfs_mount *tmp;
	int error;
	off_t startoff;
	off_t cnt = 0;
	struct tmpfs_node *node;

	/* This operation only makes sense on directory nodes. */
	if (vp->v_type != VDIR) {
		return ENOTDIR;
	}

	tmp = VFS_TO_TMPFS(vp->v_mount);
	node = VP_TO_TMPFS_DIR(vp);
	startoff = uio->uio_offset;

	if (uio->uio_offset == TMPFS_DIRCOOKIE_DOT) {
		error = tmpfs_dir_getdotdent(node, uio);
		if (error != 0) {
			TMPFS_NODE_LOCK_SH(node);
			goto outok;
		}
		cnt++;
	}

	if (uio->uio_offset == TMPFS_DIRCOOKIE_DOTDOT) {
		/* may lock parent, cannot hold node lock */
		error = tmpfs_dir_getdotdotdent(tmp, node, uio);
		if (error != 0) {
			TMPFS_NODE_LOCK_SH(node);
			goto outok;
		}
		cnt++;
	}

	TMPFS_NODE_LOCK_SH(node);
	error = tmpfs_dir_getdents(node, uio, &cnt);

outok:
	KKASSERT(error >= -1);

	if (error == -1)
		error = 0;

	if (eofflag != NULL)
		*eofflag =
		    (error == 0 && uio->uio_offset == TMPFS_DIRCOOKIE_EOF);

	/* Update NFS-related variables. */
	if (error == 0 && cookies != NULL && ncookies != NULL) {
		off_t i;
		off_t off = startoff;
		struct tmpfs_dirent *de = NULL;

		*ncookies = cnt;
		*cookies = kmalloc(cnt * sizeof(off_t), M_TEMP, M_WAITOK);

		for (i = 0; i < cnt; i++) {
			KKASSERT(off != TMPFS_DIRCOOKIE_EOF);
			if (off == TMPFS_DIRCOOKIE_DOT) {
				off = TMPFS_DIRCOOKIE_DOTDOT;
			} else {
				if (off == TMPFS_DIRCOOKIE_DOTDOT) {
					de = RB_MIN(tmpfs_dirtree_cookie,
						&node->tn_dir.tn_cookietree);
				} else if (de != NULL) {
					de = RB_NEXT(tmpfs_dirtree_cookie,
					       &node->tn_dir.tn_cookietree, de);
				} else {
					de = tmpfs_dir_lookupbycookie(node,
								      off);
					KKASSERT(de != NULL);
					de = RB_NEXT(tmpfs_dirtree_cookie,
					       &node->tn_dir.tn_cookietree, de);
				}
				if (de == NULL)
					off = TMPFS_DIRCOOKIE_EOF;
				else
					off = tmpfs_dircookie(de);
			}
			(*cookies)[i] = off;
		}
		KKASSERT(uio->uio_offset == off);
	}
	TMPFS_NODE_UNLOCK(node);

	if ((node->tn_status & TMPFS_NODE_ACCESSED) == 0) {
		TMPFS_NODE_LOCK(node);
		node->tn_status |= TMPFS_NODE_ACCESSED;
		TMPFS_NODE_UNLOCK(node);
	}
	return error;
}

/* --------------------------------------------------------------------- */

static int
tmpfs_readlink(struct vop_readlink_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct uio *uio = ap->a_uio;
	int error;
	struct tmpfs_node *node;

	KKASSERT(uio->uio_offset == 0);
	KKASSERT(vp->v_type == VLNK);

	node = VP_TO_TMPFS_NODE(vp);
	TMPFS_NODE_LOCK_SH(node);
	error = uiomove(node->tn_link,
			MIN(node->tn_size, uio->uio_resid), uio);
	TMPFS_NODE_UNLOCK(node);
	if ((node->tn_status & TMPFS_NODE_ACCESSED) == 0) {
		TMPFS_NODE_LOCK(node);
		node->tn_status |= TMPFS_NODE_ACCESSED;
		TMPFS_NODE_UNLOCK(node);
	}
	return error;
}

/* --------------------------------------------------------------------- */

static int
tmpfs_inactive(struct vop_inactive_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct tmpfs_node *node;
	struct mount *mp;

	mp = vp->v_mount;
	lwkt_gettoken(&mp->mnt_token);
	node = VP_TO_TMPFS_NODE(vp);

	/*
	 * Degenerate case
	 */
	if (node == NULL) {
		vrecycle(vp);
		lwkt_reltoken(&mp->mnt_token);
		return(0);
	}

	/*
	 * Get rid of unreferenced deleted vnodes sooner rather than
	 * later so the data memory can be recovered immediately.
	 *
	 * We must truncate the vnode to prevent the normal reclamation
	 * path from flushing the data for the removed file to disk.
	 */
	TMPFS_NODE_LOCK(node);
	if ((node->tn_vpstate & TMPFS_VNODE_ALLOCATING) == 0 &&
	    node->tn_links == 0)
	{
		node->tn_vpstate = TMPFS_VNODE_DOOMED;
		TMPFS_NODE_UNLOCK(node);
		if (node->tn_type == VREG)
			tmpfs_truncate(vp, 0);
		vrecycle(vp);
	} else {
		/*
		 * We must retain any VM pages belonging to the vnode's
		 * object as the vnode will destroy the object during a
		 * later reclaim.  We call vinvalbuf(V_SAVE) to clean
		 * out the buffer cache.
		 *
		 * On DragonFlyBSD, vnodes are not immediately deactivated
		 * on the 1->0 refs, so this is a relatively optimal
		 * operation.  We have to do this in tmpfs_inactive()
		 * because the pages will have already been thrown away
		 * at the time tmpfs_reclaim() is called.
		 */
		if (node->tn_type == VREG &&
		    node->tn_reg.tn_pages_in_aobj == 0) {
			vinvalbuf(vp, V_SAVE, 0, 0);
			KKASSERT(RB_EMPTY(&vp->v_rbdirty_tree));
			KKASSERT(RB_EMPTY(&vp->v_rbclean_tree));
			tmpfs_move_pages(vp->v_object, node->tn_reg.tn_aobj);
			node->tn_reg.tn_pages_in_aobj = 1;
		}

		TMPFS_NODE_UNLOCK(node);
	}
	lwkt_reltoken(&mp->mnt_token);

	return 0;
}

/* --------------------------------------------------------------------- */

int
tmpfs_reclaim(struct vop_reclaim_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct tmpfs_mount *tmp;
	struct tmpfs_node *node;
	struct mount *mp;

	mp = vp->v_mount;
	lwkt_gettoken(&mp->mnt_token);

	node = VP_TO_TMPFS_NODE(vp);
	tmp = VFS_TO_TMPFS(vp->v_mount);
	KKASSERT(mp == tmp->tm_mount);

	tmpfs_free_vp(vp);

	/*
	 * If the node referenced by this vnode was deleted by the
	 * user, we must free its associated data structures now that
	 * the vnode is being reclaimed.
	 *
	 * Directories have an extra link ref.
	 */
	TMPFS_NODE_LOCK(node);
	if ((node->tn_vpstate & TMPFS_VNODE_ALLOCATING) == 0 &&
	    node->tn_links == 0) {
		node->tn_vpstate = TMPFS_VNODE_DOOMED;
		tmpfs_free_node(tmp, node);
		/* eats the lock */
	} else {
		TMPFS_NODE_UNLOCK(node);
	}
	lwkt_reltoken(&mp->mnt_token);

	KKASSERT(vp->v_data == NULL);
	return 0;
}

/* --------------------------------------------------------------------- */

static int
tmpfs_mountctl(struct vop_mountctl_args *ap)
{
	struct tmpfs_mount *tmp;
	struct mount *mp;
	int rc;

	mp = ap->a_head.a_ops->head.vv_mount;
	lwkt_gettoken(&mp->mnt_token);

	switch (ap->a_op) {
	case (MOUNTCTL_SET_EXPORT):
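		/*
		 * Install NFS export information for this mount via
		 * vfs_export().
		 */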
		tmp = (struct tmpfs_mount *) mp->mnt_data;

		if (ap->a_ctllen != sizeof(struct export_args))
			rc = (EINVAL);
		else
			rc = vfs_export(mp, &tmp->tm_export,
					(const struct export_args *) ap->a_ctl);
		break;
	default:
		rc = vop_stdmountctl(ap);
		break;
	}

	lwkt_reltoken(&mp->mnt_token);
	return (rc);
}

/* --------------------------------------------------------------------- */

static int
tmpfs_print(struct vop_print_args *ap)
{
	struct vnode *vp = ap->a_vp;

	struct tmpfs_node *node;

	node = VP_TO_TMPFS_NODE(vp);

	kprintf("tag VT_TMPFS, tmpfs_node %p, flags 0x%x, links %d\n",
	    node, node->tn_flags, node->tn_links);
	kprintf("\tmode 0%o, owner %d, group %d, size %ju, status 0x%x\n",
	    node->tn_mode, node->tn_uid, node->tn_gid,
	    (uintmax_t)node->tn_size, node->tn_status);

	if (vp->v_type == VFIFO)
		fifo_printinfo(vp);

	kprintf("\n");

	return 0;
}

/* --------------------------------------------------------------------- */

static int
tmpfs_pathconf(struct vop_pathconf_args *ap)
{
	struct vnode *vp = ap->a_vp;
	int name = ap->a_name;
	register_t *retval = ap->a_retval;
	struct tmpfs_mount *tmp;
	int error;

	error = 0;

	switch (name) {
	case _PC_CHOWN_RESTRICTED:
		*retval = 1;
		break;

	case _PC_FILESIZEBITS:
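		/* Bits needed to represent the largest possible file. */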
		tmp = VFS_TO_TMPFS(vp->v_mount);
		*retval = max(32, flsll(tmp->tm_pages_max * PAGE_SIZE) + 1);
		break;

	case _PC_LINK_MAX:
		*retval = LINK_MAX;
		break;

	case _PC_NAME_MAX:
		*retval = NAME_MAX;
		break;

	case _PC_NO_TRUNC:
		*retval = 1;
		break;

	case _PC_PATH_MAX:
		*retval = PATH_MAX;
		break;

	case _PC_PIPE_BUF:
		*retval = PIPE_BUF;
		break;

	case _PC_SYNC_IO:
		*retval = 1;
		break;

	case _PC_2_SYMLINKS:
		*retval = 1;
		break;

	default:
		error = EINVAL;
	}

	return error;
}

/************************************************************************
 *                          KQFILTER OPS                                *
 ************************************************************************/

static void filt_tmpfsdetach(struct knote *kn);
static int filt_tmpfsread(struct knote *kn, long hint);
static int filt_tmpfswrite(struct knote *kn, long hint);
static int filt_tmpfsvnode(struct knote *kn, long hint);

static struct filterops tmpfsread_filtops =
	{ FILTEROP_ISFD | FILTEROP_MPSAFE,
	  NULL, filt_tmpfsdetach, filt_tmpfsread };
static struct filterops tmpfswrite_filtops =
	{ FILTEROP_ISFD | FILTEROP_MPSAFE,
	  NULL, filt_tmpfsdetach, filt_tmpfswrite };
static struct filterops tmpfsvnode_filtops =
	{ FILTEROP_ISFD | FILTEROP_MPSAFE,
	  NULL, filt_tmpfsdetach, filt_tmpfsvnode };

static int
tmpfs_kqfilter (struct vop_kqfilter_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct knote *kn = ap->a_kn;

	switch (kn->kn_filter) {
	case EVFILT_READ:
		kn->kn_fop = &tmpfsread_filtops;
		break;
	case EVFILT_WRITE:
		kn->kn_fop = &tmpfswrite_filtops;
		break;
	case EVFILT_VNODE:
		kn->kn_fop = &tmpfsvnode_filtops;
		break;
	default:
		return (EOPNOTSUPP);
	}

	kn->kn_hook = (caddr_t)vp;

	knote_insert(&vp->v_pollinfo.vpi_kqinfo.ki_note, kn);

	return(0);
}

static void
filt_tmpfsdetach(struct knote *kn)
{
	struct vnode *vp = (void *)kn->kn_hook;

	knote_remove(&vp->v_pollinfo.vpi_kqinfo.ki_note, kn);
}

static int
filt_tmpfsread(struct knote *kn, long hint)
{
	struct vnode *vp = (void *)kn->kn_hook;
	struct tmpfs_node *node = VP_TO_TMPFS_NODE(vp);
	off_t off;

	if (hint == NOTE_REVOKE) {
		kn->kn_flags |= (EV_EOF | EV_NODATA | EV_ONESHOT);
		return(1);
	}

	/*
	 * Interlock against MP races when performing this function.
	 */
	TMPFS_NODE_LOCK_SH(node);
	off = node->tn_size - kn->kn_fp->f_offset;
	kn->kn_data = (off < INTPTR_MAX) ? off : INTPTR_MAX;
	if (kn->kn_sfflags & NOTE_OLDAPI) {
		TMPFS_NODE_UNLOCK(node);
		return(1);
	}
	if (kn->kn_data == 0) {
		kn->kn_data = (off < INTPTR_MAX) ? off : INTPTR_MAX;
	}
	TMPFS_NODE_UNLOCK(node);
	return (kn->kn_data != 0);
}

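/*
 * tmpfs files are always considered writable; no residual-space
 * estimate is reported.
 */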
static int
filt_tmpfswrite(struct knote *kn, long hint)
{
	if (hint == NOTE_REVOKE)
		kn->kn_flags |= (EV_EOF | EV_NODATA | EV_ONESHOT);
	kn->kn_data = 0;
	return (1);
}

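/*
 * Generic vnode event filter: latch any requested NOTE_* hints and
 * report EOF on revoke.
 */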
static int
filt_tmpfsvnode(struct knote *kn, long hint)
{
	if (kn->kn_sfflags & hint)
		kn->kn_fflags |= hint;
	if (hint == NOTE_REVOKE) {
		kn->kn_flags |= (EV_EOF | EV_NODATA);
		return (1);
	}
	return (kn->kn_fflags != 0);
}

/*
 * Helper to move VM pages between objects
 *
 * NOTE: The vm_page_rename() dirties the page, so we can clear the
 *	 PG_NEED_COMMIT flag.  If the pages are being moved into tn_aobj,
 *	 the pageout daemon will be able to page them out.
 */
static int
tmpfs_move_pages_callback(vm_page_t p, void *data)
{
	struct rb_vm_page_scan_info *info = data;
	vm_pindex_t pindex;

	pindex = p->pindex;
	if (vm_page_busy_try(p, TRUE)) {
		vm_page_sleep_busy(p, TRUE, "tpgmov");
		info->error = -1;
		return -1;
	}
	if (p->object != info->object || p->pindex != pindex) {
		vm_page_wakeup(p);
		info->error = -1;
		return -1;
	}
	vm_page_rename(p, info->backing_object, pindex);
	vm_page_clear_commit(p);
	vm_page_wakeup(p);
	/* page automatically made dirty */

	return 0;
}

static
void
tmpfs_move_pages(vm_object_t src, vm_object_t dst)
{
	struct rb_vm_page_scan_info info;

	vm_object_hold(src);
	vm_object_hold(dst);
	info.object = src;
	info.backing_object = dst;
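	/*
	 * Scan the entire source object; the callback sets info.error < 0
	 * when it races a busy or relocated page, in which case the scan
	 * restarts from the beginning.
	 */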
	do {
		info.error = 1;
		vm_page_rb_tree_RB_SCAN(&src->rb_memq, NULL,
					tmpfs_move_pages_callback, &info);
	} while (info.error < 0);
	vm_object_drop(dst);
	vm_object_drop(src);
}

/* --------------------------------------------------------------------- */

/*
 * vnode operations vector used for files stored in a tmpfs file system.
 */
struct vop_ops tmpfs_vnode_vops = {
	.vop_default =			vop_defaultop,
	.vop_getpages = 		vop_stdgetpages,
	.vop_putpages = 		vop_stdputpages,
	.vop_ncreate =			tmpfs_ncreate,
	.vop_nresolve =			tmpfs_nresolve,
	.vop_nlookupdotdot =		tmpfs_nlookupdotdot,
	.vop_nmknod =			tmpfs_nmknod,
	.vop_open =			tmpfs_open,
	.vop_close =			tmpfs_close,
	.vop_access =			tmpfs_access,
	.vop_getattr =			tmpfs_getattr,
	.vop_setattr =			tmpfs_setattr,
	.vop_read =			tmpfs_read,
	.vop_write =			tmpfs_write,
	.vop_fsync =			tmpfs_fsync,
	.vop_mountctl =			tmpfs_mountctl,
	.vop_nremove =			tmpfs_nremove,
	.vop_nlink =			tmpfs_nlink,
	.vop_nrename =			tmpfs_nrename,
	.vop_nmkdir =			tmpfs_nmkdir,
	.vop_nrmdir =			tmpfs_nrmdir,
	.vop_nsymlink =			tmpfs_nsymlink,
	.vop_readdir =			tmpfs_readdir,
	.vop_readlink =			tmpfs_readlink,
	.vop_inactive =			tmpfs_inactive,
	.vop_reclaim =			tmpfs_reclaim,
	.vop_print =			tmpfs_print,
	.vop_pathconf =			tmpfs_pathconf,
	.vop_bmap =			tmpfs_bmap,
	.vop_strategy =			tmpfs_strategy,
	.vop_advlock =			tmpfs_advlock,
	.vop_kqfilter =			tmpfs_kqfilter
};