xref: /dragonfly/sys/vfs/tmpfs/tmpfs_vnops.c (revision e7d467f4)
1 /*-
2  * Copyright (c) 2005, 2006 The NetBSD Foundation, Inc.
3  * All rights reserved.
4  *
5  * This code is derived from software contributed to The NetBSD Foundation
6  * by Julio M. Merino Vidal, developed as part of Google's Summer of Code
7  * 2005 program.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
19  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
20  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
21  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
22  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
26  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28  * POSSIBILITY OF SUCH DAMAGE.
29  *
30  * $NetBSD: tmpfs_vnops.c,v 1.39 2007/07/23 15:41:01 jmmv Exp $
31  */
32 
33 /*
34  * tmpfs vnode interface.
35  */
36 
37 #include <sys/kernel.h>
38 #include <sys/kern_syscall.h>
39 #include <sys/param.h>
40 #include <sys/fcntl.h>
41 #include <sys/lockf.h>
42 #include <sys/priv.h>
43 #include <sys/proc.h>
44 #include <sys/resourcevar.h>
45 #include <sys/sched.h>
46 #include <sys/stat.h>
47 #include <sys/systm.h>
48 #include <sys/unistd.h>
49 #include <sys/vfsops.h>
50 #include <sys/vnode.h>
51 #include <sys/mountctl.h>
52 
53 #include <vm/vm.h>
54 #include <vm/vm_extern.h>
55 #include <vm/vm_object.h>
56 #include <vm/vm_page.h>
57 #include <vm/vm_pager.h>
58 #include <vm/swap_pager.h>
59 
60 #include <sys/buf2.h>
61 
62 #include <vfs/fifofs/fifo.h>
63 #include <vfs/tmpfs/tmpfs_vnops.h>
64 #if 0
65 #include <vfs/tmpfs/tmpfs.h>
66 #endif
67 #include "tmpfs.h"
68 
69 static void tmpfs_strategy_done(struct bio *bio);
70 
71 static __inline
72 void
73 tmpfs_knote(struct vnode *vp, int flags)
74 {
75 	if (flags)
76 		KNOTE(&vp->v_pollinfo.vpi_kqinfo.ki_note, flags);
77 }
78 
79 
80 /* --------------------------------------------------------------------- */
81 
82 static int
83 tmpfs_nresolve(struct vop_nresolve_args *v)
84 {
85 	struct vnode *dvp = v->a_dvp;
86 	struct vnode *vp = NULL;
87 	struct namecache *ncp = v->a_nch->ncp;
88 	struct tmpfs_node *tnode;
89 	struct mount *mp;
90 
91 	int error;
92 	struct tmpfs_dirent *de;
93 	struct tmpfs_node *dnode;
94 
95 	mp = dvp->v_mount;
96 	lwkt_gettoken(&mp->mnt_token);
97 
98 	dnode = VP_TO_TMPFS_DIR(dvp);
99 
100 	de = tmpfs_dir_lookup(dnode, NULL, ncp);
101 	if (de == NULL) {
102 		error = ENOENT;
103 	} else {
104 		/*
105 		 * Allocate a vnode for the node we found.
106 		 */
107 		tnode = de->td_node;
108 		error = tmpfs_alloc_vp(dvp->v_mount, tnode,
109 				       LK_EXCLUSIVE | LK_RETRY, &vp);
110 		if (error)
111 			goto out;
112 		KKASSERT(vp);
113 	}
114 
115 out:
116 	/*
117 	 * Store the result of this lookup in the cache.  Avoid this if the
118 	 * request was for creation, as it does not improve timings on
119 	 * emprical tests.
120 	 */
121 	if (vp) {
122 		vn_unlock(vp);
123 		cache_setvp(v->a_nch, vp);
124 		vrele(vp);
125 	} else if (error == ENOENT) {
126 		cache_setvp(v->a_nch, NULL);
127 	}
128 
129 	lwkt_reltoken(&mp->mnt_token);
130 	return (error);
131 }
132 
133 static int
134 tmpfs_nlookupdotdot(struct vop_nlookupdotdot_args *v)
135 {
136 	struct vnode *dvp = v->a_dvp;
137 	struct vnode **vpp = v->a_vpp;
138 	struct tmpfs_node *dnode = VP_TO_TMPFS_NODE(dvp);
139 	struct ucred *cred = v->a_cred;
140 	struct mount *mp;
141 	int error;
142 
143 	*vpp = NULL;
144 
145 	mp = dvp->v_mount;
146 	lwkt_gettoken(&mp->mnt_token);
147 
148 	/* Check accessibility of requested node as a first step. */
149 	error = VOP_ACCESS(dvp, VEXEC, cred);
150 	if (error != 0) {
151 		lwkt_reltoken(&mp->mnt_token);
152 		return error;
153 	}
154 
155 	if (dnode->tn_dir.tn_parent != NULL) {
156 		/* Allocate a new vnode on the matching entry. */
157 		error = tmpfs_alloc_vp(dvp->v_mount, dnode->tn_dir.tn_parent,
158 				       LK_EXCLUSIVE | LK_RETRY, vpp);
159 
160 		if (*vpp)
161 			vn_unlock(*vpp);
162 	}
163 
164 	lwkt_reltoken(&mp->mnt_token);
165 
166 	return (*vpp == NULL) ? ENOENT : 0;
167 }
168 
169 /* --------------------------------------------------------------------- */
170 
171 static int
172 tmpfs_ncreate(struct vop_ncreate_args *v)
173 {
174 	struct vnode *dvp = v->a_dvp;
175 	struct vnode **vpp = v->a_vpp;
176 	struct namecache *ncp = v->a_nch->ncp;
177 	struct vattr *vap = v->a_vap;
178 	struct ucred *cred = v->a_cred;
179 	struct mount *mp;
180 	int error;
181 
182 	mp = dvp->v_mount;
183 	lwkt_gettoken(&mp->mnt_token);
184 
185 	KKASSERT(vap->va_type == VREG || vap->va_type == VSOCK);
186 
187 	error = tmpfs_alloc_file(dvp, vpp, vap, ncp, cred, NULL);
188 	if (error == 0) {
189 		cache_setunresolved(v->a_nch);
190 		cache_setvp(v->a_nch, *vpp);
191 		tmpfs_knote(dvp, NOTE_WRITE);
192 	}
193 
194 	lwkt_reltoken(&mp->mnt_token);
195 
196 	return (error);
197 }
198 /* --------------------------------------------------------------------- */
199 
200 static int
201 tmpfs_nmknod(struct vop_nmknod_args *v)
202 {
203 	struct vnode *dvp = v->a_dvp;
204 	struct vnode **vpp = v->a_vpp;
205 	struct namecache *ncp = v->a_nch->ncp;
206 	struct vattr *vap = v->a_vap;
207 	struct ucred *cred = v->a_cred;
208 	struct mount *mp = dvp->v_mount;
209 	int error;
210 
211 	lwkt_gettoken(&mp->mnt_token);
212 
213 	if (vap->va_type != VBLK && vap->va_type != VCHR &&
214 	    vap->va_type != VFIFO) {
215 		lwkt_reltoken(&mp->mnt_token);
216 		return (EINVAL);
217 	}
218 
219 	error = tmpfs_alloc_file(dvp, vpp, vap, ncp, cred, NULL);
220 	if (error == 0) {
221 		cache_setunresolved(v->a_nch);
222 		cache_setvp(v->a_nch, *vpp);
223 		tmpfs_knote(dvp, NOTE_WRITE);
224 	}
225 
226 	lwkt_reltoken(&mp->mnt_token);
227 
228 	return error;
229 }
230 
231 /* --------------------------------------------------------------------- */
232 
233 static int
234 tmpfs_open(struct vop_open_args *v)
235 {
236 	struct vnode *vp = v->a_vp;
237 	int mode = v->a_mode;
238 	struct mount *mp = vp->v_mount;
239 	struct tmpfs_node *node;
240 	int error;
241 
242 	lwkt_gettoken(&mp->mnt_token);
243 	node = VP_TO_TMPFS_NODE(vp);
244 
245 #if 0
246 	/* The file is still active but all its names have been removed
247 	 * (e.g. by a "rmdir $(pwd)").  It cannot be opened any more as
248 	 * it is about to die. */
249 	if (node->tn_links < 1)
250 		return (ENOENT);
251 #endif
252 
253 	/* If the file is marked append-only, deny write requests. */
254 	if ((node->tn_flags & APPEND) &&
255 	    (mode & (FWRITE | O_APPEND)) == FWRITE) {
256 		error = EPERM;
257 	} else {
258 		error = (vop_stdopen(v));
259 	}
260 
261 	lwkt_reltoken(&mp->mnt_token);
262 	return (error);
263 }
264 
265 /* --------------------------------------------------------------------- */
266 
267 static int
268 tmpfs_close(struct vop_close_args *v)
269 {
270 	struct vnode *vp = v->a_vp;
271 	struct tmpfs_node *node;
272 	int error;
273 
274 	lwkt_gettoken(&vp->v_mount->mnt_token);
275 	node = VP_TO_TMPFS_NODE(vp);
276 
277 	if (node->tn_links > 0) {
278 		/*
279 		 * Update node times.  No need to do it if the node has
280 		 * been deleted, because it will vanish after we return.
281 		 */
282 		tmpfs_update(vp);
283 	}
284 
285 	error = vop_stdclose(v);
286 
287 	lwkt_reltoken(&vp->v_mount->mnt_token);
288 
289 	return (error);
290 }
291 
292 /* --------------------------------------------------------------------- */
293 
294 int
295 tmpfs_access(struct vop_access_args *v)
296 {
297 	struct vnode *vp = v->a_vp;
298 	int error;
299 	struct tmpfs_node *node;
300 
301 	lwkt_gettoken(&vp->v_mount->mnt_token);
302 	node = VP_TO_TMPFS_NODE(vp);
303 
304 	switch (vp->v_type) {
305 	case VDIR:
306 		/* FALLTHROUGH */
307 	case VLNK:
308 		/* FALLTHROUGH */
309 	case VREG:
310 		if ((v->a_mode & VWRITE) &&
311 	            (vp->v_mount->mnt_flag & MNT_RDONLY)) {
312 			error = EROFS;
313 			goto out;
314 		}
315 		break;
316 
317 	case VBLK:
318 		/* FALLTHROUGH */
319 	case VCHR:
320 		/* FALLTHROUGH */
321 	case VSOCK:
322 		/* FALLTHROUGH */
323 	case VFIFO:
324 		break;
325 
326 	default:
327 		error = EINVAL;
328 		goto out;
329 	}
330 
331 	if ((v->a_mode & VWRITE) && (node->tn_flags & IMMUTABLE)) {
332 		error = EPERM;
333 		goto out;
334 	}
335 
336 	error = vop_helper_access(v, node->tn_uid, node->tn_gid,
337 			          node->tn_mode, 0);
338 
339 out:
340 	lwkt_reltoken(&vp->v_mount->mnt_token);
341 	return error;
342 }
343 
344 /* --------------------------------------------------------------------- */
345 
346 int
347 tmpfs_getattr(struct vop_getattr_args *v)
348 {
349 	struct vnode *vp = v->a_vp;
350 	struct vattr *vap = v->a_vap;
351 	struct tmpfs_node *node;
352 
353 	lwkt_gettoken(&vp->v_mount->mnt_token);
354 	node = VP_TO_TMPFS_NODE(vp);
355 
356 	tmpfs_update(vp);
357 
358 	vap->va_type = vp->v_type;
359 	vap->va_mode = node->tn_mode;
360 	vap->va_nlink = node->tn_links;
361 	vap->va_uid = node->tn_uid;
362 	vap->va_gid = node->tn_gid;
363 	vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0];
364 	vap->va_fileid = node->tn_id;
365 	vap->va_size = node->tn_size;
366 	vap->va_blocksize = PAGE_SIZE;
367 	vap->va_atime.tv_sec = node->tn_atime;
368 	vap->va_atime.tv_nsec = node->tn_atimensec;
369 	vap->va_mtime.tv_sec = node->tn_mtime;
370 	vap->va_mtime.tv_nsec = node->tn_mtimensec;
371 	vap->va_ctime.tv_sec = node->tn_ctime;
372 	vap->va_ctime.tv_nsec = node->tn_ctimensec;
373 	vap->va_gen = node->tn_gen;
374 	vap->va_flags = node->tn_flags;
375 	if (vp->v_type == VBLK || vp->v_type == VCHR)
376 	{
377 		vap->va_rmajor = umajor(node->tn_rdev);
378 		vap->va_rminor = uminor(node->tn_rdev);
379 	}
380 	vap->va_bytes = round_page(node->tn_size);
381 	vap->va_filerev = 0;
382 
383 	lwkt_reltoken(&vp->v_mount->mnt_token);
384 
385 	return 0;
386 }
387 
388 /* --------------------------------------------------------------------- */
389 
390 int
391 tmpfs_setattr(struct vop_setattr_args *v)
392 {
393 	struct vnode *vp = v->a_vp;
394 	struct vattr *vap = v->a_vap;
395 	struct ucred *cred = v->a_cred;
396 	struct tmpfs_node *node = VP_TO_TMPFS_NODE(vp);
397 	int error = 0;
398 	int kflags = 0;
399 
400 	lwkt_gettoken(&vp->v_mount->mnt_token);
401 	if (error == 0 && (vap->va_flags != VNOVAL)) {
402 		error = tmpfs_chflags(vp, vap->va_flags, cred);
403 		kflags |= NOTE_ATTRIB;
404 	}
405 
406 	if (error == 0 && (vap->va_size != VNOVAL)) {
407 		if (vap->va_size > node->tn_size)
408 			kflags |= NOTE_WRITE | NOTE_EXTEND;
409 		else
410 			kflags |= NOTE_WRITE;
411 		error = tmpfs_chsize(vp, vap->va_size, cred);
412 	}
413 
414 	if (error == 0 && (vap->va_uid != (uid_t)VNOVAL ||
415 			   vap->va_gid != (gid_t)VNOVAL)) {
416 		error = tmpfs_chown(vp, vap->va_uid, vap->va_gid, cred);
417 		kflags |= NOTE_ATTRIB;
418 	}
419 
420 	if (error == 0 && (vap->va_mode != (mode_t)VNOVAL)) {
421 		error = tmpfs_chmod(vp, vap->va_mode, cred);
422 		kflags |= NOTE_ATTRIB;
423 	}
424 
425 	if (error == 0 && ((vap->va_atime.tv_sec != VNOVAL &&
426 	    vap->va_atime.tv_nsec != VNOVAL) ||
427 	    (vap->va_mtime.tv_sec != VNOVAL &&
428 	    vap->va_mtime.tv_nsec != VNOVAL) )) {
429 		error = tmpfs_chtimes(vp, &vap->va_atime, &vap->va_mtime,
430 				      vap->va_vaflags, cred);
431 		kflags |= NOTE_ATTRIB;
432 	}
433 
434 	/* Update the node times.  We give preference to the error codes
435 	 * generated by this function rather than the ones that may arise
436 	 * from tmpfs_update. */
437 	tmpfs_update(vp);
438 	tmpfs_knote(vp, kflags);
439 
440 	lwkt_reltoken(&vp->v_mount->mnt_token);
441 
442 	return (error);
443 }
444 
445 /* --------------------------------------------------------------------- */
446 
447 /*
448  * fsync is usually a NOP, but we must take action when unmounting or
449  * when recycling.
450  */
451 static int
452 tmpfs_fsync(struct vop_fsync_args *v)
453 {
454 	struct tmpfs_node *node;
455 	struct vnode *vp = v->a_vp;
456 
457 	lwkt_gettoken(&vp->v_mount->mnt_token);
458 	node = VP_TO_TMPFS_NODE(vp);
459 
460 	tmpfs_update(vp);
461 	if (vp->v_type == VREG) {
462 		if (vp->v_flag & VRECLAIMED) {
463 			if (node->tn_links == 0)
464 				tmpfs_truncate(vp, 0);
465 			else
466 				vfsync(v->a_vp, v->a_waitfor, 1, NULL, NULL);
467 		}
468 	}
469 
470 	lwkt_reltoken(&vp->v_mount->mnt_token);
471 	return 0;
472 }
473 
474 /* --------------------------------------------------------------------- */
475 
476 static int
477 tmpfs_read (struct vop_read_args *ap)
478 {
479 	struct buf *bp;
480 	struct vnode *vp = ap->a_vp;
481 	struct uio *uio = ap->a_uio;
482 	struct tmpfs_node *node;
483 	off_t base_offset;
484 	size_t offset;
485 	size_t len;
486 	size_t resid;
487 	int error;
488 
489 	/*
490 	 * Check the basics
491 	 */
492 	if (uio->uio_offset < 0)
493 		return (EINVAL);
494 	if (vp->v_type != VREG)
495 		return (EINVAL);
496 
497 	/*
498 	 * Extract node, try to shortcut the operation through
499 	 * the VM page cache, allowing us to avoid buffer cache
500 	 * overheads.
501 	 */
502 	node = VP_TO_TMPFS_NODE(vp);
503         resid = uio->uio_resid;
504         error = vop_helper_read_shortcut(ap);
505         if (error)
506                 return error;
507         if (uio->uio_resid == 0) {
508 		if (resid)
509 			goto finished;
510 		return error;
511 	}
512 
513 	/*
514 	 * Fall-through to our normal read code.
515 	 */
516 	while (uio->uio_resid > 0 && uio->uio_offset < node->tn_size) {
517 		/*
518 		 * Use buffer cache I/O (via tmpfs_strategy)
519 		 */
520 		offset = (size_t)uio->uio_offset & BMASK;
521 		base_offset = (off_t)uio->uio_offset - offset;
522 		bp = getcacheblk(vp, base_offset, BSIZE, 0);
523 		if (bp == NULL) {
524 			lwkt_gettoken(&vp->v_mount->mnt_token);
525 			error = bread(vp, base_offset, BSIZE, &bp);
526 			if (error) {
527 				brelse(bp);
528 				lwkt_reltoken(&vp->v_mount->mnt_token);
529 				kprintf("tmpfs_read bread error %d\n", error);
530 				break;
531 			}
532 			lwkt_reltoken(&vp->v_mount->mnt_token);
533 
534 			/*
535 			 * tmpfs pretty much fiddles directly with the VM
536 			 * system, don't let it exhaust it or we won't play
537 			 * nice with other processes.
538 			 *
539 			 * Only do this if the VOP is coming from a normal
540 			 * read/write.  The VM system handles the case for
541 			 * UIO_NOCOPY.
542 			 */
543 			if (uio->uio_segflg != UIO_NOCOPY)
544 				vm_wait_nominal();
545 		}
546 
547 		/*
548 		 * Figure out how many bytes we can actually copy this loop.
549 		 */
550 		len = BSIZE - offset;
551 		if (len > uio->uio_resid)
552 			len = uio->uio_resid;
553 		if (len > node->tn_size - uio->uio_offset)
554 			len = (size_t)(node->tn_size - uio->uio_offset);
555 
556 		error = uiomovebp(bp, (char *)bp->b_data + offset, len, uio);
557 		bqrelse(bp);
558 		if (error) {
559 			kprintf("tmpfs_read uiomove error %d\n", error);
560 			break;
561 		}
562 	}
563 
564 finished:
565 	TMPFS_NODE_LOCK(node);
566 	node->tn_status |= TMPFS_NODE_ACCESSED;
567 	TMPFS_NODE_UNLOCK(node);
568 
569 	return (error);
570 }
571 
572 static int
573 tmpfs_write (struct vop_write_args *ap)
574 {
575 	struct buf *bp;
576 	struct vnode *vp = ap->a_vp;
577 	struct uio *uio = ap->a_uio;
578 	struct thread *td = uio->uio_td;
579 	struct tmpfs_node *node;
580 	boolean_t extended;
581 	off_t oldsize;
582 	int error;
583 	off_t base_offset;
584 	size_t offset;
585 	size_t len;
586 	struct rlimit limit;
587 	int trivial = 0;
588 	int kflags = 0;
589 
590 	error = 0;
591 	if (uio->uio_resid == 0) {
592 		return error;
593 	}
594 
595 	node = VP_TO_TMPFS_NODE(vp);
596 
597 	if (vp->v_type != VREG)
598 		return (EINVAL);
599 
600 	lwkt_gettoken(&vp->v_mount->mnt_token);
601 
602 	oldsize = node->tn_size;
603 	if (ap->a_ioflag & IO_APPEND)
604 		uio->uio_offset = node->tn_size;
605 
606 	/*
607 	 * Check for illegal write offsets.
608 	 */
609 	if (uio->uio_offset + uio->uio_resid >
610 	  VFS_TO_TMPFS(vp->v_mount)->tm_maxfilesize) {
611 		lwkt_reltoken(&vp->v_mount->mnt_token);
612 		return (EFBIG);
613 	}
614 
615 	/*
616 	 * NOTE: Ignore if UIO does not come from a user thread (e.g. VN).
617 	 */
618 	if (vp->v_type == VREG && td != NULL && td->td_lwp != NULL) {
619 		error = kern_getrlimit(RLIMIT_FSIZE, &limit);
620 		if (error != 0) {
621 			lwkt_reltoken(&vp->v_mount->mnt_token);
622 			return error;
623 		}
624 		if (uio->uio_offset + uio->uio_resid > limit.rlim_cur) {
625 			ksignal(td->td_proc, SIGXFSZ);
626 			lwkt_reltoken(&vp->v_mount->mnt_token);
627 			return (EFBIG);
628 		}
629 	}
630 
631 
632 	/*
633 	 * Extend the file's size if necessary
634 	 */
635 	extended = ((uio->uio_offset + uio->uio_resid) > node->tn_size);
636 
637 	while (uio->uio_resid > 0) {
638 		/*
639 		 * Use buffer cache I/O (via tmpfs_strategy)
640 		 */
641 		offset = (size_t)uio->uio_offset & BMASK;
642 		base_offset = (off_t)uio->uio_offset - offset;
643 		len = BSIZE - offset;
644 		if (len > uio->uio_resid)
645 			len = uio->uio_resid;
646 
647 		if ((uio->uio_offset + len) > node->tn_size) {
648 			trivial = (uio->uio_offset <= node->tn_size);
649 			error = tmpfs_reg_resize(vp, uio->uio_offset + len,  trivial);
650 			if (error)
651 				break;
652 		}
653 
654 		/*
655 		 * Read to fill in any gaps.  Theoretically we could
656 		 * optimize this if the write covers the entire buffer
657 		 * and is not a UIO_NOCOPY write, however this can lead
658 		 * to a security violation exposing random kernel memory
659 		 * (whatever junk was in the backing VM pages before).
660 		 *
661 		 * So just use bread() to do the right thing.
662 		 */
663 		error = bread(vp, base_offset, BSIZE, &bp);
664 		error = uiomovebp(bp, (char *)bp->b_data + offset, len, uio);
665 		if (error) {
666 			kprintf("tmpfs_write uiomove error %d\n", error);
667 			brelse(bp);
668 			break;
669 		}
670 
671 		if (uio->uio_offset > node->tn_size) {
672 			node->tn_size = uio->uio_offset;
673 			kflags |= NOTE_EXTEND;
674 		}
675 		kflags |= NOTE_WRITE;
676 
677 		/*
678 		 * Always try to flush the page in the UIO_NOCOPY case.  This
679 		 * can come from the pageout daemon or during vnode eviction.
680 		 * It is not necessarily going to be marked IO_ASYNC/IO_SYNC.
681 		 *
682 		 * For the normal case we buwrite(), dirtying the underlying
683 		 * VM pages instead of dirtying the buffer and releasing the
684 		 * buffer as a clean buffer.  This allows tmpfs to use
685 		 * essentially all available memory to cache file data.
686 		 * If we used bdwrite() the buffer cache would wind up
687 		 * flushing the data to swap too quickly.
688 		 *
689 		 * tmpfs pretty much fiddles directly with the VM
690 		 * system, don't let it exhaust it or we won't play
691 		 * nice with other processes.  Only do this if the
692 		 * VOP is coming from a normal read/write.  The VM system
693 		 * handles the case for UIO_NOCOPY.
694 		 */
695 		bp->b_flags |= B_AGE;
696 		if (uio->uio_segflg == UIO_NOCOPY) {
697 			bawrite(bp);
698 		} else {
699 			buwrite(bp);
700 			vm_wait_nominal();
701 		}
702 
703 		if (bp->b_error) {
704 			kprintf("tmpfs_write bwrite error %d\n", bp->b_error);
705 			break;
706 		}
707 	}
708 
709 	if (error) {
710 		if (extended) {
711 			(void)tmpfs_reg_resize(vp, oldsize, trivial);
712 			kflags &= ~NOTE_EXTEND;
713 		}
714 		goto done;
715 	}
716 
717 	/*
718 	 * Currently we don't set the mtime on files modified via mmap()
719 	 * because we can't tell the difference between those modifications
720 	 * and an attempt by the pageout daemon to flush tmpfs pages to
721 	 * swap.
722 	 *
723 	 * This is because in order to defer flushes as long as possible
724 	 * buwrite() works by marking the underlying VM pages dirty in
725 	 * order to be able to dispose of the buffer cache buffer without
726 	 * flushing it.
727 	 */
728 	TMPFS_NODE_LOCK(node);
729 	if (uio->uio_segflg != UIO_NOCOPY)
730 		node->tn_status |= TMPFS_NODE_ACCESSED | TMPFS_NODE_MODIFIED;
731 	if (extended)
732 		node->tn_status |= TMPFS_NODE_CHANGED;
733 
734 	if (node->tn_mode & (S_ISUID | S_ISGID)) {
735 		if (priv_check_cred(ap->a_cred, PRIV_VFS_RETAINSUGID, 0))
736 			node->tn_mode &= ~(S_ISUID | S_ISGID);
737 	}
738 	TMPFS_NODE_UNLOCK(node);
739 done:
740 
741 	tmpfs_knote(vp, kflags);
742 
743 	lwkt_reltoken(&vp->v_mount->mnt_token);
744 	return(error);
745 }
746 
747 static int
748 tmpfs_advlock (struct vop_advlock_args *ap)
749 {
750 	struct tmpfs_node *node;
751 	struct vnode *vp = ap->a_vp;
752 	int error;
753 
754 	lwkt_gettoken(&vp->v_mount->mnt_token);
755 	node = VP_TO_TMPFS_NODE(vp);
756 
757 	error = (lf_advlock(ap, &node->tn_advlock, node->tn_size));
758 	lwkt_reltoken(&vp->v_mount->mnt_token);
759 
760 	return (error);
761 }
762 
763 /*
764  * The strategy function is typically only called when memory pressure
765  * forces the system to attempt to pageout pages.  It can also be called
766  * by [n]vtruncbuf() when a truncation cuts a page in half.  Normal write
767  * operations
768  */
769 static int
770 tmpfs_strategy(struct vop_strategy_args *ap)
771 {
772 	struct bio *bio = ap->a_bio;
773 	struct bio *nbio;
774 	struct buf *bp = bio->bio_buf;
775 	struct vnode *vp = ap->a_vp;
776 	struct tmpfs_node *node;
777 	vm_object_t uobj;
778 	vm_page_t m;
779 	int i;
780 
781 	if (vp->v_type != VREG) {
782 		bp->b_resid = bp->b_bcount;
783 		bp->b_flags |= B_ERROR | B_INVAL;
784 		bp->b_error = EINVAL;
785 		biodone(bio);
786 		return(0);
787 	}
788 
789 	lwkt_gettoken(&vp->v_mount->mnt_token);
790 	node = VP_TO_TMPFS_NODE(vp);
791 
792 	uobj = node->tn_reg.tn_aobj;
793 
794 	/*
795 	 * Don't bother flushing to swap if there is no swap, just
796 	 * ensure that the pages are marked as needing a commit (still).
797 	 */
798 	if (bp->b_cmd == BUF_CMD_WRITE && vm_swap_size == 0) {
799 		for (i = 0; i < bp->b_xio.xio_npages; ++i) {
800 			m = bp->b_xio.xio_pages[i];
801 			vm_page_need_commit(m);
802 		}
803 		bp->b_resid = 0;
804 		bp->b_error = 0;
805 		biodone(bio);
806 	} else {
807 		nbio = push_bio(bio);
808 		nbio->bio_done = tmpfs_strategy_done;
809 		nbio->bio_offset = bio->bio_offset;
810 		swap_pager_strategy(uobj, nbio);
811 	}
812 
813 	lwkt_reltoken(&vp->v_mount->mnt_token);
814 	return 0;
815 }
816 
817 /*
818  * If we were unable to commit the pages to swap make sure they are marked
819  * as needing a commit (again).  If we were, clear the flag to allow the
820  * pages to be freed.
821  */
822 static void
823 tmpfs_strategy_done(struct bio *bio)
824 {
825 	struct buf *bp;
826 	vm_page_t m;
827 	int i;
828 
829 	bp = bio->bio_buf;
830 
831 	if (bp->b_flags & B_ERROR) {
832 		bp->b_flags &= ~B_ERROR;
833 		bp->b_error = 0;
834 		bp->b_resid = 0;
835 		for (i = 0; i < bp->b_xio.xio_npages; ++i) {
836 			m = bp->b_xio.xio_pages[i];
837 			vm_page_need_commit(m);
838 		}
839 	} else {
840 		for (i = 0; i < bp->b_xio.xio_npages; ++i) {
841 			m = bp->b_xio.xio_pages[i];
842 			vm_page_clear_commit(m);
843 		}
844 	}
845 	bio = pop_bio(bio);
846 	biodone(bio);
847 }
848 
849 static int
850 tmpfs_bmap(struct vop_bmap_args *ap)
851 {
852 	if (ap->a_doffsetp != NULL)
853 		*ap->a_doffsetp = ap->a_loffset;
854 	if (ap->a_runp != NULL)
855 		*ap->a_runp = 0;
856 	if (ap->a_runb != NULL)
857 		*ap->a_runb = 0;
858 
859 	return 0;
860 }
861 
862 /* --------------------------------------------------------------------- */
863 
864 static int
865 tmpfs_nremove(struct vop_nremove_args *v)
866 {
867 	struct vnode *dvp = v->a_dvp;
868 	struct namecache *ncp = v->a_nch->ncp;
869 	struct vnode *vp;
870 	int error;
871 	struct tmpfs_dirent *de;
872 	struct tmpfs_mount *tmp;
873 	struct tmpfs_node *dnode;
874 	struct tmpfs_node *node;
875 	struct mount *mp;
876 
877 	mp = dvp->v_mount;
878 
879 	lwkt_gettoken(&mp->mnt_token);
880 
881 	/*
882 	 * We have to acquire the vp from v->a_nch because we will likely
883 	 * unresolve the namecache entry, and a vrele/vput is needed to
884 	 * trigger the tmpfs_inactive/tmpfs_reclaim sequence.
885 	 *
886 	 * We have to use vget to clear any inactive state on the vnode,
887 	 * otherwise the vnode may remain inactive and thus tmpfs_inactive
888 	 * will not get called when we release it.
889 	 */
890 	error = cache_vget(v->a_nch, v->a_cred, LK_SHARED, &vp);
891 	KKASSERT(vp->v_mount == dvp->v_mount);
892 	KKASSERT(error == 0);
893 	vn_unlock(vp);
894 
895 	if (vp->v_type == VDIR) {
896 		error = EISDIR;
897 		goto out;
898 	}
899 
900 	dnode = VP_TO_TMPFS_DIR(dvp);
901 	node = VP_TO_TMPFS_NODE(vp);
902 	tmp = VFS_TO_TMPFS(vp->v_mount);
903 	de = tmpfs_dir_lookup(dnode, node, ncp);
904 	if (de == NULL) {
905 		error = ENOENT;
906 		goto out;
907 	}
908 
909 	/* Files marked as immutable or append-only cannot be deleted. */
910 	if ((node->tn_flags & (IMMUTABLE | APPEND | NOUNLINK)) ||
911 	    (dnode->tn_flags & APPEND)) {
912 		error = EPERM;
913 		goto out;
914 	}
915 
916 	/* Remove the entry from the directory; as it is a file, we do not
917 	 * have to change the number of hard links of the directory. */
918 	tmpfs_dir_detach(dnode, de);
919 
920 	/* Free the directory entry we just deleted.  Note that the node
921 	 * referred by it will not be removed until the vnode is really
922 	 * reclaimed. */
923 	tmpfs_free_dirent(tmp, de);
924 
925 	if (node->tn_links > 0) {
926 	        TMPFS_NODE_LOCK(node);
927 		node->tn_status |= TMPFS_NODE_ACCESSED | TMPFS_NODE_CHANGED | \
928 	                TMPFS_NODE_MODIFIED;
929 	        TMPFS_NODE_UNLOCK(node);
930 	}
931 
932 	cache_unlink(v->a_nch);
933 	tmpfs_knote(vp, NOTE_DELETE);
934 	tmpfs_knote(dvp, NOTE_WRITE);
935 	error = 0;
936 
937 out:
938 	vrele(vp);
939 	lwkt_reltoken(&mp->mnt_token);
940 
941 	return error;
942 }
943 
944 /* --------------------------------------------------------------------- */
945 
946 static int
947 tmpfs_nlink(struct vop_nlink_args *v)
948 {
949 	struct vnode *dvp = v->a_dvp;
950 	struct vnode *vp = v->a_vp;
951 	struct namecache *ncp = v->a_nch->ncp;
952 	struct tmpfs_dirent *de;
953 	struct tmpfs_node *node;
954 	struct tmpfs_node *dnode;
955 	struct mount *mp;
956 	int error;
957 
958 	if (dvp->v_mount != vp->v_mount)
959 		return(EXDEV);
960 	mp = dvp->v_mount;
961 
962 	lwkt_gettoken(&mp->mnt_token);
963 	KKASSERT(dvp != vp); /* XXX When can this be false? */
964 
965 	node = VP_TO_TMPFS_NODE(vp);
966 	dnode = VP_TO_TMPFS_NODE(dvp);
967 
968 	/* XXX: Why aren't the following two tests done by the caller? */
969 
970 	/* Hard links of directories are forbidden. */
971 	if (vp->v_type == VDIR) {
972 		error = EPERM;
973 		goto out;
974 	}
975 
976 	/* Cannot create cross-device links. */
977 	if (dvp->v_mount != vp->v_mount) {
978 		error = EXDEV;
979 		goto out;
980 	}
981 
982 	/* Ensure that we do not overflow the maximum number of links imposed
983 	 * by the system. */
984 	KKASSERT(node->tn_links <= LINK_MAX);
985 	if (node->tn_links == LINK_MAX) {
986 		error = EMLINK;
987 		goto out;
988 	}
989 
990 	/* We cannot create links of files marked immutable or append-only. */
991 	if (node->tn_flags & (IMMUTABLE | APPEND)) {
992 		error = EPERM;
993 		goto out;
994 	}
995 
996 	/* Allocate a new directory entry to represent the node. */
997 	error = tmpfs_alloc_dirent(VFS_TO_TMPFS(vp->v_mount), node,
998 				   ncp->nc_name, ncp->nc_nlen, &de);
999 	if (error != 0)
1000 		goto out;
1001 
1002 	/* Insert the new directory entry into the appropriate directory. */
1003 	tmpfs_dir_attach(dnode, de);
1004 
1005 	/* vp link count has changed, so update node times. */
1006 
1007 	TMPFS_NODE_LOCK(node);
1008 	node->tn_status |= TMPFS_NODE_CHANGED;
1009 	TMPFS_NODE_UNLOCK(node);
1010 	tmpfs_update(vp);
1011 
1012 	tmpfs_knote(vp, NOTE_LINK);
1013 	cache_setunresolved(v->a_nch);
1014 	cache_setvp(v->a_nch, vp);
1015 	tmpfs_knote(dvp, NOTE_WRITE);
1016 	error = 0;
1017 
1018 out:
1019 	lwkt_reltoken(&mp->mnt_token);
1020 	return error;
1021 }
1022 
1023 /* --------------------------------------------------------------------- */
1024 
1025 static int
1026 tmpfs_nrename(struct vop_nrename_args *v)
1027 {
1028 	struct vnode *fdvp = v->a_fdvp;
1029 	struct namecache *fncp = v->a_fnch->ncp;
1030 	struct vnode *fvp = fncp->nc_vp;
1031 	struct vnode *tdvp = v->a_tdvp;
1032 	struct namecache *tncp = v->a_tnch->ncp;
1033 	struct vnode *tvp;
1034 	struct tmpfs_dirent *de, *tde;
1035 	struct tmpfs_mount *tmp;
1036 	struct tmpfs_node *fdnode;
1037 	struct tmpfs_node *fnode;
1038 	struct tmpfs_node *tnode;
1039 	struct tmpfs_node *tdnode;
1040 	struct mount *mp;
1041 	char *newname;
1042 	char *oldname;
1043 	int error;
1044 
1045 	mp = fdvp->v_mount;
1046 	KKASSERT(fdvp->v_mount == fvp->v_mount);
1047 
1048 	lwkt_gettoken(&mp->mnt_token);
1049 	/*
1050 	 * Because tvp can get overwritten we have to vget it instead of
1051 	 * just vref or use it, otherwise its VINACTIVE flag may not get
1052 	 * cleared and the node won't get destroyed.
1053 	 */
1054 	error = cache_vget(v->a_tnch, v->a_cred, LK_SHARED, &tvp);
1055 	if (error == 0) {
1056 		tnode = VP_TO_TMPFS_NODE(tvp);
1057 		vn_unlock(tvp);
1058 	} else {
1059 		tnode = NULL;
1060 	}
1061 
1062 	/* Disallow cross-device renames.
1063 	 * XXX Why isn't this done by the caller? */
1064 	if (fvp->v_mount != tdvp->v_mount ||
1065 	    (tvp != NULL && fvp->v_mount != tvp->v_mount)) {
1066 		error = EXDEV;
1067 		goto out;
1068 	}
1069 
1070 	tmp = VFS_TO_TMPFS(tdvp->v_mount);
1071 	tdnode = VP_TO_TMPFS_DIR(tdvp);
1072 
1073 	/* If source and target are the same file, there is nothing to do. */
1074 	if (fvp == tvp) {
1075 		error = 0;
1076 		goto out;
1077 	}
1078 
1079 	fdnode = VP_TO_TMPFS_DIR(fdvp);
1080 	fnode = VP_TO_TMPFS_NODE(fvp);
1081 	de = tmpfs_dir_lookup(fdnode, fnode, fncp);
1082 
1083 	/* Avoid manipulating '.' and '..' entries. */
1084 	if (de == NULL) {
1085 		error = ENOENT;
1086 		goto out_locked;
1087 	}
1088 	KKASSERT(de->td_node == fnode);
1089 
1090 	/*
1091 	 * If replacing an entry in the target directory and that entry
1092 	 * is a directory, it must be empty.
1093 	 *
1094 	 * Kern_rename guarantees the destination to be a directory
1095 	 * if the source is one (it does?).
1096 	 */
1097 	if (tvp != NULL) {
1098 		KKASSERT(tnode != NULL);
1099 
1100 		if ((tnode->tn_flags & (NOUNLINK | IMMUTABLE | APPEND)) ||
1101 		    (tdnode->tn_flags & (APPEND | IMMUTABLE))) {
1102 			error = EPERM;
1103 			goto out_locked;
1104 		}
1105 
1106 		if (fnode->tn_type == VDIR && tnode->tn_type == VDIR) {
1107 			if (tnode->tn_size > 0) {
1108 				error = ENOTEMPTY;
1109 				goto out_locked;
1110 			}
1111 		} else if (fnode->tn_type == VDIR && tnode->tn_type != VDIR) {
1112 			error = ENOTDIR;
1113 			goto out_locked;
1114 		} else if (fnode->tn_type != VDIR && tnode->tn_type == VDIR) {
1115 			error = EISDIR;
1116 			goto out_locked;
1117 		} else {
1118 			KKASSERT(fnode->tn_type != VDIR &&
1119 				tnode->tn_type != VDIR);
1120 		}
1121 	}
1122 
1123 	if ((fnode->tn_flags & (NOUNLINK | IMMUTABLE | APPEND)) ||
1124 	    (fdnode->tn_flags & (APPEND | IMMUTABLE))) {
1125 		error = EPERM;
1126 		goto out_locked;
1127 	}
1128 
1129 	/*
1130 	 * Ensure that we have enough memory to hold the new name, if it
1131 	 * has to be changed.
1132 	 */
1133 	if (fncp->nc_nlen != tncp->nc_nlen ||
1134 	    bcmp(fncp->nc_name, tncp->nc_name, fncp->nc_nlen) != 0) {
1135 		newname = kmalloc(tncp->nc_nlen + 1, tmp->tm_name_zone,
1136 				  M_WAITOK | M_NULLOK);
1137 		if (newname == NULL) {
1138 			error = ENOSPC;
1139 			goto out_locked;
1140 		}
1141 		bcopy(tncp->nc_name, newname, tncp->nc_nlen);
1142 		newname[tncp->nc_nlen] = '\0';
1143 	} else {
1144 		newname = NULL;
1145 	}
1146 
1147 	/*
1148 	 * Unlink entry from source directory.  Note that the kernel has
1149 	 * already checked for illegal recursion cases (renaming a directory
1150 	 * into a subdirectory of itself).
1151 	 */
1152 	if (fdnode != tdnode) {
1153 		tmpfs_dir_detach(fdnode, de);
1154 	} else {
1155 		RB_REMOVE(tmpfs_dirtree, &fdnode->tn_dir.tn_dirtree, de);
1156 	}
1157 
1158 	/*
1159 	 * Handle any name change.  Swap with newname, we will
1160 	 * deallocate it at the end.
1161 	 */
1162 	if (newname != NULL) {
1163 #if 0
1164 		TMPFS_NODE_LOCK(fnode);
1165 		fnode->tn_status |= TMPFS_NODE_CHANGED;
1166 		TMPFS_NODE_UNLOCK(fnode);
1167 #endif
1168 		oldname = de->td_name;
1169 		de->td_name = newname;
1170 		de->td_namelen = (uint16_t)tncp->nc_nlen;
1171 		newname = oldname;
1172 	}
1173 
1174 	/*
1175 	 * If we are overwriting an entry, we have to remove the old one
1176 	 * from the target directory.
1177 	 */
1178 	if (tvp != NULL) {
1179 		/* Remove the old entry from the target directory. */
1180 		tde = tmpfs_dir_lookup(tdnode, tnode, tncp);
1181 		tmpfs_dir_detach(tdnode, tde);
1182 		tmpfs_knote(tdnode->tn_vnode, NOTE_DELETE);
1183 
1184 		/*
1185 		 * Free the directory entry we just deleted.  Note that the
1186 		 * node referred by it will not be removed until the vnode is
1187 		 * really reclaimed.
1188 		 */
1189 		tmpfs_free_dirent(VFS_TO_TMPFS(tvp->v_mount), tde);
1190 		/*cache_inval_vp(tvp, CINV_DESTROY);*/
1191 	}
1192 
1193 	/*
1194 	 * Link entry to target directory.  If the entry
1195 	 * represents a directory move the parent linkage
1196 	 * as well.
1197 	 */
1198 	if (fdnode != tdnode) {
1199 		if (de->td_node->tn_type == VDIR) {
1200 			TMPFS_VALIDATE_DIR(fnode);
1201 		}
1202 		tmpfs_dir_attach(tdnode, de);
1203 	} else {
1204 		TMPFS_NODE_LOCK(tdnode);
1205 		tdnode->tn_status |= TMPFS_NODE_MODIFIED;
1206 		RB_INSERT(tmpfs_dirtree, &tdnode->tn_dir.tn_dirtree, de);
1207 		TMPFS_NODE_UNLOCK(tdnode);
1208 	}
1209 
1210 	/*
1211 	 * Finish up
1212 	 */
1213 	if (newname) {
1214 		kfree(newname, tmp->tm_name_zone);
1215 		newname = NULL;
1216 	}
1217 	cache_rename(v->a_fnch, v->a_tnch);
1218 	tmpfs_knote(v->a_fdvp, NOTE_WRITE);
1219 	tmpfs_knote(v->a_tdvp, NOTE_WRITE);
1220 	if (fnode->tn_vnode)
1221 		tmpfs_knote(fnode->tn_vnode, NOTE_RENAME);
1222 	error = 0;
1223 
1224 out_locked:
1225 	;
1226 
1227 out:
1228 	if (tvp)
1229 		vrele(tvp);
1230 
1231 	lwkt_reltoken(&mp->mnt_token);
1232 
1233 	return error;
1234 }
1235 
1236 /* --------------------------------------------------------------------- */
1237 
1238 static int
1239 tmpfs_nmkdir(struct vop_nmkdir_args *v)
1240 {
1241 	struct vnode *dvp = v->a_dvp;
1242 	struct vnode **vpp = v->a_vpp;
1243 	struct namecache *ncp = v->a_nch->ncp;
1244 	struct vattr *vap = v->a_vap;
1245 	struct ucred *cred = v->a_cred;
1246 	struct mount *mp;
1247 	int error;
1248 
1249 	mp = dvp->v_mount;
1250 
1251 	lwkt_gettoken(&mp->mnt_token);
1252 	KKASSERT(vap->va_type == VDIR);
1253 
1254 	error = tmpfs_alloc_file(dvp, vpp, vap, ncp, cred, NULL);
1255 	if (error == 0) {
1256 		cache_setunresolved(v->a_nch);
1257 		cache_setvp(v->a_nch, *vpp);
1258 		tmpfs_knote(dvp, NOTE_WRITE | NOTE_LINK);
1259 	}
1260 
1261 	lwkt_reltoken(&mp->mnt_token);
1262 
1263 	return error;
1264 }
1265 
1266 /* --------------------------------------------------------------------- */
1267 
1268 static int
1269 tmpfs_nrmdir(struct vop_nrmdir_args *v)
1270 {
1271 	struct vnode *dvp = v->a_dvp;
1272 	struct namecache *ncp = v->a_nch->ncp;
1273 	struct vnode *vp;
1274 	struct tmpfs_dirent *de;
1275 	struct tmpfs_mount *tmp;
1276 	struct tmpfs_node *dnode;
1277 	struct tmpfs_node *node;
1278 	struct mount *mp;
1279 	int error;
1280 
1281 	mp = dvp->v_mount;
1282 	lwkt_gettoken(&mp->mnt_token);
1283 
1284 	/*
1285 	 * We have to acquire the vp from v->a_nch because we will likely
1286 	 * unresolve the namecache entry, and a vrele/vput is needed to
1287 	 * trigger the tmpfs_inactive/tmpfs_reclaim sequence.
1288 	 *
1289 	 * We have to use vget to clear any inactive state on the vnode,
1290 	 * otherwise the vnode may remain inactive and thus tmpfs_inactive
1291 	 * will not get called when we release it.
1292 	 */
1293 	error = cache_vget(v->a_nch, v->a_cred, LK_SHARED, &vp);
1294 	KKASSERT(error == 0);
1295 	vn_unlock(vp);
1296 
1297 	/*
1298 	 * Prevalidate so we don't hit an assertion later
1299 	 */
1300 	if (vp->v_type != VDIR) {
1301 		error = ENOTDIR;
1302 		goto out;
1303 	}
1304 
1305 	tmp = VFS_TO_TMPFS(dvp->v_mount);
1306 	dnode = VP_TO_TMPFS_DIR(dvp);
1307 	node = VP_TO_TMPFS_DIR(vp);
1308 
1309 	/* Directories with more than two entries ('.' and '..') cannot be
1310 	 * removed. */
1311 	 if (node->tn_size > 0) {
1312 		 error = ENOTEMPTY;
1313 		 goto out;
1314 	 }
1315 
1316 	if ((dnode->tn_flags & APPEND)
1317 	    || (node->tn_flags & (NOUNLINK | IMMUTABLE | APPEND))) {
1318 		error = EPERM;
1319 		goto out;
1320 	}
1321 
1322 	/* This invariant holds only if we are not trying to remove "..".
1323 	  * We checked for that above so this is safe now. */
1324 	KKASSERT(node->tn_dir.tn_parent == dnode);
1325 
1326 	/* Get the directory entry associated with node (vp).  This was
1327 	 * filled by tmpfs_lookup while looking up the entry. */
1328 	de = tmpfs_dir_lookup(dnode, node, ncp);
1329 	KKASSERT(TMPFS_DIRENT_MATCHES(de,
1330 	    ncp->nc_name,
1331 	    ncp->nc_nlen));
1332 
1333 	/* Check flags to see if we are allowed to remove the directory. */
1334 	if ((dnode->tn_flags & APPEND) ||
1335 	    node->tn_flags & (NOUNLINK | IMMUTABLE | APPEND)) {
1336 		error = EPERM;
1337 		goto out;
1338 	}
1339 
1340 
1341 	/* Detach the directory entry from the directory (dnode). */
1342 	tmpfs_dir_detach(dnode, de);
1343 
1344 	/* No vnode should be allocated for this entry from this point */
1345 	TMPFS_NODE_LOCK(node);
1346 	TMPFS_ASSERT_ELOCKED(node);
1347 	TMPFS_NODE_LOCK(dnode);
1348 	TMPFS_ASSERT_ELOCKED(dnode);
1349 
1350 	/*
1351 	 * Must set parent linkage to NULL (tested by ncreate to disallow
1352 	 * the creation of new files/dirs in a deleted directory)
1353 	 */
1354 	node->tn_status |= TMPFS_NODE_ACCESSED | TMPFS_NODE_CHANGED | \
1355 	    TMPFS_NODE_MODIFIED;
1356 
1357 	dnode->tn_status |= TMPFS_NODE_ACCESSED | \
1358 	    TMPFS_NODE_CHANGED | TMPFS_NODE_MODIFIED;
1359 
1360 	TMPFS_NODE_UNLOCK(dnode);
1361 	TMPFS_NODE_UNLOCK(node);
1362 
1363 	/* Free the directory entry we just deleted.  Note that the node
1364 	 * referred by it will not be removed until the vnode is really
1365 	 * reclaimed. */
1366 	tmpfs_free_dirent(tmp, de);
1367 
1368 	/* Release the deleted vnode (will destroy the node, notify
1369 	 * interested parties and clean it from the cache). */
1370 
1371 	TMPFS_NODE_LOCK(dnode);
1372 	dnode->tn_status |= TMPFS_NODE_CHANGED;
1373 	TMPFS_NODE_UNLOCK(dnode);
1374 	tmpfs_update(dvp);
1375 
1376 	cache_unlink(v->a_nch);
1377 	tmpfs_knote(dvp, NOTE_WRITE | NOTE_LINK);
1378 	error = 0;
1379 
1380 out:
1381 	vrele(vp);
1382 
1383 	lwkt_reltoken(&mp->mnt_token);
1384 
1385 	return error;
1386 }
1387 
1388 /* --------------------------------------------------------------------- */
1389 
1390 static int
1391 tmpfs_nsymlink(struct vop_nsymlink_args *v)
1392 {
1393 	struct vnode *dvp = v->a_dvp;
1394 	struct vnode **vpp = v->a_vpp;
1395 	struct namecache *ncp = v->a_nch->ncp;
1396 	struct vattr *vap = v->a_vap;
1397 	struct ucred *cred = v->a_cred;
1398 	char *target = v->a_target;
1399 	struct mount *mp = dvp->v_mount;
1400 	int error;
1401 
1402 	lwkt_gettoken(&mp->mnt_token);
1403 	vap->va_type = VLNK;
1404 	error = tmpfs_alloc_file(dvp, vpp, vap, ncp, cred, target);
1405 	if (error == 0) {
1406 		tmpfs_knote(*vpp, NOTE_WRITE);
1407 		cache_setunresolved(v->a_nch);
1408 		cache_setvp(v->a_nch, *vpp);
1409 	}
1410 
1411 	lwkt_reltoken(&mp->mnt_token);
1412 
1413 	return error;
1414 }
1415 
1416 /* --------------------------------------------------------------------- */
1417 
1418 static int
1419 tmpfs_readdir(struct vop_readdir_args *v)
1420 {
1421 	struct vnode *vp = v->a_vp;
1422 	struct uio *uio = v->a_uio;
1423 	int *eofflag = v->a_eofflag;
1424 	off_t **cookies = v->a_cookies;
1425 	int *ncookies = v->a_ncookies;
1426 	struct tmpfs_mount *tmp;
1427 	int error;
1428 	off_t startoff;
1429 	off_t cnt = 0;
1430 	struct tmpfs_node *node;
1431 	struct mount *mp = vp->v_mount;
1432 
1433 	lwkt_gettoken(&mp->mnt_token);
1434 
1435 	/* This operation only makes sense on directory nodes. */
1436 	if (vp->v_type != VDIR) {
1437 		lwkt_reltoken(&mp->mnt_token);
1438 		return ENOTDIR;
1439 	}
1440 
1441 	tmp = VFS_TO_TMPFS(vp->v_mount);
1442 	node = VP_TO_TMPFS_DIR(vp);
1443 	startoff = uio->uio_offset;
1444 
1445 	if (uio->uio_offset == TMPFS_DIRCOOKIE_DOT) {
1446 		error = tmpfs_dir_getdotdent(node, uio);
1447 		if (error != 0)
1448 			goto outok;
1449 		cnt++;
1450 	}
1451 
1452 	if (uio->uio_offset == TMPFS_DIRCOOKIE_DOTDOT) {
1453 		error = tmpfs_dir_getdotdotdent(tmp, node, uio);
1454 		if (error != 0)
1455 			goto outok;
1456 		cnt++;
1457 	}
1458 
1459 	error = tmpfs_dir_getdents(node, uio, &cnt);
1460 
1461 outok:
1462 	KKASSERT(error >= -1);
1463 
1464 	if (error == -1)
1465 		error = 0;
1466 
1467 	if (eofflag != NULL)
1468 		*eofflag =
1469 		    (error == 0 && uio->uio_offset == TMPFS_DIRCOOKIE_EOF);
1470 
1471 	/* Update NFS-related variables. */
1472 	if (error == 0 && cookies != NULL && ncookies != NULL) {
1473 		off_t i;
1474 		off_t off = startoff;
1475 		struct tmpfs_dirent *de = NULL;
1476 
1477 		*ncookies = cnt;
1478 		*cookies = kmalloc(cnt * sizeof(off_t), M_TEMP, M_WAITOK);
1479 
1480 		for (i = 0; i < cnt; i++) {
1481 			KKASSERT(off != TMPFS_DIRCOOKIE_EOF);
1482 			if (off == TMPFS_DIRCOOKIE_DOT) {
1483 				off = TMPFS_DIRCOOKIE_DOTDOT;
1484 			} else {
1485 				if (off == TMPFS_DIRCOOKIE_DOTDOT) {
1486 					de = RB_MIN(tmpfs_dirtree, &node->tn_dir.tn_dirtree);
1487 				} else if (de != NULL) {
1488 					de = RB_NEXT(tmpfs_dirtree, &node->tn_dir.tn_dirtree, de);
1489 				} else {
1490 					de = tmpfs_dir_lookupbycookie(node,
1491 					    off);
1492 					KKASSERT(de != NULL);
1493 					de = RB_NEXT(tmpfs_dirtree, &node->tn_dir.tn_dirtree, de);
1494 				}
1495 				if (de == NULL)
1496 					off = TMPFS_DIRCOOKIE_EOF;
1497 				else
1498 					off = tmpfs_dircookie(de);
1499 			}
1500 
1501 			(*cookies)[i] = off;
1502 		}
1503 		KKASSERT(uio->uio_offset == off);
1504 	}
1505 
1506 	lwkt_reltoken(&mp->mnt_token);
1507 
1508 	return error;
1509 }
1510 
1511 /* --------------------------------------------------------------------- */
1512 
1513 static int
1514 tmpfs_readlink(struct vop_readlink_args *v)
1515 {
1516 	struct vnode *vp = v->a_vp;
1517 	struct uio *uio = v->a_uio;
1518 	struct mount *mp = vp->v_mount;
1519 	int error;
1520 	struct tmpfs_node *node;
1521 
1522 	lwkt_gettoken(&mp->mnt_token);
1523 
1524 	KKASSERT(uio->uio_offset == 0);
1525 	KKASSERT(vp->v_type == VLNK);
1526 
1527 	node = VP_TO_TMPFS_NODE(vp);
1528 
1529 	error = uiomove(node->tn_link, MIN(node->tn_size, uio->uio_resid),
1530 	    uio);
1531 	TMPFS_NODE_LOCK(node);
1532 	node->tn_status |= TMPFS_NODE_ACCESSED;
1533 	TMPFS_NODE_UNLOCK(node);
1534 
1535 	lwkt_reltoken(&mp->mnt_token);
1536 
1537 	return error;
1538 }
1539 
1540 /* --------------------------------------------------------------------- */
1541 
1542 static int
1543 tmpfs_inactive(struct vop_inactive_args *v)
1544 {
1545 	struct vnode *vp = v->a_vp;
1546 	struct tmpfs_node *node;
1547 	struct mount *mp;
1548 
1549 	mp = vp->v_mount;
1550 	lwkt_gettoken(&mp->mnt_token);
1551 	node = VP_TO_TMPFS_NODE(vp);
1552 
1553 	/*
1554 	 * Degenerate case
1555 	 */
1556 	if (node == NULL) {
1557 		vrecycle(vp);
1558 		lwkt_reltoken(&mp->mnt_token);
1559 		return(0);
1560 	}
1561 
1562 	/*
1563 	 * Get rid of unreferenced deleted vnodes sooner rather than
1564 	 * later so the data memory can be recovered immediately.
1565 	 *
1566 	 * We must truncate the vnode to prevent the normal reclamation
1567 	 * path from flushing the data for the removed file to disk.
1568 	 */
1569 	TMPFS_NODE_LOCK(node);
1570 	if ((node->tn_vpstate & TMPFS_VNODE_ALLOCATING) == 0 &&
1571 	    node->tn_links == 0)
1572 	{
1573 		node->tn_vpstate = TMPFS_VNODE_DOOMED;
1574 		TMPFS_NODE_UNLOCK(node);
1575 		if (node->tn_type == VREG)
1576 			tmpfs_truncate(vp, 0);
1577 		vrecycle(vp);
1578 	} else {
1579 		TMPFS_NODE_UNLOCK(node);
1580 	}
1581 	lwkt_reltoken(&mp->mnt_token);
1582 
1583 	return 0;
1584 }
1585 
1586 /* --------------------------------------------------------------------- */
1587 
1588 int
1589 tmpfs_reclaim(struct vop_reclaim_args *v)
1590 {
1591 	struct vnode *vp = v->a_vp;
1592 	struct tmpfs_mount *tmp;
1593 	struct tmpfs_node *node;
1594 	struct mount *mp;
1595 
1596 	mp = vp->v_mount;
1597 	lwkt_gettoken(&mp->mnt_token);
1598 
1599 	node = VP_TO_TMPFS_NODE(vp);
1600 	tmp = VFS_TO_TMPFS(vp->v_mount);
1601 	KKASSERT(mp == tmp->tm_mount);
1602 
1603 	tmpfs_free_vp(vp);
1604 
1605 	/*
1606 	 * If the node referenced by this vnode was deleted by the
1607 	 * user, we must free its associated data structures now that
1608 	 * the vnode is being reclaimed.
1609 	 *
1610 	 * Directories have an extra link ref.
1611 	 */
1612 	TMPFS_NODE_LOCK(node);
1613 	if ((node->tn_vpstate & TMPFS_VNODE_ALLOCATING) == 0 &&
1614 	    node->tn_links == 0) {
1615 		node->tn_vpstate = TMPFS_VNODE_DOOMED;
1616 		tmpfs_free_node(tmp, node);
1617 		/* eats the lock */
1618 	} else {
1619 		TMPFS_NODE_UNLOCK(node);
1620 	}
1621 	lwkt_reltoken(&mp->mnt_token);
1622 
1623 	KKASSERT(vp->v_data == NULL);
1624 	return 0;
1625 }
1626 
1627 /* --------------------------------------------------------------------- */
1628 
1629 static int
1630 tmpfs_mountctl(struct vop_mountctl_args *ap)
1631 {
1632 	struct tmpfs_mount *tmp;
1633 	struct mount *mp;
1634 	int rc;
1635 
1636 	mp = ap->a_head.a_ops->head.vv_mount;
1637 	lwkt_gettoken(&mp->mnt_token);
1638 
1639 	switch (ap->a_op) {
1640 	case (MOUNTCTL_SET_EXPORT):
1641 		tmp = (struct tmpfs_mount *) mp->mnt_data;
1642 
1643 		if (ap->a_ctllen != sizeof(struct export_args))
1644 			rc = (EINVAL);
1645 		else
1646 			rc = vfs_export(mp, &tmp->tm_export,
1647 					(const struct export_args *) ap->a_ctl);
1648 		break;
1649 	default:
1650 		rc = vop_stdmountctl(ap);
1651 		break;
1652 	}
1653 
1654 	lwkt_reltoken(&mp->mnt_token);
1655 	return (rc);
1656 }
1657 
1658 /* --------------------------------------------------------------------- */
1659 
1660 static int
1661 tmpfs_print(struct vop_print_args *v)
1662 {
1663 	struct vnode *vp = v->a_vp;
1664 
1665 	struct tmpfs_node *node;
1666 
1667 	node = VP_TO_TMPFS_NODE(vp);
1668 
1669 	kprintf("tag VT_TMPFS, tmpfs_node %p, flags 0x%x, links %d\n",
1670 	    node, node->tn_flags, node->tn_links);
1671 	kprintf("\tmode 0%o, owner %d, group %d, size %ju, status 0x%x\n",
1672 	    node->tn_mode, node->tn_uid, node->tn_gid,
1673 	    (uintmax_t)node->tn_size, node->tn_status);
1674 
1675 	if (vp->v_type == VFIFO)
1676 		fifo_printinfo(vp);
1677 
1678 	kprintf("\n");
1679 
1680 	return 0;
1681 }
1682 
1683 /* --------------------------------------------------------------------- */
1684 
1685 static int
1686 tmpfs_pathconf(struct vop_pathconf_args *v)
1687 {
1688 	int name = v->a_name;
1689 	register_t *retval = v->a_retval;
1690 
1691 	int error;
1692 
1693 	error = 0;
1694 
1695 	switch (name) {
1696 	case _PC_LINK_MAX:
1697 		*retval = LINK_MAX;
1698 		break;
1699 
1700 	case _PC_NAME_MAX:
1701 		*retval = NAME_MAX;
1702 		break;
1703 
1704 	case _PC_PATH_MAX:
1705 		*retval = PATH_MAX;
1706 		break;
1707 
1708 	case _PC_PIPE_BUF:
1709 		*retval = PIPE_BUF;
1710 		break;
1711 
1712 	case _PC_CHOWN_RESTRICTED:
1713 		*retval = 1;
1714 		break;
1715 
1716 	case _PC_NO_TRUNC:
1717 		*retval = 1;
1718 		break;
1719 
1720 	case _PC_SYNC_IO:
1721 		*retval = 1;
1722 		break;
1723 
1724 	case _PC_FILESIZEBITS:
1725 		*retval = 0; /* XXX Don't know which value should I return. */
1726 		break;
1727 
1728 	default:
1729 		error = EINVAL;
1730 	}
1731 
1732 	return error;
1733 }
1734 
1735 /************************************************************************
1736  *                          KQFILTER OPS                                *
1737  ************************************************************************/
1738 
1739 static void filt_tmpfsdetach(struct knote *kn);
1740 static int filt_tmpfsread(struct knote *kn, long hint);
1741 static int filt_tmpfswrite(struct knote *kn, long hint);
1742 static int filt_tmpfsvnode(struct knote *kn, long hint);
1743 
1744 static struct filterops tmpfsread_filtops =
1745 	{ FILTEROP_ISFD, NULL, filt_tmpfsdetach, filt_tmpfsread };
1746 static struct filterops tmpfswrite_filtops =
1747 	{ FILTEROP_ISFD, NULL, filt_tmpfsdetach, filt_tmpfswrite };
1748 static struct filterops tmpfsvnode_filtops =
1749 	{ FILTEROP_ISFD, NULL, filt_tmpfsdetach, filt_tmpfsvnode };
1750 
1751 static int
1752 tmpfs_kqfilter (struct vop_kqfilter_args *ap)
1753 {
1754 	struct vnode *vp = ap->a_vp;
1755 	struct knote *kn = ap->a_kn;
1756 
1757 	switch (kn->kn_filter) {
1758 	case EVFILT_READ:
1759 		kn->kn_fop = &tmpfsread_filtops;
1760 		break;
1761 	case EVFILT_WRITE:
1762 		kn->kn_fop = &tmpfswrite_filtops;
1763 		break;
1764 	case EVFILT_VNODE:
1765 		kn->kn_fop = &tmpfsvnode_filtops;
1766 		break;
1767 	default:
1768 		return (EOPNOTSUPP);
1769 	}
1770 
1771 	kn->kn_hook = (caddr_t)vp;
1772 
1773 	knote_insert(&vp->v_pollinfo.vpi_kqinfo.ki_note, kn);
1774 
1775 	return(0);
1776 }
1777 
1778 static void
1779 filt_tmpfsdetach(struct knote *kn)
1780 {
1781 	struct vnode *vp = (void *)kn->kn_hook;
1782 
1783 	knote_remove(&vp->v_pollinfo.vpi_kqinfo.ki_note, kn);
1784 }
1785 
1786 static int
1787 filt_tmpfsread(struct knote *kn, long hint)
1788 {
1789 	struct vnode *vp = (void *)kn->kn_hook;
1790 	struct tmpfs_node *node = VP_TO_TMPFS_NODE(vp);
1791 	off_t off;
1792 
1793 	if (hint == NOTE_REVOKE) {
1794 		kn->kn_flags |= (EV_EOF | EV_NODATA | EV_ONESHOT);
1795 		return(1);
1796 	}
1797 
1798 	/*
1799 	 * Interlock against MP races when performing this function.
1800 	 */
1801 	lwkt_gettoken(&vp->v_mount->mnt_token);
1802 	off = node->tn_size - kn->kn_fp->f_offset;
1803 	kn->kn_data = (off < INTPTR_MAX) ? off : INTPTR_MAX;
1804 	if (kn->kn_sfflags & NOTE_OLDAPI) {
1805 		lwkt_reltoken(&vp->v_mount->mnt_token);
1806 		return(1);
1807 	}
1808 
1809 	if (kn->kn_data == 0) {
1810 		kn->kn_data = (off < INTPTR_MAX) ? off : INTPTR_MAX;
1811 	}
1812 	lwkt_reltoken(&vp->v_mount->mnt_token);
1813 	return (kn->kn_data != 0);
1814 }
1815 
1816 static int
1817 filt_tmpfswrite(struct knote *kn, long hint)
1818 {
1819 	if (hint == NOTE_REVOKE)
1820 		kn->kn_flags |= (EV_EOF | EV_NODATA | EV_ONESHOT);
1821 	kn->kn_data = 0;
1822 	return (1);
1823 }
1824 
1825 static int
1826 filt_tmpfsvnode(struct knote *kn, long hint)
1827 {
1828 	if (kn->kn_sfflags & hint)
1829 		kn->kn_fflags |= hint;
1830 	if (hint == NOTE_REVOKE) {
1831 		kn->kn_flags |= (EV_EOF | EV_NODATA);
1832 		return (1);
1833 	}
1834 	return (kn->kn_fflags != 0);
1835 }
1836 
1837 
1838 /* --------------------------------------------------------------------- */
1839 
1840 /*
1841  * vnode operations vector used for files stored in a tmpfs file system.
1842  */
1843 struct vop_ops tmpfs_vnode_vops = {
1844 	.vop_default =			vop_defaultop,
1845 	.vop_getpages = 		vop_stdgetpages,
1846 	.vop_putpages = 		vop_stdputpages,
1847 	.vop_ncreate =			tmpfs_ncreate,
1848 	.vop_nresolve =			tmpfs_nresolve,
1849 	.vop_nlookupdotdot =		tmpfs_nlookupdotdot,
1850 	.vop_nmknod =			tmpfs_nmknod,
1851 	.vop_open =			tmpfs_open,
1852 	.vop_close =			tmpfs_close,
1853 	.vop_access =			tmpfs_access,
1854 	.vop_getattr =			tmpfs_getattr,
1855 	.vop_setattr =			tmpfs_setattr,
1856 	.vop_read =			tmpfs_read,
1857 	.vop_write =			tmpfs_write,
1858 	.vop_fsync =			tmpfs_fsync,
1859 	.vop_mountctl =			tmpfs_mountctl,
1860 	.vop_nremove =			tmpfs_nremove,
1861 	.vop_nlink =			tmpfs_nlink,
1862 	.vop_nrename =			tmpfs_nrename,
1863 	.vop_nmkdir =			tmpfs_nmkdir,
1864 	.vop_nrmdir =			tmpfs_nrmdir,
1865 	.vop_nsymlink =			tmpfs_nsymlink,
1866 	.vop_readdir =			tmpfs_readdir,
1867 	.vop_readlink =			tmpfs_readlink,
1868 	.vop_inactive =			tmpfs_inactive,
1869 	.vop_reclaim =			tmpfs_reclaim,
1870 	.vop_print =			tmpfs_print,
1871 	.vop_pathconf =			tmpfs_pathconf,
1872 	.vop_bmap =			tmpfs_bmap,
1873 	.vop_strategy =			tmpfs_strategy,
1874 	.vop_advlock =			tmpfs_advlock,
1875 	.vop_kqfilter =			tmpfs_kqfilter
1876 };
1877