/*	$OpenBSD: tmpfs_subr.c,v 1.17 2016/09/22 10:23:27 jsg Exp $	*/
/*	$NetBSD: tmpfs_subr.c,v 1.79 2012/03/13 18:40:50 elad Exp $	*/

/*
 * Copyright (c) 2005-2011 The NetBSD Foundation, Inc.
 * Copyright (c) 2013 Pedro Martelletto
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Julio M. Merino Vidal, developed as part of Google's Summer of Code
 * 2005 program, and by Mindaugas Rasiukevicius.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Efficient memory file system: interfaces for inode and directory entry
 * construction, destruction and manipulation.
 *
 * Reference counting
 *
 *	The link count of an inode (tmpfs_node_t::tn_links) is used as a
 *	reference counter, but with slightly different semantics.
 *
 *	For directories, the link count represents the directory entries
 *	which refer to the directory, i.e. the count of sub-directories,
 *	plus the virtual '.' entry (which has no real entry in the list).
 *	For files, the link count represents the hard links.  Since only
 *	empty directories can be removed, the link count covers the
 *	reference counting requirements well enough.  Note: to check
 *	whether a directory is empty, the inode size
 *	(tmpfs_node_t::tn_size) can be used.
 *
 *	The inode itself, as an object, gathers its first reference when
 *	a directory entry is attached via tmpfs_dir_attach(9).  For
 *	instance, after a regular tmpfs_create(), a file would have a link
 *	count of 1, while a directory after tmpfs_mkdir() would have 2
 *	(due to '.').
 *
 * Reclamation
 *
 *	tmpfs inodes rely on a combination of vnode reference counting and
 *	link counting.  That is, an inode can only be destroyed if its
 *	associated vnode is inactive.  The destruction is done on vnode
 *	reclamation, i.e. in tmpfs_reclaim().  Note that
 *	tmpfs_node_t::tn_links being 0 is a destruction criterion.
 *
 *	If an inode has references within the file system (tn_links > 0)
 *	and its inactive vnode gets reclaimed/recycled, the association is
 *	broken in tmpfs_reclaim().  In such a case, the inode will always
 *	pass through tmpfs_lookup() and thus tmpfs_vnode_get() to associate
 *	a new vnode.
 *
 * Lock order
 *
 *	tmpfs_node_t::tn_nlock ->
 *		struct vnode::v_vlock ->
 *			struct vnode::v_interlock
 */
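
/*
 * Illustration (an editorial sketch, not from the original sources): with
 * the semantics above, link counts would hypothetically evolve as:
 *
 *	create "f"		file "f":  tn_links == 1
 *	link "f" -> "g"		file "f":  tn_links == 2
 *	mkdir "d"		dir "d":   tn_links == 2 ('.' + entry)
 *				parent:    tn_links incremented by 1
 */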

#include <sys/param.h>
#include <sys/dirent.h>
#include <sys/event.h>
#include <sys/mount.h>
#include <sys/namei.h>
#include <sys/time.h>
#include <sys/proc.h>
#include <sys/stat.h>
#include <sys/systm.h>
#include <sys/vnode.h>

#include <uvm/uvm_aobj.h>

#include <tmpfs/tmpfs.h>
#include <tmpfs/tmpfs_vnops.h>


/* Local functions. */
void	tmpfs_dir_putseq(tmpfs_node_t *, tmpfs_dirent_t *);
int	tmpfs_dir_getdotents(tmpfs_node_t *, struct dirent *, struct uio *);

/*
 * tmpfs_alloc_node: allocate a new inode of a specified type and
 * insert it into the list of the specified mount point.
 */
int
tmpfs_alloc_node(tmpfs_mount_t *tmp, enum vtype type, uid_t uid, gid_t gid,
    mode_t mode, char *target, dev_t rdev, tmpfs_node_t **node)
{
	tmpfs_node_t *nnode;
	struct uvm_object *uobj;

	nnode = tmpfs_node_get(tmp);
	if (nnode == NULL) {
		return ENOSPC;
	}

	/* Initially, no references and no associations. */
	nnode->tn_links = 0;
	nnode->tn_vnode = NULL;
	nnode->tn_dirent_hint = NULL;

	rw_enter_write(&tmp->tm_acc_lock);
	nnode->tn_id = ++tmp->tm_highest_inode;
	if (nnode->tn_id == 0) {
		--tmp->tm_highest_inode;
		rw_exit_write(&tmp->tm_acc_lock);
		tmpfs_node_put(tmp, nnode);
		return ENOSPC;
	}
	rw_exit_write(&tmp->tm_acc_lock);

	/* Generic initialization. */
	nnode->tn_type = type;
	nnode->tn_size = 0;
	nnode->tn_flags = 0;
	nnode->tn_lockf = NULL;
	nnode->tn_gen = TMPFS_NODE_GEN_MASK & arc4random();

	nanotime(&nnode->tn_atime);
	nnode->tn_birthtime = nnode->tn_atime;
	nnode->tn_ctime = nnode->tn_atime;
	nnode->tn_mtime = nnode->tn_atime;

	/* XXX pedro: we should check for UID_MAX and GID_MAX instead. */
	KASSERT(uid != VNOVAL && gid != VNOVAL && mode != VNOVAL);

	nnode->tn_uid = uid;
	nnode->tn_gid = gid;
	nnode->tn_mode = mode;

	/* Type-specific initialization. */
	switch (nnode->tn_type) {
	case VBLK:
	case VCHR:
		/* Character/block special device. */
		KASSERT(rdev != VNOVAL);
		nnode->tn_spec.tn_dev.tn_rdev = rdev;
		break;
	case VDIR:
		/* Directory. */
		TAILQ_INIT(&nnode->tn_spec.tn_dir.tn_dir);
		nnode->tn_spec.tn_dir.tn_parent = NULL;
		nnode->tn_spec.tn_dir.tn_next_seq = TMPFS_DIRSEQ_START;
		nnode->tn_spec.tn_dir.tn_readdir_lastp = NULL;

		/* Extra link count for the virtual '.' entry. */
		nnode->tn_links++;
		break;
	case VFIFO:
	case VSOCK:
		break;
	case VLNK:
		/* Symbolic link.  Target specifies the file name. */
		KASSERT(target && strlen(target) < MAXPATHLEN);

		nnode->tn_size = strlen(target);
		if (nnode->tn_size == 0) {
			nnode->tn_spec.tn_lnk.tn_link = NULL;
			break;
		}
		nnode->tn_spec.tn_lnk.tn_link =
		    tmpfs_strname_alloc(tmp, nnode->tn_size);
		if (nnode->tn_spec.tn_lnk.tn_link == NULL) {
			tmpfs_node_put(tmp, nnode);
			return ENOSPC;
		}
		memcpy(nnode->tn_spec.tn_lnk.tn_link, target, nnode->tn_size);
		break;
	case VREG:
		/* Regular file.  Create an underlying UVM object. */
		uobj = uao_create(0, UAO_FLAG_CANFAIL);
		if (uobj == NULL) {
			tmpfs_node_put(tmp, nnode);
			return ENOSPC;
		}
		nnode->tn_spec.tn_reg.tn_aobj = uobj;
		nnode->tn_spec.tn_reg.tn_aobj_pages = 0;
		nnode->tn_spec.tn_reg.tn_aobj_pgptr = (vaddr_t)NULL;
		nnode->tn_spec.tn_reg.tn_aobj_pgnum = (voff_t)-1;
		break;
	default:
		KASSERT(0);
	}

	rw_init(&nnode->tn_nlock, "tvlk");

	rw_enter_write(&tmp->tm_lock);
	LIST_INSERT_HEAD(&tmp->tm_nodes, nnode, tn_entries);
	rw_exit_write(&tmp->tm_lock);

	*node = nnode;
	return 0;
}
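
/*
 * A minimal usage sketch (hypothetical, for illustration only; callers
 * normally go through tmpfs_alloc_file()): allocating a regular-file
 * inode.  The rdev argument is ignored for non-device types.
 */
#if 0
	tmpfs_node_t *n;
	int error;

	error = tmpfs_alloc_node(tmp, VREG, uid, gid, S_IRUSR | S_IWUSR,
	    NULL /* no symlink target */, 0 /* rdev unused for VREG */, &n);
	if (error)
		return error;	/* ENOSPC when memory/inode limits are hit */
#endif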

/*
 * tmpfs_free_node: remove the inode from the mount point's list and
 * destroy the inode structures.
 */
void
tmpfs_free_node(tmpfs_mount_t *tmp, tmpfs_node_t *node)
{
	size_t objsz;

	rw_enter_write(&tmp->tm_lock);
	LIST_REMOVE(node, tn_entries);
	rw_exit_write(&tmp->tm_lock);

	switch (node->tn_type) {
	case VLNK:
		if (node->tn_size > 0) {
			KASSERT(node->tn_size <= SIZE_MAX);
			tmpfs_strname_free(tmp, node->tn_spec.tn_lnk.tn_link,
			    node->tn_size);
		}
		break;
	case VREG:
		/*
		 * Calculate the size of inode data, decrease the used-memory
		 * counter, and destroy the underlying UVM object (if any).
		 */
		objsz = PAGE_SIZE * node->tn_spec.tn_reg.tn_aobj_pages;
		if (objsz != 0) {
			tmpfs_mem_decr(tmp, objsz);
		}
		if (node->tn_spec.tn_reg.tn_aobj != NULL) {
			uao_detach(node->tn_spec.tn_reg.tn_aobj);
			node->tn_spec.tn_reg.tn_aobj = NULL;
		}
		break;
	case VDIR:
		KASSERT(TAILQ_EMPTY(&node->tn_spec.tn_dir.tn_dir));
		KASSERT(node->tn_spec.tn_dir.tn_parent == NULL ||
		    node == tmp->tm_root);
		break;
	default:
		break;
	}

	rw_enter_write(&tmp->tm_acc_lock);
	if (node->tn_id == tmp->tm_highest_inode)
		--tmp->tm_highest_inode;
	rw_exit_write(&tmp->tm_acc_lock);

	/* mutex_destroy(&node->tn_nlock); */
	tmpfs_node_put(tmp, node);
}

/*
 * tmpfs_vnode_get: allocate or reclaim a vnode for a specified inode.
 *
 * => Must be called with tmpfs_node_t::tn_nlock held.
 * => Returns vnode (*vpp) locked.
 */
int
tmpfs_vnode_get(struct mount *mp, tmpfs_node_t *node, struct vnode **vpp)
{
	struct vnode *vp, *nvp;
	/* kmutex_t *slock; */
	int error;
again:
	/* If there is already a vnode, try to reclaim it. */
	if ((vp = node->tn_vnode) != NULL) {
		/* atomic_or_ulong(&node->tn_gen, TMPFS_RECLAIMING_BIT); */
		node->tn_gen |= TMPFS_RECLAIMING_BIT;
		rw_exit_write(&node->tn_nlock);
		error = vget(vp, LK_EXCLUSIVE, curproc);
		if (error == ENOENT) {
			rw_enter_write(&node->tn_nlock);
			goto again;
		}
		/* atomic_and_ulong(&node->tn_gen, ~TMPFS_RECLAIMING_BIT); */
		node->tn_gen &= ~TMPFS_RECLAIMING_BIT;
		*vpp = vp;
		return error;
	}
	if (TMPFS_NODE_RECLAIMING(node)) {
		/* atomic_and_ulong(&node->tn_gen, ~TMPFS_RECLAIMING_BIT); */
		node->tn_gen &= ~TMPFS_RECLAIMING_BIT;
	}

	/*
	 * Get a new vnode and associate it with our inode.  Share the
	 * lock with underlying UVM object, if there is one (VREG case).
	 */
#if 0
	if (node->tn_type == VREG) {
		struct uvm_object *uobj = node->tn_spec.tn_reg.tn_aobj;
		slock = uobj->vmobjlock;
	} else {
		slock = NULL;
	}
#endif
	error = getnewvnode(VT_TMPFS, mp, &tmpfs_vops, &vp);
	if (error) {
		rw_exit_write(&node->tn_nlock);
		return error;
	}

	rrw_init(&node->tn_vlock, "tnode");
	vp->v_type = node->tn_type;

	/* Type-specific initialization. */
	switch (node->tn_type) {
	case VBLK:
	case VCHR:
		vp->v_op = &tmpfs_specvops;
		if ((nvp = checkalias(vp, node->tn_spec.tn_dev.tn_rdev, mp))) {
			nvp->v_data = vp->v_data;
			vp->v_data = NULL;
			vp->v_op = &spec_vops;
			vrele(vp);
			vgone(vp);
			vp = nvp;
			node->tn_vnode = vp;
		}
		break;
	case VDIR:
		vp->v_flag |= node->tn_spec.tn_dir.tn_parent == node ?
		    VROOT : 0;
		break;
#ifdef FIFO
	case VFIFO:
		vp->v_op = &tmpfs_fifovops;
		break;
#endif
	case VLNK:
	case VREG:
	case VSOCK:
		break;
	default:
		KASSERT(0);
	}

	uvm_vnp_setsize(vp, node->tn_size);
	vp->v_data = node;
	node->tn_vnode = vp;
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, curproc);
	rw_exit_write(&node->tn_nlock);

	KASSERT(VOP_ISLOCKED(vp));
	*vpp = vp;
	return 0;
}
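
/*
 * Calling-convention sketch (illustrative, variable names hypothetical):
 * tn_nlock must be write-locked on entry and is always released by
 * tmpfs_vnode_get(), on both the success and the error paths.
 */
#if 0
	rw_enter_write(&node->tn_nlock);
	error = tmpfs_vnode_get(mp, node, &vp);	/* drops tn_nlock */
	if (error == 0) {
		/* ... use the locked vnode ... */
		vput(vp);
	}
#endif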

/*
 * tmpfs_alloc_file: allocate a new file of the specified type and add it
 * into the parent directory.
 *
 * => Credentials of the caller are used.
 */
int
tmpfs_alloc_file(struct vnode *dvp, struct vnode **vpp, struct vattr *vap,
    struct componentname *cnp, char *target)
{
	tmpfs_mount_t *tmp = VFS_TO_TMPFS(dvp->v_mount);
	tmpfs_node_t *dnode = VP_TO_TMPFS_DIR(dvp), *node;
	tmpfs_dirent_t *de;
	int error;

	KASSERT(VOP_ISLOCKED(dvp));
	*vpp = NULL;

	/* Check for the maximum number of links limit. */
	if (vap->va_type == VDIR) {
		if (dnode->tn_links == LINK_MAX) {
			error = EMLINK;
			goto out;
		}
		KASSERT(dnode->tn_links < LINK_MAX);
	}

	if (TMPFS_DIRSEQ_FULL(dnode)) {
		error = ENOSPC;
		goto out;
	}

	if (dnode->tn_links == 0) {
		error = ENOENT;
		goto out;
	}

	/* Allocate a node that represents the new file. */
	error = tmpfs_alloc_node(tmp, vap->va_type, cnp->cn_cred->cr_uid,
	    dnode->tn_gid, vap->va_mode, target, vap->va_rdev, &node);
	if (error)
		goto out;

	/* Allocate a directory entry that points to the new file. */
	error = tmpfs_alloc_dirent(tmp, cnp->cn_nameptr, cnp->cn_namelen, &de);
	if (error) {
		tmpfs_free_node(tmp, node);
		goto out;
	}

	/* Get a vnode for the new file. */
	rw_enter_write(&node->tn_nlock);
	error = tmpfs_vnode_get(dvp->v_mount, node, vpp);
	if (error) {
		tmpfs_free_dirent(tmp, de);
		tmpfs_free_node(tmp, node);
		goto out;
	}

	/* Associate inode and attach the entry into the directory. */
	tmpfs_dir_attach(dnode, de, node);

out:
	if (error == 0 && (cnp->cn_flags & SAVESTART) == 0)
		pool_put(&namei_pool, cnp->cn_pnbuf);
	vput(dvp);
	return error;
}

/*
 * tmpfs_alloc_dirent: allocates a new directory entry for the inode.
 * The directory entry contains a path name component.
 */
int
tmpfs_alloc_dirent(tmpfs_mount_t *tmp, const char *name, uint16_t len,
    tmpfs_dirent_t **de)
{
	tmpfs_dirent_t *nde;

	nde = tmpfs_dirent_get(tmp);
	if (nde == NULL)
		return ENOSPC;

	nde->td_name = tmpfs_strname_alloc(tmp, len);
	if (nde->td_name == NULL) {
		tmpfs_dirent_put(tmp, nde);
		return ENOSPC;
	}
	nde->td_namelen = len;
	memcpy(nde->td_name, name, len);
	nde->td_seq = TMPFS_DIRSEQ_NONE;

	*de = nde;
	return 0;
}

/*
 * tmpfs_free_dirent: free a directory entry.
 */
void
tmpfs_free_dirent(tmpfs_mount_t *tmp, tmpfs_dirent_t *de)
{

	KASSERT(de->td_node == NULL);
	KASSERT(de->td_seq == TMPFS_DIRSEQ_NONE);
	tmpfs_strname_free(tmp, de->td_name, de->td_namelen);
	tmpfs_dirent_put(tmp, de);
}

/*
 * tmpfs_dir_attach: associate a directory entry with the specified inode,
 * and attach the entry to the directory specified by the vnode.
 *
 * => Increases link count on the associated node.
 * => Increases link count on directory node, if our node is VDIR.
 *    It is the caller's responsibility to check for the LINK_MAX limit.
 * => Triggers kqueue events here.
 */
void
tmpfs_dir_attach(tmpfs_node_t *dnode, tmpfs_dirent_t *de, tmpfs_node_t *node)
{
	struct vnode *dvp = dnode->tn_vnode;
	int events = NOTE_WRITE;

	KASSERT(dvp != NULL);
	KASSERT(VOP_ISLOCKED(dvp));

	/* Get a new sequence number. */
	KASSERT(de->td_seq == TMPFS_DIRSEQ_NONE);
	de->td_seq = tmpfs_dir_getseq(dnode, de);

	/* Associate directory entry and the inode. */
	de->td_node = node;
	KASSERT(node->tn_links < LINK_MAX);
	node->tn_links++;

	/* Save the hint (might overwrite). */
	node->tn_dirent_hint = de;

	/* Insert the entry to the directory (parent of inode). */
	TAILQ_INSERT_TAIL(&dnode->tn_spec.tn_dir.tn_dir, de, td_entries);
	dnode->tn_size += sizeof(tmpfs_dirent_t);
	tmpfs_update(dnode, TMPFS_NODE_STATUSALL);
	uvm_vnp_setsize(dvp, dnode->tn_size);

	if (node->tn_type == VDIR) {
		/* Set parent. */
		KASSERT(node->tn_spec.tn_dir.tn_parent == NULL);
		node->tn_spec.tn_dir.tn_parent = dnode;

		/* Increase the link count of parent. */
		KASSERT(dnode->tn_links < LINK_MAX);
		dnode->tn_links++;
		events |= NOTE_LINK;

		TMPFS_VALIDATE_DIR(node);
	}
	VN_KNOTE(dvp, events);
}
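
/*
 * Pairing sketch (illustrative, variable names hypothetical): detach and
 * attach undo each other's link-count changes, e.g. when moving an entry
 * between two directories:
 */
#if 0
	tmpfs_dir_detach(old_dnode, de);	/* drops the link counts */
	tmpfs_dir_attach(new_dnode, de, node);	/* takes them again */
#endif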

/*
 * tmpfs_dir_detach: disassociate a directory entry from its inode,
 * and detach the entry from the directory specified by the vnode.
 *
 * => Decreases link count on the associated node.
 * => Decreases the link count on directory node, if our node is VDIR.
 * => Triggers kqueue events here.
 */
void
tmpfs_dir_detach(tmpfs_node_t *dnode, tmpfs_dirent_t *de)
{
	tmpfs_node_t *node = de->td_node;
	struct vnode *vp, *dvp = dnode->tn_vnode;
	int events = NOTE_WRITE;

	KASSERT(dvp == NULL || VOP_ISLOCKED(dvp));

	/* Deassociate the inode and entry. */
	de->td_node = NULL;
	node->tn_dirent_hint = NULL;

	KASSERT(node->tn_links > 0);
	node->tn_links--;
	if ((vp = node->tn_vnode) != NULL) {
		KASSERT(VOP_ISLOCKED(vp));
		VN_KNOTE(vp, node->tn_links ? NOTE_LINK : NOTE_DELETE);
	}

	/* If a directory, decrease the link count of the parent. */
	if (node->tn_type == VDIR) {
		KASSERT(node->tn_spec.tn_dir.tn_parent == dnode);
		node->tn_spec.tn_dir.tn_parent = NULL;

		KASSERT(dnode->tn_links > 0);
		dnode->tn_links--;
		events |= NOTE_LINK;
	}

	/* Remove the entry from the directory. */
	if (dnode->tn_spec.tn_dir.tn_readdir_lastp == de) {
		dnode->tn_spec.tn_dir.tn_readdir_lastp = NULL;
	}
	TAILQ_REMOVE(&dnode->tn_spec.tn_dir.tn_dir, de, td_entries);

	dnode->tn_size -= sizeof(tmpfs_dirent_t);
	tmpfs_update(dnode, TMPFS_NODE_MODIFIED | TMPFS_NODE_CHANGED);
	tmpfs_dir_putseq(dnode, de);
	if (dvp) {
		tmpfs_update(dnode, 0);
		uvm_vnp_setsize(dvp, dnode->tn_size);
		VN_KNOTE(dvp, events);
	}
}

/*
 * tmpfs_dir_lookup: find a directory entry in the specified inode.
 *
 * Note that the . and .. components are not allowed as they do not
 * physically exist within directories.
 */
tmpfs_dirent_t *
tmpfs_dir_lookup(tmpfs_node_t *node, struct componentname *cnp)
{
	const char *name = cnp->cn_nameptr;
	const uint16_t nlen = cnp->cn_namelen;
	tmpfs_dirent_t *de;

	KASSERT(VOP_ISLOCKED(node->tn_vnode));
	KASSERT(nlen != 1 || !(name[0] == '.'));
	KASSERT(nlen != 2 || !(name[0] == '.' && name[1] == '.'));
	TMPFS_VALIDATE_DIR(node);

	TAILQ_FOREACH(de, &node->tn_spec.tn_dir.tn_dir, td_entries) {
		if (de->td_namelen != nlen)
			continue;
		if (memcmp(de->td_name, name, nlen) != 0)
			continue;
		break;
	}
	tmpfs_update(node, TMPFS_NODE_ACCESSED);
	return de;
}

/*
 * tmpfs_dir_cached: get a cached directory entry if it is valid.  Used to
 * avoid unnecessary tmpfs_dir_lookup().
 *
 * => The vnode must be locked.
 */
tmpfs_dirent_t *
tmpfs_dir_cached(tmpfs_node_t *node)
{
	tmpfs_dirent_t *de = node->tn_dirent_hint;

	KASSERT(VOP_ISLOCKED(node->tn_vnode));

	if (de == NULL) {
		return NULL;
	}
	KASSERT(de->td_node == node);

	/*
	 * Directories always have a valid hint.  For files, check if there
	 * are any hard links.  If there are, the hint might be invalid.
	 */
	return (node->tn_type != VDIR && node->tn_links > 1) ? NULL : de;
}

/*
 * tmpfs_dir_getseq: get a per-directory sequence number for the entry.
 */
uint64_t
tmpfs_dir_getseq(tmpfs_node_t *dnode, tmpfs_dirent_t *de)
{
	uint64_t seq = de->td_seq;

	TMPFS_VALIDATE_DIR(dnode);

	if (__predict_true(seq != TMPFS_DIRSEQ_NONE)) {
		/* Already set. */
		KASSERT(seq >= TMPFS_DIRSEQ_START);
		return seq;
	}

	/*
	 * The "." and ".." entries and the end-of-directory marker have
	 * reserved sequence numbers.  The other sequence numbers are
	 * allocated incrementally.
	 */

	seq = dnode->tn_spec.tn_dir.tn_next_seq;
	KASSERT(seq >= TMPFS_DIRSEQ_START);
	KASSERT(seq < TMPFS_DIRSEQ_END);
	dnode->tn_spec.tn_dir.tn_next_seq++;
	return seq;
}

void
tmpfs_dir_putseq(tmpfs_node_t *dnode, tmpfs_dirent_t *de)
{
	uint64_t seq = de->td_seq;

	TMPFS_VALIDATE_DIR(dnode);
	KASSERT(seq == TMPFS_DIRSEQ_NONE || seq >= TMPFS_DIRSEQ_START);
	KASSERT(seq == TMPFS_DIRSEQ_NONE || seq < TMPFS_DIRSEQ_END);

	de->td_seq = TMPFS_DIRSEQ_NONE;

	/* Empty?  We can reset. */
	if (dnode->tn_size == 0) {
		dnode->tn_spec.tn_dir.tn_next_seq = TMPFS_DIRSEQ_START;
	} else if (seq != TMPFS_DIRSEQ_NONE &&
	    seq == dnode->tn_spec.tn_dir.tn_next_seq - 1) {
		dnode->tn_spec.tn_dir.tn_next_seq--;
	}
}
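
/*
 * Worked example (editorial illustration): attaching three entries to an
 * empty directory assigns the sequences START, START + 1 and START + 2,
 * leaving tn_next_seq == START + 3.  Detaching the entry with sequence
 * START + 2 (the most recent one) rolls tn_next_seq back by one; once the
 * directory becomes empty, tn_next_seq resets to TMPFS_DIRSEQ_START.
 */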

/*
 * tmpfs_dir_lookupbyseq: lookup a directory entry by the sequence number.
 */
tmpfs_dirent_t *
tmpfs_dir_lookupbyseq(tmpfs_node_t *node, off_t seq)
{
	tmpfs_dirent_t *de = node->tn_spec.tn_dir.tn_readdir_lastp;

	TMPFS_VALIDATE_DIR(node);

	/*
	 * First, check the cache.  If it does not match, perform a lookup.
	 */
	if (de && de->td_seq == seq) {
		KASSERT(de->td_seq >= TMPFS_DIRSEQ_START);
		KASSERT(de->td_seq != TMPFS_DIRSEQ_NONE);
		return de;
	}

	TAILQ_FOREACH(de, &node->tn_spec.tn_dir.tn_dir, td_entries) {
		KASSERT(de->td_seq >= TMPFS_DIRSEQ_START);
		KASSERT(de->td_seq != TMPFS_DIRSEQ_NONE);
		if (de->td_seq == seq)
			return de;
	}
	return NULL;
}

/*
 * tmpfs_dir_getdotents: helper function for tmpfs_readdir() to get the
 * dot meta entries, that is, "." or "..", and copy them to the UIO space.
 */
int
tmpfs_dir_getdotents(tmpfs_node_t *node, struct dirent *dp, struct uio *uio)
{
	tmpfs_dirent_t *de;
	off_t next = 0;
	int error;

	switch (uio->uio_offset) {
	case TMPFS_DIRSEQ_DOT:
		dp->d_fileno = node->tn_id;
		strlcpy(dp->d_name, ".", sizeof(dp->d_name));
		next = TMPFS_DIRSEQ_DOTDOT;
		break;
	case TMPFS_DIRSEQ_DOTDOT:
		dp->d_fileno = node->tn_spec.tn_dir.tn_parent->tn_id;
		strlcpy(dp->d_name, "..", sizeof(dp->d_name));
		de = TAILQ_FIRST(&node->tn_spec.tn_dir.tn_dir);
		next = de ? tmpfs_dir_getseq(node, de) : TMPFS_DIRSEQ_EOF;
		break;
	default:
		KASSERT(false);
	}
	dp->d_type = DT_DIR;
	dp->d_namlen = strlen(dp->d_name);
	dp->d_reclen = DIRENT_SIZE(dp);
	dp->d_off = next;

	if (dp->d_reclen > uio->uio_resid) {
		return EJUSTRETURN;
	}

	if ((error = uiomove(dp, dp->d_reclen, uio)) != 0) {
		return error;
	}

	uio->uio_offset = next;
	return error;
}

/*
 * tmpfs_dir_getdents: helper function for tmpfs_readdir.
 *
 * => Returns as many directory entries as fit in the uio space.
 * => The read starts at uio->uio_offset.
 */
int
tmpfs_dir_getdents(tmpfs_node_t *node, struct uio *uio)
{
	tmpfs_dirent_t *de, *next_de;
	struct dirent dent;
	int error = 0;

	KASSERT(VOP_ISLOCKED(node->tn_vnode));
	TMPFS_VALIDATE_DIR(node);
	memset(&dent, 0, sizeof(dent));

	if (uio->uio_offset == TMPFS_DIRSEQ_DOT) {
		if ((error = tmpfs_dir_getdotents(node, &dent, uio)) != 0) {
			goto done;
		}
	}
	if (uio->uio_offset == TMPFS_DIRSEQ_DOTDOT) {
		if ((error = tmpfs_dir_getdotents(node, &dent, uio)) != 0) {
			goto done;
		}
	}
	/* Done if we reached the end. */
	if (uio->uio_offset == TMPFS_DIRSEQ_EOF) {
		goto done;
	}

	/* Locate the directory entry at the given sequence number. */
	de = tmpfs_dir_lookupbyseq(node, uio->uio_offset);
	if (de == NULL) {
		error = EINVAL;
		goto done;
	}

	/*
	 * Read as many entries as possible; i.e., until we reach the end
	 * of the directory or we exhaust UIO space.
	 */
	do {
		dent.d_fileno = de->td_node->tn_id;
		switch (de->td_node->tn_type) {
		case VBLK:
			dent.d_type = DT_BLK;
			break;
		case VCHR:
			dent.d_type = DT_CHR;
			break;
		case VDIR:
			dent.d_type = DT_DIR;
			break;
		case VFIFO:
			dent.d_type = DT_FIFO;
			break;
		case VLNK:
			dent.d_type = DT_LNK;
			break;
		case VREG:
			dent.d_type = DT_REG;
			break;
		case VSOCK:
			dent.d_type = DT_SOCK;
			break;
		default:
			KASSERT(0);
		}
		dent.d_namlen = de->td_namelen;
		KASSERT(de->td_namelen < sizeof(dent.d_name));
		memcpy(dent.d_name, de->td_name, de->td_namelen);
		dent.d_name[de->td_namelen] = '\0';
		dent.d_reclen = DIRENT_SIZE(&dent);

		next_de = TAILQ_NEXT(de, td_entries);
		if (next_de == NULL)
			dent.d_off = TMPFS_DIRSEQ_EOF;
		else
			dent.d_off = tmpfs_dir_getseq(node, next_de);

		if (dent.d_reclen > uio->uio_resid) {
			/* Exhausted UIO space. */
			error = EJUSTRETURN;
			break;
		}

		/* Copy out the directory entry and continue. */
		error = uiomove(&dent, dent.d_reclen, uio);
		if (error) {
			break;
		}
		de = TAILQ_NEXT(de, td_entries);

	} while (uio->uio_resid > 0 && de);

	/* Cache the last entry or clear and mark EOF. */
	uio->uio_offset = de ? tmpfs_dir_getseq(node, de) : TMPFS_DIRSEQ_EOF;
	node->tn_spec.tn_dir.tn_readdir_lastp = de;
done:
	tmpfs_update(node, TMPFS_NODE_ACCESSED);

	if (error == EJUSTRETURN) {
		/* Exhausted UIO space - just return. */
		error = 0;
	}
	KASSERT(error >= 0);
	return error;
}
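
/*
 * Offset walk (editorial illustration): a full readdir() of a directory
 * with a single entry visits uio_offset == TMPFS_DIRSEQ_DOT, then
 * TMPFS_DIRSEQ_DOTDOT, then the entry's sequence number, and finally
 * stops at TMPFS_DIRSEQ_EOF.
 */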

/*
 * tmpfs_reg_resize: resize the underlying UVM object associated with the
 * specified regular file.
 */
int
tmpfs_reg_resize(struct vnode *vp, off_t newsize)
{
	tmpfs_mount_t *tmp = VFS_TO_TMPFS(vp->v_mount);
	tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
	struct uvm_object *uobj = node->tn_spec.tn_reg.tn_aobj;
	size_t newpages, oldpages, bytes;
	off_t oldsize;
	vaddr_t pgoff;
	int error;

	KASSERT(vp->v_type == VREG);
	KASSERT(newsize >= 0);

	oldsize = node->tn_size;
	oldpages = round_page(oldsize) >> PAGE_SHIFT;
	newpages = round_page(newsize) >> PAGE_SHIFT;
	KASSERT(oldpages == node->tn_spec.tn_reg.tn_aobj_pages);

	if (newpages > oldpages) {
		/* Increase the used-memory counter if getting extra pages. */
		bytes = (newpages - oldpages) << PAGE_SHIFT;
		if (tmpfs_mem_incr(tmp, bytes) == 0)
			return ENOSPC;
		if (uao_grow(uobj, newpages) != 0) {
			tmpfs_mem_decr(tmp, bytes);
			return ENOSPC;
		}
	}

	node->tn_spec.tn_reg.tn_aobj_pages = newpages;
	node->tn_size = newsize;
	uvm_vnp_setsize(vp, newsize);
	uvm_vnp_uncache(vp);

	/*
	 * Free "backing store".
	 */
	if (newpages < oldpages) {
		if (tmpfs_uio_cached(node))
			tmpfs_uio_uncache(node);
		if (uao_shrink(uobj, newpages))
			panic("shrink failed");
		/* Decrease the used-memory counter. */
		tmpfs_mem_decr(tmp, (oldpages - newpages) << PAGE_SHIFT);
	}
	if (newsize > oldsize) {
		if (tmpfs_uio_cached(node))
			tmpfs_uio_uncache(node);
		pgoff = oldsize & PAGE_MASK;
		if (pgoff != 0) {
			/*
			 * Growing from an offset which is not at a page
			 * boundary; zero out unused bytes in current page.
			 */
			error = tmpfs_zeropg(node, trunc_page(oldsize), pgoff);
			if (error)
				panic("tmpfs_zeropg: error %d", error);
		}
		VN_KNOTE(vp, NOTE_EXTEND);
	}
	return 0;
}
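
/*
 * Accounting sketch (editorial illustration, assuming 4KB pages): growing
 * a file from 5000 to 9000 bytes goes from 2 to 3 backing pages, so
 * tmpfs_mem_incr() is charged one page (4096 bytes) before uao_grow();
 * shrinking back to 5000 bytes releases the same amount via
 * tmpfs_mem_decr().
 */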

/*
 * tmpfs_chflags: change flags of the given vnode.
 */
int
tmpfs_chflags(struct vnode *vp, int flags, struct ucred *cred, struct proc *p)
{
	tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
	int error;

	KASSERT(VOP_ISLOCKED(vp));

	/* Disallow this operation if the file system is mounted read-only. */
	if (vp->v_mount->mnt_flag & MNT_RDONLY)
		return EROFS;

	if (cred->cr_uid != node->tn_uid && (error = suser_ucred(cred)))
		return error;

	if (cred->cr_uid == 0) {
		if (node->tn_flags & (SF_IMMUTABLE | SF_APPEND) &&
		    securelevel > 0)
			return EPERM;
		node->tn_flags = flags;
	} else {
		if (node->tn_flags & (SF_IMMUTABLE | SF_APPEND) ||
		    (flags & UF_SETTABLE) != flags)
			return EPERM;
		node->tn_flags &= SF_SETTABLE;
		node->tn_flags |= (flags & UF_SETTABLE);
	}

	tmpfs_update(node, TMPFS_NODE_CHANGED);
	VN_KNOTE(vp, NOTE_ATTRIB);
	return 0;
}

/*
 * tmpfs_chmod: change access mode on the given vnode.
 */
int
tmpfs_chmod(struct vnode *vp, mode_t mode, struct ucred *cred, struct proc *p)
{
	tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
	int error;

	KASSERT(VOP_ISLOCKED(vp));

	/* Disallow this operation if the file system is mounted read-only. */
	if (vp->v_mount->mnt_flag & MNT_RDONLY)
		return EROFS;

	/* Immutable or append-only files cannot be modified, either. */
	if (node->tn_flags & (IMMUTABLE | APPEND))
		return EPERM;

	if (cred->cr_uid != node->tn_uid && (error = suser_ucred(cred)))
		return error;
	if (cred->cr_uid != 0) {
		if (vp->v_type != VDIR && (mode & S_ISTXT))
			return EFTYPE;
		if (!groupmember(node->tn_gid, cred) && (mode & S_ISGID))
			return EPERM;
	}

	node->tn_mode = (mode & ALLPERMS);
	tmpfs_update(node, TMPFS_NODE_CHANGED);
	if ((vp->v_flag & VTEXT) && (node->tn_mode & S_ISTXT) == 0)
		uvm_vnp_uncache(vp);
	VN_KNOTE(vp, NOTE_ATTRIB);
	return 0;
}

/*
 * tmpfs_chown: change ownership of the given vnode.
 *
 * => At least one of uid or gid must be different from VNOVAL.
 * => The attribute is unchanged for the VNOVAL case.
 */
int
tmpfs_chown(struct vnode *vp, uid_t uid, gid_t gid, struct ucred *cred, struct proc *p)
{
	tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
	int error;

	KASSERT(VOP_ISLOCKED(vp));

	/* Assign default values if they are unknown. */
	KASSERT(uid != VNOVAL || gid != VNOVAL);
	if (uid == VNOVAL) {
		uid = node->tn_uid;
	}
	if (gid == VNOVAL) {
		gid = node->tn_gid;
	}

	/* Disallow this operation if the file system is mounted read-only. */
	if (vp->v_mount->mnt_flag & MNT_RDONLY)
		return EROFS;

	/* Immutable or append-only files cannot be modified, either. */
	if (node->tn_flags & (IMMUTABLE | APPEND))
		return EPERM;

	if ((cred->cr_uid != node->tn_uid || uid != node->tn_uid ||
	    (gid != node->tn_gid && !groupmember(gid, cred))) &&
	    (error = suser_ucred(cred)))
		return error;

	node->tn_uid = uid;
	node->tn_gid = gid;
	tmpfs_update(node, TMPFS_NODE_CHANGED);
	VN_KNOTE(vp, NOTE_ATTRIB);
	return 0;
}

/*
 * tmpfs_chsize: change size of the given vnode.
 */
int
tmpfs_chsize(struct vnode *vp, u_quad_t size, struct ucred *cred, struct proc *p)
{
	tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);

	KASSERT(VOP_ISLOCKED(vp));

	/* Decide whether this is a valid operation based on the file type. */
	switch (vp->v_type) {
	case VDIR:
		return EISDIR;
	case VREG:
		if (vp->v_mount->mnt_flag & MNT_RDONLY) {
			return EROFS;
		}
		break;
	case VBLK:
	case VCHR:
	case VFIFO:
		/*
		 * Allow modifications of special files even if the file
		 * system is mounted read-only (we are not modifying the
		 * files themselves, but the objects they represent).
		 */
		return 0;
	default:
		return EOPNOTSUPP;
	}

	/* Immutable or append-only files cannot be modified, either. */
	if (node->tn_flags & (IMMUTABLE | APPEND)) {
		return EPERM;
	}

	/* Note: tmpfs_truncate() will raise NOTE_EXTEND and NOTE_ATTRIB. */
	return tmpfs_truncate(vp, size);
}

/*
 * tmpfs_chtimes: change access and modification times for the given vnode.
 */
int
tmpfs_chtimes(struct vnode *vp, const struct timespec *atime,
    const struct timespec *mtime, int vaflags, struct ucred *cred,
    struct proc *p)
{
	tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
	int error;

	KASSERT(VOP_ISLOCKED(vp));

	/* Disallow this operation if the file system is mounted read-only. */
	if (vp->v_mount->mnt_flag & MNT_RDONLY)
		return EROFS;

	/* Immutable or append-only files cannot be modified, either. */
	if (node->tn_flags & (IMMUTABLE | APPEND))
		return EPERM;

	if (cred->cr_uid != node->tn_uid && (error = suser_ucred(cred)) &&
	    ((vaflags & VA_UTIMES_NULL) == 0 ||
	    (error = VOP_ACCESS(vp, VWRITE, cred, p))))
		return error;

	if (atime->tv_nsec != VNOVAL)
		node->tn_atime = *atime;

	if (mtime->tv_nsec != VNOVAL)
		node->tn_mtime = *mtime;

	if (mtime->tv_nsec != VNOVAL || (vaflags & VA_UTIMES_CHANGE))
		tmpfs_update(VP_TO_TMPFS_NODE(vp), TMPFS_NODE_CHANGED);

	VN_KNOTE(vp, NOTE_ATTRIB);

	return 0;
}

/*
 * tmpfs_update: update timestamps, et al.
 */
void
tmpfs_update(tmpfs_node_t *node, int flags)
{
	struct timespec nowtm;

	nanotime(&nowtm);

	if (flags & TMPFS_NODE_ACCESSED) {
		node->tn_atime = nowtm;
	}
	if (flags & TMPFS_NODE_MODIFIED) {
		node->tn_mtime = nowtm;
	}
	if (flags & TMPFS_NODE_CHANGED) {
		node->tn_ctime = nowtm;
	}
}

int
tmpfs_truncate(struct vnode *vp, off_t length)
{
	tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
	int error;

	if (length < 0) {
		error = EINVAL;
		goto out;
	}
	if (node->tn_size == length) {
		error = 0;
		goto out;
	}
	error = tmpfs_reg_resize(vp, length);
	if (error == 0) {
		tmpfs_update(node, TMPFS_NODE_CHANGED | TMPFS_NODE_MODIFIED);
	}
out:
	return error;
}

int
tmpfs_uio_cached(tmpfs_node_t *node)
{
	int pgnum_valid = (node->tn_pgnum != (voff_t)-1);
	int pgptr_valid = (node->tn_pgptr != (vaddr_t)NULL);
	KASSERT(pgnum_valid == pgptr_valid);
	return pgnum_valid && pgptr_valid;
}

vaddr_t
tmpfs_uio_lookup(tmpfs_node_t *node, voff_t pgnum)
{
	if (tmpfs_uio_cached(node) == 1 && node->tn_pgnum == pgnum)
		return node->tn_pgptr;

	return (vaddr_t)NULL;
}

void
tmpfs_uio_uncache(tmpfs_node_t *node)
{
	KASSERT(node->tn_pgnum != (voff_t)-1);
	KASSERT(node->tn_pgptr != (vaddr_t)NULL);
	uvm_unmap(kernel_map, node->tn_pgptr, node->tn_pgptr + PAGE_SIZE);
	node->tn_pgnum = (voff_t)-1;
	node->tn_pgptr = (vaddr_t)NULL;
}

void
tmpfs_uio_cache(tmpfs_node_t *node, voff_t pgnum, vaddr_t pgptr)
{
	KASSERT(node->tn_pgnum == (voff_t)-1);
	KASSERT(node->tn_pgptr == (vaddr_t)NULL);
	node->tn_pgnum = pgnum;
	node->tn_pgptr = pgptr;
}

/*
 * Be gentle to kernel_map; don't allow more than 4MB in a single transaction.
 */
#define TMPFS_UIO_MAXBYTES	((1 << 22) - PAGE_SIZE)

int
tmpfs_uiomove(tmpfs_node_t *node, struct uio *uio, vsize_t len)
{
	vaddr_t va, pgoff;
	int error, adv;
	voff_t pgnum;
	vsize_t sz;

	pgnum = trunc_page(uio->uio_offset);
	pgoff = uio->uio_offset & PAGE_MASK;

	if (pgoff + len < PAGE_SIZE) {
		va = tmpfs_uio_lookup(node, pgnum);
		if (va != (vaddr_t)NULL)
			return uiomove((void *)va + pgoff, len, uio);
	}

	if (len >= TMPFS_UIO_MAXBYTES) {
		sz = TMPFS_UIO_MAXBYTES;
		adv = MADV_NORMAL;
	} else {
		sz = len;
		adv = MADV_SEQUENTIAL;
	}

	if (tmpfs_uio_cached(node))
		tmpfs_uio_uncache(node);

	uao_reference(node->tn_uobj);

	error = uvm_map(kernel_map, &va, round_page(pgoff + sz), node->tn_uobj,
	    trunc_page(uio->uio_offset), 0, UVM_MAPFLAG(PROT_READ | PROT_WRITE,
	    PROT_READ | PROT_WRITE, MAP_INHERIT_NONE, adv, 0));
	if (error) {
		uao_detach(node->tn_uobj); /* Drop reference. */
		return error;
	}

	error = uiomove((void *)va + pgoff, sz, uio);
	if (error == 0 && pgoff + sz < PAGE_SIZE)
		tmpfs_uio_cache(node, pgnum, va);
	else
		uvm_unmap(kernel_map, va, va + round_page(pgoff + sz));

	return error;
}
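
/*
 * Window math sketch (editorial illustration, assuming 4KB pages): a
 * 16-byte write at offset 0x1ff8 has pgoff == 0xff8, so pgoff + len
 * crosses a page boundary and the single-page cache cannot be used;
 * uvm_map() then maps round_page(pgoff + sz), i.e. two pages, at the
 * object offset trunc_page(0x1ff8).
 */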

int
tmpfs_zeropg(tmpfs_node_t *node, voff_t pgnum, vaddr_t pgoff)
{
	vaddr_t va;
	int error;

	KASSERT(tmpfs_uio_cached(node) == 0);

	uao_reference(node->tn_uobj);

	error = uvm_map(kernel_map, &va, PAGE_SIZE, node->tn_uobj, pgnum, 0,
	    UVM_MAPFLAG(PROT_READ | PROT_WRITE, PROT_READ | PROT_WRITE,
	    MAP_INHERIT_NONE, MADV_NORMAL, 0));
	if (error) {
		uao_detach(node->tn_uobj); /* Drop reference. */
		return error;
	}

	bzero((void *)va + pgoff, PAGE_SIZE - pgoff);
	uvm_unmap(kernel_map, va, va + PAGE_SIZE);

	return 0;
}