/*	$OpenBSD: tmpfs_subr.c,v 1.27 2024/09/12 09:04:51 claudio Exp $	*/
/*	$NetBSD: tmpfs_subr.c,v 1.79 2012/03/13 18:40:50 elad Exp $	*/

/*
 * Copyright (c) 2005-2011 The NetBSD Foundation, Inc.
 * Copyright (c) 2013 Pedro Martelletto
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Julio M. Merino Vidal, developed as part of Google's Summer of Code
 * 2005 program, and by Mindaugas Rasiukevicius.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Efficient memory file system: interfaces for inode and directory entry
 * construction, destruction and manipulation.
 *
 * Reference counting
 *
 *	The link count of an inode (tmpfs_node_t::tn_links) is used as a
 *	reference counter.  However, it has slightly different semantics.
 *
 *	For directories, the link count represents the directory entries
 *	which refer to the directory.  In other words, it represents the
 *	count of sub-directories.  It also takes into account the virtual
 *	'.' entry (which has no real entry in the list).  For files, the
 *	link count represents the hard links.  Since only empty directories
 *	can be removed, the link count is sufficient for the reference
 *	counting requirements.  Note: to check whether a directory is
 *	empty, the inode size (tmpfs_node_t::tn_size) can be used.
 *
 *	The inode itself, as an object, gains its first reference when a
 *	directory entry is attached via tmpfs_dir_attach(9).  For instance,
 *	after a regular tmpfs_create(), a file would have a link count of
 *	1, while a directory after tmpfs_mkdir() would have 2 (due to '.').
 *
 * Reclamation
 *
 *	tmpfs inodes rely on a combination of vnode reference counting
 *	and link counting.  That is, an inode can only be destroyed if
 *	its associated vnode is inactive.  The destruction is done on
 *	vnode reclamation, i.e. in tmpfs_reclaim().  Note that
 *	tmpfs_node_t::tn_links being 0 is a destruction criterion.
 *
 *	If an inode has references within the file system (tn_links > 0)
 *	and its inactive vnode gets reclaimed/recycled, then the
 *	association is broken in tmpfs_reclaim().  In that case, the inode
 *	will always pass tmpfs_lookup() and thus tmpfs_vnode_get() to
 *	associate a new vnode.
 *
 * Lock order
 *
 *	tmpfs_node_t::tn_nlock ->
 *		struct vnode::v_vlock ->
 *			struct vnode::v_interlock
 */

#include <sys/param.h>
#include <sys/dirent.h>
#include <sys/event.h>
#include <sys/mount.h>
#include <sys/namei.h>
#include <sys/time.h>
#include <sys/proc.h>
#include <sys/stat.h>
#include <sys/systm.h>
#include <sys/vnode.h>

#include <uvm/uvm_aobj.h>

#include <tmpfs/tmpfs.h>
#include <tmpfs/tmpfs_vnops.h>


/* Local functions. */
void	tmpfs_dir_putseq(tmpfs_node_t *, tmpfs_dirent_t *);
int	tmpfs_dir_getdotents(tmpfs_node_t *, struct dirent *, struct uio *);

/*
 * tmpfs_alloc_node: allocate a new inode of the specified type and
 * insert it into the list of the specified mount point.
 */
int
tmpfs_alloc_node(tmpfs_mount_t *tmp, enum vtype type, uid_t uid, gid_t gid,
    mode_t mode, char *target, dev_t rdev, tmpfs_node_t **node)
{
	tmpfs_node_t *nnode;
	struct uvm_object *uobj;

	nnode = tmpfs_node_get(tmp);
	if (nnode == NULL) {
		return ENOSPC;
	}

	/* Initially, no references and no associations. */
	nnode->tn_links = 0;
	nnode->tn_vnode = NULL;
	nnode->tn_dirent_hint = NULL;

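	/*
	 * Allocate the inode number.  Note the pre-increment may wrap the
	 * counter to zero once the ID space is exhausted, which is treated
	 * as an out-of-space condition below.
	 */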
	rw_enter_write(&tmp->tm_acc_lock);
	nnode->tn_id = ++tmp->tm_highest_inode;
	if (nnode->tn_id == 0) {
		--tmp->tm_highest_inode;
		rw_exit_write(&tmp->tm_acc_lock);
		tmpfs_node_put(tmp, nnode);
		return ENOSPC;
	}
	rw_exit_write(&tmp->tm_acc_lock);

	/* Generic initialization. */
	nnode->tn_type = type;
	nnode->tn_size = 0;
	nnode->tn_flags = 0;
	nnode->tn_lockf = NULL;
	nnode->tn_gen = TMPFS_NODE_GEN_MASK & arc4random();

	nanotime(&nnode->tn_atime);
	nnode->tn_birthtime = nnode->tn_atime;
	nnode->tn_ctime = nnode->tn_atime;
	nnode->tn_mtime = nnode->tn_atime;

	KASSERT(uid != VNOVAL && gid != VNOVAL && mode != VNOVAL);

	nnode->tn_uid = uid;
	nnode->tn_gid = gid;
	nnode->tn_mode = mode;

	/* Type-specific initialization. */
	switch (nnode->tn_type) {
	case VBLK:
	case VCHR:
		/* Character/block special device. */
		KASSERT(rdev != VNOVAL);
		nnode->tn_spec.tn_dev.tn_rdev = rdev;
		break;
	case VDIR:
		/* Directory. */
		TAILQ_INIT(&nnode->tn_spec.tn_dir.tn_dir);
		nnode->tn_spec.tn_dir.tn_parent = NULL;
		nnode->tn_spec.tn_dir.tn_next_seq = TMPFS_DIRSEQ_START;
		nnode->tn_spec.tn_dir.tn_readdir_lastp = NULL;

		/* Extra link count for the virtual '.' entry. */
		nnode->tn_links++;
		break;
	case VFIFO:
	case VSOCK:
		break;
	case VLNK:
		/* Symbolic link.  Target specifies the file name. */
		KASSERT(target && strlen(target) < MAXPATHLEN);

		nnode->tn_size = strlen(target);
		if (nnode->tn_size == 0) {
			nnode->tn_spec.tn_lnk.tn_link = NULL;
			break;
		}
		nnode->tn_spec.tn_lnk.tn_link =
		    tmpfs_strname_alloc(tmp, nnode->tn_size);
		if (nnode->tn_spec.tn_lnk.tn_link == NULL) {
			tmpfs_node_put(tmp, nnode);
			return ENOSPC;
		}
		memcpy(nnode->tn_spec.tn_lnk.tn_link, target, nnode->tn_size);
		break;
	case VREG:
		/* Regular file.  Create an underlying UVM object. */
		uobj = uao_create(0, UAO_FLAG_CANFAIL);
		if (uobj == NULL) {
			tmpfs_node_put(tmp, nnode);
			return ENOSPC;
		}
		nnode->tn_spec.tn_reg.tn_aobj = uobj;
		nnode->tn_spec.tn_reg.tn_aobj_pages = 0;
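		/* No page is cached for tmpfs_uiomove() yet; see
		 * tmpfs_uio_cache(). */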
		nnode->tn_spec.tn_reg.tn_aobj_pgptr = (vaddr_t)NULL;
		nnode->tn_spec.tn_reg.tn_aobj_pgnum = (voff_t)-1;
		break;
	default:
		KASSERT(0);
	}

	rw_init(&nnode->tn_nlock, "tvlk");

	rw_enter_write(&tmp->tm_lock);
	LIST_INSERT_HEAD(&tmp->tm_nodes, nnode, tn_entries);
	rw_exit_write(&tmp->tm_lock);

	*node = nnode;
	return 0;
}

/*
 * tmpfs_free_node: remove the inode from the list of the mount point
 * and destroy the inode structures.
 */
void
tmpfs_free_node(tmpfs_mount_t *tmp, tmpfs_node_t *node)
{
	size_t objsz;

	rw_enter_write(&tmp->tm_lock);
	LIST_REMOVE(node, tn_entries);
	rw_exit_write(&tmp->tm_lock);

	switch (node->tn_type) {
	case VLNK:
		if (node->tn_size > 0) {
			KASSERT(node->tn_size <= SIZE_MAX);
			tmpfs_strname_free(tmp, node->tn_spec.tn_lnk.tn_link,
			    node->tn_size);
		}
		break;
	case VREG:
		/*
		 * Calculate the size of inode data, decrease the used-memory
		 * counter, and destroy the underlying UVM object (if any).
		 */
		objsz = PAGE_SIZE * node->tn_spec.tn_reg.tn_aobj_pages;
		if (objsz != 0) {
			tmpfs_mem_decr(tmp, objsz);
		}
		if (node->tn_spec.tn_reg.tn_aobj != NULL) {
			uao_detach(node->tn_spec.tn_reg.tn_aobj);
			node->tn_spec.tn_reg.tn_aobj = NULL;
		}
		break;
	case VDIR:
		KASSERT(TAILQ_EMPTY(&node->tn_spec.tn_dir.tn_dir));
		KASSERT(node->tn_spec.tn_dir.tn_parent == NULL ||
		    node == tmp->tm_root);
		break;
	default:
		break;
	}

	rw_enter_write(&tmp->tm_acc_lock);
	if (node->tn_id == tmp->tm_highest_inode)
		--tmp->tm_highest_inode;
	rw_exit_write(&tmp->tm_acc_lock);

	/* mutex_destroy(&node->tn_nlock); */
	tmpfs_node_put(tmp, node);
}

/*
 * tmpfs_vnode_get: allocate or reclaim a vnode for the specified inode.
 *
 * => Must be called with tmpfs_node_t::tn_nlock held.
 * => Returns vnode (*vpp) locked.
 */
int
tmpfs_vnode_get(struct mount *mp, tmpfs_node_t *node, struct vnode **vpp)
{
	struct vnode *vp, *nvp;
	/* kmutex_t *slock; */
	int error;
again:
	/* If there is already a vnode, try to reclaim it. */
	if ((vp = node->tn_vnode) != NULL) {
		/* atomic_or_ulong(&node->tn_gen, TMPFS_RECLAIMING_BIT); */
		node->tn_gen |= TMPFS_RECLAIMING_BIT;
		rw_exit_write(&node->tn_nlock);
		error = vget(vp, LK_EXCLUSIVE);
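		/*
		 * ENOENT means the vnode was recycled while tn_nlock was
		 * dropped; retry with the lock reacquired.
		 */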
		if (error == ENOENT) {
			rw_enter_write(&node->tn_nlock);
			goto again;
		}
		/* atomic_and_ulong(&node->tn_gen, ~TMPFS_RECLAIMING_BIT); */
		node->tn_gen &= ~TMPFS_RECLAIMING_BIT;
		*vpp = vp;
		return error;
	}
	if (TMPFS_NODE_RECLAIMING(node)) {
		/* atomic_and_ulong(&node->tn_gen, ~TMPFS_RECLAIMING_BIT); */
		node->tn_gen &= ~TMPFS_RECLAIMING_BIT;
	}

	/*
	 * Get a new vnode and associate it with our inode.  Share the
	 * lock with underlying UVM object, if there is one (VREG case).
	 */
#if 0
	if (node->tn_type == VREG) {
		struct uvm_object *uobj = node->tn_spec.tn_reg.tn_aobj;
		slock = uobj->vmobjlock;
	} else {
		slock = NULL;
	}
#endif
	error = getnewvnode(VT_TMPFS, mp, &tmpfs_vops, &vp);
	if (error) {
		rw_exit_write(&node->tn_nlock);
		return error;
	}

	rrw_init_flags(&node->tn_vlock, "tnode", RWL_DUPOK | RWL_IS_VNODE);
	vp->v_type = node->tn_type;

	/* Type-specific initialization. */
	switch (node->tn_type) {
	case VBLK:
	case VCHR:
		vp->v_op = &tmpfs_specvops;
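		/*
		 * checkalias() may return an existing alias vnode for this
		 * device; if so, move our inode over to the alias and
		 * dispose of the freshly allocated vnode.
		 */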
		if ((nvp = checkalias(vp, node->tn_spec.tn_dev.tn_rdev, mp))) {
			nvp->v_data = vp->v_data;
			vp->v_data = NULL;
			vp->v_op = &spec_vops;
			vrele(vp);
			vgone(vp);
			vp = nvp;
			node->tn_vnode = vp;
		}
		break;
	case VDIR:
		vp->v_flag |= node->tn_spec.tn_dir.tn_parent == node ?
		    VROOT : 0;
		break;
#ifdef FIFO
	case VFIFO:
		vp->v_op = &tmpfs_fifovops;
		break;
#endif
	case VLNK:
	case VREG:
	case VSOCK:
		break;
	default:
		KASSERT(0);
	}

	uvm_vnp_setsize(vp, node->tn_size);
	vp->v_data = node;
	node->tn_vnode = vp;
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
	rw_exit_write(&node->tn_nlock);

	KASSERT(VOP_ISLOCKED(vp));
	*vpp = vp;
	return 0;
}

/*
 * tmpfs_alloc_file: allocate a new file of the specified type and add it
 * into the parent directory.
 *
 * => Credentials of the caller are used.
 */
int
tmpfs_alloc_file(struct vnode *dvp, struct vnode **vpp, struct vattr *vap,
    struct componentname *cnp, char *target)
{
	tmpfs_mount_t *tmp = VFS_TO_TMPFS(dvp->v_mount);
	tmpfs_node_t *dnode = VP_TO_TMPFS_DIR(dvp), *node;
	tmpfs_dirent_t *de;
	int error;

	KASSERT(VOP_ISLOCKED(dvp));
	*vpp = NULL;

	/* Check for the maximum number of links limit. */
	if (vap->va_type == VDIR) {
		if (dnode->tn_links == LINK_MAX) {
			error = EMLINK;
			goto out;
		}
		KASSERT(dnode->tn_links < LINK_MAX);
	}

	if (TMPFS_DIRSEQ_FULL(dnode)) {
		error = ENOSPC;
		goto out;
	}

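	/* The parent directory has already been removed. */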
	if (dnode->tn_links == 0) {
		error = ENOENT;
		goto out;
	}

	/* Allocate a node that represents the new file. */
	error = tmpfs_alloc_node(tmp, vap->va_type, cnp->cn_cred->cr_uid,
	    dnode->tn_gid, vap->va_mode, target, vap->va_rdev, &node);
	if (error)
		goto out;

	/* Allocate a directory entry that points to the new file. */
	error = tmpfs_alloc_dirent(tmp, cnp->cn_nameptr, cnp->cn_namelen, &de);
	if (error) {
		tmpfs_free_node(tmp, node);
		goto out;
	}

	/* Get a vnode for the new file. */
	rw_enter_write(&node->tn_nlock);
	error = tmpfs_vnode_get(dvp->v_mount, node, vpp);
	if (error) {
		tmpfs_free_dirent(tmp, de);
		tmpfs_free_node(tmp, node);
		goto out;
	}

	/* Associate inode and attach the entry into the directory. */
	tmpfs_dir_attach(dnode, de, node);

out:
	if (error == 0 && (cnp->cn_flags & SAVESTART) == 0)
		pool_put(&namei_pool, cnp->cn_pnbuf);
	return error;
}

/*
 * tmpfs_alloc_dirent: allocate a new directory entry for the inode.
 * The directory entry contains a path name component.
 */
int
tmpfs_alloc_dirent(tmpfs_mount_t *tmp, const char *name, uint16_t len,
    tmpfs_dirent_t **de)
{
	tmpfs_dirent_t *nde;

	nde = tmpfs_dirent_get(tmp);
	if (nde == NULL)
		return ENOSPC;

	nde->td_name = tmpfs_strname_alloc(tmp, len);
	if (nde->td_name == NULL) {
		tmpfs_dirent_put(tmp, nde);
		return ENOSPC;
	}
	nde->td_namelen = len;
	memcpy(nde->td_name, name, len);
	nde->td_seq = TMPFS_DIRSEQ_NONE;

	*de = nde;
	return 0;
}

/*
 * tmpfs_free_dirent: free a directory entry.
 */
void
tmpfs_free_dirent(tmpfs_mount_t *tmp, tmpfs_dirent_t *de)
{
	KASSERT(de->td_node == NULL);
	KASSERT(de->td_seq == TMPFS_DIRSEQ_NONE);
	tmpfs_strname_free(tmp, de->td_name, de->td_namelen);
	tmpfs_dirent_put(tmp, de);
}

/*
 * tmpfs_dir_attach: associate a directory entry with the specified inode,
 * and attach the entry into the directory, specified by vnode.
 *
 * => Increases link count on the associated node.
 * => Increases link count on directory node, if our node is VDIR.
 *    It is the caller's responsibility to check for the LINK_MAX limit.
 * => Triggers kqueue events here.
 */
void
tmpfs_dir_attach(tmpfs_node_t *dnode, tmpfs_dirent_t *de, tmpfs_node_t *node)
{
	struct vnode *dvp = dnode->tn_vnode;
	int events = NOTE_WRITE;

	KASSERT(dvp != NULL);
	KASSERT(VOP_ISLOCKED(dvp));

	/* Get a new sequence number. */
	KASSERT(de->td_seq == TMPFS_DIRSEQ_NONE);
	de->td_seq = tmpfs_dir_getseq(dnode, de);

	/* Associate directory entry and the inode. */
	de->td_node = node;
	KASSERT(node->tn_links < LINK_MAX);
	node->tn_links++;

	/* Save the hint (might overwrite). */
	node->tn_dirent_hint = de;

	/* Insert the entry into the directory (parent of inode). */
	TAILQ_INSERT_TAIL(&dnode->tn_spec.tn_dir.tn_dir, de, td_entries);
	dnode->tn_size += sizeof(tmpfs_dirent_t);
	tmpfs_update(dnode, TMPFS_NODE_STATUSALL);
	uvm_vnp_setsize(dvp, dnode->tn_size);

	if (node->tn_type == VDIR) {
		/* Set parent. */
		KASSERT(node->tn_spec.tn_dir.tn_parent == NULL);
		node->tn_spec.tn_dir.tn_parent = dnode;

		/* Increase the link count of parent. */
		KASSERT(dnode->tn_links < LINK_MAX);
		dnode->tn_links++;
		events |= NOTE_LINK;

		TMPFS_VALIDATE_DIR(node);
	}
	VN_KNOTE(dvp, events);
}

/*
 * tmpfs_dir_detach: disassociate directory entry and its inode,
 * and detach the entry from the directory, specified by vnode.
 *
 * => Decreases link count on the associated node.
 * => Decreases the link count on directory node, if our node is VDIR.
 * => Triggers kqueue events here.
 */
void
tmpfs_dir_detach(tmpfs_node_t *dnode, tmpfs_dirent_t *de)
{
	tmpfs_node_t *node = de->td_node;
	struct vnode *vp, *dvp = dnode->tn_vnode;
	int events = NOTE_WRITE;

	KASSERT(dvp == NULL || VOP_ISLOCKED(dvp));

	/* Disassociate the inode and the entry. */
	de->td_node = NULL;
	node->tn_dirent_hint = NULL;

	KASSERT(node->tn_links > 0);
	node->tn_links--;
	if ((vp = node->tn_vnode) != NULL) {
		KASSERT(VOP_ISLOCKED(vp));
		VN_KNOTE(vp, node->tn_links ? NOTE_LINK : NOTE_DELETE);
	}

	/* If directory - decrease the link count of parent. */
	if (node->tn_type == VDIR) {
		KASSERT(node->tn_spec.tn_dir.tn_parent == dnode);
		node->tn_spec.tn_dir.tn_parent = NULL;

		KASSERT(dnode->tn_links > 0);
		dnode->tn_links--;
		events |= NOTE_LINK;
	}

	/* Remove the entry from the directory. */
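	/* Invalidate the readdir cache if it points at this entry. */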
	if (dnode->tn_spec.tn_dir.tn_readdir_lastp == de) {
		dnode->tn_spec.tn_dir.tn_readdir_lastp = NULL;
	}
	TAILQ_REMOVE(&dnode->tn_spec.tn_dir.tn_dir, de, td_entries);

	dnode->tn_size -= sizeof(tmpfs_dirent_t);
	tmpfs_update(dnode, TMPFS_NODE_MODIFIED | TMPFS_NODE_CHANGED);
	tmpfs_dir_putseq(dnode, de);
	if (dvp) {
		tmpfs_update(dnode, 0);
		uvm_vnp_setsize(dvp, dnode->tn_size);
		VN_KNOTE(dvp, events);
	}
}

/*
 * tmpfs_dir_lookup: find a directory entry in the specified inode.
 *
 * Note that the . and .. components are not allowed as they do not
 * physically exist within directories.
 */
tmpfs_dirent_t *
tmpfs_dir_lookup(tmpfs_node_t *node, struct componentname *cnp)
{
	const char *name = cnp->cn_nameptr;
	const uint16_t nlen = cnp->cn_namelen;
	tmpfs_dirent_t *de;

	KASSERT(VOP_ISLOCKED(node->tn_vnode));
	KASSERT(nlen != 1 || !(name[0] == '.'));
	KASSERT(nlen != 2 || !(name[0] == '.' && name[1] == '.'));
	TMPFS_VALIDATE_DIR(node);

	TAILQ_FOREACH(de, &node->tn_spec.tn_dir.tn_dir, td_entries) {
		if (de->td_namelen != nlen)
			continue;
		if (memcmp(de->td_name, name, nlen) != 0)
			continue;
		break;
	}
	tmpfs_update(node, TMPFS_NODE_ACCESSED);
	return de;
}

/*
 * tmpfs_dir_cached: get a cached directory entry if it is valid.  Used to
 * avoid an unnecessary tmpfs_dir_lookup().
 *
 * => The vnode must be locked.
 */
tmpfs_dirent_t *
tmpfs_dir_cached(tmpfs_node_t *node)
{
	tmpfs_dirent_t *de = node->tn_dirent_hint;

	KASSERT(VOP_ISLOCKED(node->tn_vnode));

	if (de == NULL) {
		return NULL;
	}
	KASSERT(de->td_node == node);

	/*
	 * Directories always have a valid hint.  For files, check if
	 * there are any hard links.  If there are, the hint might be
	 * invalid.
	 */
	return (node->tn_type != VDIR && node->tn_links > 1) ? NULL : de;
}

/*
 * tmpfs_dir_getseq: get a per-directory sequence number for the entry.
 */
uint64_t
tmpfs_dir_getseq(tmpfs_node_t *dnode, tmpfs_dirent_t *de)
{
	uint64_t seq = de->td_seq;

	TMPFS_VALIDATE_DIR(dnode);

	if (__predict_true(seq != TMPFS_DIRSEQ_NONE)) {
		/* Already set. */
		KASSERT(seq >= TMPFS_DIRSEQ_START);
		return seq;
	}

	/*
	 * The "." and ".." entries and the end-of-directory marker have
	 * reserved numbers.  The other sequence numbers are allocated
	 * incrementally.
	 */

	seq = dnode->tn_spec.tn_dir.tn_next_seq;
	KASSERT(seq >= TMPFS_DIRSEQ_START);
	KASSERT(seq < TMPFS_DIRSEQ_END);
	dnode->tn_spec.tn_dir.tn_next_seq++;
	return seq;
}

void
tmpfs_dir_putseq(tmpfs_node_t *dnode, tmpfs_dirent_t *de)
{
	uint64_t seq = de->td_seq;

	TMPFS_VALIDATE_DIR(dnode);
	KASSERT(seq == TMPFS_DIRSEQ_NONE || seq >= TMPFS_DIRSEQ_START);
	KASSERT(seq == TMPFS_DIRSEQ_NONE || seq < TMPFS_DIRSEQ_END);

	de->td_seq = TMPFS_DIRSEQ_NONE;

	/* Empty?  We can reset. */
	if (dnode->tn_size == 0) {
		dnode->tn_spec.tn_dir.tn_next_seq = TMPFS_DIRSEQ_START;
	} else if (seq != TMPFS_DIRSEQ_NONE &&
	    seq == dnode->tn_spec.tn_dir.tn_next_seq - 1) {
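		/*
		 * The freed entry held the most recently allocated
		 * sequence number; hand it back to the allocator.
		 */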
		dnode->tn_spec.tn_dir.tn_next_seq--;
	}
}

/*
 * tmpfs_dir_lookupbyseq: lookup a directory entry by the sequence number.
 */
tmpfs_dirent_t *
tmpfs_dir_lookupbyseq(tmpfs_node_t *node, off_t seq)
{
	tmpfs_dirent_t *de = node->tn_spec.tn_dir.tn_readdir_lastp;

	TMPFS_VALIDATE_DIR(node);

	/*
	 * First, check the cache.  If it does not match, perform a lookup.
	 */
	if (de && de->td_seq == seq) {
		KASSERT(de->td_seq >= TMPFS_DIRSEQ_START);
		KASSERT(de->td_seq != TMPFS_DIRSEQ_NONE);
		return de;
	}
	TAILQ_FOREACH(de, &node->tn_spec.tn_dir.tn_dir, td_entries) {
		KASSERT(de->td_seq >= TMPFS_DIRSEQ_START);
		KASSERT(de->td_seq != TMPFS_DIRSEQ_NONE);
		if (de->td_seq == seq)
			return de;
	}
	return NULL;
}

/*
 * tmpfs_dir_getdotents: helper function for tmpfs_readdir() to get the
 * dot meta entries, that is, "." or "..".  Copy it to the UIO space.
 */
int
tmpfs_dir_getdotents(tmpfs_node_t *node, struct dirent *dp, struct uio *uio)
{
	tmpfs_dirent_t *de;
	off_t next = 0;
	int error;

	switch (uio->uio_offset) {
	case TMPFS_DIRSEQ_DOT:
		dp->d_fileno = node->tn_id;
		strlcpy(dp->d_name, ".", sizeof(dp->d_name));
		next = TMPFS_DIRSEQ_DOTDOT;
		break;
	case TMPFS_DIRSEQ_DOTDOT:
		dp->d_fileno = node->tn_spec.tn_dir.tn_parent->tn_id;
		strlcpy(dp->d_name, "..", sizeof(dp->d_name));
		de = TAILQ_FIRST(&node->tn_spec.tn_dir.tn_dir);
		next = de ? tmpfs_dir_getseq(node, de) : TMPFS_DIRSEQ_EOF;
		break;
	default:
		KASSERT(false);
	}
	dp->d_type = DT_DIR;
	dp->d_namlen = strlen(dp->d_name);
	dp->d_reclen = DIRENT_SIZE(dp);
	dp->d_off = next;

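	/* The entry does not fit into the remaining uio space. */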
	if (dp->d_reclen > uio->uio_resid) {
		return EJUSTRETURN;
	}

	if ((error = uiomove(dp, dp->d_reclen, uio)) != 0) {
		return error;
	}

	uio->uio_offset = next;
	return error;
}

/*
 * tmpfs_dir_getdents: helper function for tmpfs_readdir.
 *
 * => Returns as many directory entries as can fit in the uio space.
 * => The read starts at uio->uio_offset.
 */
int
tmpfs_dir_getdents(tmpfs_node_t *node, struct uio *uio)
{
	tmpfs_dirent_t *de, *next_de;
	struct dirent dent;
	int error = 0;

	KASSERT(VOP_ISLOCKED(node->tn_vnode));
	TMPFS_VALIDATE_DIR(node);
	memset(&dent, 0, sizeof(dent));

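	/*
	 * Emit the "." and ".." entries first.  tmpfs_dir_getdotents()
	 * advances uio_offset, so both may be copied out in one call.
	 */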
	if (uio->uio_offset == TMPFS_DIRSEQ_DOT) {
		if ((error = tmpfs_dir_getdotents(node, &dent, uio)) != 0) {
			goto done;
		}
	}
	if (uio->uio_offset == TMPFS_DIRSEQ_DOTDOT) {
		if ((error = tmpfs_dir_getdotents(node, &dent, uio)) != 0) {
			goto done;
		}
	}
	/* Done if we reached the end. */
	if (uio->uio_offset == TMPFS_DIRSEQ_EOF) {
		goto done;
	}

	/* Locate the directory entry for the given sequence number. */
	de = tmpfs_dir_lookupbyseq(node, uio->uio_offset);
	if (de == NULL) {
		error = EINVAL;
		goto done;
	}

	/*
	 * Read as many entries as possible; i.e., until we reach the end
	 * of the directory or we exhaust the UIO space.
	 */
	do {
		dent.d_fileno = de->td_node->tn_id;
		switch (de->td_node->tn_type) {
		case VBLK:
			dent.d_type = DT_BLK;
			break;
		case VCHR:
			dent.d_type = DT_CHR;
			break;
		case VDIR:
			dent.d_type = DT_DIR;
			break;
		case VFIFO:
			dent.d_type = DT_FIFO;
			break;
		case VLNK:
			dent.d_type = DT_LNK;
			break;
		case VREG:
			dent.d_type = DT_REG;
			break;
		case VSOCK:
			dent.d_type = DT_SOCK;
			break;
		default:
			KASSERT(0);
		}
		dent.d_namlen = de->td_namelen;
		KASSERT(de->td_namelen < sizeof(dent.d_name));
		memcpy(dent.d_name, de->td_name, de->td_namelen);
		dent.d_name[de->td_namelen] = '\0';
		dent.d_reclen = DIRENT_SIZE(&dent);

		if (memchr(dent.d_name, '/', dent.d_namlen) != NULL) {
			error = EINVAL;
			break;
		}

		next_de = TAILQ_NEXT(de, td_entries);
		if (next_de == NULL)
			dent.d_off = TMPFS_DIRSEQ_EOF;
		else
			dent.d_off = tmpfs_dir_getseq(node, next_de);

		if (dent.d_reclen > uio->uio_resid) {
			/* Exhausted UIO space. */
			error = EJUSTRETURN;
			break;
		}

		/* Copy out the directory entry and continue. */
		error = uiomove(&dent, dent.d_reclen, uio);
		if (error) {
			break;
		}
		de = TAILQ_NEXT(de, td_entries);

	} while (uio->uio_resid > 0 && de);

	/* Cache the last entry or clear and mark EOF. */
	uio->uio_offset = de ? tmpfs_dir_getseq(node, de) : TMPFS_DIRSEQ_EOF;
	node->tn_spec.tn_dir.tn_readdir_lastp = de;
done:
	tmpfs_update(node, TMPFS_NODE_ACCESSED);

	if (error == EJUSTRETURN) {
		/* Exhausted UIO space - just return. */
		error = 0;
	}
	KASSERT(error >= 0);
	return error;
}

/*
 * tmpfs_reg_resize: resize the underlying UVM object associated with the
 * specified regular file.
 */
int
tmpfs_reg_resize(struct vnode *vp, off_t newsize)
{
	tmpfs_mount_t *tmp = VFS_TO_TMPFS(vp->v_mount);
	tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
	struct uvm_object *uobj = node->tn_spec.tn_reg.tn_aobj;
	size_t newpages, oldpages, bytes;
	off_t oldsize;
	vaddr_t pgoff;
	int error;

	KASSERT(vp->v_type == VREG);
	KASSERT(newsize >= 0);

	oldsize = node->tn_size;
	oldpages = round_page(oldsize) >> PAGE_SHIFT;
	newpages = round_page(newsize) >> PAGE_SHIFT;
	KASSERT(oldpages == node->tn_spec.tn_reg.tn_aobj_pages);

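	/*
	 * Grow the underlying anonymous object if needed.  The memory
	 * accounting is charged first and rolled back if uao_grow() fails.
	 */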
	if (newpages > oldpages) {
		/* Increase the used-memory counter if getting extra pages. */
		bytes = (newpages - oldpages) << PAGE_SHIFT;
		if (tmpfs_mem_incr(tmp, bytes) == 0)
			return ENOSPC;
		rw_enter(uobj->vmobjlock, RW_WRITE);
		error = uao_grow(uobj, newpages);
		rw_exit(uobj->vmobjlock);
		if (error) {
			tmpfs_mem_decr(tmp, bytes);
			return ENOSPC;
		}
	}

	node->tn_spec.tn_reg.tn_aobj_pages = newpages;
	node->tn_size = newsize;
	uvm_vnp_setsize(vp, newsize);
	uvm_vnp_uncache(vp);

	/*
	 * Free the "backing store".
	 */
	if (newpages < oldpages) {
		if (tmpfs_uio_cached(node))
			tmpfs_uio_uncache(node);
		rw_enter(uobj->vmobjlock, RW_WRITE);
		if (uao_shrink(uobj, newpages))
			panic("shrink failed");
		rw_exit(uobj->vmobjlock);
		/* Decrease the used-memory counter. */
		tmpfs_mem_decr(tmp, (oldpages - newpages) << PAGE_SHIFT);
	}
	if (newsize > oldsize) {
		if (tmpfs_uio_cached(node))
			tmpfs_uio_uncache(node);
		pgoff = oldsize & PAGE_MASK;
		if (pgoff != 0) {
			/*
			 * Growing from an offset which is not at a page
			 * boundary; zero out unused bytes in the current
			 * page.
			 */
			error = tmpfs_zeropg(node, trunc_page(oldsize), pgoff);
			if (error)
				panic("tmpfs_zeropg: error %d", error);
		}
		VN_KNOTE(vp, NOTE_EXTEND);
	}
	return 0;
}

/*
 * tmpfs_chflags: change flags of the given vnode.
 */
int
tmpfs_chflags(struct vnode *vp, int flags, struct ucred *cred, struct proc *p)
{
	tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
	int error;

	KASSERT(VOP_ISLOCKED(vp));

	/* Disallow this operation if the file system is mounted read-only. */
	if (vp->v_mount->mnt_flag & MNT_RDONLY)
		return EROFS;

	if (cred->cr_uid != node->tn_uid && (error = suser_ucred(cred)))
		return error;

	if (cred->cr_uid == 0) {
		if (node->tn_flags & (SF_IMMUTABLE | SF_APPEND) &&
		    securelevel > 0)
			return EPERM;
		node->tn_flags = flags;
	} else {
		if (node->tn_flags & (SF_IMMUTABLE | SF_APPEND) ||
		    (flags & UF_SETTABLE) != flags)
			return EPERM;
		node->tn_flags &= SF_SETTABLE;
		node->tn_flags |= (flags & UF_SETTABLE);
	}

	tmpfs_update(node, TMPFS_NODE_CHANGED);
	VN_KNOTE(vp, NOTE_ATTRIB);
	return 0;
}

/*
 * tmpfs_chmod: change access mode on the given vnode.
 */
int
tmpfs_chmod(struct vnode *vp, mode_t mode, struct ucred *cred, struct proc *p)
{
	tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
	int error;

	KASSERT(VOP_ISLOCKED(vp));

	/* Disallow this operation if the file system is mounted read-only. */
	if (vp->v_mount->mnt_flag & MNT_RDONLY)
		return EROFS;

	/* Immutable or append-only files cannot be modified, either. */
	if (node->tn_flags & (IMMUTABLE | APPEND))
		return EPERM;

	if (cred->cr_uid != node->tn_uid && (error = suser_ucred(cred)))
		return error;
	if (cred->cr_uid != 0) {
		if (vp->v_type != VDIR && (mode & S_ISTXT))
			return EFTYPE;
		if (!groupmember(node->tn_gid, cred) && (mode & S_ISGID))
			return EPERM;
	}

	node->tn_mode = (mode & ALLPERMS);
	tmpfs_update(node, TMPFS_NODE_CHANGED);
	if ((vp->v_flag & VTEXT) && (node->tn_mode & S_ISTXT) == 0)
		uvm_vnp_uncache(vp);
	VN_KNOTE(vp, NOTE_ATTRIB);
	return 0;
}

/*
 * tmpfs_chown: change ownership of the given vnode.
 *
 * => At least one of uid or gid must be different from VNOVAL.
 * => The attribute is unchanged for the VNOVAL case.
 */
int
tmpfs_chown(struct vnode *vp, uid_t uid, gid_t gid, struct ucred *cred,
    struct proc *p)
{
	tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
	int error;

	KASSERT(VOP_ISLOCKED(vp));

	/* Assign default values if they are unknown. */
	KASSERT(uid != VNOVAL || gid != VNOVAL);
	if (uid == VNOVAL) {
		uid = node->tn_uid;
	}
	if (gid == VNOVAL) {
		gid = node->tn_gid;
	}

	/* Disallow this operation if the file system is mounted read-only. */
	if (vp->v_mount->mnt_flag & MNT_RDONLY)
		return EROFS;

	/* Immutable or append-only files cannot be modified, either. */
	if (node->tn_flags & (IMMUTABLE | APPEND))
		return EPERM;

	if ((cred->cr_uid != node->tn_uid || uid != node->tn_uid ||
	    (gid != node->tn_gid && !groupmember(gid, cred))) &&
	    (error = suser_ucred(cred)))
		return error;

	node->tn_uid = uid;
	node->tn_gid = gid;
	tmpfs_update(node, TMPFS_NODE_CHANGED);
	VN_KNOTE(vp, NOTE_ATTRIB);
	return 0;
}

/*
 * tmpfs_chsize: change size of the given vnode.
 */
int
tmpfs_chsize(struct vnode *vp, u_quad_t size, struct ucred *cred,
    struct proc *p)
{
	tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);

	KASSERT(VOP_ISLOCKED(vp));

	/* Decide whether this is a valid operation based on the file type. */
	switch (vp->v_type) {
	case VDIR:
		return EISDIR;
	case VREG:
		if (vp->v_mount->mnt_flag & MNT_RDONLY) {
			return EROFS;
		}
		break;
	case VBLK:
	case VCHR:
	case VFIFO:
		/*
		 * Allow modifications of special files even if the file
		 * system is mounted read-only (we are not modifying the
		 * files themselves, but the objects they represent).
		 */
		return 0;
	default:
		return EOPNOTSUPP;
	}

	/* Immutable or append-only files cannot be modified, either. */
	if (node->tn_flags & (IMMUTABLE | APPEND)) {
		return EPERM;
	}

	/* Note: tmpfs_truncate() will raise NOTE_EXTEND and NOTE_ATTRIB. */
	return tmpfs_truncate(vp, size);
}

/*
 * tmpfs_chtimes: change access and modification times for the vnode.
 */
int
tmpfs_chtimes(struct vnode *vp, const struct timespec *atime,
    const struct timespec *mtime, int vaflags, struct ucred *cred,
    struct proc *p)
{
	tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
	int error;

	KASSERT(VOP_ISLOCKED(vp));

	/* Disallow this operation if the file system is mounted read-only. */
	if (vp->v_mount->mnt_flag & MNT_RDONLY)
		return EROFS;

	/* Immutable or append-only files cannot be modified, either. */
	if (node->tn_flags & (IMMUTABLE | APPEND))
		return EPERM;

	if (cred->cr_uid != node->tn_uid && (error = suser_ucred(cred)) &&
	    ((vaflags & VA_UTIMES_NULL) == 0 ||
	    (error = VOP_ACCESS(vp, VWRITE, cred, p))))
		return error;

	if (atime->tv_nsec != VNOVAL)
		node->tn_atime = *atime;

	if (mtime->tv_nsec != VNOVAL)
		node->tn_mtime = *mtime;

	if (mtime->tv_nsec != VNOVAL || (vaflags & VA_UTIMES_CHANGE))
		tmpfs_update(VP_TO_TMPFS_NODE(vp), TMPFS_NODE_CHANGED);

	VN_KNOTE(vp, NOTE_ATTRIB);

	return 0;
}

/*
 * tmpfs_update: update timestamps, et al.
 */
void
tmpfs_update(tmpfs_node_t *node, int flags)
{
	struct timespec nowtm;

	nanotime(&nowtm);

	if (flags & TMPFS_NODE_ACCESSED) {
		node->tn_atime = nowtm;
	}
	if (flags & TMPFS_NODE_MODIFIED) {
		node->tn_mtime = nowtm;
	}
	if (flags & TMPFS_NODE_CHANGED) {
		node->tn_ctime = nowtm;
	}
}

int
tmpfs_truncate(struct vnode *vp, off_t length)
{
	tmpfs_node_t *node = VP_TO_TMPFS_NODE(vp);
	int error;

	if (length < 0) {
		error = EINVAL;
		goto out;
	}
	if (node->tn_size == length) {
		error = 0;
		goto out;
	}
	error = tmpfs_reg_resize(vp, length);
	if (error == 0) {
		tmpfs_update(node, TMPFS_NODE_CHANGED | TMPFS_NODE_MODIFIED);
	}
out:
	return error;
}

int
tmpfs_uio_cached(tmpfs_node_t *node)
{
	int pgnum_valid = (node->tn_pgnum != (voff_t)-1);
	int pgptr_valid = (node->tn_pgptr != (vaddr_t)NULL);
	KASSERT(pgnum_valid == pgptr_valid);
	return pgnum_valid && pgptr_valid;
}

vaddr_t
tmpfs_uio_lookup(tmpfs_node_t *node, voff_t pgnum)
{
	if (tmpfs_uio_cached(node) == 1 && node->tn_pgnum == pgnum)
		return node->tn_pgptr;

	return (vaddr_t)NULL;
}

void
tmpfs_uio_uncache(tmpfs_node_t *node)
{
	KASSERT(node->tn_pgnum != (voff_t)-1);
	KASSERT(node->tn_pgptr != (vaddr_t)NULL);
	uvm_unmap(kernel_map, node->tn_pgptr, node->tn_pgptr + PAGE_SIZE);
	node->tn_pgnum = (voff_t)-1;
	node->tn_pgptr = (vaddr_t)NULL;
}

void
tmpfs_uio_cache(tmpfs_node_t *node, voff_t pgnum, vaddr_t pgptr)
{
	KASSERT(node->tn_pgnum == (voff_t)-1);
	KASSERT(node->tn_pgptr == (vaddr_t)NULL);
	node->tn_pgnum = pgnum;
	node->tn_pgptr = pgptr;
}

/*
 * Be gentle to kernel_map, don't allow more than 4MB in a single transaction.
 */
#define TMPFS_UIO_MAXBYTES	((1 << 22) - PAGE_SIZE)

int
tmpfs_uiomove(tmpfs_node_t *node, struct uio *uio, vsize_t len)
{
	vaddr_t va, pgoff;
	int error, adv;
	voff_t pgnum;
	vsize_t sz;

	pgnum = trunc_page(uio->uio_offset);
	pgoff = uio->uio_offset & PAGE_MASK;

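	/*
	 * Fast path: the transfer fits within a single page and that
	 * page is already mapped by the per-node cache.
	 */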
	if (pgoff + len < PAGE_SIZE) {
		va = tmpfs_uio_lookup(node, pgnum);
		if (va != (vaddr_t)NULL)
			return uiomove((void *)va + pgoff, len, uio);
	}

	if (len >= TMPFS_UIO_MAXBYTES) {
		sz = TMPFS_UIO_MAXBYTES;
		adv = MADV_NORMAL;
	} else {
		sz = len;
		adv = MADV_SEQUENTIAL;
	}

	if (tmpfs_uio_cached(node))
		tmpfs_uio_uncache(node);

	uao_reference(node->tn_uobj);

	error = uvm_map(kernel_map, &va, round_page(pgoff + sz), node->tn_uobj,
	    trunc_page(uio->uio_offset), 0, UVM_MAPFLAG(PROT_READ | PROT_WRITE,
	    PROT_READ | PROT_WRITE, MAP_INHERIT_NONE, adv, 0));
	if (error) {
		uao_detach(node->tn_uobj);	/* Drop reference. */
		return error;
	}

	error = uiomove((void *)va + pgoff, sz, uio);
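	/*
	 * Keep single-page mappings cached for the next sequential
	 * request; larger mappings are torn down immediately.
	 */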
	if (error == 0 && pgoff + sz < PAGE_SIZE)
		tmpfs_uio_cache(node, pgnum, va);
	else
		uvm_unmap(kernel_map, va, va + round_page(pgoff + sz));

	return error;
}

int
tmpfs_zeropg(tmpfs_node_t *node, voff_t pgnum, vaddr_t pgoff)
{
	vaddr_t va;
	int error;

	KASSERT(tmpfs_uio_cached(node) == 0);

	uao_reference(node->tn_uobj);

	error = uvm_map(kernel_map, &va, PAGE_SIZE, node->tn_uobj, pgnum, 0,
	    UVM_MAPFLAG(PROT_READ | PROT_WRITE, PROT_READ | PROT_WRITE,
	    MAP_INHERIT_NONE, MADV_NORMAL, 0));
	if (error) {
		uao_detach(node->tn_uobj);	/* Drop reference. */
		return error;
	}

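	/* Zero from pgoff to the end of the page. */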
	bzero((void *)va + pgoff, PAGE_SIZE - pgoff);
	uvm_unmap(kernel_map, va, va + PAGE_SIZE);

	return 0;
}