xref: /dragonfly/sys/vfs/tmpfs/tmpfs_vfsops.c (revision 0b2c5ee3)
/*	$NetBSD: tmpfs_vfsops.c,v 1.10 2005/12/11 12:24:29 christos Exp $	*/

/*-
 * Copyright (c) 2005 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Julio M. Merino Vidal, developed as part of Google's Summer of Code
 * 2005 program.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Efficient memory file system.
 *
 * tmpfs is a file system that uses virtual memory to store file data and
 * metadata efficiently. It does not follow the structure of an on-disk
 * file system because it simply does not need to. Instead, it uses
 * memory-specific data structures and algorithms to automatically
 * allocate and release resources.
 */

#include <sys/conf.h>
#include <sys/param.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/kernel.h>
#include <sys/stat.h>
#include <sys/systm.h>
#include <sys/sysctl.h>

#include <vm/vm.h>
#include <vm/vm_object.h>
#include <vm/vm_param.h>

#include "tmpfs.h"
#include <vfs/tmpfs/tmpfs_vnops.h>
#include <vfs/tmpfs/tmpfs_mount.h>

/*
 * Default permission for root node
 */
#define TMPFS_DEFAULT_ROOT_MODE	(S_IRWXU|S_IRGRP|S_IXGRP|S_IROTH|S_IXOTH)

MALLOC_DEFINE(M_TMPFSMNT, "tmpfs mount", "tmpfs mount structures");

/* --------------------------------------------------------------------- */

static int	tmpfs_mount(struct mount *, char *, caddr_t, struct ucred *);
static int	tmpfs_unmount(struct mount *, int);
static int	tmpfs_root(struct mount *, struct vnode **);
static int	tmpfs_fhtovp(struct mount *, struct vnode *, struct fid *, struct vnode **);
static int	tmpfs_statfs(struct mount *, struct statfs *, struct ucred *cred);

/* --------------------------------------------------------------------- */

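/*
 * Initialize the type-independent fields of a newly allocated tmpfs node:
 * the starting block size, the per-node interlock, and a randomized
 * generation number (matched against NFS file handles in tmpfs_fhtovp()).
 */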
void
tmpfs_node_init(struct tmpfs_node *node)
{
	node->tn_blksize = PAGE_SIZE;	/* start small */
	lockinit(&node->tn_interlock, "tmpfs node interlock", 0, LK_CANRECURSE);
	node->tn_gen = karc4random();
}

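/*
 * Final teardown of a tmpfs node that is about to be freed: clear its
 * type, mark its vnode state as doomed, and destroy the interlock.
 */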
void
tmpfs_node_uninit(struct tmpfs_node *node)
{
	node->tn_type = VNON;
	node->tn_vpstate = TMPFS_VNODE_DOOMED;
	lockuninit(&node->tn_interlock);
}

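/*
 * Mount a tmpfs instance.  Copy the mount arguments in from userland,
 * derive the page, node, and maximum file size limits, create the
 * per-mount allocation zones, allocate the root directory node, and
 * install the tmpfs vnode operations vectors.
 */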
static int
tmpfs_mount(struct mount *mp, char *path, caddr_t data, struct ucred *cred)
{
	struct tmpfs_mount *tmp;
	struct tmpfs_node *root;
	struct tmpfs_mount_info args;
	vm_pindex_t pages;
	vm_pindex_t pages_limit;
	ino_t nodes;
	u_int64_t	maxfsize;
	int error;
	/* Size counters. */
	ino_t	nodes_max;
	off_t	size_max;
	size_t	maxfsize_max;
	size_t	size;

	/* Root node attributes. */
	uid_t	root_uid = cred->cr_uid;
	gid_t	root_gid = cred->cr_gid;
	mode_t	root_mode = (VREAD | VWRITE);

	if (mp->mnt_flag & MNT_UPDATE) {
		/* XXX: There is no support yet to update file system
		 * settings.  Should be added. */

		return EOPNOTSUPP;
	}

	/*
	 * mount info
	 */
	bzero(&args, sizeof(args));
	size_max  = 0;
	nodes_max = 0;
	maxfsize_max = 0;

	if (path) {
		if (data) {
			error = copyin(data, &args, sizeof(args));
			if (error)
				return (error);
		}
		size_max = args.ta_size_max;
		nodes_max = args.ta_nodes_max;
		maxfsize_max = args.ta_maxfsize_max;
		root_uid = args.ta_root_uid;
		root_gid = args.ta_root_gid;
		root_mode = args.ta_root_mode;
	}

	/*
	 * For mounts by non-root users, restrict the root node permissions
	 * to read access, plus write access when the mount itself is not
	 * read-only.
	 */
	if (cred->cr_uid != 0) {
		root_mode = VREAD;
		if ((mp->mnt_flag & MNT_RDONLY) == 0)
			root_mode |= VWRITE;
	}

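	/*
	 * Nominal backing store available to tmpfs: all of configured swap
	 * plus half of physical memory.
	 */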
	pages_limit = vm_swap_max + vmstats.v_page_count / 2;

	if (size_max == 0) {
		pages = pages_limit / 2;
	} else if (size_max < PAGE_SIZE) {
		pages = 1;
	} else if (OFF_TO_IDX(size_max) > pages_limit) {
		/*
		 * do not force pages = pages_limit for this case, otherwise
		 * we might not honor tmpfs size requests from /etc/fstab
		 * during boot because they are mounted prior to swap being
		 * turned on.
		 */
		pages = OFF_TO_IDX(size_max);
	} else {
		pages = OFF_TO_IDX(size_max);
	}

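	/*
	 * Default the node limit to roughly one node per KB of data space.
	 * An explicit limit is clamped to at least 3 nodes and at most one
	 * node per page.
	 */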
	if (nodes_max == 0)
		nodes = 3 + pages * PAGE_SIZE / 1024;
	else if (nodes_max < 3)
		nodes = 3;
	else if (nodes_max > pages)
		nodes = pages;
	else
		nodes = nodes_max;

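	/*
	 * Cap the per-file size limit just below the largest positive
	 * off_t unless a smaller limit was requested.
	 */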
	maxfsize = 0x7FFFFFFFFFFFFFFFLLU - TMPFS_BLKSIZE;
	if (maxfsize_max != 0 && maxfsize > maxfsize_max)
		maxfsize = maxfsize_max;

	/* Allocate the tmpfs mount structure and fill it. */
	tmp = kmalloc(sizeof(*tmp), M_TMPFSMNT, M_WAITOK | M_ZERO);

	tmp->tm_mount = mp;
	tmp->tm_nodes_max = nodes;
	tmp->tm_nodes_inuse = 0;
	tmp->tm_maxfilesize = maxfsize;
	LIST_INIT(&tmp->tm_nodes_used);

	tmp->tm_pages_max = pages;
	tmp->tm_pages_used = 0;

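	/*
	 * Per-mount allocation zones for nodes, directory entries, and
	 * file name storage.
	 */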
	kmalloc_create_obj(&tmp->tm_node_zone, "tmpfs node",
			sizeof(struct tmpfs_node));
	kmalloc_create_obj(&tmp->tm_dirent_zone, "tmpfs dirent",
			sizeof(struct tmpfs_dirent));
	kmalloc_create(&tmp->tm_name_zone, "tmpfs name zone");

	kmalloc_obj_raise_limit(tmp->tm_node_zone,
				sizeof(struct tmpfs_node) * tmp->tm_nodes_max);

	tmp->tm_ino = TMPFS_ROOTINO;

	/* Allocate the root node. */
	error = tmpfs_alloc_node(tmp, VDIR, root_uid, root_gid,
				 root_mode & ALLPERMS, NULL,
				 VNOVAL, VNOVAL, &root);
	if (error != 0 || root == NULL) {
		kmalloc_destroy(&tmp->tm_name_zone);
		kmalloc_destroy(&tmp->tm_dirent_zone_obj);
		kmalloc_destroy(&tmp->tm_node_zone_obj);
		kfree(tmp, M_TMPFSMNT);
		return error;
	}

	/*
	 * We are backed by swap, set snocache chflags flag so we
	 * don't trip over swapcache.
	 */
	root->tn_flags = SF_NOCACHE;

	KASSERT(root->tn_id == TMPFS_ROOTINO,
		("tmpfs root with invalid ino: %ju", (uintmax_t)root->tn_id));

	atomic_add_int(&root->tn_links, 1);	/* keep around */
	tmp->tm_root = root;

	mp->mnt_flag |= MNT_LOCAL;
	mp->mnt_kern_flag |= MNTK_ALL_MPSAFE;
	mp->mnt_kern_flag |= MNTK_NOMSYNC;
	mp->mnt_kern_flag |= MNTK_THR_SYNC;	/* new vsyncscan semantics */
	mp->mnt_kern_flag |= MNTK_QUICKHALT;	/* no teardown needed on halt */
	mp->mnt_data = (qaddr_t)tmp;
	mp->mnt_iosize_max = MAXBSIZE;
	vfs_getnewfsid(mp);

	vfs_add_vnodeops(mp, &tmpfs_vnode_vops, &mp->mnt_vn_norm_ops);
	vfs_add_vnodeops(mp, &tmpfs_fifo_vops, &mp->mnt_vn_fifo_ops);

	copystr("tmpfs", mp->mnt_stat.f_mntfromname, MNAMELEN - 1, &size);
	bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size);
	bzero(mp->mnt_stat.f_mntonname, sizeof(mp->mnt_stat.f_mntonname));
	copyinstr(path, mp->mnt_stat.f_mntonname,
		  sizeof(mp->mnt_stat.f_mntonname) - 1,
		  &size);

	tmpfs_statfs(mp, &mp->mnt_stat, cred);

	return 0;
}

/* --------------------------------------------------------------------- */

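/*
 * Unmount a tmpfs instance.  Truncate all regular files so their pages
 * are released, flush every vnode, dismantle the directory tree, and
 * finally free the remaining nodes and the per-mount allocation zones.
 */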
/* ARGSUSED2 */
static int
tmpfs_unmount(struct mount *mp, int mntflags)
{
	int error;
	int flags = 0;
	struct tmpfs_mount *tmp;
	struct tmpfs_node *node;
	struct vnode *vp;
	int isok;

	tmp = VFS_TO_TMPFS(mp);
	TMPFS_LOCK(tmp);

	/* Handle forced unmounts. */
	if (mntflags & MNT_FORCE)
		flags |= FORCECLOSE;

	/*
	 * Finalize all pending I/O.  In the case of tmpfs we want
	 * to throw all the data away so clean out the buffer cache
	 * and vm objects before calling vflush().
	 */
	LIST_FOREACH(node, &tmp->tm_nodes_used, tn_entries) {
		/*
		 * tn_links is mnt_token protected
		 */
		atomic_add_int(&node->tn_links, 1);
		TMPFS_NODE_LOCK(node);

		while (node->tn_type == VREG && node->tn_vnode) {
			vp = node->tn_vnode;
			vhold(vp);
			TMPFS_NODE_UNLOCK(node);
			lwkt_yield();

			/*
			 * vx_get/vx_put and tmpfs_truncate may block,
			 * releasing the tmpfs mountpoint token.
			 *
			 * Make sure the lock order is correct.
			 */
			vx_get(vp);		/* held vnode */
			TMPFS_NODE_LOCK(node);
			if (node->tn_vnode == vp) {
				tmpfs_truncate(vp, 0);
				isok = 1;
			} else {
				isok = 0;
			}
			TMPFS_NODE_UNLOCK(node);
			vx_put(vp);
			vdrop(vp);
			TMPFS_NODE_LOCK(node);
			if (isok)
				break;
			/* retry */
		}

		TMPFS_NODE_UNLOCK(node);
		atomic_add_int(&node->tn_links, -1);
	}

	/*
	 * Flush all vnodes on the unmount.
	 *
	 * If we fail to flush, we cannot unmount, but all the nodes have
	 * already been truncated. Erroring out is the best we can do.
	 */
	error = vflush(mp, 0, flags);
	if (error != 0) {
		TMPFS_UNLOCK(tmp);
		return (error);
	}

	/*
	 * First pass get rid of all the directory entries and
	 * vnode associations.  This will also destroy the
	 * directory topology and should drop all link counts
	 * to 0 except for the root.
	 *
	 * No vnodes should remain after the vflush above.
	 */
	LIST_FOREACH(node, &tmp->tm_nodes_used, tn_entries) {
		lwkt_yield();

		atomic_add_int(&node->tn_links, 1);
		TMPFS_NODE_LOCK(node);
		if (node->tn_type == VDIR) {
			struct tmpfs_dirent *de;

			while ((de = RB_ROOT(&node->tn_dir.tn_dirtree)) != NULL) {
				tmpfs_dir_detach_locked(node, de);
				tmpfs_free_dirent(tmp, de);
			}
		}
		KKASSERT(node->tn_vnode == NULL);

		TMPFS_NODE_UNLOCK(node);
		atomic_add_int(&node->tn_links, -1);
	}

	/*
	 * Allow the root node to be destroyed by dropping the link count
	 * we bumped in the mount code.
	 */
	KKASSERT(tmp->tm_root);
	TMPFS_NODE_LOCK(tmp->tm_root);
	atomic_add_int(&tmp->tm_root->tn_links, -1);
	TMPFS_NODE_UNLOCK(tmp->tm_root);

	/*
	 * At this point all nodes, including the root node, should have a
	 * link count of 0.  The root is not necessarily going to be last.
	 */
	while ((node = LIST_FIRST(&tmp->tm_nodes_used)) != NULL) {
		if (node->tn_links) {
			panic("tmpfs: Dangling nodes during umount (%p)!\n",
			      node);
		}

		TMPFS_NODE_LOCK(node);
		tmpfs_free_node(tmp, node);	/* eats lock */
		lwkt_yield();
	}
	KKASSERT(tmp->tm_root == NULL);

	kmalloc_destroy(&tmp->tm_name_zone);
	kmalloc_destroy(&tmp->tm_dirent_zone_obj);
	kmalloc_destroy(&tmp->tm_node_zone_obj);

	tmp->tm_node_zone_obj = NULL;
	tmp->tm_dirent_zone_obj = NULL;

	KKASSERT(tmp->tm_pages_used == 0);
	KKASSERT(tmp->tm_nodes_inuse == 0);

	TMPFS_UNLOCK(tmp);

	/* Throw away the tmpfs_mount structure. */
	kfree(tmp, M_TMPFSMNT);
	mp->mnt_data = NULL;

	mp->mnt_flag &= ~MNT_LOCAL;
	return 0;
}

/* --------------------------------------------------------------------- */

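/*
 * Return a locked vnode for the root directory of the mount.
 */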
static int
tmpfs_root(struct mount *mp, struct vnode **vpp)
{
	struct tmpfs_mount *tmp;
	int error;

	tmp = VFS_TO_TMPFS(mp);
	if (tmp->tm_root == NULL) {
		kprintf("tmpfs_root: called without root node %p\n", mp);
		print_backtrace(-1);
		*vpp = NULL;
		error = EINVAL;
	} else {
		error = tmpfs_alloc_vp(mp, NULL, tmp->tm_root,
				       LK_EXCLUSIVE, vpp);
		(*vpp)->v_flag |= VROOT;
		(*vpp)->v_type = VDIR;
	}
	return error;
}

/* --------------------------------------------------------------------- */

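/*
 * Translate an NFS file handle to a vnode by searching the in-use node
 * list for a node whose inode number and generation match the handle.
 */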
static int
tmpfs_fhtovp(struct mount *mp, struct vnode *rootvp, struct fid *fhp,
	     struct vnode **vpp)
{
	boolean_t found;
	struct tmpfs_fid *tfhp;
	struct tmpfs_mount *tmp;
	struct tmpfs_node *node;
	int rc;

	tmp = VFS_TO_TMPFS(mp);

	tfhp = (struct tmpfs_fid *) fhp;
	if (tfhp->tf_len != sizeof(struct tmpfs_fid))
		return EINVAL;

	rc = EINVAL;
	found = FALSE;

	TMPFS_LOCK(tmp);
	LIST_FOREACH(node, &tmp->tm_nodes_used, tn_entries) {
		if (node->tn_id == tfhp->tf_id &&
		    node->tn_gen == tfhp->tf_gen) {
			found = TRUE;
			break;
		}
	}

	if (found)
		rc = tmpfs_alloc_vp(mp, NULL, node, LK_EXCLUSIVE, vpp);

	TMPFS_UNLOCK(tmp);

	return (rc);
}

/* --------------------------------------------------------------------- */

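/*
 * Report file system statistics.  Block counts are in PAGE_SIZE units
 * and node counts are derived from the mount's configured limits.
 */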
/* ARGSUSED2 */
static int
tmpfs_statfs(struct mount *mp, struct statfs *sbp, struct ucred *cred)
{
	fsfilcnt_t freenodes;
	struct tmpfs_mount *tmp;

	tmp = VFS_TO_TMPFS(mp);

	/* TMPFS_LOCK(tmp); not really needed */

	sbp->f_iosize = PAGE_SIZE;
	sbp->f_bsize = PAGE_SIZE;

	sbp->f_blocks = tmp->tm_pages_max;
	sbp->f_bavail = tmp->tm_pages_max - tmp->tm_pages_used;
	sbp->f_bfree = sbp->f_bavail;

	freenodes = tmp->tm_nodes_max - tmp->tm_nodes_inuse;

	sbp->f_files = freenodes + tmp->tm_nodes_inuse;
	sbp->f_ffree = freenodes;
	sbp->f_owner = tmp->tm_root->tn_uid;

	/* TMPFS_UNLOCK(tmp); */

	return 0;
}

/* --------------------------------------------------------------------- */

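/*
 * Build an NFS file handle for the given vnode from the node's inode
 * number and generation.
 */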
static int
tmpfs_vptofh(struct vnode *vp, struct fid *fhp)
{
	struct tmpfs_node *node;
	struct tmpfs_fid tfh;

	node = VP_TO_TMPFS_NODE(vp);
	memset(&tfh, 0, sizeof(tfh));
	tfh.tf_len = sizeof(struct tmpfs_fid);
	tfh.tf_gen = node->tn_gen;
	tfh.tf_id = node->tn_id;
	memcpy(fhp, &tfh, sizeof(tfh));
	return (0);
}

/* --------------------------------------------------------------------- */

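/*
 * Check whether the client address is permitted by this mount's NFS
 * export list.
 */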
static int
tmpfs_checkexp(struct mount *mp, struct sockaddr *nam, int *exflagsp,
	       struct ucred **credanonp)
{
	struct tmpfs_mount *tmp;
	struct netcred *nc;

	tmp = (struct tmpfs_mount *) mp->mnt_data;
	nc = vfs_export_lookup(mp, &tmp->tm_export, nam);
	if (nc == NULL)
		return (EACCES);

	*exflagsp = nc->netc_exflags;
	*credanonp = &nc->netc_anon;

	return (0);
}

/* --------------------------------------------------------------------- */

/*
 * tmpfs vfs operations.
 */

static struct vfsops tmpfs_vfsops = {
	.vfs_flags =			0,
	.vfs_mount =			tmpfs_mount,
	.vfs_unmount =			tmpfs_unmount,
	.vfs_root =			tmpfs_root,
	.vfs_statfs =			tmpfs_statfs,
	.vfs_fhtovp =			tmpfs_fhtovp,
	.vfs_vptofh =			tmpfs_vptofh,
	.vfs_checkexp =			tmpfs_checkexp,
};

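/*
 * Register tmpfs with the kernel as an MPSAFE file system.
 */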
VFS_SET(tmpfs_vfsops, tmpfs, VFCF_MPSAFE);
MODULE_VERSION(tmpfs, 1);