xref: /dragonfly/sys/vfs/tmpfs/tmpfs_vfsops.c (revision 029e6489)
1 /*	$NetBSD: tmpfs_vfsops.c,v 1.10 2005/12/11 12:24:29 christos Exp $	*/
2 
3 /*-
4  * Copyright (c) 2005 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Julio M. Merino Vidal, developed as part of Google's Summer of Code
9  * 2005 program.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
21  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
23  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
24  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30  * POSSIBILITY OF SUCH DAMAGE.
31  */
32 
33 /*
34  * Efficient memory file system.
35  *
36  * tmpfs is a file system that uses virtual memory to store file data and
37  * metadata efficiently. It does not follow the structure of an on-disk
38  * file system because it simply does not need to. Instead, it uses
39  * memory-specific data structures and algorithms to automatically
40  * allocate and release resources.
41  */
42 
43 #include <sys/conf.h>
44 #include <sys/param.h>
45 #include <sys/limits.h>
46 #include <sys/lock.h>
47 #include <sys/kernel.h>
48 #include <sys/stat.h>
49 #include <sys/systm.h>
50 #include <sys/sysctl.h>
51 #include <sys/objcache.h>
52 
53 #include <vm/vm.h>
54 #include <vm/vm_object.h>
55 #include <vm/vm_param.h>
56 
57 #if 0
58 #include <vfs/tmpfs/tmpfs.h>
59 #endif
60 #include "tmpfs.h"
61 #include <vfs/tmpfs/tmpfs_vnops.h>
62 #include <vfs/tmpfs/tmpfs_mount.h>
63 
64 /*
65  * Default permission for root node
66  */
67 #define TMPFS_DEFAULT_ROOT_MODE	(S_IRWXU|S_IRGRP|S_IXGRP|S_IROTH|S_IXOTH)
68 
69 MALLOC_DEFINE(M_TMPFSMNT, "tmpfs mount", "tmpfs mount structures");
70 
71 /* --------------------------------------------------------------------- */
72 
73 static int	tmpfs_mount(struct mount *, char *, caddr_t, struct ucred *);
74 static int	tmpfs_unmount(struct mount *, int);
75 static int	tmpfs_root(struct mount *, struct vnode **);
76 static int	tmpfs_fhtovp(struct mount *, struct vnode *, struct fid *, struct vnode **);
77 static int	tmpfs_statfs(struct mount *, struct statfs *, struct ucred *cred);
78 
79 /* --------------------------------------------------------------------- */
80 boolean_t
81 tmpfs_node_ctor(void *obj, void *privdata, int flags)
82 {
83 	struct tmpfs_node *node = obj;
84 
85 	node->tn_gen++;
86 	node->tn_size = 0;
87 	node->tn_status = 0;
88 	node->tn_flags = 0;
89 	node->tn_links = 0;
90 	node->tn_vnode = NULL;
91 	node->tn_vpstate = 0;
92 	bzero(&node->tn_spec, sizeof(node->tn_spec));
93 
94 	return (TRUE);
95 }
96 
97 static void
98 tmpfs_node_dtor(void *obj, void *privdata)
99 {
100 	struct tmpfs_node *node = (struct tmpfs_node *)obj;
101 	node->tn_type = VNON;
102 	node->tn_vpstate = TMPFS_VNODE_DOOMED;
103 }
104 
105 static void *
106 tmpfs_node_init(void *args, int flags)
107 {
108 	struct tmpfs_node *node;
109 
110 	node = objcache_malloc_alloc(args, flags);
111 	if (node == NULL)
112 		return (NULL);
113 	node->tn_id = 0;
114 	node->tn_blksize = PAGE_SIZE;	/* start small */
115 
116 	lockinit(&node->tn_interlock, "tmpfs node interlock", 0, LK_CANRECURSE);
117 	node->tn_gen = karc4random();
118 
119 	return node;
120 }
121 
122 static void
123 tmpfs_node_fini(void *obj, void *args)
124 {
125 	struct tmpfs_node *node = (struct tmpfs_node *)obj;
126 	lockuninit(&node->tn_interlock);
127 	objcache_malloc_free(obj, args);
128 }
129 
130 static int
131 tmpfs_mount(struct mount *mp, char *path, caddr_t data, struct ucred *cred)
132 {
133 	struct tmpfs_mount *tmp;
134 	struct tmpfs_node *root;
135 	struct tmpfs_mount_info args;
136 	vm_pindex_t pages;
137 	vm_pindex_t pages_limit;
138 	ino_t nodes;
139 	u_int64_t	maxfsize;
140 	int error;
141 	/* Size counters. */
142 	ino_t	nodes_max;
143 	off_t	size_max;
144 	size_t	maxfsize_max;
145 	size_t	size;
146 
147 	/* Root node attributes. */
148 	uid_t	root_uid = cred->cr_uid;
149 	gid_t	root_gid = cred->cr_gid;
150 	mode_t	root_mode = (VREAD | VWRITE);
151 
152 	if (mp->mnt_flag & MNT_UPDATE) {
153 		/* XXX: There is no support yet to update file system
154 		 * settings.  Should be added. */
155 
156 		return EOPNOTSUPP;
157 	}
158 
159 	/*
160 	 * mount info
161 	 */
162 	bzero(&args, sizeof(args));
163 	size_max  = 0;
164 	nodes_max = 0;
165 	maxfsize_max = 0;
166 
167 	if (path) {
168 		if (data) {
169 			error = copyin(data, &args, sizeof(args));
170 			if (error)
171 				return (error);
172 		}
173 		size_max = args.ta_size_max;
174 		nodes_max = args.ta_nodes_max;
175 		maxfsize_max = args.ta_maxfsize_max;
176 		root_uid = args.ta_root_uid;
177 		root_gid = args.ta_root_gid;
178 		root_mode = args.ta_root_mode;
179 	}
180 
181 	/*
182 	 * If mount by non-root, then verify that user has necessary
183 	 * permissions on the device.
184 	 */
185 	if (cred->cr_uid != 0) {
186 		root_mode = VREAD;
187 		if ((mp->mnt_flag & MNT_RDONLY) == 0)
188 			root_mode |= VWRITE;
189 	}
190 
191 	pages_limit = vm_swap_max + vmstats.v_page_count / 2;
192 
193 	if (size_max == 0) {
194 		pages = pages_limit / 2;
195 	} else if (size_max < PAGE_SIZE) {
196 		pages = 1;
197 	} else if (OFF_TO_IDX(size_max) > pages_limit) {
198 		/*
199 		 * do not force pages = pages_limit for this case, otherwise
200 		 * we might not honor tmpfs size requests from /etc/fstab
201 		 * during boot because they are mounted prior to swap being
202 		 * turned on.
203 		 */
204 		pages = OFF_TO_IDX(size_max);
205 	} else {
206 		pages = OFF_TO_IDX(size_max);
207 	}
208 
209 	if (nodes_max == 0)
210 		nodes = 3 + pages * PAGE_SIZE / 1024;
211 	else if (nodes_max < 3)
212 		nodes = 3;
213 	else if (nodes_max > pages)
214 		nodes = pages;
215 	else
216 		nodes = nodes_max;
217 
218 	maxfsize = 0x7FFFFFFFFFFFFFFFLLU - TMPFS_BLKSIZE;
219 	if (maxfsize_max != 0 && maxfsize > maxfsize_max)
220 		maxfsize = maxfsize_max;
221 
222 	/* Allocate the tmpfs mount structure and fill it. */
223 	tmp = kmalloc(sizeof(*tmp), M_TMPFSMNT, M_WAITOK | M_ZERO);
224 
225 	tmp->tm_mount = mp;
226 	tmp->tm_nodes_max = nodes;
227 	tmp->tm_nodes_inuse = 0;
228 	tmp->tm_maxfilesize = maxfsize;
229 	LIST_INIT(&tmp->tm_nodes_used);
230 
231 	tmp->tm_pages_max = pages;
232 	tmp->tm_pages_used = 0;
233 
234 	kmalloc_create(&tmp->tm_node_zone, "tmpfs node");
235 	kmalloc_create(&tmp->tm_dirent_zone, "tmpfs dirent");
236 	kmalloc_create(&tmp->tm_name_zone, "tmpfs name zone");
237 
238 	kmalloc_raise_limit(tmp->tm_node_zone, sizeof(struct tmpfs_node) *
239 			    tmp->tm_nodes_max);
240 
241 	tmp->tm_node_zone_malloc_args.objsize = sizeof(struct tmpfs_node);
242 	tmp->tm_node_zone_malloc_args.mtype = tmp->tm_node_zone;
243 
244 	tmp->tm_dirent_zone_malloc_args.objsize = sizeof(struct tmpfs_dirent);
245 	tmp->tm_dirent_zone_malloc_args.mtype = tmp->tm_dirent_zone;
246 
247 	tmp->tm_dirent_pool =  objcache_create( "tmpfs dirent cache",
248 	    0, 0,
249 	    NULL, NULL, NULL,
250 	    objcache_malloc_alloc, objcache_malloc_free,
251 	    &tmp->tm_dirent_zone_malloc_args);
252 	tmp->tm_node_pool = objcache_create( "tmpfs node cache",
253 	    0, 0,
254 	    tmpfs_node_ctor, tmpfs_node_dtor, NULL,
255 	    tmpfs_node_init, tmpfs_node_fini,
256 	    &tmp->tm_node_zone_malloc_args);
257 
258 	tmp->tm_ino = TMPFS_ROOTINO;
259 
260 	/* Allocate the root node. */
261 	error = tmpfs_alloc_node(tmp, VDIR, root_uid, root_gid,
262 				 root_mode & ALLPERMS, NULL,
263 				 VNOVAL, VNOVAL, &root);
264 
265 	/*
266 	 * We are backed by swap, set snocache chflags flag so we
267 	 * don't trip over swapcache.
268 	 */
269 	root->tn_flags = SF_NOCACHE;
270 
271 	if (error != 0 || root == NULL) {
272 	    objcache_destroy(tmp->tm_node_pool);
273 	    objcache_destroy(tmp->tm_dirent_pool);
274 	    kfree(tmp, M_TMPFSMNT);
275 	    return error;
276 	}
277 	KASSERT(root->tn_id == TMPFS_ROOTINO,
278 		("tmpfs root with invalid ino: %ju", (uintmax_t)root->tn_id));
279 
280 	atomic_add_int(&root->tn_links, 1);	/* keep around */
281 	tmp->tm_root = root;
282 
283 	mp->mnt_flag |= MNT_LOCAL;
284 	mp->mnt_kern_flag |= MNTK_ALL_MPSAFE;
285 	mp->mnt_kern_flag |= MNTK_NOMSYNC;
286 	mp->mnt_kern_flag |= MNTK_THR_SYNC;	/* new vsyncscan semantics */
287 	mp->mnt_kern_flag |= MNTK_QUICKHALT;	/* no teardown needed on halt */
288 	mp->mnt_data = (qaddr_t)tmp;
289 	mp->mnt_iosize_max = MAXBSIZE;
290 	vfs_getnewfsid(mp);
291 
292 	vfs_add_vnodeops(mp, &tmpfs_vnode_vops, &mp->mnt_vn_norm_ops);
293 	vfs_add_vnodeops(mp, &tmpfs_fifo_vops, &mp->mnt_vn_fifo_ops);
294 
295 	copystr("tmpfs", mp->mnt_stat.f_mntfromname, MNAMELEN - 1, &size);
296 	bzero(mp->mnt_stat.f_mntfromname +size, MNAMELEN - size);
297 	bzero(mp->mnt_stat.f_mntonname, sizeof(mp->mnt_stat.f_mntonname));
298 	copyinstr(path, mp->mnt_stat.f_mntonname,
299 		  sizeof(mp->mnt_stat.f_mntonname) -1,
300 		  &size);
301 
302 	tmpfs_statfs(mp, &mp->mnt_stat, cred);
303 
304 	return 0;
305 }
306 
307 /* --------------------------------------------------------------------- */
308 
/* ARGSUSED2 */
/*
 * VFS_UNMOUNT: tear down a tmpfs mount.
 *
 * All file data is discarded: regular files are truncated to zero
 * before vflush() so the buffer cache and VM objects are emptied,
 * then the directory topology is dismantled and every node freed.
 *
 * Returns 0 on success, or the vflush() error if vnodes could not be
 * flushed (in which case the mount stays but files are already gone).
 */
static int
tmpfs_unmount(struct mount *mp, int mntflags)
{
	int error;
	int flags = 0;
	struct tmpfs_mount *tmp;
	struct tmpfs_node *node;
	struct vnode *vp;
	int isok;

	tmp = VFS_TO_TMPFS(mp);
	TMPFS_LOCK(tmp);

	/* Handle forced unmounts. */
	if (mntflags & MNT_FORCE)
		flags |= FORCECLOSE;

	/*
	 * Finalize all pending I/O.  In the case of tmpfs we want
	 * to throw all the data away so clean out the buffer cache
	 * and vm objects before calling vflush().
	 */
	LIST_FOREACH(node, &tmp->tm_nodes_used, tn_entries) {
		/*
		 * tn_links is mnt_token protected
		 */
		atomic_add_int(&node->tn_links, 1);
		TMPFS_NODE_LOCK(node);

		/*
		 * Truncate each regular file to zero.  The vnode may be
		 * ripped out from under us while blocked, hence the
		 * retry loop.
		 */
		while (node->tn_type == VREG && node->tn_vnode) {
			vp = node->tn_vnode;
			vhold(vp);
			TMPFS_NODE_UNLOCK(node);
			lwkt_yield();

			/*
			 * vx_get/vx_put and tmpfs_truncate may block,
			 * releasing the tmpfs mountpoint token.
			 *
			 * Make sure the lock order is correct.
			 */
			vx_get(vp);		/* held vnode */
			TMPFS_NODE_LOCK(node);
			if (node->tn_vnode == vp) {
				tmpfs_truncate(vp, 0);
				isok = 1;
			} else {
				/* association changed while we blocked */
				isok = 0;
			}
			TMPFS_NODE_UNLOCK(node);
			vx_put(vp);
			vdrop(vp);
			TMPFS_NODE_LOCK(node);
			if (isok)
				break;
			/* retry */
		}

		TMPFS_NODE_UNLOCK(node);
		atomic_add_int(&node->tn_links, -1);
	}

	/*
	 * Flush all vnodes on the unmount.
	 *
	 * If we fail to flush, we cannot unmount, but all the nodes have
	 * already been truncated. Erroring out is the best we can do.
	 */
	error = vflush(mp, 0, flags);
	if (error != 0) {
		TMPFS_UNLOCK(tmp);
		return (error);
	}

	/*
	 * First pass get rid of all the directory entries and
	 * vnode associations.  This will also destroy the
	 * directory topology and should drop all link counts
	 * to 0 except for the root.
	 *
	 * No vnodes should remain after the vflush above.
	 */
	LIST_FOREACH(node, &tmp->tm_nodes_used, tn_entries) {
		lwkt_yield();

		atomic_add_int(&node->tn_links, 1);
		TMPFS_NODE_LOCK(node);
		if (node->tn_type == VDIR) {
			struct tmpfs_dirent *de;

			/* detach and free every directory entry */
			while ((de = RB_ROOT(&node->tn_dir.tn_dirtree)) != NULL) {
				tmpfs_dir_detach_locked(node, de);
				tmpfs_free_dirent(tmp, de);
			}
		}
		KKASSERT(node->tn_vnode == NULL);

		TMPFS_NODE_UNLOCK(node);
		atomic_add_int(&node->tn_links, -1);
	}

	/*
	 * Allow the root node to be destroyed by dropping the link count
	 * we bumped in the mount code.
	 */
	KKASSERT(tmp->tm_root);
	TMPFS_NODE_LOCK(tmp->tm_root);
	atomic_add_int(&tmp->tm_root->tn_links, -1);
	TMPFS_NODE_UNLOCK(tmp->tm_root);

	/*
	 * At this point all nodes, including the root node, should have a
	 * link count of 0.  The root is not necessarily going to be last.
	 */
	while ((node = LIST_FIRST(&tmp->tm_nodes_used)) != NULL) {
		if (node->tn_links)
			panic("tmpfs: Dangling nodes during umount (%p)!\n",
			      node);

		TMPFS_NODE_LOCK(node);
		tmpfs_free_node(tmp, node);
		/* eats lock */
		lwkt_yield();
	}
	KKASSERT(tmp->tm_root == NULL);

	/* Destroy the object caches before their backing kmalloc zones. */
	objcache_destroy(tmp->tm_dirent_pool);
	objcache_destroy(tmp->tm_node_pool);

	kmalloc_destroy(&tmp->tm_name_zone);
	kmalloc_destroy(&tmp->tm_dirent_zone);
	kmalloc_destroy(&tmp->tm_node_zone);

	tmp->tm_node_zone = tmp->tm_dirent_zone = NULL;

	KKASSERT(tmp->tm_pages_used == 0);
	KKASSERT(tmp->tm_nodes_inuse == 0);

	TMPFS_UNLOCK(tmp);

	/* Throw away the tmpfs_mount structure. */
	kfree(tmp, M_TMPFSMNT);
	mp->mnt_data = NULL;

	mp->mnt_flag &= ~MNT_LOCAL;
	return 0;
}
457 
458 /* --------------------------------------------------------------------- */
459 
460 static int
461 tmpfs_root(struct mount *mp, struct vnode **vpp)
462 {
463 	struct tmpfs_mount *tmp;
464 	int error;
465 
466 	tmp = VFS_TO_TMPFS(mp);
467 	if (tmp->tm_root == NULL) {
468 		kprintf("tmpfs_root: called without root node %p\n", mp);
469 		print_backtrace(-1);
470 		*vpp = NULL;
471 		error = EINVAL;
472 	} else {
473 		error = tmpfs_alloc_vp(mp, NULL, tmp->tm_root,
474 				       LK_EXCLUSIVE, vpp);
475 		(*vpp)->v_flag |= VROOT;
476 		(*vpp)->v_type = VDIR;
477 	}
478 	return error;
479 }
480 
481 /* --------------------------------------------------------------------- */
482 
483 static int
484 tmpfs_fhtovp(struct mount *mp, struct vnode *rootvp, struct fid *fhp,
485 	     struct vnode **vpp)
486 {
487 	boolean_t found;
488 	struct tmpfs_fid *tfhp;
489 	struct tmpfs_mount *tmp;
490 	struct tmpfs_node *node;
491 	int rc;
492 
493 	tmp = VFS_TO_TMPFS(mp);
494 
495 	tfhp = (struct tmpfs_fid *) fhp;
496 	if (tfhp->tf_len != sizeof(struct tmpfs_fid))
497 		return EINVAL;
498 
499 	rc = EINVAL;
500 	found = FALSE;
501 
502 	TMPFS_LOCK(tmp);
503 	LIST_FOREACH(node, &tmp->tm_nodes_used, tn_entries) {
504 		if (node->tn_id == tfhp->tf_id &&
505 		    node->tn_gen == tfhp->tf_gen) {
506 			found = TRUE;
507 			break;
508 		}
509 	}
510 
511 	if (found)
512 		rc = tmpfs_alloc_vp(mp, NULL, node, LK_EXCLUSIVE, vpp);
513 
514 	TMPFS_UNLOCK(tmp);
515 
516 	return (rc);
517 }
518 
519 /* --------------------------------------------------------------------- */
520 
521 /* ARGSUSED2 */
522 static int
523 tmpfs_statfs(struct mount *mp, struct statfs *sbp, struct ucred *cred)
524 {
525 	fsfilcnt_t freenodes;
526 	struct tmpfs_mount *tmp;
527 
528 	tmp = VFS_TO_TMPFS(mp);
529 
530 	/* TMPFS_LOCK(tmp); not really needed */
531 
532 	sbp->f_iosize = PAGE_SIZE;
533 	sbp->f_bsize = PAGE_SIZE;
534 
535 	sbp->f_blocks = tmp->tm_pages_max;
536 	sbp->f_bavail = tmp->tm_pages_max - tmp->tm_pages_used;
537 	sbp->f_bfree = sbp->f_bavail;
538 
539 	freenodes = tmp->tm_nodes_max - tmp->tm_nodes_inuse;
540 
541 	sbp->f_files = freenodes + tmp->tm_nodes_inuse;
542 	sbp->f_ffree = freenodes;
543 	sbp->f_owner = tmp->tm_root->tn_uid;
544 
545 	/* TMPFS_UNLOCK(tmp); */
546 
547 	return 0;
548 }
549 
550 /* --------------------------------------------------------------------- */
551 
552 static int
553 tmpfs_vptofh(struct vnode *vp, struct fid *fhp)
554 {
555 	struct tmpfs_node *node;
556 	struct tmpfs_fid tfh;
557 	node = VP_TO_TMPFS_NODE(vp);
558 	memset(&tfh, 0, sizeof(tfh));
559 	tfh.tf_len = sizeof(struct tmpfs_fid);
560 	tfh.tf_gen = node->tn_gen;
561 	tfh.tf_id = node->tn_id;
562 	memcpy(fhp, &tfh, sizeof(tfh));
563 	return (0);
564 }
565 
566 /* --------------------------------------------------------------------- */
567 
568 static int
569 tmpfs_checkexp(struct mount *mp, struct sockaddr *nam, int *exflagsp,
570 	       struct ucred **credanonp)
571 {
572 	struct tmpfs_mount *tmp;
573 	struct netcred *nc;
574 
575 	tmp = (struct tmpfs_mount *) mp->mnt_data;
576 	nc = vfs_export_lookup(mp, &tmp->tm_export, nam);
577 	if (nc == NULL)
578 		return (EACCES);
579 
580 	*exflagsp = nc->netc_exflags;
581 	*credanonp = &nc->netc_anon;
582 
583 	return (0);
584 }
585 
586 /* --------------------------------------------------------------------- */
587 
588 /*
589  * tmpfs vfs operations.
590  */
591 
/*
 * tmpfs vfs operation vector.  Operations not listed here (sync,
 * quotactl, etc.) fall back to the VFS defaults.
 */
static struct vfsops tmpfs_vfsops = {
	.vfs_flags =			0,
	.vfs_mount =			tmpfs_mount,
	.vfs_unmount =			tmpfs_unmount,
	.vfs_root =			tmpfs_root,
	.vfs_statfs =			tmpfs_statfs,
	.vfs_fhtovp =			tmpfs_fhtovp,
	.vfs_vptofh =			tmpfs_vptofh,
	.vfs_checkexp =			tmpfs_checkexp,
};

/* Register the filesystem; VFCF_MPSAFE: VFS calls need no MP lock. */
VFS_SET(tmpfs_vfsops, tmpfs, VFCF_MPSAFE);
MODULE_VERSION(tmpfs, 1);
605