1 /* $NetBSD: tmpfs_vfsops.c,v 1.10 2005/12/11 12:24:29 christos Exp $ */ 2 3 /*- 4 * Copyright (c) 2005 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Julio M. Merino Vidal, developed as part of Google's Summer of Code 9 * 2005 program. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 23 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 30 * POSSIBILITY OF SUCH DAMAGE. 31 */ 32 33 /* 34 * Efficient memory file system. 35 * 36 * tmpfs is a file system that uses NetBSD's virtual memory sub-system 37 * (the well-known UVM) to store file data and metadata in an efficient 38 * way. This means that it does not follow the structure of an on-disk 39 * file system because it simply does not need to. Instead, it uses 40 * memory-specific data structures and algorithms to automatically 41 * allocate and release resources. 42 */ 43 44 #include <sys/conf.h> 45 #include <sys/param.h> 46 #include <sys/limits.h> 47 #include <sys/lock.h> 48 #include <sys/mutex.h> 49 #include <sys/kernel.h> 50 #include <sys/stat.h> 51 #include <sys/systm.h> 52 #include <sys/sysctl.h> 53 #include <sys/objcache.h> 54 55 #include <vm/vm.h> 56 #include <vm/vm_object.h> 57 #include <vm/vm_param.h> 58 59 #include <vfs/tmpfs/tmpfs.h> 60 #include <vfs/tmpfs/tmpfs_vnops.h> 61 #include <vfs/tmpfs/tmpfs_args.h> 62 63 /* 64 * Default permission for root node 65 */ 66 #define TMPFS_DEFAULT_ROOT_MODE (S_IRWXU|S_IRGRP|S_IXGRP|S_IROTH|S_IXOTH) 67 68 MALLOC_DEFINE(M_TMPFSMNT, "tmpfs mount", "tmpfs mount structures"); 69 70 /* --------------------------------------------------------------------- */ 71 72 static int tmpfs_mount(struct mount *, char *, caddr_t, struct ucred *); 73 static int tmpfs_unmount(struct mount *, int); 74 static int tmpfs_root(struct mount *, struct vnode **); 75 static int tmpfs_fhtovp(struct mount *, struct vnode *, struct fid *, struct vnode **); 76 static int tmpfs_statfs(struct mount *, struct statfs *, struct ucred *cred); 77 78 /* --------------------------------------------------------------------- */ 79 int 80 tmpfs_node_ctor(void *obj, void *privdata, int flags) 81 { 82 struct tmpfs_node *node = (struct tmpfs_node *)obj; 83 84 node->tn_gen++; 85 node->tn_size = 0; 86 node->tn_status = 0; 87 node->tn_flags = 0; 88 node->tn_links = 0; 89 node->tn_vnode = NULL; 90 node->tn_vpstate = TMPFS_VNODE_WANT; 91 bzero(&node->tn_spec, sizeof(node->tn_spec)); 92 93 return (1); 94 } 95 96 static void 97 tmpfs_node_dtor(void *obj, void *privdata) 98 { 99 struct tmpfs_node *node = (struct tmpfs_node *)obj; 100 node->tn_type = VNON; 101 node->tn_vpstate = TMPFS_VNODE_DOOMED; 102 } 103 104 static void* 105 tmpfs_node_init(void *args, int flags) 106 { 107 struct tmpfs_node *node = (struct tmpfs_node *)objcache_malloc_alloc(args, flags); 108 if (node == NULL) 109 return (NULL); 110 node->tn_id = 0; 111 112 lockinit(&node->tn_interlock, "tmpfs node interlock", 0, LK_CANRECURSE); 113 node->tn_gen = karc4random(); 114 115 return node; 116 } 117 118 static void 119 tmpfs_node_fini(void *obj, void *args) 120 { 121 struct tmpfs_node *node = (struct tmpfs_node *)obj; 122 lockuninit(&node->tn_interlock); 123 objcache_malloc_free(obj, args); 124 } 125 126 static int 127 tmpfs_mount(struct mount *mp, char *path, caddr_t data, struct ucred *cred) 128 { 129 struct tmpfs_mount *tmp; 130 struct tmpfs_node *root; 131 struct tmpfs_args args; 132 vm_pindex_t pages; 133 vm_pindex_t pages_limit; 134 ino_t nodes; 135 u_int64_t maxfsize; 136 int error; 137 /* Size counters. */ 138 ino_t nodes_max; 139 off_t size_max; 140 size_t maxfsize_max; 141 size_t size; 142 143 /* Root node attributes. */ 144 uid_t root_uid = cred->cr_uid; 145 gid_t root_gid = cred->cr_gid; 146 mode_t root_mode = (VREAD | VWRITE); 147 148 if (mp->mnt_flag & MNT_UPDATE) { 149 /* XXX: There is no support yet to update file system 150 * settings. Should be added. */ 151 152 return EOPNOTSUPP; 153 } 154 155 /* 156 * mount info 157 */ 158 bzero(&args, sizeof(args)); 159 size_max = 0; 160 nodes_max = 0; 161 maxfsize_max = 0; 162 163 if (path) { 164 if (data) { 165 error = copyin(data, &args, sizeof(args)); 166 if (error) 167 return (error); 168 } 169 size_max = args.ta_size_max; 170 nodes_max = args.ta_nodes_max; 171 maxfsize_max = args.ta_maxfsize_max; 172 root_uid = args.ta_root_uid; 173 root_gid = args.ta_root_gid; 174 root_mode = args.ta_root_mode; 175 } 176 177 /* 178 * If mount by non-root, then verify that user has necessary 179 * permissions on the device. 180 */ 181 if (cred->cr_uid != 0) { 182 root_mode = VREAD; 183 if ((mp->mnt_flag & MNT_RDONLY) == 0) 184 root_mode |= VWRITE; 185 } 186 187 pages_limit = vm_swap_max + vmstats.v_page_count / 2; 188 189 if (size_max == 0) { 190 pages = pages_limit / 2; 191 } else if (size_max < PAGE_SIZE) { 192 pages = 1; 193 } else if (OFF_TO_IDX(size_max) > pages_limit) { 194 /* 195 * do not force pages = pages_limit for this case, otherwise 196 * we might not honor tmpfs size requests from /etc/fstab 197 * during boot because they are mounted prior to swap being 198 * turned on. 199 */ 200 pages = OFF_TO_IDX(size_max); 201 } else { 202 pages = OFF_TO_IDX(size_max); 203 } 204 205 if (nodes_max == 0) 206 nodes = 3 + pages * PAGE_SIZE / 1024; 207 else if (nodes_max < 3) 208 nodes = 3; 209 else if (nodes_max > pages) 210 nodes = pages; 211 else 212 nodes = nodes_max; 213 214 maxfsize = IDX_TO_OFF(pages_limit); 215 if (maxfsize_max != 0 && maxfsize > maxfsize_max) 216 maxfsize = maxfsize_max; 217 218 /* Allocate the tmpfs mount structure and fill it. */ 219 tmp = kmalloc(sizeof(*tmp), M_TMPFSMNT, M_WAITOK | M_ZERO); 220 221 lockinit(&(tmp->allnode_lock), "tmpfs allnode lock", 0, LK_CANRECURSE); 222 tmp->tm_nodes_max = nodes; 223 tmp->tm_nodes_inuse = 0; 224 tmp->tm_maxfilesize = maxfsize; 225 LIST_INIT(&tmp->tm_nodes_used); 226 227 tmp->tm_pages_max = pages; 228 tmp->tm_pages_used = 0; 229 230 kmalloc_create(&tmp->tm_node_zone, "tmpfs node"); 231 kmalloc_create(&tmp->tm_dirent_zone, "tmpfs dirent"); 232 kmalloc_create(&tmp->tm_name_zone, "tmpfs name zone"); 233 234 kmalloc_raise_limit(tmp->tm_node_zone, sizeof(struct tmpfs_node) * 235 tmp->tm_nodes_max); 236 237 tmp->tm_node_zone_malloc_args.objsize = sizeof(struct tmpfs_node); 238 tmp->tm_node_zone_malloc_args.mtype = tmp->tm_node_zone; 239 240 tmp->tm_dirent_zone_malloc_args.objsize = sizeof(struct tmpfs_dirent); 241 tmp->tm_dirent_zone_malloc_args.mtype = tmp->tm_dirent_zone; 242 243 tmp->tm_dirent_pool = objcache_create( "tmpfs dirent cache", 244 0, 0, 245 NULL, NULL, NULL, 246 objcache_malloc_alloc, objcache_malloc_free, 247 &tmp->tm_dirent_zone_malloc_args); 248 tmp->tm_node_pool = objcache_create( "tmpfs node cache", 249 0, 0, 250 tmpfs_node_ctor, tmpfs_node_dtor, NULL, 251 tmpfs_node_init, tmpfs_node_fini, 252 &tmp->tm_node_zone_malloc_args); 253 254 tmp->tm_ino = 2; 255 256 /* Allocate the root node. */ 257 error = tmpfs_alloc_node(tmp, VDIR, root_uid, root_gid, 258 root_mode & ALLPERMS, NULL, NULL, 259 VNOVAL, VNOVAL, &root); 260 261 /* 262 * We are backed by swap, set snocache chflags flag so we 263 * don't trip over swapcache. 264 */ 265 root->tn_flags = SF_NOCACHE; 266 267 if (error != 0 || root == NULL) { 268 objcache_destroy(tmp->tm_node_pool); 269 objcache_destroy(tmp->tm_dirent_pool); 270 kfree(tmp, M_TMPFSMNT); 271 return error; 272 } 273 KASSERT(root->tn_id >= 0, ("tmpfs root with invalid ino: %d", (int)root->tn_id)); 274 tmp->tm_root = root; 275 276 mp->mnt_flag |= MNT_LOCAL; 277 #if 0 278 mp->mnt_kern_flag |= MNTK_RD_MPSAFE | MNTK_WR_MPSAFE | MNTK_GA_MPSAFE | 279 MNTK_IN_MPSAFE | MNTK_SG_MPSAFE; 280 #endif 281 mp->mnt_kern_flag |= MNTK_RD_MPSAFE | MNTK_GA_MPSAFE | MNTK_SG_MPSAFE; 282 mp->mnt_kern_flag |= MNTK_WR_MPSAFE; 283 mp->mnt_kern_flag |= MNTK_NOMSYNC; 284 mp->mnt_data = (qaddr_t)tmp; 285 vfs_getnewfsid(mp); 286 287 288 vfs_add_vnodeops(mp, &tmpfs_vnode_vops, &mp->mnt_vn_norm_ops); 289 vfs_add_vnodeops(mp, &tmpfs_fifo_vops, &mp->mnt_vn_fifo_ops); 290 291 copystr("tmpfs", mp->mnt_stat.f_mntfromname, MNAMELEN - 1, &size); 292 bzero(mp->mnt_stat.f_mntfromname +size, MNAMELEN - size); 293 bzero(mp->mnt_stat.f_mntonname, sizeof(mp->mnt_stat.f_mntonname)); 294 copyinstr(path, mp->mnt_stat.f_mntonname, 295 sizeof(mp->mnt_stat.f_mntonname) -1, 296 &size); 297 298 tmpfs_statfs(mp, &mp->mnt_stat, cred); 299 300 return 0; 301 } 302 303 /* --------------------------------------------------------------------- */ 304 305 /* ARGSUSED2 */ 306 static int 307 tmpfs_unmount(struct mount *mp, int mntflags) 308 { 309 int error; 310 int flags = 0; 311 int found; 312 struct tmpfs_mount *tmp; 313 struct tmpfs_node *node; 314 315 /* Handle forced unmounts. */ 316 if (mntflags & MNT_FORCE) 317 flags |= FORCECLOSE; 318 319 tmp = VFS_TO_TMPFS(mp); 320 321 /* 322 * Finalize all pending I/O. In the case of tmpfs we want 323 * to throw all the data away so clean out the buffer cache 324 * and vm objects before calling vflush(). 325 */ 326 LIST_FOREACH(node, &tmp->tm_nodes_used, tn_entries) { 327 if (node->tn_type == VREG && node->tn_vnode) { 328 ++node->tn_links; 329 TMPFS_NODE_LOCK(node); 330 vx_get(node->tn_vnode); 331 tmpfs_truncate(node->tn_vnode, 0); 332 vx_put(node->tn_vnode); 333 TMPFS_NODE_UNLOCK(node); 334 --node->tn_links; 335 } 336 } 337 error = vflush(mp, 0, flags); 338 if (error != 0) 339 return error; 340 341 /* 342 * First pass get rid of all the directory entries and 343 * vnode associations. The directory structure will 344 * remain via the extra link count representing tn_dir.tn_parent. 345 * 346 * No vnodes should remain after the vflush above. 347 */ 348 LIST_FOREACH(node, &tmp->tm_nodes_used, tn_entries) { 349 ++node->tn_links; 350 TMPFS_NODE_LOCK(node); 351 if (node->tn_type == VDIR) { 352 struct tmpfs_dirent *de; 353 354 while (!TAILQ_EMPTY(&node->tn_dir.tn_dirhead)) { 355 de = TAILQ_FIRST(&node->tn_dir.tn_dirhead); 356 tmpfs_dir_detach(node, de); 357 tmpfs_free_dirent(tmp, de); 358 node->tn_size -= sizeof(struct tmpfs_dirent); 359 } 360 } 361 KKASSERT(node->tn_vnode == NULL); 362 #if 0 363 vp = node->tn_vnode; 364 if (vp != NULL) { 365 tmpfs_free_vp(vp); 366 vrecycle(vp); 367 node->tn_vnode = NULL; 368 } 369 #endif 370 TMPFS_NODE_UNLOCK(node); 371 --node->tn_links; 372 } 373 374 /* 375 * Now get rid of all nodes. We can remove any node with a 376 * link count of 0 or any directory node with a link count of 377 * 1. The parents will not be destroyed until all their children 378 * have been destroyed. 379 * 380 * Recursion in tmpfs_free_node() can further modify the list so 381 * we cannot use a next pointer here. 382 * 383 * The root node will be destroyed by this loop (it will be last). 384 */ 385 while (!LIST_EMPTY(&tmp->tm_nodes_used)) { 386 found = 0; 387 LIST_FOREACH(node, &tmp->tm_nodes_used, tn_entries) { 388 if (node->tn_links == 0 || 389 (node->tn_links == 1 && node->tn_type == VDIR)) { 390 TMPFS_NODE_LOCK(node); 391 tmpfs_free_node(tmp, node); 392 /* eats lock */ 393 found = 1; 394 break; 395 } 396 } 397 if (found == 0) { 398 kprintf("tmpfs: Cannot free entire node tree!"); 399 break; 400 } 401 } 402 403 KKASSERT(tmp->tm_root == NULL); 404 405 objcache_destroy(tmp->tm_dirent_pool); 406 objcache_destroy(tmp->tm_node_pool); 407 408 kmalloc_destroy(&tmp->tm_name_zone); 409 kmalloc_destroy(&tmp->tm_dirent_zone); 410 kmalloc_destroy(&tmp->tm_node_zone); 411 412 tmp->tm_node_zone = tmp->tm_dirent_zone = NULL; 413 414 lockuninit(&tmp->allnode_lock); 415 KKASSERT(tmp->tm_pages_used == 0); 416 KKASSERT(tmp->tm_nodes_inuse == 0); 417 418 /* Throw away the tmpfs_mount structure. */ 419 kfree(tmp, M_TMPFSMNT); 420 mp->mnt_data = NULL; 421 422 mp->mnt_flag &= ~MNT_LOCAL; 423 return 0; 424 } 425 426 /* --------------------------------------------------------------------- */ 427 428 static int 429 tmpfs_root(struct mount *mp, struct vnode **vpp) 430 { 431 struct tmpfs_mount *tmp; 432 int error; 433 434 tmp = VFS_TO_TMPFS(mp); 435 if (tmp->tm_root == NULL) { 436 kprintf("tmpfs_root: called without root node %p\n", mp); 437 print_backtrace(-1); 438 *vpp = NULL; 439 error = EINVAL; 440 } else { 441 error = tmpfs_alloc_vp(mp, tmp->tm_root, LK_EXCLUSIVE, vpp); 442 (*vpp)->v_flag |= VROOT; 443 (*vpp)->v_type = VDIR; 444 } 445 return error; 446 } 447 448 /* --------------------------------------------------------------------- */ 449 450 static int 451 tmpfs_fhtovp(struct mount *mp, struct vnode *rootvp, struct fid *fhp, struct vnode **vpp) 452 { 453 boolean_t found; 454 struct tmpfs_fid *tfhp; 455 struct tmpfs_mount *tmp; 456 struct tmpfs_node *node; 457 458 tmp = VFS_TO_TMPFS(mp); 459 460 tfhp = (struct tmpfs_fid *)fhp; 461 if (tfhp->tf_len != sizeof(struct tmpfs_fid)) 462 return EINVAL; 463 464 if (tfhp->tf_id >= tmp->tm_nodes_max) 465 return EINVAL; 466 467 found = FALSE; 468 469 TMPFS_LOCK(tmp); 470 LIST_FOREACH(node, &tmp->tm_nodes_used, tn_entries) { 471 if (node->tn_id == tfhp->tf_id && 472 node->tn_gen == tfhp->tf_gen) { 473 found = TRUE; 474 break; 475 } 476 } 477 TMPFS_UNLOCK(tmp); 478 479 if (found) 480 return (tmpfs_alloc_vp(mp, node, LK_EXCLUSIVE, vpp)); 481 482 return (EINVAL); 483 } 484 485 /* --------------------------------------------------------------------- */ 486 487 /* ARGSUSED2 */ 488 static int 489 tmpfs_statfs(struct mount *mp, struct statfs *sbp, struct ucred *cred) 490 { 491 fsfilcnt_t freenodes; 492 struct tmpfs_mount *tmp; 493 494 tmp = VFS_TO_TMPFS(mp); 495 496 sbp->f_iosize = PAGE_SIZE; 497 sbp->f_bsize = PAGE_SIZE; 498 499 sbp->f_blocks = tmp->tm_pages_max; 500 sbp->f_bavail = tmp->tm_pages_max - tmp->tm_pages_used; 501 sbp->f_bfree = sbp->f_bavail; 502 503 freenodes = tmp->tm_nodes_max - tmp->tm_nodes_inuse; 504 505 sbp->f_files = freenodes + tmp->tm_nodes_inuse; 506 sbp->f_ffree = freenodes; 507 sbp->f_owner = tmp->tm_root->tn_uid; 508 509 return 0; 510 } 511 512 /* --------------------------------------------------------------------- */ 513 514 static int 515 tmpfs_vptofh(struct vnode *vp, struct fid *fhp) 516 { 517 struct tmpfs_node *node; 518 struct tmpfs_fid tfh; 519 node = VP_TO_TMPFS_NODE(vp); 520 memset(&tfh, 0, sizeof(tfh)); 521 tfh.tf_len = sizeof(struct tmpfs_fid); 522 tfh.tf_gen = node->tn_gen; 523 tfh.tf_id = node->tn_id; 524 memcpy(fhp, &tfh, sizeof(tfh)); 525 return (0); 526 } 527 528 /* --------------------------------------------------------------------- */ 529 530 static int 531 tmpfs_checkexp(struct mount *mp, struct sockaddr *nam, int *exflagsp, 532 struct ucred **credanonp) 533 { 534 struct tmpfs_mount *tmp; 535 struct netcred *nc; 536 537 tmp = (struct tmpfs_mount *) mp->mnt_data; 538 nc = vfs_export_lookup(mp, &tmp->tm_export, nam); 539 if (nc == NULL) 540 return (EACCES); 541 542 *exflagsp = nc->netc_exflags; 543 *credanonp = &nc->netc_anon; 544 545 return (0); 546 } 547 548 /* --------------------------------------------------------------------- */ 549 550 /* 551 * tmpfs vfs operations. 552 */ 553 554 static struct vfsops tmpfs_vfsops = { 555 .vfs_mount = tmpfs_mount, 556 .vfs_unmount = tmpfs_unmount, 557 .vfs_root = tmpfs_root, 558 .vfs_statfs = tmpfs_statfs, 559 .vfs_fhtovp = tmpfs_fhtovp, 560 .vfs_vptofh = tmpfs_vptofh, 561 .vfs_sync = vfs_stdsync, 562 .vfs_checkexp = tmpfs_checkexp, 563 }; 564 565 VFS_SET(tmpfs_vfsops, tmpfs, 0); 566 MODULE_VERSION(tmpfs, 1); 567