1 /* $NetBSD: tmpfs_vfsops.c,v 1.10 2005/12/11 12:24:29 christos Exp $ */ 2 3 /*- 4 * Copyright (c) 2005 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Julio M. Merino Vidal, developed as part of Google's Summer of Code 9 * 2005 program. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 23 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 30 * POSSIBILITY OF SUCH DAMAGE. 31 */ 32 33 /* 34 * Efficient memory file system. 35 * 36 * tmpfs is a file system that uses virtual memory to store file data and 37 * metadata efficiently. It does not follow the structure of an on-disk 38 * file system because it simply does not need to. Instead, it uses 39 * memory-specific data structures and algorithms to automatically 40 * allocate and release resources. 41 */ 42 43 #include <sys/conf.h> 44 #include <sys/param.h> 45 #include <sys/limits.h> 46 #include <sys/lock.h> 47 #include <sys/mutex.h> 48 #include <sys/kernel.h> 49 #include <sys/stat.h> 50 #include <sys/systm.h> 51 #include <sys/sysctl.h> 52 #include <sys/objcache.h> 53 54 #include <vm/vm.h> 55 #include <vm/vm_object.h> 56 #include <vm/vm_param.h> 57 58 #if 0 59 #include <vfs/tmpfs/tmpfs.h> 60 #endif 61 #include "tmpfs.h" 62 #include <vfs/tmpfs/tmpfs_vnops.h> 63 #include <vfs/tmpfs/tmpfs_args.h> 64 65 /* 66 * Default permission for root node 67 */ 68 #define TMPFS_DEFAULT_ROOT_MODE (S_IRWXU|S_IRGRP|S_IXGRP|S_IROTH|S_IXOTH) 69 70 MALLOC_DEFINE(M_TMPFSMNT, "tmpfs mount", "tmpfs mount structures"); 71 72 /* --------------------------------------------------------------------- */ 73 74 static int tmpfs_mount(struct mount *, char *, caddr_t, struct ucred *); 75 static int tmpfs_unmount(struct mount *, int); 76 static int tmpfs_root(struct mount *, struct vnode **); 77 static int tmpfs_fhtovp(struct mount *, struct vnode *, struct fid *, struct vnode **); 78 static int tmpfs_statfs(struct mount *, struct statfs *, struct ucred *cred); 79 80 /* --------------------------------------------------------------------- */ 81 int 82 tmpfs_node_ctor(void *obj, void *privdata, int flags) 83 { 84 struct tmpfs_node *node = obj; 85 86 node->tn_gen++; 87 node->tn_size = 0; 88 node->tn_status = 0; 89 node->tn_flags = 0; 90 node->tn_links = 0; 91 node->tn_vnode = NULL; 92 node->tn_vpstate = TMPFS_VNODE_WANT; 93 bzero(&node->tn_spec, sizeof(node->tn_spec)); 94 95 return (1); 96 } 97 98 static void 99 tmpfs_node_dtor(void *obj, void *privdata) 100 { 101 struct tmpfs_node *node = (struct tmpfs_node *)obj; 102 node->tn_type = VNON; 103 node->tn_vpstate = TMPFS_VNODE_DOOMED; 104 } 105 106 static void * 107 tmpfs_node_init(void *args, int flags) 108 { 109 struct tmpfs_node *node = objcache_malloc_alloc(args, flags); 110 if (node == NULL) 111 return (NULL); 112 node->tn_id = 0; 113 114 lockinit(&node->tn_interlock, "tmpfs node interlock", 0, LK_CANRECURSE); 115 node->tn_gen = karc4random(); 116 117 return node; 118 } 119 120 static void 121 tmpfs_node_fini(void *obj, void *args) 122 { 123 struct tmpfs_node *node = (struct tmpfs_node *)obj; 124 lockuninit(&node->tn_interlock); 125 objcache_malloc_free(obj, args); 126 } 127 128 static int 129 tmpfs_mount(struct mount *mp, char *path, caddr_t data, struct ucred *cred) 130 { 131 struct tmpfs_mount *tmp; 132 struct tmpfs_node *root; 133 struct tmpfs_args args; 134 vm_pindex_t pages; 135 vm_pindex_t pages_limit; 136 ino_t nodes; 137 u_int64_t maxfsize; 138 int error; 139 /* Size counters. */ 140 ino_t nodes_max; 141 off_t size_max; 142 size_t maxfsize_max; 143 size_t size; 144 145 /* Root node attributes. */ 146 uid_t root_uid = cred->cr_uid; 147 gid_t root_gid = cred->cr_gid; 148 mode_t root_mode = (VREAD | VWRITE); 149 150 if (mp->mnt_flag & MNT_UPDATE) { 151 /* XXX: There is no support yet to update file system 152 * settings. Should be added. */ 153 154 return EOPNOTSUPP; 155 } 156 157 /* 158 * mount info 159 */ 160 bzero(&args, sizeof(args)); 161 size_max = 0; 162 nodes_max = 0; 163 maxfsize_max = 0; 164 165 if (path) { 166 if (data) { 167 error = copyin(data, &args, sizeof(args)); 168 if (error) 169 return (error); 170 } 171 size_max = args.ta_size_max; 172 nodes_max = args.ta_nodes_max; 173 maxfsize_max = args.ta_maxfsize_max; 174 root_uid = args.ta_root_uid; 175 root_gid = args.ta_root_gid; 176 root_mode = args.ta_root_mode; 177 } 178 179 /* 180 * If mount by non-root, then verify that user has necessary 181 * permissions on the device. 182 */ 183 if (cred->cr_uid != 0) { 184 root_mode = VREAD; 185 if ((mp->mnt_flag & MNT_RDONLY) == 0) 186 root_mode |= VWRITE; 187 } 188 189 pages_limit = vm_swap_max + vmstats.v_page_count / 2; 190 191 if (size_max == 0) { 192 pages = pages_limit / 2; 193 } else if (size_max < PAGE_SIZE) { 194 pages = 1; 195 } else if (OFF_TO_IDX(size_max) > pages_limit) { 196 /* 197 * do not force pages = pages_limit for this case, otherwise 198 * we might not honor tmpfs size requests from /etc/fstab 199 * during boot because they are mounted prior to swap being 200 * turned on. 201 */ 202 pages = OFF_TO_IDX(size_max); 203 } else { 204 pages = OFF_TO_IDX(size_max); 205 } 206 207 if (nodes_max == 0) 208 nodes = 3 + pages * PAGE_SIZE / 1024; 209 else if (nodes_max < 3) 210 nodes = 3; 211 else if (nodes_max > pages) 212 nodes = pages; 213 else 214 nodes = nodes_max; 215 216 maxfsize = IDX_TO_OFF(pages_limit); 217 if (maxfsize_max != 0 && maxfsize > maxfsize_max) 218 maxfsize = maxfsize_max; 219 220 /* Allocate the tmpfs mount structure and fill it. */ 221 tmp = kmalloc(sizeof(*tmp), M_TMPFSMNT, M_WAITOK | M_ZERO); 222 223 tmp->tm_mount = mp; 224 tmp->tm_nodes_max = nodes; 225 tmp->tm_nodes_inuse = 0; 226 tmp->tm_maxfilesize = maxfsize; 227 LIST_INIT(&tmp->tm_nodes_used); 228 229 tmp->tm_pages_max = pages; 230 tmp->tm_pages_used = 0; 231 232 kmalloc_create(&tmp->tm_node_zone, "tmpfs node"); 233 kmalloc_create(&tmp->tm_dirent_zone, "tmpfs dirent"); 234 kmalloc_create(&tmp->tm_name_zone, "tmpfs name zone"); 235 236 kmalloc_raise_limit(tmp->tm_node_zone, sizeof(struct tmpfs_node) * 237 tmp->tm_nodes_max); 238 239 tmp->tm_node_zone_malloc_args.objsize = sizeof(struct tmpfs_node); 240 tmp->tm_node_zone_malloc_args.mtype = tmp->tm_node_zone; 241 242 tmp->tm_dirent_zone_malloc_args.objsize = sizeof(struct tmpfs_dirent); 243 tmp->tm_dirent_zone_malloc_args.mtype = tmp->tm_dirent_zone; 244 245 tmp->tm_dirent_pool = objcache_create( "tmpfs dirent cache", 246 0, 0, 247 NULL, NULL, NULL, 248 objcache_malloc_alloc, objcache_malloc_free, 249 &tmp->tm_dirent_zone_malloc_args); 250 tmp->tm_node_pool = objcache_create( "tmpfs node cache", 251 0, 0, 252 tmpfs_node_ctor, tmpfs_node_dtor, NULL, 253 tmpfs_node_init, tmpfs_node_fini, 254 &tmp->tm_node_zone_malloc_args); 255 256 tmp->tm_ino = 2; 257 258 /* Allocate the root node. */ 259 error = tmpfs_alloc_node(tmp, VDIR, root_uid, root_gid, 260 root_mode & ALLPERMS, NULL, 261 VNOVAL, VNOVAL, &root); 262 263 /* 264 * We are backed by swap, set snocache chflags flag so we 265 * don't trip over swapcache. 266 */ 267 root->tn_flags = SF_NOCACHE; 268 269 if (error != 0 || root == NULL) { 270 objcache_destroy(tmp->tm_node_pool); 271 objcache_destroy(tmp->tm_dirent_pool); 272 kfree(tmp, M_TMPFSMNT); 273 return error; 274 } 275 KASSERT(root->tn_id >= 0, 276 ("tmpfs root with invalid ino: %d", (int)root->tn_id)); 277 278 ++root->tn_links; /* prevent destruction */ 279 tmp->tm_root = root; 280 281 mp->mnt_flag |= MNT_LOCAL; 282 mp->mnt_kern_flag |= MNTK_ALL_MPSAFE; 283 mp->mnt_kern_flag |= MNTK_NOMSYNC; 284 mp->mnt_data = (qaddr_t)tmp; 285 vfs_getnewfsid(mp); 286 287 vfs_add_vnodeops(mp, &tmpfs_vnode_vops, &mp->mnt_vn_norm_ops); 288 vfs_add_vnodeops(mp, &tmpfs_fifo_vops, &mp->mnt_vn_fifo_ops); 289 290 copystr("tmpfs", mp->mnt_stat.f_mntfromname, MNAMELEN - 1, &size); 291 bzero(mp->mnt_stat.f_mntfromname +size, MNAMELEN - size); 292 bzero(mp->mnt_stat.f_mntonname, sizeof(mp->mnt_stat.f_mntonname)); 293 copyinstr(path, mp->mnt_stat.f_mntonname, 294 sizeof(mp->mnt_stat.f_mntonname) -1, 295 &size); 296 297 tmpfs_statfs(mp, &mp->mnt_stat, cred); 298 299 return 0; 300 } 301 302 /* --------------------------------------------------------------------- */ 303 304 /* ARGSUSED2 */ 305 static int 306 tmpfs_unmount(struct mount *mp, int mntflags) 307 { 308 int error; 309 int flags = 0; 310 struct tmpfs_mount *tmp; 311 struct tmpfs_node *node; 312 struct vnode *vp; 313 int isok; 314 315 tmp = VFS_TO_TMPFS(mp); 316 TMPFS_LOCK(tmp); 317 318 /* Handle forced unmounts. */ 319 if (mntflags & MNT_FORCE) 320 flags |= FORCECLOSE; 321 322 /* 323 * Finalize all pending I/O. In the case of tmpfs we want 324 * to throw all the data away so clean out the buffer cache 325 * and vm objects before calling vflush(). 326 */ 327 LIST_FOREACH(node, &tmp->tm_nodes_used, tn_entries) { 328 /* 329 * tn_links is mnt_token protected 330 */ 331 ++node->tn_links; 332 while (node->tn_type == VREG && node->tn_vnode) { 333 vp = node->tn_vnode; 334 vhold_interlocked(vp); 335 lwkt_yield(); 336 337 /* 338 * vx_get/vx_put and tmpfs_truncate may block, 339 * releasing the tmpfs mountpoint token. 340 * 341 * Make sure the lock order is correct. 342 */ 343 vx_get(vp); /* held vnode */ 344 TMPFS_NODE_LOCK(node); 345 if (node->tn_vnode == vp) { 346 tmpfs_truncate(vp, 0); 347 isok = 1; 348 } else { 349 isok = 0; 350 } 351 TMPFS_NODE_UNLOCK(node); 352 vx_put(vp); 353 vdrop(vp); 354 if (isok) 355 break; 356 /* retry */ 357 } 358 --node->tn_links; 359 } 360 361 /* 362 * Flush all vnodes on the mount. 363 * 364 * If we fail to flush, we cannot unmount, but all the nodes have 365 * already been truncated. Erroring out is the best we can do. 366 */ 367 error = vflush(mp, 0, flags); 368 if (error != 0) { 369 TMPFS_UNLOCK(tmp); 370 return (error); 371 } 372 373 /* 374 * First pass get rid of all the directory entries and 375 * vnode associations. This will also destroy the 376 * directory topology and should drop all link counts 377 * to 0 except for the root. 378 * 379 * No vnodes should remain after the vflush above. 380 */ 381 LIST_FOREACH(node, &tmp->tm_nodes_used, tn_entries) { 382 ++node->tn_links; /* mnt_token protected */ 383 lwkt_yield(); 384 TMPFS_NODE_LOCK(node); 385 386 if (node->tn_type == VDIR) { 387 struct tmpfs_dirent *de; 388 389 while ((de = RB_ROOT(&node->tn_dir.tn_dirtree)) != NULL) { 390 tmpfs_dir_detach(node, de); 391 tmpfs_free_dirent(tmp, de); 392 } 393 } 394 KKASSERT(node->tn_vnode == NULL); 395 396 TMPFS_NODE_UNLOCK(node); 397 --node->tn_links; /* mnt_token protected */ 398 } 399 400 /* 401 * Allow the root node to be destroyed by dropping the link count 402 * we bumped in the mount code. 403 */ 404 KKASSERT(tmp->tm_root); 405 --tmp->tm_root->tn_links; /* mnt_token protected */ 406 407 /* 408 * At this point all nodes, including the root node, should have a 409 * link count of 0. The root is not necessarily going to be last. 410 */ 411 while ((node = LIST_FIRST(&tmp->tm_nodes_used)) != NULL) { 412 if (node->tn_links) 413 panic("tmpfs: Dangling nodes during umount (%p)!\n", 414 node); 415 416 TMPFS_NODE_LOCK(node); 417 tmpfs_free_node(tmp, node); 418 /* eats lock */ 419 lwkt_yield(); 420 } 421 KKASSERT(tmp->tm_root == NULL); 422 423 objcache_destroy(tmp->tm_dirent_pool); 424 objcache_destroy(tmp->tm_node_pool); 425 426 kmalloc_destroy(&tmp->tm_name_zone); 427 kmalloc_destroy(&tmp->tm_dirent_zone); 428 kmalloc_destroy(&tmp->tm_node_zone); 429 430 tmp->tm_node_zone = tmp->tm_dirent_zone = NULL; 431 432 KKASSERT(tmp->tm_pages_used == 0); 433 KKASSERT(tmp->tm_nodes_inuse == 0); 434 435 TMPFS_UNLOCK(tmp); 436 437 /* Throw away the tmpfs_mount structure. */ 438 kfree(tmp, M_TMPFSMNT); 439 mp->mnt_data = NULL; 440 441 mp->mnt_flag &= ~MNT_LOCAL; 442 return 0; 443 } 444 445 /* --------------------------------------------------------------------- */ 446 447 static int 448 tmpfs_root(struct mount *mp, struct vnode **vpp) 449 { 450 struct tmpfs_mount *tmp; 451 int error; 452 453 tmp = VFS_TO_TMPFS(mp); 454 if (tmp->tm_root == NULL) { 455 kprintf("tmpfs_root: called without root node %p\n", mp); 456 print_backtrace(-1); 457 *vpp = NULL; 458 error = EINVAL; 459 } else { 460 error = tmpfs_alloc_vp(mp, tmp->tm_root, LK_EXCLUSIVE, vpp); 461 (*vpp)->v_flag |= VROOT; 462 (*vpp)->v_type = VDIR; 463 } 464 return error; 465 } 466 467 /* --------------------------------------------------------------------- */ 468 469 static int 470 tmpfs_fhtovp(struct mount *mp, struct vnode *rootvp, struct fid *fhp, 471 struct vnode **vpp) 472 { 473 boolean_t found; 474 struct tmpfs_fid *tfhp; 475 struct tmpfs_mount *tmp; 476 struct tmpfs_node *node; 477 int rc; 478 479 tmp = VFS_TO_TMPFS(mp); 480 481 tfhp = (struct tmpfs_fid *) fhp; 482 if (tfhp->tf_len != sizeof(struct tmpfs_fid)) 483 return EINVAL; 484 485 if (tfhp->tf_id >= tmp->tm_nodes_max) 486 return EINVAL; 487 488 rc = EINVAL; 489 found = FALSE; 490 491 TMPFS_LOCK(tmp); 492 LIST_FOREACH(node, &tmp->tm_nodes_used, tn_entries) { 493 if (node->tn_id == tfhp->tf_id && 494 node->tn_gen == tfhp->tf_gen) { 495 found = TRUE; 496 break; 497 } 498 } 499 500 if (found) 501 rc = tmpfs_alloc_vp(mp, node, LK_EXCLUSIVE, vpp); 502 503 TMPFS_UNLOCK(tmp); 504 505 return (rc); 506 } 507 508 /* --------------------------------------------------------------------- */ 509 510 /* ARGSUSED2 */ 511 static int 512 tmpfs_statfs(struct mount *mp, struct statfs *sbp, struct ucred *cred) 513 { 514 fsfilcnt_t freenodes; 515 struct tmpfs_mount *tmp; 516 517 tmp = VFS_TO_TMPFS(mp); 518 519 TMPFS_LOCK(tmp); 520 sbp->f_iosize = PAGE_SIZE; 521 sbp->f_bsize = PAGE_SIZE; 522 523 sbp->f_blocks = tmp->tm_pages_max; 524 sbp->f_bavail = tmp->tm_pages_max - tmp->tm_pages_used; 525 sbp->f_bfree = sbp->f_bavail; 526 527 freenodes = tmp->tm_nodes_max - tmp->tm_nodes_inuse; 528 529 sbp->f_files = freenodes + tmp->tm_nodes_inuse; 530 sbp->f_ffree = freenodes; 531 sbp->f_owner = tmp->tm_root->tn_uid; 532 533 TMPFS_UNLOCK(tmp); 534 535 return 0; 536 } 537 538 /* --------------------------------------------------------------------- */ 539 540 static int 541 tmpfs_vptofh(struct vnode *vp, struct fid *fhp) 542 { 543 struct tmpfs_node *node; 544 struct tmpfs_fid tfh; 545 node = VP_TO_TMPFS_NODE(vp); 546 memset(&tfh, 0, sizeof(tfh)); 547 tfh.tf_len = sizeof(struct tmpfs_fid); 548 tfh.tf_gen = node->tn_gen; 549 tfh.tf_id = node->tn_id; 550 memcpy(fhp, &tfh, sizeof(tfh)); 551 return (0); 552 } 553 554 /* --------------------------------------------------------------------- */ 555 556 static int 557 tmpfs_checkexp(struct mount *mp, struct sockaddr *nam, int *exflagsp, 558 struct ucred **credanonp) 559 { 560 struct tmpfs_mount *tmp; 561 struct netcred *nc; 562 563 tmp = (struct tmpfs_mount *) mp->mnt_data; 564 nc = vfs_export_lookup(mp, &tmp->tm_export, nam); 565 if (nc == NULL) 566 return (EACCES); 567 568 *exflagsp = nc->netc_exflags; 569 *credanonp = &nc->netc_anon; 570 571 return (0); 572 } 573 574 /* --------------------------------------------------------------------- */ 575 576 /* 577 * tmpfs vfs operations. 578 */ 579 580 static struct vfsops tmpfs_vfsops = { 581 .vfs_mount = tmpfs_mount, 582 .vfs_unmount = tmpfs_unmount, 583 .vfs_root = tmpfs_root, 584 .vfs_statfs = tmpfs_statfs, 585 .vfs_fhtovp = tmpfs_fhtovp, 586 .vfs_vptofh = tmpfs_vptofh, 587 .vfs_sync = vfs_stdsync, 588 .vfs_checkexp = tmpfs_checkexp, 589 }; 590 591 VFS_SET(tmpfs_vfsops, tmpfs, 0); 592 MODULE_VERSION(tmpfs, 1); 593