1 /* $NetBSD: tmpfs_vfsops.c,v 1.10 2005/12/11 12:24:29 christos Exp $ */ 2 3 /*- 4 * Copyright (c) 2005 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Julio M. Merino Vidal, developed as part of Google's Summer of Code 9 * 2005 program. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 23 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 30 * POSSIBILITY OF SUCH DAMAGE. 31 */ 32 33 /* 34 * Efficient memory file system. 35 * 36 * tmpfs is a file system that uses virtual memory to store file data and 37 * metadata efficiently. It does not follow the structure of an on-disk 38 * file system because it simply does not need to. Instead, it uses 39 * memory-specific data structures and algorithms to automatically 40 * allocate and release resources. 41 */ 42 43 #include <sys/conf.h> 44 #include <sys/param.h> 45 #include <sys/limits.h> 46 #include <sys/lock.h> 47 #include <sys/kernel.h> 48 #include <sys/stat.h> 49 #include <sys/systm.h> 50 #include <sys/sysctl.h> 51 #include <sys/objcache.h> 52 53 #include <vm/vm.h> 54 #include <vm/vm_object.h> 55 #include <vm/vm_param.h> 56 57 #if 0 58 #include <vfs/tmpfs/tmpfs.h> 59 #endif 60 #include "tmpfs.h" 61 #include <vfs/tmpfs/tmpfs_vnops.h> 62 #include <vfs/tmpfs/tmpfs_mount.h> 63 64 /* 65 * Default permission for root node 66 */ 67 #define TMPFS_DEFAULT_ROOT_MODE (S_IRWXU|S_IRGRP|S_IXGRP|S_IROTH|S_IXOTH) 68 69 MALLOC_DEFINE(M_TMPFSMNT, "tmpfs mount", "tmpfs mount structures"); 70 71 /* --------------------------------------------------------------------- */ 72 73 static int tmpfs_mount(struct mount *, char *, caddr_t, struct ucred *); 74 static int tmpfs_unmount(struct mount *, int); 75 static int tmpfs_root(struct mount *, struct vnode **); 76 static int tmpfs_fhtovp(struct mount *, struct vnode *, struct fid *, struct vnode **); 77 static int tmpfs_statfs(struct mount *, struct statfs *, struct ucred *cred); 78 79 /* --------------------------------------------------------------------- */ 80 boolean_t 81 tmpfs_node_ctor(void *obj, void *privdata, int flags) 82 { 83 struct tmpfs_node *node = obj; 84 85 node->tn_gen++; 86 node->tn_size = 0; 87 node->tn_status = 0; 88 node->tn_flags = 0; 89 node->tn_links = 0; 90 node->tn_vnode = NULL; 91 node->tn_vpstate = 0; 92 bzero(&node->tn_spec, sizeof(node->tn_spec)); 93 94 return (TRUE); 95 } 96 97 static void 98 tmpfs_node_dtor(void *obj, void *privdata) 99 { 100 struct tmpfs_node *node = (struct tmpfs_node *)obj; 101 node->tn_type = VNON; 102 node->tn_vpstate = TMPFS_VNODE_DOOMED; 103 } 104 105 static void * 106 tmpfs_node_init(void *args, int flags) 107 { 108 struct tmpfs_node *node; 109 110 node = objcache_malloc_alloc(args, flags); 111 if (node == NULL) 112 return (NULL); 113 node->tn_id = 0; 114 node->tn_blksize = PAGE_SIZE; /* start small */ 115 116 lockinit(&node->tn_interlock, "tmpfs node interlock", 0, LK_CANRECURSE); 117 node->tn_gen = karc4random(); 118 119 return node; 120 } 121 122 static void 123 tmpfs_node_fini(void *obj, void *args) 124 { 125 struct tmpfs_node *node = (struct tmpfs_node *)obj; 126 lockuninit(&node->tn_interlock); 127 objcache_malloc_free(obj, args); 128 } 129 130 static int 131 tmpfs_mount(struct mount *mp, char *path, caddr_t data, struct ucred *cred) 132 { 133 struct tmpfs_mount *tmp; 134 struct tmpfs_node *root; 135 struct tmpfs_mount_info args; 136 vm_pindex_t pages; 137 vm_pindex_t pages_limit; 138 ino_t nodes; 139 u_int64_t maxfsize; 140 int error; 141 /* Size counters. */ 142 ino_t nodes_max; 143 off_t size_max; 144 size_t maxfsize_max; 145 size_t size; 146 147 /* Root node attributes. */ 148 uid_t root_uid = cred->cr_uid; 149 gid_t root_gid = cred->cr_gid; 150 mode_t root_mode = (VREAD | VWRITE); 151 152 if (mp->mnt_flag & MNT_UPDATE) { 153 /* XXX: There is no support yet to update file system 154 * settings. Should be added. */ 155 156 return EOPNOTSUPP; 157 } 158 159 /* 160 * mount info 161 */ 162 bzero(&args, sizeof(args)); 163 size_max = 0; 164 nodes_max = 0; 165 maxfsize_max = 0; 166 167 if (path) { 168 if (data) { 169 error = copyin(data, &args, sizeof(args)); 170 if (error) 171 return (error); 172 } 173 size_max = args.ta_size_max; 174 nodes_max = args.ta_nodes_max; 175 maxfsize_max = args.ta_maxfsize_max; 176 root_uid = args.ta_root_uid; 177 root_gid = args.ta_root_gid; 178 root_mode = args.ta_root_mode; 179 } 180 181 /* 182 * If mount by non-root, then verify that user has necessary 183 * permissions on the device. 184 */ 185 if (cred->cr_uid != 0) { 186 root_mode = VREAD; 187 if ((mp->mnt_flag & MNT_RDONLY) == 0) 188 root_mode |= VWRITE; 189 } 190 191 pages_limit = vm_swap_max + vmstats.v_page_count / 2; 192 193 if (size_max == 0) { 194 pages = pages_limit / 2; 195 } else if (size_max < PAGE_SIZE) { 196 pages = 1; 197 } else if (OFF_TO_IDX(size_max) > pages_limit) { 198 /* 199 * do not force pages = pages_limit for this case, otherwise 200 * we might not honor tmpfs size requests from /etc/fstab 201 * during boot because they are mounted prior to swap being 202 * turned on. 203 */ 204 pages = OFF_TO_IDX(size_max); 205 } else { 206 pages = OFF_TO_IDX(size_max); 207 } 208 209 if (nodes_max == 0) 210 nodes = 3 + pages * PAGE_SIZE / 1024; 211 else if (nodes_max < 3) 212 nodes = 3; 213 else if (nodes_max > pages) 214 nodes = pages; 215 else 216 nodes = nodes_max; 217 218 maxfsize = 0x7FFFFFFFFFFFFFFFLLU - TMPFS_BLKSIZE; 219 if (maxfsize_max != 0 && maxfsize > maxfsize_max) 220 maxfsize = maxfsize_max; 221 222 /* Allocate the tmpfs mount structure and fill it. */ 223 tmp = kmalloc(sizeof(*tmp), M_TMPFSMNT, M_WAITOK | M_ZERO); 224 225 tmp->tm_mount = mp; 226 tmp->tm_nodes_max = nodes; 227 tmp->tm_nodes_inuse = 0; 228 tmp->tm_maxfilesize = maxfsize; 229 LIST_INIT(&tmp->tm_nodes_used); 230 231 tmp->tm_pages_max = pages; 232 tmp->tm_pages_used = 0; 233 234 kmalloc_create(&tmp->tm_node_zone, "tmpfs node"); 235 kmalloc_create(&tmp->tm_dirent_zone, "tmpfs dirent"); 236 kmalloc_create(&tmp->tm_name_zone, "tmpfs name zone"); 237 238 kmalloc_raise_limit(tmp->tm_node_zone, sizeof(struct tmpfs_node) * 239 tmp->tm_nodes_max); 240 241 tmp->tm_node_zone_malloc_args.objsize = sizeof(struct tmpfs_node); 242 tmp->tm_node_zone_malloc_args.mtype = tmp->tm_node_zone; 243 244 tmp->tm_dirent_zone_malloc_args.objsize = sizeof(struct tmpfs_dirent); 245 tmp->tm_dirent_zone_malloc_args.mtype = tmp->tm_dirent_zone; 246 247 tmp->tm_dirent_pool = objcache_create( "tmpfs dirent cache", 248 0, 0, 249 NULL, NULL, NULL, 250 objcache_malloc_alloc, objcache_malloc_free, 251 &tmp->tm_dirent_zone_malloc_args); 252 tmp->tm_node_pool = objcache_create( "tmpfs node cache", 253 0, 0, 254 tmpfs_node_ctor, tmpfs_node_dtor, NULL, 255 tmpfs_node_init, tmpfs_node_fini, 256 &tmp->tm_node_zone_malloc_args); 257 258 tmp->tm_ino = TMPFS_ROOTINO; 259 260 /* Allocate the root node. */ 261 error = tmpfs_alloc_node(tmp, VDIR, root_uid, root_gid, 262 root_mode & ALLPERMS, NULL, 263 VNOVAL, VNOVAL, &root); 264 265 /* 266 * We are backed by swap, set snocache chflags flag so we 267 * don't trip over swapcache. 268 */ 269 root->tn_flags = SF_NOCACHE; 270 271 if (error != 0 || root == NULL) { 272 objcache_destroy(tmp->tm_node_pool); 273 objcache_destroy(tmp->tm_dirent_pool); 274 kfree(tmp, M_TMPFSMNT); 275 return error; 276 } 277 KASSERT(root->tn_id == TMPFS_ROOTINO, 278 ("tmpfs root with invalid ino: %ju", (uintmax_t)root->tn_id)); 279 280 atomic_add_int(&root->tn_links, 1); /* keep around */ 281 tmp->tm_root = root; 282 283 mp->mnt_flag |= MNT_LOCAL; 284 mp->mnt_kern_flag |= MNTK_ALL_MPSAFE; 285 mp->mnt_kern_flag |= MNTK_NOMSYNC; 286 mp->mnt_kern_flag |= MNTK_THR_SYNC; /* new vsyncscan semantics */ 287 mp->mnt_kern_flag |= MNTK_QUICKHALT; /* no teardown needed on halt */ 288 mp->mnt_data = (qaddr_t)tmp; 289 mp->mnt_iosize_max = MAXBSIZE; 290 vfs_getnewfsid(mp); 291 292 vfs_add_vnodeops(mp, &tmpfs_vnode_vops, &mp->mnt_vn_norm_ops); 293 vfs_add_vnodeops(mp, &tmpfs_fifo_vops, &mp->mnt_vn_fifo_ops); 294 295 copystr("tmpfs", mp->mnt_stat.f_mntfromname, MNAMELEN - 1, &size); 296 bzero(mp->mnt_stat.f_mntfromname +size, MNAMELEN - size); 297 bzero(mp->mnt_stat.f_mntonname, sizeof(mp->mnt_stat.f_mntonname)); 298 copyinstr(path, mp->mnt_stat.f_mntonname, 299 sizeof(mp->mnt_stat.f_mntonname) -1, 300 &size); 301 302 tmpfs_statfs(mp, &mp->mnt_stat, cred); 303 304 return 0; 305 } 306 307 /* --------------------------------------------------------------------- */ 308 309 /* ARGSUSED2 */ 310 static int 311 tmpfs_unmount(struct mount *mp, int mntflags) 312 { 313 int error; 314 int flags = 0; 315 struct tmpfs_mount *tmp; 316 struct tmpfs_node *node; 317 struct vnode *vp; 318 int isok; 319 320 tmp = VFS_TO_TMPFS(mp); 321 TMPFS_LOCK(tmp); 322 323 /* Handle forced unmounts. */ 324 if (mntflags & MNT_FORCE) 325 flags |= FORCECLOSE; 326 327 /* 328 * Finalize all pending I/O. In the case of tmpfs we want 329 * to throw all the data away so clean out the buffer cache 330 * and vm objects before calling vflush(). 331 */ 332 LIST_FOREACH(node, &tmp->tm_nodes_used, tn_entries) { 333 /* 334 * tn_links is mnt_token protected 335 */ 336 atomic_add_int(&node->tn_links, 1); 337 TMPFS_NODE_LOCK(node); 338 339 while (node->tn_type == VREG && node->tn_vnode) { 340 vp = node->tn_vnode; 341 vhold(vp); 342 TMPFS_NODE_UNLOCK(node); 343 lwkt_yield(); 344 345 /* 346 * vx_get/vx_put and tmpfs_truncate may block, 347 * releasing the tmpfs mountpoint token. 348 * 349 * Make sure the lock order is correct. 350 */ 351 vx_get(vp); /* held vnode */ 352 TMPFS_NODE_LOCK(node); 353 if (node->tn_vnode == vp) { 354 tmpfs_truncate(vp, 0); 355 isok = 1; 356 } else { 357 isok = 0; 358 } 359 TMPFS_NODE_UNLOCK(node); 360 vx_put(vp); 361 vdrop(vp); 362 TMPFS_NODE_LOCK(node); 363 if (isok) 364 break; 365 /* retry */ 366 } 367 368 TMPFS_NODE_UNLOCK(node); 369 atomic_add_int(&node->tn_links, -1); 370 } 371 372 /* 373 * Flush all vnodes on the unmount. 374 * 375 * If we fail to flush, we cannot unmount, but all the nodes have 376 * already been truncated. Erroring out is the best we can do. 377 */ 378 error = vflush(mp, 0, flags); 379 if (error != 0) { 380 TMPFS_UNLOCK(tmp); 381 return (error); 382 } 383 384 /* 385 * First pass get rid of all the directory entries and 386 * vnode associations. This will also destroy the 387 * directory topology and should drop all link counts 388 * to 0 except for the root. 389 * 390 * No vnodes should remain after the vflush above. 391 */ 392 LIST_FOREACH(node, &tmp->tm_nodes_used, tn_entries) { 393 lwkt_yield(); 394 395 atomic_add_int(&node->tn_links, 1); 396 TMPFS_NODE_LOCK(node); 397 if (node->tn_type == VDIR) { 398 struct tmpfs_dirent *de; 399 400 while ((de = RB_ROOT(&node->tn_dir.tn_dirtree)) != NULL) { 401 tmpfs_dir_detach_locked(node, de); 402 tmpfs_free_dirent(tmp, de); 403 } 404 } 405 KKASSERT(node->tn_vnode == NULL); 406 407 TMPFS_NODE_UNLOCK(node); 408 atomic_add_int(&node->tn_links, -1); 409 } 410 411 /* 412 * Allow the root node to be destroyed by dropping the link count 413 * we bumped in the mount code. 414 */ 415 KKASSERT(tmp->tm_root); 416 TMPFS_NODE_LOCK(tmp->tm_root); 417 atomic_add_int(&tmp->tm_root->tn_links, -1); 418 TMPFS_NODE_UNLOCK(tmp->tm_root); 419 420 /* 421 * At this point all nodes, including the root node, should have a 422 * link count of 0. The root is not necessarily going to be last. 423 */ 424 while ((node = LIST_FIRST(&tmp->tm_nodes_used)) != NULL) { 425 if (node->tn_links) 426 panic("tmpfs: Dangling nodes during umount (%p)!\n", 427 node); 428 429 TMPFS_NODE_LOCK(node); 430 tmpfs_free_node(tmp, node); 431 /* eats lock */ 432 lwkt_yield(); 433 } 434 KKASSERT(tmp->tm_root == NULL); 435 436 objcache_destroy(tmp->tm_dirent_pool); 437 objcache_destroy(tmp->tm_node_pool); 438 439 kmalloc_destroy(&tmp->tm_name_zone); 440 kmalloc_destroy(&tmp->tm_dirent_zone); 441 kmalloc_destroy(&tmp->tm_node_zone); 442 443 tmp->tm_node_zone = tmp->tm_dirent_zone = NULL; 444 445 KKASSERT(tmp->tm_pages_used == 0); 446 KKASSERT(tmp->tm_nodes_inuse == 0); 447 448 TMPFS_UNLOCK(tmp); 449 450 /* Throw away the tmpfs_mount structure. */ 451 kfree(tmp, M_TMPFSMNT); 452 mp->mnt_data = NULL; 453 454 mp->mnt_flag &= ~MNT_LOCAL; 455 return 0; 456 } 457 458 /* --------------------------------------------------------------------- */ 459 460 static int 461 tmpfs_root(struct mount *mp, struct vnode **vpp) 462 { 463 struct tmpfs_mount *tmp; 464 int error; 465 466 tmp = VFS_TO_TMPFS(mp); 467 if (tmp->tm_root == NULL) { 468 kprintf("tmpfs_root: called without root node %p\n", mp); 469 print_backtrace(-1); 470 *vpp = NULL; 471 error = EINVAL; 472 } else { 473 error = tmpfs_alloc_vp(mp, NULL, tmp->tm_root, 474 LK_EXCLUSIVE, vpp); 475 (*vpp)->v_flag |= VROOT; 476 (*vpp)->v_type = VDIR; 477 } 478 return error; 479 } 480 481 /* --------------------------------------------------------------------- */ 482 483 static int 484 tmpfs_fhtovp(struct mount *mp, struct vnode *rootvp, struct fid *fhp, 485 struct vnode **vpp) 486 { 487 boolean_t found; 488 struct tmpfs_fid *tfhp; 489 struct tmpfs_mount *tmp; 490 struct tmpfs_node *node; 491 int rc; 492 493 tmp = VFS_TO_TMPFS(mp); 494 495 tfhp = (struct tmpfs_fid *) fhp; 496 if (tfhp->tf_len != sizeof(struct tmpfs_fid)) 497 return EINVAL; 498 499 rc = EINVAL; 500 found = FALSE; 501 502 TMPFS_LOCK(tmp); 503 LIST_FOREACH(node, &tmp->tm_nodes_used, tn_entries) { 504 if (node->tn_id == tfhp->tf_id && 505 node->tn_gen == tfhp->tf_gen) { 506 found = TRUE; 507 break; 508 } 509 } 510 511 if (found) 512 rc = tmpfs_alloc_vp(mp, NULL, node, LK_EXCLUSIVE, vpp); 513 514 TMPFS_UNLOCK(tmp); 515 516 return (rc); 517 } 518 519 /* --------------------------------------------------------------------- */ 520 521 /* ARGSUSED2 */ 522 static int 523 tmpfs_statfs(struct mount *mp, struct statfs *sbp, struct ucred *cred) 524 { 525 fsfilcnt_t freenodes; 526 struct tmpfs_mount *tmp; 527 528 tmp = VFS_TO_TMPFS(mp); 529 530 /* TMPFS_LOCK(tmp); not really needed */ 531 532 sbp->f_iosize = PAGE_SIZE; 533 sbp->f_bsize = PAGE_SIZE; 534 535 sbp->f_blocks = tmp->tm_pages_max; 536 sbp->f_bavail = tmp->tm_pages_max - tmp->tm_pages_used; 537 sbp->f_bfree = sbp->f_bavail; 538 539 freenodes = tmp->tm_nodes_max - tmp->tm_nodes_inuse; 540 541 sbp->f_files = freenodes + tmp->tm_nodes_inuse; 542 sbp->f_ffree = freenodes; 543 sbp->f_owner = tmp->tm_root->tn_uid; 544 545 /* TMPFS_UNLOCK(tmp); */ 546 547 return 0; 548 } 549 550 /* --------------------------------------------------------------------- */ 551 552 static int 553 tmpfs_vptofh(struct vnode *vp, struct fid *fhp) 554 { 555 struct tmpfs_node *node; 556 struct tmpfs_fid tfh; 557 node = VP_TO_TMPFS_NODE(vp); 558 memset(&tfh, 0, sizeof(tfh)); 559 tfh.tf_len = sizeof(struct tmpfs_fid); 560 tfh.tf_gen = node->tn_gen; 561 tfh.tf_id = node->tn_id; 562 memcpy(fhp, &tfh, sizeof(tfh)); 563 return (0); 564 } 565 566 /* --------------------------------------------------------------------- */ 567 568 static int 569 tmpfs_checkexp(struct mount *mp, struct sockaddr *nam, int *exflagsp, 570 struct ucred **credanonp) 571 { 572 struct tmpfs_mount *tmp; 573 struct netcred *nc; 574 575 tmp = (struct tmpfs_mount *) mp->mnt_data; 576 nc = vfs_export_lookup(mp, &tmp->tm_export, nam); 577 if (nc == NULL) 578 return (EACCES); 579 580 *exflagsp = nc->netc_exflags; 581 *credanonp = &nc->netc_anon; 582 583 return (0); 584 } 585 586 /* --------------------------------------------------------------------- */ 587 588 /* 589 * tmpfs vfs operations. 590 */ 591 592 static struct vfsops tmpfs_vfsops = { 593 .vfs_flags = 0, 594 .vfs_mount = tmpfs_mount, 595 .vfs_unmount = tmpfs_unmount, 596 .vfs_root = tmpfs_root, 597 .vfs_statfs = tmpfs_statfs, 598 .vfs_fhtovp = tmpfs_fhtovp, 599 .vfs_vptofh = tmpfs_vptofh, 600 .vfs_checkexp = tmpfs_checkexp, 601 }; 602 603 VFS_SET(tmpfs_vfsops, tmpfs, VFCF_MPSAFE); 604 MODULE_VERSION(tmpfs, 1); 605