1 /* $NetBSD: tmpfs_vfsops.c,v 1.10 2005/12/11 12:24:29 christos Exp $ */ 2 3 /*- 4 * Copyright (c) 2005 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Julio M. Merino Vidal, developed as part of Google's Summer of Code 9 * 2005 program. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 23 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 30 * POSSIBILITY OF SUCH DAMAGE. 31 */ 32 33 /* 34 * Efficient memory file system. 35 * 36 * tmpfs is a file system that uses virtual memory to store file data and 37 * metadata efficiently. It does not follow the structure of an on-disk 38 * file system because it simply does not need to. Instead, it uses 39 * memory-specific data structures and algorithms to automatically 40 * allocate and release resources. 41 */ 42 43 #include <sys/conf.h> 44 #include <sys/param.h> 45 #include <sys/limits.h> 46 #include <sys/lock.h> 47 #include <sys/mutex.h> 48 #include <sys/kernel.h> 49 #include <sys/stat.h> 50 #include <sys/systm.h> 51 #include <sys/sysctl.h> 52 #include <sys/objcache.h> 53 54 #include <vm/vm.h> 55 #include <vm/vm_object.h> 56 #include <vm/vm_param.h> 57 58 #if 0 59 #include <vfs/tmpfs/tmpfs.h> 60 #endif 61 #include "tmpfs.h" 62 #include <vfs/tmpfs/tmpfs_vnops.h> 63 #include <vfs/tmpfs/tmpfs_mount.h> 64 65 /* 66 * Default permission for root node 67 */ 68 #define TMPFS_DEFAULT_ROOT_MODE (S_IRWXU|S_IRGRP|S_IXGRP|S_IROTH|S_IXOTH) 69 70 MALLOC_DEFINE(M_TMPFSMNT, "tmpfs mount", "tmpfs mount structures"); 71 72 /* --------------------------------------------------------------------- */ 73 74 static int tmpfs_mount(struct mount *, char *, caddr_t, struct ucred *); 75 static int tmpfs_unmount(struct mount *, int); 76 static int tmpfs_root(struct mount *, struct vnode **); 77 static int tmpfs_fhtovp(struct mount *, struct vnode *, struct fid *, struct vnode **); 78 static int tmpfs_statfs(struct mount *, struct statfs *, struct ucred *cred); 79 80 /* --------------------------------------------------------------------- */ 81 boolean_t 82 tmpfs_node_ctor(void *obj, void *privdata, int flags) 83 { 84 struct tmpfs_node *node = obj; 85 86 node->tn_gen++; 87 node->tn_size = 0; 88 node->tn_status = 0; 89 node->tn_flags = 0; 90 node->tn_links = 0; 91 node->tn_vnode = NULL; 92 node->tn_vpstate = TMPFS_VNODE_WANT; 93 bzero(&node->tn_spec, sizeof(node->tn_spec)); 94 95 return (TRUE); 96 } 97 98 static void 99 tmpfs_node_dtor(void *obj, void *privdata) 100 { 101 struct tmpfs_node *node = (struct tmpfs_node *)obj; 102 node->tn_type = VNON; 103 node->tn_vpstate = TMPFS_VNODE_DOOMED; 104 } 105 106 static void * 107 tmpfs_node_init(void *args, int flags) 108 { 109 struct tmpfs_node *node = objcache_malloc_alloc(args, flags); 110 if (node == NULL) 111 return (NULL); 112 node->tn_id = 0; 113 114 lockinit(&node->tn_interlock, "tmpfs node interlock", 0, LK_CANRECURSE); 115 node->tn_gen = karc4random(); 116 117 return node; 118 } 119 120 static void 121 tmpfs_node_fini(void *obj, void *args) 122 { 123 struct tmpfs_node *node = (struct tmpfs_node *)obj; 124 lockuninit(&node->tn_interlock); 125 objcache_malloc_free(obj, args); 126 } 127 128 static int 129 tmpfs_mount(struct mount *mp, char *path, caddr_t data, struct ucred *cred) 130 { 131 struct tmpfs_mount *tmp; 132 struct tmpfs_node *root; 133 struct tmpfs_mount_info args; 134 vm_pindex_t pages; 135 vm_pindex_t pages_limit; 136 ino_t nodes; 137 u_int64_t maxfsize; 138 int error; 139 /* Size counters. */ 140 ino_t nodes_max; 141 off_t size_max; 142 size_t maxfsize_max; 143 size_t size; 144 145 /* Root node attributes. */ 146 uid_t root_uid = cred->cr_uid; 147 gid_t root_gid = cred->cr_gid; 148 mode_t root_mode = (VREAD | VWRITE); 149 150 if (mp->mnt_flag & MNT_UPDATE) { 151 /* XXX: There is no support yet to update file system 152 * settings. Should be added. */ 153 154 return EOPNOTSUPP; 155 } 156 157 /* 158 * mount info 159 */ 160 bzero(&args, sizeof(args)); 161 size_max = 0; 162 nodes_max = 0; 163 maxfsize_max = 0; 164 165 if (path) { 166 if (data) { 167 error = copyin(data, &args, sizeof(args)); 168 if (error) 169 return (error); 170 } 171 size_max = args.ta_size_max; 172 nodes_max = args.ta_nodes_max; 173 maxfsize_max = args.ta_maxfsize_max; 174 root_uid = args.ta_root_uid; 175 root_gid = args.ta_root_gid; 176 root_mode = args.ta_root_mode; 177 } 178 179 /* 180 * If mount by non-root, then verify that user has necessary 181 * permissions on the device. 182 */ 183 if (cred->cr_uid != 0) { 184 root_mode = VREAD; 185 if ((mp->mnt_flag & MNT_RDONLY) == 0) 186 root_mode |= VWRITE; 187 } 188 189 pages_limit = vm_swap_max + vmstats.v_page_count / 2; 190 191 if (size_max == 0) { 192 pages = pages_limit / 2; 193 } else if (size_max < PAGE_SIZE) { 194 pages = 1; 195 } else if (OFF_TO_IDX(size_max) > pages_limit) { 196 /* 197 * do not force pages = pages_limit for this case, otherwise 198 * we might not honor tmpfs size requests from /etc/fstab 199 * during boot because they are mounted prior to swap being 200 * turned on. 201 */ 202 pages = OFF_TO_IDX(size_max); 203 } else { 204 pages = OFF_TO_IDX(size_max); 205 } 206 207 if (nodes_max == 0) 208 nodes = 3 + pages * PAGE_SIZE / 1024; 209 else if (nodes_max < 3) 210 nodes = 3; 211 else if (nodes_max > pages) 212 nodes = pages; 213 else 214 nodes = nodes_max; 215 216 maxfsize = 0x7FFFFFFFFFFFFFFFLLU - TMPFS_BLKSIZE; 217 if (maxfsize_max != 0 && maxfsize > maxfsize_max) 218 maxfsize = maxfsize_max; 219 220 /* Allocate the tmpfs mount structure and fill it. */ 221 tmp = kmalloc(sizeof(*tmp), M_TMPFSMNT, M_WAITOK | M_ZERO); 222 223 tmp->tm_mount = mp; 224 tmp->tm_nodes_max = nodes; 225 tmp->tm_nodes_inuse = 0; 226 tmp->tm_maxfilesize = maxfsize; 227 LIST_INIT(&tmp->tm_nodes_used); 228 229 tmp->tm_pages_max = pages; 230 tmp->tm_pages_used = 0; 231 232 kmalloc_create(&tmp->tm_node_zone, "tmpfs node"); 233 kmalloc_create(&tmp->tm_dirent_zone, "tmpfs dirent"); 234 kmalloc_create(&tmp->tm_name_zone, "tmpfs name zone"); 235 236 kmalloc_raise_limit(tmp->tm_node_zone, sizeof(struct tmpfs_node) * 237 tmp->tm_nodes_max); 238 239 tmp->tm_node_zone_malloc_args.objsize = sizeof(struct tmpfs_node); 240 tmp->tm_node_zone_malloc_args.mtype = tmp->tm_node_zone; 241 242 tmp->tm_dirent_zone_malloc_args.objsize = sizeof(struct tmpfs_dirent); 243 tmp->tm_dirent_zone_malloc_args.mtype = tmp->tm_dirent_zone; 244 245 tmp->tm_dirent_pool = objcache_create( "tmpfs dirent cache", 246 0, 0, 247 NULL, NULL, NULL, 248 objcache_malloc_alloc, objcache_malloc_free, 249 &tmp->tm_dirent_zone_malloc_args); 250 tmp->tm_node_pool = objcache_create( "tmpfs node cache", 251 0, 0, 252 tmpfs_node_ctor, tmpfs_node_dtor, NULL, 253 tmpfs_node_init, tmpfs_node_fini, 254 &tmp->tm_node_zone_malloc_args); 255 256 tmp->tm_ino = TMPFS_ROOTINO; 257 258 /* Allocate the root node. */ 259 error = tmpfs_alloc_node(tmp, VDIR, root_uid, root_gid, 260 root_mode & ALLPERMS, NULL, 261 VNOVAL, VNOVAL, &root); 262 263 /* 264 * We are backed by swap, set snocache chflags flag so we 265 * don't trip over swapcache. 266 */ 267 root->tn_flags = SF_NOCACHE; 268 269 if (error != 0 || root == NULL) { 270 objcache_destroy(tmp->tm_node_pool); 271 objcache_destroy(tmp->tm_dirent_pool); 272 kfree(tmp, M_TMPFSMNT); 273 return error; 274 } 275 KASSERT(root->tn_id == TMPFS_ROOTINO, 276 ("tmpfs root with invalid ino: %ju", (uintmax_t)root->tn_id)); 277 278 ++root->tn_links; /* prevent destruction */ 279 tmp->tm_root = root; 280 281 mp->mnt_flag |= MNT_LOCAL; 282 mp->mnt_kern_flag |= MNTK_ALL_MPSAFE; 283 mp->mnt_kern_flag |= MNTK_NOMSYNC; 284 mp->mnt_kern_flag |= MNTK_THR_SYNC; /* new vsyncscan semantics */ 285 mp->mnt_kern_flag |= MNTK_QUICKHALT; /* no teardown needed on halt */ 286 mp->mnt_data = (qaddr_t)tmp; 287 vfs_getnewfsid(mp); 288 289 vfs_add_vnodeops(mp, &tmpfs_vnode_vops, &mp->mnt_vn_norm_ops); 290 vfs_add_vnodeops(mp, &tmpfs_fifo_vops, &mp->mnt_vn_fifo_ops); 291 292 copystr("tmpfs", mp->mnt_stat.f_mntfromname, MNAMELEN - 1, &size); 293 bzero(mp->mnt_stat.f_mntfromname +size, MNAMELEN - size); 294 bzero(mp->mnt_stat.f_mntonname, sizeof(mp->mnt_stat.f_mntonname)); 295 copyinstr(path, mp->mnt_stat.f_mntonname, 296 sizeof(mp->mnt_stat.f_mntonname) -1, 297 &size); 298 299 tmpfs_statfs(mp, &mp->mnt_stat, cred); 300 301 return 0; 302 } 303 304 /* --------------------------------------------------------------------- */ 305 306 /* ARGSUSED2 */ 307 static int 308 tmpfs_unmount(struct mount *mp, int mntflags) 309 { 310 int error; 311 int flags = 0; 312 struct tmpfs_mount *tmp; 313 struct tmpfs_node *node; 314 struct vnode *vp; 315 int isok; 316 317 tmp = VFS_TO_TMPFS(mp); 318 TMPFS_LOCK(tmp); 319 320 /* Handle forced unmounts. */ 321 if (mntflags & MNT_FORCE) 322 flags |= FORCECLOSE; 323 324 /* 325 * Finalize all pending I/O. In the case of tmpfs we want 326 * to throw all the data away so clean out the buffer cache 327 * and vm objects before calling vflush(). 328 */ 329 LIST_FOREACH(node, &tmp->tm_nodes_used, tn_entries) { 330 /* 331 * tn_links is mnt_token protected 332 */ 333 TMPFS_NODE_LOCK(node); 334 ++node->tn_links; 335 336 while (node->tn_type == VREG && node->tn_vnode) { 337 vp = node->tn_vnode; 338 vhold(vp); 339 TMPFS_NODE_UNLOCK(node); 340 lwkt_yield(); 341 342 /* 343 * vx_get/vx_put and tmpfs_truncate may block, 344 * releasing the tmpfs mountpoint token. 345 * 346 * Make sure the lock order is correct. 347 */ 348 vx_get(vp); /* held vnode */ 349 TMPFS_NODE_LOCK(node); 350 if (node->tn_vnode == vp) { 351 tmpfs_truncate(vp, 0); 352 isok = 1; 353 } else { 354 isok = 0; 355 } 356 TMPFS_NODE_UNLOCK(node); 357 vx_put(vp); 358 vdrop(vp); 359 TMPFS_NODE_LOCK(node); 360 if (isok) 361 break; 362 /* retry */ 363 } 364 365 --node->tn_links; 366 TMPFS_NODE_UNLOCK(node); 367 } 368 369 /* 370 * Flush all vnodes on the unmount. 371 * 372 * If we fail to flush, we cannot unmount, but all the nodes have 373 * already been truncated. Erroring out is the best we can do. 374 */ 375 error = vflush(mp, 0, flags); 376 if (error != 0) { 377 TMPFS_UNLOCK(tmp); 378 return (error); 379 } 380 381 /* 382 * First pass get rid of all the directory entries and 383 * vnode associations. This will also destroy the 384 * directory topology and should drop all link counts 385 * to 0 except for the root. 386 * 387 * No vnodes should remain after the vflush above. 388 */ 389 LIST_FOREACH(node, &tmp->tm_nodes_used, tn_entries) { 390 lwkt_yield(); 391 392 TMPFS_NODE_LOCK(node); 393 ++node->tn_links; 394 if (node->tn_type == VDIR) { 395 struct tmpfs_dirent *de; 396 397 while ((de = RB_ROOT(&node->tn_dir.tn_dirtree)) != NULL) { 398 tmpfs_dir_detach(node, de); 399 tmpfs_free_dirent(tmp, de); 400 } 401 } 402 KKASSERT(node->tn_vnode == NULL); 403 404 --node->tn_links; 405 TMPFS_NODE_UNLOCK(node); 406 } 407 408 /* 409 * Allow the root node to be destroyed by dropping the link count 410 * we bumped in the mount code. 411 */ 412 KKASSERT(tmp->tm_root); 413 TMPFS_NODE_LOCK(tmp->tm_root); 414 --tmp->tm_root->tn_links; 415 TMPFS_NODE_UNLOCK(tmp->tm_root); 416 417 /* 418 * At this point all nodes, including the root node, should have a 419 * link count of 0. The root is not necessarily going to be last. 420 */ 421 while ((node = LIST_FIRST(&tmp->tm_nodes_used)) != NULL) { 422 if (node->tn_links) 423 panic("tmpfs: Dangling nodes during umount (%p)!\n", 424 node); 425 426 TMPFS_NODE_LOCK(node); 427 tmpfs_free_node(tmp, node); 428 /* eats lock */ 429 lwkt_yield(); 430 } 431 KKASSERT(tmp->tm_root == NULL); 432 433 objcache_destroy(tmp->tm_dirent_pool); 434 objcache_destroy(tmp->tm_node_pool); 435 436 kmalloc_destroy(&tmp->tm_name_zone); 437 kmalloc_destroy(&tmp->tm_dirent_zone); 438 kmalloc_destroy(&tmp->tm_node_zone); 439 440 tmp->tm_node_zone = tmp->tm_dirent_zone = NULL; 441 442 KKASSERT(tmp->tm_pages_used == 0); 443 KKASSERT(tmp->tm_nodes_inuse == 0); 444 445 TMPFS_UNLOCK(tmp); 446 447 /* Throw away the tmpfs_mount structure. */ 448 kfree(tmp, M_TMPFSMNT); 449 mp->mnt_data = NULL; 450 451 mp->mnt_flag &= ~MNT_LOCAL; 452 return 0; 453 } 454 455 /* --------------------------------------------------------------------- */ 456 457 static int 458 tmpfs_root(struct mount *mp, struct vnode **vpp) 459 { 460 struct tmpfs_mount *tmp; 461 int error; 462 463 tmp = VFS_TO_TMPFS(mp); 464 if (tmp->tm_root == NULL) { 465 kprintf("tmpfs_root: called without root node %p\n", mp); 466 print_backtrace(-1); 467 *vpp = NULL; 468 error = EINVAL; 469 } else { 470 error = tmpfs_alloc_vp(mp, NULL, tmp->tm_root, 471 LK_EXCLUSIVE, vpp); 472 (*vpp)->v_flag |= VROOT; 473 (*vpp)->v_type = VDIR; 474 } 475 return error; 476 } 477 478 /* --------------------------------------------------------------------- */ 479 480 static int 481 tmpfs_fhtovp(struct mount *mp, struct vnode *rootvp, struct fid *fhp, 482 struct vnode **vpp) 483 { 484 boolean_t found; 485 struct tmpfs_fid *tfhp; 486 struct tmpfs_mount *tmp; 487 struct tmpfs_node *node; 488 int rc; 489 490 tmp = VFS_TO_TMPFS(mp); 491 492 tfhp = (struct tmpfs_fid *) fhp; 493 if (tfhp->tf_len != sizeof(struct tmpfs_fid)) 494 return EINVAL; 495 496 rc = EINVAL; 497 found = FALSE; 498 499 TMPFS_LOCK(tmp); 500 LIST_FOREACH(node, &tmp->tm_nodes_used, tn_entries) { 501 if (node->tn_id == tfhp->tf_id && 502 node->tn_gen == tfhp->tf_gen) { 503 found = TRUE; 504 break; 505 } 506 } 507 508 if (found) 509 rc = tmpfs_alloc_vp(mp, NULL, node, LK_EXCLUSIVE, vpp); 510 511 TMPFS_UNLOCK(tmp); 512 513 return (rc); 514 } 515 516 /* --------------------------------------------------------------------- */ 517 518 /* ARGSUSED2 */ 519 static int 520 tmpfs_statfs(struct mount *mp, struct statfs *sbp, struct ucred *cred) 521 { 522 fsfilcnt_t freenodes; 523 struct tmpfs_mount *tmp; 524 525 tmp = VFS_TO_TMPFS(mp); 526 527 TMPFS_LOCK(tmp); 528 sbp->f_iosize = PAGE_SIZE; 529 sbp->f_bsize = PAGE_SIZE; 530 531 sbp->f_blocks = tmp->tm_pages_max; 532 sbp->f_bavail = tmp->tm_pages_max - tmp->tm_pages_used; 533 sbp->f_bfree = sbp->f_bavail; 534 535 freenodes = tmp->tm_nodes_max - tmp->tm_nodes_inuse; 536 537 sbp->f_files = freenodes + tmp->tm_nodes_inuse; 538 sbp->f_ffree = freenodes; 539 sbp->f_owner = tmp->tm_root->tn_uid; 540 541 TMPFS_UNLOCK(tmp); 542 543 return 0; 544 } 545 546 /* --------------------------------------------------------------------- */ 547 548 static int 549 tmpfs_vptofh(struct vnode *vp, struct fid *fhp) 550 { 551 struct tmpfs_node *node; 552 struct tmpfs_fid tfh; 553 node = VP_TO_TMPFS_NODE(vp); 554 memset(&tfh, 0, sizeof(tfh)); 555 tfh.tf_len = sizeof(struct tmpfs_fid); 556 tfh.tf_gen = node->tn_gen; 557 tfh.tf_id = node->tn_id; 558 memcpy(fhp, &tfh, sizeof(tfh)); 559 return (0); 560 } 561 562 /* --------------------------------------------------------------------- */ 563 564 static int 565 tmpfs_checkexp(struct mount *mp, struct sockaddr *nam, int *exflagsp, 566 struct ucred **credanonp) 567 { 568 struct tmpfs_mount *tmp; 569 struct netcred *nc; 570 571 tmp = (struct tmpfs_mount *) mp->mnt_data; 572 nc = vfs_export_lookup(mp, &tmp->tm_export, nam); 573 if (nc == NULL) 574 return (EACCES); 575 576 *exflagsp = nc->netc_exflags; 577 *credanonp = &nc->netc_anon; 578 579 return (0); 580 } 581 582 /* --------------------------------------------------------------------- */ 583 584 /* 585 * tmpfs vfs operations. 586 */ 587 588 static struct vfsops tmpfs_vfsops = { 589 .vfs_mount = tmpfs_mount, 590 .vfs_unmount = tmpfs_unmount, 591 .vfs_root = tmpfs_root, 592 .vfs_statfs = tmpfs_statfs, 593 .vfs_fhtovp = tmpfs_fhtovp, 594 .vfs_vptofh = tmpfs_vptofh, 595 .vfs_checkexp = tmpfs_checkexp, 596 }; 597 598 VFS_SET(tmpfs_vfsops, tmpfs, VFCF_MPSAFE); 599 MODULE_VERSION(tmpfs, 1); 600