1 /* $NetBSD: tmpfs_vfsops.c,v 1.10 2005/12/11 12:24:29 christos Exp $ */ 2 3 /*- 4 * Copyright (c) 2005 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Julio M. Merino Vidal, developed as part of Google's Summer of Code 9 * 2005 program. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 23 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 30 * POSSIBILITY OF SUCH DAMAGE. 31 */ 32 33 /* 34 * Efficient memory file system. 35 * 36 * tmpfs is a file system that uses virtual memory to store file data and 37 * metadata efficiently. It does not follow the structure of an on-disk 38 * file system because it simply does not need to. Instead, it uses 39 * memory-specific data structures and algorithms to automatically 40 * allocate and release resources. 41 */ 42 43 #include <sys/conf.h> 44 #include <sys/param.h> 45 #include <sys/limits.h> 46 #include <sys/lock.h> 47 #include <sys/mutex.h> 48 #include <sys/kernel.h> 49 #include <sys/stat.h> 50 #include <sys/systm.h> 51 #include <sys/sysctl.h> 52 #include <sys/objcache.h> 53 54 #include <vm/vm.h> 55 #include <vm/vm_object.h> 56 #include <vm/vm_param.h> 57 58 #if 0 59 #include <vfs/tmpfs/tmpfs.h> 60 #endif 61 #include "tmpfs.h" 62 #include <vfs/tmpfs/tmpfs_vnops.h> 63 #include <vfs/tmpfs/tmpfs_args.h> 64 65 /* 66 * Default permission for root node 67 */ 68 #define TMPFS_DEFAULT_ROOT_MODE (S_IRWXU|S_IRGRP|S_IXGRP|S_IROTH|S_IXOTH) 69 70 MALLOC_DEFINE(M_TMPFSMNT, "tmpfs mount", "tmpfs mount structures"); 71 72 /* --------------------------------------------------------------------- */ 73 74 static int tmpfs_mount(struct mount *, char *, caddr_t, struct ucred *); 75 static int tmpfs_unmount(struct mount *, int); 76 static int tmpfs_root(struct mount *, struct vnode **); 77 static int tmpfs_fhtovp(struct mount *, struct vnode *, struct fid *, struct vnode **); 78 static int tmpfs_statfs(struct mount *, struct statfs *, struct ucred *cred); 79 80 /* --------------------------------------------------------------------- */ 81 boolean_t 82 tmpfs_node_ctor(void *obj, void *privdata, int flags) 83 { 84 struct tmpfs_node *node = obj; 85 86 node->tn_gen++; 87 node->tn_size = 0; 88 node->tn_status = 0; 89 node->tn_flags = 0; 90 node->tn_links = 0; 91 node->tn_vnode = NULL; 92 node->tn_vpstate = TMPFS_VNODE_WANT; 93 bzero(&node->tn_spec, sizeof(node->tn_spec)); 94 95 return (TRUE); 96 } 97 98 static void 99 tmpfs_node_dtor(void *obj, void *privdata) 100 { 101 struct tmpfs_node *node = (struct tmpfs_node *)obj; 102 node->tn_type = VNON; 103 node->tn_vpstate = TMPFS_VNODE_DOOMED; 104 } 105 106 static void * 107 tmpfs_node_init(void *args, int flags) 108 { 109 struct tmpfs_node *node = objcache_malloc_alloc(args, flags); 110 if (node == NULL) 111 return (NULL); 112 node->tn_id = 0; 113 114 lockinit(&node->tn_interlock, "tmpfs node interlock", 0, LK_CANRECURSE); 115 node->tn_gen = karc4random(); 116 117 return node; 118 } 119 120 static void 121 tmpfs_node_fini(void *obj, void *args) 122 { 123 struct tmpfs_node *node = (struct tmpfs_node *)obj; 124 lockuninit(&node->tn_interlock); 125 objcache_malloc_free(obj, args); 126 } 127 128 static int 129 tmpfs_mount(struct mount *mp, char *path, caddr_t data, struct ucred *cred) 130 { 131 struct tmpfs_mount *tmp; 132 struct tmpfs_node *root; 133 struct tmpfs_args args; 134 vm_pindex_t pages; 135 vm_pindex_t pages_limit; 136 ino_t nodes; 137 u_int64_t maxfsize; 138 int error; 139 /* Size counters. */ 140 ino_t nodes_max; 141 off_t size_max; 142 size_t maxfsize_max; 143 size_t size; 144 145 /* Root node attributes. */ 146 uid_t root_uid = cred->cr_uid; 147 gid_t root_gid = cred->cr_gid; 148 mode_t root_mode = (VREAD | VWRITE); 149 150 if (mp->mnt_flag & MNT_UPDATE) { 151 /* XXX: There is no support yet to update file system 152 * settings. Should be added. */ 153 154 return EOPNOTSUPP; 155 } 156 157 /* 158 * mount info 159 */ 160 bzero(&args, sizeof(args)); 161 size_max = 0; 162 nodes_max = 0; 163 maxfsize_max = 0; 164 165 if (path) { 166 if (data) { 167 error = copyin(data, &args, sizeof(args)); 168 if (error) 169 return (error); 170 } 171 size_max = args.ta_size_max; 172 nodes_max = args.ta_nodes_max; 173 maxfsize_max = args.ta_maxfsize_max; 174 root_uid = args.ta_root_uid; 175 root_gid = args.ta_root_gid; 176 root_mode = args.ta_root_mode; 177 } 178 179 /* 180 * If mount by non-root, then verify that user has necessary 181 * permissions on the device. 182 */ 183 if (cred->cr_uid != 0) { 184 root_mode = VREAD; 185 if ((mp->mnt_flag & MNT_RDONLY) == 0) 186 root_mode |= VWRITE; 187 } 188 189 pages_limit = vm_swap_max + vmstats.v_page_count / 2; 190 191 if (size_max == 0) { 192 pages = pages_limit / 2; 193 } else if (size_max < PAGE_SIZE) { 194 pages = 1; 195 } else if (OFF_TO_IDX(size_max) > pages_limit) { 196 /* 197 * do not force pages = pages_limit for this case, otherwise 198 * we might not honor tmpfs size requests from /etc/fstab 199 * during boot because they are mounted prior to swap being 200 * turned on. 201 */ 202 pages = OFF_TO_IDX(size_max); 203 } else { 204 pages = OFF_TO_IDX(size_max); 205 } 206 207 if (nodes_max == 0) 208 nodes = 3 + pages * PAGE_SIZE / 1024; 209 else if (nodes_max < 3) 210 nodes = 3; 211 else if (nodes_max > pages) 212 nodes = pages; 213 else 214 nodes = nodes_max; 215 216 maxfsize = IDX_TO_OFF(pages_limit); 217 if (maxfsize_max != 0 && maxfsize > maxfsize_max) 218 maxfsize = maxfsize_max; 219 220 /* Allocate the tmpfs mount structure and fill it. */ 221 tmp = kmalloc(sizeof(*tmp), M_TMPFSMNT, M_WAITOK | M_ZERO); 222 223 tmp->tm_mount = mp; 224 tmp->tm_nodes_max = nodes; 225 tmp->tm_nodes_inuse = 0; 226 tmp->tm_maxfilesize = maxfsize; 227 LIST_INIT(&tmp->tm_nodes_used); 228 229 tmp->tm_pages_max = pages; 230 tmp->tm_pages_used = 0; 231 232 kmalloc_create(&tmp->tm_node_zone, "tmpfs node"); 233 kmalloc_create(&tmp->tm_dirent_zone, "tmpfs dirent"); 234 kmalloc_create(&tmp->tm_name_zone, "tmpfs name zone"); 235 236 kmalloc_raise_limit(tmp->tm_node_zone, sizeof(struct tmpfs_node) * 237 tmp->tm_nodes_max); 238 239 tmp->tm_node_zone_malloc_args.objsize = sizeof(struct tmpfs_node); 240 tmp->tm_node_zone_malloc_args.mtype = tmp->tm_node_zone; 241 242 tmp->tm_dirent_zone_malloc_args.objsize = sizeof(struct tmpfs_dirent); 243 tmp->tm_dirent_zone_malloc_args.mtype = tmp->tm_dirent_zone; 244 245 tmp->tm_dirent_pool = objcache_create( "tmpfs dirent cache", 246 0, 0, 247 NULL, NULL, NULL, 248 objcache_malloc_alloc, objcache_malloc_free, 249 &tmp->tm_dirent_zone_malloc_args); 250 tmp->tm_node_pool = objcache_create( "tmpfs node cache", 251 0, 0, 252 tmpfs_node_ctor, tmpfs_node_dtor, NULL, 253 tmpfs_node_init, tmpfs_node_fini, 254 &tmp->tm_node_zone_malloc_args); 255 256 tmp->tm_ino = TMPFS_ROOTINO; 257 258 /* Allocate the root node. */ 259 error = tmpfs_alloc_node(tmp, VDIR, root_uid, root_gid, 260 root_mode & ALLPERMS, NULL, 261 VNOVAL, VNOVAL, &root); 262 263 /* 264 * We are backed by swap, set snocache chflags flag so we 265 * don't trip over swapcache. 266 */ 267 root->tn_flags = SF_NOCACHE; 268 269 if (error != 0 || root == NULL) { 270 objcache_destroy(tmp->tm_node_pool); 271 objcache_destroy(tmp->tm_dirent_pool); 272 kfree(tmp, M_TMPFSMNT); 273 return error; 274 } 275 KASSERT(root->tn_id == TMPFS_ROOTINO, 276 ("tmpfs root with invalid ino: %ju", (uintmax_t)root->tn_id)); 277 278 ++root->tn_links; /* prevent destruction */ 279 tmp->tm_root = root; 280 281 mp->mnt_flag |= MNT_LOCAL; 282 mp->mnt_kern_flag |= MNTK_ALL_MPSAFE; 283 mp->mnt_kern_flag |= MNTK_NOMSYNC; 284 mp->mnt_kern_flag |= MNTK_THR_SYNC; /* new vsyncscan semantics */ 285 mp->mnt_data = (qaddr_t)tmp; 286 vfs_getnewfsid(mp); 287 288 vfs_add_vnodeops(mp, &tmpfs_vnode_vops, &mp->mnt_vn_norm_ops); 289 vfs_add_vnodeops(mp, &tmpfs_fifo_vops, &mp->mnt_vn_fifo_ops); 290 291 copystr("tmpfs", mp->mnt_stat.f_mntfromname, MNAMELEN - 1, &size); 292 bzero(mp->mnt_stat.f_mntfromname +size, MNAMELEN - size); 293 bzero(mp->mnt_stat.f_mntonname, sizeof(mp->mnt_stat.f_mntonname)); 294 copyinstr(path, mp->mnt_stat.f_mntonname, 295 sizeof(mp->mnt_stat.f_mntonname) -1, 296 &size); 297 298 tmpfs_statfs(mp, &mp->mnt_stat, cred); 299 300 return 0; 301 } 302 303 /* --------------------------------------------------------------------- */ 304 305 /* ARGSUSED2 */ 306 static int 307 tmpfs_unmount(struct mount *mp, int mntflags) 308 { 309 int error; 310 int flags = 0; 311 struct tmpfs_mount *tmp; 312 struct tmpfs_node *node; 313 struct vnode *vp; 314 int isok; 315 316 tmp = VFS_TO_TMPFS(mp); 317 TMPFS_LOCK(tmp); 318 319 /* Handle forced unmounts. */ 320 if (mntflags & MNT_FORCE) 321 flags |= FORCECLOSE; 322 323 /* 324 * Finalize all pending I/O. In the case of tmpfs we want 325 * to throw all the data away so clean out the buffer cache 326 * and vm objects before calling vflush(). 327 */ 328 LIST_FOREACH(node, &tmp->tm_nodes_used, tn_entries) { 329 /* 330 * tn_links is mnt_token protected 331 */ 332 TMPFS_NODE_LOCK(node); 333 ++node->tn_links; 334 TMPFS_NODE_UNLOCK(node); 335 336 while (node->tn_type == VREG && node->tn_vnode) { 337 vp = node->tn_vnode; 338 vhold(vp); 339 lwkt_yield(); 340 341 /* 342 * vx_get/vx_put and tmpfs_truncate may block, 343 * releasing the tmpfs mountpoint token. 344 * 345 * Make sure the lock order is correct. 346 */ 347 vx_get(vp); /* held vnode */ 348 TMPFS_NODE_LOCK(node); 349 if (node->tn_vnode == vp) { 350 tmpfs_truncate(vp, 0); 351 isok = 1; 352 } else { 353 isok = 0; 354 } 355 TMPFS_NODE_UNLOCK(node); 356 vx_put(vp); 357 vdrop(vp); 358 if (isok) 359 break; 360 /* retry */ 361 } 362 363 TMPFS_NODE_LOCK(node); 364 --node->tn_links; 365 TMPFS_NODE_UNLOCK(node); 366 } 367 368 /* 369 * Flush all vnodes on the unmount. 370 * 371 * If we fail to flush, we cannot unmount, but all the nodes have 372 * already been truncated. Erroring out is the best we can do. 373 */ 374 error = vflush(mp, 0, flags); 375 if (error != 0) { 376 TMPFS_UNLOCK(tmp); 377 return (error); 378 } 379 380 /* 381 * First pass get rid of all the directory entries and 382 * vnode associations. This will also destroy the 383 * directory topology and should drop all link counts 384 * to 0 except for the root. 385 * 386 * No vnodes should remain after the vflush above. 387 */ 388 LIST_FOREACH(node, &tmp->tm_nodes_used, tn_entries) { 389 lwkt_yield(); 390 391 TMPFS_NODE_LOCK(node); 392 ++node->tn_links; 393 if (node->tn_type == VDIR) { 394 struct tmpfs_dirent *de; 395 396 while ((de = RB_ROOT(&node->tn_dir.tn_dirtree)) != NULL) { 397 tmpfs_dir_detach(node, de); 398 tmpfs_free_dirent(tmp, de); 399 } 400 } 401 KKASSERT(node->tn_vnode == NULL); 402 403 --node->tn_links; 404 TMPFS_NODE_UNLOCK(node); 405 } 406 407 /* 408 * Allow the root node to be destroyed by dropping the link count 409 * we bumped in the mount code. 410 */ 411 KKASSERT(tmp->tm_root); 412 TMPFS_NODE_LOCK(tmp->tm_root); 413 --tmp->tm_root->tn_links; 414 TMPFS_NODE_UNLOCK(tmp->tm_root); 415 416 /* 417 * At this point all nodes, including the root node, should have a 418 * link count of 0. The root is not necessarily going to be last. 419 */ 420 while ((node = LIST_FIRST(&tmp->tm_nodes_used)) != NULL) { 421 if (node->tn_links) 422 panic("tmpfs: Dangling nodes during umount (%p)!\n", 423 node); 424 425 TMPFS_NODE_LOCK(node); 426 tmpfs_free_node(tmp, node); 427 /* eats lock */ 428 lwkt_yield(); 429 } 430 KKASSERT(tmp->tm_root == NULL); 431 432 objcache_destroy(tmp->tm_dirent_pool); 433 objcache_destroy(tmp->tm_node_pool); 434 435 kmalloc_destroy(&tmp->tm_name_zone); 436 kmalloc_destroy(&tmp->tm_dirent_zone); 437 kmalloc_destroy(&tmp->tm_node_zone); 438 439 tmp->tm_node_zone = tmp->tm_dirent_zone = NULL; 440 441 KKASSERT(tmp->tm_pages_used == 0); 442 KKASSERT(tmp->tm_nodes_inuse == 0); 443 444 TMPFS_UNLOCK(tmp); 445 446 /* Throw away the tmpfs_mount structure. */ 447 kfree(tmp, M_TMPFSMNT); 448 mp->mnt_data = NULL; 449 450 mp->mnt_flag &= ~MNT_LOCAL; 451 return 0; 452 } 453 454 /* --------------------------------------------------------------------- */ 455 456 static int 457 tmpfs_root(struct mount *mp, struct vnode **vpp) 458 { 459 struct tmpfs_mount *tmp; 460 int error; 461 462 tmp = VFS_TO_TMPFS(mp); 463 if (tmp->tm_root == NULL) { 464 kprintf("tmpfs_root: called without root node %p\n", mp); 465 print_backtrace(-1); 466 *vpp = NULL; 467 error = EINVAL; 468 } else { 469 error = tmpfs_alloc_vp(mp, tmp->tm_root, LK_EXCLUSIVE, vpp); 470 (*vpp)->v_flag |= VROOT; 471 (*vpp)->v_type = VDIR; 472 } 473 return error; 474 } 475 476 /* --------------------------------------------------------------------- */ 477 478 static int 479 tmpfs_fhtovp(struct mount *mp, struct vnode *rootvp, struct fid *fhp, 480 struct vnode **vpp) 481 { 482 boolean_t found; 483 struct tmpfs_fid *tfhp; 484 struct tmpfs_mount *tmp; 485 struct tmpfs_node *node; 486 int rc; 487 488 tmp = VFS_TO_TMPFS(mp); 489 490 tfhp = (struct tmpfs_fid *) fhp; 491 if (tfhp->tf_len != sizeof(struct tmpfs_fid)) 492 return EINVAL; 493 494 if (tfhp->tf_id >= tmp->tm_nodes_max) 495 return EINVAL; 496 497 rc = EINVAL; 498 found = FALSE; 499 500 TMPFS_LOCK(tmp); 501 LIST_FOREACH(node, &tmp->tm_nodes_used, tn_entries) { 502 if (node->tn_id == tfhp->tf_id && 503 node->tn_gen == tfhp->tf_gen) { 504 found = TRUE; 505 break; 506 } 507 } 508 509 if (found) 510 rc = tmpfs_alloc_vp(mp, node, LK_EXCLUSIVE, vpp); 511 512 TMPFS_UNLOCK(tmp); 513 514 return (rc); 515 } 516 517 /* --------------------------------------------------------------------- */ 518 519 /* ARGSUSED2 */ 520 static int 521 tmpfs_statfs(struct mount *mp, struct statfs *sbp, struct ucred *cred) 522 { 523 fsfilcnt_t freenodes; 524 struct tmpfs_mount *tmp; 525 526 tmp = VFS_TO_TMPFS(mp); 527 528 TMPFS_LOCK(tmp); 529 sbp->f_iosize = PAGE_SIZE; 530 sbp->f_bsize = PAGE_SIZE; 531 532 sbp->f_blocks = tmp->tm_pages_max; 533 sbp->f_bavail = tmp->tm_pages_max - tmp->tm_pages_used; 534 sbp->f_bfree = sbp->f_bavail; 535 536 freenodes = tmp->tm_nodes_max - tmp->tm_nodes_inuse; 537 538 sbp->f_files = freenodes + tmp->tm_nodes_inuse; 539 sbp->f_ffree = freenodes; 540 sbp->f_owner = tmp->tm_root->tn_uid; 541 542 TMPFS_UNLOCK(tmp); 543 544 return 0; 545 } 546 547 /* --------------------------------------------------------------------- */ 548 549 static int 550 tmpfs_vptofh(struct vnode *vp, struct fid *fhp) 551 { 552 struct tmpfs_node *node; 553 struct tmpfs_fid tfh; 554 node = VP_TO_TMPFS_NODE(vp); 555 memset(&tfh, 0, sizeof(tfh)); 556 tfh.tf_len = sizeof(struct tmpfs_fid); 557 tfh.tf_gen = node->tn_gen; 558 tfh.tf_id = node->tn_id; 559 memcpy(fhp, &tfh, sizeof(tfh)); 560 return (0); 561 } 562 563 /* --------------------------------------------------------------------- */ 564 565 static int 566 tmpfs_checkexp(struct mount *mp, struct sockaddr *nam, int *exflagsp, 567 struct ucred **credanonp) 568 { 569 struct tmpfs_mount *tmp; 570 struct netcred *nc; 571 572 tmp = (struct tmpfs_mount *) mp->mnt_data; 573 nc = vfs_export_lookup(mp, &tmp->tm_export, nam); 574 if (nc == NULL) 575 return (EACCES); 576 577 *exflagsp = nc->netc_exflags; 578 *credanonp = &nc->netc_anon; 579 580 return (0); 581 } 582 583 /* --------------------------------------------------------------------- */ 584 585 /* 586 * tmpfs vfs operations. 587 */ 588 589 static struct vfsops tmpfs_vfsops = { 590 .vfs_mount = tmpfs_mount, 591 .vfs_unmount = tmpfs_unmount, 592 .vfs_root = tmpfs_root, 593 .vfs_statfs = tmpfs_statfs, 594 .vfs_fhtovp = tmpfs_fhtovp, 595 .vfs_vptofh = tmpfs_vptofh, 596 .vfs_checkexp = tmpfs_checkexp, 597 }; 598 599 VFS_SET(tmpfs_vfsops, tmpfs, VFCF_MPSAFE); 600 MODULE_VERSION(tmpfs, 1); 601