1 /* $NetBSD: tmpfs_vfsops.c,v 1.10 2005/12/11 12:24:29 christos Exp $ */ 2 3 /*- 4 * Copyright (c) 2005 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Julio M. Merino Vidal, developed as part of Google's Summer of Code 9 * 2005 program. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 23 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 30 * POSSIBILITY OF SUCH DAMAGE. 31 */ 32 33 /* 34 * Efficient memory file system. 35 * 36 * tmpfs is a file system that uses virtual memory to store file data and 37 * metadata efficiently. It does not follow the structure of an on-disk 38 * file system because it simply does not need to. Instead, it uses 39 * memory-specific data structures and algorithms to automatically 40 * allocate and release resources. 41 */ 42 43 #include <sys/conf.h> 44 #include <sys/param.h> 45 #include <sys/limits.h> 46 #include <sys/lock.h> 47 #include <sys/kernel.h> 48 #include <sys/stat.h> 49 #include <sys/systm.h> 50 #include <sys/sysctl.h> 51 52 #include <vm/vm.h> 53 #include <vm/vm_object.h> 54 #include <vm/vm_param.h> 55 56 #if 0 57 #include <vfs/tmpfs/tmpfs.h> 58 #endif 59 #include "tmpfs.h" 60 #include <vfs/tmpfs/tmpfs_vnops.h> 61 #include <vfs/tmpfs/tmpfs_mount.h> 62 63 /* 64 * Default permission for root node 65 */ 66 #define TMPFS_DEFAULT_ROOT_MODE (S_IRWXU|S_IRGRP|S_IXGRP|S_IROTH|S_IXOTH) 67 68 MALLOC_DEFINE(M_TMPFSMNT, "tmpfs mount", "tmpfs mount structures"); 69 70 /* --------------------------------------------------------------------- */ 71 72 static int tmpfs_mount(struct mount *, char *, caddr_t, struct ucred *); 73 static int tmpfs_unmount(struct mount *, int); 74 static int tmpfs_root(struct mount *, struct vnode **); 75 static int tmpfs_fhtovp(struct mount *, struct vnode *, struct fid *, struct vnode **); 76 static int tmpfs_statfs(struct mount *, struct statfs *, struct ucred *cred); 77 78 /* --------------------------------------------------------------------- */ 79 80 void 81 tmpfs_node_init(struct tmpfs_node *node) 82 { 83 node->tn_blksize = PAGE_SIZE; /* start small */ 84 lockinit(&node->tn_interlock, "tmpfs node interlock", 0, LK_CANRECURSE); 85 node->tn_gen = karc4random(); 86 } 87 88 void 89 tmpfs_node_uninit(struct tmpfs_node *node) 90 { 91 node->tn_type = VNON; 92 node->tn_vpstate = TMPFS_VNODE_DOOMED; 93 lockuninit(&node->tn_interlock); 94 } 95 96 static int 97 tmpfs_mount(struct mount *mp, char *path, caddr_t data, struct ucred *cred) 98 { 99 struct tmpfs_mount *tmp; 100 struct tmpfs_node *root; 101 struct tmpfs_mount_info args; 102 vm_pindex_t pages; 103 vm_pindex_t pages_limit; 104 ino_t nodes; 105 u_int64_t maxfsize; 106 int error; 107 /* Size counters. */ 108 ino_t nodes_max; 109 off_t size_max; 110 size_t maxfsize_max; 111 size_t size; 112 113 /* Root node attributes. */ 114 uid_t root_uid = cred->cr_uid; 115 gid_t root_gid = cred->cr_gid; 116 mode_t root_mode = (VREAD | VWRITE); 117 118 if (mp->mnt_flag & MNT_UPDATE) { 119 /* XXX: There is no support yet to update file system 120 * settings. Should be added. */ 121 122 return EOPNOTSUPP; 123 } 124 125 /* 126 * mount info 127 */ 128 bzero(&args, sizeof(args)); 129 size_max = 0; 130 nodes_max = 0; 131 maxfsize_max = 0; 132 133 if (path) { 134 if (data) { 135 error = copyin(data, &args, sizeof(args)); 136 if (error) 137 return (error); 138 } 139 size_max = args.ta_size_max; 140 nodes_max = args.ta_nodes_max; 141 maxfsize_max = args.ta_maxfsize_max; 142 root_uid = args.ta_root_uid; 143 root_gid = args.ta_root_gid; 144 root_mode = args.ta_root_mode; 145 } 146 147 /* 148 * If mount by non-root, then verify that user has necessary 149 * permissions on the device. 150 */ 151 if (cred->cr_uid != 0) { 152 root_mode = VREAD; 153 if ((mp->mnt_flag & MNT_RDONLY) == 0) 154 root_mode |= VWRITE; 155 } 156 157 pages_limit = vm_swap_max + vmstats.v_page_count / 2; 158 159 if (size_max == 0) { 160 pages = pages_limit / 2; 161 } else if (size_max < PAGE_SIZE) { 162 pages = 1; 163 } else if (OFF_TO_IDX(size_max) > pages_limit) { 164 /* 165 * do not force pages = pages_limit for this case, otherwise 166 * we might not honor tmpfs size requests from /etc/fstab 167 * during boot because they are mounted prior to swap being 168 * turned on. 169 */ 170 pages = OFF_TO_IDX(size_max); 171 } else { 172 pages = OFF_TO_IDX(size_max); 173 } 174 175 if (nodes_max == 0) 176 nodes = 3 + pages * PAGE_SIZE / 1024; 177 else if (nodes_max < 3) 178 nodes = 3; 179 else if (nodes_max > pages) 180 nodes = pages; 181 else 182 nodes = nodes_max; 183 184 maxfsize = 0x7FFFFFFFFFFFFFFFLLU - TMPFS_BLKSIZE; 185 if (maxfsize_max != 0 && maxfsize > maxfsize_max) 186 maxfsize = maxfsize_max; 187 188 /* Allocate the tmpfs mount structure and fill it. */ 189 tmp = kmalloc(sizeof(*tmp), M_TMPFSMNT, M_WAITOK | M_ZERO); 190 191 tmp->tm_mount = mp; 192 tmp->tm_nodes_max = nodes; 193 tmp->tm_nodes_inuse = 0; 194 tmp->tm_maxfilesize = maxfsize; 195 LIST_INIT(&tmp->tm_nodes_used); 196 197 tmp->tm_pages_max = pages; 198 tmp->tm_pages_used = 0; 199 200 kmalloc_create_obj(&tmp->tm_node_zone, "tmpfs node", 201 sizeof(struct tmpfs_node)); 202 kmalloc_create_obj(&tmp->tm_dirent_zone, "tmpfs dirent", 203 sizeof(struct tmpfs_dirent)); 204 kmalloc_create(&tmp->tm_name_zone, "tmpfs name zone"); 205 206 kmalloc_obj_raise_limit(tmp->tm_node_zone, 207 sizeof(struct tmpfs_node) * tmp->tm_nodes_max); 208 209 tmp->tm_ino = TMPFS_ROOTINO; 210 211 /* Allocate the root node. */ 212 error = tmpfs_alloc_node(tmp, VDIR, root_uid, root_gid, 213 root_mode & ALLPERMS, NULL, 214 VNOVAL, VNOVAL, &root); 215 216 /* 217 * We are backed by swap, set snocache chflags flag so we 218 * don't trip over swapcache. 219 */ 220 root->tn_flags = SF_NOCACHE; 221 222 if (error != 0 || root == NULL) { 223 kmalloc_destroy(&tmp->tm_name_zone); 224 kmalloc_destroy(&tmp->tm_dirent_zone_obj); 225 kmalloc_destroy(&tmp->tm_node_zone_obj); 226 kfree(tmp, M_TMPFSMNT); 227 return error; 228 } 229 KASSERT(root->tn_id == TMPFS_ROOTINO, 230 ("tmpfs root with invalid ino: %ju", (uintmax_t)root->tn_id)); 231 232 atomic_add_int(&root->tn_links, 1); /* keep around */ 233 tmp->tm_root = root; 234 235 mp->mnt_flag |= MNT_LOCAL; 236 mp->mnt_kern_flag |= MNTK_ALL_MPSAFE; 237 mp->mnt_kern_flag |= MNTK_NOMSYNC; 238 mp->mnt_kern_flag |= MNTK_THR_SYNC; /* new vsyncscan semantics */ 239 mp->mnt_kern_flag |= MNTK_QUICKHALT; /* no teardown needed on halt */ 240 mp->mnt_data = (qaddr_t)tmp; 241 mp->mnt_iosize_max = MAXBSIZE; 242 vfs_getnewfsid(mp); 243 244 vfs_add_vnodeops(mp, &tmpfs_vnode_vops, &mp->mnt_vn_norm_ops); 245 vfs_add_vnodeops(mp, &tmpfs_fifo_vops, &mp->mnt_vn_fifo_ops); 246 247 copystr("tmpfs", mp->mnt_stat.f_mntfromname, MNAMELEN - 1, &size); 248 bzero(mp->mnt_stat.f_mntfromname +size, MNAMELEN - size); 249 bzero(mp->mnt_stat.f_mntonname, sizeof(mp->mnt_stat.f_mntonname)); 250 copyinstr(path, mp->mnt_stat.f_mntonname, 251 sizeof(mp->mnt_stat.f_mntonname) -1, 252 &size); 253 254 tmpfs_statfs(mp, &mp->mnt_stat, cred); 255 256 return 0; 257 } 258 259 /* --------------------------------------------------------------------- */ 260 261 /* ARGSUSED2 */ 262 static int 263 tmpfs_unmount(struct mount *mp, int mntflags) 264 { 265 int error; 266 int flags = 0; 267 struct tmpfs_mount *tmp; 268 struct tmpfs_node *node; 269 struct vnode *vp; 270 int isok; 271 272 tmp = VFS_TO_TMPFS(mp); 273 TMPFS_LOCK(tmp); 274 275 /* Handle forced unmounts. */ 276 if (mntflags & MNT_FORCE) 277 flags |= FORCECLOSE; 278 279 /* 280 * Finalize all pending I/O. In the case of tmpfs we want 281 * to throw all the data away so clean out the buffer cache 282 * and vm objects before calling vflush(). 283 */ 284 LIST_FOREACH(node, &tmp->tm_nodes_used, tn_entries) { 285 /* 286 * tn_links is mnt_token protected 287 */ 288 atomic_add_int(&node->tn_links, 1); 289 TMPFS_NODE_LOCK(node); 290 291 while (node->tn_type == VREG && node->tn_vnode) { 292 vp = node->tn_vnode; 293 vhold(vp); 294 TMPFS_NODE_UNLOCK(node); 295 lwkt_yield(); 296 297 /* 298 * vx_get/vx_put and tmpfs_truncate may block, 299 * releasing the tmpfs mountpoint token. 300 * 301 * Make sure the lock order is correct. 302 */ 303 vx_get(vp); /* held vnode */ 304 TMPFS_NODE_LOCK(node); 305 if (node->tn_vnode == vp) { 306 tmpfs_truncate(vp, 0); 307 isok = 1; 308 } else { 309 isok = 0; 310 } 311 TMPFS_NODE_UNLOCK(node); 312 vx_put(vp); 313 vdrop(vp); 314 TMPFS_NODE_LOCK(node); 315 if (isok) 316 break; 317 /* retry */ 318 } 319 320 TMPFS_NODE_UNLOCK(node); 321 atomic_add_int(&node->tn_links, -1); 322 } 323 324 /* 325 * Flush all vnodes on the unmount. 326 * 327 * If we fail to flush, we cannot unmount, but all the nodes have 328 * already been truncated. Erroring out is the best we can do. 329 */ 330 error = vflush(mp, 0, flags); 331 if (error != 0) { 332 TMPFS_UNLOCK(tmp); 333 return (error); 334 } 335 336 /* 337 * First pass get rid of all the directory entries and 338 * vnode associations. This will also destroy the 339 * directory topology and should drop all link counts 340 * to 0 except for the root. 341 * 342 * No vnodes should remain after the vflush above. 343 */ 344 LIST_FOREACH(node, &tmp->tm_nodes_used, tn_entries) { 345 lwkt_yield(); 346 347 atomic_add_int(&node->tn_links, 1); 348 TMPFS_NODE_LOCK(node); 349 if (node->tn_type == VDIR) { 350 struct tmpfs_dirent *de; 351 352 while ((de = RB_ROOT(&node->tn_dir.tn_dirtree)) != NULL) { 353 tmpfs_dir_detach_locked(node, de); 354 tmpfs_free_dirent(tmp, de); 355 } 356 } 357 KKASSERT(node->tn_vnode == NULL); 358 359 TMPFS_NODE_UNLOCK(node); 360 atomic_add_int(&node->tn_links, -1); 361 } 362 363 /* 364 * Allow the root node to be destroyed by dropping the link count 365 * we bumped in the mount code. 366 */ 367 KKASSERT(tmp->tm_root); 368 TMPFS_NODE_LOCK(tmp->tm_root); 369 atomic_add_int(&tmp->tm_root->tn_links, -1); 370 TMPFS_NODE_UNLOCK(tmp->tm_root); 371 372 /* 373 * At this point all nodes, including the root node, should have a 374 * link count of 0. The root is not necessarily going to be last. 375 */ 376 while ((node = LIST_FIRST(&tmp->tm_nodes_used)) != NULL) { 377 if (node->tn_links) { 378 panic("tmpfs: Dangling nodes during umount (%p)!\n", 379 node); 380 } 381 382 TMPFS_NODE_LOCK(node); 383 tmpfs_free_node(tmp, node); 384 /* eats lock */ 385 lwkt_yield(); 386 } 387 KKASSERT(tmp->tm_root == NULL); 388 389 kmalloc_destroy(&tmp->tm_name_zone); 390 kmalloc_destroy(&tmp->tm_dirent_zone_obj); 391 kmalloc_destroy(&tmp->tm_node_zone_obj); 392 393 tmp->tm_node_zone_obj = NULL; 394 tmp->tm_dirent_zone_obj = NULL; 395 396 KKASSERT(tmp->tm_pages_used == 0); 397 KKASSERT(tmp->tm_nodes_inuse == 0); 398 399 TMPFS_UNLOCK(tmp); 400 401 /* Throw away the tmpfs_mount structure. */ 402 kfree(tmp, M_TMPFSMNT); 403 mp->mnt_data = NULL; 404 405 mp->mnt_flag &= ~MNT_LOCAL; 406 return 0; 407 } 408 409 /* --------------------------------------------------------------------- */ 410 411 static int 412 tmpfs_root(struct mount *mp, struct vnode **vpp) 413 { 414 struct tmpfs_mount *tmp; 415 int error; 416 417 tmp = VFS_TO_TMPFS(mp); 418 if (tmp->tm_root == NULL) { 419 kprintf("tmpfs_root: called without root node %p\n", mp); 420 print_backtrace(-1); 421 *vpp = NULL; 422 error = EINVAL; 423 } else { 424 error = tmpfs_alloc_vp(mp, NULL, tmp->tm_root, 425 LK_EXCLUSIVE, vpp); 426 (*vpp)->v_flag |= VROOT; 427 (*vpp)->v_type = VDIR; 428 } 429 return error; 430 } 431 432 /* --------------------------------------------------------------------- */ 433 434 static int 435 tmpfs_fhtovp(struct mount *mp, struct vnode *rootvp, struct fid *fhp, 436 struct vnode **vpp) 437 { 438 boolean_t found; 439 struct tmpfs_fid *tfhp; 440 struct tmpfs_mount *tmp; 441 struct tmpfs_node *node; 442 int rc; 443 444 tmp = VFS_TO_TMPFS(mp); 445 446 tfhp = (struct tmpfs_fid *) fhp; 447 if (tfhp->tf_len != sizeof(struct tmpfs_fid)) 448 return EINVAL; 449 450 rc = EINVAL; 451 found = FALSE; 452 453 TMPFS_LOCK(tmp); 454 LIST_FOREACH(node, &tmp->tm_nodes_used, tn_entries) { 455 if (node->tn_id == tfhp->tf_id && 456 node->tn_gen == tfhp->tf_gen) { 457 found = TRUE; 458 break; 459 } 460 } 461 462 if (found) 463 rc = tmpfs_alloc_vp(mp, NULL, node, LK_EXCLUSIVE, vpp); 464 465 TMPFS_UNLOCK(tmp); 466 467 return (rc); 468 } 469 470 /* --------------------------------------------------------------------- */ 471 472 /* ARGSUSED2 */ 473 static int 474 tmpfs_statfs(struct mount *mp, struct statfs *sbp, struct ucred *cred) 475 { 476 fsfilcnt_t freenodes; 477 struct tmpfs_mount *tmp; 478 479 tmp = VFS_TO_TMPFS(mp); 480 481 /* TMPFS_LOCK(tmp); not really needed */ 482 483 sbp->f_iosize = PAGE_SIZE; 484 sbp->f_bsize = PAGE_SIZE; 485 486 sbp->f_blocks = tmp->tm_pages_max; 487 sbp->f_bavail = tmp->tm_pages_max - tmp->tm_pages_used; 488 sbp->f_bfree = sbp->f_bavail; 489 490 freenodes = tmp->tm_nodes_max - tmp->tm_nodes_inuse; 491 492 sbp->f_files = freenodes + tmp->tm_nodes_inuse; 493 sbp->f_ffree = freenodes; 494 sbp->f_owner = tmp->tm_root->tn_uid; 495 496 /* TMPFS_UNLOCK(tmp); */ 497 498 return 0; 499 } 500 501 /* --------------------------------------------------------------------- */ 502 503 static int 504 tmpfs_vptofh(struct vnode *vp, struct fid *fhp) 505 { 506 struct tmpfs_node *node; 507 struct tmpfs_fid tfh; 508 node = VP_TO_TMPFS_NODE(vp); 509 memset(&tfh, 0, sizeof(tfh)); 510 tfh.tf_len = sizeof(struct tmpfs_fid); 511 tfh.tf_gen = node->tn_gen; 512 tfh.tf_id = node->tn_id; 513 memcpy(fhp, &tfh, sizeof(tfh)); 514 return (0); 515 } 516 517 /* --------------------------------------------------------------------- */ 518 519 static int 520 tmpfs_checkexp(struct mount *mp, struct sockaddr *nam, int *exflagsp, 521 struct ucred **credanonp) 522 { 523 struct tmpfs_mount *tmp; 524 struct netcred *nc; 525 526 tmp = (struct tmpfs_mount *) mp->mnt_data; 527 nc = vfs_export_lookup(mp, &tmp->tm_export, nam); 528 if (nc == NULL) 529 return (EACCES); 530 531 *exflagsp = nc->netc_exflags; 532 *credanonp = &nc->netc_anon; 533 534 return (0); 535 } 536 537 /* --------------------------------------------------------------------- */ 538 539 /* 540 * tmpfs vfs operations. 541 */ 542 543 static struct vfsops tmpfs_vfsops = { 544 .vfs_flags = 0, 545 .vfs_mount = tmpfs_mount, 546 .vfs_unmount = tmpfs_unmount, 547 .vfs_root = tmpfs_root, 548 .vfs_statfs = tmpfs_statfs, 549 .vfs_fhtovp = tmpfs_fhtovp, 550 .vfs_vptofh = tmpfs_vptofh, 551 .vfs_checkexp = tmpfs_checkexp, 552 }; 553 554 VFS_SET(tmpfs_vfsops, tmpfs, VFCF_MPSAFE); 555 MODULE_VERSION(tmpfs, 1); 556