/* $NetBSD: tmpfs_subr.c,v 1.35 2007/07/09 21:10:50 ad Exp $ */

/*-
 * Copyright (c) 2005 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Julio M. Merino Vidal, developed as part of Google's Summer of Code
 * 2005 program.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Efficient memory file system supporting functions.
 */

#include <sys/kernel.h>
#include <sys/param.h>
#include <sys/namei.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/spinlock2.h>
#include <sys/stat.h>
#include <sys/systm.h>
#include <sys/vnode.h>
#include <sys/vmmeter.h>

#include <vm/vm.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pager.h>
#include <vm/vm_extern.h>

#include <vfs/tmpfs/tmpfs.h>
#include <vfs/tmpfs/tmpfs_vnops.h>

/* File-local helpers; both are referenced before their definitions. */
static ino_t tmpfs_fetch_ino(struct tmpfs_mount *);
static int tmpfs_dirtree_compare(struct tmpfs_dirent *a,
				 struct tmpfs_dirent *b);

/*
 * Instantiate the red-black tree routines used for per-directory entry
 * trees, ordered by tmpfs_dirtree_compare().
 */
RB_GENERATE(tmpfs_dirtree, tmpfs_dirent, rb_node, tmpfs_dirtree_compare);


/* --------------------------------------------------------------------- */

/*
 * Allocates a new node of type 'type' inside the 'tmp' mount point, with
 * its owner set to 'uid', its group to 'gid' and its mode set to 'mode'.
 *
 * If the node type is set to 'VDIR', the parent link is not set up here;
 * it is established when the directory entry is attached to its parent
 * (see tmpfs_dir_attach).
 *
 * If the node type is set to 'VBLK' or 'VCHR', then 'rmajor' and 'rminor'
 * specify the device the node represents.
 *
 * If the node type is set to 'VLNK', then the parameter target specifies
 * the file name of the target file for the symbolic link that is being
 * created.
 *
 * New nodes are taken from the mount's node pool, as long as the mount
 * has not reached its maximum number of nodes.
 *
 * On success the new node is returned in *node.
 *
 * Returns zero on success or an appropriate error code on failure.
87 */ 88 int 89 tmpfs_alloc_node(struct tmpfs_mount *tmp, enum vtype type, 90 uid_t uid, gid_t gid, mode_t mode, 91 char *target, int rmajor, int rminor, 92 struct tmpfs_node **node) 93 { 94 struct tmpfs_node *nnode; 95 struct timespec ts; 96 udev_t rdev; 97 98 KKASSERT(IFF(type == VLNK, target != NULL)); 99 KKASSERT(IFF(type == VBLK || type == VCHR, rmajor != VNOVAL)); 100 101 if (tmp->tm_nodes_inuse >= tmp->tm_nodes_max) 102 return (ENOSPC); 103 104 nnode = objcache_get(tmp->tm_node_pool, M_WAITOK | M_NULLOK); 105 if (nnode == NULL) 106 return (ENOSPC); 107 108 /* Generic initialization. */ 109 nnode->tn_type = type; 110 vfs_timestamp(&ts); 111 nnode->tn_ctime = nnode->tn_mtime = nnode->tn_atime 112 = ts.tv_sec; 113 nnode->tn_ctimensec = nnode->tn_mtimensec = nnode->tn_atimensec 114 = ts.tv_nsec; 115 nnode->tn_uid = uid; 116 nnode->tn_gid = gid; 117 nnode->tn_mode = mode; 118 nnode->tn_id = tmpfs_fetch_ino(tmp); 119 nnode->tn_advlock.init_done = 0; 120 KKASSERT(nnode->tn_links == 0); 121 122 /* Type-specific initialization. 
*/ 123 switch (nnode->tn_type) { 124 case VBLK: 125 case VCHR: 126 rdev = makeudev(rmajor, rminor); 127 if (rdev == NOUDEV) { 128 objcache_put(tmp->tm_node_pool, nnode); 129 return(EINVAL); 130 } 131 nnode->tn_rdev = rdev; 132 break; 133 134 case VDIR: 135 RB_INIT(&nnode->tn_dir.tn_dirtree); 136 nnode->tn_dir.tn_readdir_lastn = 0; 137 nnode->tn_dir.tn_readdir_lastp = NULL; 138 nnode->tn_size = 0; 139 break; 140 141 case VFIFO: 142 /* FALLTHROUGH */ 143 case VSOCK: 144 break; 145 146 case VLNK: 147 nnode->tn_size = strlen(target); 148 nnode->tn_link = kmalloc(nnode->tn_size + 1, tmp->tm_name_zone, 149 M_WAITOK | M_NULLOK); 150 if (nnode->tn_link == NULL) { 151 objcache_put(tmp->tm_node_pool, nnode); 152 return (ENOSPC); 153 } 154 bcopy(target, nnode->tn_link, nnode->tn_size); 155 nnode->tn_link[nnode->tn_size] = '\0'; 156 break; 157 158 case VREG: 159 nnode->tn_reg.tn_aobj = 160 swap_pager_alloc(NULL, 0, VM_PROT_DEFAULT, 0); 161 nnode->tn_reg.tn_aobj_pages = 0; 162 nnode->tn_size = 0; 163 break; 164 165 default: 166 panic("tmpfs_alloc_node: type %p %d", nnode, (int)nnode->tn_type); 167 } 168 169 TMPFS_NODE_LOCK(nnode); 170 TMPFS_LOCK(tmp); 171 LIST_INSERT_HEAD(&tmp->tm_nodes_used, nnode, tn_entries); 172 tmp->tm_nodes_inuse++; 173 TMPFS_UNLOCK(tmp); 174 TMPFS_NODE_UNLOCK(nnode); 175 176 *node = nnode; 177 return 0; 178 } 179 180 /* --------------------------------------------------------------------- */ 181 182 /* 183 * Destroys the node pointed to by node from the file system 'tmp'. 184 * If the node does not belong to the given mount point, the results are 185 * unpredicted. 186 * 187 * If the node references a directory; no entries are allowed because 188 * their removal could need a recursive algorithm, something forbidden in 189 * kernel space. 
Furthermore, there is not need to provide such 190 * functionality (recursive removal) because the only primitives offered 191 * to the user are the removal of empty directories and the deletion of 192 * individual files. 193 * 194 * Note that nodes are not really deleted; in fact, when a node has been 195 * allocated, it cannot be deleted during the whole life of the file 196 * system. Instead, they are moved to the available list and remain there 197 * until reused. 198 */ 199 void 200 tmpfs_free_node(struct tmpfs_mount *tmp, struct tmpfs_node *node) 201 { 202 vm_pindex_t pages = 0; 203 204 #ifdef INVARIANTS 205 TMPFS_ASSERT_ELOCKED(node); 206 KKASSERT(node->tn_vnode == NULL); 207 KKASSERT((node->tn_vpstate & TMPFS_VNODE_ALLOCATING) == 0); 208 #endif 209 210 TMPFS_LOCK(tmp); 211 LIST_REMOVE(node, tn_entries); 212 tmp->tm_nodes_inuse--; 213 TMPFS_UNLOCK(tmp); 214 TMPFS_NODE_UNLOCK(node); 215 216 switch (node->tn_type) { 217 case VNON: 218 /* Do not do anything. VNON is provided to let the 219 * allocation routine clean itself easily by avoiding 220 * duplicating code in it. */ 221 /* FALLTHROUGH */ 222 case VBLK: 223 /* FALLTHROUGH */ 224 case VCHR: 225 /* FALLTHROUGH */ 226 break; 227 case VDIR: 228 /* 229 * The parent link can be NULL if this is the root 230 * node or if it is a directory node that was rmdir'd. 231 * 232 * XXX what if node is a directory which still contains 233 * directory entries (e.g. due to a forced umount) ? 234 */ 235 node->tn_size = 0; 236 KKASSERT(node->tn_dir.tn_parent == NULL); 237 238 /* 239 * If the root node is being destroyed don't leave a 240 * dangling pointer in tmpfs_mount. 
241 */ 242 if (node == tmp->tm_root) 243 tmp->tm_root = NULL; 244 break; 245 case VFIFO: 246 /* FALLTHROUGH */ 247 case VSOCK: 248 break; 249 250 case VLNK: 251 kfree(node->tn_link, tmp->tm_name_zone); 252 node->tn_link = NULL; 253 node->tn_size = 0; 254 break; 255 256 case VREG: 257 if (node->tn_reg.tn_aobj != NULL) 258 vm_object_deallocate(node->tn_reg.tn_aobj); 259 node->tn_reg.tn_aobj = NULL; 260 pages = node->tn_reg.tn_aobj_pages; 261 break; 262 263 default: 264 panic("tmpfs_free_node: type %p %d", node, (int)node->tn_type); 265 } 266 267 /* 268 * Clean up fields for the next allocation. The objcache only ctors 269 * new allocations. 270 */ 271 tmpfs_node_ctor(node, NULL, 0); 272 objcache_put(tmp->tm_node_pool, node); 273 /* node is now invalid */ 274 275 TMPFS_LOCK(tmp); 276 tmp->tm_pages_used -= pages; 277 TMPFS_UNLOCK(tmp); 278 } 279 280 /* --------------------------------------------------------------------- */ 281 282 /* 283 * Allocates a new directory entry for the node node with a name of name. 284 * The new directory entry is returned in *de. 285 * 286 * The link count of node is increased by one to reflect the new object 287 * referencing it. 288 * 289 * Returns zero on success or an appropriate error code on failure. 
290 */ 291 int 292 tmpfs_alloc_dirent(struct tmpfs_mount *tmp, struct tmpfs_node *node, 293 const char *name, uint16_t len, struct tmpfs_dirent **de) 294 { 295 struct tmpfs_dirent *nde; 296 297 nde = objcache_get(tmp->tm_dirent_pool, M_WAITOK); 298 nde->td_name = kmalloc(len + 1, tmp->tm_name_zone, M_WAITOK | M_NULLOK); 299 if (nde->td_name == NULL) { 300 objcache_put(tmp->tm_dirent_pool, nde); 301 *de = NULL; 302 return (ENOSPC); 303 } 304 nde->td_namelen = len; 305 bcopy(name, nde->td_name, len); 306 nde->td_name[len] = '\0'; 307 308 nde->td_node = node; 309 310 TMPFS_NODE_LOCK(node); 311 node->tn_links++; 312 TMPFS_NODE_UNLOCK(node); 313 314 *de = nde; 315 316 return 0; 317 } 318 319 /* --------------------------------------------------------------------- */ 320 321 /* 322 * Frees a directory entry. It is the caller's responsibility to destroy 323 * the node referenced by it if needed. 324 * 325 * The link count of node is decreased by one to reflect the removal of an 326 * object that referenced it. This only happens if 'node_exists' is true; 327 * otherwise the function will not access the node referred to by the 328 * directory entry, as it may already have been released from the outside. 329 */ 330 void 331 tmpfs_free_dirent(struct tmpfs_mount *tmp, struct tmpfs_dirent *de) 332 { 333 struct tmpfs_node *node; 334 335 node = de->td_node; 336 337 TMPFS_NODE_LOCK(node); 338 TMPFS_ASSERT_ELOCKED(node); 339 KKASSERT(node->tn_links > 0); 340 node->tn_links--; 341 TMPFS_NODE_UNLOCK(node); 342 343 kfree(de->td_name, tmp->tm_name_zone); 344 de->td_namelen = 0; 345 de->td_name = NULL; 346 de->td_node = NULL; 347 objcache_put(tmp->tm_dirent_pool, de); 348 } 349 350 /* --------------------------------------------------------------------- */ 351 352 /* 353 * Allocates a new vnode for the node node or returns a new reference to 354 * an existing one if the node had already a vnode referencing it. The 355 * resulting locked vnode is returned in *vpp. 
 *
 * 'mp' is the mount the vnode is created on and 'lkflag' is OR'ed with
 * LK_EXCLUSIVE when an already existing vnode is acquired through vget().
 *
 * Returns zero on success or an appropriate error code on failure.
 */
int
tmpfs_alloc_vp(struct mount *mp, struct tmpfs_node *node, int lkflag,
	       struct vnode **vpp)
{
	int error = 0;
	struct vnode *vp;

loop:
	/*
	 * Interlocked extraction from node.  This can race many things.
	 * We have to get a soft reference on the vnode while we hold
	 * the node locked, then acquire it properly and check for races.
	 */
	TMPFS_NODE_LOCK(node);
	if ((vp = node->tn_vnode) != NULL) {
		KKASSERT((node->tn_vpstate & TMPFS_VNODE_DOOMED) == 0);
		vhold_interlocked(vp);
		TMPFS_NODE_UNLOCK(node);

		/* Could not acquire the vnode: drop the hold and retry. */
		if (vget(vp, lkflag | LK_EXCLUSIVE) != 0) {
			vdrop(vp);
			goto loop;
		}
		/*
		 * The node lock was released above, so the node may have
		 * been given a different vnode (or none) in the meantime.
		 */
		if (node->tn_vnode != vp) {
			vput(vp);
			vdrop(vp);
			goto loop;
		}
		/* The vget() succeeded; the hold taken above is released. */
		vdrop(vp);
		goto out;
	}
	/* vp is NULL */

	/*
	 * This should never happen.
	 */
	if (node->tn_vpstate & TMPFS_VNODE_DOOMED) {
		TMPFS_NODE_UNLOCK(node);
		error = ENOENT;
		goto out;
	}

	/*
	 * Interlock against other calls to tmpfs_alloc_vp() trying to
	 * allocate and assign a vp to node.
	 *
	 * NOTE(review): when the sleep returns an error this path returns
	 * directly and does not store anything into *vpp.
	 */
	if (node->tn_vpstate & TMPFS_VNODE_ALLOCATING) {
		node->tn_vpstate |= TMPFS_VNODE_WANT;
		error = tsleep(&node->tn_vpstate, PINTERLOCKED | PCATCH,
			       "tmpfs_alloc_vp", 0);
		TMPFS_NODE_UNLOCK(node);
		if (error)
			return error;
		goto loop;
	}
	node->tn_vpstate |= TMPFS_VNODE_ALLOCATING;
	TMPFS_NODE_UNLOCK(node);

	/*
	 * Allocate a new vnode (may block).  The ALLOCATING flag should
	 * prevent a race against someone else assigning node->tn_vnode.
	 */
	error = getnewvnode(VT_TMPFS, mp, &vp, VLKTIMEOUT, LK_CANRECURSE);
	if (error != 0)
		goto unlock;

	KKASSERT(node->tn_vnode == NULL);
	KKASSERT(vp != NULL);
	vp->v_data = node;
	vp->v_type = node->tn_type;

	/* Type-specific initialization. */
	switch (node->tn_type) {
	case VBLK:
		/* FALLTHROUGH */
	case VCHR:
		/* FALLTHROUGH */
	case VSOCK:
		break;
	case VREG:
		/* Regular files get VM I/O set up, sized to the node. */
		vinitvmio(vp, node->tn_size, BMASK, -1);
		break;
	case VLNK:
		break;
	case VFIFO:
		/* FIFOs use the mount's fifo operations vector. */
		vp->v_ops = &mp->mnt_vn_fifo_ops;
		break;
	case VDIR:
		break;

	default:
		panic("tmpfs_alloc_vp: type %p %d", node, (int)node->tn_type);
	}

	insmntque(vp, mp);

unlock:
	/*
	 * Reached both on success and when getnewvnode() failed: clear
	 * the ALLOCATING interlock, record vp in the node and wake up
	 * anyone who went to sleep waiting for the allocation.
	 */
	TMPFS_NODE_LOCK(node);

	KKASSERT(node->tn_vpstate & TMPFS_VNODE_ALLOCATING);
	node->tn_vpstate &= ~TMPFS_VNODE_ALLOCATING;
	node->tn_vnode = vp;

	if (node->tn_vpstate & TMPFS_VNODE_WANT) {
		node->tn_vpstate &= ~TMPFS_VNODE_WANT;
		TMPFS_NODE_UNLOCK(node);
		wakeup(&node->tn_vpstate);
	} else {
		TMPFS_NODE_UNLOCK(node);
	}

out:
	*vpp = vp;

	/* Success must hand back a locked vnode; failure must not. */
	KKASSERT(IFF(error == 0, *vpp != NULL && vn_islocked(*vpp)));
#ifdef INVARIANTS
	TMPFS_NODE_LOCK(node);
	KKASSERT(*vpp == node->tn_vnode);
	TMPFS_NODE_UNLOCK(node);
#endif

	return error;
}

/* --------------------------------------------------------------------- */

/*
 * Destroys the association between the vnode vp and the node it
 * references: the node's vnode pointer and the vnode's private data
 * pointer are both cleared while the node lock is held.
 */
void
tmpfs_free_vp(struct vnode *vp)
{
	struct tmpfs_node *node;

	node = VP_TO_TMPFS_NODE(vp);

	TMPFS_NODE_LOCK(node);
	KKASSERT(lockcount(TMPFS_NODE_MTX(node)) > 0);
	node->tn_vnode = NULL;
	vp->v_data = NULL;
	TMPFS_NODE_UNLOCK(node);
}

/* --------------------------------------------------------------------- */

/*
 * Allocates a new file of type 'type' and adds it to the parent directory
 * 'dvp'; this addition is done using the component name given in 'ncp'.
508 * The ownership of the new file is automatically assigned based on the 509 * credentials of the caller (through 'cnp'), the group is set based on 510 * the parent directory and the mode is determined from the 'vap' argument. 511 * If successful, *vpp holds a vnode to the newly created file and zero 512 * is returned. Otherwise *vpp is NULL and the function returns an 513 * appropriate error code. 514 */ 515 int 516 tmpfs_alloc_file(struct vnode *dvp, struct vnode **vpp, struct vattr *vap, 517 struct namecache *ncp, struct ucred *cred, char *target) 518 { 519 int error; 520 struct tmpfs_dirent *de; 521 struct tmpfs_mount *tmp; 522 struct tmpfs_node *dnode; 523 struct tmpfs_node *node; 524 525 tmp = VFS_TO_TMPFS(dvp->v_mount); 526 dnode = VP_TO_TMPFS_DIR(dvp); 527 *vpp = NULL; 528 529 /* 530 * If the directory was removed but a process was CD'd into it, 531 * we do not allow any more file/dir creation within it. Otherwise 532 * we will lose track of it. 533 */ 534 KKASSERT(dnode->tn_type == VDIR); 535 if (dnode != tmp->tm_root && dnode->tn_dir.tn_parent == NULL) 536 return ENOENT; 537 538 /* 539 * Make sure the link count does not overflow. 540 */ 541 if (vap->va_type == VDIR && dnode->tn_links >= LINK_MAX) 542 return EMLINK; 543 544 /* Allocate a node that represents the new file. */ 545 error = tmpfs_alloc_node(tmp, vap->va_type, cred->cr_uid, 546 dnode->tn_gid, vap->va_mode, target, 547 vap->va_rmajor, vap->va_rminor, &node); 548 if (error != 0) 549 return error; 550 TMPFS_NODE_LOCK(node); 551 552 /* Allocate a directory entry that points to the new file. */ 553 error = tmpfs_alloc_dirent(tmp, node, ncp->nc_name, ncp->nc_nlen, &de); 554 if (error != 0) { 555 tmpfs_free_node(tmp, node); 556 /* eats node lock */ 557 return error; 558 } 559 560 /* Allocate a vnode for the new file. 
*/ 561 error = tmpfs_alloc_vp(dvp->v_mount, node, LK_EXCLUSIVE, vpp); 562 if (error != 0) { 563 tmpfs_free_dirent(tmp, de); 564 tmpfs_free_node(tmp, node); 565 /* eats node lock */ 566 return error; 567 } 568 569 /* 570 * Now that all required items are allocated, we can proceed to 571 * insert the new node into the directory, an operation that 572 * cannot fail. 573 */ 574 tmpfs_dir_attach(dnode, de); 575 TMPFS_NODE_UNLOCK(node); 576 577 return error; 578 } 579 580 /* --------------------------------------------------------------------- */ 581 582 /* 583 * Attaches the directory entry de to the directory represented by vp. 584 * Note that this does not change the link count of the node pointed by 585 * the directory entry, as this is done by tmpfs_alloc_dirent. 586 */ 587 void 588 tmpfs_dir_attach(struct tmpfs_node *dnode, struct tmpfs_dirent *de) 589 { 590 struct tmpfs_node *node = de->td_node; 591 592 TMPFS_NODE_LOCK(dnode); 593 if (node && node->tn_type == VDIR) { 594 TMPFS_NODE_LOCK(node); 595 ++node->tn_links; 596 node->tn_status |= TMPFS_NODE_CHANGED; 597 node->tn_dir.tn_parent = dnode; 598 ++dnode->tn_links; 599 TMPFS_NODE_UNLOCK(node); 600 } 601 RB_INSERT(tmpfs_dirtree, &dnode->tn_dir.tn_dirtree, de); 602 dnode->tn_size += sizeof(struct tmpfs_dirent); 603 dnode->tn_status |= TMPFS_NODE_ACCESSED | TMPFS_NODE_CHANGED | 604 TMPFS_NODE_MODIFIED; 605 TMPFS_NODE_UNLOCK(dnode); 606 } 607 608 /* --------------------------------------------------------------------- */ 609 610 /* 611 * Detaches the directory entry de from the directory represented by vp. 612 * Note that this does not change the link count of the node pointed by 613 * the directory entry, as this is done by tmpfs_free_dirent. 
614 */ 615 void 616 tmpfs_dir_detach(struct tmpfs_node *dnode, struct tmpfs_dirent *de) 617 { 618 struct tmpfs_node *node = de->td_node; 619 620 TMPFS_NODE_LOCK(dnode); 621 if (dnode->tn_dir.tn_readdir_lastp == de) { 622 dnode->tn_dir.tn_readdir_lastn = 0; 623 dnode->tn_dir.tn_readdir_lastp = NULL; 624 } 625 RB_REMOVE(tmpfs_dirtree, &dnode->tn_dir.tn_dirtree, de); 626 dnode->tn_size -= sizeof(struct tmpfs_dirent); 627 dnode->tn_status |= TMPFS_NODE_ACCESSED | TMPFS_NODE_CHANGED | 628 TMPFS_NODE_MODIFIED; 629 TMPFS_NODE_UNLOCK(dnode); 630 631 /* 632 * Clean out the tn_parent pointer immediately when removing a 633 * directory. 634 * 635 * Removal of the parent linkage also cleans out the extra tn_links 636 * count we had on both node and dnode. 637 * 638 * node can be NULL (typ during a forced umount), in which case 639 * the mount code is dealing with the linkages from a linked list 640 * scan. 641 */ 642 if (node && node->tn_type == VDIR && node->tn_dir.tn_parent) { 643 TMPFS_NODE_LOCK(dnode); 644 TMPFS_NODE_LOCK(node); 645 KKASSERT(node->tn_dir.tn_parent == dnode); 646 dnode->tn_links--; 647 node->tn_links--; 648 node->tn_dir.tn_parent = NULL; 649 TMPFS_NODE_UNLOCK(node); 650 TMPFS_NODE_UNLOCK(dnode); 651 } 652 } 653 654 /* --------------------------------------------------------------------- */ 655 656 /* 657 * Looks for a directory entry in the directory represented by node. 658 * 'ncp' describes the name of the entry to look for. Note that the . 659 * and .. components are not allowed as they do not physically exist 660 * within directories. 661 * 662 * Returns a pointer to the entry when found, otherwise NULL. 
663 */ 664 struct tmpfs_dirent * 665 tmpfs_dir_lookup(struct tmpfs_node *node, struct tmpfs_node *f, 666 struct namecache *ncp) 667 { 668 struct tmpfs_dirent *de; 669 int len = ncp->nc_nlen; 670 struct tmpfs_dirent wanted; 671 672 wanted.td_namelen = len; 673 wanted.td_name = ncp->nc_name; 674 675 TMPFS_VALIDATE_DIR(node); 676 677 de = RB_FIND(tmpfs_dirtree, &node->tn_dir.tn_dirtree, &wanted); 678 679 KKASSERT(f == NULL || f == de->td_node); 680 681 TMPFS_NODE_LOCK(node); 682 node->tn_status |= TMPFS_NODE_ACCESSED; 683 TMPFS_NODE_UNLOCK(node); 684 685 return de; 686 } 687 688 /* --------------------------------------------------------------------- */ 689 690 /* 691 * Helper function for tmpfs_readdir. Creates a '.' entry for the given 692 * directory and returns it in the uio space. The function returns 0 693 * on success, -1 if there was not enough space in the uio structure to 694 * hold the directory entry or an appropriate error code if another 695 * error happens. 696 */ 697 int 698 tmpfs_dir_getdotdent(struct tmpfs_node *node, struct uio *uio) 699 { 700 int error; 701 struct dirent dent; 702 int dirsize; 703 704 TMPFS_VALIDATE_DIR(node); 705 KKASSERT(uio->uio_offset == TMPFS_DIRCOOKIE_DOT); 706 707 dent.d_ino = node->tn_id; 708 dent.d_type = DT_DIR; 709 dent.d_namlen = 1; 710 dent.d_name[0] = '.'; 711 dent.d_name[1] = '\0'; 712 dirsize = _DIRENT_DIRSIZ(&dent); 713 714 if (dirsize > uio->uio_resid) 715 error = -1; 716 else { 717 error = uiomove((caddr_t)&dent, dirsize, uio); 718 if (error == 0) 719 uio->uio_offset = TMPFS_DIRCOOKIE_DOTDOT; 720 } 721 722 TMPFS_NODE_LOCK(node); 723 node->tn_status |= TMPFS_NODE_ACCESSED; 724 TMPFS_NODE_UNLOCK(node); 725 726 return error; 727 } 728 729 /* --------------------------------------------------------------------- */ 730 731 /* 732 * Helper function for tmpfs_readdir. Creates a '..' entry for the given 733 * directory and returns it in the uio space. 
The function returns 0 734 * on success, -1 if there was not enough space in the uio structure to 735 * hold the directory entry or an appropriate error code if another 736 * error happens. 737 */ 738 int 739 tmpfs_dir_getdotdotdent(struct tmpfs_mount *tmp, struct tmpfs_node *node, 740 struct uio *uio) 741 { 742 int error; 743 struct dirent dent; 744 int dirsize; 745 746 TMPFS_VALIDATE_DIR(node); 747 KKASSERT(uio->uio_offset == TMPFS_DIRCOOKIE_DOTDOT); 748 749 if (node->tn_dir.tn_parent) { 750 TMPFS_NODE_LOCK(node->tn_dir.tn_parent); 751 dent.d_ino = node->tn_dir.tn_parent->tn_id; 752 TMPFS_NODE_UNLOCK(node->tn_dir.tn_parent); 753 } else { 754 dent.d_ino = tmp->tm_root->tn_id; 755 } 756 757 dent.d_type = DT_DIR; 758 dent.d_namlen = 2; 759 dent.d_name[0] = '.'; 760 dent.d_name[1] = '.'; 761 dent.d_name[2] = '\0'; 762 dirsize = _DIRENT_DIRSIZ(&dent); 763 764 if (dirsize > uio->uio_resid) 765 error = -1; 766 else { 767 error = uiomove((caddr_t)&dent, dirsize, uio); 768 if (error == 0) { 769 struct tmpfs_dirent *de; 770 771 de = RB_MIN(tmpfs_dirtree, &node->tn_dir.tn_dirtree); 772 if (de == NULL) 773 uio->uio_offset = TMPFS_DIRCOOKIE_EOF; 774 else 775 uio->uio_offset = tmpfs_dircookie(de); 776 } 777 } 778 779 TMPFS_NODE_LOCK(node); 780 node->tn_status |= TMPFS_NODE_ACCESSED; 781 TMPFS_NODE_UNLOCK(node); 782 783 return error; 784 } 785 786 /* --------------------------------------------------------------------- */ 787 788 /* 789 * Lookup a directory entry by its associated cookie. 
790 */ 791 struct tmpfs_dirent * 792 tmpfs_dir_lookupbycookie(struct tmpfs_node *node, off_t cookie) 793 { 794 struct tmpfs_dirent *de; 795 796 if (cookie == node->tn_dir.tn_readdir_lastn && 797 node->tn_dir.tn_readdir_lastp != NULL) { 798 return node->tn_dir.tn_readdir_lastp; 799 } 800 801 RB_FOREACH(de, tmpfs_dirtree, &node->tn_dir.tn_dirtree) { 802 if (tmpfs_dircookie(de) == cookie) { 803 break; 804 } 805 } 806 807 return de; 808 } 809 810 /* --------------------------------------------------------------------- */ 811 812 /* 813 * Helper function for tmpfs_readdir. Returns as much directory entries 814 * as can fit in the uio space. The read starts at uio->uio_offset. 815 * The function returns 0 on success, -1 if there was not enough space 816 * in the uio structure to hold the directory entry or an appropriate 817 * error code if another error happens. 818 */ 819 int 820 tmpfs_dir_getdents(struct tmpfs_node *node, struct uio *uio, off_t *cntp) 821 { 822 int error; 823 off_t startcookie; 824 struct tmpfs_dirent *de; 825 826 TMPFS_VALIDATE_DIR(node); 827 828 /* Locate the first directory entry we have to return. We have cached 829 * the last readdir in the node, so use those values if appropriate. 830 * Otherwise do a linear scan to find the requested entry. */ 831 startcookie = uio->uio_offset; 832 KKASSERT(startcookie != TMPFS_DIRCOOKIE_DOT); 833 KKASSERT(startcookie != TMPFS_DIRCOOKIE_DOTDOT); 834 if (startcookie == TMPFS_DIRCOOKIE_EOF) { 835 return 0; 836 } else { 837 de = tmpfs_dir_lookupbycookie(node, startcookie); 838 } 839 if (de == NULL) { 840 return EINVAL; 841 } 842 843 /* Read as much entries as possible; i.e., until we reach the end of 844 * the directory or we exhaust uio space. */ 845 do { 846 struct dirent d; 847 int reclen; 848 849 /* Create a dirent structure representing the current 850 * tmpfs_node and fill it. 
*/ 851 d.d_ino = de->td_node->tn_id; 852 switch (de->td_node->tn_type) { 853 case VBLK: 854 d.d_type = DT_BLK; 855 break; 856 857 case VCHR: 858 d.d_type = DT_CHR; 859 break; 860 861 case VDIR: 862 d.d_type = DT_DIR; 863 break; 864 865 case VFIFO: 866 d.d_type = DT_FIFO; 867 break; 868 869 case VLNK: 870 d.d_type = DT_LNK; 871 break; 872 873 case VREG: 874 d.d_type = DT_REG; 875 break; 876 877 case VSOCK: 878 d.d_type = DT_SOCK; 879 break; 880 881 default: 882 panic("tmpfs_dir_getdents: type %p %d", 883 de->td_node, (int)de->td_node->tn_type); 884 } 885 d.d_namlen = de->td_namelen; 886 KKASSERT(de->td_namelen < sizeof(d.d_name)); 887 bcopy(de->td_name, d.d_name, d.d_namlen); 888 d.d_name[d.d_namlen] = '\0'; 889 reclen = _DIRENT_RECLEN(d.d_namlen); 890 891 /* Stop reading if the directory entry we are treating is 892 * bigger than the amount of data that can be returned. */ 893 if (reclen > uio->uio_resid) { 894 error = -1; 895 break; 896 } 897 898 /* Copy the new dirent structure into the output buffer and 899 * advance pointers. */ 900 error = uiomove((caddr_t)&d, reclen, uio); 901 902 (*cntp)++; 903 de = RB_NEXT(tmpfs_dirtree, node->tn_dir.tn_dirtree, de); 904 } while (error == 0 && uio->uio_resid > 0 && de != NULL); 905 906 /* Update the offset and cache. */ 907 if (de == NULL) { 908 uio->uio_offset = TMPFS_DIRCOOKIE_EOF; 909 node->tn_dir.tn_readdir_lastn = 0; 910 node->tn_dir.tn_readdir_lastp = NULL; 911 } else { 912 node->tn_dir.tn_readdir_lastn = uio->uio_offset = tmpfs_dircookie(de); 913 node->tn_dir.tn_readdir_lastp = de; 914 } 915 node->tn_status |= TMPFS_NODE_ACCESSED; 916 917 return error; 918 } 919 920 /* --------------------------------------------------------------------- */ 921 922 /* 923 * Resizes the aobj associated to the regular file pointed to by vp to 924 * the size newsize. 'vp' must point to a vnode that represents a regular 925 * file. 'newsize' must be positive. 
926 * 927 * pass trivial as 1 when buf content will be overwritten, otherwise set 0 928 * to be zero filled. 929 * 930 * Returns zero on success or an appropriate error code on failure. 931 */ 932 int 933 tmpfs_reg_resize(struct vnode *vp, off_t newsize, int trivial) 934 { 935 int error; 936 vm_pindex_t newpages, oldpages; 937 struct tmpfs_mount *tmp; 938 struct tmpfs_node *node; 939 off_t oldsize; 940 941 #ifdef INVARIANTS 942 KKASSERT(vp->v_type == VREG); 943 KKASSERT(newsize >= 0); 944 #endif 945 946 node = VP_TO_TMPFS_NODE(vp); 947 tmp = VFS_TO_TMPFS(vp->v_mount); 948 949 /* Convert the old and new sizes to the number of pages needed to 950 * store them. It may happen that we do not need to do anything 951 * because the last allocated page can accommodate the change on 952 * its own. */ 953 oldsize = node->tn_size; 954 oldpages = round_page64(oldsize) / PAGE_SIZE; 955 KKASSERT(oldpages == node->tn_reg.tn_aobj_pages); 956 newpages = round_page64(newsize) / PAGE_SIZE; 957 958 if (newpages > oldpages && 959 tmp->tm_pages_used + newpages - oldpages > tmp->tm_pages_max) { 960 error = ENOSPC; 961 goto out; 962 } 963 964 TMPFS_LOCK(tmp); 965 tmp->tm_pages_used += (newpages - oldpages); 966 TMPFS_UNLOCK(tmp); 967 968 TMPFS_NODE_LOCK(node); 969 node->tn_reg.tn_aobj_pages = newpages; 970 node->tn_size = newsize; 971 TMPFS_NODE_UNLOCK(node); 972 973 /* 974 * When adjusting the vnode filesize and its VM object we must 975 * also adjust our backing VM object (aobj). The blocksize 976 * used must match the block sized we use for the buffer cache. 977 * 978 * The backing VM object contains no VM pages, only swap 979 * assignments. 
980 */ 981 if (newsize < oldsize) { 982 vm_pindex_t osize; 983 vm_pindex_t nsize; 984 vm_object_t aobj; 985 986 error = nvtruncbuf(vp, newsize, BSIZE, -1, 0); 987 aobj = node->tn_reg.tn_aobj; 988 if (aobj) { 989 osize = aobj->size; 990 nsize = vp->v_object->size; 991 if (nsize < osize) { 992 aobj->size = osize; 993 swap_pager_freespace(aobj, nsize, 994 osize - nsize); 995 } 996 } 997 } else { 998 vm_object_t aobj; 999 1000 error = nvextendbuf(vp, oldsize, newsize, BSIZE, BSIZE, 1001 -1, -1, trivial); 1002 aobj = node->tn_reg.tn_aobj; 1003 if (aobj) 1004 aobj->size = vp->v_object->size; 1005 } 1006 1007 out: 1008 return error; 1009 } 1010 1011 /* --------------------------------------------------------------------- */ 1012 1013 /* 1014 * Change flags of the given vnode. 1015 * Caller should execute tmpfs_update on vp after a successful execution. 1016 * The vnode must be locked on entry and remain locked on exit. 1017 */ 1018 int 1019 tmpfs_chflags(struct vnode *vp, int vaflags, struct ucred *cred) 1020 { 1021 int error; 1022 struct tmpfs_node *node; 1023 int flags; 1024 1025 KKASSERT(vn_islocked(vp)); 1026 1027 node = VP_TO_TMPFS_NODE(vp); 1028 flags = node->tn_flags; 1029 1030 /* Disallow this operation if the file system is mounted read-only. */ 1031 if (vp->v_mount->mnt_flag & MNT_RDONLY) 1032 return EROFS; 1033 error = vop_helper_setattr_flags(&flags, vaflags, node->tn_uid, cred); 1034 1035 /* 1036 * Unprivileged processes are not permitted to unset system 1037 * flags, or modify flags if any system flags are set. 1038 * 1039 * Silently enforce SF_NOCACHE on the root tmpfs vnode so 1040 * tmpfs data is not double-cached by swapcache. 
1041 */ 1042 if (error == 0) { 1043 TMPFS_NODE_LOCK(node); 1044 if (!priv_check_cred(cred, PRIV_VFS_SYSFLAGS, 0)) { 1045 if (vp->v_flag & VROOT) 1046 flags |= SF_NOCACHE; 1047 node->tn_flags = flags; 1048 } else { 1049 if (node->tn_flags & (SF_NOUNLINK | SF_IMMUTABLE | 1050 SF_APPEND) || 1051 (flags & UF_SETTABLE) != flags) { 1052 error = EPERM; 1053 } else { 1054 node->tn_flags &= SF_SETTABLE; 1055 node->tn_flags |= (flags & UF_SETTABLE); 1056 } 1057 } 1058 node->tn_status |= TMPFS_NODE_CHANGED; 1059 TMPFS_NODE_UNLOCK(node); 1060 } 1061 1062 KKASSERT(vn_islocked(vp)); 1063 1064 return error; 1065 } 1066 1067 /* --------------------------------------------------------------------- */ 1068 1069 /* 1070 * Change access mode on the given vnode. 1071 * Caller should execute tmpfs_update on vp after a successful execution. 1072 * The vnode must be locked on entry and remain locked on exit. 1073 */ 1074 int 1075 tmpfs_chmod(struct vnode *vp, mode_t vamode, struct ucred *cred) 1076 { 1077 struct tmpfs_node *node; 1078 mode_t cur_mode; 1079 int error; 1080 1081 KKASSERT(vn_islocked(vp)); 1082 1083 node = VP_TO_TMPFS_NODE(vp); 1084 1085 /* Disallow this operation if the file system is mounted read-only. */ 1086 if (vp->v_mount->mnt_flag & MNT_RDONLY) 1087 return EROFS; 1088 1089 /* Immutable or append-only files cannot be modified, either. 
*/ 1090 if (node->tn_flags & (IMMUTABLE | APPEND)) 1091 return EPERM; 1092 1093 cur_mode = node->tn_mode; 1094 error = vop_helper_chmod(vp, vamode, cred, node->tn_uid, node->tn_gid, 1095 &cur_mode); 1096 1097 if (error == 0 && 1098 (node->tn_mode & ALLPERMS) != (cur_mode & ALLPERMS)) { 1099 TMPFS_NODE_LOCK(node); 1100 node->tn_mode &= ~ALLPERMS; 1101 node->tn_mode |= cur_mode & ALLPERMS; 1102 1103 node->tn_status |= TMPFS_NODE_CHANGED; 1104 TMPFS_NODE_UNLOCK(node); 1105 } 1106 1107 KKASSERT(vn_islocked(vp)); 1108 1109 return 0; 1110 } 1111 1112 /* --------------------------------------------------------------------- */ 1113 1114 /* 1115 * Change ownership of the given vnode. At least one of uid or gid must 1116 * be different than VNOVAL. If one is set to that value, the attribute 1117 * is unchanged. 1118 * Caller should execute tmpfs_update on vp after a successful execution. 1119 * The vnode must be locked on entry and remain locked on exit. 1120 */ 1121 int 1122 tmpfs_chown(struct vnode *vp, uid_t uid, gid_t gid, struct ucred *cred) 1123 { 1124 mode_t cur_mode; 1125 uid_t cur_uid; 1126 gid_t cur_gid; 1127 struct tmpfs_node *node; 1128 int error; 1129 1130 KKASSERT(vn_islocked(vp)); 1131 node = VP_TO_TMPFS_NODE(vp); 1132 1133 /* Disallow this operation if the file system is mounted read-only. */ 1134 if (vp->v_mount->mnt_flag & MNT_RDONLY) 1135 return EROFS; 1136 1137 /* Immutable or append-only files cannot be modified, either. 
*/ 1138 if (node->tn_flags & (IMMUTABLE | APPEND)) 1139 return EPERM; 1140 1141 cur_uid = node->tn_uid; 1142 cur_gid = node->tn_gid; 1143 cur_mode = node->tn_mode; 1144 error = vop_helper_chown(vp, uid, gid, cred, 1145 &cur_uid, &cur_gid, &cur_mode); 1146 1147 if (error == 0) { 1148 TMPFS_NODE_LOCK(node); 1149 if (cur_uid != node->tn_uid || 1150 cur_gid != node->tn_gid || 1151 cur_mode != node->tn_mode) { 1152 node->tn_uid = cur_uid; 1153 node->tn_gid = cur_gid; 1154 node->tn_mode = cur_mode; 1155 node->tn_status |= TMPFS_NODE_CHANGED; 1156 } 1157 TMPFS_NODE_UNLOCK(node); 1158 } 1159 1160 return error; 1161 } 1162 1163 /* --------------------------------------------------------------------- */ 1164 1165 /* 1166 * Change size of the given vnode. 1167 * Caller should execute tmpfs_update on vp after a successful execution. 1168 * The vnode must be locked on entry and remain locked on exit. 1169 */ 1170 int 1171 tmpfs_chsize(struct vnode *vp, u_quad_t size, struct ucred *cred) 1172 { 1173 int error; 1174 struct tmpfs_node *node; 1175 1176 KKASSERT(vn_islocked(vp)); 1177 1178 node = VP_TO_TMPFS_NODE(vp); 1179 1180 /* Decide whether this is a valid operation based on the file type. */ 1181 error = 0; 1182 switch (vp->v_type) { 1183 case VDIR: 1184 return EISDIR; 1185 1186 case VREG: 1187 if (vp->v_mount->mnt_flag & MNT_RDONLY) 1188 return EROFS; 1189 break; 1190 1191 case VBLK: 1192 /* FALLTHROUGH */ 1193 case VCHR: 1194 /* FALLTHROUGH */ 1195 case VFIFO: 1196 /* Allow modifications of special files even if in the file 1197 * system is mounted read-only (we are not modifying the 1198 * files themselves, but the objects they represent). */ 1199 return 0; 1200 1201 default: 1202 /* Anything else is unsupported. */ 1203 return EOPNOTSUPP; 1204 } 1205 1206 /* Immutable or append-only files cannot be modified, either. 
*/ 1207 if (node->tn_flags & (IMMUTABLE | APPEND)) 1208 return EPERM; 1209 1210 error = tmpfs_truncate(vp, size); 1211 /* tmpfs_truncate will raise the NOTE_EXTEND and NOTE_ATTRIB kevents 1212 * for us, as will update tn_status; no need to do that here. */ 1213 1214 KKASSERT(vn_islocked(vp)); 1215 1216 return error; 1217 } 1218 1219 /* --------------------------------------------------------------------- */ 1220 1221 /* 1222 * Change access and modification times of the given vnode. 1223 * Caller should execute tmpfs_update on vp after a successful execution. 1224 * The vnode must be locked on entry and remain locked on exit. 1225 */ 1226 int 1227 tmpfs_chtimes(struct vnode *vp, struct timespec *atime, struct timespec *mtime, 1228 int vaflags, struct ucred *cred) 1229 { 1230 struct tmpfs_node *node; 1231 1232 KKASSERT(vn_islocked(vp)); 1233 1234 node = VP_TO_TMPFS_NODE(vp); 1235 1236 /* Disallow this operation if the file system is mounted read-only. */ 1237 if (vp->v_mount->mnt_flag & MNT_RDONLY) 1238 return EROFS; 1239 1240 /* Immutable or append-only files cannot be modified, either. 
*/ 1241 if (node->tn_flags & (IMMUTABLE | APPEND)) 1242 return EPERM; 1243 1244 TMPFS_NODE_LOCK(node); 1245 if (atime->tv_sec != VNOVAL && atime->tv_nsec != VNOVAL) 1246 node->tn_status |= TMPFS_NODE_ACCESSED; 1247 1248 if (mtime->tv_sec != VNOVAL && mtime->tv_nsec != VNOVAL) 1249 node->tn_status |= TMPFS_NODE_MODIFIED; 1250 1251 TMPFS_NODE_UNLOCK(node); 1252 1253 tmpfs_itimes(vp, atime, mtime); 1254 1255 KKASSERT(vn_islocked(vp)); 1256 1257 return 0; 1258 } 1259 1260 /* --------------------------------------------------------------------- */ 1261 /* Sync timestamps */ 1262 void 1263 tmpfs_itimes(struct vnode *vp, const struct timespec *acc, 1264 const struct timespec *mod) 1265 { 1266 struct tmpfs_node *node; 1267 struct timespec now; 1268 1269 node = VP_TO_TMPFS_NODE(vp); 1270 1271 if ((node->tn_status & (TMPFS_NODE_ACCESSED | TMPFS_NODE_MODIFIED | 1272 TMPFS_NODE_CHANGED)) == 0) 1273 return; 1274 1275 vfs_timestamp(&now); 1276 1277 TMPFS_NODE_LOCK(node); 1278 if (node->tn_status & TMPFS_NODE_ACCESSED) { 1279 if (acc == NULL) 1280 acc = &now; 1281 node->tn_atime = acc->tv_sec; 1282 node->tn_atimensec = acc->tv_nsec; 1283 } 1284 if (node->tn_status & TMPFS_NODE_MODIFIED) { 1285 if (mod == NULL) 1286 mod = &now; 1287 node->tn_mtime = mod->tv_sec; 1288 node->tn_mtimensec = mod->tv_nsec; 1289 } 1290 if (node->tn_status & TMPFS_NODE_CHANGED) { 1291 node->tn_ctime = now.tv_sec; 1292 node->tn_ctimensec = now.tv_nsec; 1293 } 1294 node->tn_status &= 1295 ~(TMPFS_NODE_ACCESSED | TMPFS_NODE_MODIFIED | TMPFS_NODE_CHANGED); 1296 TMPFS_NODE_UNLOCK(node); 1297 } 1298 1299 /* --------------------------------------------------------------------- */ 1300 1301 void 1302 tmpfs_update(struct vnode *vp) 1303 { 1304 1305 tmpfs_itimes(vp, NULL, NULL); 1306 } 1307 1308 /* --------------------------------------------------------------------- */ 1309 1310 int 1311 tmpfs_truncate(struct vnode *vp, off_t length) 1312 { 1313 int error; 1314 struct tmpfs_node *node; 1315 1316 node = 
VP_TO_TMPFS_NODE(vp); 1317 1318 if (length < 0) { 1319 error = EINVAL; 1320 goto out; 1321 } 1322 1323 if (node->tn_size == length) { 1324 error = 0; 1325 goto out; 1326 } 1327 1328 if (length > VFS_TO_TMPFS(vp->v_mount)->tm_maxfilesize) 1329 return (EFBIG); 1330 1331 1332 error = tmpfs_reg_resize(vp, length, 1); 1333 1334 if (error == 0) { 1335 TMPFS_NODE_LOCK(node); 1336 node->tn_status |= TMPFS_NODE_CHANGED | TMPFS_NODE_MODIFIED; 1337 TMPFS_NODE_UNLOCK(node); 1338 } 1339 1340 out: 1341 tmpfs_update(vp); 1342 1343 return error; 1344 } 1345 1346 /* --------------------------------------------------------------------- */ 1347 1348 static ino_t 1349 tmpfs_fetch_ino(struct tmpfs_mount *tmp) 1350 { 1351 ino_t ret; 1352 1353 ret = tmp->tm_ino++; 1354 1355 return (ret); 1356 } 1357 1358 static int 1359 tmpfs_dirtree_compare(struct tmpfs_dirent *a, struct tmpfs_dirent *b) 1360 { 1361 if (a->td_namelen > b->td_namelen) 1362 return 1; 1363 else if (a->td_namelen < b->td_namelen) 1364 return -1; 1365 else 1366 return strncmp(a->td_name, b->td_name, a->td_namelen); 1367 } 1368