1 /* $NetBSD: tmpfs_subr.c,v 1.35 2007/07/09 21:10:50 ad Exp $ */ 2 3 /*- 4 * Copyright (c) 2005 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Julio M. Merino Vidal, developed as part of Google's Summer of Code 9 * 2005 program. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 23 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 30 * POSSIBILITY OF SUCH DAMAGE. 31 */ 32 33 /* 34 * Efficient memory file system supporting functions. 
*/

#include <sys/kernel.h>
#include <sys/param.h>
#include <sys/namei.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/spinlock2.h>
#include <sys/stat.h>
#include <sys/systm.h>
#include <sys/vnode.h>
#include <sys/vmmeter.h>

#include <vm/vm.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pager.h>
#include <vm/vm_extern.h>

#include <vfs/tmpfs/tmpfs.h>
#include <vfs/tmpfs/tmpfs_vnops.h>

/*
 * Supplies the value stored in tn_id of a freshly allocated node (see
 * tmpfs_alloc_node()).  The definition is outside this part of the file.
 */
static ino_t tmpfs_fetch_ino(struct tmpfs_mount *);

/*
 * Per-directory red-black tree of entries linked through rb_node.  It is
 * the tree tmpfs_dir_lookup() searches with a key that carries td_name and
 * td_namelen; the comparator itself is defined elsewhere.
 */
static int tmpfs_dirtree_compare(struct tmpfs_dirent *a,
	struct tmpfs_dirent *b);
RB_GENERATE(tmpfs_dirtree, tmpfs_dirent, rb_node, tmpfs_dirtree_compare);

/*
 * Second per-directory tree over the same entries, linked through
 * rb_cookienode.  The readdir helpers walk this tree and look entries up
 * by the value of tmpfs_dircookie(); the comparator is defined elsewhere.
 */
static int tmpfs_dirtree_compare_cookie(struct tmpfs_dirent *a,
	struct tmpfs_dirent *b);
RB_GENERATE(tmpfs_dirtree_cookie, tmpfs_dirent,
	rb_cookienode, tmpfs_dirtree_compare_cookie);


/* --------------------------------------------------------------------- */

/*
 * Allocates a new node of type 'type' inside the 'tmp' mount point, with
 * its owner set to 'uid', its group to 'gid' and its mode set to 'mode'.
 *
 * If the node type is set to 'VBLK' or 'VCHR', then 'rmajor' and 'rminor'
 * specify the device the node represents; 'rmajor' must not be VNOVAL.
 *
 * If the node type is set to 'VLNK', then the parameter target specifies
 * the file name of the target file for the symbolic link that is being
 * created; it must be NULL for every other type.
 *
 * New nodes come from the mount's node object cache (tm_node_pool).  The
 * allocation fails with ENOSPC once tm_nodes_max nodes are in use or when
 * the cache cannot supply a node.
 *
 * On success the new node is stored in *node.
 *
 * Returns zero on success or an appropriate error code on failure.
92 */ 93 int 94 tmpfs_alloc_node(struct tmpfs_mount *tmp, enum vtype type, 95 uid_t uid, gid_t gid, mode_t mode, 96 char *target, int rmajor, int rminor, 97 struct tmpfs_node **node) 98 { 99 struct tmpfs_node *nnode; 100 struct timespec ts; 101 udev_t rdev; 102 103 KKASSERT(IFF(type == VLNK, target != NULL)); 104 KKASSERT(IFF(type == VBLK || type == VCHR, rmajor != VNOVAL)); 105 106 if (tmp->tm_nodes_inuse >= tmp->tm_nodes_max) 107 return (ENOSPC); 108 109 nnode = objcache_get(tmp->tm_node_pool, M_WAITOK | M_NULLOK); 110 if (nnode == NULL) 111 return (ENOSPC); 112 113 /* Generic initialization. */ 114 nnode->tn_type = type; 115 vfs_timestamp(&ts); 116 nnode->tn_ctime = nnode->tn_mtime = nnode->tn_atime 117 = ts.tv_sec; 118 nnode->tn_ctimensec = nnode->tn_mtimensec = nnode->tn_atimensec 119 = ts.tv_nsec; 120 nnode->tn_uid = uid; 121 nnode->tn_gid = gid; 122 nnode->tn_mode = mode; 123 nnode->tn_id = tmpfs_fetch_ino(tmp); 124 nnode->tn_advlock.init_done = 0; 125 KKASSERT(nnode->tn_links == 0); 126 127 /* Type-specific initialization. 
*/ 128 switch (nnode->tn_type) { 129 case VBLK: 130 case VCHR: 131 rdev = makeudev(rmajor, rminor); 132 if (rdev == NOUDEV) { 133 objcache_put(tmp->tm_node_pool, nnode); 134 return(EINVAL); 135 } 136 nnode->tn_rdev = rdev; 137 break; 138 139 case VDIR: 140 RB_INIT(&nnode->tn_dir.tn_dirtree); 141 RB_INIT(&nnode->tn_dir.tn_cookietree); 142 nnode->tn_size = 0; 143 break; 144 145 case VFIFO: 146 /* FALLTHROUGH */ 147 case VSOCK: 148 break; 149 150 case VLNK: 151 nnode->tn_size = strlen(target); 152 nnode->tn_link = kmalloc(nnode->tn_size + 1, tmp->tm_name_zone, 153 M_WAITOK | M_NULLOK); 154 if (nnode->tn_link == NULL) { 155 objcache_put(tmp->tm_node_pool, nnode); 156 return (ENOSPC); 157 } 158 bcopy(target, nnode->tn_link, nnode->tn_size); 159 nnode->tn_link[nnode->tn_size] = '\0'; 160 break; 161 162 case VREG: 163 nnode->tn_reg.tn_aobj = 164 swap_pager_alloc(NULL, 0, VM_PROT_DEFAULT, 0); 165 nnode->tn_reg.tn_aobj_pages = 0; 166 nnode->tn_size = 0; 167 break; 168 169 default: 170 panic("tmpfs_alloc_node: type %p %d", nnode, (int)nnode->tn_type); 171 } 172 173 TMPFS_NODE_LOCK(nnode); 174 TMPFS_LOCK(tmp); 175 LIST_INSERT_HEAD(&tmp->tm_nodes_used, nnode, tn_entries); 176 tmp->tm_nodes_inuse++; 177 TMPFS_UNLOCK(tmp); 178 TMPFS_NODE_UNLOCK(nnode); 179 180 *node = nnode; 181 return 0; 182 } 183 184 /* --------------------------------------------------------------------- */ 185 186 /* 187 * Destroys the node pointed to by node from the file system 'tmp'. 188 * If the node does not belong to the given mount point, the results are 189 * unpredicted. 190 * 191 * If the node references a directory; no entries are allowed because 192 * their removal could need a recursive algorithm, something forbidden in 193 * kernel space. Furthermore, there is not need to provide such 194 * functionality (recursive removal) because the only primitives offered 195 * to the user are the removal of empty directories and the deletion of 196 * individual files. 
197 * 198 * Note that nodes are not really deleted; in fact, when a node has been 199 * allocated, it cannot be deleted during the whole life of the file 200 * system. Instead, they are moved to the available list and remain there 201 * until reused. 202 */ 203 void 204 tmpfs_free_node(struct tmpfs_mount *tmp, struct tmpfs_node *node) 205 { 206 vm_pindex_t pages = 0; 207 208 #ifdef INVARIANTS 209 TMPFS_ASSERT_ELOCKED(node); 210 KKASSERT(node->tn_vnode == NULL); 211 KKASSERT((node->tn_vpstate & TMPFS_VNODE_ALLOCATING) == 0); 212 #endif 213 214 TMPFS_LOCK(tmp); 215 LIST_REMOVE(node, tn_entries); 216 tmp->tm_nodes_inuse--; 217 TMPFS_UNLOCK(tmp); 218 TMPFS_NODE_UNLOCK(node); 219 220 switch (node->tn_type) { 221 case VNON: 222 /* Do not do anything. VNON is provided to let the 223 * allocation routine clean itself easily by avoiding 224 * duplicating code in it. */ 225 /* FALLTHROUGH */ 226 case VBLK: 227 /* FALLTHROUGH */ 228 case VCHR: 229 /* FALLTHROUGH */ 230 break; 231 case VDIR: 232 /* 233 * The parent link can be NULL if this is the root 234 * node or if it is a directory node that was rmdir'd. 235 * 236 * XXX what if node is a directory which still contains 237 * directory entries (e.g. due to a forced umount) ? 238 */ 239 node->tn_size = 0; 240 KKASSERT(node->tn_dir.tn_parent == NULL); 241 242 /* 243 * If the root node is being destroyed don't leave a 244 * dangling pointer in tmpfs_mount. 
245 */ 246 if (node == tmp->tm_root) 247 tmp->tm_root = NULL; 248 break; 249 case VFIFO: 250 /* FALLTHROUGH */ 251 case VSOCK: 252 break; 253 254 case VLNK: 255 kfree(node->tn_link, tmp->tm_name_zone); 256 node->tn_link = NULL; 257 node->tn_size = 0; 258 break; 259 260 case VREG: 261 if (node->tn_reg.tn_aobj != NULL) 262 vm_object_deallocate(node->tn_reg.tn_aobj); 263 node->tn_reg.tn_aobj = NULL; 264 pages = node->tn_reg.tn_aobj_pages; 265 break; 266 267 default: 268 panic("tmpfs_free_node: type %p %d", node, (int)node->tn_type); 269 } 270 271 /* 272 * Clean up fields for the next allocation. The objcache only ctors 273 * new allocations. 274 */ 275 tmpfs_node_ctor(node, NULL, 0); 276 objcache_put(tmp->tm_node_pool, node); 277 /* node is now invalid */ 278 279 TMPFS_LOCK(tmp); 280 tmp->tm_pages_used -= pages; 281 TMPFS_UNLOCK(tmp); 282 } 283 284 /* --------------------------------------------------------------------- */ 285 286 /* 287 * Allocates a new directory entry for the node node with a name of name. 288 * The new directory entry is returned in *de. 289 * 290 * The link count of node is increased by one to reflect the new object 291 * referencing it. 292 * 293 * Returns zero on success or an appropriate error code on failure. 
294 */ 295 int 296 tmpfs_alloc_dirent(struct tmpfs_mount *tmp, struct tmpfs_node *node, 297 const char *name, uint16_t len, struct tmpfs_dirent **de) 298 { 299 struct tmpfs_dirent *nde; 300 301 nde = objcache_get(tmp->tm_dirent_pool, M_WAITOK); 302 nde->td_name = kmalloc(len + 1, tmp->tm_name_zone, M_WAITOK | M_NULLOK); 303 if (nde->td_name == NULL) { 304 objcache_put(tmp->tm_dirent_pool, nde); 305 *de = NULL; 306 return (ENOSPC); 307 } 308 nde->td_namelen = len; 309 bcopy(name, nde->td_name, len); 310 nde->td_name[len] = '\0'; 311 312 nde->td_node = node; 313 314 TMPFS_NODE_LOCK(node); 315 ++node->tn_links; 316 TMPFS_NODE_UNLOCK(node); 317 318 *de = nde; 319 320 return 0; 321 } 322 323 /* --------------------------------------------------------------------- */ 324 325 /* 326 * Frees a directory entry. It is the caller's responsibility to destroy 327 * the node referenced by it if needed. 328 * 329 * The link count of node is decreased by one to reflect the removal of an 330 * object that referenced it. This only happens if 'node_exists' is true; 331 * otherwise the function will not access the node referred to by the 332 * directory entry, as it may already have been released from the outside. 333 */ 334 void 335 tmpfs_free_dirent(struct tmpfs_mount *tmp, struct tmpfs_dirent *de) 336 { 337 struct tmpfs_node *node; 338 339 node = de->td_node; 340 341 TMPFS_NODE_LOCK(node); 342 TMPFS_ASSERT_ELOCKED(node); 343 KKASSERT(node->tn_links > 0); 344 node->tn_links--; 345 TMPFS_NODE_UNLOCK(node); 346 347 kfree(de->td_name, tmp->tm_name_zone); 348 de->td_namelen = 0; 349 de->td_name = NULL; 350 de->td_node = NULL; 351 objcache_put(tmp->tm_dirent_pool, de); 352 } 353 354 /* --------------------------------------------------------------------- */ 355 356 /* 357 * Allocates a new vnode for the node node or returns a new reference to 358 * an existing one if the node had already a vnode referencing it. The 359 * resulting locked vnode is returned in *vpp. 
*
 * Returns zero on success or an appropriate error code on failure.
 */
int
tmpfs_alloc_vp(struct mount *mp, struct tmpfs_node *node, int lkflag,
	       struct vnode **vpp)
{
	int error = 0;
	struct vnode *vp;

loop:
	/*
	 * Interlocked extraction from node.  This can race many things.
	 * We have to get a soft reference on the vnode while we hold
	 * the node locked, then acquire it properly and check for races.
	 */
	TMPFS_NODE_LOCK(node);
	if ((vp = node->tn_vnode) != NULL) {
		KKASSERT((node->tn_vpstate & TMPFS_VNODE_DOOMED) == 0);
		/* Hold the vnode so it stays valid once the node is unlocked. */
		vhold(vp);
		TMPFS_NODE_UNLOCK(node);

		/* Could not acquire the vnode: drop the hold and retry. */
		if (vget(vp, lkflag | LK_EXCLUSIVE) != 0) {
			vdrop(vp);
			goto loop;
		}
		/*
		 * The node's vnode changed while the node was unlocked:
		 * release what we acquired and retry from the top.
		 */
		if (node->tn_vnode != vp) {
			vput(vp);
			vdrop(vp);
			goto loop;
		}
		/* vget() succeeded, so the temporary hold can go. */
		vdrop(vp);
		goto out;
	}
	/* vp is NULL */

	/*
	 * This should never happen.
	 */
	if (node->tn_vpstate & TMPFS_VNODE_DOOMED) {
		TMPFS_NODE_UNLOCK(node);
		error = ENOENT;
		goto out;
	}

	/*
	 * Interlock against other calls to tmpfs_alloc_vp() trying to
	 * allocate and assign a vp to node.
	 *
	 * NOTE(review): the early return on a tsleep() error is the only
	 * exit from this function that does not store to *vpp.
	 */
	if (node->tn_vpstate & TMPFS_VNODE_ALLOCATING) {
		node->tn_vpstate |= TMPFS_VNODE_WANT;
		error = tsleep(&node->tn_vpstate, PINTERLOCKED | PCATCH,
			       "tmpfs_alloc_vp", 0);
		TMPFS_NODE_UNLOCK(node);
		if (error)
			return error;
		goto loop;
	}
	node->tn_vpstate |= TMPFS_VNODE_ALLOCATING;
	TMPFS_NODE_UNLOCK(node);

	/*
	 * Allocate a new vnode (may block).  The ALLOCATING flag should
	 * prevent a race against someone else assigning node->tn_vnode.
	 */
	error = getnewvnode(VT_TMPFS, mp, &vp, VLKTIMEOUT, LK_CANRECURSE);
	if (error != 0)
		goto unlock;

	KKASSERT(node->tn_vnode == NULL);
	KKASSERT(vp != NULL);
	/* Point the vnode at the node; the reverse link is set at 'unlock'. */
	vp->v_data = node;
	vp->v_type = node->tn_type;

	/* Type-specific initialization. */
	switch (node->tn_type) {
	case VBLK:
		/* FALLTHROUGH */
	case VCHR:
		/* FALLTHROUGH */
	case VSOCK:
		break;
	case VREG:
		vinitvmio(vp, node->tn_size, TMPFS_BLKMASK, -1);
		break;
	case VLNK:
		break;
	case VFIFO:
		/* FIFOs use the mount's FIFO operations vector. */
		vp->v_ops = &mp->mnt_vn_fifo_ops;
		break;
	case VDIR:
		break;

	default:
		panic("tmpfs_alloc_vp: type %p %d", node, (int)node->tn_type);
	}


unlock:
	/*
	 * Reached on success and when getnewvnode() failed.  Clear the
	 * ALLOCATING interlock, record vp in the node and wake up anyone
	 * who flagged WANT while sleeping above.
	 *
	 * NOTE(review): on the failure path vp is whatever getnewvnode()
	 * left in it; it was NULL before the call -- confirm that
	 * getnewvnode() does not store to it on error.
	 */
	TMPFS_NODE_LOCK(node);

	KKASSERT(node->tn_vpstate & TMPFS_VNODE_ALLOCATING);
	node->tn_vpstate &= ~TMPFS_VNODE_ALLOCATING;
	node->tn_vnode = vp;

	if (node->tn_vpstate & TMPFS_VNODE_WANT) {
		node->tn_vpstate &= ~TMPFS_VNODE_WANT;
		TMPFS_NODE_UNLOCK(node);
		wakeup(&node->tn_vpstate);
	} else {
		TMPFS_NODE_UNLOCK(node);
	}

out:
	*vpp = vp;

	/* Success must hand back a non-NULL, locked vnode and vice versa. */
	KKASSERT(IFF(error == 0, *vpp != NULL && vn_islocked(*vpp)));
#ifdef INVARIANTS
	TMPFS_NODE_LOCK(node);
	KKASSERT(*vpp == node->tn_vnode);
	TMPFS_NODE_UNLOCK(node);
#endif

	return error;
}

/* --------------------------------------------------------------------- */

/*
 * Destroys the association between the vnode vp and the node it
 * references: under the node lock, the node's tn_vnode and the vnode's
 * v_data are both cleared.
 */
void
tmpfs_free_vp(struct vnode *vp)
{
	struct tmpfs_node *node;

	node = VP_TO_TMPFS_NODE(vp);

	TMPFS_NODE_LOCK(node);
	KKASSERT(lockcount(TMPFS_NODE_MTX(node)) > 0);
	node->tn_vnode = NULL;
	vp->v_data = NULL;
	TMPFS_NODE_UNLOCK(node);
}

/* --------------------------------------------------------------------- */

/*
 * Allocates a new file of type 'type' and adds it to the parent directory
 * 'dvp'; this addition is done using the component name given in 'cnp'.
511 * The ownership of the new file is automatically assigned based on the 512 * credentials of the caller (through 'cnp'), the group is set based on 513 * the parent directory and the mode is determined from the 'vap' argument. 514 * If successful, *vpp holds a vnode to the newly created file and zero 515 * is returned. Otherwise *vpp is NULL and the function returns an 516 * appropriate error code. 517 */ 518 int 519 tmpfs_alloc_file(struct vnode *dvp, struct vnode **vpp, struct vattr *vap, 520 struct namecache *ncp, struct ucred *cred, char *target) 521 { 522 int error; 523 struct tmpfs_dirent *de; 524 struct tmpfs_mount *tmp; 525 struct tmpfs_node *dnode; 526 struct tmpfs_node *node; 527 528 tmp = VFS_TO_TMPFS(dvp->v_mount); 529 dnode = VP_TO_TMPFS_DIR(dvp); 530 *vpp = NULL; 531 532 /* 533 * If the directory was removed but a process was CD'd into it, 534 * we do not allow any more file/dir creation within it. Otherwise 535 * we will lose track of it. 536 */ 537 KKASSERT(dnode->tn_type == VDIR); 538 if (dnode != tmp->tm_root && dnode->tn_dir.tn_parent == NULL) 539 return ENOENT; 540 541 /* 542 * Make sure the link count does not overflow. 543 */ 544 if (vap->va_type == VDIR && dnode->tn_links >= LINK_MAX) 545 return EMLINK; 546 547 /* Allocate a node that represents the new file. */ 548 error = tmpfs_alloc_node(tmp, vap->va_type, cred->cr_uid, 549 dnode->tn_gid, vap->va_mode, target, 550 vap->va_rmajor, vap->va_rminor, &node); 551 if (error != 0) 552 return error; 553 TMPFS_NODE_LOCK(node); 554 555 /* Allocate a directory entry that points to the new file. */ 556 error = tmpfs_alloc_dirent(tmp, node, ncp->nc_name, ncp->nc_nlen, &de); 557 if (error != 0) { 558 tmpfs_free_node(tmp, node); 559 /* eats node lock */ 560 return error; 561 } 562 563 /* Allocate a vnode for the new file. 
*/ 564 error = tmpfs_alloc_vp(dvp->v_mount, node, LK_EXCLUSIVE, vpp); 565 if (error != 0) { 566 tmpfs_free_dirent(tmp, de); 567 tmpfs_free_node(tmp, node); 568 /* eats node lock */ 569 return error; 570 } 571 572 /* 573 * Now that all required items are allocated, we can proceed to 574 * insert the new node into the directory, an operation that 575 * cannot fail. 576 */ 577 tmpfs_dir_attach(dnode, de); 578 TMPFS_NODE_UNLOCK(node); 579 580 return error; 581 } 582 583 /* --------------------------------------------------------------------- */ 584 585 /* 586 * Attaches the directory entry de to the directory represented by vp. 587 * Note that this does not change the link count of the node pointed by 588 * the directory entry, as this is done by tmpfs_alloc_dirent. 589 */ 590 void 591 tmpfs_dir_attach(struct tmpfs_node *dnode, struct tmpfs_dirent *de) 592 { 593 struct tmpfs_node *node = de->td_node; 594 595 TMPFS_NODE_LOCK(dnode); 596 if (node && node->tn_type == VDIR) { 597 TMPFS_NODE_LOCK(node); 598 ++node->tn_links; 599 node->tn_status |= TMPFS_NODE_CHANGED; 600 node->tn_dir.tn_parent = dnode; 601 ++dnode->tn_links; 602 TMPFS_NODE_UNLOCK(node); 603 } 604 RB_INSERT(tmpfs_dirtree, &dnode->tn_dir.tn_dirtree, de); 605 RB_INSERT(tmpfs_dirtree_cookie, &dnode->tn_dir.tn_cookietree, de); 606 dnode->tn_size += sizeof(struct tmpfs_dirent); 607 dnode->tn_status |= TMPFS_NODE_ACCESSED | TMPFS_NODE_CHANGED | 608 TMPFS_NODE_MODIFIED; 609 TMPFS_NODE_UNLOCK(dnode); 610 } 611 612 /* --------------------------------------------------------------------- */ 613 614 /* 615 * Detaches the directory entry de from the directory represented by vp. 616 * Note that this does not change the link count of the node pointed by 617 * the directory entry, as this is done by tmpfs_free_dirent. 
618 */ 619 void 620 tmpfs_dir_detach(struct tmpfs_node *dnode, struct tmpfs_dirent *de) 621 { 622 struct tmpfs_node *node = de->td_node; 623 624 TMPFS_NODE_LOCK(dnode); 625 RB_REMOVE(tmpfs_dirtree, &dnode->tn_dir.tn_dirtree, de); 626 RB_REMOVE(tmpfs_dirtree_cookie, &dnode->tn_dir.tn_cookietree, de); 627 dnode->tn_size -= sizeof(struct tmpfs_dirent); 628 dnode->tn_status |= TMPFS_NODE_ACCESSED | TMPFS_NODE_CHANGED | 629 TMPFS_NODE_MODIFIED; 630 TMPFS_NODE_UNLOCK(dnode); 631 632 /* 633 * Clean out the tn_parent pointer immediately when removing a 634 * directory. 635 * 636 * Removal of the parent linkage also cleans out the extra tn_links 637 * count we had on both node and dnode. 638 * 639 * node can be NULL (typ during a forced umount), in which case 640 * the mount code is dealing with the linkages from a linked list 641 * scan. 642 */ 643 if (node && node->tn_type == VDIR && node->tn_dir.tn_parent) { 644 TMPFS_NODE_LOCK(dnode); 645 TMPFS_NODE_LOCK(node); 646 KKASSERT(node->tn_dir.tn_parent == dnode); 647 dnode->tn_links--; 648 node->tn_links--; 649 node->tn_dir.tn_parent = NULL; 650 TMPFS_NODE_UNLOCK(node); 651 TMPFS_NODE_UNLOCK(dnode); 652 } 653 } 654 655 /* --------------------------------------------------------------------- */ 656 657 /* 658 * Looks for a directory entry in the directory represented by node. 659 * 'ncp' describes the name of the entry to look for. Note that the . 660 * and .. components are not allowed as they do not physically exist 661 * within directories. 662 * 663 * Returns a pointer to the entry when found, otherwise NULL. 
664 * 665 * Caller must hold the node locked (shared ok) 666 */ 667 struct tmpfs_dirent * 668 tmpfs_dir_lookup(struct tmpfs_node *node, struct tmpfs_node *f, 669 struct namecache *ncp) 670 { 671 struct tmpfs_dirent *de; 672 int len = ncp->nc_nlen; 673 struct tmpfs_dirent wanted; 674 675 wanted.td_namelen = len; 676 wanted.td_name = ncp->nc_name; 677 678 TMPFS_VALIDATE_DIR(node); 679 680 de = RB_FIND(tmpfs_dirtree, &node->tn_dir.tn_dirtree, &wanted); 681 682 KKASSERT(f == NULL || f == de->td_node); 683 684 return de; 685 } 686 687 /* --------------------------------------------------------------------- */ 688 689 /* 690 * Helper function for tmpfs_readdir. Creates a '.' entry for the given 691 * directory and returns it in the uio space. The function returns 0 692 * on success, -1 if there was not enough space in the uio structure to 693 * hold the directory entry or an appropriate error code if another 694 * error happens. 695 */ 696 int 697 tmpfs_dir_getdotdent(struct tmpfs_node *node, struct uio *uio) 698 { 699 int error; 700 struct dirent dent; 701 int dirsize; 702 703 TMPFS_VALIDATE_DIR(node); 704 KKASSERT(uio->uio_offset == TMPFS_DIRCOOKIE_DOT); 705 706 dent.d_ino = node->tn_id; 707 dent.d_type = DT_DIR; 708 dent.d_namlen = 1; 709 dent.d_name[0] = '.'; 710 dent.d_name[1] = '\0'; 711 dirsize = _DIRENT_DIRSIZ(&dent); 712 713 if (dirsize > uio->uio_resid) 714 error = -1; 715 else { 716 error = uiomove((caddr_t)&dent, dirsize, uio); 717 if (error == 0) 718 uio->uio_offset = TMPFS_DIRCOOKIE_DOTDOT; 719 } 720 return error; 721 } 722 723 /* --------------------------------------------------------------------- */ 724 725 /* 726 * Helper function for tmpfs_readdir. Creates a '..' entry for the given 727 * directory and returns it in the uio space. The function returns 0 728 * on success, -1 if there was not enough space in the uio structure to 729 * hold the directory entry or an appropriate error code if another 730 * error happens. 
731 */ 732 int 733 tmpfs_dir_getdotdotdent(struct tmpfs_mount *tmp, struct tmpfs_node *node, 734 struct uio *uio) 735 { 736 int error; 737 struct dirent dent; 738 int dirsize; 739 740 TMPFS_VALIDATE_DIR(node); 741 KKASSERT(uio->uio_offset == TMPFS_DIRCOOKIE_DOTDOT); 742 743 if (node->tn_dir.tn_parent) { 744 TMPFS_NODE_LOCK(node); 745 if (node->tn_dir.tn_parent) 746 dent.d_ino = node->tn_dir.tn_parent->tn_id; 747 else 748 dent.d_ino = tmp->tm_root->tn_id; 749 TMPFS_NODE_UNLOCK(node); 750 } else { 751 dent.d_ino = tmp->tm_root->tn_id; 752 } 753 754 dent.d_type = DT_DIR; 755 dent.d_namlen = 2; 756 dent.d_name[0] = '.'; 757 dent.d_name[1] = '.'; 758 dent.d_name[2] = '\0'; 759 dirsize = _DIRENT_DIRSIZ(&dent); 760 761 if (dirsize > uio->uio_resid) 762 error = -1; 763 else { 764 error = uiomove((caddr_t)&dent, dirsize, uio); 765 if (error == 0) { 766 struct tmpfs_dirent *de; 767 768 de = RB_MIN(tmpfs_dirtree_cookie, 769 &node->tn_dir.tn_cookietree); 770 if (de == NULL) 771 uio->uio_offset = TMPFS_DIRCOOKIE_EOF; 772 else 773 uio->uio_offset = tmpfs_dircookie(de); 774 } 775 } 776 return error; 777 } 778 779 /* --------------------------------------------------------------------- */ 780 781 /* 782 * Lookup a directory entry by its associated cookie. 
783 * 784 * Must be called with the directory node locked (shared ok) 785 */ 786 struct lubycookie_info { 787 off_t cookie; 788 struct tmpfs_dirent *de; 789 }; 790 791 static int 792 lubycookie_cmp(struct tmpfs_dirent *de, void *arg) 793 { 794 struct lubycookie_info *info = arg; 795 off_t cookie = tmpfs_dircookie(de); 796 797 if (cookie < info->cookie) 798 return(-1); 799 if (cookie > info->cookie) 800 return(1); 801 return(0); 802 } 803 804 static int 805 lubycookie_callback(struct tmpfs_dirent *de, void *arg) 806 { 807 struct lubycookie_info *info = arg; 808 809 if (tmpfs_dircookie(de) == info->cookie) { 810 info->de = de; 811 return(-1); 812 } 813 return(0); 814 } 815 816 struct tmpfs_dirent * 817 tmpfs_dir_lookupbycookie(struct tmpfs_node *node, off_t cookie) 818 { 819 struct lubycookie_info info; 820 821 info.cookie = cookie; 822 info.de = NULL; 823 RB_SCAN(tmpfs_dirtree_cookie, &node->tn_dir.tn_cookietree, 824 lubycookie_cmp, lubycookie_callback, &info); 825 return (info.de); 826 } 827 828 /* --------------------------------------------------------------------- */ 829 830 /* 831 * Helper function for tmpfs_readdir. Returns as much directory entries 832 * as can fit in the uio space. The read starts at uio->uio_offset. 833 * The function returns 0 on success, -1 if there was not enough space 834 * in the uio structure to hold the directory entry or an appropriate 835 * error code if another error happens. 836 * 837 * Caller must hold the node locked (shared ok) 838 */ 839 int 840 tmpfs_dir_getdents(struct tmpfs_node *node, struct uio *uio, off_t *cntp) 841 { 842 int error; 843 off_t startcookie; 844 struct tmpfs_dirent *de; 845 846 TMPFS_VALIDATE_DIR(node); 847 848 /* 849 * Locate the first directory entry we have to return. We have cached 850 * the last readdir in the node, so use those values if appropriate. 851 * Otherwise do a linear scan to find the requested entry. 
852 */ 853 startcookie = uio->uio_offset; 854 KKASSERT(startcookie != TMPFS_DIRCOOKIE_DOT); 855 KKASSERT(startcookie != TMPFS_DIRCOOKIE_DOTDOT); 856 857 if (startcookie == TMPFS_DIRCOOKIE_EOF) 858 return 0; 859 860 de = tmpfs_dir_lookupbycookie(node, startcookie); 861 if (de == NULL) 862 return EINVAL; 863 864 /* 865 * Read as much entries as possible; i.e., until we reach the end of 866 * the directory or we exhaust uio space. 867 */ 868 do { 869 struct dirent d; 870 int reclen; 871 872 /* Create a dirent structure representing the current 873 * tmpfs_node and fill it. */ 874 d.d_ino = de->td_node->tn_id; 875 switch (de->td_node->tn_type) { 876 case VBLK: 877 d.d_type = DT_BLK; 878 break; 879 880 case VCHR: 881 d.d_type = DT_CHR; 882 break; 883 884 case VDIR: 885 d.d_type = DT_DIR; 886 break; 887 888 case VFIFO: 889 d.d_type = DT_FIFO; 890 break; 891 892 case VLNK: 893 d.d_type = DT_LNK; 894 break; 895 896 case VREG: 897 d.d_type = DT_REG; 898 break; 899 900 case VSOCK: 901 d.d_type = DT_SOCK; 902 break; 903 904 default: 905 panic("tmpfs_dir_getdents: type %p %d", 906 de->td_node, (int)de->td_node->tn_type); 907 } 908 d.d_namlen = de->td_namelen; 909 KKASSERT(de->td_namelen < sizeof(d.d_name)); 910 bcopy(de->td_name, d.d_name, d.d_namlen); 911 d.d_name[d.d_namlen] = '\0'; 912 reclen = _DIRENT_RECLEN(d.d_namlen); 913 914 /* Stop reading if the directory entry we are treating is 915 * bigger than the amount of data that can be returned. */ 916 if (reclen > uio->uio_resid) { 917 error = -1; 918 break; 919 } 920 921 /* Copy the new dirent structure into the output buffer and 922 * advance pointers. */ 923 error = uiomove((caddr_t)&d, reclen, uio); 924 925 (*cntp)++; 926 de = RB_NEXT(tmpfs_dirtree_cookie, 927 node->tn_dir.tn_cookietree, de); 928 } while (error == 0 && uio->uio_resid > 0 && de != NULL); 929 930 /* Update the offset and cache. 
*/ 931 if (de == NULL) { 932 uio->uio_offset = TMPFS_DIRCOOKIE_EOF; 933 } else { 934 uio->uio_offset = tmpfs_dircookie(de); 935 } 936 937 return error; 938 } 939 940 /* --------------------------------------------------------------------- */ 941 942 /* 943 * Resizes the aobj associated to the regular file pointed to by vp to 944 * the size newsize. 'vp' must point to a vnode that represents a regular 945 * file. 'newsize' must be positive. 946 * 947 * pass trivial as 1 when buf content will be overwritten, otherwise set 0 948 * to be zero filled. 949 * 950 * Returns zero on success or an appropriate error code on failure. 951 */ 952 int 953 tmpfs_reg_resize(struct vnode *vp, off_t newsize, int trivial) 954 { 955 int error; 956 vm_pindex_t newpages, oldpages; 957 struct tmpfs_mount *tmp; 958 struct tmpfs_node *node; 959 off_t oldsize; 960 961 #ifdef INVARIANTS 962 KKASSERT(vp->v_type == VREG); 963 KKASSERT(newsize >= 0); 964 #endif 965 966 node = VP_TO_TMPFS_NODE(vp); 967 tmp = VFS_TO_TMPFS(vp->v_mount); 968 969 /* 970 * Convert the old and new sizes to the number of pages needed to 971 * store them. It may happen that we do not need to do anything 972 * because the last allocated page can accommodate the change on 973 * its own. 974 */ 975 TMPFS_NODE_LOCK(node); 976 oldsize = node->tn_size; 977 oldpages = round_page64(oldsize) / PAGE_SIZE; 978 KKASSERT(oldpages == node->tn_reg.tn_aobj_pages); 979 newpages = round_page64(newsize) / PAGE_SIZE; 980 981 if (newpages > oldpages && 982 tmp->tm_pages_used + newpages - oldpages > tmp->tm_pages_max) { 983 TMPFS_NODE_UNLOCK(node); 984 error = ENOSPC; 985 goto out; 986 } 987 node->tn_reg.tn_aobj_pages = newpages; 988 node->tn_size = newsize; 989 TMPFS_NODE_UNLOCK(node); 990 991 TMPFS_LOCK(tmp); 992 tmp->tm_pages_used += (newpages - oldpages); 993 TMPFS_UNLOCK(tmp); 994 995 /* 996 * When adjusting the vnode filesize and its VM object we must 997 * also adjust our backing VM object (aobj). 
The blocksize 998 * used must match the block sized we use for the buffer cache. 999 * 1000 * The backing VM object contains no VM pages, only swap 1001 * assignments. 1002 */ 1003 if (newsize < oldsize) { 1004 vm_pindex_t osize; 1005 vm_pindex_t nsize; 1006 vm_object_t aobj; 1007 1008 error = nvtruncbuf(vp, newsize, TMPFS_BLKSIZE, -1, 0); 1009 aobj = node->tn_reg.tn_aobj; 1010 if (aobj) { 1011 osize = aobj->size; 1012 nsize = vp->v_object->size; 1013 if (nsize < osize) { 1014 aobj->size = osize; 1015 swap_pager_freespace(aobj, nsize, 1016 osize - nsize); 1017 } 1018 } 1019 } else { 1020 vm_object_t aobj; 1021 1022 error = nvextendbuf(vp, oldsize, newsize, 1023 TMPFS_BLKSIZE, TMPFS_BLKSIZE, 1024 -1, -1, trivial); 1025 aobj = node->tn_reg.tn_aobj; 1026 if (aobj) 1027 aobj->size = vp->v_object->size; 1028 } 1029 1030 out: 1031 return error; 1032 } 1033 1034 /* --------------------------------------------------------------------- */ 1035 1036 /* 1037 * Change flags of the given vnode. 1038 * Caller should execute tmpfs_update on vp after a successful execution. 1039 * The vnode must be locked on entry and remain locked on exit. 1040 */ 1041 int 1042 tmpfs_chflags(struct vnode *vp, int vaflags, struct ucred *cred) 1043 { 1044 int error; 1045 struct tmpfs_node *node; 1046 int flags; 1047 1048 KKASSERT(vn_islocked(vp)); 1049 1050 node = VP_TO_TMPFS_NODE(vp); 1051 flags = node->tn_flags; 1052 1053 /* Disallow this operation if the file system is mounted read-only. 
*/ 1054 if (vp->v_mount->mnt_flag & MNT_RDONLY) 1055 return EROFS; 1056 error = vop_helper_setattr_flags(&flags, vaflags, node->tn_uid, cred); 1057 1058 /* Actually change the flags on the node itself */ 1059 if (error == 0) { 1060 TMPFS_NODE_LOCK(node); 1061 node->tn_flags = flags; 1062 node->tn_status |= TMPFS_NODE_CHANGED; 1063 TMPFS_NODE_UNLOCK(node); 1064 } 1065 1066 KKASSERT(vn_islocked(vp)); 1067 1068 return error; 1069 } 1070 1071 /* --------------------------------------------------------------------- */ 1072 1073 /* 1074 * Change access mode on the given vnode. 1075 * Caller should execute tmpfs_update on vp after a successful execution. 1076 * The vnode must be locked on entry and remain locked on exit. 1077 */ 1078 int 1079 tmpfs_chmod(struct vnode *vp, mode_t vamode, struct ucred *cred) 1080 { 1081 struct tmpfs_node *node; 1082 mode_t cur_mode; 1083 int error; 1084 1085 KKASSERT(vn_islocked(vp)); 1086 1087 node = VP_TO_TMPFS_NODE(vp); 1088 1089 /* Disallow this operation if the file system is mounted read-only. */ 1090 if (vp->v_mount->mnt_flag & MNT_RDONLY) 1091 return EROFS; 1092 1093 /* Immutable or append-only files cannot be modified, either. */ 1094 if (node->tn_flags & (IMMUTABLE | APPEND)) 1095 return EPERM; 1096 1097 cur_mode = node->tn_mode; 1098 error = vop_helper_chmod(vp, vamode, cred, node->tn_uid, node->tn_gid, 1099 &cur_mode); 1100 1101 if (error == 0 && 1102 (node->tn_mode & ALLPERMS) != (cur_mode & ALLPERMS)) { 1103 TMPFS_NODE_LOCK(node); 1104 node->tn_mode &= ~ALLPERMS; 1105 node->tn_mode |= cur_mode & ALLPERMS; 1106 1107 node->tn_status |= TMPFS_NODE_CHANGED; 1108 TMPFS_NODE_UNLOCK(node); 1109 } 1110 1111 KKASSERT(vn_islocked(vp)); 1112 1113 return 0; 1114 } 1115 1116 /* --------------------------------------------------------------------- */ 1117 1118 /* 1119 * Change ownership of the given vnode. At least one of uid or gid must 1120 * be different than VNOVAL. If one is set to that value, the attribute 1121 * is unchanged. 
1122 * Caller should execute tmpfs_update on vp after a successful execution. 1123 * The vnode must be locked on entry and remain locked on exit. 1124 */ 1125 int 1126 tmpfs_chown(struct vnode *vp, uid_t uid, gid_t gid, struct ucred *cred) 1127 { 1128 mode_t cur_mode; 1129 uid_t cur_uid; 1130 gid_t cur_gid; 1131 struct tmpfs_node *node; 1132 int error; 1133 1134 KKASSERT(vn_islocked(vp)); 1135 node = VP_TO_TMPFS_NODE(vp); 1136 1137 /* Disallow this operation if the file system is mounted read-only. */ 1138 if (vp->v_mount->mnt_flag & MNT_RDONLY) 1139 return EROFS; 1140 1141 /* Immutable or append-only files cannot be modified, either. */ 1142 if (node->tn_flags & (IMMUTABLE | APPEND)) 1143 return EPERM; 1144 1145 cur_uid = node->tn_uid; 1146 cur_gid = node->tn_gid; 1147 cur_mode = node->tn_mode; 1148 error = vop_helper_chown(vp, uid, gid, cred, 1149 &cur_uid, &cur_gid, &cur_mode); 1150 1151 if (error == 0) { 1152 TMPFS_NODE_LOCK(node); 1153 if (cur_uid != node->tn_uid || 1154 cur_gid != node->tn_gid || 1155 cur_mode != node->tn_mode) { 1156 node->tn_uid = cur_uid; 1157 node->tn_gid = cur_gid; 1158 node->tn_mode = cur_mode; 1159 node->tn_status |= TMPFS_NODE_CHANGED; 1160 } 1161 TMPFS_NODE_UNLOCK(node); 1162 } 1163 1164 return error; 1165 } 1166 1167 /* --------------------------------------------------------------------- */ 1168 1169 /* 1170 * Change size of the given vnode. 1171 * Caller should execute tmpfs_update on vp after a successful execution. 1172 * The vnode must be locked on entry and remain locked on exit. 1173 */ 1174 int 1175 tmpfs_chsize(struct vnode *vp, u_quad_t size, struct ucred *cred) 1176 { 1177 int error; 1178 struct tmpfs_node *node; 1179 1180 KKASSERT(vn_islocked(vp)); 1181 1182 node = VP_TO_TMPFS_NODE(vp); 1183 1184 /* Decide whether this is a valid operation based on the file type. 
*/ 1185 error = 0; 1186 switch (vp->v_type) { 1187 case VDIR: 1188 return EISDIR; 1189 1190 case VREG: 1191 if (vp->v_mount->mnt_flag & MNT_RDONLY) 1192 return EROFS; 1193 break; 1194 1195 case VBLK: 1196 /* FALLTHROUGH */ 1197 case VCHR: 1198 /* FALLTHROUGH */ 1199 case VFIFO: 1200 /* Allow modifications of special files even if in the file 1201 * system is mounted read-only (we are not modifying the 1202 * files themselves, but the objects they represent). */ 1203 return 0; 1204 1205 default: 1206 /* Anything else is unsupported. */ 1207 return EOPNOTSUPP; 1208 } 1209 1210 /* Immutable or append-only files cannot be modified, either. */ 1211 if (node->tn_flags & (IMMUTABLE | APPEND)) 1212 return EPERM; 1213 1214 error = tmpfs_truncate(vp, size); 1215 /* tmpfs_truncate will raise the NOTE_EXTEND and NOTE_ATTRIB kevents 1216 * for us, as will update tn_status; no need to do that here. */ 1217 1218 KKASSERT(vn_islocked(vp)); 1219 1220 return error; 1221 } 1222 1223 /* --------------------------------------------------------------------- */ 1224 1225 /* 1226 * Change access and modification times of the given vnode. 1227 * Caller should execute tmpfs_update on vp after a successful execution. 1228 * The vnode must be locked on entry and remain locked on exit. 1229 */ 1230 int 1231 tmpfs_chtimes(struct vnode *vp, struct timespec *atime, struct timespec *mtime, 1232 int vaflags, struct ucred *cred) 1233 { 1234 struct tmpfs_node *node; 1235 1236 KKASSERT(vn_islocked(vp)); 1237 1238 node = VP_TO_TMPFS_NODE(vp); 1239 1240 /* Disallow this operation if the file system is mounted read-only. */ 1241 if (vp->v_mount->mnt_flag & MNT_RDONLY) 1242 return EROFS; 1243 1244 /* Immutable or append-only files cannot be modified, either. 
*/ 1245 if (node->tn_flags & (IMMUTABLE | APPEND)) 1246 return EPERM; 1247 1248 TMPFS_NODE_LOCK(node); 1249 if (atime->tv_sec != VNOVAL && atime->tv_nsec != VNOVAL) 1250 node->tn_status |= TMPFS_NODE_ACCESSED; 1251 1252 if (mtime->tv_sec != VNOVAL && mtime->tv_nsec != VNOVAL) 1253 node->tn_status |= TMPFS_NODE_MODIFIED; 1254 1255 TMPFS_NODE_UNLOCK(node); 1256 1257 tmpfs_itimes(vp, atime, mtime); 1258 1259 KKASSERT(vn_islocked(vp)); 1260 1261 return 0; 1262 } 1263 1264 /* --------------------------------------------------------------------- */ 1265 /* Sync timestamps */ 1266 void 1267 tmpfs_itimes(struct vnode *vp, const struct timespec *acc, 1268 const struct timespec *mod) 1269 { 1270 struct tmpfs_node *node; 1271 struct timespec now; 1272 1273 node = VP_TO_TMPFS_NODE(vp); 1274 1275 if ((node->tn_status & (TMPFS_NODE_ACCESSED | TMPFS_NODE_MODIFIED | 1276 TMPFS_NODE_CHANGED)) == 0) 1277 return; 1278 1279 vfs_timestamp(&now); 1280 1281 TMPFS_NODE_LOCK(node); 1282 if (node->tn_status & TMPFS_NODE_ACCESSED) { 1283 if (acc == NULL) 1284 acc = &now; 1285 node->tn_atime = acc->tv_sec; 1286 node->tn_atimensec = acc->tv_nsec; 1287 } 1288 if (node->tn_status & TMPFS_NODE_MODIFIED) { 1289 if (mod == NULL) 1290 mod = &now; 1291 node->tn_mtime = mod->tv_sec; 1292 node->tn_mtimensec = mod->tv_nsec; 1293 } 1294 if (node->tn_status & TMPFS_NODE_CHANGED) { 1295 node->tn_ctime = now.tv_sec; 1296 node->tn_ctimensec = now.tv_nsec; 1297 } 1298 node->tn_status &= 1299 ~(TMPFS_NODE_ACCESSED | TMPFS_NODE_MODIFIED | TMPFS_NODE_CHANGED); 1300 TMPFS_NODE_UNLOCK(node); 1301 } 1302 1303 /* --------------------------------------------------------------------- */ 1304 1305 void 1306 tmpfs_update(struct vnode *vp) 1307 { 1308 tmpfs_itimes(vp, NULL, NULL); 1309 } 1310 1311 /* --------------------------------------------------------------------- */ 1312 1313 int 1314 tmpfs_truncate(struct vnode *vp, off_t length) 1315 { 1316 int error; 1317 struct tmpfs_node *node; 1318 1319 node = 
VP_TO_TMPFS_NODE(vp); 1320 1321 if (length < 0) { 1322 error = EINVAL; 1323 goto out; 1324 } 1325 1326 if (node->tn_size == length) { 1327 error = 0; 1328 goto out; 1329 } 1330 1331 if (length > VFS_TO_TMPFS(vp->v_mount)->tm_maxfilesize) 1332 return (EFBIG); 1333 1334 1335 error = tmpfs_reg_resize(vp, length, 1); 1336 1337 if (error == 0) { 1338 TMPFS_NODE_LOCK(node); 1339 node->tn_status |= TMPFS_NODE_CHANGED | TMPFS_NODE_MODIFIED; 1340 TMPFS_NODE_UNLOCK(node); 1341 } 1342 1343 out: 1344 tmpfs_update(vp); 1345 1346 return error; 1347 } 1348 1349 /* --------------------------------------------------------------------- */ 1350 1351 static ino_t 1352 tmpfs_fetch_ino(struct tmpfs_mount *tmp) 1353 { 1354 ino_t ret; 1355 1356 ret = tmp->tm_ino++; 1357 1358 return (ret); 1359 } 1360 1361 static int 1362 tmpfs_dirtree_compare(struct tmpfs_dirent *a, struct tmpfs_dirent *b) 1363 { 1364 if (a->td_namelen > b->td_namelen) 1365 return 1; 1366 else if (a->td_namelen < b->td_namelen) 1367 return -1; 1368 else 1369 return strncmp(a->td_name, b->td_name, a->td_namelen); 1370 } 1371 1372 static int 1373 tmpfs_dirtree_compare_cookie(struct tmpfs_dirent *a, struct tmpfs_dirent *b) 1374 { 1375 if (a < b) 1376 return(-1); 1377 if (a > b) 1378 return(1); 1379 return 0; 1380 } 1381