1 /* $NetBSD: tmpfs_subr.c,v 1.35 2007/07/09 21:10:50 ad Exp $ */ 2 3 /*- 4 * Copyright (c) 2005 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Julio M. Merino Vidal, developed as part of Google's Summer of Code 9 * 2005 program. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 23 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 30 * POSSIBILITY OF SUCH DAMAGE. 31 */ 32 33 /* 34 * Efficient memory file system supporting functions. 35 */ 36 37 #include <sys/kernel.h> 38 #include <sys/param.h> 39 #include <sys/namei.h> 40 #include <sys/priv.h> 41 #include <sys/proc.h> 42 #include <sys/stat.h> 43 #include <sys/systm.h> 44 #include <sys/vnode.h> 45 #include <sys/vmmeter.h> 46 47 #include <vm/vm.h> 48 #include <vm/vm_object.h> 49 #include <vm/vm_page.h> 50 #include <vm/vm_pager.h> 51 #include <vm/vm_extern.h> 52 53 #include <vfs/tmpfs/tmpfs.h> 54 #include <vfs/tmpfs/tmpfs_vnops.h> 55 56 static ino_t tmpfs_fetch_ino(struct tmpfs_mount *); 57 58 static int tmpfs_dirtree_compare(struct tmpfs_dirent *a, 59 struct tmpfs_dirent *b); 60 RB_GENERATE(tmpfs_dirtree, tmpfs_dirent, rb_node, tmpfs_dirtree_compare); 61 62 static int tmpfs_dirtree_compare_cookie(struct tmpfs_dirent *a, 63 struct tmpfs_dirent *b); 64 RB_GENERATE(tmpfs_dirtree_cookie, tmpfs_dirent, 65 rb_cookienode, tmpfs_dirtree_compare_cookie); 66 67 68 /* --------------------------------------------------------------------- */ 69 70 /* 71 * Allocates a new node of type 'type' inside the 'tmp' mount point, with 72 * its owner set to 'uid', its group to 'gid' and its mode set to 'mode', 73 * using the credentials of the process 'p'. 74 * 75 * If the node type is set to 'VDIR', then the parent parameter must point 76 * to the parent directory of the node being created. It may only be NULL 77 * while allocating the root node. 78 * 79 * If the node type is set to 'VBLK' or 'VCHR', then the rdev parameter 80 * specifies the device the node represents. 81 * 82 * If the node type is set to 'VLNK', then the parameter target specifies 83 * the file name of the target file for the symbolic link that is being 84 * created. 85 * 86 * Note that new nodes are retrieved from the available list if it has 87 * items or, if it is empty, from the node pool as long as there is enough 88 * space to create them. 89 * 90 * Returns zero on success or an appropriate error code on failure. 91 */ 92 int 93 tmpfs_alloc_node(struct tmpfs_mount *tmp, enum vtype type, 94 uid_t uid, gid_t gid, mode_t mode, 95 char *target, int rmajor, int rminor, 96 struct tmpfs_node **node) 97 { 98 struct tmpfs_node *nnode; 99 struct timespec ts; 100 udev_t rdev; 101 102 KKASSERT(IFF(type == VLNK, target != NULL)); 103 KKASSERT(IFF(type == VBLK || type == VCHR, rmajor != VNOVAL)); 104 105 if (tmp->tm_nodes_inuse >= tmp->tm_nodes_max) 106 return (ENOSPC); 107 108 nnode = objcache_get(tmp->tm_node_pool, M_WAITOK | M_NULLOK); 109 if (nnode == NULL) 110 return (ENOSPC); 111 112 /* Generic initialization. */ 113 nnode->tn_type = type; 114 vfs_timestamp(&ts); 115 nnode->tn_ctime = nnode->tn_mtime = nnode->tn_atime 116 = ts.tv_sec; 117 nnode->tn_ctimensec = nnode->tn_mtimensec = nnode->tn_atimensec 118 = ts.tv_nsec; 119 nnode->tn_uid = uid; 120 nnode->tn_gid = gid; 121 nnode->tn_mode = mode; 122 nnode->tn_id = tmpfs_fetch_ino(tmp); 123 nnode->tn_advlock.init_done = 0; 124 KKASSERT(nnode->tn_links == 0); 125 126 /* Type-specific initialization. */ 127 switch (nnode->tn_type) { 128 case VBLK: 129 case VCHR: 130 rdev = makeudev(rmajor, rminor); 131 if (rdev == NOUDEV) { 132 objcache_put(tmp->tm_node_pool, nnode); 133 return(EINVAL); 134 } 135 nnode->tn_rdev = rdev; 136 break; 137 138 case VDIR: 139 RB_INIT(&nnode->tn_dir.tn_dirtree); 140 RB_INIT(&nnode->tn_dir.tn_cookietree); 141 nnode->tn_size = 0; 142 break; 143 144 case VFIFO: 145 /* FALLTHROUGH */ 146 case VSOCK: 147 break; 148 149 case VLNK: 150 nnode->tn_size = strlen(target); 151 nnode->tn_link = kmalloc(nnode->tn_size + 1, tmp->tm_name_zone, 152 M_WAITOK | M_NULLOK); 153 if (nnode->tn_link == NULL) { 154 objcache_put(tmp->tm_node_pool, nnode); 155 return (ENOSPC); 156 } 157 bcopy(target, nnode->tn_link, nnode->tn_size); 158 nnode->tn_link[nnode->tn_size] = '\0'; 159 break; 160 161 case VREG: 162 nnode->tn_reg.tn_aobj = swap_pager_alloc(NULL, 0, 163 VM_PROT_DEFAULT, 0); 164 nnode->tn_reg.tn_aobj_pages = 0; 165 nnode->tn_size = 0; 166 vm_object_set_flag(nnode->tn_reg.tn_aobj, OBJ_NOPAGEIN); 167 break; 168 169 default: 170 panic("tmpfs_alloc_node: type %p %d", nnode, (int)nnode->tn_type); 171 } 172 173 TMPFS_NODE_LOCK(nnode); 174 TMPFS_LOCK(tmp); 175 LIST_INSERT_HEAD(&tmp->tm_nodes_used, nnode, tn_entries); 176 tmp->tm_nodes_inuse++; 177 TMPFS_UNLOCK(tmp); 178 TMPFS_NODE_UNLOCK(nnode); 179 180 *node = nnode; 181 return 0; 182 } 183 184 /* --------------------------------------------------------------------- */ 185 186 /* 187 * Destroys the node pointed to by node from the file system 'tmp'. 188 * If the node does not belong to the given mount point, the results are 189 * unpredicted. 190 * 191 * If the node references a directory; no entries are allowed because 192 * their removal could need a recursive algorithm, something forbidden in 193 * kernel space. Furthermore, there is not need to provide such 194 * functionality (recursive removal) because the only primitives offered 195 * to the user are the removal of empty directories and the deletion of 196 * individual files. 197 * 198 * Note that nodes are not really deleted; in fact, when a node has been 199 * allocated, it cannot be deleted during the whole life of the file 200 * system. Instead, they are moved to the available list and remain there 201 * until reused. 202 * 203 * A caller must have TMPFS_NODE_LOCK(node) and this function unlocks it. 204 */ 205 void 206 tmpfs_free_node(struct tmpfs_mount *tmp, struct tmpfs_node *node) 207 { 208 vm_pindex_t pages = 0; 209 210 #ifdef INVARIANTS 211 TMPFS_ASSERT_ELOCKED(node); 212 KKASSERT(node->tn_vnode == NULL); 213 KKASSERT((node->tn_vpstate & TMPFS_VNODE_ALLOCATING) == 0); 214 #endif 215 216 TMPFS_LOCK(tmp); 217 LIST_REMOVE(node, tn_entries); 218 tmp->tm_nodes_inuse--; 219 TMPFS_UNLOCK(tmp); 220 TMPFS_NODE_UNLOCK(node); /* Caller has this lock */ 221 222 switch (node->tn_type) { 223 case VNON: 224 /* Do not do anything. VNON is provided to let the 225 * allocation routine clean itself easily by avoiding 226 * duplicating code in it. */ 227 /* FALLTHROUGH */ 228 case VBLK: 229 /* FALLTHROUGH */ 230 case VCHR: 231 /* FALLTHROUGH */ 232 break; 233 case VDIR: 234 /* 235 * The parent link can be NULL if this is the root 236 * node or if it is a directory node that was rmdir'd. 237 * 238 * XXX what if node is a directory which still contains 239 * directory entries (e.g. due to a forced umount) ? 240 */ 241 node->tn_size = 0; 242 KKASSERT(node->tn_dir.tn_parent == NULL); 243 244 /* 245 * If the root node is being destroyed don't leave a 246 * dangling pointer in tmpfs_mount. 247 */ 248 if (node == tmp->tm_root) 249 tmp->tm_root = NULL; 250 break; 251 case VFIFO: 252 /* FALLTHROUGH */ 253 case VSOCK: 254 break; 255 256 case VLNK: 257 kfree(node->tn_link, tmp->tm_name_zone); 258 node->tn_link = NULL; 259 node->tn_size = 0; 260 break; 261 262 case VREG: 263 if (node->tn_reg.tn_aobj != NULL) 264 vm_object_deallocate(node->tn_reg.tn_aobj); 265 node->tn_reg.tn_aobj = NULL; 266 pages = node->tn_reg.tn_aobj_pages; 267 break; 268 269 default: 270 panic("tmpfs_free_node: type %p %d", node, (int)node->tn_type); 271 } 272 273 /* 274 * Clean up fields for the next allocation. The objcache only ctors 275 * new allocations. 276 */ 277 tmpfs_node_ctor(node, NULL, 0); 278 objcache_put(tmp->tm_node_pool, node); 279 /* node is now invalid */ 280 281 if (pages) 282 atomic_add_long(&tmp->tm_pages_used, -(long)pages); 283 } 284 285 /* --------------------------------------------------------------------- */ 286 287 /* 288 * Allocates a new directory entry for the node node with a name of name. 289 * The new directory entry is returned in *de. 290 * 291 * The link count of node is increased by one to reflect the new object 292 * referencing it. 293 * 294 * Returns zero on success or an appropriate error code on failure. 295 */ 296 int 297 tmpfs_alloc_dirent(struct tmpfs_mount *tmp, struct tmpfs_node *node, 298 const char *name, uint16_t len, struct tmpfs_dirent **de) 299 { 300 struct tmpfs_dirent *nde; 301 302 nde = objcache_get(tmp->tm_dirent_pool, M_WAITOK); 303 nde->td_name = kmalloc(len + 1, tmp->tm_name_zone, M_WAITOK | M_NULLOK); 304 if (nde->td_name == NULL) { 305 objcache_put(tmp->tm_dirent_pool, nde); 306 *de = NULL; 307 return (ENOSPC); 308 } 309 nde->td_namelen = len; 310 bcopy(name, nde->td_name, len); 311 nde->td_name[len] = '\0'; 312 313 nde->td_node = node; 314 315 TMPFS_NODE_LOCK(node); 316 ++node->tn_links; 317 TMPFS_NODE_UNLOCK(node); 318 319 *de = nde; 320 321 return 0; 322 } 323 324 /* --------------------------------------------------------------------- */ 325 326 /* 327 * Frees a directory entry. It is the caller's responsibility to destroy 328 * the node referenced by it if needed. 329 * 330 * The link count of node is decreased by one to reflect the removal of an 331 * object that referenced it. This only happens if 'node_exists' is true; 332 * otherwise the function will not access the node referred to by the 333 * directory entry, as it may already have been released from the outside. 334 */ 335 void 336 tmpfs_free_dirent(struct tmpfs_mount *tmp, struct tmpfs_dirent *de) 337 { 338 struct tmpfs_node *node; 339 340 node = de->td_node; 341 342 TMPFS_NODE_LOCK(node); 343 TMPFS_ASSERT_ELOCKED(node); 344 KKASSERT(node->tn_links > 0); 345 node->tn_links--; 346 TMPFS_NODE_UNLOCK(node); 347 348 kfree(de->td_name, tmp->tm_name_zone); 349 de->td_namelen = 0; 350 de->td_name = NULL; 351 de->td_node = NULL; 352 objcache_put(tmp->tm_dirent_pool, de); 353 } 354 355 /* --------------------------------------------------------------------- */ 356 357 /* 358 * Allocates a new vnode for the node node or returns a new reference to 359 * an existing one if the node had already a vnode referencing it. The 360 * resulting locked vnode is returned in *vpp. 361 * 362 * Returns zero on success or an appropriate error code on failure. 363 * 364 * The caller must ensure that node cannot go away (usually by holding 365 * the related directory entry). 366 * 367 * If dnode is non-NULL this routine avoids deadlocking against it but 368 * can return EAGAIN. Caller must try again. The dnode lock will cycle 369 * in this case, it remains locked on return in all cases. dnode must 370 * be shared-locked. 371 */ 372 int 373 tmpfs_alloc_vp(struct mount *mp, 374 struct tmpfs_node *dnode, struct tmpfs_node *node, int lkflag, 375 struct vnode **vpp) 376 { 377 int error = 0; 378 struct vnode *vp; 379 380 loop: 381 /* 382 * Interlocked extraction from node. This can race many things. 383 * We have to get a soft reference on the vnode while we hold 384 * the node locked, then acquire it properly and check for races. 385 */ 386 TMPFS_NODE_LOCK(node); 387 if ((vp = node->tn_vnode) != NULL) { 388 KKASSERT((node->tn_vpstate & TMPFS_VNODE_DOOMED) == 0); 389 vhold(vp); 390 TMPFS_NODE_UNLOCK(node); 391 392 if (dnode) { 393 /* 394 * Special-case handling to avoid deadlocking against 395 * dnode. This case has been validated and occurs 396 * every so often during synth builds. 397 */ 398 if (vget(vp, (lkflag & ~LK_RETRY) | 399 LK_NOWAIT | 400 LK_EXCLUSIVE) != 0) { 401 TMPFS_NODE_UNLOCK(dnode); 402 if (vget(vp, (lkflag & ~LK_RETRY) | 403 LK_SLEEPFAIL | 404 LK_EXCLUSIVE) == 0) { 405 vn_unlock(vp); 406 } 407 vdrop(vp); 408 TMPFS_NODE_LOCK_SH(dnode); 409 410 return EAGAIN; 411 } 412 } else { 413 /* 414 * Normal path 415 */ 416 if (vget(vp, lkflag | LK_EXCLUSIVE) != 0) { 417 vdrop(vp); 418 goto loop; 419 } 420 } 421 if (node->tn_vnode != vp) { 422 vput(vp); 423 vdrop(vp); 424 goto loop; 425 } 426 vdrop(vp); 427 goto out; 428 } 429 /* vp is NULL */ 430 431 /* 432 * This should never happen. 433 */ 434 if (node->tn_vpstate & TMPFS_VNODE_DOOMED) { 435 TMPFS_NODE_UNLOCK(node); 436 error = ENOENT; 437 goto out; 438 } 439 440 /* 441 * Interlock against other calls to tmpfs_alloc_vp() trying to 442 * allocate and assign a vp to node. 443 */ 444 if (node->tn_vpstate & TMPFS_VNODE_ALLOCATING) { 445 node->tn_vpstate |= TMPFS_VNODE_WANT; 446 error = tsleep(&node->tn_vpstate, PINTERLOCKED | PCATCH, 447 "tmpfs_alloc_vp", 0); 448 TMPFS_NODE_UNLOCK(node); 449 if (error) 450 return error; 451 goto loop; 452 } 453 node->tn_vpstate |= TMPFS_VNODE_ALLOCATING; 454 TMPFS_NODE_UNLOCK(node); 455 456 /* 457 * Allocate a new vnode (may block). The ALLOCATING flag should 458 * prevent a race against someone else assigning node->tn_vnode. 459 */ 460 error = getnewvnode(VT_TMPFS, mp, &vp, VLKTIMEOUT, LK_CANRECURSE); 461 if (error != 0) 462 goto unlock; 463 464 KKASSERT(node->tn_vnode == NULL); 465 KKASSERT(vp != NULL); 466 vp->v_data = node; 467 vp->v_type = node->tn_type; 468 469 /* Type-specific initialization. */ 470 switch (node->tn_type) { 471 case VBLK: 472 /* FALLTHROUGH */ 473 case VCHR: 474 /* FALLTHROUGH */ 475 case VSOCK: 476 break; 477 case VREG: 478 /* 479 * VMIO is mandatory. Tmpfs also supports KVABIO 480 * for its tmpfs_strategy(). 481 */ 482 vsetflags(vp, VKVABIO); 483 vinitvmio(vp, node->tn_size, TMPFS_BLKSIZE, -1); 484 break; 485 case VLNK: 486 break; 487 case VFIFO: 488 vp->v_ops = &mp->mnt_vn_fifo_ops; 489 break; 490 case VDIR: 491 break; 492 493 default: 494 panic("tmpfs_alloc_vp: type %p %d", node, (int)node->tn_type); 495 } 496 497 498 unlock: 499 TMPFS_NODE_LOCK(node); 500 501 KKASSERT(node->tn_vpstate & TMPFS_VNODE_ALLOCATING); 502 node->tn_vpstate &= ~TMPFS_VNODE_ALLOCATING; 503 node->tn_vnode = vp; 504 505 if (node->tn_vpstate & TMPFS_VNODE_WANT) { 506 node->tn_vpstate &= ~TMPFS_VNODE_WANT; 507 TMPFS_NODE_UNLOCK(node); 508 wakeup(&node->tn_vpstate); 509 } else { 510 TMPFS_NODE_UNLOCK(node); 511 } 512 513 out: 514 *vpp = vp; 515 KKASSERT(IFF(error == 0, *vpp != NULL && vn_islocked(*vpp))); 516 517 return error; 518 } 519 520 /* --------------------------------------------------------------------- */ 521 522 /* 523 * Allocates a new file of type 'type' and adds it to the parent directory 524 * 'dvp'; this addition is done using the component name given in 'cnp'. 525 * The ownership of the new file is automatically assigned based on the 526 * credentials of the caller (through 'cnp'), the group is set based on 527 * the parent directory and the mode is determined from the 'vap' argument. 528 * If successful, *vpp holds a vnode to the newly created file and zero 529 * is returned. Otherwise *vpp is NULL and the function returns an 530 * appropriate error code. 531 */ 532 int 533 tmpfs_alloc_file(struct vnode *dvp, struct vnode **vpp, struct vattr *vap, 534 struct namecache *ncp, struct ucred *cred, char *target) 535 { 536 int error; 537 struct tmpfs_dirent *de; 538 struct tmpfs_mount *tmp; 539 struct tmpfs_node *dnode; 540 struct tmpfs_node *node; 541 542 tmp = VFS_TO_TMPFS(dvp->v_mount); 543 dnode = VP_TO_TMPFS_DIR(dvp); 544 *vpp = NULL; 545 546 /* 547 * If the directory was removed but a process was CD'd into it, 548 * we do not allow any more file/dir creation within it. Otherwise 549 * we will lose track of it. 550 */ 551 KKASSERT(dnode->tn_type == VDIR); 552 if (dnode != tmp->tm_root && dnode->tn_dir.tn_parent == NULL) 553 return ENOENT; 554 555 /* 556 * Make sure the link count does not overflow. 557 */ 558 if (vap->va_type == VDIR && dnode->tn_links >= LINK_MAX) 559 return EMLINK; 560 561 /* Allocate a node that represents the new file. */ 562 error = tmpfs_alloc_node(tmp, vap->va_type, cred->cr_uid, 563 dnode->tn_gid, vap->va_mode, target, 564 vap->va_rmajor, vap->va_rminor, &node); 565 if (error != 0) 566 return error; 567 TMPFS_NODE_LOCK(node); 568 569 /* Allocate a directory entry that points to the new file. */ 570 error = tmpfs_alloc_dirent(tmp, node, ncp->nc_name, ncp->nc_nlen, &de); 571 if (error != 0) { 572 tmpfs_free_node(tmp, node); 573 /* eats node lock */ 574 return error; 575 } 576 577 /* Allocate a vnode for the new file. */ 578 error = tmpfs_alloc_vp(dvp->v_mount, NULL, node, LK_EXCLUSIVE, vpp); 579 if (error != 0) { 580 tmpfs_free_dirent(tmp, de); 581 tmpfs_free_node(tmp, node); 582 /* eats node lock */ 583 return error; 584 } 585 586 /* 587 * Now that all required items are allocated, we can proceed to 588 * insert the new node into the directory, an operation that 589 * cannot fail. 590 */ 591 tmpfs_dir_attach(dnode, de); 592 TMPFS_NODE_UNLOCK(node); 593 594 return error; 595 } 596 597 /* --------------------------------------------------------------------- */ 598 599 /* 600 * Attaches the directory entry de to the directory represented by dnode. 601 * Note that this does not change the link count of the node pointed by 602 * the directory entry, as this is done by tmpfs_alloc_dirent. 603 */ 604 void 605 tmpfs_dir_attach(struct tmpfs_node *dnode, struct tmpfs_dirent *de) 606 { 607 struct tmpfs_node *node = de->td_node; 608 609 TMPFS_NODE_LOCK(dnode); 610 if (node && node->tn_type == VDIR) { 611 TMPFS_NODE_LOCK(node); 612 ++node->tn_links; 613 node->tn_status |= TMPFS_NODE_CHANGED; 614 node->tn_dir.tn_parent = dnode; 615 ++dnode->tn_links; 616 TMPFS_NODE_UNLOCK(node); 617 } 618 RB_INSERT(tmpfs_dirtree, &dnode->tn_dir.tn_dirtree, de); 619 RB_INSERT(tmpfs_dirtree_cookie, &dnode->tn_dir.tn_cookietree, de); 620 dnode->tn_size += sizeof(struct tmpfs_dirent); 621 dnode->tn_status |= TMPFS_NODE_ACCESSED | TMPFS_NODE_CHANGED | 622 TMPFS_NODE_MODIFIED; 623 TMPFS_NODE_UNLOCK(dnode); 624 } 625 626 /* --------------------------------------------------------------------- */ 627 628 /* 629 * Detaches the directory entry de from the directory represented by dnode. 630 * Note that this does not change the link count of the node pointed by 631 * the directory entry, as this is done by tmpfs_free_dirent. 632 */ 633 void 634 tmpfs_dir_detach(struct tmpfs_node *dnode, struct tmpfs_dirent *de) 635 { 636 struct tmpfs_node *node = de->td_node; 637 638 TMPFS_NODE_LOCK(dnode); 639 RB_REMOVE(tmpfs_dirtree, &dnode->tn_dir.tn_dirtree, de); 640 RB_REMOVE(tmpfs_dirtree_cookie, &dnode->tn_dir.tn_cookietree, de); 641 dnode->tn_size -= sizeof(struct tmpfs_dirent); 642 dnode->tn_status |= TMPFS_NODE_ACCESSED | TMPFS_NODE_CHANGED | 643 TMPFS_NODE_MODIFIED; 644 TMPFS_NODE_UNLOCK(dnode); 645 646 /* 647 * Clean out the tn_parent pointer immediately when removing a 648 * directory. 649 * 650 * Removal of the parent linkage also cleans out the extra tn_links 651 * count we had on both node and dnode. 652 * 653 * node can be NULL (typ during a forced umount), in which case 654 * the mount code is dealing with the linkages from a linked list 655 * scan. 656 */ 657 if (node && node->tn_type == VDIR && node->tn_dir.tn_parent) { 658 TMPFS_NODE_LOCK(dnode); 659 TMPFS_NODE_LOCK(node); 660 KKASSERT(node->tn_dir.tn_parent == dnode); 661 dnode->tn_links--; 662 node->tn_links--; 663 node->tn_dir.tn_parent = NULL; 664 TMPFS_NODE_UNLOCK(node); 665 TMPFS_NODE_UNLOCK(dnode); 666 } 667 } 668 669 /* --------------------------------------------------------------------- */ 670 671 /* 672 * Looks for a directory entry in the directory represented by node. 673 * 'ncp' describes the name of the entry to look for. Note that the . 674 * and .. components are not allowed as they do not physically exist 675 * within directories. 676 * 677 * Returns a pointer to the entry when found, otherwise NULL. 678 * 679 * Caller must hold the node locked (shared ok) 680 */ 681 struct tmpfs_dirent * 682 tmpfs_dir_lookup(struct tmpfs_node *node, struct tmpfs_node *f, 683 struct namecache *ncp) 684 { 685 struct tmpfs_dirent *de; 686 int len = ncp->nc_nlen; 687 struct tmpfs_dirent wanted; 688 689 wanted.td_namelen = len; 690 wanted.td_name = ncp->nc_name; 691 692 TMPFS_VALIDATE_DIR(node); 693 694 de = RB_FIND(tmpfs_dirtree, &node->tn_dir.tn_dirtree, &wanted); 695 696 KKASSERT(f == NULL || f == de->td_node); 697 698 return de; 699 } 700 701 /* --------------------------------------------------------------------- */ 702 703 /* 704 * Helper function for tmpfs_readdir. Creates a '.' entry for the given 705 * directory and returns it in the uio space. The function returns 0 706 * on success, -1 if there was not enough space in the uio structure to 707 * hold the directory entry or an appropriate error code if another 708 * error happens. 709 */ 710 int 711 tmpfs_dir_getdotdent(struct tmpfs_node *node, struct uio *uio) 712 { 713 int error; 714 715 TMPFS_VALIDATE_DIR(node); 716 KKASSERT(uio->uio_offset == TMPFS_DIRCOOKIE_DOT); 717 718 if (vop_write_dirent(&error, uio, node->tn_id, DT_DIR, 1, ".")) 719 return -1; 720 if (error == 0) 721 uio->uio_offset = TMPFS_DIRCOOKIE_DOTDOT; 722 return error; 723 } 724 725 /* --------------------------------------------------------------------- */ 726 727 /* 728 * Helper function for tmpfs_readdir. Creates a '..' entry for the given 729 * directory and returns it in the uio space. The function returns 0 730 * on success, -1 if there was not enough space in the uio structure to 731 * hold the directory entry or an appropriate error code if another 732 * error happens. 733 */ 734 int 735 tmpfs_dir_getdotdotdent(struct tmpfs_mount *tmp, struct tmpfs_node *node, 736 struct uio *uio) 737 { 738 int error; 739 ino_t d_ino; 740 741 TMPFS_VALIDATE_DIR(node); 742 KKASSERT(uio->uio_offset == TMPFS_DIRCOOKIE_DOTDOT); 743 744 if (node->tn_dir.tn_parent) { 745 TMPFS_NODE_LOCK(node); 746 if (node->tn_dir.tn_parent) 747 d_ino = node->tn_dir.tn_parent->tn_id; 748 else 749 d_ino = tmp->tm_root->tn_id; 750 TMPFS_NODE_UNLOCK(node); 751 } else { 752 d_ino = tmp->tm_root->tn_id; 753 } 754 755 if (vop_write_dirent(&error, uio, d_ino, DT_DIR, 2, "..")) 756 return -1; 757 if (error == 0) { 758 struct tmpfs_dirent *de; 759 de = RB_MIN(tmpfs_dirtree_cookie, &node->tn_dir.tn_cookietree); 760 if (de == NULL) 761 uio->uio_offset = TMPFS_DIRCOOKIE_EOF; 762 else 763 uio->uio_offset = tmpfs_dircookie(de); 764 } 765 return error; 766 } 767 768 /* --------------------------------------------------------------------- */ 769 770 /* 771 * Lookup a directory entry by its associated cookie. 772 * 773 * Must be called with the directory node locked (shared ok) 774 */ 775 struct lubycookie_info { 776 off_t cookie; 777 struct tmpfs_dirent *de; 778 }; 779 780 static int 781 lubycookie_cmp(struct tmpfs_dirent *de, void *arg) 782 { 783 struct lubycookie_info *info = arg; 784 off_t cookie = tmpfs_dircookie(de); 785 786 if (cookie < info->cookie) 787 return(-1); 788 if (cookie > info->cookie) 789 return(1); 790 return(0); 791 } 792 793 static int 794 lubycookie_callback(struct tmpfs_dirent *de, void *arg) 795 { 796 struct lubycookie_info *info = arg; 797 798 if (tmpfs_dircookie(de) == info->cookie) { 799 info->de = de; 800 return(-1); 801 } 802 return(0); 803 } 804 805 struct tmpfs_dirent * 806 tmpfs_dir_lookupbycookie(struct tmpfs_node *node, off_t cookie) 807 { 808 struct lubycookie_info info; 809 810 info.cookie = cookie; 811 info.de = NULL; 812 RB_SCAN(tmpfs_dirtree_cookie, &node->tn_dir.tn_cookietree, 813 lubycookie_cmp, lubycookie_callback, &info); 814 return (info.de); 815 } 816 817 /* --------------------------------------------------------------------- */ 818 819 /* 820 * Helper function for tmpfs_readdir. Returns as much directory entries 821 * as can fit in the uio space. The read starts at uio->uio_offset. 822 * The function returns 0 on success, -1 if there was not enough space 823 * in the uio structure to hold the directory entry or an appropriate 824 * error code if another error happens. 825 * 826 * Caller must hold the node locked (shared ok) 827 */ 828 int 829 tmpfs_dir_getdents(struct tmpfs_node *node, struct uio *uio, off_t *cntp) 830 { 831 int error; 832 off_t startcookie; 833 struct tmpfs_dirent *de; 834 835 TMPFS_VALIDATE_DIR(node); 836 837 /* 838 * Locate the first directory entry we have to return. We have cached 839 * the last readdir in the node, so use those values if appropriate. 840 * Otherwise do a linear scan to find the requested entry. 841 */ 842 startcookie = uio->uio_offset; 843 KKASSERT(startcookie != TMPFS_DIRCOOKIE_DOT); 844 KKASSERT(startcookie != TMPFS_DIRCOOKIE_DOTDOT); 845 846 if (startcookie == TMPFS_DIRCOOKIE_EOF) 847 return 0; 848 849 de = tmpfs_dir_lookupbycookie(node, startcookie); 850 if (de == NULL) 851 return EINVAL; 852 853 /* 854 * Read as much entries as possible; i.e., until we reach the end of 855 * the directory or we exhaust uio space. 856 */ 857 do { 858 ino_t d_ino; 859 uint8_t d_type; 860 861 /* Create a dirent structure representing the current 862 * tmpfs_node and fill it. */ 863 d_ino = de->td_node->tn_id; 864 switch (de->td_node->tn_type) { 865 case VBLK: 866 d_type = DT_BLK; 867 break; 868 869 case VCHR: 870 d_type = DT_CHR; 871 break; 872 873 case VDIR: 874 d_type = DT_DIR; 875 break; 876 877 case VFIFO: 878 d_type = DT_FIFO; 879 break; 880 881 case VLNK: 882 d_type = DT_LNK; 883 break; 884 885 case VREG: 886 d_type = DT_REG; 887 break; 888 889 case VSOCK: 890 d_type = DT_SOCK; 891 break; 892 893 default: 894 panic("tmpfs_dir_getdents: type %p %d", 895 de->td_node, (int)de->td_node->tn_type); 896 } 897 KKASSERT(de->td_namelen < 256); /* 255 + 1 */ 898 899 if (vop_write_dirent(&error, uio, d_ino, d_type, 900 de->td_namelen, de->td_name)) { 901 error = -1; 902 break; 903 } 904 905 (*cntp)++; 906 de = RB_NEXT(tmpfs_dirtree_cookie, 907 node->tn_dir.tn_cookietree, de); 908 } while (error == 0 && uio->uio_resid > 0 && de != NULL); 909 910 /* Update the offset and cache. */ 911 if (de == NULL) { 912 uio->uio_offset = TMPFS_DIRCOOKIE_EOF; 913 } else { 914 uio->uio_offset = tmpfs_dircookie(de); 915 } 916 917 return error; 918 } 919 920 /* --------------------------------------------------------------------- */ 921 922 /* 923 * Resizes the aobj associated to the regular file pointed to by vp to 924 * the size newsize. 'vp' must point to a vnode that represents a regular 925 * file. 'newsize' must be positive. 926 * 927 * pass trivial as 1 when buf content will be overwritten, otherwise set 0 928 * to be zero filled. 929 * 930 * Returns zero on success or an appropriate error code on failure. 931 * 932 * Caller must hold the node exclusively locked. 933 */ 934 int 935 tmpfs_reg_resize(struct vnode *vp, off_t newsize, int trivial) 936 { 937 int error; 938 vm_pindex_t newpages, oldpages; 939 struct tmpfs_mount *tmp; 940 struct tmpfs_node *node; 941 off_t oldsize; 942 943 #ifdef INVARIANTS 944 KKASSERT(vp->v_type == VREG); 945 KKASSERT(newsize >= 0); 946 #endif 947 948 node = VP_TO_TMPFS_NODE(vp); 949 tmp = VFS_TO_TMPFS(vp->v_mount); 950 951 /* 952 * Convert the old and new sizes to the number of pages needed to 953 * store them. It may happen that we do not need to do anything 954 * because the last allocated page can accommodate the change on 955 * its own. 956 */ 957 oldsize = node->tn_size; 958 oldpages = round_page64(oldsize) / PAGE_SIZE; 959 KKASSERT(oldpages == node->tn_reg.tn_aobj_pages); 960 newpages = round_page64(newsize) / PAGE_SIZE; 961 962 if (newpages > oldpages && 963 tmp->tm_pages_used + newpages - oldpages > tmp->tm_pages_max) { 964 error = ENOSPC; 965 goto out; 966 } 967 node->tn_reg.tn_aobj_pages = newpages; 968 node->tn_size = newsize; 969 970 if (newpages != oldpages) 971 atomic_add_long(&tmp->tm_pages_used, (newpages - oldpages)); 972 973 /* 974 * When adjusting the vnode filesize and its VM object we must 975 * also adjust our backing VM object (aobj). The blocksize 976 * used must match the block sized we use for the buffer cache. 977 * 978 * The backing VM object may contain VM pages as well as swap 979 * assignments if we previously renamed main object pages into 980 * it during deactivation. 981 */ 982 if (newsize < oldsize) { 983 vm_pindex_t osize; 984 vm_pindex_t nsize; 985 vm_object_t aobj; 986 987 error = nvtruncbuf(vp, newsize, TMPFS_BLKSIZE, -1, 0); 988 aobj = node->tn_reg.tn_aobj; 989 if (aobj) { 990 osize = aobj->size; 991 nsize = vp->v_object->size; 992 if (nsize < osize) { 993 aobj->size = osize; 994 swap_pager_freespace(aobj, nsize, 995 osize - nsize); 996 vm_object_page_remove(aobj, nsize, osize, 997 FALSE); 998 } 999 } 1000 } else { 1001 vm_object_t aobj; 1002 1003 error = nvextendbuf(vp, oldsize, newsize, 1004 TMPFS_BLKSIZE, TMPFS_BLKSIZE, 1005 -1, -1, trivial); 1006 aobj = node->tn_reg.tn_aobj; 1007 if (aobj) 1008 aobj->size = vp->v_object->size; 1009 } 1010 1011 out: 1012 return error; 1013 } 1014 1015 /* --------------------------------------------------------------------- */ 1016 1017 /* 1018 * Change flags of the given vnode. 1019 * Caller should execute tmpfs_update on vp after a successful execution. 1020 * The vnode must be locked on entry and remain locked on exit. 1021 */ 1022 int 1023 tmpfs_chflags(struct vnode *vp, u_long vaflags, struct ucred *cred) 1024 { 1025 int error; 1026 struct tmpfs_node *node; 1027 int flags; 1028 1029 KKASSERT(vn_islocked(vp)); 1030 1031 node = VP_TO_TMPFS_NODE(vp); 1032 flags = node->tn_flags; 1033 1034 /* Disallow this operation if the file system is mounted read-only. */ 1035 if (vp->v_mount->mnt_flag & MNT_RDONLY) 1036 return EROFS; 1037 error = vop_helper_setattr_flags(&flags, vaflags, node->tn_uid, cred); 1038 1039 /* Actually change the flags on the node itself */ 1040 if (error == 0) { 1041 TMPFS_NODE_LOCK(node); 1042 node->tn_flags = flags; 1043 node->tn_status |= TMPFS_NODE_CHANGED; 1044 TMPFS_NODE_UNLOCK(node); 1045 } 1046 1047 KKASSERT(vn_islocked(vp)); 1048 1049 return error; 1050 } 1051 1052 /* --------------------------------------------------------------------- */ 1053 1054 /* 1055 * Change access mode on the given vnode. 1056 * Caller should execute tmpfs_update on vp after a successful execution. 1057 * The vnode must be locked on entry and remain locked on exit. 1058 */ 1059 int 1060 tmpfs_chmod(struct vnode *vp, mode_t vamode, struct ucred *cred) 1061 { 1062 struct tmpfs_node *node; 1063 mode_t cur_mode; 1064 int error; 1065 1066 KKASSERT(vn_islocked(vp)); 1067 1068 node = VP_TO_TMPFS_NODE(vp); 1069 1070 /* Disallow this operation if the file system is mounted read-only. */ 1071 if (vp->v_mount->mnt_flag & MNT_RDONLY) 1072 return EROFS; 1073 1074 /* Immutable or append-only files cannot be modified, either. */ 1075 if (node->tn_flags & (IMMUTABLE | APPEND)) 1076 return EPERM; 1077 1078 cur_mode = node->tn_mode; 1079 error = vop_helper_chmod(vp, vamode, cred, node->tn_uid, node->tn_gid, 1080 &cur_mode); 1081 1082 if (error == 0 && 1083 (node->tn_mode & ALLPERMS) != (cur_mode & ALLPERMS)) { 1084 TMPFS_NODE_LOCK(node); 1085 node->tn_mode &= ~ALLPERMS; 1086 node->tn_mode |= cur_mode & ALLPERMS; 1087 1088 node->tn_status |= TMPFS_NODE_CHANGED; 1089 TMPFS_NODE_UNLOCK(node); 1090 } 1091 1092 KKASSERT(vn_islocked(vp)); 1093 1094 return 0; 1095 } 1096 1097 /* --------------------------------------------------------------------- */ 1098 1099 /* 1100 * Change ownership of the given vnode. At least one of uid or gid must 1101 * be different than VNOVAL. If one is set to that value, the attribute 1102 * is unchanged. 1103 * Caller should execute tmpfs_update on vp after a successful execution. 1104 * The vnode must be locked on entry and remain locked on exit. 1105 */ 1106 int 1107 tmpfs_chown(struct vnode *vp, uid_t uid, gid_t gid, struct ucred *cred) 1108 { 1109 mode_t cur_mode; 1110 uid_t cur_uid; 1111 gid_t cur_gid; 1112 struct tmpfs_node *node; 1113 int error; 1114 1115 KKASSERT(vn_islocked(vp)); 1116 node = VP_TO_TMPFS_NODE(vp); 1117 1118 /* Disallow this operation if the file system is mounted read-only. */ 1119 if (vp->v_mount->mnt_flag & MNT_RDONLY) 1120 return EROFS; 1121 1122 /* Immutable or append-only files cannot be modified, either. */ 1123 if (node->tn_flags & (IMMUTABLE | APPEND)) 1124 return EPERM; 1125 1126 cur_uid = node->tn_uid; 1127 cur_gid = node->tn_gid; 1128 cur_mode = node->tn_mode; 1129 error = vop_helper_chown(vp, uid, gid, cred, 1130 &cur_uid, &cur_gid, &cur_mode); 1131 1132 if (error == 0) { 1133 TMPFS_NODE_LOCK(node); 1134 if (cur_uid != node->tn_uid || 1135 cur_gid != node->tn_gid || 1136 cur_mode != node->tn_mode) { 1137 node->tn_uid = cur_uid; 1138 node->tn_gid = cur_gid; 1139 node->tn_mode = cur_mode; 1140 node->tn_status |= TMPFS_NODE_CHANGED; 1141 } 1142 TMPFS_NODE_UNLOCK(node); 1143 } 1144 1145 return error; 1146 } 1147 1148 /* --------------------------------------------------------------------- */ 1149 1150 /* 1151 * Change size of the given vnode. 1152 * Caller should execute tmpfs_update on vp after a successful execution. 1153 * The vnode must be locked on entry and remain locked on exit. 1154 */ 1155 int 1156 tmpfs_chsize(struct vnode *vp, u_quad_t size, struct ucred *cred) 1157 { 1158 int error; 1159 struct tmpfs_node *node; 1160 1161 KKASSERT(vn_islocked(vp)); 1162 1163 node = VP_TO_TMPFS_NODE(vp); 1164 1165 /* Decide whether this is a valid operation based on the file type. */ 1166 error = 0; 1167 switch (vp->v_type) { 1168 case VDIR: 1169 return EISDIR; 1170 1171 case VREG: 1172 if (vp->v_mount->mnt_flag & MNT_RDONLY) 1173 return EROFS; 1174 break; 1175 1176 case VBLK: 1177 /* FALLTHROUGH */ 1178 case VCHR: 1179 /* FALLTHROUGH */ 1180 case VFIFO: 1181 /* Allow modifications of special files even if in the file 1182 * system is mounted read-only (we are not modifying the 1183 * files themselves, but the objects they represent). */ 1184 return 0; 1185 1186 default: 1187 /* Anything else is unsupported. */ 1188 return EOPNOTSUPP; 1189 } 1190 1191 /* Immutable or append-only files cannot be modified, either. */ 1192 if (node->tn_flags & (IMMUTABLE | APPEND)) 1193 return EPERM; 1194 1195 error = tmpfs_truncate(vp, size); 1196 /* tmpfs_truncate will raise the NOTE_EXTEND and NOTE_ATTRIB kevents 1197 * for us, as will update tn_status; no need to do that here. */ 1198 1199 KKASSERT(vn_islocked(vp)); 1200 1201 return error; 1202 } 1203 1204 /* --------------------------------------------------------------------- */ 1205 1206 /* 1207 * Change access and modification times of the given vnode. 1208 * Caller should execute tmpfs_update on vp after a successful execution. 1209 * The vnode must be locked on entry and remain locked on exit. 1210 */ 1211 int 1212 tmpfs_chtimes(struct vnode *vp, struct timespec *atime, struct timespec *mtime, 1213 int vaflags, struct ucred *cred) 1214 { 1215 struct tmpfs_node *node; 1216 1217 KKASSERT(vn_islocked(vp)); 1218 1219 node = VP_TO_TMPFS_NODE(vp); 1220 1221 /* Disallow this operation if the file system is mounted read-only. */ 1222 if (vp->v_mount->mnt_flag & MNT_RDONLY) 1223 return EROFS; 1224 1225 /* Immutable or append-only files cannot be modified, either. */ 1226 if (node->tn_flags & (IMMUTABLE | APPEND)) 1227 return EPERM; 1228 1229 TMPFS_NODE_LOCK(node); 1230 if (atime->tv_sec != VNOVAL && atime->tv_nsec != VNOVAL) 1231 node->tn_status |= TMPFS_NODE_ACCESSED; 1232 1233 if (mtime->tv_sec != VNOVAL && mtime->tv_nsec != VNOVAL) { 1234 node->tn_status |= TMPFS_NODE_MODIFIED; 1235 vclrflags(vp, VLASTWRITETS); 1236 } 1237 1238 TMPFS_NODE_UNLOCK(node); 1239 1240 tmpfs_itimes(vp, atime, mtime); 1241 1242 KKASSERT(vn_islocked(vp)); 1243 1244 return 0; 1245 } 1246 1247 /* --------------------------------------------------------------------- */ 1248 /* Sync timestamps */ 1249 void 1250 tmpfs_itimes(struct vnode *vp, const struct timespec *acc, 1251 const struct timespec *mod) 1252 { 1253 struct tmpfs_node *node; 1254 struct timespec now; 1255 1256 node = VP_TO_TMPFS_NODE(vp); 1257 1258 if ((node->tn_status & (TMPFS_NODE_ACCESSED | TMPFS_NODE_MODIFIED | 1259 TMPFS_NODE_CHANGED)) == 0) 1260 return; 1261 1262 vfs_timestamp(&now); 1263 1264 TMPFS_NODE_LOCK(node); 1265 if (node->tn_status & TMPFS_NODE_ACCESSED) { 1266 if (acc == NULL) 1267 acc = &now; 1268 node->tn_atime = acc->tv_sec; 1269 node->tn_atimensec = acc->tv_nsec; 1270 } 1271 if (node->tn_status & TMPFS_NODE_MODIFIED) { 1272 if (mod == NULL) 1273 mod = &now; 1274 node->tn_mtime = mod->tv_sec; 1275 node->tn_mtimensec = mod->tv_nsec; 1276 } 1277 if (node->tn_status & TMPFS_NODE_CHANGED) { 1278 node->tn_ctime = now.tv_sec; 1279 node->tn_ctimensec = now.tv_nsec; 1280 } 1281 1282 node->tn_status &= ~(TMPFS_NODE_ACCESSED | 1283 TMPFS_NODE_MODIFIED | 1284 TMPFS_NODE_CHANGED); 1285 TMPFS_NODE_UNLOCK(node); 1286 } 1287 1288 /* --------------------------------------------------------------------- */ 1289 1290 void 1291 tmpfs_update(struct vnode *vp) 1292 { 1293 tmpfs_itimes(vp, NULL, NULL); 1294 } 1295 1296 /* --------------------------------------------------------------------- */ 1297 1298 /* 1299 * Caller must hold an exclusive node lock. 1300 */ 1301 int 1302 tmpfs_truncate(struct vnode *vp, off_t length) 1303 { 1304 int error; 1305 struct tmpfs_node *node; 1306 1307 node = VP_TO_TMPFS_NODE(vp); 1308 1309 if (length < 0) { 1310 error = EINVAL; 1311 goto out; 1312 } 1313 1314 if (node->tn_size == length) { 1315 error = 0; 1316 goto out; 1317 } 1318 1319 if (length > VFS_TO_TMPFS(vp->v_mount)->tm_maxfilesize) 1320 return (EFBIG); 1321 1322 1323 error = tmpfs_reg_resize(vp, length, 1); 1324 1325 if (error == 0) 1326 node->tn_status |= TMPFS_NODE_CHANGED | TMPFS_NODE_MODIFIED; 1327 1328 out: 1329 tmpfs_update(vp); 1330 1331 return error; 1332 } 1333 1334 /* --------------------------------------------------------------------- */ 1335 1336 static ino_t 1337 tmpfs_fetch_ino(struct tmpfs_mount *tmp) 1338 { 1339 ino_t ret; 1340 1341 ret = atomic_fetchadd_64(&tmp->tm_ino, 1); 1342 1343 return (ret); 1344 } 1345 1346 static int 1347 tmpfs_dirtree_compare(struct tmpfs_dirent *a, struct tmpfs_dirent *b) 1348 { 1349 if (a->td_namelen > b->td_namelen) 1350 return 1; 1351 else if (a->td_namelen < b->td_namelen) 1352 return -1; 1353 else 1354 return strncmp(a->td_name, b->td_name, a->td_namelen); 1355 } 1356 1357 static int 1358 tmpfs_dirtree_compare_cookie(struct tmpfs_dirent *a, struct tmpfs_dirent *b) 1359 { 1360 if (a < b) 1361 return(-1); 1362 if (a > b) 1363 return(1); 1364 return 0; 1365 } 1366