1 /* $NetBSD: tmpfs_subr.c,v 1.35 2007/07/09 21:10:50 ad Exp $ */ 2 3 /*- 4 * Copyright (c) 2005 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Julio M. Merino Vidal, developed as part of Google's Summer of Code 9 * 2005 program. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 23 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 30 * POSSIBILITY OF SUCH DAMAGE. 31 */ 32 33 /* 34 * Efficient memory file system supporting functions. 35 */ 36 37 #include <sys/kernel.h> 38 #include <sys/param.h> 39 #include <sys/namei.h> 40 #include <sys/priv.h> 41 #include <sys/proc.h> 42 #include <sys/spinlock2.h> 43 #include <sys/stat.h> 44 #include <sys/systm.h> 45 #include <sys/vnode.h> 46 #include <sys/vmmeter.h> 47 48 #include <vm/vm.h> 49 #include <vm/vm_object.h> 50 #include <vm/vm_page.h> 51 #include <vm/vm_pager.h> 52 #include <vm/vm_extern.h> 53 54 #include <vfs/tmpfs/tmpfs.h> 55 #include <vfs/tmpfs/tmpfs_vnops.h> 56 57 static ino_t t_ino = 2; 58 static struct spinlock ino_lock; 59 static ino_t tmpfs_fetch_ino(void); 60 61 /* --------------------------------------------------------------------- */ 62 63 /* 64 * Allocates a new node of type 'type' inside the 'tmp' mount point, with 65 * its owner set to 'uid', its group to 'gid' and its mode set to 'mode', 66 * using the credentials of the process 'p'. 67 * 68 * If the node type is set to 'VDIR', then the parent parameter must point 69 * to the parent directory of the node being created. It may only be NULL 70 * while allocating the root node. 71 * 72 * If the node type is set to 'VBLK' or 'VCHR', then the rdev parameter 73 * specifies the device the node represents. 74 * 75 * If the node type is set to 'VLNK', then the parameter target specifies 76 * the file name of the target file for the symbolic link that is being 77 * created. 78 * 79 * Note that new nodes are retrieved from the available list if it has 80 * items or, if it is empty, from the node pool as long as there is enough 81 * space to create them. 82 * 83 * Returns zero on success or an appropriate error code on failure. 84 */ 85 int 86 tmpfs_alloc_node(struct tmpfs_mount *tmp, enum vtype type, 87 uid_t uid, gid_t gid, mode_t mode, struct tmpfs_node *parent, 88 char *target, int rmajor, int rminor, struct tmpfs_node **node) 89 { 90 struct tmpfs_node *nnode; 91 struct timespec ts; 92 udev_t rdev; 93 94 /* If the root directory of the 'tmp' file system is not yet 95 * allocated, this must be the request to do it. */ 96 KKASSERT(IMPLIES(tmp->tm_root == NULL, parent == NULL && type == VDIR)); 97 98 KKASSERT(IFF(type == VLNK, target != NULL)); 99 KKASSERT(IFF(type == VBLK || type == VCHR, rmajor != VNOVAL)); 100 101 if (tmp->tm_nodes_inuse >= tmp->tm_nodes_max) 102 return (ENOSPC); 103 104 nnode = objcache_get(tmp->tm_node_pool, M_WAITOK | M_NULLOK); 105 if (nnode == NULL) 106 return (ENOSPC); 107 108 /* Generic initialization. */ 109 nnode->tn_type = type; 110 vfs_timestamp(&ts); 111 nnode->tn_ctime = nnode->tn_mtime = nnode->tn_atime 112 = ts.tv_sec; 113 nnode->tn_ctimensec = nnode->tn_mtimensec = nnode->tn_atimensec 114 = ts.tv_nsec; 115 nnode->tn_uid = uid; 116 nnode->tn_gid = gid; 117 nnode->tn_mode = mode; 118 nnode->tn_id = tmpfs_fetch_ino(); 119 nnode->tn_advlock.init_done = 0; 120 121 /* Type-specific initialization. */ 122 switch (nnode->tn_type) { 123 case VBLK: 124 case VCHR: 125 rdev = makeudev(rmajor, rminor); 126 if (rdev == NOUDEV) { 127 objcache_put(tmp->tm_node_pool, nnode); 128 return(EINVAL); 129 } 130 nnode->tn_rdev = rdev; 131 break; 132 133 case VDIR: 134 TAILQ_INIT(&nnode->tn_dir.tn_dirhead); 135 KKASSERT(parent != nnode); 136 KKASSERT(IMPLIES(parent == NULL, tmp->tm_root == NULL)); 137 nnode->tn_dir.tn_parent = parent; 138 nnode->tn_dir.tn_readdir_lastn = 0; 139 nnode->tn_dir.tn_readdir_lastp = NULL; 140 nnode->tn_links++; 141 nnode->tn_size = 0; 142 if (parent) { 143 TMPFS_NODE_LOCK(parent); 144 parent->tn_links++; 145 TMPFS_NODE_UNLOCK(parent); 146 } 147 break; 148 149 case VFIFO: 150 /* FALLTHROUGH */ 151 case VSOCK: 152 break; 153 154 case VLNK: 155 nnode->tn_size = strlen(target); 156 nnode->tn_link = kmalloc(nnode->tn_size + 1, tmp->tm_name_zone, 157 M_WAITOK | M_NULLOK); 158 if (nnode->tn_link == NULL) { 159 objcache_put(tmp->tm_node_pool, nnode); 160 return (ENOSPC); 161 } 162 bcopy(target, nnode->tn_link, nnode->tn_size); 163 nnode->tn_link[nnode->tn_size] = '\0'; 164 break; 165 166 case VREG: 167 nnode->tn_reg.tn_aobj = 168 swap_pager_alloc(NULL, 0, VM_PROT_DEFAULT, 0); 169 nnode->tn_reg.tn_aobj_pages = 0; 170 nnode->tn_size = 0; 171 break; 172 173 default: 174 panic("tmpfs_alloc_node: type %p %d", nnode, (int)nnode->tn_type); 175 } 176 177 TMPFS_NODE_LOCK(nnode); 178 TMPFS_LOCK(tmp); 179 LIST_INSERT_HEAD(&tmp->tm_nodes_used, nnode, tn_entries); 180 tmp->tm_nodes_inuse++; 181 TMPFS_UNLOCK(tmp); 182 TMPFS_NODE_UNLOCK(nnode); 183 184 *node = nnode; 185 return 0; 186 } 187 188 /* --------------------------------------------------------------------- */ 189 190 /* 191 * Destroys the node pointed to by node from the file system 'tmp'. 192 * If the node does not belong to the given mount point, the results are 193 * unpredicted. 194 * 195 * If the node references a directory; no entries are allowed because 196 * their removal could need a recursive algorithm, something forbidden in 197 * kernel space. Furthermore, there is not need to provide such 198 * functionality (recursive removal) because the only primitives offered 199 * to the user are the removal of empty directories and the deletion of 200 * individual files. 201 * 202 * Note that nodes are not really deleted; in fact, when a node has been 203 * allocated, it cannot be deleted during the whole life of the file 204 * system. Instead, they are moved to the available list and remain there 205 * until reused. 206 */ 207 void 208 tmpfs_free_node(struct tmpfs_mount *tmp, struct tmpfs_node *node) 209 { 210 vm_pindex_t pages = 0; 211 212 #ifdef INVARIANTS 213 TMPFS_ASSERT_ELOCKED(node); 214 KKASSERT(node->tn_vnode == NULL); 215 KKASSERT((node->tn_vpstate & TMPFS_VNODE_ALLOCATING) == 0); 216 #endif 217 218 TMPFS_LOCK(tmp); 219 LIST_REMOVE(node, tn_entries); 220 tmp->tm_nodes_inuse--; 221 TMPFS_UNLOCK(tmp); 222 TMPFS_NODE_UNLOCK(node); 223 224 switch (node->tn_type) { 225 case VNON: 226 /* Do not do anything. VNON is provided to let the 227 * allocation routine clean itself easily by avoiding 228 * duplicating code in it. */ 229 /* FALLTHROUGH */ 230 case VBLK: 231 /* FALLTHROUGH */ 232 case VCHR: 233 /* FALLTHROUGH */ 234 break; 235 case VDIR: 236 /* 237 * The parent link can be NULL if this is the root 238 * node. 239 */ 240 node->tn_links--; 241 node->tn_size = 0; 242 KKASSERT(node->tn_dir.tn_parent || node == tmp->tm_root); 243 if (node->tn_dir.tn_parent) { 244 TMPFS_NODE_LOCK(node->tn_dir.tn_parent); 245 node->tn_dir.tn_parent->tn_links--; 246 247 /* 248 * If the parent directory has no more links and 249 * no vnode ref nothing is going to come along 250 * and clean it up unless we do it here. 251 */ 252 if (node->tn_dir.tn_parent->tn_links == 0 && 253 node->tn_dir.tn_parent->tn_vnode == NULL) { 254 tmpfs_free_node(tmp, node->tn_dir.tn_parent); 255 /* eats parent lock */ 256 } else { 257 TMPFS_NODE_UNLOCK(node->tn_dir.tn_parent); 258 } 259 node->tn_dir.tn_parent = NULL; 260 } 261 262 /* 263 * If the root node is being destroyed don't leave a 264 * dangling pointer in tmpfs_mount. 265 */ 266 if (node == tmp->tm_root) 267 tmp->tm_root = NULL; 268 break; 269 case VFIFO: 270 /* FALLTHROUGH */ 271 case VSOCK: 272 break; 273 274 case VLNK: 275 kfree(node->tn_link, tmp->tm_name_zone); 276 node->tn_link = NULL; 277 node->tn_size = 0; 278 break; 279 280 case VREG: 281 if (node->tn_reg.tn_aobj != NULL) 282 vm_object_deallocate(node->tn_reg.tn_aobj); 283 node->tn_reg.tn_aobj = NULL; 284 pages = node->tn_reg.tn_aobj_pages; 285 break; 286 287 default: 288 panic("tmpfs_free_node: type %p %d", node, (int)node->tn_type); 289 } 290 291 /* 292 * Clean up fields for the next allocation. The objcache only ctors 293 * new allocations. 294 */ 295 tmpfs_node_ctor(node, NULL, 0); 296 objcache_put(tmp->tm_node_pool, node); 297 /* node is now invalid */ 298 299 TMPFS_LOCK(tmp); 300 tmp->tm_pages_used -= pages; 301 TMPFS_UNLOCK(tmp); 302 } 303 304 /* --------------------------------------------------------------------- */ 305 306 /* 307 * Allocates a new directory entry for the node node with a name of name. 308 * The new directory entry is returned in *de. 309 * 310 * The link count of node is increased by one to reflect the new object 311 * referencing it. 312 * 313 * Returns zero on success or an appropriate error code on failure. 314 */ 315 int 316 tmpfs_alloc_dirent(struct tmpfs_mount *tmp, struct tmpfs_node *node, 317 const char *name, uint16_t len, struct tmpfs_dirent **de) 318 { 319 struct tmpfs_dirent *nde; 320 321 nde = objcache_get(tmp->tm_dirent_pool, M_WAITOK); 322 nde->td_name = kmalloc(len + 1, tmp->tm_name_zone, M_WAITOK | M_NULLOK); 323 if (nde->td_name == NULL) { 324 objcache_put(tmp->tm_dirent_pool, nde); 325 *de = NULL; 326 return (ENOSPC); 327 } 328 nde->td_namelen = len; 329 bcopy(name, nde->td_name, len); 330 nde->td_name[len] = '\0'; 331 332 nde->td_node = node; 333 334 TMPFS_NODE_LOCK(node); 335 node->tn_links++; 336 TMPFS_NODE_UNLOCK(node); 337 338 *de = nde; 339 340 return 0; 341 } 342 343 /* --------------------------------------------------------------------- */ 344 345 /* 346 * Frees a directory entry. It is the caller's responsibility to destroy 347 * the node referenced by it if needed. 348 * 349 * The link count of node is decreased by one to reflect the removal of an 350 * object that referenced it. This only happens if 'node_exists' is true; 351 * otherwise the function will not access the node referred to by the 352 * directory entry, as it may already have been released from the outside. 353 */ 354 void 355 tmpfs_free_dirent(struct tmpfs_mount *tmp, struct tmpfs_dirent *de) 356 { 357 struct tmpfs_node *node; 358 359 node = de->td_node; 360 361 TMPFS_NODE_LOCK(node); 362 TMPFS_ASSERT_ELOCKED(node); 363 KKASSERT(node->tn_links > 0); 364 node->tn_links--; 365 TMPFS_NODE_UNLOCK(node); 366 367 kfree(de->td_name, tmp->tm_name_zone); 368 de->td_namelen = 0; 369 de->td_name = NULL; 370 de->td_node = NULL; 371 objcache_put(tmp->tm_dirent_pool, de); 372 } 373 374 /* --------------------------------------------------------------------- */ 375 376 /* 377 * Allocates a new vnode for the node node or returns a new reference to 378 * an existing one if the node had already a vnode referencing it. The 379 * resulting locked vnode is returned in *vpp. 380 * 381 * Returns zero on success or an appropriate error code on failure. 382 */ 383 int 384 tmpfs_alloc_vp(struct mount *mp, struct tmpfs_node *node, int lkflag, 385 struct vnode **vpp) 386 { 387 int error = 0; 388 struct vnode *vp; 389 390 loop: 391 /* 392 * Interlocked extraction from node. This can race many things. 393 * We have to get a soft reference on the vnode while we hold 394 * the node locked, then acquire it properly and check for races. 395 */ 396 TMPFS_NODE_LOCK(node); 397 if ((vp = node->tn_vnode) != NULL) { 398 KKASSERT((node->tn_vpstate & TMPFS_VNODE_DOOMED) == 0); 399 vhold_interlocked(vp); 400 TMPFS_NODE_UNLOCK(node); 401 402 if (vget(vp, lkflag | LK_EXCLUSIVE) != 0) { 403 vdrop(vp); 404 goto loop; 405 } 406 if (node->tn_vnode != vp) { 407 vput(vp); 408 vdrop(vp); 409 goto loop; 410 } 411 vdrop(vp); 412 goto out; 413 } 414 /* vp is NULL */ 415 416 /* 417 * This should never happen. 418 */ 419 if (node->tn_vpstate & TMPFS_VNODE_DOOMED) { 420 TMPFS_NODE_UNLOCK(node); 421 error = ENOENT; 422 goto out; 423 } 424 425 /* 426 * Interlock against other calls to tmpfs_alloc_vp() trying to 427 * allocate and assign a vp to node. 428 */ 429 if (node->tn_vpstate & TMPFS_VNODE_ALLOCATING) { 430 node->tn_vpstate |= TMPFS_VNODE_WANT; 431 error = tsleep(&node->tn_vpstate, PINTERLOCKED | PCATCH, 432 "tmpfs_alloc_vp", 0); 433 TMPFS_NODE_UNLOCK(node); 434 if (error) 435 return error; 436 goto loop; 437 } 438 node->tn_vpstate |= TMPFS_VNODE_ALLOCATING; 439 TMPFS_NODE_UNLOCK(node); 440 441 /* 442 * Allocate a new vnode (may block). The ALLOCATING flag should 443 * prevent a race against someone else assigning node->tn_vnode. 444 */ 445 error = getnewvnode(VT_TMPFS, mp, &vp, VLKTIMEOUT, LK_CANRECURSE); 446 if (error != 0) 447 goto unlock; 448 449 KKASSERT(node->tn_vnode == NULL); 450 KKASSERT(vp != NULL); 451 vp->v_data = node; 452 vp->v_type = node->tn_type; 453 454 /* Type-specific initialization. */ 455 switch (node->tn_type) { 456 case VBLK: 457 /* FALLTHROUGH */ 458 case VCHR: 459 /* FALLTHROUGH */ 460 case VSOCK: 461 break; 462 case VREG: 463 vinitvmio(vp, node->tn_size, BMASK, -1); 464 break; 465 case VLNK: 466 break; 467 case VFIFO: 468 vp->v_ops = &mp->mnt_vn_fifo_ops; 469 break; 470 case VDIR: 471 break; 472 473 default: 474 panic("tmpfs_alloc_vp: type %p %d", node, (int)node->tn_type); 475 } 476 477 insmntque(vp, mp); 478 479 unlock: 480 TMPFS_NODE_LOCK(node); 481 482 KKASSERT(node->tn_vpstate & TMPFS_VNODE_ALLOCATING); 483 node->tn_vpstate &= ~TMPFS_VNODE_ALLOCATING; 484 node->tn_vnode = vp; 485 486 if (node->tn_vpstate & TMPFS_VNODE_WANT) { 487 node->tn_vpstate &= ~TMPFS_VNODE_WANT; 488 TMPFS_NODE_UNLOCK(node); 489 wakeup(&node->tn_vpstate); 490 } else { 491 TMPFS_NODE_UNLOCK(node); 492 } 493 494 out: 495 *vpp = vp; 496 497 KKASSERT(IFF(error == 0, *vpp != NULL && vn_islocked(*vpp))); 498 #ifdef INVARIANTS 499 TMPFS_NODE_LOCK(node); 500 KKASSERT(*vpp == node->tn_vnode); 501 TMPFS_NODE_UNLOCK(node); 502 #endif 503 504 return error; 505 } 506 507 /* --------------------------------------------------------------------- */ 508 509 /* 510 * Destroys the association between the vnode vp and the node it 511 * references. 512 */ 513 void 514 tmpfs_free_vp(struct vnode *vp) 515 { 516 struct tmpfs_node *node; 517 518 node = VP_TO_TMPFS_NODE(vp); 519 520 TMPFS_NODE_LOCK(node); 521 KKASSERT(lockcount(TMPFS_NODE_MTX(node)) > 0); 522 node->tn_vnode = NULL; 523 TMPFS_NODE_UNLOCK(node); 524 vp->v_data = NULL; 525 } 526 527 /* --------------------------------------------------------------------- */ 528 529 /* 530 * Allocates a new file of type 'type' and adds it to the parent directory 531 * 'dvp'; this addition is done using the component name given in 'cnp'. 532 * The ownership of the new file is automatically assigned based on the 533 * credentials of the caller (through 'cnp'), the group is set based on 534 * the parent directory and the mode is determined from the 'vap' argument. 535 * If successful, *vpp holds a vnode to the newly created file and zero 536 * is returned. Otherwise *vpp is NULL and the function returns an 537 * appropriate error code. 538 */ 539 int 540 tmpfs_alloc_file(struct vnode *dvp, struct vnode **vpp, struct vattr *vap, 541 struct namecache *ncp, struct ucred *cred, char *target) 542 { 543 int error; 544 struct tmpfs_dirent *de; 545 struct tmpfs_mount *tmp; 546 struct tmpfs_node *dnode; 547 struct tmpfs_node *node; 548 struct tmpfs_node *parent; 549 550 tmp = VFS_TO_TMPFS(dvp->v_mount); 551 dnode = VP_TO_TMPFS_DIR(dvp); 552 *vpp = NULL; 553 554 /* If the entry we are creating is a directory, we cannot overflow 555 * the number of links of its parent, because it will get a new 556 * link. */ 557 if (vap->va_type == VDIR) { 558 /* Ensure that we do not overflow the maximum number of links 559 * imposed by the system. */ 560 KKASSERT(dnode->tn_links <= LINK_MAX); 561 if (dnode->tn_links == LINK_MAX) { 562 return EMLINK; 563 } 564 565 parent = dnode; 566 KKASSERT(parent != NULL); 567 } else 568 parent = NULL; 569 570 /* Allocate a node that represents the new file. */ 571 error = tmpfs_alloc_node(tmp, vap->va_type, cred->cr_uid, 572 dnode->tn_gid, vap->va_mode, parent, target, vap->va_rmajor, vap->va_rminor, &node); 573 if (error != 0) 574 return error; 575 TMPFS_NODE_LOCK(node); 576 577 /* Allocate a directory entry that points to the new file. */ 578 error = tmpfs_alloc_dirent(tmp, node, ncp->nc_name, ncp->nc_nlen, &de); 579 if (error != 0) { 580 tmpfs_free_node(tmp, node); 581 /* eats node lock */ 582 return error; 583 } 584 585 /* Allocate a vnode for the new file. */ 586 error = tmpfs_alloc_vp(dvp->v_mount, node, LK_EXCLUSIVE, vpp); 587 if (error != 0) { 588 tmpfs_free_dirent(tmp, de); 589 tmpfs_free_node(tmp, node); 590 /* eats node lock */ 591 return error; 592 } 593 594 /* Now that all required items are allocated, we can proceed to 595 * insert the new node into the directory, an operation that 596 * cannot fail. */ 597 tmpfs_dir_attach(dnode, de); 598 TMPFS_NODE_UNLOCK(node); 599 600 return error; 601 } 602 603 /* --------------------------------------------------------------------- */ 604 605 /* 606 * Attaches the directory entry de to the directory represented by vp. 607 * Note that this does not change the link count of the node pointed by 608 * the directory entry, as this is done by tmpfs_alloc_dirent. 609 */ 610 void 611 tmpfs_dir_attach(struct tmpfs_node *dnode, struct tmpfs_dirent *de) 612 { 613 TMPFS_NODE_LOCK(dnode); 614 TAILQ_INSERT_TAIL(&dnode->tn_dir.tn_dirhead, de, td_entries); 615 616 TMPFS_ASSERT_ELOCKED(dnode); 617 dnode->tn_size += sizeof(struct tmpfs_dirent); 618 dnode->tn_status |= TMPFS_NODE_ACCESSED | TMPFS_NODE_CHANGED | 619 TMPFS_NODE_MODIFIED; 620 TMPFS_NODE_UNLOCK(dnode); 621 } 622 623 /* --------------------------------------------------------------------- */ 624 625 /* 626 * Detaches the directory entry de from the directory represented by vp. 627 * Note that this does not change the link count of the node pointed by 628 * the directory entry, as this is done by tmpfs_free_dirent. 629 */ 630 void 631 tmpfs_dir_detach(struct tmpfs_node *dnode, struct tmpfs_dirent *de) 632 { 633 TMPFS_NODE_LOCK(dnode); 634 if (dnode->tn_dir.tn_readdir_lastp == de) { 635 dnode->tn_dir.tn_readdir_lastn = 0; 636 dnode->tn_dir.tn_readdir_lastp = NULL; 637 } 638 TAILQ_REMOVE(&dnode->tn_dir.tn_dirhead, de, td_entries); 639 640 TMPFS_ASSERT_ELOCKED(dnode); 641 dnode->tn_size -= sizeof(struct tmpfs_dirent); 642 dnode->tn_status |= TMPFS_NODE_ACCESSED | TMPFS_NODE_CHANGED | 643 TMPFS_NODE_MODIFIED; 644 TMPFS_NODE_UNLOCK(dnode); 645 } 646 647 /* --------------------------------------------------------------------- */ 648 649 /* 650 * Looks for a directory entry in the directory represented by node. 651 * 'ncp' describes the name of the entry to look for. Note that the . 652 * and .. components are not allowed as they do not physically exist 653 * within directories. 654 * 655 * Returns a pointer to the entry when found, otherwise NULL. 656 */ 657 struct tmpfs_dirent * 658 tmpfs_dir_lookup(struct tmpfs_node *node, struct tmpfs_node *f, 659 struct namecache *ncp) 660 { 661 struct tmpfs_dirent *de; 662 int len = ncp->nc_nlen; 663 664 TMPFS_VALIDATE_DIR(node); 665 666 TAILQ_FOREACH(de, &node->tn_dir.tn_dirhead, td_entries) { 667 if (f != NULL && de->td_node != f) 668 continue; 669 if (len == de->td_namelen) { 670 if (!memcmp(ncp->nc_name, de->td_name, len)) 671 break; 672 } 673 } 674 675 TMPFS_NODE_LOCK(node); 676 node->tn_status |= TMPFS_NODE_ACCESSED; 677 TMPFS_NODE_UNLOCK(node); 678 679 return de; 680 } 681 682 /* --------------------------------------------------------------------- */ 683 684 /* 685 * Helper function for tmpfs_readdir. Creates a '.' entry for the given 686 * directory and returns it in the uio space. The function returns 0 687 * on success, -1 if there was not enough space in the uio structure to 688 * hold the directory entry or an appropriate error code if another 689 * error happens. 690 */ 691 int 692 tmpfs_dir_getdotdent(struct tmpfs_node *node, struct uio *uio) 693 { 694 int error; 695 struct dirent dent; 696 int dirsize; 697 698 TMPFS_VALIDATE_DIR(node); 699 KKASSERT(uio->uio_offset == TMPFS_DIRCOOKIE_DOT); 700 701 dent.d_ino = node->tn_id; 702 dent.d_type = DT_DIR; 703 dent.d_namlen = 1; 704 dent.d_name[0] = '.'; 705 dent.d_name[1] = '\0'; 706 dirsize = _DIRENT_DIRSIZ(&dent); 707 708 if (dirsize > uio->uio_resid) 709 error = -1; 710 else { 711 error = uiomove((caddr_t)&dent, dirsize, uio); 712 if (error == 0) 713 uio->uio_offset = TMPFS_DIRCOOKIE_DOTDOT; 714 } 715 716 TMPFS_NODE_LOCK(node); 717 node->tn_status |= TMPFS_NODE_ACCESSED; 718 TMPFS_NODE_UNLOCK(node); 719 720 return error; 721 } 722 723 /* --------------------------------------------------------------------- */ 724 725 /* 726 * Helper function for tmpfs_readdir. Creates a '..' entry for the given 727 * directory and returns it in the uio space. The function returns 0 728 * on success, -1 if there was not enough space in the uio structure to 729 * hold the directory entry or an appropriate error code if another 730 * error happens. 731 */ 732 int 733 tmpfs_dir_getdotdotdent(struct tmpfs_mount *tmp, struct tmpfs_node *node, 734 struct uio *uio) 735 { 736 int error; 737 struct dirent dent; 738 int dirsize; 739 740 TMPFS_VALIDATE_DIR(node); 741 KKASSERT(uio->uio_offset == TMPFS_DIRCOOKIE_DOTDOT); 742 743 if (node->tn_dir.tn_parent) { 744 TMPFS_NODE_LOCK(node->tn_dir.tn_parent); 745 dent.d_ino = node->tn_dir.tn_parent->tn_id; 746 TMPFS_NODE_UNLOCK(node->tn_dir.tn_parent); 747 } else { 748 dent.d_ino = tmp->tm_root->tn_id; 749 } 750 751 dent.d_type = DT_DIR; 752 dent.d_namlen = 2; 753 dent.d_name[0] = '.'; 754 dent.d_name[1] = '.'; 755 dent.d_name[2] = '\0'; 756 dirsize = _DIRENT_DIRSIZ(&dent); 757 758 if (dirsize > uio->uio_resid) 759 error = -1; 760 else { 761 error = uiomove((caddr_t)&dent, dirsize, uio); 762 if (error == 0) { 763 struct tmpfs_dirent *de; 764 765 de = TAILQ_FIRST(&node->tn_dir.tn_dirhead); 766 if (de == NULL) 767 uio->uio_offset = TMPFS_DIRCOOKIE_EOF; 768 else 769 uio->uio_offset = tmpfs_dircookie(de); 770 } 771 } 772 773 TMPFS_NODE_LOCK(node); 774 node->tn_status |= TMPFS_NODE_ACCESSED; 775 TMPFS_NODE_UNLOCK(node); 776 777 return error; 778 } 779 780 /* --------------------------------------------------------------------- */ 781 782 /* 783 * Lookup a directory entry by its associated cookie. 784 */ 785 struct tmpfs_dirent * 786 tmpfs_dir_lookupbycookie(struct tmpfs_node *node, off_t cookie) 787 { 788 struct tmpfs_dirent *de; 789 790 if (cookie == node->tn_dir.tn_readdir_lastn && 791 node->tn_dir.tn_readdir_lastp != NULL) { 792 return node->tn_dir.tn_readdir_lastp; 793 } 794 795 TAILQ_FOREACH(de, &node->tn_dir.tn_dirhead, td_entries) { 796 if (tmpfs_dircookie(de) == cookie) { 797 break; 798 } 799 } 800 801 return de; 802 } 803 804 /* --------------------------------------------------------------------- */ 805 806 /* 807 * Helper function for tmpfs_readdir. Returns as much directory entries 808 * as can fit in the uio space. The read starts at uio->uio_offset. 809 * The function returns 0 on success, -1 if there was not enough space 810 * in the uio structure to hold the directory entry or an appropriate 811 * error code if another error happens. 812 */ 813 int 814 tmpfs_dir_getdents(struct tmpfs_node *node, struct uio *uio, off_t *cntp) 815 { 816 int error; 817 off_t startcookie; 818 struct tmpfs_dirent *de; 819 820 TMPFS_VALIDATE_DIR(node); 821 822 /* Locate the first directory entry we have to return. We have cached 823 * the last readdir in the node, so use those values if appropriate. 824 * Otherwise do a linear scan to find the requested entry. */ 825 startcookie = uio->uio_offset; 826 KKASSERT(startcookie != TMPFS_DIRCOOKIE_DOT); 827 KKASSERT(startcookie != TMPFS_DIRCOOKIE_DOTDOT); 828 if (startcookie == TMPFS_DIRCOOKIE_EOF) { 829 return 0; 830 } else { 831 de = tmpfs_dir_lookupbycookie(node, startcookie); 832 } 833 if (de == NULL) { 834 return EINVAL; 835 } 836 837 /* Read as much entries as possible; i.e., until we reach the end of 838 * the directory or we exhaust uio space. */ 839 do { 840 struct dirent d; 841 int reclen; 842 843 /* Create a dirent structure representing the current 844 * tmpfs_node and fill it. */ 845 d.d_ino = de->td_node->tn_id; 846 switch (de->td_node->tn_type) { 847 case VBLK: 848 d.d_type = DT_BLK; 849 break; 850 851 case VCHR: 852 d.d_type = DT_CHR; 853 break; 854 855 case VDIR: 856 d.d_type = DT_DIR; 857 break; 858 859 case VFIFO: 860 d.d_type = DT_FIFO; 861 break; 862 863 case VLNK: 864 d.d_type = DT_LNK; 865 break; 866 867 case VREG: 868 d.d_type = DT_REG; 869 break; 870 871 case VSOCK: 872 d.d_type = DT_SOCK; 873 break; 874 875 default: 876 panic("tmpfs_dir_getdents: type %p %d", 877 de->td_node, (int)de->td_node->tn_type); 878 } 879 d.d_namlen = de->td_namelen; 880 KKASSERT(de->td_namelen < sizeof(d.d_name)); 881 bcopy(de->td_name, d.d_name, d.d_namlen); 882 d.d_name[d.d_namlen] = '\0'; 883 reclen = _DIRENT_RECLEN(d.d_namlen); 884 885 /* Stop reading if the directory entry we are treating is 886 * bigger than the amount of data that can be returned. */ 887 if (reclen > uio->uio_resid) { 888 error = -1; 889 break; 890 } 891 892 /* Copy the new dirent structure into the output buffer and 893 * advance pointers. */ 894 error = uiomove((caddr_t)&d, reclen, uio); 895 896 (*cntp)++; 897 de = TAILQ_NEXT(de, td_entries); 898 } while (error == 0 && uio->uio_resid > 0 && de != NULL); 899 900 /* Update the offset and cache. */ 901 if (de == NULL) { 902 uio->uio_offset = TMPFS_DIRCOOKIE_EOF; 903 node->tn_dir.tn_readdir_lastn = 0; 904 node->tn_dir.tn_readdir_lastp = NULL; 905 } else { 906 node->tn_dir.tn_readdir_lastn = uio->uio_offset = tmpfs_dircookie(de); 907 node->tn_dir.tn_readdir_lastp = de; 908 } 909 node->tn_status |= TMPFS_NODE_ACCESSED; 910 911 return error; 912 } 913 914 /* --------------------------------------------------------------------- */ 915 916 /* 917 * Resizes the aobj associated to the regular file pointed to by vp to 918 * the size newsize. 'vp' must point to a vnode that represents a regular 919 * file. 'newsize' must be positive. 920 * 921 * pass trivial as 1 when buf content will be overwritten, otherwise set 0 922 * to be zero filled. 923 * 924 * Returns zero on success or an appropriate error code on failure. 925 */ 926 int 927 tmpfs_reg_resize(struct vnode *vp, off_t newsize, int trivial) 928 { 929 int error; 930 vm_pindex_t newpages, oldpages; 931 struct tmpfs_mount *tmp; 932 struct tmpfs_node *node; 933 off_t oldsize; 934 935 #ifdef INVARIANTS 936 KKASSERT(vp->v_type == VREG); 937 KKASSERT(newsize >= 0); 938 #endif 939 940 node = VP_TO_TMPFS_NODE(vp); 941 tmp = VFS_TO_TMPFS(vp->v_mount); 942 943 /* Convert the old and new sizes to the number of pages needed to 944 * store them. It may happen that we do not need to do anything 945 * because the last allocated page can accommodate the change on 946 * its own. */ 947 oldsize = node->tn_size; 948 oldpages = round_page64(oldsize) / PAGE_SIZE; 949 KKASSERT(oldpages == node->tn_reg.tn_aobj_pages); 950 newpages = round_page64(newsize) / PAGE_SIZE; 951 952 if (newpages > oldpages && 953 tmp->tm_pages_used + newpages - oldpages > tmp->tm_pages_max) { 954 error = ENOSPC; 955 goto out; 956 } 957 958 TMPFS_LOCK(tmp); 959 tmp->tm_pages_used += (newpages - oldpages); 960 TMPFS_UNLOCK(tmp); 961 962 TMPFS_NODE_LOCK(node); 963 node->tn_reg.tn_aobj_pages = newpages; 964 node->tn_size = newsize; 965 TMPFS_NODE_UNLOCK(node); 966 967 /* 968 * When adjusting the vnode filesize and its VM object we must 969 * also adjust our backing VM object (aobj). The blocksize 970 * used must match the block sized we use for the buffer cache. 971 * 972 * The backing VM object contains no VM pages, only swap 973 * assignments. 974 */ 975 if (newsize < oldsize) { 976 vm_pindex_t osize; 977 vm_pindex_t nsize; 978 vm_object_t aobj; 979 980 error = nvtruncbuf(vp, newsize, BSIZE, -1); 981 aobj = node->tn_reg.tn_aobj; 982 if (aobj) { 983 osize = aobj->size; 984 nsize = vp->v_object->size; 985 if (nsize < osize) { 986 aobj->size = osize; 987 swap_pager_freespace(aobj, nsize, 988 osize - nsize); 989 } 990 } 991 } else { 992 vm_object_t aobj; 993 994 error = nvextendbuf(vp, oldsize, newsize, BSIZE, BSIZE, 995 -1, -1, trivial); 996 aobj = node->tn_reg.tn_aobj; 997 if (aobj) 998 aobj->size = vp->v_object->size; 999 } 1000 1001 out: 1002 return error; 1003 } 1004 1005 /* --------------------------------------------------------------------- */ 1006 1007 /* 1008 * Change flags of the given vnode. 1009 * Caller should execute tmpfs_update on vp after a successful execution. 1010 * The vnode must be locked on entry and remain locked on exit. 1011 */ 1012 int 1013 tmpfs_chflags(struct vnode *vp, int vaflags, struct ucred *cred) 1014 { 1015 int error; 1016 struct tmpfs_node *node; 1017 int flags; 1018 1019 KKASSERT(vn_islocked(vp)); 1020 1021 node = VP_TO_TMPFS_NODE(vp); 1022 flags = node->tn_flags; 1023 1024 /* Disallow this operation if the file system is mounted read-only. */ 1025 if (vp->v_mount->mnt_flag & MNT_RDONLY) 1026 return EROFS; 1027 error = vop_helper_setattr_flags(&flags, vaflags, node->tn_uid, cred); 1028 1029 /* 1030 * Unprivileged processes are not permitted to unset system 1031 * flags, or modify flags if any system flags are set. 1032 * 1033 * Silently enforce SF_NOCACHE on the root tmpfs vnode so 1034 * tmpfs data is not double-cached by swapcache. 1035 */ 1036 if (error == 0) { 1037 TMPFS_NODE_LOCK(node); 1038 if (!priv_check_cred(cred, PRIV_VFS_SYSFLAGS, 0)) { 1039 if (vp->v_flag & VROOT) 1040 flags |= SF_NOCACHE; 1041 node->tn_flags = flags; 1042 } else { 1043 if (node->tn_flags & (SF_NOUNLINK | SF_IMMUTABLE | 1044 SF_APPEND) || 1045 (flags & UF_SETTABLE) != flags) { 1046 error = EPERM; 1047 } else { 1048 node->tn_flags &= SF_SETTABLE; 1049 node->tn_flags |= (flags & UF_SETTABLE); 1050 } 1051 } 1052 node->tn_status |= TMPFS_NODE_CHANGED; 1053 TMPFS_NODE_UNLOCK(node); 1054 } 1055 1056 KKASSERT(vn_islocked(vp)); 1057 1058 return error; 1059 } 1060 1061 /* --------------------------------------------------------------------- */ 1062 1063 /* 1064 * Change access mode on the given vnode. 1065 * Caller should execute tmpfs_update on vp after a successful execution. 1066 * The vnode must be locked on entry and remain locked on exit. 1067 */ 1068 int 1069 tmpfs_chmod(struct vnode *vp, mode_t vamode, struct ucred *cred) 1070 { 1071 struct tmpfs_node *node; 1072 mode_t cur_mode; 1073 int error; 1074 1075 KKASSERT(vn_islocked(vp)); 1076 1077 node = VP_TO_TMPFS_NODE(vp); 1078 1079 /* Disallow this operation if the file system is mounted read-only. */ 1080 if (vp->v_mount->mnt_flag & MNT_RDONLY) 1081 return EROFS; 1082 1083 /* Immutable or append-only files cannot be modified, either. */ 1084 if (node->tn_flags & (IMMUTABLE | APPEND)) 1085 return EPERM; 1086 1087 cur_mode = node->tn_mode; 1088 error = vop_helper_chmod(vp, vamode, cred, node->tn_uid, node->tn_gid, 1089 &cur_mode); 1090 1091 if (error == 0 && 1092 (node->tn_mode & ALLPERMS) != (cur_mode & ALLPERMS)) { 1093 TMPFS_NODE_LOCK(node); 1094 node->tn_mode &= ~ALLPERMS; 1095 node->tn_mode |= cur_mode & ALLPERMS; 1096 1097 node->tn_status |= TMPFS_NODE_CHANGED; 1098 TMPFS_NODE_UNLOCK(node); 1099 } 1100 1101 KKASSERT(vn_islocked(vp)); 1102 1103 return 0; 1104 } 1105 1106 /* --------------------------------------------------------------------- */ 1107 1108 /* 1109 * Change ownership of the given vnode. At least one of uid or gid must 1110 * be different than VNOVAL. If one is set to that value, the attribute 1111 * is unchanged. 1112 * Caller should execute tmpfs_update on vp after a successful execution. 1113 * The vnode must be locked on entry and remain locked on exit. 1114 */ 1115 int 1116 tmpfs_chown(struct vnode *vp, uid_t uid, gid_t gid, struct ucred *cred) 1117 { 1118 mode_t cur_mode; 1119 uid_t cur_uid; 1120 gid_t cur_gid; 1121 struct tmpfs_node *node; 1122 int error; 1123 1124 KKASSERT(vn_islocked(vp)); 1125 node = VP_TO_TMPFS_NODE(vp); 1126 1127 /* Disallow this operation if the file system is mounted read-only. */ 1128 if (vp->v_mount->mnt_flag & MNT_RDONLY) 1129 return EROFS; 1130 1131 /* Immutable or append-only files cannot be modified, either. */ 1132 if (node->tn_flags & (IMMUTABLE | APPEND)) 1133 return EPERM; 1134 1135 cur_uid = node->tn_uid; 1136 cur_gid = node->tn_gid; 1137 cur_mode = node->tn_mode; 1138 error = vop_helper_chown(vp, uid, gid, cred, 1139 &cur_uid, &cur_gid, &cur_mode); 1140 1141 if (error == 0) { 1142 TMPFS_NODE_LOCK(node); 1143 if (cur_uid != node->tn_uid || 1144 cur_gid != node->tn_gid || 1145 cur_mode != node->tn_mode) { 1146 node->tn_uid = cur_uid; 1147 node->tn_gid = cur_gid; 1148 node->tn_mode = cur_mode; 1149 node->tn_status |= TMPFS_NODE_CHANGED; 1150 } 1151 TMPFS_NODE_UNLOCK(node); 1152 } 1153 1154 return error; 1155 } 1156 1157 /* --------------------------------------------------------------------- */ 1158 1159 /* 1160 * Change size of the given vnode. 1161 * Caller should execute tmpfs_update on vp after a successful execution. 1162 * The vnode must be locked on entry and remain locked on exit. 1163 */ 1164 int 1165 tmpfs_chsize(struct vnode *vp, u_quad_t size, struct ucred *cred) 1166 { 1167 int error; 1168 struct tmpfs_node *node; 1169 1170 KKASSERT(vn_islocked(vp)); 1171 1172 node = VP_TO_TMPFS_NODE(vp); 1173 1174 /* Decide whether this is a valid operation based on the file type. */ 1175 error = 0; 1176 switch (vp->v_type) { 1177 case VDIR: 1178 return EISDIR; 1179 1180 case VREG: 1181 if (vp->v_mount->mnt_flag & MNT_RDONLY) 1182 return EROFS; 1183 break; 1184 1185 case VBLK: 1186 /* FALLTHROUGH */ 1187 case VCHR: 1188 /* FALLTHROUGH */ 1189 case VFIFO: 1190 /* Allow modifications of special files even if in the file 1191 * system is mounted read-only (we are not modifying the 1192 * files themselves, but the objects they represent). */ 1193 return 0; 1194 1195 default: 1196 /* Anything else is unsupported. */ 1197 return EOPNOTSUPP; 1198 } 1199 1200 /* Immutable or append-only files cannot be modified, either. */ 1201 if (node->tn_flags & (IMMUTABLE | APPEND)) 1202 return EPERM; 1203 1204 error = tmpfs_truncate(vp, size); 1205 /* tmpfs_truncate will raise the NOTE_EXTEND and NOTE_ATTRIB kevents 1206 * for us, as will update tn_status; no need to do that here. */ 1207 1208 KKASSERT(vn_islocked(vp)); 1209 1210 return error; 1211 } 1212 1213 /* --------------------------------------------------------------------- */ 1214 1215 /* 1216 * Change access and modification times of the given vnode. 1217 * Caller should execute tmpfs_update on vp after a successful execution. 1218 * The vnode must be locked on entry and remain locked on exit. 1219 */ 1220 int 1221 tmpfs_chtimes(struct vnode *vp, struct timespec *atime, struct timespec *mtime, 1222 int vaflags, struct ucred *cred) 1223 { 1224 struct tmpfs_node *node; 1225 1226 KKASSERT(vn_islocked(vp)); 1227 1228 node = VP_TO_TMPFS_NODE(vp); 1229 1230 /* Disallow this operation if the file system is mounted read-only. */ 1231 if (vp->v_mount->mnt_flag & MNT_RDONLY) 1232 return EROFS; 1233 1234 /* Immutable or append-only files cannot be modified, either. */ 1235 if (node->tn_flags & (IMMUTABLE | APPEND)) 1236 return EPERM; 1237 1238 TMPFS_NODE_LOCK(node); 1239 if (atime->tv_sec != VNOVAL && atime->tv_nsec != VNOVAL) 1240 node->tn_status |= TMPFS_NODE_ACCESSED; 1241 1242 if (mtime->tv_sec != VNOVAL && mtime->tv_nsec != VNOVAL) 1243 node->tn_status |= TMPFS_NODE_MODIFIED; 1244 1245 TMPFS_NODE_UNLOCK(node); 1246 1247 tmpfs_itimes(vp, atime, mtime); 1248 1249 KKASSERT(vn_islocked(vp)); 1250 1251 return 0; 1252 } 1253 1254 /* --------------------------------------------------------------------- */ 1255 /* Sync timestamps */ 1256 void 1257 tmpfs_itimes(struct vnode *vp, const struct timespec *acc, 1258 const struct timespec *mod) 1259 { 1260 struct tmpfs_node *node; 1261 struct timespec now; 1262 1263 node = VP_TO_TMPFS_NODE(vp); 1264 1265 if ((node->tn_status & (TMPFS_NODE_ACCESSED | TMPFS_NODE_MODIFIED | 1266 TMPFS_NODE_CHANGED)) == 0) 1267 return; 1268 1269 vfs_timestamp(&now); 1270 1271 TMPFS_NODE_LOCK(node); 1272 if (node->tn_status & TMPFS_NODE_ACCESSED) { 1273 if (acc == NULL) 1274 acc = &now; 1275 node->tn_atime = acc->tv_sec; 1276 node->tn_atimensec = acc->tv_nsec; 1277 } 1278 if (node->tn_status & TMPFS_NODE_MODIFIED) { 1279 if (mod == NULL) 1280 mod = &now; 1281 node->tn_mtime = mod->tv_sec; 1282 node->tn_mtimensec = mod->tv_nsec; 1283 } 1284 if (node->tn_status & TMPFS_NODE_CHANGED) { 1285 node->tn_ctime = now.tv_sec; 1286 node->tn_ctimensec = now.tv_nsec; 1287 } 1288 node->tn_status &= 1289 ~(TMPFS_NODE_ACCESSED | TMPFS_NODE_MODIFIED | TMPFS_NODE_CHANGED); 1290 TMPFS_NODE_UNLOCK(node); 1291 } 1292 1293 /* --------------------------------------------------------------------- */ 1294 1295 void 1296 tmpfs_update(struct vnode *vp) 1297 { 1298 1299 tmpfs_itimes(vp, NULL, NULL); 1300 } 1301 1302 /* --------------------------------------------------------------------- */ 1303 1304 int 1305 tmpfs_truncate(struct vnode *vp, off_t length) 1306 { 1307 int error; 1308 struct tmpfs_node *node; 1309 1310 node = VP_TO_TMPFS_NODE(vp); 1311 1312 if (length < 0) { 1313 error = EINVAL; 1314 goto out; 1315 } 1316 1317 if (node->tn_size == length) { 1318 error = 0; 1319 goto out; 1320 } 1321 1322 if (length > VFS_TO_TMPFS(vp->v_mount)->tm_maxfilesize) 1323 return (EFBIG); 1324 1325 1326 error = tmpfs_reg_resize(vp, length, 1); 1327 1328 if (error == 0) { 1329 TMPFS_NODE_LOCK(node); 1330 node->tn_status |= TMPFS_NODE_CHANGED | TMPFS_NODE_MODIFIED; 1331 TMPFS_NODE_UNLOCK(node); 1332 } 1333 1334 out: 1335 tmpfs_update(vp); 1336 1337 return error; 1338 } 1339 1340 /* --------------------------------------------------------------------- */ 1341 1342 static ino_t 1343 tmpfs_fetch_ino(void) 1344 { 1345 ino_t ret; 1346 1347 spin_lock(&ino_lock); 1348 ret = t_ino++; 1349 spin_unlock(&ino_lock); 1350 1351 return ret; 1352 } 1353