1 /* 2 * Copyright (c) 2011-2014 The DragonFly Project. All rights reserved. 3 * 4 * This code is derived from software contributed to The DragonFly Project 5 * by Matthew Dillon <dillon@dragonflybsd.org> 6 * by Venkatesh Srinivas <vsrinivas@dragonflybsd.org> 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in 16 * the documentation and/or other materials provided with the 17 * distribution. 18 * 3. Neither the name of The DragonFly Project nor the names of its 19 * contributors may be used to endorse or promote products derived 20 * from this software without specific, prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 23 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 24 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 25 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 26 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 27 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 28 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 29 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 30 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 31 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 32 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33 * SUCH DAMAGE. 
 */

#include <sys/cdefs.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/types.h>
#include <sys/lock.h>
#include <sys/uuid.h>

#include "hammer2.h"

#define INODE_DEBUG	0

RB_GENERATE2(hammer2_inode_tree, hammer2_inode, rbnode, hammer2_inode_cmp,
	     hammer2_tid_t, meta.inum);

/*
 * Red-black tree comparator ordering in-memory inodes by inode number.
 * Used by the RB_GENERATE2() expansion above for pmp->inum_tree lookups.
 */
int
hammer2_inode_cmp(hammer2_inode_t *ip1, hammer2_inode_t *ip2)
{
	if (ip1->meta.inum < ip2->meta.inum)
		return(-1);
	if (ip1->meta.inum > ip2->meta.inum)
		return(1);
	return(0);
}

/*
 * Queue an inode on the PFS's side queue (pmp->sideq), gaining a ref on
 * the inode to back the queue entry.
 *
 * The ONSIDEQ flag is tested once without the spinlock as an optimization
 * and then re-tested under pmp->list_spin to resolve SMP races.  If the
 * race is lost the speculatively allocated entry is simply freed.
 */
static
void
hammer2_inode_delayed_sideq(hammer2_inode_t *ip)
{
	hammer2_inode_sideq_t *ipul;
	hammer2_pfs_t *pmp = ip->pmp;

	if ((ip->flags & HAMMER2_INODE_ONSIDEQ) == 0) {
		/* kmalloc can block; do it before taking the spinlock */
		ipul = kmalloc(sizeof(*ipul), pmp->minode,
			       M_WAITOK | M_ZERO);
		ipul->ip = ip;
		hammer2_spin_ex(&pmp->list_spin);
		if ((ip->flags & HAMMER2_INODE_ONSIDEQ) == 0) {
			hammer2_inode_ref(ip);
			atomic_set_int(&ip->flags,
				       HAMMER2_INODE_ONSIDEQ);
			TAILQ_INSERT_TAIL(&pmp->sideq, ipul, entry);
			hammer2_spin_unex(&pmp->list_spin);
		} else {
			/* lost the race; discard the unused entry */
			hammer2_spin_unex(&pmp->list_spin);
			kfree(ipul, pmp->minode);
		}
	}
}

/*
 * HAMMER2 inode locks
 *
 * HAMMER2 offers shared and exclusive locks on inodes.  Pass a mask of
 * flags for options:
 *
 *	- pass HAMMER2_RESOLVE_SHARED if a shared lock is desired.  The
 *	  inode locking function will automatically set the RDONLY flag.
 *
 *	- pass HAMMER2_RESOLVE_ALWAYS if you need the inode's meta-data.
 *	  Most front-end inode locks do.
 *
 *	- pass HAMMER2_RESOLVE_NEVER if you do not want to require that
 *	  the inode data be resolved.  This is used by the syncthr because
 *	  it can run on an unresolved/out-of-sync cluster, and also by the
 *	  vnode reclamation code to avoid unnecessary I/O (particularly when
 *	  disposing of hundreds of thousands of cached vnodes).
 *
 * The inode locking function locks the inode itself, resolves any stale
 * chains in the inode's cluster, and allocates a fresh copy of the
 * cluster with 1 ref and all the underlying chains locked.
 *
 * ip->cluster will be stable while the inode is locked.
 *
 * NOTE: We don't combine the inode/chain lock because putting away an
 *       inode would otherwise confuse multiple lock holders of the inode.
 *
 * NOTE: In-memory inodes always point to hardlink targets (the actual file),
 *	 and never point to a hardlink pointer.
 *
 * NOTE: If caller passes HAMMER2_RESOLVE_RDONLY the exclusive locking code
 *	 will feel free to reduce the chain set in the cluster as an
 *	 optimization.  It will still be validated against the quorum if
 *	 appropriate, but the optimization might be able to reduce data
 *	 accesses to one node.  This flag is automatically set if the inode
 *	 is locked with HAMMER2_RESOLVE_SHARED.
 */
void
hammer2_inode_lock(hammer2_inode_t *ip, int how)
{
	/* ref held for the lifetime of the lock, dropped on unlock */
	hammer2_inode_ref(ip);

	/*
	 * Inode structure mutex - shared if requested by the caller,
	 * otherwise exclusive.
	 */
	if (how & HAMMER2_RESOLVE_SHARED) {
		/*how |= HAMMER2_RESOLVE_RDONLY; not used */
		hammer2_mtx_sh(&ip->lock);
	} else {
		hammer2_mtx_ex(&ip->lock);
	}
}

/*
 * Select a chain out of an inode's cluster and lock it.
 *
 * The inode does not have to be locked.
141 */ 142 hammer2_chain_t * 143 hammer2_inode_chain(hammer2_inode_t *ip, int clindex, int how) 144 { 145 hammer2_chain_t *chain; 146 147 hammer2_spin_sh(&ip->cluster_spin); 148 if (clindex >= ip->cluster.nchains) 149 chain = NULL; 150 else 151 chain = ip->cluster.array[clindex].chain; 152 if (chain) { 153 hammer2_chain_ref(chain); 154 hammer2_spin_unsh(&ip->cluster_spin); 155 hammer2_chain_lock(chain, how); 156 } else { 157 hammer2_spin_unsh(&ip->cluster_spin); 158 } 159 return chain; 160 } 161 162 hammer2_chain_t * 163 hammer2_inode_chain_and_parent(hammer2_inode_t *ip, int clindex, 164 hammer2_chain_t **parentp, int how) 165 { 166 hammer2_chain_t *chain; 167 hammer2_chain_t *parent; 168 169 for (;;) { 170 hammer2_spin_sh(&ip->cluster_spin); 171 if (clindex >= ip->cluster.nchains) 172 chain = NULL; 173 else 174 chain = ip->cluster.array[clindex].chain; 175 if (chain) { 176 hammer2_chain_ref(chain); 177 hammer2_spin_unsh(&ip->cluster_spin); 178 hammer2_chain_lock(chain, how); 179 } else { 180 hammer2_spin_unsh(&ip->cluster_spin); 181 } 182 183 /* 184 * Get parent, lock order must be (parent, chain). 185 */ 186 parent = chain->parent; 187 hammer2_chain_ref(parent); 188 hammer2_chain_unlock(chain); 189 hammer2_chain_lock(parent, how); 190 hammer2_chain_lock(chain, how); 191 if (ip->cluster.array[clindex].chain == chain && 192 chain->parent == parent) { 193 break; 194 } 195 196 /* 197 * Retry 198 */ 199 hammer2_chain_unlock(chain); 200 hammer2_chain_drop(chain); 201 hammer2_chain_unlock(parent); 202 hammer2_chain_drop(parent); 203 } 204 *parentp = parent; 205 206 return chain; 207 } 208 209 void 210 hammer2_inode_unlock(hammer2_inode_t *ip) 211 { 212 hammer2_mtx_unlock(&ip->lock); 213 hammer2_inode_drop(ip); 214 } 215 216 /* 217 * Temporarily release a lock held shared or exclusive. Caller must 218 * hold the lock shared or exclusive on call and lock will be released 219 * on return. 220 * 221 * Restore a lock that was temporarily released. 
 */
hammer2_mtx_state_t
hammer2_inode_lock_temp_release(hammer2_inode_t *ip)
{
	return hammer2_mtx_temp_release(&ip->lock);
}

/*
 * Restore the inode lock to the state returned by the matching
 * hammer2_inode_lock_temp_release() call.
 */
void
hammer2_inode_lock_temp_restore(hammer2_inode_t *ip, hammer2_mtx_state_t ostate)
{
	hammer2_mtx_temp_restore(&ip->lock, ostate);
}

/*
 * Upgrade a shared inode lock to exclusive and return.  If the inode lock
 * is already held exclusively this is a NOP.
 *
 * The caller MUST hold the inode lock either shared or exclusive on call
 * and will own the lock exclusively on return.
 *
 * Returns non-zero if the lock was already exclusive prior to the upgrade.
 *
 * NOTE: The shared lock is dropped before the exclusive lock is acquired,
 *	 so the upgrade is not atomic; callers must re-validate state
 *	 protected by the lock after upgrading.
 */
int
hammer2_inode_lock_upgrade(hammer2_inode_t *ip)
{
	int wasexclusive;

	if (mtx_islocked_ex(&ip->lock)) {
		wasexclusive = 1;
	} else {
		hammer2_mtx_unlock(&ip->lock);
		hammer2_mtx_ex(&ip->lock);
		wasexclusive = 0;
	}
	return wasexclusive;
}

/*
 * Downgrade an inode lock from exclusive to shared only if the inode
 * lock was previously shared.  If the inode lock was previously exclusive,
 * this is a NOP.
 */
void
hammer2_inode_lock_downgrade(hammer2_inode_t *ip, int wasexclusive)
{
	if (wasexclusive == 0)
		mtx_downgrade(&ip->lock);
}

/*
 * Lookup an inode by inode number.  Returns a referenced inode or NULL.
 *
 * The super-root spmp does not index inodes (duplicate inode numbers
 * across PFSs), so lookups against it always return NULL.
 */
hammer2_inode_t *
hammer2_inode_lookup(hammer2_pfs_t *pmp, hammer2_tid_t inum)
{
	hammer2_inode_t *ip;

	KKASSERT(pmp);
	if (pmp->spmp_hmp) {
		ip = NULL;
	} else {
		hammer2_spin_ex(&pmp->inum_spin);
		ip = RB_LOOKUP(hammer2_inode_tree, &pmp->inum_tree, inum);
		if (ip)
			hammer2_inode_ref(ip);
		hammer2_spin_unex(&pmp->inum_spin);
	}
	return(ip);
}

/*
 * Adding a ref to an inode is only legal if the inode already has at least
 * one ref.
 *
 * (can be called with spinlock held)
 */
void
hammer2_inode_ref(hammer2_inode_t *ip)
{
	atomic_add_int(&ip->refs, 1);
	if (hammer2_debug & 0x80000) {
		kprintf("INODE+1 %p (%d->%d)\n", ip, ip->refs - 1, ip->refs);
		print_backtrace(8);
	}
}

/*
 * Drop an inode reference, freeing the inode when the last reference goes
 * away.
 *
 * Implemented as a loop: dropping the implied ref held via ip->pip is
 * done by iterating on the parent instead of recursing.
 */
void
hammer2_inode_drop(hammer2_inode_t *ip)
{
	hammer2_pfs_t *pmp;
	hammer2_inode_t *pip;
	u_int refs;

	while (ip) {
		if (hammer2_debug & 0x80000) {
			kprintf("INODE-1 %p (%d->%d)\n",
				ip, ip->refs, ip->refs - 1);
			print_backtrace(8);
		}
		refs = ip->refs;
		cpu_ccfence();
		if (refs == 1) {
			/*
			 * Transition to zero, must interlock with
			 * the inode inumber lookup tree (if applicable).
			 * It should not be possible for anyone to race
			 * the transition to 0.
			 */
			pmp = ip->pmp;
			KKASSERT(pmp);
			hammer2_spin_ex(&pmp->inum_spin);

			if (atomic_cmpset_int(&ip->refs, 1, 0)) {
				KKASSERT(hammer2_mtx_refs(&ip->lock) == 0);
				if (ip->flags & HAMMER2_INODE_ONRBTREE) {
					atomic_clear_int(&ip->flags,
						     HAMMER2_INODE_ONRBTREE);
					RB_REMOVE(hammer2_inode_tree,
						  &pmp->inum_tree, ip);
				}
				hammer2_spin_unex(&pmp->inum_spin);

				pip = ip->pip;
				ip->pip = NULL;
				ip->pmp = NULL;

				/*
				 * Cleaning out ip->cluster isn't entirely
				 * trivial.
				 */
				hammer2_inode_repoint(ip, NULL, NULL);

				/*
				 * We have to drop pip (if non-NULL) to
				 * dispose of our implied reference from
				 * ip->pip.  We can simply loop on it.
				 */
				kfree(ip, pmp->minode);
				atomic_add_long(&pmp->inmem_inodes, -1);
				ip = pip;
				/* continue with pip (can be NULL) */
			} else {
				/* cmpset lost; someone re-referenced us */
				hammer2_spin_unex(&ip->pmp->inum_spin);
			}
		} else {
			/*
			 * Non zero transition
			 */
			if (atomic_cmpset_int(&ip->refs, refs, refs - 1))
				break;
		}
	}
}

/*
 * Get the vnode associated with the given inode, allocating the vnode if
 * necessary.  The vnode will be returned exclusively locked.
 *
 * The caller must lock the inode (shared or exclusive).
 *
 * Great care must be taken to avoid deadlocks and vnode acquisition/reclaim
 * races.
 */
struct vnode *
hammer2_igetv(hammer2_inode_t *ip, int *errorp)
{
	hammer2_pfs_t *pmp;
	struct vnode *vp;

	pmp = ip->pmp;
	KKASSERT(pmp != NULL);
	*errorp = 0;

	for (;;) {
		/*
		 * Attempt to reuse an existing vnode assignment.  It is
		 * possible to race a reclaim so the vget() may fail.  The
		 * inode must be unlocked during the vget() to avoid a
		 * deadlock against a reclaim.
		 */
		int wasexclusive;

		vp = ip->vp;
		if (vp) {
			/*
			 * Inode must be unlocked during the vget() to avoid
			 * possible deadlocks, but leave the ip ref intact.
			 *
			 * vnode is held to prevent destruction during the
			 * vget().  The vget() can still fail if we lost
			 * a reclaim race on the vnode.
			 */
			hammer2_mtx_state_t ostate;

			vhold(vp);
			ostate = hammer2_inode_lock_temp_release(ip);
			if (vget(vp, LK_EXCLUSIVE)) {
				vdrop(vp);
				hammer2_inode_lock_temp_restore(ip, ostate);
				continue;
			}
			hammer2_inode_lock_temp_restore(ip, ostate);
			vdrop(vp);
			/* vp still locked and ref from vget */
			if (ip->vp != vp) {
				kprintf("hammer2: igetv race %p/%p\n",
					ip->vp, vp);
				vput(vp);
				continue;
			}
			*errorp = 0;
			break;
		}

		/*
		 * No vnode exists, allocate a new vnode.  Beware of
		 * allocation races.  This function will return an
		 * exclusively locked and referenced vnode.
		 */
		*errorp = getnewvnode(VT_HAMMER2, pmp->mp, &vp, 0, 0);
		if (*errorp) {
			kprintf("hammer2: igetv getnewvnode failed %d\n",
				*errorp);
			vp = NULL;
			break;
		}

		/*
		 * Lock the inode and check for an allocation race.
		 */
		wasexclusive = hammer2_inode_lock_upgrade(ip);
		if (ip->vp != NULL) {
			vp->v_type = VBAD;
			vx_put(vp);
			hammer2_inode_lock_downgrade(ip, wasexclusive);
			continue;
		}

		switch (ip->meta.type) {
		case HAMMER2_OBJTYPE_DIRECTORY:
			vp->v_type = VDIR;
			break;
		case HAMMER2_OBJTYPE_REGFILE:
			vp->v_type = VREG;
			vinitvmio(vp, ip->meta.size,
				  HAMMER2_LBUFSIZE,
				  (int)ip->meta.size & HAMMER2_LBUFMASK);
			break;
		case HAMMER2_OBJTYPE_SOFTLINK:
			/*
			 * XXX for now we are using the generic file_read
			 * and file_write code so we need a buffer cache
			 * association.
			 */
			vp->v_type = VLNK;
			vinitvmio(vp, ip->meta.size,
				  HAMMER2_LBUFSIZE,
				  (int)ip->meta.size & HAMMER2_LBUFMASK);
			break;
		case HAMMER2_OBJTYPE_CDEV:
			vp->v_type = VCHR;
			/* fall through */
		case HAMMER2_OBJTYPE_BDEV:
			vp->v_ops = &pmp->mp->mnt_vn_spec_ops;
			if (ip->meta.type != HAMMER2_OBJTYPE_CDEV)
				vp->v_type = VBLK;
			addaliasu(vp,
				  ip->meta.rmajor,
				  ip->meta.rminor);
			break;
		case HAMMER2_OBJTYPE_FIFO:
			vp->v_type = VFIFO;
			vp->v_ops = &pmp->mp->mnt_vn_fifo_ops;
			break;
		case HAMMER2_OBJTYPE_SOCKET:
			vp->v_type = VSOCK;
			break;
		default:
			panic("hammer2: unhandled objtype %d",
			      ip->meta.type);
			break;
		}

		if (ip == pmp->iroot)
			vsetflags(vp, VROOT);

		vp->v_data = ip;
		ip->vp = vp;
		hammer2_inode_ref(ip);		/* vp association */
		hammer2_inode_lock_downgrade(ip, wasexclusive);
		break;
	}

	/*
	 * Return non-NULL vp and *errorp == 0, or NULL vp and *errorp != 0.
	 *
	 * NOTE(review): vp can be NULL here on the error path; the debug
	 * kprintf below would dereference it - verify 0x0002 is never set
	 * when getnewvnode() can fail.
	 */
	if (hammer2_debug & 0x0002) {
		kprintf("igetv vp %p refs 0x%08x aux 0x%08x\n",
			vp, vp->v_refcnt, vp->v_auxrefs);
	}
	return (vp);
}

/*
 * Returns the inode associated with the passed-in cluster, creating the
 * inode if necessary and synchronizing it to the passed-in cluster otherwise.
 * When synchronizing, if idx >= 0, only cluster index (idx) is synchronized.
 * Otherwise the whole cluster is synchronized.
 *
 * The passed-in cluster must be locked and will remain locked on return.
 * The returned inode will be locked and the caller may dispose of both
 * via hammer2_inode_unlock() + hammer2_inode_drop().  However, if the caller
 * needs to resolve a hardlink it must ref/unlock/relock/drop the inode.
 *
 * The hammer2_inode structure regulates the interface between the high level
 * kernel VNOPS API and the filesystem backend (the chains).
 *
 * On return the inode is locked with the supplied cluster.
 */
hammer2_inode_t *
hammer2_inode_get(hammer2_pfs_t *pmp, hammer2_inode_t *dip,
		  hammer2_cluster_t *cluster, int idx)
{
	hammer2_inode_t *nip;
	const hammer2_inode_data_t *iptmp;
	const hammer2_inode_data_t *nipdata;

	KKASSERT(cluster == NULL ||
		 hammer2_cluster_type(cluster) == HAMMER2_BREF_TYPE_INODE);
	KKASSERT(pmp);

	/*
	 * Interlocked lookup/ref of the inode.  This code is only needed
	 * when looking up inodes with nlinks != 0 (TODO: optimize out
	 * otherwise and test for duplicates).
	 *
	 * Cluster can be NULL during the initial pfs allocation.
	 */
again:
	while (cluster) {
		iptmp = &hammer2_cluster_rdata(cluster)->ipdata;
		nip = hammer2_inode_lookup(pmp, iptmp->meta.inum);
		if (nip == NULL)
			break;

		hammer2_mtx_ex(&nip->lock);

		/*
		 * Handle SMP race (not applicable to the super-root spmp
		 * which can't index inodes due to duplicative inode numbers).
		 */
		if (pmp->spmp_hmp == NULL &&
		    (nip->flags & HAMMER2_INODE_ONRBTREE) == 0) {
			hammer2_mtx_unlock(&nip->lock);
			hammer2_inode_drop(nip);
			continue;
		}
		if (idx >= 0)
			hammer2_inode_repoint_one(nip, cluster, idx);
		else
			hammer2_inode_repoint(nip, NULL, cluster);

		return nip;
	}

	/*
	 * We couldn't find the inode number, create a new inode.
	 */
	nip = kmalloc(sizeof(*nip), pmp->minode, M_WAITOK | M_ZERO);
	spin_init(&nip->cluster_spin, "h2clspin");
	atomic_add_long(&pmp->inmem_inodes, 1);
	hammer2_pfs_memory_inc(pmp);
	hammer2_pfs_memory_wakeup(pmp);
	if (pmp->spmp_hmp)
		nip->flags = HAMMER2_INODE_SROOT;

	/*
	 * Initialize nip's cluster.  A cluster is provided for normal
	 * inodes but typically not for the super-root or PFS inodes.
	 */
	nip->cluster.refs = 1;
	nip->cluster.pmp = pmp;
	nip->cluster.flags |= HAMMER2_CLUSTER_INODE;
	if (cluster) {
		nipdata = &hammer2_cluster_rdata(cluster)->ipdata;
		nip->meta = nipdata->meta;
		atomic_set_int(&nip->flags, HAMMER2_INODE_METAGOOD);
		hammer2_inode_repoint(nip, NULL, cluster);
	} else {
		nip->meta.inum = 1;		/* PFS inum is always 1 XXX */
		/* mtime will be updated when a cluster is available */
		atomic_set_int(&nip->flags, HAMMER2_INODE_METAGOOD);	/*XXX*/
	}

	nip->pip = dip;				/* can be NULL */
	if (dip)
		hammer2_inode_ref(dip);		/* ref dip for nip->pip */

	nip->pmp = pmp;

	/*
	 * ref and lock on nip gives it state compatible to after a
	 * hammer2_inode_lock() call.
	 */
	nip->refs = 1;
	hammer2_mtx_init(&nip->lock, "h2inode");
	hammer2_mtx_ex(&nip->lock);
	/* combination of thread lock and chain lock == inode lock */

	/*
	 * Attempt to add the inode.  If it fails we raced another inode
	 * get.  Undo all the work and try again.
	 */
	if (pmp->spmp_hmp == NULL) {
		hammer2_spin_ex(&pmp->inum_spin);
		if (RB_INSERT(hammer2_inode_tree, &pmp->inum_tree, nip)) {
			hammer2_spin_unex(&pmp->inum_spin);
			hammer2_mtx_unlock(&nip->lock);
			hammer2_inode_drop(nip);
			goto again;
		}
		atomic_set_int(&nip->flags, HAMMER2_INODE_ONRBTREE);
		hammer2_spin_unex(&pmp->inum_spin);
	}

	return (nip);
}

/*
 * Create a new inode in the specified directory using the vattr to
 * figure out the type of inode.
 *
 * If no error occurs the new inode with its cluster locked is returned in
 * *nipp, otherwise an error is returned and *nipp is set to NULL.
 *
 * If vap and/or cred are NULL the related fields are not set and the
 * inode type defaults to a directory.  This is used when creating PFSs
 * under the super-root, so the inode number is set to 1 in this case.
 *
 * dip is not locked on entry.
 *
 * NOTE: When used to create a snapshot, the inode is temporarily associated
 *	 with the super-root spmp.  XXX should pass new pmp for snapshot.
 */
hammer2_inode_t *
hammer2_inode_create(hammer2_inode_t *dip,
		     struct vattr *vap, struct ucred *cred,
		     const uint8_t *name, size_t name_len, hammer2_key_t lhc,
		     hammer2_key_t inum, uint8_t type, uint8_t target_type,
		     int flags, int *errorp)
{
	hammer2_xop_create_t *xop;
	hammer2_inode_t *nip;
	int error;
	uid_t xuid;
	uuid_t dip_uid;
	uuid_t dip_gid;
	uint32_t dip_mode;
	uint8_t dip_comp_algo;
	uint8_t dip_check_algo;

	/* a supplied name overrides the passed-in lhc */
	if (name)
		lhc = hammer2_dirhash(name, name_len);
	*errorp = 0;
	nip = NULL;

	/*
	 * Locate the inode or indirect block to create the new
	 * entry in.  At the same time check for key collisions
	 * and iterate until we don't get one.
	 *
	 * NOTE: hidden inodes do not have iterators.
	 *
	 * Lock the directory exclusively for now to guarantee that
	 * we can find an unused lhc for the name.  Due to collisions,
	 * two different creates can end up with the same lhc so we
	 * cannot depend on the OS to prevent the collision.
	 */
	hammer2_inode_lock(dip, 0);

	/* snapshot dip's meta-data while it is locked */
	dip_uid = dip->meta.uid;
	dip_gid = dip->meta.gid;
	dip_mode = dip->meta.mode;
	dip_comp_algo = dip->meta.comp_algo;
	dip_check_algo = dip->meta.check_algo;

	/*
	 * If name specified, locate an unused key in the collision space.
	 * Otherwise use the passed-in lhc directly.
	 */
	if (name) {
		hammer2_xop_scanlhc_t *sxop;
		hammer2_key_t lhcbase;

		lhcbase = lhc;
		sxop = hammer2_xop_alloc(dip, HAMMER2_XOP_MODIFYING);
		sxop->lhc = lhc;
		hammer2_xop_start(&sxop->head, hammer2_xop_scanlhc);
		while ((error = hammer2_xop_collect(&sxop->head, 0)) == 0) {
			if (lhc != sxop->head.cluster.focus->bref.key)
				break;
			++lhc;
		}
		hammer2_xop_retire(&sxop->head, HAMMER2_XOPMASK_VOP);

		if (error) {
			if (error != ENOENT)
				goto done2;
			++lhc;
			error = 0;
		}
		if ((lhcbase ^ lhc) & ~HAMMER2_DIRHASH_LOMASK) {
			/* iteration space for the hash exhausted */
			error = ENOSPC;
			goto done2;
		}
	}

	/*
	 * Create the inode with the lhc as the key.
	 */
	xop = hammer2_xop_alloc(dip, HAMMER2_XOP_MODIFYING);
	xop->lhc = lhc;
	xop->flags = flags;
	bzero(&xop->meta, sizeof(xop->meta));

	if (vap) {
		xop->meta.type = hammer2_get_obj_type(vap->va_type);

		switch (xop->meta.type) {
		case HAMMER2_OBJTYPE_CDEV:
		case HAMMER2_OBJTYPE_BDEV:
			xop->meta.rmajor = vap->va_rmajor;
			xop->meta.rminor = vap->va_rminor;
			break;
		default:
			break;
		}
		type = xop->meta.type;
	} else {
		xop->meta.type = type;
		xop->meta.target_type = target_type;
	}
	xop->meta.inum = inum;

	/* Inherit parent's inode compression mode. */
	xop->meta.comp_algo = dip_comp_algo;
	xop->meta.check_algo = dip_check_algo;
	xop->meta.version = HAMMER2_INODE_VERSION_ONE;
	hammer2_update_time(&xop->meta.ctime);
	xop->meta.mtime = xop->meta.ctime;
	if (vap)
		xop->meta.mode = vap->va_mode;
	xop->meta.nlinks = 1;
	if (vap) {
		if (dip && dip->pmp) {
			xuid = hammer2_to_unix_xid(&dip_uid);
			xuid = vop_helper_create_uid(dip->pmp->mp,
						     dip_mode,
						     xuid,
						     cred,
						     &vap->va_mode);
		} else {
			/* super-root has no dip and/or pmp */
			xuid = 0;
		}
		if (vap->va_vaflags & VA_UID_UUID_VALID)
			xop->meta.uid = vap->va_uid_uuid;
		else if (vap->va_uid != (uid_t)VNOVAL)
			hammer2_guid_to_uuid(&xop->meta.uid, vap->va_uid);
		else
			hammer2_guid_to_uuid(&xop->meta.uid, xuid);

		if (vap->va_vaflags & VA_GID_UUID_VALID)
			xop->meta.gid = vap->va_gid_uuid;
		else if (vap->va_gid != (gid_t)VNOVAL)
			hammer2_guid_to_uuid(&xop->meta.gid, vap->va_gid);
		else if (dip)
			xop->meta.gid = dip_gid;
	}

	/*
	 * Regular files and softlinks allow a small amount of data to be
	 * directly embedded in the inode.  This flag will be cleared if
	 * the size is extended past the embedded limit.
	 */
	if (xop->meta.type == HAMMER2_OBJTYPE_REGFILE ||
	    xop->meta.type == HAMMER2_OBJTYPE_SOFTLINK ||
	    xop->meta.type == HAMMER2_OBJTYPE_HARDLINK) {
		xop->meta.op_flags |= HAMMER2_OPFLAG_DIRECTDATA;
	}
	if (name)
		hammer2_xop_setname(&xop->head, name, name_len);
	xop->meta.name_len = name_len;
	xop->meta.name_key = lhc;
	KKASSERT(name_len < HAMMER2_INODE_MAXNAME);

	hammer2_xop_start(&xop->head, hammer2_inode_xop_create);

	error = hammer2_xop_collect(&xop->head, 0);
#if INODE_DEBUG
	kprintf("CREATE INODE %*.*s\n",
		(int)name_len, (int)name_len, name);
#endif

	if (error) {
		*errorp = error;
		goto done;
	}

	/*
	 * Set up the new inode if not a hardlink pointer.
	 *
	 * NOTE: *_get() integrates chain's lock into the inode lock.
	 *
	 * NOTE: Only one new inode can currently be created per
	 *	 transaction.  If the need arises we can adjust
	 *	 hammer2_trans_init() to allow more.
	 *
	 * NOTE: nipdata will have chain's blockset data.
	 */
	if (type != HAMMER2_OBJTYPE_HARDLINK) {
		nip = hammer2_inode_get(dip->pmp, dip, &xop->head.cluster, -1);
		nip->comp_heuristic = 0;
	} else {
		nip = NULL;
	}

done:
	hammer2_xop_retire(&xop->head, HAMMER2_XOPMASK_VOP);
done2:
	hammer2_inode_unlock(dip);

	return (nip);
}

/*
 * Connect the disconnected inode (ip) to the directory (dip) with the
 * specified (name, name_len).  If name is NULL, (lhc) will be used as
 * the directory key and the inode's embedded name will not be modified
 * for future recovery purposes.
 *
 * dip and ip must both be locked exclusively (dip in particular to avoid
 * lhc collisions).
 */
int
hammer2_inode_connect(hammer2_inode_t *dip, hammer2_inode_t *ip,
		      const char *name, size_t name_len,
		      hammer2_key_t lhc)
{
	hammer2_xop_scanlhc_t *sxop;
	hammer2_xop_connect_t *xop;
	hammer2_inode_t *opip;
	hammer2_key_t lhcbase;
	int error;

	/*
	 * Calculate the lhc and resolve the collision space.
	 */
	if (name) {
		lhc = lhcbase = hammer2_dirhash(name, name_len);
		sxop = hammer2_xop_alloc(dip, HAMMER2_XOP_MODIFYING);
		sxop->lhc = lhc;
		hammer2_xop_start(&sxop->head, hammer2_xop_scanlhc);
		while ((error = hammer2_xop_collect(&sxop->head, 0)) == 0) {
			if (lhc != sxop->head.cluster.focus->bref.key)
				break;
			++lhc;
		}
		hammer2_xop_retire(&sxop->head, HAMMER2_XOPMASK_VOP);

		if (error) {
			if (error != ENOENT)
				goto done;
			++lhc;
			error = 0;
		}
		if ((lhcbase ^ lhc) & ~HAMMER2_DIRHASH_LOMASK) {
			/* ran out of iteration space for this hash */
			error = ENOSPC;
			goto done;
		}
	} else {
		error = 0;
	}

	/*
	 * Formally reconnect the in-memory structure.  ip must
	 * be locked exclusively to safely change ip->pip.
	 */
	if (ip->pip != dip) {
		hammer2_inode_ref(dip);
		opip = ip->pip;
		ip->pip = dip;
		if (opip)
			hammer2_inode_drop(opip);
	}

	/*
	 * Connect her up
	 */
	xop = hammer2_xop_alloc(dip, HAMMER2_XOP_MODIFYING);
	if (name)
		hammer2_xop_setname(&xop->head, name, name_len);
	hammer2_xop_setip2(&xop->head, ip);
	xop->lhc = lhc;
	hammer2_xop_start(&xop->head, hammer2_inode_xop_connect);
	error = hammer2_xop_collect(&xop->head, 0);
	hammer2_xop_retire(&xop->head, HAMMER2_XOPMASK_VOP);

	/*
	 * On success make the same adjustments to ip->meta or the
	 * next flush may blow up the chain.
	 */
	if (error == 0) {
		hammer2_inode_modify(ip);
		ip->meta.name_key = lhc;
		if (name)
			ip->meta.name_len = name_len;
	}
done:
	return error;
}

/*
 * Repoint ip->cluster's chains to cluster's chains and fixup the default
 * focus.  All items, valid or invalid, are repointed.  hammer2_xop_start()
 * filters out invalid or non-matching elements.
 *
 * Caller must hold the inode and cluster exclusive locked, if not NULL,
 * must also be locked.
 *
 * Cluster may be NULL to clean out any chains in ip->cluster.
957 */ 958 void 959 hammer2_inode_repoint(hammer2_inode_t *ip, hammer2_inode_t *pip, 960 hammer2_cluster_t *cluster) 961 { 962 hammer2_chain_t *dropch[HAMMER2_MAXCLUSTER]; 963 hammer2_chain_t *ochain; 964 hammer2_chain_t *nchain; 965 hammer2_inode_t *opip; 966 int i; 967 968 bzero(dropch, sizeof(dropch)); 969 970 /* 971 * Replace chains in ip->cluster with chains from cluster and 972 * adjust the focus if necessary. 973 * 974 * NOTE: nchain and/or ochain can be NULL due to gaps 975 * in the cluster arrays. 976 */ 977 hammer2_spin_ex(&ip->cluster_spin); 978 for (i = 0; cluster && i < cluster->nchains; ++i) { 979 /* 980 * Do not replace elements which are the same. Also handle 981 * element count discrepancies. 982 */ 983 nchain = cluster->array[i].chain; 984 if (i < ip->cluster.nchains) { 985 ochain = ip->cluster.array[i].chain; 986 if (ochain == nchain) 987 continue; 988 } else { 989 ochain = NULL; 990 } 991 992 /* 993 * Make adjustments 994 */ 995 ip->cluster.array[i].chain = nchain; 996 ip->cluster.array[i].flags &= ~HAMMER2_CITEM_INVALID; 997 ip->cluster.array[i].flags |= cluster->array[i].flags & 998 HAMMER2_CITEM_INVALID; 999 if (nchain) 1000 hammer2_chain_ref(nchain); 1001 dropch[i] = ochain; 1002 } 1003 1004 /* 1005 * Release any left-over chains in ip->cluster. 1006 */ 1007 while (i < ip->cluster.nchains) { 1008 nchain = ip->cluster.array[i].chain; 1009 if (nchain) { 1010 ip->cluster.array[i].chain = NULL; 1011 ip->cluster.array[i].flags |= HAMMER2_CITEM_INVALID; 1012 } 1013 dropch[i] = nchain; 1014 ++i; 1015 } 1016 1017 /* 1018 * Fixup fields. Note that the inode-embedded cluster is never 1019 * directly locked. 
1020 */ 1021 if (cluster) { 1022 ip->cluster.nchains = cluster->nchains; 1023 ip->cluster.focus = cluster->focus; 1024 ip->cluster.flags = cluster->flags & ~HAMMER2_CLUSTER_LOCKED; 1025 } else { 1026 ip->cluster.nchains = 0; 1027 ip->cluster.focus = NULL; 1028 ip->cluster.flags &= ~HAMMER2_CLUSTER_ZFLAGS; 1029 } 1030 1031 /* 1032 * Repoint ip->pip if requested (non-NULL pip). 1033 */ 1034 if (pip && ip->pip != pip) { 1035 opip = ip->pip; 1036 hammer2_inode_ref(pip); 1037 ip->pip = pip; 1038 } else { 1039 opip = NULL; 1040 } 1041 hammer2_spin_unex(&ip->cluster_spin); 1042 1043 /* 1044 * Cleanup outside of spinlock 1045 */ 1046 while (--i >= 0) { 1047 if (dropch[i]) 1048 hammer2_chain_drop(dropch[i]); 1049 } 1050 if (opip) 1051 hammer2_inode_drop(opip); 1052 } 1053 1054 /* 1055 * Repoint a single element from the cluster to the ip. Used by the 1056 * synchronization threads to piecemeal update inodes. Does not change 1057 * focus and requires inode to be re-locked to clean-up flags (XXX). 1058 */ 1059 void 1060 hammer2_inode_repoint_one(hammer2_inode_t *ip, hammer2_cluster_t *cluster, 1061 int idx) 1062 { 1063 hammer2_chain_t *ochain; 1064 hammer2_chain_t *nchain; 1065 int i; 1066 1067 hammer2_spin_ex(&ip->cluster_spin); 1068 KKASSERT(idx < cluster->nchains); 1069 if (idx < ip->cluster.nchains) { 1070 ochain = ip->cluster.array[idx].chain; 1071 nchain = cluster->array[idx].chain; 1072 } else { 1073 ochain = NULL; 1074 nchain = cluster->array[idx].chain; 1075 ip->cluster.nchains = idx + 1; 1076 for (i = ip->cluster.nchains; i <= idx; ++i) { 1077 bzero(&ip->cluster.array[i], 1078 sizeof(ip->cluster.array[i])); 1079 ip->cluster.array[i].flags |= HAMMER2_CITEM_INVALID; 1080 } 1081 } 1082 if (ochain != nchain) { 1083 /* 1084 * Make adjustments. 
1085 */ 1086 ip->cluster.array[idx].chain = nchain; 1087 ip->cluster.array[idx].flags &= ~HAMMER2_CITEM_INVALID; 1088 ip->cluster.array[idx].flags |= cluster->array[idx].flags & 1089 HAMMER2_CITEM_INVALID; 1090 } 1091 hammer2_spin_unex(&ip->cluster_spin); 1092 if (ochain != nchain) { 1093 if (nchain) 1094 hammer2_chain_ref(nchain); 1095 if (ochain) 1096 hammer2_chain_drop(ochain); 1097 } 1098 } 1099 1100 /* 1101 * Called with a locked inode to finish unlinking an inode after xop_unlink 1102 * had been run. This function is responsible for decrementing nlinks and 1103 * moving deleted inodes to the hidden directory if they are still open. 1104 * 1105 * We don't bother decrementing nlinks if the file is not open and this was 1106 * the last link. 1107 * 1108 * If the inode is a hardlink target it's chain has not yet been deleted, 1109 * otherwise it's chain has been deleted. 1110 * 1111 * If isopen then any prior deletion was not permanent and the inode must 1112 * be moved to the hidden directory. 1113 */ 1114 int 1115 hammer2_inode_unlink_finisher(hammer2_inode_t *ip, int isopen) 1116 { 1117 hammer2_pfs_t *pmp; 1118 int error; 1119 1120 pmp = ip->pmp; 1121 1122 /* 1123 * Decrement nlinks. If this is the last link and the file is 1124 * not open, the chain has already been removed and we don't bother 1125 * dirtying the inode. 1126 */ 1127 if (ip->meta.nlinks == 1) { 1128 atomic_set_int(&ip->flags, HAMMER2_INODE_ISUNLINKED); 1129 if (isopen == 0) { 1130 atomic_set_int(&ip->flags, HAMMER2_INODE_ISDELETED); 1131 return 0; 1132 } 1133 } 1134 1135 hammer2_inode_modify(ip); 1136 --ip->meta.nlinks; 1137 if ((int64_t)ip->meta.nlinks < 0) 1138 ip->meta.nlinks = 0; /* safety */ 1139 1140 /* 1141 * If nlinks is not zero we are done. However, this should only be 1142 * possible with a hardlink target. If the inode is an embedded 1143 * hardlink nlinks should have dropped to zero, warn and proceed 1144 * with the next step. 
	 */
	if (ip->meta.nlinks) {
		/* hardlink target (not directory-visible): silently done */
		if ((ip->meta.name_key & HAMMER2_DIRHASH_VISIBLE) == 0)
			return 0;
		kprintf("hammer2_inode_unlink: nlinks was not 0 (%jd)\n",
			(intmax_t)ip->meta.nlinks);
		return 0;
	}

	/*
	 * nlinks is now zero, the inode should have already been deleted.
	 * If the file is open it was deleted non-permanently and must be
	 * moved to the hidden directory.
	 *
	 * When moving to the hidden directory we force the name_key to the
	 * inode number to avoid collisions.
	 */
	if (isopen) {
		hammer2_inode_lock(pmp->ihidden, 0);
		error = hammer2_inode_connect(pmp->ihidden, ip,
					      NULL, 0, ip->meta.inum);
		hammer2_inode_unlock(pmp->ihidden);
	} else {
		error = 0;
	}
	return error;
}

/*
 * This is called from the mount code to initialize pmp->ihidden, the
 * per-PFS hidden directory used to park open-but-unlinked inodes.
 */
void
hammer2_inode_install_hidden(hammer2_pfs_t *pmp)
{
	int error;

	/* already installed */
	if (pmp->ihidden)
		return;

	hammer2_trans_init(pmp, 0);
	hammer2_inode_lock(pmp->iroot, 0);

	/*
	 * Find the hidden directory
	 */
	{
		hammer2_xop_lookup_t *xop;

		xop = hammer2_xop_alloc(pmp->iroot, HAMMER2_XOP_MODIFYING);
		xop->lhc = HAMMER2_INODE_HIDDENDIR;
		hammer2_xop_start(&xop->head, hammer2_xop_lookup);
		error = hammer2_xop_collect(&xop->head, 0);

		if (error == 0) {
			/*
			 * Found the hidden directory
			 */
			kprintf("PFS FOUND HIDDEN DIR\n");
			pmp->ihidden = hammer2_inode_get(pmp, pmp->iroot,
							 &xop->head.cluster,
							 -1);
			/* retain a ref for pmp->ihidden, then unlock.
			 * NOTE(review): assumes inode_get returns a locked,
			 * referenced inode -- verify against its contract. */
			hammer2_inode_ref(pmp->ihidden);
			hammer2_inode_unlock(pmp->ihidden);
		}
		hammer2_xop_retire(&xop->head, HAMMER2_XOPMASK_VOP);
	}

	/*
	 * Create the hidden directory if it could not be found.
	 */
	if (error == ENOENT) {
		kprintf("PFS CREATE HIDDEN DIR\n");

		pmp->ihidden = hammer2_inode_create(pmp->iroot, NULL, NULL,
						    NULL, 0,
				/* lhc */	    HAMMER2_INODE_HIDDENDIR,
				/* inum */	    HAMMER2_INODE_HIDDENDIR,
				/* type */	    HAMMER2_OBJTYPE_DIRECTORY,
				/* target_type */   0,
				/* flags */	    0,
						    &error);
		if (pmp->ihidden) {
			/* retain a ref for pmp->ihidden, then unlock */
			hammer2_inode_ref(pmp->ihidden);
			hammer2_inode_unlock(pmp->ihidden);
		}
		if (error)
			kprintf("PFS CREATE ERROR %d\n", error);
	}

	/*
	 * Scan the hidden directory on-mount and destroy its contents
	 */
	if (error == 0) {
		hammer2_xop_unlinkall_t *xop;

		hammer2_inode_lock(pmp->ihidden, 0);
		xop = hammer2_xop_alloc(pmp->ihidden, HAMMER2_XOP_MODIFYING);
		xop->key_beg = HAMMER2_KEY_MIN;
		xop->key_end = HAMMER2_KEY_MAX;
		hammer2_xop_start(&xop->head, hammer2_inode_xop_unlinkall);

		/* drain the feed; the backend terminates it with ENOENT */
		while ((error = hammer2_xop_collect(&xop->head, 0)) == 0) {
			;
		}
		hammer2_xop_retire(&xop->head, HAMMER2_XOPMASK_VOP);
		hammer2_inode_unlock(pmp->ihidden);
	}

	hammer2_inode_unlock(pmp->iroot);
	hammer2_trans_done(pmp);
}

/*
 * Find the directory common to both fdip and tdip.
 *
 * Returns a held but not locked inode.  Caller typically locks the inode,
 * and when through unlocks AND drops it.
 */
hammer2_inode_t *
hammer2_inode_common_parent(hammer2_inode_t *fdip, hammer2_inode_t *tdip)
{
	hammer2_inode_t *scan1;
	hammer2_inode_t *scan2;

	/*
	 * We used to have a depth field but it complicated matters too
	 * much for directory renames.  So now its ugly.  Check for
	 * simple cases before giving up and doing it the expensive way.
	 *
	 * XXX need a bottom-up topology stability lock
	 */
	if (fdip == tdip || fdip == tdip->pip) {
		/* same directory, or fdip is tdip's immediate parent */
		hammer2_inode_ref(fdip);
		return(fdip);
	}
	if (fdip->pip == tdip) {
		/* tdip is fdip's immediate parent */
		hammer2_inode_ref(tdip);
		return(tdip);
	}

	/*
	 * XXX not MPSAFE
	 *
	 * Walk each ancestor of fdip (staying within this PFS) and test
	 * it against each ancestor of tdip; the first match is the
	 * common parent.
	 *
	 * NOTE(review): unlike the inner loop, the outer loop does not
	 * check scan1 for NULL before dereferencing scan1->pmp -- it
	 * assumes fdip's ancestry leaves the PFS before pip hits NULL;
	 * verify.
	 */
	for (scan1 = fdip; scan1->pmp == fdip->pmp; scan1 = scan1->pip) {
		scan2 = tdip;
		while (scan2->pmp == tdip->pmp) {
			if (scan1 == scan2) {
				hammer2_inode_ref(scan1);
				return(scan1);
			}
			scan2 = scan2->pip;
			if (scan2 == NULL)
				break;
		}
	}
	panic("hammer2_inode_common_parent: no common parent %p %p\n",
	      fdip, tdip);
	/* NOT REACHED */
	return(NULL);
}

/*
 * Mark an inode as being modified, meaning that the caller will modify
 * ip->meta.
 *
 * If a vnode is present we set the vnode dirty and the nominal filesystem
 * sync will also handle synchronizing the inode meta-data.  If no vnode
 * is present we must ensure that the inode is on pmp->sideq.
 *
 * NOTE: No mtid (modify_tid) is passed into this routine.  The caller is
 *	 only modifying the in-memory inode.  A modify_tid is synchronized
 *	 later when the inode gets flushed.
 */
void
hammer2_inode_modify(hammer2_inode_t *ip)
{
	hammer2_pfs_t *pmp;

	atomic_set_int(&ip->flags, HAMMER2_INODE_MODIFIED);
	if (ip->vp) {
		vsetisdirty(ip->vp);
	} else if ((pmp = ip->pmp) != NULL) {
		/* no vnode; queue the inode for flush via the sideq */
		hammer2_inode_delayed_sideq(ip);
	}
}

/*
 * Synchronize the inode's frontend state with the chain state prior
 * to any explicit flush of the inode or any strategy write call.
 *
 * Called with a locked inode inside a transaction.
 */
void
hammer2_inode_chain_sync(hammer2_inode_t *ip)
{
	if (ip->flags & (HAMMER2_INODE_RESIZED | HAMMER2_INODE_MODIFIED)) {
		hammer2_xop_fsync_t *xop;
		int error;

		xop = hammer2_xop_alloc(ip, HAMMER2_XOP_MODIFYING);
		xop->clear_directdata = 0;
		if (ip->flags & HAMMER2_INODE_RESIZED) {
			/*
			 * If the file grew past the embedded-data limit the
			 * direct-data area must be converted to a blockset;
			 * tell the backend to clear it.
			 */
			if ((ip->meta.op_flags & HAMMER2_OPFLAG_DIRECTDATA) &&
			    ip->meta.size > HAMMER2_EMBEDDED_BYTES) {
				ip->meta.op_flags &= ~HAMMER2_OPFLAG_DIRECTDATA;
				xop->clear_directdata = 1;
			}
			xop->osize = ip->osize;
		} else {
			xop->osize = ip->meta.size;	/* safety */
		}
		/* snapshot flags and meta-data for the backend */
		xop->ipflags = ip->flags;
		xop->meta = ip->meta;

		/* clear before dispatch; a failure could re-set them below */
		atomic_clear_int(&ip->flags, HAMMER2_INODE_RESIZED |
					     HAMMER2_INODE_MODIFIED);
		hammer2_xop_start(&xop->head, hammer2_inode_xop_chain_sync);
		error = hammer2_xop_collect(&xop->head, 0);
		hammer2_xop_retire(&xop->head, HAMMER2_XOPMASK_VOP);
		if (error == ENOENT)
			error = 0;	/* feed terminator, not a failure */
		if (error) {
			kprintf("hammer2: unable to fsync inode %p\n", ip);
			/*
			atomic_set_int(&ip->flags,
				       xop->ipflags & (HAMMER2_INODE_RESIZED |
						       HAMMER2_INODE_MODIFIED));
			*/
			/* XXX return error somehow? */
		}
	}
}

/*
 * The normal filesystem sync no longer has visibility to an inode structure
 * after its vnode has been reclaimed.  In this situation an unlinked-but-open
 * inode or a dirty inode may require additional processing to synchronize
 * ip->meta to its underlying cluster nodes.
 *
 * In particular, reclaims can occur in almost any state (for example, when
 * doing operations on unrelated vnodes) and flushing the reclaimed inode
 * in the reclaim path itself is a non-starter.
 *
 * Caller must be in a transaction.
 */
void
hammer2_inode_run_sideq(hammer2_pfs_t *pmp)
{
	hammer2_xop_destroy_t *xop;
	hammer2_inode_sideq_t *ipul;
	hammer2_inode_t *ip;
	int error;

	/* cheap unlocked check; queue is drained under list_spin below */
	if (TAILQ_EMPTY(&pmp->sideq))
		return;

	LOCKSTART;
	hammer2_spin_ex(&pmp->list_spin);
	while ((ipul = TAILQ_FIRST(&pmp->sideq)) != NULL) {
		TAILQ_REMOVE(&pmp->sideq, ipul, entry);
		ip = ipul->ip;
		KKASSERT(ip->flags & HAMMER2_INODE_ONSIDEQ);
		atomic_clear_int(&ip->flags, HAMMER2_INODE_ONSIDEQ);
		/* drop the spinlock while processing this inode */
		hammer2_spin_unex(&pmp->list_spin);
		kfree(ipul, pmp->minode);

		hammer2_inode_lock(ip, 0);
		if (ip->flags & HAMMER2_INODE_ISUNLINKED) {
			/*
			 * The inode was unlinked while open, causing H2
			 * to relink it to a hidden directory to allow
			 * cluster operations to continue until close.
			 *
			 * The inode must be deleted and destroyed.
			 */
			xop = hammer2_xop_alloc(ip, HAMMER2_XOP_MODIFYING);
			hammer2_xop_start(&xop->head,
					  hammer2_inode_xop_destroy);
			/* NOTE(review): the collect result is stored but
			 * never checked here -- intentional? verify. */
			error = hammer2_xop_collect(&xop->head, 0);
			hammer2_xop_retire(&xop->head, HAMMER2_XOPMASK_VOP);

			atomic_clear_int(&ip->flags, HAMMER2_INODE_ISDELETED);
		} else {
			/*
			 * The inode was dirty as-of the reclaim, requiring
			 * synchronization of ip->meta with its underlying
			 * chains.
			 */
			hammer2_inode_chain_sync(ip);
		}

		hammer2_inode_unlock(ip);
		hammer2_inode_drop(ip);	/* ipul ref */

		/* reacquire before re-testing the queue head */
		hammer2_spin_ex(&pmp->list_spin);
	}
	hammer2_spin_unex(&pmp->list_spin);
	LOCKSTOP;
}

/*
 * Inode create helper (threaded, backend)
 *
 * Used by ncreate, nmknod, nsymlink, nmkdir.
 * Used by nlink and rename to create HARDLINK pointers.
 *
 * Frontend holds the parent directory ip locked exclusively.  We
 * create the inode and feed the exclusively locked chain to the
 * frontend.
1454 */ 1455 void 1456 hammer2_inode_xop_create(hammer2_xop_t *arg, int clindex) 1457 { 1458 hammer2_xop_create_t *xop = &arg->xop_create; 1459 hammer2_chain_t *parent; 1460 hammer2_chain_t *chain; 1461 hammer2_key_t key_next; 1462 int cache_index = -1; 1463 int error; 1464 1465 if (hammer2_debug & 0x0001) 1466 kprintf("inode_create lhc %016jx clindex %d\n", 1467 xop->lhc, clindex); 1468 1469 parent = hammer2_inode_chain(xop->head.ip1, clindex, 1470 HAMMER2_RESOLVE_ALWAYS); 1471 if (parent == NULL) { 1472 error = EIO; 1473 chain = NULL; 1474 goto fail; 1475 } 1476 chain = hammer2_chain_lookup(&parent, &key_next, 1477 xop->lhc, xop->lhc, 1478 &cache_index, 0); 1479 if (chain) { 1480 error = EEXIST; 1481 goto fail; 1482 } 1483 1484 error = hammer2_chain_create(&parent, &chain, 1485 xop->head.ip1->pmp, 1486 xop->lhc, 0, 1487 HAMMER2_BREF_TYPE_INODE, 1488 HAMMER2_INODE_BYTES, 1489 xop->head.mtid, 0, xop->flags); 1490 if (error == 0) { 1491 hammer2_chain_modify(chain, xop->head.mtid, 0, 0); 1492 chain->data->ipdata.meta = xop->meta; 1493 if (xop->head.name1) { 1494 bcopy(xop->head.name1, 1495 chain->data->ipdata.filename, 1496 xop->head.name1_len); 1497 chain->data->ipdata.meta.name_len = xop->head.name1_len; 1498 } 1499 chain->data->ipdata.meta.name_key = xop->lhc; 1500 } 1501 hammer2_chain_unlock(chain); 1502 hammer2_chain_lock(chain, HAMMER2_RESOLVE_ALWAYS | 1503 HAMMER2_RESOLVE_SHARED); 1504 fail: 1505 if (parent) { 1506 hammer2_chain_unlock(parent); 1507 hammer2_chain_drop(parent); 1508 } 1509 hammer2_xop_feed(&xop->head, chain, clindex, error); 1510 if (chain) { 1511 hammer2_chain_unlock(chain); 1512 hammer2_chain_drop(chain); 1513 } 1514 } 1515 1516 /* 1517 * Inode delete helper (backend, threaded) 1518 * 1519 * Generally used by hammer2_run_sideq() 1520 */ 1521 void 1522 hammer2_inode_xop_destroy(hammer2_xop_t *arg, int clindex) 1523 { 1524 hammer2_xop_destroy_t *xop = &arg->xop_destroy; 1525 hammer2_pfs_t *pmp; 1526 hammer2_chain_t *parent; 1527 hammer2_chain_t 
			*chain;
	hammer2_inode_t *ip;
	int error;

	/*
	 * We need the precise parent chain to issue the deletion.
	 */
	ip = xop->head.ip1;
	pmp = ip->pmp;		/* currently unused beyond this point */
	chain = NULL;

	/* resolve the inode's chain, then step up to its parent */
	parent = hammer2_inode_chain(ip, clindex, HAMMER2_RESOLVE_ALWAYS);
	if (parent)
		hammer2_chain_getparent(&parent, HAMMER2_RESOLVE_ALWAYS);
	if (parent == NULL) {
		error = EIO;
		goto done;
	}
	/* re-resolve the inode chain itself as the deletion target */
	chain = hammer2_inode_chain(ip, clindex, HAMMER2_RESOLVE_ALWAYS);
	if (chain == NULL) {
		error = EIO;
		goto done;
	}
	hammer2_chain_delete(parent, chain, xop->head.mtid, 0);
	error = 0;
done:
	hammer2_xop_feed(&xop->head, NULL, clindex, error);
	if (parent) {
		hammer2_chain_unlock(parent);
		hammer2_chain_drop(parent);
	}
	if (chain) {
		hammer2_chain_unlock(chain);
		hammer2_chain_drop(chain);
	}
}

/*
 * Inode unlink-all helper (backend, threaded)
 *
 * Permanently deletes every chain in [key_beg, key_end] under ip1,
 * feeding each deleted chain back to the frontend.  Used on-mount to
 * clean out the hidden directory.
 */
void
hammer2_inode_xop_unlinkall(hammer2_xop_t *arg, int clindex)
{
	hammer2_xop_unlinkall_t *xop = &arg->xop_unlinkall;
	hammer2_chain_t *parent;
	hammer2_chain_t *chain;
	hammer2_key_t key_next;
	int cache_index = -1;

	/*
	 * We need the precise parent chain to issue the deletion.
	 */
	parent = hammer2_inode_chain(xop->head.ip1, clindex,
				     HAMMER2_RESOLVE_ALWAYS);
	chain = NULL;
	if (parent == NULL) {
		/* XXX error */
		goto done;
	}
	chain = hammer2_chain_lookup(&parent, &key_next,
				     xop->key_beg, xop->key_end,
				     &cache_index,
				     HAMMER2_LOOKUP_ALWAYS);
	while (chain) {
		hammer2_chain_delete(parent, chain,
				     xop->head.mtid, HAMMER2_DELETE_PERMANENT);
		/* downgrade to a shared lock before feeding to frontend */
		hammer2_chain_unlock(chain);
		hammer2_chain_lock(chain, HAMMER2_RESOLVE_ALWAYS |
					  HAMMER2_RESOLVE_SHARED);
		hammer2_xop_feed(&xop->head, chain, clindex, chain->error);
		/* depend on function to unlock the shared lock */
		chain = hammer2_chain_next(&parent, chain, &key_next,
					   key_next, xop->key_end,
					   &cache_index,
					   HAMMER2_LOOKUP_ALWAYS);
	}
done:
	/* terminate the feed */
	hammer2_xop_feed(&xop->head, NULL, clindex, ENOENT);
	if (parent) {
		hammer2_chain_unlock(parent);
		hammer2_chain_drop(parent);
	}
	if (chain) {
		hammer2_chain_unlock(chain);
		hammer2_chain_drop(chain);
	}
}

/*
 * Inode connect helper (backend, threaded)
 *
 * Connects ip2's chain under directory ip1 at key lhc, updating the
 * chain's filename and name_key in the process.
 */
void
hammer2_inode_xop_connect(hammer2_xop_t *arg, int clindex)
{
	hammer2_xop_connect_t *xop = &arg->xop_connect;
	hammer2_inode_data_t *wipdata;
	hammer2_chain_t *parent;
	hammer2_chain_t *chain;
	hammer2_pfs_t *pmp;
	hammer2_key_t key_dummy;
	int cache_index = -1;
	int error;

	/*
	 * Get directory, then issue a lookup to prime the parent chain
	 * for the create.  The lookup is expected to fail.
	 */
	pmp = xop->head.ip1->pmp;
	parent = hammer2_inode_chain(xop->head.ip1, clindex,
				     HAMMER2_RESOLVE_ALWAYS);
	if (parent == NULL) {
		chain = NULL;
		error = EIO;
		goto fail;
	}
	chain = hammer2_chain_lookup(&parent, &key_dummy,
				     xop->lhc, xop->lhc,
				     &cache_index, 0);
	if (chain) {
		/* the target key is already occupied */
		hammer2_chain_unlock(chain);
		hammer2_chain_drop(chain);
		chain = NULL;
		error = EEXIST;
		goto fail;
	}

	/*
	 * Adjust the filename in the inode, set the name key.
	 *
	 * NOTE: Frontend must also adjust ip2->meta on success, we can't
	 *	 do it here.
	 *
	 * NOTE(review): chain is not NULL-checked before the modify below;
	 * assumes ip2 always has a chain at this clindex -- verify.
	 */
	chain = hammer2_inode_chain(xop->head.ip2, clindex,
				    HAMMER2_RESOLVE_ALWAYS);
	hammer2_chain_modify(chain, xop->head.mtid, 0, 0);
	wipdata = &chain->data->ipdata;

	hammer2_inode_modify(xop->head.ip2);
	if (xop->head.name1) {
		bzero(wipdata->filename, sizeof(wipdata->filename));
		bcopy(xop->head.name1, wipdata->filename, xop->head.name1_len);
		wipdata->meta.name_len = xop->head.name1_len;
	}
	wipdata->meta.name_key = xop->lhc;

	/*
	 * Reconnect the chain to the new parent directory
	 */
	error = hammer2_chain_create(&parent, &chain, pmp,
				     xop->lhc, 0,
				     HAMMER2_BREF_TYPE_INODE,
				     HAMMER2_INODE_BYTES,
				     xop->head.mtid, 0, 0);

	/*
	 * Feed result back.
	 */
fail:
	hammer2_xop_feed(&xop->head, NULL, clindex, error);
	if (parent) {
		hammer2_chain_unlock(parent);
		hammer2_chain_drop(parent);
	}
	if (chain) {
		hammer2_chain_unlock(chain);
		hammer2_chain_drop(chain);
	}
}

/*
 * Synchronize the in-memory inode with the chain.
 */
void
hammer2_inode_xop_chain_sync(hammer2_xop_t *arg, int clindex)
{
	hammer2_xop_fsync_t *xop = &arg->xop_fsync;
	hammer2_chain_t *parent;
	hammer2_chain_t *chain;
	int error;

	parent = hammer2_inode_chain(xop->head.ip1, clindex,
				     HAMMER2_RESOLVE_ALWAYS);
	chain = NULL;
	if (parent == NULL) {
		error = EIO;
		goto done;
	}
	if (parent->error) {
		error = parent->error;
		goto done;
	}

	error = 0;

	if ((xop->ipflags & HAMMER2_INODE_RESIZED) == 0) {
		/* osize must be ignored */
	} else if (xop->meta.size < xop->osize) {
		/*
		 * We must delete any chains beyond the EOF.  The chain
		 * straddling the EOF will be pending in the bioq.
		 */
		hammer2_key_t lbase;
		hammer2_key_t key_next;
		int cache_index = -1;

		/* round the new EOF up to the next physical buffer boundary */
		lbase = (xop->meta.size + HAMMER2_PBUFMASK64) &
			~HAMMER2_PBUFMASK64;
		chain = hammer2_chain_lookup(&parent, &key_next,
					     lbase, HAMMER2_KEY_MAX,
					     &cache_index,
					     HAMMER2_LOOKUP_NODATA |
					     HAMMER2_LOOKUP_NODIRECT);
		while (chain) {
			/*
			 * Degenerate embedded case, nothing to loop on
			 */
			switch (chain->bref.type) {
			case HAMMER2_BREF_TYPE_INODE:
				KKASSERT(0);
				break;
			case HAMMER2_BREF_TYPE_DATA:
				hammer2_chain_delete(parent, chain,
						     xop->head.mtid,
						     HAMMER2_DELETE_PERMANENT);
				break;
			}
			chain = hammer2_chain_next(&parent, chain, &key_next,
						   key_next, HAMMER2_KEY_MAX,
						   &cache_index,
						   HAMMER2_LOOKUP_NODATA |
						   HAMMER2_LOOKUP_NODIRECT);
		}

		/*
		 * Reset to point at inode for following code, if necessary.
		 * (the iteration above may have left parent pointing at an
		 * indirect block rather than the inode)
		 */
		if (parent->bref.type != HAMMER2_BREF_TYPE_INODE) {
			hammer2_chain_unlock(parent);
			hammer2_chain_drop(parent);
			parent = hammer2_inode_chain(xop->head.ip1, clindex,
						     HAMMER2_RESOLVE_ALWAYS);
			kprintf("hammer2: TRUNCATE RESET on '%s'\n",
				parent->data->ipdata.filename);
		}
	}

	/*
	 * Sync the inode meta-data, potentially clear the blockset area
	 * of direct data so it can be used for blockrefs.
	 */
	hammer2_chain_modify(parent, xop->head.mtid, 0, 0);
	parent->data->ipdata.meta = xop->meta;
	if (xop->clear_directdata) {
		bzero(&parent->data->ipdata.u.blockset,
		      sizeof(parent->data->ipdata.u.blockset));
	}
done:
	if (chain) {
		hammer2_chain_unlock(chain);
		hammer2_chain_drop(chain);
	}
	if (parent) {
		hammer2_chain_unlock(parent);
		hammer2_chain_drop(parent);
	}
	hammer2_xop_feed(&xop->head, NULL, clindex, error);
}