1 /* 2 * Copyright (c) 2011-2014 The DragonFly Project. All rights reserved. 3 * 4 * This code is derived from software contributed to The DragonFly Project 5 * by Matthew Dillon <dillon@dragonflybsd.org> 6 * by Venkatesh Srinivas <vsrinivas@dragonflybsd.org> 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in 16 * the documentation and/or other materials provided with the 17 * distribution. 18 * 3. Neither the name of The DragonFly Project nor the names of its 19 * contributors may be used to endorse or promote products derived 20 * from this software without specific, prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 23 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 24 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 25 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 26 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 27 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 28 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 29 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 30 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 31 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 32 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33 * SUCH DAMAGE. 
 */
#include <sys/cdefs.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/types.h>
#include <sys/lock.h>
#include <sys/uuid.h>

#include "hammer2.h"

#define INODE_DEBUG	0

RB_GENERATE2(hammer2_inode_tree, hammer2_inode, rbnode, hammer2_inode_cmp,
	     hammer2_tid_t, meta.inum);

/*
 * Red-black tree comparator for the per-PFS inode-number index
 * (pmp->inum_tree).  Orders inodes by meta.inum.
 *
 * Returns -1, 0, or +1 per the usual RB comparator contract.
 */
int
hammer2_inode_cmp(hammer2_inode_t *ip1, hammer2_inode_t *ip2)
{
	if (ip1->meta.inum < ip2->meta.inum)
		return(-1);
	if (ip1->meta.inum > ip2->meta.inum)
		return(1);
	return(0);
}

/*
 * Queue an inode on the PFS side-queue (pmp->sideq) for delayed
 * processing, adding a ref to the inode that is owned by the queue
 * entry.  A no-op if the inode is already on the side-queue.
 *
 * Uses a double-checked pattern: the ONSIDEQ flag is tested once
 * unlocked (fast path), the entry is allocated outside the spinlock,
 * and the flag is re-tested under pmp->list_spin; if another thread
 * won the race the speculative allocation is freed.
 */
static
void
hammer2_inode_delayed_sideq(hammer2_inode_t *ip)
{
	hammer2_inode_sideq_t *ipul;
	hammer2_pfs_t *pmp = ip->pmp;

	if ((ip->flags & HAMMER2_INODE_ONSIDEQ) == 0) {
		/* allocate outside the spinlock; may sleep (M_WAITOK) */
		ipul = kmalloc(sizeof(*ipul), pmp->minode,
			       M_WAITOK | M_ZERO);
		ipul->ip = ip;
		hammer2_spin_ex(&pmp->list_spin);
		if ((ip->flags & HAMMER2_INODE_ONSIDEQ) == 0) {
			/* ref is owned by the sideq entry */
			hammer2_inode_ref(ip);
			atomic_set_int(&ip->flags,
				       HAMMER2_INODE_ONSIDEQ);
			TAILQ_INSERT_TAIL(&pmp->sideq, ipul, entry);
			hammer2_spin_unex(&pmp->list_spin);
		} else {
			/* lost the race; discard speculative allocation */
			hammer2_spin_unex(&pmp->list_spin);
			kfree(ipul, pmp->minode);
		}
	}
}

/*
 * HAMMER2 inode locks
 *
 * HAMMER2 offers shared and exclusive locks on inodes.  Pass a mask of
 * flags for options:
 *
 *	- pass HAMMER2_RESOLVE_SHARED if a shared lock is desired.  The
 *	  inode locking function will automatically set the RDONLY flag.
 *
 *	- pass HAMMER2_RESOLVE_ALWAYS if you need the inode's meta-data.
 *	  Most front-end inode locks do.
 *
 *	- pass HAMMER2_RESOLVE_NEVER if you do not want to require that
 *	  the inode data be resolved.  This is used by the syncthr because
 *	  it can run on an unresolved/out-of-sync cluster, and also by the
 *	  vnode reclamation code to avoid unnecessary I/O (particularly when
 *	  disposing of hundreds of thousands of cached vnodes).
 *
 * The inode locking function locks the inode itself, resolves any stale
 * chains in the inode's cluster, and allocates a fresh copy of the
 * cluster with 1 ref and all the underlying chains locked.
 *
 * ip->cluster will be stable while the inode is locked.
 *
 * NOTE: We don't combine the inode/chain lock because putting away an
 *	 inode would otherwise confuse multiple lock holders of the inode.
 *
 * NOTE: In-memory inodes always point to hardlink targets (the actual file),
 *	 and never point to a hardlink pointer.
 *
 * NOTE: If caller passes HAMMER2_RESOLVE_RDONLY the exclusive locking code
 *	 will feel free to reduce the chain set in the cluster as an
 *	 optimization.  It will still be validated against the quorum if
 *	 appropriate, but the optimization might be able to reduce data
 *	 accesses to one node.  This flag is automatically set if the inode
 *	 is locked with HAMMER2_RESOLVE_SHARED.
 */
void
hammer2_inode_lock(hammer2_inode_t *ip, int how)
{
	/*
	 * Ref is held across the lock and is dropped by the matching
	 * hammer2_inode_unlock().
	 */
	hammer2_inode_ref(ip);

	/*
	 * Inode structure mutex - shared or exclusive depending on 'how'
	 */
	if (how & HAMMER2_RESOLVE_SHARED) {
		/*how |= HAMMER2_RESOLVE_RDONLY; not used */
		hammer2_mtx_sh(&ip->lock);
	} else {
		hammer2_mtx_ex(&ip->lock);
	}
}

/*
 * Select a chain out of an inode's cluster and lock it.
 *
 * The inode does not have to be locked.
141 */ 142 hammer2_chain_t * 143 hammer2_inode_chain(hammer2_inode_t *ip, int clindex, int how) 144 { 145 hammer2_chain_t *chain; 146 147 hammer2_spin_sh(&ip->cluster_spin); 148 if (clindex >= ip->cluster.nchains) 149 chain = NULL; 150 else 151 chain = ip->cluster.array[clindex].chain; 152 if (chain) { 153 hammer2_chain_ref(chain); 154 hammer2_spin_unsh(&ip->cluster_spin); 155 hammer2_chain_lock(chain, how); 156 } else { 157 hammer2_spin_unsh(&ip->cluster_spin); 158 } 159 return chain; 160 } 161 162 hammer2_chain_t * 163 hammer2_inode_chain_and_parent(hammer2_inode_t *ip, int clindex, 164 hammer2_chain_t **parentp, int how) 165 { 166 hammer2_chain_t *chain; 167 hammer2_chain_t *parent; 168 169 for (;;) { 170 hammer2_spin_sh(&ip->cluster_spin); 171 if (clindex >= ip->cluster.nchains) 172 chain = NULL; 173 else 174 chain = ip->cluster.array[clindex].chain; 175 if (chain) { 176 hammer2_chain_ref(chain); 177 hammer2_spin_unsh(&ip->cluster_spin); 178 hammer2_chain_lock(chain, how); 179 } else { 180 hammer2_spin_unsh(&ip->cluster_spin); 181 } 182 183 /* 184 * Get parent, lock order must be (parent, chain). 185 */ 186 parent = chain->parent; 187 hammer2_chain_ref(parent); 188 hammer2_chain_unlock(chain); 189 hammer2_chain_lock(parent, how); 190 hammer2_chain_lock(chain, how); 191 if (ip->cluster.array[clindex].chain == chain && 192 chain->parent == parent) { 193 break; 194 } 195 196 /* 197 * Retry 198 */ 199 hammer2_chain_unlock(chain); 200 hammer2_chain_drop(chain); 201 hammer2_chain_unlock(parent); 202 hammer2_chain_drop(parent); 203 } 204 *parentp = parent; 205 206 return chain; 207 } 208 209 void 210 hammer2_inode_unlock(hammer2_inode_t *ip) 211 { 212 hammer2_mtx_unlock(&ip->lock); 213 hammer2_inode_drop(ip); 214 } 215 216 /* 217 * Temporarily release a lock held shared or exclusive. Caller must 218 * hold the lock shared or exclusive on call and lock will be released 219 * on return. 220 * 221 * Restore a lock that was temporarily released. 
 */
hammer2_mtx_state_t
hammer2_inode_lock_temp_release(hammer2_inode_t *ip)
{
	/* returns opaque state token consumed by temp_restore */
	return hammer2_mtx_temp_release(&ip->lock);
}

void
hammer2_inode_lock_temp_restore(hammer2_inode_t *ip, hammer2_mtx_state_t ostate)
{
	hammer2_mtx_temp_restore(&ip->lock, ostate);
}

/*
 * Upgrade a shared inode lock to exclusive and return.  If the inode lock
 * is already held exclusively this is a NOP.
 *
 * The caller MUST hold the inode lock either shared or exclusive on call
 * and will own the lock exclusively on return.
 *
 * Returns non-zero if the lock was already exclusive prior to the upgrade.
 */
int
hammer2_inode_lock_upgrade(hammer2_inode_t *ip)
{
	int wasexclusive;

	if (mtx_islocked_ex(&ip->lock)) {
		wasexclusive = 1;
	} else {
		/*
		 * NOTE: not an atomic upgrade - the lock is dropped and
		 *	 re-acquired, so state protected by it may change
		 *	 in the window.
		 */
		hammer2_mtx_unlock(&ip->lock);
		hammer2_mtx_ex(&ip->lock);
		wasexclusive = 0;
	}
	return wasexclusive;
}

/*
 * Downgrade an inode lock from exclusive to shared only if the inode
 * lock was previously shared.  If the inode lock was previously exclusive,
 * this is a NOP.
 */
void
hammer2_inode_lock_downgrade(hammer2_inode_t *ip, int wasexclusive)
{
	if (wasexclusive == 0)
		mtx_downgrade(&ip->lock);
}

/*
 * Lookup an inode by inode number.  Returns a referenced (but unlocked)
 * inode, or NULL.  The super-root spmp has no inum index.
 */
hammer2_inode_t *
hammer2_inode_lookup(hammer2_pfs_t *pmp, hammer2_tid_t inum)
{
	hammer2_inode_t *ip;

	KKASSERT(pmp);
	if (pmp->spmp_hmp) {
		/* super-root cannot index inodes (duplicative inums) */
		ip = NULL;
	} else {
		hammer2_spin_ex(&pmp->inum_spin);
		ip = RB_LOOKUP(hammer2_inode_tree, &pmp->inum_tree, inum);
		if (ip)
			hammer2_inode_ref(ip);
		hammer2_spin_unex(&pmp->inum_spin);
	}
	return(ip);
}

/*
 * Adding a ref to an inode is only legal if the inode already has at least
 * one ref.
 *
 * (can be called with spinlock held)
 */
void
hammer2_inode_ref(hammer2_inode_t *ip)
{
	atomic_add_int(&ip->refs, 1);
	if (hammer2_debug & 0x80000) {
		kprintf("INODE+1 %p (%d->%d)\n", ip, ip->refs - 1, ip->refs);
		print_backtrace(8);
	}
}

/*
 * Drop an inode reference, freeing the inode when the last reference goes
 * away.
 */
void
hammer2_inode_drop(hammer2_inode_t *ip)
{
	hammer2_pfs_t *pmp;
	hammer2_inode_t *pip;
	u_int refs;

	while (ip) {
		if (hammer2_debug & 0x80000) {
			kprintf("INODE-1 %p (%d->%d)\n",
				ip, ip->refs, ip->refs - 1);
			print_backtrace(8);
		}
		/*
		 * Snapshot the ref count; the cmpset below detects any
		 * concurrent change and retries the loop.
		 */
		refs = ip->refs;
		cpu_ccfence();
		if (refs == 1) {
			/*
			 * Transition to zero, must interlock with
			 * the inode inumber lookup tree (if applicable).
			 * It should not be possible for anyone to race
			 * the transition to 0.
			 */
			pmp = ip->pmp;
			KKASSERT(pmp);
			hammer2_spin_ex(&pmp->inum_spin);

			if (atomic_cmpset_int(&ip->refs, 1, 0)) {
				KKASSERT(hammer2_mtx_refs(&ip->lock) == 0);
				if (ip->flags & HAMMER2_INODE_ONRBTREE) {
					atomic_clear_int(&ip->flags,
						     HAMMER2_INODE_ONRBTREE);
					RB_REMOVE(hammer2_inode_tree,
						  &pmp->inum_tree, ip);
				}
				hammer2_spin_unex(&pmp->inum_spin);

				pip = ip->pip;
				ip->pip = NULL;
				ip->pmp = NULL;

				/*
				 * Cleaning out ip->cluster isn't entirely
				 * trivial.
				 */
				hammer2_inode_repoint(ip, NULL, NULL);

				/*
				 * We have to drop pip (if non-NULL) to
				 * dispose of our implied reference from
				 * ip->pip.  We can simply loop on it.
				 */
				kfree(ip, pmp->minode);
				atomic_add_long(&pmp->inmem_inodes, -1);
				/* loop to drop the implied ref on pip */
				ip = pip;
				/* continue with pip (can be NULL) */
			} else {
				hammer2_spin_unex(&ip->pmp->inum_spin);
			}
		} else {
			/*
			 * Non zero transition
			 */
			if (atomic_cmpset_int(&ip->refs, refs, refs - 1))
				break;
		}
	}
}

/*
 * Get the vnode associated with the given inode, allocating the vnode if
 * necessary.  The vnode will be returned exclusively locked.
 *
 * The caller must lock the inode (shared or exclusive).
 *
 * Great care must be taken to avoid deadlocks and vnode acquisition/reclaim
 * races.
 */
struct vnode *
hammer2_igetv(hammer2_inode_t *ip, int *errorp)
{
	hammer2_pfs_t *pmp;
	struct vnode *vp;

	pmp = ip->pmp;
	KKASSERT(pmp != NULL);
	*errorp = 0;

	for (;;) {
		/*
		 * Attempt to reuse an existing vnode assignment.  It is
		 * possible to race a reclaim so the vget() may fail.  The
		 * inode must be unlocked during the vget() to avoid a
		 * deadlock against a reclaim.
		 */
		int wasexclusive;

		vp = ip->vp;
		if (vp) {
			/*
			 * Inode must be unlocked during the vget() to avoid
			 * possible deadlocks, but leave the ip ref intact.
			 *
			 * vnode is held to prevent destruction during the
			 * vget().  The vget() can still fail if we lost
			 * a reclaim race on the vnode.
			 */
			hammer2_mtx_state_t ostate;

			vhold(vp);
			ostate = hammer2_inode_lock_temp_release(ip);
			if (vget(vp, LK_EXCLUSIVE)) {
				/* reclaim race lost; retry from the top */
				vdrop(vp);
				hammer2_inode_lock_temp_restore(ip, ostate);
				continue;
			}
			hammer2_inode_lock_temp_restore(ip, ostate);
			vdrop(vp);
			/* vp still locked and ref from vget */
			if (ip->vp != vp) {
				kprintf("hammer2: igetv race %p/%p\n",
					ip->vp, vp);
				vput(vp);
				continue;
			}
			*errorp = 0;
			break;
		}

		/*
		 * No vnode exists, allocate a new vnode.  Beware of
		 * allocation races.
This function will return an
		 * exclusively locked and referenced vnode.
		 */
		*errorp = getnewvnode(VT_HAMMER2, pmp->mp, &vp, 0, 0);
		if (*errorp) {
			kprintf("hammer2: igetv getnewvnode failed %d\n",
				*errorp);
			vp = NULL;
			break;
		}

		/*
		 * Lock the inode and check for an allocation race.
		 * The exclusive inode lock interlocks concurrent igetv
		 * callers assigning ip->vp.
		 */
		wasexclusive = hammer2_inode_lock_upgrade(ip);
		if (ip->vp != NULL) {
			/* lost the race; discard our new vnode and retry */
			vp->v_type = VBAD;
			vx_put(vp);
			hammer2_inode_lock_downgrade(ip, wasexclusive);
			continue;
		}

		switch (ip->meta.type) {
		case HAMMER2_OBJTYPE_DIRECTORY:
			vp->v_type = VDIR;
			break;
		case HAMMER2_OBJTYPE_REGFILE:
			vp->v_type = VREG;
			vinitvmio(vp, ip->meta.size,
				  HAMMER2_LBUFSIZE,
				  (int)ip->meta.size & HAMMER2_LBUFMASK);
			break;
		case HAMMER2_OBJTYPE_SOFTLINK:
			/*
			 * XXX for now we are using the generic file_read
			 * and file_write code so we need a buffer cache
			 * association.
			 */
			vp->v_type = VLNK;
			vinitvmio(vp, ip->meta.size,
				  HAMMER2_LBUFSIZE,
				  (int)ip->meta.size & HAMMER2_LBUFMASK);
			break;
		case HAMMER2_OBJTYPE_CDEV:
			vp->v_type = VCHR;
			/* fall through */
		case HAMMER2_OBJTYPE_BDEV:
			vp->v_ops = &pmp->mp->mnt_vn_spec_ops;
			if (ip->meta.type != HAMMER2_OBJTYPE_CDEV)
				vp->v_type = VBLK;
			addaliasu(vp,
				  ip->meta.rmajor,
				  ip->meta.rminor);
			break;
		case HAMMER2_OBJTYPE_FIFO:
			vp->v_type = VFIFO;
			vp->v_ops = &pmp->mp->mnt_vn_fifo_ops;
			break;
		case HAMMER2_OBJTYPE_SOCKET:
			vp->v_type = VSOCK;
			break;
		default:
			panic("hammer2: unhandled objtype %d",
			      ip->meta.type);
			break;
		}

		if (ip == pmp->iroot)
			vsetflags(vp, VROOT);

		vp->v_data = ip;
		ip->vp = vp;
		hammer2_inode_ref(ip);		/* vp association */
		hammer2_inode_lock_downgrade(ip, wasexclusive);
		break;
	}

	/*
	 * Return non-NULL vp and *errorp == 0, or NULL vp and *errorp != 0.
	 */
	if (hammer2_debug & 0x0002) {
		kprintf("igetv vp %p refs 0x%08x aux 0x%08x\n",
			vp, vp->v_refcnt, vp->v_auxrefs);
	}
	return (vp);
}

/*
 * Returns the inode associated with the passed-in cluster, creating the
 * inode if necessary and synchronizing it to the passed-in cluster otherwise.
 * When synchronizing, if idx >= 0, only cluster index (idx) is synchronized.
 * Otherwise the whole cluster is synchronized.
 *
 * The passed-in cluster must be locked and will remain locked on return.
 * The returned inode will be locked and the caller may dispose of both
 * via hammer2_inode_unlock() + hammer2_inode_drop().  However, if the caller
 * needs to resolve a hardlink it must ref/unlock/relock/drop the inode.
 *
 * The hammer2_inode structure regulates the interface between the high level
 * kernel VNOPS API and the filesystem backend (the chains).
 *
 * On return the inode is locked with the supplied cluster.
 */
hammer2_inode_t *
hammer2_inode_get(hammer2_pfs_t *pmp, hammer2_inode_t *dip,
		  hammer2_cluster_t *cluster, int idx)
{
	hammer2_inode_t *nip;
	const hammer2_inode_data_t *iptmp;
	const hammer2_inode_data_t *nipdata;

	KKASSERT(cluster == NULL ||
		 hammer2_cluster_type(cluster) == HAMMER2_BREF_TYPE_INODE);
	KKASSERT(pmp);

	/*
	 * Interlocked lookup/ref of the inode.  This code is only needed
	 * when looking up inodes with nlinks != 0 (TODO: optimize out
	 * otherwise and test for duplicates).
	 *
	 * Cluster can be NULL during the initial pfs allocation.
	 */
again:
	/* 'again' is also the retry target when RB_INSERT below races */
	while (cluster) {
		iptmp = &hammer2_cluster_rdata(cluster)->ipdata;
		nip = hammer2_inode_lookup(pmp, iptmp->meta.inum);
		if (nip == NULL)
			break;

		hammer2_mtx_ex(&nip->lock);

		/*
		 * Handle SMP race (not applicable to the super-root spmp
		 * which can't index inodes due to duplicative inode numbers).
		 */
		if (pmp->spmp_hmp == NULL &&
		    (nip->flags & HAMMER2_INODE_ONRBTREE) == 0) {
			/* inode was torn down while we slept on the mutex */
			hammer2_mtx_unlock(&nip->lock);
			hammer2_inode_drop(nip);
			continue;
		}
		if (idx >= 0)
			hammer2_inode_repoint_one(nip, cluster, idx);
		else
			hammer2_inode_repoint(nip, NULL, cluster);

		/* returned inode is referenced and exclusively locked */
		return nip;
	}

	/*
	 * We couldn't find the inode number, create a new inode.
	 */
	nip = kmalloc(sizeof(*nip), pmp->minode, M_WAITOK | M_ZERO);
	spin_init(&nip->cluster_spin, "h2clspin");
	atomic_add_long(&pmp->inmem_inodes, 1);
	hammer2_pfs_memory_inc(pmp);
	hammer2_pfs_memory_wakeup(pmp);
	if (pmp->spmp_hmp)
		nip->flags = HAMMER2_INODE_SROOT;

	/*
	 * Initialize nip's cluster.  A cluster is provided for normal
	 * inodes but typically not for the super-root or PFS inodes.
	 */
	nip->cluster.refs = 1;
	nip->cluster.pmp = pmp;
	nip->cluster.flags |= HAMMER2_CLUSTER_INODE;
	if (cluster) {
		nipdata = &hammer2_cluster_rdata(cluster)->ipdata;
		nip->meta = nipdata->meta;
		atomic_set_int(&nip->flags, HAMMER2_INODE_METAGOOD);
		hammer2_inode_repoint(nip, NULL, cluster);
	} else {
		nip->meta.inum = 1;		/* PFS inum is always 1 XXX */
		/* mtime will be updated when a cluster is available */
		atomic_set_int(&nip->flags, HAMMER2_INODE_METAGOOD);/*XXX*/
	}

	nip->pip = dip;				/* can be NULL */
	if (dip)
		hammer2_inode_ref(dip);		/* ref dip for nip->pip */

	nip->pmp = pmp;

	/*
	 * ref and lock on nip gives it state compatible to after a
	 * hammer2_inode_lock() call.
	 */
	nip->refs = 1;
	hammer2_mtx_init(&nip->lock, "h2inode");
	hammer2_mtx_ex(&nip->lock);
	/* combination of thread lock and chain lock == inode lock */

	/*
	 * Attempt to add the inode.  If it fails we raced another inode
	 * get.  Undo all the work and try again.
	 */
	if (pmp->spmp_hmp == NULL) {
		hammer2_spin_ex(&pmp->inum_spin);
		if (RB_INSERT(hammer2_inode_tree, &pmp->inum_tree, nip)) {
			/*
			 * Another thread indexed the same inum first;
			 * discard our speculative inode and retry the
			 * lookup from the top.
			 */
			hammer2_spin_unex(&pmp->inum_spin);
			hammer2_mtx_unlock(&nip->lock);
			hammer2_inode_drop(nip);
			goto again;
		}
		atomic_set_int(&nip->flags, HAMMER2_INODE_ONRBTREE);
		hammer2_spin_unex(&pmp->inum_spin);
	}

	return (nip);
}

/*
 * Create a new inode in the specified directory using the vattr to
 * figure out the type of inode.
 *
 * If no error occurs the new inode with its cluster locked is returned in
 * *nipp, otherwise an error is returned and *nipp is set to NULL.
 *
 * If vap and/or cred are NULL the related fields are not set and the
 * inode type defaults to a directory.  This is used when creating PFSs
 * under the super-root, so the inode number is set to 1 in this case.
 *
 * dip is not locked on entry.
 *
 * NOTE: When used to create a snapshot, the inode is temporarily associated
 *	 with the super-root spmp. XXX should pass new pmp for snapshot.
 */
hammer2_inode_t *
hammer2_inode_create(hammer2_inode_t *dip,
		     struct vattr *vap, struct ucred *cred,
		     const uint8_t *name, size_t name_len, hammer2_key_t lhc,
		     hammer2_key_t inum, uint8_t type, uint8_t target_type,
		     int flags, int *errorp)
{
	hammer2_xop_create_t *xop;
	hammer2_inode_t *nip;
	int error;
	uid_t xuid;
	uuid_t dip_uid;
	uuid_t dip_gid;
	uint32_t dip_mode;
	uint8_t dip_comp_algo;
	uint8_t dip_check_algo;

	/* a supplied name overrides the passed-in lhc */
	if (name)
		lhc = hammer2_dirhash(name, name_len);
	*errorp = 0;
	nip = NULL;

	/*
	 * Locate the inode or indirect block to create the new
	 * entry in.  At the same time check for key collisions
	 * and iterate until we don't get one.
	 *
	 * NOTE: hidden inodes do not have iterators.
	 *
	 * Lock the directory exclusively for now to guarantee that
	 * we can find an unused lhc for the name.  Due to collisions,
	 * two different creates can end up with the same lhc so we
	 * cannot depend on the OS to prevent the collision.
	 */
	hammer2_inode_lock(dip, 0);

	/* snapshot directory attributes inherited by the new inode */
	dip_uid = dip->meta.uid;
	dip_gid = dip->meta.gid;
	dip_mode = dip->meta.mode;
	dip_comp_algo = dip->meta.comp_algo;
	dip_check_algo = dip->meta.check_algo;

	/*
	 * If name specified, locate an unused key in the collision space.
	 * Otherwise use the passed-in lhc directly.
	 */
	if (name) {
		hammer2_xop_scanlhc_t *sxop;
		hammer2_key_t lhcbase;

		lhcbase = lhc;
		sxop = hammer2_xop_alloc(dip, HAMMER2_XOP_MODIFYING);
		sxop->lhc = lhc;
		hammer2_xop_start(&sxop->head, hammer2_xop_scanlhc);
		while ((error = hammer2_xop_collect(&sxop->head, 0)) == 0) {
			if (lhc != sxop->head.cluster.focus->bref.key)
				break;
			++lhc;
		}
		hammer2_xop_retire(&sxop->head, HAMMER2_XOPMASK_VOP);

		if (error) {
			if (error != ENOENT)
				goto done2;
			/* ENOENT means the scanned key is free */
			++lhc;
			error = 0;
		}
		/* iteration must stay within the hash's collision space */
		if ((lhcbase ^ lhc) & ~HAMMER2_DIRHASH_LOMASK) {
			error = ENOSPC;
			goto done2;
		}
	}

	/*
	 * Create the inode with the lhc as the key.
	 */
	xop = hammer2_xop_alloc(dip, HAMMER2_XOP_MODIFYING);
	xop->lhc = lhc;
	xop->flags = flags;
	bzero(&xop->meta, sizeof(xop->meta));

	if (vap) {
		xop->meta.type = hammer2_get_obj_type(vap->va_type);

		switch (xop->meta.type) {
		case HAMMER2_OBJTYPE_CDEV:
		case HAMMER2_OBJTYPE_BDEV:
			xop->meta.rmajor = vap->va_rmajor;
			xop->meta.rminor = vap->va_rminor;
			break;
		default:
			break;
		}
		type = xop->meta.type;
	} else {
		xop->meta.type = type;
		xop->meta.target_type = target_type;
	}
	xop->meta.inum = inum;

	/* Inherit parent's inode compression mode.
*/
	xop->meta.comp_algo = dip_comp_algo;
	xop->meta.check_algo = dip_check_algo;
	xop->meta.version = HAMMER2_INODE_VERSION_ONE;
	hammer2_update_time(&xop->meta.ctime);
	xop->meta.mtime = xop->meta.ctime;
	if (vap)
		xop->meta.mode = vap->va_mode;
	xop->meta.nlinks = 1;
	if (vap) {
		/*
		 * Resolve ownership: explicit UUIDs in vap win, then
		 * numeric ids, then the uid/gid computed from the parent
		 * directory and credentials.
		 */
		if (dip && dip->pmp) {
			xuid = hammer2_to_unix_xid(&dip_uid);
			xuid = vop_helper_create_uid(dip->pmp->mp,
						     dip_mode,
						     xuid,
						     cred,
						     &vap->va_mode);
		} else {
			/* super-root has no dip and/or pmp */
			xuid = 0;
		}
		if (vap->va_vaflags & VA_UID_UUID_VALID)
			xop->meta.uid = vap->va_uid_uuid;
		else if (vap->va_uid != (uid_t)VNOVAL)
			hammer2_guid_to_uuid(&xop->meta.uid, vap->va_uid);
		else
			hammer2_guid_to_uuid(&xop->meta.uid, xuid);

		if (vap->va_vaflags & VA_GID_UUID_VALID)
			xop->meta.gid = vap->va_gid_uuid;
		else if (vap->va_gid != (gid_t)VNOVAL)
			hammer2_guid_to_uuid(&xop->meta.gid, vap->va_gid);
		else if (dip)
			xop->meta.gid = dip_gid;
	}

	/*
	 * Regular files and softlinks allow a small amount of data to be
	 * directly embedded in the inode.  This flag will be cleared if
	 * the size is extended past the embedded limit.
	 */
	if (xop->meta.type == HAMMER2_OBJTYPE_REGFILE ||
	    xop->meta.type == HAMMER2_OBJTYPE_SOFTLINK ||
	    xop->meta.type == HAMMER2_OBJTYPE_HARDLINK) {
		xop->meta.op_flags |= HAMMER2_OPFLAG_DIRECTDATA;
	}
	if (name)
		hammer2_xop_setname(&xop->head, name, name_len);
	xop->meta.name_len = name_len;
	xop->meta.name_key = lhc;
	KKASSERT(name_len < HAMMER2_INODE_MAXNAME);

	hammer2_xop_start(&xop->head, hammer2_inode_xop_create);

	error = hammer2_xop_collect(&xop->head, 0);
#if INODE_DEBUG
	kprintf("CREATE INODE %*.*s\n",
		(int)name_len, (int)name_len, name);
#endif

	if (error) {
		*errorp = error;
		goto done;
	}

	/*
	 * Set up the new inode if not a hardlink pointer.
	 *
	 * NOTE: *_get() integrates chain's lock into the inode lock.
	 *
	 * NOTE: Only one new inode can currently be created per
	 *	 transaction.  If the need arises we can adjust
	 *	 hammer2_trans_init() to allow more.
	 *
	 * NOTE: nipdata will have chain's blockset data.
	 */
	if (type != HAMMER2_OBJTYPE_HARDLINK) {
		nip = hammer2_inode_get(dip->pmp, dip, &xop->head.cluster, -1);
		nip->comp_heuristic = 0;
	} else {
		nip = NULL;
	}

	/* done: xop allocated; done2: only the directory lock held */
done:
	hammer2_xop_retire(&xop->head, HAMMER2_XOPMASK_VOP);
done2:
	hammer2_inode_unlock(dip);

	return (nip);
}

/*
 * Connect the disconnected inode (ip) to the directory (dip) with the
 * specified (name, name_len).  If name is NULL, (lhc) will be used as
 * the directory key and the inode's embedded name will not be modified
 * for future recovery purposes.
 *
 * dip and ip must both be locked exclusively (dip in particular to avoid
 * lhc collisions).
 */
int
hammer2_inode_connect(hammer2_inode_t *dip, hammer2_inode_t *ip,
		      const char *name, size_t name_len,
		      hammer2_key_t lhc)
{
	hammer2_xop_scanlhc_t *sxop;
	hammer2_xop_connect_t *xop;
	hammer2_inode_t *opip;
	hammer2_key_t lhcbase;
	int error;

	/*
	 * Calculate the lhc and resolve the collision space.
	 */
	if (name) {
		lhc = lhcbase = hammer2_dirhash(name, name_len);
		sxop = hammer2_xop_alloc(dip, HAMMER2_XOP_MODIFYING);
		sxop->lhc = lhc;
		hammer2_xop_start(&sxop->head, hammer2_xop_scanlhc);
		while ((error = hammer2_xop_collect(&sxop->head, 0)) == 0) {
			if (lhc != sxop->head.cluster.focus->bref.key)
				break;
			++lhc;
		}
		hammer2_xop_retire(&sxop->head, HAMMER2_XOPMASK_VOP);

		if (error) {
			if (error != ENOENT)
				goto done;
			/* ENOENT means the scanned key is free */
			++lhc;
			error = 0;
		}
		/* iteration must stay within the hash's collision space */
		if ((lhcbase ^ lhc) & ~HAMMER2_DIRHASH_LOMASK) {
			error = ENOSPC;
			goto done;
		}
	} else {
		error = 0;
	}

	/*
	 * Formally reconnect the in-memory structure.  ip must
	 * be locked exclusively to safely change ip->pip.
	 */
	if (ip->pip != dip) {
		hammer2_inode_ref(dip);
		opip = ip->pip;
		ip->pip = dip;
		if (opip)
			hammer2_inode_drop(opip);
	}

	/*
	 * Issue the connect XOP to the backend.
	 */
	xop = hammer2_xop_alloc(dip, HAMMER2_XOP_MODIFYING);
	if (name)
		hammer2_xop_setname(&xop->head, name, name_len);
	hammer2_xop_setip2(&xop->head, ip);
	xop->lhc = lhc;
	hammer2_xop_start(&xop->head, hammer2_inode_xop_connect);
	error = hammer2_xop_collect(&xop->head, 0);
	hammer2_xop_retire(&xop->head, HAMMER2_XOPMASK_VOP);

	/*
	 * On success make the same adjustments to ip->meta or the
	 * next flush may blow up the chain.
	 */
	if (error == 0) {
		hammer2_inode_modify(ip);
		ip->meta.name_key = lhc;
		if (name)
			ip->meta.name_len = name_len;
	}
done:
	return error;
}

/*
 * Repoint ip->cluster's chains to cluster's chains and fixup the default
 * focus.  All items, valid or invalid, are repointed.  hammer2_xop_start()
 * filters out invalid or non-matching elements.
 *
 * Caller must hold the inode and cluster exclusive locked, if not NULL,
 * must also be locked.
 *
 * Cluster may be NULL to clean out any chains in ip->cluster.
 */
void
hammer2_inode_repoint(hammer2_inode_t *ip, hammer2_inode_t *pip,
		      hammer2_cluster_t *cluster)
{
	hammer2_chain_t *dropch[HAMMER2_MAXCLUSTER];
	hammer2_chain_t *ochain;
	hammer2_chain_t *nchain;
	hammer2_inode_t *opip;
	int i;

	/* collect replaced chains; drops are deferred past the spinlock */
	bzero(dropch, sizeof(dropch));

	/*
	 * Replace chains in ip->cluster with chains from cluster and
	 * adjust the focus if necessary.
	 *
	 * NOTE: nchain and/or ochain can be NULL due to gaps
	 *	 in the cluster arrays.
	 */
	hammer2_spin_ex(&ip->cluster_spin);
	for (i = 0; cluster && i < cluster->nchains; ++i) {
		/*
		 * Do not replace elements which are the same.  Also handle
		 * element count discrepancies.
		 */
		nchain = cluster->array[i].chain;
		if (i < ip->cluster.nchains) {
			ochain = ip->cluster.array[i].chain;
			if (ochain == nchain)
				continue;
		} else {
			ochain = NULL;
		}

		/*
		 * Make adjustments
		 */
		ip->cluster.array[i].chain = nchain;
		ip->cluster.array[i].flags &= ~HAMMER2_CITEM_INVALID;
		ip->cluster.array[i].flags |= cluster->array[i].flags &
					      HAMMER2_CITEM_INVALID;
		if (nchain)
			hammer2_chain_ref(nchain);
		dropch[i] = ochain;
	}

	/*
	 * Release any left-over chains in ip->cluster.
	 */
	while (i < ip->cluster.nchains) {
		nchain = ip->cluster.array[i].chain;
		if (nchain) {
			ip->cluster.array[i].chain = NULL;
			ip->cluster.array[i].flags |= HAMMER2_CITEM_INVALID;
		}
		dropch[i] = nchain;
		++i;
	}

	/*
	 * Fixup fields.  Note that the inode-embedded cluster is never
	 * directly locked.
1020 */ 1021 if (cluster) { 1022 ip->cluster.nchains = cluster->nchains; 1023 ip->cluster.focus = cluster->focus; 1024 ip->cluster.flags = cluster->flags & ~HAMMER2_CLUSTER_LOCKED; 1025 } else { 1026 ip->cluster.nchains = 0; 1027 ip->cluster.focus = NULL; 1028 ip->cluster.flags &= ~HAMMER2_CLUSTER_ZFLAGS; 1029 } 1030 1031 /* 1032 * Repoint ip->pip if requested (non-NULL pip). 1033 */ 1034 if (pip && ip->pip != pip) { 1035 opip = ip->pip; 1036 hammer2_inode_ref(pip); 1037 ip->pip = pip; 1038 } else { 1039 opip = NULL; 1040 } 1041 hammer2_spin_unex(&ip->cluster_spin); 1042 1043 /* 1044 * Cleanup outside of spinlock 1045 */ 1046 while (--i >= 0) { 1047 if (dropch[i]) 1048 hammer2_chain_drop(dropch[i]); 1049 } 1050 if (opip) 1051 hammer2_inode_drop(opip); 1052 } 1053 1054 /* 1055 * Repoint a single element from the cluster to the ip. Used by the 1056 * synchronization threads to piecemeal update inodes. Does not change 1057 * focus and requires inode to be re-locked to clean-up flags (XXX). 1058 */ 1059 void 1060 hammer2_inode_repoint_one(hammer2_inode_t *ip, hammer2_cluster_t *cluster, 1061 int idx) 1062 { 1063 hammer2_chain_t *ochain; 1064 hammer2_chain_t *nchain; 1065 int i; 1066 1067 hammer2_spin_ex(&ip->cluster_spin); 1068 KKASSERT(idx < cluster->nchains); 1069 if (idx < ip->cluster.nchains) { 1070 ochain = ip->cluster.array[idx].chain; 1071 nchain = cluster->array[idx].chain; 1072 } else { 1073 ochain = NULL; 1074 nchain = cluster->array[idx].chain; 1075 ip->cluster.nchains = idx + 1; 1076 for (i = ip->cluster.nchains; i <= idx; ++i) { 1077 bzero(&ip->cluster.array[i], 1078 sizeof(ip->cluster.array[i])); 1079 ip->cluster.array[i].flags |= HAMMER2_CITEM_INVALID; 1080 } 1081 } 1082 if (ochain != nchain) { 1083 /* 1084 * Make adjustments. 
		 */
		ip->cluster.array[idx].chain = nchain;
		ip->cluster.array[idx].flags &= ~HAMMER2_CITEM_INVALID;
		ip->cluster.array[idx].flags |= cluster->array[idx].flags &
						HAMMER2_CITEM_INVALID;
	}
	hammer2_spin_unex(&ip->cluster_spin);
	/*
	 * NOTE(review): the ref/drop of the chains occurs after the
	 * spinlock is released while the array slot was updated inside
	 * it - presumably safe under the caller's locking; verify.
	 */
	if (ochain != nchain) {
		if (nchain)
			hammer2_chain_ref(nchain);
		if (ochain)
			hammer2_chain_drop(ochain);
	}
}

/*
 * Called with a locked inode to finish unlinking an inode after xop_unlink
 * had been run.  This function is responsible for decrementing nlinks and
 * moving deleted inodes to the hidden directory if they are still open.
 *
 * We don't bother decrementing nlinks if the file is not open and this was
 * the last link.
 *
 * If the inode is a hardlink target it's chain has not yet been deleted,
 * otherwise it's chain has been deleted.
 *
 * If isopen then any prior deletion was not permanent and the inode must
 * be moved to the hidden directory.
 */
int
hammer2_inode_unlink_finisher(hammer2_inode_t *ip, int isopen)
{
	hammer2_pfs_t *pmp;
	int error;

	pmp = ip->pmp;

	/*
	 * Decrement nlinks.  If this is the last link and the file is
	 * not open, the chain has already been removed and we don't bother
	 * dirtying the inode.
	 */
	if (ip->meta.nlinks == 1) {
		atomic_set_int(&ip->flags, HAMMER2_INODE_ISUNLINKED);
		if (isopen == 0) {
			atomic_set_int(&ip->flags, HAMMER2_INODE_ISDELETED);
			return 0;
		}
	}

	hammer2_inode_modify(ip);
	--ip->meta.nlinks;
	if ((int64_t)ip->meta.nlinks < 0)
		ip->meta.nlinks = 0;	/* safety */

	/*
	 * If nlinks is not zero we are done.  However, this should only be
	 * possible with a hardlink target.  If the inode is an embedded
	 * hardlink nlinks should have dropped to zero, warn and proceed
	 * with the next step.
1145 */ 1146 if (ip->meta.nlinks) { 1147 if ((ip->meta.name_key & HAMMER2_DIRHASH_VISIBLE) == 0) 1148 return 0; 1149 kprintf("hammer2_inode_unlink: nlinks was not 0 (%jd)\n", 1150 (intmax_t)ip->meta.nlinks); 1151 return 0; 1152 } 1153 1154 /* 1155 * nlinks is now zero, the inode should have already been deleted. 1156 * If the file is open it was deleted non-permanently and must be 1157 * moved to the hidden directory. 1158 * 1159 * When moving to the hidden directory we force the name_key to the 1160 * inode number to avoid collisions. 1161 */ 1162 if (isopen) { 1163 hammer2_inode_lock(pmp->ihidden, 0); 1164 error = hammer2_inode_connect(pmp->ihidden, ip, 1165 NULL, 0, ip->meta.inum); 1166 hammer2_inode_unlock(pmp->ihidden); 1167 } else { 1168 error = 0; 1169 } 1170 return error; 1171 } 1172 1173 /* 1174 * This is called from the mount code to initialize pmp->ihidden 1175 */ 1176 void 1177 hammer2_inode_install_hidden(hammer2_pfs_t *pmp) 1178 { 1179 int error; 1180 1181 if (pmp->ihidden) 1182 return; 1183 1184 hammer2_trans_init(pmp, 0); 1185 hammer2_inode_lock(pmp->iroot, 0); 1186 1187 /* 1188 * Find the hidden directory 1189 */ 1190 { 1191 hammer2_xop_lookup_t *xop; 1192 1193 xop = hammer2_xop_alloc(pmp->iroot, HAMMER2_XOP_MODIFYING); 1194 xop->lhc = HAMMER2_INODE_HIDDENDIR; 1195 hammer2_xop_start(&xop->head, hammer2_xop_lookup); 1196 error = hammer2_xop_collect(&xop->head, 0); 1197 1198 if (error == 0) { 1199 /* 1200 * Found the hidden directory 1201 */ 1202 kprintf("PFS FOUND HIDDEN DIR\n"); 1203 pmp->ihidden = hammer2_inode_get(pmp, pmp->iroot, 1204 &xop->head.cluster, 1205 -1); 1206 hammer2_inode_ref(pmp->ihidden); 1207 hammer2_inode_unlock(pmp->ihidden); 1208 } 1209 hammer2_xop_retire(&xop->head, HAMMER2_XOPMASK_VOP); 1210 } 1211 1212 /* 1213 * Create the hidden directory if it could not be found. 
1214 */ 1215 if (error == ENOENT) { 1216 kprintf("PFS CREATE HIDDEN DIR\n"); 1217 1218 pmp->ihidden = hammer2_inode_create(pmp->iroot, NULL, NULL, 1219 NULL, 0, 1220 /* lhc */ HAMMER2_INODE_HIDDENDIR, 1221 /* inum */ HAMMER2_INODE_HIDDENDIR, 1222 /* type */ HAMMER2_OBJTYPE_DIRECTORY, 1223 /* target_type */ 0, 1224 /* flags */ 0, 1225 &error); 1226 if (pmp->ihidden) { 1227 hammer2_inode_ref(pmp->ihidden); 1228 hammer2_inode_unlock(pmp->ihidden); 1229 } 1230 if (error) 1231 kprintf("PFS CREATE ERROR %d\n", error); 1232 } 1233 1234 /* 1235 * Scan the hidden directory on-mount and destroy its contents 1236 */ 1237 if (error == 0) { 1238 hammer2_xop_unlinkall_t *xop; 1239 1240 hammer2_inode_lock(pmp->ihidden, 0); 1241 xop = hammer2_xop_alloc(pmp->ihidden, HAMMER2_XOP_MODIFYING); 1242 xop->key_beg = HAMMER2_KEY_MIN; 1243 xop->key_end = HAMMER2_KEY_MAX; 1244 hammer2_xop_start(&xop->head, hammer2_inode_xop_unlinkall); 1245 1246 while ((error = hammer2_xop_collect(&xop->head, 0)) == 0) { 1247 ; 1248 } 1249 hammer2_xop_retire(&xop->head, HAMMER2_XOPMASK_VOP); 1250 hammer2_inode_unlock(pmp->ihidden); 1251 } 1252 1253 hammer2_inode_unlock(pmp->iroot); 1254 hammer2_trans_done(pmp); 1255 } 1256 1257 /* 1258 * Find the directory common to both fdip and tdip that satisfies the 1259 * conditions. The common directory is not allowed to cross a XLINK 1260 * boundary. If ishardlink is non-zero and we successfully find the 1261 * common parent, we will continue to iterate parents until we hit a 1262 * XLINK boundary. 1263 * 1264 * Returns a held but not locked inode. Caller typically locks the inode, 1265 * and when through unlocks AND drops it. 1266 */ 1267 hammer2_inode_t * 1268 hammer2_inode_common_parent(hammer2_inode_t *fdip, hammer2_inode_t *tdip, 1269 int *errorp, int ishardlink) 1270 { 1271 hammer2_inode_t *scan1; 1272 hammer2_inode_t *scan2; 1273 int state; 1274 1275 /* 1276 * We used to have a depth field but it complicated matters too 1277 * much for directory renames. 
So now its ugly. Check for 1278 * simple cases before giving up and doing it the expensive way. 1279 * 1280 * XXX need a bottom-up topology stability lock 1281 */ 1282 if (fdip == tdip) { 1283 hammer2_inode_ref(fdip); 1284 return(fdip); 1285 } 1286 1287 /* 1288 * XXX not MPSAFE 1289 * 1290 * state: -1 sub-scan failed 1291 * 0 1292 * +1 sub-scan succeeded (find xlink boundary if rename) 1293 */ 1294 for (scan1 = fdip; scan1->pmp == fdip->pmp; scan1 = scan1->pip) { 1295 scan2 = tdip; 1296 state = 0; 1297 while (scan2->pmp == tdip->pmp) { 1298 if (state == 0 && scan1 == scan2) { 1299 /* 1300 * Found common parent, stop here on rename, 1301 * continue if creating a hardlink. 1302 */ 1303 if (ishardlink == 0) { 1304 hammer2_inode_ref(scan1); 1305 return(scan1); 1306 } 1307 state = 1; 1308 } 1309 if (state == 1) { 1310 /* 1311 * Search for XLINK boundary when hardlink. 1312 */ 1313 if ((scan2->meta.uflags & 1314 (SF_XLINK | UF_XLINK)) || 1315 scan2->pip == NULL || 1316 scan2->pip->pmp != scan1->pmp) { 1317 hammer2_inode_ref(scan2); 1318 return(scan2); 1319 } 1320 } 1321 if (scan2->meta.uflags & (SF_XLINK | UF_XLINK)) 1322 break; 1323 scan2 = scan2->pip; 1324 if (scan2 == NULL) 1325 break; 1326 } 1327 if (scan1->meta.uflags & (SF_XLINK | UF_XLINK)) 1328 break; 1329 } 1330 *errorp = EXDEV; 1331 return(NULL); 1332 } 1333 1334 /* 1335 * Mark an inode as being modified, meaning that the caller will modify 1336 * ip->meta. 1337 * 1338 * If a vnode is present we set the vnode dirty and the nominal filesystem 1339 * sync will also handle synchronizing the inode meta-data. If no vnode 1340 * is present we must ensure that the inode is on pmp->sideq. 1341 * 1342 * NOTE: No mtid (modify_tid) is passed into this routine. The caller is 1343 * only modifying the in-memory inode. A modify_tid is synchronized 1344 * later when the inode gets flushed. 
 */
void
hammer2_inode_modify(hammer2_inode_t *ip)
{
	hammer2_pfs_t *pmp;

	atomic_set_int(&ip->flags, HAMMER2_INODE_MODIFIED);
	if (ip->vp) {
		/* vnode present: the normal fs sync will flush ip->meta */
		vsetisdirty(ip->vp);
	} else if ((pmp = ip->pmp) != NULL) {
		/* no vnode: queue the inode for hammer2_inode_run_sideq() */
		hammer2_inode_delayed_sideq(ip);
	}
}

/*
 * Synchronize the inode's frontend state with the chain state prior
 * to any explicit flush of the inode or any strategy write call.
 *
 * Called with a locked inode inside a transaction.
 */
void
hammer2_inode_chain_sync(hammer2_inode_t *ip)
{
	if (ip->flags & (HAMMER2_INODE_RESIZED | HAMMER2_INODE_MODIFIED)) {
		hammer2_xop_fsync_t *xop;
		int error;

		xop = hammer2_xop_alloc(ip, HAMMER2_XOP_MODIFYING);
		xop->clear_directdata = 0;
		if (ip->flags & HAMMER2_INODE_RESIZED) {
			/*
			 * If the file grew past the embedded (direct-data)
			 * capacity the inode's data area must be converted
			 * to a blockset; tell the backend to clear it.
			 */
			if ((ip->meta.op_flags & HAMMER2_OPFLAG_DIRECTDATA) &&
			    ip->meta.size > HAMMER2_EMBEDDED_BYTES) {
				ip->meta.op_flags &= ~HAMMER2_OPFLAG_DIRECTDATA;
				xop->clear_directdata = 1;
			}
			xop->osize = ip->osize;
		} else {
			xop->osize = ip->meta.size;	/* safety */
		}
		/* snapshot flags and meta-data for the backend threads */
		xop->ipflags = ip->flags;
		xop->meta = ip->meta;

		/*
		 * Clear RESIZED/MODIFIED before dispatching so concurrent
		 * modifications re-dirty the inode rather than being lost.
		 */
		atomic_clear_int(&ip->flags, HAMMER2_INODE_RESIZED |
					     HAMMER2_INODE_MODIFIED);
		hammer2_xop_start(&xop->head, hammer2_inode_xop_chain_sync);
		error = hammer2_xop_collect(&xop->head, 0);
		hammer2_xop_retire(&xop->head, HAMMER2_XOPMASK_VOP);
		if (error == ENOENT)
			error = 0;
		if (error) {
			kprintf("hammer2: unable to fsync inode %p\n", ip);
			/*
			atomic_set_int(&ip->flags,
				       xop->ipflags & (HAMMER2_INODE_RESIZED |
						       HAMMER2_INODE_MODIFIED));
			*/
			/* XXX return error somehow? */
		}
	}
}

/*
 * The normal filesystem sync no longer has visibility to an inode structure
 * after its vnode has been reclaimed.
In this situation an unlinked-but-open 1409 * inode or a dirty inode may require additional processing to synchronize 1410 * ip->meta to its underlying cluster nodes. 1411 * 1412 * In particular, reclaims can occur in almost any state (for example, when 1413 * doing operations on unrelated vnodes) and flushing the reclaimed inode 1414 * in the reclaim path itself is a non-starter. 1415 * 1416 * Caller must be in a transaction. 1417 */ 1418 void 1419 hammer2_inode_run_sideq(hammer2_pfs_t *pmp) 1420 { 1421 hammer2_xop_destroy_t *xop; 1422 hammer2_inode_sideq_t *ipul; 1423 hammer2_inode_t *ip; 1424 int error; 1425 1426 if (TAILQ_EMPTY(&pmp->sideq)) 1427 return; 1428 1429 LOCKSTART; 1430 hammer2_spin_ex(&pmp->list_spin); 1431 while ((ipul = TAILQ_FIRST(&pmp->sideq)) != NULL) { 1432 TAILQ_REMOVE(&pmp->sideq, ipul, entry); 1433 ip = ipul->ip; 1434 KKASSERT(ip->flags & HAMMER2_INODE_ONSIDEQ); 1435 atomic_clear_int(&ip->flags, HAMMER2_INODE_ONSIDEQ); 1436 hammer2_spin_unex(&pmp->list_spin); 1437 kfree(ipul, pmp->minode); 1438 1439 hammer2_inode_lock(ip, 0); 1440 if (ip->flags & HAMMER2_INODE_ISUNLINKED) { 1441 /* 1442 * The inode was unlinked while open, causing H2 1443 * to relink it to a hidden directory to allow 1444 * cluster operations to continue until close. 1445 * 1446 * The inode must be deleted and destroyed. 1447 */ 1448 xop = hammer2_xop_alloc(ip, HAMMER2_XOP_MODIFYING); 1449 hammer2_xop_start(&xop->head, 1450 hammer2_inode_xop_destroy); 1451 error = hammer2_xop_collect(&xop->head, 0); 1452 hammer2_xop_retire(&xop->head, HAMMER2_XOPMASK_VOP); 1453 1454 atomic_clear_int(&ip->flags, HAMMER2_INODE_ISDELETED); 1455 } else { 1456 /* 1457 * The inode was dirty as-of the reclaim, requiring 1458 * synchronization of ip->meta with its underlying 1459 * chains. 
1460 */ 1461 hammer2_inode_chain_sync(ip); 1462 } 1463 1464 hammer2_inode_unlock(ip); 1465 hammer2_inode_drop(ip); /* ipul ref */ 1466 1467 hammer2_spin_ex(&pmp->list_spin); 1468 } 1469 hammer2_spin_unex(&pmp->list_spin); 1470 LOCKSTOP; 1471 } 1472 1473 /* 1474 * Inode create helper (threaded, backend) 1475 * 1476 * Used by ncreate, nmknod, nsymlink, nmkdir. 1477 * Used by nlink and rename to create HARDLINK pointers. 1478 * 1479 * Frontend holds the parent directory ip locked exclusively. We 1480 * create the inode and feed the exclusively locked chain to the 1481 * frontend. 1482 */ 1483 void 1484 hammer2_inode_xop_create(hammer2_xop_t *arg, int clindex) 1485 { 1486 hammer2_xop_create_t *xop = &arg->xop_create; 1487 hammer2_chain_t *parent; 1488 hammer2_chain_t *chain; 1489 hammer2_key_t key_next; 1490 int cache_index = -1; 1491 int error; 1492 1493 if (hammer2_debug & 0x0001) 1494 kprintf("inode_create lhc %016jx clindex %d\n", 1495 xop->lhc, clindex); 1496 1497 parent = hammer2_inode_chain(xop->head.ip1, clindex, 1498 HAMMER2_RESOLVE_ALWAYS); 1499 if (parent == NULL) { 1500 error = EIO; 1501 chain = NULL; 1502 goto fail; 1503 } 1504 chain = hammer2_chain_lookup(&parent, &key_next, 1505 xop->lhc, xop->lhc, 1506 &cache_index, 0); 1507 if (chain) { 1508 error = EEXIST; 1509 goto fail; 1510 } 1511 1512 error = hammer2_chain_create(&parent, &chain, 1513 xop->head.ip1->pmp, 1514 xop->lhc, 0, 1515 HAMMER2_BREF_TYPE_INODE, 1516 HAMMER2_INODE_BYTES, 1517 xop->head.mtid, 0, xop->flags); 1518 if (error == 0) { 1519 hammer2_chain_modify(chain, xop->head.mtid, 0, 0); 1520 chain->data->ipdata.meta = xop->meta; 1521 if (xop->head.name1) { 1522 bcopy(xop->head.name1, 1523 chain->data->ipdata.filename, 1524 xop->head.name1_len); 1525 chain->data->ipdata.meta.name_len = xop->head.name1_len; 1526 } 1527 chain->data->ipdata.meta.name_key = xop->lhc; 1528 } 1529 hammer2_chain_unlock(chain); 1530 hammer2_chain_lock(chain, HAMMER2_RESOLVE_ALWAYS | 1531 HAMMER2_RESOLVE_SHARED); 1532 
fail: 1533 if (parent) { 1534 hammer2_chain_unlock(parent); 1535 hammer2_chain_drop(parent); 1536 } 1537 hammer2_xop_feed(&xop->head, chain, clindex, error); 1538 if (chain) { 1539 hammer2_chain_unlock(chain); 1540 hammer2_chain_drop(chain); 1541 } 1542 } 1543 1544 /* 1545 * Inode delete helper (backend, threaded) 1546 * 1547 * Generally used by hammer2_run_sideq() 1548 */ 1549 void 1550 hammer2_inode_xop_destroy(hammer2_xop_t *arg, int clindex) 1551 { 1552 hammer2_xop_destroy_t *xop = &arg->xop_destroy; 1553 hammer2_pfs_t *pmp; 1554 hammer2_chain_t *parent; 1555 hammer2_chain_t *chain; 1556 hammer2_inode_t *ip; 1557 int error; 1558 1559 /* 1560 * We need the precise parent chain to issue the deletion. 1561 */ 1562 ip = xop->head.ip1; 1563 pmp = ip->pmp; 1564 chain = NULL; 1565 1566 parent = hammer2_inode_chain(ip, clindex, HAMMER2_RESOLVE_ALWAYS); 1567 if (parent) 1568 hammer2_chain_getparent(&parent, HAMMER2_RESOLVE_ALWAYS); 1569 if (parent == NULL) { 1570 error = EIO; 1571 goto done; 1572 } 1573 chain = hammer2_inode_chain(ip, clindex, HAMMER2_RESOLVE_ALWAYS); 1574 if (chain == NULL) { 1575 error = EIO; 1576 goto done; 1577 } 1578 hammer2_chain_delete(parent, chain, xop->head.mtid, 0); 1579 error = 0; 1580 done: 1581 hammer2_xop_feed(&xop->head, NULL, clindex, error); 1582 if (parent) { 1583 hammer2_chain_unlock(parent); 1584 hammer2_chain_drop(parent); 1585 } 1586 if (chain) { 1587 hammer2_chain_unlock(chain); 1588 hammer2_chain_drop(chain); 1589 } 1590 } 1591 1592 void 1593 hammer2_inode_xop_unlinkall(hammer2_xop_t *arg, int clindex) 1594 { 1595 hammer2_xop_unlinkall_t *xop = &arg->xop_unlinkall; 1596 hammer2_chain_t *parent; 1597 hammer2_chain_t *chain; 1598 hammer2_key_t key_next; 1599 int cache_index = -1; 1600 1601 /* 1602 * We need the precise parent chain to issue the deletion. 
1603 */ 1604 parent = hammer2_inode_chain(xop->head.ip1, clindex, 1605 HAMMER2_RESOLVE_ALWAYS); 1606 chain = NULL; 1607 if (parent == NULL) { 1608 /* XXX error */ 1609 goto done; 1610 } 1611 chain = hammer2_chain_lookup(&parent, &key_next, 1612 xop->key_beg, xop->key_end, 1613 &cache_index, 1614 HAMMER2_LOOKUP_ALWAYS); 1615 while (chain) { 1616 hammer2_chain_delete(parent, chain, 1617 xop->head.mtid, HAMMER2_DELETE_PERMANENT); 1618 hammer2_chain_unlock(chain); 1619 hammer2_chain_lock(chain, HAMMER2_RESOLVE_ALWAYS | 1620 HAMMER2_RESOLVE_SHARED); 1621 hammer2_xop_feed(&xop->head, chain, clindex, chain->error); 1622 /* depend on function to unlock the shared lock */ 1623 chain = hammer2_chain_next(&parent, chain, &key_next, 1624 key_next, xop->key_end, 1625 &cache_index, 1626 HAMMER2_LOOKUP_ALWAYS); 1627 } 1628 done: 1629 hammer2_xop_feed(&xop->head, NULL, clindex, ENOENT); 1630 if (parent) { 1631 hammer2_chain_unlock(parent); 1632 hammer2_chain_drop(parent); 1633 } 1634 if (chain) { 1635 hammer2_chain_unlock(chain); 1636 hammer2_chain_drop(chain); 1637 } 1638 } 1639 1640 void 1641 hammer2_inode_xop_connect(hammer2_xop_t *arg, int clindex) 1642 { 1643 hammer2_xop_connect_t *xop = &arg->xop_connect; 1644 hammer2_inode_data_t *wipdata; 1645 hammer2_chain_t *parent; 1646 hammer2_chain_t *chain; 1647 hammer2_pfs_t *pmp; 1648 hammer2_key_t key_dummy; 1649 int cache_index = -1; 1650 int error; 1651 1652 /* 1653 * Get directory, then issue a lookup to prime the parent chain 1654 * for the create. The lookup is expected to fail. 
1655 */ 1656 pmp = xop->head.ip1->pmp; 1657 parent = hammer2_inode_chain(xop->head.ip1, clindex, 1658 HAMMER2_RESOLVE_ALWAYS); 1659 if (parent == NULL) { 1660 chain = NULL; 1661 error = EIO; 1662 goto fail; 1663 } 1664 chain = hammer2_chain_lookup(&parent, &key_dummy, 1665 xop->lhc, xop->lhc, 1666 &cache_index, 0); 1667 if (chain) { 1668 hammer2_chain_unlock(chain); 1669 hammer2_chain_drop(chain); 1670 chain = NULL; 1671 error = EEXIST; 1672 goto fail; 1673 } 1674 1675 /* 1676 * Adjust the filename in the inode, set the name key. 1677 * 1678 * NOTE: Frontend must also adjust ip2->meta on success, we can't 1679 * do it here. 1680 */ 1681 chain = hammer2_inode_chain(xop->head.ip2, clindex, 1682 HAMMER2_RESOLVE_ALWAYS); 1683 hammer2_chain_modify(chain, xop->head.mtid, 0, 0); 1684 wipdata = &chain->data->ipdata; 1685 1686 hammer2_inode_modify(xop->head.ip2); 1687 if (xop->head.name1) { 1688 bzero(wipdata->filename, sizeof(wipdata->filename)); 1689 bcopy(xop->head.name1, wipdata->filename, xop->head.name1_len); 1690 wipdata->meta.name_len = xop->head.name1_len; 1691 } 1692 wipdata->meta.name_key = xop->lhc; 1693 1694 /* 1695 * Reconnect the chain to the new parent directory 1696 */ 1697 error = hammer2_chain_create(&parent, &chain, pmp, 1698 xop->lhc, 0, 1699 HAMMER2_BREF_TYPE_INODE, 1700 HAMMER2_INODE_BYTES, 1701 xop->head.mtid, 0, 0); 1702 1703 /* 1704 * Feed result back. 1705 */ 1706 fail: 1707 hammer2_xop_feed(&xop->head, NULL, clindex, error); 1708 if (parent) { 1709 hammer2_chain_unlock(parent); 1710 hammer2_chain_drop(parent); 1711 } 1712 if (chain) { 1713 hammer2_chain_unlock(chain); 1714 hammer2_chain_drop(chain); 1715 } 1716 } 1717 1718 /* 1719 * Synchronize the in-memory inode with the chain. 
 */
void
hammer2_inode_xop_chain_sync(hammer2_xop_t *arg, int clindex)
{
	hammer2_xop_fsync_t *xop = &arg->xop_fsync;
	hammer2_chain_t *parent;
	hammer2_chain_t *chain;
	int error;

	parent = hammer2_inode_chain(xop->head.ip1, clindex,
				     HAMMER2_RESOLVE_ALWAYS);
	chain = NULL;
	if (parent == NULL) {
		error = EIO;
		goto done;
	}
	if (parent->error) {
		error = parent->error;
		goto done;
	}

	error = 0;

	if ((xop->ipflags & HAMMER2_INODE_RESIZED) == 0) {
		/* osize must be ignored */
	} else if (xop->meta.size < xop->osize) {
		/*
		 * We must delete any chains beyond the EOF.  The chain
		 * straddling the EOF will be pending in the bioq.
		 */
		hammer2_key_t lbase;
		hammer2_key_t key_next;
		int cache_index = -1;

		/* first logical block fully beyond the new EOF */
		lbase = (xop->meta.size + HAMMER2_PBUFMASK64) &
			~HAMMER2_PBUFMASK64;
		chain = hammer2_chain_lookup(&parent, &key_next,
					     lbase, HAMMER2_KEY_MAX,
					     &cache_index,
					     HAMMER2_LOOKUP_NODATA |
					     HAMMER2_LOOKUP_NODIRECT);
		while (chain) {
			/*
			 * Degenerate embedded case, nothing to loop on
			 */
			switch (chain->bref.type) {
			case HAMMER2_BREF_TYPE_INODE:
				/* NODIRECT should prevent this */
				KKASSERT(0);
				break;
			case HAMMER2_BREF_TYPE_DATA:
				hammer2_chain_delete(parent, chain,
						     xop->head.mtid,
						     HAMMER2_DELETE_PERMANENT);
				break;
			}
			chain = hammer2_chain_next(&parent, chain, &key_next,
						   key_next, HAMMER2_KEY_MAX,
						   &cache_index,
						   HAMMER2_LOOKUP_NODATA |
						   HAMMER2_LOOKUP_NODIRECT);
		}

		/*
		 * Reset to point at inode for following code, if necessary.
		 * The iteration above may have replaced parent with an
		 * indirect block; re-acquire the inode chain itself.
		 */
		if (parent->bref.type != HAMMER2_BREF_TYPE_INODE) {
			hammer2_chain_unlock(parent);
			hammer2_chain_drop(parent);
			parent = hammer2_inode_chain(xop->head.ip1, clindex,
						     HAMMER2_RESOLVE_ALWAYS);
			kprintf("hammer2: TRUNCATE RESET on '%s'\n",
				parent->data->ipdata.filename);
		}
	}

	/*
	 * Sync the inode meta-data, potentially clear the blockset area
	 * of direct data so it can be used for blockrefs.
	 */
	hammer2_chain_modify(parent, xop->head.mtid, 0, 0);
	parent->data->ipdata.meta = xop->meta;
	if (xop->clear_directdata) {
		bzero(&parent->data->ipdata.u.blockset,
		      sizeof(parent->data->ipdata.u.blockset));
	}
done:
	if (chain) {
		hammer2_chain_unlock(chain);
		hammer2_chain_drop(chain);
	}
	if (parent) {
		hammer2_chain_unlock(parent);
		hammer2_chain_drop(parent);
	}
	hammer2_xop_feed(&xop->head, NULL, clindex, error);
}