1 /* 2 * Copyright (c) 2011-2014 The DragonFly Project. All rights reserved. 3 * 4 * This code is derived from software contributed to The DragonFly Project 5 * by Matthew Dillon <dillon@dragonflybsd.org> 6 * by Venkatesh Srinivas <vsrinivas@dragonflybsd.org> 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in 16 * the documentation and/or other materials provided with the 17 * distribution. 18 * 3. Neither the name of The DragonFly Project nor the names of its 19 * contributors may be used to endorse or promote products derived 20 * from this software without specific, prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 23 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 24 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 25 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 26 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 27 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 28 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 29 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 30 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 31 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 32 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33 * SUCH DAMAGE. 34 */ 35 #include <sys/cdefs.h> 36 #include <sys/param.h> 37 #include <sys/systm.h> 38 #include <sys/types.h> 39 #include <sys/lock.h> 40 #include <sys/uuid.h> 41 42 #include "hammer2.h" 43 44 #define INODE_DEBUG 0 45 46 static void hammer2_inode_move_to_hidden(hammer2_trans_t *trans, 47 hammer2_cluster_t **clusterp, 48 hammer2_tid_t inum); 49 50 RB_GENERATE2(hammer2_inode_tree, hammer2_inode, rbnode, hammer2_inode_cmp, 51 hammer2_tid_t, inum); 52 53 int 54 hammer2_inode_cmp(hammer2_inode_t *ip1, hammer2_inode_t *ip2) 55 { 56 if (ip1->inum < ip2->inum) 57 return(-1); 58 if (ip1->inum > ip2->inum) 59 return(1); 60 return(0); 61 } 62 63 /* 64 * HAMMER2 inode locks 65 * 66 * HAMMER2 offers shared locks and exclusive locks on inodes. 67 * 68 * The inode locking function locks the inode itself, resolves any stale 69 * chains in the inode's cluster, and allocates a fresh copy of the 70 * cluster with 1 ref and all the underlying chains locked. Duplication 71 * races are handled by this function. 72 * 73 * ip->cluster will be stable while the inode is locked. 74 * 75 * NOTE: We don't combine the inode/chain lock because putting away an 76 * inode would otherwise confuse multiple lock holders of the inode. 77 * 78 * NOTE: Hardlinks are followed in the returned cluster but not in the 79 * inode's internal cluster (ip->cluster). 80 */ 81 hammer2_cluster_t * 82 hammer2_inode_lock_ex(hammer2_inode_t *ip) 83 { 84 const hammer2_inode_data_t *ipdata; 85 hammer2_cluster_t *cluster; 86 hammer2_chain_t *chain; 87 hammer2_chain_t *ochain; 88 hammer2_chain_core_t *core; 89 int error; 90 int i; 91 92 hammer2_inode_ref(ip); 93 ccms_thread_lock(&ip->topo_cst, CCMS_STATE_EXCLUSIVE); 94 cluster = hammer2_cluster_copy(&ip->cluster, 95 HAMMER2_CLUSTER_COPY_NOCHAINS); 96 97 ip->cluster.focus = NULL; 98 cluster->focus = NULL; 99 100 for (i = 0; i < cluster->nchains; ++i) { 101 chain = ip->cluster.array[i]; 102 if (chain == NULL) { 103 kprintf("inode_lock: %p: missing chain\n", ip); 104 continue; 105 } 106 core = chain->core; 107 for (;;) { 108 if (chain->flags & HAMMER2_CHAIN_DUPLICATED) { 109 spin_lock(&core->cst.spin); 110 while (chain->flags & HAMMER2_CHAIN_DUPLICATED) 111 chain = TAILQ_NEXT(chain, core_entry); 112 hammer2_chain_ref(chain); 113 spin_unlock(&core->cst.spin); 114 ochain = ip->cluster.array[i]; 115 ip->cluster.array[i] = chain; 116 if (ip->cluster.focus == NULL) 117 ip->cluster.focus = chain; 118 hammer2_chain_drop(ochain); 119 } 120 hammer2_chain_lock(chain, HAMMER2_RESOLVE_ALWAYS); 121 if ((chain->flags & HAMMER2_CHAIN_DUPLICATED) == 0) 122 break; 123 hammer2_chain_unlock(chain); 124 } 125 cluster->array[i] = chain; 126 if (cluster->focus == NULL) 127 cluster->focus = chain; 128 if (ip->cluster.focus == NULL) 129 ip->cluster.focus = chain; 130 } 131 132 /* 133 * Returned cluster must resolve hardlink pointers 134 */ 135 ipdata = &hammer2_cluster_data(cluster)->ipdata; 136 if (ipdata->type == HAMMER2_OBJTYPE_HARDLINK && 137 (cluster->focus->flags & HAMMER2_CHAIN_DELETED) == 0) { 138 error = hammer2_hardlink_find(ip->pip, cluster); 139 KKASSERT((cluster->focus->flags & 140 HAMMER2_CHAIN_DUPLICATED) == 0); 141 KKASSERT(error == 0); 142 } 143 144 return (cluster); 145 } 146 147 void 148 hammer2_inode_unlock_ex(hammer2_inode_t *ip, hammer2_cluster_t *cluster) 149 { 150 if (cluster) 151 hammer2_cluster_unlock(cluster); 152 ccms_thread_unlock(&ip->topo_cst); 153 hammer2_inode_drop(ip); 154 } 155 156 /* 157 * NOTE: We don't combine the inode/chain lock because putting away an 158 * inode would otherwise confuse multiple lock holders of the inode. 159 * 160 * Shared locks are especially sensitive to having too many shared 161 * lock counts (from the same thread) on certain paths which might 162 * need to upgrade them. Only one count of a shared lock can be 163 * upgraded. 164 */ 165 hammer2_cluster_t * 166 hammer2_inode_lock_sh(hammer2_inode_t *ip) 167 { 168 const hammer2_inode_data_t *ipdata; 169 hammer2_cluster_t *cluster; 170 hammer2_chain_core_t *core; 171 hammer2_chain_t *chain; 172 int error = 0; 173 int i; 174 175 hammer2_inode_ref(ip); 176 cluster = hammer2_cluster_copy(&ip->cluster, 177 HAMMER2_CLUSTER_COPY_NOCHAINS); 178 ccms_thread_lock(&ip->topo_cst, CCMS_STATE_SHARED); 179 180 cluster->focus = NULL; 181 182 for (i = 0; i < cluster->nchains; ++i) { 183 chain = ip->cluster.array[i]; 184 185 if (chain == NULL) { 186 kprintf("inode_lock: %p: missing chain\n", ip); 187 continue; 188 } 189 190 core = chain->core; 191 192 if (chain->flags & HAMMER2_CHAIN_DUPLICATED) 193 goto cycle_excl; 194 hammer2_chain_lock(chain, HAMMER2_RESOLVE_ALWAYS | 195 HAMMER2_RESOLVE_SHARED); 196 if (chain->flags & HAMMER2_CHAIN_DUPLICATED) { 197 hammer2_chain_unlock(chain); 198 199 /* 200 * Cycle exclusive inode lock and start the loop 201 * over again. 202 */ 203 cycle_excl: 204 while (--i >= 0) { 205 chain = cluster->array[i]; 206 cluster->array[i] = NULL; 207 hammer2_chain_unlock(chain); 208 } 209 ccms_thread_unlock(&ip->topo_cst); 210 hammer2_inode_unlock_ex(ip, hammer2_inode_lock_ex(ip)); 211 ccms_thread_lock(&ip->topo_cst, CCMS_STATE_SHARED); 212 cluster->focus = NULL; 213 continue; /* restart at i=-1 -> i=0 on loop */ 214 } 215 cluster->array[i] = chain; 216 if (cluster->focus == NULL) 217 cluster->focus = chain; 218 } 219 220 /* 221 * Returned cluster must resolve hardlink pointers 222 */ 223 ipdata = &hammer2_cluster_data(cluster)->ipdata; 224 if (ipdata->type == HAMMER2_OBJTYPE_HARDLINK && 225 (cluster->focus->flags & HAMMER2_CHAIN_DELETED) == 0) { 226 error = hammer2_hardlink_find(ip->pip, cluster); 227 KKASSERT((cluster->focus->flags & 228 HAMMER2_CHAIN_DUPLICATED) == 0); 229 KKASSERT(error == 0); 230 } 231 232 return (cluster); 233 } 234 235 void 236 hammer2_inode_unlock_sh(hammer2_inode_t *ip, hammer2_cluster_t *cluster) 237 { 238 if (cluster) 239 hammer2_cluster_unlock(cluster); 240 ccms_thread_unlock(&ip->topo_cst); 241 hammer2_inode_drop(ip); 242 } 243 244 ccms_state_t 245 hammer2_inode_lock_temp_release(hammer2_inode_t *ip) 246 { 247 return(ccms_thread_lock_temp_release(&ip->topo_cst)); 248 } 249 250 void 251 hammer2_inode_lock_temp_restore(hammer2_inode_t *ip, ccms_state_t ostate) 252 { 253 ccms_thread_lock_temp_restore(&ip->topo_cst, ostate); 254 } 255 256 ccms_state_t 257 hammer2_inode_lock_upgrade(hammer2_inode_t *ip) 258 { 259 return(ccms_thread_lock_upgrade(&ip->topo_cst)); 260 } 261 262 void 263 hammer2_inode_lock_downgrade(hammer2_inode_t *ip, ccms_state_t ostate) 264 { 265 ccms_thread_lock_downgrade(&ip->topo_cst, ostate); 266 } 267 268 /* 269 * Lookup an inode by inode number 270 */ 271 hammer2_inode_t * 272 hammer2_inode_lookup(hammer2_pfsmount_t *pmp, hammer2_tid_t inum) 273 { 274 hammer2_inode_t *ip; 275 276 KKASSERT(pmp); 277 if (pmp->spmp_hmp) { 278 ip = NULL; 279 } else { 280 spin_lock(&pmp->inum_spin); 281 ip = RB_LOOKUP(hammer2_inode_tree, &pmp->inum_tree, inum); 282 if (ip) 283 hammer2_inode_ref(ip); 284 spin_unlock(&pmp->inum_spin); 285 } 286 return(ip); 287 } 288 289 /* 290 * Adding a ref to an inode is only legal if the inode already has at least 291 * one ref. 292 */ 293 void 294 hammer2_inode_ref(hammer2_inode_t *ip) 295 { 296 atomic_add_int(&ip->refs, 1); 297 } 298 299 /* 300 * Drop an inode reference, freeing the inode when the last reference goes 301 * away. 302 */ 303 void 304 hammer2_inode_drop(hammer2_inode_t *ip) 305 { 306 hammer2_pfsmount_t *pmp; 307 hammer2_inode_t *pip; 308 u_int refs; 309 310 while (ip) { 311 refs = ip->refs; 312 cpu_ccfence(); 313 if (refs == 1) { 314 /* 315 * Transition to zero, must interlock with 316 * the inode inumber lookup tree (if applicable). 317 */ 318 pmp = ip->pmp; 319 KKASSERT(pmp); 320 spin_lock(&pmp->inum_spin); 321 322 if (atomic_cmpset_int(&ip->refs, 1, 0)) { 323 KKASSERT(ip->topo_cst.count == 0); 324 if (ip->flags & HAMMER2_INODE_ONRBTREE) { 325 atomic_clear_int(&ip->flags, 326 HAMMER2_INODE_ONRBTREE); 327 RB_REMOVE(hammer2_inode_tree, 328 &pmp->inum_tree, ip); 329 } 330 spin_unlock(&pmp->inum_spin); 331 332 pip = ip->pip; 333 ip->pip = NULL; 334 ip->pmp = NULL; 335 336 /* 337 * Cleaning out ip->cluster isn't entirely 338 * trivial. 339 */ 340 hammer2_inode_repoint(ip, NULL, NULL); 341 342 /* 343 * We have to drop pip (if non-NULL) to 344 * dispose of our implied reference from 345 * ip->pip. We can simply loop on it. 346 */ 347 kfree(ip, pmp->minode); 348 atomic_add_long(&pmp->inmem_inodes, -1); 349 ip = pip; 350 /* continue with pip (can be NULL) */ 351 } else { 352 spin_unlock(&ip->pmp->inum_spin); 353 } 354 } else { 355 /* 356 * Non zero transition 357 */ 358 if (atomic_cmpset_int(&ip->refs, refs, refs - 1)) 359 break; 360 } 361 } 362 } 363 364 /* 365 * Get the vnode associated with the given inode, allocating the vnode if 366 * necessary. The vnode will be returned exclusively locked. 367 * 368 * The caller must lock the inode (shared or exclusive). 369 * 370 * Great care must be taken to avoid deadlocks and vnode acquisition/reclaim 371 * races. 372 */ 373 struct vnode * 374 hammer2_igetv(hammer2_inode_t *ip, hammer2_cluster_t *cparent, int *errorp) 375 { 376 const hammer2_inode_data_t *ipdata; 377 hammer2_pfsmount_t *pmp; 378 struct vnode *vp; 379 ccms_state_t ostate; 380 381 pmp = ip->pmp; 382 KKASSERT(pmp != NULL); 383 *errorp = 0; 384 385 ipdata = &hammer2_cluster_data(cparent)->ipdata; 386 387 for (;;) { 388 /* 389 * Attempt to reuse an existing vnode assignment. It is 390 * possible to race a reclaim so the vget() may fail. The 391 * inode must be unlocked during the vget() to avoid a 392 * deadlock against a reclaim. 393 */ 394 vp = ip->vp; 395 if (vp) { 396 /* 397 * Inode must be unlocked during the vget() to avoid 398 * possible deadlocks, but leave the ip ref intact. 399 * 400 * vnode is held to prevent destruction during the 401 * vget(). The vget() can still fail if we lost 402 * a reclaim race on the vnode. 403 */ 404 vhold(vp); 405 ostate = hammer2_inode_lock_temp_release(ip); 406 if (vget(vp, LK_EXCLUSIVE)) { 407 vdrop(vp); 408 hammer2_inode_lock_temp_restore(ip, ostate); 409 continue; 410 } 411 hammer2_inode_lock_temp_restore(ip, ostate); 412 vdrop(vp); 413 /* vp still locked and ref from vget */ 414 if (ip->vp != vp) { 415 kprintf("hammer2: igetv race %p/%p\n", 416 ip->vp, vp); 417 vput(vp); 418 continue; 419 } 420 *errorp = 0; 421 break; 422 } 423 424 /* 425 * No vnode exists, allocate a new vnode. Beware of 426 * allocation races. This function will return an 427 * exclusively locked and referenced vnode. 428 */ 429 *errorp = getnewvnode(VT_HAMMER2, pmp->mp, &vp, 0, 0); 430 if (*errorp) { 431 kprintf("hammer2: igetv getnewvnode failed %d\n", 432 *errorp); 433 vp = NULL; 434 break; 435 } 436 437 /* 438 * Lock the inode and check for an allocation race. 439 */ 440 ostate = hammer2_inode_lock_upgrade(ip); 441 if (ip->vp != NULL) { 442 vp->v_type = VBAD; 443 vx_put(vp); 444 hammer2_inode_lock_downgrade(ip, ostate); 445 continue; 446 } 447 448 switch (ipdata->type) { 449 case HAMMER2_OBJTYPE_DIRECTORY: 450 vp->v_type = VDIR; 451 break; 452 case HAMMER2_OBJTYPE_REGFILE: 453 vp->v_type = VREG; 454 vinitvmio(vp, ipdata->size, 455 HAMMER2_LBUFSIZE, 456 (int)ipdata->size & HAMMER2_LBUFMASK); 457 break; 458 case HAMMER2_OBJTYPE_SOFTLINK: 459 /* 460 * XXX for now we are using the generic file_read 461 * and file_write code so we need a buffer cache 462 * association. 463 */ 464 vp->v_type = VLNK; 465 vinitvmio(vp, ipdata->size, 466 HAMMER2_LBUFSIZE, 467 (int)ipdata->size & HAMMER2_LBUFMASK); 468 break; 469 case HAMMER2_OBJTYPE_CDEV: 470 vp->v_type = VCHR; 471 /* fall through */ 472 case HAMMER2_OBJTYPE_BDEV: 473 vp->v_ops = &pmp->mp->mnt_vn_spec_ops; 474 if (ipdata->type != HAMMER2_OBJTYPE_CDEV) 475 vp->v_type = VBLK; 476 addaliasu(vp, ipdata->rmajor, ipdata->rminor); 477 break; 478 case HAMMER2_OBJTYPE_FIFO: 479 vp->v_type = VFIFO; 480 vp->v_ops = &pmp->mp->mnt_vn_fifo_ops; 481 break; 482 default: 483 panic("hammer2: unhandled objtype %d", ipdata->type); 484 break; 485 } 486 487 if (ip == pmp->iroot) 488 vsetflags(vp, VROOT); 489 490 vp->v_data = ip; 491 ip->vp = vp; 492 hammer2_inode_ref(ip); /* vp association */ 493 hammer2_inode_lock_downgrade(ip, ostate); 494 break; 495 } 496 497 /* 498 * Return non-NULL vp and *errorp == 0, or NULL vp and *errorp != 0. 499 */ 500 if (hammer2_debug & 0x0002) { 501 kprintf("igetv vp %p refs 0x%08x aux 0x%08x\n", 502 vp, vp->v_refcnt, vp->v_auxrefs); 503 } 504 return (vp); 505 } 506 507 /* 508 * Returns the inode associated with the passed-in cluster, creating the 509 * inode if necessary and synchronizing it to the passed-in cluster otherwise. 510 * 511 * The passed-in chain must be locked and will remain locked on return. 512 * The returned inode will be locked and the caller may dispose of both 513 * via hammer2_inode_unlock_ex(). However, if the caller needs to resolve 514 * a hardlink it must ref/unlock/relock/drop the inode. 515 * 516 * The hammer2_inode structure regulates the interface between the high level 517 * kernel VNOPS API and the filesystem backend (the chains). 518 */ 519 hammer2_inode_t * 520 hammer2_inode_get(hammer2_pfsmount_t *pmp, hammer2_inode_t *dip, 521 hammer2_cluster_t *cluster) 522 { 523 hammer2_inode_t *nip; 524 const hammer2_inode_data_t *iptmp; 525 const hammer2_inode_data_t *nipdata; 526 527 KKASSERT(hammer2_cluster_type(cluster) == HAMMER2_BREF_TYPE_INODE); 528 KKASSERT(pmp); 529 530 /* 531 * Interlocked lookup/ref of the inode. This code is only needed 532 * when looking up inodes with nlinks != 0 (TODO: optimize out 533 * otherwise and test for duplicates). 534 */ 535 again: 536 for (;;) { 537 iptmp = &hammer2_cluster_data(cluster)->ipdata; 538 nip = hammer2_inode_lookup(pmp, iptmp->inum); 539 if (nip == NULL) 540 break; 541 542 ccms_thread_lock(&nip->topo_cst, CCMS_STATE_EXCLUSIVE); 543 544 /* 545 * Handle SMP race (not applicable to the super-root spmp 546 * which can't index inodes due to duplicative inode numbers). 547 */ 548 if (pmp->spmp_hmp == NULL && 549 (nip->flags & HAMMER2_INODE_ONRBTREE) == 0) { 550 ccms_thread_unlock(&nip->topo_cst); 551 hammer2_inode_drop(nip); 552 continue; 553 } 554 hammer2_inode_repoint(nip, NULL, cluster); 555 return nip; 556 } 557 558 /* 559 * We couldn't find the inode number, create a new inode. 560 */ 561 nip = kmalloc(sizeof(*nip), pmp->minode, M_WAITOK | M_ZERO); 562 atomic_add_long(&pmp->inmem_inodes, 1); 563 hammer2_pfs_memory_inc(pmp); 564 hammer2_pfs_memory_wakeup(pmp); 565 if (pmp->spmp_hmp) 566 nip->flags = HAMMER2_INODE_SROOT; 567 568 /* 569 * Initialize nip's cluster 570 */ 571 nip->cluster.refs = 1; 572 nip->cluster.pmp = pmp; 573 nip->cluster.flags |= HAMMER2_CLUSTER_INODE; 574 hammer2_cluster_replace(&nip->cluster, cluster); 575 576 nipdata = &hammer2_cluster_data(cluster)->ipdata; 577 nip->inum = nipdata->inum; 578 nip->size = nipdata->size; 579 nip->mtime = nipdata->mtime; 580 hammer2_inode_repoint(nip, NULL, cluster); 581 nip->pip = dip; /* can be NULL */ 582 if (dip) 583 hammer2_inode_ref(dip); /* ref dip for nip->pip */ 584 585 nip->pmp = pmp; 586 587 /* 588 * ref and lock on nip gives it state compatible to after a 589 * hammer2_inode_lock_ex() call. 590 */ 591 nip->refs = 1; 592 ccms_cst_init(&nip->topo_cst, &nip->cluster); 593 ccms_thread_lock(&nip->topo_cst, CCMS_STATE_EXCLUSIVE); 594 /* combination of thread lock and chain lock == inode lock */ 595 596 /* 597 * Attempt to add the inode. If it fails we raced another inode 598 * get. Undo all the work and try again. 599 */ 600 if (pmp->spmp_hmp == NULL) { 601 spin_lock(&pmp->inum_spin); 602 if (RB_INSERT(hammer2_inode_tree, &pmp->inum_tree, nip)) { 603 spin_unlock(&pmp->inum_spin); 604 ccms_thread_unlock(&nip->topo_cst); 605 hammer2_inode_drop(nip); 606 goto again; 607 } 608 atomic_set_int(&nip->flags, HAMMER2_INODE_ONRBTREE); 609 spin_unlock(&pmp->inum_spin); 610 } 611 612 return (nip); 613 } 614 615 /* 616 * Create a new inode in the specified directory using the vattr to 617 * figure out the type of inode. 618 * 619 * If no error occurs the new inode with its cluster locked is returned in 620 * *nipp, otherwise an error is returned and *nipp is set to NULL. 621 * 622 * If vap and/or cred are NULL the related fields are not set and the 623 * inode type defaults to a directory. This is used when creating PFSs 624 * under the super-root, so the inode number is set to 1 in this case. 625 * 626 * dip is not locked on entry. 627 * 628 * NOTE: When used to create a snapshot, the inode is temporarily associated 629 * with the super-root spmp. XXX should pass new pmp for snapshot. 630 */ 631 hammer2_inode_t * 632 hammer2_inode_create(hammer2_trans_t *trans, hammer2_inode_t *dip, 633 struct vattr *vap, struct ucred *cred, 634 const uint8_t *name, size_t name_len, 635 hammer2_cluster_t **clusterp, int *errorp) 636 { 637 const hammer2_inode_data_t *dipdata; 638 hammer2_inode_data_t *nipdata; 639 hammer2_cluster_t *cluster; 640 hammer2_cluster_t *cparent; 641 hammer2_inode_t *nip; 642 hammer2_key_t key_dummy; 643 hammer2_key_t lhc; 644 int error; 645 uid_t xuid; 646 uuid_t dip_uid; 647 uuid_t dip_gid; 648 uint32_t dip_mode; 649 uint8_t dip_algo; 650 int ddflag; 651 652 lhc = hammer2_dirhash(name, name_len); 653 *errorp = 0; 654 655 /* 656 * Locate the inode or indirect block to create the new 657 * entry in. At the same time check for key collisions 658 * and iterate until we don't get one. 659 * 660 * NOTE: hidden inodes do not have iterators. 661 */ 662 retry: 663 cparent = hammer2_inode_lock_ex(dip); 664 dipdata = &hammer2_cluster_data(cparent)->ipdata; 665 dip_uid = dipdata->uid; 666 dip_gid = dipdata->gid; 667 dip_mode = dipdata->mode; 668 dip_algo = dipdata->comp_algo; 669 670 error = 0; 671 while (error == 0) { 672 cluster = hammer2_cluster_lookup(cparent, &key_dummy, 673 lhc, lhc, 0, &ddflag); 674 if (cluster == NULL) 675 break; 676 if ((lhc & HAMMER2_DIRHASH_VISIBLE) == 0) 677 error = ENOSPC; 678 if ((lhc & HAMMER2_DIRHASH_LOMASK) == HAMMER2_DIRHASH_LOMASK) 679 error = ENOSPC; 680 hammer2_cluster_unlock(cluster); 681 cluster = NULL; 682 ++lhc; 683 } 684 685 if (error == 0) { 686 error = hammer2_cluster_create(trans, cparent, &cluster, 687 lhc, 0, 688 HAMMER2_BREF_TYPE_INODE, 689 HAMMER2_INODE_BYTES); 690 } 691 #if INODE_DEBUG 692 kprintf("CREATE INODE %*.*s chain=%p\n", 693 (int)name_len, (int)name_len, name, 694 (cluster ? cluster->focus : NULL)); 695 #endif 696 697 /* 698 * Cleanup and handle retries. 699 */ 700 if (error == EAGAIN) { 701 hammer2_cluster_ref(cparent); 702 hammer2_inode_unlock_ex(dip, cparent); 703 hammer2_cluster_wait(cparent); 704 hammer2_cluster_drop(cparent); 705 goto retry; 706 } 707 hammer2_inode_unlock_ex(dip, cparent); 708 cparent = NULL; 709 710 if (error) { 711 KKASSERT(cluster == NULL); 712 *errorp = error; 713 return (NULL); 714 } 715 716 /* 717 * Set up the new inode. 718 * 719 * NOTE: *_get() integrates chain's lock into the inode lock. 720 * 721 * NOTE: Only one new inode can currently be created per 722 * transaction. If the need arises we can adjust 723 * hammer2_trans_init() to allow more. 724 * 725 * NOTE: nipdata will have chain's blockset data. 726 */ 727 KKASSERT(cluster->focus->flags & HAMMER2_CHAIN_MODIFIED); 728 nipdata = &hammer2_cluster_wdata(cluster)->ipdata; 729 nipdata->inum = trans->inode_tid; 730 hammer2_cluster_modsync(cluster); 731 nip = hammer2_inode_get(dip->pmp, dip, cluster); 732 nipdata = &hammer2_cluster_wdata(cluster)->ipdata; 733 734 if (vap) { 735 KKASSERT(trans->inodes_created == 0); 736 nipdata->type = hammer2_get_obj_type(vap->va_type); 737 nipdata->inum = trans->inode_tid; 738 ++trans->inodes_created; 739 740 switch (nipdata->type) { 741 case HAMMER2_OBJTYPE_CDEV: 742 case HAMMER2_OBJTYPE_BDEV: 743 nipdata->rmajor = vap->va_rmajor; 744 nipdata->rminor = vap->va_rminor; 745 break; 746 default: 747 break; 748 } 749 } else { 750 nipdata->type = HAMMER2_OBJTYPE_DIRECTORY; 751 nipdata->inum = 1; 752 } 753 754 /* Inherit parent's inode compression mode. */ 755 nip->comp_heuristic = 0; 756 nipdata->comp_algo = dip_algo; 757 nipdata->version = HAMMER2_INODE_VERSION_ONE; 758 hammer2_update_time(&nipdata->ctime); 759 nipdata->mtime = nipdata->ctime; 760 if (vap) 761 nipdata->mode = vap->va_mode; 762 nipdata->nlinks = 1; 763 if (vap) { 764 if (dip && dip->pmp) { 765 xuid = hammer2_to_unix_xid(&dip_uid); 766 xuid = vop_helper_create_uid(dip->pmp->mp, 767 dip_mode, 768 xuid, 769 cred, 770 &vap->va_mode); 771 } else { 772 /* super-root has no dip and/or pmp */ 773 xuid = 0; 774 } 775 if (vap->va_vaflags & VA_UID_UUID_VALID) 776 nipdata->uid = vap->va_uid_uuid; 777 else if (vap->va_uid != (uid_t)VNOVAL) 778 hammer2_guid_to_uuid(&nipdata->uid, vap->va_uid); 779 else 780 hammer2_guid_to_uuid(&nipdata->uid, xuid); 781 782 if (vap->va_vaflags & VA_GID_UUID_VALID) 783 nipdata->gid = vap->va_gid_uuid; 784 else if (vap->va_gid != (gid_t)VNOVAL) 785 hammer2_guid_to_uuid(&nipdata->gid, vap->va_gid); 786 else if (dip) 787 nipdata->gid = dip_gid; 788 } 789 790 /* 791 * Regular files and softlinks allow a small amount of data to be 792 * directly embedded in the inode. This flag will be cleared if 793 * the size is extended past the embedded limit. 794 */ 795 if (nipdata->type == HAMMER2_OBJTYPE_REGFILE || 796 nipdata->type == HAMMER2_OBJTYPE_SOFTLINK) { 797 nipdata->op_flags |= HAMMER2_OPFLAG_DIRECTDATA; 798 } 799 800 KKASSERT(name_len < HAMMER2_INODE_MAXNAME); 801 bcopy(name, nipdata->filename, name_len); 802 nipdata->name_key = lhc; 803 nipdata->name_len = name_len; 804 hammer2_cluster_modsync(cluster); 805 *clusterp = cluster; 806 807 return (nip); 808 } 809 810 /* 811 * Shift *chainp up to the specified directory, change the filename 812 * to "0xINODENUMBER", and adjust the key. The chain becomes the 813 * invisible hardlink target. 814 * 815 * The original *chainp has already been marked deleted. 816 */ 817 static 818 void 819 hammer2_hardlink_shiftup(hammer2_trans_t *trans, hammer2_cluster_t *cluster, 820 hammer2_inode_t *dip, hammer2_cluster_t *dcluster, 821 int nlinks, int *errorp) 822 { 823 const hammer2_inode_data_t *iptmp; 824 hammer2_inode_data_t *nipdata; 825 hammer2_cluster_t *xcluster; 826 hammer2_key_t key_dummy; 827 hammer2_key_t lhc; 828 hammer2_blockref_t bref; 829 int ddflag; 830 831 iptmp = &hammer2_cluster_data(cluster)->ipdata; 832 lhc = iptmp->inum; 833 KKASSERT((lhc & HAMMER2_DIRHASH_VISIBLE) == 0); 834 835 /* 836 * Locate the inode or indirect block to create the new 837 * entry in. lhc represents the inode number so there is 838 * no collision iteration. 839 * 840 * There should be no key collisions with invisible inode keys. 841 * 842 * WARNING! Must use inode_lock_ex() on dip to handle a stale 843 * dip->cluster cache. 844 */ 845 retry: 846 *errorp = 0; 847 xcluster = hammer2_cluster_lookup(dcluster, &key_dummy, 848 lhc, lhc, 0, &ddflag); 849 if (xcluster) { 850 kprintf("X3 chain %p dip %p dchain %p dip->chain %p\n", 851 xcluster->focus, dip, dcluster->focus, 852 dip->cluster.focus); 853 hammer2_cluster_unlock(xcluster); 854 xcluster = NULL; 855 *errorp = ENOSPC; 856 #if 0 857 Debugger("X3"); 858 #endif 859 } 860 861 /* 862 * Create entry in common parent directory using the seek position 863 * calculated above. 864 * 865 * We must refactor cluster because it might have been shifted into 866 * an indirect cluster by the create. 867 */ 868 if (*errorp == 0) { 869 KKASSERT(xcluster == NULL); 870 #if 0 871 *errorp = hammer2_cluster_create(trans, dcluster, &xcluster, 872 lhc, 0, 873 HAMMER2_BREF_TYPE_INODE,/* n/a */ 874 HAMMER2_INODE_BYTES); /* n/a */ 875 #endif 876 /*XXX this somehow isn't working on cluster XXX*/ 877 /*KKASSERT(xxx)*/ 878 } 879 880 /* 881 * Cleanup and handle retries. 882 */ 883 if (*errorp == EAGAIN) { 884 kprintf("R"); 885 hammer2_cluster_wait(dcluster); 886 hammer2_cluster_drop(dcluster); 887 goto retry; 888 } 889 890 /* 891 * Handle the error case 892 */ 893 if (*errorp) { 894 panic("error2"); 895 KKASSERT(xcluster == NULL); 896 return; 897 } 898 899 /* 900 * Use xcluster as a placeholder for (lhc). Duplicate cluster to the 901 * same target bref as xcluster and then delete xcluster. The 902 * duplication occurs after xcluster in flush order even though 903 * xcluster is deleted after the duplication. XXX 904 * 905 * WARNING! Duplications (to a different parent) can cause indirect 906 * blocks to be inserted, refactor xcluster. 907 * 908 * WARNING! Only key and keybits is extracted from a passed-in bref. 909 */ 910 hammer2_cluster_bref(cluster, &bref); 911 bref.key = lhc; /* invisible dir entry key */ 912 bref.keybits = 0; 913 hammer2_cluster_duplicate(trans, dcluster, cluster, &bref, 0, 2); 914 915 /* 916 * cluster is now 'live' again.. adjust the filename. 917 * 918 * Directory entries are inodes but this is a hidden hardlink 919 * target. The name isn't used but to ease debugging give it 920 * a name after its inode number. 921 */ 922 hammer2_cluster_modify(trans, cluster, 0); 923 nipdata = &hammer2_cluster_wdata(cluster)->ipdata; 924 ksnprintf(nipdata->filename, sizeof(nipdata->filename), 925 "0x%016jx", (intmax_t)nipdata->inum); 926 nipdata->name_len = strlen(nipdata->filename); 927 nipdata->name_key = lhc; 928 nipdata->nlinks += nlinks; 929 hammer2_cluster_modsync(cluster); 930 } 931 932 /* 933 * Connect the target inode represented by (*chainp) to the media topology 934 * at (dip, name, len). The caller can pass a rough *chainp, this function 935 * will issue lookup()s to position the parent chain properly for the 936 * chain insertion. 937 * 938 * If hlink is TRUE this function creates an OBJTYPE_HARDLINK directory 939 * entry instead of connecting (*chainp). 940 * 941 * If hlink is FALSE this function uses chain_duplicate() to make a copy 942 * if (*chainp) in the directory entry. (*chainp) is likely to be deleted 943 * by the caller in this case (e.g. rename). 944 */ 945 int 946 hammer2_inode_connect(hammer2_trans_t *trans, 947 hammer2_cluster_t **clusterp, int hlink, 948 hammer2_inode_t *dip, hammer2_cluster_t *dcluster, 949 const uint8_t *name, size_t name_len, 950 hammer2_key_t lhc) 951 { 952 hammer2_inode_data_t *wipdata; 953 hammer2_cluster_t *ocluster; 954 hammer2_cluster_t *ncluster; 955 hammer2_key_t key_dummy; 956 int ddflag; 957 int error; 958 959 /* 960 * Since ocluster is either disconnected from the topology or 961 * represents a hardlink terminus which is always a parent of or 962 * equal to dip, we should be able to safely lock dip->chain for 963 * our setup. 964 * 965 * WARNING! Must use inode_lock_ex() on dip to handle a stale 966 * dip->cluster. 967 */ 968 ocluster = *clusterp; 969 970 /* 971 * If name is non-NULL we calculate lhc, else we use the passed-in 972 * lhc. 973 */ 974 if (name) { 975 lhc = hammer2_dirhash(name, name_len); 976 977 /* 978 * Locate the inode or indirect block to create the new 979 * entry in. At the same time check for key collisions 980 * and iterate until we don't get one. 981 */ 982 error = 0; 983 while (error == 0) { 984 ncluster = hammer2_cluster_lookup(dcluster, &key_dummy, 985 lhc, lhc, 986 0, &ddflag); 987 if (ncluster == NULL) 988 break; 989 if ((lhc & HAMMER2_DIRHASH_LOMASK) == 990 HAMMER2_DIRHASH_LOMASK) { 991 error = ENOSPC; 992 } 993 hammer2_cluster_unlock(ncluster); 994 ncluster = NULL; 995 ++lhc; 996 } 997 } else { 998 /* 999 * Reconnect to specific key (used when moving 1000 * unlinked-but-open files into the hidden directory). 1001 */ 1002 ncluster = hammer2_cluster_lookup(dcluster, &key_dummy, 1003 lhc, lhc, 1004 0, &ddflag); 1005 KKASSERT(ncluster == NULL); 1006 } 1007 1008 if (error == 0) { 1009 if (hlink) { 1010 /* 1011 * Hardlink pointer needed, create totally fresh 1012 * directory entry. 1013 * 1014 * We must refactor ocluster because it might have 1015 * been shifted into an indirect cluster by the 1016 * create. 1017 */ 1018 KKASSERT(ncluster == NULL); 1019 error = hammer2_cluster_create(trans, 1020 dcluster, &ncluster, 1021 lhc, 0, 1022 HAMMER2_BREF_TYPE_INODE, 1023 HAMMER2_INODE_BYTES); 1024 hammer2_cluster_refactor(ocluster); 1025 } else { 1026 /* 1027 * Reconnect the original cluster and rename. Use 1028 * cluster_duplicate(). The caller will likely delete 1029 * or has already deleted the original chain in 1030 * this case. 1031 * 1032 * NOTE: cluster_duplicate() generates a new cluster 1033 * with CHAIN_DELETED cleared (ocluster typically 1034 * has it set from the file unlink). 1035 * 1036 * WARNING! Can cause held-over clusters to require a 1037 * refactor. Fortunately we have none (our 1038 * locked clusters are passed into and 1039 * modified by the call). 1040 */ 1041 ncluster = ocluster; 1042 ocluster = NULL; 1043 hammer2_cluster_duplicate(trans, NULL, ncluster, NULL, 1044 0, 3); 1045 error = hammer2_cluster_create(trans, 1046 dcluster, &ncluster, 1047 lhc, 0, 1048 HAMMER2_BREF_TYPE_INODE, 1049 HAMMER2_INODE_BYTES); 1050 } 1051 } 1052 1053 /* 1054 * Unlock stuff. 1055 */ 1056 KKASSERT(error != EAGAIN); 1057 1058 /* 1059 * ncluster should be NULL on error, leave ocluster 1060 * (ocluster == *clusterp) alone. 1061 */ 1062 if (error) { 1063 KKASSERT(ncluster == NULL); 1064 return (error); 1065 } 1066 1067 /* 1068 * Directory entries are inodes so if the name has changed we have 1069 * to update the inode. 1070 * 1071 * When creating an OBJTYPE_HARDLINK entry remember to unlock the 1072 * cluster, the caller will access the hardlink via the actual hardlink 1073 * target file and not the hardlink pointer entry, so we must still 1074 * return ocluster. 1075 */ 1076 if (hlink && hammer2_hardlink_enable >= 0) { 1077 /* 1078 * Create the HARDLINK pointer. oip represents the hardlink 1079 * target in this situation. 1080 * 1081 * We will return ocluster (the hardlink target). 1082 */ 1083 hammer2_cluster_modify(trans, ncluster, 0); 1084 KKASSERT(name_len < HAMMER2_INODE_MAXNAME); 1085 wipdata = &hammer2_cluster_wdata(ncluster)->ipdata; 1086 bcopy(name, wipdata->filename, name_len); 1087 wipdata->name_key = lhc; 1088 wipdata->name_len = name_len; 1089 wipdata->target_type = 1090 hammer2_cluster_data(ocluster)->ipdata.type; 1091 wipdata->type = HAMMER2_OBJTYPE_HARDLINK; 1092 wipdata->inum = hammer2_cluster_data(ocluster)->ipdata.inum; 1093 wipdata->nlinks = 1; 1094 hammer2_cluster_modsync(ncluster); 1095 hammer2_cluster_unlock(ncluster); 1096 ncluster = ocluster; 1097 ocluster = NULL; 1098 } else { 1099 /* 1100 * ncluster is a duplicate of ocluster at the new location. 1101 * We must fixup the name stored in oip. The bref key 1102 * has already been set up. 1103 */ 1104 hammer2_cluster_modify(trans, ncluster, 0); 1105 wipdata = &hammer2_cluster_wdata(ncluster)->ipdata; 1106 1107 KKASSERT(name_len < HAMMER2_INODE_MAXNAME); 1108 bcopy(name, wipdata->filename, name_len); 1109 wipdata->name_key = lhc; 1110 wipdata->name_len = name_len; 1111 wipdata->nlinks = 1; 1112 hammer2_cluster_modsync(ncluster); 1113 } 1114 1115 /* 1116 * We are replacing ocluster with ncluster, unlock ocluster. In the 1117 * case where ocluster is left unchanged the code above sets 1118 * ncluster to ocluster and ocluster to NULL, resulting in a NOP here. 1119 */ 1120 if (ocluster) 1121 hammer2_cluster_unlock(ocluster); 1122 *clusterp = ncluster; 1123 1124 return (0); 1125 } 1126 1127 /* 1128 * Repoint ip->cluster's chains to cluster's chains. Caller must hold 1129 * the inode exclusively locked. cluster may be NULL to clean out any 1130 * chains in ip->cluster. 1131 */ 1132 void 1133 hammer2_inode_repoint(hammer2_inode_t *ip, hammer2_inode_t *pip, 1134 hammer2_cluster_t *cluster) 1135 { 1136 hammer2_chain_t *ochain; 1137 hammer2_chain_t *nchain; 1138 hammer2_inode_t *opip; 1139 int i; 1140 1141 /* 1142 * Replace chains in ip->cluster with chains from cluster and 1143 * adjust the focus if necessary. 1144 * 1145 * NOTE: nchain and/or ochain can be NULL due to gaps 1146 * in the cluster arrays. 1147 */ 1148 ip->cluster.focus = NULL; 1149 for (i = 0; cluster && i < cluster->nchains; ++i) { 1150 nchain = cluster->array[i]; 1151 if (i < ip->cluster.nchains) { 1152 ochain = ip->cluster.array[i]; 1153 if (ochain == nchain) { 1154 if (ip->cluster.focus == NULL) 1155 ip->cluster.focus = nchain; 1156 continue; 1157 } 1158 } else { 1159 ochain = NULL; 1160 } 1161 1162 /* 1163 * Make adjustments 1164 */ 1165 ip->cluster.array[i] = nchain; 1166 if (ip->cluster.focus == NULL) 1167 ip->cluster.focus = nchain; 1168 if (nchain) 1169 hammer2_chain_ref(nchain); 1170 if (ochain) 1171 hammer2_chain_drop(ochain); 1172 } 1173 1174 /* 1175 * Release any left-over chains in ip->cluster. 1176 */ 1177 while (i < ip->cluster.nchains) { 1178 nchain = ip->cluster.array[i]; 1179 if (nchain) { 1180 ip->cluster.array[i] = NULL; 1181 hammer2_chain_drop(nchain); 1182 } 1183 ++i; 1184 } 1185 ip->cluster.nchains = cluster ? cluster->nchains : 0; 1186 1187 /* 1188 * Repoint ip->pip if requested (non-NULL pip). 1189 */ 1190 if (pip && ip->pip != pip) { 1191 opip = ip->pip; 1192 hammer2_inode_ref(pip); 1193 ip->pip = pip; 1194 if (opip) 1195 hammer2_inode_drop(opip); 1196 } 1197 } 1198 1199 /* 1200 * Unlink the file from the specified directory inode. The directory inode 1201 * does not need to be locked. 1202 * 1203 * isdir determines whether a directory/non-directory check should be made. 1204 * No check is made if isdir is set to -1. 1205 * 1206 * isopen specifies whether special unlink-with-open-descriptor handling 1207 * must be performed. If set to -1 the caller is deleting a PFS and we 1208 * check whether the chain is mounted or not (chain->pmp != NULL). 1 is 1209 * implied if it is mounted. 1210 * 1211 * If isopen is 1 and nlinks drops to 0 this function must move the chain 1212 * to a special hidden directory until last-close occurs on the file. 1213 * 1214 * NOTE! The underlying file can still be active with open descriptors 1215 * or if the chain is being manually held (e.g. for rename). 1216 * 1217 * The caller is responsible for fixing up ip->chain if e.g. a 1218 * rename occurs (see chain_duplicate()). 1219 */ 1220 int 1221 hammer2_unlink_file(hammer2_trans_t *trans, hammer2_inode_t *dip, 1222 const uint8_t *name, size_t name_len, 1223 int isdir, int *hlinkp, struct nchandle *nch) 1224 { 1225 const hammer2_inode_data_t *ripdata; 1226 hammer2_inode_data_t *wipdata; 1227 hammer2_cluster_t *cparent; 1228 hammer2_cluster_t *ocluster; 1229 hammer2_cluster_t *cluster; 1230 hammer2_cluster_t *dparent; 1231 hammer2_cluster_t *dcluster; 1232 hammer2_key_t key_dummy; 1233 hammer2_key_t key_next; 1234 hammer2_key_t lhc; 1235 int error; 1236 int ddflag; 1237 uint8_t type; 1238 1239 error = 0; 1240 ocluster = NULL; 1241 lhc = hammer2_dirhash(name, name_len); 1242 1243 /* 1244 * Search for the filename in the directory 1245 */ 1246 if (hlinkp) 1247 *hlinkp = 0; 1248 cparent = hammer2_inode_lock_ex(dip); 1249 cluster = hammer2_cluster_lookup(cparent, &key_next, 1250 lhc, lhc + HAMMER2_DIRHASH_LOMASK, 1251 0, &ddflag); 1252 while (cluster) { 1253 if (hammer2_cluster_type(cluster) == HAMMER2_BREF_TYPE_INODE) { 1254 ripdata = &hammer2_cluster_data(cluster)->ipdata; 1255 if (ripdata->name_len == name_len && 1256 bcmp(ripdata->filename, name, name_len) == 0) { 1257 break; 1258 } 1259 } 1260 cluster = hammer2_cluster_next(cparent, cluster, &key_next, 1261 key_next, 1262 lhc + HAMMER2_DIRHASH_LOMASK, 1263 0); 1264 } 1265 hammer2_inode_unlock_ex(dip, NULL); /* retain parent */ 1266 1267 /* 1268 * Not found or wrong type (isdir < 0 disables the type check). 1269 * If a hardlink pointer, type checks use the hardlink target. 1270 */ 1271 if (cluster == NULL) { 1272 error = ENOENT; 1273 goto done; 1274 } 1275 ripdata = &hammer2_cluster_data(cluster)->ipdata; 1276 type = ripdata->type; 1277 if (type == HAMMER2_OBJTYPE_HARDLINK) { 1278 if (hlinkp) 1279 *hlinkp = 1; 1280 type = ripdata->target_type; 1281 } 1282 1283 if (type == HAMMER2_OBJTYPE_DIRECTORY && isdir == 0) { 1284 error = ENOTDIR; 1285 goto done; 1286 } 1287 if (type != HAMMER2_OBJTYPE_DIRECTORY && isdir >= 1) { 1288 error = EISDIR; 1289 goto done; 1290 } 1291 1292 /* 1293 * Hardlink must be resolved. We can't hold the parent locked 1294 * while we do this or we could deadlock. 1295 * 1296 * On success cluster will be adjusted to point at the hardlink target 1297 * and ocluster will point to the hardlink pointer in the original 1298 * directory. Otherwise cluster remains pointing to the original. 1299 * 1300 * Lock ownership is transfered to cluster. ocluster is merely 1301 * referenced. 1302 */ 1303 if (ripdata->type == HAMMER2_OBJTYPE_HARDLINK) { 1304 hammer2_cluster_unlock(cparent); 1305 cparent = NULL; 1306 1307 ocluster = cluster; 1308 cluster = hammer2_cluster_copy(ocluster, 0); 1309 error = hammer2_hardlink_find(dip, cluster); 1310 KKASSERT(error == 0); 1311 } 1312 1313 /* 1314 * If this is a directory the directory must be empty. However, if 1315 * isdir < 0 we are doing a rename and the directory does not have 1316 * to be empty, and if isdir > 1 we are deleting a PFS/snapshot 1317 * and the directory does not have to be empty. 1318 * 1319 * NOTE: We check the full key range here which covers both visible 1320 * and invisible entries. Theoretically there should be no 1321 * invisible (hardlink target) entries if there are no visible 1322 * entries. 1323 */ 1324 if (type == HAMMER2_OBJTYPE_DIRECTORY && isdir == 1) { 1325 dparent = hammer2_cluster_lookup_init(cluster, 0); 1326 dcluster = hammer2_cluster_lookup(dparent, &key_dummy, 1327 0, (hammer2_key_t)-1, 1328 HAMMER2_LOOKUP_NODATA, 1329 &ddflag); 1330 if (dcluster) { 1331 hammer2_cluster_unlock(dcluster); 1332 hammer2_cluster_lookup_done(dparent); 1333 error = ENOTEMPTY; 1334 goto done; 1335 } 1336 hammer2_cluster_lookup_done(dparent); 1337 dparent = NULL; 1338 /* dcluster NULL */ 1339 } 1340 1341 /* 1342 * Ok, we can now unlink the cluster. We always decrement nlinks even 1343 * if the entry can be deleted in case someone has the file open and 1344 * does an fstat(). 1345 * 1346 * The cluster itself will no longer be in the on-media topology but 1347 * can still be flushed to the media (e.g. if an open descriptor 1348 * remains). When the last vnode/ip ref goes away the cluster will 1349 * be marked unmodified, avoiding any further (now unnecesary) I/O. 1350 * 1351 * A non-NULL ocluster indicates a hardlink. 1352 */ 1353 if (ocluster) { 1354 /* 1355 * Delete the original hardlink pointer unconditionally. 1356 * (any open descriptors will migrate to the hardlink 1357 * target and have no affect on this operation). 1358 * 1359 * NOTE: parent from above is NULL when ocluster != NULL 1360 * so we can reuse it. 1361 */ 1362 hammer2_cluster_lock(ocluster, HAMMER2_RESOLVE_ALWAYS); 1363 hammer2_cluster_delete(trans, ocluster, 0); 1364 hammer2_cluster_unlock(ocluster); 1365 } 1366 1367 /* 1368 * Decrement nlinks on the hardlink target (or original file if 1369 * there it was not hardlinked). Delete the target when nlinks 1370 * reaches 0 with special handling if (isopen) is set. 1371 * 1372 * NOTE! In DragonFly the vnops function calls cache_unlink() after 1373 * calling us here to clean out the namecache association, 1374 * (which does not represent a ref for the open-test), and to 1375 * force finalization of the vnode if/when the last ref gets 1376 * dropped. 1377 * 1378 * NOTE! Files are unlinked by rename and then relinked. nch will be 1379 * passed as NULL in this situation. hammer2_inode_connect() 1380 * will bump nlinks. 1381 */ 1382 KKASSERT(cluster != NULL); 1383 hammer2_cluster_modify(trans, cluster, 0); 1384 wipdata = &hammer2_cluster_wdata(cluster)->ipdata; 1385 ripdata = wipdata; 1386 --wipdata->nlinks; 1387 if ((int64_t)wipdata->nlinks < 0) { /* XXX debugging */ 1388 wipdata->nlinks = 0; 1389 } 1390 hammer2_cluster_modsync(cluster); 1391 1392 if (wipdata->nlinks == 0) { 1393 if ((cluster->focus->flags & HAMMER2_CHAIN_PFSROOT) && 1394 cluster->pmp) { 1395 error = EINVAL; 1396 kprintf("hammer2: PFS \"%s\" cannot be deleted " 1397 "while still mounted\n", 1398 wipdata->filename); 1399 goto done; 1400 } 1401 if (nch && cache_isopen(nch)) { 1402 kprintf("WARNING: unlinking open file\n"); 1403 hammer2_cluster_set_chainflags(cluster, 1404 HAMMER2_CHAIN_UNLINKED); 1405 hammer2_inode_move_to_hidden(trans, &cluster, 1406 wipdata->inum); 1407 } else { 1408 hammer2_cluster_delete(trans, cluster, 0); 1409 } 1410 } 1411 error = 0; 1412 done: 1413 if (cluster) 1414 hammer2_cluster_unlock(cluster); 1415 if (cparent) 1416 hammer2_cluster_lookup_done(cparent); 1417 if (ocluster) { 1418 hammer2_cluster_drop(ocluster); 1419 } 1420 1421 return error; 1422 } 1423 1424 /* 1425 * This is called from the mount code to initialize pmp->ihidden 1426 */ 1427 void 1428 hammer2_inode_install_hidden(hammer2_pfsmount_t *pmp) 1429 { 1430 hammer2_trans_t trans; 1431 hammer2_cluster_t *cparent; 1432 hammer2_cluster_t *cluster; 1433 hammer2_cluster_t *scan; 1434 hammer2_inode_data_t *wipdata; 1435 hammer2_key_t key_dummy; 1436 hammer2_key_t key_next; 1437 int ddflag; 1438 int error; 1439 int count; 1440 1441 if (pmp->ihidden) 1442 return; 1443 1444 /* 1445 * Find the hidden directory 1446 */ 1447 bzero(&key_dummy, sizeof(key_dummy)); 1448 hammer2_trans_init(&trans, pmp, 0); 1449 1450 cparent = hammer2_inode_lock_ex(pmp->iroot); 1451 cluster = hammer2_cluster_lookup(cparent, &key_dummy, 1452 HAMMER2_INODE_HIDDENDIR, 1453 HAMMER2_INODE_HIDDENDIR, 1454 0, &ddflag); 1455 if (cluster) { 1456 pmp->ihidden = hammer2_inode_get(pmp, pmp->iroot, cluster); 1457 hammer2_inode_ref(pmp->ihidden); 1458 1459 /* 1460 * Remove any unlinked files which were left open as-of 1461 * any system crash. 1462 */ 1463 count = 0; 1464 scan = hammer2_cluster_lookup(cluster, &key_next, 1465 0, HAMMER2_TID_MAX, 1466 HAMMER2_LOOKUP_NODATA, &ddflag); 1467 while (scan) { 1468 if (hammer2_cluster_type(scan) == 1469 HAMMER2_BREF_TYPE_INODE) { 1470 hammer2_cluster_delete(&trans, scan, 0); 1471 ++count; 1472 } 1473 scan = hammer2_cluster_next(cluster, scan, &key_next, 1474 0, HAMMER2_TID_MAX, 1475 HAMMER2_LOOKUP_NODATA); 1476 } 1477 1478 hammer2_inode_unlock_ex(pmp->ihidden, cluster); 1479 hammer2_inode_unlock_ex(pmp->iroot, cparent); 1480 hammer2_trans_done(&trans); 1481 kprintf("hammer2: PFS loaded hidden dir, " 1482 "removed %d dead entries\n", count); 1483 return; 1484 } 1485 1486 /* 1487 * Create the hidden directory 1488 */ 1489 error = hammer2_cluster_create(&trans, cparent, &cluster, 1490 HAMMER2_INODE_HIDDENDIR, 0, 1491 HAMMER2_BREF_TYPE_INODE, 1492 HAMMER2_INODE_BYTES); 1493 hammer2_inode_unlock_ex(pmp->iroot, cparent); 1494 1495 hammer2_cluster_modify(&trans, cluster, 0); 1496 wipdata = &hammer2_cluster_wdata(cluster)->ipdata; 1497 wipdata->type = HAMMER2_OBJTYPE_DIRECTORY; 1498 wipdata->inum = HAMMER2_INODE_HIDDENDIR; 1499 wipdata->nlinks = 1; 1500 hammer2_cluster_modsync(cluster); 1501 kprintf("hammer2: PFS root missing hidden directory, creating\n"); 1502 1503 pmp->ihidden = hammer2_inode_get(pmp, pmp->iroot, cluster); 1504 hammer2_inode_ref(pmp->ihidden); 1505 hammer2_inode_unlock_ex(pmp->ihidden, cluster); 1506 hammer2_trans_done(&trans); 1507 } 1508 1509 /* 1510 * If an open file is unlinked H2 needs to retain the file in the topology 1511 * to ensure that its backing store is not recovered by the bulk free scan. 1512 * This also allows us to avoid having to special-case the CHAIN_DELETED flag. 1513 * 1514 * To do this the file is moved to a hidden directory in the PFS root and 1515 * renamed. The hidden directory must be created if it does not exist. 1516 */ 1517 static 1518 void 1519 hammer2_inode_move_to_hidden(hammer2_trans_t *trans, 1520 hammer2_cluster_t **clusterp, hammer2_tid_t inum) 1521 { 1522 hammer2_cluster_t *dcluster; 1523 hammer2_pfsmount_t *pmp; 1524 int error; 1525 1526 pmp = (*clusterp)->pmp; 1527 KKASSERT(pmp != NULL); 1528 KKASSERT(pmp->ihidden != NULL); 1529 1530 hammer2_cluster_delete(trans, *clusterp, 0); 1531 dcluster = hammer2_inode_lock_ex(pmp->ihidden); 1532 error = hammer2_inode_connect(trans, clusterp, 0, 1533 pmp->ihidden, dcluster, 1534 NULL, 0, inum); 1535 hammer2_inode_unlock_ex(pmp->ihidden, dcluster); 1536 KKASSERT(error == 0); 1537 } 1538 1539 /* 1540 * Given an exclusively locked inode and cluster we consolidate its cluster 1541 * for hardlink creation, adding (nlinks) to the file's link count and 1542 * potentially relocating the inode to a directory common to ip->pip and tdip. 1543 * 1544 * Replaces (*clusterp) if consolidation occurred, unlocking the old cluster 1545 * and returning a new locked cluster. 1546 * 1547 * NOTE! This function will also replace ip->cluster. 1548 */ 1549 int 1550 hammer2_hardlink_consolidate(hammer2_trans_t *trans, 1551 hammer2_inode_t *ip, 1552 hammer2_cluster_t **clusterp, 1553 hammer2_inode_t *cdip, 1554 hammer2_cluster_t *cdcluster, 1555 int nlinks) 1556 { 1557 const hammer2_inode_data_t *ripdata; 1558 hammer2_inode_data_t *wipdata; 1559 hammer2_cluster_t *cluster; 1560 hammer2_cluster_t *ncluster; 1561 int error; 1562 1563 cluster = *clusterp; 1564 ripdata = &hammer2_cluster_data(cluster)->ipdata; 1565 if (nlinks == 0 && /* no hardlink needed */ 1566 (ripdata->name_key & HAMMER2_DIRHASH_VISIBLE)) { 1567 return (0); 1568 } 1569 1570 if (hammer2_hardlink_enable == 0) { /* disallow hardlinks */ 1571 hammer2_cluster_unlock(cluster); 1572 *clusterp = NULL; 1573 return (ENOTSUP); 1574 } 1575 1576 /* 1577 * If no change in the hardlink's target directory is required and 1578 * this is already a hardlink target, all we need to do is adjust 1579 * the link count. 1580 */ 1581 ripdata = &hammer2_cluster_data(cluster)->ipdata; 1582 if (cdip == ip->pip && 1583 (ripdata->name_key & HAMMER2_DIRHASH_VISIBLE) == 0) { 1584 if (nlinks) { 1585 hammer2_cluster_modify(trans, cluster, 0); 1586 wipdata = &hammer2_cluster_wdata(cluster)->ipdata; 1587 wipdata->nlinks += nlinks; 1588 hammer2_cluster_modsync(cluster); 1589 ripdata = wipdata; 1590 } 1591 error = 0; 1592 goto done; 1593 } 1594 1595 1596 /* 1597 * cluster is the real inode. If it's visible we have to convert it 1598 * to a hardlink pointer. If it is not visible then it is already 1599 * a hardlink target and only needs to be deleted. 1600 */ 1601 KKASSERT((cluster->focus->flags & HAMMER2_CHAIN_DELETED) == 0); 1602 ripdata = &hammer2_cluster_data(cluster)->ipdata; 1603 KKASSERT(ripdata->type != HAMMER2_OBJTYPE_HARDLINK); 1604 if (ripdata->name_key & HAMMER2_DIRHASH_VISIBLE) { 1605 /* 1606 * We are going to duplicate cluster later, causing its 1607 * media block to be shifted to the duplicate. Even though 1608 * we are delete-duplicating ncluster here it might decide not 1609 * to reallocate the block. Set FORCECOW to force it to. 1610 */ 1611 ncluster = hammer2_cluster_copy(cluster, 1612 HAMMER2_CLUSTER_COPY_NOREF); 1613 hammer2_cluster_lock(ncluster, HAMMER2_RESOLVE_ALWAYS); 1614 hammer2_cluster_set_chainflags(ncluster, 1615 HAMMER2_CHAIN_FORCECOW); 1616 hammer2_cluster_delete_duplicate(trans, ncluster, 1617 HAMMER2_DELDUP_RECORE); 1618 KKASSERT((ncluster->focus->flags & 1619 HAMMER2_CHAIN_DUPLICATED) == 0); 1620 wipdata = &hammer2_cluster_wdata(ncluster)->ipdata; 1621 wipdata->target_type = wipdata->type; 1622 wipdata->type = HAMMER2_OBJTYPE_HARDLINK; 1623 wipdata->uflags = 0; 1624 wipdata->rmajor = 0; 1625 wipdata->rminor = 0; 1626 wipdata->ctime = 0; 1627 wipdata->mtime = 0; 1628 wipdata->atime = 0; 1629 wipdata->btime = 0; 1630 bzero(&wipdata->uid, sizeof(wipdata->uid)); 1631 bzero(&wipdata->gid, sizeof(wipdata->gid)); 1632 wipdata->op_flags = HAMMER2_OPFLAG_DIRECTDATA; 1633 wipdata->cap_flags = 0; 1634 wipdata->mode = 0; 1635 wipdata->size = 0; 1636 wipdata->nlinks = 1; 1637 wipdata->iparent = 0; /* XXX */ 1638 wipdata->pfs_type = 0; 1639 wipdata->pfs_inum = 0; 1640 bzero(&wipdata->pfs_clid, sizeof(wipdata->pfs_clid)); 1641 bzero(&wipdata->pfs_fsid, sizeof(wipdata->pfs_fsid)); 1642 wipdata->data_quota = 0; 1643 wipdata->data_count = 0; 1644 wipdata->inode_quota = 0; 1645 wipdata->inode_count = 0; 1646 wipdata->attr_tid = 0; 1647 wipdata->dirent_tid = 0; 1648 bzero(&wipdata->u, sizeof(wipdata->u)); 1649 /* XXX transaction ids */ 1650 hammer2_cluster_modsync(ncluster); 1651 } else { 1652 hammer2_cluster_delete(trans, cluster, 0); 1653 ncluster = NULL; 1654 } 1655 ripdata = wipdata; 1656 1657 /* 1658 * cluster represents the hardlink target and is now flagged deleted. 1659 * duplicate it to the parent directory and adjust nlinks. 1660 * 1661 * WARNING! The shiftup() call can cause ncluster to be moved into 1662 * an indirect block, and our ncluster will wind up pointing 1663 * to the older/original version. 1664 */ 1665 KKASSERT(cluster->focus->flags & HAMMER2_CHAIN_DELETED); 1666 hammer2_hardlink_shiftup(trans, cluster, cdip, cdcluster, 1667 nlinks, &error); 1668 1669 if (error == 0) 1670 hammer2_inode_repoint(ip, cdip, cluster); 1671 1672 /* 1673 * Unlock and destroy ncluster. 1674 * Return the shifted cluster in *clusterp. 1675 */ 1676 if (ncluster) 1677 hammer2_cluster_unlock(ncluster); 1678 1679 done: 1680 /* 1681 * Cleanup, cluster/ncluster already dealt with. 1682 */ 1683 *clusterp = cluster; 1684 hammer2_inode_drop(cdip); 1685 1686 return (error); 1687 } 1688 1689 /* 1690 * If (*ochainp) is non-NULL it points to the forward OBJTYPE_HARDLINK 1691 * inode while (*chainp) points to the resolved (hidden hardlink 1692 * target) inode. In this situation when nlinks is 1 we wish to 1693 * deconsolidate the hardlink, moving it back to the directory that now 1694 * represents the only remaining link. 1695 */ 1696 int 1697 hammer2_hardlink_deconsolidate(hammer2_trans_t *trans, 1698 hammer2_inode_t *dip, 1699 hammer2_chain_t **chainp, 1700 hammer2_chain_t **ochainp) 1701 { 1702 if (*ochainp == NULL) 1703 return (0); 1704 /* XXX */ 1705 return (0); 1706 } 1707 1708 /* 1709 * The caller presents a locked *chainp pointing to a HAMMER2_BREF_TYPE_INODE 1710 * with an obj_type of HAMMER2_OBJTYPE_HARDLINK. This routine will gobble 1711 * the *chainp and return a new locked *chainp representing the file target 1712 * (the original *chainp will be unlocked). 1713 * 1714 * When a match is found the chain representing the original HARDLINK 1715 * will be returned in *ochainp with a ref, but not locked. 1716 * 1717 * When no match is found *chainp is set to NULL and EIO is returned. 1718 * (*ochainp) will still be set to the original chain with a ref but not 1719 * locked. 1720 */ 1721 int 1722 hammer2_hardlink_find(hammer2_inode_t *dip, hammer2_cluster_t *cluster) 1723 { 1724 const hammer2_inode_data_t *ipdata; 1725 hammer2_cluster_t *cparent; 1726 hammer2_cluster_t *rcluster; 1727 hammer2_inode_t *ip; 1728 hammer2_inode_t *pip; 1729 hammer2_key_t key_dummy; 1730 hammer2_key_t lhc; 1731 int ddflag; 1732 1733 pip = dip; 1734 hammer2_inode_ref(pip); /* for loop */ 1735 1736 /* 1737 * Locate the hardlink. pip is referenced and not locked. 1738 */ 1739 ipdata = &hammer2_cluster_data(cluster)->ipdata; 1740 lhc = ipdata->inum; 1741 1742 /* 1743 * We don't need the cluster's chains, but we need to retain the 1744 * cluster structure itself so we can load the hardlink search 1745 * result into it. 1746 */ 1747 KKASSERT(cluster->refs == 1); 1748 atomic_add_int(&cluster->refs, 1); 1749 hammer2_cluster_unlock(cluster); /* hack */ 1750 cluster->nchains = 0; /* hack */ 1751 1752 rcluster = NULL; 1753 1754 while ((ip = pip) != NULL) { 1755 cparent = hammer2_inode_lock_ex(ip); 1756 hammer2_inode_drop(ip); /* loop */ 1757 KKASSERT(hammer2_cluster_type(cparent) == 1758 HAMMER2_BREF_TYPE_INODE); 1759 rcluster = hammer2_cluster_lookup(cparent, &key_dummy, 1760 lhc, lhc, 0, &ddflag); 1761 hammer2_cluster_lookup_done(cparent); /* discard parent */ 1762 if (rcluster) 1763 break; 1764 pip = ip->pip; /* safe, ip held locked */ 1765 if (pip) 1766 hammer2_inode_ref(pip); /* loop */ 1767 hammer2_inode_unlock_ex(ip, NULL); 1768 } 1769 1770 /* 1771 * chain is locked, ip is locked. Unlock ip, return the locked 1772 * chain. *ipp is already set w/a ref count and not locked. 1773 * 1774 * (cparent is already unlocked). 1775 */ 1776 if (ip) 1777 hammer2_inode_unlock_ex(ip, NULL); 1778 1779 if (rcluster) { 1780 hammer2_cluster_replace(cluster, rcluster); 1781 hammer2_cluster_drop(rcluster); 1782 return (0); 1783 } else { 1784 return (EIO); 1785 } 1786 } 1787 1788 /* 1789 * Find the directory common to both fdip and tdip, hold and return 1790 * its inode. 1791 */ 1792 hammer2_inode_t * 1793 hammer2_inode_common_parent(hammer2_inode_t *fdip, hammer2_inode_t *tdip) 1794 { 1795 hammer2_inode_t *scan1; 1796 hammer2_inode_t *scan2; 1797 1798 /* 1799 * We used to have a depth field but it complicated matters too 1800 * much for directory renames. So now its ugly. Check for 1801 * simple cases before giving up and doing it the expensive way. 1802 * 1803 * XXX need a bottom-up topology stability lock 1804 */ 1805 if (fdip == tdip || fdip == tdip->pip) { 1806 hammer2_inode_ref(fdip); 1807 return(fdip); 1808 } 1809 if (fdip->pip == tdip) { 1810 hammer2_inode_ref(tdip); 1811 return(tdip); 1812 } 1813 1814 /* 1815 * XXX not MPSAFE 1816 */ 1817 for (scan1 = fdip; scan1->pmp == fdip->pmp; scan1 = scan1->pip) { 1818 scan2 = tdip; 1819 while (scan2->pmp == tdip->pmp) { 1820 if (scan1 == scan2) { 1821 hammer2_inode_ref(scan1); 1822 return(scan1); 1823 } 1824 scan2 = scan2->pip; 1825 if (scan2 == NULL) 1826 break; 1827 } 1828 } 1829 panic("hammer2_inode_common_parent: no common parent %p %p\n", 1830 fdip, tdip); 1831 /* NOT REACHED */ 1832 return(NULL); 1833 } 1834 1835 /* 1836 * Synchronize the inode's frontend state with the chain state prior 1837 * to any explicit flush of the inode or any strategy write call. 1838 * 1839 * Called with a locked inode. 1840 */ 1841 void 1842 hammer2_inode_fsync(hammer2_trans_t *trans, hammer2_inode_t *ip, 1843 hammer2_cluster_t *cparent) 1844 { 1845 const hammer2_inode_data_t *ripdata; 1846 hammer2_inode_data_t *wipdata; 1847 hammer2_cluster_t *dparent; 1848 hammer2_cluster_t *cluster; 1849 hammer2_key_t lbase; 1850 hammer2_key_t key_next; 1851 int dosync = 0; 1852 int ddflag; 1853 1854 ripdata = &hammer2_cluster_data(cparent)->ipdata; /* target file */ 1855 1856 if (ip->flags & HAMMER2_INODE_MTIME) { 1857 wipdata = hammer2_cluster_modify_ip(trans, ip, cparent, 0); 1858 atomic_clear_int(&ip->flags, HAMMER2_INODE_MTIME); 1859 wipdata->mtime = ip->mtime; 1860 dosync = 1; 1861 ripdata = wipdata; 1862 } 1863 if ((ip->flags & HAMMER2_INODE_RESIZED) && ip->size < ripdata->size) { 1864 wipdata = hammer2_cluster_modify_ip(trans, ip, cparent, 0); 1865 wipdata->size = ip->size; 1866 dosync = 1; 1867 ripdata = wipdata; 1868 atomic_clear_int(&ip->flags, HAMMER2_INODE_RESIZED); 1869 1870 /* 1871 * We must delete any chains beyond the EOF. The chain 1872 * straddling the EOF will be pending in the bioq. 1873 */ 1874 lbase = (ripdata->size + HAMMER2_PBUFMASK64) & 1875 ~HAMMER2_PBUFMASK64; 1876 dparent = hammer2_cluster_lookup_init(&ip->cluster, 0); 1877 cluster = hammer2_cluster_lookup(dparent, &key_next, 1878 lbase, (hammer2_key_t)-1, 1879 HAMMER2_LOOKUP_NODATA, 1880 &ddflag); 1881 while (cluster) { 1882 /* 1883 * Degenerate embedded case, nothing to loop on 1884 */ 1885 switch (hammer2_cluster_type(cluster)) { 1886 case HAMMER2_BREF_TYPE_INODE: 1887 hammer2_cluster_unlock(cluster); 1888 cluster = NULL; 1889 break; 1890 case HAMMER2_BREF_TYPE_DATA: 1891 hammer2_cluster_delete(trans, cluster, 0); 1892 /* fall through */ 1893 default: 1894 cluster = hammer2_cluster_next(dparent, cluster, 1895 &key_next, 1896 key_next, (hammer2_key_t)-1, 1897 HAMMER2_LOOKUP_NODATA); 1898 break; 1899 } 1900 } 1901 hammer2_cluster_lookup_done(dparent); 1902 } else 1903 if ((ip->flags & HAMMER2_INODE_RESIZED) && ip->size > ripdata->size) { 1904 wipdata = hammer2_cluster_modify_ip(trans, ip, cparent, 0); 1905 wipdata->size = ip->size; 1906 atomic_clear_int(&ip->flags, HAMMER2_INODE_RESIZED); 1907 1908 /* 1909 * When resizing larger we may not have any direct-data 1910 * available. 1911 */ 1912 if ((wipdata->op_flags & HAMMER2_OPFLAG_DIRECTDATA) && 1913 ip->size > HAMMER2_EMBEDDED_BYTES) { 1914 wipdata->op_flags &= ~HAMMER2_OPFLAG_DIRECTDATA; 1915 bzero(&wipdata->u.blockset, 1916 sizeof(wipdata->u.blockset)); 1917 } 1918 dosync = 1; 1919 ripdata = wipdata; 1920 } 1921 if (dosync) 1922 hammer2_cluster_modsync(cparent); 1923 } 1924