1 /* 2 * Copyright (c) 2011-2014 The DragonFly Project. All rights reserved. 3 * 4 * This code is derived from software contributed to The DragonFly Project 5 * by Matthew Dillon <dillon@dragonflybsd.org> 6 * by Venkatesh Srinivas <vsrinivas@dragonflybsd.org> 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in 16 * the documentation and/or other materials provided with the 17 * distribution. 18 * 3. Neither the name of The DragonFly Project nor the names of its 19 * contributors may be used to endorse or promote products derived 20 * from this software without specific, prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 23 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 24 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 25 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 26 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 27 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 28 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 29 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 30 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 31 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 32 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33 * SUCH DAMAGE. 34 */ 35 /* 36 * This subsystem implements most of the core support functions for 37 * the hammer2_chain structure. 38 * 39 * Chains are the in-memory version on media objects (volume header, inodes, 40 * indirect blocks, data blocks, etc). Chains represent a portion of the 41 * HAMMER2 topology. 42 * 43 * A chain is topologically stable once it has been inserted into the 44 * in-memory topology. Modifications which copy, move, or resize the chain 45 * are handled via the DELETE-DUPLICATE mechanic where the original chain 46 * stays intact but is marked deleted and a new chain is allocated which 47 * shares the old chain's children. 48 * 49 * This sharing is handled via the hammer2_chain_core structure. 50 * 51 * The DELETE-DUPLICATE mechanism allows the same topological level to contain 52 * many overloadings. However, our RBTREE mechanics require that there be 53 * no overlaps so we accomplish the overloading by moving conflicting chains 54 * with smaller or equal radii into a sub-RBTREE under the chain being 55 * overloaded. 56 * 57 * DELETE-DUPLICATE is also used when a modification to a chain crosses a 58 * flush synchronization boundary, allowing the flush code to continue flushing 59 * the older version of the topology and not be disrupted by new frontend 60 * operations. 61 * 62 * LIVE VS FLUSH VIEW 63 * 64 * All lookup and iterate operations and most modifications are done on the 65 * live view. During flushes lookups are not normally done and modifications 66 * may be run on the flush view. However, flushes often needs to allocate 67 * blocks and the freemap_alloc/free code issues lookups. This code is 68 * special cased to use the live view when called from a flush. 69 * 70 * General chain lookup/iteration functions are NOT aware of the flush view, 71 * they only know about live views. 72 */ 73 #include <sys/cdefs.h> 74 #include <sys/param.h> 75 #include <sys/systm.h> 76 #include <sys/types.h> 77 #include <sys/lock.h> 78 #include <sys/kern_syscall.h> 79 #include <sys/uuid.h> 80 81 #include "hammer2.h" 82 83 static int hammer2_indirect_optimize; /* XXX SYSCTL */ 84 85 static hammer2_chain_t *hammer2_chain_create_indirect( 86 hammer2_trans_t *trans, hammer2_chain_t *parent, 87 hammer2_key_t key, int keybits, int for_type, int *errorp); 88 static void hammer2_chain_drop_data(hammer2_chain_t *chain, int lastdrop); 89 static hammer2_chain_t *hammer2_combined_find( 90 hammer2_chain_t *parent, 91 hammer2_blockref_t *base, int count, 92 int *cache_indexp, hammer2_key_t *key_nextp, 93 hammer2_key_t key_beg, hammer2_key_t key_end, 94 hammer2_blockref_t **bresp); 95 96 /* 97 * Basic RBTree for chains (core->rbtree and core->dbtree). Chains cannot 98 * overlap in the RB trees. Deleted chains are moved from rbtree to either 99 * dbtree or to dbq. 100 * 101 * Chains in delete-duplicate sequences can always iterate through core_entry 102 * to locate the live version of the chain. 103 */ 104 RB_GENERATE(hammer2_chain_tree, hammer2_chain, rbnode, hammer2_chain_cmp); 105 106 int 107 hammer2_chain_cmp(hammer2_chain_t *chain1, hammer2_chain_t *chain2) 108 { 109 hammer2_key_t c1_beg; 110 hammer2_key_t c1_end; 111 hammer2_key_t c2_beg; 112 hammer2_key_t c2_end; 113 114 /* 115 * Compare chains. Overlaps are not supposed to happen and catch 116 * any software issues early we count overlaps as a match. 117 */ 118 c1_beg = chain1->bref.key; 119 c1_end = c1_beg + ((hammer2_key_t)1 << chain1->bref.keybits) - 1; 120 c2_beg = chain2->bref.key; 121 c2_end = c2_beg + ((hammer2_key_t)1 << chain2->bref.keybits) - 1; 122 123 if (c1_end < c2_beg) /* fully to the left */ 124 return(-1); 125 if (c1_beg > c2_end) /* fully to the right */ 126 return(1); 127 return(0); /* overlap (must not cross edge boundary) */ 128 } 129 130 static __inline 131 int 132 hammer2_isclusterable(hammer2_chain_t *chain) 133 { 134 if (hammer2_cluster_enable) { 135 if (chain->bref.type == HAMMER2_BREF_TYPE_INDIRECT || 136 chain->bref.type == HAMMER2_BREF_TYPE_INODE || 137 chain->bref.type == HAMMER2_BREF_TYPE_DATA) { 138 return(1); 139 } 140 } 141 return(0); 142 } 143 144 /* 145 * Recursively set update_xhi starting at chain and moving upward. Stop early 146 * if we hit a PFS transition (PFS flush code will have to detect the case 147 * and perform an update within its own transaction). The transaction xid 148 * is only good within the current PFS. 149 * 150 * This controls top-down visibility for flushes. The child has just one 151 * 'above' core, but the core itself can be multi-homed with parents iterated 152 * via core->ownerq. The last parent is the 'live' parent (all others had to 153 * have been delete-duplicated). We always propagate upward through the live 154 * parent. 155 * 156 * This function is not used during a flush (except when the flush is 157 * allocating which requires the live tree). The flush keeps track of its 158 * recursion itself. 159 * 160 * XXX SMP races. For now we do not allow concurrent transactions with 161 * different transaction ids and there should be no race, but if we do 162 * later on there will be a problem. 163 */ 164 void 165 hammer2_chain_setsubmod(hammer2_trans_t *trans, hammer2_chain_t *chain) 166 { 167 hammer2_chain_core_t *above; 168 169 if (chain->update_xhi < trans->sync_xid) 170 chain->update_xhi = trans->sync_xid; 171 172 while ((above = chain->above) != NULL) { 173 spin_lock(&above->cst.spin); 174 chain = TAILQ_LAST(&above->ownerq, h2_core_list); 175 if (chain->update_xhi < trans->sync_xid) 176 chain->update_xhi = trans->sync_xid; 177 spin_unlock(&above->cst.spin); 178 } 179 } 180 181 /* 182 * Allocate a new disconnected chain element representing the specified 183 * bref. chain->refs is set to 1 and the passed bref is copied to 184 * chain->bref. chain->bytes is derived from the bref. 185 * 186 * chain->core is NOT allocated and the media data and bp pointers are left 187 * NULL. The caller must call chain_core_alloc() to allocate or associate 188 * a core with the chain. 189 * 190 * chain->pmp inherits pmp unless the chain is an inode (other than the 191 * super-root inode). 192 * 193 * NOTE: Returns a referenced but unlocked (because there is no core) chain. 194 */ 195 hammer2_chain_t * 196 hammer2_chain_alloc(hammer2_mount_t *hmp, hammer2_pfsmount_t *pmp, 197 hammer2_trans_t *trans, hammer2_blockref_t *bref) 198 { 199 hammer2_chain_t *chain; 200 u_int bytes = 1U << (int)(bref->data_off & HAMMER2_OFF_MASK_RADIX); 201 202 /* 203 * Construct the appropriate system structure. 204 */ 205 switch(bref->type) { 206 case HAMMER2_BREF_TYPE_INODE: 207 case HAMMER2_BREF_TYPE_INDIRECT: 208 case HAMMER2_BREF_TYPE_FREEMAP_NODE: 209 case HAMMER2_BREF_TYPE_DATA: 210 case HAMMER2_BREF_TYPE_FREEMAP_LEAF: 211 /* 212 * Chain's are really only associated with the hmp but we 213 * maintain a pmp association for per-mount memory tracking 214 * purposes. The pmp can be NULL. 215 */ 216 chain = kmalloc(sizeof(*chain), hmp->mchain, M_WAITOK | M_ZERO); 217 break; 218 case HAMMER2_BREF_TYPE_VOLUME: 219 case HAMMER2_BREF_TYPE_FREEMAP: 220 chain = NULL; 221 panic("hammer2_chain_alloc volume type illegal for op"); 222 default: 223 chain = NULL; 224 panic("hammer2_chain_alloc: unrecognized blockref type: %d", 225 bref->type); 226 } 227 228 /* 229 * Initialize the new chain structure. 230 */ 231 chain->pmp = pmp; 232 chain->hmp = hmp; 233 chain->bref = *bref; 234 chain->bytes = bytes; 235 chain->refs = 1; 236 chain->flags = HAMMER2_CHAIN_ALLOCATED; 237 chain->delete_xid = HAMMER2_XID_MAX; 238 239 /* 240 * Set the PFS boundary flag if this chain represents a PFS root. 241 */ 242 if (bref->flags & HAMMER2_BREF_FLAG_PFSROOT) 243 chain->flags |= HAMMER2_CHAIN_PFSBOUNDARY; 244 245 /* 246 * Set modify_xid if a transaction is creating the inode. 247 * Enforce update_xlo = 0 so nearby transactions do not think 248 * it has been flushed when it hasn't. 249 * 250 * NOTE: When loading a chain from backing store or creating a 251 * snapshot, trans will be NULL and the caller is responsible 252 * for setting these fields. 253 */ 254 if (trans) { 255 chain->modify_xid = trans->sync_xid; 256 chain->update_xlo = 0; 257 } 258 259 return (chain); 260 } 261 262 /* 263 * Associate an existing core with the chain or allocate a new core. 264 * 265 * The core is not locked. No additional refs on the chain are made. 266 * (trans) must not be NULL if (core) is not NULL. 267 * 268 * When chains are delete-duplicated during flushes we insert nchain on 269 * the ownerq after ochain instead of at the end in order to give the 270 * drop code visibility in the correct order, otherwise drops can be missed. 271 */ 272 void 273 hammer2_chain_core_alloc(hammer2_trans_t *trans, 274 hammer2_chain_t *nchain, hammer2_chain_t *ochain) 275 { 276 hammer2_chain_core_t *core; 277 278 KKASSERT(nchain->core == NULL); 279 280 if (ochain == NULL) { 281 /* 282 * Fresh core under nchain (no multi-homing of ochain's 283 * sub-tree). 284 */ 285 core = kmalloc(sizeof(*core), nchain->hmp->mchain, 286 M_WAITOK | M_ZERO); 287 TAILQ_INIT(&core->ownerq); 288 TAILQ_INIT(&core->dbq); 289 RB_INIT(&core->rbtree); /* live chains */ 290 RB_INIT(&core->dbtree); /* deleted original (bmapped) chains */ 291 core->sharecnt = 1; 292 core->good = 0x1234; 293 nchain->core = core; 294 ccms_cst_init(&core->cst, nchain); 295 TAILQ_INSERT_TAIL(&core->ownerq, nchain, core_entry); 296 } else { 297 /* 298 * Propagate the PFSROOT flag which we set on all subdirs 299 * under the super-root. 300 */ 301 atomic_set_int(&nchain->flags, 302 ochain->flags & HAMMER2_CHAIN_PFSROOT); 303 304 /* 305 * Duplicating ochain -> nchain. Set the DUPLICATED flag on 306 * ochain if nchain is not a snapshot. 307 * 308 * It is possible for the DUPLICATED flag to already be 309 * set when called via a flush operation because flush 310 * operations may have to work on elements with delete_xid's 311 * beyond the flush sync_xid. In this situation we must 312 * ensure that nchain is placed just after ochain in the 313 * ownerq and that the DUPLICATED flag is set on nchain so 314 * 'live' operations skip past it to the correct chain. 315 * 316 * The flusher understands the blockref synchronization state 317 * for any stale chains by observing bref.mirror_tid, which 318 * delete-duplicate replicates. 319 * 320 * WARNING! However, the case is disallowed when the flusher 321 * is allocating freemap space because this entails 322 * more than just adjusting a block table. 323 */ 324 if (ochain->flags & HAMMER2_CHAIN_DUPLICATED) { 325 KKASSERT(trans->flags & HAMMER2_TRANS_ISFLUSH); 326 atomic_set_int(&nchain->flags, 327 HAMMER2_CHAIN_DUPLICATED); 328 } 329 if ((nchain->flags & HAMMER2_CHAIN_SNAPSHOT) == 0) { 330 atomic_set_int(&ochain->flags, 331 HAMMER2_CHAIN_DUPLICATED); 332 } 333 core = ochain->core; 334 atomic_add_int(&core->sharecnt, 1); 335 336 spin_lock(&core->cst.spin); 337 nchain->core = core; 338 339 /* 340 * Maintain ordering for refactor test so we don't skip over 341 * a snapshot. Also, during flushes, delete-duplications 342 * for block-table updates can occur on ochains already 343 * deleted (delete-duplicated by a later transaction), or 344 * on forward-indexed ochains. We must properly insert 345 * nchain relative to ochain. 346 */ 347 if (trans && trans->sync_xid < ochain->modify_xid) { 348 TAILQ_INSERT_BEFORE(ochain, nchain, core_entry); 349 } else { 350 TAILQ_INSERT_AFTER(&core->ownerq, ochain, 351 nchain, core_entry); 352 } 353 spin_unlock(&core->cst.spin); 354 } 355 } 356 357 /* 358 * Add a reference to a chain element, preventing its destruction. 359 */ 360 void 361 hammer2_chain_ref(hammer2_chain_t *chain) 362 { 363 atomic_add_int(&chain->refs, 1); 364 } 365 366 /* 367 * Insert the chain in the core rbtree. 368 * 369 * Normal insertions are placed in the live rbtree. Insertion of a deleted 370 * chain is a special case used by the flush code that is placed on the 371 * unstaged deleted list to avoid confusing the live view. 372 */ 373 #define HAMMER2_CHAIN_INSERT_SPIN 0x0001 374 #define HAMMER2_CHAIN_INSERT_LIVE 0x0002 375 #define HAMMER2_CHAIN_INSERT_RACE 0x0004 376 377 static 378 int 379 hammer2_chain_insert(hammer2_chain_core_t *above, 380 hammer2_chain_t *ochain, hammer2_chain_t *nchain, 381 int flags, int generation) 382 { 383 hammer2_chain_t *xchain; 384 int error = 0; 385 386 if (flags & HAMMER2_CHAIN_INSERT_SPIN) 387 spin_lock(&above->cst.spin); 388 389 /* 390 * Interlocked by spinlock, check for race 391 */ 392 if ((flags & HAMMER2_CHAIN_INSERT_RACE) && 393 above->generation != generation) { 394 error = EAGAIN; 395 goto failed; 396 } 397 398 /* 399 * Insert nchain 400 * 401 * XXX BMAPPED might not be handled correctly for ochain/nchain 402 * ordering in both DELETED cases (flush and non-flush-term), 403 * so delete-duplicate code. 404 */ 405 if (nchain->flags & HAMMER2_CHAIN_DELETED) { 406 if (ochain && (ochain->flags & HAMMER2_CHAIN_BMAPPED)) { 407 if (ochain->flags & HAMMER2_CHAIN_ONDBTREE) { 408 RB_REMOVE(hammer2_chain_tree, 409 &above->dbtree, ochain); 410 atomic_clear_int(&ochain->flags, 411 HAMMER2_CHAIN_ONDBTREE); 412 TAILQ_INSERT_TAIL(&above->dbq, 413 ochain, db_entry); 414 atomic_set_int(&ochain->flags, 415 HAMMER2_CHAIN_ONDBQ); 416 } 417 /* clear BMAPPED (DBTREE, sometimes RBTREE) */ 418 atomic_clear_int(&ochain->flags, HAMMER2_CHAIN_BMAPPED); 419 420 xchain = RB_INSERT(hammer2_chain_tree, 421 &above->dbtree, nchain); 422 KKASSERT(xchain == NULL); 423 atomic_set_int(&nchain->flags, 424 HAMMER2_CHAIN_ONDBTREE | 425 HAMMER2_CHAIN_BMAPPED); 426 } else { 427 TAILQ_INSERT_TAIL(&above->dbq, nchain, db_entry); 428 atomic_set_int(&nchain->flags, HAMMER2_CHAIN_ONDBQ); 429 } 430 } else { 431 xchain = RB_INSERT(hammer2_chain_tree, &above->rbtree, nchain); 432 KASSERT(xchain == NULL, 433 ("hammer2_chain_insert: collision %p", nchain)); 434 atomic_set_int(&nchain->flags, HAMMER2_CHAIN_ONRBTREE); 435 } 436 437 nchain->above = above; 438 ++above->chain_count; 439 ++above->generation; 440 441 /* 442 * We have to keep track of the effective live-view blockref count 443 * so the create code knows when to push an indirect block. 444 */ 445 if (flags & HAMMER2_CHAIN_INSERT_LIVE) 446 atomic_add_int(&above->live_count, 1); 447 failed: 448 if (flags & HAMMER2_CHAIN_INSERT_SPIN) 449 spin_unlock(&above->cst.spin); 450 return error; 451 } 452 453 /* 454 * Drop the caller's reference to the chain. When the ref count drops to 455 * zero this function will try to disassociate the chain from its parent and 456 * deallocate it, then recursely drop the parent using the implied ref 457 * from the chain's chain->parent. 458 */ 459 static hammer2_chain_t *hammer2_chain_lastdrop(hammer2_chain_t *chain, 460 struct h2_core_list *delayq); 461 462 void 463 hammer2_chain_drop(hammer2_chain_t *chain) 464 { 465 struct h2_core_list delayq; 466 hammer2_chain_t *scan; 467 u_int refs; 468 u_int need = 0; 469 470 if (hammer2_debug & 0x200000) 471 Debugger("drop"); 472 473 if (chain->flags & HAMMER2_CHAIN_FLUSH_CREATE) 474 ++need; 475 if (chain->flags & HAMMER2_CHAIN_FLUSH_DELETE) 476 ++need; 477 if (chain->flags & HAMMER2_CHAIN_MODIFIED) 478 ++need; 479 KKASSERT(chain->refs > need); 480 481 TAILQ_INIT(&delayq); 482 483 while (chain) { 484 refs = chain->refs; 485 cpu_ccfence(); 486 KKASSERT(refs > 0); 487 488 if (refs == 1) { 489 chain = hammer2_chain_lastdrop(chain, &delayq); 490 } else { 491 if (atomic_cmpset_int(&chain->refs, refs, refs - 1)) 492 break; 493 /* retry the same chain */ 494 } 495 496 /* 497 * When we've exhausted lastdrop chaining pull off of delayq. 498 * chains on delayq are dead but are used to placehold other 499 * chains which we added a ref to for the purpose of dropping. 500 */ 501 if (chain == NULL) { 502 hammer2_mount_t *hmp; 503 504 if ((scan = TAILQ_FIRST(&delayq)) != NULL) { 505 chain = (void *)scan->data; 506 TAILQ_REMOVE(&delayq, scan, core_entry); 507 scan->flags &= ~HAMMER2_CHAIN_ALLOCATED; 508 hmp = scan->hmp; 509 scan->hmp = NULL; 510 kfree(scan, hmp->mchain); 511 } 512 } 513 } 514 } 515 516 /* 517 * Safe handling of the 1->0 transition on chain. Returns a chain for 518 * recursive drop or NULL, possibly returning the same chain if the atomic 519 * op fails. 520 * 521 * Whem two chains need to be recursively dropped we use the chain 522 * we would otherwise free to placehold the additional chain. It's a bit 523 * convoluted but we can't just recurse without potentially blowing out 524 * the kernel stack. 525 * 526 * The chain cannot be freed if it has a non-empty core (children) or 527 * it is not at the head of ownerq. 528 * 529 * The cst spinlock is allowed nest child-to-parent (not parent-to-child). 530 */ 531 static 532 hammer2_chain_t * 533 hammer2_chain_lastdrop(hammer2_chain_t *chain, struct h2_core_list *delayq) 534 { 535 hammer2_pfsmount_t *pmp; 536 hammer2_mount_t *hmp; 537 hammer2_chain_core_t *above; 538 hammer2_chain_core_t *core; 539 hammer2_chain_t *rdrop1; 540 hammer2_chain_t *rdrop2; 541 542 /* 543 * Spinlock the core and check to see if it is empty. If it is 544 * not empty we leave chain intact with refs == 0. The elements 545 * in core->rbtree are associated with other chains contemporary 546 * with ours but not with our chain directly. 547 */ 548 if ((core = chain->core) != NULL) { 549 spin_lock(&core->cst.spin); 550 551 /* 552 * We can't free non-stale chains with children until we are 553 * able to free the children because there might be a flush 554 * dependency. Flushes of stale children (which should also 555 * have their deleted flag set) short-cut recursive flush 556 * dependencies and can be freed here. Any flushes which run 557 * through stale children due to the flush synchronization 558 * point should have a FLUSH_* bit set in the chain and not 559 * reach lastdrop at this time. 560 * 561 * NOTE: We return (chain) on failure to retry. 562 */ 563 if (core->chain_count && 564 (chain->flags & HAMMER2_CHAIN_DUPLICATED) == 0) { 565 if (atomic_cmpset_int(&chain->refs, 1, 0)) 566 chain = NULL; /* success */ 567 spin_unlock(&core->cst.spin); 568 return(chain); 569 } 570 /* no chains left under us */ 571 572 /* 573 * Various parts of the code might be holding a ref on a 574 * stale chain as a placemarker which must be iterated to 575 * locate a later non-stale (live) chain. We must be sure 576 * NOT to free the later non-stale chain (which might have 577 * no refs). Otherwise mass confusion may result. 578 * 579 * The DUPLICATED flag tells us whether the chain is stale 580 * or not, so the rule is that any chain whos DUPLICATED flag 581 * is NOT set must also be at the head of the ownerq. 582 * 583 * Note that the DELETED flag is not involved. That is, a 584 * live chain can represent a deletion that has not yet been 585 * flushed (or still has refs). 586 */ 587 #if 0 588 if (TAILQ_NEXT(chain, core_entry) == NULL && 589 TAILQ_FIRST(&core->ownerq) != chain) { 590 #endif 591 if ((chain->flags & HAMMER2_CHAIN_DUPLICATED) == 0 && 592 TAILQ_FIRST(&core->ownerq) != chain) { 593 if (atomic_cmpset_int(&chain->refs, 1, 0)) 594 chain = NULL; /* success */ 595 spin_unlock(&core->cst.spin); 596 return(chain); 597 } 598 } 599 600 /* 601 * chain->core has no children left so no accessors can get to our 602 * chain from there. Now we have to lock the above core to interlock 603 * remaining possible accessors that might bump chain's refs before 604 * we can safely drop chain's refs with intent to free the chain. 605 */ 606 hmp = chain->hmp; 607 pmp = chain->pmp; /* can be NULL */ 608 rdrop1 = NULL; 609 rdrop2 = NULL; 610 611 /* 612 * Spinlock the parent and try to drop the last ref on chain. 613 * On success remove chain from its parent, otherwise return NULL. 614 * 615 * (normal core locks are top-down recursive but we define core 616 * spinlocks as bottom-up recursive, so this is safe). 617 */ 618 if ((above = chain->above) != NULL) { 619 spin_lock(&above->cst.spin); 620 if (atomic_cmpset_int(&chain->refs, 1, 0) == 0) { 621 /* 1->0 transition failed */ 622 spin_unlock(&above->cst.spin); 623 if (core) 624 spin_unlock(&core->cst.spin); 625 return(chain); /* retry */ 626 } 627 628 /* 629 * 1->0 transition successful, remove chain from its 630 * above core. 631 */ 632 switch (chain->flags & (HAMMER2_CHAIN_ONRBTREE | 633 HAMMER2_CHAIN_ONDBTREE | 634 HAMMER2_CHAIN_ONDBQ)) { 635 case HAMMER2_CHAIN_ONRBTREE: 636 RB_REMOVE(hammer2_chain_tree, &above->rbtree, chain); 637 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_ONRBTREE); 638 break; 639 case HAMMER2_CHAIN_ONDBTREE: 640 RB_REMOVE(hammer2_chain_tree, &above->dbtree, chain); 641 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_ONDBTREE); 642 break; 643 case HAMMER2_CHAIN_ONDBQ: 644 TAILQ_REMOVE(&above->dbq, chain, db_entry); 645 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_ONDBQ); 646 break; 647 default: 648 panic("hammer2_chain_lastdrop: chain %p badflags %08x", 649 chain, chain->flags); 650 break; 651 } 652 653 --above->chain_count; 654 chain->above = NULL; 655 656 /* 657 * If our chain was the last chain in the parent's core the 658 * core is now empty and its parents might now be droppable. 659 * Try to drop the first multi-homed parent by gaining a 660 * ref on it here and then dropping it below. 661 */ 662 if (above->chain_count == 0) { 663 rdrop1 = TAILQ_FIRST(&above->ownerq); 664 if (rdrop1 && 665 atomic_cmpset_int(&rdrop1->refs, 0, 1) == 0) { 666 rdrop1 = NULL; 667 } 668 } 669 spin_unlock(&above->cst.spin); 670 above = NULL; /* safety */ 671 } 672 673 /* 674 * Successful 1->0 transition and the chain can be destroyed now. 675 * 676 * We still have the core spinlock (if core is non-NULL), and core's 677 * chain_count is 0. The above spinlock is gone. 678 * 679 * Remove chain from ownerq. Once core has no more owners (and no 680 * children which is already the case) we can destroy core. 681 * 682 * If core has more owners we may be able to continue a bottom-up 683 * drop with our next sibling. 684 */ 685 if (core) { 686 chain->core = NULL; 687 688 TAILQ_REMOVE(&core->ownerq, chain, core_entry); 689 rdrop2 = TAILQ_FIRST(&core->ownerq); 690 if (rdrop2 && atomic_cmpset_int(&rdrop2->refs, 0, 1) == 0) 691 rdrop2 = NULL; 692 spin_unlock(&core->cst.spin); 693 694 /* 695 * We can do the final 1->0 transition with an atomic op 696 * after releasing core's spinlock. 697 */ 698 if (atomic_fetchadd_int(&core->sharecnt, -1) == 1) { 699 /* 700 * On the 1->0 transition of core we can destroy 701 * it. 702 */ 703 KKASSERT(TAILQ_EMPTY(&core->ownerq)); 704 KKASSERT(RB_EMPTY(&core->rbtree) && 705 RB_EMPTY(&core->dbtree) && 706 TAILQ_EMPTY(&core->dbq) && 707 core->chain_count == 0); 708 KKASSERT(core->cst.count == 0); 709 KKASSERT(core->cst.upgrade == 0); 710 core->good = 0x5678; 711 kfree(core, hmp->mchain); 712 } 713 core = NULL; /* safety */ 714 } 715 716 /* 717 * All spin locks are gone, finish freeing stuff. 718 */ 719 KKASSERT((chain->flags & (HAMMER2_CHAIN_FLUSH_CREATE | 720 HAMMER2_CHAIN_FLUSH_DELETE | 721 HAMMER2_CHAIN_MODIFIED)) == 0); 722 hammer2_chain_drop_data(chain, 1); 723 724 KKASSERT(chain->dio == NULL); 725 726 /* 727 * Once chain resources are gone we can use the now dead chain 728 * structure to placehold what might otherwise require a recursive 729 * drop, because we have potentially two things to drop and can only 730 * return one directly. 731 */ 732 if (rdrop1 && rdrop2) { 733 KKASSERT(chain->flags & HAMMER2_CHAIN_ALLOCATED); 734 chain->data = (void *)rdrop1; 735 TAILQ_INSERT_TAIL(delayq, chain, core_entry); 736 rdrop1 = NULL; 737 } else if (chain->flags & HAMMER2_CHAIN_ALLOCATED) { 738 chain->flags &= ~HAMMER2_CHAIN_ALLOCATED; 739 chain->hmp = NULL; 740 kfree(chain, hmp->mchain); 741 } 742 743 /* 744 * Either or both can be NULL. We already handled the case where 745 * both might not have been NULL. 746 */ 747 if (rdrop1) 748 return(rdrop1); 749 else 750 return(rdrop2); 751 } 752 753 /* 754 * On either last lock release or last drop 755 */ 756 static void 757 hammer2_chain_drop_data(hammer2_chain_t *chain, int lastdrop) 758 { 759 /*hammer2_mount_t *hmp = chain->hmp;*/ 760 761 switch(chain->bref.type) { 762 case HAMMER2_BREF_TYPE_VOLUME: 763 case HAMMER2_BREF_TYPE_FREEMAP: 764 if (lastdrop) 765 chain->data = NULL; 766 break; 767 default: 768 KKASSERT(chain->data == NULL); 769 break; 770 } 771 } 772 773 /* 774 * Ref and lock a chain element, acquiring its data with I/O if necessary, 775 * and specify how you would like the data to be resolved. 776 * 777 * Returns 0 on success or an error code if the data could not be acquired. 778 * The chain element is locked on return regardless of whether an error 779 * occurred or not. 780 * 781 * The lock is allowed to recurse, multiple locking ops will aggregate 782 * the requested resolve types. Once data is assigned it will not be 783 * removed until the last unlock. 784 * 785 * HAMMER2_RESOLVE_NEVER - Do not resolve the data element. 786 * (typically used to avoid device/logical buffer 787 * aliasing for data) 788 * 789 * HAMMER2_RESOLVE_MAYBE - Do not resolve data elements for chains in 790 * the INITIAL-create state (indirect blocks only). 791 * 792 * Do not resolve data elements for DATA chains. 793 * (typically used to avoid device/logical buffer 794 * aliasing for data) 795 * 796 * HAMMER2_RESOLVE_ALWAYS- Always resolve the data element. 797 * 798 * HAMMER2_RESOLVE_SHARED- (flag) The chain is locked shared, otherwise 799 * it will be locked exclusive. 800 * 801 * NOTE: Embedded elements (volume header, inodes) are always resolved 802 * regardless. 803 * 804 * NOTE: Specifying HAMMER2_RESOLVE_ALWAYS on a newly-created non-embedded 805 * element will instantiate and zero its buffer, and flush it on 806 * release. 807 * 808 * NOTE: (data) elements are normally locked RESOLVE_NEVER or RESOLVE_MAYBE 809 * so as not to instantiate a device buffer, which could alias against 810 * a logical file buffer. However, if ALWAYS is specified the 811 * device buffer will be instantiated anyway. 812 * 813 * WARNING! If data must be fetched a shared lock will temporarily be 814 * upgraded to exclusive. However, a deadlock can occur if 815 * the caller owns more than one shared lock. 816 */ 817 int 818 hammer2_chain_lock(hammer2_chain_t *chain, int how) 819 { 820 hammer2_mount_t *hmp; 821 hammer2_chain_core_t *core; 822 hammer2_blockref_t *bref; 823 ccms_state_t ostate; 824 char *bdata; 825 int error; 826 827 /* 828 * Ref and lock the element. Recursive locks are allowed. 829 */ 830 if ((how & HAMMER2_RESOLVE_NOREF) == 0) 831 hammer2_chain_ref(chain); 832 atomic_add_int(&chain->lockcnt, 1); 833 834 hmp = chain->hmp; 835 KKASSERT(hmp != NULL); 836 837 /* 838 * Get the appropriate lock. 839 */ 840 core = chain->core; 841 if (how & HAMMER2_RESOLVE_SHARED) 842 ccms_thread_lock(&core->cst, CCMS_STATE_SHARED); 843 else 844 ccms_thread_lock(&core->cst, CCMS_STATE_EXCLUSIVE); 845 846 /* 847 * If we already have a valid data pointer no further action is 848 * necessary. 849 */ 850 if (chain->data) 851 return (0); 852 853 /* 854 * Do we have to resolve the data? 855 */ 856 switch(how & HAMMER2_RESOLVE_MASK) { 857 case HAMMER2_RESOLVE_NEVER: 858 return(0); 859 case HAMMER2_RESOLVE_MAYBE: 860 if (chain->flags & HAMMER2_CHAIN_INITIAL) 861 return(0); 862 if (chain->bref.type == HAMMER2_BREF_TYPE_DATA) 863 return(0); 864 #if 0 865 if (chain->bref.type == HAMMER2_BREF_TYPE_FREEMAP_NODE) 866 return(0); 867 #endif 868 if (chain->bref.type == HAMMER2_BREF_TYPE_FREEMAP_LEAF) 869 return(0); 870 /* fall through */ 871 case HAMMER2_RESOLVE_ALWAYS: 872 break; 873 } 874 875 /* 876 * Upgrade to an exclusive lock so we can safely manipulate the 877 * buffer cache. If another thread got to it before us we 878 * can just return. 879 */ 880 ostate = ccms_thread_lock_upgrade(&core->cst); 881 if (chain->data) { 882 ccms_thread_lock_downgrade(&core->cst, ostate); 883 return (0); 884 } 885 886 /* 887 * We must resolve to a device buffer, either by issuing I/O or 888 * by creating a zero-fill element. We do not mark the buffer 889 * dirty when creating a zero-fill element (the hammer2_chain_modify() 890 * API must still be used to do that). 891 * 892 * The device buffer is variable-sized in powers of 2 down 893 * to HAMMER2_MIN_ALLOC (typically 1K). A 64K physical storage 894 * chunk always contains buffers of the same size. (XXX) 895 * 896 * The minimum physical IO size may be larger than the variable 897 * block size. 898 */ 899 bref = &chain->bref; 900 901 /* 902 * The getblk() optimization can only be used on newly created 903 * elements if the physical block size matches the request. 904 */ 905 if (chain->flags & HAMMER2_CHAIN_INITIAL) { 906 error = hammer2_io_new(hmp, bref->data_off, chain->bytes, 907 &chain->dio); 908 } else { 909 error = hammer2_io_bread(hmp, bref->data_off, chain->bytes, 910 &chain->dio); 911 hammer2_adjreadcounter(&chain->bref, chain->bytes); 912 } 913 914 if (error) { 915 kprintf("hammer2_chain_lock: I/O error %016jx: %d\n", 916 (intmax_t)bref->data_off, error); 917 hammer2_io_bqrelse(&chain->dio); 918 ccms_thread_lock_downgrade(&core->cst, ostate); 919 return (error); 920 } 921 922 #if 0 923 /* 924 * No need for this, always require that hammer2_chain_modify() 925 * be called before any modifying operations. 926 */ 927 if ((chain->flags & HAMMER2_CHAIN_MODIFIED) && 928 !hammer2_io_isdirty(chain->dio)) { 929 hammer2_io_setdirty(chain->dio); 930 } 931 #endif 932 933 /* 934 * We can clear the INITIAL state now, we've resolved the buffer 935 * to zeros and marked it dirty with hammer2_io_new(). 936 */ 937 bdata = hammer2_io_data(chain->dio, chain->bref.data_off); 938 if (chain->flags & HAMMER2_CHAIN_INITIAL) { 939 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_INITIAL); 940 } 941 942 /* 943 * Setup the data pointer, either pointing it to an embedded data 944 * structure and copying the data from the buffer, or pointing it 945 * into the buffer. 946 * 947 * The buffer is not retained when copying to an embedded data 948 * structure in order to avoid potential deadlocks or recursions 949 * on the same physical buffer. 950 */ 951 switch (bref->type) { 952 case HAMMER2_BREF_TYPE_VOLUME: 953 case HAMMER2_BREF_TYPE_FREEMAP: 954 /* 955 * Copy data from bp to embedded buffer 956 */ 957 panic("hammer2_chain_lock: called on unresolved volume header"); 958 break; 959 case HAMMER2_BREF_TYPE_INODE: 960 case HAMMER2_BREF_TYPE_FREEMAP_LEAF: 961 case HAMMER2_BREF_TYPE_INDIRECT: 962 case HAMMER2_BREF_TYPE_DATA: 963 case HAMMER2_BREF_TYPE_FREEMAP_NODE: 964 default: 965 /* 966 * Point data at the device buffer and leave dio intact. 967 */ 968 chain->data = (void *)bdata; 969 break; 970 } 971 ccms_thread_lock_downgrade(&core->cst, ostate); 972 return (0); 973 } 974 975 /* 976 * This basically calls hammer2_io_breadcb() but does some pre-processing 977 * of the chain first to handle certain cases. 978 */ 979 void 980 hammer2_chain_load_async(hammer2_cluster_t *cluster, 981 void (*callback)(hammer2_io_t *dio, 982 hammer2_cluster_t *cluster, 983 hammer2_chain_t *chain, 984 void *arg_p, off_t arg_o), 985 void *arg_p) 986 { 987 hammer2_chain_t *chain; 988 hammer2_mount_t *hmp; 989 struct hammer2_io *dio; 990 hammer2_blockref_t *bref; 991 int error; 992 int i; 993 994 /* 995 * If no chain specified see if any chain data is available and use 996 * that, otherwise begin an I/O iteration using the first chain. 997 */ 998 chain = NULL; 999 for (i = 0; i < cluster->nchains; ++i) { 1000 chain = cluster->array[i]; 1001 if (chain && chain->data) 1002 break; 1003 } 1004 if (i == cluster->nchains) { 1005 chain = cluster->array[0]; 1006 i = 0; 1007 } 1008 1009 if (chain->data) { 1010 callback(NULL, cluster, chain, arg_p, (off_t)i); 1011 return; 1012 } 1013 1014 /* 1015 * We must resolve to a device buffer, either by issuing I/O or 1016 * by creating a zero-fill element. We do not mark the buffer 1017 * dirty when creating a zero-fill element (the hammer2_chain_modify() 1018 * API must still be used to do that). 1019 * 1020 * The device buffer is variable-sized in powers of 2 down 1021 * to HAMMER2_MIN_ALLOC (typically 1K). A 64K physical storage 1022 * chunk always contains buffers of the same size. (XXX) 1023 * 1024 * The minimum physical IO size may be larger than the variable 1025 * block size. 1026 */ 1027 bref = &chain->bref; 1028 hmp = chain->hmp; 1029 1030 /* 1031 * The getblk() optimization can only be used on newly created 1032 * elements if the physical block size matches the request. 1033 */ 1034 if ((chain->flags & HAMMER2_CHAIN_INITIAL) && 1035 chain->bytes == hammer2_devblksize(chain->bytes)) { 1036 error = hammer2_io_new(hmp, bref->data_off, chain->bytes, &dio); 1037 KKASSERT(error == 0); 1038 callback(dio, cluster, chain, arg_p, (off_t)i); 1039 return; 1040 } 1041 1042 /* 1043 * Otherwise issue a read 1044 */ 1045 hammer2_adjreadcounter(&chain->bref, chain->bytes); 1046 hammer2_io_breadcb(hmp, bref->data_off, chain->bytes, 1047 callback, cluster, chain, arg_p, (off_t)i); 1048 } 1049 1050 /* 1051 * Unlock and deref a chain element. 1052 * 1053 * On the last lock release any non-embedded data (chain->dio) will be 1054 * retired. 1055 */ 1056 void 1057 hammer2_chain_unlock(hammer2_chain_t *chain) 1058 { 1059 hammer2_chain_core_t *core = chain->core; 1060 ccms_state_t ostate; 1061 long *counterp; 1062 u_int lockcnt; 1063 1064 /* 1065 * The core->cst lock can be shared across several chains so we 1066 * need to track the per-chain lockcnt separately. 1067 * 1068 * If multiple locks are present (or being attempted) on this 1069 * particular chain we can just unlock, drop refs, and return. 1070 * 1071 * Otherwise fall-through on the 1->0 transition. 1072 */ 1073 for (;;) { 1074 lockcnt = chain->lockcnt; 1075 KKASSERT(lockcnt > 0); 1076 cpu_ccfence(); 1077 if (lockcnt > 1) { 1078 if (atomic_cmpset_int(&chain->lockcnt, 1079 lockcnt, lockcnt - 1)) { 1080 ccms_thread_unlock(&core->cst); 1081 hammer2_chain_drop(chain); 1082 return; 1083 } 1084 } else { 1085 if (atomic_cmpset_int(&chain->lockcnt, 1, 0)) 1086 break; 1087 } 1088 /* retry */ 1089 } 1090 1091 /* 1092 * On the 1->0 transition we upgrade the core lock (if necessary) 1093 * to exclusive for terminal processing. If after upgrading we find 1094 * that lockcnt is non-zero, another thread is racing us and will 1095 * handle the unload for us later on, so just cleanup and return 1096 * leaving the data/io intact 1097 * 1098 * Otherwise if lockcnt is still 0 it is possible for it to become 1099 * non-zero and race, but since we hold the core->cst lock 1100 * exclusively all that will happen is that the chain will be 1101 * reloaded after we unload it. 1102 */ 1103 ostate = ccms_thread_lock_upgrade(&core->cst); 1104 if (chain->lockcnt) { 1105 ccms_thread_unlock_upgraded(&core->cst, ostate); 1106 hammer2_chain_drop(chain); 1107 return; 1108 } 1109 1110 /* 1111 * Shortcut the case if the data is embedded or not resolved. 1112 * 1113 * Do NOT NULL out chain->data (e.g. inode data), it might be 1114 * dirty. 1115 */ 1116 if (chain->dio == NULL) { 1117 if ((chain->flags & HAMMER2_CHAIN_MODIFIED) == 0) 1118 hammer2_chain_drop_data(chain, 0); 1119 ccms_thread_unlock_upgraded(&core->cst, ostate); 1120 hammer2_chain_drop(chain); 1121 return; 1122 } 1123 1124 /* 1125 * Statistics 1126 */ 1127 if (hammer2_io_isdirty(chain->dio) == 0) { 1128 ; 1129 } else if (chain->flags & HAMMER2_CHAIN_IOFLUSH) { 1130 switch(chain->bref.type) { 1131 case HAMMER2_BREF_TYPE_DATA: 1132 counterp = &hammer2_ioa_file_write; 1133 break; 1134 case HAMMER2_BREF_TYPE_INODE: 1135 counterp = &hammer2_ioa_meta_write; 1136 break; 1137 case HAMMER2_BREF_TYPE_INDIRECT: 1138 counterp = &hammer2_ioa_indr_write; 1139 break; 1140 case HAMMER2_BREF_TYPE_FREEMAP_NODE: 1141 case HAMMER2_BREF_TYPE_FREEMAP_LEAF: 1142 counterp = &hammer2_ioa_fmap_write; 1143 break; 1144 default: 1145 counterp = &hammer2_ioa_volu_write; 1146 break; 1147 } 1148 *counterp += chain->bytes; 1149 } else { 1150 switch(chain->bref.type) { 1151 case HAMMER2_BREF_TYPE_DATA: 1152 counterp = &hammer2_iod_file_write; 1153 break; 1154 case HAMMER2_BREF_TYPE_INODE: 1155 counterp = &hammer2_iod_meta_write; 1156 break; 1157 case HAMMER2_BREF_TYPE_INDIRECT: 1158 counterp = &hammer2_iod_indr_write; 1159 break; 1160 case HAMMER2_BREF_TYPE_FREEMAP_NODE: 1161 case HAMMER2_BREF_TYPE_FREEMAP_LEAF: 1162 counterp = &hammer2_iod_fmap_write; 1163 break; 1164 default: 1165 counterp = &hammer2_iod_volu_write; 1166 break; 1167 } 1168 *counterp += chain->bytes; 1169 } 1170 1171 /* 1172 * Clean out the dio. 1173 * 1174 * If a device buffer was used for data be sure to destroy the 1175 * buffer when we are done to avoid aliases (XXX what about the 1176 * underlying VM pages?). 1177 * 1178 * NOTE: Freemap leaf's use reserved blocks and thus no aliasing 1179 * is possible. 1180 * 1181 * NOTE: The isdirty check tracks whether we have to bdwrite() the 1182 * buffer or not. The buffer might already be dirty. The 1183 * flag is re-set when chain_modify() is called, even if 1184 * MODIFIED is already set, allowing the OS to retire the 1185 * buffer independent of a hammer2 flush. 1186 */ 1187 chain->data = NULL; 1188 if ((chain->flags & HAMMER2_CHAIN_IOFLUSH) && 1189 hammer2_io_isdirty(chain->dio)) { 1190 hammer2_io_bawrite(&chain->dio); 1191 } else { 1192 hammer2_io_bqrelse(&chain->dio); 1193 } 1194 ccms_thread_unlock_upgraded(&core->cst, ostate); 1195 hammer2_chain_drop(chain); 1196 } 1197 1198 /* 1199 * This counts the number of live blockrefs in a block array and 1200 * also calculates the point at which all remaining blockrefs are empty. 1201 * This routine can only be called on a live chain (DUPLICATED flag not set). 1202 * 1203 * NOTE: Flag is not set until after the count is complete, allowing 1204 * callers to test the flag without holding the spinlock. 1205 * 1206 * NOTE: If base is NULL the related chain is still in the INITIAL 1207 * state and there are no blockrefs to count. 1208 * 1209 * NOTE: live_count may already have some counts accumulated due to 1210 * creation and deletion and could even be initially negative. 1211 */ 1212 void 1213 hammer2_chain_countbrefs(hammer2_chain_t *chain, 1214 hammer2_blockref_t *base, int count) 1215 { 1216 hammer2_chain_core_t *core = chain->core; 1217 1218 KKASSERT((chain->flags & HAMMER2_CHAIN_DUPLICATED) == 0); 1219 1220 spin_lock(&core->cst.spin); 1221 if ((core->flags & HAMMER2_CORE_COUNTEDBREFS) == 0) { 1222 if (base) { 1223 while (--count >= 0) { 1224 if (base[count].type) 1225 break; 1226 } 1227 core->live_zero = count + 1; 1228 while (count >= 0) { 1229 if (base[count].type) 1230 atomic_add_int(&core->live_count, 1); 1231 --count; 1232 } 1233 } else { 1234 core->live_zero = 0; 1235 } 1236 /* else do not modify live_count */ 1237 atomic_set_int(&core->flags, HAMMER2_CORE_COUNTEDBREFS); 1238 } 1239 spin_unlock(&core->cst.spin); 1240 } 1241 1242 /* 1243 * Resize the chain's physical storage allocation in-place. This may 1244 * replace the passed-in chain with a new chain. 1245 * 1246 * Chains can be resized smaller without reallocating the storage. 1247 * Resizing larger will reallocate the storage. 1248 * 1249 * Must be passed an exclusively locked parent and chain, returns a new 1250 * exclusively locked chain at the same index and unlocks the old chain. 1251 * Flushes the buffer if necessary. 1252 * 1253 * This function is mostly used with DATA blocks locked RESOLVE_NEVER in order 1254 * to avoid instantiating a device buffer that conflicts with the vnode 1255 * data buffer. That is, the passed-in bp is a logical buffer, whereas 1256 * any chain-oriented bp would be a device buffer. 1257 * 1258 * XXX return error if cannot resize. 1259 */ 1260 void 1261 hammer2_chain_resize(hammer2_trans_t *trans, hammer2_inode_t *ip, 1262 hammer2_chain_t *parent, hammer2_chain_t **chainp, 1263 int nradix, int flags) 1264 { 1265 hammer2_mount_t *hmp; 1266 hammer2_chain_t *chain; 1267 size_t obytes; 1268 size_t nbytes; 1269 1270 chain = *chainp; 1271 hmp = chain->hmp; 1272 1273 /* 1274 * Only data and indirect blocks can be resized for now. 1275 * (The volu root, inodes, and freemap elements use a fixed size). 1276 */ 1277 KKASSERT(chain != &hmp->vchain); 1278 KKASSERT(chain->bref.type == HAMMER2_BREF_TYPE_DATA || 1279 chain->bref.type == HAMMER2_BREF_TYPE_INDIRECT); 1280 1281 /* 1282 * Nothing to do if the element is already the proper size 1283 */ 1284 obytes = chain->bytes; 1285 nbytes = 1U << nradix; 1286 if (obytes == nbytes) 1287 return; 1288 1289 /* 1290 * Delete the old chain and duplicate it at the same (parent, index), 1291 * returning a new chain. This allows the old chain to still be 1292 * used by the flush code. The new chain will be returned in a 1293 * modified state. 1294 * 1295 * The parent does not have to be locked for the delete/duplicate call, 1296 * but is in this particular code path. 1297 * 1298 * NOTE: If we are not crossing a synchronization point the 1299 * duplication code will simply reuse the existing chain 1300 * structure. 1301 */ 1302 hammer2_chain_delete_duplicate(trans, &chain, 0); 1303 1304 /* 1305 * Relocate the block, even if making it smaller (because different 1306 * block sizes may be in different regions). 1307 * 1308 * (data blocks only, we aren't copying the storage here). 1309 */ 1310 hammer2_freemap_alloc(trans, chain, nbytes); 1311 chain->bytes = nbytes; 1312 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_FORCECOW); 1313 /*ip->delta_dcount += (ssize_t)(nbytes - obytes);*/ /* XXX atomic */ 1314 1315 /* 1316 * For now just support it on DATA chains (and not on indirect 1317 * blocks). 1318 */ 1319 KKASSERT(chain->dio == NULL); 1320 1321 *chainp = chain; 1322 } 1323 1324 #if 0 1325 1326 /* 1327 * REMOVED - see cluster code 1328 * 1329 * Set a chain modified, making it read-write and duplicating it if necessary. 1330 * This function will assign a new physical block to the chain if necessary 1331 * 1332 * Duplication of already-modified chains is possible when the modification 1333 * crosses a flush synchronization boundary. 1334 * 1335 * Non-data blocks - The chain should be locked to at least the RESOLVE_MAYBE 1336 * level or the COW operation will not work. 1337 * 1338 * Data blocks - The chain is usually locked RESOLVE_NEVER so as not to 1339 * run the data through the device buffers. 1340 * 1341 * This function may return a different chain than was passed, in which case 1342 * the old chain will be unlocked and the new chain will be locked. 1343 * 1344 * ip->chain may be adjusted by hammer2_chain_modify_ip(). 1345 */ 1346 hammer2_inode_data_t * 1347 hammer2_chain_modify_ip(hammer2_trans_t *trans, hammer2_inode_t *ip, 1348 hammer2_chain_t **chainp, int flags) 1349 { 1350 atomic_set_int(&ip->flags, HAMMER2_INODE_MODIFIED); 1351 hammer2_chain_modify(trans, chainp, flags); 1352 if (ip->chain != *chainp) 1353 hammer2_inode_repoint(ip, NULL, *chainp); 1354 if (ip->vp) 1355 vsetisdirty(ip->vp); 1356 return(&ip->chain->data->ipdata); 1357 } 1358 1359 #endif 1360 1361 void 1362 hammer2_chain_modify(hammer2_trans_t *trans, hammer2_chain_t **chainp, 1363 int flags) 1364 { 1365 hammer2_mount_t *hmp; 1366 hammer2_chain_t *chain; 1367 hammer2_io_t *dio; 1368 int error; 1369 int wasinitial; 1370 char *bdata; 1371 1372 chain = *chainp; 1373 hmp = chain->hmp; 1374 1375 /* 1376 * data is not optional for freemap chains (we must always be sure 1377 * to copy the data on COW storage allocations). 1378 */ 1379 if (chain->bref.type == HAMMER2_BREF_TYPE_FREEMAP_NODE || 1380 chain->bref.type == HAMMER2_BREF_TYPE_FREEMAP_LEAF) { 1381 KKASSERT((chain->flags & HAMMER2_CHAIN_INITIAL) || 1382 (flags & HAMMER2_MODIFY_OPTDATA) == 0); 1383 } 1384 1385 /* 1386 * Determine if a delete-duplicate is needed. 1387 * 1388 * (a) Modify_tid is part of a prior flush 1389 * (b) Transaction is concurrent with a flush (has higher tid) 1390 * (c) and chain is not in the initial state (freshly created) 1391 * (d) and caller didn't request an in-place modification. 1392 * 1393 * The freemap and volume header special chains are never D-Dd. 1394 */ 1395 if (chain->modify_xid != trans->sync_xid && /* cross boundary */ 1396 (flags & HAMMER2_MODIFY_INPLACE) == 0) { /* from d-d */ 1397 if (chain != &hmp->fchain && chain != &hmp->vchain) { 1398 KKASSERT((flags & HAMMER2_MODIFY_ASSERTNOCOPY) == 0); 1399 hammer2_chain_delete_duplicate(trans, chainp, 0); 1400 chain = *chainp; 1401 } 1402 } 1403 1404 /* 1405 * Data must be resolved if already assigned unless explicitly 1406 * flagged otherwise. 1407 */ 1408 if (chain->data == NULL && (flags & HAMMER2_MODIFY_OPTDATA) == 0 && 1409 (chain->bref.data_off & ~HAMMER2_OFF_MASK_RADIX)) { 1410 hammer2_chain_lock(chain, HAMMER2_RESOLVE_ALWAYS); 1411 hammer2_chain_unlock(chain); 1412 } 1413 1414 /* 1415 * Otherwise do initial-chain handling. Set MODIFIED to indicate 1416 * that the chain has been modified. Set FLUSH_CREATE to flush 1417 * the new blockref (the D-D set FLUSH_DELETE on the old chain to 1418 * delete the old blockref). 1419 */ 1420 if ((chain->flags & HAMMER2_CHAIN_MODIFIED) == 0) { 1421 atomic_set_int(&chain->flags, HAMMER2_CHAIN_MODIFIED); 1422 hammer2_chain_ref(chain); 1423 hammer2_pfs_memory_inc(chain->pmp); 1424 } 1425 if ((chain->flags & HAMMER2_CHAIN_FLUSH_CREATE) == 0) { 1426 atomic_set_int(&chain->flags, HAMMER2_CHAIN_FLUSH_CREATE); 1427 hammer2_chain_ref(chain); 1428 } 1429 1430 /* 1431 * The modification or re-modification requires an allocation and 1432 * possible COW. 1433 * 1434 * We normally always allocate new storage here. If storage exists 1435 * and MODIFY_NOREALLOC is passed in, we do not allocate new storage. 1436 */ 1437 if (chain != &hmp->vchain && chain != &hmp->fchain) { 1438 if ((chain->bref.data_off & ~HAMMER2_OFF_MASK_RADIX) == 0 || 1439 ((flags & HAMMER2_MODIFY_NOREALLOC) == 0 && 1440 chain->modify_xid != trans->sync_xid) 1441 ) { 1442 hammer2_freemap_alloc(trans, chain, chain->bytes); 1443 /* XXX failed allocation */ 1444 } else if (chain->flags & HAMMER2_CHAIN_FORCECOW) { 1445 hammer2_freemap_alloc(trans, chain, chain->bytes); 1446 /* XXX failed allocation */ 1447 } 1448 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_FORCECOW); 1449 } 1450 1451 /* 1452 * Update modify_xid. XXX special-case vchain/fchain because they 1453 * are always modified in-place. Otherwise the chain being modified 1454 * must not be part of a future transaction. 1455 */ 1456 if (chain == &hmp->vchain || chain == &hmp->fchain) { 1457 if (chain->modify_xid <= trans->sync_xid) 1458 chain->modify_xid = trans->sync_xid; 1459 } else { 1460 KKASSERT(chain->modify_xid <= trans->sync_xid); 1461 chain->modify_xid = trans->sync_xid; 1462 } 1463 1464 /* 1465 * Do not COW BREF_TYPE_DATA when OPTDATA is set. This is because 1466 * data modifications are done via the logical buffer cache so COWing 1467 * it here would result in unnecessary extra copies (and possibly extra 1468 * block reallocations). The INITIAL flag remains unchanged in this 1469 * situation. 1470 * 1471 * (This is a bit of a hack). 1472 */ 1473 if (chain->bref.type == HAMMER2_BREF_TYPE_DATA && 1474 (flags & HAMMER2_MODIFY_OPTDATA)) { 1475 goto skip2; 1476 } 1477 1478 /* 1479 * Clearing the INITIAL flag (for indirect blocks) indicates that 1480 * we've processed the uninitialized storage allocation. 1481 * 1482 * If this flag is already clear we are likely in a copy-on-write 1483 * situation but we have to be sure NOT to bzero the storage if 1484 * no data is present. 1485 */ 1486 if (chain->flags & HAMMER2_CHAIN_INITIAL) { 1487 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_INITIAL); 1488 wasinitial = 1; 1489 } else { 1490 wasinitial = 0; 1491 } 1492 1493 /* 1494 * Instantiate data buffer and possibly execute COW operation 1495 */ 1496 switch(chain->bref.type) { 1497 case HAMMER2_BREF_TYPE_VOLUME: 1498 case HAMMER2_BREF_TYPE_FREEMAP: 1499 /* 1500 * The data is embedded, no copy-on-write operation is 1501 * needed. 1502 */ 1503 KKASSERT(chain->dio == NULL); 1504 break; 1505 case HAMMER2_BREF_TYPE_INODE: 1506 case HAMMER2_BREF_TYPE_FREEMAP_LEAF: 1507 case HAMMER2_BREF_TYPE_DATA: 1508 case HAMMER2_BREF_TYPE_INDIRECT: 1509 case HAMMER2_BREF_TYPE_FREEMAP_NODE: 1510 /* 1511 * Perform the copy-on-write operation 1512 * 1513 * zero-fill or copy-on-write depending on whether 1514 * chain->data exists or not and set the dirty state for 1515 * the new buffer. hammer2_io_new() will handle the 1516 * zero-fill. 1517 */ 1518 KKASSERT(chain != &hmp->vchain && chain != &hmp->fchain); 1519 1520 if (wasinitial) { 1521 error = hammer2_io_new(hmp, chain->bref.data_off, 1522 chain->bytes, &dio); 1523 } else { 1524 error = hammer2_io_bread(hmp, chain->bref.data_off, 1525 chain->bytes, &dio); 1526 } 1527 hammer2_adjreadcounter(&chain->bref, chain->bytes); 1528 KKASSERT(error == 0); 1529 1530 bdata = hammer2_io_data(dio, chain->bref.data_off); 1531 1532 if (chain->data) { 1533 KKASSERT(chain->dio != NULL); 1534 if (chain->data != (void *)bdata) { 1535 bcopy(chain->data, bdata, chain->bytes); 1536 } 1537 } else if (wasinitial == 0) { 1538 /* 1539 * We have a problem. We were asked to COW but 1540 * we don't have any data to COW with! 1541 */ 1542 panic("hammer2_chain_modify: having a COW %p\n", 1543 chain); 1544 } 1545 1546 /* 1547 * Retire the old buffer, replace with the new 1548 */ 1549 if (chain->dio) 1550 hammer2_io_brelse(&chain->dio); 1551 chain->data = (void *)bdata; 1552 chain->dio = dio; 1553 hammer2_io_setdirty(dio); /* modified by bcopy above */ 1554 break; 1555 default: 1556 panic("hammer2_chain_modify: illegal non-embedded type %d", 1557 chain->bref.type); 1558 break; 1559 1560 } 1561 skip2: 1562 hammer2_chain_setsubmod(trans, chain); 1563 } 1564 1565 /* 1566 * Volume header data locks 1567 */ 1568 void 1569 hammer2_voldata_lock(hammer2_mount_t *hmp) 1570 { 1571 lockmgr(&hmp->vollk, LK_EXCLUSIVE); 1572 } 1573 1574 void 1575 hammer2_voldata_unlock(hammer2_mount_t *hmp) 1576 { 1577 lockmgr(&hmp->vollk, LK_RELEASE); 1578 } 1579 1580 void 1581 hammer2_voldata_modify(hammer2_mount_t *hmp) 1582 { 1583 if ((hmp->vchain.flags & HAMMER2_CHAIN_MODIFIED) == 0) { 1584 atomic_set_int(&hmp->vchain.flags, HAMMER2_CHAIN_MODIFIED); 1585 hammer2_chain_ref(&hmp->vchain); 1586 } 1587 } 1588 1589 /* 1590 * This function returns the chain at the nearest key within the specified 1591 * range with the highest delete_xid. The core spinlock must be held on 1592 * call and the returned chain will be referenced but not locked. 1593 * 1594 * The returned chain may or may not be in a deleted state. Note that 1595 * live chains have a delete_xid = XID_MAX. 1596 * 1597 * This function will recurse through chain->rbtree as necessary and will 1598 * return a *key_nextp suitable for iteration. *key_nextp is only set if 1599 * the iteration value is less than the current value of *key_nextp. 1600 * 1601 * The caller should use (*key_nextp) to calculate the actual range of 1602 * the returned element, which will be (key_beg to *key_nextp - 1), because 1603 * there might be another element which is superior to the returned element 1604 * and overlaps it. 1605 * 1606 * (*key_nextp) can be passed as key_beg in an iteration only while non-NULL 1607 * chains continue to be returned. On EOF (*key_nextp) may overflow since 1608 * it will wind up being (key_end + 1). 1609 */ 1610 struct hammer2_chain_find_info { 1611 hammer2_chain_t *best; 1612 hammer2_key_t key_beg; 1613 hammer2_key_t key_end; 1614 hammer2_key_t key_next; 1615 }; 1616 1617 static int hammer2_chain_find_cmp(hammer2_chain_t *child, void *data); 1618 static int hammer2_chain_find_callback(hammer2_chain_t *child, void *data); 1619 1620 static 1621 hammer2_chain_t * 1622 hammer2_chain_find(hammer2_chain_t *parent, hammer2_key_t *key_nextp, 1623 hammer2_key_t key_beg, hammer2_key_t key_end) 1624 { 1625 struct hammer2_chain_find_info info; 1626 1627 info.best = NULL; 1628 info.key_beg = key_beg; 1629 info.key_end = key_end; 1630 info.key_next = *key_nextp; 1631 1632 KKASSERT(parent->core->good == 0x1234); 1633 RB_SCAN(hammer2_chain_tree, &parent->core->rbtree, 1634 hammer2_chain_find_cmp, hammer2_chain_find_callback, 1635 &info); 1636 *key_nextp = info.key_next; 1637 #if 0 1638 kprintf("chain_find %p %016jx:%016jx next=%016jx\n", 1639 parent, key_beg, key_end, *key_nextp); 1640 #endif 1641 1642 return (info.best); 1643 } 1644 1645 /* 1646 * Find a deleted chain covering a block table entry. Be careful to deal 1647 * with the race condition where the block table has been updated but the 1648 * chain has not yet been removed from dbtree (due to multiple parents having 1649 * to be updated). 1650 */ 1651 static 1652 hammer2_chain_t * 1653 hammer2_chain_find_deleted(hammer2_chain_t *parent, 1654 hammer2_key_t key_beg, hammer2_key_t key_end) 1655 { 1656 struct hammer2_chain_find_info info; 1657 hammer2_chain_t *child; 1658 1659 info.best = NULL; 1660 info.key_beg = key_beg; 1661 info.key_end = key_end; 1662 info.key_next = 0; 1663 1664 KKASSERT(parent->core->good == 0x1234); 1665 RB_SCAN(hammer2_chain_tree, &parent->core->dbtree, 1666 hammer2_chain_find_cmp, hammer2_chain_find_callback, 1667 &info); 1668 if ((child = info.best) != NULL) { 1669 if (child->delete_xid <= parent->update_xlo) 1670 child = NULL; 1671 } 1672 return child; 1673 } 1674 1675 static 1676 int 1677 hammer2_chain_find_cmp(hammer2_chain_t *child, void *data) 1678 { 1679 struct hammer2_chain_find_info *info = data; 1680 hammer2_key_t child_beg; 1681 hammer2_key_t child_end; 1682 1683 child_beg = child->bref.key; 1684 child_end = child_beg + ((hammer2_key_t)1 << child->bref.keybits) - 1; 1685 1686 if (child_end < info->key_beg) 1687 return(-1); 1688 if (child_beg > info->key_end) 1689 return(1); 1690 return(0); 1691 } 1692 1693 static 1694 int 1695 hammer2_chain_find_callback(hammer2_chain_t *child, void *data) 1696 { 1697 struct hammer2_chain_find_info *info = data; 1698 hammer2_chain_t *best; 1699 hammer2_key_t child_end; 1700 1701 /* 1702 * WARNING! Do not discard DUPLICATED chains, it is possible that 1703 * we are catching an insertion half-way done. If a 1704 * duplicated chain turns out to be the best choice the 1705 * caller will re-check its flags after locking it. 1706 * 1707 * WARNING! Layerq is scanned forwards, exact matches should keep 1708 * the existing info->best. 1709 */ 1710 if ((best = info->best) == NULL) { 1711 /* 1712 * No previous best. Assign best 1713 */ 1714 info->best = child; 1715 } else if (best->bref.key <= info->key_beg && 1716 child->bref.key <= info->key_beg) { 1717 /* 1718 * If our current best is flush with key_beg and child is 1719 * also flush with key_beg choose based on delete_xid. 1720 * 1721 * key_next will automatically be limited to the smaller of 1722 * the two end-points. 1723 */ 1724 if (child->delete_xid > best->delete_xid) 1725 info->best = child; 1726 } else if (child->bref.key < best->bref.key) { 1727 /* 1728 * Child has a nearer key and best is not flush with key_beg. 1729 * Truncate key_next to the old best key iff it had a better 1730 * delete_xid. 1731 */ 1732 info->best = child; 1733 if (best->delete_xid >= child->delete_xid && 1734 (info->key_next > best->bref.key || info->key_next == 0)) 1735 info->key_next = best->bref.key; 1736 } else if (child->bref.key == best->bref.key) { 1737 /* 1738 * If our current best is flush with the child then choose 1739 * based on delete_xid. 1740 * 1741 * key_next will automatically be limited to the smaller of 1742 * the two end-points. 1743 */ 1744 if (child->delete_xid > best->delete_xid) 1745 info->best = child; 1746 } else { 1747 /* 1748 * Keep the current best but truncate key_next to the child's 1749 * base iff the child has a higher delete_xid. 1750 * 1751 * key_next will also automatically be limited to the smaller 1752 * of the two end-points (probably not necessary for this case 1753 * but we do it anyway). 1754 */ 1755 if (child->delete_xid >= best->delete_xid && 1756 (info->key_next > child->bref.key || info->key_next == 0)) 1757 info->key_next = child->bref.key; 1758 } 1759 1760 /* 1761 * Always truncate key_next based on child's end-of-range. 1762 */ 1763 child_end = child->bref.key + ((hammer2_key_t)1 << child->bref.keybits); 1764 if (child_end && (info->key_next > child_end || info->key_next == 0)) 1765 info->key_next = child_end; 1766 1767 return(0); 1768 } 1769 1770 /* 1771 * Retrieve the specified chain from a media blockref, creating the 1772 * in-memory chain structure which reflects it. modify_xid will be 1773 * set to the min value which forces any modifications to issue a 1774 * delete-duplicate. 1775 * 1776 * To handle insertion races pass the INSERT_RACE flag along with the 1777 * generation number of the core. NULL will be returned if the generation 1778 * number changes before we have a chance to insert the chain. Insert 1779 * races can occur because the parent might be held shared. 1780 * 1781 * Caller must hold the parent locked shared or exclusive since we may 1782 * need the parent's bref array to find our block. 1783 * 1784 * WARNING! chain->pmp is left NULL if the bref represents a PFS mount 1785 * point. 1786 */ 1787 hammer2_chain_t * 1788 hammer2_chain_get(hammer2_chain_t *parent, int generation, 1789 hammer2_blockref_t *bref) 1790 { 1791 hammer2_mount_t *hmp = parent->hmp; 1792 hammer2_chain_core_t *above = parent->core; 1793 hammer2_chain_t *chain; 1794 int error; 1795 1796 /* 1797 * Allocate a chain structure representing the existing media 1798 * entry. Resulting chain has one ref and is not locked. 1799 */ 1800 if (bref->flags & HAMMER2_BREF_FLAG_PFSROOT) 1801 chain = hammer2_chain_alloc(hmp, NULL, NULL, bref); 1802 else 1803 chain = hammer2_chain_alloc(hmp, parent->pmp, NULL, bref); 1804 hammer2_chain_core_alloc(NULL, chain, NULL); 1805 /* ref'd chain returned */ 1806 1807 /* 1808 * Set modify_xid and update_xlo to the chain's synchronization 1809 * point from the media. 1810 */ 1811 chain->modify_xid = HAMMER2_XID_MIN; 1812 chain->update_xlo = HAMMER2_XID_MIN; 1813 atomic_set_int(&chain->flags, HAMMER2_CHAIN_BMAPPED); 1814 1815 /* 1816 * Link the chain into its parent. A spinlock is required to safely 1817 * access the RBTREE, and it is possible to collide with another 1818 * hammer2_chain_get() operation because the caller might only hold 1819 * a shared lock on the parent. 1820 */ 1821 KKASSERT(parent->refs > 0); 1822 error = hammer2_chain_insert(above, NULL, chain, 1823 HAMMER2_CHAIN_INSERT_SPIN | 1824 HAMMER2_CHAIN_INSERT_RACE, 1825 generation); 1826 if (error) { 1827 KKASSERT((chain->flags & (HAMMER2_CHAIN_ONRBTREE | 1828 HAMMER2_CHAIN_ONDBTREE | 1829 HAMMER2_CHAIN_ONDBQ)) == 0); 1830 kprintf("chain %p get race\n", chain); 1831 hammer2_chain_drop(chain); 1832 chain = NULL; 1833 } else { 1834 KKASSERT(chain->flags & HAMMER2_CHAIN_ONRBTREE); 1835 } 1836 1837 /* 1838 * Return our new chain referenced but not locked, or NULL if 1839 * a race occurred. 1840 */ 1841 return (chain); 1842 } 1843 1844 /* 1845 * Lookup initialization/completion API 1846 */ 1847 hammer2_chain_t * 1848 hammer2_chain_lookup_init(hammer2_chain_t *parent, int flags) 1849 { 1850 if (flags & HAMMER2_LOOKUP_SHARED) { 1851 hammer2_chain_lock(parent, HAMMER2_RESOLVE_ALWAYS | 1852 HAMMER2_RESOLVE_SHARED); 1853 } else { 1854 hammer2_chain_lock(parent, HAMMER2_RESOLVE_ALWAYS); 1855 } 1856 return (parent); 1857 } 1858 1859 void 1860 hammer2_chain_lookup_done(hammer2_chain_t *parent) 1861 { 1862 if (parent) 1863 hammer2_chain_unlock(parent); 1864 } 1865 1866 static 1867 hammer2_chain_t * 1868 hammer2_chain_getparent(hammer2_chain_t **parentp, int how) 1869 { 1870 hammer2_chain_t *oparent; 1871 hammer2_chain_t *bparent; 1872 hammer2_chain_t *nparent; 1873 hammer2_chain_core_t *above; 1874 1875 oparent = *parentp; 1876 above = oparent->above; 1877 1878 spin_lock(&above->cst.spin); 1879 bparent = TAILQ_FIRST(&above->ownerq); 1880 hammer2_chain_ref(bparent); 1881 1882 /* 1883 * Be careful of order, oparent must be unlocked before nparent 1884 * is locked below to avoid a deadlock. We might as well delay its 1885 * unlocking until we conveniently no longer have the spinlock (instead 1886 * of cycling the spinlock). 1887 * 1888 * Theoretically our ref on bparent should prevent elements of the 1889 * following chain from going away and prevent above from going away, 1890 * but we still need the spinlock to safely scan the list. 1891 */ 1892 for (;;) { 1893 nparent = bparent; 1894 while (nparent->flags & HAMMER2_CHAIN_DUPLICATED) 1895 nparent = TAILQ_NEXT(nparent, core_entry); 1896 hammer2_chain_ref(nparent); 1897 spin_unlock(&above->cst.spin); 1898 1899 if (oparent) { 1900 hammer2_chain_unlock(oparent); 1901 oparent = NULL; 1902 } 1903 hammer2_chain_lock(nparent, how | HAMMER2_RESOLVE_NOREF); 1904 hammer2_chain_drop(bparent); 1905 1906 /* 1907 * We might have raced a delete-duplicate. 1908 */ 1909 if ((nparent->flags & HAMMER2_CHAIN_DUPLICATED) == 0) 1910 break; 1911 bparent = nparent; 1912 hammer2_chain_ref(bparent); 1913 hammer2_chain_unlock(nparent); 1914 spin_lock(&above->cst.spin); 1915 /* retry */ 1916 } 1917 *parentp = nparent; 1918 1919 return (nparent); 1920 } 1921 1922 /* 1923 * Locate the first chain whos key range overlaps (key_beg, key_end) inclusive. 1924 * (*parentp) typically points to an inode but can also point to a related 1925 * indirect block and this function will recurse upwards and find the inode 1926 * again. 1927 * 1928 * (*parentp) must be exclusively locked and referenced and can be an inode 1929 * or an existing indirect block within the inode. 1930 * 1931 * On return (*parentp) will be modified to point at the deepest parent chain 1932 * element encountered during the search, as a helper for an insertion or 1933 * deletion. The new (*parentp) will be locked and referenced and the old 1934 * will be unlocked and dereferenced (no change if they are both the same). 1935 * 1936 * The matching chain will be returned exclusively locked. If NOLOCK is 1937 * requested the chain will be returned only referenced. 1938 * 1939 * NULL is returned if no match was found, but (*parentp) will still 1940 * potentially be adjusted. 1941 * 1942 * On return (*key_nextp) will point to an iterative value for key_beg. 1943 * (If NULL is returned (*key_nextp) is set to key_end). 1944 * 1945 * This function will also recurse up the chain if the key is not within the 1946 * current parent's range. (*parentp) can never be set to NULL. An iteration 1947 * can simply allow (*parentp) to float inside the loop. 1948 * 1949 * NOTE! chain->data is not always resolved. By default it will not be 1950 * resolved for BREF_TYPE_DATA, FREEMAP_NODE, or FREEMAP_LEAF. Use 1951 * HAMMER2_LOOKUP_ALWAYS to force resolution (but be careful w/ 1952 * BREF_TYPE_DATA as the device buffer can alias the logical file 1953 * buffer). 1954 */ 1955 hammer2_chain_t * 1956 hammer2_chain_lookup(hammer2_chain_t **parentp, hammer2_key_t *key_nextp, 1957 hammer2_key_t key_beg, hammer2_key_t key_end, 1958 int *cache_indexp, int flags, int *ddflagp) 1959 { 1960 hammer2_mount_t *hmp; 1961 hammer2_chain_t *parent; 1962 hammer2_chain_t *chain; 1963 hammer2_blockref_t *base; 1964 hammer2_blockref_t *bref; 1965 hammer2_blockref_t bcopy; 1966 hammer2_key_t scan_beg; 1967 hammer2_key_t scan_end; 1968 hammer2_chain_core_t *above; 1969 int count = 0; 1970 int how_always = HAMMER2_RESOLVE_ALWAYS; 1971 int how_maybe = HAMMER2_RESOLVE_MAYBE; 1972 int how; 1973 int generation; 1974 int maxloops = 300000; 1975 int wasdup; 1976 1977 *ddflagp = 0; 1978 if (flags & HAMMER2_LOOKUP_ALWAYS) { 1979 how_maybe = how_always; 1980 how = HAMMER2_RESOLVE_ALWAYS; 1981 } else if (flags & (HAMMER2_LOOKUP_NODATA | HAMMER2_LOOKUP_NOLOCK)) { 1982 how = HAMMER2_RESOLVE_NEVER; 1983 } else { 1984 how = HAMMER2_RESOLVE_MAYBE; 1985 } 1986 if (flags & (HAMMER2_LOOKUP_SHARED | HAMMER2_LOOKUP_NOLOCK)) { 1987 how_maybe |= HAMMER2_RESOLVE_SHARED; 1988 how_always |= HAMMER2_RESOLVE_SHARED; 1989 how |= HAMMER2_RESOLVE_SHARED; 1990 } 1991 1992 /* 1993 * Recurse (*parentp) upward if necessary until the parent completely 1994 * encloses the key range or we hit the inode. 1995 * 1996 * This function handles races against the flusher doing a delete- 1997 * duplicate above us and re-homes the parent to the duplicate in 1998 * that case, otherwise we'd wind up recursing down a stale chain. 1999 */ 2000 parent = *parentp; 2001 hmp = parent->hmp; 2002 2003 while (parent->bref.type == HAMMER2_BREF_TYPE_INDIRECT || 2004 parent->bref.type == HAMMER2_BREF_TYPE_FREEMAP_NODE) { 2005 scan_beg = parent->bref.key; 2006 scan_end = scan_beg + 2007 ((hammer2_key_t)1 << parent->bref.keybits) - 1; 2008 if (key_beg >= scan_beg && key_end <= scan_end) 2009 break; 2010 parent = hammer2_chain_getparent(parentp, how_maybe); 2011 } 2012 2013 again: 2014 if (--maxloops == 0) 2015 panic("hammer2_chain_lookup: maxloops"); 2016 /* 2017 * Locate the blockref array. Currently we do a fully associative 2018 * search through the array. 2019 */ 2020 switch(parent->bref.type) { 2021 case HAMMER2_BREF_TYPE_INODE: 2022 /* 2023 * Special shortcut for embedded data returns the inode 2024 * itself. Callers must detect this condition and access 2025 * the embedded data (the strategy code does this for us). 2026 * 2027 * This is only applicable to regular files and softlinks. 2028 */ 2029 if (parent->data->ipdata.op_flags & HAMMER2_OPFLAG_DIRECTDATA) { 2030 if (flags & HAMMER2_LOOKUP_NOLOCK) 2031 hammer2_chain_ref(parent); 2032 else 2033 hammer2_chain_lock(parent, how_always); 2034 *key_nextp = key_end + 1; 2035 *ddflagp = 1; 2036 return (parent); 2037 } 2038 base = &parent->data->ipdata.u.blockset.blockref[0]; 2039 count = HAMMER2_SET_COUNT; 2040 break; 2041 case HAMMER2_BREF_TYPE_FREEMAP_NODE: 2042 case HAMMER2_BREF_TYPE_INDIRECT: 2043 /* 2044 * Handle MATCHIND on the parent 2045 */ 2046 if (flags & HAMMER2_LOOKUP_MATCHIND) { 2047 scan_beg = parent->bref.key; 2048 scan_end = scan_beg + 2049 ((hammer2_key_t)1 << parent->bref.keybits) - 1; 2050 if (key_beg == scan_beg && key_end == scan_end) { 2051 chain = parent; 2052 hammer2_chain_lock(chain, how_maybe); 2053 *key_nextp = scan_end + 1; 2054 goto done; 2055 } 2056 } 2057 /* 2058 * Optimize indirect blocks in the INITIAL state to avoid 2059 * I/O. 2060 */ 2061 if (parent->flags & HAMMER2_CHAIN_INITIAL) { 2062 base = NULL; 2063 } else { 2064 if (parent->data == NULL) 2065 panic("parent->data is NULL"); 2066 base = &parent->data->npdata[0]; 2067 } 2068 count = parent->bytes / sizeof(hammer2_blockref_t); 2069 break; 2070 case HAMMER2_BREF_TYPE_VOLUME: 2071 base = &hmp->voldata.sroot_blockset.blockref[0]; 2072 count = HAMMER2_SET_COUNT; 2073 break; 2074 case HAMMER2_BREF_TYPE_FREEMAP: 2075 base = &hmp->voldata.freemap_blockset.blockref[0]; 2076 count = HAMMER2_SET_COUNT; 2077 break; 2078 default: 2079 panic("hammer2_chain_lookup: unrecognized blockref type: %d", 2080 parent->bref.type); 2081 base = NULL; /* safety */ 2082 count = 0; /* safety */ 2083 } 2084 2085 /* 2086 * Merged scan to find next candidate. 2087 * 2088 * hammer2_base_*() functions require the above->live_* fields 2089 * to be synchronized. 2090 * 2091 * We need to hold the spinlock to access the block array and RB tree 2092 * and to interlock chain creation. 2093 */ 2094 above = parent->core; 2095 if ((parent->core->flags & HAMMER2_CORE_COUNTEDBREFS) == 0) 2096 hammer2_chain_countbrefs(parent, base, count); 2097 2098 /* 2099 * Combined search 2100 */ 2101 spin_lock(&above->cst.spin); 2102 chain = hammer2_combined_find(parent, base, count, 2103 cache_indexp, key_nextp, 2104 key_beg, key_end, 2105 &bref); 2106 generation = above->generation; 2107 2108 /* 2109 * Exhausted parent chain, iterate. 2110 */ 2111 if (bref == NULL) { 2112 spin_unlock(&above->cst.spin); 2113 if (key_beg == key_end) /* short cut single-key case */ 2114 return (NULL); 2115 2116 /* 2117 * Stop if we reached the end of the iteration. 2118 */ 2119 if (parent->bref.type != HAMMER2_BREF_TYPE_INDIRECT && 2120 parent->bref.type != HAMMER2_BREF_TYPE_FREEMAP_NODE) { 2121 return (NULL); 2122 } 2123 2124 /* 2125 * Calculate next key, stop if we reached the end of the 2126 * iteration, otherwise go up one level and loop. 2127 */ 2128 key_beg = parent->bref.key + 2129 ((hammer2_key_t)1 << parent->bref.keybits); 2130 if (key_beg == 0 || key_beg > key_end) 2131 return (NULL); 2132 parent = hammer2_chain_getparent(parentp, how_maybe); 2133 goto again; 2134 } 2135 2136 /* 2137 * Selected from blockref or in-memory chain. 2138 */ 2139 if (chain == NULL) { 2140 bcopy = *bref; 2141 spin_unlock(&above->cst.spin); 2142 chain = hammer2_chain_get(parent, generation, 2143 &bcopy); 2144 if (chain == NULL) { 2145 kprintf("retry lookup parent %p keys %016jx:%016jx\n", 2146 parent, key_beg, key_end); 2147 goto again; 2148 } 2149 if (bcmp(&bcopy, bref, sizeof(bcopy))) { 2150 hammer2_chain_drop(chain); 2151 goto again; 2152 } 2153 wasdup = 0; 2154 } else { 2155 hammer2_chain_ref(chain); 2156 wasdup = ((chain->flags & HAMMER2_CHAIN_DUPLICATED) != 0); 2157 spin_unlock(&above->cst.spin); 2158 } 2159 2160 /* 2161 * chain is referenced but not locked. We must lock the chain 2162 * to obtain definitive DUPLICATED/DELETED state 2163 */ 2164 if (chain->bref.type == HAMMER2_BREF_TYPE_INDIRECT || 2165 chain->bref.type == HAMMER2_BREF_TYPE_FREEMAP_NODE) { 2166 hammer2_chain_lock(chain, how_maybe | HAMMER2_RESOLVE_NOREF); 2167 } else { 2168 hammer2_chain_lock(chain, how | HAMMER2_RESOLVE_NOREF); 2169 } 2170 2171 /* 2172 * Skip deleted chains (XXX cache 'i' end-of-block-array? XXX) 2173 * 2174 * NOTE: Chain's key range is not relevant as there might be 2175 * one-offs within the range that are not deleted. 2176 * 2177 * NOTE: Lookups can race delete-duplicate because 2178 * delete-duplicate does not lock the parent's core 2179 * (they just use the spinlock on the core). We must 2180 * check for races by comparing the DUPLICATED flag before 2181 * releasing the spinlock with the flag after locking the 2182 * chain. 2183 */ 2184 if (chain->flags & HAMMER2_CHAIN_DELETED) { 2185 hammer2_chain_unlock(chain); 2186 if ((chain->flags & HAMMER2_CHAIN_DUPLICATED) == 0 || wasdup) { 2187 key_beg = *key_nextp; 2188 if (key_beg == 0 || key_beg > key_end) 2189 return(NULL); 2190 } 2191 goto again; 2192 } 2193 2194 /* 2195 * If the chain element is an indirect block it becomes the new 2196 * parent and we loop on it. We must maintain our top-down locks 2197 * to prevent the flusher from interfering (i.e. doing a 2198 * delete-duplicate and leaving us recursing down a deleted chain). 2199 * 2200 * The parent always has to be locked with at least RESOLVE_MAYBE 2201 * so we can access its data. It might need a fixup if the caller 2202 * passed incompatible flags. Be careful not to cause a deadlock 2203 * as a data-load requires an exclusive lock. 2204 * 2205 * If HAMMER2_LOOKUP_MATCHIND is set and the indirect block's key 2206 * range is within the requested key range we return the indirect 2207 * block and do NOT loop. This is usually only used to acquire 2208 * freemap nodes. 2209 */ 2210 if (chain->bref.type == HAMMER2_BREF_TYPE_INDIRECT || 2211 chain->bref.type == HAMMER2_BREF_TYPE_FREEMAP_NODE) { 2212 hammer2_chain_unlock(parent); 2213 *parentp = parent = chain; 2214 goto again; 2215 } 2216 done: 2217 /* 2218 * All done, return the chain 2219 */ 2220 return (chain); 2221 } 2222 2223 /* 2224 * After having issued a lookup we can iterate all matching keys. 2225 * 2226 * If chain is non-NULL we continue the iteration from just after it's index. 2227 * 2228 * If chain is NULL we assume the parent was exhausted and continue the 2229 * iteration at the next parent. 2230 * 2231 * parent must be locked on entry and remains locked throughout. chain's 2232 * lock status must match flags. Chain is always at least referenced. 2233 * 2234 * WARNING! The MATCHIND flag does not apply to this function. 2235 */ 2236 hammer2_chain_t * 2237 hammer2_chain_next(hammer2_chain_t **parentp, hammer2_chain_t *chain, 2238 hammer2_key_t *key_nextp, 2239 hammer2_key_t key_beg, hammer2_key_t key_end, 2240 int *cache_indexp, int flags) 2241 { 2242 hammer2_chain_t *parent; 2243 int how_maybe; 2244 int ddflag; 2245 2246 /* 2247 * Calculate locking flags for upward recursion. 2248 */ 2249 how_maybe = HAMMER2_RESOLVE_MAYBE; 2250 if (flags & (HAMMER2_LOOKUP_SHARED | HAMMER2_LOOKUP_NOLOCK)) 2251 how_maybe |= HAMMER2_RESOLVE_SHARED; 2252 2253 parent = *parentp; 2254 2255 /* 2256 * Calculate the next index and recalculate the parent if necessary. 2257 */ 2258 if (chain) { 2259 key_beg = chain->bref.key + 2260 ((hammer2_key_t)1 << chain->bref.keybits); 2261 if (flags & HAMMER2_LOOKUP_NOLOCK) 2262 hammer2_chain_drop(chain); 2263 else 2264 hammer2_chain_unlock(chain); 2265 2266 /* 2267 * Any scan where the lookup returned degenerate data embedded 2268 * in the inode has an invalid index and must terminate. 2269 */ 2270 if (chain == parent) 2271 return(NULL); 2272 if (key_beg == 0 || key_beg > key_end) 2273 return(NULL); 2274 chain = NULL; 2275 } else if (parent->bref.type != HAMMER2_BREF_TYPE_INDIRECT && 2276 parent->bref.type != HAMMER2_BREF_TYPE_FREEMAP_NODE) { 2277 /* 2278 * We reached the end of the iteration. 2279 */ 2280 return (NULL); 2281 } else { 2282 /* 2283 * Continue iteration with next parent unless the current 2284 * parent covers the range. 2285 */ 2286 key_beg = parent->bref.key + 2287 ((hammer2_key_t)1 << parent->bref.keybits); 2288 if (key_beg == 0 || key_beg > key_end) 2289 return (NULL); 2290 parent = hammer2_chain_getparent(parentp, how_maybe); 2291 } 2292 2293 /* 2294 * And execute 2295 */ 2296 return (hammer2_chain_lookup(parentp, key_nextp, 2297 key_beg, key_end, 2298 cache_indexp, flags, &ddflag)); 2299 } 2300 2301 /* 2302 * The raw scan function is similar to lookup/next but does not seek to a key. 2303 * Blockrefs are iterated via first_chain = (parent, NULL) and 2304 * next_chain = (parent, chain). 2305 * 2306 * The passed-in parent must be locked and its data resolved. The returned 2307 * chain will be locked. Pass chain == NULL to acquire the first sub-chain 2308 * under parent and then iterate with the passed-in chain (which this 2309 * function will unlock). 2310 */ 2311 hammer2_chain_t * 2312 hammer2_chain_scan(hammer2_chain_t *parent, hammer2_chain_t *chain, 2313 int *cache_indexp, int flags) 2314 { 2315 hammer2_mount_t *hmp; 2316 hammer2_blockref_t *base; 2317 hammer2_blockref_t *bref; 2318 hammer2_blockref_t bcopy; 2319 hammer2_chain_core_t *above; 2320 hammer2_key_t key; 2321 hammer2_key_t next_key; 2322 int count = 0; 2323 int how_always = HAMMER2_RESOLVE_ALWAYS; 2324 int how_maybe = HAMMER2_RESOLVE_MAYBE; 2325 int how; 2326 int generation; 2327 int maxloops = 300000; 2328 int wasdup; 2329 2330 hmp = parent->hmp; 2331 2332 /* 2333 * Scan flags borrowed from lookup 2334 */ 2335 if (flags & HAMMER2_LOOKUP_ALWAYS) { 2336 how_maybe = how_always; 2337 how = HAMMER2_RESOLVE_ALWAYS; 2338 } else if (flags & (HAMMER2_LOOKUP_NODATA | HAMMER2_LOOKUP_NOLOCK)) { 2339 how = HAMMER2_RESOLVE_NEVER; 2340 } else { 2341 how = HAMMER2_RESOLVE_MAYBE; 2342 } 2343 if (flags & (HAMMER2_LOOKUP_SHARED | HAMMER2_LOOKUP_NOLOCK)) { 2344 how_maybe |= HAMMER2_RESOLVE_SHARED; 2345 how_always |= HAMMER2_RESOLVE_SHARED; 2346 how |= HAMMER2_RESOLVE_SHARED; 2347 } 2348 2349 /* 2350 * Calculate key to locate first/next element, unlocking the previous 2351 * element as we go. Be careful, the key calculation can overflow. 2352 */ 2353 if (chain) { 2354 key = chain->bref.key + 2355 ((hammer2_key_t)1 << chain->bref.keybits); 2356 hammer2_chain_unlock(chain); 2357 chain = NULL; 2358 if (key == 0) 2359 goto done; 2360 } else { 2361 key = 0; 2362 } 2363 2364 again: 2365 if (--maxloops == 0) 2366 panic("hammer2_chain_scan: maxloops"); 2367 /* 2368 * Locate the blockref array. Currently we do a fully associative 2369 * search through the array. 2370 */ 2371 switch(parent->bref.type) { 2372 case HAMMER2_BREF_TYPE_INODE: 2373 /* 2374 * An inode with embedded data has no sub-chains. 2375 */ 2376 if (parent->data->ipdata.op_flags & HAMMER2_OPFLAG_DIRECTDATA) 2377 goto done; 2378 base = &parent->data->ipdata.u.blockset.blockref[0]; 2379 count = HAMMER2_SET_COUNT; 2380 break; 2381 case HAMMER2_BREF_TYPE_FREEMAP_NODE: 2382 case HAMMER2_BREF_TYPE_INDIRECT: 2383 /* 2384 * Optimize indirect blocks in the INITIAL state to avoid 2385 * I/O. 2386 */ 2387 if (parent->flags & HAMMER2_CHAIN_INITIAL) { 2388 base = NULL; 2389 } else { 2390 if (parent->data == NULL) 2391 panic("parent->data is NULL"); 2392 base = &parent->data->npdata[0]; 2393 } 2394 count = parent->bytes / sizeof(hammer2_blockref_t); 2395 break; 2396 case HAMMER2_BREF_TYPE_VOLUME: 2397 base = &hmp->voldata.sroot_blockset.blockref[0]; 2398 count = HAMMER2_SET_COUNT; 2399 break; 2400 case HAMMER2_BREF_TYPE_FREEMAP: 2401 base = &hmp->voldata.freemap_blockset.blockref[0]; 2402 count = HAMMER2_SET_COUNT; 2403 break; 2404 default: 2405 panic("hammer2_chain_lookup: unrecognized blockref type: %d", 2406 parent->bref.type); 2407 base = NULL; /* safety */ 2408 count = 0; /* safety */ 2409 } 2410 2411 /* 2412 * Merged scan to find next candidate. 2413 * 2414 * hammer2_base_*() functions require the above->live_* fields 2415 * to be synchronized. 2416 * 2417 * We need to hold the spinlock to access the block array and RB tree 2418 * and to interlock chain creation. 2419 */ 2420 if ((parent->core->flags & HAMMER2_CORE_COUNTEDBREFS) == 0) 2421 hammer2_chain_countbrefs(parent, base, count); 2422 2423 above = parent->core; 2424 next_key = 0; 2425 spin_lock(&above->cst.spin); 2426 chain = hammer2_combined_find(parent, base, count, 2427 cache_indexp, &next_key, 2428 key, HAMMER2_KEY_MAX, 2429 &bref); 2430 generation = above->generation; 2431 2432 /* 2433 * Exhausted parent chain, we're done. 2434 */ 2435 if (bref == NULL) { 2436 spin_unlock(&above->cst.spin); 2437 KKASSERT(chain == NULL); 2438 goto done; 2439 } 2440 2441 /* 2442 * Selected from blockref or in-memory chain. 2443 */ 2444 if (chain == NULL) { 2445 bcopy = *bref; 2446 spin_unlock(&above->cst.spin); 2447 chain = hammer2_chain_get(parent, generation, &bcopy); 2448 if (chain == NULL) { 2449 kprintf("retry scan parent %p keys %016jx\n", 2450 parent, key); 2451 goto again; 2452 } 2453 if (bcmp(&bcopy, bref, sizeof(bcopy))) { 2454 hammer2_chain_drop(chain); 2455 chain = NULL; 2456 goto again; 2457 } 2458 wasdup = 0; 2459 } else { 2460 hammer2_chain_ref(chain); 2461 wasdup = ((chain->flags & HAMMER2_CHAIN_DUPLICATED) != 0); 2462 spin_unlock(&above->cst.spin); 2463 } 2464 2465 /* 2466 * chain is referenced but not locked. We must lock the chain 2467 * to obtain definitive DUPLICATED/DELETED state 2468 */ 2469 hammer2_chain_lock(chain, how | HAMMER2_RESOLVE_NOREF); 2470 2471 /* 2472 * Skip deleted chains (XXX cache 'i' end-of-block-array? XXX) 2473 * 2474 * NOTE: chain's key range is not relevant as there might be 2475 * one-offs within the range that are not deleted. 2476 * 2477 * NOTE: XXX this could create problems with scans used in 2478 * situations other than mount-time recovery. 2479 * 2480 * NOTE: Lookups can race delete-duplicate because 2481 * delete-duplicate does not lock the parent's core 2482 * (they just use the spinlock on the core). We must 2483 * check for races by comparing the DUPLICATED flag before 2484 * releasing the spinlock with the flag after locking the 2485 * chain. 2486 */ 2487 if (chain->flags & HAMMER2_CHAIN_DELETED) { 2488 hammer2_chain_unlock(chain); 2489 chain = NULL; 2490 2491 if ((chain->flags & HAMMER2_CHAIN_DUPLICATED) == 0 || wasdup) { 2492 key = next_key; 2493 if (key == 0) 2494 goto done; 2495 } 2496 goto again; 2497 } 2498 2499 done: 2500 /* 2501 * All done, return the chain or NULL 2502 */ 2503 return (chain); 2504 } 2505 2506 /* 2507 * Create and return a new hammer2 system memory structure of the specified 2508 * key, type and size and insert it under (*parentp). This is a full 2509 * insertion, based on the supplied key/keybits, and may involve creating 2510 * indirect blocks and moving other chains around via delete/duplicate. 2511 * 2512 * THE CALLER MUST HAVE ALREADY PROPERLY SEEKED (*parentp) TO THE INSERTION 2513 * POINT SANS ANY REQUIRED INDIRECT BLOCK CREATIONS DUE TO THE ARRAY BEING 2514 * FULL. This typically means that the caller is creating the chain after 2515 * doing a hammer2_chain_lookup(). 2516 * 2517 * (*parentp) must be exclusive locked and may be replaced on return 2518 * depending on how much work the function had to do. 2519 * 2520 * (*chainp) usually starts out NULL and returns the newly created chain, 2521 * but if the caller desires the caller may allocate a disconnected chain 2522 * and pass it in instead. (It is also possible for the caller to use 2523 * chain_duplicate() to create a disconnected chain, manipulate it, then 2524 * pass it into this function to insert it). 2525 * 2526 * This function should NOT be used to insert INDIRECT blocks. It is 2527 * typically used to create/insert inodes and data blocks. 2528 * 2529 * Caller must pass-in an exclusively locked parent the new chain is to 2530 * be inserted under, and optionally pass-in a disconnected, exclusively 2531 * locked chain to insert (else we create a new chain). The function will 2532 * adjust (*parentp) as necessary, create or connect the chain, and 2533 * return an exclusively locked chain in *chainp. 2534 */ 2535 int 2536 hammer2_chain_create(hammer2_trans_t *trans, hammer2_chain_t **parentp, 2537 hammer2_chain_t **chainp, hammer2_pfsmount_t *pmp, 2538 hammer2_key_t key, int keybits, int type, size_t bytes) 2539 { 2540 hammer2_mount_t *hmp; 2541 hammer2_chain_t *chain; 2542 hammer2_chain_t *parent = *parentp; 2543 hammer2_chain_core_t *above; 2544 hammer2_blockref_t *base; 2545 hammer2_blockref_t dummy; 2546 int allocated = 0; 2547 int error = 0; 2548 int count; 2549 int maxloops = 300000; 2550 2551 /* 2552 * Topology may be crossing a PFS boundary. 2553 */ 2554 above = parent->core; 2555 KKASSERT(ccms_thread_lock_owned(&above->cst)); 2556 hmp = parent->hmp; 2557 chain = *chainp; 2558 2559 if (chain == NULL) { 2560 /* 2561 * First allocate media space and construct the dummy bref, 2562 * then allocate the in-memory chain structure. Set the 2563 * INITIAL flag for fresh chains which do not have embedded 2564 * data. 2565 */ 2566 bzero(&dummy, sizeof(dummy)); 2567 dummy.type = type; 2568 dummy.key = key; 2569 dummy.keybits = keybits; 2570 dummy.data_off = hammer2_getradix(bytes); 2571 dummy.methods = parent->bref.methods; 2572 chain = hammer2_chain_alloc(hmp, pmp, trans, &dummy); 2573 hammer2_chain_core_alloc(trans, chain, NULL); 2574 2575 /* 2576 * Lock the chain manually, chain_lock will load the chain 2577 * which we do NOT want to do. (note: chain->refs is set 2578 * to 1 by chain_alloc() for us, but lockcnt is not). 2579 */ 2580 chain->lockcnt = 1; 2581 ccms_thread_lock(&chain->core->cst, CCMS_STATE_EXCLUSIVE); 2582 allocated = 1; 2583 2584 /* 2585 * We do NOT set INITIAL here (yet). INITIAL is only 2586 * used for indirect blocks. 2587 * 2588 * Recalculate bytes to reflect the actual media block 2589 * allocation. 2590 */ 2591 bytes = (hammer2_off_t)1 << 2592 (int)(chain->bref.data_off & HAMMER2_OFF_MASK_RADIX); 2593 chain->bytes = bytes; 2594 2595 switch(type) { 2596 case HAMMER2_BREF_TYPE_VOLUME: 2597 case HAMMER2_BREF_TYPE_FREEMAP: 2598 panic("hammer2_chain_create: called with volume type"); 2599 break; 2600 case HAMMER2_BREF_TYPE_INDIRECT: 2601 panic("hammer2_chain_create: cannot be used to" 2602 "create indirect block"); 2603 break; 2604 case HAMMER2_BREF_TYPE_FREEMAP_NODE: 2605 panic("hammer2_chain_create: cannot be used to" 2606 "create freemap root or node"); 2607 break; 2608 case HAMMER2_BREF_TYPE_FREEMAP_LEAF: 2609 KKASSERT(bytes == sizeof(chain->data->bmdata)); 2610 /* fall through */ 2611 case HAMMER2_BREF_TYPE_INODE: 2612 case HAMMER2_BREF_TYPE_DATA: 2613 default: 2614 /* 2615 * leave chain->data NULL, set INITIAL 2616 */ 2617 KKASSERT(chain->data == NULL); 2618 atomic_set_int(&chain->flags, HAMMER2_CHAIN_INITIAL); 2619 break; 2620 } 2621 } else { 2622 /* 2623 * We are reattaching a chain that has been duplicated and 2624 * left disconnected under a DIFFERENT parent with potentially 2625 * different key/keybits. 2626 * 2627 * The chain must be modified in the current transaction 2628 * (the duplication code should have done that for us), 2629 * and it's modify_xid should be greater than the parent's 2630 * bref.mirror_tid. This should cause it to be created under 2631 * the new parent. 2632 * 2633 * If deleted in the same transaction, the create/delete TIDs 2634 * will be the same and effective the chain will not have 2635 * existed at all from the point of view of the parent. 2636 * 2637 * Do NOT mess with the current state of the INITIAL flag. 2638 */ 2639 KKASSERT(chain->modify_xid == trans->sync_xid); 2640 chain->bref.key = key; 2641 chain->bref.keybits = keybits; 2642 KKASSERT(chain->above == NULL); 2643 } 2644 2645 /* 2646 * Calculate how many entries we have in the blockref array and 2647 * determine if an indirect block is required. 2648 */ 2649 again: 2650 if (--maxloops == 0) 2651 panic("hammer2_chain_create: maxloops"); 2652 above = parent->core; 2653 2654 switch(parent->bref.type) { 2655 case HAMMER2_BREF_TYPE_INODE: 2656 KKASSERT((parent->data->ipdata.op_flags & 2657 HAMMER2_OPFLAG_DIRECTDATA) == 0); 2658 KKASSERT(parent->data != NULL); 2659 base = &parent->data->ipdata.u.blockset.blockref[0]; 2660 count = HAMMER2_SET_COUNT; 2661 break; 2662 case HAMMER2_BREF_TYPE_INDIRECT: 2663 case HAMMER2_BREF_TYPE_FREEMAP_NODE: 2664 if (parent->flags & HAMMER2_CHAIN_INITIAL) 2665 base = NULL; 2666 else 2667 base = &parent->data->npdata[0]; 2668 count = parent->bytes / sizeof(hammer2_blockref_t); 2669 break; 2670 case HAMMER2_BREF_TYPE_VOLUME: 2671 KKASSERT(parent->data != NULL); 2672 base = &hmp->voldata.sroot_blockset.blockref[0]; 2673 count = HAMMER2_SET_COUNT; 2674 break; 2675 case HAMMER2_BREF_TYPE_FREEMAP: 2676 KKASSERT(parent->data != NULL); 2677 base = &hmp->voldata.freemap_blockset.blockref[0]; 2678 count = HAMMER2_SET_COUNT; 2679 break; 2680 default: 2681 panic("hammer2_chain_create: unrecognized blockref type: %d", 2682 parent->bref.type); 2683 base = NULL; 2684 count = 0; 2685 break; 2686 } 2687 2688 /* 2689 * Make sure we've counted the brefs 2690 */ 2691 if ((parent->core->flags & HAMMER2_CORE_COUNTEDBREFS) == 0) 2692 hammer2_chain_countbrefs(parent, base, count); 2693 2694 KKASSERT(above->live_count >= 0 && above->live_count <= count); 2695 2696 /* 2697 * If no free blockref could be found we must create an indirect 2698 * block and move a number of blockrefs into it. With the parent 2699 * locked we can safely lock each child in order to delete+duplicate 2700 * it without causing a deadlock. 2701 * 2702 * This may return the new indirect block or the old parent depending 2703 * on where the key falls. NULL is returned on error. 2704 */ 2705 if (above->live_count == count) { 2706 hammer2_chain_t *nparent; 2707 2708 nparent = hammer2_chain_create_indirect(trans, parent, 2709 key, keybits, 2710 type, &error); 2711 if (nparent == NULL) { 2712 if (allocated) 2713 hammer2_chain_drop(chain); 2714 chain = NULL; 2715 goto done; 2716 } 2717 if (parent != nparent) { 2718 hammer2_chain_unlock(parent); 2719 parent = *parentp = nparent; 2720 } 2721 goto again; 2722 } 2723 2724 /* 2725 * Link the chain into its parent. 2726 */ 2727 if (chain->above != NULL) 2728 panic("hammer2: hammer2_chain_create: chain already connected"); 2729 KKASSERT(chain->above == NULL); 2730 hammer2_chain_insert(above, NULL, chain, 2731 HAMMER2_CHAIN_INSERT_SPIN | 2732 HAMMER2_CHAIN_INSERT_LIVE, 2733 0); 2734 2735 if (allocated) { 2736 /* 2737 * Mark the newly created chain modified. This will cause 2738 * FLUSH_CREATE to be set. 2739 * 2740 * Device buffers are not instantiated for DATA elements 2741 * as these are handled by logical buffers. 2742 * 2743 * Indirect and freemap node indirect blocks are handled 2744 * by hammer2_chain_create_indirect() and not by this 2745 * function. 2746 * 2747 * Data for all other bref types is expected to be 2748 * instantiated (INODE, LEAF). 2749 */ 2750 switch(chain->bref.type) { 2751 case HAMMER2_BREF_TYPE_DATA: 2752 case HAMMER2_BREF_TYPE_FREEMAP_LEAF: 2753 case HAMMER2_BREF_TYPE_INODE: 2754 hammer2_chain_modify(trans, &chain, 2755 HAMMER2_MODIFY_OPTDATA | 2756 HAMMER2_MODIFY_ASSERTNOCOPY); 2757 break; 2758 default: 2759 /* 2760 * Remaining types are not supported by this function. 2761 * In particular, INDIRECT and LEAF_NODE types are 2762 * handled by create_indirect(). 2763 */ 2764 panic("hammer2_chain_create: bad type: %d", 2765 chain->bref.type); 2766 /* NOT REACHED */ 2767 break; 2768 } 2769 } else { 2770 /* 2771 * When reconnecting a chain we must set FLUSH_CREATE and 2772 * setsubmod so the flush recognizes that it must update 2773 * the bref in the parent. 2774 */ 2775 if ((chain->flags & HAMMER2_CHAIN_FLUSH_CREATE) == 0) { 2776 hammer2_chain_ref(chain); 2777 atomic_set_int(&chain->flags, 2778 HAMMER2_CHAIN_FLUSH_CREATE); 2779 } 2780 } 2781 hammer2_chain_setsubmod(trans, chain); 2782 2783 done: 2784 *chainp = chain; 2785 2786 return (error); 2787 } 2788 2789 /* 2790 * Replace (*chainp) with a duplicate in-memory chain structure which shares 2791 * the same core and media state as the orignal. The original *chainp is 2792 * unlocked and the replacement will be returned locked. The duplicated 2793 * chain is inserted under (*parentp). 2794 * 2795 * THE CALLER MUST HAVE ALREADY PROPERLY SEEKED (*parentp) TO THE INSERTION 2796 * POINT SANS ANY REQUIRED INDIRECT BLOCK CREATIONS DUE TO THE ARRAY BEING 2797 * FULL. This typically means that the caller is creating the chain after 2798 * doing a hammer2_chain_lookup(). 2799 * 2800 * A non-NULL bref is typically passed when key and keybits must be overridden. 2801 * Note that hammer2_cluster_duplicate() *ONLY* uses the key and keybits fields 2802 * from a passed-in bref and uses the old chain's bref for everything else. 2803 * 2804 * The old chain must be in a DELETED state unless snapshot is non-zero. 2805 * 2806 * The new chain will be live (i.e. not deleted), and modified. 2807 * 2808 * If (parent) is non-NULL then the new duplicated chain is inserted under 2809 * the parent. 2810 * 2811 * If (parent) is NULL then the newly duplicated chain is not inserted 2812 * anywhere, similar to if it had just been chain_alloc()'d (suitable for 2813 * passing into hammer2_chain_create() after this function returns). 2814 * 2815 * WARNING! This function cannot take snapshots all by itself. The caller 2816 * needs to do other massaging for snapshots. 2817 * 2818 * WARNING! This function calls create which means it can insert indirect 2819 * blocks. Callers may have to refactor locked chains held across 2820 * the call (other than the ones passed into the call). 2821 */ 2822 void 2823 hammer2_chain_duplicate(hammer2_trans_t *trans, hammer2_chain_t **parentp, 2824 hammer2_chain_t **chainp, hammer2_blockref_t *bref, 2825 int snapshot, int duplicate_reason) 2826 { 2827 hammer2_mount_t *hmp; 2828 hammer2_chain_t *parent; 2829 hammer2_chain_t *ochain; 2830 hammer2_chain_t *nchain; 2831 hammer2_chain_core_t *above; 2832 size_t bytes; 2833 2834 /* 2835 * We want nchain to be our go-to live chain, but ochain may be in 2836 * a MODIFIED state within the current flush synchronization segment. 2837 * Force any further modifications of ochain to do another COW 2838 * operation even if modify_xid indicates that one is not needed. 2839 * 2840 * We don't want to set FORCECOW on nchain simply as an optimization, 2841 * as many duplication calls simply move chains into ichains and 2842 * then delete the original. 2843 * 2844 * WARNING! We should never resolve DATA to device buffers 2845 * (XXX allow it if the caller did?), and since 2846 * we currently do not have the logical buffer cache 2847 * buffer in-hand to fix its cached physical offset 2848 * we also force the modify code to not COW it. XXX 2849 */ 2850 ochain = *chainp; 2851 hmp = ochain->hmp; 2852 KKASSERT(snapshot == 1 || (ochain->flags & HAMMER2_CHAIN_DELETED)); 2853 2854 /* 2855 * Now create a duplicate of the chain structure, associating 2856 * it with the same core, making it the same size, pointing it 2857 * to the same bref (the same media block). 2858 * 2859 * Give nchain the same modify_xid that we previously ensured was 2860 * sufficiently advanced to trigger a block table insertion on flush. 2861 * 2862 * nchain copies ochain's data and must inherit ochain->update_xlo. 2863 * 2864 * NOTE: bref.mirror_tid duplicated by virtue of bref copy in 2865 * hammer2_chain_alloc() 2866 */ 2867 if (bref == NULL) 2868 bref = &ochain->bref; 2869 if (snapshot) { 2870 nchain = hammer2_chain_alloc(hmp, NULL, trans, bref); 2871 atomic_set_int(&nchain->flags, HAMMER2_CHAIN_SNAPSHOT); 2872 } else { 2873 nchain = hammer2_chain_alloc(hmp, ochain->pmp, trans, bref); 2874 } 2875 hammer2_chain_core_alloc(trans, nchain, ochain); 2876 bytes = (hammer2_off_t)1 << 2877 (int)(bref->data_off & HAMMER2_OFF_MASK_RADIX); 2878 nchain->bytes = bytes; 2879 nchain->modify_xid = ochain->modify_xid; 2880 nchain->update_xlo = ochain->update_xlo; 2881 nchain->inode_reason = ochain->inode_reason + 0x100000; 2882 atomic_set_int(&nchain->flags, 2883 ochain->flags & (HAMMER2_CHAIN_INITIAL | 2884 HAMMER2_CHAIN_FORCECOW | 2885 HAMMER2_CHAIN_UNLINKED | 2886 HAMMER2_CHAIN_PFSROOT | 2887 HAMMER2_CHAIN_PFSBOUNDARY)); 2888 if (ochain->modify_xid == trans->sync_xid) 2889 atomic_set_int(&ochain->flags, HAMMER2_CHAIN_FORCECOW); 2890 2891 /* 2892 * Switch from ochain to nchain 2893 */ 2894 hammer2_chain_lock(nchain, HAMMER2_RESOLVE_NEVER | 2895 HAMMER2_RESOLVE_NOREF); 2896 /* nchain has 1 ref */ 2897 hammer2_chain_unlock(ochain); 2898 2899 /* 2900 * Place nchain in the modified state, instantiate media data 2901 * if necessary. Because modify_xid is already completely 2902 * synchronized this should not result in a delete-duplicate. 2903 * 2904 * We want nchain at the target to look like a new insertion. 2905 * Forcing the modification to be INPLACE accomplishes this 2906 * because we get the same nchain with an updated modify_xid. 2907 */ 2908 if (nchain->bref.type == HAMMER2_BREF_TYPE_DATA) { 2909 hammer2_chain_modify(trans, &nchain, 2910 HAMMER2_MODIFY_OPTDATA | 2911 HAMMER2_MODIFY_NOREALLOC | 2912 HAMMER2_MODIFY_INPLACE); 2913 } else if (nchain->flags & HAMMER2_CHAIN_INITIAL) { 2914 hammer2_chain_modify(trans, &nchain, 2915 HAMMER2_MODIFY_OPTDATA | 2916 HAMMER2_MODIFY_INPLACE); 2917 } else { 2918 hammer2_chain_modify(trans, &nchain, 2919 HAMMER2_MODIFY_INPLACE); 2920 } 2921 2922 /* 2923 * If parent is not NULL the duplicated chain will be entered under 2924 * the parent and the FLUSH_CREATE bit set to tell flush to update 2925 * the blockref. 2926 * 2927 * Having both chains locked is extremely important for atomicy. 2928 */ 2929 if (parentp && (parent = *parentp) != NULL) { 2930 above = parent->core; 2931 KKASSERT(ccms_thread_lock_owned(&above->cst)); 2932 KKASSERT((nchain->flags & HAMMER2_CHAIN_DELETED) == 0); 2933 KKASSERT(parent->refs > 0); 2934 2935 hammer2_chain_create(trans, parentp, &nchain, nchain->pmp, 2936 nchain->bref.key, nchain->bref.keybits, 2937 nchain->bref.type, nchain->bytes); 2938 parent = NULL; 2939 2940 KKASSERT(nchain->flags & HAMMER2_CHAIN_FLUSH_CREATE); 2941 hammer2_chain_setsubmod(trans, nchain); 2942 } 2943 2944 *chainp = nchain; 2945 } 2946 2947 /* 2948 * Helper function for deleting chains. 2949 * 2950 * The chain is removed from the live view (the RBTREE). 2951 * 2952 * If appropriate, the chain is added to the shadow topology and FLUSH_DELETE 2953 * is set for flusher visbility. The caller is responsible for calling 2954 * setsubmod on chain, so we do not adjust update_xhi here. 2955 */ 2956 static void 2957 _hammer2_chain_delete_helper(hammer2_trans_t *trans, 2958 hammer2_chain_core_t *above, 2959 hammer2_chain_t *chain) 2960 { 2961 hammer2_mount_t *hmp; 2962 hammer2_chain_t *xchain; 2963 2964 KKASSERT(chain->flags & HAMMER2_CHAIN_ONRBTREE); 2965 KKASSERT(trans->sync_xid >= chain->modify_xid); 2966 KKASSERT((chain->flags & (HAMMER2_CHAIN_DELETED | 2967 HAMMER2_CHAIN_ONDBQ | 2968 HAMMER2_CHAIN_ONDBTREE | 2969 HAMMER2_CHAIN_FLUSH_DELETE)) == 0); 2970 2971 /* 2972 * Flag as deleted, reduce live_count and bump the above core's 2973 * generation. 2974 */ 2975 chain->delete_xid = trans->sync_xid; 2976 atomic_set_int(&chain->flags, HAMMER2_CHAIN_DELETED); 2977 atomic_add_int(&above->live_count, -1); 2978 ++above->generation; 2979 hmp = chain->hmp; 2980 2981 /* 2982 * Remove from live tree 2983 */ 2984 RB_REMOVE(hammer2_chain_tree, &above->rbtree, chain); 2985 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_ONRBTREE); 2986 2987 if (chain->flags & HAMMER2_CHAIN_BMAPPED) { 2988 /* 2989 * If the chain was originally bmapped we must place on the 2990 * deleted tree and set FLUSH_DELETE (+ref) to prevent 2991 * destruction of the chain until the flush can reconcile 2992 * the parent's block table. 2993 * 2994 * NOTE! DBTREE is only representitive of the live view, 2995 * the flush must check both DBTREE and DBQ. 2996 */ 2997 xchain = RB_INSERT(hammer2_chain_tree, &above->dbtree, chain); 2998 KKASSERT(xchain == NULL); 2999 atomic_set_int(&chain->flags, HAMMER2_CHAIN_ONDBTREE); 3000 3001 atomic_set_int(&chain->flags, HAMMER2_CHAIN_FLUSH_DELETE); 3002 hammer2_chain_ref(chain); 3003 } else { 3004 /* 3005 * If the chain no longer (and never had) an actual blockmap 3006 * entry we must place it on the dbq list and set FLUSH_DELETE 3007 * (+ref) to prevent destruction of the chain until the flush 3008 * can reconcile the parent's block table. 3009 * 3010 * NOTE! DBTREE is only representitive of the live view, 3011 * the flush must check both DBTREE and DBQ. 3012 */ 3013 TAILQ_INSERT_TAIL(&above->dbq, chain, db_entry); 3014 atomic_set_int(&chain->flags, HAMMER2_CHAIN_ONDBQ); 3015 3016 atomic_set_int(&chain->flags, HAMMER2_CHAIN_FLUSH_DELETE); 3017 hammer2_chain_ref(chain); 3018 } 3019 } 3020 3021 /* 3022 * Special in-place delete-duplicate sequence which does not require a 3023 * locked parent. (*chainp) is marked DELETED and atomically replaced 3024 * with a duplicate. Atomicy is at the very-fine spin-lock level in 3025 * order to ensure that lookups do not race us. 3026 * 3027 * The flush code will sometimes call this function with a deleted chain. 3028 * In this situation the old chain's memory is reallocated without 3029 * duplicating it. 3030 * 3031 * The new chain will be marked modified for the current transaction. 3032 */ 3033 void 3034 hammer2_chain_delete_duplicate(hammer2_trans_t *trans, hammer2_chain_t **chainp, 3035 int flags) 3036 { 3037 hammer2_mount_t *hmp; 3038 hammer2_chain_t *ochain; 3039 hammer2_chain_t *nchain; 3040 hammer2_chain_core_t *above; 3041 size_t bytes; 3042 uint32_t oflags; 3043 3044 if (hammer2_debug & 0x20000) 3045 Debugger("dd"); 3046 3047 /* 3048 * Note that we do not have to call setsubmod on ochain, calling it 3049 * on nchain is sufficient. 3050 */ 3051 ochain = *chainp; 3052 oflags = ochain->flags; /* flags prior to core_alloc mods */ 3053 hmp = ochain->hmp; 3054 3055 if (ochain->bref.type == HAMMER2_BREF_TYPE_INODE) { 3056 KKASSERT(ochain->data); 3057 } 3058 3059 /* 3060 * First create a duplicate of the chain structure. 3061 * (nchain is allocated with one ref). 3062 * 3063 * In the case where nchain inherits ochains core, nchain is 3064 * effectively locked due to ochain being locked (and sharing the 3065 * core), until we can give nchain its own official ock. 3066 * 3067 * WARNING! Flusher concurrency can create two cases. The first is 3068 * that the flusher might be working on a chain that has 3069 * been deleted in the live view but is live in the flusher's 3070 * view. In the second case the flusher may be duplicating 3071 * a forward-transacted chain. In both situations nchain 3072 * must be marked deleted. 3073 * 3074 * WARNING! hammer2_chain_core_alloc() also acts on these issues. 3075 */ 3076 nchain = hammer2_chain_alloc(hmp, ochain->pmp, trans, &ochain->bref); 3077 if ((ochain->flags & HAMMER2_CHAIN_DELETED) || 3078 (ochain->modify_xid > trans->sync_xid)) { 3079 atomic_set_int(&nchain->flags, HAMMER2_CHAIN_DELETED); 3080 } 3081 if (flags & HAMMER2_DELDUP_RECORE) 3082 hammer2_chain_core_alloc(trans, nchain, NULL); 3083 else 3084 hammer2_chain_core_alloc(trans, nchain, ochain); 3085 above = ochain->above; 3086 3087 bytes = (hammer2_off_t)1 << 3088 (int)(ochain->bref.data_off & HAMMER2_OFF_MASK_RADIX); 3089 nchain->bytes = bytes; 3090 3091 /* 3092 * nchain inherits ochain's live state including its modification 3093 * state. This function disposes of the original. Because we are 3094 * doing this in-place under the same parent the block array 3095 * inserted/deleted state does not change. 3096 * 3097 * nchain copies ochain's data and must inherit ochain->update_xlo. 3098 * 3099 * If ochain was previously marked FORCECOW we also flag nchain 3100 * FORCECOW (used during hardlink splits). FORCECOW forces a 3101 * reallocation of the block when we modify the chain a little later, 3102 * it does not force another delete-duplicate. 3103 * 3104 * NOTE: bref.mirror_tid duplicated by virtue of bref copy in 3105 * hammer2_chain_alloc() 3106 */ 3107 nchain->data_count += ochain->data_count; 3108 nchain->inode_count += ochain->inode_count; 3109 atomic_set_int(&nchain->flags, 3110 ochain->flags & (HAMMER2_CHAIN_INITIAL | 3111 HAMMER2_CHAIN_FORCECOW | 3112 HAMMER2_CHAIN_UNLINKED | 3113 HAMMER2_CHAIN_PFSROOT | 3114 HAMMER2_CHAIN_PFSBOUNDARY)); 3115 if (ochain->modify_xid == trans->sync_xid) 3116 atomic_set_int(&ochain->flags, HAMMER2_CHAIN_FORCECOW); 3117 nchain->inode_reason = ochain->inode_reason + 0x1000; 3118 nchain->update_xlo = ochain->update_xlo; 3119 3120 /* 3121 * Lock nchain so both chains are now locked (extremely important 3122 * for atomicy). The shared core allows us to unlock ochain without 3123 * actually unlocking ochain. 3124 */ 3125 hammer2_chain_lock(nchain, HAMMER2_RESOLVE_NEVER); 3126 /* extra ref still present from original allocation */ 3127 3128 KKASSERT(ochain->flags & (HAMMER2_CHAIN_ONRBTREE | 3129 HAMMER2_CHAIN_ONDBTREE | 3130 HAMMER2_CHAIN_ONDBQ)); 3131 spin_lock(&above->cst.spin); 3132 3133 nchain->modify_xid = ochain->modify_xid; 3134 nchain->delete_xid = HAMMER2_XID_MAX; 3135 3136 if ((nchain->flags & HAMMER2_CHAIN_DELETED) && 3137 (oflags & HAMMER2_CHAIN_DUPLICATED)) { 3138 /* 3139 * Special case, used by the flush code when a chain which 3140 * has been delete-duplicated is visible (effectively 'live') 3141 * in the flush code. 3142 * 3143 * In this situations nchain will be marked deleted and 3144 * insert before ochain. nchain must inherit certain features 3145 * of ochain. 3146 */ 3147 KKASSERT(trans->flags & HAMMER2_TRANS_ISFLUSH); 3148 KKASSERT(ochain->modify_xid < trans->sync_xid); 3149 KKASSERT(ochain->delete_xid > trans->sync_xid); 3150 atomic_set_int(&nchain->flags, HAMMER2_CHAIN_FLUSH_TEMPORARY); 3151 hammer2_chain_insert(above, ochain, nchain, 0, 0); 3152 3153 if ((ochain->flags & HAMMER2_CHAIN_DELETED) && 3154 ochain->modify_xid < trans->sync_xid) { 3155 nchain->delete_xid = ochain->delete_xid; 3156 ochain->delete_xid = trans->sync_xid; 3157 } else if (ochain->modify_xid > trans->sync_xid) { 3158 nchain->delete_xid = ochain->modify_xid; 3159 } 3160 } else if (nchain->flags & HAMMER2_CHAIN_DELETED) { 3161 /* 3162 * ochain is 'live' with respect to not having been D-D'd, 3163 * but is flagged DELETED. Sometimes updates to deleted 3164 * chains must be allowed due to references which still exist 3165 * on those chains, or due to a flush trying to retire a 3166 * logical buffer cache buffer. 3167 * 3168 * In this situation the D-D operates normally, except 3169 * ochain has already been deleted and nchain is also 3170 * marked deleted. 3171 */ 3172 hammer2_chain_insert(above, ochain, nchain, 0, 0); 3173 nchain->delete_xid = trans->sync_xid; 3174 } else { 3175 /* 3176 * Normal case, delete-duplicate deletes ochain and nchain 3177 * is the new live chain. 3178 */ 3179 _hammer2_chain_delete_helper(trans, above, ochain); 3180 hammer2_chain_insert(above, ochain, nchain, 3181 HAMMER2_CHAIN_INSERT_LIVE, 0); 3182 } 3183 spin_unlock(&above->cst.spin); 3184 3185 /* 3186 * ochain must be unlocked because ochain and nchain might share 3187 * a buffer cache buffer, so we need to release it so nchain can 3188 * potentially obtain it. 3189 */ 3190 hammer2_chain_setsubmod(trans, ochain); 3191 hammer2_chain_unlock(ochain); 3192 3193 /* 3194 * Finishing fixing up nchain. A new block will be allocated if 3195 * crossing a synchronization point (meta-data only). 3196 * 3197 * Calling hammer2_chain_modify() will update modify_xid to 3198 * (typically) trans->sync_xid. 3199 */ 3200 if (nchain->bref.type == HAMMER2_BREF_TYPE_DATA) { 3201 hammer2_chain_modify(trans, &nchain, 3202 HAMMER2_MODIFY_OPTDATA | 3203 HAMMER2_MODIFY_NOREALLOC | 3204 HAMMER2_MODIFY_INPLACE); 3205 } else if (nchain->flags & HAMMER2_CHAIN_INITIAL) { 3206 hammer2_chain_modify(trans, &nchain, 3207 HAMMER2_MODIFY_OPTDATA | 3208 HAMMER2_MODIFY_INPLACE); 3209 } else { 3210 hammer2_chain_modify(trans, &nchain, 3211 HAMMER2_MODIFY_INPLACE); 3212 } 3213 hammer2_chain_drop(nchain); 3214 3215 /* 3216 * Unconditionally set FLUSH_CREATE to force the parent blockrefs to 3217 * update as the chain_modify() above won't necessarily do it. 3218 */ 3219 if ((nchain->flags & HAMMER2_CHAIN_FLUSH_CREATE) == 0) { 3220 atomic_set_int(&nchain->flags, HAMMER2_CHAIN_FLUSH_CREATE); 3221 hammer2_chain_ref(nchain); 3222 } 3223 3224 /* 3225 * If nchain is in a DELETED state we must set FLUSH_DELETE 3226 */ 3227 if (nchain->flags & HAMMER2_CHAIN_DELETED) 3228 KKASSERT((nchain->flags & HAMMER2_CHAIN_FLUSH_DELETE) == 0); 3229 #if 1 3230 if ((nchain->flags & HAMMER2_CHAIN_FLUSH_DELETE) == 0 && 3231 (nchain->flags & HAMMER2_CHAIN_DELETED)) { 3232 atomic_set_int(&nchain->flags, HAMMER2_CHAIN_FLUSH_DELETE); 3233 hammer2_chain_ref(nchain); 3234 } 3235 #endif 3236 hammer2_chain_setsubmod(trans, nchain); 3237 *chainp = nchain; 3238 } 3239 3240 /* 3241 * Create an indirect block that covers one or more of the elements in the 3242 * current parent. Either returns the existing parent with no locking or 3243 * ref changes or returns the new indirect block locked and referenced 3244 * and leaving the original parent lock/ref intact as well. 3245 * 3246 * If an error occurs, NULL is returned and *errorp is set to the error. 3247 * 3248 * The returned chain depends on where the specified key falls. 3249 * 3250 * The key/keybits for the indirect mode only needs to follow three rules: 3251 * 3252 * (1) That all elements underneath it fit within its key space and 3253 * 3254 * (2) That all elements outside it are outside its key space. 3255 * 3256 * (3) When creating the new indirect block any elements in the current 3257 * parent that fit within the new indirect block's keyspace must be 3258 * moved into the new indirect block. 3259 * 3260 * (4) The keyspace chosen for the inserted indirect block CAN cover a wider 3261 * keyspace the the current parent, but lookup/iteration rules will 3262 * ensure (and must ensure) that rule (2) for all parents leading up 3263 * to the nearest inode or the root volume header is adhered to. This 3264 * is accomplished by always recursing through matching keyspaces in 3265 * the hammer2_chain_lookup() and hammer2_chain_next() API. 3266 * 3267 * The current implementation calculates the current worst-case keyspace by 3268 * iterating the current parent and then divides it into two halves, choosing 3269 * whichever half has the most elements (not necessarily the half containing 3270 * the requested key). 3271 * 3272 * We can also opt to use the half with the least number of elements. This 3273 * causes lower-numbered keys (aka logical file offsets) to recurse through 3274 * fewer indirect blocks and higher-numbered keys to recurse through more. 3275 * This also has the risk of not moving enough elements to the new indirect 3276 * block and being forced to create several indirect blocks before the element 3277 * can be inserted. 3278 * 3279 * Must be called with an exclusively locked parent. 3280 */ 3281 static int hammer2_chain_indkey_freemap(hammer2_chain_t *parent, 3282 hammer2_key_t *keyp, int keybits, 3283 hammer2_blockref_t *base, int count); 3284 static int hammer2_chain_indkey_normal(hammer2_chain_t *parent, 3285 hammer2_key_t *keyp, int keybits, 3286 hammer2_blockref_t *base, int count); 3287 static 3288 hammer2_chain_t * 3289 hammer2_chain_create_indirect(hammer2_trans_t *trans, hammer2_chain_t *parent, 3290 hammer2_key_t create_key, int create_bits, 3291 int for_type, int *errorp) 3292 { 3293 hammer2_mount_t *hmp; 3294 hammer2_chain_core_t *above; 3295 hammer2_chain_core_t *icore; 3296 hammer2_blockref_t *base; 3297 hammer2_blockref_t *bref; 3298 hammer2_blockref_t bcopy; 3299 hammer2_chain_t *chain; 3300 hammer2_chain_t *ichain; 3301 hammer2_chain_t dummy; 3302 hammer2_key_t key = create_key; 3303 hammer2_key_t key_beg; 3304 hammer2_key_t key_end; 3305 hammer2_key_t key_next; 3306 int keybits = create_bits; 3307 int count; 3308 int nbytes; 3309 int cache_index; 3310 int loops; 3311 int reason; 3312 int generation; 3313 int maxloops = 300000; 3314 int retry_same; 3315 int wasdup; 3316 3317 /* 3318 * Calculate the base blockref pointer or NULL if the chain 3319 * is known to be empty. We need to calculate the array count 3320 * for RB lookups either way. 3321 */ 3322 hmp = parent->hmp; 3323 *errorp = 0; 3324 KKASSERT(ccms_thread_lock_owned(&parent->core->cst)); 3325 above = parent->core; 3326 3327 /*hammer2_chain_modify(trans, &parent, HAMMER2_MODIFY_OPTDATA);*/ 3328 if (parent->flags & HAMMER2_CHAIN_INITIAL) { 3329 base = NULL; 3330 3331 switch(parent->bref.type) { 3332 case HAMMER2_BREF_TYPE_INODE: 3333 count = HAMMER2_SET_COUNT; 3334 break; 3335 case HAMMER2_BREF_TYPE_INDIRECT: 3336 case HAMMER2_BREF_TYPE_FREEMAP_NODE: 3337 count = parent->bytes / sizeof(hammer2_blockref_t); 3338 break; 3339 case HAMMER2_BREF_TYPE_VOLUME: 3340 count = HAMMER2_SET_COUNT; 3341 break; 3342 case HAMMER2_BREF_TYPE_FREEMAP: 3343 count = HAMMER2_SET_COUNT; 3344 break; 3345 default: 3346 panic("hammer2_chain_create_indirect: " 3347 "unrecognized blockref type: %d", 3348 parent->bref.type); 3349 count = 0; 3350 break; 3351 } 3352 } else { 3353 switch(parent->bref.type) { 3354 case HAMMER2_BREF_TYPE_INODE: 3355 base = &parent->data->ipdata.u.blockset.blockref[0]; 3356 count = HAMMER2_SET_COUNT; 3357 break; 3358 case HAMMER2_BREF_TYPE_INDIRECT: 3359 case HAMMER2_BREF_TYPE_FREEMAP_NODE: 3360 base = &parent->data->npdata[0]; 3361 count = parent->bytes / sizeof(hammer2_blockref_t); 3362 break; 3363 case HAMMER2_BREF_TYPE_VOLUME: 3364 base = &hmp->voldata.sroot_blockset.blockref[0]; 3365 count = HAMMER2_SET_COUNT; 3366 break; 3367 case HAMMER2_BREF_TYPE_FREEMAP: 3368 base = &hmp->voldata.freemap_blockset.blockref[0]; 3369 count = HAMMER2_SET_COUNT; 3370 break; 3371 default: 3372 panic("hammer2_chain_create_indirect: " 3373 "unrecognized blockref type: %d", 3374 parent->bref.type); 3375 count = 0; 3376 break; 3377 } 3378 } 3379 3380 /* 3381 * dummy used in later chain allocation (no longer used for lookups). 3382 */ 3383 bzero(&dummy, sizeof(dummy)); 3384 dummy.delete_xid = HAMMER2_XID_MAX; 3385 3386 /* 3387 * When creating an indirect block for a freemap node or leaf 3388 * the key/keybits must be fitted to static radix levels because 3389 * particular radix levels use particular reserved blocks in the 3390 * related zone. 3391 * 3392 * This routine calculates the key/radix of the indirect block 3393 * we need to create, and whether it is on the high-side or the 3394 * low-side. 3395 */ 3396 if (for_type == HAMMER2_BREF_TYPE_FREEMAP_NODE || 3397 for_type == HAMMER2_BREF_TYPE_FREEMAP_LEAF) { 3398 keybits = hammer2_chain_indkey_freemap(parent, &key, keybits, 3399 base, count); 3400 } else { 3401 keybits = hammer2_chain_indkey_normal(parent, &key, keybits, 3402 base, count); 3403 } 3404 3405 /* 3406 * Normalize the key for the radix being represented, keeping the 3407 * high bits and throwing away the low bits. 3408 */ 3409 key &= ~(((hammer2_key_t)1 << keybits) - 1); 3410 3411 /* 3412 * How big should our new indirect block be? It has to be at least 3413 * as large as its parent. 3414 */ 3415 if (parent->bref.type == HAMMER2_BREF_TYPE_INODE) 3416 nbytes = HAMMER2_IND_BYTES_MIN; 3417 else 3418 nbytes = HAMMER2_IND_BYTES_MAX; 3419 if (nbytes < count * sizeof(hammer2_blockref_t)) 3420 nbytes = count * sizeof(hammer2_blockref_t); 3421 3422 /* 3423 * Ok, create our new indirect block 3424 */ 3425 if (for_type == HAMMER2_BREF_TYPE_FREEMAP_NODE || 3426 for_type == HAMMER2_BREF_TYPE_FREEMAP_LEAF) { 3427 dummy.bref.type = HAMMER2_BREF_TYPE_FREEMAP_NODE; 3428 } else { 3429 dummy.bref.type = HAMMER2_BREF_TYPE_INDIRECT; 3430 } 3431 dummy.bref.key = key; 3432 dummy.bref.keybits = keybits; 3433 dummy.bref.data_off = hammer2_getradix(nbytes); 3434 dummy.bref.methods = parent->bref.methods; 3435 3436 ichain = hammer2_chain_alloc(hmp, parent->pmp, trans, &dummy.bref); 3437 atomic_set_int(&ichain->flags, HAMMER2_CHAIN_INITIAL); 3438 hammer2_chain_core_alloc(trans, ichain, NULL); 3439 icore = ichain->core; 3440 hammer2_chain_lock(ichain, HAMMER2_RESOLVE_MAYBE); 3441 hammer2_chain_drop(ichain); /* excess ref from alloc */ 3442 3443 /* 3444 * We have to mark it modified to allocate its block, but use 3445 * OPTDATA to allow it to remain in the INITIAL state. Otherwise 3446 * it won't be acted upon by the flush code. 3447 */ 3448 hammer2_chain_modify(trans, &ichain, HAMMER2_MODIFY_OPTDATA); 3449 3450 /* 3451 * Iterate the original parent and move the matching brefs into 3452 * the new indirect block. 3453 * 3454 * XXX handle flushes. 3455 */ 3456 key_beg = 0; 3457 key_end = HAMMER2_KEY_MAX; 3458 cache_index = 0; 3459 spin_lock(&above->cst.spin); 3460 loops = 0; 3461 reason = 0; 3462 retry_same = 0; 3463 3464 for (;;) { 3465 if (++loops > 100000) { 3466 spin_unlock(&above->cst.spin); 3467 panic("excessive loops r=%d p=%p base/count %p:%d %016jx\n", 3468 reason, parent, base, count, key_next); 3469 } 3470 3471 /* 3472 * NOTE: spinlock stays intact, returned chain (if not NULL) 3473 * is not referenced or locked which means that we 3474 * cannot safely check its flagged / deletion status 3475 * until we lock it. 3476 */ 3477 chain = hammer2_combined_find(parent, base, count, 3478 &cache_index, &key_next, 3479 key_beg, key_end, 3480 &bref); 3481 generation = above->generation; 3482 if (bref == NULL) 3483 break; 3484 key_next = bref->key + ((hammer2_key_t)1 << bref->keybits); 3485 3486 /* 3487 * Skip keys that are not within the key/radix of the new 3488 * indirect block. They stay in the parent. 3489 */ 3490 if ((~(((hammer2_key_t)1 << keybits) - 1) & 3491 (key ^ bref->key)) != 0) { 3492 goto next_key_spinlocked; 3493 } 3494 3495 /* 3496 * Load the new indirect block by acquiring the related 3497 * chains (potentially from media as it might not be 3498 * in-memory). Then move it to the new parent (ichain) 3499 * via DELETE-DUPLICATE. 3500 * 3501 * chain is referenced but not locked. We must lock the 3502 * chain to obtain definitive DUPLICATED/DELETED state 3503 */ 3504 if (chain) { 3505 /* 3506 * Use chain already present in the RBTREE 3507 */ 3508 hammer2_chain_ref(chain); 3509 wasdup = ((chain->flags & 3510 HAMMER2_CHAIN_DUPLICATED) != 0); 3511 spin_unlock(&above->cst.spin); 3512 hammer2_chain_lock(chain, HAMMER2_RESOLVE_NEVER | 3513 HAMMER2_RESOLVE_NOREF); 3514 } else { 3515 /* 3516 * Get chain for blockref element. _get returns NULL 3517 * on insertion race. 3518 */ 3519 bcopy = *bref; 3520 spin_unlock(&above->cst.spin); 3521 chain = hammer2_chain_get(parent, generation, &bcopy); 3522 if (chain == NULL) { 3523 reason = 1; 3524 spin_lock(&above->cst.spin); 3525 continue; 3526 } 3527 if (bcmp(&bcopy, bref, sizeof(bcopy))) { 3528 reason = 2; 3529 hammer2_chain_drop(chain); 3530 spin_lock(&above->cst.spin); 3531 continue; 3532 } 3533 hammer2_chain_lock(chain, HAMMER2_RESOLVE_NEVER | 3534 HAMMER2_RESOLVE_NOREF); 3535 wasdup = 0; 3536 } 3537 3538 /* 3539 * This is always live so if the chain has been delete- 3540 * duplicated we raced someone and we have to retry. 3541 * 3542 * NOTE: Lookups can race delete-duplicate because 3543 * delete-duplicate does not lock the parent's core 3544 * (they just use the spinlock on the core). We must 3545 * check for races by comparing the DUPLICATED flag before 3546 * releasing the spinlock with the flag after locking the 3547 * chain. 3548 * 3549 * (note reversed logic for this one) 3550 */ 3551 if (chain->flags & HAMMER2_CHAIN_DELETED) { 3552 hammer2_chain_unlock(chain); 3553 if ((chain->flags & HAMMER2_CHAIN_DUPLICATED) && 3554 wasdup == 0) { 3555 retry_same = 1; 3556 } 3557 goto next_key; 3558 } 3559 3560 /* 3561 * Shift the chain to the indirect block. 3562 * 3563 * WARNING! Can cause held-over chains to require a refactor. 3564 * Fortunately we have none (our locked chains are 3565 * passed into and modified by the call). 3566 */ 3567 hammer2_chain_delete(trans, chain, 0); 3568 hammer2_chain_duplicate(trans, &ichain, &chain, NULL, 0, 1); 3569 hammer2_chain_unlock(chain); 3570 KKASSERT(parent->refs > 0); 3571 chain = NULL; 3572 next_key: 3573 spin_lock(&above->cst.spin); 3574 next_key_spinlocked: 3575 if (--maxloops == 0) 3576 panic("hammer2_chain_create_indirect: maxloops"); 3577 reason = 4; 3578 if (retry_same == 0) { 3579 if (key_next == 0 || key_next > key_end) 3580 break; 3581 key_beg = key_next; 3582 } 3583 /* loop */ 3584 } 3585 spin_unlock(&above->cst.spin); 3586 3587 /* 3588 * Insert the new indirect block into the parent now that we've 3589 * cleared out some entries in the parent. We calculated a good 3590 * insertion index in the loop above (ichain->index). 3591 * 3592 * We don't have to set FLUSH_CREATE here because we mark ichain 3593 * modified down below (so the normal modified -> flush -> set-moved 3594 * sequence applies). 3595 * 3596 * The insertion shouldn't race as this is a completely new block 3597 * and the parent is locked. 3598 */ 3599 KKASSERT((ichain->flags & HAMMER2_CHAIN_ONRBTREE) == 0); 3600 hammer2_chain_insert(above, NULL, ichain, 3601 HAMMER2_CHAIN_INSERT_SPIN | 3602 HAMMER2_CHAIN_INSERT_LIVE, 3603 0); 3604 3605 /* 3606 * Mark the new indirect block modified after insertion, which 3607 * will propagate up through parent all the way to the root and 3608 * also allocate the physical block in ichain for our caller, 3609 * and assign ichain->data to a pre-zero'd space (because there 3610 * is not prior data to copy into it). 3611 */ 3612 /*hammer2_chain_modify(trans, &ichain, HAMMER2_MODIFY_OPTDATA);*/ 3613 hammer2_chain_setsubmod(trans, ichain); 3614 3615 /* 3616 * Figure out what to return. 3617 */ 3618 if (~(((hammer2_key_t)1 << keybits) - 1) & 3619 (create_key ^ key)) { 3620 /* 3621 * Key being created is outside the key range, 3622 * return the original parent. 3623 */ 3624 hammer2_chain_unlock(ichain); 3625 } else { 3626 /* 3627 * Otherwise its in the range, return the new parent. 3628 * (leave both the new and old parent locked). 3629 */ 3630 parent = ichain; 3631 } 3632 3633 return(parent); 3634 } 3635 3636 /* 3637 * Calculate the keybits and highside/lowside of the freemap node the 3638 * caller is creating. 3639 * 3640 * This routine will specify the next higher-level freemap key/radix 3641 * representing the lowest-ordered set. By doing so, eventually all 3642 * low-ordered sets will be moved one level down. 3643 * 3644 * We have to be careful here because the freemap reserves a limited 3645 * number of blocks for a limited number of levels. So we can't just 3646 * push indiscriminately. 3647 */ 3648 int 3649 hammer2_chain_indkey_freemap(hammer2_chain_t *parent, hammer2_key_t *keyp, 3650 int keybits, hammer2_blockref_t *base, int count) 3651 { 3652 hammer2_chain_core_t *above; 3653 hammer2_chain_t *chain; 3654 hammer2_blockref_t *bref; 3655 hammer2_key_t key; 3656 hammer2_key_t key_beg; 3657 hammer2_key_t key_end; 3658 hammer2_key_t key_next; 3659 int cache_index; 3660 int locount; 3661 int hicount; 3662 int maxloops = 300000; 3663 3664 key = *keyp; 3665 above = parent->core; 3666 locount = 0; 3667 hicount = 0; 3668 keybits = 64; 3669 3670 /* 3671 * Calculate the range of keys in the array being careful to skip 3672 * slots which are overridden with a deletion. 3673 */ 3674 key_beg = 0; 3675 key_end = HAMMER2_KEY_MAX; 3676 cache_index = 0; 3677 spin_lock(&above->cst.spin); 3678 3679 for (;;) { 3680 if (--maxloops == 0) { 3681 panic("indkey_freemap shit %p %p:%d\n", 3682 parent, base, count); 3683 } 3684 chain = hammer2_combined_find(parent, base, count, 3685 &cache_index, &key_next, 3686 key_beg, key_end, 3687 &bref); 3688 3689 /* 3690 * Exhausted search 3691 */ 3692 if (bref == NULL) 3693 break; 3694 3695 /* 3696 * NOTE: No need to check DUPLICATED here because we do 3697 * not release the spinlock. 3698 */ 3699 if (chain && (chain->flags & HAMMER2_CHAIN_DELETED)) { 3700 if (key_next == 0 || key_next > key_end) 3701 break; 3702 key_beg = key_next; 3703 continue; 3704 } 3705 3706 /* 3707 * Use the full live (not deleted) element for the scan 3708 * iteration. HAMMER2 does not allow partial replacements. 3709 * 3710 * XXX should be built into hammer2_combined_find(). 3711 */ 3712 key_next = bref->key + ((hammer2_key_t)1 << bref->keybits); 3713 3714 if (keybits > bref->keybits) { 3715 key = bref->key; 3716 keybits = bref->keybits; 3717 } else if (keybits == bref->keybits && bref->key < key) { 3718 key = bref->key; 3719 } 3720 if (key_next == 0) 3721 break; 3722 key_beg = key_next; 3723 } 3724 spin_unlock(&above->cst.spin); 3725 3726 /* 3727 * Return the keybits for a higher-level FREEMAP_NODE covering 3728 * this node. 3729 */ 3730 switch(keybits) { 3731 case HAMMER2_FREEMAP_LEVEL0_RADIX: 3732 keybits = HAMMER2_FREEMAP_LEVEL1_RADIX; 3733 break; 3734 case HAMMER2_FREEMAP_LEVEL1_RADIX: 3735 keybits = HAMMER2_FREEMAP_LEVEL2_RADIX; 3736 break; 3737 case HAMMER2_FREEMAP_LEVEL2_RADIX: 3738 keybits = HAMMER2_FREEMAP_LEVEL3_RADIX; 3739 break; 3740 case HAMMER2_FREEMAP_LEVEL3_RADIX: 3741 keybits = HAMMER2_FREEMAP_LEVEL4_RADIX; 3742 break; 3743 case HAMMER2_FREEMAP_LEVEL4_RADIX: 3744 panic("hammer2_chain_indkey_freemap: level too high"); 3745 break; 3746 default: 3747 panic("hammer2_chain_indkey_freemap: bad radix"); 3748 break; 3749 } 3750 *keyp = key; 3751 3752 return (keybits); 3753 } 3754 3755 /* 3756 * Calculate the keybits and highside/lowside of the indirect block the 3757 * caller is creating. 3758 */ 3759 static int 3760 hammer2_chain_indkey_normal(hammer2_chain_t *parent, hammer2_key_t *keyp, 3761 int keybits, hammer2_blockref_t *base, int count) 3762 { 3763 hammer2_chain_core_t *above; 3764 hammer2_blockref_t *bref; 3765 hammer2_chain_t *chain; 3766 hammer2_key_t key_beg; 3767 hammer2_key_t key_end; 3768 hammer2_key_t key_next; 3769 hammer2_key_t key; 3770 int nkeybits; 3771 int locount; 3772 int hicount; 3773 int cache_index; 3774 int maxloops = 300000; 3775 3776 key = *keyp; 3777 above = parent->core; 3778 locount = 0; 3779 hicount = 0; 3780 3781 /* 3782 * Calculate the range of keys in the array being careful to skip 3783 * slots which are overridden with a deletion. Once the scan 3784 * completes we will cut the key range in half and shift half the 3785 * range into the new indirect block. 3786 */ 3787 key_beg = 0; 3788 key_end = HAMMER2_KEY_MAX; 3789 cache_index = 0; 3790 spin_lock(&above->cst.spin); 3791 3792 for (;;) { 3793 if (--maxloops == 0) { 3794 panic("indkey_freemap shit %p %p:%d\n", 3795 parent, base, count); 3796 } 3797 chain = hammer2_combined_find(parent, base, count, 3798 &cache_index, &key_next, 3799 key_beg, key_end, 3800 &bref); 3801 3802 /* 3803 * Exhausted search 3804 */ 3805 if (bref == NULL) 3806 break; 3807 3808 /* 3809 * NOTE: No need to check DUPLICATED here because we do 3810 * not release the spinlock. 3811 */ 3812 if (chain && (chain->flags & HAMMER2_CHAIN_DELETED)) { 3813 if (key_next == 0 || key_next > key_end) 3814 break; 3815 key_beg = key_next; 3816 continue; 3817 } 3818 3819 /* 3820 * Use the full live (not deleted) element for the scan 3821 * iteration. HAMMER2 does not allow partial replacements. 3822 * 3823 * XXX should be built into hammer2_combined_find(). 3824 */ 3825 key_next = bref->key + ((hammer2_key_t)1 << bref->keybits); 3826 3827 /* 3828 * Expand our calculated key range (key, keybits) to fit 3829 * the scanned key. nkeybits represents the full range 3830 * that we will later cut in half (two halves @ nkeybits - 1). 3831 */ 3832 nkeybits = keybits; 3833 if (nkeybits < bref->keybits) { 3834 if (bref->keybits > 64) { 3835 kprintf("bad bref chain %p bref %p\n", 3836 chain, bref); 3837 Debugger("fubar"); 3838 } 3839 nkeybits = bref->keybits; 3840 } 3841 while (nkeybits < 64 && 3842 (~(((hammer2_key_t)1 << nkeybits) - 1) & 3843 (key ^ bref->key)) != 0) { 3844 ++nkeybits; 3845 } 3846 3847 /* 3848 * If the new key range is larger we have to determine 3849 * which side of the new key range the existing keys fall 3850 * under by checking the high bit, then collapsing the 3851 * locount into the hicount or vise-versa. 3852 */ 3853 if (keybits != nkeybits) { 3854 if (((hammer2_key_t)1 << (nkeybits - 1)) & key) { 3855 hicount += locount; 3856 locount = 0; 3857 } else { 3858 locount += hicount; 3859 hicount = 0; 3860 } 3861 keybits = nkeybits; 3862 } 3863 3864 /* 3865 * The newly scanned key will be in the lower half or the 3866 * upper half of the (new) key range. 3867 */ 3868 if (((hammer2_key_t)1 << (nkeybits - 1)) & bref->key) 3869 ++hicount; 3870 else 3871 ++locount; 3872 3873 if (key_next == 0) 3874 break; 3875 key_beg = key_next; 3876 } 3877 spin_unlock(&above->cst.spin); 3878 bref = NULL; /* now invalid (safety) */ 3879 3880 /* 3881 * Adjust keybits to represent half of the full range calculated 3882 * above (radix 63 max) 3883 */ 3884 --keybits; 3885 3886 /* 3887 * Select whichever half contains the most elements. Theoretically 3888 * we can select either side as long as it contains at least one 3889 * element (in order to ensure that a free slot is present to hold 3890 * the indirect block). 3891 */ 3892 if (hammer2_indirect_optimize) { 3893 /* 3894 * Insert node for least number of keys, this will arrange 3895 * the first few blocks of a large file or the first few 3896 * inodes in a directory with fewer indirect blocks when 3897 * created linearly. 3898 */ 3899 if (hicount < locount && hicount != 0) 3900 key |= (hammer2_key_t)1 << keybits; 3901 else 3902 key &= ~(hammer2_key_t)1 << keybits; 3903 } else { 3904 /* 3905 * Insert node for most number of keys, best for heavily 3906 * fragmented files. 3907 */ 3908 if (hicount > locount) 3909 key |= (hammer2_key_t)1 << keybits; 3910 else 3911 key &= ~(hammer2_key_t)1 << keybits; 3912 } 3913 *keyp = key; 3914 3915 return (keybits); 3916 } 3917 3918 /* 3919 * Sets CHAIN_DELETED and CHAIN_FLUSH_DELETE in the chain being deleted and 3920 * set chain->delete_xid. The chain is not actually marked possibly-free 3921 * in the freemap until the deletion is completely flushed out (because 3922 * a flush which doesn't cover the entire deletion is flushing the deleted 3923 * chain as if it were live). 3924 * 3925 * This function does NOT generate a modification to the parent. It 3926 * would be nearly impossible to figure out which parent to modify anyway. 3927 * Such modifications are handled top-down by the flush code and are 3928 * properly merged using the flush synchronization point. 3929 * 3930 * The find/get code will properly overload the RBTREE check on top of 3931 * the bref check to detect deleted entries. 3932 * 3933 * This function is NOT recursive. Any entity already pushed into the 3934 * chain (such as an inode) may still need visibility into its contents, 3935 * as well as the ability to read and modify the contents. For example, 3936 * for an unlinked file which is still open. 3937 * 3938 * NOTE: Deletions normally do not occur in the middle of a duplication 3939 * chain but we use a trick for hardlink migration that refactors 3940 * the originating inode without deleting it, so we make no assumptions 3941 * here. 3942 */ 3943 void 3944 hammer2_chain_delete(hammer2_trans_t *trans, hammer2_chain_t *chain, int flags) 3945 { 3946 KKASSERT(ccms_thread_lock_owned(&chain->core->cst)); 3947 3948 /* 3949 * Nothing to do if already marked. 3950 */ 3951 if (chain->flags & HAMMER2_CHAIN_DELETED) 3952 return; 3953 3954 /* 3955 * The setting of DELETED causes finds, lookups, and _next iterations 3956 * to no longer recognize the chain. RB_SCAN()s will still have 3957 * visibility (needed for flush serialization points). 3958 * 3959 * We need the spinlock on the core whos RBTREE contains chain 3960 * to protect against races. 3961 */ 3962 spin_lock(&chain->above->cst.spin); 3963 _hammer2_chain_delete_helper(trans, chain->above, chain); 3964 spin_unlock(&chain->above->cst.spin); 3965 3966 hammer2_chain_setsubmod(trans, chain); 3967 } 3968 3969 /* 3970 * Returns the index of the nearest element in the blockref array >= elm. 3971 * Returns (count) if no element could be found. If delete_filter is non-zero 3972 * the scan filters out any blockrefs which match deleted chains on dbtree. 3973 * 3974 * Sets *key_nextp to the next key for loop purposes but does not modify 3975 * it if the next key would be higher than the current value of *key_nextp. 3976 * Note that *key_nexp can overflow to 0, which should be tested by the 3977 * caller. 3978 * 3979 * (*cache_indexp) is a heuristic and can be any value without effecting 3980 * the result. 3981 * 3982 * The spin lock on the related chain must be held. 3983 */ 3984 int 3985 hammer2_base_find(hammer2_chain_t *parent, 3986 hammer2_blockref_t *base, int count, 3987 int *cache_indexp, hammer2_key_t *key_nextp, 3988 hammer2_key_t key_beg, hammer2_key_t key_end, 3989 int delete_filter) 3990 { 3991 hammer2_chain_core_t *core = parent->core; 3992 hammer2_blockref_t *scan; 3993 hammer2_key_t scan_end; 3994 int i; 3995 int limit; 3996 3997 /* 3998 * Require the live chain's already have their core's counted 3999 * so we can optimize operations. 4000 */ 4001 KKASSERT((parent->flags & HAMMER2_CHAIN_DUPLICATED) || 4002 core->flags & HAMMER2_CORE_COUNTEDBREFS); 4003 4004 /* 4005 * Degenerate case 4006 */ 4007 if (count == 0 || base == NULL) 4008 return(count); 4009 4010 /* 4011 * Sequential optimization using *cache_indexp. This is the most 4012 * likely scenario. 4013 * 4014 * We can avoid trailing empty entries on live chains, otherwise 4015 * we might have to check the whole block array. 4016 */ 4017 i = *cache_indexp; 4018 cpu_ccfence(); 4019 if (parent->flags & HAMMER2_CHAIN_DUPLICATED) 4020 limit = count; 4021 else 4022 limit = core->live_zero; 4023 if (i >= limit) 4024 i = limit - 1; 4025 if (i < 0) 4026 i = 0; 4027 KKASSERT(i < count); 4028 4029 /* 4030 * Search backwards 4031 */ 4032 scan = &base[i]; 4033 while (i > 0 && (scan->type == 0 || scan->key > key_beg)) { 4034 --scan; 4035 --i; 4036 } 4037 *cache_indexp = i; 4038 4039 /* 4040 * Search forwards, stop when we find a scan element which 4041 * encloses the key or until we know that there are no further 4042 * elements. 4043 */ 4044 while (i < count) { 4045 if (scan->type != 0) { 4046 scan_end = scan->key + 4047 ((hammer2_key_t)1 << scan->keybits) - 1; 4048 if (scan->key > key_beg || scan_end >= key_beg) { 4049 /* 4050 * Check to see if the entry is covered by 4051 * a deleted chain and ignore the entry if 4052 * it is and delete_filter != 0. 4053 */ 4054 if (delete_filter == 0) 4055 break; 4056 if (hammer2_chain_find_deleted( 4057 parent, scan->key, scan_end) == NULL) { 4058 break; 4059 } 4060 } 4061 } 4062 if (i >= limit) 4063 return (count); 4064 ++scan; 4065 ++i; 4066 } 4067 if (i != count) { 4068 *cache_indexp = i; 4069 if (i >= limit) { 4070 i = count; 4071 } else { 4072 scan_end = scan->key + 4073 ((hammer2_key_t)1 << scan->keybits); 4074 if (scan_end && (*key_nextp > scan_end || 4075 *key_nextp == 0)) { 4076 *key_nextp = scan_end; 4077 } 4078 } 4079 } 4080 return (i); 4081 } 4082 4083 /* 4084 * Do a combined search and return the next match either from the blockref 4085 * array or from the in-memory chain. Sets *bresp to the returned bref in 4086 * both cases, or sets it to NULL if the search exhausted. Only returns 4087 * a non-NULL chain if the search matched from the in-memory chain. 4088 * 4089 * When no in-memory chain has been found and a non-NULL bref is returned 4090 * in *bresp. 4091 * 4092 * Must be called with above's spinlock held. Spinlock remains held 4093 * through the operation. 4094 * 4095 * The returned chain is not locked or referenced. Use the returned bref 4096 * to determine if the search exhausted or not. Iterate if the base find 4097 * is chosen but matches a deleted chain. 4098 */ 4099 static hammer2_chain_t * 4100 hammer2_combined_find(hammer2_chain_t *parent, 4101 hammer2_blockref_t *base, int count, 4102 int *cache_indexp, hammer2_key_t *key_nextp, 4103 hammer2_key_t key_beg, hammer2_key_t key_end, 4104 hammer2_blockref_t **bresp) 4105 { 4106 hammer2_blockref_t *bref; 4107 hammer2_chain_t *chain; 4108 int i; 4109 4110 /* 4111 * Lookup in block array and in rbtree. 4112 */ 4113 *key_nextp = key_end + 1; 4114 i = hammer2_base_find(parent, base, count, cache_indexp, 4115 key_nextp, key_beg, key_end, 1); 4116 chain = hammer2_chain_find(parent, key_nextp, key_beg, key_end); 4117 4118 /* 4119 * Neither matched 4120 */ 4121 if (i == count && chain == NULL) { 4122 *bresp = NULL; 4123 return(NULL); 4124 } 4125 4126 /* 4127 * Only chain matched. 4128 */ 4129 if (i == count) { 4130 bref = &chain->bref; 4131 goto found; 4132 } 4133 4134 /* 4135 * Only blockref matched. 4136 */ 4137 if (chain == NULL) { 4138 bref = &base[i]; 4139 goto found; 4140 } 4141 4142 /* 4143 * Both in-memory and blockref matched, select the nearer element. 4144 * 4145 * If both are flush with the left-hand side or both are the 4146 * same distance away, select the chain. In this situation the 4147 * chain must have been loaded from the matching blockmap. 4148 */ 4149 if ((chain->bref.key <= key_beg && base[i].key <= key_beg) || 4150 chain->bref.key == base[i].key) { 4151 KKASSERT(chain->bref.key == base[i].key); 4152 if ((chain->flags & HAMMER2_CHAIN_BMAPPED) == 0) { 4153 kprintf("chain not bmapped %p.%d %08x\n", 4154 chain, chain->bref.type, chain->flags); 4155 kprintf("in chain mod/del %08x %08x\n", 4156 chain->modify_xid, chain->delete_xid); 4157 kprintf("and updlo/hi %08x %08x\n", 4158 chain->update_xlo, chain->update_xhi); 4159 } 4160 KKASSERT(chain->flags & HAMMER2_CHAIN_BMAPPED); 4161 bref = &chain->bref; 4162 goto found; 4163 } 4164 4165 /* 4166 * Select the nearer key 4167 */ 4168 if (chain->bref.key < base[i].key) { 4169 bref = &chain->bref; 4170 } else { 4171 bref = &base[i]; 4172 chain = NULL; 4173 } 4174 4175 /* 4176 * If the bref is out of bounds we've exhausted our search. 4177 */ 4178 found: 4179 if (bref->key > key_end) { 4180 *bresp = NULL; 4181 chain = NULL; 4182 } else { 4183 *bresp = bref; 4184 } 4185 return(chain); 4186 } 4187 4188 /* 4189 * Locate the specified block array element and delete it. The element 4190 * must exist. 4191 * 4192 * The spin lock on the related chain must be held. 4193 * 4194 * NOTE: live_count was adjusted when the chain was deleted, so it does not 4195 * need to be adjusted when we commit the media change. 4196 */ 4197 void 4198 hammer2_base_delete(hammer2_trans_t *trans, hammer2_chain_t *parent, 4199 hammer2_blockref_t *base, int count, 4200 int *cache_indexp, hammer2_chain_t *child) 4201 { 4202 hammer2_blockref_t *elm = &child->bref; 4203 hammer2_chain_core_t *core = parent->core; 4204 hammer2_key_t key_next; 4205 int i; 4206 4207 /* 4208 * Delete element. Expect the element to exist. 4209 * 4210 * XXX see caller, flush code not yet sophisticated enough to prevent 4211 * re-flushed in some cases. 4212 */ 4213 key_next = 0; /* max range */ 4214 i = hammer2_base_find(parent, base, count, cache_indexp, 4215 &key_next, elm->key, elm->key, 0); 4216 if (i == count || base[i].type == 0 || 4217 base[i].key != elm->key || base[i].keybits != elm->keybits) { 4218 spin_unlock(&core->cst.spin); 4219 panic("delete base %p element not found at %d/%d elm %p\n" 4220 "child ino_reason=%08x\n", 4221 base, i, count, elm, 4222 child->inode_reason); 4223 return; 4224 } 4225 bzero(&base[i], sizeof(*base)); 4226 4227 /* 4228 * We can only optimize core->live_zero for live chains. 4229 */ 4230 if ((parent->flags & HAMMER2_CHAIN_DUPLICATED) == 0) { 4231 if (core->live_zero == i + 1) { 4232 while (--i >= 0 && base[i].type == 0) 4233 ; 4234 core->live_zero = i + 1; 4235 } 4236 } 4237 } 4238 4239 /* 4240 * Insert the specified element. The block array must not already have the 4241 * element and must have space available for the insertion. 4242 * 4243 * The spin lock on the related chain must be held. 4244 * 4245 * NOTE: live_count was adjusted when the chain was deleted, so it does not 4246 * need to be adjusted when we commit the media change. 4247 */ 4248 void 4249 hammer2_base_insert(hammer2_trans_t *trans __unused, hammer2_chain_t *parent, 4250 hammer2_blockref_t *base, int count, 4251 int *cache_indexp, hammer2_chain_t *child) 4252 { 4253 hammer2_blockref_t *elm = &child->bref; 4254 hammer2_chain_core_t *core = parent->core; 4255 hammer2_key_t key_next; 4256 hammer2_key_t xkey; 4257 int i; 4258 int j; 4259 int k; 4260 int l; 4261 int u = 1; 4262 4263 /* 4264 * Insert new element. Expect the element to not already exist 4265 * unless we are replacing it. 4266 * 4267 * XXX see caller, flush code not yet sophisticated enough to prevent 4268 * re-flushed in some cases. 4269 */ 4270 key_next = 0; /* max range */ 4271 i = hammer2_base_find(parent, base, count, cache_indexp, 4272 &key_next, elm->key, elm->key, 0); 4273 4274 /* 4275 * Shortcut fill optimization, typical ordered insertion(s) may not 4276 * require a search. 4277 */ 4278 KKASSERT(i >= 0 && i <= count); 4279 4280 /* 4281 * We can only optimize core->live_zero for live chains. 4282 */ 4283 if (i == count && core->live_zero < count) { 4284 if ((parent->flags & HAMMER2_CHAIN_DUPLICATED) == 0) { 4285 i = core->live_zero++; 4286 base[i] = *elm; 4287 return; 4288 } 4289 } 4290 4291 xkey = elm->key + ((hammer2_key_t)1 << elm->keybits) - 1; 4292 if (i != count && (base[i].key < elm->key || xkey >= base[i].key)) { 4293 if (child->flags & HAMMER2_CHAIN_FLUSH_TEMPORARY) { 4294 kprintf("child %p special replace\n", child); 4295 base[i] = *elm; 4296 return; 4297 } else { 4298 spin_unlock(&core->cst.spin); 4299 panic("insert base %p overlapping " 4300 "elements at %d elm %p\n", 4301 base, i, elm); 4302 } 4303 } 4304 4305 /* 4306 * Try to find an empty slot before or after. 4307 */ 4308 j = i; 4309 k = i; 4310 while (j > 0 || k < count) { 4311 --j; 4312 if (j >= 0 && base[j].type == 0) { 4313 if (j == i - 1) { 4314 base[j] = *elm; 4315 } else { 4316 bcopy(&base[j+1], &base[j], 4317 (i - j - 1) * sizeof(*base)); 4318 base[i - 1] = *elm; 4319 } 4320 goto validate; 4321 } 4322 ++k; 4323 if (k < count && base[k].type == 0) { 4324 bcopy(&base[i], &base[i+1], 4325 (k - i) * sizeof(hammer2_blockref_t)); 4326 base[i] = *elm; 4327 4328 /* 4329 * We can only update core->live_zero for live 4330 * chains. 4331 */ 4332 if ((parent->flags & HAMMER2_CHAIN_DUPLICATED) == 0) { 4333 if (core->live_zero <= k) 4334 core->live_zero = k + 1; 4335 } 4336 u = 2; 4337 goto validate; 4338 } 4339 } 4340 panic("hammer2_base_insert: no room!"); 4341 4342 /* 4343 * Debugging 4344 */ 4345 validate: 4346 key_next = 0; 4347 for (l = 0; l < count; ++l) { 4348 if (base[l].type) { 4349 key_next = base[l].key + 4350 ((hammer2_key_t)1 << base[l].keybits) - 1; 4351 break; 4352 } 4353 } 4354 while (++l < count) { 4355 if (base[l].type) { 4356 if (base[l].key <= key_next) 4357 panic("base_insert %d %d,%d,%d fail %p:%d", u, i, j, k, base, l); 4358 key_next = base[l].key + 4359 ((hammer2_key_t)1 << base[l].keybits) - 1; 4360 4361 } 4362 } 4363 4364 } 4365 4366 #if 0 4367 4368 /* 4369 * Sort the blockref array for the chain. Used by the flush code to 4370 * sort the blockref[] array. 4371 * 4372 * The chain must be exclusively locked AND spin-locked. 4373 */ 4374 typedef hammer2_blockref_t *hammer2_blockref_p; 4375 4376 static 4377 int 4378 hammer2_base_sort_callback(const void *v1, const void *v2) 4379 { 4380 hammer2_blockref_p bref1 = *(const hammer2_blockref_p *)v1; 4381 hammer2_blockref_p bref2 = *(const hammer2_blockref_p *)v2; 4382 4383 /* 4384 * Make sure empty elements are placed at the end of the array 4385 */ 4386 if (bref1->type == 0) { 4387 if (bref2->type == 0) 4388 return(0); 4389 return(1); 4390 } else if (bref2->type == 0) { 4391 return(-1); 4392 } 4393 4394 /* 4395 * Sort by key 4396 */ 4397 if (bref1->key < bref2->key) 4398 return(-1); 4399 if (bref1->key > bref2->key) 4400 return(1); 4401 return(0); 4402 } 4403 4404 void 4405 hammer2_base_sort(hammer2_chain_t *chain) 4406 { 4407 hammer2_blockref_t *base; 4408 int count; 4409 4410 switch(chain->bref.type) { 4411 case HAMMER2_BREF_TYPE_INODE: 4412 /* 4413 * Special shortcut for embedded data returns the inode 4414 * itself. Callers must detect this condition and access 4415 * the embedded data (the strategy code does this for us). 4416 * 4417 * This is only applicable to regular files and softlinks. 4418 */ 4419 if (chain->data->ipdata.op_flags & HAMMER2_OPFLAG_DIRECTDATA) 4420 return; 4421 base = &chain->data->ipdata.u.blockset.blockref[0]; 4422 count = HAMMER2_SET_COUNT; 4423 break; 4424 case HAMMER2_BREF_TYPE_FREEMAP_NODE: 4425 case HAMMER2_BREF_TYPE_INDIRECT: 4426 /* 4427 * Optimize indirect blocks in the INITIAL state to avoid 4428 * I/O. 4429 */ 4430 KKASSERT((chain->flags & HAMMER2_CHAIN_INITIAL) == 0); 4431 base = &chain->data->npdata[0]; 4432 count = chain->bytes / sizeof(hammer2_blockref_t); 4433 break; 4434 case HAMMER2_BREF_TYPE_VOLUME: 4435 base = &chain->hmp->voldata.sroot_blockset.blockref[0]; 4436 count = HAMMER2_SET_COUNT; 4437 break; 4438 case HAMMER2_BREF_TYPE_FREEMAP: 4439 base = &chain->hmp->voldata.freemap_blockset.blockref[0]; 4440 count = HAMMER2_SET_COUNT; 4441 break; 4442 default: 4443 panic("hammer2_chain_lookup: unrecognized blockref type: %d", 4444 chain->bref.type); 4445 base = NULL; /* safety */ 4446 count = 0; /* safety */ 4447 } 4448 kqsort(base, count, sizeof(*base), hammer2_base_sort_callback); 4449 } 4450 4451 #endif 4452 4453 /* 4454 * Chain memory management 4455 */ 4456 void 4457 hammer2_chain_wait(hammer2_chain_t *chain) 4458 { 4459 tsleep(chain, 0, "chnflw", 1); 4460 } 4461 4462 /* 4463 * chain may have been moved around by the create. 4464 */ 4465 void 4466 hammer2_chain_refactor(hammer2_chain_t **chainp) 4467 { 4468 hammer2_chain_t *chain = *chainp; 4469 hammer2_chain_core_t *core; 4470 4471 core = chain->core; 4472 while (chain->flags & HAMMER2_CHAIN_DUPLICATED) { 4473 spin_lock(&core->cst.spin); 4474 chain = TAILQ_NEXT(chain, core_entry); 4475 while (chain->flags & HAMMER2_CHAIN_DUPLICATED) 4476 chain = TAILQ_NEXT(chain, core_entry); 4477 hammer2_chain_ref(chain); 4478 spin_unlock(&core->cst.spin); 4479 KKASSERT(chain->core == core); 4480 4481 hammer2_chain_unlock(*chainp); 4482 hammer2_chain_lock(chain, HAMMER2_RESOLVE_ALWAYS | 4483 HAMMER2_RESOLVE_NOREF); /* eat ref */ 4484 *chainp = chain; 4485 } 4486 } 4487