1 /* 2 * Copyright (c) 2011-2018 The DragonFly Project. All rights reserved. 3 * 4 * This code is derived from software contributed to The DragonFly Project 5 * by Matthew Dillon <dillon@dragonflybsd.org> 6 * and Venkatesh Srinivas <vsrinivas@dragonflybsd.org> 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in 16 * the documentation and/or other materials provided with the 17 * distribution. 18 * 3. Neither the name of The DragonFly Project nor the names of its 19 * contributors may be used to endorse or promote products derived 20 * from this software without specific, prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 23 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 24 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 25 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 26 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 27 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 28 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 29 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 30 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 31 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 32 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33 * SUCH DAMAGE. 34 */ 35 /* 36 * This subsystem implements most of the core support functions for 37 * the hammer2_chain structure. 
 *
 * Chains are the in-memory version of media objects (volume header, inodes,
 * indirect blocks, data blocks, etc).  Chains represent a portion of the
 * HAMMER2 topology.
 *
 * Chains are no longer delete-duplicated.  Instead, the original in-memory
 * chain will be moved along with its block reference (e.g. for things like
 * renames, hardlink operations, modifications, etc), and will be indexed
 * on a secondary list for flush handling instead of propagating a flag
 * upward to the root.
 *
 * Concurrent front-end operations can still run against backend flushes
 * as long as they do not cross the current flush boundary.  An operation
 * running above the current flush (in areas not yet flushed) can become
 * part of the current flush while an operation running below the current
 * flush can become part of the next flush.
 */
#include <sys/cdefs.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/types.h>
#include <sys/lock.h>
#include <sys/kern_syscall.h>
#include <sys/uuid.h>

#include <crypto/sha2/sha2.h>

#include "hammer2.h"

static hammer2_chain_t *hammer2_chain_create_indirect(
		hammer2_chain_t *parent,
		hammer2_key_t key, int keybits,
		hammer2_tid_t mtid, int for_type, int *errorp);
static hammer2_io_t *hammer2_chain_drop_data(hammer2_chain_t *chain);
static hammer2_chain_t *hammer2_combined_find(
		hammer2_chain_t *parent,
		hammer2_blockref_t *base, int count,
		hammer2_key_t *key_nextp,
		hammer2_key_t key_beg, hammer2_key_t key_end,
		hammer2_blockref_t **bresp);

static struct krate krate_h2me = { .freq = 1 };

/*
 * Basic RBTree for chains (core->rbtree and core->dbtree).  Chains cannot
 * overlap in the RB trees.  Deleted chains are moved from rbtree to either
 * dbtree or to dbq.
 *
 * Chains in delete-duplicate sequences can always iterate through core_entry
 * to locate the live version of the chain.
88 */ 89 RB_GENERATE(hammer2_chain_tree, hammer2_chain, rbnode, hammer2_chain_cmp); 90 91 int 92 hammer2_chain_cmp(hammer2_chain_t *chain1, hammer2_chain_t *chain2) 93 { 94 hammer2_key_t c1_beg; 95 hammer2_key_t c1_end; 96 hammer2_key_t c2_beg; 97 hammer2_key_t c2_end; 98 99 /* 100 * Compare chains. Overlaps are not supposed to happen and catch 101 * any software issues early we count overlaps as a match. 102 */ 103 c1_beg = chain1->bref.key; 104 c1_end = c1_beg + ((hammer2_key_t)1 << chain1->bref.keybits) - 1; 105 c2_beg = chain2->bref.key; 106 c2_end = c2_beg + ((hammer2_key_t)1 << chain2->bref.keybits) - 1; 107 108 if (c1_end < c2_beg) /* fully to the left */ 109 return(-1); 110 if (c1_beg > c2_end) /* fully to the right */ 111 return(1); 112 return(0); /* overlap (must not cross edge boundary) */ 113 } 114 115 /* 116 * Assert that a chain has no media data associated with it. 117 */ 118 static __inline void 119 hammer2_chain_assert_no_data(hammer2_chain_t *chain) 120 { 121 KKASSERT(chain->dio == NULL); 122 if (chain->bref.type != HAMMER2_BREF_TYPE_VOLUME && 123 chain->bref.type != HAMMER2_BREF_TYPE_FREEMAP && 124 chain->data) { 125 panic("hammer2_assert_no_data: chain %p still has data", chain); 126 } 127 } 128 129 /* 130 * Make a chain visible to the flusher. The flusher needs to be able to 131 * do flushes of subdirectory chains or single files so it does a top-down 132 * recursion using the ONFLUSH flag for the recursion. It locates MODIFIED 133 * or UPDATE chains and flushes back up the chain to the volume root. 134 * 135 * This routine sets ONFLUSH upward until it hits the volume root. For 136 * simplicity we ignore PFSROOT boundaries whos rules can be complex. 137 * Extra ONFLUSH flagging doesn't hurt the filesystem. 
 */
void
hammer2_chain_setflush(hammer2_chain_t *chain)
{
        hammer2_chain_t *parent;

        /*
         * Walk upward hand-over-hand: take the parent's shared spinlock
         * before releasing the child's so the parent pointer cannot be
         * ripped out from under us.  Stop as soon as a chain already has
         * ONFLUSH set (some other thread finished the upward propagation).
         */
        if ((chain->flags & HAMMER2_CHAIN_ONFLUSH) == 0) {
                hammer2_spin_sh(&chain->core.spin);
                while ((chain->flags & HAMMER2_CHAIN_ONFLUSH) == 0) {
                        atomic_set_int(&chain->flags, HAMMER2_CHAIN_ONFLUSH);
                        if ((parent = chain->parent) == NULL)
                                break;
                        hammer2_spin_sh(&parent->core.spin);
                        hammer2_spin_unsh(&chain->core.spin);
                        chain = parent;
                }
                hammer2_spin_unsh(&chain->core.spin);
        }
}

/*
 * Allocate a new disconnected chain element representing the specified
 * bref.  chain->refs is set to 1 and the passed bref is copied to
 * chain->bref.  chain->bytes is derived from the bref.
 *
 * chain->pmp inherits pmp unless the chain is an inode (other than the
 * super-root inode).
 *
 * NOTE: Returns a referenced but unlocked (because there is no core) chain.
 */
hammer2_chain_t *
hammer2_chain_alloc(hammer2_dev_t *hmp, hammer2_pfs_t *pmp,
                    hammer2_blockref_t *bref)
{
        hammer2_chain_t *chain;
        u_int bytes;

        /*
         * Special case - radix of 0 indicates a chain that does not
         * need a data reference (context is completely embedded in the
         * bref).
         */
        if ((int)(bref->data_off & HAMMER2_OFF_MASK_RADIX))
                bytes = 1U << (int)(bref->data_off & HAMMER2_OFF_MASK_RADIX);
        else
                bytes = 0;

        atomic_add_long(&hammer2_chain_allocs, 1);

        /*
         * Construct the appropriate system structure.  The default case
         * panics on an unrecognized blockref type; the NULL assignment
         * before the panic only quiets compiler warnings.
         */
        switch(bref->type) {
        case HAMMER2_BREF_TYPE_DIRENT:
        case HAMMER2_BREF_TYPE_INODE:
        case HAMMER2_BREF_TYPE_INDIRECT:
        case HAMMER2_BREF_TYPE_FREEMAP_NODE:
        case HAMMER2_BREF_TYPE_DATA:
        case HAMMER2_BREF_TYPE_FREEMAP_LEAF:
                /*
                 * Chain's are really only associated with the hmp but we
                 * maintain a pmp association for per-mount memory tracking
                 * purposes.  The pmp can be NULL.
                 */
                chain = kmalloc(sizeof(*chain), hmp->mchain, M_WAITOK | M_ZERO);
                break;
        case HAMMER2_BREF_TYPE_VOLUME:
        case HAMMER2_BREF_TYPE_FREEMAP:
                /*
                 * Only hammer2_chain_bulksnap() calls this function with these
                 * types.
                 */
                chain = kmalloc(sizeof(*chain), hmp->mchain, M_WAITOK | M_ZERO);
                break;
        default:
                chain = NULL;
                panic("hammer2_chain_alloc: unrecognized blockref type: %d",
                      bref->type);
        }

        /*
         * Initialize the new chain structure.  pmp must be set to NULL for
         * chains belonging to the super-root topology of a device mount.
         */
        if (pmp == hmp->spmp)
                chain->pmp = NULL;
        else
                chain->pmp = pmp;

        chain->hmp = hmp;
        chain->bref = *bref;
        chain->bytes = bytes;
        chain->refs = 1;                /* returned reference */
        chain->flags = HAMMER2_CHAIN_ALLOCATED;
        lockinit(&chain->diolk, "chdio", 0, 0);

        /*
         * Set the PFS boundary flag if this chain represents a PFS root.
         */
        if (bref->flags & HAMMER2_BREF_FLAG_PFSROOT)
                atomic_set_int(&chain->flags, HAMMER2_CHAIN_PFSBOUNDARY);
        hammer2_chain_core_init(chain);

        return (chain);
}

/*
 * Initialize a chain's core structure.  This structure used to be allocated
 * but is now embedded.
 *
 * The core is not locked.  No additional refs on the chain are made.
 * (trans) must not be NULL if (core) is not NULL.
 */
void
hammer2_chain_core_init(hammer2_chain_t *chain)
{
        /*
         * Fresh core under nchain (no multi-homing of ochain's
         * sub-tree).
         */
        RB_INIT(&chain->core.rbtree);   /* live chains */
        hammer2_mtx_init(&chain->lock, "h2chain");
}

/*
 * Add a reference to a chain element, preventing its destruction.
 *
 * (can be called with spinlock held)
 */
void
hammer2_chain_ref(hammer2_chain_t *chain)
{
        /*
         * On a 0->1 transition the chain may still be sitting on the
         * LRU list (refs can legally be 0 there).  We do not pull it off
         * here; we only hint to the LRU scanner that it was re-used.
         */
        if (atomic_fetchadd_int(&chain->refs, 1) == 0) {
                /*
                 * Just flag that the chain was used and should be recycled
                 * on the LRU if it encounters it later.
                 */
                if (chain->flags & HAMMER2_CHAIN_ONLRU)
                        atomic_set_int(&chain->flags, HAMMER2_CHAIN_LRUHINT);

#if 0
                /*
                 * REMOVED - reduces contention, lru_list is more heuristical
                 * now.
                 *
                 * 0->non-zero transition must ensure that chain is removed
                 * from the LRU list.
                 *
                 * NOTE: Already holding lru_spin here so we cannot call
                 *       hammer2_chain_ref() to get it off lru_list, do
                 *       it manually.
                 */
                if (chain->flags & HAMMER2_CHAIN_ONLRU) {
                        hammer2_pfs_t *pmp = chain->pmp;
                        hammer2_spin_ex(&pmp->lru_spin);
                        if (chain->flags & HAMMER2_CHAIN_ONLRU) {
                                atomic_add_int(&pmp->lru_count, -1);
                                atomic_clear_int(&chain->flags,
                                                 HAMMER2_CHAIN_ONLRU);
                                TAILQ_REMOVE(&pmp->lru_list, chain, lru_node);
                        }
                        hammer2_spin_unex(&pmp->lru_spin);
                }
#endif
        }
}

/*
 * Ref a locked chain and force the data to be held across an unlock.
 * Chain must be currently locked.  The user of the chain who desires
 * to release the hold must call hammer2_chain_lock_unhold() to relock
 * and unhold the chain, then unlock normally, or may simply call
 * hammer2_chain_drop_unhold() (which is safer against deadlocks).
 */
void
hammer2_chain_ref_hold(hammer2_chain_t *chain)
{
        /* bump lockcnt first so the data cannot be dropped on unlock */
        atomic_add_int(&chain->lockcnt, 1);
        hammer2_chain_ref(chain);
}

/*
 * Insert the chain in the core rbtree.
 *
 * Normal insertions are placed in the live rbtree.  Insertion of a deleted
 * chain is a special case used by the flush code that is placed on the
 * unstaged deleted list to avoid confusing the live view.
 */
#define HAMMER2_CHAIN_INSERT_SPIN       0x0001  /* acquire parent core spin */
#define HAMMER2_CHAIN_INSERT_LIVE       0x0002  /* adjust live_count */
#define HAMMER2_CHAIN_INSERT_RACE       0x0004  /* fail on generation race */

/*
 * Insert (chain) under (parent), optionally acquiring the parent's core
 * spinlock and optionally failing with HAMMER2_ERROR_EAGAIN if the parent's
 * generation count no longer matches (generation) (i.e. a competing
 * insertion/removal occurred since the caller sampled it).
 *
 * Returns 0 on success.
 */
static
int
hammer2_chain_insert(hammer2_chain_t *parent, hammer2_chain_t *chain,
                     int flags, int generation)
{
        hammer2_chain_t *xchain;
        int error = 0;

        if (flags & HAMMER2_CHAIN_INSERT_SPIN)
                hammer2_spin_ex(&parent->core.spin);

        /*
         * Interlocked by spinlock, check for race
         */
        if ((flags & HAMMER2_CHAIN_INSERT_RACE) &&
            parent->core.generation != generation) {
                error = HAMMER2_ERROR_EAGAIN;
                goto failed;
        }

        /*
         * Insert chain.  A collision indicates an overlapping key range,
         * which must never happen (see hammer2_chain_cmp()).
         */
        xchain = RB_INSERT(hammer2_chain_tree, &parent->core.rbtree, chain);
        KASSERT(xchain == NULL,
                ("hammer2_chain_insert: collision %p %p (key=%016jx)",
                chain, xchain, chain->bref.key));
        atomic_set_int(&chain->flags, HAMMER2_CHAIN_ONRBTREE);
        chain->parent = parent;
        ++parent->core.chain_count;
        ++parent->core.generation;      /* XXX incs for _get() too, XXX */

        /*
         * We have to keep track of the effective live-view blockref count
         * so the create code knows when to push an indirect block.
         */
        if (flags & HAMMER2_CHAIN_INSERT_LIVE)
                atomic_add_int(&parent->core.live_count, 1);
failed:
        if (flags & HAMMER2_CHAIN_INSERT_SPIN)
                hammer2_spin_unex(&parent->core.spin);
        return error;
}

/*
 * Drop the caller's reference to the chain.  When the ref count drops to
 * zero this function will try to disassociate the chain from its parent and
 * deallocate it, then recursively drop the parent using the implied ref
 * from the chain's chain->parent.
 *
 * Nobody should own chain's mutex on the 1->0 transition, unless this drop
 * races an acquisition by another cpu.  Therefore we can loop if we are
 * unable to acquire the mutex, and refs is unlikely to be 1 unless we again
 * race against another drop.
 */
static hammer2_chain_t *hammer2_chain_lastdrop(hammer2_chain_t *chain,
                                int depth);
static void hammer2_chain_lru_flush(hammer2_pfs_t *pmp);

void
hammer2_chain_drop(hammer2_chain_t *chain)
{
        u_int refs;

        if (hammer2_debug & 0x200000)
                Debugger("drop");

        KKASSERT(chain->refs > 0);

        /*
         * Lockless CAS loop.  A potential 1->0 transition requires the
         * chain mutex (try-lock only; on failure some other thread holds
         * it and we simply retry).  lastdrop may hand back a parent chain
         * that also needs dropping, continuing the loop without recursion.
         */
        while (chain) {
                refs = chain->refs;
                cpu_ccfence();
                KKASSERT(refs > 0);

                if (refs == 1) {
                        if (hammer2_mtx_ex_try(&chain->lock) == 0)
                                chain = hammer2_chain_lastdrop(chain, 0);
                        /* retry the same chain, or chain from lastdrop */
                } else {
                        if (atomic_cmpset_int(&chain->refs, refs, refs - 1))
                                break;
                        /* retry the same chain */
                }
                cpu_pause();
        }
}

/*
 * Unhold a held and probably not-locked chain, ensure that the data is
 * dropped on the 1->0 transition of lockcnt by obtaining an exclusive
 * lock and then simply unlocking the chain.
 */
void
hammer2_chain_drop_unhold(hammer2_chain_t *chain)
{
        u_int lockcnt;
        int iter = 0;

        for (;;) {
                lockcnt = chain->lockcnt;
                cpu_ccfence();
                if (lockcnt > 1) {
                        /* not the last hold, just decrement */
                        if (atomic_cmpset_int(&chain->lockcnt,
                                              lockcnt, lockcnt - 1)) {
                                break;
                        }
                } else if (hammer2_mtx_ex_try(&chain->lock) == 0) {
                        /* last hold; unlock path drops the data */
                        hammer2_chain_unlock(chain);
                        break;
                } else {
                        /*
                         * This situation can easily occur on SMP due to
                         * the gap inbetween the 1->0 transition and the
                         * final unlock.  We cannot safely block on the
                         * mutex because lockcnt might go above 1.
                         *
                         * XXX Sleep for one tick if it takes too long.
                         */
                        if (++iter > 1000) {
                                if (iter > 1000 + hz) {
                                        kprintf("hammer2: h2race1 %p\n", chain);
                                        iter = 1000;
                                }
                                tsleep(&iter, 0, "h2race1", 1);
                        }
                        cpu_pause();
                }
        }
        hammer2_chain_drop(chain);
}

/*
 * Handles the (potential) last drop of chain->refs from 1->0.
Called with 463 * the mutex exclusively locked, refs == 1, and lockcnt 0. SMP races are 464 * possible against refs and lockcnt. We must dispose of the mutex on chain. 465 * 466 * This function returns an unlocked chain for recursive drop or NULL. It 467 * can return the same chain if it determines it has raced another ref. 468 * 469 * -- 470 * 471 * When two chains need to be recursively dropped we use the chain we 472 * would otherwise free to placehold the additional chain. It's a bit 473 * convoluted but we can't just recurse without potentially blowing out 474 * the kernel stack. 475 * 476 * The chain cannot be freed if it has any children. 477 * The chain cannot be freed if flagged MODIFIED unless we can dispose of it. 478 * The chain cannot be freed if flagged UPDATE unless we can dispose of it. 479 * Any dedup registration can remain intact. 480 * 481 * The core spinlock is allowed to nest child-to-parent (not parent-to-child). 482 */ 483 static 484 hammer2_chain_t * 485 hammer2_chain_lastdrop(hammer2_chain_t *chain, int depth) 486 { 487 hammer2_pfs_t *pmp; 488 hammer2_dev_t *hmp; 489 hammer2_chain_t *parent; 490 hammer2_chain_t *rdrop; 491 #if 0 492 hammer2_io_t *dio; 493 #endif 494 495 #if 0 496 /* 497 * On last drop if there is no parent and data_off is good (at 498 * least does not represent the volume root), the modified chain 499 * is probably going to be destroyed. We have to make sure that 500 * the data area is not registered for dedup. 501 * 502 * XXX removed. In fact, we do not have to make sure that the 503 * data area is not registered for dedup. The data area 504 * can, in fact, still be used for dedup because it is 505 * still allocated in the freemap and the underlying I/O 506 * will still be flushed. 
507 */ 508 if (chain->parent == NULL && 509 (chain->flags & HAMMER2_CHAIN_MODIFIED) && 510 (chain->bref.data_off & ~HAMMER2_OFF_MASK_RADIX)) { 511 hmp = chain->hmp; 512 hammer2_io_dedup_delete(hmp, chain->bref.type, 513 chain->bref.data_off, chain->bytes); 514 } 515 #endif 516 /* 517 * We need chain's spinlock to interlock the sub-tree test. 518 * We already have chain's mutex, protecting chain->parent. 519 * 520 * Remember that chain->refs can be in flux. 521 */ 522 hammer2_spin_ex(&chain->core.spin); 523 524 if (chain->parent != NULL) { 525 /* 526 * If the chain has a parent the UPDATE bit prevents scrapping 527 * as the chain is needed to properly flush the parent. Try 528 * to complete the 1->0 transition and return NULL. Retry 529 * (return chain) if we are unable to complete the 1->0 530 * transition, else return NULL (nothing more to do). 531 * 532 * If the chain has a parent the MODIFIED bit prevents 533 * scrapping. 534 * 535 * Chains with UPDATE/MODIFIED are *not* put on the LRU list! 536 */ 537 if (chain->flags & (HAMMER2_CHAIN_UPDATE | 538 HAMMER2_CHAIN_MODIFIED)) { 539 if (atomic_cmpset_int(&chain->refs, 1, 0)) { 540 hammer2_spin_unex(&chain->core.spin); 541 #if 0 542 dio = hammer2_chain_drop_data(chain, 0); 543 if (dio) 544 hammer2_io_bqrelse(&dio); 545 #endif 546 hammer2_chain_assert_no_data(chain); 547 hammer2_mtx_unlock(&chain->lock); 548 chain = NULL; 549 } else { 550 hammer2_spin_unex(&chain->core.spin); 551 hammer2_mtx_unlock(&chain->lock); 552 } 553 return (chain); 554 } 555 /* spinlock still held */ 556 } else if (chain->bref.type == HAMMER2_BREF_TYPE_VOLUME || 557 chain->bref.type == HAMMER2_BREF_TYPE_FREEMAP) { 558 /* 559 * Retain the static vchain and fchain. Clear bits that 560 * are not relevant. Do not clear the MODIFIED bit, 561 * and certainly do not put it on the delayed-flush queue. 562 */ 563 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_UPDATE); 564 } else { 565 /* 566 * The chain has no parent and can be flagged for destruction. 
567 * Since it has no parent, UPDATE can also be cleared. 568 */ 569 atomic_set_int(&chain->flags, HAMMER2_CHAIN_DESTROY); 570 if (chain->flags & HAMMER2_CHAIN_UPDATE) 571 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_UPDATE); 572 573 /* 574 * If the chain has children we must still flush the chain. 575 * Any dedup is already handled by the underlying DIO, so 576 * we do not have to specifically flush it here. 577 * 578 * In the case where it has children, the DESTROY flag test 579 * in the flush code will prevent unnecessary flushes of 580 * MODIFIED chains that are not flagged DEDUP so don't worry 581 * about that here. 582 */ 583 if (chain->core.chain_count) { 584 /* 585 * Put on flushq (should ensure refs > 1), retry 586 * the drop. 587 */ 588 hammer2_spin_unex(&chain->core.spin); 589 hammer2_delayed_flush(chain); 590 hammer2_mtx_unlock(&chain->lock); 591 592 return(chain); /* retry drop */ 593 } 594 595 /* 596 * Otherwise we can scrap the MODIFIED bit if it is set, 597 * and continue along the freeing path. 598 * 599 * Be sure to clean-out any dedup bits. Without a parent 600 * this chain will no longer be visible to the flush code. 601 * Easy check data_off to avoid the volume root. 602 */ 603 if (chain->flags & HAMMER2_CHAIN_MODIFIED) { 604 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_MODIFIED); 605 atomic_add_long(&hammer2_count_modified_chains, -1); 606 if (chain->pmp) 607 hammer2_pfs_memory_wakeup(chain->pmp); 608 } 609 /* spinlock still held */ 610 } 611 612 /* spinlock still held */ 613 #if 0 614 dio = NULL; 615 #endif 616 617 /* 618 * If any children exist we must leave the chain intact with refs == 0. 619 * They exist because chains are retained below us which have refs or 620 * may require flushing. 621 * 622 * Retry (return chain) if we fail to transition the refs to 0, else 623 * return NULL indication nothing more to do. 624 * 625 * Chains with children are NOT put on the LRU list. 
626 */ 627 if (chain->core.chain_count) { 628 if (atomic_cmpset_int(&chain->refs, 1, 0)) { 629 hammer2_spin_unex(&chain->core.spin); 630 hammer2_chain_assert_no_data(chain); 631 hammer2_mtx_unlock(&chain->lock); 632 chain = NULL; 633 } else { 634 hammer2_spin_unex(&chain->core.spin); 635 hammer2_mtx_unlock(&chain->lock); 636 } 637 return (chain); 638 } 639 /* spinlock still held */ 640 /* no chains left under us */ 641 642 /* 643 * chain->core has no children left so no accessors can get to our 644 * chain from there. Now we have to lock the parent core to interlock 645 * remaining possible accessors that might bump chain's refs before 646 * we can safely drop chain's refs with intent to free the chain. 647 */ 648 hmp = chain->hmp; 649 pmp = chain->pmp; /* can be NULL */ 650 rdrop = NULL; 651 652 parent = chain->parent; 653 654 /* 655 * WARNING! chain's spin lock is still held here, and other spinlocks 656 * will be acquired and released in the code below. We 657 * cannot be making fancy procedure calls! 658 */ 659 660 /* 661 * We can cache the chain if it is associated with a pmp 662 * and not flagged as being destroyed or requesting a full 663 * release. In this situation the chain is not removed 664 * from its parent, i.e. it can still be looked up. 665 * 666 * We intentionally do not cache DATA chains because these 667 * were likely used to load data into the logical buffer cache 668 * and will not be accessed again for some time. 669 */ 670 if ((chain->flags & 671 (HAMMER2_CHAIN_DESTROY | HAMMER2_CHAIN_RELEASE)) == 0 && 672 chain->pmp && 673 chain->bref.type != HAMMER2_BREF_TYPE_DATA) { 674 if (parent) 675 hammer2_spin_ex(&parent->core.spin); 676 if (atomic_cmpset_int(&chain->refs, 1, 0) == 0) { 677 /* 678 * 1->0 transition failed, retry. Do not drop 679 * the chain's data yet! 
680 */ 681 if (parent) 682 hammer2_spin_unex(&parent->core.spin); 683 hammer2_spin_unex(&chain->core.spin); 684 hammer2_mtx_unlock(&chain->lock); 685 686 return(chain); 687 } 688 689 /* 690 * Success 691 */ 692 #if 0 693 dio = hammer2_chain_drop_data(chain, 1); 694 #endif 695 hammer2_chain_assert_no_data(chain); 696 697 /* 698 * Make sure we are on the LRU list, clean up excessive 699 * LRU entries. We can only really drop one but there might 700 * be other entries that we can remove from the lru_list 701 * without dropping. 702 * 703 * NOTE: HAMMER2_CHAIN_ONLRU may only be safely set when 704 * chain->core.spin AND pmp->lru_spin are held, but 705 * can be safely cleared only holding pmp->lru_spin. 706 */ 707 if ((chain->flags & HAMMER2_CHAIN_ONLRU) == 0) { 708 hammer2_spin_ex(&pmp->lru_spin); 709 if ((chain->flags & HAMMER2_CHAIN_ONLRU) == 0) { 710 atomic_set_int(&chain->flags, 711 HAMMER2_CHAIN_ONLRU); 712 TAILQ_INSERT_TAIL(&pmp->lru_list, 713 chain, lru_node); 714 atomic_add_int(&pmp->lru_count, 1); 715 } 716 if (pmp->lru_count < HAMMER2_LRU_LIMIT) 717 depth = 1; /* disable lru_list flush */ 718 hammer2_spin_unex(&pmp->lru_spin); 719 } else { 720 /* disable lru flush */ 721 depth = 1; 722 } 723 724 if (parent) { 725 hammer2_spin_unex(&parent->core.spin); 726 parent = NULL; /* safety */ 727 } 728 hammer2_spin_unex(&chain->core.spin); 729 hammer2_mtx_unlock(&chain->lock); 730 #if 0 731 if (dio) 732 hammer2_io_bqrelse(&dio); 733 #endif 734 735 /* 736 * lru_list hysteresis (see above for depth overrides). 737 * Note that depth also prevents excessive lastdrop recursion. 738 */ 739 if (depth == 0) 740 hammer2_chain_lru_flush(pmp); 741 742 return NULL; 743 /* NOT REACHED */ 744 } 745 746 /* 747 * Make sure we are not on the LRU list. 
748 */ 749 if (chain->flags & HAMMER2_CHAIN_ONLRU) { 750 hammer2_spin_ex(&pmp->lru_spin); 751 if (chain->flags & HAMMER2_CHAIN_ONLRU) { 752 atomic_add_int(&pmp->lru_count, -1); 753 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_ONLRU); 754 TAILQ_REMOVE(&pmp->lru_list, chain, lru_node); 755 } 756 hammer2_spin_unex(&pmp->lru_spin); 757 } 758 759 /* 760 * Spinlock the parent and try to drop the last ref on chain. 761 * On success determine if we should dispose of the chain 762 * (remove the chain from its parent, etc). 763 * 764 * (normal core locks are top-down recursive but we define 765 * core spinlocks as bottom-up recursive, so this is safe). 766 */ 767 if (parent) { 768 hammer2_spin_ex(&parent->core.spin); 769 if (atomic_cmpset_int(&chain->refs, 1, 0) == 0) { 770 #if 0 771 /* XXX remove, don't try to drop data on fail */ 772 hammer2_spin_unex(&parent->core.spin); 773 dio = hammer2_chain_drop_data(chain, 0); 774 hammer2_spin_unex(&chain->core.spin); 775 if (dio) 776 hammer2_io_bqrelse(&dio); 777 #endif 778 /* 779 * 1->0 transition failed, retry. 780 */ 781 hammer2_spin_unex(&parent->core.spin); 782 hammer2_spin_unex(&chain->core.spin); 783 hammer2_mtx_unlock(&chain->lock); 784 785 return(chain); 786 } 787 788 /* 789 * 1->0 transition successful, parent spin held to prevent 790 * new lookups, chain spinlock held to protect parent field. 791 * Remove chain from the parent. 792 * 793 * If the chain is being removed from the parent's btree but 794 * is not bmapped, we have to adjust live_count downward. If 795 * it is bmapped then the blockref is retained in the parent 796 * as is its associated live_count. This case can occur when 797 * a chain added to the topology is unable to flush and is 798 * then later deleted. 
799 */ 800 if (chain->flags & HAMMER2_CHAIN_ONRBTREE) { 801 if ((parent->flags & HAMMER2_CHAIN_COUNTEDBREFS) && 802 (chain->flags & HAMMER2_CHAIN_BMAPPED) == 0) { 803 atomic_add_int(&parent->core.live_count, -1); 804 } 805 RB_REMOVE(hammer2_chain_tree, 806 &parent->core.rbtree, chain); 807 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_ONRBTREE); 808 --parent->core.chain_count; 809 chain->parent = NULL; 810 } 811 812 /* 813 * If our chain was the last chain in the parent's core the 814 * core is now empty and its parent might have to be 815 * re-dropped if it has 0 refs. 816 */ 817 if (parent->core.chain_count == 0) { 818 rdrop = parent; 819 atomic_add_int(&rdrop->refs, 1); 820 /* 821 if (atomic_cmpset_int(&rdrop->refs, 0, 1) == 0) 822 rdrop = NULL; 823 */ 824 } 825 hammer2_spin_unex(&parent->core.spin); 826 parent = NULL; /* safety */ 827 /* FALL THROUGH */ 828 } else { 829 /* 830 * No-parent case. 831 */ 832 if (atomic_cmpset_int(&chain->refs, 1, 0) == 0) { 833 /* 834 * 1->0 transition failed, retry. 835 */ 836 hammer2_spin_unex(&parent->core.spin); 837 hammer2_spin_unex(&chain->core.spin); 838 hammer2_mtx_unlock(&chain->lock); 839 840 return(chain); 841 } 842 } 843 844 /* 845 * Successful 1->0 transition, no parent, no children... no way for 846 * anyone to ref this chain any more. We can clean-up and free it. 847 * 848 * We still have the core spinlock, and core's chain_count is 0. 849 * Any parent spinlock is gone. 850 */ 851 hammer2_spin_unex(&chain->core.spin); 852 hammer2_chain_assert_no_data(chain); 853 hammer2_mtx_unlock(&chain->lock); 854 KKASSERT(RB_EMPTY(&chain->core.rbtree) && 855 chain->core.chain_count == 0); 856 857 /* 858 * All locks are gone, no pointers remain to the chain, finish 859 * freeing it. 
860 */ 861 KKASSERT((chain->flags & (HAMMER2_CHAIN_UPDATE | 862 HAMMER2_CHAIN_MODIFIED)) == 0); 863 #if 0 864 dio = hammer2_chain_drop_data(chain, 1); 865 if (dio) 866 hammer2_io_bqrelse(&dio); 867 #endif 868 869 /* 870 * Once chain resources are gone we can use the now dead chain 871 * structure to placehold what might otherwise require a recursive 872 * drop, because we have potentially two things to drop and can only 873 * return one directly. 874 */ 875 if (chain->flags & HAMMER2_CHAIN_ALLOCATED) { 876 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_ALLOCATED); 877 chain->hmp = NULL; 878 kfree(chain, hmp->mchain); 879 } 880 881 /* 882 * Possible chaining loop when parent re-drop needed. 883 */ 884 return(rdrop); 885 } 886 887 /* 888 * Heuristical flush of the LRU, try to reduce the number of entries 889 * on the LRU to (HAMMER2_LRU_LIMIT * 2 / 3). This procedure is called 890 * only when lru_count exceeds HAMMER2_LRU_LIMIT. 891 */ 892 static 893 void 894 hammer2_chain_lru_flush(hammer2_pfs_t *pmp) 895 { 896 hammer2_chain_t *chain; 897 898 again: 899 chain = NULL; 900 hammer2_spin_ex(&pmp->lru_spin); 901 while (pmp->lru_count > HAMMER2_LRU_LIMIT * 2 / 3) { 902 /* 903 * Pick a chain off the lru_list, just recycle it quickly 904 * if LRUHINT is set (the chain was ref'd but left on 905 * the lru_list, so cycle to the end). 906 */ 907 chain = TAILQ_FIRST(&pmp->lru_list); 908 TAILQ_REMOVE(&pmp->lru_list, chain, lru_node); 909 910 if (chain->flags & HAMMER2_CHAIN_LRUHINT) { 911 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_LRUHINT); 912 TAILQ_INSERT_TAIL(&pmp->lru_list, chain, lru_node); 913 chain = NULL; 914 continue; 915 } 916 917 /* 918 * Ok, we are off the LRU. We must adjust refs before we 919 * can safely clear the ONLRU flag. 
920 */ 921 atomic_add_int(&pmp->lru_count, -1); 922 if (atomic_cmpset_int(&chain->refs, 0, 1)) { 923 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_ONLRU); 924 atomic_set_int(&chain->flags, HAMMER2_CHAIN_RELEASE); 925 break; 926 } 927 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_ONLRU); 928 chain = NULL; 929 } 930 hammer2_spin_unex(&pmp->lru_spin); 931 if (chain == NULL) 932 return; 933 934 /* 935 * If we picked a chain off the lru list we may be able to lastdrop 936 * it. Use a depth of 1 to prevent excessive lastdrop recursion. 937 */ 938 while (chain) { 939 u_int refs; 940 941 refs = chain->refs; 942 cpu_ccfence(); 943 KKASSERT(refs > 0); 944 945 if (refs == 1) { 946 if (hammer2_mtx_ex_try(&chain->lock) == 0) 947 chain = hammer2_chain_lastdrop(chain, 1); 948 /* retry the same chain, or chain from lastdrop */ 949 } else { 950 if (atomic_cmpset_int(&chain->refs, refs, refs - 1)) 951 break; 952 /* retry the same chain */ 953 } 954 cpu_pause(); 955 } 956 goto again; 957 } 958 959 /* 960 * On last lock release. 961 */ 962 static hammer2_io_t * 963 hammer2_chain_drop_data(hammer2_chain_t *chain) 964 { 965 hammer2_io_t *dio; 966 967 if ((dio = chain->dio) != NULL) { 968 chain->dio = NULL; 969 chain->data = NULL; 970 } else { 971 switch(chain->bref.type) { 972 case HAMMER2_BREF_TYPE_VOLUME: 973 case HAMMER2_BREF_TYPE_FREEMAP: 974 break; 975 default: 976 if (chain->data != NULL) { 977 hammer2_spin_unex(&chain->core.spin); 978 panic("chain data not null: " 979 "chain %p bref %016jx.%02x " 980 "refs %d parent %p dio %p data %p", 981 chain, chain->bref.data_off, 982 chain->bref.type, chain->refs, 983 chain->parent, 984 chain->dio, chain->data); 985 } 986 KKASSERT(chain->data == NULL); 987 break; 988 } 989 } 990 return dio; 991 } 992 993 /* 994 * Lock a referenced chain element, acquiring its data with I/O if necessary, 995 * and specify how you would like the data to be resolved. 996 * 997 * If an I/O or other fatal error occurs, chain->error will be set to non-zero. 
 *
 * The lock is allowed to recurse, multiple locking ops will aggregate
 * the requested resolve types.  Once data is assigned it will not be
 * removed until the last unlock.
 *
 * HAMMER2_RESOLVE_NEVER - Do not resolve the data element.
 *			   (typically used to avoid device/logical buffer
 *			    aliasing for data)
 *
 * HAMMER2_RESOLVE_MAYBE - Do not resolve data elements for chains in
 *			   the INITIAL-create state (indirect blocks only).
 *
 *			   Do not resolve data elements for DATA chains.
 *			   (typically used to avoid device/logical buffer
 *			    aliasing for data)
 *
 * HAMMER2_RESOLVE_ALWAYS- Always resolve the data element.
 *
 * HAMMER2_RESOLVE_SHARED- (flag) The chain is locked shared, otherwise
 *			   it will be locked exclusive.
 *
 * HAMMER2_RESOLVE_NONBLOCK- (flag) The chain is locked non-blocking.  If
 *			   the lock fails, EAGAIN is returned.
 *
 * NOTE: Embedded elements (volume header, inodes) are always resolved
 *	 regardless.
 *
 * NOTE: Specifying HAMMER2_RESOLVE_ALWAYS on a newly-created non-embedded
 *	 element will instantiate and zero its buffer, and flush it on
 *	 release.
 *
 * NOTE: (data) elements are normally locked RESOLVE_NEVER or RESOLVE_MAYBE
 *	 so as not to instantiate a device buffer, which could alias against
 *	 a logical file buffer.  However, if ALWAYS is specified the
 *	 device buffer will be instantiated anyway.
 *
 * NOTE: The return value is always 0 unless NONBLOCK is specified, in which
 *	 case it can be either 0 or EAGAIN.
 *
 * WARNING! This function blocks on I/O if data needs to be fetched.  This
 *	    blocking can run concurrent with other compatible lock holders
 *	    who do not need data returning.  The lock is not upgraded to
 *	    exclusive during a data fetch, a separate bit is used to
 *	    interlock I/O.  However, an exclusive lock holder can still count
 *	    on being interlocked against an I/O fetch managed by a shared
 *	    lock holder.
 */
int
hammer2_chain_lock(hammer2_chain_t *chain, int how)
{
	/* Caller must already hold a ref; the lock does not add one. */
	KKASSERT(chain->refs > 0);

	if (how & HAMMER2_RESOLVE_NONBLOCK) {
		/*
		 * For non-blocking operation attempt to get the lock
		 * before bumping lockcnt, just so we don't have to deal
		 * with dropping lockcnt (and dealing with the underlying
		 * data) if we fail.
		 *
		 * NOTE: LOCKAGAIN must always succeed without blocking.
		 */
		if (how & HAMMER2_RESOLVE_SHARED) {
			if (how & HAMMER2_RESOLVE_LOCKAGAIN) {
				hammer2_mtx_sh_again(&chain->lock);
			} else {
				if (hammer2_mtx_sh_try(&chain->lock) != 0)
					return EAGAIN;
			}
		} else {
			if (hammer2_mtx_ex_try(&chain->lock) != 0)
				return EAGAIN;
		}
		/* lockcnt keeps chain->data intact across recursive locks */
		atomic_add_int(&chain->lockcnt, 1);
		/* per-thread lock accounting for debugging/leak detection */
		++curthread->td_tracker;
	} else {
		/*
		 * Lock the element.  Recursive locks are allowed.  lockcnt
		 * ensures that data is left intact.
		 */
		atomic_add_int(&chain->lockcnt, 1);

		/*
		 * Get the appropriate lock.  If LOCKAGAIN is flagged with
		 * SHARED the caller expects a shared lock to already be
		 * present and we are giving it another ref.  This case must
		 * importantly not block if there is a pending exclusive lock
		 * request.
		 */
		if (how & HAMMER2_RESOLVE_SHARED) {
			if (how & HAMMER2_RESOLVE_LOCKAGAIN) {
				hammer2_mtx_sh_again(&chain->lock);
			} else {
				hammer2_mtx_sh(&chain->lock);
			}
		} else {
			hammer2_mtx_ex(&chain->lock);
		}
		++curthread->td_tracker;
	}

	/*
	 * If we already have a valid data pointer make sure the data is
	 * synchronized to the current cpu, and then no further action is
	 * necessary.
	 */
	if (chain->data) {
		if (chain->dio)
			hammer2_io_bkvasync(chain->dio);
		return 0;
	}

	/*
	 * Do we have to resolve the data?  This is generally only
	 * applicable to HAMMER2_BREF_TYPE_DATA which is special-cased.
	 * Other BREF types expect the data to be there.
	 */
	switch(how & HAMMER2_RESOLVE_MASK) {
	case HAMMER2_RESOLVE_NEVER:
		return 0;
	case HAMMER2_RESOLVE_MAYBE:
		if (chain->flags & HAMMER2_CHAIN_INITIAL)
			return 0;
		if (chain->bref.type == HAMMER2_BREF_TYPE_DATA)
			return 0;
#if 0
		if (chain->bref.type == HAMMER2_BREF_TYPE_FREEMAP_NODE)
			return 0;
		if (chain->bref.type == HAMMER2_BREF_TYPE_FREEMAP_LEAF)
			return 0;
#endif
		/* fall through */
	case HAMMER2_RESOLVE_ALWAYS:
	default:
		break;
	}

	/*
	 * Caller requires data
	 */
	hammer2_chain_load_data(chain);

	return 0;
}

/*
 * Lock the chain, retain the hold, and drop the data persistence count.
 * The data should remain valid because we never transitioned lockcnt
 * through 0.
 */
void
hammer2_chain_lock_unhold(hammer2_chain_t *chain, int how)
{
	hammer2_chain_lock(chain, how);
	atomic_add_int(&chain->lockcnt, -1);
}

#if 0
/*
 * Downgrade an exclusive chain lock to a shared chain lock.
 *
 * NOTE: There is no upgrade equivalent due to the ease of
 *	 deadlocks in that direction.
 */
void
hammer2_chain_lock_downgrade(hammer2_chain_t *chain)
{
	hammer2_mtx_downgrade(&chain->lock);
}
#endif

/*
 * Issue I/O and install chain->data.  Caller must hold a chain lock, lock
 * may be of any type.
 *
 * Once chain->data is set it cannot be disposed of until all locks are
 * released.
 *
 * Make sure the data is synchronized to the current cpu.
 */
void
hammer2_chain_load_data(hammer2_chain_t *chain)
{
	hammer2_blockref_t *bref;
	hammer2_dev_t *hmp;
	hammer2_io_t *dio;
	char *bdata;
	int error;

	/*
	 * Degenerate case, data already present, or chain has no media
	 * reference to load.
	 */
	if (chain->data) {
		if (chain->dio)
			hammer2_io_bkvasync(chain->dio);
		return;
	}
	if ((chain->bref.data_off & ~HAMMER2_OFF_MASK_RADIX) == 0)
		return;

	hmp = chain->hmp;
	KKASSERT(hmp != NULL);

	/*
	 * Gain the IOINPROG bit, interlocked block.
	 *
	 * IOINPROG serializes the media read without upgrading the chain
	 * lock; losers set IOSIGNAL and sleep until the winner wakes them.
	 */
	for (;;) {
		u_int oflags;
		u_int nflags;

		oflags = chain->flags;
		cpu_ccfence();
		if (oflags & HAMMER2_CHAIN_IOINPROG) {
			nflags = oflags | HAMMER2_CHAIN_IOSIGNAL;
			tsleep_interlock(&chain->flags, 0);
			if (atomic_cmpset_int(&chain->flags, oflags, nflags)) {
				tsleep(&chain->flags, PINTERLOCKED,
				       "h2iocw", 0);
			}
			/* retry */
		} else {
			nflags = oflags | HAMMER2_CHAIN_IOINPROG;
			if (atomic_cmpset_int(&chain->flags, oflags, nflags)) {
				break;
			}
			/* retry */
		}
	}

	/*
	 * We own CHAIN_IOINPROG
	 *
	 * Degenerate case if we raced another load.
	 */
	if (chain->data) {
		if (chain->dio)
			hammer2_io_bkvasync(chain->dio);
		goto done;
	}

	/*
	 * We must resolve to a device buffer, either by issuing I/O or
	 * by creating a zero-fill element.  We do not mark the buffer
	 * dirty when creating a zero-fill element (the hammer2_chain_modify()
	 * API must still be used to do that).
	 *
	 * The device buffer is variable-sized in powers of 2 down
	 * to HAMMER2_MIN_ALLOC (typically 1K).  A 64K physical storage
	 * chunk always contains buffers of the same size. (XXX)
	 *
	 * The minimum physical IO size may be larger than the variable
	 * block size.
	 */
	bref = &chain->bref;

	/*
	 * The getblk() optimization can only be used on newly created
	 * elements if the physical block size matches the request.
	 */
	if (chain->flags & HAMMER2_CHAIN_INITIAL) {
		error = hammer2_io_new(hmp, bref->type,
				       bref->data_off, chain->bytes,
				       &chain->dio);
	} else {
		error = hammer2_io_bread(hmp, bref->type,
					 bref->data_off, chain->bytes,
					 &chain->dio);
		hammer2_adjreadcounter(&chain->bref, chain->bytes);
	}
	if (error) {
		chain->error = HAMMER2_ERROR_EIO;
		kprintf("hammer2_chain_lock: I/O error %016jx: %d\n",
			(intmax_t)bref->data_off, error);
		hammer2_io_bqrelse(&chain->dio);
		goto done;
	}
	chain->error = 0;

	/*
	 * This isn't perfect and can be ignored on OSs which do not have
	 * an indication as to whether a buffer is coming from cache or
	 * if I/O was actually issued for the read.  TESTEDGOOD will work
	 * pretty well without the B_IOISSUED logic because chains are
	 * cached, but in that situation (without B_IOISSUED) it will not
	 * detect whether a re-read via I/O is corrupted verses the original
	 * read.
	 *
	 * We can't re-run the CRC on every fresh lock.  That would be
	 * insanely expensive.
	 *
	 * If the underlying kernel buffer covers the entire chain we can
	 * use the B_IOISSUED indication to determine if we have to re-run
	 * the CRC on chain data for chains that managed to stay cached
	 * across the kernel disposal of the original buffer.
	 */
	if ((dio = chain->dio) != NULL && dio->bp) {
		struct buf *bp = dio->bp;

		/* fresh I/O occurred; force check code to re-validate */
		if (dio->psize == chain->bytes &&
		    (bp->b_flags & B_IOISSUED)) {
			atomic_clear_int(&chain->flags,
					 HAMMER2_CHAIN_TESTEDGOOD);
			bp->b_flags &= ~B_IOISSUED;
		}
	}

	/*
	 * NOTE: A locked chain's data cannot be modified without first
	 *	 calling hammer2_chain_modify().
	 */

	/*
	 * Clear INITIAL.  In this case we used io_new() and the buffer has
	 * been zero'd and marked dirty.
	 *
	 * NOTE: hammer2_io_data() call issues bkvasync()
	 */
	bdata = hammer2_io_data(chain->dio, chain->bref.data_off);

	if (chain->flags & HAMMER2_CHAIN_INITIAL) {
		atomic_clear_int(&chain->flags, HAMMER2_CHAIN_INITIAL);
		chain->bref.flags |= HAMMER2_BREF_FLAG_ZERO;
	} else if (chain->flags & HAMMER2_CHAIN_MODIFIED) {
		/*
		 * check data not currently synchronized due to
		 * modification.  XXX assumes data stays in the buffer
		 * cache, which might not be true (need biodep on flush
		 * to calculate crc?  or simple crc?).
		 */
	} else if ((chain->flags & HAMMER2_CHAIN_TESTEDGOOD) == 0) {
		if (hammer2_chain_testcheck(chain, bdata) == 0) {
			chain->error = HAMMER2_ERROR_CHECK;
		} else {
			atomic_set_int(&chain->flags, HAMMER2_CHAIN_TESTEDGOOD);
		}
	}

	/*
	 * Setup the data pointer, either pointing it to an embedded data
	 * structure and copying the data from the buffer, or pointing it
	 * into the buffer.
	 *
	 * The buffer is not retained when copying to an embedded data
	 * structure in order to avoid potential deadlocks or recursions
	 * on the same physical buffer.
	 *
	 * WARNING! Other threads can start using the data the instant we
	 *	    set chain->data non-NULL.
	 */
	switch (bref->type) {
	case HAMMER2_BREF_TYPE_VOLUME:
	case HAMMER2_BREF_TYPE_FREEMAP:
		/*
		 * Copy data from bp to embedded buffer
		 */
		panic("hammer2_chain_load_data: unresolved volume header");
		break;
	case HAMMER2_BREF_TYPE_DIRENT:
		KKASSERT(chain->bytes != 0);
		/* fall through */
	case HAMMER2_BREF_TYPE_INODE:
	case HAMMER2_BREF_TYPE_FREEMAP_LEAF:
	case HAMMER2_BREF_TYPE_INDIRECT:
	case HAMMER2_BREF_TYPE_DATA:
	case HAMMER2_BREF_TYPE_FREEMAP_NODE:
	default:
		/*
		 * Point data at the device buffer and leave dio intact.
		 */
		chain->data = (void *)bdata;
		break;
	}

	/*
	 * Release HAMMER2_CHAIN_IOINPROG and signal waiters if requested.
	 */
done:
	for (;;) {
		u_int oflags;
		u_int nflags;

		oflags = chain->flags;
		nflags = oflags & ~(HAMMER2_CHAIN_IOINPROG |
				    HAMMER2_CHAIN_IOSIGNAL);
		KKASSERT(oflags & HAMMER2_CHAIN_IOINPROG);
		if (atomic_cmpset_int(&chain->flags, oflags, nflags)) {
			if (oflags & HAMMER2_CHAIN_IOSIGNAL)
				wakeup(&chain->flags);
			break;
		}
	}
}

/*
 * Unlock and deref a chain element.
 *
 * Remember that the presence of children under chain prevent the chain's
 * destruction but do not add additional references, so the dio will still
 * be dropped.
 */
void
hammer2_chain_unlock(hammer2_chain_t *chain)
{
	hammer2_io_t *dio;
	u_int lockcnt;
	int iter = 0;

	--curthread->td_tracker;

	/*
	 * If multiple locks are present (or being attempted) on this
	 * particular chain we can just unlock, drop refs, and return.
	 *
	 * Otherwise fall-through on the 1->0 transition.
	 */
	for (;;) {
		lockcnt = chain->lockcnt;
		KKASSERT(lockcnt > 0);
		cpu_ccfence();
		if (lockcnt > 1) {
			if (atomic_cmpset_int(&chain->lockcnt,
					      lockcnt, lockcnt - 1)) {
				hammer2_mtx_unlock(&chain->lock);
				return;
			}
		} else if (hammer2_mtx_upgrade_try(&chain->lock) == 0) {
			/* while holding the mutex exclusively */
			if (atomic_cmpset_int(&chain->lockcnt, 1, 0))
				break;
		} else {
			/*
			 * This situation can easily occur on SMP due to
			 * the gap inbetween the 1->0 transition and the
			 * final unlock.  We cannot safely block on the
			 * mutex because lockcnt might go above 1.
			 *
			 * XXX Sleep for one tick if it takes too long.
			 */
			if (++iter > 1000) {
				if (iter > 1000 + hz) {
					kprintf("hammer2: h2race2 %p\n", chain);
					iter = 1000;
				}
				tsleep(&iter, 0, "h2race2", 1);
			}
			cpu_pause();
		}
		/* retry */
	}

	/*
	 * Last unlock / mutex upgraded to exclusive.  Drop the data
	 * reference.
	 */
	dio = hammer2_chain_drop_data(chain);
	if (dio)
		hammer2_io_bqrelse(&dio);
	hammer2_mtx_unlock(&chain->lock);
}

/*
 * Unlock and hold chain data intact
 */
void
hammer2_chain_unlock_hold(hammer2_chain_t *chain)
{
	atomic_add_int(&chain->lockcnt, 1);
	hammer2_chain_unlock(chain);
}

/*
 * Helper to obtain the blockref[] array base and count for a chain.
 *
 * XXX Not widely used yet, various use cases need to be validated and
 *     converted to use this function.
 */
static
hammer2_blockref_t *
hammer2_chain_base_and_count(hammer2_chain_t *parent, int *countp)
{
	hammer2_blockref_t *base;
	int count;

	if (parent->flags & HAMMER2_CHAIN_INITIAL) {
		/* INITIAL chains have no media data yet, only a count */
		base = NULL;

		switch(parent->bref.type) {
		case HAMMER2_BREF_TYPE_INODE:
			count = HAMMER2_SET_COUNT;
			break;
		case HAMMER2_BREF_TYPE_INDIRECT:
		case HAMMER2_BREF_TYPE_FREEMAP_NODE:
			count = parent->bytes / sizeof(hammer2_blockref_t);
			break;
		case HAMMER2_BREF_TYPE_VOLUME:
			count = HAMMER2_SET_COUNT;
			break;
		case HAMMER2_BREF_TYPE_FREEMAP:
			count = HAMMER2_SET_COUNT;
			break;
		default:
			panic("hammer2_chain_create_indirect: "
			      "unrecognized blockref type: %d",
			      parent->bref.type);
			count = 0;
			break;
		}
	} else {
		switch(parent->bref.type) {
		case HAMMER2_BREF_TYPE_INODE:
			base = &parent->data->ipdata.u.blockset.blockref[0];
			count = HAMMER2_SET_COUNT;
			break;
		case HAMMER2_BREF_TYPE_INDIRECT:
		case HAMMER2_BREF_TYPE_FREEMAP_NODE:
			base = &parent->data->npdata[0];
			count = parent->bytes / sizeof(hammer2_blockref_t);
			break;
		case HAMMER2_BREF_TYPE_VOLUME:
			base = &parent->data->voldata.
					sroot_blockset.blockref[0];
			count = HAMMER2_SET_COUNT;
			break;
		case HAMMER2_BREF_TYPE_FREEMAP:
			base = &parent->data->blkset.blockref[0];
			count = HAMMER2_SET_COUNT;
			break;
		default:
			panic("hammer2_chain_create_indirect: "
			      "unrecognized blockref type: %d",
			      parent->bref.type);
			count = 0;
			break;
		}
	}
	*countp = count;

	return base;
}

/*
 * This counts the number of live blockrefs in a block array and
 * also calculates the point at which all remaining blockrefs are empty.
 * This routine can only be called on a live chain.
 *
 * Caller holds the chain locked, but possibly with a shared lock.  We
 * must use an exclusive spinlock to prevent corruption.
 *
 * NOTE: Flag is not set until after the count is complete, allowing
 *	 callers to test the flag without holding the spinlock.
 *
 * NOTE: If base is NULL the related chain is still in the INITIAL
 *	 state and there are no blockrefs to count.
 *
 * NOTE: live_count may already have some counts accumulated due to
 *	 creation and deletion and could even be initially negative.
 */
void
hammer2_chain_countbrefs(hammer2_chain_t *chain,
			 hammer2_blockref_t *base, int count)
{
	hammer2_spin_ex(&chain->core.spin);
	if ((chain->flags & HAMMER2_CHAIN_COUNTEDBREFS) == 0) {
		if (base) {
			/* scan down to find the last non-empty blockref */
			while (--count >= 0) {
				if (base[count].type)
					break;
			}
			chain->core.live_zero = count + 1;
			while (count >= 0) {
				if (base[count].type)
					atomic_add_int(&chain->core.live_count,
						       1);
				--count;
			}
		} else {
			chain->core.live_zero = 0;
		}
		/* else do not modify live_count */
		atomic_set_int(&chain->flags, HAMMER2_CHAIN_COUNTEDBREFS);
	}
	hammer2_spin_unex(&chain->core.spin);
}

/*
 * Resize the chain's physical storage allocation in-place.  This function does
 * not usually adjust the data pointer and must be followed by (typically) a
 * hammer2_chain_modify() call to copy any old data over and adjust the
 * data pointer.
 *
 * Chains can be resized smaller without reallocating the storage.  Resizing
 * larger will reallocate the storage.  Excess or prior storage is reclaimed
 * asynchronously at a later time.
 *
 * An nradix value of 0 is special-cased to mean that the storage should
 * be disassociated, that is the chain is being resized to 0 bytes (not 1
 * byte).
 *
 * Must be passed an exclusively locked parent and chain.
 *
 * This function is mostly used with DATA blocks locked RESOLVE_NEVER in order
 * to avoid instantiating a device buffer that conflicts with the vnode data
 * buffer.  However, because H2 can compress or encrypt data, the chain may
 * have a dio assigned to it in those situations, and they do not conflict.
 *
 * XXX return error if cannot resize.
 */
int
hammer2_chain_resize(hammer2_chain_t *chain,
		     hammer2_tid_t mtid, hammer2_off_t dedup_off,
		     int nradix, int flags)
{
	hammer2_dev_t *hmp;
	size_t obytes;
	size_t nbytes;
	int error;

	hmp = chain->hmp;

	/*
	 * Only data and indirect blocks can be resized for now.
	 * (The volume root, inodes, and freemap elements use a fixed size).
	 */
	KKASSERT(chain != &hmp->vchain);
	KKASSERT(chain->bref.type == HAMMER2_BREF_TYPE_DATA ||
		 chain->bref.type == HAMMER2_BREF_TYPE_INDIRECT ||
		 chain->bref.type == HAMMER2_BREF_TYPE_DIRENT);

	/*
	 * Nothing to do if the element is already the proper size
	 *
	 * NOTE(review): (1U << nradix) assumes nradix < 32 when non-zero;
	 *		 callers appear to be limited to small block radixes
	 *		 but this is not asserted here — confirm.
	 */
	obytes = chain->bytes;
	nbytes = (nradix) ? (1U << nradix) : 0;
	if (obytes == nbytes)
		return (chain->error);

	/*
	 * Make sure the old data is instantiated so we can copy it.  If this
	 * is a data block, the device data may be superfluous since the data
	 * might be in a logical block, but compressed or encrypted data is
	 * another matter.
	 *
	 * NOTE: The modify will set BMAPUPD for us if BMAPPED is set.
	 */
	error = hammer2_chain_modify(chain, mtid, dedup_off, 0);
	if (error)
		return error;

	/*
	 * Relocate the block, even if making it smaller (because different
	 * block sizes may be in different regions).
	 *
	 * NOTE: Operation does not copy the data and may only be used
	 *	 to resize data blocks in-place, or directory entry blocks
	 *	 which are about to be modified in some manner.
	 */
	error = hammer2_freemap_alloc(chain, nbytes);
	if (error)
		return error;

	chain->bytes = nbytes;

	/*
	 * We don't want the followup chain_modify() to try to copy data
	 * from the old (wrong-sized) buffer.  It won't know how much to
	 * copy.  This case should only occur during writes when the
	 * originator already has the data to write in-hand.
	 */
	if (chain->dio) {
		KKASSERT(chain->bref.type == HAMMER2_BREF_TYPE_DATA ||
			 chain->bref.type == HAMMER2_BREF_TYPE_DIRENT);
		hammer2_io_brelse(&chain->dio);
		chain->data = NULL;
	}
	return (chain->error);
}

/*
 * Set the chain modified so its data can be changed by the caller, or
 * install deduplicated data.  The caller must call this routine for each
 * set of modifications it makes, even if the chain is already flagged
 * MODIFIED.
 *
 * Sets bref.modify_tid to mtid only if mtid != 0.  Note that bref.modify_tid
 * is a CLC (cluster level change) field and is not updated by parent
 * propagation during a flush.
 *
 * Returns an appropriate HAMMER2_ERROR_* code, which will generally reflect
 * chain->error except for HAMMER2_ERROR_ENOSPC.  If the allocation fails
 * due to no space available, HAMMER2_ERROR_ENOSPC is returned and the chain
 * remains unmodified with its old data ref intact and chain->error
 * unchanged.
 *
 *				Dedup Handling
 *
 * If the DEDUPABLE flag is set in the chain the storage must be reallocated
 * even if the chain is still flagged MODIFIED.  In this case the chain's
 * DEDUPABLE flag will be cleared once the new storage has been assigned.
 *
 * If the caller passes a non-zero dedup_off we will use it to assign the
 * new storage.  The MODIFIED flag will be *CLEARED* in this case, and
 * DEDUPABLE will be set (NOTE: the UPDATE flag is always set).  The caller
 * must not modify the data content upon return.
 */
int
hammer2_chain_modify(hammer2_chain_t *chain, hammer2_tid_t mtid,
		     hammer2_off_t dedup_off, int flags)
{
	hammer2_blockref_t obref;
	hammer2_dev_t *hmp;
	hammer2_io_t *dio;
	int error;
	int wasinitial;
	int setmodified;
	int setupdate;
	int newmod;
	char *bdata;

	hmp = chain->hmp;
	/*
	 * NOTE(review): obref snapshots the pre-modification blockref but
	 *		 is not referenced again in this function body —
	 *		 confirm whether it is vestigial.
	 */
	obref = chain->bref;
	KKASSERT((chain->flags & HAMMER2_CHAIN_FICTITIOUS) == 0);

	/*
	 * Data is not optional for freemap chains (we must always be sure
	 * to copy the data on COW storage allocations).
	 */
	if (chain->bref.type == HAMMER2_BREF_TYPE_FREEMAP_NODE ||
	    chain->bref.type == HAMMER2_BREF_TYPE_FREEMAP_LEAF) {
		KKASSERT((chain->flags & HAMMER2_CHAIN_INITIAL) ||
			 (flags & HAMMER2_MODIFY_OPTDATA) == 0);
	}

	/*
	 * Data must be resolved if already assigned, unless explicitly
	 * flagged otherwise.  If we cannot safely load the data the
	 * modification fails and we return early.
	 */
	if (chain->data == NULL && chain->bytes != 0 &&
	    (flags & HAMMER2_MODIFY_OPTDATA) == 0 &&
	    (chain->bref.data_off & ~HAMMER2_OFF_MASK_RADIX)) {
		hammer2_chain_load_data(chain);
		if (chain->error)
			return (chain->error);
	}
	error = 0;

	/*
	 * Set MODIFIED to indicate that the chain has been modified.  A new
	 * allocation is required when modifying a chain.
	 *
	 * Set UPDATE to ensure that the blockref is updated in the parent.
	 *
	 * If MODIFIED is already set determine if we can reuse the assigned
	 * data block or if we need a new data block.
	 */
	if ((chain->flags & HAMMER2_CHAIN_MODIFIED) == 0) {
		/*
		 * Must set modified bit.
		 */
		atomic_add_long(&hammer2_count_modified_chains, 1);
		atomic_set_int(&chain->flags, HAMMER2_CHAIN_MODIFIED);
		hammer2_pfs_memory_inc(chain->pmp);	/* can be NULL */
		setmodified = 1;

		/*
		 * We may be able to avoid a copy-on-write if the chain's
		 * check mode is set to NONE and the chain's current
		 * modify_tid is beyond the last explicit snapshot tid.
		 *
		 * This implements HAMMER2's overwrite-in-place feature.
		 *
		 * NOTE! This data-block cannot be used as a de-duplication
		 *	 source when the check mode is set to NONE.
		 */
		if ((chain->bref.type == HAMMER2_BREF_TYPE_DATA ||
		     chain->bref.type == HAMMER2_BREF_TYPE_DIRENT) &&
		    (chain->flags & HAMMER2_CHAIN_INITIAL) == 0 &&
		    (chain->flags & HAMMER2_CHAIN_DEDUPABLE) == 0 &&
		    HAMMER2_DEC_CHECK(chain->bref.methods) ==
		    HAMMER2_CHECK_NONE &&
		    chain->pmp &&
		    chain->bref.modify_tid >
		     chain->pmp->iroot->meta.pfs_lsnap_tid) {
			/*
			 * Sector overwrite allowed.
			 */
			newmod = 0;
		} else {
			/*
			 * Sector overwrite not allowed, must copy-on-write.
			 */
			newmod = 1;
		}
	} else if (chain->flags & HAMMER2_CHAIN_DEDUPABLE) {
		/*
		 * If the modified chain was registered for dedup we need
		 * a new allocation.  This only happens for delayed-flush
		 * chains (i.e. which run through the front-end buffer
		 * cache).
		 */
		newmod = 1;
		setmodified = 0;
	} else {
		/*
		 * Already flagged modified, no new allocation is needed.
		 */
		newmod = 0;
		setmodified = 0;
	}

	/*
	 * Flag parent update required.
	 */
	if ((chain->flags & HAMMER2_CHAIN_UPDATE) == 0) {
		atomic_set_int(&chain->flags, HAMMER2_CHAIN_UPDATE);
		setupdate = 1;
	} else {
		setupdate = 0;
	}

	/*
	 * The XOP code returns held but unlocked focus chains.  This
	 * prevents the chain from being destroyed but does not prevent
	 * it from being modified.  diolk is used to interlock modifications
	 * against XOP frontend accesses to the focus.
	 *
	 * This allows us to theoretically avoid deadlocking the frontend
	 * if one of the backends lock up by not formally locking the
	 * focused chain in the frontend.  In addition, the synchronization
	 * code relies on this mechanism to avoid deadlocking concurrent
	 * synchronization threads.
	 */
	lockmgr(&chain->diolk, LK_EXCLUSIVE);

	/*
	 * The modification or re-modification requires an allocation and
	 * possible COW.  If an error occurs, the previous content and data
	 * reference is retained and the modification fails.
	 *
	 * If dedup_off is non-zero, the caller is requesting a deduplication
	 * rather than a modification.  The MODIFIED bit is not set and the
	 * data offset is set to the deduplication offset.  The data cannot
	 * be modified.
	 *
	 * NOTE: The dedup offset is allowed to be in a partially free state
	 *	 and we must be sure to reset it to a fully allocated state
	 *	 to force two bulkfree passes to free it again.
	 *
	 * NOTE: Only applicable when chain->bytes != 0.
	 *
	 * XXX can a chain already be marked MODIFIED without a data
	 * assignment?  If not, assert here instead of testing the case.
	 */
	if (chain != &hmp->vchain && chain != &hmp->fchain &&
	    chain->bytes) {
		if ((chain->bref.data_off & ~HAMMER2_OFF_MASK_RADIX) == 0 ||
		    newmod
		) {
			/*
			 * NOTE: We do not have to remove the dedup
			 *	 registration because the area is still
			 *	 allocated and the underlying DIO will
			 *	 still be flushed.
			 */
			if (dedup_off) {
				chain->bref.data_off = dedup_off;
				chain->bytes = 1 << (dedup_off &
						     HAMMER2_OFF_MASK_RADIX);
				chain->error = 0;
				atomic_clear_int(&chain->flags,
						 HAMMER2_CHAIN_MODIFIED);
				atomic_add_long(&hammer2_count_modified_chains,
						-1);
				if (chain->pmp)
					hammer2_pfs_memory_wakeup(chain->pmp);
				hammer2_freemap_adjust(hmp, &chain->bref,
					HAMMER2_FREEMAP_DORECOVER);
				atomic_set_int(&chain->flags,
					       HAMMER2_CHAIN_DEDUPABLE);
			} else {
				error = hammer2_freemap_alloc(chain,
							      chain->bytes);
				atomic_clear_int(&chain->flags,
						 HAMMER2_CHAIN_DEDUPABLE);
			}
		}
	}

	/*
	 * Stop here if error.  We have to undo any flag bits we might
	 * have set above.
	 */
	if (error) {
		if (setmodified) {
			atomic_clear_int(&chain->flags, HAMMER2_CHAIN_MODIFIED);
			atomic_add_long(&hammer2_count_modified_chains, -1);
			if (chain->pmp)
				hammer2_pfs_memory_wakeup(chain->pmp);
		}
		if (setupdate) {
			atomic_clear_int(&chain->flags, HAMMER2_CHAIN_UPDATE);
		}
		lockmgr(&chain->diolk, LK_RELEASE);

		return error;
	}

	/*
	 * Update mirror_tid and modify_tid.  modify_tid is only updated
	 * if not passed as zero (during flushes, parent propagation passes
	 * the value 0).
	 *
	 * NOTE: chain->pmp could be the device spmp.
	 */
	chain->bref.mirror_tid = hmp->voldata.mirror_tid + 1;
	if (mtid)
		chain->bref.modify_tid = mtid;

	/*
	 * Set BMAPUPD to tell the flush code that an existing blockmap entry
	 * requires updating as well as to tell the delete code that the
	 * chain's blockref might not exactly match (in terms of physical size
	 * or block offset) the one in the parent's blocktable.  The base key
	 * of course will still match.
	 */
	if (chain->flags & HAMMER2_CHAIN_BMAPPED)
		atomic_set_int(&chain->flags, HAMMER2_CHAIN_BMAPUPD);

	/*
	 * Short-cut data blocks which the caller does not need an actual
	 * data reference to (aka OPTDATA), as long as the chain does not
	 * already have a data pointer to the data.  This generally means
	 * that the modifications are being done via the logical buffer cache.
	 * The INITIAL flag relates only to the device data buffer and thus
	 * remains unchanged in this situation.
	 *
	 * This code also handles bytes == 0 (most dirents).
	 */
	if (chain->bref.type == HAMMER2_BREF_TYPE_DATA &&
	    (flags & HAMMER2_MODIFY_OPTDATA) &&
	    chain->data == NULL) {
		KKASSERT(chain->dio == NULL);
		goto skip2;
	}

	/*
	 * Clearing the INITIAL flag (for indirect blocks) indicates that
	 * we've processed the uninitialized storage allocation.
	 *
	 * If this flag is already clear we are likely in a copy-on-write
	 * situation but we have to be sure NOT to bzero the storage if
	 * no data is present.
	 */
	if (chain->flags & HAMMER2_CHAIN_INITIAL) {
		atomic_clear_int(&chain->flags, HAMMER2_CHAIN_INITIAL);
		wasinitial = 1;
	} else {
		wasinitial = 0;
	}

	/*
	 * Instantiate data buffer and possibly execute COW operation
	 */
	switch(chain->bref.type) {
	case HAMMER2_BREF_TYPE_VOLUME:
	case HAMMER2_BREF_TYPE_FREEMAP:
		/*
		 * The data is embedded, no copy-on-write operation is
		 * needed.
		 */
		KKASSERT(chain->dio == NULL);
		break;
	case HAMMER2_BREF_TYPE_DIRENT:
		/*
		 * The data might be fully embedded.
		 */
		if (chain->bytes == 0) {
			KKASSERT(chain->dio == NULL);
			break;
		}
		/* fall through */
	case HAMMER2_BREF_TYPE_INODE:
	case HAMMER2_BREF_TYPE_FREEMAP_LEAF:
	case HAMMER2_BREF_TYPE_DATA:
	case HAMMER2_BREF_TYPE_INDIRECT:
	case HAMMER2_BREF_TYPE_FREEMAP_NODE:
		/*
		 * Perform the copy-on-write operation
		 *
		 * zero-fill or copy-on-write depending on whether
		 * chain->data exists or not and set the dirty state for
		 * the new buffer.  hammer2_io_new() will handle the
		 * zero-fill.
		 *
		 * If a dedup_off was supplied this is an existing block
		 * and no COW, copy, or further modification is required.
		 */
		KKASSERT(chain != &hmp->vchain && chain != &hmp->fchain);

		if (wasinitial && dedup_off == 0) {
			error = hammer2_io_new(hmp, chain->bref.type,
					       chain->bref.data_off,
					       chain->bytes, &dio);
		} else {
			error = hammer2_io_bread(hmp, chain->bref.type,
						 chain->bref.data_off,
						 chain->bytes, &dio);
		}
		hammer2_adjreadcounter(&chain->bref, chain->bytes);

		/*
		 * If an I/O error occurs make sure callers cannot accidentally
		 * modify the old buffer's contents and corrupt the filesystem.
		 *
		 * NOTE: hammer2_io_data() call issues bkvasync()
		 */
		if (error) {
			kprintf("hammer2_chain_modify: hmp=%p I/O error\n",
				hmp);
			chain->error = HAMMER2_ERROR_EIO;
			hammer2_io_brelse(&dio);
			hammer2_io_brelse(&chain->dio);
			chain->data = NULL;
			break;
		}
		chain->error = 0;
		bdata = hammer2_io_data(dio, chain->bref.data_off);

		if (chain->data) {
			/*
			 * COW (unless a dedup).
			 */
			KKASSERT(chain->dio != NULL);
			if (chain->data != (void *)bdata && dedup_off == 0) {
				bcopy(chain->data, bdata, chain->bytes);
			}
		} else if (wasinitial == 0) {
			/*
			 * We have a problem.  We were asked to COW but
			 * we don't have any data to COW with!
			 */
			panic("hammer2_chain_modify: having a COW %p\n",
			      chain);
		}

		/*
		 * Retire the old buffer, replace with the new.  Dirty or
		 * redirty the new buffer.
		 *
		 * WARNING! The system buffer cache may have already flushed
		 *	    the buffer, so we must be sure to [re]dirty it
		 *	    for further modification.
		 *
		 *	    If dedup_off was supplied, the caller is not
		 *	    expected to make any further modification to the
		 *	    buffer.
		 *
		 * WARNING! hammer2_get_gdata() assumes dio never transitions
		 *	    through NULL in order to optimize away unnecessary
		 *	    diolk operations.
		 */
		{
			hammer2_io_t *tio;

			if ((tio = chain->dio) != NULL)
				hammer2_io_bqrelse(&tio);
			chain->data = (void *)bdata;
			chain->dio = dio;
			if (dedup_off == 0)
				hammer2_io_setdirty(dio);
		}
		break;
	default:
		panic("hammer2_chain_modify: illegal non-embedded type %d",
		      chain->bref.type);
		break;

	}
skip2:
	/*
	 * setflush on parent indicating that the parent must recurse down
	 * to us.  Do not call on chain itself which might already have it
	 * set.
	 */
	if (chain->parent)
		hammer2_chain_setflush(chain->parent);
	lockmgr(&chain->diolk, LK_RELEASE);

	return (chain->error);
}

/*
 * Modify the chain associated with an inode.
2091 */ 2092 int 2093 hammer2_chain_modify_ip(hammer2_inode_t *ip, hammer2_chain_t *chain, 2094 hammer2_tid_t mtid, int flags) 2095 { 2096 int error; 2097 2098 hammer2_inode_modify(ip); 2099 error = hammer2_chain_modify(chain, mtid, 0, flags); 2100 2101 return error; 2102 } 2103 2104 /* 2105 * Volume header data locks 2106 */ 2107 void 2108 hammer2_voldata_lock(hammer2_dev_t *hmp) 2109 { 2110 lockmgr(&hmp->vollk, LK_EXCLUSIVE); 2111 } 2112 2113 void 2114 hammer2_voldata_unlock(hammer2_dev_t *hmp) 2115 { 2116 lockmgr(&hmp->vollk, LK_RELEASE); 2117 } 2118 2119 void 2120 hammer2_voldata_modify(hammer2_dev_t *hmp) 2121 { 2122 if ((hmp->vchain.flags & HAMMER2_CHAIN_MODIFIED) == 0) { 2123 atomic_add_long(&hammer2_count_modified_chains, 1); 2124 atomic_set_int(&hmp->vchain.flags, HAMMER2_CHAIN_MODIFIED); 2125 hammer2_pfs_memory_inc(hmp->vchain.pmp); 2126 } 2127 } 2128 2129 /* 2130 * This function returns the chain at the nearest key within the specified 2131 * range. The returned chain will be referenced but not locked. 2132 * 2133 * This function will recurse through chain->rbtree as necessary and will 2134 * return a *key_nextp suitable for iteration. *key_nextp is only set if 2135 * the iteration value is less than the current value of *key_nextp. 2136 * 2137 * The caller should use (*key_nextp) to calculate the actual range of 2138 * the returned element, which will be (key_beg to *key_nextp - 1), because 2139 * there might be another element which is superior to the returned element 2140 * and overlaps it. 2141 * 2142 * (*key_nextp) can be passed as key_beg in an iteration only while non-NULL 2143 * chains continue to be returned. On EOF (*key_nextp) may overflow since 2144 * it will wind up being (key_end + 1). 2145 * 2146 * WARNING! Must be called with child's spinlock held. Spinlock remains 2147 * held through the operation. 
 */
struct hammer2_chain_find_info {
	hammer2_chain_t		*best;		/* best candidate so far */
	hammer2_key_t		key_beg;	/* inclusive search start */
	hammer2_key_t		key_end;	/* inclusive search end */
	hammer2_key_t		key_next;	/* iteration value, truncated */
};

static int hammer2_chain_find_cmp(hammer2_chain_t *child, void *data);
static int hammer2_chain_find_callback(hammer2_chain_t *child, void *data);

/*
 * Scan parent->core.rbtree for the chain nearest (key_beg, key_end).
 * Returns the best candidate (or NULL) and updates *key_nextp.  See the
 * block comment above for the full contract; caller holds the spinlock.
 */
static
hammer2_chain_t *
hammer2_chain_find(hammer2_chain_t *parent, hammer2_key_t *key_nextp,
		  hammer2_key_t key_beg, hammer2_key_t key_end)
{
	struct hammer2_chain_find_info info;

	info.best = NULL;
	info.key_beg = key_beg;
	info.key_end = key_end;
	info.key_next = *key_nextp;

	RB_SCAN(hammer2_chain_tree, &parent->core.rbtree,
		hammer2_chain_find_cmp, hammer2_chain_find_callback,
		&info);
	*key_nextp = info.key_next;
#if 0
	kprintf("chain_find %p %016jx:%016jx next=%016jx\n",
		parent, key_beg, key_end, *key_nextp);
#endif

	return (info.best);
}

/*
 * RB_SCAN range comparator.  A child's key range is derived from
 * (bref.key, bref.keybits).  Returns -1 if the child lies entirely below
 * the search range, 1 if entirely above, 0 if it overlaps (visit it).
 */
static
int
hammer2_chain_find_cmp(hammer2_chain_t *child, void *data)
{
	struct hammer2_chain_find_info *info = data;
	hammer2_key_t child_beg;
	hammer2_key_t child_end;

	child_beg = child->bref.key;
	child_end = child_beg + ((hammer2_key_t)1 << child->bref.keybits) - 1;

	if (child_end < info->key_beg)
		return(-1);
	if (child_beg > info->key_end)
		return(1);
	return(0);
}

/*
 * RB_SCAN callback.  Tracks the nearest overlapping child in info->best
 * and truncates info->key_next so the caller can compute the returned
 * element's effective (non-overlapped) range.  Always returns 0 so the
 * scan visits every overlapping child.
 */
static
int
hammer2_chain_find_callback(hammer2_chain_t *child, void *data)
{
	struct hammer2_chain_find_info *info = data;
	hammer2_chain_t *best;
	hammer2_key_t child_end;

	/*
	 * WARNING! Layerq is scanned forwards, exact matches should keep
	 *	    the existing info->best.
	 */
	if ((best = info->best) == NULL) {
		/*
		 * No previous best.  Assign best
		 */
		info->best = child;
	} else if (best->bref.key <= info->key_beg &&
		   child->bref.key <= info->key_beg) {
		/*
		 * Illegal overlap.
		 */
		KKASSERT(0);
		/*info->best = child;*/
	} else if (child->bref.key < best->bref.key) {
		/*
		 * Child has a nearer key and best is not flush with key_beg.
		 * Set best to child.  Truncate key_next to the old best key.
		 */
		info->best = child;
		if (info->key_next > best->bref.key || info->key_next == 0)
			info->key_next = best->bref.key;
	} else if (child->bref.key == best->bref.key) {
		/*
		 * If our current best is flush with the child then this
		 * is an illegal overlap.
		 *
		 * key_next will automatically be limited to the smaller of
		 * the two end-points.
		 */
		KKASSERT(0);
		info->best = child;
	} else {
		/*
		 * Keep the current best but truncate key_next to the child's
		 * base.
		 *
		 * key_next will also automatically be limited to the smaller
		 * of the two end-points (probably not necessary for this case
		 * but we do it anyway).
		 */
		if (info->key_next > child->bref.key || info->key_next == 0)
			info->key_next = child->bref.key;
	}

	/*
	 * Always truncate key_next based on child's end-of-range.
	 * (child_end == 0 means the child's range wrapped the 64-bit key
	 * space; skip the truncation in that case.)
	 */
	child_end = child->bref.key + ((hammer2_key_t)1 << child->bref.keybits);
	if (child_end && (info->key_next > child_end || info->key_next == 0))
		info->key_next = child_end;

	return(0);
}

/*
 * Retrieve the specified chain from a media blockref, creating the
 * in-memory chain structure which reflects it.  The returned chain is
 * held and locked according to (how) (HAMMER2_RESOLVE_*).  The caller must
 * handle crc-checks and so forth, and should check chain->error before
 * assuming that the data is good.
 *
 * To handle insertion races pass the INSERT_RACE flag along with the
 * generation number of the core.  NULL will be returned if the generation
 * number changes before we have a chance to insert the chain.  Insert
 * races can occur because the parent might be held shared.
 *
 * Caller must hold the parent locked shared or exclusive since we may
 * need the parent's bref array to find our block.
 *
 * WARNING! chain->pmp is always set to NULL for any chain representing
 *	    part of the super-root topology.
 */
hammer2_chain_t *
hammer2_chain_get(hammer2_chain_t *parent, int generation,
		  hammer2_blockref_t *bref, int how)
{
	hammer2_dev_t *hmp = parent->hmp;
	hammer2_chain_t *chain;
	int error;

	/*
	 * Allocate a chain structure representing the existing media
	 * entry.  Resulting chain has one ref and is not locked.
	 *
	 * A PFS root is allocated with a NULL pmp (see the WARNING above);
	 * everything else inherits the parent's pmp.
	 */
	if (bref->flags & HAMMER2_BREF_FLAG_PFSROOT)
		chain = hammer2_chain_alloc(hmp, NULL, bref);
	else
		chain = hammer2_chain_alloc(hmp, parent->pmp, bref);
	/* ref'd chain returned */

	/*
	 * Flag that the chain is in the parent's blockmap so delete/flush
	 * knows what to do with it.
	 */
	atomic_set_int(&chain->flags, HAMMER2_CHAIN_BMAPPED);

	/*
	 * chain must be locked to avoid unexpected ripouts
	 */
	hammer2_chain_lock(chain, how);

	/*
	 * Link the chain into its parent.  A spinlock is required to safely
	 * access the RBTREE, and it is possible to collide with another
	 * hammer2_chain_get() operation because the caller might only hold
	 * a shared lock on the parent.
	 *
	 * NOTE: Get races can occur quite often when we distribute
	 *	 asynchronous read-aheads across multiple threads.
	 */
	KKASSERT(parent->refs > 0);
	error = hammer2_chain_insert(parent, chain,
				     HAMMER2_CHAIN_INSERT_SPIN |
				     HAMMER2_CHAIN_INSERT_RACE,
				     generation);
	if (error) {
		/* lost the insertion race; undo the ref+lock and bail */
		KKASSERT((chain->flags & HAMMER2_CHAIN_ONRBTREE) == 0);
		/*kprintf("chain %p get race\n", chain);*/
		hammer2_chain_unlock(chain);
		hammer2_chain_drop(chain);
		chain = NULL;
	} else {
		KKASSERT(chain->flags & HAMMER2_CHAIN_ONRBTREE);
	}

	/*
	 * Return our new chain referenced but not locked, or NULL if
	 * a race occurred.
	 */
	return (chain);
}

/*
 * Lookup initialization/completion API.  lookup_init refs and locks the
 * parent (shared if HAMMER2_LOOKUP_SHARED, else exclusive) and returns it;
 * lookup_done undoes both (NULL parent is a no-op).
 */
hammer2_chain_t *
hammer2_chain_lookup_init(hammer2_chain_t *parent, int flags)
{
	hammer2_chain_ref(parent);
	if (flags & HAMMER2_LOOKUP_SHARED) {
		hammer2_chain_lock(parent, HAMMER2_RESOLVE_ALWAYS |
					   HAMMER2_RESOLVE_SHARED);
	} else {
		hammer2_chain_lock(parent, HAMMER2_RESOLVE_ALWAYS);
	}
	return (parent);
}

void
hammer2_chain_lookup_done(hammer2_chain_t *parent)
{
	if (parent) {
		hammer2_chain_unlock(parent);
		hammer2_chain_drop(parent);
	}
}

/*
 * Take the locked chain and return a locked parent.  The chain remains
 * locked on return, but may have to be temporarily unlocked to acquire
 * the parent.  Because of this, (chain) must be stable and cannot be
 * deleted while it was temporarily unlocked (typically means that (chain)
 * is an inode).
 *
 * Pass HAMMER2_RESOLVE_* flags in flags.
 *
 * This will work even if the chain is errored, and the caller can check
 * parent->error on return if desired since the parent will be locked.
 *
 * This function handles the lock order reversal.
2382 */ 2383 hammer2_chain_t * 2384 hammer2_chain_getparent(hammer2_chain_t *chain, int flags) 2385 { 2386 hammer2_chain_t *parent; 2387 2388 /* 2389 * Be careful of order, chain must be unlocked before parent 2390 * is locked below to avoid a deadlock. Try it trivially first. 2391 */ 2392 parent = chain->parent; 2393 if (parent == NULL) 2394 panic("hammer2_chain_getparent: no parent"); 2395 hammer2_chain_ref(parent); 2396 if (hammer2_chain_lock(parent, flags|HAMMER2_RESOLVE_NONBLOCK) == 0) 2397 return parent; 2398 2399 for (;;) { 2400 hammer2_chain_unlock(chain); 2401 hammer2_chain_lock(parent, flags); 2402 hammer2_chain_lock(chain, flags); 2403 2404 /* 2405 * Parent relinking races are quite common. We have to get 2406 * it right or we will blow up the block table. 2407 */ 2408 if (chain->parent == parent) 2409 break; 2410 hammer2_chain_unlock(parent); 2411 hammer2_chain_drop(parent); 2412 cpu_ccfence(); 2413 parent = chain->parent; 2414 if (parent == NULL) 2415 panic("hammer2_chain_getparent: no parent"); 2416 hammer2_chain_ref(parent); 2417 } 2418 return parent; 2419 } 2420 2421 /* 2422 * Take the locked chain and return a locked parent. The chain is unlocked 2423 * and dropped. *chainp is set to the returned parent as a convenience. 2424 * Pass HAMMER2_RESOLVE_* flags in flags. 2425 * 2426 * This will work even if the chain is errored, and the caller can check 2427 * parent->error on return if desired since the parent will be locked. 2428 * 2429 * The chain does NOT need to be stable. We use a tracking structure 2430 * to track the expected parent if the chain is deleted out from under us. 2431 * 2432 * This function handles the lock order reversal. 
 */
hammer2_chain_t *
hammer2_chain_repparent(hammer2_chain_t **chainp, int flags)
{
	hammer2_chain_t *chain;
	hammer2_chain_t *parent;
	struct hammer2_reptrack reptrack;
	struct hammer2_reptrack **repp;

	/*
	 * Be careful of order, chain must be unlocked before parent
	 * is locked below to avoid a deadlock.  Try it trivially first.
	 */
	chain = *chainp;
	parent = chain->parent;
	if (parent == NULL) {
		hammer2_spin_unex(&chain->core.spin);
		panic("hammer2_chain_repparent: no parent");
	}
	hammer2_chain_ref(parent);
	if (hammer2_chain_lock(parent, flags|HAMMER2_RESOLVE_NONBLOCK) == 0) {
		/* fast path: got the parent without blocking */
		hammer2_chain_unlock(chain);
		hammer2_chain_drop(chain);
		*chainp = parent;

		return parent;
	}

	/*
	 * Ok, now it gets a bit nasty.  There are multiple situations where
	 * the parent might be in the middle of a deletion, or where the child
	 * (chain) might be deleted the instant we let go of its lock.
	 * We can potentially end up in a no-win situation!
	 *
	 * In particular, the indirect_maintenance() case can cause these
	 * situations.
	 *
	 * To deal with this we install a reptrack structure in the parent
	 * This reptrack structure 'owns' the parent ref and will automatically
	 * migrate to the parent's parent if the parent is deleted permanently.
	 */
	hammer2_spin_init(&reptrack.spin, "h2reptrk");
	reptrack.chain = parent;
	hammer2_chain_ref(parent);		/* for the reptrack */

	hammer2_spin_ex(&parent->core.spin);
	reptrack.next = parent->core.reptrack;
	parent->core.reptrack = &reptrack;
	hammer2_spin_unex(&parent->core.spin);

	hammer2_chain_unlock(chain);
	hammer2_chain_drop(chain);
	chain = NULL;	/* gone */

	/*
	 * At the top of this loop, chain is gone and parent is refd both
	 * by us explicitly AND via our reptrack.  We are attempting to
	 * lock parent.
	 */
	for (;;) {
		hammer2_chain_lock(parent, flags);

		if (reptrack.chain == parent)
			break;
		/*
		 * The reptrack was migrated while we blocked; chase it to
		 * its new chain.
		 */
		hammer2_chain_unlock(parent);
		hammer2_chain_drop(parent);

		kprintf("hammer2: debug REPTRACK %p->%p\n",
			parent, reptrack.chain);
		hammer2_spin_ex(&reptrack.spin);
		parent = reptrack.chain;
		hammer2_chain_ref(parent);
		hammer2_spin_unex(&reptrack.spin);
	}

	/*
	 * Once parent is locked and matches our reptrack, our reptrack
	 * will be stable and we have our parent.  We can unlink our
	 * reptrack.
	 *
	 * WARNING!  Remember that the chain lock might be shared.  Chains
	 *	     locked shared have stable parent linkages.
	 */
	hammer2_spin_ex(&parent->core.spin);
	repp = &parent->core.reptrack;
	while (*repp != &reptrack)
		repp = &(*repp)->next;
	*repp = reptrack.next;
	hammer2_spin_unex(&parent->core.spin);

	hammer2_chain_drop(parent);	/* reptrack ref */
	*chainp = parent;		/* return parent lock+ref */

	return parent;
}

/*
 * Dispose of any linked reptrack structures in (chain) by shifting them to
 * (parent).  Both (chain) and (parent) must be exclusively locked.
 *
 * This is interlocked against any children of (chain) on the other side.
 * No children should remain as-of when this is called so we can test
 * core.reptrack without holding the spin-lock.
 *
 * Used whenever the caller intends to permanently delete chains related
 * to topological recursions (BREF_TYPE_INDIRECT, BREF_TYPE_FREEMAP_NODE),
 * where the chains underneath the node being deleted are given a new parent
 * above the node being deleted.
2541 */ 2542 static 2543 void 2544 hammer2_chain_repchange(hammer2_chain_t *parent, hammer2_chain_t *chain) 2545 { 2546 struct hammer2_reptrack *reptrack; 2547 2548 KKASSERT(chain->core.live_count == 0 && RB_EMPTY(&chain->core.rbtree)); 2549 while (chain->core.reptrack) { 2550 hammer2_spin_ex(&parent->core.spin); 2551 hammer2_spin_ex(&chain->core.spin); 2552 reptrack = chain->core.reptrack; 2553 if (reptrack == NULL) { 2554 hammer2_spin_unex(&chain->core.spin); 2555 hammer2_spin_unex(&parent->core.spin); 2556 break; 2557 } 2558 hammer2_spin_ex(&reptrack->spin); 2559 chain->core.reptrack = reptrack->next; 2560 reptrack->chain = parent; 2561 reptrack->next = parent->core.reptrack; 2562 parent->core.reptrack = reptrack; 2563 hammer2_chain_ref(parent); /* reptrack */ 2564 2565 hammer2_spin_unex(&chain->core.spin); 2566 hammer2_spin_unex(&parent->core.spin); 2567 kprintf("hammer2: debug repchange %p %p->%p\n", 2568 reptrack, chain, parent); 2569 hammer2_chain_drop(chain); /* reptrack */ 2570 } 2571 } 2572 2573 /* 2574 * Locate the first chain whos key range overlaps (key_beg, key_end) inclusive. 2575 * (*parentp) typically points to an inode but can also point to a related 2576 * indirect block and this function will recurse upwards and find the inode 2577 * or the nearest undeleted indirect block covering the key range. 2578 * 2579 * This function unconditionally sets *errorp, replacing any previous value. 2580 * 2581 * (*parentp) must be exclusive or shared locked (depending on flags) and 2582 * referenced and can be an inode or an existing indirect block within the 2583 * inode. 2584 * 2585 * If (*parent) is errored out, this function will not attempt to recurse 2586 * the radix tree and will return NULL along with an appropriate *errorp. 2587 * If NULL is returned and *errorp is 0, the requested lookup could not be 2588 * located. 
2589 * 2590 * On return (*parentp) will be modified to point at the deepest parent chain 2591 * element encountered during the search, as a helper for an insertion or 2592 * deletion. 2593 * 2594 * The new (*parentp) will be locked shared or exclusive (depending on flags), 2595 * and referenced, and the old will be unlocked and dereferenced (no change 2596 * if they are both the same). This is particularly important if the caller 2597 * wishes to insert a new chain, (*parentp) will be set properly even if NULL 2598 * is returned, as long as no error occurred. 2599 * 2600 * The matching chain will be returned locked according to flags. 2601 * 2602 * -- 2603 * 2604 * NULL is returned if no match was found, but (*parentp) will still 2605 * potentially be adjusted. 2606 * 2607 * On return (*key_nextp) will point to an iterative value for key_beg. 2608 * (If NULL is returned (*key_nextp) is set to (key_end + 1)). 2609 * 2610 * This function will also recurse up the chain if the key is not within the 2611 * current parent's range. (*parentp) can never be set to NULL. An iteration 2612 * can simply allow (*parentp) to float inside the loop. 2613 * 2614 * NOTE! chain->data is not always resolved. By default it will not be 2615 * resolved for BREF_TYPE_DATA, FREEMAP_NODE, or FREEMAP_LEAF. Use 2616 * HAMMER2_LOOKUP_ALWAYS to force resolution (but be careful w/ 2617 * BREF_TYPE_DATA as the device buffer can alias the logical file 2618 * buffer). 
 */

hammer2_chain_t *
hammer2_chain_lookup(hammer2_chain_t **parentp, hammer2_key_t *key_nextp,
		     hammer2_key_t key_beg, hammer2_key_t key_end,
		     int *errorp, int flags)
{
	hammer2_dev_t *hmp;
	hammer2_chain_t *parent;
	hammer2_chain_t *chain;
	hammer2_blockref_t *base;
	hammer2_blockref_t *bref;
	hammer2_blockref_t bcopy;
	hammer2_key_t scan_beg;
	hammer2_key_t scan_end;
	int count = 0;
	int how_always = HAMMER2_RESOLVE_ALWAYS;
	int how_maybe = HAMMER2_RESOLVE_MAYBE;
	int how;
	int generation;
	int maxloops = 300000;
	/*
	 * NOTE(review): save_mtx is assigned below but never read
	 * afterward — appears vestigial; confirm against upstream before
	 * removing.
	 */
	volatile hammer2_mtx_t save_mtx;

	/*
	 * Translate the HAMMER2_LOOKUP_* flags into the RESOLVE modes used
	 * for chain locks during the descent.
	 */
	if (flags & HAMMER2_LOOKUP_ALWAYS) {
		how_maybe = how_always;
		how = HAMMER2_RESOLVE_ALWAYS;
	} else if (flags & HAMMER2_LOOKUP_NODATA) {
		how = HAMMER2_RESOLVE_NEVER;
	} else {
		how = HAMMER2_RESOLVE_MAYBE;
	}
	if (flags & HAMMER2_LOOKUP_SHARED) {
		how_maybe |= HAMMER2_RESOLVE_SHARED;
		how_always |= HAMMER2_RESOLVE_SHARED;
		how |= HAMMER2_RESOLVE_SHARED;
	}

	/*
	 * Recurse (*parentp) upward if necessary until the parent completely
	 * encloses the key range or we hit the inode.
	 *
	 * Handle races against the flusher deleting indirect nodes on its
	 * way back up by continuing to recurse upward past the deletion.
	 */
	parent = *parentp;
	hmp = parent->hmp;
	*errorp = 0;

	while (parent->bref.type == HAMMER2_BREF_TYPE_INDIRECT ||
	       parent->bref.type == HAMMER2_BREF_TYPE_FREEMAP_NODE) {
		scan_beg = parent->bref.key;
		scan_end = scan_beg +
			   ((hammer2_key_t)1 << parent->bref.keybits) - 1;
		if ((parent->flags & HAMMER2_CHAIN_DELETED) == 0) {
			if (key_beg >= scan_beg && key_end <= scan_end)
				break;
		}
		parent = hammer2_chain_repparent(parentp, how_maybe);
	}
again:
	if (--maxloops == 0)
		panic("hammer2_chain_lookup: maxloops");
	/*
	 * Locate the blockref array.  Currently we do a fully associative
	 * search through the array.
	 */
	switch(parent->bref.type) {
	case HAMMER2_BREF_TYPE_INODE:
		/*
		 * Special shortcut for embedded data returns the inode
		 * itself.  Callers must detect this condition and access
		 * the embedded data (the strategy code does this for us).
		 *
		 * This is only applicable to regular files and softlinks.
		 *
		 * We need a second lock on parent.  Since we already have
		 * a lock we must pass LOCKAGAIN to prevent unexpected
		 * blocking (we don't want to block on a second shared
		 * ref if an exclusive lock is pending)
		 */
		if (parent->data->ipdata.meta.op_flags &
		    HAMMER2_OPFLAG_DIRECTDATA) {
			if (flags & HAMMER2_LOOKUP_NODIRECT) {
				chain = NULL;
				*key_nextp = key_end + 1;
				goto done;
			}
			hammer2_chain_ref(parent);
			hammer2_chain_lock(parent, how_always |
						   HAMMER2_RESOLVE_LOCKAGAIN);
			*key_nextp = key_end + 1;
			return (parent);
		}
		base = &parent->data->ipdata.u.blockset.blockref[0];
		count = HAMMER2_SET_COUNT;
		break;
	case HAMMER2_BREF_TYPE_FREEMAP_NODE:
	case HAMMER2_BREF_TYPE_INDIRECT:
		/*
		 * Handle MATCHIND on the parent: if the parent's key range
		 * exactly matches the request, return the parent itself.
		 */
		if (flags & HAMMER2_LOOKUP_MATCHIND) {
			scan_beg = parent->bref.key;
			scan_end = scan_beg +
			       ((hammer2_key_t)1 << parent->bref.keybits) - 1;
			if (key_beg == scan_beg && key_end == scan_end) {
				chain = parent;
				hammer2_chain_ref(chain);
				hammer2_chain_lock(chain, how_maybe);
				*key_nextp = scan_end + 1;
				goto done;
			}
		}

		/*
		 * Optimize indirect blocks in the INITIAL state to avoid
		 * I/O.
		 */
		if (parent->flags & HAMMER2_CHAIN_INITIAL) {
			base = NULL;
		} else {
			if (parent->data == NULL) {
				/* should not happen; park for debugging */
				kprintf("parent->data is NULL %p\n", parent);
				while (1)
					tsleep(parent, 0, "xxx", 0);
			}
			base = &parent->data->npdata[0];
		}
		count = parent->bytes / sizeof(hammer2_blockref_t);
		break;
	case HAMMER2_BREF_TYPE_VOLUME:
		base = &parent->data->voldata.sroot_blockset.blockref[0];
		count = HAMMER2_SET_COUNT;
		break;
	case HAMMER2_BREF_TYPE_FREEMAP:
		base = &parent->data->blkset.blockref[0];
		count = HAMMER2_SET_COUNT;
		break;
	default:
		kprintf("hammer2_chain_lookup: unrecognized "
			"blockref(B) type: %d",
			parent->bref.type);
		while (1)
			tsleep(&base, 0, "dead", 0);
		panic("hammer2_chain_lookup: unrecognized "
		      "blockref(B) type: %d",
		      parent->bref.type);
		base = NULL;	/* safety */
		count = 0;	/* safety */
	}

	/*
	 * No lookup is possible if the parent is errored.  We delayed
	 * this check as long as we could to ensure that the parent backup,
	 * embedded data, and MATCHIND code could still execute.
	 */
	if (parent->error) {
		*errorp = parent->error;
		return NULL;
	}

	/*
	 * Merged scan to find next candidate.
	 *
	 * hammer2_base_*() functions require the parent->core.live_* fields
	 * to be synchronized.
	 *
	 * We need to hold the spinlock to access the block array and RB tree
	 * and to interlock chain creation.
	 */
	if ((parent->flags & HAMMER2_CHAIN_COUNTEDBREFS) == 0)
		hammer2_chain_countbrefs(parent, base, count);

	/*
	 * Combined search
	 */
	hammer2_spin_ex(&parent->core.spin);
	chain = hammer2_combined_find(parent, base, count,
				      key_nextp,
				      key_beg, key_end,
				      &bref);
	generation = parent->core.generation;

	/*
	 * Exhausted parent chain, iterate.
	 */
	if (bref == NULL) {
		KKASSERT(chain == NULL);
		hammer2_spin_unex(&parent->core.spin);
		if (key_beg == key_end)	/* short cut single-key case */
			return (NULL);

		/*
		 * Stop if we reached the end of the iteration.
		 */
		if (parent->bref.type != HAMMER2_BREF_TYPE_INDIRECT &&
		    parent->bref.type != HAMMER2_BREF_TYPE_FREEMAP_NODE) {
			return (NULL);
		}

		/*
		 * Calculate next key, stop if we reached the end of the
		 * iteration (key_beg wrapped to 0), otherwise go up one
		 * level and loop.
		 */
		key_beg = parent->bref.key +
			  ((hammer2_key_t)1 << parent->bref.keybits);
		if (key_beg == 0 || key_beg > key_end)
			return (NULL);
		parent = hammer2_chain_repparent(parentp, how_maybe);
		goto again;
	}

	/*
	 * Selected from blockref or in-memory chain.  bcopy is taken while
	 * the spinlock is still held so it can be validated against the
	 * chain's bref after the lock transition below.
	 */
	bcopy = *bref;
	if (chain == NULL) {
		hammer2_spin_unex(&parent->core.spin);
		if (bcopy.type == HAMMER2_BREF_TYPE_INDIRECT ||
		    bcopy.type == HAMMER2_BREF_TYPE_FREEMAP_NODE) {
			chain = hammer2_chain_get(parent, generation,
						  &bcopy, how_maybe);
		} else {
			chain = hammer2_chain_get(parent, generation,
						  &bcopy, how);
		}
		if (chain == NULL)
			goto again;
	} else {
		hammer2_chain_ref(chain);
		hammer2_spin_unex(&parent->core.spin);

		/*
		 * chain is referenced but not locked.  We must lock the
		 * chain to obtain definitive state.
		 */
		if (bcopy.type == HAMMER2_BREF_TYPE_INDIRECT ||
		    bcopy.type == HAMMER2_BREF_TYPE_FREEMAP_NODE) {
			hammer2_chain_lock(chain, how_maybe);
		} else {
			hammer2_chain_lock(chain, how);
		}
		KKASSERT(chain->parent == parent);
	}
	if (bcmp(&bcopy, &chain->bref, sizeof(bcopy)) ||
	    chain->parent != parent) {
		/* chain changed out from under us while unlocked; retry */
		hammer2_chain_unlock(chain);
		hammer2_chain_drop(chain);
		chain = NULL;	/* SAFETY */
		goto again;
	}


	/*
	 * Skip deleted chains (XXX cache 'i' end-of-block-array? XXX)
	 *
	 * NOTE: Chain's key range is not relevant as there might be
	 *	 one-offs within the range that are not deleted.
	 *
	 * NOTE: Lookups can race delete-duplicate because
	 *	 delete-duplicate does not lock the parent's core
	 *	 (they just use the spinlock on the core).
	 */
	if (chain->flags & HAMMER2_CHAIN_DELETED) {
		kprintf("skip deleted chain %016jx.%02x key=%016jx\n",
			chain->bref.data_off, chain->bref.type,
			chain->bref.key);
		hammer2_chain_unlock(chain);
		hammer2_chain_drop(chain);
		chain = NULL;	/* SAFETY */
		key_beg = *key_nextp;
		if (key_beg == 0 || key_beg > key_end)
			return(NULL);
		goto again;
	}

	/*
	 * If the chain element is an indirect block it becomes the new
	 * parent and we loop on it.  We must maintain our top-down locks
	 * to prevent the flusher from interfering (i.e. doing a
	 * delete-duplicate and leaving us recursing down a deleted chain).
	 *
	 * The parent always has to be locked with at least RESOLVE_MAYBE
	 * so we can access its data.  It might need a fixup if the caller
	 * passed incompatible flags.  Be careful not to cause a deadlock
	 * as a data-load requires an exclusive lock.
	 *
	 * If HAMMER2_LOOKUP_MATCHIND is set and the indirect block's key
	 * range is within the requested key range we return the indirect
	 * block and do NOT loop.  This is usually only used to acquire
	 * freemap nodes.
	 */
	if (chain->bref.type == HAMMER2_BREF_TYPE_INDIRECT ||
	    chain->bref.type == HAMMER2_BREF_TYPE_FREEMAP_NODE) {
		save_mtx = parent->lock;
		hammer2_chain_unlock(parent);
		hammer2_chain_drop(parent);
		*parentp = parent = chain;
		chain = NULL;	/* SAFETY */
		goto again;
	}
done:
	/*
	 * All done, return the locked chain.
	 *
	 * If the caller does not want a locked chain, replace the lock with
	 * a ref.  Perhaps this can eventually be optimized to not obtain the
	 * lock in the first place for situations where the data does not
	 * need to be resolved.
	 *
	 * NOTE! A chain->error must be tested by the caller upon return.
	 *	 *errorp is only set based on issues which occur while
	 *	 trying to reach the chain.
	 */
	return (chain);
}

/*
 * After having issued a lookup we can iterate all matching keys.
 *
 * If chain is non-NULL we continue the iteration from just after its index.
 *
 * If chain is NULL we assume the parent was exhausted and continue the
 * iteration at the next parent.
 *
 * If a fatal error occurs (typically an I/O error), a dummy chain is
 * returned with chain->error and error-identifying information set.  This
 * chain will assert if you try to do anything fancy with it.
 *
 * XXX Depending on where the error occurs we should allow continued iteration.
 *
 * parent must be locked on entry and remains locked throughout.  chain's
 * lock status must match flags.  Chain is always at least referenced.
 *
 * WARNING!  The MATCHIND flag does not apply to this function.
2954 */ 2955 hammer2_chain_t * 2956 hammer2_chain_next(hammer2_chain_t **parentp, hammer2_chain_t *chain, 2957 hammer2_key_t *key_nextp, 2958 hammer2_key_t key_beg, hammer2_key_t key_end, 2959 int *errorp, int flags) 2960 { 2961 hammer2_chain_t *parent; 2962 int how_maybe; 2963 2964 /* 2965 * Calculate locking flags for upward recursion. 2966 */ 2967 how_maybe = HAMMER2_RESOLVE_MAYBE; 2968 if (flags & HAMMER2_LOOKUP_SHARED) 2969 how_maybe |= HAMMER2_RESOLVE_SHARED; 2970 2971 parent = *parentp; 2972 *errorp = 0; 2973 2974 /* 2975 * Calculate the next index and recalculate the parent if necessary. 2976 */ 2977 if (chain) { 2978 key_beg = chain->bref.key + 2979 ((hammer2_key_t)1 << chain->bref.keybits); 2980 hammer2_chain_unlock(chain); 2981 hammer2_chain_drop(chain); 2982 2983 /* 2984 * chain invalid past this point, but we can still do a 2985 * pointer comparison w/parent. 2986 * 2987 * Any scan where the lookup returned degenerate data embedded 2988 * in the inode has an invalid index and must terminate. 2989 */ 2990 if (chain == parent) 2991 return(NULL); 2992 if (key_beg == 0 || key_beg > key_end) 2993 return(NULL); 2994 chain = NULL; 2995 } else if (parent->bref.type != HAMMER2_BREF_TYPE_INDIRECT && 2996 parent->bref.type != HAMMER2_BREF_TYPE_FREEMAP_NODE) { 2997 /* 2998 * We reached the end of the iteration. 2999 */ 3000 return (NULL); 3001 } else { 3002 /* 3003 * Continue iteration with next parent unless the current 3004 * parent covers the range. 3005 * 3006 * (This also handles the case of a deleted, empty indirect 3007 * node). 
3008 */ 3009 key_beg = parent->bref.key + 3010 ((hammer2_key_t)1 << parent->bref.keybits); 3011 if (key_beg == 0 || key_beg > key_end) 3012 return (NULL); 3013 parent = hammer2_chain_repparent(parentp, how_maybe); 3014 } 3015 3016 /* 3017 * And execute 3018 */ 3019 return (hammer2_chain_lookup(parentp, key_nextp, 3020 key_beg, key_end, 3021 errorp, flags)); 3022 } 3023 3024 /* 3025 * Caller wishes to iterate chains under parent, loading new chains into 3026 * chainp. Caller must initialize *chainp to NULL and *firstp to 1, and 3027 * then call hammer2_chain_scan() repeatedly until a non-zero return. 3028 * During the scan, *firstp will be set to 0 and (*chainp) will be replaced 3029 * with the returned chain for the scan. The returned *chainp will be 3030 * locked and referenced. Any prior contents will be unlocked and dropped. 3031 * 3032 * Caller should check the return value. A normal scan EOF will return 3033 * exactly HAMMER2_ERROR_EOF. Any other non-zero value indicates an 3034 * error trying to access parent data. Any error in the returned chain 3035 * must be tested separately by the caller. 3036 * 3037 * (*chainp) is dropped on each scan, but will only be set if the returned 3038 * element itself can recurse. Leaf elements are NOT resolved, loaded, or 3039 * returned via *chainp. The caller will get their bref only. 3040 * 3041 * The raw scan function is similar to lookup/next but does not seek to a key. 3042 * Blockrefs are iterated via first_bref = (parent, NULL) and 3043 * next_chain = (parent, bref). 3044 * 3045 * The passed-in parent must be locked and its data resolved. The function 3046 * nominally returns a locked and referenced *chainp != NULL for chains 3047 * the caller might need to recurse on (and will dipose of any *chainp passed 3048 * in). The caller must check the chain->bref.type either way. 
3049 */ 3050 int 3051 hammer2_chain_scan(hammer2_chain_t *parent, hammer2_chain_t **chainp, 3052 hammer2_blockref_t *bref, int *firstp, 3053 int flags) 3054 { 3055 hammer2_dev_t *hmp; 3056 hammer2_blockref_t *base; 3057 hammer2_blockref_t *bref_ptr; 3058 hammer2_key_t key; 3059 hammer2_key_t next_key; 3060 hammer2_chain_t *chain = NULL; 3061 int count = 0; 3062 int how_always = HAMMER2_RESOLVE_ALWAYS; 3063 int how_maybe = HAMMER2_RESOLVE_MAYBE; 3064 int how; 3065 int generation; 3066 int maxloops = 300000; 3067 int error; 3068 3069 hmp = parent->hmp; 3070 error = 0; 3071 3072 /* 3073 * Scan flags borrowed from lookup. 3074 */ 3075 if (flags & HAMMER2_LOOKUP_ALWAYS) { 3076 how_maybe = how_always; 3077 how = HAMMER2_RESOLVE_ALWAYS; 3078 } else if (flags & HAMMER2_LOOKUP_NODATA) { 3079 how = HAMMER2_RESOLVE_NEVER; 3080 } else { 3081 how = HAMMER2_RESOLVE_MAYBE; 3082 } 3083 if (flags & HAMMER2_LOOKUP_SHARED) { 3084 how_maybe |= HAMMER2_RESOLVE_SHARED; 3085 how_always |= HAMMER2_RESOLVE_SHARED; 3086 how |= HAMMER2_RESOLVE_SHARED; 3087 } 3088 3089 /* 3090 * Calculate key to locate first/next element, unlocking the previous 3091 * element as we go. Be careful, the key calculation can overflow. 3092 * 3093 * (also reset bref to NULL) 3094 */ 3095 if (*firstp) { 3096 key = 0; 3097 *firstp = 0; 3098 } else { 3099 key = bref->key + ((hammer2_key_t)1 << bref->keybits); 3100 if ((chain = *chainp) != NULL) { 3101 *chainp = NULL; 3102 hammer2_chain_unlock(chain); 3103 hammer2_chain_drop(chain); 3104 chain = NULL; 3105 } 3106 if (key == 0) { 3107 error |= HAMMER2_ERROR_EOF; 3108 goto done; 3109 } 3110 } 3111 3112 again: 3113 if (parent->error) { 3114 error = parent->error; 3115 goto done; 3116 } 3117 if (--maxloops == 0) 3118 panic("hammer2_chain_scan: maxloops"); 3119 3120 /* 3121 * Locate the blockref array. Currently we do a fully associative 3122 * search through the array. 
3123 */ 3124 switch(parent->bref.type) { 3125 case HAMMER2_BREF_TYPE_INODE: 3126 /* 3127 * An inode with embedded data has no sub-chains. 3128 * 3129 * WARNING! Bulk scan code may pass a static chain marked 3130 * as BREF_TYPE_INODE with a copy of the volume 3131 * root blockset to snapshot the volume. 3132 */ 3133 if (parent->data->ipdata.meta.op_flags & 3134 HAMMER2_OPFLAG_DIRECTDATA) { 3135 error |= HAMMER2_ERROR_EOF; 3136 goto done; 3137 } 3138 base = &parent->data->ipdata.u.blockset.blockref[0]; 3139 count = HAMMER2_SET_COUNT; 3140 break; 3141 case HAMMER2_BREF_TYPE_FREEMAP_NODE: 3142 case HAMMER2_BREF_TYPE_INDIRECT: 3143 /* 3144 * Optimize indirect blocks in the INITIAL state to avoid 3145 * I/O. 3146 */ 3147 if (parent->flags & HAMMER2_CHAIN_INITIAL) { 3148 base = NULL; 3149 } else { 3150 if (parent->data == NULL) 3151 panic("parent->data is NULL"); 3152 base = &parent->data->npdata[0]; 3153 } 3154 count = parent->bytes / sizeof(hammer2_blockref_t); 3155 break; 3156 case HAMMER2_BREF_TYPE_VOLUME: 3157 base = &parent->data->voldata.sroot_blockset.blockref[0]; 3158 count = HAMMER2_SET_COUNT; 3159 break; 3160 case HAMMER2_BREF_TYPE_FREEMAP: 3161 base = &parent->data->blkset.blockref[0]; 3162 count = HAMMER2_SET_COUNT; 3163 break; 3164 default: 3165 panic("hammer2_chain_lookup: unrecognized blockref type: %d", 3166 parent->bref.type); 3167 base = NULL; /* safety */ 3168 count = 0; /* safety */ 3169 } 3170 3171 /* 3172 * Merged scan to find next candidate. 3173 * 3174 * hammer2_base_*() functions require the parent->core.live_* fields 3175 * to be synchronized. 3176 * 3177 * We need to hold the spinlock to access the block array and RB tree 3178 * and to interlock chain creation. 
3179 */ 3180 if ((parent->flags & HAMMER2_CHAIN_COUNTEDBREFS) == 0) 3181 hammer2_chain_countbrefs(parent, base, count); 3182 3183 next_key = 0; 3184 bref_ptr = NULL; 3185 hammer2_spin_ex(&parent->core.spin); 3186 chain = hammer2_combined_find(parent, base, count, 3187 &next_key, 3188 key, HAMMER2_KEY_MAX, 3189 &bref_ptr); 3190 generation = parent->core.generation; 3191 3192 /* 3193 * Exhausted parent chain, we're done. 3194 */ 3195 if (bref_ptr == NULL) { 3196 hammer2_spin_unex(&parent->core.spin); 3197 KKASSERT(chain == NULL); 3198 error |= HAMMER2_ERROR_EOF; 3199 goto done; 3200 } 3201 3202 /* 3203 * Copy into the supplied stack-based blockref. 3204 */ 3205 *bref = *bref_ptr; 3206 3207 /* 3208 * Selected from blockref or in-memory chain. 3209 */ 3210 if (chain == NULL) { 3211 switch(bref->type) { 3212 case HAMMER2_BREF_TYPE_INODE: 3213 case HAMMER2_BREF_TYPE_FREEMAP_NODE: 3214 case HAMMER2_BREF_TYPE_INDIRECT: 3215 case HAMMER2_BREF_TYPE_VOLUME: 3216 case HAMMER2_BREF_TYPE_FREEMAP: 3217 /* 3218 * Recursion, always get the chain 3219 */ 3220 hammer2_spin_unex(&parent->core.spin); 3221 chain = hammer2_chain_get(parent, generation, 3222 bref, how); 3223 if (chain == NULL) 3224 goto again; 3225 break; 3226 default: 3227 /* 3228 * No recursion, do not waste time instantiating 3229 * a chain, just iterate using the bref. 3230 */ 3231 hammer2_spin_unex(&parent->core.spin); 3232 break; 3233 } 3234 } else { 3235 /* 3236 * Recursion or not we need the chain in order to supply 3237 * the bref. 3238 */ 3239 hammer2_chain_ref(chain); 3240 hammer2_spin_unex(&parent->core.spin); 3241 hammer2_chain_lock(chain, how); 3242 } 3243 if (chain && 3244 (bcmp(bref, &chain->bref, sizeof(*bref)) || 3245 chain->parent != parent)) { 3246 hammer2_chain_unlock(chain); 3247 hammer2_chain_drop(chain); 3248 chain = NULL; 3249 goto again; 3250 } 3251 3252 /* 3253 * Skip deleted chains (XXX cache 'i' end-of-block-array? 
XXX) 3254 * 3255 * NOTE: chain's key range is not relevant as there might be 3256 * one-offs within the range that are not deleted. 3257 * 3258 * NOTE: XXX this could create problems with scans used in 3259 * situations other than mount-time recovery. 3260 * 3261 * NOTE: Lookups can race delete-duplicate because 3262 * delete-duplicate does not lock the parent's core 3263 * (they just use the spinlock on the core). 3264 */ 3265 if (chain && (chain->flags & HAMMER2_CHAIN_DELETED)) { 3266 hammer2_chain_unlock(chain); 3267 hammer2_chain_drop(chain); 3268 chain = NULL; 3269 3270 key = next_key; 3271 if (key == 0) { 3272 error |= HAMMER2_ERROR_EOF; 3273 goto done; 3274 } 3275 goto again; 3276 } 3277 3278 done: 3279 /* 3280 * All done, return the bref or NULL, supply chain if necessary. 3281 */ 3282 if (chain) 3283 *chainp = chain; 3284 return (error); 3285 } 3286 3287 /* 3288 * Create and return a new hammer2 system memory structure of the specified 3289 * key, type and size and insert it under (*parentp). This is a full 3290 * insertion, based on the supplied key/keybits, and may involve creating 3291 * indirect blocks and moving other chains around via delete/duplicate. 3292 * 3293 * THE CALLER MUST HAVE ALREADY PROPERLY SEEKED (*parentp) TO THE INSERTION 3294 * POINT SANS ANY REQUIRED INDIRECT BLOCK CREATIONS DUE TO THE ARRAY BEING 3295 * FULL. This typically means that the caller is creating the chain after 3296 * doing a hammer2_chain_lookup(). 3297 * 3298 * (*parentp) must be exclusive locked and may be replaced on return 3299 * depending on how much work the function had to do. 3300 * 3301 * (*parentp) must not be errored or this function will assert. 3302 * 3303 * (*chainp) usually starts out NULL and returns the newly created chain, 3304 * but if the caller desires the caller may allocate a disconnected chain 3305 * and pass it in instead. 3306 * 3307 * This function should NOT be used to insert INDIRECT blocks. 
It is 3308 * typically used to create/insert inodes and data blocks. 3309 * 3310 * Caller must pass-in an exclusively locked parent the new chain is to 3311 * be inserted under, and optionally pass-in a disconnected, exclusively 3312 * locked chain to insert (else we create a new chain). The function will 3313 * adjust (*parentp) as necessary, create or connect the chain, and 3314 * return an exclusively locked chain in *chainp. 3315 * 3316 * When creating a PFSROOT inode under the super-root, pmp is typically NULL 3317 * and will be reassigned. 3318 * 3319 * NOTE: returns HAMMER_ERROR_* flags 3320 */ 3321 int 3322 hammer2_chain_create(hammer2_chain_t **parentp, hammer2_chain_t **chainp, 3323 hammer2_pfs_t *pmp, int methods, 3324 hammer2_key_t key, int keybits, int type, size_t bytes, 3325 hammer2_tid_t mtid, hammer2_off_t dedup_off, int flags) 3326 { 3327 hammer2_dev_t *hmp; 3328 hammer2_chain_t *chain; 3329 hammer2_chain_t *parent; 3330 hammer2_blockref_t *base; 3331 hammer2_blockref_t dummy; 3332 int allocated = 0; 3333 int error = 0; 3334 int count; 3335 int maxloops = 300000; 3336 3337 /* 3338 * Topology may be crossing a PFS boundary. 3339 */ 3340 parent = *parentp; 3341 KKASSERT(hammer2_mtx_owned(&parent->lock)); 3342 KKASSERT(parent->error == 0); 3343 hmp = parent->hmp; 3344 chain = *chainp; 3345 3346 if (chain == NULL) { 3347 /* 3348 * First allocate media space and construct the dummy bref, 3349 * then allocate the in-memory chain structure. Set the 3350 * INITIAL flag for fresh chains which do not have embedded 3351 * data. 3352 * 3353 * XXX for now set the check mode of the child based on 3354 * the parent or, if the parent is an inode, the 3355 * specification in the inode. 3356 */ 3357 bzero(&dummy, sizeof(dummy)); 3358 dummy.type = type; 3359 dummy.key = key; 3360 dummy.keybits = keybits; 3361 dummy.data_off = hammer2_getradix(bytes); 3362 3363 /* 3364 * Inherit methods from parent by default. Primarily used 3365 * for BREF_TYPE_DATA. 
Non-data types *must* be set to 3366 * a non-NONE check algorithm. 3367 */ 3368 if (methods == -1) 3369 dummy.methods = parent->bref.methods; 3370 else 3371 dummy.methods = (uint8_t)methods; 3372 3373 if (type != HAMMER2_BREF_TYPE_DATA && 3374 HAMMER2_DEC_CHECK(dummy.methods) == HAMMER2_CHECK_NONE) { 3375 dummy.methods |= 3376 HAMMER2_ENC_CHECK(HAMMER2_CHECK_DEFAULT); 3377 } 3378 3379 chain = hammer2_chain_alloc(hmp, pmp, &dummy); 3380 3381 /* 3382 * Lock the chain manually, chain_lock will load the chain 3383 * which we do NOT want to do. (note: chain->refs is set 3384 * to 1 by chain_alloc() for us, but lockcnt is not). 3385 */ 3386 chain->lockcnt = 1; 3387 hammer2_mtx_ex(&chain->lock); 3388 allocated = 1; 3389 ++curthread->td_tracker; 3390 3391 /* 3392 * Set INITIAL to optimize I/O. The flag will generally be 3393 * processed when we call hammer2_chain_modify(). 3394 * 3395 * Recalculate bytes to reflect the actual media block 3396 * allocation. Handle special case radix 0 == 0 bytes. 
3397 */ 3398 bytes = (size_t)(chain->bref.data_off & HAMMER2_OFF_MASK_RADIX); 3399 if (bytes) 3400 bytes = (hammer2_off_t)1 << bytes; 3401 chain->bytes = bytes; 3402 3403 switch(type) { 3404 case HAMMER2_BREF_TYPE_VOLUME: 3405 case HAMMER2_BREF_TYPE_FREEMAP: 3406 panic("hammer2_chain_create: called with volume type"); 3407 break; 3408 case HAMMER2_BREF_TYPE_INDIRECT: 3409 panic("hammer2_chain_create: cannot be used to" 3410 "create indirect block"); 3411 break; 3412 case HAMMER2_BREF_TYPE_FREEMAP_NODE: 3413 panic("hammer2_chain_create: cannot be used to" 3414 "create freemap root or node"); 3415 break; 3416 case HAMMER2_BREF_TYPE_FREEMAP_LEAF: 3417 KKASSERT(bytes == sizeof(chain->data->bmdata)); 3418 /* fall through */ 3419 case HAMMER2_BREF_TYPE_DIRENT: 3420 case HAMMER2_BREF_TYPE_INODE: 3421 case HAMMER2_BREF_TYPE_DATA: 3422 default: 3423 /* 3424 * leave chain->data NULL, set INITIAL 3425 */ 3426 KKASSERT(chain->data == NULL); 3427 atomic_set_int(&chain->flags, HAMMER2_CHAIN_INITIAL); 3428 break; 3429 } 3430 } else { 3431 /* 3432 * We are reattaching a previously deleted chain, possibly 3433 * under a new parent and possibly with a new key/keybits. 3434 * The chain does not have to be in a modified state. The 3435 * UPDATE flag will be set later on in this routine. 3436 * 3437 * Do NOT mess with the current state of the INITIAL flag. 3438 */ 3439 chain->bref.key = key; 3440 chain->bref.keybits = keybits; 3441 if (chain->flags & HAMMER2_CHAIN_DELETED) 3442 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_DELETED); 3443 KKASSERT(chain->parent == NULL); 3444 } 3445 3446 /* 3447 * Set the appropriate bref flag if requested. 3448 * 3449 * NOTE! Callers can call this function to move chains without 3450 * knowing about special flags, so don't clear bref flags 3451 * here! 
3452 */ 3453 if (flags & HAMMER2_INSERT_PFSROOT) 3454 chain->bref.flags |= HAMMER2_BREF_FLAG_PFSROOT; 3455 3456 /* 3457 * Calculate how many entries we have in the blockref array and 3458 * determine if an indirect block is required. 3459 */ 3460 again: 3461 if (--maxloops == 0) 3462 panic("hammer2_chain_create: maxloops"); 3463 3464 switch(parent->bref.type) { 3465 case HAMMER2_BREF_TYPE_INODE: 3466 if ((parent->data->ipdata.meta.op_flags & 3467 HAMMER2_OPFLAG_DIRECTDATA) != 0) { 3468 kprintf("hammer2: parent set for direct-data! " 3469 "pkey=%016jx ckey=%016jx\n", 3470 parent->bref.key, 3471 chain->bref.key); 3472 } 3473 KKASSERT((parent->data->ipdata.meta.op_flags & 3474 HAMMER2_OPFLAG_DIRECTDATA) == 0); 3475 KKASSERT(parent->data != NULL); 3476 base = &parent->data->ipdata.u.blockset.blockref[0]; 3477 count = HAMMER2_SET_COUNT; 3478 break; 3479 case HAMMER2_BREF_TYPE_INDIRECT: 3480 case HAMMER2_BREF_TYPE_FREEMAP_NODE: 3481 if (parent->flags & HAMMER2_CHAIN_INITIAL) 3482 base = NULL; 3483 else 3484 base = &parent->data->npdata[0]; 3485 count = parent->bytes / sizeof(hammer2_blockref_t); 3486 break; 3487 case HAMMER2_BREF_TYPE_VOLUME: 3488 KKASSERT(parent->data != NULL); 3489 base = &parent->data->voldata.sroot_blockset.blockref[0]; 3490 count = HAMMER2_SET_COUNT; 3491 break; 3492 case HAMMER2_BREF_TYPE_FREEMAP: 3493 KKASSERT(parent->data != NULL); 3494 base = &parent->data->blkset.blockref[0]; 3495 count = HAMMER2_SET_COUNT; 3496 break; 3497 default: 3498 panic("hammer2_chain_create: unrecognized blockref type: %d", 3499 parent->bref.type); 3500 base = NULL; 3501 count = 0; 3502 break; 3503 } 3504 3505 /* 3506 * Make sure we've counted the brefs 3507 */ 3508 if ((parent->flags & HAMMER2_CHAIN_COUNTEDBREFS) == 0) 3509 hammer2_chain_countbrefs(parent, base, count); 3510 3511 KASSERT(parent->core.live_count >= 0 && 3512 parent->core.live_count <= count, 3513 ("bad live_count %d/%d (%02x, %d)", 3514 parent->core.live_count, count, 3515 parent->bref.type, 
parent->bytes)); 3516 3517 /* 3518 * If no free blockref could be found we must create an indirect 3519 * block and move a number of blockrefs into it. With the parent 3520 * locked we can safely lock each child in order to delete+duplicate 3521 * it without causing a deadlock. 3522 * 3523 * This may return the new indirect block or the old parent depending 3524 * on where the key falls. NULL is returned on error. 3525 */ 3526 if (parent->core.live_count == count) { 3527 hammer2_chain_t *nparent; 3528 3529 KKASSERT((flags & HAMMER2_INSERT_SAMEPARENT) == 0); 3530 3531 nparent = hammer2_chain_create_indirect(parent, key, keybits, 3532 mtid, type, &error); 3533 if (nparent == NULL) { 3534 if (allocated) 3535 hammer2_chain_drop(chain); 3536 chain = NULL; 3537 goto done; 3538 } 3539 if (parent != nparent) { 3540 hammer2_chain_unlock(parent); 3541 hammer2_chain_drop(parent); 3542 parent = *parentp = nparent; 3543 } 3544 goto again; 3545 } 3546 3547 if (chain->flags & HAMMER2_CHAIN_DELETED) 3548 kprintf("Inserting deleted chain @%016jx\n", 3549 chain->bref.key); 3550 3551 /* 3552 * Link the chain into its parent. 3553 */ 3554 if (chain->parent != NULL) 3555 panic("hammer2: hammer2_chain_create: chain already connected"); 3556 KKASSERT(chain->parent == NULL); 3557 KKASSERT(parent->core.live_count < count); 3558 hammer2_chain_insert(parent, chain, 3559 HAMMER2_CHAIN_INSERT_SPIN | 3560 HAMMER2_CHAIN_INSERT_LIVE, 3561 0); 3562 3563 if (allocated) { 3564 /* 3565 * Mark the newly created chain modified. This will cause 3566 * UPDATE to be set and process the INITIAL flag. 3567 * 3568 * Device buffers are not instantiated for DATA elements 3569 * as these are handled by logical buffers. 3570 * 3571 * Indirect and freemap node indirect blocks are handled 3572 * by hammer2_chain_create_indirect() and not by this 3573 * function. 3574 * 3575 * Data for all other bref types is expected to be 3576 * instantiated (INODE, LEAF). 
3577 */ 3578 switch(chain->bref.type) { 3579 case HAMMER2_BREF_TYPE_DATA: 3580 case HAMMER2_BREF_TYPE_FREEMAP_LEAF: 3581 case HAMMER2_BREF_TYPE_DIRENT: 3582 case HAMMER2_BREF_TYPE_INODE: 3583 error = hammer2_chain_modify(chain, mtid, dedup_off, 3584 HAMMER2_MODIFY_OPTDATA); 3585 break; 3586 default: 3587 /* 3588 * Remaining types are not supported by this function. 3589 * In particular, INDIRECT and LEAF_NODE types are 3590 * handled by create_indirect(). 3591 */ 3592 panic("hammer2_chain_create: bad type: %d", 3593 chain->bref.type); 3594 /* NOT REACHED */ 3595 break; 3596 } 3597 } else { 3598 /* 3599 * When reconnecting a chain we must set UPDATE and 3600 * setflush so the flush recognizes that it must update 3601 * the bref in the parent. 3602 */ 3603 if ((chain->flags & HAMMER2_CHAIN_UPDATE) == 0) 3604 atomic_set_int(&chain->flags, HAMMER2_CHAIN_UPDATE); 3605 } 3606 3607 /* 3608 * We must setflush(parent) to ensure that it recurses through to 3609 * chain. setflush(chain) might not work because ONFLUSH is possibly 3610 * already set in the chain (so it won't recurse up to set it in the 3611 * parent). 3612 */ 3613 hammer2_chain_setflush(parent); 3614 3615 done: 3616 *chainp = chain; 3617 3618 return (error); 3619 } 3620 3621 /* 3622 * Move the chain from its old parent to a new parent. The chain must have 3623 * already been deleted or already disconnected (or never associated) with 3624 * a parent. The chain is reassociated with the new parent and the deleted 3625 * flag will be cleared (no longer deleted). The chain's modification state 3626 * is not altered. 3627 * 3628 * THE CALLER MUST HAVE ALREADY PROPERLY SEEKED (parent) TO THE INSERTION 3629 * POINT SANS ANY REQUIRED INDIRECT BLOCK CREATIONS DUE TO THE ARRAY BEING 3630 * FULL. This typically means that the caller is creating the chain after 3631 * doing a hammer2_chain_lookup(). 3632 * 3633 * Neither (parent) or (chain) can be errored. 
3634 * 3635 * If (parent) is non-NULL then the chain is inserted under the parent. 3636 * 3637 * If (parent) is NULL then the newly duplicated chain is not inserted 3638 * anywhere, similar to if it had just been chain_alloc()'d (suitable for 3639 * passing into hammer2_chain_create() after this function returns). 3640 * 3641 * WARNING! This function calls create which means it can insert indirect 3642 * blocks. This can cause other unrelated chains in the parent to 3643 * be moved to a newly inserted indirect block in addition to the 3644 * specific chain. 3645 */ 3646 void 3647 hammer2_chain_rename(hammer2_chain_t **parentp, hammer2_chain_t *chain, 3648 hammer2_tid_t mtid, int flags) 3649 { 3650 hammer2_blockref_t *bref; 3651 hammer2_dev_t *hmp; 3652 hammer2_chain_t *parent; 3653 size_t bytes; 3654 3655 /* 3656 * WARNING! We should never resolve DATA to device buffers 3657 * (XXX allow it if the caller did?), and since 3658 * we currently do not have the logical buffer cache 3659 * buffer in-hand to fix its cached physical offset 3660 * we also force the modify code to not COW it. XXX 3661 * 3662 * NOTE! We allow error'd chains to be renamed. The bref itself 3663 * is good and can be renamed. The content, however, may 3664 * be inaccessible. 3665 */ 3666 hmp = chain->hmp; 3667 KKASSERT(chain->parent == NULL); 3668 /*KKASSERT(chain->error == 0); allow */ 3669 3670 /* 3671 * Now create a duplicate of the chain structure, associating 3672 * it with the same core, making it the same size, pointing it 3673 * to the same bref (the same media block). 3674 * 3675 * NOTE: Handle special radix == 0 case (means 0 bytes). 3676 */ 3677 bref = &chain->bref; 3678 bytes = (size_t)(bref->data_off & HAMMER2_OFF_MASK_RADIX); 3679 if (bytes) 3680 bytes = (hammer2_off_t)1 << bytes; 3681 3682 /* 3683 * If parent is not NULL the duplicated chain will be entered under 3684 * the parent and the UPDATE bit set to tell flush to update 3685 * the blockref. 
3686 * 3687 * We must setflush(parent) to ensure that it recurses through to 3688 * chain. setflush(chain) might not work because ONFLUSH is possibly 3689 * already set in the chain (so it won't recurse up to set it in the 3690 * parent). 3691 * 3692 * Having both chains locked is extremely important for atomicy. 3693 */ 3694 if (parentp && (parent = *parentp) != NULL) { 3695 KKASSERT(hammer2_mtx_owned(&parent->lock)); 3696 KKASSERT(parent->refs > 0); 3697 KKASSERT(parent->error == 0); 3698 3699 hammer2_chain_create(parentp, &chain, 3700 chain->pmp, HAMMER2_METH_DEFAULT, 3701 bref->key, bref->keybits, bref->type, 3702 chain->bytes, mtid, 0, flags); 3703 KKASSERT(chain->flags & HAMMER2_CHAIN_UPDATE); 3704 hammer2_chain_setflush(*parentp); 3705 } 3706 } 3707 3708 /* 3709 * Helper function for deleting chains. 3710 * 3711 * The chain is removed from the live view (the RBTREE) as well as the parent's 3712 * blockmap. Both chain and its parent must be locked. 3713 * 3714 * parent may not be errored. chain can be errored. 3715 */ 3716 static int 3717 _hammer2_chain_delete_helper(hammer2_chain_t *parent, hammer2_chain_t *chain, 3718 hammer2_tid_t mtid, int flags) 3719 { 3720 hammer2_dev_t *hmp; 3721 int error = 0; 3722 3723 KKASSERT((chain->flags & (HAMMER2_CHAIN_DELETED | 3724 HAMMER2_CHAIN_FICTITIOUS)) == 0); 3725 KKASSERT(chain->parent == parent); 3726 hmp = chain->hmp; 3727 3728 if (chain->flags & HAMMER2_CHAIN_BMAPPED) { 3729 /* 3730 * Chain is blockmapped, so there must be a parent. 3731 * Atomically remove the chain from the parent and remove 3732 * the blockmap entry. The parent must be set modified 3733 * to remove the blockmap entry. 
3734 */ 3735 hammer2_blockref_t *base; 3736 int count; 3737 3738 KKASSERT(parent != NULL); 3739 KKASSERT(parent->error == 0); 3740 KKASSERT((parent->flags & HAMMER2_CHAIN_INITIAL) == 0); 3741 error = hammer2_chain_modify(parent, mtid, 0, 0); 3742 if (error) 3743 goto done; 3744 3745 /* 3746 * Calculate blockmap pointer 3747 */ 3748 KKASSERT(chain->flags & HAMMER2_CHAIN_ONRBTREE); 3749 hammer2_spin_ex(&chain->core.spin); 3750 hammer2_spin_ex(&parent->core.spin); 3751 3752 atomic_set_int(&chain->flags, HAMMER2_CHAIN_DELETED); 3753 atomic_add_int(&parent->core.live_count, -1); 3754 ++parent->core.generation; 3755 RB_REMOVE(hammer2_chain_tree, &parent->core.rbtree, chain); 3756 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_ONRBTREE); 3757 --parent->core.chain_count; 3758 chain->parent = NULL; 3759 3760 switch(parent->bref.type) { 3761 case HAMMER2_BREF_TYPE_INODE: 3762 /* 3763 * Access the inode's block array. However, there 3764 * is no block array if the inode is flagged 3765 * DIRECTDATA. 3766 */ 3767 if (parent->data && 3768 (parent->data->ipdata.meta.op_flags & 3769 HAMMER2_OPFLAG_DIRECTDATA) == 0) { 3770 base = 3771 &parent->data->ipdata.u.blockset.blockref[0]; 3772 } else { 3773 base = NULL; 3774 } 3775 count = HAMMER2_SET_COUNT; 3776 break; 3777 case HAMMER2_BREF_TYPE_INDIRECT: 3778 case HAMMER2_BREF_TYPE_FREEMAP_NODE: 3779 if (parent->data) 3780 base = &parent->data->npdata[0]; 3781 else 3782 base = NULL; 3783 count = parent->bytes / sizeof(hammer2_blockref_t); 3784 break; 3785 case HAMMER2_BREF_TYPE_VOLUME: 3786 base = &parent->data->voldata. 3787 sroot_blockset.blockref[0]; 3788 count = HAMMER2_SET_COUNT; 3789 break; 3790 case HAMMER2_BREF_TYPE_FREEMAP: 3791 base = &parent->data->blkset.blockref[0]; 3792 count = HAMMER2_SET_COUNT; 3793 break; 3794 default: 3795 base = NULL; 3796 count = 0; 3797 panic("hammer2_flush_pass2: " 3798 "unrecognized blockref type: %d", 3799 parent->bref.type); 3800 } 3801 3802 /* 3803 * delete blockmapped chain from its parent. 
3804 * 3805 * The parent is not affected by any statistics in chain 3806 * which are pending synchronization. That is, there is 3807 * nothing to undo in the parent since they have not yet 3808 * been incorporated into the parent. 3809 * 3810 * The parent is affected by statistics stored in inodes. 3811 * Those have already been synchronized, so they must be 3812 * undone. XXX split update possible w/delete in middle? 3813 */ 3814 if (base) { 3815 hammer2_base_delete(parent, base, count, chain); 3816 } 3817 hammer2_spin_unex(&parent->core.spin); 3818 hammer2_spin_unex(&chain->core.spin); 3819 } else if (chain->flags & HAMMER2_CHAIN_ONRBTREE) { 3820 /* 3821 * Chain is not blockmapped but a parent is present. 3822 * Atomically remove the chain from the parent. There is 3823 * no blockmap entry to remove. 3824 * 3825 * Because chain was associated with a parent but not 3826 * synchronized, the chain's *_count_up fields contain 3827 * inode adjustment statistics which must be undone. 3828 */ 3829 hammer2_spin_ex(&chain->core.spin); 3830 hammer2_spin_ex(&parent->core.spin); 3831 atomic_set_int(&chain->flags, HAMMER2_CHAIN_DELETED); 3832 atomic_add_int(&parent->core.live_count, -1); 3833 ++parent->core.generation; 3834 RB_REMOVE(hammer2_chain_tree, &parent->core.rbtree, chain); 3835 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_ONRBTREE); 3836 --parent->core.chain_count; 3837 chain->parent = NULL; 3838 hammer2_spin_unex(&parent->core.spin); 3839 hammer2_spin_unex(&chain->core.spin); 3840 } else { 3841 /* 3842 * Chain is not blockmapped and has no parent. This 3843 * is a degenerate case. 3844 */ 3845 atomic_set_int(&chain->flags, HAMMER2_CHAIN_DELETED); 3846 } 3847 done: 3848 return error; 3849 } 3850 3851 /* 3852 * Create an indirect block that covers one or more of the elements in the 3853 * current parent. 
 * Either returns the existing parent with no locking or
 * ref changes or returns the new indirect block locked and referenced
 * and leaving the original parent lock/ref intact as well.
 *
 * If an error occurs, NULL is returned and *errorp is set to the H2 error.
 *
 * The returned chain depends on where the specified key falls.
 *
 * The key/keybits for the indirect mode only needs to follow four rules:
 *
 * (1) That all elements underneath it fit within its key space and
 *
 * (2) That all elements outside it are outside its key space.
 *
 * (3) When creating the new indirect block any elements in the current
 *     parent that fit within the new indirect block's keyspace must be
 *     moved into the new indirect block.
 *
 * (4) The keyspace chosen for the inserted indirect block CAN cover a wider
 *     keyspace than the current parent, but lookup/iteration rules will
 *     ensure (and must ensure) that rule (2) for all parents leading up
 *     to the nearest inode or the root volume header is adhered to.  This
 *     is accomplished by always recursing through matching keyspaces in
 *     the hammer2_chain_lookup() and hammer2_chain_next() API.
 *
 * The current implementation calculates the current worst-case keyspace by
 * iterating the current parent and then divides it into two halves, choosing
 * whichever half has the most elements (not necessarily the half containing
 * the requested key).
 *
 * We can also opt to use the half with the least number of elements.  This
 * causes lower-numbered keys (aka logical file offsets) to recurse through
 * fewer indirect blocks and higher-numbered keys to recurse through more.
 * This also has the risk of not moving enough elements to the new indirect
 * block and being forced to create several indirect blocks before the element
 * can be inserted.
3889 * 3890 * Must be called with an exclusively locked parent. 3891 * 3892 * NOTE: *errorp set to HAMMER_ERROR_* flags 3893 */ 3894 static int hammer2_chain_indkey_freemap(hammer2_chain_t *parent, 3895 hammer2_key_t *keyp, int keybits, 3896 hammer2_blockref_t *base, int count); 3897 static int hammer2_chain_indkey_file(hammer2_chain_t *parent, 3898 hammer2_key_t *keyp, int keybits, 3899 hammer2_blockref_t *base, int count, 3900 int ncount); 3901 static int hammer2_chain_indkey_dir(hammer2_chain_t *parent, 3902 hammer2_key_t *keyp, int keybits, 3903 hammer2_blockref_t *base, int count, 3904 int ncount); 3905 static 3906 hammer2_chain_t * 3907 hammer2_chain_create_indirect(hammer2_chain_t *parent, 3908 hammer2_key_t create_key, int create_bits, 3909 hammer2_tid_t mtid, int for_type, int *errorp) 3910 { 3911 hammer2_dev_t *hmp; 3912 hammer2_blockref_t *base; 3913 hammer2_blockref_t *bref; 3914 hammer2_blockref_t bcopy; 3915 hammer2_chain_t *chain; 3916 hammer2_chain_t *ichain; 3917 hammer2_chain_t dummy; 3918 hammer2_key_t key = create_key; 3919 hammer2_key_t key_beg; 3920 hammer2_key_t key_end; 3921 hammer2_key_t key_next; 3922 int keybits = create_bits; 3923 int count; 3924 int ncount; 3925 int nbytes; 3926 int loops; 3927 int error; 3928 int reason; 3929 int generation; 3930 int maxloops = 300000; 3931 3932 /* 3933 * Calculate the base blockref pointer or NULL if the chain 3934 * is known to be empty. We need to calculate the array count 3935 * for RB lookups either way. 3936 */ 3937 hmp = parent->hmp; 3938 KKASSERT(hammer2_mtx_owned(&parent->lock)); 3939 3940 /* 3941 * Pre-modify the parent now to avoid having to deal with error 3942 * processing if we tried to later (in the middle of our loop). 
3943 */ 3944 *errorp = hammer2_chain_modify(parent, mtid, 0, 0); 3945 if (*errorp) { 3946 kprintf("hammer2_create_indirect: error %08x %s\n", 3947 *errorp, hammer2_error_str(*errorp)); 3948 return NULL; 3949 } 3950 3951 /*hammer2_chain_modify(&parent, HAMMER2_MODIFY_OPTDATA);*/ 3952 base = hammer2_chain_base_and_count(parent, &count); 3953 3954 /* 3955 * dummy used in later chain allocation (no longer used for lookups). 3956 */ 3957 bzero(&dummy, sizeof(dummy)); 3958 3959 /* 3960 * How big should our new indirect block be? It has to be at least 3961 * as large as its parent for splits to work properly. 3962 * 3963 * The freemap uses a specific indirect block size. The number of 3964 * levels are built dynamically and ultimately depend on the size 3965 * volume. Because freemap blocks are taken from the reserved areas 3966 * of the volume our goal is efficiency (fewer levels) and not so 3967 * much to save disk space. 3968 * 3969 * The first indirect block level for a directory usually uses 3970 * HAMMER2_IND_BYTES_MIN (4KB = 32 directory entries). Due to 3971 * the hash mechanism, this typically gives us a nominal 3972 * 32 * 4 entries with one level of indirection. 3973 * 3974 * We use HAMMER2_IND_BYTES_NOM (16KB = 128 blockrefs) for FILE 3975 * indirect blocks. The initial 4 entries in the inode gives us 3976 * 256KB. Up to 4 indirect blocks gives us 32MB. Three levels 3977 * of indirection gives us 137GB, and so forth. H2 can support 3978 * huge file sizes but they are not typical, so we try to stick 3979 * with compactness and do not use a larger indirect block size. 3980 * 3981 * We could use 64KB (PBUFSIZE), giving us 512 blockrefs, but 3982 * due to the way indirect blocks are created this usually winds 3983 * up being extremely inefficient for small files. Even though 3984 * 16KB requires more levels of indirection for very large files, 3985 * the 16KB records can be ganged together into 64KB DIOs. 
3986 */ 3987 if (for_type == HAMMER2_BREF_TYPE_FREEMAP_NODE || 3988 for_type == HAMMER2_BREF_TYPE_FREEMAP_LEAF) { 3989 nbytes = HAMMER2_FREEMAP_LEVELN_PSIZE; 3990 } else if (parent->bref.type == HAMMER2_BREF_TYPE_INODE) { 3991 if (parent->data->ipdata.meta.type == 3992 HAMMER2_OBJTYPE_DIRECTORY) 3993 nbytes = HAMMER2_IND_BYTES_MIN; /* 4KB = 32 entries */ 3994 else 3995 nbytes = HAMMER2_IND_BYTES_NOM; /* 16KB = ~8MB file */ 3996 3997 } else { 3998 nbytes = HAMMER2_IND_BYTES_NOM; 3999 } 4000 if (nbytes < count * sizeof(hammer2_blockref_t)) { 4001 KKASSERT(for_type != HAMMER2_BREF_TYPE_FREEMAP_NODE && 4002 for_type != HAMMER2_BREF_TYPE_FREEMAP_LEAF); 4003 nbytes = count * sizeof(hammer2_blockref_t); 4004 } 4005 ncount = nbytes / sizeof(hammer2_blockref_t); 4006 4007 /* 4008 * When creating an indirect block for a freemap node or leaf 4009 * the key/keybits must be fitted to static radix levels because 4010 * particular radix levels use particular reserved blocks in the 4011 * related zone. 4012 * 4013 * This routine calculates the key/radix of the indirect block 4014 * we need to create, and whether it is on the high-side or the 4015 * low-side. 4016 */ 4017 switch(for_type) { 4018 case HAMMER2_BREF_TYPE_FREEMAP_NODE: 4019 case HAMMER2_BREF_TYPE_FREEMAP_LEAF: 4020 keybits = hammer2_chain_indkey_freemap(parent, &key, keybits, 4021 base, count); 4022 break; 4023 case HAMMER2_BREF_TYPE_DATA: 4024 keybits = hammer2_chain_indkey_file(parent, &key, keybits, 4025 base, count, ncount); 4026 break; 4027 case HAMMER2_BREF_TYPE_DIRENT: 4028 case HAMMER2_BREF_TYPE_INODE: 4029 keybits = hammer2_chain_indkey_dir(parent, &key, keybits, 4030 base, count, ncount); 4031 break; 4032 default: 4033 panic("illegal indirect block for bref type %d", for_type); 4034 break; 4035 } 4036 4037 /* 4038 * Normalize the key for the radix being represented, keeping the 4039 * high bits and throwing away the low bits. 
4040 */ 4041 key &= ~(((hammer2_key_t)1 << keybits) - 1); 4042 4043 /* 4044 * Ok, create our new indirect block 4045 */ 4046 if (for_type == HAMMER2_BREF_TYPE_FREEMAP_NODE || 4047 for_type == HAMMER2_BREF_TYPE_FREEMAP_LEAF) { 4048 dummy.bref.type = HAMMER2_BREF_TYPE_FREEMAP_NODE; 4049 } else { 4050 dummy.bref.type = HAMMER2_BREF_TYPE_INDIRECT; 4051 } 4052 dummy.bref.key = key; 4053 dummy.bref.keybits = keybits; 4054 dummy.bref.data_off = hammer2_getradix(nbytes); 4055 dummy.bref.methods = 4056 HAMMER2_ENC_CHECK(HAMMER2_DEC_CHECK(parent->bref.methods)) | 4057 HAMMER2_ENC_COMP(HAMMER2_COMP_NONE); 4058 4059 ichain = hammer2_chain_alloc(hmp, parent->pmp, &dummy.bref); 4060 atomic_set_int(&ichain->flags, HAMMER2_CHAIN_INITIAL); 4061 hammer2_chain_lock(ichain, HAMMER2_RESOLVE_MAYBE); 4062 /* ichain has one ref at this point */ 4063 4064 /* 4065 * We have to mark it modified to allocate its block, but use 4066 * OPTDATA to allow it to remain in the INITIAL state. Otherwise 4067 * it won't be acted upon by the flush code. 4068 */ 4069 *errorp = hammer2_chain_modify(ichain, mtid, 0, HAMMER2_MODIFY_OPTDATA); 4070 if (*errorp) { 4071 kprintf("hammer2_alloc_indirect: error %08x %s\n", 4072 *errorp, hammer2_error_str(*errorp)); 4073 hammer2_chain_unlock(ichain); 4074 hammer2_chain_drop(ichain); 4075 return NULL; 4076 } 4077 4078 /* 4079 * Iterate the original parent and move the matching brefs into 4080 * the new indirect block. 4081 * 4082 * XXX handle flushes. 4083 */ 4084 key_beg = 0; 4085 key_end = HAMMER2_KEY_MAX; 4086 key_next = 0; /* avoid gcc warnings */ 4087 hammer2_spin_ex(&parent->core.spin); 4088 loops = 0; 4089 reason = 0; 4090 4091 for (;;) { 4092 /* 4093 * Parent may have been modified, relocating its block array. 4094 * Reload the base pointer. 
4095 */ 4096 base = hammer2_chain_base_and_count(parent, &count); 4097 4098 if (++loops > 100000) { 4099 hammer2_spin_unex(&parent->core.spin); 4100 panic("excessive loops r=%d p=%p base/count %p:%d %016jx\n", 4101 reason, parent, base, count, key_next); 4102 } 4103 4104 /* 4105 * NOTE: spinlock stays intact, returned chain (if not NULL) 4106 * is not referenced or locked which means that we 4107 * cannot safely check its flagged / deletion status 4108 * until we lock it. 4109 */ 4110 chain = hammer2_combined_find(parent, base, count, 4111 &key_next, 4112 key_beg, key_end, 4113 &bref); 4114 generation = parent->core.generation; 4115 if (bref == NULL) 4116 break; 4117 key_next = bref->key + ((hammer2_key_t)1 << bref->keybits); 4118 4119 /* 4120 * Skip keys that are not within the key/radix of the new 4121 * indirect block. They stay in the parent. 4122 */ 4123 if ((~(((hammer2_key_t)1 << keybits) - 1) & 4124 (key ^ bref->key)) != 0) { 4125 goto next_key_spinlocked; 4126 } 4127 4128 /* 4129 * Load the new indirect block by acquiring the related 4130 * chains (potentially from media as it might not be 4131 * in-memory). Then move it to the new parent (ichain). 4132 * 4133 * chain is referenced but not locked. We must lock the 4134 * chain to obtain definitive state. 4135 */ 4136 bcopy = *bref; 4137 if (chain) { 4138 /* 4139 * Use chain already present in the RBTREE 4140 */ 4141 hammer2_chain_ref(chain); 4142 hammer2_spin_unex(&parent->core.spin); 4143 hammer2_chain_lock(chain, HAMMER2_RESOLVE_NEVER); 4144 } else { 4145 /* 4146 * Get chain for blockref element. _get returns NULL 4147 * on insertion race. 
4148 */ 4149 hammer2_spin_unex(&parent->core.spin); 4150 chain = hammer2_chain_get(parent, generation, &bcopy, 4151 HAMMER2_RESOLVE_NEVER); 4152 if (chain == NULL) { 4153 reason = 1; 4154 hammer2_spin_ex(&parent->core.spin); 4155 continue; 4156 } 4157 } 4158 4159 /* 4160 * This is always live so if the chain has been deleted 4161 * we raced someone and we have to retry. 4162 * 4163 * NOTE: Lookups can race delete-duplicate because 4164 * delete-duplicate does not lock the parent's core 4165 * (they just use the spinlock on the core). 4166 * 4167 * (note reversed logic for this one) 4168 */ 4169 if (bcmp(&bcopy, &chain->bref, sizeof(bcopy)) || 4170 chain->parent != parent || 4171 (chain->flags & HAMMER2_CHAIN_DELETED)) { 4172 hammer2_chain_unlock(chain); 4173 hammer2_chain_drop(chain); 4174 if (hammer2_debug & 0x0040) { 4175 kprintf("LOST PARENT RETRY " 4176 "RETRY (%p,%p)->%p %08x\n", 4177 parent, chain->parent, chain, chain->flags); 4178 } 4179 hammer2_spin_ex(&parent->core.spin); 4180 continue; 4181 } 4182 4183 /* 4184 * Shift the chain to the indirect block. 4185 * 4186 * WARNING! No reason for us to load chain data, pass NOSTATS 4187 * to prevent delete/insert from trying to access 4188 * inode stats (and thus asserting if there is no 4189 * chain->data loaded). 4190 * 4191 * WARNING! The (parent, chain) deletion may modify the parent 4192 * and invalidate the base pointer. 4193 * 4194 * WARNING! Parent must already be marked modified, so we 4195 * can assume that chain_delete always suceeds. 4196 * 4197 * WARNING! hammer2_chain_repchange() does not have to be 4198 * called (and doesn't work anyway because we are 4199 * only doing a partial shift). A recursion that is 4200 * in-progress can continue at the current parent 4201 * and will be able to properly find its next key. 
4202 */ 4203 error = hammer2_chain_delete(parent, chain, mtid, 0); 4204 KKASSERT(error == 0); 4205 hammer2_chain_rename(&ichain, chain, mtid, 0); 4206 hammer2_chain_unlock(chain); 4207 hammer2_chain_drop(chain); 4208 KKASSERT(parent->refs > 0); 4209 chain = NULL; 4210 base = NULL; /* safety */ 4211 hammer2_spin_ex(&parent->core.spin); 4212 next_key_spinlocked: 4213 if (--maxloops == 0) 4214 panic("hammer2_chain_create_indirect: maxloops"); 4215 reason = 4; 4216 if (key_next == 0 || key_next > key_end) 4217 break; 4218 key_beg = key_next; 4219 /* loop */ 4220 } 4221 hammer2_spin_unex(&parent->core.spin); 4222 4223 /* 4224 * Insert the new indirect block into the parent now that we've 4225 * cleared out some entries in the parent. We calculated a good 4226 * insertion index in the loop above (ichain->index). 4227 * 4228 * We don't have to set UPDATE here because we mark ichain 4229 * modified down below (so the normal modified -> flush -> set-moved 4230 * sequence applies). 4231 * 4232 * The insertion shouldn't race as this is a completely new block 4233 * and the parent is locked. 4234 */ 4235 base = NULL; /* safety, parent modify may change address */ 4236 KKASSERT((ichain->flags & HAMMER2_CHAIN_ONRBTREE) == 0); 4237 KKASSERT(parent->core.live_count < count); 4238 hammer2_chain_insert(parent, ichain, 4239 HAMMER2_CHAIN_INSERT_SPIN | 4240 HAMMER2_CHAIN_INSERT_LIVE, 4241 0); 4242 4243 /* 4244 * Make sure flushes propogate after our manual insertion. 4245 */ 4246 hammer2_chain_setflush(ichain); 4247 hammer2_chain_setflush(parent); 4248 4249 /* 4250 * Figure out what to return. 4251 */ 4252 if (~(((hammer2_key_t)1 << keybits) - 1) & 4253 (create_key ^ key)) { 4254 /* 4255 * Key being created is outside the key range, 4256 * return the original parent. 4257 */ 4258 hammer2_chain_unlock(ichain); 4259 hammer2_chain_drop(ichain); 4260 } else { 4261 /* 4262 * Otherwise its in the range, return the new parent. 4263 * (leave both the new and old parent locked). 
4264 */ 4265 parent = ichain; 4266 } 4267 4268 return(parent); 4269 } 4270 4271 /* 4272 * Do maintenance on an indirect chain. Both parent and chain are locked. 4273 * 4274 * Returns non-zero if (chain) is deleted, either due to being empty or 4275 * because its children were safely moved into the parent. 4276 */ 4277 int 4278 hammer2_chain_indirect_maintenance(hammer2_chain_t *parent, 4279 hammer2_chain_t *chain) 4280 { 4281 hammer2_blockref_t *chain_base; 4282 hammer2_blockref_t *base; 4283 hammer2_blockref_t *bref; 4284 hammer2_blockref_t bcopy; 4285 hammer2_key_t key_next; 4286 hammer2_key_t key_beg; 4287 hammer2_key_t key_end; 4288 hammer2_chain_t *sub; 4289 int chain_count; 4290 int count; 4291 int error; 4292 int generation; 4293 4294 /* 4295 * Make sure we have an accurate live_count 4296 */ 4297 if ((chain->flags & (HAMMER2_CHAIN_INITIAL | 4298 HAMMER2_CHAIN_COUNTEDBREFS)) == 0) { 4299 base = &chain->data->npdata[0]; 4300 count = chain->bytes / sizeof(hammer2_blockref_t); 4301 hammer2_chain_countbrefs(chain, base, count); 4302 } 4303 4304 /* 4305 * If the indirect block is empty we can delete it. 4306 * (ignore deletion error) 4307 */ 4308 if (chain->core.live_count == 0 && RB_EMPTY(&chain->core.rbtree)) { 4309 hammer2_chain_delete(parent, chain, 4310 chain->bref.modify_tid, 4311 HAMMER2_DELETE_PERMANENT); 4312 hammer2_chain_repchange(parent, chain); 4313 return 1; 4314 } 4315 4316 base = hammer2_chain_base_and_count(parent, &count); 4317 4318 if ((parent->flags & (HAMMER2_CHAIN_INITIAL | 4319 HAMMER2_CHAIN_COUNTEDBREFS)) == 0) { 4320 hammer2_chain_countbrefs(parent, base, count); 4321 } 4322 4323 /* 4324 * Determine if we can collapse chain into parent, calculate 4325 * hysteresis for chain emptiness. 
4326 */ 4327 if (parent->core.live_count + chain->core.live_count - 1 > count) 4328 return 0; 4329 chain_count = chain->bytes / sizeof(hammer2_blockref_t); 4330 if (chain->core.live_count > chain_count * 3 / 4) 4331 return 0; 4332 4333 /* 4334 * Ok, theoretically we can collapse chain's contents into 4335 * parent. chain is locked, but any in-memory children of chain 4336 * are not. For this to work, we must be able to dispose of any 4337 * in-memory children of chain. 4338 * 4339 * For now require that there are no in-memory children of chain. 4340 * 4341 * WARNING! Both chain and parent must remain locked across this 4342 * entire operation. 4343 */ 4344 4345 /* 4346 * Parent must be marked modified. Don't try to collapse it if we 4347 * can't mark it modified. Once modified, destroy chain to make room 4348 * and to get rid of what will be a conflicting key (this is included 4349 * in the calculation above). Finally, move the children of chain 4350 * into chain's parent. 4351 * 4352 * This order creates an accounting problem for bref.embed.stats 4353 * because we destroy chain before we remove its children. Any 4354 * elements whos blockref is already synchronized will be counted 4355 * twice. To deal with the problem we clean out chain's stats prior 4356 * to deleting it. 
4357 */ 4358 error = hammer2_chain_modify(parent, 0, 0, 0); 4359 if (error) { 4360 krateprintf(&krate_h2me, "hammer2: indirect_maint: %s\n", 4361 hammer2_error_str(error)); 4362 return 0; 4363 } 4364 error = hammer2_chain_modify(chain, chain->bref.modify_tid, 0, 0); 4365 if (error) { 4366 krateprintf(&krate_h2me, "hammer2: indirect_maint: %s\n", 4367 hammer2_error_str(error)); 4368 return 0; 4369 } 4370 4371 chain->bref.embed.stats.inode_count = 0; 4372 chain->bref.embed.stats.data_count = 0; 4373 error = hammer2_chain_delete(parent, chain, 4374 chain->bref.modify_tid, 4375 HAMMER2_DELETE_PERMANENT); 4376 KKASSERT(error == 0); 4377 4378 /* 4379 * The combined_find call requires core.spin to be held. One would 4380 * think there wouldn't be any conflicts since we hold chain 4381 * exclusively locked, but the caching mechanism for 0-ref children 4382 * does not require a chain lock. 4383 */ 4384 hammer2_spin_ex(&chain->core.spin); 4385 4386 key_next = 0; 4387 key_beg = 0; 4388 key_end = HAMMER2_KEY_MAX; 4389 for (;;) { 4390 chain_base = &chain->data->npdata[0]; 4391 chain_count = chain->bytes / sizeof(hammer2_blockref_t); 4392 sub = hammer2_combined_find(chain, chain_base, chain_count, 4393 &key_next, 4394 key_beg, key_end, 4395 &bref); 4396 generation = chain->core.generation; 4397 if (bref == NULL) 4398 break; 4399 key_next = bref->key + ((hammer2_key_t)1 << bref->keybits); 4400 4401 bcopy = *bref; 4402 if (sub) { 4403 hammer2_chain_ref(sub); 4404 hammer2_spin_unex(&chain->core.spin); 4405 hammer2_chain_lock(sub, HAMMER2_RESOLVE_NEVER); 4406 } else { 4407 hammer2_spin_unex(&chain->core.spin); 4408 sub = hammer2_chain_get(chain, generation, &bcopy, 4409 HAMMER2_RESOLVE_NEVER); 4410 if (sub == NULL) { 4411 hammer2_spin_ex(&chain->core.spin); 4412 continue; 4413 } 4414 } 4415 if (bcmp(&bcopy, &sub->bref, sizeof(bcopy)) || 4416 sub->parent != chain || 4417 (sub->flags & HAMMER2_CHAIN_DELETED)) { 4418 hammer2_chain_unlock(sub); 4419 hammer2_chain_drop(sub); 4420 
hammer2_spin_ex(&chain->core.spin); 4421 sub = NULL; /* safety */ 4422 continue; 4423 } 4424 error = hammer2_chain_delete(chain, sub, 4425 sub->bref.modify_tid, 0); 4426 KKASSERT(error == 0); 4427 hammer2_chain_rename(&parent, sub, 4428 sub->bref.modify_tid, 4429 HAMMER2_INSERT_SAMEPARENT); 4430 hammer2_chain_unlock(sub); 4431 hammer2_chain_drop(sub); 4432 hammer2_spin_ex(&chain->core.spin); 4433 4434 if (key_next == 0) 4435 break; 4436 key_beg = key_next; 4437 } 4438 hammer2_spin_unex(&chain->core.spin); 4439 4440 hammer2_chain_repchange(parent, chain); 4441 4442 return 1; 4443 } 4444 4445 /* 4446 * Freemap indirect blocks 4447 * 4448 * Calculate the keybits and highside/lowside of the freemap node the 4449 * caller is creating. 4450 * 4451 * This routine will specify the next higher-level freemap key/radix 4452 * representing the lowest-ordered set. By doing so, eventually all 4453 * low-ordered sets will be moved one level down. 4454 * 4455 * We have to be careful here because the freemap reserves a limited 4456 * number of blocks for a limited number of levels. So we can't just 4457 * push indiscriminately. 4458 */ 4459 int 4460 hammer2_chain_indkey_freemap(hammer2_chain_t *parent, hammer2_key_t *keyp, 4461 int keybits, hammer2_blockref_t *base, int count) 4462 { 4463 hammer2_chain_t *chain; 4464 hammer2_blockref_t *bref; 4465 hammer2_key_t key; 4466 hammer2_key_t key_beg; 4467 hammer2_key_t key_end; 4468 hammer2_key_t key_next; 4469 int locount; 4470 int hicount; 4471 int maxloops = 300000; 4472 4473 key = *keyp; 4474 locount = 0; 4475 hicount = 0; 4476 keybits = 64; 4477 4478 /* 4479 * Calculate the range of keys in the array being careful to skip 4480 * slots which are overridden with a deletion. 
4481 */ 4482 key_beg = 0; 4483 key_end = HAMMER2_KEY_MAX; 4484 hammer2_spin_ex(&parent->core.spin); 4485 4486 for (;;) { 4487 if (--maxloops == 0) { 4488 panic("indkey_freemap shit %p %p:%d\n", 4489 parent, base, count); 4490 } 4491 chain = hammer2_combined_find(parent, base, count, 4492 &key_next, 4493 key_beg, key_end, 4494 &bref); 4495 4496 /* 4497 * Exhausted search 4498 */ 4499 if (bref == NULL) 4500 break; 4501 4502 /* 4503 * Skip deleted chains. 4504 */ 4505 if (chain && (chain->flags & HAMMER2_CHAIN_DELETED)) { 4506 if (key_next == 0 || key_next > key_end) 4507 break; 4508 key_beg = key_next; 4509 continue; 4510 } 4511 4512 /* 4513 * Use the full live (not deleted) element for the scan 4514 * iteration. HAMMER2 does not allow partial replacements. 4515 * 4516 * XXX should be built into hammer2_combined_find(). 4517 */ 4518 key_next = bref->key + ((hammer2_key_t)1 << bref->keybits); 4519 4520 if (keybits > bref->keybits) { 4521 key = bref->key; 4522 keybits = bref->keybits; 4523 } else if (keybits == bref->keybits && bref->key < key) { 4524 key = bref->key; 4525 } 4526 if (key_next == 0) 4527 break; 4528 key_beg = key_next; 4529 } 4530 hammer2_spin_unex(&parent->core.spin); 4531 4532 /* 4533 * Return the keybits for a higher-level FREEMAP_NODE covering 4534 * this node. 
4535 */ 4536 switch(keybits) { 4537 case HAMMER2_FREEMAP_LEVEL0_RADIX: 4538 keybits = HAMMER2_FREEMAP_LEVEL1_RADIX; 4539 break; 4540 case HAMMER2_FREEMAP_LEVEL1_RADIX: 4541 keybits = HAMMER2_FREEMAP_LEVEL2_RADIX; 4542 break; 4543 case HAMMER2_FREEMAP_LEVEL2_RADIX: 4544 keybits = HAMMER2_FREEMAP_LEVEL3_RADIX; 4545 break; 4546 case HAMMER2_FREEMAP_LEVEL3_RADIX: 4547 keybits = HAMMER2_FREEMAP_LEVEL4_RADIX; 4548 break; 4549 case HAMMER2_FREEMAP_LEVEL4_RADIX: 4550 keybits = HAMMER2_FREEMAP_LEVEL5_RADIX; 4551 break; 4552 case HAMMER2_FREEMAP_LEVEL5_RADIX: 4553 panic("hammer2_chain_indkey_freemap: level too high"); 4554 break; 4555 default: 4556 panic("hammer2_chain_indkey_freemap: bad radix"); 4557 break; 4558 } 4559 *keyp = key; 4560 4561 return (keybits); 4562 } 4563 4564 /* 4565 * File indirect blocks 4566 * 4567 * Calculate the key/keybits for the indirect block to create by scanning 4568 * existing keys. The key being created is also passed in *keyp and can be 4569 * inside or outside the indirect block. Regardless, the indirect block 4570 * must hold at least two keys in order to guarantee sufficient space. 4571 * 4572 * We use a modified version of the freemap's fixed radix tree, but taylored 4573 * for file data. Basically we configure an indirect block encompassing the 4574 * smallest key. 4575 */ 4576 static int 4577 hammer2_chain_indkey_file(hammer2_chain_t *parent, hammer2_key_t *keyp, 4578 int keybits, hammer2_blockref_t *base, int count, 4579 int ncount) 4580 { 4581 hammer2_chain_t *chain; 4582 hammer2_blockref_t *bref; 4583 hammer2_key_t key; 4584 hammer2_key_t key_beg; 4585 hammer2_key_t key_end; 4586 hammer2_key_t key_next; 4587 int nradix; 4588 int locount; 4589 int hicount; 4590 int maxloops = 300000; 4591 4592 key = *keyp; 4593 locount = 0; 4594 hicount = 0; 4595 keybits = 64; 4596 4597 /* 4598 * Calculate the range of keys in the array being careful to skip 4599 * slots which are overridden with a deletion. 4600 * 4601 * Locate the smallest key. 
4602 */ 4603 key_beg = 0; 4604 key_end = HAMMER2_KEY_MAX; 4605 hammer2_spin_ex(&parent->core.spin); 4606 4607 for (;;) { 4608 if (--maxloops == 0) { 4609 panic("indkey_freemap shit %p %p:%d\n", 4610 parent, base, count); 4611 } 4612 chain = hammer2_combined_find(parent, base, count, 4613 &key_next, 4614 key_beg, key_end, 4615 &bref); 4616 4617 /* 4618 * Exhausted search 4619 */ 4620 if (bref == NULL) 4621 break; 4622 4623 /* 4624 * Skip deleted chains. 4625 */ 4626 if (chain && (chain->flags & HAMMER2_CHAIN_DELETED)) { 4627 if (key_next == 0 || key_next > key_end) 4628 break; 4629 key_beg = key_next; 4630 continue; 4631 } 4632 4633 /* 4634 * Use the full live (not deleted) element for the scan 4635 * iteration. HAMMER2 does not allow partial replacements. 4636 * 4637 * XXX should be built into hammer2_combined_find(). 4638 */ 4639 key_next = bref->key + ((hammer2_key_t)1 << bref->keybits); 4640 4641 if (keybits > bref->keybits) { 4642 key = bref->key; 4643 keybits = bref->keybits; 4644 } else if (keybits == bref->keybits && bref->key < key) { 4645 key = bref->key; 4646 } 4647 if (key_next == 0) 4648 break; 4649 key_beg = key_next; 4650 } 4651 hammer2_spin_unex(&parent->core.spin); 4652 4653 /* 4654 * Calculate the static keybits for a higher-level indirect block 4655 * that contains the key. 4656 */ 4657 *keyp = key; 4658 4659 switch(ncount) { 4660 case HAMMER2_IND_BYTES_MIN / sizeof(hammer2_blockref_t): 4661 nradix = HAMMER2_IND_RADIX_MIN - HAMMER2_BLOCKREF_RADIX; 4662 break; 4663 case HAMMER2_IND_BYTES_NOM / sizeof(hammer2_blockref_t): 4664 nradix = HAMMER2_IND_RADIX_NOM - HAMMER2_BLOCKREF_RADIX; 4665 break; 4666 case HAMMER2_IND_BYTES_MAX / sizeof(hammer2_blockref_t): 4667 nradix = HAMMER2_IND_RADIX_MAX - HAMMER2_BLOCKREF_RADIX; 4668 break; 4669 default: 4670 panic("bad ncount %d\n", ncount); 4671 nradix = 0; 4672 break; 4673 } 4674 4675 /* 4676 * The largest radix that can be returned for an indirect block is 4677 * 63 bits. 
(The largest practical indirect block radix is actually 4678 * 62 bits because the top-level inode or volume root contains four 4679 * entries, but allow 63 to be returned). 4680 */ 4681 if (nradix >= 64) 4682 nradix = 63; 4683 4684 return keybits + nradix; 4685 } 4686 4687 #if 1 4688 4689 /* 4690 * Directory indirect blocks. 4691 * 4692 * Covers both the inode index (directory of inodes), and directory contents 4693 * (filenames hardlinked to inodes). 4694 * 4695 * Because directory keys are hashed we generally try to cut the space in 4696 * half. We accomodate the inode index (which tends to have linearly 4697 * increasing inode numbers) by ensuring that the keyspace is at least large 4698 * enough to fill up the indirect block being created. 4699 */ 4700 static int 4701 hammer2_chain_indkey_dir(hammer2_chain_t *parent, hammer2_key_t *keyp, 4702 int keybits, hammer2_blockref_t *base, int count, 4703 int ncount) 4704 { 4705 hammer2_blockref_t *bref; 4706 hammer2_chain_t *chain; 4707 hammer2_key_t key_beg; 4708 hammer2_key_t key_end; 4709 hammer2_key_t key_next; 4710 hammer2_key_t key; 4711 int nkeybits; 4712 int locount; 4713 int hicount; 4714 int maxloops = 300000; 4715 4716 /* 4717 * NOTE: We can't take a shortcut here anymore for inodes because 4718 * the root directory can contain a mix of inodes and directory 4719 * entries (we used to just return 63 if parent->bref.type was 4720 * HAMMER2_BREF_TYPE_INODE. 4721 */ 4722 key = *keyp; 4723 locount = 0; 4724 hicount = 0; 4725 4726 /* 4727 * Calculate the range of keys in the array being careful to skip 4728 * slots which are overridden with a deletion. 
4729 */ 4730 key_beg = 0; 4731 key_end = HAMMER2_KEY_MAX; 4732 hammer2_spin_ex(&parent->core.spin); 4733 4734 for (;;) { 4735 if (--maxloops == 0) { 4736 panic("indkey_freemap shit %p %p:%d\n", 4737 parent, base, count); 4738 } 4739 chain = hammer2_combined_find(parent, base, count, 4740 &key_next, 4741 key_beg, key_end, 4742 &bref); 4743 4744 /* 4745 * Exhausted search 4746 */ 4747 if (bref == NULL) 4748 break; 4749 4750 /* 4751 * Deleted object 4752 */ 4753 if (chain && (chain->flags & HAMMER2_CHAIN_DELETED)) { 4754 if (key_next == 0 || key_next > key_end) 4755 break; 4756 key_beg = key_next; 4757 continue; 4758 } 4759 4760 /* 4761 * Use the full live (not deleted) element for the scan 4762 * iteration. HAMMER2 does not allow partial replacements. 4763 * 4764 * XXX should be built into hammer2_combined_find(). 4765 */ 4766 key_next = bref->key + ((hammer2_key_t)1 << bref->keybits); 4767 4768 /* 4769 * Expand our calculated key range (key, keybits) to fit 4770 * the scanned key. nkeybits represents the full range 4771 * that we will later cut in half (two halves @ nkeybits - 1). 4772 */ 4773 nkeybits = keybits; 4774 if (nkeybits < bref->keybits) { 4775 if (bref->keybits > 64) { 4776 kprintf("bad bref chain %p bref %p\n", 4777 chain, bref); 4778 Debugger("fubar"); 4779 } 4780 nkeybits = bref->keybits; 4781 } 4782 while (nkeybits < 64 && 4783 (~(((hammer2_key_t)1 << nkeybits) - 1) & 4784 (key ^ bref->key)) != 0) { 4785 ++nkeybits; 4786 } 4787 4788 /* 4789 * If the new key range is larger we have to determine 4790 * which side of the new key range the existing keys fall 4791 * under by checking the high bit, then collapsing the 4792 * locount into the hicount or vise-versa. 
4793 */ 4794 if (keybits != nkeybits) { 4795 if (((hammer2_key_t)1 << (nkeybits - 1)) & key) { 4796 hicount += locount; 4797 locount = 0; 4798 } else { 4799 locount += hicount; 4800 hicount = 0; 4801 } 4802 keybits = nkeybits; 4803 } 4804 4805 /* 4806 * The newly scanned key will be in the lower half or the 4807 * upper half of the (new) key range. 4808 */ 4809 if (((hammer2_key_t)1 << (nkeybits - 1)) & bref->key) 4810 ++hicount; 4811 else 4812 ++locount; 4813 4814 if (key_next == 0) 4815 break; 4816 key_beg = key_next; 4817 } 4818 hammer2_spin_unex(&parent->core.spin); 4819 bref = NULL; /* now invalid (safety) */ 4820 4821 /* 4822 * Adjust keybits to represent half of the full range calculated 4823 * above (radix 63 max) for our new indirect block. 4824 */ 4825 --keybits; 4826 4827 /* 4828 * Expand keybits to hold at least ncount elements. ncount will be 4829 * a power of 2. This is to try to completely fill leaf nodes (at 4830 * least for keys which are not hashes). 4831 * 4832 * We aren't counting 'in' or 'out', we are counting 'high side' 4833 * and 'low side' based on the bit at (1LL << keybits). We want 4834 * everything to be inside in these cases so shift it all to 4835 * the low or high side depending on the new high bit. 4836 */ 4837 while (((hammer2_key_t)1 << keybits) < ncount) { 4838 ++keybits; 4839 if (key & ((hammer2_key_t)1 << keybits)) { 4840 hicount += locount; 4841 locount = 0; 4842 } else { 4843 locount += hicount; 4844 hicount = 0; 4845 } 4846 } 4847 4848 if (hicount > locount) 4849 key |= (hammer2_key_t)1 << keybits; 4850 else 4851 key &= ~(hammer2_key_t)1 << keybits; 4852 4853 *keyp = key; 4854 4855 return (keybits); 4856 } 4857 4858 #else 4859 4860 /* 4861 * Directory indirect blocks. 4862 * 4863 * Covers both the inode index (directory of inodes), and directory contents 4864 * (filenames hardlinked to inodes). 4865 * 4866 * Because directory keys are hashed we generally try to cut the space in 4867 * half. 
We accomodate the inode index (which tends to have linearly 4868 * increasing inode numbers) by ensuring that the keyspace is at least large 4869 * enough to fill up the indirect block being created. 4870 */ 4871 static int 4872 hammer2_chain_indkey_dir(hammer2_chain_t *parent, hammer2_key_t *keyp, 4873 int keybits, hammer2_blockref_t *base, int count, 4874 int ncount) 4875 { 4876 hammer2_blockref_t *bref; 4877 hammer2_chain_t *chain; 4878 hammer2_key_t key_beg; 4879 hammer2_key_t key_end; 4880 hammer2_key_t key_next; 4881 hammer2_key_t key; 4882 int nkeybits; 4883 int locount; 4884 int hicount; 4885 int maxloops = 300000; 4886 4887 /* 4888 * Shortcut if the parent is the inode. In this situation the 4889 * parent has 4+1 directory entries and we are creating an indirect 4890 * block capable of holding many more. 4891 */ 4892 if (parent->bref.type == HAMMER2_BREF_TYPE_INODE) { 4893 return 63; 4894 } 4895 4896 key = *keyp; 4897 locount = 0; 4898 hicount = 0; 4899 4900 /* 4901 * Calculate the range of keys in the array being careful to skip 4902 * slots which are overridden with a deletion. 4903 */ 4904 key_beg = 0; 4905 key_end = HAMMER2_KEY_MAX; 4906 hammer2_spin_ex(&parent->core.spin); 4907 4908 for (;;) { 4909 if (--maxloops == 0) { 4910 panic("indkey_freemap shit %p %p:%d\n", 4911 parent, base, count); 4912 } 4913 chain = hammer2_combined_find(parent, base, count, 4914 &key_next, 4915 key_beg, key_end, 4916 &bref); 4917 4918 /* 4919 * Exhausted search 4920 */ 4921 if (bref == NULL) 4922 break; 4923 4924 /* 4925 * Deleted object 4926 */ 4927 if (chain && (chain->flags & HAMMER2_CHAIN_DELETED)) { 4928 if (key_next == 0 || key_next > key_end) 4929 break; 4930 key_beg = key_next; 4931 continue; 4932 } 4933 4934 /* 4935 * Use the full live (not deleted) element for the scan 4936 * iteration. HAMMER2 does not allow partial replacements. 4937 * 4938 * XXX should be built into hammer2_combined_find(). 
4939 */ 4940 key_next = bref->key + ((hammer2_key_t)1 << bref->keybits); 4941 4942 /* 4943 * Expand our calculated key range (key, keybits) to fit 4944 * the scanned key. nkeybits represents the full range 4945 * that we will later cut in half (two halves @ nkeybits - 1). 4946 */ 4947 nkeybits = keybits; 4948 if (nkeybits < bref->keybits) { 4949 if (bref->keybits > 64) { 4950 kprintf("bad bref chain %p bref %p\n", 4951 chain, bref); 4952 Debugger("fubar"); 4953 } 4954 nkeybits = bref->keybits; 4955 } 4956 while (nkeybits < 64 && 4957 (~(((hammer2_key_t)1 << nkeybits) - 1) & 4958 (key ^ bref->key)) != 0) { 4959 ++nkeybits; 4960 } 4961 4962 /* 4963 * If the new key range is larger we have to determine 4964 * which side of the new key range the existing keys fall 4965 * under by checking the high bit, then collapsing the 4966 * locount into the hicount or vise-versa. 4967 */ 4968 if (keybits != nkeybits) { 4969 if (((hammer2_key_t)1 << (nkeybits - 1)) & key) { 4970 hicount += locount; 4971 locount = 0; 4972 } else { 4973 locount += hicount; 4974 hicount = 0; 4975 } 4976 keybits = nkeybits; 4977 } 4978 4979 /* 4980 * The newly scanned key will be in the lower half or the 4981 * upper half of the (new) key range. 4982 */ 4983 if (((hammer2_key_t)1 << (nkeybits - 1)) & bref->key) 4984 ++hicount; 4985 else 4986 ++locount; 4987 4988 if (key_next == 0) 4989 break; 4990 key_beg = key_next; 4991 } 4992 hammer2_spin_unex(&parent->core.spin); 4993 bref = NULL; /* now invalid (safety) */ 4994 4995 /* 4996 * Adjust keybits to represent half of the full range calculated 4997 * above (radix 63 max) for our new indirect block. 4998 */ 4999 --keybits; 5000 5001 /* 5002 * Expand keybits to hold at least ncount elements. ncount will be 5003 * a power of 2. This is to try to completely fill leaf nodes (at 5004 * least for keys which are not hashes). 5005 * 5006 * We aren't counting 'in' or 'out', we are counting 'high side' 5007 * and 'low side' based on the bit at (1LL << keybits). 
We want
	 * everything to be inside in these cases so shift it all to
	 * the low or high side depending on the new high bit.
	 */
	while (((hammer2_key_t)1 << keybits) < ncount) {
		++keybits;
		if (key & ((hammer2_key_t)1 << keybits)) {
			/* new high bit set: everything moves to the high side */
			hicount += locount;
			locount = 0;
		} else {
			/* new high bit clear: everything moves to the low side */
			locount += hicount;
			hicount = 0;
		}
	}

	if (hicount > locount)
		key |= (hammer2_key_t)1 << keybits;
	else
		key &= ~(hammer2_key_t)1 << keybits;

	*keyp = key;

	return (keybits);
}

#endif

/*
 * Sets CHAIN_DELETED and remove the chain's blockref from the parent if
 * it exists.
 *
 * Both parent and chain must be locked exclusively.
 *
 * This function will modify the parent if the blockref requires removal
 * from the parent's block table.
 *
 * This function is NOT recursive.  Any entity already pushed into the
 * chain (such as an inode) may still need visibility into its contents,
 * as well as the ability to read and modify the contents.  For example,
 * for an unlinked file which is still open.
 *
 * Also note that the flusher is responsible for cleaning up empty
 * indirect blocks.
 *
 * Returns 0 on success or a HAMMER2_ERROR_* code from the delete helper.
 */
int
hammer2_chain_delete(hammer2_chain_t *parent, hammer2_chain_t *chain,
		     hammer2_tid_t mtid, int flags)
{
	int error = 0;

	KKASSERT(hammer2_mtx_owned(&chain->lock));

	/*
	 * Nothing to do if already marked.
	 *
	 * We need the spinlock on the core whose RBTREE contains chain
	 * to protect against races.
	 */
	if ((chain->flags & HAMMER2_CHAIN_DELETED) == 0) {
		KKASSERT((chain->flags & HAMMER2_CHAIN_DELETED) == 0 &&
			 chain->parent == parent);
		error = _hammer2_chain_delete_helper(parent, chain,
						     mtid, flags);
	}

	/*
	 * Permanent deletions mark the chain as destroyed.
	 *
	 * NOTE: We do not setflush the chain unless the deletion is
	 *	 permanent, since the deletion of a chain does not actually
	 *	 require it to be flushed.
	 */
	if (error == 0) {
		if (flags & HAMMER2_DELETE_PERMANENT) {
			atomic_set_int(&chain->flags, HAMMER2_CHAIN_DESTROY);
			hammer2_chain_setflush(chain);
		}
	}

	return error;
}

/*
 * Returns the index of the nearest element in the blockref array >= elm.
 * Returns (count) if no element could be found.
 *
 * Sets *key_nextp to the next key for loop purposes but does not modify
 * it if the next key would be higher than the current value of *key_nextp.
 * Note that *key_nextp can overflow to 0, which should be tested by the
 * caller.
 *
 * WARNING!  Must be called with parent's spinlock held.  Spinlock remains
 *	     held through the operation.
 */
static int
hammer2_base_find(hammer2_chain_t *parent,
		  hammer2_blockref_t *base, int count,
		  hammer2_key_t *key_nextp,
		  hammer2_key_t key_beg, hammer2_key_t key_end)
{
	hammer2_blockref_t *scan;
	hammer2_key_t scan_end;
	int i;
	int limit;

	/*
	 * Require the live chain's already have their core's counted
	 * so we can optimize operations.
	 */
	KKASSERT(parent->flags & HAMMER2_CHAIN_COUNTEDBREFS);

	/*
	 * Degenerate case
	 */
	if (count == 0 || base == NULL)
		return(count);

	/*
	 * Sequential optimization using parent->cache_index.  This is
	 * the most likely scenario.
	 *
	 * We can avoid trailing empty entries on live chains, otherwise
	 * we might have to check the whole block array.
	 */
	i = parent->cache_index;	/* SMP RACE OK */
	cpu_ccfence();
	limit = parent->core.live_zero;	/* first always-empty slot */
	if (i >= limit)
		i = limit - 1;
	if (i < 0)
		i = 0;
	KKASSERT(i < count);

	/*
	 * Search backwards
	 */
	scan = &base[i];
	while (i > 0 && (scan->type == 0 || scan->key > key_beg)) {
		--scan;
		--i;
	}
	parent->cache_index = i;

	/*
	 * Search forwards, stop when we find a scan element which
	 * encloses the key or until we know that there are no further
	 * elements.
	 */
	while (i < count) {
		if (scan->type != 0) {
			scan_end = scan->key +
				   ((hammer2_key_t)1 << scan->keybits) - 1;
			if (scan->key > key_beg || scan_end >= key_beg)
				break;
		}
		if (i >= limit)
			return (count);
		++scan;
		++i;
	}
	if (i != count) {
		parent->cache_index = i;
		if (i >= limit) {
			i = count;
		} else {
			/*
			 * Clip *key_nextp downward to one past this
			 * element's range, watching for wrap to 0.
			 */
			scan_end = scan->key +
				   ((hammer2_key_t)1 << scan->keybits);
			if (scan_end && (*key_nextp > scan_end ||
					 *key_nextp == 0)) {
				*key_nextp = scan_end;
			}
		}
	}
	return (i);
}

/*
 * Do a combined search and return the next match either from the blockref
 * array or from the in-memory chain.  Sets *bresp to the returned bref in
 * both cases, or sets it to NULL if the search exhausted.  Only returns
 * a non-NULL chain if the search matched from the in-memory chain.
 *
 * When no in-memory chain is found, a non-NULL bref returned in *bresp
 * indicates that the match came from the on-media block table.
 *
 * The returned chain is not locked or referenced.  Use the returned bref
 * to determine if the search exhausted or not.  Iterate if the base find
 * is chosen but matches a deleted chain.
 *
 * WARNING!  Must be called with parent's spinlock held.  Spinlock remains
 *	     held through the operation.
 */
hammer2_chain_t *
hammer2_combined_find(hammer2_chain_t *parent,
		      hammer2_blockref_t *base, int count,
		      hammer2_key_t *key_nextp,
		      hammer2_key_t key_beg, hammer2_key_t key_end,
		      hammer2_blockref_t **bresp)
{
	hammer2_blockref_t *bref;
	hammer2_chain_t *chain;
	int i;

	/*
	 * Lookup in block array and in rbtree.
	 */
	*key_nextp = key_end + 1;
	i = hammer2_base_find(parent, base, count, key_nextp,
			      key_beg, key_end);
	chain = hammer2_chain_find(parent, key_nextp, key_beg, key_end);

	/*
	 * Neither matched
	 */
	if (i == count && chain == NULL) {
		*bresp = NULL;
		return(NULL);
	}

	/*
	 * Only chain matched.
	 */
	if (i == count) {
		bref = &chain->bref;
		goto found;
	}

	/*
	 * Only blockref matched.
	 */
	if (chain == NULL) {
		bref = &base[i];
		goto found;
	}

	/*
	 * Both in-memory and blockref matched, select the nearer element.
	 *
	 * If both are flush with the left-hand side or both are the
	 * same distance away, select the chain.  In this situation the
	 * chain must have been loaded from the matching blockmap.
	 */
	if ((chain->bref.key <= key_beg && base[i].key <= key_beg) ||
	    chain->bref.key == base[i].key) {
		KKASSERT(chain->bref.key == base[i].key);
		bref = &chain->bref;
		goto found;
	}

	/*
	 * Select the nearer key.  If the blockref wins, NULL out chain so
	 * the caller knows the match is media-only.
	 */
	if (chain->bref.key < base[i].key) {
		bref = &chain->bref;
	} else {
		bref = &base[i];
		chain = NULL;
	}

	/*
	 * If the bref is out of bounds we've exhausted our search.
	 */
found:
	if (bref->key > key_end) {
		*bresp = NULL;
		chain = NULL;
	} else {
		*bresp = bref;
	}
	return(chain);
}

/*
 * Locate the specified block array element and delete it.  The element
 * must exist.
 *
 * The spin lock on the related chain must be held.
 *
 * NOTE: live_count was adjusted when the chain was deleted, so it does not
 *	 need to be adjusted when we commit the media change.
 */
void
hammer2_base_delete(hammer2_chain_t *parent,
		    hammer2_blockref_t *base, int count,
		    hammer2_chain_t *chain)
{
	hammer2_blockref_t *elm = &chain->bref;
	hammer2_blockref_t *scan;
	hammer2_key_t key_next;
	int i;

	/*
	 * Delete element.  Expect the element to exist.
	 *
	 * XXX see caller, flush code not yet sophisticated enough to prevent
	 * re-flushed in some cases.
	 */
	key_next = 0; /* max range */
	i = hammer2_base_find(parent, base, count, &key_next,
			      elm->key, elm->key);
	scan = &base[i];
	if (i == count || scan->type == 0 ||
	    scan->key != elm->key ||
	    ((chain->flags & HAMMER2_CHAIN_BMAPUPD) == 0 &&
	     scan->keybits != elm->keybits)) {
		hammer2_spin_unex(&parent->core.spin);
		panic("delete base %p element not found at %d/%d elm %p\n",
		      base, i, count, elm);
		return;
	}

	/*
	 * Update stats and zero the entry.
	 *
	 * NOTE: Handle radix == 0 (0 bytes) case.
	 */
	if ((int)(scan->data_off & HAMMER2_OFF_MASK_RADIX)) {
		parent->bref.embed.stats.data_count -= (hammer2_off_t)1 <<
				(int)(scan->data_off & HAMMER2_OFF_MASK_RADIX);
	}
	switch(scan->type) {
	case HAMMER2_BREF_TYPE_INODE:
		--parent->bref.embed.stats.inode_count;
		/* fall through */
	case HAMMER2_BREF_TYPE_DATA:
		if (parent->bref.leaf_count == HAMMER2_BLOCKREF_LEAF_MAX) {
			atomic_set_int(&chain->flags,
				       HAMMER2_CHAIN_HINT_LEAF_COUNT);
		} else {
			if (parent->bref.leaf_count)
				--parent->bref.leaf_count;
		}
		/* fall through */
	case HAMMER2_BREF_TYPE_INDIRECT:
		/*
		 * INODE and INDIRECT brefs aggregate stats from the
		 * sub-tree they cover; DATA brefs (which fell through
		 * to here) do not.
		 */
		if (scan->type != HAMMER2_BREF_TYPE_DATA) {
			parent->bref.embed.stats.data_count -=
				scan->embed.stats.data_count;
			parent->bref.embed.stats.inode_count -=
				scan->embed.stats.inode_count;
		}
		if (scan->type == HAMMER2_BREF_TYPE_INODE)
			break;
		if (parent->bref.leaf_count == HAMMER2_BLOCKREF_LEAF_MAX) {
			atomic_set_int(&chain->flags,
				       HAMMER2_CHAIN_HINT_LEAF_COUNT);
		} else {
			/* clamp at 0 rather than underflowing */
			if (parent->bref.leaf_count <= scan->leaf_count)
				parent->bref.leaf_count = 0;
			else
				parent->bref.leaf_count -= scan->leaf_count;
		}
		break;
	case HAMMER2_BREF_TYPE_DIRENT:
		if (parent->bref.leaf_count == HAMMER2_BLOCKREF_LEAF_MAX) {
			atomic_set_int(&chain->flags,
				       HAMMER2_CHAIN_HINT_LEAF_COUNT);
		} else {
			if (parent->bref.leaf_count)
				--parent->bref.leaf_count;
		}
		/* fall through */
	default:
		break;
	}

	bzero(scan, sizeof(*scan));

	/*
	 * We can only optimize parent->core.live_zero for live chains.
	 * Backtrack over any trailing empty slots freed up by this delete.
	 */
	if (parent->core.live_zero == i + 1) {
		while (--i >= 0 && base[i].type == 0)
			;
		parent->core.live_zero = i + 1;
	}

	/*
	 * Clear appropriate blockmap flags in chain.
	 */
	atomic_clear_int(&chain->flags, HAMMER2_CHAIN_BMAPPED |
					HAMMER2_CHAIN_BMAPUPD);
}

/*
 * Insert the specified element.  The block array must not already have the
 * element and must have space available for the insertion.
 *
 * The spin lock on the related chain must be held.
 *
 * NOTE: live_count was adjusted when the chain was deleted, so it does not
 *	 need to be adjusted when we commit the media change.
 */
void
hammer2_base_insert(hammer2_chain_t *parent,
		    hammer2_blockref_t *base, int count,
		    hammer2_chain_t *chain, hammer2_blockref_t *elm)
{
	hammer2_key_t key_next;
	hammer2_key_t xkey;
	int i;
	int j;
	int k;
	int l;
	int u = 1;	/* which insertion strategy ran (for the panic msg) */

	/*
	 * Insert new element.  Expect the element to not already exist
	 * unless we are replacing it.
	 *
	 * XXX see caller, flush code not yet sophisticated enough to prevent
	 * re-flushed in some cases.
	 */
	key_next = 0; /* max range */
	i = hammer2_base_find(parent, base, count, &key_next,
			      elm->key, elm->key);

	/*
	 * Shortcut fill optimization, typical ordered insertion(s) may not
	 * require a search.
	 */
	KKASSERT(i >= 0 && i <= count);

	/*
	 * Set appropriate blockmap flags in chain (if not NULL)
	 */
	if (chain)
		atomic_set_int(&chain->flags, HAMMER2_CHAIN_BMAPPED);

	/*
	 * Update stats and zero the entry
	 */
	if ((int)(elm->data_off & HAMMER2_OFF_MASK_RADIX)) {
		parent->bref.embed.stats.data_count += (hammer2_off_t)1 <<
			(int)(elm->data_off & HAMMER2_OFF_MASK_RADIX);
	}
	switch(elm->type) {
	case HAMMER2_BREF_TYPE_INODE:
		++parent->bref.embed.stats.inode_count;
		/* fall through */
	case HAMMER2_BREF_TYPE_DATA:
		if (parent->bref.leaf_count != HAMMER2_BLOCKREF_LEAF_MAX)
			++parent->bref.leaf_count;
		/* fall through */
	case HAMMER2_BREF_TYPE_INDIRECT:
		/* aggregate sub-tree stats for non-DATA brefs */
		if (elm->type != HAMMER2_BREF_TYPE_DATA) {
			parent->bref.embed.stats.data_count +=
				elm->embed.stats.data_count;
			parent->bref.embed.stats.inode_count +=
				elm->embed.stats.inode_count;
		}
		if (elm->type == HAMMER2_BREF_TYPE_INODE)
			break;
		/* saturate leaf_count at HAMMER2_BLOCKREF_LEAF_MAX */
		if (parent->bref.leaf_count + elm->leaf_count <
		    HAMMER2_BLOCKREF_LEAF_MAX) {
			parent->bref.leaf_count += elm->leaf_count;
		} else {
			parent->bref.leaf_count = HAMMER2_BLOCKREF_LEAF_MAX;
		}
		break;
	case HAMMER2_BREF_TYPE_DIRENT:
		if (parent->bref.leaf_count != HAMMER2_BLOCKREF_LEAF_MAX)
			++parent->bref.leaf_count;
		break;
	default:
		break;
	}


	/*
	 * We can only optimize parent->core.live_zero for live chains.
	 * Appending past the last live slot avoids any shuffling.
	 */
	if (i == count && parent->core.live_zero < count) {
		i = parent->core.live_zero++;
		base[i] = *elm;
		return;
	}

	xkey = elm->key + ((hammer2_key_t)1 << elm->keybits) - 1;
	if (i != count && (base[i].key < elm->key || xkey >= base[i].key)) {
		hammer2_spin_unex(&parent->core.spin);
		panic("insert base %p overlapping elements at %d elm %p\n",
		      base, i, elm);
	}

	/*
	 * Try to find an empty slot before or after, shifting the
	 * intervening elements over by one to open slot i.
	 */
	j = i;
	k = i;
	while (j > 0 || k < count) {
		--j;
		if (j >= 0 && base[j].type == 0) {
			if (j == i - 1) {
				base[j] = *elm;
			} else {
				bcopy(&base[j+1], &base[j],
				      (i - j - 1) * sizeof(*base));
				base[i - 1] = *elm;
			}
			goto validate;
		}
		++k;
		if (k < count && base[k].type == 0) {
			bcopy(&base[i], &base[i+1],
			      (k - i) * sizeof(hammer2_blockref_t));
			base[i] = *elm;

			/*
			 * We can only update parent->core.live_zero for live
			 * chains.
			 */
			if (parent->core.live_zero <= k)
				parent->core.live_zero = k + 1;
			u = 2;
			goto validate;
		}
	}
	panic("hammer2_base_insert: no room!");

	/*
	 * Debugging: verify the array is still sorted with no overlapping
	 * key ranges after the insertion.
	 */
validate:
	key_next = 0;
	for (l = 0; l < count; ++l) {
		if (base[l].type) {
			key_next = base[l].key +
				   ((hammer2_key_t)1 << base[l].keybits) - 1;
			break;
		}
	}
	while (++l < count) {
		if (base[l].type) {
			if (base[l].key <= key_next)
				panic("base_insert %d %d,%d,%d fail %p:%d", u, i, j, k, base, l);
			key_next = base[l].key +
				   ((hammer2_key_t)1 << base[l].keybits) - 1;

		}
	}

}

#if 0

/*
 * Sort the blockref array for the chain.  Used by the flush code to
 * sort the blockref[] array.
 *
 * The chain must be exclusively locked AND spin-locked.
 */
typedef hammer2_blockref_t *hammer2_blockref_p;

/*
 * qsort comparator: empty brefs (type == 0) sort to the end, everything
 * else sorts ascending by key.
 */
static
int
hammer2_base_sort_callback(const void *v1, const void *v2)
{
	hammer2_blockref_p bref1 = *(const hammer2_blockref_p *)v1;
	hammer2_blockref_p bref2 = *(const hammer2_blockref_p *)v2;

	/*
	 * Make sure empty elements are placed at the end of the array
	 */
	if (bref1->type == 0) {
		if (bref2->type == 0)
			return(0);
		return(1);
	} else if (bref2->type == 0) {
		return(-1);
	}

	/*
	 * Sort by key
	 */
	if (bref1->key < bref2->key)
		return(-1);
	if (bref1->key > bref2->key)
		return(1);
	return(0);
}

void
hammer2_base_sort(hammer2_chain_t *chain)
{
	hammer2_blockref_t *base;
	int count;

	switch(chain->bref.type) {
	case HAMMER2_BREF_TYPE_INODE:
		/*
		 * Special shortcut for embedded data returns the inode
		 * itself.  Callers must detect this condition and access
		 * the embedded data (the strategy code does this for us).
		 *
		 * This is only applicable to regular files and softlinks.
		 */
		if (chain->data->ipdata.meta.op_flags &
		    HAMMER2_OPFLAG_DIRECTDATA) {
			return;
		}
		base = &chain->data->ipdata.u.blockset.blockref[0];
		count = HAMMER2_SET_COUNT;
		break;
	case HAMMER2_BREF_TYPE_FREEMAP_NODE:
	case HAMMER2_BREF_TYPE_INDIRECT:
		/*
		 * Optimize indirect blocks in the INITIAL state to avoid
		 * I/O.
		 */
		KKASSERT((chain->flags & HAMMER2_CHAIN_INITIAL) == 0);
		base = &chain->data->npdata[0];
		count = chain->bytes / sizeof(hammer2_blockref_t);
		break;
	case HAMMER2_BREF_TYPE_VOLUME:
		base = &chain->data->voldata.sroot_blockset.blockref[0];
		count = HAMMER2_SET_COUNT;
		break;
	case HAMMER2_BREF_TYPE_FREEMAP:
		base = &chain->data->blkset.blockref[0];
		count = HAMMER2_SET_COUNT;
		break;
	default:
		kprintf("hammer2_chain_lookup: unrecognized "
			"blockref(A) type: %d",
			chain->bref.type);
		while (1)
			tsleep(&base, 0, "dead", 0);
		panic("hammer2_chain_lookup: unrecognized "
		      "blockref(A) type: %d",
		      chain->bref.type);
		base = NULL;	/* safety */
		count = 0;	/* safety */
	}
	kqsort(base, count, sizeof(*base), hammer2_base_sort_callback);
}

#endif

/*
 * Chain memory management
 */

/*
 * Short, bounded sleep used by callers waiting for chain flush activity.
 */
void
hammer2_chain_wait(hammer2_chain_t *chain)
{
	tsleep(chain, 0, "chnflw", 1);
}

/*
 * Return the chain's media data for reading.  Data must be resolved.
 */
const hammer2_media_data_t *
hammer2_chain_rdata(hammer2_chain_t *chain)
{
	KKASSERT(chain->data != NULL);
	return (chain->data);
}

/*
 * Return the chain's media data for writing.  Data must be resolved.
 */
hammer2_media_data_t *
hammer2_chain_wdata(hammer2_chain_t *chain)
{
	KKASSERT(chain->data != NULL);
	return (chain->data);
}

/*
 * Set the check data for a chain.  This can be a heavy-weight operation
 * and typically only runs on-flush.  For file data check data is calculated
 * when the logical buffers are flushed.
5672 */ 5673 void 5674 hammer2_chain_setcheck(hammer2_chain_t *chain, void *bdata) 5675 { 5676 chain->bref.flags &= ~HAMMER2_BREF_FLAG_ZERO; 5677 5678 switch(HAMMER2_DEC_CHECK(chain->bref.methods)) { 5679 case HAMMER2_CHECK_NONE: 5680 break; 5681 case HAMMER2_CHECK_DISABLED: 5682 break; 5683 case HAMMER2_CHECK_ISCSI32: 5684 chain->bref.check.iscsi32.value = 5685 hammer2_icrc32(bdata, chain->bytes); 5686 break; 5687 case HAMMER2_CHECK_XXHASH64: 5688 chain->bref.check.xxhash64.value = 5689 XXH64(bdata, chain->bytes, XXH_HAMMER2_SEED); 5690 break; 5691 case HAMMER2_CHECK_SHA192: 5692 { 5693 SHA256_CTX hash_ctx; 5694 union { 5695 uint8_t digest[SHA256_DIGEST_LENGTH]; 5696 uint64_t digest64[SHA256_DIGEST_LENGTH/8]; 5697 } u; 5698 5699 SHA256_Init(&hash_ctx); 5700 SHA256_Update(&hash_ctx, bdata, chain->bytes); 5701 SHA256_Final(u.digest, &hash_ctx); 5702 u.digest64[2] ^= u.digest64[3]; 5703 bcopy(u.digest, 5704 chain->bref.check.sha192.data, 5705 sizeof(chain->bref.check.sha192.data)); 5706 } 5707 break; 5708 case HAMMER2_CHECK_FREEMAP: 5709 chain->bref.check.freemap.icrc32 = 5710 hammer2_icrc32(bdata, chain->bytes); 5711 break; 5712 default: 5713 kprintf("hammer2_chain_setcheck: unknown check type %02x\n", 5714 chain->bref.methods); 5715 break; 5716 } 5717 } 5718 5719 int 5720 hammer2_chain_testcheck(hammer2_chain_t *chain, void *bdata) 5721 { 5722 uint32_t check32; 5723 uint64_t check64; 5724 int r; 5725 5726 if (chain->bref.flags & HAMMER2_BREF_FLAG_ZERO) 5727 return 1; 5728 5729 switch(HAMMER2_DEC_CHECK(chain->bref.methods)) { 5730 case HAMMER2_CHECK_NONE: 5731 r = 1; 5732 break; 5733 case HAMMER2_CHECK_DISABLED: 5734 r = 1; 5735 break; 5736 case HAMMER2_CHECK_ISCSI32: 5737 check32 = hammer2_icrc32(bdata, chain->bytes); 5738 r = (chain->bref.check.iscsi32.value == check32); 5739 if (r == 0) { 5740 kprintf("chain %016jx.%02x meth=%02x CHECK FAIL " 5741 "(flags=%08x, bref/data %08x/%08x)\n", 5742 chain->bref.data_off, 5743 chain->bref.type, 5744 chain->bref.methods, 
5745 chain->flags, 5746 chain->bref.check.iscsi32.value, 5747 check32); 5748 } 5749 hammer2_process_icrc32 += chain->bytes; 5750 break; 5751 case HAMMER2_CHECK_XXHASH64: 5752 check64 = XXH64(bdata, chain->bytes, XXH_HAMMER2_SEED); 5753 r = (chain->bref.check.xxhash64.value == check64); 5754 if (r == 0) { 5755 kprintf("chain %016jx.%02x key=%016jx " 5756 "meth=%02x CHECK FAIL " 5757 "(flags=%08x, bref/data %016jx/%016jx)\n", 5758 chain->bref.data_off, 5759 chain->bref.type, 5760 chain->bref.key, 5761 chain->bref.methods, 5762 chain->flags, 5763 chain->bref.check.xxhash64.value, 5764 check64); 5765 } 5766 hammer2_process_xxhash64 += chain->bytes; 5767 break; 5768 case HAMMER2_CHECK_SHA192: 5769 { 5770 SHA256_CTX hash_ctx; 5771 union { 5772 uint8_t digest[SHA256_DIGEST_LENGTH]; 5773 uint64_t digest64[SHA256_DIGEST_LENGTH/8]; 5774 } u; 5775 5776 SHA256_Init(&hash_ctx); 5777 SHA256_Update(&hash_ctx, bdata, chain->bytes); 5778 SHA256_Final(u.digest, &hash_ctx); 5779 u.digest64[2] ^= u.digest64[3]; 5780 if (bcmp(u.digest, 5781 chain->bref.check.sha192.data, 5782 sizeof(chain->bref.check.sha192.data)) == 0) { 5783 r = 1; 5784 } else { 5785 r = 0; 5786 kprintf("chain %016jx.%02x meth=%02x " 5787 "CHECK FAIL\n", 5788 chain->bref.data_off, 5789 chain->bref.type, 5790 chain->bref.methods); 5791 } 5792 } 5793 break; 5794 case HAMMER2_CHECK_FREEMAP: 5795 r = (chain->bref.check.freemap.icrc32 == 5796 hammer2_icrc32(bdata, chain->bytes)); 5797 if (r == 0) { 5798 kprintf("chain %016jx.%02x meth=%02x " 5799 "CHECK FAIL\n", 5800 chain->bref.data_off, 5801 chain->bref.type, 5802 chain->bref.methods); 5803 kprintf("freemap.icrc %08x icrc32 %08x (%d)\n", 5804 chain->bref.check.freemap.icrc32, 5805 hammer2_icrc32(bdata, chain->bytes), 5806 chain->bytes); 5807 if (chain->dio) 5808 kprintf("dio %p buf %016jx,%d bdata %p/%p\n", 5809 chain->dio, chain->dio->bp->b_loffset, 5810 chain->dio->bp->b_bufsize, bdata, 5811 chain->dio->bp->b_data); 5812 } 5813 5814 break; 5815 default: 5816 
kprintf("hammer2_chain_setcheck: unknown check type %02x\n", 5817 chain->bref.methods); 5818 r = 1; 5819 break; 5820 } 5821 return r; 5822 } 5823 5824 /* 5825 * Acquire the chain and parent representing the specified inode for the 5826 * device at the specified cluster index. 5827 * 5828 * The flags passed in are LOOKUP flags, not RESOLVE flags. 5829 * 5830 * If we are unable to locate the hardlink, INVAL is returned and *chainp 5831 * will be NULL. *parentp may still be set error or not, or NULL if the 5832 * parent itself could not be resolved. 5833 * 5834 * Caller must pass-in a valid (and locked), or NULL *parentp or *chainp. 5835 * This function replaces *parentp and *chainp. Generally speaking, if 5836 * the caller found a directory entry and wants the inode, the caller should 5837 * pass the parent,chain representing the directory entry so this function 5838 * can dispose of it properly to avoid any possible lock order reversals. 5839 */ 5840 int 5841 hammer2_chain_inode_find(hammer2_pfs_t *pmp, hammer2_key_t inum, 5842 int clindex, int flags, 5843 hammer2_chain_t **parentp, hammer2_chain_t **chainp) 5844 { 5845 hammer2_chain_t *parent; 5846 hammer2_chain_t *rchain; 5847 hammer2_key_t key_dummy; 5848 int resolve_flags; 5849 int error; 5850 5851 resolve_flags = (flags & HAMMER2_LOOKUP_SHARED) ? 5852 HAMMER2_RESOLVE_SHARED : 0; 5853 5854 /* 5855 * Caller expects us to replace these. 5856 */ 5857 if (*chainp) { 5858 hammer2_chain_unlock(*chainp); 5859 hammer2_chain_drop(*chainp); 5860 *chainp = NULL; 5861 } 5862 if (*parentp) { 5863 hammer2_chain_unlock(*parentp); 5864 hammer2_chain_drop(*parentp); 5865 *parentp = NULL; 5866 } 5867 5868 /* 5869 * Inodes hang off of the iroot (bit 63 is clear, differentiating 5870 * inodes from root directory entries in the key lookup). 
5871 */ 5872 parent = hammer2_inode_chain(pmp->iroot, clindex, resolve_flags); 5873 rchain = NULL; 5874 if (parent) { 5875 rchain = hammer2_chain_lookup(&parent, &key_dummy, 5876 inum, inum, 5877 &error, flags); 5878 } else { 5879 error = HAMMER2_ERROR_EIO; 5880 } 5881 *parentp = parent; 5882 *chainp = rchain; 5883 5884 return error; 5885 } 5886 5887 /* 5888 * Used by the bulkscan code to snapshot the synchronized storage for 5889 * a volume, allowing it to be scanned concurrently against normal 5890 * operation. 5891 */ 5892 hammer2_chain_t * 5893 hammer2_chain_bulksnap(hammer2_dev_t *hmp) 5894 { 5895 hammer2_chain_t *copy; 5896 5897 copy = hammer2_chain_alloc(hmp, hmp->spmp, &hmp->vchain.bref); 5898 copy->data = kmalloc(sizeof(copy->data->voldata), 5899 hmp->mchain, 5900 M_WAITOK | M_ZERO); 5901 hammer2_voldata_lock(hmp); 5902 copy->data->voldata = hmp->volsync; 5903 hammer2_voldata_unlock(hmp); 5904 5905 return copy; 5906 } 5907 5908 void 5909 hammer2_chain_bulkdrop(hammer2_chain_t *copy) 5910 { 5911 KKASSERT(copy->bref.type == HAMMER2_BREF_TYPE_VOLUME); 5912 KKASSERT(copy->data); 5913 kfree(copy->data, copy->hmp->mchain); 5914 copy->data = NULL; 5915 atomic_add_long(&hammer2_chain_allocs, -1); 5916 hammer2_chain_drop(copy); 5917 } 5918 5919 /* 5920 * Returns non-zero if the chain (INODE or DIRENT) matches the 5921 * filename. 
5922 */ 5923 int 5924 hammer2_chain_dirent_test(hammer2_chain_t *chain, const char *name, 5925 size_t name_len) 5926 { 5927 const hammer2_inode_data_t *ripdata; 5928 const hammer2_dirent_head_t *den; 5929 5930 if (chain->bref.type == HAMMER2_BREF_TYPE_INODE) { 5931 ripdata = &chain->data->ipdata; 5932 if (ripdata->meta.name_len == name_len && 5933 bcmp(ripdata->filename, name, name_len) == 0) { 5934 return 1; 5935 } 5936 } 5937 if (chain->bref.type == HAMMER2_BREF_TYPE_DIRENT && 5938 chain->bref.embed.dirent.namlen == name_len) { 5939 den = &chain->bref.embed.dirent; 5940 if (name_len > sizeof(chain->bref.check.buf) && 5941 bcmp(chain->data->buf, name, name_len) == 0) { 5942 return 1; 5943 } 5944 if (name_len <= sizeof(chain->bref.check.buf) && 5945 bcmp(chain->bref.check.buf, name, name_len) == 0) { 5946 return 1; 5947 } 5948 } 5949 return 0; 5950 } 5951