1 /* 2 * Copyright (c) 2011-2018 The DragonFly Project. All rights reserved. 3 * 4 * This code is derived from software contributed to The DragonFly Project 5 * by Matthew Dillon <dillon@dragonflybsd.org> 6 * and Venkatesh Srinivas <vsrinivas@dragonflybsd.org> 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in 16 * the documentation and/or other materials provided with the 17 * distribution. 18 * 3. Neither the name of The DragonFly Project nor the names of its 19 * contributors may be used to endorse or promote products derived 20 * from this software without specific, prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 23 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 24 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 25 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 26 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 27 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 28 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 29 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 30 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 31 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 32 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33 * SUCH DAMAGE. 34 */ 35 /* 36 * This subsystem implements most of the core support functions for 37 * the hammer2_chain structure. 
 *
 * Chains are the in-memory version of media objects (volume header, inodes,
 * indirect blocks, data blocks, etc).  Chains represent a portion of the
 * HAMMER2 topology.
 *
 * Chains are no-longer delete-duplicated.  Instead, the original in-memory
 * chain will be moved along with its block reference (e.g. for things like
 * renames, hardlink operations, modifications, etc), and will be indexed
 * on a secondary list for flush handling instead of propagating a flag
 * upward to the root.
 *
 * Concurrent front-end operations can still run against backend flushes
 * as long as they do not cross the current flush boundary.  An operation
 * running above the current flush (in areas not yet flushed) can become
 * part of the current flush while an operation running below the current
 * flush can become part of the next flush.
 */
#include <sys/cdefs.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/types.h>
#include <sys/lock.h>
#include <sys/kern_syscall.h>
#include <sys/uuid.h>

#include <crypto/sha2/sha2.h>

#include "hammer2.h"

static hammer2_chain_t *hammer2_chain_create_indirect(
		hammer2_chain_t *parent,
		hammer2_key_t key, int keybits,
		hammer2_tid_t mtid, int for_type, int *errorp);
static hammer2_io_t *hammer2_chain_drop_data(hammer2_chain_t *chain);
static hammer2_chain_t *hammer2_combined_find(
		hammer2_chain_t *parent,
		hammer2_blockref_t *base, int count,
		hammer2_key_t *key_nextp,
		hammer2_key_t key_beg, hammer2_key_t key_end,
		hammer2_blockref_t **bresp);

static struct krate krate_h2me = { .freq = 1 };

/*
 * Basic RBTree for chains (core->rbtree and core->dbtree).  Chains cannot
 * overlap in the RB trees.  Deleted chains are moved from rbtree to either
 * dbtree or to dbq.
 *
 * Chains in delete-duplicate sequences can always iterate through core_entry
 * to locate the live version of the chain.
 */
RB_GENERATE(hammer2_chain_tree, hammer2_chain, rbnode, hammer2_chain_cmp);

/*
 * RB-tree comparator for chains.  Each chain covers the inclusive key
 * range [bref.key, bref.key + 2^keybits - 1]; ranges are compared, not
 * point keys.
 */
int
hammer2_chain_cmp(hammer2_chain_t *chain1, hammer2_chain_t *chain2)
{
	hammer2_key_t c1_beg;		/* inclusive key range of chain1 */
	hammer2_key_t c1_end;
	hammer2_key_t c2_beg;		/* inclusive key range of chain2 */
	hammer2_key_t c2_end;

	/*
	 * Compare chains.  Overlaps are not supposed to happen, and to
	 * catch any software issues early we count overlaps as a match.
	 */
	c1_beg = chain1->bref.key;
	c1_end = c1_beg + ((hammer2_key_t)1 << chain1->bref.keybits) - 1;
	c2_beg = chain2->bref.key;
	c2_end = c2_beg + ((hammer2_key_t)1 << chain2->bref.keybits) - 1;

	if (c1_end < c2_beg)	/* fully to the left */
		return(-1);
	if (c1_beg > c2_end)	/* fully to the right */
		return(1);
	return(0);		/* overlap (must not cross edge boundary) */
}

/*
 * Assert that a chain has no media data associated with it.
 *
 * VOLUME and FREEMAP chains are exempt from the (data) check; their data
 * pointer may legitimately persist.
 */
static __inline void
hammer2_chain_assert_no_data(hammer2_chain_t *chain)
{
	KKASSERT(chain->dio == NULL);
	if (chain->bref.type != HAMMER2_BREF_TYPE_VOLUME &&
	    chain->bref.type != HAMMER2_BREF_TYPE_FREEMAP &&
	    chain->data) {
		panic("hammer2_assert_no_data: chain %p still has data", chain);
	}
}

/*
 * Make a chain visible to the flusher.  The flusher needs to be able to
 * do flushes of subdirectory chains or single files so it does a top-down
 * recursion using the ONFLUSH flag for the recursion.  It locates MODIFIED
 * or UPDATE chains and flushes back up the chain to the volume root.
 *
 * This routine sets ONFLUSH upward until it hits the volume root.  For
 * simplicity we ignore PFSROOT boundaries whose rules can be complex.
 * Extra ONFLUSH flagging doesn't hurt the filesystem.
 */
void
hammer2_chain_setflush(hammer2_chain_t *chain)
{
	hammer2_chain_t *parent;

	if ((chain->flags & HAMMER2_CHAIN_ONFLUSH) == 0) {
		hammer2_spin_sh(&chain->core.spin);
		while ((chain->flags & HAMMER2_CHAIN_ONFLUSH) == 0) {
			atomic_set_int(&chain->flags, HAMMER2_CHAIN_ONFLUSH);
			if ((parent = chain->parent) == NULL)
				break;
			/*
			 * Hand-over-hand shared spinlocks going upward:
			 * acquire the parent's spinlock before releasing
			 * the child's so chain->parent remains stable
			 * while we step up.
			 */
			hammer2_spin_sh(&parent->core.spin);
			hammer2_spin_unsh(&chain->core.spin);
			chain = parent;
		}
		hammer2_spin_unsh(&chain->core.spin);
	}
}

/*
 * Allocate a new disconnected chain element representing the specified
 * bref.  chain->refs is set to 1 and the passed bref is copied to
 * chain->bref.  chain->bytes is derived from the bref.
 *
 * chain->pmp inherits pmp unless the chain is an inode (other than the
 * super-root inode).
 *
 * NOTE: Returns a referenced but unlocked (because there is no core) chain.
 */
hammer2_chain_t *
hammer2_chain_alloc(hammer2_dev_t *hmp, hammer2_pfs_t *pmp,
		    hammer2_blockref_t *bref)
{
	hammer2_chain_t *chain;
	u_int bytes;

	/*
	 * Special case - radix of 0 indicates a chain that does not
	 * need a data reference (context is completely embedded in the
	 * bref).
	 */
	if ((int)(bref->data_off & HAMMER2_OFF_MASK_RADIX))
		bytes = 1U << (int)(bref->data_off & HAMMER2_OFF_MASK_RADIX);
	else
		bytes = 0;

	atomic_add_long(&hammer2_chain_allocs, 1);

	/*
	 * Construct the appropriate system structure.
	 */
	switch(bref->type) {
	case HAMMER2_BREF_TYPE_DIRENT:
	case HAMMER2_BREF_TYPE_INODE:
	case HAMMER2_BREF_TYPE_INDIRECT:
	case HAMMER2_BREF_TYPE_FREEMAP_NODE:
	case HAMMER2_BREF_TYPE_DATA:
	case HAMMER2_BREF_TYPE_FREEMAP_LEAF:
		/*
		 * Chain's are really only associated with the hmp but we
		 * maintain a pmp association for per-mount memory tracking
		 * purposes.  The pmp can be NULL.
		 */
		chain = kmalloc(sizeof(*chain), hmp->mchain, M_WAITOK | M_ZERO);
		break;
	case HAMMER2_BREF_TYPE_VOLUME:
	case HAMMER2_BREF_TYPE_FREEMAP:
		/*
		 * Only hammer2_chain_bulksnap() calls this function with these
		 * types.
		 */
		chain = kmalloc(sizeof(*chain), hmp->mchain, M_WAITOK | M_ZERO);
		break;
	default:
		chain = NULL;
		panic("hammer2_chain_alloc: unrecognized blockref type: %d",
		      bref->type);
	}

	/*
	 * Initialize the new chain structure.  pmp must be set to NULL for
	 * chains belonging to the super-root topology of a device mount.
	 */
	if (pmp == hmp->spmp)
		chain->pmp = NULL;
	else
		chain->pmp = pmp;
	chain->hmp = hmp;
	chain->bref = *bref;
	chain->bytes = bytes;
	chain->refs = 1;
	chain->flags = HAMMER2_CHAIN_ALLOCATED;

	/*
	 * Set the PFS boundary flag if this chain represents a PFS root.
	 */
	if (bref->flags & HAMMER2_BREF_FLAG_PFSROOT)
		atomic_set_int(&chain->flags, HAMMER2_CHAIN_PFSBOUNDARY);
	hammer2_chain_core_init(chain);

	return (chain);
}

/*
 * Initialize a chain's core structure.  This structure used to be allocated
 * but is now embedded.
 *
 * The core is not locked.  No additional refs on the chain are made.
 * (trans) must not be NULL if (core) is not NULL.
 */
void
hammer2_chain_core_init(hammer2_chain_t *chain)
{
	/*
	 * Fresh core under nchain (no multi-homing of ochain's
	 * sub-tree).
	 */
	RB_INIT(&chain->core.rbtree);	/* live chains */
	hammer2_mtx_init(&chain->lock, "h2chain");
}

/*
 * Add a reference to a chain element, preventing its destruction.
 *
 * (can be called with spinlock held)
 */
void
hammer2_chain_ref(hammer2_chain_t *chain)
{
	if (atomic_fetchadd_int(&chain->refs, 1) == 0) {
		/*
		 * Just flag that the chain was used and should be recycled
		 * on the LRU if it encounters it later.
		 */
		if (chain->flags & HAMMER2_CHAIN_ONLRU)
			atomic_set_int(&chain->flags, HAMMER2_CHAIN_LRUHINT);

#if 0
		/*
		 * REMOVED - reduces contention, lru_list is more heuristical
		 * now.
		 *
		 * 0->non-zero transition must ensure that chain is removed
		 * from the LRU list.
		 *
		 * NOTE: Already holding lru_spin here so we cannot call
		 *	 hammer2_chain_ref() to get it off lru_list, do
		 *	 it manually.
		 */
		if (chain->flags & HAMMER2_CHAIN_ONLRU) {
			hammer2_pfs_t *pmp = chain->pmp;
			hammer2_spin_ex(&pmp->lru_spin);
			if (chain->flags & HAMMER2_CHAIN_ONLRU) {
				atomic_add_int(&pmp->lru_count, -1);
				atomic_clear_int(&chain->flags,
						 HAMMER2_CHAIN_ONLRU);
				TAILQ_REMOVE(&pmp->lru_list, chain, lru_node);
			}
			hammer2_spin_unex(&pmp->lru_spin);
		}
#endif
	}
}

/*
 * Ref a locked chain and force the data to be held across an unlock.
 * Chain must be currently locked.  The user of the chain who desires
 * to release the hold must call hammer2_chain_lock_unhold() to relock
 * and unhold the chain, then unlock normally, or may simply call
 * hammer2_chain_drop_unhold() (which is safer against deadlocks).
 */
void
hammer2_chain_ref_hold(hammer2_chain_t *chain)
{
	atomic_add_int(&chain->lockcnt, 1);
	hammer2_chain_ref(chain);
}

/*
 * Insert the chain in the core rbtree.
 *
 * Normal insertions are placed in the live rbtree.  Insertion of a deleted
 * chain is a special case used by the flush code that is placed on the
 * unstaged deleted list to avoid confusing the live view.
 */
#define HAMMER2_CHAIN_INSERT_SPIN	0x0001	/* acquire parent core spin */
#define HAMMER2_CHAIN_INSERT_LIVE	0x0002	/* count towards live view */
#define HAMMER2_CHAIN_INSERT_RACE	0x0004	/* fail on generation race */

/*
 * Insert (chain) under (parent) in the core rbtree.
 *
 * If INSERT_SPIN is specified the parent's core spinlock is acquired for
 * the duration, otherwise the caller must already hold it.  If INSERT_RACE
 * is specified and the parent's generation count no longer matches
 * (generation), HAMMER2_ERROR_EAGAIN is returned and nothing is inserted.
 * Returns 0 on success.
 */
static
int
hammer2_chain_insert(hammer2_chain_t *parent, hammer2_chain_t *chain,
		     int flags, int generation)
{
	hammer2_chain_t *xchain;
	int error = 0;

	if (flags & HAMMER2_CHAIN_INSERT_SPIN)
		hammer2_spin_ex(&parent->core.spin);

	/*
	 * Interlocked by spinlock, check for race
	 */
	if ((flags & HAMMER2_CHAIN_INSERT_RACE) &&
	    parent->core.generation != generation) {
		error = HAMMER2_ERROR_EAGAIN;
		goto failed;
	}

	/*
	 * Insert chain
	 */
	xchain = RB_INSERT(hammer2_chain_tree, &parent->core.rbtree, chain);
	KASSERT(xchain == NULL,
		("hammer2_chain_insert: collision %p %p (key=%016jx)",
		chain, xchain, chain->bref.key));
	atomic_set_int(&chain->flags, HAMMER2_CHAIN_ONRBTREE);
	chain->parent = parent;
	++parent->core.chain_count;
	++parent->core.generation;	/* XXX incs for _get() too, XXX */

	/*
	 * We have to keep track of the effective live-view blockref count
	 * so the create code knows when to push an indirect block.
	 */
	if (flags & HAMMER2_CHAIN_INSERT_LIVE)
		atomic_add_int(&parent->core.live_count, 1);
failed:
	if (flags & HAMMER2_CHAIN_INSERT_SPIN)
		hammer2_spin_unex(&parent->core.spin);
	return error;
}

/*
 * Drop the caller's reference to the chain.  When the ref count drops to
 * zero this function will try to disassociate the chain from its parent and
 * deallocate it, then recursively drop the parent using the implied ref
 * from the chain's chain->parent.
 *
 * Nobody should own chain's mutex on the 1->0 transition, unless this drop
 * races an acquisition by another cpu.  Therefore we can loop if we are
 * unable to acquire the mutex, and refs is unlikely to be 1 unless we again
 * race against another drop.
 */
static hammer2_chain_t *hammer2_chain_lastdrop(hammer2_chain_t *chain,
				int depth);
static void hammer2_chain_lru_flush(hammer2_pfs_t *pmp);

void
hammer2_chain_drop(hammer2_chain_t *chain)
{
	u_int refs;

	if (hammer2_debug & 0x200000)
		Debugger("drop");

	KKASSERT(chain->refs > 0);

	while (chain) {
		refs = chain->refs;
		cpu_ccfence();
		KKASSERT(refs > 0);

		if (refs == 1) {
			/*
			 * The 1->0 transition requires the chain mutex;
			 * on successful acquisition let lastdrop finish
			 * (and dispose of the mutex).  lastdrop may hand
			 * back the same chain (raced) or a parent chain
			 * needing a recursive drop.
			 */
			if (hammer2_mtx_ex_try(&chain->lock) == 0)
				chain = hammer2_chain_lastdrop(chain, 0);
			/* retry the same chain, or chain from lastdrop */
		} else {
			if (atomic_cmpset_int(&chain->refs, refs, refs - 1))
				break;
			/* retry the same chain */
		}
		cpu_pause();
	}
}

/*
 * Unhold a held and probably not-locked chain, ensure that the data is
 * dropped on the 1->0 transition of lockcnt by obtaining an exclusive
 * lock and then simply unlocking the chain.
 */
void
hammer2_chain_drop_unhold(hammer2_chain_t *chain)
{
	u_int lockcnt;
	int iter = 0;

	for (;;) {
		lockcnt = chain->lockcnt;
		cpu_ccfence();
		if (lockcnt > 1) {
			/* not the last hold, just decrement lockcnt */
			if (atomic_cmpset_int(&chain->lockcnt,
					      lockcnt, lockcnt - 1)) {
				break;
			}
		} else if (hammer2_mtx_ex_try(&chain->lock) == 0) {
			/* last hold, unlock path handles lockcnt 1->0 */
			hammer2_chain_unlock(chain);
			break;
		} else {
			/*
			 * This situation can easily occur on SMP due to
			 * the gap inbetween the 1->0 transition and the
			 * final unlock.  We cannot safely block on the
			 * mutex because lockcnt might go above 1.
			 *
			 * XXX Sleep for one tick if it takes too long.
			 */
			if (++iter > 1000) {
				if (iter > 1000 + hz) {
					kprintf("hammer2: h2race1 %p\n", chain);
					iter = 1000;
				}
				tsleep(&iter, 0, "h2race1", 1);
			}
			cpu_pause();
		}
	}
	hammer2_chain_drop(chain);
}

/*
 * Handles the (potential) last drop of chain->refs from 1->0.
Called with 461 * the mutex exclusively locked, refs == 1, and lockcnt 0. SMP races are 462 * possible against refs and lockcnt. We must dispose of the mutex on chain. 463 * 464 * This function returns an unlocked chain for recursive drop or NULL. It 465 * can return the same chain if it determines it has raced another ref. 466 * 467 * -- 468 * 469 * When two chains need to be recursively dropped we use the chain we 470 * would otherwise free to placehold the additional chain. It's a bit 471 * convoluted but we can't just recurse without potentially blowing out 472 * the kernel stack. 473 * 474 * The chain cannot be freed if it has any children. 475 * The chain cannot be freed if flagged MODIFIED unless we can dispose of it. 476 * The chain cannot be freed if flagged UPDATE unless we can dispose of it. 477 * Any dedup registration can remain intact. 478 * 479 * The core spinlock is allowed to nest child-to-parent (not parent-to-child). 480 */ 481 static 482 hammer2_chain_t * 483 hammer2_chain_lastdrop(hammer2_chain_t *chain, int depth) 484 { 485 hammer2_pfs_t *pmp; 486 hammer2_dev_t *hmp; 487 hammer2_chain_t *parent; 488 hammer2_chain_t *rdrop; 489 #if 0 490 hammer2_io_t *dio; 491 #endif 492 493 #if 0 494 /* 495 * On last drop if there is no parent and data_off is good (at 496 * least does not represent the volume root), the modified chain 497 * is probably going to be destroyed. We have to make sure that 498 * the data area is not registered for dedup. 499 * 500 * XXX removed. In fact, we do not have to make sure that the 501 * data area is not registered for dedup. The data area 502 * can, in fact, still be used for dedup because it is 503 * still allocated in the freemap and the underlying I/O 504 * will still be flushed. 
505 */ 506 if (chain->parent == NULL && 507 (chain->flags & HAMMER2_CHAIN_MODIFIED) && 508 (chain->bref.data_off & ~HAMMER2_OFF_MASK_RADIX)) { 509 hmp = chain->hmp; 510 hammer2_io_dedup_delete(hmp, chain->bref.type, 511 chain->bref.data_off, chain->bytes); 512 } 513 #endif 514 /* 515 * We need chain's spinlock to interlock the sub-tree test. 516 * We already have chain's mutex, protecting chain->parent. 517 * 518 * Remember that chain->refs can be in flux. 519 */ 520 hammer2_spin_ex(&chain->core.spin); 521 522 if (chain->parent != NULL) { 523 /* 524 * If the chain has a parent the UPDATE bit prevents scrapping 525 * as the chain is needed to properly flush the parent. Try 526 * to complete the 1->0 transition and return NULL. Retry 527 * (return chain) if we are unable to complete the 1->0 528 * transition, else return NULL (nothing more to do). 529 * 530 * If the chain has a parent the MODIFIED bit prevents 531 * scrapping. 532 * 533 * Chains with UPDATE/MODIFIED are *not* put on the LRU list! 534 */ 535 if (chain->flags & (HAMMER2_CHAIN_UPDATE | 536 HAMMER2_CHAIN_MODIFIED)) { 537 if (atomic_cmpset_int(&chain->refs, 1, 0)) { 538 hammer2_spin_unex(&chain->core.spin); 539 #if 0 540 dio = hammer2_chain_drop_data(chain, 0); 541 if (dio) 542 hammer2_io_bqrelse(&dio); 543 #endif 544 hammer2_chain_assert_no_data(chain); 545 hammer2_mtx_unlock(&chain->lock); 546 chain = NULL; 547 } else { 548 hammer2_spin_unex(&chain->core.spin); 549 hammer2_mtx_unlock(&chain->lock); 550 } 551 return (chain); 552 } 553 /* spinlock still held */ 554 } else if (chain->bref.type == HAMMER2_BREF_TYPE_VOLUME || 555 chain->bref.type == HAMMER2_BREF_TYPE_FREEMAP) { 556 /* 557 * Retain the static vchain and fchain. Clear bits that 558 * are not relevant. Do not clear the MODIFIED bit, 559 * and certainly do not put it on the delayed-flush queue. 560 */ 561 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_UPDATE); 562 } else { 563 /* 564 * The chain has no parent and can be flagged for destruction. 
565 * Since it has no parent, UPDATE can also be cleared. 566 */ 567 atomic_set_int(&chain->flags, HAMMER2_CHAIN_DESTROY); 568 if (chain->flags & HAMMER2_CHAIN_UPDATE) 569 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_UPDATE); 570 571 /* 572 * If the chain has children we must still flush the chain. 573 * Any dedup is already handled by the underlying DIO, so 574 * we do not have to specifically flush it here. 575 * 576 * In the case where it has children, the DESTROY flag test 577 * in the flush code will prevent unnecessary flushes of 578 * MODIFIED chains that are not flagged DEDUP so don't worry 579 * about that here. 580 */ 581 if (chain->core.chain_count) { 582 /* 583 * Put on flushq (should ensure refs > 1), retry 584 * the drop. 585 */ 586 hammer2_spin_unex(&chain->core.spin); 587 hammer2_delayed_flush(chain); 588 hammer2_mtx_unlock(&chain->lock); 589 590 return(chain); /* retry drop */ 591 } 592 593 /* 594 * Otherwise we can scrap the MODIFIED bit if it is set, 595 * and continue along the freeing path. 596 * 597 * Be sure to clean-out any dedup bits. Without a parent 598 * this chain will no longer be visible to the flush code. 599 * Easy check data_off to avoid the volume root. 600 */ 601 if (chain->flags & HAMMER2_CHAIN_MODIFIED) { 602 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_MODIFIED); 603 atomic_add_long(&hammer2_count_modified_chains, -1); 604 if (chain->pmp) 605 hammer2_pfs_memory_wakeup(chain->pmp); 606 } 607 /* spinlock still held */ 608 } 609 610 /* spinlock still held */ 611 #if 0 612 dio = NULL; 613 #endif 614 615 /* 616 * If any children exist we must leave the chain intact with refs == 0. 617 * They exist because chains are retained below us which have refs or 618 * may require flushing. 619 * 620 * Retry (return chain) if we fail to transition the refs to 0, else 621 * return NULL indication nothing more to do. 622 * 623 * Chains with children are NOT put on the LRU list. 
624 */ 625 if (chain->core.chain_count) { 626 if (atomic_cmpset_int(&chain->refs, 1, 0)) { 627 hammer2_spin_unex(&chain->core.spin); 628 hammer2_chain_assert_no_data(chain); 629 hammer2_mtx_unlock(&chain->lock); 630 chain = NULL; 631 } else { 632 hammer2_spin_unex(&chain->core.spin); 633 hammer2_mtx_unlock(&chain->lock); 634 } 635 return (chain); 636 } 637 /* spinlock still held */ 638 /* no chains left under us */ 639 640 /* 641 * chain->core has no children left so no accessors can get to our 642 * chain from there. Now we have to lock the parent core to interlock 643 * remaining possible accessors that might bump chain's refs before 644 * we can safely drop chain's refs with intent to free the chain. 645 */ 646 hmp = chain->hmp; 647 pmp = chain->pmp; /* can be NULL */ 648 rdrop = NULL; 649 650 parent = chain->parent; 651 652 /* 653 * WARNING! chain's spin lock is still held here, and other spinlocks 654 * will be acquired and released in the code below. We 655 * cannot be making fancy procedure calls! 656 */ 657 658 /* 659 * We can cache the chain if it is associated with a pmp 660 * and not flagged as being destroyed or requesting a full 661 * release. In this situation the chain is not removed 662 * from its parent, i.e. it can still be looked up. 663 * 664 * We intentionally do not cache DATA chains because these 665 * were likely used to load data into the logical buffer cache 666 * and will not be accessed again for some time. 667 */ 668 if ((chain->flags & 669 (HAMMER2_CHAIN_DESTROY | HAMMER2_CHAIN_RELEASE)) == 0 && 670 chain->pmp && 671 chain->bref.type != HAMMER2_BREF_TYPE_DATA) { 672 if (parent) 673 hammer2_spin_ex(&parent->core.spin); 674 if (atomic_cmpset_int(&chain->refs, 1, 0) == 0) { 675 /* 676 * 1->0 transition failed, retry. Do not drop 677 * the chain's data yet! 
678 */ 679 if (parent) 680 hammer2_spin_unex(&parent->core.spin); 681 hammer2_spin_unex(&chain->core.spin); 682 hammer2_mtx_unlock(&chain->lock); 683 684 return(chain); 685 } 686 687 /* 688 * Success 689 */ 690 #if 0 691 dio = hammer2_chain_drop_data(chain, 1); 692 #endif 693 hammer2_chain_assert_no_data(chain); 694 695 /* 696 * Make sure we are on the LRU list, clean up excessive 697 * LRU entries. We can only really drop one but there might 698 * be other entries that we can remove from the lru_list 699 * without dropping. 700 * 701 * NOTE: HAMMER2_CHAIN_ONLRU may only be safely set when 702 * chain->core.spin AND pmp->lru_spin are held, but 703 * can be safely cleared only holding pmp->lru_spin. 704 */ 705 if ((chain->flags & HAMMER2_CHAIN_ONLRU) == 0) { 706 hammer2_spin_ex(&pmp->lru_spin); 707 if ((chain->flags & HAMMER2_CHAIN_ONLRU) == 0) { 708 atomic_set_int(&chain->flags, 709 HAMMER2_CHAIN_ONLRU); 710 TAILQ_INSERT_TAIL(&pmp->lru_list, 711 chain, lru_node); 712 atomic_add_int(&pmp->lru_count, 1); 713 } 714 if (pmp->lru_count < HAMMER2_LRU_LIMIT) 715 depth = 1; /* disable lru_list flush */ 716 hammer2_spin_unex(&pmp->lru_spin); 717 } else { 718 /* disable lru flush */ 719 depth = 1; 720 } 721 722 if (parent) { 723 hammer2_spin_unex(&parent->core.spin); 724 parent = NULL; /* safety */ 725 } 726 hammer2_spin_unex(&chain->core.spin); 727 hammer2_mtx_unlock(&chain->lock); 728 #if 0 729 if (dio) 730 hammer2_io_bqrelse(&dio); 731 #endif 732 733 /* 734 * lru_list hysteresis (see above for depth overrides). 735 * Note that depth also prevents excessive lastdrop recursion. 736 */ 737 if (depth == 0) 738 hammer2_chain_lru_flush(pmp); 739 740 return NULL; 741 /* NOT REACHED */ 742 } 743 744 /* 745 * Make sure we are not on the LRU list. 
746 */ 747 if (chain->flags & HAMMER2_CHAIN_ONLRU) { 748 hammer2_spin_ex(&pmp->lru_spin); 749 if (chain->flags & HAMMER2_CHAIN_ONLRU) { 750 atomic_add_int(&pmp->lru_count, -1); 751 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_ONLRU); 752 TAILQ_REMOVE(&pmp->lru_list, chain, lru_node); 753 } 754 hammer2_spin_unex(&pmp->lru_spin); 755 } 756 757 /* 758 * Spinlock the parent and try to drop the last ref on chain. 759 * On success determine if we should dispose of the chain 760 * (remove the chain from its parent, etc). 761 * 762 * (normal core locks are top-down recursive but we define 763 * core spinlocks as bottom-up recursive, so this is safe). 764 */ 765 if (parent) { 766 hammer2_spin_ex(&parent->core.spin); 767 if (atomic_cmpset_int(&chain->refs, 1, 0) == 0) { 768 #if 0 769 /* XXX remove, don't try to drop data on fail */ 770 hammer2_spin_unex(&parent->core.spin); 771 dio = hammer2_chain_drop_data(chain, 0); 772 hammer2_spin_unex(&chain->core.spin); 773 if (dio) 774 hammer2_io_bqrelse(&dio); 775 #endif 776 /* 777 * 1->0 transition failed, retry. 778 */ 779 hammer2_spin_unex(&parent->core.spin); 780 hammer2_spin_unex(&chain->core.spin); 781 hammer2_mtx_unlock(&chain->lock); 782 783 return(chain); 784 } 785 786 /* 787 * 1->0 transition successful, parent spin held to prevent 788 * new lookups, chain spinlock held to protect parent field. 789 * Remove chain from the parent. 790 * 791 * If the chain is being removed from the parent's btree but 792 * is not bmapped, we have to adjust live_count downward. If 793 * it is bmapped then the blockref is retained in the parent 794 * as is its associated live_count. This case can occur when 795 * a chain added to the topology is unable to flush and is 796 * then later deleted. 
797 */ 798 if (chain->flags & HAMMER2_CHAIN_ONRBTREE) { 799 if ((parent->flags & HAMMER2_CHAIN_COUNTEDBREFS) && 800 (chain->flags & HAMMER2_CHAIN_BMAPPED) == 0) { 801 atomic_add_int(&parent->core.live_count, -1); 802 } 803 RB_REMOVE(hammer2_chain_tree, 804 &parent->core.rbtree, chain); 805 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_ONRBTREE); 806 --parent->core.chain_count; 807 chain->parent = NULL; 808 } 809 810 /* 811 * If our chain was the last chain in the parent's core the 812 * core is now empty and its parent might have to be 813 * re-dropped if it has 0 refs. 814 */ 815 if (parent->core.chain_count == 0) { 816 rdrop = parent; 817 atomic_add_int(&rdrop->refs, 1); 818 /* 819 if (atomic_cmpset_int(&rdrop->refs, 0, 1) == 0) 820 rdrop = NULL; 821 */ 822 } 823 hammer2_spin_unex(&parent->core.spin); 824 parent = NULL; /* safety */ 825 /* FALL THROUGH */ 826 } else { 827 /* 828 * No-parent case. 829 */ 830 if (atomic_cmpset_int(&chain->refs, 1, 0) == 0) { 831 /* 832 * 1->0 transition failed, retry. 833 */ 834 hammer2_spin_unex(&parent->core.spin); 835 hammer2_spin_unex(&chain->core.spin); 836 hammer2_mtx_unlock(&chain->lock); 837 838 return(chain); 839 } 840 } 841 842 /* 843 * Successful 1->0 transition, no parent, no children... no way for 844 * anyone to ref this chain any more. We can clean-up and free it. 845 * 846 * We still have the core spinlock, and core's chain_count is 0. 847 * Any parent spinlock is gone. 848 */ 849 hammer2_spin_unex(&chain->core.spin); 850 hammer2_chain_assert_no_data(chain); 851 hammer2_mtx_unlock(&chain->lock); 852 KKASSERT(RB_EMPTY(&chain->core.rbtree) && 853 chain->core.chain_count == 0); 854 855 /* 856 * All locks are gone, no pointers remain to the chain, finish 857 * freeing it. 
858 */ 859 KKASSERT((chain->flags & (HAMMER2_CHAIN_UPDATE | 860 HAMMER2_CHAIN_MODIFIED)) == 0); 861 #if 0 862 dio = hammer2_chain_drop_data(chain, 1); 863 if (dio) 864 hammer2_io_bqrelse(&dio); 865 #endif 866 867 /* 868 * Once chain resources are gone we can use the now dead chain 869 * structure to placehold what might otherwise require a recursive 870 * drop, because we have potentially two things to drop and can only 871 * return one directly. 872 */ 873 if (chain->flags & HAMMER2_CHAIN_ALLOCATED) { 874 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_ALLOCATED); 875 chain->hmp = NULL; 876 kfree(chain, hmp->mchain); 877 } 878 879 /* 880 * Possible chaining loop when parent re-drop needed. 881 */ 882 return(rdrop); 883 } 884 885 /* 886 * Heuristical flush of the LRU, try to reduce the number of entries 887 * on the LRU to (HAMMER2_LRU_LIMIT * 2 / 3). This procedure is called 888 * only when lru_count exceeds HAMMER2_LRU_LIMIT. 889 */ 890 static 891 void 892 hammer2_chain_lru_flush(hammer2_pfs_t *pmp) 893 { 894 hammer2_chain_t *chain; 895 896 again: 897 chain = NULL; 898 hammer2_spin_ex(&pmp->lru_spin); 899 while (pmp->lru_count > HAMMER2_LRU_LIMIT * 2 / 3) { 900 /* 901 * Pick a chain off the lru_list, just recycle it quickly 902 * if LRUHINT is set (the chain was ref'd but left on 903 * the lru_list, so cycle to the end). 904 */ 905 chain = TAILQ_FIRST(&pmp->lru_list); 906 TAILQ_REMOVE(&pmp->lru_list, chain, lru_node); 907 908 if (chain->flags & HAMMER2_CHAIN_LRUHINT) { 909 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_LRUHINT); 910 TAILQ_INSERT_TAIL(&pmp->lru_list, chain, lru_node); 911 chain = NULL; 912 continue; 913 } 914 915 /* 916 * Ok, we are off the LRU. We must adjust refs before we 917 * can safely clear the ONLRU flag. 
918 */ 919 atomic_add_int(&pmp->lru_count, -1); 920 if (atomic_cmpset_int(&chain->refs, 0, 1)) { 921 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_ONLRU); 922 atomic_set_int(&chain->flags, HAMMER2_CHAIN_RELEASE); 923 break; 924 } 925 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_ONLRU); 926 chain = NULL; 927 } 928 hammer2_spin_unex(&pmp->lru_spin); 929 if (chain == NULL) 930 return; 931 932 /* 933 * If we picked a chain off the lru list we may be able to lastdrop 934 * it. Use a depth of 1 to prevent excessive lastdrop recursion. 935 */ 936 while (chain) { 937 u_int refs; 938 939 refs = chain->refs; 940 cpu_ccfence(); 941 KKASSERT(refs > 0); 942 943 if (refs == 1) { 944 if (hammer2_mtx_ex_try(&chain->lock) == 0) 945 chain = hammer2_chain_lastdrop(chain, 1); 946 /* retry the same chain, or chain from lastdrop */ 947 } else { 948 if (atomic_cmpset_int(&chain->refs, refs, refs - 1)) 949 break; 950 /* retry the same chain */ 951 } 952 cpu_pause(); 953 } 954 goto again; 955 } 956 957 /* 958 * On last lock release. 959 */ 960 static hammer2_io_t * 961 hammer2_chain_drop_data(hammer2_chain_t *chain) 962 { 963 hammer2_io_t *dio; 964 965 if ((dio = chain->dio) != NULL) { 966 chain->dio = NULL; 967 chain->data = NULL; 968 } else { 969 switch(chain->bref.type) { 970 case HAMMER2_BREF_TYPE_VOLUME: 971 case HAMMER2_BREF_TYPE_FREEMAP: 972 break; 973 default: 974 if (chain->data != NULL) { 975 hammer2_spin_unex(&chain->core.spin); 976 panic("chain data not null: " 977 "chain %p bref %016jx.%02x " 978 "refs %d parent %p dio %p data %p", 979 chain, chain->bref.data_off, 980 chain->bref.type, chain->refs, 981 chain->parent, 982 chain->dio, chain->data); 983 } 984 KKASSERT(chain->data == NULL); 985 break; 986 } 987 } 988 return dio; 989 } 990 991 /* 992 * Lock a referenced chain element, acquiring its data with I/O if necessary, 993 * and specify how you would like the data to be resolved. 994 * 995 * If an I/O or other fatal error occurs, chain->error will be set to non-zero. 
996 * 997 * The lock is allowed to recurse, multiple locking ops will aggregate 998 * the requested resolve types. Once data is assigned it will not be 999 * removed until the last unlock. 1000 * 1001 * HAMMER2_RESOLVE_NEVER - Do not resolve the data element. 1002 * (typically used to avoid device/logical buffer 1003 * aliasing for data) 1004 * 1005 * HAMMER2_RESOLVE_MAYBE - Do not resolve data elements for chains in 1006 * the INITIAL-create state (indirect blocks only). 1007 * 1008 * Do not resolve data elements for DATA chains. 1009 * (typically used to avoid device/logical buffer 1010 * aliasing for data) 1011 * 1012 * HAMMER2_RESOLVE_ALWAYS- Always resolve the data element. 1013 * 1014 * HAMMER2_RESOLVE_SHARED- (flag) The chain is locked shared, otherwise 1015 * it will be locked exclusive. 1016 * 1017 * HAMMER2_RESOLVE_NONBLOCK- (flag) The chain is locked non-blocking. If 1018 * the lock fails, EAGAIN is returned. 1019 * 1020 * NOTE: Embedded elements (volume header, inodes) are always resolved 1021 * regardless. 1022 * 1023 * NOTE: Specifying HAMMER2_RESOLVE_ALWAYS on a newly-created non-embedded 1024 * element will instantiate and zero its buffer, and flush it on 1025 * release. 1026 * 1027 * NOTE: (data) elements are normally locked RESOLVE_NEVER or RESOLVE_MAYBE 1028 * so as not to instantiate a device buffer, which could alias against 1029 * a logical file buffer. However, if ALWAYS is specified the 1030 * device buffer will be instantiated anyway. 1031 * 1032 * NOTE: The return value is always 0 unless NONBLOCK is specified, in which 1033 * case it can be either 0 or EAGAIN. 1034 * 1035 * WARNING! This function blocks on I/O if data needs to be fetched. This 1036 * blocking can run concurrent with other compatible lock holders 1037 * who do not need data returning. The lock is not upgraded to 1038 * exclusive during a data fetch, a separate bit is used to 1039 * interlock I/O. 
 *	    However, an exclusive lock holder can still count
 *	    on being interlocked against an I/O fetch managed by a shared
 *	    lock holder.
 */
int
hammer2_chain_lock(hammer2_chain_t *chain, int how)
{
	KKASSERT(chain->refs > 0);

	if (how & HAMMER2_RESOLVE_NONBLOCK) {
		/*
		 * For non-blocking operation attempt to get the lock
		 * before bumping lockcnt, just so we don't have to deal
		 * with dropping lockcnt (and dealing with the underlying
		 * data) if we fail.
		 *
		 * NOTE: LOCKAGAIN must always succeed without blocking.
		 */
		if (how & HAMMER2_RESOLVE_SHARED) {
			if (how & HAMMER2_RESOLVE_LOCKAGAIN) {
				hammer2_mtx_sh_again(&chain->lock);
			} else {
				if (hammer2_mtx_sh_try(&chain->lock) != 0)
					return EAGAIN;
			}
		} else {
			if (hammer2_mtx_ex_try(&chain->lock) != 0)
				return EAGAIN;
		}
		atomic_add_int(&chain->lockcnt, 1);
		++curthread->td_tracker;
	} else {
		/*
		 * Lock the element.  Recursive locks are allowed.  lockcnt
		 * ensures that data is left intact.
		 */
		atomic_add_int(&chain->lockcnt, 1);

		/*
		 * Get the appropriate lock.  If LOCKAGAIN is flagged with
		 * SHARED the caller expects a shared lock to already be
		 * present and we are giving it another ref.  This case must
		 * importantly not block if there is a pending exclusive lock
		 * request.
		 */
		if (how & HAMMER2_RESOLVE_SHARED) {
			if (how & HAMMER2_RESOLVE_LOCKAGAIN) {
				hammer2_mtx_sh_again(&chain->lock);
			} else {
				hammer2_mtx_sh(&chain->lock);
			}
		} else {
			hammer2_mtx_ex(&chain->lock);
		}
		++curthread->td_tracker;
	}

	/*
	 * If we already have a valid data pointer make sure the data is
	 * synchronized to the current cpu, and then no further action is
	 * necessary.
	 */
	if (chain->data) {
		if (chain->dio)
			hammer2_io_bkvasync(chain->dio);
		return 0;
	}

	/*
	 * Do we have to resolve the data?  This is generally only
	 * applicable to HAMMER2_BREF_TYPE_DATA which is special-cased.
	 * Other BREF types expect the data to be there.
	 */
	switch(how & HAMMER2_RESOLVE_MASK) {
	case HAMMER2_RESOLVE_NEVER:
		return 0;
	case HAMMER2_RESOLVE_MAYBE:
		if (chain->flags & HAMMER2_CHAIN_INITIAL)
			return 0;
		if (chain->bref.type == HAMMER2_BREF_TYPE_DATA)
			return 0;
#if 0
		if (chain->bref.type == HAMMER2_BREF_TYPE_FREEMAP_NODE)
			return 0;
		if (chain->bref.type == HAMMER2_BREF_TYPE_FREEMAP_LEAF)
			return 0;
#endif
		/* fall through */
	case HAMMER2_RESOLVE_ALWAYS:
	default:
		break;
	}

	/*
	 * Caller requires data
	 */
	hammer2_chain_load_data(chain);

	return 0;
}

/*
 * Lock the chain, retain the hold, and drop the data persistence count.
 * The data should remain valid because we never transitioned lockcnt
 * through 0.
 */
void
hammer2_chain_lock_unhold(hammer2_chain_t *chain, int how)
{
	hammer2_chain_lock(chain, how);
	atomic_add_int(&chain->lockcnt, -1);
}

#if 0
/*
 * Downgrade an exclusive chain lock to a shared chain lock.
 *
 * NOTE: There is no upgrade equivalent due to the ease of
 *	 deadlocks in that direction.
 */
void
hammer2_chain_lock_downgrade(hammer2_chain_t *chain)
{
	hammer2_mtx_downgrade(&chain->lock);
}
#endif

/*
 * Issue I/O and install chain->data.  Caller must hold a chain lock, lock
 * may be of any type.
 *
 * Once chain->data is set it cannot be disposed of until all locks are
 * released.
 *
 * Make sure the data is synchronized to the current cpu.
1174 */ 1175 void 1176 hammer2_chain_load_data(hammer2_chain_t *chain) 1177 { 1178 hammer2_blockref_t *bref; 1179 hammer2_dev_t *hmp; 1180 hammer2_io_t *dio; 1181 char *bdata; 1182 int error; 1183 1184 /* 1185 * Degenerate case, data already present, or chain has no media 1186 * reference to load. 1187 */ 1188 if (chain->data) { 1189 if (chain->dio) 1190 hammer2_io_bkvasync(chain->dio); 1191 return; 1192 } 1193 if ((chain->bref.data_off & ~HAMMER2_OFF_MASK_RADIX) == 0) 1194 return; 1195 1196 hmp = chain->hmp; 1197 KKASSERT(hmp != NULL); 1198 1199 /* 1200 * Gain the IOINPROG bit, interlocked block. 1201 */ 1202 for (;;) { 1203 u_int oflags; 1204 u_int nflags; 1205 1206 oflags = chain->flags; 1207 cpu_ccfence(); 1208 if (oflags & HAMMER2_CHAIN_IOINPROG) { 1209 nflags = oflags | HAMMER2_CHAIN_IOSIGNAL; 1210 tsleep_interlock(&chain->flags, 0); 1211 if (atomic_cmpset_int(&chain->flags, oflags, nflags)) { 1212 tsleep(&chain->flags, PINTERLOCKED, 1213 "h2iocw", 0); 1214 } 1215 /* retry */ 1216 } else { 1217 nflags = oflags | HAMMER2_CHAIN_IOINPROG; 1218 if (atomic_cmpset_int(&chain->flags, oflags, nflags)) { 1219 break; 1220 } 1221 /* retry */ 1222 } 1223 } 1224 1225 /* 1226 * We own CHAIN_IOINPROG 1227 * 1228 * Degenerate case if we raced another load. 1229 */ 1230 if (chain->data) { 1231 if (chain->dio) 1232 hammer2_io_bkvasync(chain->dio); 1233 goto done; 1234 } 1235 1236 /* 1237 * We must resolve to a device buffer, either by issuing I/O or 1238 * by creating a zero-fill element. We do not mark the buffer 1239 * dirty when creating a zero-fill element (the hammer2_chain_modify() 1240 * API must still be used to do that). 1241 * 1242 * The device buffer is variable-sized in powers of 2 down 1243 * to HAMMER2_MIN_ALLOC (typically 1K). A 64K physical storage 1244 * chunk always contains buffers of the same size. (XXX) 1245 * 1246 * The minimum physical IO size may be larger than the variable 1247 * block size. 
1248 */ 1249 bref = &chain->bref; 1250 1251 /* 1252 * The getblk() optimization can only be used on newly created 1253 * elements if the physical block size matches the request. 1254 */ 1255 if (chain->flags & HAMMER2_CHAIN_INITIAL) { 1256 error = hammer2_io_new(hmp, bref->type, 1257 bref->data_off, chain->bytes, 1258 &chain->dio); 1259 } else { 1260 error = hammer2_io_bread(hmp, bref->type, 1261 bref->data_off, chain->bytes, 1262 &chain->dio); 1263 hammer2_adjreadcounter(&chain->bref, chain->bytes); 1264 } 1265 if (error) { 1266 chain->error = HAMMER2_ERROR_EIO; 1267 kprintf("hammer2_chain_lock: I/O error %016jx: %d\n", 1268 (intmax_t)bref->data_off, error); 1269 hammer2_io_bqrelse(&chain->dio); 1270 goto done; 1271 } 1272 chain->error = 0; 1273 1274 /* 1275 * This isn't perfect and can be ignored on OSs which do not have 1276 * an indication as to whether a buffer is coming from cache or 1277 * if I/O was actually issued for the read. TESTEDGOOD will work 1278 * pretty well without the B_IOISSUED logic because chains are 1279 * cached, but in that situation (without B_IOISSUED) it will not 1280 * detect whether a re-read via I/O is corrupted verses the original 1281 * read. 1282 * 1283 * We can't re-run the CRC on every fresh lock. That would be 1284 * insanely expensive. 1285 * 1286 * If the underlying kernel buffer covers the entire chain we can 1287 * use the B_IOISSUED indication to determine if we have to re-run 1288 * the CRC on chain data for chains that managed to stay cached 1289 * across the kernel disposal of the original buffer. 1290 */ 1291 if ((dio = chain->dio) != NULL && dio->bp) { 1292 struct buf *bp = dio->bp; 1293 1294 if (dio->psize == chain->bytes && 1295 (bp->b_flags & B_IOISSUED)) { 1296 atomic_clear_int(&chain->flags, 1297 HAMMER2_CHAIN_TESTEDGOOD); 1298 bp->b_flags &= ~B_IOISSUED; 1299 } 1300 } 1301 1302 /* 1303 * NOTE: A locked chain's data cannot be modified without first 1304 * calling hammer2_chain_modify(). 
1305 */ 1306 1307 /* 1308 * Clear INITIAL. In this case we used io_new() and the buffer has 1309 * been zero'd and marked dirty. 1310 * 1311 * NOTE: hammer2_io_data() call issues bkvasync() 1312 */ 1313 bdata = hammer2_io_data(chain->dio, chain->bref.data_off); 1314 1315 if (chain->flags & HAMMER2_CHAIN_INITIAL) { 1316 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_INITIAL); 1317 chain->bref.flags |= HAMMER2_BREF_FLAG_ZERO; 1318 } else if (chain->flags & HAMMER2_CHAIN_MODIFIED) { 1319 /* 1320 * check data not currently synchronized due to 1321 * modification. XXX assumes data stays in the buffer 1322 * cache, which might not be true (need biodep on flush 1323 * to calculate crc? or simple crc?). 1324 */ 1325 } else if ((chain->flags & HAMMER2_CHAIN_TESTEDGOOD) == 0) { 1326 if (hammer2_chain_testcheck(chain, bdata) == 0) { 1327 chain->error = HAMMER2_ERROR_CHECK; 1328 } else { 1329 atomic_set_int(&chain->flags, HAMMER2_CHAIN_TESTEDGOOD); 1330 } 1331 } 1332 1333 /* 1334 * Setup the data pointer, either pointing it to an embedded data 1335 * structure and copying the data from the buffer, or pointing it 1336 * into the buffer. 1337 * 1338 * The buffer is not retained when copying to an embedded data 1339 * structure in order to avoid potential deadlocks or recursions 1340 * on the same physical buffer. 1341 * 1342 * WARNING! Other threads can start using the data the instant we 1343 * set chain->data non-NULL. 
1344 */ 1345 switch (bref->type) { 1346 case HAMMER2_BREF_TYPE_VOLUME: 1347 case HAMMER2_BREF_TYPE_FREEMAP: 1348 /* 1349 * Copy data from bp to embedded buffer 1350 */ 1351 panic("hammer2_chain_load_data: unresolved volume header"); 1352 break; 1353 case HAMMER2_BREF_TYPE_DIRENT: 1354 KKASSERT(chain->bytes != 0); 1355 /* fall through */ 1356 case HAMMER2_BREF_TYPE_INODE: 1357 case HAMMER2_BREF_TYPE_FREEMAP_LEAF: 1358 case HAMMER2_BREF_TYPE_INDIRECT: 1359 case HAMMER2_BREF_TYPE_DATA: 1360 case HAMMER2_BREF_TYPE_FREEMAP_NODE: 1361 default: 1362 /* 1363 * Point data at the device buffer and leave dio intact. 1364 */ 1365 chain->data = (void *)bdata; 1366 break; 1367 } 1368 1369 /* 1370 * Release HAMMER2_CHAIN_IOINPROG and signal waiters if requested. 1371 */ 1372 done: 1373 for (;;) { 1374 u_int oflags; 1375 u_int nflags; 1376 1377 oflags = chain->flags; 1378 nflags = oflags & ~(HAMMER2_CHAIN_IOINPROG | 1379 HAMMER2_CHAIN_IOSIGNAL); 1380 KKASSERT(oflags & HAMMER2_CHAIN_IOINPROG); 1381 if (atomic_cmpset_int(&chain->flags, oflags, nflags)) { 1382 if (oflags & HAMMER2_CHAIN_IOSIGNAL) 1383 wakeup(&chain->flags); 1384 break; 1385 } 1386 } 1387 } 1388 1389 /* 1390 * Unlock and deref a chain element. 1391 * 1392 * Remember that the presence of children under chain prevent the chain's 1393 * destruction but do not add additional references, so the dio will still 1394 * be dropped. 1395 */ 1396 void 1397 hammer2_chain_unlock(hammer2_chain_t *chain) 1398 { 1399 hammer2_io_t *dio; 1400 u_int lockcnt; 1401 int iter = 0; 1402 1403 --curthread->td_tracker; 1404 1405 /* 1406 * If multiple locks are present (or being attempted) on this 1407 * particular chain we can just unlock, drop refs, and return. 1408 * 1409 * Otherwise fall-through on the 1->0 transition. 
	 */
	for (;;) {
		lockcnt = chain->lockcnt;
		KKASSERT(lockcnt > 0);
		cpu_ccfence();
		if (lockcnt > 1) {
			if (atomic_cmpset_int(&chain->lockcnt,
					      lockcnt, lockcnt - 1)) {
				hammer2_mtx_unlock(&chain->lock);
				return;
			}
		} else if (hammer2_mtx_upgrade_try(&chain->lock) == 0) {
			/* while holding the mutex exclusively */
			if (atomic_cmpset_int(&chain->lockcnt, 1, 0))
				break;
		} else {
			/*
			 * This situation can easily occur on SMP due to
			 * the gap in between the 1->0 transition and the
			 * final unlock.  We cannot safely block on the
			 * mutex because lockcnt might go above 1.
			 *
			 * XXX Sleep for one tick if it takes too long.
			 */
			if (++iter > 1000) {
				if (iter > 1000 + hz) {
					kprintf("hammer2: h2race2 %p\n", chain);
					iter = 1000;
				}
				tsleep(&iter, 0, "h2race2", 1);
			}
			cpu_pause();
		}
		/* retry */
	}

	/*
	 * Last unlock / mutex upgraded to exclusive.  Drop the data
	 * reference.
	 */
	dio = hammer2_chain_drop_data(chain);
	if (dio)
		hammer2_io_bqrelse(&dio);
	hammer2_mtx_unlock(&chain->lock);
}

/*
 * Unlock and hold chain data intact.  Bumping lockcnt first prevents the
 * 1->0 transition in hammer2_chain_unlock() from dropping the dio/data.
 */
void
hammer2_chain_unlock_hold(hammer2_chain_t *chain)
{
	atomic_add_int(&chain->lockcnt, 1);
	hammer2_chain_unlock(chain);
}

/*
 * Helper to obtain the blockref[] array base and count for a chain.
 *
 * XXX Not widely used yet, various use cases need to be validated and
 *     converted to use this function.
1471 */ 1472 static 1473 hammer2_blockref_t * 1474 hammer2_chain_base_and_count(hammer2_chain_t *parent, int *countp) 1475 { 1476 hammer2_blockref_t *base; 1477 int count; 1478 1479 if (parent->flags & HAMMER2_CHAIN_INITIAL) { 1480 base = NULL; 1481 1482 switch(parent->bref.type) { 1483 case HAMMER2_BREF_TYPE_INODE: 1484 count = HAMMER2_SET_COUNT; 1485 break; 1486 case HAMMER2_BREF_TYPE_INDIRECT: 1487 case HAMMER2_BREF_TYPE_FREEMAP_NODE: 1488 count = parent->bytes / sizeof(hammer2_blockref_t); 1489 break; 1490 case HAMMER2_BREF_TYPE_VOLUME: 1491 count = HAMMER2_SET_COUNT; 1492 break; 1493 case HAMMER2_BREF_TYPE_FREEMAP: 1494 count = HAMMER2_SET_COUNT; 1495 break; 1496 default: 1497 panic("hammer2_chain_create_indirect: " 1498 "unrecognized blockref type: %d", 1499 parent->bref.type); 1500 count = 0; 1501 break; 1502 } 1503 } else { 1504 switch(parent->bref.type) { 1505 case HAMMER2_BREF_TYPE_INODE: 1506 base = &parent->data->ipdata.u.blockset.blockref[0]; 1507 count = HAMMER2_SET_COUNT; 1508 break; 1509 case HAMMER2_BREF_TYPE_INDIRECT: 1510 case HAMMER2_BREF_TYPE_FREEMAP_NODE: 1511 base = &parent->data->npdata[0]; 1512 count = parent->bytes / sizeof(hammer2_blockref_t); 1513 break; 1514 case HAMMER2_BREF_TYPE_VOLUME: 1515 base = &parent->data->voldata. 1516 sroot_blockset.blockref[0]; 1517 count = HAMMER2_SET_COUNT; 1518 break; 1519 case HAMMER2_BREF_TYPE_FREEMAP: 1520 base = &parent->data->blkset.blockref[0]; 1521 count = HAMMER2_SET_COUNT; 1522 break; 1523 default: 1524 panic("hammer2_chain_create_indirect: " 1525 "unrecognized blockref type: %d", 1526 parent->bref.type); 1527 count = 0; 1528 break; 1529 } 1530 } 1531 *countp = count; 1532 1533 return base; 1534 } 1535 1536 /* 1537 * This counts the number of live blockrefs in a block array and 1538 * also calculates the point at which all remaining blockrefs are empty. 1539 * This routine can only be called on a live chain. 1540 * 1541 * Caller holds the chain locked, but possibly with a shared lock. 
 * We
 * must use an exclusive spinlock to prevent corruption.
 *
 * NOTE: Flag is not set until after the count is complete, allowing
 *	 callers to test the flag without holding the spinlock.
 *
 * NOTE: If base is NULL the related chain is still in the INITIAL
 *	 state and there are no blockrefs to count.
 *
 * NOTE: live_count may already have some counts accumulated due to
 *	 creation and deletion and could even be initially negative.
 */
void
hammer2_chain_countbrefs(hammer2_chain_t *chain,
			 hammer2_blockref_t *base, int count)
{
	hammer2_spin_ex(&chain->core.spin);
	if ((chain->flags & HAMMER2_CHAIN_COUNTEDBREFS) == 0) {
		if (base) {
			/*
			 * Scan backwards for the trailing run of empty
			 * blockrefs; live_zero is the index just past the
			 * last non-empty entry.
			 */
			while (--count >= 0) {
				if (base[count].type)
					break;
			}
			chain->core.live_zero = count + 1;
			while (count >= 0) {
				if (base[count].type)
					atomic_add_int(&chain->core.live_count,
						       1);
				--count;
			}
		} else {
			chain->core.live_zero = 0;
		}
		/* else do not modify live_count */
		atomic_set_int(&chain->flags, HAMMER2_CHAIN_COUNTEDBREFS);
	}
	hammer2_spin_unex(&chain->core.spin);
}

/*
 * Resize the chain's physical storage allocation in-place.  This function does
 * not usually adjust the data pointer and must be followed by (typically) a
 * hammer2_chain_modify() call to copy any old data over and adjust the
 * data pointer.
 *
 * Chains can be resized smaller without reallocating the storage.  Resizing
 * larger will reallocate the storage.  Excess or prior storage is reclaimed
 * asynchronously at a later time.
 *
 * An nradix value of 0 is special-cased to mean that the storage should
 * be disassociated, that is the chain is being resized to 0 bytes (not 1
 * byte).
 *
 * Must be passed an exclusively locked parent and chain.
 *
 * This function is mostly used with DATA blocks locked RESOLVE_NEVER in order
 * to avoid instantiating a device buffer that conflicts with the vnode data
 * buffer.  However, because H2 can compress or encrypt data, the chain may
 * have a dio assigned to it in those situations, and they do not conflict.
 *
 * XXX return error if cannot resize.
 */
int
hammer2_chain_resize(hammer2_chain_t *chain,
		     hammer2_tid_t mtid, hammer2_off_t dedup_off,
		     int nradix, int flags)
{
	hammer2_dev_t *hmp;
	size_t obytes;
	size_t nbytes;
	int error;

	hmp = chain->hmp;

	/*
	 * Only data and indirect blocks can be resized for now.
	 * (The volu root, inodes, and freemap elements use a fixed size).
	 */
	KKASSERT(chain != &hmp->vchain);
	KKASSERT(chain->bref.type == HAMMER2_BREF_TYPE_DATA ||
		 chain->bref.type == HAMMER2_BREF_TYPE_INDIRECT ||
		 chain->bref.type == HAMMER2_BREF_TYPE_DIRENT);

	/*
	 * Nothing to do if the element is already the proper size
	 */
	obytes = chain->bytes;
	nbytes = (nradix) ? (1U << nradix) : 0;
	if (obytes == nbytes)
		return (chain->error);

	/*
	 * Make sure the old data is instantiated so we can copy it.  If this
	 * is a data block, the device data may be superfluous since the data
	 * might be in a logical block, but compressed or encrypted data is
	 * another matter.
	 *
	 * NOTE: The modify will set BMAPUPD for us if BMAPPED is set.
	 */
	error = hammer2_chain_modify(chain, mtid, dedup_off, 0);
	if (error)
		return error;

	/*
	 * Relocate the block, even if making it smaller (because different
	 * block sizes may be in different regions).
	 *
	 * NOTE: Operation does not copy the data and may only be used
	 *	 to resize data blocks in-place, or directory entry blocks
	 *	 which are about to be modified in some manner.
	 */
	error = hammer2_freemap_alloc(chain, nbytes);
	if (error)
		return error;

	chain->bytes = nbytes;

	/*
	 * We don't want the followup chain_modify() to try to copy data
	 * from the old (wrong-sized) buffer.  It won't know how much to
	 * copy.  This case should only occur during writes when the
	 * originator already has the data to write in-hand.
	 */
	if (chain->dio) {
		KKASSERT(chain->bref.type == HAMMER2_BREF_TYPE_DATA ||
			 chain->bref.type == HAMMER2_BREF_TYPE_DIRENT);
		hammer2_io_brelse(&chain->dio);
		chain->data = NULL;
	}
	return (chain->error);
}

/*
 * Set the chain modified so its data can be changed by the caller, or
 * install deduplicated data.  The caller must call this routine for each
 * set of modifications it makes, even if the chain is already flagged
 * MODIFIED.
 *
 * Sets bref.modify_tid to mtid only if mtid != 0.  Note that bref.modify_tid
 * is a CLC (cluster level change) field and is not updated by parent
 * propagation during a flush.
 *
 * Returns an appropriate HAMMER2_ERROR_* code, which will generally reflect
 * chain->error except for HAMMER2_ERROR_ENOSPC.  If the allocation fails
 * due to no space available, HAMMER2_ERROR_ENOSPC is returned and the chain
 * remains unmodified with its old data ref intact and chain->error
 * unchanged.
 *
 *				Dedup Handling
 *
 * If the DEDUPABLE flag is set in the chain the storage must be reallocated
 * even if the chain is still flagged MODIFIED.  In this case the chain's
 * DEDUPABLE flag will be cleared once the new storage has been assigned.
 *
 * If the caller passes a non-zero dedup_off we will use it to assign the
 * new storage.  The MODIFIED flag will be *CLEARED* in this case, and
 * DEDUPABLE will be set (NOTE: the UPDATE flag is always set).  The caller
 * must not modify the data content upon return.
 */
int
hammer2_chain_modify(hammer2_chain_t *chain, hammer2_tid_t mtid,
		     hammer2_off_t dedup_off, int flags)
{
	hammer2_blockref_t obref;
	hammer2_dev_t *hmp;
	hammer2_io_t *dio;
	int error;
	int wasinitial;
	int setmodified;
	int setupdate;
	int newmod;
	char *bdata;

	hmp = chain->hmp;
	obref = chain->bref;
	KKASSERT((chain->flags & HAMMER2_CHAIN_FICTITIOUS) == 0);

	/*
	 * Data is not optional for freemap chains (we must always be sure
	 * to copy the data on COW storage allocations).
	 */
	if (chain->bref.type == HAMMER2_BREF_TYPE_FREEMAP_NODE ||
	    chain->bref.type == HAMMER2_BREF_TYPE_FREEMAP_LEAF) {
		KKASSERT((chain->flags & HAMMER2_CHAIN_INITIAL) ||
			 (flags & HAMMER2_MODIFY_OPTDATA) == 0);
	}

	/*
	 * Data must be resolved if already assigned, unless explicitly
	 * flagged otherwise.  If we cannot safely load the data the
	 * modification fails and we return early.
	 */
	if (chain->data == NULL && chain->bytes != 0 &&
	    (flags & HAMMER2_MODIFY_OPTDATA) == 0 &&
	    (chain->bref.data_off & ~HAMMER2_OFF_MASK_RADIX)) {
		hammer2_chain_load_data(chain);
		if (chain->error)
			return (chain->error);
	}
	error = 0;

	/*
	 * Set MODIFIED to indicate that the chain has been modified.  A new
	 * allocation is required when modifying a chain.
	 *
	 * Set UPDATE to ensure that the blockref is updated in the parent.
	 *
	 * If MODIFIED is already set determine if we can reuse the assigned
	 * data block or if we need a new data block.
	 */
	if ((chain->flags & HAMMER2_CHAIN_MODIFIED) == 0) {
		/*
		 * Must set modified bit.
		 */
		atomic_add_long(&hammer2_count_modified_chains, 1);
		atomic_set_int(&chain->flags, HAMMER2_CHAIN_MODIFIED);
		hammer2_pfs_memory_inc(chain->pmp);	/* can be NULL */
		setmodified = 1;

		/*
		 * We may be able to avoid a copy-on-write if the chain's
		 * check mode is set to NONE and the chain's current
		 * modify_tid is beyond the last explicit snapshot tid.
		 *
		 * This implements HAMMER2's overwrite-in-place feature.
		 *
		 * NOTE! This data-block cannot be used as a de-duplication
		 *	 source when the check mode is set to NONE.
		 */
		if ((chain->bref.type == HAMMER2_BREF_TYPE_DATA ||
		     chain->bref.type == HAMMER2_BREF_TYPE_DIRENT) &&
		    (chain->flags & HAMMER2_CHAIN_INITIAL) == 0 &&
		    (chain->flags & HAMMER2_CHAIN_DEDUPABLE) == 0 &&
		    HAMMER2_DEC_CHECK(chain->bref.methods) ==
		    HAMMER2_CHECK_NONE &&
		    chain->pmp &&
		    chain->bref.modify_tid >
		     chain->pmp->iroot->meta.pfs_lsnap_tid) {
			/*
			 * Sector overwrite allowed.
			 */
			newmod = 0;
		} else {
			/*
			 * Sector overwrite not allowed, must copy-on-write.
			 */
			newmod = 1;
		}
	} else if (chain->flags & HAMMER2_CHAIN_DEDUPABLE) {
		/*
		 * If the modified chain was registered for dedup we need
		 * a new allocation.  This only happens for delayed-flush
		 * chains (i.e. which run through the front-end buffer
		 * cache).
		 */
		newmod = 1;
		setmodified = 0;
	} else {
		/*
		 * Already flagged modified, no new allocation is needed.
		 */
		newmod = 0;
		setmodified = 0;
	}

	/*
	 * Flag parent update required.
	 */
	if ((chain->flags & HAMMER2_CHAIN_UPDATE) == 0) {
		atomic_set_int(&chain->flags, HAMMER2_CHAIN_UPDATE);
		setupdate = 1;
	} else {
		setupdate = 0;
	}

	/*
	 * The modification or re-modification requires an allocation and
	 * possible COW.  If an error occurs, the previous content and data
	 * reference is retained and the modification fails.
	 *
	 * If dedup_off is non-zero, the caller is requesting a deduplication
	 * rather than a modification.  The MODIFIED bit is not set and the
	 * data offset is set to the deduplication offset.  The data cannot
	 * be modified.
	 *
	 * NOTE: The dedup offset is allowed to be in a partially free state
	 *	 and we must be sure to reset it to a fully allocated state
	 *	 to force two bulkfree passes to free it again.
	 *
	 * NOTE: Only applicable when chain->bytes != 0.
	 *
	 * XXX can a chain already be marked MODIFIED without a data
	 * assignment?  If not, assert here instead of testing the case.
	 */
	if (chain != &hmp->vchain && chain != &hmp->fchain &&
	    chain->bytes) {
		if ((chain->bref.data_off & ~HAMMER2_OFF_MASK_RADIX) == 0 ||
		     newmod
		) {
			/*
			 * NOTE: We do not have to remove the dedup
			 *	 registration because the area is still
			 *	 allocated and the underlying DIO will
			 *	 still be flushed.
			 */
			if (dedup_off) {
				chain->bref.data_off = dedup_off;
				chain->bytes = 1 << (dedup_off &
						     HAMMER2_OFF_MASK_RADIX);
				chain->error = 0;
				atomic_clear_int(&chain->flags,
						 HAMMER2_CHAIN_MODIFIED);
				atomic_add_long(&hammer2_count_modified_chains,
						-1);
				if (chain->pmp)
					hammer2_pfs_memory_wakeup(chain->pmp);
				hammer2_freemap_adjust(hmp, &chain->bref,
						HAMMER2_FREEMAP_DORECOVER);
				atomic_set_int(&chain->flags,
						HAMMER2_CHAIN_DEDUPABLE);
			} else {
				error = hammer2_freemap_alloc(chain,
							      chain->bytes);
				atomic_clear_int(&chain->flags,
						HAMMER2_CHAIN_DEDUPABLE);
			}
		}
	}

	/*
	 * Stop here if error.  We have to undo any flag bits we might
	 * have set above.
	 */
	if (error) {
		if (setmodified) {
			atomic_clear_int(&chain->flags, HAMMER2_CHAIN_MODIFIED);
			atomic_add_long(&hammer2_count_modified_chains, -1);
			if (chain->pmp)
				hammer2_pfs_memory_wakeup(chain->pmp);
		}
		if (setupdate) {
			atomic_clear_int(&chain->flags, HAMMER2_CHAIN_UPDATE);
		}
		return error;
	}

	/*
	 * Update mirror_tid and modify_tid.  modify_tid is only updated
	 * if not passed as zero (during flushes, parent propagation passes
	 * the value 0).
	 *
	 * NOTE: chain->pmp could be the device spmp.
	 */
	chain->bref.mirror_tid = hmp->voldata.mirror_tid + 1;
	if (mtid)
		chain->bref.modify_tid = mtid;

	/*
	 * Set BMAPUPD to tell the flush code that an existing blockmap entry
	 * requires updating as well as to tell the delete code that the
	 * chain's blockref might not exactly match (in terms of physical size
	 * or block offset) the one in the parent's blocktable.  The base key
	 * of course will still match.
	 */
	if (chain->flags & HAMMER2_CHAIN_BMAPPED)
		atomic_set_int(&chain->flags, HAMMER2_CHAIN_BMAPUPD);

	/*
	 * Short-cut data blocks which the caller does not need an actual
	 * data reference to (aka OPTDATA), as long as the chain does not
	 * already have a data pointer to the data.  This generally means
	 * that the modifications are being done via the logical buffer cache.
	 * The INITIAL flag relates only to the device data buffer and thus
	 * remains unchanged in this situation.
	 *
	 * This code also handles bytes == 0 (most dirents).
	 */
	if (chain->bref.type == HAMMER2_BREF_TYPE_DATA &&
	    (flags & HAMMER2_MODIFY_OPTDATA) &&
	    chain->data == NULL) {
		KKASSERT(chain->dio == NULL);
		goto skip2;
	}

	/*
	 * Clearing the INITIAL flag (for indirect blocks) indicates that
	 * we've processed the uninitialized storage allocation.
	 *
	 * If this flag is already clear we are likely in a copy-on-write
	 * situation but we have to be sure NOT to bzero the storage if
	 * no data is present.
	 */
	if (chain->flags & HAMMER2_CHAIN_INITIAL) {
		atomic_clear_int(&chain->flags, HAMMER2_CHAIN_INITIAL);
		wasinitial = 1;
	} else {
		wasinitial = 0;
	}

	/*
	 * Instantiate data buffer and possibly execute COW operation
	 */
	switch(chain->bref.type) {
	case HAMMER2_BREF_TYPE_VOLUME:
	case HAMMER2_BREF_TYPE_FREEMAP:
		/*
		 * The data is embedded, no copy-on-write operation is
		 * needed.
		 */
		KKASSERT(chain->dio == NULL);
		break;
	case HAMMER2_BREF_TYPE_DIRENT:
		/*
		 * The data might be fully embedded.
		 */
		if (chain->bytes == 0) {
			KKASSERT(chain->dio == NULL);
			break;
		}
		/* fall through */
	case HAMMER2_BREF_TYPE_INODE:
	case HAMMER2_BREF_TYPE_FREEMAP_LEAF:
	case HAMMER2_BREF_TYPE_DATA:
	case HAMMER2_BREF_TYPE_INDIRECT:
	case HAMMER2_BREF_TYPE_FREEMAP_NODE:
		/*
		 * Perform the copy-on-write operation
		 *
		 * zero-fill or copy-on-write depending on whether
		 * chain->data exists or not and set the dirty state for
		 * the new buffer.  hammer2_io_new() will handle the
		 * zero-fill.
		 *
		 * If a dedup_off was supplied this is an existing block
		 * and no COW, copy, or further modification is required.
		 */
		KKASSERT(chain != &hmp->vchain && chain != &hmp->fchain);

		if (wasinitial && dedup_off == 0) {
			error = hammer2_io_new(hmp, chain->bref.type,
					       chain->bref.data_off,
					       chain->bytes, &dio);
		} else {
			error = hammer2_io_bread(hmp, chain->bref.type,
						 chain->bref.data_off,
						 chain->bytes, &dio);
		}
		hammer2_adjreadcounter(&chain->bref, chain->bytes);

		/*
		 * If an I/O error occurs make sure callers cannot
		 * accidentally modify the old buffer's contents and corrupt
		 * the filesystem.
		 *
		 * NOTE: hammer2_io_data() call issues bkvasync()
		 */
		if (error) {
			kprintf("hammer2_chain_modify: hmp=%p I/O error\n",
				hmp);
			chain->error = HAMMER2_ERROR_EIO;
			hammer2_io_brelse(&dio);
			hammer2_io_brelse(&chain->dio);
			chain->data = NULL;
			break;
		}
		chain->error = 0;
		bdata = hammer2_io_data(dio, chain->bref.data_off);

		if (chain->data) {
			/*
			 * COW (unless a dedup).
			 */
			KKASSERT(chain->dio != NULL);
			if (chain->data != (void *)bdata && dedup_off == 0) {
				bcopy(chain->data, bdata, chain->bytes);
			}
		} else if (wasinitial == 0) {
			/*
			 * We have a problem.  We were asked to COW but
			 * we don't have any data to COW with!
			 */
			panic("hammer2_chain_modify: having a COW %p\n",
			      chain);
		}

		/*
		 * Retire the old buffer, replace with the new.  Dirty or
		 * redirty the new buffer.
		 *
		 * WARNING! The system buffer cache may have already flushed
		 *	    the buffer, so we must be sure to [re]dirty it
		 *	    for further modification.
		 *
		 *	    If dedup_off was supplied, the caller is not
		 *	    expected to make any further modification to the
		 *	    buffer.
		 */
		if (chain->dio)
			hammer2_io_bqrelse(&chain->dio);
		chain->data = (void *)bdata;
		chain->dio = dio;
		if (dedup_off == 0)
			hammer2_io_setdirty(dio);
		break;
	default:
		panic("hammer2_chain_modify: illegal non-embedded type %d",
		      chain->bref.type);
		break;

	}
skip2:
	/*
	 * setflush on parent indicating that the parent must recurse down
	 * to us.  Do not call on chain itself which might already have it
	 * set.
	 */
	if (chain->parent)
		hammer2_chain_setflush(chain->parent);
	return (chain->error);
}

/*
 * Modify the chain associated with an inode.
 */
int
hammer2_chain_modify_ip(hammer2_inode_t *ip, hammer2_chain_t *chain,
			hammer2_tid_t mtid, int flags)
{
	int error;

	hammer2_inode_modify(ip);
	error = hammer2_chain_modify(chain, mtid, 0, flags);

	return error;
}

/*
 * Volume header data locks
 */
void
hammer2_voldata_lock(hammer2_dev_t *hmp)
{
	lockmgr(&hmp->vollk, LK_EXCLUSIVE);
}

void
hammer2_voldata_unlock(hammer2_dev_t *hmp)
{
	lockmgr(&hmp->vollk, LK_RELEASE);
}

void
hammer2_voldata_modify(hammer2_dev_t *hmp)
{
	if ((hmp->vchain.flags & HAMMER2_CHAIN_MODIFIED) == 0) {
		atomic_add_long(&hammer2_count_modified_chains, 1);
		atomic_set_int(&hmp->vchain.flags, HAMMER2_CHAIN_MODIFIED);
		hammer2_pfs_memory_inc(hmp->vchain.pmp);
	}
}

/*
 * This function returns the chain at the nearest key within the specified
 * range.  The returned chain will be referenced but not locked.
 *
 * This function will recurse through chain->rbtree as necessary and will
 * return a *key_nextp suitable for iteration.  *key_nextp is only set if
 * the iteration value is less than the current value of *key_nextp.
 *
 * The caller should use (*key_nextp) to calculate the actual range of
 * the returned element, which will be (key_beg to *key_nextp - 1), because
 * there might be another element which is superior to the returned element
 * and overlaps it.
 *
 * (*key_nextp) can be passed as key_beg in an iteration only while non-NULL
 * chains continue to be returned.  On EOF (*key_nextp) may overflow since
 * it will wind up being (key_end + 1).
 *
 * WARNING! Must be called with child's spinlock held.  Spinlock remains
 *	    held through the operation.
 */
struct hammer2_chain_find_info {
	hammer2_chain_t		*best;		/* best candidate so far */
	hammer2_key_t		key_beg;	/* requested range, inclusive */
	hammer2_key_t		key_end;
	hammer2_key_t		key_next;	/* iteration cursor, truncated */
};

static int hammer2_chain_find_cmp(hammer2_chain_t *child, void *data);
static int hammer2_chain_find_callback(hammer2_chain_t *child, void *data);

/*
 * Scan parent->core.rbtree for the nearest in-memory chain within
 * [key_beg, key_end].  Returns the best candidate (referenced by the
 * RB tree, not additionally ref'd here) and updates *key_nextp.
 */
static
hammer2_chain_t *
hammer2_chain_find(hammer2_chain_t *parent, hammer2_key_t *key_nextp,
		   hammer2_key_t key_beg, hammer2_key_t key_end)
{
	struct hammer2_chain_find_info info;

	info.best = NULL;
	info.key_beg = key_beg;
	info.key_end = key_end;
	info.key_next = *key_nextp;

	RB_SCAN(hammer2_chain_tree, &parent->core.rbtree,
		hammer2_chain_find_cmp, hammer2_chain_find_callback,
		&info);
	*key_nextp = info.key_next;
#if 0
	kprintf("chain_find %p %016jx:%016jx next=%016jx\n",
		parent, key_beg, key_end, *key_nextp);
#endif

	return (info.best);
}

/*
 * RB_SCAN range comparator.  Returns -1 if the child's key range is
 * entirely below the requested range, +1 if entirely above, 0 if the
 * ranges overlap (child is visited by the callback).
 */
static
int
hammer2_chain_find_cmp(hammer2_chain_t *child, void *data)
{
	struct hammer2_chain_find_info *info = data;
	hammer2_key_t child_beg;
	hammer2_key_t child_end;

	child_beg = child->bref.key;
	child_end = child_beg + ((hammer2_key_t)1 << child->bref.keybits) - 1;

	if (child_end < info->key_beg)
		return(-1);
	if (child_beg > info->key_end)
		return(1);
	return(0);
}

/*
 * RB_SCAN callback.  Selects the overlapping child with the lowest key
 * as info->best and truncates info->key_next so the caller can iterate.
 */
static
int
hammer2_chain_find_callback(hammer2_chain_t *child, void *data)
{
	struct hammer2_chain_find_info *info = data;
	hammer2_chain_t *best;
	hammer2_key_t child_end;

	/*
	 * WARNING! Layerq is scanned forwards, exact matches should keep
	 *	    the existing info->best.
	 */
	if ((best = info->best) == NULL) {
		/*
		 * No previous best.  Assign best
		 */
		info->best = child;
	} else if (best->bref.key <= info->key_beg &&
		   child->bref.key <= info->key_beg) {
		/*
		 * Illegal overlap.
		 */
		KKASSERT(0);
		/*info->best = child;*/
	} else if (child->bref.key < best->bref.key) {
		/*
		 * Child has a nearer key and best is not flush with key_beg.
		 * Set best to child.  Truncate key_next to the old best key.
		 */
		info->best = child;
		if (info->key_next > best->bref.key || info->key_next == 0)
			info->key_next = best->bref.key;
	} else if (child->bref.key == best->bref.key) {
		/*
		 * If our current best is flush with the child then this
		 * is an illegal overlap.
		 *
		 * key_next will automatically be limited to the smaller of
		 * the two end-points.
		 */
		KKASSERT(0);
		info->best = child;
	} else {
		/*
		 * Keep the current best but truncate key_next to the child's
		 * base.
		 *
		 * key_next will also automatically be limited to the smaller
		 * of the two end-points (probably not necessary for this case
		 * but we do it anyway).
		 */
		if (info->key_next > child->bref.key || info->key_next == 0)
			info->key_next = child->bref.key;
	}

	/*
	 * Always truncate key_next based on child's end-of-range.
	 */
	child_end = child->bref.key + ((hammer2_key_t)1 << child->bref.keybits);
	if (child_end && (info->key_next > child_end || info->key_next == 0))
		info->key_next = child_end;

	return(0);
}

/*
 * Retrieve the specified chain from a media blockref, creating the
 * in-memory chain structure which reflects it.  The returned chain is
 * held and locked according to (how) (HAMMER2_RESOLVE_*).  The caller must
 * handle crc-checks and so forth, and should check chain->error before
 * assuming that the data is good.
 *
 * To handle insertion races pass the INSERT_RACE flag along with the
 * generation number of the core.  NULL will be returned if the generation
 * number changes before we have a chance to insert the chain.  Insert
 * races can occur because the parent might be held shared.
 *
 * Caller must hold the parent locked shared or exclusive since we may
 * need the parent's bref array to find our block.
 *
 * WARNING! chain->pmp is always set to NULL for any chain representing
 *	    part of the super-root topology.
 */
hammer2_chain_t *
hammer2_chain_get(hammer2_chain_t *parent, int generation,
		  hammer2_blockref_t *bref, int how)
{
	hammer2_dev_t *hmp = parent->hmp;
	hammer2_chain_t *chain;
	int error;

	/*
	 * Allocate a chain structure representing the existing media
	 * entry.  Resulting chain has one ref and is not locked.
	 *
	 * A PFS root is allocated with a NULL pmp (super-root topology);
	 * everything else inherits the parent's pmp.
	 */
	if (bref->flags & HAMMER2_BREF_FLAG_PFSROOT)
		chain = hammer2_chain_alloc(hmp, NULL, bref);
	else
		chain = hammer2_chain_alloc(hmp, parent->pmp, bref);
	/* ref'd chain returned */

	/*
	 * Flag that the chain is in the parent's blockmap so delete/flush
	 * knows what to do with it.
	 */
	atomic_set_int(&chain->flags, HAMMER2_CHAIN_BMAPPED);

	/*
	 * chain must be locked to avoid unexpected ripouts
	 */
	hammer2_chain_lock(chain, how);

	/*
	 * Link the chain into its parent.  A spinlock is required to safely
	 * access the RBTREE, and it is possible to collide with another
	 * hammer2_chain_get() operation because the caller might only hold
	 * a shared lock on the parent.
	 *
	 * NOTE: Get races can occur quite often when we distribute
	 *	 asynchronous read-aheads across multiple threads.
	 */
	KKASSERT(parent->refs > 0);
	error = hammer2_chain_insert(parent, chain,
				     HAMMER2_CHAIN_INSERT_SPIN |
				     HAMMER2_CHAIN_INSERT_RACE,
				     generation);
	if (error) {
		/* lost the insertion race, discard our chain */
		KKASSERT((chain->flags & HAMMER2_CHAIN_ONRBTREE) == 0);
		/*kprintf("chain %p get race\n", chain);*/
		hammer2_chain_unlock(chain);
		hammer2_chain_drop(chain);
		chain = NULL;
	} else {
		KKASSERT(chain->flags & HAMMER2_CHAIN_ONRBTREE);
	}

	/*
	 * Return our new chain referenced but not locked, or NULL if
	 * a race occurred.
	 */
	return (chain);
}

/*
 * Lookup initialization/completion API.  Refs and locks (parent)
 * according to flags; the matching _done() call undoes both.
 */
hammer2_chain_t *
hammer2_chain_lookup_init(hammer2_chain_t *parent, int flags)
{
	hammer2_chain_ref(parent);
	if (flags & HAMMER2_LOOKUP_SHARED) {
		hammer2_chain_lock(parent, HAMMER2_RESOLVE_ALWAYS |
					   HAMMER2_RESOLVE_SHARED);
	} else {
		hammer2_chain_lock(parent, HAMMER2_RESOLVE_ALWAYS);
	}
	return (parent);
}

void
hammer2_chain_lookup_done(hammer2_chain_t *parent)
{
	if (parent) {
		hammer2_chain_unlock(parent);
		hammer2_chain_drop(parent);
	}
}

/*
 * Take the locked chain and return a locked parent.  The chain remains
 * locked on return, but may have to be temporarily unlocked to acquire
 * the parent.
 * Because of this, (chain) must be stable and cannot be
 * deleted while it was temporarily unlocked (typically means that (chain)
 * is an inode).
 *
 * Pass HAMMER2_RESOLVE_* flags in flags.
 *
 * This will work even if the chain is errored, and the caller can check
 * parent->error on return if desired since the parent will be locked.
 *
 * This function handles the lock order reversal.
 */
hammer2_chain_t *
hammer2_chain_getparent(hammer2_chain_t *chain, int flags)
{
	hammer2_chain_t *parent;

	/*
	 * Be careful of order, chain must be unlocked before parent
	 * is locked below to avoid a deadlock.  Try it trivially first
	 * with a non-blocking lock attempt.
	 */
	parent = chain->parent;
	if (parent == NULL)
		panic("hammer2_chain_getparent: no parent");
	hammer2_chain_ref(parent);
	if (hammer2_chain_lock(parent, flags|HAMMER2_RESOLVE_NONBLOCK) == 0)
		return parent;

	for (;;) {
		/*
		 * Release chain, lock parent blocking, then re-lock chain.
		 * chain's parent linkage may have changed while it was
		 * unlocked, so re-check below.
		 */
		hammer2_chain_unlock(chain);
		hammer2_chain_lock(parent, flags);
		hammer2_chain_lock(chain, flags);

		/*
		 * Parent relinking races are quite common.  We have to get
		 * it right or we will blow up the block table.
		 */
		if (chain->parent == parent)
			break;
		hammer2_chain_unlock(parent);
		hammer2_chain_drop(parent);
		cpu_ccfence();
		parent = chain->parent;
		if (parent == NULL)
			panic("hammer2_chain_getparent: no parent");
		hammer2_chain_ref(parent);
	}
	return parent;
}

/*
 * Take the locked chain and return a locked parent.  The chain is unlocked
 * and dropped.  *chainp is set to the returned parent as a convenience.
 * Pass HAMMER2_RESOLVE_* flags in flags.
 *
 * This will work even if the chain is errored, and the caller can check
 * parent->error on return if desired since the parent will be locked.
 *
 * The chain does NOT need to be stable.
 * We use a tracking structure
 * to track the expected parent if the chain is deleted out from under us.
 *
 * This function handles the lock order reversal.
 */
hammer2_chain_t *
hammer2_chain_repparent(hammer2_chain_t **chainp, int flags)
{
	hammer2_chain_t *chain;
	hammer2_chain_t *parent;
	struct hammer2_reptrack reptrack;
	struct hammer2_reptrack **repp;

	/*
	 * Be careful of order, chain must be unlocked before parent
	 * is locked below to avoid a deadlock.  Try it trivially first
	 * with a non-blocking lock attempt.
	 */
	chain = *chainp;
	parent = chain->parent;
	if (parent == NULL) {
		/*
		 * NOTE(review): unlocks chain->core.spin before panicing,
		 * which implies the caller may hold that spinlock on entry
		 * — TODO confirm against callers.
		 */
		hammer2_spin_unex(&chain->core.spin);
		panic("hammer2_chain_repparent: no parent");
	}
	hammer2_chain_ref(parent);
	if (hammer2_chain_lock(parent, flags|HAMMER2_RESOLVE_NONBLOCK) == 0) {
		hammer2_chain_unlock(chain);
		hammer2_chain_drop(chain);
		*chainp = parent;

		return parent;
	}

	/*
	 * Ok, now it gets a bit nasty.  There are multiple situations where
	 * the parent might be in the middle of a deletion, or where the child
	 * (chain) might be deleted the instant we let go of its lock.
	 * We can potentially end up in a no-win situation!
	 *
	 * In particular, the indirect_maintenance() case can cause these
	 * situations.
	 *
	 * To deal with this we install a reptrack structure in the parent
	 * This reptrack structure 'owns' the parent ref and will automatically
	 * migrate to the parent's parent if the parent is deleted permanently.
	 */
	hammer2_spin_init(&reptrack.spin, "h2reptrk");
	reptrack.chain = parent;
	hammer2_chain_ref(parent);		/* for the reptrack */

	hammer2_spin_ex(&parent->core.spin);
	reptrack.next = parent->core.reptrack;
	parent->core.reptrack = &reptrack;
	hammer2_spin_unex(&parent->core.spin);

	hammer2_chain_unlock(chain);
	hammer2_chain_drop(chain);
	chain = NULL;	/* gone */

	/*
	 * At the top of this loop, chain is gone and parent is refd both
	 * by us explicitly AND via our reptrack.  We are attempting to
	 * lock parent.
	 */
	for (;;) {
		hammer2_chain_lock(parent, flags);

		if (reptrack.chain == parent)
			break;
		hammer2_chain_unlock(parent);
		hammer2_chain_drop(parent);

		/* reptrack migrated while we were blocked; chase it */
		kprintf("hammer2: debug REPTRACK %p->%p\n",
			parent, reptrack.chain);
		hammer2_spin_ex(&reptrack.spin);
		parent = reptrack.chain;
		hammer2_chain_ref(parent);
		hammer2_spin_unex(&reptrack.spin);
	}

	/*
	 * Once parent is locked and matches our reptrack, our reptrack
	 * will be stable and we have our parent.  We can unlink our
	 * reptrack.
	 *
	 * WARNING!  Remember that the chain lock might be shared.  Chains
	 *	     locked shared have stable parent linkages.
	 */
	hammer2_spin_ex(&parent->core.spin);
	repp = &parent->core.reptrack;
	while (*repp != &reptrack)
		repp = &(*repp)->next;
	*repp = reptrack.next;
	hammer2_spin_unex(&parent->core.spin);

	hammer2_chain_drop(parent);	/* reptrack ref */
	*chainp = parent;		/* return parent lock+ref */

	return parent;
}

/*
 * Dispose of any linked reptrack structures in (chain) by shifting them to
 * (parent).  Both (chain) and (parent) must be exclusively locked.
 *
 * This is interlocked against any children of (chain) on the other side.
2506 * No children so remain as-of when this is called so we can test 2507 * core.reptrack without holding the spin-lock. 2508 * 2509 * Used whenever the caller intends to permanently delete chains related 2510 * to topological recursions (BREF_TYPE_INDIRECT, BREF_TYPE_FREEMAP_NODE), 2511 * where the chains underneath the node being deleted are given a new parent 2512 * above the node being deleted. 2513 */ 2514 static 2515 void 2516 hammer2_chain_repchange(hammer2_chain_t *parent, hammer2_chain_t *chain) 2517 { 2518 struct hammer2_reptrack *reptrack; 2519 2520 KKASSERT(chain->core.live_count == 0 && RB_EMPTY(&chain->core.rbtree)); 2521 while (chain->core.reptrack) { 2522 hammer2_spin_ex(&parent->core.spin); 2523 hammer2_spin_ex(&chain->core.spin); 2524 reptrack = chain->core.reptrack; 2525 if (reptrack == NULL) { 2526 hammer2_spin_unex(&chain->core.spin); 2527 hammer2_spin_unex(&parent->core.spin); 2528 break; 2529 } 2530 hammer2_spin_ex(&reptrack->spin); 2531 chain->core.reptrack = reptrack->next; 2532 reptrack->chain = parent; 2533 reptrack->next = parent->core.reptrack; 2534 parent->core.reptrack = reptrack; 2535 hammer2_chain_ref(parent); /* reptrack */ 2536 2537 hammer2_spin_unex(&chain->core.spin); 2538 hammer2_spin_unex(&parent->core.spin); 2539 kprintf("hammer2: debug repchange %p %p->%p\n", 2540 reptrack, chain, parent); 2541 hammer2_chain_drop(chain); /* reptrack */ 2542 } 2543 } 2544 2545 /* 2546 * Locate the first chain whos key range overlaps (key_beg, key_end) inclusive. 2547 * (*parentp) typically points to an inode but can also point to a related 2548 * indirect block and this function will recurse upwards and find the inode 2549 * or the nearest undeleted indirect block covering the key range. 2550 * 2551 * This function unconditionally sets *errorp, replacing any previous value. 
2552 * 2553 * (*parentp) must be exclusive or shared locked (depending on flags) and 2554 * referenced and can be an inode or an existing indirect block within the 2555 * inode. 2556 * 2557 * If (*parent) is errored out, this function will not attempt to recurse 2558 * the radix tree and will return NULL along with an appropriate *errorp. 2559 * If NULL is returned and *errorp is 0, the requested lookup could not be 2560 * located. 2561 * 2562 * On return (*parentp) will be modified to point at the deepest parent chain 2563 * element encountered during the search, as a helper for an insertion or 2564 * deletion. 2565 * 2566 * The new (*parentp) will be locked shared or exclusive (depending on flags), 2567 * and referenced, and the old will be unlocked and dereferenced (no change 2568 * if they are both the same). This is particularly important if the caller 2569 * wishes to insert a new chain, (*parentp) will be set properly even if NULL 2570 * is returned, as long as no error occurred. 2571 * 2572 * The matching chain will be returned locked according to flags. 2573 * 2574 * -- 2575 * 2576 * NULL is returned if no match was found, but (*parentp) will still 2577 * potentially be adjusted. 2578 * 2579 * On return (*key_nextp) will point to an iterative value for key_beg. 2580 * (If NULL is returned (*key_nextp) is set to (key_end + 1)). 2581 * 2582 * This function will also recurse up the chain if the key is not within the 2583 * current parent's range. (*parentp) can never be set to NULL. An iteration 2584 * can simply allow (*parentp) to float inside the loop. 2585 * 2586 * NOTE! chain->data is not always resolved. By default it will not be 2587 * resolved for BREF_TYPE_DATA, FREEMAP_NODE, or FREEMAP_LEAF. Use 2588 * HAMMER2_LOOKUP_ALWAYS to force resolution (but be careful w/ 2589 * BREF_TYPE_DATA as the device buffer can alias the logical file 2590 * buffer). 
 */

hammer2_chain_t *
hammer2_chain_lookup(hammer2_chain_t **parentp, hammer2_key_t *key_nextp,
		     hammer2_key_t key_beg, hammer2_key_t key_end,
		     int *errorp, int flags)
{
	hammer2_dev_t *hmp;
	hammer2_chain_t *parent;
	hammer2_chain_t *chain;
	hammer2_blockref_t *base;
	hammer2_blockref_t *bref;
	hammer2_blockref_t bcopy;
	hammer2_key_t scan_beg;
	hammer2_key_t scan_end;
	int count = 0;
	int how_always = HAMMER2_RESOLVE_ALWAYS;
	int how_maybe = HAMMER2_RESOLVE_MAYBE;
	int how;
	int generation;
	int maxloops = 300000;
	volatile hammer2_mtx_t save_mtx;

	/*
	 * Translate HAMMER2_LOOKUP_* flags into the RESOLVE modes used
	 * for locking below.
	 */
	if (flags & HAMMER2_LOOKUP_ALWAYS) {
		how_maybe = how_always;
		how = HAMMER2_RESOLVE_ALWAYS;
	} else if (flags & HAMMER2_LOOKUP_NODATA) {
		how = HAMMER2_RESOLVE_NEVER;
	} else {
		how = HAMMER2_RESOLVE_MAYBE;
	}
	if (flags & HAMMER2_LOOKUP_SHARED) {
		how_maybe |= HAMMER2_RESOLVE_SHARED;
		how_always |= HAMMER2_RESOLVE_SHARED;
		how |= HAMMER2_RESOLVE_SHARED;
	}

	/*
	 * Recurse (*parentp) upward if necessary until the parent completely
	 * encloses the key range or we hit the inode.
	 *
	 * Handle races against the flusher deleting indirect nodes on its
	 * way back up by continuing to recurse upward past the deletion.
	 */
	parent = *parentp;
	hmp = parent->hmp;
	*errorp = 0;

	while (parent->bref.type == HAMMER2_BREF_TYPE_INDIRECT ||
	       parent->bref.type == HAMMER2_BREF_TYPE_FREEMAP_NODE) {
		scan_beg = parent->bref.key;
		scan_end = scan_beg +
			   ((hammer2_key_t)1 << parent->bref.keybits) - 1;
		if ((parent->flags & HAMMER2_CHAIN_DELETED) == 0) {
			if (key_beg >= scan_beg && key_end <= scan_end)
				break;
		}
		parent = hammer2_chain_repparent(parentp, how_maybe);
	}
again:
	if (--maxloops == 0)
		panic("hammer2_chain_lookup: maxloops");
	/*
	 * Locate the blockref array.  Currently we do a fully associative
	 * search through the array.
	 */
	switch(parent->bref.type) {
	case HAMMER2_BREF_TYPE_INODE:
		/*
		 * Special shortcut for embedded data returns the inode
		 * itself.  Callers must detect this condition and access
		 * the embedded data (the strategy code does this for us).
		 *
		 * This is only applicable to regular files and softlinks.
		 *
		 * We need a second lock on parent.  Since we already have
		 * a lock we must pass LOCKAGAIN to prevent unexpected
		 * blocking (we don't want to block on a second shared
		 * ref if an exclusive lock is pending)
		 */
		if (parent->data->ipdata.meta.op_flags &
		    HAMMER2_OPFLAG_DIRECTDATA) {
			if (flags & HAMMER2_LOOKUP_NODIRECT) {
				chain = NULL;
				*key_nextp = key_end + 1;
				goto done;
			}
			hammer2_chain_ref(parent);
			hammer2_chain_lock(parent, how_always |
					   HAMMER2_RESOLVE_LOCKAGAIN);
			*key_nextp = key_end + 1;
			return (parent);
		}
		base = &parent->data->ipdata.u.blockset.blockref[0];
		count = HAMMER2_SET_COUNT;
		break;
	case HAMMER2_BREF_TYPE_FREEMAP_NODE:
	case HAMMER2_BREF_TYPE_INDIRECT:
		/*
		 * Handle MATCHIND on the parent: if the parent's key range
		 * exactly matches the request, return the parent itself.
		 */
		if (flags & HAMMER2_LOOKUP_MATCHIND) {
			scan_beg = parent->bref.key;
			scan_end = scan_beg +
			       ((hammer2_key_t)1 << parent->bref.keybits) - 1;
			if (key_beg == scan_beg && key_end == scan_end) {
				chain = parent;
				hammer2_chain_ref(chain);
				hammer2_chain_lock(chain, how_maybe);
				*key_nextp = scan_end + 1;
				goto done;
			}
		}

		/*
		 * Optimize indirect blocks in the INITIAL state to avoid
		 * I/O.
		 */
		if (parent->flags & HAMMER2_CHAIN_INITIAL) {
			base = NULL;
		} else {
			/* debug hang rather than panic on NULL data */
			if (parent->data == NULL) {
				kprintf("parent->data is NULL %p\n", parent);
				while (1)
					tsleep(parent, 0, "xxx", 0);
			}
			base = &parent->data->npdata[0];
		}
		count = parent->bytes / sizeof(hammer2_blockref_t);
		break;
	case HAMMER2_BREF_TYPE_VOLUME:
		base = &parent->data->voldata.sroot_blockset.blockref[0];
		count = HAMMER2_SET_COUNT;
		break;
	case HAMMER2_BREF_TYPE_FREEMAP:
		base = &parent->data->blkset.blockref[0];
		count = HAMMER2_SET_COUNT;
		break;
	default:
		/* debug hang; the panic below is not reached */
		kprintf("hammer2_chain_lookup: unrecognized "
			"blockref(B) type: %d",
			parent->bref.type);
		while (1)
			tsleep(&base, 0, "dead", 0);
		panic("hammer2_chain_lookup: unrecognized "
		      "blockref(B) type: %d",
		      parent->bref.type);
		base = NULL;	/* safety */
		count = 0;	/* safety */
	}

	/*
	 * No lookup is possible if the parent is errored.  We delayed
	 * this check as long as we could to ensure that the parent backup,
	 * embedded data, and MATCHIND code could still execute.
	 *
	 * NOTE(review): this path returns NULL without setting *key_nextp
	 * — callers iterating on (*key_nextp) should check *errorp first.
	 */
	if (parent->error) {
		*errorp = parent->error;
		return NULL;
	}

	/*
	 * Merged scan to find next candidate.
	 *
	 * hammer2_base_*() functions require the parent->core.live_* fields
	 * to be synchronized.
	 *
	 * We need to hold the spinlock to access the block array and RB tree
	 * and to interlock chain creation.
	 */
	if ((parent->flags & HAMMER2_CHAIN_COUNTEDBREFS) == 0)
		hammer2_chain_countbrefs(parent, base, count);

	/*
	 * Combined search
	 */
	hammer2_spin_ex(&parent->core.spin);
	chain = hammer2_combined_find(parent, base, count,
				      key_nextp,
				      key_beg, key_end,
				      &bref);
	generation = parent->core.generation;

	/*
	 * Exhausted parent chain, iterate.
	 */
	if (bref == NULL) {
		KKASSERT(chain == NULL);
		hammer2_spin_unex(&parent->core.spin);
		if (key_beg == key_end)	/* short cut single-key case */
			return (NULL);

		/*
		 * Stop if we reached the end of the iteration.
		 */
		if (parent->bref.type != HAMMER2_BREF_TYPE_INDIRECT &&
		    parent->bref.type != HAMMER2_BREF_TYPE_FREEMAP_NODE) {
			return (NULL);
		}

		/*
		 * Calculate next key, stop if we reached the end of the
		 * iteration, otherwise go up one level and loop.
		 */
		key_beg = parent->bref.key +
			  ((hammer2_key_t)1 << parent->bref.keybits);
		if (key_beg == 0 || key_beg > key_end)
			return (NULL);
		parent = hammer2_chain_repparent(parentp, how_maybe);
		goto again;
	}

	/*
	 * Selected from blockref or in-memory chain.
	 */
	bcopy = *bref;
	if (chain == NULL) {
		hammer2_spin_unex(&parent->core.spin);
		if (bcopy.type == HAMMER2_BREF_TYPE_INDIRECT ||
		    bcopy.type == HAMMER2_BREF_TYPE_FREEMAP_NODE) {
			chain = hammer2_chain_get(parent, generation,
						  &bcopy, how_maybe);
		} else {
			chain = hammer2_chain_get(parent, generation,
						  &bcopy, how);
		}
		if (chain == NULL)
			goto again;
	} else {
		hammer2_chain_ref(chain);
		hammer2_spin_unex(&parent->core.spin);

		/*
		 * chain is referenced but not locked.  We must lock the
		 * chain to obtain definitive state.
		 */
		if (bcopy.type == HAMMER2_BREF_TYPE_INDIRECT ||
		    bcopy.type == HAMMER2_BREF_TYPE_FREEMAP_NODE) {
			hammer2_chain_lock(chain, how_maybe);
		} else {
			hammer2_chain_lock(chain, how);
		}
		KKASSERT(chain->parent == parent);
	}
	if (bcmp(&bcopy, &chain->bref, sizeof(bcopy)) ||
	    chain->parent != parent) {
		/* chain changed under us while acquiring the lock, retry */
		hammer2_chain_unlock(chain);
		hammer2_chain_drop(chain);
		chain = NULL;	/* SAFETY */
		goto again;
	}


	/*
	 * Skip deleted chains (XXX cache 'i' end-of-block-array? XXX)
	 *
	 * NOTE: Chain's key range is not relevant as there might be
	 *	 one-offs within the range that are not deleted.
	 *
	 * NOTE: Lookups can race delete-duplicate because
	 *	 delete-duplicate does not lock the parent's core
	 *	 (they just use the spinlock on the core).
	 */
	if (chain->flags & HAMMER2_CHAIN_DELETED) {
		kprintf("skip deleted chain %016jx.%02x key=%016jx\n",
			chain->bref.data_off, chain->bref.type,
			chain->bref.key);
		hammer2_chain_unlock(chain);
		hammer2_chain_drop(chain);
		chain = NULL;	/* SAFETY */
		key_beg = *key_nextp;
		if (key_beg == 0 || key_beg > key_end)
			return(NULL);
		goto again;
	}

	/*
	 * If the chain element is an indirect block it becomes the new
	 * parent and we loop on it.  We must maintain our top-down locks
	 * to prevent the flusher from interfering (i.e. doing a
	 * delete-duplicate and leaving us recursing down a deleted chain).
	 *
	 * The parent always has to be locked with at least RESOLVE_MAYBE
	 * so we can access its data.  It might need a fixup if the caller
	 * passed incompatible flags.  Be careful not to cause a deadlock
	 * as a data-load requires an exclusive lock.
	 *
	 * If HAMMER2_LOOKUP_MATCHIND is set and the indirect block's key
	 * range is within the requested key range we return the indirect
	 * block and do NOT loop.  This is usually only used to acquire
	 * freemap nodes.
	 */
	if (chain->bref.type == HAMMER2_BREF_TYPE_INDIRECT ||
	    chain->bref.type == HAMMER2_BREF_TYPE_FREEMAP_NODE) {
		/*
		 * NOTE(review): save_mtx is assigned here but never read
		 * afterwards — dead store, candidate for removal.
		 */
		save_mtx = parent->lock;
		hammer2_chain_unlock(parent);
		hammer2_chain_drop(parent);
		*parentp = parent = chain;
		chain = NULL;	/* SAFETY */
		goto again;
	}
done:
	/*
	 * All done, return the locked chain.
	 *
	 * If the caller does not want a locked chain, replace the lock with
	 * a ref.  Perhaps this can eventually be optimized to not obtain the
	 * lock in the first place for situations where the data does not
	 * need to be resolved.
	 *
	 * NOTE! A chain->error must be tested by the caller upon return.
	 *	 *errorp is only set based on issues which occur while
	 *	 trying to reach the chain.
	 */
	return (chain);
}

/*
 * After having issued a lookup we can iterate all matching keys.
 *
 * If chain is non-NULL we continue the iteration from just after it's index.
 *
 * If chain is NULL we assume the parent was exhausted and continue the
 * iteration at the next parent.
 *
 * If a fatal error occurs (typically an I/O error), a dummy chain is
 * returned with chain->error and error-identifying information set.  This
 * chain will assert if you try to do anything fancy with it.
 *
 * XXX Depending on where the error occurs we should allow continued iteration.
 *
 * parent must be locked on entry and remains locked throughout.  chain's
 * lock status must match flags.  Chain is always at least referenced.
 *
 * WARNING!  The MATCHIND flag does not apply to this function.
 */
hammer2_chain_t *
hammer2_chain_next(hammer2_chain_t **parentp, hammer2_chain_t *chain,
		   hammer2_key_t *key_nextp,
		   hammer2_key_t key_beg, hammer2_key_t key_end,
		   int *errorp, int flags)
{
	hammer2_chain_t *parent;
	int how_maybe;

	/*
	 * Calculate locking flags for upward recursion.
	 */
	how_maybe = HAMMER2_RESOLVE_MAYBE;
	if (flags & HAMMER2_LOOKUP_SHARED)
		how_maybe |= HAMMER2_RESOLVE_SHARED;

	parent = *parentp;
	*errorp = 0;

	/*
	 * Calculate the next index and recalculate the parent if necessary.
	 */
	if (chain) {
		/* continue just past the previous chain's key range */
		key_beg = chain->bref.key +
			  ((hammer2_key_t)1 << chain->bref.keybits);
		hammer2_chain_unlock(chain);
		hammer2_chain_drop(chain);

		/*
		 * chain invalid past this point, but we can still do a
		 * pointer comparison w/parent.
		 *
		 * Any scan where the lookup returned degenerate data embedded
		 * in the inode has an invalid index and must terminate.
		 */
		if (chain == parent)
			return(NULL);
		if (key_beg == 0 || key_beg > key_end)
			return(NULL);
		chain = NULL;
	} else if (parent->bref.type != HAMMER2_BREF_TYPE_INDIRECT &&
		   parent->bref.type != HAMMER2_BREF_TYPE_FREEMAP_NODE) {
		/*
		 * We reached the end of the iteration.
		 */
		return (NULL);
	} else {
		/*
		 * Continue iteration with next parent unless the current
		 * parent covers the range.
		 *
		 * (This also handles the case of a deleted, empty indirect
		 * node).
		 */
		key_beg = parent->bref.key +
			  ((hammer2_key_t)1 << parent->bref.keybits);
		if (key_beg == 0 || key_beg > key_end)
			return (NULL);
		parent = hammer2_chain_repparent(parentp, how_maybe);
	}

	/*
	 * And execute
	 */
	return (hammer2_chain_lookup(parentp, key_nextp,
				     key_beg, key_end,
				     errorp, flags));
}

/*
 * Caller wishes to iterate chains under parent, loading new chains into
 * chainp.  Caller must initialize *chainp to NULL and *firstp to 1, and
 * then call hammer2_chain_scan() repeatedly until a non-zero return.
 * During the scan, *firstp will be set to 0 and (*chainp) will be replaced
 * with the returned chain for the scan.  The returned *chainp will be
 * locked and referenced.  Any prior contents will be unlocked and dropped.
 *
 * Caller should check the return value.  A normal scan EOF will return
 * exactly HAMMER2_ERROR_EOF.  Any other non-zero value indicates an
 * error trying to access parent data.  Any error in the returned chain
 * must be tested separately by the caller.
 *
 * (*chainp) is dropped on each scan, but will only be set if the returned
 * element itself can recurse.  Leaf elements are NOT resolved, loaded, or
 * returned via *chainp.  The caller will get their bref only.
 *
 * The raw scan function is similar to lookup/next but does not seek to a key.
 * Blockrefs are iterated via first_bref = (parent, NULL) and
 * next_chain = (parent, bref).
 *
 * The passed-in parent must be locked and its data resolved.  The function
 * nominally returns a locked and referenced *chainp != NULL for chains
 * the caller might need to recurse on (and will dispose of any *chainp passed
 * in).  The caller must check the chain->bref.type either way.
 */
int
hammer2_chain_scan(hammer2_chain_t *parent, hammer2_chain_t **chainp,
		   hammer2_blockref_t *bref, int *firstp,
		   int flags)
{
	hammer2_dev_t *hmp;
	hammer2_blockref_t *base;	/* parent's blockref array, NULL if empty */
	hammer2_blockref_t *bref_ptr;	/* winning blockref from combined find */
	hammer2_key_t key;		/* starting key for this iteration */
	hammer2_key_t next_key;
	hammer2_chain_t *chain = NULL;
	int count = 0;
	int how_always = HAMMER2_RESOLVE_ALWAYS;
	int how_maybe = HAMMER2_RESOLVE_MAYBE;
	int how;
	int generation;
	int maxloops = 300000;		/* sanity brake on retry loops */
	int error;

	hmp = parent->hmp;
	error = 0;

	/*
	 * Scan flags borrowed from lookup.  These select how deeply the
	 * returned chain's data is resolved when it is instantiated.
	 */
	if (flags & HAMMER2_LOOKUP_ALWAYS) {
		how_maybe = how_always;
		how = HAMMER2_RESOLVE_ALWAYS;
	} else if (flags & HAMMER2_LOOKUP_NODATA) {
		how = HAMMER2_RESOLVE_NEVER;
	} else {
		how = HAMMER2_RESOLVE_MAYBE;
	}
	if (flags & HAMMER2_LOOKUP_SHARED) {
		how_maybe |= HAMMER2_RESOLVE_SHARED;
		how_always |= HAMMER2_RESOLVE_SHARED;
		how |= HAMMER2_RESOLVE_SHARED;
	}

	/*
	 * Calculate key to locate first/next element, unlocking the previous
	 * element as we go.  Be careful, the key calculation can overflow
	 * (an overflow to 0 means the key space is exhausted -> EOF).
	 *
	 * (also reset bref to NULL)
	 */
	if (*firstp) {
		key = 0;
		*firstp = 0;
	} else {
		key = bref->key + ((hammer2_key_t)1 << bref->keybits);
		if ((chain = *chainp) != NULL) {
			*chainp = NULL;
			hammer2_chain_unlock(chain);
			hammer2_chain_drop(chain);
			chain = NULL;
		}
		if (key == 0) {
			error |= HAMMER2_ERROR_EOF;
			goto done;
		}
	}

again:
	if (parent->error) {
		error = parent->error;
		goto done;
	}
	if (--maxloops == 0)
		panic("hammer2_chain_scan: maxloops");

	/*
	 * Locate the blockref array.  Currently we do a fully associative
	 * search through the array.
	 */
	switch(parent->bref.type) {
	case HAMMER2_BREF_TYPE_INODE:
		/*
		 * An inode with embedded data has no sub-chains.
		 *
		 * WARNING! Bulk scan code may pass a static chain marked
		 *	    as BREF_TYPE_INODE with a copy of the volume
		 *	    root blockset to snapshot the volume.
		 */
		if (parent->data->ipdata.meta.op_flags &
		    HAMMER2_OPFLAG_DIRECTDATA) {
			error |= HAMMER2_ERROR_EOF;
			goto done;
		}
		base = &parent->data->ipdata.u.blockset.blockref[0];
		count = HAMMER2_SET_COUNT;
		break;
	case HAMMER2_BREF_TYPE_FREEMAP_NODE:
	case HAMMER2_BREF_TYPE_INDIRECT:
		/*
		 * Optimize indirect blocks in the INITIAL state to avoid
		 * I/O.  A NULL base tells the combined find to use only
		 * the in-memory RB tree.
		 */
		if (parent->flags & HAMMER2_CHAIN_INITIAL) {
			base = NULL;
		} else {
			if (parent->data == NULL)
				panic("parent->data is NULL");
			base = &parent->data->npdata[0];
		}
		count = parent->bytes / sizeof(hammer2_blockref_t);
		break;
	case HAMMER2_BREF_TYPE_VOLUME:
		base = &parent->data->voldata.sroot_blockset.blockref[0];
		count = HAMMER2_SET_COUNT;
		break;
	case HAMMER2_BREF_TYPE_FREEMAP:
		base = &parent->data->blkset.blockref[0];
		count = HAMMER2_SET_COUNT;
		break;
	default:
		/* NOTE(review): message says "lookup", apparent copy/paste
		 * from hammer2_chain_lookup(); this is the scan path. */
		panic("hammer2_chain_lookup: unrecognized blockref type: %d",
		      parent->bref.type);
		base = NULL;	/* safety */
		count = 0;	/* safety */
	}

	/*
	 * Merged scan to find next candidate.
	 *
	 * hammer2_base_*() functions require the parent->core.live_* fields
	 * to be synchronized.
	 *
	 * We need to hold the spinlock to access the block array and RB tree
	 * and to interlock chain creation.
	 */
	if ((parent->flags & HAMMER2_CHAIN_COUNTEDBREFS) == 0)
		hammer2_chain_countbrefs(parent, base, count);

	next_key = 0;
	bref_ptr = NULL;
	hammer2_spin_ex(&parent->core.spin);
	chain = hammer2_combined_find(parent, base, count,
				      &next_key,
				      key, HAMMER2_KEY_MAX,
				      &bref_ptr);
	generation = parent->core.generation;

	/*
	 * Exhausted parent chain, we're done.
	 */
	if (bref_ptr == NULL) {
		hammer2_spin_unex(&parent->core.spin);
		KKASSERT(chain == NULL);
		error |= HAMMER2_ERROR_EOF;
		goto done;
	}

	/*
	 * Copy into the supplied stack-based blockref.
	 */
	*bref = *bref_ptr;

	/*
	 * Selected from blockref or in-memory chain.
	 */
	if (chain == NULL) {
		switch(bref->type) {
		case HAMMER2_BREF_TYPE_INODE:
		case HAMMER2_BREF_TYPE_FREEMAP_NODE:
		case HAMMER2_BREF_TYPE_INDIRECT:
		case HAMMER2_BREF_TYPE_VOLUME:
		case HAMMER2_BREF_TYPE_FREEMAP:
			/*
			 * Recursion, always get the chain.  A NULL return
			 * means we raced an insertion; retry.
			 */
			hammer2_spin_unex(&parent->core.spin);
			chain = hammer2_chain_get(parent, generation,
						  bref, how);
			if (chain == NULL)
				goto again;
			break;
		default:
			/*
			 * No recursion, do not waste time instantiating
			 * a chain, just iterate using the bref.
			 */
			hammer2_spin_unex(&parent->core.spin);
			break;
		}
	} else {
		/*
		 * Recursion or not we need the chain in order to supply
		 * the bref.  Ref under the spinlock, lock after release.
		 */
		hammer2_chain_ref(chain);
		hammer2_spin_unex(&parent->core.spin);
		hammer2_chain_lock(chain, how);
	}
	/*
	 * Now that the chain is locked, revalidate it against the bref we
	 * captured.  Any mismatch means we raced a move/modify; retry.
	 */
	if (chain &&
	    (bcmp(bref, &chain->bref, sizeof(*bref)) ||
	     chain->parent != parent)) {
		hammer2_chain_unlock(chain);
		hammer2_chain_drop(chain);
		chain = NULL;
		goto again;
	}

	/*
	 * Skip deleted chains (XXX cache 'i' end-of-block-array? XXX)
	 *
	 * NOTE: chain's key range is not relevant as there might be
	 *	 one-offs within the range that are not deleted.
	 *
	 * NOTE: XXX this could create problems with scans used in
	 *	 situations other than mount-time recovery.
	 *
	 * NOTE: Lookups can race delete-duplicate because
	 *	 delete-duplicate does not lock the parent's core
	 *	 (they just use the spinlock on the core).
	 */
	if (chain && (chain->flags & HAMMER2_CHAIN_DELETED)) {
		hammer2_chain_unlock(chain);
		hammer2_chain_drop(chain);
		chain = NULL;

		key = next_key;
		if (key == 0) {
			error |= HAMMER2_ERROR_EOF;
			goto done;
		}
		goto again;
	}

done:
	/*
	 * All done, return the bref or NULL, supply chain if necessary.
	 */
	if (chain)
		*chainp = chain;
	return (error);
}

/*
 * Create and return a new hammer2 system memory structure of the specified
 * key, type and size and insert it under (*parentp).  This is a full
 * insertion, based on the supplied key/keybits, and may involve creating
 * indirect blocks and moving other chains around via delete/duplicate.
 *
 * THE CALLER MUST HAVE ALREADY PROPERLY SEEKED (*parentp) TO THE INSERTION
 * POINT SANS ANY REQUIRED INDIRECT BLOCK CREATIONS DUE TO THE ARRAY BEING
 * FULL.  This typically means that the caller is creating the chain after
 * doing a hammer2_chain_lookup().
 *
 * (*parentp) must be exclusive locked and may be replaced on return
 * depending on how much work the function had to do.
 *
 * (*parentp) must not be errored or this function will assert.
 *
 * (*chainp) usually starts out NULL and returns the newly created chain,
 * but if the caller desires the caller may allocate a disconnected chain
 * and pass it in instead.
 *
 * This function should NOT be used to insert INDIRECT blocks.
 * It is
 * typically used to create/insert inodes and data blocks.
 *
 * Caller must pass-in an exclusively locked parent the new chain is to
 * be inserted under, and optionally pass-in a disconnected, exclusively
 * locked chain to insert (else we create a new chain).  The function will
 * adjust (*parentp) as necessary, create or connect the chain, and
 * return an exclusively locked chain in *chainp.
 *
 * When creating a PFSROOT inode under the super-root, pmp is typically NULL
 * and will be reassigned.
 *
 * NOTE: returns HAMMER2_ERROR_* flags
 */
int
hammer2_chain_create(hammer2_chain_t **parentp, hammer2_chain_t **chainp,
		     hammer2_pfs_t *pmp, int methods,
		     hammer2_key_t key, int keybits, int type, size_t bytes,
		     hammer2_tid_t mtid, hammer2_off_t dedup_off, int flags)
{
	hammer2_dev_t *hmp;
	hammer2_chain_t *chain;
	hammer2_chain_t *parent;
	hammer2_blockref_t *base;
	hammer2_blockref_t dummy;	/* template bref for a fresh chain */
	int allocated = 0;		/* non-zero if we alloc'd the chain */
	int error = 0;
	int count;
	int maxloops = 300000;		/* sanity brake on the indirect loop */

	/*
	 * Topology may be crossing a PFS boundary.
	 */
	parent = *parentp;
	KKASSERT(hammer2_mtx_owned(&parent->lock));
	KKASSERT(parent->error == 0);
	hmp = parent->hmp;
	chain = *chainp;

	if (chain == NULL) {
		/*
		 * First allocate media space and construct the dummy bref,
		 * then allocate the in-memory chain structure.  Set the
		 * INITIAL flag for fresh chains which do not have embedded
		 * data.
		 *
		 * XXX for now set the check mode of the child based on
		 *     the parent or, if the parent is an inode, the
		 *     specification in the inode.
		 */
		bzero(&dummy, sizeof(dummy));
		dummy.type = type;
		dummy.key = key;
		dummy.keybits = keybits;
		dummy.data_off = hammer2_getradix(bytes);

		/*
		 * Inherit methods from parent by default.  Primarily used
		 * for BREF_TYPE_DATA.  Non-data types *must* be set to
		 * a non-NONE check algorithm.
		 */
		if (methods == -1)
			dummy.methods = parent->bref.methods;
		else
			dummy.methods = (uint8_t)methods;

		if (type != HAMMER2_BREF_TYPE_DATA &&
		    HAMMER2_DEC_CHECK(dummy.methods) == HAMMER2_CHECK_NONE) {
			dummy.methods |=
				HAMMER2_ENC_CHECK(HAMMER2_CHECK_DEFAULT);
		}

		chain = hammer2_chain_alloc(hmp, pmp, &dummy);

		/*
		 * Lock the chain manually, chain_lock will load the chain
		 * which we do NOT want to do.  (note: chain->refs is set
		 * to 1 by chain_alloc() for us, but lockcnt is not).
		 */
		chain->lockcnt = 1;
		hammer2_mtx_ex(&chain->lock);
		allocated = 1;
		++curthread->td_tracker;

		/*
		 * Set INITIAL to optimize I/O.  The flag will generally be
		 * processed when we call hammer2_chain_modify().
		 *
		 * Recalculate bytes to reflect the actual media block
		 * allocation.  Handle special case radix 0 == 0 bytes.
		 */
		bytes = (size_t)(chain->bref.data_off & HAMMER2_OFF_MASK_RADIX);
		if (bytes)
			bytes = (hammer2_off_t)1 << bytes;
		chain->bytes = bytes;

		switch(type) {
		case HAMMER2_BREF_TYPE_VOLUME:
		case HAMMER2_BREF_TYPE_FREEMAP:
			panic("hammer2_chain_create: called with volume type");
			break;
		case HAMMER2_BREF_TYPE_INDIRECT:
			panic("hammer2_chain_create: cannot be used to"
			      "create indirect block");
			break;
		case HAMMER2_BREF_TYPE_FREEMAP_NODE:
			panic("hammer2_chain_create: cannot be used to"
			      "create freemap root or node");
			break;
		case HAMMER2_BREF_TYPE_FREEMAP_LEAF:
			KKASSERT(bytes == sizeof(chain->data->bmdata));
			/* fall through */
		case HAMMER2_BREF_TYPE_DIRENT:
		case HAMMER2_BREF_TYPE_INODE:
		case HAMMER2_BREF_TYPE_DATA:
		default:
			/*
			 * leave chain->data NULL, set INITIAL
			 */
			KKASSERT(chain->data == NULL);
			atomic_set_int(&chain->flags, HAMMER2_CHAIN_INITIAL);
			break;
		}
	} else {
		/*
		 * We are reattaching a previously deleted chain, possibly
		 * under a new parent and possibly with a new key/keybits.
		 * The chain does not have to be in a modified state.  The
		 * UPDATE flag will be set later on in this routine.
		 *
		 * Do NOT mess with the current state of the INITIAL flag.
		 */
		chain->bref.key = key;
		chain->bref.keybits = keybits;
		if (chain->flags & HAMMER2_CHAIN_DELETED)
			atomic_clear_int(&chain->flags, HAMMER2_CHAIN_DELETED);
		KKASSERT(chain->parent == NULL);
	}

	/*
	 * Set the appropriate bref flag if requested.
	 *
	 * NOTE! Callers can call this function to move chains without
	 *	 knowing about special flags, so don't clear bref flags
	 *	 here!
	 */
	if (flags & HAMMER2_INSERT_PFSROOT)
		chain->bref.flags |= HAMMER2_BREF_FLAG_PFSROOT;

	/*
	 * Calculate how many entries we have in the blockref array and
	 * determine if an indirect block is required.
	 */
again:
	if (--maxloops == 0)
		panic("hammer2_chain_create: maxloops");

	switch(parent->bref.type) {
	case HAMMER2_BREF_TYPE_INODE:
		if ((parent->data->ipdata.meta.op_flags &
		     HAMMER2_OPFLAG_DIRECTDATA) != 0) {
			kprintf("hammer2: parent set for direct-data! "
				"pkey=%016jx ckey=%016jx\n",
				parent->bref.key,
				chain->bref.key);
		}
		KKASSERT((parent->data->ipdata.meta.op_flags &
			  HAMMER2_OPFLAG_DIRECTDATA) == 0);
		KKASSERT(parent->data != NULL);
		base = &parent->data->ipdata.u.blockset.blockref[0];
		count = HAMMER2_SET_COUNT;
		break;
	case HAMMER2_BREF_TYPE_INDIRECT:
	case HAMMER2_BREF_TYPE_FREEMAP_NODE:
		/* base NULL means blockrefs live only in the RB tree */
		if (parent->flags & HAMMER2_CHAIN_INITIAL)
			base = NULL;
		else
			base = &parent->data->npdata[0];
		count = parent->bytes / sizeof(hammer2_blockref_t);
		break;
	case HAMMER2_BREF_TYPE_VOLUME:
		KKASSERT(parent->data != NULL);
		base = &parent->data->voldata.sroot_blockset.blockref[0];
		count = HAMMER2_SET_COUNT;
		break;
	case HAMMER2_BREF_TYPE_FREEMAP:
		KKASSERT(parent->data != NULL);
		base = &parent->data->blkset.blockref[0];
		count = HAMMER2_SET_COUNT;
		break;
	default:
		panic("hammer2_chain_create: unrecognized blockref type: %d",
		      parent->bref.type);
		base = NULL;
		count = 0;
		break;
	}

	/*
	 * Make sure we've counted the brefs
	 */
	if ((parent->flags & HAMMER2_CHAIN_COUNTEDBREFS) == 0)
		hammer2_chain_countbrefs(parent, base, count);

	KASSERT(parent->core.live_count >= 0 &&
		parent->core.live_count <= count,
		("bad live_count %d/%d (%02x, %d)",
		 parent->core.live_count, count,
		 parent->bref.type, parent->bytes));

	/*
	 * If no free blockref could be found we must create an indirect
	 * block and move a number of blockrefs into it.  With the parent
	 * locked we can safely lock each child in order to delete+duplicate
	 * it without causing a deadlock.
	 *
	 * This may return the new indirect block or the old parent depending
	 * on where the key falls.  NULL is returned on error.
	 */
	if (parent->core.live_count == count) {
		hammer2_chain_t *nparent;

		KKASSERT((flags & HAMMER2_INSERT_SAMEPARENT) == 0);

		nparent = hammer2_chain_create_indirect(parent, key, keybits,
							mtid, type, &error);
		if (nparent == NULL) {
			if (allocated)
				hammer2_chain_drop(chain);
			chain = NULL;
			goto done;
		}
		if (parent != nparent) {
			hammer2_chain_unlock(parent);
			hammer2_chain_drop(parent);
			parent = *parentp = nparent;
		}
		goto again;
	}

	if (chain->flags & HAMMER2_CHAIN_DELETED)
		kprintf("Inserting deleted chain @%016jx\n",
			chain->bref.key);

	/*
	 * Link the chain into its parent.
	 */
	if (chain->parent != NULL)
		panic("hammer2: hammer2_chain_create: chain already connected");
	KKASSERT(chain->parent == NULL);
	KKASSERT(parent->core.live_count < count);
	hammer2_chain_insert(parent, chain,
			     HAMMER2_CHAIN_INSERT_SPIN |
			     HAMMER2_CHAIN_INSERT_LIVE,
			     0);

	if (allocated) {
		/*
		 * Mark the newly created chain modified.  This will cause
		 * UPDATE to be set and process the INITIAL flag.
		 *
		 * Device buffers are not instantiated for DATA elements
		 * as these are handled by logical buffers.
		 *
		 * Indirect and freemap node indirect blocks are handled
		 * by hammer2_chain_create_indirect() and not by this
		 * function.
		 *
		 * Data for all other bref types is expected to be
		 * instantiated (INODE, LEAF).
		 */
		switch(chain->bref.type) {
		case HAMMER2_BREF_TYPE_DATA:
		case HAMMER2_BREF_TYPE_FREEMAP_LEAF:
		case HAMMER2_BREF_TYPE_DIRENT:
		case HAMMER2_BREF_TYPE_INODE:
			error = hammer2_chain_modify(chain, mtid, dedup_off,
						     HAMMER2_MODIFY_OPTDATA);
			break;
		default:
			/*
			 * Remaining types are not supported by this function.
			 * In particular, INDIRECT and LEAF_NODE types are
			 * handled by create_indirect().
			 */
			panic("hammer2_chain_create: bad type: %d",
			      chain->bref.type);
			/* NOT REACHED */
			break;
		}
	} else {
		/*
		 * When reconnecting a chain we must set UPDATE and
		 * setflush so the flush recognizes that it must update
		 * the bref in the parent.
		 */
		if ((chain->flags & HAMMER2_CHAIN_UPDATE) == 0)
			atomic_set_int(&chain->flags, HAMMER2_CHAIN_UPDATE);
	}

	/*
	 * We must setflush(parent) to ensure that it recurses through to
	 * chain.  setflush(chain) might not work because ONFLUSH is possibly
	 * already set in the chain (so it won't recurse up to set it in the
	 * parent).
	 */
	hammer2_chain_setflush(parent);

done:
	*chainp = chain;

	return (error);
}

/*
 * Move the chain from its old parent to a new parent.  The chain must have
 * already been deleted or already disconnected (or never associated) with
 * a parent.  The chain is reassociated with the new parent and the deleted
 * flag will be cleared (no longer deleted).  The chain's modification state
 * is not altered.
 *
 * THE CALLER MUST HAVE ALREADY PROPERLY SEEKED (parent) TO THE INSERTION
 * POINT SANS ANY REQUIRED INDIRECT BLOCK CREATIONS DUE TO THE ARRAY BEING
 * FULL.  This typically means that the caller is creating the chain after
 * doing a hammer2_chain_lookup().
 *
 * Neither (parent) or (chain) can be errored.
 *
 * If (parent) is non-NULL then the chain is inserted under the parent.
 *
 * If (parent) is NULL then the newly duplicated chain is not inserted
 * anywhere, similar to if it had just been chain_alloc()'d (suitable for
 * passing into hammer2_chain_create() after this function returns).
 *
 * WARNING! This function calls create which means it can insert indirect
 *	    blocks.  This can cause other unrelated chains in the parent to
 *	    be moved to a newly inserted indirect block in addition to the
 *	    specific chain.
 */
void
hammer2_chain_rename(hammer2_chain_t **parentp, hammer2_chain_t *chain,
		     hammer2_tid_t mtid, int flags)
{
	hammer2_blockref_t *bref;
	hammer2_dev_t *hmp;
	hammer2_chain_t *parent;
	size_t bytes;

	/*
	 * WARNING!  We should never resolve DATA to device buffers
	 *	     (XXX allow it if the caller did?), and since
	 *	     we currently do not have the logical buffer cache
	 *	     buffer in-hand to fix its cached physical offset
	 *	     we also force the modify code to not COW it. XXX
	 */
	hmp = chain->hmp;
	KKASSERT(chain->parent == NULL);
	KKASSERT(chain->error == 0);

	/*
	 * Now create a duplicate of the chain structure, associating
	 * it with the same core, making it the same size, pointing it
	 * to the same bref (the same media block).
	 *
	 * NOTE: Handle special radix == 0 case (means 0 bytes).
	 *
	 * NOTE(review): 'bytes' is computed here but chain->bytes is what
	 *		 is actually passed to hammer2_chain_create() below;
	 *		 the local appears unused after this point - verify.
	 */
	bref = &chain->bref;
	bytes = (size_t)(bref->data_off & HAMMER2_OFF_MASK_RADIX);
	if (bytes)
		bytes = (hammer2_off_t)1 << bytes;

	/*
	 * If parent is not NULL the duplicated chain will be entered under
	 * the parent and the UPDATE bit set to tell flush to update
	 * the blockref.
	 *
	 * We must setflush(parent) to ensure that it recurses through to
	 * chain.  setflush(chain) might not work because ONFLUSH is possibly
	 * already set in the chain (so it won't recurse up to set it in the
	 * parent).
	 *
	 * Having both chains locked is extremely important for atomicy.
	 */
	if (parentp && (parent = *parentp) != NULL) {
		KKASSERT(hammer2_mtx_owned(&parent->lock));
		KKASSERT(parent->refs > 0);
		KKASSERT(parent->error == 0);

		hammer2_chain_create(parentp, &chain,
				     chain->pmp, HAMMER2_METH_DEFAULT,
				     bref->key, bref->keybits, bref->type,
				     chain->bytes, mtid, 0, flags);
		KKASSERT(chain->flags & HAMMER2_CHAIN_UPDATE);
		hammer2_chain_setflush(*parentp);
	}
}

/*
 * Helper function for deleting chains.
 *
 * The chain is removed from the live view (the RBTREE) as well as the parent's
 * blockmap.  Both chain and its parent must be locked.
 *
 * parent may not be errored.  chain can be errored.
 */
static int
_hammer2_chain_delete_helper(hammer2_chain_t *parent, hammer2_chain_t *chain,
			     hammer2_tid_t mtid, int flags)
{
	hammer2_dev_t *hmp;
	int error = 0;

	KKASSERT((chain->flags & (HAMMER2_CHAIN_DELETED |
				  HAMMER2_CHAIN_FICTITIOUS)) == 0);
	KKASSERT(chain->parent == parent);
	hmp = chain->hmp;

	if (chain->flags & HAMMER2_CHAIN_BMAPPED) {
		/*
		 * Chain is blockmapped, so there must be a parent.
		 * Atomically remove the chain from the parent and remove
		 * the blockmap entry.  The parent must be set modified
		 * to remove the blockmap entry.
		 */
		hammer2_blockref_t *base;
		int count;

		KKASSERT(parent != NULL);
		KKASSERT(parent->error == 0);
		KKASSERT((parent->flags & HAMMER2_CHAIN_INITIAL) == 0);
		error = hammer2_chain_modify(parent, mtid, 0, 0);
		if (error)
			goto done;

		/*
		 * Calculate blockmap pointer.  Spinlock order is
		 * chain-then-parent throughout this function.
		 */
		KKASSERT(chain->flags & HAMMER2_CHAIN_ONRBTREE);
		hammer2_spin_ex(&chain->core.spin);
		hammer2_spin_ex(&parent->core.spin);

		atomic_set_int(&chain->flags, HAMMER2_CHAIN_DELETED);
		atomic_add_int(&parent->core.live_count, -1);
		++parent->core.generation;
		RB_REMOVE(hammer2_chain_tree, &parent->core.rbtree, chain);
		atomic_clear_int(&chain->flags, HAMMER2_CHAIN_ONRBTREE);
		--parent->core.chain_count;
		chain->parent = NULL;

		switch(parent->bref.type) {
		case HAMMER2_BREF_TYPE_INODE:
			/*
			 * Access the inode's block array.  However, there
			 * is no block array if the inode is flagged
			 * DIRECTDATA.
			 */
			if (parent->data &&
			    (parent->data->ipdata.meta.op_flags &
			     HAMMER2_OPFLAG_DIRECTDATA) == 0) {
				base =
				   &parent->data->ipdata.u.blockset.blockref[0];
			} else {
				base = NULL;
			}
			count = HAMMER2_SET_COUNT;
			break;
		case HAMMER2_BREF_TYPE_INDIRECT:
		case HAMMER2_BREF_TYPE_FREEMAP_NODE:
			if (parent->data)
				base = &parent->data->npdata[0];
			else
				base = NULL;
			count = parent->bytes / sizeof(hammer2_blockref_t);
			break;
		case HAMMER2_BREF_TYPE_VOLUME:
			base = &parent->data->voldata.
					sroot_blockset.blockref[0];
			count = HAMMER2_SET_COUNT;
			break;
		case HAMMER2_BREF_TYPE_FREEMAP:
			base = &parent->data->blkset.blockref[0];
			count = HAMMER2_SET_COUNT;
			break;
		default:
			base = NULL;
			count = 0;
			panic("hammer2_flush_pass2: "
			      "unrecognized blockref type: %d",
			      parent->bref.type);
		}

		/*
		 * delete blockmapped chain from its parent.
		 *
		 * The parent is not affected by any statistics in chain
		 * which are pending synchronization.  That is, there is
		 * nothing to undo in the parent since they have not yet
		 * been incorporated into the parent.
		 *
		 * The parent is affected by statistics stored in inodes.
		 * Those have already been synchronized, so they must be
		 * undone.  XXX split update possible w/delete in middle?
		 */
		if (base) {
			hammer2_base_delete(parent, base, count, chain);
		}
		hammer2_spin_unex(&parent->core.spin);
		hammer2_spin_unex(&chain->core.spin);
	} else if (chain->flags & HAMMER2_CHAIN_ONRBTREE) {
		/*
		 * Chain is not blockmapped but a parent is present.
		 * Atomically remove the chain from the parent.  There is
		 * no blockmap entry to remove.
		 *
		 * Because chain was associated with a parent but not
		 * synchronized, the chain's *_count_up fields contain
		 * inode adjustment statistics which must be undone.
		 */
		hammer2_spin_ex(&chain->core.spin);
		hammer2_spin_ex(&parent->core.spin);
		atomic_set_int(&chain->flags, HAMMER2_CHAIN_DELETED);
		atomic_add_int(&parent->core.live_count, -1);
		++parent->core.generation;
		RB_REMOVE(hammer2_chain_tree, &parent->core.rbtree, chain);
		atomic_clear_int(&chain->flags, HAMMER2_CHAIN_ONRBTREE);
		--parent->core.chain_count;
		chain->parent = NULL;
		hammer2_spin_unex(&parent->core.spin);
		hammer2_spin_unex(&chain->core.spin);
	} else {
		/*
		 * Chain is not blockmapped and has no parent.  This
		 * is a degenerate case.
		 */
		atomic_set_int(&chain->flags, HAMMER2_CHAIN_DELETED);
	}
done:
	return error;
}

/*
 * Create an indirect block that covers one or more of the elements in the
 * current parent.
 * Either returns the existing parent with no locking or
 * ref changes or returns the new indirect block locked and referenced
 * and leaving the original parent lock/ref intact as well.
 *
 * If an error occurs, NULL is returned and *errorp is set to the H2 error.
 *
 * The returned chain depends on where the specified key falls.
 *
 * The key/keybits for the indirect mode only needs to follow these rules:
 *
 * (1) That all elements underneath it fit within its key space and
 *
 * (2) That all elements outside it are outside its key space.
 *
 * (3) When creating the new indirect block any elements in the current
 *     parent that fit within the new indirect block's keyspace must be
 *     moved into the new indirect block.
 *
 * (4) The keyspace chosen for the inserted indirect block CAN cover a wider
 *     keyspace than the current parent, but lookup/iteration rules will
 *     ensure (and must ensure) that rule (2) for all parents leading up
 *     to the nearest inode or the root volume header is adhered to.  This
 *     is accomplished by always recursing through matching keyspaces in
 *     the hammer2_chain_lookup() and hammer2_chain_next() API.
 *
 * The current implementation calculates the current worst-case keyspace by
 * iterating the current parent and then divides it into two halves, choosing
 * whichever half has the most elements (not necessarily the half containing
 * the requested key).
 *
 * We can also opt to use the half with the least number of elements.  This
 * causes lower-numbered keys (aka logical file offsets) to recurse through
 * fewer indirect blocks and higher-numbered keys to recurse through more.
 * This also has the risk of not moving enough elements to the new indirect
 * block and being forced to create several indirect blocks before the element
 * can be inserted.
3857 * 3858 * Must be called with an exclusively locked parent. 3859 * 3860 * NOTE: *errorp set to HAMMER_ERROR_* flags 3861 */ 3862 static int hammer2_chain_indkey_freemap(hammer2_chain_t *parent, 3863 hammer2_key_t *keyp, int keybits, 3864 hammer2_blockref_t *base, int count); 3865 static int hammer2_chain_indkey_file(hammer2_chain_t *parent, 3866 hammer2_key_t *keyp, int keybits, 3867 hammer2_blockref_t *base, int count, 3868 int ncount); 3869 static int hammer2_chain_indkey_dir(hammer2_chain_t *parent, 3870 hammer2_key_t *keyp, int keybits, 3871 hammer2_blockref_t *base, int count, 3872 int ncount); 3873 static 3874 hammer2_chain_t * 3875 hammer2_chain_create_indirect(hammer2_chain_t *parent, 3876 hammer2_key_t create_key, int create_bits, 3877 hammer2_tid_t mtid, int for_type, int *errorp) 3878 { 3879 hammer2_dev_t *hmp; 3880 hammer2_blockref_t *base; 3881 hammer2_blockref_t *bref; 3882 hammer2_blockref_t bcopy; 3883 hammer2_chain_t *chain; 3884 hammer2_chain_t *ichain; 3885 hammer2_chain_t dummy; 3886 hammer2_key_t key = create_key; 3887 hammer2_key_t key_beg; 3888 hammer2_key_t key_end; 3889 hammer2_key_t key_next; 3890 int keybits = create_bits; 3891 int count; 3892 int ncount; 3893 int nbytes; 3894 int loops; 3895 int error; 3896 int reason; 3897 int generation; 3898 int maxloops = 300000; 3899 3900 /* 3901 * Calculate the base blockref pointer or NULL if the chain 3902 * is known to be empty. We need to calculate the array count 3903 * for RB lookups either way. 3904 */ 3905 hmp = parent->hmp; 3906 KKASSERT(hammer2_mtx_owned(&parent->lock)); 3907 3908 /* 3909 * Pre-modify the parent now to avoid having to deal with error 3910 * processing if we tried to later (in the middle of our loop). 
3911 */ 3912 *errorp = hammer2_chain_modify(parent, mtid, 0, 0); 3913 if (*errorp) { 3914 kprintf("hammer2_create_indirect: error %08x %s\n", 3915 *errorp, hammer2_error_str(*errorp)); 3916 return NULL; 3917 } 3918 3919 /*hammer2_chain_modify(&parent, HAMMER2_MODIFY_OPTDATA);*/ 3920 base = hammer2_chain_base_and_count(parent, &count); 3921 3922 /* 3923 * dummy used in later chain allocation (no longer used for lookups). 3924 */ 3925 bzero(&dummy, sizeof(dummy)); 3926 3927 /* 3928 * How big should our new indirect block be? It has to be at least 3929 * as large as its parent for splits to work properly. 3930 * 3931 * The freemap uses a specific indirect block size. The number of 3932 * levels are built dynamically and ultimately depend on the size 3933 * volume. Because freemap blocks are taken from the reserved areas 3934 * of the volume our goal is efficiency (fewer levels) and not so 3935 * much to save disk space. 3936 * 3937 * The first indirect block level for a directory usually uses 3938 * HAMMER2_IND_BYTES_MIN (4KB = 32 directory entries). Due to 3939 * the hash mechanism, this typically gives us a nominal 3940 * 32 * 4 entries with one level of indirection. 3941 * 3942 * We use HAMMER2_IND_BYTES_NOM (16KB = 128 blockrefs) for FILE 3943 * indirect blocks. The initial 4 entries in the inode gives us 3944 * 256KB. Up to 4 indirect blocks gives us 32MB. Three levels 3945 * of indirection gives us 137GB, and so forth. H2 can support 3946 * huge file sizes but they are not typical, so we try to stick 3947 * with compactness and do not use a larger indirect block size. 3948 * 3949 * We could use 64KB (PBUFSIZE), giving us 512 blockrefs, but 3950 * due to the way indirect blocks are created this usually winds 3951 * up being extremely inefficient for small files. Even though 3952 * 16KB requires more levels of indirection for very large files, 3953 * the 16KB records can be ganged together into 64KB DIOs. 
3954 */ 3955 if (for_type == HAMMER2_BREF_TYPE_FREEMAP_NODE || 3956 for_type == HAMMER2_BREF_TYPE_FREEMAP_LEAF) { 3957 nbytes = HAMMER2_FREEMAP_LEVELN_PSIZE; 3958 } else if (parent->bref.type == HAMMER2_BREF_TYPE_INODE) { 3959 if (parent->data->ipdata.meta.type == 3960 HAMMER2_OBJTYPE_DIRECTORY) 3961 nbytes = HAMMER2_IND_BYTES_MIN; /* 4KB = 32 entries */ 3962 else 3963 nbytes = HAMMER2_IND_BYTES_NOM; /* 16KB = ~8MB file */ 3964 3965 } else { 3966 nbytes = HAMMER2_IND_BYTES_NOM; 3967 } 3968 if (nbytes < count * sizeof(hammer2_blockref_t)) { 3969 KKASSERT(for_type != HAMMER2_BREF_TYPE_FREEMAP_NODE && 3970 for_type != HAMMER2_BREF_TYPE_FREEMAP_LEAF); 3971 nbytes = count * sizeof(hammer2_blockref_t); 3972 } 3973 ncount = nbytes / sizeof(hammer2_blockref_t); 3974 3975 /* 3976 * When creating an indirect block for a freemap node or leaf 3977 * the key/keybits must be fitted to static radix levels because 3978 * particular radix levels use particular reserved blocks in the 3979 * related zone. 3980 * 3981 * This routine calculates the key/radix of the indirect block 3982 * we need to create, and whether it is on the high-side or the 3983 * low-side. 3984 */ 3985 switch(for_type) { 3986 case HAMMER2_BREF_TYPE_FREEMAP_NODE: 3987 case HAMMER2_BREF_TYPE_FREEMAP_LEAF: 3988 keybits = hammer2_chain_indkey_freemap(parent, &key, keybits, 3989 base, count); 3990 break; 3991 case HAMMER2_BREF_TYPE_DATA: 3992 keybits = hammer2_chain_indkey_file(parent, &key, keybits, 3993 base, count, ncount); 3994 break; 3995 case HAMMER2_BREF_TYPE_DIRENT: 3996 case HAMMER2_BREF_TYPE_INODE: 3997 keybits = hammer2_chain_indkey_dir(parent, &key, keybits, 3998 base, count, ncount); 3999 break; 4000 default: 4001 panic("illegal indirect block for bref type %d", for_type); 4002 break; 4003 } 4004 4005 /* 4006 * Normalize the key for the radix being represented, keeping the 4007 * high bits and throwing away the low bits. 
4008 */ 4009 key &= ~(((hammer2_key_t)1 << keybits) - 1); 4010 4011 /* 4012 * Ok, create our new indirect block 4013 */ 4014 if (for_type == HAMMER2_BREF_TYPE_FREEMAP_NODE || 4015 for_type == HAMMER2_BREF_TYPE_FREEMAP_LEAF) { 4016 dummy.bref.type = HAMMER2_BREF_TYPE_FREEMAP_NODE; 4017 } else { 4018 dummy.bref.type = HAMMER2_BREF_TYPE_INDIRECT; 4019 } 4020 dummy.bref.key = key; 4021 dummy.bref.keybits = keybits; 4022 dummy.bref.data_off = hammer2_getradix(nbytes); 4023 dummy.bref.methods = 4024 HAMMER2_ENC_CHECK(HAMMER2_DEC_CHECK(parent->bref.methods)) | 4025 HAMMER2_ENC_COMP(HAMMER2_COMP_NONE); 4026 4027 ichain = hammer2_chain_alloc(hmp, parent->pmp, &dummy.bref); 4028 atomic_set_int(&ichain->flags, HAMMER2_CHAIN_INITIAL); 4029 hammer2_chain_lock(ichain, HAMMER2_RESOLVE_MAYBE); 4030 /* ichain has one ref at this point */ 4031 4032 /* 4033 * We have to mark it modified to allocate its block, but use 4034 * OPTDATA to allow it to remain in the INITIAL state. Otherwise 4035 * it won't be acted upon by the flush code. 4036 */ 4037 *errorp = hammer2_chain_modify(ichain, mtid, 0, HAMMER2_MODIFY_OPTDATA); 4038 if (*errorp) { 4039 kprintf("hammer2_alloc_indirect: error %08x %s\n", 4040 *errorp, hammer2_error_str(*errorp)); 4041 hammer2_chain_unlock(ichain); 4042 hammer2_chain_drop(ichain); 4043 return NULL; 4044 } 4045 4046 /* 4047 * Iterate the original parent and move the matching brefs into 4048 * the new indirect block. 4049 * 4050 * XXX handle flushes. 4051 */ 4052 key_beg = 0; 4053 key_end = HAMMER2_KEY_MAX; 4054 key_next = 0; /* avoid gcc warnings */ 4055 hammer2_spin_ex(&parent->core.spin); 4056 loops = 0; 4057 reason = 0; 4058 4059 for (;;) { 4060 /* 4061 * Parent may have been modified, relocating its block array. 4062 * Reload the base pointer. 
4063 */ 4064 base = hammer2_chain_base_and_count(parent, &count); 4065 4066 if (++loops > 100000) { 4067 hammer2_spin_unex(&parent->core.spin); 4068 panic("excessive loops r=%d p=%p base/count %p:%d %016jx\n", 4069 reason, parent, base, count, key_next); 4070 } 4071 4072 /* 4073 * NOTE: spinlock stays intact, returned chain (if not NULL) 4074 * is not referenced or locked which means that we 4075 * cannot safely check its flagged / deletion status 4076 * until we lock it. 4077 */ 4078 chain = hammer2_combined_find(parent, base, count, 4079 &key_next, 4080 key_beg, key_end, 4081 &bref); 4082 generation = parent->core.generation; 4083 if (bref == NULL) 4084 break; 4085 key_next = bref->key + ((hammer2_key_t)1 << bref->keybits); 4086 4087 /* 4088 * Skip keys that are not within the key/radix of the new 4089 * indirect block. They stay in the parent. 4090 */ 4091 if ((~(((hammer2_key_t)1 << keybits) - 1) & 4092 (key ^ bref->key)) != 0) { 4093 goto next_key_spinlocked; 4094 } 4095 4096 /* 4097 * Load the new indirect block by acquiring the related 4098 * chains (potentially from media as it might not be 4099 * in-memory). Then move it to the new parent (ichain). 4100 * 4101 * chain is referenced but not locked. We must lock the 4102 * chain to obtain definitive state. 4103 */ 4104 bcopy = *bref; 4105 if (chain) { 4106 /* 4107 * Use chain already present in the RBTREE 4108 */ 4109 hammer2_chain_ref(chain); 4110 hammer2_spin_unex(&parent->core.spin); 4111 hammer2_chain_lock(chain, HAMMER2_RESOLVE_NEVER); 4112 } else { 4113 /* 4114 * Get chain for blockref element. _get returns NULL 4115 * on insertion race. 
4116 */ 4117 hammer2_spin_unex(&parent->core.spin); 4118 chain = hammer2_chain_get(parent, generation, &bcopy, 4119 HAMMER2_RESOLVE_NEVER); 4120 if (chain == NULL) { 4121 reason = 1; 4122 hammer2_spin_ex(&parent->core.spin); 4123 continue; 4124 } 4125 } 4126 4127 /* 4128 * This is always live so if the chain has been deleted 4129 * we raced someone and we have to retry. 4130 * 4131 * NOTE: Lookups can race delete-duplicate because 4132 * delete-duplicate does not lock the parent's core 4133 * (they just use the spinlock on the core). 4134 * 4135 * (note reversed logic for this one) 4136 */ 4137 if (bcmp(&bcopy, &chain->bref, sizeof(bcopy)) || 4138 chain->parent != parent || 4139 (chain->flags & HAMMER2_CHAIN_DELETED)) { 4140 hammer2_chain_unlock(chain); 4141 hammer2_chain_drop(chain); 4142 if (hammer2_debug & 0x0040) { 4143 kprintf("LOST PARENT RETRY " 4144 "RETRY (%p,%p)->%p %08x\n", 4145 parent, chain->parent, chain, chain->flags); 4146 } 4147 hammer2_spin_ex(&parent->core.spin); 4148 continue; 4149 } 4150 4151 /* 4152 * Shift the chain to the indirect block. 4153 * 4154 * WARNING! No reason for us to load chain data, pass NOSTATS 4155 * to prevent delete/insert from trying to access 4156 * inode stats (and thus asserting if there is no 4157 * chain->data loaded). 4158 * 4159 * WARNING! The (parent, chain) deletion may modify the parent 4160 * and invalidate the base pointer. 4161 * 4162 * WARNING! Parent must already be marked modified, so we 4163 * can assume that chain_delete always suceeds. 4164 * 4165 * WARNING! hammer2_chain_repchange() does not have to be 4166 * called (and doesn't work anyway because we are 4167 * only doing a partial shift). A recursion that is 4168 * in-progress can continue at the current parent 4169 * and will be able to properly find its next key. 
4170 */ 4171 error = hammer2_chain_delete(parent, chain, mtid, 0); 4172 KKASSERT(error == 0); 4173 hammer2_chain_rename(&ichain, chain, mtid, 0); 4174 hammer2_chain_unlock(chain); 4175 hammer2_chain_drop(chain); 4176 KKASSERT(parent->refs > 0); 4177 chain = NULL; 4178 base = NULL; /* safety */ 4179 hammer2_spin_ex(&parent->core.spin); 4180 next_key_spinlocked: 4181 if (--maxloops == 0) 4182 panic("hammer2_chain_create_indirect: maxloops"); 4183 reason = 4; 4184 if (key_next == 0 || key_next > key_end) 4185 break; 4186 key_beg = key_next; 4187 /* loop */ 4188 } 4189 hammer2_spin_unex(&parent->core.spin); 4190 4191 /* 4192 * Insert the new indirect block into the parent now that we've 4193 * cleared out some entries in the parent. We calculated a good 4194 * insertion index in the loop above (ichain->index). 4195 * 4196 * We don't have to set UPDATE here because we mark ichain 4197 * modified down below (so the normal modified -> flush -> set-moved 4198 * sequence applies). 4199 * 4200 * The insertion shouldn't race as this is a completely new block 4201 * and the parent is locked. 4202 */ 4203 base = NULL; /* safety, parent modify may change address */ 4204 KKASSERT((ichain->flags & HAMMER2_CHAIN_ONRBTREE) == 0); 4205 KKASSERT(parent->core.live_count < count); 4206 hammer2_chain_insert(parent, ichain, 4207 HAMMER2_CHAIN_INSERT_SPIN | 4208 HAMMER2_CHAIN_INSERT_LIVE, 4209 0); 4210 4211 /* 4212 * Make sure flushes propogate after our manual insertion. 4213 */ 4214 hammer2_chain_setflush(ichain); 4215 hammer2_chain_setflush(parent); 4216 4217 /* 4218 * Figure out what to return. 4219 */ 4220 if (~(((hammer2_key_t)1 << keybits) - 1) & 4221 (create_key ^ key)) { 4222 /* 4223 * Key being created is outside the key range, 4224 * return the original parent. 4225 */ 4226 hammer2_chain_unlock(ichain); 4227 hammer2_chain_drop(ichain); 4228 } else { 4229 /* 4230 * Otherwise its in the range, return the new parent. 4231 * (leave both the new and old parent locked). 
	 */
		parent = ichain;
	}

	return(parent);
}

/*
 * Do maintenance on an indirect chain.  Both parent and chain are locked.
 *
 * Returns non-zero if (chain) is deleted, either due to being empty or
 * because its children were safely moved into the parent.
 *
 * Returns 0 if the collapse is not possible (too many live children, or
 * either chain could not be marked modified).
 */
int
hammer2_chain_indirect_maintenance(hammer2_chain_t *parent,
				   hammer2_chain_t *chain)
{
	hammer2_blockref_t *chain_base;
	hammer2_blockref_t *base;
	hammer2_blockref_t *bref;
	hammer2_blockref_t bcopy;
	hammer2_key_t key_next;
	hammer2_key_t key_beg;
	hammer2_key_t key_end;
	hammer2_chain_t *sub;
	int chain_count;
	int count;
	int error;
	int generation;

	/*
	 * Make sure we have an accurate live_count
	 */
	if ((chain->flags & (HAMMER2_CHAIN_INITIAL |
			     HAMMER2_CHAIN_COUNTEDBREFS)) == 0) {
		base = &chain->data->npdata[0];
		count = chain->bytes / sizeof(hammer2_blockref_t);
		hammer2_chain_countbrefs(chain, base, count);
	}

	/*
	 * If the indirect block is empty we can delete it.
	 * (ignore deletion error)
	 */
	if (chain->core.live_count == 0 && RB_EMPTY(&chain->core.rbtree)) {
		hammer2_chain_delete(parent, chain,
				     chain->bref.modify_tid,
				     HAMMER2_DELETE_PERMANENT);
		hammer2_chain_repchange(parent, chain);
		return 1;
	}

	base = hammer2_chain_base_and_count(parent, &count);

	if ((parent->flags & (HAMMER2_CHAIN_INITIAL |
			      HAMMER2_CHAIN_COUNTEDBREFS)) == 0) {
		hammer2_chain_countbrefs(parent, base, count);
	}

	/*
	 * Determine if we can collapse chain into parent, calculate
	 * hysteresis for chain emptiness.
	 */
	if (parent->core.live_count + chain->core.live_count - 1 > count)
		return 0;
	chain_count = chain->bytes / sizeof(hammer2_blockref_t);
	if (chain->core.live_count > chain_count * 3 / 4)
		return 0;

	/*
	 * Ok, theoretically we can collapse chain's contents into
	 * parent.  chain is locked, but any in-memory children of chain
	 * are not.  For this to work, we must be able to dispose of any
	 * in-memory children of chain.
	 *
	 * For now require that there are no in-memory children of chain.
	 *
	 * WARNING! Both chain and parent must remain locked across this
	 *	    entire operation.
	 */

	/*
	 * Parent must be marked modified.  Don't try to collapse it if we
	 * can't mark it modified.  Once modified, destroy chain to make room
	 * and to get rid of what will be a conflicting key (this is included
	 * in the calculation above).  Finally, move the children of chain
	 * into chain's parent.
	 *
	 * This order creates an accounting problem for bref.embed.stats
	 * because we destroy chain before we remove its children.  Any
	 * elements whose blockref is already synchronized will be counted
	 * twice.  To deal with the problem we clean out chain's stats prior
	 * to deleting it.
	 */
	error = hammer2_chain_modify(parent, 0, 0, 0);
	if (error) {
		krateprintf(&krate_h2me, "hammer2: indirect_maint: %s\n",
			    hammer2_error_str(error));
		return 0;
	}
	error = hammer2_chain_modify(chain, chain->bref.modify_tid, 0, 0);
	if (error) {
		krateprintf(&krate_h2me, "hammer2: indirect_maint: %s\n",
			    hammer2_error_str(error));
		return 0;
	}

	/* zero stats before deletion to avoid double-counting (see above) */
	chain->bref.embed.stats.inode_count = 0;
	chain->bref.embed.stats.data_count = 0;
	error = hammer2_chain_delete(parent, chain,
				     chain->bref.modify_tid,
				     HAMMER2_DELETE_PERMANENT);
	KKASSERT(error == 0);

	/*
	 * The combined_find call requires core.spin to be held.  One would
	 * think there wouldn't be any conflicts since we hold chain
	 * exclusively locked, but the caching mechanism for 0-ref children
	 * does not require a chain lock.
	 */
	hammer2_spin_ex(&chain->core.spin);

	key_next = 0;
	key_beg = 0;
	key_end = HAMMER2_KEY_MAX;
	for (;;) {
		/*
		 * Scan chain's children.  The spinlock is dropped while
		 * we lock each child, so the find is restarted from the
		 * current key on every race/retry.
		 */
		chain_base = &chain->data->npdata[0];
		chain_count = chain->bytes / sizeof(hammer2_blockref_t);
		sub = hammer2_combined_find(chain, chain_base, chain_count,
					    &key_next,
					    key_beg, key_end,
					    &bref);
		generation = chain->core.generation;
		if (bref == NULL)
			break;
		key_next = bref->key + ((hammer2_key_t)1 << bref->keybits);

		bcopy = *bref;
		if (sub) {
			hammer2_chain_ref(sub);
			hammer2_spin_unex(&chain->core.spin);
			hammer2_chain_lock(sub, HAMMER2_RESOLVE_NEVER);
		} else {
			hammer2_spin_unex(&chain->core.spin);
			sub = hammer2_chain_get(chain, generation, &bcopy,
						HAMMER2_RESOLVE_NEVER);
			if (sub == NULL) {
				/* insertion race, retry same key */
				hammer2_spin_ex(&chain->core.spin);
				continue;
			}
		}
		/*
		 * Revalidate after the lock; retry if the child changed
		 * underneath us while the spinlock was dropped.
		 */
		if (bcmp(&bcopy, &sub->bref, sizeof(bcopy)) ||
		    sub->parent != chain ||
		    (sub->flags & HAMMER2_CHAIN_DELETED)) {
			hammer2_chain_unlock(sub);
			hammer2_chain_drop(sub);
			hammer2_spin_ex(&chain->core.spin);
			sub = NULL;	/* safety */
			continue;
		}
		error = hammer2_chain_delete(chain, sub,
					     sub->bref.modify_tid, 0);
		KKASSERT(error == 0);
		hammer2_chain_rename(&parent, sub,
				     sub->bref.modify_tid,
				     HAMMER2_INSERT_SAMEPARENT);
		hammer2_chain_unlock(sub);
		hammer2_chain_drop(sub);
		hammer2_spin_ex(&chain->core.spin);

		if (key_next == 0)
			break;
		key_beg = key_next;
	}
	hammer2_spin_unex(&chain->core.spin);

	hammer2_chain_repchange(parent, chain);

	return 1;
}

/*
 * Freemap indirect blocks
 *
 * Calculate the keybits and highside/lowside of the freemap node the
 * caller is creating.
 *
 * This routine will specify the next higher-level freemap key/radix
 * representing the lowest-ordered set.  By doing so, eventually all
 * low-ordered sets will be moved one level down.
 *
 * We have to be careful here because the freemap reserves a limited
 * number of blocks for a limited number of levels.  So we can't just
 * push indiscriminately.
 */
int
hammer2_chain_indkey_freemap(hammer2_chain_t *parent, hammer2_key_t *keyp,
			     int keybits, hammer2_blockref_t *base, int count)
{
	hammer2_chain_t *chain;
	hammer2_blockref_t *bref;
	hammer2_key_t key;
	hammer2_key_t key_beg;
	hammer2_key_t key_end;
	hammer2_key_t key_next;
	int locount;
	int hicount;
	int maxloops = 300000;

	key = *keyp;
	locount = 0;
	hicount = 0;
	keybits = 64;		/* start wide, narrowed by the scan below */

	/*
	 * Calculate the range of keys in the array being careful to skip
	 * slots which are overridden with a deletion.
	 */
	key_beg = 0;
	key_end = HAMMER2_KEY_MAX;
	hammer2_spin_ex(&parent->core.spin);

	for (;;) {
		if (--maxloops == 0) {
			panic("indkey_freemap shit %p %p:%d\n",
			      parent, base, count);
		}
		chain = hammer2_combined_find(parent, base, count,
					      &key_next,
					      key_beg, key_end,
					      &bref);

		/*
		 * Exhausted search
		 */
		if (bref == NULL)
			break;

		/*
		 * Skip deleted chains.
		 */
		if (chain && (chain->flags & HAMMER2_CHAIN_DELETED)) {
			if (key_next == 0 || key_next > key_end)
				break;
			key_beg = key_next;
			continue;
		}

		/*
		 * Use the full live (not deleted) element for the scan
		 * iteration.  HAMMER2 does not allow partial replacements.
		 *
		 * XXX should be built into hammer2_combined_find().
		 */
		key_next = bref->key + ((hammer2_key_t)1 << bref->keybits);

		/*
		 * Track the smallest (key, keybits) seen; smaller radix
		 * wins, ties broken by the lower key.
		 */
		if (keybits > bref->keybits) {
			key = bref->key;
			keybits = bref->keybits;
		} else if (keybits == bref->keybits && bref->key < key) {
			key = bref->key;
		}
		if (key_next == 0)
			break;
		key_beg = key_next;
	}
	hammer2_spin_unex(&parent->core.spin);

	/*
	 * Return the keybits for a higher-level FREEMAP_NODE covering
	 * this node.  The freemap uses fixed radix levels, so simply
	 * step up one level from the smallest radix found.
	 */
	switch(keybits) {
	case HAMMER2_FREEMAP_LEVEL0_RADIX:
		keybits = HAMMER2_FREEMAP_LEVEL1_RADIX;
		break;
	case HAMMER2_FREEMAP_LEVEL1_RADIX:
		keybits = HAMMER2_FREEMAP_LEVEL2_RADIX;
		break;
	case HAMMER2_FREEMAP_LEVEL2_RADIX:
		keybits = HAMMER2_FREEMAP_LEVEL3_RADIX;
		break;
	case HAMMER2_FREEMAP_LEVEL3_RADIX:
		keybits = HAMMER2_FREEMAP_LEVEL4_RADIX;
		break;
	case HAMMER2_FREEMAP_LEVEL4_RADIX:
		keybits = HAMMER2_FREEMAP_LEVEL5_RADIX;
		break;
	case HAMMER2_FREEMAP_LEVEL5_RADIX:
		panic("hammer2_chain_indkey_freemap: level too high");
		break;
	default:
		panic("hammer2_chain_indkey_freemap: bad radix");
		break;
	}
	*keyp = key;

	return (keybits);
}

/*
 * File indirect blocks
 *
 * Calculate the key/keybits for the indirect block to create by scanning
 * existing keys.  The key being created is also passed in *keyp and can be
 * inside or outside the indirect block.  Regardless, the indirect block
 * must hold at least two keys in order to guarantee sufficient space.
 *
 * We use a modified version of the freemap's fixed radix tree, but tailored
 * for file data.  Basically we configure an indirect block encompassing the
 * smallest key.
 */
static int
hammer2_chain_indkey_file(hammer2_chain_t *parent, hammer2_key_t *keyp,
			  int keybits, hammer2_blockref_t *base, int count,
			  int ncount)
{
	hammer2_chain_t *chain;
	hammer2_blockref_t *bref;
	hammer2_key_t key;
	hammer2_key_t key_beg;
	hammer2_key_t key_end;
	hammer2_key_t key_next;
	int nradix;
	int locount;
	int hicount;
	int maxloops = 300000;

	key = *keyp;
	locount = 0;
	hicount = 0;
	keybits = 64;		/* start wide, narrowed by the scan below */

	/*
	 * Calculate the range of keys in the array being careful to skip
	 * slots which are overridden with a deletion.
	 *
	 * Locate the smallest key.
	 */
	key_beg = 0;
	key_end = HAMMER2_KEY_MAX;
	hammer2_spin_ex(&parent->core.spin);

	for (;;) {
		if (--maxloops == 0) {
			panic("indkey_freemap shit %p %p:%d\n",
			      parent, base, count);
		}
		chain = hammer2_combined_find(parent, base, count,
					      &key_next,
					      key_beg, key_end,
					      &bref);

		/*
		 * Exhausted search
		 */
		if (bref == NULL)
			break;

		/*
		 * Skip deleted chains.
		 */
		if (chain && (chain->flags & HAMMER2_CHAIN_DELETED)) {
			if (key_next == 0 || key_next > key_end)
				break;
			key_beg = key_next;
			continue;
		}

		/*
		 * Use the full live (not deleted) element for the scan
		 * iteration.  HAMMER2 does not allow partial replacements.
		 *
		 * XXX should be built into hammer2_combined_find().
		 */
		key_next = bref->key + ((hammer2_key_t)1 << bref->keybits);

		/*
		 * Track the smallest (key, keybits) seen; smaller radix
		 * wins, ties broken by the lower key.
		 */
		if (keybits > bref->keybits) {
			key = bref->key;
			keybits = bref->keybits;
		} else if (keybits == bref->keybits && bref->key < key) {
			key = bref->key;
		}
		if (key_next == 0)
			break;
		key_beg = key_next;
	}
	hammer2_spin_unex(&parent->core.spin);

	/*
	 * Calculate the static keybits for a higher-level indirect block
	 * that contains the key.
	 */
	*keyp = key;

	/*
	 * nradix is the number of additional key bits the new indirect
	 * block covers relative to its element count (ncount is always
	 * a power of 2 derived from the indirect block size).
	 */
	switch(ncount) {
	case HAMMER2_IND_BYTES_MIN / sizeof(hammer2_blockref_t):
		nradix = HAMMER2_IND_RADIX_MIN - HAMMER2_BLOCKREF_RADIX;
		break;
	case HAMMER2_IND_BYTES_NOM / sizeof(hammer2_blockref_t):
		nradix = HAMMER2_IND_RADIX_NOM - HAMMER2_BLOCKREF_RADIX;
		break;
	case HAMMER2_IND_BYTES_MAX / sizeof(hammer2_blockref_t):
		nradix = HAMMER2_IND_RADIX_MAX - HAMMER2_BLOCKREF_RADIX;
		break;
	default:
		panic("bad ncount %d\n", ncount);
		nradix = 0;
		break;
	}

	/*
	 * The largest radix that can be returned for an indirect block is
	 * 63 bits.  (The largest practical indirect block radix is actually
	 * 62 bits because the top-level inode or volume root contains four
	 * entries, but allow 63 to be returned).
	 */
	if (nradix >= 64)
		nradix = 63;

	return keybits + nradix;
}

#if 1

/*
 * Directory indirect blocks.
 *
 * Covers both the inode index (directory of inodes), and directory contents
 * (filenames hardlinked to inodes).
 *
 * Because directory keys are hashed we generally try to cut the space in
 * half.  We accommodate the inode index (which tends to have linearly
 * increasing inode numbers) by ensuring that the keyspace is at least large
 * enough to fill up the indirect block being created.
 */
static int
hammer2_chain_indkey_dir(hammer2_chain_t *parent, hammer2_key_t *keyp,
			 int keybits, hammer2_blockref_t *base, int count,
			 int ncount)
{
	hammer2_blockref_t *bref;
	hammer2_chain_t	*chain;
	hammer2_key_t key_beg;
	hammer2_key_t key_end;
	hammer2_key_t key_next;
	hammer2_key_t key;
	int nkeybits;
	int locount;
	int hicount;
	int maxloops = 300000;

	/*
	 * NOTE: We can't take a shortcut here anymore for inodes because
	 *	 the root directory can contain a mix of inodes and directory
	 *	 entries (we used to just return 63 if parent->bref.type was
	 *	 HAMMER2_BREF_TYPE_INODE).
	 */
	key = *keyp;
	locount = 0;
	hicount = 0;

	/*
	 * Calculate the range of keys in the array being careful to skip
	 * slots which are overridden with a deletion.
	 */
	key_beg = 0;
	key_end = HAMMER2_KEY_MAX;
	hammer2_spin_ex(&parent->core.spin);

	for (;;) {
		if (--maxloops == 0) {
			panic("indkey_freemap shit %p %p:%d\n",
			      parent, base, count);
		}
		chain = hammer2_combined_find(parent, base, count,
					      &key_next,
					      key_beg, key_end,
					      &bref);

		/*
		 * Exhausted search
		 */
		if (bref == NULL)
			break;

		/*
		 * Deleted object
		 */
		if (chain && (chain->flags & HAMMER2_CHAIN_DELETED)) {
			if (key_next == 0 || key_next > key_end)
				break;
			key_beg = key_next;
			continue;
		}

		/*
		 * Use the full live (not deleted) element for the scan
		 * iteration.  HAMMER2 does not allow partial replacements.
		 *
		 * XXX should be built into hammer2_combined_find().
		 */
		key_next = bref->key + ((hammer2_key_t)1 << bref->keybits);

		/*
		 * Expand our calculated key range (key, keybits) to fit
		 * the scanned key.  nkeybits represents the full range
		 * that we will later cut in half (two halves @ nkeybits - 1).
		 */
		nkeybits = keybits;
		if (nkeybits < bref->keybits) {
			if (bref->keybits > 64) {
				kprintf("bad bref chain %p bref %p\n",
					chain, bref);
				Debugger("fubar");
			}
			nkeybits = bref->keybits;
		}
		while (nkeybits < 64 &&
		       (~(((hammer2_key_t)1 << nkeybits) - 1) &
		        (key ^ bref->key)) != 0) {
			++nkeybits;
		}

		/*
		 * If the new key range is larger we have to determine
		 * which side of the new key range the existing keys fall
		 * under by checking the high bit, then collapsing the
		 * locount into the hicount or vice-versa.
		 */
		if (keybits != nkeybits) {
			if (((hammer2_key_t)1 << (nkeybits - 1)) & key) {
				hicount += locount;
				locount = 0;
			} else {
				locount += hicount;
				hicount = 0;
			}
			keybits = nkeybits;
		}

		/*
		 * The newly scanned key will be in the lower half or the
		 * upper half of the (new) key range.
		 */
		if (((hammer2_key_t)1 << (nkeybits - 1)) & bref->key)
			++hicount;
		else
			++locount;

		if (key_next == 0)
			break;
		key_beg = key_next;
	}
	hammer2_spin_unex(&parent->core.spin);
	bref = NULL;	/* now invalid (safety) */

	/*
	 * Adjust keybits to represent half of the full range calculated
	 * above (radix 63 max) for our new indirect block.
	 */
	--keybits;

	/*
	 * Expand keybits to hold at least ncount elements.  ncount will be
	 * a power of 2.  This is to try to completely fill leaf nodes (at
	 * least for keys which are not hashes).
	 *
	 * We aren't counting 'in' or 'out', we are counting 'high side'
	 * and 'low side' based on the bit at (1LL << keybits).  We want
	 * everything to be inside in these cases so shift it all to
	 * the low or high side depending on the new high bit.
	 */
	while (((hammer2_key_t)1 << keybits) < ncount) {
		++keybits;
		if (key & ((hammer2_key_t)1 << keybits)) {
			hicount += locount;
			locount = 0;
		} else {
			locount += hicount;
			hicount = 0;
		}
	}

	/* place the new indirect block on the more heavily populated side */
	if (hicount > locount)
		key |= (hammer2_key_t)1 << keybits;
	else
		key &= ~(hammer2_key_t)1 << keybits;

	*keyp = key;

	return (keybits);
}

#else

/*
 * Directory indirect blocks.
 *
 * Covers both the inode index (directory of inodes), and directory contents
 * (filenames hardlinked to inodes).
 *
 * Because directory keys are hashed we generally try to cut the space in
 * half.
 * We accommodate the inode index (which tends to have linearly
 * increasing inode numbers) by ensuring that the keyspace is at least large
 * enough to fill up the indirect block being created.
 */
static int
hammer2_chain_indkey_dir(hammer2_chain_t *parent, hammer2_key_t *keyp,
			 int keybits, hammer2_blockref_t *base, int count,
			 int ncount)
{
	hammer2_blockref_t *bref;
	hammer2_chain_t	*chain;
	hammer2_key_t key_beg;
	hammer2_key_t key_end;
	hammer2_key_t key_next;
	hammer2_key_t key;
	int nkeybits;
	int locount;
	int hicount;
	int maxloops = 300000;

	/*
	 * Shortcut if the parent is the inode.  In this situation the
	 * parent has 4+1 directory entries and we are creating an indirect
	 * block capable of holding many more.
	 */
	if (parent->bref.type == HAMMER2_BREF_TYPE_INODE) {
		return 63;
	}

	key = *keyp;
	locount = 0;
	hicount = 0;

	/*
	 * Calculate the range of keys in the array being careful to skip
	 * slots which are overridden with a deletion.
	 */
	key_beg = 0;
	key_end = HAMMER2_KEY_MAX;
	hammer2_spin_ex(&parent->core.spin);

	for (;;) {
		if (--maxloops == 0) {
			panic("indkey_freemap shit %p %p:%d\n",
			      parent, base, count);
		}
		chain = hammer2_combined_find(parent, base, count,
					      &key_next,
					      key_beg, key_end,
					      &bref);

		/*
		 * Exhausted search
		 */
		if (bref == NULL)
			break;

		/*
		 * Deleted object
		 */
		if (chain && (chain->flags & HAMMER2_CHAIN_DELETED)) {
			if (key_next == 0 || key_next > key_end)
				break;
			key_beg = key_next;
			continue;
		}

		/*
		 * Use the full live (not deleted) element for the scan
		 * iteration.  HAMMER2 does not allow partial replacements.
		 *
		 * XXX should be built into hammer2_combined_find().
		 */
		key_next = bref->key + ((hammer2_key_t)1 << bref->keybits);

		/*
		 * Expand our calculated key range (key, keybits) to fit
		 * the scanned key.  nkeybits represents the full range
		 * that we will later cut in half (two halves @ nkeybits - 1).
		 */
		nkeybits = keybits;
		if (nkeybits < bref->keybits) {
			if (bref->keybits > 64) {
				kprintf("bad bref chain %p bref %p\n",
					chain, bref);
				Debugger("fubar");
			}
			nkeybits = bref->keybits;
		}
		while (nkeybits < 64 &&
		       (~(((hammer2_key_t)1 << nkeybits) - 1) &
		        (key ^ bref->key)) != 0) {
			++nkeybits;
		}

		/*
		 * If the new key range is larger we have to determine
		 * which side of the new key range the existing keys fall
		 * under by checking the high bit, then collapsing the
		 * locount into the hicount or vice-versa.
		 */
		if (keybits != nkeybits) {
			if (((hammer2_key_t)1 << (nkeybits - 1)) & key) {
				hicount += locount;
				locount = 0;
			} else {
				locount += hicount;
				hicount = 0;
			}
			keybits = nkeybits;
		}

		/*
		 * The newly scanned key will be in the lower half or the
		 * upper half of the (new) key range.
		 */
		if (((hammer2_key_t)1 << (nkeybits - 1)) & bref->key)
			++hicount;
		else
			++locount;

		if (key_next == 0)
			break;
		key_beg = key_next;
	}
	hammer2_spin_unex(&parent->core.spin);
	bref = NULL;	/* now invalid (safety) */

	/*
	 * Adjust keybits to represent half of the full range calculated
	 * above (radix 63 max) for our new indirect block.
	 */
	--keybits;

	/*
	 * Expand keybits to hold at least ncount elements.  ncount will be
	 * a power of 2.  This is to try to completely fill leaf nodes (at
	 * least for keys which are not hashes).
	 *
	 * We aren't counting 'in' or 'out', we are counting 'high side'
	 * and 'low side' based on the bit at (1LL << keybits).  We want
	 * everything to be inside in these cases so shift it all to
	 * the low or high side depending on the new high bit.
	 */
	while (((hammer2_key_t)1 << keybits) < ncount) {
		++keybits;
		if (key & ((hammer2_key_t)1 << keybits)) {
			hicount += locount;
			locount = 0;
		} else {
			locount += hicount;
			hicount = 0;
		}
	}

	/* place the new indirect block on the more heavily populated side */
	if (hicount > locount)
		key |= (hammer2_key_t)1 << keybits;
	else
		key &= ~(hammer2_key_t)1 << keybits;

	*keyp = key;

	return (keybits);
}

#endif

/*
 * Sets CHAIN_DELETED and remove the chain's blockref from the parent if
 * it exists.
 *
 * Both parent and chain must be locked exclusively.
 *
 * This function will modify the parent if the blockref requires removal
 * from the parent's block table.
 *
 * This function is NOT recursive.  Any entity already pushed into the
 * chain (such as an inode) may still need visibility into its contents,
 * as well as the ability to read and modify the contents.  For example,
 * for an unlinked file which is still open.
 *
 * Also note that the flusher is responsible for cleaning up empty
 * indirect blocks.
 */
int
hammer2_chain_delete(hammer2_chain_t *parent, hammer2_chain_t *chain,
		     hammer2_tid_t mtid, int flags)
{
	int error = 0;

	KKASSERT(hammer2_mtx_owned(&chain->lock));

	/*
	 * Nothing to do if already marked.
	 *
	 * We need the spinlock on the core whose RBTREE contains chain
	 * to protect against races.
	 */
	if ((chain->flags & HAMMER2_CHAIN_DELETED) == 0) {
		KKASSERT((chain->flags & HAMMER2_CHAIN_DELETED) == 0 &&
			 chain->parent == parent);
		error = _hammer2_chain_delete_helper(parent, chain,
						     mtid, flags);
	}

	/*
	 * Permanent deletions mark the chain as destroyed.
	 *
	 * NOTE: We do not setflush the chain unless the deletion is
	 *	 permanent, since the deletion of a chain does not actually
	 *	 require it to be flushed.
	 */
	if (error == 0) {
		if (flags & HAMMER2_DELETE_PERMANENT) {
			atomic_set_int(&chain->flags, HAMMER2_CHAIN_DESTROY);
			hammer2_chain_setflush(chain);
		}
	}

	return error;
}

/*
 * Returns the index of the nearest element in the blockref array >= elm.
 * Returns (count) if no element could be found.
 *
 * Sets *key_nextp to the next key for loop purposes but does not modify
 * it if the next key would be higher than the current value of *key_nextp.
 * Note that *key_nextp can overflow to 0, which should be tested by the
 * caller.
 *
 * WARNING!  Must be called with parent's spinlock held.  Spinlock remains
 *	     held through the operation.
 */
static int
hammer2_base_find(hammer2_chain_t *parent,
		  hammer2_blockref_t *base, int count,
		  hammer2_key_t *key_nextp,
		  hammer2_key_t key_beg, hammer2_key_t key_end)
{
	hammer2_blockref_t *scan;
	hammer2_key_t scan_end;
	int i;
	int limit;

	/*
	 * Require that live chains already have their brefs counted
	 * so we can optimize operations.
	 */
	KKASSERT(parent->flags & HAMMER2_CHAIN_COUNTEDBREFS);

	/*
	 * Degenerate case
	 */
	if (count == 0 || base == NULL)
		return(count);

	/*
	 * Sequential optimization using parent->cache_index.  This is
	 * the most likely scenario.
	 *
	 * We can avoid trailing empty entries on live chains, otherwise
	 * we might have to check the whole block array.
	 */
	i = parent->cache_index;	/* SMP RACE OK */
	cpu_ccfence();
	limit = parent->core.live_zero;
	if (i >= limit)
		i = limit - 1;
	if (i < 0)
		i = 0;
	KKASSERT(i < count);

	/*
	 * Search backwards from the cached index until we are at or
	 * below key_beg.
	 */
	scan = &base[i];
	while (i > 0 && (scan->type == 0 || scan->key > key_beg)) {
		--scan;
		--i;
	}
	parent->cache_index = i;

	/*
	 * Search forwards, stop when we find a scan element which
	 * encloses the key or until we know that there are no further
	 * elements.
	 */
	while (i < count) {
		if (scan->type != 0) {
			scan_end = scan->key +
				   ((hammer2_key_t)1 << scan->keybits) - 1;
			if (scan->key > key_beg || scan_end >= key_beg)
				break;
		}
		if (i >= limit)
			return (count);
		++scan;
		++i;
	}
	if (i != count) {
		parent->cache_index = i;
		if (i >= limit) {
			i = count;
		} else {
			/*
			 * Tighten *key_nextp to just past this element
			 * (unless the caller's value is already lower).
			 * scan_end == 0 means the range wrapped.
			 */
			scan_end = scan->key +
				   ((hammer2_key_t)1 << scan->keybits);
			if (scan_end && (*key_nextp > scan_end ||
					 *key_nextp == 0)) {
				*key_nextp = scan_end;
			}
		}
	}
	return (i);
}

/*
 * Do a combined search and return the next match either from the blockref
 * array or from the in-memory chain.  Sets *bresp to the returned bref in
 * both cases, or sets it to NULL if the search exhausted.  Only returns
 * a non-NULL chain if the search matched from the in-memory chain.
 *
 * When no in-memory chain has been found, a non-NULL bref may still be
 * returned in *bresp (matched from the media blockref array).
 *
 * The returned chain is not locked or referenced.  Use the returned bref
 * to determine if the search exhausted or not.  Iterate if the base find
 * is chosen but matches a deleted chain.
 *
 * WARNING!  Must be called with parent's spinlock held.  Spinlock remains
 *	     held through the operation.
5167 */ 5168 hammer2_chain_t * 5169 hammer2_combined_find(hammer2_chain_t *parent, 5170 hammer2_blockref_t *base, int count, 5171 hammer2_key_t *key_nextp, 5172 hammer2_key_t key_beg, hammer2_key_t key_end, 5173 hammer2_blockref_t **bresp) 5174 { 5175 hammer2_blockref_t *bref; 5176 hammer2_chain_t *chain; 5177 int i; 5178 5179 /* 5180 * Lookup in block array and in rbtree. 5181 */ 5182 *key_nextp = key_end + 1; 5183 i = hammer2_base_find(parent, base, count, key_nextp, 5184 key_beg, key_end); 5185 chain = hammer2_chain_find(parent, key_nextp, key_beg, key_end); 5186 5187 /* 5188 * Neither matched 5189 */ 5190 if (i == count && chain == NULL) { 5191 *bresp = NULL; 5192 return(NULL); 5193 } 5194 5195 /* 5196 * Only chain matched. 5197 */ 5198 if (i == count) { 5199 bref = &chain->bref; 5200 goto found; 5201 } 5202 5203 /* 5204 * Only blockref matched. 5205 */ 5206 if (chain == NULL) { 5207 bref = &base[i]; 5208 goto found; 5209 } 5210 5211 /* 5212 * Both in-memory and blockref matched, select the nearer element. 5213 * 5214 * If both are flush with the left-hand side or both are the 5215 * same distance away, select the chain. In this situation the 5216 * chain must have been loaded from the matching blockmap. 5217 */ 5218 if ((chain->bref.key <= key_beg && base[i].key <= key_beg) || 5219 chain->bref.key == base[i].key) { 5220 KKASSERT(chain->bref.key == base[i].key); 5221 bref = &chain->bref; 5222 goto found; 5223 } 5224 5225 /* 5226 * Select the nearer key 5227 */ 5228 if (chain->bref.key < base[i].key) { 5229 bref = &chain->bref; 5230 } else { 5231 bref = &base[i]; 5232 chain = NULL; 5233 } 5234 5235 /* 5236 * If the bref is out of bounds we've exhausted our search. 5237 */ 5238 found: 5239 if (bref->key > key_end) { 5240 *bresp = NULL; 5241 chain = NULL; 5242 } else { 5243 *bresp = bref; 5244 } 5245 return(chain); 5246 } 5247 5248 /* 5249 * Locate the specified block array element and delete it. The element 5250 * must exist. 
/*
 * Locate the specified block array element and delete it.  The element
 * must exist.
 *
 * The spin lock on the related chain must be held.
 *
 * NOTE: live_count was adjusted when the chain was deleted, so it does not
 *	 need to be adjusted when we commit the media change.
 */
void
hammer2_base_delete(hammer2_chain_t *parent,
		    hammer2_blockref_t *base, int count,
		    hammer2_chain_t *chain)
{
	hammer2_blockref_t *elm = &chain->bref;
	hammer2_blockref_t *scan;
	hammer2_key_t key_next;
	int i;

	/*
	 * Delete element.  Expect the element to exist.
	 *
	 * XXX see caller, flush code not yet sophisticated enough to prevent
	 *     re-flushed in some cases.
	 */
	key_next = 0; /* max range */
	i = hammer2_base_find(parent, base, count, &key_next,
			      elm->key, elm->key);
	scan = &base[i];
	if (i == count || scan->type == 0 ||
	    scan->key != elm->key ||
	    ((chain->flags & HAMMER2_CHAIN_BMAPUPD) == 0 &&
	     scan->keybits != elm->keybits)) {
		/* unlock before panic so the message can get out */
		hammer2_spin_unex(&parent->core.spin);
		panic("delete base %p element not found at %d/%d elm %p\n",
		      base, i, count, elm);
		return;
	}

	/*
	 * Update stats and zero the entry.
	 *
	 * NOTE: Handle radix == 0 (0 bytes) case.
	 */
	if ((int)(scan->data_off & HAMMER2_OFF_MASK_RADIX)) {
		parent->bref.embed.stats.data_count -= (hammer2_off_t)1 <<
				(int)(scan->data_off & HAMMER2_OFF_MASK_RADIX);
	}
	switch(scan->type) {
	case HAMMER2_BREF_TYPE_INODE:
		--parent->bref.embed.stats.inode_count;
		/* fall through */
	case HAMMER2_BREF_TYPE_DATA:
		if (parent->bref.leaf_count == HAMMER2_BLOCKREF_LEAF_MAX) {
			/* leaf_count saturated; flag it as a hint only */
			atomic_set_int(&chain->flags,
				       HAMMER2_CHAIN_HINT_LEAF_COUNT);
		} else {
			if (parent->bref.leaf_count)
				--parent->bref.leaf_count;
		}
		/* fall through */
	case HAMMER2_BREF_TYPE_INDIRECT:
		if (scan->type != HAMMER2_BREF_TYPE_DATA) {
			/* subtract the subtree's aggregated statistics */
			parent->bref.embed.stats.data_count -=
				scan->embed.stats.data_count;
			parent->bref.embed.stats.inode_count -=
				scan->embed.stats.inode_count;
		}
		if (scan->type == HAMMER2_BREF_TYPE_INODE)
			break;
		if (parent->bref.leaf_count == HAMMER2_BLOCKREF_LEAF_MAX) {
			atomic_set_int(&chain->flags,
				       HAMMER2_CHAIN_HINT_LEAF_COUNT);
		} else {
			/* clamp at zero rather than underflowing */
			if (parent->bref.leaf_count <= scan->leaf_count)
				parent->bref.leaf_count = 0;
			else
				parent->bref.leaf_count -= scan->leaf_count;
		}
		break;
	case HAMMER2_BREF_TYPE_DIRENT:
		if (parent->bref.leaf_count == HAMMER2_BLOCKREF_LEAF_MAX) {
			atomic_set_int(&chain->flags,
				       HAMMER2_CHAIN_HINT_LEAF_COUNT);
		} else {
			if (parent->bref.leaf_count)
				--parent->bref.leaf_count;
		}
		/* fall through */
	default:
		break;
	}

	bzero(scan, sizeof(*scan));

	/*
	 * We can only optimize parent->core.live_zero for live chains.
	 * If we removed the last live element, pull live_zero back over
	 * any trailing empty slots.
	 */
	if (parent->core.live_zero == i + 1) {
		while (--i >= 0 && base[i].type == 0)
			;
		parent->core.live_zero = i + 1;
	}

	/*
	 * Clear appropriate blockmap flags in chain.
	 */
	atomic_clear_int(&chain->flags, HAMMER2_CHAIN_BMAPPED |
					HAMMER2_CHAIN_BMAPUPD);
}

/*
 * Insert the specified element.  The block array must not already have the
 * element and must have space available for the insertion.
 *
 * The spin lock on the related chain must be held.
 *
 * NOTE: live_count was adjusted when the chain was deleted, so it does not
 *	 need to be adjusted when we commit the media change.
 */
void
hammer2_base_insert(hammer2_chain_t *parent,
		    hammer2_blockref_t *base, int count,
		    hammer2_chain_t *chain, hammer2_blockref_t *elm)
{
	hammer2_key_t key_next;
	hammer2_key_t xkey;
	int i;
	int j;
	int k;
	int l;
	int u = 1;	/* which insertion strategy fired (for diagnostics) */

	/*
	 * Insert new element.  Expect the element to not already exist
	 * unless we are replacing it.
	 *
	 * XXX see caller, flush code not yet sophisticated enough to prevent
	 *     re-flushed in some cases.
	 */
	key_next = 0; /* max range */
	i = hammer2_base_find(parent, base, count, &key_next,
			      elm->key, elm->key);

	/*
	 * Shortcut fill optimization, typical ordered insertion(s) may not
	 * require a search.
	 */
	KKASSERT(i >= 0 && i <= count);

	/*
	 * Set appropriate blockmap flags in chain (if not NULL)
	 */
	if (chain)
		atomic_set_int(&chain->flags, HAMMER2_CHAIN_BMAPPED);

	/*
	 * Update stats and zero the entry
	 */
	if ((int)(elm->data_off & HAMMER2_OFF_MASK_RADIX)) {
		parent->bref.embed.stats.data_count += (hammer2_off_t)1 <<
				(int)(elm->data_off & HAMMER2_OFF_MASK_RADIX);
	}
	switch(elm->type) {
	case HAMMER2_BREF_TYPE_INODE:
		++parent->bref.embed.stats.inode_count;
		/* fall through */
	case HAMMER2_BREF_TYPE_DATA:
		if (parent->bref.leaf_count != HAMMER2_BLOCKREF_LEAF_MAX)
			++parent->bref.leaf_count;
		/* fall through */
	case HAMMER2_BREF_TYPE_INDIRECT:
		if (elm->type != HAMMER2_BREF_TYPE_DATA) {
			/* add the subtree's aggregated statistics */
			parent->bref.embed.stats.data_count +=
				elm->embed.stats.data_count;
			parent->bref.embed.stats.inode_count +=
				elm->embed.stats.inode_count;
		}
		if (elm->type == HAMMER2_BREF_TYPE_INODE)
			break;
		/* saturate leaf_count at the maximum */
		if (parent->bref.leaf_count + elm->leaf_count <
		    HAMMER2_BLOCKREF_LEAF_MAX) {
			parent->bref.leaf_count += elm->leaf_count;
		} else {
			parent->bref.leaf_count = HAMMER2_BLOCKREF_LEAF_MAX;
		}
		break;
	case HAMMER2_BREF_TYPE_DIRENT:
		if (parent->bref.leaf_count != HAMMER2_BLOCKREF_LEAF_MAX)
			++parent->bref.leaf_count;
		break;
	default:
		break;
	}


	/*
	 * We can only optimize parent->core.live_zero for live chains.
	 * Appending past the last live element is the common case.
	 */
	if (i == count && parent->core.live_zero < count) {
		i = parent->core.live_zero++;
		base[i] = *elm;
		return;
	}

	/* xkey is the last key covered by elm; check for overlap */
	xkey = elm->key + ((hammer2_key_t)1 << elm->keybits) - 1;
	if (i != count && (base[i].key < elm->key || xkey >= base[i].key)) {
		hammer2_spin_unex(&parent->core.spin);
		panic("insert base %p overlapping elements at %d elm %p\n",
		      base, i, elm);
	}

	/*
	 * Try to find an empty slot before or after, shifting the
	 * intervening elements to make room at index i.
	 */
	j = i;
	k = i;
	while (j > 0 || k < count) {
		--j;
		if (j >= 0 && base[j].type == 0) {
			if (j == i - 1) {
				base[j] = *elm;
			} else {
				/* shift [j+1, i-1] down one slot */
				bcopy(&base[j+1], &base[j],
				      (i - j - 1) * sizeof(*base));
				base[i - 1] = *elm;
			}
			goto validate;
		}
		++k;
		if (k < count && base[k].type == 0) {
			/* shift [i, k-1] up one slot */
			bcopy(&base[i], &base[i+1],
			      (k - i) * sizeof(hammer2_blockref_t));
			base[i] = *elm;

			/*
			 * We can only update parent->core.live_zero for live
			 * chains.
			 */
			if (parent->core.live_zero <= k)
				parent->core.live_zero = k + 1;
			u = 2;
			goto validate;
		}
	}
	panic("hammer2_base_insert: no room!");

	/*
	 * Debugging: verify that the array keys remain strictly ordered
	 * and non-overlapping after the insertion.
	 */
validate:
	key_next = 0;
	for (l = 0; l < count; ++l) {
		if (base[l].type) {
			key_next = base[l].key +
				   ((hammer2_key_t)1 << base[l].keybits) - 1;
			break;
		}
	}
	while (++l < count) {
		if (base[l].type) {
			if (base[l].key <= key_next)
				panic("base_insert %d %d,%d,%d fail %p:%d", u, i, j, k, base, l);
			key_next = base[l].key +
				   ((hammer2_key_t)1 << base[l].keybits) - 1;

		}
	}

}

#if 0

/*
 * Sort the blockref array for the chain.  Used by the flush code to
 * sort the blockref[] array.
 *
 * The chain must be exclusively locked AND spin-locked.
 */
typedef hammer2_blockref_t *hammer2_blockref_p;

/*
 * Comparison callback for hammer2_base_sort().  Empty elements
 * (type == 0) always sort to the end of the array; live elements
 * sort in ascending key order.
 */
static
int
hammer2_base_sort_callback(const void *v1, const void *v2)
{
	hammer2_blockref_p bref1 = *(const hammer2_blockref_p *)v1;
	hammer2_blockref_p bref2 = *(const hammer2_blockref_p *)v2;

	/*
	 * Make sure empty elements are placed at the end of the array
	 */
	if (bref1->type == 0) {
		if (bref2->type == 0)
			return(0);
		return(1);
	} else if (bref2->type == 0) {
		return(-1);
	}

	/*
	 * Sort by key
	 */
	if (bref1->key < bref2->key)
		return(-1);
	if (bref1->key > bref2->key)
		return(1);
	return(0);
}

/*
 * Sort a chain's blockref array in-place.  Locates the embedded
 * blockref array appropriate for the chain's bref type and kqsorts it.
 * (Currently compiled out via #if 0.)
 */
void
hammer2_base_sort(hammer2_chain_t *chain)
{
	hammer2_blockref_t *base;
	int count;

	switch(chain->bref.type) {
	case HAMMER2_BREF_TYPE_INODE:
		/*
		 * Special shortcut for embedded data returns the inode
		 * itself.  Callers must detect this condition and access
		 * the embedded data (the strategy code does this for us).
		 *
		 * This is only applicable to regular files and softlinks.
		 */
		if (chain->data->ipdata.meta.op_flags &
		    HAMMER2_OPFLAG_DIRECTDATA) {
			return;
		}
		base = &chain->data->ipdata.u.blockset.blockref[0];
		count = HAMMER2_SET_COUNT;
		break;
	case HAMMER2_BREF_TYPE_FREEMAP_NODE:
	case HAMMER2_BREF_TYPE_INDIRECT:
		/*
		 * Optimize indirect blocks in the INITIAL state to avoid
		 * I/O.
		 */
		KKASSERT((chain->flags & HAMMER2_CHAIN_INITIAL) == 0);
		base = &chain->data->npdata[0];
		count = chain->bytes / sizeof(hammer2_blockref_t);
		break;
	case HAMMER2_BREF_TYPE_VOLUME:
		base = &chain->data->voldata.sroot_blockset.blockref[0];
		count = HAMMER2_SET_COUNT;
		break;
	case HAMMER2_BREF_TYPE_FREEMAP:
		base = &chain->data->blkset.blockref[0];
		count = HAMMER2_SET_COUNT;
		break;
	default:
		/* hang for debugging rather than panicking immediately */
		kprintf("hammer2_chain_lookup: unrecognized "
			"blockref(A) type: %d",
			chain->bref.type);
		while (1)
			tsleep(&base, 0, "dead", 0);
		panic("hammer2_chain_lookup: unrecognized "
		      "blockref(A) type: %d",
		      chain->bref.type);
		base = NULL;	/* safety */
		count = 0;	/* safety */
	}
	kqsort(base, count, sizeof(*base), hammer2_base_sort_callback);
}

#endif

/*
 * Chain memory management
 */

/*
 * Short flush-wait: sleep briefly on the chain (1 tick).
 */
void
hammer2_chain_wait(hammer2_chain_t *chain)
{
	tsleep(chain, 0, "chnflw", 1);
}

/*
 * Return the chain's media data for read access.  The data must
 * already be resolved (asserts non-NULL).
 */
const hammer2_media_data_t *
hammer2_chain_rdata(hammer2_chain_t *chain)
{
	KKASSERT(chain->data != NULL);
	return (chain->data);
}

/*
 * Return the chain's media data for write access.  The data must
 * already be resolved (asserts non-NULL).
 */
hammer2_media_data_t *
hammer2_chain_wdata(hammer2_chain_t *chain)
{
	KKASSERT(chain->data != NULL);
	return (chain->data);
}

/*
 * Set the check data for a chain.  This can be a heavy-weight operation
 * and typically only runs on-flush.  For file data check data is calculated
 * when the logical buffers are flushed.
 */
5640 */ 5641 void 5642 hammer2_chain_setcheck(hammer2_chain_t *chain, void *bdata) 5643 { 5644 chain->bref.flags &= ~HAMMER2_BREF_FLAG_ZERO; 5645 5646 switch(HAMMER2_DEC_CHECK(chain->bref.methods)) { 5647 case HAMMER2_CHECK_NONE: 5648 break; 5649 case HAMMER2_CHECK_DISABLED: 5650 break; 5651 case HAMMER2_CHECK_ISCSI32: 5652 chain->bref.check.iscsi32.value = 5653 hammer2_icrc32(bdata, chain->bytes); 5654 break; 5655 case HAMMER2_CHECK_XXHASH64: 5656 chain->bref.check.xxhash64.value = 5657 XXH64(bdata, chain->bytes, XXH_HAMMER2_SEED); 5658 break; 5659 case HAMMER2_CHECK_SHA192: 5660 { 5661 SHA256_CTX hash_ctx; 5662 union { 5663 uint8_t digest[SHA256_DIGEST_LENGTH]; 5664 uint64_t digest64[SHA256_DIGEST_LENGTH/8]; 5665 } u; 5666 5667 SHA256_Init(&hash_ctx); 5668 SHA256_Update(&hash_ctx, bdata, chain->bytes); 5669 SHA256_Final(u.digest, &hash_ctx); 5670 u.digest64[2] ^= u.digest64[3]; 5671 bcopy(u.digest, 5672 chain->bref.check.sha192.data, 5673 sizeof(chain->bref.check.sha192.data)); 5674 } 5675 break; 5676 case HAMMER2_CHECK_FREEMAP: 5677 chain->bref.check.freemap.icrc32 = 5678 hammer2_icrc32(bdata, chain->bytes); 5679 break; 5680 default: 5681 kprintf("hammer2_chain_setcheck: unknown check type %02x\n", 5682 chain->bref.methods); 5683 break; 5684 } 5685 } 5686 5687 int 5688 hammer2_chain_testcheck(hammer2_chain_t *chain, void *bdata) 5689 { 5690 uint32_t check32; 5691 uint64_t check64; 5692 int r; 5693 5694 if (chain->bref.flags & HAMMER2_BREF_FLAG_ZERO) 5695 return 1; 5696 5697 switch(HAMMER2_DEC_CHECK(chain->bref.methods)) { 5698 case HAMMER2_CHECK_NONE: 5699 r = 1; 5700 break; 5701 case HAMMER2_CHECK_DISABLED: 5702 r = 1; 5703 break; 5704 case HAMMER2_CHECK_ISCSI32: 5705 check32 = hammer2_icrc32(bdata, chain->bytes); 5706 r = (chain->bref.check.iscsi32.value == check32); 5707 if (r == 0) { 5708 kprintf("chain %016jx.%02x meth=%02x CHECK FAIL " 5709 "(flags=%08x, bref/data %08x/%08x)\n", 5710 chain->bref.data_off, 5711 chain->bref.type, 5712 chain->bref.methods, 
5713 chain->flags, 5714 chain->bref.check.iscsi32.value, 5715 check32); 5716 } 5717 hammer2_process_icrc32 += chain->bytes; 5718 break; 5719 case HAMMER2_CHECK_XXHASH64: 5720 check64 = XXH64(bdata, chain->bytes, XXH_HAMMER2_SEED); 5721 r = (chain->bref.check.xxhash64.value == check64); 5722 if (r == 0) { 5723 kprintf("chain %016jx.%02x key=%016jx " 5724 "meth=%02x CHECK FAIL " 5725 "(flags=%08x, bref/data %016jx/%016jx)\n", 5726 chain->bref.data_off, 5727 chain->bref.type, 5728 chain->bref.key, 5729 chain->bref.methods, 5730 chain->flags, 5731 chain->bref.check.xxhash64.value, 5732 check64); 5733 } 5734 hammer2_process_xxhash64 += chain->bytes; 5735 break; 5736 case HAMMER2_CHECK_SHA192: 5737 { 5738 SHA256_CTX hash_ctx; 5739 union { 5740 uint8_t digest[SHA256_DIGEST_LENGTH]; 5741 uint64_t digest64[SHA256_DIGEST_LENGTH/8]; 5742 } u; 5743 5744 SHA256_Init(&hash_ctx); 5745 SHA256_Update(&hash_ctx, bdata, chain->bytes); 5746 SHA256_Final(u.digest, &hash_ctx); 5747 u.digest64[2] ^= u.digest64[3]; 5748 if (bcmp(u.digest, 5749 chain->bref.check.sha192.data, 5750 sizeof(chain->bref.check.sha192.data)) == 0) { 5751 r = 1; 5752 } else { 5753 r = 0; 5754 kprintf("chain %016jx.%02x meth=%02x " 5755 "CHECK FAIL\n", 5756 chain->bref.data_off, 5757 chain->bref.type, 5758 chain->bref.methods); 5759 } 5760 } 5761 break; 5762 case HAMMER2_CHECK_FREEMAP: 5763 r = (chain->bref.check.freemap.icrc32 == 5764 hammer2_icrc32(bdata, chain->bytes)); 5765 if (r == 0) { 5766 kprintf("chain %016jx.%02x meth=%02x " 5767 "CHECK FAIL\n", 5768 chain->bref.data_off, 5769 chain->bref.type, 5770 chain->bref.methods); 5771 kprintf("freemap.icrc %08x icrc32 %08x (%d)\n", 5772 chain->bref.check.freemap.icrc32, 5773 hammer2_icrc32(bdata, chain->bytes), 5774 chain->bytes); 5775 if (chain->dio) 5776 kprintf("dio %p buf %016jx,%d bdata %p/%p\n", 5777 chain->dio, chain->dio->bp->b_loffset, 5778 chain->dio->bp->b_bufsize, bdata, 5779 chain->dio->bp->b_data); 5780 } 5781 5782 break; 5783 default: 5784 
kprintf("hammer2_chain_setcheck: unknown check type %02x\n", 5785 chain->bref.methods); 5786 r = 1; 5787 break; 5788 } 5789 return r; 5790 } 5791 5792 /* 5793 * Acquire the chain and parent representing the specified inode for the 5794 * device at the specified cluster index. 5795 * 5796 * The flags passed in are LOOKUP flags, not RESOLVE flags. 5797 * 5798 * If we are unable to locate the hardlink, INVAL is returned and *chainp 5799 * will be NULL. *parentp may still be set error or not, or NULL if the 5800 * parent itself could not be resolved. 5801 * 5802 * Caller must pass-in a valid (and locked), or NULL *parentp or *chainp. 5803 * This function replaces *parentp and *chainp. Generally speaking, if 5804 * the caller found a directory entry and wants the inode, the caller should 5805 * pass the parent,chain representing the directory entry so this function 5806 * can dispose of it properly to avoid any possible lock order reversals. 5807 */ 5808 int 5809 hammer2_chain_inode_find(hammer2_pfs_t *pmp, hammer2_key_t inum, 5810 int clindex, int flags, 5811 hammer2_chain_t **parentp, hammer2_chain_t **chainp) 5812 { 5813 hammer2_chain_t *parent; 5814 hammer2_chain_t *rchain; 5815 hammer2_key_t key_dummy; 5816 int resolve_flags; 5817 int error; 5818 5819 resolve_flags = (flags & HAMMER2_LOOKUP_SHARED) ? 5820 HAMMER2_RESOLVE_SHARED : 0; 5821 5822 /* 5823 * Caller expects us to replace these. 5824 */ 5825 if (*chainp) { 5826 hammer2_chain_unlock(*chainp); 5827 hammer2_chain_drop(*chainp); 5828 *chainp = NULL; 5829 } 5830 if (*parentp) { 5831 hammer2_chain_unlock(*parentp); 5832 hammer2_chain_drop(*parentp); 5833 *parentp = NULL; 5834 } 5835 5836 /* 5837 * Inodes hang off of the iroot (bit 63 is clear, differentiating 5838 * inodes from root directory entries in the key lookup). 
5839 */ 5840 parent = hammer2_inode_chain(pmp->iroot, clindex, resolve_flags); 5841 rchain = NULL; 5842 if (parent) { 5843 rchain = hammer2_chain_lookup(&parent, &key_dummy, 5844 inum, inum, 5845 &error, flags); 5846 } else { 5847 error = HAMMER2_ERROR_EIO; 5848 } 5849 *parentp = parent; 5850 *chainp = rchain; 5851 5852 return error; 5853 } 5854 5855 /* 5856 * Used by the bulkscan code to snapshot the synchronized storage for 5857 * a volume, allowing it to be scanned concurrently against normal 5858 * operation. 5859 */ 5860 hammer2_chain_t * 5861 hammer2_chain_bulksnap(hammer2_dev_t *hmp) 5862 { 5863 hammer2_chain_t *copy; 5864 5865 copy = hammer2_chain_alloc(hmp, hmp->spmp, &hmp->vchain.bref); 5866 copy->data = kmalloc(sizeof(copy->data->voldata), 5867 hmp->mchain, 5868 M_WAITOK | M_ZERO); 5869 hammer2_voldata_lock(hmp); 5870 copy->data->voldata = hmp->volsync; 5871 hammer2_voldata_unlock(hmp); 5872 5873 return copy; 5874 } 5875 5876 void 5877 hammer2_chain_bulkdrop(hammer2_chain_t *copy) 5878 { 5879 KKASSERT(copy->bref.type == HAMMER2_BREF_TYPE_VOLUME); 5880 KKASSERT(copy->data); 5881 kfree(copy->data, copy->hmp->mchain); 5882 copy->data = NULL; 5883 atomic_add_long(&hammer2_chain_allocs, -1); 5884 hammer2_chain_drop(copy); 5885 } 5886 5887 /* 5888 * Returns non-zero if the chain (INODE or DIRENT) matches the 5889 * filename. 
5890 */ 5891 int 5892 hammer2_chain_dirent_test(hammer2_chain_t *chain, const char *name, 5893 size_t name_len) 5894 { 5895 const hammer2_inode_data_t *ripdata; 5896 const hammer2_dirent_head_t *den; 5897 5898 if (chain->bref.type == HAMMER2_BREF_TYPE_INODE) { 5899 ripdata = &chain->data->ipdata; 5900 if (ripdata->meta.name_len == name_len && 5901 bcmp(ripdata->filename, name, name_len) == 0) { 5902 return 1; 5903 } 5904 } 5905 if (chain->bref.type == HAMMER2_BREF_TYPE_DIRENT && 5906 chain->bref.embed.dirent.namlen == name_len) { 5907 den = &chain->bref.embed.dirent; 5908 if (name_len > sizeof(chain->bref.check.buf) && 5909 bcmp(chain->data->buf, name, name_len) == 0) { 5910 return 1; 5911 } 5912 if (name_len <= sizeof(chain->bref.check.buf) && 5913 bcmp(chain->bref.check.buf, name, name_len) == 0) { 5914 return 1; 5915 } 5916 } 5917 return 0; 5918 } 5919