1 /* 2 * Copyright (c) 2011-2020 The DragonFly Project. All rights reserved. 3 * 4 * This code is derived from software contributed to The DragonFly Project 5 * by Matthew Dillon <dillon@dragonflybsd.org> 6 * and Venkatesh Srinivas <vsrinivas@dragonflybsd.org> 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in 16 * the documentation and/or other materials provided with the 17 * distribution. 18 * 3. Neither the name of The DragonFly Project nor the names of its 19 * contributors may be used to endorse or promote products derived 20 * from this software without specific, prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 23 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 24 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 25 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 26 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 27 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 28 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 29 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 30 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 31 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 32 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33 * SUCH DAMAGE. 34 */ 35 /* 36 * This subsystem implements most of the core support functions for 37 * the hammer2_chain structure. 
 *
 * Chains are the in-memory version on media objects (volume header, inodes,
 * indirect blocks, data blocks, etc).  Chains represent a portion of the
 * HAMMER2 topology.
 *
 * Chains are no-longer delete-duplicated.  Instead, the original in-memory
 * chain will be moved along with its block reference (e.g. for things like
 * renames, hardlink operations, modifications, etc), and will be indexed
 * on a secondary list for flush handling instead of propagating a flag
 * upward to the root.
 *
 * Concurrent front-end operations can still run against backend flushes
 * as long as they do not cross the current flush boundary.  An operation
 * running above the current flush (in areas not yet flushed) can become
 * part of the current flush while an operation running below the current
 * flush can become part of the next flush.
 */
#include <sys/cdefs.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/types.h>
#include <sys/lock.h>
#include <sys/kern_syscall.h>
#include <sys/uuid.h>

#include <crypto/sha2/sha2.h>

#include "hammer2.h"

static hammer2_chain_t *hammer2_chain_create_indirect(
		hammer2_chain_t *parent,
		hammer2_key_t key, int keybits,
		hammer2_tid_t mtid, int for_type, int *errorp);
static int hammer2_chain_delete_obref(hammer2_chain_t *parent,
		hammer2_chain_t *chain,
		hammer2_tid_t mtid, int flags,
		hammer2_blockref_t *obref);
static hammer2_chain_t *hammer2_combined_find(
		hammer2_chain_t *parent,
		hammer2_blockref_t *base, int count,
		hammer2_key_t *key_nextp,
		hammer2_key_t key_beg, hammer2_key_t key_end,
		hammer2_blockref_t **bresp);
static hammer2_chain_t *hammer2_chain_lastdrop(hammer2_chain_t *chain,
		int depth);
static void hammer2_chain_lru_flush(hammer2_pfs_t *pmp);

/*
 * There are many degenerate situations where an extreme rate of console
 * output can occur from warnings and errors.  Make sure this output does
 * not impede operations.
 */
static struct krate krate_h2chk = { .freq = 5 };
static struct krate krate_h2me = { .freq = 1 };
static struct krate krate_h2em = { .freq = 1 };

/*
 * Basic RBTree for chains (core.rbtree).
 */
RB_GENERATE(hammer2_chain_tree, hammer2_chain, rbnode, hammer2_chain_cmp);

/*
 * RB-tree comparison function ordering chains by their key range
 * [key, key + 2^keybits - 1].  Returns -1 if chain1 lies entirely to the
 * left of chain2, +1 if entirely to the right, and 0 on any overlap.
 */
int
hammer2_chain_cmp(hammer2_chain_t *chain1, hammer2_chain_t *chain2)
{
	hammer2_key_t c1_beg;
	hammer2_key_t c1_end;
	hammer2_key_t c2_beg;
	hammer2_key_t c2_end;

	/*
	 * Compare chains.  Overlaps are not supposed to happen, and to
	 * catch any software issues early we count overlaps as a match.
	 */
	c1_beg = chain1->bref.key;
	c1_end = c1_beg + ((hammer2_key_t)1 << chain1->bref.keybits) - 1;
	c2_beg = chain2->bref.key;
	c2_end = c2_beg + ((hammer2_key_t)1 << chain2->bref.keybits) - 1;

	if (c1_end < c2_beg)		/* fully to the left */
		return(-1);
	if (c1_beg > c2_end)		/* fully to the right */
		return(1);
	return(0);			/* overlap (must not cross edge boundary) */
}

/*
 * Assert that a chain has no media data associated with it.
 *
 * The volume header and freemap roots keep embedded data, so they are
 * exempt; everything else must have both dio and data NULL at this point.
 */
static __inline void
hammer2_chain_assert_no_data(hammer2_chain_t *chain)
{
	KKASSERT(chain->dio == NULL);
	if (chain->bref.type != HAMMER2_BREF_TYPE_VOLUME &&
	    chain->bref.type != HAMMER2_BREF_TYPE_FREEMAP &&
	    chain->data) {
		panic("hammer2_chain_assert_no_data: chain %p still has data",
		    chain);
	}
}

/*
 * Make a chain visible to the flusher.  The flusher operates using a top-down
 * recursion based on the ONFLUSH flag.  It locates MODIFIED and UPDATE chains,
 * flushes them, and updates blocks back to the volume root.
 *
 * This routine sets the ONFLUSH flag upward from the triggering chain until
 * it hits an inode root or the volume root.  Inode chains serve as inflection
 * points, requiring the flusher to bridge across trees.  Inodes include
 * regular inodes, PFS roots (pmp->iroot), and the media super root
 * (spmp->iroot).
 */
void
hammer2_chain_setflush(hammer2_chain_t *chain)
{
	hammer2_chain_t *parent;

	if ((chain->flags & HAMMER2_CHAIN_ONFLUSH) == 0) {
		hammer2_spin_sh(&chain->core.spin);
		while ((chain->flags & HAMMER2_CHAIN_ONFLUSH) == 0) {
			atomic_set_int(&chain->flags, HAMMER2_CHAIN_ONFLUSH);
			if (chain->bref.type == HAMMER2_BREF_TYPE_INODE)
				break;
			if ((parent = chain->parent) == NULL)
				break;
			/*
			 * Hand-over-hand: take the parent's shared spinlock
			 * before releasing the child's so the parent pointer
			 * stays stable while we walk upward.
			 */
			hammer2_spin_sh(&parent->core.spin);
			hammer2_spin_unsh(&chain->core.spin);
			chain = parent;
		}
		hammer2_spin_unsh(&chain->core.spin);
	}
}

/*
 * Allocate a new disconnected chain element representing the specified
 * bref.  chain->refs is set to 1 and the passed bref is copied to
 * chain->bref.  chain->bytes is derived from the bref.
 *
 * chain->pmp inherits pmp unless the chain is an inode (other than the
 * super-root inode).
 *
 * NOTE: Returns a referenced but unlocked (because there is no core) chain.
 */
hammer2_chain_t *
hammer2_chain_alloc(hammer2_dev_t *hmp, hammer2_pfs_t *pmp,
		    hammer2_blockref_t *bref)
{
	hammer2_chain_t *chain;
	u_int bytes;

	/*
	 * Special case - radix of 0 indicates a chain that does not
	 * need a data reference (context is completely embedded in the
	 * bref).
	 */
	if ((int)(bref->data_off & HAMMER2_OFF_MASK_RADIX))
		bytes = 1U << (int)(bref->data_off & HAMMER2_OFF_MASK_RADIX);
	else
		bytes = 0;

	switch(bref->type) {
	case HAMMER2_BREF_TYPE_INODE:
	case HAMMER2_BREF_TYPE_INDIRECT:
	case HAMMER2_BREF_TYPE_DATA:
	case HAMMER2_BREF_TYPE_DIRENT:
	case HAMMER2_BREF_TYPE_FREEMAP_NODE:
	case HAMMER2_BREF_TYPE_FREEMAP_LEAF:
	case HAMMER2_BREF_TYPE_FREEMAP:
	case HAMMER2_BREF_TYPE_VOLUME:
		chain = kmalloc_obj(sizeof(*chain), hmp->mchain,
				    M_WAITOK | M_ZERO);
		atomic_add_long(&hammer2_chain_allocs, 1);
		break;
	case HAMMER2_BREF_TYPE_EMPTY:
	default:
		panic("hammer2_chain_alloc: unrecognized blockref type: %d",
		      bref->type);
		break;
	}

	/*
	 * Initialize the new chain structure.  pmp must be set to NULL for
	 * chains belonging to the super-root topology of a device mount.
	 */
	if (pmp == hmp->spmp)
		chain->pmp = NULL;
	else
		chain->pmp = pmp;

	chain->hmp = hmp;
	chain->bref = *bref;
	chain->bytes = bytes;
	chain->refs = 1;
	chain->flags = HAMMER2_CHAIN_ALLOCATED;
	lockinit(&chain->diolk, "chdio", 0, 0);

	/*
	 * Set the PFS boundary flag if this chain represents a PFS root.
	 */
	if (bref->flags & HAMMER2_BREF_FLAG_PFSROOT)
		atomic_set_int(&chain->flags, HAMMER2_CHAIN_PFSBOUNDARY);
	hammer2_chain_core_init(chain);

	return (chain);
}

/*
 * Initialize a chain's core structure.  This structure used to be allocated
 * but is now embedded.
 *
 * The core is not locked.  No additional refs on the chain are made.
 * (trans) must not be NULL if (core) is not NULL.
 */
void
hammer2_chain_core_init(hammer2_chain_t *chain)
{
	/*
	 * Fresh core under nchain (no multi-homing of ochain's
	 * sub-tree).
	 */
	RB_INIT(&chain->core.rbtree);	/* live chains */
	hammer2_mtx_init(&chain->lock, "h2chain");
}

/*
 * Add a reference to a chain element, preventing its destruction.
 *
 * (can be called with spinlock held)
 */
void
hammer2_chain_ref(hammer2_chain_t *chain)
{
	if (atomic_fetchadd_int(&chain->refs, 1) == 0) {
		/*
		 * Just flag that the chain was used and should be recycled
		 * on the LRU if it encounters it later.
		 */
		if (chain->flags & HAMMER2_CHAIN_ONLRU)
			atomic_set_int(&chain->flags, HAMMER2_CHAIN_LRUHINT);

#if 0
		/*
		 * REMOVED - reduces contention, lru_list is more heuristical
		 * now.
		 *
		 * 0->non-zero transition must ensure that chain is removed
		 * from the LRU list.
		 *
		 * NOTE: Already holding lru_spin here so we cannot call
		 *	 hammer2_chain_ref() to get it off lru_list, do
		 *	 it manually.
		 */
		if (chain->flags & HAMMER2_CHAIN_ONLRU) {
			hammer2_pfs_t *pmp = chain->pmp;
			hammer2_spin_ex(&pmp->lru_spin);
			if (chain->flags & HAMMER2_CHAIN_ONLRU) {
				atomic_add_int(&pmp->lru_count, -1);
				atomic_clear_int(&chain->flags,
						 HAMMER2_CHAIN_ONLRU);
				TAILQ_REMOVE(&pmp->lru_list, chain, lru_node);
			}
			hammer2_spin_unex(&pmp->lru_spin);
		}
#endif
	}
}

/*
 * Ref a locked chain and force the data to be held across an unlock.
 * Chain must be currently locked.  The user of the chain who desires
 * to release the hold must call hammer2_chain_lock_unhold() to relock
 * and unhold the chain, then unlock normally, or may simply call
 * hammer2_chain_drop_unhold() (which is safer against deadlocks).
 */
void
hammer2_chain_ref_hold(hammer2_chain_t *chain)
{
	atomic_add_int(&chain->lockcnt, 1);
	hammer2_chain_ref(chain);
}

/*
 * Insert the chain in the core rbtree.
 *
 * Normal insertions are placed in the live rbtree.  Insertion of a deleted
 * chain is a special case used by the flush code that is placed on the
 * unstaged deleted list to avoid confusing the live view.
 */
#define HAMMER2_CHAIN_INSERT_SPIN	0x0001
#define HAMMER2_CHAIN_INSERT_LIVE	0x0002
#define HAMMER2_CHAIN_INSERT_RACE	0x0004

static
int
hammer2_chain_insert(hammer2_chain_t *parent, hammer2_chain_t *chain,
		     int flags, int generation)
{
	hammer2_chain_t *xchain;
	int error = 0;

	if (flags & HAMMER2_CHAIN_INSERT_SPIN)
		hammer2_spin_ex(&parent->core.spin);

	/*
	 * Interlocked by spinlock, check for race.  If RACE is requested
	 * and the parent's generation changed since the caller sampled it,
	 * fail with EAGAIN rather than inserting against a stale view.
	 */
	if ((flags & HAMMER2_CHAIN_INSERT_RACE) &&
	    parent->core.generation != generation) {
		error = HAMMER2_ERROR_EAGAIN;
		goto failed;
	}

	/*
	 * Insert chain
	 */
	xchain = RB_INSERT(hammer2_chain_tree, &parent->core.rbtree, chain);
	KASSERT(xchain == NULL,
		("hammer2_chain_insert: collision %p %p (key=%016jx)",
		chain, xchain, chain->bref.key));
	atomic_set_int(&chain->flags, HAMMER2_CHAIN_ONRBTREE);
	chain->parent = parent;
	++parent->core.chain_count;
	++parent->core.generation;	/* XXX incs for _get() too, XXX */

	/*
	 * We have to keep track of the effective live-view blockref count
	 * so the create code knows when to push an indirect block.
	 */
	if (flags & HAMMER2_CHAIN_INSERT_LIVE)
		atomic_add_int(&parent->core.live_count, 1);
failed:
	if (flags & HAMMER2_CHAIN_INSERT_SPIN)
		hammer2_spin_unex(&parent->core.spin);
	return error;
}

/*
 * Drop the caller's reference to the chain.  When the ref count drops to
 * zero this function will try to disassociate the chain from its parent and
 * deallocate it, then recursively drop the parent using the implied ref
 * from the chain's chain->parent.
 *
 * Nobody should own chain's mutex on the 1->0 transition, unless this drop
 * races an acquisition by another cpu.  Therefore we can loop if we are
 * unable to acquire the mutex, and refs is unlikely to be 1 unless we again
 * race against another drop.
 */
void
hammer2_chain_drop(hammer2_chain_t *chain)
{
	u_int refs;

	KKASSERT(chain->refs > 0);

	while (chain) {
		refs = chain->refs;
		cpu_ccfence();
		KKASSERT(refs > 0);

		if (refs == 1) {
			/*
			 * Potential last drop; lastdrop may hand back a
			 * parent chain that also needs dropping, or the
			 * same chain for a retry.
			 */
			if (hammer2_mtx_ex_try(&chain->lock) == 0)
				chain = hammer2_chain_lastdrop(chain, 0);
			/* retry the same chain, or chain from lastdrop */
		} else {
			if (atomic_cmpset_int(&chain->refs, refs, refs - 1))
				break;
			/* retry the same chain */
		}
		cpu_pause();
	}
}

/*
 * Unhold a held and probably not-locked chain, ensure that the data is
 * dropped on the 1->0 transition of lockcnt by obtaining an exclusive
 * lock and then simply unlocking the chain.
 */
void
hammer2_chain_unhold(hammer2_chain_t *chain)
{
	u_int lockcnt;
	int iter = 0;

	for (;;) {
		lockcnt = chain->lockcnt;
		cpu_ccfence();
		if (lockcnt > 1) {
			if (atomic_cmpset_int(&chain->lockcnt,
					      lockcnt, lockcnt - 1)) {
				break;
			}
		} else if (hammer2_mtx_ex_try(&chain->lock) == 0) {
			hammer2_chain_unlock(chain);
			break;
		} else {
			/*
			 * This situation can easily occur on SMP due to
			 * the gap inbetween the 1->0 transition and the
			 * final unlock.  We cannot safely block on the
			 * mutex because lockcnt might go above 1.
			 *
			 * XXX Sleep for one tick if it takes too long.
439 */ 440 if (++iter > 1000) { 441 if (iter > 1000 + hz) { 442 kprintf("hammer2: h2race1 %p\n", chain); 443 iter = 1000; 444 } 445 tsleep(&iter, 0, "h2race1", 1); 446 } 447 cpu_pause(); 448 } 449 } 450 } 451 452 void 453 hammer2_chain_drop_unhold(hammer2_chain_t *chain) 454 { 455 hammer2_chain_unhold(chain); 456 hammer2_chain_drop(chain); 457 } 458 459 void 460 hammer2_chain_rehold(hammer2_chain_t *chain) 461 { 462 hammer2_chain_lock(chain, HAMMER2_RESOLVE_SHARED); 463 atomic_add_int(&chain->lockcnt, 1); 464 hammer2_chain_unlock(chain); 465 } 466 467 /* 468 * Handles the (potential) last drop of chain->refs from 1->0. Called with 469 * the mutex exclusively locked, refs == 1, and lockcnt 0. SMP races are 470 * possible against refs and lockcnt. We must dispose of the mutex on chain. 471 * 472 * This function returns an unlocked chain for recursive drop or NULL. It 473 * can return the same chain if it determines it has raced another ref. 474 * 475 * -- 476 * 477 * When two chains need to be recursively dropped we use the chain we 478 * would otherwise free to placehold the additional chain. It's a bit 479 * convoluted but we can't just recurse without potentially blowing out 480 * the kernel stack. 481 * 482 * The chain cannot be freed if it has any children. 483 * The chain cannot be freed if flagged MODIFIED unless we can dispose of it. 484 * The chain cannot be freed if flagged UPDATE unless we can dispose of it. 485 * Any dedup registration can remain intact. 486 * 487 * The core spinlock is allowed to nest child-to-parent (not parent-to-child). 488 */ 489 static 490 hammer2_chain_t * 491 hammer2_chain_lastdrop(hammer2_chain_t *chain, int depth) 492 { 493 hammer2_pfs_t *pmp; 494 hammer2_dev_t *hmp; 495 hammer2_chain_t *parent; 496 hammer2_chain_t *rdrop; 497 498 /* 499 * We need chain's spinlock to interlock the sub-tree test. 500 * We already have chain's mutex, protecting chain->parent. 501 * 502 * Remember that chain->refs can be in flux. 
503 */ 504 hammer2_spin_ex(&chain->core.spin); 505 506 if (chain->parent != NULL) { 507 /* 508 * If the chain has a parent the UPDATE bit prevents scrapping 509 * as the chain is needed to properly flush the parent. Try 510 * to complete the 1->0 transition and return NULL. Retry 511 * (return chain) if we are unable to complete the 1->0 512 * transition, else return NULL (nothing more to do). 513 * 514 * If the chain has a parent the MODIFIED bit prevents 515 * scrapping. 516 * 517 * Chains with UPDATE/MODIFIED are *not* put on the LRU list! 518 */ 519 if (chain->flags & (HAMMER2_CHAIN_UPDATE | 520 HAMMER2_CHAIN_MODIFIED)) { 521 if (atomic_cmpset_int(&chain->refs, 1, 0)) { 522 hammer2_spin_unex(&chain->core.spin); 523 hammer2_chain_assert_no_data(chain); 524 hammer2_mtx_unlock(&chain->lock); 525 chain = NULL; 526 } else { 527 hammer2_spin_unex(&chain->core.spin); 528 hammer2_mtx_unlock(&chain->lock); 529 } 530 return (chain); 531 } 532 /* spinlock still held */ 533 } else if (chain->bref.type == HAMMER2_BREF_TYPE_VOLUME || 534 chain->bref.type == HAMMER2_BREF_TYPE_FREEMAP) { 535 /* 536 * Retain the static vchain and fchain. Clear bits that 537 * are not relevant. Do not clear the MODIFIED bit, 538 * and certainly do not put it on the delayed-flush queue. 539 */ 540 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_UPDATE); 541 } else { 542 /* 543 * The chain has no parent and can be flagged for destruction. 544 * Since it has no parent, UPDATE can also be cleared. 545 */ 546 atomic_set_int(&chain->flags, HAMMER2_CHAIN_DESTROY); 547 if (chain->flags & HAMMER2_CHAIN_UPDATE) 548 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_UPDATE); 549 550 /* 551 * If the chain has children we must propagate the DESTROY 552 * flag downward and rip the disconnected topology apart. 553 * This is accomplished by calling hammer2_flush() on the 554 * chain. 555 * 556 * Any dedup is already handled by the underlying DIO, so 557 * we do not have to specifically flush it here. 
558 */ 559 if (chain->core.chain_count) { 560 hammer2_spin_unex(&chain->core.spin); 561 hammer2_flush(chain, HAMMER2_FLUSH_TOP | 562 HAMMER2_FLUSH_ALL); 563 hammer2_mtx_unlock(&chain->lock); 564 565 return(chain); /* retry drop */ 566 } 567 568 /* 569 * Otherwise we can scrap the MODIFIED bit if it is set, 570 * and continue along the freeing path. 571 * 572 * Be sure to clean-out any dedup bits. Without a parent 573 * this chain will no longer be visible to the flush code. 574 * Easy check data_off to avoid the volume root. 575 */ 576 if (chain->flags & HAMMER2_CHAIN_MODIFIED) { 577 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_MODIFIED); 578 atomic_add_long(&hammer2_count_modified_chains, -1); 579 if (chain->pmp) 580 hammer2_pfs_memory_wakeup(chain->pmp, -1); 581 } 582 /* spinlock still held */ 583 } 584 585 /* spinlock still held */ 586 587 /* 588 * If any children exist we must leave the chain intact with refs == 0. 589 * They exist because chains are retained below us which have refs or 590 * may require flushing. 591 * 592 * Retry (return chain) if we fail to transition the refs to 0, else 593 * return NULL indication nothing more to do. 594 * 595 * Chains with children are NOT put on the LRU list. 596 */ 597 if (chain->core.chain_count) { 598 if (atomic_cmpset_int(&chain->refs, 1, 0)) { 599 hammer2_spin_unex(&chain->core.spin); 600 hammer2_chain_assert_no_data(chain); 601 hammer2_mtx_unlock(&chain->lock); 602 chain = NULL; 603 } else { 604 hammer2_spin_unex(&chain->core.spin); 605 hammer2_mtx_unlock(&chain->lock); 606 } 607 return (chain); 608 } 609 /* spinlock still held */ 610 /* no chains left under us */ 611 612 /* 613 * chain->core has no children left so no accessors can get to our 614 * chain from there. Now we have to lock the parent core to interlock 615 * remaining possible accessors that might bump chain's refs before 616 * we can safely drop chain's refs with intent to free the chain. 
617 */ 618 hmp = chain->hmp; 619 pmp = chain->pmp; /* can be NULL */ 620 rdrop = NULL; 621 622 parent = chain->parent; 623 624 /* 625 * WARNING! chain's spin lock is still held here, and other spinlocks 626 * will be acquired and released in the code below. We 627 * cannot be making fancy procedure calls! 628 */ 629 630 /* 631 * We can cache the chain if it is associated with a pmp 632 * and not flagged as being destroyed or requesting a full 633 * release. In this situation the chain is not removed 634 * from its parent, i.e. it can still be looked up. 635 * 636 * We intentionally do not cache DATA chains because these 637 * were likely used to load data into the logical buffer cache 638 * and will not be accessed again for some time. 639 */ 640 if ((chain->flags & 641 (HAMMER2_CHAIN_DESTROY | HAMMER2_CHAIN_RELEASE)) == 0 && 642 chain->pmp && 643 chain->bref.type != HAMMER2_BREF_TYPE_DATA) { 644 if (parent) 645 hammer2_spin_ex(&parent->core.spin); 646 if (atomic_cmpset_int(&chain->refs, 1, 0) == 0) { 647 /* 648 * 1->0 transition failed, retry. Do not drop 649 * the chain's data yet! 650 */ 651 if (parent) 652 hammer2_spin_unex(&parent->core.spin); 653 hammer2_spin_unex(&chain->core.spin); 654 hammer2_mtx_unlock(&chain->lock); 655 656 return(chain); 657 } 658 659 /* 660 * Success 661 */ 662 hammer2_chain_assert_no_data(chain); 663 664 /* 665 * Make sure we are on the LRU list, clean up excessive 666 * LRU entries. We can only really drop one but there might 667 * be other entries that we can remove from the lru_list 668 * without dropping. 669 * 670 * NOTE: HAMMER2_CHAIN_ONLRU may only be safely set when 671 * chain->core.spin AND pmp->lru_spin are held, but 672 * can be safely cleared only holding pmp->lru_spin. 
673 */ 674 if ((chain->flags & HAMMER2_CHAIN_ONLRU) == 0) { 675 hammer2_spin_ex(&pmp->lru_spin); 676 if ((chain->flags & HAMMER2_CHAIN_ONLRU) == 0) { 677 atomic_set_int(&chain->flags, 678 HAMMER2_CHAIN_ONLRU); 679 TAILQ_INSERT_TAIL(&pmp->lru_list, 680 chain, lru_node); 681 atomic_add_int(&pmp->lru_count, 1); 682 } 683 if (pmp->lru_count < HAMMER2_LRU_LIMIT) 684 depth = 1; /* disable lru_list flush */ 685 hammer2_spin_unex(&pmp->lru_spin); 686 } else { 687 /* disable lru flush */ 688 depth = 1; 689 } 690 691 if (parent) { 692 hammer2_spin_unex(&parent->core.spin); 693 parent = NULL; /* safety */ 694 } 695 hammer2_spin_unex(&chain->core.spin); 696 hammer2_mtx_unlock(&chain->lock); 697 698 /* 699 * lru_list hysteresis (see above for depth overrides). 700 * Note that depth also prevents excessive lastdrop recursion. 701 */ 702 if (depth == 0) 703 hammer2_chain_lru_flush(pmp); 704 705 return NULL; 706 /* NOT REACHED */ 707 } 708 709 /* 710 * Make sure we are not on the LRU list. 711 */ 712 if (chain->flags & HAMMER2_CHAIN_ONLRU) { 713 hammer2_spin_ex(&pmp->lru_spin); 714 if (chain->flags & HAMMER2_CHAIN_ONLRU) { 715 atomic_add_int(&pmp->lru_count, -1); 716 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_ONLRU); 717 TAILQ_REMOVE(&pmp->lru_list, chain, lru_node); 718 } 719 hammer2_spin_unex(&pmp->lru_spin); 720 } 721 722 /* 723 * Spinlock the parent and try to drop the last ref on chain. 724 * On success determine if we should dispose of the chain 725 * (remove the chain from its parent, etc). 726 * 727 * (normal core locks are top-down recursive but we define 728 * core spinlocks as bottom-up recursive, so this is safe). 729 */ 730 if (parent) { 731 hammer2_spin_ex(&parent->core.spin); 732 if (atomic_cmpset_int(&chain->refs, 1, 0) == 0) { 733 /* 734 * 1->0 transition failed, retry. 
735 */ 736 hammer2_spin_unex(&parent->core.spin); 737 hammer2_spin_unex(&chain->core.spin); 738 hammer2_mtx_unlock(&chain->lock); 739 740 return(chain); 741 } 742 743 /* 744 * 1->0 transition successful, parent spin held to prevent 745 * new lookups, chain spinlock held to protect parent field. 746 * Remove chain from the parent. 747 * 748 * If the chain is being removed from the parent's btree but 749 * is not bmapped, we have to adjust live_count downward. If 750 * it is bmapped then the blockref is retained in the parent 751 * as is its associated live_count. This case can occur when 752 * a chain added to the topology is unable to flush and is 753 * then later deleted. 754 */ 755 if (chain->flags & HAMMER2_CHAIN_ONRBTREE) { 756 if ((parent->flags & HAMMER2_CHAIN_COUNTEDBREFS) && 757 (chain->flags & HAMMER2_CHAIN_BMAPPED) == 0) { 758 atomic_add_int(&parent->core.live_count, -1); 759 } 760 RB_REMOVE(hammer2_chain_tree, 761 &parent->core.rbtree, chain); 762 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_ONRBTREE); 763 --parent->core.chain_count; 764 chain->parent = NULL; 765 } 766 767 /* 768 * If our chain was the last chain in the parent's core the 769 * core is now empty and its parent might have to be 770 * re-dropped if it has 0 refs. 771 */ 772 if (parent->core.chain_count == 0) { 773 rdrop = parent; 774 atomic_add_int(&rdrop->refs, 1); 775 /* 776 if (atomic_cmpset_int(&rdrop->refs, 0, 1) == 0) 777 rdrop = NULL; 778 */ 779 } 780 hammer2_spin_unex(&parent->core.spin); 781 parent = NULL; /* safety */ 782 /* FALL THROUGH */ 783 } else { 784 /* 785 * No-parent case. 786 */ 787 if (atomic_cmpset_int(&chain->refs, 1, 0) == 0) { 788 /* 789 * 1->0 transition failed, retry. 790 */ 791 hammer2_spin_unex(&parent->core.spin); 792 hammer2_spin_unex(&chain->core.spin); 793 hammer2_mtx_unlock(&chain->lock); 794 795 return(chain); 796 } 797 } 798 799 /* 800 * Successful 1->0 transition, no parent, no children... no way for 801 * anyone to ref this chain any more. 
We can clean-up and free it. 802 * 803 * We still have the core spinlock, and core's chain_count is 0. 804 * Any parent spinlock is gone. 805 */ 806 hammer2_spin_unex(&chain->core.spin); 807 hammer2_chain_assert_no_data(chain); 808 hammer2_mtx_unlock(&chain->lock); 809 KKASSERT(RB_EMPTY(&chain->core.rbtree) && 810 chain->core.chain_count == 0); 811 812 /* 813 * All locks are gone, no pointers remain to the chain, finish 814 * freeing it. 815 */ 816 KKASSERT((chain->flags & (HAMMER2_CHAIN_UPDATE | 817 HAMMER2_CHAIN_MODIFIED)) == 0); 818 819 /* 820 * Once chain resources are gone we can use the now dead chain 821 * structure to placehold what might otherwise require a recursive 822 * drop, because we have potentially two things to drop and can only 823 * return one directly. 824 */ 825 if (chain->flags & HAMMER2_CHAIN_ALLOCATED) { 826 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_ALLOCATED); 827 chain->hmp = NULL; 828 kfree_obj(chain, hmp->mchain); 829 } 830 831 /* 832 * Possible chaining loop when parent re-drop needed. 833 */ 834 return(rdrop); 835 } 836 837 /* 838 * Heuristical flush of the LRU, try to reduce the number of entries 839 * on the LRU to (HAMMER2_LRU_LIMIT * 2 / 3). This procedure is called 840 * only when lru_count exceeds HAMMER2_LRU_LIMIT. 841 */ 842 static 843 void 844 hammer2_chain_lru_flush(hammer2_pfs_t *pmp) 845 { 846 hammer2_chain_t *chain; 847 848 again: 849 chain = NULL; 850 hammer2_spin_ex(&pmp->lru_spin); 851 while (pmp->lru_count > HAMMER2_LRU_LIMIT * 2 / 3) { 852 /* 853 * Pick a chain off the lru_list, just recycle it quickly 854 * if LRUHINT is set (the chain was ref'd but left on 855 * the lru_list, so cycle to the end). 
856 */ 857 chain = TAILQ_FIRST(&pmp->lru_list); 858 TAILQ_REMOVE(&pmp->lru_list, chain, lru_node); 859 860 if (chain->flags & HAMMER2_CHAIN_LRUHINT) { 861 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_LRUHINT); 862 TAILQ_INSERT_TAIL(&pmp->lru_list, chain, lru_node); 863 chain = NULL; 864 continue; 865 } 866 867 /* 868 * Ok, we are off the LRU. We must adjust refs before we 869 * can safely clear the ONLRU flag. 870 */ 871 atomic_add_int(&pmp->lru_count, -1); 872 if (atomic_cmpset_int(&chain->refs, 0, 1)) { 873 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_ONLRU); 874 atomic_set_int(&chain->flags, HAMMER2_CHAIN_RELEASE); 875 break; 876 } 877 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_ONLRU); 878 chain = NULL; 879 } 880 hammer2_spin_unex(&pmp->lru_spin); 881 if (chain == NULL) 882 return; 883 884 /* 885 * If we picked a chain off the lru list we may be able to lastdrop 886 * it. Use a depth of 1 to prevent excessive lastdrop recursion. 887 */ 888 while (chain) { 889 u_int refs; 890 891 refs = chain->refs; 892 cpu_ccfence(); 893 KKASSERT(refs > 0); 894 895 if (refs == 1) { 896 if (hammer2_mtx_ex_try(&chain->lock) == 0) 897 chain = hammer2_chain_lastdrop(chain, 1); 898 /* retry the same chain, or chain from lastdrop */ 899 } else { 900 if (atomic_cmpset_int(&chain->refs, refs, refs - 1)) 901 break; 902 /* retry the same chain */ 903 } 904 cpu_pause(); 905 } 906 goto again; 907 } 908 909 /* 910 * On last lock release. 
911 */ 912 static hammer2_io_t * 913 hammer2_chain_drop_data(hammer2_chain_t *chain) 914 { 915 hammer2_io_t *dio; 916 917 if ((dio = chain->dio) != NULL) { 918 chain->dio = NULL; 919 chain->data = NULL; 920 } else { 921 switch(chain->bref.type) { 922 case HAMMER2_BREF_TYPE_VOLUME: 923 case HAMMER2_BREF_TYPE_FREEMAP: 924 break; 925 default: 926 if (chain->data != NULL) { 927 hammer2_spin_unex(&chain->core.spin); 928 panic("chain data not null: " 929 "chain %p bref %016jx.%02x " 930 "refs %d parent %p dio %p data %p", 931 chain, chain->bref.data_off, 932 chain->bref.type, chain->refs, 933 chain->parent, 934 chain->dio, chain->data); 935 } 936 KKASSERT(chain->data == NULL); 937 break; 938 } 939 } 940 return dio; 941 } 942 943 /* 944 * Lock a referenced chain element, acquiring its data with I/O if necessary, 945 * and specify how you would like the data to be resolved. 946 * 947 * If an I/O or other fatal error occurs, chain->error will be set to non-zero. 948 * 949 * The lock is allowed to recurse, multiple locking ops will aggregate 950 * the requested resolve types. Once data is assigned it will not be 951 * removed until the last unlock. 952 * 953 * HAMMER2_RESOLVE_NEVER - Do not resolve the data element. 954 * (typically used to avoid device/logical buffer 955 * aliasing for data) 956 * 957 * HAMMER2_RESOLVE_MAYBE - Do not resolve data elements for chains in 958 * the INITIAL-create state (indirect blocks only). 959 * 960 * Do not resolve data elements for DATA chains. 961 * (typically used to avoid device/logical buffer 962 * aliasing for data) 963 * 964 * HAMMER2_RESOLVE_ALWAYS- Always resolve the data element. 965 * 966 * HAMMER2_RESOLVE_SHARED- (flag) The chain is locked shared, otherwise 967 * it will be locked exclusive. 968 * 969 * HAMMER2_RESOLVE_NONBLOCK- (flag) The chain is locked non-blocking. If 970 * the lock fails, EAGAIN is returned. 971 * 972 * NOTE: Embedded elements (volume header, inodes) are always resolved 973 * regardless. 
 *
 * NOTE: Specifying HAMMER2_RESOLVE_ALWAYS on a newly-created non-embedded
 *	 element will instantiate and zero its buffer, and flush it on
 *	 release.
 *
 * NOTE: (data) elements are normally locked RESOLVE_NEVER or RESOLVE_MAYBE
 *	 so as not to instantiate a device buffer, which could alias against
 *	 a logical file buffer.  However, if ALWAYS is specified the
 *	 device buffer will be instantiated anyway.
 *
 * NOTE: The return value is always 0 unless NONBLOCK is specified, in which
 *	 case it can be either 0 or EAGAIN.
 *
 * WARNING! This function blocks on I/O if data needs to be fetched.  This
 *	    blocking can run concurrent with other compatible lock holders
 *	    who do not need data returning.  The lock is not upgraded to
 *	    exclusive during a data fetch, a separate bit is used to
 *	    interlock I/O.  However, an exclusive lock holder can still count
 *	    on being interlocked against an I/O fetch managed by a shared
 *	    lock holder.
 */
int
hammer2_chain_lock(hammer2_chain_t *chain, int how)
{
	KKASSERT(chain->refs > 0);

	if (how & HAMMER2_RESOLVE_NONBLOCK) {
		/*
		 * We still have to bump lockcnt before acquiring the lock,
		 * even for non-blocking operation, because the unlock code
		 * live-loops on lockcnt == 1 when dropping the last lock.
		 *
		 * If the non-blocking operation fails we have to use an
		 * unhold sequence to undo the mess.
		 *
		 * NOTE: LOCKAGAIN must always succeed without blocking,
		 *	 even if NONBLOCK is specified.
		 */
		atomic_add_int(&chain->lockcnt, 1);
		if (how & HAMMER2_RESOLVE_SHARED) {
			if (how & HAMMER2_RESOLVE_LOCKAGAIN) {
				hammer2_mtx_sh_again(&chain->lock);
			} else {
				if (hammer2_mtx_sh_try(&chain->lock) != 0) {
					hammer2_chain_unhold(chain);
					return EAGAIN;
				}
			}
		} else {
			if (hammer2_mtx_ex_try(&chain->lock) != 0) {
				hammer2_chain_unhold(chain);
				return EAGAIN;
			}
		}
	} else {
		/*
		 * Get the appropriate lock.  If LOCKAGAIN is flagged with
		 * SHARED the caller expects a shared lock to already be
		 * present and we are giving it another ref.  This case must
		 * importantly not block if there is a pending exclusive lock
		 * request.
		 */
		atomic_add_int(&chain->lockcnt, 1);
		if (how & HAMMER2_RESOLVE_SHARED) {
			if (how & HAMMER2_RESOLVE_LOCKAGAIN) {
				hammer2_mtx_sh_again(&chain->lock);
			} else {
				hammer2_mtx_sh(&chain->lock);
			}
		} else {
			hammer2_mtx_ex(&chain->lock);
		}
	}

	/*
	 * If we already have a valid data pointer make sure the data is
	 * synchronized to the current cpu, and then no further action is
	 * necessary.
	 */
	if (chain->data) {
		if (chain->dio)
			hammer2_io_bkvasync(chain->dio);
		return 0;
	}

	/*
	 * Do we have to resolve the data?  This is generally only
	 * applicable to HAMMER2_BREF_TYPE_DATA which is special-cased.
	 * Other BREF types expect the data to be there.
	 */
	switch(how & HAMMER2_RESOLVE_MASK) {
	case HAMMER2_RESOLVE_NEVER:
		return 0;
	case HAMMER2_RESOLVE_MAYBE:
		if (chain->flags & HAMMER2_CHAIN_INITIAL)
			return 0;
		if (chain->bref.type == HAMMER2_BREF_TYPE_DATA)
			return 0;
#if 0
		if (chain->bref.type == HAMMER2_BREF_TYPE_FREEMAP_NODE)
			return 0;
		if (chain->bref.type == HAMMER2_BREF_TYPE_FREEMAP_LEAF)
			return 0;
#endif
		/* fall through */
	case HAMMER2_RESOLVE_ALWAYS:
	default:
		break;
	}

	/*
	 * Caller requires data
	 */
	hammer2_chain_load_data(chain);

	return 0;
}

/*
 * Lock the chain, retain the hold, and drop the data persistence count.
 * The data should remain valid because we never transitioned lockcnt
 * through 0.
 */
void
hammer2_chain_lock_unhold(hammer2_chain_t *chain, int how)
{
	hammer2_chain_lock(chain, how);
	atomic_add_int(&chain->lockcnt, -1);
}

#if 0
/*
 * Downgrade an exclusive chain lock to a shared chain lock.
 *
 * NOTE: There is no upgrade equivalent due to the ease of
 *	 deadlocks in that direction.
 */
void
hammer2_chain_lock_downgrade(hammer2_chain_t *chain)
{
	hammer2_mtx_downgrade(&chain->lock);
}
#endif

/*
 * Issue I/O and install chain->data.  Caller must hold a chain lock, lock
 * may be of any type.
 *
 * Once chain->data is set it cannot be disposed of until all locks are
 * released.
 *
 * Make sure the data is synchronized to the current cpu.
 */
void
hammer2_chain_load_data(hammer2_chain_t *chain)
{
	hammer2_blockref_t *bref;
	hammer2_dev_t *hmp;
	hammer2_io_t *dio;
	char *bdata;
	int error;

	/*
	 * Degenerate case, data already present, or chain has no media
	 * reference to load.
	 */
	KKASSERT(chain->lock.mtx_lock & MTX_MASK);
	if (chain->data) {
		if (chain->dio)
			hammer2_io_bkvasync(chain->dio);
		return;
	}
	if ((chain->bref.data_off & ~HAMMER2_OFF_MASK_RADIX) == 0)
		return;

	hmp = chain->hmp;
	KKASSERT(hmp != NULL);

	/*
	 * Gain the IOINPROG bit, interlocked block.  If another thread
	 * already owns IOINPROG we set IOSIGNAL and sleep until it
	 * completes, then retry.
	 */
	for (;;) {
		u_int oflags;
		u_int nflags;

		oflags = chain->flags;
		cpu_ccfence();
		if (oflags & HAMMER2_CHAIN_IOINPROG) {
			nflags = oflags | HAMMER2_CHAIN_IOSIGNAL;
			tsleep_interlock(&chain->flags, 0);
			if (atomic_cmpset_int(&chain->flags, oflags, nflags)) {
				tsleep(&chain->flags, PINTERLOCKED,
				       "h2iocw", 0);
			}
			/* retry */
		} else {
			nflags = oflags | HAMMER2_CHAIN_IOINPROG;
			if (atomic_cmpset_int(&chain->flags, oflags, nflags)) {
				break;
			}
			/* retry */
		}
	}

	/*
	 * We own CHAIN_IOINPROG
	 *
	 * Degenerate case if we raced another load.
	 */
	if (chain->data) {
		if (chain->dio)
			hammer2_io_bkvasync(chain->dio);
		goto done;
	}

	/*
	 * We must resolve to a device buffer, either by issuing I/O or
	 * by creating a zero-fill element.  We do not mark the buffer
	 * dirty when creating a zero-fill element (the hammer2_chain_modify()
	 * API must still be used to do that).
	 *
	 * The device buffer is variable-sized in powers of 2 down
	 * to HAMMER2_MIN_ALLOC (typically 1K).  A 64K physical storage
	 * chunk always contains buffers of the same size. (XXX)
	 *
	 * The minimum physical IO size may be larger than the variable
	 * block size.
	 */
	bref = &chain->bref;

	/*
	 * The getblk() optimization can only be used on newly created
	 * elements if the physical block size matches the request.
	 */
	if (chain->flags & HAMMER2_CHAIN_INITIAL) {
		error = hammer2_io_new(hmp, bref->type,
				       bref->data_off, chain->bytes,
				       &chain->dio);
	} else {
		error = hammer2_io_bread(hmp, bref->type,
					 bref->data_off, chain->bytes,
					 &chain->dio);
		hammer2_adjreadcounter(chain->bref.type, chain->bytes);
	}
	if (error) {
		chain->error = HAMMER2_ERROR_EIO;
		kprintf("hammer2_chain_load_data: I/O error %016jx: %d\n",
			(intmax_t)bref->data_off, error);
		hammer2_io_bqrelse(&chain->dio);
		goto done;
	}
	chain->error = 0;

	/*
	 * This isn't perfect and can be ignored on OSs which do not have
	 * an indication as to whether a buffer is coming from cache or
	 * if I/O was actually issued for the read.  TESTEDGOOD will work
	 * pretty well without the B_IOISSUED logic because chains are
	 * cached, but in that situation (without B_IOISSUED) it will not
	 * detect whether a re-read via I/O is corrupted versus the original
	 * read.
	 *
	 * We can't re-run the CRC on every fresh lock.  That would be
	 * insanely expensive.
	 *
	 * If the underlying kernel buffer covers the entire chain we can
	 * use the B_IOISSUED indication to determine if we have to re-run
	 * the CRC on chain data for chains that managed to stay cached
	 * across the kernel disposal of the original buffer.
	 */
	if ((dio = chain->dio) != NULL && dio->bp) {
		struct buf *bp = dio->bp;

		if (dio->psize == chain->bytes &&
		    (bp->b_flags & B_IOISSUED)) {
			/* fresh I/O occurred, force a re-check below */
			atomic_clear_int(&chain->flags,
					 HAMMER2_CHAIN_TESTEDGOOD);
			bp->b_flags &= ~B_IOISSUED;
		}
	}

	/*
	 * NOTE: A locked chain's data cannot be modified without first
	 *	 calling hammer2_chain_modify().
	 */

	/*
	 * NOTE: hammer2_io_data() call issues bkvasync()
	 */
	bdata = hammer2_io_data(chain->dio, chain->bref.data_off);

	if (chain->flags & HAMMER2_CHAIN_INITIAL) {
		/*
		 * Clear INITIAL.  In this case we used io_new() and the
		 * buffer has been zero'd and marked dirty.
		 *
		 * CHAIN_MODIFIED has not been set yet, and we leave it
		 * that way for now.  Set a temporary CHAIN_NOTTESTED flag
		 * to prevent hammer2_chain_testcheck() from trying to match
		 * a check code that has not yet been generated.  This bit
		 * should NOT end up on the actual media.
		 */
		atomic_clear_int(&chain->flags, HAMMER2_CHAIN_INITIAL);
		atomic_set_int(&chain->flags, HAMMER2_CHAIN_NOTTESTED);
	} else if (chain->flags & HAMMER2_CHAIN_MODIFIED) {
		/*
		 * check data not currently synchronized due to
		 * modification.  XXX assumes data stays in the buffer
		 * cache, which might not be true (need biodep on flush
		 * to calculate crc?  or simple crc?).
		 */
	} else if ((chain->flags & HAMMER2_CHAIN_TESTEDGOOD) == 0) {
		/* verify the check code once, then remember the result */
		if (hammer2_chain_testcheck(chain, bdata) == 0) {
			chain->error = HAMMER2_ERROR_CHECK;
		} else {
			atomic_set_int(&chain->flags, HAMMER2_CHAIN_TESTEDGOOD);
		}
	}

	/*
	 * Setup the data pointer, either pointing it to an embedded data
	 * structure and copying the data from the buffer, or pointing it
	 * into the buffer.
	 *
	 * The buffer is not retained when copying to an embedded data
	 * structure in order to avoid potential deadlocks or recursions
	 * on the same physical buffer.
	 *
	 * WARNING! Other threads can start using the data the instant we
	 *	    set chain->data non-NULL.
	 */
	switch (bref->type) {
	case HAMMER2_BREF_TYPE_VOLUME:
	case HAMMER2_BREF_TYPE_FREEMAP:
		/*
		 * Copy data from bp to embedded buffer
		 */
		panic("hammer2_chain_load_data: unresolved volume header");
		break;
	case HAMMER2_BREF_TYPE_DIRENT:
		KKASSERT(chain->bytes != 0);
		/* fall through */
	case HAMMER2_BREF_TYPE_INODE:
	case HAMMER2_BREF_TYPE_FREEMAP_LEAF:
	case HAMMER2_BREF_TYPE_INDIRECT:
	case HAMMER2_BREF_TYPE_DATA:
	case HAMMER2_BREF_TYPE_FREEMAP_NODE:
	default:
		/*
		 * Point data at the device buffer and leave dio intact.
		 */
		chain->data = (void *)bdata;
		break;
	}

	/*
	 * Release HAMMER2_CHAIN_IOINPROG and signal waiters if requested.
	 */
done:
	for (;;) {
		u_int oflags;
		u_int nflags;

		oflags = chain->flags;
		nflags = oflags & ~(HAMMER2_CHAIN_IOINPROG |
				    HAMMER2_CHAIN_IOSIGNAL);
		KKASSERT(oflags & HAMMER2_CHAIN_IOINPROG);
		if (atomic_cmpset_int(&chain->flags, oflags, nflags)) {
			if (oflags & HAMMER2_CHAIN_IOSIGNAL)
				wakeup(&chain->flags);
			break;
		}
	}
}

/*
 * Unlock and deref a chain element.
 *
 * Remember that the presence of children under chain prevent the chain's
 * destruction but do not add additional references, so the dio will still
 * be dropped.
 */
void
hammer2_chain_unlock(hammer2_chain_t *chain)
{
	hammer2_io_t *dio;
	u_int lockcnt;
	int iter = 0;

	/*
	 * If multiple locks are present (or being attempted) on this
	 * particular chain we can just unlock, drop refs, and return.
	 *
	 * Otherwise fall-through on the 1->0 transition.
	 */
	for (;;) {
		lockcnt = chain->lockcnt;
		KKASSERT(lockcnt > 0);
		cpu_ccfence();
		if (lockcnt > 1) {
			/* not the last lock, just decrement and unlock */
			if (atomic_cmpset_int(&chain->lockcnt,
					      lockcnt, lockcnt - 1)) {
				hammer2_mtx_unlock(&chain->lock);
				return;
			}
		} else if (hammer2_mtx_upgrade_try(&chain->lock) == 0) {
			/* while holding the mutex exclusively */
			if (atomic_cmpset_int(&chain->lockcnt, 1, 0))
				break;
		} else {
			/*
			 * This situation can easily occur on SMP due to
			 * the gap inbetween the 1->0 transition and the
			 * final unlock.  We cannot safely block on the
			 * mutex because lockcnt might go above 1.
			 *
			 * XXX Sleep for one tick if it takes too long.
			 */
			if (++iter > 1000) {
				if (iter > 1000 + hz) {
					kprintf("hammer2: h2race2 %p\n", chain);
					iter = 1000;
				}
				tsleep(&iter, 0, "h2race2", 1);
			}
			cpu_pause();
		}
		/* retry */
	}

	/*
	 * Last unlock / mutex upgraded to exclusive.  Drop the data
	 * reference.
	 */
	dio = hammer2_chain_drop_data(chain);
	if (dio)
		hammer2_io_bqrelse(&dio);
	hammer2_mtx_unlock(&chain->lock);
}

/*
 * Unlock and hold chain data intact.  The extra lockcnt ref prevents
 * the unlock from dropping the chain's data on the 1->0 transition.
 */
void
hammer2_chain_unlock_hold(hammer2_chain_t *chain)
{
	atomic_add_int(&chain->lockcnt, 1);
	hammer2_chain_unlock(chain);
}

/*
 * Helper to obtain the blockref[] array base and count for a chain.
 *
 * XXX Not widely used yet, various use cases need to be validated and
 *     converted to use this function.
1429 */ 1430 static 1431 hammer2_blockref_t * 1432 hammer2_chain_base_and_count(hammer2_chain_t *parent, int *countp) 1433 { 1434 hammer2_blockref_t *base; 1435 int count; 1436 1437 if (parent->flags & HAMMER2_CHAIN_INITIAL) { 1438 base = NULL; 1439 1440 switch(parent->bref.type) { 1441 case HAMMER2_BREF_TYPE_INODE: 1442 count = HAMMER2_SET_COUNT; 1443 break; 1444 case HAMMER2_BREF_TYPE_INDIRECT: 1445 case HAMMER2_BREF_TYPE_FREEMAP_NODE: 1446 count = parent->bytes / sizeof(hammer2_blockref_t); 1447 break; 1448 case HAMMER2_BREF_TYPE_VOLUME: 1449 count = HAMMER2_SET_COUNT; 1450 break; 1451 case HAMMER2_BREF_TYPE_FREEMAP: 1452 count = HAMMER2_SET_COUNT; 1453 break; 1454 default: 1455 panic("hammer2_chain_base_and_count: " 1456 "unrecognized blockref type: %d", 1457 parent->bref.type); 1458 count = 0; 1459 break; 1460 } 1461 } else { 1462 switch(parent->bref.type) { 1463 case HAMMER2_BREF_TYPE_INODE: 1464 base = &parent->data->ipdata.u.blockset.blockref[0]; 1465 count = HAMMER2_SET_COUNT; 1466 break; 1467 case HAMMER2_BREF_TYPE_INDIRECT: 1468 case HAMMER2_BREF_TYPE_FREEMAP_NODE: 1469 base = &parent->data->npdata[0]; 1470 count = parent->bytes / sizeof(hammer2_blockref_t); 1471 break; 1472 case HAMMER2_BREF_TYPE_VOLUME: 1473 base = &parent->data->voldata. 1474 sroot_blockset.blockref[0]; 1475 count = HAMMER2_SET_COUNT; 1476 break; 1477 case HAMMER2_BREF_TYPE_FREEMAP: 1478 base = &parent->data->blkset.blockref[0]; 1479 count = HAMMER2_SET_COUNT; 1480 break; 1481 default: 1482 panic("hammer2_chain_base_and_count: " 1483 "unrecognized blockref type: %d", 1484 parent->bref.type); 1485 base = NULL; 1486 count = 0; 1487 break; 1488 } 1489 } 1490 *countp = count; 1491 1492 return base; 1493 } 1494 1495 /* 1496 * This counts the number of live blockrefs in a block array and 1497 * also calculates the point at which all remaining blockrefs are empty. 1498 * This routine can only be called on a live chain. 
 *
 * Caller holds the chain locked, but possibly with a shared lock.  We
 * must use an exclusive spinlock to prevent corruption.
 *
 * NOTE: Flag is not set until after the count is complete, allowing
 *	 callers to test the flag without holding the spinlock.
 *
 * NOTE: If base is NULL the related chain is still in the INITIAL
 *	 state and there are no blockrefs to count.
 *
 * NOTE: live_count may already have some counts accumulated due to
 *	 creation and deletion and could even be initially negative.
 */
void
hammer2_chain_countbrefs(hammer2_chain_t *chain,
			 hammer2_blockref_t *base, int count)
{
	hammer2_spin_ex(&chain->core.spin);
	if ((chain->flags & HAMMER2_CHAIN_COUNTEDBREFS) == 0) {
		if (base) {
			/*
			 * Scan backwards for the first non-empty slot to
			 * establish live_zero, then count the remaining
			 * non-empty slots.
			 */
			while (--count >= 0) {
				if (base[count].type != HAMMER2_BREF_TYPE_EMPTY)
					break;
			}
			chain->core.live_zero = count + 1;
			while (count >= 0) {
				if (base[count].type != HAMMER2_BREF_TYPE_EMPTY)
					atomic_add_int(&chain->core.live_count,
						       1);
				--count;
			}
		} else {
			chain->core.live_zero = 0;
		}
		/* else do not modify live_count */
		atomic_set_int(&chain->flags, HAMMER2_CHAIN_COUNTEDBREFS);
	}
	hammer2_spin_unex(&chain->core.spin);
}

/*
 * Resize the chain's physical storage allocation in-place.  This function does
 * not usually adjust the data pointer and must be followed by (typically) a
 * hammer2_chain_modify() call to copy any old data over and adjust the
 * data pointer.
 *
 * Chains can be resized smaller without reallocating the storage.  Resizing
 * larger will reallocate the storage.  Excess or prior storage is reclaimed
 * asynchronously at a later time.
 *
 * An nradix value of 0 is special-cased to mean that the storage should
 * be disassociated, that is the chain is being resized to 0 bytes (not 1
 * byte).
 *
 * Must be passed an exclusively locked parent and chain.
 *
 * This function is mostly used with DATA blocks locked RESOLVE_NEVER in order
 * to avoid instantiating a device buffer that conflicts with the vnode data
 * buffer.  However, because H2 can compress or encrypt data, the chain may
 * have a dio assigned to it in those situations, and they do not conflict.
 *
 * XXX return error if cannot resize.
 */
int
hammer2_chain_resize(hammer2_chain_t *chain,
		     hammer2_tid_t mtid, hammer2_off_t dedup_off,
		     int nradix, int flags)
{
	hammer2_dev_t *hmp;
	size_t obytes;
	size_t nbytes;
	int error;

	hmp = chain->hmp;

	/*
	 * Only data and indirect blocks can be resized for now.
	 * (The volu root, inodes, and freemap elements use a fixed size).
	 */
	KKASSERT(chain != &hmp->vchain);
	KKASSERT(chain->bref.type == HAMMER2_BREF_TYPE_DATA ||
		 chain->bref.type == HAMMER2_BREF_TYPE_INDIRECT ||
		 chain->bref.type == HAMMER2_BREF_TYPE_DIRENT);

	/*
	 * Nothing to do if the element is already the proper size
	 */
	obytes = chain->bytes;
	nbytes = (nradix) ? (1U << nradix) : 0;
	if (obytes == nbytes)
		return (chain->error);

	/*
	 * Make sure the old data is instantiated so we can copy it.  If this
	 * is a data block, the device data may be superfluous since the data
	 * might be in a logical block, but compressed or encrypted data is
	 * another matter.
	 *
	 * NOTE: The modify will set BMAPUPD for us if BMAPPED is set.
	 */
	error = hammer2_chain_modify(chain, mtid, dedup_off, 0);
	if (error)
		return error;

	/*
	 * Reallocate the block, even if making it smaller (because different
	 * block sizes may be in different regions).
	 *
	 * NOTE: Operation does not copy the data and may only be used
	 *	 to resize data blocks in-place, or directory entry blocks
	 *	 which are about to be modified in some manner.
	 */
	error = hammer2_freemap_alloc(chain, nbytes);
	if (error)
		return error;

	chain->bytes = nbytes;

	/*
	 * We don't want the followup chain_modify() to try to copy data
	 * from the old (wrong-sized) buffer.  It won't know how much to
	 * copy.  This case should only occur during writes when the
	 * originator already has the data to write in-hand.
	 */
	if (chain->dio) {
		KKASSERT(chain->bref.type == HAMMER2_BREF_TYPE_DATA ||
			 chain->bref.type == HAMMER2_BREF_TYPE_DIRENT);
		hammer2_io_brelse(&chain->dio);
		chain->data = NULL;
	}
	return (chain->error);
}

/*
 * Set the chain modified so its data can be changed by the caller, or
 * install deduplicated data.  The caller must call this routine for each
 * set of modifications it makes, even if the chain is already flagged
 * MODIFIED.
 *
 * Sets bref.modify_tid to mtid only if mtid != 0.  Note that bref.modify_tid
 * is a CLC (cluster level change) field and is not updated by parent
 * propagation during a flush.
 *
 * Returns an appropriate HAMMER2_ERROR_* code, which will generally reflect
 * chain->error except for HAMMER2_ERROR_ENOSPC.  If the allocation fails
 * due to no space available, HAMMER2_ERROR_ENOSPC is returned and the chain
 * remains unmodified with its old data ref intact and chain->error
 * unchanged.
 *
 *				Dedup Handling
 *
 * If the DEDUPABLE flag is set in the chain the storage must be reallocated
 * even if the chain is still flagged MODIFIED.  In this case the chain's
 * DEDUPABLE flag will be cleared once the new storage has been assigned.
 *
 * If the caller passes a non-zero dedup_off we will use it to assign the
 * new storage.  The MODIFIED flag will be *CLEARED* in this case, and
 * DEDUPABLE will be set (NOTE: the UPDATE flag is always set).  The caller
 * must not modify the data content upon return.
 */
int
hammer2_chain_modify(hammer2_chain_t *chain, hammer2_tid_t mtid,
		     hammer2_off_t dedup_off, int flags)
{
	hammer2_blockref_t obref;
	hammer2_dev_t *hmp;
	hammer2_io_t *dio;
	int error;
	int wasinitial;
	int setmodified;
	int setupdate;
	int newmod;
	char *bdata;

	hmp = chain->hmp;
	/*
	 * NOTE(review): obref snapshots the pre-modification blockref but
	 * is not referenced again in this function body — candidate for
	 * removal (confirm no #ifdef'd or future use).
	 */
	obref = chain->bref;
	KKASSERT(chain->lock.mtx_lock & MTX_EXCLUSIVE);

	/*
	 * Data is not optional for freemap chains (we must always be sure
	 * to copy the data on COW storage allocations).
	 */
	if (chain->bref.type == HAMMER2_BREF_TYPE_FREEMAP_NODE ||
	    chain->bref.type == HAMMER2_BREF_TYPE_FREEMAP_LEAF) {
		KKASSERT((chain->flags & HAMMER2_CHAIN_INITIAL) ||
			 (flags & HAMMER2_MODIFY_OPTDATA) == 0);
	}

	/*
	 * Data must be resolved if already assigned, unless explicitly
	 * flagged otherwise.  If we cannot safely load the data the
	 * modification fails and we return early.
	 */
	if (chain->data == NULL && chain->bytes != 0 &&
	    (flags & HAMMER2_MODIFY_OPTDATA) == 0 &&
	    (chain->bref.data_off & ~HAMMER2_OFF_MASK_RADIX)) {
		hammer2_chain_load_data(chain);
		if (chain->error)
			return (chain->error);
	}
	error = 0;

	/*
	 * Set MODIFIED to indicate that the chain has been modified.  A new
	 * allocation is required when modifying a chain.
	 *
	 * Set UPDATE to ensure that the blockref is updated in the parent.
	 *
	 * If MODIFIED is already set determine if we can reuse the assigned
	 * data block or if we need a new data block.
	 */
	if ((chain->flags & HAMMER2_CHAIN_MODIFIED) == 0) {
		/*
		 * Must set modified bit.
		 */
		atomic_add_long(&hammer2_count_modified_chains, 1);
		atomic_set_int(&chain->flags, HAMMER2_CHAIN_MODIFIED);
		hammer2_pfs_memory_inc(chain->pmp);  /* can be NULL */
		setmodified = 1;

		/*
		 * We may be able to avoid a copy-on-write if the chain's
		 * check mode is set to NONE and the chain's current
		 * modify_tid is beyond the last explicit snapshot tid.
		 *
		 * This implements HAMMER2's overwrite-in-place feature.
		 *
		 * NOTE! This data-block cannot be used as a de-duplication
		 *	 source when the check mode is set to NONE.
		 */
		if ((chain->bref.type == HAMMER2_BREF_TYPE_DATA ||
		     chain->bref.type == HAMMER2_BREF_TYPE_DIRENT) &&
		    (chain->flags & HAMMER2_CHAIN_INITIAL) == 0 &&
		    (chain->flags & HAMMER2_CHAIN_DEDUPABLE) == 0 &&
		    HAMMER2_DEC_CHECK(chain->bref.methods) ==
		    HAMMER2_CHECK_NONE &&
		    chain->pmp &&
		    chain->bref.modify_tid >
		     chain->pmp->iroot->meta.pfs_lsnap_tid) {
			/*
			 * Sector overwrite allowed.
			 */
			newmod = 0;
		} else if ((hmp->hflags & HMNT2_EMERG) &&
			   chain->pmp &&
			   chain->bref.modify_tid >
			    chain->pmp->iroot->meta.pfs_lsnap_tid) {
			/*
			 * If in emergency delete mode then do a modify-in-
			 * place on any chain type belonging to the PFS as
			 * long as it doesn't mess up a snapshot.  We might
			 * be forced to do this anyway a little further down
			 * in the code if the allocation fails.
			 *
			 * Also note that in emergency mode, these modify-in-
			 * place operations are NOT SAFE.  A storage failure,
			 * power failure, or panic can corrupt the filesystem.
			 */
			newmod = 0;
		} else {
			/*
			 * Sector overwrite not allowed, must copy-on-write.
			 */
			newmod = 1;
		}
	} else if (chain->flags & HAMMER2_CHAIN_DEDUPABLE) {
		/*
		 * If the modified chain was registered for dedup we need
		 * a new allocation.  This only happens for delayed-flush
		 * chains (i.e. which run through the front-end buffer
		 * cache).
		 */
		newmod = 1;
		setmodified = 0;
	} else {
		/*
		 * Already flagged modified, no new allocation is needed.
		 */
		newmod = 0;
		setmodified = 0;
	}

	/*
	 * Flag parent update required.
	 */
	if ((chain->flags & HAMMER2_CHAIN_UPDATE) == 0) {
		atomic_set_int(&chain->flags, HAMMER2_CHAIN_UPDATE);
		setupdate = 1;
	} else {
		setupdate = 0;
	}

	/*
	 * The XOP code returns held but unlocked focus chains.  This
	 * prevents the chain from being destroyed but does not prevent
	 * it from being modified.  diolk is used to interlock modifications
	 * against XOP frontend accesses to the focus.
	 *
	 * This allows us to theoretically avoid deadlocking the frontend
	 * if one of the backends lock up by not formally locking the
	 * focused chain in the frontend.  In addition, the synchronization
	 * code relies on this mechanism to avoid deadlocking concurrent
	 * synchronization threads.
	 */
	lockmgr(&chain->diolk, LK_EXCLUSIVE);

	/*
	 * The modification or re-modification requires an allocation and
	 * possible COW.  If an error occurs, the previous content and data
	 * reference is retained and the modification fails.
	 *
	 * If dedup_off is non-zero, the caller is requesting a deduplication
	 * rather than a modification.  The MODIFIED bit is not set and the
	 * data offset is set to the deduplication offset.  The data cannot
	 * be modified.
	 *
	 * NOTE: The dedup offset is allowed to be in a partially free state
	 *	 and we must be sure to reset it to a fully allocated state
	 *	 to force two bulkfree passes to free it again.
	 *
	 * NOTE: Only applicable when chain->bytes != 0.
	 *
	 * XXX can a chain already be marked MODIFIED without a data
	 * assignment?  If not, assert here instead of testing the case.
	 */
	if (chain != &hmp->vchain && chain != &hmp->fchain &&
	    chain->bytes) {
		if ((chain->bref.data_off & ~HAMMER2_OFF_MASK_RADIX) == 0 ||
		     newmod
		) {
			/*
			 * NOTE: We do not have to remove the dedup
			 *	 registration because the area is still
			 *	 allocated and the underlying DIO will
			 *	 still be flushed.
			 */
			if (dedup_off) {
				chain->bref.data_off = dedup_off;
				if ((int)(dedup_off & HAMMER2_OFF_MASK_RADIX))
					chain->bytes = 1 <<
					    (int)(dedup_off &
					    HAMMER2_OFF_MASK_RADIX);
				else
					chain->bytes = 0;
				chain->error = 0;
				atomic_clear_int(&chain->flags,
						 HAMMER2_CHAIN_MODIFIED);
				atomic_add_long(&hammer2_count_modified_chains,
						-1);
				if (chain->pmp) {
					hammer2_pfs_memory_wakeup(
						chain->pmp, -1);
				}
				hammer2_freemap_adjust(hmp, &chain->bref,
						HAMMER2_FREEMAP_DORECOVER);
				atomic_set_int(&chain->flags,
						HAMMER2_CHAIN_DEDUPABLE);
			} else {
				error = hammer2_freemap_alloc(chain,
							      chain->bytes);
				atomic_clear_int(&chain->flags,
						HAMMER2_CHAIN_DEDUPABLE);

				/*
				 * If we are unable to allocate a new block
				 * but we are in emergency mode, issue a
				 * warning to the console and reuse the same
				 * block.
				 *
				 * We behave as if the allocation were
				 * successful.
				 *
				 * THIS IS IMPORTANT: These modifications
				 * are virtually guaranteed to corrupt any
				 * snapshots related to this filesystem.
				 */
				if (error && (hmp->hflags & HMNT2_EMERG)) {
					error = 0;
					chain->bref.flags |=
						HAMMER2_BREF_FLAG_EMERG_MIP;

					krateprintf(&krate_h2em,
					    "hammer2: Emergency Mode WARNING: "
					    "Operation will likely corrupt "
					    "related snapshot: "
					    "%016jx.%02x key=%016jx\n",
					    chain->bref.data_off,
					    chain->bref.type,
					    chain->bref.key);
				} else if (error == 0) {
					chain->bref.flags &=
						~HAMMER2_BREF_FLAG_EMERG_MIP;
				}
			}
		}
	}

	/*
	 * Stop here if error.  We have to undo any flag bits we might
	 * have set above.
	 */
	if (error) {
		if (setmodified) {
			atomic_clear_int(&chain->flags, HAMMER2_CHAIN_MODIFIED);
			atomic_add_long(&hammer2_count_modified_chains, -1);
			if (chain->pmp)
				hammer2_pfs_memory_wakeup(chain->pmp, -1);
		}
		if (setupdate) {
			atomic_clear_int(&chain->flags, HAMMER2_CHAIN_UPDATE);
		}
		lockmgr(&chain->diolk, LK_RELEASE);

		return error;
	}

	/*
	 * Update mirror_tid and modify_tid.  modify_tid is only updated
	 * if not passed as zero (during flushes, parent propagation passes
	 * the value 0).
	 *
	 * NOTE: chain->pmp could be the device spmp.
	 */
	chain->bref.mirror_tid = hmp->voldata.mirror_tid + 1;
	if (mtid)
		chain->bref.modify_tid = mtid;

	/*
	 * Set BMAPUPD to tell the flush code that an existing blockmap entry
	 * requires updating as well as to tell the delete code that the
	 * chain's blockref might not exactly match (in terms of physical size
	 * or block offset) the one in the parent's blocktable.  The base key
	 * of course will still match.
	 */
	if (chain->flags & HAMMER2_CHAIN_BMAPPED)
		atomic_set_int(&chain->flags, HAMMER2_CHAIN_BMAPUPD);

	/*
	 * Short-cut data block handling when the caller does not need an
	 * actual data reference to (aka OPTDATA), as long as the chain does
	 * not already have a data pointer to the data and no de-duplication
	 * occurred.
	 *
	 * This generally means that the modifications are being done via the
	 * logical buffer cache.
	 *
	 * NOTE: If deduplication occurred we have to run through the data
	 *	 stuff to clear INITIAL, and the caller will likely want to
	 *	 assign the check code anyway.  Leaving INITIAL set on a
	 *	 dedup can be deadly (it can cause the block to be zero'd!).
	 *
	 * This code also handles bytes == 0 (most dirents).
	 */
	if (chain->bref.type == HAMMER2_BREF_TYPE_DATA &&
	    (flags & HAMMER2_MODIFY_OPTDATA) &&
	    chain->data == NULL) {
		if (dedup_off == 0) {
			KKASSERT(chain->dio == NULL);
			goto skip2;
		}
	}

	/*
	 * Clearing the INITIAL flag (for indirect blocks) indicates that
	 * we've processed the uninitialized storage allocation.
	 *
	 * If this flag is already clear we are likely in a copy-on-write
	 * situation but we have to be sure NOT to bzero the storage if
	 * no data is present.
	 *
	 * Clearing of NOTTESTED is allowed if the MODIFIED bit is set,
	 */
	if (chain->flags & HAMMER2_CHAIN_INITIAL) {
		atomic_clear_int(&chain->flags, HAMMER2_CHAIN_INITIAL);
		wasinitial = 1;
	} else {
		wasinitial = 0;
	}

	/*
	 * Instantiate data buffer and possibly execute COW operation
	 */
	switch(chain->bref.type) {
	case HAMMER2_BREF_TYPE_VOLUME:
	case HAMMER2_BREF_TYPE_FREEMAP:
		/*
		 * The data is embedded, no copy-on-write operation is
		 * needed.
		 */
		KKASSERT(chain->dio == NULL);
		break;
	case HAMMER2_BREF_TYPE_DIRENT:
		/*
		 * The data might be fully embedded.
		 */
		if (chain->bytes == 0) {
			KKASSERT(chain->dio == NULL);
			break;
		}
		/* fall through */
	case HAMMER2_BREF_TYPE_INODE:
	case HAMMER2_BREF_TYPE_FREEMAP_LEAF:
	case HAMMER2_BREF_TYPE_DATA:
	case HAMMER2_BREF_TYPE_INDIRECT:
	case HAMMER2_BREF_TYPE_FREEMAP_NODE:
		/*
		 * Perform the copy-on-write operation
		 *
		 * zero-fill or copy-on-write depending on whether
		 * chain->data exists or not and set the dirty state for
		 * the new buffer.  hammer2_io_new() will handle the
		 * zero-fill.
		 *
		 * If a dedup_off was supplied this is an existing block
		 * and no COW, copy, or further modification is required.
		 */
		KKASSERT(chain != &hmp->vchain && chain != &hmp->fchain);

		if (wasinitial && dedup_off == 0) {
			error = hammer2_io_new(hmp, chain->bref.type,
					       chain->bref.data_off,
					       chain->bytes, &dio);
		} else {
			error = hammer2_io_bread(hmp, chain->bref.type,
						 chain->bref.data_off,
						 chain->bytes, &dio);
		}
		hammer2_adjreadcounter(chain->bref.type, chain->bytes);

		/*
		 * If an I/O error occurs make sure callers cannot accidentally
		 * modify the old buffer's contents and corrupt the filesystem.
		 *
		 * NOTE: hammer2_io_data() call issues bkvasync()
		 */
		if (error) {
			kprintf("hammer2_chain_modify: hmp=%p I/O error\n",
				hmp);
			chain->error = HAMMER2_ERROR_EIO;
			hammer2_io_brelse(&dio);
			hammer2_io_brelse(&chain->dio);
			chain->data = NULL;
			break;
		}
		chain->error = 0;
		bdata = hammer2_io_data(dio, chain->bref.data_off);

		if (chain->data) {
			/*
			 * COW (unless a dedup).
			 */
			KKASSERT(chain->dio != NULL);
			if (chain->data != (void *)bdata && dedup_off == 0) {
				bcopy(chain->data, bdata, chain->bytes);
			}
		} else if (wasinitial == 0 && dedup_off == 0) {
			/*
			 * We have a problem.  We were asked to COW but
			 * we don't have any data to COW with!
			 */
			panic("hammer2_chain_modify: having a COW %p\n",
			      chain);
		}

		/*
		 * Retire the old buffer, replace with the new.  Dirty or
		 * redirty the new buffer.
		 *
		 * WARNING! The system buffer cache may have already flushed
		 *	    the buffer, so we must be sure to [re]dirty it
		 *	    for further modification.
		 *
		 *	    If dedup_off was supplied, the caller is not
		 *	    expected to make any further modification to the
		 *	    buffer.
		 *
		 * WARNING! hammer2_get_gdata() assumes dio never transitions
		 *	    through NULL in order to optimize away unnecessary
		 *	    diolk operations.
		 */
		{
			hammer2_io_t *tio;

			if ((tio = chain->dio) != NULL)
				hammer2_io_bqrelse(&tio);
			chain->data = (void *)bdata;
			chain->dio = dio;
			if (dedup_off == 0)
				hammer2_io_setdirty(dio);
		}
		break;
	default:
		panic("hammer2_chain_modify: illegal non-embedded type %d",
		      chain->bref.type);
		break;

	}
skip2:
	/*
	 * setflush on parent indicating that the parent must recurse down
	 * to us.  Do not call on chain itself which might already have it
	 * set.
	 */
	if (chain->parent)
		hammer2_chain_setflush(chain->parent);
	lockmgr(&chain->diolk, LK_RELEASE);

	return (chain->error);
}

/*
 * Modify the chain associated with an inode.
2111 */ 2112 int 2113 hammer2_chain_modify_ip(hammer2_inode_t *ip, hammer2_chain_t *chain, 2114 hammer2_tid_t mtid, int flags) 2115 { 2116 int error; 2117 2118 hammer2_inode_modify(ip); 2119 error = hammer2_chain_modify(chain, mtid, 0, flags); 2120 2121 return error; 2122 } 2123 2124 /* 2125 * This function returns the chain at the nearest key within the specified 2126 * range. The returned chain will be referenced but not locked. 2127 * 2128 * This function will recurse through chain->rbtree as necessary and will 2129 * return a *key_nextp suitable for iteration. *key_nextp is only set if 2130 * the iteration value is less than the current value of *key_nextp. 2131 * 2132 * The caller should use (*key_nextp) to calculate the actual range of 2133 * the returned element, which will be (key_beg to *key_nextp - 1), because 2134 * there might be another element which is superior to the returned element 2135 * and overlaps it. 2136 * 2137 * (*key_nextp) can be passed as key_beg in an iteration only while non-NULL 2138 * chains continue to be returned. On EOF (*key_nextp) may overflow since 2139 * it will wind up being (key_end + 1). 2140 * 2141 * WARNING! Must be called with child's spinlock held. Spinlock remains 2142 * held through the operation. 
 */
/*
 * Scan context shared by hammer2_chain_find() and its two RB_SCAN
 * callbacks below.
 */
struct hammer2_chain_find_info {
	hammer2_chain_t		*best;		/* best candidate so far */
	hammer2_key_t		key_beg;	/* inclusive search range */
	hammer2_key_t		key_end;
	hammer2_key_t		key_next;	/* iteration value (out) */
};

static int hammer2_chain_find_cmp(hammer2_chain_t *child, void *data);
static int hammer2_chain_find_callback(hammer2_chain_t *child, void *data);

/*
 * Run the RB_SCAN over parent->core.rbtree and return the best matching
 * chain (referenced by the tree, not additionally ref'd here).  Updates
 * *key_nextp from the scan's computed iteration value.
 */
static
hammer2_chain_t *
hammer2_chain_find(hammer2_chain_t *parent, hammer2_key_t *key_nextp,
		   hammer2_key_t key_beg, hammer2_key_t key_end)
{
	struct hammer2_chain_find_info info;

	info.best = NULL;
	info.key_beg = key_beg;
	info.key_end = key_end;
	info.key_next = *key_nextp;

	RB_SCAN(hammer2_chain_tree, &parent->core.rbtree,
		hammer2_chain_find_cmp, hammer2_chain_find_callback,
		&info);
	*key_nextp = info.key_next;
#if 0
	kprintf("chain_find %p %016jx:%016jx next=%016jx\n",
		parent, key_beg, key_end, *key_nextp);
#endif

	return (info.best);
}

/*
 * RB_SCAN range-compare callback.  Returns -1 if the child's key range is
 * entirely below the search range, 1 if entirely above, 0 on overlap.
 */
static
int
hammer2_chain_find_cmp(hammer2_chain_t *child, void *data)
{
	struct hammer2_chain_find_info *info = data;
	hammer2_key_t child_beg;
	hammer2_key_t child_end;

	child_beg = child->bref.key;
	child_end = child_beg + ((hammer2_key_t)1 << child->bref.keybits) - 1;

	if (child_end < info->key_beg)
		return(-1);
	if (child_beg > info->key_end)
		return(1);
	return(0);
}

/*
 * RB_SCAN per-element callback.  Tracks the nearest-key candidate in
 * info->best and truncates info->key_next so the caller can compute the
 * effective non-overlapped range of the eventual winner.
 */
static
int
hammer2_chain_find_callback(hammer2_chain_t *child, void *data)
{
	struct hammer2_chain_find_info *info = data;
	hammer2_chain_t *best;
	hammer2_key_t child_end;

	/*
	 * WARNING! Layerq is scanned forwards, exact matches should keep
	 *	    the existing info->best.
	 */
	if ((best = info->best) == NULL) {
		/*
		 * No previous best.  Assign best
		 */
		info->best = child;
	} else if (best->bref.key <= info->key_beg &&
		   child->bref.key <= info->key_beg) {
		/*
		 * Illegal overlap.
		 */
		KKASSERT(0);
		/*info->best = child;*/
	} else if (child->bref.key < best->bref.key) {
		/*
		 * Child has a nearer key and best is not flush with key_beg.
		 * Set best to child.  Truncate key_next to the old best key.
		 * (note: 'best' still references the old best here)
		 */
		info->best = child;
		if (info->key_next > best->bref.key || info->key_next == 0)
			info->key_next = best->bref.key;
	} else if (child->bref.key == best->bref.key) {
		/*
		 * If our current best is flush with the child then this
		 * is an illegal overlap.
		 *
		 * key_next will automatically be limited to the smaller of
		 * the two end-points.
		 */
		KKASSERT(0);
		info->best = child;
	} else {
		/*
		 * Keep the current best but truncate key_next to the child's
		 * base.
		 *
		 * key_next will also automatically be limited to the smaller
		 * of the two end-points (probably not necessary for this case
		 * but we do it anyway).
		 */
		if (info->key_next > child->bref.key || info->key_next == 0)
			info->key_next = child->bref.key;
	}

	/*
	 * Always truncate key_next based on child's end-of-range.
	 */
	child_end = child->bref.key + ((hammer2_key_t)1 << child->bref.keybits);
	if (child_end && (info->key_next > child_end || info->key_next == 0))
		info->key_next = child_end;

	return(0);
}

/*
 * Retrieve the specified chain from a media blockref, creating the
 * in-memory chain structure which reflects it.  The returned chain is
 * held and locked according to (how) (HAMMER2_RESOLVE_*).  The caller must
 * handle crc-checks and so forth, and should check chain->error before
 * assuming that the data is good.
 *
 * To handle insertion races pass the INSERT_RACE flag along with the
 * generation number of the core.  NULL will be returned if the generation
 * number changes before we have a chance to insert the chain.  Insert
 * races can occur because the parent might be held shared.
 *
 * Caller must hold the parent locked shared or exclusive since we may
 * need the parent's bref array to find our block.
 *
 * WARNING! chain->pmp is always set to NULL for any chain representing
 *	    part of the super-root topology.
 */
hammer2_chain_t *
hammer2_chain_get(hammer2_chain_t *parent, int generation,
		  hammer2_blockref_t *bref, int how)
{
	hammer2_dev_t *hmp = parent->hmp;
	hammer2_chain_t *chain;
	int error;

	/*
	 * Allocate a chain structure representing the existing media
	 * entry.  Resulting chain has one ref and is not locked.
	 *
	 * A PFS root gets a NULL pmp here (matches the WARNING above);
	 * everything else inherits the parent's pmp.
	 */
	if (bref->flags & HAMMER2_BREF_FLAG_PFSROOT)
		chain = hammer2_chain_alloc(hmp, NULL, bref);
	else
		chain = hammer2_chain_alloc(hmp, parent->pmp, bref);
	/* ref'd chain returned */

	/*
	 * Flag that the chain is in the parent's blockmap so delete/flush
	 * knows what to do with it.
	 */
	atomic_set_int(&chain->flags, HAMMER2_CHAIN_BMAPPED);

	/*
	 * chain must be locked to avoid unexpected ripouts
	 */
	hammer2_chain_lock(chain, how);

	/*
	 * Link the chain into its parent.  A spinlock is required to safely
	 * access the RBTREE, and it is possible to collide with another
	 * hammer2_chain_get() operation because the caller might only hold
	 * a shared lock on the parent.
	 *
	 * NOTE: Get races can occur quite often when we distribute
	 *	 asynchronous read-aheads across multiple threads.
	 */
	KKASSERT(parent->refs > 0);
	error = hammer2_chain_insert(parent, chain,
				     HAMMER2_CHAIN_INSERT_SPIN |
				     HAMMER2_CHAIN_INSERT_RACE,
				     generation);
	if (error) {
		/* lost the insertion race; discard our chain entirely */
		KKASSERT((chain->flags & HAMMER2_CHAIN_ONRBTREE) == 0);
		/*kprintf("chain %p get race\n", chain);*/
		hammer2_chain_unlock(chain);
		hammer2_chain_drop(chain);
		chain = NULL;
	} else {
		KKASSERT(chain->flags & HAMMER2_CHAIN_ONRBTREE);
	}

	/*
	 * Return our new chain referenced but not locked, or NULL if
	 * a race occurred.
	 */
	return (chain);
}

/*
 * Lookup initialization/completion API
 *
 * lookup_init refs and locks (parent) per the lookup flags; lookup_done
 * undoes that (and tolerates a NULL parent).
 */
hammer2_chain_t *
hammer2_chain_lookup_init(hammer2_chain_t *parent, int flags)
{
	hammer2_chain_ref(parent);
	if (flags & HAMMER2_LOOKUP_SHARED) {
		hammer2_chain_lock(parent, HAMMER2_RESOLVE_ALWAYS |
					   HAMMER2_RESOLVE_SHARED);
	} else {
		hammer2_chain_lock(parent, HAMMER2_RESOLVE_ALWAYS);
	}
	return (parent);
}

void
hammer2_chain_lookup_done(hammer2_chain_t *parent)
{
	if (parent) {
		hammer2_chain_unlock(parent);
		hammer2_chain_drop(parent);
	}
}

/*
 * Take the locked chain and return a locked parent.  The chain remains
 * locked on return, but may have to be temporarily unlocked to acquire
 * the parent.  Because of this, (chain) must be stable and cannot be
 * deleted while it was temporarily unlocked (typically means that (chain)
 * is an inode).
 *
 * Pass HAMMER2_RESOLVE_* flags in flags.
 *
 * This will work even if the chain is errored, and the caller can check
 * parent->error on return if desired since the parent will be locked.
 *
 * This function handles the lock order reversal.
 */
hammer2_chain_t *
hammer2_chain_getparent(hammer2_chain_t *chain, int flags)
{
	hammer2_chain_t *parent;

	/*
	 * Be careful of order, chain must be unlocked before parent
	 * is locked below to avoid a deadlock.  Try it trivially first.
	 *
	 * A non-blocking lock attempt avoids the lock order reversal
	 * entirely in the common uncontended case.
	 */
	parent = chain->parent;
	if (parent == NULL)
		panic("hammer2_chain_getparent: no parent");
	hammer2_chain_ref(parent);
	if (hammer2_chain_lock(parent, flags|HAMMER2_RESOLVE_NONBLOCK) == 0)
		return parent;

	for (;;) {
		/*
		 * Drop chain's lock so parent can be locked first (the
		 * required order), then re-lock chain.
		 */
		hammer2_chain_unlock(chain);
		hammer2_chain_lock(parent, flags);
		hammer2_chain_lock(chain, flags);

		/*
		 * Parent relinking races are quite common.  We have to get
		 * it right or we will blow up the block table.
		 */
		if (chain->parent == parent)
			break;
		hammer2_chain_unlock(parent);
		hammer2_chain_drop(parent);
		cpu_ccfence();
		parent = chain->parent;
		if (parent == NULL)
			panic("hammer2_chain_getparent: no parent");
		hammer2_chain_ref(parent);
	}
	return parent;
}

/*
 * Take the locked chain and return a locked parent.  The chain is unlocked
 * and dropped.  *chainp is set to the returned parent as a convenience.
 * Pass HAMMER2_RESOLVE_* flags in flags.
 *
 * This will work even if the chain is errored, and the caller can check
 * parent->error on return if desired since the parent will be locked.
 *
 * The chain does NOT need to be stable.  We use a tracking structure
 * to track the expected parent if the chain is deleted out from under us.
 *
 * This function handles the lock order reversal.
 */
hammer2_chain_t *
hammer2_chain_repparent(hammer2_chain_t **chainp, int flags)
{
	hammer2_chain_t *chain;
	hammer2_chain_t *parent;
	struct hammer2_reptrack reptrack;
	struct hammer2_reptrack **repp;

	/*
	 * Be careful of order, chain must be unlocked before parent
	 * is locked below to avoid a deadlock.  Try it trivially first.
	 */
	chain = *chainp;
	parent = chain->parent;
	if (parent == NULL) {
		/*
		 * NOTE(review): no spinlock acquisition is visible on this
		 * path before the unex; panic() does not return so this is
		 * at most cosmetic — confirm against caller expectations.
		 */
		hammer2_spin_unex(&chain->core.spin);
		panic("hammer2_chain_repparent: no parent");
	}
	hammer2_chain_ref(parent);
	if (hammer2_chain_lock(parent, flags|HAMMER2_RESOLVE_NONBLOCK) == 0) {
		hammer2_chain_unlock(chain);
		hammer2_chain_drop(chain);
		*chainp = parent;

		return parent;
	}

	/*
	 * Ok, now it gets a bit nasty.  There are multiple situations where
	 * the parent might be in the middle of a deletion, or where the child
	 * (chain) might be deleted the instant we let go of its lock.
	 * We can potentially end up in a no-win situation!
	 *
	 * In particular, the indirect_maintenance() case can cause these
	 * situations.
	 *
	 * To deal with this we install a reptrack structure in the parent.
	 * This reptrack structure 'owns' the parent ref and will automatically
	 * migrate to the parent's parent if the parent is deleted permanently.
	 */
	hammer2_spin_init(&reptrack.spin, "h2reptrk");
	reptrack.chain = parent;
	hammer2_chain_ref(parent);		/* for the reptrack */

	hammer2_spin_ex(&parent->core.spin);
	reptrack.next = parent->core.reptrack;
	parent->core.reptrack = &reptrack;
	hammer2_spin_unex(&parent->core.spin);

	hammer2_chain_unlock(chain);
	hammer2_chain_drop(chain);
	chain = NULL;	/* gone */

	/*
	 * At the top of this loop, chain is gone and parent is refd both
	 * by us explicitly AND via our reptrack.  We are attempting to
	 * lock parent.
	 */
	for (;;) {
		hammer2_chain_lock(parent, flags);

		if (reptrack.chain == parent)
			break;
		hammer2_chain_unlock(parent);
		hammer2_chain_drop(parent);

		kprintf("hammer2: debug REPTRACK %p->%p\n",
			parent, reptrack.chain);
		/*
		 * The reptrack migrated (old parent was deleted); chase
		 * the new parent under the reptrack's own spinlock.
		 */
		hammer2_spin_ex(&reptrack.spin);
		parent = reptrack.chain;
		hammer2_chain_ref(parent);
		hammer2_spin_unex(&reptrack.spin);
	}

	/*
	 * Once parent is locked and matches our reptrack, our reptrack
	 * will be stable and we have our parent.  We can unlink our
	 * reptrack.
	 *
	 * WARNING!  Remember that the chain lock might be shared.  Chains
	 *	     locked shared have stable parent linkages.
	 */
	hammer2_spin_ex(&parent->core.spin);
	repp = &parent->core.reptrack;
	while (*repp != &reptrack)
		repp = &(*repp)->next;
	*repp = reptrack.next;
	hammer2_spin_unex(&parent->core.spin);

	hammer2_chain_drop(parent);	/* reptrack ref */
	*chainp = parent;		/* return parent lock+ref */

	return parent;
}

/*
 * Dispose of any linked reptrack structures in (chain) by shifting them to
 * (parent).  Both (chain) and (parent) must be exclusively locked.
 *
 * This is interlocked against any children of (chain) on the other side.
 * No children may remain as-of when this is called so we can test
 * core.reptrack without holding the spin-lock.
 *
 * Used whenever the caller intends to permanently delete chains related
 * to topological recursions (BREF_TYPE_INDIRECT, BREF_TYPE_FREEMAP_NODE),
 * where the chains underneath the node being deleted are given a new parent
 * above the node being deleted.
2536 */ 2537 static 2538 void 2539 hammer2_chain_repchange(hammer2_chain_t *parent, hammer2_chain_t *chain) 2540 { 2541 struct hammer2_reptrack *reptrack; 2542 2543 KKASSERT(chain->core.live_count == 0 && RB_EMPTY(&chain->core.rbtree)); 2544 while (chain->core.reptrack) { 2545 hammer2_spin_ex(&parent->core.spin); 2546 hammer2_spin_ex(&chain->core.spin); 2547 reptrack = chain->core.reptrack; 2548 if (reptrack == NULL) { 2549 hammer2_spin_unex(&chain->core.spin); 2550 hammer2_spin_unex(&parent->core.spin); 2551 break; 2552 } 2553 hammer2_spin_ex(&reptrack->spin); 2554 chain->core.reptrack = reptrack->next; 2555 reptrack->chain = parent; 2556 reptrack->next = parent->core.reptrack; 2557 parent->core.reptrack = reptrack; 2558 hammer2_chain_ref(parent); /* reptrack */ 2559 2560 hammer2_spin_unex(&chain->core.spin); 2561 hammer2_spin_unex(&parent->core.spin); 2562 kprintf("hammer2: debug repchange %p %p->%p\n", 2563 reptrack, chain, parent); 2564 hammer2_chain_drop(chain); /* reptrack */ 2565 } 2566 } 2567 2568 /* 2569 * Locate the first chain whos key range overlaps (key_beg, key_end) inclusive. 2570 * (*parentp) typically points to an inode but can also point to a related 2571 * indirect block and this function will recurse upwards and find the inode 2572 * or the nearest undeleted indirect block covering the key range. 2573 * 2574 * This function unconditionally sets *errorp, replacing any previous value. 2575 * 2576 * (*parentp) must be exclusive or shared locked (depending on flags) and 2577 * referenced and can be an inode or an existing indirect block within the 2578 * inode. 2579 * 2580 * If (*parent) is errored out, this function will not attempt to recurse 2581 * the radix tree and will return NULL along with an appropriate *errorp. 2582 * If NULL is returned and *errorp is 0, the requested lookup could not be 2583 * located. 
2584 * 2585 * On return (*parentp) will be modified to point at the deepest parent chain 2586 * element encountered during the search, as a helper for an insertion or 2587 * deletion. 2588 * 2589 * The new (*parentp) will be locked shared or exclusive (depending on flags), 2590 * and referenced, and the old will be unlocked and dereferenced (no change 2591 * if they are both the same). This is particularly important if the caller 2592 * wishes to insert a new chain, (*parentp) will be set properly even if NULL 2593 * is returned, as long as no error occurred. 2594 * 2595 * The matching chain will be returned locked according to flags. 2596 * 2597 * -- 2598 * 2599 * NULL is returned if no match was found, but (*parentp) will still 2600 * potentially be adjusted. 2601 * 2602 * On return (*key_nextp) will point to an iterative value for key_beg. 2603 * (If NULL is returned (*key_nextp) is set to (key_end + 1)). 2604 * 2605 * This function will also recurse up the chain if the key is not within the 2606 * current parent's range. (*parentp) can never be set to NULL. An iteration 2607 * can simply allow (*parentp) to float inside the loop. 2608 * 2609 * NOTE! chain->data is not always resolved. By default it will not be 2610 * resolved for BREF_TYPE_DATA, FREEMAP_NODE, or FREEMAP_LEAF. Use 2611 * HAMMER2_LOOKUP_ALWAYS to force resolution (but be careful w/ 2612 * BREF_TYPE_DATA as the device buffer can alias the logical file 2613 * buffer). 
 */

hammer2_chain_t *
hammer2_chain_lookup(hammer2_chain_t **parentp, hammer2_key_t *key_nextp,
		     hammer2_key_t key_beg, hammer2_key_t key_end,
		     int *errorp, int flags)
{
	hammer2_dev_t *hmp;
	hammer2_chain_t *parent;
	hammer2_chain_t *chain;
	hammer2_blockref_t *base;
	hammer2_blockref_t *bref;
	hammer2_blockref_t bsave;
	hammer2_key_t scan_beg;
	hammer2_key_t scan_end;
	int count = 0;
	int how_always = HAMMER2_RESOLVE_ALWAYS;
	int how_maybe = HAMMER2_RESOLVE_MAYBE;
	int how;
	int generation;
	int maxloops = 300000;
	volatile hammer2_mtx_t save_mtx;

	/*
	 * Translate HAMMER2_LOOKUP_* flags into the three RESOLVE modes
	 * used below (always / maybe / per-leaf).
	 */
	if (flags & HAMMER2_LOOKUP_ALWAYS) {
		how_maybe = how_always;
		how = HAMMER2_RESOLVE_ALWAYS;
	} else if (flags & HAMMER2_LOOKUP_NODATA) {
		how = HAMMER2_RESOLVE_NEVER;
	} else {
		how = HAMMER2_RESOLVE_MAYBE;
	}
	if (flags & HAMMER2_LOOKUP_SHARED) {
		how_maybe |= HAMMER2_RESOLVE_SHARED;
		how_always |= HAMMER2_RESOLVE_SHARED;
		how |= HAMMER2_RESOLVE_SHARED;
	}

	/*
	 * Recurse (*parentp) upward if necessary until the parent completely
	 * encloses the key range or we hit the inode.
	 *
	 * Handle races against the flusher deleting indirect nodes on its
	 * way back up by continuing to recurse upward past the deletion.
	 */
	parent = *parentp;
	hmp = parent->hmp;
	*errorp = 0;

	while (parent->bref.type == HAMMER2_BREF_TYPE_INDIRECT ||
	       parent->bref.type == HAMMER2_BREF_TYPE_FREEMAP_NODE) {
		scan_beg = parent->bref.key;
		scan_end = scan_beg +
			   ((hammer2_key_t)1 << parent->bref.keybits) - 1;
		if ((parent->flags & HAMMER2_CHAIN_DELETED) == 0) {
			if (key_beg >= scan_beg && key_end <= scan_end)
				break;
		}
		parent = hammer2_chain_repparent(parentp, how_maybe);
	}
again:
	if (--maxloops == 0)
		panic("hammer2_chain_lookup: maxloops");

	/*
	 * MATCHIND case that does not require parent->data (do prior to
	 * parent->error check).
	 */
	switch(parent->bref.type) {
	case HAMMER2_BREF_TYPE_FREEMAP_NODE:
	case HAMMER2_BREF_TYPE_INDIRECT:
		if (flags & HAMMER2_LOOKUP_MATCHIND) {
			scan_beg = parent->bref.key;
			scan_end = scan_beg +
			       ((hammer2_key_t)1 << parent->bref.keybits) - 1;
			if (key_beg == scan_beg && key_end == scan_end) {
				chain = parent;
				hammer2_chain_ref(chain);
				hammer2_chain_lock(chain, how_maybe);
				*key_nextp = scan_end + 1;
				goto done;
			}
		}
		break;
	default:
		break;
	}

	/*
	 * No lookup is possible if the parent is errored.  We delayed
	 * this check as long as we could to ensure that the parent backup,
	 * embedded data, and MATCHIND code could still execute.
	 *
	 * NOTE(review): this path returns without updating *key_nextp;
	 * callers must rely solely on *errorp here — confirm intended.
	 */
	if (parent->error) {
		*errorp = parent->error;
		return NULL;
	}

	/*
	 * Locate the blockref array.  Currently we do a fully associative
	 * search through the array.
	 */
	switch(parent->bref.type) {
	case HAMMER2_BREF_TYPE_INODE:
		/*
		 * Special shortcut for embedded data returns the inode
		 * itself.  Callers must detect this condition and access
		 * the embedded data (the strategy code does this for us).
		 *
		 * This is only applicable to regular files and softlinks.
		 *
		 * We need a second lock on parent.  Since we already have
		 * a lock we must pass LOCKAGAIN to prevent unexpected
		 * blocking (we don't want to block on a second shared
		 * ref if an exclusive lock is pending)
		 */
		if (parent->data->ipdata.meta.op_flags &
		    HAMMER2_OPFLAG_DIRECTDATA) {
			if (flags & HAMMER2_LOOKUP_NODIRECT) {
				chain = NULL;
				*key_nextp = key_end + 1;
				goto done;
			}
			hammer2_chain_ref(parent);
			hammer2_chain_lock(parent, how_always |
						   HAMMER2_RESOLVE_LOCKAGAIN);
			*key_nextp = key_end + 1;
			return (parent);
		}
		base = &parent->data->ipdata.u.blockset.blockref[0];
		count = HAMMER2_SET_COUNT;
		break;
	case HAMMER2_BREF_TYPE_FREEMAP_NODE:
	case HAMMER2_BREF_TYPE_INDIRECT:
		/*
		 * Optimize indirect blocks in the INITIAL state to avoid
		 * I/O.
		 *
		 * Debugging: Enter permanent wait state instead of
		 * panicking on unexpectedly NULL data for the moment.
		 */
		if (parent->flags & HAMMER2_CHAIN_INITIAL) {
			base = NULL;
		} else {
			if (parent->data == NULL) {
				kprintf("hammer2: unexpected NULL data "
					"on %p\n", parent);
				while (1)
					tsleep(parent, 0, "xxx", 0);
			}
			base = &parent->data->npdata[0];
		}
		count = parent->bytes / sizeof(hammer2_blockref_t);
		break;
	case HAMMER2_BREF_TYPE_VOLUME:
		base = &parent->data->voldata.sroot_blockset.blockref[0];
		count = HAMMER2_SET_COUNT;
		break;
	case HAMMER2_BREF_TYPE_FREEMAP:
		base = &parent->data->blkset.blockref[0];
		count = HAMMER2_SET_COUNT;
		break;
	default:
		panic("hammer2_chain_lookup: unrecognized "
		      "blockref(B) type: %d",
		      parent->bref.type);
		base = NULL;	/* safety */
		count = 0;	/* safety */
		break;
	}

	/*
	 * Merged scan to find next candidate.
	 *
	 * hammer2_base_*() functions require the parent->core.live_* fields
	 * to be synchronized.
	 *
	 * We need to hold the spinlock to access the block array and RB tree
	 * and to interlock chain creation.
	 */
	if ((parent->flags & HAMMER2_CHAIN_COUNTEDBREFS) == 0)
		hammer2_chain_countbrefs(parent, base, count);

	/*
	 * Combined search
	 */
	hammer2_spin_ex(&parent->core.spin);
	chain = hammer2_combined_find(parent, base, count,
				      key_nextp,
				      key_beg, key_end,
				      &bref);
	generation = parent->core.generation;

	/*
	 * Exhausted parent chain, iterate.
	 */
	if (bref == NULL) {
		KKASSERT(chain == NULL);
		hammer2_spin_unex(&parent->core.spin);
		if (key_beg == key_end)	/* short cut single-key case */
			return (NULL);

		/*
		 * Stop if we reached the end of the iteration.
		 */
		if (parent->bref.type != HAMMER2_BREF_TYPE_INDIRECT &&
		    parent->bref.type != HAMMER2_BREF_TYPE_FREEMAP_NODE) {
			return (NULL);
		}

		/*
		 * Calculate next key, stop if we reached the end of the
		 * iteration, otherwise go up one level and loop.
		 */
		key_beg = parent->bref.key +
			  ((hammer2_key_t)1 << parent->bref.keybits);
		if (key_beg == 0 || key_beg > key_end)
			return (NULL);
		parent = hammer2_chain_repparent(parentp, how_maybe);
		goto again;
	}

	/*
	 * Selected from blockref or in-memory chain.
	 */
	bsave = *bref;
	if (chain == NULL) {
		hammer2_spin_unex(&parent->core.spin);
		if (bsave.type == HAMMER2_BREF_TYPE_INDIRECT ||
		    bsave.type == HAMMER2_BREF_TYPE_FREEMAP_NODE) {
			chain = hammer2_chain_get(parent, generation,
						  &bsave, how_maybe);
		} else {
			chain = hammer2_chain_get(parent, generation,
						  &bsave, how);
		}
		if (chain == NULL)
			goto again;
	} else {
		hammer2_chain_ref(chain);
		hammer2_spin_unex(&parent->core.spin);

		/*
		 * chain is referenced but not locked.  We must lock the
		 * chain to obtain definitive state.
		 */
		if (bsave.type == HAMMER2_BREF_TYPE_INDIRECT ||
		    bsave.type == HAMMER2_BREF_TYPE_FREEMAP_NODE) {
			hammer2_chain_lock(chain, how_maybe);
		} else {
			hammer2_chain_lock(chain, how);
		}
		KKASSERT(chain->parent == parent);
	}
	/*
	 * Re-validate after the lock: if the bref changed or the chain was
	 * re-parented while we were acquiring the lock, retry the lookup.
	 */
	if (bcmp(&bsave, &chain->bref, sizeof(bsave)) ||
	    chain->parent != parent) {
		hammer2_chain_unlock(chain);
		hammer2_chain_drop(chain);
		chain = NULL;	/* SAFETY */
		goto again;
	}


	/*
	 * Skip deleted chains (XXX cache 'i' end-of-block-array? XXX)
	 *
	 * NOTE: Chain's key range is not relevant as there might be
	 *	 one-offs within the range that are not deleted.
	 *
	 * NOTE: Lookups can race delete-duplicate because
	 *	 delete-duplicate does not lock the parent's core
	 *	 (they just use the spinlock on the core).
	 */
	if (chain->flags & HAMMER2_CHAIN_DELETED) {
		kprintf("skip deleted chain %016jx.%02x key=%016jx\n",
			chain->bref.data_off, chain->bref.type,
			chain->bref.key);
		hammer2_chain_unlock(chain);
		hammer2_chain_drop(chain);
		chain = NULL;	/* SAFETY */
		key_beg = *key_nextp;
		if (key_beg == 0 || key_beg > key_end)
			return(NULL);
		goto again;
	}

	/*
	 * If the chain element is an indirect block it becomes the new
	 * parent and we loop on it.  We must maintain our top-down locks
	 * to prevent the flusher from interfering (i.e. doing a
	 * delete-duplicate and leaving us recursing down a deleted chain).
	 *
	 * The parent always has to be locked with at least RESOLVE_MAYBE
	 * so we can access its data.  It might need a fixup if the caller
	 * passed incompatible flags.  Be careful not to cause a deadlock
	 * as a data-load requires an exclusive lock.
	 *
	 * If HAMMER2_LOOKUP_MATCHIND is set and the indirect block's key
	 * range is within the requested key range we return the indirect
	 * block and do NOT loop.  This is usually only used to acquire
	 * freemap nodes.
	 */
	if (chain->bref.type == HAMMER2_BREF_TYPE_INDIRECT ||
	    chain->bref.type == HAMMER2_BREF_TYPE_FREEMAP_NODE) {
		/*
		 * NOTE(review): save_mtx is a dead store (never read);
		 * it appears to be a debugging artifact — candidate for
		 * removal, confirm against upstream.
		 */
		save_mtx = parent->lock;
		hammer2_chain_unlock(parent);
		hammer2_chain_drop(parent);
		*parentp = parent = chain;
		chain = NULL;	/* SAFETY */
		goto again;
	}
done:
	/*
	 * All done, return the locked chain.
	 *
	 * If the caller does not want a locked chain, replace the lock with
	 * a ref.  Perhaps this can eventually be optimized to not obtain the
	 * lock in the first place for situations where the data does not
	 * need to be resolved.
	 *
	 * NOTE! A chain->error must be tested by the caller upon return.
	 *	 *errorp is only set based on issues which occur while
	 *	 trying to reach the chain.
	 */
	return (chain);
}

/*
 * After having issued a lookup we can iterate all matching keys.
 *
 * If chain is non-NULL we continue the iteration from just after its index.
 *
 * If chain is NULL we assume the parent was exhausted and continue the
 * iteration at the next parent.
 *
 * If a fatal error occurs (typically an I/O error), a dummy chain is
 * returned with chain->error and error-identifying information set.  This
 * chain will assert if you try to do anything fancy with it.
 *
 * XXX Depending on where the error occurs we should allow continued iteration.
 *
 * parent must be locked on entry and remains locked throughout.  chain's
 * lock status must match flags.  Chain is always at least referenced.
 *
 * WARNING!  The MATCHIND flag does not apply to this function.
 */
hammer2_chain_t *
hammer2_chain_next(hammer2_chain_t **parentp, hammer2_chain_t *chain,
		   hammer2_key_t *key_nextp,
		   hammer2_key_t key_beg, hammer2_key_t key_end,
		   int *errorp, int flags)
{
	hammer2_chain_t *parent;
	int how_maybe;

	/*
	 * Calculate locking flags for upward recursion.
	 */
	how_maybe = HAMMER2_RESOLVE_MAYBE;
	if (flags & HAMMER2_LOOKUP_SHARED)
		how_maybe |= HAMMER2_RESOLVE_SHARED;

	parent = *parentp;
	*errorp = 0;

	/*
	 * Calculate the next index and recalculate the parent if necessary.
	 */
	if (chain) {
		/*
		 * Advance key_beg past the previous result's key range,
		 * then release the previous result.
		 */
		key_beg = chain->bref.key +
			  ((hammer2_key_t)1 << chain->bref.keybits);
		hammer2_chain_unlock(chain);
		hammer2_chain_drop(chain);

		/*
		 * chain invalid past this point, but we can still do a
		 * pointer comparison w/parent.
		 *
		 * Any scan where the lookup returned degenerate data embedded
		 * in the inode has an invalid index and must terminate.
		 */
		if (chain == parent)
			return(NULL);
		if (key_beg == 0 || key_beg > key_end)
			return(NULL);
		chain = NULL;
	} else if (parent->bref.type != HAMMER2_BREF_TYPE_INDIRECT &&
		   parent->bref.type != HAMMER2_BREF_TYPE_FREEMAP_NODE) {
		/*
		 * We reached the end of the iteration.
		 */
		return (NULL);
	} else {
		/*
		 * Continue iteration with next parent unless the current
		 * parent covers the range.
		 *
		 * (This also handles the case of a deleted, empty indirect
		 * node).
		 */
		key_beg = parent->bref.key +
			  ((hammer2_key_t)1 << parent->bref.keybits);
		if (key_beg == 0 || key_beg > key_end)
			return (NULL);
		parent = hammer2_chain_repparent(parentp, how_maybe);
	}

	/*
	 * And execute
	 */
	return (hammer2_chain_lookup(parentp, key_nextp,
				     key_beg, key_end,
				     errorp, flags));
}

/*
 * Caller wishes to iterate chains under parent, loading new chains into
 * chainp.  Caller must initialize *chainp to NULL and *firstp to 1, and
 * then call hammer2_chain_scan() repeatedly until a non-zero return.
 * During the scan, *firstp will be set to 0 and (*chainp) will be replaced
 * with the returned chain for the scan.  The returned *chainp will be
 * locked and referenced.  Any prior contents will be unlocked and dropped.
 *
 * Caller should check the return value.  A normal scan EOF will return
 * exactly HAMMER2_ERROR_EOF.  Any other non-zero value indicates an
 * error trying to access parent data.  Any error in the returned chain
 * must be tested separately by the caller.
 *
 * (*chainp) is dropped on each scan, but will only be set if the returned
 * element itself can recurse.  Leaf elements are NOT resolved, loaded, or
 * returned via *chainp.  The caller will get their bref only.
 *
 * The raw scan function is similar to lookup/next but does not seek to a key.
 * Blockrefs are iterated via first_bref = (parent, NULL) and
 * next_chain = (parent, bref).
 *
 * The passed-in parent must be locked and its data resolved.  The function
 * nominally returns a locked and referenced *chainp != NULL for chains
 * the caller might need to recurse on (and will dispose of any *chainp passed
 * in).  The caller must check the chain->bref.type either way.
 */
int
hammer2_chain_scan(hammer2_chain_t *parent, hammer2_chain_t **chainp,
		   hammer2_blockref_t *bref, int *firstp,
		   int flags)
{
	hammer2_dev_t *hmp;
	hammer2_blockref_t *base;
	hammer2_blockref_t *bref_ptr;
	hammer2_key_t key;
	hammer2_key_t next_key;
	hammer2_chain_t *chain = NULL;
	int count = 0;
	int how_always = HAMMER2_RESOLVE_ALWAYS;
	int how_maybe = HAMMER2_RESOLVE_MAYBE;
	int how;
	int generation;
	int maxloops = 300000;	/* sanity cap on the retry loop */
	int error;

	hmp = parent->hmp;
	error = 0;

	/*
	 * Scan flags borrowed from lookup.
	 */
	if (flags & HAMMER2_LOOKUP_ALWAYS) {
		how_maybe = how_always;
		how = HAMMER2_RESOLVE_ALWAYS;
	} else if (flags & HAMMER2_LOOKUP_NODATA) {
		how = HAMMER2_RESOLVE_NEVER;
	} else {
		how = HAMMER2_RESOLVE_MAYBE;
	}
	if (flags & HAMMER2_LOOKUP_SHARED) {
		how_maybe |= HAMMER2_RESOLVE_SHARED;
		how_always |= HAMMER2_RESOLVE_SHARED;
		how |= HAMMER2_RESOLVE_SHARED;
	}

	/*
	 * Calculate key to locate first/next element, unlocking the previous
	 * element as we go.  Be careful, the key calculation can overflow.
	 *
	 * (also reset bref to NULL)
	 */
	if (*firstp) {
		key = 0;
		*firstp = 0;
	} else {
		key = bref->key + ((hammer2_key_t)1 << bref->keybits);
		if ((chain = *chainp) != NULL) {
			*chainp = NULL;
			hammer2_chain_unlock(chain);
			hammer2_chain_drop(chain);
			chain = NULL;
		}
		/* key wrapped to 0: previous element covered the top of
		 * the keyspace, iteration is complete. */
		if (key == 0) {
			error |= HAMMER2_ERROR_EOF;
			goto done;
		}
	}

again:
	if (parent->error) {
		error = parent->error;
		goto done;
	}
	if (--maxloops == 0)
		panic("hammer2_chain_scan: maxloops");

	/*
	 * Locate the blockref array.  Currently we do a fully associative
	 * search through the array.
	 */
	switch(parent->bref.type) {
	case HAMMER2_BREF_TYPE_INODE:
		/*
		 * An inode with embedded data has no sub-chains.
		 *
		 * WARNING! Bulk scan code may pass a static chain marked
		 *	    as BREF_TYPE_INODE with a copy of the volume
		 *	    root blockset to snapshot the volume.
		 */
		if (parent->data->ipdata.meta.op_flags &
		    HAMMER2_OPFLAG_DIRECTDATA) {
			error |= HAMMER2_ERROR_EOF;
			goto done;
		}
		base = &parent->data->ipdata.u.blockset.blockref[0];
		count = HAMMER2_SET_COUNT;
		break;
	case HAMMER2_BREF_TYPE_FREEMAP_NODE:
	case HAMMER2_BREF_TYPE_INDIRECT:
		/*
		 * Optimize indirect blocks in the INITIAL state to avoid
		 * I/O.
		 */
		if (parent->flags & HAMMER2_CHAIN_INITIAL) {
			base = NULL;
		} else {
			if (parent->data == NULL)
				panic("parent->data is NULL");
			base = &parent->data->npdata[0];
		}
		count = parent->bytes / sizeof(hammer2_blockref_t);
		break;
	case HAMMER2_BREF_TYPE_VOLUME:
		base = &parent->data->voldata.sroot_blockset.blockref[0];
		count = HAMMER2_SET_COUNT;
		break;
	case HAMMER2_BREF_TYPE_FREEMAP:
		base = &parent->data->blkset.blockref[0];
		count = HAMMER2_SET_COUNT;
		break;
	default:
		panic("hammer2_chain_scan: unrecognized blockref type: %d",
		      parent->bref.type);
		base = NULL;	/* safety */
		count = 0;	/* safety */
		break;
	}

	/*
	 * Merged scan to find next candidate.
	 *
	 * hammer2_base_*() functions require the parent->core.live_* fields
	 * to be synchronized.
	 *
	 * We need to hold the spinlock to access the block array and RB tree
	 * and to interlock chain creation.
	 */
	if ((parent->flags & HAMMER2_CHAIN_COUNTEDBREFS) == 0)
		hammer2_chain_countbrefs(parent, base, count);

	next_key = 0;
	bref_ptr = NULL;
	hammer2_spin_ex(&parent->core.spin);
	chain = hammer2_combined_find(parent, base, count,
				      &next_key,
				      key, HAMMER2_KEY_MAX,
				      &bref_ptr);
	/* snapshot generation under the spinlock so chain_get can detect
	 * a parent modification race below. */
	generation = parent->core.generation;

	/*
	 * Exhausted parent chain, we're done.
	 */
	if (bref_ptr == NULL) {
		hammer2_spin_unex(&parent->core.spin);
		KKASSERT(chain == NULL);
		error |= HAMMER2_ERROR_EOF;
		goto done;
	}

	/*
	 * Copy into the supplied stack-based blockref.
	 */
	*bref = *bref_ptr;

	/*
	 * Selected from blockref or in-memory chain.
	 */
	if (chain == NULL) {
		switch(bref->type) {
		case HAMMER2_BREF_TYPE_INODE:
		case HAMMER2_BREF_TYPE_FREEMAP_NODE:
		case HAMMER2_BREF_TYPE_INDIRECT:
		case HAMMER2_BREF_TYPE_VOLUME:
		case HAMMER2_BREF_TYPE_FREEMAP:
			/*
			 * Recursion, always get the chain
			 */
			hammer2_spin_unex(&parent->core.spin);
			chain = hammer2_chain_get(parent, generation,
						  bref, how);
			if (chain == NULL)
				goto again;
			break;
		default:
			/*
			 * No recursion, do not waste time instantiating
			 * a chain, just iterate using the bref.
			 */
			hammer2_spin_unex(&parent->core.spin);
			break;
		}
	} else {
		/*
		 * Recursion or not we need the chain in order to supply
		 * the bref.
		 */
		hammer2_chain_ref(chain);
		hammer2_spin_unex(&parent->core.spin);
		hammer2_chain_lock(chain, how);
	}
	/*
	 * If the chain raced a modification (bref no longer matches) or
	 * was reparented while we blocked on the lock, retry the scan.
	 */
	if (chain &&
	    (bcmp(bref, &chain->bref, sizeof(*bref)) ||
	     chain->parent != parent)) {
		hammer2_chain_unlock(chain);
		hammer2_chain_drop(chain);
		chain = NULL;
		goto again;
	}

	/*
	 * Skip deleted chains (XXX cache 'i' end-of-block-array? XXX)
	 *
	 * NOTE: chain's key range is not relevant as there might be
	 *	 one-offs within the range that are not deleted.
	 *
	 * NOTE: XXX this could create problems with scans used in
	 *	 situations other than mount-time recovery.
	 *
	 * NOTE: Lookups can race delete-duplicate because
	 *	 delete-duplicate does not lock the parent's core
	 *	 (they just use the spinlock on the core).
	 */
	if (chain && (chain->flags & HAMMER2_CHAIN_DELETED)) {
		hammer2_chain_unlock(chain);
		hammer2_chain_drop(chain);
		chain = NULL;

		key = next_key;
		if (key == 0) {
			error |= HAMMER2_ERROR_EOF;
			goto done;
		}
		goto again;
	}

done:
	/*
	 * All done, return the bref or NULL, supply chain if necessary.
	 */
	if (chain)
		*chainp = chain;
	return (error);
}

/*
 * Create and return a new hammer2 system memory structure of the specified
 * key, type and size and insert it under (*parentp).  This is a full
 * insertion, based on the supplied key/keybits, and may involve creating
 * indirect blocks and moving other chains around via delete/duplicate.
 *
 * This call can be made with parent == NULL as long as a non -1 methods
 * value is supplied.  hmp must also be supplied in this situation (otherwise
 * hmp is extracted from the supplied parent).  The chain will be detached
 * from the topology.  A later call with both parent and chain can be made
 * to attach it.
 *
 * THE CALLER MUST HAVE ALREADY PROPERLY SEEKED (*parentp) TO THE INSERTION
 * POINT SANS ANY REQUIRED INDIRECT BLOCK CREATIONS DUE TO THE ARRAY BEING
 * FULL.  This typically means that the caller is creating the chain after
 * doing a hammer2_chain_lookup().
 *
 * (*parentp) must be exclusive locked and may be replaced on return
 * depending on how much work the function had to do.
3311 * 3312 * (*parentp) must not be errored or this function will assert. 3313 * 3314 * (*chainp) usually starts out NULL and returns the newly created chain, 3315 * but if the caller desires the caller may allocate a disconnected chain 3316 * and pass it in instead. 3317 * 3318 * This function should NOT be used to insert INDIRECT blocks. It is 3319 * typically used to create/insert inodes and data blocks. 3320 * 3321 * Caller must pass-in an exclusively locked parent the new chain is to 3322 * be inserted under, and optionally pass-in a disconnected, exclusively 3323 * locked chain to insert (else we create a new chain). The function will 3324 * adjust (*parentp) as necessary, create or connect the chain, and 3325 * return an exclusively locked chain in *chainp. 3326 * 3327 * When creating a PFSROOT inode under the super-root, pmp is typically NULL 3328 * and will be reassigned. 3329 * 3330 * NOTE: returns HAMMER_ERROR_* flags 3331 */ 3332 int 3333 hammer2_chain_create(hammer2_chain_t **parentp, hammer2_chain_t **chainp, 3334 hammer2_dev_t *hmp, hammer2_pfs_t *pmp, int methods, 3335 hammer2_key_t key, int keybits, int type, size_t bytes, 3336 hammer2_tid_t mtid, hammer2_off_t dedup_off, int flags) 3337 { 3338 hammer2_chain_t *chain; 3339 hammer2_chain_t *parent; 3340 hammer2_blockref_t *base; 3341 hammer2_blockref_t dummy; 3342 int allocated = 0; 3343 int error = 0; 3344 int count; 3345 int maxloops = 300000; 3346 3347 /* 3348 * Topology may be crossing a PFS boundary. 3349 */ 3350 parent = *parentp; 3351 if (parent) { 3352 KKASSERT(hammer2_mtx_owned(&parent->lock)); 3353 KKASSERT(parent->error == 0); 3354 hmp = parent->hmp; 3355 } 3356 chain = *chainp; 3357 3358 if (chain == NULL) { 3359 /* 3360 * First allocate media space and construct the dummy bref, 3361 * then allocate the in-memory chain structure. Set the 3362 * INITIAL flag for fresh chains which do not have embedded 3363 * data. 
3364 */ 3365 bzero(&dummy, sizeof(dummy)); 3366 dummy.type = type; 3367 dummy.key = key; 3368 dummy.keybits = keybits; 3369 dummy.data_off = hammer2_getradix(bytes); 3370 3371 /* 3372 * Inherit methods from parent by default. Primarily used 3373 * for BREF_TYPE_DATA. Non-data types *must* be set to 3374 * a non-NONE check algorithm. 3375 */ 3376 if (methods == -1) 3377 dummy.methods = parent->bref.methods; 3378 else 3379 dummy.methods = (uint8_t)methods; 3380 3381 if (type != HAMMER2_BREF_TYPE_DATA && 3382 HAMMER2_DEC_CHECK(dummy.methods) == HAMMER2_CHECK_NONE) { 3383 dummy.methods |= 3384 HAMMER2_ENC_CHECK(HAMMER2_CHECK_DEFAULT); 3385 } 3386 3387 chain = hammer2_chain_alloc(hmp, pmp, &dummy); 3388 3389 /* 3390 * Lock the chain manually, chain_lock will load the chain 3391 * which we do NOT want to do. (note: chain->refs is set 3392 * to 1 by chain_alloc() for us, but lockcnt is not). 3393 */ 3394 chain->lockcnt = 1; 3395 hammer2_mtx_ex(&chain->lock); 3396 allocated = 1; 3397 3398 /* 3399 * Set INITIAL to optimize I/O. The flag will generally be 3400 * processed when we call hammer2_chain_modify(). 
3401 */ 3402 switch(type) { 3403 case HAMMER2_BREF_TYPE_VOLUME: 3404 case HAMMER2_BREF_TYPE_FREEMAP: 3405 panic("hammer2_chain_create: called with volume type"); 3406 break; 3407 case HAMMER2_BREF_TYPE_INDIRECT: 3408 panic("hammer2_chain_create: cannot be used to" 3409 "create indirect block"); 3410 break; 3411 case HAMMER2_BREF_TYPE_FREEMAP_NODE: 3412 panic("hammer2_chain_create: cannot be used to" 3413 "create freemap root or node"); 3414 break; 3415 case HAMMER2_BREF_TYPE_FREEMAP_LEAF: 3416 KKASSERT(bytes == sizeof(chain->data->bmdata)); 3417 /* fall through */ 3418 case HAMMER2_BREF_TYPE_DIRENT: 3419 case HAMMER2_BREF_TYPE_INODE: 3420 case HAMMER2_BREF_TYPE_DATA: 3421 default: 3422 /* 3423 * leave chain->data NULL, set INITIAL 3424 */ 3425 KKASSERT(chain->data == NULL); 3426 atomic_set_int(&chain->flags, HAMMER2_CHAIN_INITIAL); 3427 break; 3428 } 3429 } else { 3430 /* 3431 * We are reattaching a previously deleted chain, possibly 3432 * under a new parent and possibly with a new key/keybits. 3433 * The chain does not have to be in a modified state. The 3434 * UPDATE flag will be set later on in this routine. 3435 * 3436 * Do NOT mess with the current state of the INITIAL flag. 3437 */ 3438 chain->bref.key = key; 3439 chain->bref.keybits = keybits; 3440 if (chain->flags & HAMMER2_CHAIN_DELETED) 3441 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_DELETED); 3442 KKASSERT(chain->parent == NULL); 3443 } 3444 3445 /* 3446 * Set the appropriate bref flag if requested. 3447 * 3448 * NOTE! Callers can call this function to move chains without 3449 * knowing about special flags, so don't clear bref flags 3450 * here! 3451 */ 3452 if (flags & HAMMER2_INSERT_PFSROOT) 3453 chain->bref.flags |= HAMMER2_BREF_FLAG_PFSROOT; 3454 3455 if (parent == NULL) 3456 goto skip; 3457 3458 /* 3459 * Calculate how many entries we have in the blockref array and 3460 * determine if an indirect block is required when inserting into 3461 * the parent. 
3462 */ 3463 again: 3464 if (--maxloops == 0) 3465 panic("hammer2_chain_create: maxloops"); 3466 3467 switch(parent->bref.type) { 3468 case HAMMER2_BREF_TYPE_INODE: 3469 if ((parent->data->ipdata.meta.op_flags & 3470 HAMMER2_OPFLAG_DIRECTDATA) != 0) { 3471 kprintf("hammer2: parent set for direct-data! " 3472 "pkey=%016jx ckey=%016jx\n", 3473 parent->bref.key, 3474 chain->bref.key); 3475 } 3476 KKASSERT((parent->data->ipdata.meta.op_flags & 3477 HAMMER2_OPFLAG_DIRECTDATA) == 0); 3478 KKASSERT(parent->data != NULL); 3479 base = &parent->data->ipdata.u.blockset.blockref[0]; 3480 count = HAMMER2_SET_COUNT; 3481 break; 3482 case HAMMER2_BREF_TYPE_INDIRECT: 3483 case HAMMER2_BREF_TYPE_FREEMAP_NODE: 3484 if (parent->flags & HAMMER2_CHAIN_INITIAL) 3485 base = NULL; 3486 else 3487 base = &parent->data->npdata[0]; 3488 count = parent->bytes / sizeof(hammer2_blockref_t); 3489 break; 3490 case HAMMER2_BREF_TYPE_VOLUME: 3491 KKASSERT(parent->data != NULL); 3492 base = &parent->data->voldata.sroot_blockset.blockref[0]; 3493 count = HAMMER2_SET_COUNT; 3494 break; 3495 case HAMMER2_BREF_TYPE_FREEMAP: 3496 KKASSERT(parent->data != NULL); 3497 base = &parent->data->blkset.blockref[0]; 3498 count = HAMMER2_SET_COUNT; 3499 break; 3500 default: 3501 panic("hammer2_chain_create: unrecognized blockref type: %d", 3502 parent->bref.type); 3503 base = NULL; 3504 count = 0; 3505 break; 3506 } 3507 3508 /* 3509 * Make sure we've counted the brefs 3510 */ 3511 if ((parent->flags & HAMMER2_CHAIN_COUNTEDBREFS) == 0) 3512 hammer2_chain_countbrefs(parent, base, count); 3513 3514 KASSERT(parent->core.live_count >= 0 && 3515 parent->core.live_count <= count, 3516 ("bad live_count %d/%d (%02x, %d)", 3517 parent->core.live_count, count, 3518 parent->bref.type, parent->bytes)); 3519 3520 /* 3521 * If no free blockref could be found we must create an indirect 3522 * block and move a number of blockrefs into it. 
With the parent 3523 * locked we can safely lock each child in order to delete+duplicate 3524 * it without causing a deadlock. 3525 * 3526 * This may return the new indirect block or the old parent depending 3527 * on where the key falls. NULL is returned on error. 3528 */ 3529 if (parent->core.live_count == count) { 3530 hammer2_chain_t *nparent; 3531 3532 KKASSERT((flags & HAMMER2_INSERT_SAMEPARENT) == 0); 3533 3534 nparent = hammer2_chain_create_indirect(parent, key, keybits, 3535 mtid, type, &error); 3536 if (nparent == NULL) { 3537 if (allocated) 3538 hammer2_chain_drop(chain); 3539 chain = NULL; 3540 goto done; 3541 } 3542 if (parent != nparent) { 3543 hammer2_chain_unlock(parent); 3544 hammer2_chain_drop(parent); 3545 parent = *parentp = nparent; 3546 } 3547 goto again; 3548 } 3549 3550 /* 3551 * fall through if parent, or skip to here if no parent. 3552 */ 3553 skip: 3554 if (chain->flags & HAMMER2_CHAIN_DELETED) 3555 kprintf("Inserting deleted chain @%016jx\n", 3556 chain->bref.key); 3557 3558 /* 3559 * Link the chain into its parent. 3560 */ 3561 if (chain->parent != NULL) 3562 panic("hammer2: hammer2_chain_create: chain already connected"); 3563 KKASSERT(chain->parent == NULL); 3564 if (parent) { 3565 KKASSERT(parent->core.live_count < count); 3566 hammer2_chain_insert(parent, chain, 3567 HAMMER2_CHAIN_INSERT_SPIN | 3568 HAMMER2_CHAIN_INSERT_LIVE, 3569 0); 3570 } 3571 3572 if (allocated) { 3573 /* 3574 * Mark the newly created chain modified. This will cause 3575 * UPDATE to be set and process the INITIAL flag. 3576 * 3577 * Device buffers are not instantiated for DATA elements 3578 * as these are handled by logical buffers. 3579 * 3580 * Indirect and freemap node indirect blocks are handled 3581 * by hammer2_chain_create_indirect() and not by this 3582 * function. 3583 * 3584 * Data for all other bref types is expected to be 3585 * instantiated (INODE, LEAF). 
3586 */ 3587 switch(chain->bref.type) { 3588 case HAMMER2_BREF_TYPE_DATA: 3589 case HAMMER2_BREF_TYPE_FREEMAP_LEAF: 3590 case HAMMER2_BREF_TYPE_DIRENT: 3591 case HAMMER2_BREF_TYPE_INODE: 3592 error = hammer2_chain_modify(chain, mtid, dedup_off, 3593 HAMMER2_MODIFY_OPTDATA); 3594 break; 3595 default: 3596 /* 3597 * Remaining types are not supported by this function. 3598 * In particular, INDIRECT and LEAF_NODE types are 3599 * handled by create_indirect(). 3600 */ 3601 panic("hammer2_chain_create: bad type: %d", 3602 chain->bref.type); 3603 /* NOT REACHED */ 3604 break; 3605 } 3606 } else { 3607 /* 3608 * When reconnecting a chain we must set UPDATE and 3609 * setflush so the flush recognizes that it must update 3610 * the bref in the parent. 3611 */ 3612 if ((chain->flags & HAMMER2_CHAIN_UPDATE) == 0) 3613 atomic_set_int(&chain->flags, HAMMER2_CHAIN_UPDATE); 3614 } 3615 3616 /* 3617 * We must setflush(parent) to ensure that it recurses through to 3618 * chain. setflush(chain) might not work because ONFLUSH is possibly 3619 * already set in the chain (so it won't recurse up to set it in the 3620 * parent). 3621 */ 3622 if (parent) 3623 hammer2_chain_setflush(parent); 3624 3625 done: 3626 *chainp = chain; 3627 3628 return (error); 3629 } 3630 3631 /* 3632 * Move the chain from its old parent to a new parent. The chain must have 3633 * already been deleted or already disconnected (or never associated) with 3634 * a parent. The chain is reassociated with the new parent and the deleted 3635 * flag will be cleared (no longer deleted). The chain's modification state 3636 * is not altered. 3637 * 3638 * THE CALLER MUST HAVE ALREADY PROPERLY SEEKED (parent) TO THE INSERTION 3639 * POINT SANS ANY REQUIRED INDIRECT BLOCK CREATIONS DUE TO THE ARRAY BEING 3640 * FULL. This typically means that the caller is creating the chain after 3641 * doing a hammer2_chain_lookup(). 3642 * 3643 * Neither (parent) or (chain) can be errored. 
 *
 * If (parent) is non-NULL then the chain is inserted under the parent.
 *
 * If (parent) is NULL then the newly duplicated chain is not inserted
 * anywhere, similar to if it had just been chain_alloc()'d (suitable for
 * passing into hammer2_chain_create() after this function returns).
 *
 * WARNING! This function calls create which means it can insert indirect
 *	    blocks.  This can cause other unrelated chains in the parent to
 *	    be moved to a newly inserted indirect block in addition to the
 *	    specific chain.
 */
void
hammer2_chain_rename(hammer2_chain_t **parentp, hammer2_chain_t *chain,
		     hammer2_tid_t mtid, int flags)
{
	hammer2_blockref_t *bref;
	hammer2_dev_t *hmp;
	hammer2_chain_t *parent;

	/*
	 * WARNING!  We should never resolve DATA to device buffers
	 *	     (XXX allow it if the caller did?), and since
	 *	     we currently do not have the logical buffer cache
	 *	     buffer in-hand to fix its cached physical offset
	 *	     we also force the modify code to not COW it. XXX
	 *
	 * NOTE!     We allow error'd chains to be renamed.  The bref itself
	 *	     is good and can be renamed.  The content, however, may
	 *	     be inaccessible.
	 */
	hmp = chain->hmp;
	KKASSERT(chain->parent == NULL);
	/*KKASSERT(chain->error == 0); allow */
	bref = &chain->bref;

	/*
	 * If parent is not NULL the duplicated chain will be entered under
	 * the parent and the UPDATE bit set to tell flush to update
	 * the blockref.
	 *
	 * We must setflush(parent) to ensure that it recurses through to
	 * chain.  setflush(chain) might not work because ONFLUSH is possibly
	 * already set in the chain (so it won't recurse up to set it in the
	 * parent).
	 *
	 * Having both chains locked is extremely important for atomicity.
	 */
	if (parentp && (parent = *parentp) != NULL) {
		KKASSERT(hammer2_mtx_owned(&parent->lock));
		KKASSERT(parent->refs > 0);
		KKASSERT(parent->error == 0);

		/*
		 * Re-insert the chain under the new parent using its
		 * existing bref key/keybits/type and size.
		 */
		hammer2_chain_create(parentp, &chain, NULL, chain->pmp,
				     HAMMER2_METH_DEFAULT,
				     bref->key, bref->keybits, bref->type,
				     chain->bytes, mtid, 0, flags);
		KKASSERT(chain->flags & HAMMER2_CHAIN_UPDATE);
		hammer2_chain_setflush(*parentp);
	}
}

/*
 * This works in tandem with delete_obref() to install a blockref in
 * (typically) an indirect block that is associated with the chain being
 * moved to *parentp.
 *
 * The reason we need this function is that the caller needs to maintain
 * the blockref as it was, and not generate a new blockref for what might
 * be a modified chain.  Otherwise stuff will leak into the flush that
 * the flush code's FLUSH_INODE_STOP flag is unable to catch.
 *
 * It is EXTREMELY important that we properly set CHAIN_BMAPUPD and
 * CHAIN_UPDATE.  We must set BMAPUPD if the bref does not match, and
 * we must clear CHAIN_UPDATE (that was likely set by the chain_rename) if
 * it does.  Otherwise we can end up in a situation where H2 is unable to
 * clean up the in-memory chain topology.
 *
 * The reason for this is that flushes do not generally flush through
 * BREF_TYPE_INODE chains and depend on a hammer2_inode_t queued to syncq
 * or sideq to properly flush and dispose of the related inode chain's flags.
 * Situations where the inode is not actually modified by the frontend,
 * but where we have to move the related chains around as we insert or cleanup
 * indirect blocks, can leave us with a 'dirty' (non-disposable) in-memory
 * inode chain that does not have a hammer2_inode_t associated with it.
 */
static void
hammer2_chain_rename_obref(hammer2_chain_t **parentp, hammer2_chain_t *chain,
			   hammer2_tid_t mtid, int flags,
			   hammer2_blockref_t *obref)
{
	hammer2_chain_rename(parentp, chain, mtid, flags);

	if (obref->type != HAMMER2_BREF_TYPE_EMPTY) {
		hammer2_blockref_t *tbase;
		int tcount;

		KKASSERT((chain->flags & HAMMER2_CHAIN_BMAPPED) == 0);
		hammer2_chain_modify(*parentp, mtid, 0, 0);
		tbase = hammer2_chain_base_and_count(*parentp, &tcount);
		/* re-install the ORIGINAL blockref in the new parent */
		hammer2_base_insert(*parentp, tbase, tcount, chain, obref);
		if (bcmp(obref, &chain->bref, sizeof(chain->bref))) {
			/* bref differs: flush must update the blockmap */
			atomic_set_int(&chain->flags, HAMMER2_CHAIN_BMAPUPD |
						      HAMMER2_CHAIN_UPDATE);
		} else {
			/* bref matches: nothing for the flush to update */
			atomic_clear_int(&chain->flags, HAMMER2_CHAIN_UPDATE);
		}
	}
}

/*
 * Helper function for deleting chains.
 *
 * The chain is removed from the live view (the RBTREE) as well as the parent's
 * blockmap.  Both chain and its parent must be locked.
 *
 * parent may not be errored.  chain can be errored.
 */
static int
_hammer2_chain_delete_helper(hammer2_chain_t *parent, hammer2_chain_t *chain,
			     hammer2_tid_t mtid, int flags,
			     hammer2_blockref_t *obref)
{
	hammer2_dev_t *hmp;
	int error = 0;

	KKASSERT((chain->flags & HAMMER2_CHAIN_DELETED) == 0);
	KKASSERT(chain->parent == parent);
	hmp = chain->hmp;

	if (chain->flags & HAMMER2_CHAIN_BMAPPED) {
		/*
		 * Chain is blockmapped, so there must be a parent.
		 * Atomically remove the chain from the parent and remove
		 * the blockmap entry.  The parent must be set modified
		 * to remove the blockmap entry.
		 */
		hammer2_blockref_t *base;
		int count;

		KKASSERT(parent != NULL);
		KKASSERT(parent->error == 0);
		KKASSERT((parent->flags & HAMMER2_CHAIN_INITIAL) == 0);
		error = hammer2_chain_modify(parent, mtid, 0, 0);
		if (error)
			goto done;

		/*
		 * Calculate blockmap pointer
		 */
		KKASSERT(chain->flags & HAMMER2_CHAIN_ONRBTREE);
		/* both spinlocks held across the removal for atomicity */
		hammer2_spin_ex(&chain->core.spin);
		hammer2_spin_ex(&parent->core.spin);

		atomic_set_int(&chain->flags, HAMMER2_CHAIN_DELETED);
		atomic_add_int(&parent->core.live_count, -1);
		/* bump generation so concurrent lookups detect the change */
		++parent->core.generation;
		RB_REMOVE(hammer2_chain_tree, &parent->core.rbtree, chain);
		atomic_clear_int(&chain->flags, HAMMER2_CHAIN_ONRBTREE);
		--parent->core.chain_count;
		chain->parent = NULL;

		switch(parent->bref.type) {
		case HAMMER2_BREF_TYPE_INODE:
			/*
			 * Access the inode's block array.  However, there
			 * is no block array if the inode is flagged
			 * DIRECTDATA.
			 */
			if (parent->data &&
			    (parent->data->ipdata.meta.op_flags &
			     HAMMER2_OPFLAG_DIRECTDATA) == 0) {
				base =
				   &parent->data->ipdata.u.blockset.blockref[0];
			} else {
				base = NULL;
			}
			count = HAMMER2_SET_COUNT;
			break;
		case HAMMER2_BREF_TYPE_INDIRECT:
		case HAMMER2_BREF_TYPE_FREEMAP_NODE:
			if (parent->data)
				base = &parent->data->npdata[0];
			else
				base = NULL;
			count = parent->bytes / sizeof(hammer2_blockref_t);
			break;
		case HAMMER2_BREF_TYPE_VOLUME:
			base = &parent->data->voldata.
					sroot_blockset.blockref[0];
			count = HAMMER2_SET_COUNT;
			break;
		case HAMMER2_BREF_TYPE_FREEMAP:
			base = &parent->data->blkset.blockref[0];
			count = HAMMER2_SET_COUNT;
			break;
		default:
			base = NULL;
			count = 0;
			panic("_hammer2_chain_delete_helper: "
			      "unrecognized blockref type: %d",
			      parent->bref.type);
			break;
		}

		/*
		 * delete blockmapped chain from its parent.
		 *
		 * The parent is not affected by any statistics in chain
		 * which are pending synchronization.  That is, there is
		 * nothing to undo in the parent since they have not yet
		 * been incorporated into the parent.
		 *
		 * The parent is affected by statistics stored in inodes.
		 * Those have already been synchronized, so they must be
		 * undone.  XXX split update possible w/delete in middle?
		 */
		if (base) {
			hammer2_base_delete(parent, base, count, chain, obref);
		}
		hammer2_spin_unex(&parent->core.spin);
		hammer2_spin_unex(&chain->core.spin);
	} else if (chain->flags & HAMMER2_CHAIN_ONRBTREE) {
		/*
		 * Chain is not blockmapped but a parent is present.
		 * Atomically remove the chain from the parent.  There is
		 * no blockmap entry to remove.
		 *
		 * Because chain was associated with a parent but not
		 * synchronized, the chain's *_count_up fields contain
		 * inode adjustment statistics which must be undone.
		 */
		hammer2_spin_ex(&chain->core.spin);
		hammer2_spin_ex(&parent->core.spin);
		atomic_set_int(&chain->flags, HAMMER2_CHAIN_DELETED);
		atomic_add_int(&parent->core.live_count, -1);
		++parent->core.generation;
		RB_REMOVE(hammer2_chain_tree, &parent->core.rbtree, chain);
		atomic_clear_int(&chain->flags, HAMMER2_CHAIN_ONRBTREE);
		--parent->core.chain_count;
		chain->parent = NULL;
		hammer2_spin_unex(&parent->core.spin);
		hammer2_spin_unex(&chain->core.spin);
	} else {
		/*
		 * Chain is not blockmapped and has no parent.  This
		 * is a degenerate case.
		 */
		atomic_set_int(&chain->flags, HAMMER2_CHAIN_DELETED);
	}
done:
	return error;
}

/*
 * Create an indirect block that covers one or more of the elements in the
 * current parent.  Either returns the existing parent with no locking or
 * ref changes or returns the new indirect block locked and referenced
 * and leaving the original parent lock/ref intact as well.
 *
 * If an error occurs, NULL is returned and *errorp is set to the H2 error.
 *
 * The returned chain depends on where the specified key falls.
 *
 * The key/keybits for the indirect mode only needs to follow four rules:
 *
 * (1) That all elements underneath it fit within its key space and
 *
 * (2) That all elements outside it are outside its key space.
 *
 * (3) When creating the new indirect block any elements in the current
 *     parent that fit within the new indirect block's keyspace must be
 *     moved into the new indirect block.
 *
 * (4) The keyspace chosen for the inserted indirect block CAN cover a wider
 *     keyspace than the current parent, but lookup/iteration rules will
 *     ensure (and must ensure) that rule (2) for all parents leading up
 *     to the nearest inode or the root volume header is adhered to.
This
 * is accomplished by always recursing through matching keyspaces in
 * the hammer2_chain_lookup() and hammer2_chain_next() API.
 *
 * The current implementation calculates the current worst-case keyspace by
 * iterating the current parent and then divides it into two halves, choosing
 * whichever half has the most elements (not necessarily the half containing
 * the requested key).
 *
 * We can also opt to use the half with the least number of elements.  This
 * causes lower-numbered keys (aka logical file offsets) to recurse through
 * fewer indirect blocks and higher-numbered keys to recurse through more.
 * This also has the risk of not moving enough elements to the new indirect
 * block and being forced to create several indirect blocks before the element
 * can be inserted.
 *
 * Must be called with an exclusively locked parent.
 *
 * NOTE: *errorp set to HAMMER_ERROR_* flags
 */
static int hammer2_chain_indkey_freemap(hammer2_chain_t *parent,
				hammer2_key_t *keyp, int keybits,
				hammer2_blockref_t *base, int count);
static int hammer2_chain_indkey_file(hammer2_chain_t *parent,
				hammer2_key_t *keyp, int keybits,
				hammer2_blockref_t *base, int count,
				int ncount);
static int hammer2_chain_indkey_dir(hammer2_chain_t *parent,
				hammer2_key_t *keyp, int keybits,
				hammer2_blockref_t *base, int count,
				int ncount);
static
hammer2_chain_t *
hammer2_chain_create_indirect(hammer2_chain_t *parent,
			      hammer2_key_t create_key, int create_bits,
			      hammer2_tid_t mtid, int for_type, int *errorp)
{
	hammer2_dev_t *hmp;
	hammer2_blockref_t *base;
	hammer2_blockref_t *bref;
	hammer2_blockref_t bsave;
	hammer2_blockref_t dummy;
	hammer2_chain_t *chain;
	hammer2_chain_t *ichain;
	hammer2_key_t key = create_key;
	hammer2_key_t key_beg;
	hammer2_key_t key_end;
	hammer2_key_t key_next;
	int keybits = create_bits;
	int count;
	int ncount;
	int nbytes;
	int loops;
	int error;
	int reason;
	int generation;
	int maxloops = 300000;

	/*
	 * Calculate the base blockref pointer or NULL if the chain
	 * is known to be empty.  We need to calculate the array count
	 * for RB lookups either way.
	 */
	hmp = parent->hmp;
	KKASSERT(hammer2_mtx_owned(&parent->lock));

	/*
	 * Pre-modify the parent now to avoid having to deal with error
	 * processing if we tried to later (in the middle of our loop).
	 *
	 * We are going to be moving bref's around, the indirect blocks
	 * cannot be in an initial state.  Do not pass MODIFY_OPTDATA.
	 */
	*errorp = hammer2_chain_modify(parent, mtid, 0, 0);
	if (*errorp) {
		kprintf("hammer2_chain_create_indirect: error %08x %s\n",
			*errorp, hammer2_error_str(*errorp));
		return NULL;
	}
	KKASSERT((parent->flags & HAMMER2_CHAIN_INITIAL) == 0);

	/*hammer2_chain_modify(&parent, HAMMER2_MODIFY_OPTDATA);*/
	base = hammer2_chain_base_and_count(parent, &count);

	/*
	 * How big should our new indirect block be?  It has to be at least
	 * as large as its parent for splits to work properly.
	 *
	 * The freemap uses a specific indirect block size.  The number of
	 * levels are built dynamically and ultimately depend on the size
	 * of the volume.  Because freemap blocks are taken from the reserved
	 * areas of the volume our goal is efficiency (fewer levels) and not
	 * so much to save disk space.
	 *
	 * The first indirect block level for a directory usually uses
	 * HAMMER2_IND_BYTES_MIN (4KB = 32 directory entries).  Due to
	 * the hash mechanism, this typically gives us a nominal
	 * 32 * 4 entries with one level of indirection.
	 *
	 * We use HAMMER2_IND_BYTES_NOM (16KB = 128 blockrefs) for FILE
	 * indirect blocks.  The initial 4 entries in the inode gives us
	 * 256KB.  Up to 4 indirect blocks gives us 32MB.  Three levels
	 * of indirection gives us 137GB, and so forth.  H2 can support
	 * huge file sizes but they are not typical, so we try to stick
	 * with compactness and do not use a larger indirect block size.
	 *
	 * We could use 64KB (PBUFSIZE), giving us 512 blockrefs, but
	 * due to the way indirect blocks are created this usually winds
	 * up being extremely inefficient for small files.  Even though
	 * 16KB requires more levels of indirection for very large files,
	 * the 16KB records can be ganged together into 64KB DIOs.
	 */
	if (for_type == HAMMER2_BREF_TYPE_FREEMAP_NODE ||
	    for_type == HAMMER2_BREF_TYPE_FREEMAP_LEAF) {
		nbytes = HAMMER2_FREEMAP_LEVELN_PSIZE;
	} else if (parent->bref.type == HAMMER2_BREF_TYPE_INODE) {
		if (parent->data->ipdata.meta.type ==
		    HAMMER2_OBJTYPE_DIRECTORY)
			nbytes = HAMMER2_IND_BYTES_MIN;	/* 4KB = 32 entries */
		else
			nbytes = HAMMER2_IND_BYTES_NOM;	/* 16KB = ~8MB file */

	} else {
		nbytes = HAMMER2_IND_BYTES_NOM;
	}
	if (nbytes < count * sizeof(hammer2_blockref_t)) {
		KKASSERT(for_type != HAMMER2_BREF_TYPE_FREEMAP_NODE &&
			 for_type != HAMMER2_BREF_TYPE_FREEMAP_LEAF);
		nbytes = count * sizeof(hammer2_blockref_t);
	}
	ncount = nbytes / sizeof(hammer2_blockref_t);

	/*
	 * When creating an indirect block for a freemap node or leaf
	 * the key/keybits must be fitted to static radix levels because
	 * particular radix levels use particular reserved blocks in the
	 * related zone.
	 *
	 * This routine calculates the key/radix of the indirect block
	 * we need to create, and whether it is on the high-side or the
	 * low-side.
	 */
	switch(for_type) {
	case HAMMER2_BREF_TYPE_FREEMAP_NODE:
	case HAMMER2_BREF_TYPE_FREEMAP_LEAF:
		keybits = hammer2_chain_indkey_freemap(parent, &key, keybits,
						       base, count);
		break;
	case HAMMER2_BREF_TYPE_DATA:
		keybits = hammer2_chain_indkey_file(parent, &key, keybits,
						    base, count, ncount);
		break;
	case HAMMER2_BREF_TYPE_DIRENT:
	case HAMMER2_BREF_TYPE_INODE:
		keybits = hammer2_chain_indkey_dir(parent, &key, keybits,
						   base, count, ncount);
		break;
	default:
		panic("illegal indirect block for bref type %d", for_type);
		break;
	}

	/*
	 * Normalize the key for the radix being represented, keeping the
	 * high bits and throwing away the low bits.
	 */
	key &= ~(((hammer2_key_t)1 << keybits) - 1);

	/*
	 * Ok, create our new indirect block
	 */
	bzero(&dummy, sizeof(dummy));
	if (for_type == HAMMER2_BREF_TYPE_FREEMAP_NODE ||
	    for_type == HAMMER2_BREF_TYPE_FREEMAP_LEAF) {
		dummy.type = HAMMER2_BREF_TYPE_FREEMAP_NODE;
	} else {
		dummy.type = HAMMER2_BREF_TYPE_INDIRECT;
	}
	dummy.key = key;
	dummy.keybits = keybits;
	dummy.data_off = hammer2_getradix(nbytes);
	dummy.methods =
		HAMMER2_ENC_CHECK(HAMMER2_DEC_CHECK(parent->bref.methods)) |
		HAMMER2_ENC_COMP(HAMMER2_COMP_NONE);

	ichain = hammer2_chain_alloc(hmp, parent->pmp, &dummy);
	atomic_set_int(&ichain->flags, HAMMER2_CHAIN_INITIAL);
	hammer2_chain_lock(ichain, HAMMER2_RESOLVE_MAYBE);
	/* ichain has one ref at this point */

	/*
	 * We have to mark it modified to allocate its block, but use
	 * OPTDATA to allow it to remain in the INITIAL state.  Otherwise
	 * it won't be acted upon by the flush code.
	 *
	 * XXX remove OPTDATA, we need a fully initialized indirect block to
	 * be able to move the original blockref.
	 */
	*errorp = hammer2_chain_modify(ichain, mtid, 0, 0);
	if (*errorp) {
		kprintf("hammer2_chain_create_indirect: error %08x %s\n",
			*errorp, hammer2_error_str(*errorp));
		hammer2_chain_unlock(ichain);
		hammer2_chain_drop(ichain);
		return NULL;
	}
	KKASSERT((ichain->flags & HAMMER2_CHAIN_INITIAL) == 0);

	/*
	 * Iterate the original parent and move the matching brefs into
	 * the new indirect block.
	 *
	 * XXX handle flushes.
	 */
	key_beg = 0;
	key_end = HAMMER2_KEY_MAX;
	key_next = 0;	/* avoid gcc warnings */
	hammer2_spin_ex(&parent->core.spin);
	loops = 0;
	reason = 0;

	for (;;) {
		/*
		 * Parent may have been modified, relocating its block array.
		 * Reload the base pointer.
		 */
		base = hammer2_chain_base_and_count(parent, &count);

		if (++loops > 100000) {
		    hammer2_spin_unex(&parent->core.spin);
		    panic("excessive loops r=%d p=%p base/count %p:%d %016jx\n",
			  reason, parent, base, count, key_next);
		}

		/*
		 * NOTE: spinlock stays intact, returned chain (if not NULL)
		 *	 is not referenced or locked which means that we
		 *	 cannot safely check its flagged / deletion status
		 *	 until we lock it.
		 */
		chain = hammer2_combined_find(parent, base, count,
					      &key_next,
					      key_beg, key_end,
					      &bref);
		generation = parent->core.generation;
		if (bref == NULL)
			break;
		key_next = bref->key + ((hammer2_key_t)1 << bref->keybits);

		/*
		 * Skip keys that are not within the key/radix of the new
		 * indirect block.  They stay in the parent.
		 */
		if (rounddown2(key ^ bref->key, (hammer2_key_t)1 << keybits) != 0) {
			goto next_key_spinlocked;
		}

		/*
		 * Load the new indirect block by acquiring the related
		 * chains (potentially from media as it might not be
		 * in-memory).  Then move it to the new parent (ichain).
		 *
		 * chain is referenced but not locked.  We must lock the
		 * chain to obtain definitive state.
		 */
		bsave = *bref;
		if (chain) {
			/*
			 * Use chain already present in the RBTREE
			 */
			hammer2_chain_ref(chain);
			hammer2_spin_unex(&parent->core.spin);
			hammer2_chain_lock(chain, HAMMER2_RESOLVE_NEVER);
		} else {
			/*
			 * Get chain for blockref element.  _get returns NULL
			 * on insertion race.
			 */
			hammer2_spin_unex(&parent->core.spin);
			chain = hammer2_chain_get(parent, generation, &bsave,
						  HAMMER2_RESOLVE_NEVER);
			if (chain == NULL) {
				reason = 1;
				hammer2_spin_ex(&parent->core.spin);
				continue;
			}
		}

		/*
		 * This is always live so if the chain has been deleted
		 * we raced someone and we have to retry.
		 *
		 * NOTE: Lookups can race delete-duplicate because
		 *	 delete-duplicate does not lock the parent's core
		 *	 (they just use the spinlock on the core).
		 *
		 *	 (note reversed logic for this one)
		 */
		if (bcmp(&bsave, &chain->bref, sizeof(bsave)) ||
		    chain->parent != parent ||
		    (chain->flags & HAMMER2_CHAIN_DELETED)) {
			hammer2_chain_unlock(chain);
			hammer2_chain_drop(chain);
			if (hammer2_debug & 0x0040) {
				kprintf("LOST PARENT RETRY "
				"RETRY (%p,%p)->%p %08x\n",
				parent, chain->parent, chain, chain->flags);
			}
			hammer2_spin_ex(&parent->core.spin);
			continue;
		}

		/*
		 * Shift the chain to the indirect block.
		 *
		 * WARNING! No reason for us to load chain data, pass NOSTATS
		 *	    to prevent delete/insert from trying to access
		 *	    inode stats (and thus asserting if there is no
		 *	    chain->data loaded).
		 *
		 * WARNING! The (parent, chain) deletion may modify the parent
		 *	    and invalidate the base pointer.
		 *
		 * WARNING! Parent must already be marked modified, so we
		 *	    can assume that chain_delete always succeeds.
		 *
		 * WARNING! hammer2_chain_repchange() does not have to be
		 *	    called (and doesn't work anyway because we are
		 *	    only doing a partial shift).  A recursion that is
		 *	    in-progress can continue at the current parent
		 *	    and will be able to properly find its next key.
		 */
		error = hammer2_chain_delete_obref(parent, chain, mtid, 0,
						   &bsave);
		KKASSERT(error == 0);
		hammer2_chain_rename_obref(&ichain, chain, mtid, 0, &bsave);
		hammer2_chain_unlock(chain);
		hammer2_chain_drop(chain);
		KKASSERT(parent->refs > 0);
		chain = NULL;
		base = NULL;	/* safety */
		hammer2_spin_ex(&parent->core.spin);
next_key_spinlocked:
		if (--maxloops == 0)
			panic("hammer2_chain_create_indirect: maxloops");
		reason = 4;
		if (key_next == 0 || key_next > key_end)
			break;
		key_beg = key_next;
		/* loop */
	}
	hammer2_spin_unex(&parent->core.spin);

	/*
	 * Insert the new indirect block into the parent now that we've
	 * cleared out some entries in the parent.  We calculated a good
	 * insertion index in the loop above (ichain->index).
	 *
	 * We don't have to set UPDATE here because we mark ichain
	 * modified down below (so the normal modified -> flush -> set-moved
	 * sequence applies).
	 *
	 * The insertion shouldn't race as this is a completely new block
	 * and the parent is locked.
	 */
	base = NULL;	/* safety, parent modify may change address */
	KKASSERT((ichain->flags & HAMMER2_CHAIN_ONRBTREE) == 0);
	KKASSERT(parent->core.live_count < count);
	hammer2_chain_insert(parent, ichain,
			     HAMMER2_CHAIN_INSERT_SPIN |
			     HAMMER2_CHAIN_INSERT_LIVE,
			     0);

	/*
	 * Make sure flushes propagate after our manual insertion.
	 */
	hammer2_chain_setflush(ichain);
	hammer2_chain_setflush(parent);

	/*
	 * Figure out what to return.
	 */
	if (rounddown2(create_key ^ key, (hammer2_key_t)1 << keybits)) {
		/*
		 * Key being created is outside the key range,
		 * return the original parent.
		 */
		hammer2_chain_unlock(ichain);
		hammer2_chain_drop(ichain);
	} else {
		/*
		 * Otherwise its in the range, return the new parent.
		 * (leave both the new and old parent locked).
		 */
		parent = ichain;
	}

	return(parent);
}

/*
 * Do maintenance on an indirect chain.  Both parent and chain are locked.
 *
 * Returns non-zero if (chain) is deleted, either due to being empty or
 * because its children were safely moved into the parent.
 */
int
hammer2_chain_indirect_maintenance(hammer2_chain_t *parent,
				   hammer2_chain_t *chain)
{
	hammer2_blockref_t *chain_base;
	hammer2_blockref_t *base;
	hammer2_blockref_t *bref;
	hammer2_blockref_t bsave;
	hammer2_key_t key_next;
	hammer2_key_t key_beg;
	hammer2_key_t key_end;
	hammer2_chain_t *sub;
	int chain_count;
	int count;
	int error;
	int generation;

	/*
	 * Make sure we have an accurate live_count
	 */
	if ((chain->flags & (HAMMER2_CHAIN_INITIAL |
			     HAMMER2_CHAIN_COUNTEDBREFS)) == 0) {
		base = &chain->data->npdata[0];
		count = chain->bytes / sizeof(hammer2_blockref_t);
		hammer2_chain_countbrefs(chain, base, count);
	}

	/*
	 * If the indirect block is empty we can delete it.
	 * (ignore deletion error)
	 */
	if (chain->core.live_count == 0 && RB_EMPTY(&chain->core.rbtree)) {
		hammer2_chain_delete(parent, chain,
				     chain->bref.modify_tid,
				     HAMMER2_DELETE_PERMANENT);
		hammer2_chain_repchange(parent, chain);
		return 1;
	}

	base = hammer2_chain_base_and_count(parent, &count);

	if ((parent->flags & (HAMMER2_CHAIN_INITIAL |
			      HAMMER2_CHAIN_COUNTEDBREFS)) == 0) {
		hammer2_chain_countbrefs(parent, base, count);
	}

	/*
	 * Determine if we can collapse chain into parent, calculate
	 * hysteresis for chain emptiness.
	 */
	if (parent->core.live_count + chain->core.live_count - 1 > count)
		return 0;
	chain_count = chain->bytes / sizeof(hammer2_blockref_t);
	if (chain->core.live_count > chain_count * 3 / 4)
		return 0;

	/*
	 * Ok, theoretically we can collapse chain's contents into
	 * parent.  chain is locked, but any in-memory children of chain
	 * are not.  For this to work, we must be able to dispose of any
	 * in-memory children of chain.
	 *
	 * For now require that there are no in-memory children of chain.
	 *
	 * WARNING! Both chain and parent must remain locked across this
	 *	    entire operation.
	 */

	/*
	 * Parent must be marked modified.  Don't try to collapse it if we
	 * can't mark it modified.  Once modified, destroy chain to make room
	 * and to get rid of what will be a conflicting key (this is included
	 * in the calculation above).  Finally, move the children of chain
	 * into chain's parent.
	 *
	 * This order creates an accounting problem for bref.embed.stats
	 * because we destroy chain before we remove its children.  Any
	 * elements whose blockref is already synchronized will be counted
	 * twice.  To deal with the problem we clean out chain's stats prior
	 * to deleting it.
	 */
	error = hammer2_chain_modify(parent, 0, 0, 0);
	if (error) {
		krateprintf(&krate_h2me, "hammer2: indirect_maint: %s\n",
			    hammer2_error_str(error));
		return 0;
	}
	error = hammer2_chain_modify(chain, chain->bref.modify_tid, 0, 0);
	if (error) {
		krateprintf(&krate_h2me, "hammer2: indirect_maint: %s\n",
			    hammer2_error_str(error));
		return 0;
	}

	chain->bref.embed.stats.inode_count = 0;
	chain->bref.embed.stats.data_count = 0;
	error = hammer2_chain_delete(parent, chain,
				     chain->bref.modify_tid,
				     HAMMER2_DELETE_PERMANENT);
	KKASSERT(error == 0);

	/*
	 * The combined_find call requires core.spin to be held.  One would
	 * think there wouldn't be any conflicts since we hold chain
	 * exclusively locked, but the caching mechanism for 0-ref children
	 * does not require a chain lock.
	 */
	hammer2_spin_ex(&chain->core.spin);

	key_next = 0;
	key_beg = 0;
	key_end = HAMMER2_KEY_MAX;
	for (;;) {
		chain_base = &chain->data->npdata[0];
		chain_count = chain->bytes / sizeof(hammer2_blockref_t);
		sub = hammer2_combined_find(chain, chain_base, chain_count,
					    &key_next,
					    key_beg, key_end,
					    &bref);
		generation = chain->core.generation;
		if (bref == NULL)
			break;
		key_next = bref->key + ((hammer2_key_t)1 << bref->keybits);

		bsave = *bref;
		if (sub) {
			hammer2_chain_ref(sub);
			hammer2_spin_unex(&chain->core.spin);
			hammer2_chain_lock(sub, HAMMER2_RESOLVE_NEVER);
		} else {
			hammer2_spin_unex(&chain->core.spin);
			sub = hammer2_chain_get(chain, generation, &bsave,
						HAMMER2_RESOLVE_NEVER);
			if (sub == NULL) {
				hammer2_spin_ex(&chain->core.spin);
				continue;
			}
		}
		/*
		 * Raced a deletion or blockref change, retry the scan.
		 */
		if (bcmp(&bsave, &sub->bref, sizeof(bsave)) ||
		    sub->parent != chain ||
		    (sub->flags & HAMMER2_CHAIN_DELETED)) {
			hammer2_chain_unlock(sub);
			hammer2_chain_drop(sub);
			hammer2_spin_ex(&chain->core.spin);
			sub = NULL;	/* safety */
			continue;
		}
		error = hammer2_chain_delete_obref(chain, sub,
						   sub->bref.modify_tid, 0,
						   &bsave);
		KKASSERT(error == 0);
		hammer2_chain_rename_obref(&parent, sub,
					   sub->bref.modify_tid,
					   HAMMER2_INSERT_SAMEPARENT, &bsave);
		hammer2_chain_unlock(sub);
		hammer2_chain_drop(sub);
		hammer2_spin_ex(&chain->core.spin);

		if (key_next == 0)
			break;
		key_beg = key_next;
	}
	hammer2_spin_unex(&chain->core.spin);

	hammer2_chain_repchange(parent, chain);

	return 1;
}

/*
 * Freemap indirect blocks
 *
 * Calculate the keybits and highside/lowside of the freemap node the
 * caller is creating.
 *
 * This routine will specify the next higher-level freemap key/radix
 * representing the lowest-ordered set.  By doing so, eventually all
 * low-ordered sets will be moved one level down.
 *
 * We have to be careful here because the freemap reserves a limited
 * number of blocks for a limited number of levels.  So we can't just
 * push indiscriminately.
 */
int
hammer2_chain_indkey_freemap(hammer2_chain_t *parent, hammer2_key_t *keyp,
			     int keybits, hammer2_blockref_t *base, int count)
{
	hammer2_chain_t *chain;
	hammer2_blockref_t *bref;
	hammer2_key_t key;
	hammer2_key_t key_beg;
	hammer2_key_t key_end;
	hammer2_key_t key_next;
	int locount;
	int hicount;
	int maxloops = 300000;

	key = *keyp;
	locount = 0;
	hicount = 0;
	keybits = 64;

	/*
	 * Calculate the range of keys in the array being careful to skip
	 * slots which are overridden with a deletion.
	 */
	key_beg = 0;
	key_end = HAMMER2_KEY_MAX;
	hammer2_spin_ex(&parent->core.spin);

	for (;;) {
		if (--maxloops == 0) {
			panic("indkey_freemap shit %p %p:%d\n",
			      parent, base, count);
		}
		chain = hammer2_combined_find(parent, base, count,
					      &key_next,
					      key_beg, key_end,
					      &bref);

		/*
		 * Exhausted search
		 */
		if (bref == NULL)
			break;

		/*
		 * Skip deleted chains.
		 */
		if (chain && (chain->flags & HAMMER2_CHAIN_DELETED)) {
			if (key_next == 0 || key_next > key_end)
				break;
			key_beg = key_next;
			continue;
		}

		/*
		 * Use the full live (not deleted) element for the scan
		 * iteration.  HAMMER2 does not allow partial replacements.
		 *
		 * XXX should be built into hammer2_combined_find().
		 */
		key_next = bref->key + ((hammer2_key_t)1 << bref->keybits);

		/* track the smallest key/radix seen so far */
		if (keybits > bref->keybits) {
			key = bref->key;
			keybits = bref->keybits;
		} else if (keybits == bref->keybits && bref->key < key) {
			key = bref->key;
		}
		if (key_next == 0)
			break;
		key_beg = key_next;
	}
	hammer2_spin_unex(&parent->core.spin);

	/*
	 * Return the keybits for a higher-level FREEMAP_NODE covering
	 * this node.
	 */
	switch(keybits) {
	case HAMMER2_FREEMAP_LEVEL0_RADIX:
		keybits = HAMMER2_FREEMAP_LEVEL1_RADIX;
		break;
	case HAMMER2_FREEMAP_LEVEL1_RADIX:
		keybits = HAMMER2_FREEMAP_LEVEL2_RADIX;
		break;
	case HAMMER2_FREEMAP_LEVEL2_RADIX:
		keybits = HAMMER2_FREEMAP_LEVEL3_RADIX;
		break;
	case HAMMER2_FREEMAP_LEVEL3_RADIX:
		keybits = HAMMER2_FREEMAP_LEVEL4_RADIX;
		break;
	case HAMMER2_FREEMAP_LEVEL4_RADIX:
		keybits = HAMMER2_FREEMAP_LEVEL5_RADIX;
		break;
	case HAMMER2_FREEMAP_LEVEL5_RADIX:
		panic("hammer2_chain_indkey_freemap: level too high");
		break;
	default:
		panic("hammer2_chain_indkey_freemap: bad radix");
		break;
	}
	*keyp = key;

	return (keybits);
}

/*
 * File indirect blocks
 *
 * Calculate the key/keybits for the indirect block to create by scanning
 * existing keys.  The key being created is also passed in *keyp and can be
 * inside or outside the indirect block.  Regardless, the indirect block
 * must hold at least two keys in order to guarantee sufficient space.
 *
 * We use a modified version of the freemap's fixed radix tree, but tailored
 * for file data.  Basically we configure an indirect block encompassing the
 * smallest key.
 */
static int
hammer2_chain_indkey_file(hammer2_chain_t *parent, hammer2_key_t *keyp,
			  int keybits, hammer2_blockref_t *base, int count,
			  int ncount)
{
	hammer2_chain_t *chain;
	hammer2_blockref_t *bref;
	hammer2_key_t key;
	hammer2_key_t key_beg;
	hammer2_key_t key_end;
	hammer2_key_t key_next;
	int nradix;
	int locount;
	int hicount;
	int maxloops = 300000;

	key = *keyp;
	locount = 0;
	hicount = 0;
	keybits = 64;

	/*
	 * Calculate the range of keys in the array being careful to skip
	 * slots which are overridden with a deletion.
	 *
	 * Locate the smallest key.
	 */
	key_beg = 0;
	key_end = HAMMER2_KEY_MAX;
	hammer2_spin_ex(&parent->core.spin);

	for (;;) {
		if (--maxloops == 0) {
			/* NOTE(review): panic text mentions indkey_freemap;
			 * looks like a copy/paste artifact — confirm */
			panic("indkey_freemap shit %p %p:%d\n",
			      parent, base, count);
		}
		chain = hammer2_combined_find(parent, base, count,
					      &key_next,
					      key_beg, key_end,
					      &bref);

		/*
		 * Exhausted search
		 */
		if (bref == NULL)
			break;

		/*
		 * Skip deleted chains.
		 */
		if (chain && (chain->flags & HAMMER2_CHAIN_DELETED)) {
			if (key_next == 0 || key_next > key_end)
				break;
			key_beg = key_next;
			continue;
		}

		/*
		 * Use the full live (not deleted) element for the scan
		 * iteration.  HAMMER2 does not allow partial replacements.
		 *
		 * XXX should be built into hammer2_combined_find().
		 */
		key_next = bref->key + ((hammer2_key_t)1 << bref->keybits);

		/* track the smallest key/radix seen so far */
		if (keybits > bref->keybits) {
			key = bref->key;
			keybits = bref->keybits;
		} else if (keybits == bref->keybits && bref->key < key) {
			key = bref->key;
		}
		if (key_next == 0)
			break;
		key_beg = key_next;
	}
	hammer2_spin_unex(&parent->core.spin);

	/*
	 * Calculate the static keybits for a higher-level indirect block
	 * that contains the key.
	 */
	*keyp = key;

	switch(ncount) {
	case HAMMER2_IND_BYTES_MIN / sizeof(hammer2_blockref_t):
		nradix = HAMMER2_IND_RADIX_MIN - HAMMER2_BLOCKREF_RADIX;
		break;
	case HAMMER2_IND_BYTES_NOM / sizeof(hammer2_blockref_t):
		nradix = HAMMER2_IND_RADIX_NOM - HAMMER2_BLOCKREF_RADIX;
		break;
	case HAMMER2_IND_BYTES_MAX / sizeof(hammer2_blockref_t):
		nradix = HAMMER2_IND_RADIX_MAX - HAMMER2_BLOCKREF_RADIX;
		break;
	default:
		panic("bad ncount %d\n", ncount);
		nradix = 0;
		break;
	}

	/*
	 * The largest radix that can be returned for an indirect block is
	 * 63 bits.  (The largest practical indirect block radix is actually
	 * 62 bits because the top-level inode or volume root contains four
	 * entries, but allow 63 to be returned).
	 */
	if (nradix >= 64)
		nradix = 63;

	return keybits + nradix;
}

#if 1

/*
 * Directory indirect blocks.
 *
 * Covers both the inode index (directory of inodes), and directory contents
 * (filenames hardlinked to inodes).
 *
 * Because directory keys are hashed we generally try to cut the space in
 * half.  We accommodate the inode index (which tends to have linearly
 * increasing inode numbers) by ensuring that the keyspace is at least large
 * enough to fill up the indirect block being created.
 */
static int
hammer2_chain_indkey_dir(hammer2_chain_t *parent, hammer2_key_t *keyp,
			 int keybits, hammer2_blockref_t *base, int count,
			 int ncount)
{
	hammer2_blockref_t *bref;
	hammer2_chain_t	*chain;
	hammer2_key_t key_beg;
	hammer2_key_t key_end;
	hammer2_key_t key_next;
	hammer2_key_t key;
	int nkeybits;
	int locount;
	int hicount;
	int maxloops = 300000;

	/*
	 * NOTE: We can't take a shortcut here anymore for inodes because
	 *	 the root directory can contain a mix of inodes and directory
	 *	 entries (we used to just return 63 if parent->bref.type was
	 *	 HAMMER2_BREF_TYPE_INODE.
	 */
	key = *keyp;
	locount = 0;
	hicount = 0;

	/*
	 * Calculate the range of keys in the array being careful to skip
	 * slots which are overridden with a deletion.
	 */
	key_beg = 0;
	key_end = HAMMER2_KEY_MAX;
	hammer2_spin_ex(&parent->core.spin);

	for (;;) {
		if (--maxloops == 0) {
			/* NOTE(review): panic text mentions indkey_freemap;
			 * looks like a copy/paste artifact — confirm */
			panic("indkey_freemap shit %p %p:%d\n",
			      parent, base, count);
		}
		chain = hammer2_combined_find(parent, base, count,
					      &key_next,
					      key_beg, key_end,
					      &bref);

		/*
		 * Exhausted search
		 */
		if (bref == NULL)
			break;

		/*
		 * Deleted object
		 */
		if (chain && (chain->flags & HAMMER2_CHAIN_DELETED)) {
			if (key_next == 0 || key_next > key_end)
				break;
			key_beg = key_next;
			continue;
		}

		/*
		 * Use the full live (not deleted) element for the scan
		 * iteration.  HAMMER2 does not allow partial replacements.
		 *
		 * XXX should be built into hammer2_combined_find().
		 */
		key_next = bref->key + ((hammer2_key_t)1 << bref->keybits);

		/*
		 * Expand our calculated key range (key, keybits) to fit
		 * the scanned key.  nkeybits represents the full range
		 * that we will later cut in half (two halves @ nkeybits - 1).
		 */
		nkeybits = keybits;
		if (nkeybits < bref->keybits) {
			if (bref->keybits > 64) {
				kprintf("bad bref chain %p bref %p\n",
					chain, bref);
				Debugger("fubar");
			}
			nkeybits = bref->keybits;
		}
		while (nkeybits < 64 &&
		       rounddown2(key ^ bref->key, (hammer2_key_t)1 << nkeybits) != 0) {
			++nkeybits;
		}

		/*
		 * If the new key range is larger we have to determine
		 * which side of the new key range the existing keys fall
		 * under by checking the high bit, then collapsing the
		 * locount into the hicount or vice-versa.
		 */
		if (keybits != nkeybits) {
			if (((hammer2_key_t)1 << (nkeybits - 1)) & key) {
				hicount += locount;
				locount = 0;
			} else {
				locount += hicount;
				hicount = 0;
			}
			keybits = nkeybits;
		}

		/*
		 * The newly scanned key will be in the lower half or the
		 * upper half of the (new) key range.
		 */
		if (((hammer2_key_t)1 << (nkeybits - 1)) & bref->key)
			++hicount;
		else
			++locount;

		if (key_next == 0)
			break;
		key_beg = key_next;
	}
	hammer2_spin_unex(&parent->core.spin);
	bref = NULL;	/* now invalid (safety) */

	/*
	 * Adjust keybits to represent half of the full range calculated
	 * above (radix 63 max) for our new indirect block.
	 */
	--keybits;

	/*
	 * Expand keybits to hold at least ncount elements.  ncount will be
	 * a power of 2.  This is to try to completely fill leaf nodes (at
	 * least for keys which are not hashes).
	 *
	 * We aren't counting 'in' or 'out', we are counting 'high side'
	 * and 'low side' based on the bit at (1LL << keybits).  We want
	 * everything to be inside in these cases so shift it all to
	 * the low or high side depending on the new high bit.
	 */
	while (((hammer2_key_t)1 << keybits) < ncount) {
		++keybits;
		if (key & ((hammer2_key_t)1 << keybits)) {
			hicount += locount;
			locount = 0;
		} else {
			locount += hicount;
			hicount = 0;
		}
	}

	if (hicount > locount)
		key |= (hammer2_key_t)1 << keybits;
	else
		key &= ~(hammer2_key_t)1 << keybits;

	*keyp = key;

	return (keybits);
}

#else

/*
 * Directory indirect blocks.
 *
 * Covers both the inode index (directory of inodes), and directory contents
 * (filenames hardlinked to inodes).
 *
 * Because directory keys are hashed we generally try to cut the space in
 * half.  We accommodate the inode index (which tends to have linearly
 * increasing inode numbers) by ensuring that the keyspace is at least large
 * enough to fill up the indirect block being created.
 */
static int
hammer2_chain_indkey_dir(hammer2_chain_t *parent, hammer2_key_t *keyp,
			 int keybits, hammer2_blockref_t *base, int count,
			 int ncount)
{
	hammer2_blockref_t *bref;
	hammer2_chain_t	*chain;
	hammer2_key_t key_beg;
	hammer2_key_t key_end;
	hammer2_key_t key_next;
	hammer2_key_t key;
	int nkeybits;
	int locount;
	int hicount;
	int maxloops = 300000;

	/*
	 * Shortcut if the parent is the inode.  In this situation the
	 * parent has 4+1 directory entries and we are creating an indirect
	 * block capable of holding many more.
	 */
	if (parent->bref.type == HAMMER2_BREF_TYPE_INODE) {
		return 63;
	}

	key = *keyp;
	locount = 0;
	hicount = 0;

	/*
	 * Calculate the range of keys in the array being careful to skip
	 * slots which are overridden with a deletion.
	 */
	key_beg = 0;
	key_end = HAMMER2_KEY_MAX;
	hammer2_spin_ex(&parent->core.spin);

	for (;;) {
		if (--maxloops == 0) {
			panic("indkey_freemap shit %p %p:%d\n",
			      parent, base, count);
		}
		chain = hammer2_combined_find(parent, base, count,
					      &key_next,
					      key_beg, key_end,
					      &bref);

		/*
		 * Exhausted search
		 */
		if (bref == NULL)
			break;

		/*
		 * Deleted object
		 */
		if (chain && (chain->flags & HAMMER2_CHAIN_DELETED)) {
			if (key_next == 0 || key_next > key_end)
				break;
			key_beg = key_next;
			continue;
		}

		/*
		 * Use the full live (not deleted) element for the scan
		 * iteration.  HAMMER2 does not allow partial replacements.
		 *
		 * XXX should be built into hammer2_combined_find().
		 */
		key_next = bref->key + ((hammer2_key_t)1 << bref->keybits);

		/*
		 * Expand our calculated key range (key, keybits) to fit
		 * the scanned key.  nkeybits represents the full range
		 * that we will later cut in half (two halves @ nkeybits - 1).
		 */
		nkeybits = keybits;
		if (nkeybits < bref->keybits) {
			if (bref->keybits > 64) {
				kprintf("bad bref chain %p bref %p\n",
					chain, bref);
				Debugger("fubar");
			}
			nkeybits = bref->keybits;
		}
		while (nkeybits < 64 &&
		       (~(((hammer2_key_t)1 << nkeybits) - 1) &
			(key ^ bref->key)) != 0) {
			++nkeybits;
		}

		/*
		 * If the new key range is larger we have to determine
		 * which side of the new key range the existing keys fall
		 * under by checking the high bit, then collapsing the
		 * locount into the hicount or vice-versa.
		 */
		if (keybits != nkeybits) {
			if (((hammer2_key_t)1 << (nkeybits - 1)) & key) {
				hicount += locount;
				locount = 0;
			} else {
				locount += hicount;
				hicount = 0;
			}
			keybits = nkeybits;
		}

		/*
		 * The newly scanned key will be in the lower half or the
		 * upper half of the (new) key range.
		 */
		if (((hammer2_key_t)1 << (nkeybits - 1)) & bref->key)
			++hicount;
		else
			++locount;

		if (key_next == 0)
			break;
		key_beg = key_next;
	}
	hammer2_spin_unex(&parent->core.spin);
	bref = NULL;	/* now invalid (safety) */

	/*
	 * Adjust keybits to represent half of the full range calculated
	 * above (radix 63 max) for our new indirect block.
	 */
	--keybits;

	/*
	 * Expand keybits to hold at least ncount elements.  ncount will be
	 * a power of 2.  This is to try to completely fill leaf nodes (at
	 * least for keys which are not hashes).
	 *
	 * We aren't counting 'in' or 'out', we are counting 'high side'
	 * and 'low side' based on the bit at (1LL << keybits).  We want
	 * everything to be inside in these cases so shift it all to
	 * the low or high side depending on the new high bit.
	 */
	while (((hammer2_key_t)1 << keybits) < ncount) {
		++keybits;
		if (key & ((hammer2_key_t)1 << keybits)) {
			hicount += locount;
			locount = 0;
		} else {
			locount += hicount;
			hicount = 0;
		}
	}

	if (hicount > locount)
		key |= (hammer2_key_t)1 << keybits;
	else
		key &= ~(hammer2_key_t)1 << keybits;

	*keyp = key;

	return (keybits);
}

#endif

/*
 * Sets CHAIN_DELETED and remove the chain's blockref from the parent if
 * it exists.
 *
 * Both parent and chain must be locked exclusively.
 *
 * This function will modify the parent if the blockref requires removal
 * from the parent's block table.
 *
 * This function is NOT recursive.  Any entity already pushed into the
 * chain (such as an inode) may still need visibility into its contents,
 * as well as the ability to read and modify the contents.  For example,
 * for an unlinked file which is still open.
 *
 * Also note that the flusher is responsible for cleaning up empty
 * indirect blocks.
 */
int
hammer2_chain_delete(hammer2_chain_t *parent, hammer2_chain_t *chain,
		     hammer2_tid_t mtid, int flags)
{
	int error = 0;

	KKASSERT(hammer2_mtx_owned(&chain->lock));

	/*
	 * Nothing to do if already marked.
	 *
	 * We need the spinlock on the core whose RBTREE contains chain
	 * to protect against races.
	 */
	if ((chain->flags & HAMMER2_CHAIN_DELETED) == 0) {
		KKASSERT((chain->flags & HAMMER2_CHAIN_DELETED) == 0 &&
			 chain->parent == parent);
		error = _hammer2_chain_delete_helper(parent, chain,
						     mtid, flags, NULL);
	}

	/*
	 * Permanent deletions mark the chain as destroyed.
	 *
	 * NOTE: We do not setflush the chain unless the deletion is
	 *	 permanent, since the deletion of a chain does not actually
	 *	 require it to be flushed.
	 */
	if (error == 0) {
		if (flags & HAMMER2_DELETE_PERMANENT) {
			atomic_set_int(&chain->flags, HAMMER2_CHAIN_DESTROY);
			hammer2_chain_setflush(chain);
		}
	}

	return error;
}

/*
 * Variant of hammer2_chain_delete() which also returns the chain's
 * original blockref in *obref (set to HAMMER2_BREF_TYPE_EMPTY when the
 * chain was already deleted).  Same locking requirements as
 * hammer2_chain_delete().
 */
static int
hammer2_chain_delete_obref(hammer2_chain_t *parent, hammer2_chain_t *chain,
		     hammer2_tid_t mtid, int flags,
		     hammer2_blockref_t *obref)
{
	int error = 0;

	KKASSERT(hammer2_mtx_owned(&chain->lock));

	/*
	 * Nothing to do if already marked.
	 *
	 * We need the spinlock on the core whose RBTREE contains chain
	 * to protect against races.
	 */
	obref->type = HAMMER2_BREF_TYPE_EMPTY;
	if ((chain->flags & HAMMER2_CHAIN_DELETED) == 0) {
		KKASSERT((chain->flags & HAMMER2_CHAIN_DELETED) == 0 &&
			 chain->parent == parent);
		error = _hammer2_chain_delete_helper(parent, chain,
						     mtid, flags, obref);
	}

	/*
	 * Permanent deletions mark the chain as destroyed.
	 *
	 * NOTE: We do not setflush the chain unless the deletion is
	 *	 permanent, since the deletion of a chain does not actually
	 *	 require it to be flushed.
	 */
	if (error == 0) {
		if (flags & HAMMER2_DELETE_PERMANENT) {
			atomic_set_int(&chain->flags, HAMMER2_CHAIN_DESTROY);
			hammer2_chain_setflush(chain);
		}
	}

	return error;
}

/*
 * Returns the index of the nearest element in the blockref array >= elm.
 * Returns (count) if no element could be found.
 *
 * Sets *key_nextp to the next key for loop purposes but does not modify
 * it if the next key would be higher than the current value of *key_nextp.
 * Note that *key_nextp can overflow to 0, which should be tested by the
 * caller.
 *
 * WARNING!
Must be called with parent's spinlock held. Spinlock remains 5189 * held through the operation. 5190 */ 5191 static int 5192 hammer2_base_find(hammer2_chain_t *parent, 5193 hammer2_blockref_t *base, int count, 5194 hammer2_key_t *key_nextp, 5195 hammer2_key_t key_beg, hammer2_key_t key_end) 5196 { 5197 hammer2_blockref_t *scan; 5198 hammer2_key_t scan_end; 5199 int i; 5200 int limit; 5201 5202 /* 5203 * Require the live chain's already have their core's counted 5204 * so we can optimize operations. 5205 */ 5206 KKASSERT(parent->flags & HAMMER2_CHAIN_COUNTEDBREFS); 5207 5208 /* 5209 * Degenerate case 5210 */ 5211 if (count == 0 || base == NULL) 5212 return(count); 5213 5214 /* 5215 * Sequential optimization using parent->cache_index. This is 5216 * the most likely scenario. 5217 * 5218 * We can avoid trailing empty entries on live chains, otherwise 5219 * we might have to check the whole block array. 5220 */ 5221 i = parent->cache_index; /* SMP RACE OK */ 5222 cpu_ccfence(); 5223 limit = parent->core.live_zero; 5224 if (i >= limit) 5225 i = limit - 1; 5226 if (i < 0) 5227 i = 0; 5228 KKASSERT(i < count); 5229 5230 /* 5231 * Search backwards 5232 */ 5233 scan = &base[i]; 5234 while (i > 0 && (scan->type == HAMMER2_BREF_TYPE_EMPTY || 5235 scan->key > key_beg)) { 5236 --scan; 5237 --i; 5238 } 5239 parent->cache_index = i; 5240 5241 /* 5242 * Search forwards, stop when we find a scan element which 5243 * encloses the key or until we know that there are no further 5244 * elements. 
5245 */ 5246 while (i < count) { 5247 if (scan->type != HAMMER2_BREF_TYPE_EMPTY) { 5248 scan_end = scan->key + 5249 ((hammer2_key_t)1 << scan->keybits) - 1; 5250 if (scan->key > key_beg || scan_end >= key_beg) 5251 break; 5252 } 5253 if (i >= limit) 5254 return (count); 5255 ++scan; 5256 ++i; 5257 } 5258 if (i != count) { 5259 parent->cache_index = i; 5260 if (i >= limit) { 5261 i = count; 5262 } else { 5263 scan_end = scan->key + 5264 ((hammer2_key_t)1 << scan->keybits); 5265 if (scan_end && (*key_nextp > scan_end || 5266 *key_nextp == 0)) { 5267 *key_nextp = scan_end; 5268 } 5269 } 5270 } 5271 return (i); 5272 } 5273 5274 /* 5275 * Do a combined search and return the next match either from the blockref 5276 * array or from the in-memory chain. Sets *bresp to the returned bref in 5277 * both cases, or sets it to NULL if the search exhausted. Only returns 5278 * a non-NULL chain if the search matched from the in-memory chain. 5279 * 5280 * When no in-memory chain has been found and a non-NULL bref is returned 5281 * in *bresp. 5282 * 5283 * 5284 * The returned chain is not locked or referenced. Use the returned bref 5285 * to determine if the search exhausted or not. Iterate if the base find 5286 * is chosen but matches a deleted chain. 5287 * 5288 * WARNING! Must be called with parent's spinlock held. Spinlock remains 5289 * held through the operation. 5290 */ 5291 hammer2_chain_t * 5292 hammer2_combined_find(hammer2_chain_t *parent, 5293 hammer2_blockref_t *base, int count, 5294 hammer2_key_t *key_nextp, 5295 hammer2_key_t key_beg, hammer2_key_t key_end, 5296 hammer2_blockref_t **bresp) 5297 { 5298 hammer2_blockref_t *bref; 5299 hammer2_chain_t *chain; 5300 int i; 5301 5302 /* 5303 * Lookup in block array and in rbtree. 
5304 */ 5305 *key_nextp = key_end + 1; 5306 i = hammer2_base_find(parent, base, count, key_nextp, 5307 key_beg, key_end); 5308 chain = hammer2_chain_find(parent, key_nextp, key_beg, key_end); 5309 5310 /* 5311 * Neither matched 5312 */ 5313 if (i == count && chain == NULL) { 5314 *bresp = NULL; 5315 return(NULL); 5316 } 5317 5318 /* 5319 * Only chain matched. 5320 */ 5321 if (i == count) { 5322 bref = &chain->bref; 5323 goto found; 5324 } 5325 5326 /* 5327 * Only blockref matched. 5328 */ 5329 if (chain == NULL) { 5330 bref = &base[i]; 5331 goto found; 5332 } 5333 5334 /* 5335 * Both in-memory and blockref matched, select the nearer element. 5336 * 5337 * If both are flush with the left-hand side or both are the 5338 * same distance away, select the chain. In this situation the 5339 * chain must have been loaded from the matching blockmap. 5340 */ 5341 if ((chain->bref.key <= key_beg && base[i].key <= key_beg) || 5342 chain->bref.key == base[i].key) { 5343 KKASSERT(chain->bref.key == base[i].key); 5344 bref = &chain->bref; 5345 goto found; 5346 } 5347 5348 /* 5349 * Select the nearer key 5350 */ 5351 if (chain->bref.key < base[i].key) { 5352 bref = &chain->bref; 5353 } else { 5354 bref = &base[i]; 5355 chain = NULL; 5356 } 5357 5358 /* 5359 * If the bref is out of bounds we've exhausted our search. 5360 */ 5361 found: 5362 if (bref->key > key_end) { 5363 *bresp = NULL; 5364 chain = NULL; 5365 } else { 5366 *bresp = bref; 5367 } 5368 return(chain); 5369 } 5370 5371 /* 5372 * Locate the specified block array element and delete it. The element 5373 * must exist. 5374 * 5375 * The spin lock on the related chain must be held. 5376 * 5377 * NOTE: live_count was adjusted when the chain was deleted, so it does not 5378 * need to be adjusted when we commit the media change. 
 */
void
hammer2_base_delete(hammer2_chain_t *parent,
		    hammer2_blockref_t *base, int count,
		    hammer2_chain_t *chain,
		    hammer2_blockref_t *obref)
{
	hammer2_blockref_t *elm = &chain->bref;
	hammer2_blockref_t *scan;
	hammer2_key_t key_next;
	int i;

	/*
	 * Delete element.  Expect the element to exist.
	 *
	 * XXX see caller, flush code not yet sophisticated enough to prevent
	 *     re-flushed in some cases.
	 */
	key_next = 0; /* max range */
	i = hammer2_base_find(parent, base, count, &key_next,
			      elm->key, elm->key);
	scan = &base[i];
	if (i == count || scan->type == HAMMER2_BREF_TYPE_EMPTY ||
	    scan->key != elm->key ||
	    ((chain->flags & HAMMER2_CHAIN_BMAPUPD) == 0 &&
	     scan->keybits != elm->keybits)) {
		hammer2_spin_unex(&parent->core.spin);
		panic("delete base %p element not found at %d/%d elm %p\n",
		      base, i, count, elm);
		return;
	}

	/*
	 * Update stats and zero the entry.
	 *
	 * NOTE: Handle radix == 0 (0 bytes) case.
	 */
	if ((int)(scan->data_off & HAMMER2_OFF_MASK_RADIX)) {
		parent->bref.embed.stats.data_count -= (hammer2_off_t)1 <<
			(int)(scan->data_off & HAMMER2_OFF_MASK_RADIX);
	}

	/*
	 * Aggregate statistics (inode_count, data_count, leaf_count) are
	 * rolled out of the parent.  The INODE/DATA cases deliberately
	 * fall through to share the INDIRECT-style accounting below.
	 */
	switch(scan->type) {
	case HAMMER2_BREF_TYPE_INODE:
		--parent->bref.embed.stats.inode_count;
		/* fall through */
	case HAMMER2_BREF_TYPE_DATA:
		if (parent->bref.leaf_count == HAMMER2_BLOCKREF_LEAF_MAX) {
			atomic_set_int(&chain->flags,
				       HAMMER2_CHAIN_HINT_LEAF_COUNT);
		} else {
			if (parent->bref.leaf_count)
				--parent->bref.leaf_count;
		}
		/* fall through */
	case HAMMER2_BREF_TYPE_INDIRECT:
		if (scan->type != HAMMER2_BREF_TYPE_DATA) {
			parent->bref.embed.stats.data_count -=
				scan->embed.stats.data_count;
			parent->bref.embed.stats.inode_count -=
				scan->embed.stats.inode_count;
		}
		if (scan->type == HAMMER2_BREF_TYPE_INODE)
			break;
		if (parent->bref.leaf_count == HAMMER2_BLOCKREF_LEAF_MAX) {
			atomic_set_int(&chain->flags,
				       HAMMER2_CHAIN_HINT_LEAF_COUNT);
		} else {
			if (parent->bref.leaf_count <= scan->leaf_count)
				parent->bref.leaf_count = 0;
			else
				parent->bref.leaf_count -= scan->leaf_count;
		}
		break;
	case HAMMER2_BREF_TYPE_DIRENT:
		if (parent->bref.leaf_count == HAMMER2_BLOCKREF_LEAF_MAX) {
			atomic_set_int(&chain->flags,
				       HAMMER2_CHAIN_HINT_LEAF_COUNT);
		} else {
			if (parent->bref.leaf_count)
				--parent->bref.leaf_count;
		}
		/* fall through - original has no break; default only breaks */
	default:
		break;
	}

	if (obref)
		*obref = *scan;
	bzero(scan, sizeof(*scan));

	/*
	 * We can only optimize parent->core.live_zero for live chains.
	 */
	if (parent->core.live_zero == i + 1) {
		while (--i >= 0 && base[i].type == HAMMER2_BREF_TYPE_EMPTY)
			;
		parent->core.live_zero = i + 1;
	}

	/*
	 * Clear appropriate blockmap flags in chain.
	 */
	atomic_clear_int(&chain->flags, HAMMER2_CHAIN_BMAPPED |
					HAMMER2_CHAIN_BMAPUPD);
}

/*
 * Insert the specified element.  The block array must not already have the
 * element and must have space available for the insertion.
 *
 * The spin lock on the related chain must be held.
 *
 * NOTE: live_count was adjusted when the chain was deleted, so it does not
 *	 need to be adjusted when we commit the media change.
 */
void
hammer2_base_insert(hammer2_chain_t *parent,
		    hammer2_blockref_t *base, int count,
		    hammer2_chain_t *chain, hammer2_blockref_t *elm)
{
	hammer2_key_t key_next;
	hammer2_key_t xkey;
	int i;
	int j;
	int k;
	int l;
	int u = 1;	/* which insertion strategy fired (for debug panic) */

	/*
	 * Insert new element.  Expect the element to not already exist
	 * unless we are replacing it.
	 *
	 * XXX see caller, flush code not yet sophisticated enough to prevent
	 *     re-flushed in some cases.
	 */
	key_next = 0; /* max range */
	i = hammer2_base_find(parent, base, count, &key_next,
			      elm->key, elm->key);

	/*
	 * Shortcut fill optimization, typical ordered insertion(s) may not
	 * require a search.
	 */
	KKASSERT(i >= 0 && i <= count);

	/*
	 * Set appropriate blockmap flags in chain (if not NULL)
	 */
	if (chain)
		atomic_set_int(&chain->flags, HAMMER2_CHAIN_BMAPPED);

	/*
	 * Update stats and zero the entry
	 */
	if ((int)(elm->data_off & HAMMER2_OFF_MASK_RADIX)) {
		parent->bref.embed.stats.data_count += (hammer2_off_t)1 <<
			(int)(elm->data_off & HAMMER2_OFF_MASK_RADIX);
	}

	/*
	 * Aggregate statistics are rolled into the parent; mirror image
	 * of the accounting in hammer2_base_delete().
	 */
	switch(elm->type) {
	case HAMMER2_BREF_TYPE_INODE:
		++parent->bref.embed.stats.inode_count;
		/* fall through */
	case HAMMER2_BREF_TYPE_DATA:
		if (parent->bref.leaf_count != HAMMER2_BLOCKREF_LEAF_MAX)
			++parent->bref.leaf_count;
		/* fall through */
	case HAMMER2_BREF_TYPE_INDIRECT:
		if (elm->type != HAMMER2_BREF_TYPE_DATA) {
			parent->bref.embed.stats.data_count +=
				elm->embed.stats.data_count;
			parent->bref.embed.stats.inode_count +=
				elm->embed.stats.inode_count;
		}
		if (elm->type == HAMMER2_BREF_TYPE_INODE)
			break;
		if (parent->bref.leaf_count + elm->leaf_count <
		    HAMMER2_BLOCKREF_LEAF_MAX) {
			parent->bref.leaf_count += elm->leaf_count;
		} else {
			parent->bref.leaf_count = HAMMER2_BLOCKREF_LEAF_MAX;
		}
		break;
	case HAMMER2_BREF_TYPE_DIRENT:
		if (parent->bref.leaf_count != HAMMER2_BLOCKREF_LEAF_MAX)
			++parent->bref.leaf_count;
		break;
	default:
		break;
	}


	/*
	 * We can only optimize parent->core.live_zero for live chains.
	 */
	if (i == count && parent->core.live_zero < count) {
		i = parent->core.live_zero++;
		base[i] = *elm;
		return;
	}

	xkey = elm->key + ((hammer2_key_t)1 << elm->keybits) - 1;
	if (i != count && (base[i].key < elm->key || xkey >= base[i].key)) {
		hammer2_spin_unex(&parent->core.spin);
		panic("insert base %p overlapping elements at %d elm %p\n",
		      base, i, elm);
	}

	/*
	 * Try to find an empty slot before or after.  j probes downward,
	 * k probes upward; whichever finds a hole first determines the
	 * direction existing elements are shifted.
	 */
	j = i;
	k = i;
	while (j > 0 || k < count) {
		--j;
		if (j >= 0 && base[j].type == HAMMER2_BREF_TYPE_EMPTY) {
			if (j == i - 1) {
				base[j] = *elm;
			} else {
				bcopy(&base[j+1], &base[j],
				      (i - j - 1) * sizeof(*base));
				base[i - 1] = *elm;
			}
			goto validate;
		}
		++k;
		if (k < count && base[k].type == HAMMER2_BREF_TYPE_EMPTY) {
			bcopy(&base[i], &base[i+1],
			      (k - i) * sizeof(hammer2_blockref_t));
			base[i] = *elm;

			/*
			 * We can only update parent->core.live_zero for live
			 * chains.
			 */
			if (parent->core.live_zero <= k)
				parent->core.live_zero = k + 1;
			u = 2;
			goto validate;
		}
	}
	panic("hammer2_base_insert: no room!");

	/*
	 * Debugging - verify the array remains sorted with no key overlaps.
	 */
validate:
	key_next = 0;
	for (l = 0; l < count; ++l) {
		if (base[l].type != HAMMER2_BREF_TYPE_EMPTY) {
			key_next = base[l].key +
				   ((hammer2_key_t)1 << base[l].keybits) - 1;
			break;
		}
	}
	while (++l < count) {
		if (base[l].type != HAMMER2_BREF_TYPE_EMPTY) {
			if (base[l].key <= key_next)
				panic("base_insert %d %d,%d,%d fail %p:%d", u, i, j, k, base, l);
			key_next = base[l].key +
				   ((hammer2_key_t)1 << base[l].keybits) - 1;

		}
	}

}

#if 0

/*
 * Sort the blockref array for the chain.  Used by the flush code to
 * sort the blockref[] array.
 *
 * The chain must be exclusively locked AND spin-locked.
 */
typedef hammer2_blockref_t *hammer2_blockref_p;

/*
 * qsort comparator: empty elements sort to the end, live elements sort
 * ascending by key.
 */
static
int
hammer2_base_sort_callback(const void *v1, const void *v2)
{
	hammer2_blockref_p bref1 = *(const hammer2_blockref_p *)v1;
	hammer2_blockref_p bref2 = *(const hammer2_blockref_p *)v2;

	/*
	 * Make sure empty elements are placed at the end of the array
	 */
	if (bref1->type == HAMMER2_BREF_TYPE_EMPTY) {
		if (bref2->type == HAMMER2_BREF_TYPE_EMPTY)
			return(0);
		return(1);
	} else if (bref2->type == HAMMER2_BREF_TYPE_EMPTY) {
		return(-1);
	}

	/*
	 * Sort by key
	 */
	if (bref1->key < bref2->key)
		return(-1);
	if (bref1->key > bref2->key)
		return(1);
	return(0);
}

void
hammer2_base_sort(hammer2_chain_t *chain)
{
	hammer2_blockref_t *base;
	int count;

	/*
	 * Locate the media blockref array for this chain type.
	 */
	switch(chain->bref.type) {
	case HAMMER2_BREF_TYPE_INODE:
		/*
		 * Special shortcut for embedded data returns the inode
		 * itself.  Callers must detect this condition and access
		 * the embedded data (the strategy code does this for us).
		 *
		 * This is only applicable to regular files and softlinks.
		 */
		if (chain->data->ipdata.meta.op_flags &
		    HAMMER2_OPFLAG_DIRECTDATA) {
			return;
		}
		base = &chain->data->ipdata.u.blockset.blockref[0];
		count = HAMMER2_SET_COUNT;
		break;
	case HAMMER2_BREF_TYPE_FREEMAP_NODE:
	case HAMMER2_BREF_TYPE_INDIRECT:
		/*
		 * Optimize indirect blocks in the INITIAL state to avoid
		 * I/O.
		 */
		KKASSERT((chain->flags & HAMMER2_CHAIN_INITIAL) == 0);
		base = &chain->data->npdata[0];
		count = chain->bytes / sizeof(hammer2_blockref_t);
		break;
	case HAMMER2_BREF_TYPE_VOLUME:
		base = &chain->data->voldata.sroot_blockset.blockref[0];
		count = HAMMER2_SET_COUNT;
		break;
	case HAMMER2_BREF_TYPE_FREEMAP:
		base = &chain->data->blkset.blockref[0];
		count = HAMMER2_SET_COUNT;
		break;
	default:
		panic("hammer2_base_sort: unrecognized "
		      "blockref(A) type: %d",
		      chain->bref.type);
		base = NULL;	/* safety */
		count = 0;	/* safety */
		break;
	}
	kqsort(base, count, sizeof(*base), hammer2_base_sort_callback);
}

#endif

/*
 * Set the check data for a chain.  This can be a heavy-weight operation
 * and typically only runs on-flush.  For file data check data is calculated
 * when the logical buffers are flushed.
 */
void
hammer2_chain_setcheck(hammer2_chain_t *chain, void *bdata)
{
	atomic_clear_int(&chain->flags, HAMMER2_CHAIN_NOTTESTED);

	switch(HAMMER2_DEC_CHECK(chain->bref.methods)) {
	case HAMMER2_CHECK_NONE:
		break;
	case HAMMER2_CHECK_DISABLED:
		break;
	case HAMMER2_CHECK_ISCSI32:
		chain->bref.check.iscsi32.value =
			hammer2_icrc32(bdata, chain->bytes);
		break;
	case HAMMER2_CHECK_XXHASH64:
		chain->bref.check.xxhash64.value =
			XXH64(bdata, chain->bytes, XXH_HAMMER2_SEED);
		break;
	case HAMMER2_CHECK_SHA192:
		{
			SHA256_CTX hash_ctx;
			union {
				uint8_t digest[SHA256_DIGEST_LENGTH];
				uint64_t digest64[SHA256_DIGEST_LENGTH/8];
			} u;

			SHA256_Init(&hash_ctx);
			SHA256_Update(&hash_ctx, bdata, chain->bytes);
			SHA256_Final(u.digest, &hash_ctx);
			/* fold the 256-bit digest down into 192 bits */
			u.digest64[2] ^= u.digest64[3];
			bcopy(u.digest,
			      chain->bref.check.sha192.data,
			      sizeof(chain->bref.check.sha192.data));
		}
		break;
	case HAMMER2_CHECK_FREEMAP:
		chain->bref.check.freemap.icrc32 =
			hammer2_icrc32(bdata, chain->bytes);
		break;
	default:
		kprintf("hammer2_chain_setcheck: unknown check type %02x\n",
			chain->bref.methods);
		break;
	}
}

/*
 * Characterize a failed check code and try to trace back to the inode.
 */
static void
hammer2_characterize_failed_chain(hammer2_chain_t *chain, uint64_t check,
				  int bits)
{
	hammer2_chain_t *lchain;
	hammer2_chain_t *ochain;
	int did;

	/* krateprintf returns 0 when rate-limited; skip the detail then */
	did = krateprintf(&krate_h2chk,
			  "chain %016jx.%02x (%s) meth=%02x CHECK FAIL "
			  "(flags=%08x, bref/data ",
			  chain->bref.data_off,
			  chain->bref.type,
			  hammer2_bref_type_str(chain->bref.type),
			  chain->bref.methods,
			  chain->flags);
	if (did == 0)
		return;

	if (bits == 32) {
		kprintf("%08x/%08x)\n",
			chain->bref.check.iscsi32.value,
			(uint32_t)check);
	} else {
		kprintf("%016jx/%016jx)\n",
			chain->bref.check.xxhash64.value,
			check);
	}

	/*
	 * Run up the chains to try to find the governing inode so we
	 * can report it.
	 *
	 * XXX This error reporting is not really MPSAFE
	 */
	ochain = chain;
	lchain = chain;
	while (chain && chain->bref.type != HAMMER2_BREF_TYPE_INODE) {
		lchain = chain;
		chain = chain->parent;
	}

	if (chain && chain->bref.type == HAMMER2_BREF_TYPE_INODE &&
	    ((chain->bref.flags & HAMMER2_BREF_FLAG_PFSROOT) == 0 ||
	     (lchain->bref.key & HAMMER2_DIRHASH_VISIBLE))) {
		/*
		 * NOTE(review): bref.key is a 64-bit hammer2_key_t; %ld
		 * assumes an LP64 long -- confirm or switch to %jd.
		 */
		kprintf(" Resides at/in inode %ld\n",
			chain->bref.key);
	} else if (chain && chain->bref.type == HAMMER2_BREF_TYPE_INODE) {
		kprintf(" Resides in inode index - CRITICAL!!!\n");
	} else {
		kprintf(" Resides in root index - CRITICAL!!!\n");
	}
	if (ochain->hmp) {
		const char *pfsname = "UNKNOWN";
		int i;

		if (ochain->pmp) {
			for (i = 0; i < HAMMER2_MAXCLUSTER; ++i) {
				if (ochain->pmp->pfs_hmps[i] == ochain->hmp &&
				    ochain->pmp->pfs_names[i]) {
					pfsname = ochain->pmp->pfs_names[i];
					break;
				}
			}
		}
		kprintf(" In pfs %s on device %s\n",
			pfsname, ochain->hmp->devrepname);
	}
}

/*
 * Returns non-zero on success, 0 on failure.
5861 */ 5862 int 5863 hammer2_chain_testcheck(hammer2_chain_t *chain, void *bdata) 5864 { 5865 uint32_t check32; 5866 uint64_t check64; 5867 int r; 5868 5869 if (chain->flags & HAMMER2_CHAIN_NOTTESTED) 5870 return 1; 5871 5872 switch(HAMMER2_DEC_CHECK(chain->bref.methods)) { 5873 case HAMMER2_CHECK_NONE: 5874 r = 1; 5875 break; 5876 case HAMMER2_CHECK_DISABLED: 5877 r = 1; 5878 break; 5879 case HAMMER2_CHECK_ISCSI32: 5880 check32 = hammer2_icrc32(bdata, chain->bytes); 5881 r = (chain->bref.check.iscsi32.value == check32); 5882 if (r == 0) { 5883 hammer2_characterize_failed_chain(chain, check32, 32); 5884 } 5885 hammer2_process_icrc32 += chain->bytes; 5886 break; 5887 case HAMMER2_CHECK_XXHASH64: 5888 check64 = XXH64(bdata, chain->bytes, XXH_HAMMER2_SEED); 5889 r = (chain->bref.check.xxhash64.value == check64); 5890 if (r == 0) { 5891 hammer2_characterize_failed_chain(chain, check64, 64); 5892 } 5893 hammer2_process_xxhash64 += chain->bytes; 5894 break; 5895 case HAMMER2_CHECK_SHA192: 5896 { 5897 SHA256_CTX hash_ctx; 5898 union { 5899 uint8_t digest[SHA256_DIGEST_LENGTH]; 5900 uint64_t digest64[SHA256_DIGEST_LENGTH/8]; 5901 } u; 5902 5903 SHA256_Init(&hash_ctx); 5904 SHA256_Update(&hash_ctx, bdata, chain->bytes); 5905 SHA256_Final(u.digest, &hash_ctx); 5906 u.digest64[2] ^= u.digest64[3]; 5907 if (bcmp(u.digest, 5908 chain->bref.check.sha192.data, 5909 sizeof(chain->bref.check.sha192.data)) == 0) { 5910 r = 1; 5911 } else { 5912 r = 0; 5913 krateprintf(&krate_h2chk, 5914 "chain %016jx.%02x meth=%02x " 5915 "CHECK FAIL\n", 5916 chain->bref.data_off, 5917 chain->bref.type, 5918 chain->bref.methods); 5919 } 5920 } 5921 break; 5922 case HAMMER2_CHECK_FREEMAP: 5923 r = (chain->bref.check.freemap.icrc32 == 5924 hammer2_icrc32(bdata, chain->bytes)); 5925 if (r == 0) { 5926 int did; 5927 5928 did = krateprintf(&krate_h2chk, 5929 "chain %016jx.%02x meth=%02x " 5930 "CHECK FAIL\n", 5931 chain->bref.data_off, 5932 chain->bref.type, 5933 chain->bref.methods); 5934 if (did) { 
5935 kprintf("freemap.icrc %08x icrc32 %08x (%d)\n", 5936 chain->bref.check.freemap.icrc32, 5937 hammer2_icrc32(bdata, chain->bytes), 5938 chain->bytes); 5939 if (chain->dio) { 5940 kprintf("dio %p buf %016jx,%d " 5941 "bdata %p/%p\n", 5942 chain->dio, 5943 chain->dio->bp->b_loffset, 5944 chain->dio->bp->b_bufsize, 5945 bdata, 5946 chain->dio->bp->b_data); 5947 } 5948 } 5949 } 5950 break; 5951 default: 5952 kprintf("hammer2_chain_testcheck: unknown check type %02x\n", 5953 chain->bref.methods); 5954 r = 1; 5955 break; 5956 } 5957 return r; 5958 } 5959 5960 /* 5961 * Acquire the chain and parent representing the specified inode for the 5962 * device at the specified cluster index. 5963 * 5964 * The flags passed in are LOOKUP flags, not RESOLVE flags. 5965 * 5966 * If we are unable to locate the inode, HAMMER2_ERROR_EIO is returned and 5967 * *chainp will be NULL. *parentp may still be set error or not, or NULL 5968 * if the parent itself could not be resolved. 5969 * 5970 * The caller may pass-in a locked *parentp and/or *chainp, or neither. 5971 * They will be unlocked and released by this function. The *parentp and 5972 * *chainp representing the located inode are returned locked. 5973 */ 5974 int 5975 hammer2_chain_inode_find(hammer2_pfs_t *pmp, hammer2_key_t inum, 5976 int clindex, int flags, 5977 hammer2_chain_t **parentp, hammer2_chain_t **chainp) 5978 { 5979 hammer2_chain_t *parent; 5980 hammer2_chain_t *rchain; 5981 hammer2_key_t key_dummy; 5982 hammer2_inode_t *ip; 5983 int resolve_flags; 5984 int error; 5985 5986 resolve_flags = (flags & HAMMER2_LOOKUP_SHARED) ? 5987 HAMMER2_RESOLVE_SHARED : 0; 5988 5989 /* 5990 * Caller expects us to replace these. 
5991 */ 5992 if (*chainp) { 5993 hammer2_chain_unlock(*chainp); 5994 hammer2_chain_drop(*chainp); 5995 *chainp = NULL; 5996 } 5997 if (*parentp) { 5998 hammer2_chain_unlock(*parentp); 5999 hammer2_chain_drop(*parentp); 6000 *parentp = NULL; 6001 } 6002 6003 /* 6004 * Be very careful, this is a backend function and we CANNOT 6005 * lock any frontend inode structure we find. But we have to 6006 * look the inode up this way first in case it exists but is 6007 * detached from the radix tree. 6008 */ 6009 ip = hammer2_inode_lookup(pmp, inum); 6010 if (ip) { 6011 *chainp = hammer2_inode_chain_and_parent(ip, clindex, 6012 parentp, 6013 resolve_flags); 6014 hammer2_inode_drop(ip); 6015 if (*chainp) 6016 return 0; 6017 hammer2_chain_unlock(*chainp); 6018 hammer2_chain_drop(*chainp); 6019 *chainp = NULL; 6020 if (*parentp) { 6021 hammer2_chain_unlock(*parentp); 6022 hammer2_chain_drop(*parentp); 6023 *parentp = NULL; 6024 } 6025 } 6026 6027 /* 6028 * Inodes hang off of the iroot (bit 63 is clear, differentiating 6029 * inodes from root directory entries in the key lookup). 6030 */ 6031 parent = hammer2_inode_chain(pmp->iroot, clindex, resolve_flags); 6032 rchain = NULL; 6033 if (parent) { 6034 rchain = hammer2_chain_lookup(&parent, &key_dummy, 6035 inum, inum, 6036 &error, flags); 6037 } else { 6038 error = HAMMER2_ERROR_EIO; 6039 } 6040 *parentp = parent; 6041 *chainp = rchain; 6042 6043 return error; 6044 } 6045 6046 /* 6047 * Used by the bulkscan code to snapshot the synchronized storage for 6048 * a volume, allowing it to be scanned concurrently against normal 6049 * operation. 
 */
hammer2_chain_t *
hammer2_chain_bulksnap(hammer2_dev_t *hmp)
{
	hammer2_chain_t *copy;

	/*
	 * Allocate a throw-away chain for the volume header and give it
	 * a private copy of the last synchronized volume data (volsync),
	 * taken under the voldata lock.
	 */
	copy = hammer2_chain_alloc(hmp, hmp->spmp, &hmp->vchain.bref);
	copy->data = kmalloc(sizeof(copy->data->voldata),
			     hmp->mmsg, M_WAITOK | M_ZERO);
	hammer2_voldata_lock(hmp);
	copy->data->voldata = hmp->volsync;
	hammer2_voldata_unlock(hmp);

	return copy;
}

/*
 * Release a snapshot chain obtained from hammer2_chain_bulksnap(),
 * freeing the private voldata copy before dropping the chain.
 */
void
hammer2_chain_bulkdrop(hammer2_chain_t *copy)
{
	KKASSERT(copy->bref.type == HAMMER2_BREF_TYPE_VOLUME);
	KKASSERT(copy->data);
	kfree(copy->data, copy->hmp->mmsg);
	copy->data = NULL;
	atomic_add_long(&hammer2_chain_allocs, -1);
	hammer2_chain_drop(copy);
}

/*
 * Returns non-zero if the chain (INODE or DIRENT) matches the
 * filename.
 */
int
hammer2_chain_dirent_test(hammer2_chain_t *chain, const char *name,
			  size_t name_len)
{
	const hammer2_inode_data_t *ripdata;

	if (chain->bref.type == HAMMER2_BREF_TYPE_INODE) {
		ripdata = &chain->data->ipdata;
		if (ripdata->meta.name_len == name_len &&
		    bcmp(ripdata->filename, name, name_len) == 0) {
			return 1;
		}
	}
	if (chain->bref.type == HAMMER2_BREF_TYPE_DIRENT &&
	    chain->bref.embed.dirent.namlen == name_len) {
		/*
		 * Names too long for the embedded check.buf area are
		 * stored in the dirent's data block instead.
		 */
		if (name_len > sizeof(chain->bref.check.buf) &&
		    bcmp(chain->data->buf, name, name_len) == 0) {
			return 1;
		}
		if (name_len <= sizeof(chain->bref.check.buf) &&
		    bcmp(chain->bref.check.buf, name, name_len) == 0) {
			return 1;
		}
	}
	return 0;
}