1 /* 2 * Copyright (c) 2011-2020 The DragonFly Project. All rights reserved. 3 * 4 * This code is derived from software contributed to The DragonFly Project 5 * by Matthew Dillon <dillon@dragonflybsd.org> 6 * and Venkatesh Srinivas <vsrinivas@dragonflybsd.org> 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in 16 * the documentation and/or other materials provided with the 17 * distribution. 18 * 3. Neither the name of The DragonFly Project nor the names of its 19 * contributors may be used to endorse or promote products derived 20 * from this software without specific, prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 23 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 24 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 25 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 26 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 27 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 28 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 29 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 30 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 31 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 32 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33 * SUCH DAMAGE. 34 */ 35 /* 36 * This subsystem implements most of the core support functions for 37 * the hammer2_chain structure. 38 * 39 * Chains are the in-memory version on media objects (volume header, inodes, 40 * indirect blocks, data blocks, etc). Chains represent a portion of the 41 * HAMMER2 topology. 42 * 43 * Chains are no-longer delete-duplicated. Instead, the original in-memory 44 * chain will be moved along with its block reference (e.g. for things like 45 * renames, hardlink operations, modifications, etc), and will be indexed 46 * on a secondary list for flush handling instead of propagating a flag 47 * upward to the root. 48 * 49 * Concurrent front-end operations can still run against backend flushes 50 * as long as they do not cross the current flush boundary. An operation 51 * running above the current flush (in areas not yet flushed) can become 52 * part of the current flush while ano peration running below the current 53 * flush can become part of the next flush. 54 */ 55 #include <sys/cdefs.h> 56 #include <sys/param.h> 57 #include <sys/systm.h> 58 #include <sys/types.h> 59 #include <sys/lock.h> 60 #include <sys/buf.h> 61 62 #include <crypto/sha2/sha2.h> 63 64 #include "hammer2.h" 65 66 static hammer2_chain_t *hammer2_chain_create_indirect( 67 hammer2_chain_t *parent, 68 hammer2_key_t key, int keybits, 69 hammer2_tid_t mtid, int for_type, int *errorp); 70 static int hammer2_chain_delete_obref(hammer2_chain_t *parent, 71 hammer2_chain_t *chain, 72 hammer2_tid_t mtid, int flags, 73 hammer2_blockref_t *obref); 74 static hammer2_chain_t *hammer2_combined_find( 75 hammer2_chain_t *parent, 76 hammer2_blockref_t *base, int count, 77 hammer2_key_t *key_nextp, 78 hammer2_key_t key_beg, hammer2_key_t key_end, 79 hammer2_blockref_t **brefp); 80 static hammer2_chain_t *hammer2_chain_lastdrop(hammer2_chain_t *chain, 81 int depth); 82 static void hammer2_chain_lru_flush(hammer2_pfs_t *pmp); 83 84 /* 85 * There are many degenerate situations where an extreme rate of console 86 * output can occur from warnings and errors. Make sure this output does 87 * not impede operations. 88 */ 89 static struct krate krate_h2chk = { .freq = 5 }; 90 static struct krate krate_h2me = { .freq = 1 }; 91 static struct krate krate_h2em = { .freq = 1 }; 92 93 /* 94 * Basic RBTree for chains (core.rbtree). 95 */ 96 RB_GENERATE(hammer2_chain_tree, hammer2_chain, rbnode, hammer2_chain_cmp); 97 98 int 99 hammer2_chain_cmp(hammer2_chain_t *chain1, hammer2_chain_t *chain2) 100 { 101 hammer2_key_t c1_beg; 102 hammer2_key_t c1_end; 103 hammer2_key_t c2_beg; 104 hammer2_key_t c2_end; 105 106 /* 107 * Compare chains. Overlaps are not supposed to happen and catch 108 * any software issues early we count overlaps as a match. 109 */ 110 c1_beg = chain1->bref.key; 111 c1_end = c1_beg + ((hammer2_key_t)1 << chain1->bref.keybits) - 1; 112 c2_beg = chain2->bref.key; 113 c2_end = c2_beg + ((hammer2_key_t)1 << chain2->bref.keybits) - 1; 114 115 if (c1_end < c2_beg) /* fully to the left */ 116 return(-1); 117 if (c1_beg > c2_end) /* fully to the right */ 118 return(1); 119 return(0); /* overlap (must not cross edge boundary) */ 120 } 121 122 /* 123 * Assert that a chain has no media data associated with it. 124 */ 125 static __inline void 126 hammer2_chain_assert_no_data(hammer2_chain_t *chain) 127 { 128 KKASSERT(chain->dio == NULL); 129 if (chain->bref.type != HAMMER2_BREF_TYPE_VOLUME && 130 chain->bref.type != HAMMER2_BREF_TYPE_FREEMAP && 131 chain->data) { 132 panic("hammer2_chain_assert_no_data: chain %p still has data", 133 chain); 134 } 135 } 136 137 /* 138 * Make a chain visible to the flusher. The flusher operates using a top-down 139 * recursion based on the ONFLUSH flag. It locates MODIFIED and UPDATE chains, 140 * flushes them, and updates blocks back to the volume root. 141 * 142 * This routine sets the ONFLUSH flag upward from the triggering chain until 143 * it hits an inode root or the volume root. Inode chains serve as inflection 144 * points, requiring the flusher to bridge across trees. Inodes include 145 * regular inodes, PFS roots (pmp->iroot), and the media super root 146 * (spmp->iroot). 147 */ 148 void 149 hammer2_chain_setflush(hammer2_chain_t *chain) 150 { 151 hammer2_chain_t *parent; 152 153 if ((chain->flags & HAMMER2_CHAIN_ONFLUSH) == 0) { 154 hammer2_spin_sh(&chain->core.spin); 155 while ((chain->flags & HAMMER2_CHAIN_ONFLUSH) == 0) { 156 atomic_set_int(&chain->flags, HAMMER2_CHAIN_ONFLUSH); 157 if (chain->bref.type == HAMMER2_BREF_TYPE_INODE) 158 break; 159 if ((parent = chain->parent) == NULL) 160 break; 161 hammer2_spin_sh(&parent->core.spin); 162 hammer2_spin_unsh(&chain->core.spin); 163 chain = parent; 164 } 165 hammer2_spin_unsh(&chain->core.spin); 166 } 167 } 168 169 /* 170 * Allocate a new disconnected chain element representing the specified 171 * bref. chain->refs is set to 1 and the passed bref is copied to 172 * chain->bref. chain->bytes is derived from the bref. 173 * 174 * chain->pmp inherits pmp unless the chain is an inode (other than the 175 * super-root inode). 176 * 177 * NOTE: Returns a referenced but unlocked (because there is no core) chain. 178 */ 179 hammer2_chain_t * 180 hammer2_chain_alloc(hammer2_dev_t *hmp, hammer2_pfs_t *pmp, 181 hammer2_blockref_t *bref) 182 { 183 hammer2_chain_t *chain; 184 u_int bytes; 185 186 /* 187 * Special case - radix of 0 indicates a chain that does not 188 * need a data reference (context is completely embedded in the 189 * bref). 190 */ 191 if ((int)(bref->data_off & HAMMER2_OFF_MASK_RADIX)) 192 bytes = 1U << (int)(bref->data_off & HAMMER2_OFF_MASK_RADIX); 193 else 194 bytes = 0; 195 196 switch(bref->type) { 197 case HAMMER2_BREF_TYPE_INODE: 198 case HAMMER2_BREF_TYPE_INDIRECT: 199 case HAMMER2_BREF_TYPE_DATA: 200 case HAMMER2_BREF_TYPE_DIRENT: 201 case HAMMER2_BREF_TYPE_FREEMAP_NODE: 202 case HAMMER2_BREF_TYPE_FREEMAP_LEAF: 203 case HAMMER2_BREF_TYPE_FREEMAP: 204 case HAMMER2_BREF_TYPE_VOLUME: 205 chain = kmalloc_obj(sizeof(*chain), hmp->mchain, 206 M_WAITOK | M_ZERO); 207 atomic_add_long(&hammer2_chain_allocs, 1); 208 break; 209 case HAMMER2_BREF_TYPE_EMPTY: 210 default: 211 panic("hammer2_chain_alloc: unrecognized blockref type: %d", 212 bref->type); 213 break; 214 } 215 216 /* 217 * Initialize the new chain structure. pmp must be set to NULL for 218 * chains belonging to the super-root topology of a device mount. 219 */ 220 if (pmp == hmp->spmp) 221 chain->pmp = NULL; 222 else 223 chain->pmp = pmp; 224 225 chain->hmp = hmp; 226 chain->bref = *bref; 227 chain->bytes = bytes; 228 chain->refs = 1; 229 chain->flags = HAMMER2_CHAIN_ALLOCATED; 230 lockinit(&chain->diolk, "chdio", 0, 0); 231 232 /* 233 * Set the PFS boundary flag if this chain represents a PFS root. 234 */ 235 if (bref->flags & HAMMER2_BREF_FLAG_PFSROOT) 236 atomic_set_int(&chain->flags, HAMMER2_CHAIN_PFSBOUNDARY); 237 hammer2_chain_core_init(chain); 238 239 return (chain); 240 } 241 242 /* 243 * Initialize a chain's core structure. This structure used to be allocated 244 * but is now embedded. 245 * 246 * The core is not locked. No additional refs on the chain are made. 247 * (trans) must not be NULL if (core) is not NULL. 248 */ 249 void 250 hammer2_chain_core_init(hammer2_chain_t *chain) 251 { 252 /* 253 * Fresh core under nchain (no multi-homing of ochain's 254 * sub-tree). 255 */ 256 RB_INIT(&chain->core.rbtree); /* live chains */ 257 hammer2_mtx_init(&chain->lock, "h2chain"); 258 } 259 260 /* 261 * Add a reference to a chain element, preventing its destruction. 262 * 263 * (can be called with spinlock held) 264 */ 265 void 266 hammer2_chain_ref(hammer2_chain_t *chain) 267 { 268 if (atomic_fetchadd_int(&chain->refs, 1) == 0) { 269 /* 270 * Just flag that the chain was used and should be recycled 271 * on the LRU if it encounters it later. 272 */ 273 if (chain->flags & HAMMER2_CHAIN_ONLRU) 274 atomic_set_int(&chain->flags, HAMMER2_CHAIN_LRUHINT); 275 276 #if 0 277 /* 278 * REMOVED - reduces contention, lru_list is more heuristical 279 * now. 280 * 281 * 0->non-zero transition must ensure that chain is removed 282 * from the LRU list. 283 * 284 * NOTE: Already holding lru_spin here so we cannot call 285 * hammer2_chain_ref() to get it off lru_list, do 286 * it manually. 287 */ 288 if (chain->flags & HAMMER2_CHAIN_ONLRU) { 289 hammer2_pfs_t *pmp = chain->pmp; 290 hammer2_spin_ex(&pmp->lru_spin); 291 if (chain->flags & HAMMER2_CHAIN_ONLRU) { 292 atomic_add_int(&pmp->lru_count, -1); 293 atomic_clear_int(&chain->flags, 294 HAMMER2_CHAIN_ONLRU); 295 TAILQ_REMOVE(&pmp->lru_list, chain, lru_node); 296 } 297 hammer2_spin_unex(&pmp->lru_spin); 298 } 299 #endif 300 } 301 } 302 303 /* 304 * Ref a locked chain and force the data to be held across an unlock. 305 * Chain must be currently locked. The user of the chain who desires 306 * to release the hold must call hammer2_chain_lock_unhold() to relock 307 * and unhold the chain, then unlock normally, or may simply call 308 * hammer2_chain_drop_unhold() (which is safer against deadlocks). 309 */ 310 void 311 hammer2_chain_ref_hold(hammer2_chain_t *chain) 312 { 313 atomic_add_int(&chain->lockcnt, 1); 314 hammer2_chain_ref(chain); 315 } 316 317 /* 318 * Insert the chain in the core rbtree. 319 * 320 * Normal insertions are placed in the live rbtree. Insertion of a deleted 321 * chain is a special case used by the flush code that is placed on the 322 * unstaged deleted list to avoid confusing the live view. 323 */ 324 #define HAMMER2_CHAIN_INSERT_SPIN 0x0001 325 #define HAMMER2_CHAIN_INSERT_LIVE 0x0002 326 #define HAMMER2_CHAIN_INSERT_RACE 0x0004 327 328 static 329 int 330 hammer2_chain_insert(hammer2_chain_t *parent, hammer2_chain_t *chain, 331 int flags, int generation) 332 { 333 hammer2_chain_t *xchain; 334 int error = 0; 335 336 if (flags & HAMMER2_CHAIN_INSERT_SPIN) 337 hammer2_spin_ex(&parent->core.spin); 338 339 /* 340 * Interlocked by spinlock, check for race 341 */ 342 if ((flags & HAMMER2_CHAIN_INSERT_RACE) && 343 parent->core.generation != generation) { 344 error = HAMMER2_ERROR_EAGAIN; 345 goto failed; 346 } 347 348 /* 349 * Insert chain 350 */ 351 xchain = RB_INSERT(hammer2_chain_tree, &parent->core.rbtree, chain); 352 KASSERT(xchain == NULL, 353 ("hammer2_chain_insert: collision %p %p (key=%016jx)", 354 chain, xchain, chain->bref.key)); 355 atomic_set_int(&chain->flags, HAMMER2_CHAIN_ONRBTREE); 356 chain->parent = parent; 357 ++parent->core.chain_count; 358 ++parent->core.generation; /* XXX incs for _get() too, XXX */ 359 360 /* 361 * We have to keep track of the effective live-view blockref count 362 * so the create code knows when to push an indirect block. 363 */ 364 if (flags & HAMMER2_CHAIN_INSERT_LIVE) 365 atomic_add_int(&parent->core.live_count, 1); 366 failed: 367 if (flags & HAMMER2_CHAIN_INSERT_SPIN) 368 hammer2_spin_unex(&parent->core.spin); 369 return error; 370 } 371 372 /* 373 * Drop the caller's reference to the chain. When the ref count drops to 374 * zero this function will try to disassociate the chain from its parent and 375 * deallocate it, then recursely drop the parent using the implied ref 376 * from the chain's chain->parent. 377 * 378 * Nobody should own chain's mutex on the 1->0 transition, unless this drop 379 * races an acquisition by another cpu. Therefore we can loop if we are 380 * unable to acquire the mutex, and refs is unlikely to be 1 unless we again 381 * race against another drop. 382 */ 383 void 384 hammer2_chain_drop(hammer2_chain_t *chain) 385 { 386 u_int refs; 387 388 KKASSERT(chain->refs > 0); 389 390 while (chain) { 391 refs = chain->refs; 392 cpu_ccfence(); 393 KKASSERT(refs > 0); 394 395 if (refs == 1) { 396 if (hammer2_mtx_ex_try(&chain->lock) == 0) 397 chain = hammer2_chain_lastdrop(chain, 0); 398 /* retry the same chain, or chain from lastdrop */ 399 } else { 400 if (atomic_cmpset_int(&chain->refs, refs, refs - 1)) 401 break; 402 /* retry the same chain */ 403 } 404 cpu_pause(); 405 } 406 } 407 408 /* 409 * Unhold a held and probably not-locked chain, ensure that the data is 410 * dropped on the 1->0 transition of lockcnt by obtaining an exclusive 411 * lock and then simply unlocking the chain. 412 */ 413 void 414 hammer2_chain_unhold(hammer2_chain_t *chain) 415 { 416 u_int lockcnt; 417 int iter = 0; 418 419 for (;;) { 420 lockcnt = chain->lockcnt; 421 cpu_ccfence(); 422 if (lockcnt > 1) { 423 if (atomic_cmpset_int(&chain->lockcnt, 424 lockcnt, lockcnt - 1)) { 425 break; 426 } 427 } else if (hammer2_mtx_ex_try(&chain->lock) == 0) { 428 hammer2_chain_unlock(chain); 429 break; 430 } else { 431 /* 432 * This situation can easily occur on SMP due to 433 * the gap inbetween the 1->0 transition and the 434 * final unlock. We cannot safely block on the 435 * mutex because lockcnt might go above 1. 436 * 437 * XXX Sleep for one tick if it takes too long. 438 */ 439 if (++iter > 1000) { 440 if (iter > 1000 + hz) { 441 kprintf("hammer2: h2race1 %p\n", chain); 442 iter = 1000; 443 } 444 tsleep(&iter, 0, "h2race1", 1); 445 } 446 cpu_pause(); 447 } 448 } 449 } 450 451 void 452 hammer2_chain_drop_unhold(hammer2_chain_t *chain) 453 { 454 hammer2_chain_unhold(chain); 455 hammer2_chain_drop(chain); 456 } 457 458 void 459 hammer2_chain_rehold(hammer2_chain_t *chain) 460 { 461 hammer2_chain_lock(chain, HAMMER2_RESOLVE_SHARED); 462 atomic_add_int(&chain->lockcnt, 1); 463 hammer2_chain_unlock(chain); 464 } 465 466 /* 467 * Handles the (potential) last drop of chain->refs from 1->0. Called with 468 * the mutex exclusively locked, refs == 1, and lockcnt 0. SMP races are 469 * possible against refs and lockcnt. We must dispose of the mutex on chain. 470 * 471 * This function returns an unlocked chain for recursive drop or NULL. It 472 * can return the same chain if it determines it has raced another ref. 473 * 474 * -- 475 * 476 * When two chains need to be recursively dropped we use the chain we 477 * would otherwise free to placehold the additional chain. It's a bit 478 * convoluted but we can't just recurse without potentially blowing out 479 * the kernel stack. 480 * 481 * The chain cannot be freed if it has any children. 482 * The chain cannot be freed if flagged MODIFIED unless we can dispose of it. 483 * The chain cannot be freed if flagged UPDATE unless we can dispose of it. 484 * Any dedup registration can remain intact. 485 * 486 * The core spinlock is allowed to nest child-to-parent (not parent-to-child). 487 */ 488 static 489 hammer2_chain_t * 490 hammer2_chain_lastdrop(hammer2_chain_t *chain, int depth) 491 { 492 hammer2_pfs_t *pmp; 493 hammer2_dev_t *hmp; 494 hammer2_chain_t *parent; 495 hammer2_chain_t *rdrop; 496 497 /* 498 * We need chain's spinlock to interlock the sub-tree test. 499 * We already have chain's mutex, protecting chain->parent. 500 * 501 * Remember that chain->refs can be in flux. 502 */ 503 hammer2_spin_ex(&chain->core.spin); 504 505 if (chain->parent != NULL) { 506 /* 507 * If the chain has a parent the UPDATE bit prevents scrapping 508 * as the chain is needed to properly flush the parent. Try 509 * to complete the 1->0 transition and return NULL. Retry 510 * (return chain) if we are unable to complete the 1->0 511 * transition, else return NULL (nothing more to do). 512 * 513 * If the chain has a parent the MODIFIED bit prevents 514 * scrapping. 515 * 516 * Chains with UPDATE/MODIFIED are *not* put on the LRU list! 517 */ 518 if (chain->flags & (HAMMER2_CHAIN_UPDATE | 519 HAMMER2_CHAIN_MODIFIED)) { 520 if (atomic_cmpset_int(&chain->refs, 1, 0)) { 521 hammer2_spin_unex(&chain->core.spin); 522 hammer2_chain_assert_no_data(chain); 523 hammer2_mtx_unlock(&chain->lock); 524 chain = NULL; 525 } else { 526 hammer2_spin_unex(&chain->core.spin); 527 hammer2_mtx_unlock(&chain->lock); 528 } 529 return (chain); 530 } 531 /* spinlock still held */ 532 } else if (chain->bref.type == HAMMER2_BREF_TYPE_VOLUME || 533 chain->bref.type == HAMMER2_BREF_TYPE_FREEMAP) { 534 /* 535 * Retain the static vchain and fchain. Clear bits that 536 * are not relevant. Do not clear the MODIFIED bit, 537 * and certainly do not put it on the delayed-flush queue. 538 */ 539 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_UPDATE); 540 } else { 541 /* 542 * The chain has no parent and can be flagged for destruction. 543 * Since it has no parent, UPDATE can also be cleared. 544 */ 545 atomic_set_int(&chain->flags, HAMMER2_CHAIN_DESTROY); 546 if (chain->flags & HAMMER2_CHAIN_UPDATE) 547 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_UPDATE); 548 549 /* 550 * If the chain has children we must propagate the DESTROY 551 * flag downward and rip the disconnected topology apart. 552 * This is accomplished by calling hammer2_flush() on the 553 * chain. 554 * 555 * Any dedup is already handled by the underlying DIO, so 556 * we do not have to specifically flush it here. 557 */ 558 if (chain->core.chain_count) { 559 hammer2_spin_unex(&chain->core.spin); 560 hammer2_flush(chain, HAMMER2_FLUSH_TOP | 561 HAMMER2_FLUSH_ALL); 562 hammer2_mtx_unlock(&chain->lock); 563 564 return(chain); /* retry drop */ 565 } 566 567 /* 568 * Otherwise we can scrap the MODIFIED bit if it is set, 569 * and continue along the freeing path. 570 * 571 * Be sure to clean-out any dedup bits. Without a parent 572 * this chain will no longer be visible to the flush code. 573 * Easy check data_off to avoid the volume root. 574 */ 575 if (chain->flags & HAMMER2_CHAIN_MODIFIED) { 576 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_MODIFIED); 577 atomic_add_long(&hammer2_count_modified_chains, -1); 578 if (chain->pmp) 579 hammer2_pfs_memory_wakeup(chain->pmp, -1); 580 } 581 /* spinlock still held */ 582 } 583 584 /* spinlock still held */ 585 586 /* 587 * If any children exist we must leave the chain intact with refs == 0. 588 * They exist because chains are retained below us which have refs or 589 * may require flushing. 590 * 591 * Retry (return chain) if we fail to transition the refs to 0, else 592 * return NULL indication nothing more to do. 593 * 594 * Chains with children are NOT put on the LRU list. 595 */ 596 if (chain->core.chain_count) { 597 if (atomic_cmpset_int(&chain->refs, 1, 0)) { 598 hammer2_spin_unex(&chain->core.spin); 599 hammer2_chain_assert_no_data(chain); 600 hammer2_mtx_unlock(&chain->lock); 601 chain = NULL; 602 } else { 603 hammer2_spin_unex(&chain->core.spin); 604 hammer2_mtx_unlock(&chain->lock); 605 } 606 return (chain); 607 } 608 /* spinlock still held */ 609 /* no chains left under us */ 610 611 /* 612 * chain->core has no children left so no accessors can get to our 613 * chain from there. Now we have to lock the parent core to interlock 614 * remaining possible accessors that might bump chain's refs before 615 * we can safely drop chain's refs with intent to free the chain. 616 */ 617 hmp = chain->hmp; 618 pmp = chain->pmp; /* can be NULL */ 619 rdrop = NULL; 620 621 parent = chain->parent; 622 623 /* 624 * WARNING! chain's spin lock is still held here, and other spinlocks 625 * will be acquired and released in the code below. We 626 * cannot be making fancy procedure calls! 627 */ 628 629 /* 630 * We can cache the chain if it is associated with a pmp 631 * and not flagged as being destroyed or requesting a full 632 * release. In this situation the chain is not removed 633 * from its parent, i.e. it can still be looked up. 634 * 635 * We intentionally do not cache DATA chains because these 636 * were likely used to load data into the logical buffer cache 637 * and will not be accessed again for some time. 638 */ 639 if ((chain->flags & 640 (HAMMER2_CHAIN_DESTROY | HAMMER2_CHAIN_RELEASE)) == 0 && 641 chain->pmp && 642 chain->bref.type != HAMMER2_BREF_TYPE_DATA) { 643 if (parent) 644 hammer2_spin_ex(&parent->core.spin); 645 if (atomic_cmpset_int(&chain->refs, 1, 0) == 0) { 646 /* 647 * 1->0 transition failed, retry. Do not drop 648 * the chain's data yet! 649 */ 650 if (parent) 651 hammer2_spin_unex(&parent->core.spin); 652 hammer2_spin_unex(&chain->core.spin); 653 hammer2_mtx_unlock(&chain->lock); 654 655 return(chain); 656 } 657 658 /* 659 * Success 660 */ 661 hammer2_chain_assert_no_data(chain); 662 663 /* 664 * Make sure we are on the LRU list, clean up excessive 665 * LRU entries. We can only really drop one but there might 666 * be other entries that we can remove from the lru_list 667 * without dropping. 668 * 669 * NOTE: HAMMER2_CHAIN_ONLRU may only be safely set when 670 * chain->core.spin AND pmp->lru_spin are held, but 671 * can be safely cleared only holding pmp->lru_spin. 672 */ 673 if ((chain->flags & HAMMER2_CHAIN_ONLRU) == 0) { 674 hammer2_spin_ex(&pmp->lru_spin); 675 if ((chain->flags & HAMMER2_CHAIN_ONLRU) == 0) { 676 atomic_set_int(&chain->flags, 677 HAMMER2_CHAIN_ONLRU); 678 TAILQ_INSERT_TAIL(&pmp->lru_list, 679 chain, lru_node); 680 atomic_add_int(&pmp->lru_count, 1); 681 } 682 if (pmp->lru_count < HAMMER2_LRU_LIMIT) 683 depth = 1; /* disable lru_list flush */ 684 hammer2_spin_unex(&pmp->lru_spin); 685 } else { 686 /* disable lru flush */ 687 depth = 1; 688 } 689 690 if (parent) { 691 hammer2_spin_unex(&parent->core.spin); 692 parent = NULL; /* safety */ 693 } 694 hammer2_spin_unex(&chain->core.spin); 695 hammer2_mtx_unlock(&chain->lock); 696 697 /* 698 * lru_list hysteresis (see above for depth overrides). 699 * Note that depth also prevents excessive lastdrop recursion. 700 */ 701 if (depth == 0) 702 hammer2_chain_lru_flush(pmp); 703 704 return NULL; 705 /* NOT REACHED */ 706 } 707 708 /* 709 * Make sure we are not on the LRU list. 710 */ 711 if (chain->flags & HAMMER2_CHAIN_ONLRU) { 712 hammer2_spin_ex(&pmp->lru_spin); 713 if (chain->flags & HAMMER2_CHAIN_ONLRU) { 714 atomic_add_int(&pmp->lru_count, -1); 715 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_ONLRU); 716 TAILQ_REMOVE(&pmp->lru_list, chain, lru_node); 717 } 718 hammer2_spin_unex(&pmp->lru_spin); 719 } 720 721 /* 722 * Spinlock the parent and try to drop the last ref on chain. 723 * On success determine if we should dispose of the chain 724 * (remove the chain from its parent, etc). 725 * 726 * (normal core locks are top-down recursive but we define 727 * core spinlocks as bottom-up recursive, so this is safe). 728 */ 729 if (parent) { 730 hammer2_spin_ex(&parent->core.spin); 731 if (atomic_cmpset_int(&chain->refs, 1, 0) == 0) { 732 /* 733 * 1->0 transition failed, retry. 734 */ 735 hammer2_spin_unex(&parent->core.spin); 736 hammer2_spin_unex(&chain->core.spin); 737 hammer2_mtx_unlock(&chain->lock); 738 739 return(chain); 740 } 741 742 /* 743 * 1->0 transition successful, parent spin held to prevent 744 * new lookups, chain spinlock held to protect parent field. 745 * Remove chain from the parent. 746 * 747 * If the chain is being removed from the parent's btree but 748 * is not blkmapped, we have to adjust live_count downward. If 749 * it is blkmapped then the blockref is retained in the parent 750 * as is its associated live_count. This case can occur when 751 * a chain added to the topology is unable to flush and is 752 * then later deleted. 753 */ 754 if (chain->flags & HAMMER2_CHAIN_ONRBTREE) { 755 if ((parent->flags & HAMMER2_CHAIN_COUNTEDBREFS) && 756 (chain->flags & HAMMER2_CHAIN_BLKMAPPED) == 0) { 757 atomic_add_int(&parent->core.live_count, -1); 758 } 759 RB_REMOVE(hammer2_chain_tree, 760 &parent->core.rbtree, chain); 761 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_ONRBTREE); 762 --parent->core.chain_count; 763 chain->parent = NULL; 764 } 765 766 /* 767 * If our chain was the last chain in the parent's core the 768 * core is now empty and its parent might have to be 769 * re-dropped if it has 0 refs. 770 */ 771 if (parent->core.chain_count == 0) { 772 rdrop = parent; 773 atomic_add_int(&rdrop->refs, 1); 774 /* 775 if (atomic_cmpset_int(&rdrop->refs, 0, 1) == 0) 776 rdrop = NULL; 777 */ 778 } 779 hammer2_spin_unex(&parent->core.spin); 780 parent = NULL; /* safety */ 781 /* FALL THROUGH */ 782 } else { 783 /* 784 * No-parent case. 785 */ 786 if (atomic_cmpset_int(&chain->refs, 1, 0) == 0) { 787 /* 788 * 1->0 transition failed, retry. 789 */ 790 hammer2_spin_unex(&parent->core.spin); 791 hammer2_spin_unex(&chain->core.spin); 792 hammer2_mtx_unlock(&chain->lock); 793 794 return(chain); 795 } 796 } 797 798 /* 799 * Successful 1->0 transition, no parent, no children... no way for 800 * anyone to ref this chain any more. We can clean-up and free it. 801 * 802 * We still have the core spinlock, and core's chain_count is 0. 803 * Any parent spinlock is gone. 804 */ 805 hammer2_spin_unex(&chain->core.spin); 806 hammer2_chain_assert_no_data(chain); 807 hammer2_mtx_unlock(&chain->lock); 808 KKASSERT(RB_EMPTY(&chain->core.rbtree) && 809 chain->core.chain_count == 0); 810 811 /* 812 * All locks are gone, no pointers remain to the chain, finish 813 * freeing it. 814 */ 815 KKASSERT((chain->flags & (HAMMER2_CHAIN_UPDATE | 816 HAMMER2_CHAIN_MODIFIED)) == 0); 817 818 /* 819 * Once chain resources are gone we can use the now dead chain 820 * structure to placehold what might otherwise require a recursive 821 * drop, because we have potentially two things to drop and can only 822 * return one directly. 823 */ 824 if (chain->flags & HAMMER2_CHAIN_ALLOCATED) { 825 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_ALLOCATED); 826 chain->hmp = NULL; 827 kfree_obj(chain, hmp->mchain); 828 } 829 830 /* 831 * Possible chaining loop when parent re-drop needed. 832 */ 833 return(rdrop); 834 } 835 836 /* 837 * Heuristical flush of the LRU, try to reduce the number of entries 838 * on the LRU to (HAMMER2_LRU_LIMIT * 2 / 3). This procedure is called 839 * only when lru_count exceeds HAMMER2_LRU_LIMIT. 840 */ 841 static 842 void 843 hammer2_chain_lru_flush(hammer2_pfs_t *pmp) 844 { 845 hammer2_chain_t *chain; 846 847 again: 848 chain = NULL; 849 hammer2_spin_ex(&pmp->lru_spin); 850 while (pmp->lru_count > HAMMER2_LRU_LIMIT * 2 / 3) { 851 /* 852 * Pick a chain off the lru_list, just recycle it quickly 853 * if LRUHINT is set (the chain was ref'd but left on 854 * the lru_list, so cycle to the end). 855 */ 856 chain = TAILQ_FIRST(&pmp->lru_list); 857 TAILQ_REMOVE(&pmp->lru_list, chain, lru_node); 858 859 if (chain->flags & HAMMER2_CHAIN_LRUHINT) { 860 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_LRUHINT); 861 TAILQ_INSERT_TAIL(&pmp->lru_list, chain, lru_node); 862 chain = NULL; 863 continue; 864 } 865 866 /* 867 * Ok, we are off the LRU. We must adjust refs before we 868 * can safely clear the ONLRU flag. 869 */ 870 atomic_add_int(&pmp->lru_count, -1); 871 if (atomic_cmpset_int(&chain->refs, 0, 1)) { 872 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_ONLRU); 873 atomic_set_int(&chain->flags, HAMMER2_CHAIN_RELEASE); 874 break; 875 } 876 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_ONLRU); 877 chain = NULL; 878 } 879 hammer2_spin_unex(&pmp->lru_spin); 880 if (chain == NULL) 881 return; 882 883 /* 884 * If we picked a chain off the lru list we may be able to lastdrop 885 * it. Use a depth of 1 to prevent excessive lastdrop recursion. 886 */ 887 while (chain) { 888 u_int refs; 889 890 refs = chain->refs; 891 cpu_ccfence(); 892 KKASSERT(refs > 0); 893 894 if (refs == 1) { 895 if (hammer2_mtx_ex_try(&chain->lock) == 0) 896 chain = hammer2_chain_lastdrop(chain, 1); 897 /* retry the same chain, or chain from lastdrop */ 898 } else { 899 if (atomic_cmpset_int(&chain->refs, refs, refs - 1)) 900 break; 901 /* retry the same chain */ 902 } 903 cpu_pause(); 904 } 905 goto again; 906 } 907 908 /* 909 * On last lock release. 910 */ 911 static hammer2_io_t * 912 hammer2_chain_drop_data(hammer2_chain_t *chain) 913 { 914 hammer2_io_t *dio; 915 916 if ((dio = chain->dio) != NULL) { 917 chain->dio = NULL; 918 chain->data = NULL; 919 } else { 920 switch(chain->bref.type) { 921 case HAMMER2_BREF_TYPE_VOLUME: 922 case HAMMER2_BREF_TYPE_FREEMAP: 923 break; 924 default: 925 if (chain->data != NULL) { 926 hammer2_spin_unex(&chain->core.spin); 927 panic("chain data not null: " 928 "chain %p bref %016jx.%02x " 929 "refs %d parent %p dio %p data %p", 930 chain, chain->bref.data_off, 931 chain->bref.type, chain->refs, 932 chain->parent, 933 chain->dio, chain->data); 934 } 935 KKASSERT(chain->data == NULL); 936 break; 937 } 938 } 939 return dio; 940 } 941 942 /* 943 * Lock a referenced chain element, acquiring its data with I/O if necessary, 944 * and specify how you would like the data to be resolved. 945 * 946 * If an I/O or other fatal error occurs, chain->error will be set to non-zero. 947 * 948 * The lock is allowed to recurse, multiple locking ops will aggregate 949 * the requested resolve types. Once data is assigned it will not be 950 * removed until the last unlock. 951 * 952 * HAMMER2_RESOLVE_NEVER - Do not resolve the data element. 953 * (typically used to avoid device/logical buffer 954 * aliasing for data) 955 * 956 * HAMMER2_RESOLVE_MAYBE - Do not resolve data elements for chains in 957 * the INITIAL-create state (indirect blocks only). 958 * 959 * Do not resolve data elements for DATA chains. 960 * (typically used to avoid device/logical buffer 961 * aliasing for data) 962 * 963 * HAMMER2_RESOLVE_ALWAYS- Always resolve the data element. 964 * 965 * HAMMER2_RESOLVE_SHARED- (flag) The chain is locked shared, otherwise 966 * it will be locked exclusive. 967 * 968 * HAMMER2_RESOLVE_NONBLOCK- (flag) The chain is locked non-blocking. If 969 * the lock fails, EAGAIN is returned. 970 * 971 * NOTE: Embedded elements (volume header, inodes) are always resolved 972 * regardless. 973 * 974 * NOTE: Specifying HAMMER2_RESOLVE_ALWAYS on a newly-created non-embedded 975 * element will instantiate and zero its buffer, and flush it on 976 * release. 977 * 978 * NOTE: (data) elements are normally locked RESOLVE_NEVER or RESOLVE_MAYBE 979 * so as not to instantiate a device buffer, which could alias against 980 * a logical file buffer. However, if ALWAYS is specified the 981 * device buffer will be instantiated anyway. 982 * 983 * NOTE: The return value is always 0 unless NONBLOCK is specified, in which 984 * case it can be either 0 or EAGAIN. 985 * 986 * WARNING! This function blocks on I/O if data needs to be fetched. This 987 * blocking can run concurrent with other compatible lock holders 988 * who do not need data returning. The lock is not upgraded to 989 * exclusive during a data fetch, a separate bit is used to 990 * interlock I/O. However, an exclusive lock holder can still count 991 * on being interlocked against an I/O fetch managed by a shared 992 * lock holder. 993 */ 994 int 995 hammer2_chain_lock(hammer2_chain_t *chain, int how) 996 { 997 KKASSERT(chain->refs > 0); 998 999 if (how & HAMMER2_RESOLVE_NONBLOCK) { 1000 /* 1001 * We still have to bump lockcnt before acquiring the lock, 1002 * even for non-blocking operation, because the unlock code 1003 * live-loops on lockcnt == 1 when dropping the last lock. 1004 * 1005 * If the non-blocking operation fails we have to use an 1006 * unhold sequence to undo the mess. 1007 * 1008 * NOTE: LOCKAGAIN must always succeed without blocking, 1009 * even if NONBLOCK is specified. 1010 */ 1011 atomic_add_int(&chain->lockcnt, 1); 1012 if (how & HAMMER2_RESOLVE_SHARED) { 1013 if (how & HAMMER2_RESOLVE_LOCKAGAIN) { 1014 hammer2_mtx_sh_again(&chain->lock); 1015 } else { 1016 if (hammer2_mtx_sh_try(&chain->lock) != 0) { 1017 hammer2_chain_unhold(chain); 1018 return EAGAIN; 1019 } 1020 } 1021 } else { 1022 if (hammer2_mtx_ex_try(&chain->lock) != 0) { 1023 hammer2_chain_unhold(chain); 1024 return EAGAIN; 1025 } 1026 } 1027 } else { 1028 /* 1029 * Get the appropriate lock. If LOCKAGAIN is flagged with 1030 * SHARED the caller expects a shared lock to already be 1031 * present and we are giving it another ref. This case must 1032 * importantly not block if there is a pending exclusive lock 1033 * request. 1034 */ 1035 atomic_add_int(&chain->lockcnt, 1); 1036 if (how & HAMMER2_RESOLVE_SHARED) { 1037 if (how & HAMMER2_RESOLVE_LOCKAGAIN) { 1038 hammer2_mtx_sh_again(&chain->lock); 1039 } else { 1040 hammer2_mtx_sh(&chain->lock); 1041 } 1042 } else { 1043 hammer2_mtx_ex(&chain->lock); 1044 } 1045 } 1046 1047 /* 1048 * If we already have a valid data pointer make sure the data is 1049 * synchronized to the current cpu, and then no further action is 1050 * necessary. 1051 */ 1052 if (chain->data) { 1053 if (chain->dio) 1054 hammer2_io_bkvasync(chain->dio); 1055 return 0; 1056 } 1057 1058 /* 1059 * Do we have to resolve the data? This is generally only 1060 * applicable to HAMMER2_BREF_TYPE_DATA which is special-cased. 1061 * Other BREF types expects the data to be there. 1062 */ 1063 switch(how & HAMMER2_RESOLVE_MASK) { 1064 case HAMMER2_RESOLVE_NEVER: 1065 return 0; 1066 case HAMMER2_RESOLVE_MAYBE: 1067 if (chain->flags & HAMMER2_CHAIN_INITIAL) 1068 return 0; 1069 if (chain->bref.type == HAMMER2_BREF_TYPE_DATA) 1070 return 0; 1071 #if 0 1072 if (chain->bref.type == HAMMER2_BREF_TYPE_FREEMAP_NODE) 1073 return 0; 1074 if (chain->bref.type == HAMMER2_BREF_TYPE_FREEMAP_LEAF) 1075 return 0; 1076 #endif 1077 /* fall through */ 1078 case HAMMER2_RESOLVE_ALWAYS: 1079 default: 1080 break; 1081 } 1082 1083 /* 1084 * Caller requires data 1085 */ 1086 hammer2_chain_load_data(chain); 1087 1088 return 0; 1089 } 1090 1091 /* 1092 * Lock the chain, retain the hold, and drop the data persistence count. 1093 * The data should remain valid because we never transitioned lockcnt 1094 * through 0. 1095 */ 1096 void 1097 hammer2_chain_lock_unhold(hammer2_chain_t *chain, int how) 1098 { 1099 hammer2_chain_lock(chain, how); 1100 atomic_add_int(&chain->lockcnt, -1); 1101 } 1102 1103 #if 0 1104 /* 1105 * Downgrade an exclusive chain lock to a shared chain lock. 1106 * 1107 * NOTE: There is no upgrade equivalent due to the ease of 1108 * deadlocks in that direction. 1109 */ 1110 void 1111 hammer2_chain_lock_downgrade(hammer2_chain_t *chain) 1112 { 1113 hammer2_mtx_downgrade(&chain->lock); 1114 } 1115 #endif 1116 1117 /* 1118 * Issue I/O and install chain->data. Caller must hold a chain lock, lock 1119 * may be of any type. 1120 * 1121 * Once chain->data is set it cannot be disposed of until all locks are 1122 * released. 1123 * 1124 * Make sure the data is synchronized to the current cpu. 1125 */ 1126 void 1127 hammer2_chain_load_data(hammer2_chain_t *chain) 1128 { 1129 hammer2_blockref_t *bref; 1130 hammer2_dev_t *hmp; 1131 hammer2_io_t *dio; 1132 char *bdata; 1133 int error; 1134 1135 /* 1136 * Degenerate case, data already present, or chain has no media 1137 * reference to load. 1138 */ 1139 KKASSERT(chain->lock.mtx_lock & MTX_MASK); 1140 if (chain->data) { 1141 if (chain->dio) 1142 hammer2_io_bkvasync(chain->dio); 1143 return; 1144 } 1145 if ((chain->bref.data_off & ~HAMMER2_OFF_MASK_RADIX) == 0) 1146 return; 1147 1148 hmp = chain->hmp; 1149 KKASSERT(hmp != NULL); 1150 1151 /* 1152 * Gain the IOINPROG bit, interlocked block. 1153 */ 1154 for (;;) { 1155 u_int oflags; 1156 u_int nflags; 1157 1158 oflags = chain->flags; 1159 cpu_ccfence(); 1160 if (oflags & HAMMER2_CHAIN_IOINPROG) { 1161 nflags = oflags | HAMMER2_CHAIN_IOSIGNAL; 1162 tsleep_interlock(&chain->flags, 0); 1163 if (atomic_cmpset_int(&chain->flags, oflags, nflags)) { 1164 tsleep(&chain->flags, PINTERLOCKED, 1165 "h2iocw", 0); 1166 } 1167 /* retry */ 1168 } else { 1169 nflags = oflags | HAMMER2_CHAIN_IOINPROG; 1170 if (atomic_cmpset_int(&chain->flags, oflags, nflags)) { 1171 break; 1172 } 1173 /* retry */ 1174 } 1175 } 1176 1177 /* 1178 * We own CHAIN_IOINPROG 1179 * 1180 * Degenerate case if we raced another load. 1181 */ 1182 if (chain->data) { 1183 if (chain->dio) 1184 hammer2_io_bkvasync(chain->dio); 1185 goto done; 1186 } 1187 1188 /* 1189 * We must resolve to a device buffer, either by issuing I/O or 1190 * by creating a zero-fill element. We do not mark the buffer 1191 * dirty when creating a zero-fill element (the hammer2_chain_modify() 1192 * API must still be used to do that). 1193 * 1194 * The device buffer is variable-sized in powers of 2 down 1195 * to HAMMER2_MIN_ALLOC (typically 1K). A 64K physical storage 1196 * chunk always contains buffers of the same size. (XXX) 1197 * 1198 * The minimum physical IO size may be larger than the variable 1199 * block size. 1200 */ 1201 bref = &chain->bref; 1202 1203 /* 1204 * The getblk() optimization can only be used on newly created 1205 * elements if the physical block size matches the request. 1206 */ 1207 if (chain->flags & HAMMER2_CHAIN_INITIAL) { 1208 error = hammer2_io_new(hmp, bref->type, 1209 bref->data_off, chain->bytes, 1210 &chain->dio); 1211 } else { 1212 error = hammer2_io_bread(hmp, bref->type, 1213 bref->data_off, chain->bytes, 1214 &chain->dio); 1215 hammer2_adjreadcounter(chain->bref.type, chain->bytes); 1216 } 1217 if (error) { 1218 chain->error = HAMMER2_ERROR_EIO; 1219 kprintf("hammer2_chain_load_data: I/O error %016jx: %d\n", 1220 (intmax_t)bref->data_off, error); 1221 hammer2_io_bqrelse(&chain->dio); 1222 goto done; 1223 } 1224 chain->error = 0; 1225 1226 /* 1227 * This isn't perfect and can be ignored on OSs which do not have 1228 * an indication as to whether a buffer is coming from cache or 1229 * if I/O was actually issued for the read. TESTEDGOOD will work 1230 * pretty well without the B_IOISSUED logic because chains are 1231 * cached, but in that situation (without B_IOISSUED) it will not 1232 * detect whether a re-read via I/O is corrupted verses the original 1233 * read. 1234 * 1235 * We can't re-run the CRC on every fresh lock. That would be 1236 * insanely expensive. 1237 * 1238 * If the underlying kernel buffer covers the entire chain we can 1239 * use the B_IOISSUED indication to determine if we have to re-run 1240 * the CRC on chain data for chains that managed to stay cached 1241 * across the kernel disposal of the original buffer. 1242 */ 1243 if ((dio = chain->dio) != NULL && dio->bp) { 1244 struct buf *bp = dio->bp; 1245 1246 if (dio->psize == chain->bytes && 1247 (bp->b_flags & B_IOISSUED)) { 1248 atomic_clear_int(&chain->flags, 1249 HAMMER2_CHAIN_TESTEDGOOD); 1250 bp->b_flags &= ~B_IOISSUED; 1251 } 1252 } 1253 1254 /* 1255 * NOTE: A locked chain's data cannot be modified without first 1256 * calling hammer2_chain_modify(). 1257 */ 1258 1259 /* 1260 * NOTE: hammer2_io_data() call issues bkvasync() 1261 */ 1262 bdata = hammer2_io_data(chain->dio, chain->bref.data_off); 1263 1264 if (chain->flags & HAMMER2_CHAIN_INITIAL) { 1265 /* 1266 * Clear INITIAL. In this case we used io_new() and the 1267 * buffer has been zero'd and marked dirty. 1268 * 1269 * CHAIN_MODIFIED has not been set yet, and we leave it 1270 * that way for now. Set a temporary CHAIN_NOTTESTED flag 1271 * to prevent hammer2_chain_testcheck() from trying to match 1272 * a check code that has not yet been generated. This bit 1273 * should NOT end up on the actual media. 1274 */ 1275 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_INITIAL); 1276 atomic_set_int(&chain->flags, HAMMER2_CHAIN_NOTTESTED); 1277 } else if (chain->flags & HAMMER2_CHAIN_MODIFIED) { 1278 /* 1279 * check data not currently synchronized due to 1280 * modification. XXX assumes data stays in the buffer 1281 * cache, which might not be true (need biodep on flush 1282 * to calculate crc? or simple crc?). 1283 */ 1284 } else if ((chain->flags & HAMMER2_CHAIN_TESTEDGOOD) == 0) { 1285 if (hammer2_chain_testcheck(chain, bdata) == 0) { 1286 chain->error = HAMMER2_ERROR_CHECK; 1287 } else { 1288 atomic_set_int(&chain->flags, HAMMER2_CHAIN_TESTEDGOOD); 1289 } 1290 } 1291 1292 /* 1293 * Setup the data pointer, either pointing it to an embedded data 1294 * structure and copying the data from the buffer, or pointing it 1295 * into the buffer. 1296 * 1297 * The buffer is not retained when copying to an embedded data 1298 * structure in order to avoid potential deadlocks or recursions 1299 * on the same physical buffer. 1300 * 1301 * WARNING! Other threads can start using the data the instant we 1302 * set chain->data non-NULL. 1303 */ 1304 switch (bref->type) { 1305 case HAMMER2_BREF_TYPE_VOLUME: 1306 case HAMMER2_BREF_TYPE_FREEMAP: 1307 /* 1308 * Copy data from bp to embedded buffer 1309 */ 1310 panic("hammer2_chain_load_data: unresolved volume header"); 1311 break; 1312 case HAMMER2_BREF_TYPE_DIRENT: 1313 KKASSERT(chain->bytes != 0); 1314 /* fall through */ 1315 case HAMMER2_BREF_TYPE_INODE: 1316 case HAMMER2_BREF_TYPE_FREEMAP_LEAF: 1317 case HAMMER2_BREF_TYPE_INDIRECT: 1318 case HAMMER2_BREF_TYPE_DATA: 1319 case HAMMER2_BREF_TYPE_FREEMAP_NODE: 1320 default: 1321 /* 1322 * Point data at the device buffer and leave dio intact. 1323 */ 1324 chain->data = (void *)bdata; 1325 break; 1326 } 1327 1328 /* 1329 * Release HAMMER2_CHAIN_IOINPROG and signal waiters if requested. 1330 */ 1331 done: 1332 for (;;) { 1333 u_int oflags; 1334 u_int nflags; 1335 1336 oflags = chain->flags; 1337 nflags = oflags & ~(HAMMER2_CHAIN_IOINPROG | 1338 HAMMER2_CHAIN_IOSIGNAL); 1339 KKASSERT(oflags & HAMMER2_CHAIN_IOINPROG); 1340 if (atomic_cmpset_int(&chain->flags, oflags, nflags)) { 1341 if (oflags & HAMMER2_CHAIN_IOSIGNAL) 1342 wakeup(&chain->flags); 1343 break; 1344 } 1345 } 1346 } 1347 1348 /* 1349 * Unlock and deref a chain element. 1350 * 1351 * Remember that the presence of children under chain prevent the chain's 1352 * destruction but do not add additional references, so the dio will still 1353 * be dropped. 1354 */ 1355 void 1356 hammer2_chain_unlock(hammer2_chain_t *chain) 1357 { 1358 hammer2_io_t *dio; 1359 u_int lockcnt; 1360 int iter = 0; 1361 1362 /* 1363 * If multiple locks are present (or being attempted) on this 1364 * particular chain we can just unlock, drop refs, and return. 1365 * 1366 * Otherwise fall-through on the 1->0 transition. 1367 */ 1368 for (;;) { 1369 lockcnt = chain->lockcnt; 1370 KKASSERT(lockcnt > 0); 1371 cpu_ccfence(); 1372 if (lockcnt > 1) { 1373 if (atomic_cmpset_int(&chain->lockcnt, 1374 lockcnt, lockcnt - 1)) { 1375 hammer2_mtx_unlock(&chain->lock); 1376 return; 1377 } 1378 } else if (hammer2_mtx_upgrade_try(&chain->lock) == 0) { 1379 /* while holding the mutex exclusively */ 1380 if (atomic_cmpset_int(&chain->lockcnt, 1, 0)) 1381 break; 1382 } else { 1383 /* 1384 * This situation can easily occur on SMP due to 1385 * the gap inbetween the 1->0 transition and the 1386 * final unlock. We cannot safely block on the 1387 * mutex because lockcnt might go above 1. 1388 * 1389 * XXX Sleep for one tick if it takes too long. 1390 */ 1391 if (++iter > 1000) { 1392 if (iter > 1000 + hz) { 1393 kprintf("hammer2: h2race2 %p\n", chain); 1394 iter = 1000; 1395 } 1396 tsleep(&iter, 0, "h2race2", 1); 1397 } 1398 cpu_pause(); 1399 } 1400 /* retry */ 1401 } 1402 1403 /* 1404 * Last unlock / mutex upgraded to exclusive. Drop the data 1405 * reference. 1406 */ 1407 dio = hammer2_chain_drop_data(chain); 1408 if (dio) 1409 hammer2_io_bqrelse(&dio); 1410 hammer2_mtx_unlock(&chain->lock); 1411 } 1412 1413 /* 1414 * Unlock and hold chain data intact 1415 */ 1416 void 1417 hammer2_chain_unlock_hold(hammer2_chain_t *chain) 1418 { 1419 atomic_add_int(&chain->lockcnt, 1); 1420 hammer2_chain_unlock(chain); 1421 } 1422 1423 /* 1424 * Helper to obtain the blockref[] array base and count for a chain. 1425 * 1426 * XXX Not widely used yet, various use cases need to be validated and 1427 * converted to use this function. 1428 */ 1429 static 1430 hammer2_blockref_t * 1431 hammer2_chain_base_and_count(hammer2_chain_t *parent, int *countp) 1432 { 1433 hammer2_blockref_t *base; 1434 int count; 1435 1436 if (parent->flags & HAMMER2_CHAIN_INITIAL) { 1437 base = NULL; 1438 1439 switch(parent->bref.type) { 1440 case HAMMER2_BREF_TYPE_INODE: 1441 count = HAMMER2_SET_COUNT; 1442 break; 1443 case HAMMER2_BREF_TYPE_INDIRECT: 1444 case HAMMER2_BREF_TYPE_FREEMAP_NODE: 1445 count = parent->bytes / sizeof(hammer2_blockref_t); 1446 break; 1447 case HAMMER2_BREF_TYPE_VOLUME: 1448 count = HAMMER2_SET_COUNT; 1449 break; 1450 case HAMMER2_BREF_TYPE_FREEMAP: 1451 count = HAMMER2_SET_COUNT; 1452 break; 1453 default: 1454 panic("hammer2_chain_base_and_count: " 1455 "unrecognized blockref type: %d", 1456 parent->bref.type); 1457 count = 0; 1458 break; 1459 } 1460 } else { 1461 switch(parent->bref.type) { 1462 case HAMMER2_BREF_TYPE_INODE: 1463 base = &parent->data->ipdata.u.blockset.blockref[0]; 1464 count = HAMMER2_SET_COUNT; 1465 break; 1466 case HAMMER2_BREF_TYPE_INDIRECT: 1467 case HAMMER2_BREF_TYPE_FREEMAP_NODE: 1468 base = &parent->data->npdata[0]; 1469 count = parent->bytes / sizeof(hammer2_blockref_t); 1470 break; 1471 case HAMMER2_BREF_TYPE_VOLUME: 1472 base = &parent->data->voldata. 1473 sroot_blockset.blockref[0]; 1474 count = HAMMER2_SET_COUNT; 1475 break; 1476 case HAMMER2_BREF_TYPE_FREEMAP: 1477 base = &parent->data->blkset.blockref[0]; 1478 count = HAMMER2_SET_COUNT; 1479 break; 1480 default: 1481 panic("hammer2_chain_base_and_count: " 1482 "unrecognized blockref type: %d", 1483 parent->bref.type); 1484 base = NULL; 1485 count = 0; 1486 break; 1487 } 1488 } 1489 *countp = count; 1490 1491 return base; 1492 } 1493 1494 /* 1495 * This counts the number of live blockrefs in a block array and 1496 * also calculates the point at which all remaining blockrefs are empty. 1497 * This routine can only be called on a live chain. 1498 * 1499 * Caller holds the chain locked, but possibly with a shared lock. We 1500 * must use an exclusive spinlock to prevent corruption. 1501 * 1502 * NOTE: Flag is not set until after the count is complete, allowing 1503 * callers to test the flag without holding the spinlock. 1504 * 1505 * NOTE: If base is NULL the related chain is still in the INITIAL 1506 * state and there are no blockrefs to count. 1507 * 1508 * NOTE: live_count may already have some counts accumulated due to 1509 * creation and deletion and could even be initially negative. 1510 */ 1511 void 1512 hammer2_chain_countbrefs(hammer2_chain_t *chain, 1513 hammer2_blockref_t *base, int count) 1514 { 1515 hammer2_spin_ex(&chain->core.spin); 1516 if ((chain->flags & HAMMER2_CHAIN_COUNTEDBREFS) == 0) { 1517 if (base) { 1518 while (--count >= 0) { 1519 if (base[count].type != HAMMER2_BREF_TYPE_EMPTY) 1520 break; 1521 } 1522 chain->core.live_zero = count + 1; 1523 while (count >= 0) { 1524 if (base[count].type != HAMMER2_BREF_TYPE_EMPTY) 1525 atomic_add_int(&chain->core.live_count, 1526 1); 1527 --count; 1528 } 1529 } else { 1530 chain->core.live_zero = 0; 1531 } 1532 /* else do not modify live_count */ 1533 atomic_set_int(&chain->flags, HAMMER2_CHAIN_COUNTEDBREFS); 1534 } 1535 hammer2_spin_unex(&chain->core.spin); 1536 } 1537 1538 /* 1539 * Resize the chain's physical storage allocation in-place. This function does 1540 * not usually adjust the data pointer and must be followed by (typically) a 1541 * hammer2_chain_modify() call to copy any old data over and adjust the 1542 * data pointer. 1543 * 1544 * Chains can be resized smaller without reallocating the storage. Resizing 1545 * larger will reallocate the storage. Excess or prior storage is reclaimed 1546 * asynchronously at a later time. 1547 * 1548 * An nradix value of 0 is special-cased to mean that the storage should 1549 * be disassociated, that is the chain is being resized to 0 bytes (not 1 1550 * byte). 1551 * 1552 * Must be passed an exclusively locked parent and chain. 1553 * 1554 * This function is mostly used with DATA blocks locked RESOLVE_NEVER in order 1555 * to avoid instantiating a device buffer that conflicts with the vnode data 1556 * buffer. However, because H2 can compress or encrypt data, the chain may 1557 * have a dio assigned to it in those situations, and they do not conflict. 1558 * 1559 * XXX return error if cannot resize. 1560 */ 1561 int 1562 hammer2_chain_resize(hammer2_chain_t *chain, 1563 hammer2_tid_t mtid, hammer2_off_t dedup_off, 1564 int nradix, int flags) 1565 { 1566 hammer2_dev_t *hmp; 1567 size_t obytes; 1568 size_t nbytes; 1569 int error; 1570 1571 hmp = chain->hmp; 1572 1573 /* 1574 * Only data and indirect blocks can be resized for now. 1575 * (The volu root, inodes, and freemap elements use a fixed size). 1576 */ 1577 KKASSERT(chain != &hmp->vchain); 1578 KKASSERT(chain->bref.type == HAMMER2_BREF_TYPE_DATA || 1579 chain->bref.type == HAMMER2_BREF_TYPE_INDIRECT || 1580 chain->bref.type == HAMMER2_BREF_TYPE_DIRENT); 1581 1582 /* 1583 * Nothing to do if the element is already the proper size 1584 */ 1585 obytes = chain->bytes; 1586 nbytes = (nradix) ? (1U << nradix) : 0; 1587 if (obytes == nbytes) 1588 return (chain->error); 1589 1590 /* 1591 * Make sure the old data is instantiated so we can copy it. If this 1592 * is a data block, the device data may be superfluous since the data 1593 * might be in a logical block, but compressed or encrypted data is 1594 * another matter. 1595 * 1596 * NOTE: The modify will set BLKMAPUPD for us if BLKMAPPED is set. 1597 */ 1598 error = hammer2_chain_modify(chain, mtid, dedup_off, 0); 1599 if (error) 1600 return error; 1601 1602 /* 1603 * Reallocate the block, even if making it smaller (because different 1604 * block sizes may be in different regions). 1605 * 1606 * NOTE: Operation does not copy the data and may only be used 1607 * to resize data blocks in-place, or directory entry blocks 1608 * which are about to be modified in some manner. 1609 */ 1610 error = hammer2_freemap_alloc(chain, nbytes); 1611 if (error) 1612 return error; 1613 1614 chain->bytes = nbytes; 1615 1616 /* 1617 * We don't want the followup chain_modify() to try to copy data 1618 * from the old (wrong-sized) buffer. It won't know how much to 1619 * copy. This case should only occur during writes when the 1620 * originator already has the data to write in-hand. 1621 */ 1622 if (chain->dio) { 1623 KKASSERT(chain->bref.type == HAMMER2_BREF_TYPE_DATA || 1624 chain->bref.type == HAMMER2_BREF_TYPE_DIRENT); 1625 hammer2_io_brelse(&chain->dio); 1626 chain->data = NULL; 1627 } 1628 return (chain->error); 1629 } 1630 1631 /* 1632 * Set the chain modified so its data can be changed by the caller, or 1633 * install deduplicated data. The caller must call this routine for each 1634 * set of modifications it makes, even if the chain is already flagged 1635 * MODIFIED. 1636 * 1637 * Sets bref.modify_tid to mtid only if mtid != 0. Note that bref.modify_tid 1638 * is a CLC (cluster level change) field and is not updated by parent 1639 * propagation during a flush. 1640 * 1641 * Returns an appropriate HAMMER2_ERROR_* code, which will generally reflect 1642 * chain->error except for HAMMER2_ERROR_ENOSPC. If the allocation fails 1643 * due to no space available, HAMMER2_ERROR_ENOSPC is returned and the chain 1644 * remains unmodified with its old data ref intact and chain->error 1645 * unchanged. 1646 * 1647 * Dedup Handling 1648 * 1649 * If the DEDUPABLE flag is set in the chain the storage must be reallocated 1650 * even if the chain is still flagged MODIFIED. In this case the chain's 1651 * DEDUPABLE flag will be cleared once the new storage has been assigned. 1652 * 1653 * If the caller passes a non-zero dedup_off we will use it to assign the 1654 * new storage. The MODIFIED flag will be *CLEARED* in this case, and 1655 * DEDUPABLE will be set (NOTE: the UPDATE flag is always set). The caller 1656 * must not modify the data content upon return. 1657 */ 1658 int 1659 hammer2_chain_modify(hammer2_chain_t *chain, hammer2_tid_t mtid, 1660 hammer2_off_t dedup_off, int flags) 1661 { 1662 hammer2_blockref_t obref; 1663 hammer2_dev_t *hmp; 1664 hammer2_io_t *dio; 1665 int error; 1666 int wasinitial; 1667 int setmodified; 1668 int setupdate; 1669 int newmod; 1670 char *bdata; 1671 1672 hmp = chain->hmp; 1673 obref = chain->bref; 1674 KKASSERT(chain->lock.mtx_lock & MTX_EXCLUSIVE); 1675 1676 /* 1677 * Data is not optional for freemap chains (we must always be sure 1678 * to copy the data on COW storage allocations). 1679 */ 1680 if (chain->bref.type == HAMMER2_BREF_TYPE_FREEMAP_NODE || 1681 chain->bref.type == HAMMER2_BREF_TYPE_FREEMAP_LEAF) { 1682 KKASSERT((chain->flags & HAMMER2_CHAIN_INITIAL) || 1683 (flags & HAMMER2_MODIFY_OPTDATA) == 0); 1684 } 1685 1686 /* 1687 * Data must be resolved if already assigned, unless explicitly 1688 * flagged otherwise. If we cannot safety load the data the 1689 * modification fails and we return early. 1690 */ 1691 if (chain->data == NULL && chain->bytes != 0 && 1692 (flags & HAMMER2_MODIFY_OPTDATA) == 0 && 1693 (chain->bref.data_off & ~HAMMER2_OFF_MASK_RADIX)) { 1694 hammer2_chain_load_data(chain); 1695 if (chain->error) 1696 return (chain->error); 1697 } 1698 error = 0; 1699 1700 /* 1701 * Set MODIFIED to indicate that the chain has been modified. A new 1702 * allocation is required when modifying a chain. 1703 * 1704 * Set UPDATE to ensure that the blockref is updated in the parent. 1705 * 1706 * If MODIFIED is already set determine if we can reuse the assigned 1707 * data block or if we need a new data block. 1708 */ 1709 if ((chain->flags & HAMMER2_CHAIN_MODIFIED) == 0) { 1710 /* 1711 * Must set modified bit. 1712 */ 1713 atomic_add_long(&hammer2_count_modified_chains, 1); 1714 atomic_set_int(&chain->flags, HAMMER2_CHAIN_MODIFIED); 1715 hammer2_pfs_memory_inc(chain->pmp); /* can be NULL */ 1716 setmodified = 1; 1717 1718 /* 1719 * We may be able to avoid a copy-on-write if the chain's 1720 * check mode is set to NONE and the chain's current 1721 * modify_tid is beyond the last explicit snapshot tid. 1722 * 1723 * This implements HAMMER2's overwrite-in-place feature. 1724 * 1725 * NOTE! This data-block cannot be used as a de-duplication 1726 * source when the check mode is set to NONE. 1727 */ 1728 if ((chain->bref.type == HAMMER2_BREF_TYPE_DATA || 1729 chain->bref.type == HAMMER2_BREF_TYPE_DIRENT) && 1730 (chain->flags & HAMMER2_CHAIN_INITIAL) == 0 && 1731 (chain->flags & HAMMER2_CHAIN_DEDUPABLE) == 0 && 1732 HAMMER2_DEC_CHECK(chain->bref.methods) == 1733 HAMMER2_CHECK_NONE && 1734 chain->pmp && 1735 chain->bref.modify_tid > 1736 chain->pmp->iroot->meta.pfs_lsnap_tid) { 1737 /* 1738 * Sector overwrite allowed. 1739 */ 1740 newmod = 0; 1741 } else if ((hmp->hflags & HMNT2_EMERG) && 1742 chain->pmp && 1743 chain->bref.modify_tid > 1744 chain->pmp->iroot->meta.pfs_lsnap_tid) { 1745 /* 1746 * If in emergency delete mode then do a modify-in- 1747 * place on any chain type belonging to the PFS as 1748 * long as it doesn't mess up a snapshot. We might 1749 * be forced to do this anyway a little further down 1750 * in the code if the allocation fails. 1751 * 1752 * Also note that in emergency mode, these modify-in- 1753 * place operations are NOT SAFE. A storage failure, 1754 * power failure, or panic can corrupt the filesystem. 1755 */ 1756 newmod = 0; 1757 } else { 1758 /* 1759 * Sector overwrite not allowed, must copy-on-write. 1760 */ 1761 newmod = 1; 1762 } 1763 } else if (chain->flags & HAMMER2_CHAIN_DEDUPABLE) { 1764 /* 1765 * If the modified chain was registered for dedup we need 1766 * a new allocation. This only happens for delayed-flush 1767 * chains (i.e. which run through the front-end buffer 1768 * cache). 1769 */ 1770 newmod = 1; 1771 setmodified = 0; 1772 } else { 1773 /* 1774 * Already flagged modified, no new allocation is needed. 1775 */ 1776 newmod = 0; 1777 setmodified = 0; 1778 } 1779 1780 /* 1781 * Flag parent update required. 1782 */ 1783 if ((chain->flags & HAMMER2_CHAIN_UPDATE) == 0) { 1784 atomic_set_int(&chain->flags, HAMMER2_CHAIN_UPDATE); 1785 setupdate = 1; 1786 } else { 1787 setupdate = 0; 1788 } 1789 1790 /* 1791 * The XOP code returns held but unlocked focus chains. This 1792 * prevents the chain from being destroyed but does not prevent 1793 * it from being modified. diolk is used to interlock modifications 1794 * against XOP frontend accesses to the focus. 1795 * 1796 * This allows us to theoretically avoid deadlocking the frontend 1797 * if one of the backends lock up by not formally locking the 1798 * focused chain in the frontend. In addition, the synchronization 1799 * code relies on this mechanism to avoid deadlocking concurrent 1800 * synchronization threads. 1801 */ 1802 lockmgr(&chain->diolk, LK_EXCLUSIVE); 1803 1804 /* 1805 * The modification or re-modification requires an allocation and 1806 * possible COW. If an error occurs, the previous content and data 1807 * reference is retained and the modification fails. 1808 * 1809 * If dedup_off is non-zero, the caller is requesting a deduplication 1810 * rather than a modification. The MODIFIED bit is not set and the 1811 * data offset is set to the deduplication offset. The data cannot 1812 * be modified. 1813 * 1814 * NOTE: The dedup offset is allowed to be in a partially free state 1815 * and we must be sure to reset it to a fully allocated state 1816 * to force two bulkfree passes to free it again. 1817 * 1818 * NOTE: Only applicable when chain->bytes != 0. 1819 * 1820 * XXX can a chain already be marked MODIFIED without a data 1821 * assignment? If not, assert here instead of testing the case. 1822 */ 1823 if (chain != &hmp->vchain && chain != &hmp->fchain && 1824 chain->bytes) { 1825 if ((chain->bref.data_off & ~HAMMER2_OFF_MASK_RADIX) == 0 || 1826 newmod 1827 ) { 1828 /* 1829 * NOTE: We do not have to remove the dedup 1830 * registration because the area is still 1831 * allocated and the underlying DIO will 1832 * still be flushed. 1833 */ 1834 if (dedup_off) { 1835 chain->bref.data_off = dedup_off; 1836 if ((int)(dedup_off & HAMMER2_OFF_MASK_RADIX)) 1837 chain->bytes = 1 << 1838 (int)(dedup_off & 1839 HAMMER2_OFF_MASK_RADIX); 1840 else 1841 chain->bytes = 0; 1842 chain->error = 0; 1843 atomic_clear_int(&chain->flags, 1844 HAMMER2_CHAIN_MODIFIED); 1845 atomic_add_long(&hammer2_count_modified_chains, 1846 -1); 1847 if (chain->pmp) { 1848 hammer2_pfs_memory_wakeup( 1849 chain->pmp, -1); 1850 } 1851 hammer2_freemap_adjust(hmp, &chain->bref, 1852 HAMMER2_FREEMAP_DORECOVER); 1853 atomic_set_int(&chain->flags, 1854 HAMMER2_CHAIN_DEDUPABLE); 1855 } else { 1856 error = hammer2_freemap_alloc(chain, 1857 chain->bytes); 1858 atomic_clear_int(&chain->flags, 1859 HAMMER2_CHAIN_DEDUPABLE); 1860 1861 /* 1862 * If we are unable to allocate a new block 1863 * but we are in emergency mode, issue a 1864 * warning to the console and reuse the same 1865 * block. 1866 * 1867 * We behave as if the allocation were 1868 * successful. 1869 * 1870 * THIS IS IMPORTANT: These modifications 1871 * are virtually guaranteed to corrupt any 1872 * snapshots related to this filesystem. 1873 */ 1874 if (error && (hmp->hflags & HMNT2_EMERG)) { 1875 error = 0; 1876 chain->bref.flags |= 1877 HAMMER2_BREF_FLAG_EMERG_MIP; 1878 1879 krateprintf(&krate_h2em, 1880 "hammer2: Emergency Mode WARNING: " 1881 "Operation will likely corrupt " 1882 "related snapshot: " 1883 "%016jx.%02x key=%016jx\n", 1884 chain->bref.data_off, 1885 chain->bref.type, 1886 chain->bref.key); 1887 } else if (error == 0) { 1888 chain->bref.flags &= 1889 ~HAMMER2_BREF_FLAG_EMERG_MIP; 1890 } 1891 } 1892 } 1893 } 1894 1895 /* 1896 * Stop here if error. We have to undo any flag bits we might 1897 * have set above. 1898 */ 1899 if (error) { 1900 if (setmodified) { 1901 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_MODIFIED); 1902 atomic_add_long(&hammer2_count_modified_chains, -1); 1903 if (chain->pmp) 1904 hammer2_pfs_memory_wakeup(chain->pmp, -1); 1905 } 1906 if (setupdate) { 1907 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_UPDATE); 1908 } 1909 lockmgr(&chain->diolk, LK_RELEASE); 1910 1911 return error; 1912 } 1913 1914 /* 1915 * Update mirror_tid and modify_tid. modify_tid is only updated 1916 * if not passed as zero (during flushes, parent propagation passes 1917 * the value 0). 1918 * 1919 * NOTE: chain->pmp could be the device spmp. 1920 */ 1921 chain->bref.mirror_tid = hmp->voldata.mirror_tid + 1; 1922 if (mtid) 1923 chain->bref.modify_tid = mtid; 1924 1925 /* 1926 * Set BLKMAPUPD to tell the flush code that an existing blockmap entry 1927 * requires updating as well as to tell the delete code that the 1928 * chain's blockref might not exactly match (in terms of physical size 1929 * or block offset) the one in the parent's blocktable. The base key 1930 * of course will still match. 1931 */ 1932 if (chain->flags & HAMMER2_CHAIN_BLKMAPPED) 1933 atomic_set_int(&chain->flags, HAMMER2_CHAIN_BLKMAPUPD); 1934 1935 /* 1936 * Short-cut data block handling when the caller does not need an 1937 * actual data reference to (aka OPTDATA), as long as the chain does 1938 * not already have a data pointer to the data and no de-duplication 1939 * occurred. 1940 * 1941 * This generally means that the modifications are being done via the 1942 * logical buffer cache. 1943 * 1944 * NOTE: If deduplication occurred we have to run through the data 1945 * stuff to clear INITIAL, and the caller will likely want to 1946 * assign the check code anyway. Leaving INITIAL set on a 1947 * dedup can be deadly (it can cause the block to be zero'd!). 1948 * 1949 * This code also handles bytes == 0 (most dirents). 1950 */ 1951 if (chain->bref.type == HAMMER2_BREF_TYPE_DATA && 1952 (flags & HAMMER2_MODIFY_OPTDATA) && 1953 chain->data == NULL) { 1954 if (dedup_off == 0) { 1955 KKASSERT(chain->dio == NULL); 1956 goto skip2; 1957 } 1958 } 1959 1960 /* 1961 * Clearing the INITIAL flag (for indirect blocks) indicates that 1962 * we've processed the uninitialized storage allocation. 1963 * 1964 * If this flag is already clear we are likely in a copy-on-write 1965 * situation but we have to be sure NOT to bzero the storage if 1966 * no data is present. 1967 * 1968 * Clearing of NOTTESTED is allowed if the MODIFIED bit is set, 1969 */ 1970 if (chain->flags & HAMMER2_CHAIN_INITIAL) { 1971 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_INITIAL); 1972 wasinitial = 1; 1973 } else { 1974 wasinitial = 0; 1975 } 1976 1977 /* 1978 * Instantiate data buffer and possibly execute COW operation 1979 */ 1980 switch(chain->bref.type) { 1981 case HAMMER2_BREF_TYPE_VOLUME: 1982 case HAMMER2_BREF_TYPE_FREEMAP: 1983 /* 1984 * The data is embedded, no copy-on-write operation is 1985 * needed. 1986 */ 1987 KKASSERT(chain->dio == NULL); 1988 break; 1989 case HAMMER2_BREF_TYPE_DIRENT: 1990 /* 1991 * The data might be fully embedded. 1992 */ 1993 if (chain->bytes == 0) { 1994 KKASSERT(chain->dio == NULL); 1995 break; 1996 } 1997 /* fall through */ 1998 case HAMMER2_BREF_TYPE_INODE: 1999 case HAMMER2_BREF_TYPE_FREEMAP_LEAF: 2000 case HAMMER2_BREF_TYPE_DATA: 2001 case HAMMER2_BREF_TYPE_INDIRECT: 2002 case HAMMER2_BREF_TYPE_FREEMAP_NODE: 2003 /* 2004 * Perform the copy-on-write operation 2005 * 2006 * zero-fill or copy-on-write depending on whether 2007 * chain->data exists or not and set the dirty state for 2008 * the new buffer. hammer2_io_new() will handle the 2009 * zero-fill. 2010 * 2011 * If a dedup_off was supplied this is an existing block 2012 * and no COW, copy, or further modification is required. 2013 */ 2014 KKASSERT(chain != &hmp->vchain && chain != &hmp->fchain); 2015 2016 if (wasinitial && dedup_off == 0) { 2017 error = hammer2_io_new(hmp, chain->bref.type, 2018 chain->bref.data_off, 2019 chain->bytes, &dio); 2020 } else { 2021 error = hammer2_io_bread(hmp, chain->bref.type, 2022 chain->bref.data_off, 2023 chain->bytes, &dio); 2024 } 2025 hammer2_adjreadcounter(chain->bref.type, chain->bytes); 2026 2027 /* 2028 * If an I/O error occurs make sure callers cannot accidently 2029 * modify the old buffer's contents and corrupt the filesystem. 2030 * 2031 * NOTE: hammer2_io_data() call issues bkvasync() 2032 */ 2033 if (error) { 2034 kprintf("hammer2_chain_modify: hmp=%p I/O error\n", 2035 hmp); 2036 chain->error = HAMMER2_ERROR_EIO; 2037 hammer2_io_brelse(&dio); 2038 hammer2_io_brelse(&chain->dio); 2039 chain->data = NULL; 2040 break; 2041 } 2042 chain->error = 0; 2043 bdata = hammer2_io_data(dio, chain->bref.data_off); 2044 2045 if (chain->data) { 2046 /* 2047 * COW (unless a dedup). 2048 */ 2049 KKASSERT(chain->dio != NULL); 2050 if (chain->data != (void *)bdata && dedup_off == 0) { 2051 bcopy(chain->data, bdata, chain->bytes); 2052 } 2053 } else if (wasinitial == 0 && dedup_off == 0) { 2054 /* 2055 * We have a problem. We were asked to COW but 2056 * we don't have any data to COW with! 2057 */ 2058 panic("hammer2_chain_modify: having a COW %p\n", 2059 chain); 2060 } 2061 2062 /* 2063 * Retire the old buffer, replace with the new. Dirty or 2064 * redirty the new buffer. 2065 * 2066 * WARNING! The system buffer cache may have already flushed 2067 * the buffer, so we must be sure to [re]dirty it 2068 * for further modification. 2069 * 2070 * If dedup_off was supplied, the caller is not 2071 * expected to make any further modification to the 2072 * buffer. 2073 * 2074 * WARNING! hammer2_get_gdata() assumes dio never transitions 2075 * through NULL in order to optimize away unnecessary 2076 * diolk operations. 2077 */ 2078 { 2079 hammer2_io_t *tio; 2080 2081 if ((tio = chain->dio) != NULL) 2082 hammer2_io_bqrelse(&tio); 2083 chain->data = (void *)bdata; 2084 chain->dio = dio; 2085 if (dedup_off == 0) 2086 hammer2_io_setdirty(dio); 2087 } 2088 break; 2089 default: 2090 panic("hammer2_chain_modify: illegal non-embedded type %d", 2091 chain->bref.type); 2092 break; 2093 2094 } 2095 skip2: 2096 /* 2097 * setflush on parent indicating that the parent must recurse down 2098 * to us. Do not call on chain itself which might already have it 2099 * set. 2100 */ 2101 if (chain->parent) 2102 hammer2_chain_setflush(chain->parent); 2103 lockmgr(&chain->diolk, LK_RELEASE); 2104 2105 return (chain->error); 2106 } 2107 2108 /* 2109 * Modify the chain associated with an inode. 2110 */ 2111 int 2112 hammer2_chain_modify_ip(hammer2_inode_t *ip, hammer2_chain_t *chain, 2113 hammer2_tid_t mtid, int flags) 2114 { 2115 int error; 2116 2117 hammer2_inode_modify(ip); 2118 error = hammer2_chain_modify(chain, mtid, 0, flags); 2119 2120 return error; 2121 } 2122 2123 /* 2124 * This function returns the chain at the nearest key within the specified 2125 * range. The returned chain will be referenced but not locked. 2126 * 2127 * This function will recurse through chain->rbtree as necessary and will 2128 * return a *key_nextp suitable for iteration. *key_nextp is only set if 2129 * the iteration value is less than the current value of *key_nextp. 2130 * 2131 * The caller should use (*key_nextp) to calculate the actual range of 2132 * the returned element, which will be (key_beg to *key_nextp - 1), because 2133 * there might be another element which is superior to the returned element 2134 * and overlaps it. 2135 * 2136 * (*key_nextp) can be passed as key_beg in an iteration only while non-NULL 2137 * chains continue to be returned. On EOF (*key_nextp) may overflow since 2138 * it will wind up being (key_end + 1). 2139 * 2140 * WARNING! Must be called with child's spinlock held. Spinlock remains 2141 * held through the operation. 2142 */ 2143 struct hammer2_chain_find_info { 2144 hammer2_chain_t *best; 2145 hammer2_key_t key_beg; 2146 hammer2_key_t key_end; 2147 hammer2_key_t key_next; 2148 }; 2149 2150 static int hammer2_chain_find_cmp(hammer2_chain_t *child, void *data); 2151 static int hammer2_chain_find_callback(hammer2_chain_t *child, void *data); 2152 2153 static 2154 hammer2_chain_t * 2155 hammer2_chain_find(hammer2_chain_t *parent, hammer2_key_t *key_nextp, 2156 hammer2_key_t key_beg, hammer2_key_t key_end) 2157 { 2158 struct hammer2_chain_find_info info; 2159 2160 info.best = NULL; 2161 info.key_beg = key_beg; 2162 info.key_end = key_end; 2163 info.key_next = *key_nextp; 2164 2165 RB_SCAN(hammer2_chain_tree, &parent->core.rbtree, 2166 hammer2_chain_find_cmp, hammer2_chain_find_callback, 2167 &info); 2168 *key_nextp = info.key_next; 2169 #if 0 2170 kprintf("chain_find %p %016jx:%016jx next=%016jx\n", 2171 parent, key_beg, key_end, *key_nextp); 2172 #endif 2173 2174 return (info.best); 2175 } 2176 2177 static 2178 int 2179 hammer2_chain_find_cmp(hammer2_chain_t *child, void *data) 2180 { 2181 struct hammer2_chain_find_info *info = data; 2182 hammer2_key_t child_beg; 2183 hammer2_key_t child_end; 2184 2185 child_beg = child->bref.key; 2186 child_end = child_beg + ((hammer2_key_t)1 << child->bref.keybits) - 1; 2187 2188 if (child_end < info->key_beg) 2189 return(-1); 2190 if (child_beg > info->key_end) 2191 return(1); 2192 return(0); 2193 } 2194 2195 static 2196 int 2197 hammer2_chain_find_callback(hammer2_chain_t *child, void *data) 2198 { 2199 struct hammer2_chain_find_info *info = data; 2200 hammer2_chain_t *best; 2201 hammer2_key_t child_end; 2202 2203 /* 2204 * WARNING! Layerq is scanned forwards, exact matches should keep 2205 * the existing info->best. 2206 */ 2207 if ((best = info->best) == NULL) { 2208 /* 2209 * No previous best. Assign best 2210 */ 2211 info->best = child; 2212 } else if (best->bref.key <= info->key_beg && 2213 child->bref.key <= info->key_beg) { 2214 /* 2215 * Illegal overlap. 2216 */ 2217 KKASSERT(0); 2218 /*info->best = child;*/ 2219 } else if (child->bref.key < best->bref.key) { 2220 /* 2221 * Child has a nearer key and best is not flush with key_beg. 2222 * Set best to child. Truncate key_next to the old best key. 2223 */ 2224 info->best = child; 2225 if (info->key_next > best->bref.key || info->key_next == 0) 2226 info->key_next = best->bref.key; 2227 } else if (child->bref.key == best->bref.key) { 2228 /* 2229 * If our current best is flush with the child then this 2230 * is an illegal overlap. 2231 * 2232 * key_next will automatically be limited to the smaller of 2233 * the two end-points. 2234 */ 2235 KKASSERT(0); 2236 info->best = child; 2237 } else { 2238 /* 2239 * Keep the current best but truncate key_next to the child's 2240 * base. 2241 * 2242 * key_next will also automatically be limited to the smaller 2243 * of the two end-points (probably not necessary for this case 2244 * but we do it anyway). 2245 */ 2246 if (info->key_next > child->bref.key || info->key_next == 0) 2247 info->key_next = child->bref.key; 2248 } 2249 2250 /* 2251 * Always truncate key_next based on child's end-of-range. 2252 */ 2253 child_end = child->bref.key + ((hammer2_key_t)1 << child->bref.keybits); 2254 if (child_end && (info->key_next > child_end || info->key_next == 0)) 2255 info->key_next = child_end; 2256 2257 return(0); 2258 } 2259 2260 /* 2261 * Retrieve the specified chain from a media blockref, creating the 2262 * in-memory chain structure which reflects it. The returned chain is 2263 * held and locked according to (how) (HAMMER2_RESOLVE_*). The caller must 2264 * handle crc-checks and so forth, and should check chain->error before 2265 * assuming that the data is good. 2266 * 2267 * To handle insertion races pass the INSERT_RACE flag along with the 2268 * generation number of the core. NULL will be returned if the generation 2269 * number changes before we have a chance to insert the chain. Insert 2270 * races can occur because the parent might be held shared. 2271 * 2272 * Caller must hold the parent locked shared or exclusive since we may 2273 * need the parent's bref array to find our block. 2274 * 2275 * WARNING! chain->pmp is always set to NULL for any chain representing 2276 * part of the super-root topology. 2277 */ 2278 hammer2_chain_t * 2279 hammer2_chain_get(hammer2_chain_t *parent, int generation, 2280 hammer2_blockref_t *bref, int how) 2281 { 2282 hammer2_dev_t *hmp = parent->hmp; 2283 hammer2_chain_t *chain; 2284 int error; 2285 2286 /* 2287 * Allocate a chain structure representing the existing media 2288 * entry. Resulting chain has one ref and is not locked. 2289 */ 2290 if (bref->flags & HAMMER2_BREF_FLAG_PFSROOT) 2291 chain = hammer2_chain_alloc(hmp, NULL, bref); 2292 else 2293 chain = hammer2_chain_alloc(hmp, parent->pmp, bref); 2294 /* ref'd chain returned */ 2295 2296 /* 2297 * Flag that the chain is in the parent's blockmap so delete/flush 2298 * knows what to do with it. 2299 */ 2300 atomic_set_int(&chain->flags, HAMMER2_CHAIN_BLKMAPPED); 2301 2302 /* 2303 * chain must be locked to avoid unexpected ripouts 2304 */ 2305 hammer2_chain_lock(chain, how); 2306 2307 /* 2308 * Link the chain into its parent. A spinlock is required to safely 2309 * access the RBTREE, and it is possible to collide with another 2310 * hammer2_chain_get() operation because the caller might only hold 2311 * a shared lock on the parent. 2312 * 2313 * NOTE: Get races can occur quite often when we distribute 2314 * asynchronous read-aheads across multiple threads. 2315 */ 2316 KKASSERT(parent->refs > 0); 2317 error = hammer2_chain_insert(parent, chain, 2318 HAMMER2_CHAIN_INSERT_SPIN | 2319 HAMMER2_CHAIN_INSERT_RACE, 2320 generation); 2321 if (error) { 2322 KKASSERT((chain->flags & HAMMER2_CHAIN_ONRBTREE) == 0); 2323 /*kprintf("chain %p get race\n", chain);*/ 2324 hammer2_chain_unlock(chain); 2325 hammer2_chain_drop(chain); 2326 chain = NULL; 2327 } else { 2328 KKASSERT(chain->flags & HAMMER2_CHAIN_ONRBTREE); 2329 } 2330 2331 /* 2332 * Return our new chain referenced but not locked, or NULL if 2333 * a race occurred. 2334 */ 2335 return (chain); 2336 } 2337 2338 /* 2339 * Lookup initialization/completion API 2340 */ 2341 hammer2_chain_t * 2342 hammer2_chain_lookup_init(hammer2_chain_t *parent, int flags) 2343 { 2344 hammer2_chain_ref(parent); 2345 if (flags & HAMMER2_LOOKUP_SHARED) { 2346 hammer2_chain_lock(parent, HAMMER2_RESOLVE_ALWAYS | 2347 HAMMER2_RESOLVE_SHARED); 2348 } else { 2349 hammer2_chain_lock(parent, HAMMER2_RESOLVE_ALWAYS); 2350 } 2351 return (parent); 2352 } 2353 2354 void 2355 hammer2_chain_lookup_done(hammer2_chain_t *parent) 2356 { 2357 if (parent) { 2358 hammer2_chain_unlock(parent); 2359 hammer2_chain_drop(parent); 2360 } 2361 } 2362 2363 /* 2364 * Take the locked chain and return a locked parent. The chain remains 2365 * locked on return, but may have to be temporarily unlocked to acquire 2366 * the parent. Because of this, (chain) must be stable and cannot be 2367 * deleted while it was temporarily unlocked (typically means that (chain) 2368 * is an inode). 2369 * 2370 * Pass HAMMER2_RESOLVE_* flags in flags. 2371 * 2372 * This will work even if the chain is errored, and the caller can check 2373 * parent->error on return if desired since the parent will be locked. 2374 * 2375 * This function handles the lock order reversal. 2376 */ 2377 hammer2_chain_t * 2378 hammer2_chain_getparent(hammer2_chain_t *chain, int flags) 2379 { 2380 hammer2_chain_t *parent; 2381 2382 /* 2383 * Be careful of order, chain must be unlocked before parent 2384 * is locked below to avoid a deadlock. Try it trivially first. 2385 */ 2386 parent = chain->parent; 2387 if (parent == NULL) 2388 panic("hammer2_chain_getparent: no parent"); 2389 hammer2_chain_ref(parent); 2390 if (hammer2_chain_lock(parent, flags|HAMMER2_RESOLVE_NONBLOCK) == 0) 2391 return parent; 2392 2393 for (;;) { 2394 hammer2_chain_unlock(chain); 2395 hammer2_chain_lock(parent, flags); 2396 hammer2_chain_lock(chain, flags); 2397 2398 /* 2399 * Parent relinking races are quite common. We have to get 2400 * it right or we will blow up the block table. 2401 */ 2402 if (chain->parent == parent) 2403 break; 2404 hammer2_chain_unlock(parent); 2405 hammer2_chain_drop(parent); 2406 cpu_ccfence(); 2407 parent = chain->parent; 2408 if (parent == NULL) 2409 panic("hammer2_chain_getparent: no parent"); 2410 hammer2_chain_ref(parent); 2411 } 2412 return parent; 2413 } 2414 2415 /* 2416 * Take the locked chain and return a locked parent. The chain is unlocked 2417 * and dropped. *chainp is set to the returned parent as a convenience. 2418 * Pass HAMMER2_RESOLVE_* flags in flags. 2419 * 2420 * This will work even if the chain is errored, and the caller can check 2421 * parent->error on return if desired since the parent will be locked. 2422 * 2423 * The chain does NOT need to be stable. We use a tracking structure 2424 * to track the expected parent if the chain is deleted out from under us. 2425 * 2426 * This function handles the lock order reversal. 2427 */ 2428 hammer2_chain_t * 2429 hammer2_chain_repparent(hammer2_chain_t **chainp, int flags) 2430 { 2431 hammer2_chain_t *chain; 2432 hammer2_chain_t *parent; 2433 struct hammer2_reptrack reptrack; 2434 struct hammer2_reptrack **repp; 2435 2436 /* 2437 * Be careful of order, chain must be unlocked before parent 2438 * is locked below to avoid a deadlock. Try it trivially first. 2439 */ 2440 chain = *chainp; 2441 parent = chain->parent; 2442 if (parent == NULL) { 2443 hammer2_spin_unex(&chain->core.spin); 2444 panic("hammer2_chain_repparent: no parent"); 2445 } 2446 hammer2_chain_ref(parent); 2447 if (hammer2_chain_lock(parent, flags|HAMMER2_RESOLVE_NONBLOCK) == 0) { 2448 hammer2_chain_unlock(chain); 2449 hammer2_chain_drop(chain); 2450 *chainp = parent; 2451 2452 return parent; 2453 } 2454 2455 /* 2456 * Ok, now it gets a bit nasty. There are multiple situations where 2457 * the parent might be in the middle of a deletion, or where the child 2458 * (chain) might be deleted the instant we let go of its lock. 2459 * We can potentially end up in a no-win situation! 2460 * 2461 * In particular, the indirect_maintenance() case can cause these 2462 * situations. 2463 * 2464 * To deal with this we install a reptrack structure in the parent 2465 * This reptrack structure 'owns' the parent ref and will automatically 2466 * migrate to the parent's parent if the parent is deleted permanently. 2467 */ 2468 hammer2_spin_init(&reptrack.spin, "h2reptrk"); 2469 reptrack.chain = parent; 2470 hammer2_chain_ref(parent); /* for the reptrack */ 2471 2472 hammer2_spin_ex(&parent->core.spin); 2473 reptrack.next = parent->core.reptrack; 2474 parent->core.reptrack = &reptrack; 2475 hammer2_spin_unex(&parent->core.spin); 2476 2477 hammer2_chain_unlock(chain); 2478 hammer2_chain_drop(chain); 2479 chain = NULL; /* gone */ 2480 2481 /* 2482 * At the top of this loop, chain is gone and parent is refd both 2483 * by us explicitly AND via our reptrack. We are attempting to 2484 * lock parent. 2485 */ 2486 for (;;) { 2487 hammer2_chain_lock(parent, flags); 2488 2489 if (reptrack.chain == parent) 2490 break; 2491 hammer2_chain_unlock(parent); 2492 hammer2_chain_drop(parent); 2493 2494 kprintf("hammer2: debug REPTRACK %p->%p\n", 2495 parent, reptrack.chain); 2496 hammer2_spin_ex(&reptrack.spin); 2497 parent = reptrack.chain; 2498 hammer2_chain_ref(parent); 2499 hammer2_spin_unex(&reptrack.spin); 2500 } 2501 2502 /* 2503 * Once parent is locked and matches our reptrack, our reptrack 2504 * will be stable and we have our parent. We can unlink our 2505 * reptrack. 2506 * 2507 * WARNING! Remember that the chain lock might be shared. Chains 2508 * locked shared have stable parent linkages. 2509 */ 2510 hammer2_spin_ex(&parent->core.spin); 2511 repp = &parent->core.reptrack; 2512 while (*repp != &reptrack) 2513 repp = &(*repp)->next; 2514 *repp = reptrack.next; 2515 hammer2_spin_unex(&parent->core.spin); 2516 2517 hammer2_chain_drop(parent); /* reptrack ref */ 2518 *chainp = parent; /* return parent lock+ref */ 2519 2520 return parent; 2521 } 2522 2523 /* 2524 * Dispose of any linked reptrack structures in (chain) by shifting them to 2525 * (parent). Both (chain) and (parent) must be exclusively locked. 2526 * 2527 * This is interlocked against any children of (chain) on the other side. 2528 * No children so remain as-of when this is called so we can test 2529 * core.reptrack without holding the spin-lock. 2530 * 2531 * Used whenever the caller intends to permanently delete chains related 2532 * to topological recursions (BREF_TYPE_INDIRECT, BREF_TYPE_FREEMAP_NODE), 2533 * where the chains underneath the node being deleted are given a new parent 2534 * above the node being deleted. 2535 */ 2536 static 2537 void 2538 hammer2_chain_repchange(hammer2_chain_t *parent, hammer2_chain_t *chain) 2539 { 2540 struct hammer2_reptrack *reptrack; 2541 2542 KKASSERT(chain->core.live_count == 0 && RB_EMPTY(&chain->core.rbtree)); 2543 while (chain->core.reptrack) { 2544 hammer2_spin_ex(&parent->core.spin); 2545 hammer2_spin_ex(&chain->core.spin); 2546 reptrack = chain->core.reptrack; 2547 if (reptrack == NULL) { 2548 hammer2_spin_unex(&chain->core.spin); 2549 hammer2_spin_unex(&parent->core.spin); 2550 break; 2551 } 2552 hammer2_spin_ex(&reptrack->spin); 2553 chain->core.reptrack = reptrack->next; 2554 reptrack->chain = parent; 2555 reptrack->next = parent->core.reptrack; 2556 parent->core.reptrack = reptrack; 2557 hammer2_chain_ref(parent); /* reptrack */ 2558 2559 hammer2_spin_unex(&chain->core.spin); 2560 hammer2_spin_unex(&parent->core.spin); 2561 kprintf("hammer2: debug repchange %p %p->%p\n", 2562 reptrack, chain, parent); 2563 hammer2_chain_drop(chain); /* reptrack */ 2564 } 2565 } 2566 2567 /* 2568 * Locate the first chain whos key range overlaps (key_beg, key_end) inclusive. 2569 * (*parentp) typically points to an inode but can also point to a related 2570 * indirect block and this function will recurse upwards and find the inode 2571 * or the nearest undeleted indirect block covering the key range. 2572 * 2573 * This function unconditionally sets *errorp, replacing any previous value. 2574 * 2575 * (*parentp) must be exclusive or shared locked (depending on flags) and 2576 * referenced and can be an inode or an existing indirect block within the 2577 * inode. 2578 * 2579 * If (*parent) is errored out, this function will not attempt to recurse 2580 * the radix tree and will return NULL along with an appropriate *errorp. 2581 * If NULL is returned and *errorp is 0, the requested lookup could not be 2582 * located. 2583 * 2584 * On return (*parentp) will be modified to point at the deepest parent chain 2585 * element encountered during the search, as a helper for an insertion or 2586 * deletion. 2587 * 2588 * The new (*parentp) will be locked shared or exclusive (depending on flags), 2589 * and referenced, and the old will be unlocked and dereferenced (no change 2590 * if they are both the same). This is particularly important if the caller 2591 * wishes to insert a new chain, (*parentp) will be set properly even if NULL 2592 * is returned, as long as no error occurred. 2593 * 2594 * The matching chain will be returned locked according to flags. 2595 * 2596 * -- 2597 * 2598 * NULL is returned if no match was found, but (*parentp) will still 2599 * potentially be adjusted. 2600 * 2601 * On return (*key_nextp) will point to an iterative value for key_beg. 2602 * (If NULL is returned (*key_nextp) is set to (key_end + 1)). 2603 * 2604 * This function will also recurse up the chain if the key is not within the 2605 * current parent's range. (*parentp) can never be set to NULL. An iteration 2606 * can simply allow (*parentp) to float inside the loop. 2607 * 2608 * NOTE! chain->data is not always resolved. By default it will not be 2609 * resolved for BREF_TYPE_DATA, FREEMAP_NODE, or FREEMAP_LEAF. Use 2610 * HAMMER2_LOOKUP_ALWAYS to force resolution (but be careful w/ 2611 * BREF_TYPE_DATA as the device buffer can alias the logical file 2612 * buffer). 2613 */ 2614 2615 hammer2_chain_t * 2616 hammer2_chain_lookup(hammer2_chain_t **parentp, hammer2_key_t *key_nextp, 2617 hammer2_key_t key_beg, hammer2_key_t key_end, 2618 int *errorp, int flags) 2619 { 2620 hammer2_dev_t *hmp; 2621 hammer2_chain_t *parent; 2622 hammer2_chain_t *chain; 2623 hammer2_blockref_t *base; 2624 hammer2_blockref_t *bref; 2625 hammer2_blockref_t bsave; 2626 hammer2_key_t scan_beg; 2627 hammer2_key_t scan_end; 2628 int count = 0; 2629 int how_always = HAMMER2_RESOLVE_ALWAYS; 2630 int how_maybe = HAMMER2_RESOLVE_MAYBE; 2631 int how; 2632 int generation; 2633 int maxloops = 300000; 2634 volatile hammer2_mtx_t save_mtx; 2635 2636 if (flags & HAMMER2_LOOKUP_ALWAYS) { 2637 how_maybe = how_always; 2638 how = HAMMER2_RESOLVE_ALWAYS; 2639 } else if (flags & HAMMER2_LOOKUP_NODATA) { 2640 how = HAMMER2_RESOLVE_NEVER; 2641 } else { 2642 how = HAMMER2_RESOLVE_MAYBE; 2643 } 2644 if (flags & HAMMER2_LOOKUP_SHARED) { 2645 how_maybe |= HAMMER2_RESOLVE_SHARED; 2646 how_always |= HAMMER2_RESOLVE_SHARED; 2647 how |= HAMMER2_RESOLVE_SHARED; 2648 } 2649 2650 /* 2651 * Recurse (*parentp) upward if necessary until the parent completely 2652 * encloses the key range or we hit the inode. 2653 * 2654 * Handle races against the flusher deleting indirect nodes on its 2655 * way back up by continuing to recurse upward past the deletion. 2656 */ 2657 parent = *parentp; 2658 hmp = parent->hmp; 2659 *errorp = 0; 2660 2661 while (parent->bref.type == HAMMER2_BREF_TYPE_INDIRECT || 2662 parent->bref.type == HAMMER2_BREF_TYPE_FREEMAP_NODE) { 2663 scan_beg = parent->bref.key; 2664 scan_end = scan_beg + 2665 ((hammer2_key_t)1 << parent->bref.keybits) - 1; 2666 if ((parent->flags & HAMMER2_CHAIN_DELETED) == 0) { 2667 if (key_beg >= scan_beg && key_end <= scan_end) 2668 break; 2669 } 2670 parent = hammer2_chain_repparent(parentp, how_maybe); 2671 } 2672 again: 2673 if (--maxloops == 0) 2674 panic("hammer2_chain_lookup: maxloops"); 2675 2676 /* 2677 * MATCHIND case that does not require parent->data (do prior to 2678 * parent->error check). 2679 */ 2680 switch(parent->bref.type) { 2681 case HAMMER2_BREF_TYPE_FREEMAP_NODE: 2682 case HAMMER2_BREF_TYPE_INDIRECT: 2683 if (flags & HAMMER2_LOOKUP_MATCHIND) { 2684 scan_beg = parent->bref.key; 2685 scan_end = scan_beg + 2686 ((hammer2_key_t)1 << parent->bref.keybits) - 1; 2687 if (key_beg == scan_beg && key_end == scan_end) { 2688 chain = parent; 2689 hammer2_chain_ref(chain); 2690 hammer2_chain_lock(chain, how_maybe); 2691 *key_nextp = scan_end + 1; 2692 goto done; 2693 } 2694 } 2695 break; 2696 default: 2697 break; 2698 } 2699 2700 /* 2701 * No lookup is possible if the parent is errored. We delayed 2702 * this check as long as we could to ensure that the parent backup, 2703 * embedded data, and MATCHIND code could still execute. 2704 */ 2705 if (parent->error) { 2706 *errorp = parent->error; 2707 return NULL; 2708 } 2709 2710 /* 2711 * Locate the blockref array. Currently we do a fully associative 2712 * search through the array. 2713 */ 2714 switch(parent->bref.type) { 2715 case HAMMER2_BREF_TYPE_INODE: 2716 /* 2717 * Special shortcut for embedded data returns the inode 2718 * itself. Callers must detect this condition and access 2719 * the embedded data (the strategy code does this for us). 2720 * 2721 * This is only applicable to regular files and softlinks. 2722 * 2723 * We need a second lock on parent. Since we already have 2724 * a lock we must pass LOCKAGAIN to prevent unexpected 2725 * blocking (we don't want to block on a second shared 2726 * ref if an exclusive lock is pending) 2727 */ 2728 if (parent->data->ipdata.meta.op_flags & 2729 HAMMER2_OPFLAG_DIRECTDATA) { 2730 if (flags & HAMMER2_LOOKUP_NODIRECT) { 2731 chain = NULL; 2732 *key_nextp = key_end + 1; 2733 goto done; 2734 } 2735 hammer2_chain_ref(parent); 2736 hammer2_chain_lock(parent, how_always | 2737 HAMMER2_RESOLVE_LOCKAGAIN); 2738 *key_nextp = key_end + 1; 2739 return (parent); 2740 } 2741 base = &parent->data->ipdata.u.blockset.blockref[0]; 2742 count = HAMMER2_SET_COUNT; 2743 break; 2744 case HAMMER2_BREF_TYPE_FREEMAP_NODE: 2745 case HAMMER2_BREF_TYPE_INDIRECT: 2746 /* 2747 * Optimize indirect blocks in the INITIAL state to avoid 2748 * I/O. 2749 * 2750 * Debugging: Enter permanent wait state instead of 2751 * panicing on unexpectedly NULL data for the moment. 2752 */ 2753 if (parent->flags & HAMMER2_CHAIN_INITIAL) { 2754 base = NULL; 2755 } else { 2756 if (parent->data == NULL) { 2757 kprintf("hammer2: unexpected NULL data " 2758 "on %p\n", parent); 2759 while (1) 2760 tsleep(parent, 0, "xxx", 0); 2761 } 2762 base = &parent->data->npdata[0]; 2763 } 2764 count = parent->bytes / sizeof(hammer2_blockref_t); 2765 break; 2766 case HAMMER2_BREF_TYPE_VOLUME: 2767 base = &parent->data->voldata.sroot_blockset.blockref[0]; 2768 count = HAMMER2_SET_COUNT; 2769 break; 2770 case HAMMER2_BREF_TYPE_FREEMAP: 2771 base = &parent->data->blkset.blockref[0]; 2772 count = HAMMER2_SET_COUNT; 2773 break; 2774 default: 2775 panic("hammer2_chain_lookup: unrecognized " 2776 "blockref(B) type: %d", 2777 parent->bref.type); 2778 base = NULL; /* safety */ 2779 count = 0; /* safety */ 2780 break; 2781 } 2782 2783 /* 2784 * Merged scan to find next candidate. 2785 * 2786 * hammer2_base_*() functions require the parent->core.live_* fields 2787 * to be synchronized. 2788 * 2789 * We need to hold the spinlock to access the block array and RB tree 2790 * and to interlock chain creation. 2791 */ 2792 if ((parent->flags & HAMMER2_CHAIN_COUNTEDBREFS) == 0) 2793 hammer2_chain_countbrefs(parent, base, count); 2794 2795 /* 2796 * Combined search 2797 */ 2798 hammer2_spin_ex(&parent->core.spin); 2799 chain = hammer2_combined_find(parent, base, count, 2800 key_nextp, 2801 key_beg, key_end, 2802 &bref); 2803 generation = parent->core.generation; 2804 2805 /* 2806 * Exhausted parent chain, iterate. 2807 */ 2808 if (bref == NULL) { 2809 KKASSERT(chain == NULL); 2810 hammer2_spin_unex(&parent->core.spin); 2811 if (key_beg == key_end) /* short cut single-key case */ 2812 return (NULL); 2813 2814 /* 2815 * Stop if we reached the end of the iteration. 2816 */ 2817 if (parent->bref.type != HAMMER2_BREF_TYPE_INDIRECT && 2818 parent->bref.type != HAMMER2_BREF_TYPE_FREEMAP_NODE) { 2819 return (NULL); 2820 } 2821 2822 /* 2823 * Calculate next key, stop if we reached the end of the 2824 * iteration, otherwise go up one level and loop. 2825 */ 2826 key_beg = parent->bref.key + 2827 ((hammer2_key_t)1 << parent->bref.keybits); 2828 if (key_beg == 0 || key_beg > key_end) 2829 return (NULL); 2830 parent = hammer2_chain_repparent(parentp, how_maybe); 2831 goto again; 2832 } 2833 2834 /* 2835 * Selected from blockref or in-memory chain. 2836 */ 2837 bsave = *bref; 2838 if (chain == NULL) { 2839 hammer2_spin_unex(&parent->core.spin); 2840 if (bsave.type == HAMMER2_BREF_TYPE_INDIRECT || 2841 bsave.type == HAMMER2_BREF_TYPE_FREEMAP_NODE) { 2842 chain = hammer2_chain_get(parent, generation, 2843 &bsave, how_maybe); 2844 } else { 2845 chain = hammer2_chain_get(parent, generation, 2846 &bsave, how); 2847 } 2848 if (chain == NULL) 2849 goto again; 2850 } else { 2851 hammer2_chain_ref(chain); 2852 hammer2_spin_unex(&parent->core.spin); 2853 2854 /* 2855 * chain is referenced but not locked. We must lock the 2856 * chain to obtain definitive state. 2857 */ 2858 if (bsave.type == HAMMER2_BREF_TYPE_INDIRECT || 2859 bsave.type == HAMMER2_BREF_TYPE_FREEMAP_NODE) { 2860 hammer2_chain_lock(chain, how_maybe); 2861 } else { 2862 hammer2_chain_lock(chain, how); 2863 } 2864 KKASSERT(chain->parent == parent); 2865 } 2866 if (bcmp(&bsave, &chain->bref, sizeof(bsave)) || 2867 chain->parent != parent) { 2868 hammer2_chain_unlock(chain); 2869 hammer2_chain_drop(chain); 2870 chain = NULL; /* SAFETY */ 2871 goto again; 2872 } 2873 2874 2875 /* 2876 * Skip deleted chains (XXX cache 'i' end-of-block-array? XXX) 2877 * 2878 * NOTE: Chain's key range is not relevant as there might be 2879 * one-offs within the range that are not deleted. 2880 * 2881 * NOTE: Lookups can race delete-duplicate because 2882 * delete-duplicate does not lock the parent's core 2883 * (they just use the spinlock on the core). 2884 */ 2885 if (chain->flags & HAMMER2_CHAIN_DELETED) { 2886 kprintf("skip deleted chain %016jx.%02x key=%016jx\n", 2887 chain->bref.data_off, chain->bref.type, 2888 chain->bref.key); 2889 hammer2_chain_unlock(chain); 2890 hammer2_chain_drop(chain); 2891 chain = NULL; /* SAFETY */ 2892 key_beg = *key_nextp; 2893 if (key_beg == 0 || key_beg > key_end) 2894 return(NULL); 2895 goto again; 2896 } 2897 2898 /* 2899 * If the chain element is an indirect block it becomes the new 2900 * parent and we loop on it. We must maintain our top-down locks 2901 * to prevent the flusher from interfering (i.e. doing a 2902 * delete-duplicate and leaving us recursing down a deleted chain). 2903 * 2904 * The parent always has to be locked with at least RESOLVE_MAYBE 2905 * so we can access its data. It might need a fixup if the caller 2906 * passed incompatible flags. Be careful not to cause a deadlock 2907 * as a data-load requires an exclusive lock. 2908 * 2909 * If HAMMER2_LOOKUP_MATCHIND is set and the indirect block's key 2910 * range is within the requested key range we return the indirect 2911 * block and do NOT loop. This is usually only used to acquire 2912 * freemap nodes. 2913 */ 2914 if (chain->bref.type == HAMMER2_BREF_TYPE_INDIRECT || 2915 chain->bref.type == HAMMER2_BREF_TYPE_FREEMAP_NODE) { 2916 save_mtx = parent->lock; 2917 hammer2_chain_unlock(parent); 2918 hammer2_chain_drop(parent); 2919 *parentp = parent = chain; 2920 chain = NULL; /* SAFETY */ 2921 goto again; 2922 } 2923 done: 2924 /* 2925 * All done, return the locked chain. 2926 * 2927 * If the caller does not want a locked chain, replace the lock with 2928 * a ref. Perhaps this can eventually be optimized to not obtain the 2929 * lock in the first place for situations where the data does not 2930 * need to be resolved. 2931 * 2932 * NOTE! A chain->error must be tested by the caller upon return. 2933 * *errorp is only set based on issues which occur while 2934 * trying to reach the chain. 2935 */ 2936 return (chain); 2937 } 2938 2939 /* 2940 * After having issued a lookup we can iterate all matching keys. 2941 * 2942 * If chain is non-NULL we continue the iteration from just after it's index. 2943 * 2944 * If chain is NULL we assume the parent was exhausted and continue the 2945 * iteration at the next parent. 2946 * 2947 * If a fatal error occurs (typically an I/O error), a dummy chain is 2948 * returned with chain->error and error-identifying information set. This 2949 * chain will assert if you try to do anything fancy with it. 2950 * 2951 * XXX Depending on where the error occurs we should allow continued iteration. 2952 * 2953 * parent must be locked on entry and remains locked throughout. chain's 2954 * lock status must match flags. Chain is always at least referenced. 2955 * 2956 * WARNING! The MATCHIND flag does not apply to this function. 2957 */ 2958 hammer2_chain_t * 2959 hammer2_chain_next(hammer2_chain_t **parentp, hammer2_chain_t *chain, 2960 hammer2_key_t *key_nextp, 2961 hammer2_key_t key_beg, hammer2_key_t key_end, 2962 int *errorp, int flags) 2963 { 2964 hammer2_chain_t *parent; 2965 int how_maybe; 2966 2967 /* 2968 * Calculate locking flags for upward recursion. 2969 */ 2970 how_maybe = HAMMER2_RESOLVE_MAYBE; 2971 if (flags & HAMMER2_LOOKUP_SHARED) 2972 how_maybe |= HAMMER2_RESOLVE_SHARED; 2973 2974 parent = *parentp; 2975 *errorp = 0; 2976 2977 /* 2978 * Calculate the next index and recalculate the parent if necessary. 2979 */ 2980 if (chain) { 2981 key_beg = chain->bref.key + 2982 ((hammer2_key_t)1 << chain->bref.keybits); 2983 hammer2_chain_unlock(chain); 2984 hammer2_chain_drop(chain); 2985 2986 /* 2987 * chain invalid past this point, but we can still do a 2988 * pointer comparison w/parent. 2989 * 2990 * Any scan where the lookup returned degenerate data embedded 2991 * in the inode has an invalid index and must terminate. 2992 */ 2993 if (chain == parent) 2994 return(NULL); 2995 if (key_beg == 0 || key_beg > key_end) 2996 return(NULL); 2997 chain = NULL; 2998 } else if (parent->bref.type != HAMMER2_BREF_TYPE_INDIRECT && 2999 parent->bref.type != HAMMER2_BREF_TYPE_FREEMAP_NODE) { 3000 /* 3001 * We reached the end of the iteration. 3002 */ 3003 return (NULL); 3004 } else { 3005 /* 3006 * Continue iteration with next parent unless the current 3007 * parent covers the range. 3008 * 3009 * (This also handles the case of a deleted, empty indirect 3010 * node). 3011 */ 3012 key_beg = parent->bref.key + 3013 ((hammer2_key_t)1 << parent->bref.keybits); 3014 if (key_beg == 0 || key_beg > key_end) 3015 return (NULL); 3016 parent = hammer2_chain_repparent(parentp, how_maybe); 3017 } 3018 3019 /* 3020 * And execute 3021 */ 3022 return (hammer2_chain_lookup(parentp, key_nextp, 3023 key_beg, key_end, 3024 errorp, flags)); 3025 } 3026 3027 /* 3028 * Caller wishes to iterate chains under parent, loading new chains into 3029 * chainp. Caller must initialize *chainp to NULL and *firstp to 1, and 3030 * then call hammer2_chain_scan() repeatedly until a non-zero return. 3031 * During the scan, *firstp will be set to 0 and (*chainp) will be replaced 3032 * with the returned chain for the scan. The returned *chainp will be 3033 * locked and referenced. Any prior contents will be unlocked and dropped. 3034 * 3035 * Caller should check the return value. A normal scan EOF will return 3036 * exactly HAMMER2_ERROR_EOF. Any other non-zero value indicates an 3037 * error trying to access parent data. Any error in the returned chain 3038 * must be tested separately by the caller. 3039 * 3040 * (*chainp) is dropped on each scan, but will only be set if the returned 3041 * element itself can recurse. Leaf elements are NOT resolved, loaded, or 3042 * returned via *chainp. The caller will get their bref only. 3043 * 3044 * The raw scan function is similar to lookup/next but does not seek to a key. 3045 * Blockrefs are iterated via first_bref = (parent, NULL) and 3046 * next_chain = (parent, bref). 3047 * 3048 * The passed-in parent must be locked and its data resolved. The function 3049 * nominally returns a locked and referenced *chainp != NULL for chains 3050 * the caller might need to recurse on (and will dipose of any *chainp passed 3051 * in). The caller must check the chain->bref.type either way. 3052 */ 3053 int 3054 hammer2_chain_scan(hammer2_chain_t *parent, hammer2_chain_t **chainp, 3055 hammer2_blockref_t *bref, int *firstp, 3056 int flags) 3057 { 3058 hammer2_dev_t *hmp; 3059 hammer2_blockref_t *base; 3060 hammer2_blockref_t *bref_ptr; 3061 hammer2_key_t key; 3062 hammer2_key_t next_key; 3063 hammer2_chain_t *chain = NULL; 3064 int count = 0; 3065 int how_always = HAMMER2_RESOLVE_ALWAYS; 3066 int how_maybe = HAMMER2_RESOLVE_MAYBE; 3067 int how; 3068 int generation; 3069 int maxloops = 300000; 3070 int error; 3071 3072 hmp = parent->hmp; 3073 error = 0; 3074 3075 /* 3076 * Scan flags borrowed from lookup. 3077 */ 3078 if (flags & HAMMER2_LOOKUP_ALWAYS) { 3079 how_maybe = how_always; 3080 how = HAMMER2_RESOLVE_ALWAYS; 3081 } else if (flags & HAMMER2_LOOKUP_NODATA) { 3082 how = HAMMER2_RESOLVE_NEVER; 3083 } else { 3084 how = HAMMER2_RESOLVE_MAYBE; 3085 } 3086 if (flags & HAMMER2_LOOKUP_SHARED) { 3087 how_maybe |= HAMMER2_RESOLVE_SHARED; 3088 how_always |= HAMMER2_RESOLVE_SHARED; 3089 how |= HAMMER2_RESOLVE_SHARED; 3090 } 3091 3092 /* 3093 * Calculate key to locate first/next element, unlocking the previous 3094 * element as we go. Be careful, the key calculation can overflow. 3095 * 3096 * (also reset bref to NULL) 3097 */ 3098 if (*firstp) { 3099 key = 0; 3100 *firstp = 0; 3101 } else { 3102 key = bref->key + ((hammer2_key_t)1 << bref->keybits); 3103 if ((chain = *chainp) != NULL) { 3104 *chainp = NULL; 3105 hammer2_chain_unlock(chain); 3106 hammer2_chain_drop(chain); 3107 chain = NULL; 3108 } 3109 if (key == 0) { 3110 error |= HAMMER2_ERROR_EOF; 3111 goto done; 3112 } 3113 } 3114 3115 again: 3116 if (parent->error) { 3117 error = parent->error; 3118 goto done; 3119 } 3120 if (--maxloops == 0) 3121 panic("hammer2_chain_scan: maxloops"); 3122 3123 /* 3124 * Locate the blockref array. Currently we do a fully associative 3125 * search through the array. 3126 */ 3127 switch(parent->bref.type) { 3128 case HAMMER2_BREF_TYPE_INODE: 3129 /* 3130 * An inode with embedded data has no sub-chains. 3131 * 3132 * WARNING! Bulk scan code may pass a static chain marked 3133 * as BREF_TYPE_INODE with a copy of the volume 3134 * root blockset to snapshot the volume. 3135 */ 3136 if (parent->data->ipdata.meta.op_flags & 3137 HAMMER2_OPFLAG_DIRECTDATA) { 3138 error |= HAMMER2_ERROR_EOF; 3139 goto done; 3140 } 3141 base = &parent->data->ipdata.u.blockset.blockref[0]; 3142 count = HAMMER2_SET_COUNT; 3143 break; 3144 case HAMMER2_BREF_TYPE_FREEMAP_NODE: 3145 case HAMMER2_BREF_TYPE_INDIRECT: 3146 /* 3147 * Optimize indirect blocks in the INITIAL state to avoid 3148 * I/O. 3149 */ 3150 if (parent->flags & HAMMER2_CHAIN_INITIAL) { 3151 base = NULL; 3152 } else { 3153 if (parent->data == NULL) 3154 panic("parent->data is NULL"); 3155 base = &parent->data->npdata[0]; 3156 } 3157 count = parent->bytes / sizeof(hammer2_blockref_t); 3158 break; 3159 case HAMMER2_BREF_TYPE_VOLUME: 3160 base = &parent->data->voldata.sroot_blockset.blockref[0]; 3161 count = HAMMER2_SET_COUNT; 3162 break; 3163 case HAMMER2_BREF_TYPE_FREEMAP: 3164 base = &parent->data->blkset.blockref[0]; 3165 count = HAMMER2_SET_COUNT; 3166 break; 3167 default: 3168 panic("hammer2_chain_scan: unrecognized blockref type: %d", 3169 parent->bref.type); 3170 base = NULL; /* safety */ 3171 count = 0; /* safety */ 3172 break; 3173 } 3174 3175 /* 3176 * Merged scan to find next candidate. 3177 * 3178 * hammer2_base_*() functions require the parent->core.live_* fields 3179 * to be synchronized. 3180 * 3181 * We need to hold the spinlock to access the block array and RB tree 3182 * and to interlock chain creation. 3183 */ 3184 if ((parent->flags & HAMMER2_CHAIN_COUNTEDBREFS) == 0) 3185 hammer2_chain_countbrefs(parent, base, count); 3186 3187 next_key = 0; 3188 bref_ptr = NULL; 3189 hammer2_spin_ex(&parent->core.spin); 3190 chain = hammer2_combined_find(parent, base, count, 3191 &next_key, 3192 key, HAMMER2_KEY_MAX, 3193 &bref_ptr); 3194 generation = parent->core.generation; 3195 3196 /* 3197 * Exhausted parent chain, we're done. 3198 */ 3199 if (bref_ptr == NULL) { 3200 hammer2_spin_unex(&parent->core.spin); 3201 KKASSERT(chain == NULL); 3202 error |= HAMMER2_ERROR_EOF; 3203 goto done; 3204 } 3205 3206 /* 3207 * Copy into the supplied stack-based blockref. 3208 */ 3209 *bref = *bref_ptr; 3210 3211 /* 3212 * Selected from blockref or in-memory chain. 3213 */ 3214 if (chain == NULL) { 3215 switch(bref->type) { 3216 case HAMMER2_BREF_TYPE_INODE: 3217 case HAMMER2_BREF_TYPE_FREEMAP_NODE: 3218 case HAMMER2_BREF_TYPE_INDIRECT: 3219 case HAMMER2_BREF_TYPE_VOLUME: 3220 case HAMMER2_BREF_TYPE_FREEMAP: 3221 /* 3222 * Recursion, always get the chain 3223 */ 3224 hammer2_spin_unex(&parent->core.spin); 3225 chain = hammer2_chain_get(parent, generation, 3226 bref, how); 3227 if (chain == NULL) 3228 goto again; 3229 break; 3230 default: 3231 /* 3232 * No recursion, do not waste time instantiating 3233 * a chain, just iterate using the bref. 3234 */ 3235 hammer2_spin_unex(&parent->core.spin); 3236 break; 3237 } 3238 } else { 3239 /* 3240 * Recursion or not we need the chain in order to supply 3241 * the bref. 3242 */ 3243 hammer2_chain_ref(chain); 3244 hammer2_spin_unex(&parent->core.spin); 3245 hammer2_chain_lock(chain, how); 3246 } 3247 if (chain && 3248 (bcmp(bref, &chain->bref, sizeof(*bref)) || 3249 chain->parent != parent)) { 3250 hammer2_chain_unlock(chain); 3251 hammer2_chain_drop(chain); 3252 chain = NULL; 3253 goto again; 3254 } 3255 3256 /* 3257 * Skip deleted chains (XXX cache 'i' end-of-block-array? XXX) 3258 * 3259 * NOTE: chain's key range is not relevant as there might be 3260 * one-offs within the range that are not deleted. 3261 * 3262 * NOTE: XXX this could create problems with scans used in 3263 * situations other than mount-time recovery. 3264 * 3265 * NOTE: Lookups can race delete-duplicate because 3266 * delete-duplicate does not lock the parent's core 3267 * (they just use the spinlock on the core). 3268 */ 3269 if (chain && (chain->flags & HAMMER2_CHAIN_DELETED)) { 3270 hammer2_chain_unlock(chain); 3271 hammer2_chain_drop(chain); 3272 chain = NULL; 3273 3274 key = next_key; 3275 if (key == 0) { 3276 error |= HAMMER2_ERROR_EOF; 3277 goto done; 3278 } 3279 goto again; 3280 } 3281 3282 done: 3283 /* 3284 * All done, return the bref or NULL, supply chain if necessary. 3285 */ 3286 if (chain) 3287 *chainp = chain; 3288 return (error); 3289 } 3290 3291 /* 3292 * Create and return a new hammer2 system memory structure of the specified 3293 * key, type and size and insert it under (*parentp). This is a full 3294 * insertion, based on the supplied key/keybits, and may involve creating 3295 * indirect blocks and moving other chains around via delete/duplicate. 3296 * 3297 * This call can be made with parent == NULL as long as a non -1 methods 3298 * is supplied. hmp must also be supplied in this situation (otherwise 3299 * hmp is extracted from the supplied parent). The chain will be detached 3300 * from the topology. A later call with both parent and chain can be made 3301 * to attach it. 3302 * 3303 * THE CALLER MUST HAVE ALREADY PROPERLY SEEKED (*parentp) TO THE INSERTION 3304 * POINT SANS ANY REQUIRED INDIRECT BLOCK CREATIONS DUE TO THE ARRAY BEING 3305 * FULL. This typically means that the caller is creating the chain after 3306 * doing a hammer2_chain_lookup(). 3307 * 3308 * (*parentp) must be exclusive locked and may be replaced on return 3309 * depending on how much work the function had to do. 3310 * 3311 * (*parentp) must not be errored or this function will assert. 3312 * 3313 * (*chainp) usually starts out NULL and returns the newly created chain, 3314 * but if the caller desires the caller may allocate a disconnected chain 3315 * and pass it in instead. 3316 * 3317 * This function should NOT be used to insert INDIRECT blocks. It is 3318 * typically used to create/insert inodes and data blocks. 3319 * 3320 * Caller must pass-in an exclusively locked parent the new chain is to 3321 * be inserted under, and optionally pass-in a disconnected, exclusively 3322 * locked chain to insert (else we create a new chain). The function will 3323 * adjust (*parentp) as necessary, create or connect the chain, and 3324 * return an exclusively locked chain in *chainp. 3325 * 3326 * When creating a PFSROOT inode under the super-root, pmp is typically NULL 3327 * and will be reassigned. 3328 * 3329 * NOTE: returns HAMMER_ERROR_* flags 3330 */ 3331 int 3332 hammer2_chain_create(hammer2_chain_t **parentp, hammer2_chain_t **chainp, 3333 hammer2_dev_t *hmp, hammer2_pfs_t *pmp, int methods, 3334 hammer2_key_t key, int keybits, int type, size_t bytes, 3335 hammer2_tid_t mtid, hammer2_off_t dedup_off, int flags) 3336 { 3337 hammer2_chain_t *chain; 3338 hammer2_chain_t *parent; 3339 hammer2_blockref_t *base; 3340 hammer2_blockref_t dummy; 3341 int allocated = 0; 3342 int error = 0; 3343 int count; 3344 int maxloops = 300000; 3345 3346 /* 3347 * Topology may be crossing a PFS boundary. 3348 */ 3349 parent = *parentp; 3350 if (parent) { 3351 KKASSERT(hammer2_mtx_owned(&parent->lock)); 3352 KKASSERT(parent->error == 0); 3353 hmp = parent->hmp; 3354 } 3355 chain = *chainp; 3356 3357 if (chain == NULL) { 3358 /* 3359 * First allocate media space and construct the dummy bref, 3360 * then allocate the in-memory chain structure. Set the 3361 * INITIAL flag for fresh chains which do not have embedded 3362 * data. 3363 */ 3364 bzero(&dummy, sizeof(dummy)); 3365 dummy.type = type; 3366 dummy.key = key; 3367 dummy.keybits = keybits; 3368 dummy.data_off = hammer2_getradix(bytes); 3369 3370 /* 3371 * Inherit methods from parent by default. Primarily used 3372 * for BREF_TYPE_DATA. Non-data types *must* be set to 3373 * a non-NONE check algorithm. 3374 */ 3375 if (methods == -1) 3376 dummy.methods = parent->bref.methods; 3377 else 3378 dummy.methods = (uint8_t)methods; 3379 3380 if (type != HAMMER2_BREF_TYPE_DATA && 3381 HAMMER2_DEC_CHECK(dummy.methods) == HAMMER2_CHECK_NONE) { 3382 dummy.methods |= 3383 HAMMER2_ENC_CHECK(HAMMER2_CHECK_DEFAULT); 3384 } 3385 3386 chain = hammer2_chain_alloc(hmp, pmp, &dummy); 3387 3388 /* 3389 * Lock the chain manually, chain_lock will load the chain 3390 * which we do NOT want to do. (note: chain->refs is set 3391 * to 1 by chain_alloc() for us, but lockcnt is not). 3392 */ 3393 chain->lockcnt = 1; 3394 hammer2_mtx_ex(&chain->lock); 3395 allocated = 1; 3396 3397 /* 3398 * Set INITIAL to optimize I/O. The flag will generally be 3399 * processed when we call hammer2_chain_modify(). 3400 */ 3401 switch(type) { 3402 case HAMMER2_BREF_TYPE_VOLUME: 3403 case HAMMER2_BREF_TYPE_FREEMAP: 3404 panic("hammer2_chain_create: called with volume type"); 3405 break; 3406 case HAMMER2_BREF_TYPE_INDIRECT: 3407 panic("hammer2_chain_create: cannot be used to" 3408 "create indirect block"); 3409 break; 3410 case HAMMER2_BREF_TYPE_FREEMAP_NODE: 3411 panic("hammer2_chain_create: cannot be used to" 3412 "create freemap root or node"); 3413 break; 3414 case HAMMER2_BREF_TYPE_FREEMAP_LEAF: 3415 KKASSERT(bytes == sizeof(chain->data->bmdata)); 3416 /* fall through */ 3417 case HAMMER2_BREF_TYPE_DIRENT: 3418 case HAMMER2_BREF_TYPE_INODE: 3419 case HAMMER2_BREF_TYPE_DATA: 3420 default: 3421 /* 3422 * leave chain->data NULL, set INITIAL 3423 */ 3424 KKASSERT(chain->data == NULL); 3425 atomic_set_int(&chain->flags, HAMMER2_CHAIN_INITIAL); 3426 break; 3427 } 3428 } else { 3429 /* 3430 * We are reattaching a previously deleted chain, possibly 3431 * under a new parent and possibly with a new key/keybits. 3432 * The chain does not have to be in a modified state. The 3433 * UPDATE flag will be set later on in this routine. 3434 * 3435 * Do NOT mess with the current state of the INITIAL flag. 3436 */ 3437 chain->bref.key = key; 3438 chain->bref.keybits = keybits; 3439 if (chain->flags & HAMMER2_CHAIN_DELETED) 3440 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_DELETED); 3441 KKASSERT(chain->parent == NULL); 3442 } 3443 3444 /* 3445 * Set the appropriate bref flag if requested. 3446 * 3447 * NOTE! Callers can call this function to move chains without 3448 * knowing about special flags, so don't clear bref flags 3449 * here! 3450 */ 3451 if (flags & HAMMER2_INSERT_PFSROOT) 3452 chain->bref.flags |= HAMMER2_BREF_FLAG_PFSROOT; 3453 3454 if (parent == NULL) 3455 goto skip; 3456 3457 /* 3458 * Calculate how many entries we have in the blockref array and 3459 * determine if an indirect block is required when inserting into 3460 * the parent. 3461 */ 3462 again: 3463 if (--maxloops == 0) 3464 panic("hammer2_chain_create: maxloops"); 3465 3466 switch(parent->bref.type) { 3467 case HAMMER2_BREF_TYPE_INODE: 3468 if ((parent->data->ipdata.meta.op_flags & 3469 HAMMER2_OPFLAG_DIRECTDATA) != 0) { 3470 kprintf("hammer2: parent set for direct-data! " 3471 "pkey=%016jx ckey=%016jx\n", 3472 parent->bref.key, 3473 chain->bref.key); 3474 } 3475 KKASSERT((parent->data->ipdata.meta.op_flags & 3476 HAMMER2_OPFLAG_DIRECTDATA) == 0); 3477 KKASSERT(parent->data != NULL); 3478 base = &parent->data->ipdata.u.blockset.blockref[0]; 3479 count = HAMMER2_SET_COUNT; 3480 break; 3481 case HAMMER2_BREF_TYPE_INDIRECT: 3482 case HAMMER2_BREF_TYPE_FREEMAP_NODE: 3483 if (parent->flags & HAMMER2_CHAIN_INITIAL) 3484 base = NULL; 3485 else 3486 base = &parent->data->npdata[0]; 3487 count = parent->bytes / sizeof(hammer2_blockref_t); 3488 break; 3489 case HAMMER2_BREF_TYPE_VOLUME: 3490 KKASSERT(parent->data != NULL); 3491 base = &parent->data->voldata.sroot_blockset.blockref[0]; 3492 count = HAMMER2_SET_COUNT; 3493 break; 3494 case HAMMER2_BREF_TYPE_FREEMAP: 3495 KKASSERT(parent->data != NULL); 3496 base = &parent->data->blkset.blockref[0]; 3497 count = HAMMER2_SET_COUNT; 3498 break; 3499 default: 3500 panic("hammer2_chain_create: unrecognized blockref type: %d", 3501 parent->bref.type); 3502 base = NULL; 3503 count = 0; 3504 break; 3505 } 3506 3507 /* 3508 * Make sure we've counted the brefs 3509 */ 3510 if ((parent->flags & HAMMER2_CHAIN_COUNTEDBREFS) == 0) 3511 hammer2_chain_countbrefs(parent, base, count); 3512 3513 KASSERT(parent->core.live_count >= 0 && 3514 parent->core.live_count <= count, 3515 ("bad live_count %d/%d (%02x, %d)", 3516 parent->core.live_count, count, 3517 parent->bref.type, parent->bytes)); 3518 3519 /* 3520 * If no free blockref could be found we must create an indirect 3521 * block and move a number of blockrefs into it. With the parent 3522 * locked we can safely lock each child in order to delete+duplicate 3523 * it without causing a deadlock. 3524 * 3525 * This may return the new indirect block or the old parent depending 3526 * on where the key falls. NULL is returned on error. 3527 */ 3528 if (parent->core.live_count == count) { 3529 hammer2_chain_t *nparent; 3530 3531 KKASSERT((flags & HAMMER2_INSERT_SAMEPARENT) == 0); 3532 3533 nparent = hammer2_chain_create_indirect(parent, key, keybits, 3534 mtid, type, &error); 3535 if (nparent == NULL) { 3536 if (allocated) 3537 hammer2_chain_drop(chain); 3538 chain = NULL; 3539 goto done; 3540 } 3541 if (parent != nparent) { 3542 hammer2_chain_unlock(parent); 3543 hammer2_chain_drop(parent); 3544 parent = *parentp = nparent; 3545 } 3546 goto again; 3547 } 3548 3549 /* 3550 * fall through if parent, or skip to here if no parent. 3551 */ 3552 skip: 3553 if (chain->flags & HAMMER2_CHAIN_DELETED) 3554 kprintf("Inserting deleted chain @%016jx\n", 3555 chain->bref.key); 3556 3557 /* 3558 * Link the chain into its parent. 3559 */ 3560 if (chain->parent != NULL) 3561 panic("hammer2: hammer2_chain_create: chain already connected"); 3562 KKASSERT(chain->parent == NULL); 3563 if (parent) { 3564 KKASSERT(parent->core.live_count < count); 3565 hammer2_chain_insert(parent, chain, 3566 HAMMER2_CHAIN_INSERT_SPIN | 3567 HAMMER2_CHAIN_INSERT_LIVE, 3568 0); 3569 } 3570 3571 if (allocated) { 3572 /* 3573 * Mark the newly created chain modified. This will cause 3574 * UPDATE to be set and process the INITIAL flag. 3575 * 3576 * Device buffers are not instantiated for DATA elements 3577 * as these are handled by logical buffers. 3578 * 3579 * Indirect and freemap node indirect blocks are handled 3580 * by hammer2_chain_create_indirect() and not by this 3581 * function. 3582 * 3583 * Data for all other bref types is expected to be 3584 * instantiated (INODE, LEAF). 3585 */ 3586 switch(chain->bref.type) { 3587 case HAMMER2_BREF_TYPE_DATA: 3588 case HAMMER2_BREF_TYPE_FREEMAP_LEAF: 3589 case HAMMER2_BREF_TYPE_DIRENT: 3590 case HAMMER2_BREF_TYPE_INODE: 3591 error = hammer2_chain_modify(chain, mtid, dedup_off, 3592 HAMMER2_MODIFY_OPTDATA); 3593 break; 3594 default: 3595 /* 3596 * Remaining types are not supported by this function. 3597 * In particular, INDIRECT and LEAF_NODE types are 3598 * handled by create_indirect(). 3599 */ 3600 panic("hammer2_chain_create: bad type: %d", 3601 chain->bref.type); 3602 /* NOT REACHED */ 3603 break; 3604 } 3605 } else { 3606 /* 3607 * When reconnecting a chain we must set UPDATE and 3608 * setflush so the flush recognizes that it must update 3609 * the bref in the parent. 3610 */ 3611 if ((chain->flags & HAMMER2_CHAIN_UPDATE) == 0) 3612 atomic_set_int(&chain->flags, HAMMER2_CHAIN_UPDATE); 3613 } 3614 3615 /* 3616 * We must setflush(parent) to ensure that it recurses through to 3617 * chain. setflush(chain) might not work because ONFLUSH is possibly 3618 * already set in the chain (so it won't recurse up to set it in the 3619 * parent). 3620 */ 3621 if (parent) 3622 hammer2_chain_setflush(parent); 3623 3624 done: 3625 *chainp = chain; 3626 3627 return (error); 3628 } 3629 3630 /* 3631 * Move the chain from its old parent to a new parent. The chain must have 3632 * already been deleted or already disconnected (or never associated) with 3633 * a parent. The chain is reassociated with the new parent and the deleted 3634 * flag will be cleared (no longer deleted). The chain's modification state 3635 * is not altered. 3636 * 3637 * THE CALLER MUST HAVE ALREADY PROPERLY SEEKED (parent) TO THE INSERTION 3638 * POINT SANS ANY REQUIRED INDIRECT BLOCK CREATIONS DUE TO THE ARRAY BEING 3639 * FULL. This typically means that the caller is creating the chain after 3640 * doing a hammer2_chain_lookup(). 3641 * 3642 * Neither (parent) or (chain) can be errored. 3643 * 3644 * If (parent) is non-NULL then the chain is inserted under the parent. 3645 * 3646 * If (parent) is NULL then the newly duplicated chain is not inserted 3647 * anywhere, similar to if it had just been chain_alloc()'d (suitable for 3648 * passing into hammer2_chain_create() after this function returns). 3649 * 3650 * WARNING! This function calls create which means it can insert indirect 3651 * blocks. This can cause other unrelated chains in the parent to 3652 * be moved to a newly inserted indirect block in addition to the 3653 * specific chain. 3654 */ 3655 void 3656 hammer2_chain_rename(hammer2_chain_t **parentp, hammer2_chain_t *chain, 3657 hammer2_tid_t mtid, int flags) 3658 { 3659 hammer2_blockref_t *bref; 3660 hammer2_dev_t *hmp; 3661 hammer2_chain_t *parent; 3662 3663 /* 3664 * WARNING! We should never resolve DATA to device buffers 3665 * (XXX allow it if the caller did?), and since 3666 * we currently do not have the logical buffer cache 3667 * buffer in-hand to fix its cached physical offset 3668 * we also force the modify code to not COW it. XXX 3669 * 3670 * NOTE! We allow error'd chains to be renamed. The bref itself 3671 * is good and can be renamed. The content, however, may 3672 * be inaccessible. 3673 */ 3674 hmp = chain->hmp; 3675 KKASSERT(chain->parent == NULL); 3676 /*KKASSERT(chain->error == 0); allow */ 3677 bref = &chain->bref; 3678 3679 /* 3680 * If parent is not NULL the duplicated chain will be entered under 3681 * the parent and the UPDATE bit set to tell flush to update 3682 * the blockref. 3683 * 3684 * We must setflush(parent) to ensure that it recurses through to 3685 * chain. setflush(chain) might not work because ONFLUSH is possibly 3686 * already set in the chain (so it won't recurse up to set it in the 3687 * parent). 3688 * 3689 * Having both chains locked is extremely important for atomicy. 3690 */ 3691 if (parentp && (parent = *parentp) != NULL) { 3692 KKASSERT(hammer2_mtx_owned(&parent->lock)); 3693 KKASSERT(parent->refs > 0); 3694 KKASSERT(parent->error == 0); 3695 3696 hammer2_chain_create(parentp, &chain, NULL, chain->pmp, 3697 HAMMER2_METH_DEFAULT, 3698 bref->key, bref->keybits, bref->type, 3699 chain->bytes, mtid, 0, flags); 3700 KKASSERT(chain->flags & HAMMER2_CHAIN_UPDATE); 3701 hammer2_chain_setflush(*parentp); 3702 } 3703 } 3704 3705 /* 3706 * This works in tandem with delete_obref() to install a blockref in 3707 * (typically) an indirect block that is associated with the chain being 3708 * moved to *parentp. 3709 * 3710 * The reason we need this function is that the caller needs to maintain 3711 * the blockref as it was, and not generate a new blockref for what might 3712 * be a modified chain. Otherwise stuff will leak into the flush that 3713 * the flush code's FLUSH_INODE_STOP flag is unable to catch. 3714 * 3715 * It is EXTREMELY important that we properly set CHAIN_BLKMAPUPD and 3716 * CHAIN_UPDATE. We must set BLKMAPUPD if the bref does not match, and 3717 * we must clear CHAIN_UPDATE (that was likely set by the chain_rename) if 3718 * it does. Otherwise we can end up in a situation where H2 is unable to 3719 * clean up the in-memory chain topology. 3720 * 3721 * The reason for this is that flushes do not generally flush through 3722 * BREF_TYPE_INODE chains and depend on a hammer2_inode_t queued to syncq 3723 * or sideq to properly flush and dispose of the related inode chain's flags. 3724 * Situations where the inode is not actually modified by the frontend, 3725 * but where we have to move the related chains around as we insert or cleanup 3726 * indirect blocks, can leave us with a 'dirty' (non-disposable) in-memory 3727 * inode chain that does not have a hammer2_inode_t associated with it. 3728 */ 3729 static void 3730 hammer2_chain_rename_obref(hammer2_chain_t **parentp, hammer2_chain_t *chain, 3731 hammer2_tid_t mtid, int flags, 3732 hammer2_blockref_t *obref) 3733 { 3734 hammer2_chain_rename(parentp, chain, mtid, flags); 3735 3736 if (obref->type != HAMMER2_BREF_TYPE_EMPTY) { 3737 hammer2_blockref_t *tbase; 3738 int tcount; 3739 3740 KKASSERT((chain->flags & HAMMER2_CHAIN_BLKMAPPED) == 0); 3741 hammer2_chain_modify(*parentp, mtid, 0, 0); 3742 tbase = hammer2_chain_base_and_count(*parentp, &tcount); 3743 hammer2_base_insert(*parentp, tbase, tcount, chain, obref); 3744 if (bcmp(obref, &chain->bref, sizeof(chain->bref))) { 3745 atomic_set_int(&chain->flags, HAMMER2_CHAIN_BLKMAPUPD | 3746 HAMMER2_CHAIN_UPDATE); 3747 } else { 3748 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_UPDATE); 3749 } 3750 } 3751 } 3752 3753 /* 3754 * Helper function for deleting chains. 3755 * 3756 * The chain is removed from the live view (the RBTREE) as well as the parent's 3757 * blockmap. Both chain and its parent must be locked. 3758 * 3759 * parent may not be errored. chain can be errored. 3760 */ 3761 static int 3762 _hammer2_chain_delete_helper(hammer2_chain_t *parent, hammer2_chain_t *chain, 3763 hammer2_tid_t mtid, int flags, 3764 hammer2_blockref_t *obref) 3765 { 3766 hammer2_dev_t *hmp; 3767 int error = 0; 3768 3769 KKASSERT((chain->flags & HAMMER2_CHAIN_DELETED) == 0); 3770 KKASSERT(chain->parent == parent); 3771 hmp = chain->hmp; 3772 3773 if (chain->flags & HAMMER2_CHAIN_BLKMAPPED) { 3774 /* 3775 * Chain is blockmapped, so there must be a parent. 3776 * Atomically remove the chain from the parent and remove 3777 * the blockmap entry. The parent must be set modified 3778 * to remove the blockmap entry. 3779 */ 3780 hammer2_blockref_t *base; 3781 int count; 3782 3783 KKASSERT(parent != NULL); 3784 KKASSERT(parent->error == 0); 3785 KKASSERT((parent->flags & HAMMER2_CHAIN_INITIAL) == 0); 3786 error = hammer2_chain_modify(parent, mtid, 0, 0); 3787 if (error) 3788 goto done; 3789 3790 /* 3791 * Calculate blockmap pointer 3792 */ 3793 KKASSERT(chain->flags & HAMMER2_CHAIN_ONRBTREE); 3794 hammer2_spin_ex(&chain->core.spin); 3795 hammer2_spin_ex(&parent->core.spin); 3796 3797 atomic_set_int(&chain->flags, HAMMER2_CHAIN_DELETED); 3798 atomic_add_int(&parent->core.live_count, -1); 3799 ++parent->core.generation; 3800 RB_REMOVE(hammer2_chain_tree, &parent->core.rbtree, chain); 3801 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_ONRBTREE); 3802 --parent->core.chain_count; 3803 chain->parent = NULL; 3804 3805 switch(parent->bref.type) { 3806 case HAMMER2_BREF_TYPE_INODE: 3807 /* 3808 * Access the inode's block array. However, there 3809 * is no block array if the inode is flagged 3810 * DIRECTDATA. 3811 */ 3812 if (parent->data && 3813 (parent->data->ipdata.meta.op_flags & 3814 HAMMER2_OPFLAG_DIRECTDATA) == 0) { 3815 base = 3816 &parent->data->ipdata.u.blockset.blockref[0]; 3817 } else { 3818 base = NULL; 3819 } 3820 count = HAMMER2_SET_COUNT; 3821 break; 3822 case HAMMER2_BREF_TYPE_INDIRECT: 3823 case HAMMER2_BREF_TYPE_FREEMAP_NODE: 3824 if (parent->data) 3825 base = &parent->data->npdata[0]; 3826 else 3827 base = NULL; 3828 count = parent->bytes / sizeof(hammer2_blockref_t); 3829 break; 3830 case HAMMER2_BREF_TYPE_VOLUME: 3831 base = &parent->data->voldata. 3832 sroot_blockset.blockref[0]; 3833 count = HAMMER2_SET_COUNT; 3834 break; 3835 case HAMMER2_BREF_TYPE_FREEMAP: 3836 base = &parent->data->blkset.blockref[0]; 3837 count = HAMMER2_SET_COUNT; 3838 break; 3839 default: 3840 base = NULL; 3841 count = 0; 3842 panic("_hammer2_chain_delete_helper: " 3843 "unrecognized blockref type: %d", 3844 parent->bref.type); 3845 break; 3846 } 3847 3848 /* 3849 * delete blockmapped chain from its parent. 3850 * 3851 * The parent is not affected by any statistics in chain 3852 * which are pending synchronization. That is, there is 3853 * nothing to undo in the parent since they have not yet 3854 * been incorporated into the parent. 3855 * 3856 * The parent is affected by statistics stored in inodes. 3857 * Those have already been synchronized, so they must be 3858 * undone. XXX split update possible w/delete in middle? 3859 */ 3860 if (base) { 3861 hammer2_base_delete(parent, base, count, chain, obref); 3862 } 3863 hammer2_spin_unex(&parent->core.spin); 3864 hammer2_spin_unex(&chain->core.spin); 3865 } else if (chain->flags & HAMMER2_CHAIN_ONRBTREE) { 3866 /* 3867 * Chain is not blockmapped but a parent is present. 3868 * Atomically remove the chain from the parent. There is 3869 * no blockmap entry to remove. 3870 * 3871 * Because chain was associated with a parent but not 3872 * synchronized, the chain's *_count_up fields contain 3873 * inode adjustment statistics which must be undone. 3874 */ 3875 hammer2_spin_ex(&chain->core.spin); 3876 hammer2_spin_ex(&parent->core.spin); 3877 atomic_set_int(&chain->flags, HAMMER2_CHAIN_DELETED); 3878 atomic_add_int(&parent->core.live_count, -1); 3879 ++parent->core.generation; 3880 RB_REMOVE(hammer2_chain_tree, &parent->core.rbtree, chain); 3881 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_ONRBTREE); 3882 --parent->core.chain_count; 3883 chain->parent = NULL; 3884 hammer2_spin_unex(&parent->core.spin); 3885 hammer2_spin_unex(&chain->core.spin); 3886 } else { 3887 /* 3888 * Chain is not blockmapped and has no parent. This 3889 * is a degenerate case. 3890 */ 3891 atomic_set_int(&chain->flags, HAMMER2_CHAIN_DELETED); 3892 } 3893 done: 3894 return error; 3895 } 3896 3897 /* 3898 * Create an indirect block that covers one or more of the elements in the 3899 * current parent. Either returns the existing parent with no locking or 3900 * ref changes or returns the new indirect block locked and referenced 3901 * and leaving the original parent lock/ref intact as well. 3902 * 3903 * If an error occurs, NULL is returned and *errorp is set to the H2 error. 3904 * 3905 * The returned chain depends on where the specified key falls. 3906 * 3907 * The key/keybits for the indirect mode only needs to follow three rules: 3908 * 3909 * (1) That all elements underneath it fit within its key space and 3910 * 3911 * (2) That all elements outside it are outside its key space. 3912 * 3913 * (3) When creating the new indirect block any elements in the current 3914 * parent that fit within the new indirect block's keyspace must be 3915 * moved into the new indirect block. 3916 * 3917 * (4) The keyspace chosen for the inserted indirect block CAN cover a wider 3918 * keyspace the the current parent, but lookup/iteration rules will 3919 * ensure (and must ensure) that rule (2) for all parents leading up 3920 * to the nearest inode or the root volume header is adhered to. This 3921 * is accomplished by always recursing through matching keyspaces in 3922 * the hammer2_chain_lookup() and hammer2_chain_next() API. 3923 * 3924 * The current implementation calculates the current worst-case keyspace by 3925 * iterating the current parent and then divides it into two halves, choosing 3926 * whichever half has the most elements (not necessarily the half containing 3927 * the requested key). 3928 * 3929 * We can also opt to use the half with the least number of elements. This 3930 * causes lower-numbered keys (aka logical file offsets) to recurse through 3931 * fewer indirect blocks and higher-numbered keys to recurse through more. 3932 * This also has the risk of not moving enough elements to the new indirect 3933 * block and being forced to create several indirect blocks before the element 3934 * can be inserted. 3935 * 3936 * Must be called with an exclusively locked parent. 3937 * 3938 * NOTE: *errorp set to HAMMER_ERROR_* flags 3939 */ 3940 static int hammer2_chain_indkey_freemap(hammer2_chain_t *parent, 3941 hammer2_key_t *keyp, int keybits, 3942 hammer2_blockref_t *base, int count); 3943 static int hammer2_chain_indkey_file(hammer2_chain_t *parent, 3944 hammer2_key_t *keyp, int keybits, 3945 hammer2_blockref_t *base, int count, 3946 int ncount); 3947 static int hammer2_chain_indkey_dir(hammer2_chain_t *parent, 3948 hammer2_key_t *keyp, int keybits, 3949 hammer2_blockref_t *base, int count, 3950 int ncount); 3951 static 3952 hammer2_chain_t * 3953 hammer2_chain_create_indirect(hammer2_chain_t *parent, 3954 hammer2_key_t create_key, int create_bits, 3955 hammer2_tid_t mtid, int for_type, int *errorp) 3956 { 3957 hammer2_dev_t *hmp; 3958 hammer2_blockref_t *base; 3959 hammer2_blockref_t *bref; 3960 hammer2_blockref_t bsave; 3961 hammer2_blockref_t dummy; 3962 hammer2_chain_t *chain; 3963 hammer2_chain_t *ichain; 3964 hammer2_key_t key = create_key; 3965 hammer2_key_t key_beg; 3966 hammer2_key_t key_end; 3967 hammer2_key_t key_next; 3968 int keybits = create_bits; 3969 int count; 3970 int ncount; 3971 int nbytes; 3972 int loops; 3973 int error; 3974 int reason; 3975 int generation; 3976 int maxloops = 300000; 3977 3978 /* 3979 * Calculate the base blockref pointer or NULL if the chain 3980 * is known to be empty. We need to calculate the array count 3981 * for RB lookups either way. 3982 */ 3983 hmp = parent->hmp; 3984 KKASSERT(hammer2_mtx_owned(&parent->lock)); 3985 3986 /* 3987 * Pre-modify the parent now to avoid having to deal with error 3988 * processing if we tried to later (in the middle of our loop). 3989 * 3990 * We are going to be moving bref's around, the indirect blocks 3991 * cannot be in an initial state. Do not pass MODIFY_OPTDATA. 3992 */ 3993 *errorp = hammer2_chain_modify(parent, mtid, 0, 0); 3994 if (*errorp) { 3995 kprintf("hammer2_chain_create_indirect: error %08x %s\n", 3996 *errorp, hammer2_error_str(*errorp)); 3997 return NULL; 3998 } 3999 KKASSERT((parent->flags & HAMMER2_CHAIN_INITIAL) == 0); 4000 4001 /*hammer2_chain_modify(&parent, HAMMER2_MODIFY_OPTDATA);*/ 4002 base = hammer2_chain_base_and_count(parent, &count); 4003 4004 /* 4005 * How big should our new indirect block be? It has to be at least 4006 * as large as its parent for splits to work properly. 4007 * 4008 * The freemap uses a specific indirect block size. The number of 4009 * levels are built dynamically and ultimately depend on the size 4010 * volume. Because freemap blocks are taken from the reserved areas 4011 * of the volume our goal is efficiency (fewer levels) and not so 4012 * much to save disk space. 4013 * 4014 * The first indirect block level for a directory usually uses 4015 * HAMMER2_IND_BYTES_MIN (4KB = 32 directory entries). Due to 4016 * the hash mechanism, this typically gives us a nominal 4017 * 32 * 4 entries with one level of indirection. 4018 * 4019 * We use HAMMER2_IND_BYTES_NOM (16KB = 128 blockrefs) for FILE 4020 * indirect blocks. The initial 4 entries in the inode gives us 4021 * 256KB. Up to 4 indirect blocks gives us 32MB. Three levels 4022 * of indirection gives us 137GB, and so forth. H2 can support 4023 * huge file sizes but they are not typical, so we try to stick 4024 * with compactness and do not use a larger indirect block size. 4025 * 4026 * We could use 64KB (PBUFSIZE), giving us 512 blockrefs, but 4027 * due to the way indirect blocks are created this usually winds 4028 * up being extremely inefficient for small files. Even though 4029 * 16KB requires more levels of indirection for very large files, 4030 * the 16KB records can be ganged together into 64KB DIOs. 4031 */ 4032 if (for_type == HAMMER2_BREF_TYPE_FREEMAP_NODE || 4033 for_type == HAMMER2_BREF_TYPE_FREEMAP_LEAF) { 4034 nbytes = HAMMER2_FREEMAP_LEVELN_PSIZE; 4035 } else if (parent->bref.type == HAMMER2_BREF_TYPE_INODE) { 4036 if (parent->data->ipdata.meta.type == 4037 HAMMER2_OBJTYPE_DIRECTORY) 4038 nbytes = HAMMER2_IND_BYTES_MIN; /* 4KB = 32 entries */ 4039 else 4040 nbytes = HAMMER2_IND_BYTES_NOM; /* 16KB = ~8MB file */ 4041 4042 } else { 4043 nbytes = HAMMER2_IND_BYTES_NOM; 4044 } 4045 if (nbytes < count * sizeof(hammer2_blockref_t)) { 4046 KKASSERT(for_type != HAMMER2_BREF_TYPE_FREEMAP_NODE && 4047 for_type != HAMMER2_BREF_TYPE_FREEMAP_LEAF); 4048 nbytes = count * sizeof(hammer2_blockref_t); 4049 } 4050 ncount = nbytes / sizeof(hammer2_blockref_t); 4051 4052 /* 4053 * When creating an indirect block for a freemap node or leaf 4054 * the key/keybits must be fitted to static radix levels because 4055 * particular radix levels use particular reserved blocks in the 4056 * related zone. 4057 * 4058 * This routine calculates the key/radix of the indirect block 4059 * we need to create, and whether it is on the high-side or the 4060 * low-side. 4061 */ 4062 switch(for_type) { 4063 case HAMMER2_BREF_TYPE_FREEMAP_NODE: 4064 case HAMMER2_BREF_TYPE_FREEMAP_LEAF: 4065 keybits = hammer2_chain_indkey_freemap(parent, &key, keybits, 4066 base, count); 4067 break; 4068 case HAMMER2_BREF_TYPE_DATA: 4069 keybits = hammer2_chain_indkey_file(parent, &key, keybits, 4070 base, count, ncount); 4071 break; 4072 case HAMMER2_BREF_TYPE_DIRENT: 4073 case HAMMER2_BREF_TYPE_INODE: 4074 keybits = hammer2_chain_indkey_dir(parent, &key, keybits, 4075 base, count, ncount); 4076 break; 4077 default: 4078 panic("illegal indirect block for bref type %d", for_type); 4079 break; 4080 } 4081 4082 /* 4083 * Normalize the key for the radix being represented, keeping the 4084 * high bits and throwing away the low bits. 4085 */ 4086 key &= ~(((hammer2_key_t)1 << keybits) - 1); 4087 4088 /* 4089 * Ok, create our new indirect block 4090 */ 4091 bzero(&dummy, sizeof(dummy)); 4092 if (for_type == HAMMER2_BREF_TYPE_FREEMAP_NODE || 4093 for_type == HAMMER2_BREF_TYPE_FREEMAP_LEAF) { 4094 dummy.type = HAMMER2_BREF_TYPE_FREEMAP_NODE; 4095 } else { 4096 dummy.type = HAMMER2_BREF_TYPE_INDIRECT; 4097 } 4098 dummy.key = key; 4099 dummy.keybits = keybits; 4100 dummy.data_off = hammer2_getradix(nbytes); 4101 dummy.methods = 4102 HAMMER2_ENC_CHECK(HAMMER2_DEC_CHECK(parent->bref.methods)) | 4103 HAMMER2_ENC_COMP(HAMMER2_COMP_NONE); 4104 4105 ichain = hammer2_chain_alloc(hmp, parent->pmp, &dummy); 4106 atomic_set_int(&ichain->flags, HAMMER2_CHAIN_INITIAL); 4107 hammer2_chain_lock(ichain, HAMMER2_RESOLVE_MAYBE); 4108 /* ichain has one ref at this point */ 4109 4110 /* 4111 * We have to mark it modified to allocate its block, but use 4112 * OPTDATA to allow it to remain in the INITIAL state. Otherwise 4113 * it won't be acted upon by the flush code. 4114 * 4115 * XXX remove OPTDATA, we need a fully initialized indirect block to 4116 * be able to move the original blockref. 4117 */ 4118 *errorp = hammer2_chain_modify(ichain, mtid, 0, 0); 4119 if (*errorp) { 4120 kprintf("hammer2_chain_create_indirect: error %08x %s\n", 4121 *errorp, hammer2_error_str(*errorp)); 4122 hammer2_chain_unlock(ichain); 4123 hammer2_chain_drop(ichain); 4124 return NULL; 4125 } 4126 KKASSERT((ichain->flags & HAMMER2_CHAIN_INITIAL) == 0); 4127 4128 /* 4129 * Iterate the original parent and move the matching brefs into 4130 * the new indirect block. 4131 * 4132 * XXX handle flushes. 4133 */ 4134 key_beg = 0; 4135 key_end = HAMMER2_KEY_MAX; 4136 key_next = 0; /* avoid gcc warnings */ 4137 hammer2_spin_ex(&parent->core.spin); 4138 loops = 0; 4139 reason = 0; 4140 4141 for (;;) { 4142 /* 4143 * Parent may have been modified, relocating its block array. 4144 * Reload the base pointer. 4145 */ 4146 base = hammer2_chain_base_and_count(parent, &count); 4147 4148 if (++loops > 100000) { 4149 hammer2_spin_unex(&parent->core.spin); 4150 panic("excessive loops r=%d p=%p base/count %p:%d %016jx\n", 4151 reason, parent, base, count, key_next); 4152 } 4153 4154 /* 4155 * NOTE: spinlock stays intact, returned chain (if not NULL) 4156 * is not referenced or locked which means that we 4157 * cannot safely check its flagged / deletion status 4158 * until we lock it. 4159 */ 4160 chain = hammer2_combined_find(parent, base, count, 4161 &key_next, 4162 key_beg, key_end, 4163 &bref); 4164 generation = parent->core.generation; 4165 if (bref == NULL) 4166 break; 4167 key_next = bref->key + ((hammer2_key_t)1 << bref->keybits); 4168 4169 /* 4170 * Skip keys that are not within the key/radix of the new 4171 * indirect block. They stay in the parent. 4172 */ 4173 if (rounddown2(key ^ bref->key, (hammer2_key_t)1 << keybits) != 0) { 4174 goto next_key_spinlocked; 4175 } 4176 4177 /* 4178 * Load the new indirect block by acquiring the related 4179 * chains (potentially from media as it might not be 4180 * in-memory). Then move it to the new parent (ichain). 4181 * 4182 * chain is referenced but not locked. We must lock the 4183 * chain to obtain definitive state. 4184 */ 4185 bsave = *bref; 4186 if (chain) { 4187 /* 4188 * Use chain already present in the RBTREE 4189 */ 4190 hammer2_chain_ref(chain); 4191 hammer2_spin_unex(&parent->core.spin); 4192 hammer2_chain_lock(chain, HAMMER2_RESOLVE_NEVER); 4193 } else { 4194 /* 4195 * Get chain for blockref element. _get returns NULL 4196 * on insertion race. 4197 */ 4198 hammer2_spin_unex(&parent->core.spin); 4199 chain = hammer2_chain_get(parent, generation, &bsave, 4200 HAMMER2_RESOLVE_NEVER); 4201 if (chain == NULL) { 4202 reason = 1; 4203 hammer2_spin_ex(&parent->core.spin); 4204 continue; 4205 } 4206 } 4207 4208 /* 4209 * This is always live so if the chain has been deleted 4210 * we raced someone and we have to retry. 4211 * 4212 * NOTE: Lookups can race delete-duplicate because 4213 * delete-duplicate does not lock the parent's core 4214 * (they just use the spinlock on the core). 4215 * 4216 * (note reversed logic for this one) 4217 */ 4218 if (bcmp(&bsave, &chain->bref, sizeof(bsave)) || 4219 chain->parent != parent || 4220 (chain->flags & HAMMER2_CHAIN_DELETED)) { 4221 hammer2_chain_unlock(chain); 4222 hammer2_chain_drop(chain); 4223 if (hammer2_debug & 0x0040) { 4224 kprintf("LOST PARENT RETRY " 4225 "RETRY (%p,%p)->%p %08x\n", 4226 parent, chain->parent, chain, chain->flags); 4227 } 4228 hammer2_spin_ex(&parent->core.spin); 4229 continue; 4230 } 4231 4232 /* 4233 * Shift the chain to the indirect block. 4234 * 4235 * WARNING! No reason for us to load chain data, pass NOSTATS 4236 * to prevent delete/insert from trying to access 4237 * inode stats (and thus asserting if there is no 4238 * chain->data loaded). 4239 * 4240 * WARNING! The (parent, chain) deletion may modify the parent 4241 * and invalidate the base pointer. 4242 * 4243 * WARNING! Parent must already be marked modified, so we 4244 * can assume that chain_delete always suceeds. 4245 * 4246 * WARNING! hammer2_chain_repchange() does not have to be 4247 * called (and doesn't work anyway because we are 4248 * only doing a partial shift). A recursion that is 4249 * in-progress can continue at the current parent 4250 * and will be able to properly find its next key. 4251 */ 4252 error = hammer2_chain_delete_obref(parent, chain, mtid, 0, 4253 &bsave); 4254 KKASSERT(error == 0); 4255 hammer2_chain_rename_obref(&ichain, chain, mtid, 0, &bsave); 4256 hammer2_chain_unlock(chain); 4257 hammer2_chain_drop(chain); 4258 KKASSERT(parent->refs > 0); 4259 chain = NULL; 4260 base = NULL; /* safety */ 4261 hammer2_spin_ex(&parent->core.spin); 4262 next_key_spinlocked: 4263 if (--maxloops == 0) 4264 panic("hammer2_chain_create_indirect: maxloops"); 4265 reason = 4; 4266 if (key_next == 0 || key_next > key_end) 4267 break; 4268 key_beg = key_next; 4269 /* loop */ 4270 } 4271 hammer2_spin_unex(&parent->core.spin); 4272 4273 /* 4274 * Insert the new indirect block into the parent now that we've 4275 * cleared out some entries in the parent. We calculated a good 4276 * insertion index in the loop above (ichain->index). 4277 * 4278 * We don't have to set UPDATE here because we mark ichain 4279 * modified down below (so the normal modified -> flush -> set-moved 4280 * sequence applies). 4281 * 4282 * The insertion shouldn't race as this is a completely new block 4283 * and the parent is locked. 4284 */ 4285 base = NULL; /* safety, parent modify may change address */ 4286 KKASSERT((ichain->flags & HAMMER2_CHAIN_ONRBTREE) == 0); 4287 KKASSERT(parent->core.live_count < count); 4288 hammer2_chain_insert(parent, ichain, 4289 HAMMER2_CHAIN_INSERT_SPIN | 4290 HAMMER2_CHAIN_INSERT_LIVE, 4291 0); 4292 4293 /* 4294 * Make sure flushes propogate after our manual insertion. 4295 */ 4296 hammer2_chain_setflush(ichain); 4297 hammer2_chain_setflush(parent); 4298 4299 /* 4300 * Figure out what to return. 4301 */ 4302 if (rounddown2(create_key ^ key, (hammer2_key_t)1 << keybits)) { 4303 /* 4304 * Key being created is outside the key range, 4305 * return the original parent. 4306 */ 4307 hammer2_chain_unlock(ichain); 4308 hammer2_chain_drop(ichain); 4309 } else { 4310 /* 4311 * Otherwise its in the range, return the new parent. 4312 * (leave both the new and old parent locked). 4313 */ 4314 parent = ichain; 4315 } 4316 4317 return(parent); 4318 } 4319 4320 /* 4321 * Do maintenance on an indirect chain. Both parent and chain are locked. 4322 * 4323 * Returns non-zero if (chain) is deleted, either due to being empty or 4324 * because its children were safely moved into the parent. 4325 */ 4326 int 4327 hammer2_chain_indirect_maintenance(hammer2_chain_t *parent, 4328 hammer2_chain_t *chain) 4329 { 4330 hammer2_blockref_t *chain_base; 4331 hammer2_blockref_t *base; 4332 hammer2_blockref_t *bref; 4333 hammer2_blockref_t bsave; 4334 hammer2_key_t key_next; 4335 hammer2_key_t key_beg; 4336 hammer2_key_t key_end; 4337 hammer2_chain_t *sub; 4338 int chain_count; 4339 int count; 4340 int error; 4341 int generation; 4342 4343 /* 4344 * Make sure we have an accurate live_count 4345 */ 4346 if ((chain->flags & (HAMMER2_CHAIN_INITIAL | 4347 HAMMER2_CHAIN_COUNTEDBREFS)) == 0) { 4348 base = &chain->data->npdata[0]; 4349 count = chain->bytes / sizeof(hammer2_blockref_t); 4350 hammer2_chain_countbrefs(chain, base, count); 4351 } 4352 4353 /* 4354 * If the indirect block is empty we can delete it. 4355 * (ignore deletion error) 4356 */ 4357 if (chain->core.live_count == 0 && RB_EMPTY(&chain->core.rbtree)) { 4358 hammer2_chain_delete(parent, chain, 4359 chain->bref.modify_tid, 4360 HAMMER2_DELETE_PERMANENT); 4361 hammer2_chain_repchange(parent, chain); 4362 return 1; 4363 } 4364 4365 base = hammer2_chain_base_and_count(parent, &count); 4366 4367 if ((parent->flags & (HAMMER2_CHAIN_INITIAL | 4368 HAMMER2_CHAIN_COUNTEDBREFS)) == 0) { 4369 hammer2_chain_countbrefs(parent, base, count); 4370 } 4371 4372 /* 4373 * Determine if we can collapse chain into parent, calculate 4374 * hysteresis for chain emptiness. 4375 */ 4376 if (parent->core.live_count + chain->core.live_count - 1 > count) 4377 return 0; 4378 chain_count = chain->bytes / sizeof(hammer2_blockref_t); 4379 if (chain->core.live_count > chain_count * 3 / 4) 4380 return 0; 4381 4382 /* 4383 * Ok, theoretically we can collapse chain's contents into 4384 * parent. chain is locked, but any in-memory children of chain 4385 * are not. For this to work, we must be able to dispose of any 4386 * in-memory children of chain. 4387 * 4388 * For now require that there are no in-memory children of chain. 4389 * 4390 * WARNING! Both chain and parent must remain locked across this 4391 * entire operation. 4392 */ 4393 4394 /* 4395 * Parent must be marked modified. Don't try to collapse it if we 4396 * can't mark it modified. Once modified, destroy chain to make room 4397 * and to get rid of what will be a conflicting key (this is included 4398 * in the calculation above). Finally, move the children of chain 4399 * into chain's parent. 4400 * 4401 * This order creates an accounting problem for bref.embed.stats 4402 * because we destroy chain before we remove its children. Any 4403 * elements whos blockref is already synchronized will be counted 4404 * twice. To deal with the problem we clean out chain's stats prior 4405 * to deleting it. 4406 */ 4407 error = hammer2_chain_modify(parent, 0, 0, 0); 4408 if (error) { 4409 krateprintf(&krate_h2me, "hammer2: indirect_maint: %s\n", 4410 hammer2_error_str(error)); 4411 return 0; 4412 } 4413 error = hammer2_chain_modify(chain, chain->bref.modify_tid, 0, 0); 4414 if (error) { 4415 krateprintf(&krate_h2me, "hammer2: indirect_maint: %s\n", 4416 hammer2_error_str(error)); 4417 return 0; 4418 } 4419 4420 chain->bref.embed.stats.inode_count = 0; 4421 chain->bref.embed.stats.data_count = 0; 4422 error = hammer2_chain_delete(parent, chain, 4423 chain->bref.modify_tid, 4424 HAMMER2_DELETE_PERMANENT); 4425 KKASSERT(error == 0); 4426 4427 /* 4428 * The combined_find call requires core.spin to be held. One would 4429 * think there wouldn't be any conflicts since we hold chain 4430 * exclusively locked, but the caching mechanism for 0-ref children 4431 * does not require a chain lock. 4432 */ 4433 hammer2_spin_ex(&chain->core.spin); 4434 4435 key_next = 0; 4436 key_beg = 0; 4437 key_end = HAMMER2_KEY_MAX; 4438 for (;;) { 4439 chain_base = &chain->data->npdata[0]; 4440 chain_count = chain->bytes / sizeof(hammer2_blockref_t); 4441 sub = hammer2_combined_find(chain, chain_base, chain_count, 4442 &key_next, 4443 key_beg, key_end, 4444 &bref); 4445 generation = chain->core.generation; 4446 if (bref == NULL) 4447 break; 4448 key_next = bref->key + ((hammer2_key_t)1 << bref->keybits); 4449 4450 bsave = *bref; 4451 if (sub) { 4452 hammer2_chain_ref(sub); 4453 hammer2_spin_unex(&chain->core.spin); 4454 hammer2_chain_lock(sub, HAMMER2_RESOLVE_NEVER); 4455 } else { 4456 hammer2_spin_unex(&chain->core.spin); 4457 sub = hammer2_chain_get(chain, generation, &bsave, 4458 HAMMER2_RESOLVE_NEVER); 4459 if (sub == NULL) { 4460 hammer2_spin_ex(&chain->core.spin); 4461 continue; 4462 } 4463 } 4464 if (bcmp(&bsave, &sub->bref, sizeof(bsave)) || 4465 sub->parent != chain || 4466 (sub->flags & HAMMER2_CHAIN_DELETED)) { 4467 hammer2_chain_unlock(sub); 4468 hammer2_chain_drop(sub); 4469 hammer2_spin_ex(&chain->core.spin); 4470 sub = NULL; /* safety */ 4471 continue; 4472 } 4473 error = hammer2_chain_delete_obref(chain, sub, 4474 sub->bref.modify_tid, 0, 4475 &bsave); 4476 KKASSERT(error == 0); 4477 hammer2_chain_rename_obref(&parent, sub, 4478 sub->bref.modify_tid, 4479 HAMMER2_INSERT_SAMEPARENT, &bsave); 4480 hammer2_chain_unlock(sub); 4481 hammer2_chain_drop(sub); 4482 hammer2_spin_ex(&chain->core.spin); 4483 4484 if (key_next == 0) 4485 break; 4486 key_beg = key_next; 4487 } 4488 hammer2_spin_unex(&chain->core.spin); 4489 4490 hammer2_chain_repchange(parent, chain); 4491 4492 return 1; 4493 } 4494 4495 /* 4496 * Freemap indirect blocks 4497 * 4498 * Calculate the keybits and highside/lowside of the freemap node the 4499 * caller is creating. 4500 * 4501 * This routine will specify the next higher-level freemap key/radix 4502 * representing the lowest-ordered set. By doing so, eventually all 4503 * low-ordered sets will be moved one level down. 4504 * 4505 * We have to be careful here because the freemap reserves a limited 4506 * number of blocks for a limited number of levels. So we can't just 4507 * push indiscriminately. 4508 */ 4509 int 4510 hammer2_chain_indkey_freemap(hammer2_chain_t *parent, hammer2_key_t *keyp, 4511 int keybits, hammer2_blockref_t *base, int count) 4512 { 4513 hammer2_chain_t *chain; 4514 hammer2_blockref_t *bref; 4515 hammer2_key_t key; 4516 hammer2_key_t key_beg; 4517 hammer2_key_t key_end; 4518 hammer2_key_t key_next; 4519 int locount; 4520 int hicount; 4521 int maxloops = 300000; 4522 4523 key = *keyp; 4524 locount = 0; 4525 hicount = 0; 4526 keybits = 64; 4527 4528 /* 4529 * Calculate the range of keys in the array being careful to skip 4530 * slots which are overridden with a deletion. 4531 */ 4532 key_beg = 0; 4533 key_end = HAMMER2_KEY_MAX; 4534 hammer2_spin_ex(&parent->core.spin); 4535 4536 for (;;) { 4537 if (--maxloops == 0) { 4538 panic("indkey_freemap shit %p %p:%d\n", 4539 parent, base, count); 4540 } 4541 chain = hammer2_combined_find(parent, base, count, 4542 &key_next, 4543 key_beg, key_end, 4544 &bref); 4545 4546 /* 4547 * Exhausted search 4548 */ 4549 if (bref == NULL) 4550 break; 4551 4552 /* 4553 * Skip deleted chains. 4554 */ 4555 if (chain && (chain->flags & HAMMER2_CHAIN_DELETED)) { 4556 if (key_next == 0 || key_next > key_end) 4557 break; 4558 key_beg = key_next; 4559 continue; 4560 } 4561 4562 /* 4563 * Use the full live (not deleted) element for the scan 4564 * iteration. HAMMER2 does not allow partial replacements. 4565 * 4566 * XXX should be built into hammer2_combined_find(). 4567 */ 4568 key_next = bref->key + ((hammer2_key_t)1 << bref->keybits); 4569 4570 if (keybits > bref->keybits) { 4571 key = bref->key; 4572 keybits = bref->keybits; 4573 } else if (keybits == bref->keybits && bref->key < key) { 4574 key = bref->key; 4575 } 4576 if (key_next == 0) 4577 break; 4578 key_beg = key_next; 4579 } 4580 hammer2_spin_unex(&parent->core.spin); 4581 4582 /* 4583 * Return the keybits for a higher-level FREEMAP_NODE covering 4584 * this node. 4585 */ 4586 switch(keybits) { 4587 case HAMMER2_FREEMAP_LEVEL0_RADIX: 4588 keybits = HAMMER2_FREEMAP_LEVEL1_RADIX; 4589 break; 4590 case HAMMER2_FREEMAP_LEVEL1_RADIX: 4591 keybits = HAMMER2_FREEMAP_LEVEL2_RADIX; 4592 break; 4593 case HAMMER2_FREEMAP_LEVEL2_RADIX: 4594 keybits = HAMMER2_FREEMAP_LEVEL3_RADIX; 4595 break; 4596 case HAMMER2_FREEMAP_LEVEL3_RADIX: 4597 keybits = HAMMER2_FREEMAP_LEVEL4_RADIX; 4598 break; 4599 case HAMMER2_FREEMAP_LEVEL4_RADIX: 4600 keybits = HAMMER2_FREEMAP_LEVEL5_RADIX; 4601 break; 4602 case HAMMER2_FREEMAP_LEVEL5_RADIX: 4603 panic("hammer2_chain_indkey_freemap: level too high"); 4604 break; 4605 default: 4606 panic("hammer2_chain_indkey_freemap: bad radix"); 4607 break; 4608 } 4609 *keyp = key; 4610 4611 return (keybits); 4612 } 4613 4614 /* 4615 * File indirect blocks 4616 * 4617 * Calculate the key/keybits for the indirect block to create by scanning 4618 * existing keys. The key being created is also passed in *keyp and can be 4619 * inside or outside the indirect block. Regardless, the indirect block 4620 * must hold at least two keys in order to guarantee sufficient space. 4621 * 4622 * We use a modified version of the freemap's fixed radix tree, but taylored 4623 * for file data. Basically we configure an indirect block encompassing the 4624 * smallest key. 4625 */ 4626 static int 4627 hammer2_chain_indkey_file(hammer2_chain_t *parent, hammer2_key_t *keyp, 4628 int keybits, hammer2_blockref_t *base, int count, 4629 int ncount) 4630 { 4631 hammer2_chain_t *chain; 4632 hammer2_blockref_t *bref; 4633 hammer2_key_t key; 4634 hammer2_key_t key_beg; 4635 hammer2_key_t key_end; 4636 hammer2_key_t key_next; 4637 int nradix; 4638 int locount; 4639 int hicount; 4640 int maxloops = 300000; 4641 4642 key = *keyp; 4643 locount = 0; 4644 hicount = 0; 4645 keybits = 64; 4646 4647 /* 4648 * Calculate the range of keys in the array being careful to skip 4649 * slots which are overridden with a deletion. 4650 * 4651 * Locate the smallest key. 4652 */ 4653 key_beg = 0; 4654 key_end = HAMMER2_KEY_MAX; 4655 hammer2_spin_ex(&parent->core.spin); 4656 4657 for (;;) { 4658 if (--maxloops == 0) { 4659 panic("indkey_freemap shit %p %p:%d\n", 4660 parent, base, count); 4661 } 4662 chain = hammer2_combined_find(parent, base, count, 4663 &key_next, 4664 key_beg, key_end, 4665 &bref); 4666 4667 /* 4668 * Exhausted search 4669 */ 4670 if (bref == NULL) 4671 break; 4672 4673 /* 4674 * Skip deleted chains. 4675 */ 4676 if (chain && (chain->flags & HAMMER2_CHAIN_DELETED)) { 4677 if (key_next == 0 || key_next > key_end) 4678 break; 4679 key_beg = key_next; 4680 continue; 4681 } 4682 4683 /* 4684 * Use the full live (not deleted) element for the scan 4685 * iteration. HAMMER2 does not allow partial replacements. 4686 * 4687 * XXX should be built into hammer2_combined_find(). 4688 */ 4689 key_next = bref->key + ((hammer2_key_t)1 << bref->keybits); 4690 4691 if (keybits > bref->keybits) { 4692 key = bref->key; 4693 keybits = bref->keybits; 4694 } else if (keybits == bref->keybits && bref->key < key) { 4695 key = bref->key; 4696 } 4697 if (key_next == 0) 4698 break; 4699 key_beg = key_next; 4700 } 4701 hammer2_spin_unex(&parent->core.spin); 4702 4703 /* 4704 * Calculate the static keybits for a higher-level indirect block 4705 * that contains the key. 4706 */ 4707 *keyp = key; 4708 4709 switch(ncount) { 4710 case HAMMER2_IND_COUNT_MIN: 4711 nradix = HAMMER2_IND_RADIX_MIN - HAMMER2_BLOCKREF_RADIX; 4712 break; 4713 case HAMMER2_IND_COUNT_NOM: 4714 nradix = HAMMER2_IND_RADIX_NOM - HAMMER2_BLOCKREF_RADIX; 4715 break; 4716 case HAMMER2_IND_COUNT_MAX: 4717 nradix = HAMMER2_IND_RADIX_MAX - HAMMER2_BLOCKREF_RADIX; 4718 break; 4719 default: 4720 panic("bad ncount %d\n", ncount); 4721 nradix = 0; 4722 break; 4723 } 4724 4725 /* 4726 * The largest radix that can be returned for an indirect block is 4727 * 63 bits. (The largest practical indirect block radix is actually 4728 * 62 bits because the top-level inode or volume root contains four 4729 * entries, but allow 63 to be returned). 4730 */ 4731 if (nradix >= 64) 4732 nradix = 63; 4733 4734 return keybits + nradix; 4735 } 4736 4737 #if 1 4738 4739 /* 4740 * Directory indirect blocks. 4741 * 4742 * Covers both the inode index (directory of inodes), and directory contents 4743 * (filenames hardlinked to inodes). 4744 * 4745 * Because directory keys are hashed we generally try to cut the space in 4746 * half. We accomodate the inode index (which tends to have linearly 4747 * increasing inode numbers) by ensuring that the keyspace is at least large 4748 * enough to fill up the indirect block being created. 4749 */ 4750 static int 4751 hammer2_chain_indkey_dir(hammer2_chain_t *parent, hammer2_key_t *keyp, 4752 int keybits, hammer2_blockref_t *base, int count, 4753 int ncount) 4754 { 4755 hammer2_blockref_t *bref; 4756 hammer2_chain_t *chain; 4757 hammer2_key_t key_beg; 4758 hammer2_key_t key_end; 4759 hammer2_key_t key_next; 4760 hammer2_key_t key; 4761 int nkeybits; 4762 int locount; 4763 int hicount; 4764 int maxloops = 300000; 4765 4766 /* 4767 * NOTE: We can't take a shortcut here anymore for inodes because 4768 * the root directory can contain a mix of inodes and directory 4769 * entries (we used to just return 63 if parent->bref.type was 4770 * HAMMER2_BREF_TYPE_INODE. 4771 */ 4772 key = *keyp; 4773 locount = 0; 4774 hicount = 0; 4775 4776 /* 4777 * Calculate the range of keys in the array being careful to skip 4778 * slots which are overridden with a deletion. 4779 */ 4780 key_beg = 0; 4781 key_end = HAMMER2_KEY_MAX; 4782 hammer2_spin_ex(&parent->core.spin); 4783 4784 for (;;) { 4785 if (--maxloops == 0) { 4786 panic("indkey_freemap shit %p %p:%d\n", 4787 parent, base, count); 4788 } 4789 chain = hammer2_combined_find(parent, base, count, 4790 &key_next, 4791 key_beg, key_end, 4792 &bref); 4793 4794 /* 4795 * Exhausted search 4796 */ 4797 if (bref == NULL) 4798 break; 4799 4800 /* 4801 * Deleted object 4802 */ 4803 if (chain && (chain->flags & HAMMER2_CHAIN_DELETED)) { 4804 if (key_next == 0 || key_next > key_end) 4805 break; 4806 key_beg = key_next; 4807 continue; 4808 } 4809 4810 /* 4811 * Use the full live (not deleted) element for the scan 4812 * iteration. HAMMER2 does not allow partial replacements. 4813 * 4814 * XXX should be built into hammer2_combined_find(). 4815 */ 4816 key_next = bref->key + ((hammer2_key_t)1 << bref->keybits); 4817 4818 /* 4819 * Expand our calculated key range (key, keybits) to fit 4820 * the scanned key. nkeybits represents the full range 4821 * that we will later cut in half (two halves @ nkeybits - 1). 4822 */ 4823 nkeybits = keybits; 4824 if (nkeybits < bref->keybits) { 4825 if (bref->keybits > 64) { 4826 kprintf("bad bref chain %p bref %p\n", 4827 chain, bref); 4828 Debugger("fubar"); 4829 } 4830 nkeybits = bref->keybits; 4831 } 4832 while (nkeybits < 64 && 4833 rounddown2(key ^ bref->key, (hammer2_key_t)1 << nkeybits) != 0) { 4834 ++nkeybits; 4835 } 4836 4837 /* 4838 * If the new key range is larger we have to determine 4839 * which side of the new key range the existing keys fall 4840 * under by checking the high bit, then collapsing the 4841 * locount into the hicount or vise-versa. 4842 */ 4843 if (keybits != nkeybits) { 4844 if (((hammer2_key_t)1 << (nkeybits - 1)) & key) { 4845 hicount += locount; 4846 locount = 0; 4847 } else { 4848 locount += hicount; 4849 hicount = 0; 4850 } 4851 keybits = nkeybits; 4852 } 4853 4854 /* 4855 * The newly scanned key will be in the lower half or the 4856 * upper half of the (new) key range. 4857 */ 4858 if (((hammer2_key_t)1 << (nkeybits - 1)) & bref->key) 4859 ++hicount; 4860 else 4861 ++locount; 4862 4863 if (key_next == 0) 4864 break; 4865 key_beg = key_next; 4866 } 4867 hammer2_spin_unex(&parent->core.spin); 4868 bref = NULL; /* now invalid (safety) */ 4869 4870 /* 4871 * Adjust keybits to represent half of the full range calculated 4872 * above (radix 63 max) for our new indirect block. 4873 */ 4874 --keybits; 4875 4876 /* 4877 * Expand keybits to hold at least ncount elements. ncount will be 4878 * a power of 2. This is to try to completely fill leaf nodes (at 4879 * least for keys which are not hashes). 4880 * 4881 * We aren't counting 'in' or 'out', we are counting 'high side' 4882 * and 'low side' based on the bit at (1LL << keybits). We want 4883 * everything to be inside in these cases so shift it all to 4884 * the low or high side depending on the new high bit. 4885 */ 4886 while (((hammer2_key_t)1 << keybits) < ncount) { 4887 ++keybits; 4888 if (key & ((hammer2_key_t)1 << keybits)) { 4889 hicount += locount; 4890 locount = 0; 4891 } else { 4892 locount += hicount; 4893 hicount = 0; 4894 } 4895 } 4896 4897 if (hicount > locount) 4898 key |= (hammer2_key_t)1 << keybits; 4899 else 4900 key &= ~(hammer2_key_t)1 << keybits; 4901 4902 *keyp = key; 4903 4904 return (keybits); 4905 } 4906 4907 #else 4908 4909 /* 4910 * Directory indirect blocks. 4911 * 4912 * Covers both the inode index (directory of inodes), and directory contents 4913 * (filenames hardlinked to inodes). 4914 * 4915 * Because directory keys are hashed we generally try to cut the space in 4916 * half. We accomodate the inode index (which tends to have linearly 4917 * increasing inode numbers) by ensuring that the keyspace is at least large 4918 * enough to fill up the indirect block being created. 4919 */ 4920 static int 4921 hammer2_chain_indkey_dir(hammer2_chain_t *parent, hammer2_key_t *keyp, 4922 int keybits, hammer2_blockref_t *base, int count, 4923 int ncount) 4924 { 4925 hammer2_blockref_t *bref; 4926 hammer2_chain_t *chain; 4927 hammer2_key_t key_beg; 4928 hammer2_key_t key_end; 4929 hammer2_key_t key_next; 4930 hammer2_key_t key; 4931 int nkeybits; 4932 int locount; 4933 int hicount; 4934 int maxloops = 300000; 4935 4936 /* 4937 * Shortcut if the parent is the inode. In this situation the 4938 * parent has 4+1 directory entries and we are creating an indirect 4939 * block capable of holding many more. 4940 */ 4941 if (parent->bref.type == HAMMER2_BREF_TYPE_INODE) { 4942 return 63; 4943 } 4944 4945 key = *keyp; 4946 locount = 0; 4947 hicount = 0; 4948 4949 /* 4950 * Calculate the range of keys in the array being careful to skip 4951 * slots which are overridden with a deletion. 4952 */ 4953 key_beg = 0; 4954 key_end = HAMMER2_KEY_MAX; 4955 hammer2_spin_ex(&parent->core.spin); 4956 4957 for (;;) { 4958 if (--maxloops == 0) { 4959 panic("indkey_freemap shit %p %p:%d\n", 4960 parent, base, count); 4961 } 4962 chain = hammer2_combined_find(parent, base, count, 4963 &key_next, 4964 key_beg, key_end, 4965 &bref); 4966 4967 /* 4968 * Exhausted search 4969 */ 4970 if (bref == NULL) 4971 break; 4972 4973 /* 4974 * Deleted object 4975 */ 4976 if (chain && (chain->flags & HAMMER2_CHAIN_DELETED)) { 4977 if (key_next == 0 || key_next > key_end) 4978 break; 4979 key_beg = key_next; 4980 continue; 4981 } 4982 4983 /* 4984 * Use the full live (not deleted) element for the scan 4985 * iteration. HAMMER2 does not allow partial replacements. 4986 * 4987 * XXX should be built into hammer2_combined_find(). 4988 */ 4989 key_next = bref->key + ((hammer2_key_t)1 << bref->keybits); 4990 4991 /* 4992 * Expand our calculated key range (key, keybits) to fit 4993 * the scanned key. nkeybits represents the full range 4994 * that we will later cut in half (two halves @ nkeybits - 1). 4995 */ 4996 nkeybits = keybits; 4997 if (nkeybits < bref->keybits) { 4998 if (bref->keybits > 64) { 4999 kprintf("bad bref chain %p bref %p\n", 5000 chain, bref); 5001 Debugger("fubar"); 5002 } 5003 nkeybits = bref->keybits; 5004 } 5005 while (nkeybits < 64 && 5006 (~(((hammer2_key_t)1 << nkeybits) - 1) & 5007 (key ^ bref->key)) != 0) { 5008 ++nkeybits; 5009 } 5010 5011 /* 5012 * If the new key range is larger we have to determine 5013 * which side of the new key range the existing keys fall 5014 * under by checking the high bit, then collapsing the 5015 * locount into the hicount or vise-versa. 5016 */ 5017 if (keybits != nkeybits) { 5018 if (((hammer2_key_t)1 << (nkeybits - 1)) & key) { 5019 hicount += locount; 5020 locount = 0; 5021 } else { 5022 locount += hicount; 5023 hicount = 0; 5024 } 5025 keybits = nkeybits; 5026 } 5027 5028 /* 5029 * The newly scanned key will be in the lower half or the 5030 * upper half of the (new) key range. 5031 */ 5032 if (((hammer2_key_t)1 << (nkeybits - 1)) & bref->key) 5033 ++hicount; 5034 else 5035 ++locount; 5036 5037 if (key_next == 0) 5038 break; 5039 key_beg = key_next; 5040 } 5041 hammer2_spin_unex(&parent->core.spin); 5042 bref = NULL; /* now invalid (safety) */ 5043 5044 /* 5045 * Adjust keybits to represent half of the full range calculated 5046 * above (radix 63 max) for our new indirect block. 5047 */ 5048 --keybits; 5049 5050 /* 5051 * Expand keybits to hold at least ncount elements. ncount will be 5052 * a power of 2. This is to try to completely fill leaf nodes (at 5053 * least for keys which are not hashes). 5054 * 5055 * We aren't counting 'in' or 'out', we are counting 'high side' 5056 * and 'low side' based on the bit at (1LL << keybits). We want 5057 * everything to be inside in these cases so shift it all to 5058 * the low or high side depending on the new high bit. 5059 */ 5060 while (((hammer2_key_t)1 << keybits) < ncount) { 5061 ++keybits; 5062 if (key & ((hammer2_key_t)1 << keybits)) { 5063 hicount += locount; 5064 locount = 0; 5065 } else { 5066 locount += hicount; 5067 hicount = 0; 5068 } 5069 } 5070 5071 if (hicount > locount) 5072 key |= (hammer2_key_t)1 << keybits; 5073 else 5074 key &= ~(hammer2_key_t)1 << keybits; 5075 5076 *keyp = key; 5077 5078 return (keybits); 5079 } 5080 5081 #endif 5082 5083 /* 5084 * Sets CHAIN_DELETED and remove the chain's blockref from the parent if 5085 * it exists. 5086 * 5087 * Both parent and chain must be locked exclusively. 5088 * 5089 * This function will modify the parent if the blockref requires removal 5090 * from the parent's block table. 5091 * 5092 * This function is NOT recursive. Any entity already pushed into the 5093 * chain (such as an inode) may still need visibility into its contents, 5094 * as well as the ability to read and modify the contents. For example, 5095 * for an unlinked file which is still open. 5096 * 5097 * Also note that the flusher is responsible for cleaning up empty 5098 * indirect blocks. 5099 */ 5100 int 5101 hammer2_chain_delete(hammer2_chain_t *parent, hammer2_chain_t *chain, 5102 hammer2_tid_t mtid, int flags) 5103 { 5104 int error = 0; 5105 5106 KKASSERT(hammer2_mtx_owned(&chain->lock)); 5107 5108 /* 5109 * Nothing to do if already marked. 5110 * 5111 * We need the spinlock on the core whos RBTREE contains chain 5112 * to protect against races. 5113 */ 5114 if ((chain->flags & HAMMER2_CHAIN_DELETED) == 0) { 5115 KKASSERT((chain->flags & HAMMER2_CHAIN_DELETED) == 0 && 5116 chain->parent == parent); 5117 error = _hammer2_chain_delete_helper(parent, chain, 5118 mtid, flags, NULL); 5119 } 5120 5121 /* 5122 * Permanent deletions mark the chain as destroyed. 5123 * 5124 * NOTE: We do not setflush the chain unless the deletion is 5125 * permanent, since the deletion of a chain does not actually 5126 * require it to be flushed. 5127 */ 5128 if (error == 0) { 5129 if (flags & HAMMER2_DELETE_PERMANENT) { 5130 atomic_set_int(&chain->flags, HAMMER2_CHAIN_DESTROY); 5131 hammer2_chain_setflush(chain); 5132 } 5133 } 5134 5135 return error; 5136 } 5137 5138 static int 5139 hammer2_chain_delete_obref(hammer2_chain_t *parent, hammer2_chain_t *chain, 5140 hammer2_tid_t mtid, int flags, 5141 hammer2_blockref_t *obref) 5142 { 5143 int error = 0; 5144 5145 KKASSERT(hammer2_mtx_owned(&chain->lock)); 5146 5147 /* 5148 * Nothing to do if already marked. 5149 * 5150 * We need the spinlock on the core whos RBTREE contains chain 5151 * to protect against races. 5152 */ 5153 obref->type = HAMMER2_BREF_TYPE_EMPTY; 5154 if ((chain->flags & HAMMER2_CHAIN_DELETED) == 0) { 5155 KKASSERT((chain->flags & HAMMER2_CHAIN_DELETED) == 0 && 5156 chain->parent == parent); 5157 error = _hammer2_chain_delete_helper(parent, chain, 5158 mtid, flags, obref); 5159 } 5160 5161 /* 5162 * Permanent deletions mark the chain as destroyed. 5163 * 5164 * NOTE: We do not setflush the chain unless the deletion is 5165 * permanent, since the deletion of a chain does not actually 5166 * require it to be flushed. 5167 */ 5168 if (error == 0) { 5169 if (flags & HAMMER2_DELETE_PERMANENT) { 5170 atomic_set_int(&chain->flags, HAMMER2_CHAIN_DESTROY); 5171 hammer2_chain_setflush(chain); 5172 } 5173 } 5174 5175 return error; 5176 } 5177 5178 /* 5179 * Returns the index of the nearest element in the blockref array >= elm. 5180 * Returns (count) if no element could be found. 5181 * 5182 * Sets *key_nextp to the next key for loop purposes but does not modify 5183 * it if the next key would be higher than the current value of *key_nextp. 5184 * Note that *key_nexp can overflow to 0, which should be tested by the 5185 * caller. 5186 * 5187 * WARNING! Must be called with parent's spinlock held. Spinlock remains 5188 * held through the operation. 5189 */ 5190 static int 5191 hammer2_base_find(hammer2_chain_t *parent, 5192 hammer2_blockref_t *base, int count, 5193 hammer2_key_t *key_nextp, 5194 hammer2_key_t key_beg, hammer2_key_t key_end) 5195 { 5196 hammer2_blockref_t *scan; 5197 hammer2_key_t scan_end; 5198 int i; 5199 int limit; 5200 5201 /* 5202 * Require the live chain's already have their core's counted 5203 * so we can optimize operations. 5204 */ 5205 KKASSERT(parent->flags & HAMMER2_CHAIN_COUNTEDBREFS); 5206 5207 /* 5208 * Degenerate case 5209 */ 5210 if (count == 0 || base == NULL) 5211 return(count); 5212 5213 /* 5214 * Sequential optimization using parent->cache_index. This is 5215 * the most likely scenario. 5216 * 5217 * We can avoid trailing empty entries on live chains, otherwise 5218 * we might have to check the whole block array. 5219 */ 5220 i = parent->cache_index; /* SMP RACE OK */ 5221 cpu_ccfence(); 5222 limit = parent->core.live_zero; 5223 if (i >= limit) 5224 i = limit - 1; 5225 if (i < 0) 5226 i = 0; 5227 KKASSERT(i < count); 5228 5229 /* 5230 * Search backwards 5231 */ 5232 scan = &base[i]; 5233 while (i > 0 && (scan->type == HAMMER2_BREF_TYPE_EMPTY || 5234 scan->key > key_beg)) { 5235 --scan; 5236 --i; 5237 } 5238 parent->cache_index = i; 5239 5240 /* 5241 * Search forwards, stop when we find a scan element which 5242 * encloses the key or until we know that there are no further 5243 * elements. 5244 */ 5245 while (i < count) { 5246 if (scan->type != HAMMER2_BREF_TYPE_EMPTY) { 5247 scan_end = scan->key + 5248 ((hammer2_key_t)1 << scan->keybits) - 1; 5249 if (scan->key > key_beg || scan_end >= key_beg) 5250 break; 5251 } 5252 if (i >= limit) 5253 return (count); 5254 ++scan; 5255 ++i; 5256 } 5257 if (i != count) { 5258 parent->cache_index = i; 5259 if (i >= limit) { 5260 i = count; 5261 } else { 5262 scan_end = scan->key + 5263 ((hammer2_key_t)1 << scan->keybits); 5264 if (scan_end && (*key_nextp > scan_end || 5265 *key_nextp == 0)) { 5266 *key_nextp = scan_end; 5267 } 5268 } 5269 } 5270 return (i); 5271 } 5272 5273 /* 5274 * Do a combined search and return the next match either from the blockref 5275 * array or from the in-memory chain. Sets *brefp to the returned bref in 5276 * both cases, or sets it to NULL if the search exhausted. Only returns 5277 * a non-NULL chain if the search matched from the in-memory chain. 5278 * 5279 * When no in-memory chain has been found and a non-NULL bref is returned 5280 * in *brefp. 5281 * 5282 * 5283 * The returned chain is not locked or referenced. Use the returned bref 5284 * to determine if the search exhausted or not. Iterate if the base find 5285 * is chosen but matches a deleted chain. 5286 * 5287 * WARNING! Must be called with parent's spinlock held. Spinlock remains 5288 * held through the operation. 5289 */ 5290 hammer2_chain_t * 5291 hammer2_combined_find(hammer2_chain_t *parent, 5292 hammer2_blockref_t *base, int count, 5293 hammer2_key_t *key_nextp, 5294 hammer2_key_t key_beg, hammer2_key_t key_end, 5295 hammer2_blockref_t **brefp) 5296 { 5297 hammer2_blockref_t *bref; 5298 hammer2_chain_t *chain; 5299 int i; 5300 5301 /* 5302 * Lookup in block array and in rbtree. 5303 */ 5304 *key_nextp = key_end + 1; 5305 i = hammer2_base_find(parent, base, count, key_nextp, 5306 key_beg, key_end); 5307 chain = hammer2_chain_find(parent, key_nextp, key_beg, key_end); 5308 5309 /* 5310 * Neither matched 5311 */ 5312 if (i == count && chain == NULL) { 5313 *brefp = NULL; 5314 return(NULL); 5315 } 5316 5317 /* 5318 * Only chain matched. 5319 */ 5320 if (i == count) { 5321 bref = &chain->bref; 5322 goto found; 5323 } 5324 5325 /* 5326 * Only blockref matched. 5327 */ 5328 if (chain == NULL) { 5329 bref = &base[i]; 5330 goto found; 5331 } 5332 5333 /* 5334 * Both in-memory and blockref matched, select the nearer element. 5335 * 5336 * If both are flush with the left-hand side or both are the 5337 * same distance away, select the chain. In this situation the 5338 * chain must have been loaded from the matching blockmap. 5339 */ 5340 if ((chain->bref.key <= key_beg && base[i].key <= key_beg) || 5341 chain->bref.key == base[i].key) { 5342 KKASSERT(chain->bref.key == base[i].key); 5343 bref = &chain->bref; 5344 goto found; 5345 } 5346 5347 /* 5348 * Select the nearer key 5349 */ 5350 if (chain->bref.key < base[i].key) { 5351 bref = &chain->bref; 5352 } else { 5353 bref = &base[i]; 5354 chain = NULL; 5355 } 5356 5357 /* 5358 * If the bref is out of bounds we've exhausted our search. 5359 */ 5360 found: 5361 if (bref->key > key_end) { 5362 *brefp = NULL; 5363 chain = NULL; 5364 } else { 5365 *brefp = bref; 5366 } 5367 return(chain); 5368 } 5369 5370 /* 5371 * Locate the specified block array element and delete it. The element 5372 * must exist. 5373 * 5374 * The spin lock on the related chain must be held. 5375 * 5376 * NOTE: live_count was adjusted when the chain was deleted, so it does not 5377 * need to be adjusted when we commit the media change. 5378 */ 5379 void 5380 hammer2_base_delete(hammer2_chain_t *parent, 5381 hammer2_blockref_t *base, int count, 5382 hammer2_chain_t *chain, 5383 hammer2_blockref_t *obref) 5384 { 5385 hammer2_blockref_t *elm = &chain->bref; 5386 hammer2_blockref_t *scan; 5387 hammer2_key_t key_next; 5388 int i; 5389 5390 /* 5391 * Delete element. Expect the element to exist. 5392 * 5393 * XXX see caller, flush code not yet sophisticated enough to prevent 5394 * re-flushed in some cases. 5395 */ 5396 key_next = 0; /* max range */ 5397 i = hammer2_base_find(parent, base, count, &key_next, 5398 elm->key, elm->key); 5399 scan = &base[i]; 5400 if (i == count || scan->type == HAMMER2_BREF_TYPE_EMPTY || 5401 scan->key != elm->key || 5402 ((chain->flags & HAMMER2_CHAIN_BLKMAPUPD) == 0 && 5403 scan->keybits != elm->keybits)) { 5404 hammer2_spin_unex(&parent->core.spin); 5405 panic("delete base %p element not found at %d/%d elm %p\n", 5406 base, i, count, elm); 5407 return; 5408 } 5409 5410 /* 5411 * Update stats and zero the entry. 5412 * 5413 * NOTE: Handle radix == 0 (0 bytes) case. 5414 */ 5415 if ((int)(scan->data_off & HAMMER2_OFF_MASK_RADIX)) { 5416 parent->bref.embed.stats.data_count -= (hammer2_off_t)1 << 5417 (int)(scan->data_off & HAMMER2_OFF_MASK_RADIX); 5418 } 5419 switch(scan->type) { 5420 case HAMMER2_BREF_TYPE_INODE: 5421 --parent->bref.embed.stats.inode_count; 5422 /* fall through */ 5423 case HAMMER2_BREF_TYPE_DATA: 5424 if (parent->bref.leaf_count == HAMMER2_BLOCKREF_LEAF_MAX) { 5425 atomic_set_int(&chain->flags, 5426 HAMMER2_CHAIN_HINT_LEAF_COUNT); 5427 } else { 5428 if (parent->bref.leaf_count) 5429 --parent->bref.leaf_count; 5430 } 5431 /* fall through */ 5432 case HAMMER2_BREF_TYPE_INDIRECT: 5433 if (scan->type != HAMMER2_BREF_TYPE_DATA) { 5434 parent->bref.embed.stats.data_count -= 5435 scan->embed.stats.data_count; 5436 parent->bref.embed.stats.inode_count -= 5437 scan->embed.stats.inode_count; 5438 } 5439 if (scan->type == HAMMER2_BREF_TYPE_INODE) 5440 break; 5441 if (parent->bref.leaf_count == HAMMER2_BLOCKREF_LEAF_MAX) { 5442 atomic_set_int(&chain->flags, 5443 HAMMER2_CHAIN_HINT_LEAF_COUNT); 5444 } else { 5445 if (parent->bref.leaf_count <= scan->leaf_count) 5446 parent->bref.leaf_count = 0; 5447 else 5448 parent->bref.leaf_count -= scan->leaf_count; 5449 } 5450 break; 5451 case HAMMER2_BREF_TYPE_DIRENT: 5452 if (parent->bref.leaf_count == HAMMER2_BLOCKREF_LEAF_MAX) { 5453 atomic_set_int(&chain->flags, 5454 HAMMER2_CHAIN_HINT_LEAF_COUNT); 5455 } else { 5456 if (parent->bref.leaf_count) 5457 --parent->bref.leaf_count; 5458 } 5459 default: 5460 break; 5461 } 5462 5463 if (obref) 5464 *obref = *scan; 5465 bzero(scan, sizeof(*scan)); 5466 5467 /* 5468 * We can only optimize parent->core.live_zero for live chains. 5469 */ 5470 if (parent->core.live_zero == i + 1) { 5471 while (--i >= 0 && base[i].type == HAMMER2_BREF_TYPE_EMPTY) 5472 ; 5473 parent->core.live_zero = i + 1; 5474 } 5475 5476 /* 5477 * Clear appropriate blockmap flags in chain. 5478 */ 5479 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_BLKMAPPED | 5480 HAMMER2_CHAIN_BLKMAPUPD); 5481 } 5482 5483 /* 5484 * Insert the specified element. The block array must not already have the 5485 * element and must have space available for the insertion. 5486 * 5487 * The spin lock on the related chain must be held. 5488 * 5489 * NOTE: live_count was adjusted when the chain was deleted, so it does not 5490 * need to be adjusted when we commit the media change. 5491 */ 5492 void 5493 hammer2_base_insert(hammer2_chain_t *parent, 5494 hammer2_blockref_t *base, int count, 5495 hammer2_chain_t *chain, hammer2_blockref_t *elm) 5496 { 5497 hammer2_key_t key_next; 5498 hammer2_key_t xkey; 5499 int i; 5500 int j; 5501 int k; 5502 int l; 5503 int u = 1; 5504 5505 /* 5506 * Insert new element. Expect the element to not already exist 5507 * unless we are replacing it. 5508 * 5509 * XXX see caller, flush code not yet sophisticated enough to prevent 5510 * re-flushed in some cases. 5511 */ 5512 key_next = 0; /* max range */ 5513 i = hammer2_base_find(parent, base, count, &key_next, 5514 elm->key, elm->key); 5515 5516 /* 5517 * Shortcut fill optimization, typical ordered insertion(s) may not 5518 * require a search. 5519 */ 5520 KKASSERT(i >= 0 && i <= count); 5521 5522 /* 5523 * Set appropriate blockmap flags in chain (if not NULL) 5524 */ 5525 if (chain) 5526 atomic_set_int(&chain->flags, HAMMER2_CHAIN_BLKMAPPED); 5527 5528 /* 5529 * Update stats and zero the entry 5530 */ 5531 if ((int)(elm->data_off & HAMMER2_OFF_MASK_RADIX)) { 5532 parent->bref.embed.stats.data_count += (hammer2_off_t)1 << 5533 (int)(elm->data_off & HAMMER2_OFF_MASK_RADIX); 5534 } 5535 switch(elm->type) { 5536 case HAMMER2_BREF_TYPE_INODE: 5537 ++parent->bref.embed.stats.inode_count; 5538 /* fall through */ 5539 case HAMMER2_BREF_TYPE_DATA: 5540 if (parent->bref.leaf_count != HAMMER2_BLOCKREF_LEAF_MAX) 5541 ++parent->bref.leaf_count; 5542 /* fall through */ 5543 case HAMMER2_BREF_TYPE_INDIRECT: 5544 if (elm->type != HAMMER2_BREF_TYPE_DATA) { 5545 parent->bref.embed.stats.data_count += 5546 elm->embed.stats.data_count; 5547 parent->bref.embed.stats.inode_count += 5548 elm->embed.stats.inode_count; 5549 } 5550 if (elm->type == HAMMER2_BREF_TYPE_INODE) 5551 break; 5552 if (parent->bref.leaf_count + elm->leaf_count < 5553 HAMMER2_BLOCKREF_LEAF_MAX) { 5554 parent->bref.leaf_count += elm->leaf_count; 5555 } else { 5556 parent->bref.leaf_count = HAMMER2_BLOCKREF_LEAF_MAX; 5557 } 5558 break; 5559 case HAMMER2_BREF_TYPE_DIRENT: 5560 if (parent->bref.leaf_count != HAMMER2_BLOCKREF_LEAF_MAX) 5561 ++parent->bref.leaf_count; 5562 break; 5563 default: 5564 break; 5565 } 5566 5567 5568 /* 5569 * We can only optimize parent->core.live_zero for live chains. 5570 */ 5571 if (i == count && parent->core.live_zero < count) { 5572 i = parent->core.live_zero++; 5573 base[i] = *elm; 5574 return; 5575 } 5576 5577 xkey = elm->key + ((hammer2_key_t)1 << elm->keybits) - 1; 5578 if (i != count && (base[i].key < elm->key || xkey >= base[i].key)) { 5579 hammer2_spin_unex(&parent->core.spin); 5580 panic("insert base %p overlapping elements at %d elm %p\n", 5581 base, i, elm); 5582 } 5583 5584 /* 5585 * Try to find an empty slot before or after. 5586 */ 5587 j = i; 5588 k = i; 5589 while (j > 0 || k < count) { 5590 --j; 5591 if (j >= 0 && base[j].type == HAMMER2_BREF_TYPE_EMPTY) { 5592 if (j == i - 1) { 5593 base[j] = *elm; 5594 } else { 5595 bcopy(&base[j+1], &base[j], 5596 (i - j - 1) * sizeof(*base)); 5597 base[i - 1] = *elm; 5598 } 5599 goto validate; 5600 } 5601 ++k; 5602 if (k < count && base[k].type == HAMMER2_BREF_TYPE_EMPTY) { 5603 bcopy(&base[i], &base[i+1], 5604 (k - i) * sizeof(hammer2_blockref_t)); 5605 base[i] = *elm; 5606 5607 /* 5608 * We can only update parent->core.live_zero for live 5609 * chains. 5610 */ 5611 if (parent->core.live_zero <= k) 5612 parent->core.live_zero = k + 1; 5613 u = 2; 5614 goto validate; 5615 } 5616 } 5617 panic("hammer2_base_insert: no room!"); 5618 5619 /* 5620 * Debugging 5621 */ 5622 validate: 5623 key_next = 0; 5624 for (l = 0; l < count; ++l) { 5625 if (base[l].type != HAMMER2_BREF_TYPE_EMPTY) { 5626 key_next = base[l].key + 5627 ((hammer2_key_t)1 << base[l].keybits) - 1; 5628 break; 5629 } 5630 } 5631 while (++l < count) { 5632 if (base[l].type != HAMMER2_BREF_TYPE_EMPTY) { 5633 if (base[l].key <= key_next) 5634 panic("base_insert %d %d,%d,%d fail %p:%d", u, i, j, k, base, l); 5635 key_next = base[l].key + 5636 ((hammer2_key_t)1 << base[l].keybits) - 1; 5637 5638 } 5639 } 5640 5641 } 5642 5643 #if 0 5644 5645 /* 5646 * Sort the blockref array for the chain. Used by the flush code to 5647 * sort the blockref[] array. 5648 * 5649 * The chain must be exclusively locked AND spin-locked. 5650 */ 5651 typedef hammer2_blockref_t *hammer2_blockref_p; 5652 5653 static 5654 int 5655 hammer2_base_sort_callback(const void *v1, const void *v2) 5656 { 5657 hammer2_blockref_p bref1 = *(const hammer2_blockref_p *)v1; 5658 hammer2_blockref_p bref2 = *(const hammer2_blockref_p *)v2; 5659 5660 /* 5661 * Make sure empty elements are placed at the end of the array 5662 */ 5663 if (bref1->type == HAMMER2_BREF_TYPE_EMPTY) { 5664 if (bref2->type == HAMMER2_BREF_TYPE_EMPTY) 5665 return(0); 5666 return(1); 5667 } else if (bref2->type == HAMMER2_BREF_TYPE_EMPTY) { 5668 return(-1); 5669 } 5670 5671 /* 5672 * Sort by key 5673 */ 5674 if (bref1->key < bref2->key) 5675 return(-1); 5676 if (bref1->key > bref2->key) 5677 return(1); 5678 return(0); 5679 } 5680 5681 void 5682 hammer2_base_sort(hammer2_chain_t *chain) 5683 { 5684 hammer2_blockref_t *base; 5685 int count; 5686 5687 switch(chain->bref.type) { 5688 case HAMMER2_BREF_TYPE_INODE: 5689 /* 5690 * Special shortcut for embedded data returns the inode 5691 * itself. Callers must detect this condition and access 5692 * the embedded data (the strategy code does this for us). 5693 * 5694 * This is only applicable to regular files and softlinks. 5695 */ 5696 if (chain->data->ipdata.meta.op_flags & 5697 HAMMER2_OPFLAG_DIRECTDATA) { 5698 return; 5699 } 5700 base = &chain->data->ipdata.u.blockset.blockref[0]; 5701 count = HAMMER2_SET_COUNT; 5702 break; 5703 case HAMMER2_BREF_TYPE_FREEMAP_NODE: 5704 case HAMMER2_BREF_TYPE_INDIRECT: 5705 /* 5706 * Optimize indirect blocks in the INITIAL state to avoid 5707 * I/O. 5708 */ 5709 KKASSERT((chain->flags & HAMMER2_CHAIN_INITIAL) == 0); 5710 base = &chain->data->npdata[0]; 5711 count = chain->bytes / sizeof(hammer2_blockref_t); 5712 break; 5713 case HAMMER2_BREF_TYPE_VOLUME: 5714 base = &chain->data->voldata.sroot_blockset.blockref[0]; 5715 count = HAMMER2_SET_COUNT; 5716 break; 5717 case HAMMER2_BREF_TYPE_FREEMAP: 5718 base = &chain->data->blkset.blockref[0]; 5719 count = HAMMER2_SET_COUNT; 5720 break; 5721 default: 5722 panic("hammer2_base_sort: unrecognized " 5723 "blockref(A) type: %d", 5724 chain->bref.type); 5725 base = NULL; /* safety */ 5726 count = 0; /* safety */ 5727 break; 5728 } 5729 kqsort(base, count, sizeof(*base), hammer2_base_sort_callback); 5730 } 5731 5732 #endif 5733 5734 /* 5735 * Set the check data for a chain. This can be a heavy-weight operation 5736 * and typically only runs on-flush. For file data check data is calculated 5737 * when the logical buffers are flushed. 5738 */ 5739 void 5740 hammer2_chain_setcheck(hammer2_chain_t *chain, void *bdata) 5741 { 5742 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_NOTTESTED); 5743 5744 switch(HAMMER2_DEC_CHECK(chain->bref.methods)) { 5745 case HAMMER2_CHECK_NONE: 5746 break; 5747 case HAMMER2_CHECK_DISABLED: 5748 break; 5749 case HAMMER2_CHECK_ISCSI32: 5750 chain->bref.check.iscsi32.value = 5751 hammer2_icrc32(bdata, chain->bytes); 5752 break; 5753 case HAMMER2_CHECK_XXHASH64: 5754 chain->bref.check.xxhash64.value = 5755 XXH64(bdata, chain->bytes, XXH_HAMMER2_SEED); 5756 break; 5757 case HAMMER2_CHECK_SHA192: 5758 { 5759 SHA256_CTX hash_ctx; 5760 union { 5761 uint8_t digest[SHA256_DIGEST_LENGTH]; 5762 uint64_t digest64[SHA256_DIGEST_LENGTH/8]; 5763 } u; 5764 5765 SHA256_Init(&hash_ctx); 5766 SHA256_Update(&hash_ctx, bdata, chain->bytes); 5767 SHA256_Final(u.digest, &hash_ctx); 5768 u.digest64[2] ^= u.digest64[3]; 5769 bcopy(u.digest, 5770 chain->bref.check.sha192.data, 5771 sizeof(chain->bref.check.sha192.data)); 5772 } 5773 break; 5774 case HAMMER2_CHECK_FREEMAP: 5775 chain->bref.check.freemap.icrc32 = 5776 hammer2_icrc32(bdata, chain->bytes); 5777 break; 5778 default: 5779 kprintf("hammer2_chain_setcheck: unknown check type %02x\n", 5780 chain->bref.methods); 5781 break; 5782 } 5783 } 5784 5785 /* 5786 * Characterize a failed check code and try to trace back to the inode. 5787 */ 5788 static void 5789 hammer2_characterize_failed_chain(hammer2_chain_t *chain, uint64_t check, 5790 int bits) 5791 { 5792 hammer2_chain_t *lchain; 5793 hammer2_chain_t *ochain; 5794 int did; 5795 5796 did = krateprintf(&krate_h2chk, 5797 "chain %016jx.%02x (%s) meth=%02x CHECK FAIL " 5798 "(flags=%08x, bref/data ", 5799 chain->bref.data_off, 5800 chain->bref.type, 5801 hammer2_bref_type_str(chain->bref.type), 5802 chain->bref.methods, 5803 chain->flags); 5804 if (did == 0) 5805 return; 5806 5807 if (bits == 32) { 5808 kprintf("%08x/%08x)\n", 5809 chain->bref.check.iscsi32.value, 5810 (uint32_t)check); 5811 } else { 5812 kprintf("%016jx/%016jx)\n", 5813 chain->bref.check.xxhash64.value, 5814 check); 5815 } 5816 5817 /* 5818 * Run up the chains to try to find the governing inode so we 5819 * can report it. 5820 * 5821 * XXX This error reporting is not really MPSAFE 5822 */ 5823 ochain = chain; 5824 lchain = chain; 5825 while (chain && chain->bref.type != HAMMER2_BREF_TYPE_INODE) { 5826 lchain = chain; 5827 chain = chain->parent; 5828 } 5829 5830 if (chain && chain->bref.type == HAMMER2_BREF_TYPE_INODE && 5831 ((chain->bref.flags & HAMMER2_BREF_FLAG_PFSROOT) == 0 || 5832 (lchain->bref.key & HAMMER2_DIRHASH_VISIBLE))) { 5833 kprintf(" Resides at/in inode %ld\n", 5834 chain->bref.key); 5835 } else if (chain && chain->bref.type == HAMMER2_BREF_TYPE_INODE) { 5836 kprintf(" Resides in inode index - CRITICAL!!!\n"); 5837 } else { 5838 kprintf(" Resides in root index - CRITICAL!!!\n"); 5839 } 5840 if (ochain->hmp) { 5841 const char *pfsname = "UNKNOWN"; 5842 int i; 5843 5844 if (ochain->pmp) { 5845 for (i = 0; i < HAMMER2_MAXCLUSTER; ++i) { 5846 if (ochain->pmp->pfs_hmps[i] == ochain->hmp && 5847 ochain->pmp->pfs_names[i]) { 5848 pfsname = ochain->pmp->pfs_names[i]; 5849 break; 5850 } 5851 } 5852 } 5853 kprintf(" In pfs %s on device %s\n", 5854 pfsname, ochain->hmp->devrepname); 5855 } 5856 } 5857 5858 /* 5859 * Returns non-zero on success, 0 on failure. 5860 */ 5861 int 5862 hammer2_chain_testcheck(hammer2_chain_t *chain, void *bdata) 5863 { 5864 uint32_t check32; 5865 uint64_t check64; 5866 int r; 5867 5868 if (chain->flags & HAMMER2_CHAIN_NOTTESTED) 5869 return 1; 5870 5871 switch(HAMMER2_DEC_CHECK(chain->bref.methods)) { 5872 case HAMMER2_CHECK_NONE: 5873 r = 1; 5874 break; 5875 case HAMMER2_CHECK_DISABLED: 5876 r = 1; 5877 break; 5878 case HAMMER2_CHECK_ISCSI32: 5879 check32 = hammer2_icrc32(bdata, chain->bytes); 5880 r = (chain->bref.check.iscsi32.value == check32); 5881 if (r == 0) { 5882 hammer2_characterize_failed_chain(chain, check32, 32); 5883 } 5884 hammer2_process_icrc32 += chain->bytes; 5885 break; 5886 case HAMMER2_CHECK_XXHASH64: 5887 check64 = XXH64(bdata, chain->bytes, XXH_HAMMER2_SEED); 5888 r = (chain->bref.check.xxhash64.value == check64); 5889 if (r == 0) { 5890 hammer2_characterize_failed_chain(chain, check64, 64); 5891 } 5892 hammer2_process_xxhash64 += chain->bytes; 5893 break; 5894 case HAMMER2_CHECK_SHA192: 5895 { 5896 SHA256_CTX hash_ctx; 5897 union { 5898 uint8_t digest[SHA256_DIGEST_LENGTH]; 5899 uint64_t digest64[SHA256_DIGEST_LENGTH/8]; 5900 } u; 5901 5902 SHA256_Init(&hash_ctx); 5903 SHA256_Update(&hash_ctx, bdata, chain->bytes); 5904 SHA256_Final(u.digest, &hash_ctx); 5905 u.digest64[2] ^= u.digest64[3]; 5906 if (bcmp(u.digest, 5907 chain->bref.check.sha192.data, 5908 sizeof(chain->bref.check.sha192.data)) == 0) { 5909 r = 1; 5910 } else { 5911 r = 0; 5912 krateprintf(&krate_h2chk, 5913 "chain %016jx.%02x meth=%02x " 5914 "CHECK FAIL\n", 5915 chain->bref.data_off, 5916 chain->bref.type, 5917 chain->bref.methods); 5918 } 5919 } 5920 break; 5921 case HAMMER2_CHECK_FREEMAP: 5922 r = (chain->bref.check.freemap.icrc32 == 5923 hammer2_icrc32(bdata, chain->bytes)); 5924 if (r == 0) { 5925 int did; 5926 5927 did = krateprintf(&krate_h2chk, 5928 "chain %016jx.%02x meth=%02x " 5929 "CHECK FAIL\n", 5930 chain->bref.data_off, 5931 chain->bref.type, 5932 chain->bref.methods); 5933 if (did) { 5934 kprintf("freemap.icrc %08x icrc32 %08x (%d)\n", 5935 chain->bref.check.freemap.icrc32, 5936 hammer2_icrc32(bdata, chain->bytes), 5937 chain->bytes); 5938 if (chain->dio) { 5939 kprintf("dio %p buf %016jx,%d " 5940 "bdata %p/%p\n", 5941 chain->dio, 5942 chain->dio->bp->b_loffset, 5943 chain->dio->bp->b_bufsize, 5944 bdata, 5945 chain->dio->bp->b_data); 5946 } 5947 } 5948 } 5949 break; 5950 default: 5951 kprintf("hammer2_chain_testcheck: unknown check type %02x\n", 5952 chain->bref.methods); 5953 r = 1; 5954 break; 5955 } 5956 return r; 5957 } 5958 5959 /* 5960 * Acquire the chain and parent representing the specified inode for the 5961 * device at the specified cluster index. 5962 * 5963 * The flags passed in are LOOKUP flags, not RESOLVE flags. 5964 * 5965 * If we are unable to locate the inode, HAMMER2_ERROR_EIO is returned and 5966 * *chainp will be NULL. *parentp may still be set error or not, or NULL 5967 * if the parent itself could not be resolved. 5968 * 5969 * The caller may pass-in a locked *parentp and/or *chainp, or neither. 5970 * They will be unlocked and released by this function. The *parentp and 5971 * *chainp representing the located inode are returned locked. 5972 */ 5973 int 5974 hammer2_chain_inode_find(hammer2_pfs_t *pmp, hammer2_key_t inum, 5975 int clindex, int flags, 5976 hammer2_chain_t **parentp, hammer2_chain_t **chainp) 5977 { 5978 hammer2_chain_t *parent; 5979 hammer2_chain_t *rchain; 5980 hammer2_key_t key_dummy; 5981 hammer2_inode_t *ip; 5982 int resolve_flags; 5983 int error; 5984 5985 resolve_flags = (flags & HAMMER2_LOOKUP_SHARED) ? 5986 HAMMER2_RESOLVE_SHARED : 0; 5987 5988 /* 5989 * Caller expects us to replace these. 5990 */ 5991 if (*chainp) { 5992 hammer2_chain_unlock(*chainp); 5993 hammer2_chain_drop(*chainp); 5994 *chainp = NULL; 5995 } 5996 if (*parentp) { 5997 hammer2_chain_unlock(*parentp); 5998 hammer2_chain_drop(*parentp); 5999 *parentp = NULL; 6000 } 6001 6002 /* 6003 * Be very careful, this is a backend function and we CANNOT 6004 * lock any frontend inode structure we find. But we have to 6005 * look the inode up this way first in case it exists but is 6006 * detached from the radix tree. 6007 */ 6008 ip = hammer2_inode_lookup(pmp, inum); 6009 if (ip) { 6010 *chainp = hammer2_inode_chain_and_parent(ip, clindex, 6011 parentp, 6012 resolve_flags); 6013 hammer2_inode_drop(ip); 6014 if (*chainp) 6015 return 0; 6016 hammer2_chain_unlock(*chainp); 6017 hammer2_chain_drop(*chainp); 6018 *chainp = NULL; 6019 if (*parentp) { 6020 hammer2_chain_unlock(*parentp); 6021 hammer2_chain_drop(*parentp); 6022 *parentp = NULL; 6023 } 6024 } 6025 6026 /* 6027 * Inodes hang off of the iroot (bit 63 is clear, differentiating 6028 * inodes from root directory entries in the key lookup). 6029 */ 6030 parent = hammer2_inode_chain(pmp->iroot, clindex, resolve_flags); 6031 rchain = NULL; 6032 if (parent) { 6033 rchain = hammer2_chain_lookup(&parent, &key_dummy, 6034 inum, inum, 6035 &error, flags); 6036 } else { 6037 error = HAMMER2_ERROR_EIO; 6038 } 6039 *parentp = parent; 6040 *chainp = rchain; 6041 6042 return error; 6043 } 6044 6045 /* 6046 * Used by the bulkscan code to snapshot the synchronized storage for 6047 * a volume, allowing it to be scanned concurrently against normal 6048 * operation. 6049 */ 6050 hammer2_chain_t * 6051 hammer2_chain_bulksnap(hammer2_dev_t *hmp) 6052 { 6053 hammer2_chain_t *copy; 6054 6055 copy = hammer2_chain_alloc(hmp, hmp->spmp, &hmp->vchain.bref); 6056 copy->data = kmalloc(sizeof(copy->data->voldata), 6057 hmp->mmsg, M_WAITOK | M_ZERO); 6058 hammer2_voldata_lock(hmp); 6059 copy->data->voldata = hmp->volsync; 6060 hammer2_voldata_unlock(hmp); 6061 6062 return copy; 6063 } 6064 6065 void 6066 hammer2_chain_bulkdrop(hammer2_chain_t *copy) 6067 { 6068 KKASSERT(copy->bref.type == HAMMER2_BREF_TYPE_VOLUME); 6069 KKASSERT(copy->data); 6070 kfree(copy->data, copy->hmp->mmsg); 6071 copy->data = NULL; 6072 atomic_add_long(&hammer2_chain_allocs, -1); 6073 hammer2_chain_drop(copy); 6074 } 6075 6076 /* 6077 * Returns non-zero if the chain (INODE or DIRENT) matches the 6078 * filename. 6079 */ 6080 int 6081 hammer2_chain_dirent_test(hammer2_chain_t *chain, const char *name, 6082 size_t name_len) 6083 { 6084 const hammer2_inode_data_t *ripdata; 6085 6086 if (chain->bref.type == HAMMER2_BREF_TYPE_INODE) { 6087 ripdata = &chain->data->ipdata; 6088 if (ripdata->meta.name_len == name_len && 6089 bcmp(ripdata->filename, name, name_len) == 0) { 6090 return 1; 6091 } 6092 } 6093 if (chain->bref.type == HAMMER2_BREF_TYPE_DIRENT && 6094 chain->bref.embed.dirent.namlen == name_len) { 6095 if (name_len > sizeof(chain->bref.check.buf) && 6096 bcmp(chain->data->buf, name, name_len) == 0) { 6097 return 1; 6098 } 6099 if (name_len <= sizeof(chain->bref.check.buf) && 6100 bcmp(chain->bref.check.buf, name, name_len) == 0) { 6101 return 1; 6102 } 6103 } 6104 return 0; 6105 } 6106