1 /* 2 * Copyright (c) 2011-2020 The DragonFly Project. All rights reserved. 3 * 4 * This code is derived from software contributed to The DragonFly Project 5 * by Matthew Dillon <dillon@dragonflybsd.org> 6 * and Venkatesh Srinivas <vsrinivas@dragonflybsd.org> 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in 16 * the documentation and/or other materials provided with the 17 * distribution. 18 * 3. Neither the name of The DragonFly Project nor the names of its 19 * contributors may be used to endorse or promote products derived 20 * from this software without specific, prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 23 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 24 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 25 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 26 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 27 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 28 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 29 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 30 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 31 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 32 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33 * SUCH DAMAGE. 34 */ 35 /* 36 * This subsystem implements most of the core support functions for 37 * the hammer2_chain structure. 
 *
 * Chains are the in-memory version of media objects (volume header, inodes,
 * indirect blocks, data blocks, etc).  Chains represent a portion of the
 * HAMMER2 topology.
 *
 * Chains are no-longer delete-duplicated.  Instead, the original in-memory
 * chain will be moved along with its block reference (e.g. for things like
 * renames, hardlink operations, modifications, etc), and will be indexed
 * on a secondary list for flush handling instead of propagating a flag
 * upward to the root.
 *
 * Concurrent front-end operations can still run against backend flushes
 * as long as they do not cross the current flush boundary.  An operation
 * running above the current flush (in areas not yet flushed) can become
 * part of the current flush while an operation running below the current
 * flush can become part of the next flush.
 */
#include <sys/cdefs.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/types.h>
#include <sys/lock.h>
#include <sys/kern_syscall.h>
#include <sys/uuid.h>

#include <crypto/sha2/sha2.h>

#include "hammer2.h"

static hammer2_chain_t *hammer2_chain_create_indirect(
		hammer2_chain_t *parent,
		hammer2_key_t key, int keybits,
		hammer2_tid_t mtid, int for_type, int *errorp);
static int hammer2_chain_delete_obref(hammer2_chain_t *parent,
		hammer2_chain_t *chain,
		hammer2_tid_t mtid, int flags,
		hammer2_blockref_t *obref);
static hammer2_chain_t *hammer2_combined_find(
		hammer2_chain_t *parent,
		hammer2_blockref_t *base, int count,
		hammer2_key_t *key_nextp,
		hammer2_key_t key_beg, hammer2_key_t key_end,
		hammer2_blockref_t **bresp);
static hammer2_chain_t *hammer2_chain_lastdrop(hammer2_chain_t *chain,
		int depth);
static void hammer2_chain_lru_flush(hammer2_pfs_t *pmp);

/*
 * There are many degenerate situations where an extreme rate of console
 * output can occur from warnings and errors.
Make sure this output does
 * not impede operations.
 */
static struct krate krate_h2chk = { .freq = 5 };
static struct krate krate_h2me = { .freq = 1 };
static struct krate krate_h2em = { .freq = 1 };

/*
 * Basic RBTree for chains (core.rbtree).
 */
RB_GENERATE(hammer2_chain_tree, hammer2_chain, rbnode, hammer2_chain_cmp);

/*
 * Compare two chains by the key range each covers.  Returns -1 if chain1
 * lies entirely below chain2, +1 if entirely above, and 0 on any overlap.
 */
int
hammer2_chain_cmp(hammer2_chain_t *chain1, hammer2_chain_t *chain2)
{
	hammer2_key_t c1_beg;
	hammer2_key_t c1_end;
	hammer2_key_t c2_beg;
	hammer2_key_t c2_end;

	/*
	 * Compare chains.  Overlaps are not supposed to happen, and to
	 * catch any software issues early we count overlaps as a match.
	 */
	c1_beg = chain1->bref.key;
	c1_end = c1_beg + ((hammer2_key_t)1 << chain1->bref.keybits) - 1;
	c2_beg = chain2->bref.key;
	c2_end = c2_beg + ((hammer2_key_t)1 << chain2->bref.keybits) - 1;

	if (c1_end < c2_beg)	/* fully to the left */
		return(-1);
	if (c1_beg > c2_end)	/* fully to the right */
		return(1);
	return(0);		/* overlap (must not cross edge boundary) */
}

/*
 * Assert that a chain has no media data associated with it.  The volume
 * header and freemap chains embed their data and are exempt.
 */
static __inline void
hammer2_chain_assert_no_data(hammer2_chain_t *chain)
{
	KKASSERT(chain->dio == NULL);
	if (chain->bref.type != HAMMER2_BREF_TYPE_VOLUME &&
	    chain->bref.type != HAMMER2_BREF_TYPE_FREEMAP &&
	    chain->data) {
		panic("hammer2_chain_assert_no_data: chain %p still has data",
		      chain);
	}
}

/*
 * Make a chain visible to the flusher.  The flusher operates using a top-down
 * recursion based on the ONFLUSH flag.  It locates MODIFIED and UPDATE chains,
 * flushes them, and updates blocks back to the volume root.
 *
 * This routine sets the ONFLUSH flag upward from the triggering chain until
 * it hits an inode root or the volume root.  Inode chains serve as inflection
 * points, requiring the flusher to bridge across trees.  Inodes include
 * regular inodes, PFS roots (pmp->iroot), and the media super root
 * (spmp->iroot).
 */
void
hammer2_chain_setflush(hammer2_chain_t *chain)
{
	hammer2_chain_t *parent;

	if ((chain->flags & HAMMER2_CHAIN_ONFLUSH) == 0) {
		hammer2_spin_sh(&chain->core.spin);
		while ((chain->flags & HAMMER2_CHAIN_ONFLUSH) == 0) {
			atomic_set_int(&chain->flags, HAMMER2_CHAIN_ONFLUSH);
			if (chain->bref.type == HAMMER2_BREF_TYPE_INODE)
				break;
			if ((parent = chain->parent) == NULL)
				break;
			/*
			 * Hand-over-hand: take the parent's shared spinlock
			 * before releasing the child's so chain->parent
			 * remains stable across the transition.
			 */
			hammer2_spin_sh(&parent->core.spin);
			hammer2_spin_unsh(&chain->core.spin);
			chain = parent;
		}
		hammer2_spin_unsh(&chain->core.spin);
	}
}

/*
 * Allocate a new disconnected chain element representing the specified
 * bref.  chain->refs is set to 1 and the passed bref is copied to
 * chain->bref.  chain->bytes is derived from the bref.
 *
 * chain->pmp inherits pmp unless the chain is an inode (other than the
 * super-root inode).
 *
 * NOTE: Returns a referenced but unlocked (because there is no core) chain.
 */
hammer2_chain_t *
hammer2_chain_alloc(hammer2_dev_t *hmp, hammer2_pfs_t *pmp,
		    hammer2_blockref_t *bref)
{
	hammer2_chain_t *chain;
	u_int bytes;

	/*
	 * Special case - radix of 0 indicates a chain that does not
	 * need a data reference (context is completely embedded in the
	 * bref).
	 */
	if ((int)(bref->data_off & HAMMER2_OFF_MASK_RADIX))
		bytes = 1U << (int)(bref->data_off & HAMMER2_OFF_MASK_RADIX);
	else
		bytes = 0;

	atomic_add_long(&hammer2_chain_allocs, 1);

	/*
	 * Construct the appropriate system structure.
	 */
	switch(bref->type) {
	case HAMMER2_BREF_TYPE_DIRENT:
	case HAMMER2_BREF_TYPE_INODE:
	case HAMMER2_BREF_TYPE_INDIRECT:
	case HAMMER2_BREF_TYPE_FREEMAP_NODE:
	case HAMMER2_BREF_TYPE_DATA:
	case HAMMER2_BREF_TYPE_FREEMAP_LEAF:
		chain = kmalloc(sizeof(*chain), hmp->mchain, M_WAITOK | M_ZERO);
		break;
	case HAMMER2_BREF_TYPE_VOLUME:
	case HAMMER2_BREF_TYPE_FREEMAP:
		/*
		 * Only hammer2_chain_bulksnap() calls this function with these
		 * types.
		 */
		chain = kmalloc(sizeof(*chain), hmp->mchain, M_WAITOK | M_ZERO);
		break;
	default:
		chain = NULL;
		panic("hammer2_chain_alloc: unrecognized blockref type: %d",
		      bref->type);
	}

	/*
	 * Initialize the new chain structure.  pmp must be set to NULL for
	 * chains belonging to the super-root topology of a device mount.
	 */
	if (pmp == hmp->spmp)
		chain->pmp = NULL;
	else
		chain->pmp = pmp;

	chain->hmp = hmp;
	chain->bref = *bref;
	chain->bytes = bytes;
	chain->refs = 1;
	chain->flags = HAMMER2_CHAIN_ALLOCATED;
	lockinit(&chain->diolk, "chdio", 0, 0);

	/*
	 * Set the PFS boundary flag if this chain represents a PFS root.
	 */
	if (bref->flags & HAMMER2_BREF_FLAG_PFSROOT)
		atomic_set_int(&chain->flags, HAMMER2_CHAIN_PFSBOUNDARY);
	hammer2_chain_core_init(chain);

	return (chain);
}

/*
 * Initialize a chain's core structure.  This structure used to be allocated
 * but is now embedded.
 *
 * The core is not locked.  No additional refs on the chain are made.
 * (trans) must not be NULL if (core) is not NULL.
 */
void
hammer2_chain_core_init(hammer2_chain_t *chain)
{
	/*
	 * Fresh core under nchain (no multi-homing of ochain's
	 * sub-tree).
	 */
	RB_INIT(&chain->core.rbtree);	/* live chains */
	hammer2_mtx_init(&chain->lock, "h2chain");
}

/*
 * Add a reference to a chain element, preventing its destruction.
 *
 * (can be called with spinlock held)
 */
void
hammer2_chain_ref(hammer2_chain_t *chain)
{
	/* 0->1 transition: chain may be sitting unreferenced on the LRU */
	if (atomic_fetchadd_int(&chain->refs, 1) == 0) {
		/*
		 * Just flag that the chain was used and should be recycled
		 * on the LRU if it encounters it later.
		 */
		if (chain->flags & HAMMER2_CHAIN_ONLRU)
			atomic_set_int(&chain->flags, HAMMER2_CHAIN_LRUHINT);

#if 0
		/*
		 * REMOVED - reduces contention, lru_list is more heuristical
		 * now.
		 *
		 * 0->non-zero transition must ensure that chain is removed
		 * from the LRU list.
		 *
		 * NOTE: Already holding lru_spin here so we cannot call
		 *	 hammer2_chain_ref() to get it off lru_list, do
		 *	 it manually.
		 */
		if (chain->flags & HAMMER2_CHAIN_ONLRU) {
			hammer2_pfs_t *pmp = chain->pmp;
			hammer2_spin_ex(&pmp->lru_spin);
			if (chain->flags & HAMMER2_CHAIN_ONLRU) {
				atomic_add_int(&pmp->lru_count, -1);
				atomic_clear_int(&chain->flags,
						 HAMMER2_CHAIN_ONLRU);
				TAILQ_REMOVE(&pmp->lru_list, chain, lru_node);
			}
			hammer2_spin_unex(&pmp->lru_spin);
		}
#endif
	}
}

/*
 * Ref a locked chain and force the data to be held across an unlock.
 * Chain must be currently locked.  The user of the chain who desires
 * to release the hold must call hammer2_chain_lock_unhold() to relock
 * and unhold the chain, then unlock normally, or may simply call
 * hammer2_chain_drop_unhold() (which is safer against deadlocks).
 */
void
hammer2_chain_ref_hold(hammer2_chain_t *chain)
{
	atomic_add_int(&chain->lockcnt, 1);
	hammer2_chain_ref(chain);
}

/*
 * Insert the chain in the core rbtree.
 *
 * Normal insertions are placed in the live rbtree.  Insertion of a deleted
 * chain is a special case used by the flush code that is placed on the
 * unstaged deleted list to avoid confusing the live view.
 */
#define HAMMER2_CHAIN_INSERT_SPIN	0x0001
#define HAMMER2_CHAIN_INSERT_LIVE	0x0002
#define HAMMER2_CHAIN_INSERT_RACE	0x0004

static
int
hammer2_chain_insert(hammer2_chain_t *parent, hammer2_chain_t *chain,
		     int flags, int generation)
{
	hammer2_chain_t *xchain;
	int error = 0;

	if (flags & HAMMER2_CHAIN_INSERT_SPIN)
		hammer2_spin_ex(&parent->core.spin);

	/*
	 * Interlocked by spinlock, check for race
	 */
	if ((flags & HAMMER2_CHAIN_INSERT_RACE) &&
	    parent->core.generation != generation) {
		error = HAMMER2_ERROR_EAGAIN;
		goto failed;
	}

	/*
	 * Insert chain
	 */
	xchain = RB_INSERT(hammer2_chain_tree, &parent->core.rbtree, chain);
	KASSERT(xchain == NULL,
		("hammer2_chain_insert: collision %p %p (key=%016jx)",
		chain, xchain, chain->bref.key));
	atomic_set_int(&chain->flags, HAMMER2_CHAIN_ONRBTREE);
	chain->parent = parent;
	++parent->core.chain_count;
	++parent->core.generation;	/* XXX incs for _get() too, XXX */

	/*
	 * We have to keep track of the effective live-view blockref count
	 * so the create code knows when to push an indirect block.
	 */
	if (flags & HAMMER2_CHAIN_INSERT_LIVE)
		atomic_add_int(&parent->core.live_count, 1);
failed:
	if (flags & HAMMER2_CHAIN_INSERT_SPIN)
		hammer2_spin_unex(&parent->core.spin);
	return error;
}

/*
 * Drop the caller's reference to the chain.  When the ref count drops to
 * zero this function will try to disassociate the chain from its parent and
 * deallocate it, then recursely drop the parent using the implied ref
 * from the chain's chain->parent.
 *
 * Nobody should own chain's mutex on the 1->0 transition, unless this drop
 * races an acquisition by another cpu.  Therefore we can loop if we are
 * unable to acquire the mutex, and refs is unlikely to be 1 unless we again
 * race against another drop.
 */
void
hammer2_chain_drop(hammer2_chain_t *chain)
{
	u_int refs;

	KKASSERT(chain->refs > 0);

	while (chain) {
		refs = chain->refs;
		cpu_ccfence();
		KKASSERT(refs > 0);

		if (refs == 1) {
			/*
			 * Potential last drop.  The 1->0 transition requires
			 * the chain mutex; hammer2_mtx_ex_try() returns 0 on
			 * success.  lastdrop returns NULL when done, or the
			 * next chain to drop (same chain to retry, or a
			 * parent whose ref we now own).
			 */
			if (hammer2_mtx_ex_try(&chain->lock) == 0)
				chain = hammer2_chain_lastdrop(chain, 0);
			/* retry the same chain, or chain from lastdrop */
		} else {
			if (atomic_cmpset_int(&chain->refs, refs, refs - 1))
				break;
			/* retry the same chain */
		}
		cpu_pause();
	}
}

/*
 * Unhold a held and probably not-locked chain, ensure that the data is
 * dropped on the 1->0 transition of lockcnt by obtaining an exclusive
 * lock and then simply unlocking the chain.
 */
void
hammer2_chain_unhold(hammer2_chain_t *chain)
{
	u_int lockcnt;
	int iter = 0;

	for (;;) {
		lockcnt = chain->lockcnt;
		cpu_ccfence();
		if (lockcnt > 1) {
			if (atomic_cmpset_int(&chain->lockcnt,
					      lockcnt, lockcnt - 1)) {
				break;
			}
		} else if (hammer2_mtx_ex_try(&chain->lock) == 0) {
			/* lockcnt == 1, mutex acquired; normal unlock path */
			hammer2_chain_unlock(chain);
			break;
		} else {
			/*
			 * This situation can easily occur on SMP due to
			 * the gap inbetween the 1->0 transition and the
			 * final unlock.  We cannot safely block on the
			 * mutex because lockcnt might go above 1.
			 *
			 * XXX Sleep for one tick if it takes too long.
			 */
			if (++iter > 1000) {
				if (iter > 1000 + hz) {
					kprintf("hammer2: h2race1 %p\n", chain);
					iter = 1000;
				}
				tsleep(&iter, 0, "h2race1", 1);
			}
			cpu_pause();
		}
	}
}

/*
 * Drop a chain's data persistence hold, then drop the caller's reference.
 */
void
hammer2_chain_drop_unhold(hammer2_chain_t *chain)
{
	hammer2_chain_unhold(chain);
	hammer2_chain_drop(chain);
}

/*
 * Re-acquire a data persistence hold on a chain (inverse of unhold).  A
 * transient shared lock keeps the data valid while lockcnt is bumped.
 */
void
hammer2_chain_rehold(hammer2_chain_t *chain)
{
	hammer2_chain_lock(chain, HAMMER2_RESOLVE_SHARED);
	atomic_add_int(&chain->lockcnt, 1);
	hammer2_chain_unlock(chain);
}

/*
 * Handles the (potential) last drop of chain->refs from 1->0.  Called with
 * the mutex exclusively locked, refs == 1, and lockcnt 0.  SMP races are
 * possible against refs and lockcnt.  We must dispose of the mutex on chain.
 *
 * This function returns an unlocked chain for recursive drop or NULL.  It
 * can return the same chain if it determines it has raced another ref.
 *
 * --
 *
 * When two chains need to be recursively dropped we use the chain we
 * would otherwise free to placehold the additional chain.  It's a bit
 * convoluted but we can't just recurse without potentially blowing out
 * the kernel stack.
 *
 * The chain cannot be freed if it has any children.
 * The chain cannot be freed if flagged MODIFIED unless we can dispose of it.
 * The chain cannot be freed if flagged UPDATE unless we can dispose of it.
 * Any dedup registration can remain intact.
 *
 * The core spinlock is allowed to nest child-to-parent (not parent-to-child).
 */
static
hammer2_chain_t *
hammer2_chain_lastdrop(hammer2_chain_t *chain, int depth)
{
	hammer2_pfs_t *pmp;
	hammer2_dev_t *hmp;
	hammer2_chain_t *parent;
	hammer2_chain_t *rdrop;

	/*
	 * We need chain's spinlock to interlock the sub-tree test.
	 * We already have chain's mutex, protecting chain->parent.
	 *
	 * Remember that chain->refs can be in flux.
	 */
	hammer2_spin_ex(&chain->core.spin);

	if (chain->parent != NULL) {
		/*
		 * If the chain has a parent the UPDATE bit prevents scrapping
		 * as the chain is needed to properly flush the parent.  Try
		 * to complete the 1->0 transition and return NULL.  Retry
		 * (return chain) if we are unable to complete the 1->0
		 * transition, else return NULL (nothing more to do).
		 *
		 * If the chain has a parent the MODIFIED bit prevents
		 * scrapping.
		 *
		 * Chains with UPDATE/MODIFIED are *not* put on the LRU list!
		 */
		if (chain->flags & (HAMMER2_CHAIN_UPDATE |
				    HAMMER2_CHAIN_MODIFIED)) {
			if (atomic_cmpset_int(&chain->refs, 1, 0)) {
				hammer2_spin_unex(&chain->core.spin);
				hammer2_chain_assert_no_data(chain);
				hammer2_mtx_unlock(&chain->lock);
				chain = NULL;
			} else {
				hammer2_spin_unex(&chain->core.spin);
				hammer2_mtx_unlock(&chain->lock);
			}
			return (chain);
		}
		/* spinlock still held */
	} else if (chain->bref.type == HAMMER2_BREF_TYPE_VOLUME ||
		   chain->bref.type == HAMMER2_BREF_TYPE_FREEMAP) {
		/*
		 * Retain the static vchain and fchain.  Clear bits that
		 * are not relevant.  Do not clear the MODIFIED bit,
		 * and certainly do not put it on the delayed-flush queue.
		 */
		atomic_clear_int(&chain->flags, HAMMER2_CHAIN_UPDATE);
	} else {
		/*
		 * The chain has no parent and can be flagged for destruction.
		 * Since it has no parent, UPDATE can also be cleared.
		 */
		atomic_set_int(&chain->flags, HAMMER2_CHAIN_DESTROY);
		if (chain->flags & HAMMER2_CHAIN_UPDATE)
			atomic_clear_int(&chain->flags, HAMMER2_CHAIN_UPDATE);

		/*
		 * If the chain has children we must propagate the DESTROY
		 * flag downward and rip the disconnected topology apart.
		 * This is accomplished by calling hammer2_flush() on the
		 * chain.
		 *
		 * Any dedup is already handled by the underlying DIO, so
		 * we do not have to specifically flush it here.
		 */
		if (chain->core.chain_count) {
			hammer2_spin_unex(&chain->core.spin);
			hammer2_flush(chain, HAMMER2_FLUSH_TOP |
				      HAMMER2_FLUSH_ALL);
			hammer2_mtx_unlock(&chain->lock);

			return(chain);	/* retry drop */
		}

		/*
		 * Otherwise we can scrap the MODIFIED bit if it is set,
		 * and continue along the freeing path.
		 *
		 * Be sure to clean-out any dedup bits.  Without a parent
		 * this chain will no longer be visible to the flush code.
		 * Easy check data_off to avoid the volume root.
		 */
		if (chain->flags & HAMMER2_CHAIN_MODIFIED) {
			atomic_clear_int(&chain->flags, HAMMER2_CHAIN_MODIFIED);
			atomic_add_long(&hammer2_count_modified_chains, -1);
			if (chain->pmp)
				hammer2_pfs_memory_wakeup(chain->pmp, -1);
		}
		/* spinlock still held */
	}

	/* spinlock still held */

	/*
	 * If any children exist we must leave the chain intact with refs == 0.
	 * They exist because chains are retained below us which have refs or
	 * may require flushing.
	 *
	 * Retry (return chain) if we fail to transition the refs to 0, else
	 * return NULL indication nothing more to do.
	 *
	 * Chains with children are NOT put on the LRU list.
	 */
	if (chain->core.chain_count) {
		if (atomic_cmpset_int(&chain->refs, 1, 0)) {
			hammer2_spin_unex(&chain->core.spin);
			hammer2_chain_assert_no_data(chain);
			hammer2_mtx_unlock(&chain->lock);
			chain = NULL;
		} else {
			hammer2_spin_unex(&chain->core.spin);
			hammer2_mtx_unlock(&chain->lock);
		}
		return (chain);
	}
	/* spinlock still held */
	/* no chains left under us */

	/*
	 * chain->core has no children left so no accessors can get to our
	 * chain from there.  Now we have to lock the parent core to interlock
	 * remaining possible accessors that might bump chain's refs before
	 * we can safely drop chain's refs with intent to free the chain.
	 */
	hmp = chain->hmp;
	pmp = chain->pmp;	/* can be NULL */
	rdrop = NULL;

	parent = chain->parent;

	/*
	 * WARNING! chain's spin lock is still held here, and other spinlocks
	 *	    will be acquired and released in the code below.  We
	 *	    cannot be making fancy procedure calls!
	 */

	/*
	 * We can cache the chain if it is associated with a pmp
	 * and not flagged as being destroyed or requesting a full
	 * release.  In this situation the chain is not removed
	 * from its parent, i.e. it can still be looked up.
	 *
	 * We intentionally do not cache DATA chains because these
	 * were likely used to load data into the logical buffer cache
	 * and will not be accessed again for some time.
	 */
	if ((chain->flags &
	     (HAMMER2_CHAIN_DESTROY | HAMMER2_CHAIN_RELEASE)) == 0 &&
	    chain->pmp &&
	    chain->bref.type != HAMMER2_BREF_TYPE_DATA) {
		if (parent)
			hammer2_spin_ex(&parent->core.spin);
		if (atomic_cmpset_int(&chain->refs, 1, 0) == 0) {
			/*
			 * 1->0 transition failed, retry.  Do not drop
			 * the chain's data yet!
			 */
			if (parent)
				hammer2_spin_unex(&parent->core.spin);
			hammer2_spin_unex(&chain->core.spin);
			hammer2_mtx_unlock(&chain->lock);

			return(chain);
		}

		/*
		 * Success
		 */
		hammer2_chain_assert_no_data(chain);

		/*
		 * Make sure we are on the LRU list, clean up excessive
		 * LRU entries.  We can only really drop one but there might
		 * be other entries that we can remove from the lru_list
		 * without dropping.
		 *
		 * NOTE: HAMMER2_CHAIN_ONLRU may only be safely set when
		 *	 chain->core.spin AND pmp->lru_spin are held, but
		 *	 can be safely cleared only holding pmp->lru_spin.
		 */
		if ((chain->flags & HAMMER2_CHAIN_ONLRU) == 0) {
			hammer2_spin_ex(&pmp->lru_spin);
			if ((chain->flags & HAMMER2_CHAIN_ONLRU) == 0) {
				atomic_set_int(&chain->flags,
					       HAMMER2_CHAIN_ONLRU);
				TAILQ_INSERT_TAIL(&pmp->lru_list,
						  chain, lru_node);
				atomic_add_int(&pmp->lru_count, 1);
			}
			if (pmp->lru_count < HAMMER2_LRU_LIMIT)
				depth = 1;	/* disable lru_list flush */
			hammer2_spin_unex(&pmp->lru_spin);
		} else {
			/* disable lru flush */
			depth = 1;
		}

		if (parent) {
			hammer2_spin_unex(&parent->core.spin);
			parent = NULL;	/* safety */
		}
		hammer2_spin_unex(&chain->core.spin);
		hammer2_mtx_unlock(&chain->lock);

		/*
		 * lru_list hysteresis (see above for depth overrides).
		 * Note that depth also prevents excessive lastdrop recursion.
		 */
		if (depth == 0)
			hammer2_chain_lru_flush(pmp);

		return NULL;
		/* NOT REACHED */
	}

	/*
	 * Make sure we are not on the LRU list.
	 */
	if (chain->flags & HAMMER2_CHAIN_ONLRU) {
		hammer2_spin_ex(&pmp->lru_spin);
		if (chain->flags & HAMMER2_CHAIN_ONLRU) {
			atomic_add_int(&pmp->lru_count, -1);
			atomic_clear_int(&chain->flags, HAMMER2_CHAIN_ONLRU);
			TAILQ_REMOVE(&pmp->lru_list, chain, lru_node);
		}
		hammer2_spin_unex(&pmp->lru_spin);
	}

	/*
	 * Spinlock the parent and try to drop the last ref on chain.
	 * On success determine if we should dispose of the chain
	 * (remove the chain from its parent, etc).
	 *
	 * (normal core locks are top-down recursive but we define
	 * core spinlocks as bottom-up recursive, so this is safe).
	 */
	if (parent) {
		hammer2_spin_ex(&parent->core.spin);
		if (atomic_cmpset_int(&chain->refs, 1, 0) == 0) {
			/*
			 * 1->0 transition failed, retry.
743 */ 744 hammer2_spin_unex(&parent->core.spin); 745 hammer2_spin_unex(&chain->core.spin); 746 hammer2_mtx_unlock(&chain->lock); 747 748 return(chain); 749 } 750 751 /* 752 * 1->0 transition successful, parent spin held to prevent 753 * new lookups, chain spinlock held to protect parent field. 754 * Remove chain from the parent. 755 * 756 * If the chain is being removed from the parent's btree but 757 * is not bmapped, we have to adjust live_count downward. If 758 * it is bmapped then the blockref is retained in the parent 759 * as is its associated live_count. This case can occur when 760 * a chain added to the topology is unable to flush and is 761 * then later deleted. 762 */ 763 if (chain->flags & HAMMER2_CHAIN_ONRBTREE) { 764 if ((parent->flags & HAMMER2_CHAIN_COUNTEDBREFS) && 765 (chain->flags & HAMMER2_CHAIN_BMAPPED) == 0) { 766 atomic_add_int(&parent->core.live_count, -1); 767 } 768 RB_REMOVE(hammer2_chain_tree, 769 &parent->core.rbtree, chain); 770 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_ONRBTREE); 771 --parent->core.chain_count; 772 chain->parent = NULL; 773 } 774 775 /* 776 * If our chain was the last chain in the parent's core the 777 * core is now empty and its parent might have to be 778 * re-dropped if it has 0 refs. 779 */ 780 if (parent->core.chain_count == 0) { 781 rdrop = parent; 782 atomic_add_int(&rdrop->refs, 1); 783 /* 784 if (atomic_cmpset_int(&rdrop->refs, 0, 1) == 0) 785 rdrop = NULL; 786 */ 787 } 788 hammer2_spin_unex(&parent->core.spin); 789 parent = NULL; /* safety */ 790 /* FALL THROUGH */ 791 } else { 792 /* 793 * No-parent case. 794 */ 795 if (atomic_cmpset_int(&chain->refs, 1, 0) == 0) { 796 /* 797 * 1->0 transition failed, retry. 798 */ 799 hammer2_spin_unex(&parent->core.spin); 800 hammer2_spin_unex(&chain->core.spin); 801 hammer2_mtx_unlock(&chain->lock); 802 803 return(chain); 804 } 805 } 806 807 /* 808 * Successful 1->0 transition, no parent, no children... no way for 809 * anyone to ref this chain any more. 
	 * We can clean-up and free it.
	 *
	 * We still have the core spinlock, and core's chain_count is 0.
	 * Any parent spinlock is gone.
	 */
	hammer2_spin_unex(&chain->core.spin);
	hammer2_chain_assert_no_data(chain);
	hammer2_mtx_unlock(&chain->lock);
	KKASSERT(RB_EMPTY(&chain->core.rbtree) &&
		 chain->core.chain_count == 0);

	/*
	 * All locks are gone, no pointers remain to the chain, finish
	 * freeing it.
	 */
	KKASSERT((chain->flags & (HAMMER2_CHAIN_UPDATE |
				  HAMMER2_CHAIN_MODIFIED)) == 0);

	/*
	 * Once chain resources are gone we can use the now dead chain
	 * structure to placehold what might otherwise require a recursive
	 * drop, because we have potentially two things to drop and can only
	 * return one directly.
	 */
	if (chain->flags & HAMMER2_CHAIN_ALLOCATED) {
		atomic_clear_int(&chain->flags, HAMMER2_CHAIN_ALLOCATED);
		chain->hmp = NULL;
		kfree(chain, hmp->mchain);
	}

	/*
	 * Possible chaining loop when parent re-drop needed.
	 */
	return(rdrop);
}

/*
 * Heuristical flush of the LRU, try to reduce the number of entries
 * on the LRU to (HAMMER2_LRU_LIMIT * 2 / 3).  This procedure is called
 * only when lru_count exceeds HAMMER2_LRU_LIMIT.
 */
static
void
hammer2_chain_lru_flush(hammer2_pfs_t *pmp)
{
	hammer2_chain_t *chain;

again:
	chain = NULL;
	hammer2_spin_ex(&pmp->lru_spin);
	while (pmp->lru_count > HAMMER2_LRU_LIMIT * 2 / 3) {
		/*
		 * Pick a chain off the lru_list, just recycle it quickly
		 * if LRUHINT is set (the chain was ref'd but left on
		 * the lru_list, so cycle to the end).
		 */
		chain = TAILQ_FIRST(&pmp->lru_list);
		TAILQ_REMOVE(&pmp->lru_list, chain, lru_node);

		if (chain->flags & HAMMER2_CHAIN_LRUHINT) {
			atomic_clear_int(&chain->flags, HAMMER2_CHAIN_LRUHINT);
			TAILQ_INSERT_TAIL(&pmp->lru_list, chain, lru_node);
			chain = NULL;
			continue;
		}

		/*
		 * Ok, we are off the LRU.  We must adjust refs before we
		 * can safely clear the ONLRU flag.
		 */
		atomic_add_int(&pmp->lru_count, -1);
		if (atomic_cmpset_int(&chain->refs, 0, 1)) {
			/* took ownership of the 0-ref chain, drop it below */
			atomic_clear_int(&chain->flags, HAMMER2_CHAIN_ONLRU);
			atomic_set_int(&chain->flags, HAMMER2_CHAIN_RELEASE);
			break;
		}
		/* someone else re-ref'd it; leave it to them */
		atomic_clear_int(&chain->flags, HAMMER2_CHAIN_ONLRU);
		chain = NULL;
	}
	hammer2_spin_unex(&pmp->lru_spin);
	if (chain == NULL)
		return;

	/*
	 * If we picked a chain off the lru list we may be able to lastdrop
	 * it.  Use a depth of 1 to prevent excessive lastdrop recursion.
	 */
	while (chain) {
		u_int refs;

		refs = chain->refs;
		cpu_ccfence();
		KKASSERT(refs > 0);

		if (refs == 1) {
			if (hammer2_mtx_ex_try(&chain->lock) == 0)
				chain = hammer2_chain_lastdrop(chain, 1);
			/* retry the same chain, or chain from lastdrop */
		} else {
			if (atomic_cmpset_int(&chain->refs, refs, refs - 1))
				break;
			/* retry the same chain */
		}
		cpu_pause();
	}
	goto again;
}

/*
 * On last lock release.
919 */ 920 static hammer2_io_t * 921 hammer2_chain_drop_data(hammer2_chain_t *chain) 922 { 923 hammer2_io_t *dio; 924 925 if ((dio = chain->dio) != NULL) { 926 chain->dio = NULL; 927 chain->data = NULL; 928 } else { 929 switch(chain->bref.type) { 930 case HAMMER2_BREF_TYPE_VOLUME: 931 case HAMMER2_BREF_TYPE_FREEMAP: 932 break; 933 default: 934 if (chain->data != NULL) { 935 hammer2_spin_unex(&chain->core.spin); 936 panic("chain data not null: " 937 "chain %p bref %016jx.%02x " 938 "refs %d parent %p dio %p data %p", 939 chain, chain->bref.data_off, 940 chain->bref.type, chain->refs, 941 chain->parent, 942 chain->dio, chain->data); 943 } 944 KKASSERT(chain->data == NULL); 945 break; 946 } 947 } 948 return dio; 949 } 950 951 /* 952 * Lock a referenced chain element, acquiring its data with I/O if necessary, 953 * and specify how you would like the data to be resolved. 954 * 955 * If an I/O or other fatal error occurs, chain->error will be set to non-zero. 956 * 957 * The lock is allowed to recurse, multiple locking ops will aggregate 958 * the requested resolve types. Once data is assigned it will not be 959 * removed until the last unlock. 960 * 961 * HAMMER2_RESOLVE_NEVER - Do not resolve the data element. 962 * (typically used to avoid device/logical buffer 963 * aliasing for data) 964 * 965 * HAMMER2_RESOLVE_MAYBE - Do not resolve data elements for chains in 966 * the INITIAL-create state (indirect blocks only). 967 * 968 * Do not resolve data elements for DATA chains. 969 * (typically used to avoid device/logical buffer 970 * aliasing for data) 971 * 972 * HAMMER2_RESOLVE_ALWAYS- Always resolve the data element. 973 * 974 * HAMMER2_RESOLVE_SHARED- (flag) The chain is locked shared, otherwise 975 * it will be locked exclusive. 976 * 977 * HAMMER2_RESOLVE_NONBLOCK- (flag) The chain is locked non-blocking. If 978 * the lock fails, EAGAIN is returned. 979 * 980 * NOTE: Embedded elements (volume header, inodes) are always resolved 981 * regardless. 
 *
 * NOTE: Specifying HAMMER2_RESOLVE_ALWAYS on a newly-created non-embedded
 *	 element will instantiate and zero its buffer, and flush it on
 *	 release.
 *
 * NOTE: (data) elements are normally locked RESOLVE_NEVER or RESOLVE_MAYBE
 *	 so as not to instantiate a device buffer, which could alias against
 *	 a logical file buffer.  However, if ALWAYS is specified the
 *	 device buffer will be instantiated anyway.
 *
 * NOTE: The return value is always 0 unless NONBLOCK is specified, in which
 *	 case it can be either 0 or EAGAIN.
 *
 * WARNING! This function blocks on I/O if data needs to be fetched.  This
 *	    blocking can run concurrent with other compatible lock holders
 *	    who do not need data returning.  The lock is not upgraded to
 *	    exclusive during a data fetch, a separate bit is used to
 *	    interlock I/O.  However, an exclusive lock holder can still count
 *	    on being interlocked against an I/O fetch managed by a shared
 *	    lock holder.
 */
int
hammer2_chain_lock(hammer2_chain_t *chain, int how)
{
	KKASSERT(chain->refs > 0);

	if (how & HAMMER2_RESOLVE_NONBLOCK) {
		/*
		 * We still have to bump lockcnt before acquiring the lock,
		 * even for non-blocking operation, because the unlock code
		 * live-loops on lockcnt == 1 when dropping the last lock.
		 *
		 * If the non-blocking operation fails we have to use an
		 * unhold sequence to undo the mess.
		 *
		 * NOTE: LOCKAGAIN must always succeed without blocking,
		 *	 even if NONBLOCK is specified.
		 */
		atomic_add_int(&chain->lockcnt, 1);
		if (how & HAMMER2_RESOLVE_SHARED) {
			if (how & HAMMER2_RESOLVE_LOCKAGAIN) {
				/* recursive shared lock, cannot block */
				hammer2_mtx_sh_again(&chain->lock);
			} else {
				if (hammer2_mtx_sh_try(&chain->lock) != 0) {
					hammer2_chain_unhold(chain);
					return EAGAIN;
				}
			}
		} else {
			if (hammer2_mtx_ex_try(&chain->lock) != 0) {
				hammer2_chain_unhold(chain);
				return EAGAIN;
			}
		}
	} else {
		/*
		 * Get the appropriate lock.  If LOCKAGAIN is flagged with
		 * SHARED the caller expects a shared lock to already be
		 * present and we are giving it another ref.  This case must
		 * importantly not block if there is a pending exclusive lock
		 * request.
		 */
		atomic_add_int(&chain->lockcnt, 1);
		if (how & HAMMER2_RESOLVE_SHARED) {
			if (how & HAMMER2_RESOLVE_LOCKAGAIN) {
				hammer2_mtx_sh_again(&chain->lock);
			} else {
				hammer2_mtx_sh(&chain->lock);
			}
		} else {
			hammer2_mtx_ex(&chain->lock);
		}
	}

	/*
	 * If we already have a valid data pointer make sure the data is
	 * synchronized to the current cpu, and then no further action is
	 * necessary.
	 */
	if (chain->data) {
		if (chain->dio)
			hammer2_io_bkvasync(chain->dio);
		return 0;
	}

	/*
	 * Do we have to resolve the data?  This is generally only
	 * applicable to HAMMER2_BREF_TYPE_DATA which is special-cased.
	 * Other BREF types expect the data to be there.
	 */
	switch(how & HAMMER2_RESOLVE_MASK) {
	case HAMMER2_RESOLVE_NEVER:
		return 0;
	case HAMMER2_RESOLVE_MAYBE:
		if (chain->flags & HAMMER2_CHAIN_INITIAL)
			return 0;
		if (chain->bref.type == HAMMER2_BREF_TYPE_DATA)
			return 0;
#if 0
		if (chain->bref.type == HAMMER2_BREF_TYPE_FREEMAP_NODE)
			return 0;
		if (chain->bref.type == HAMMER2_BREF_TYPE_FREEMAP_LEAF)
			return 0;
#endif
		/* fall through */
	case HAMMER2_RESOLVE_ALWAYS:
	default:
		break;
	}

	/*
	 * Caller requires data
	 */
	hammer2_chain_load_data(chain);

	return 0;
}

/*
 * Lock the chain, retain the hold, and drop the data persistence count.
 * The data should remain valid because we never transitioned lockcnt
 * through 0.
 */
void
hammer2_chain_lock_unhold(hammer2_chain_t *chain, int how)
{
	hammer2_chain_lock(chain, how);
	atomic_add_int(&chain->lockcnt, -1);
}

#if 0
/*
 * Downgrade an exclusive chain lock to a shared chain lock.
 *
 * NOTE: There is no upgrade equivalent due to the ease of
 *	 deadlocks in that direction.
 */
void
hammer2_chain_lock_downgrade(hammer2_chain_t *chain)
{
	hammer2_mtx_downgrade(&chain->lock);
}
#endif

/*
 * Issue I/O and install chain->data.  Caller must hold a chain lock, lock
 * may be of any type.
 *
 * Once chain->data is set it cannot be disposed of until all locks are
 * released.
 *
 * Make sure the data is synchronized to the current cpu.
 */
void
hammer2_chain_load_data(hammer2_chain_t *chain)
{
	hammer2_blockref_t *bref;
	hammer2_dev_t *hmp;
	hammer2_io_t *dio;
	char *bdata;
	int error;

	/*
	 * Degenerate case, data already present, or chain has no media
	 * reference to load.
	 */
	KKASSERT(chain->lock.mtx_lock & MTX_MASK);
	if (chain->data) {
		if (chain->dio)
			hammer2_io_bkvasync(chain->dio);
		return;
	}
	if ((chain->bref.data_off & ~HAMMER2_OFF_MASK_RADIX) == 0)
		return;

	hmp = chain->hmp;
	KKASSERT(hmp != NULL);

	/*
	 * Gain the IOINPROG bit, interlocked block.  Only one thread may
	 * perform the I/O; other threads requesting data concurrently
	 * sleep on IOSIGNAL until the owner finishes.
	 */
	for (;;) {
		u_int oflags;
		u_int nflags;

		oflags = chain->flags;
		cpu_ccfence();
		if (oflags & HAMMER2_CHAIN_IOINPROG) {
			/* another thread owns the I/O, wait for it */
			nflags = oflags | HAMMER2_CHAIN_IOSIGNAL;
			tsleep_interlock(&chain->flags, 0);
			if (atomic_cmpset_int(&chain->flags, oflags, nflags)) {
				tsleep(&chain->flags, PINTERLOCKED,
				       "h2iocw", 0);
			}
			/* retry */
		} else {
			nflags = oflags | HAMMER2_CHAIN_IOINPROG;
			if (atomic_cmpset_int(&chain->flags, oflags, nflags)) {
				break;
			}
			/* retry */
		}
	}

	/*
	 * We own CHAIN_IOINPROG
	 *
	 * Degenerate case if we raced another load.
	 */
	if (chain->data) {
		if (chain->dio)
			hammer2_io_bkvasync(chain->dio);
		goto done;
	}

	/*
	 * We must resolve to a device buffer, either by issuing I/O or
	 * by creating a zero-fill element.  We do not mark the buffer
	 * dirty when creating a zero-fill element (the hammer2_chain_modify()
	 * API must still be used to do that).
	 *
	 * The device buffer is variable-sized in powers of 2 down
	 * to HAMMER2_MIN_ALLOC (typically 1K).  A 64K physical storage
	 * chunk always contains buffers of the same size. (XXX)
	 *
	 * The minimum physical IO size may be larger than the variable
	 * block size.
	 */
	bref = &chain->bref;

	/*
	 * The getblk() optimization can only be used on newly created
	 * elements if the physical block size matches the request.
	 */
	if (chain->flags & HAMMER2_CHAIN_INITIAL) {
		/* new (zero-filled) buffer, no media read needed */
		error = hammer2_io_new(hmp, bref->type,
				       bref->data_off, chain->bytes,
				       &chain->dio);
	} else {
		error = hammer2_io_bread(hmp, bref->type,
					 bref->data_off, chain->bytes,
					 &chain->dio);
		hammer2_adjreadcounter(chain->bref.type, chain->bytes);
	}
	if (error) {
		chain->error = HAMMER2_ERROR_EIO;
		kprintf("hammer2_chain_load_data: I/O error %016jx: %d\n",
			(intmax_t)bref->data_off, error);
		hammer2_io_bqrelse(&chain->dio);
		goto done;
	}
	chain->error = 0;

	/*
	 * This isn't perfect and can be ignored on OSs which do not have
	 * an indication as to whether a buffer is coming from cache or
	 * if I/O was actually issued for the read.  TESTEDGOOD will work
	 * pretty well without the B_IOISSUED logic because chains are
	 * cached, but in that situation (without B_IOISSUED) it will not
	 * detect whether a re-read via I/O is corrupted verses the original
	 * read.
	 *
	 * We can't re-run the CRC on every fresh lock.  That would be
	 * insanely expensive.
	 *
	 * If the underlying kernel buffer covers the entire chain we can
	 * use the B_IOISSUED indication to determine if we have to re-run
	 * the CRC on chain data for chains that managed to stay cached
	 * across the kernel disposal of the original buffer.
	 */
	if ((dio = chain->dio) != NULL && dio->bp) {
		struct buf *bp = dio->bp;

		if (dio->psize == chain->bytes &&
		    (bp->b_flags & B_IOISSUED)) {
			/* force a re-check of the freshly read data */
			atomic_clear_int(&chain->flags,
					 HAMMER2_CHAIN_TESTEDGOOD);
			bp->b_flags &= ~B_IOISSUED;
		}
	}

	/*
	 * NOTE: A locked chain's data cannot be modified without first
	 *	 calling hammer2_chain_modify().
	 */

	/*
	 * NOTE: hammer2_io_data() call issues bkvasync()
	 */
	bdata = hammer2_io_data(chain->dio, chain->bref.data_off);

	if (chain->flags & HAMMER2_CHAIN_INITIAL) {
		/*
		 * Clear INITIAL.  In this case we used io_new() and the
		 * buffer has been zero'd and marked dirty.
		 *
		 * CHAIN_MODIFIED has not been set yet, and we leave it
		 * that way for now.  Set a temporary CHAIN_NOTTESTED flag
		 * to prevent hammer2_chain_testcheck() from trying to match
		 * a check code that has not yet been generated.  This bit
		 * should NOT end up on the actual media.
		 */
		atomic_clear_int(&chain->flags, HAMMER2_CHAIN_INITIAL);
		atomic_set_int(&chain->flags, HAMMER2_CHAIN_NOTTESTED);
	} else if (chain->flags & HAMMER2_CHAIN_MODIFIED) {
		/*
		 * check data not currently synchronized due to
		 * modification.  XXX assumes data stays in the buffer
		 * cache, which might not be true (need biodep on flush
		 * to calculate crc?  or simple crc?).
		 */
	} else if ((chain->flags & HAMMER2_CHAIN_TESTEDGOOD) == 0) {
		/* verify the check code once, then cache the result */
		if (hammer2_chain_testcheck(chain, bdata) == 0) {
			chain->error = HAMMER2_ERROR_CHECK;
		} else {
			atomic_set_int(&chain->flags, HAMMER2_CHAIN_TESTEDGOOD);
		}
	}

	/*
	 * Setup the data pointer, either pointing it to an embedded data
	 * structure and copying the data from the buffer, or pointing it
	 * into the buffer.
	 *
	 * The buffer is not retained when copying to an embedded data
	 * structure in order to avoid potential deadlocks or recursions
	 * on the same physical buffer.
	 *
	 * WARNING! Other threads can start using the data the instant we
	 *	    set chain->data non-NULL.
	 */
	switch (bref->type) {
	case HAMMER2_BREF_TYPE_VOLUME:
	case HAMMER2_BREF_TYPE_FREEMAP:
		/*
		 * Copy data from bp to embedded buffer
		 */
		panic("hammer2_chain_load_data: unresolved volume header");
		break;
	case HAMMER2_BREF_TYPE_DIRENT:
		KKASSERT(chain->bytes != 0);
		/* fall through */
	case HAMMER2_BREF_TYPE_INODE:
	case HAMMER2_BREF_TYPE_FREEMAP_LEAF:
	case HAMMER2_BREF_TYPE_INDIRECT:
	case HAMMER2_BREF_TYPE_DATA:
	case HAMMER2_BREF_TYPE_FREEMAP_NODE:
	default:
		/*
		 * Point data at the device buffer and leave dio intact.
		 */
		chain->data = (void *)bdata;
		break;
	}

	/*
	 * Release HAMMER2_CHAIN_IOINPROG and signal waiters if requested.
	 */
done:
	for (;;) {
		u_int oflags;
		u_int nflags;

		oflags = chain->flags;
		nflags = oflags & ~(HAMMER2_CHAIN_IOINPROG |
				    HAMMER2_CHAIN_IOSIGNAL);
		KKASSERT(oflags & HAMMER2_CHAIN_IOINPROG);
		if (atomic_cmpset_int(&chain->flags, oflags, nflags)) {
			if (oflags & HAMMER2_CHAIN_IOSIGNAL)
				wakeup(&chain->flags);
			break;
		}
	}
}

/*
 * Unlock and deref a chain element.
 *
 * Remember that the presence of children under chain prevent the chain's
 * destruction but do not add additional references, so the dio will still
 * be dropped.
 */
void
hammer2_chain_unlock(hammer2_chain_t *chain)
{
	hammer2_io_t *dio;
	u_int lockcnt;
	int iter = 0;

	/*
	 * If multiple locks are present (or being attempted) on this
	 * particular chain we can just unlock, drop refs, and return.
	 *
	 * Otherwise fall-through on the 1->0 transition.
	 */
	for (;;) {
		lockcnt = chain->lockcnt;
		KKASSERT(lockcnt > 0);
		cpu_ccfence();
		if (lockcnt > 1) {
			if (atomic_cmpset_int(&chain->lockcnt,
					      lockcnt, lockcnt - 1)) {
				hammer2_mtx_unlock(&chain->lock);
				return;
			}
		} else if (hammer2_mtx_upgrade_try(&chain->lock) == 0) {
			/* while holding the mutex exclusively */
			if (atomic_cmpset_int(&chain->lockcnt, 1, 0))
				break;
		} else {
			/*
			 * This situation can easily occur on SMP due to
			 * the gap inbetween the 1->0 transition and the
			 * final unlock.  We cannot safely block on the
			 * mutex because lockcnt might go above 1.
			 *
			 * XXX Sleep for one tick if it takes too long.
			 */
			if (++iter > 1000) {
				if (iter > 1000 + hz) {
					kprintf("hammer2: h2race2 %p\n", chain);
					iter = 1000;
				}
				tsleep(&iter, 0, "h2race2", 1);
			}
			cpu_pause();
		}
		/* retry */
	}

	/*
	 * Last unlock / mutex upgraded to exclusive.  Drop the data
	 * reference.
	 */
	dio = hammer2_chain_drop_data(chain);
	if (dio)
		hammer2_io_bqrelse(&dio);
	hammer2_mtx_unlock(&chain->lock);
}

/*
 * Unlock and hold chain data intact
 */
void
hammer2_chain_unlock_hold(hammer2_chain_t *chain)
{
	atomic_add_int(&chain->lockcnt, 1);
	hammer2_chain_unlock(chain);
}

/*
 * Helper to obtain the blockref[] array base and count for a chain.
 *
 * XXX Not widely used yet, various use cases need to be validated and
 *     converted to use this function.
1437 */ 1438 static 1439 hammer2_blockref_t * 1440 hammer2_chain_base_and_count(hammer2_chain_t *parent, int *countp) 1441 { 1442 hammer2_blockref_t *base; 1443 int count; 1444 1445 if (parent->flags & HAMMER2_CHAIN_INITIAL) { 1446 base = NULL; 1447 1448 switch(parent->bref.type) { 1449 case HAMMER2_BREF_TYPE_INODE: 1450 count = HAMMER2_SET_COUNT; 1451 break; 1452 case HAMMER2_BREF_TYPE_INDIRECT: 1453 case HAMMER2_BREF_TYPE_FREEMAP_NODE: 1454 count = parent->bytes / sizeof(hammer2_blockref_t); 1455 break; 1456 case HAMMER2_BREF_TYPE_VOLUME: 1457 count = HAMMER2_SET_COUNT; 1458 break; 1459 case HAMMER2_BREF_TYPE_FREEMAP: 1460 count = HAMMER2_SET_COUNT; 1461 break; 1462 default: 1463 panic("hammer2_chain_base_and_count: " 1464 "unrecognized blockref type: %d", 1465 parent->bref.type); 1466 count = 0; 1467 break; 1468 } 1469 } else { 1470 switch(parent->bref.type) { 1471 case HAMMER2_BREF_TYPE_INODE: 1472 base = &parent->data->ipdata.u.blockset.blockref[0]; 1473 count = HAMMER2_SET_COUNT; 1474 break; 1475 case HAMMER2_BREF_TYPE_INDIRECT: 1476 case HAMMER2_BREF_TYPE_FREEMAP_NODE: 1477 base = &parent->data->npdata[0]; 1478 count = parent->bytes / sizeof(hammer2_blockref_t); 1479 break; 1480 case HAMMER2_BREF_TYPE_VOLUME: 1481 base = &parent->data->voldata. 1482 sroot_blockset.blockref[0]; 1483 count = HAMMER2_SET_COUNT; 1484 break; 1485 case HAMMER2_BREF_TYPE_FREEMAP: 1486 base = &parent->data->blkset.blockref[0]; 1487 count = HAMMER2_SET_COUNT; 1488 break; 1489 default: 1490 panic("hammer2_chain_base_and_count: " 1491 "unrecognized blockref type: %d", 1492 parent->bref.type); 1493 count = 0; 1494 break; 1495 } 1496 } 1497 *countp = count; 1498 1499 return base; 1500 } 1501 1502 /* 1503 * This counts the number of live blockrefs in a block array and 1504 * also calculates the point at which all remaining blockrefs are empty. 1505 * This routine can only be called on a live chain. 1506 * 1507 * Caller holds the chain locked, but possibly with a shared lock. 
We 1508 * must use an exclusive spinlock to prevent corruption. 1509 * 1510 * NOTE: Flag is not set until after the count is complete, allowing 1511 * callers to test the flag without holding the spinlock. 1512 * 1513 * NOTE: If base is NULL the related chain is still in the INITIAL 1514 * state and there are no blockrefs to count. 1515 * 1516 * NOTE: live_count may already have some counts accumulated due to 1517 * creation and deletion and could even be initially negative. 1518 */ 1519 void 1520 hammer2_chain_countbrefs(hammer2_chain_t *chain, 1521 hammer2_blockref_t *base, int count) 1522 { 1523 hammer2_spin_ex(&chain->core.spin); 1524 if ((chain->flags & HAMMER2_CHAIN_COUNTEDBREFS) == 0) { 1525 if (base) { 1526 while (--count >= 0) { 1527 if (base[count].type != HAMMER2_BREF_TYPE_EMPTY) 1528 break; 1529 } 1530 chain->core.live_zero = count + 1; 1531 while (count >= 0) { 1532 if (base[count].type != HAMMER2_BREF_TYPE_EMPTY) 1533 atomic_add_int(&chain->core.live_count, 1534 1); 1535 --count; 1536 } 1537 } else { 1538 chain->core.live_zero = 0; 1539 } 1540 /* else do not modify live_count */ 1541 atomic_set_int(&chain->flags, HAMMER2_CHAIN_COUNTEDBREFS); 1542 } 1543 hammer2_spin_unex(&chain->core.spin); 1544 } 1545 1546 /* 1547 * Resize the chain's physical storage allocation in-place. This function does 1548 * not usually adjust the data pointer and must be followed by (typically) a 1549 * hammer2_chain_modify() call to copy any old data over and adjust the 1550 * data pointer. 1551 * 1552 * Chains can be resized smaller without reallocating the storage. Resizing 1553 * larger will reallocate the storage. Excess or prior storage is reclaimed 1554 * asynchronously at a later time. 1555 * 1556 * An nradix value of 0 is special-cased to mean that the storage should 1557 * be disassociated, that is the chain is being resized to 0 bytes (not 1 1558 * byte). 1559 * 1560 * Must be passed an exclusively locked parent and chain. 
 *
 * This function is mostly used with DATA blocks locked RESOLVE_NEVER in order
 * to avoid instantiating a device buffer that conflicts with the vnode data
 * buffer.  However, because H2 can compress or encrypt data, the chain may
 * have a dio assigned to it in those situations, and they do not conflict.
 *
 * XXX return error if cannot resize.
 */
int
hammer2_chain_resize(hammer2_chain_t *chain,
		     hammer2_tid_t mtid, hammer2_off_t dedup_off,
		     int nradix, int flags)
{
	hammer2_dev_t *hmp;
	size_t obytes;
	size_t nbytes;
	int error;

	hmp = chain->hmp;

	/*
	 * Only data and indirect blocks can be resized for now.
	 * (The volume root, inodes, and freemap elements use a fixed size).
	 */
	KKASSERT(chain != &hmp->vchain);
	KKASSERT(chain->bref.type == HAMMER2_BREF_TYPE_DATA ||
		 chain->bref.type == HAMMER2_BREF_TYPE_INDIRECT ||
		 chain->bref.type == HAMMER2_BREF_TYPE_DIRENT);

	/*
	 * Nothing to do if the element is already the proper size.
	 * nradix == 0 means "no storage" (0 bytes), not a 1-byte block.
	 */
	obytes = chain->bytes;
	nbytes = (nradix) ? (1U << nradix) : 0;
	if (obytes == nbytes)
		return (chain->error);

	/*
	 * Make sure the old data is instantiated so we can copy it.  If this
	 * is a data block, the device data may be superfluous since the data
	 * might be in a logical block, but compressed or encrypted data is
	 * another matter.
	 *
	 * NOTE: The modify will set BMAPUPD for us if BMAPPED is set.
	 */
	error = hammer2_chain_modify(chain, mtid, dedup_off, 0);
	if (error)
		return error;

	/*
	 * Reallocate the block, even if making it smaller (because different
	 * block sizes may be in different regions).
	 *
	 * NOTE: Operation does not copy the data and may only be used
	 *	 to resize data blocks in-place, or directory entry blocks
	 *	 which are about to be modified in some manner.
	 */
	error = hammer2_freemap_alloc(chain, nbytes);
	if (error)
		return error;

	chain->bytes = nbytes;

	/*
	 * We don't want the followup chain_modify() to try to copy data
	 * from the old (wrong-sized) buffer.  It won't know how much to
	 * copy.  This case should only occur during writes when the
	 * originator already has the data to write in-hand.
	 */
	if (chain->dio) {
		KKASSERT(chain->bref.type == HAMMER2_BREF_TYPE_DATA ||
			 chain->bref.type == HAMMER2_BREF_TYPE_DIRENT);
		hammer2_io_brelse(&chain->dio);
		chain->data = NULL;
	}
	return (chain->error);
}

/*
 * Set the chain modified so its data can be changed by the caller, or
 * install deduplicated data.  The caller must call this routine for each
 * set of modifications it makes, even if the chain is already flagged
 * MODIFIED.
 *
 * Sets bref.modify_tid to mtid only if mtid != 0.  Note that bref.modify_tid
 * is a CLC (cluster level change) field and is not updated by parent
 * propagation during a flush.
 *
 * Returns an appropriate HAMMER2_ERROR_* code, which will generally reflect
 * chain->error except for HAMMER2_ERROR_ENOSPC.  If the allocation fails
 * due to no space available, HAMMER2_ERROR_ENOSPC is returned and the chain
 * remains unmodified with its old data ref intact and chain->error
 * unchanged.
 *
 *				Dedup Handling
 *
 * If the DEDUPABLE flag is set in the chain the storage must be reallocated
 * even if the chain is still flagged MODIFIED.  In this case the chain's
 * DEDUPABLE flag will be cleared once the new storage has been assigned.
 *
 * If the caller passes a non-zero dedup_off we will use it to assign the
 * new storage.  The MODIFIED flag will be *CLEARED* in this case, and
 * DEDUPABLE will be set (NOTE: the UPDATE flag is always set).  The caller
 * must not modify the data content upon return.
 */
int
hammer2_chain_modify(hammer2_chain_t *chain, hammer2_tid_t mtid,
		     hammer2_off_t dedup_off, int flags)
{
	hammer2_blockref_t obref;
	hammer2_dev_t *hmp;
	hammer2_io_t *dio;
	int error;
	int wasinitial;
	int setmodified;
	int setupdate;
	int newmod;
	char *bdata;

	hmp = chain->hmp;
	obref = chain->bref;	/* NOTE(review): obref appears unused below
				 * within this function — confirm against the
				 * full file before removing */
	KKASSERT(chain->lock.mtx_lock & MTX_EXCLUSIVE);

	/*
	 * Data is not optional for freemap chains (we must always be sure
	 * to copy the data on COW storage allocations).
	 */
	if (chain->bref.type == HAMMER2_BREF_TYPE_FREEMAP_NODE ||
	    chain->bref.type == HAMMER2_BREF_TYPE_FREEMAP_LEAF) {
		KKASSERT((chain->flags & HAMMER2_CHAIN_INITIAL) ||
			 (flags & HAMMER2_MODIFY_OPTDATA) == 0);
	}

	/*
	 * Data must be resolved if already assigned, unless explicitly
	 * flagged otherwise.  If we cannot safely load the data the
	 * modification fails and we return early.
	 */
	if (chain->data == NULL && chain->bytes != 0 &&
	    (flags & HAMMER2_MODIFY_OPTDATA) == 0 &&
	    (chain->bref.data_off & ~HAMMER2_OFF_MASK_RADIX)) {
		hammer2_chain_load_data(chain);
		if (chain->error)
			return (chain->error);
	}
	error = 0;

	/*
	 * Set MODIFIED to indicate that the chain has been modified.  A new
	 * allocation is required when modifying a chain.
	 *
	 * Set UPDATE to ensure that the blockref is updated in the parent.
	 *
	 * If MODIFIED is already set determine if we can reuse the assigned
	 * data block or if we need a new data block.
	 */
	if ((chain->flags & HAMMER2_CHAIN_MODIFIED) == 0) {
		/*
		 * Must set modified bit.
		 */
		atomic_add_long(&hammer2_count_modified_chains, 1);
		atomic_set_int(&chain->flags, HAMMER2_CHAIN_MODIFIED);
		hammer2_pfs_memory_inc(chain->pmp);	/* can be NULL */
		setmodified = 1;

		/*
		 * We may be able to avoid a copy-on-write if the chain's
		 * check mode is set to NONE and the chain's current
		 * modify_tid is beyond the last explicit snapshot tid.
		 *
		 * This implements HAMMER2's overwrite-in-place feature.
		 *
		 * NOTE! This data-block cannot be used as a de-duplication
		 *	 source when the check mode is set to NONE.
		 */
		if ((chain->bref.type == HAMMER2_BREF_TYPE_DATA ||
		     chain->bref.type == HAMMER2_BREF_TYPE_DIRENT) &&
		    (chain->flags & HAMMER2_CHAIN_INITIAL) == 0 &&
		    (chain->flags & HAMMER2_CHAIN_DEDUPABLE) == 0 &&
		    HAMMER2_DEC_CHECK(chain->bref.methods) ==
		    HAMMER2_CHECK_NONE &&
		    chain->pmp &&
		    chain->bref.modify_tid >
		     chain->pmp->iroot->meta.pfs_lsnap_tid) {
			/*
			 * Sector overwrite allowed.
			 */
			newmod = 0;
		} else if ((hmp->hflags & HMNT2_EMERG) &&
			   chain->pmp &&
			   chain->bref.modify_tid >
			    chain->pmp->iroot->meta.pfs_lsnap_tid) {
			/*
			 * If in emergency delete mode then do a modify-in-
			 * place on any chain type belonging to the PFS as
			 * long as it doesn't mess up a snapshot.  We might
			 * be forced to do this anyway a little further down
			 * in the code if the allocation fails.
			 *
			 * Also note that in emergency mode, these modify-in-
			 * place operations are NOT SAFE.  A storage failure,
			 * power failure, or panic can corrupt the filesystem.
			 */
			newmod = 0;
		} else {
			/*
			 * Sector overwrite not allowed, must copy-on-write.
			 */
			newmod = 1;
		}
	} else if (chain->flags & HAMMER2_CHAIN_DEDUPABLE) {
		/*
		 * If the modified chain was registered for dedup we need
		 * a new allocation.  This only happens for delayed-flush
		 * chains (i.e. which run through the front-end buffer
		 * cache).
		 */
		newmod = 1;
		setmodified = 0;
	} else {
		/*
		 * Already flagged modified, no new allocation is needed.
		 */
		newmod = 0;
		setmodified = 0;
	}

	/*
	 * Flag parent update required.
	 */
	if ((chain->flags & HAMMER2_CHAIN_UPDATE) == 0) {
		atomic_set_int(&chain->flags, HAMMER2_CHAIN_UPDATE);
		setupdate = 1;
	} else {
		setupdate = 0;
	}

	/*
	 * The XOP code returns held but unlocked focus chains.  This
	 * prevents the chain from being destroyed but does not prevent
	 * it from being modified.  diolk is used to interlock modifications
	 * against XOP frontend accesses to the focus.
	 *
	 * This allows us to theoretically avoid deadlocking the frontend
	 * if one of the backends lock up by not formally locking the
	 * focused chain in the frontend.  In addition, the synchronization
	 * code relies on this mechanism to avoid deadlocking concurrent
	 * synchronization threads.
	 */
	lockmgr(&chain->diolk, LK_EXCLUSIVE);

	/*
	 * The modification or re-modification requires an allocation and
	 * possible COW.  If an error occurs, the previous content and data
	 * reference is retained and the modification fails.
	 *
	 * If dedup_off is non-zero, the caller is requesting a deduplication
	 * rather than a modification.  The MODIFIED bit is not set and the
	 * data offset is set to the deduplication offset.  The data cannot
	 * be modified.
	 *
	 * NOTE: The dedup offset is allowed to be in a partially free state
	 *	 and we must be sure to reset it to a fully allocated state
	 *	 to force two bulkfree passes to free it again.
	 *
	 * NOTE: Only applicable when chain->bytes != 0.
	 *
	 * XXX can a chain already be marked MODIFIED without a data
	 * assignment?  If not, assert here instead of testing the case.
	 */
	if (chain != &hmp->vchain && chain != &hmp->fchain &&
	    chain->bytes) {
		if ((chain->bref.data_off & ~HAMMER2_OFF_MASK_RADIX) == 0 ||
		    newmod
		) {
			/*
			 * NOTE: We do not have to remove the dedup
			 *	 registration because the area is still
			 *	 allocated and the underlying DIO will
			 *	 still be flushed.
			 */
			if (dedup_off) {
				chain->bref.data_off = dedup_off;
				chain->bytes = 1 << (dedup_off &
						     HAMMER2_OFF_MASK_RADIX);
				chain->error = 0;
				atomic_clear_int(&chain->flags,
						 HAMMER2_CHAIN_MODIFIED);
				atomic_add_long(&hammer2_count_modified_chains,
						-1);
				if (chain->pmp) {
					hammer2_pfs_memory_wakeup(
						chain->pmp, -1);
				}
				hammer2_freemap_adjust(hmp, &chain->bref,
						HAMMER2_FREEMAP_DORECOVER);
				atomic_set_int(&chain->flags,
						HAMMER2_CHAIN_DEDUPABLE);
			} else {
				error = hammer2_freemap_alloc(chain,
							      chain->bytes);
				atomic_clear_int(&chain->flags,
						HAMMER2_CHAIN_DEDUPABLE);

				/*
				 * If we are unable to allocate a new block
				 * but we are in emergency mode, issue a
				 * warning to the console and reuse the same
				 * block.
				 *
				 * We behave as if the allocation were
				 * successful.
				 *
				 * THIS IS IMPORTANT: These modifications
				 * are virtually guaranteed to corrupt any
				 * snapshots related to this filesystem.
				 */
				if (error && (hmp->hflags & HMNT2_EMERG)) {
					error = 0;
					chain->bref.flags |=
						HAMMER2_BREF_FLAG_EMERG_MIP;

					krateprintf(&krate_h2em,
					    "hammer2: Emergency Mode WARNING: "
					    "Operation will likely corrupt "
					    "related snapshot: "
					    "%016jx.%02x key=%016jx\n",
					    chain->bref.data_off,
					    chain->bref.type,
					    chain->bref.key);
				} else if (error == 0) {
					chain->bref.flags &=
						~HAMMER2_BREF_FLAG_EMERG_MIP;
				}
			}
		}
	}

	/*
	 * Stop here if error.  We have to undo any flag bits we might
	 * have set above.
	 */
	if (error) {
		if (setmodified) {
			atomic_clear_int(&chain->flags, HAMMER2_CHAIN_MODIFIED);
			atomic_add_long(&hammer2_count_modified_chains, -1);
			if (chain->pmp)
				hammer2_pfs_memory_wakeup(chain->pmp, -1);
		}
		if (setupdate) {
			atomic_clear_int(&chain->flags, HAMMER2_CHAIN_UPDATE);
		}
		lockmgr(&chain->diolk, LK_RELEASE);

		return error;
	}

	/*
	 * Update mirror_tid and modify_tid.  modify_tid is only updated
	 * if not passed as zero (during flushes, parent propagation passes
	 * the value 0).
	 *
	 * NOTE: chain->pmp could be the device spmp.
	 */
	chain->bref.mirror_tid = hmp->voldata.mirror_tid + 1;
	if (mtid)
		chain->bref.modify_tid = mtid;

	/*
	 * Set BMAPUPD to tell the flush code that an existing blockmap entry
	 * requires updating as well as to tell the delete code that the
	 * chain's blockref might not exactly match (in terms of physical size
	 * or block offset) the one in the parent's blocktable.  The base key
	 * of course will still match.
	 */
	if (chain->flags & HAMMER2_CHAIN_BMAPPED)
		atomic_set_int(&chain->flags, HAMMER2_CHAIN_BMAPUPD);

	/*
	 * Short-cut data block handling when the caller does not need an
	 * actual data reference to (aka OPTDATA), as long as the chain does
	 * not already have a data pointer to the data and no de-duplication
	 * occurred.
	 *
	 * This generally means that the modifications are being done via the
	 * logical buffer cache.
	 *
	 * NOTE: If deduplication occurred we have to run through the data
	 *	 stuff to clear INITIAL, and the caller will likely want to
	 *	 assign the check code anyway.  Leaving INITIAL set on a
	 *	 dedup can be deadly (it can cause the block to be zero'd!).
	 *
	 * This code also handles bytes == 0 (most dirents).
	 */
	if (chain->bref.type == HAMMER2_BREF_TYPE_DATA &&
	    (flags & HAMMER2_MODIFY_OPTDATA) &&
	    chain->data == NULL) {
		if (dedup_off == 0) {
			KKASSERT(chain->dio == NULL);
			goto skip2;
		}
	}

	/*
	 * Clearing the INITIAL flag (for indirect blocks) indicates that
	 * we've processed the uninitialized storage allocation.
	 *
	 * If this flag is already clear we are likely in a copy-on-write
	 * situation but we have to be sure NOT to bzero the storage if
	 * no data is present.
	 *
	 * Clearing of NOTTESTED is allowed if the MODIFIED bit is set.
	 */
	if (chain->flags & HAMMER2_CHAIN_INITIAL) {
		atomic_clear_int(&chain->flags, HAMMER2_CHAIN_INITIAL);
		wasinitial = 1;
	} else {
		wasinitial = 0;
	}

	/*
	 * Instantiate data buffer and possibly execute COW operation
	 */
	switch(chain->bref.type) {
	case HAMMER2_BREF_TYPE_VOLUME:
	case HAMMER2_BREF_TYPE_FREEMAP:
		/*
		 * The data is embedded, no copy-on-write operation is
		 * needed.
		 */
		KKASSERT(chain->dio == NULL);
		break;
	case HAMMER2_BREF_TYPE_DIRENT:
		/*
		 * The data might be fully embedded.
		 */
		if (chain->bytes == 0) {
			KKASSERT(chain->dio == NULL);
			break;
		}
		/* fall through */
	case HAMMER2_BREF_TYPE_INODE:
	case HAMMER2_BREF_TYPE_FREEMAP_LEAF:
	case HAMMER2_BREF_TYPE_DATA:
	case HAMMER2_BREF_TYPE_INDIRECT:
	case HAMMER2_BREF_TYPE_FREEMAP_NODE:
		/*
		 * Perform the copy-on-write operation
		 *
		 * zero-fill or copy-on-write depending on whether
		 * chain->data exists or not and set the dirty state for
		 * the new buffer.  hammer2_io_new() will handle the
		 * zero-fill.
		 *
		 * If a dedup_off was supplied this is an existing block
		 * and no COW, copy, or further modification is required.
		 */
		KKASSERT(chain != &hmp->vchain && chain != &hmp->fchain);

		if (wasinitial && dedup_off == 0) {
			error = hammer2_io_new(hmp, chain->bref.type,
					       chain->bref.data_off,
					       chain->bytes, &dio);
		} else {
			error = hammer2_io_bread(hmp, chain->bref.type,
						 chain->bref.data_off,
						 chain->bytes, &dio);
		}
		hammer2_adjreadcounter(chain->bref.type, chain->bytes);

		/*
		 * If an I/O error occurs make sure callers cannot accidentally
		 * modify the old buffer's contents and corrupt the filesystem.
		 *
		 * NOTE: hammer2_io_data() call issues bkvasync()
		 */
		if (error) {
			kprintf("hammer2_chain_modify: hmp=%p I/O error\n",
				hmp);
			chain->error = HAMMER2_ERROR_EIO;
			hammer2_io_brelse(&dio);
			hammer2_io_brelse(&chain->dio);
			chain->data = NULL;
			break;
		}
		chain->error = 0;
		bdata = hammer2_io_data(dio, chain->bref.data_off);

		if (chain->data) {
			/*
			 * COW (unless a dedup).
			 */
			KKASSERT(chain->dio != NULL);
			if (chain->data != (void *)bdata && dedup_off == 0) {
				bcopy(chain->data, bdata, chain->bytes);
			}
		} else if (wasinitial == 0 && dedup_off == 0) {
			/*
			 * We have a problem.  We were asked to COW but
			 * we don't have any data to COW with!
			 */
			panic("hammer2_chain_modify: having a COW %p\n",
			      chain);
		}

		/*
		 * Retire the old buffer, replace with the new.  Dirty or
		 * redirty the new buffer.
		 *
		 * WARNING! The system buffer cache may have already flushed
		 *	    the buffer, so we must be sure to [re]dirty it
		 *	    for further modification.
		 *
		 *	    If dedup_off was supplied, the caller is not
		 *	    expected to make any further modification to the
		 *	    buffer.
		 *
		 * WARNING! hammer2_get_gdata() assumes dio never transitions
		 *	    through NULL in order to optimize away unnecessary
		 *	    diolk operations.
		 */
		{
			hammer2_io_t *tio;

			if ((tio = chain->dio) != NULL)
				hammer2_io_bqrelse(&tio);
			chain->data = (void *)bdata;
			chain->dio = dio;
			if (dedup_off == 0)
				hammer2_io_setdirty(dio);
		}
		break;
	default:
		panic("hammer2_chain_modify: illegal non-embedded type %d",
		      chain->bref.type);
		break;

	}
skip2:
	/*
	 * setflush on parent indicating that the parent must recurse down
	 * to us.  Do not call on chain itself which might already have it
	 * set.
	 */
	if (chain->parent)
		hammer2_chain_setflush(chain->parent);
	lockmgr(&chain->diolk, LK_RELEASE);

	return (chain->error);
}

/*
 * Modify the chain associated with an inode.
 */
int
hammer2_chain_modify_ip(hammer2_inode_t *ip, hammer2_chain_t *chain,
			hammer2_tid_t mtid, int flags)
{
	int error;

	hammer2_inode_modify(ip);
	error = hammer2_chain_modify(chain, mtid, 0, flags);

	return error;
}

/*
 * Volume header data locks
 */
void
hammer2_voldata_lock(hammer2_dev_t *hmp)
{
	lockmgr(&hmp->vollk, LK_EXCLUSIVE);
}

void
hammer2_voldata_unlock(hammer2_dev_t *hmp)
{
	lockmgr(&hmp->vollk, LK_RELEASE);
}

void
hammer2_voldata_modify(hammer2_dev_t *hmp)
{
	/* flag the volume chain modified once, accounting memory use */
	if ((hmp->vchain.flags & HAMMER2_CHAIN_MODIFIED) == 0) {
		atomic_add_long(&hammer2_count_modified_chains, 1);
		atomic_set_int(&hmp->vchain.flags, HAMMER2_CHAIN_MODIFIED);
		hammer2_pfs_memory_inc(hmp->vchain.pmp);
	}
}

/*
 * This function returns the chain at the nearest key within the specified
 * range.  The returned chain will be referenced but not locked.
 *
 * This function will recurse through chain->rbtree as necessary and will
 * return a *key_nextp suitable for iteration.
*key_nextp is only set if 2158 * the iteration value is less than the current value of *key_nextp. 2159 * 2160 * The caller should use (*key_nextp) to calculate the actual range of 2161 * the returned element, which will be (key_beg to *key_nextp - 1), because 2162 * there might be another element which is superior to the returned element 2163 * and overlaps it. 2164 * 2165 * (*key_nextp) can be passed as key_beg in an iteration only while non-NULL 2166 * chains continue to be returned. On EOF (*key_nextp) may overflow since 2167 * it will wind up being (key_end + 1). 2168 * 2169 * WARNING! Must be called with child's spinlock held. Spinlock remains 2170 * held through the operation. 2171 */ 2172 struct hammer2_chain_find_info { 2173 hammer2_chain_t *best; 2174 hammer2_key_t key_beg; 2175 hammer2_key_t key_end; 2176 hammer2_key_t key_next; 2177 }; 2178 2179 static int hammer2_chain_find_cmp(hammer2_chain_t *child, void *data); 2180 static int hammer2_chain_find_callback(hammer2_chain_t *child, void *data); 2181 2182 static 2183 hammer2_chain_t * 2184 hammer2_chain_find(hammer2_chain_t *parent, hammer2_key_t *key_nextp, 2185 hammer2_key_t key_beg, hammer2_key_t key_end) 2186 { 2187 struct hammer2_chain_find_info info; 2188 2189 info.best = NULL; 2190 info.key_beg = key_beg; 2191 info.key_end = key_end; 2192 info.key_next = *key_nextp; 2193 2194 RB_SCAN(hammer2_chain_tree, &parent->core.rbtree, 2195 hammer2_chain_find_cmp, hammer2_chain_find_callback, 2196 &info); 2197 *key_nextp = info.key_next; 2198 #if 0 2199 kprintf("chain_find %p %016jx:%016jx next=%016jx\n", 2200 parent, key_beg, key_end, *key_nextp); 2201 #endif 2202 2203 return (info.best); 2204 } 2205 2206 static 2207 int 2208 hammer2_chain_find_cmp(hammer2_chain_t *child, void *data) 2209 { 2210 struct hammer2_chain_find_info *info = data; 2211 hammer2_key_t child_beg; 2212 hammer2_key_t child_end; 2213 2214 child_beg = child->bref.key; 2215 child_end = child_beg + ((hammer2_key_t)1 << child->bref.keybits) 
- 1; 2216 2217 if (child_end < info->key_beg) 2218 return(-1); 2219 if (child_beg > info->key_end) 2220 return(1); 2221 return(0); 2222 } 2223 2224 static 2225 int 2226 hammer2_chain_find_callback(hammer2_chain_t *child, void *data) 2227 { 2228 struct hammer2_chain_find_info *info = data; 2229 hammer2_chain_t *best; 2230 hammer2_key_t child_end; 2231 2232 /* 2233 * WARNING! Layerq is scanned forwards, exact matches should keep 2234 * the existing info->best. 2235 */ 2236 if ((best = info->best) == NULL) { 2237 /* 2238 * No previous best. Assign best 2239 */ 2240 info->best = child; 2241 } else if (best->bref.key <= info->key_beg && 2242 child->bref.key <= info->key_beg) { 2243 /* 2244 * Illegal overlap. 2245 */ 2246 KKASSERT(0); 2247 /*info->best = child;*/ 2248 } else if (child->bref.key < best->bref.key) { 2249 /* 2250 * Child has a nearer key and best is not flush with key_beg. 2251 * Set best to child. Truncate key_next to the old best key. 2252 */ 2253 info->best = child; 2254 if (info->key_next > best->bref.key || info->key_next == 0) 2255 info->key_next = best->bref.key; 2256 } else if (child->bref.key == best->bref.key) { 2257 /* 2258 * If our current best is flush with the child then this 2259 * is an illegal overlap. 2260 * 2261 * key_next will automatically be limited to the smaller of 2262 * the two end-points. 2263 */ 2264 KKASSERT(0); 2265 info->best = child; 2266 } else { 2267 /* 2268 * Keep the current best but truncate key_next to the child's 2269 * base. 2270 * 2271 * key_next will also automatically be limited to the smaller 2272 * of the two end-points (probably not necessary for this case 2273 * but we do it anyway). 2274 */ 2275 if (info->key_next > child->bref.key || info->key_next == 0) 2276 info->key_next = child->bref.key; 2277 } 2278 2279 /* 2280 * Always truncate key_next based on child's end-of-range. 
2281 */ 2282 child_end = child->bref.key + ((hammer2_key_t)1 << child->bref.keybits); 2283 if (child_end && (info->key_next > child_end || info->key_next == 0)) 2284 info->key_next = child_end; 2285 2286 return(0); 2287 } 2288 2289 /* 2290 * Retrieve the specified chain from a media blockref, creating the 2291 * in-memory chain structure which reflects it. The returned chain is 2292 * held and locked according to (how) (HAMMER2_RESOLVE_*). The caller must 2293 * handle crc-checks and so forth, and should check chain->error before 2294 * assuming that the data is good. 2295 * 2296 * To handle insertion races pass the INSERT_RACE flag along with the 2297 * generation number of the core. NULL will be returned if the generation 2298 * number changes before we have a chance to insert the chain. Insert 2299 * races can occur because the parent might be held shared. 2300 * 2301 * Caller must hold the parent locked shared or exclusive since we may 2302 * need the parent's bref array to find our block. 2303 * 2304 * WARNING! chain->pmp is always set to NULL for any chain representing 2305 * part of the super-root topology. 2306 */ 2307 hammer2_chain_t * 2308 hammer2_chain_get(hammer2_chain_t *parent, int generation, 2309 hammer2_blockref_t *bref, int how) 2310 { 2311 hammer2_dev_t *hmp = parent->hmp; 2312 hammer2_chain_t *chain; 2313 int error; 2314 2315 /* 2316 * Allocate a chain structure representing the existing media 2317 * entry. Resulting chain has one ref and is not locked. 2318 */ 2319 if (bref->flags & HAMMER2_BREF_FLAG_PFSROOT) 2320 chain = hammer2_chain_alloc(hmp, NULL, bref); 2321 else 2322 chain = hammer2_chain_alloc(hmp, parent->pmp, bref); 2323 /* ref'd chain returned */ 2324 2325 /* 2326 * Flag that the chain is in the parent's blockmap so delete/flush 2327 * knows what to do with it. 
2328 */ 2329 atomic_set_int(&chain->flags, HAMMER2_CHAIN_BMAPPED); 2330 2331 /* 2332 * chain must be locked to avoid unexpected ripouts 2333 */ 2334 hammer2_chain_lock(chain, how); 2335 2336 /* 2337 * Link the chain into its parent. A spinlock is required to safely 2338 * access the RBTREE, and it is possible to collide with another 2339 * hammer2_chain_get() operation because the caller might only hold 2340 * a shared lock on the parent. 2341 * 2342 * NOTE: Get races can occur quite often when we distribute 2343 * asynchronous read-aheads across multiple threads. 2344 */ 2345 KKASSERT(parent->refs > 0); 2346 error = hammer2_chain_insert(parent, chain, 2347 HAMMER2_CHAIN_INSERT_SPIN | 2348 HAMMER2_CHAIN_INSERT_RACE, 2349 generation); 2350 if (error) { 2351 KKASSERT((chain->flags & HAMMER2_CHAIN_ONRBTREE) == 0); 2352 /*kprintf("chain %p get race\n", chain);*/ 2353 hammer2_chain_unlock(chain); 2354 hammer2_chain_drop(chain); 2355 chain = NULL; 2356 } else { 2357 KKASSERT(chain->flags & HAMMER2_CHAIN_ONRBTREE); 2358 } 2359 2360 /* 2361 * Return our new chain referenced but not locked, or NULL if 2362 * a race occurred. 2363 */ 2364 return (chain); 2365 } 2366 2367 /* 2368 * Lookup initialization/completion API 2369 */ 2370 hammer2_chain_t * 2371 hammer2_chain_lookup_init(hammer2_chain_t *parent, int flags) 2372 { 2373 hammer2_chain_ref(parent); 2374 if (flags & HAMMER2_LOOKUP_SHARED) { 2375 hammer2_chain_lock(parent, HAMMER2_RESOLVE_ALWAYS | 2376 HAMMER2_RESOLVE_SHARED); 2377 } else { 2378 hammer2_chain_lock(parent, HAMMER2_RESOLVE_ALWAYS); 2379 } 2380 return (parent); 2381 } 2382 2383 void 2384 hammer2_chain_lookup_done(hammer2_chain_t *parent) 2385 { 2386 if (parent) { 2387 hammer2_chain_unlock(parent); 2388 hammer2_chain_drop(parent); 2389 } 2390 } 2391 2392 /* 2393 * Take the locked chain and return a locked parent. The chain remains 2394 * locked on return, but may have to be temporarily unlocked to acquire 2395 * the parent. 
Because of this, (chain) must be stable and cannot be 2396 * deleted while it was temporarily unlocked (typically means that (chain) 2397 * is an inode). 2398 * 2399 * Pass HAMMER2_RESOLVE_* flags in flags. 2400 * 2401 * This will work even if the chain is errored, and the caller can check 2402 * parent->error on return if desired since the parent will be locked. 2403 * 2404 * This function handles the lock order reversal. 2405 */ 2406 hammer2_chain_t * 2407 hammer2_chain_getparent(hammer2_chain_t *chain, int flags) 2408 { 2409 hammer2_chain_t *parent; 2410 2411 /* 2412 * Be careful of order, chain must be unlocked before parent 2413 * is locked below to avoid a deadlock. Try it trivially first. 2414 */ 2415 parent = chain->parent; 2416 if (parent == NULL) 2417 panic("hammer2_chain_getparent: no parent"); 2418 hammer2_chain_ref(parent); 2419 if (hammer2_chain_lock(parent, flags|HAMMER2_RESOLVE_NONBLOCK) == 0) 2420 return parent; 2421 2422 for (;;) { 2423 hammer2_chain_unlock(chain); 2424 hammer2_chain_lock(parent, flags); 2425 hammer2_chain_lock(chain, flags); 2426 2427 /* 2428 * Parent relinking races are quite common. We have to get 2429 * it right or we will blow up the block table. 2430 */ 2431 if (chain->parent == parent) 2432 break; 2433 hammer2_chain_unlock(parent); 2434 hammer2_chain_drop(parent); 2435 cpu_ccfence(); 2436 parent = chain->parent; 2437 if (parent == NULL) 2438 panic("hammer2_chain_getparent: no parent"); 2439 hammer2_chain_ref(parent); 2440 } 2441 return parent; 2442 } 2443 2444 /* 2445 * Take the locked chain and return a locked parent. The chain is unlocked 2446 * and dropped. *chainp is set to the returned parent as a convenience. 2447 * Pass HAMMER2_RESOLVE_* flags in flags. 2448 * 2449 * This will work even if the chain is errored, and the caller can check 2450 * parent->error on return if desired since the parent will be locked. 2451 * 2452 * The chain does NOT need to be stable. 
We use a tracking structure 2453 * to track the expected parent if the chain is deleted out from under us. 2454 * 2455 * This function handles the lock order reversal. 2456 */ 2457 hammer2_chain_t * 2458 hammer2_chain_repparent(hammer2_chain_t **chainp, int flags) 2459 { 2460 hammer2_chain_t *chain; 2461 hammer2_chain_t *parent; 2462 struct hammer2_reptrack reptrack; 2463 struct hammer2_reptrack **repp; 2464 2465 /* 2466 * Be careful of order, chain must be unlocked before parent 2467 * is locked below to avoid a deadlock. Try it trivially first. 2468 */ 2469 chain = *chainp; 2470 parent = chain->parent; 2471 if (parent == NULL) { 2472 hammer2_spin_unex(&chain->core.spin); 2473 panic("hammer2_chain_repparent: no parent"); 2474 } 2475 hammer2_chain_ref(parent); 2476 if (hammer2_chain_lock(parent, flags|HAMMER2_RESOLVE_NONBLOCK) == 0) { 2477 hammer2_chain_unlock(chain); 2478 hammer2_chain_drop(chain); 2479 *chainp = parent; 2480 2481 return parent; 2482 } 2483 2484 /* 2485 * Ok, now it gets a bit nasty. There are multiple situations where 2486 * the parent might be in the middle of a deletion, or where the child 2487 * (chain) might be deleted the instant we let go of its lock. 2488 * We can potentially end up in a no-win situation! 2489 * 2490 * In particular, the indirect_maintenance() case can cause these 2491 * situations. 2492 * 2493 * To deal with this we install a reptrack structure in the parent 2494 * This reptrack structure 'owns' the parent ref and will automatically 2495 * migrate to the parent's parent if the parent is deleted permanently. 
2496 */ 2497 hammer2_spin_init(&reptrack.spin, "h2reptrk"); 2498 reptrack.chain = parent; 2499 hammer2_chain_ref(parent); /* for the reptrack */ 2500 2501 hammer2_spin_ex(&parent->core.spin); 2502 reptrack.next = parent->core.reptrack; 2503 parent->core.reptrack = &reptrack; 2504 hammer2_spin_unex(&parent->core.spin); 2505 2506 hammer2_chain_unlock(chain); 2507 hammer2_chain_drop(chain); 2508 chain = NULL; /* gone */ 2509 2510 /* 2511 * At the top of this loop, chain is gone and parent is refd both 2512 * by us explicitly AND via our reptrack. We are attempting to 2513 * lock parent. 2514 */ 2515 for (;;) { 2516 hammer2_chain_lock(parent, flags); 2517 2518 if (reptrack.chain == parent) 2519 break; 2520 hammer2_chain_unlock(parent); 2521 hammer2_chain_drop(parent); 2522 2523 kprintf("hammer2: debug REPTRACK %p->%p\n", 2524 parent, reptrack.chain); 2525 hammer2_spin_ex(&reptrack.spin); 2526 parent = reptrack.chain; 2527 hammer2_chain_ref(parent); 2528 hammer2_spin_unex(&reptrack.spin); 2529 } 2530 2531 /* 2532 * Once parent is locked and matches our reptrack, our reptrack 2533 * will be stable and we have our parent. We can unlink our 2534 * reptrack. 2535 * 2536 * WARNING! Remember that the chain lock might be shared. Chains 2537 * locked shared have stable parent linkages. 2538 */ 2539 hammer2_spin_ex(&parent->core.spin); 2540 repp = &parent->core.reptrack; 2541 while (*repp != &reptrack) 2542 repp = &(*repp)->next; 2543 *repp = reptrack.next; 2544 hammer2_spin_unex(&parent->core.spin); 2545 2546 hammer2_chain_drop(parent); /* reptrack ref */ 2547 *chainp = parent; /* return parent lock+ref */ 2548 2549 return parent; 2550 } 2551 2552 /* 2553 * Dispose of any linked reptrack structures in (chain) by shifting them to 2554 * (parent). Both (chain) and (parent) must be exclusively locked. 2555 * 2556 * This is interlocked against any children of (chain) on the other side. 
2557 * No children so remain as-of when this is called so we can test 2558 * core.reptrack without holding the spin-lock. 2559 * 2560 * Used whenever the caller intends to permanently delete chains related 2561 * to topological recursions (BREF_TYPE_INDIRECT, BREF_TYPE_FREEMAP_NODE), 2562 * where the chains underneath the node being deleted are given a new parent 2563 * above the node being deleted. 2564 */ 2565 static 2566 void 2567 hammer2_chain_repchange(hammer2_chain_t *parent, hammer2_chain_t *chain) 2568 { 2569 struct hammer2_reptrack *reptrack; 2570 2571 KKASSERT(chain->core.live_count == 0 && RB_EMPTY(&chain->core.rbtree)); 2572 while (chain->core.reptrack) { 2573 hammer2_spin_ex(&parent->core.spin); 2574 hammer2_spin_ex(&chain->core.spin); 2575 reptrack = chain->core.reptrack; 2576 if (reptrack == NULL) { 2577 hammer2_spin_unex(&chain->core.spin); 2578 hammer2_spin_unex(&parent->core.spin); 2579 break; 2580 } 2581 hammer2_spin_ex(&reptrack->spin); 2582 chain->core.reptrack = reptrack->next; 2583 reptrack->chain = parent; 2584 reptrack->next = parent->core.reptrack; 2585 parent->core.reptrack = reptrack; 2586 hammer2_chain_ref(parent); /* reptrack */ 2587 2588 hammer2_spin_unex(&chain->core.spin); 2589 hammer2_spin_unex(&parent->core.spin); 2590 kprintf("hammer2: debug repchange %p %p->%p\n", 2591 reptrack, chain, parent); 2592 hammer2_chain_drop(chain); /* reptrack */ 2593 } 2594 } 2595 2596 /* 2597 * Locate the first chain whos key range overlaps (key_beg, key_end) inclusive. 2598 * (*parentp) typically points to an inode but can also point to a related 2599 * indirect block and this function will recurse upwards and find the inode 2600 * or the nearest undeleted indirect block covering the key range. 2601 * 2602 * This function unconditionally sets *errorp, replacing any previous value. 
2603 * 2604 * (*parentp) must be exclusive or shared locked (depending on flags) and 2605 * referenced and can be an inode or an existing indirect block within the 2606 * inode. 2607 * 2608 * If (*parent) is errored out, this function will not attempt to recurse 2609 * the radix tree and will return NULL along with an appropriate *errorp. 2610 * If NULL is returned and *errorp is 0, the requested lookup could not be 2611 * located. 2612 * 2613 * On return (*parentp) will be modified to point at the deepest parent chain 2614 * element encountered during the search, as a helper for an insertion or 2615 * deletion. 2616 * 2617 * The new (*parentp) will be locked shared or exclusive (depending on flags), 2618 * and referenced, and the old will be unlocked and dereferenced (no change 2619 * if they are both the same). This is particularly important if the caller 2620 * wishes to insert a new chain, (*parentp) will be set properly even if NULL 2621 * is returned, as long as no error occurred. 2622 * 2623 * The matching chain will be returned locked according to flags. 2624 * 2625 * -- 2626 * 2627 * NULL is returned if no match was found, but (*parentp) will still 2628 * potentially be adjusted. 2629 * 2630 * On return (*key_nextp) will point to an iterative value for key_beg. 2631 * (If NULL is returned (*key_nextp) is set to (key_end + 1)). 2632 * 2633 * This function will also recurse up the chain if the key is not within the 2634 * current parent's range. (*parentp) can never be set to NULL. An iteration 2635 * can simply allow (*parentp) to float inside the loop. 2636 * 2637 * NOTE! chain->data is not always resolved. By default it will not be 2638 * resolved for BREF_TYPE_DATA, FREEMAP_NODE, or FREEMAP_LEAF. Use 2639 * HAMMER2_LOOKUP_ALWAYS to force resolution (but be careful w/ 2640 * BREF_TYPE_DATA as the device buffer can alias the logical file 2641 * buffer). 
2642 */ 2643 2644 hammer2_chain_t * 2645 hammer2_chain_lookup(hammer2_chain_t **parentp, hammer2_key_t *key_nextp, 2646 hammer2_key_t key_beg, hammer2_key_t key_end, 2647 int *errorp, int flags) 2648 { 2649 hammer2_dev_t *hmp; 2650 hammer2_chain_t *parent; 2651 hammer2_chain_t *chain; 2652 hammer2_blockref_t *base; 2653 hammer2_blockref_t *bref; 2654 hammer2_blockref_t bsave; 2655 hammer2_key_t scan_beg; 2656 hammer2_key_t scan_end; 2657 int count = 0; 2658 int how_always = HAMMER2_RESOLVE_ALWAYS; 2659 int how_maybe = HAMMER2_RESOLVE_MAYBE; 2660 int how; 2661 int generation; 2662 int maxloops = 300000; 2663 volatile hammer2_mtx_t save_mtx; 2664 2665 if (flags & HAMMER2_LOOKUP_ALWAYS) { 2666 how_maybe = how_always; 2667 how = HAMMER2_RESOLVE_ALWAYS; 2668 } else if (flags & HAMMER2_LOOKUP_NODATA) { 2669 how = HAMMER2_RESOLVE_NEVER; 2670 } else { 2671 how = HAMMER2_RESOLVE_MAYBE; 2672 } 2673 if (flags & HAMMER2_LOOKUP_SHARED) { 2674 how_maybe |= HAMMER2_RESOLVE_SHARED; 2675 how_always |= HAMMER2_RESOLVE_SHARED; 2676 how |= HAMMER2_RESOLVE_SHARED; 2677 } 2678 2679 /* 2680 * Recurse (*parentp) upward if necessary until the parent completely 2681 * encloses the key range or we hit the inode. 2682 * 2683 * Handle races against the flusher deleting indirect nodes on its 2684 * way back up by continuing to recurse upward past the deletion. 
2685 */ 2686 parent = *parentp; 2687 hmp = parent->hmp; 2688 *errorp = 0; 2689 2690 while (parent->bref.type == HAMMER2_BREF_TYPE_INDIRECT || 2691 parent->bref.type == HAMMER2_BREF_TYPE_FREEMAP_NODE) { 2692 scan_beg = parent->bref.key; 2693 scan_end = scan_beg + 2694 ((hammer2_key_t)1 << parent->bref.keybits) - 1; 2695 if ((parent->flags & HAMMER2_CHAIN_DELETED) == 0) { 2696 if (key_beg >= scan_beg && key_end <= scan_end) 2697 break; 2698 } 2699 parent = hammer2_chain_repparent(parentp, how_maybe); 2700 } 2701 again: 2702 if (--maxloops == 0) 2703 panic("hammer2_chain_lookup: maxloops"); 2704 2705 /* 2706 * MATCHIND case that does not require parent->data (do prior to 2707 * parent->error check). 2708 */ 2709 switch(parent->bref.type) { 2710 case HAMMER2_BREF_TYPE_FREEMAP_NODE: 2711 case HAMMER2_BREF_TYPE_INDIRECT: 2712 if (flags & HAMMER2_LOOKUP_MATCHIND) { 2713 scan_beg = parent->bref.key; 2714 scan_end = scan_beg + 2715 ((hammer2_key_t)1 << parent->bref.keybits) - 1; 2716 if (key_beg == scan_beg && key_end == scan_end) { 2717 chain = parent; 2718 hammer2_chain_ref(chain); 2719 hammer2_chain_lock(chain, how_maybe); 2720 *key_nextp = scan_end + 1; 2721 goto done; 2722 } 2723 } 2724 break; 2725 default: 2726 break; 2727 } 2728 2729 /* 2730 * No lookup is possible if the parent is errored. We delayed 2731 * this check as long as we could to ensure that the parent backup, 2732 * embedded data, and MATCHIND code could still execute. 2733 */ 2734 if (parent->error) { 2735 *errorp = parent->error; 2736 return NULL; 2737 } 2738 2739 /* 2740 * Locate the blockref array. Currently we do a fully associative 2741 * search through the array. 2742 */ 2743 switch(parent->bref.type) { 2744 case HAMMER2_BREF_TYPE_INODE: 2745 /* 2746 * Special shortcut for embedded data returns the inode 2747 * itself. Callers must detect this condition and access 2748 * the embedded data (the strategy code does this for us). 2749 * 2750 * This is only applicable to regular files and softlinks. 
2751 * 2752 * We need a second lock on parent. Since we already have 2753 * a lock we must pass LOCKAGAIN to prevent unexpected 2754 * blocking (we don't want to block on a second shared 2755 * ref if an exclusive lock is pending) 2756 */ 2757 if (parent->data->ipdata.meta.op_flags & 2758 HAMMER2_OPFLAG_DIRECTDATA) { 2759 if (flags & HAMMER2_LOOKUP_NODIRECT) { 2760 chain = NULL; 2761 *key_nextp = key_end + 1; 2762 goto done; 2763 } 2764 hammer2_chain_ref(parent); 2765 hammer2_chain_lock(parent, how_always | 2766 HAMMER2_RESOLVE_LOCKAGAIN); 2767 *key_nextp = key_end + 1; 2768 return (parent); 2769 } 2770 base = &parent->data->ipdata.u.blockset.blockref[0]; 2771 count = HAMMER2_SET_COUNT; 2772 break; 2773 case HAMMER2_BREF_TYPE_FREEMAP_NODE: 2774 case HAMMER2_BREF_TYPE_INDIRECT: 2775 /* 2776 * Optimize indirect blocks in the INITIAL state to avoid 2777 * I/O. 2778 * 2779 * Debugging: Enter permanent wait state instead of 2780 * panicing on unexpectedly NULL data for the moment. 2781 */ 2782 if (parent->flags & HAMMER2_CHAIN_INITIAL) { 2783 base = NULL; 2784 } else { 2785 if (parent->data == NULL) { 2786 kprintf("hammer2: unexpected NULL data " 2787 "on %p\n", parent); 2788 while (1) 2789 tsleep(parent, 0, "xxx", 0); 2790 } 2791 base = &parent->data->npdata[0]; 2792 } 2793 count = parent->bytes / sizeof(hammer2_blockref_t); 2794 break; 2795 case HAMMER2_BREF_TYPE_VOLUME: 2796 base = &parent->data->voldata.sroot_blockset.blockref[0]; 2797 count = HAMMER2_SET_COUNT; 2798 break; 2799 case HAMMER2_BREF_TYPE_FREEMAP: 2800 base = &parent->data->blkset.blockref[0]; 2801 count = HAMMER2_SET_COUNT; 2802 break; 2803 default: 2804 kprintf("hammer2_chain_lookup: unrecognized " 2805 "blockref(B) type: %d", 2806 parent->bref.type); 2807 while (1) 2808 tsleep(&base, 0, "dead", 0); 2809 panic("hammer2_chain_lookup: unrecognized " 2810 "blockref(B) type: %d", 2811 parent->bref.type); 2812 base = NULL; /* safety */ 2813 count = 0; /* safety */ 2814 } 2815 2816 /* 2817 * Merged scan to 
find next candidate. 2818 * 2819 * hammer2_base_*() functions require the parent->core.live_* fields 2820 * to be synchronized. 2821 * 2822 * We need to hold the spinlock to access the block array and RB tree 2823 * and to interlock chain creation. 2824 */ 2825 if ((parent->flags & HAMMER2_CHAIN_COUNTEDBREFS) == 0) 2826 hammer2_chain_countbrefs(parent, base, count); 2827 2828 /* 2829 * Combined search 2830 */ 2831 hammer2_spin_ex(&parent->core.spin); 2832 chain = hammer2_combined_find(parent, base, count, 2833 key_nextp, 2834 key_beg, key_end, 2835 &bref); 2836 generation = parent->core.generation; 2837 2838 /* 2839 * Exhausted parent chain, iterate. 2840 */ 2841 if (bref == NULL) { 2842 KKASSERT(chain == NULL); 2843 hammer2_spin_unex(&parent->core.spin); 2844 if (key_beg == key_end) /* short cut single-key case */ 2845 return (NULL); 2846 2847 /* 2848 * Stop if we reached the end of the iteration. 2849 */ 2850 if (parent->bref.type != HAMMER2_BREF_TYPE_INDIRECT && 2851 parent->bref.type != HAMMER2_BREF_TYPE_FREEMAP_NODE) { 2852 return (NULL); 2853 } 2854 2855 /* 2856 * Calculate next key, stop if we reached the end of the 2857 * iteration, otherwise go up one level and loop. 2858 */ 2859 key_beg = parent->bref.key + 2860 ((hammer2_key_t)1 << parent->bref.keybits); 2861 if (key_beg == 0 || key_beg > key_end) 2862 return (NULL); 2863 parent = hammer2_chain_repparent(parentp, how_maybe); 2864 goto again; 2865 } 2866 2867 /* 2868 * Selected from blockref or in-memory chain. 
2869 */ 2870 bsave = *bref; 2871 if (chain == NULL) { 2872 hammer2_spin_unex(&parent->core.spin); 2873 if (bsave.type == HAMMER2_BREF_TYPE_INDIRECT || 2874 bsave.type == HAMMER2_BREF_TYPE_FREEMAP_NODE) { 2875 chain = hammer2_chain_get(parent, generation, 2876 &bsave, how_maybe); 2877 } else { 2878 chain = hammer2_chain_get(parent, generation, 2879 &bsave, how); 2880 } 2881 if (chain == NULL) 2882 goto again; 2883 } else { 2884 hammer2_chain_ref(chain); 2885 hammer2_spin_unex(&parent->core.spin); 2886 2887 /* 2888 * chain is referenced but not locked. We must lock the 2889 * chain to obtain definitive state. 2890 */ 2891 if (bsave.type == HAMMER2_BREF_TYPE_INDIRECT || 2892 bsave.type == HAMMER2_BREF_TYPE_FREEMAP_NODE) { 2893 hammer2_chain_lock(chain, how_maybe); 2894 } else { 2895 hammer2_chain_lock(chain, how); 2896 } 2897 KKASSERT(chain->parent == parent); 2898 } 2899 if (bcmp(&bsave, &chain->bref, sizeof(bsave)) || 2900 chain->parent != parent) { 2901 hammer2_chain_unlock(chain); 2902 hammer2_chain_drop(chain); 2903 chain = NULL; /* SAFETY */ 2904 goto again; 2905 } 2906 2907 2908 /* 2909 * Skip deleted chains (XXX cache 'i' end-of-block-array? XXX) 2910 * 2911 * NOTE: Chain's key range is not relevant as there might be 2912 * one-offs within the range that are not deleted. 2913 * 2914 * NOTE: Lookups can race delete-duplicate because 2915 * delete-duplicate does not lock the parent's core 2916 * (they just use the spinlock on the core). 2917 */ 2918 if (chain->flags & HAMMER2_CHAIN_DELETED) { 2919 kprintf("skip deleted chain %016jx.%02x key=%016jx\n", 2920 chain->bref.data_off, chain->bref.type, 2921 chain->bref.key); 2922 hammer2_chain_unlock(chain); 2923 hammer2_chain_drop(chain); 2924 chain = NULL; /* SAFETY */ 2925 key_beg = *key_nextp; 2926 if (key_beg == 0 || key_beg > key_end) 2927 return(NULL); 2928 goto again; 2929 } 2930 2931 /* 2932 * If the chain element is an indirect block it becomes the new 2933 * parent and we loop on it. 
We must maintain our top-down locks 2934 * to prevent the flusher from interfering (i.e. doing a 2935 * delete-duplicate and leaving us recursing down a deleted chain). 2936 * 2937 * The parent always has to be locked with at least RESOLVE_MAYBE 2938 * so we can access its data. It might need a fixup if the caller 2939 * passed incompatible flags. Be careful not to cause a deadlock 2940 * as a data-load requires an exclusive lock. 2941 * 2942 * If HAMMER2_LOOKUP_MATCHIND is set and the indirect block's key 2943 * range is within the requested key range we return the indirect 2944 * block and do NOT loop. This is usually only used to acquire 2945 * freemap nodes. 2946 */ 2947 if (chain->bref.type == HAMMER2_BREF_TYPE_INDIRECT || 2948 chain->bref.type == HAMMER2_BREF_TYPE_FREEMAP_NODE) { 2949 save_mtx = parent->lock; 2950 hammer2_chain_unlock(parent); 2951 hammer2_chain_drop(parent); 2952 *parentp = parent = chain; 2953 chain = NULL; /* SAFETY */ 2954 goto again; 2955 } 2956 done: 2957 /* 2958 * All done, return the locked chain. 2959 * 2960 * If the caller does not want a locked chain, replace the lock with 2961 * a ref. Perhaps this can eventually be optimized to not obtain the 2962 * lock in the first place for situations where the data does not 2963 * need to be resolved. 2964 * 2965 * NOTE! A chain->error must be tested by the caller upon return. 2966 * *errorp is only set based on issues which occur while 2967 * trying to reach the chain. 2968 */ 2969 return (chain); 2970 } 2971 2972 /* 2973 * After having issued a lookup we can iterate all matching keys. 2974 * 2975 * If chain is non-NULL we continue the iteration from just after it's index. 2976 * 2977 * If chain is NULL we assume the parent was exhausted and continue the 2978 * iteration at the next parent. 2979 * 2980 * If a fatal error occurs (typically an I/O error), a dummy chain is 2981 * returned with chain->error and error-identifying information set. 
This 2982 * chain will assert if you try to do anything fancy with it. 2983 * 2984 * XXX Depending on where the error occurs we should allow continued iteration. 2985 * 2986 * parent must be locked on entry and remains locked throughout. chain's 2987 * lock status must match flags. Chain is always at least referenced. 2988 * 2989 * WARNING! The MATCHIND flag does not apply to this function. 2990 */ 2991 hammer2_chain_t * 2992 hammer2_chain_next(hammer2_chain_t **parentp, hammer2_chain_t *chain, 2993 hammer2_key_t *key_nextp, 2994 hammer2_key_t key_beg, hammer2_key_t key_end, 2995 int *errorp, int flags) 2996 { 2997 hammer2_chain_t *parent; 2998 int how_maybe; 2999 3000 /* 3001 * Calculate locking flags for upward recursion. 3002 */ 3003 how_maybe = HAMMER2_RESOLVE_MAYBE; 3004 if (flags & HAMMER2_LOOKUP_SHARED) 3005 how_maybe |= HAMMER2_RESOLVE_SHARED; 3006 3007 parent = *parentp; 3008 *errorp = 0; 3009 3010 /* 3011 * Calculate the next index and recalculate the parent if necessary. 3012 */ 3013 if (chain) { 3014 key_beg = chain->bref.key + 3015 ((hammer2_key_t)1 << chain->bref.keybits); 3016 hammer2_chain_unlock(chain); 3017 hammer2_chain_drop(chain); 3018 3019 /* 3020 * chain invalid past this point, but we can still do a 3021 * pointer comparison w/parent. 3022 * 3023 * Any scan where the lookup returned degenerate data embedded 3024 * in the inode has an invalid index and must terminate. 3025 */ 3026 if (chain == parent) 3027 return(NULL); 3028 if (key_beg == 0 || key_beg > key_end) 3029 return(NULL); 3030 chain = NULL; 3031 } else if (parent->bref.type != HAMMER2_BREF_TYPE_INDIRECT && 3032 parent->bref.type != HAMMER2_BREF_TYPE_FREEMAP_NODE) { 3033 /* 3034 * We reached the end of the iteration. 3035 */ 3036 return (NULL); 3037 } else { 3038 /* 3039 * Continue iteration with next parent unless the current 3040 * parent covers the range. 3041 * 3042 * (This also handles the case of a deleted, empty indirect 3043 * node). 
3044 */ 3045 key_beg = parent->bref.key + 3046 ((hammer2_key_t)1 << parent->bref.keybits); 3047 if (key_beg == 0 || key_beg > key_end) 3048 return (NULL); 3049 parent = hammer2_chain_repparent(parentp, how_maybe); 3050 } 3051 3052 /* 3053 * And execute 3054 */ 3055 return (hammer2_chain_lookup(parentp, key_nextp, 3056 key_beg, key_end, 3057 errorp, flags)); 3058 } 3059 3060 /* 3061 * Caller wishes to iterate chains under parent, loading new chains into 3062 * chainp. Caller must initialize *chainp to NULL and *firstp to 1, and 3063 * then call hammer2_chain_scan() repeatedly until a non-zero return. 3064 * During the scan, *firstp will be set to 0 and (*chainp) will be replaced 3065 * with the returned chain for the scan. The returned *chainp will be 3066 * locked and referenced. Any prior contents will be unlocked and dropped. 3067 * 3068 * Caller should check the return value. A normal scan EOF will return 3069 * exactly HAMMER2_ERROR_EOF. Any other non-zero value indicates an 3070 * error trying to access parent data. Any error in the returned chain 3071 * must be tested separately by the caller. 3072 * 3073 * (*chainp) is dropped on each scan, but will only be set if the returned 3074 * element itself can recurse. Leaf elements are NOT resolved, loaded, or 3075 * returned via *chainp. The caller will get their bref only. 3076 * 3077 * The raw scan function is similar to lookup/next but does not seek to a key. 3078 * Blockrefs are iterated via first_bref = (parent, NULL) and 3079 * next_chain = (parent, bref). 3080 * 3081 * The passed-in parent must be locked and its data resolved. The function 3082 * nominally returns a locked and referenced *chainp != NULL for chains 3083 * the caller might need to recurse on (and will dipose of any *chainp passed 3084 * in). The caller must check the chain->bref.type either way. 
3085 */ 3086 int 3087 hammer2_chain_scan(hammer2_chain_t *parent, hammer2_chain_t **chainp, 3088 hammer2_blockref_t *bref, int *firstp, 3089 int flags) 3090 { 3091 hammer2_dev_t *hmp; 3092 hammer2_blockref_t *base; 3093 hammer2_blockref_t *bref_ptr; 3094 hammer2_key_t key; 3095 hammer2_key_t next_key; 3096 hammer2_chain_t *chain = NULL; 3097 int count = 0; 3098 int how_always = HAMMER2_RESOLVE_ALWAYS; 3099 int how_maybe = HAMMER2_RESOLVE_MAYBE; 3100 int how; 3101 int generation; 3102 int maxloops = 300000; 3103 int error; 3104 3105 hmp = parent->hmp; 3106 error = 0; 3107 3108 /* 3109 * Scan flags borrowed from lookup. 3110 */ 3111 if (flags & HAMMER2_LOOKUP_ALWAYS) { 3112 how_maybe = how_always; 3113 how = HAMMER2_RESOLVE_ALWAYS; 3114 } else if (flags & HAMMER2_LOOKUP_NODATA) { 3115 how = HAMMER2_RESOLVE_NEVER; 3116 } else { 3117 how = HAMMER2_RESOLVE_MAYBE; 3118 } 3119 if (flags & HAMMER2_LOOKUP_SHARED) { 3120 how_maybe |= HAMMER2_RESOLVE_SHARED; 3121 how_always |= HAMMER2_RESOLVE_SHARED; 3122 how |= HAMMER2_RESOLVE_SHARED; 3123 } 3124 3125 /* 3126 * Calculate key to locate first/next element, unlocking the previous 3127 * element as we go. Be careful, the key calculation can overflow. 3128 * 3129 * (also reset bref to NULL) 3130 */ 3131 if (*firstp) { 3132 key = 0; 3133 *firstp = 0; 3134 } else { 3135 key = bref->key + ((hammer2_key_t)1 << bref->keybits); 3136 if ((chain = *chainp) != NULL) { 3137 *chainp = NULL; 3138 hammer2_chain_unlock(chain); 3139 hammer2_chain_drop(chain); 3140 chain = NULL; 3141 } 3142 if (key == 0) { 3143 error |= HAMMER2_ERROR_EOF; 3144 goto done; 3145 } 3146 } 3147 3148 again: 3149 if (parent->error) { 3150 error = parent->error; 3151 goto done; 3152 } 3153 if (--maxloops == 0) 3154 panic("hammer2_chain_scan: maxloops"); 3155 3156 /* 3157 * Locate the blockref array. Currently we do a fully associative 3158 * search through the array. 
3159 */ 3160 switch(parent->bref.type) { 3161 case HAMMER2_BREF_TYPE_INODE: 3162 /* 3163 * An inode with embedded data has no sub-chains. 3164 * 3165 * WARNING! Bulk scan code may pass a static chain marked 3166 * as BREF_TYPE_INODE with a copy of the volume 3167 * root blockset to snapshot the volume. 3168 */ 3169 if (parent->data->ipdata.meta.op_flags & 3170 HAMMER2_OPFLAG_DIRECTDATA) { 3171 error |= HAMMER2_ERROR_EOF; 3172 goto done; 3173 } 3174 base = &parent->data->ipdata.u.blockset.blockref[0]; 3175 count = HAMMER2_SET_COUNT; 3176 break; 3177 case HAMMER2_BREF_TYPE_FREEMAP_NODE: 3178 case HAMMER2_BREF_TYPE_INDIRECT: 3179 /* 3180 * Optimize indirect blocks in the INITIAL state to avoid 3181 * I/O. 3182 */ 3183 if (parent->flags & HAMMER2_CHAIN_INITIAL) { 3184 base = NULL; 3185 } else { 3186 if (parent->data == NULL) 3187 panic("parent->data is NULL"); 3188 base = &parent->data->npdata[0]; 3189 } 3190 count = parent->bytes / sizeof(hammer2_blockref_t); 3191 break; 3192 case HAMMER2_BREF_TYPE_VOLUME: 3193 base = &parent->data->voldata.sroot_blockset.blockref[0]; 3194 count = HAMMER2_SET_COUNT; 3195 break; 3196 case HAMMER2_BREF_TYPE_FREEMAP: 3197 base = &parent->data->blkset.blockref[0]; 3198 count = HAMMER2_SET_COUNT; 3199 break; 3200 default: 3201 panic("hammer2_chain_scan: unrecognized blockref type: %d", 3202 parent->bref.type); 3203 base = NULL; /* safety */ 3204 count = 0; /* safety */ 3205 } 3206 3207 /* 3208 * Merged scan to find next candidate. 3209 * 3210 * hammer2_base_*() functions require the parent->core.live_* fields 3211 * to be synchronized. 3212 * 3213 * We need to hold the spinlock to access the block array and RB tree 3214 * and to interlock chain creation. 
3215 */ 3216 if ((parent->flags & HAMMER2_CHAIN_COUNTEDBREFS) == 0) 3217 hammer2_chain_countbrefs(parent, base, count); 3218 3219 next_key = 0; 3220 bref_ptr = NULL; 3221 hammer2_spin_ex(&parent->core.spin); 3222 chain = hammer2_combined_find(parent, base, count, 3223 &next_key, 3224 key, HAMMER2_KEY_MAX, 3225 &bref_ptr); 3226 generation = parent->core.generation; 3227 3228 /* 3229 * Exhausted parent chain, we're done. 3230 */ 3231 if (bref_ptr == NULL) { 3232 hammer2_spin_unex(&parent->core.spin); 3233 KKASSERT(chain == NULL); 3234 error |= HAMMER2_ERROR_EOF; 3235 goto done; 3236 } 3237 3238 /* 3239 * Copy into the supplied stack-based blockref. 3240 */ 3241 *bref = *bref_ptr; 3242 3243 /* 3244 * Selected from blockref or in-memory chain. 3245 */ 3246 if (chain == NULL) { 3247 switch(bref->type) { 3248 case HAMMER2_BREF_TYPE_INODE: 3249 case HAMMER2_BREF_TYPE_FREEMAP_NODE: 3250 case HAMMER2_BREF_TYPE_INDIRECT: 3251 case HAMMER2_BREF_TYPE_VOLUME: 3252 case HAMMER2_BREF_TYPE_FREEMAP: 3253 /* 3254 * Recursion, always get the chain 3255 */ 3256 hammer2_spin_unex(&parent->core.spin); 3257 chain = hammer2_chain_get(parent, generation, 3258 bref, how); 3259 if (chain == NULL) 3260 goto again; 3261 break; 3262 default: 3263 /* 3264 * No recursion, do not waste time instantiating 3265 * a chain, just iterate using the bref. 3266 */ 3267 hammer2_spin_unex(&parent->core.spin); 3268 break; 3269 } 3270 } else { 3271 /* 3272 * Recursion or not we need the chain in order to supply 3273 * the bref. 3274 */ 3275 hammer2_chain_ref(chain); 3276 hammer2_spin_unex(&parent->core.spin); 3277 hammer2_chain_lock(chain, how); 3278 } 3279 if (chain && 3280 (bcmp(bref, &chain->bref, sizeof(*bref)) || 3281 chain->parent != parent)) { 3282 hammer2_chain_unlock(chain); 3283 hammer2_chain_drop(chain); 3284 chain = NULL; 3285 goto again; 3286 } 3287 3288 /* 3289 * Skip deleted chains (XXX cache 'i' end-of-block-array? 
XXX) 3290 * 3291 * NOTE: chain's key range is not relevant as there might be 3292 * one-offs within the range that are not deleted. 3293 * 3294 * NOTE: XXX this could create problems with scans used in 3295 * situations other than mount-time recovery. 3296 * 3297 * NOTE: Lookups can race delete-duplicate because 3298 * delete-duplicate does not lock the parent's core 3299 * (they just use the spinlock on the core). 3300 */ 3301 if (chain && (chain->flags & HAMMER2_CHAIN_DELETED)) { 3302 hammer2_chain_unlock(chain); 3303 hammer2_chain_drop(chain); 3304 chain = NULL; 3305 3306 key = next_key; 3307 if (key == 0) { 3308 error |= HAMMER2_ERROR_EOF; 3309 goto done; 3310 } 3311 goto again; 3312 } 3313 3314 done: 3315 /* 3316 * All done, return the bref or NULL, supply chain if necessary. 3317 */ 3318 if (chain) 3319 *chainp = chain; 3320 return (error); 3321 } 3322 3323 /* 3324 * Create and return a new hammer2 system memory structure of the specified 3325 * key, type and size and insert it under (*parentp). This is a full 3326 * insertion, based on the supplied key/keybits, and may involve creating 3327 * indirect blocks and moving other chains around via delete/duplicate. 3328 * 3329 * This call can be made with parent == NULL as long as a non -1 methods 3330 * is supplied. hmp must also be supplied in this situation (otherwise 3331 * hmp is extracted from the supplied parent). The chain will be detached 3332 * from the topology. A later call with both parent and chain can be made 3333 * to attach it. 3334 * 3335 * THE CALLER MUST HAVE ALREADY PROPERLY SEEKED (*parentp) TO THE INSERTION 3336 * POINT SANS ANY REQUIRED INDIRECT BLOCK CREATIONS DUE TO THE ARRAY BEING 3337 * FULL. This typically means that the caller is creating the chain after 3338 * doing a hammer2_chain_lookup(). 3339 * 3340 * (*parentp) must be exclusive locked and may be replaced on return 3341 * depending on how much work the function had to do. 
3342 * 3343 * (*parentp) must not be errored or this function will assert. 3344 * 3345 * (*chainp) usually starts out NULL and returns the newly created chain, 3346 * but if the caller desires the caller may allocate a disconnected chain 3347 * and pass it in instead. 3348 * 3349 * This function should NOT be used to insert INDIRECT blocks. It is 3350 * typically used to create/insert inodes and data blocks. 3351 * 3352 * Caller must pass-in an exclusively locked parent the new chain is to 3353 * be inserted under, and optionally pass-in a disconnected, exclusively 3354 * locked chain to insert (else we create a new chain). The function will 3355 * adjust (*parentp) as necessary, create or connect the chain, and 3356 * return an exclusively locked chain in *chainp. 3357 * 3358 * When creating a PFSROOT inode under the super-root, pmp is typically NULL 3359 * and will be reassigned. 3360 * 3361 * NOTE: returns HAMMER_ERROR_* flags 3362 */ 3363 int 3364 hammer2_chain_create(hammer2_chain_t **parentp, hammer2_chain_t **chainp, 3365 hammer2_dev_t *hmp, hammer2_pfs_t *pmp, int methods, 3366 hammer2_key_t key, int keybits, int type, size_t bytes, 3367 hammer2_tid_t mtid, hammer2_off_t dedup_off, int flags) 3368 { 3369 hammer2_chain_t *chain; 3370 hammer2_chain_t *parent; 3371 hammer2_blockref_t *base; 3372 hammer2_blockref_t dummy; 3373 int allocated = 0; 3374 int error = 0; 3375 int count; 3376 int maxloops = 300000; 3377 3378 /* 3379 * Topology may be crossing a PFS boundary. 3380 */ 3381 parent = *parentp; 3382 if (parent) { 3383 KKASSERT(hammer2_mtx_owned(&parent->lock)); 3384 KKASSERT(parent->error == 0); 3385 hmp = parent->hmp; 3386 } 3387 chain = *chainp; 3388 3389 if (chain == NULL) { 3390 /* 3391 * First allocate media space and construct the dummy bref, 3392 * then allocate the in-memory chain structure. Set the 3393 * INITIAL flag for fresh chains which do not have embedded 3394 * data. 
3395 */ 3396 bzero(&dummy, sizeof(dummy)); 3397 dummy.type = type; 3398 dummy.key = key; 3399 dummy.keybits = keybits; 3400 dummy.data_off = hammer2_getradix(bytes); 3401 3402 /* 3403 * Inherit methods from parent by default. Primarily used 3404 * for BREF_TYPE_DATA. Non-data types *must* be set to 3405 * a non-NONE check algorithm. 3406 */ 3407 if (methods == -1) 3408 dummy.methods = parent->bref.methods; 3409 else 3410 dummy.methods = (uint8_t)methods; 3411 3412 if (type != HAMMER2_BREF_TYPE_DATA && 3413 HAMMER2_DEC_CHECK(dummy.methods) == HAMMER2_CHECK_NONE) { 3414 dummy.methods |= 3415 HAMMER2_ENC_CHECK(HAMMER2_CHECK_DEFAULT); 3416 } 3417 3418 chain = hammer2_chain_alloc(hmp, pmp, &dummy); 3419 3420 /* 3421 * Lock the chain manually, chain_lock will load the chain 3422 * which we do NOT want to do. (note: chain->refs is set 3423 * to 1 by chain_alloc() for us, but lockcnt is not). 3424 */ 3425 chain->lockcnt = 1; 3426 hammer2_mtx_ex(&chain->lock); 3427 allocated = 1; 3428 3429 /* 3430 * Set INITIAL to optimize I/O. The flag will generally be 3431 * processed when we call hammer2_chain_modify(). 
3432 */ 3433 switch(type) { 3434 case HAMMER2_BREF_TYPE_VOLUME: 3435 case HAMMER2_BREF_TYPE_FREEMAP: 3436 panic("hammer2_chain_create: called with volume type"); 3437 break; 3438 case HAMMER2_BREF_TYPE_INDIRECT: 3439 panic("hammer2_chain_create: cannot be used to" 3440 "create indirect block"); 3441 break; 3442 case HAMMER2_BREF_TYPE_FREEMAP_NODE: 3443 panic("hammer2_chain_create: cannot be used to" 3444 "create freemap root or node"); 3445 break; 3446 case HAMMER2_BREF_TYPE_FREEMAP_LEAF: 3447 KKASSERT(bytes == sizeof(chain->data->bmdata)); 3448 /* fall through */ 3449 case HAMMER2_BREF_TYPE_DIRENT: 3450 case HAMMER2_BREF_TYPE_INODE: 3451 case HAMMER2_BREF_TYPE_DATA: 3452 default: 3453 /* 3454 * leave chain->data NULL, set INITIAL 3455 */ 3456 KKASSERT(chain->data == NULL); 3457 atomic_set_int(&chain->flags, HAMMER2_CHAIN_INITIAL); 3458 break; 3459 } 3460 } else { 3461 /* 3462 * We are reattaching a previously deleted chain, possibly 3463 * under a new parent and possibly with a new key/keybits. 3464 * The chain does not have to be in a modified state. The 3465 * UPDATE flag will be set later on in this routine. 3466 * 3467 * Do NOT mess with the current state of the INITIAL flag. 3468 */ 3469 chain->bref.key = key; 3470 chain->bref.keybits = keybits; 3471 if (chain->flags & HAMMER2_CHAIN_DELETED) 3472 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_DELETED); 3473 KKASSERT(chain->parent == NULL); 3474 } 3475 3476 /* 3477 * Set the appropriate bref flag if requested. 3478 * 3479 * NOTE! Callers can call this function to move chains without 3480 * knowing about special flags, so don't clear bref flags 3481 * here! 3482 */ 3483 if (flags & HAMMER2_INSERT_PFSROOT) 3484 chain->bref.flags |= HAMMER2_BREF_FLAG_PFSROOT; 3485 3486 if (parent == NULL) 3487 goto skip; 3488 3489 /* 3490 * Calculate how many entries we have in the blockref array and 3491 * determine if an indirect block is required when inserting into 3492 * the parent. 
3493 */ 3494 again: 3495 if (--maxloops == 0) 3496 panic("hammer2_chain_create: maxloops"); 3497 3498 switch(parent->bref.type) { 3499 case HAMMER2_BREF_TYPE_INODE: 3500 if ((parent->data->ipdata.meta.op_flags & 3501 HAMMER2_OPFLAG_DIRECTDATA) != 0) { 3502 kprintf("hammer2: parent set for direct-data! " 3503 "pkey=%016jx ckey=%016jx\n", 3504 parent->bref.key, 3505 chain->bref.key); 3506 } 3507 KKASSERT((parent->data->ipdata.meta.op_flags & 3508 HAMMER2_OPFLAG_DIRECTDATA) == 0); 3509 KKASSERT(parent->data != NULL); 3510 base = &parent->data->ipdata.u.blockset.blockref[0]; 3511 count = HAMMER2_SET_COUNT; 3512 break; 3513 case HAMMER2_BREF_TYPE_INDIRECT: 3514 case HAMMER2_BREF_TYPE_FREEMAP_NODE: 3515 if (parent->flags & HAMMER2_CHAIN_INITIAL) 3516 base = NULL; 3517 else 3518 base = &parent->data->npdata[0]; 3519 count = parent->bytes / sizeof(hammer2_blockref_t); 3520 break; 3521 case HAMMER2_BREF_TYPE_VOLUME: 3522 KKASSERT(parent->data != NULL); 3523 base = &parent->data->voldata.sroot_blockset.blockref[0]; 3524 count = HAMMER2_SET_COUNT; 3525 break; 3526 case HAMMER2_BREF_TYPE_FREEMAP: 3527 KKASSERT(parent->data != NULL); 3528 base = &parent->data->blkset.blockref[0]; 3529 count = HAMMER2_SET_COUNT; 3530 break; 3531 default: 3532 panic("hammer2_chain_create: unrecognized blockref type: %d", 3533 parent->bref.type); 3534 base = NULL; 3535 count = 0; 3536 break; 3537 } 3538 3539 /* 3540 * Make sure we've counted the brefs 3541 */ 3542 if ((parent->flags & HAMMER2_CHAIN_COUNTEDBREFS) == 0) 3543 hammer2_chain_countbrefs(parent, base, count); 3544 3545 KASSERT(parent->core.live_count >= 0 && 3546 parent->core.live_count <= count, 3547 ("bad live_count %d/%d (%02x, %d)", 3548 parent->core.live_count, count, 3549 parent->bref.type, parent->bytes)); 3550 3551 /* 3552 * If no free blockref could be found we must create an indirect 3553 * block and move a number of blockrefs into it. 
With the parent 3554 * locked we can safely lock each child in order to delete+duplicate 3555 * it without causing a deadlock. 3556 * 3557 * This may return the new indirect block or the old parent depending 3558 * on where the key falls. NULL is returned on error. 3559 */ 3560 if (parent->core.live_count == count) { 3561 hammer2_chain_t *nparent; 3562 3563 KKASSERT((flags & HAMMER2_INSERT_SAMEPARENT) == 0); 3564 3565 nparent = hammer2_chain_create_indirect(parent, key, keybits, 3566 mtid, type, &error); 3567 if (nparent == NULL) { 3568 if (allocated) 3569 hammer2_chain_drop(chain); 3570 chain = NULL; 3571 goto done; 3572 } 3573 if (parent != nparent) { 3574 hammer2_chain_unlock(parent); 3575 hammer2_chain_drop(parent); 3576 parent = *parentp = nparent; 3577 } 3578 goto again; 3579 } 3580 3581 /* 3582 * fall through if parent, or skip to here if no parent. 3583 */ 3584 skip: 3585 if (chain->flags & HAMMER2_CHAIN_DELETED) 3586 kprintf("Inserting deleted chain @%016jx\n", 3587 chain->bref.key); 3588 3589 /* 3590 * Link the chain into its parent. 3591 */ 3592 if (chain->parent != NULL) 3593 panic("hammer2: hammer2_chain_create: chain already connected"); 3594 KKASSERT(chain->parent == NULL); 3595 if (parent) { 3596 KKASSERT(parent->core.live_count < count); 3597 hammer2_chain_insert(parent, chain, 3598 HAMMER2_CHAIN_INSERT_SPIN | 3599 HAMMER2_CHAIN_INSERT_LIVE, 3600 0); 3601 } 3602 3603 if (allocated) { 3604 /* 3605 * Mark the newly created chain modified. This will cause 3606 * UPDATE to be set and process the INITIAL flag. 3607 * 3608 * Device buffers are not instantiated for DATA elements 3609 * as these are handled by logical buffers. 3610 * 3611 * Indirect and freemap node indirect blocks are handled 3612 * by hammer2_chain_create_indirect() and not by this 3613 * function. 3614 * 3615 * Data for all other bref types is expected to be 3616 * instantiated (INODE, LEAF). 
3617 */ 3618 switch(chain->bref.type) { 3619 case HAMMER2_BREF_TYPE_DATA: 3620 case HAMMER2_BREF_TYPE_FREEMAP_LEAF: 3621 case HAMMER2_BREF_TYPE_DIRENT: 3622 case HAMMER2_BREF_TYPE_INODE: 3623 error = hammer2_chain_modify(chain, mtid, dedup_off, 3624 HAMMER2_MODIFY_OPTDATA); 3625 break; 3626 default: 3627 /* 3628 * Remaining types are not supported by this function. 3629 * In particular, INDIRECT and LEAF_NODE types are 3630 * handled by create_indirect(). 3631 */ 3632 panic("hammer2_chain_create: bad type: %d", 3633 chain->bref.type); 3634 /* NOT REACHED */ 3635 break; 3636 } 3637 } else { 3638 /* 3639 * When reconnecting a chain we must set UPDATE and 3640 * setflush so the flush recognizes that it must update 3641 * the bref in the parent. 3642 */ 3643 if ((chain->flags & HAMMER2_CHAIN_UPDATE) == 0) 3644 atomic_set_int(&chain->flags, HAMMER2_CHAIN_UPDATE); 3645 } 3646 3647 /* 3648 * We must setflush(parent) to ensure that it recurses through to 3649 * chain. setflush(chain) might not work because ONFLUSH is possibly 3650 * already set in the chain (so it won't recurse up to set it in the 3651 * parent). 3652 */ 3653 if (parent) 3654 hammer2_chain_setflush(parent); 3655 3656 done: 3657 *chainp = chain; 3658 3659 return (error); 3660 } 3661 3662 /* 3663 * Move the chain from its old parent to a new parent. The chain must have 3664 * already been deleted or already disconnected (or never associated) with 3665 * a parent. The chain is reassociated with the new parent and the deleted 3666 * flag will be cleared (no longer deleted). The chain's modification state 3667 * is not altered. 3668 * 3669 * THE CALLER MUST HAVE ALREADY PROPERLY SEEKED (parent) TO THE INSERTION 3670 * POINT SANS ANY REQUIRED INDIRECT BLOCK CREATIONS DUE TO THE ARRAY BEING 3671 * FULL. This typically means that the caller is creating the chain after 3672 * doing a hammer2_chain_lookup(). 3673 * 3674 * Neither (parent) or (chain) can be errored. 
3675 * 3676 * If (parent) is non-NULL then the chain is inserted under the parent. 3677 * 3678 * If (parent) is NULL then the newly duplicated chain is not inserted 3679 * anywhere, similar to if it had just been chain_alloc()'d (suitable for 3680 * passing into hammer2_chain_create() after this function returns). 3681 * 3682 * WARNING! This function calls create which means it can insert indirect 3683 * blocks. This can cause other unrelated chains in the parent to 3684 * be moved to a newly inserted indirect block in addition to the 3685 * specific chain. 3686 */ 3687 void 3688 hammer2_chain_rename(hammer2_chain_t **parentp, hammer2_chain_t *chain, 3689 hammer2_tid_t mtid, int flags) 3690 { 3691 hammer2_blockref_t *bref; 3692 hammer2_dev_t *hmp; 3693 hammer2_chain_t *parent; 3694 size_t bytes; 3695 3696 /* 3697 * WARNING! We should never resolve DATA to device buffers 3698 * (XXX allow it if the caller did?), and since 3699 * we currently do not have the logical buffer cache 3700 * buffer in-hand to fix its cached physical offset 3701 * we also force the modify code to not COW it. XXX 3702 * 3703 * NOTE! We allow error'd chains to be renamed. The bref itself 3704 * is good and can be renamed. The content, however, may 3705 * be inaccessible. 3706 */ 3707 hmp = chain->hmp; 3708 KKASSERT(chain->parent == NULL); 3709 /*KKASSERT(chain->error == 0); allow */ 3710 3711 /* 3712 * Now create a duplicate of the chain structure, associating 3713 * it with the same core, making it the same size, pointing it 3714 * to the same bref (the same media block). 3715 * 3716 * NOTE: Handle special radix == 0 case (means 0 bytes). 3717 */ 3718 bref = &chain->bref; 3719 bytes = (size_t)(bref->data_off & HAMMER2_OFF_MASK_RADIX); 3720 if (bytes) 3721 bytes = (hammer2_off_t)1 << bytes; 3722 3723 /* 3724 * If parent is not NULL the duplicated chain will be entered under 3725 * the parent and the UPDATE bit set to tell flush to update 3726 * the blockref. 
3727 * 3728 * We must setflush(parent) to ensure that it recurses through to 3729 * chain. setflush(chain) might not work because ONFLUSH is possibly 3730 * already set in the chain (so it won't recurse up to set it in the 3731 * parent). 3732 * 3733 * Having both chains locked is extremely important for atomicy. 3734 */ 3735 if (parentp && (parent = *parentp) != NULL) { 3736 KKASSERT(hammer2_mtx_owned(&parent->lock)); 3737 KKASSERT(parent->refs > 0); 3738 KKASSERT(parent->error == 0); 3739 3740 hammer2_chain_create(parentp, &chain, NULL, chain->pmp, 3741 HAMMER2_METH_DEFAULT, 3742 bref->key, bref->keybits, bref->type, 3743 chain->bytes, mtid, 0, flags); 3744 KKASSERT(chain->flags & HAMMER2_CHAIN_UPDATE); 3745 hammer2_chain_setflush(*parentp); 3746 } 3747 } 3748 3749 /* 3750 * This works in tandem with delete_obref() to install a blockref in 3751 * (typically) an indirect block that is associated with the chain being 3752 * moved to *parentp. 3753 * 3754 * The reason we need this function is that the caller needs to maintain 3755 * the blockref as it was, and not generate a new blockref for what might 3756 * be a modified chain. Otherwise stuff will leak into the flush that 3757 * the flush code's FLUSH_INODE_STOP flag is unable to catch. 3758 * 3759 * It is EXTREMELY important that we properly set CHAIN_BMAPUPD and 3760 * CHAIN_UPDATE. We must set BMAPUPD if the bref does not match, and 3761 * we must clear CHAIN_UPDATE (that was likely set by the chain_rename) if 3762 * it does. Otherwise we can end up in a situation where H2 is unable to 3763 * clean up the in-memory chain topology. 3764 * 3765 * The reason for this is that flushes do not generally flush through 3766 * BREF_TYPE_INODE chains and depend on a hammer2_inode_t queued to syncq 3767 * or sideq to properly flush and dispose of the related inode chain's flags. 
3768 * Situations where the inode is not actually modified by the frontend, 3769 * but where we have to move the related chains around as we insert or cleanup 3770 * indirect blocks, can leave us with a 'dirty' (non-disposable) in-memory 3771 * inode chain that does not have a hammer2_inode_t associated with it. 3772 */ 3773 static void 3774 hammer2_chain_rename_obref(hammer2_chain_t **parentp, hammer2_chain_t *chain, 3775 hammer2_tid_t mtid, int flags, 3776 hammer2_blockref_t *obref) 3777 { 3778 hammer2_chain_rename(parentp, chain, mtid, flags); 3779 3780 if (obref->type != HAMMER2_BREF_TYPE_EMPTY) { 3781 hammer2_blockref_t *tbase; 3782 int tcount; 3783 3784 KKASSERT((chain->flags & HAMMER2_CHAIN_BMAPPED) == 0); 3785 hammer2_chain_modify(*parentp, mtid, 0, 0); 3786 tbase = hammer2_chain_base_and_count(*parentp, &tcount); 3787 hammer2_base_insert(*parentp, tbase, tcount, chain, obref); 3788 if (bcmp(obref, &chain->bref, sizeof(chain->bref))) { 3789 atomic_set_int(&chain->flags, HAMMER2_CHAIN_BMAPUPD | 3790 HAMMER2_CHAIN_UPDATE); 3791 } else { 3792 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_UPDATE); 3793 } 3794 } 3795 } 3796 3797 /* 3798 * Helper function for deleting chains. 3799 * 3800 * The chain is removed from the live view (the RBTREE) as well as the parent's 3801 * blockmap. Both chain and its parent must be locked. 3802 * 3803 * parent may not be errored. chain can be errored. 3804 */ 3805 static int 3806 _hammer2_chain_delete_helper(hammer2_chain_t *parent, hammer2_chain_t *chain, 3807 hammer2_tid_t mtid, int flags, 3808 hammer2_blockref_t *obref) 3809 { 3810 hammer2_dev_t *hmp; 3811 int error = 0; 3812 3813 KKASSERT((chain->flags & HAMMER2_CHAIN_DELETED) == 0); 3814 KKASSERT(chain->parent == parent); 3815 hmp = chain->hmp; 3816 3817 if (chain->flags & HAMMER2_CHAIN_BMAPPED) { 3818 /* 3819 * Chain is blockmapped, so there must be a parent. 3820 * Atomically remove the chain from the parent and remove 3821 * the blockmap entry. 
The parent must be set modified 3822 * to remove the blockmap entry. 3823 */ 3824 hammer2_blockref_t *base; 3825 int count; 3826 3827 KKASSERT(parent != NULL); 3828 KKASSERT(parent->error == 0); 3829 KKASSERT((parent->flags & HAMMER2_CHAIN_INITIAL) == 0); 3830 error = hammer2_chain_modify(parent, mtid, 0, 0); 3831 if (error) 3832 goto done; 3833 3834 /* 3835 * Calculate blockmap pointer 3836 */ 3837 KKASSERT(chain->flags & HAMMER2_CHAIN_ONRBTREE); 3838 hammer2_spin_ex(&chain->core.spin); 3839 hammer2_spin_ex(&parent->core.spin); 3840 3841 atomic_set_int(&chain->flags, HAMMER2_CHAIN_DELETED); 3842 atomic_add_int(&parent->core.live_count, -1); 3843 ++parent->core.generation; 3844 RB_REMOVE(hammer2_chain_tree, &parent->core.rbtree, chain); 3845 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_ONRBTREE); 3846 --parent->core.chain_count; 3847 chain->parent = NULL; 3848 3849 switch(parent->bref.type) { 3850 case HAMMER2_BREF_TYPE_INODE: 3851 /* 3852 * Access the inode's block array. However, there 3853 * is no block array if the inode is flagged 3854 * DIRECTDATA. 3855 */ 3856 if (parent->data && 3857 (parent->data->ipdata.meta.op_flags & 3858 HAMMER2_OPFLAG_DIRECTDATA) == 0) { 3859 base = 3860 &parent->data->ipdata.u.blockset.blockref[0]; 3861 } else { 3862 base = NULL; 3863 } 3864 count = HAMMER2_SET_COUNT; 3865 break; 3866 case HAMMER2_BREF_TYPE_INDIRECT: 3867 case HAMMER2_BREF_TYPE_FREEMAP_NODE: 3868 if (parent->data) 3869 base = &parent->data->npdata[0]; 3870 else 3871 base = NULL; 3872 count = parent->bytes / sizeof(hammer2_blockref_t); 3873 break; 3874 case HAMMER2_BREF_TYPE_VOLUME: 3875 base = &parent->data->voldata. 
3876 sroot_blockset.blockref[0]; 3877 count = HAMMER2_SET_COUNT; 3878 break; 3879 case HAMMER2_BREF_TYPE_FREEMAP: 3880 base = &parent->data->blkset.blockref[0]; 3881 count = HAMMER2_SET_COUNT; 3882 break; 3883 default: 3884 base = NULL; 3885 count = 0; 3886 panic("_hammer2_chain_delete_helper: " 3887 "unrecognized blockref type: %d", 3888 parent->bref.type); 3889 } 3890 3891 /* 3892 * delete blockmapped chain from its parent. 3893 * 3894 * The parent is not affected by any statistics in chain 3895 * which are pending synchronization. That is, there is 3896 * nothing to undo in the parent since they have not yet 3897 * been incorporated into the parent. 3898 * 3899 * The parent is affected by statistics stored in inodes. 3900 * Those have already been synchronized, so they must be 3901 * undone. XXX split update possible w/delete in middle? 3902 */ 3903 if (base) { 3904 hammer2_base_delete(parent, base, count, chain, obref); 3905 } 3906 hammer2_spin_unex(&parent->core.spin); 3907 hammer2_spin_unex(&chain->core.spin); 3908 } else if (chain->flags & HAMMER2_CHAIN_ONRBTREE) { 3909 /* 3910 * Chain is not blockmapped but a parent is present. 3911 * Atomically remove the chain from the parent. There is 3912 * no blockmap entry to remove. 3913 * 3914 * Because chain was associated with a parent but not 3915 * synchronized, the chain's *_count_up fields contain 3916 * inode adjustment statistics which must be undone. 
3917 */ 3918 hammer2_spin_ex(&chain->core.spin); 3919 hammer2_spin_ex(&parent->core.spin); 3920 atomic_set_int(&chain->flags, HAMMER2_CHAIN_DELETED); 3921 atomic_add_int(&parent->core.live_count, -1); 3922 ++parent->core.generation; 3923 RB_REMOVE(hammer2_chain_tree, &parent->core.rbtree, chain); 3924 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_ONRBTREE); 3925 --parent->core.chain_count; 3926 chain->parent = NULL; 3927 hammer2_spin_unex(&parent->core.spin); 3928 hammer2_spin_unex(&chain->core.spin); 3929 } else { 3930 /* 3931 * Chain is not blockmapped and has no parent. This 3932 * is a degenerate case. 3933 */ 3934 atomic_set_int(&chain->flags, HAMMER2_CHAIN_DELETED); 3935 } 3936 done: 3937 return error; 3938 } 3939 3940 /* 3941 * Create an indirect block that covers one or more of the elements in the 3942 * current parent. Either returns the existing parent with no locking or 3943 * ref changes or returns the new indirect block locked and referenced 3944 * and leaving the original parent lock/ref intact as well. 3945 * 3946 * If an error occurs, NULL is returned and *errorp is set to the H2 error. 3947 * 3948 * The returned chain depends on where the specified key falls. 3949 * 3950 * The key/keybits for the indirect mode only needs to follow three rules: 3951 * 3952 * (1) That all elements underneath it fit within its key space and 3953 * 3954 * (2) That all elements outside it are outside its key space. 3955 * 3956 * (3) When creating the new indirect block any elements in the current 3957 * parent that fit within the new indirect block's keyspace must be 3958 * moved into the new indirect block. 3959 * 3960 * (4) The keyspace chosen for the inserted indirect block CAN cover a wider 3961 * keyspace the the current parent, but lookup/iteration rules will 3962 * ensure (and must ensure) that rule (2) for all parents leading up 3963 * to the nearest inode or the root volume header is adhered to. 
This 3964 * is accomplished by always recursing through matching keyspaces in 3965 * the hammer2_chain_lookup() and hammer2_chain_next() API. 3966 * 3967 * The current implementation calculates the current worst-case keyspace by 3968 * iterating the current parent and then divides it into two halves, choosing 3969 * whichever half has the most elements (not necessarily the half containing 3970 * the requested key). 3971 * 3972 * We can also opt to use the half with the least number of elements. This 3973 * causes lower-numbered keys (aka logical file offsets) to recurse through 3974 * fewer indirect blocks and higher-numbered keys to recurse through more. 3975 * This also has the risk of not moving enough elements to the new indirect 3976 * block and being forced to create several indirect blocks before the element 3977 * can be inserted. 3978 * 3979 * Must be called with an exclusively locked parent. 3980 * 3981 * NOTE: *errorp set to HAMMER_ERROR_* flags 3982 */ 3983 static int hammer2_chain_indkey_freemap(hammer2_chain_t *parent, 3984 hammer2_key_t *keyp, int keybits, 3985 hammer2_blockref_t *base, int count); 3986 static int hammer2_chain_indkey_file(hammer2_chain_t *parent, 3987 hammer2_key_t *keyp, int keybits, 3988 hammer2_blockref_t *base, int count, 3989 int ncount); 3990 static int hammer2_chain_indkey_dir(hammer2_chain_t *parent, 3991 hammer2_key_t *keyp, int keybits, 3992 hammer2_blockref_t *base, int count, 3993 int ncount); 3994 static 3995 hammer2_chain_t * 3996 hammer2_chain_create_indirect(hammer2_chain_t *parent, 3997 hammer2_key_t create_key, int create_bits, 3998 hammer2_tid_t mtid, int for_type, int *errorp) 3999 { 4000 hammer2_dev_t *hmp; 4001 hammer2_blockref_t *base; 4002 hammer2_blockref_t *bref; 4003 hammer2_blockref_t bsave; 4004 hammer2_blockref_t dummy; 4005 hammer2_chain_t *chain; 4006 hammer2_chain_t *ichain; 4007 hammer2_key_t key = create_key; 4008 hammer2_key_t key_beg; 4009 hammer2_key_t key_end; 4010 hammer2_key_t key_next; 4011 
int keybits = create_bits; 4012 int count; 4013 int ncount; 4014 int nbytes; 4015 int loops; 4016 int error; 4017 int reason; 4018 int generation; 4019 int maxloops = 300000; 4020 4021 /* 4022 * Calculate the base blockref pointer or NULL if the chain 4023 * is known to be empty. We need to calculate the array count 4024 * for RB lookups either way. 4025 */ 4026 hmp = parent->hmp; 4027 KKASSERT(hammer2_mtx_owned(&parent->lock)); 4028 4029 /* 4030 * Pre-modify the parent now to avoid having to deal with error 4031 * processing if we tried to later (in the middle of our loop). 4032 * 4033 * We are going to be moving bref's around, the indirect blocks 4034 * cannot be in an initial state. Do not pass MODIFY_OPTDATA. 4035 */ 4036 *errorp = hammer2_chain_modify(parent, mtid, 0, 0); 4037 if (*errorp) { 4038 kprintf("hammer2_chain_create_indirect: error %08x %s\n", 4039 *errorp, hammer2_error_str(*errorp)); 4040 return NULL; 4041 } 4042 KKASSERT((parent->flags & HAMMER2_CHAIN_INITIAL) == 0); 4043 4044 /*hammer2_chain_modify(&parent, HAMMER2_MODIFY_OPTDATA);*/ 4045 base = hammer2_chain_base_and_count(parent, &count); 4046 4047 /* 4048 * How big should our new indirect block be? It has to be at least 4049 * as large as its parent for splits to work properly. 4050 * 4051 * The freemap uses a specific indirect block size. The number of 4052 * levels are built dynamically and ultimately depend on the size 4053 * volume. Because freemap blocks are taken from the reserved areas 4054 * of the volume our goal is efficiency (fewer levels) and not so 4055 * much to save disk space. 4056 * 4057 * The first indirect block level for a directory usually uses 4058 * HAMMER2_IND_BYTES_MIN (4KB = 32 directory entries). Due to 4059 * the hash mechanism, this typically gives us a nominal 4060 * 32 * 4 entries with one level of indirection. 4061 * 4062 * We use HAMMER2_IND_BYTES_NOM (16KB = 128 blockrefs) for FILE 4063 * indirect blocks. 
The initial 4 entries in the inode gives us 4064 * 256KB. Up to 4 indirect blocks gives us 32MB. Three levels 4065 * of indirection gives us 137GB, and so forth. H2 can support 4066 * huge file sizes but they are not typical, so we try to stick 4067 * with compactness and do not use a larger indirect block size. 4068 * 4069 * We could use 64KB (PBUFSIZE), giving us 512 blockrefs, but 4070 * due to the way indirect blocks are created this usually winds 4071 * up being extremely inefficient for small files. Even though 4072 * 16KB requires more levels of indirection for very large files, 4073 * the 16KB records can be ganged together into 64KB DIOs. 4074 */ 4075 if (for_type == HAMMER2_BREF_TYPE_FREEMAP_NODE || 4076 for_type == HAMMER2_BREF_TYPE_FREEMAP_LEAF) { 4077 nbytes = HAMMER2_FREEMAP_LEVELN_PSIZE; 4078 } else if (parent->bref.type == HAMMER2_BREF_TYPE_INODE) { 4079 if (parent->data->ipdata.meta.type == 4080 HAMMER2_OBJTYPE_DIRECTORY) 4081 nbytes = HAMMER2_IND_BYTES_MIN; /* 4KB = 32 entries */ 4082 else 4083 nbytes = HAMMER2_IND_BYTES_NOM; /* 16KB = ~8MB file */ 4084 4085 } else { 4086 nbytes = HAMMER2_IND_BYTES_NOM; 4087 } 4088 if (nbytes < count * sizeof(hammer2_blockref_t)) { 4089 KKASSERT(for_type != HAMMER2_BREF_TYPE_FREEMAP_NODE && 4090 for_type != HAMMER2_BREF_TYPE_FREEMAP_LEAF); 4091 nbytes = count * sizeof(hammer2_blockref_t); 4092 } 4093 ncount = nbytes / sizeof(hammer2_blockref_t); 4094 4095 /* 4096 * When creating an indirect block for a freemap node or leaf 4097 * the key/keybits must be fitted to static radix levels because 4098 * particular radix levels use particular reserved blocks in the 4099 * related zone. 4100 * 4101 * This routine calculates the key/radix of the indirect block 4102 * we need to create, and whether it is on the high-side or the 4103 * low-side. 
4104 */ 4105 switch(for_type) { 4106 case HAMMER2_BREF_TYPE_FREEMAP_NODE: 4107 case HAMMER2_BREF_TYPE_FREEMAP_LEAF: 4108 keybits = hammer2_chain_indkey_freemap(parent, &key, keybits, 4109 base, count); 4110 break; 4111 case HAMMER2_BREF_TYPE_DATA: 4112 keybits = hammer2_chain_indkey_file(parent, &key, keybits, 4113 base, count, ncount); 4114 break; 4115 case HAMMER2_BREF_TYPE_DIRENT: 4116 case HAMMER2_BREF_TYPE_INODE: 4117 keybits = hammer2_chain_indkey_dir(parent, &key, keybits, 4118 base, count, ncount); 4119 break; 4120 default: 4121 panic("illegal indirect block for bref type %d", for_type); 4122 break; 4123 } 4124 4125 /* 4126 * Normalize the key for the radix being represented, keeping the 4127 * high bits and throwing away the low bits. 4128 */ 4129 key &= ~(((hammer2_key_t)1 << keybits) - 1); 4130 4131 /* 4132 * Ok, create our new indirect block 4133 */ 4134 bzero(&dummy, sizeof(dummy)); 4135 if (for_type == HAMMER2_BREF_TYPE_FREEMAP_NODE || 4136 for_type == HAMMER2_BREF_TYPE_FREEMAP_LEAF) { 4137 dummy.type = HAMMER2_BREF_TYPE_FREEMAP_NODE; 4138 } else { 4139 dummy.type = HAMMER2_BREF_TYPE_INDIRECT; 4140 } 4141 dummy.key = key; 4142 dummy.keybits = keybits; 4143 dummy.data_off = hammer2_getradix(nbytes); 4144 dummy.methods = 4145 HAMMER2_ENC_CHECK(HAMMER2_DEC_CHECK(parent->bref.methods)) | 4146 HAMMER2_ENC_COMP(HAMMER2_COMP_NONE); 4147 4148 ichain = hammer2_chain_alloc(hmp, parent->pmp, &dummy); 4149 atomic_set_int(&ichain->flags, HAMMER2_CHAIN_INITIAL); 4150 hammer2_chain_lock(ichain, HAMMER2_RESOLVE_MAYBE); 4151 /* ichain has one ref at this point */ 4152 4153 /* 4154 * We have to mark it modified to allocate its block, but use 4155 * OPTDATA to allow it to remain in the INITIAL state. Otherwise 4156 * it won't be acted upon by the flush code. 4157 * 4158 * XXX remove OPTDATA, we need a fully initialized indirect block to 4159 * be able to move the original blockref. 
4160 */ 4161 *errorp = hammer2_chain_modify(ichain, mtid, 0, 0); 4162 if (*errorp) { 4163 kprintf("hammer2_chain_create_indirect: error %08x %s\n", 4164 *errorp, hammer2_error_str(*errorp)); 4165 hammer2_chain_unlock(ichain); 4166 hammer2_chain_drop(ichain); 4167 return NULL; 4168 } 4169 KKASSERT((ichain->flags & HAMMER2_CHAIN_INITIAL) == 0); 4170 4171 /* 4172 * Iterate the original parent and move the matching brefs into 4173 * the new indirect block. 4174 * 4175 * XXX handle flushes. 4176 */ 4177 key_beg = 0; 4178 key_end = HAMMER2_KEY_MAX; 4179 key_next = 0; /* avoid gcc warnings */ 4180 hammer2_spin_ex(&parent->core.spin); 4181 loops = 0; 4182 reason = 0; 4183 4184 for (;;) { 4185 /* 4186 * Parent may have been modified, relocating its block array. 4187 * Reload the base pointer. 4188 */ 4189 base = hammer2_chain_base_and_count(parent, &count); 4190 4191 if (++loops > 100000) { 4192 hammer2_spin_unex(&parent->core.spin); 4193 panic("excessive loops r=%d p=%p base/count %p:%d %016jx\n", 4194 reason, parent, base, count, key_next); 4195 } 4196 4197 /* 4198 * NOTE: spinlock stays intact, returned chain (if not NULL) 4199 * is not referenced or locked which means that we 4200 * cannot safely check its flagged / deletion status 4201 * until we lock it. 4202 */ 4203 chain = hammer2_combined_find(parent, base, count, 4204 &key_next, 4205 key_beg, key_end, 4206 &bref); 4207 generation = parent->core.generation; 4208 if (bref == NULL) 4209 break; 4210 key_next = bref->key + ((hammer2_key_t)1 << bref->keybits); 4211 4212 /* 4213 * Skip keys that are not within the key/radix of the new 4214 * indirect block. They stay in the parent. 4215 */ 4216 if (rounddown2(key ^ bref->key, (hammer2_key_t)1 << keybits) != 0) { 4217 goto next_key_spinlocked; 4218 } 4219 4220 /* 4221 * Load the new indirect block by acquiring the related 4222 * chains (potentially from media as it might not be 4223 * in-memory). Then move it to the new parent (ichain). 
4224 * 4225 * chain is referenced but not locked. We must lock the 4226 * chain to obtain definitive state. 4227 */ 4228 bsave = *bref; 4229 if (chain) { 4230 /* 4231 * Use chain already present in the RBTREE 4232 */ 4233 hammer2_chain_ref(chain); 4234 hammer2_spin_unex(&parent->core.spin); 4235 hammer2_chain_lock(chain, HAMMER2_RESOLVE_NEVER); 4236 } else { 4237 /* 4238 * Get chain for blockref element. _get returns NULL 4239 * on insertion race. 4240 */ 4241 hammer2_spin_unex(&parent->core.spin); 4242 chain = hammer2_chain_get(parent, generation, &bsave, 4243 HAMMER2_RESOLVE_NEVER); 4244 if (chain == NULL) { 4245 reason = 1; 4246 hammer2_spin_ex(&parent->core.spin); 4247 continue; 4248 } 4249 } 4250 4251 /* 4252 * This is always live so if the chain has been deleted 4253 * we raced someone and we have to retry. 4254 * 4255 * NOTE: Lookups can race delete-duplicate because 4256 * delete-duplicate does not lock the parent's core 4257 * (they just use the spinlock on the core). 4258 * 4259 * (note reversed logic for this one) 4260 */ 4261 if (bcmp(&bsave, &chain->bref, sizeof(bsave)) || 4262 chain->parent != parent || 4263 (chain->flags & HAMMER2_CHAIN_DELETED)) { 4264 hammer2_chain_unlock(chain); 4265 hammer2_chain_drop(chain); 4266 if (hammer2_debug & 0x0040) { 4267 kprintf("LOST PARENT RETRY " 4268 "RETRY (%p,%p)->%p %08x\n", 4269 parent, chain->parent, chain, chain->flags); 4270 } 4271 hammer2_spin_ex(&parent->core.spin); 4272 continue; 4273 } 4274 4275 /* 4276 * Shift the chain to the indirect block. 4277 * 4278 * WARNING! No reason for us to load chain data, pass NOSTATS 4279 * to prevent delete/insert from trying to access 4280 * inode stats (and thus asserting if there is no 4281 * chain->data loaded). 4282 * 4283 * WARNING! The (parent, chain) deletion may modify the parent 4284 * and invalidate the base pointer. 4285 * 4286 * WARNING! Parent must already be marked modified, so we 4287 * can assume that chain_delete always suceeds. 4288 * 4289 * WARNING! 
hammer2_chain_repchange() does not have to be 4290 * called (and doesn't work anyway because we are 4291 * only doing a partial shift). A recursion that is 4292 * in-progress can continue at the current parent 4293 * and will be able to properly find its next key. 4294 */ 4295 error = hammer2_chain_delete_obref(parent, chain, mtid, 0, 4296 &bsave); 4297 KKASSERT(error == 0); 4298 hammer2_chain_rename_obref(&ichain, chain, mtid, 0, &bsave); 4299 hammer2_chain_unlock(chain); 4300 hammer2_chain_drop(chain); 4301 KKASSERT(parent->refs > 0); 4302 chain = NULL; 4303 base = NULL; /* safety */ 4304 hammer2_spin_ex(&parent->core.spin); 4305 next_key_spinlocked: 4306 if (--maxloops == 0) 4307 panic("hammer2_chain_create_indirect: maxloops"); 4308 reason = 4; 4309 if (key_next == 0 || key_next > key_end) 4310 break; 4311 key_beg = key_next; 4312 /* loop */ 4313 } 4314 hammer2_spin_unex(&parent->core.spin); 4315 4316 /* 4317 * Insert the new indirect block into the parent now that we've 4318 * cleared out some entries in the parent. We calculated a good 4319 * insertion index in the loop above (ichain->index). 4320 * 4321 * We don't have to set UPDATE here because we mark ichain 4322 * modified down below (so the normal modified -> flush -> set-moved 4323 * sequence applies). 4324 * 4325 * The insertion shouldn't race as this is a completely new block 4326 * and the parent is locked. 4327 */ 4328 base = NULL; /* safety, parent modify may change address */ 4329 KKASSERT((ichain->flags & HAMMER2_CHAIN_ONRBTREE) == 0); 4330 KKASSERT(parent->core.live_count < count); 4331 hammer2_chain_insert(parent, ichain, 4332 HAMMER2_CHAIN_INSERT_SPIN | 4333 HAMMER2_CHAIN_INSERT_LIVE, 4334 0); 4335 4336 /* 4337 * Make sure flushes propogate after our manual insertion. 4338 */ 4339 hammer2_chain_setflush(ichain); 4340 hammer2_chain_setflush(parent); 4341 4342 /* 4343 * Figure out what to return. 
4344 */ 4345 if (rounddown2(create_key ^ key, (hammer2_key_t)1 << keybits)) { 4346 /* 4347 * Key being created is outside the key range, 4348 * return the original parent. 4349 */ 4350 hammer2_chain_unlock(ichain); 4351 hammer2_chain_drop(ichain); 4352 } else { 4353 /* 4354 * Otherwise its in the range, return the new parent. 4355 * (leave both the new and old parent locked). 4356 */ 4357 parent = ichain; 4358 } 4359 4360 return(parent); 4361 } 4362 4363 /* 4364 * Do maintenance on an indirect chain. Both parent and chain are locked. 4365 * 4366 * Returns non-zero if (chain) is deleted, either due to being empty or 4367 * because its children were safely moved into the parent. 4368 */ 4369 int 4370 hammer2_chain_indirect_maintenance(hammer2_chain_t *parent, 4371 hammer2_chain_t *chain) 4372 { 4373 hammer2_blockref_t *chain_base; 4374 hammer2_blockref_t *base; 4375 hammer2_blockref_t *bref; 4376 hammer2_blockref_t bsave; 4377 hammer2_key_t key_next; 4378 hammer2_key_t key_beg; 4379 hammer2_key_t key_end; 4380 hammer2_chain_t *sub; 4381 int chain_count; 4382 int count; 4383 int error; 4384 int generation; 4385 4386 /* 4387 * Make sure we have an accurate live_count 4388 */ 4389 if ((chain->flags & (HAMMER2_CHAIN_INITIAL | 4390 HAMMER2_CHAIN_COUNTEDBREFS)) == 0) { 4391 base = &chain->data->npdata[0]; 4392 count = chain->bytes / sizeof(hammer2_blockref_t); 4393 hammer2_chain_countbrefs(chain, base, count); 4394 } 4395 4396 /* 4397 * If the indirect block is empty we can delete it. 
	 * (ignore deletion error)
	 */
	if (chain->core.live_count == 0 && RB_EMPTY(&chain->core.rbtree)) {
		hammer2_chain_delete(parent, chain,
				     chain->bref.modify_tid,
				     HAMMER2_DELETE_PERMANENT);
		hammer2_chain_repchange(parent, chain);
		return 1;
	}

	base = hammer2_chain_base_and_count(parent, &count);

	if ((parent->flags & (HAMMER2_CHAIN_INITIAL |
			      HAMMER2_CHAIN_COUNTEDBREFS)) == 0) {
		hammer2_chain_countbrefs(parent, base, count);
	}

	/*
	 * Determine if we can collapse chain into parent, calculate
	 * hysteresis for chain emptiness.
	 */
	if (parent->core.live_count + chain->core.live_count - 1 > count)
		return 0;
	chain_count = chain->bytes / sizeof(hammer2_blockref_t);
	if (chain->core.live_count > chain_count * 3 / 4)
		return 0;

	/*
	 * Ok, theoretically we can collapse chain's contents into
	 * parent.  chain is locked, but any in-memory children of chain
	 * are not.  For this to work, we must be able to dispose of any
	 * in-memory children of chain.
	 *
	 * For now require that there are no in-memory children of chain.
	 *
	 * WARNING! Both chain and parent must remain locked across this
	 *	    entire operation.
	 */

	/*
	 * Parent must be marked modified.  Don't try to collapse it if we
	 * can't mark it modified.  Once modified, destroy chain to make room
	 * and to get rid of what will be a conflicting key (this is included
	 * in the calculation above).  Finally, move the children of chain
	 * into chain's parent.
	 *
	 * This order creates an accounting problem for bref.embed.stats
	 * because we destroy chain before we remove its children.  Any
	 * elements whose blockref is already synchronized will be counted
	 * twice.  To deal with the problem we clean out chain's stats prior
	 * to deleting it.
	 */
	error = hammer2_chain_modify(parent, 0, 0, 0);
	if (error) {
		krateprintf(&krate_h2me, "hammer2: indirect_maint: %s\n",
			    hammer2_error_str(error));
		return 0;
	}
	error = hammer2_chain_modify(chain, chain->bref.modify_tid, 0, 0);
	if (error) {
		krateprintf(&krate_h2me, "hammer2: indirect_maint: %s\n",
			    hammer2_error_str(error));
		return 0;
	}

	/* Zero the stats so the destroyed chain's elements aren't counted twice */
	chain->bref.embed.stats.inode_count = 0;
	chain->bref.embed.stats.data_count = 0;
	error = hammer2_chain_delete(parent, chain,
				     chain->bref.modify_tid,
				     HAMMER2_DELETE_PERMANENT);
	KKASSERT(error == 0);

	/*
	 * The combined_find call requires core.spin to be held.  One would
	 * think there wouldn't be any conflicts since we hold chain
	 * exclusively locked, but the caching mechanism for 0-ref children
	 * does not require a chain lock.
	 */
	hammer2_spin_ex(&chain->core.spin);

	key_next = 0;
	key_beg = 0;
	key_end = HAMMER2_KEY_MAX;
	for (;;) {
		chain_base = &chain->data->npdata[0];
		chain_count = chain->bytes / sizeof(hammer2_blockref_t);
		sub = hammer2_combined_find(chain, chain_base, chain_count,
					    &key_next,
					    key_beg, key_end,
					    &bref);
		generation = chain->core.generation;
		if (bref == NULL)
			break;
		key_next = bref->key + ((hammer2_key_t)1 << bref->keybits);

		/* acquire the child; sub (if non-NULL) is not yet ref'd or locked */
		bsave = *bref;
		if (sub) {
			hammer2_chain_ref(sub);
			hammer2_spin_unex(&chain->core.spin);
			hammer2_chain_lock(sub, HAMMER2_RESOLVE_NEVER);
		} else {
			hammer2_spin_unex(&chain->core.spin);
			sub = hammer2_chain_get(chain, generation, &bsave,
						HAMMER2_RESOLVE_NEVER);
			if (sub == NULL) {
				/* lost an insertion race, retry this key */
				hammer2_spin_ex(&chain->core.spin);
				continue;
			}
		}
		/* raced a delete/move of the child, retry */
		if (bcmp(&bsave, &sub->bref, sizeof(bsave)) ||
		    sub->parent != chain ||
		    (sub->flags & HAMMER2_CHAIN_DELETED)) {
			hammer2_chain_unlock(sub);
			hammer2_chain_drop(sub);
			hammer2_spin_ex(&chain->core.spin);
			sub = NULL;	/* safety */
			continue;
		}
		/* move the child from chain into chain's parent */
		error = hammer2_chain_delete_obref(chain, sub,
						   sub->bref.modify_tid, 0,
						   &bsave);
		KKASSERT(error == 0);
		hammer2_chain_rename_obref(&parent, sub,
					   sub->bref.modify_tid,
					   HAMMER2_INSERT_SAMEPARENT, &bsave);
		hammer2_chain_unlock(sub);
		hammer2_chain_drop(sub);
		hammer2_spin_ex(&chain->core.spin);

		if (key_next == 0)
			break;
		key_beg = key_next;
	}
	hammer2_spin_unex(&chain->core.spin);

	hammer2_chain_repchange(parent, chain);

	return 1;
}

/*
 * Freemap indirect blocks
 *
 * Calculate the keybits and highside/lowside of the freemap node the
 * caller is creating.
 *
 * This routine will specify the next higher-level freemap key/radix
 * representing the lowest-ordered set.  By doing so, eventually all
 * low-ordered sets will be moved one level down.
 *
 * We have to be careful here because the freemap reserves a limited
 * number of blocks for a limited number of levels.  So we can't just
 * push indiscriminately.
 */
int
hammer2_chain_indkey_freemap(hammer2_chain_t *parent, hammer2_key_t *keyp,
			     int keybits, hammer2_blockref_t *base, int count)
{
	hammer2_chain_t *chain;
	hammer2_blockref_t *bref;
	hammer2_key_t key;
	hammer2_key_t key_beg;
	hammer2_key_t key_end;
	hammer2_key_t key_next;
	int locount;
	int hicount;
	int maxloops = 300000;

	key = *keyp;
	locount = 0;
	hicount = 0;
	keybits = 64;

	/*
	 * Calculate the range of keys in the array being careful to skip
	 * slots which are overridden with a deletion.
	 */
	key_beg = 0;
	key_end = HAMMER2_KEY_MAX;
	hammer2_spin_ex(&parent->core.spin);

	for (;;) {
		if (--maxloops == 0) {
			panic("indkey_freemap shit %p %p:%d\n",
			      parent, base, count);
		}
		chain = hammer2_combined_find(parent, base, count,
					      &key_next,
					      key_beg, key_end,
					      &bref);

		/*
		 * Exhausted search
		 */
		if (bref == NULL)
			break;

		/*
		 * Skip deleted chains.
		 */
		if (chain && (chain->flags & HAMMER2_CHAIN_DELETED)) {
			if (key_next == 0 || key_next > key_end)
				break;
			key_beg = key_next;
			continue;
		}

		/*
		 * Use the full live (not deleted) element for the scan
		 * iteration.  HAMMER2 does not allow partial replacements.
		 *
		 * XXX should be built into hammer2_combined_find().
		 */
		key_next = bref->key + ((hammer2_key_t)1 << bref->keybits);

		/* track the smallest (key, keybits) seen */
		if (keybits > bref->keybits) {
			key = bref->key;
			keybits = bref->keybits;
		} else if (keybits == bref->keybits && bref->key < key) {
			key = bref->key;
		}
		if (key_next == 0)
			break;
		key_beg = key_next;
	}
	hammer2_spin_unex(&parent->core.spin);

	/*
	 * Return the keybits for a higher-level FREEMAP_NODE covering
	 * this node.
	 */
	key_beg = 0;
	key_end = HAMMER2_KEY_MAX;
	hammer2_spin_ex(&parent->core.spin);

	for (;;) {
		if (--maxloops == 0) {
			/* NOTE(review): panic text says "indkey_freemap" but
			 * this is indkey_file (stale copy/paste) */
			panic("indkey_freemap shit %p %p:%d\n",
			      parent, base, count);
		}
		chain = hammer2_combined_find(parent, base, count,
					      &key_next,
					      key_beg, key_end,
					      &bref);

		/*
		 * Exhausted search
		 */
		if (bref == NULL)
			break;

		/*
		 * Skip deleted chains.
		 */
		if (chain && (chain->flags & HAMMER2_CHAIN_DELETED)) {
			if (key_next == 0 || key_next > key_end)
				break;
			key_beg = key_next;
			continue;
		}

		/*
		 * Use the full live (not deleted) element for the scan
		 * iteration.  HAMMER2 does not allow partial replacements.
		 *
		 * XXX should be built into hammer2_combined_find().
		 */
		key_next = bref->key + ((hammer2_key_t)1 << bref->keybits);

		/* track the smallest (key, keybits) seen */
		if (keybits > bref->keybits) {
			key = bref->key;
			keybits = bref->keybits;
		} else if (keybits == bref->keybits && bref->key < key) {
			key = bref->key;
		}
		if (key_next == 0)
			break;
		key_beg = key_next;
	}
	hammer2_spin_unex(&parent->core.spin);

	/*
	 * Calculate the static keybits for a higher-level indirect block
	 * that contains the key.
	 */
	*keyp = key;

	switch(ncount) {
	case HAMMER2_IND_BYTES_MIN / sizeof(hammer2_blockref_t):
		nradix = HAMMER2_IND_RADIX_MIN - HAMMER2_BLOCKREF_RADIX;
		break;
	case HAMMER2_IND_BYTES_NOM / sizeof(hammer2_blockref_t):
		nradix = HAMMER2_IND_RADIX_NOM - HAMMER2_BLOCKREF_RADIX;
		break;
	case HAMMER2_IND_BYTES_MAX / sizeof(hammer2_blockref_t):
		nradix = HAMMER2_IND_RADIX_MAX - HAMMER2_BLOCKREF_RADIX;
		break;
	default:
		panic("bad ncount %d\n", ncount);
		nradix = 0;
		break;
	}

	/*
	 * The largest radix that can be returned for an indirect block is
	 * 63 bits.  (The largest practical indirect block radix is actually
	 * 62 bits because the top-level inode or volume root contains four
	 * entries, but allow 63 to be returned).
	 *
	 * NOTE(review): this clamps nradix alone; the returned value is
	 * keybits + nradix, which this check does not bound to 63 —
	 * confirm the intended invariant.
	 */
	if (nradix >= 64)
		nradix = 63;

	return keybits + nradix;
}

#if 1

/*
 * Directory indirect blocks.
 *
 * Covers both the inode index (directory of inodes), and directory contents
 * (filenames hardlinked to inodes).
 *
 * Because directory keys are hashed we generally try to cut the space in
 * half.  We accommodate the inode index (which tends to have linearly
 * increasing inode numbers) by ensuring that the keyspace is at least large
 * enough to fill up the indirect block being created.
 */
static int
hammer2_chain_indkey_dir(hammer2_chain_t *parent, hammer2_key_t *keyp,
			 int keybits, hammer2_blockref_t *base, int count,
			 int ncount)
{
	hammer2_blockref_t *bref;
	hammer2_chain_t *chain;
	hammer2_key_t key_beg;
	hammer2_key_t key_end;
	hammer2_key_t key_next;
	hammer2_key_t key;
	int nkeybits;
	int locount;
	int hicount;
	int maxloops = 300000;

	/*
	 * NOTE: We can't take a shortcut here anymore for inodes because
	 *	 the root directory can contain a mix of inodes and directory
	 *	 entries (we used to just return 63 if parent->bref.type was
	 *	 HAMMER2_BREF_TYPE_INODE.
	 */
	key = *keyp;
	locount = 0;
	hicount = 0;

	/*
	 * Calculate the range of keys in the array being careful to skip
	 * slots which are overridden with a deletion.
	 */
	key_beg = 0;
	key_end = HAMMER2_KEY_MAX;
	hammer2_spin_ex(&parent->core.spin);

	for (;;) {
		if (--maxloops == 0) {
			panic("indkey_freemap shit %p %p:%d\n",
			      parent, base, count);
		}
		chain = hammer2_combined_find(parent, base, count,
					      &key_next,
					      key_beg, key_end,
					      &bref);

		/*
		 * Exhausted search
		 */
		if (bref == NULL)
			break;

		/*
		 * Deleted object
		 */
		if (chain && (chain->flags & HAMMER2_CHAIN_DELETED)) {
			if (key_next == 0 || key_next > key_end)
				break;
			key_beg = key_next;
			continue;
		}

		/*
		 * Use the full live (not deleted) element for the scan
		 * iteration.  HAMMER2 does not allow partial replacements.
		 *
		 * XXX should be built into hammer2_combined_find().
		 */
		key_next = bref->key + ((hammer2_key_t)1 << bref->keybits);

		/*
		 * Expand our calculated key range (key, keybits) to fit
		 * the scanned key.  nkeybits represents the full range
		 * that we will later cut in half (two halves @ nkeybits - 1).
		 */
		nkeybits = keybits;
		if (nkeybits < bref->keybits) {
			if (bref->keybits > 64) {
				kprintf("bad bref chain %p bref %p\n",
					chain, bref);
				Debugger("fubar");
			}
			nkeybits = bref->keybits;
		}
		while (nkeybits < 64 &&
		       rounddown2(key ^ bref->key, (hammer2_key_t)1 << nkeybits) != 0) {
			++nkeybits;
		}

		/*
		 * If the new key range is larger we have to determine
		 * which side of the new key range the existing keys fall
		 * under by checking the high bit, then collapsing the
		 * locount into the hicount or vice-versa.
		 */
		if (keybits != nkeybits) {
			if (((hammer2_key_t)1 << (nkeybits - 1)) & key) {
				hicount += locount;
				locount = 0;
			} else {
				locount += hicount;
				hicount = 0;
			}
			keybits = nkeybits;
		}

		/*
		 * The newly scanned key will be in the lower half or the
		 * upper half of the (new) key range.
		 */
		if (((hammer2_key_t)1 << (nkeybits - 1)) & bref->key)
			++hicount;
		else
			++locount;

		if (key_next == 0)
			break;
		key_beg = key_next;
	}
	hammer2_spin_unex(&parent->core.spin);
	bref = NULL;	/* now invalid (safety) */

	/*
	 * Adjust keybits to represent half of the full range calculated
	 * above (radix 63 max) for our new indirect block.
	 */
	--keybits;

	/*
	 * Expand keybits to hold at least ncount elements.  ncount will be
	 * a power of 2.  This is to try to completely fill leaf nodes (at
	 * least for keys which are not hashes).
	 *
	 * We aren't counting 'in' or 'out', we are counting 'high side'
	 * and 'low side' based on the bit at (1LL << keybits).  We want
	 * everything to be inside in these cases so shift it all to
	 * the low or high side depending on the new high bit.
	 */
	while (((hammer2_key_t)1 << keybits) < ncount) {
		++keybits;
		if (key & ((hammer2_key_t)1 << keybits)) {
			hicount += locount;
			locount = 0;
		} else {
			locount += hicount;
			hicount = 0;
		}
	}

	/* place the new indirect block over the more populated half */
	if (hicount > locount)
		key |= (hammer2_key_t)1 << keybits;
	else
		key &= ~(hammer2_key_t)1 << keybits;

	*keyp = key;

	return (keybits);
}

#else

/*
 * Directory indirect blocks.
 *
 * Covers both the inode index (directory of inodes), and directory contents
 * (filenames hardlinked to inodes).
 *
 * Because directory keys are hashed we generally try to cut the space in
 * half.
 * We accommodate the inode index (which tends to have linearly
 * increasing inode numbers) by ensuring that the keyspace is at least large
 * enough to fill up the indirect block being created.
 */
static int
hammer2_chain_indkey_dir(hammer2_chain_t *parent, hammer2_key_t *keyp,
			 int keybits, hammer2_blockref_t *base, int count,
			 int ncount)
{
	hammer2_blockref_t *bref;
	hammer2_chain_t *chain;
	hammer2_key_t key_beg;
	hammer2_key_t key_end;
	hammer2_key_t key_next;
	hammer2_key_t key;
	int nkeybits;
	int locount;
	int hicount;
	int maxloops = 300000;

	/*
	 * Shortcut if the parent is the inode.  In this situation the
	 * parent has 4+1 directory entries and we are creating an indirect
	 * block capable of holding many more.
	 */
	if (parent->bref.type == HAMMER2_BREF_TYPE_INODE) {
		return 63;
	}

	key = *keyp;
	locount = 0;
	hicount = 0;

	/*
	 * Calculate the range of keys in the array being careful to skip
	 * slots which are overridden with a deletion.
	 */
	key_beg = 0;
	key_end = HAMMER2_KEY_MAX;
	hammer2_spin_ex(&parent->core.spin);

	for (;;) {
		if (--maxloops == 0) {
			panic("indkey_freemap shit %p %p:%d\n",
			      parent, base, count);
		}
		chain = hammer2_combined_find(parent, base, count,
					      &key_next,
					      key_beg, key_end,
					      &bref);

		/*
		 * Exhausted search
		 */
		if (bref == NULL)
			break;

		/*
		 * Deleted object
		 */
		if (chain && (chain->flags & HAMMER2_CHAIN_DELETED)) {
			if (key_next == 0 || key_next > key_end)
				break;
			key_beg = key_next;
			continue;
		}

		/*
		 * Use the full live (not deleted) element for the scan
		 * iteration.  HAMMER2 does not allow partial replacements.
		 *
		 * XXX should be built into hammer2_combined_find().
		 */
		key_next = bref->key + ((hammer2_key_t)1 << bref->keybits);

		/*
		 * Expand our calculated key range (key, keybits) to fit
		 * the scanned key.  nkeybits represents the full range
		 * that we will later cut in half (two halves @ nkeybits - 1).
		 */
		nkeybits = keybits;
		if (nkeybits < bref->keybits) {
			if (bref->keybits > 64) {
				kprintf("bad bref chain %p bref %p\n",
					chain, bref);
				Debugger("fubar");
			}
			nkeybits = bref->keybits;
		}
		while (nkeybits < 64 &&
		       (~(((hammer2_key_t)1 << nkeybits) - 1) &
		        (key ^ bref->key)) != 0) {
			++nkeybits;
		}

		/*
		 * If the new key range is larger we have to determine
		 * which side of the new key range the existing keys fall
		 * under by checking the high bit, then collapsing the
		 * locount into the hicount or vice-versa.
		 */
		if (keybits != nkeybits) {
			if (((hammer2_key_t)1 << (nkeybits - 1)) & key) {
				hicount += locount;
				locount = 0;
			} else {
				locount += hicount;
				hicount = 0;
			}
			keybits = nkeybits;
		}

		/*
		 * The newly scanned key will be in the lower half or the
		 * upper half of the (new) key range.
		 */
		if (((hammer2_key_t)1 << (nkeybits - 1)) & bref->key)
			++hicount;
		else
			++locount;

		if (key_next == 0)
			break;
		key_beg = key_next;
	}
	hammer2_spin_unex(&parent->core.spin);
	bref = NULL;	/* now invalid (safety) */

	/*
	 * Adjust keybits to represent half of the full range calculated
	 * above (radix 63 max) for our new indirect block.
	 */
	--keybits;

	/*
	 * Expand keybits to hold at least ncount elements.  ncount will be
	 * a power of 2.  This is to try to completely fill leaf nodes (at
	 * least for keys which are not hashes).
	 *
	 * We aren't counting 'in' or 'out', we are counting 'high side'
	 * and 'low side' based on the bit at (1LL << keybits).  We want
	 * everything to be inside in these cases so shift it all to
	 * the low or high side depending on the new high bit.
	 */
	while (((hammer2_key_t)1 << keybits) < ncount) {
		++keybits;
		if (key & ((hammer2_key_t)1 << keybits)) {
			hicount += locount;
			locount = 0;
		} else {
			locount += hicount;
			hicount = 0;
		}
	}

	/* place the new indirect block over the more populated half */
	if (hicount > locount)
		key |= (hammer2_key_t)1 << keybits;
	else
		key &= ~(hammer2_key_t)1 << keybits;

	*keyp = key;

	return (keybits);
}

#endif

/*
 * Sets CHAIN_DELETED and remove the chain's blockref from the parent if
 * it exists.
 *
 * Both parent and chain must be locked exclusively.
 *
 * This function will modify the parent if the blockref requires removal
 * from the parent's block table.
 *
 * This function is NOT recursive.  Any entity already pushed into the
 * chain (such as an inode) may still need visibility into its contents,
 * as well as the ability to read and modify the contents.  For example,
 * for an unlinked file which is still open.
 *
 * Also note that the flusher is responsible for cleaning up empty
 * indirect blocks.
 *
 * Returns 0 on success or a HAMMER2_ERROR_* code from the delete helper.
 */
int
hammer2_chain_delete(hammer2_chain_t *parent, hammer2_chain_t *chain,
		     hammer2_tid_t mtid, int flags)
{
	int error = 0;

	KKASSERT(hammer2_mtx_owned(&chain->lock));

	/*
	 * Nothing to do if already marked.
	 *
	 * We need the spinlock on the core whose RBTREE contains chain
	 * to protect against races.
	 */
	if ((chain->flags & HAMMER2_CHAIN_DELETED) == 0) {
		KKASSERT((chain->flags & HAMMER2_CHAIN_DELETED) == 0 &&
			 chain->parent == parent);
		error = _hammer2_chain_delete_helper(parent, chain,
						     mtid, flags, NULL);
	}

	/*
	 * Permanent deletions mark the chain as destroyed.
	 *
	 * NOTE: We do not setflush the chain unless the deletion is
	 *	 permanent, since the deletion of a chain does not actually
	 *	 require it to be flushed.
	 */
	if (error == 0) {
		if (flags & HAMMER2_DELETE_PERMANENT) {
			atomic_set_int(&chain->flags, HAMMER2_CHAIN_DESTROY);
			hammer2_chain_setflush(chain);
		}
	}

	return error;
}

/*
 * As hammer2_chain_delete() but also returns a copy of the deleted
 * chain's blockref in *obref (set to BREF_TYPE_EMPTY when the chain
 * was already deleted and nothing was removed).
 */
static int
hammer2_chain_delete_obref(hammer2_chain_t *parent, hammer2_chain_t *chain,
			   hammer2_tid_t mtid, int flags,
			   hammer2_blockref_t *obref)
{
	int error = 0;

	KKASSERT(hammer2_mtx_owned(&chain->lock));

	/*
	 * Nothing to do if already marked.
	 *
	 * We need the spinlock on the core whose RBTREE contains chain
	 * to protect against races.
	 */
	obref->type = HAMMER2_BREF_TYPE_EMPTY;
	if ((chain->flags & HAMMER2_CHAIN_DELETED) == 0) {
		KKASSERT((chain->flags & HAMMER2_CHAIN_DELETED) == 0 &&
			 chain->parent == parent);
		error = _hammer2_chain_delete_helper(parent, chain,
						     mtid, flags, obref);
	}

	/*
	 * Permanent deletions mark the chain as destroyed.
	 *
	 * NOTE: We do not setflush the chain unless the deletion is
	 *	 permanent, since the deletion of a chain does not actually
	 *	 require it to be flushed.
	 */
	if (error == 0) {
		if (flags & HAMMER2_DELETE_PERMANENT) {
			atomic_set_int(&chain->flags, HAMMER2_CHAIN_DESTROY);
			hammer2_chain_setflush(chain);
		}
	}

	return error;
}

/*
 * Returns the index of the nearest element in the blockref array >= elm.
 * Returns (count) if no element could be found.
 *
 * Sets *key_nextp to the next key for loop purposes but does not modify
 * it if the next key would be higher than the current value of *key_nextp.
 * Note that *key_nextp can overflow to 0, which should be tested by the
 * caller.
 *
 * WARNING!
 *	    Must be called with parent's spinlock held.  Spinlock remains
 *	    held through the operation.
 */
static int
hammer2_base_find(hammer2_chain_t *parent,
		  hammer2_blockref_t *base, int count,
		  hammer2_key_t *key_nextp,
		  hammer2_key_t key_beg, hammer2_key_t key_end)
{
	hammer2_blockref_t *scan;	/* current element under inspection */
	hammer2_key_t scan_end;		/* inclusive/exclusive end of scan's range */
	int i;
	int limit;

	/*
	 * Require the live chain's already have their core's counted
	 * so we can optimize operations.
	 */
	KKASSERT(parent->flags & HAMMER2_CHAIN_COUNTEDBREFS);

	/*
	 * Degenerate case: nothing to scan.
	 */
	if (count == 0 || base == NULL)
		return(count);

	/*
	 * Sequential optimization using parent->cache_index.  This is
	 * the most likely scenario.
	 *
	 * We can avoid trailing empty entries on live chains, otherwise
	 * we might have to check the whole block array.
	 *
	 * cache_index is only a heuristic starting point; a stale value
	 * read under SMP race is clamped into range below and cannot
	 * cause incorrect results, only extra scanning.
	 */
	i = parent->cache_index;	/* SMP RACE OK */
	cpu_ccfence();
	limit = parent->core.live_zero;
	if (i >= limit)
		i = limit - 1;
	if (i < 0)
		i = 0;
	KKASSERT(i < count);

	/*
	 * Search backwards until we land on or before an element whose
	 * key does not exceed key_beg (skipping EMPTY slots).
	 */
	scan = &base[i];
	while (i > 0 && (scan->type == HAMMER2_BREF_TYPE_EMPTY ||
	    scan->key > key_beg)) {
		--scan;
		--i;
	}
	parent->cache_index = i;

	/*
	 * Search forwards, stop when we find a scan element which
	 * encloses the key or until we know that there are no further
	 * elements.
	 */
	while (i < count) {
		if (scan->type != HAMMER2_BREF_TYPE_EMPTY) {
			scan_end = scan->key +
				   ((hammer2_key_t)1 << scan->keybits) - 1;
			if (scan->key > key_beg || scan_end >= key_beg)
				break;
		}
		if (i >= limit)
			return (count);
		++scan;
		++i;
	}
	if (i != count) {
		parent->cache_index = i;
		if (i >= limit) {
			i = count;
		} else {
			/*
			 * Advance *key_nextp to just past this element,
			 * but never move it backwards.  scan_end == 0
			 * indicates wrap-around and is left to the caller.
			 */
			scan_end = scan->key +
				   ((hammer2_key_t)1 << scan->keybits);
			if (scan_end && (*key_nextp > scan_end ||
					 *key_nextp == 0)) {
				*key_nextp = scan_end;
			}
		}
	}
	return (i);
}

/*
 * Do a combined search and return the next match either from the blockref
 * array or from the in-memory chain.  Sets *bresp to the returned bref in
 * both cases, or sets it to NULL if the search exhausted.  Only returns
 * a non-NULL chain if the search matched from the in-memory chain.
 *
 * When no in-memory chain matched, a non-NULL bref (taken from the media
 * block array) may still be returned in *bresp with a NULL chain.
 *
 * The returned chain is not locked or referenced.  Use the returned bref
 * to determine if the search exhausted or not.  Iterate if the base find
 * is chosen but matches a deleted chain.
 *
 * WARNING! Must be called with parent's spinlock held.  Spinlock remains
 *	    held through the operation.
 */
hammer2_chain_t *
hammer2_combined_find(hammer2_chain_t *parent,
		      hammer2_blockref_t *base, int count,
		      hammer2_key_t *key_nextp,
		      hammer2_key_t key_beg, hammer2_key_t key_end,
		      hammer2_blockref_t **bresp)
{
	hammer2_blockref_t *bref;
	hammer2_chain_t *chain;
	int i;

	/*
	 * Lookup in block array and in rbtree.
	 */
	*key_nextp = key_end + 1;
	i = hammer2_base_find(parent, base, count, key_nextp,
			      key_beg, key_end);
	chain = hammer2_chain_find(parent, key_nextp, key_beg, key_end);

	/*
	 * Neither matched
	 */
	if (i == count && chain == NULL) {
		*bresp = NULL;
		return(NULL);
	}

	/*
	 * Only chain matched.
	 */
	if (i == count) {
		bref = &chain->bref;
		goto found;
	}

	/*
	 * Only blockref matched.
	 */
	if (chain == NULL) {
		bref = &base[i];
		goto found;
	}

	/*
	 * Both in-memory and blockref matched, select the nearer element.
	 *
	 * If both are flush with the left-hand side or both are the
	 * same distance away, select the chain.  In this situation the
	 * chain must have been loaded from the matching blockmap.
	 */
	if ((chain->bref.key <= key_beg && base[i].key <= key_beg) ||
	    chain->bref.key == base[i].key) {
		KKASSERT(chain->bref.key == base[i].key);
		bref = &chain->bref;
		goto found;
	}

	/*
	 * Select the nearer key
	 */
	if (chain->bref.key < base[i].key) {
		bref = &chain->bref;
	} else {
		bref = &base[i];
		chain = NULL;
	}

	/*
	 * If the bref is out of bounds we've exhausted our search.
	 */
found:
	if (bref->key > key_end) {
		*bresp = NULL;
		chain = NULL;
	} else {
		*bresp = bref;
	}
	return(chain);
}

/*
 * Locate the specified block array element and delete it.  The element
 * must exist.
 *
 * The spin lock on the related chain must be held.
 *
 * NOTE: live_count was adjusted when the chain was deleted, so it does not
 *	 need to be adjusted when we commit the media change.
5421 */ 5422 void 5423 hammer2_base_delete(hammer2_chain_t *parent, 5424 hammer2_blockref_t *base, int count, 5425 hammer2_chain_t *chain, 5426 hammer2_blockref_t *obref) 5427 { 5428 hammer2_blockref_t *elm = &chain->bref; 5429 hammer2_blockref_t *scan; 5430 hammer2_key_t key_next; 5431 int i; 5432 5433 /* 5434 * Delete element. Expect the element to exist. 5435 * 5436 * XXX see caller, flush code not yet sophisticated enough to prevent 5437 * re-flushed in some cases. 5438 */ 5439 key_next = 0; /* max range */ 5440 i = hammer2_base_find(parent, base, count, &key_next, 5441 elm->key, elm->key); 5442 scan = &base[i]; 5443 if (i == count || scan->type == HAMMER2_BREF_TYPE_EMPTY || 5444 scan->key != elm->key || 5445 ((chain->flags & HAMMER2_CHAIN_BMAPUPD) == 0 && 5446 scan->keybits != elm->keybits)) { 5447 hammer2_spin_unex(&parent->core.spin); 5448 panic("delete base %p element not found at %d/%d elm %p\n", 5449 base, i, count, elm); 5450 return; 5451 } 5452 5453 /* 5454 * Update stats and zero the entry. 5455 * 5456 * NOTE: Handle radix == 0 (0 bytes) case. 
5457 */ 5458 if ((int)(scan->data_off & HAMMER2_OFF_MASK_RADIX)) { 5459 parent->bref.embed.stats.data_count -= (hammer2_off_t)1 << 5460 (int)(scan->data_off & HAMMER2_OFF_MASK_RADIX); 5461 } 5462 switch(scan->type) { 5463 case HAMMER2_BREF_TYPE_INODE: 5464 --parent->bref.embed.stats.inode_count; 5465 /* fall through */ 5466 case HAMMER2_BREF_TYPE_DATA: 5467 if (parent->bref.leaf_count == HAMMER2_BLOCKREF_LEAF_MAX) { 5468 atomic_set_int(&chain->flags, 5469 HAMMER2_CHAIN_HINT_LEAF_COUNT); 5470 } else { 5471 if (parent->bref.leaf_count) 5472 --parent->bref.leaf_count; 5473 } 5474 /* fall through */ 5475 case HAMMER2_BREF_TYPE_INDIRECT: 5476 if (scan->type != HAMMER2_BREF_TYPE_DATA) { 5477 parent->bref.embed.stats.data_count -= 5478 scan->embed.stats.data_count; 5479 parent->bref.embed.stats.inode_count -= 5480 scan->embed.stats.inode_count; 5481 } 5482 if (scan->type == HAMMER2_BREF_TYPE_INODE) 5483 break; 5484 if (parent->bref.leaf_count == HAMMER2_BLOCKREF_LEAF_MAX) { 5485 atomic_set_int(&chain->flags, 5486 HAMMER2_CHAIN_HINT_LEAF_COUNT); 5487 } else { 5488 if (parent->bref.leaf_count <= scan->leaf_count) 5489 parent->bref.leaf_count = 0; 5490 else 5491 parent->bref.leaf_count -= scan->leaf_count; 5492 } 5493 break; 5494 case HAMMER2_BREF_TYPE_DIRENT: 5495 if (parent->bref.leaf_count == HAMMER2_BLOCKREF_LEAF_MAX) { 5496 atomic_set_int(&chain->flags, 5497 HAMMER2_CHAIN_HINT_LEAF_COUNT); 5498 } else { 5499 if (parent->bref.leaf_count) 5500 --parent->bref.leaf_count; 5501 } 5502 default: 5503 break; 5504 } 5505 5506 if (obref) 5507 *obref = *scan; 5508 bzero(scan, sizeof(*scan)); 5509 5510 /* 5511 * We can only optimize parent->core.live_zero for live chains. 5512 */ 5513 if (parent->core.live_zero == i + 1) { 5514 while (--i >= 0 && base[i].type == HAMMER2_BREF_TYPE_EMPTY) 5515 ; 5516 parent->core.live_zero = i + 1; 5517 } 5518 5519 /* 5520 * Clear appropriate blockmap flags in chain. 
5521 */ 5522 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_BMAPPED | 5523 HAMMER2_CHAIN_BMAPUPD); 5524 } 5525 5526 /* 5527 * Insert the specified element. The block array must not already have the 5528 * element and must have space available for the insertion. 5529 * 5530 * The spin lock on the related chain must be held. 5531 * 5532 * NOTE: live_count was adjusted when the chain was deleted, so it does not 5533 * need to be adjusted when we commit the media change. 5534 */ 5535 void 5536 hammer2_base_insert(hammer2_chain_t *parent, 5537 hammer2_blockref_t *base, int count, 5538 hammer2_chain_t *chain, hammer2_blockref_t *elm) 5539 { 5540 hammer2_key_t key_next; 5541 hammer2_key_t xkey; 5542 int i; 5543 int j; 5544 int k; 5545 int l; 5546 int u = 1; 5547 5548 /* 5549 * Insert new element. Expect the element to not already exist 5550 * unless we are replacing it. 5551 * 5552 * XXX see caller, flush code not yet sophisticated enough to prevent 5553 * re-flushed in some cases. 5554 */ 5555 key_next = 0; /* max range */ 5556 i = hammer2_base_find(parent, base, count, &key_next, 5557 elm->key, elm->key); 5558 5559 /* 5560 * Shortcut fill optimization, typical ordered insertion(s) may not 5561 * require a search. 
5562 */ 5563 KKASSERT(i >= 0 && i <= count); 5564 5565 /* 5566 * Set appropriate blockmap flags in chain (if not NULL) 5567 */ 5568 if (chain) 5569 atomic_set_int(&chain->flags, HAMMER2_CHAIN_BMAPPED); 5570 5571 /* 5572 * Update stats and zero the entry 5573 */ 5574 if ((int)(elm->data_off & HAMMER2_OFF_MASK_RADIX)) { 5575 parent->bref.embed.stats.data_count += (hammer2_off_t)1 << 5576 (int)(elm->data_off & HAMMER2_OFF_MASK_RADIX); 5577 } 5578 switch(elm->type) { 5579 case HAMMER2_BREF_TYPE_INODE: 5580 ++parent->bref.embed.stats.inode_count; 5581 /* fall through */ 5582 case HAMMER2_BREF_TYPE_DATA: 5583 if (parent->bref.leaf_count != HAMMER2_BLOCKREF_LEAF_MAX) 5584 ++parent->bref.leaf_count; 5585 /* fall through */ 5586 case HAMMER2_BREF_TYPE_INDIRECT: 5587 if (elm->type != HAMMER2_BREF_TYPE_DATA) { 5588 parent->bref.embed.stats.data_count += 5589 elm->embed.stats.data_count; 5590 parent->bref.embed.stats.inode_count += 5591 elm->embed.stats.inode_count; 5592 } 5593 if (elm->type == HAMMER2_BREF_TYPE_INODE) 5594 break; 5595 if (parent->bref.leaf_count + elm->leaf_count < 5596 HAMMER2_BLOCKREF_LEAF_MAX) { 5597 parent->bref.leaf_count += elm->leaf_count; 5598 } else { 5599 parent->bref.leaf_count = HAMMER2_BLOCKREF_LEAF_MAX; 5600 } 5601 break; 5602 case HAMMER2_BREF_TYPE_DIRENT: 5603 if (parent->bref.leaf_count != HAMMER2_BLOCKREF_LEAF_MAX) 5604 ++parent->bref.leaf_count; 5605 break; 5606 default: 5607 break; 5608 } 5609 5610 5611 /* 5612 * We can only optimize parent->core.live_zero for live chains. 
5613 */ 5614 if (i == count && parent->core.live_zero < count) { 5615 i = parent->core.live_zero++; 5616 base[i] = *elm; 5617 return; 5618 } 5619 5620 xkey = elm->key + ((hammer2_key_t)1 << elm->keybits) - 1; 5621 if (i != count && (base[i].key < elm->key || xkey >= base[i].key)) { 5622 hammer2_spin_unex(&parent->core.spin); 5623 panic("insert base %p overlapping elements at %d elm %p\n", 5624 base, i, elm); 5625 } 5626 5627 /* 5628 * Try to find an empty slot before or after. 5629 */ 5630 j = i; 5631 k = i; 5632 while (j > 0 || k < count) { 5633 --j; 5634 if (j >= 0 && base[j].type == HAMMER2_BREF_TYPE_EMPTY) { 5635 if (j == i - 1) { 5636 base[j] = *elm; 5637 } else { 5638 bcopy(&base[j+1], &base[j], 5639 (i - j - 1) * sizeof(*base)); 5640 base[i - 1] = *elm; 5641 } 5642 goto validate; 5643 } 5644 ++k; 5645 if (k < count && base[k].type == HAMMER2_BREF_TYPE_EMPTY) { 5646 bcopy(&base[i], &base[i+1], 5647 (k - i) * sizeof(hammer2_blockref_t)); 5648 base[i] = *elm; 5649 5650 /* 5651 * We can only update parent->core.live_zero for live 5652 * chains. 5653 */ 5654 if (parent->core.live_zero <= k) 5655 parent->core.live_zero = k + 1; 5656 u = 2; 5657 goto validate; 5658 } 5659 } 5660 panic("hammer2_base_insert: no room!"); 5661 5662 /* 5663 * Debugging 5664 */ 5665 validate: 5666 key_next = 0; 5667 for (l = 0; l < count; ++l) { 5668 if (base[l].type != HAMMER2_BREF_TYPE_EMPTY) { 5669 key_next = base[l].key + 5670 ((hammer2_key_t)1 << base[l].keybits) - 1; 5671 break; 5672 } 5673 } 5674 while (++l < count) { 5675 if (base[l].type != HAMMER2_BREF_TYPE_EMPTY) { 5676 if (base[l].key <= key_next) 5677 panic("base_insert %d %d,%d,%d fail %p:%d", u, i, j, k, base, l); 5678 key_next = base[l].key + 5679 ((hammer2_key_t)1 << base[l].keybits) - 1; 5680 5681 } 5682 } 5683 5684 } 5685 5686 #if 0 5687 5688 /* 5689 * Sort the blockref array for the chain. Used by the flush code to 5690 * sort the blockref[] array. 5691 * 5692 * The chain must be exclusively locked AND spin-locked. 
 */
typedef hammer2_blockref_t *hammer2_blockref_p;

static
int
hammer2_base_sort_callback(const void *v1, const void *v2)
{
	hammer2_blockref_p bref1 = *(const hammer2_blockref_p *)v1;
	hammer2_blockref_p bref2 = *(const hammer2_blockref_p *)v2;

	/*
	 * Make sure empty elements are placed at the end of the array
	 */
	if (bref1->type == HAMMER2_BREF_TYPE_EMPTY) {
		if (bref2->type == HAMMER2_BREF_TYPE_EMPTY)
			return(0);
		return(1);
	} else if (bref2->type == HAMMER2_BREF_TYPE_EMPTY) {
		return(-1);
	}

	/*
	 * Sort by key
	 */
	if (bref1->key < bref2->key)
		return(-1);
	if (bref1->key > bref2->key)
		return(1);
	return(0);
}

/*
 * Resolve the chain's media blockref array and sort it in-place with
 * hammer2_base_sort_callback() (EMPTY elements last, then ascending key).
 */
void
hammer2_base_sort(hammer2_chain_t *chain)
{
	hammer2_blockref_t *base;	/* blockref array to sort */
	int count;			/* number of elements in base */

	switch(chain->bref.type) {
	case HAMMER2_BREF_TYPE_INODE:
		/*
		 * Special shortcut for embedded data returns the inode
		 * itself.  Callers must detect this condition and access
		 * the embedded data (the strategy code does this for us).
		 *
		 * This is only applicable to regular files and softlinks.
		 */
		if (chain->data->ipdata.meta.op_flags &
		    HAMMER2_OPFLAG_DIRECTDATA) {
			return;
		}
		base = &chain->data->ipdata.u.blockset.blockref[0];
		count = HAMMER2_SET_COUNT;
		break;
	case HAMMER2_BREF_TYPE_FREEMAP_NODE:
	case HAMMER2_BREF_TYPE_INDIRECT:
		/*
		 * Optimize indirect blocks in the INITIAL state to avoid
		 * I/O.
		 */
		KKASSERT((chain->flags & HAMMER2_CHAIN_INITIAL) == 0);
		base = &chain->data->npdata[0];
		count = chain->bytes / sizeof(hammer2_blockref_t);
		break;
	case HAMMER2_BREF_TYPE_VOLUME:
		base = &chain->data->voldata.sroot_blockset.blockref[0];
		count = HAMMER2_SET_COUNT;
		break;
	case HAMMER2_BREF_TYPE_FREEMAP:
		base = &chain->data->blkset.blockref[0];
		count = HAMMER2_SET_COUNT;
		break;
	default:
		/*
		 * Deliberately hang rather than panic so the wedged state
		 * can be inspected; the panic below is unreachable.
		 */
		kprintf("hammer2_base_sort: unrecognized "
			"blockref(A) type: %d",
			chain->bref.type);
		while (1)
			tsleep(&base, 0, "dead", 0);
		panic("hammer2_base_sort: unrecognized "
		      "blockref(A) type: %d",
		      chain->bref.type);
		base = NULL;	/* safety */
		count = 0;	/* safety */
	}
	kqsort(base, count, sizeof(*base), hammer2_base_sort_callback);
}

#endif

/*
 * Chain memory management
 */

/* Briefly sleep waiting for chain flush activity (1 tick). */
void
hammer2_chain_wait(hammer2_chain_t *chain)
{
	tsleep(chain, 0, "chnflw", 1);
}

/* Return the chain's resolved media data for read access. */
const hammer2_media_data_t *
hammer2_chain_rdata(hammer2_chain_t *chain)
{
	KKASSERT(chain->data != NULL);
	return (chain->data);
}

/* Return the chain's resolved media data for write access. */
hammer2_media_data_t *
hammer2_chain_wdata(hammer2_chain_t *chain)
{
	KKASSERT(chain->data != NULL);
	return (chain->data);
}

/*
 * Set the check data for a chain.  This can be a heavy-weight operation
 * and typically only runs on-flush.  For file data check data is calculated
 * when the logical buffers are flushed.
5808 */ 5809 void 5810 hammer2_chain_setcheck(hammer2_chain_t *chain, void *bdata) 5811 { 5812 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_NOTTESTED); 5813 5814 switch(HAMMER2_DEC_CHECK(chain->bref.methods)) { 5815 case HAMMER2_CHECK_NONE: 5816 break; 5817 case HAMMER2_CHECK_DISABLED: 5818 break; 5819 case HAMMER2_CHECK_ISCSI32: 5820 chain->bref.check.iscsi32.value = 5821 hammer2_icrc32(bdata, chain->bytes); 5822 break; 5823 case HAMMER2_CHECK_XXHASH64: 5824 chain->bref.check.xxhash64.value = 5825 XXH64(bdata, chain->bytes, XXH_HAMMER2_SEED); 5826 break; 5827 case HAMMER2_CHECK_SHA192: 5828 { 5829 SHA256_CTX hash_ctx; 5830 union { 5831 uint8_t digest[SHA256_DIGEST_LENGTH]; 5832 uint64_t digest64[SHA256_DIGEST_LENGTH/8]; 5833 } u; 5834 5835 SHA256_Init(&hash_ctx); 5836 SHA256_Update(&hash_ctx, bdata, chain->bytes); 5837 SHA256_Final(u.digest, &hash_ctx); 5838 u.digest64[2] ^= u.digest64[3]; 5839 bcopy(u.digest, 5840 chain->bref.check.sha192.data, 5841 sizeof(chain->bref.check.sha192.data)); 5842 } 5843 break; 5844 case HAMMER2_CHECK_FREEMAP: 5845 chain->bref.check.freemap.icrc32 = 5846 hammer2_icrc32(bdata, chain->bytes); 5847 break; 5848 default: 5849 kprintf("hammer2_chain_setcheck: unknown check type %02x\n", 5850 chain->bref.methods); 5851 break; 5852 } 5853 } 5854 5855 /* 5856 * Characterize a failed check code and try to trace back to the inode. 
5857 */ 5858 static void 5859 hammer2_characterize_failed_chain(hammer2_chain_t *chain, uint64_t check, 5860 int bits) 5861 { 5862 hammer2_chain_t *lchain; 5863 hammer2_chain_t *ochain; 5864 int did; 5865 5866 did = krateprintf(&krate_h2chk, 5867 "chain %016jx.%02x (%s) meth=%02x CHECK FAIL " 5868 "(flags=%08x, bref/data ", 5869 chain->bref.data_off, 5870 chain->bref.type, 5871 hammer2_bref_type_str(chain->bref.type), 5872 chain->bref.methods, 5873 chain->flags); 5874 if (did == 0) 5875 return; 5876 5877 if (bits == 32) { 5878 kprintf("%08x/%08x)\n", 5879 chain->bref.check.iscsi32.value, 5880 (uint32_t)check); 5881 } else { 5882 kprintf("%016jx/%016jx)\n", 5883 chain->bref.check.xxhash64.value, 5884 check); 5885 } 5886 5887 /* 5888 * Run up the chains to try to find the governing inode so we 5889 * can report it. 5890 * 5891 * XXX This error reporting is not really MPSAFE 5892 */ 5893 ochain = chain; 5894 lchain = chain; 5895 while (chain && chain->bref.type != HAMMER2_BREF_TYPE_INODE) { 5896 lchain = chain; 5897 chain = chain->parent; 5898 } 5899 5900 if (chain && chain->bref.type == HAMMER2_BREF_TYPE_INODE && 5901 ((chain->bref.flags & HAMMER2_BREF_FLAG_PFSROOT) == 0 || 5902 (lchain->bref.key & HAMMER2_DIRHASH_VISIBLE))) { 5903 kprintf(" Resides at/in inode %ld\n", 5904 chain->bref.key); 5905 } else if (chain && chain->bref.type == HAMMER2_BREF_TYPE_INODE) { 5906 kprintf(" Resides in inode index - CRITICAL!!!\n"); 5907 } else { 5908 kprintf(" Resides in root index - CRITICAL!!!\n"); 5909 } 5910 if (ochain->hmp) { 5911 const char *pfsname = "UNKNOWN"; 5912 int i; 5913 5914 if (ochain->pmp) { 5915 for (i = 0; i < HAMMER2_MAXCLUSTER; ++i) { 5916 if (ochain->pmp->pfs_hmps[i] == ochain->hmp && 5917 ochain->pmp->pfs_names[i]) { 5918 pfsname = ochain->pmp->pfs_names[i]; 5919 break; 5920 } 5921 } 5922 } 5923 kprintf(" In pfs %s on device %s\n", 5924 pfsname, ochain->hmp->devrepname); 5925 } 5926 } 5927 5928 /* 5929 * Returns non-zero on success, 0 on failure. 
5930 */ 5931 int 5932 hammer2_chain_testcheck(hammer2_chain_t *chain, void *bdata) 5933 { 5934 uint32_t check32; 5935 uint64_t check64; 5936 int r; 5937 5938 if (chain->flags & HAMMER2_CHAIN_NOTTESTED) 5939 return 1; 5940 5941 switch(HAMMER2_DEC_CHECK(chain->bref.methods)) { 5942 case HAMMER2_CHECK_NONE: 5943 r = 1; 5944 break; 5945 case HAMMER2_CHECK_DISABLED: 5946 r = 1; 5947 break; 5948 case HAMMER2_CHECK_ISCSI32: 5949 check32 = hammer2_icrc32(bdata, chain->bytes); 5950 r = (chain->bref.check.iscsi32.value == check32); 5951 if (r == 0) { 5952 hammer2_characterize_failed_chain(chain, check32, 32); 5953 } 5954 hammer2_process_icrc32 += chain->bytes; 5955 break; 5956 case HAMMER2_CHECK_XXHASH64: 5957 check64 = XXH64(bdata, chain->bytes, XXH_HAMMER2_SEED); 5958 r = (chain->bref.check.xxhash64.value == check64); 5959 if (r == 0) { 5960 hammer2_characterize_failed_chain(chain, check64, 64); 5961 } 5962 hammer2_process_xxhash64 += chain->bytes; 5963 break; 5964 case HAMMER2_CHECK_SHA192: 5965 { 5966 SHA256_CTX hash_ctx; 5967 union { 5968 uint8_t digest[SHA256_DIGEST_LENGTH]; 5969 uint64_t digest64[SHA256_DIGEST_LENGTH/8]; 5970 } u; 5971 5972 SHA256_Init(&hash_ctx); 5973 SHA256_Update(&hash_ctx, bdata, chain->bytes); 5974 SHA256_Final(u.digest, &hash_ctx); 5975 u.digest64[2] ^= u.digest64[3]; 5976 if (bcmp(u.digest, 5977 chain->bref.check.sha192.data, 5978 sizeof(chain->bref.check.sha192.data)) == 0) { 5979 r = 1; 5980 } else { 5981 r = 0; 5982 krateprintf(&krate_h2chk, 5983 "chain %016jx.%02x meth=%02x " 5984 "CHECK FAIL\n", 5985 chain->bref.data_off, 5986 chain->bref.type, 5987 chain->bref.methods); 5988 } 5989 } 5990 break; 5991 case HAMMER2_CHECK_FREEMAP: 5992 r = (chain->bref.check.freemap.icrc32 == 5993 hammer2_icrc32(bdata, chain->bytes)); 5994 if (r == 0) { 5995 int did; 5996 5997 did = krateprintf(&krate_h2chk, 5998 "chain %016jx.%02x meth=%02x " 5999 "CHECK FAIL\n", 6000 chain->bref.data_off, 6001 chain->bref.type, 6002 chain->bref.methods); 6003 if (did) { 
6004 kprintf("freemap.icrc %08x icrc32 %08x (%d)\n", 6005 chain->bref.check.freemap.icrc32, 6006 hammer2_icrc32(bdata, chain->bytes), 6007 chain->bytes); 6008 if (chain->dio) { 6009 kprintf("dio %p buf %016jx,%d " 6010 "bdata %p/%p\n", 6011 chain->dio, 6012 chain->dio->bp->b_loffset, 6013 chain->dio->bp->b_bufsize, 6014 bdata, 6015 chain->dio->bp->b_data); 6016 } 6017 } 6018 } 6019 break; 6020 default: 6021 kprintf("hammer2_chain_testcheck: unknown check type %02x\n", 6022 chain->bref.methods); 6023 r = 1; 6024 break; 6025 } 6026 return r; 6027 } 6028 6029 /* 6030 * Acquire the chain and parent representing the specified inode for the 6031 * device at the specified cluster index. 6032 * 6033 * The flags passed in are LOOKUP flags, not RESOLVE flags. 6034 * 6035 * If we are unable to locate the inode, HAMMER2_ERROR_EIO is returned and 6036 * *chainp will be NULL. *parentp may still be set error or not, or NULL 6037 * if the parent itself could not be resolved. 6038 * 6039 * The caller may pass-in a locked *parentp and/or *chainp, or neither. 6040 * They will be unlocked and released by this function. The *parentp and 6041 * *chainp representing the located inode are returned locked. 6042 */ 6043 int 6044 hammer2_chain_inode_find(hammer2_pfs_t *pmp, hammer2_key_t inum, 6045 int clindex, int flags, 6046 hammer2_chain_t **parentp, hammer2_chain_t **chainp) 6047 { 6048 hammer2_chain_t *parent; 6049 hammer2_chain_t *rchain; 6050 hammer2_key_t key_dummy; 6051 hammer2_inode_t *ip; 6052 int resolve_flags; 6053 int error; 6054 6055 resolve_flags = (flags & HAMMER2_LOOKUP_SHARED) ? 6056 HAMMER2_RESOLVE_SHARED : 0; 6057 6058 /* 6059 * Caller expects us to replace these. 
6060 */ 6061 if (*chainp) { 6062 hammer2_chain_unlock(*chainp); 6063 hammer2_chain_drop(*chainp); 6064 *chainp = NULL; 6065 } 6066 if (*parentp) { 6067 hammer2_chain_unlock(*parentp); 6068 hammer2_chain_drop(*parentp); 6069 *parentp = NULL; 6070 } 6071 6072 /* 6073 * Be very careful, this is a backend function and we CANNOT 6074 * lock any frontend inode structure we find. But we have to 6075 * look the inode up this way first in case it exists but is 6076 * detached from the radix tree. 6077 */ 6078 ip = hammer2_inode_lookup(pmp, inum); 6079 if (ip) { 6080 *chainp = hammer2_inode_chain_and_parent(ip, clindex, 6081 parentp, 6082 resolve_flags); 6083 hammer2_inode_drop(ip); 6084 if (*chainp) 6085 return 0; 6086 hammer2_chain_unlock(*chainp); 6087 hammer2_chain_drop(*chainp); 6088 *chainp = NULL; 6089 if (*parentp) { 6090 hammer2_chain_unlock(*parentp); 6091 hammer2_chain_drop(*parentp); 6092 *parentp = NULL; 6093 } 6094 } 6095 6096 /* 6097 * Inodes hang off of the iroot (bit 63 is clear, differentiating 6098 * inodes from root directory entries in the key lookup). 6099 */ 6100 parent = hammer2_inode_chain(pmp->iroot, clindex, resolve_flags); 6101 rchain = NULL; 6102 if (parent) { 6103 rchain = hammer2_chain_lookup(&parent, &key_dummy, 6104 inum, inum, 6105 &error, flags); 6106 } else { 6107 error = HAMMER2_ERROR_EIO; 6108 } 6109 *parentp = parent; 6110 *chainp = rchain; 6111 6112 return error; 6113 } 6114 6115 /* 6116 * Used by the bulkscan code to snapshot the synchronized storage for 6117 * a volume, allowing it to be scanned concurrently against normal 6118 * operation. 
6119 */ 6120 hammer2_chain_t * 6121 hammer2_chain_bulksnap(hammer2_dev_t *hmp) 6122 { 6123 hammer2_chain_t *copy; 6124 6125 copy = hammer2_chain_alloc(hmp, hmp->spmp, &hmp->vchain.bref); 6126 copy->data = kmalloc(sizeof(copy->data->voldata), 6127 hmp->mchain, 6128 M_WAITOK | M_ZERO); 6129 hammer2_voldata_lock(hmp); 6130 copy->data->voldata = hmp->volsync; 6131 hammer2_voldata_unlock(hmp); 6132 6133 return copy; 6134 } 6135 6136 void 6137 hammer2_chain_bulkdrop(hammer2_chain_t *copy) 6138 { 6139 KKASSERT(copy->bref.type == HAMMER2_BREF_TYPE_VOLUME); 6140 KKASSERT(copy->data); 6141 kfree(copy->data, copy->hmp->mchain); 6142 copy->data = NULL; 6143 atomic_add_long(&hammer2_chain_allocs, -1); 6144 hammer2_chain_drop(copy); 6145 } 6146 6147 /* 6148 * Returns non-zero if the chain (INODE or DIRENT) matches the 6149 * filename. 6150 */ 6151 int 6152 hammer2_chain_dirent_test(hammer2_chain_t *chain, const char *name, 6153 size_t name_len) 6154 { 6155 const hammer2_inode_data_t *ripdata; 6156 const hammer2_dirent_head_t *den; 6157 6158 if (chain->bref.type == HAMMER2_BREF_TYPE_INODE) { 6159 ripdata = &chain->data->ipdata; 6160 if (ripdata->meta.name_len == name_len && 6161 bcmp(ripdata->filename, name, name_len) == 0) { 6162 return 1; 6163 } 6164 } 6165 if (chain->bref.type == HAMMER2_BREF_TYPE_DIRENT && 6166 chain->bref.embed.dirent.namlen == name_len) { 6167 den = &chain->bref.embed.dirent; 6168 if (name_len > sizeof(chain->bref.check.buf) && 6169 bcmp(chain->data->buf, name, name_len) == 0) { 6170 return 1; 6171 } 6172 if (name_len <= sizeof(chain->bref.check.buf) && 6173 bcmp(chain->bref.check.buf, name, name_len) == 0) { 6174 return 1; 6175 } 6176 } 6177 return 0; 6178 } 6179