1 /* 2 * Copyright (c) 2011-2018 The DragonFly Project. All rights reserved. 3 * 4 * This code is derived from software contributed to The DragonFly Project 5 * by Matthew Dillon <dillon@dragonflybsd.org> 6 * and Venkatesh Srinivas <vsrinivas@dragonflybsd.org> 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in 16 * the documentation and/or other materials provided with the 17 * distribution. 18 * 3. Neither the name of The DragonFly Project nor the names of its 19 * contributors may be used to endorse or promote products derived 20 * from this software without specific, prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 23 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 24 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 25 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 26 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 27 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 28 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 29 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 30 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 31 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 32 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33 * SUCH DAMAGE. 34 */ 35 /* 36 * This subsystem implements most of the core support functions for 37 * the hammer2_chain structure. 
 *
 * Chains are the in-memory version of media objects (volume header, inodes,
 * indirect blocks, data blocks, etc).  Chains represent a portion of the
 * HAMMER2 topology.
 *
 * Chains are no-longer delete-duplicated.  Instead, the original in-memory
 * chain will be moved along with its block reference (e.g. for things like
 * renames, hardlink operations, modifications, etc), and will be indexed
 * on a secondary list for flush handling instead of propagating a flag
 * upward to the root.
 *
 * Concurrent front-end operations can still run against backend flushes
 * as long as they do not cross the current flush boundary.  An operation
 * running above the current flush (in areas not yet flushed) can become
 * part of the current flush while an operation running below the current
 * flush can become part of the next flush.
 */
#include <sys/cdefs.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/types.h>
#include <sys/lock.h>
#include <sys/kern_syscall.h>
#include <sys/uuid.h>

#include <crypto/sha2/sha2.h>

#include "hammer2.h"

static hammer2_chain_t *hammer2_chain_create_indirect(
		hammer2_chain_t *parent,
		hammer2_key_t key, int keybits,
		hammer2_tid_t mtid, int for_type, int *errorp);
static void hammer2_chain_rename_obref(hammer2_chain_t **parentp,
		hammer2_chain_t *chain, hammer2_tid_t mtid,
		int flags, hammer2_blockref_t *obref);
static int hammer2_chain_delete_obref(hammer2_chain_t *parent,
		hammer2_chain_t *chain,
		hammer2_tid_t mtid, int flags,
		hammer2_blockref_t *obref);
static hammer2_io_t *hammer2_chain_drop_data(hammer2_chain_t *chain);
static hammer2_chain_t *hammer2_combined_find(
		hammer2_chain_t *parent,
		hammer2_blockref_t *base, int count,
		hammer2_key_t *key_nextp,
		hammer2_key_t key_beg, hammer2_key_t key_end,
		hammer2_blockref_t **bresp);

static struct krate krate_h2me = { .freq = 1 };

/*
 * Basic RBTree
for chains (core->rbtree and core->dbtree). Chains cannot 90 * overlap in the RB trees. Deleted chains are moved from rbtree to either 91 * dbtree or to dbq. 92 * 93 * Chains in delete-duplicate sequences can always iterate through core_entry 94 * to locate the live version of the chain. 95 */ 96 RB_GENERATE(hammer2_chain_tree, hammer2_chain, rbnode, hammer2_chain_cmp); 97 98 int 99 hammer2_chain_cmp(hammer2_chain_t *chain1, hammer2_chain_t *chain2) 100 { 101 hammer2_key_t c1_beg; 102 hammer2_key_t c1_end; 103 hammer2_key_t c2_beg; 104 hammer2_key_t c2_end; 105 106 /* 107 * Compare chains. Overlaps are not supposed to happen and catch 108 * any software issues early we count overlaps as a match. 109 */ 110 c1_beg = chain1->bref.key; 111 c1_end = c1_beg + ((hammer2_key_t)1 << chain1->bref.keybits) - 1; 112 c2_beg = chain2->bref.key; 113 c2_end = c2_beg + ((hammer2_key_t)1 << chain2->bref.keybits) - 1; 114 115 if (c1_end < c2_beg) /* fully to the left */ 116 return(-1); 117 if (c1_beg > c2_end) /* fully to the right */ 118 return(1); 119 return(0); /* overlap (must not cross edge boundary) */ 120 } 121 122 /* 123 * Assert that a chain has no media data associated with it. 124 */ 125 static __inline void 126 hammer2_chain_assert_no_data(hammer2_chain_t *chain) 127 { 128 KKASSERT(chain->dio == NULL); 129 if (chain->bref.type != HAMMER2_BREF_TYPE_VOLUME && 130 chain->bref.type != HAMMER2_BREF_TYPE_FREEMAP && 131 chain->data) { 132 panic("hammer2_assert_no_data: chain %p still has data", chain); 133 } 134 } 135 136 /* 137 * Make a chain visible to the flusher. The flusher operates using a top-down 138 * recursion based on the ONFLUSH flag. It locates MODIFIED and UPDATE chains, 139 * flushes them, and updates blocks back to the volume root. 140 * 141 * This routine sets the ONFLUSH flag upward from the triggering chain until 142 * it hits an inode root or the volume root. Inode chains serve as inflection 143 * points, requiring the flusher to bridge across trees. 
 * Inodes include
 * regular inodes, PFS roots (pmp->iroot), and the media super root
 * (spmp->iroot).
 */
void
hammer2_chain_setflush(hammer2_chain_t *chain)
{
	hammer2_chain_t *parent;

	if ((chain->flags & HAMMER2_CHAIN_ONFLUSH) == 0) {
		/*
		 * Walk upward using hand-over-hand shared spinlocks so the
		 * parent pointer remains stable across each step.  Stop as
		 * soon as a chain already has ONFLUSH set (the path above
		 * it is already marked), or at an inode / topology root.
		 */
		hammer2_spin_sh(&chain->core.spin);
		while ((chain->flags & HAMMER2_CHAIN_ONFLUSH) == 0) {
			atomic_set_int(&chain->flags, HAMMER2_CHAIN_ONFLUSH);
			if (chain->bref.type == HAMMER2_BREF_TYPE_INODE)
				break;
			if ((parent = chain->parent) == NULL)
				break;
			hammer2_spin_sh(&parent->core.spin);
			hammer2_spin_unsh(&chain->core.spin);
			chain = parent;
		}
		hammer2_spin_unsh(&chain->core.spin);
	}
}

/*
 * Allocate a new disconnected chain element representing the specified
 * bref.  chain->refs is set to 1 and the passed bref is copied to
 * chain->bref.  chain->bytes is derived from the bref.
 *
 * chain->pmp inherits pmp unless the chain is an inode (other than the
 * super-root inode).
 *
 * NOTE: Returns a referenced but unlocked (because there is no core) chain.
 */
hammer2_chain_t *
hammer2_chain_alloc(hammer2_dev_t *hmp, hammer2_pfs_t *pmp,
		    hammer2_blockref_t *bref)
{
	hammer2_chain_t *chain;
	u_int bytes;

	/*
	 * Special case - radix of 0 indicates a chain that does not
	 * need a data reference (context is completely embedded in the
	 * bref).
	 */
	if ((int)(bref->data_off & HAMMER2_OFF_MASK_RADIX))
		bytes = 1U << (int)(bref->data_off & HAMMER2_OFF_MASK_RADIX);
	else
		bytes = 0;

	atomic_add_long(&hammer2_chain_allocs, 1);

	/*
	 * Construct the appropriate system structure.
	 */
	switch(bref->type) {
	case HAMMER2_BREF_TYPE_DIRENT:
	case HAMMER2_BREF_TYPE_INODE:
	case HAMMER2_BREF_TYPE_INDIRECT:
	case HAMMER2_BREF_TYPE_FREEMAP_NODE:
	case HAMMER2_BREF_TYPE_DATA:
	case HAMMER2_BREF_TYPE_FREEMAP_LEAF:
		/*
		 * Chain's are really only associated with the hmp but we
		 * maintain a pmp association for per-mount memory tracking
		 * purposes.  The pmp can be NULL.
		 */
		chain = kmalloc(sizeof(*chain), hmp->mchain, M_WAITOK | M_ZERO);
		break;
	case HAMMER2_BREF_TYPE_VOLUME:
	case HAMMER2_BREF_TYPE_FREEMAP:
		/*
		 * Only hammer2_chain_bulksnap() calls this function with these
		 * types.
		 */
		chain = kmalloc(sizeof(*chain), hmp->mchain, M_WAITOK | M_ZERO);
		break;
	default:
		chain = NULL;
		panic("hammer2_chain_alloc: unrecognized blockref type: %d",
		      bref->type);
	}

	/*
	 * Initialize the new chain structure.  pmp must be set to NULL for
	 * chains belonging to the super-root topology of a device mount.
	 */
	if (pmp == hmp->spmp)
		chain->pmp = NULL;
	else
		chain->pmp = pmp;

	chain->hmp = hmp;
	chain->bref = *bref;
	chain->bytes = bytes;
	chain->refs = 1;
	chain->flags = HAMMER2_CHAIN_ALLOCATED;
	lockinit(&chain->diolk, "chdio", 0, 0);

	/*
	 * Set the PFS boundary flag if this chain represents a PFS root.
	 */
	if (bref->flags & HAMMER2_BREF_FLAG_PFSROOT)
		atomic_set_int(&chain->flags, HAMMER2_CHAIN_PFSBOUNDARY);
	hammer2_chain_core_init(chain);

	return (chain);
}

/*
 * Initialize a chain's core structure.  This structure used to be allocated
 * but is now embedded.
 *
 * The core is not locked.  No additional refs on the chain are made.
 * (trans) must not be NULL if (core) is not NULL.
 */
void
hammer2_chain_core_init(hammer2_chain_t *chain)
{
	/*
	 * Fresh core under nchain (no multi-homing of ochain's
	 * sub-tree).
	 */
	RB_INIT(&chain->core.rbtree);	/* live chains */
	hammer2_mtx_init(&chain->lock, "h2chain");
}

/*
 * Add a reference to a chain element, preventing its destruction.
 *
 * (can be called with spinlock held)
 */
void
hammer2_chain_ref(hammer2_chain_t *chain)
{
	if (atomic_fetchadd_int(&chain->refs, 1) == 0) {
		/*
		 * Just flag that the chain was used and should be recycled
		 * on the LRU if it encounters it later.
		 */
		if (chain->flags & HAMMER2_CHAIN_ONLRU)
			atomic_set_int(&chain->flags, HAMMER2_CHAIN_LRUHINT);

#if 0
		/*
		 * REMOVED - reduces contention, lru_list is more heuristical
		 * now.
		 *
		 * 0->non-zero transition must ensure that chain is removed
		 * from the LRU list.
		 *
		 * NOTE: Already holding lru_spin here so we cannot call
		 *	 hammer2_chain_ref() to get it off lru_list, do
		 *	 it manually.
		 */
		if (chain->flags & HAMMER2_CHAIN_ONLRU) {
			hammer2_pfs_t *pmp = chain->pmp;
			hammer2_spin_ex(&pmp->lru_spin);
			if (chain->flags & HAMMER2_CHAIN_ONLRU) {
				atomic_add_int(&pmp->lru_count, -1);
				atomic_clear_int(&chain->flags,
						 HAMMER2_CHAIN_ONLRU);
				TAILQ_REMOVE(&pmp->lru_list, chain, lru_node);
			}
			hammer2_spin_unex(&pmp->lru_spin);
		}
#endif
	}
}

/*
 * Ref a locked chain and force the data to be held across an unlock.
 * Chain must be currently locked.  The user of the chain who desires
 * to release the hold must call hammer2_chain_lock_unhold() to relock
 * and unhold the chain, then unlock normally, or may simply call
 * hammer2_chain_drop_unhold() (which is safer against deadlocks).
 */
void
hammer2_chain_ref_hold(hammer2_chain_t *chain)
{
	atomic_add_int(&chain->lockcnt, 1);
	hammer2_chain_ref(chain);
}

/*
 * Insert the chain in the core rbtree.
 *
 * Normal insertions are placed in the live rbtree.
 * Insertion of a deleted
 * chain is a special case used by the flush code that is placed on the
 * unstaged deleted list to avoid confusing the live view.
 */
#define HAMMER2_CHAIN_INSERT_SPIN	0x0001	/* caller wants us to spin-lock parent */
#define HAMMER2_CHAIN_INSERT_LIVE	0x0002	/* adjust parent's live_count */
#define HAMMER2_CHAIN_INSERT_RACE	0x0004	/* fail on generation mismatch */

/*
 * Insert (chain) under (parent).  Returns 0 on success or
 * HAMMER2_ERROR_EAGAIN if INSERT_RACE is specified and the parent's
 * generation changed out from under the caller (the caller must then
 * re-lookup and retry).
 */
static
int
hammer2_chain_insert(hammer2_chain_t *parent, hammer2_chain_t *chain,
		     int flags, int generation)
{
	hammer2_chain_t *xchain;
	int error = 0;

	if (flags & HAMMER2_CHAIN_INSERT_SPIN)
		hammer2_spin_ex(&parent->core.spin);

	/*
	 * Interlocked by spinlock, check for race
	 */
	if ((flags & HAMMER2_CHAIN_INSERT_RACE) &&
	    parent->core.generation != generation) {
		error = HAMMER2_ERROR_EAGAIN;
		goto failed;
	}

	/*
	 * Insert chain
	 */
	xchain = RB_INSERT(hammer2_chain_tree, &parent->core.rbtree, chain);
	KASSERT(xchain == NULL,
		("hammer2_chain_insert: collision %p %p (key=%016jx)",
		 chain, xchain, chain->bref.key));
	atomic_set_int(&chain->flags, HAMMER2_CHAIN_ONRBTREE);
	chain->parent = parent;
	++parent->core.chain_count;
	++parent->core.generation;	/* XXX incs for _get() too, XXX */

	/*
	 * We have to keep track of the effective live-view blockref count
	 * so the create code knows when to push an indirect block.
	 */
	if (flags & HAMMER2_CHAIN_INSERT_LIVE)
		atomic_add_int(&parent->core.live_count, 1);
failed:
	if (flags & HAMMER2_CHAIN_INSERT_SPIN)
		hammer2_spin_unex(&parent->core.spin);
	return error;
}

/*
 * Drop the caller's reference to the chain.  When the ref count drops to
 * zero this function will try to disassociate the chain from its parent and
 * deallocate it, then recursively drop the parent using the implied ref
 * from the chain's chain->parent.
 *
 * Nobody should own chain's mutex on the 1->0 transition, unless this drop
 * races an acquisition by another cpu.
Therefore we can loop if we are 392 * unable to acquire the mutex, and refs is unlikely to be 1 unless we again 393 * race against another drop. 394 */ 395 static hammer2_chain_t *hammer2_chain_lastdrop(hammer2_chain_t *chain, 396 int depth); 397 static void hammer2_chain_lru_flush(hammer2_pfs_t *pmp); 398 399 void 400 hammer2_chain_drop(hammer2_chain_t *chain) 401 { 402 u_int refs; 403 404 if (hammer2_debug & 0x200000) 405 Debugger("drop"); 406 407 KKASSERT(chain->refs > 0); 408 409 while (chain) { 410 refs = chain->refs; 411 cpu_ccfence(); 412 KKASSERT(refs > 0); 413 414 if (refs == 1) { 415 if (hammer2_mtx_ex_try(&chain->lock) == 0) 416 chain = hammer2_chain_lastdrop(chain, 0); 417 /* retry the same chain, or chain from lastdrop */ 418 } else { 419 if (atomic_cmpset_int(&chain->refs, refs, refs - 1)) 420 break; 421 /* retry the same chain */ 422 } 423 cpu_pause(); 424 } 425 } 426 427 /* 428 * Unhold a held and probably not-locked chain, ensure that the data is 429 * dropped on the 1->0 transition of lockcnt by obtaining an exclusive 430 * lock and then simply unlocking the chain. 431 */ 432 void 433 hammer2_chain_unhold(hammer2_chain_t *chain) 434 { 435 u_int lockcnt; 436 int iter = 0; 437 438 for (;;) { 439 lockcnt = chain->lockcnt; 440 cpu_ccfence(); 441 if (lockcnt > 1) { 442 if (atomic_cmpset_int(&chain->lockcnt, 443 lockcnt, lockcnt - 1)) { 444 break; 445 } 446 } else if (hammer2_mtx_ex_try(&chain->lock) == 0) { 447 hammer2_chain_unlock(chain); 448 break; 449 } else { 450 /* 451 * This situation can easily occur on SMP due to 452 * the gap inbetween the 1->0 transition and the 453 * final unlock. We cannot safely block on the 454 * mutex because lockcnt might go above 1. 455 * 456 * XXX Sleep for one tick if it takes too long. 
457 */ 458 if (++iter > 1000) { 459 if (iter > 1000 + hz) { 460 kprintf("hammer2: h2race1 %p\n", chain); 461 iter = 1000; 462 } 463 tsleep(&iter, 0, "h2race1", 1); 464 } 465 cpu_pause(); 466 } 467 } 468 } 469 470 void 471 hammer2_chain_drop_unhold(hammer2_chain_t *chain) 472 { 473 hammer2_chain_unhold(chain); 474 hammer2_chain_drop(chain); 475 } 476 477 void 478 hammer2_chain_rehold(hammer2_chain_t *chain) 479 { 480 hammer2_chain_lock(chain, HAMMER2_RESOLVE_SHARED); 481 atomic_add_int(&chain->lockcnt, 1); 482 hammer2_chain_unlock(chain); 483 } 484 485 /* 486 * Handles the (potential) last drop of chain->refs from 1->0. Called with 487 * the mutex exclusively locked, refs == 1, and lockcnt 0. SMP races are 488 * possible against refs and lockcnt. We must dispose of the mutex on chain. 489 * 490 * This function returns an unlocked chain for recursive drop or NULL. It 491 * can return the same chain if it determines it has raced another ref. 492 * 493 * -- 494 * 495 * When two chains need to be recursively dropped we use the chain we 496 * would otherwise free to placehold the additional chain. It's a bit 497 * convoluted but we can't just recurse without potentially blowing out 498 * the kernel stack. 499 * 500 * The chain cannot be freed if it has any children. 501 * The chain cannot be freed if flagged MODIFIED unless we can dispose of it. 502 * The chain cannot be freed if flagged UPDATE unless we can dispose of it. 503 * Any dedup registration can remain intact. 504 * 505 * The core spinlock is allowed to nest child-to-parent (not parent-to-child). 506 */ 507 static 508 hammer2_chain_t * 509 hammer2_chain_lastdrop(hammer2_chain_t *chain, int depth) 510 { 511 hammer2_pfs_t *pmp; 512 hammer2_dev_t *hmp; 513 hammer2_chain_t *parent; 514 hammer2_chain_t *rdrop; 515 516 /* 517 * We need chain's spinlock to interlock the sub-tree test. 518 * We already have chain's mutex, protecting chain->parent. 519 * 520 * Remember that chain->refs can be in flux. 
521 */ 522 hammer2_spin_ex(&chain->core.spin); 523 524 if (chain->parent != NULL) { 525 /* 526 * If the chain has a parent the UPDATE bit prevents scrapping 527 * as the chain is needed to properly flush the parent. Try 528 * to complete the 1->0 transition and return NULL. Retry 529 * (return chain) if we are unable to complete the 1->0 530 * transition, else return NULL (nothing more to do). 531 * 532 * If the chain has a parent the MODIFIED bit prevents 533 * scrapping. 534 * 535 * Chains with UPDATE/MODIFIED are *not* put on the LRU list! 536 */ 537 if (chain->flags & (HAMMER2_CHAIN_UPDATE | 538 HAMMER2_CHAIN_MODIFIED)) { 539 if (atomic_cmpset_int(&chain->refs, 1, 0)) { 540 hammer2_spin_unex(&chain->core.spin); 541 hammer2_chain_assert_no_data(chain); 542 hammer2_mtx_unlock(&chain->lock); 543 chain = NULL; 544 } else { 545 hammer2_spin_unex(&chain->core.spin); 546 hammer2_mtx_unlock(&chain->lock); 547 } 548 return (chain); 549 } 550 /* spinlock still held */ 551 } else if (chain->bref.type == HAMMER2_BREF_TYPE_VOLUME || 552 chain->bref.type == HAMMER2_BREF_TYPE_FREEMAP) { 553 /* 554 * Retain the static vchain and fchain. Clear bits that 555 * are not relevant. Do not clear the MODIFIED bit, 556 * and certainly do not put it on the delayed-flush queue. 557 */ 558 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_UPDATE); 559 } else { 560 /* 561 * The chain has no parent and can be flagged for destruction. 562 * Since it has no parent, UPDATE can also be cleared. 563 */ 564 atomic_set_int(&chain->flags, HAMMER2_CHAIN_DESTROY); 565 if (chain->flags & HAMMER2_CHAIN_UPDATE) 566 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_UPDATE); 567 568 /* 569 * If the chain has children we must propagate the DESTROY 570 * flag downward and rip the disconnected topology apart. 571 * This is accomplished by calling hammer2_flush() on the 572 * chain. 573 * 574 * Any dedup is already handled by the underlying DIO, so 575 * we do not have to specifically flush it here. 
576 */ 577 if (chain->core.chain_count) { 578 hammer2_spin_unex(&chain->core.spin); 579 hammer2_flush(chain, HAMMER2_FLUSH_TOP | 580 HAMMER2_FLUSH_ALL); 581 hammer2_mtx_unlock(&chain->lock); 582 583 return(chain); /* retry drop */ 584 } 585 586 /* 587 * Otherwise we can scrap the MODIFIED bit if it is set, 588 * and continue along the freeing path. 589 * 590 * Be sure to clean-out any dedup bits. Without a parent 591 * this chain will no longer be visible to the flush code. 592 * Easy check data_off to avoid the volume root. 593 */ 594 if (chain->flags & HAMMER2_CHAIN_MODIFIED) { 595 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_MODIFIED); 596 atomic_add_long(&hammer2_count_modified_chains, -1); 597 if (chain->pmp) 598 hammer2_pfs_memory_wakeup(chain->pmp); 599 } 600 /* spinlock still held */ 601 } 602 603 /* spinlock still held */ 604 605 /* 606 * If any children exist we must leave the chain intact with refs == 0. 607 * They exist because chains are retained below us which have refs or 608 * may require flushing. 609 * 610 * Retry (return chain) if we fail to transition the refs to 0, else 611 * return NULL indication nothing more to do. 612 * 613 * Chains with children are NOT put on the LRU list. 614 */ 615 if (chain->core.chain_count) { 616 if (atomic_cmpset_int(&chain->refs, 1, 0)) { 617 hammer2_spin_unex(&chain->core.spin); 618 hammer2_chain_assert_no_data(chain); 619 hammer2_mtx_unlock(&chain->lock); 620 chain = NULL; 621 } else { 622 hammer2_spin_unex(&chain->core.spin); 623 hammer2_mtx_unlock(&chain->lock); 624 } 625 return (chain); 626 } 627 /* spinlock still held */ 628 /* no chains left under us */ 629 630 /* 631 * chain->core has no children left so no accessors can get to our 632 * chain from there. Now we have to lock the parent core to interlock 633 * remaining possible accessors that might bump chain's refs before 634 * we can safely drop chain's refs with intent to free the chain. 
635 */ 636 hmp = chain->hmp; 637 pmp = chain->pmp; /* can be NULL */ 638 rdrop = NULL; 639 640 parent = chain->parent; 641 642 /* 643 * WARNING! chain's spin lock is still held here, and other spinlocks 644 * will be acquired and released in the code below. We 645 * cannot be making fancy procedure calls! 646 */ 647 648 /* 649 * We can cache the chain if it is associated with a pmp 650 * and not flagged as being destroyed or requesting a full 651 * release. In this situation the chain is not removed 652 * from its parent, i.e. it can still be looked up. 653 * 654 * We intentionally do not cache DATA chains because these 655 * were likely used to load data into the logical buffer cache 656 * and will not be accessed again for some time. 657 */ 658 if ((chain->flags & 659 (HAMMER2_CHAIN_DESTROY | HAMMER2_CHAIN_RELEASE)) == 0 && 660 chain->pmp && 661 chain->bref.type != HAMMER2_BREF_TYPE_DATA) { 662 if (parent) 663 hammer2_spin_ex(&parent->core.spin); 664 if (atomic_cmpset_int(&chain->refs, 1, 0) == 0) { 665 /* 666 * 1->0 transition failed, retry. Do not drop 667 * the chain's data yet! 668 */ 669 if (parent) 670 hammer2_spin_unex(&parent->core.spin); 671 hammer2_spin_unex(&chain->core.spin); 672 hammer2_mtx_unlock(&chain->lock); 673 674 return(chain); 675 } 676 677 /* 678 * Success 679 */ 680 hammer2_chain_assert_no_data(chain); 681 682 /* 683 * Make sure we are on the LRU list, clean up excessive 684 * LRU entries. We can only really drop one but there might 685 * be other entries that we can remove from the lru_list 686 * without dropping. 687 * 688 * NOTE: HAMMER2_CHAIN_ONLRU may only be safely set when 689 * chain->core.spin AND pmp->lru_spin are held, but 690 * can be safely cleared only holding pmp->lru_spin. 
691 */ 692 if ((chain->flags & HAMMER2_CHAIN_ONLRU) == 0) { 693 hammer2_spin_ex(&pmp->lru_spin); 694 if ((chain->flags & HAMMER2_CHAIN_ONLRU) == 0) { 695 atomic_set_int(&chain->flags, 696 HAMMER2_CHAIN_ONLRU); 697 TAILQ_INSERT_TAIL(&pmp->lru_list, 698 chain, lru_node); 699 atomic_add_int(&pmp->lru_count, 1); 700 } 701 if (pmp->lru_count < HAMMER2_LRU_LIMIT) 702 depth = 1; /* disable lru_list flush */ 703 hammer2_spin_unex(&pmp->lru_spin); 704 } else { 705 /* disable lru flush */ 706 depth = 1; 707 } 708 709 if (parent) { 710 hammer2_spin_unex(&parent->core.spin); 711 parent = NULL; /* safety */ 712 } 713 hammer2_spin_unex(&chain->core.spin); 714 hammer2_mtx_unlock(&chain->lock); 715 716 /* 717 * lru_list hysteresis (see above for depth overrides). 718 * Note that depth also prevents excessive lastdrop recursion. 719 */ 720 if (depth == 0) 721 hammer2_chain_lru_flush(pmp); 722 723 return NULL; 724 /* NOT REACHED */ 725 } 726 727 /* 728 * Make sure we are not on the LRU list. 729 */ 730 if (chain->flags & HAMMER2_CHAIN_ONLRU) { 731 hammer2_spin_ex(&pmp->lru_spin); 732 if (chain->flags & HAMMER2_CHAIN_ONLRU) { 733 atomic_add_int(&pmp->lru_count, -1); 734 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_ONLRU); 735 TAILQ_REMOVE(&pmp->lru_list, chain, lru_node); 736 } 737 hammer2_spin_unex(&pmp->lru_spin); 738 } 739 740 /* 741 * Spinlock the parent and try to drop the last ref on chain. 742 * On success determine if we should dispose of the chain 743 * (remove the chain from its parent, etc). 744 * 745 * (normal core locks are top-down recursive but we define 746 * core spinlocks as bottom-up recursive, so this is safe). 747 */ 748 if (parent) { 749 hammer2_spin_ex(&parent->core.spin); 750 if (atomic_cmpset_int(&chain->refs, 1, 0) == 0) { 751 /* 752 * 1->0 transition failed, retry. 
753 */ 754 hammer2_spin_unex(&parent->core.spin); 755 hammer2_spin_unex(&chain->core.spin); 756 hammer2_mtx_unlock(&chain->lock); 757 758 return(chain); 759 } 760 761 /* 762 * 1->0 transition successful, parent spin held to prevent 763 * new lookups, chain spinlock held to protect parent field. 764 * Remove chain from the parent. 765 * 766 * If the chain is being removed from the parent's btree but 767 * is not bmapped, we have to adjust live_count downward. If 768 * it is bmapped then the blockref is retained in the parent 769 * as is its associated live_count. This case can occur when 770 * a chain added to the topology is unable to flush and is 771 * then later deleted. 772 */ 773 if (chain->flags & HAMMER2_CHAIN_ONRBTREE) { 774 if ((parent->flags & HAMMER2_CHAIN_COUNTEDBREFS) && 775 (chain->flags & HAMMER2_CHAIN_BMAPPED) == 0) { 776 atomic_add_int(&parent->core.live_count, -1); 777 } 778 RB_REMOVE(hammer2_chain_tree, 779 &parent->core.rbtree, chain); 780 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_ONRBTREE); 781 --parent->core.chain_count; 782 chain->parent = NULL; 783 } 784 785 /* 786 * If our chain was the last chain in the parent's core the 787 * core is now empty and its parent might have to be 788 * re-dropped if it has 0 refs. 789 */ 790 if (parent->core.chain_count == 0) { 791 rdrop = parent; 792 atomic_add_int(&rdrop->refs, 1); 793 /* 794 if (atomic_cmpset_int(&rdrop->refs, 0, 1) == 0) 795 rdrop = NULL; 796 */ 797 } 798 hammer2_spin_unex(&parent->core.spin); 799 parent = NULL; /* safety */ 800 /* FALL THROUGH */ 801 } else { 802 /* 803 * No-parent case. 804 */ 805 if (atomic_cmpset_int(&chain->refs, 1, 0) == 0) { 806 /* 807 * 1->0 transition failed, retry. 808 */ 809 hammer2_spin_unex(&parent->core.spin); 810 hammer2_spin_unex(&chain->core.spin); 811 hammer2_mtx_unlock(&chain->lock); 812 813 return(chain); 814 } 815 } 816 817 /* 818 * Successful 1->0 transition, no parent, no children... no way for 819 * anyone to ref this chain any more. 
We can clean-up and free it. 820 * 821 * We still have the core spinlock, and core's chain_count is 0. 822 * Any parent spinlock is gone. 823 */ 824 hammer2_spin_unex(&chain->core.spin); 825 hammer2_chain_assert_no_data(chain); 826 hammer2_mtx_unlock(&chain->lock); 827 KKASSERT(RB_EMPTY(&chain->core.rbtree) && 828 chain->core.chain_count == 0); 829 830 /* 831 * All locks are gone, no pointers remain to the chain, finish 832 * freeing it. 833 */ 834 KKASSERT((chain->flags & (HAMMER2_CHAIN_UPDATE | 835 HAMMER2_CHAIN_MODIFIED)) == 0); 836 837 /* 838 * Once chain resources are gone we can use the now dead chain 839 * structure to placehold what might otherwise require a recursive 840 * drop, because we have potentially two things to drop and can only 841 * return one directly. 842 */ 843 if (chain->flags & HAMMER2_CHAIN_ALLOCATED) { 844 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_ALLOCATED); 845 chain->hmp = NULL; 846 kfree(chain, hmp->mchain); 847 } 848 849 /* 850 * Possible chaining loop when parent re-drop needed. 851 */ 852 return(rdrop); 853 } 854 855 /* 856 * Heuristical flush of the LRU, try to reduce the number of entries 857 * on the LRU to (HAMMER2_LRU_LIMIT * 2 / 3). This procedure is called 858 * only when lru_count exceeds HAMMER2_LRU_LIMIT. 859 */ 860 static 861 void 862 hammer2_chain_lru_flush(hammer2_pfs_t *pmp) 863 { 864 hammer2_chain_t *chain; 865 866 again: 867 chain = NULL; 868 hammer2_spin_ex(&pmp->lru_spin); 869 while (pmp->lru_count > HAMMER2_LRU_LIMIT * 2 / 3) { 870 /* 871 * Pick a chain off the lru_list, just recycle it quickly 872 * if LRUHINT is set (the chain was ref'd but left on 873 * the lru_list, so cycle to the end). 
874 */ 875 chain = TAILQ_FIRST(&pmp->lru_list); 876 TAILQ_REMOVE(&pmp->lru_list, chain, lru_node); 877 878 if (chain->flags & HAMMER2_CHAIN_LRUHINT) { 879 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_LRUHINT); 880 TAILQ_INSERT_TAIL(&pmp->lru_list, chain, lru_node); 881 chain = NULL; 882 continue; 883 } 884 885 /* 886 * Ok, we are off the LRU. We must adjust refs before we 887 * can safely clear the ONLRU flag. 888 */ 889 atomic_add_int(&pmp->lru_count, -1); 890 if (atomic_cmpset_int(&chain->refs, 0, 1)) { 891 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_ONLRU); 892 atomic_set_int(&chain->flags, HAMMER2_CHAIN_RELEASE); 893 break; 894 } 895 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_ONLRU); 896 chain = NULL; 897 } 898 hammer2_spin_unex(&pmp->lru_spin); 899 if (chain == NULL) 900 return; 901 902 /* 903 * If we picked a chain off the lru list we may be able to lastdrop 904 * it. Use a depth of 1 to prevent excessive lastdrop recursion. 905 */ 906 while (chain) { 907 u_int refs; 908 909 refs = chain->refs; 910 cpu_ccfence(); 911 KKASSERT(refs > 0); 912 913 if (refs == 1) { 914 if (hammer2_mtx_ex_try(&chain->lock) == 0) 915 chain = hammer2_chain_lastdrop(chain, 1); 916 /* retry the same chain, or chain from lastdrop */ 917 } else { 918 if (atomic_cmpset_int(&chain->refs, refs, refs - 1)) 919 break; 920 /* retry the same chain */ 921 } 922 cpu_pause(); 923 } 924 goto again; 925 } 926 927 /* 928 * On last lock release. 
929 */ 930 static hammer2_io_t * 931 hammer2_chain_drop_data(hammer2_chain_t *chain) 932 { 933 hammer2_io_t *dio; 934 935 if ((dio = chain->dio) != NULL) { 936 chain->dio = NULL; 937 chain->data = NULL; 938 } else { 939 switch(chain->bref.type) { 940 case HAMMER2_BREF_TYPE_VOLUME: 941 case HAMMER2_BREF_TYPE_FREEMAP: 942 break; 943 default: 944 if (chain->data != NULL) { 945 hammer2_spin_unex(&chain->core.spin); 946 panic("chain data not null: " 947 "chain %p bref %016jx.%02x " 948 "refs %d parent %p dio %p data %p", 949 chain, chain->bref.data_off, 950 chain->bref.type, chain->refs, 951 chain->parent, 952 chain->dio, chain->data); 953 } 954 KKASSERT(chain->data == NULL); 955 break; 956 } 957 } 958 return dio; 959 } 960 961 /* 962 * Lock a referenced chain element, acquiring its data with I/O if necessary, 963 * and specify how you would like the data to be resolved. 964 * 965 * If an I/O or other fatal error occurs, chain->error will be set to non-zero. 966 * 967 * The lock is allowed to recurse, multiple locking ops will aggregate 968 * the requested resolve types. Once data is assigned it will not be 969 * removed until the last unlock. 970 * 971 * HAMMER2_RESOLVE_NEVER - Do not resolve the data element. 972 * (typically used to avoid device/logical buffer 973 * aliasing for data) 974 * 975 * HAMMER2_RESOLVE_MAYBE - Do not resolve data elements for chains in 976 * the INITIAL-create state (indirect blocks only). 977 * 978 * Do not resolve data elements for DATA chains. 979 * (typically used to avoid device/logical buffer 980 * aliasing for data) 981 * 982 * HAMMER2_RESOLVE_ALWAYS- Always resolve the data element. 983 * 984 * HAMMER2_RESOLVE_SHARED- (flag) The chain is locked shared, otherwise 985 * it will be locked exclusive. 986 * 987 * HAMMER2_RESOLVE_NONBLOCK- (flag) The chain is locked non-blocking. If 988 * the lock fails, EAGAIN is returned. 989 * 990 * NOTE: Embedded elements (volume header, inodes) are always resolved 991 * regardless. 
 *
 * NOTE: Specifying HAMMER2_RESOLVE_ALWAYS on a newly-created non-embedded
 *	 element will instantiate and zero its buffer, and flush it on
 *	 release.
 *
 * NOTE: (data) elements are normally locked RESOLVE_NEVER or RESOLVE_MAYBE
 *	 so as not to instantiate a device buffer, which could alias against
 *	 a logical file buffer.  However, if ALWAYS is specified the
 *	 device buffer will be instantiated anyway.
 *
 * NOTE: The return value is always 0 unless NONBLOCK is specified, in which
 *	 case it can be either 0 or EAGAIN.
 *
 * WARNING! This function blocks on I/O if data needs to be fetched.  This
 *	    blocking can run concurrent with other compatible lock holders
 *	    who do not need data returning.  The lock is not upgraded to
 *	    exclusive during a data fetch, a separate bit is used to
 *	    interlock I/O.  However, an exclusive lock holder can still count
 *	    on being interlocked against an I/O fetch managed by a shared
 *	    lock holder.
 */
int
hammer2_chain_lock(hammer2_chain_t *chain, int how)
{
        KKASSERT(chain->refs > 0);

        if (how & HAMMER2_RESOLVE_NONBLOCK) {
                /*
                 * We still have to bump lockcnt before acquiring the lock,
                 * even for non-blocking operation, because the unlock code
                 * live-loops on lockcnt == 1 when dropping the last lock.
                 *
                 * If the non-blocking operation fails we have to use an
                 * unhold sequence to undo the mess.
                 *
                 * NOTE: LOCKAGAIN must always succeed without blocking,
                 *	 even if NONBLOCK is specified.
                 */
                atomic_add_int(&chain->lockcnt, 1);
                if (how & HAMMER2_RESOLVE_SHARED) {
                        if (how & HAMMER2_RESOLVE_LOCKAGAIN) {
                                hammer2_mtx_sh_again(&chain->lock);
                        } else {
                                if (hammer2_mtx_sh_try(&chain->lock) != 0) {
                                        hammer2_chain_unhold(chain);
                                        return EAGAIN;
                                }
                        }
                } else {
                        if (hammer2_mtx_ex_try(&chain->lock) != 0) {
                                hammer2_chain_unhold(chain);
                                return EAGAIN;
                        }
                }
                /* NOTE(review): td_tracker looks like per-thread lock-debug
                 * accounting; confirm against sys/thread.h */
                ++curthread->td_tracker;
        } else {
                /*
                 * Get the appropriate lock.  If LOCKAGAIN is flagged with
                 * SHARED the caller expects a shared lock to already be
                 * present and we are giving it another ref.  This case must
                 * importantly not block if there is a pending exclusive lock
                 * request.
                 */
                atomic_add_int(&chain->lockcnt, 1);
                if (how & HAMMER2_RESOLVE_SHARED) {
                        if (how & HAMMER2_RESOLVE_LOCKAGAIN) {
                                hammer2_mtx_sh_again(&chain->lock);
                        } else {
                                hammer2_mtx_sh(&chain->lock);
                        }
                } else {
                        hammer2_mtx_ex(&chain->lock);
                }
                ++curthread->td_tracker;
        }

        /*
         * If we already have a valid data pointer make sure the data is
         * synchronized to the current cpu, and then no further action is
         * necessary.
         */
        if (chain->data) {
                if (chain->dio)
                        hammer2_io_bkvasync(chain->dio);
                return 0;
        }

        /*
         * Do we have to resolve the data?  This is generally only
         * applicable to HAMMER2_BREF_TYPE_DATA which is special-cased.
         * Other BREF types expect the data to be there.
         */
        switch(how & HAMMER2_RESOLVE_MASK) {
        case HAMMER2_RESOLVE_NEVER:
                return 0;
        case HAMMER2_RESOLVE_MAYBE:
                if (chain->flags & HAMMER2_CHAIN_INITIAL)
                        return 0;
                if (chain->bref.type == HAMMER2_BREF_TYPE_DATA)
                        return 0;
#if 0
                if (chain->bref.type == HAMMER2_BREF_TYPE_FREEMAP_NODE)
                        return 0;
                if (chain->bref.type == HAMMER2_BREF_TYPE_FREEMAP_LEAF)
                        return 0;
#endif
                /* fall through */
        case HAMMER2_RESOLVE_ALWAYS:
        default:
                break;
        }

        /*
         * Caller requires data
         */
        hammer2_chain_load_data(chain);

        return 0;
}

/*
 * Lock the chain, retain the hold, and drop the data persistence count.
 * The data should remain valid because we never transitioned lockcnt
 * through 0.
 */
void
hammer2_chain_lock_unhold(hammer2_chain_t *chain, int how)
{
        hammer2_chain_lock(chain, how);
        atomic_add_int(&chain->lockcnt, -1);
}

#if 0
/*
 * Downgrade an exclusive chain lock to a shared chain lock.
 *
 * NOTE: There is no upgrade equivalent due to the ease of
 *	 deadlocks in that direction.
 */
void
hammer2_chain_lock_downgrade(hammer2_chain_t *chain)
{
        hammer2_mtx_downgrade(&chain->lock);
}
#endif

/*
 * Issue I/O and install chain->data.  Caller must hold a chain lock, lock
 * may be of any type.
 *
 * Once chain->data is set it cannot be disposed of until all locks are
 * released.
 *
 * Make sure the data is synchronized to the current cpu.
 */
void
hammer2_chain_load_data(hammer2_chain_t *chain)
{
        hammer2_blockref_t *bref;
        hammer2_dev_t *hmp;
        hammer2_io_t *dio;
        char *bdata;
        int error;

        /*
         * Degenerate case, data already present, or chain has no media
         * reference to load.
 */
        KKASSERT(chain->lock.mtx_lock & MTX_MASK);
        if (chain->data) {
                if (chain->dio)
                        hammer2_io_bkvasync(chain->dio);
                return;
        }
        if ((chain->bref.data_off & ~HAMMER2_OFF_MASK_RADIX) == 0)
                return;

        hmp = chain->hmp;
        KKASSERT(hmp != NULL);

        /*
         * Gain the IOINPROG bit, interlocked block.  Only one thread at a
         * time performs the I/O; other threads sleep on IOSIGNAL until
         * woken by the owner.
         */
        for (;;) {
                u_int oflags;
                u_int nflags;

                oflags = chain->flags;
                cpu_ccfence();
                if (oflags & HAMMER2_CHAIN_IOINPROG) {
                        nflags = oflags | HAMMER2_CHAIN_IOSIGNAL;
                        tsleep_interlock(&chain->flags, 0);
                        if (atomic_cmpset_int(&chain->flags, oflags, nflags)) {
                                tsleep(&chain->flags, PINTERLOCKED,
                                       "h2iocw", 0);
                        }
                        /* retry */
                } else {
                        nflags = oflags | HAMMER2_CHAIN_IOINPROG;
                        if (atomic_cmpset_int(&chain->flags, oflags, nflags)) {
                                break;
                        }
                        /* retry */
                }
        }

        /*
         * We own CHAIN_IOINPROG
         *
         * Degenerate case if we raced another load.
         */
        if (chain->data) {
                if (chain->dio)
                        hammer2_io_bkvasync(chain->dio);
                goto done;
        }

        /*
         * We must resolve to a device buffer, either by issuing I/O or
         * by creating a zero-fill element.  We do not mark the buffer
         * dirty when creating a zero-fill element (the hammer2_chain_modify()
         * API must still be used to do that).
         *
         * The device buffer is variable-sized in powers of 2 down
         * to HAMMER2_MIN_ALLOC (typically 1K).  A 64K physical storage
         * chunk always contains buffers of the same size. (XXX)
         *
         * The minimum physical IO size may be larger than the variable
         * block size.
         */
        bref = &chain->bref;

        /*
         * The getblk() optimization can only be used on newly created
         * elements if the physical block size matches the request.
         */
        if (chain->flags & HAMMER2_CHAIN_INITIAL) {
                error = hammer2_io_new(hmp, bref->type,
                                       bref->data_off, chain->bytes,
                                       &chain->dio);
        } else {
                error = hammer2_io_bread(hmp, bref->type,
                                         bref->data_off, chain->bytes,
                                         &chain->dio);
                hammer2_adjreadcounter(&chain->bref, chain->bytes);
        }
        if (error) {
                /*
                 * I/O failed.  Latch the error on the chain and release
                 * the (possibly partially instantiated) dio.
                 */
                chain->error = HAMMER2_ERROR_EIO;
                kprintf("hammer2_chain_lock: I/O error %016jx: %d\n",
                        (intmax_t)bref->data_off, error);
                hammer2_io_bqrelse(&chain->dio);
                goto done;
        }
        chain->error = 0;

        /*
         * This isn't perfect and can be ignored on OSs which do not have
         * an indication as to whether a buffer is coming from cache or
         * if I/O was actually issued for the read.  TESTEDGOOD will work
         * pretty well without the B_IOISSUED logic because chains are
         * cached, but in that situation (without B_IOISSUED) it will not
         * detect whether a re-read via I/O is corrupted verses the original
         * read.
         *
         * We can't re-run the CRC on every fresh lock.  That would be
         * insanely expensive.
         *
         * If the underlying kernel buffer covers the entire chain we can
         * use the B_IOISSUED indication to determine if we have to re-run
         * the CRC on chain data for chains that managed to stay cached
         * across the kernel disposal of the original buffer.
         */
        if ((dio = chain->dio) != NULL && dio->bp) {
                struct buf *bp = dio->bp;

                if (dio->psize == chain->bytes &&
                    (bp->b_flags & B_IOISSUED)) {
                        /* fresh I/O occurred, force a re-check below */
                        atomic_clear_int(&chain->flags,
                                         HAMMER2_CHAIN_TESTEDGOOD);
                        bp->b_flags &= ~B_IOISSUED;
                }
        }

        /*
         * NOTE: A locked chain's data cannot be modified without first
         *	 calling hammer2_chain_modify().
         */

        /*
         * Clear INITIAL.  In this case we used io_new() and the buffer has
         * been zero'd and marked dirty.
         *
         * NOTE: hammer2_io_data() call issues bkvasync()
         */
        bdata = hammer2_io_data(chain->dio, chain->bref.data_off);

        if (chain->flags & HAMMER2_CHAIN_INITIAL) {
                atomic_clear_int(&chain->flags, HAMMER2_CHAIN_INITIAL);
                chain->bref.flags |= HAMMER2_BREF_FLAG_ZERO;
        } else if (chain->flags & HAMMER2_CHAIN_MODIFIED) {
                /*
                 * check data not currently synchronized due to
                 * modification.  XXX assumes data stays in the buffer
                 * cache, which might not be true (need biodep on flush
                 * to calculate crc?  or simple crc?).
                 */
        } else if ((chain->flags & HAMMER2_CHAIN_TESTEDGOOD) == 0) {
                /*
                 * Run the media check once and cache the result via
                 * TESTEDGOOD so subsequent locks do not repeat it.
                 */
                if (hammer2_chain_testcheck(chain, bdata) == 0) {
                        chain->error = HAMMER2_ERROR_CHECK;
                } else {
                        atomic_set_int(&chain->flags, HAMMER2_CHAIN_TESTEDGOOD);
                }
        }

        /*
         * Setup the data pointer, either pointing it to an embedded data
         * structure and copying the data from the buffer, or pointing it
         * into the buffer.
         *
         * The buffer is not retained when copying to an embedded data
         * structure in order to avoid potential deadlocks or recursions
         * on the same physical buffer.
         *
         * WARNING! Other threads can start using the data the instant we
         *	    set chain->data non-NULL.
         */
        switch (bref->type) {
        case HAMMER2_BREF_TYPE_VOLUME:
        case HAMMER2_BREF_TYPE_FREEMAP:
                /*
                 * Copy data from bp to embedded buffer
                 */
                panic("hammer2_chain_load_data: unresolved volume header");
                break;
        case HAMMER2_BREF_TYPE_DIRENT:
                KKASSERT(chain->bytes != 0);
                /* fall through */
        case HAMMER2_BREF_TYPE_INODE:
        case HAMMER2_BREF_TYPE_FREEMAP_LEAF:
        case HAMMER2_BREF_TYPE_INDIRECT:
        case HAMMER2_BREF_TYPE_DATA:
        case HAMMER2_BREF_TYPE_FREEMAP_NODE:
        default:
                /*
                 * Point data at the device buffer and leave dio intact.
                 */
                chain->data = (void *)bdata;
                break;
        }

        /*
         * Release HAMMER2_CHAIN_IOINPROG and signal waiters if requested.
         */
done:
        for (;;) {
                u_int oflags;
                u_int nflags;

                oflags = chain->flags;
                nflags = oflags & ~(HAMMER2_CHAIN_IOINPROG |
                                    HAMMER2_CHAIN_IOSIGNAL);
                KKASSERT(oflags & HAMMER2_CHAIN_IOINPROG);
                if (atomic_cmpset_int(&chain->flags, oflags, nflags)) {
                        if (oflags & HAMMER2_CHAIN_IOSIGNAL)
                                wakeup(&chain->flags);
                        break;
                }
        }
}

/*
 * Unlock and deref a chain element.
 *
 * Remember that the presence of children under chain prevent the chain's
 * destruction but do not add additional references, so the dio will still
 * be dropped.
 */
void
hammer2_chain_unlock(hammer2_chain_t *chain)
{
        hammer2_io_t *dio;
        u_int lockcnt;
        int iter = 0;

        --curthread->td_tracker;

        /*
         * If multiple locks are present (or being attempted) on this
         * particular chain we can just unlock, drop refs, and return.
         *
         * Otherwise fall-through on the 1->0 transition.
         */
        for (;;) {
                lockcnt = chain->lockcnt;
                KKASSERT(lockcnt > 0);
                cpu_ccfence();
                if (lockcnt > 1) {
                        if (atomic_cmpset_int(&chain->lockcnt,
                                              lockcnt, lockcnt - 1)) {
                                hammer2_mtx_unlock(&chain->lock);
                                return;
                        }
                } else if (hammer2_mtx_upgrade_try(&chain->lock) == 0) {
                        /* while holding the mutex exclusively */
                        if (atomic_cmpset_int(&chain->lockcnt, 1, 0))
                                break;
                } else {
                        /*
                         * This situation can easily occur on SMP due to
                         * the gap inbetween the 1->0 transition and the
                         * final unlock.  We cannot safely block on the
                         * mutex because lockcnt might go above 1.
                         *
                         * XXX Sleep for one tick if it takes too long.
 */
                        if (++iter > 1000) {
                                if (iter > 1000 + hz) {
                                        kprintf("hammer2: h2race2 %p\n",
                                                chain);
                                        iter = 1000;
                                }
                                tsleep(&iter, 0, "h2race2", 1);
                        }
                        cpu_pause();
                }
                /* retry */
        }

        /*
         * Last unlock / mutex upgraded to exclusive.  Drop the data
         * reference.
         */
        dio = hammer2_chain_drop_data(chain);
        if (dio)
                hammer2_io_bqrelse(&dio);
        hammer2_mtx_unlock(&chain->lock);
}

/*
 * Unlock while keeping the chain's data reference intact.  The extra
 * lockcnt bump prevents the 1->0 transition in hammer2_chain_unlock(),
 * so the dio/data are not dropped.
 */
void
hammer2_chain_unlock_hold(hammer2_chain_t *chain)
{
        atomic_add_int(&chain->lockcnt, 1);
        hammer2_chain_unlock(chain);
}

/*
 * Helper to obtain the blockref[] array base and count for a chain.
 *
 * Returns NULL with *countp still set when the chain is in the INITIAL
 * state (no media data instantiated yet).
 *
 * XXX Not widely used yet, various use cases need to be validated and
 *     converted to use this function.
 */
static
hammer2_blockref_t *
hammer2_chain_base_and_count(hammer2_chain_t *parent, int *countp)
{
        hammer2_blockref_t *base;
        int count;

        if (parent->flags & HAMMER2_CHAIN_INITIAL) {
                base = NULL;

                switch(parent->bref.type) {
                case HAMMER2_BREF_TYPE_INODE:
                        count = HAMMER2_SET_COUNT;
                        break;
                case HAMMER2_BREF_TYPE_INDIRECT:
                case HAMMER2_BREF_TYPE_FREEMAP_NODE:
                        count = parent->bytes / sizeof(hammer2_blockref_t);
                        break;
                case HAMMER2_BREF_TYPE_VOLUME:
                        count = HAMMER2_SET_COUNT;
                        break;
                case HAMMER2_BREF_TYPE_FREEMAP:
                        count = HAMMER2_SET_COUNT;
                        break;
                default:
                        panic("hammer2_chain_create_indirect: "
                              "unrecognized blockref type: %d",
                              parent->bref.type);
                        count = 0;
                        break;
                }
        } else {
                switch(parent->bref.type) {
                case HAMMER2_BREF_TYPE_INODE:
                        base = &parent->data->ipdata.u.blockset.blockref[0];
                        count = HAMMER2_SET_COUNT;
                        break;
                case HAMMER2_BREF_TYPE_INDIRECT:
                case HAMMER2_BREF_TYPE_FREEMAP_NODE:
                        base =
                            &parent->data->npdata[0];
                        count = parent->bytes / sizeof(hammer2_blockref_t);
                        break;
                case HAMMER2_BREF_TYPE_VOLUME:
                        base = &parent->data->voldata.
                                        sroot_blockset.blockref[0];
                        count = HAMMER2_SET_COUNT;
                        break;
                case HAMMER2_BREF_TYPE_FREEMAP:
                        base = &parent->data->blkset.blockref[0];
                        count = HAMMER2_SET_COUNT;
                        break;
                default:
                        panic("hammer2_chain_create_indirect: "
                              "unrecognized blockref type: %d",
                              parent->bref.type);
                        count = 0;
                        break;
                }
        }
        *countp = count;

        return base;
}

/*
 * This counts the number of live blockrefs in a block array and
 * also calculates the point at which all remaining blockrefs are empty.
 * This routine can only be called on a live chain.
 *
 * Caller holds the chain locked, but possibly with a shared lock.  We
 * must use an exclusive spinlock to prevent corruption.
 *
 * NOTE: Flag is not set until after the count is complete, allowing
 *	 callers to test the flag without holding the spinlock.
 *
 * NOTE: If base is NULL the related chain is still in the INITIAL
 *	 state and there are no blockrefs to count.
 *
 * NOTE: live_count may already have some counts accumulated due to
 *	 creation and deletion and could even be initially negative.
 */
void
hammer2_chain_countbrefs(hammer2_chain_t *chain,
                         hammer2_blockref_t *base, int count)
{
        hammer2_spin_ex(&chain->core.spin);
        if ((chain->flags & HAMMER2_CHAIN_COUNTEDBREFS) == 0) {
                if (base) {
                        /*
                         * Scan backwards to locate the first non-empty
                         * blockref; everything at or above live_zero is
                         * known-empty.
                         */
                        while (--count >= 0) {
                                if (base[count].type)
                                        break;
                        }
                        chain->core.live_zero = count + 1;
                        /* accumulate live entries below live_zero */
                        while (count >= 0) {
                                if (base[count].type)
                                        atomic_add_int(&chain->core.live_count,
                                                       1);
                                --count;
                        }
                } else {
                        chain->core.live_zero = 0;
                }
                /* else do not modify live_count */
                atomic_set_int(&chain->flags, HAMMER2_CHAIN_COUNTEDBREFS);
        }
        hammer2_spin_unex(&chain->core.spin);
}

/*
 * Resize the chain's physical storage allocation in-place.  This function does
 * not usually adjust the data pointer and must be followed by (typically) a
 * hammer2_chain_modify() call to copy any old data over and adjust the
 * data pointer.
 *
 * Chains can be resized smaller without reallocating the storage.  Resizing
 * larger will reallocate the storage.  Excess or prior storage is reclaimed
 * asynchronously at a later time.
 *
 * An nradix value of 0 is special-cased to mean that the storage should
 * be disassociated, that is the chain is being resized to 0 bytes (not 1
 * byte).
 *
 * Must be passed an exclusively locked parent and chain.
 *
 * This function is mostly used with DATA blocks locked RESOLVE_NEVER in order
 * to avoid instantiating a device buffer that conflicts with the vnode data
 * buffer.  However, because H2 can compress or encrypt data, the chain may
 * have a dio assigned to it in those situations, and they do not conflict.
 *
 * XXX return error if cannot resize.
 */
int
hammer2_chain_resize(hammer2_chain_t *chain,
                     hammer2_tid_t mtid, hammer2_off_t dedup_off,
                     int nradix, int flags)
{
        hammer2_dev_t *hmp;
        size_t obytes;
        size_t nbytes;
        int error;

        hmp = chain->hmp;

        /*
         * Only data and indirect blocks can be resized for now.
         * (The volume root, inodes, and freemap elements use a fixed size).
         */
        KKASSERT(chain != &hmp->vchain);
        KKASSERT(chain->bref.type == HAMMER2_BREF_TYPE_DATA ||
                 chain->bref.type == HAMMER2_BREF_TYPE_INDIRECT ||
                 chain->bref.type == HAMMER2_BREF_TYPE_DIRENT);

        /*
         * Nothing to do if the element is already the proper size
         */
        obytes = chain->bytes;
        nbytes = (nradix) ? (1U << nradix) : 0;
        if (obytes == nbytes)
                return (chain->error);

        /*
         * Make sure the old data is instantiated so we can copy it.  If this
         * is a data block, the device data may be superfluous since the data
         * might be in a logical block, but compressed or encrypted data is
         * another matter.
         *
         * NOTE: The modify will set BMAPUPD for us if BMAPPED is set.
         */
        error = hammer2_chain_modify(chain, mtid, dedup_off, 0);
        if (error)
                return error;

        /*
         * Relocate the block, even if making it smaller (because different
         * block sizes may be in different regions).
         *
         * NOTE: Operation does not copy the data and may only be used
         *	 to resize data blocks in-place, or directory entry blocks
         *	 which are about to be modified in some manner.
         */
        error = hammer2_freemap_alloc(chain, nbytes);
        if (error)
                return error;

        chain->bytes = nbytes;

        /*
         * We don't want the followup chain_modify() to try to copy data
         * from the old (wrong-sized) buffer.  It won't know how much to
         * copy.  This case should only occur during writes when the
         * originator already has the data to write in-hand.
         */
        if (chain->dio) {
                KKASSERT(chain->bref.type == HAMMER2_BREF_TYPE_DATA ||
                         chain->bref.type == HAMMER2_BREF_TYPE_DIRENT);
                hammer2_io_brelse(&chain->dio);
                chain->data = NULL;
        }
        return (chain->error);
}

/*
 * Set the chain modified so its data can be changed by the caller, or
 * install deduplicated data.  The caller must call this routine for each
 * set of modifications it makes, even if the chain is already flagged
 * MODIFIED.
 *
 * Sets bref.modify_tid to mtid only if mtid != 0.  Note that bref.modify_tid
 * is a CLC (cluster level change) field and is not updated by parent
 * propagation during a flush.
 *
 * Returns an appropriate HAMMER2_ERROR_* code, which will generally reflect
 * chain->error except for HAMMER2_ERROR_ENOSPC.  If the allocation fails
 * due to no space available, HAMMER2_ERROR_ENOSPC is returned and the chain
 * remains unmodified with its old data ref intact and chain->error
 * unchanged.
 *
 *				Dedup Handling
 *
 * If the DEDUPABLE flag is set in the chain the storage must be reallocated
 * even if the chain is still flagged MODIFIED.  In this case the chain's
 * DEDUPABLE flag will be cleared once the new storage has been assigned.
 *
 * If the caller passes a non-zero dedup_off we will use it to assign the
 * new storage.  The MODIFIED flag will be *CLEARED* in this case, and
 * DEDUPABLE will be set (NOTE: the UPDATE flag is always set).  The caller
 * must not modify the data content upon return.
 */
int
hammer2_chain_modify(hammer2_chain_t *chain, hammer2_tid_t mtid,
                     hammer2_off_t dedup_off, int flags)
{
        hammer2_blockref_t obref;
        hammer2_dev_t *hmp;
        hammer2_io_t *dio;
        int error;
        int wasinitial;
        int setmodified;
        int setupdate;
        int newmod;
        char *bdata;

        hmp = chain->hmp;
        obref = chain->bref;
        KKASSERT((chain->flags & HAMMER2_CHAIN_FICTITIOUS) == 0);
        KKASSERT(chain->lock.mtx_lock & MTX_EXCLUSIVE);

        /*
         * Data is not optional for freemap chains (we must always be sure
         * to copy the data on COW storage allocations).
         */
        if (chain->bref.type == HAMMER2_BREF_TYPE_FREEMAP_NODE ||
            chain->bref.type == HAMMER2_BREF_TYPE_FREEMAP_LEAF) {
                KKASSERT((chain->flags & HAMMER2_CHAIN_INITIAL) ||
                         (flags & HAMMER2_MODIFY_OPTDATA) == 0);
        }

        /*
         * Data must be resolved if already assigned, unless explicitly
         * flagged otherwise.  If we cannot safely load the data the
         * modification fails and we return early.
         */
        if (chain->data == NULL && chain->bytes != 0 &&
            (flags & HAMMER2_MODIFY_OPTDATA) == 0 &&
            (chain->bref.data_off & ~HAMMER2_OFF_MASK_RADIX)) {
                hammer2_chain_load_data(chain);
                if (chain->error)
                        return (chain->error);
        }
        error = 0;

        /*
         * Set MODIFIED to indicate that the chain has been modified.  A new
         * allocation is required when modifying a chain.
         *
         * Set UPDATE to ensure that the blockref is updated in the parent.
         *
         * If MODIFIED is already set determine if we can reuse the assigned
         * data block or if we need a new data block.
         */
        if ((chain->flags & HAMMER2_CHAIN_MODIFIED) == 0) {
                /*
                 * Must set modified bit.
                 */
                atomic_add_long(&hammer2_count_modified_chains, 1);
                atomic_set_int(&chain->flags, HAMMER2_CHAIN_MODIFIED);
                hammer2_pfs_memory_inc(chain->pmp);  /* can be NULL */
                setmodified = 1;

                /*
                 * We may be able to avoid a copy-on-write if the chain's
                 * check mode is set to NONE and the chain's current
                 * modify_tid is beyond the last explicit snapshot tid.
                 *
                 * This implements HAMMER2's overwrite-in-place feature.
                 *
                 * NOTE! This data-block cannot be used as a de-duplication
                 *	 source when the check mode is set to NONE.
                 */
                if ((chain->bref.type == HAMMER2_BREF_TYPE_DATA ||
                     chain->bref.type == HAMMER2_BREF_TYPE_DIRENT) &&
                    (chain->flags & HAMMER2_CHAIN_INITIAL) == 0 &&
                    (chain->flags & HAMMER2_CHAIN_DEDUPABLE) == 0 &&
                    HAMMER2_DEC_CHECK(chain->bref.methods) ==
                     HAMMER2_CHECK_NONE &&
                    chain->pmp &&
                    chain->bref.modify_tid >
                     chain->pmp->iroot->meta.pfs_lsnap_tid) {
                        /*
                         * Sector overwrite allowed.
                         */
                        newmod = 0;
                } else {
                        /*
                         * Sector overwrite not allowed, must copy-on-write.
                         */
                        newmod = 1;
                }
        } else if (chain->flags & HAMMER2_CHAIN_DEDUPABLE) {
                /*
                 * If the modified chain was registered for dedup we need
                 * a new allocation.  This only happens for delayed-flush
                 * chains (i.e. which run through the front-end buffer
                 * cache).
                 */
                newmod = 1;
                setmodified = 0;
        } else {
                /*
                 * Already flagged modified, no new allocation is needed.
                 */
                newmod = 0;
                setmodified = 0;
        }

        /*
         * Flag parent update required.
         */
        if ((chain->flags & HAMMER2_CHAIN_UPDATE) == 0) {
                atomic_set_int(&chain->flags, HAMMER2_CHAIN_UPDATE);
                setupdate = 1;
        } else {
                setupdate = 0;
        }

        /*
         * The XOP code returns held but unlocked focus chains.  This
         * prevents the chain from being destroyed but does not prevent
         * it from being modified.  diolk is used to interlock modifications
         * against XOP frontend accesses to the focus.
         *
         * This allows us to theoretically avoid deadlocking the frontend
         * if one of the backends lock up by not formally locking the
         * focused chain in the frontend.  In addition, the synchronization
         * code relies on this mechanism to avoid deadlocking concurrent
         * synchronization threads.
         */
        lockmgr(&chain->diolk, LK_EXCLUSIVE);

        /*
         * The modification or re-modification requires an allocation and
         * possible COW.  If an error occurs, the previous content and data
         * reference is retained and the modification fails.
         *
         * If dedup_off is non-zero, the caller is requesting a deduplication
         * rather than a modification.  The MODIFIED bit is not set and the
         * data offset is set to the deduplication offset.  The data cannot
         * be modified.
         *
         * NOTE: The dedup offset is allowed to be in a partially free state
         *	 and we must be sure to reset it to a fully allocated state
         *	 to force two bulkfree passes to free it again.
         *
         * NOTE: Only applicable when chain->bytes != 0.
         *
         * XXX can a chain already be marked MODIFIED without a data
         * assignment?  If not, assert here instead of testing the case.
         */
        if (chain != &hmp->vchain && chain != &hmp->fchain &&
            chain->bytes) {
                if ((chain->bref.data_off & ~HAMMER2_OFF_MASK_RADIX) == 0 ||
                    newmod
                ) {
                        /*
                         * NOTE: We do not have to remove the dedup
                         *	 registration because the area is still
                         *	 allocated and the underlying DIO will
                         *	 still be flushed.
                         */
                        if (dedup_off) {
                                chain->bref.data_off = dedup_off;
                                chain->bytes = 1 << (dedup_off &
                                                     HAMMER2_OFF_MASK_RADIX);
                                chain->error = 0;
                                atomic_clear_int(&chain->flags,
                                                 HAMMER2_CHAIN_MODIFIED);
                                atomic_add_long(&hammer2_count_modified_chains,
                                                -1);
                                if (chain->pmp)
                                        hammer2_pfs_memory_wakeup(chain->pmp);
                                hammer2_freemap_adjust(hmp, &chain->bref,
                                                HAMMER2_FREEMAP_DORECOVER);
                                atomic_set_int(&chain->flags,
                                                HAMMER2_CHAIN_DEDUPABLE);
                        } else {
                                error = hammer2_freemap_alloc(chain,
                                                              chain->bytes);
                                atomic_clear_int(&chain->flags,
                                                HAMMER2_CHAIN_DEDUPABLE);
                        }
                }
        }

        /*
         * Stop here if error.  We have to undo any flag bits we might
         * have set above.
         */
        if (error) {
                if (setmodified) {
                        atomic_clear_int(&chain->flags,
                                         HAMMER2_CHAIN_MODIFIED);
                        atomic_add_long(&hammer2_count_modified_chains, -1);
                        if (chain->pmp)
                                hammer2_pfs_memory_wakeup(chain->pmp);
                }
                if (setupdate) {
                        atomic_clear_int(&chain->flags, HAMMER2_CHAIN_UPDATE);
                }
                lockmgr(&chain->diolk, LK_RELEASE);

                return error;
        }

        /*
         * Update mirror_tid and modify_tid.  modify_tid is only updated
         * if not passed as zero (during flushes, parent propagation passes
         * the value 0).
         *
         * NOTE: chain->pmp could be the device spmp.
         */
        chain->bref.mirror_tid = hmp->voldata.mirror_tid + 1;
        if (mtid)
                chain->bref.modify_tid = mtid;

        /*
         * Set BMAPUPD to tell the flush code that an existing blockmap entry
         * requires updating as well as to tell the delete code that the
         * chain's blockref might not exactly match (in terms of physical size
         * or block offset) the one in the parent's blocktable.  The base key
         * of course will still match.
         */
        if (chain->flags & HAMMER2_CHAIN_BMAPPED)
                atomic_set_int(&chain->flags, HAMMER2_CHAIN_BMAPUPD);

        /*
         * Short-cut data blocks which the caller does not need an actual
         * data reference to (aka OPTDATA), as long as the chain does not
         * already have a data pointer to the data.  This generally means
         * that the modifications are being done via the logical buffer cache.
         * The INITIAL flag relates only to the device data buffer and thus
         * remains unchanged in this situation.
         *
         * This code also handles bytes == 0 (most dirents).
         */
        if (chain->bref.type == HAMMER2_BREF_TYPE_DATA &&
            (flags & HAMMER2_MODIFY_OPTDATA) &&
            chain->data == NULL) {
                KKASSERT(chain->dio == NULL);
                goto skip2;
        }

        /*
         * Clearing the INITIAL flag (for indirect blocks) indicates that
         * we've processed the uninitialized storage allocation.
         *
         * If this flag is already clear we are likely in a copy-on-write
         * situation but we have to be sure NOT to bzero the storage if
         * no data is present.
         */
        if (chain->flags & HAMMER2_CHAIN_INITIAL) {
                atomic_clear_int(&chain->flags, HAMMER2_CHAIN_INITIAL);
                wasinitial = 1;
        } else {
                wasinitial = 0;
        }

        /*
         * Instantiate data buffer and possibly execute COW operation
         */
        switch(chain->bref.type) {
        case HAMMER2_BREF_TYPE_VOLUME:
        case HAMMER2_BREF_TYPE_FREEMAP:
                /*
                 * The data is embedded, no copy-on-write operation is
                 * needed.
                 */
                KKASSERT(chain->dio == NULL);
                break;
        case HAMMER2_BREF_TYPE_DIRENT:
                /*
                 * The data might be fully embedded.
                 */
                if (chain->bytes == 0) {
                        KKASSERT(chain->dio == NULL);
                        break;
                }
                /* fall through */
        case HAMMER2_BREF_TYPE_INODE:
        case HAMMER2_BREF_TYPE_FREEMAP_LEAF:
        case HAMMER2_BREF_TYPE_DATA:
        case HAMMER2_BREF_TYPE_INDIRECT:
        case HAMMER2_BREF_TYPE_FREEMAP_NODE:
                /*
                 * Perform the copy-on-write operation
                 *
                 * zero-fill or copy-on-write depending on whether
                 * chain->data exists or not and set the dirty state for
                 * the new buffer.  hammer2_io_new() will handle the
                 * zero-fill.
                 *
                 * If a dedup_off was supplied this is an existing block
                 * and no COW, copy, or further modification is required.
                 */
                KKASSERT(chain != &hmp->vchain && chain != &hmp->fchain);

                if (wasinitial && dedup_off == 0) {
                        error = hammer2_io_new(hmp, chain->bref.type,
                                               chain->bref.data_off,
                                               chain->bytes, &dio);
                } else {
                        error = hammer2_io_bread(hmp, chain->bref.type,
                                                 chain->bref.data_off,
                                                 chain->bytes, &dio);
                }
                hammer2_adjreadcounter(&chain->bref, chain->bytes);

                /*
                 * If an I/O error occurs make sure callers cannot
                 * accidentally modify the old buffer's contents and corrupt
                 * the filesystem.
                 *
                 * NOTE: hammer2_io_data() call issues bkvasync()
                 */
                if (error) {
                        kprintf("hammer2_chain_modify: hmp=%p I/O error\n",
                                hmp);
                        chain->error = HAMMER2_ERROR_EIO;
                        hammer2_io_brelse(&dio);
                        hammer2_io_brelse(&chain->dio);
                        chain->data = NULL;
                        break;
                }
                chain->error = 0;
                bdata = hammer2_io_data(dio, chain->bref.data_off);

                if (chain->data) {
                        /*
                         * COW (unless a dedup).
                         */
                        KKASSERT(chain->dio != NULL);
                        if (chain->data != (void *)bdata && dedup_off == 0) {
                                bcopy(chain->data, bdata, chain->bytes);
                        }
                } else if (wasinitial == 0) {
                        /*
                         * We have a problem.  We were asked to COW but
                         * we don't have any data to COW with!
                         */
                        panic("hammer2_chain_modify: having a COW %p\n",
                              chain);
                }

                /*
                 * Retire the old buffer, replace with the new.  Dirty or
                 * redirty the new buffer.
                 *
                 * WARNING! The system buffer cache may have already flushed
                 *	    the buffer, so we must be sure to [re]dirty it
                 *	    for further modification.
                 *
                 *	    If dedup_off was supplied, the caller is not
                 *	    expected to make any further modification to the
                 *	    buffer.
                 *
                 * WARNING! hammer2_get_gdata() assumes dio never transitions
                 *	    through NULL in order to optimize away unnecessary
                 *	    diolk operations.
                 */
                {
                        hammer2_io_t *tio;

                        if ((tio = chain->dio) != NULL)
                                hammer2_io_bqrelse(&tio);
                        chain->data = (void *)bdata;
                        chain->dio = dio;
                        if (dedup_off == 0)
                                hammer2_io_setdirty(dio);
                }
                break;
        default:
                panic("hammer2_chain_modify: illegal non-embedded type %d",
                      chain->bref.type);
                break;

        }
skip2:
        /*
         * setflush on parent indicating that the parent must recurse down
         * to us.  Do not call on chain itself which might already have it
         * set.
         */
        if (chain->parent)
                hammer2_chain_setflush(chain->parent);
        lockmgr(&chain->diolk, LK_RELEASE);

        return (chain->error);
}

/*
 * Modify the chain associated with an inode.
2063 */ 2064 int 2065 hammer2_chain_modify_ip(hammer2_inode_t *ip, hammer2_chain_t *chain, 2066 hammer2_tid_t mtid, int flags) 2067 { 2068 int error; 2069 2070 hammer2_inode_modify(ip); 2071 error = hammer2_chain_modify(chain, mtid, 0, flags); 2072 2073 return error; 2074 } 2075 2076 /* 2077 * Volume header data locks 2078 */ 2079 void 2080 hammer2_voldata_lock(hammer2_dev_t *hmp) 2081 { 2082 lockmgr(&hmp->vollk, LK_EXCLUSIVE); 2083 } 2084 2085 void 2086 hammer2_voldata_unlock(hammer2_dev_t *hmp) 2087 { 2088 lockmgr(&hmp->vollk, LK_RELEASE); 2089 } 2090 2091 void 2092 hammer2_voldata_modify(hammer2_dev_t *hmp) 2093 { 2094 if ((hmp->vchain.flags & HAMMER2_CHAIN_MODIFIED) == 0) { 2095 atomic_add_long(&hammer2_count_modified_chains, 1); 2096 atomic_set_int(&hmp->vchain.flags, HAMMER2_CHAIN_MODIFIED); 2097 hammer2_pfs_memory_inc(hmp->vchain.pmp); 2098 } 2099 } 2100 2101 /* 2102 * This function returns the chain at the nearest key within the specified 2103 * range. The returned chain will be referenced but not locked. 2104 * 2105 * This function will recurse through chain->rbtree as necessary and will 2106 * return a *key_nextp suitable for iteration. *key_nextp is only set if 2107 * the iteration value is less than the current value of *key_nextp. 2108 * 2109 * The caller should use (*key_nextp) to calculate the actual range of 2110 * the returned element, which will be (key_beg to *key_nextp - 1), because 2111 * there might be another element which is superior to the returned element 2112 * and overlaps it. 2113 * 2114 * (*key_nextp) can be passed as key_beg in an iteration only while non-NULL 2115 * chains continue to be returned. On EOF (*key_nextp) may overflow since 2116 * it will wind up being (key_end + 1). 2117 * 2118 * WARNING! Must be called with child's spinlock held. Spinlock remains 2119 * held through the operation. 
 */
struct hammer2_chain_find_info {
	hammer2_chain_t	*best;		/* best candidate found so far */
	hammer2_key_t	key_beg;	/* inclusive start of search range */
	hammer2_key_t	key_end;	/* inclusive end of search range */
	hammer2_key_t	key_next;	/* iteration value, trimmed downward */
};

static int hammer2_chain_find_cmp(hammer2_chain_t *child, void *data);
static int hammer2_chain_find_callback(hammer2_chain_t *child, void *data);

/*
 * Scan parent->core.rbtree for the in-memory chain nearest key_beg within
 * (key_beg, key_end) inclusive.  Returns the best candidate (referenced by
 * the tree, NOT ref'd or locked here) or NULL, and updates *key_nextp for
 * iteration.  See the comment above for the caller's spinlock requirement.
 */
static
hammer2_chain_t *
hammer2_chain_find(hammer2_chain_t *parent, hammer2_key_t *key_nextp,
		   hammer2_key_t key_beg, hammer2_key_t key_end)
{
	struct hammer2_chain_find_info info;

	info.best = NULL;
	info.key_beg = key_beg;
	info.key_end = key_end;
	info.key_next = *key_nextp;

	RB_SCAN(hammer2_chain_tree, &parent->core.rbtree,
		hammer2_chain_find_cmp, hammer2_chain_find_callback,
		&info);
	*key_nextp = info.key_next;
#if 0
	kprintf("chain_find %p %016jx:%016jx next=%016jx\n",
		parent, key_beg, key_end, *key_nextp);
#endif

	return (info.best);
}

/*
 * RB_SCAN range comparator: -1 if the child's key range lies entirely
 * below the search range, +1 if entirely above, 0 if overlapping (causing
 * the callback to run on the child).
 */
static
int
hammer2_chain_find_cmp(hammer2_chain_t *child, void *data)
{
	struct hammer2_chain_find_info *info = data;
	hammer2_key_t child_beg;
	hammer2_key_t child_end;

	/* inclusive key range covered by the child's (key, keybits) */
	child_beg = child->bref.key;
	child_end = child_beg + ((hammer2_key_t)1 << child->bref.keybits) - 1;

	if (child_end < info->key_beg)
		return(-1);
	if (child_beg > info->key_end)
		return(1);
	return(0);
}

/*
 * RB_SCAN callback: track the child with the nearest base key and trim
 * info->key_next so the caller knows the extent of the returned element.
 * Overlapping siblings at/below key_beg are illegal (asserted).
 */
static
int
hammer2_chain_find_callback(hammer2_chain_t *child, void *data)
{
	struct hammer2_chain_find_info *info = data;
	hammer2_chain_t *best;
	hammer2_key_t child_end;

	/*
	 * WARNING! Layerq is scanned forwards, exact matches should keep
	 *	    the existing info->best.
	 */
	if ((best = info->best) == NULL) {
		/*
		 * No previous best.  Assign best
		 */
		info->best = child;
	} else if (best->bref.key <= info->key_beg &&
		   child->bref.key <= info->key_beg) {
		/*
		 * Illegal overlap.
		 */
		KKASSERT(0);
		/*info->best = child;*/
	} else if (child->bref.key < best->bref.key) {
		/*
		 * Child has a nearer key and best is not flush with key_beg.
		 * Set best to child.  Truncate key_next to the old best key.
		 */
		info->best = child;
		if (info->key_next > best->bref.key || info->key_next == 0)
			info->key_next = best->bref.key;
	} else if (child->bref.key == best->bref.key) {
		/*
		 * If our current best is flush with the child then this
		 * is an illegal overlap.
		 *
		 * key_next will automatically be limited to the smaller of
		 * the two end-points.
		 */
		KKASSERT(0);
		info->best = child;
	} else {
		/*
		 * Keep the current best but truncate key_next to the child's
		 * base.
		 *
		 * key_next will also automatically be limited to the smaller
		 * of the two end-points (probably not necessary for this case
		 * but we do it anyway).
		 */
		if (info->key_next > child->bref.key || info->key_next == 0)
			info->key_next = child->bref.key;
	}

	/*
	 * Always truncate key_next based on child's end-of-range.
	 * (child_end == 0 means the range wrapped; leave key_next alone).
	 */
	child_end = child->bref.key + ((hammer2_key_t)1 << child->bref.keybits);
	if (child_end && (info->key_next > child_end || info->key_next == 0))
		info->key_next = child_end;

	return(0);
}

/*
 * Retrieve the specified chain from a media blockref, creating the
 * in-memory chain structure which reflects it.  The returned chain is
 * held and locked according to (how) (HAMMER2_RESOLVE_*).  The caller must
 * handle crc-checks and so forth, and should check chain->error before
 * assuming that the data is good.
 *
 * To handle insertion races pass the INSERT_RACE flag along with the
 * generation number of the core.  NULL will be returned if the generation
 * number changes before we have a chance to insert the chain.  Insert
 * races can occur because the parent might be held shared.
 *
 * Caller must hold the parent locked shared or exclusive since we may
 * need the parent's bref array to find our block.
 *
 * WARNING! chain->pmp is always set to NULL for any chain representing
 *	    part of the super-root topology.
 */
hammer2_chain_t *
hammer2_chain_get(hammer2_chain_t *parent, int generation,
		  hammer2_blockref_t *bref, int how)
{
	hammer2_dev_t *hmp = parent->hmp;
	hammer2_chain_t *chain;
	int error;

	/*
	 * Allocate a chain structure representing the existing media
	 * entry.  Resulting chain has one ref and is not locked.
	 *
	 * A PFS root gets a NULL pmp (super-root topology, see WARNING
	 * above); everything else inherits the parent's pmp.
	 */
	if (bref->flags & HAMMER2_BREF_FLAG_PFSROOT)
		chain = hammer2_chain_alloc(hmp, NULL, bref);
	else
		chain = hammer2_chain_alloc(hmp, parent->pmp, bref);
	/* ref'd chain returned */

	/*
	 * Flag that the chain is in the parent's blockmap so delete/flush
	 * knows what to do with it.
	 */
	atomic_set_int(&chain->flags, HAMMER2_CHAIN_BMAPPED);

	/*
	 * chain must be locked to avoid unexpected ripouts
	 */
	hammer2_chain_lock(chain, how);

	/*
	 * Link the chain into its parent.  A spinlock is required to safely
	 * access the RBTREE, and it is possible to collide with another
	 * hammer2_chain_get() operation because the caller might only hold
	 * a shared lock on the parent.
	 *
	 * NOTE: Get races can occur quite often when we distribute
	 *	 asynchronous read-aheads across multiple threads.
	 */
	KKASSERT(parent->refs > 0);
	error = hammer2_chain_insert(parent, chain,
				     HAMMER2_CHAIN_INSERT_SPIN |
				     HAMMER2_CHAIN_INSERT_RACE,
				     generation);
	if (error) {
		/* lost the insertion race; undo the ref+lock and fail */
		KKASSERT((chain->flags & HAMMER2_CHAIN_ONRBTREE) == 0);
		/*kprintf("chain %p get race\n", chain);*/
		hammer2_chain_unlock(chain);
		hammer2_chain_drop(chain);
		chain = NULL;
	} else {
		KKASSERT(chain->flags & HAMMER2_CHAIN_ONRBTREE);
	}

	/*
	 * Return our new chain referenced but not locked, or NULL if
	 * a race occurred.
	 */
	return (chain);
}

/*
 * Lookup initialization/completion API.
 *
 * lookup_init returns (parent) ref'd and locked (shared or exclusive
 * depending on HAMMER2_LOOKUP_SHARED), lookup_done undoes both.
 */
hammer2_chain_t *
hammer2_chain_lookup_init(hammer2_chain_t *parent, int flags)
{
	hammer2_chain_ref(parent);
	if (flags & HAMMER2_LOOKUP_SHARED) {
		hammer2_chain_lock(parent, HAMMER2_RESOLVE_ALWAYS |
					   HAMMER2_RESOLVE_SHARED);
	} else {
		hammer2_chain_lock(parent, HAMMER2_RESOLVE_ALWAYS);
	}
	return (parent);
}

void
hammer2_chain_lookup_done(hammer2_chain_t *parent)
{
	if (parent) {
		hammer2_chain_unlock(parent);
		hammer2_chain_drop(parent);
	}
}

/*
 * Take the locked chain and return a locked parent.  The chain remains
 * locked on return, but may have to be temporarily unlocked to acquire
 * the parent.  Because of this, (chain) must be stable and cannot be
 * deleted while it was temporarily unlocked (typically means that (chain)
 * is an inode).
 *
 * Pass HAMMER2_RESOLVE_* flags in flags.
 *
 * This will work even if the chain is errored, and the caller can check
 * parent->error on return if desired since the parent will be locked.
 *
 * This function handles the lock order reversal.
 */
hammer2_chain_t *
hammer2_chain_getparent(hammer2_chain_t *chain, int flags)
{
	hammer2_chain_t *parent;

	/*
	 * Be careful of order, chain must be unlocked before parent
	 * is locked below to avoid a deadlock.  Try it trivially first
	 * with a non-blocking lock attempt.
	 */
	parent = chain->parent;
	if (parent == NULL)
		panic("hammer2_chain_getparent: no parent");
	hammer2_chain_ref(parent);
	if (hammer2_chain_lock(parent, flags|HAMMER2_RESOLVE_NONBLOCK) == 0)
		return parent;

	/*
	 * Slow path: release chain, lock parent then chain in top-down
	 * order, and re-verify the linkage afterwards.
	 */
	for (;;) {
		hammer2_chain_unlock(chain);
		hammer2_chain_lock(parent, flags);
		hammer2_chain_lock(chain, flags);

		/*
		 * Parent relinking races are quite common.  We have to get
		 * it right or we will blow up the block table.
		 */
		if (chain->parent == parent)
			break;
		hammer2_chain_unlock(parent);
		hammer2_chain_drop(parent);
		cpu_ccfence();
		parent = chain->parent;
		if (parent == NULL)
			panic("hammer2_chain_getparent: no parent");
		hammer2_chain_ref(parent);
	}
	return parent;
}

/*
 * Take the locked chain and return a locked parent.  The chain is unlocked
 * and dropped.  *chainp is set to the returned parent as a convenience.
 * Pass HAMMER2_RESOLVE_* flags in flags.
 *
 * This will work even if the chain is errored, and the caller can check
 * parent->error on return if desired since the parent will be locked.
 *
 * The chain does NOT need to be stable.  We use a tracking structure
 * to track the expected parent if the chain is deleted out from under us.
 *
 * This function handles the lock order reversal.
 */
hammer2_chain_t *
hammer2_chain_repparent(hammer2_chain_t **chainp, int flags)
{
	hammer2_chain_t *chain;
	hammer2_chain_t *parent;
	struct hammer2_reptrack reptrack;
	struct hammer2_reptrack **repp;

	/*
	 * Be careful of order, chain must be unlocked before parent
	 * is locked below to avoid a deadlock.  Try it trivially first
	 * with a non-blocking lock attempt.
	 */
	chain = *chainp;
	parent = chain->parent;
	if (parent == NULL) {
		/*
		 * NOTE(review): chain->core.spin does not appear to be
		 * held on this path — confirm the unex is intentional.
		 */
		hammer2_spin_unex(&chain->core.spin);
		panic("hammer2_chain_repparent: no parent");
	}
	hammer2_chain_ref(parent);
	if (hammer2_chain_lock(parent, flags|HAMMER2_RESOLVE_NONBLOCK) == 0) {
		hammer2_chain_unlock(chain);
		hammer2_chain_drop(chain);
		*chainp = parent;

		return parent;
	}

	/*
	 * Ok, now it gets a bit nasty.  There are multiple situations where
	 * the parent might be in the middle of a deletion, or where the child
	 * (chain) might be deleted the instant we let go of its lock.
	 * We can potentially end up in a no-win situation!
	 *
	 * In particular, the indirect_maintenance() case can cause these
	 * situations.
	 *
	 * To deal with this we install a reptrack structure in the parent
	 * This reptrack structure 'owns' the parent ref and will automatically
	 * migrate to the parent's parent if the parent is deleted permanently.
	 */
	hammer2_spin_init(&reptrack.spin, "h2reptrk");
	reptrack.chain = parent;
	hammer2_chain_ref(parent);		/* for the reptrack */

	hammer2_spin_ex(&parent->core.spin);
	reptrack.next = parent->core.reptrack;
	parent->core.reptrack = &reptrack;
	hammer2_spin_unex(&parent->core.spin);

	hammer2_chain_unlock(chain);
	hammer2_chain_drop(chain);
	chain = NULL;	/* gone */

	/*
	 * At the top of this loop, chain is gone and parent is refd both
	 * by us explicitly AND via our reptrack.  We are attempting to
	 * lock parent.
	 */
	for (;;) {
		hammer2_chain_lock(parent, flags);

		if (reptrack.chain == parent)
			break;
		/*
		 * The reptrack was migrated while we blocked; chase the
		 * new target it recorded.
		 */
		hammer2_chain_unlock(parent);
		hammer2_chain_drop(parent);

		kprintf("hammer2: debug REPTRACK %p->%p\n",
			parent, reptrack.chain);
		hammer2_spin_ex(&reptrack.spin);
		parent = reptrack.chain;
		hammer2_chain_ref(parent);
		hammer2_spin_unex(&reptrack.spin);
	}

	/*
	 * Once parent is locked and matches our reptrack, our reptrack
	 * will be stable and we have our parent.  We can unlink our
	 * reptrack.
	 *
	 * WARNING!  Remember that the chain lock might be shared.  Chains
	 *	     locked shared have stable parent linkages.
	 */
	hammer2_spin_ex(&parent->core.spin);
	repp = &parent->core.reptrack;
	while (*repp != &reptrack)
		repp = &(*repp)->next;
	*repp = reptrack.next;
	hammer2_spin_unex(&parent->core.spin);

	hammer2_chain_drop(parent);	/* reptrack ref */
	*chainp = parent;		/* return parent lock+ref */

	return parent;
}

/*
 * Dispose of any linked reptrack structures in (chain) by shifting them to
 * (parent).  Both (chain) and (parent) must be exclusively locked.
 *
 * This is interlocked against any children of (chain) on the other side.
 * No children may remain as-of when this is called so we can test
 * core.reptrack without holding the spin-lock.
 *
 * Used whenever the caller intends to permanently delete chains related
 * to topological recursions (BREF_TYPE_INDIRECT, BREF_TYPE_FREEMAP_NODE),
 * where the chains underneath the node being deleted are given a new parent
 * above the node being deleted.
 */
static
void
hammer2_chain_repchange(hammer2_chain_t *parent, hammer2_chain_t *chain)
{
	struct hammer2_reptrack *reptrack;

	/* caller guarantees (chain) has no live children remaining */
	KKASSERT(chain->core.live_count == 0 && RB_EMPTY(&chain->core.rbtree));
	while (chain->core.reptrack) {
		hammer2_spin_ex(&parent->core.spin);
		hammer2_spin_ex(&chain->core.spin);
		reptrack = chain->core.reptrack;
		if (reptrack == NULL) {
			/* raced to empty under the spinlocks, all done */
			hammer2_spin_unex(&chain->core.spin);
			hammer2_spin_unex(&parent->core.spin);
			break;
		}
		/*
		 * Unlink the reptrack from (chain) and push it onto
		 * (parent)'s list, moving the ref it holds accordingly.
		 */
		hammer2_spin_ex(&reptrack->spin);
		chain->core.reptrack = reptrack->next;
		reptrack->chain = parent;
		reptrack->next = parent->core.reptrack;
		parent->core.reptrack = reptrack;
		hammer2_chain_ref(parent);	/* reptrack */

		hammer2_spin_unex(&chain->core.spin);
		hammer2_spin_unex(&parent->core.spin);
		kprintf("hammer2: debug repchange %p %p->%p\n",
			reptrack, chain, parent);
		hammer2_chain_drop(chain);	/* reptrack */
	}
}

/*
 * Locate the first chain whose key range overlaps (key_beg, key_end)
 * inclusive.  (*parentp) typically points to an inode but can also point to
 * a related indirect block and this function will recurse upwards and find
 * the inode or the nearest undeleted indirect block covering the key range.
 *
 * This function unconditionally sets *errorp, replacing any previous value.
 *
 * (*parentp) must be exclusive or shared locked (depending on flags) and
 * referenced and can be an inode or an existing indirect block within the
 * inode.
 *
 * If (*parent) is errored out, this function will not attempt to recurse
 * the radix tree and will return NULL along with an appropriate *errorp.
 * If NULL is returned and *errorp is 0, the requested lookup could not be
 * located.
 *
 * On return (*parentp) will be modified to point at the deepest parent chain
 * element encountered during the search, as a helper for an insertion or
 * deletion.
 *
 * The new (*parentp) will be locked shared or exclusive (depending on flags),
 * and referenced, and the old will be unlocked and dereferenced (no change
 * if they are both the same).  This is particularly important if the caller
 * wishes to insert a new chain, (*parentp) will be set properly even if NULL
 * is returned, as long as no error occurred.
 *
 * The matching chain will be returned locked according to flags.
 *
 * --
 *
 * NULL is returned if no match was found, but (*parentp) will still
 * potentially be adjusted.
 *
 * On return (*key_nextp) will point to an iterative value for key_beg.
 * (If NULL is returned (*key_nextp) is set to (key_end + 1)).
 *
 * This function will also recurse up the chain if the key is not within the
 * current parent's range.  (*parentp) can never be set to NULL.  An iteration
 * can simply allow (*parentp) to float inside the loop.
 *
 * NOTE!  chain->data is not always resolved.  By default it will not be
 *	  resolved for BREF_TYPE_DATA, FREEMAP_NODE, or FREEMAP_LEAF.  Use
 *	  HAMMER2_LOOKUP_ALWAYS to force resolution (but be careful w/
 *	  BREF_TYPE_DATA as the device buffer can alias the logical file
 *	  buffer).
 */

hammer2_chain_t *
hammer2_chain_lookup(hammer2_chain_t **parentp, hammer2_key_t *key_nextp,
		     hammer2_key_t key_beg, hammer2_key_t key_end,
		     int *errorp, int flags)
{
	hammer2_dev_t *hmp;
	hammer2_chain_t *parent;
	hammer2_chain_t *chain;
	hammer2_blockref_t *base;
	hammer2_blockref_t *bref;
	hammer2_blockref_t bcopy;
	hammer2_key_t scan_beg;
	hammer2_key_t scan_end;
	int count = 0;
	int how_always = HAMMER2_RESOLVE_ALWAYS;
	int how_maybe = HAMMER2_RESOLVE_MAYBE;
	int how;
	int generation;
	int maxloops = 300000;
	volatile hammer2_mtx_t save_mtx;

	/*
	 * Translate HAMMER2_LOOKUP_* flags into the three RESOLVE modes
	 * used below (how / how_maybe / how_always).
	 */
	if (flags & HAMMER2_LOOKUP_ALWAYS) {
		how_maybe = how_always;
		how = HAMMER2_RESOLVE_ALWAYS;
	} else if (flags & HAMMER2_LOOKUP_NODATA) {
		how = HAMMER2_RESOLVE_NEVER;
	} else {
		how = HAMMER2_RESOLVE_MAYBE;
	}
	if (flags & HAMMER2_LOOKUP_SHARED) {
		how_maybe |= HAMMER2_RESOLVE_SHARED;
		how_always |= HAMMER2_RESOLVE_SHARED;
		how |= HAMMER2_RESOLVE_SHARED;
	}

	/*
	 * Recurse (*parentp) upward if necessary until the parent completely
	 * encloses the key range or we hit the inode.
	 *
	 * Handle races against the flusher deleting indirect nodes on its
	 * way back up by continuing to recurse upward past the deletion.
	 */
	parent = *parentp;
	hmp = parent->hmp;
	*errorp = 0;

	while (parent->bref.type == HAMMER2_BREF_TYPE_INDIRECT ||
	       parent->bref.type == HAMMER2_BREF_TYPE_FREEMAP_NODE) {
		scan_beg = parent->bref.key;
		scan_end = scan_beg +
			   ((hammer2_key_t)1 << parent->bref.keybits) - 1;
		if ((parent->flags & HAMMER2_CHAIN_DELETED) == 0) {
			if (key_beg >= scan_beg && key_end <= scan_end)
				break;
		}
		parent = hammer2_chain_repparent(parentp, how_maybe);
	}
again:
	if (--maxloops == 0)
		panic("hammer2_chain_lookup: maxloops");
	/*
	 * Locate the blockref array.  Currently we do a fully associative
	 * search through the array.
	 */
	switch(parent->bref.type) {
	case HAMMER2_BREF_TYPE_INODE:
		/*
		 * Special shortcut for embedded data returns the inode
		 * itself.  Callers must detect this condition and access
		 * the embedded data (the strategy code does this for us).
		 *
		 * This is only applicable to regular files and softlinks.
		 *
		 * We need a second lock on parent.  Since we already have
		 * a lock we must pass LOCKAGAIN to prevent unexpected
		 * blocking (we don't want to block on a second shared
		 * ref if an exclusive lock is pending)
		 */
		if (parent->data->ipdata.meta.op_flags &
		    HAMMER2_OPFLAG_DIRECTDATA) {
			if (flags & HAMMER2_LOOKUP_NODIRECT) {
				chain = NULL;
				*key_nextp = key_end + 1;
				goto done;
			}
			hammer2_chain_ref(parent);
			hammer2_chain_lock(parent, how_always |
						   HAMMER2_RESOLVE_LOCKAGAIN);
			*key_nextp = key_end + 1;
			return (parent);
		}
		base = &parent->data->ipdata.u.blockset.blockref[0];
		count = HAMMER2_SET_COUNT;
		break;
	case HAMMER2_BREF_TYPE_FREEMAP_NODE:
	case HAMMER2_BREF_TYPE_INDIRECT:
		/*
		 * Handle MATCHIND on the parent
		 */
		if (flags & HAMMER2_LOOKUP_MATCHIND) {
			scan_beg = parent->bref.key;
			scan_end = scan_beg +
			       ((hammer2_key_t)1 << parent->bref.keybits) - 1;
			if (key_beg == scan_beg && key_end == scan_end) {
				chain = parent;
				hammer2_chain_ref(chain);
				hammer2_chain_lock(chain, how_maybe);
				*key_nextp = scan_end + 1;
				goto done;
			}
		}

		/*
		 * Optimize indirect blocks in the INITIAL state to avoid
		 * I/O.
		 */
		if (parent->flags & HAMMER2_CHAIN_INITIAL) {
			base = NULL;
		} else {
			if (parent->data == NULL) {
				/* should not happen; hang for a debugger */
				kprintf("parent->data is NULL %p\n", parent);
				while (1)
					tsleep(parent, 0, "xxx", 0);
			}
			base = &parent->data->npdata[0];
		}
		count = parent->bytes / sizeof(hammer2_blockref_t);
		break;
	case HAMMER2_BREF_TYPE_VOLUME:
		base = &parent->data->voldata.sroot_blockset.blockref[0];
		count = HAMMER2_SET_COUNT;
		break;
	case HAMMER2_BREF_TYPE_FREEMAP:
		base = &parent->data->blkset.blockref[0];
		count = HAMMER2_SET_COUNT;
		break;
	default:
		kprintf("hammer2_chain_lookup: unrecognized "
			"blockref(B) type: %d",
			parent->bref.type);
		while (1)
			tsleep(&base, 0, "dead", 0);
		panic("hammer2_chain_lookup: unrecognized "
		      "blockref(B) type: %d",
		      parent->bref.type);
		base = NULL;	/* safety */
		count = 0;	/* safety */
	}

	/*
	 * No lookup is possible if the parent is errored.  We delayed
	 * this check as long as we could to ensure that the parent backup,
	 * embedded data, and MATCHIND code could still execute.
	 */
	if (parent->error) {
		*errorp = parent->error;
		return NULL;
	}

	/*
	 * Merged scan to find next candidate.
	 *
	 * hammer2_base_*() functions require the parent->core.live_* fields
	 * to be synchronized.
	 *
	 * We need to hold the spinlock to access the block array and RB tree
	 * and to interlock chain creation.
	 */
	if ((parent->flags & HAMMER2_CHAIN_COUNTEDBREFS) == 0)
		hammer2_chain_countbrefs(parent, base, count);

	/*
	 * Combined search
	 */
	hammer2_spin_ex(&parent->core.spin);
	chain = hammer2_combined_find(parent, base, count,
				      key_nextp,
				      key_beg, key_end,
				      &bref);
	generation = parent->core.generation;

	/*
	 * Exhausted parent chain, iterate.
	 */
	if (bref == NULL) {
		KKASSERT(chain == NULL);
		hammer2_spin_unex(&parent->core.spin);
		if (key_beg == key_end)	/* short cut single-key case */
			return (NULL);

		/*
		 * Stop if we reached the end of the iteration.
		 */
		if (parent->bref.type != HAMMER2_BREF_TYPE_INDIRECT &&
		    parent->bref.type != HAMMER2_BREF_TYPE_FREEMAP_NODE) {
			return (NULL);
		}

		/*
		 * Calculate next key, stop if we reached the end of the
		 * iteration, otherwise go up one level and loop.
		 */
		key_beg = parent->bref.key +
			  ((hammer2_key_t)1 << parent->bref.keybits);
		if (key_beg == 0 || key_beg > key_end)
			return (NULL);
		parent = hammer2_chain_repparent(parentp, how_maybe);
		goto again;
	}

	/*
	 * Selected from blockref or in-memory chain.
	 */
	bcopy = *bref;
	if (chain == NULL) {
		hammer2_spin_unex(&parent->core.spin);
		if (bcopy.type == HAMMER2_BREF_TYPE_INDIRECT ||
		    bcopy.type == HAMMER2_BREF_TYPE_FREEMAP_NODE) {
			chain = hammer2_chain_get(parent, generation,
						  &bcopy, how_maybe);
		} else {
			chain = hammer2_chain_get(parent, generation,
						  &bcopy, how);
		}
		if (chain == NULL)
			goto again;
	} else {
		hammer2_chain_ref(chain);
		hammer2_spin_unex(&parent->core.spin);

		/*
		 * chain is referenced but not locked.  We must lock the
		 * chain to obtain definitive state.
		 */
		if (bcopy.type == HAMMER2_BREF_TYPE_INDIRECT ||
		    bcopy.type == HAMMER2_BREF_TYPE_FREEMAP_NODE) {
			hammer2_chain_lock(chain, how_maybe);
		} else {
			hammer2_chain_lock(chain, how);
		}
		KKASSERT(chain->parent == parent);
	}
	/*
	 * Re-validate against races: if the bref or parentage changed
	 * while we were acquiring the lock, retry the whole lookup.
	 */
	if (bcmp(&bcopy, &chain->bref, sizeof(bcopy)) ||
	    chain->parent != parent) {
		hammer2_chain_unlock(chain);
		hammer2_chain_drop(chain);
		chain = NULL;	/* SAFETY */
		goto again;
	}


	/*
	 * Skip deleted chains (XXX cache 'i' end-of-block-array? XXX)
	 *
	 * NOTE: Chain's key range is not relevant as there might be
	 *	 one-offs within the range that are not deleted.
	 *
	 * NOTE: Lookups can race delete-duplicate because
	 *	 delete-duplicate does not lock the parent's core
	 *	 (they just use the spinlock on the core).
	 */
	if (chain->flags & HAMMER2_CHAIN_DELETED) {
		kprintf("skip deleted chain %016jx.%02x key=%016jx\n",
			chain->bref.data_off, chain->bref.type,
			chain->bref.key);
		hammer2_chain_unlock(chain);
		hammer2_chain_drop(chain);
		chain = NULL;	/* SAFETY */
		key_beg = *key_nextp;
		if (key_beg == 0 || key_beg > key_end)
			return(NULL);
		goto again;
	}

	/*
	 * If the chain element is an indirect block it becomes the new
	 * parent and we loop on it.  We must maintain our top-down locks
	 * to prevent the flusher from interfering (i.e. doing a
	 * delete-duplicate and leaving us recursing down a deleted chain).
	 *
	 * The parent always has to be locked with at least RESOLVE_MAYBE
	 * so we can access its data.  It might need a fixup if the caller
	 * passed incompatible flags.  Be careful not to cause a deadlock
	 * as a data-load requires an exclusive lock.
	 *
	 * If HAMMER2_LOOKUP_MATCHIND is set and the indirect block's key
	 * range is within the requested key range we return the indirect
	 * block and do NOT loop.  This is usually only used to acquire
	 * freemap nodes.
	 */
	if (chain->bref.type == HAMMER2_BREF_TYPE_INDIRECT ||
	    chain->bref.type == HAMMER2_BREF_TYPE_FREEMAP_NODE) {
		/*
		 * NOTE(review): save_mtx is assigned here but never read
		 * afterwards — appears vestigial; confirm before removal.
		 */
		save_mtx = parent->lock;
		hammer2_chain_unlock(parent);
		hammer2_chain_drop(parent);
		*parentp = parent = chain;
		chain = NULL;	/* SAFETY */
		goto again;
	}
done:
	/*
	 * All done, return the locked chain.
	 *
	 * If the caller does not want a locked chain, replace the lock with
	 * a ref.  Perhaps this can eventually be optimized to not obtain the
	 * lock in the first place for situations where the data does not
	 * need to be resolved.
	 *
	 * NOTE! A chain->error must be tested by the caller upon return.
	 *	 *errorp is only set based on issues which occur while
	 *	 trying to reach the chain.
	 */
	return (chain);
}

/*
 * After having issued a lookup we can iterate all matching keys.
 *
 * If chain is non-NULL we continue the iteration from just after its index.
 *
 * If chain is NULL we assume the parent was exhausted and continue the
 * iteration at the next parent.
 *
 * If a fatal error occurs (typically an I/O error), a dummy chain is
 * returned with chain->error and error-identifying information set.  This
 * chain will assert if you try to do anything fancy with it.
 *
 * XXX Depending on where the error occurs we should allow continued iteration.
 *
 * parent must be locked on entry and remains locked throughout.  chain's
 * lock status must match flags.  Chain is always at least referenced.
 *
 * WARNING!  The MATCHIND flag does not apply to this function.
 */
hammer2_chain_t *
hammer2_chain_next(hammer2_chain_t **parentp, hammer2_chain_t *chain,
		   hammer2_key_t *key_nextp,
		   hammer2_key_t key_beg, hammer2_key_t key_end,
		   int *errorp, int flags)
{
	hammer2_chain_t *parent;
	int how_maybe;

	/*
	 * Calculate locking flags for upward recursion.
	 */
	how_maybe = HAMMER2_RESOLVE_MAYBE;
	if (flags & HAMMER2_LOOKUP_SHARED)
		how_maybe |= HAMMER2_RESOLVE_SHARED;

	parent = *parentp;
	*errorp = 0;

	/*
	 * Calculate the next index and recalculate the parent if necessary.
	 */
	if (chain) {
		/* continue just past the previous element's key range */
		key_beg = chain->bref.key +
			  ((hammer2_key_t)1 << chain->bref.keybits);
		hammer2_chain_unlock(chain);
		hammer2_chain_drop(chain);

		/*
		 * chain invalid past this point, but we can still do a
		 * pointer comparison w/parent.
		 *
		 * Any scan where the lookup returned degenerate data embedded
		 * in the inode has an invalid index and must terminate.
		 */
		if (chain == parent)
			return(NULL);
		if (key_beg == 0 || key_beg > key_end)
			return(NULL);
		chain = NULL;
	} else if (parent->bref.type != HAMMER2_BREF_TYPE_INDIRECT &&
		   parent->bref.type != HAMMER2_BREF_TYPE_FREEMAP_NODE) {
		/*
		 * We reached the end of the iteration.
		 */
		return (NULL);
	} else {
		/*
		 * Continue iteration with next parent unless the current
		 * parent covers the range.
		 *
		 * (This also handles the case of a deleted, empty indirect
		 * node).
		 */
		key_beg = parent->bref.key +
			  ((hammer2_key_t)1 << parent->bref.keybits);
		if (key_beg == 0 || key_beg > key_end)
			return (NULL);
		parent = hammer2_chain_repparent(parentp, how_maybe);
	}

	/*
	 * And execute
	 */
	return (hammer2_chain_lookup(parentp, key_nextp,
				     key_beg, key_end,
				     errorp, flags));
}

/*
 * Caller wishes to iterate chains under parent, loading new chains into
 * chainp.  Caller must initialize *chainp to NULL and *firstp to 1, and
 * then call hammer2_chain_scan() repeatedly until a non-zero return.
 * During the scan, *firstp will be set to 0 and (*chainp) will be replaced
 * with the returned chain for the scan.  The returned *chainp will be
 * locked and referenced.  Any prior contents will be unlocked and dropped.
 *
 * Caller should check the return value.  A normal scan EOF will return
 * exactly HAMMER2_ERROR_EOF.  Any other non-zero value indicates an
 * error trying to access parent data.  Any error in the returned chain
 * must be tested separately by the caller.
 *
 * (*chainp) is dropped on each scan, but will only be set if the returned
 * element itself can recurse.  Leaf elements are NOT resolved, loaded, or
 * returned via *chainp.  The caller will get their bref only.
 *
 * The raw scan function is similar to lookup/next but does not seek to a key.
 * Blockrefs are iterated via first_bref = (parent, NULL) and
 * next_chain = (parent, bref).
 *
 * The passed-in parent must be locked and its data resolved.  The function
 * nominally returns a locked and referenced *chainp != NULL for chains
 * the caller might need to recurse on (and will dispose of any *chainp passed
 * in).  The caller must check the chain->bref.type either way.
3021 */ 3022 int 3023 hammer2_chain_scan(hammer2_chain_t *parent, hammer2_chain_t **chainp, 3024 hammer2_blockref_t *bref, int *firstp, 3025 int flags) 3026 { 3027 hammer2_dev_t *hmp; 3028 hammer2_blockref_t *base; 3029 hammer2_blockref_t *bref_ptr; 3030 hammer2_key_t key; 3031 hammer2_key_t next_key; 3032 hammer2_chain_t *chain = NULL; 3033 int count = 0; 3034 int how_always = HAMMER2_RESOLVE_ALWAYS; 3035 int how_maybe = HAMMER2_RESOLVE_MAYBE; 3036 int how; 3037 int generation; 3038 int maxloops = 300000; 3039 int error; 3040 3041 hmp = parent->hmp; 3042 error = 0; 3043 3044 /* 3045 * Scan flags borrowed from lookup. 3046 */ 3047 if (flags & HAMMER2_LOOKUP_ALWAYS) { 3048 how_maybe = how_always; 3049 how = HAMMER2_RESOLVE_ALWAYS; 3050 } else if (flags & HAMMER2_LOOKUP_NODATA) { 3051 how = HAMMER2_RESOLVE_NEVER; 3052 } else { 3053 how = HAMMER2_RESOLVE_MAYBE; 3054 } 3055 if (flags & HAMMER2_LOOKUP_SHARED) { 3056 how_maybe |= HAMMER2_RESOLVE_SHARED; 3057 how_always |= HAMMER2_RESOLVE_SHARED; 3058 how |= HAMMER2_RESOLVE_SHARED; 3059 } 3060 3061 /* 3062 * Calculate key to locate first/next element, unlocking the previous 3063 * element as we go. Be careful, the key calculation can overflow. 3064 * 3065 * (also reset bref to NULL) 3066 */ 3067 if (*firstp) { 3068 key = 0; 3069 *firstp = 0; 3070 } else { 3071 key = bref->key + ((hammer2_key_t)1 << bref->keybits); 3072 if ((chain = *chainp) != NULL) { 3073 *chainp = NULL; 3074 hammer2_chain_unlock(chain); 3075 hammer2_chain_drop(chain); 3076 chain = NULL; 3077 } 3078 if (key == 0) { 3079 error |= HAMMER2_ERROR_EOF; 3080 goto done; 3081 } 3082 } 3083 3084 again: 3085 if (parent->error) { 3086 error = parent->error; 3087 goto done; 3088 } 3089 if (--maxloops == 0) 3090 panic("hammer2_chain_scan: maxloops"); 3091 3092 /* 3093 * Locate the blockref array. Currently we do a fully associative 3094 * search through the array. 
3095 */ 3096 switch(parent->bref.type) { 3097 case HAMMER2_BREF_TYPE_INODE: 3098 /* 3099 * An inode with embedded data has no sub-chains. 3100 * 3101 * WARNING! Bulk scan code may pass a static chain marked 3102 * as BREF_TYPE_INODE with a copy of the volume 3103 * root blockset to snapshot the volume. 3104 */ 3105 if (parent->data->ipdata.meta.op_flags & 3106 HAMMER2_OPFLAG_DIRECTDATA) { 3107 error |= HAMMER2_ERROR_EOF; 3108 goto done; 3109 } 3110 base = &parent->data->ipdata.u.blockset.blockref[0]; 3111 count = HAMMER2_SET_COUNT; 3112 break; 3113 case HAMMER2_BREF_TYPE_FREEMAP_NODE: 3114 case HAMMER2_BREF_TYPE_INDIRECT: 3115 /* 3116 * Optimize indirect blocks in the INITIAL state to avoid 3117 * I/O. 3118 */ 3119 if (parent->flags & HAMMER2_CHAIN_INITIAL) { 3120 base = NULL; 3121 } else { 3122 if (parent->data == NULL) 3123 panic("parent->data is NULL"); 3124 base = &parent->data->npdata[0]; 3125 } 3126 count = parent->bytes / sizeof(hammer2_blockref_t); 3127 break; 3128 case HAMMER2_BREF_TYPE_VOLUME: 3129 base = &parent->data->voldata.sroot_blockset.blockref[0]; 3130 count = HAMMER2_SET_COUNT; 3131 break; 3132 case HAMMER2_BREF_TYPE_FREEMAP: 3133 base = &parent->data->blkset.blockref[0]; 3134 count = HAMMER2_SET_COUNT; 3135 break; 3136 default: 3137 panic("hammer2_chain_lookup: unrecognized blockref type: %d", 3138 parent->bref.type); 3139 base = NULL; /* safety */ 3140 count = 0; /* safety */ 3141 } 3142 3143 /* 3144 * Merged scan to find next candidate. 3145 * 3146 * hammer2_base_*() functions require the parent->core.live_* fields 3147 * to be synchronized. 3148 * 3149 * We need to hold the spinlock to access the block array and RB tree 3150 * and to interlock chain creation. 
3151 */ 3152 if ((parent->flags & HAMMER2_CHAIN_COUNTEDBREFS) == 0) 3153 hammer2_chain_countbrefs(parent, base, count); 3154 3155 next_key = 0; 3156 bref_ptr = NULL; 3157 hammer2_spin_ex(&parent->core.spin); 3158 chain = hammer2_combined_find(parent, base, count, 3159 &next_key, 3160 key, HAMMER2_KEY_MAX, 3161 &bref_ptr); 3162 generation = parent->core.generation; 3163 3164 /* 3165 * Exhausted parent chain, we're done. 3166 */ 3167 if (bref_ptr == NULL) { 3168 hammer2_spin_unex(&parent->core.spin); 3169 KKASSERT(chain == NULL); 3170 error |= HAMMER2_ERROR_EOF; 3171 goto done; 3172 } 3173 3174 /* 3175 * Copy into the supplied stack-based blockref. 3176 */ 3177 *bref = *bref_ptr; 3178 3179 /* 3180 * Selected from blockref or in-memory chain. 3181 */ 3182 if (chain == NULL) { 3183 switch(bref->type) { 3184 case HAMMER2_BREF_TYPE_INODE: 3185 case HAMMER2_BREF_TYPE_FREEMAP_NODE: 3186 case HAMMER2_BREF_TYPE_INDIRECT: 3187 case HAMMER2_BREF_TYPE_VOLUME: 3188 case HAMMER2_BREF_TYPE_FREEMAP: 3189 /* 3190 * Recursion, always get the chain 3191 */ 3192 hammer2_spin_unex(&parent->core.spin); 3193 chain = hammer2_chain_get(parent, generation, 3194 bref, how); 3195 if (chain == NULL) 3196 goto again; 3197 break; 3198 default: 3199 /* 3200 * No recursion, do not waste time instantiating 3201 * a chain, just iterate using the bref. 3202 */ 3203 hammer2_spin_unex(&parent->core.spin); 3204 break; 3205 } 3206 } else { 3207 /* 3208 * Recursion or not we need the chain in order to supply 3209 * the bref. 3210 */ 3211 hammer2_chain_ref(chain); 3212 hammer2_spin_unex(&parent->core.spin); 3213 hammer2_chain_lock(chain, how); 3214 } 3215 if (chain && 3216 (bcmp(bref, &chain->bref, sizeof(*bref)) || 3217 chain->parent != parent)) { 3218 hammer2_chain_unlock(chain); 3219 hammer2_chain_drop(chain); 3220 chain = NULL; 3221 goto again; 3222 } 3223 3224 /* 3225 * Skip deleted chains (XXX cache 'i' end-of-block-array? 
XXX) 3226 * 3227 * NOTE: chain's key range is not relevant as there might be 3228 * one-offs within the range that are not deleted. 3229 * 3230 * NOTE: XXX this could create problems with scans used in 3231 * situations other than mount-time recovery. 3232 * 3233 * NOTE: Lookups can race delete-duplicate because 3234 * delete-duplicate does not lock the parent's core 3235 * (they just use the spinlock on the core). 3236 */ 3237 if (chain && (chain->flags & HAMMER2_CHAIN_DELETED)) { 3238 hammer2_chain_unlock(chain); 3239 hammer2_chain_drop(chain); 3240 chain = NULL; 3241 3242 key = next_key; 3243 if (key == 0) { 3244 error |= HAMMER2_ERROR_EOF; 3245 goto done; 3246 } 3247 goto again; 3248 } 3249 3250 done: 3251 /* 3252 * All done, return the bref or NULL, supply chain if necessary. 3253 */ 3254 if (chain) 3255 *chainp = chain; 3256 return (error); 3257 } 3258 3259 /* 3260 * Create and return a new hammer2 system memory structure of the specified 3261 * key, type and size and insert it under (*parentp). This is a full 3262 * insertion, based on the supplied key/keybits, and may involve creating 3263 * indirect blocks and moving other chains around via delete/duplicate. 3264 * 3265 * This call can be made with parent == NULL as long as a non -1 methods 3266 * is supplied. hmp must also be supplied in this situation (otherwise 3267 * hmp is extracted from the supplied parent). The chain will be detached 3268 * from the topology. A later call with both parent and chain can be made 3269 * to attach it. 3270 * 3271 * THE CALLER MUST HAVE ALREADY PROPERLY SEEKED (*parentp) TO THE INSERTION 3272 * POINT SANS ANY REQUIRED INDIRECT BLOCK CREATIONS DUE TO THE ARRAY BEING 3273 * FULL. This typically means that the caller is creating the chain after 3274 * doing a hammer2_chain_lookup(). 3275 * 3276 * (*parentp) must be exclusive locked and may be replaced on return 3277 * depending on how much work the function had to do. 
3278 * 3279 * (*parentp) must not be errored or this function will assert. 3280 * 3281 * (*chainp) usually starts out NULL and returns the newly created chain, 3282 * but if the caller desires the caller may allocate a disconnected chain 3283 * and pass it in instead. 3284 * 3285 * This function should NOT be used to insert INDIRECT blocks. It is 3286 * typically used to create/insert inodes and data blocks. 3287 * 3288 * Caller must pass-in an exclusively locked parent the new chain is to 3289 * be inserted under, and optionally pass-in a disconnected, exclusively 3290 * locked chain to insert (else we create a new chain). The function will 3291 * adjust (*parentp) as necessary, create or connect the chain, and 3292 * return an exclusively locked chain in *chainp. 3293 * 3294 * When creating a PFSROOT inode under the super-root, pmp is typically NULL 3295 * and will be reassigned. 3296 * 3297 * NOTE: returns HAMMER_ERROR_* flags 3298 */ 3299 int 3300 hammer2_chain_create(hammer2_chain_t **parentp, hammer2_chain_t **chainp, 3301 hammer2_dev_t *hmp, hammer2_pfs_t *pmp, int methods, 3302 hammer2_key_t key, int keybits, int type, size_t bytes, 3303 hammer2_tid_t mtid, hammer2_off_t dedup_off, int flags) 3304 { 3305 hammer2_chain_t *chain; 3306 hammer2_chain_t *parent; 3307 hammer2_blockref_t *base; 3308 hammer2_blockref_t dummy; 3309 int allocated = 0; 3310 int error = 0; 3311 int count; 3312 int maxloops = 300000; 3313 3314 /* 3315 * Topology may be crossing a PFS boundary. 3316 */ 3317 parent = *parentp; 3318 if (parent) { 3319 KKASSERT(hammer2_mtx_owned(&parent->lock)); 3320 KKASSERT(parent->error == 0); 3321 hmp = parent->hmp; 3322 } 3323 chain = *chainp; 3324 3325 if (chain == NULL) { 3326 /* 3327 * First allocate media space and construct the dummy bref, 3328 * then allocate the in-memory chain structure. Set the 3329 * INITIAL flag for fresh chains which do not have embedded 3330 * data. 
3331 * 3332 * XXX for now set the check mode of the child based on 3333 * the parent or, if the parent is an inode, the 3334 * specification in the inode. 3335 */ 3336 bzero(&dummy, sizeof(dummy)); 3337 dummy.type = type; 3338 dummy.key = key; 3339 dummy.keybits = keybits; 3340 dummy.data_off = hammer2_getradix(bytes); 3341 3342 /* 3343 * Inherit methods from parent by default. Primarily used 3344 * for BREF_TYPE_DATA. Non-data types *must* be set to 3345 * a non-NONE check algorithm. 3346 */ 3347 if (methods == -1) 3348 dummy.methods = parent->bref.methods; 3349 else 3350 dummy.methods = (uint8_t)methods; 3351 3352 if (type != HAMMER2_BREF_TYPE_DATA && 3353 HAMMER2_DEC_CHECK(dummy.methods) == HAMMER2_CHECK_NONE) { 3354 dummy.methods |= 3355 HAMMER2_ENC_CHECK(HAMMER2_CHECK_DEFAULT); 3356 } 3357 3358 chain = hammer2_chain_alloc(hmp, pmp, &dummy); 3359 3360 /* 3361 * Lock the chain manually, chain_lock will load the chain 3362 * which we do NOT want to do. (note: chain->refs is set 3363 * to 1 by chain_alloc() for us, but lockcnt is not). 3364 */ 3365 chain->lockcnt = 1; 3366 hammer2_mtx_ex(&chain->lock); 3367 allocated = 1; 3368 ++curthread->td_tracker; 3369 3370 /* 3371 * Set INITIAL to optimize I/O. The flag will generally be 3372 * processed when we call hammer2_chain_modify(). 3373 * 3374 * Recalculate bytes to reflect the actual media block 3375 * allocation. Handle special case radix 0 == 0 bytes. 
3376 */ 3377 bytes = (size_t)(chain->bref.data_off & HAMMER2_OFF_MASK_RADIX); 3378 if (bytes) 3379 bytes = (hammer2_off_t)1 << bytes; 3380 chain->bytes = bytes; 3381 3382 switch(type) { 3383 case HAMMER2_BREF_TYPE_VOLUME: 3384 case HAMMER2_BREF_TYPE_FREEMAP: 3385 panic("hammer2_chain_create: called with volume type"); 3386 break; 3387 case HAMMER2_BREF_TYPE_INDIRECT: 3388 panic("hammer2_chain_create: cannot be used to" 3389 "create indirect block"); 3390 break; 3391 case HAMMER2_BREF_TYPE_FREEMAP_NODE: 3392 panic("hammer2_chain_create: cannot be used to" 3393 "create freemap root or node"); 3394 break; 3395 case HAMMER2_BREF_TYPE_FREEMAP_LEAF: 3396 KKASSERT(bytes == sizeof(chain->data->bmdata)); 3397 /* fall through */ 3398 case HAMMER2_BREF_TYPE_DIRENT: 3399 case HAMMER2_BREF_TYPE_INODE: 3400 case HAMMER2_BREF_TYPE_DATA: 3401 default: 3402 /* 3403 * leave chain->data NULL, set INITIAL 3404 */ 3405 KKASSERT(chain->data == NULL); 3406 atomic_set_int(&chain->flags, HAMMER2_CHAIN_INITIAL); 3407 break; 3408 } 3409 } else { 3410 /* 3411 * We are reattaching a previously deleted chain, possibly 3412 * under a new parent and possibly with a new key/keybits. 3413 * The chain does not have to be in a modified state. The 3414 * UPDATE flag will be set later on in this routine. 3415 * 3416 * Do NOT mess with the current state of the INITIAL flag. 3417 */ 3418 chain->bref.key = key; 3419 chain->bref.keybits = keybits; 3420 if (chain->flags & HAMMER2_CHAIN_DELETED) 3421 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_DELETED); 3422 KKASSERT(chain->parent == NULL); 3423 } 3424 3425 /* 3426 * Set the appropriate bref flag if requested. 3427 * 3428 * NOTE! Callers can call this function to move chains without 3429 * knowing about special flags, so don't clear bref flags 3430 * here! 
3431 */ 3432 if (flags & HAMMER2_INSERT_PFSROOT) 3433 chain->bref.flags |= HAMMER2_BREF_FLAG_PFSROOT; 3434 3435 if (parent == NULL) 3436 goto skip; 3437 3438 /* 3439 * Calculate how many entries we have in the blockref array and 3440 * determine if an indirect block is required when inserting into 3441 * the parent. 3442 */ 3443 again: 3444 if (--maxloops == 0) 3445 panic("hammer2_chain_create: maxloops"); 3446 3447 switch(parent->bref.type) { 3448 case HAMMER2_BREF_TYPE_INODE: 3449 if ((parent->data->ipdata.meta.op_flags & 3450 HAMMER2_OPFLAG_DIRECTDATA) != 0) { 3451 kprintf("hammer2: parent set for direct-data! " 3452 "pkey=%016jx ckey=%016jx\n", 3453 parent->bref.key, 3454 chain->bref.key); 3455 } 3456 KKASSERT((parent->data->ipdata.meta.op_flags & 3457 HAMMER2_OPFLAG_DIRECTDATA) == 0); 3458 KKASSERT(parent->data != NULL); 3459 base = &parent->data->ipdata.u.blockset.blockref[0]; 3460 count = HAMMER2_SET_COUNT; 3461 break; 3462 case HAMMER2_BREF_TYPE_INDIRECT: 3463 case HAMMER2_BREF_TYPE_FREEMAP_NODE: 3464 if (parent->flags & HAMMER2_CHAIN_INITIAL) 3465 base = NULL; 3466 else 3467 base = &parent->data->npdata[0]; 3468 count = parent->bytes / sizeof(hammer2_blockref_t); 3469 break; 3470 case HAMMER2_BREF_TYPE_VOLUME: 3471 KKASSERT(parent->data != NULL); 3472 base = &parent->data->voldata.sroot_blockset.blockref[0]; 3473 count = HAMMER2_SET_COUNT; 3474 break; 3475 case HAMMER2_BREF_TYPE_FREEMAP: 3476 KKASSERT(parent->data != NULL); 3477 base = &parent->data->blkset.blockref[0]; 3478 count = HAMMER2_SET_COUNT; 3479 break; 3480 default: 3481 panic("hammer2_chain_create: unrecognized blockref type: %d", 3482 parent->bref.type); 3483 base = NULL; 3484 count = 0; 3485 break; 3486 } 3487 3488 /* 3489 * Make sure we've counted the brefs 3490 */ 3491 if ((parent->flags & HAMMER2_CHAIN_COUNTEDBREFS) == 0) 3492 hammer2_chain_countbrefs(parent, base, count); 3493 3494 KASSERT(parent->core.live_count >= 0 && 3495 parent->core.live_count <= count, 3496 ("bad live_count %d/%d 
(%02x, %d)", 3497 parent->core.live_count, count, 3498 parent->bref.type, parent->bytes)); 3499 3500 /* 3501 * If no free blockref could be found we must create an indirect 3502 * block and move a number of blockrefs into it. With the parent 3503 * locked we can safely lock each child in order to delete+duplicate 3504 * it without causing a deadlock. 3505 * 3506 * This may return the new indirect block or the old parent depending 3507 * on where the key falls. NULL is returned on error. 3508 */ 3509 if (parent->core.live_count == count) { 3510 hammer2_chain_t *nparent; 3511 3512 KKASSERT((flags & HAMMER2_INSERT_SAMEPARENT) == 0); 3513 3514 nparent = hammer2_chain_create_indirect(parent, key, keybits, 3515 mtid, type, &error); 3516 if (nparent == NULL) { 3517 if (allocated) 3518 hammer2_chain_drop(chain); 3519 chain = NULL; 3520 goto done; 3521 } 3522 if (parent != nparent) { 3523 hammer2_chain_unlock(parent); 3524 hammer2_chain_drop(parent); 3525 parent = *parentp = nparent; 3526 } 3527 goto again; 3528 } 3529 3530 /* 3531 * fall through if parent, or skip to here if no parent. 3532 */ 3533 skip: 3534 if (chain->flags & HAMMER2_CHAIN_DELETED) 3535 kprintf("Inserting deleted chain @%016jx\n", 3536 chain->bref.key); 3537 3538 /* 3539 * Link the chain into its parent. 3540 */ 3541 if (chain->parent != NULL) 3542 panic("hammer2: hammer2_chain_create: chain already connected"); 3543 KKASSERT(chain->parent == NULL); 3544 if (parent) { 3545 KKASSERT(parent->core.live_count < count); 3546 hammer2_chain_insert(parent, chain, 3547 HAMMER2_CHAIN_INSERT_SPIN | 3548 HAMMER2_CHAIN_INSERT_LIVE, 3549 0); 3550 } 3551 3552 if (allocated) { 3553 /* 3554 * Mark the newly created chain modified. This will cause 3555 * UPDATE to be set and process the INITIAL flag. 3556 * 3557 * Device buffers are not instantiated for DATA elements 3558 * as these are handled by logical buffers. 
3559 * 3560 * Indirect and freemap node indirect blocks are handled 3561 * by hammer2_chain_create_indirect() and not by this 3562 * function. 3563 * 3564 * Data for all other bref types is expected to be 3565 * instantiated (INODE, LEAF). 3566 */ 3567 switch(chain->bref.type) { 3568 case HAMMER2_BREF_TYPE_DATA: 3569 case HAMMER2_BREF_TYPE_FREEMAP_LEAF: 3570 case HAMMER2_BREF_TYPE_DIRENT: 3571 case HAMMER2_BREF_TYPE_INODE: 3572 error = hammer2_chain_modify(chain, mtid, dedup_off, 3573 HAMMER2_MODIFY_OPTDATA); 3574 break; 3575 default: 3576 /* 3577 * Remaining types are not supported by this function. 3578 * In particular, INDIRECT and LEAF_NODE types are 3579 * handled by create_indirect(). 3580 */ 3581 panic("hammer2_chain_create: bad type: %d", 3582 chain->bref.type); 3583 /* NOT REACHED */ 3584 break; 3585 } 3586 } else { 3587 /* 3588 * When reconnecting a chain we must set UPDATE and 3589 * setflush so the flush recognizes that it must update 3590 * the bref in the parent. 3591 */ 3592 if ((chain->flags & HAMMER2_CHAIN_UPDATE) == 0) 3593 atomic_set_int(&chain->flags, HAMMER2_CHAIN_UPDATE); 3594 } 3595 3596 /* 3597 * We must setflush(parent) to ensure that it recurses through to 3598 * chain. setflush(chain) might not work because ONFLUSH is possibly 3599 * already set in the chain (so it won't recurse up to set it in the 3600 * parent). 3601 */ 3602 if (parent) 3603 hammer2_chain_setflush(parent); 3604 3605 done: 3606 *chainp = chain; 3607 3608 return (error); 3609 } 3610 3611 /* 3612 * Move the chain from its old parent to a new parent. The chain must have 3613 * already been deleted or already disconnected (or never associated) with 3614 * a parent. The chain is reassociated with the new parent and the deleted 3615 * flag will be cleared (no longer deleted). The chain's modification state 3616 * is not altered. 
 *
 * THE CALLER MUST HAVE ALREADY PROPERLY SEEKED (parent) TO THE INSERTION
 * POINT SANS ANY REQUIRED INDIRECT BLOCK CREATIONS DUE TO THE ARRAY BEING
 * FULL.  This typically means that the caller is creating the chain after
 * doing a hammer2_chain_lookup().
 *
 * Neither (parent) or (chain) can be errored.
 *
 * If (parent) is non-NULL then the chain is inserted under the parent.
 *
 * If (parent) is NULL then the newly duplicated chain is not inserted
 * anywhere, similar to if it had just been chain_alloc()'d (suitable for
 * passing into hammer2_chain_create() after this function returns).
 *
 * WARNING! This function calls create which means it can insert indirect
 *	    blocks.  This can cause other unrelated chains in the parent to
 *	    be moved to a newly inserted indirect block in addition to the
 *	    specific chain.
 */
void
hammer2_chain_rename(hammer2_chain_t **parentp, hammer2_chain_t *chain,
		     hammer2_tid_t mtid, int flags)
{
	hammer2_blockref_t *bref;
	hammer2_dev_t *hmp;
	hammer2_chain_t *parent;
	size_t bytes;

	/*
	 * WARNING!  We should never resolve DATA to device buffers
	 *	     (XXX allow it if the caller did?), and since
	 *	     we currently do not have the logical buffer cache
	 *	     buffer in-hand to fix its cached physical offset
	 *	     we also force the modify code to not COW it. XXX
	 *
	 * NOTE!  We allow error'd chains to be renamed.  The bref itself
	 *	  is good and can be renamed.  The content, however, may
	 *	  be inaccessible.
	 */
	hmp = chain->hmp;
	KKASSERT(chain->parent == NULL);
	/*KKASSERT(chain->error == 0); allow */

	/*
	 * Now create a duplicate of the chain structure, associating
	 * it with the same core, making it the same size, pointing it
	 * to the same bref (the same media block).
	 *
	 * NOTE: Handle special radix == 0 case (means 0 bytes).
	 *
	 * NOTE(review): hmp and the locally computed bytes are not used
	 *		 below (chain->bytes is passed to chain_create
	 *		 instead) -- appears vestigial; confirm before
	 *		 removing.
	 */
	bref = &chain->bref;
	bytes = (size_t)(bref->data_off & HAMMER2_OFF_MASK_RADIX);
	if (bytes)
		bytes = (hammer2_off_t)1 << bytes;

	/*
	 * If parent is not NULL the duplicated chain will be entered under
	 * the parent and the UPDATE bit set to tell flush to update
	 * the blockref.
	 *
	 * We must setflush(parent) to ensure that it recurses through to
	 * chain.  setflush(chain) might not work because ONFLUSH is possibly
	 * already set in the chain (so it won't recurse up to set it in the
	 * parent).
	 *
	 * Having both chains locked is extremely important for atomicity.
	 */
	if (parentp && (parent = *parentp) != NULL) {
		KKASSERT(hammer2_mtx_owned(&parent->lock));
		KKASSERT(parent->refs > 0);
		KKASSERT(parent->error == 0);

		hammer2_chain_create(parentp, &chain, NULL, chain->pmp,
				     HAMMER2_METH_DEFAULT,
				     bref->key, bref->keybits, bref->type,
				     chain->bytes, mtid, 0, flags);
		KKASSERT(chain->flags & HAMMER2_CHAIN_UPDATE);
		hammer2_chain_setflush(*parentp);
	}
}

/*
 * This works in tandem with delete_obref() to install a blockref in
 * (typically) an indirect block that is associated with the chain being
 * moved to *parentp.
 *
 * The reason we need this function is that the caller needs to maintain
 * the blockref as it was, and not generate a new blockref for what might
 * be a modified chain.  Otherwise stuff will leak into the flush that
 * the flush code's FLUSH_INODE_STOP flag is unable to catch.
 *
 * It is EXTREMELY important that we properly set CHAIN_BMAPUPD and
 * CHAIN_UPDATE.  We must set BMAPUPD if the bref does not match, and
 * we must clear CHAIN_UPDATE (that was likely set by the chain_rename) if
 * it does.  Otherwise we can end up in a situation where H2 is unable to
 * clean up the in-memory chain topology.
 *
 * The reason for this is that flushes do not generally flush through
 * BREF_TYPE_INODE chains and depend on a hammer2_inode_t queued to syncq
 * or sideq to properly flush and dispose of the related inode chain's flags.
 * Situations where the inode is not actually modified by the frontend,
 * but where we have to move the related chains around as we insert or cleanup
 * indirect blocks, can leave us with a 'dirty' (non-disposable) in-memory
 * inode chain that does not have a hammer2_inode_t associated with it.
 */
void
hammer2_chain_rename_obref(hammer2_chain_t **parentp, hammer2_chain_t *chain,
			   hammer2_tid_t mtid, int flags,
			   hammer2_blockref_t *obref)
{
	/* Move the chain first; this may replace *parentp. */
	hammer2_chain_rename(parentp, chain, mtid, flags);

	/* obref->type == 0 indicates no saved blockref to reinstall */
	if (obref->type) {
		hammer2_blockref_t *tbase;
		int tcount;

		KKASSERT((chain->flags & HAMMER2_CHAIN_BMAPPED) == 0);
		hammer2_chain_modify(*parentp, mtid, 0, 0);
		tbase = hammer2_chain_base_and_count(*parentp, &tcount);
		/* reinstall the ORIGINAL blockref, not a regenerated one */
		hammer2_base_insert(*parentp, tbase, tcount, chain, obref);
		if (bcmp(obref, &chain->bref, sizeof(chain->bref))) {
			/* bref differs from media copy, flag for update */
			atomic_set_int(&chain->flags, HAMMER2_CHAIN_BMAPUPD |
						      HAMMER2_CHAIN_UPDATE);
		} else {
			/* bref matches, undo UPDATE set by chain_rename */
			atomic_clear_int(&chain->flags, HAMMER2_CHAIN_UPDATE);
		}
	}
}

/*
 * Helper function for deleting chains.
 *
 * The chain is removed from the live view (the RBTREE) as well as the parent's
 * blockmap.  Both chain and its parent must be locked.
 *
 * parent may not be errored.  chain can be errored.
3753 */ 3754 static int 3755 _hammer2_chain_delete_helper(hammer2_chain_t *parent, hammer2_chain_t *chain, 3756 hammer2_tid_t mtid, int flags, 3757 hammer2_blockref_t *obref) 3758 { 3759 hammer2_dev_t *hmp; 3760 int error = 0; 3761 3762 KKASSERT((chain->flags & (HAMMER2_CHAIN_DELETED | 3763 HAMMER2_CHAIN_FICTITIOUS)) == 0); 3764 KKASSERT(chain->parent == parent); 3765 hmp = chain->hmp; 3766 3767 if (chain->flags & HAMMER2_CHAIN_BMAPPED) { 3768 /* 3769 * Chain is blockmapped, so there must be a parent. 3770 * Atomically remove the chain from the parent and remove 3771 * the blockmap entry. The parent must be set modified 3772 * to remove the blockmap entry. 3773 */ 3774 hammer2_blockref_t *base; 3775 int count; 3776 3777 KKASSERT(parent != NULL); 3778 KKASSERT(parent->error == 0); 3779 KKASSERT((parent->flags & HAMMER2_CHAIN_INITIAL) == 0); 3780 error = hammer2_chain_modify(parent, mtid, 0, 0); 3781 if (error) 3782 goto done; 3783 3784 /* 3785 * Calculate blockmap pointer 3786 */ 3787 KKASSERT(chain->flags & HAMMER2_CHAIN_ONRBTREE); 3788 hammer2_spin_ex(&chain->core.spin); 3789 hammer2_spin_ex(&parent->core.spin); 3790 3791 atomic_set_int(&chain->flags, HAMMER2_CHAIN_DELETED); 3792 atomic_add_int(&parent->core.live_count, -1); 3793 ++parent->core.generation; 3794 RB_REMOVE(hammer2_chain_tree, &parent->core.rbtree, chain); 3795 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_ONRBTREE); 3796 --parent->core.chain_count; 3797 chain->parent = NULL; 3798 3799 switch(parent->bref.type) { 3800 case HAMMER2_BREF_TYPE_INODE: 3801 /* 3802 * Access the inode's block array. However, there 3803 * is no block array if the inode is flagged 3804 * DIRECTDATA. 
3805 */ 3806 if (parent->data && 3807 (parent->data->ipdata.meta.op_flags & 3808 HAMMER2_OPFLAG_DIRECTDATA) == 0) { 3809 base = 3810 &parent->data->ipdata.u.blockset.blockref[0]; 3811 } else { 3812 base = NULL; 3813 } 3814 count = HAMMER2_SET_COUNT; 3815 break; 3816 case HAMMER2_BREF_TYPE_INDIRECT: 3817 case HAMMER2_BREF_TYPE_FREEMAP_NODE: 3818 if (parent->data) 3819 base = &parent->data->npdata[0]; 3820 else 3821 base = NULL; 3822 count = parent->bytes / sizeof(hammer2_blockref_t); 3823 break; 3824 case HAMMER2_BREF_TYPE_VOLUME: 3825 base = &parent->data->voldata. 3826 sroot_blockset.blockref[0]; 3827 count = HAMMER2_SET_COUNT; 3828 break; 3829 case HAMMER2_BREF_TYPE_FREEMAP: 3830 base = &parent->data->blkset.blockref[0]; 3831 count = HAMMER2_SET_COUNT; 3832 break; 3833 default: 3834 base = NULL; 3835 count = 0; 3836 panic("hammer2_flush_pass2: " 3837 "unrecognized blockref type: %d", 3838 parent->bref.type); 3839 } 3840 3841 /* 3842 * delete blockmapped chain from its parent. 3843 * 3844 * The parent is not affected by any statistics in chain 3845 * which are pending synchronization. That is, there is 3846 * nothing to undo in the parent since they have not yet 3847 * been incorporated into the parent. 3848 * 3849 * The parent is affected by statistics stored in inodes. 3850 * Those have already been synchronized, so they must be 3851 * undone. XXX split update possible w/delete in middle? 3852 */ 3853 if (base) { 3854 hammer2_base_delete(parent, base, count, chain, obref); 3855 } 3856 hammer2_spin_unex(&parent->core.spin); 3857 hammer2_spin_unex(&chain->core.spin); 3858 } else if (chain->flags & HAMMER2_CHAIN_ONRBTREE) { 3859 /* 3860 * Chain is not blockmapped but a parent is present. 3861 * Atomically remove the chain from the parent. There is 3862 * no blockmap entry to remove. 3863 * 3864 * Because chain was associated with a parent but not 3865 * synchronized, the chain's *_count_up fields contain 3866 * inode adjustment statistics which must be undone. 
3867 */ 3868 hammer2_spin_ex(&chain->core.spin); 3869 hammer2_spin_ex(&parent->core.spin); 3870 atomic_set_int(&chain->flags, HAMMER2_CHAIN_DELETED); 3871 atomic_add_int(&parent->core.live_count, -1); 3872 ++parent->core.generation; 3873 RB_REMOVE(hammer2_chain_tree, &parent->core.rbtree, chain); 3874 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_ONRBTREE); 3875 --parent->core.chain_count; 3876 chain->parent = NULL; 3877 hammer2_spin_unex(&parent->core.spin); 3878 hammer2_spin_unex(&chain->core.spin); 3879 } else { 3880 /* 3881 * Chain is not blockmapped and has no parent. This 3882 * is a degenerate case. 3883 */ 3884 atomic_set_int(&chain->flags, HAMMER2_CHAIN_DELETED); 3885 } 3886 done: 3887 return error; 3888 } 3889 3890 /* 3891 * Create an indirect block that covers one or more of the elements in the 3892 * current parent. Either returns the existing parent with no locking or 3893 * ref changes or returns the new indirect block locked and referenced 3894 * and leaving the original parent lock/ref intact as well. 3895 * 3896 * If an error occurs, NULL is returned and *errorp is set to the H2 error. 3897 * 3898 * The returned chain depends on where the specified key falls. 3899 * 3900 * The key/keybits for the indirect mode only needs to follow three rules: 3901 * 3902 * (1) That all elements underneath it fit within its key space and 3903 * 3904 * (2) That all elements outside it are outside its key space. 3905 * 3906 * (3) When creating the new indirect block any elements in the current 3907 * parent that fit within the new indirect block's keyspace must be 3908 * moved into the new indirect block. 3909 * 3910 * (4) The keyspace chosen for the inserted indirect block CAN cover a wider 3911 * keyspace the the current parent, but lookup/iteration rules will 3912 * ensure (and must ensure) that rule (2) for all parents leading up 3913 * to the nearest inode or the root volume header is adhered to. 
This
 * is accomplished by always recursing through matching keyspaces in
 * the hammer2_chain_lookup() and hammer2_chain_next() API.
 *
 * The current implementation calculates the current worst-case keyspace by
 * iterating the current parent and then divides it into two halves, choosing
 * whichever half has the most elements (not necessarily the half containing
 * the requested key).
 *
 * We can also opt to use the half with the least number of elements.  This
 * causes lower-numbered keys (aka logical file offsets) to recurse through
 * fewer indirect blocks and higher-numbered keys to recurse through more.
 * This also has the risk of not moving enough elements to the new indirect
 * block and being forced to create several indirect blocks before the element
 * can be inserted.
 *
 * Must be called with an exclusively locked parent.
 *
 * NOTE: *errorp set to HAMMER_ERROR_* flags
 */
static int hammer2_chain_indkey_freemap(hammer2_chain_t *parent,
				hammer2_key_t *keyp, int keybits,
				hammer2_blockref_t *base, int count);
static int hammer2_chain_indkey_file(hammer2_chain_t *parent,
				hammer2_key_t *keyp, int keybits,
				hammer2_blockref_t *base, int count,
				int ncount);
static int hammer2_chain_indkey_dir(hammer2_chain_t *parent,
				hammer2_key_t *keyp, int keybits,
				hammer2_blockref_t *base, int count,
				int ncount);
static
hammer2_chain_t *
hammer2_chain_create_indirect(hammer2_chain_t *parent,
			      hammer2_key_t create_key, int create_bits,
			      hammer2_tid_t mtid, int for_type, int *errorp)
{
	hammer2_dev_t *hmp;
	hammer2_blockref_t *base;
	hammer2_blockref_t *bref;
	hammer2_blockref_t bcopy;
	hammer2_chain_t *chain;
	hammer2_chain_t *ichain;
	hammer2_chain_t dummy;
	hammer2_key_t key = create_key;
	hammer2_key_t key_beg;
	hammer2_key_t key_end;
	hammer2_key_t key_next;
	int keybits = create_bits;
	int count;
	int ncount;
	int nbytes;
	int loops;
	int error;
	int reason;
	int generation;
	int maxloops = 300000;

	/*
	 * Calculate the base blockref pointer or NULL if the chain
	 * is known to be empty.  We need to calculate the array count
	 * for RB lookups either way.
	 */
	hmp = parent->hmp;
	KKASSERT(hammer2_mtx_owned(&parent->lock));

	/*
	 * Pre-modify the parent now to avoid having to deal with error
	 * processing if we tried to later (in the middle of our loop).
	 *
	 * We are going to be moving bref's around, the indirect blocks
	 * cannot be in an initial state.  Do not pass MODIFY_OPTDATA.
	 */
	*errorp = hammer2_chain_modify(parent, mtid, 0, 0);
	if (*errorp) {
		kprintf("hammer2_create_indirect: error %08x %s\n",
			*errorp, hammer2_error_str(*errorp));
		return NULL;
	}
	KKASSERT((parent->flags & HAMMER2_CHAIN_INITIAL) == 0);

	/*hammer2_chain_modify(&parent, HAMMER2_MODIFY_OPTDATA);*/
	base = hammer2_chain_base_and_count(parent, &count);

	/*
	 * dummy used in later chain allocation (no longer used for lookups).
	 */
	bzero(&dummy, sizeof(dummy));

	/*
	 * How big should our new indirect block be?  It has to be at least
	 * as large as its parent for splits to work properly.
	 *
	 * The freemap uses a specific indirect block size.  The number of
	 * levels are built dynamically and ultimately depend on the size
	 * volume.  Because freemap blocks are taken from the reserved areas
	 * of the volume our goal is efficiency (fewer levels) and not so
	 * much to save disk space.
	 *
	 * The first indirect block level for a directory usually uses
	 * HAMMER2_IND_BYTES_MIN (4KB = 32 directory entries).  Due to
	 * the hash mechanism, this typically gives us a nominal
	 * 32 * 4 entries with one level of indirection.
	 *
	 * We use HAMMER2_IND_BYTES_NOM (16KB = 128 blockrefs) for FILE
	 * indirect blocks.  The initial 4 entries in the inode gives us
	 * 256KB.  Up to 4 indirect blocks gives us 32MB.  Three levels
	 * of indirection gives us 137GB, and so forth.  H2 can support
	 * huge file sizes but they are not typical, so we try to stick
	 * with compactness and do not use a larger indirect block size.
	 *
	 * We could use 64KB (PBUFSIZE), giving us 512 blockrefs, but
	 * due to the way indirect blocks are created this usually winds
	 * up being extremely inefficient for small files.  Even though
	 * 16KB requires more levels of indirection for very large files,
	 * the 16KB records can be ganged together into 64KB DIOs.
	 */
	if (for_type == HAMMER2_BREF_TYPE_FREEMAP_NODE ||
	    for_type == HAMMER2_BREF_TYPE_FREEMAP_LEAF) {
		nbytes = HAMMER2_FREEMAP_LEVELN_PSIZE;
	} else if (parent->bref.type == HAMMER2_BREF_TYPE_INODE) {
		if (parent->data->ipdata.meta.type ==
		    HAMMER2_OBJTYPE_DIRECTORY)
			nbytes = HAMMER2_IND_BYTES_MIN;	/* 4KB = 32 entries */
		else
			nbytes = HAMMER2_IND_BYTES_NOM;	/* 16KB = ~8MB file */

	} else {
		nbytes = HAMMER2_IND_BYTES_NOM;
	}
	if (nbytes < count * sizeof(hammer2_blockref_t)) {
		KKASSERT(for_type != HAMMER2_BREF_TYPE_FREEMAP_NODE &&
			 for_type != HAMMER2_BREF_TYPE_FREEMAP_LEAF);
		nbytes = count * sizeof(hammer2_blockref_t);
	}
	ncount = nbytes / sizeof(hammer2_blockref_t);

	/*
	 * When creating an indirect block for a freemap node or leaf
	 * the key/keybits must be fitted to static radix levels because
	 * particular radix levels use particular reserved blocks in the
	 * related zone.
	 *
	 * This routine calculates the key/radix of the indirect block
	 * we need to create, and whether it is on the high-side or the
	 * low-side.
	 */
	switch(for_type) {
	case HAMMER2_BREF_TYPE_FREEMAP_NODE:
	case HAMMER2_BREF_TYPE_FREEMAP_LEAF:
		keybits = hammer2_chain_indkey_freemap(parent, &key, keybits,
						       base, count);
		break;
	case HAMMER2_BREF_TYPE_DATA:
		keybits = hammer2_chain_indkey_file(parent, &key, keybits,
						    base, count, ncount);
		break;
	case HAMMER2_BREF_TYPE_DIRENT:
	case HAMMER2_BREF_TYPE_INODE:
		keybits = hammer2_chain_indkey_dir(parent, &key, keybits,
						   base, count, ncount);
		break;
	default:
		panic("illegal indirect block for bref type %d", for_type);
		break;
	}

	/*
	 * Normalize the key for the radix being represented, keeping the
	 * high bits and throwing away the low bits.
	 */
	key &= ~(((hammer2_key_t)1 << keybits) - 1);

	/*
	 * Ok, create our new indirect block
	 */
	if (for_type == HAMMER2_BREF_TYPE_FREEMAP_NODE ||
	    for_type == HAMMER2_BREF_TYPE_FREEMAP_LEAF) {
		dummy.bref.type = HAMMER2_BREF_TYPE_FREEMAP_NODE;
	} else {
		dummy.bref.type = HAMMER2_BREF_TYPE_INDIRECT;
	}
	dummy.bref.key = key;
	dummy.bref.keybits = keybits;
	dummy.bref.data_off = hammer2_getradix(nbytes);
	dummy.bref.methods =
		HAMMER2_ENC_CHECK(HAMMER2_DEC_CHECK(parent->bref.methods)) |
		HAMMER2_ENC_COMP(HAMMER2_COMP_NONE);

	ichain = hammer2_chain_alloc(hmp, parent->pmp, &dummy.bref);
	atomic_set_int(&ichain->flags, HAMMER2_CHAIN_INITIAL);
	hammer2_chain_lock(ichain, HAMMER2_RESOLVE_MAYBE);
	/* ichain has one ref at this point */

	/*
	 * We have to mark it modified to allocate its block, but use
	 * OPTDATA to allow it to remain in the INITIAL state.  Otherwise
	 * it won't be acted upon by the flush code.
	 *
	 * XXX remove OPTDATA, we need a fully initialized indirect block to
	 * be able to move the original blockref.
	 */
	*errorp = hammer2_chain_modify(ichain, mtid, 0, 0);
	if (*errorp) {
		kprintf("hammer2_alloc_indirect: error %08x %s\n",
			*errorp, hammer2_error_str(*errorp));
		hammer2_chain_unlock(ichain);
		hammer2_chain_drop(ichain);
		return NULL;
	}
	KKASSERT((ichain->flags & HAMMER2_CHAIN_INITIAL) == 0);

	/*
	 * Iterate the original parent and move the matching brefs into
	 * the new indirect block.
	 *
	 * XXX handle flushes.
	 */
	key_beg = 0;
	key_end = HAMMER2_KEY_MAX;
	key_next = 0;	/* avoid gcc warnings */
	hammer2_spin_ex(&parent->core.spin);
	loops = 0;
	reason = 0;

	for (;;) {
		/*
		 * Parent may have been modified, relocating its block array.
		 * Reload the base pointer.
		 */
		base = hammer2_chain_base_and_count(parent, &count);

		if (++loops > 100000) {
		    hammer2_spin_unex(&parent->core.spin);
		    panic("excessive loops r=%d p=%p base/count %p:%d %016jx\n",
			  reason, parent, base, count, key_next);
		}

		/*
		 * NOTE: spinlock stays intact, returned chain (if not NULL)
		 *	 is not referenced or locked which means that we
		 *	 cannot safely check its flagged / deletion status
		 *	 until we lock it.
		 */
		chain = hammer2_combined_find(parent, base, count,
					      &key_next,
					      key_beg, key_end,
					      &bref);
		generation = parent->core.generation;
		if (bref == NULL)
			break;
		key_next = bref->key + ((hammer2_key_t)1 << bref->keybits);

		/*
		 * Skip keys that are not within the key/radix of the new
		 * indirect block.  They stay in the parent.
		 */
		if ((~(((hammer2_key_t)1 << keybits) - 1) &
		    (key ^ bref->key)) != 0) {
			goto next_key_spinlocked;
		}

		/*
		 * Load the new indirect block by acquiring the related
		 * chains (potentially from media as it might not be
		 * in-memory).  Then move it to the new parent (ichain).
		 *
		 * chain is referenced but not locked.  We must lock the
		 * chain to obtain definitive state.
		 */
		bcopy = *bref;
		if (chain) {
			/*
			 * Use chain already present in the RBTREE
			 */
			hammer2_chain_ref(chain);
			hammer2_spin_unex(&parent->core.spin);
			hammer2_chain_lock(chain, HAMMER2_RESOLVE_NEVER);
		} else {
			/*
			 * Get chain for blockref element.  _get returns NULL
			 * on insertion race.
			 */
			hammer2_spin_unex(&parent->core.spin);
			chain = hammer2_chain_get(parent, generation, &bcopy,
						  HAMMER2_RESOLVE_NEVER);
			if (chain == NULL) {
				reason = 1;
				hammer2_spin_ex(&parent->core.spin);
				continue;
			}
		}

		/*
		 * This is always live so if the chain has been deleted
		 * we raced someone and we have to retry.
		 *
		 * NOTE: Lookups can race delete-duplicate because
		 *	 delete-duplicate does not lock the parent's core
		 *	 (they just use the spinlock on the core).
		 *
		 * (note reversed logic for this one)
		 */
		if (bcmp(&bcopy, &chain->bref, sizeof(bcopy)) ||
		    chain->parent != parent ||
		    (chain->flags & HAMMER2_CHAIN_DELETED)) {
			hammer2_chain_unlock(chain);
			hammer2_chain_drop(chain);
			if (hammer2_debug & 0x0040) {
				kprintf("LOST PARENT RETRY "
					"RETRY (%p,%p)->%p %08x\n",
					parent, chain->parent, chain, chain->flags);
			}
			hammer2_spin_ex(&parent->core.spin);
			continue;
		}

		/*
		 * Shift the chain to the indirect block.
		 *
		 * WARNING! No reason for us to load chain data, pass NOSTATS
		 *	    to prevent delete/insert from trying to access
		 *	    inode stats (and thus asserting if there is no
		 *	    chain->data loaded).
		 *
		 * WARNING! The (parent, chain) deletion may modify the parent
		 *	    and invalidate the base pointer.
		 *
		 * WARNING! Parent must already be marked modified, so we
		 *	    can assume that chain_delete always succeeds.
		 *
		 * WARNING! hammer2_chain_repchange() does not have to be
		 *	    called (and doesn't work anyway because we are
		 *	    only doing a partial shift).  A recursion that is
		 *	    in-progress can continue at the current parent
		 *	    and will be able to properly find its next key.
		 */
		error = hammer2_chain_delete_obref(parent, chain, mtid, 0,
						   &bcopy);
		KKASSERT(error == 0);
		hammer2_chain_rename_obref(&ichain, chain, mtid, 0, &bcopy);
		hammer2_chain_unlock(chain);
		hammer2_chain_drop(chain);
		KKASSERT(parent->refs > 0);
		chain = NULL;
		base = NULL;	/* safety */
		hammer2_spin_ex(&parent->core.spin);
next_key_spinlocked:
		if (--maxloops == 0)
			panic("hammer2_chain_create_indirect: maxloops");
		reason = 4;
		if (key_next == 0 || key_next > key_end)
			break;
		key_beg = key_next;
		/* loop */
	}
	hammer2_spin_unex(&parent->core.spin);

	/*
	 * Insert the new indirect block into the parent now that we've
	 * cleared out some entries in the parent.  We calculated a good
	 * insertion index in the loop above (ichain->index).
	 *
	 * We don't have to set UPDATE here because we mark ichain
	 * modified down below (so the normal modified -> flush -> set-moved
	 * sequence applies).
	 *
	 * The insertion shouldn't race as this is a completely new block
	 * and the parent is locked.
	 */
	base = NULL;	/* safety, parent modify may change address */
	KKASSERT((ichain->flags & HAMMER2_CHAIN_ONRBTREE) == 0);
	KKASSERT(parent->core.live_count < count);
	hammer2_chain_insert(parent, ichain,
			     HAMMER2_CHAIN_INSERT_SPIN |
			     HAMMER2_CHAIN_INSERT_LIVE,
			     0);

	/*
	 * Make sure flushes propagate after our manual insertion.
	 */
	hammer2_chain_setflush(ichain);
	hammer2_chain_setflush(parent);

	/*
	 * Figure out what to return.
	 */
	if (~(((hammer2_key_t)1 << keybits) - 1) &
	    (create_key ^ key)) {
		/*
		 * Key being created is outside the key range,
		 * return the original parent.
		 */
		hammer2_chain_unlock(ichain);
		hammer2_chain_drop(ichain);
	} else {
		/*
		 * Otherwise its in the range, return the new parent.
		 * (leave both the new and old parent locked).
		 */
		parent = ichain;
	}

	return(parent);
}

/*
 * Do maintenance on an indirect chain.  Both parent and chain are locked.
 *
 * Returns non-zero if (chain) is deleted, either due to being empty or
 * because its children were safely moved into the parent.
 */
int
hammer2_chain_indirect_maintenance(hammer2_chain_t *parent,
				   hammer2_chain_t *chain)
{
	hammer2_blockref_t *chain_base;
	hammer2_blockref_t *base;
	hammer2_blockref_t *bref;
	hammer2_blockref_t bcopy;
	hammer2_key_t key_next;
	hammer2_key_t key_beg;
	hammer2_key_t key_end;
	hammer2_chain_t *sub;
	int chain_count;
	int count;
	int error;
	int generation;

	/*
	 * Make sure we have an accurate live_count
	 */
	if ((chain->flags & (HAMMER2_CHAIN_INITIAL |
			     HAMMER2_CHAIN_COUNTEDBREFS)) == 0) {
		base = &chain->data->npdata[0];
		count = chain->bytes / sizeof(hammer2_blockref_t);
		hammer2_chain_countbrefs(chain, base, count);
	}

	/*
	 * If the indirect block is empty we can delete it.
	 * (ignore deletion error)
	 */
	if (chain->core.live_count == 0 && RB_EMPTY(&chain->core.rbtree)) {
		hammer2_chain_delete(parent, chain,
				     chain->bref.modify_tid,
				     HAMMER2_DELETE_PERMANENT);
		hammer2_chain_repchange(parent, chain);
		return 1;
	}

	base = hammer2_chain_base_and_count(parent, &count);

	if ((parent->flags & (HAMMER2_CHAIN_INITIAL |
			      HAMMER2_CHAIN_COUNTEDBREFS)) == 0) {
		hammer2_chain_countbrefs(parent, base, count);
	}

	/*
	 * Determine if we can collapse chain into parent, calculate
	 * hysteresis for chain emptiness.
	 */
	if (parent->core.live_count + chain->core.live_count - 1 > count)
		return 0;
	chain_count = chain->bytes / sizeof(hammer2_blockref_t);
	if (chain->core.live_count > chain_count * 3 / 4)
		return 0;

	/*
	 * Ok, theoretically we can collapse chain's contents into
	 * parent.  chain is locked, but any in-memory children of chain
	 * are not.  For this to work, we must be able to dispose of any
	 * in-memory children of chain.
	 *
	 * For now require that there are no in-memory children of chain.
	 *
	 * WARNING! Both chain and parent must remain locked across this
	 *	    entire operation.
	 */

	/*
	 * Parent must be marked modified.  Don't try to collapse it if we
	 * can't mark it modified.  Once modified, destroy chain to make room
	 * and to get rid of what will be a conflicting key (this is included
	 * in the calculation above).  Finally, move the children of chain
	 * into chain's parent.
	 *
	 * This order creates an accounting problem for bref.embed.stats
	 * because we destroy chain before we remove its children.  Any
	 * elements whose blockref is already synchronized will be counted
	 * twice.  To deal with the problem we clean out chain's stats prior
	 * to deleting it.
	 */
	error = hammer2_chain_modify(parent, 0, 0, 0);
	if (error) {
		krateprintf(&krate_h2me, "hammer2: indirect_maint: %s\n",
			    hammer2_error_str(error));
		return 0;
	}
	error = hammer2_chain_modify(chain, chain->bref.modify_tid, 0, 0);
	if (error) {
		krateprintf(&krate_h2me, "hammer2: indirect_maint: %s\n",
			    hammer2_error_str(error));
		return 0;
	}

	chain->bref.embed.stats.inode_count = 0;
	chain->bref.embed.stats.data_count = 0;
	error = hammer2_chain_delete(parent, chain,
				     chain->bref.modify_tid,
				     HAMMER2_DELETE_PERMANENT);
	KKASSERT(error == 0);

	/*
	 * The combined_find call requires core.spin to be held.  One would
	 * think there wouldn't be any conflicts since we hold chain
	 * exclusively locked, but the caching mechanism for 0-ref children
	 * does not require a chain lock.
	 */
	hammer2_spin_ex(&chain->core.spin);

	key_next = 0;
	key_beg = 0;
	key_end = HAMMER2_KEY_MAX;
	for (;;) {
		chain_base = &chain->data->npdata[0];
		chain_count = chain->bytes / sizeof(hammer2_blockref_t);
		sub = hammer2_combined_find(chain, chain_base, chain_count,
					    &key_next,
					    key_beg, key_end,
					    &bref);
		generation = chain->core.generation;
		if (bref == NULL)
			break;
		key_next = bref->key + ((hammer2_key_t)1 << bref->keybits);

		bcopy = *bref;
		if (sub) {
			hammer2_chain_ref(sub);
			hammer2_spin_unex(&chain->core.spin);
			hammer2_chain_lock(sub, HAMMER2_RESOLVE_NEVER);
		} else {
			hammer2_spin_unex(&chain->core.spin);
			sub = hammer2_chain_get(chain, generation, &bcopy,
						HAMMER2_RESOLVE_NEVER);
			if (sub == NULL) {
				hammer2_spin_ex(&chain->core.spin);
				continue;
			}
		}
		if (bcmp(&bcopy, &sub->bref, sizeof(bcopy)) ||
		    sub->parent != chain ||
		    (sub->flags & HAMMER2_CHAIN_DELETED)) {
			hammer2_chain_unlock(sub);
			hammer2_chain_drop(sub);
			hammer2_spin_ex(&chain->core.spin);
			sub = NULL;	/* safety */
			continue;
		}
		error = hammer2_chain_delete_obref(chain, sub,
						   sub->bref.modify_tid, 0,
						   &bcopy);
		KKASSERT(error == 0);
		hammer2_chain_rename_obref(&parent, sub,
				     sub->bref.modify_tid,
				     HAMMER2_INSERT_SAMEPARENT, &bcopy);
		hammer2_chain_unlock(sub);
		hammer2_chain_drop(sub);
		hammer2_spin_ex(&chain->core.spin);

		if (key_next == 0)
			break;
		key_beg = key_next;
	}
	hammer2_spin_unex(&chain->core.spin);

	hammer2_chain_repchange(parent, chain);

	return 1;
}

/*
 * Freemap indirect blocks
 *
 * Calculate the keybits and highside/lowside of the freemap node the
 * caller is creating.
 *
 * This routine will specify the next higher-level freemap key/radix
 * representing the lowest-ordered set.  By doing so, eventually all
 * low-ordered sets will be moved one level down.
 *
 * We have to be careful here because the freemap reserves a limited
 * number of blocks for a limited number of levels.  So we can't just
 * push indiscriminately.
 */
int
hammer2_chain_indkey_freemap(hammer2_chain_t *parent, hammer2_key_t *keyp,
			     int keybits, hammer2_blockref_t *base, int count)
{
	hammer2_chain_t *chain;
	hammer2_blockref_t *bref;
	hammer2_key_t key;
	hammer2_key_t key_beg;
	hammer2_key_t key_end;
	hammer2_key_t key_next;
	int locount;
	int hicount;
	int maxloops = 300000;

	key = *keyp;
	locount = 0;
	hicount = 0;
	keybits = 64;

	/*
	 * Calculate the range of keys in the array being careful to skip
	 * slots which are overridden with a deletion.
	 */
	key_beg = 0;
	key_end = HAMMER2_KEY_MAX;
	hammer2_spin_ex(&parent->core.spin);

	for (;;) {
		if (--maxloops == 0) {
			panic("indkey_freemap shit %p %p:%d\n",
			      parent, base, count);
		}
		chain = hammer2_combined_find(parent, base, count,
					      &key_next,
					      key_beg, key_end,
					      &bref);

		/*
		 * Exhausted search
		 */
		if (bref == NULL)
			break;

		/*
		 * Skip deleted chains.
		 */
		if (chain && (chain->flags & HAMMER2_CHAIN_DELETED)) {
			if (key_next == 0 || key_next > key_end)
				break;
			key_beg = key_next;
			continue;
		}

		/*
		 * Use the full live (not deleted) element for the scan
		 * iteration.  HAMMER2 does not allow partial replacements.
		 *
		 * XXX should be built into hammer2_combined_find().
		 */
		key_next = bref->key + ((hammer2_key_t)1 << bref->keybits);

		if (keybits > bref->keybits) {
			key = bref->key;
			keybits = bref->keybits;
		} else if (keybits == bref->keybits && bref->key < key) {
			key = bref->key;
		}
		if (key_next == 0)
			break;
		key_beg = key_next;
	}
	hammer2_spin_unex(&parent->core.spin);

	/*
	 * Return the keybits for a higher-level FREEMAP_NODE covering
	 * this node.
	 */
	switch(keybits) {
	case HAMMER2_FREEMAP_LEVEL0_RADIX:
		keybits = HAMMER2_FREEMAP_LEVEL1_RADIX;
		break;
	case HAMMER2_FREEMAP_LEVEL1_RADIX:
		keybits = HAMMER2_FREEMAP_LEVEL2_RADIX;
		break;
	case HAMMER2_FREEMAP_LEVEL2_RADIX:
		keybits = HAMMER2_FREEMAP_LEVEL3_RADIX;
		break;
	case HAMMER2_FREEMAP_LEVEL3_RADIX:
		keybits = HAMMER2_FREEMAP_LEVEL4_RADIX;
		break;
	case HAMMER2_FREEMAP_LEVEL4_RADIX:
		keybits = HAMMER2_FREEMAP_LEVEL5_RADIX;
		break;
	case HAMMER2_FREEMAP_LEVEL5_RADIX:
		panic("hammer2_chain_indkey_freemap: level too high");
		break;
	default:
		panic("hammer2_chain_indkey_freemap: bad radix");
		break;
	}
	*keyp = key;

	return (keybits);
}

/*
 * File indirect blocks
 *
 * Calculate the key/keybits for the indirect block to create by scanning
 * existing keys.  The key being created is also passed in *keyp and can be
 * inside or outside the indirect block.  Regardless, the indirect block
 * must hold at least two keys in order to guarantee sufficient space.
 *
 * We use a modified version of the freemap's fixed radix tree, but tailored
 * for file data.  Basically we configure an indirect block encompassing the
 * smallest key.
 */
static int
hammer2_chain_indkey_file(hammer2_chain_t *parent, hammer2_key_t *keyp,
			  int keybits, hammer2_blockref_t *base, int count,
			  int ncount)
{
	hammer2_chain_t *chain;
	hammer2_blockref_t *bref;
	hammer2_key_t key;
	hammer2_key_t key_beg;
	hammer2_key_t key_end;
	hammer2_key_t key_next;
	int nradix;
	int locount;
	int hicount;
	int maxloops = 300000;

	key = *keyp;
	locount = 0;
	hicount = 0;
	keybits = 64;

	/*
	 * Calculate the range of keys in the array being careful to skip
	 * slots which are overridden with a deletion.
	 *
	 * Locate the smallest key.
	 */
	key_beg = 0;
	key_end = HAMMER2_KEY_MAX;
	hammer2_spin_ex(&parent->core.spin);

	for (;;) {
		if (--maxloops == 0) {
			panic("indkey_freemap shit %p %p:%d\n",
			      parent, base, count);
		}
		chain = hammer2_combined_find(parent, base, count,
					      &key_next,
					      key_beg, key_end,
					      &bref);

		/*
		 * Exhausted search
		 */
		if (bref == NULL)
			break;

		/*
		 * Skip deleted chains.
		 */
		if (chain && (chain->flags & HAMMER2_CHAIN_DELETED)) {
			if (key_next == 0 || key_next > key_end)
				break;
			key_beg = key_next;
			continue;
		}

		/*
		 * Use the full live (not deleted) element for the scan
		 * iteration.  HAMMER2 does not allow partial replacements.
		 *
		 * XXX should be built into hammer2_combined_find().
		 */
		key_next = bref->key + ((hammer2_key_t)1 << bref->keybits);

		if (keybits > bref->keybits) {
			key = bref->key;
			keybits = bref->keybits;
		} else if (keybits == bref->keybits && bref->key < key) {
			key = bref->key;
		}
		if (key_next == 0)
			break;
		key_beg = key_next;
	}
	hammer2_spin_unex(&parent->core.spin);

	/*
	 * Calculate the static keybits for a higher-level indirect block
	 * that contains the key.
	 */
	*keyp = key;

	switch(ncount) {
	case HAMMER2_IND_BYTES_MIN / sizeof(hammer2_blockref_t):
		nradix = HAMMER2_IND_RADIX_MIN - HAMMER2_BLOCKREF_RADIX;
		break;
	case HAMMER2_IND_BYTES_NOM / sizeof(hammer2_blockref_t):
		nradix = HAMMER2_IND_RADIX_NOM - HAMMER2_BLOCKREF_RADIX;
		break;
	case HAMMER2_IND_BYTES_MAX / sizeof(hammer2_blockref_t):
		nradix = HAMMER2_IND_RADIX_MAX - HAMMER2_BLOCKREF_RADIX;
		break;
	default:
		panic("bad ncount %d\n", ncount);
		nradix = 0;
		break;
	}

	/*
	 * The largest radix that can be returned for an indirect block is
	 * 63 bits.  (The largest practical indirect block radix is actually
	 * 62 bits because the top-level inode or volume root contains four
	 * entries, but allow 63 to be returned).
	 */
	if (nradix >= 64)
		nradix = 63;

	return keybits + nradix;
}

#if 1

/*
 * Directory indirect blocks.
 *
 * Covers both the inode index (directory of inodes), and directory contents
 * (filenames hardlinked to inodes).
 *
 * Because directory keys are hashed we generally try to cut the space in
 * half.  We accommodate the inode index (which tends to have linearly
 * increasing inode numbers) by ensuring that the keyspace is at least large
 * enough to fill up the indirect block being created.
 */
static int
hammer2_chain_indkey_dir(hammer2_chain_t *parent, hammer2_key_t *keyp,
			 int keybits, hammer2_blockref_t *base, int count,
			 int ncount)
{
	hammer2_blockref_t *bref;
	hammer2_chain_t	*chain;
	hammer2_key_t key_beg;
	hammer2_key_t key_end;
	hammer2_key_t key_next;
	hammer2_key_t key;
	int nkeybits;
	int locount;
	int hicount;
	int maxloops = 300000;

	/*
	 * NOTE: We can't take a shortcut here anymore for inodes because
	 *	 the root directory can contain a mix of inodes and directory
	 *	 entries (we used to just return 63 if parent->bref.type was
	 *	 HAMMER2_BREF_TYPE_INODE.
	 */
	key = *keyp;
	locount = 0;
	hicount = 0;

	/*
	 * Calculate the range of keys in the array being careful to skip
	 * slots which are overridden with a deletion.
	 */
	key_beg = 0;
	key_end = HAMMER2_KEY_MAX;
	hammer2_spin_ex(&parent->core.spin);

	for (;;) {
		if (--maxloops == 0) {
			panic("indkey_freemap shit %p %p:%d\n",
			      parent, base, count);
		}
		chain = hammer2_combined_find(parent, base, count,
					      &key_next,
					      key_beg, key_end,
					      &bref);

		/*
		 * Exhausted search
		 */
		if (bref == NULL)
			break;

		/*
		 * Deleted object
		 */
		if (chain && (chain->flags & HAMMER2_CHAIN_DELETED)) {
			if (key_next == 0 || key_next > key_end)
				break;
			key_beg = key_next;
			continue;
		}

		/*
		 * Use the full live (not deleted) element for the scan
		 * iteration.  HAMMER2 does not allow partial replacements.
		 *
		 * XXX should be built into hammer2_combined_find().
		 */
		key_next = bref->key + ((hammer2_key_t)1 << bref->keybits);

		/*
		 * Expand our calculated key range (key, keybits) to fit
		 * the scanned key.  nkeybits represents the full range
		 * that we will later cut in half (two halves @ nkeybits - 1).
		 */
		nkeybits = keybits;
		if (nkeybits < bref->keybits) {
			if (bref->keybits > 64) {
				kprintf("bad bref chain %p bref %p\n",
					chain, bref);
				Debugger("fubar");
			}
			nkeybits = bref->keybits;
		}
		while (nkeybits < 64 &&
		       (~(((hammer2_key_t)1 << nkeybits) - 1) &
		        (key ^ bref->key)) != 0) {
			++nkeybits;
		}

		/*
		 * If the new key range is larger we have to determine
		 * which side of the new key range the existing keys fall
		 * under by checking the high bit, then collapsing the
		 * locount into the hicount or vice-versa.
		 */
		if (keybits != nkeybits) {
			if (((hammer2_key_t)1 << (nkeybits - 1)) & key) {
				hicount += locount;
				locount = 0;
			} else {
				locount += hicount;
				hicount = 0;
			}
			keybits = nkeybits;
		}

		/*
		 * The newly scanned key will be in the lower half or the
		 * upper half of the (new) key range.
		 */
		if (((hammer2_key_t)1 << (nkeybits - 1)) & bref->key)
			++hicount;
		else
			++locount;

		if (key_next == 0)
			break;
		key_beg = key_next;
	}
	hammer2_spin_unex(&parent->core.spin);
	bref = NULL;	/* now invalid (safety) */

	/*
	 * Adjust keybits to represent half of the full range calculated
	 * above (radix 63 max) for our new indirect block.
	 */
	--keybits;

	/*
	 * Expand keybits to hold at least ncount elements.  ncount will be
	 * a power of 2.  This is to try to completely fill leaf nodes (at
	 * least for keys which are not hashes).
	 *
	 * We aren't counting 'in' or 'out', we are counting 'high side'
	 * and 'low side' based on the bit at (1LL << keybits).  We want
	 * everything to be inside in these cases so shift it all to
	 * the low or high side depending on the new high bit.
	 */
	while (((hammer2_key_t)1 << keybits) < ncount) {
		++keybits;
		if (key & ((hammer2_key_t)1 << keybits)) {
			hicount += locount;
			locount = 0;
		} else {
			locount += hicount;
			hicount = 0;
		}
	}

	if (hicount > locount)
		key |= (hammer2_key_t)1 << keybits;
	else
		key &= ~(hammer2_key_t)1 << keybits;

	*keyp = key;

	return (keybits);
}

#else

/*
 * Directory indirect blocks.
 *
 * Covers both the inode index (directory of inodes), and directory contents
 * (filenames hardlinked to inodes).
 *
 * Because directory keys are hashed we generally try to cut the space in
 * half.
 * We accommodate the inode index (which tends to have linearly
 * increasing inode numbers) by ensuring that the keyspace is at least large
 * enough to fill up the indirect block being created.
 */
static int
hammer2_chain_indkey_dir(hammer2_chain_t *parent, hammer2_key_t *keyp,
			 int keybits, hammer2_blockref_t *base, int count,
			 int ncount)
{
	hammer2_blockref_t *bref;
	hammer2_chain_t	*chain;
	hammer2_key_t key_beg;
	hammer2_key_t key_end;
	hammer2_key_t key_next;
	hammer2_key_t key;
	int nkeybits;
	int locount;
	int hicount;
	int maxloops = 300000;

	/*
	 * Shortcut if the parent is the inode.  In this situation the
	 * parent has 4+1 directory entries and we are creating an indirect
	 * block capable of holding many more.
	 */
	if (parent->bref.type == HAMMER2_BREF_TYPE_INODE) {
		return 63;
	}

	key = *keyp;
	locount = 0;
	hicount = 0;

	/*
	 * Calculate the range of keys in the array being careful to skip
	 * slots which are overridden with a deletion.
	 */
	key_beg = 0;
	key_end = HAMMER2_KEY_MAX;
	hammer2_spin_ex(&parent->core.spin);

	for (;;) {
		if (--maxloops == 0) {
			panic("indkey_freemap shit %p %p:%d\n",
			      parent, base, count);
		}
		chain = hammer2_combined_find(parent, base, count,
					      &key_next,
					      key_beg, key_end,
					      &bref);

		/*
		 * Exhausted search
		 */
		if (bref == NULL)
			break;

		/*
		 * Deleted object
		 */
		if (chain && (chain->flags & HAMMER2_CHAIN_DELETED)) {
			if (key_next == 0 || key_next > key_end)
				break;
			key_beg = key_next;
			continue;
		}

		/*
		 * Use the full live (not deleted) element for the scan
		 * iteration.  HAMMER2 does not allow partial replacements.
		 *
		 * XXX should be built into hammer2_combined_find().
		 */
		key_next = bref->key + ((hammer2_key_t)1 << bref->keybits);

		/*
		 * Expand our calculated key range (key, keybits) to fit
		 * the scanned key.  nkeybits represents the full range
		 * that we will later cut in half (two halves @ nkeybits - 1).
		 */
		nkeybits = keybits;
		if (nkeybits < bref->keybits) {
			if (bref->keybits > 64) {
				kprintf("bad bref chain %p bref %p\n",
					chain, bref);
				Debugger("fubar");
			}
			nkeybits = bref->keybits;
		}
		while (nkeybits < 64 &&
		       (~(((hammer2_key_t)1 << nkeybits) - 1) &
		        (key ^ bref->key)) != 0) {
			++nkeybits;
		}

		/*
		 * If the new key range is larger we have to determine
		 * which side of the new key range the existing keys fall
		 * under by checking the high bit, then collapsing the
		 * locount into the hicount or vice-versa.
		 */
		if (keybits != nkeybits) {
			if (((hammer2_key_t)1 << (nkeybits - 1)) & key) {
				hicount += locount;
				locount = 0;
			} else {
				locount += hicount;
				hicount = 0;
			}
			keybits = nkeybits;
		}

		/*
		 * The newly scanned key will be in the lower half or the
		 * upper half of the (new) key range.
		 */
		if (((hammer2_key_t)1 << (nkeybits - 1)) & bref->key)
			++hicount;
		else
			++locount;

		if (key_next == 0)
			break;
		key_beg = key_next;
	}
	hammer2_spin_unex(&parent->core.spin);
	bref = NULL;	/* now invalid (safety) */

	/*
	 * Adjust keybits to represent half of the full range calculated
	 * above (radix 63 max) for our new indirect block.
	 */
	--keybits;

	/*
	 * Expand keybits to hold at least ncount elements.  ncount will be
	 * a power of 2.  This is to try to completely fill leaf nodes (at
	 * least for keys which are not hashes).
	 *
	 * We aren't counting 'in' or 'out', we are counting 'high side'
	 * and 'low side' based on the bit at (1LL << keybits).  We want
	 * everything to be inside in these cases so shift it all to
	 * the low or high side depending on the new high bit.
	 */
	while (((hammer2_key_t)1 << keybits) < ncount) {
		++keybits;
		if (key & ((hammer2_key_t)1 << keybits)) {
			hicount += locount;
			locount = 0;
		} else {
			locount += hicount;
			hicount = 0;
		}
	}

	if (hicount > locount)
		key |= (hammer2_key_t)1 << keybits;
	else
		key &= ~(hammer2_key_t)1 << keybits;

	*keyp = key;

	return (keybits);
}

#endif

/*
 * Sets CHAIN_DELETED and remove the chain's blockref from the parent if
 * it exists.
 *
 * Both parent and chain must be locked exclusively.
 *
 * This function will modify the parent if the blockref requires removal
 * from the parent's block table.
 *
 * This function is NOT recursive.  Any entity already pushed into the
 * chain (such as an inode) may still need visibility into its contents,
 * as well as the ability to read and modify the contents.  For example,
 * for an unlinked file which is still open.
 *
 * Also note that the flusher is responsible for cleaning up empty
 * indirect blocks.
 */
int
hammer2_chain_delete(hammer2_chain_t *parent, hammer2_chain_t *chain,
		     hammer2_tid_t mtid, int flags)
{
	int error = 0;

	KKASSERT(hammer2_mtx_owned(&chain->lock));

	/*
	 * Nothing to do if already marked.
	 *
	 * We need the spinlock on the core whose RBTREE contains chain
	 * to protect against races.
	 */
	if ((chain->flags & HAMMER2_CHAIN_DELETED) == 0) {
		KKASSERT((chain->flags & HAMMER2_CHAIN_DELETED) == 0 &&
			 chain->parent == parent);
		error = _hammer2_chain_delete_helper(parent, chain,
						     mtid, flags, NULL);
	}

	/*
	 * Permanent deletions mark the chain as destroyed.
	 *
	 * NOTE: We do not setflush the chain unless the deletion is
	 *	 permanent, since the deletion of a chain does not actually
	 *	 require it to be flushed.
	 */
	if (error == 0) {
		if (flags & HAMMER2_DELETE_PERMANENT) {
			atomic_set_int(&chain->flags, HAMMER2_CHAIN_DESTROY);
			hammer2_chain_setflush(chain);
		}
	}

	return error;
}

/*
 * As hammer2_chain_delete(), but also returns a copy of the deleted
 * blockref in *obref (obref->type is left 0 if nothing was deleted).
 */
static int
hammer2_chain_delete_obref(hammer2_chain_t *parent, hammer2_chain_t *chain,
		     hammer2_tid_t mtid, int flags,
		     hammer2_blockref_t *obref)
{
	int error = 0;

	KKASSERT(hammer2_mtx_owned(&chain->lock));

	/*
	 * Nothing to do if already marked.
	 *
	 * We need the spinlock on the core whose RBTREE contains chain
	 * to protect against races.
	 */
	obref->type = 0;
	if ((chain->flags & HAMMER2_CHAIN_DELETED) == 0) {
		KKASSERT((chain->flags & HAMMER2_CHAIN_DELETED) == 0 &&
			 chain->parent == parent);
		error = _hammer2_chain_delete_helper(parent, chain,
						     mtid, flags, obref);
	}

	/*
	 * Permanent deletions mark the chain as destroyed.
	 *
	 * NOTE: We do not setflush the chain unless the deletion is
	 *	 permanent, since the deletion of a chain does not actually
	 *	 require it to be flushed.
	 */
	if (error == 0) {
		if (flags & HAMMER2_DELETE_PERMANENT) {
			atomic_set_int(&chain->flags, HAMMER2_CHAIN_DESTROY);
			hammer2_chain_setflush(chain);
		}
	}

	return error;
}

/*
 * Returns the index of the nearest element in the blockref array >= elm.
 * Returns (count) if no element could be found.
 *
 * Sets *key_nextp to the next key for loop purposes but does not modify
 * it if the next key would be higher than the current value of *key_nextp.
 * Note that *key_nextp can overflow to 0, which should be tested by the
 * caller.
 *
 * WARNING! Must be called with parent's spinlock held.  Spinlock remains
Spinlock remains 5188 * held through the operation. 5189 */ 5190 static int 5191 hammer2_base_find(hammer2_chain_t *parent, 5192 hammer2_blockref_t *base, int count, 5193 hammer2_key_t *key_nextp, 5194 hammer2_key_t key_beg, hammer2_key_t key_end) 5195 { 5196 hammer2_blockref_t *scan; 5197 hammer2_key_t scan_end; 5198 int i; 5199 int limit; 5200 5201 /* 5202 * Require the live chain's already have their core's counted 5203 * so we can optimize operations. 5204 */ 5205 KKASSERT(parent->flags & HAMMER2_CHAIN_COUNTEDBREFS); 5206 5207 /* 5208 * Degenerate case 5209 */ 5210 if (count == 0 || base == NULL) 5211 return(count); 5212 5213 /* 5214 * Sequential optimization using parent->cache_index. This is 5215 * the most likely scenario. 5216 * 5217 * We can avoid trailing empty entries on live chains, otherwise 5218 * we might have to check the whole block array. 5219 */ 5220 i = parent->cache_index; /* SMP RACE OK */ 5221 cpu_ccfence(); 5222 limit = parent->core.live_zero; 5223 if (i >= limit) 5224 i = limit - 1; 5225 if (i < 0) 5226 i = 0; 5227 KKASSERT(i < count); 5228 5229 /* 5230 * Search backwards 5231 */ 5232 scan = &base[i]; 5233 while (i > 0 && (scan->type == 0 || scan->key > key_beg)) { 5234 --scan; 5235 --i; 5236 } 5237 parent->cache_index = i; 5238 5239 /* 5240 * Search forwards, stop when we find a scan element which 5241 * encloses the key or until we know that there are no further 5242 * elements. 
5243 */ 5244 while (i < count) { 5245 if (scan->type != 0) { 5246 scan_end = scan->key + 5247 ((hammer2_key_t)1 << scan->keybits) - 1; 5248 if (scan->key > key_beg || scan_end >= key_beg) 5249 break; 5250 } 5251 if (i >= limit) 5252 return (count); 5253 ++scan; 5254 ++i; 5255 } 5256 if (i != count) { 5257 parent->cache_index = i; 5258 if (i >= limit) { 5259 i = count; 5260 } else { 5261 scan_end = scan->key + 5262 ((hammer2_key_t)1 << scan->keybits); 5263 if (scan_end && (*key_nextp > scan_end || 5264 *key_nextp == 0)) { 5265 *key_nextp = scan_end; 5266 } 5267 } 5268 } 5269 return (i); 5270 } 5271 5272 /* 5273 * Do a combined search and return the next match either from the blockref 5274 * array or from the in-memory chain. Sets *bresp to the returned bref in 5275 * both cases, or sets it to NULL if the search exhausted. Only returns 5276 * a non-NULL chain if the search matched from the in-memory chain. 5277 * 5278 * When no in-memory chain has been found and a non-NULL bref is returned 5279 * in *bresp. 5280 * 5281 * 5282 * The returned chain is not locked or referenced. Use the returned bref 5283 * to determine if the search exhausted or not. Iterate if the base find 5284 * is chosen but matches a deleted chain. 5285 * 5286 * WARNING! Must be called with parent's spinlock held. Spinlock remains 5287 * held through the operation. 5288 */ 5289 hammer2_chain_t * 5290 hammer2_combined_find(hammer2_chain_t *parent, 5291 hammer2_blockref_t *base, int count, 5292 hammer2_key_t *key_nextp, 5293 hammer2_key_t key_beg, hammer2_key_t key_end, 5294 hammer2_blockref_t **bresp) 5295 { 5296 hammer2_blockref_t *bref; 5297 hammer2_chain_t *chain; 5298 int i; 5299 5300 /* 5301 * Lookup in block array and in rbtree. 
5302 */ 5303 *key_nextp = key_end + 1; 5304 i = hammer2_base_find(parent, base, count, key_nextp, 5305 key_beg, key_end); 5306 chain = hammer2_chain_find(parent, key_nextp, key_beg, key_end); 5307 5308 /* 5309 * Neither matched 5310 */ 5311 if (i == count && chain == NULL) { 5312 *bresp = NULL; 5313 return(NULL); 5314 } 5315 5316 /* 5317 * Only chain matched. 5318 */ 5319 if (i == count) { 5320 bref = &chain->bref; 5321 goto found; 5322 } 5323 5324 /* 5325 * Only blockref matched. 5326 */ 5327 if (chain == NULL) { 5328 bref = &base[i]; 5329 goto found; 5330 } 5331 5332 /* 5333 * Both in-memory and blockref matched, select the nearer element. 5334 * 5335 * If both are flush with the left-hand side or both are the 5336 * same distance away, select the chain. In this situation the 5337 * chain must have been loaded from the matching blockmap. 5338 */ 5339 if ((chain->bref.key <= key_beg && base[i].key <= key_beg) || 5340 chain->bref.key == base[i].key) { 5341 KKASSERT(chain->bref.key == base[i].key); 5342 bref = &chain->bref; 5343 goto found; 5344 } 5345 5346 /* 5347 * Select the nearer key 5348 */ 5349 if (chain->bref.key < base[i].key) { 5350 bref = &chain->bref; 5351 } else { 5352 bref = &base[i]; 5353 chain = NULL; 5354 } 5355 5356 /* 5357 * If the bref is out of bounds we've exhausted our search. 5358 */ 5359 found: 5360 if (bref->key > key_end) { 5361 *bresp = NULL; 5362 chain = NULL; 5363 } else { 5364 *bresp = bref; 5365 } 5366 return(chain); 5367 } 5368 5369 /* 5370 * Locate the specified block array element and delete it. The element 5371 * must exist. 5372 * 5373 * The spin lock on the related chain must be held. 5374 * 5375 * NOTE: live_count was adjusted when the chain was deleted, so it does not 5376 * need to be adjusted when we commit the media change. 
5377 */ 5378 void 5379 hammer2_base_delete(hammer2_chain_t *parent, 5380 hammer2_blockref_t *base, int count, 5381 hammer2_chain_t *chain, 5382 hammer2_blockref_t *obref) 5383 { 5384 hammer2_blockref_t *elm = &chain->bref; 5385 hammer2_blockref_t *scan; 5386 hammer2_key_t key_next; 5387 int i; 5388 5389 /* 5390 * Delete element. Expect the element to exist. 5391 * 5392 * XXX see caller, flush code not yet sophisticated enough to prevent 5393 * re-flushed in some cases. 5394 */ 5395 key_next = 0; /* max range */ 5396 i = hammer2_base_find(parent, base, count, &key_next, 5397 elm->key, elm->key); 5398 scan = &base[i]; 5399 if (i == count || scan->type == 0 || 5400 scan->key != elm->key || 5401 ((chain->flags & HAMMER2_CHAIN_BMAPUPD) == 0 && 5402 scan->keybits != elm->keybits)) { 5403 hammer2_spin_unex(&parent->core.spin); 5404 panic("delete base %p element not found at %d/%d elm %p\n", 5405 base, i, count, elm); 5406 return; 5407 } 5408 5409 /* 5410 * Update stats and zero the entry. 5411 * 5412 * NOTE: Handle radix == 0 (0 bytes) case. 
5413 */ 5414 if ((int)(scan->data_off & HAMMER2_OFF_MASK_RADIX)) { 5415 parent->bref.embed.stats.data_count -= (hammer2_off_t)1 << 5416 (int)(scan->data_off & HAMMER2_OFF_MASK_RADIX); 5417 } 5418 switch(scan->type) { 5419 case HAMMER2_BREF_TYPE_INODE: 5420 --parent->bref.embed.stats.inode_count; 5421 /* fall through */ 5422 case HAMMER2_BREF_TYPE_DATA: 5423 if (parent->bref.leaf_count == HAMMER2_BLOCKREF_LEAF_MAX) { 5424 atomic_set_int(&chain->flags, 5425 HAMMER2_CHAIN_HINT_LEAF_COUNT); 5426 } else { 5427 if (parent->bref.leaf_count) 5428 --parent->bref.leaf_count; 5429 } 5430 /* fall through */ 5431 case HAMMER2_BREF_TYPE_INDIRECT: 5432 if (scan->type != HAMMER2_BREF_TYPE_DATA) { 5433 parent->bref.embed.stats.data_count -= 5434 scan->embed.stats.data_count; 5435 parent->bref.embed.stats.inode_count -= 5436 scan->embed.stats.inode_count; 5437 } 5438 if (scan->type == HAMMER2_BREF_TYPE_INODE) 5439 break; 5440 if (parent->bref.leaf_count == HAMMER2_BLOCKREF_LEAF_MAX) { 5441 atomic_set_int(&chain->flags, 5442 HAMMER2_CHAIN_HINT_LEAF_COUNT); 5443 } else { 5444 if (parent->bref.leaf_count <= scan->leaf_count) 5445 parent->bref.leaf_count = 0; 5446 else 5447 parent->bref.leaf_count -= scan->leaf_count; 5448 } 5449 break; 5450 case HAMMER2_BREF_TYPE_DIRENT: 5451 if (parent->bref.leaf_count == HAMMER2_BLOCKREF_LEAF_MAX) { 5452 atomic_set_int(&chain->flags, 5453 HAMMER2_CHAIN_HINT_LEAF_COUNT); 5454 } else { 5455 if (parent->bref.leaf_count) 5456 --parent->bref.leaf_count; 5457 } 5458 default: 5459 break; 5460 } 5461 5462 if (obref) 5463 *obref = *scan; 5464 bzero(scan, sizeof(*scan)); 5465 5466 /* 5467 * We can only optimize parent->core.live_zero for live chains. 5468 */ 5469 if (parent->core.live_zero == i + 1) { 5470 while (--i >= 0 && base[i].type == 0) 5471 ; 5472 parent->core.live_zero = i + 1; 5473 } 5474 5475 /* 5476 * Clear appropriate blockmap flags in chain. 
5477 */ 5478 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_BMAPPED | 5479 HAMMER2_CHAIN_BMAPUPD); 5480 } 5481 5482 /* 5483 * Insert the specified element. The block array must not already have the 5484 * element and must have space available for the insertion. 5485 * 5486 * The spin lock on the related chain must be held. 5487 * 5488 * NOTE: live_count was adjusted when the chain was deleted, so it does not 5489 * need to be adjusted when we commit the media change. 5490 */ 5491 void 5492 hammer2_base_insert(hammer2_chain_t *parent, 5493 hammer2_blockref_t *base, int count, 5494 hammer2_chain_t *chain, hammer2_blockref_t *elm) 5495 { 5496 hammer2_key_t key_next; 5497 hammer2_key_t xkey; 5498 int i; 5499 int j; 5500 int k; 5501 int l; 5502 int u = 1; 5503 5504 /* 5505 * Insert new element. Expect the element to not already exist 5506 * unless we are replacing it. 5507 * 5508 * XXX see caller, flush code not yet sophisticated enough to prevent 5509 * re-flushed in some cases. 5510 */ 5511 key_next = 0; /* max range */ 5512 i = hammer2_base_find(parent, base, count, &key_next, 5513 elm->key, elm->key); 5514 5515 /* 5516 * Shortcut fill optimization, typical ordered insertion(s) may not 5517 * require a search. 
5518 */ 5519 KKASSERT(i >= 0 && i <= count); 5520 5521 /* 5522 * Set appropriate blockmap flags in chain (if not NULL) 5523 */ 5524 if (chain) 5525 atomic_set_int(&chain->flags, HAMMER2_CHAIN_BMAPPED); 5526 5527 /* 5528 * Update stats and zero the entry 5529 */ 5530 if ((int)(elm->data_off & HAMMER2_OFF_MASK_RADIX)) { 5531 parent->bref.embed.stats.data_count += (hammer2_off_t)1 << 5532 (int)(elm->data_off & HAMMER2_OFF_MASK_RADIX); 5533 } 5534 switch(elm->type) { 5535 case HAMMER2_BREF_TYPE_INODE: 5536 ++parent->bref.embed.stats.inode_count; 5537 /* fall through */ 5538 case HAMMER2_BREF_TYPE_DATA: 5539 if (parent->bref.leaf_count != HAMMER2_BLOCKREF_LEAF_MAX) 5540 ++parent->bref.leaf_count; 5541 /* fall through */ 5542 case HAMMER2_BREF_TYPE_INDIRECT: 5543 if (elm->type != HAMMER2_BREF_TYPE_DATA) { 5544 parent->bref.embed.stats.data_count += 5545 elm->embed.stats.data_count; 5546 parent->bref.embed.stats.inode_count += 5547 elm->embed.stats.inode_count; 5548 } 5549 if (elm->type == HAMMER2_BREF_TYPE_INODE) 5550 break; 5551 if (parent->bref.leaf_count + elm->leaf_count < 5552 HAMMER2_BLOCKREF_LEAF_MAX) { 5553 parent->bref.leaf_count += elm->leaf_count; 5554 } else { 5555 parent->bref.leaf_count = HAMMER2_BLOCKREF_LEAF_MAX; 5556 } 5557 break; 5558 case HAMMER2_BREF_TYPE_DIRENT: 5559 if (parent->bref.leaf_count != HAMMER2_BLOCKREF_LEAF_MAX) 5560 ++parent->bref.leaf_count; 5561 break; 5562 default: 5563 break; 5564 } 5565 5566 5567 /* 5568 * We can only optimize parent->core.live_zero for live chains. 
5569 */ 5570 if (i == count && parent->core.live_zero < count) { 5571 i = parent->core.live_zero++; 5572 base[i] = *elm; 5573 return; 5574 } 5575 5576 xkey = elm->key + ((hammer2_key_t)1 << elm->keybits) - 1; 5577 if (i != count && (base[i].key < elm->key || xkey >= base[i].key)) { 5578 hammer2_spin_unex(&parent->core.spin); 5579 panic("insert base %p overlapping elements at %d elm %p\n", 5580 base, i, elm); 5581 } 5582 5583 /* 5584 * Try to find an empty slot before or after. 5585 */ 5586 j = i; 5587 k = i; 5588 while (j > 0 || k < count) { 5589 --j; 5590 if (j >= 0 && base[j].type == 0) { 5591 if (j == i - 1) { 5592 base[j] = *elm; 5593 } else { 5594 bcopy(&base[j+1], &base[j], 5595 (i - j - 1) * sizeof(*base)); 5596 base[i - 1] = *elm; 5597 } 5598 goto validate; 5599 } 5600 ++k; 5601 if (k < count && base[k].type == 0) { 5602 bcopy(&base[i], &base[i+1], 5603 (k - i) * sizeof(hammer2_blockref_t)); 5604 base[i] = *elm; 5605 5606 /* 5607 * We can only update parent->core.live_zero for live 5608 * chains. 5609 */ 5610 if (parent->core.live_zero <= k) 5611 parent->core.live_zero = k + 1; 5612 u = 2; 5613 goto validate; 5614 } 5615 } 5616 panic("hammer2_base_insert: no room!"); 5617 5618 /* 5619 * Debugging 5620 */ 5621 validate: 5622 key_next = 0; 5623 for (l = 0; l < count; ++l) { 5624 if (base[l].type) { 5625 key_next = base[l].key + 5626 ((hammer2_key_t)1 << base[l].keybits) - 1; 5627 break; 5628 } 5629 } 5630 while (++l < count) { 5631 if (base[l].type) { 5632 if (base[l].key <= key_next) 5633 panic("base_insert %d %d,%d,%d fail %p:%d", u, i, j, k, base, l); 5634 key_next = base[l].key + 5635 ((hammer2_key_t)1 << base[l].keybits) - 1; 5636 5637 } 5638 } 5639 5640 } 5641 5642 #if 0 5643 5644 /* 5645 * Sort the blockref array for the chain. Used by the flush code to 5646 * sort the blockref[] array. 5647 * 5648 * The chain must be exclusively locked AND spin-locked. 
5649 */ 5650 typedef hammer2_blockref_t *hammer2_blockref_p; 5651 5652 static 5653 int 5654 hammer2_base_sort_callback(const void *v1, const void *v2) 5655 { 5656 hammer2_blockref_p bref1 = *(const hammer2_blockref_p *)v1; 5657 hammer2_blockref_p bref2 = *(const hammer2_blockref_p *)v2; 5658 5659 /* 5660 * Make sure empty elements are placed at the end of the array 5661 */ 5662 if (bref1->type == 0) { 5663 if (bref2->type == 0) 5664 return(0); 5665 return(1); 5666 } else if (bref2->type == 0) { 5667 return(-1); 5668 } 5669 5670 /* 5671 * Sort by key 5672 */ 5673 if (bref1->key < bref2->key) 5674 return(-1); 5675 if (bref1->key > bref2->key) 5676 return(1); 5677 return(0); 5678 } 5679 5680 void 5681 hammer2_base_sort(hammer2_chain_t *chain) 5682 { 5683 hammer2_blockref_t *base; 5684 int count; 5685 5686 switch(chain->bref.type) { 5687 case HAMMER2_BREF_TYPE_INODE: 5688 /* 5689 * Special shortcut for embedded data returns the inode 5690 * itself. Callers must detect this condition and access 5691 * the embedded data (the strategy code does this for us). 5692 * 5693 * This is only applicable to regular files and softlinks. 5694 */ 5695 if (chain->data->ipdata.meta.op_flags & 5696 HAMMER2_OPFLAG_DIRECTDATA) { 5697 return; 5698 } 5699 base = &chain->data->ipdata.u.blockset.blockref[0]; 5700 count = HAMMER2_SET_COUNT; 5701 break; 5702 case HAMMER2_BREF_TYPE_FREEMAP_NODE: 5703 case HAMMER2_BREF_TYPE_INDIRECT: 5704 /* 5705 * Optimize indirect blocks in the INITIAL state to avoid 5706 * I/O. 
5707 */ 5708 KKASSERT((chain->flags & HAMMER2_CHAIN_INITIAL) == 0); 5709 base = &chain->data->npdata[0]; 5710 count = chain->bytes / sizeof(hammer2_blockref_t); 5711 break; 5712 case HAMMER2_BREF_TYPE_VOLUME: 5713 base = &chain->data->voldata.sroot_blockset.blockref[0]; 5714 count = HAMMER2_SET_COUNT; 5715 break; 5716 case HAMMER2_BREF_TYPE_FREEMAP: 5717 base = &chain->data->blkset.blockref[0]; 5718 count = HAMMER2_SET_COUNT; 5719 break; 5720 default: 5721 kprintf("hammer2_chain_lookup: unrecognized " 5722 "blockref(A) type: %d", 5723 chain->bref.type); 5724 while (1) 5725 tsleep(&base, 0, "dead", 0); 5726 panic("hammer2_chain_lookup: unrecognized " 5727 "blockref(A) type: %d", 5728 chain->bref.type); 5729 base = NULL; /* safety */ 5730 count = 0; /* safety */ 5731 } 5732 kqsort(base, count, sizeof(*base), hammer2_base_sort_callback); 5733 } 5734 5735 #endif 5736 5737 /* 5738 * Chain memory management 5739 */ 5740 void 5741 hammer2_chain_wait(hammer2_chain_t *chain) 5742 { 5743 tsleep(chain, 0, "chnflw", 1); 5744 } 5745 5746 const hammer2_media_data_t * 5747 hammer2_chain_rdata(hammer2_chain_t *chain) 5748 { 5749 KKASSERT(chain->data != NULL); 5750 return (chain->data); 5751 } 5752 5753 hammer2_media_data_t * 5754 hammer2_chain_wdata(hammer2_chain_t *chain) 5755 { 5756 KKASSERT(chain->data != NULL); 5757 return (chain->data); 5758 } 5759 5760 /* 5761 * Set the check data for a chain. This can be a heavy-weight operation 5762 * and typically only runs on-flush. For file data check data is calculated 5763 * when the logical buffers are flushed. 
5764 */ 5765 void 5766 hammer2_chain_setcheck(hammer2_chain_t *chain, void *bdata) 5767 { 5768 chain->bref.flags &= ~HAMMER2_BREF_FLAG_ZERO; 5769 5770 switch(HAMMER2_DEC_CHECK(chain->bref.methods)) { 5771 case HAMMER2_CHECK_NONE: 5772 break; 5773 case HAMMER2_CHECK_DISABLED: 5774 break; 5775 case HAMMER2_CHECK_ISCSI32: 5776 chain->bref.check.iscsi32.value = 5777 hammer2_icrc32(bdata, chain->bytes); 5778 break; 5779 case HAMMER2_CHECK_XXHASH64: 5780 chain->bref.check.xxhash64.value = 5781 XXH64(bdata, chain->bytes, XXH_HAMMER2_SEED); 5782 break; 5783 case HAMMER2_CHECK_SHA192: 5784 { 5785 SHA256_CTX hash_ctx; 5786 union { 5787 uint8_t digest[SHA256_DIGEST_LENGTH]; 5788 uint64_t digest64[SHA256_DIGEST_LENGTH/8]; 5789 } u; 5790 5791 SHA256_Init(&hash_ctx); 5792 SHA256_Update(&hash_ctx, bdata, chain->bytes); 5793 SHA256_Final(u.digest, &hash_ctx); 5794 u.digest64[2] ^= u.digest64[3]; 5795 bcopy(u.digest, 5796 chain->bref.check.sha192.data, 5797 sizeof(chain->bref.check.sha192.data)); 5798 } 5799 break; 5800 case HAMMER2_CHECK_FREEMAP: 5801 chain->bref.check.freemap.icrc32 = 5802 hammer2_icrc32(bdata, chain->bytes); 5803 break; 5804 default: 5805 kprintf("hammer2_chain_setcheck: unknown check type %02x\n", 5806 chain->bref.methods); 5807 break; 5808 } 5809 } 5810 5811 /* 5812 * Characterize a failed check code and try to trace back to the inode. 
5813 */ 5814 static void 5815 hammer2_characterize_failed_chain(hammer2_chain_t *chain, uint64_t check, 5816 int bits) 5817 { 5818 hammer2_chain_t *lchain; 5819 hammer2_chain_t *ochain; 5820 5821 kprintf("chain %016jx.%02x (%s) meth=%02x CHECK FAIL " 5822 "(flags=%08x, bref/data ", 5823 chain->bref.data_off, 5824 chain->bref.type, 5825 hammer2_bref_type_str(&chain->bref), 5826 chain->bref.methods, 5827 chain->flags); 5828 if (bits == 32) { 5829 kprintf("%08x/%08x)\n", 5830 chain->bref.check.iscsi32.value, 5831 (uint32_t)check); 5832 } else { 5833 kprintf("%016jx/%016jx)\n", 5834 chain->bref.check.xxhash64.value, 5835 check); 5836 } 5837 5838 /* 5839 * Run up the chains to try to find the governing inode so we 5840 * can report it. 5841 * 5842 * XXX This error reporting is not really MPSAFE 5843 */ 5844 ochain = chain; 5845 lchain = chain; 5846 while (chain && chain->bref.type != HAMMER2_BREF_TYPE_INODE) { 5847 lchain = chain; 5848 chain = chain->parent; 5849 } 5850 5851 if (chain && chain->bref.type == HAMMER2_BREF_TYPE_INODE && 5852 ((chain->bref.flags & HAMMER2_BREF_FLAG_PFSROOT) == 0 || 5853 (lchain->bref.key & HAMMER2_DIRHASH_VISIBLE))) { 5854 kprintf(" Resides at/in inode %ld\n", 5855 chain->bref.key); 5856 } else if (chain && chain->bref.type == HAMMER2_BREF_TYPE_INODE) { 5857 kprintf(" Resides in inode index - CRITICAL!!!\n"); 5858 } else { 5859 kprintf(" Resides in root index - CRITICAL!!!\n"); 5860 } 5861 if (ochain->hmp) { 5862 const char *pfsname = "UNKNOWN"; 5863 int i; 5864 5865 if (ochain->pmp) { 5866 for (i = 0; i < HAMMER2_MAXCLUSTER; ++i) { 5867 if (ochain->pmp->pfs_hmps[i] == ochain->hmp && 5868 ochain->pmp->pfs_names[i]) { 5869 pfsname = ochain->pmp->pfs_names[i]; 5870 break; 5871 } 5872 } 5873 } 5874 kprintf(" In pfs %s on device %s\n", 5875 pfsname, ochain->hmp->devrepname); 5876 } 5877 } 5878 5879 int 5880 hammer2_chain_testcheck(hammer2_chain_t *chain, void *bdata) 5881 { 5882 uint32_t check32; 5883 uint64_t check64; 5884 int r; 5885 5886 if 
(chain->bref.flags & HAMMER2_BREF_FLAG_ZERO) 5887 return 1; 5888 5889 switch(HAMMER2_DEC_CHECK(chain->bref.methods)) { 5890 case HAMMER2_CHECK_NONE: 5891 r = 1; 5892 break; 5893 case HAMMER2_CHECK_DISABLED: 5894 r = 1; 5895 break; 5896 case HAMMER2_CHECK_ISCSI32: 5897 check32 = hammer2_icrc32(bdata, chain->bytes); 5898 r = (chain->bref.check.iscsi32.value == check32); 5899 if (r == 0) { 5900 hammer2_characterize_failed_chain(chain, check32, 32); 5901 } 5902 hammer2_process_icrc32 += chain->bytes; 5903 break; 5904 case HAMMER2_CHECK_XXHASH64: 5905 check64 = XXH64(bdata, chain->bytes, XXH_HAMMER2_SEED); 5906 r = (chain->bref.check.xxhash64.value == check64); 5907 if (r == 0) { 5908 hammer2_characterize_failed_chain(chain, check64, 64); 5909 } 5910 hammer2_process_xxhash64 += chain->bytes; 5911 break; 5912 case HAMMER2_CHECK_SHA192: 5913 { 5914 SHA256_CTX hash_ctx; 5915 union { 5916 uint8_t digest[SHA256_DIGEST_LENGTH]; 5917 uint64_t digest64[SHA256_DIGEST_LENGTH/8]; 5918 } u; 5919 5920 SHA256_Init(&hash_ctx); 5921 SHA256_Update(&hash_ctx, bdata, chain->bytes); 5922 SHA256_Final(u.digest, &hash_ctx); 5923 u.digest64[2] ^= u.digest64[3]; 5924 if (bcmp(u.digest, 5925 chain->bref.check.sha192.data, 5926 sizeof(chain->bref.check.sha192.data)) == 0) { 5927 r = 1; 5928 } else { 5929 r = 0; 5930 kprintf("chain %016jx.%02x meth=%02x " 5931 "CHECK FAIL\n", 5932 chain->bref.data_off, 5933 chain->bref.type, 5934 chain->bref.methods); 5935 } 5936 } 5937 break; 5938 case HAMMER2_CHECK_FREEMAP: 5939 r = (chain->bref.check.freemap.icrc32 == 5940 hammer2_icrc32(bdata, chain->bytes)); 5941 if (r == 0) { 5942 kprintf("chain %016jx.%02x meth=%02x " 5943 "CHECK FAIL\n", 5944 chain->bref.data_off, 5945 chain->bref.type, 5946 chain->bref.methods); 5947 kprintf("freemap.icrc %08x icrc32 %08x (%d)\n", 5948 chain->bref.check.freemap.icrc32, 5949 hammer2_icrc32(bdata, chain->bytes), 5950 chain->bytes); 5951 if (chain->dio) 5952 kprintf("dio %p buf %016jx,%d bdata %p/%p\n", 5953 chain->dio, 
chain->dio->bp->b_loffset, 5954 chain->dio->bp->b_bufsize, bdata, 5955 chain->dio->bp->b_data); 5956 } 5957 5958 break; 5959 default: 5960 kprintf("hammer2_chain_setcheck: unknown check type %02x\n", 5961 chain->bref.methods); 5962 r = 1; 5963 break; 5964 } 5965 return r; 5966 } 5967 5968 /* 5969 * Acquire the chain and parent representing the specified inode for the 5970 * device at the specified cluster index. 5971 * 5972 * The flags passed in are LOOKUP flags, not RESOLVE flags. 5973 * 5974 * If we are unable to locate the inode, HAMMER2_ERROR_EIO is returned and 5975 * *chainp will be NULL. *parentp may still be set error or not, or NULL 5976 * if the parent itself could not be resolved. 5977 * 5978 * The caller may pass-in a locked *parentp and/or *chainp, or neither. 5979 * They will be unlocked and released by this function. The *parentp and 5980 * *chainp representing the located inode are returned locked. 5981 */ 5982 int 5983 hammer2_chain_inode_find(hammer2_pfs_t *pmp, hammer2_key_t inum, 5984 int clindex, int flags, 5985 hammer2_chain_t **parentp, hammer2_chain_t **chainp) 5986 { 5987 hammer2_chain_t *parent; 5988 hammer2_chain_t *rchain; 5989 hammer2_key_t key_dummy; 5990 hammer2_inode_t *ip; 5991 int resolve_flags; 5992 int error; 5993 5994 resolve_flags = (flags & HAMMER2_LOOKUP_SHARED) ? 5995 HAMMER2_RESOLVE_SHARED : 0; 5996 5997 /* 5998 * Caller expects us to replace these. 5999 */ 6000 if (*chainp) { 6001 hammer2_chain_unlock(*chainp); 6002 hammer2_chain_drop(*chainp); 6003 *chainp = NULL; 6004 } 6005 if (*parentp) { 6006 hammer2_chain_unlock(*parentp); 6007 hammer2_chain_drop(*parentp); 6008 *parentp = NULL; 6009 } 6010 6011 /* 6012 * Be very careful, this is a backend function and we CANNOT 6013 * lock any frontend inode structure we find. But we have to 6014 * look the inode up this way first in case it exists but is 6015 * detached from the radix tree. 
6016 */ 6017 ip = hammer2_inode_lookup(pmp, inum); 6018 if (ip) { 6019 *chainp = hammer2_inode_chain_and_parent(ip, clindex, 6020 parentp, 6021 resolve_flags); 6022 hammer2_inode_drop(ip); 6023 if (*chainp) 6024 return 0; 6025 hammer2_chain_unlock(*chainp); 6026 hammer2_chain_drop(*chainp); 6027 *chainp = NULL; 6028 if (*parentp) { 6029 hammer2_chain_unlock(*parentp); 6030 hammer2_chain_drop(*parentp); 6031 *parentp = NULL; 6032 } 6033 } 6034 6035 /* 6036 * Inodes hang off of the iroot (bit 63 is clear, differentiating 6037 * inodes from root directory entries in the key lookup). 6038 */ 6039 parent = hammer2_inode_chain(pmp->iroot, clindex, resolve_flags); 6040 rchain = NULL; 6041 if (parent) { 6042 rchain = hammer2_chain_lookup(&parent, &key_dummy, 6043 inum, inum, 6044 &error, flags); 6045 } else { 6046 error = HAMMER2_ERROR_EIO; 6047 } 6048 *parentp = parent; 6049 *chainp = rchain; 6050 6051 return error; 6052 } 6053 6054 /* 6055 * Used by the bulkscan code to snapshot the synchronized storage for 6056 * a volume, allowing it to be scanned concurrently against normal 6057 * operation. 6058 */ 6059 hammer2_chain_t * 6060 hammer2_chain_bulksnap(hammer2_dev_t *hmp) 6061 { 6062 hammer2_chain_t *copy; 6063 6064 copy = hammer2_chain_alloc(hmp, hmp->spmp, &hmp->vchain.bref); 6065 copy->data = kmalloc(sizeof(copy->data->voldata), 6066 hmp->mchain, 6067 M_WAITOK | M_ZERO); 6068 hammer2_voldata_lock(hmp); 6069 copy->data->voldata = hmp->volsync; 6070 hammer2_voldata_unlock(hmp); 6071 6072 return copy; 6073 } 6074 6075 void 6076 hammer2_chain_bulkdrop(hammer2_chain_t *copy) 6077 { 6078 KKASSERT(copy->bref.type == HAMMER2_BREF_TYPE_VOLUME); 6079 KKASSERT(copy->data); 6080 kfree(copy->data, copy->hmp->mchain); 6081 copy->data = NULL; 6082 atomic_add_long(&hammer2_chain_allocs, -1); 6083 hammer2_chain_drop(copy); 6084 } 6085 6086 /* 6087 * Returns non-zero if the chain (INODE or DIRENT) matches the 6088 * filename. 
6089 */ 6090 int 6091 hammer2_chain_dirent_test(hammer2_chain_t *chain, const char *name, 6092 size_t name_len) 6093 { 6094 const hammer2_inode_data_t *ripdata; 6095 const hammer2_dirent_head_t *den; 6096 6097 if (chain->bref.type == HAMMER2_BREF_TYPE_INODE) { 6098 ripdata = &chain->data->ipdata; 6099 if (ripdata->meta.name_len == name_len && 6100 bcmp(ripdata->filename, name, name_len) == 0) { 6101 return 1; 6102 } 6103 } 6104 if (chain->bref.type == HAMMER2_BREF_TYPE_DIRENT && 6105 chain->bref.embed.dirent.namlen == name_len) { 6106 den = &chain->bref.embed.dirent; 6107 if (name_len > sizeof(chain->bref.check.buf) && 6108 bcmp(chain->data->buf, name, name_len) == 0) { 6109 return 1; 6110 } 6111 if (name_len <= sizeof(chain->bref.check.buf) && 6112 bcmp(chain->bref.check.buf, name, name_len) == 0) { 6113 return 1; 6114 } 6115 } 6116 return 0; 6117 } 6118