1 /* 2 * Copyright (c) 2011-2018 The DragonFly Project. All rights reserved. 3 * 4 * This code is derived from software contributed to The DragonFly Project 5 * by Matthew Dillon <dillon@dragonflybsd.org> 6 * and Venkatesh Srinivas <vsrinivas@dragonflybsd.org> 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in 16 * the documentation and/or other materials provided with the 17 * distribution. 18 * 3. Neither the name of The DragonFly Project nor the names of its 19 * contributors may be used to endorse or promote products derived 20 * from this software without specific, prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 23 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 24 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 25 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 26 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 27 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 28 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 29 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 30 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 31 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 32 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33 * SUCH DAMAGE. 34 */ 35 /* 36 * This subsystem implements most of the core support functions for 37 * the hammer2_chain structure. 38 * 39 * Chains are the in-memory version on media objects (volume header, inodes, 40 * indirect blocks, data blocks, etc). Chains represent a portion of the 41 * HAMMER2 topology. 42 * 43 * Chains are no-longer delete-duplicated. Instead, the original in-memory 44 * chain will be moved along with its block reference (e.g. for things like 45 * renames, hardlink operations, modifications, etc), and will be indexed 46 * on a secondary list for flush handling instead of propagating a flag 47 * upward to the root. 48 * 49 * Concurrent front-end operations can still run against backend flushes 50 * as long as they do not cross the current flush boundary. An operation 51 * running above the current flush (in areas not yet flushed) can become 52 * part of the current flush while ano peration running below the current 53 * flush can become part of the next flush. 54 */ 55 #include <sys/cdefs.h> 56 #include <sys/param.h> 57 #include <sys/systm.h> 58 #include <sys/types.h> 59 #include <sys/lock.h> 60 #include <sys/kern_syscall.h> 61 #include <sys/uuid.h> 62 63 #include <crypto/sha2/sha2.h> 64 65 #include "hammer2.h" 66 67 static hammer2_chain_t *hammer2_chain_create_indirect( 68 hammer2_chain_t *parent, 69 hammer2_key_t key, int keybits, 70 hammer2_tid_t mtid, int for_type, int *errorp); 71 static void hammer2_chain_rename_obref(hammer2_chain_t **parentp, 72 hammer2_chain_t *chain, hammer2_tid_t mtid, 73 int flags, hammer2_blockref_t *obref); 74 static int hammer2_chain_delete_obref(hammer2_chain_t *parent, 75 hammer2_chain_t *chain, 76 hammer2_tid_t mtid, int flags, 77 hammer2_blockref_t *obref); 78 static hammer2_io_t *hammer2_chain_drop_data(hammer2_chain_t *chain); 79 static hammer2_chain_t *hammer2_combined_find( 80 hammer2_chain_t *parent, 81 hammer2_blockref_t *base, int count, 82 hammer2_key_t *key_nextp, 83 hammer2_key_t key_beg, hammer2_key_t key_end, 84 hammer2_blockref_t **bresp); 85 86 static struct krate krate_h2me = { .freq = 1 }; 87 88 /* 89 * Basic RBTree for chains (core.rbtree). 90 */ 91 RB_GENERATE(hammer2_chain_tree, hammer2_chain, rbnode, hammer2_chain_cmp); 92 93 int 94 hammer2_chain_cmp(hammer2_chain_t *chain1, hammer2_chain_t *chain2) 95 { 96 hammer2_key_t c1_beg; 97 hammer2_key_t c1_end; 98 hammer2_key_t c2_beg; 99 hammer2_key_t c2_end; 100 101 /* 102 * Compare chains. Overlaps are not supposed to happen and catch 103 * any software issues early we count overlaps as a match. 104 */ 105 c1_beg = chain1->bref.key; 106 c1_end = c1_beg + ((hammer2_key_t)1 << chain1->bref.keybits) - 1; 107 c2_beg = chain2->bref.key; 108 c2_end = c2_beg + ((hammer2_key_t)1 << chain2->bref.keybits) - 1; 109 110 if (c1_end < c2_beg) /* fully to the left */ 111 return(-1); 112 if (c1_beg > c2_end) /* fully to the right */ 113 return(1); 114 return(0); /* overlap (must not cross edge boundary) */ 115 } 116 117 /* 118 * Assert that a chain has no media data associated with it. 119 */ 120 static __inline void 121 hammer2_chain_assert_no_data(hammer2_chain_t *chain) 122 { 123 KKASSERT(chain->dio == NULL); 124 if (chain->bref.type != HAMMER2_BREF_TYPE_VOLUME && 125 chain->bref.type != HAMMER2_BREF_TYPE_FREEMAP && 126 chain->data) { 127 panic("hammer2_chain_assert_no_data: chain %p still has data", 128 chain); 129 } 130 } 131 132 /* 133 * Make a chain visible to the flusher. The flusher operates using a top-down 134 * recursion based on the ONFLUSH flag. It locates MODIFIED and UPDATE chains, 135 * flushes them, and updates blocks back to the volume root. 136 * 137 * This routine sets the ONFLUSH flag upward from the triggering chain until 138 * it hits an inode root or the volume root. Inode chains serve as inflection 139 * points, requiring the flusher to bridge across trees. Inodes include 140 * regular inodes, PFS roots (pmp->iroot), and the media super root 141 * (spmp->iroot). 142 */ 143 void 144 hammer2_chain_setflush(hammer2_chain_t *chain) 145 { 146 hammer2_chain_t *parent; 147 148 if ((chain->flags & HAMMER2_CHAIN_ONFLUSH) == 0) { 149 hammer2_spin_sh(&chain->core.spin); 150 while ((chain->flags & HAMMER2_CHAIN_ONFLUSH) == 0) { 151 atomic_set_int(&chain->flags, HAMMER2_CHAIN_ONFLUSH); 152 if (chain->bref.type == HAMMER2_BREF_TYPE_INODE) 153 break; 154 if ((parent = chain->parent) == NULL) 155 break; 156 hammer2_spin_sh(&parent->core.spin); 157 hammer2_spin_unsh(&chain->core.spin); 158 chain = parent; 159 } 160 hammer2_spin_unsh(&chain->core.spin); 161 } 162 } 163 164 /* 165 * Allocate a new disconnected chain element representing the specified 166 * bref. chain->refs is set to 1 and the passed bref is copied to 167 * chain->bref. chain->bytes is derived from the bref. 168 * 169 * chain->pmp inherits pmp unless the chain is an inode (other than the 170 * super-root inode). 171 * 172 * NOTE: Returns a referenced but unlocked (because there is no core) chain. 173 */ 174 hammer2_chain_t * 175 hammer2_chain_alloc(hammer2_dev_t *hmp, hammer2_pfs_t *pmp, 176 hammer2_blockref_t *bref) 177 { 178 hammer2_chain_t *chain; 179 u_int bytes; 180 181 /* 182 * Special case - radix of 0 indicates a chain that does not 183 * need a data reference (context is completely embedded in the 184 * bref). 185 */ 186 if ((int)(bref->data_off & HAMMER2_OFF_MASK_RADIX)) 187 bytes = 1U << (int)(bref->data_off & HAMMER2_OFF_MASK_RADIX); 188 else 189 bytes = 0; 190 191 atomic_add_long(&hammer2_chain_allocs, 1); 192 193 /* 194 * Construct the appropriate system structure. 195 */ 196 switch(bref->type) { 197 case HAMMER2_BREF_TYPE_DIRENT: 198 case HAMMER2_BREF_TYPE_INODE: 199 case HAMMER2_BREF_TYPE_INDIRECT: 200 case HAMMER2_BREF_TYPE_FREEMAP_NODE: 201 case HAMMER2_BREF_TYPE_DATA: 202 case HAMMER2_BREF_TYPE_FREEMAP_LEAF: 203 chain = kmalloc(sizeof(*chain), hmp->mchain, M_WAITOK | M_ZERO); 204 break; 205 case HAMMER2_BREF_TYPE_VOLUME: 206 case HAMMER2_BREF_TYPE_FREEMAP: 207 /* 208 * Only hammer2_chain_bulksnap() calls this function with these 209 * types. 210 */ 211 chain = kmalloc(sizeof(*chain), hmp->mchain, M_WAITOK | M_ZERO); 212 break; 213 default: 214 chain = NULL; 215 panic("hammer2_chain_alloc: unrecognized blockref type: %d", 216 bref->type); 217 } 218 219 /* 220 * Initialize the new chain structure. pmp must be set to NULL for 221 * chains belonging to the super-root topology of a device mount. 222 */ 223 if (pmp == hmp->spmp) 224 chain->pmp = NULL; 225 else 226 chain->pmp = pmp; 227 228 chain->hmp = hmp; 229 chain->bref = *bref; 230 chain->bytes = bytes; 231 chain->refs = 1; 232 chain->flags = HAMMER2_CHAIN_ALLOCATED; 233 lockinit(&chain->diolk, "chdio", 0, 0); 234 235 /* 236 * Set the PFS boundary flag if this chain represents a PFS root. 237 */ 238 if (bref->flags & HAMMER2_BREF_FLAG_PFSROOT) 239 atomic_set_int(&chain->flags, HAMMER2_CHAIN_PFSBOUNDARY); 240 hammer2_chain_core_init(chain); 241 242 return (chain); 243 } 244 245 /* 246 * Initialize a chain's core structure. This structure used to be allocated 247 * but is now embedded. 248 * 249 * The core is not locked. No additional refs on the chain are made. 250 * (trans) must not be NULL if (core) is not NULL. 251 */ 252 void 253 hammer2_chain_core_init(hammer2_chain_t *chain) 254 { 255 /* 256 * Fresh core under nchain (no multi-homing of ochain's 257 * sub-tree). 258 */ 259 RB_INIT(&chain->core.rbtree); /* live chains */ 260 hammer2_mtx_init(&chain->lock, "h2chain"); 261 } 262 263 /* 264 * Add a reference to a chain element, preventing its destruction. 265 * 266 * (can be called with spinlock held) 267 */ 268 void 269 hammer2_chain_ref(hammer2_chain_t *chain) 270 { 271 if (atomic_fetchadd_int(&chain->refs, 1) == 0) { 272 /* 273 * Just flag that the chain was used and should be recycled 274 * on the LRU if it encounters it later. 275 */ 276 if (chain->flags & HAMMER2_CHAIN_ONLRU) 277 atomic_set_int(&chain->flags, HAMMER2_CHAIN_LRUHINT); 278 279 #if 0 280 /* 281 * REMOVED - reduces contention, lru_list is more heuristical 282 * now. 283 * 284 * 0->non-zero transition must ensure that chain is removed 285 * from the LRU list. 286 * 287 * NOTE: Already holding lru_spin here so we cannot call 288 * hammer2_chain_ref() to get it off lru_list, do 289 * it manually. 290 */ 291 if (chain->flags & HAMMER2_CHAIN_ONLRU) { 292 hammer2_pfs_t *pmp = chain->pmp; 293 hammer2_spin_ex(&pmp->lru_spin); 294 if (chain->flags & HAMMER2_CHAIN_ONLRU) { 295 atomic_add_int(&pmp->lru_count, -1); 296 atomic_clear_int(&chain->flags, 297 HAMMER2_CHAIN_ONLRU); 298 TAILQ_REMOVE(&pmp->lru_list, chain, lru_node); 299 } 300 hammer2_spin_unex(&pmp->lru_spin); 301 } 302 #endif 303 } 304 } 305 306 /* 307 * Ref a locked chain and force the data to be held across an unlock. 308 * Chain must be currently locked. The user of the chain who desires 309 * to release the hold must call hammer2_chain_lock_unhold() to relock 310 * and unhold the chain, then unlock normally, or may simply call 311 * hammer2_chain_drop_unhold() (which is safer against deadlocks). 312 */ 313 void 314 hammer2_chain_ref_hold(hammer2_chain_t *chain) 315 { 316 atomic_add_int(&chain->lockcnt, 1); 317 hammer2_chain_ref(chain); 318 } 319 320 /* 321 * Insert the chain in the core rbtree. 322 * 323 * Normal insertions are placed in the live rbtree. Insertion of a deleted 324 * chain is a special case used by the flush code that is placed on the 325 * unstaged deleted list to avoid confusing the live view. 326 */ 327 #define HAMMER2_CHAIN_INSERT_SPIN 0x0001 328 #define HAMMER2_CHAIN_INSERT_LIVE 0x0002 329 #define HAMMER2_CHAIN_INSERT_RACE 0x0004 330 331 static 332 int 333 hammer2_chain_insert(hammer2_chain_t *parent, hammer2_chain_t *chain, 334 int flags, int generation) 335 { 336 hammer2_chain_t *xchain; 337 int error = 0; 338 339 if (flags & HAMMER2_CHAIN_INSERT_SPIN) 340 hammer2_spin_ex(&parent->core.spin); 341 342 /* 343 * Interlocked by spinlock, check for race 344 */ 345 if ((flags & HAMMER2_CHAIN_INSERT_RACE) && 346 parent->core.generation != generation) { 347 error = HAMMER2_ERROR_EAGAIN; 348 goto failed; 349 } 350 351 /* 352 * Insert chain 353 */ 354 xchain = RB_INSERT(hammer2_chain_tree, &parent->core.rbtree, chain); 355 KASSERT(xchain == NULL, 356 ("hammer2_chain_insert: collision %p %p (key=%016jx)", 357 chain, xchain, chain->bref.key)); 358 atomic_set_int(&chain->flags, HAMMER2_CHAIN_ONRBTREE); 359 chain->parent = parent; 360 ++parent->core.chain_count; 361 ++parent->core.generation; /* XXX incs for _get() too, XXX */ 362 363 /* 364 * We have to keep track of the effective live-view blockref count 365 * so the create code knows when to push an indirect block. 366 */ 367 if (flags & HAMMER2_CHAIN_INSERT_LIVE) 368 atomic_add_int(&parent->core.live_count, 1); 369 failed: 370 if (flags & HAMMER2_CHAIN_INSERT_SPIN) 371 hammer2_spin_unex(&parent->core.spin); 372 return error; 373 } 374 375 /* 376 * Drop the caller's reference to the chain. When the ref count drops to 377 * zero this function will try to disassociate the chain from its parent and 378 * deallocate it, then recursely drop the parent using the implied ref 379 * from the chain's chain->parent. 380 * 381 * Nobody should own chain's mutex on the 1->0 transition, unless this drop 382 * races an acquisition by another cpu. Therefore we can loop if we are 383 * unable to acquire the mutex, and refs is unlikely to be 1 unless we again 384 * race against another drop. 385 */ 386 static hammer2_chain_t *hammer2_chain_lastdrop(hammer2_chain_t *chain, 387 int depth); 388 static void hammer2_chain_lru_flush(hammer2_pfs_t *pmp); 389 390 void 391 hammer2_chain_drop(hammer2_chain_t *chain) 392 { 393 u_int refs; 394 395 if (hammer2_debug & 0x200000) 396 Debugger("drop"); 397 398 KKASSERT(chain->refs > 0); 399 400 while (chain) { 401 refs = chain->refs; 402 cpu_ccfence(); 403 KKASSERT(refs > 0); 404 405 if (refs == 1) { 406 if (hammer2_mtx_ex_try(&chain->lock) == 0) 407 chain = hammer2_chain_lastdrop(chain, 0); 408 /* retry the same chain, or chain from lastdrop */ 409 } else { 410 if (atomic_cmpset_int(&chain->refs, refs, refs - 1)) 411 break; 412 /* retry the same chain */ 413 } 414 cpu_pause(); 415 } 416 } 417 418 /* 419 * Unhold a held and probably not-locked chain, ensure that the data is 420 * dropped on the 1->0 transition of lockcnt by obtaining an exclusive 421 * lock and then simply unlocking the chain. 422 */ 423 void 424 hammer2_chain_unhold(hammer2_chain_t *chain) 425 { 426 u_int lockcnt; 427 int iter = 0; 428 429 for (;;) { 430 lockcnt = chain->lockcnt; 431 cpu_ccfence(); 432 if (lockcnt > 1) { 433 if (atomic_cmpset_int(&chain->lockcnt, 434 lockcnt, lockcnt - 1)) { 435 break; 436 } 437 } else if (hammer2_mtx_ex_try(&chain->lock) == 0) { 438 hammer2_chain_unlock(chain); 439 break; 440 } else { 441 /* 442 * This situation can easily occur on SMP due to 443 * the gap inbetween the 1->0 transition and the 444 * final unlock. We cannot safely block on the 445 * mutex because lockcnt might go above 1. 446 * 447 * XXX Sleep for one tick if it takes too long. 448 */ 449 if (++iter > 1000) { 450 if (iter > 1000 + hz) { 451 kprintf("hammer2: h2race1 %p\n", chain); 452 iter = 1000; 453 } 454 tsleep(&iter, 0, "h2race1", 1); 455 } 456 cpu_pause(); 457 } 458 } 459 } 460 461 void 462 hammer2_chain_drop_unhold(hammer2_chain_t *chain) 463 { 464 hammer2_chain_unhold(chain); 465 hammer2_chain_drop(chain); 466 } 467 468 void 469 hammer2_chain_rehold(hammer2_chain_t *chain) 470 { 471 hammer2_chain_lock(chain, HAMMER2_RESOLVE_SHARED); 472 atomic_add_int(&chain->lockcnt, 1); 473 hammer2_chain_unlock(chain); 474 } 475 476 /* 477 * Handles the (potential) last drop of chain->refs from 1->0. Called with 478 * the mutex exclusively locked, refs == 1, and lockcnt 0. SMP races are 479 * possible against refs and lockcnt. We must dispose of the mutex on chain. 480 * 481 * This function returns an unlocked chain for recursive drop or NULL. It 482 * can return the same chain if it determines it has raced another ref. 483 * 484 * -- 485 * 486 * When two chains need to be recursively dropped we use the chain we 487 * would otherwise free to placehold the additional chain. It's a bit 488 * convoluted but we can't just recurse without potentially blowing out 489 * the kernel stack. 490 * 491 * The chain cannot be freed if it has any children. 492 * The chain cannot be freed if flagged MODIFIED unless we can dispose of it. 493 * The chain cannot be freed if flagged UPDATE unless we can dispose of it. 494 * Any dedup registration can remain intact. 495 * 496 * The core spinlock is allowed to nest child-to-parent (not parent-to-child). 497 */ 498 static 499 hammer2_chain_t * 500 hammer2_chain_lastdrop(hammer2_chain_t *chain, int depth) 501 { 502 hammer2_pfs_t *pmp; 503 hammer2_dev_t *hmp; 504 hammer2_chain_t *parent; 505 hammer2_chain_t *rdrop; 506 507 /* 508 * We need chain's spinlock to interlock the sub-tree test. 509 * We already have chain's mutex, protecting chain->parent. 510 * 511 * Remember that chain->refs can be in flux. 512 */ 513 hammer2_spin_ex(&chain->core.spin); 514 515 if (chain->parent != NULL) { 516 /* 517 * If the chain has a parent the UPDATE bit prevents scrapping 518 * as the chain is needed to properly flush the parent. Try 519 * to complete the 1->0 transition and return NULL. Retry 520 * (return chain) if we are unable to complete the 1->0 521 * transition, else return NULL (nothing more to do). 522 * 523 * If the chain has a parent the MODIFIED bit prevents 524 * scrapping. 525 * 526 * Chains with UPDATE/MODIFIED are *not* put on the LRU list! 527 */ 528 if (chain->flags & (HAMMER2_CHAIN_UPDATE | 529 HAMMER2_CHAIN_MODIFIED)) { 530 if (atomic_cmpset_int(&chain->refs, 1, 0)) { 531 hammer2_spin_unex(&chain->core.spin); 532 hammer2_chain_assert_no_data(chain); 533 hammer2_mtx_unlock(&chain->lock); 534 chain = NULL; 535 } else { 536 hammer2_spin_unex(&chain->core.spin); 537 hammer2_mtx_unlock(&chain->lock); 538 } 539 return (chain); 540 } 541 /* spinlock still held */ 542 } else if (chain->bref.type == HAMMER2_BREF_TYPE_VOLUME || 543 chain->bref.type == HAMMER2_BREF_TYPE_FREEMAP) { 544 /* 545 * Retain the static vchain and fchain. Clear bits that 546 * are not relevant. Do not clear the MODIFIED bit, 547 * and certainly do not put it on the delayed-flush queue. 548 */ 549 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_UPDATE); 550 } else { 551 /* 552 * The chain has no parent and can be flagged for destruction. 553 * Since it has no parent, UPDATE can also be cleared. 554 */ 555 atomic_set_int(&chain->flags, HAMMER2_CHAIN_DESTROY); 556 if (chain->flags & HAMMER2_CHAIN_UPDATE) 557 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_UPDATE); 558 559 /* 560 * If the chain has children we must propagate the DESTROY 561 * flag downward and rip the disconnected topology apart. 562 * This is accomplished by calling hammer2_flush() on the 563 * chain. 564 * 565 * Any dedup is already handled by the underlying DIO, so 566 * we do not have to specifically flush it here. 567 */ 568 if (chain->core.chain_count) { 569 hammer2_spin_unex(&chain->core.spin); 570 hammer2_flush(chain, HAMMER2_FLUSH_TOP | 571 HAMMER2_FLUSH_ALL); 572 hammer2_mtx_unlock(&chain->lock); 573 574 return(chain); /* retry drop */ 575 } 576 577 /* 578 * Otherwise we can scrap the MODIFIED bit if it is set, 579 * and continue along the freeing path. 580 * 581 * Be sure to clean-out any dedup bits. Without a parent 582 * this chain will no longer be visible to the flush code. 583 * Easy check data_off to avoid the volume root. 584 */ 585 if (chain->flags & HAMMER2_CHAIN_MODIFIED) { 586 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_MODIFIED); 587 atomic_add_long(&hammer2_count_modified_chains, -1); 588 if (chain->pmp) 589 hammer2_pfs_memory_wakeup(chain->pmp); 590 } 591 /* spinlock still held */ 592 } 593 594 /* spinlock still held */ 595 596 /* 597 * If any children exist we must leave the chain intact with refs == 0. 598 * They exist because chains are retained below us which have refs or 599 * may require flushing. 600 * 601 * Retry (return chain) if we fail to transition the refs to 0, else 602 * return NULL indication nothing more to do. 603 * 604 * Chains with children are NOT put on the LRU list. 605 */ 606 if (chain->core.chain_count) { 607 if (atomic_cmpset_int(&chain->refs, 1, 0)) { 608 hammer2_spin_unex(&chain->core.spin); 609 hammer2_chain_assert_no_data(chain); 610 hammer2_mtx_unlock(&chain->lock); 611 chain = NULL; 612 } else { 613 hammer2_spin_unex(&chain->core.spin); 614 hammer2_mtx_unlock(&chain->lock); 615 } 616 return (chain); 617 } 618 /* spinlock still held */ 619 /* no chains left under us */ 620 621 /* 622 * chain->core has no children left so no accessors can get to our 623 * chain from there. Now we have to lock the parent core to interlock 624 * remaining possible accessors that might bump chain's refs before 625 * we can safely drop chain's refs with intent to free the chain. 626 */ 627 hmp = chain->hmp; 628 pmp = chain->pmp; /* can be NULL */ 629 rdrop = NULL; 630 631 parent = chain->parent; 632 633 /* 634 * WARNING! chain's spin lock is still held here, and other spinlocks 635 * will be acquired and released in the code below. We 636 * cannot be making fancy procedure calls! 637 */ 638 639 /* 640 * We can cache the chain if it is associated with a pmp 641 * and not flagged as being destroyed or requesting a full 642 * release. In this situation the chain is not removed 643 * from its parent, i.e. it can still be looked up. 644 * 645 * We intentionally do not cache DATA chains because these 646 * were likely used to load data into the logical buffer cache 647 * and will not be accessed again for some time. 648 */ 649 if ((chain->flags & 650 (HAMMER2_CHAIN_DESTROY | HAMMER2_CHAIN_RELEASE)) == 0 && 651 chain->pmp && 652 chain->bref.type != HAMMER2_BREF_TYPE_DATA) { 653 if (parent) 654 hammer2_spin_ex(&parent->core.spin); 655 if (atomic_cmpset_int(&chain->refs, 1, 0) == 0) { 656 /* 657 * 1->0 transition failed, retry. Do not drop 658 * the chain's data yet! 659 */ 660 if (parent) 661 hammer2_spin_unex(&parent->core.spin); 662 hammer2_spin_unex(&chain->core.spin); 663 hammer2_mtx_unlock(&chain->lock); 664 665 return(chain); 666 } 667 668 /* 669 * Success 670 */ 671 hammer2_chain_assert_no_data(chain); 672 673 /* 674 * Make sure we are on the LRU list, clean up excessive 675 * LRU entries. We can only really drop one but there might 676 * be other entries that we can remove from the lru_list 677 * without dropping. 678 * 679 * NOTE: HAMMER2_CHAIN_ONLRU may only be safely set when 680 * chain->core.spin AND pmp->lru_spin are held, but 681 * can be safely cleared only holding pmp->lru_spin. 682 */ 683 if ((chain->flags & HAMMER2_CHAIN_ONLRU) == 0) { 684 hammer2_spin_ex(&pmp->lru_spin); 685 if ((chain->flags & HAMMER2_CHAIN_ONLRU) == 0) { 686 atomic_set_int(&chain->flags, 687 HAMMER2_CHAIN_ONLRU); 688 TAILQ_INSERT_TAIL(&pmp->lru_list, 689 chain, lru_node); 690 atomic_add_int(&pmp->lru_count, 1); 691 } 692 if (pmp->lru_count < HAMMER2_LRU_LIMIT) 693 depth = 1; /* disable lru_list flush */ 694 hammer2_spin_unex(&pmp->lru_spin); 695 } else { 696 /* disable lru flush */ 697 depth = 1; 698 } 699 700 if (parent) { 701 hammer2_spin_unex(&parent->core.spin); 702 parent = NULL; /* safety */ 703 } 704 hammer2_spin_unex(&chain->core.spin); 705 hammer2_mtx_unlock(&chain->lock); 706 707 /* 708 * lru_list hysteresis (see above for depth overrides). 709 * Note that depth also prevents excessive lastdrop recursion. 710 */ 711 if (depth == 0) 712 hammer2_chain_lru_flush(pmp); 713 714 return NULL; 715 /* NOT REACHED */ 716 } 717 718 /* 719 * Make sure we are not on the LRU list. 720 */ 721 if (chain->flags & HAMMER2_CHAIN_ONLRU) { 722 hammer2_spin_ex(&pmp->lru_spin); 723 if (chain->flags & HAMMER2_CHAIN_ONLRU) { 724 atomic_add_int(&pmp->lru_count, -1); 725 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_ONLRU); 726 TAILQ_REMOVE(&pmp->lru_list, chain, lru_node); 727 } 728 hammer2_spin_unex(&pmp->lru_spin); 729 } 730 731 /* 732 * Spinlock the parent and try to drop the last ref on chain. 733 * On success determine if we should dispose of the chain 734 * (remove the chain from its parent, etc). 735 * 736 * (normal core locks are top-down recursive but we define 737 * core spinlocks as bottom-up recursive, so this is safe). 738 */ 739 if (parent) { 740 hammer2_spin_ex(&parent->core.spin); 741 if (atomic_cmpset_int(&chain->refs, 1, 0) == 0) { 742 /* 743 * 1->0 transition failed, retry. 744 */ 745 hammer2_spin_unex(&parent->core.spin); 746 hammer2_spin_unex(&chain->core.spin); 747 hammer2_mtx_unlock(&chain->lock); 748 749 return(chain); 750 } 751 752 /* 753 * 1->0 transition successful, parent spin held to prevent 754 * new lookups, chain spinlock held to protect parent field. 755 * Remove chain from the parent. 756 * 757 * If the chain is being removed from the parent's btree but 758 * is not bmapped, we have to adjust live_count downward. If 759 * it is bmapped then the blockref is retained in the parent 760 * as is its associated live_count. This case can occur when 761 * a chain added to the topology is unable to flush and is 762 * then later deleted. 763 */ 764 if (chain->flags & HAMMER2_CHAIN_ONRBTREE) { 765 if ((parent->flags & HAMMER2_CHAIN_COUNTEDBREFS) && 766 (chain->flags & HAMMER2_CHAIN_BMAPPED) == 0) { 767 atomic_add_int(&parent->core.live_count, -1); 768 } 769 RB_REMOVE(hammer2_chain_tree, 770 &parent->core.rbtree, chain); 771 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_ONRBTREE); 772 --parent->core.chain_count; 773 chain->parent = NULL; 774 } 775 776 /* 777 * If our chain was the last chain in the parent's core the 778 * core is now empty and its parent might have to be 779 * re-dropped if it has 0 refs. 780 */ 781 if (parent->core.chain_count == 0) { 782 rdrop = parent; 783 atomic_add_int(&rdrop->refs, 1); 784 /* 785 if (atomic_cmpset_int(&rdrop->refs, 0, 1) == 0) 786 rdrop = NULL; 787 */ 788 } 789 hammer2_spin_unex(&parent->core.spin); 790 parent = NULL; /* safety */ 791 /* FALL THROUGH */ 792 } else { 793 /* 794 * No-parent case. 795 */ 796 if (atomic_cmpset_int(&chain->refs, 1, 0) == 0) { 797 /* 798 * 1->0 transition failed, retry. 799 */ 800 hammer2_spin_unex(&parent->core.spin); 801 hammer2_spin_unex(&chain->core.spin); 802 hammer2_mtx_unlock(&chain->lock); 803 804 return(chain); 805 } 806 } 807 808 /* 809 * Successful 1->0 transition, no parent, no children... no way for 810 * anyone to ref this chain any more. We can clean-up and free it. 811 * 812 * We still have the core spinlock, and core's chain_count is 0. 813 * Any parent spinlock is gone. 814 */ 815 hammer2_spin_unex(&chain->core.spin); 816 hammer2_chain_assert_no_data(chain); 817 hammer2_mtx_unlock(&chain->lock); 818 KKASSERT(RB_EMPTY(&chain->core.rbtree) && 819 chain->core.chain_count == 0); 820 821 /* 822 * All locks are gone, no pointers remain to the chain, finish 823 * freeing it. 824 */ 825 KKASSERT((chain->flags & (HAMMER2_CHAIN_UPDATE | 826 HAMMER2_CHAIN_MODIFIED)) == 0); 827 828 /* 829 * Once chain resources are gone we can use the now dead chain 830 * structure to placehold what might otherwise require a recursive 831 * drop, because we have potentially two things to drop and can only 832 * return one directly. 833 */ 834 if (chain->flags & HAMMER2_CHAIN_ALLOCATED) { 835 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_ALLOCATED); 836 chain->hmp = NULL; 837 kfree(chain, hmp->mchain); 838 } 839 840 /* 841 * Possible chaining loop when parent re-drop needed. 842 */ 843 return(rdrop); 844 } 845 846 /* 847 * Heuristical flush of the LRU, try to reduce the number of entries 848 * on the LRU to (HAMMER2_LRU_LIMIT * 2 / 3). This procedure is called 849 * only when lru_count exceeds HAMMER2_LRU_LIMIT. 850 */ 851 static 852 void 853 hammer2_chain_lru_flush(hammer2_pfs_t *pmp) 854 { 855 hammer2_chain_t *chain; 856 857 again: 858 chain = NULL; 859 hammer2_spin_ex(&pmp->lru_spin); 860 while (pmp->lru_count > HAMMER2_LRU_LIMIT * 2 / 3) { 861 /* 862 * Pick a chain off the lru_list, just recycle it quickly 863 * if LRUHINT is set (the chain was ref'd but left on 864 * the lru_list, so cycle to the end). 865 */ 866 chain = TAILQ_FIRST(&pmp->lru_list); 867 TAILQ_REMOVE(&pmp->lru_list, chain, lru_node); 868 869 if (chain->flags & HAMMER2_CHAIN_LRUHINT) { 870 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_LRUHINT); 871 TAILQ_INSERT_TAIL(&pmp->lru_list, chain, lru_node); 872 chain = NULL; 873 continue; 874 } 875 876 /* 877 * Ok, we are off the LRU. We must adjust refs before we 878 * can safely clear the ONLRU flag. 879 */ 880 atomic_add_int(&pmp->lru_count, -1); 881 if (atomic_cmpset_int(&chain->refs, 0, 1)) { 882 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_ONLRU); 883 atomic_set_int(&chain->flags, HAMMER2_CHAIN_RELEASE); 884 break; 885 } 886 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_ONLRU); 887 chain = NULL; 888 } 889 hammer2_spin_unex(&pmp->lru_spin); 890 if (chain == NULL) 891 return; 892 893 /* 894 * If we picked a chain off the lru list we may be able to lastdrop 895 * it. Use a depth of 1 to prevent excessive lastdrop recursion. 896 */ 897 while (chain) { 898 u_int refs; 899 900 refs = chain->refs; 901 cpu_ccfence(); 902 KKASSERT(refs > 0); 903 904 if (refs == 1) { 905 if (hammer2_mtx_ex_try(&chain->lock) == 0) 906 chain = hammer2_chain_lastdrop(chain, 1); 907 /* retry the same chain, or chain from lastdrop */ 908 } else { 909 if (atomic_cmpset_int(&chain->refs, refs, refs - 1)) 910 break; 911 /* retry the same chain */ 912 } 913 cpu_pause(); 914 } 915 goto again; 916 } 917 918 /* 919 * On last lock release. 920 */ 921 static hammer2_io_t * 922 hammer2_chain_drop_data(hammer2_chain_t *chain) 923 { 924 hammer2_io_t *dio; 925 926 if ((dio = chain->dio) != NULL) { 927 chain->dio = NULL; 928 chain->data = NULL; 929 } else { 930 switch(chain->bref.type) { 931 case HAMMER2_BREF_TYPE_VOLUME: 932 case HAMMER2_BREF_TYPE_FREEMAP: 933 break; 934 default: 935 if (chain->data != NULL) { 936 hammer2_spin_unex(&chain->core.spin); 937 panic("chain data not null: " 938 "chain %p bref %016jx.%02x " 939 "refs %d parent %p dio %p data %p", 940 chain, chain->bref.data_off, 941 chain->bref.type, chain->refs, 942 chain->parent, 943 chain->dio, chain->data); 944 } 945 KKASSERT(chain->data == NULL); 946 break; 947 } 948 } 949 return dio; 950 } 951 952 /* 953 * Lock a referenced chain element, acquiring its data with I/O if necessary, 954 * and specify how you would like the data to be resolved. 955 * 956 * If an I/O or other fatal error occurs, chain->error will be set to non-zero. 957 * 958 * The lock is allowed to recurse, multiple locking ops will aggregate 959 * the requested resolve types. Once data is assigned it will not be 960 * removed until the last unlock. 961 * 962 * HAMMER2_RESOLVE_NEVER - Do not resolve the data element. 963 * (typically used to avoid device/logical buffer 964 * aliasing for data) 965 * 966 * HAMMER2_RESOLVE_MAYBE - Do not resolve data elements for chains in 967 * the INITIAL-create state (indirect blocks only). 968 * 969 * Do not resolve data elements for DATA chains. 970 * (typically used to avoid device/logical buffer 971 * aliasing for data) 972 * 973 * HAMMER2_RESOLVE_ALWAYS- Always resolve the data element. 974 * 975 * HAMMER2_RESOLVE_SHARED- (flag) The chain is locked shared, otherwise 976 * it will be locked exclusive. 977 * 978 * HAMMER2_RESOLVE_NONBLOCK- (flag) The chain is locked non-blocking. If 979 * the lock fails, EAGAIN is returned. 980 * 981 * NOTE: Embedded elements (volume header, inodes) are always resolved 982 * regardless. 983 * 984 * NOTE: Specifying HAMMER2_RESOLVE_ALWAYS on a newly-created non-embedded 985 * element will instantiate and zero its buffer, and flush it on 986 * release. 987 * 988 * NOTE: (data) elements are normally locked RESOLVE_NEVER or RESOLVE_MAYBE 989 * so as not to instantiate a device buffer, which could alias against 990 * a logical file buffer. However, if ALWAYS is specified the 991 * device buffer will be instantiated anyway. 992 * 993 * NOTE: The return value is always 0 unless NONBLOCK is specified, in which 994 * case it can be either 0 or EAGAIN. 995 * 996 * WARNING! This function blocks on I/O if data needs to be fetched. This 997 * blocking can run concurrent with other compatible lock holders 998 * who do not need data returning. The lock is not upgraded to 999 * exclusive during a data fetch, a separate bit is used to 1000 * interlock I/O. However, an exclusive lock holder can still count 1001 * on being interlocked against an I/O fetch managed by a shared 1002 * lock holder. 1003 */ 1004 int 1005 hammer2_chain_lock(hammer2_chain_t *chain, int how) 1006 { 1007 KKASSERT(chain->refs > 0); 1008 1009 if (how & HAMMER2_RESOLVE_NONBLOCK) { 1010 /* 1011 * We still have to bump lockcnt before acquiring the lock, 1012 * even for non-blocking operation, because the unlock code 1013 * live-loops on lockcnt == 1 when dropping the last lock. 1014 * 1015 * If the non-blocking operation fails we have to use an 1016 * unhold sequence to undo the mess. 1017 * 1018 * NOTE: LOCKAGAIN must always succeed without blocking, 1019 * even if NONBLOCK is specified. 1020 */ 1021 atomic_add_int(&chain->lockcnt, 1); 1022 if (how & HAMMER2_RESOLVE_SHARED) { 1023 if (how & HAMMER2_RESOLVE_LOCKAGAIN) { 1024 hammer2_mtx_sh_again(&chain->lock); 1025 } else { 1026 if (hammer2_mtx_sh_try(&chain->lock) != 0) { 1027 hammer2_chain_unhold(chain); 1028 return EAGAIN; 1029 } 1030 } 1031 } else { 1032 if (hammer2_mtx_ex_try(&chain->lock) != 0) { 1033 hammer2_chain_unhold(chain); 1034 return EAGAIN; 1035 } 1036 } 1037 ++curthread->td_tracker; 1038 } else { 1039 /* 1040 * Get the appropriate lock. If LOCKAGAIN is flagged with 1041 * SHARED the caller expects a shared lock to already be 1042 * present and we are giving it another ref. This case must 1043 * importantly not block if there is a pending exclusive lock 1044 * request. 1045 */ 1046 atomic_add_int(&chain->lockcnt, 1); 1047 if (how & HAMMER2_RESOLVE_SHARED) { 1048 if (how & HAMMER2_RESOLVE_LOCKAGAIN) { 1049 hammer2_mtx_sh_again(&chain->lock); 1050 } else { 1051 hammer2_mtx_sh(&chain->lock); 1052 } 1053 } else { 1054 hammer2_mtx_ex(&chain->lock); 1055 } 1056 ++curthread->td_tracker; 1057 } 1058 1059 /* 1060 * If we already have a valid data pointer make sure the data is 1061 * synchronized to the current cpu, and then no further action is 1062 * necessary. 1063 */ 1064 if (chain->data) { 1065 if (chain->dio) 1066 hammer2_io_bkvasync(chain->dio); 1067 return 0; 1068 } 1069 1070 /* 1071 * Do we have to resolve the data? This is generally only 1072 * applicable to HAMMER2_BREF_TYPE_DATA which is special-cased. 1073 * Other BREF types expects the data to be there. 1074 */ 1075 switch(how & HAMMER2_RESOLVE_MASK) { 1076 case HAMMER2_RESOLVE_NEVER: 1077 return 0; 1078 case HAMMER2_RESOLVE_MAYBE: 1079 if (chain->flags & HAMMER2_CHAIN_INITIAL) 1080 return 0; 1081 if (chain->bref.type == HAMMER2_BREF_TYPE_DATA) 1082 return 0; 1083 #if 0 1084 if (chain->bref.type == HAMMER2_BREF_TYPE_FREEMAP_NODE) 1085 return 0; 1086 if (chain->bref.type == HAMMER2_BREF_TYPE_FREEMAP_LEAF) 1087 return 0; 1088 #endif 1089 /* fall through */ 1090 case HAMMER2_RESOLVE_ALWAYS: 1091 default: 1092 break; 1093 } 1094 1095 /* 1096 * Caller requires data 1097 */ 1098 hammer2_chain_load_data(chain); 1099 1100 return 0; 1101 } 1102 1103 /* 1104 * Lock the chain, retain the hold, and drop the data persistence count. 1105 * The data should remain valid because we never transitioned lockcnt 1106 * through 0. 1107 */ 1108 void 1109 hammer2_chain_lock_unhold(hammer2_chain_t *chain, int how) 1110 { 1111 hammer2_chain_lock(chain, how); 1112 atomic_add_int(&chain->lockcnt, -1); 1113 } 1114 1115 #if 0 1116 /* 1117 * Downgrade an exclusive chain lock to a shared chain lock. 1118 * 1119 * NOTE: There is no upgrade equivalent due to the ease of 1120 * deadlocks in that direction. 1121 */ 1122 void 1123 hammer2_chain_lock_downgrade(hammer2_chain_t *chain) 1124 { 1125 hammer2_mtx_downgrade(&chain->lock); 1126 } 1127 #endif 1128 1129 /* 1130 * Issue I/O and install chain->data. Caller must hold a chain lock, lock 1131 * may be of any type. 1132 * 1133 * Once chain->data is set it cannot be disposed of until all locks are 1134 * released. 1135 * 1136 * Make sure the data is synchronized to the current cpu. 1137 */ 1138 void 1139 hammer2_chain_load_data(hammer2_chain_t *chain) 1140 { 1141 hammer2_blockref_t *bref; 1142 hammer2_dev_t *hmp; 1143 hammer2_io_t *dio; 1144 char *bdata; 1145 int error; 1146 1147 /* 1148 * Degenerate case, data already present, or chain has no media 1149 * reference to load. 1150 */ 1151 KKASSERT(chain->lock.mtx_lock & MTX_MASK); 1152 if (chain->data) { 1153 if (chain->dio) 1154 hammer2_io_bkvasync(chain->dio); 1155 return; 1156 } 1157 if ((chain->bref.data_off & ~HAMMER2_OFF_MASK_RADIX) == 0) 1158 return; 1159 1160 hmp = chain->hmp; 1161 KKASSERT(hmp != NULL); 1162 1163 /* 1164 * Gain the IOINPROG bit, interlocked block. 1165 */ 1166 for (;;) { 1167 u_int oflags; 1168 u_int nflags; 1169 1170 oflags = chain->flags; 1171 cpu_ccfence(); 1172 if (oflags & HAMMER2_CHAIN_IOINPROG) { 1173 nflags = oflags | HAMMER2_CHAIN_IOSIGNAL; 1174 tsleep_interlock(&chain->flags, 0); 1175 if (atomic_cmpset_int(&chain->flags, oflags, nflags)) { 1176 tsleep(&chain->flags, PINTERLOCKED, 1177 "h2iocw", 0); 1178 } 1179 /* retry */ 1180 } else { 1181 nflags = oflags | HAMMER2_CHAIN_IOINPROG; 1182 if (atomic_cmpset_int(&chain->flags, oflags, nflags)) { 1183 break; 1184 } 1185 /* retry */ 1186 } 1187 } 1188 1189 /* 1190 * We own CHAIN_IOINPROG 1191 * 1192 * Degenerate case if we raced another load. 1193 */ 1194 if (chain->data) { 1195 if (chain->dio) 1196 hammer2_io_bkvasync(chain->dio); 1197 goto done; 1198 } 1199 1200 /* 1201 * We must resolve to a device buffer, either by issuing I/O or 1202 * by creating a zero-fill element. We do not mark the buffer 1203 * dirty when creating a zero-fill element (the hammer2_chain_modify() 1204 * API must still be used to do that). 1205 * 1206 * The device buffer is variable-sized in powers of 2 down 1207 * to HAMMER2_MIN_ALLOC (typically 1K). A 64K physical storage 1208 * chunk always contains buffers of the same size. (XXX) 1209 * 1210 * The minimum physical IO size may be larger than the variable 1211 * block size. 1212 */ 1213 bref = &chain->bref; 1214 1215 /* 1216 * The getblk() optimization can only be used on newly created 1217 * elements if the physical block size matches the request. 1218 */ 1219 if (chain->flags & HAMMER2_CHAIN_INITIAL) { 1220 error = hammer2_io_new(hmp, bref->type, 1221 bref->data_off, chain->bytes, 1222 &chain->dio); 1223 } else { 1224 error = hammer2_io_bread(hmp, bref->type, 1225 bref->data_off, chain->bytes, 1226 &chain->dio); 1227 hammer2_adjreadcounter(&chain->bref, chain->bytes); 1228 } 1229 if (error) { 1230 chain->error = HAMMER2_ERROR_EIO; 1231 kprintf("hammer2_chain_lock: I/O error %016jx: %d\n", 1232 (intmax_t)bref->data_off, error); 1233 hammer2_io_bqrelse(&chain->dio); 1234 goto done; 1235 } 1236 chain->error = 0; 1237 1238 /* 1239 * This isn't perfect and can be ignored on OSs which do not have 1240 * an indication as to whether a buffer is coming from cache or 1241 * if I/O was actually issued for the read. TESTEDGOOD will work 1242 * pretty well without the B_IOISSUED logic because chains are 1243 * cached, but in that situation (without B_IOISSUED) it will not 1244 * detect whether a re-read via I/O is corrupted verses the original 1245 * read. 1246 * 1247 * We can't re-run the CRC on every fresh lock. That would be 1248 * insanely expensive. 1249 * 1250 * If the underlying kernel buffer covers the entire chain we can 1251 * use the B_IOISSUED indication to determine if we have to re-run 1252 * the CRC on chain data for chains that managed to stay cached 1253 * across the kernel disposal of the original buffer. 1254 */ 1255 if ((dio = chain->dio) != NULL && dio->bp) { 1256 struct buf *bp = dio->bp; 1257 1258 if (dio->psize == chain->bytes && 1259 (bp->b_flags & B_IOISSUED)) { 1260 atomic_clear_int(&chain->flags, 1261 HAMMER2_CHAIN_TESTEDGOOD); 1262 bp->b_flags &= ~B_IOISSUED; 1263 } 1264 } 1265 1266 /* 1267 * NOTE: A locked chain's data cannot be modified without first 1268 * calling hammer2_chain_modify(). 1269 */ 1270 1271 /* 1272 * Clear INITIAL. In this case we used io_new() and the buffer has 1273 * been zero'd and marked dirty. 1274 * 1275 * NOTE: hammer2_io_data() call issues bkvasync() 1276 */ 1277 bdata = hammer2_io_data(chain->dio, chain->bref.data_off); 1278 1279 if (chain->flags & HAMMER2_CHAIN_INITIAL) { 1280 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_INITIAL); 1281 chain->bref.flags |= HAMMER2_BREF_FLAG_ZERO; 1282 } else if (chain->flags & HAMMER2_CHAIN_MODIFIED) { 1283 /* 1284 * check data not currently synchronized due to 1285 * modification. XXX assumes data stays in the buffer 1286 * cache, which might not be true (need biodep on flush 1287 * to calculate crc? or simple crc?). 1288 */ 1289 } else if ((chain->flags & HAMMER2_CHAIN_TESTEDGOOD) == 0) { 1290 if (hammer2_chain_testcheck(chain, bdata) == 0) { 1291 chain->error = HAMMER2_ERROR_CHECK; 1292 } else { 1293 atomic_set_int(&chain->flags, HAMMER2_CHAIN_TESTEDGOOD); 1294 } 1295 } 1296 1297 /* 1298 * Setup the data pointer, either pointing it to an embedded data 1299 * structure and copying the data from the buffer, or pointing it 1300 * into the buffer. 1301 * 1302 * The buffer is not retained when copying to an embedded data 1303 * structure in order to avoid potential deadlocks or recursions 1304 * on the same physical buffer. 1305 * 1306 * WARNING! Other threads can start using the data the instant we 1307 * set chain->data non-NULL. 1308 */ 1309 switch (bref->type) { 1310 case HAMMER2_BREF_TYPE_VOLUME: 1311 case HAMMER2_BREF_TYPE_FREEMAP: 1312 /* 1313 * Copy data from bp to embedded buffer 1314 */ 1315 panic("hammer2_chain_load_data: unresolved volume header"); 1316 break; 1317 case HAMMER2_BREF_TYPE_DIRENT: 1318 KKASSERT(chain->bytes != 0); 1319 /* fall through */ 1320 case HAMMER2_BREF_TYPE_INODE: 1321 case HAMMER2_BREF_TYPE_FREEMAP_LEAF: 1322 case HAMMER2_BREF_TYPE_INDIRECT: 1323 case HAMMER2_BREF_TYPE_DATA: 1324 case HAMMER2_BREF_TYPE_FREEMAP_NODE: 1325 default: 1326 /* 1327 * Point data at the device buffer and leave dio intact. 1328 */ 1329 chain->data = (void *)bdata; 1330 break; 1331 } 1332 1333 /* 1334 * Release HAMMER2_CHAIN_IOINPROG and signal waiters if requested. 1335 */ 1336 done: 1337 for (;;) { 1338 u_int oflags; 1339 u_int nflags; 1340 1341 oflags = chain->flags; 1342 nflags = oflags & ~(HAMMER2_CHAIN_IOINPROG | 1343 HAMMER2_CHAIN_IOSIGNAL); 1344 KKASSERT(oflags & HAMMER2_CHAIN_IOINPROG); 1345 if (atomic_cmpset_int(&chain->flags, oflags, nflags)) { 1346 if (oflags & HAMMER2_CHAIN_IOSIGNAL) 1347 wakeup(&chain->flags); 1348 break; 1349 } 1350 } 1351 } 1352 1353 /* 1354 * Unlock and deref a chain element. 1355 * 1356 * Remember that the presence of children under chain prevent the chain's 1357 * destruction but do not add additional references, so the dio will still 1358 * be dropped. 1359 */ 1360 void 1361 hammer2_chain_unlock(hammer2_chain_t *chain) 1362 { 1363 hammer2_io_t *dio; 1364 u_int lockcnt; 1365 int iter = 0; 1366 1367 --curthread->td_tracker; 1368 1369 /* 1370 * If multiple locks are present (or being attempted) on this 1371 * particular chain we can just unlock, drop refs, and return. 1372 * 1373 * Otherwise fall-through on the 1->0 transition. 1374 */ 1375 for (;;) { 1376 lockcnt = chain->lockcnt; 1377 KKASSERT(lockcnt > 0); 1378 cpu_ccfence(); 1379 if (lockcnt > 1) { 1380 if (atomic_cmpset_int(&chain->lockcnt, 1381 lockcnt, lockcnt - 1)) { 1382 hammer2_mtx_unlock(&chain->lock); 1383 return; 1384 } 1385 } else if (hammer2_mtx_upgrade_try(&chain->lock) == 0) { 1386 /* while holding the mutex exclusively */ 1387 if (atomic_cmpset_int(&chain->lockcnt, 1, 0)) 1388 break; 1389 } else { 1390 /* 1391 * This situation can easily occur on SMP due to 1392 * the gap inbetween the 1->0 transition and the 1393 * final unlock. We cannot safely block on the 1394 * mutex because lockcnt might go above 1. 1395 * 1396 * XXX Sleep for one tick if it takes too long. 1397 */ 1398 if (++iter > 1000) { 1399 if (iter > 1000 + hz) { 1400 kprintf("hammer2: h2race2 %p\n", chain); 1401 iter = 1000; 1402 } 1403 tsleep(&iter, 0, "h2race2", 1); 1404 } 1405 cpu_pause(); 1406 } 1407 /* retry */ 1408 } 1409 1410 /* 1411 * Last unlock / mutex upgraded to exclusive. Drop the data 1412 * reference. 1413 */ 1414 dio = hammer2_chain_drop_data(chain); 1415 if (dio) 1416 hammer2_io_bqrelse(&dio); 1417 hammer2_mtx_unlock(&chain->lock); 1418 } 1419 1420 /* 1421 * Unlock and hold chain data intact 1422 */ 1423 void 1424 hammer2_chain_unlock_hold(hammer2_chain_t *chain) 1425 { 1426 atomic_add_int(&chain->lockcnt, 1); 1427 hammer2_chain_unlock(chain); 1428 } 1429 1430 /* 1431 * Helper to obtain the blockref[] array base and count for a chain. 1432 * 1433 * XXX Not widely used yet, various use cases need to be validated and 1434 * converted to use this function. 1435 */ 1436 static 1437 hammer2_blockref_t * 1438 hammer2_chain_base_and_count(hammer2_chain_t *parent, int *countp) 1439 { 1440 hammer2_blockref_t *base; 1441 int count; 1442 1443 if (parent->flags & HAMMER2_CHAIN_INITIAL) { 1444 base = NULL; 1445 1446 switch(parent->bref.type) { 1447 case HAMMER2_BREF_TYPE_INODE: 1448 count = HAMMER2_SET_COUNT; 1449 break; 1450 case HAMMER2_BREF_TYPE_INDIRECT: 1451 case HAMMER2_BREF_TYPE_FREEMAP_NODE: 1452 count = parent->bytes / sizeof(hammer2_blockref_t); 1453 break; 1454 case HAMMER2_BREF_TYPE_VOLUME: 1455 count = HAMMER2_SET_COUNT; 1456 break; 1457 case HAMMER2_BREF_TYPE_FREEMAP: 1458 count = HAMMER2_SET_COUNT; 1459 break; 1460 default: 1461 panic("hammer2_chain_base_and_count: " 1462 "unrecognized blockref type: %d", 1463 parent->bref.type); 1464 count = 0; 1465 break; 1466 } 1467 } else { 1468 switch(parent->bref.type) { 1469 case HAMMER2_BREF_TYPE_INODE: 1470 base = &parent->data->ipdata.u.blockset.blockref[0]; 1471 count = HAMMER2_SET_COUNT; 1472 break; 1473 case HAMMER2_BREF_TYPE_INDIRECT: 1474 case HAMMER2_BREF_TYPE_FREEMAP_NODE: 1475 base = &parent->data->npdata[0]; 1476 count = parent->bytes / sizeof(hammer2_blockref_t); 1477 break; 1478 case HAMMER2_BREF_TYPE_VOLUME: 1479 base = &parent->data->voldata. 1480 sroot_blockset.blockref[0]; 1481 count = HAMMER2_SET_COUNT; 1482 break; 1483 case HAMMER2_BREF_TYPE_FREEMAP: 1484 base = &parent->data->blkset.blockref[0]; 1485 count = HAMMER2_SET_COUNT; 1486 break; 1487 default: 1488 panic("hammer2_chain_base_and_count: " 1489 "unrecognized blockref type: %d", 1490 parent->bref.type); 1491 count = 0; 1492 break; 1493 } 1494 } 1495 *countp = count; 1496 1497 return base; 1498 } 1499 1500 /* 1501 * This counts the number of live blockrefs in a block array and 1502 * also calculates the point at which all remaining blockrefs are empty. 1503 * This routine can only be called on a live chain. 1504 * 1505 * Caller holds the chain locked, but possibly with a shared lock. We 1506 * must use an exclusive spinlock to prevent corruption. 1507 * 1508 * NOTE: Flag is not set until after the count is complete, allowing 1509 * callers to test the flag without holding the spinlock. 1510 * 1511 * NOTE: If base is NULL the related chain is still in the INITIAL 1512 * state and there are no blockrefs to count. 1513 * 1514 * NOTE: live_count may already have some counts accumulated due to 1515 * creation and deletion and could even be initially negative. 1516 */ 1517 void 1518 hammer2_chain_countbrefs(hammer2_chain_t *chain, 1519 hammer2_blockref_t *base, int count) 1520 { 1521 hammer2_spin_ex(&chain->core.spin); 1522 if ((chain->flags & HAMMER2_CHAIN_COUNTEDBREFS) == 0) { 1523 if (base) { 1524 while (--count >= 0) { 1525 if (base[count].type != HAMMER2_BREF_TYPE_EMPTY) 1526 break; 1527 } 1528 chain->core.live_zero = count + 1; 1529 while (count >= 0) { 1530 if (base[count].type != HAMMER2_BREF_TYPE_EMPTY) 1531 atomic_add_int(&chain->core.live_count, 1532 1); 1533 --count; 1534 } 1535 } else { 1536 chain->core.live_zero = 0; 1537 } 1538 /* else do not modify live_count */ 1539 atomic_set_int(&chain->flags, HAMMER2_CHAIN_COUNTEDBREFS); 1540 } 1541 hammer2_spin_unex(&chain->core.spin); 1542 } 1543 1544 /* 1545 * Resize the chain's physical storage allocation in-place. This function does 1546 * not usually adjust the data pointer and must be followed by (typically) a 1547 * hammer2_chain_modify() call to copy any old data over and adjust the 1548 * data pointer. 1549 * 1550 * Chains can be resized smaller without reallocating the storage. Resizing 1551 * larger will reallocate the storage. Excess or prior storage is reclaimed 1552 * asynchronously at a later time. 1553 * 1554 * An nradix value of 0 is special-cased to mean that the storage should 1555 * be disassociated, that is the chain is being resized to 0 bytes (not 1 1556 * byte). 1557 * 1558 * Must be passed an exclusively locked parent and chain. 1559 * 1560 * This function is mostly used with DATA blocks locked RESOLVE_NEVER in order 1561 * to avoid instantiating a device buffer that conflicts with the vnode data 1562 * buffer. However, because H2 can compress or encrypt data, the chain may 1563 * have a dio assigned to it in those situations, and they do not conflict. 1564 * 1565 * XXX return error if cannot resize. 1566 */ 1567 int 1568 hammer2_chain_resize(hammer2_chain_t *chain, 1569 hammer2_tid_t mtid, hammer2_off_t dedup_off, 1570 int nradix, int flags) 1571 { 1572 hammer2_dev_t *hmp; 1573 size_t obytes; 1574 size_t nbytes; 1575 int error; 1576 1577 hmp = chain->hmp; 1578 1579 /* 1580 * Only data and indirect blocks can be resized for now. 1581 * (The volu root, inodes, and freemap elements use a fixed size). 1582 */ 1583 KKASSERT(chain != &hmp->vchain); 1584 KKASSERT(chain->bref.type == HAMMER2_BREF_TYPE_DATA || 1585 chain->bref.type == HAMMER2_BREF_TYPE_INDIRECT || 1586 chain->bref.type == HAMMER2_BREF_TYPE_DIRENT); 1587 1588 /* 1589 * Nothing to do if the element is already the proper size 1590 */ 1591 obytes = chain->bytes; 1592 nbytes = (nradix) ? (1U << nradix) : 0; 1593 if (obytes == nbytes) 1594 return (chain->error); 1595 1596 /* 1597 * Make sure the old data is instantiated so we can copy it. If this 1598 * is a data block, the device data may be superfluous since the data 1599 * might be in a logical block, but compressed or encrypted data is 1600 * another matter. 1601 * 1602 * NOTE: The modify will set BMAPUPD for us if BMAPPED is set. 1603 */ 1604 error = hammer2_chain_modify(chain, mtid, dedup_off, 0); 1605 if (error) 1606 return error; 1607 1608 /* 1609 * Relocate the block, even if making it smaller (because different 1610 * block sizes may be in different regions). 1611 * 1612 * NOTE: Operation does not copy the data and may only be used 1613 * to resize data blocks in-place, or directory entry blocks 1614 * which are about to be modified in some manner. 1615 */ 1616 error = hammer2_freemap_alloc(chain, nbytes); 1617 if (error) 1618 return error; 1619 1620 chain->bytes = nbytes; 1621 1622 /* 1623 * We don't want the followup chain_modify() to try to copy data 1624 * from the old (wrong-sized) buffer. It won't know how much to 1625 * copy. This case should only occur during writes when the 1626 * originator already has the data to write in-hand. 1627 */ 1628 if (chain->dio) { 1629 KKASSERT(chain->bref.type == HAMMER2_BREF_TYPE_DATA || 1630 chain->bref.type == HAMMER2_BREF_TYPE_DIRENT); 1631 hammer2_io_brelse(&chain->dio); 1632 chain->data = NULL; 1633 } 1634 return (chain->error); 1635 } 1636 1637 /* 1638 * Set the chain modified so its data can be changed by the caller, or 1639 * install deduplicated data. The caller must call this routine for each 1640 * set of modifications it makes, even if the chain is already flagged 1641 * MODIFIED. 1642 * 1643 * Sets bref.modify_tid to mtid only if mtid != 0. Note that bref.modify_tid 1644 * is a CLC (cluster level change) field and is not updated by parent 1645 * propagation during a flush. 1646 * 1647 * Returns an appropriate HAMMER2_ERROR_* code, which will generally reflect 1648 * chain->error except for HAMMER2_ERROR_ENOSPC. If the allocation fails 1649 * due to no space available, HAMMER2_ERROR_ENOSPC is returned and the chain 1650 * remains unmodified with its old data ref intact and chain->error 1651 * unchanged. 1652 * 1653 * Dedup Handling 1654 * 1655 * If the DEDUPABLE flag is set in the chain the storage must be reallocated 1656 * even if the chain is still flagged MODIFIED. In this case the chain's 1657 * DEDUPABLE flag will be cleared once the new storage has been assigned. 1658 * 1659 * If the caller passes a non-zero dedup_off we will use it to assign the 1660 * new storage. The MODIFIED flag will be *CLEARED* in this case, and 1661 * DEDUPABLE will be set (NOTE: the UPDATE flag is always set). The caller 1662 * must not modify the data content upon return. 1663 */ 1664 int 1665 hammer2_chain_modify(hammer2_chain_t *chain, hammer2_tid_t mtid, 1666 hammer2_off_t dedup_off, int flags) 1667 { 1668 hammer2_blockref_t obref; 1669 hammer2_dev_t *hmp; 1670 hammer2_io_t *dio; 1671 int error; 1672 int wasinitial; 1673 int setmodified; 1674 int setupdate; 1675 int newmod; 1676 char *bdata; 1677 1678 hmp = chain->hmp; 1679 obref = chain->bref; 1680 KKASSERT((chain->flags & HAMMER2_CHAIN_FICTITIOUS) == 0); 1681 KKASSERT(chain->lock.mtx_lock & MTX_EXCLUSIVE); 1682 1683 /* 1684 * Data is not optional for freemap chains (we must always be sure 1685 * to copy the data on COW storage allocations). 1686 */ 1687 if (chain->bref.type == HAMMER2_BREF_TYPE_FREEMAP_NODE || 1688 chain->bref.type == HAMMER2_BREF_TYPE_FREEMAP_LEAF) { 1689 KKASSERT((chain->flags & HAMMER2_CHAIN_INITIAL) || 1690 (flags & HAMMER2_MODIFY_OPTDATA) == 0); 1691 } 1692 1693 /* 1694 * Data must be resolved if already assigned, unless explicitly 1695 * flagged otherwise. If we cannot safety load the data the 1696 * modification fails and we return early. 1697 */ 1698 if (chain->data == NULL && chain->bytes != 0 && 1699 (flags & HAMMER2_MODIFY_OPTDATA) == 0 && 1700 (chain->bref.data_off & ~HAMMER2_OFF_MASK_RADIX)) { 1701 hammer2_chain_load_data(chain); 1702 if (chain->error) 1703 return (chain->error); 1704 } 1705 error = 0; 1706 1707 /* 1708 * Set MODIFIED to indicate that the chain has been modified. A new 1709 * allocation is required when modifying a chain. 1710 * 1711 * Set UPDATE to ensure that the blockref is updated in the parent. 1712 * 1713 * If MODIFIED is already set determine if we can reuse the assigned 1714 * data block or if we need a new data block. 1715 */ 1716 if ((chain->flags & HAMMER2_CHAIN_MODIFIED) == 0) { 1717 /* 1718 * Must set modified bit. 1719 */ 1720 atomic_add_long(&hammer2_count_modified_chains, 1); 1721 atomic_set_int(&chain->flags, HAMMER2_CHAIN_MODIFIED); 1722 hammer2_pfs_memory_inc(chain->pmp); /* can be NULL */ 1723 setmodified = 1; 1724 1725 /* 1726 * We may be able to avoid a copy-on-write if the chain's 1727 * check mode is set to NONE and the chain's current 1728 * modify_tid is beyond the last explicit snapshot tid. 1729 * 1730 * This implements HAMMER2's overwrite-in-place feature. 1731 * 1732 * NOTE! This data-block cannot be used as a de-duplication 1733 * source when the check mode is set to NONE. 1734 */ 1735 if ((chain->bref.type == HAMMER2_BREF_TYPE_DATA || 1736 chain->bref.type == HAMMER2_BREF_TYPE_DIRENT) && 1737 (chain->flags & HAMMER2_CHAIN_INITIAL) == 0 && 1738 (chain->flags & HAMMER2_CHAIN_DEDUPABLE) == 0 && 1739 HAMMER2_DEC_CHECK(chain->bref.methods) == 1740 HAMMER2_CHECK_NONE && 1741 chain->pmp && 1742 chain->bref.modify_tid > 1743 chain->pmp->iroot->meta.pfs_lsnap_tid) { 1744 /* 1745 * Sector overwrite allowed. 1746 */ 1747 newmod = 0; 1748 } else { 1749 /* 1750 * Sector overwrite not allowed, must copy-on-write. 1751 */ 1752 newmod = 1; 1753 } 1754 } else if (chain->flags & HAMMER2_CHAIN_DEDUPABLE) { 1755 /* 1756 * If the modified chain was registered for dedup we need 1757 * a new allocation. This only happens for delayed-flush 1758 * chains (i.e. which run through the front-end buffer 1759 * cache). 1760 */ 1761 newmod = 1; 1762 setmodified = 0; 1763 } else { 1764 /* 1765 * Already flagged modified, no new allocation is needed. 1766 */ 1767 newmod = 0; 1768 setmodified = 0; 1769 } 1770 1771 /* 1772 * Flag parent update required. 1773 */ 1774 if ((chain->flags & HAMMER2_CHAIN_UPDATE) == 0) { 1775 atomic_set_int(&chain->flags, HAMMER2_CHAIN_UPDATE); 1776 setupdate = 1; 1777 } else { 1778 setupdate = 0; 1779 } 1780 1781 /* 1782 * The XOP code returns held but unlocked focus chains. This 1783 * prevents the chain from being destroyed but does not prevent 1784 * it from being modified. diolk is used to interlock modifications 1785 * against XOP frontend accesses to the focus. 1786 * 1787 * This allows us to theoretically avoid deadlocking the frontend 1788 * if one of the backends lock up by not formally locking the 1789 * focused chain in the frontend. In addition, the synchronization 1790 * code relies on this mechanism to avoid deadlocking concurrent 1791 * synchronization threads. 1792 */ 1793 lockmgr(&chain->diolk, LK_EXCLUSIVE); 1794 1795 /* 1796 * The modification or re-modification requires an allocation and 1797 * possible COW. If an error occurs, the previous content and data 1798 * reference is retained and the modification fails. 1799 * 1800 * If dedup_off is non-zero, the caller is requesting a deduplication 1801 * rather than a modification. The MODIFIED bit is not set and the 1802 * data offset is set to the deduplication offset. The data cannot 1803 * be modified. 1804 * 1805 * NOTE: The dedup offset is allowed to be in a partially free state 1806 * and we must be sure to reset it to a fully allocated state 1807 * to force two bulkfree passes to free it again. 1808 * 1809 * NOTE: Only applicable when chain->bytes != 0. 1810 * 1811 * XXX can a chain already be marked MODIFIED without a data 1812 * assignment? If not, assert here instead of testing the case. 1813 */ 1814 if (chain != &hmp->vchain && chain != &hmp->fchain && 1815 chain->bytes) { 1816 if ((chain->bref.data_off & ~HAMMER2_OFF_MASK_RADIX) == 0 || 1817 newmod 1818 ) { 1819 /* 1820 * NOTE: We do not have to remove the dedup 1821 * registration because the area is still 1822 * allocated and the underlying DIO will 1823 * still be flushed. 1824 */ 1825 if (dedup_off) { 1826 chain->bref.data_off = dedup_off; 1827 chain->bytes = 1 << (dedup_off & 1828 HAMMER2_OFF_MASK_RADIX); 1829 chain->error = 0; 1830 atomic_clear_int(&chain->flags, 1831 HAMMER2_CHAIN_MODIFIED); 1832 atomic_add_long(&hammer2_count_modified_chains, 1833 -1); 1834 if (chain->pmp) 1835 hammer2_pfs_memory_wakeup(chain->pmp); 1836 hammer2_freemap_adjust(hmp, &chain->bref, 1837 HAMMER2_FREEMAP_DORECOVER); 1838 atomic_set_int(&chain->flags, 1839 HAMMER2_CHAIN_DEDUPABLE); 1840 } else { 1841 error = hammer2_freemap_alloc(chain, 1842 chain->bytes); 1843 atomic_clear_int(&chain->flags, 1844 HAMMER2_CHAIN_DEDUPABLE); 1845 } 1846 } 1847 } 1848 1849 /* 1850 * Stop here if error. We have to undo any flag bits we might 1851 * have set above. 1852 */ 1853 if (error) { 1854 if (setmodified) { 1855 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_MODIFIED); 1856 atomic_add_long(&hammer2_count_modified_chains, -1); 1857 if (chain->pmp) 1858 hammer2_pfs_memory_wakeup(chain->pmp); 1859 } 1860 if (setupdate) { 1861 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_UPDATE); 1862 } 1863 lockmgr(&chain->diolk, LK_RELEASE); 1864 1865 return error; 1866 } 1867 1868 /* 1869 * Update mirror_tid and modify_tid. modify_tid is only updated 1870 * if not passed as zero (during flushes, parent propagation passes 1871 * the value 0). 1872 * 1873 * NOTE: chain->pmp could be the device spmp. 1874 */ 1875 chain->bref.mirror_tid = hmp->voldata.mirror_tid + 1; 1876 if (mtid) 1877 chain->bref.modify_tid = mtid; 1878 1879 /* 1880 * Set BMAPUPD to tell the flush code that an existing blockmap entry 1881 * requires updating as well as to tell the delete code that the 1882 * chain's blockref might not exactly match (in terms of physical size 1883 * or block offset) the one in the parent's blocktable. The base key 1884 * of course will still match. 1885 */ 1886 if (chain->flags & HAMMER2_CHAIN_BMAPPED) 1887 atomic_set_int(&chain->flags, HAMMER2_CHAIN_BMAPUPD); 1888 1889 /* 1890 * Short-cut data blocks which the caller does not need an actual 1891 * data reference to (aka OPTDATA), as long as the chain does not 1892 * already have a data pointer to the data. This generally means 1893 * that the modifications are being done via the logical buffer cache. 1894 * The INITIAL flag relates only to the device data buffer and thus 1895 * remains unchange in this situation. 1896 * 1897 * This code also handles bytes == 0 (most dirents). 1898 */ 1899 if (chain->bref.type == HAMMER2_BREF_TYPE_DATA && 1900 (flags & HAMMER2_MODIFY_OPTDATA) && 1901 chain->data == NULL) { 1902 KKASSERT(chain->dio == NULL); 1903 goto skip2; 1904 } 1905 1906 /* 1907 * Clearing the INITIAL flag (for indirect blocks) indicates that 1908 * we've processed the uninitialized storage allocation. 1909 * 1910 * If this flag is already clear we are likely in a copy-on-write 1911 * situation but we have to be sure NOT to bzero the storage if 1912 * no data is present. 1913 */ 1914 if (chain->flags & HAMMER2_CHAIN_INITIAL) { 1915 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_INITIAL); 1916 wasinitial = 1; 1917 } else { 1918 wasinitial = 0; 1919 } 1920 1921 /* 1922 * Instantiate data buffer and possibly execute COW operation 1923 */ 1924 switch(chain->bref.type) { 1925 case HAMMER2_BREF_TYPE_VOLUME: 1926 case HAMMER2_BREF_TYPE_FREEMAP: 1927 /* 1928 * The data is embedded, no copy-on-write operation is 1929 * needed. 1930 */ 1931 KKASSERT(chain->dio == NULL); 1932 break; 1933 case HAMMER2_BREF_TYPE_DIRENT: 1934 /* 1935 * The data might be fully embedded. 1936 */ 1937 if (chain->bytes == 0) { 1938 KKASSERT(chain->dio == NULL); 1939 break; 1940 } 1941 /* fall through */ 1942 case HAMMER2_BREF_TYPE_INODE: 1943 case HAMMER2_BREF_TYPE_FREEMAP_LEAF: 1944 case HAMMER2_BREF_TYPE_DATA: 1945 case HAMMER2_BREF_TYPE_INDIRECT: 1946 case HAMMER2_BREF_TYPE_FREEMAP_NODE: 1947 /* 1948 * Perform the copy-on-write operation 1949 * 1950 * zero-fill or copy-on-write depending on whether 1951 * chain->data exists or not and set the dirty state for 1952 * the new buffer. hammer2_io_new() will handle the 1953 * zero-fill. 1954 * 1955 * If a dedup_off was supplied this is an existing block 1956 * and no COW, copy, or further modification is required. 1957 */ 1958 KKASSERT(chain != &hmp->vchain && chain != &hmp->fchain); 1959 1960 if (wasinitial && dedup_off == 0) { 1961 error = hammer2_io_new(hmp, chain->bref.type, 1962 chain->bref.data_off, 1963 chain->bytes, &dio); 1964 } else { 1965 error = hammer2_io_bread(hmp, chain->bref.type, 1966 chain->bref.data_off, 1967 chain->bytes, &dio); 1968 } 1969 hammer2_adjreadcounter(&chain->bref, chain->bytes); 1970 1971 /* 1972 * If an I/O error occurs make sure callers cannot accidently 1973 * modify the old buffer's contents and corrupt the filesystem. 1974 * 1975 * NOTE: hammer2_io_data() call issues bkvasync() 1976 */ 1977 if (error) { 1978 kprintf("hammer2_chain_modify: hmp=%p I/O error\n", 1979 hmp); 1980 chain->error = HAMMER2_ERROR_EIO; 1981 hammer2_io_brelse(&dio); 1982 hammer2_io_brelse(&chain->dio); 1983 chain->data = NULL; 1984 break; 1985 } 1986 chain->error = 0; 1987 bdata = hammer2_io_data(dio, chain->bref.data_off); 1988 1989 if (chain->data) { 1990 /* 1991 * COW (unless a dedup). 1992 */ 1993 KKASSERT(chain->dio != NULL); 1994 if (chain->data != (void *)bdata && dedup_off == 0) { 1995 bcopy(chain->data, bdata, chain->bytes); 1996 } 1997 } else if (wasinitial == 0) { 1998 /* 1999 * We have a problem. We were asked to COW but 2000 * we don't have any data to COW with! 2001 */ 2002 panic("hammer2_chain_modify: having a COW %p\n", 2003 chain); 2004 } 2005 2006 /* 2007 * Retire the old buffer, replace with the new. Dirty or 2008 * redirty the new buffer. 2009 * 2010 * WARNING! The system buffer cache may have already flushed 2011 * the buffer, so we must be sure to [re]dirty it 2012 * for further modification. 2013 * 2014 * If dedup_off was supplied, the caller is not 2015 * expected to make any further modification to the 2016 * buffer. 2017 * 2018 * WARNING! hammer2_get_gdata() assumes dio never transitions 2019 * through NULL in order to optimize away unnecessary 2020 * diolk operations. 2021 */ 2022 { 2023 hammer2_io_t *tio; 2024 2025 if ((tio = chain->dio) != NULL) 2026 hammer2_io_bqrelse(&tio); 2027 chain->data = (void *)bdata; 2028 chain->dio = dio; 2029 if (dedup_off == 0) 2030 hammer2_io_setdirty(dio); 2031 } 2032 break; 2033 default: 2034 panic("hammer2_chain_modify: illegal non-embedded type %d", 2035 chain->bref.type); 2036 break; 2037 2038 } 2039 skip2: 2040 /* 2041 * setflush on parent indicating that the parent must recurse down 2042 * to us. Do not call on chain itself which might already have it 2043 * set. 2044 */ 2045 if (chain->parent) 2046 hammer2_chain_setflush(chain->parent); 2047 lockmgr(&chain->diolk, LK_RELEASE); 2048 2049 return (chain->error); 2050 } 2051 2052 /* 2053 * Modify the chain associated with an inode. 2054 */ 2055 int 2056 hammer2_chain_modify_ip(hammer2_inode_t *ip, hammer2_chain_t *chain, 2057 hammer2_tid_t mtid, int flags) 2058 { 2059 int error; 2060 2061 hammer2_inode_modify(ip); 2062 error = hammer2_chain_modify(chain, mtid, 0, flags); 2063 2064 return error; 2065 } 2066 2067 /* 2068 * Volume header data locks 2069 */ 2070 void 2071 hammer2_voldata_lock(hammer2_dev_t *hmp) 2072 { 2073 lockmgr(&hmp->vollk, LK_EXCLUSIVE); 2074 } 2075 2076 void 2077 hammer2_voldata_unlock(hammer2_dev_t *hmp) 2078 { 2079 lockmgr(&hmp->vollk, LK_RELEASE); 2080 } 2081 2082 void 2083 hammer2_voldata_modify(hammer2_dev_t *hmp) 2084 { 2085 if ((hmp->vchain.flags & HAMMER2_CHAIN_MODIFIED) == 0) { 2086 atomic_add_long(&hammer2_count_modified_chains, 1); 2087 atomic_set_int(&hmp->vchain.flags, HAMMER2_CHAIN_MODIFIED); 2088 hammer2_pfs_memory_inc(hmp->vchain.pmp); 2089 } 2090 } 2091 2092 /* 2093 * This function returns the chain at the nearest key within the specified 2094 * range. The returned chain will be referenced but not locked. 2095 * 2096 * This function will recurse through chain->rbtree as necessary and will 2097 * return a *key_nextp suitable for iteration. *key_nextp is only set if 2098 * the iteration value is less than the current value of *key_nextp. 2099 * 2100 * The caller should use (*key_nextp) to calculate the actual range of 2101 * the returned element, which will be (key_beg to *key_nextp - 1), because 2102 * there might be another element which is superior to the returned element 2103 * and overlaps it. 2104 * 2105 * (*key_nextp) can be passed as key_beg in an iteration only while non-NULL 2106 * chains continue to be returned. On EOF (*key_nextp) may overflow since 2107 * it will wind up being (key_end + 1). 2108 * 2109 * WARNING! Must be called with child's spinlock held. Spinlock remains 2110 * held through the operation. 2111 */ 2112 struct hammer2_chain_find_info { 2113 hammer2_chain_t *best; 2114 hammer2_key_t key_beg; 2115 hammer2_key_t key_end; 2116 hammer2_key_t key_next; 2117 }; 2118 2119 static int hammer2_chain_find_cmp(hammer2_chain_t *child, void *data); 2120 static int hammer2_chain_find_callback(hammer2_chain_t *child, void *data); 2121 2122 static 2123 hammer2_chain_t * 2124 hammer2_chain_find(hammer2_chain_t *parent, hammer2_key_t *key_nextp, 2125 hammer2_key_t key_beg, hammer2_key_t key_end) 2126 { 2127 struct hammer2_chain_find_info info; 2128 2129 info.best = NULL; 2130 info.key_beg = key_beg; 2131 info.key_end = key_end; 2132 info.key_next = *key_nextp; 2133 2134 RB_SCAN(hammer2_chain_tree, &parent->core.rbtree, 2135 hammer2_chain_find_cmp, hammer2_chain_find_callback, 2136 &info); 2137 *key_nextp = info.key_next; 2138 #if 0 2139 kprintf("chain_find %p %016jx:%016jx next=%016jx\n", 2140 parent, key_beg, key_end, *key_nextp); 2141 #endif 2142 2143 return (info.best); 2144 } 2145 2146 static 2147 int 2148 hammer2_chain_find_cmp(hammer2_chain_t *child, void *data) 2149 { 2150 struct hammer2_chain_find_info *info = data; 2151 hammer2_key_t child_beg; 2152 hammer2_key_t child_end; 2153 2154 child_beg = child->bref.key; 2155 child_end = child_beg + ((hammer2_key_t)1 << child->bref.keybits) - 1; 2156 2157 if (child_end < info->key_beg) 2158 return(-1); 2159 if (child_beg > info->key_end) 2160 return(1); 2161 return(0); 2162 } 2163 2164 static 2165 int 2166 hammer2_chain_find_callback(hammer2_chain_t *child, void *data) 2167 { 2168 struct hammer2_chain_find_info *info = data; 2169 hammer2_chain_t *best; 2170 hammer2_key_t child_end; 2171 2172 /* 2173 * WARNING! Layerq is scanned forwards, exact matches should keep 2174 * the existing info->best. 2175 */ 2176 if ((best = info->best) == NULL) { 2177 /* 2178 * No previous best. Assign best 2179 */ 2180 info->best = child; 2181 } else if (best->bref.key <= info->key_beg && 2182 child->bref.key <= info->key_beg) { 2183 /* 2184 * Illegal overlap. 2185 */ 2186 KKASSERT(0); 2187 /*info->best = child;*/ 2188 } else if (child->bref.key < best->bref.key) { 2189 /* 2190 * Child has a nearer key and best is not flush with key_beg. 2191 * Set best to child. Truncate key_next to the old best key. 2192 */ 2193 info->best = child; 2194 if (info->key_next > best->bref.key || info->key_next == 0) 2195 info->key_next = best->bref.key; 2196 } else if (child->bref.key == best->bref.key) { 2197 /* 2198 * If our current best is flush with the child then this 2199 * is an illegal overlap. 2200 * 2201 * key_next will automatically be limited to the smaller of 2202 * the two end-points. 2203 */ 2204 KKASSERT(0); 2205 info->best = child; 2206 } else { 2207 /* 2208 * Keep the current best but truncate key_next to the child's 2209 * base. 2210 * 2211 * key_next will also automatically be limited to the smaller 2212 * of the two end-points (probably not necessary for this case 2213 * but we do it anyway). 2214 */ 2215 if (info->key_next > child->bref.key || info->key_next == 0) 2216 info->key_next = child->bref.key; 2217 } 2218 2219 /* 2220 * Always truncate key_next based on child's end-of-range. 2221 */ 2222 child_end = child->bref.key + ((hammer2_key_t)1 << child->bref.keybits); 2223 if (child_end && (info->key_next > child_end || info->key_next == 0)) 2224 info->key_next = child_end; 2225 2226 return(0); 2227 } 2228 2229 /* 2230 * Retrieve the specified chain from a media blockref, creating the 2231 * in-memory chain structure which reflects it. The returned chain is 2232 * held and locked according to (how) (HAMMER2_RESOLVE_*). The caller must 2233 * handle crc-checks and so forth, and should check chain->error before 2234 * assuming that the data is good. 2235 * 2236 * To handle insertion races pass the INSERT_RACE flag along with the 2237 * generation number of the core. NULL will be returned if the generation 2238 * number changes before we have a chance to insert the chain. Insert 2239 * races can occur because the parent might be held shared. 2240 * 2241 * Caller must hold the parent locked shared or exclusive since we may 2242 * need the parent's bref array to find our block. 2243 * 2244 * WARNING! chain->pmp is always set to NULL for any chain representing 2245 * part of the super-root topology. 2246 */ 2247 hammer2_chain_t * 2248 hammer2_chain_get(hammer2_chain_t *parent, int generation, 2249 hammer2_blockref_t *bref, int how) 2250 { 2251 hammer2_dev_t *hmp = parent->hmp; 2252 hammer2_chain_t *chain; 2253 int error; 2254 2255 /* 2256 * Allocate a chain structure representing the existing media 2257 * entry. Resulting chain has one ref and is not locked. 2258 */ 2259 if (bref->flags & HAMMER2_BREF_FLAG_PFSROOT) 2260 chain = hammer2_chain_alloc(hmp, NULL, bref); 2261 else 2262 chain = hammer2_chain_alloc(hmp, parent->pmp, bref); 2263 /* ref'd chain returned */ 2264 2265 /* 2266 * Flag that the chain is in the parent's blockmap so delete/flush 2267 * knows what to do with it. 2268 */ 2269 atomic_set_int(&chain->flags, HAMMER2_CHAIN_BMAPPED); 2270 2271 /* 2272 * chain must be locked to avoid unexpected ripouts 2273 */ 2274 hammer2_chain_lock(chain, how); 2275 2276 /* 2277 * Link the chain into its parent. A spinlock is required to safely 2278 * access the RBTREE, and it is possible to collide with another 2279 * hammer2_chain_get() operation because the caller might only hold 2280 * a shared lock on the parent. 2281 * 2282 * NOTE: Get races can occur quite often when we distribute 2283 * asynchronous read-aheads across multiple threads. 2284 */ 2285 KKASSERT(parent->refs > 0); 2286 error = hammer2_chain_insert(parent, chain, 2287 HAMMER2_CHAIN_INSERT_SPIN | 2288 HAMMER2_CHAIN_INSERT_RACE, 2289 generation); 2290 if (error) { 2291 KKASSERT((chain->flags & HAMMER2_CHAIN_ONRBTREE) == 0); 2292 /*kprintf("chain %p get race\n", chain);*/ 2293 hammer2_chain_unlock(chain); 2294 hammer2_chain_drop(chain); 2295 chain = NULL; 2296 } else { 2297 KKASSERT(chain->flags & HAMMER2_CHAIN_ONRBTREE); 2298 } 2299 2300 /* 2301 * Return our new chain referenced but not locked, or NULL if 2302 * a race occurred. 2303 */ 2304 return (chain); 2305 } 2306 2307 /* 2308 * Lookup initialization/completion API 2309 */ 2310 hammer2_chain_t * 2311 hammer2_chain_lookup_init(hammer2_chain_t *parent, int flags) 2312 { 2313 hammer2_chain_ref(parent); 2314 if (flags & HAMMER2_LOOKUP_SHARED) { 2315 hammer2_chain_lock(parent, HAMMER2_RESOLVE_ALWAYS | 2316 HAMMER2_RESOLVE_SHARED); 2317 } else { 2318 hammer2_chain_lock(parent, HAMMER2_RESOLVE_ALWAYS); 2319 } 2320 return (parent); 2321 } 2322 2323 void 2324 hammer2_chain_lookup_done(hammer2_chain_t *parent) 2325 { 2326 if (parent) { 2327 hammer2_chain_unlock(parent); 2328 hammer2_chain_drop(parent); 2329 } 2330 } 2331 2332 /* 2333 * Take the locked chain and return a locked parent. The chain remains 2334 * locked on return, but may have to be temporarily unlocked to acquire 2335 * the parent. Because of this, (chain) must be stable and cannot be 2336 * deleted while it was temporarily unlocked (typically means that (chain) 2337 * is an inode). 2338 * 2339 * Pass HAMMER2_RESOLVE_* flags in flags. 2340 * 2341 * This will work even if the chain is errored, and the caller can check 2342 * parent->error on return if desired since the parent will be locked. 2343 * 2344 * This function handles the lock order reversal. 2345 */ 2346 hammer2_chain_t * 2347 hammer2_chain_getparent(hammer2_chain_t *chain, int flags) 2348 { 2349 hammer2_chain_t *parent; 2350 2351 /* 2352 * Be careful of order, chain must be unlocked before parent 2353 * is locked below to avoid a deadlock. Try it trivially first. 2354 */ 2355 parent = chain->parent; 2356 if (parent == NULL) 2357 panic("hammer2_chain_getparent: no parent"); 2358 hammer2_chain_ref(parent); 2359 if (hammer2_chain_lock(parent, flags|HAMMER2_RESOLVE_NONBLOCK) == 0) 2360 return parent; 2361 2362 for (;;) { 2363 hammer2_chain_unlock(chain); 2364 hammer2_chain_lock(parent, flags); 2365 hammer2_chain_lock(chain, flags); 2366 2367 /* 2368 * Parent relinking races are quite common. We have to get 2369 * it right or we will blow up the block table. 2370 */ 2371 if (chain->parent == parent) 2372 break; 2373 hammer2_chain_unlock(parent); 2374 hammer2_chain_drop(parent); 2375 cpu_ccfence(); 2376 parent = chain->parent; 2377 if (parent == NULL) 2378 panic("hammer2_chain_getparent: no parent"); 2379 hammer2_chain_ref(parent); 2380 } 2381 return parent; 2382 } 2383 2384 /* 2385 * Take the locked chain and return a locked parent. The chain is unlocked 2386 * and dropped. *chainp is set to the returned parent as a convenience. 2387 * Pass HAMMER2_RESOLVE_* flags in flags. 2388 * 2389 * This will work even if the chain is errored, and the caller can check 2390 * parent->error on return if desired since the parent will be locked. 2391 * 2392 * The chain does NOT need to be stable. We use a tracking structure 2393 * to track the expected parent if the chain is deleted out from under us. 2394 * 2395 * This function handles the lock order reversal. 2396 */ 2397 hammer2_chain_t * 2398 hammer2_chain_repparent(hammer2_chain_t **chainp, int flags) 2399 { 2400 hammer2_chain_t *chain; 2401 hammer2_chain_t *parent; 2402 struct hammer2_reptrack reptrack; 2403 struct hammer2_reptrack **repp; 2404 2405 /* 2406 * Be careful of order, chain must be unlocked before parent 2407 * is locked below to avoid a deadlock. Try it trivially first. 2408 */ 2409 chain = *chainp; 2410 parent = chain->parent; 2411 if (parent == NULL) { 2412 hammer2_spin_unex(&chain->core.spin); 2413 panic("hammer2_chain_repparent: no parent"); 2414 } 2415 hammer2_chain_ref(parent); 2416 if (hammer2_chain_lock(parent, flags|HAMMER2_RESOLVE_NONBLOCK) == 0) { 2417 hammer2_chain_unlock(chain); 2418 hammer2_chain_drop(chain); 2419 *chainp = parent; 2420 2421 return parent; 2422 } 2423 2424 /* 2425 * Ok, now it gets a bit nasty. There are multiple situations where 2426 * the parent might be in the middle of a deletion, or where the child 2427 * (chain) might be deleted the instant we let go of its lock. 2428 * We can potentially end up in a no-win situation! 2429 * 2430 * In particular, the indirect_maintenance() case can cause these 2431 * situations. 2432 * 2433 * To deal with this we install a reptrack structure in the parent 2434 * This reptrack structure 'owns' the parent ref and will automatically 2435 * migrate to the parent's parent if the parent is deleted permanently. 2436 */ 2437 hammer2_spin_init(&reptrack.spin, "h2reptrk"); 2438 reptrack.chain = parent; 2439 hammer2_chain_ref(parent); /* for the reptrack */ 2440 2441 hammer2_spin_ex(&parent->core.spin); 2442 reptrack.next = parent->core.reptrack; 2443 parent->core.reptrack = &reptrack; 2444 hammer2_spin_unex(&parent->core.spin); 2445 2446 hammer2_chain_unlock(chain); 2447 hammer2_chain_drop(chain); 2448 chain = NULL; /* gone */ 2449 2450 /* 2451 * At the top of this loop, chain is gone and parent is refd both 2452 * by us explicitly AND via our reptrack. We are attempting to 2453 * lock parent. 2454 */ 2455 for (;;) { 2456 hammer2_chain_lock(parent, flags); 2457 2458 if (reptrack.chain == parent) 2459 break; 2460 hammer2_chain_unlock(parent); 2461 hammer2_chain_drop(parent); 2462 2463 kprintf("hammer2: debug REPTRACK %p->%p\n", 2464 parent, reptrack.chain); 2465 hammer2_spin_ex(&reptrack.spin); 2466 parent = reptrack.chain; 2467 hammer2_chain_ref(parent); 2468 hammer2_spin_unex(&reptrack.spin); 2469 } 2470 2471 /* 2472 * Once parent is locked and matches our reptrack, our reptrack 2473 * will be stable and we have our parent. We can unlink our 2474 * reptrack. 2475 * 2476 * WARNING! Remember that the chain lock might be shared. Chains 2477 * locked shared have stable parent linkages. 2478 */ 2479 hammer2_spin_ex(&parent->core.spin); 2480 repp = &parent->core.reptrack; 2481 while (*repp != &reptrack) 2482 repp = &(*repp)->next; 2483 *repp = reptrack.next; 2484 hammer2_spin_unex(&parent->core.spin); 2485 2486 hammer2_chain_drop(parent); /* reptrack ref */ 2487 *chainp = parent; /* return parent lock+ref */ 2488 2489 return parent; 2490 } 2491 2492 /* 2493 * Dispose of any linked reptrack structures in (chain) by shifting them to 2494 * (parent). Both (chain) and (parent) must be exclusively locked. 2495 * 2496 * This is interlocked against any children of (chain) on the other side. 2497 * No children so remain as-of when this is called so we can test 2498 * core.reptrack without holding the spin-lock. 2499 * 2500 * Used whenever the caller intends to permanently delete chains related 2501 * to topological recursions (BREF_TYPE_INDIRECT, BREF_TYPE_FREEMAP_NODE), 2502 * where the chains underneath the node being deleted are given a new parent 2503 * above the node being deleted. 2504 */ 2505 static 2506 void 2507 hammer2_chain_repchange(hammer2_chain_t *parent, hammer2_chain_t *chain) 2508 { 2509 struct hammer2_reptrack *reptrack; 2510 2511 KKASSERT(chain->core.live_count == 0 && RB_EMPTY(&chain->core.rbtree)); 2512 while (chain->core.reptrack) { 2513 hammer2_spin_ex(&parent->core.spin); 2514 hammer2_spin_ex(&chain->core.spin); 2515 reptrack = chain->core.reptrack; 2516 if (reptrack == NULL) { 2517 hammer2_spin_unex(&chain->core.spin); 2518 hammer2_spin_unex(&parent->core.spin); 2519 break; 2520 } 2521 hammer2_spin_ex(&reptrack->spin); 2522 chain->core.reptrack = reptrack->next; 2523 reptrack->chain = parent; 2524 reptrack->next = parent->core.reptrack; 2525 parent->core.reptrack = reptrack; 2526 hammer2_chain_ref(parent); /* reptrack */ 2527 2528 hammer2_spin_unex(&chain->core.spin); 2529 hammer2_spin_unex(&parent->core.spin); 2530 kprintf("hammer2: debug repchange %p %p->%p\n", 2531 reptrack, chain, parent); 2532 hammer2_chain_drop(chain); /* reptrack */ 2533 } 2534 } 2535 2536 /* 2537 * Locate the first chain whos key range overlaps (key_beg, key_end) inclusive. 2538 * (*parentp) typically points to an inode but can also point to a related 2539 * indirect block and this function will recurse upwards and find the inode 2540 * or the nearest undeleted indirect block covering the key range. 2541 * 2542 * This function unconditionally sets *errorp, replacing any previous value. 2543 * 2544 * (*parentp) must be exclusive or shared locked (depending on flags) and 2545 * referenced and can be an inode or an existing indirect block within the 2546 * inode. 2547 * 2548 * If (*parent) is errored out, this function will not attempt to recurse 2549 * the radix tree and will return NULL along with an appropriate *errorp. 2550 * If NULL is returned and *errorp is 0, the requested lookup could not be 2551 * located. 2552 * 2553 * On return (*parentp) will be modified to point at the deepest parent chain 2554 * element encountered during the search, as a helper for an insertion or 2555 * deletion. 2556 * 2557 * The new (*parentp) will be locked shared or exclusive (depending on flags), 2558 * and referenced, and the old will be unlocked and dereferenced (no change 2559 * if they are both the same). This is particularly important if the caller 2560 * wishes to insert a new chain, (*parentp) will be set properly even if NULL 2561 * is returned, as long as no error occurred. 2562 * 2563 * The matching chain will be returned locked according to flags. 2564 * 2565 * -- 2566 * 2567 * NULL is returned if no match was found, but (*parentp) will still 2568 * potentially be adjusted. 2569 * 2570 * On return (*key_nextp) will point to an iterative value for key_beg. 2571 * (If NULL is returned (*key_nextp) is set to (key_end + 1)). 2572 * 2573 * This function will also recurse up the chain if the key is not within the 2574 * current parent's range. (*parentp) can never be set to NULL. An iteration 2575 * can simply allow (*parentp) to float inside the loop. 2576 * 2577 * NOTE! chain->data is not always resolved. By default it will not be 2578 * resolved for BREF_TYPE_DATA, FREEMAP_NODE, or FREEMAP_LEAF. Use 2579 * HAMMER2_LOOKUP_ALWAYS to force resolution (but be careful w/ 2580 * BREF_TYPE_DATA as the device buffer can alias the logical file 2581 * buffer). 2582 */ 2583 2584 hammer2_chain_t * 2585 hammer2_chain_lookup(hammer2_chain_t **parentp, hammer2_key_t *key_nextp, 2586 hammer2_key_t key_beg, hammer2_key_t key_end, 2587 int *errorp, int flags) 2588 { 2589 hammer2_dev_t *hmp; 2590 hammer2_chain_t *parent; 2591 hammer2_chain_t *chain; 2592 hammer2_blockref_t *base; 2593 hammer2_blockref_t *bref; 2594 hammer2_blockref_t bcopy; 2595 hammer2_key_t scan_beg; 2596 hammer2_key_t scan_end; 2597 int count = 0; 2598 int how_always = HAMMER2_RESOLVE_ALWAYS; 2599 int how_maybe = HAMMER2_RESOLVE_MAYBE; 2600 int how; 2601 int generation; 2602 int maxloops = 300000; 2603 volatile hammer2_mtx_t save_mtx; 2604 2605 if (flags & HAMMER2_LOOKUP_ALWAYS) { 2606 how_maybe = how_always; 2607 how = HAMMER2_RESOLVE_ALWAYS; 2608 } else if (flags & HAMMER2_LOOKUP_NODATA) { 2609 how = HAMMER2_RESOLVE_NEVER; 2610 } else { 2611 how = HAMMER2_RESOLVE_MAYBE; 2612 } 2613 if (flags & HAMMER2_LOOKUP_SHARED) { 2614 how_maybe |= HAMMER2_RESOLVE_SHARED; 2615 how_always |= HAMMER2_RESOLVE_SHARED; 2616 how |= HAMMER2_RESOLVE_SHARED; 2617 } 2618 2619 /* 2620 * Recurse (*parentp) upward if necessary until the parent completely 2621 * encloses the key range or we hit the inode. 2622 * 2623 * Handle races against the flusher deleting indirect nodes on its 2624 * way back up by continuing to recurse upward past the deletion. 2625 */ 2626 parent = *parentp; 2627 hmp = parent->hmp; 2628 *errorp = 0; 2629 2630 while (parent->bref.type == HAMMER2_BREF_TYPE_INDIRECT || 2631 parent->bref.type == HAMMER2_BREF_TYPE_FREEMAP_NODE) { 2632 scan_beg = parent->bref.key; 2633 scan_end = scan_beg + 2634 ((hammer2_key_t)1 << parent->bref.keybits) - 1; 2635 if ((parent->flags & HAMMER2_CHAIN_DELETED) == 0) { 2636 if (key_beg >= scan_beg && key_end <= scan_end) 2637 break; 2638 } 2639 parent = hammer2_chain_repparent(parentp, how_maybe); 2640 } 2641 again: 2642 if (--maxloops == 0) 2643 panic("hammer2_chain_lookup: maxloops"); 2644 /* 2645 * Locate the blockref array. Currently we do a fully associative 2646 * search through the array. 2647 */ 2648 switch(parent->bref.type) { 2649 case HAMMER2_BREF_TYPE_INODE: 2650 /* 2651 * Special shortcut for embedded data returns the inode 2652 * itself. Callers must detect this condition and access 2653 * the embedded data (the strategy code does this for us). 2654 * 2655 * This is only applicable to regular files and softlinks. 2656 * 2657 * We need a second lock on parent. Since we already have 2658 * a lock we must pass LOCKAGAIN to prevent unexpected 2659 * blocking (we don't want to block on a second shared 2660 * ref if an exclusive lock is pending) 2661 */ 2662 if (parent->data->ipdata.meta.op_flags & 2663 HAMMER2_OPFLAG_DIRECTDATA) { 2664 if (flags & HAMMER2_LOOKUP_NODIRECT) { 2665 chain = NULL; 2666 *key_nextp = key_end + 1; 2667 goto done; 2668 } 2669 hammer2_chain_ref(parent); 2670 hammer2_chain_lock(parent, how_always | 2671 HAMMER2_RESOLVE_LOCKAGAIN); 2672 *key_nextp = key_end + 1; 2673 return (parent); 2674 } 2675 base = &parent->data->ipdata.u.blockset.blockref[0]; 2676 count = HAMMER2_SET_COUNT; 2677 break; 2678 case HAMMER2_BREF_TYPE_FREEMAP_NODE: 2679 case HAMMER2_BREF_TYPE_INDIRECT: 2680 /* 2681 * Handle MATCHIND on the parent 2682 */ 2683 if (flags & HAMMER2_LOOKUP_MATCHIND) { 2684 scan_beg = parent->bref.key; 2685 scan_end = scan_beg + 2686 ((hammer2_key_t)1 << parent->bref.keybits) - 1; 2687 if (key_beg == scan_beg && key_end == scan_end) { 2688 chain = parent; 2689 hammer2_chain_ref(chain); 2690 hammer2_chain_lock(chain, how_maybe); 2691 *key_nextp = scan_end + 1; 2692 goto done; 2693 } 2694 } 2695 2696 /* 2697 * Optimize indirect blocks in the INITIAL state to avoid 2698 * I/O. 2699 */ 2700 if (parent->flags & HAMMER2_CHAIN_INITIAL) { 2701 base = NULL; 2702 } else { 2703 if (parent->data == NULL) { 2704 kprintf("parent->data is NULL %p\n", parent); 2705 while (1) 2706 tsleep(parent, 0, "xxx", 0); 2707 } 2708 base = &parent->data->npdata[0]; 2709 } 2710 count = parent->bytes / sizeof(hammer2_blockref_t); 2711 break; 2712 case HAMMER2_BREF_TYPE_VOLUME: 2713 base = &parent->data->voldata.sroot_blockset.blockref[0]; 2714 count = HAMMER2_SET_COUNT; 2715 break; 2716 case HAMMER2_BREF_TYPE_FREEMAP: 2717 base = &parent->data->blkset.blockref[0]; 2718 count = HAMMER2_SET_COUNT; 2719 break; 2720 default: 2721 kprintf("hammer2_chain_lookup: unrecognized " 2722 "blockref(B) type: %d", 2723 parent->bref.type); 2724 while (1) 2725 tsleep(&base, 0, "dead", 0); 2726 panic("hammer2_chain_lookup: unrecognized " 2727 "blockref(B) type: %d", 2728 parent->bref.type); 2729 base = NULL; /* safety */ 2730 count = 0; /* safety */ 2731 } 2732 2733 /* 2734 * No lookup is possible if the parent is errored. We delayed 2735 * this check as long as we could to ensure that the parent backup, 2736 * embedded data, and MATCHIND code could still execute. 2737 */ 2738 if (parent->error) { 2739 *errorp = parent->error; 2740 return NULL; 2741 } 2742 2743 /* 2744 * Merged scan to find next candidate. 2745 * 2746 * hammer2_base_*() functions require the parent->core.live_* fields 2747 * to be synchronized. 2748 * 2749 * We need to hold the spinlock to access the block array and RB tree 2750 * and to interlock chain creation. 2751 */ 2752 if ((parent->flags & HAMMER2_CHAIN_COUNTEDBREFS) == 0) 2753 hammer2_chain_countbrefs(parent, base, count); 2754 2755 /* 2756 * Combined search 2757 */ 2758 hammer2_spin_ex(&parent->core.spin); 2759 chain = hammer2_combined_find(parent, base, count, 2760 key_nextp, 2761 key_beg, key_end, 2762 &bref); 2763 generation = parent->core.generation; 2764 2765 /* 2766 * Exhausted parent chain, iterate. 2767 */ 2768 if (bref == NULL) { 2769 KKASSERT(chain == NULL); 2770 hammer2_spin_unex(&parent->core.spin); 2771 if (key_beg == key_end) /* short cut single-key case */ 2772 return (NULL); 2773 2774 /* 2775 * Stop if we reached the end of the iteration. 2776 */ 2777 if (parent->bref.type != HAMMER2_BREF_TYPE_INDIRECT && 2778 parent->bref.type != HAMMER2_BREF_TYPE_FREEMAP_NODE) { 2779 return (NULL); 2780 } 2781 2782 /* 2783 * Calculate next key, stop if we reached the end of the 2784 * iteration, otherwise go up one level and loop. 2785 */ 2786 key_beg = parent->bref.key + 2787 ((hammer2_key_t)1 << parent->bref.keybits); 2788 if (key_beg == 0 || key_beg > key_end) 2789 return (NULL); 2790 parent = hammer2_chain_repparent(parentp, how_maybe); 2791 goto again; 2792 } 2793 2794 /* 2795 * Selected from blockref or in-memory chain. 2796 */ 2797 bcopy = *bref; 2798 if (chain == NULL) { 2799 hammer2_spin_unex(&parent->core.spin); 2800 if (bcopy.type == HAMMER2_BREF_TYPE_INDIRECT || 2801 bcopy.type == HAMMER2_BREF_TYPE_FREEMAP_NODE) { 2802 chain = hammer2_chain_get(parent, generation, 2803 &bcopy, how_maybe); 2804 } else { 2805 chain = hammer2_chain_get(parent, generation, 2806 &bcopy, how); 2807 } 2808 if (chain == NULL) 2809 goto again; 2810 } else { 2811 hammer2_chain_ref(chain); 2812 hammer2_spin_unex(&parent->core.spin); 2813 2814 /* 2815 * chain is referenced but not locked. We must lock the 2816 * chain to obtain definitive state. 2817 */ 2818 if (bcopy.type == HAMMER2_BREF_TYPE_INDIRECT || 2819 bcopy.type == HAMMER2_BREF_TYPE_FREEMAP_NODE) { 2820 hammer2_chain_lock(chain, how_maybe); 2821 } else { 2822 hammer2_chain_lock(chain, how); 2823 } 2824 KKASSERT(chain->parent == parent); 2825 } 2826 if (bcmp(&bcopy, &chain->bref, sizeof(bcopy)) || 2827 chain->parent != parent) { 2828 hammer2_chain_unlock(chain); 2829 hammer2_chain_drop(chain); 2830 chain = NULL; /* SAFETY */ 2831 goto again; 2832 } 2833 2834 2835 /* 2836 * Skip deleted chains (XXX cache 'i' end-of-block-array? XXX) 2837 * 2838 * NOTE: Chain's key range is not relevant as there might be 2839 * one-offs within the range that are not deleted. 2840 * 2841 * NOTE: Lookups can race delete-duplicate because 2842 * delete-duplicate does not lock the parent's core 2843 * (they just use the spinlock on the core). 2844 */ 2845 if (chain->flags & HAMMER2_CHAIN_DELETED) { 2846 kprintf("skip deleted chain %016jx.%02x key=%016jx\n", 2847 chain->bref.data_off, chain->bref.type, 2848 chain->bref.key); 2849 hammer2_chain_unlock(chain); 2850 hammer2_chain_drop(chain); 2851 chain = NULL; /* SAFETY */ 2852 key_beg = *key_nextp; 2853 if (key_beg == 0 || key_beg > key_end) 2854 return(NULL); 2855 goto again; 2856 } 2857 2858 /* 2859 * If the chain element is an indirect block it becomes the new 2860 * parent and we loop on it. We must maintain our top-down locks 2861 * to prevent the flusher from interfering (i.e. doing a 2862 * delete-duplicate and leaving us recursing down a deleted chain). 2863 * 2864 * The parent always has to be locked with at least RESOLVE_MAYBE 2865 * so we can access its data. It might need a fixup if the caller 2866 * passed incompatible flags. Be careful not to cause a deadlock 2867 * as a data-load requires an exclusive lock. 2868 * 2869 * If HAMMER2_LOOKUP_MATCHIND is set and the indirect block's key 2870 * range is within the requested key range we return the indirect 2871 * block and do NOT loop. This is usually only used to acquire 2872 * freemap nodes. 2873 */ 2874 if (chain->bref.type == HAMMER2_BREF_TYPE_INDIRECT || 2875 chain->bref.type == HAMMER2_BREF_TYPE_FREEMAP_NODE) { 2876 save_mtx = parent->lock; 2877 hammer2_chain_unlock(parent); 2878 hammer2_chain_drop(parent); 2879 *parentp = parent = chain; 2880 chain = NULL; /* SAFETY */ 2881 goto again; 2882 } 2883 done: 2884 /* 2885 * All done, return the locked chain. 2886 * 2887 * If the caller does not want a locked chain, replace the lock with 2888 * a ref. Perhaps this can eventually be optimized to not obtain the 2889 * lock in the first place for situations where the data does not 2890 * need to be resolved. 2891 * 2892 * NOTE! A chain->error must be tested by the caller upon return. 2893 * *errorp is only set based on issues which occur while 2894 * trying to reach the chain. 2895 */ 2896 return (chain); 2897 } 2898 2899 /* 2900 * After having issued a lookup we can iterate all matching keys. 2901 * 2902 * If chain is non-NULL we continue the iteration from just after it's index. 2903 * 2904 * If chain is NULL we assume the parent was exhausted and continue the 2905 * iteration at the next parent. 2906 * 2907 * If a fatal error occurs (typically an I/O error), a dummy chain is 2908 * returned with chain->error and error-identifying information set. This 2909 * chain will assert if you try to do anything fancy with it. 2910 * 2911 * XXX Depending on where the error occurs we should allow continued iteration. 2912 * 2913 * parent must be locked on entry and remains locked throughout. chain's 2914 * lock status must match flags. Chain is always at least referenced. 2915 * 2916 * WARNING! The MATCHIND flag does not apply to this function. 2917 */ 2918 hammer2_chain_t * 2919 hammer2_chain_next(hammer2_chain_t **parentp, hammer2_chain_t *chain, 2920 hammer2_key_t *key_nextp, 2921 hammer2_key_t key_beg, hammer2_key_t key_end, 2922 int *errorp, int flags) 2923 { 2924 hammer2_chain_t *parent; 2925 int how_maybe; 2926 2927 /* 2928 * Calculate locking flags for upward recursion. 2929 */ 2930 how_maybe = HAMMER2_RESOLVE_MAYBE; 2931 if (flags & HAMMER2_LOOKUP_SHARED) 2932 how_maybe |= HAMMER2_RESOLVE_SHARED; 2933 2934 parent = *parentp; 2935 *errorp = 0; 2936 2937 /* 2938 * Calculate the next index and recalculate the parent if necessary. 2939 */ 2940 if (chain) { 2941 key_beg = chain->bref.key + 2942 ((hammer2_key_t)1 << chain->bref.keybits); 2943 hammer2_chain_unlock(chain); 2944 hammer2_chain_drop(chain); 2945 2946 /* 2947 * chain invalid past this point, but we can still do a 2948 * pointer comparison w/parent. 2949 * 2950 * Any scan where the lookup returned degenerate data embedded 2951 * in the inode has an invalid index and must terminate. 2952 */ 2953 if (chain == parent) 2954 return(NULL); 2955 if (key_beg == 0 || key_beg > key_end) 2956 return(NULL); 2957 chain = NULL; 2958 } else if (parent->bref.type != HAMMER2_BREF_TYPE_INDIRECT && 2959 parent->bref.type != HAMMER2_BREF_TYPE_FREEMAP_NODE) { 2960 /* 2961 * We reached the end of the iteration. 2962 */ 2963 return (NULL); 2964 } else { 2965 /* 2966 * Continue iteration with next parent unless the current 2967 * parent covers the range. 2968 * 2969 * (This also handles the case of a deleted, empty indirect 2970 * node). 2971 */ 2972 key_beg = parent->bref.key + 2973 ((hammer2_key_t)1 << parent->bref.keybits); 2974 if (key_beg == 0 || key_beg > key_end) 2975 return (NULL); 2976 parent = hammer2_chain_repparent(parentp, how_maybe); 2977 } 2978 2979 /* 2980 * And execute 2981 */ 2982 return (hammer2_chain_lookup(parentp, key_nextp, 2983 key_beg, key_end, 2984 errorp, flags)); 2985 } 2986 2987 /* 2988 * Caller wishes to iterate chains under parent, loading new chains into 2989 * chainp. Caller must initialize *chainp to NULL and *firstp to 1, and 2990 * then call hammer2_chain_scan() repeatedly until a non-zero return. 2991 * During the scan, *firstp will be set to 0 and (*chainp) will be replaced 2992 * with the returned chain for the scan. The returned *chainp will be 2993 * locked and referenced. Any prior contents will be unlocked and dropped. 2994 * 2995 * Caller should check the return value. A normal scan EOF will return 2996 * exactly HAMMER2_ERROR_EOF. Any other non-zero value indicates an 2997 * error trying to access parent data. Any error in the returned chain 2998 * must be tested separately by the caller. 2999 * 3000 * (*chainp) is dropped on each scan, but will only be set if the returned 3001 * element itself can recurse. Leaf elements are NOT resolved, loaded, or 3002 * returned via *chainp. The caller will get their bref only. 3003 * 3004 * The raw scan function is similar to lookup/next but does not seek to a key. 3005 * Blockrefs are iterated via first_bref = (parent, NULL) and 3006 * next_chain = (parent, bref). 3007 * 3008 * The passed-in parent must be locked and its data resolved. The function 3009 * nominally returns a locked and referenced *chainp != NULL for chains 3010 * the caller might need to recurse on (and will dipose of any *chainp passed 3011 * in). The caller must check the chain->bref.type either way. 3012 */ 3013 int 3014 hammer2_chain_scan(hammer2_chain_t *parent, hammer2_chain_t **chainp, 3015 hammer2_blockref_t *bref, int *firstp, 3016 int flags) 3017 { 3018 hammer2_dev_t *hmp; 3019 hammer2_blockref_t *base; 3020 hammer2_blockref_t *bref_ptr; 3021 hammer2_key_t key; 3022 hammer2_key_t next_key; 3023 hammer2_chain_t *chain = NULL; 3024 int count = 0; 3025 int how_always = HAMMER2_RESOLVE_ALWAYS; 3026 int how_maybe = HAMMER2_RESOLVE_MAYBE; 3027 int how; 3028 int generation; 3029 int maxloops = 300000; 3030 int error; 3031 3032 hmp = parent->hmp; 3033 error = 0; 3034 3035 /* 3036 * Scan flags borrowed from lookup. 3037 */ 3038 if (flags & HAMMER2_LOOKUP_ALWAYS) { 3039 how_maybe = how_always; 3040 how = HAMMER2_RESOLVE_ALWAYS; 3041 } else if (flags & HAMMER2_LOOKUP_NODATA) { 3042 how = HAMMER2_RESOLVE_NEVER; 3043 } else { 3044 how = HAMMER2_RESOLVE_MAYBE; 3045 } 3046 if (flags & HAMMER2_LOOKUP_SHARED) { 3047 how_maybe |= HAMMER2_RESOLVE_SHARED; 3048 how_always |= HAMMER2_RESOLVE_SHARED; 3049 how |= HAMMER2_RESOLVE_SHARED; 3050 } 3051 3052 /* 3053 * Calculate key to locate first/next element, unlocking the previous 3054 * element as we go. Be careful, the key calculation can overflow. 3055 * 3056 * (also reset bref to NULL) 3057 */ 3058 if (*firstp) { 3059 key = 0; 3060 *firstp = 0; 3061 } else { 3062 key = bref->key + ((hammer2_key_t)1 << bref->keybits); 3063 if ((chain = *chainp) != NULL) { 3064 *chainp = NULL; 3065 hammer2_chain_unlock(chain); 3066 hammer2_chain_drop(chain); 3067 chain = NULL; 3068 } 3069 if (key == 0) { 3070 error |= HAMMER2_ERROR_EOF; 3071 goto done; 3072 } 3073 } 3074 3075 again: 3076 if (parent->error) { 3077 error = parent->error; 3078 goto done; 3079 } 3080 if (--maxloops == 0) 3081 panic("hammer2_chain_scan: maxloops"); 3082 3083 /* 3084 * Locate the blockref array. Currently we do a fully associative 3085 * search through the array. 3086 */ 3087 switch(parent->bref.type) { 3088 case HAMMER2_BREF_TYPE_INODE: 3089 /* 3090 * An inode with embedded data has no sub-chains. 3091 * 3092 * WARNING! Bulk scan code may pass a static chain marked 3093 * as BREF_TYPE_INODE with a copy of the volume 3094 * root blockset to snapshot the volume. 3095 */ 3096 if (parent->data->ipdata.meta.op_flags & 3097 HAMMER2_OPFLAG_DIRECTDATA) { 3098 error |= HAMMER2_ERROR_EOF; 3099 goto done; 3100 } 3101 base = &parent->data->ipdata.u.blockset.blockref[0]; 3102 count = HAMMER2_SET_COUNT; 3103 break; 3104 case HAMMER2_BREF_TYPE_FREEMAP_NODE: 3105 case HAMMER2_BREF_TYPE_INDIRECT: 3106 /* 3107 * Optimize indirect blocks in the INITIAL state to avoid 3108 * I/O. 3109 */ 3110 if (parent->flags & HAMMER2_CHAIN_INITIAL) { 3111 base = NULL; 3112 } else { 3113 if (parent->data == NULL) 3114 panic("parent->data is NULL"); 3115 base = &parent->data->npdata[0]; 3116 } 3117 count = parent->bytes / sizeof(hammer2_blockref_t); 3118 break; 3119 case HAMMER2_BREF_TYPE_VOLUME: 3120 base = &parent->data->voldata.sroot_blockset.blockref[0]; 3121 count = HAMMER2_SET_COUNT; 3122 break; 3123 case HAMMER2_BREF_TYPE_FREEMAP: 3124 base = &parent->data->blkset.blockref[0]; 3125 count = HAMMER2_SET_COUNT; 3126 break; 3127 default: 3128 panic("hammer2_chain_scan: unrecognized blockref type: %d", 3129 parent->bref.type); 3130 base = NULL; /* safety */ 3131 count = 0; /* safety */ 3132 } 3133 3134 /* 3135 * Merged scan to find next candidate. 3136 * 3137 * hammer2_base_*() functions require the parent->core.live_* fields 3138 * to be synchronized. 3139 * 3140 * We need to hold the spinlock to access the block array and RB tree 3141 * and to interlock chain creation. 3142 */ 3143 if ((parent->flags & HAMMER2_CHAIN_COUNTEDBREFS) == 0) 3144 hammer2_chain_countbrefs(parent, base, count); 3145 3146 next_key = 0; 3147 bref_ptr = NULL; 3148 hammer2_spin_ex(&parent->core.spin); 3149 chain = hammer2_combined_find(parent, base, count, 3150 &next_key, 3151 key, HAMMER2_KEY_MAX, 3152 &bref_ptr); 3153 generation = parent->core.generation; 3154 3155 /* 3156 * Exhausted parent chain, we're done. 3157 */ 3158 if (bref_ptr == NULL) { 3159 hammer2_spin_unex(&parent->core.spin); 3160 KKASSERT(chain == NULL); 3161 error |= HAMMER2_ERROR_EOF; 3162 goto done; 3163 } 3164 3165 /* 3166 * Copy into the supplied stack-based blockref. 3167 */ 3168 *bref = *bref_ptr; 3169 3170 /* 3171 * Selected from blockref or in-memory chain. 3172 */ 3173 if (chain == NULL) { 3174 switch(bref->type) { 3175 case HAMMER2_BREF_TYPE_INODE: 3176 case HAMMER2_BREF_TYPE_FREEMAP_NODE: 3177 case HAMMER2_BREF_TYPE_INDIRECT: 3178 case HAMMER2_BREF_TYPE_VOLUME: 3179 case HAMMER2_BREF_TYPE_FREEMAP: 3180 /* 3181 * Recursion, always get the chain 3182 */ 3183 hammer2_spin_unex(&parent->core.spin); 3184 chain = hammer2_chain_get(parent, generation, 3185 bref, how); 3186 if (chain == NULL) 3187 goto again; 3188 break; 3189 default: 3190 /* 3191 * No recursion, do not waste time instantiating 3192 * a chain, just iterate using the bref. 3193 */ 3194 hammer2_spin_unex(&parent->core.spin); 3195 break; 3196 } 3197 } else { 3198 /* 3199 * Recursion or not we need the chain in order to supply 3200 * the bref. 3201 */ 3202 hammer2_chain_ref(chain); 3203 hammer2_spin_unex(&parent->core.spin); 3204 hammer2_chain_lock(chain, how); 3205 } 3206 if (chain && 3207 (bcmp(bref, &chain->bref, sizeof(*bref)) || 3208 chain->parent != parent)) { 3209 hammer2_chain_unlock(chain); 3210 hammer2_chain_drop(chain); 3211 chain = NULL; 3212 goto again; 3213 } 3214 3215 /* 3216 * Skip deleted chains (XXX cache 'i' end-of-block-array? XXX) 3217 * 3218 * NOTE: chain's key range is not relevant as there might be 3219 * one-offs within the range that are not deleted. 3220 * 3221 * NOTE: XXX this could create problems with scans used in 3222 * situations other than mount-time recovery. 3223 * 3224 * NOTE: Lookups can race delete-duplicate because 3225 * delete-duplicate does not lock the parent's core 3226 * (they just use the spinlock on the core). 3227 */ 3228 if (chain && (chain->flags & HAMMER2_CHAIN_DELETED)) { 3229 hammer2_chain_unlock(chain); 3230 hammer2_chain_drop(chain); 3231 chain = NULL; 3232 3233 key = next_key; 3234 if (key == 0) { 3235 error |= HAMMER2_ERROR_EOF; 3236 goto done; 3237 } 3238 goto again; 3239 } 3240 3241 done: 3242 /* 3243 * All done, return the bref or NULL, supply chain if necessary. 3244 */ 3245 if (chain) 3246 *chainp = chain; 3247 return (error); 3248 } 3249 3250 /* 3251 * Create and return a new hammer2 system memory structure of the specified 3252 * key, type and size and insert it under (*parentp). This is a full 3253 * insertion, based on the supplied key/keybits, and may involve creating 3254 * indirect blocks and moving other chains around via delete/duplicate. 3255 * 3256 * This call can be made with parent == NULL as long as a non -1 methods 3257 * is supplied. hmp must also be supplied in this situation (otherwise 3258 * hmp is extracted from the supplied parent). The chain will be detached 3259 * from the topology. A later call with both parent and chain can be made 3260 * to attach it. 3261 * 3262 * THE CALLER MUST HAVE ALREADY PROPERLY SEEKED (*parentp) TO THE INSERTION 3263 * POINT SANS ANY REQUIRED INDIRECT BLOCK CREATIONS DUE TO THE ARRAY BEING 3264 * FULL. This typically means that the caller is creating the chain after 3265 * doing a hammer2_chain_lookup(). 3266 * 3267 * (*parentp) must be exclusive locked and may be replaced on return 3268 * depending on how much work the function had to do. 3269 * 3270 * (*parentp) must not be errored or this function will assert. 3271 * 3272 * (*chainp) usually starts out NULL and returns the newly created chain, 3273 * but if the caller desires the caller may allocate a disconnected chain 3274 * and pass it in instead. 3275 * 3276 * This function should NOT be used to insert INDIRECT blocks. It is 3277 * typically used to create/insert inodes and data blocks. 3278 * 3279 * Caller must pass-in an exclusively locked parent the new chain is to 3280 * be inserted under, and optionally pass-in a disconnected, exclusively 3281 * locked chain to insert (else we create a new chain). The function will 3282 * adjust (*parentp) as necessary, create or connect the chain, and 3283 * return an exclusively locked chain in *chainp. 3284 * 3285 * When creating a PFSROOT inode under the super-root, pmp is typically NULL 3286 * and will be reassigned. 3287 * 3288 * NOTE: returns HAMMER_ERROR_* flags 3289 */ 3290 int 3291 hammer2_chain_create(hammer2_chain_t **parentp, hammer2_chain_t **chainp, 3292 hammer2_dev_t *hmp, hammer2_pfs_t *pmp, int methods, 3293 hammer2_key_t key, int keybits, int type, size_t bytes, 3294 hammer2_tid_t mtid, hammer2_off_t dedup_off, int flags) 3295 { 3296 hammer2_chain_t *chain; 3297 hammer2_chain_t *parent; 3298 hammer2_blockref_t *base; 3299 hammer2_blockref_t dummy; 3300 int allocated = 0; 3301 int error = 0; 3302 int count; 3303 int maxloops = 300000; 3304 3305 /* 3306 * Topology may be crossing a PFS boundary. 3307 */ 3308 parent = *parentp; 3309 if (parent) { 3310 KKASSERT(hammer2_mtx_owned(&parent->lock)); 3311 KKASSERT(parent->error == 0); 3312 hmp = parent->hmp; 3313 } 3314 chain = *chainp; 3315 3316 if (chain == NULL) { 3317 /* 3318 * First allocate media space and construct the dummy bref, 3319 * then allocate the in-memory chain structure. Set the 3320 * INITIAL flag for fresh chains which do not have embedded 3321 * data. 3322 * 3323 * XXX for now set the check mode of the child based on 3324 * the parent or, if the parent is an inode, the 3325 * specification in the inode. 3326 */ 3327 bzero(&dummy, sizeof(dummy)); 3328 dummy.type = type; 3329 dummy.key = key; 3330 dummy.keybits = keybits; 3331 dummy.data_off = hammer2_getradix(bytes); 3332 3333 /* 3334 * Inherit methods from parent by default. Primarily used 3335 * for BREF_TYPE_DATA. Non-data types *must* be set to 3336 * a non-NONE check algorithm. 3337 */ 3338 if (methods == -1) 3339 dummy.methods = parent->bref.methods; 3340 else 3341 dummy.methods = (uint8_t)methods; 3342 3343 if (type != HAMMER2_BREF_TYPE_DATA && 3344 HAMMER2_DEC_CHECK(dummy.methods) == HAMMER2_CHECK_NONE) { 3345 dummy.methods |= 3346 HAMMER2_ENC_CHECK(HAMMER2_CHECK_DEFAULT); 3347 } 3348 3349 chain = hammer2_chain_alloc(hmp, pmp, &dummy); 3350 3351 /* 3352 * Lock the chain manually, chain_lock will load the chain 3353 * which we do NOT want to do. (note: chain->refs is set 3354 * to 1 by chain_alloc() for us, but lockcnt is not). 3355 */ 3356 chain->lockcnt = 1; 3357 hammer2_mtx_ex(&chain->lock); 3358 allocated = 1; 3359 ++curthread->td_tracker; 3360 3361 /* 3362 * Set INITIAL to optimize I/O. The flag will generally be 3363 * processed when we call hammer2_chain_modify(). 3364 * 3365 * Recalculate bytes to reflect the actual media block 3366 * allocation. Handle special case radix 0 == 0 bytes. 3367 */ 3368 bytes = (size_t)(chain->bref.data_off & HAMMER2_OFF_MASK_RADIX); 3369 if (bytes) 3370 bytes = (hammer2_off_t)1 << bytes; 3371 chain->bytes = bytes; 3372 3373 switch(type) { 3374 case HAMMER2_BREF_TYPE_VOLUME: 3375 case HAMMER2_BREF_TYPE_FREEMAP: 3376 panic("hammer2_chain_create: called with volume type"); 3377 break; 3378 case HAMMER2_BREF_TYPE_INDIRECT: 3379 panic("hammer2_chain_create: cannot be used to" 3380 "create indirect block"); 3381 break; 3382 case HAMMER2_BREF_TYPE_FREEMAP_NODE: 3383 panic("hammer2_chain_create: cannot be used to" 3384 "create freemap root or node"); 3385 break; 3386 case HAMMER2_BREF_TYPE_FREEMAP_LEAF: 3387 KKASSERT(bytes == sizeof(chain->data->bmdata)); 3388 /* fall through */ 3389 case HAMMER2_BREF_TYPE_DIRENT: 3390 case HAMMER2_BREF_TYPE_INODE: 3391 case HAMMER2_BREF_TYPE_DATA: 3392 default: 3393 /* 3394 * leave chain->data NULL, set INITIAL 3395 */ 3396 KKASSERT(chain->data == NULL); 3397 atomic_set_int(&chain->flags, HAMMER2_CHAIN_INITIAL); 3398 break; 3399 } 3400 } else { 3401 /* 3402 * We are reattaching a previously deleted chain, possibly 3403 * under a new parent and possibly with a new key/keybits. 3404 * The chain does not have to be in a modified state. The 3405 * UPDATE flag will be set later on in this routine. 3406 * 3407 * Do NOT mess with the current state of the INITIAL flag. 3408 */ 3409 chain->bref.key = key; 3410 chain->bref.keybits = keybits; 3411 if (chain->flags & HAMMER2_CHAIN_DELETED) 3412 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_DELETED); 3413 KKASSERT(chain->parent == NULL); 3414 } 3415 3416 /* 3417 * Set the appropriate bref flag if requested. 3418 * 3419 * NOTE! Callers can call this function to move chains without 3420 * knowing about special flags, so don't clear bref flags 3421 * here! 3422 */ 3423 if (flags & HAMMER2_INSERT_PFSROOT) 3424 chain->bref.flags |= HAMMER2_BREF_FLAG_PFSROOT; 3425 3426 if (parent == NULL) 3427 goto skip; 3428 3429 /* 3430 * Calculate how many entries we have in the blockref array and 3431 * determine if an indirect block is required when inserting into 3432 * the parent. 3433 */ 3434 again: 3435 if (--maxloops == 0) 3436 panic("hammer2_chain_create: maxloops"); 3437 3438 switch(parent->bref.type) { 3439 case HAMMER2_BREF_TYPE_INODE: 3440 if ((parent->data->ipdata.meta.op_flags & 3441 HAMMER2_OPFLAG_DIRECTDATA) != 0) { 3442 kprintf("hammer2: parent set for direct-data! " 3443 "pkey=%016jx ckey=%016jx\n", 3444 parent->bref.key, 3445 chain->bref.key); 3446 } 3447 KKASSERT((parent->data->ipdata.meta.op_flags & 3448 HAMMER2_OPFLAG_DIRECTDATA) == 0); 3449 KKASSERT(parent->data != NULL); 3450 base = &parent->data->ipdata.u.blockset.blockref[0]; 3451 count = HAMMER2_SET_COUNT; 3452 break; 3453 case HAMMER2_BREF_TYPE_INDIRECT: 3454 case HAMMER2_BREF_TYPE_FREEMAP_NODE: 3455 if (parent->flags & HAMMER2_CHAIN_INITIAL) 3456 base = NULL; 3457 else 3458 base = &parent->data->npdata[0]; 3459 count = parent->bytes / sizeof(hammer2_blockref_t); 3460 break; 3461 case HAMMER2_BREF_TYPE_VOLUME: 3462 KKASSERT(parent->data != NULL); 3463 base = &parent->data->voldata.sroot_blockset.blockref[0]; 3464 count = HAMMER2_SET_COUNT; 3465 break; 3466 case HAMMER2_BREF_TYPE_FREEMAP: 3467 KKASSERT(parent->data != NULL); 3468 base = &parent->data->blkset.blockref[0]; 3469 count = HAMMER2_SET_COUNT; 3470 break; 3471 default: 3472 panic("hammer2_chain_create: unrecognized blockref type: %d", 3473 parent->bref.type); 3474 base = NULL; 3475 count = 0; 3476 break; 3477 } 3478 3479 /* 3480 * Make sure we've counted the brefs 3481 */ 3482 if ((parent->flags & HAMMER2_CHAIN_COUNTEDBREFS) == 0) 3483 hammer2_chain_countbrefs(parent, base, count); 3484 3485 KASSERT(parent->core.live_count >= 0 && 3486 parent->core.live_count <= count, 3487 ("bad live_count %d/%d (%02x, %d)", 3488 parent->core.live_count, count, 3489 parent->bref.type, parent->bytes)); 3490 3491 /* 3492 * If no free blockref could be found we must create an indirect 3493 * block and move a number of blockrefs into it. With the parent 3494 * locked we can safely lock each child in order to delete+duplicate 3495 * it without causing a deadlock. 3496 * 3497 * This may return the new indirect block or the old parent depending 3498 * on where the key falls. NULL is returned on error. 3499 */ 3500 if (parent->core.live_count == count) { 3501 hammer2_chain_t *nparent; 3502 3503 KKASSERT((flags & HAMMER2_INSERT_SAMEPARENT) == 0); 3504 3505 nparent = hammer2_chain_create_indirect(parent, key, keybits, 3506 mtid, type, &error); 3507 if (nparent == NULL) { 3508 if (allocated) 3509 hammer2_chain_drop(chain); 3510 chain = NULL; 3511 goto done; 3512 } 3513 if (parent != nparent) { 3514 hammer2_chain_unlock(parent); 3515 hammer2_chain_drop(parent); 3516 parent = *parentp = nparent; 3517 } 3518 goto again; 3519 } 3520 3521 /* 3522 * fall through if parent, or skip to here if no parent. 3523 */ 3524 skip: 3525 if (chain->flags & HAMMER2_CHAIN_DELETED) 3526 kprintf("Inserting deleted chain @%016jx\n", 3527 chain->bref.key); 3528 3529 /* 3530 * Link the chain into its parent. 3531 */ 3532 if (chain->parent != NULL) 3533 panic("hammer2: hammer2_chain_create: chain already connected"); 3534 KKASSERT(chain->parent == NULL); 3535 if (parent) { 3536 KKASSERT(parent->core.live_count < count); 3537 hammer2_chain_insert(parent, chain, 3538 HAMMER2_CHAIN_INSERT_SPIN | 3539 HAMMER2_CHAIN_INSERT_LIVE, 3540 0); 3541 } 3542 3543 if (allocated) { 3544 /* 3545 * Mark the newly created chain modified. This will cause 3546 * UPDATE to be set and process the INITIAL flag. 3547 * 3548 * Device buffers are not instantiated for DATA elements 3549 * as these are handled by logical buffers. 3550 * 3551 * Indirect and freemap node indirect blocks are handled 3552 * by hammer2_chain_create_indirect() and not by this 3553 * function. 3554 * 3555 * Data for all other bref types is expected to be 3556 * instantiated (INODE, LEAF). 3557 */ 3558 switch(chain->bref.type) { 3559 case HAMMER2_BREF_TYPE_DATA: 3560 case HAMMER2_BREF_TYPE_FREEMAP_LEAF: 3561 case HAMMER2_BREF_TYPE_DIRENT: 3562 case HAMMER2_BREF_TYPE_INODE: 3563 error = hammer2_chain_modify(chain, mtid, dedup_off, 3564 HAMMER2_MODIFY_OPTDATA); 3565 break; 3566 default: 3567 /* 3568 * Remaining types are not supported by this function. 3569 * In particular, INDIRECT and LEAF_NODE types are 3570 * handled by create_indirect(). 3571 */ 3572 panic("hammer2_chain_create: bad type: %d", 3573 chain->bref.type); 3574 /* NOT REACHED */ 3575 break; 3576 } 3577 } else { 3578 /* 3579 * When reconnecting a chain we must set UPDATE and 3580 * setflush so the flush recognizes that it must update 3581 * the bref in the parent. 3582 */ 3583 if ((chain->flags & HAMMER2_CHAIN_UPDATE) == 0) 3584 atomic_set_int(&chain->flags, HAMMER2_CHAIN_UPDATE); 3585 } 3586 3587 /* 3588 * We must setflush(parent) to ensure that it recurses through to 3589 * chain. setflush(chain) might not work because ONFLUSH is possibly 3590 * already set in the chain (so it won't recurse up to set it in the 3591 * parent). 3592 */ 3593 if (parent) 3594 hammer2_chain_setflush(parent); 3595 3596 done: 3597 *chainp = chain; 3598 3599 return (error); 3600 } 3601 3602 /* 3603 * Move the chain from its old parent to a new parent. The chain must have 3604 * already been deleted or already disconnected (or never associated) with 3605 * a parent. The chain is reassociated with the new parent and the deleted 3606 * flag will be cleared (no longer deleted). The chain's modification state 3607 * is not altered. 3608 * 3609 * THE CALLER MUST HAVE ALREADY PROPERLY SEEKED (parent) TO THE INSERTION 3610 * POINT SANS ANY REQUIRED INDIRECT BLOCK CREATIONS DUE TO THE ARRAY BEING 3611 * FULL. This typically means that the caller is creating the chain after 3612 * doing a hammer2_chain_lookup(). 3613 * 3614 * Neither (parent) or (chain) can be errored. 3615 * 3616 * If (parent) is non-NULL then the chain is inserted under the parent. 3617 * 3618 * If (parent) is NULL then the newly duplicated chain is not inserted 3619 * anywhere, similar to if it had just been chain_alloc()'d (suitable for 3620 * passing into hammer2_chain_create() after this function returns). 3621 * 3622 * WARNING! This function calls create which means it can insert indirect 3623 * blocks. This can cause other unrelated chains in the parent to 3624 * be moved to a newly inserted indirect block in addition to the 3625 * specific chain. 3626 */ 3627 void 3628 hammer2_chain_rename(hammer2_chain_t **parentp, hammer2_chain_t *chain, 3629 hammer2_tid_t mtid, int flags) 3630 { 3631 hammer2_blockref_t *bref; 3632 hammer2_dev_t *hmp; 3633 hammer2_chain_t *parent; 3634 size_t bytes; 3635 3636 /* 3637 * WARNING! We should never resolve DATA to device buffers 3638 * (XXX allow it if the caller did?), and since 3639 * we currently do not have the logical buffer cache 3640 * buffer in-hand to fix its cached physical offset 3641 * we also force the modify code to not COW it. XXX 3642 * 3643 * NOTE! We allow error'd chains to be renamed. The bref itself 3644 * is good and can be renamed. The content, however, may 3645 * be inaccessible. 3646 */ 3647 hmp = chain->hmp; 3648 KKASSERT(chain->parent == NULL); 3649 /*KKASSERT(chain->error == 0); allow */ 3650 3651 /* 3652 * Now create a duplicate of the chain structure, associating 3653 * it with the same core, making it the same size, pointing it 3654 * to the same bref (the same media block). 3655 * 3656 * NOTE: Handle special radix == 0 case (means 0 bytes). 3657 */ 3658 bref = &chain->bref; 3659 bytes = (size_t)(bref->data_off & HAMMER2_OFF_MASK_RADIX); 3660 if (bytes) 3661 bytes = (hammer2_off_t)1 << bytes; 3662 3663 /* 3664 * If parent is not NULL the duplicated chain will be entered under 3665 * the parent and the UPDATE bit set to tell flush to update 3666 * the blockref. 3667 * 3668 * We must setflush(parent) to ensure that it recurses through to 3669 * chain. setflush(chain) might not work because ONFLUSH is possibly 3670 * already set in the chain (so it won't recurse up to set it in the 3671 * parent). 3672 * 3673 * Having both chains locked is extremely important for atomicy. 3674 */ 3675 if (parentp && (parent = *parentp) != NULL) { 3676 KKASSERT(hammer2_mtx_owned(&parent->lock)); 3677 KKASSERT(parent->refs > 0); 3678 KKASSERT(parent->error == 0); 3679 3680 hammer2_chain_create(parentp, &chain, NULL, chain->pmp, 3681 HAMMER2_METH_DEFAULT, 3682 bref->key, bref->keybits, bref->type, 3683 chain->bytes, mtid, 0, flags); 3684 KKASSERT(chain->flags & HAMMER2_CHAIN_UPDATE); 3685 hammer2_chain_setflush(*parentp); 3686 } 3687 } 3688 3689 /* 3690 * This works in tandem with delete_obref() to install a blockref in 3691 * (typically) an indirect block that is associated with the chain being 3692 * moved to *parentp. 3693 * 3694 * The reason we need this function is that the caller needs to maintain 3695 * the blockref as it was, and not generate a new blockref for what might 3696 * be a modified chain. Otherwise stuff will leak into the flush that 3697 * the flush code's FLUSH_INODE_STOP flag is unable to catch. 3698 * 3699 * It is EXTREMELY important that we properly set CHAIN_BMAPUPD and 3700 * CHAIN_UPDATE. We must set BMAPUPD if the bref does not match, and 3701 * we must clear CHAIN_UPDATE (that was likely set by the chain_rename) if 3702 * it does. Otherwise we can end up in a situation where H2 is unable to 3703 * clean up the in-memory chain topology. 3704 * 3705 * The reason for this is that flushes do not generally flush through 3706 * BREF_TYPE_INODE chains and depend on a hammer2_inode_t queued to syncq 3707 * or sideq to properly flush and dispose of the related inode chain's flags. 3708 * Situations where the inode is not actually modified by the frontend, 3709 * but where we have to move the related chains around as we insert or cleanup 3710 * indirect blocks, can leave us with a 'dirty' (non-disposable) in-memory 3711 * inode chain that does not have a hammer2_inode_t associated with it. 3712 */ 3713 void 3714 hammer2_chain_rename_obref(hammer2_chain_t **parentp, hammer2_chain_t *chain, 3715 hammer2_tid_t mtid, int flags, 3716 hammer2_blockref_t *obref) 3717 { 3718 hammer2_chain_rename(parentp, chain, mtid, flags); 3719 3720 if (obref->type != HAMMER2_BREF_TYPE_EMPTY) { 3721 hammer2_blockref_t *tbase; 3722 int tcount; 3723 3724 KKASSERT((chain->flags & HAMMER2_CHAIN_BMAPPED) == 0); 3725 hammer2_chain_modify(*parentp, mtid, 0, 0); 3726 tbase = hammer2_chain_base_and_count(*parentp, &tcount); 3727 hammer2_base_insert(*parentp, tbase, tcount, chain, obref); 3728 if (bcmp(obref, &chain->bref, sizeof(chain->bref))) { 3729 atomic_set_int(&chain->flags, HAMMER2_CHAIN_BMAPUPD | 3730 HAMMER2_CHAIN_UPDATE); 3731 } else { 3732 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_UPDATE); 3733 } 3734 } 3735 } 3736 3737 /* 3738 * Helper function for deleting chains. 3739 * 3740 * The chain is removed from the live view (the RBTREE) as well as the parent's 3741 * blockmap. Both chain and its parent must be locked. 3742 * 3743 * parent may not be errored. chain can be errored. 3744 */ 3745 static int 3746 _hammer2_chain_delete_helper(hammer2_chain_t *parent, hammer2_chain_t *chain, 3747 hammer2_tid_t mtid, int flags, 3748 hammer2_blockref_t *obref) 3749 { 3750 hammer2_dev_t *hmp; 3751 int error = 0; 3752 3753 KKASSERT((chain->flags & (HAMMER2_CHAIN_DELETED | 3754 HAMMER2_CHAIN_FICTITIOUS)) == 0); 3755 KKASSERT(chain->parent == parent); 3756 hmp = chain->hmp; 3757 3758 if (chain->flags & HAMMER2_CHAIN_BMAPPED) { 3759 /* 3760 * Chain is blockmapped, so there must be a parent. 3761 * Atomically remove the chain from the parent and remove 3762 * the blockmap entry. The parent must be set modified 3763 * to remove the blockmap entry. 3764 */ 3765 hammer2_blockref_t *base; 3766 int count; 3767 3768 KKASSERT(parent != NULL); 3769 KKASSERT(parent->error == 0); 3770 KKASSERT((parent->flags & HAMMER2_CHAIN_INITIAL) == 0); 3771 error = hammer2_chain_modify(parent, mtid, 0, 0); 3772 if (error) 3773 goto done; 3774 3775 /* 3776 * Calculate blockmap pointer 3777 */ 3778 KKASSERT(chain->flags & HAMMER2_CHAIN_ONRBTREE); 3779 hammer2_spin_ex(&chain->core.spin); 3780 hammer2_spin_ex(&parent->core.spin); 3781 3782 atomic_set_int(&chain->flags, HAMMER2_CHAIN_DELETED); 3783 atomic_add_int(&parent->core.live_count, -1); 3784 ++parent->core.generation; 3785 RB_REMOVE(hammer2_chain_tree, &parent->core.rbtree, chain); 3786 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_ONRBTREE); 3787 --parent->core.chain_count; 3788 chain->parent = NULL; 3789 3790 switch(parent->bref.type) { 3791 case HAMMER2_BREF_TYPE_INODE: 3792 /* 3793 * Access the inode's block array. However, there 3794 * is no block array if the inode is flagged 3795 * DIRECTDATA. 3796 */ 3797 if (parent->data && 3798 (parent->data->ipdata.meta.op_flags & 3799 HAMMER2_OPFLAG_DIRECTDATA) == 0) { 3800 base = 3801 &parent->data->ipdata.u.blockset.blockref[0]; 3802 } else { 3803 base = NULL; 3804 } 3805 count = HAMMER2_SET_COUNT; 3806 break; 3807 case HAMMER2_BREF_TYPE_INDIRECT: 3808 case HAMMER2_BREF_TYPE_FREEMAP_NODE: 3809 if (parent->data) 3810 base = &parent->data->npdata[0]; 3811 else 3812 base = NULL; 3813 count = parent->bytes / sizeof(hammer2_blockref_t); 3814 break; 3815 case HAMMER2_BREF_TYPE_VOLUME: 3816 base = &parent->data->voldata. 3817 sroot_blockset.blockref[0]; 3818 count = HAMMER2_SET_COUNT; 3819 break; 3820 case HAMMER2_BREF_TYPE_FREEMAP: 3821 base = &parent->data->blkset.blockref[0]; 3822 count = HAMMER2_SET_COUNT; 3823 break; 3824 default: 3825 base = NULL; 3826 count = 0; 3827 panic("_hammer2_chain_delete_helper: " 3828 "unrecognized blockref type: %d", 3829 parent->bref.type); 3830 } 3831 3832 /* 3833 * delete blockmapped chain from its parent. 3834 * 3835 * The parent is not affected by any statistics in chain 3836 * which are pending synchronization. That is, there is 3837 * nothing to undo in the parent since they have not yet 3838 * been incorporated into the parent. 3839 * 3840 * The parent is affected by statistics stored in inodes. 3841 * Those have already been synchronized, so they must be 3842 * undone. XXX split update possible w/delete in middle? 3843 */ 3844 if (base) { 3845 hammer2_base_delete(parent, base, count, chain, obref); 3846 } 3847 hammer2_spin_unex(&parent->core.spin); 3848 hammer2_spin_unex(&chain->core.spin); 3849 } else if (chain->flags & HAMMER2_CHAIN_ONRBTREE) { 3850 /* 3851 * Chain is not blockmapped but a parent is present. 3852 * Atomically remove the chain from the parent. There is 3853 * no blockmap entry to remove. 3854 * 3855 * Because chain was associated with a parent but not 3856 * synchronized, the chain's *_count_up fields contain 3857 * inode adjustment statistics which must be undone. 3858 */ 3859 hammer2_spin_ex(&chain->core.spin); 3860 hammer2_spin_ex(&parent->core.spin); 3861 atomic_set_int(&chain->flags, HAMMER2_CHAIN_DELETED); 3862 atomic_add_int(&parent->core.live_count, -1); 3863 ++parent->core.generation; 3864 RB_REMOVE(hammer2_chain_tree, &parent->core.rbtree, chain); 3865 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_ONRBTREE); 3866 --parent->core.chain_count; 3867 chain->parent = NULL; 3868 hammer2_spin_unex(&parent->core.spin); 3869 hammer2_spin_unex(&chain->core.spin); 3870 } else { 3871 /* 3872 * Chain is not blockmapped and has no parent. This 3873 * is a degenerate case. 3874 */ 3875 atomic_set_int(&chain->flags, HAMMER2_CHAIN_DELETED); 3876 } 3877 done: 3878 return error; 3879 } 3880 3881 /* 3882 * Create an indirect block that covers one or more of the elements in the 3883 * current parent. Either returns the existing parent with no locking or 3884 * ref changes or returns the new indirect block locked and referenced 3885 * and leaving the original parent lock/ref intact as well. 3886 * 3887 * If an error occurs, NULL is returned and *errorp is set to the H2 error. 3888 * 3889 * The returned chain depends on where the specified key falls. 3890 * 3891 * The key/keybits for the indirect mode only needs to follow three rules: 3892 * 3893 * (1) That all elements underneath it fit within its key space and 3894 * 3895 * (2) That all elements outside it are outside its key space. 3896 * 3897 * (3) When creating the new indirect block any elements in the current 3898 * parent that fit within the new indirect block's keyspace must be 3899 * moved into the new indirect block. 3900 * 3901 * (4) The keyspace chosen for the inserted indirect block CAN cover a wider 3902 * keyspace the the current parent, but lookup/iteration rules will 3903 * ensure (and must ensure) that rule (2) for all parents leading up 3904 * to the nearest inode or the root volume header is adhered to. This 3905 * is accomplished by always recursing through matching keyspaces in 3906 * the hammer2_chain_lookup() and hammer2_chain_next() API. 3907 * 3908 * The current implementation calculates the current worst-case keyspace by 3909 * iterating the current parent and then divides it into two halves, choosing 3910 * whichever half has the most elements (not necessarily the half containing 3911 * the requested key). 3912 * 3913 * We can also opt to use the half with the least number of elements. This 3914 * causes lower-numbered keys (aka logical file offsets) to recurse through 3915 * fewer indirect blocks and higher-numbered keys to recurse through more. 3916 * This also has the risk of not moving enough elements to the new indirect 3917 * block and being forced to create several indirect blocks before the element 3918 * can be inserted. 3919 * 3920 * Must be called with an exclusively locked parent. 3921 * 3922 * NOTE: *errorp set to HAMMER_ERROR_* flags 3923 */ 3924 static int hammer2_chain_indkey_freemap(hammer2_chain_t *parent, 3925 hammer2_key_t *keyp, int keybits, 3926 hammer2_blockref_t *base, int count); 3927 static int hammer2_chain_indkey_file(hammer2_chain_t *parent, 3928 hammer2_key_t *keyp, int keybits, 3929 hammer2_blockref_t *base, int count, 3930 int ncount); 3931 static int hammer2_chain_indkey_dir(hammer2_chain_t *parent, 3932 hammer2_key_t *keyp, int keybits, 3933 hammer2_blockref_t *base, int count, 3934 int ncount); 3935 static 3936 hammer2_chain_t * 3937 hammer2_chain_create_indirect(hammer2_chain_t *parent, 3938 hammer2_key_t create_key, int create_bits, 3939 hammer2_tid_t mtid, int for_type, int *errorp) 3940 { 3941 hammer2_dev_t *hmp; 3942 hammer2_blockref_t *base; 3943 hammer2_blockref_t *bref; 3944 hammer2_blockref_t bcopy; 3945 hammer2_chain_t *chain; 3946 hammer2_chain_t *ichain; 3947 hammer2_chain_t dummy; 3948 hammer2_key_t key = create_key; 3949 hammer2_key_t key_beg; 3950 hammer2_key_t key_end; 3951 hammer2_key_t key_next; 3952 int keybits = create_bits; 3953 int count; 3954 int ncount; 3955 int nbytes; 3956 int loops; 3957 int error; 3958 int reason; 3959 int generation; 3960 int maxloops = 300000; 3961 3962 /* 3963 * Calculate the base blockref pointer or NULL if the chain 3964 * is known to be empty. We need to calculate the array count 3965 * for RB lookups either way. 3966 */ 3967 hmp = parent->hmp; 3968 KKASSERT(hammer2_mtx_owned(&parent->lock)); 3969 3970 /* 3971 * Pre-modify the parent now to avoid having to deal with error 3972 * processing if we tried to later (in the middle of our loop). 3973 * 3974 * We are going to be moving bref's around, the indirect blocks 3975 * cannot be in an initial state. Do not pass MODIFY_OPTDATA. 3976 */ 3977 *errorp = hammer2_chain_modify(parent, mtid, 0, 0); 3978 if (*errorp) { 3979 kprintf("hammer2_create_indirect: error %08x %s\n", 3980 *errorp, hammer2_error_str(*errorp)); 3981 return NULL; 3982 } 3983 KKASSERT((parent->flags & HAMMER2_CHAIN_INITIAL) == 0); 3984 3985 /*hammer2_chain_modify(&parent, HAMMER2_MODIFY_OPTDATA);*/ 3986 base = hammer2_chain_base_and_count(parent, &count); 3987 3988 /* 3989 * dummy used in later chain allocation (no longer used for lookups). 3990 */ 3991 bzero(&dummy, sizeof(dummy)); 3992 3993 /* 3994 * How big should our new indirect block be? It has to be at least 3995 * as large as its parent for splits to work properly. 3996 * 3997 * The freemap uses a specific indirect block size. The number of 3998 * levels are built dynamically and ultimately depend on the size 3999 * volume. Because freemap blocks are taken from the reserved areas 4000 * of the volume our goal is efficiency (fewer levels) and not so 4001 * much to save disk space. 4002 * 4003 * The first indirect block level for a directory usually uses 4004 * HAMMER2_IND_BYTES_MIN (4KB = 32 directory entries). Due to 4005 * the hash mechanism, this typically gives us a nominal 4006 * 32 * 4 entries with one level of indirection. 4007 * 4008 * We use HAMMER2_IND_BYTES_NOM (16KB = 128 blockrefs) for FILE 4009 * indirect blocks. The initial 4 entries in the inode gives us 4010 * 256KB. Up to 4 indirect blocks gives us 32MB. Three levels 4011 * of indirection gives us 137GB, and so forth. H2 can support 4012 * huge file sizes but they are not typical, so we try to stick 4013 * with compactness and do not use a larger indirect block size. 4014 * 4015 * We could use 64KB (PBUFSIZE), giving us 512 blockrefs, but 4016 * due to the way indirect blocks are created this usually winds 4017 * up being extremely inefficient for small files. Even though 4018 * 16KB requires more levels of indirection for very large files, 4019 * the 16KB records can be ganged together into 64KB DIOs. 4020 */ 4021 if (for_type == HAMMER2_BREF_TYPE_FREEMAP_NODE || 4022 for_type == HAMMER2_BREF_TYPE_FREEMAP_LEAF) { 4023 nbytes = HAMMER2_FREEMAP_LEVELN_PSIZE; 4024 } else if (parent->bref.type == HAMMER2_BREF_TYPE_INODE) { 4025 if (parent->data->ipdata.meta.type == 4026 HAMMER2_OBJTYPE_DIRECTORY) 4027 nbytes = HAMMER2_IND_BYTES_MIN; /* 4KB = 32 entries */ 4028 else 4029 nbytes = HAMMER2_IND_BYTES_NOM; /* 16KB = ~8MB file */ 4030 4031 } else { 4032 nbytes = HAMMER2_IND_BYTES_NOM; 4033 } 4034 if (nbytes < count * sizeof(hammer2_blockref_t)) { 4035 KKASSERT(for_type != HAMMER2_BREF_TYPE_FREEMAP_NODE && 4036 for_type != HAMMER2_BREF_TYPE_FREEMAP_LEAF); 4037 nbytes = count * sizeof(hammer2_blockref_t); 4038 } 4039 ncount = nbytes / sizeof(hammer2_blockref_t); 4040 4041 /* 4042 * When creating an indirect block for a freemap node or leaf 4043 * the key/keybits must be fitted to static radix levels because 4044 * particular radix levels use particular reserved blocks in the 4045 * related zone. 4046 * 4047 * This routine calculates the key/radix of the indirect block 4048 * we need to create, and whether it is on the high-side or the 4049 * low-side. 4050 */ 4051 switch(for_type) { 4052 case HAMMER2_BREF_TYPE_FREEMAP_NODE: 4053 case HAMMER2_BREF_TYPE_FREEMAP_LEAF: 4054 keybits = hammer2_chain_indkey_freemap(parent, &key, keybits, 4055 base, count); 4056 break; 4057 case HAMMER2_BREF_TYPE_DATA: 4058 keybits = hammer2_chain_indkey_file(parent, &key, keybits, 4059 base, count, ncount); 4060 break; 4061 case HAMMER2_BREF_TYPE_DIRENT: 4062 case HAMMER2_BREF_TYPE_INODE: 4063 keybits = hammer2_chain_indkey_dir(parent, &key, keybits, 4064 base, count, ncount); 4065 break; 4066 default: 4067 panic("illegal indirect block for bref type %d", for_type); 4068 break; 4069 } 4070 4071 /* 4072 * Normalize the key for the radix being represented, keeping the 4073 * high bits and throwing away the low bits. 4074 */ 4075 key &= ~(((hammer2_key_t)1 << keybits) - 1); 4076 4077 /* 4078 * Ok, create our new indirect block 4079 */ 4080 if (for_type == HAMMER2_BREF_TYPE_FREEMAP_NODE || 4081 for_type == HAMMER2_BREF_TYPE_FREEMAP_LEAF) { 4082 dummy.bref.type = HAMMER2_BREF_TYPE_FREEMAP_NODE; 4083 } else { 4084 dummy.bref.type = HAMMER2_BREF_TYPE_INDIRECT; 4085 } 4086 dummy.bref.key = key; 4087 dummy.bref.keybits = keybits; 4088 dummy.bref.data_off = hammer2_getradix(nbytes); 4089 dummy.bref.methods = 4090 HAMMER2_ENC_CHECK(HAMMER2_DEC_CHECK(parent->bref.methods)) | 4091 HAMMER2_ENC_COMP(HAMMER2_COMP_NONE); 4092 4093 ichain = hammer2_chain_alloc(hmp, parent->pmp, &dummy.bref); 4094 atomic_set_int(&ichain->flags, HAMMER2_CHAIN_INITIAL); 4095 hammer2_chain_lock(ichain, HAMMER2_RESOLVE_MAYBE); 4096 /* ichain has one ref at this point */ 4097 4098 /* 4099 * We have to mark it modified to allocate its block, but use 4100 * OPTDATA to allow it to remain in the INITIAL state. Otherwise 4101 * it won't be acted upon by the flush code. 4102 * 4103 * XXX remove OPTDATA, we need a fully initialized indirect block to 4104 * be able to move the original blockref. 4105 */ 4106 *errorp = hammer2_chain_modify(ichain, mtid, 0, 0); 4107 if (*errorp) { 4108 kprintf("hammer2_alloc_indirect: error %08x %s\n", 4109 *errorp, hammer2_error_str(*errorp)); 4110 hammer2_chain_unlock(ichain); 4111 hammer2_chain_drop(ichain); 4112 return NULL; 4113 } 4114 KKASSERT((ichain->flags & HAMMER2_CHAIN_INITIAL) == 0); 4115 4116 /* 4117 * Iterate the original parent and move the matching brefs into 4118 * the new indirect block. 4119 * 4120 * XXX handle flushes. 4121 */ 4122 key_beg = 0; 4123 key_end = HAMMER2_KEY_MAX; 4124 key_next = 0; /* avoid gcc warnings */ 4125 hammer2_spin_ex(&parent->core.spin); 4126 loops = 0; 4127 reason = 0; 4128 4129 for (;;) { 4130 /* 4131 * Parent may have been modified, relocating its block array. 4132 * Reload the base pointer. 4133 */ 4134 base = hammer2_chain_base_and_count(parent, &count); 4135 4136 if (++loops > 100000) { 4137 hammer2_spin_unex(&parent->core.spin); 4138 panic("excessive loops r=%d p=%p base/count %p:%d %016jx\n", 4139 reason, parent, base, count, key_next); 4140 } 4141 4142 /* 4143 * NOTE: spinlock stays intact, returned chain (if not NULL) 4144 * is not referenced or locked which means that we 4145 * cannot safely check its flagged / deletion status 4146 * until we lock it. 4147 */ 4148 chain = hammer2_combined_find(parent, base, count, 4149 &key_next, 4150 key_beg, key_end, 4151 &bref); 4152 generation = parent->core.generation; 4153 if (bref == NULL) 4154 break; 4155 key_next = bref->key + ((hammer2_key_t)1 << bref->keybits); 4156 4157 /* 4158 * Skip keys that are not within the key/radix of the new 4159 * indirect block. They stay in the parent. 4160 */ 4161 if ((~(((hammer2_key_t)1 << keybits) - 1) & 4162 (key ^ bref->key)) != 0) { 4163 goto next_key_spinlocked; 4164 } 4165 4166 /* 4167 * Load the new indirect block by acquiring the related 4168 * chains (potentially from media as it might not be 4169 * in-memory). Then move it to the new parent (ichain). 4170 * 4171 * chain is referenced but not locked. We must lock the 4172 * chain to obtain definitive state. 4173 */ 4174 bcopy = *bref; 4175 if (chain) { 4176 /* 4177 * Use chain already present in the RBTREE 4178 */ 4179 hammer2_chain_ref(chain); 4180 hammer2_spin_unex(&parent->core.spin); 4181 hammer2_chain_lock(chain, HAMMER2_RESOLVE_NEVER); 4182 } else { 4183 /* 4184 * Get chain for blockref element. _get returns NULL 4185 * on insertion race. 4186 */ 4187 hammer2_spin_unex(&parent->core.spin); 4188 chain = hammer2_chain_get(parent, generation, &bcopy, 4189 HAMMER2_RESOLVE_NEVER); 4190 if (chain == NULL) { 4191 reason = 1; 4192 hammer2_spin_ex(&parent->core.spin); 4193 continue; 4194 } 4195 } 4196 4197 /* 4198 * This is always live so if the chain has been deleted 4199 * we raced someone and we have to retry. 4200 * 4201 * NOTE: Lookups can race delete-duplicate because 4202 * delete-duplicate does not lock the parent's core 4203 * (they just use the spinlock on the core). 4204 * 4205 * (note reversed logic for this one) 4206 */ 4207 if (bcmp(&bcopy, &chain->bref, sizeof(bcopy)) || 4208 chain->parent != parent || 4209 (chain->flags & HAMMER2_CHAIN_DELETED)) { 4210 hammer2_chain_unlock(chain); 4211 hammer2_chain_drop(chain); 4212 if (hammer2_debug & 0x0040) { 4213 kprintf("LOST PARENT RETRY " 4214 "RETRY (%p,%p)->%p %08x\n", 4215 parent, chain->parent, chain, chain->flags); 4216 } 4217 hammer2_spin_ex(&parent->core.spin); 4218 continue; 4219 } 4220 4221 /* 4222 * Shift the chain to the indirect block. 4223 * 4224 * WARNING! No reason for us to load chain data, pass NOSTATS 4225 * to prevent delete/insert from trying to access 4226 * inode stats (and thus asserting if there is no 4227 * chain->data loaded). 4228 * 4229 * WARNING! The (parent, chain) deletion may modify the parent 4230 * and invalidate the base pointer. 4231 * 4232 * WARNING! Parent must already be marked modified, so we 4233 * can assume that chain_delete always suceeds. 4234 * 4235 * WARNING! hammer2_chain_repchange() does not have to be 4236 * called (and doesn't work anyway because we are 4237 * only doing a partial shift). A recursion that is 4238 * in-progress can continue at the current parent 4239 * and will be able to properly find its next key. 4240 */ 4241 error = hammer2_chain_delete_obref(parent, chain, mtid, 0, 4242 &bcopy); 4243 KKASSERT(error == 0); 4244 hammer2_chain_rename_obref(&ichain, chain, mtid, 0, &bcopy); 4245 hammer2_chain_unlock(chain); 4246 hammer2_chain_drop(chain); 4247 KKASSERT(parent->refs > 0); 4248 chain = NULL; 4249 base = NULL; /* safety */ 4250 hammer2_spin_ex(&parent->core.spin); 4251 next_key_spinlocked: 4252 if (--maxloops == 0) 4253 panic("hammer2_chain_create_indirect: maxloops"); 4254 reason = 4; 4255 if (key_next == 0 || key_next > key_end) 4256 break; 4257 key_beg = key_next; 4258 /* loop */ 4259 } 4260 hammer2_spin_unex(&parent->core.spin); 4261 4262 /* 4263 * Insert the new indirect block into the parent now that we've 4264 * cleared out some entries in the parent. We calculated a good 4265 * insertion index in the loop above (ichain->index). 4266 * 4267 * We don't have to set UPDATE here because we mark ichain 4268 * modified down below (so the normal modified -> flush -> set-moved 4269 * sequence applies). 4270 * 4271 * The insertion shouldn't race as this is a completely new block 4272 * and the parent is locked. 4273 */ 4274 base = NULL; /* safety, parent modify may change address */ 4275 KKASSERT((ichain->flags & HAMMER2_CHAIN_ONRBTREE) == 0); 4276 KKASSERT(parent->core.live_count < count); 4277 hammer2_chain_insert(parent, ichain, 4278 HAMMER2_CHAIN_INSERT_SPIN | 4279 HAMMER2_CHAIN_INSERT_LIVE, 4280 0); 4281 4282 /* 4283 * Make sure flushes propogate after our manual insertion. 4284 */ 4285 hammer2_chain_setflush(ichain); 4286 hammer2_chain_setflush(parent); 4287 4288 /* 4289 * Figure out what to return. 4290 */ 4291 if (~(((hammer2_key_t)1 << keybits) - 1) & 4292 (create_key ^ key)) { 4293 /* 4294 * Key being created is outside the key range, 4295 * return the original parent. 4296 */ 4297 hammer2_chain_unlock(ichain); 4298 hammer2_chain_drop(ichain); 4299 } else { 4300 /* 4301 * Otherwise its in the range, return the new parent. 4302 * (leave both the new and old parent locked). 4303 */ 4304 parent = ichain; 4305 } 4306 4307 return(parent); 4308 } 4309 4310 /* 4311 * Do maintenance on an indirect chain. Both parent and chain are locked. 4312 * 4313 * Returns non-zero if (chain) is deleted, either due to being empty or 4314 * because its children were safely moved into the parent. 4315 */ 4316 int 4317 hammer2_chain_indirect_maintenance(hammer2_chain_t *parent, 4318 hammer2_chain_t *chain) 4319 { 4320 hammer2_blockref_t *chain_base; 4321 hammer2_blockref_t *base; 4322 hammer2_blockref_t *bref; 4323 hammer2_blockref_t bcopy; 4324 hammer2_key_t key_next; 4325 hammer2_key_t key_beg; 4326 hammer2_key_t key_end; 4327 hammer2_chain_t *sub; 4328 int chain_count; 4329 int count; 4330 int error; 4331 int generation; 4332 4333 /* 4334 * Make sure we have an accurate live_count 4335 */ 4336 if ((chain->flags & (HAMMER2_CHAIN_INITIAL | 4337 HAMMER2_CHAIN_COUNTEDBREFS)) == 0) { 4338 base = &chain->data->npdata[0]; 4339 count = chain->bytes / sizeof(hammer2_blockref_t); 4340 hammer2_chain_countbrefs(chain, base, count); 4341 } 4342 4343 /* 4344 * If the indirect block is empty we can delete it. 4345 * (ignore deletion error) 4346 */ 4347 if (chain->core.live_count == 0 && RB_EMPTY(&chain->core.rbtree)) { 4348 hammer2_chain_delete(parent, chain, 4349 chain->bref.modify_tid, 4350 HAMMER2_DELETE_PERMANENT); 4351 hammer2_chain_repchange(parent, chain); 4352 return 1; 4353 } 4354 4355 base = hammer2_chain_base_and_count(parent, &count); 4356 4357 if ((parent->flags & (HAMMER2_CHAIN_INITIAL | 4358 HAMMER2_CHAIN_COUNTEDBREFS)) == 0) { 4359 hammer2_chain_countbrefs(parent, base, count); 4360 } 4361 4362 /* 4363 * Determine if we can collapse chain into parent, calculate 4364 * hysteresis for chain emptiness. 4365 */ 4366 if (parent->core.live_count + chain->core.live_count - 1 > count) 4367 return 0; 4368 chain_count = chain->bytes / sizeof(hammer2_blockref_t); 4369 if (chain->core.live_count > chain_count * 3 / 4) 4370 return 0; 4371 4372 /* 4373 * Ok, theoretically we can collapse chain's contents into 4374 * parent. chain is locked, but any in-memory children of chain 4375 * are not. For this to work, we must be able to dispose of any 4376 * in-memory children of chain. 4377 * 4378 * For now require that there are no in-memory children of chain. 4379 * 4380 * WARNING! Both chain and parent must remain locked across this 4381 * entire operation. 4382 */ 4383 4384 /* 4385 * Parent must be marked modified. Don't try to collapse it if we 4386 * can't mark it modified. Once modified, destroy chain to make room 4387 * and to get rid of what will be a conflicting key (this is included 4388 * in the calculation above). Finally, move the children of chain 4389 * into chain's parent. 4390 * 4391 * This order creates an accounting problem for bref.embed.stats 4392 * because we destroy chain before we remove its children. Any 4393 * elements whos blockref is already synchronized will be counted 4394 * twice. To deal with the problem we clean out chain's stats prior 4395 * to deleting it. 4396 */ 4397 error = hammer2_chain_modify(parent, 0, 0, 0); 4398 if (error) { 4399 krateprintf(&krate_h2me, "hammer2: indirect_maint: %s\n", 4400 hammer2_error_str(error)); 4401 return 0; 4402 } 4403 error = hammer2_chain_modify(chain, chain->bref.modify_tid, 0, 0); 4404 if (error) { 4405 krateprintf(&krate_h2me, "hammer2: indirect_maint: %s\n", 4406 hammer2_error_str(error)); 4407 return 0; 4408 } 4409 4410 chain->bref.embed.stats.inode_count = 0; 4411 chain->bref.embed.stats.data_count = 0; 4412 error = hammer2_chain_delete(parent, chain, 4413 chain->bref.modify_tid, 4414 HAMMER2_DELETE_PERMANENT); 4415 KKASSERT(error == 0); 4416 4417 /* 4418 * The combined_find call requires core.spin to be held. One would 4419 * think there wouldn't be any conflicts since we hold chain 4420 * exclusively locked, but the caching mechanism for 0-ref children 4421 * does not require a chain lock. 4422 */ 4423 hammer2_spin_ex(&chain->core.spin); 4424 4425 key_next = 0; 4426 key_beg = 0; 4427 key_end = HAMMER2_KEY_MAX; 4428 for (;;) { 4429 chain_base = &chain->data->npdata[0]; 4430 chain_count = chain->bytes / sizeof(hammer2_blockref_t); 4431 sub = hammer2_combined_find(chain, chain_base, chain_count, 4432 &key_next, 4433 key_beg, key_end, 4434 &bref); 4435 generation = chain->core.generation; 4436 if (bref == NULL) 4437 break; 4438 key_next = bref->key + ((hammer2_key_t)1 << bref->keybits); 4439 4440 bcopy = *bref; 4441 if (sub) { 4442 hammer2_chain_ref(sub); 4443 hammer2_spin_unex(&chain->core.spin); 4444 hammer2_chain_lock(sub, HAMMER2_RESOLVE_NEVER); 4445 } else { 4446 hammer2_spin_unex(&chain->core.spin); 4447 sub = hammer2_chain_get(chain, generation, &bcopy, 4448 HAMMER2_RESOLVE_NEVER); 4449 if (sub == NULL) { 4450 hammer2_spin_ex(&chain->core.spin); 4451 continue; 4452 } 4453 } 4454 if (bcmp(&bcopy, &sub->bref, sizeof(bcopy)) || 4455 sub->parent != chain || 4456 (sub->flags & HAMMER2_CHAIN_DELETED)) { 4457 hammer2_chain_unlock(sub); 4458 hammer2_chain_drop(sub); 4459 hammer2_spin_ex(&chain->core.spin); 4460 sub = NULL; /* safety */ 4461 continue; 4462 } 4463 error = hammer2_chain_delete_obref(chain, sub, 4464 sub->bref.modify_tid, 0, 4465 &bcopy); 4466 KKASSERT(error == 0); 4467 hammer2_chain_rename_obref(&parent, sub, 4468 sub->bref.modify_tid, 4469 HAMMER2_INSERT_SAMEPARENT, &bcopy); 4470 hammer2_chain_unlock(sub); 4471 hammer2_chain_drop(sub); 4472 hammer2_spin_ex(&chain->core.spin); 4473 4474 if (key_next == 0) 4475 break; 4476 key_beg = key_next; 4477 } 4478 hammer2_spin_unex(&chain->core.spin); 4479 4480 hammer2_chain_repchange(parent, chain); 4481 4482 return 1; 4483 } 4484 4485 /* 4486 * Freemap indirect blocks 4487 * 4488 * Calculate the keybits and highside/lowside of the freemap node the 4489 * caller is creating. 4490 * 4491 * This routine will specify the next higher-level freemap key/radix 4492 * representing the lowest-ordered set. By doing so, eventually all 4493 * low-ordered sets will be moved one level down. 4494 * 4495 * We have to be careful here because the freemap reserves a limited 4496 * number of blocks for a limited number of levels. So we can't just 4497 * push indiscriminately. 4498 */ 4499 int 4500 hammer2_chain_indkey_freemap(hammer2_chain_t *parent, hammer2_key_t *keyp, 4501 int keybits, hammer2_blockref_t *base, int count) 4502 { 4503 hammer2_chain_t *chain; 4504 hammer2_blockref_t *bref; 4505 hammer2_key_t key; 4506 hammer2_key_t key_beg; 4507 hammer2_key_t key_end; 4508 hammer2_key_t key_next; 4509 int locount; 4510 int hicount; 4511 int maxloops = 300000; 4512 4513 key = *keyp; 4514 locount = 0; 4515 hicount = 0; 4516 keybits = 64; 4517 4518 /* 4519 * Calculate the range of keys in the array being careful to skip 4520 * slots which are overridden with a deletion. 4521 */ 4522 key_beg = 0; 4523 key_end = HAMMER2_KEY_MAX; 4524 hammer2_spin_ex(&parent->core.spin); 4525 4526 for (;;) { 4527 if (--maxloops == 0) { 4528 panic("indkey_freemap shit %p %p:%d\n", 4529 parent, base, count); 4530 } 4531 chain = hammer2_combined_find(parent, base, count, 4532 &key_next, 4533 key_beg, key_end, 4534 &bref); 4535 4536 /* 4537 * Exhausted search 4538 */ 4539 if (bref == NULL) 4540 break; 4541 4542 /* 4543 * Skip deleted chains. 4544 */ 4545 if (chain && (chain->flags & HAMMER2_CHAIN_DELETED)) { 4546 if (key_next == 0 || key_next > key_end) 4547 break; 4548 key_beg = key_next; 4549 continue; 4550 } 4551 4552 /* 4553 * Use the full live (not deleted) element for the scan 4554 * iteration. HAMMER2 does not allow partial replacements. 4555 * 4556 * XXX should be built into hammer2_combined_find(). 4557 */ 4558 key_next = bref->key + ((hammer2_key_t)1 << bref->keybits); 4559 4560 if (keybits > bref->keybits) { 4561 key = bref->key; 4562 keybits = bref->keybits; 4563 } else if (keybits == bref->keybits && bref->key < key) { 4564 key = bref->key; 4565 } 4566 if (key_next == 0) 4567 break; 4568 key_beg = key_next; 4569 } 4570 hammer2_spin_unex(&parent->core.spin); 4571 4572 /* 4573 * Return the keybits for a higher-level FREEMAP_NODE covering 4574 * this node. 4575 */ 4576 switch(keybits) { 4577 case HAMMER2_FREEMAP_LEVEL0_RADIX: 4578 keybits = HAMMER2_FREEMAP_LEVEL1_RADIX; 4579 break; 4580 case HAMMER2_FREEMAP_LEVEL1_RADIX: 4581 keybits = HAMMER2_FREEMAP_LEVEL2_RADIX; 4582 break; 4583 case HAMMER2_FREEMAP_LEVEL2_RADIX: 4584 keybits = HAMMER2_FREEMAP_LEVEL3_RADIX; 4585 break; 4586 case HAMMER2_FREEMAP_LEVEL3_RADIX: 4587 keybits = HAMMER2_FREEMAP_LEVEL4_RADIX; 4588 break; 4589 case HAMMER2_FREEMAP_LEVEL4_RADIX: 4590 keybits = HAMMER2_FREEMAP_LEVEL5_RADIX; 4591 break; 4592 case HAMMER2_FREEMAP_LEVEL5_RADIX: 4593 panic("hammer2_chain_indkey_freemap: level too high"); 4594 break; 4595 default: 4596 panic("hammer2_chain_indkey_freemap: bad radix"); 4597 break; 4598 } 4599 *keyp = key; 4600 4601 return (keybits); 4602 } 4603 4604 /* 4605 * File indirect blocks 4606 * 4607 * Calculate the key/keybits for the indirect block to create by scanning 4608 * existing keys. The key being created is also passed in *keyp and can be 4609 * inside or outside the indirect block. Regardless, the indirect block 4610 * must hold at least two keys in order to guarantee sufficient space. 4611 * 4612 * We use a modified version of the freemap's fixed radix tree, but taylored 4613 * for file data. Basically we configure an indirect block encompassing the 4614 * smallest key. 4615 */ 4616 static int 4617 hammer2_chain_indkey_file(hammer2_chain_t *parent, hammer2_key_t *keyp, 4618 int keybits, hammer2_blockref_t *base, int count, 4619 int ncount) 4620 { 4621 hammer2_chain_t *chain; 4622 hammer2_blockref_t *bref; 4623 hammer2_key_t key; 4624 hammer2_key_t key_beg; 4625 hammer2_key_t key_end; 4626 hammer2_key_t key_next; 4627 int nradix; 4628 int locount; 4629 int hicount; 4630 int maxloops = 300000; 4631 4632 key = *keyp; 4633 locount = 0; 4634 hicount = 0; 4635 keybits = 64; 4636 4637 /* 4638 * Calculate the range of keys in the array being careful to skip 4639 * slots which are overridden with a deletion. 4640 * 4641 * Locate the smallest key. 4642 */ 4643 key_beg = 0; 4644 key_end = HAMMER2_KEY_MAX; 4645 hammer2_spin_ex(&parent->core.spin); 4646 4647 for (;;) { 4648 if (--maxloops == 0) { 4649 panic("indkey_freemap shit %p %p:%d\n", 4650 parent, base, count); 4651 } 4652 chain = hammer2_combined_find(parent, base, count, 4653 &key_next, 4654 key_beg, key_end, 4655 &bref); 4656 4657 /* 4658 * Exhausted search 4659 */ 4660 if (bref == NULL) 4661 break; 4662 4663 /* 4664 * Skip deleted chains. 4665 */ 4666 if (chain && (chain->flags & HAMMER2_CHAIN_DELETED)) { 4667 if (key_next == 0 || key_next > key_end) 4668 break; 4669 key_beg = key_next; 4670 continue; 4671 } 4672 4673 /* 4674 * Use the full live (not deleted) element for the scan 4675 * iteration. HAMMER2 does not allow partial replacements. 4676 * 4677 * XXX should be built into hammer2_combined_find(). 4678 */ 4679 key_next = bref->key + ((hammer2_key_t)1 << bref->keybits); 4680 4681 if (keybits > bref->keybits) { 4682 key = bref->key; 4683 keybits = bref->keybits; 4684 } else if (keybits == bref->keybits && bref->key < key) { 4685 key = bref->key; 4686 } 4687 if (key_next == 0) 4688 break; 4689 key_beg = key_next; 4690 } 4691 hammer2_spin_unex(&parent->core.spin); 4692 4693 /* 4694 * Calculate the static keybits for a higher-level indirect block 4695 * that contains the key. 4696 */ 4697 *keyp = key; 4698 4699 switch(ncount) { 4700 case HAMMER2_IND_BYTES_MIN / sizeof(hammer2_blockref_t): 4701 nradix = HAMMER2_IND_RADIX_MIN - HAMMER2_BLOCKREF_RADIX; 4702 break; 4703 case HAMMER2_IND_BYTES_NOM / sizeof(hammer2_blockref_t): 4704 nradix = HAMMER2_IND_RADIX_NOM - HAMMER2_BLOCKREF_RADIX; 4705 break; 4706 case HAMMER2_IND_BYTES_MAX / sizeof(hammer2_blockref_t): 4707 nradix = HAMMER2_IND_RADIX_MAX - HAMMER2_BLOCKREF_RADIX; 4708 break; 4709 default: 4710 panic("bad ncount %d\n", ncount); 4711 nradix = 0; 4712 break; 4713 } 4714 4715 /* 4716 * The largest radix that can be returned for an indirect block is 4717 * 63 bits. (The largest practical indirect block radix is actually 4718 * 62 bits because the top-level inode or volume root contains four 4719 * entries, but allow 63 to be returned). 4720 */ 4721 if (nradix >= 64) 4722 nradix = 63; 4723 4724 return keybits + nradix; 4725 } 4726 4727 #if 1 4728 4729 /* 4730 * Directory indirect blocks. 4731 * 4732 * Covers both the inode index (directory of inodes), and directory contents 4733 * (filenames hardlinked to inodes). 4734 * 4735 * Because directory keys are hashed we generally try to cut the space in 4736 * half. We accomodate the inode index (which tends to have linearly 4737 * increasing inode numbers) by ensuring that the keyspace is at least large 4738 * enough to fill up the indirect block being created. 4739 */ 4740 static int 4741 hammer2_chain_indkey_dir(hammer2_chain_t *parent, hammer2_key_t *keyp, 4742 int keybits, hammer2_blockref_t *base, int count, 4743 int ncount) 4744 { 4745 hammer2_blockref_t *bref; 4746 hammer2_chain_t *chain; 4747 hammer2_key_t key_beg; 4748 hammer2_key_t key_end; 4749 hammer2_key_t key_next; 4750 hammer2_key_t key; 4751 int nkeybits; 4752 int locount; 4753 int hicount; 4754 int maxloops = 300000; 4755 4756 /* 4757 * NOTE: We can't take a shortcut here anymore for inodes because 4758 * the root directory can contain a mix of inodes and directory 4759 * entries (we used to just return 63 if parent->bref.type was 4760 * HAMMER2_BREF_TYPE_INODE. 4761 */ 4762 key = *keyp; 4763 locount = 0; 4764 hicount = 0; 4765 4766 /* 4767 * Calculate the range of keys in the array being careful to skip 4768 * slots which are overridden with a deletion. 4769 */ 4770 key_beg = 0; 4771 key_end = HAMMER2_KEY_MAX; 4772 hammer2_spin_ex(&parent->core.spin); 4773 4774 for (;;) { 4775 if (--maxloops == 0) { 4776 panic("indkey_freemap shit %p %p:%d\n", 4777 parent, base, count); 4778 } 4779 chain = hammer2_combined_find(parent, base, count, 4780 &key_next, 4781 key_beg, key_end, 4782 &bref); 4783 4784 /* 4785 * Exhausted search 4786 */ 4787 if (bref == NULL) 4788 break; 4789 4790 /* 4791 * Deleted object 4792 */ 4793 if (chain && (chain->flags & HAMMER2_CHAIN_DELETED)) { 4794 if (key_next == 0 || key_next > key_end) 4795 break; 4796 key_beg = key_next; 4797 continue; 4798 } 4799 4800 /* 4801 * Use the full live (not deleted) element for the scan 4802 * iteration. HAMMER2 does not allow partial replacements. 4803 * 4804 * XXX should be built into hammer2_combined_find(). 4805 */ 4806 key_next = bref->key + ((hammer2_key_t)1 << bref->keybits); 4807 4808 /* 4809 * Expand our calculated key range (key, keybits) to fit 4810 * the scanned key. nkeybits represents the full range 4811 * that we will later cut in half (two halves @ nkeybits - 1). 4812 */ 4813 nkeybits = keybits; 4814 if (nkeybits < bref->keybits) { 4815 if (bref->keybits > 64) { 4816 kprintf("bad bref chain %p bref %p\n", 4817 chain, bref); 4818 Debugger("fubar"); 4819 } 4820 nkeybits = bref->keybits; 4821 } 4822 while (nkeybits < 64 && 4823 (~(((hammer2_key_t)1 << nkeybits) - 1) & 4824 (key ^ bref->key)) != 0) { 4825 ++nkeybits; 4826 } 4827 4828 /* 4829 * If the new key range is larger we have to determine 4830 * which side of the new key range the existing keys fall 4831 * under by checking the high bit, then collapsing the 4832 * locount into the hicount or vise-versa. 4833 */ 4834 if (keybits != nkeybits) { 4835 if (((hammer2_key_t)1 << (nkeybits - 1)) & key) { 4836 hicount += locount; 4837 locount = 0; 4838 } else { 4839 locount += hicount; 4840 hicount = 0; 4841 } 4842 keybits = nkeybits; 4843 } 4844 4845 /* 4846 * The newly scanned key will be in the lower half or the 4847 * upper half of the (new) key range. 4848 */ 4849 if (((hammer2_key_t)1 << (nkeybits - 1)) & bref->key) 4850 ++hicount; 4851 else 4852 ++locount; 4853 4854 if (key_next == 0) 4855 break; 4856 key_beg = key_next; 4857 } 4858 hammer2_spin_unex(&parent->core.spin); 4859 bref = NULL; /* now invalid (safety) */ 4860 4861 /* 4862 * Adjust keybits to represent half of the full range calculated 4863 * above (radix 63 max) for our new indirect block. 4864 */ 4865 --keybits; 4866 4867 /* 4868 * Expand keybits to hold at least ncount elements. ncount will be 4869 * a power of 2. This is to try to completely fill leaf nodes (at 4870 * least for keys which are not hashes). 4871 * 4872 * We aren't counting 'in' or 'out', we are counting 'high side' 4873 * and 'low side' based on the bit at (1LL << keybits). We want 4874 * everything to be inside in these cases so shift it all to 4875 * the low or high side depending on the new high bit. 4876 */ 4877 while (((hammer2_key_t)1 << keybits) < ncount) { 4878 ++keybits; 4879 if (key & ((hammer2_key_t)1 << keybits)) { 4880 hicount += locount; 4881 locount = 0; 4882 } else { 4883 locount += hicount; 4884 hicount = 0; 4885 } 4886 } 4887 4888 if (hicount > locount) 4889 key |= (hammer2_key_t)1 << keybits; 4890 else 4891 key &= ~(hammer2_key_t)1 << keybits; 4892 4893 *keyp = key; 4894 4895 return (keybits); 4896 } 4897 4898 #else 4899 4900 /* 4901 * Directory indirect blocks. 4902 * 4903 * Covers both the inode index (directory of inodes), and directory contents 4904 * (filenames hardlinked to inodes). 4905 * 4906 * Because directory keys are hashed we generally try to cut the space in 4907 * half. We accomodate the inode index (which tends to have linearly 4908 * increasing inode numbers) by ensuring that the keyspace is at least large 4909 * enough to fill up the indirect block being created. 4910 */ 4911 static int 4912 hammer2_chain_indkey_dir(hammer2_chain_t *parent, hammer2_key_t *keyp, 4913 int keybits, hammer2_blockref_t *base, int count, 4914 int ncount) 4915 { 4916 hammer2_blockref_t *bref; 4917 hammer2_chain_t *chain; 4918 hammer2_key_t key_beg; 4919 hammer2_key_t key_end; 4920 hammer2_key_t key_next; 4921 hammer2_key_t key; 4922 int nkeybits; 4923 int locount; 4924 int hicount; 4925 int maxloops = 300000; 4926 4927 /* 4928 * Shortcut if the parent is the inode. In this situation the 4929 * parent has 4+1 directory entries and we are creating an indirect 4930 * block capable of holding many more. 4931 */ 4932 if (parent->bref.type == HAMMER2_BREF_TYPE_INODE) { 4933 return 63; 4934 } 4935 4936 key = *keyp; 4937 locount = 0; 4938 hicount = 0; 4939 4940 /* 4941 * Calculate the range of keys in the array being careful to skip 4942 * slots which are overridden with a deletion. 4943 */ 4944 key_beg = 0; 4945 key_end = HAMMER2_KEY_MAX; 4946 hammer2_spin_ex(&parent->core.spin); 4947 4948 for (;;) { 4949 if (--maxloops == 0) { 4950 panic("indkey_freemap shit %p %p:%d\n", 4951 parent, base, count); 4952 } 4953 chain = hammer2_combined_find(parent, base, count, 4954 &key_next, 4955 key_beg, key_end, 4956 &bref); 4957 4958 /* 4959 * Exhausted search 4960 */ 4961 if (bref == NULL) 4962 break; 4963 4964 /* 4965 * Deleted object 4966 */ 4967 if (chain && (chain->flags & HAMMER2_CHAIN_DELETED)) { 4968 if (key_next == 0 || key_next > key_end) 4969 break; 4970 key_beg = key_next; 4971 continue; 4972 } 4973 4974 /* 4975 * Use the full live (not deleted) element for the scan 4976 * iteration. HAMMER2 does not allow partial replacements. 4977 * 4978 * XXX should be built into hammer2_combined_find(). 4979 */ 4980 key_next = bref->key + ((hammer2_key_t)1 << bref->keybits); 4981 4982 /* 4983 * Expand our calculated key range (key, keybits) to fit 4984 * the scanned key. nkeybits represents the full range 4985 * that we will later cut in half (two halves @ nkeybits - 1). 4986 */ 4987 nkeybits = keybits; 4988 if (nkeybits < bref->keybits) { 4989 if (bref->keybits > 64) { 4990 kprintf("bad bref chain %p bref %p\n", 4991 chain, bref); 4992 Debugger("fubar"); 4993 } 4994 nkeybits = bref->keybits; 4995 } 4996 while (nkeybits < 64 && 4997 (~(((hammer2_key_t)1 << nkeybits) - 1) & 4998 (key ^ bref->key)) != 0) { 4999 ++nkeybits; 5000 } 5001 5002 /* 5003 * If the new key range is larger we have to determine 5004 * which side of the new key range the existing keys fall 5005 * under by checking the high bit, then collapsing the 5006 * locount into the hicount or vise-versa. 5007 */ 5008 if (keybits != nkeybits) { 5009 if (((hammer2_key_t)1 << (nkeybits - 1)) & key) { 5010 hicount += locount; 5011 locount = 0; 5012 } else { 5013 locount += hicount; 5014 hicount = 0; 5015 } 5016 keybits = nkeybits; 5017 } 5018 5019 /* 5020 * The newly scanned key will be in the lower half or the 5021 * upper half of the (new) key range. 5022 */ 5023 if (((hammer2_key_t)1 << (nkeybits - 1)) & bref->key) 5024 ++hicount; 5025 else 5026 ++locount; 5027 5028 if (key_next == 0) 5029 break; 5030 key_beg = key_next; 5031 } 5032 hammer2_spin_unex(&parent->core.spin); 5033 bref = NULL; /* now invalid (safety) */ 5034 5035 /* 5036 * Adjust keybits to represent half of the full range calculated 5037 * above (radix 63 max) for our new indirect block. 5038 */ 5039 --keybits; 5040 5041 /* 5042 * Expand keybits to hold at least ncount elements. ncount will be 5043 * a power of 2. This is to try to completely fill leaf nodes (at 5044 * least for keys which are not hashes). 5045 * 5046 * We aren't counting 'in' or 'out', we are counting 'high side' 5047 * and 'low side' based on the bit at (1LL << keybits). We want 5048 * everything to be inside in these cases so shift it all to 5049 * the low or high side depending on the new high bit. 5050 */ 5051 while (((hammer2_key_t)1 << keybits) < ncount) { 5052 ++keybits; 5053 if (key & ((hammer2_key_t)1 << keybits)) { 5054 hicount += locount; 5055 locount = 0; 5056 } else { 5057 locount += hicount; 5058 hicount = 0; 5059 } 5060 } 5061 5062 if (hicount > locount) 5063 key |= (hammer2_key_t)1 << keybits; 5064 else 5065 key &= ~(hammer2_key_t)1 << keybits; 5066 5067 *keyp = key; 5068 5069 return (keybits); 5070 } 5071 5072 #endif 5073 5074 /* 5075 * Sets CHAIN_DELETED and remove the chain's blockref from the parent if 5076 * it exists. 5077 * 5078 * Both parent and chain must be locked exclusively. 5079 * 5080 * This function will modify the parent if the blockref requires removal 5081 * from the parent's block table. 5082 * 5083 * This function is NOT recursive. Any entity already pushed into the 5084 * chain (such as an inode) may still need visibility into its contents, 5085 * as well as the ability to read and modify the contents. For example, 5086 * for an unlinked file which is still open. 5087 * 5088 * Also note that the flusher is responsible for cleaning up empty 5089 * indirect blocks. 5090 */ 5091 int 5092 hammer2_chain_delete(hammer2_chain_t *parent, hammer2_chain_t *chain, 5093 hammer2_tid_t mtid, int flags) 5094 { 5095 int error = 0; 5096 5097 KKASSERT(hammer2_mtx_owned(&chain->lock)); 5098 5099 /* 5100 * Nothing to do if already marked. 5101 * 5102 * We need the spinlock on the core whos RBTREE contains chain 5103 * to protect against races. 5104 */ 5105 if ((chain->flags & HAMMER2_CHAIN_DELETED) == 0) { 5106 KKASSERT((chain->flags & HAMMER2_CHAIN_DELETED) == 0 && 5107 chain->parent == parent); 5108 error = _hammer2_chain_delete_helper(parent, chain, 5109 mtid, flags, NULL); 5110 } 5111 5112 /* 5113 * Permanent deletions mark the chain as destroyed. 5114 * 5115 * NOTE: We do not setflush the chain unless the deletion is 5116 * permanent, since the deletion of a chain does not actually 5117 * require it to be flushed. 5118 */ 5119 if (error == 0) { 5120 if (flags & HAMMER2_DELETE_PERMANENT) { 5121 atomic_set_int(&chain->flags, HAMMER2_CHAIN_DESTROY); 5122 hammer2_chain_setflush(chain); 5123 } 5124 } 5125 5126 return error; 5127 } 5128 5129 static int 5130 hammer2_chain_delete_obref(hammer2_chain_t *parent, hammer2_chain_t *chain, 5131 hammer2_tid_t mtid, int flags, 5132 hammer2_blockref_t *obref) 5133 { 5134 int error = 0; 5135 5136 KKASSERT(hammer2_mtx_owned(&chain->lock)); 5137 5138 /* 5139 * Nothing to do if already marked. 5140 * 5141 * We need the spinlock on the core whos RBTREE contains chain 5142 * to protect against races. 5143 */ 5144 obref->type = HAMMER2_BREF_TYPE_EMPTY; 5145 if ((chain->flags & HAMMER2_CHAIN_DELETED) == 0) { 5146 KKASSERT((chain->flags & HAMMER2_CHAIN_DELETED) == 0 && 5147 chain->parent == parent); 5148 error = _hammer2_chain_delete_helper(parent, chain, 5149 mtid, flags, obref); 5150 } 5151 5152 /* 5153 * Permanent deletions mark the chain as destroyed. 5154 * 5155 * NOTE: We do not setflush the chain unless the deletion is 5156 * permanent, since the deletion of a chain does not actually 5157 * require it to be flushed. 5158 */ 5159 if (error == 0) { 5160 if (flags & HAMMER2_DELETE_PERMANENT) { 5161 atomic_set_int(&chain->flags, HAMMER2_CHAIN_DESTROY); 5162 hammer2_chain_setflush(chain); 5163 } 5164 } 5165 5166 return error; 5167 } 5168 5169 /* 5170 * Returns the index of the nearest element in the blockref array >= elm. 5171 * Returns (count) if no element could be found. 5172 * 5173 * Sets *key_nextp to the next key for loop purposes but does not modify 5174 * it if the next key would be higher than the current value of *key_nextp. 5175 * Note that *key_nexp can overflow to 0, which should be tested by the 5176 * caller. 5177 * 5178 * WARNING! Must be called with parent's spinlock held. Spinlock remains 5179 * held through the operation. 5180 */ 5181 static int 5182 hammer2_base_find(hammer2_chain_t *parent, 5183 hammer2_blockref_t *base, int count, 5184 hammer2_key_t *key_nextp, 5185 hammer2_key_t key_beg, hammer2_key_t key_end) 5186 { 5187 hammer2_blockref_t *scan; 5188 hammer2_key_t scan_end; 5189 int i; 5190 int limit; 5191 5192 /* 5193 * Require the live chain's already have their core's counted 5194 * so we can optimize operations. 5195 */ 5196 KKASSERT(parent->flags & HAMMER2_CHAIN_COUNTEDBREFS); 5197 5198 /* 5199 * Degenerate case 5200 */ 5201 if (count == 0 || base == NULL) 5202 return(count); 5203 5204 /* 5205 * Sequential optimization using parent->cache_index. This is 5206 * the most likely scenario. 5207 * 5208 * We can avoid trailing empty entries on live chains, otherwise 5209 * we might have to check the whole block array. 5210 */ 5211 i = parent->cache_index; /* SMP RACE OK */ 5212 cpu_ccfence(); 5213 limit = parent->core.live_zero; 5214 if (i >= limit) 5215 i = limit - 1; 5216 if (i < 0) 5217 i = 0; 5218 KKASSERT(i < count); 5219 5220 /* 5221 * Search backwards 5222 */ 5223 scan = &base[i]; 5224 while (i > 0 && (scan->type == HAMMER2_BREF_TYPE_EMPTY || 5225 scan->key > key_beg)) { 5226 --scan; 5227 --i; 5228 } 5229 parent->cache_index = i; 5230 5231 /* 5232 * Search forwards, stop when we find a scan element which 5233 * encloses the key or until we know that there are no further 5234 * elements. 5235 */ 5236 while (i < count) { 5237 if (scan->type != HAMMER2_BREF_TYPE_EMPTY) { 5238 scan_end = scan->key + 5239 ((hammer2_key_t)1 << scan->keybits) - 1; 5240 if (scan->key > key_beg || scan_end >= key_beg) 5241 break; 5242 } 5243 if (i >= limit) 5244 return (count); 5245 ++scan; 5246 ++i; 5247 } 5248 if (i != count) { 5249 parent->cache_index = i; 5250 if (i >= limit) { 5251 i = count; 5252 } else { 5253 scan_end = scan->key + 5254 ((hammer2_key_t)1 << scan->keybits); 5255 if (scan_end && (*key_nextp > scan_end || 5256 *key_nextp == 0)) { 5257 *key_nextp = scan_end; 5258 } 5259 } 5260 } 5261 return (i); 5262 } 5263 5264 /* 5265 * Do a combined search and return the next match either from the blockref 5266 * array or from the in-memory chain. Sets *bresp to the returned bref in 5267 * both cases, or sets it to NULL if the search exhausted. Only returns 5268 * a non-NULL chain if the search matched from the in-memory chain. 5269 * 5270 * When no in-memory chain has been found and a non-NULL bref is returned 5271 * in *bresp. 5272 * 5273 * 5274 * The returned chain is not locked or referenced. Use the returned bref 5275 * to determine if the search exhausted or not. Iterate if the base find 5276 * is chosen but matches a deleted chain. 5277 * 5278 * WARNING! Must be called with parent's spinlock held. Spinlock remains 5279 * held through the operation. 5280 */ 5281 hammer2_chain_t * 5282 hammer2_combined_find(hammer2_chain_t *parent, 5283 hammer2_blockref_t *base, int count, 5284 hammer2_key_t *key_nextp, 5285 hammer2_key_t key_beg, hammer2_key_t key_end, 5286 hammer2_blockref_t **bresp) 5287 { 5288 hammer2_blockref_t *bref; 5289 hammer2_chain_t *chain; 5290 int i; 5291 5292 /* 5293 * Lookup in block array and in rbtree. 5294 */ 5295 *key_nextp = key_end + 1; 5296 i = hammer2_base_find(parent, base, count, key_nextp, 5297 key_beg, key_end); 5298 chain = hammer2_chain_find(parent, key_nextp, key_beg, key_end); 5299 5300 /* 5301 * Neither matched 5302 */ 5303 if (i == count && chain == NULL) { 5304 *bresp = NULL; 5305 return(NULL); 5306 } 5307 5308 /* 5309 * Only chain matched. 5310 */ 5311 if (i == count) { 5312 bref = &chain->bref; 5313 goto found; 5314 } 5315 5316 /* 5317 * Only blockref matched. 5318 */ 5319 if (chain == NULL) { 5320 bref = &base[i]; 5321 goto found; 5322 } 5323 5324 /* 5325 * Both in-memory and blockref matched, select the nearer element. 5326 * 5327 * If both are flush with the left-hand side or both are the 5328 * same distance away, select the chain. In this situation the 5329 * chain must have been loaded from the matching blockmap. 5330 */ 5331 if ((chain->bref.key <= key_beg && base[i].key <= key_beg) || 5332 chain->bref.key == base[i].key) { 5333 KKASSERT(chain->bref.key == base[i].key); 5334 bref = &chain->bref; 5335 goto found; 5336 } 5337 5338 /* 5339 * Select the nearer key 5340 */ 5341 if (chain->bref.key < base[i].key) { 5342 bref = &chain->bref; 5343 } else { 5344 bref = &base[i]; 5345 chain = NULL; 5346 } 5347 5348 /* 5349 * If the bref is out of bounds we've exhausted our search. 5350 */ 5351 found: 5352 if (bref->key > key_end) { 5353 *bresp = NULL; 5354 chain = NULL; 5355 } else { 5356 *bresp = bref; 5357 } 5358 return(chain); 5359 } 5360 5361 /* 5362 * Locate the specified block array element and delete it. The element 5363 * must exist. 5364 * 5365 * The spin lock on the related chain must be held. 5366 * 5367 * NOTE: live_count was adjusted when the chain was deleted, so it does not 5368 * need to be adjusted when we commit the media change. 5369 */ 5370 void 5371 hammer2_base_delete(hammer2_chain_t *parent, 5372 hammer2_blockref_t *base, int count, 5373 hammer2_chain_t *chain, 5374 hammer2_blockref_t *obref) 5375 { 5376 hammer2_blockref_t *elm = &chain->bref; 5377 hammer2_blockref_t *scan; 5378 hammer2_key_t key_next; 5379 int i; 5380 5381 /* 5382 * Delete element. Expect the element to exist. 5383 * 5384 * XXX see caller, flush code not yet sophisticated enough to prevent 5385 * re-flushed in some cases. 5386 */ 5387 key_next = 0; /* max range */ 5388 i = hammer2_base_find(parent, base, count, &key_next, 5389 elm->key, elm->key); 5390 scan = &base[i]; 5391 if (i == count || scan->type == HAMMER2_BREF_TYPE_EMPTY || 5392 scan->key != elm->key || 5393 ((chain->flags & HAMMER2_CHAIN_BMAPUPD) == 0 && 5394 scan->keybits != elm->keybits)) { 5395 hammer2_spin_unex(&parent->core.spin); 5396 panic("delete base %p element not found at %d/%d elm %p\n", 5397 base, i, count, elm); 5398 return; 5399 } 5400 5401 /* 5402 * Update stats and zero the entry. 5403 * 5404 * NOTE: Handle radix == 0 (0 bytes) case. 5405 */ 5406 if ((int)(scan->data_off & HAMMER2_OFF_MASK_RADIX)) { 5407 parent->bref.embed.stats.data_count -= (hammer2_off_t)1 << 5408 (int)(scan->data_off & HAMMER2_OFF_MASK_RADIX); 5409 } 5410 switch(scan->type) { 5411 case HAMMER2_BREF_TYPE_INODE: 5412 --parent->bref.embed.stats.inode_count; 5413 /* fall through */ 5414 case HAMMER2_BREF_TYPE_DATA: 5415 if (parent->bref.leaf_count == HAMMER2_BLOCKREF_LEAF_MAX) { 5416 atomic_set_int(&chain->flags, 5417 HAMMER2_CHAIN_HINT_LEAF_COUNT); 5418 } else { 5419 if (parent->bref.leaf_count) 5420 --parent->bref.leaf_count; 5421 } 5422 /* fall through */ 5423 case HAMMER2_BREF_TYPE_INDIRECT: 5424 if (scan->type != HAMMER2_BREF_TYPE_DATA) { 5425 parent->bref.embed.stats.data_count -= 5426 scan->embed.stats.data_count; 5427 parent->bref.embed.stats.inode_count -= 5428 scan->embed.stats.inode_count; 5429 } 5430 if (scan->type == HAMMER2_BREF_TYPE_INODE) 5431 break; 5432 if (parent->bref.leaf_count == HAMMER2_BLOCKREF_LEAF_MAX) { 5433 atomic_set_int(&chain->flags, 5434 HAMMER2_CHAIN_HINT_LEAF_COUNT); 5435 } else { 5436 if (parent->bref.leaf_count <= scan->leaf_count) 5437 parent->bref.leaf_count = 0; 5438 else 5439 parent->bref.leaf_count -= scan->leaf_count; 5440 } 5441 break; 5442 case HAMMER2_BREF_TYPE_DIRENT: 5443 if (parent->bref.leaf_count == HAMMER2_BLOCKREF_LEAF_MAX) { 5444 atomic_set_int(&chain->flags, 5445 HAMMER2_CHAIN_HINT_LEAF_COUNT); 5446 } else { 5447 if (parent->bref.leaf_count) 5448 --parent->bref.leaf_count; 5449 } 5450 default: 5451 break; 5452 } 5453 5454 if (obref) 5455 *obref = *scan; 5456 bzero(scan, sizeof(*scan)); 5457 5458 /* 5459 * We can only optimize parent->core.live_zero for live chains. 5460 */ 5461 if (parent->core.live_zero == i + 1) { 5462 while (--i >= 0 && base[i].type == HAMMER2_BREF_TYPE_EMPTY) 5463 ; 5464 parent->core.live_zero = i + 1; 5465 } 5466 5467 /* 5468 * Clear appropriate blockmap flags in chain. 5469 */ 5470 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_BMAPPED | 5471 HAMMER2_CHAIN_BMAPUPD); 5472 } 5473 5474 /* 5475 * Insert the specified element. The block array must not already have the 5476 * element and must have space available for the insertion. 5477 * 5478 * The spin lock on the related chain must be held. 5479 * 5480 * NOTE: live_count was adjusted when the chain was deleted, so it does not 5481 * need to be adjusted when we commit the media change. 5482 */ 5483 void 5484 hammer2_base_insert(hammer2_chain_t *parent, 5485 hammer2_blockref_t *base, int count, 5486 hammer2_chain_t *chain, hammer2_blockref_t *elm) 5487 { 5488 hammer2_key_t key_next; 5489 hammer2_key_t xkey; 5490 int i; 5491 int j; 5492 int k; 5493 int l; 5494 int u = 1; 5495 5496 /* 5497 * Insert new element. Expect the element to not already exist 5498 * unless we are replacing it. 5499 * 5500 * XXX see caller, flush code not yet sophisticated enough to prevent 5501 * re-flushed in some cases. 5502 */ 5503 key_next = 0; /* max range */ 5504 i = hammer2_base_find(parent, base, count, &key_next, 5505 elm->key, elm->key); 5506 5507 /* 5508 * Shortcut fill optimization, typical ordered insertion(s) may not 5509 * require a search. 5510 */ 5511 KKASSERT(i >= 0 && i <= count); 5512 5513 /* 5514 * Set appropriate blockmap flags in chain (if not NULL) 5515 */ 5516 if (chain) 5517 atomic_set_int(&chain->flags, HAMMER2_CHAIN_BMAPPED); 5518 5519 /* 5520 * Update stats and zero the entry 5521 */ 5522 if ((int)(elm->data_off & HAMMER2_OFF_MASK_RADIX)) { 5523 parent->bref.embed.stats.data_count += (hammer2_off_t)1 << 5524 (int)(elm->data_off & HAMMER2_OFF_MASK_RADIX); 5525 } 5526 switch(elm->type) { 5527 case HAMMER2_BREF_TYPE_INODE: 5528 ++parent->bref.embed.stats.inode_count; 5529 /* fall through */ 5530 case HAMMER2_BREF_TYPE_DATA: 5531 if (parent->bref.leaf_count != HAMMER2_BLOCKREF_LEAF_MAX) 5532 ++parent->bref.leaf_count; 5533 /* fall through */ 5534 case HAMMER2_BREF_TYPE_INDIRECT: 5535 if (elm->type != HAMMER2_BREF_TYPE_DATA) { 5536 parent->bref.embed.stats.data_count += 5537 elm->embed.stats.data_count; 5538 parent->bref.embed.stats.inode_count += 5539 elm->embed.stats.inode_count; 5540 } 5541 if (elm->type == HAMMER2_BREF_TYPE_INODE) 5542 break; 5543 if (parent->bref.leaf_count + elm->leaf_count < 5544 HAMMER2_BLOCKREF_LEAF_MAX) { 5545 parent->bref.leaf_count += elm->leaf_count; 5546 } else { 5547 parent->bref.leaf_count = HAMMER2_BLOCKREF_LEAF_MAX; 5548 } 5549 break; 5550 case HAMMER2_BREF_TYPE_DIRENT: 5551 if (parent->bref.leaf_count != HAMMER2_BLOCKREF_LEAF_MAX) 5552 ++parent->bref.leaf_count; 5553 break; 5554 default: 5555 break; 5556 } 5557 5558 5559 /* 5560 * We can only optimize parent->core.live_zero for live chains. 5561 */ 5562 if (i == count && parent->core.live_zero < count) { 5563 i = parent->core.live_zero++; 5564 base[i] = *elm; 5565 return; 5566 } 5567 5568 xkey = elm->key + ((hammer2_key_t)1 << elm->keybits) - 1; 5569 if (i != count && (base[i].key < elm->key || xkey >= base[i].key)) { 5570 hammer2_spin_unex(&parent->core.spin); 5571 panic("insert base %p overlapping elements at %d elm %p\n", 5572 base, i, elm); 5573 } 5574 5575 /* 5576 * Try to find an empty slot before or after. 5577 */ 5578 j = i; 5579 k = i; 5580 while (j > 0 || k < count) { 5581 --j; 5582 if (j >= 0 && base[j].type == HAMMER2_BREF_TYPE_EMPTY) { 5583 if (j == i - 1) { 5584 base[j] = *elm; 5585 } else { 5586 bcopy(&base[j+1], &base[j], 5587 (i - j - 1) * sizeof(*base)); 5588 base[i - 1] = *elm; 5589 } 5590 goto validate; 5591 } 5592 ++k; 5593 if (k < count && base[k].type == HAMMER2_BREF_TYPE_EMPTY) { 5594 bcopy(&base[i], &base[i+1], 5595 (k - i) * sizeof(hammer2_blockref_t)); 5596 base[i] = *elm; 5597 5598 /* 5599 * We can only update parent->core.live_zero for live 5600 * chains. 5601 */ 5602 if (parent->core.live_zero <= k) 5603 parent->core.live_zero = k + 1; 5604 u = 2; 5605 goto validate; 5606 } 5607 } 5608 panic("hammer2_base_insert: no room!"); 5609 5610 /* 5611 * Debugging 5612 */ 5613 validate: 5614 key_next = 0; 5615 for (l = 0; l < count; ++l) { 5616 if (base[l].type != HAMMER2_BREF_TYPE_EMPTY) { 5617 key_next = base[l].key + 5618 ((hammer2_key_t)1 << base[l].keybits) - 1; 5619 break; 5620 } 5621 } 5622 while (++l < count) { 5623 if (base[l].type != HAMMER2_BREF_TYPE_EMPTY) { 5624 if (base[l].key <= key_next) 5625 panic("base_insert %d %d,%d,%d fail %p:%d", u, i, j, k, base, l); 5626 key_next = base[l].key + 5627 ((hammer2_key_t)1 << base[l].keybits) - 1; 5628 5629 } 5630 } 5631 5632 } 5633 5634 #if 0 5635 5636 /* 5637 * Sort the blockref array for the chain. Used by the flush code to 5638 * sort the blockref[] array. 5639 * 5640 * The chain must be exclusively locked AND spin-locked. 5641 */ 5642 typedef hammer2_blockref_t *hammer2_blockref_p; 5643 5644 static 5645 int 5646 hammer2_base_sort_callback(const void *v1, const void *v2) 5647 { 5648 hammer2_blockref_p bref1 = *(const hammer2_blockref_p *)v1; 5649 hammer2_blockref_p bref2 = *(const hammer2_blockref_p *)v2; 5650 5651 /* 5652 * Make sure empty elements are placed at the end of the array 5653 */ 5654 if (bref1->type == HAMMER2_BREF_TYPE_EMPTY) { 5655 if (bref2->type == HAMMER2_BREF_TYPE_EMPTY) 5656 return(0); 5657 return(1); 5658 } else if (bref2->type == HAMMER2_BREF_TYPE_EMPTY) { 5659 return(-1); 5660 } 5661 5662 /* 5663 * Sort by key 5664 */ 5665 if (bref1->key < bref2->key) 5666 return(-1); 5667 if (bref1->key > bref2->key) 5668 return(1); 5669 return(0); 5670 } 5671 5672 void 5673 hammer2_base_sort(hammer2_chain_t *chain) 5674 { 5675 hammer2_blockref_t *base; 5676 int count; 5677 5678 switch(chain->bref.type) { 5679 case HAMMER2_BREF_TYPE_INODE: 5680 /* 5681 * Special shortcut for embedded data returns the inode 5682 * itself. Callers must detect this condition and access 5683 * the embedded data (the strategy code does this for us). 5684 * 5685 * This is only applicable to regular files and softlinks. 5686 */ 5687 if (chain->data->ipdata.meta.op_flags & 5688 HAMMER2_OPFLAG_DIRECTDATA) { 5689 return; 5690 } 5691 base = &chain->data->ipdata.u.blockset.blockref[0]; 5692 count = HAMMER2_SET_COUNT; 5693 break; 5694 case HAMMER2_BREF_TYPE_FREEMAP_NODE: 5695 case HAMMER2_BREF_TYPE_INDIRECT: 5696 /* 5697 * Optimize indirect blocks in the INITIAL state to avoid 5698 * I/O. 5699 */ 5700 KKASSERT((chain->flags & HAMMER2_CHAIN_INITIAL) == 0); 5701 base = &chain->data->npdata[0]; 5702 count = chain->bytes / sizeof(hammer2_blockref_t); 5703 break; 5704 case HAMMER2_BREF_TYPE_VOLUME: 5705 base = &chain->data->voldata.sroot_blockset.blockref[0]; 5706 count = HAMMER2_SET_COUNT; 5707 break; 5708 case HAMMER2_BREF_TYPE_FREEMAP: 5709 base = &chain->data->blkset.blockref[0]; 5710 count = HAMMER2_SET_COUNT; 5711 break; 5712 default: 5713 kprintf("hammer2_chain_lookup: unrecognized " 5714 "blockref(A) type: %d", 5715 chain->bref.type); 5716 while (1) 5717 tsleep(&base, 0, "dead", 0); 5718 panic("hammer2_base_sort: unrecognized " 5719 "blockref(A) type: %d", 5720 chain->bref.type); 5721 base = NULL; /* safety */ 5722 count = 0; /* safety */ 5723 } 5724 kqsort(base, count, sizeof(*base), hammer2_base_sort_callback); 5725 } 5726 5727 #endif 5728 5729 /* 5730 * Chain memory management 5731 */ 5732 void 5733 hammer2_chain_wait(hammer2_chain_t *chain) 5734 { 5735 tsleep(chain, 0, "chnflw", 1); 5736 } 5737 5738 const hammer2_media_data_t * 5739 hammer2_chain_rdata(hammer2_chain_t *chain) 5740 { 5741 KKASSERT(chain->data != NULL); 5742 return (chain->data); 5743 } 5744 5745 hammer2_media_data_t * 5746 hammer2_chain_wdata(hammer2_chain_t *chain) 5747 { 5748 KKASSERT(chain->data != NULL); 5749 return (chain->data); 5750 } 5751 5752 /* 5753 * Set the check data for a chain. This can be a heavy-weight operation 5754 * and typically only runs on-flush. For file data check data is calculated 5755 * when the logical buffers are flushed. 5756 */ 5757 void 5758 hammer2_chain_setcheck(hammer2_chain_t *chain, void *bdata) 5759 { 5760 chain->bref.flags &= ~HAMMER2_BREF_FLAG_ZERO; 5761 5762 switch(HAMMER2_DEC_CHECK(chain->bref.methods)) { 5763 case HAMMER2_CHECK_NONE: 5764 break; 5765 case HAMMER2_CHECK_DISABLED: 5766 break; 5767 case HAMMER2_CHECK_ISCSI32: 5768 chain->bref.check.iscsi32.value = 5769 hammer2_icrc32(bdata, chain->bytes); 5770 break; 5771 case HAMMER2_CHECK_XXHASH64: 5772 chain->bref.check.xxhash64.value = 5773 XXH64(bdata, chain->bytes, XXH_HAMMER2_SEED); 5774 break; 5775 case HAMMER2_CHECK_SHA192: 5776 { 5777 SHA256_CTX hash_ctx; 5778 union { 5779 uint8_t digest[SHA256_DIGEST_LENGTH]; 5780 uint64_t digest64[SHA256_DIGEST_LENGTH/8]; 5781 } u; 5782 5783 SHA256_Init(&hash_ctx); 5784 SHA256_Update(&hash_ctx, bdata, chain->bytes); 5785 SHA256_Final(u.digest, &hash_ctx); 5786 u.digest64[2] ^= u.digest64[3]; 5787 bcopy(u.digest, 5788 chain->bref.check.sha192.data, 5789 sizeof(chain->bref.check.sha192.data)); 5790 } 5791 break; 5792 case HAMMER2_CHECK_FREEMAP: 5793 chain->bref.check.freemap.icrc32 = 5794 hammer2_icrc32(bdata, chain->bytes); 5795 break; 5796 default: 5797 kprintf("hammer2_chain_setcheck: unknown check type %02x\n", 5798 chain->bref.methods); 5799 break; 5800 } 5801 } 5802 5803 /* 5804 * Characterize a failed check code and try to trace back to the inode. 5805 */ 5806 static void 5807 hammer2_characterize_failed_chain(hammer2_chain_t *chain, uint64_t check, 5808 int bits) 5809 { 5810 hammer2_chain_t *lchain; 5811 hammer2_chain_t *ochain; 5812 5813 kprintf("chain %016jx.%02x (%s) meth=%02x CHECK FAIL " 5814 "(flags=%08x, bref/data ", 5815 chain->bref.data_off, 5816 chain->bref.type, 5817 hammer2_bref_type_str(&chain->bref), 5818 chain->bref.methods, 5819 chain->flags); 5820 if (bits == 32) { 5821 kprintf("%08x/%08x)\n", 5822 chain->bref.check.iscsi32.value, 5823 (uint32_t)check); 5824 } else { 5825 kprintf("%016jx/%016jx)\n", 5826 chain->bref.check.xxhash64.value, 5827 check); 5828 } 5829 5830 /* 5831 * Run up the chains to try to find the governing inode so we 5832 * can report it. 5833 * 5834 * XXX This error reporting is not really MPSAFE 5835 */ 5836 ochain = chain; 5837 lchain = chain; 5838 while (chain && chain->bref.type != HAMMER2_BREF_TYPE_INODE) { 5839 lchain = chain; 5840 chain = chain->parent; 5841 } 5842 5843 if (chain && chain->bref.type == HAMMER2_BREF_TYPE_INODE && 5844 ((chain->bref.flags & HAMMER2_BREF_FLAG_PFSROOT) == 0 || 5845 (lchain->bref.key & HAMMER2_DIRHASH_VISIBLE))) { 5846 kprintf(" Resides at/in inode %ld\n", 5847 chain->bref.key); 5848 } else if (chain && chain->bref.type == HAMMER2_BREF_TYPE_INODE) { 5849 kprintf(" Resides in inode index - CRITICAL!!!\n"); 5850 } else { 5851 kprintf(" Resides in root index - CRITICAL!!!\n"); 5852 } 5853 if (ochain->hmp) { 5854 const char *pfsname = "UNKNOWN"; 5855 int i; 5856 5857 if (ochain->pmp) { 5858 for (i = 0; i < HAMMER2_MAXCLUSTER; ++i) { 5859 if (ochain->pmp->pfs_hmps[i] == ochain->hmp && 5860 ochain->pmp->pfs_names[i]) { 5861 pfsname = ochain->pmp->pfs_names[i]; 5862 break; 5863 } 5864 } 5865 } 5866 kprintf(" In pfs %s on device %s\n", 5867 pfsname, ochain->hmp->devrepname); 5868 } 5869 } 5870 5871 int 5872 hammer2_chain_testcheck(hammer2_chain_t *chain, void *bdata) 5873 { 5874 uint32_t check32; 5875 uint64_t check64; 5876 int r; 5877 5878 if (chain->bref.flags & HAMMER2_BREF_FLAG_ZERO) 5879 return 1; 5880 5881 switch(HAMMER2_DEC_CHECK(chain->bref.methods)) { 5882 case HAMMER2_CHECK_NONE: 5883 r = 1; 5884 break; 5885 case HAMMER2_CHECK_DISABLED: 5886 r = 1; 5887 break; 5888 case HAMMER2_CHECK_ISCSI32: 5889 check32 = hammer2_icrc32(bdata, chain->bytes); 5890 r = (chain->bref.check.iscsi32.value == check32); 5891 if (r == 0) { 5892 hammer2_characterize_failed_chain(chain, check32, 32); 5893 } 5894 hammer2_process_icrc32 += chain->bytes; 5895 break; 5896 case HAMMER2_CHECK_XXHASH64: 5897 check64 = XXH64(bdata, chain->bytes, XXH_HAMMER2_SEED); 5898 r = (chain->bref.check.xxhash64.value == check64); 5899 if (r == 0) { 5900 hammer2_characterize_failed_chain(chain, check64, 64); 5901 } 5902 hammer2_process_xxhash64 += chain->bytes; 5903 break; 5904 case HAMMER2_CHECK_SHA192: 5905 { 5906 SHA256_CTX hash_ctx; 5907 union { 5908 uint8_t digest[SHA256_DIGEST_LENGTH]; 5909 uint64_t digest64[SHA256_DIGEST_LENGTH/8]; 5910 } u; 5911 5912 SHA256_Init(&hash_ctx); 5913 SHA256_Update(&hash_ctx, bdata, chain->bytes); 5914 SHA256_Final(u.digest, &hash_ctx); 5915 u.digest64[2] ^= u.digest64[3]; 5916 if (bcmp(u.digest, 5917 chain->bref.check.sha192.data, 5918 sizeof(chain->bref.check.sha192.data)) == 0) { 5919 r = 1; 5920 } else { 5921 r = 0; 5922 kprintf("chain %016jx.%02x meth=%02x " 5923 "CHECK FAIL\n", 5924 chain->bref.data_off, 5925 chain->bref.type, 5926 chain->bref.methods); 5927 } 5928 } 5929 break; 5930 case HAMMER2_CHECK_FREEMAP: 5931 r = (chain->bref.check.freemap.icrc32 == 5932 hammer2_icrc32(bdata, chain->bytes)); 5933 if (r == 0) { 5934 kprintf("chain %016jx.%02x meth=%02x " 5935 "CHECK FAIL\n", 5936 chain->bref.data_off, 5937 chain->bref.type, 5938 chain->bref.methods); 5939 kprintf("freemap.icrc %08x icrc32 %08x (%d)\n", 5940 chain->bref.check.freemap.icrc32, 5941 hammer2_icrc32(bdata, chain->bytes), 5942 chain->bytes); 5943 if (chain->dio) 5944 kprintf("dio %p buf %016jx,%d bdata %p/%p\n", 5945 chain->dio, chain->dio->bp->b_loffset, 5946 chain->dio->bp->b_bufsize, bdata, 5947 chain->dio->bp->b_data); 5948 } 5949 5950 break; 5951 default: 5952 kprintf("hammer2_chain_setcheck: unknown check type %02x\n", 5953 chain->bref.methods); 5954 r = 1; 5955 break; 5956 } 5957 return r; 5958 } 5959 5960 /* 5961 * Acquire the chain and parent representing the specified inode for the 5962 * device at the specified cluster index. 5963 * 5964 * The flags passed in are LOOKUP flags, not RESOLVE flags. 5965 * 5966 * If we are unable to locate the inode, HAMMER2_ERROR_EIO is returned and 5967 * *chainp will be NULL. *parentp may still be set error or not, or NULL 5968 * if the parent itself could not be resolved. 5969 * 5970 * The caller may pass-in a locked *parentp and/or *chainp, or neither. 5971 * They will be unlocked and released by this function. The *parentp and 5972 * *chainp representing the located inode are returned locked. 5973 */ 5974 int 5975 hammer2_chain_inode_find(hammer2_pfs_t *pmp, hammer2_key_t inum, 5976 int clindex, int flags, 5977 hammer2_chain_t **parentp, hammer2_chain_t **chainp) 5978 { 5979 hammer2_chain_t *parent; 5980 hammer2_chain_t *rchain; 5981 hammer2_key_t key_dummy; 5982 hammer2_inode_t *ip; 5983 int resolve_flags; 5984 int error; 5985 5986 resolve_flags = (flags & HAMMER2_LOOKUP_SHARED) ? 5987 HAMMER2_RESOLVE_SHARED : 0; 5988 5989 /* 5990 * Caller expects us to replace these. 5991 */ 5992 if (*chainp) { 5993 hammer2_chain_unlock(*chainp); 5994 hammer2_chain_drop(*chainp); 5995 *chainp = NULL; 5996 } 5997 if (*parentp) { 5998 hammer2_chain_unlock(*parentp); 5999 hammer2_chain_drop(*parentp); 6000 *parentp = NULL; 6001 } 6002 6003 /* 6004 * Be very careful, this is a backend function and we CANNOT 6005 * lock any frontend inode structure we find. But we have to 6006 * look the inode up this way first in case it exists but is 6007 * detached from the radix tree. 6008 */ 6009 ip = hammer2_inode_lookup(pmp, inum); 6010 if (ip) { 6011 *chainp = hammer2_inode_chain_and_parent(ip, clindex, 6012 parentp, 6013 resolve_flags); 6014 hammer2_inode_drop(ip); 6015 if (*chainp) 6016 return 0; 6017 hammer2_chain_unlock(*chainp); 6018 hammer2_chain_drop(*chainp); 6019 *chainp = NULL; 6020 if (*parentp) { 6021 hammer2_chain_unlock(*parentp); 6022 hammer2_chain_drop(*parentp); 6023 *parentp = NULL; 6024 } 6025 } 6026 6027 /* 6028 * Inodes hang off of the iroot (bit 63 is clear, differentiating 6029 * inodes from root directory entries in the key lookup). 6030 */ 6031 parent = hammer2_inode_chain(pmp->iroot, clindex, resolve_flags); 6032 rchain = NULL; 6033 if (parent) { 6034 rchain = hammer2_chain_lookup(&parent, &key_dummy, 6035 inum, inum, 6036 &error, flags); 6037 } else { 6038 error = HAMMER2_ERROR_EIO; 6039 } 6040 *parentp = parent; 6041 *chainp = rchain; 6042 6043 return error; 6044 } 6045 6046 /* 6047 * Used by the bulkscan code to snapshot the synchronized storage for 6048 * a volume, allowing it to be scanned concurrently against normal 6049 * operation. 6050 */ 6051 hammer2_chain_t * 6052 hammer2_chain_bulksnap(hammer2_dev_t *hmp) 6053 { 6054 hammer2_chain_t *copy; 6055 6056 copy = hammer2_chain_alloc(hmp, hmp->spmp, &hmp->vchain.bref); 6057 copy->data = kmalloc(sizeof(copy->data->voldata), 6058 hmp->mchain, 6059 M_WAITOK | M_ZERO); 6060 hammer2_voldata_lock(hmp); 6061 copy->data->voldata = hmp->volsync; 6062 hammer2_voldata_unlock(hmp); 6063 6064 return copy; 6065 } 6066 6067 void 6068 hammer2_chain_bulkdrop(hammer2_chain_t *copy) 6069 { 6070 KKASSERT(copy->bref.type == HAMMER2_BREF_TYPE_VOLUME); 6071 KKASSERT(copy->data); 6072 kfree(copy->data, copy->hmp->mchain); 6073 copy->data = NULL; 6074 atomic_add_long(&hammer2_chain_allocs, -1); 6075 hammer2_chain_drop(copy); 6076 } 6077 6078 /* 6079 * Returns non-zero if the chain (INODE or DIRENT) matches the 6080 * filename. 6081 */ 6082 int 6083 hammer2_chain_dirent_test(hammer2_chain_t *chain, const char *name, 6084 size_t name_len) 6085 { 6086 const hammer2_inode_data_t *ripdata; 6087 const hammer2_dirent_head_t *den; 6088 6089 if (chain->bref.type == HAMMER2_BREF_TYPE_INODE) { 6090 ripdata = &chain->data->ipdata; 6091 if (ripdata->meta.name_len == name_len && 6092 bcmp(ripdata->filename, name, name_len) == 0) { 6093 return 1; 6094 } 6095 } 6096 if (chain->bref.type == HAMMER2_BREF_TYPE_DIRENT && 6097 chain->bref.embed.dirent.namlen == name_len) { 6098 den = &chain->bref.embed.dirent; 6099 if (name_len > sizeof(chain->bref.check.buf) && 6100 bcmp(chain->data->buf, name, name_len) == 0) { 6101 return 1; 6102 } 6103 if (name_len <= sizeof(chain->bref.check.buf) && 6104 bcmp(chain->bref.check.buf, name, name_len) == 0) { 6105 return 1; 6106 } 6107 } 6108 return 0; 6109 } 6110