1 /* 2 * Copyright (c) 2011-2020 The DragonFly Project. All rights reserved. 3 * 4 * This code is derived from software contributed to The DragonFly Project 5 * by Matthew Dillon <dillon@dragonflybsd.org> 6 * and Venkatesh Srinivas <vsrinivas@dragonflybsd.org> 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in 16 * the documentation and/or other materials provided with the 17 * distribution. 18 * 3. Neither the name of The DragonFly Project nor the names of its 19 * contributors may be used to endorse or promote products derived 20 * from this software without specific, prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 23 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 24 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 25 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 26 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 27 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 28 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 29 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 30 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 31 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 32 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33 * SUCH DAMAGE. 34 */ 35 /* 36 * This subsystem implements most of the core support functions for 37 * the hammer2_chain structure. 
 *
 * Chains are the in-memory version of media objects (volume header, inodes,
 * indirect blocks, data blocks, etc).  Chains represent a portion of the
 * HAMMER2 topology.
 *
 * Chains are no longer delete-duplicated.  Instead, the original in-memory
 * chain will be moved along with its block reference (e.g. for things like
 * renames, hardlink operations, modifications, etc), and will be indexed
 * on a secondary list for flush handling instead of propagating a flag
 * upward to the root.
 *
 * Concurrent front-end operations can still run against backend flushes
 * as long as they do not cross the current flush boundary.  An operation
 * running above the current flush (in areas not yet flushed) can become
 * part of the current flush while an operation running below the current
 * flush can become part of the next flush.
 */
#include <sys/cdefs.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/types.h>
#include <sys/lock.h>
#include <sys/buf.h>

#include <crypto/sha2/sha2.h>

#include "hammer2.h"

static hammer2_chain_t *hammer2_chain_create_indirect(
		hammer2_chain_t *parent,
		hammer2_key_t key, int keybits,
		hammer2_tid_t mtid, int for_type, int *errorp);
static int hammer2_chain_delete_obref(hammer2_chain_t *parent,
		hammer2_chain_t *chain,
		hammer2_tid_t mtid, int flags,
		hammer2_blockref_t *obref);
static hammer2_chain_t *hammer2_combined_find(
		hammer2_chain_t *parent,
		hammer2_blockref_t *base, int count,
		hammer2_key_t *key_nextp,
		hammer2_key_t key_beg, hammer2_key_t key_end,
		hammer2_blockref_t **brefp);
static hammer2_chain_t *hammer2_chain_lastdrop(hammer2_chain_t *chain,
		int depth);

/*
 * There are many degenerate situations where an extreme rate of console
 * output can occur from warnings and errors.  Make sure this output does
 * not impede operations.
86 */ 87 static struct krate krate_h2chk = { .freq = 5 }; 88 static struct krate krate_h2me = { .freq = 1 }; 89 static struct krate krate_h2em = { .freq = 1 }; 90 91 /* 92 * Basic RBTree for chains (core.rbtree). 93 */ 94 RB_GENERATE(hammer2_chain_tree, hammer2_chain, rbnode, hammer2_chain_cmp); 95 96 int 97 hammer2_chain_cmp(hammer2_chain_t *chain1, hammer2_chain_t *chain2) 98 { 99 hammer2_key_t c1_beg; 100 hammer2_key_t c1_end; 101 hammer2_key_t c2_beg; 102 hammer2_key_t c2_end; 103 104 /* 105 * Compare chains. Overlaps are not supposed to happen and catch 106 * any software issues early we count overlaps as a match. 107 */ 108 c1_beg = chain1->bref.key; 109 c1_end = c1_beg + ((hammer2_key_t)1 << chain1->bref.keybits) - 1; 110 c2_beg = chain2->bref.key; 111 c2_end = c2_beg + ((hammer2_key_t)1 << chain2->bref.keybits) - 1; 112 113 if (c1_end < c2_beg) /* fully to the left */ 114 return(-1); 115 if (c1_beg > c2_end) /* fully to the right */ 116 return(1); 117 return(0); /* overlap (must not cross edge boundary) */ 118 } 119 120 /* 121 * Assert that a chain has no media data associated with it. 122 */ 123 static __inline void 124 hammer2_chain_assert_no_data(hammer2_chain_t *chain) 125 { 126 KKASSERT(chain->dio == NULL); 127 if (chain->bref.type != HAMMER2_BREF_TYPE_VOLUME && 128 chain->bref.type != HAMMER2_BREF_TYPE_FREEMAP && 129 chain->data) { 130 panic("hammer2_chain_assert_no_data: chain %p still has data", 131 chain); 132 } 133 } 134 135 /* 136 * Make a chain visible to the flusher. The flusher operates using a top-down 137 * recursion based on the ONFLUSH flag. It locates MODIFIED and UPDATE chains, 138 * flushes them, and updates blocks back to the volume root. 139 * 140 * This routine sets the ONFLUSH flag upward from the triggering chain until 141 * it hits an inode root or the volume root. Inode chains serve as inflection 142 * points, requiring the flusher to bridge across trees. 
 Inodes include
 * regular inodes, PFS roots (pmp->iroot), and the media super root
 * (spmp->iroot).
 */
void
hammer2_chain_setflush(hammer2_chain_t *chain)
{
	hammer2_chain_t *parent;

	if ((chain->flags & HAMMER2_CHAIN_ONFLUSH) == 0) {
		hammer2_spin_sh(&chain->core.spin);
		while ((chain->flags & HAMMER2_CHAIN_ONFLUSH) == 0) {
			atomic_set_int(&chain->flags, HAMMER2_CHAIN_ONFLUSH);
			/* inodes are inflection points, stop the upward walk */
			if (chain->bref.type == HAMMER2_BREF_TYPE_INODE)
				break;
			if ((parent = chain->parent) == NULL)
				break;
			/*
			 * Hand-over-hand shared-spinlock traversal upward:
			 * acquire the parent's spinlock before releasing
			 * our own.
			 */
			hammer2_spin_sh(&parent->core.spin);
			hammer2_spin_unsh(&chain->core.spin);
			chain = parent;
		}
		hammer2_spin_unsh(&chain->core.spin);
	}
}

/*
 * Allocate a new disconnected chain element representing the specified
 * bref.  chain->refs is set to 1 and the passed bref is copied to
 * chain->bref.  chain->bytes is derived from the bref.
 *
 * chain->pmp inherits pmp unless the chain is an inode (other than the
 * super-root inode).
 *
 * NOTE: Returns a referenced but unlocked (because there is no core) chain.
 */
hammer2_chain_t *
hammer2_chain_alloc(hammer2_dev_t *hmp, hammer2_pfs_t *pmp,
		    hammer2_blockref_t *bref)
{
	hammer2_chain_t *chain;
	u_int bytes;

	/*
	 * Special case - radix of 0 indicates a chain that does not
	 * need a data reference (context is completely embedded in the
	 * bref).
	 */
	if ((int)(bref->data_off & HAMMER2_OFF_MASK_RADIX))
		bytes = 1U << (int)(bref->data_off & HAMMER2_OFF_MASK_RADIX);
	else
		bytes = 0;

	switch(bref->type) {
	case HAMMER2_BREF_TYPE_INODE:
	case HAMMER2_BREF_TYPE_INDIRECT:
	case HAMMER2_BREF_TYPE_DATA:
	case HAMMER2_BREF_TYPE_DIRENT:
	case HAMMER2_BREF_TYPE_FREEMAP_NODE:
	case HAMMER2_BREF_TYPE_FREEMAP_LEAF:
	case HAMMER2_BREF_TYPE_FREEMAP:
	case HAMMER2_BREF_TYPE_VOLUME:
		chain = kmalloc_obj(sizeof(*chain), hmp->mchain,
				    M_WAITOK | M_ZERO);
		atomic_add_long(&hammer2_chain_allocs, 1);
		break;
	case HAMMER2_BREF_TYPE_EMPTY:
	default:
		panic("hammer2_chain_alloc: unrecognized blockref type: %d",
		      bref->type);
		break;
	}

	/*
	 * Initialize the new chain structure.  pmp must be set to NULL for
	 * chains belonging to the super-root topology of a device mount.
	 */
	if (pmp == hmp->spmp)
		chain->pmp = NULL;
	else
		chain->pmp = pmp;

	chain->hmp = hmp;
	chain->bref = *bref;
	chain->bytes = bytes;
	chain->refs = 1;
	chain->flags = HAMMER2_CHAIN_ALLOCATED;

	/*
	 * Set the PFS boundary flag if this chain represents a PFS root.
	 */
	if (bref->flags & HAMMER2_BREF_FLAG_PFSROOT)
		atomic_set_int(&chain->flags, HAMMER2_CHAIN_PFSBOUNDARY);
	hammer2_chain_init(chain);

	return (chain);
}

/*
 * A common function to initialize chains including fchain and vchain.
 * Sets up the live rbtree, the chain mutex, the core spinlock, and the
 * dio interlock.
 */
void
hammer2_chain_init(hammer2_chain_t *chain)
{
	RB_INIT(&chain->core.rbtree);	/* live chains */
	hammer2_mtx_init(&chain->lock, "h2chain");
	hammer2_spin_init(&chain->core.spin, "h2chain");
	lockinit(&chain->diolk, "chdio", 0, 0);
}

/*
 * Add a reference to a chain element, preventing its destruction.
 * Undone via hammer2_chain_drop()
 *
 * (can be called with spinlock held)
 */
void
hammer2_chain_ref(hammer2_chain_t *chain)
{
	/* a 0->1 transition requires no special action */
	if (atomic_fetchadd_int(&chain->refs, 1) == 0) {
		/* NOP */
	}
}

/*
 * Ref a locked chain and force the data to be held across an unlock.
 * Chain must be currently locked.  The user of the chain who desires
 * to release the hold must call hammer2_chain_lock_unhold() to relock
 * and unhold the chain, then unlock normally, or may simply call
 * hammer2_chain_drop_unhold() (which is safer against deadlocks).
 */
void
hammer2_chain_ref_hold(hammer2_chain_t *chain)
{
	atomic_add_int(&chain->lockcnt, 1);
	hammer2_chain_ref(chain);
}

/*
 * Insert the chain in the core rbtree.
 *
 * Normal insertions are placed in the live rbtree.  Insertion of a deleted
 * chain is a special case used by the flush code that is placed on the
 * unstaged deleted list to avoid confusing the live view.
 */
#define HAMMER2_CHAIN_INSERT_SPIN	0x0001	/* acquire parent core spin */
#define HAMMER2_CHAIN_INSERT_LIVE	0x0002	/* adjust live_count */
#define HAMMER2_CHAIN_INSERT_RACE	0x0004	/* fail on generation race */

static
int
hammer2_chain_insert(hammer2_chain_t *parent, hammer2_chain_t *chain,
		     int flags, int generation)
{
	hammer2_chain_t *xchain __debugvar;
	int error = 0;

	if (flags & HAMMER2_CHAIN_INSERT_SPIN)
		hammer2_spin_ex(&parent->core.spin);

	/*
	 * Interlocked by spinlock, check for race.  Returns
	 * HAMMER2_ERROR_EAGAIN if the parent changed since the caller
	 * sampled its generation.
	 */
	if ((flags & HAMMER2_CHAIN_INSERT_RACE) &&
	    parent->core.generation != generation) {
		error = HAMMER2_ERROR_EAGAIN;
		goto failed;
	}

	/*
	 * Insert chain
	 */
	xchain = RB_INSERT(hammer2_chain_tree, &parent->core.rbtree, chain);
	KASSERT(xchain == NULL,
		("hammer2_chain_insert: collision %p %p (key=%016jx)",
		chain, xchain, chain->bref.key));
	atomic_set_int(&chain->flags, HAMMER2_CHAIN_ONRBTREE);
	chain->parent = parent;
	++parent->core.chain_count;
	++parent->core.generation;	/* XXX incs for _get() too, XXX */

	/*
	 * We have to keep track of the effective live-view blockref count
	 * so the create code knows when to push an indirect block.
	 */
	if (flags & HAMMER2_CHAIN_INSERT_LIVE)
		atomic_add_int(&parent->core.live_count, 1);
failed:
	if (flags & HAMMER2_CHAIN_INSERT_SPIN)
		hammer2_spin_unex(&parent->core.spin);
	return error;
}

/*
 * Drop the caller's reference to the chain.  When the ref count drops to
 * zero this function will try to disassociate the chain from its parent and
 * deallocate it, then recursively drop the parent using the implied ref
 * from the chain's chain->parent.
 *
 * Nobody should own chain's mutex on the 1->0 transition, unless this drop
 * races an acquisition by another cpu.  Therefore we can loop if we are
 * unable to acquire the mutex, and refs is unlikely to be 1 unless we again
 * race against another drop.
344 */ 345 void 346 hammer2_chain_drop(hammer2_chain_t *chain) 347 { 348 u_int refs; 349 350 KKASSERT(chain->refs > 0); 351 352 while (chain) { 353 refs = chain->refs; 354 cpu_ccfence(); 355 KKASSERT(refs > 0); 356 357 if (refs == 1) { 358 if (hammer2_mtx_ex_try(&chain->lock) == 0) 359 chain = hammer2_chain_lastdrop(chain, 0); 360 /* retry the same chain, or chain from lastdrop */ 361 } else { 362 if (atomic_cmpset_int(&chain->refs, refs, refs - 1)) 363 break; 364 /* retry the same chain */ 365 } 366 cpu_pause(); 367 } 368 } 369 370 /* 371 * Unhold a held and probably not-locked chain, ensure that the data is 372 * dropped on the 1->0 transition of lockcnt by obtaining an exclusive 373 * lock and then simply unlocking the chain. 374 */ 375 void 376 hammer2_chain_unhold(hammer2_chain_t *chain) 377 { 378 u_int lockcnt; 379 int iter = 0; 380 381 for (;;) { 382 lockcnt = chain->lockcnt; 383 cpu_ccfence(); 384 if (lockcnt > 1) { 385 if (atomic_cmpset_int(&chain->lockcnt, 386 lockcnt, lockcnt - 1)) { 387 break; 388 } 389 } else if (hammer2_mtx_ex_try(&chain->lock) == 0) { 390 hammer2_chain_unlock(chain); 391 break; 392 } else { 393 /* 394 * This situation can easily occur on SMP due to 395 * the gap inbetween the 1->0 transition and the 396 * final unlock. We cannot safely block on the 397 * mutex because lockcnt might go above 1. 398 * 399 * XXX Sleep for one tick if it takes too long. 
400 */ 401 if (++iter > 1000) { 402 if (iter > 1000 + hz) { 403 kprintf("hammer2: h2race1 %p\n", chain); 404 iter = 1000; 405 } 406 tsleep(&iter, 0, "h2race1", 1); 407 } 408 cpu_pause(); 409 } 410 } 411 } 412 413 void 414 hammer2_chain_drop_unhold(hammer2_chain_t *chain) 415 { 416 hammer2_chain_unhold(chain); 417 hammer2_chain_drop(chain); 418 } 419 420 void 421 hammer2_chain_rehold(hammer2_chain_t *chain) 422 { 423 hammer2_chain_lock(chain, HAMMER2_RESOLVE_SHARED); 424 atomic_add_int(&chain->lockcnt, 1); 425 hammer2_chain_unlock(chain); 426 } 427 428 /* 429 * Handles the (potential) last drop of chain->refs from 1->0. Called with 430 * the mutex exclusively locked, refs == 1, and lockcnt 0. SMP races are 431 * possible against refs and lockcnt. We must dispose of the mutex on chain. 432 * 433 * This function returns an unlocked chain for recursive drop or NULL. It 434 * can return the same chain if it determines it has raced another ref. 435 * 436 * -- 437 * 438 * When two chains need to be recursively dropped we use the chain we 439 * would otherwise free to placehold the additional chain. It's a bit 440 * convoluted but we can't just recurse without potentially blowing out 441 * the kernel stack. 442 * 443 * The chain cannot be freed if it has any children. 444 * The chain cannot be freed if flagged MODIFIED unless we can dispose of it. 445 * The chain cannot be freed if flagged UPDATE unless we can dispose of it. 446 * Any dedup registration can remain intact. 447 * 448 * The core spinlock is allowed to nest child-to-parent (not parent-to-child). 449 */ 450 static 451 hammer2_chain_t * 452 hammer2_chain_lastdrop(hammer2_chain_t *chain, int depth) 453 { 454 hammer2_dev_t *hmp; 455 hammer2_chain_t *parent; 456 hammer2_chain_t *rdrop; 457 458 /* 459 * We need chain's spinlock to interlock the sub-tree test. 460 * We already have chain's mutex, protecting chain->parent. 461 * 462 * Remember that chain->refs can be in flux. 
463 */ 464 hammer2_spin_ex(&chain->core.spin); 465 466 if (chain->parent != NULL) { 467 /* 468 * If the chain has a parent the UPDATE bit prevents scrapping 469 * as the chain is needed to properly flush the parent. Try 470 * to complete the 1->0 transition and return NULL. Retry 471 * (return chain) if we are unable to complete the 1->0 472 * transition, else return NULL (nothing more to do). 473 * 474 * If the chain has a parent the MODIFIED bit prevents 475 * scrapping. 476 */ 477 if (chain->flags & (HAMMER2_CHAIN_UPDATE | 478 HAMMER2_CHAIN_MODIFIED)) { 479 if (atomic_cmpset_int(&chain->refs, 1, 0)) { 480 hammer2_spin_unex(&chain->core.spin); 481 hammer2_chain_assert_no_data(chain); 482 hammer2_mtx_unlock(&chain->lock); 483 chain = NULL; 484 } else { 485 hammer2_spin_unex(&chain->core.spin); 486 hammer2_mtx_unlock(&chain->lock); 487 } 488 return (chain); 489 } 490 /* spinlock still held */ 491 } else if (chain->bref.type == HAMMER2_BREF_TYPE_VOLUME || 492 chain->bref.type == HAMMER2_BREF_TYPE_FREEMAP) { 493 /* 494 * Retain the static vchain and fchain. Clear bits that 495 * are not relevant. Do not clear the MODIFIED bit, 496 * and certainly do not put it on the delayed-flush queue. 497 */ 498 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_UPDATE); 499 } else { 500 /* 501 * The chain has no parent and can be flagged for destruction. 502 * Since it has no parent, UPDATE can also be cleared. 503 */ 504 atomic_set_int(&chain->flags, HAMMER2_CHAIN_DESTROY); 505 if (chain->flags & HAMMER2_CHAIN_UPDATE) 506 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_UPDATE); 507 508 /* 509 * If the chain has children we must propagate the DESTROY 510 * flag downward and rip the disconnected topology apart. 511 * This is accomplished by calling hammer2_flush() on the 512 * chain. 513 * 514 * Any dedup is already handled by the underlying DIO, so 515 * we do not have to specifically flush it here. 
516 */ 517 if (chain->core.chain_count) { 518 hammer2_spin_unex(&chain->core.spin); 519 hammer2_flush(chain, HAMMER2_FLUSH_TOP | 520 HAMMER2_FLUSH_ALL); 521 hammer2_mtx_unlock(&chain->lock); 522 523 return(chain); /* retry drop */ 524 } 525 526 /* 527 * Otherwise we can scrap the MODIFIED bit if it is set, 528 * and continue along the freeing path. 529 * 530 * Be sure to clean-out any dedup bits. Without a parent 531 * this chain will no longer be visible to the flush code. 532 * Easy check data_off to avoid the volume root. 533 */ 534 if (chain->flags & HAMMER2_CHAIN_MODIFIED) { 535 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_MODIFIED); 536 atomic_add_long(&hammer2_count_modified_chains, -1); 537 if (chain->pmp) 538 hammer2_pfs_memory_wakeup(chain->pmp, -1); 539 } 540 /* spinlock still held */ 541 } 542 543 /* spinlock still held */ 544 545 /* 546 * If any children exist we must leave the chain intact with refs == 0. 547 * They exist because chains are retained below us which have refs or 548 * may require flushing. 549 * 550 * Retry (return chain) if we fail to transition the refs to 0, else 551 * return NULL indication nothing more to do. 552 */ 553 if (chain->core.chain_count) { 554 if (atomic_cmpset_int(&chain->refs, 1, 0)) { 555 hammer2_spin_unex(&chain->core.spin); 556 hammer2_chain_assert_no_data(chain); 557 hammer2_mtx_unlock(&chain->lock); 558 chain = NULL; 559 } else { 560 hammer2_spin_unex(&chain->core.spin); 561 hammer2_mtx_unlock(&chain->lock); 562 } 563 return (chain); 564 } 565 /* spinlock still held */ 566 /* no chains left under us */ 567 568 /* 569 * chain->core has no children left so no accessors can get to our 570 * chain from there. Now we have to lock the parent core to interlock 571 * remaining possible accessors that might bump chain's refs before 572 * we can safely drop chain's refs with intent to free the chain. 573 */ 574 hmp = chain->hmp; 575 rdrop = NULL; 576 577 parent = chain->parent; 578 579 /* 580 * WARNING! 
chain's spin lock is still held here, and other spinlocks 581 * will be acquired and released in the code below. We 582 * cannot be making fancy procedure calls! 583 */ 584 585 /* 586 * Spinlock the parent and try to drop the last ref on chain. 587 * On success determine if we should dispose of the chain 588 * (remove the chain from its parent, etc). 589 * 590 * (normal core locks are top-down recursive but we define 591 * core spinlocks as bottom-up recursive, so this is safe). 592 */ 593 if (parent) { 594 hammer2_spin_ex(&parent->core.spin); 595 if (atomic_cmpset_int(&chain->refs, 1, 0) == 0) { 596 /* 597 * 1->0 transition failed, retry. 598 */ 599 hammer2_spin_unex(&parent->core.spin); 600 hammer2_spin_unex(&chain->core.spin); 601 hammer2_mtx_unlock(&chain->lock); 602 603 return(chain); 604 } 605 606 /* 607 * 1->0 transition successful, parent spin held to prevent 608 * new lookups, chain spinlock held to protect parent field. 609 * Remove chain from the parent. 610 * 611 * If the chain is being removed from the parent's rbtree but 612 * is not blkmapped, we have to adjust live_count downward. If 613 * it is blkmapped then the blockref is retained in the parent 614 * as is its associated live_count. This case can occur when 615 * a chain added to the topology is unable to flush and is 616 * then later deleted. 617 */ 618 if (chain->flags & HAMMER2_CHAIN_ONRBTREE) { 619 if ((parent->flags & HAMMER2_CHAIN_COUNTEDBREFS) && 620 (chain->flags & HAMMER2_CHAIN_BLKMAPPED) == 0) { 621 atomic_add_int(&parent->core.live_count, -1); 622 } 623 RB_REMOVE(hammer2_chain_tree, 624 &parent->core.rbtree, chain); 625 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_ONRBTREE); 626 --parent->core.chain_count; 627 chain->parent = NULL; 628 } 629 630 /* 631 * If our chain was the last chain in the parent's core the 632 * core is now empty and its parent might have to be 633 * re-dropped if it has 0 refs. 
634 */ 635 if (parent->core.chain_count == 0) { 636 rdrop = parent; 637 atomic_add_int(&rdrop->refs, 1); 638 /* 639 if (atomic_cmpset_int(&rdrop->refs, 0, 1) == 0) 640 rdrop = NULL; 641 */ 642 } 643 hammer2_spin_unex(&parent->core.spin); 644 parent = NULL; /* safety */ 645 /* FALL THROUGH */ 646 } else { 647 /* 648 * No-parent case. 649 */ 650 if (atomic_cmpset_int(&chain->refs, 1, 0) == 0) { 651 /* 652 * 1->0 transition failed, retry. 653 */ 654 hammer2_spin_unex(&parent->core.spin); 655 hammer2_spin_unex(&chain->core.spin); 656 hammer2_mtx_unlock(&chain->lock); 657 658 return(chain); 659 } 660 } 661 662 /* 663 * Successful 1->0 transition, no parent, no children... no way for 664 * anyone to ref this chain any more. We can clean-up and free it. 665 * 666 * We still have the core spinlock, and core's chain_count is 0. 667 * Any parent spinlock is gone. 668 */ 669 hammer2_spin_unex(&chain->core.spin); 670 hammer2_chain_assert_no_data(chain); 671 hammer2_mtx_unlock(&chain->lock); 672 KKASSERT(RB_EMPTY(&chain->core.rbtree) && 673 chain->core.chain_count == 0); 674 675 /* 676 * All locks are gone, no pointers remain to the chain, finish 677 * freeing it. 678 */ 679 KKASSERT((chain->flags & (HAMMER2_CHAIN_UPDATE | 680 HAMMER2_CHAIN_MODIFIED)) == 0); 681 682 /* 683 * Once chain resources are gone we can use the now dead chain 684 * structure to placehold what might otherwise require a recursive 685 * drop, because we have potentially two things to drop and can only 686 * return one directly. 687 */ 688 if (chain->flags & HAMMER2_CHAIN_ALLOCATED) { 689 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_ALLOCATED); 690 chain->hmp = NULL; 691 kfree_obj(chain, hmp->mchain); 692 atomic_add_long(&hammer2_chain_allocs, -1); 693 } 694 695 /* 696 * Possible chaining loop when parent re-drop needed. 697 */ 698 return(rdrop); 699 } 700 701 /* 702 * On last lock release. 
703 */ 704 static hammer2_io_t * 705 hammer2_chain_drop_data(hammer2_chain_t *chain) 706 { 707 hammer2_io_t *dio; 708 709 if ((dio = chain->dio) != NULL) { 710 chain->dio = NULL; 711 chain->data = NULL; 712 } else { 713 switch(chain->bref.type) { 714 case HAMMER2_BREF_TYPE_VOLUME: 715 case HAMMER2_BREF_TYPE_FREEMAP: 716 break; 717 default: 718 if (chain->data != NULL) { 719 hammer2_spin_unex(&chain->core.spin); 720 panic("chain data not null: " 721 "chain %p bref %016jx.%02x " 722 "refs %d parent %p dio %p data %p", 723 chain, chain->bref.data_off, 724 chain->bref.type, chain->refs, 725 chain->parent, 726 chain->dio, chain->data); 727 } 728 KKASSERT(chain->data == NULL); 729 break; 730 } 731 } 732 return dio; 733 } 734 735 /* 736 * Lock a referenced chain element, acquiring its data with I/O if necessary, 737 * and specify how you would like the data to be resolved. 738 * 739 * If an I/O or other fatal error occurs, chain->error will be set to non-zero. 740 * 741 * The lock is allowed to recurse, multiple locking ops will aggregate 742 * the requested resolve types. Once data is assigned it will not be 743 * removed until the last unlock. 744 * 745 * HAMMER2_RESOLVE_NEVER - Do not resolve the data element. 746 * (typically used to avoid device/logical buffer 747 * aliasing for data) 748 * 749 * HAMMER2_RESOLVE_MAYBE - Do not resolve data elements for chains in 750 * the INITIAL-create state (indirect blocks only). 751 * 752 * Do not resolve data elements for DATA chains. 753 * (typically used to avoid device/logical buffer 754 * aliasing for data) 755 * 756 * HAMMER2_RESOLVE_ALWAYS- Always resolve the data element. 757 * 758 * HAMMER2_RESOLVE_SHARED- (flag) The chain is locked shared, otherwise 759 * it will be locked exclusive. 760 * 761 * HAMMER2_RESOLVE_NONBLOCK- (flag) The chain is locked non-blocking. If 762 * the lock fails, EAGAIN is returned. 763 * 764 * NOTE: Embedded elements (volume header, inodes) are always resolved 765 * regardless. 
766 * 767 * NOTE: Specifying HAMMER2_RESOLVE_ALWAYS on a newly-created non-embedded 768 * element will instantiate and zero its buffer, and flush it on 769 * release. 770 * 771 * NOTE: (data) elements are normally locked RESOLVE_NEVER or RESOLVE_MAYBE 772 * so as not to instantiate a device buffer, which could alias against 773 * a logical file buffer. However, if ALWAYS is specified the 774 * device buffer will be instantiated anyway. 775 * 776 * NOTE: The return value is always 0 unless NONBLOCK is specified, in which 777 * case it can be either 0 or EAGAIN. 778 * 779 * WARNING! This function blocks on I/O if data needs to be fetched. This 780 * blocking can run concurrent with other compatible lock holders 781 * who do not need data returning. The lock is not upgraded to 782 * exclusive during a data fetch, a separate bit is used to 783 * interlock I/O. However, an exclusive lock holder can still count 784 * on being interlocked against an I/O fetch managed by a shared 785 * lock holder. 786 */ 787 int 788 hammer2_chain_lock(hammer2_chain_t *chain, int how) 789 { 790 KKASSERT(chain->refs > 0); 791 792 if (how & HAMMER2_RESOLVE_NONBLOCK) { 793 /* 794 * We still have to bump lockcnt before acquiring the lock, 795 * even for non-blocking operation, because the unlock code 796 * live-loops on lockcnt == 1 when dropping the last lock. 797 * 798 * If the non-blocking operation fails we have to use an 799 * unhold sequence to undo the mess. 800 * 801 * NOTE: LOCKAGAIN must always succeed without blocking, 802 * even if NONBLOCK is specified. 
803 */ 804 atomic_add_int(&chain->lockcnt, 1); 805 if (how & HAMMER2_RESOLVE_SHARED) { 806 if (how & HAMMER2_RESOLVE_LOCKAGAIN) { 807 hammer2_mtx_sh_again(&chain->lock); 808 } else { 809 if (hammer2_mtx_sh_try(&chain->lock) != 0) { 810 hammer2_chain_unhold(chain); 811 return EAGAIN; 812 } 813 } 814 } else { 815 if (hammer2_mtx_ex_try(&chain->lock) != 0) { 816 hammer2_chain_unhold(chain); 817 return EAGAIN; 818 } 819 } 820 } else { 821 /* 822 * Get the appropriate lock. If LOCKAGAIN is flagged with 823 * SHARED the caller expects a shared lock to already be 824 * present and we are giving it another ref. This case must 825 * importantly not block if there is a pending exclusive lock 826 * request. 827 */ 828 atomic_add_int(&chain->lockcnt, 1); 829 if (how & HAMMER2_RESOLVE_SHARED) { 830 if (how & HAMMER2_RESOLVE_LOCKAGAIN) { 831 hammer2_mtx_sh_again(&chain->lock); 832 } else { 833 hammer2_mtx_sh(&chain->lock); 834 } 835 } else { 836 hammer2_mtx_ex(&chain->lock); 837 } 838 } 839 840 /* 841 * If we already have a valid data pointer make sure the data is 842 * synchronized to the current cpu, and then no further action is 843 * necessary. 844 */ 845 if (chain->data) { 846 if (chain->dio) 847 hammer2_io_bkvasync(chain->dio); 848 return 0; 849 } 850 851 /* 852 * Do we have to resolve the data? This is generally only 853 * applicable to HAMMER2_BREF_TYPE_DATA which is special-cased. 854 * Other BREF types expects the data to be there. 
855 */ 856 switch(how & HAMMER2_RESOLVE_MASK) { 857 case HAMMER2_RESOLVE_NEVER: 858 return 0; 859 case HAMMER2_RESOLVE_MAYBE: 860 if (chain->flags & HAMMER2_CHAIN_INITIAL) 861 return 0; 862 if (chain->bref.type == HAMMER2_BREF_TYPE_DATA) 863 return 0; 864 #if 0 865 if (chain->bref.type == HAMMER2_BREF_TYPE_FREEMAP_NODE) 866 return 0; 867 if (chain->bref.type == HAMMER2_BREF_TYPE_FREEMAP_LEAF) 868 return 0; 869 #endif 870 /* fall through */ 871 case HAMMER2_RESOLVE_ALWAYS: 872 default: 873 break; 874 } 875 876 /* 877 * Caller requires data 878 */ 879 hammer2_chain_load_data(chain); 880 881 return 0; 882 } 883 884 #if 0 885 /* 886 * Lock the chain, retain the hold, and drop the data persistence count. 887 * The data should remain valid because we never transitioned lockcnt 888 * through 0. 889 */ 890 void 891 hammer2_chain_lock_unhold(hammer2_chain_t *chain, int how) 892 { 893 hammer2_chain_lock(chain, how); 894 atomic_add_int(&chain->lockcnt, -1); 895 } 896 897 /* 898 * Downgrade an exclusive chain lock to a shared chain lock. 899 * 900 * NOTE: There is no upgrade equivalent due to the ease of 901 * deadlocks in that direction. 902 */ 903 void 904 hammer2_chain_lock_downgrade(hammer2_chain_t *chain) 905 { 906 hammer2_mtx_downgrade(&chain->lock); 907 } 908 #endif 909 910 /* 911 * Issue I/O and install chain->data. Caller must hold a chain lock, lock 912 * may be of any type. 913 * 914 * Once chain->data is set it cannot be disposed of until all locks are 915 * released. 916 * 917 * Make sure the data is synchronized to the current cpu. 918 */ 919 void 920 hammer2_chain_load_data(hammer2_chain_t *chain) 921 { 922 hammer2_blockref_t *bref; 923 hammer2_dev_t *hmp; 924 hammer2_io_t *dio; 925 char *bdata; 926 int error; 927 928 /* 929 * Degenerate case, data already present, or chain has no media 930 * reference to load. 
931 */ 932 KKASSERT(chain->lock.mtx_lock & MTX_MASK); 933 if (chain->data) { 934 if (chain->dio) 935 hammer2_io_bkvasync(chain->dio); 936 return; 937 } 938 if ((chain->bref.data_off & ~HAMMER2_OFF_MASK_RADIX) == 0) 939 return; 940 941 hmp = chain->hmp; 942 KKASSERT(hmp != NULL); 943 944 /* 945 * Gain the IOINPROG bit, interlocked block. 946 */ 947 for (;;) { 948 u_int oflags; 949 u_int nflags; 950 951 oflags = chain->flags; 952 cpu_ccfence(); 953 if (oflags & HAMMER2_CHAIN_IOINPROG) { 954 nflags = oflags | HAMMER2_CHAIN_IOSIGNAL; 955 tsleep_interlock(&chain->flags, 0); 956 if (atomic_cmpset_int(&chain->flags, oflags, nflags)) { 957 tsleep(&chain->flags, PINTERLOCKED, 958 "h2iocw", 0); 959 } 960 /* retry */ 961 } else { 962 nflags = oflags | HAMMER2_CHAIN_IOINPROG; 963 if (atomic_cmpset_int(&chain->flags, oflags, nflags)) { 964 break; 965 } 966 /* retry */ 967 } 968 } 969 970 /* 971 * We own CHAIN_IOINPROG 972 * 973 * Degenerate case if we raced another load. 974 */ 975 if (chain->data) { 976 if (chain->dio) 977 hammer2_io_bkvasync(chain->dio); 978 goto done; 979 } 980 981 /* 982 * We must resolve to a device buffer, either by issuing I/O or 983 * by creating a zero-fill element. We do not mark the buffer 984 * dirty when creating a zero-fill element (the hammer2_chain_modify() 985 * API must still be used to do that). 986 * 987 * The device buffer is variable-sized in powers of 2 down 988 * to HAMMER2_MIN_ALLOC (typically 1K). A 64K physical storage 989 * chunk always contains buffers of the same size. (XXX) 990 * 991 * The minimum physical IO size may be larger than the variable 992 * block size. 993 */ 994 bref = &chain->bref; 995 996 /* 997 * The getblk() optimization can only be used on newly created 998 * elements if the physical block size matches the request. 
999 */ 1000 if (chain->flags & HAMMER2_CHAIN_INITIAL) { 1001 error = hammer2_io_new(hmp, bref->type, 1002 bref->data_off, chain->bytes, 1003 &chain->dio); 1004 } else { 1005 error = hammer2_io_bread(hmp, bref->type, 1006 bref->data_off, chain->bytes, 1007 &chain->dio); 1008 hammer2_adjreadcounter(chain->bref.type, chain->bytes); 1009 } 1010 if (error) { 1011 chain->error = HAMMER2_ERROR_EIO; 1012 kprintf("hammer2_chain_load_data: I/O error %016jx: %d\n", 1013 (intmax_t)bref->data_off, error); 1014 hammer2_io_bqrelse(&chain->dio); 1015 goto done; 1016 } 1017 chain->error = 0; 1018 1019 /* 1020 * This isn't perfect and can be ignored on OSs which do not have 1021 * an indication as to whether a buffer is coming from cache or 1022 * if I/O was actually issued for the read. TESTEDGOOD will work 1023 * pretty well without the B_IOISSUED logic because chains are 1024 * cached, but in that situation (without B_IOISSUED) it will not 1025 * detect whether a re-read via I/O is corrupted verses the original 1026 * read. 1027 * 1028 * We can't re-run the CRC on every fresh lock. That would be 1029 * insanely expensive. 1030 * 1031 * If the underlying kernel buffer covers the entire chain we can 1032 * use the B_IOISSUED indication to determine if we have to re-run 1033 * the CRC on chain data for chains that managed to stay cached 1034 * across the kernel disposal of the original buffer. 1035 */ 1036 if ((dio = chain->dio) != NULL && dio->bp) { 1037 struct buf *bp = dio->bp; 1038 1039 if (dio->psize == chain->bytes && 1040 (bp->b_flags & B_IOISSUED)) { 1041 atomic_clear_int(&chain->flags, 1042 HAMMER2_CHAIN_TESTEDGOOD); 1043 bp->b_flags &= ~B_IOISSUED; 1044 } 1045 } 1046 1047 /* 1048 * NOTE: A locked chain's data cannot be modified without first 1049 * calling hammer2_chain_modify(). 
1050 */ 1051 1052 /* 1053 * NOTE: hammer2_io_data() call issues bkvasync() 1054 */ 1055 bdata = hammer2_io_data(chain->dio, chain->bref.data_off); 1056 1057 if (chain->flags & HAMMER2_CHAIN_INITIAL) { 1058 /* 1059 * Clear INITIAL. In this case we used io_new() and the 1060 * buffer has been zero'd and marked dirty. 1061 * 1062 * CHAIN_MODIFIED has not been set yet, and we leave it 1063 * that way for now. Set a temporary CHAIN_NOTTESTED flag 1064 * to prevent hammer2_chain_testcheck() from trying to match 1065 * a check code that has not yet been generated. This bit 1066 * should NOT end up on the actual media. 1067 */ 1068 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_INITIAL); 1069 atomic_set_int(&chain->flags, HAMMER2_CHAIN_NOTTESTED); 1070 } else if (chain->flags & HAMMER2_CHAIN_MODIFIED) { 1071 /* 1072 * check data not currently synchronized due to 1073 * modification. XXX assumes data stays in the buffer 1074 * cache, which might not be true (need biodep on flush 1075 * to calculate crc? or simple crc?). 1076 */ 1077 } else if ((chain->flags & HAMMER2_CHAIN_TESTEDGOOD) == 0) { 1078 if (hammer2_chain_testcheck(chain, bdata) == 0) { 1079 chain->error = HAMMER2_ERROR_CHECK; 1080 } else { 1081 atomic_set_int(&chain->flags, HAMMER2_CHAIN_TESTEDGOOD); 1082 } 1083 } 1084 1085 /* 1086 * Setup the data pointer by pointing it into the buffer. 1087 * WARNING! Other threads can start using the data the instant we 1088 * set chain->data non-NULL. 
	 */
	switch (bref->type) {
	case HAMMER2_BREF_TYPE_VOLUME:
	case HAMMER2_BREF_TYPE_FREEMAP:
		/*
		 * Embedded-data types must never reach the device I/O
		 * resolution path.
		 */
		panic("hammer2_chain_load_data: unresolved volume header");
		break;
	case HAMMER2_BREF_TYPE_DIRENT:
		KKASSERT(chain->bytes != 0);
		/* fall through */
	case HAMMER2_BREF_TYPE_INODE:
	case HAMMER2_BREF_TYPE_FREEMAP_LEAF:
	case HAMMER2_BREF_TYPE_INDIRECT:
	case HAMMER2_BREF_TYPE_DATA:
	case HAMMER2_BREF_TYPE_FREEMAP_NODE:
	default:
		/*
		 * Point data at the device buffer and leave dio intact.
		 */
		chain->data = (void *)bdata;
		break;
	}

	/*
	 * Release HAMMER2_CHAIN_IOINPROG and signal waiters if requested.
	 */
done:
	for (;;) {
		u_int oflags;
		u_int nflags;

		oflags = chain->flags;
		nflags = oflags & ~(HAMMER2_CHAIN_IOINPROG |
				    HAMMER2_CHAIN_IOSIGNAL);
		KKASSERT(oflags & HAMMER2_CHAIN_IOINPROG);
		if (atomic_cmpset_int(&chain->flags, oflags, nflags)) {
			if (oflags & HAMMER2_CHAIN_IOSIGNAL)
				wakeup(&chain->flags);
			break;
		}
	}
}

/*
 * Unlock and deref a chain element.
 *
 * Remember that the presence of children under chain prevents the chain's
 * destruction but does not add additional references, so the dio will still
 * be dropped.
 */
void
hammer2_chain_unlock(hammer2_chain_t *chain)
{
	hammer2_io_t *dio;
	u_int lockcnt;
	int iter = 0;

	/*
	 * If multiple locks are present (or being attempted) on this
	 * particular chain we can just unlock, drop refs, and return.
	 *
	 * Otherwise fall-through on the 1->0 transition.
	 */
	for (;;) {
		lockcnt = chain->lockcnt;
		KKASSERT(lockcnt > 0);
		cpu_ccfence();
		if (lockcnt > 1) {
			/* not the last unlock, just decrement lockcnt */
			if (atomic_cmpset_int(&chain->lockcnt,
					      lockcnt, lockcnt - 1)) {
				hammer2_mtx_unlock(&chain->lock);
				return;
			}
		} else if (hammer2_mtx_upgrade_try(&chain->lock) == 0) {
			/* while holding the mutex exclusively */
			if (atomic_cmpset_int(&chain->lockcnt, 1, 0))
				break;
		} else {
			/*
			 * This situation can easily occur on SMP due to
			 * the gap in between the 1->0 transition and the
			 * final unlock.  We cannot safely block on the
			 * mutex because lockcnt might go above 1.
			 *
			 * XXX Sleep for one tick if it takes too long.
			 */
			if (++iter > 1000) {
				if (iter > 1000 + hz) {
					kprintf("hammer2: h2race2 %p\n", chain);
					iter = 1000;
				}
				tsleep(&iter, 0, "h2race2", 1);
			}
			cpu_pause();
		}
		/* retry */
	}

	/*
	 * Last unlock / mutex upgraded to exclusive.  Drop the data
	 * reference.
	 */
	dio = hammer2_chain_drop_data(chain);
	if (dio)
		hammer2_io_bqrelse(&dio);
	hammer2_mtx_unlock(&chain->lock);
}

#if 0
/*
 * Unlock and hold chain data intact
 */
void
hammer2_chain_unlock_hold(hammer2_chain_t *chain)
{
	atomic_add_int(&chain->lockcnt, 1);
	hammer2_chain_unlock(chain);
}
#endif

/*
 * Helper to obtain the blockref[] array base and count for a chain.
 *
 * XXX Not widely used yet, various use cases need to be validated and
 *     converted to use this function.
 */
static
hammer2_blockref_t *
hammer2_chain_base_and_count(hammer2_chain_t *parent, int *countp)
{
	hammer2_blockref_t *base;
	int count;

	if (parent->flags & HAMMER2_CHAIN_INITIAL) {
		/*
		 * Chain is still in the INITIAL state, no media data has
		 * been instantiated so there is no blockref array.  The
		 * count is still returned based on the topology type.
		 */
		base = NULL;

		switch(parent->bref.type) {
		case HAMMER2_BREF_TYPE_INODE:
			count = HAMMER2_SET_COUNT;
			break;
		case HAMMER2_BREF_TYPE_INDIRECT:
		case HAMMER2_BREF_TYPE_FREEMAP_NODE:
			count = parent->bytes / sizeof(hammer2_blockref_t);
			break;
		case HAMMER2_BREF_TYPE_VOLUME:
			count = HAMMER2_SET_COUNT;
			break;
		case HAMMER2_BREF_TYPE_FREEMAP:
			count = HAMMER2_SET_COUNT;
			break;
		default:
			panic("hammer2_chain_base_and_count: "
			      "unrecognized blockref type: %d",
			      parent->bref.type);
			count = 0;
			break;
		}
	} else {
		switch(parent->bref.type) {
		case HAMMER2_BREF_TYPE_INODE:
			base = &parent->data->ipdata.u.blockset.blockref[0];
			count = HAMMER2_SET_COUNT;
			break;
		case HAMMER2_BREF_TYPE_INDIRECT:
		case HAMMER2_BREF_TYPE_FREEMAP_NODE:
			base = &parent->data->npdata[0];
			count = parent->bytes / sizeof(hammer2_blockref_t);
			break;
		case HAMMER2_BREF_TYPE_VOLUME:
			base = &parent->data->voldata.
					sroot_blockset.blockref[0];
			count = HAMMER2_SET_COUNT;
			break;
		case HAMMER2_BREF_TYPE_FREEMAP:
			base = &parent->data->blkset.blockref[0];
			count = HAMMER2_SET_COUNT;
			break;
		default:
			panic("hammer2_chain_base_and_count: "
			      "unrecognized blockref type: %d",
			      parent->bref.type);
			base = NULL;
			count = 0;
			break;
		}
	}
	*countp = count;

	return base;
}

/*
 * This counts the number of live blockrefs in a block array and
 * also calculates the point at which all remaining blockrefs are empty.
 * This routine can only be called on a live chain.
 *
 * Caller holds the chain locked, but possibly with a shared lock.  We
 * must use an exclusive spinlock to prevent corruption.
 *
 * NOTE: Flag is not set until after the count is complete, allowing
 *	 callers to test the flag without holding the spinlock.
 *
 * NOTE: If base is NULL the related chain is still in the INITIAL
 *	 state and there are no blockrefs to count.
 *
 * NOTE: live_count may already have some counts accumulated due to
 *	 creation and deletion and could even be initially negative.
 */
void
hammer2_chain_countbrefs(hammer2_chain_t *chain,
			 hammer2_blockref_t *base, int count)
{
	hammer2_spin_ex(&chain->core.spin);
	if ((chain->flags & HAMMER2_CHAIN_COUNTEDBREFS) == 0) {
		if (base) {
			/*
			 * Scan backwards to find the last non-empty
			 * blockref.  live_zero is the index just past it;
			 * all slots at or beyond live_zero are empty.
			 */
			while (--count >= 0) {
				if (base[count].type != HAMMER2_BREF_TYPE_EMPTY)
					break;
			}
			chain->core.live_zero = count + 1;
			/*
			 * Accumulate the number of live (non-empty)
			 * blockrefs below live_zero.
			 */
			while (count >= 0) {
				if (base[count].type != HAMMER2_BREF_TYPE_EMPTY)
					atomic_add_int(&chain->core.live_count,
						       1);
				--count;
			}
		} else {
			chain->core.live_zero = 0;
		}
		/* else do not modify live_count */
		atomic_set_int(&chain->flags, HAMMER2_CHAIN_COUNTEDBREFS);
	}
	hammer2_spin_unex(&chain->core.spin);
}

/*
 * Resize the chain's physical storage allocation in-place.  This function does
 * not usually adjust the data pointer and must be followed by (typically) a
 * hammer2_chain_modify() call to copy any old data over and adjust the
 * data pointer.
 *
 * Chains can be resized smaller without reallocating the storage.  Resizing
 * larger will reallocate the storage.  Excess or prior storage is reclaimed
 * asynchronously at a later time.
 *
 * An nradix value of 0 is special-cased to mean that the storage should
 * be disassociated, that is the chain is being resized to 0 bytes (not 1
 * byte).
 *
 * Must be passed an exclusively locked parent and chain.
 *
 * This function is mostly used with DATA blocks locked RESOLVE_NEVER in order
 * to avoid instantiating a device buffer that conflicts with the vnode data
 * buffer.  However, because H2 can compress or encrypt data, the chain may
 * have a dio assigned to it in those situations, and they do not conflict.
 *
 * XXX return error if cannot resize.
 */
int
hammer2_chain_resize(hammer2_chain_t *chain,
		     hammer2_tid_t mtid, hammer2_off_t dedup_off,
		     int nradix, int flags)
{
	hammer2_dev_t *hmp;
	size_t obytes;
	size_t nbytes;
	int error;

	hmp = chain->hmp;

	/*
	 * Only data and indirect blocks can be resized for now.
	 * (The volume root, inodes, and freemap elements use a fixed size).
	 */
	KKASSERT(chain != &hmp->vchain);
	KKASSERT(chain->bref.type == HAMMER2_BREF_TYPE_DATA ||
		 chain->bref.type == HAMMER2_BREF_TYPE_INDIRECT ||
		 chain->bref.type == HAMMER2_BREF_TYPE_DIRENT);

	/*
	 * Nothing to do if the element is already the proper size.
	 * (nradix == 0 means 0 bytes, not 1 << 0).
	 */
	obytes = chain->bytes;
	nbytes = (nradix) ? (1U << nradix) : 0;
	if (obytes == nbytes)
		return (chain->error);

	/*
	 * Make sure the old data is instantiated so we can copy it.  If this
	 * is a data block, the device data may be superfluous since the data
	 * might be in a logical block, but compressed or encrypted data is
	 * another matter.
	 *
	 * NOTE: The modify will set BLKMAPUPD for us if BLKMAPPED is set.
	 */
	error = hammer2_chain_modify(chain, mtid, dedup_off, 0);
	if (error)
		return error;

	/*
	 * Reallocate the block, even if making it smaller (because different
	 * block sizes may be in different regions).
	 *
	 * NOTE: Operation does not copy the data and may only be used
	 *	 to resize data blocks in-place, or directory entry blocks
	 *	 which are about to be modified in some manner.
	 */
	error = hammer2_freemap_alloc(chain, nbytes);
	if (error)
		return error;

	chain->bytes = nbytes;

	/*
	 * We don't want the followup chain_modify() to try to copy data
	 * from the old (wrong-sized) buffer.  It won't know how much to
	 * copy.  This case should only occur during writes when the
	 * originator already has the data to write in-hand.
	 */
	if (chain->dio) {
		KKASSERT(chain->bref.type == HAMMER2_BREF_TYPE_DATA ||
			 chain->bref.type == HAMMER2_BREF_TYPE_DIRENT);
		hammer2_io_brelse(&chain->dio);
		chain->data = NULL;
	}
	return (chain->error);
}

/*
 * Set the chain modified so its data can be changed by the caller, or
 * install deduplicated data.  The caller must call this routine for each
 * set of modifications it makes, even if the chain is already flagged
 * MODIFIED.
 *
 * Sets bref.modify_tid to mtid only if mtid != 0.  Note that bref.modify_tid
 * is a CLC (cluster level change) field and is not updated by parent
 * propagation during a flush.
 *
 * Returns an appropriate HAMMER2_ERROR_* code, which will generally reflect
 * chain->error except for HAMMER2_ERROR_ENOSPC.  If the allocation fails
 * due to no space available, HAMMER2_ERROR_ENOSPC is returned and the chain
 * remains unmodified with its old data ref intact and chain->error
 * unchanged.
 *
 *				Dedup Handling
 *
 * If the DEDUPABLE flag is set in the chain the storage must be reallocated
 * even if the chain is still flagged MODIFIED.  In this case the chain's
 * DEDUPABLE flag will be cleared once the new storage has been assigned.
 *
 * If the caller passes a non-zero dedup_off we will use it to assign the
 * new storage.  The MODIFIED flag will be *CLEARED* in this case, and
 * DEDUPABLE will be set (NOTE: the UPDATE flag is always set).  The caller
 * must not modify the data content upon return.
 */
int
hammer2_chain_modify(hammer2_chain_t *chain, hammer2_tid_t mtid,
		     hammer2_off_t dedup_off, int flags)
{
	hammer2_dev_t *hmp;
	hammer2_io_t *dio;
	int error;
	int wasinitial;		/* chain was in the INITIAL state on entry */
	int setmodified;	/* we set MODIFIED here, undo on error */
	int setupdate;		/* we set UPDATE here, undo on error */
	int newmod;		/* a new block allocation is required */
	char *bdata;

	hmp = chain->hmp;
	KKASSERT(chain->lock.mtx_lock & MTX_EXCLUSIVE);

	/*
	 * Data is not optional for freemap chains (we must always be sure
	 * to copy the data on COW storage allocations).
	 */
	if (chain->bref.type == HAMMER2_BREF_TYPE_FREEMAP_NODE ||
	    chain->bref.type == HAMMER2_BREF_TYPE_FREEMAP_LEAF) {
		KKASSERT((chain->flags & HAMMER2_CHAIN_INITIAL) ||
			 (flags & HAMMER2_MODIFY_OPTDATA) == 0);
	}

	/*
	 * Data must be resolved if already assigned, unless explicitly
	 * flagged otherwise.  If we cannot safely load the data the
	 * modification fails and we return early.
	 */
	if (chain->data == NULL && chain->bytes != 0 &&
	    (flags & HAMMER2_MODIFY_OPTDATA) == 0 &&
	    (chain->bref.data_off & ~HAMMER2_OFF_MASK_RADIX)) {
		hammer2_chain_load_data(chain);
		if (chain->error)
			return (chain->error);
	}
	error = 0;

	/*
	 * Set MODIFIED to indicate that the chain has been modified.  A new
	 * allocation is required when modifying a chain.
	 *
	 * Set UPDATE to ensure that the blockref is updated in the parent.
	 *
	 * If MODIFIED is already set determine if we can reuse the assigned
	 * data block or if we need a new data block.
	 */
	if ((chain->flags & HAMMER2_CHAIN_MODIFIED) == 0) {
		/*
		 * Must set modified bit.
		 */
		atomic_add_long(&hammer2_count_modified_chains, 1);
		atomic_set_int(&chain->flags, HAMMER2_CHAIN_MODIFIED);
		hammer2_pfs_memory_inc(chain->pmp);	/* can be NULL */
		setmodified = 1;

		/*
		 * We may be able to avoid a copy-on-write if the chain's
		 * check mode is set to NONE and the chain's current
		 * modify_tid is beyond the last explicit snapshot tid.
		 *
		 * This implements HAMMER2's overwrite-in-place feature.
		 *
		 * NOTE! This data-block cannot be used as a de-duplication
		 *	 source when the check mode is set to NONE.
		 */
		if ((chain->bref.type == HAMMER2_BREF_TYPE_DATA ||
		     chain->bref.type == HAMMER2_BREF_TYPE_DIRENT) &&
		    (chain->flags & HAMMER2_CHAIN_INITIAL) == 0 &&
		    (chain->flags & HAMMER2_CHAIN_DEDUPABLE) == 0 &&
		    HAMMER2_DEC_CHECK(chain->bref.methods) ==
		    HAMMER2_CHECK_NONE &&
		    chain->pmp &&
		    chain->bref.modify_tid >
		     chain->pmp->iroot->meta.pfs_lsnap_tid) {
			/*
			 * Sector overwrite allowed.
			 */
			newmod = 0;
		} else if ((hmp->hflags & HMNT2_EMERG) &&
			   chain->pmp &&
			   chain->bref.modify_tid >
			    chain->pmp->iroot->meta.pfs_lsnap_tid) {
			/*
			 * If in emergency delete mode then do a modify-in-
			 * place on any chain type belonging to the PFS as
			 * long as it doesn't mess up a snapshot.  We might
			 * be forced to do this anyway a little further down
			 * in the code if the allocation fails.
			 *
			 * Also note that in emergency mode, these modify-in-
			 * place operations are NOT SAFE.  A storage failure,
			 * power failure, or panic can corrupt the filesystem.
			 */
			newmod = 0;
		} else {
			/*
			 * Sector overwrite not allowed, must copy-on-write.
			 */
			newmod = 1;
		}
	} else if (chain->flags & HAMMER2_CHAIN_DEDUPABLE) {
		/*
		 * If the modified chain was registered for dedup we need
		 * a new allocation.  This only happens for delayed-flush
		 * chains (i.e. which run through the front-end buffer
		 * cache).
		 */
		newmod = 1;
		setmodified = 0;
	} else {
		/*
		 * Already flagged modified, no new allocation is needed.
		 */
		newmod = 0;
		setmodified = 0;
	}

	/*
	 * Flag parent update required.
	 */
	if ((chain->flags & HAMMER2_CHAIN_UPDATE) == 0) {
		atomic_set_int(&chain->flags, HAMMER2_CHAIN_UPDATE);
		setupdate = 1;
	} else {
		setupdate = 0;
	}

	/*
	 * The XOP code returns held but unlocked focus chains.  This
	 * prevents the chain from being destroyed but does not prevent
	 * it from being modified.  diolk is used to interlock modifications
	 * against XOP frontend accesses to the focus.
	 *
	 * This allows us to theoretically avoid deadlocking the frontend
	 * if one of the backends lock up by not formally locking the
	 * focused chain in the frontend.  In addition, the synchronization
	 * code relies on this mechanism to avoid deadlocking concurrent
	 * synchronization threads.
	 */
	lockmgr(&chain->diolk, LK_EXCLUSIVE);

	/*
	 * The modification or re-modification requires an allocation and
	 * possible COW.  If an error occurs, the previous content and data
	 * reference is retained and the modification fails.
	 *
	 * If dedup_off is non-zero, the caller is requesting a deduplication
	 * rather than a modification.  The MODIFIED bit is not set and the
	 * data offset is set to the deduplication offset.  The data cannot
	 * be modified.
	 *
	 * NOTE: The dedup offset is allowed to be in a partially free state
	 *	 and we must be sure to reset it to a fully allocated state
	 *	 to force two bulkfree passes to free it again.
	 *
	 * NOTE: Only applicable when chain->bytes != 0.
	 *
	 * XXX can a chain already be marked MODIFIED without a data
	 * assignment?  If not, assert here instead of testing the case.
	 */
	if (chain != &hmp->vchain && chain != &hmp->fchain &&
	    chain->bytes) {
		if ((chain->bref.data_off & ~HAMMER2_OFF_MASK_RADIX) == 0 ||
		     newmod
		) {
			/*
			 * NOTE: We do not have to remove the dedup
			 *	 registration because the area is still
			 *	 allocated and the underlying DIO will
			 *	 still be flushed.
			 */
			if (dedup_off) {
				chain->bref.data_off = dedup_off;
				if ((int)(dedup_off & HAMMER2_OFF_MASK_RADIX))
					chain->bytes = 1 <<
					    (int)(dedup_off &
					    HAMMER2_OFF_MASK_RADIX);
				else
					chain->bytes = 0;
				chain->error = 0;
				atomic_clear_int(&chain->flags,
						 HAMMER2_CHAIN_MODIFIED);
				atomic_add_long(&hammer2_count_modified_chains,
						-1);
				if (chain->pmp) {
					hammer2_pfs_memory_wakeup(
						chain->pmp, -1);
				}
				hammer2_freemap_adjust(hmp, &chain->bref,
						HAMMER2_FREEMAP_DORECOVER);
				atomic_set_int(&chain->flags,
						HAMMER2_CHAIN_DEDUPABLE);
			} else {
				error = hammer2_freemap_alloc(chain,
							      chain->bytes);
				atomic_clear_int(&chain->flags,
						HAMMER2_CHAIN_DEDUPABLE);

				/*
				 * If we are unable to allocate a new block
				 * but we are in emergency mode, issue a
				 * warning to the console and reuse the same
				 * block.
				 *
				 * We behave as if the allocation were
				 * successful.
				 *
				 * THIS IS IMPORTANT: These modifications
				 * are virtually guaranteed to corrupt any
				 * snapshots related to this filesystem.
				 */
				if (error && (hmp->hflags & HMNT2_EMERG)) {
					error = 0;
					chain->bref.flags |=
						HAMMER2_BREF_FLAG_EMERG_MIP;

					krateprintf(&krate_h2em,
					    "hammer2: Emergency Mode WARNING: "
					    "Operation will likely corrupt "
					    "related snapshot: "
					    "%016jx.%02x key=%016jx\n",
					    chain->bref.data_off,
					    chain->bref.type,
					    chain->bref.key);
				} else if (error == 0) {
					chain->bref.flags &=
						~HAMMER2_BREF_FLAG_EMERG_MIP;
				}
			}
		}
	}

	/*
	 * Stop here if error.  We have to undo any flag bits we might
	 * have set above.
	 */
	if (error) {
		if (setmodified) {
			atomic_clear_int(&chain->flags, HAMMER2_CHAIN_MODIFIED);
			atomic_add_long(&hammer2_count_modified_chains, -1);
			if (chain->pmp)
				hammer2_pfs_memory_wakeup(chain->pmp, -1);
		}
		if (setupdate) {
			atomic_clear_int(&chain->flags, HAMMER2_CHAIN_UPDATE);
		}
		lockmgr(&chain->diolk, LK_RELEASE);

		return error;
	}

	/*
	 * Update mirror_tid and modify_tid.  modify_tid is only updated
	 * if not passed as zero (during flushes, parent propagation passes
	 * the value 0).
	 *
	 * NOTE: chain->pmp could be the device spmp.
	 */
	chain->bref.mirror_tid = hmp->voldata.mirror_tid + 1;
	if (mtid)
		chain->bref.modify_tid = mtid;

	/*
	 * Set BLKMAPUPD to tell the flush code that an existing blockmap entry
	 * requires updating as well as to tell the delete code that the
	 * chain's blockref might not exactly match (in terms of physical size
	 * or block offset) the one in the parent's blocktable.  The base key
	 * of course will still match.
	 */
	if (chain->flags & HAMMER2_CHAIN_BLKMAPPED)
		atomic_set_int(&chain->flags, HAMMER2_CHAIN_BLKMAPUPD);

	/*
	 * Short-cut data block handling when the caller does not need an
	 * actual data reference (aka OPTDATA), as long as the chain does
	 * not already have a data pointer to the data and no de-duplication
	 * occurred.
	 *
	 * This generally means that the modifications are being done via the
	 * logical buffer cache.
	 *
	 * NOTE: If deduplication occurred we have to run through the data
	 *	 stuff to clear INITIAL, and the caller will likely want to
	 *	 assign the check code anyway.  Leaving INITIAL set on a
	 *	 dedup can be deadly (it can cause the block to be zero'd!).
	 *
	 * This code also handles bytes == 0 (most dirents).
	 */
	if (chain->bref.type == HAMMER2_BREF_TYPE_DATA &&
	    (flags & HAMMER2_MODIFY_OPTDATA) &&
	    chain->data == NULL) {
		if (dedup_off == 0) {
			KKASSERT(chain->dio == NULL);
			goto skip2;
		}
	}

	/*
	 * Clearing the INITIAL flag (for indirect blocks) indicates that
	 * we've processed the uninitialized storage allocation.
	 *
	 * If this flag is already clear we are likely in a copy-on-write
	 * situation but we have to be sure NOT to bzero the storage if
	 * no data is present.
	 *
	 * Clearing of NOTTESTED is allowed if the MODIFIED bit is set.
	 */
	if (chain->flags & HAMMER2_CHAIN_INITIAL) {
		atomic_clear_int(&chain->flags, HAMMER2_CHAIN_INITIAL);
		wasinitial = 1;
	} else {
		wasinitial = 0;
	}

	/*
	 * Instantiate data buffer and possibly execute COW operation
	 */
	switch(chain->bref.type) {
	case HAMMER2_BREF_TYPE_VOLUME:
	case HAMMER2_BREF_TYPE_FREEMAP:
		/*
		 * The data is embedded, no copy-on-write operation is
		 * needed.
		 */
		KKASSERT(chain->dio == NULL);
		break;
	case HAMMER2_BREF_TYPE_DIRENT:
		/*
		 * The data might be fully embedded.
		 */
		if (chain->bytes == 0) {
			KKASSERT(chain->dio == NULL);
			break;
		}
		/* fall through */
	case HAMMER2_BREF_TYPE_INODE:
	case HAMMER2_BREF_TYPE_FREEMAP_LEAF:
	case HAMMER2_BREF_TYPE_DATA:
	case HAMMER2_BREF_TYPE_INDIRECT:
	case HAMMER2_BREF_TYPE_FREEMAP_NODE:
		/*
		 * Perform the copy-on-write operation
		 *
		 * zero-fill or copy-on-write depending on whether
		 * chain->data exists or not and set the dirty state for
		 * the new buffer.  hammer2_io_new() will handle the
		 * zero-fill.
		 *
		 * If a dedup_off was supplied this is an existing block
		 * and no COW, copy, or further modification is required.
		 */
		KKASSERT(chain != &hmp->vchain && chain != &hmp->fchain);

		if (wasinitial && dedup_off == 0) {
			error = hammer2_io_new(hmp, chain->bref.type,
					       chain->bref.data_off,
					       chain->bytes, &dio);
		} else {
			error = hammer2_io_bread(hmp, chain->bref.type,
						 chain->bref.data_off,
						 chain->bytes, &dio);
		}
		hammer2_adjreadcounter(chain->bref.type, chain->bytes);

		/*
		 * If an I/O error occurs make sure callers cannot accidentally
		 * modify the old buffer's contents and corrupt the filesystem.
		 *
		 * NOTE: hammer2_io_data() call issues bkvasync()
		 */
		if (error) {
			kprintf("hammer2_chain_modify: hmp=%p I/O error\n",
				hmp);
			chain->error = HAMMER2_ERROR_EIO;
			hammer2_io_brelse(&dio);
			hammer2_io_brelse(&chain->dio);
			chain->data = NULL;
			break;
		}
		chain->error = 0;
		bdata = hammer2_io_data(dio, chain->bref.data_off);

		if (chain->data) {
			/*
			 * COW (unless a dedup).
			 */
			KKASSERT(chain->dio != NULL);
			if (chain->data != (void *)bdata && dedup_off == 0) {
				bcopy(chain->data, bdata, chain->bytes);
			}
		} else if (wasinitial == 0 && dedup_off == 0) {
			/*
			 * We have a problem.  We were asked to COW but
			 * we don't have any data to COW with!
			 */
			panic("hammer2_chain_modify: having a COW %p\n",
			      chain);
		}

		/*
		 * Retire the old buffer, replace with the new.  Dirty or
		 * redirty the new buffer.
		 *
		 * WARNING! The system buffer cache may have already flushed
		 *	    the buffer, so we must be sure to [re]dirty it
		 *	    for further modification.
		 *
		 *	    If dedup_off was supplied, the caller is not
		 *	    expected to make any further modification to the
		 *	    buffer.
		 *
		 * WARNING! hammer2_get_gdata() assumes dio never transitions
		 *	    through NULL in order to optimize away unnecessary
		 *	    diolk operations.
		 */
		{
			hammer2_io_t *tio;

			if ((tio = chain->dio) != NULL)
				hammer2_io_bqrelse(&tio);
			chain->data = (void *)bdata;
			chain->dio = dio;
			if (dedup_off == 0)
				hammer2_io_setdirty(dio);
		}
		break;
	default:
		panic("hammer2_chain_modify: illegal non-embedded type %d",
		      chain->bref.type);
		break;

	}
skip2:
	/*
	 * setflush on parent indicating that the parent must recurse down
	 * to us.  Do not call on chain itself which might already have it
	 * set.
	 */
	if (chain->parent)
		hammer2_chain_setflush(chain->parent);
	lockmgr(&chain->diolk, LK_RELEASE);

	return (chain->error);
}

/*
 * Modify the chain associated with an inode.
 */
int
hammer2_chain_modify_ip(hammer2_inode_t *ip, hammer2_chain_t *chain,
			hammer2_tid_t mtid, int flags)
{
	int error;

	/*
	 * Flag the inode itself modified, then run the normal chain
	 * modification (no dedup offset).
	 */
	hammer2_inode_modify(ip);
	error = hammer2_chain_modify(chain, mtid, 0, flags);

	return error;
}

/*
 * This function returns the chain at the nearest key within the specified
 * range.  The returned chain will be referenced but not locked.
 *
 * This function will recurse through chain->rbtree as necessary and will
 * return a *key_nextp suitable for iteration.  *key_nextp is only set if
 * the iteration value is less than the current value of *key_nextp.
 *
 * The caller should use (*key_nextp) to calculate the actual range of
 * the returned element, which will be (key_beg to *key_nextp - 1), because
 * there might be another element which is superior to the returned element
 * and overlaps it.
 *
 * (*key_nextp) can be passed as key_beg in an iteration only while non-NULL
 * chains continue to be returned.  On EOF (*key_nextp) may overflow since
 * it will wind up being (key_end + 1).
 *
 * WARNING!  Must be called with child's spinlock held.  Spinlock remains
 *	     held through the operation.
 */
struct hammer2_chain_find_info {
	hammer2_chain_t	*best;		/* best (lowest-keyed) candidate */
	hammer2_key_t	key_beg;	/* search range, inclusive */
	hammer2_key_t	key_end;	/* search range, inclusive */
	hammer2_key_t	key_next;	/* iteration limit, clipped down */
};

static int hammer2_chain_find_cmp(hammer2_chain_t *child, void *data);
static int hammer2_chain_find_callback(hammer2_chain_t *child, void *data);

static
hammer2_chain_t *
hammer2_chain_find(hammer2_chain_t *parent, hammer2_key_t *key_nextp,
		   hammer2_key_t key_beg, hammer2_key_t key_end)
{
	struct hammer2_chain_find_info info;

	info.best = NULL;
	info.key_beg = key_beg;
	info.key_end = key_end;
	info.key_next = *key_nextp;

	RB_SCAN(hammer2_chain_tree, &parent->core.rbtree,
		hammer2_chain_find_cmp, hammer2_chain_find_callback,
		&info);
	*key_nextp = info.key_next;
#if 0
	kprintf("chain_find %p %016jx:%016jx next=%016jx\n",
		parent, key_beg, key_end, *key_nextp);
#endif

	return (info.best);
}

/*
 * RB_SCAN comparator.  Prunes the scan to children whose key range
 * overlaps [key_beg, key_end]: returns -1 when the child lies entirely
 * below the range, 1 when entirely above, 0 on overlap.
 */
static
int
hammer2_chain_find_cmp(hammer2_chain_t *child, void *data)
{
	struct hammer2_chain_find_info *info = data;
	hammer2_key_t child_beg;
	hammer2_key_t child_end;

	child_beg = child->bref.key;
	child_end = child_beg + ((hammer2_key_t)1 << child->bref.keybits) - 1;

	if (child_end < info->key_beg)
		return(-1);
	if (child_beg > info->key_end)
		return(1);
	return(0);
}

/*
 * RB_SCAN callback.  Tracks the overlapping child with the lowest key
 * in info->best and clips info->key_next so the caller can iterate
 * without skipping elements.
 */
static
int
hammer2_chain_find_callback(hammer2_chain_t *child, void *data)
{
	struct hammer2_chain_find_info *info = data;
	hammer2_chain_t *best;
	hammer2_key_t child_end;

	if ((best = info->best) == NULL) {
		/*
		 * No previous best.  Assign best
		 */
		info->best = child;
	} else if (best->bref.key <= info->key_beg &&
		   child->bref.key <= info->key_beg) {
		/*
		 * Illegal overlap.
		 */
		KKASSERT(0);
		/*info->best = child;*/
	} else if (child->bref.key < best->bref.key) {
		/*
		 * Child has a nearer key and best is not flush with key_beg.
		 * Set best to child.  Truncate key_next to the old best key.
		 */
		info->best = child;
		if (info->key_next > best->bref.key || info->key_next == 0)
			info->key_next = best->bref.key;
	} else if (child->bref.key == best->bref.key) {
		/*
		 * If our current best is flush with the child then this
		 * is an illegal overlap.
		 *
		 * key_next will automatically be limited to the smaller of
		 * the two end-points.
		 */
		KKASSERT(0);
		info->best = child;
	} else {
		/*
		 * Keep the current best but truncate key_next to the child's
		 * base.
		 *
		 * key_next will also automatically be limited to the smaller
		 * of the two end-points (probably not necessary for this case
		 * but we do it anyway).
		 */
		if (info->key_next > child->bref.key || info->key_next == 0)
			info->key_next = child->bref.key;
	}

	/*
	 * Always truncate key_next based on child's end-of-range.
	 * (child_end of 0 indicates the range wrapped the key space,
	 * in which case no truncation occurs).
	 */
	child_end = child->bref.key + ((hammer2_key_t)1 << child->bref.keybits);
	if (child_end && (info->key_next > child_end || info->key_next == 0))
		info->key_next = child_end;

	return(0);
}

/*
 * Retrieve the specified chain from a media blockref, creating the
 * in-memory chain structure which reflects it.  The returned chain is
 * held and locked according to (how) (HAMMER2_RESOLVE_*).  The caller must
 * handle crc-checks and so forth, and should check chain->error before
 * assuming that the data is good.
 *
 * To handle insertion races pass the INSERT_RACE flag along with the
 * generation number of the core.  NULL will be returned if the generation
 * number changes before we have a chance to insert the chain.  Insert
Insert 2049 * races can occur because the parent might be held shared. 2050 * 2051 * Caller must hold the parent locked shared or exclusive since we may 2052 * need the parent's bref array to find our block. 2053 * 2054 * WARNING! chain->pmp is always set to NULL for any chain representing 2055 * part of the super-root topology. 2056 */ 2057 hammer2_chain_t * 2058 hammer2_chain_get(hammer2_chain_t *parent, int generation, 2059 hammer2_blockref_t *bref, int how) 2060 { 2061 hammer2_dev_t *hmp = parent->hmp; 2062 hammer2_chain_t *chain; 2063 int error; 2064 2065 /* 2066 * Allocate a chain structure representing the existing media 2067 * entry. Resulting chain has one ref and is not locked. 2068 */ 2069 if (bref->flags & HAMMER2_BREF_FLAG_PFSROOT) 2070 chain = hammer2_chain_alloc(hmp, NULL, bref); 2071 else 2072 chain = hammer2_chain_alloc(hmp, parent->pmp, bref); 2073 /* ref'd chain returned */ 2074 2075 /* 2076 * Flag that the chain is in the parent's blockmap so delete/flush 2077 * knows what to do with it. 2078 */ 2079 atomic_set_int(&chain->flags, HAMMER2_CHAIN_BLKMAPPED); 2080 2081 /* 2082 * chain must be locked to avoid unexpected ripouts 2083 */ 2084 hammer2_chain_lock(chain, how); 2085 2086 /* 2087 * Link the chain into its parent. A spinlock is required to safely 2088 * access the RBTREE, and it is possible to collide with another 2089 * hammer2_chain_get() operation because the caller might only hold 2090 * a shared lock on the parent. 2091 * 2092 * NOTE: Get races can occur quite often when we distribute 2093 * asynchronous read-aheads across multiple threads. 
2094 */ 2095 KKASSERT(parent->refs > 0); 2096 error = hammer2_chain_insert(parent, chain, 2097 HAMMER2_CHAIN_INSERT_SPIN | 2098 HAMMER2_CHAIN_INSERT_RACE, 2099 generation); 2100 if (error) { 2101 KKASSERT((chain->flags & HAMMER2_CHAIN_ONRBTREE) == 0); 2102 /*kprintf("chain %p get race\n", chain);*/ 2103 hammer2_chain_unlock(chain); 2104 hammer2_chain_drop(chain); 2105 chain = NULL; 2106 } else { 2107 KKASSERT(chain->flags & HAMMER2_CHAIN_ONRBTREE); 2108 } 2109 2110 /* 2111 * Return our new chain referenced but not locked, or NULL if 2112 * a race occurred. 2113 */ 2114 return (chain); 2115 } 2116 2117 /* 2118 * Lookup initialization/completion API 2119 */ 2120 hammer2_chain_t * 2121 hammer2_chain_lookup_init(hammer2_chain_t *parent, int flags) 2122 { 2123 hammer2_chain_ref(parent); 2124 if (flags & HAMMER2_LOOKUP_SHARED) { 2125 hammer2_chain_lock(parent, HAMMER2_RESOLVE_ALWAYS | 2126 HAMMER2_RESOLVE_SHARED); 2127 } else { 2128 hammer2_chain_lock(parent, HAMMER2_RESOLVE_ALWAYS); 2129 } 2130 return (parent); 2131 } 2132 2133 void 2134 hammer2_chain_lookup_done(hammer2_chain_t *parent) 2135 { 2136 if (parent) { 2137 hammer2_chain_unlock(parent); 2138 hammer2_chain_drop(parent); 2139 } 2140 } 2141 2142 /* 2143 * Take the locked chain and return a locked parent. The chain remains 2144 * locked on return, but may have to be temporarily unlocked to acquire 2145 * the parent. Because of this, (chain) must be stable and cannot be 2146 * deleted while it was temporarily unlocked (typically means that (chain) 2147 * is an inode). 2148 * 2149 * Pass HAMMER2_RESOLVE_* flags in flags. 2150 * 2151 * This will work even if the chain is errored, and the caller can check 2152 * parent->error on return if desired since the parent will be locked. 2153 * 2154 * This function handles the lock order reversal. 
2155 */ 2156 hammer2_chain_t * 2157 hammer2_chain_getparent(hammer2_chain_t *chain, int flags) 2158 { 2159 hammer2_chain_t *parent; 2160 2161 /* 2162 * Be careful of order, chain must be unlocked before parent 2163 * is locked below to avoid a deadlock. Try it trivially first. 2164 */ 2165 parent = chain->parent; 2166 if (parent == NULL) 2167 panic("hammer2_chain_getparent: no parent"); 2168 hammer2_chain_ref(parent); 2169 if (hammer2_chain_lock(parent, flags|HAMMER2_RESOLVE_NONBLOCK) == 0) 2170 return parent; 2171 2172 for (;;) { 2173 hammer2_chain_unlock(chain); 2174 hammer2_chain_lock(parent, flags); 2175 hammer2_chain_lock(chain, flags); 2176 2177 /* 2178 * Parent relinking races are quite common. We have to get 2179 * it right or we will blow up the block table. 2180 */ 2181 if (chain->parent == parent) 2182 break; 2183 hammer2_chain_unlock(parent); 2184 hammer2_chain_drop(parent); 2185 cpu_ccfence(); 2186 parent = chain->parent; 2187 if (parent == NULL) 2188 panic("hammer2_chain_getparent: no parent"); 2189 hammer2_chain_ref(parent); 2190 } 2191 return parent; 2192 } 2193 2194 /* 2195 * Take the locked chain and return a locked parent. The chain is unlocked 2196 * and dropped. *chainp is set to the returned parent as a convenience. 2197 * Pass HAMMER2_RESOLVE_* flags in flags. 2198 * 2199 * This will work even if the chain is errored, and the caller can check 2200 * parent->error on return if desired since the parent will be locked. 2201 * 2202 * The chain does NOT need to be stable. We use a tracking structure 2203 * to track the expected parent if the chain is deleted out from under us. 2204 * 2205 * This function handles the lock order reversal. 
2206 */ 2207 hammer2_chain_t * 2208 hammer2_chain_repparent(hammer2_chain_t **chainp, int flags) 2209 { 2210 hammer2_chain_t *chain; 2211 hammer2_chain_t *parent; 2212 struct hammer2_reptrack reptrack; 2213 struct hammer2_reptrack **repp; 2214 2215 /* 2216 * Be careful of order, chain must be unlocked before parent 2217 * is locked below to avoid a deadlock. Try it trivially first. 2218 */ 2219 chain = *chainp; 2220 parent = chain->parent; 2221 if (parent == NULL) { 2222 hammer2_spin_unex(&chain->core.spin); 2223 panic("hammer2_chain_repparent: no parent"); 2224 } 2225 hammer2_chain_ref(parent); 2226 if (hammer2_chain_lock(parent, flags|HAMMER2_RESOLVE_NONBLOCK) == 0) { 2227 hammer2_chain_unlock(chain); 2228 hammer2_chain_drop(chain); 2229 *chainp = parent; 2230 2231 return parent; 2232 } 2233 2234 /* 2235 * Ok, now it gets a bit nasty. There are multiple situations where 2236 * the parent might be in the middle of a deletion, or where the child 2237 * (chain) might be deleted the instant we let go of its lock. 2238 * We can potentially end up in a no-win situation! 2239 * 2240 * In particular, the indirect_maintenance() case can cause these 2241 * situations. 2242 * 2243 * To deal with this we install a reptrack structure in the parent 2244 * This reptrack structure 'owns' the parent ref and will automatically 2245 * migrate to the parent's parent if the parent is deleted permanently. 2246 */ 2247 hammer2_spin_init(&reptrack.spin, "h2reptrk"); 2248 reptrack.chain = parent; 2249 hammer2_chain_ref(parent); /* for the reptrack */ 2250 2251 hammer2_spin_ex(&parent->core.spin); 2252 reptrack.next = parent->core.reptrack; 2253 parent->core.reptrack = &reptrack; 2254 hammer2_spin_unex(&parent->core.spin); 2255 2256 hammer2_chain_unlock(chain); 2257 hammer2_chain_drop(chain); 2258 chain = NULL; /* gone */ 2259 2260 /* 2261 * At the top of this loop, chain is gone and parent is refd both 2262 * by us explicitly AND via our reptrack. We are attempting to 2263 * lock parent. 
2264 */ 2265 for (;;) { 2266 hammer2_chain_lock(parent, flags); 2267 2268 if (reptrack.chain == parent) 2269 break; 2270 hammer2_chain_unlock(parent); 2271 hammer2_chain_drop(parent); 2272 2273 kprintf("hammer2: debug REPTRACK %p->%p\n", 2274 parent, reptrack.chain); 2275 hammer2_spin_ex(&reptrack.spin); 2276 parent = reptrack.chain; 2277 hammer2_chain_ref(parent); 2278 hammer2_spin_unex(&reptrack.spin); 2279 } 2280 2281 /* 2282 * Once parent is locked and matches our reptrack, our reptrack 2283 * will be stable and we have our parent. We can unlink our 2284 * reptrack. 2285 * 2286 * WARNING! Remember that the chain lock might be shared. Chains 2287 * locked shared have stable parent linkages. 2288 */ 2289 hammer2_spin_ex(&parent->core.spin); 2290 repp = &parent->core.reptrack; 2291 while (*repp != &reptrack) 2292 repp = &(*repp)->next; 2293 *repp = reptrack.next; 2294 hammer2_spin_unex(&parent->core.spin); 2295 2296 hammer2_chain_drop(parent); /* reptrack ref */ 2297 *chainp = parent; /* return parent lock+ref */ 2298 2299 return parent; 2300 } 2301 2302 /* 2303 * Dispose of any linked reptrack structures in (chain) by shifting them to 2304 * (parent). Both (chain) and (parent) must be exclusively locked. 2305 * 2306 * This is interlocked against any children of (chain) on the other side. 2307 * No children so remain as-of when this is called so we can test 2308 * core.reptrack without holding the spin-lock. 2309 * 2310 * Used whenever the caller intends to permanently delete chains related 2311 * to topological recursions (BREF_TYPE_INDIRECT, BREF_TYPE_FREEMAP_NODE), 2312 * where the chains underneath the node being deleted are given a new parent 2313 * above the node being deleted. 
2314 */ 2315 static 2316 void 2317 hammer2_chain_repchange(hammer2_chain_t *parent, hammer2_chain_t *chain) 2318 { 2319 struct hammer2_reptrack *reptrack; 2320 2321 KKASSERT(chain->core.live_count == 0 && RB_EMPTY(&chain->core.rbtree)); 2322 while (chain->core.reptrack) { 2323 hammer2_spin_ex(&parent->core.spin); 2324 hammer2_spin_ex(&chain->core.spin); 2325 reptrack = chain->core.reptrack; 2326 if (reptrack == NULL) { 2327 hammer2_spin_unex(&chain->core.spin); 2328 hammer2_spin_unex(&parent->core.spin); 2329 break; 2330 } 2331 hammer2_spin_ex(&reptrack->spin); 2332 chain->core.reptrack = reptrack->next; 2333 reptrack->chain = parent; 2334 reptrack->next = parent->core.reptrack; 2335 parent->core.reptrack = reptrack; 2336 hammer2_chain_ref(parent); /* reptrack */ 2337 2338 hammer2_spin_unex(&chain->core.spin); 2339 hammer2_spin_unex(&parent->core.spin); 2340 kprintf("hammer2: debug repchange %p %p->%p\n", 2341 reptrack, chain, parent); 2342 hammer2_chain_drop(chain); /* reptrack */ 2343 } 2344 } 2345 2346 /* 2347 * Locate the first chain whos key range overlaps (key_beg, key_end) inclusive. 2348 * (*parentp) typically points to an inode but can also point to a related 2349 * indirect block and this function will recurse upwards and find the inode 2350 * or the nearest undeleted indirect block covering the key range. 2351 * 2352 * This function unconditionally sets *errorp, replacing any previous value. 2353 * 2354 * (*parentp) must be exclusive or shared locked (depending on flags) and 2355 * referenced and can be an inode or an existing indirect block within the 2356 * inode. 2357 * 2358 * If (*parent) is errored out, this function will not attempt to recurse 2359 * the radix tree and will return NULL along with an appropriate *errorp. 2360 * If NULL is returned and *errorp is 0, the requested lookup could not be 2361 * located. 
2362 * 2363 * On return (*parentp) will be modified to point at the deepest parent chain 2364 * element encountered during the search, as a helper for an insertion or 2365 * deletion. 2366 * 2367 * The new (*parentp) will be locked shared or exclusive (depending on flags), 2368 * and referenced, and the old will be unlocked and dereferenced (no change 2369 * if they are both the same). This is particularly important if the caller 2370 * wishes to insert a new chain, (*parentp) will be set properly even if NULL 2371 * is returned, as long as no error occurred. 2372 * 2373 * The matching chain will be returned locked according to flags. 2374 * 2375 * -- 2376 * 2377 * NULL is returned if no match was found, but (*parentp) will still 2378 * potentially be adjusted. 2379 * 2380 * On return (*key_nextp) will point to an iterative value for key_beg. 2381 * (If NULL is returned (*key_nextp) is set to (key_end + 1)). 2382 * 2383 * This function will also recurse up the chain if the key is not within the 2384 * current parent's range. (*parentp) can never be set to NULL. An iteration 2385 * can simply allow (*parentp) to float inside the loop. 2386 * 2387 * NOTE! chain->data is not always resolved. By default it will not be 2388 * resolved for BREF_TYPE_DATA, FREEMAP_NODE, or FREEMAP_LEAF. Use 2389 * HAMMER2_LOOKUP_ALWAYS to force resolution (but be careful w/ 2390 * BREF_TYPE_DATA as the device buffer can alias the logical file 2391 * buffer). 
2392 */ 2393 hammer2_chain_t * 2394 hammer2_chain_lookup(hammer2_chain_t **parentp, hammer2_key_t *key_nextp, 2395 hammer2_key_t key_beg, hammer2_key_t key_end, 2396 int *errorp, int flags) 2397 { 2398 hammer2_chain_t *parent; 2399 hammer2_chain_t *chain; 2400 hammer2_blockref_t *base; 2401 hammer2_blockref_t *bref; 2402 hammer2_blockref_t bsave; 2403 hammer2_key_t scan_beg; 2404 hammer2_key_t scan_end; 2405 int count = 0; 2406 int how_always = HAMMER2_RESOLVE_ALWAYS; 2407 int how_maybe = HAMMER2_RESOLVE_MAYBE; 2408 int how; 2409 int generation; 2410 int maxloops = 300000; 2411 2412 if (flags & HAMMER2_LOOKUP_ALWAYS) { 2413 how_maybe = how_always; 2414 how = HAMMER2_RESOLVE_ALWAYS; 2415 } else if (flags & HAMMER2_LOOKUP_NODATA) { 2416 how = HAMMER2_RESOLVE_NEVER; 2417 } else { 2418 how = HAMMER2_RESOLVE_MAYBE; 2419 } 2420 if (flags & HAMMER2_LOOKUP_SHARED) { 2421 how_maybe |= HAMMER2_RESOLVE_SHARED; 2422 how_always |= HAMMER2_RESOLVE_SHARED; 2423 how |= HAMMER2_RESOLVE_SHARED; 2424 } 2425 2426 /* 2427 * Recurse (*parentp) upward if necessary until the parent completely 2428 * encloses the key range or we hit the inode. 2429 * 2430 * Handle races against the flusher deleting indirect nodes on its 2431 * way back up by continuing to recurse upward past the deletion. 2432 */ 2433 parent = *parentp; 2434 *errorp = 0; 2435 2436 while (parent->bref.type == HAMMER2_BREF_TYPE_INDIRECT || 2437 parent->bref.type == HAMMER2_BREF_TYPE_FREEMAP_NODE) { 2438 scan_beg = parent->bref.key; 2439 scan_end = scan_beg + 2440 ((hammer2_key_t)1 << parent->bref.keybits) - 1; 2441 if ((parent->flags & HAMMER2_CHAIN_DELETED) == 0) { 2442 if (key_beg >= scan_beg && key_end <= scan_end) 2443 break; 2444 } 2445 parent = hammer2_chain_repparent(parentp, how_maybe); 2446 } 2447 again: 2448 if (--maxloops == 0) 2449 panic("hammer2_chain_lookup: maxloops"); 2450 2451 /* 2452 * MATCHIND case that does not require parent->data (do prior to 2453 * parent->error check). 
2454 */ 2455 switch(parent->bref.type) { 2456 case HAMMER2_BREF_TYPE_FREEMAP_NODE: 2457 case HAMMER2_BREF_TYPE_INDIRECT: 2458 if (flags & HAMMER2_LOOKUP_MATCHIND) { 2459 scan_beg = parent->bref.key; 2460 scan_end = scan_beg + 2461 ((hammer2_key_t)1 << parent->bref.keybits) - 1; 2462 if (key_beg == scan_beg && key_end == scan_end) { 2463 chain = parent; 2464 hammer2_chain_ref(chain); 2465 hammer2_chain_lock(chain, how_maybe); 2466 *key_nextp = scan_end + 1; 2467 goto done; 2468 } 2469 } 2470 break; 2471 default: 2472 break; 2473 } 2474 2475 /* 2476 * No lookup is possible if the parent is errored. We delayed 2477 * this check as long as we could to ensure that the parent backup, 2478 * embedded data, and MATCHIND code could still execute. 2479 */ 2480 if (parent->error) { 2481 *errorp = parent->error; 2482 return NULL; 2483 } 2484 2485 /* 2486 * Locate the blockref array. Currently we do a fully associative 2487 * search through the array. 2488 */ 2489 switch(parent->bref.type) { 2490 case HAMMER2_BREF_TYPE_INODE: 2491 /* 2492 * Special shortcut for embedded data returns the inode 2493 * itself. Callers must detect this condition and access 2494 * the embedded data (the strategy code does this for us). 2495 * 2496 * This is only applicable to regular files and softlinks. 2497 * 2498 * We need a second lock on parent. 
Since we already have 2499 * a lock we must pass LOCKAGAIN to prevent unexpected 2500 * blocking (we don't want to block on a second shared 2501 * ref if an exclusive lock is pending) 2502 */ 2503 if (parent->data->ipdata.meta.op_flags & 2504 HAMMER2_OPFLAG_DIRECTDATA) { 2505 if (flags & HAMMER2_LOOKUP_NODIRECT) { 2506 chain = NULL; 2507 *key_nextp = key_end + 1; 2508 goto done; 2509 } 2510 hammer2_chain_ref(parent); 2511 hammer2_chain_lock(parent, how_always | 2512 HAMMER2_RESOLVE_LOCKAGAIN); 2513 *key_nextp = key_end + 1; 2514 return (parent); 2515 } 2516 base = &parent->data->ipdata.u.blockset.blockref[0]; 2517 count = HAMMER2_SET_COUNT; 2518 break; 2519 case HAMMER2_BREF_TYPE_FREEMAP_NODE: 2520 case HAMMER2_BREF_TYPE_INDIRECT: 2521 /* 2522 * Optimize indirect blocks in the INITIAL state to avoid 2523 * I/O. 2524 * 2525 * Debugging: Enter permanent wait state instead of 2526 * panicing on unexpectedly NULL data for the moment. 2527 */ 2528 if (parent->flags & HAMMER2_CHAIN_INITIAL) { 2529 base = NULL; 2530 } else { 2531 if (parent->data == NULL) { 2532 kprintf("hammer2: unexpected NULL data " 2533 "on %p\n", parent); 2534 while (1) 2535 tsleep(parent, 0, "xxx", 0); 2536 } 2537 base = &parent->data->npdata[0]; 2538 } 2539 count = parent->bytes / sizeof(hammer2_blockref_t); 2540 break; 2541 case HAMMER2_BREF_TYPE_VOLUME: 2542 base = &parent->data->voldata.sroot_blockset.blockref[0]; 2543 count = HAMMER2_SET_COUNT; 2544 break; 2545 case HAMMER2_BREF_TYPE_FREEMAP: 2546 base = &parent->data->blkset.blockref[0]; 2547 count = HAMMER2_SET_COUNT; 2548 break; 2549 default: 2550 panic("hammer2_chain_lookup: unrecognized " 2551 "blockref(B) type: %d", 2552 parent->bref.type); 2553 base = NULL; /* safety */ 2554 count = 0; /* safety */ 2555 break; 2556 } 2557 2558 /* 2559 * Merged scan to find next candidate. 2560 * 2561 * hammer2_base_*() functions require the parent->core.live_* fields 2562 * to be synchronized. 
2563 * 2564 * We need to hold the spinlock to access the block array and RB tree 2565 * and to interlock chain creation. 2566 */ 2567 if ((parent->flags & HAMMER2_CHAIN_COUNTEDBREFS) == 0) 2568 hammer2_chain_countbrefs(parent, base, count); 2569 2570 /* 2571 * Combined search 2572 */ 2573 hammer2_spin_ex(&parent->core.spin); 2574 chain = hammer2_combined_find(parent, base, count, 2575 key_nextp, 2576 key_beg, key_end, 2577 &bref); 2578 generation = parent->core.generation; 2579 2580 /* 2581 * Exhausted parent chain, iterate. 2582 */ 2583 if (bref == NULL) { 2584 KKASSERT(chain == NULL); 2585 hammer2_spin_unex(&parent->core.spin); 2586 if (key_beg == key_end) /* short cut single-key case */ 2587 return (NULL); 2588 2589 /* 2590 * Stop if we reached the end of the iteration. 2591 */ 2592 if (parent->bref.type != HAMMER2_BREF_TYPE_INDIRECT && 2593 parent->bref.type != HAMMER2_BREF_TYPE_FREEMAP_NODE) { 2594 return (NULL); 2595 } 2596 2597 /* 2598 * Calculate next key, stop if we reached the end of the 2599 * iteration, otherwise go up one level and loop. 2600 */ 2601 key_beg = parent->bref.key + 2602 ((hammer2_key_t)1 << parent->bref.keybits); 2603 if (key_beg == 0 || key_beg > key_end) 2604 return (NULL); 2605 parent = hammer2_chain_repparent(parentp, how_maybe); 2606 goto again; 2607 } 2608 2609 /* 2610 * Selected from blockref or in-memory chain. 2611 */ 2612 bsave = *bref; 2613 if (chain == NULL) { 2614 hammer2_spin_unex(&parent->core.spin); 2615 if (bsave.type == HAMMER2_BREF_TYPE_INDIRECT || 2616 bsave.type == HAMMER2_BREF_TYPE_FREEMAP_NODE) { 2617 chain = hammer2_chain_get(parent, generation, 2618 &bsave, how_maybe); 2619 } else { 2620 chain = hammer2_chain_get(parent, generation, 2621 &bsave, how); 2622 } 2623 if (chain == NULL) 2624 goto again; 2625 } else { 2626 hammer2_chain_ref(chain); 2627 hammer2_spin_unex(&parent->core.spin); 2628 2629 /* 2630 * chain is referenced but not locked. We must lock the 2631 * chain to obtain definitive state. 
2632 */ 2633 if (bsave.type == HAMMER2_BREF_TYPE_INDIRECT || 2634 bsave.type == HAMMER2_BREF_TYPE_FREEMAP_NODE) { 2635 hammer2_chain_lock(chain, how_maybe); 2636 } else { 2637 hammer2_chain_lock(chain, how); 2638 } 2639 KKASSERT(chain->parent == parent); 2640 } 2641 if (bcmp(&bsave, &chain->bref, sizeof(bsave)) || 2642 chain->parent != parent) { 2643 hammer2_chain_unlock(chain); 2644 hammer2_chain_drop(chain); 2645 chain = NULL; /* SAFETY */ 2646 goto again; 2647 } 2648 2649 2650 /* 2651 * Skip deleted chains (XXX cache 'i' end-of-block-array? XXX) 2652 * 2653 * NOTE: Chain's key range is not relevant as there might be 2654 * one-offs within the range that are not deleted. 2655 * 2656 * NOTE: Lookups can race delete-duplicate because 2657 * delete-duplicate does not lock the parent's core 2658 * (they just use the spinlock on the core). 2659 */ 2660 if (chain->flags & HAMMER2_CHAIN_DELETED) { 2661 kprintf("skip deleted chain %016jx.%02x key=%016jx\n", 2662 chain->bref.data_off, chain->bref.type, 2663 chain->bref.key); 2664 hammer2_chain_unlock(chain); 2665 hammer2_chain_drop(chain); 2666 chain = NULL; /* SAFETY */ 2667 key_beg = *key_nextp; 2668 if (key_beg == 0 || key_beg > key_end) 2669 return(NULL); 2670 goto again; 2671 } 2672 2673 /* 2674 * If the chain element is an indirect block it becomes the new 2675 * parent and we loop on it. We must maintain our top-down locks 2676 * to prevent the flusher from interfering (i.e. doing a 2677 * delete-duplicate and leaving us recursing down a deleted chain). 2678 * 2679 * The parent always has to be locked with at least RESOLVE_MAYBE 2680 * so we can access its data. It might need a fixup if the caller 2681 * passed incompatible flags. Be careful not to cause a deadlock 2682 * as a data-load requires an exclusive lock. 2683 * 2684 * If HAMMER2_LOOKUP_MATCHIND is set and the indirect block's key 2685 * range is within the requested key range we return the indirect 2686 * block and do NOT loop. 
This is usually only used to acquire 2687 * freemap nodes. 2688 */ 2689 if (chain->bref.type == HAMMER2_BREF_TYPE_INDIRECT || 2690 chain->bref.type == HAMMER2_BREF_TYPE_FREEMAP_NODE) { 2691 hammer2_chain_unlock(parent); 2692 hammer2_chain_drop(parent); 2693 *parentp = parent = chain; 2694 chain = NULL; /* SAFETY */ 2695 goto again; 2696 } 2697 done: 2698 /* 2699 * All done, return the locked chain. 2700 * 2701 * If the caller does not want a locked chain, replace the lock with 2702 * a ref. Perhaps this can eventually be optimized to not obtain the 2703 * lock in the first place for situations where the data does not 2704 * need to be resolved. 2705 * 2706 * NOTE! A chain->error must be tested by the caller upon return. 2707 * *errorp is only set based on issues which occur while 2708 * trying to reach the chain. 2709 */ 2710 return (chain); 2711 } 2712 2713 /* 2714 * After having issued a lookup we can iterate all matching keys. 2715 * 2716 * If chain is non-NULL we continue the iteration from just after it's index. 2717 * 2718 * If chain is NULL we assume the parent was exhausted and continue the 2719 * iteration at the next parent. 2720 * 2721 * If a fatal error occurs (typically an I/O error), a dummy chain is 2722 * returned with chain->error and error-identifying information set. This 2723 * chain will assert if you try to do anything fancy with it. 2724 * 2725 * XXX Depending on where the error occurs we should allow continued iteration. 2726 * 2727 * parent must be locked on entry and remains locked throughout. chain's 2728 * lock status must match flags. Chain is always at least referenced. 2729 * 2730 * WARNING! The MATCHIND flag does not apply to this function. 
2731 */ 2732 hammer2_chain_t * 2733 hammer2_chain_next(hammer2_chain_t **parentp, hammer2_chain_t *chain, 2734 hammer2_key_t *key_nextp, 2735 hammer2_key_t key_beg, hammer2_key_t key_end, 2736 int *errorp, int flags) 2737 { 2738 hammer2_chain_t *parent; 2739 int how_maybe; 2740 2741 /* 2742 * Calculate locking flags for upward recursion. 2743 */ 2744 how_maybe = HAMMER2_RESOLVE_MAYBE; 2745 if (flags & HAMMER2_LOOKUP_SHARED) 2746 how_maybe |= HAMMER2_RESOLVE_SHARED; 2747 2748 parent = *parentp; 2749 *errorp = 0; 2750 2751 /* 2752 * Calculate the next index and recalculate the parent if necessary. 2753 */ 2754 if (chain) { 2755 key_beg = chain->bref.key + 2756 ((hammer2_key_t)1 << chain->bref.keybits); 2757 hammer2_chain_unlock(chain); 2758 hammer2_chain_drop(chain); 2759 2760 /* 2761 * chain invalid past this point, but we can still do a 2762 * pointer comparison w/parent. 2763 * 2764 * Any scan where the lookup returned degenerate data embedded 2765 * in the inode has an invalid index and must terminate. 2766 */ 2767 if (chain == parent) 2768 return(NULL); 2769 if (key_beg == 0 || key_beg > key_end) 2770 return(NULL); 2771 chain = NULL; 2772 } else if (parent->bref.type != HAMMER2_BREF_TYPE_INDIRECT && 2773 parent->bref.type != HAMMER2_BREF_TYPE_FREEMAP_NODE) { 2774 /* 2775 * We reached the end of the iteration. 2776 */ 2777 return (NULL); 2778 } else { 2779 /* 2780 * Continue iteration with next parent unless the current 2781 * parent covers the range. 2782 * 2783 * (This also handles the case of a deleted, empty indirect 2784 * node). 
2785 */ 2786 key_beg = parent->bref.key + 2787 ((hammer2_key_t)1 << parent->bref.keybits); 2788 if (key_beg == 0 || key_beg > key_end) 2789 return (NULL); 2790 parent = hammer2_chain_repparent(parentp, how_maybe); 2791 } 2792 2793 /* 2794 * And execute 2795 */ 2796 return (hammer2_chain_lookup(parentp, key_nextp, 2797 key_beg, key_end, 2798 errorp, flags)); 2799 } 2800 2801 /* 2802 * Caller wishes to iterate chains under parent, loading new chains into 2803 * chainp. Caller must initialize *chainp to NULL and *firstp to 1, and 2804 * then call hammer2_chain_scan() repeatedly until a non-zero return. 2805 * During the scan, *firstp will be set to 0 and (*chainp) will be replaced 2806 * with the returned chain for the scan. The returned *chainp will be 2807 * locked and referenced. Any prior contents will be unlocked and dropped. 2808 * 2809 * Caller should check the return value. A normal scan EOF will return 2810 * exactly HAMMER2_ERROR_EOF. Any other non-zero value indicates an 2811 * error trying to access parent data. Any error in the returned chain 2812 * must be tested separately by the caller. 2813 * 2814 * (*chainp) is dropped on each scan, but will only be set if the returned 2815 * element itself can recurse. Leaf elements are NOT resolved, loaded, or 2816 * returned via *chainp. The caller will get their bref only. 2817 * 2818 * The raw scan function is similar to lookup/next but does not seek to a key. 2819 * Blockrefs are iterated via first_bref = (parent, NULL) and 2820 * next_chain = (parent, bref). 2821 * 2822 * The passed-in parent must be locked and its data resolved. The function 2823 * nominally returns a locked and referenced *chainp != NULL for chains 2824 * the caller might need to recurse on (and will dipose of any *chainp passed 2825 * in). The caller must check the chain->bref.type either way. 
2826 */ 2827 int 2828 hammer2_chain_scan(hammer2_chain_t *parent, hammer2_chain_t **chainp, 2829 hammer2_blockref_t *bref, int *firstp, 2830 int flags) 2831 { 2832 hammer2_blockref_t *base; 2833 hammer2_blockref_t *bref_ptr; 2834 hammer2_key_t key; 2835 hammer2_key_t next_key; 2836 hammer2_chain_t *chain = NULL; 2837 int count = 0; 2838 int how; 2839 int generation; 2840 int maxloops = 300000; 2841 int error; 2842 2843 error = 0; 2844 2845 /* 2846 * Scan flags borrowed from lookup. 2847 */ 2848 if (flags & HAMMER2_LOOKUP_ALWAYS) { 2849 how = HAMMER2_RESOLVE_ALWAYS; 2850 } else if (flags & HAMMER2_LOOKUP_NODATA) { 2851 how = HAMMER2_RESOLVE_NEVER; 2852 } else { 2853 how = HAMMER2_RESOLVE_MAYBE; 2854 } 2855 if (flags & HAMMER2_LOOKUP_SHARED) { 2856 how |= HAMMER2_RESOLVE_SHARED; 2857 } 2858 2859 /* 2860 * Calculate key to locate first/next element, unlocking the previous 2861 * element as we go. Be careful, the key calculation can overflow. 2862 * 2863 * (also reset bref to NULL) 2864 */ 2865 if (*firstp) { 2866 key = 0; 2867 *firstp = 0; 2868 } else { 2869 key = bref->key + ((hammer2_key_t)1 << bref->keybits); 2870 if ((chain = *chainp) != NULL) { 2871 *chainp = NULL; 2872 hammer2_chain_unlock(chain); 2873 hammer2_chain_drop(chain); 2874 chain = NULL; 2875 } 2876 if (key == 0) { 2877 error |= HAMMER2_ERROR_EOF; 2878 goto done; 2879 } 2880 } 2881 2882 again: 2883 if (parent->error) { 2884 error = parent->error; 2885 goto done; 2886 } 2887 if (--maxloops == 0) 2888 panic("hammer2_chain_scan: maxloops"); 2889 2890 /* 2891 * Locate the blockref array. Currently we do a fully associative 2892 * search through the array. 2893 */ 2894 switch(parent->bref.type) { 2895 case HAMMER2_BREF_TYPE_INODE: 2896 /* 2897 * An inode with embedded data has no sub-chains. 2898 * 2899 * WARNING! Bulk scan code may pass a static chain marked 2900 * as BREF_TYPE_INODE with a copy of the volume 2901 * root blockset to snapshot the volume. 
2902 */ 2903 if (parent->data->ipdata.meta.op_flags & 2904 HAMMER2_OPFLAG_DIRECTDATA) { 2905 error |= HAMMER2_ERROR_EOF; 2906 goto done; 2907 } 2908 base = &parent->data->ipdata.u.blockset.blockref[0]; 2909 count = HAMMER2_SET_COUNT; 2910 break; 2911 case HAMMER2_BREF_TYPE_FREEMAP_NODE: 2912 case HAMMER2_BREF_TYPE_INDIRECT: 2913 /* 2914 * Optimize indirect blocks in the INITIAL state to avoid 2915 * I/O. 2916 */ 2917 if (parent->flags & HAMMER2_CHAIN_INITIAL) { 2918 base = NULL; 2919 } else { 2920 if (parent->data == NULL) 2921 panic("parent->data is NULL"); 2922 base = &parent->data->npdata[0]; 2923 } 2924 count = parent->bytes / sizeof(hammer2_blockref_t); 2925 break; 2926 case HAMMER2_BREF_TYPE_VOLUME: 2927 base = &parent->data->voldata.sroot_blockset.blockref[0]; 2928 count = HAMMER2_SET_COUNT; 2929 break; 2930 case HAMMER2_BREF_TYPE_FREEMAP: 2931 base = &parent->data->blkset.blockref[0]; 2932 count = HAMMER2_SET_COUNT; 2933 break; 2934 default: 2935 panic("hammer2_chain_scan: unrecognized blockref type: %d", 2936 parent->bref.type); 2937 base = NULL; /* safety */ 2938 count = 0; /* safety */ 2939 break; 2940 } 2941 2942 /* 2943 * Merged scan to find next candidate. 2944 * 2945 * hammer2_base_*() functions require the parent->core.live_* fields 2946 * to be synchronized. 2947 * 2948 * We need to hold the spinlock to access the block array and RB tree 2949 * and to interlock chain creation. 2950 */ 2951 if ((parent->flags & HAMMER2_CHAIN_COUNTEDBREFS) == 0) 2952 hammer2_chain_countbrefs(parent, base, count); 2953 2954 next_key = 0; 2955 bref_ptr = NULL; 2956 hammer2_spin_ex(&parent->core.spin); 2957 chain = hammer2_combined_find(parent, base, count, 2958 &next_key, 2959 key, HAMMER2_KEY_MAX, 2960 &bref_ptr); 2961 generation = parent->core.generation; 2962 2963 /* 2964 * Exhausted parent chain, we're done. 
2965 */ 2966 if (bref_ptr == NULL) { 2967 hammer2_spin_unex(&parent->core.spin); 2968 KKASSERT(chain == NULL); 2969 error |= HAMMER2_ERROR_EOF; 2970 goto done; 2971 } 2972 2973 /* 2974 * Copy into the supplied stack-based blockref. 2975 */ 2976 *bref = *bref_ptr; 2977 2978 /* 2979 * Selected from blockref or in-memory chain. 2980 */ 2981 if (chain == NULL) { 2982 switch(bref->type) { 2983 case HAMMER2_BREF_TYPE_INODE: 2984 case HAMMER2_BREF_TYPE_FREEMAP_NODE: 2985 case HAMMER2_BREF_TYPE_INDIRECT: 2986 case HAMMER2_BREF_TYPE_VOLUME: 2987 case HAMMER2_BREF_TYPE_FREEMAP: 2988 /* 2989 * Recursion, always get the chain 2990 */ 2991 hammer2_spin_unex(&parent->core.spin); 2992 chain = hammer2_chain_get(parent, generation, 2993 bref, how); 2994 if (chain == NULL) 2995 goto again; 2996 break; 2997 default: 2998 /* 2999 * No recursion, do not waste time instantiating 3000 * a chain, just iterate using the bref. 3001 */ 3002 hammer2_spin_unex(&parent->core.spin); 3003 break; 3004 } 3005 } else { 3006 /* 3007 * Recursion or not we need the chain in order to supply 3008 * the bref. 3009 */ 3010 hammer2_chain_ref(chain); 3011 hammer2_spin_unex(&parent->core.spin); 3012 hammer2_chain_lock(chain, how); 3013 } 3014 if (chain && 3015 (bcmp(bref, &chain->bref, sizeof(*bref)) || 3016 chain->parent != parent)) { 3017 hammer2_chain_unlock(chain); 3018 hammer2_chain_drop(chain); 3019 chain = NULL; 3020 goto again; 3021 } 3022 3023 /* 3024 * Skip deleted chains (XXX cache 'i' end-of-block-array? XXX) 3025 * 3026 * NOTE: chain's key range is not relevant as there might be 3027 * one-offs within the range that are not deleted. 3028 * 3029 * NOTE: XXX this could create problems with scans used in 3030 * situations other than mount-time recovery. 3031 * 3032 * NOTE: Lookups can race delete-duplicate because 3033 * delete-duplicate does not lock the parent's core 3034 * (they just use the spinlock on the core). 
3035 */ 3036 if (chain && (chain->flags & HAMMER2_CHAIN_DELETED)) { 3037 hammer2_chain_unlock(chain); 3038 hammer2_chain_drop(chain); 3039 chain = NULL; 3040 3041 key = next_key; 3042 if (key == 0) { 3043 error |= HAMMER2_ERROR_EOF; 3044 goto done; 3045 } 3046 goto again; 3047 } 3048 3049 done: 3050 /* 3051 * All done, return the bref or NULL, supply chain if necessary. 3052 */ 3053 if (chain) 3054 *chainp = chain; 3055 return (error); 3056 } 3057 3058 /* 3059 * Create and return a new hammer2 system memory structure of the specified 3060 * key, type and size and insert it under (*parentp). This is a full 3061 * insertion, based on the supplied key/keybits, and may involve creating 3062 * indirect blocks and moving other chains around via delete/duplicate. 3063 * 3064 * This call can be made with parent == NULL as long as a non -1 methods 3065 * is supplied. hmp must also be supplied in this situation (otherwise 3066 * hmp is extracted from the supplied parent). The chain will be detached 3067 * from the topology. A later call with both parent and chain can be made 3068 * to attach it. 3069 * 3070 * THE CALLER MUST HAVE ALREADY PROPERLY SEEKED (*parentp) TO THE INSERTION 3071 * POINT SANS ANY REQUIRED INDIRECT BLOCK CREATIONS DUE TO THE ARRAY BEING 3072 * FULL. This typically means that the caller is creating the chain after 3073 * doing a hammer2_chain_lookup(). 3074 * 3075 * (*parentp) must be exclusive locked and may be replaced on return 3076 * depending on how much work the function had to do. 3077 * 3078 * (*parentp) must not be errored or this function will assert. 3079 * 3080 * (*chainp) usually starts out NULL and returns the newly created chain, 3081 * but if the caller desires the caller may allocate a disconnected chain 3082 * and pass it in instead. 3083 * 3084 * This function should NOT be used to insert INDIRECT blocks. It is 3085 * typically used to create/insert inodes and data blocks. 
3086 * 3087 * Caller must pass-in an exclusively locked parent the new chain is to 3088 * be inserted under, and optionally pass-in a disconnected, exclusively 3089 * locked chain to insert (else we create a new chain). The function will 3090 * adjust (*parentp) as necessary, create or connect the chain, and 3091 * return an exclusively locked chain in *chainp. 3092 * 3093 * When creating a PFSROOT inode under the super-root, pmp is typically NULL 3094 * and will be reassigned. 3095 * 3096 * NOTE: returns HAMMER_ERROR_* flags 3097 */ 3098 int 3099 hammer2_chain_create(hammer2_chain_t **parentp, hammer2_chain_t **chainp, 3100 hammer2_dev_t *hmp, hammer2_pfs_t *pmp, int methods, 3101 hammer2_key_t key, int keybits, int type, size_t bytes, 3102 hammer2_tid_t mtid, hammer2_off_t dedup_off, int flags) 3103 { 3104 hammer2_chain_t *chain; 3105 hammer2_chain_t *parent; 3106 hammer2_blockref_t *base; 3107 hammer2_blockref_t dummy; 3108 int allocated = 0; 3109 int error = 0; 3110 int count; 3111 int maxloops = 300000; 3112 3113 /* 3114 * Topology may be crossing a PFS boundary. 3115 */ 3116 parent = *parentp; 3117 if (parent) { 3118 KKASSERT(hammer2_mtx_owned(&parent->lock)); 3119 KKASSERT(parent->error == 0); 3120 hmp = parent->hmp; 3121 } 3122 chain = *chainp; 3123 3124 if (chain == NULL) { 3125 /* 3126 * First allocate media space and construct the dummy bref, 3127 * then allocate the in-memory chain structure. Set the 3128 * INITIAL flag for fresh chains which do not have embedded 3129 * data. 3130 */ 3131 bzero(&dummy, sizeof(dummy)); 3132 dummy.type = type; 3133 dummy.key = key; 3134 dummy.keybits = keybits; 3135 dummy.data_off = hammer2_getradix(bytes); 3136 3137 /* 3138 * Inherit methods from parent by default. Primarily used 3139 * for BREF_TYPE_DATA. Non-data types *must* be set to 3140 * a non-NONE check algorithm. 
3141 */ 3142 if (methods == HAMMER2_METH_DEFAULT) 3143 dummy.methods = parent->bref.methods; 3144 else 3145 dummy.methods = (uint8_t)methods; 3146 3147 if (type != HAMMER2_BREF_TYPE_DATA && 3148 HAMMER2_DEC_CHECK(dummy.methods) == HAMMER2_CHECK_NONE) { 3149 dummy.methods |= 3150 HAMMER2_ENC_CHECK(HAMMER2_CHECK_DEFAULT); 3151 } 3152 3153 chain = hammer2_chain_alloc(hmp, pmp, &dummy); 3154 3155 /* 3156 * Lock the chain manually, chain_lock will load the chain 3157 * which we do NOT want to do. (note: chain->refs is set 3158 * to 1 by chain_alloc() for us, but lockcnt is not). 3159 */ 3160 chain->lockcnt = 1; 3161 hammer2_mtx_ex(&chain->lock); 3162 allocated = 1; 3163 3164 /* 3165 * Set INITIAL to optimize I/O. The flag will generally be 3166 * processed when we call hammer2_chain_modify(). 3167 */ 3168 switch(type) { 3169 case HAMMER2_BREF_TYPE_VOLUME: 3170 case HAMMER2_BREF_TYPE_FREEMAP: 3171 panic("hammer2_chain_create: called with volume type"); 3172 break; 3173 case HAMMER2_BREF_TYPE_INDIRECT: 3174 panic("hammer2_chain_create: cannot be used to" 3175 "create indirect block"); 3176 break; 3177 case HAMMER2_BREF_TYPE_FREEMAP_NODE: 3178 panic("hammer2_chain_create: cannot be used to" 3179 "create freemap root or node"); 3180 break; 3181 case HAMMER2_BREF_TYPE_FREEMAP_LEAF: 3182 KKASSERT(bytes == sizeof(chain->data->bmdata)); 3183 /* fall through */ 3184 case HAMMER2_BREF_TYPE_DIRENT: 3185 case HAMMER2_BREF_TYPE_INODE: 3186 case HAMMER2_BREF_TYPE_DATA: 3187 default: 3188 /* 3189 * leave chain->data NULL, set INITIAL 3190 */ 3191 KKASSERT(chain->data == NULL); 3192 atomic_set_int(&chain->flags, HAMMER2_CHAIN_INITIAL); 3193 break; 3194 } 3195 } else { 3196 /* 3197 * We are reattaching a previously deleted chain, possibly 3198 * under a new parent and possibly with a new key/keybits. 3199 * The chain does not have to be in a modified state. The 3200 * UPDATE flag will be set later on in this routine. 3201 * 3202 * Do NOT mess with the current state of the INITIAL flag. 
3203 */ 3204 chain->bref.key = key; 3205 chain->bref.keybits = keybits; 3206 if (chain->flags & HAMMER2_CHAIN_DELETED) 3207 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_DELETED); 3208 KKASSERT(chain->parent == NULL); 3209 } 3210 3211 /* 3212 * Set the appropriate bref flag if requested. 3213 * 3214 * NOTE! Callers can call this function to move chains without 3215 * knowing about special flags, so don't clear bref flags 3216 * here! 3217 */ 3218 if (flags & HAMMER2_INSERT_PFSROOT) 3219 chain->bref.flags |= HAMMER2_BREF_FLAG_PFSROOT; 3220 3221 if (parent == NULL) 3222 goto skip; 3223 3224 /* 3225 * Calculate how many entries we have in the blockref array and 3226 * determine if an indirect block is required when inserting into 3227 * the parent. 3228 */ 3229 again: 3230 if (--maxloops == 0) 3231 panic("hammer2_chain_create: maxloops"); 3232 3233 switch(parent->bref.type) { 3234 case HAMMER2_BREF_TYPE_INODE: 3235 if ((parent->data->ipdata.meta.op_flags & 3236 HAMMER2_OPFLAG_DIRECTDATA) != 0) { 3237 kprintf("hammer2: parent set for direct-data! 
" 3238 "pkey=%016jx ckey=%016jx\n", 3239 parent->bref.key, 3240 chain->bref.key); 3241 } 3242 KKASSERT((parent->data->ipdata.meta.op_flags & 3243 HAMMER2_OPFLAG_DIRECTDATA) == 0); 3244 KKASSERT(parent->data != NULL); 3245 base = &parent->data->ipdata.u.blockset.blockref[0]; 3246 count = HAMMER2_SET_COUNT; 3247 break; 3248 case HAMMER2_BREF_TYPE_INDIRECT: 3249 case HAMMER2_BREF_TYPE_FREEMAP_NODE: 3250 if (parent->flags & HAMMER2_CHAIN_INITIAL) 3251 base = NULL; 3252 else 3253 base = &parent->data->npdata[0]; 3254 count = parent->bytes / sizeof(hammer2_blockref_t); 3255 break; 3256 case HAMMER2_BREF_TYPE_VOLUME: 3257 KKASSERT(parent->data != NULL); 3258 base = &parent->data->voldata.sroot_blockset.blockref[0]; 3259 count = HAMMER2_SET_COUNT; 3260 break; 3261 case HAMMER2_BREF_TYPE_FREEMAP: 3262 KKASSERT(parent->data != NULL); 3263 base = &parent->data->blkset.blockref[0]; 3264 count = HAMMER2_SET_COUNT; 3265 break; 3266 default: 3267 panic("hammer2_chain_create: unrecognized blockref type: %d", 3268 parent->bref.type); 3269 base = NULL; 3270 count = 0; 3271 break; 3272 } 3273 3274 /* 3275 * Make sure we've counted the brefs 3276 */ 3277 if ((parent->flags & HAMMER2_CHAIN_COUNTEDBREFS) == 0) 3278 hammer2_chain_countbrefs(parent, base, count); 3279 3280 KASSERT(parent->core.live_count >= 0 && 3281 parent->core.live_count <= count, 3282 ("bad live_count %d/%d (%02x, %d)", 3283 parent->core.live_count, count, 3284 parent->bref.type, parent->bytes)); 3285 3286 /* 3287 * If no free blockref could be found we must create an indirect 3288 * block and move a number of blockrefs into it. With the parent 3289 * locked we can safely lock each child in order to delete+duplicate 3290 * it without causing a deadlock. 3291 * 3292 * This may return the new indirect block or the old parent depending 3293 * on where the key falls. NULL is returned on error. 
3294 */ 3295 if (parent->core.live_count == count) { 3296 hammer2_chain_t *nparent; 3297 3298 KKASSERT((flags & HAMMER2_INSERT_SAMEPARENT) == 0); 3299 3300 nparent = hammer2_chain_create_indirect(parent, key, keybits, 3301 mtid, type, &error); 3302 if (nparent == NULL) { 3303 if (allocated) 3304 hammer2_chain_drop(chain); 3305 chain = NULL; 3306 goto done; 3307 } 3308 if (parent != nparent) { 3309 hammer2_chain_unlock(parent); 3310 hammer2_chain_drop(parent); 3311 parent = *parentp = nparent; 3312 } 3313 goto again; 3314 } 3315 3316 /* 3317 * fall through if parent, or skip to here if no parent. 3318 */ 3319 skip: 3320 if (chain->flags & HAMMER2_CHAIN_DELETED) 3321 kprintf("Inserting deleted chain @%016jx\n", 3322 chain->bref.key); 3323 3324 /* 3325 * Link the chain into its parent. 3326 */ 3327 if (chain->parent != NULL) 3328 panic("hammer2: hammer2_chain_create: chain already connected"); 3329 KKASSERT(chain->parent == NULL); 3330 if (parent) { 3331 KKASSERT(parent->core.live_count < count); 3332 hammer2_chain_insert(parent, chain, 3333 HAMMER2_CHAIN_INSERT_SPIN | 3334 HAMMER2_CHAIN_INSERT_LIVE, 3335 0); 3336 } 3337 3338 if (allocated) { 3339 /* 3340 * Mark the newly created chain modified. This will cause 3341 * UPDATE to be set and process the INITIAL flag. 3342 * 3343 * Device buffers are not instantiated for DATA elements 3344 * as these are handled by logical buffers. 3345 * 3346 * Indirect and freemap node indirect blocks are handled 3347 * by hammer2_chain_create_indirect() and not by this 3348 * function. 3349 * 3350 * Data for all other bref types is expected to be 3351 * instantiated (INODE, LEAF). 
3352 */ 3353 switch(chain->bref.type) { 3354 case HAMMER2_BREF_TYPE_DATA: 3355 case HAMMER2_BREF_TYPE_FREEMAP_LEAF: 3356 case HAMMER2_BREF_TYPE_DIRENT: 3357 case HAMMER2_BREF_TYPE_INODE: 3358 error = hammer2_chain_modify(chain, mtid, dedup_off, 3359 HAMMER2_MODIFY_OPTDATA); 3360 break; 3361 default: 3362 /* 3363 * Remaining types are not supported by this function. 3364 * In particular, INDIRECT and LEAF_NODE types are 3365 * handled by create_indirect(). 3366 */ 3367 panic("hammer2_chain_create: bad type: %d", 3368 chain->bref.type); 3369 /* NOT REACHED */ 3370 break; 3371 } 3372 } else { 3373 /* 3374 * When reconnecting a chain we must set UPDATE and 3375 * setflush so the flush recognizes that it must update 3376 * the bref in the parent. 3377 */ 3378 if ((chain->flags & HAMMER2_CHAIN_UPDATE) == 0) 3379 atomic_set_int(&chain->flags, HAMMER2_CHAIN_UPDATE); 3380 } 3381 3382 /* 3383 * We must setflush(parent) to ensure that it recurses through to 3384 * chain. setflush(chain) might not work because ONFLUSH is possibly 3385 * already set in the chain (so it won't recurse up to set it in the 3386 * parent). 3387 */ 3388 if (parent) 3389 hammer2_chain_setflush(parent); 3390 3391 done: 3392 *chainp = chain; 3393 3394 return (error); 3395 } 3396 3397 /* 3398 * Move the chain from its old parent to a new parent. The chain must have 3399 * already been deleted or already disconnected (or never associated) with 3400 * a parent. The chain is reassociated with the new parent and the deleted 3401 * flag will be cleared (no longer deleted). The chain's modification state 3402 * is not altered. 3403 * 3404 * THE CALLER MUST HAVE ALREADY PROPERLY SEEKED (parent) TO THE INSERTION 3405 * POINT SANS ANY REQUIRED INDIRECT BLOCK CREATIONS DUE TO THE ARRAY BEING 3406 * FULL. This typically means that the caller is creating the chain after 3407 * doing a hammer2_chain_lookup(). 3408 * 3409 * Neither (parent) or (chain) can be errored. 
3410 * 3411 * If (parent) is non-NULL then the chain is inserted under the parent. 3412 * 3413 * If (parent) is NULL then the newly duplicated chain is not inserted 3414 * anywhere, similar to if it had just been chain_alloc()'d (suitable for 3415 * passing into hammer2_chain_create() after this function returns). 3416 * 3417 * WARNING! This function calls create which means it can insert indirect 3418 * blocks. This can cause other unrelated chains in the parent to 3419 * be moved to a newly inserted indirect block in addition to the 3420 * specific chain. 3421 */ 3422 void 3423 hammer2_chain_rename(hammer2_chain_t **parentp, hammer2_chain_t *chain, 3424 hammer2_tid_t mtid, int flags) 3425 { 3426 hammer2_blockref_t *bref; 3427 hammer2_chain_t *parent; 3428 3429 /* 3430 * WARNING! We should never resolve DATA to device buffers 3431 * (XXX allow it if the caller did?), and since 3432 * we currently do not have the logical buffer cache 3433 * buffer in-hand to fix its cached physical offset 3434 * we also force the modify code to not COW it. XXX 3435 * 3436 * NOTE! We allow error'd chains to be renamed. The bref itself 3437 * is good and can be renamed. The content, however, may 3438 * be inaccessible. 3439 */ 3440 KKASSERT(chain->parent == NULL); 3441 /*KKASSERT(chain->error == 0); allow */ 3442 bref = &chain->bref; 3443 3444 /* 3445 * If parent is not NULL the duplicated chain will be entered under 3446 * the parent and the UPDATE bit set to tell flush to update 3447 * the blockref. 3448 * 3449 * We must setflush(parent) to ensure that it recurses through to 3450 * chain. setflush(chain) might not work because ONFLUSH is possibly 3451 * already set in the chain (so it won't recurse up to set it in the 3452 * parent). 3453 * 3454 * Having both chains locked is extremely important for atomicy. 
3455 */ 3456 if (parentp && (parent = *parentp) != NULL) { 3457 KKASSERT(hammer2_mtx_owned(&parent->lock)); 3458 KKASSERT(parent->refs > 0); 3459 KKASSERT(parent->error == 0); 3460 3461 hammer2_chain_create(parentp, &chain, NULL, chain->pmp, 3462 HAMMER2_METH_DEFAULT, 3463 bref->key, bref->keybits, bref->type, 3464 chain->bytes, mtid, 0, flags); 3465 KKASSERT(chain->flags & HAMMER2_CHAIN_UPDATE); 3466 hammer2_chain_setflush(*parentp); 3467 } 3468 } 3469 3470 /* 3471 * This works in tandem with delete_obref() to install a blockref in 3472 * (typically) an indirect block that is associated with the chain being 3473 * moved to *parentp. 3474 * 3475 * The reason we need this function is that the caller needs to maintain 3476 * the blockref as it was, and not generate a new blockref for what might 3477 * be a modified chain. Otherwise stuff will leak into the flush that 3478 * the flush code's FLUSH_INODE_STOP flag is unable to catch. 3479 * 3480 * It is EXTREMELY important that we properly set CHAIN_BLKMAPUPD and 3481 * CHAIN_UPDATE. We must set BLKMAPUPD if the bref does not match, and 3482 * we must clear CHAIN_UPDATE (that was likely set by the chain_rename) if 3483 * it does. Otherwise we can end up in a situation where H2 is unable to 3484 * clean up the in-memory chain topology. 3485 * 3486 * The reason for this is that flushes do not generally flush through 3487 * BREF_TYPE_INODE chains and depend on a hammer2_inode_t queued to syncq 3488 * or sideq to properly flush and dispose of the related inode chain's flags. 3489 * Situations where the inode is not actually modified by the frontend, 3490 * but where we have to move the related chains around as we insert or cleanup 3491 * indirect blocks, can leave us with a 'dirty' (non-disposable) in-memory 3492 * inode chain that does not have a hammer2_inode_t associated with it. 
 */
static void
hammer2_chain_rename_obref(hammer2_chain_t **parentp, hammer2_chain_t *chain,
			   hammer2_tid_t mtid, int flags,
			   hammer2_blockref_t *obref)
{
	hammer2_chain_rename(parentp, chain, mtid, flags);

	/*
	 * If the caller supplied a non-empty original blockref, reinstall
	 * it verbatim in the (possibly new) parent's blockmap rather than
	 * letting the flush generate a fresh one.
	 */
	if (obref->type != HAMMER2_BREF_TYPE_EMPTY) {
		hammer2_blockref_t *tbase;
		int tcount;

		KKASSERT((chain->flags & HAMMER2_CHAIN_BLKMAPPED) == 0);
		hammer2_chain_modify(*parentp, mtid, 0, 0);
		tbase = hammer2_chain_base_and_count(*parentp, &tcount);
		hammer2_base_insert(*parentp, tbase, tcount, chain, obref);
		if (bcmp(obref, &chain->bref, sizeof(chain->bref))) {
			/* bref differs; flush must rewrite the map entry */
			atomic_set_int(&chain->flags, HAMMER2_CHAIN_BLKMAPUPD |
						      HAMMER2_CHAIN_UPDATE);
		} else {
			/* bref matches; undo UPDATE set by chain_rename */
			atomic_clear_int(&chain->flags, HAMMER2_CHAIN_UPDATE);
		}
	}
}

/*
 * Helper function for deleting chains.
 *
 * The chain is removed from the live view (the RBTREE) as well as the parent's
 * blockmap.  Both chain and its parent must be locked.
 *
 * parent may not be errored.  chain can be errored.
 *
 * Returns 0 on success or a HAMMER2_ERROR_* code if the parent could not
 * be marked modified (in which case nothing is removed).
 */
static int
_hammer2_chain_delete_helper(hammer2_chain_t *parent, hammer2_chain_t *chain,
			     hammer2_tid_t mtid, int flags,
			     hammer2_blockref_t *obref)
{
	int error = 0;

	KKASSERT((chain->flags & HAMMER2_CHAIN_DELETED) == 0);
	KKASSERT(chain->parent == parent);

	if (chain->flags & HAMMER2_CHAIN_BLKMAPPED) {
		/*
		 * Chain is blockmapped, so there must be a parent.
		 * Atomically remove the chain from the parent and remove
		 * the blockmap entry.  The parent must be set modified
		 * to remove the blockmap entry.
		 */
		hammer2_blockref_t *base;
		int count;

		KKASSERT(parent != NULL);
		KKASSERT(parent->error == 0);
		KKASSERT((parent->flags & HAMMER2_CHAIN_INITIAL) == 0);
		error = hammer2_chain_modify(parent, mtid, 0, 0);
		if (error)
			goto done;

		/*
		 * Calculate blockmap pointer
		 *
		 * Both spinlocks are held across the RB removal and the
		 * blockmap deletion so lookups see a consistent state.
		 */
		KKASSERT(chain->flags & HAMMER2_CHAIN_ONRBTREE);
		hammer2_spin_ex(&chain->core.spin);
		hammer2_spin_ex(&parent->core.spin);

		atomic_set_int(&chain->flags, HAMMER2_CHAIN_DELETED);
		atomic_add_int(&parent->core.live_count, -1);
		++parent->core.generation;
		RB_REMOVE(hammer2_chain_tree, &parent->core.rbtree, chain);
		atomic_clear_int(&chain->flags, HAMMER2_CHAIN_ONRBTREE);
		--parent->core.chain_count;
		chain->parent = NULL;

		switch(parent->bref.type) {
		case HAMMER2_BREF_TYPE_INODE:
			/*
			 * Access the inode's block array.  However, there
			 * is no block array if the inode is flagged
			 * DIRECTDATA.
			 */
			if (parent->data &&
			    (parent->data->ipdata.meta.op_flags &
			     HAMMER2_OPFLAG_DIRECTDATA) == 0) {
				base =
				   &parent->data->ipdata.u.blockset.blockref[0];
			} else {
				base = NULL;
			}
			count = HAMMER2_SET_COUNT;
			break;
		case HAMMER2_BREF_TYPE_INDIRECT:
		case HAMMER2_BREF_TYPE_FREEMAP_NODE:
			if (parent->data)
				base = &parent->data->npdata[0];
			else
				base = NULL;
			count = parent->bytes / sizeof(hammer2_blockref_t);
			break;
		case HAMMER2_BREF_TYPE_VOLUME:
			base = &parent->data->voldata.
					sroot_blockset.blockref[0];
			count = HAMMER2_SET_COUNT;
			break;
		case HAMMER2_BREF_TYPE_FREEMAP:
			base = &parent->data->blkset.blockref[0];
			count = HAMMER2_SET_COUNT;
			break;
		default:
			base = NULL;
			count = 0;
			panic("_hammer2_chain_delete_helper: "
			      "unrecognized blockref type: %d",
			      parent->bref.type);
			break;
		}

		/*
		 * delete blockmapped chain from its parent.
		 *
		 * The parent is not affected by any statistics in chain
		 * which are pending synchronization.  That is, there is
		 * nothing to undo in the parent since they have not yet
		 * been incorporated into the parent.
		 *
		 * The parent is affected by statistics stored in inodes.
		 * Those have already been synchronized, so they must be
		 * undone.  XXX split update possible w/delete in middle?
		 */
		if (base) {
			hammer2_base_delete(parent, base, count, chain, obref);
		}
		hammer2_spin_unex(&parent->core.spin);
		hammer2_spin_unex(&chain->core.spin);
	} else if (chain->flags & HAMMER2_CHAIN_ONRBTREE) {
		/*
		 * Chain is not blockmapped but a parent is present.
		 * Atomically remove the chain from the parent.  There is
		 * no blockmap entry to remove.
		 *
		 * Because chain was associated with a parent but not
		 * synchronized, the chain's *_count_up fields contain
		 * inode adjustment statistics which must be undone.
		 */
		hammer2_spin_ex(&chain->core.spin);
		hammer2_spin_ex(&parent->core.spin);
		atomic_set_int(&chain->flags, HAMMER2_CHAIN_DELETED);
		atomic_add_int(&parent->core.live_count, -1);
		++parent->core.generation;
		RB_REMOVE(hammer2_chain_tree, &parent->core.rbtree, chain);
		atomic_clear_int(&chain->flags, HAMMER2_CHAIN_ONRBTREE);
		--parent->core.chain_count;
		chain->parent = NULL;
		hammer2_spin_unex(&parent->core.spin);
		hammer2_spin_unex(&chain->core.spin);
	} else {
		/*
		 * Chain is not blockmapped and has no parent.  This
		 * is a degenerate case.
		 */
		atomic_set_int(&chain->flags, HAMMER2_CHAIN_DELETED);
	}
done:
	return error;
}

/*
 * Create an indirect block that covers one or more of the elements in the
 * current parent.  Either returns the existing parent with no locking or
 * ref changes or returns the new indirect block locked and referenced
 * and leaving the original parent lock/ref intact as well.
 *
 * If an error occurs, NULL is returned and *errorp is set to the H2 error.
 *
 * The returned chain depends on where the specified key falls.
 *
 * The key/keybits for the indirect mode only needs to follow three rules:
 *
 * (1) That all elements underneath it fit within its key space and
 *
 * (2) That all elements outside it are outside its key space.
 *
 * (3) When creating the new indirect block any elements in the current
 *     parent that fit within the new indirect block's keyspace must be
 *     moved into the new indirect block.
 *
 * (4) The keyspace chosen for the inserted indirect block CAN cover a wider
 *     keyspace than the current parent, but lookup/iteration rules will
 *     ensure (and must ensure) that rule (2) for all parents leading up
 *     to the nearest inode or the root volume header is adhered to.
This 3684 * is accomplished by always recursing through matching keyspaces in 3685 * the hammer2_chain_lookup() and hammer2_chain_next() API. 3686 * 3687 * The current implementation calculates the current worst-case keyspace by 3688 * iterating the current parent and then divides it into two halves, choosing 3689 * whichever half has the most elements (not necessarily the half containing 3690 * the requested key). 3691 * 3692 * We can also opt to use the half with the least number of elements. This 3693 * causes lower-numbered keys (aka logical file offsets) to recurse through 3694 * fewer indirect blocks and higher-numbered keys to recurse through more. 3695 * This also has the risk of not moving enough elements to the new indirect 3696 * block and being forced to create several indirect blocks before the element 3697 * can be inserted. 3698 * 3699 * Must be called with an exclusively locked parent. 3700 * 3701 * NOTE: *errorp set to HAMMER_ERROR_* flags 3702 */ 3703 static int hammer2_chain_indkey_freemap(hammer2_chain_t *parent, 3704 hammer2_key_t *keyp, int keybits, 3705 hammer2_blockref_t *base, int count); 3706 static int hammer2_chain_indkey_file(hammer2_chain_t *parent, 3707 hammer2_key_t *keyp, int keybits, 3708 hammer2_blockref_t *base, int count, 3709 int ncount); 3710 static int hammer2_chain_indkey_dir(hammer2_chain_t *parent, 3711 hammer2_key_t *keyp, int keybits, 3712 hammer2_blockref_t *base, int count, 3713 int ncount); 3714 static 3715 hammer2_chain_t * 3716 hammer2_chain_create_indirect(hammer2_chain_t *parent, 3717 hammer2_key_t create_key, int create_bits, 3718 hammer2_tid_t mtid, int for_type, int *errorp) 3719 { 3720 hammer2_dev_t *hmp; 3721 hammer2_blockref_t *base; 3722 hammer2_blockref_t *bref; 3723 hammer2_blockref_t bsave; 3724 hammer2_blockref_t dummy; 3725 hammer2_chain_t *chain; 3726 hammer2_chain_t *ichain; 3727 hammer2_key_t key = create_key; 3728 hammer2_key_t key_beg; 3729 hammer2_key_t key_end; 3730 hammer2_key_t key_next; 3731 
int keybits = create_bits; 3732 int count; 3733 int ncount; 3734 int nbytes; 3735 int loops; 3736 int error; 3737 int reason; 3738 int generation; 3739 int maxloops = 300000; 3740 3741 /* 3742 * Calculate the base blockref pointer or NULL if the chain 3743 * is known to be empty. We need to calculate the array count 3744 * for RB lookups either way. 3745 */ 3746 hmp = parent->hmp; 3747 KKASSERT(hammer2_mtx_owned(&parent->lock)); 3748 3749 /* 3750 * Pre-modify the parent now to avoid having to deal with error 3751 * processing if we tried to later (in the middle of our loop). 3752 * 3753 * We are going to be moving bref's around, the indirect blocks 3754 * cannot be in an initial state. Do not pass MODIFY_OPTDATA. 3755 */ 3756 *errorp = hammer2_chain_modify(parent, mtid, 0, 0); 3757 if (*errorp) { 3758 kprintf("hammer2_chain_create_indirect: error %08x %s\n", 3759 *errorp, hammer2_error_str(*errorp)); 3760 return NULL; 3761 } 3762 KKASSERT((parent->flags & HAMMER2_CHAIN_INITIAL) == 0); 3763 3764 /*hammer2_chain_modify(&parent, HAMMER2_MODIFY_OPTDATA);*/ 3765 base = hammer2_chain_base_and_count(parent, &count); 3766 3767 /* 3768 * How big should our new indirect block be? It has to be at least 3769 * as large as its parent for splits to work properly. 3770 * 3771 * The freemap uses a specific indirect block size. The number of 3772 * levels are built dynamically and ultimately depend on the size 3773 * volume. Because freemap blocks are taken from the reserved areas 3774 * of the volume our goal is efficiency (fewer levels) and not so 3775 * much to save disk space. 3776 * 3777 * The first indirect block level for a directory usually uses 3778 * HAMMER2_IND_BYTES_MIN (4KB = 32 directory entries). Due to 3779 * the hash mechanism, this typically gives us a nominal 3780 * 32 * 4 entries with one level of indirection. 3781 * 3782 * We use HAMMER2_IND_BYTES_NOM (16KB = 128 blockrefs) for FILE 3783 * indirect blocks. 
The initial 4 entries in the inode gives us 3784 * 256KB. Up to 4 indirect blocks gives us 32MB. Three levels 3785 * of indirection gives us 137GB, and so forth. H2 can support 3786 * huge file sizes but they are not typical, so we try to stick 3787 * with compactness and do not use a larger indirect block size. 3788 * 3789 * We could use 64KB (PBUFSIZE), giving us 512 blockrefs, but 3790 * due to the way indirect blocks are created this usually winds 3791 * up being extremely inefficient for small files. Even though 3792 * 16KB requires more levels of indirection for very large files, 3793 * the 16KB records can be ganged together into 64KB DIOs. 3794 */ 3795 if (for_type == HAMMER2_BREF_TYPE_FREEMAP_NODE || 3796 for_type == HAMMER2_BREF_TYPE_FREEMAP_LEAF) { 3797 nbytes = HAMMER2_FREEMAP_LEVELN_PSIZE; 3798 } else if (parent->bref.type == HAMMER2_BREF_TYPE_INODE) { 3799 if (parent->data->ipdata.meta.type == 3800 HAMMER2_OBJTYPE_DIRECTORY) 3801 nbytes = HAMMER2_IND_BYTES_MIN; /* 4KB = 32 entries */ 3802 else 3803 nbytes = HAMMER2_IND_BYTES_NOM; /* 16KB = ~8MB file */ 3804 3805 } else { 3806 nbytes = HAMMER2_IND_BYTES_NOM; 3807 } 3808 if (nbytes < count * sizeof(hammer2_blockref_t)) { 3809 KKASSERT(for_type != HAMMER2_BREF_TYPE_FREEMAP_NODE && 3810 for_type != HAMMER2_BREF_TYPE_FREEMAP_LEAF); 3811 nbytes = count * sizeof(hammer2_blockref_t); 3812 } 3813 ncount = nbytes / sizeof(hammer2_blockref_t); 3814 3815 /* 3816 * When creating an indirect block for a freemap node or leaf 3817 * the key/keybits must be fitted to static radix levels because 3818 * particular radix levels use particular reserved blocks in the 3819 * related zone. 3820 * 3821 * This routine calculates the key/radix of the indirect block 3822 * we need to create, and whether it is on the high-side or the 3823 * low-side. 
3824 */ 3825 switch(for_type) { 3826 case HAMMER2_BREF_TYPE_FREEMAP_NODE: 3827 case HAMMER2_BREF_TYPE_FREEMAP_LEAF: 3828 keybits = hammer2_chain_indkey_freemap(parent, &key, keybits, 3829 base, count); 3830 break; 3831 case HAMMER2_BREF_TYPE_DATA: 3832 keybits = hammer2_chain_indkey_file(parent, &key, keybits, 3833 base, count, ncount); 3834 break; 3835 case HAMMER2_BREF_TYPE_DIRENT: 3836 case HAMMER2_BREF_TYPE_INODE: 3837 keybits = hammer2_chain_indkey_dir(parent, &key, keybits, 3838 base, count, ncount); 3839 break; 3840 default: 3841 panic("illegal indirect block for bref type %d", for_type); 3842 break; 3843 } 3844 3845 /* 3846 * Normalize the key for the radix being represented, keeping the 3847 * high bits and throwing away the low bits. 3848 */ 3849 key &= ~(((hammer2_key_t)1 << keybits) - 1); 3850 3851 /* 3852 * Ok, create our new indirect block 3853 */ 3854 bzero(&dummy, sizeof(dummy)); 3855 if (for_type == HAMMER2_BREF_TYPE_FREEMAP_NODE || 3856 for_type == HAMMER2_BREF_TYPE_FREEMAP_LEAF) { 3857 dummy.type = HAMMER2_BREF_TYPE_FREEMAP_NODE; 3858 } else { 3859 dummy.type = HAMMER2_BREF_TYPE_INDIRECT; 3860 } 3861 dummy.key = key; 3862 dummy.keybits = keybits; 3863 dummy.data_off = hammer2_getradix(nbytes); 3864 dummy.methods = 3865 HAMMER2_ENC_CHECK(HAMMER2_DEC_CHECK(parent->bref.methods)) | 3866 HAMMER2_ENC_COMP(HAMMER2_COMP_NONE); 3867 3868 ichain = hammer2_chain_alloc(hmp, parent->pmp, &dummy); 3869 atomic_set_int(&ichain->flags, HAMMER2_CHAIN_INITIAL); 3870 hammer2_chain_lock(ichain, HAMMER2_RESOLVE_MAYBE); 3871 /* ichain has one ref at this point */ 3872 3873 /* 3874 * We have to mark it modified to allocate its block, but use 3875 * OPTDATA to allow it to remain in the INITIAL state. Otherwise 3876 * it won't be acted upon by the flush code. 3877 * 3878 * XXX remove OPTDATA, we need a fully initialized indirect block to 3879 * be able to move the original blockref. 
3880 */ 3881 *errorp = hammer2_chain_modify(ichain, mtid, 0, 0); 3882 if (*errorp) { 3883 kprintf("hammer2_chain_create_indirect: error %08x %s\n", 3884 *errorp, hammer2_error_str(*errorp)); 3885 hammer2_chain_unlock(ichain); 3886 hammer2_chain_drop(ichain); 3887 return NULL; 3888 } 3889 KKASSERT((ichain->flags & HAMMER2_CHAIN_INITIAL) == 0); 3890 3891 /* 3892 * Iterate the original parent and move the matching brefs into 3893 * the new indirect block. 3894 * 3895 * XXX handle flushes. 3896 */ 3897 key_beg = 0; 3898 key_end = HAMMER2_KEY_MAX; 3899 key_next = 0; /* avoid gcc warnings */ 3900 hammer2_spin_ex(&parent->core.spin); 3901 loops = 0; 3902 reason = 0; 3903 3904 for (;;) { 3905 /* 3906 * Parent may have been modified, relocating its block array. 3907 * Reload the base pointer. 3908 */ 3909 base = hammer2_chain_base_and_count(parent, &count); 3910 3911 if (++loops > 100000) { 3912 hammer2_spin_unex(&parent->core.spin); 3913 panic("excessive loops r=%d p=%p base/count %p:%d %016jx\n", 3914 reason, parent, base, count, key_next); 3915 } 3916 3917 /* 3918 * NOTE: spinlock stays intact, returned chain (if not NULL) 3919 * is not referenced or locked which means that we 3920 * cannot safely check its flagged / deletion status 3921 * until we lock it. 3922 */ 3923 chain = hammer2_combined_find(parent, base, count, 3924 &key_next, 3925 key_beg, key_end, 3926 &bref); 3927 generation = parent->core.generation; 3928 if (bref == NULL) 3929 break; 3930 key_next = bref->key + ((hammer2_key_t)1 << bref->keybits); 3931 3932 /* 3933 * Skip keys that are not within the key/radix of the new 3934 * indirect block. They stay in the parent. 3935 */ 3936 if (rounddown2(key ^ bref->key, (hammer2_key_t)1 << keybits) != 0) { 3937 goto next_key_spinlocked; 3938 } 3939 3940 /* 3941 * Load the new indirect block by acquiring the related 3942 * chains (potentially from media as it might not be 3943 * in-memory). Then move it to the new parent (ichain). 
3944 * 3945 * chain is referenced but not locked. We must lock the 3946 * chain to obtain definitive state. 3947 */ 3948 bsave = *bref; 3949 if (chain) { 3950 /* 3951 * Use chain already present in the RBTREE 3952 */ 3953 hammer2_chain_ref(chain); 3954 hammer2_spin_unex(&parent->core.spin); 3955 hammer2_chain_lock(chain, HAMMER2_RESOLVE_NEVER); 3956 } else { 3957 /* 3958 * Get chain for blockref element. _get returns NULL 3959 * on insertion race. 3960 */ 3961 hammer2_spin_unex(&parent->core.spin); 3962 chain = hammer2_chain_get(parent, generation, &bsave, 3963 HAMMER2_RESOLVE_NEVER); 3964 if (chain == NULL) { 3965 reason = 1; 3966 hammer2_spin_ex(&parent->core.spin); 3967 continue; 3968 } 3969 } 3970 3971 /* 3972 * This is always live so if the chain has been deleted 3973 * we raced someone and we have to retry. 3974 * 3975 * NOTE: Lookups can race delete-duplicate because 3976 * delete-duplicate does not lock the parent's core 3977 * (they just use the spinlock on the core). 3978 * 3979 * (note reversed logic for this one) 3980 */ 3981 if (bcmp(&bsave, &chain->bref, sizeof(bsave)) || 3982 chain->parent != parent || 3983 (chain->flags & HAMMER2_CHAIN_DELETED)) { 3984 hammer2_chain_unlock(chain); 3985 hammer2_chain_drop(chain); 3986 if (hammer2_debug & 0x0040) { 3987 kprintf("LOST PARENT RETRY " 3988 "RETRY (%p,%p)->%p %08x\n", 3989 parent, chain->parent, chain, chain->flags); 3990 } 3991 hammer2_spin_ex(&parent->core.spin); 3992 continue; 3993 } 3994 3995 /* 3996 * Shift the chain to the indirect block. 3997 * 3998 * WARNING! No reason for us to load chain data, pass NOSTATS 3999 * to prevent delete/insert from trying to access 4000 * inode stats (and thus asserting if there is no 4001 * chain->data loaded). 4002 * 4003 * WARNING! The (parent, chain) deletion may modify the parent 4004 * and invalidate the base pointer. 4005 * 4006 * WARNING! Parent must already be marked modified, so we 4007 * can assume that chain_delete always suceeds. 4008 * 4009 * WARNING! 
hammer2_chain_repchange() does not have to be 4010 * called (and doesn't work anyway because we are 4011 * only doing a partial shift). A recursion that is 4012 * in-progress can continue at the current parent 4013 * and will be able to properly find its next key. 4014 */ 4015 error = hammer2_chain_delete_obref(parent, chain, mtid, 0, 4016 &bsave); 4017 KKASSERT(error == 0); 4018 hammer2_chain_rename_obref(&ichain, chain, mtid, 0, &bsave); 4019 hammer2_chain_unlock(chain); 4020 hammer2_chain_drop(chain); 4021 KKASSERT(parent->refs > 0); 4022 chain = NULL; 4023 base = NULL; /* safety */ 4024 hammer2_spin_ex(&parent->core.spin); 4025 next_key_spinlocked: 4026 if (--maxloops == 0) 4027 panic("hammer2_chain_create_indirect: maxloops"); 4028 reason = 4; 4029 if (key_next == 0 || key_next > key_end) 4030 break; 4031 key_beg = key_next; 4032 /* loop */ 4033 } 4034 hammer2_spin_unex(&parent->core.spin); 4035 4036 /* 4037 * Insert the new indirect block into the parent now that we've 4038 * cleared out some entries in the parent. We calculated a good 4039 * insertion index in the loop above (ichain->index). 4040 * 4041 * We don't have to set UPDATE here because we mark ichain 4042 * modified down below (so the normal modified -> flush -> set-moved 4043 * sequence applies). 4044 * 4045 * The insertion shouldn't race as this is a completely new block 4046 * and the parent is locked. 4047 */ 4048 base = NULL; /* safety, parent modify may change address */ 4049 KKASSERT((ichain->flags & HAMMER2_CHAIN_ONRBTREE) == 0); 4050 KKASSERT(parent->core.live_count < count); 4051 hammer2_chain_insert(parent, ichain, 4052 HAMMER2_CHAIN_INSERT_SPIN | 4053 HAMMER2_CHAIN_INSERT_LIVE, 4054 0); 4055 4056 /* 4057 * Make sure flushes propogate after our manual insertion. 4058 */ 4059 hammer2_chain_setflush(ichain); 4060 hammer2_chain_setflush(parent); 4061 4062 /* 4063 * Figure out what to return. 
4064 */ 4065 if (rounddown2(create_key ^ key, (hammer2_key_t)1 << keybits) != 0) { 4066 /* 4067 * Key being created is outside the key range, 4068 * return the original parent. 4069 */ 4070 hammer2_chain_unlock(ichain); 4071 hammer2_chain_drop(ichain); 4072 } else { 4073 /* 4074 * Otherwise its in the range, return the new parent. 4075 * (leave both the new and old parent locked). 4076 */ 4077 parent = ichain; 4078 } 4079 4080 return(parent); 4081 } 4082 4083 /* 4084 * Do maintenance on an indirect chain. Both parent and chain are locked. 4085 * 4086 * Returns non-zero if (chain) is deleted, either due to being empty or 4087 * because its children were safely moved into the parent. 4088 */ 4089 int 4090 hammer2_chain_indirect_maintenance(hammer2_chain_t *parent, 4091 hammer2_chain_t *chain) 4092 { 4093 hammer2_blockref_t *chain_base; 4094 hammer2_blockref_t *base; 4095 hammer2_blockref_t *bref; 4096 hammer2_blockref_t bsave; 4097 hammer2_key_t key_next; 4098 hammer2_key_t key_beg; 4099 hammer2_key_t key_end; 4100 hammer2_chain_t *sub; 4101 int chain_count; 4102 int count; 4103 int error; 4104 int generation; 4105 4106 /* 4107 * Make sure we have an accurate live_count 4108 */ 4109 if ((chain->flags & (HAMMER2_CHAIN_INITIAL | 4110 HAMMER2_CHAIN_COUNTEDBREFS)) == 0) { 4111 base = &chain->data->npdata[0]; 4112 count = chain->bytes / sizeof(hammer2_blockref_t); 4113 hammer2_chain_countbrefs(chain, base, count); 4114 } 4115 4116 /* 4117 * If the indirect block is empty we can delete it. 
4118 * (ignore deletion error) 4119 */ 4120 if (chain->core.live_count == 0 && RB_EMPTY(&chain->core.rbtree)) { 4121 hammer2_chain_delete(parent, chain, 4122 chain->bref.modify_tid, 4123 HAMMER2_DELETE_PERMANENT); 4124 hammer2_chain_repchange(parent, chain); 4125 return 1; 4126 } 4127 4128 base = hammer2_chain_base_and_count(parent, &count); 4129 4130 if ((parent->flags & (HAMMER2_CHAIN_INITIAL | 4131 HAMMER2_CHAIN_COUNTEDBREFS)) == 0) { 4132 hammer2_chain_countbrefs(parent, base, count); 4133 } 4134 4135 /* 4136 * Determine if we can collapse chain into parent, calculate 4137 * hysteresis for chain emptiness. 4138 */ 4139 if (parent->core.live_count + chain->core.live_count - 1 > count) 4140 return 0; 4141 chain_count = chain->bytes / sizeof(hammer2_blockref_t); 4142 if (chain->core.live_count > chain_count * 3 / 4) 4143 return 0; 4144 4145 /* 4146 * Ok, theoretically we can collapse chain's contents into 4147 * parent. chain is locked, but any in-memory children of chain 4148 * are not. For this to work, we must be able to dispose of any 4149 * in-memory children of chain. 4150 * 4151 * For now require that there are no in-memory children of chain. 4152 * 4153 * WARNING! Both chain and parent must remain locked across this 4154 * entire operation. 4155 */ 4156 4157 /* 4158 * Parent must be marked modified. Don't try to collapse it if we 4159 * can't mark it modified. Once modified, destroy chain to make room 4160 * and to get rid of what will be a conflicting key (this is included 4161 * in the calculation above). Finally, move the children of chain 4162 * into chain's parent. 4163 * 4164 * This order creates an accounting problem for bref.embed.stats 4165 * because we destroy chain before we remove its children. Any 4166 * elements whos blockref is already synchronized will be counted 4167 * twice. To deal with the problem we clean out chain's stats prior 4168 * to deleting it. 
4169 */ 4170 error = hammer2_chain_modify(parent, 0, 0, 0); 4171 if (error) { 4172 krateprintf(&krate_h2me, "hammer2: indirect_maint: %s\n", 4173 hammer2_error_str(error)); 4174 return 0; 4175 } 4176 error = hammer2_chain_modify(chain, chain->bref.modify_tid, 0, 0); 4177 if (error) { 4178 krateprintf(&krate_h2me, "hammer2: indirect_maint: %s\n", 4179 hammer2_error_str(error)); 4180 return 0; 4181 } 4182 4183 chain->bref.embed.stats.inode_count = 0; 4184 chain->bref.embed.stats.data_count = 0; 4185 error = hammer2_chain_delete(parent, chain, 4186 chain->bref.modify_tid, 4187 HAMMER2_DELETE_PERMANENT); 4188 KKASSERT(error == 0); 4189 4190 /* 4191 * The combined_find call requires core.spin to be held. One would 4192 * think there wouldn't be any conflicts since we hold chain 4193 * exclusively locked, but the caching mechanism for 0-ref children 4194 * does not require a chain lock. 4195 */ 4196 hammer2_spin_ex(&chain->core.spin); 4197 4198 key_next = 0; 4199 key_beg = 0; 4200 key_end = HAMMER2_KEY_MAX; 4201 for (;;) { 4202 chain_base = &chain->data->npdata[0]; 4203 chain_count = chain->bytes / sizeof(hammer2_blockref_t); 4204 sub = hammer2_combined_find(chain, chain_base, chain_count, 4205 &key_next, 4206 key_beg, key_end, 4207 &bref); 4208 generation = chain->core.generation; 4209 if (bref == NULL) 4210 break; 4211 key_next = bref->key + ((hammer2_key_t)1 << bref->keybits); 4212 4213 bsave = *bref; 4214 if (sub) { 4215 hammer2_chain_ref(sub); 4216 hammer2_spin_unex(&chain->core.spin); 4217 hammer2_chain_lock(sub, HAMMER2_RESOLVE_NEVER); 4218 } else { 4219 hammer2_spin_unex(&chain->core.spin); 4220 sub = hammer2_chain_get(chain, generation, &bsave, 4221 HAMMER2_RESOLVE_NEVER); 4222 if (sub == NULL) { 4223 hammer2_spin_ex(&chain->core.spin); 4224 continue; 4225 } 4226 } 4227 if (bcmp(&bsave, &sub->bref, sizeof(bsave)) || 4228 sub->parent != chain || 4229 (sub->flags & HAMMER2_CHAIN_DELETED)) { 4230 hammer2_chain_unlock(sub); 4231 hammer2_chain_drop(sub); 4232 
hammer2_spin_ex(&chain->core.spin); 4233 sub = NULL; /* safety */ 4234 continue; 4235 } 4236 error = hammer2_chain_delete_obref(chain, sub, 4237 sub->bref.modify_tid, 0, 4238 &bsave); 4239 KKASSERT(error == 0); 4240 hammer2_chain_rename_obref(&parent, sub, 4241 sub->bref.modify_tid, 4242 HAMMER2_INSERT_SAMEPARENT, &bsave); 4243 hammer2_chain_unlock(sub); 4244 hammer2_chain_drop(sub); 4245 hammer2_spin_ex(&chain->core.spin); 4246 4247 if (key_next == 0) 4248 break; 4249 key_beg = key_next; 4250 } 4251 hammer2_spin_unex(&chain->core.spin); 4252 4253 hammer2_chain_repchange(parent, chain); 4254 4255 return 1; 4256 } 4257 4258 /* 4259 * Freemap indirect blocks 4260 * 4261 * Calculate the keybits and highside/lowside of the freemap node the 4262 * caller is creating. 4263 * 4264 * This routine will specify the next higher-level freemap key/radix 4265 * representing the lowest-ordered set. By doing so, eventually all 4266 * low-ordered sets will be moved one level down. 4267 * 4268 * We have to be careful here because the freemap reserves a limited 4269 * number of blocks for a limited number of levels. So we can't just 4270 * push indiscriminately. 4271 */ 4272 int 4273 hammer2_chain_indkey_freemap(hammer2_chain_t *parent, hammer2_key_t *keyp, 4274 int keybits, hammer2_blockref_t *base, int count) 4275 { 4276 hammer2_chain_t *chain; 4277 hammer2_blockref_t *bref; 4278 hammer2_key_t key; 4279 hammer2_key_t key_beg; 4280 hammer2_key_t key_end; 4281 hammer2_key_t key_next; 4282 int maxloops = 300000; 4283 4284 key = *keyp; 4285 keybits = 64; 4286 4287 /* 4288 * Calculate the range of keys in the array being careful to skip 4289 * slots which are overridden with a deletion. 
4290 */ 4291 key_beg = 0; 4292 key_end = HAMMER2_KEY_MAX; 4293 hammer2_spin_ex(&parent->core.spin); 4294 4295 for (;;) { 4296 if (--maxloops == 0) { 4297 panic("indkey_freemap shit %p %p:%d\n", 4298 parent, base, count); 4299 } 4300 chain = hammer2_combined_find(parent, base, count, 4301 &key_next, 4302 key_beg, key_end, 4303 &bref); 4304 4305 /* 4306 * Exhausted search 4307 */ 4308 if (bref == NULL) 4309 break; 4310 4311 /* 4312 * Skip deleted chains. 4313 */ 4314 if (chain && (chain->flags & HAMMER2_CHAIN_DELETED)) { 4315 if (key_next == 0 || key_next > key_end) 4316 break; 4317 key_beg = key_next; 4318 continue; 4319 } 4320 4321 /* 4322 * Use the full live (not deleted) element for the scan 4323 * iteration. HAMMER2 does not allow partial replacements. 4324 * 4325 * XXX should be built into hammer2_combined_find(). 4326 */ 4327 key_next = bref->key + ((hammer2_key_t)1 << bref->keybits); 4328 4329 if (keybits > bref->keybits) { 4330 key = bref->key; 4331 keybits = bref->keybits; 4332 } else if (keybits == bref->keybits && bref->key < key) { 4333 key = bref->key; 4334 } 4335 if (key_next == 0) 4336 break; 4337 key_beg = key_next; 4338 } 4339 hammer2_spin_unex(&parent->core.spin); 4340 4341 /* 4342 * Return the keybits for a higher-level FREEMAP_NODE covering 4343 * this node. 
4344 */ 4345 switch(keybits) { 4346 case HAMMER2_FREEMAP_LEVEL0_RADIX: 4347 keybits = HAMMER2_FREEMAP_LEVEL1_RADIX; 4348 break; 4349 case HAMMER2_FREEMAP_LEVEL1_RADIX: 4350 keybits = HAMMER2_FREEMAP_LEVEL2_RADIX; 4351 break; 4352 case HAMMER2_FREEMAP_LEVEL2_RADIX: 4353 keybits = HAMMER2_FREEMAP_LEVEL3_RADIX; 4354 break; 4355 case HAMMER2_FREEMAP_LEVEL3_RADIX: 4356 keybits = HAMMER2_FREEMAP_LEVEL4_RADIX; 4357 break; 4358 case HAMMER2_FREEMAP_LEVEL4_RADIX: 4359 keybits = HAMMER2_FREEMAP_LEVEL5_RADIX; 4360 break; 4361 case HAMMER2_FREEMAP_LEVEL5_RADIX: 4362 panic("hammer2_chain_indkey_freemap: level too high"); 4363 break; 4364 default: 4365 panic("hammer2_chain_indkey_freemap: bad radix"); 4366 break; 4367 } 4368 *keyp = key; 4369 4370 return (keybits); 4371 } 4372 4373 /* 4374 * File indirect blocks 4375 * 4376 * Calculate the key/keybits for the indirect block to create by scanning 4377 * existing keys. The key being created is also passed in *keyp and can be 4378 * inside or outside the indirect block. Regardless, the indirect block 4379 * must hold at least two keys in order to guarantee sufficient space. 4380 * 4381 * We use a modified version of the freemap's fixed radix tree, but taylored 4382 * for file data. Basically we configure an indirect block encompassing the 4383 * smallest key. 4384 */ 4385 static int 4386 hammer2_chain_indkey_file(hammer2_chain_t *parent, hammer2_key_t *keyp, 4387 int keybits, hammer2_blockref_t *base, int count, 4388 int ncount) 4389 { 4390 hammer2_chain_t *chain; 4391 hammer2_blockref_t *bref; 4392 hammer2_key_t key; 4393 hammer2_key_t key_beg; 4394 hammer2_key_t key_end; 4395 hammer2_key_t key_next; 4396 int nradix; 4397 int maxloops = 300000; 4398 4399 key = *keyp; 4400 keybits = 64; 4401 4402 /* 4403 * Calculate the range of keys in the array being careful to skip 4404 * slots which are overridden with a deletion. 4405 * 4406 * Locate the smallest key. 
4407 */ 4408 key_beg = 0; 4409 key_end = HAMMER2_KEY_MAX; 4410 hammer2_spin_ex(&parent->core.spin); 4411 4412 for (;;) { 4413 if (--maxloops == 0) { 4414 panic("indkey_freemap shit %p %p:%d\n", 4415 parent, base, count); 4416 } 4417 chain = hammer2_combined_find(parent, base, count, 4418 &key_next, 4419 key_beg, key_end, 4420 &bref); 4421 4422 /* 4423 * Exhausted search 4424 */ 4425 if (bref == NULL) 4426 break; 4427 4428 /* 4429 * Skip deleted chains. 4430 */ 4431 if (chain && (chain->flags & HAMMER2_CHAIN_DELETED)) { 4432 if (key_next == 0 || key_next > key_end) 4433 break; 4434 key_beg = key_next; 4435 continue; 4436 } 4437 4438 /* 4439 * Use the full live (not deleted) element for the scan 4440 * iteration. HAMMER2 does not allow partial replacements. 4441 * 4442 * XXX should be built into hammer2_combined_find(). 4443 */ 4444 key_next = bref->key + ((hammer2_key_t)1 << bref->keybits); 4445 4446 if (keybits > bref->keybits) { 4447 key = bref->key; 4448 keybits = bref->keybits; 4449 } else if (keybits == bref->keybits && bref->key < key) { 4450 key = bref->key; 4451 } 4452 if (key_next == 0) 4453 break; 4454 key_beg = key_next; 4455 } 4456 hammer2_spin_unex(&parent->core.spin); 4457 4458 /* 4459 * Calculate the static keybits for a higher-level indirect block 4460 * that contains the key. 4461 */ 4462 *keyp = key; 4463 4464 switch(ncount) { 4465 case HAMMER2_IND_COUNT_MIN: 4466 nradix = HAMMER2_IND_RADIX_MIN - HAMMER2_BLOCKREF_RADIX; 4467 break; 4468 case HAMMER2_IND_COUNT_NOM: 4469 nradix = HAMMER2_IND_RADIX_NOM - HAMMER2_BLOCKREF_RADIX; 4470 break; 4471 case HAMMER2_IND_COUNT_MAX: 4472 nradix = HAMMER2_IND_RADIX_MAX - HAMMER2_BLOCKREF_RADIX; 4473 break; 4474 default: 4475 panic("bad ncount %d\n", ncount); 4476 nradix = 0; 4477 break; 4478 } 4479 4480 /* 4481 * The largest radix that can be returned for an indirect block is 4482 * 63 bits. 
(The largest practical indirect block radix is actually 4483 * 62 bits because the top-level inode or volume root contains four 4484 * entries, but allow 63 to be returned). 4485 */ 4486 if (nradix >= 64) 4487 nradix = 63; 4488 4489 return keybits + nradix; 4490 } 4491 4492 #if 1 4493 4494 /* 4495 * Directory indirect blocks. 4496 * 4497 * Covers both the inode index (directory of inodes), and directory contents 4498 * (filenames hardlinked to inodes). 4499 * 4500 * Because directory keys are hashed we generally try to cut the space in 4501 * half. We accomodate the inode index (which tends to have linearly 4502 * increasing inode numbers) by ensuring that the keyspace is at least large 4503 * enough to fill up the indirect block being created. 4504 */ 4505 static int 4506 hammer2_chain_indkey_dir(hammer2_chain_t *parent, hammer2_key_t *keyp, 4507 int keybits, hammer2_blockref_t *base, int count, 4508 int ncount) 4509 { 4510 hammer2_blockref_t *bref; 4511 hammer2_chain_t *chain; 4512 hammer2_key_t key_beg; 4513 hammer2_key_t key_end; 4514 hammer2_key_t key_next; 4515 hammer2_key_t key; 4516 int nkeybits; 4517 int locount; 4518 int hicount; 4519 int maxloops = 300000; 4520 4521 /* 4522 * NOTE: We can't take a shortcut here anymore for inodes because 4523 * the root directory can contain a mix of inodes and directory 4524 * entries (we used to just return 63 if parent->bref.type was 4525 * HAMMER2_BREF_TYPE_INODE. 4526 */ 4527 key = *keyp; 4528 locount = 0; 4529 hicount = 0; 4530 4531 /* 4532 * Calculate the range of keys in the array being careful to skip 4533 * slots which are overridden with a deletion. 
4534 */ 4535 key_beg = 0; 4536 key_end = HAMMER2_KEY_MAX; 4537 hammer2_spin_ex(&parent->core.spin); 4538 4539 for (;;) { 4540 if (--maxloops == 0) { 4541 panic("indkey_freemap shit %p %p:%d\n", 4542 parent, base, count); 4543 } 4544 chain = hammer2_combined_find(parent, base, count, 4545 &key_next, 4546 key_beg, key_end, 4547 &bref); 4548 4549 /* 4550 * Exhausted search 4551 */ 4552 if (bref == NULL) 4553 break; 4554 4555 /* 4556 * Deleted object 4557 */ 4558 if (chain && (chain->flags & HAMMER2_CHAIN_DELETED)) { 4559 if (key_next == 0 || key_next > key_end) 4560 break; 4561 key_beg = key_next; 4562 continue; 4563 } 4564 4565 /* 4566 * Use the full live (not deleted) element for the scan 4567 * iteration. HAMMER2 does not allow partial replacements. 4568 * 4569 * XXX should be built into hammer2_combined_find(). 4570 */ 4571 key_next = bref->key + ((hammer2_key_t)1 << bref->keybits); 4572 4573 /* 4574 * Expand our calculated key range (key, keybits) to fit 4575 * the scanned key. nkeybits represents the full range 4576 * that we will later cut in half (two halves @ nkeybits - 1). 4577 */ 4578 nkeybits = keybits; 4579 if (nkeybits < bref->keybits) { 4580 if (bref->keybits > 64) { 4581 kprintf("bad bref chain %p bref %p\n", 4582 chain, bref); 4583 Debugger("fubar"); 4584 } 4585 nkeybits = bref->keybits; 4586 } 4587 while (nkeybits < 64 && 4588 rounddown2(key ^ bref->key, (hammer2_key_t)1 << nkeybits) != 0) { 4589 ++nkeybits; 4590 } 4591 4592 /* 4593 * If the new key range is larger we have to determine 4594 * which side of the new key range the existing keys fall 4595 * under by checking the high bit, then collapsing the 4596 * locount into the hicount or vise-versa. 
4597 */ 4598 if (keybits != nkeybits) { 4599 if (((hammer2_key_t)1 << (nkeybits - 1)) & key) { 4600 hicount += locount; 4601 locount = 0; 4602 } else { 4603 locount += hicount; 4604 hicount = 0; 4605 } 4606 keybits = nkeybits; 4607 } 4608 4609 /* 4610 * The newly scanned key will be in the lower half or the 4611 * upper half of the (new) key range. 4612 */ 4613 if (((hammer2_key_t)1 << (nkeybits - 1)) & bref->key) 4614 ++hicount; 4615 else 4616 ++locount; 4617 4618 if (key_next == 0) 4619 break; 4620 key_beg = key_next; 4621 } 4622 hammer2_spin_unex(&parent->core.spin); 4623 bref = NULL; /* now invalid (safety) */ 4624 4625 /* 4626 * Adjust keybits to represent half of the full range calculated 4627 * above (radix 63 max) for our new indirect block. 4628 */ 4629 --keybits; 4630 4631 /* 4632 * Expand keybits to hold at least ncount elements. ncount will be 4633 * a power of 2. This is to try to completely fill leaf nodes (at 4634 * least for keys which are not hashes). 4635 * 4636 * We aren't counting 'in' or 'out', we are counting 'high side' 4637 * and 'low side' based on the bit at (1LL << keybits). We want 4638 * everything to be inside in these cases so shift it all to 4639 * the low or high side depending on the new high bit. 4640 */ 4641 while (((hammer2_key_t)1 << keybits) < ncount) { 4642 ++keybits; 4643 if (key & ((hammer2_key_t)1 << keybits)) { 4644 hicount += locount; 4645 locount = 0; 4646 } else { 4647 locount += hicount; 4648 hicount = 0; 4649 } 4650 } 4651 4652 if (hicount > locount) 4653 key |= (hammer2_key_t)1 << keybits; 4654 else 4655 key &= ~(hammer2_key_t)1 << keybits; 4656 4657 *keyp = key; 4658 4659 return (keybits); 4660 } 4661 4662 #else 4663 4664 /* 4665 * Directory indirect blocks. 4666 * 4667 * Covers both the inode index (directory of inodes), and directory contents 4668 * (filenames hardlinked to inodes). 4669 * 4670 * Because directory keys are hashed we generally try to cut the space in 4671 * half. 
We accomodate the inode index (which tends to have linearly 4672 * increasing inode numbers) by ensuring that the keyspace is at least large 4673 * enough to fill up the indirect block being created. 4674 */ 4675 static int 4676 hammer2_chain_indkey_dir(hammer2_chain_t *parent, hammer2_key_t *keyp, 4677 int keybits, hammer2_blockref_t *base, int count, 4678 int ncount) 4679 { 4680 hammer2_blockref_t *bref; 4681 hammer2_chain_t *chain; 4682 hammer2_key_t key_beg; 4683 hammer2_key_t key_end; 4684 hammer2_key_t key_next; 4685 hammer2_key_t key; 4686 int nkeybits; 4687 int locount; 4688 int hicount; 4689 int maxloops = 300000; 4690 4691 /* 4692 * Shortcut if the parent is the inode. In this situation the 4693 * parent has 4+1 directory entries and we are creating an indirect 4694 * block capable of holding many more. 4695 */ 4696 if (parent->bref.type == HAMMER2_BREF_TYPE_INODE) { 4697 return 63; 4698 } 4699 4700 key = *keyp; 4701 locount = 0; 4702 hicount = 0; 4703 4704 /* 4705 * Calculate the range of keys in the array being careful to skip 4706 * slots which are overridden with a deletion. 4707 */ 4708 key_beg = 0; 4709 key_end = HAMMER2_KEY_MAX; 4710 hammer2_spin_ex(&parent->core.spin); 4711 4712 for (;;) { 4713 if (--maxloops == 0) { 4714 panic("indkey_freemap shit %p %p:%d\n", 4715 parent, base, count); 4716 } 4717 chain = hammer2_combined_find(parent, base, count, 4718 &key_next, 4719 key_beg, key_end, 4720 &bref); 4721 4722 /* 4723 * Exhausted search 4724 */ 4725 if (bref == NULL) 4726 break; 4727 4728 /* 4729 * Deleted object 4730 */ 4731 if (chain && (chain->flags & HAMMER2_CHAIN_DELETED)) { 4732 if (key_next == 0 || key_next > key_end) 4733 break; 4734 key_beg = key_next; 4735 continue; 4736 } 4737 4738 /* 4739 * Use the full live (not deleted) element for the scan 4740 * iteration. HAMMER2 does not allow partial replacements. 4741 * 4742 * XXX should be built into hammer2_combined_find(). 
4743 */ 4744 key_next = bref->key + ((hammer2_key_t)1 << bref->keybits); 4745 4746 /* 4747 * Expand our calculated key range (key, keybits) to fit 4748 * the scanned key. nkeybits represents the full range 4749 * that we will later cut in half (two halves @ nkeybits - 1). 4750 */ 4751 nkeybits = keybits; 4752 if (nkeybits < bref->keybits) { 4753 if (bref->keybits > 64) { 4754 kprintf("bad bref chain %p bref %p\n", 4755 chain, bref); 4756 Debugger("fubar"); 4757 } 4758 nkeybits = bref->keybits; 4759 } 4760 while (nkeybits < 64 && 4761 (~(((hammer2_key_t)1 << nkeybits) - 1) & 4762 (key ^ bref->key)) != 0) { 4763 ++nkeybits; 4764 } 4765 4766 /* 4767 * If the new key range is larger we have to determine 4768 * which side of the new key range the existing keys fall 4769 * under by checking the high bit, then collapsing the 4770 * locount into the hicount or vise-versa. 4771 */ 4772 if (keybits != nkeybits) { 4773 if (((hammer2_key_t)1 << (nkeybits - 1)) & key) { 4774 hicount += locount; 4775 locount = 0; 4776 } else { 4777 locount += hicount; 4778 hicount = 0; 4779 } 4780 keybits = nkeybits; 4781 } 4782 4783 /* 4784 * The newly scanned key will be in the lower half or the 4785 * upper half of the (new) key range. 4786 */ 4787 if (((hammer2_key_t)1 << (nkeybits - 1)) & bref->key) 4788 ++hicount; 4789 else 4790 ++locount; 4791 4792 if (key_next == 0) 4793 break; 4794 key_beg = key_next; 4795 } 4796 hammer2_spin_unex(&parent->core.spin); 4797 bref = NULL; /* now invalid (safety) */ 4798 4799 /* 4800 * Adjust keybits to represent half of the full range calculated 4801 * above (radix 63 max) for our new indirect block. 4802 */ 4803 --keybits; 4804 4805 /* 4806 * Expand keybits to hold at least ncount elements. ncount will be 4807 * a power of 2. This is to try to completely fill leaf nodes (at 4808 * least for keys which are not hashes). 4809 * 4810 * We aren't counting 'in' or 'out', we are counting 'high side' 4811 * and 'low side' based on the bit at (1LL << keybits). 
We want 4812 * everything to be inside in these cases so shift it all to 4813 * the low or high side depending on the new high bit. 4814 */ 4815 while (((hammer2_key_t)1 << keybits) < ncount) { 4816 ++keybits; 4817 if (key & ((hammer2_key_t)1 << keybits)) { 4818 hicount += locount; 4819 locount = 0; 4820 } else { 4821 locount += hicount; 4822 hicount = 0; 4823 } 4824 } 4825 4826 if (hicount > locount) 4827 key |= (hammer2_key_t)1 << keybits; 4828 else 4829 key &= ~(hammer2_key_t)1 << keybits; 4830 4831 *keyp = key; 4832 4833 return (keybits); 4834 } 4835 4836 #endif 4837 4838 /* 4839 * Sets CHAIN_DELETED and remove the chain's blockref from the parent if 4840 * it exists. 4841 * 4842 * Both parent and chain must be locked exclusively. 4843 * 4844 * This function will modify the parent if the blockref requires removal 4845 * from the parent's block table. 4846 * 4847 * This function is NOT recursive. Any entity already pushed into the 4848 * chain (such as an inode) may still need visibility into its contents, 4849 * as well as the ability to read and modify the contents. For example, 4850 * for an unlinked file which is still open. 4851 * 4852 * Also note that the flusher is responsible for cleaning up empty 4853 * indirect blocks. 4854 */ 4855 int 4856 hammer2_chain_delete(hammer2_chain_t *parent, hammer2_chain_t *chain, 4857 hammer2_tid_t mtid, int flags) 4858 { 4859 int error = 0; 4860 4861 KKASSERT(hammer2_mtx_owned(&chain->lock)); 4862 4863 /* 4864 * Nothing to do if already marked. 4865 * 4866 * We need the spinlock on the core whos RBTREE contains chain 4867 * to protect against races. 4868 */ 4869 if ((chain->flags & HAMMER2_CHAIN_DELETED) == 0) { 4870 KKASSERT((chain->flags & HAMMER2_CHAIN_DELETED) == 0 && 4871 chain->parent == parent); 4872 error = _hammer2_chain_delete_helper(parent, chain, 4873 mtid, flags, NULL); 4874 } 4875 4876 /* 4877 * Permanent deletions mark the chain as destroyed. 
4878 * 4879 * NOTE: We do not setflush the chain unless the deletion is 4880 * permanent, since the deletion of a chain does not actually 4881 * require it to be flushed. 4882 */ 4883 if (error == 0) { 4884 if (flags & HAMMER2_DELETE_PERMANENT) { 4885 atomic_set_int(&chain->flags, HAMMER2_CHAIN_DESTROY); 4886 hammer2_chain_setflush(chain); 4887 } 4888 } 4889 4890 return error; 4891 } 4892 4893 static int 4894 hammer2_chain_delete_obref(hammer2_chain_t *parent, hammer2_chain_t *chain, 4895 hammer2_tid_t mtid, int flags, 4896 hammer2_blockref_t *obref) 4897 { 4898 int error = 0; 4899 4900 KKASSERT(hammer2_mtx_owned(&chain->lock)); 4901 4902 /* 4903 * Nothing to do if already marked. 4904 * 4905 * We need the spinlock on the core whos RBTREE contains chain 4906 * to protect against races. 4907 */ 4908 obref->type = HAMMER2_BREF_TYPE_EMPTY; 4909 if ((chain->flags & HAMMER2_CHAIN_DELETED) == 0) { 4910 KKASSERT((chain->flags & HAMMER2_CHAIN_DELETED) == 0 && 4911 chain->parent == parent); 4912 error = _hammer2_chain_delete_helper(parent, chain, 4913 mtid, flags, obref); 4914 } 4915 4916 /* 4917 * Permanent deletions mark the chain as destroyed. 4918 * 4919 * NOTE: We do not setflush the chain unless the deletion is 4920 * permanent, since the deletion of a chain does not actually 4921 * require it to be flushed. 4922 */ 4923 if (error == 0) { 4924 if (flags & HAMMER2_DELETE_PERMANENT) { 4925 atomic_set_int(&chain->flags, HAMMER2_CHAIN_DESTROY); 4926 hammer2_chain_setflush(chain); 4927 } 4928 } 4929 4930 return error; 4931 } 4932 4933 /* 4934 * Returns the index of the nearest element in the blockref array >= elm. 4935 * Returns (count) if no element could be found. 4936 * 4937 * Sets *key_nextp to the next key for loop purposes but does not modify 4938 * it if the next key would be higher than the current value of *key_nextp. 4939 * Note that *key_nexp can overflow to 0, which should be tested by the 4940 * caller. 4941 * 4942 * WARNING! 
 * Must be called with parent's spinlock held.  Spinlock remains
 * held through the operation.
 */
static int
hammer2_base_find(hammer2_chain_t *parent,
		  hammer2_blockref_t *base, int count,
		  hammer2_key_t *key_nextp,
		  hammer2_key_t key_beg, hammer2_key_t key_end)
{
	hammer2_blockref_t *scan;
	hammer2_key_t scan_end;
	int i;
	int limit;

	/*
	 * Require the live chain's already have their core's counted
	 * so we can optimize operations.
	 *
	 * (COUNTEDBREFS implies parent->core.live_zero is valid, which
	 * bounds the forward scan below.)
	 */
	KKASSERT(parent->flags & HAMMER2_CHAIN_COUNTEDBREFS);

	/*
	 * Degenerate case: no blockref array to search.  Returning
	 * 'count' signals exhaustion to the caller.
	 */
	if (count == 0 || base == NULL)
		return(count);

	/*
	 * Sequential optimization using parent->cache_index.  This is
	 * the most likely scenario.
	 *
	 * We can avoid trailing empty entries on live chains, otherwise
	 * we might have to check the whole block array.
	 */
	i = parent->cache_index;	/* SMP RACE OK */
	cpu_ccfence();
	limit = parent->core.live_zero;
	if (i >= limit)
		i = limit - 1;
	if (i < 0)
		i = 0;
	KKASSERT(i < count);

	/*
	 * Search backwards from the cached index until the element at [i]
	 * can no longer be beyond key_beg (or we hit index 0).
	 */
	scan = &base[i];
	while (i > 0 && (scan->type == HAMMER2_BREF_TYPE_EMPTY ||
	    scan->key > key_beg)) {
		--scan;
		--i;
	}
	parent->cache_index = i;

	/*
	 * Search forwards, stop when we find a scan element which
	 * encloses the key or until we know that there are no further
	 * elements.
	 */
	while (i < count) {
		if (scan->type != HAMMER2_BREF_TYPE_EMPTY) {
			scan_end = scan->key +
				   ((hammer2_key_t)1 << scan->keybits) - 1;
			if (scan->key > key_beg || scan_end >= key_beg)
				break;
		}
		/* past live_zero there are only empty entries */
		if (i >= limit)
			return (count);
		++scan;
		++i;
	}
	if (i != count) {
		parent->cache_index = i;
		if (i >= limit) {
			i = count;
		} else {
			/*
			 * Clip *key_nextp to the end of the matched element
			 * (scan_end == 0 means the range wrapped; leave
			 * *key_nextp alone in that case).
			 */
			scan_end = scan->key +
				   ((hammer2_key_t)1 << scan->keybits);
			if (scan_end && (*key_nextp > scan_end ||
					 *key_nextp == 0)) {
				*key_nextp = scan_end;
			}
		}
	}
	return (i);
}

/*
 * Do a combined search and return the next match either from the blockref
 * array or from the in-memory chain.  Sets *brefp to the returned bref in
 * both cases, or sets it to NULL if the search exhausted.  Only returns
 * a non-NULL chain if the search matched from the in-memory chain.
 *
 * When no in-memory chain has been found, a non-NULL bref pointing into
 * the on-media blockref array is returned in *brefp.
 *
 * The returned chain is not locked or referenced.  Use the returned bref
 * to determine if the search exhausted or not.  Iterate if the base find
 * is chosen but matches a deleted chain.
 *
 * WARNING! Must be called with parent's spinlock held.  Spinlock remains
 *	    held through the operation.
 */
static hammer2_chain_t *
hammer2_combined_find(hammer2_chain_t *parent,
		      hammer2_blockref_t *base, int count,
		      hammer2_key_t *key_nextp,
		      hammer2_key_t key_beg, hammer2_key_t key_end,
		      hammer2_blockref_t **brefp)
{
	hammer2_blockref_t *bref;
	hammer2_chain_t *chain;
	int i;

	/*
	 * Lookup in block array and in rbtree.
	 */
	*key_nextp = key_end + 1;
	i = hammer2_base_find(parent, base, count, key_nextp,
			      key_beg, key_end);
	chain = hammer2_chain_find(parent, key_nextp, key_beg, key_end);

	/*
	 * Neither matched
	 */
	if (i == count && chain == NULL) {
		*brefp = NULL;
		return(NULL);
	}

	/*
	 * Only chain matched.
	 */
	if (i == count) {
		bref = &chain->bref;
		goto found;
	}

	/*
	 * Only blockref matched.
	 */
	if (chain == NULL) {
		bref = &base[i];
		goto found;
	}

	/*
	 * Both in-memory and blockref matched, select the nearer element.
	 *
	 * If both are flush with the left-hand side or both are the
	 * same distance away, select the chain.  In this situation the
	 * chain must have been loaded from the matching blockmap.
	 */
	if ((chain->bref.key <= key_beg && base[i].key <= key_beg) ||
	    chain->bref.key == base[i].key) {
		KKASSERT(chain->bref.key == base[i].key);
		bref = &chain->bref;
		goto found;
	}

	/*
	 * Select the nearer key
	 */
	if (chain->bref.key < base[i].key) {
		bref = &chain->bref;
	} else {
		bref = &base[i];
		chain = NULL;	/* blockref wins, no in-memory chain */
	}

	/*
	 * If the bref is out of bounds we've exhausted our search.
	 */
found:
	if (bref->key > key_end) {
		*brefp = NULL;
		chain = NULL;
	} else {
		*brefp = bref;
	}
	return(chain);
}

/*
 * Locate the specified block array element and delete it.  The element
 * must exist.
 *
 * The spin lock on the related chain must be held.
 *
 * NOTE: live_count was adjusted when the chain was deleted, so it does not
 *	 need to be adjusted when we commit the media change.
 */
void
hammer2_base_delete(hammer2_chain_t *parent,
		    hammer2_blockref_t *base, int count,
		    hammer2_chain_t *chain,
		    hammer2_blockref_t *obref)
{
	hammer2_blockref_t *elm = &chain->bref;
	hammer2_blockref_t *scan;
	hammer2_key_t key_next;
	int i;

	/*
	 * Delete element.  Expect the element to exist.
	 *
	 * XXX see caller, flush code not yet sophisticated enough to prevent
	 *     re-flushed in some cases.
	 */
	key_next = 0; /* max range */
	i = hammer2_base_find(parent, base, count, &key_next,
			      elm->key, elm->key);
	scan = &base[i];
	if (i == count || scan->type == HAMMER2_BREF_TYPE_EMPTY ||
	    scan->key != elm->key ||
	    ((chain->flags & HAMMER2_CHAIN_BLKMAPUPD) == 0 &&
	     scan->keybits != elm->keybits)) {
		/* release the spinlock so panic/console output can run */
		hammer2_spin_unex(&parent->core.spin);
		panic("delete base %p element not found at %d/%d elm %p\n",
		      base, i, count, elm);
		return;
	}

	/*
	 * Update stats and zero the entry.
	 *
	 * NOTE: Handle radix == 0 (0 bytes) case.
	 */
	if ((int)(scan->data_off & HAMMER2_OFF_MASK_RADIX)) {
		parent->bref.embed.stats.data_count -= (hammer2_off_t)1 <<
				(int)(scan->data_off & HAMMER2_OFF_MASK_RADIX);
	}

	/*
	 * Per-type accounting rollback.  INODE falls into DATA (both
	 * decrement leaf_count), DATA falls into INDIRECT (recursive
	 * stats).  Once leaf_count has saturated at LEAF_MAX it can no
	 * longer be tracked accurately; flag the chain instead.
	 */
	switch(scan->type) {
	case HAMMER2_BREF_TYPE_INODE:
		--parent->bref.embed.stats.inode_count;
		/* fall through */
	case HAMMER2_BREF_TYPE_DATA:
		if (parent->bref.leaf_count == HAMMER2_BLOCKREF_LEAF_MAX) {
			atomic_set_int(&chain->flags,
				       HAMMER2_CHAIN_HINT_LEAF_COUNT);
		} else {
			if (parent->bref.leaf_count)
				--parent->bref.leaf_count;
		}
		/* fall through */
	case HAMMER2_BREF_TYPE_INDIRECT:
		if (scan->type != HAMMER2_BREF_TYPE_DATA) {
			parent->bref.embed.stats.data_count -=
				scan->embed.stats.data_count;
			parent->bref.embed.stats.inode_count -=
				scan->embed.stats.inode_count;
		}
		if (scan->type == HAMMER2_BREF_TYPE_INODE)
			break;
		if (parent->bref.leaf_count == HAMMER2_BLOCKREF_LEAF_MAX) {
			atomic_set_int(&chain->flags,
				       HAMMER2_CHAIN_HINT_LEAF_COUNT);
		} else {
			if (parent->bref.leaf_count <= scan->leaf_count)
				parent->bref.leaf_count = 0;
			else
				parent->bref.leaf_count -= scan->leaf_count;
		}
		break;
	case HAMMER2_BREF_TYPE_DIRENT:
		if (parent->bref.leaf_count == HAMMER2_BLOCKREF_LEAF_MAX) {
			atomic_set_int(&chain->flags,
				       HAMMER2_CHAIN_HINT_LEAF_COUNT);
		} else {
			if (parent->bref.leaf_count)
				--parent->bref.leaf_count;
		}
		/* falls into default, which only breaks (harmless) */
	default:
		break;
	}

	/* hand the original (pre-zero) blockref back to the caller */
	if (obref)
		*obref = *scan;
	bzero(scan, sizeof(*scan));

	/*
	 * We can only optimize parent->core.live_zero for live chains.
	 *
	 * If we deleted the last live element, pull live_zero back over
	 * any trailing empty entries.
	 */
	if (parent->core.live_zero == i + 1) {
		while (--i >= 0 && base[i].type == HAMMER2_BREF_TYPE_EMPTY)
			;
		parent->core.live_zero = i + 1;
	}

	/*
	 * Clear appropriate blockmap flags in chain.
	 */
	atomic_clear_int(&chain->flags, HAMMER2_CHAIN_BLKMAPPED |
					HAMMER2_CHAIN_BLKMAPUPD);
}

/*
 * Insert the specified element.  The block array must not already have the
 * element and must have space available for the insertion.
 *
 * The spin lock on the related chain must be held.
 *
 * NOTE: live_count was adjusted when the chain was deleted, so it does not
 *	 need to be adjusted when we commit the media change.
 */
void
hammer2_base_insert(hammer2_chain_t *parent,
		    hammer2_blockref_t *base, int count,
		    hammer2_chain_t *chain, hammer2_blockref_t *elm)
{
	hammer2_key_t key_next;
	hammer2_key_t xkey;
	int i;
	int j;
	int k;
	int l;
	int u = 1;	/* which insertion path was taken (for debug panic) */

	/*
	 * Insert new element.  Expect the element to not already exist
	 * unless we are replacing it.
	 *
	 * XXX see caller, flush code not yet sophisticated enough to prevent
	 *     re-flushed in some cases.
	 */
	key_next = 0; /* max range */
	i = hammer2_base_find(parent, base, count, &key_next,
			      elm->key, elm->key);

	/*
	 * Shortcut fill optimization, typical ordered insertion(s) may not
	 * require a search.
	 */
	KKASSERT(i >= 0 && i <= count);

	/*
	 * Set appropriate blockmap flags in chain (if not NULL)
	 */
	if (chain)
		atomic_set_int(&chain->flags, HAMMER2_CHAIN_BLKMAPPED);

	/*
	 * Update stats and zero the entry
	 */
	if ((int)(elm->data_off & HAMMER2_OFF_MASK_RADIX)) {
		parent->bref.embed.stats.data_count += (hammer2_off_t)1 <<
				(int)(elm->data_off & HAMMER2_OFF_MASK_RADIX);
	}

	/*
	 * Per-type accounting, mirror image of hammer2_base_delete().
	 * leaf_count saturates at HAMMER2_BLOCKREF_LEAF_MAX.
	 */
	switch(elm->type) {
	case HAMMER2_BREF_TYPE_INODE:
		++parent->bref.embed.stats.inode_count;
		/* fall through */
	case HAMMER2_BREF_TYPE_DATA:
		if (parent->bref.leaf_count != HAMMER2_BLOCKREF_LEAF_MAX)
			++parent->bref.leaf_count;
		/* fall through */
	case HAMMER2_BREF_TYPE_INDIRECT:
		if (elm->type != HAMMER2_BREF_TYPE_DATA) {
			parent->bref.embed.stats.data_count +=
				elm->embed.stats.data_count;
			parent->bref.embed.stats.inode_count +=
				elm->embed.stats.inode_count;
		}
		if (elm->type == HAMMER2_BREF_TYPE_INODE)
			break;
		if (parent->bref.leaf_count + elm->leaf_count <
		    HAMMER2_BLOCKREF_LEAF_MAX) {
			parent->bref.leaf_count += elm->leaf_count;
		} else {
			parent->bref.leaf_count = HAMMER2_BLOCKREF_LEAF_MAX;
		}
		break;
	case HAMMER2_BREF_TYPE_DIRENT:
		if (parent->bref.leaf_count != HAMMER2_BLOCKREF_LEAF_MAX)
			++parent->bref.leaf_count;
		break;
	default:
		break;
	}


	/*
	 * We can only optimize parent->core.live_zero for live chains.
	 *
	 * Append case: drop the element directly into the first trailing
	 * empty slot.
	 */
	if (i == count && parent->core.live_zero < count) {
		i = parent->core.live_zero++;
		base[i] = *elm;
		return;
	}

	/* sanity: new element must not overlap the element found at [i] */
	xkey = elm->key + ((hammer2_key_t)1 << elm->keybits) - 1;
	if (i != count && (base[i].key < elm->key || xkey >= base[i].key)) {
		hammer2_spin_unex(&parent->core.spin);
		panic("insert base %p overlapping elements at %d elm %p\n",
		      base, i, elm);
	}

	/*
	 * Try to find an empty slot before or after, expanding the search
	 * one slot in each direction per iteration, and shift the
	 * intervening elements over to make room at the insertion point.
	 */
	j = i;
	k = i;
	while (j > 0 || k < count) {
		--j;
		if (j >= 0 && base[j].type == HAMMER2_BREF_TYPE_EMPTY) {
			if (j == i - 1) {
				base[j] = *elm;
			} else {
				bcopy(&base[j+1], &base[j],
				      (i - j - 1) * sizeof(*base));
				base[i - 1] = *elm;
			}
			goto validate;
		}
		++k;
		if (k < count && base[k].type == HAMMER2_BREF_TYPE_EMPTY) {
			bcopy(&base[i], &base[i+1],
			      (k - i) * sizeof(hammer2_blockref_t));
			base[i] = *elm;

			/*
			 * We can only update parent->core.live_zero for live
			 * chains.
			 */
			if (parent->core.live_zero <= k)
				parent->core.live_zero = k + 1;
			u = 2;
			goto validate;
		}
	}
	panic("hammer2_base_insert: no room!");

	/*
	 * Debugging: verify the array is still sorted with no overlapping
	 * key ranges after the insertion.
	 */
validate:
	key_next = 0;
	for (l = 0; l < count; ++l) {
		if (base[l].type != HAMMER2_BREF_TYPE_EMPTY) {
			key_next = base[l].key +
				   ((hammer2_key_t)1 << base[l].keybits) - 1;
			break;
		}
	}
	while (++l < count) {
		if (base[l].type != HAMMER2_BREF_TYPE_EMPTY) {
			if (base[l].key <= key_next)
				panic("base_insert %d %d,%d,%d fail %p:%d", u, i, j, k, base, l);
			key_next = base[l].key +
				   ((hammer2_key_t)1 << base[l].keybits) - 1;

		}
	}

}

#if 0

/*
 * Sort the blockref array for the chain.  Used by the flush code to
 * sort the blockref[] array.
 *
 * The chain must be exclusively locked AND spin-locked.
 */
typedef hammer2_blockref_t *hammer2_blockref_p;

static
int
hammer2_base_sort_callback(const void *v1, const void *v2)
{
	hammer2_blockref_p bref1 = *(const hammer2_blockref_p *)v1;
	hammer2_blockref_p bref2 = *(const hammer2_blockref_p *)v2;

	/*
	 * Make sure empty elements are placed at the end of the array
	 */
	if (bref1->type == HAMMER2_BREF_TYPE_EMPTY) {
		if (bref2->type == HAMMER2_BREF_TYPE_EMPTY)
			return(0);
		return(1);
	} else if (bref2->type == HAMMER2_BREF_TYPE_EMPTY) {
		return(-1);
	}

	/*
	 * Sort by key
	 */
	if (bref1->key < bref2->key)
		return(-1);
	if (bref1->key > bref2->key)
		return(1);
	return(0);
}

void
hammer2_base_sort(hammer2_chain_t *chain)
{
	hammer2_blockref_t *base;
	int count;

	/*
	 * Resolve the chain's embedded blockref array and element count
	 * based on the bref type, then sort it in-place.
	 */
	switch(chain->bref.type) {
	case HAMMER2_BREF_TYPE_INODE:
		/*
		 * Special shortcut for embedded data returns the inode
		 * itself.  Callers must detect this condition and access
		 * the embedded data (the strategy code does this for us).
		 *
		 * This is only applicable to regular files and softlinks.
		 */
		if (chain->data->ipdata.meta.op_flags &
		    HAMMER2_OPFLAG_DIRECTDATA) {
			return;
		}
		base = &chain->data->ipdata.u.blockset.blockref[0];
		count = HAMMER2_SET_COUNT;
		break;
	case HAMMER2_BREF_TYPE_FREEMAP_NODE:
	case HAMMER2_BREF_TYPE_INDIRECT:
		/*
		 * Optimize indirect blocks in the INITIAL state to avoid
		 * I/O.
		 */
		KKASSERT((chain->flags & HAMMER2_CHAIN_INITIAL) == 0);
		base = &chain->data->npdata[0];
		count = chain->bytes / sizeof(hammer2_blockref_t);
		break;
	case HAMMER2_BREF_TYPE_VOLUME:
		base = &chain->data->voldata.sroot_blockset.blockref[0];
		count = HAMMER2_SET_COUNT;
		break;
	case HAMMER2_BREF_TYPE_FREEMAP:
		base = &chain->data->blkset.blockref[0];
		count = HAMMER2_SET_COUNT;
		break;
	default:
		panic("hammer2_base_sort: unrecognized "
		      "blockref(A) type: %d",
		      chain->bref.type);
		base = NULL;	/* safety */
		count = 0;	/* safety */
		break;
	}
	kqsort(base, count, sizeof(*base), hammer2_base_sort_callback);
}

#endif

/*
 * Set the check data for a chain.  This can be a heavy-weight operation
 * and typically only runs on-flush.  For file data check data is calculated
 * when the logical buffers are flushed.
 *
 * The check method is taken from chain->bref.methods; the computed value
 * is stored into the matching member of chain->bref.check.
 */
void
hammer2_chain_setcheck(hammer2_chain_t *chain, void *bdata)
{
	atomic_clear_int(&chain->flags, HAMMER2_CHAIN_NOTTESTED);

	switch(HAMMER2_DEC_CHECK(chain->bref.methods)) {
	case HAMMER2_CHECK_NONE:
		break;
	case HAMMER2_CHECK_DISABLED:
		break;
	case HAMMER2_CHECK_ISCSI32:
		chain->bref.check.iscsi32.value =
			hammer2_icrc32(bdata, chain->bytes);
		break;
	case HAMMER2_CHECK_XXHASH64:
		chain->bref.check.xxhash64.value =
			XXH64(bdata, chain->bytes, XXH_HAMMER2_SEED);
		break;
	case HAMMER2_CHECK_SHA192:
		{
			SHA256_CTX hash_ctx;
			union {
				uint8_t digest[SHA256_DIGEST_LENGTH];
				uint64_t digest64[SHA256_DIGEST_LENGTH/8];
			} u;

			/*
			 * Compute SHA-256 and fold the fourth 64-bit word
			 * into the third, truncating the digest to the
			 * 192 bits that fit in bref.check.sha192.
			 */
			SHA256_Init(&hash_ctx);
			SHA256_Update(&hash_ctx, bdata, chain->bytes);
			SHA256_Final(u.digest, &hash_ctx);
			u.digest64[2] ^= u.digest64[3];
			bcopy(u.digest,
			      chain->bref.check.sha192.data,
			      sizeof(chain->bref.check.sha192.data));
		}
		break;
	case HAMMER2_CHECK_FREEMAP:
		chain->bref.check.freemap.icrc32 =
			hammer2_icrc32(bdata, chain->bytes);
		break;
	default:
		kprintf("hammer2_chain_setcheck: unknown check type %02x\n",
			chain->bref.methods);
		break;
	}
}

/*
 * Characterize a failed check code and try to trace back to the inode.
 */
static void
hammer2_characterize_failed_chain(hammer2_chain_t *chain, uint64_t check,
				  int bits)
{
	hammer2_chain_t *lchain;
	hammer2_chain_t *ochain;
	int did;

	/* rate-limited; if suppressed, skip the detail output entirely */
	did = krateprintf(&krate_h2chk,
			  "chain %016jx.%02x (%s) meth=%02x CHECK FAIL "
			  "(flags=%08x, bref/data ",
			  chain->bref.data_off,
			  chain->bref.type,
			  hammer2_bref_type_str(chain->bref.type),
			  chain->bref.methods,
			  chain->flags);
	if (did == 0)
		return;

	/* 'bits' selects 32-bit (iscsi32) vs 64-bit (xxhash64) formatting */
	if (bits == 32) {
		kprintf("%08x/%08x)\n",
			chain->bref.check.iscsi32.value,
			(uint32_t)check);
	} else {
		kprintf("%016jx/%016jx)\n",
			chain->bref.check.xxhash64.value,
			check);
	}

	/*
	 * Run up the chains to try to find the governing inode so we
	 * can report it.
	 *
	 * XXX This error reporting is not really MPSAFE
	 */
	ochain = chain;
	lchain = chain;
	while (chain && chain->bref.type != HAMMER2_BREF_TYPE_INODE) {
		lchain = chain;
		chain = chain->parent;
	}

	if (chain && chain->bref.type == HAMMER2_BREF_TYPE_INODE &&
	    ((chain->bref.flags & HAMMER2_BREF_FLAG_PFSROOT) == 0 ||
	     (lchain->bref.key & HAMMER2_DIRHASH_VISIBLE))) {
		kprintf("	 Resides at/in inode %ld\n",
			(long)chain->bref.key);
	} else if (chain && chain->bref.type == HAMMER2_BREF_TYPE_INODE) {
		kprintf("	 Resides in inode index - CRITICAL!!!\n");
	} else {
		kprintf("	 Resides in root index - CRITICAL!!!\n");
	}

	/* best-effort: identify the PFS and device the chain belongs to */
	if (ochain->hmp) {
		const char *pfsname = "UNKNOWN";
		int i;

		if (ochain->pmp) {
			for (i = 0; i < HAMMER2_MAXCLUSTER; ++i) {
				if (ochain->pmp->pfs_hmps[i] == ochain->hmp &&
				    ochain->pmp->pfs_names[i]) {
					pfsname = ochain->pmp->pfs_names[i];
					break;
				}
			}
		}
		kprintf("	 In pfs %s on device %s\n",
			pfsname, ochain->hmp->devrepname);
	}
}

/*
 * Returns non-zero on success, 0 on failure.
 */
int
hammer2_chain_testcheck(hammer2_chain_t *chain, void *bdata)
{
	uint32_t check32;
	uint64_t check64;
	int r;

	/* untested chains have no valid check code yet; treat as pass */
	if (chain->flags & HAMMER2_CHAIN_NOTTESTED)
		return 1;

	switch(HAMMER2_DEC_CHECK(chain->bref.methods)) {
	case HAMMER2_CHECK_NONE:
		r = 1;
		break;
	case HAMMER2_CHECK_DISABLED:
		r = 1;
		break;
	case HAMMER2_CHECK_ISCSI32:
		check32 = hammer2_icrc32(bdata, chain->bytes);
		r = (chain->bref.check.iscsi32.value == check32);
		if (r == 0) {
			hammer2_characterize_failed_chain(chain, check32, 32);
		}
		hammer2_process_icrc32 += chain->bytes;
		break;
	case HAMMER2_CHECK_XXHASH64:
		check64 = XXH64(bdata, chain->bytes, XXH_HAMMER2_SEED);
		r = (chain->bref.check.xxhash64.value == check64);
		if (r == 0) {
			hammer2_characterize_failed_chain(chain, check64, 64);
		}
		hammer2_process_xxhash64 += chain->bytes;
		break;
	case HAMMER2_CHECK_SHA192:
		{
			SHA256_CTX hash_ctx;
			union {
				uint8_t digest[SHA256_DIGEST_LENGTH];
				uint64_t digest64[SHA256_DIGEST_LENGTH/8];
			} u;

			/*
			 * Same truncated-SHA256 construction used by
			 * hammer2_chain_setcheck(): fold word 3 into
			 * word 2 and compare the first 192 bits.
			 */
			SHA256_Init(&hash_ctx);
			SHA256_Update(&hash_ctx, bdata, chain->bytes);
			SHA256_Final(u.digest, &hash_ctx);
			u.digest64[2] ^= u.digest64[3];
			if (bcmp(u.digest,
				 chain->bref.check.sha192.data,
			         sizeof(chain->bref.check.sha192.data)) == 0) {
				r = 1;
			} else {
				r = 0;
				krateprintf(&krate_h2chk,
					"chain %016jx.%02x meth=%02x "
					"CHECK FAIL\n",
					chain->bref.data_off,
					chain->bref.type,
					chain->bref.methods);
			}
		}
		break;
	case HAMMER2_CHECK_FREEMAP:
		r = (chain->bref.check.freemap.icrc32 ==
		     hammer2_icrc32(bdata, chain->bytes));
		if (r == 0) {
			int did;

			did = krateprintf(&krate_h2chk,
					  "chain %016jx.%02x meth=%02x "
					  "CHECK FAIL\n",
					  chain->bref.data_off,
					  chain->bref.type,
					  chain->bref.methods);
			if (did) {
				kprintf("freemap.icrc %08x icrc32 %08x (%d)\n",
					chain->bref.check.freemap.icrc32,
					hammer2_icrc32(bdata, chain->bytes),
					chain->bytes);
				if (chain->dio) {
					kprintf("dio %p buf %016jx,%d "
						"bdata %p/%p\n",
						chain->dio,
						(intmax_t)chain->dio->bp->b_loffset,
						chain->dio->bp->b_bufsize,
						bdata,
						chain->dio->bp->b_data);
				}
			}
		}
		break;
	default:
		kprintf("hammer2_chain_testcheck: unknown check type %02x\n",
			chain->bref.methods);
		r = 1;
		break;
	}
	return r;
}

/*
 * Acquire the chain and parent representing the specified inode for the
 * device at the specified cluster index.
 *
 * The flags passed in are LOOKUP flags, not RESOLVE flags.
 *
 * If we are unable to locate the inode, HAMMER2_ERROR_EIO or HAMMER2_ERROR_CHECK
 * is returned.  In case of error, *chainp and/or *parentp may still be returned
 * non-NULL.
 *
 * The caller may pass-in a locked *parentp and/or *chainp, or neither.
 * They will be unlocked and released by this function.  The *parentp and
 * *chainp representing the located inode are returned locked.
 *
 * The returned error includes any error on the returned chain in addition to
 * errors incurred while trying to lookup the inode.  However, a chain->error
 * might not be recognized if HAMMER2_LOOKUP_NODATA is passed.  This flag may
 * not be passed to this function.
 */
int
hammer2_chain_inode_find(hammer2_pfs_t *pmp, hammer2_key_t inum,
			 int clindex, int flags,
			 hammer2_chain_t **parentp, hammer2_chain_t **chainp)
{
	hammer2_chain_t *parent;
	hammer2_chain_t *rchain;
	hammer2_key_t key_dummy;
	hammer2_inode_t *ip;
	int resolve_flags;
	int error;

	KKASSERT((flags & HAMMER2_LOOKUP_NODATA) == 0);

	resolve_flags = (flags & HAMMER2_LOOKUP_SHARED) ?
			HAMMER2_RESOLVE_SHARED : 0;

	/*
	 * Caller expects us to replace these.
	 */
	if (*chainp) {
		hammer2_chain_unlock(*chainp);
		hammer2_chain_drop(*chainp);
		*chainp = NULL;
	}
	if (*parentp) {
		hammer2_chain_unlock(*parentp);
		hammer2_chain_drop(*parentp);
		*parentp = NULL;
	}

	/*
	 * Be very careful, this is a backend function and we CANNOT
	 * lock any frontend inode structure we find.  But we have to
	 * look the inode up this way first in case it exists but is
	 * detached from the radix tree.
	 */
	ip = hammer2_inode_lookup(pmp, inum);
	if (ip) {
		*chainp = hammer2_inode_chain_and_parent(ip, clindex,
						       parentp,
						       resolve_flags);
		hammer2_inode_drop(ip);
		if (*chainp)
			return (*chainp)->error;
		/*
		 * NOTE(review): this cleanup path runs only when *chainp is
		 * NULL, so the unlock/drop below operate on a NULL chain —
		 * looks like dead (or unsafe-if-reached) code; confirm
		 * against upstream before relying on it.
		 */
		hammer2_chain_unlock(*chainp);
		hammer2_chain_drop(*chainp);
		*chainp = NULL;
		if (*parentp) {
			hammer2_chain_unlock(*parentp);
			hammer2_chain_drop(*parentp);
			*parentp = NULL;
		}
	}

	/*
	 * Inodes hang off of the iroot (bit 63 is clear, differentiating
	 * inodes from root directory entries in the key lookup).
	 */
	parent = hammer2_inode_chain(pmp->iroot, clindex, resolve_flags);
	rchain = NULL;
	if (parent) {
		/*
		 * NOTE: rchain can be returned as NULL even if error == 0
		 *	 (i.e. not found)
		 */
		rchain = hammer2_chain_lookup(&parent, &key_dummy,
					      inum, inum,
					      &error, flags);
		/*
		 * Propagate a chain-specific error to caller.
		 *
		 * If the chain is not errored, we must still validate that the inode
		 * number is correct, because all hell will break loose if it isn't
		 * correct.  It should always be correct so print to the console and
		 * simulate a CHECK error if it is not.
		 */
		if (error == 0 && rchain) {
			error = rchain->error;
			if (error == 0 && rchain->data) {
				if (inum != rchain->data->ipdata.meta.inum) {
					kprintf("hammer2_chain_inode_find: lookup inum %ld, "
						"got valid inode but with inum %ld\n",
						(long)inum, (long)rchain->data->ipdata.meta.inum);
					error = HAMMER2_ERROR_CHECK;
					rchain->error = error;
				}
			}
		}
	} else {
		error = HAMMER2_ERROR_EIO;
	}
	*parentp = parent;
	*chainp = rchain;

	return error;
}

/*
 * Used by the bulkscan code to snapshot the synchronized storage for
 * a volume, allowing it to be scanned concurrently against normal
 * operation.
 */
hammer2_chain_t *
hammer2_chain_bulksnap(hammer2_dev_t *hmp)
{
	hammer2_chain_t *copy;

	/* copy the synchronized volume header under the voldata lock */
	copy = hammer2_chain_alloc(hmp, hmp->spmp, &hmp->vchain.bref);
	copy->data = kmalloc(sizeof(copy->data->voldata),
			     hmp->mmsg, M_WAITOK | M_ZERO);
	hammer2_voldata_lock(hmp);
	copy->data->voldata = hmp->volsync;
	hammer2_voldata_unlock(hmp);

	return copy;
}

/*
 * Release a snapshot chain obtained from hammer2_chain_bulksnap(),
 * freeing the kmalloc'd volume data copy.
 */
void
hammer2_chain_bulkdrop(hammer2_chain_t *copy)
{
	KKASSERT(copy->bref.type == HAMMER2_BREF_TYPE_VOLUME);
	KKASSERT(copy->data);
	kfree(copy->data, copy->hmp->mmsg);
	copy->data = NULL;
	hammer2_chain_drop(copy);
}

/*
 * Returns non-zero if the chain (INODE or DIRENT) matches the
 * filename.
 */
int
hammer2_chain_dirent_test(hammer2_chain_t *chain, const char *name,
			  size_t name_len)
{
	const hammer2_inode_data_t *ripdata;

	/* inode: compare against the filename stored in the inode data */
	if (chain->bref.type == HAMMER2_BREF_TYPE_INODE) {
		ripdata = &chain->data->ipdata;
		if (ripdata->meta.name_len == name_len &&
		    bcmp(ripdata->filename, name, name_len) == 0) {
			return 1;
		}
	}
	/*
	 * dirent: short names are embedded in bref.check.buf, longer names
	 * live in the dirent's data block (chain->data->buf).
	 */
	if (chain->bref.type == HAMMER2_BREF_TYPE_DIRENT &&
	    chain->bref.embed.dirent.namlen == name_len) {
		if (name_len > sizeof(chain->bref.check.buf) &&
		    bcmp(chain->data->buf, name, name_len) == 0) {
			return 1;
		}
		if (name_len <= sizeof(chain->bref.check.buf) &&
		    bcmp(chain->bref.check.buf, name, name_len) == 0) {
			return 1;
		}
	}
	return 0;
}

/*
 * Debugging
 *
 * Recursively dump a chain and its sub-tree (rbtree children) to the
 * console.  *countp limits the total number of chains printed; 'tab'
 * is the indent depth, 'bi' the child index within the parent, 'pfx'
 * a single-character tag, and 'flags' filters which children recurse
 * ((u_int)-1 means all).
 */
void
hammer2_dump_chain(hammer2_chain_t *chain, int tab, int bi, int *countp,
		   char pfx, u_int flags)
{
	hammer2_chain_t *scan;
	hammer2_chain_t *parent;

	/* enforce the print budget; print an ellipsis exactly once */
	--*countp;
	if (*countp == 0) {
		kprintf("%*.*s...\n", tab, tab, "");
		return;
	}
	if (*countp < 0)
		return;
	kprintf("%*.*s%c-chain %p %s.%-3d %016jx %016jx/%-2d mir=%016jx\n",
		tab, tab, "", pfx, chain,
		hammer2_bref_type_str(chain->bref.type), bi,
		chain->bref.data_off, chain->bref.key, chain->bref.keybits,
		chain->bref.mirror_tid);

	kprintf("%*.*s      [%08x] (%s) refs=%d",
		tab, tab, "",
		chain->flags,
		((chain->bref.type == HAMMER2_BREF_TYPE_INODE &&
		chain->data) ?  (char *)chain->data->ipdata.filename : "?"),
		chain->refs);

	parent = chain->parent;
	if (parent)
		kprintf("\n%*.*s      p=%p [pflags %08x prefs %d]",
			tab, tab, "",
			parent, parent->flags, parent->refs);
	if (RB_EMPTY(&chain->core.rbtree)) {
		kprintf("\n");
	} else {
		/* NOTE: this 'bi' intentionally shadows the parameter; it
		 * counts children at this level. */
		int bi = 0;
		kprintf(" {\n");
		RB_FOREACH(scan, hammer2_chain_tree, &chain->core.rbtree) {
			if ((scan->flags & flags) || flags == (u_int)-1) {
				hammer2_dump_chain(scan, tab + 4, bi, countp,
						   'a', flags);
			}
			bi++;
		}
		if (chain->bref.type == HAMMER2_BREF_TYPE_INODE && chain->data)
			kprintf("%*.*s}(%s)\n", tab, tab, "",
				chain->data->ipdata.filename);
		else
			kprintf("%*.*s}\n", tab, tab, "");
	}
}