1 /* 2 * Copyright (c) 2011-2020 The DragonFly Project. All rights reserved. 3 * 4 * This code is derived from software contributed to The DragonFly Project 5 * by Matthew Dillon <dillon@dragonflybsd.org> 6 * and Venkatesh Srinivas <vsrinivas@dragonflybsd.org> 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in 16 * the documentation and/or other materials provided with the 17 * distribution. 18 * 3. Neither the name of The DragonFly Project nor the names of its 19 * contributors may be used to endorse or promote products derived 20 * from this software without specific, prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 23 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 24 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 25 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 26 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 27 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 28 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 29 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 30 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 31 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 32 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33 * SUCH DAMAGE. 34 */ 35 /* 36 * This subsystem implements most of the core support functions for 37 * the hammer2_chain structure. 
 *
 * Chains are the in-memory version on media objects (volume header, inodes,
 * indirect blocks, data blocks, etc).  Chains represent a portion of the
 * HAMMER2 topology.
 *
 * Chains are no-longer delete-duplicated.  Instead, the original in-memory
 * chain will be moved along with its block reference (e.g. for things like
 * renames, hardlink operations, modifications, etc), and will be indexed
 * on a secondary list for flush handling instead of propagating a flag
 * upward to the root.
 *
 * Concurrent front-end operations can still run against backend flushes
 * as long as they do not cross the current flush boundary.  An operation
 * running above the current flush (in areas not yet flushed) can become
 * part of the current flush while an operation running below the current
 * flush can become part of the next flush.
 */
#include <sys/cdefs.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/types.h>
#include <sys/lock.h>
#include <sys/buf.h>

#include <crypto/sha2/sha2.h>

#include "hammer2.h"

/* Forward declarations for routines defined later in this file. */
static hammer2_chain_t *hammer2_chain_create_indirect(
		hammer2_chain_t *parent,
		hammer2_key_t key, int keybits,
		hammer2_tid_t mtid, int for_type, int *errorp);
static int hammer2_chain_delete_obref(hammer2_chain_t *parent,
		hammer2_chain_t *chain,
		hammer2_tid_t mtid, int flags,
		hammer2_blockref_t *obref);
static hammer2_chain_t *hammer2_combined_find(
		hammer2_chain_t *parent,
		hammer2_blockref_t *base, int count,
		hammer2_key_t *key_nextp,
		hammer2_key_t key_beg, hammer2_key_t key_end,
		hammer2_blockref_t **brefp);
static hammer2_chain_t *hammer2_chain_lastdrop(hammer2_chain_t *chain,
		int depth);

/*
 * There are many degenerate situations where an extreme rate of console
 * output can occur from warnings and errors.  Make sure this output does
 * not impede operations.
 */
static struct krate krate_h2chk = { .freq = 5 };
static struct krate krate_h2me = { .freq = 1 };
static struct krate krate_h2em = { .freq = 1 };

/*
 * Basic RBTree for chains (core.rbtree).
 */
RB_GENERATE(hammer2_chain_tree, hammer2_chain, rbnode, hammer2_chain_cmp);

/*
 * RB comparison function ordering chains by key range.  Returns -1 if
 * chain1 lies entirely below chain2's range, +1 if entirely above, and
 * 0 on any overlap (which should only ever be an exact match).
 */
int
hammer2_chain_cmp(hammer2_chain_t *chain1, hammer2_chain_t *chain2)
{
	hammer2_key_t c1_beg;
	hammer2_key_t c1_end;
	hammer2_key_t c2_beg;
	hammer2_key_t c2_end;

	/*
	 * Compare chains.  Overlaps are not supposed to happen; to catch
	 * any software issues early we count overlaps as a match.
	 */
	c1_beg = chain1->bref.key;
	c1_end = c1_beg + ((hammer2_key_t)1 << chain1->bref.keybits) - 1;
	c2_beg = chain2->bref.key;
	c2_end = c2_beg + ((hammer2_key_t)1 << chain2->bref.keybits) - 1;

	if (c1_end < c2_beg)		/* fully to the left */
		return(-1);
	if (c1_beg > c2_end)		/* fully to the right */
		return(1);
	return(0);			/* overlap (must not cross edge boundary) */
}

/*
 * Assert that a chain has no media data associated with it.
 * The volume header and freemap chains embed their data and are exempt.
 */
static __inline void
hammer2_chain_assert_no_data(hammer2_chain_t *chain)
{
	KKASSERT(chain->dio == NULL);
	if (chain->bref.type != HAMMER2_BREF_TYPE_VOLUME &&
	    chain->bref.type != HAMMER2_BREF_TYPE_FREEMAP &&
	    chain->data) {
		panic("hammer2_chain_assert_no_data: chain %p still has data",
		    chain);
	}
}

/*
 * Make a chain visible to the flusher.  The flusher operates using a top-down
 * recursion based on the ONFLUSH flag.  It locates MODIFIED and UPDATE chains,
 * flushes them, and updates blocks back to the volume root.
 *
 * This routine sets the ONFLUSH flag upward from the triggering chain until
 * it hits an inode root or the volume root.  Inode chains serve as inflection
 * points, requiring the flusher to bridge across trees.
 * Inodes include regular inodes, PFS roots (pmp->iroot), and the media
 * super root (spmp->iroot).
 */
void
hammer2_chain_setflush(hammer2_chain_t *chain)
{
	hammer2_chain_t *parent;

	if ((chain->flags & HAMMER2_CHAIN_ONFLUSH) == 0) {
		hammer2_spin_sh(&chain->core.spin);
		while ((chain->flags & HAMMER2_CHAIN_ONFLUSH) == 0) {
			atomic_set_int(&chain->flags, HAMMER2_CHAIN_ONFLUSH);
			if (chain->bref.type == HAMMER2_BREF_TYPE_INODE)
				break;
			if ((parent = chain->parent) == NULL)
				break;
			/*
			 * Hand-over-hand shared spinlocks going upward
			 * (child-to-parent nesting is the allowed order).
			 */
			hammer2_spin_sh(&parent->core.spin);
			hammer2_spin_unsh(&chain->core.spin);
			chain = parent;
		}
		hammer2_spin_unsh(&chain->core.spin);
	}
}

/*
 * Allocate a new disconnected chain element representing the specified
 * bref.  chain->refs is set to 1 and the passed bref is copied to
 * chain->bref.  chain->bytes is derived from the bref.
 *
 * chain->pmp inherits pmp unless the chain is an inode (other than the
 * super-root inode).
 *
 * NOTE: Returns a referenced but unlocked (because there is no core) chain.
 */
hammer2_chain_t *
hammer2_chain_alloc(hammer2_dev_t *hmp, hammer2_pfs_t *pmp,
		    hammer2_blockref_t *bref)
{
	hammer2_chain_t *chain;
	u_int bytes;

	/*
	 * Special case - radix of 0 indicates a chain that does not
	 * need a data reference (context is completely embedded in the
	 * bref).
	 */
	if ((int)(bref->data_off & HAMMER2_OFF_MASK_RADIX))
		bytes = 1U << (int)(bref->data_off & HAMMER2_OFF_MASK_RADIX);
	else
		bytes = 0;

	switch(bref->type) {
	case HAMMER2_BREF_TYPE_INODE:
	case HAMMER2_BREF_TYPE_INDIRECT:
	case HAMMER2_BREF_TYPE_DATA:
	case HAMMER2_BREF_TYPE_DIRENT:
	case HAMMER2_BREF_TYPE_FREEMAP_NODE:
	case HAMMER2_BREF_TYPE_FREEMAP_LEAF:
	case HAMMER2_BREF_TYPE_FREEMAP:
	case HAMMER2_BREF_TYPE_VOLUME:
		chain = kmalloc_obj(sizeof(*chain), hmp->mchain,
				    M_WAITOK | M_ZERO);
		atomic_add_long(&hammer2_chain_allocs, 1);
		break;
	case HAMMER2_BREF_TYPE_EMPTY:
	default:
		panic("hammer2_chain_alloc: unrecognized blockref type: %d",
		      bref->type);
		break;
	}

	/*
	 * Initialize the new chain structure.  pmp must be set to NULL for
	 * chains belonging to the super-root topology of a device mount.
	 */
	if (pmp == hmp->spmp)
		chain->pmp = NULL;
	else
		chain->pmp = pmp;

	chain->hmp = hmp;
	chain->bref = *bref;
	chain->bytes = bytes;
	chain->refs = 1;
	chain->flags = HAMMER2_CHAIN_ALLOCATED;

	/*
	 * Set the PFS boundary flag if this chain represents a PFS root.
	 */
	if (bref->flags & HAMMER2_BREF_FLAG_PFSROOT)
		atomic_set_int(&chain->flags, HAMMER2_CHAIN_PFSBOUNDARY);
	hammer2_chain_init(chain);

	return (chain);
}

/*
 * A common function to initialize chains including fchain and vchain.
 */
void
hammer2_chain_init(hammer2_chain_t *chain)
{
	RB_INIT(&chain->core.rbtree);	/* live chains */
	hammer2_mtx_init(&chain->lock, "h2chain");
	hammer2_spin_init(&chain->core.spin, "h2chain");
	lockinit(&chain->diolk, "chdio", 0, 0);
}

/*
 * Add a reference to a chain element, preventing its destruction.
 * Undone via hammer2_chain_drop()
 *
 * (can be called with spinlock held)
 */
void
hammer2_chain_ref(hammer2_chain_t *chain)
{
	if (atomic_fetchadd_int(&chain->refs, 1) == 0) {
		/* NOP on 0->1 transition */
	}
}

/*
 * Ref a locked chain and force the data to be held across an unlock.
 * Chain must be currently locked.  The user of the chain who desires
 * to release the hold must call hammer2_chain_lock_unhold() to relock
 * and unhold the chain, then unlock normally, or may simply call
 * hammer2_chain_drop_unhold() (which is safer against deadlocks).
 */
void
hammer2_chain_ref_hold(hammer2_chain_t *chain)
{
	/* bumping lockcnt prevents the data from being dropped on unlock */
	atomic_add_int(&chain->lockcnt, 1);
	hammer2_chain_ref(chain);
}

/*
 * Insert the chain in the core rbtree.
 *
 * Normal insertions are placed in the live rbtree.  Insertion of a deleted
 * chain is a special case used by the flush code that is placed on the
 * unstaged deleted list to avoid confusing the live view.
 */
#define HAMMER2_CHAIN_INSERT_SPIN	0x0001
#define HAMMER2_CHAIN_INSERT_LIVE	0x0002
#define HAMMER2_CHAIN_INSERT_RACE	0x0004

static
int
hammer2_chain_insert(hammer2_chain_t *parent, hammer2_chain_t *chain,
		     int flags, int generation)
{
	hammer2_chain_t *xchain __debugvar;
	int error = 0;

	if (flags & HAMMER2_CHAIN_INSERT_SPIN)
		hammer2_spin_ex(&parent->core.spin);

	/*
	 * Interlocked by spinlock, check for race
	 */
	if ((flags & HAMMER2_CHAIN_INSERT_RACE) &&
	    parent->core.generation != generation) {
		error = HAMMER2_ERROR_EAGAIN;
		goto failed;
	}

	/*
	 * Insert chain
	 */
	xchain = RB_INSERT(hammer2_chain_tree, &parent->core.rbtree, chain);
	KASSERT(xchain == NULL,
		("hammer2_chain_insert: collision %p %p (key=%016jx)",
		chain, xchain, chain->bref.key));
	atomic_set_int(&chain->flags, HAMMER2_CHAIN_ONRBTREE);
	chain->parent = parent;
	++parent->core.chain_count;
	++parent->core.generation;	/* XXX incs for _get() too, XXX */

	/*
	 * We have to keep track of the effective live-view blockref count
	 * so the create code knows when to push an indirect block.
	 */
	if (flags & HAMMER2_CHAIN_INSERT_LIVE)
		atomic_add_int(&parent->core.live_count, 1);
failed:
	if (flags & HAMMER2_CHAIN_INSERT_SPIN)
		hammer2_spin_unex(&parent->core.spin);
	return error;
}

/*
 * Drop the caller's reference to the chain.  When the ref count drops to
 * zero this function will try to disassociate the chain from its parent and
 * deallocate it, then recursively drop the parent using the implied ref
 * from the chain's chain->parent.
 *
 * Nobody should own chain's mutex on the 1->0 transition, unless this drop
 * races an acquisition by another cpu.  Therefore we can loop if we are
 * unable to acquire the mutex, and refs is unlikely to be 1 unless we again
 * race against another drop.
 */
void
hammer2_chain_drop(hammer2_chain_t *chain)
{
	u_int refs;

	KKASSERT(chain->refs > 0);

	while (chain) {
		refs = chain->refs;
		cpu_ccfence();
		KKASSERT(refs > 0);

		if (refs == 1) {
			/*
			 * Possible 1->0 transition.  lastdrop requires the
			 * chain mutex; if the trylock fails just loop and
			 * retry (another cpu holds it transiently).
			 */
			if (hammer2_mtx_ex_try(&chain->lock) == 0)
				chain = hammer2_chain_lastdrop(chain, 0);
			/* retry the same chain, or chain from lastdrop */
		} else {
			if (atomic_cmpset_int(&chain->refs, refs, refs - 1))
				break;
			/* retry the same chain */
		}
		cpu_pause();
	}
}

/*
 * Unhold a held and probably not-locked chain, ensure that the data is
 * dropped on the 1->0 transition of lockcnt by obtaining an exclusive
 * lock and then simply unlocking the chain.
 */
void
hammer2_chain_unhold(hammer2_chain_t *chain)
{
	u_int lockcnt;
	int iter = 0;

	for (;;) {
		lockcnt = chain->lockcnt;
		cpu_ccfence();
		if (lockcnt > 1) {
			if (atomic_cmpset_int(&chain->lockcnt,
					      lockcnt, lockcnt - 1)) {
				break;
			}
		} else if (hammer2_mtx_ex_try(&chain->lock) == 0) {
			hammer2_chain_unlock(chain);
			break;
		} else {
			/*
			 * This situation can easily occur on SMP due to
			 * the gap in between the 1->0 transition and the
			 * final unlock.  We cannot safely block on the
			 * mutex because lockcnt might go above 1.
			 *
			 * XXX Sleep for one tick if it takes too long.
400 */ 401 if (++iter > 1000) { 402 if (iter > 1000 + hz) { 403 kprintf("hammer2: h2race1 %p\n", chain); 404 iter = 1000; 405 } 406 tsleep(&iter, 0, "h2race1", 1); 407 } 408 cpu_pause(); 409 } 410 } 411 } 412 413 void 414 hammer2_chain_drop_unhold(hammer2_chain_t *chain) 415 { 416 hammer2_chain_unhold(chain); 417 hammer2_chain_drop(chain); 418 } 419 420 void 421 hammer2_chain_rehold(hammer2_chain_t *chain) 422 { 423 hammer2_chain_lock(chain, HAMMER2_RESOLVE_SHARED); 424 atomic_add_int(&chain->lockcnt, 1); 425 hammer2_chain_unlock(chain); 426 } 427 428 /* 429 * Handles the (potential) last drop of chain->refs from 1->0. Called with 430 * the mutex exclusively locked, refs == 1, and lockcnt 0. SMP races are 431 * possible against refs and lockcnt. We must dispose of the mutex on chain. 432 * 433 * This function returns an unlocked chain for recursive drop or NULL. It 434 * can return the same chain if it determines it has raced another ref. 435 * 436 * -- 437 * 438 * When two chains need to be recursively dropped we use the chain we 439 * would otherwise free to placehold the additional chain. It's a bit 440 * convoluted but we can't just recurse without potentially blowing out 441 * the kernel stack. 442 * 443 * The chain cannot be freed if it has any children. 444 * The chain cannot be freed if flagged MODIFIED unless we can dispose of it. 445 * The chain cannot be freed if flagged UPDATE unless we can dispose of it. 446 * Any dedup registration can remain intact. 447 * 448 * The core spinlock is allowed to nest child-to-parent (not parent-to-child). 449 */ 450 static 451 hammer2_chain_t * 452 hammer2_chain_lastdrop(hammer2_chain_t *chain, int depth) 453 { 454 hammer2_pfs_t *pmp; 455 hammer2_dev_t *hmp; 456 hammer2_chain_t *parent; 457 hammer2_chain_t *rdrop; 458 459 /* 460 * We need chain's spinlock to interlock the sub-tree test. 461 * We already have chain's mutex, protecting chain->parent. 462 * 463 * Remember that chain->refs can be in flux. 
464 */ 465 hammer2_spin_ex(&chain->core.spin); 466 467 if (chain->parent != NULL) { 468 /* 469 * If the chain has a parent the UPDATE bit prevents scrapping 470 * as the chain is needed to properly flush the parent. Try 471 * to complete the 1->0 transition and return NULL. Retry 472 * (return chain) if we are unable to complete the 1->0 473 * transition, else return NULL (nothing more to do). 474 * 475 * If the chain has a parent the MODIFIED bit prevents 476 * scrapping. 477 * 478 * Chains with UPDATE/MODIFIED are *not* put on the LRU list! 479 */ 480 if (chain->flags & (HAMMER2_CHAIN_UPDATE | 481 HAMMER2_CHAIN_MODIFIED)) { 482 if (atomic_cmpset_int(&chain->refs, 1, 0)) { 483 hammer2_spin_unex(&chain->core.spin); 484 hammer2_chain_assert_no_data(chain); 485 hammer2_mtx_unlock(&chain->lock); 486 chain = NULL; 487 } else { 488 hammer2_spin_unex(&chain->core.spin); 489 hammer2_mtx_unlock(&chain->lock); 490 } 491 return (chain); 492 } 493 /* spinlock still held */ 494 } else if (chain->bref.type == HAMMER2_BREF_TYPE_VOLUME || 495 chain->bref.type == HAMMER2_BREF_TYPE_FREEMAP) { 496 /* 497 * Retain the static vchain and fchain. Clear bits that 498 * are not relevant. Do not clear the MODIFIED bit, 499 * and certainly do not put it on the delayed-flush queue. 500 */ 501 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_UPDATE); 502 } else { 503 /* 504 * The chain has no parent and can be flagged for destruction. 505 * Since it has no parent, UPDATE can also be cleared. 506 */ 507 atomic_set_int(&chain->flags, HAMMER2_CHAIN_DESTROY); 508 if (chain->flags & HAMMER2_CHAIN_UPDATE) 509 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_UPDATE); 510 511 /* 512 * If the chain has children we must propagate the DESTROY 513 * flag downward and rip the disconnected topology apart. 514 * This is accomplished by calling hammer2_flush() on the 515 * chain. 516 * 517 * Any dedup is already handled by the underlying DIO, so 518 * we do not have to specifically flush it here. 
519 */ 520 if (chain->core.chain_count) { 521 hammer2_spin_unex(&chain->core.spin); 522 hammer2_flush(chain, HAMMER2_FLUSH_TOP | 523 HAMMER2_FLUSH_ALL); 524 hammer2_mtx_unlock(&chain->lock); 525 526 return(chain); /* retry drop */ 527 } 528 529 /* 530 * Otherwise we can scrap the MODIFIED bit if it is set, 531 * and continue along the freeing path. 532 * 533 * Be sure to clean-out any dedup bits. Without a parent 534 * this chain will no longer be visible to the flush code. 535 * Easy check data_off to avoid the volume root. 536 */ 537 if (chain->flags & HAMMER2_CHAIN_MODIFIED) { 538 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_MODIFIED); 539 atomic_add_long(&hammer2_count_modified_chains, -1); 540 if (chain->pmp) 541 hammer2_pfs_memory_wakeup(chain->pmp, -1); 542 } 543 /* spinlock still held */ 544 } 545 546 /* spinlock still held */ 547 548 /* 549 * If any children exist we must leave the chain intact with refs == 0. 550 * They exist because chains are retained below us which have refs or 551 * may require flushing. 552 * 553 * Retry (return chain) if we fail to transition the refs to 0, else 554 * return NULL indication nothing more to do. 555 * 556 * Chains with children are NOT put on the LRU list. 557 */ 558 if (chain->core.chain_count) { 559 if (atomic_cmpset_int(&chain->refs, 1, 0)) { 560 hammer2_spin_unex(&chain->core.spin); 561 hammer2_chain_assert_no_data(chain); 562 hammer2_mtx_unlock(&chain->lock); 563 chain = NULL; 564 } else { 565 hammer2_spin_unex(&chain->core.spin); 566 hammer2_mtx_unlock(&chain->lock); 567 } 568 return (chain); 569 } 570 /* spinlock still held */ 571 /* no chains left under us */ 572 573 /* 574 * chain->core has no children left so no accessors can get to our 575 * chain from there. Now we have to lock the parent core to interlock 576 * remaining possible accessors that might bump chain's refs before 577 * we can safely drop chain's refs with intent to free the chain. 
578 */ 579 hmp = chain->hmp; 580 pmp = chain->pmp; /* can be NULL */ 581 rdrop = NULL; 582 583 parent = chain->parent; 584 585 /* 586 * WARNING! chain's spin lock is still held here, and other spinlocks 587 * will be acquired and released in the code below. We 588 * cannot be making fancy procedure calls! 589 */ 590 591 /* 592 * Spinlock the parent and try to drop the last ref on chain. 593 * On success determine if we should dispose of the chain 594 * (remove the chain from its parent, etc). 595 * 596 * (normal core locks are top-down recursive but we define 597 * core spinlocks as bottom-up recursive, so this is safe). 598 */ 599 if (parent) { 600 hammer2_spin_ex(&parent->core.spin); 601 if (atomic_cmpset_int(&chain->refs, 1, 0) == 0) { 602 /* 603 * 1->0 transition failed, retry. 604 */ 605 hammer2_spin_unex(&parent->core.spin); 606 hammer2_spin_unex(&chain->core.spin); 607 hammer2_mtx_unlock(&chain->lock); 608 609 return(chain); 610 } 611 612 /* 613 * 1->0 transition successful, parent spin held to prevent 614 * new lookups, chain spinlock held to protect parent field. 615 * Remove chain from the parent. 616 * 617 * If the chain is being removed from the parent's rbtree but 618 * is not blkmapped, we have to adjust live_count downward. If 619 * it is blkmapped then the blockref is retained in the parent 620 * as is its associated live_count. This case can occur when 621 * a chain added to the topology is unable to flush and is 622 * then later deleted. 
623 */ 624 if (chain->flags & HAMMER2_CHAIN_ONRBTREE) { 625 if ((parent->flags & HAMMER2_CHAIN_COUNTEDBREFS) && 626 (chain->flags & HAMMER2_CHAIN_BLKMAPPED) == 0) { 627 atomic_add_int(&parent->core.live_count, -1); 628 } 629 RB_REMOVE(hammer2_chain_tree, 630 &parent->core.rbtree, chain); 631 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_ONRBTREE); 632 --parent->core.chain_count; 633 chain->parent = NULL; 634 } 635 636 /* 637 * If our chain was the last chain in the parent's core the 638 * core is now empty and its parent might have to be 639 * re-dropped if it has 0 refs. 640 */ 641 if (parent->core.chain_count == 0) { 642 rdrop = parent; 643 atomic_add_int(&rdrop->refs, 1); 644 /* 645 if (atomic_cmpset_int(&rdrop->refs, 0, 1) == 0) 646 rdrop = NULL; 647 */ 648 } 649 hammer2_spin_unex(&parent->core.spin); 650 parent = NULL; /* safety */ 651 /* FALL THROUGH */ 652 } else { 653 /* 654 * No-parent case. 655 */ 656 if (atomic_cmpset_int(&chain->refs, 1, 0) == 0) { 657 /* 658 * 1->0 transition failed, retry. 659 */ 660 hammer2_spin_unex(&parent->core.spin); 661 hammer2_spin_unex(&chain->core.spin); 662 hammer2_mtx_unlock(&chain->lock); 663 664 return(chain); 665 } 666 } 667 668 /* 669 * Successful 1->0 transition, no parent, no children... no way for 670 * anyone to ref this chain any more. We can clean-up and free it. 671 * 672 * We still have the core spinlock, and core's chain_count is 0. 673 * Any parent spinlock is gone. 674 */ 675 hammer2_spin_unex(&chain->core.spin); 676 hammer2_chain_assert_no_data(chain); 677 hammer2_mtx_unlock(&chain->lock); 678 KKASSERT(RB_EMPTY(&chain->core.rbtree) && 679 chain->core.chain_count == 0); 680 681 /* 682 * All locks are gone, no pointers remain to the chain, finish 683 * freeing it. 
684 */ 685 KKASSERT((chain->flags & (HAMMER2_CHAIN_UPDATE | 686 HAMMER2_CHAIN_MODIFIED)) == 0); 687 688 /* 689 * Once chain resources are gone we can use the now dead chain 690 * structure to placehold what might otherwise require a recursive 691 * drop, because we have potentially two things to drop and can only 692 * return one directly. 693 */ 694 if (chain->flags & HAMMER2_CHAIN_ALLOCATED) { 695 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_ALLOCATED); 696 chain->hmp = NULL; 697 kfree_obj(chain, hmp->mchain); 698 atomic_add_long(&hammer2_chain_allocs, -1); 699 } 700 701 /* 702 * Possible chaining loop when parent re-drop needed. 703 */ 704 return(rdrop); 705 } 706 707 /* 708 * On last lock release. 709 */ 710 static hammer2_io_t * 711 hammer2_chain_drop_data(hammer2_chain_t *chain) 712 { 713 hammer2_io_t *dio; 714 715 if ((dio = chain->dio) != NULL) { 716 chain->dio = NULL; 717 chain->data = NULL; 718 } else { 719 switch(chain->bref.type) { 720 case HAMMER2_BREF_TYPE_VOLUME: 721 case HAMMER2_BREF_TYPE_FREEMAP: 722 break; 723 default: 724 if (chain->data != NULL) { 725 hammer2_spin_unex(&chain->core.spin); 726 panic("chain data not null: " 727 "chain %p bref %016jx.%02x " 728 "refs %d parent %p dio %p data %p", 729 chain, chain->bref.data_off, 730 chain->bref.type, chain->refs, 731 chain->parent, 732 chain->dio, chain->data); 733 } 734 KKASSERT(chain->data == NULL); 735 break; 736 } 737 } 738 return dio; 739 } 740 741 /* 742 * Lock a referenced chain element, acquiring its data with I/O if necessary, 743 * and specify how you would like the data to be resolved. 744 * 745 * If an I/O or other fatal error occurs, chain->error will be set to non-zero. 746 * 747 * The lock is allowed to recurse, multiple locking ops will aggregate 748 * the requested resolve types. Once data is assigned it will not be 749 * removed until the last unlock. 750 * 751 * HAMMER2_RESOLVE_NEVER - Do not resolve the data element. 
 *			   (typically used to avoid device/logical buffer
 *			   aliasing for data)
 *
 * HAMMER2_RESOLVE_MAYBE - Do not resolve data elements for chains in
 *			   the INITIAL-create state (indirect blocks only).
 *
 *			   Do not resolve data elements for DATA chains.
 *			   (typically used to avoid device/logical buffer
 *			   aliasing for data)
 *
 * HAMMER2_RESOLVE_ALWAYS - Always resolve the data element.
 *
 * HAMMER2_RESOLVE_SHARED - (flag) The chain is locked shared, otherwise
 *			    it will be locked exclusive.
 *
 * HAMMER2_RESOLVE_NONBLOCK - (flag) The chain is locked non-blocking.  If
 *			      the lock fails, EAGAIN is returned.
 *
 * NOTE: Embedded elements (volume header, inodes) are always resolved
 *	 regardless.
 *
 * NOTE: Specifying HAMMER2_RESOLVE_ALWAYS on a newly-created non-embedded
 *	 element will instantiate and zero its buffer, and flush it on
 *	 release.
 *
 * NOTE: (data) elements are normally locked RESOLVE_NEVER or RESOLVE_MAYBE
 *	 so as not to instantiate a device buffer, which could alias against
 *	 a logical file buffer.  However, if ALWAYS is specified the
 *	 device buffer will be instantiated anyway.
 *
 * NOTE: The return value is always 0 unless NONBLOCK is specified, in which
 *	 case it can be either 0 or EAGAIN.
 *
 * WARNING! This function blocks on I/O if data needs to be fetched.  This
 *	    blocking can run concurrent with other compatible lock holders
 *	    who do not need data returning.  The lock is not upgraded to
 *	    exclusive during a data fetch, a separate bit is used to
 *	    interlock I/O.  However, an exclusive lock holder can still count
 *	    on being interlocked against an I/O fetch managed by a shared
 *	    lock holder.
792 */ 793 int 794 hammer2_chain_lock(hammer2_chain_t *chain, int how) 795 { 796 KKASSERT(chain->refs > 0); 797 798 if (how & HAMMER2_RESOLVE_NONBLOCK) { 799 /* 800 * We still have to bump lockcnt before acquiring the lock, 801 * even for non-blocking operation, because the unlock code 802 * live-loops on lockcnt == 1 when dropping the last lock. 803 * 804 * If the non-blocking operation fails we have to use an 805 * unhold sequence to undo the mess. 806 * 807 * NOTE: LOCKAGAIN must always succeed without blocking, 808 * even if NONBLOCK is specified. 809 */ 810 atomic_add_int(&chain->lockcnt, 1); 811 if (how & HAMMER2_RESOLVE_SHARED) { 812 if (how & HAMMER2_RESOLVE_LOCKAGAIN) { 813 hammer2_mtx_sh_again(&chain->lock); 814 } else { 815 if (hammer2_mtx_sh_try(&chain->lock) != 0) { 816 hammer2_chain_unhold(chain); 817 return EAGAIN; 818 } 819 } 820 } else { 821 if (hammer2_mtx_ex_try(&chain->lock) != 0) { 822 hammer2_chain_unhold(chain); 823 return EAGAIN; 824 } 825 } 826 } else { 827 /* 828 * Get the appropriate lock. If LOCKAGAIN is flagged with 829 * SHARED the caller expects a shared lock to already be 830 * present and we are giving it another ref. This case must 831 * importantly not block if there is a pending exclusive lock 832 * request. 833 */ 834 atomic_add_int(&chain->lockcnt, 1); 835 if (how & HAMMER2_RESOLVE_SHARED) { 836 if (how & HAMMER2_RESOLVE_LOCKAGAIN) { 837 hammer2_mtx_sh_again(&chain->lock); 838 } else { 839 hammer2_mtx_sh(&chain->lock); 840 } 841 } else { 842 hammer2_mtx_ex(&chain->lock); 843 } 844 } 845 846 /* 847 * If we already have a valid data pointer make sure the data is 848 * synchronized to the current cpu, and then no further action is 849 * necessary. 850 */ 851 if (chain->data) { 852 if (chain->dio) 853 hammer2_io_bkvasync(chain->dio); 854 return 0; 855 } 856 857 /* 858 * Do we have to resolve the data? This is generally only 859 * applicable to HAMMER2_BREF_TYPE_DATA which is special-cased. 
860 * Other BREF types expects the data to be there. 861 */ 862 switch(how & HAMMER2_RESOLVE_MASK) { 863 case HAMMER2_RESOLVE_NEVER: 864 return 0; 865 case HAMMER2_RESOLVE_MAYBE: 866 if (chain->flags & HAMMER2_CHAIN_INITIAL) 867 return 0; 868 if (chain->bref.type == HAMMER2_BREF_TYPE_DATA) 869 return 0; 870 #if 0 871 if (chain->bref.type == HAMMER2_BREF_TYPE_FREEMAP_NODE) 872 return 0; 873 if (chain->bref.type == HAMMER2_BREF_TYPE_FREEMAP_LEAF) 874 return 0; 875 #endif 876 /* fall through */ 877 case HAMMER2_RESOLVE_ALWAYS: 878 default: 879 break; 880 } 881 882 /* 883 * Caller requires data 884 */ 885 hammer2_chain_load_data(chain); 886 887 return 0; 888 } 889 890 #if 0 891 /* 892 * Lock the chain, retain the hold, and drop the data persistence count. 893 * The data should remain valid because we never transitioned lockcnt 894 * through 0. 895 */ 896 void 897 hammer2_chain_lock_unhold(hammer2_chain_t *chain, int how) 898 { 899 hammer2_chain_lock(chain, how); 900 atomic_add_int(&chain->lockcnt, -1); 901 } 902 903 /* 904 * Downgrade an exclusive chain lock to a shared chain lock. 905 * 906 * NOTE: There is no upgrade equivalent due to the ease of 907 * deadlocks in that direction. 908 */ 909 void 910 hammer2_chain_lock_downgrade(hammer2_chain_t *chain) 911 { 912 hammer2_mtx_downgrade(&chain->lock); 913 } 914 #endif 915 916 /* 917 * Issue I/O and install chain->data. Caller must hold a chain lock, lock 918 * may be of any type. 919 * 920 * Once chain->data is set it cannot be disposed of until all locks are 921 * released. 922 * 923 * Make sure the data is synchronized to the current cpu. 924 */ 925 void 926 hammer2_chain_load_data(hammer2_chain_t *chain) 927 { 928 hammer2_blockref_t *bref; 929 hammer2_dev_t *hmp; 930 hammer2_io_t *dio; 931 char *bdata; 932 int error; 933 934 /* 935 * Degenerate case, data already present, or chain has no media 936 * reference to load. 
937 */ 938 KKASSERT(chain->lock.mtx_lock & MTX_MASK); 939 if (chain->data) { 940 if (chain->dio) 941 hammer2_io_bkvasync(chain->dio); 942 return; 943 } 944 if ((chain->bref.data_off & ~HAMMER2_OFF_MASK_RADIX) == 0) 945 return; 946 947 hmp = chain->hmp; 948 KKASSERT(hmp != NULL); 949 950 /* 951 * Gain the IOINPROG bit, interlocked block. 952 */ 953 for (;;) { 954 u_int oflags; 955 u_int nflags; 956 957 oflags = chain->flags; 958 cpu_ccfence(); 959 if (oflags & HAMMER2_CHAIN_IOINPROG) { 960 nflags = oflags | HAMMER2_CHAIN_IOSIGNAL; 961 tsleep_interlock(&chain->flags, 0); 962 if (atomic_cmpset_int(&chain->flags, oflags, nflags)) { 963 tsleep(&chain->flags, PINTERLOCKED, 964 "h2iocw", 0); 965 } 966 /* retry */ 967 } else { 968 nflags = oflags | HAMMER2_CHAIN_IOINPROG; 969 if (atomic_cmpset_int(&chain->flags, oflags, nflags)) { 970 break; 971 } 972 /* retry */ 973 } 974 } 975 976 /* 977 * We own CHAIN_IOINPROG 978 * 979 * Degenerate case if we raced another load. 980 */ 981 if (chain->data) { 982 if (chain->dio) 983 hammer2_io_bkvasync(chain->dio); 984 goto done; 985 } 986 987 /* 988 * We must resolve to a device buffer, either by issuing I/O or 989 * by creating a zero-fill element. We do not mark the buffer 990 * dirty when creating a zero-fill element (the hammer2_chain_modify() 991 * API must still be used to do that). 992 * 993 * The device buffer is variable-sized in powers of 2 down 994 * to HAMMER2_MIN_ALLOC (typically 1K). A 64K physical storage 995 * chunk always contains buffers of the same size. (XXX) 996 * 997 * The minimum physical IO size may be larger than the variable 998 * block size. 999 */ 1000 bref = &chain->bref; 1001 1002 /* 1003 * The getblk() optimization can only be used on newly created 1004 * elements if the physical block size matches the request. 
1005 */ 1006 if (chain->flags & HAMMER2_CHAIN_INITIAL) { 1007 error = hammer2_io_new(hmp, bref->type, 1008 bref->data_off, chain->bytes, 1009 &chain->dio); 1010 } else { 1011 error = hammer2_io_bread(hmp, bref->type, 1012 bref->data_off, chain->bytes, 1013 &chain->dio); 1014 hammer2_adjreadcounter(chain->bref.type, chain->bytes); 1015 } 1016 if (error) { 1017 chain->error = HAMMER2_ERROR_EIO; 1018 kprintf("hammer2_chain_load_data: I/O error %016jx: %d\n", 1019 (intmax_t)bref->data_off, error); 1020 hammer2_io_bqrelse(&chain->dio); 1021 goto done; 1022 } 1023 chain->error = 0; 1024 1025 /* 1026 * This isn't perfect and can be ignored on OSs which do not have 1027 * an indication as to whether a buffer is coming from cache or 1028 * if I/O was actually issued for the read. TESTEDGOOD will work 1029 * pretty well without the B_IOISSUED logic because chains are 1030 * cached, but in that situation (without B_IOISSUED) it will not 1031 * detect whether a re-read via I/O is corrupted verses the original 1032 * read. 1033 * 1034 * We can't re-run the CRC on every fresh lock. That would be 1035 * insanely expensive. 1036 * 1037 * If the underlying kernel buffer covers the entire chain we can 1038 * use the B_IOISSUED indication to determine if we have to re-run 1039 * the CRC on chain data for chains that managed to stay cached 1040 * across the kernel disposal of the original buffer. 1041 */ 1042 if ((dio = chain->dio) != NULL && dio->bp) { 1043 struct buf *bp = dio->bp; 1044 1045 if (dio->psize == chain->bytes && 1046 (bp->b_flags & B_IOISSUED)) { 1047 atomic_clear_int(&chain->flags, 1048 HAMMER2_CHAIN_TESTEDGOOD); 1049 bp->b_flags &= ~B_IOISSUED; 1050 } 1051 } 1052 1053 /* 1054 * NOTE: A locked chain's data cannot be modified without first 1055 * calling hammer2_chain_modify(). 
1056 */ 1057 1058 /* 1059 * NOTE: hammer2_io_data() call issues bkvasync() 1060 */ 1061 bdata = hammer2_io_data(chain->dio, chain->bref.data_off); 1062 1063 if (chain->flags & HAMMER2_CHAIN_INITIAL) { 1064 /* 1065 * Clear INITIAL. In this case we used io_new() and the 1066 * buffer has been zero'd and marked dirty. 1067 * 1068 * CHAIN_MODIFIED has not been set yet, and we leave it 1069 * that way for now. Set a temporary CHAIN_NOTTESTED flag 1070 * to prevent hammer2_chain_testcheck() from trying to match 1071 * a check code that has not yet been generated. This bit 1072 * should NOT end up on the actual media. 1073 */ 1074 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_INITIAL); 1075 atomic_set_int(&chain->flags, HAMMER2_CHAIN_NOTTESTED); 1076 } else if (chain->flags & HAMMER2_CHAIN_MODIFIED) { 1077 /* 1078 * check data not currently synchronized due to 1079 * modification. XXX assumes data stays in the buffer 1080 * cache, which might not be true (need biodep on flush 1081 * to calculate crc? or simple crc?). 1082 */ 1083 } else if ((chain->flags & HAMMER2_CHAIN_TESTEDGOOD) == 0) { 1084 if (hammer2_chain_testcheck(chain, bdata) == 0) { 1085 chain->error = HAMMER2_ERROR_CHECK; 1086 } else { 1087 atomic_set_int(&chain->flags, HAMMER2_CHAIN_TESTEDGOOD); 1088 } 1089 } 1090 1091 /* 1092 * Setup the data pointer by pointing it into the buffer. 1093 * WARNING! Other threads can start using the data the instant we 1094 * set chain->data non-NULL. 
 */
	switch (bref->type) {
	case HAMMER2_BREF_TYPE_VOLUME:
	case HAMMER2_BREF_TYPE_FREEMAP:
		/* Embedded-data types must never reach the load path. */
		panic("hammer2_chain_load_data: unresolved volume header");
		break;
	case HAMMER2_BREF_TYPE_DIRENT:
		KKASSERT(chain->bytes != 0);
		/* fall through */
	case HAMMER2_BREF_TYPE_INODE:
	case HAMMER2_BREF_TYPE_FREEMAP_LEAF:
	case HAMMER2_BREF_TYPE_INDIRECT:
	case HAMMER2_BREF_TYPE_DATA:
	case HAMMER2_BREF_TYPE_FREEMAP_NODE:
	default:
		/*
		 * Point data at the device buffer and leave dio intact.
		 */
		chain->data = (void *)bdata;
		break;
	}

	/*
	 * Release HAMMER2_CHAIN_IOINPROG and signal waiters if requested.
	 */
done:
	for (;;) {
		u_int oflags;
		u_int nflags;

		oflags = chain->flags;
		nflags = oflags & ~(HAMMER2_CHAIN_IOINPROG |
				    HAMMER2_CHAIN_IOSIGNAL);
		KKASSERT(oflags & HAMMER2_CHAIN_IOINPROG);
		if (atomic_cmpset_int(&chain->flags, oflags, nflags)) {
			if (oflags & HAMMER2_CHAIN_IOSIGNAL)
				wakeup(&chain->flags);
			break;
		}
	}
}

/*
 * Unlock and deref a chain element.
 *
 * Remember that the presence of children under chain prevent the chain's
 * destruction but do not add additional references, so the dio will still
 * be dropped.
 */
void
hammer2_chain_unlock(hammer2_chain_t *chain)
{
	hammer2_io_t *dio;
	u_int lockcnt;
	int iter = 0;

	/*
	 * If multiple locks are present (or being attempted) on this
	 * particular chain we can just unlock, drop refs, and return.
	 *
	 * Otherwise fall-through on the 1->0 transition.
	 */
	for (;;) {
		lockcnt = chain->lockcnt;
		KKASSERT(lockcnt > 0);
		cpu_ccfence();
		if (lockcnt > 1) {
			if (atomic_cmpset_int(&chain->lockcnt,
					      lockcnt, lockcnt - 1)) {
				hammer2_mtx_unlock(&chain->lock);
				return;
			}
		} else if (hammer2_mtx_upgrade_try(&chain->lock) == 0) {
			/* while holding the mutex exclusively */
			if (atomic_cmpset_int(&chain->lockcnt, 1, 0))
				break;
		} else {
			/*
			 * This situation can easily occur on SMP due to
			 * the gap in between the 1->0 transition and the
			 * final unlock.  We cannot safely block on the
			 * mutex because lockcnt might go above 1.
			 *
			 * XXX Sleep for one tick if it takes too long.
			 */
			if (++iter > 1000) {
				if (iter > 1000 + hz) {
					kprintf("hammer2: h2race2 %p\n", chain);
					iter = 1000;
				}
				tsleep(&iter, 0, "h2race2", 1);
			}
			cpu_pause();
		}
		/* retry */
	}

	/*
	 * Last unlock / mutex upgraded to exclusive.  Drop the data
	 * reference.
	 */
	dio = hammer2_chain_drop_data(chain);
	if (dio)
		hammer2_io_bqrelse(&dio);
	hammer2_mtx_unlock(&chain->lock);
}

#if 0
/*
 * Unlock and hold chain data intact
 */
void
hammer2_chain_unlock_hold(hammer2_chain_t *chain)
{
	atomic_add_int(&chain->lockcnt, 1);
	hammer2_chain_unlock(chain);
}
#endif

/*
 * Helper to obtain the blockref[] array base and count for a chain.
 *
 * XXX Not widely used yet, various use cases need to be validated and
 *     converted to use this function.
1219 */ 1220 static 1221 hammer2_blockref_t * 1222 hammer2_chain_base_and_count(hammer2_chain_t *parent, int *countp) 1223 { 1224 hammer2_blockref_t *base; 1225 int count; 1226 1227 if (parent->flags & HAMMER2_CHAIN_INITIAL) { 1228 base = NULL; 1229 1230 switch(parent->bref.type) { 1231 case HAMMER2_BREF_TYPE_INODE: 1232 count = HAMMER2_SET_COUNT; 1233 break; 1234 case HAMMER2_BREF_TYPE_INDIRECT: 1235 case HAMMER2_BREF_TYPE_FREEMAP_NODE: 1236 count = parent->bytes / sizeof(hammer2_blockref_t); 1237 break; 1238 case HAMMER2_BREF_TYPE_VOLUME: 1239 count = HAMMER2_SET_COUNT; 1240 break; 1241 case HAMMER2_BREF_TYPE_FREEMAP: 1242 count = HAMMER2_SET_COUNT; 1243 break; 1244 default: 1245 panic("hammer2_chain_base_and_count: " 1246 "unrecognized blockref type: %d", 1247 parent->bref.type); 1248 count = 0; 1249 break; 1250 } 1251 } else { 1252 switch(parent->bref.type) { 1253 case HAMMER2_BREF_TYPE_INODE: 1254 base = &parent->data->ipdata.u.blockset.blockref[0]; 1255 count = HAMMER2_SET_COUNT; 1256 break; 1257 case HAMMER2_BREF_TYPE_INDIRECT: 1258 case HAMMER2_BREF_TYPE_FREEMAP_NODE: 1259 base = &parent->data->npdata[0]; 1260 count = parent->bytes / sizeof(hammer2_blockref_t); 1261 break; 1262 case HAMMER2_BREF_TYPE_VOLUME: 1263 base = &parent->data->voldata. 1264 sroot_blockset.blockref[0]; 1265 count = HAMMER2_SET_COUNT; 1266 break; 1267 case HAMMER2_BREF_TYPE_FREEMAP: 1268 base = &parent->data->blkset.blockref[0]; 1269 count = HAMMER2_SET_COUNT; 1270 break; 1271 default: 1272 panic("hammer2_chain_base_and_count: " 1273 "unrecognized blockref type: %d", 1274 parent->bref.type); 1275 base = NULL; 1276 count = 0; 1277 break; 1278 } 1279 } 1280 *countp = count; 1281 1282 return base; 1283 } 1284 1285 /* 1286 * This counts the number of live blockrefs in a block array and 1287 * also calculates the point at which all remaining blockrefs are empty. 1288 * This routine can only be called on a live chain. 
 *
 * Caller holds the chain locked, but possibly with a shared lock.  We
 * must use an exclusive spinlock to prevent corruption.
 *
 * NOTE: Flag is not set until after the count is complete, allowing
 *	 callers to test the flag without holding the spinlock.
 *
 * NOTE: If base is NULL the related chain is still in the INITIAL
 *	 state and there are no blockrefs to count.
 *
 * NOTE: live_count may already have some counts accumulated due to
 *	 creation and deletion and could even be initially negative.
 */
void
hammer2_chain_countbrefs(hammer2_chain_t *chain,
			 hammer2_blockref_t *base, int count)
{
	hammer2_spin_ex(&chain->core.spin);
	if ((chain->flags & HAMMER2_CHAIN_COUNTEDBREFS) == 0) {
		if (base) {
			/*
			 * Scan backwards to find the last non-empty
			 * blockref; everything past live_zero is empty.
			 */
			while (--count >= 0) {
				if (base[count].type != HAMMER2_BREF_TYPE_EMPTY)
					break;
			}
			chain->core.live_zero = count + 1;
			/*
			 * Accumulate the number of live (non-empty)
			 * blockrefs below live_zero.
			 */
			while (count >= 0) {
				if (base[count].type != HAMMER2_BREF_TYPE_EMPTY)
					atomic_add_int(&chain->core.live_count,
						       1);
				--count;
			}
		} else {
			chain->core.live_zero = 0;
		}
		/* else do not modify live_count */
		atomic_set_int(&chain->flags, HAMMER2_CHAIN_COUNTEDBREFS);
	}
	hammer2_spin_unex(&chain->core.spin);
}

/*
 * Resize the chain's physical storage allocation in-place.  This function does
 * not usually adjust the data pointer and must be followed by (typically) a
 * hammer2_chain_modify() call to copy any old data over and adjust the
 * data pointer.
 *
 * Chains can be resized smaller without reallocating the storage.  Resizing
 * larger will reallocate the storage.  Excess or prior storage is reclaimed
 * asynchronously at a later time.
 *
 * An nradix value of 0 is special-cased to mean that the storage should
 * be disassociated, that is the chain is being resized to 0 bytes (not 1
 * byte).
 *
 * Must be passed an exclusively locked parent and chain.
 *
 * This function is mostly used with DATA blocks locked RESOLVE_NEVER in order
 * to avoid instantiating a device buffer that conflicts with the vnode data
 * buffer.  However, because H2 can compress or encrypt data, the chain may
 * have a dio assigned to it in those situations, and they do not conflict.
 *
 * XXX return error if cannot resize.
 */
int
hammer2_chain_resize(hammer2_chain_t *chain,
		     hammer2_tid_t mtid, hammer2_off_t dedup_off,
		     int nradix, int flags)
{
	hammer2_dev_t *hmp;
	size_t obytes;
	size_t nbytes;
	int error;

	hmp = chain->hmp;

	/*
	 * Only data and indirect blocks can be resized for now.
	 * (The volume root, inodes, and freemap elements use a fixed size).
	 */
	KKASSERT(chain != &hmp->vchain);
	KKASSERT(chain->bref.type == HAMMER2_BREF_TYPE_DATA ||
		 chain->bref.type == HAMMER2_BREF_TYPE_INDIRECT ||
		 chain->bref.type == HAMMER2_BREF_TYPE_DIRENT);

	/*
	 * Nothing to do if the element is already the proper size
	 */
	obytes = chain->bytes;
	nbytes = (nradix) ? (1U << nradix) : 0;
	if (obytes == nbytes)
		return (chain->error);

	/*
	 * Make sure the old data is instantiated so we can copy it.  If this
	 * is a data block, the device data may be superfluous since the data
	 * might be in a logical block, but compressed or encrypted data is
	 * another matter.
	 *
	 * NOTE: The modify will set BLKMAPUPD for us if BLKMAPPED is set.
	 */
	error = hammer2_chain_modify(chain, mtid, dedup_off, 0);
	if (error)
		return error;

	/*
	 * Reallocate the block, even if making it smaller (because different
	 * block sizes may be in different regions).
	 *
	 * NOTE: Operation does not copy the data and may only be used
	 *	 to resize data blocks in-place, or directory entry blocks
	 *	 which are about to be modified in some manner.
	 */
	error = hammer2_freemap_alloc(chain, nbytes);
	if (error)
		return error;

	chain->bytes = nbytes;

	/*
	 * We don't want the followup chain_modify() to try to copy data
	 * from the old (wrong-sized) buffer.  It won't know how much to
	 * copy.  This case should only occur during writes when the
	 * originator already has the data to write in-hand.
	 */
	if (chain->dio) {
		KKASSERT(chain->bref.type == HAMMER2_BREF_TYPE_DATA ||
			 chain->bref.type == HAMMER2_BREF_TYPE_DIRENT);
		hammer2_io_brelse(&chain->dio);
		chain->data = NULL;
	}
	return (chain->error);
}

/*
 * Set the chain modified so its data can be changed by the caller, or
 * install deduplicated data.  The caller must call this routine for each
 * set of modifications it makes, even if the chain is already flagged
 * MODIFIED.
 *
 * Sets bref.modify_tid to mtid only if mtid != 0.  Note that bref.modify_tid
 * is a CLC (cluster level change) field and is not updated by parent
 * propagation during a flush.
 *
 * Returns an appropriate HAMMER2_ERROR_* code, which will generally reflect
 * chain->error except for HAMMER2_ERROR_ENOSPC.  If the allocation fails
 * due to no space available, HAMMER2_ERROR_ENOSPC is returned and the chain
 * remains unmodified with its old data ref intact and chain->error
 * unchanged.
 *
 *				Dedup Handling
 *
 * If the DEDUPABLE flag is set in the chain the storage must be reallocated
 * even if the chain is still flagged MODIFIED.  In this case the chain's
 * DEDUPABLE flag will be cleared once the new storage has been assigned.
 *
 * If the caller passes a non-zero dedup_off we will use it to assign the
 * new storage.  The MODIFIED flag will be *CLEARED* in this case, and
 * DEDUPABLE will be set (NOTE: the UPDATE flag is always set).  The caller
 * must not modify the data content upon return.
 */
int
hammer2_chain_modify(hammer2_chain_t *chain, hammer2_tid_t mtid,
		     hammer2_off_t dedup_off, int flags)
{
	hammer2_dev_t *hmp;
	hammer2_io_t *dio;
	int error;
	int wasinitial;
	int setmodified;
	int setupdate;
	int newmod;
	char *bdata;

	hmp = chain->hmp;
	/* Caller must hold the chain exclusively locked. */
	KKASSERT(chain->lock.mtx_lock & MTX_EXCLUSIVE);

	/*
	 * Data is not optional for freemap chains (we must always be sure
	 * to copy the data on COW storage allocations).
	 */
	if (chain->bref.type == HAMMER2_BREF_TYPE_FREEMAP_NODE ||
	    chain->bref.type == HAMMER2_BREF_TYPE_FREEMAP_LEAF) {
		KKASSERT((chain->flags & HAMMER2_CHAIN_INITIAL) ||
			 (flags & HAMMER2_MODIFY_OPTDATA) == 0);
	}

	/*
	 * Data must be resolved if already assigned, unless explicitly
	 * flagged otherwise.  If we cannot safely load the data the
	 * modification fails and we return early.
	 */
	if (chain->data == NULL && chain->bytes != 0 &&
	    (flags & HAMMER2_MODIFY_OPTDATA) == 0 &&
	    (chain->bref.data_off & ~HAMMER2_OFF_MASK_RADIX)) {
		hammer2_chain_load_data(chain);
		if (chain->error)
			return (chain->error);
	}
	error = 0;

	/*
	 * Set MODIFIED to indicate that the chain has been modified.  A new
	 * allocation is required when modifying a chain.
	 *
	 * Set UPDATE to ensure that the blockref is updated in the parent.
	 *
	 * If MODIFIED is already set determine if we can reuse the assigned
	 * data block or if we need a new data block.
	 */
	if ((chain->flags & HAMMER2_CHAIN_MODIFIED) == 0) {
		/*
		 * Must set modified bit.
		 */
		atomic_add_long(&hammer2_count_modified_chains, 1);
		atomic_set_int(&chain->flags, HAMMER2_CHAIN_MODIFIED);
		hammer2_pfs_memory_inc(chain->pmp);	/* can be NULL */
		setmodified = 1;

		/*
		 * We may be able to avoid a copy-on-write if the chain's
		 * check mode is set to NONE and the chain's current
		 * modify_tid is beyond the last explicit snapshot tid.
		 *
		 * This implements HAMMER2's overwrite-in-place feature.
		 *
		 * NOTE! This data-block cannot be used as a de-duplication
		 *	 source when the check mode is set to NONE.
		 */
		if ((chain->bref.type == HAMMER2_BREF_TYPE_DATA ||
		     chain->bref.type == HAMMER2_BREF_TYPE_DIRENT) &&
		    (chain->flags & HAMMER2_CHAIN_INITIAL) == 0 &&
		    (chain->flags & HAMMER2_CHAIN_DEDUPABLE) == 0 &&
		    HAMMER2_DEC_CHECK(chain->bref.methods) ==
		    HAMMER2_CHECK_NONE &&
		    chain->pmp &&
		    chain->bref.modify_tid >
		     chain->pmp->iroot->meta.pfs_lsnap_tid) {
			/*
			 * Sector overwrite allowed.
			 */
			newmod = 0;
		} else if ((hmp->hflags & HMNT2_EMERG) &&
			   chain->pmp &&
			   chain->bref.modify_tid >
			    chain->pmp->iroot->meta.pfs_lsnap_tid) {
			/*
			 * If in emergency delete mode then do a modify-in-
			 * place on any chain type belonging to the PFS as
			 * long as it doesn't mess up a snapshot.  We might
			 * be forced to do this anyway a little further down
			 * in the code if the allocation fails.
			 *
			 * Also note that in emergency mode, these modify-in-
			 * place operations are NOT SAFE.  A storage failure,
			 * power failure, or panic can corrupt the filesystem.
			 */
			newmod = 0;
		} else {
			/*
			 * Sector overwrite not allowed, must copy-on-write.
			 */
			newmod = 1;
		}
	} else if (chain->flags & HAMMER2_CHAIN_DEDUPABLE) {
		/*
		 * If the modified chain was registered for dedup we need
		 * a new allocation.  This only happens for delayed-flush
		 * chains (i.e. which run through the front-end buffer
		 * cache).
		 */
		newmod = 1;
		setmodified = 0;
	} else {
		/*
		 * Already flagged modified, no new allocation is needed.
		 */
		newmod = 0;
		setmodified = 0;
	}

	/*
	 * Flag parent update required.
	 */
	if ((chain->flags & HAMMER2_CHAIN_UPDATE) == 0) {
		atomic_set_int(&chain->flags, HAMMER2_CHAIN_UPDATE);
		setupdate = 1;
	} else {
		setupdate = 0;
	}

	/*
	 * The XOP code returns held but unlocked focus chains.  This
	 * prevents the chain from being destroyed but does not prevent
	 * it from being modified.  diolk is used to interlock modifications
	 * against XOP frontend accesses to the focus.
	 *
	 * This allows us to theoretically avoid deadlocking the frontend
	 * if one of the backends lock up by not formally locking the
	 * focused chain in the frontend.  In addition, the synchronization
	 * code relies on this mechanism to avoid deadlocking concurrent
	 * synchronization threads.
	 */
	lockmgr(&chain->diolk, LK_EXCLUSIVE);

	/*
	 * The modification or re-modification requires an allocation and
	 * possible COW.  If an error occurs, the previous content and data
	 * reference is retained and the modification fails.
	 *
	 * If dedup_off is non-zero, the caller is requesting a deduplication
	 * rather than a modification.  The MODIFIED bit is not set and the
	 * data offset is set to the deduplication offset.  The data cannot
	 * be modified.
	 *
	 * NOTE: The dedup offset is allowed to be in a partially free state
	 *	 and we must be sure to reset it to a fully allocated state
	 *	 to force two bulkfree passes to free it again.
	 *
	 * NOTE: Only applicable when chain->bytes != 0.
	 *
	 * XXX can a chain already be marked MODIFIED without a data
	 * assignment?  If not, assert here instead of testing the case.
	 */
	if (chain != &hmp->vchain && chain != &hmp->fchain &&
	    chain->bytes) {
		if ((chain->bref.data_off & ~HAMMER2_OFF_MASK_RADIX) == 0 ||
		     newmod
		) {
			/*
			 * NOTE: We do not have to remove the dedup
			 *	 registration because the area is still
			 *	 allocated and the underlying DIO will
			 *	 still be flushed.
			 */
			if (dedup_off) {
				chain->bref.data_off = dedup_off;
				/* low bits of dedup_off encode the radix */
				if ((int)(dedup_off & HAMMER2_OFF_MASK_RADIX))
					chain->bytes = 1 <<
					    (int)(dedup_off &
					    HAMMER2_OFF_MASK_RADIX);
				else
					chain->bytes = 0;
				chain->error = 0;
				atomic_clear_int(&chain->flags,
						 HAMMER2_CHAIN_MODIFIED);
				atomic_add_long(&hammer2_count_modified_chains,
						-1);
				if (chain->pmp) {
					hammer2_pfs_memory_wakeup(
						chain->pmp, -1);
				}
				hammer2_freemap_adjust(hmp, &chain->bref,
						HAMMER2_FREEMAP_DORECOVER);
				atomic_set_int(&chain->flags,
						HAMMER2_CHAIN_DEDUPABLE);
			} else {
				error = hammer2_freemap_alloc(chain,
							      chain->bytes);
				atomic_clear_int(&chain->flags,
						HAMMER2_CHAIN_DEDUPABLE);

				/*
				 * If we are unable to allocate a new block
				 * but we are in emergency mode, issue a
				 * warning to the console and reuse the same
				 * block.
				 *
				 * We behave as if the allocation were
				 * successful.
				 *
				 * THIS IS IMPORTANT: These modifications
				 * are virtually guaranteed to corrupt any
				 * snapshots related to this filesystem.
				 */
				if (error && (hmp->hflags & HMNT2_EMERG)) {
					error = 0;
					chain->bref.flags |=
						HAMMER2_BREF_FLAG_EMERG_MIP;

					krateprintf(&krate_h2em,
					    "hammer2: Emergency Mode WARNING: "
					    "Operation will likely corrupt "
					    "related snapshot: "
					    "%016jx.%02x key=%016jx\n",
					    chain->bref.data_off,
					    chain->bref.type,
					    chain->bref.key);
				} else if (error == 0) {
					chain->bref.flags &=
						~HAMMER2_BREF_FLAG_EMERG_MIP;
				}
			}
		}
	}

	/*
	 * Stop here if error.  We have to undo any flag bits we might
	 * have set above.
	 */
	if (error) {
		if (setmodified) {
			atomic_clear_int(&chain->flags, HAMMER2_CHAIN_MODIFIED);
			atomic_add_long(&hammer2_count_modified_chains, -1);
			if (chain->pmp)
				hammer2_pfs_memory_wakeup(chain->pmp, -1);
		}
		if (setupdate) {
			atomic_clear_int(&chain->flags, HAMMER2_CHAIN_UPDATE);
		}
		lockmgr(&chain->diolk, LK_RELEASE);

		return error;
	}

	/*
	 * Update mirror_tid and modify_tid.  modify_tid is only updated
	 * if not passed as zero (during flushes, parent propagation passes
	 * the value 0).
	 *
	 * NOTE: chain->pmp could be the device spmp.
	 */
	chain->bref.mirror_tid = hmp->voldata.mirror_tid + 1;
	if (mtid)
		chain->bref.modify_tid = mtid;

	/*
	 * Set BLKMAPUPD to tell the flush code that an existing blockmap entry
	 * requires updating as well as to tell the delete code that the
	 * chain's blockref might not exactly match (in terms of physical size
	 * or block offset) the one in the parent's blocktable.  The base key
	 * of course will still match.
	 */
	if (chain->flags & HAMMER2_CHAIN_BLKMAPPED)
		atomic_set_int(&chain->flags, HAMMER2_CHAIN_BLKMAPUPD);

	/*
	 * Short-cut data block handling when the caller does not need an
	 * actual data reference to (aka OPTDATA), as long as the chain does
	 * not already have a data pointer to the data and no de-duplication
	 * occurred.
	 *
	 * This generally means that the modifications are being done via the
	 * logical buffer cache.
	 *
	 * NOTE: If deduplication occurred we have to run through the data
	 *	 stuff to clear INITIAL, and the caller will likely want to
	 *	 assign the check code anyway.  Leaving INITIAL set on a
	 *	 dedup can be deadly (it can cause the block to be zero'd!).
	 *
	 * This code also handles bytes == 0 (most dirents).
	 */
	if (chain->bref.type == HAMMER2_BREF_TYPE_DATA &&
	    (flags & HAMMER2_MODIFY_OPTDATA) &&
	    chain->data == NULL) {
		if (dedup_off == 0) {
			KKASSERT(chain->dio == NULL);
			goto skip2;
		}
	}

	/*
	 * Clearing the INITIAL flag (for indirect blocks) indicates that
	 * we've processed the uninitialized storage allocation.
	 *
	 * If this flag is already clear we are likely in a copy-on-write
	 * situation but we have to be sure NOT to bzero the storage if
	 * no data is present.
	 *
	 * Clearing of NOTTESTED is allowed if the MODIFIED bit is set.
	 */
	if (chain->flags & HAMMER2_CHAIN_INITIAL) {
		atomic_clear_int(&chain->flags, HAMMER2_CHAIN_INITIAL);
		wasinitial = 1;
	} else {
		wasinitial = 0;
	}

	/*
	 * Instantiate data buffer and possibly execute COW operation
	 */
	switch(chain->bref.type) {
	case HAMMER2_BREF_TYPE_VOLUME:
	case HAMMER2_BREF_TYPE_FREEMAP:
		/*
		 * The data is embedded, no copy-on-write operation is
		 * needed.
		 */
		KKASSERT(chain->dio == NULL);
		break;
	case HAMMER2_BREF_TYPE_DIRENT:
		/*
		 * The data might be fully embedded.
		 */
		if (chain->bytes == 0) {
			KKASSERT(chain->dio == NULL);
			break;
		}
		/* fall through */
	case HAMMER2_BREF_TYPE_INODE:
	case HAMMER2_BREF_TYPE_FREEMAP_LEAF:
	case HAMMER2_BREF_TYPE_DATA:
	case HAMMER2_BREF_TYPE_INDIRECT:
	case HAMMER2_BREF_TYPE_FREEMAP_NODE:
		/*
		 * Perform the copy-on-write operation
		 *
		 * zero-fill or copy-on-write depending on whether
		 * chain->data exists or not and set the dirty state for
		 * the new buffer.  hammer2_io_new() will handle the
		 * zero-fill.
		 *
		 * If a dedup_off was supplied this is an existing block
		 * and no COW, copy, or further modification is required.
		 */
		KKASSERT(chain != &hmp->vchain && chain != &hmp->fchain);

		if (wasinitial && dedup_off == 0) {
			error = hammer2_io_new(hmp, chain->bref.type,
					       chain->bref.data_off,
					       chain->bytes, &dio);
		} else {
			error = hammer2_io_bread(hmp, chain->bref.type,
						 chain->bref.data_off,
						 chain->bytes, &dio);
		}
		hammer2_adjreadcounter(chain->bref.type, chain->bytes);

		/*
		 * If an I/O error occurs make sure callers cannot
		 * accidentally modify the old buffer's contents and
		 * corrupt the filesystem.
		 *
		 * NOTE: hammer2_io_data() call issues bkvasync()
		 */
		if (error) {
			kprintf("hammer2_chain_modify: hmp=%p I/O error\n",
				hmp);
			chain->error = HAMMER2_ERROR_EIO;
			hammer2_io_brelse(&dio);
			hammer2_io_brelse(&chain->dio);
			chain->data = NULL;
			break;
		}
		chain->error = 0;
		bdata = hammer2_io_data(dio, chain->bref.data_off);

		if (chain->data) {
			/*
			 * COW (unless a dedup).
			 */
			KKASSERT(chain->dio != NULL);
			if (chain->data != (void *)bdata && dedup_off == 0) {
				bcopy(chain->data, bdata, chain->bytes);
			}
		} else if (wasinitial == 0 && dedup_off == 0) {
			/*
			 * We have a problem.  We were asked to COW but
			 * we don't have any data to COW with!
			 */
			panic("hammer2_chain_modify: having a COW %p\n",
			      chain);
		}

		/*
		 * Retire the old buffer, replace with the new.  Dirty or
		 * redirty the new buffer.
		 *
		 * WARNING! The system buffer cache may have already flushed
		 *	    the buffer, so we must be sure to [re]dirty it
		 *	    for further modification.
		 *
		 *	    If dedup_off was supplied, the caller is not
		 *	    expected to make any further modification to the
		 *	    buffer.
		 *
		 * WARNING! hammer2_get_gdata() assumes dio never transitions
		 *	    through NULL in order to optimize away unnecessary
		 *	    diolk operations.
		 */
		{
			hammer2_io_t *tio;

			if ((tio = chain->dio) != NULL)
				hammer2_io_bqrelse(&tio);
			chain->data = (void *)bdata;
			chain->dio = dio;
			if (dedup_off == 0)
				hammer2_io_setdirty(dio);
		}
		break;
	default:
		panic("hammer2_chain_modify: illegal non-embedded type %d",
		      chain->bref.type);
		break;

	}
skip2:
	/*
	 * setflush on parent indicating that the parent must recurse down
	 * to us.  Do not call on chain itself which might already have it
	 * set.
	 */
	if (chain->parent)
		hammer2_chain_setflush(chain->parent);
	lockmgr(&chain->diolk, LK_RELEASE);

	return (chain->error);
}

/*
 * Modify the chain associated with an inode.
1899 */ 1900 int 1901 hammer2_chain_modify_ip(hammer2_inode_t *ip, hammer2_chain_t *chain, 1902 hammer2_tid_t mtid, int flags) 1903 { 1904 int error; 1905 1906 hammer2_inode_modify(ip); 1907 error = hammer2_chain_modify(chain, mtid, 0, flags); 1908 1909 return error; 1910 } 1911 1912 /* 1913 * This function returns the chain at the nearest key within the specified 1914 * range. The returned chain will be referenced but not locked. 1915 * 1916 * This function will recurse through chain->rbtree as necessary and will 1917 * return a *key_nextp suitable for iteration. *key_nextp is only set if 1918 * the iteration value is less than the current value of *key_nextp. 1919 * 1920 * The caller should use (*key_nextp) to calculate the actual range of 1921 * the returned element, which will be (key_beg to *key_nextp - 1), because 1922 * there might be another element which is superior to the returned element 1923 * and overlaps it. 1924 * 1925 * (*key_nextp) can be passed as key_beg in an iteration only while non-NULL 1926 * chains continue to be returned. On EOF (*key_nextp) may overflow since 1927 * it will wind up being (key_end + 1). 1928 * 1929 * WARNING! Must be called with child's spinlock held. Spinlock remains 1930 * held through the operation. 
1931 */ 1932 struct hammer2_chain_find_info { 1933 hammer2_chain_t *best; 1934 hammer2_key_t key_beg; 1935 hammer2_key_t key_end; 1936 hammer2_key_t key_next; 1937 }; 1938 1939 static int hammer2_chain_find_cmp(hammer2_chain_t *child, void *data); 1940 static int hammer2_chain_find_callback(hammer2_chain_t *child, void *data); 1941 1942 static 1943 hammer2_chain_t * 1944 hammer2_chain_find(hammer2_chain_t *parent, hammer2_key_t *key_nextp, 1945 hammer2_key_t key_beg, hammer2_key_t key_end) 1946 { 1947 struct hammer2_chain_find_info info; 1948 1949 info.best = NULL; 1950 info.key_beg = key_beg; 1951 info.key_end = key_end; 1952 info.key_next = *key_nextp; 1953 1954 RB_SCAN(hammer2_chain_tree, &parent->core.rbtree, 1955 hammer2_chain_find_cmp, hammer2_chain_find_callback, 1956 &info); 1957 *key_nextp = info.key_next; 1958 #if 0 1959 kprintf("chain_find %p %016jx:%016jx next=%016jx\n", 1960 parent, key_beg, key_end, *key_nextp); 1961 #endif 1962 1963 return (info.best); 1964 } 1965 1966 static 1967 int 1968 hammer2_chain_find_cmp(hammer2_chain_t *child, void *data) 1969 { 1970 struct hammer2_chain_find_info *info = data; 1971 hammer2_key_t child_beg; 1972 hammer2_key_t child_end; 1973 1974 child_beg = child->bref.key; 1975 child_end = child_beg + ((hammer2_key_t)1 << child->bref.keybits) - 1; 1976 1977 if (child_end < info->key_beg) 1978 return(-1); 1979 if (child_beg > info->key_end) 1980 return(1); 1981 return(0); 1982 } 1983 1984 static 1985 int 1986 hammer2_chain_find_callback(hammer2_chain_t *child, void *data) 1987 { 1988 struct hammer2_chain_find_info *info = data; 1989 hammer2_chain_t *best; 1990 hammer2_key_t child_end; 1991 1992 if ((best = info->best) == NULL) { 1993 /* 1994 * No previous best. Assign best 1995 */ 1996 info->best = child; 1997 } else if (best->bref.key <= info->key_beg && 1998 child->bref.key <= info->key_beg) { 1999 /* 2000 * Illegal overlap. 
2001 */ 2002 KKASSERT(0); 2003 /*info->best = child;*/ 2004 } else if (child->bref.key < best->bref.key) { 2005 /* 2006 * Child has a nearer key and best is not flush with key_beg. 2007 * Set best to child. Truncate key_next to the old best key. 2008 */ 2009 info->best = child; 2010 if (info->key_next > best->bref.key || info->key_next == 0) 2011 info->key_next = best->bref.key; 2012 } else if (child->bref.key == best->bref.key) { 2013 /* 2014 * If our current best is flush with the child then this 2015 * is an illegal overlap. 2016 * 2017 * key_next will automatically be limited to the smaller of 2018 * the two end-points. 2019 */ 2020 KKASSERT(0); 2021 info->best = child; 2022 } else { 2023 /* 2024 * Keep the current best but truncate key_next to the child's 2025 * base. 2026 * 2027 * key_next will also automatically be limited to the smaller 2028 * of the two end-points (probably not necessary for this case 2029 * but we do it anyway). 2030 */ 2031 if (info->key_next > child->bref.key || info->key_next == 0) 2032 info->key_next = child->bref.key; 2033 } 2034 2035 /* 2036 * Always truncate key_next based on child's end-of-range. 2037 */ 2038 child_end = child->bref.key + ((hammer2_key_t)1 << child->bref.keybits); 2039 if (child_end && (info->key_next > child_end || info->key_next == 0)) 2040 info->key_next = child_end; 2041 2042 return(0); 2043 } 2044 2045 /* 2046 * Retrieve the specified chain from a media blockref, creating the 2047 * in-memory chain structure which reflects it. The returned chain is 2048 * held and locked according to (how) (HAMMER2_RESOLVE_*). The caller must 2049 * handle crc-checks and so forth, and should check chain->error before 2050 * assuming that the data is good. 2051 * 2052 * To handle insertion races pass the INSERT_RACE flag along with the 2053 * generation number of the core. NULL will be returned if the generation 2054 * number changes before we have a chance to insert the chain. 
 * Insert races can occur because the parent might be held shared.
 *
 * Caller must hold the parent locked shared or exclusive since we may
 * need the parent's bref array to find our block.
 *
 * WARNING! chain->pmp is always set to NULL for any chain representing
 *	    part of the super-root topology.
 */
hammer2_chain_t *
hammer2_chain_get(hammer2_chain_t *parent, int generation,
		  hammer2_blockref_t *bref, int how)
{
	hammer2_dev_t *hmp = parent->hmp;
	hammer2_chain_t *chain;
	int error;

	/*
	 * Allocate a chain structure representing the existing media
	 * entry.  Resulting chain has one ref and is not locked.
	 *
	 * A PFS root is given a NULL pmp; it is assigned later (see the
	 * WARNING in the header comment above).
	 */
	if (bref->flags & HAMMER2_BREF_FLAG_PFSROOT)
		chain = hammer2_chain_alloc(hmp, NULL, bref);
	else
		chain = hammer2_chain_alloc(hmp, parent->pmp, bref);
	/* ref'd chain returned */

	/*
	 * Flag that the chain is in the parent's blockmap so delete/flush
	 * knows what to do with it.
	 */
	atomic_set_int(&chain->flags, HAMMER2_CHAIN_BLKMAPPED);

	/*
	 * chain must be locked to avoid unexpected ripouts
	 */
	hammer2_chain_lock(chain, how);

	/*
	 * Link the chain into its parent.  A spinlock is required to safely
	 * access the RBTREE, and it is possible to collide with another
	 * hammer2_chain_get() operation because the caller might only hold
	 * a shared lock on the parent.
	 *
	 * NOTE: Get races can occur quite often when we distribute
	 *	 asynchronous read-aheads across multiple threads.
	 */
	KKASSERT(parent->refs > 0);
	error = hammer2_chain_insert(parent, chain,
				     HAMMER2_CHAIN_INSERT_SPIN |
				     HAMMER2_CHAIN_INSERT_RACE,
				     generation);
	if (error) {
		KKASSERT((chain->flags & HAMMER2_CHAIN_ONRBTREE) == 0);
		/*kprintf("chain %p get race\n", chain);*/
		hammer2_chain_unlock(chain);
		hammer2_chain_drop(chain);
		chain = NULL;
	} else {
		KKASSERT(chain->flags & HAMMER2_CHAIN_ONRBTREE);
	}

	/*
	 * Return our new chain referenced and locked according to (how),
	 * or NULL if an insertion race occurred (the caller typically
	 * retries the lookup in that case).
	 */
	return (chain);
}

/*
 * Lookup initialization/completion API
 *
 * hammer2_chain_lookup_init() refs and locks (parent) for a lookup
 * iteration (shared or exclusive depending on flags), returning it for
 * use as the initial *parentp.
 */
hammer2_chain_t *
hammer2_chain_lookup_init(hammer2_chain_t *parent, int flags)
{
	hammer2_chain_ref(parent);
	if (flags & HAMMER2_LOOKUP_SHARED) {
		hammer2_chain_lock(parent, HAMMER2_RESOLVE_ALWAYS |
					   HAMMER2_RESOLVE_SHARED);
	} else {
		hammer2_chain_lock(parent, HAMMER2_RESOLVE_ALWAYS);
	}
	return (parent);
}

/*
 * Terminate a lookup iteration, releasing the lock and ref acquired by
 * hammer2_chain_lookup_init() (or left on *parentp by the lookup API).
 * NULL is silently ignored.
 */
void
hammer2_chain_lookup_done(hammer2_chain_t *parent)
{
	if (parent) {
		hammer2_chain_unlock(parent);
		hammer2_chain_drop(parent);
	}
}

/*
 * Take the locked chain and return a locked parent.  The chain remains
 * locked on return, but may have to be temporarily unlocked to acquire
 * the parent.  Because of this, (chain) must be stable and cannot be
 * deleted while it was temporarily unlocked (typically means that (chain)
 * is an inode).
 *
 * Pass HAMMER2_RESOLVE_* flags in flags.
 *
 * This will work even if the chain is errored, and the caller can check
 * parent->error on return if desired since the parent will be locked.
 *
 * This function handles the lock order reversal.
2161 */ 2162 hammer2_chain_t * 2163 hammer2_chain_getparent(hammer2_chain_t *chain, int flags) 2164 { 2165 hammer2_chain_t *parent; 2166 2167 /* 2168 * Be careful of order, chain must be unlocked before parent 2169 * is locked below to avoid a deadlock. Try it trivially first. 2170 */ 2171 parent = chain->parent; 2172 if (parent == NULL) 2173 panic("hammer2_chain_getparent: no parent"); 2174 hammer2_chain_ref(parent); 2175 if (hammer2_chain_lock(parent, flags|HAMMER2_RESOLVE_NONBLOCK) == 0) 2176 return parent; 2177 2178 for (;;) { 2179 hammer2_chain_unlock(chain); 2180 hammer2_chain_lock(parent, flags); 2181 hammer2_chain_lock(chain, flags); 2182 2183 /* 2184 * Parent relinking races are quite common. We have to get 2185 * it right or we will blow up the block table. 2186 */ 2187 if (chain->parent == parent) 2188 break; 2189 hammer2_chain_unlock(parent); 2190 hammer2_chain_drop(parent); 2191 cpu_ccfence(); 2192 parent = chain->parent; 2193 if (parent == NULL) 2194 panic("hammer2_chain_getparent: no parent"); 2195 hammer2_chain_ref(parent); 2196 } 2197 return parent; 2198 } 2199 2200 /* 2201 * Take the locked chain and return a locked parent. The chain is unlocked 2202 * and dropped. *chainp is set to the returned parent as a convenience. 2203 * Pass HAMMER2_RESOLVE_* flags in flags. 2204 * 2205 * This will work even if the chain is errored, and the caller can check 2206 * parent->error on return if desired since the parent will be locked. 2207 * 2208 * The chain does NOT need to be stable. We use a tracking structure 2209 * to track the expected parent if the chain is deleted out from under us. 2210 * 2211 * This function handles the lock order reversal. 
 */
hammer2_chain_t *
hammer2_chain_repparent(hammer2_chain_t **chainp, int flags)
{
	hammer2_chain_t *chain;
	hammer2_chain_t *parent;
	struct hammer2_reptrack reptrack;
	struct hammer2_reptrack **repp;

	/*
	 * Be careful of order, chain must be unlocked before parent
	 * is locked below to avoid a deadlock.  Try it trivially first.
	 */
	chain = *chainp;
	parent = chain->parent;
	if (parent == NULL) {
		/*
		 * NOTE(review): no spin-lock on chain->core is visibly
		 * acquired on this path, so this unex looks unbalanced.
		 * Harmless since panic() does not return, but confirm
		 * against the callers / upstream history.
		 */
		hammer2_spin_unex(&chain->core.spin);
		panic("hammer2_chain_repparent: no parent");
	}
	hammer2_chain_ref(parent);
	/* Fast path: try a non-blocking lock while chain is still locked */
	if (hammer2_chain_lock(parent, flags|HAMMER2_RESOLVE_NONBLOCK) == 0) {
		hammer2_chain_unlock(chain);
		hammer2_chain_drop(chain);
		*chainp = parent;

		return parent;
	}

	/*
	 * Ok, now it gets a bit nasty.  There are multiple situations where
	 * the parent might be in the middle of a deletion, or where the child
	 * (chain) might be deleted the instant we let go of its lock.
	 * We can potentially end up in a no-win situation!
	 *
	 * In particular, the indirect_maintenance() case can cause these
	 * situations.
	 *
	 * To deal with this we install a reptrack structure in the parent
	 * This reptrack structure 'owns' the parent ref and will automatically
	 * migrate to the parent's parent if the parent is deleted permanently.
	 */
	hammer2_spin_init(&reptrack.spin, "h2reptrk");
	reptrack.chain = parent;
	hammer2_chain_ref(parent);		/* for the reptrack */

	hammer2_spin_ex(&parent->core.spin);
	reptrack.next = parent->core.reptrack;
	parent->core.reptrack = &reptrack;
	hammer2_spin_unex(&parent->core.spin);

	hammer2_chain_unlock(chain);
	hammer2_chain_drop(chain);
	chain = NULL;	/* gone */

	/*
	 * At the top of this loop, chain is gone and parent is refd both
	 * by us explicitly AND via our reptrack.  We are attempting to
	 * lock parent.
	 */
	for (;;) {
		hammer2_chain_lock(parent, flags);

		if (reptrack.chain == parent)
			break;
		/*
		 * The reptrack migrated while we blocked on the lock;
		 * chase it to the new parent.
		 */
		hammer2_chain_unlock(parent);
		hammer2_chain_drop(parent);

		kprintf("hammer2: debug REPTRACK %p->%p\n",
			parent, reptrack.chain);
		hammer2_spin_ex(&reptrack.spin);
		parent = reptrack.chain;
		hammer2_chain_ref(parent);
		hammer2_spin_unex(&reptrack.spin);
	}

	/*
	 * Once parent is locked and matches our reptrack, our reptrack
	 * will be stable and we have our parent.  We can unlink our
	 * reptrack.
	 *
	 * WARNING!  Remember that the chain lock might be shared.  Chains
	 *	     locked shared have stable parent linkages.
	 */
	hammer2_spin_ex(&parent->core.spin);
	repp = &parent->core.reptrack;
	while (*repp != &reptrack)
		repp = &(*repp)->next;
	*repp = reptrack.next;
	hammer2_spin_unex(&parent->core.spin);

	hammer2_chain_drop(parent);	/* reptrack ref */
	*chainp = parent;		/* return parent lock+ref */

	return parent;
}

/*
 * Dispose of any linked reptrack structures in (chain) by shifting them to
 * (parent).  Both (chain) and (parent) must be exclusively locked.
 *
 * This is interlocked against any children of (chain) on the other side.
 * No children may remain as-of when this is called, so we can test
 * core.reptrack without holding the spin-lock.
 *
 * Used whenever the caller intends to permanently delete chains related
 * to topological recursions (BREF_TYPE_INDIRECT, BREF_TYPE_FREEMAP_NODE),
 * where the chains underneath the node being deleted are given a new parent
 * above the node being deleted.
2320 */ 2321 static 2322 void 2323 hammer2_chain_repchange(hammer2_chain_t *parent, hammer2_chain_t *chain) 2324 { 2325 struct hammer2_reptrack *reptrack; 2326 2327 KKASSERT(chain->core.live_count == 0 && RB_EMPTY(&chain->core.rbtree)); 2328 while (chain->core.reptrack) { 2329 hammer2_spin_ex(&parent->core.spin); 2330 hammer2_spin_ex(&chain->core.spin); 2331 reptrack = chain->core.reptrack; 2332 if (reptrack == NULL) { 2333 hammer2_spin_unex(&chain->core.spin); 2334 hammer2_spin_unex(&parent->core.spin); 2335 break; 2336 } 2337 hammer2_spin_ex(&reptrack->spin); 2338 chain->core.reptrack = reptrack->next; 2339 reptrack->chain = parent; 2340 reptrack->next = parent->core.reptrack; 2341 parent->core.reptrack = reptrack; 2342 hammer2_chain_ref(parent); /* reptrack */ 2343 2344 hammer2_spin_unex(&chain->core.spin); 2345 hammer2_spin_unex(&parent->core.spin); 2346 kprintf("hammer2: debug repchange %p %p->%p\n", 2347 reptrack, chain, parent); 2348 hammer2_chain_drop(chain); /* reptrack */ 2349 } 2350 } 2351 2352 /* 2353 * Locate the first chain whos key range overlaps (key_beg, key_end) inclusive. 2354 * (*parentp) typically points to an inode but can also point to a related 2355 * indirect block and this function will recurse upwards and find the inode 2356 * or the nearest undeleted indirect block covering the key range. 2357 * 2358 * This function unconditionally sets *errorp, replacing any previous value. 2359 * 2360 * (*parentp) must be exclusive or shared locked (depending on flags) and 2361 * referenced and can be an inode or an existing indirect block within the 2362 * inode. 2363 * 2364 * If (*parent) is errored out, this function will not attempt to recurse 2365 * the radix tree and will return NULL along with an appropriate *errorp. 2366 * If NULL is returned and *errorp is 0, the requested lookup could not be 2367 * located. 
 *
 * On return (*parentp) will be modified to point at the deepest parent chain
 * element encountered during the search, as a helper for an insertion or
 * deletion.
 *
 * The new (*parentp) will be locked shared or exclusive (depending on flags),
 * and referenced, and the old will be unlocked and dereferenced (no change
 * if they are both the same).  This is particularly important if the caller
 * wishes to insert a new chain, (*parentp) will be set properly even if NULL
 * is returned, as long as no error occurred.
 *
 * The matching chain will be returned locked according to flags.
 *
 * --
 *
 * NULL is returned if no match was found, but (*parentp) will still
 * potentially be adjusted.
 *
 * On return (*key_nextp) will point to an iterative value for key_beg.
 * (If NULL is returned (*key_nextp) is set to (key_end + 1)).
 *
 * This function will also recurse up the chain if the key is not within the
 * current parent's range.  (*parentp) can never be set to NULL.  An iteration
 * can simply allow (*parentp) to float inside the loop.
 *
 * NOTE!  chain->data is not always resolved.  By default it will not be
 *	  resolved for BREF_TYPE_DATA, FREEMAP_NODE, or FREEMAP_LEAF.  Use
 *	  HAMMER2_LOOKUP_ALWAYS to force resolution (but be careful w/
 *	  BREF_TYPE_DATA as the device buffer can alias the logical file
 *	  buffer).
 */
hammer2_chain_t *
hammer2_chain_lookup(hammer2_chain_t **parentp, hammer2_key_t *key_nextp,
		     hammer2_key_t key_beg, hammer2_key_t key_end,
		     int *errorp, int flags)
{
	hammer2_chain_t *parent;
	hammer2_chain_t *chain;
	hammer2_blockref_t *base;
	hammer2_blockref_t *bref;
	hammer2_blockref_t bsave;
	hammer2_key_t scan_beg;
	hammer2_key_t scan_end;
	int count = 0;
	int how_always = HAMMER2_RESOLVE_ALWAYS;
	int how_maybe = HAMMER2_RESOLVE_MAYBE;
	int how;
	int generation;
	int maxloops = 300000;

	/*
	 * Translate HAMMER2_LOOKUP_* flags into the HAMMER2_RESOLVE_*
	 * locking modes used for the returned chain (how) and for
	 * internal/indirect traversal (how_maybe/how_always).
	 */
	if (flags & HAMMER2_LOOKUP_ALWAYS) {
		how_maybe = how_always;
		how = HAMMER2_RESOLVE_ALWAYS;
	} else if (flags & HAMMER2_LOOKUP_NODATA) {
		how = HAMMER2_RESOLVE_NEVER;
	} else {
		how = HAMMER2_RESOLVE_MAYBE;
	}
	if (flags & HAMMER2_LOOKUP_SHARED) {
		how_maybe |= HAMMER2_RESOLVE_SHARED;
		how_always |= HAMMER2_RESOLVE_SHARED;
		how |= HAMMER2_RESOLVE_SHARED;
	}

	/*
	 * Recurse (*parentp) upward if necessary until the parent completely
	 * encloses the key range or we hit the inode.
	 *
	 * Handle races against the flusher deleting indirect nodes on its
	 * way back up by continuing to recurse upward past the deletion.
	 */
	parent = *parentp;
	*errorp = 0;

	while (parent->bref.type == HAMMER2_BREF_TYPE_INDIRECT ||
	       parent->bref.type == HAMMER2_BREF_TYPE_FREEMAP_NODE) {
		scan_beg = parent->bref.key;
		scan_end = scan_beg +
			   ((hammer2_key_t)1 << parent->bref.keybits) - 1;
		if ((parent->flags & HAMMER2_CHAIN_DELETED) == 0) {
			if (key_beg >= scan_beg && key_end <= scan_end)
				break;
		}
		parent = hammer2_chain_repparent(parentp, how_maybe);
	}
again:
	if (--maxloops == 0)
		panic("hammer2_chain_lookup: maxloops");

	/*
	 * MATCHIND case that does not require parent->data (do prior to
	 * parent->error check).
	 */
	switch(parent->bref.type) {
	case HAMMER2_BREF_TYPE_FREEMAP_NODE:
	case HAMMER2_BREF_TYPE_INDIRECT:
		if (flags & HAMMER2_LOOKUP_MATCHIND) {
			scan_beg = parent->bref.key;
			scan_end = scan_beg +
			       ((hammer2_key_t)1 << parent->bref.keybits) - 1;
			if (key_beg == scan_beg && key_end == scan_end) {
				chain = parent;
				hammer2_chain_ref(chain);
				hammer2_chain_lock(chain, how_maybe);
				*key_nextp = scan_end + 1;
				goto done;
			}
		}
		break;
	default:
		break;
	}

	/*
	 * No lookup is possible if the parent is errored.  We delayed
	 * this check as long as we could to ensure that the parent backup,
	 * embedded data, and MATCHIND code could still execute.
	 */
	if (parent->error) {
		*errorp = parent->error;
		return NULL;
	}

	/*
	 * Locate the blockref array.  Currently we do a fully associative
	 * search through the array.
	 */
	switch(parent->bref.type) {
	case HAMMER2_BREF_TYPE_INODE:
		/*
		 * Special shortcut for embedded data returns the inode
		 * itself.  Callers must detect this condition and access
		 * the embedded data (the strategy code does this for us).
		 *
		 * This is only applicable to regular files and softlinks.
		 *
		 * We need a second lock on parent.  Since we already have
		 * a lock we must pass LOCKAGAIN to prevent unexpected
		 * blocking (we don't want to block on a second shared
		 * ref if an exclusive lock is pending)
		 */
		if (parent->data->ipdata.meta.op_flags &
		    HAMMER2_OPFLAG_DIRECTDATA) {
			if (flags & HAMMER2_LOOKUP_NODIRECT) {
				chain = NULL;
				*key_nextp = key_end + 1;
				goto done;
			}
			hammer2_chain_ref(parent);
			hammer2_chain_lock(parent, how_always |
						   HAMMER2_RESOLVE_LOCKAGAIN);
			*key_nextp = key_end + 1;
			return (parent);
		}
		base = &parent->data->ipdata.u.blockset.blockref[0];
		count = HAMMER2_SET_COUNT;
		break;
	case HAMMER2_BREF_TYPE_FREEMAP_NODE:
	case HAMMER2_BREF_TYPE_INDIRECT:
		/*
		 * Optimize indirect blocks in the INITIAL state to avoid
		 * I/O.
		 *
		 * Debugging: Enter permanent wait state instead of
		 * panicing on unexpectedly NULL data for the moment.
		 */
		if (parent->flags & HAMMER2_CHAIN_INITIAL) {
			base = NULL;
		} else {
			if (parent->data == NULL) {
				kprintf("hammer2: unexpected NULL data "
					"on %p\n", parent);
				while (1)
					tsleep(parent, 0, "xxx", 0);
			}
			base = &parent->data->npdata[0];
		}
		count = parent->bytes / sizeof(hammer2_blockref_t);
		break;
	case HAMMER2_BREF_TYPE_VOLUME:
		base = &parent->data->voldata.sroot_blockset.blockref[0];
		count = HAMMER2_SET_COUNT;
		break;
	case HAMMER2_BREF_TYPE_FREEMAP:
		base = &parent->data->blkset.blockref[0];
		count = HAMMER2_SET_COUNT;
		break;
	default:
		panic("hammer2_chain_lookup: unrecognized "
		      "blockref(B) type: %d",
		      parent->bref.type);
		base = NULL;	/* safety */
		count = 0;	/* safety */
		break;
	}

	/*
	 * Merged scan to find next candidate.
	 *
	 * hammer2_base_*() functions require the parent->core.live_* fields
	 * to be synchronized.
	 *
	 * We need to hold the spinlock to access the block array and RB tree
	 * and to interlock chain creation.
	 */
	if ((parent->flags & HAMMER2_CHAIN_COUNTEDBREFS) == 0)
		hammer2_chain_countbrefs(parent, base, count);

	/*
	 * Combined search
	 */
	hammer2_spin_ex(&parent->core.spin);
	chain = hammer2_combined_find(parent, base, count,
				      key_nextp,
				      key_beg, key_end,
				      &bref);
	generation = parent->core.generation;

	/*
	 * Exhausted parent chain, iterate.
	 */
	if (bref == NULL) {
		KKASSERT(chain == NULL);
		hammer2_spin_unex(&parent->core.spin);
		if (key_beg == key_end)	/* short cut single-key case */
			return (NULL);

		/*
		 * Stop if we reached the end of the iteration.
		 */
		if (parent->bref.type != HAMMER2_BREF_TYPE_INDIRECT &&
		    parent->bref.type != HAMMER2_BREF_TYPE_FREEMAP_NODE) {
			return (NULL);
		}

		/*
		 * Calculate next key, stop if we reached the end of the
		 * iteration, otherwise go up one level and loop.
		 */
		key_beg = parent->bref.key +
			  ((hammer2_key_t)1 << parent->bref.keybits);
		if (key_beg == 0 || key_beg > key_end)
			return (NULL);
		parent = hammer2_chain_repparent(parentp, how_maybe);
		goto again;
	}

	/*
	 * Selected from blockref or in-memory chain.
	 */
	bsave = *bref;
	if (chain == NULL) {
		hammer2_spin_unex(&parent->core.spin);
		if (bsave.type == HAMMER2_BREF_TYPE_INDIRECT ||
		    bsave.type == HAMMER2_BREF_TYPE_FREEMAP_NODE) {
			chain = hammer2_chain_get(parent, generation,
						  &bsave, how_maybe);
		} else {
			chain = hammer2_chain_get(parent, generation,
						  &bsave, how);
		}
		if (chain == NULL)
			goto again;
	} else {
		hammer2_chain_ref(chain);
		hammer2_spin_unex(&parent->core.spin);

		/*
		 * chain is referenced but not locked.  We must lock the
		 * chain to obtain definitive state.
		 */
		if (bsave.type == HAMMER2_BREF_TYPE_INDIRECT ||
		    bsave.type == HAMMER2_BREF_TYPE_FREEMAP_NODE) {
			hammer2_chain_lock(chain, how_maybe);
		} else {
			hammer2_chain_lock(chain, how);
		}
		KKASSERT(chain->parent == parent);
	}
	/*
	 * Re-validate against the saved blockref after acquiring the lock;
	 * retry from the top if the chain moved or was modified meanwhile.
	 */
	if (bcmp(&bsave, &chain->bref, sizeof(bsave)) ||
	    chain->parent != parent) {
		hammer2_chain_unlock(chain);
		hammer2_chain_drop(chain);
		chain = NULL;	/* SAFETY */
		goto again;
	}

	/*
	 * Skip deleted chains (XXX cache 'i' end-of-block-array? XXX)
	 *
	 * NOTE: Chain's key range is not relevant as there might be
	 *	 one-offs within the range that are not deleted.
	 *
	 * NOTE: Lookups can race delete-duplicate because
	 *	 delete-duplicate does not lock the parent's core
	 *	 (they just use the spinlock on the core).
	 */
	if (chain->flags & HAMMER2_CHAIN_DELETED) {
		kprintf("skip deleted chain %016jx.%02x key=%016jx\n",
			chain->bref.data_off, chain->bref.type,
			chain->bref.key);
		hammer2_chain_unlock(chain);
		hammer2_chain_drop(chain);
		chain = NULL;	/* SAFETY */
		key_beg = *key_nextp;
		if (key_beg == 0 || key_beg > key_end)
			return(NULL);
		goto again;
	}

	/*
	 * If the chain element is an indirect block it becomes the new
	 * parent and we loop on it.  We must maintain our top-down locks
	 * to prevent the flusher from interfering (i.e. doing a
	 * delete-duplicate and leaving us recursing down a deleted chain).
	 *
	 * The parent always has to be locked with at least RESOLVE_MAYBE
	 * so we can access its data.  It might need a fixup if the caller
	 * passed incompatible flags.  Be careful not to cause a deadlock
	 * as a data-load requires an exclusive lock.
	 *
	 * If HAMMER2_LOOKUP_MATCHIND is set and the indirect block's key
	 * range is within the requested key range we return the indirect
	 * block and do NOT loop.  This is usually only used to acquire
	 * freemap nodes.
	 */
	if (chain->bref.type == HAMMER2_BREF_TYPE_INDIRECT ||
	    chain->bref.type == HAMMER2_BREF_TYPE_FREEMAP_NODE) {
		hammer2_chain_unlock(parent);
		hammer2_chain_drop(parent);
		*parentp = parent = chain;
		chain = NULL;	/* SAFETY */
		goto again;
	}
done:
	/*
	 * All done, return the locked chain.
	 *
	 * If the caller does not want a locked chain, replace the lock with
	 * a ref.  Perhaps this can eventually be optimized to not obtain the
	 * lock in the first place for situations where the data does not
	 * need to be resolved.
	 *
	 * NOTE! A chain->error must be tested by the caller upon return.
	 *	 *errorp is only set based on issues which occur while
	 *	 trying to reach the chain.
	 */
	return (chain);
}

/*
 * After having issued a lookup we can iterate all matching keys.
 *
 * If chain is non-NULL we continue the iteration from just after its index.
 *
 * If chain is NULL we assume the parent was exhausted and continue the
 * iteration at the next parent.
 *
 * If a fatal error occurs (typically an I/O error), a dummy chain is
 * returned with chain->error and error-identifying information set.  This
 * chain will assert if you try to do anything fancy with it.
 *
 * XXX Depending on where the error occurs we should allow continued iteration.
 *
 * parent must be locked on entry and remains locked throughout.  chain's
 * lock status must match flags.  Chain is always at least referenced.
 *
 * WARNING!  The MATCHIND flag does not apply to this function.
 */
hammer2_chain_t *
hammer2_chain_next(hammer2_chain_t **parentp, hammer2_chain_t *chain,
		   hammer2_key_t *key_nextp,
		   hammer2_key_t key_beg, hammer2_key_t key_end,
		   int *errorp, int flags)
{
	hammer2_chain_t *parent;
	int how_maybe;

	/*
	 * Calculate locking flags for upward recursion.
	 */
	how_maybe = HAMMER2_RESOLVE_MAYBE;
	if (flags & HAMMER2_LOOKUP_SHARED)
		how_maybe |= HAMMER2_RESOLVE_SHARED;

	parent = *parentp;
	*errorp = 0;

	/*
	 * Calculate the next index and recalculate the parent if necessary.
	 */
	if (chain) {
		/* resume just past the previous element's key range */
		key_beg = chain->bref.key +
			  ((hammer2_key_t)1 << chain->bref.keybits);
		hammer2_chain_unlock(chain);
		hammer2_chain_drop(chain);

		/*
		 * chain invalid past this point, but we can still do a
		 * pointer comparison w/parent.
		 *
		 * Any scan where the lookup returned degenerate data embedded
		 * in the inode has an invalid index and must terminate.
		 */
		if (chain == parent)
			return(NULL);
		if (key_beg == 0 || key_beg > key_end)
			return(NULL);
		chain = NULL;
	} else if (parent->bref.type != HAMMER2_BREF_TYPE_INDIRECT &&
		   parent->bref.type != HAMMER2_BREF_TYPE_FREEMAP_NODE) {
		/*
		 * We reached the end of the iteration.
		 */
		return (NULL);
	} else {
		/*
		 * Continue iteration with next parent unless the current
		 * parent covers the range.
		 *
		 * (This also handles the case of a deleted, empty indirect
		 * node).
		 */
		key_beg = parent->bref.key +
			  ((hammer2_key_t)1 << parent->bref.keybits);
		if (key_beg == 0 || key_beg > key_end)
			return (NULL);
		parent = hammer2_chain_repparent(parentp, how_maybe);
	}

	/*
	 * And execute
	 */
	return (hammer2_chain_lookup(parentp, key_nextp,
				     key_beg, key_end,
				     errorp, flags));
}

/*
 * Caller wishes to iterate chains under parent, loading new chains into
 * chainp.  Caller must initialize *chainp to NULL and *firstp to 1, and
 * then call hammer2_chain_scan() repeatedly until a non-zero return.
 * During the scan, *firstp will be set to 0 and (*chainp) will be replaced
 * with the returned chain for the scan.  The returned *chainp will be
 * locked and referenced.  Any prior contents will be unlocked and dropped.
 *
 * Caller should check the return value.  A normal scan EOF will return
 * exactly HAMMER2_ERROR_EOF.  Any other non-zero value indicates an
 * error trying to access parent data.  Any error in the returned chain
 * must be tested separately by the caller.
 *
 * (*chainp) is dropped on each scan, but will only be set if the returned
 * element itself can recurse.  Leaf elements are NOT resolved, loaded, or
 * returned via *chainp.  The caller will get their bref only.
 *
 * The raw scan function is similar to lookup/next but does not seek to a key.
 * Blockrefs are iterated via first_bref = (parent, NULL) and
 * next_chain = (parent, bref).
 *
 * The passed-in parent must be locked and its data resolved.  The function
 * nominally returns a locked and referenced *chainp != NULL for chains
 * the caller might need to recurse on (and will dispose of any *chainp passed
 * in).  The caller must check the chain->bref.type either way.
 */
int
hammer2_chain_scan(hammer2_chain_t *parent, hammer2_chain_t **chainp,
		   hammer2_blockref_t *bref, int *firstp,
		   int flags)
{
	hammer2_blockref_t *base;
	hammer2_blockref_t *bref_ptr;
	hammer2_key_t key;
	hammer2_key_t next_key;
	hammer2_chain_t *chain = NULL;
	int count = 0;
	int how;
	int generation;
	int maxloops = 300000;
	int error;

	error = 0;

	/*
	 * Scan flags borrowed from lookup.
	 */
	if (flags & HAMMER2_LOOKUP_ALWAYS) {
		how = HAMMER2_RESOLVE_ALWAYS;
	} else if (flags & HAMMER2_LOOKUP_NODATA) {
		how = HAMMER2_RESOLVE_NEVER;
	} else {
		how = HAMMER2_RESOLVE_MAYBE;
	}
	if (flags & HAMMER2_LOOKUP_SHARED) {
		how |= HAMMER2_RESOLVE_SHARED;
	}

	/*
	 * Calculate key to locate first/next element, unlocking the previous
	 * element as we go.  Be careful, the key calculation can overflow.
	 *
	 * (also reset bref to NULL)
	 */
	if (*firstp) {
		key = 0;
		*firstp = 0;
	} else {
		key = bref->key + ((hammer2_key_t)1 << bref->keybits);
		if ((chain = *chainp) != NULL) {
			*chainp = NULL;
			hammer2_chain_unlock(chain);
			hammer2_chain_drop(chain);
			chain = NULL;
		}
		if (key == 0) {
			/* key wrapped: iteration exhausted */
			error |= HAMMER2_ERROR_EOF;
			goto done;
		}
	}

again:
	if (parent->error) {
		error = parent->error;
		goto done;
	}
	if (--maxloops == 0)
		panic("hammer2_chain_scan: maxloops");

	/*
	 * Locate the blockref array.  Currently we do a fully associative
	 * search through the array.
	 */
	switch(parent->bref.type) {
	case HAMMER2_BREF_TYPE_INODE:
		/*
		 * An inode with embedded data has no sub-chains.
		 *
		 * WARNING! Bulk scan code may pass a static chain marked
		 *	    as BREF_TYPE_INODE with a copy of the volume
		 *	    root blockset to snapshot the volume.
		 */
		if (parent->data->ipdata.meta.op_flags &
		    HAMMER2_OPFLAG_DIRECTDATA) {
			error |= HAMMER2_ERROR_EOF;
			goto done;
		}
		base = &parent->data->ipdata.u.blockset.blockref[0];
		count = HAMMER2_SET_COUNT;
		break;
	case HAMMER2_BREF_TYPE_FREEMAP_NODE:
	case HAMMER2_BREF_TYPE_INDIRECT:
		/*
		 * Optimize indirect blocks in the INITIAL state to avoid
		 * I/O.
		 */
		if (parent->flags & HAMMER2_CHAIN_INITIAL) {
			base = NULL;
		} else {
			if (parent->data == NULL)
				panic("parent->data is NULL");
			base = &parent->data->npdata[0];
		}
		count = parent->bytes / sizeof(hammer2_blockref_t);
		break;
	case HAMMER2_BREF_TYPE_VOLUME:
		base = &parent->data->voldata.sroot_blockset.blockref[0];
		count = HAMMER2_SET_COUNT;
		break;
	case HAMMER2_BREF_TYPE_FREEMAP:
		base = &parent->data->blkset.blockref[0];
		count = HAMMER2_SET_COUNT;
		break;
	default:
		panic("hammer2_chain_scan: unrecognized blockref type: %d",
		      parent->bref.type);
		base = NULL;	/* safety */
		count = 0;	/* safety */
		break;
	}

	/*
	 * Merged scan to find next candidate.
	 *
	 * hammer2_base_*() functions require the parent->core.live_* fields
	 * to be synchronized.
	 *
	 * We need to hold the spinlock to access the block array and RB tree
	 * and to interlock chain creation.
	 */
	if ((parent->flags & HAMMER2_CHAIN_COUNTEDBREFS) == 0)
		hammer2_chain_countbrefs(parent, base, count);

	next_key = 0;
	bref_ptr = NULL;
	hammer2_spin_ex(&parent->core.spin);
	chain = hammer2_combined_find(parent, base, count,
				      &next_key,
				      key, HAMMER2_KEY_MAX,
				      &bref_ptr);
	generation = parent->core.generation;

	/*
	 * Exhausted parent chain, we're done.
	 */
	if (bref_ptr == NULL) {
		hammer2_spin_unex(&parent->core.spin);
		KKASSERT(chain == NULL);
		error |= HAMMER2_ERROR_EOF;
		goto done;
	}

	/*
	 * Copy into the supplied stack-based blockref.
	 */
	*bref = *bref_ptr;

	/*
	 * Selected from blockref or in-memory chain.
	 */
	if (chain == NULL) {
		switch(bref->type) {
		case HAMMER2_BREF_TYPE_INODE:
		case HAMMER2_BREF_TYPE_FREEMAP_NODE:
		case HAMMER2_BREF_TYPE_INDIRECT:
		case HAMMER2_BREF_TYPE_VOLUME:
		case HAMMER2_BREF_TYPE_FREEMAP:
			/*
			 * Recursion, always get the chain
			 */
			hammer2_spin_unex(&parent->core.spin);
			chain = hammer2_chain_get(parent, generation,
						  bref, how);
			if (chain == NULL)
				goto again;
			break;
		default:
			/*
			 * No recursion, do not waste time instantiating
			 * a chain, just iterate using the bref.
			 */
			hammer2_spin_unex(&parent->core.spin);
			break;
		}
	} else {
		/*
		 * Recursion or not we need the chain in order to supply
		 * the bref.
		 */
		hammer2_chain_ref(chain);
		hammer2_spin_unex(&parent->core.spin);
		hammer2_chain_lock(chain, how);
	}
	/*
	 * Re-validate against the copied bref after the lock; retry on
	 * any race that moved or modified the chain.
	 */
	if (chain &&
	    (bcmp(bref, &chain->bref, sizeof(*bref)) ||
	     chain->parent != parent)) {
		hammer2_chain_unlock(chain);
		hammer2_chain_drop(chain);
		chain = NULL;
		goto again;
	}

	/*
	 * Skip deleted chains (XXX cache 'i' end-of-block-array? XXX)
	 *
	 * NOTE: chain's key range is not relevant as there might be
	 *	 one-offs within the range that are not deleted.
	 *
	 * NOTE: XXX this could create problems with scans used in
	 *	 situations other than mount-time recovery.
	 *
	 * NOTE: Lookups can race delete-duplicate because
	 *	 delete-duplicate does not lock the parent's core
	 *	 (they just use the spinlock on the core).
	 */
	if (chain && (chain->flags & HAMMER2_CHAIN_DELETED)) {
		hammer2_chain_unlock(chain);
		hammer2_chain_drop(chain);
		chain = NULL;

		key = next_key;
		if (key == 0) {
			error |= HAMMER2_ERROR_EOF;
			goto done;
		}
		goto again;
	}

done:
	/*
	 * All done, return the bref or NULL, supply chain if necessary.
	 */
	if (chain)
		*chainp = chain;
	return (error);
}

/*
 * Create and return a new hammer2 system memory structure of the specified
 * key, type and size and insert it under (*parentp).  This is a full
 * insertion, based on the supplied key/keybits, and may involve creating
 * indirect blocks and moving other chains around via delete/duplicate.
 *
 * This call can be made with parent == NULL as long as a non -1 methods
 * is supplied.  hmp must also be supplied in this situation (otherwise
 * hmp is extracted from the supplied parent).  The chain will be detached
 * from the topology.  A later call with both parent and chain can be made
 * to attach it.
 *
 * THE CALLER MUST HAVE ALREADY PROPERLY SEEKED (*parentp) TO THE INSERTION
 * POINT SANS ANY REQUIRED INDIRECT BLOCK CREATIONS DUE TO THE ARRAY BEING
 * FULL.  This typically means that the caller is creating the chain after
 * doing a hammer2_chain_lookup().
 *
 * (*parentp) must be exclusive locked and may be replaced on return
 * depending on how much work the function had to do.
 *
 * (*parentp) must not be errored or this function will assert.
 *
 * (*chainp) usually starts out NULL and returns the newly created chain,
 * but if the caller desires the caller may allocate a disconnected chain
 * and pass it in instead.
 *
 * This function should NOT be used to insert INDIRECT blocks.  It is
 * typically used to create/insert inodes and data blocks.
3092 * 3093 * Caller must pass-in an exclusively locked parent the new chain is to 3094 * be inserted under, and optionally pass-in a disconnected, exclusively 3095 * locked chain to insert (else we create a new chain). The function will 3096 * adjust (*parentp) as necessary, create or connect the chain, and 3097 * return an exclusively locked chain in *chainp. 3098 * 3099 * When creating a PFSROOT inode under the super-root, pmp is typically NULL 3100 * and will be reassigned. 3101 * 3102 * NOTE: returns HAMMER_ERROR_* flags 3103 */ 3104 int 3105 hammer2_chain_create(hammer2_chain_t **parentp, hammer2_chain_t **chainp, 3106 hammer2_dev_t *hmp, hammer2_pfs_t *pmp, int methods, 3107 hammer2_key_t key, int keybits, int type, size_t bytes, 3108 hammer2_tid_t mtid, hammer2_off_t dedup_off, int flags) 3109 { 3110 hammer2_chain_t *chain; 3111 hammer2_chain_t *parent; 3112 hammer2_blockref_t *base; 3113 hammer2_blockref_t dummy; 3114 int allocated = 0; 3115 int error = 0; 3116 int count; 3117 int maxloops = 300000; 3118 3119 /* 3120 * Topology may be crossing a PFS boundary. 3121 */ 3122 parent = *parentp; 3123 if (parent) { 3124 KKASSERT(hammer2_mtx_owned(&parent->lock)); 3125 KKASSERT(parent->error == 0); 3126 hmp = parent->hmp; 3127 } 3128 chain = *chainp; 3129 3130 if (chain == NULL) { 3131 /* 3132 * First allocate media space and construct the dummy bref, 3133 * then allocate the in-memory chain structure. Set the 3134 * INITIAL flag for fresh chains which do not have embedded 3135 * data. 3136 */ 3137 bzero(&dummy, sizeof(dummy)); 3138 dummy.type = type; 3139 dummy.key = key; 3140 dummy.keybits = keybits; 3141 dummy.data_off = hammer2_getradix(bytes); 3142 3143 /* 3144 * Inherit methods from parent by default. Primarily used 3145 * for BREF_TYPE_DATA. Non-data types *must* be set to 3146 * a non-NONE check algorithm. 
3147 */ 3148 if (methods == HAMMER2_METH_DEFAULT) 3149 dummy.methods = parent->bref.methods; 3150 else 3151 dummy.methods = (uint8_t)methods; 3152 3153 if (type != HAMMER2_BREF_TYPE_DATA && 3154 HAMMER2_DEC_CHECK(dummy.methods) == HAMMER2_CHECK_NONE) { 3155 dummy.methods |= 3156 HAMMER2_ENC_CHECK(HAMMER2_CHECK_DEFAULT); 3157 } 3158 3159 chain = hammer2_chain_alloc(hmp, pmp, &dummy); 3160 3161 /* 3162 * Lock the chain manually, chain_lock will load the chain 3163 * which we do NOT want to do. (note: chain->refs is set 3164 * to 1 by chain_alloc() for us, but lockcnt is not). 3165 */ 3166 chain->lockcnt = 1; 3167 hammer2_mtx_ex(&chain->lock); 3168 allocated = 1; 3169 3170 /* 3171 * Set INITIAL to optimize I/O. The flag will generally be 3172 * processed when we call hammer2_chain_modify(). 3173 */ 3174 switch(type) { 3175 case HAMMER2_BREF_TYPE_VOLUME: 3176 case HAMMER2_BREF_TYPE_FREEMAP: 3177 panic("hammer2_chain_create: called with volume type"); 3178 break; 3179 case HAMMER2_BREF_TYPE_INDIRECT: 3180 panic("hammer2_chain_create: cannot be used to" 3181 "create indirect block"); 3182 break; 3183 case HAMMER2_BREF_TYPE_FREEMAP_NODE: 3184 panic("hammer2_chain_create: cannot be used to" 3185 "create freemap root or node"); 3186 break; 3187 case HAMMER2_BREF_TYPE_FREEMAP_LEAF: 3188 KKASSERT(bytes == sizeof(chain->data->bmdata)); 3189 /* fall through */ 3190 case HAMMER2_BREF_TYPE_DIRENT: 3191 case HAMMER2_BREF_TYPE_INODE: 3192 case HAMMER2_BREF_TYPE_DATA: 3193 default: 3194 /* 3195 * leave chain->data NULL, set INITIAL 3196 */ 3197 KKASSERT(chain->data == NULL); 3198 atomic_set_int(&chain->flags, HAMMER2_CHAIN_INITIAL); 3199 break; 3200 } 3201 } else { 3202 /* 3203 * We are reattaching a previously deleted chain, possibly 3204 * under a new parent and possibly with a new key/keybits. 3205 * The chain does not have to be in a modified state. The 3206 * UPDATE flag will be set later on in this routine. 3207 * 3208 * Do NOT mess with the current state of the INITIAL flag. 
3209 */ 3210 chain->bref.key = key; 3211 chain->bref.keybits = keybits; 3212 if (chain->flags & HAMMER2_CHAIN_DELETED) 3213 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_DELETED); 3214 KKASSERT(chain->parent == NULL); 3215 } 3216 3217 /* 3218 * Set the appropriate bref flag if requested. 3219 * 3220 * NOTE! Callers can call this function to move chains without 3221 * knowing about special flags, so don't clear bref flags 3222 * here! 3223 */ 3224 if (flags & HAMMER2_INSERT_PFSROOT) 3225 chain->bref.flags |= HAMMER2_BREF_FLAG_PFSROOT; 3226 3227 if (parent == NULL) 3228 goto skip; 3229 3230 /* 3231 * Calculate how many entries we have in the blockref array and 3232 * determine if an indirect block is required when inserting into 3233 * the parent. 3234 */ 3235 again: 3236 if (--maxloops == 0) 3237 panic("hammer2_chain_create: maxloops"); 3238 3239 switch(parent->bref.type) { 3240 case HAMMER2_BREF_TYPE_INODE: 3241 if ((parent->data->ipdata.meta.op_flags & 3242 HAMMER2_OPFLAG_DIRECTDATA) != 0) { 3243 kprintf("hammer2: parent set for direct-data! 
" 3244 "pkey=%016jx ckey=%016jx\n", 3245 parent->bref.key, 3246 chain->bref.key); 3247 } 3248 KKASSERT((parent->data->ipdata.meta.op_flags & 3249 HAMMER2_OPFLAG_DIRECTDATA) == 0); 3250 KKASSERT(parent->data != NULL); 3251 base = &parent->data->ipdata.u.blockset.blockref[0]; 3252 count = HAMMER2_SET_COUNT; 3253 break; 3254 case HAMMER2_BREF_TYPE_INDIRECT: 3255 case HAMMER2_BREF_TYPE_FREEMAP_NODE: 3256 if (parent->flags & HAMMER2_CHAIN_INITIAL) 3257 base = NULL; 3258 else 3259 base = &parent->data->npdata[0]; 3260 count = parent->bytes / sizeof(hammer2_blockref_t); 3261 break; 3262 case HAMMER2_BREF_TYPE_VOLUME: 3263 KKASSERT(parent->data != NULL); 3264 base = &parent->data->voldata.sroot_blockset.blockref[0]; 3265 count = HAMMER2_SET_COUNT; 3266 break; 3267 case HAMMER2_BREF_TYPE_FREEMAP: 3268 KKASSERT(parent->data != NULL); 3269 base = &parent->data->blkset.blockref[0]; 3270 count = HAMMER2_SET_COUNT; 3271 break; 3272 default: 3273 panic("hammer2_chain_create: unrecognized blockref type: %d", 3274 parent->bref.type); 3275 base = NULL; 3276 count = 0; 3277 break; 3278 } 3279 3280 /* 3281 * Make sure we've counted the brefs 3282 */ 3283 if ((parent->flags & HAMMER2_CHAIN_COUNTEDBREFS) == 0) 3284 hammer2_chain_countbrefs(parent, base, count); 3285 3286 KASSERT(parent->core.live_count >= 0 && 3287 parent->core.live_count <= count, 3288 ("bad live_count %d/%d (%02x, %d)", 3289 parent->core.live_count, count, 3290 parent->bref.type, parent->bytes)); 3291 3292 /* 3293 * If no free blockref could be found we must create an indirect 3294 * block and move a number of blockrefs into it. With the parent 3295 * locked we can safely lock each child in order to delete+duplicate 3296 * it without causing a deadlock. 3297 * 3298 * This may return the new indirect block or the old parent depending 3299 * on where the key falls. NULL is returned on error. 
3300 */ 3301 if (parent->core.live_count == count) { 3302 hammer2_chain_t *nparent; 3303 3304 KKASSERT((flags & HAMMER2_INSERT_SAMEPARENT) == 0); 3305 3306 nparent = hammer2_chain_create_indirect(parent, key, keybits, 3307 mtid, type, &error); 3308 if (nparent == NULL) { 3309 if (allocated) 3310 hammer2_chain_drop(chain); 3311 chain = NULL; 3312 goto done; 3313 } 3314 if (parent != nparent) { 3315 hammer2_chain_unlock(parent); 3316 hammer2_chain_drop(parent); 3317 parent = *parentp = nparent; 3318 } 3319 goto again; 3320 } 3321 3322 /* 3323 * fall through if parent, or skip to here if no parent. 3324 */ 3325 skip: 3326 if (chain->flags & HAMMER2_CHAIN_DELETED) 3327 kprintf("Inserting deleted chain @%016jx\n", 3328 chain->bref.key); 3329 3330 /* 3331 * Link the chain into its parent. 3332 */ 3333 if (chain->parent != NULL) 3334 panic("hammer2: hammer2_chain_create: chain already connected"); 3335 KKASSERT(chain->parent == NULL); 3336 if (parent) { 3337 KKASSERT(parent->core.live_count < count); 3338 hammer2_chain_insert(parent, chain, 3339 HAMMER2_CHAIN_INSERT_SPIN | 3340 HAMMER2_CHAIN_INSERT_LIVE, 3341 0); 3342 } 3343 3344 if (allocated) { 3345 /* 3346 * Mark the newly created chain modified. This will cause 3347 * UPDATE to be set and process the INITIAL flag. 3348 * 3349 * Device buffers are not instantiated for DATA elements 3350 * as these are handled by logical buffers. 3351 * 3352 * Indirect and freemap node indirect blocks are handled 3353 * by hammer2_chain_create_indirect() and not by this 3354 * function. 3355 * 3356 * Data for all other bref types is expected to be 3357 * instantiated (INODE, LEAF). 
3358 */ 3359 switch(chain->bref.type) { 3360 case HAMMER2_BREF_TYPE_DATA: 3361 case HAMMER2_BREF_TYPE_FREEMAP_LEAF: 3362 case HAMMER2_BREF_TYPE_DIRENT: 3363 case HAMMER2_BREF_TYPE_INODE: 3364 error = hammer2_chain_modify(chain, mtid, dedup_off, 3365 HAMMER2_MODIFY_OPTDATA); 3366 break; 3367 default: 3368 /* 3369 * Remaining types are not supported by this function. 3370 * In particular, INDIRECT and LEAF_NODE types are 3371 * handled by create_indirect(). 3372 */ 3373 panic("hammer2_chain_create: bad type: %d", 3374 chain->bref.type); 3375 /* NOT REACHED */ 3376 break; 3377 } 3378 } else { 3379 /* 3380 * When reconnecting a chain we must set UPDATE and 3381 * setflush so the flush recognizes that it must update 3382 * the bref in the parent. 3383 */ 3384 if ((chain->flags & HAMMER2_CHAIN_UPDATE) == 0) 3385 atomic_set_int(&chain->flags, HAMMER2_CHAIN_UPDATE); 3386 } 3387 3388 /* 3389 * We must setflush(parent) to ensure that it recurses through to 3390 * chain. setflush(chain) might not work because ONFLUSH is possibly 3391 * already set in the chain (so it won't recurse up to set it in the 3392 * parent). 3393 */ 3394 if (parent) 3395 hammer2_chain_setflush(parent); 3396 3397 done: 3398 *chainp = chain; 3399 3400 return (error); 3401 } 3402 3403 /* 3404 * Move the chain from its old parent to a new parent. The chain must have 3405 * already been deleted or already disconnected (or never associated) with 3406 * a parent. The chain is reassociated with the new parent and the deleted 3407 * flag will be cleared (no longer deleted). The chain's modification state 3408 * is not altered. 3409 * 3410 * THE CALLER MUST HAVE ALREADY PROPERLY SEEKED (parent) TO THE INSERTION 3411 * POINT SANS ANY REQUIRED INDIRECT BLOCK CREATIONS DUE TO THE ARRAY BEING 3412 * FULL. This typically means that the caller is creating the chain after 3413 * doing a hammer2_chain_lookup(). 3414 * 3415 * Neither (parent) or (chain) can be errored. 
3416 * 3417 * If (parent) is non-NULL then the chain is inserted under the parent. 3418 * 3419 * If (parent) is NULL then the newly duplicated chain is not inserted 3420 * anywhere, similar to if it had just been chain_alloc()'d (suitable for 3421 * passing into hammer2_chain_create() after this function returns). 3422 * 3423 * WARNING! This function calls create which means it can insert indirect 3424 * blocks. This can cause other unrelated chains in the parent to 3425 * be moved to a newly inserted indirect block in addition to the 3426 * specific chain. 3427 */ 3428 void 3429 hammer2_chain_rename(hammer2_chain_t **parentp, hammer2_chain_t *chain, 3430 hammer2_tid_t mtid, int flags) 3431 { 3432 hammer2_blockref_t *bref; 3433 hammer2_chain_t *parent; 3434 3435 /* 3436 * WARNING! We should never resolve DATA to device buffers 3437 * (XXX allow it if the caller did?), and since 3438 * we currently do not have the logical buffer cache 3439 * buffer in-hand to fix its cached physical offset 3440 * we also force the modify code to not COW it. XXX 3441 * 3442 * NOTE! We allow error'd chains to be renamed. The bref itself 3443 * is good and can be renamed. The content, however, may 3444 * be inaccessible. 3445 */ 3446 KKASSERT(chain->parent == NULL); 3447 /*KKASSERT(chain->error == 0); allow */ 3448 bref = &chain->bref; 3449 3450 /* 3451 * If parent is not NULL the duplicated chain will be entered under 3452 * the parent and the UPDATE bit set to tell flush to update 3453 * the blockref. 3454 * 3455 * We must setflush(parent) to ensure that it recurses through to 3456 * chain. setflush(chain) might not work because ONFLUSH is possibly 3457 * already set in the chain (so it won't recurse up to set it in the 3458 * parent). 3459 * 3460 * Having both chains locked is extremely important for atomicy. 
3461 */ 3462 if (parentp && (parent = *parentp) != NULL) { 3463 KKASSERT(hammer2_mtx_owned(&parent->lock)); 3464 KKASSERT(parent->refs > 0); 3465 KKASSERT(parent->error == 0); 3466 3467 hammer2_chain_create(parentp, &chain, NULL, chain->pmp, 3468 HAMMER2_METH_DEFAULT, 3469 bref->key, bref->keybits, bref->type, 3470 chain->bytes, mtid, 0, flags); 3471 KKASSERT(chain->flags & HAMMER2_CHAIN_UPDATE); 3472 hammer2_chain_setflush(*parentp); 3473 } 3474 } 3475 3476 /* 3477 * This works in tandem with delete_obref() to install a blockref in 3478 * (typically) an indirect block that is associated with the chain being 3479 * moved to *parentp. 3480 * 3481 * The reason we need this function is that the caller needs to maintain 3482 * the blockref as it was, and not generate a new blockref for what might 3483 * be a modified chain. Otherwise stuff will leak into the flush that 3484 * the flush code's FLUSH_INODE_STOP flag is unable to catch. 3485 * 3486 * It is EXTREMELY important that we properly set CHAIN_BLKMAPUPD and 3487 * CHAIN_UPDATE. We must set BLKMAPUPD if the bref does not match, and 3488 * we must clear CHAIN_UPDATE (that was likely set by the chain_rename) if 3489 * it does. Otherwise we can end up in a situation where H2 is unable to 3490 * clean up the in-memory chain topology. 3491 * 3492 * The reason for this is that flushes do not generally flush through 3493 * BREF_TYPE_INODE chains and depend on a hammer2_inode_t queued to syncq 3494 * or sideq to properly flush and dispose of the related inode chain's flags. 3495 * Situations where the inode is not actually modified by the frontend, 3496 * but where we have to move the related chains around as we insert or cleanup 3497 * indirect blocks, can leave us with a 'dirty' (non-disposable) in-memory 3498 * inode chain that does not have a hammer2_inode_t associated with it. 
 */
static void
hammer2_chain_rename_obref(hammer2_chain_t **parentp, hammer2_chain_t *chain,
			   hammer2_tid_t mtid, int flags,
			   hammer2_blockref_t *obref)
{
	/*
	 * Rename the chain into *parentp first, then (if the caller
	 * supplied a non-empty original blockref) re-install that bref
	 * verbatim in the new parent's blockmap.
	 */
	hammer2_chain_rename(parentp, chain, mtid, flags);

	if (obref->type != HAMMER2_BREF_TYPE_EMPTY) {
		hammer2_blockref_t *tbase;
		int tcount;

		/*
		 * The chain must not already be blockmapped.  Mark the
		 * (possibly new) parent modified so its blockmap can be
		 * changed, then insert the preserved bref.
		 *
		 * NOTE(review): hammer2_chain_modify()'s error return is
		 *		 ignored here -- presumably the parent is
		 *		 already modifiable via the rename path;
		 *		 confirm.
		 */
		KKASSERT((chain->flags & HAMMER2_CHAIN_BLKMAPPED) == 0);
		hammer2_chain_modify(*parentp, mtid, 0, 0);
		tbase = hammer2_chain_base_and_count(*parentp, &tcount);
		hammer2_base_insert(*parentp, tbase, tcount, chain, obref);

		/*
		 * If the installed bref no longer matches the chain's live
		 * bref, the blockmap entry must be updated at flush time
		 * (BLKMAPUPD | UPDATE).  If it matches exactly, clear
		 * UPDATE (likely set by the chain_rename) so the chain
		 * can be disposed of cleanly.
		 */
		if (bcmp(obref, &chain->bref, sizeof(chain->bref))) {
			atomic_set_int(&chain->flags, HAMMER2_CHAIN_BLKMAPUPD |
						      HAMMER2_CHAIN_UPDATE);
		} else {
			atomic_clear_int(&chain->flags, HAMMER2_CHAIN_UPDATE);
		}
	}
}

/*
 * Helper function for deleting chains.
 *
 * The chain is removed from the live view (the RBTREE) as well as the parent's
 * blockmap.  Both chain and its parent must be locked.
 *
 * parent may not be errored.  chain can be errored.
 *
 * Returns 0 or a HAMMER2_ERROR_* code (propagated from the parent
 * modify).  On success the chain is flagged DELETED and detached from
 * its parent.
 */
static int
_hammer2_chain_delete_helper(hammer2_chain_t *parent, hammer2_chain_t *chain,
			     hammer2_tid_t mtid, int flags,
			     hammer2_blockref_t *obref)
{
	int error = 0;

	KKASSERT((chain->flags & HAMMER2_CHAIN_DELETED) == 0);
	KKASSERT(chain->parent == parent);

	if (chain->flags & HAMMER2_CHAIN_BLKMAPPED) {
		/*
		 * Chain is blockmapped, so there must be a parent.
		 * Atomically remove the chain from the parent and remove
		 * the blockmap entry.  The parent must be set modified
		 * to remove the blockmap entry.
		 */
		hammer2_blockref_t *base;
		int count;

		KKASSERT(parent != NULL);
		KKASSERT(parent->error == 0);
		KKASSERT((parent->flags & HAMMER2_CHAIN_INITIAL) == 0);
		error = hammer2_chain_modify(parent, mtid, 0, 0);
		if (error)
			goto done;

		/*
		 * Calculate blockmap pointer
		 *
		 * Both spinlocks are held (chain first, then parent)
		 * while the chain is unhooked from the parent's RB tree
		 * and live accounting so the removal appears atomic to
		 * lookups.
		 */
		KKASSERT(chain->flags & HAMMER2_CHAIN_ONRBTREE);
		hammer2_spin_ex(&chain->core.spin);
		hammer2_spin_ex(&parent->core.spin);

		atomic_set_int(&chain->flags, HAMMER2_CHAIN_DELETED);
		atomic_add_int(&parent->core.live_count, -1);
		++parent->core.generation;
		RB_REMOVE(hammer2_chain_tree, &parent->core.rbtree, chain);
		atomic_clear_int(&chain->flags, HAMMER2_CHAIN_ONRBTREE);
		--parent->core.chain_count;
		chain->parent = NULL;

		switch(parent->bref.type) {
		case HAMMER2_BREF_TYPE_INODE:
			/*
			 * Access the inode's block array.  However, there
			 * is no block array if the inode is flagged
			 * DIRECTDATA.
			 */
			if (parent->data &&
			    (parent->data->ipdata.meta.op_flags &
			     HAMMER2_OPFLAG_DIRECTDATA) == 0) {
				base =
				   &parent->data->ipdata.u.blockset.blockref[0];
			} else {
				base = NULL;
			}
			count = HAMMER2_SET_COUNT;
			break;
		case HAMMER2_BREF_TYPE_INDIRECT:
		case HAMMER2_BREF_TYPE_FREEMAP_NODE:
			if (parent->data)
				base = &parent->data->npdata[0];
			else
				base = NULL;
			count = parent->bytes / sizeof(hammer2_blockref_t);
			break;
		case HAMMER2_BREF_TYPE_VOLUME:
			base = &parent->data->voldata.
					sroot_blockset.blockref[0];
			count = HAMMER2_SET_COUNT;
			break;
		case HAMMER2_BREF_TYPE_FREEMAP:
			base = &parent->data->blkset.blockref[0];
			count = HAMMER2_SET_COUNT;
			break;
		default:
			base = NULL;
			count = 0;
			panic("_hammer2_chain_delete_helper: "
			      "unrecognized blockref type: %d",
			      parent->bref.type);
			break;
		}

		/*
		 * delete blockmapped chain from its parent.
		 *
		 * The parent is not affected by any statistics in chain
		 * which are pending synchronization.  That is, there is
		 * nothing to undo in the parent since they have not yet
		 * been incorporated into the parent.
		 *
		 * The parent is affected by statistics stored in inodes.
		 * Those have already been synchronized, so they must be
		 * undone.  XXX split update possible w/delete in middle?
		 */
		if (base) {
			hammer2_base_delete(parent, base, count, chain, obref);
		}
		hammer2_spin_unex(&parent->core.spin);
		hammer2_spin_unex(&chain->core.spin);
	} else if (chain->flags & HAMMER2_CHAIN_ONRBTREE) {
		/*
		 * Chain is not blockmapped but a parent is present.
		 * Atomically remove the chain from the parent.  There is
		 * no blockmap entry to remove.
		 *
		 * Because chain was associated with a parent but not
		 * synchronized, the chain's *_count_up fields contain
		 * inode adjustment statistics which must be undone.
		 */
		hammer2_spin_ex(&chain->core.spin);
		hammer2_spin_ex(&parent->core.spin);
		atomic_set_int(&chain->flags, HAMMER2_CHAIN_DELETED);
		atomic_add_int(&parent->core.live_count, -1);
		++parent->core.generation;
		RB_REMOVE(hammer2_chain_tree, &parent->core.rbtree, chain);
		atomic_clear_int(&chain->flags, HAMMER2_CHAIN_ONRBTREE);
		--parent->core.chain_count;
		chain->parent = NULL;
		hammer2_spin_unex(&parent->core.spin);
		hammer2_spin_unex(&chain->core.spin);
	} else {
		/*
		 * Chain is not blockmapped and has no parent.  This
		 * is a degenerate case.
		 */
		atomic_set_int(&chain->flags, HAMMER2_CHAIN_DELETED);
	}
done:
	return error;
}

/*
 * Create an indirect block that covers one or more of the elements in the
 * current parent.  Either returns the existing parent with no locking or
 * ref changes or returns the new indirect block locked and referenced
 * and leaving the original parent lock/ref intact as well.
 *
 * If an error occurs, NULL is returned and *errorp is set to the H2 error.
 *
 * The returned chain depends on where the specified key falls.
 *
 * The key/keybits for the indirect mode only needs to follow four rules:
 *
 * (1) That all elements underneath it fit within its key space and
 *
 * (2) That all elements outside it are outside its key space.
 *
 * (3) When creating the new indirect block any elements in the current
 *     parent that fit within the new indirect block's keyspace must be
 *     moved into the new indirect block.
 *
 * (4) The keyspace chosen for the inserted indirect block CAN cover a wider
 *     keyspace than the current parent, but lookup/iteration rules will
 *     ensure (and must ensure) that rule (2) for all parents leading up
 *     to the nearest inode or the root volume header is adhered to.
This 3690 * is accomplished by always recursing through matching keyspaces in 3691 * the hammer2_chain_lookup() and hammer2_chain_next() API. 3692 * 3693 * The current implementation calculates the current worst-case keyspace by 3694 * iterating the current parent and then divides it into two halves, choosing 3695 * whichever half has the most elements (not necessarily the half containing 3696 * the requested key). 3697 * 3698 * We can also opt to use the half with the least number of elements. This 3699 * causes lower-numbered keys (aka logical file offsets) to recurse through 3700 * fewer indirect blocks and higher-numbered keys to recurse through more. 3701 * This also has the risk of not moving enough elements to the new indirect 3702 * block and being forced to create several indirect blocks before the element 3703 * can be inserted. 3704 * 3705 * Must be called with an exclusively locked parent. 3706 * 3707 * NOTE: *errorp set to HAMMER_ERROR_* flags 3708 */ 3709 static int hammer2_chain_indkey_freemap(hammer2_chain_t *parent, 3710 hammer2_key_t *keyp, int keybits, 3711 hammer2_blockref_t *base, int count); 3712 static int hammer2_chain_indkey_file(hammer2_chain_t *parent, 3713 hammer2_key_t *keyp, int keybits, 3714 hammer2_blockref_t *base, int count, 3715 int ncount); 3716 static int hammer2_chain_indkey_dir(hammer2_chain_t *parent, 3717 hammer2_key_t *keyp, int keybits, 3718 hammer2_blockref_t *base, int count, 3719 int ncount); 3720 static 3721 hammer2_chain_t * 3722 hammer2_chain_create_indirect(hammer2_chain_t *parent, 3723 hammer2_key_t create_key, int create_bits, 3724 hammer2_tid_t mtid, int for_type, int *errorp) 3725 { 3726 hammer2_dev_t *hmp; 3727 hammer2_blockref_t *base; 3728 hammer2_blockref_t *bref; 3729 hammer2_blockref_t bsave; 3730 hammer2_blockref_t dummy; 3731 hammer2_chain_t *chain; 3732 hammer2_chain_t *ichain; 3733 hammer2_key_t key = create_key; 3734 hammer2_key_t key_beg; 3735 hammer2_key_t key_end; 3736 hammer2_key_t key_next; 3737 
int keybits = create_bits; 3738 int count; 3739 int ncount; 3740 int nbytes; 3741 int loops; 3742 int error; 3743 int reason; 3744 int generation; 3745 int maxloops = 300000; 3746 3747 /* 3748 * Calculate the base blockref pointer or NULL if the chain 3749 * is known to be empty. We need to calculate the array count 3750 * for RB lookups either way. 3751 */ 3752 hmp = parent->hmp; 3753 KKASSERT(hammer2_mtx_owned(&parent->lock)); 3754 3755 /* 3756 * Pre-modify the parent now to avoid having to deal with error 3757 * processing if we tried to later (in the middle of our loop). 3758 * 3759 * We are going to be moving bref's around, the indirect blocks 3760 * cannot be in an initial state. Do not pass MODIFY_OPTDATA. 3761 */ 3762 *errorp = hammer2_chain_modify(parent, mtid, 0, 0); 3763 if (*errorp) { 3764 kprintf("hammer2_chain_create_indirect: error %08x %s\n", 3765 *errorp, hammer2_error_str(*errorp)); 3766 return NULL; 3767 } 3768 KKASSERT((parent->flags & HAMMER2_CHAIN_INITIAL) == 0); 3769 3770 /*hammer2_chain_modify(&parent, HAMMER2_MODIFY_OPTDATA);*/ 3771 base = hammer2_chain_base_and_count(parent, &count); 3772 3773 /* 3774 * How big should our new indirect block be? It has to be at least 3775 * as large as its parent for splits to work properly. 3776 * 3777 * The freemap uses a specific indirect block size. The number of 3778 * levels are built dynamically and ultimately depend on the size 3779 * volume. Because freemap blocks are taken from the reserved areas 3780 * of the volume our goal is efficiency (fewer levels) and not so 3781 * much to save disk space. 3782 * 3783 * The first indirect block level for a directory usually uses 3784 * HAMMER2_IND_BYTES_MIN (4KB = 32 directory entries). Due to 3785 * the hash mechanism, this typically gives us a nominal 3786 * 32 * 4 entries with one level of indirection. 3787 * 3788 * We use HAMMER2_IND_BYTES_NOM (16KB = 128 blockrefs) for FILE 3789 * indirect blocks. 
The initial 4 entries in the inode gives us 3790 * 256KB. Up to 4 indirect blocks gives us 32MB. Three levels 3791 * of indirection gives us 137GB, and so forth. H2 can support 3792 * huge file sizes but they are not typical, so we try to stick 3793 * with compactness and do not use a larger indirect block size. 3794 * 3795 * We could use 64KB (PBUFSIZE), giving us 512 blockrefs, but 3796 * due to the way indirect blocks are created this usually winds 3797 * up being extremely inefficient for small files. Even though 3798 * 16KB requires more levels of indirection for very large files, 3799 * the 16KB records can be ganged together into 64KB DIOs. 3800 */ 3801 if (for_type == HAMMER2_BREF_TYPE_FREEMAP_NODE || 3802 for_type == HAMMER2_BREF_TYPE_FREEMAP_LEAF) { 3803 nbytes = HAMMER2_FREEMAP_LEVELN_PSIZE; 3804 } else if (parent->bref.type == HAMMER2_BREF_TYPE_INODE) { 3805 if (parent->data->ipdata.meta.type == 3806 HAMMER2_OBJTYPE_DIRECTORY) 3807 nbytes = HAMMER2_IND_BYTES_MIN; /* 4KB = 32 entries */ 3808 else 3809 nbytes = HAMMER2_IND_BYTES_NOM; /* 16KB = ~8MB file */ 3810 3811 } else { 3812 nbytes = HAMMER2_IND_BYTES_NOM; 3813 } 3814 if (nbytes < count * sizeof(hammer2_blockref_t)) { 3815 KKASSERT(for_type != HAMMER2_BREF_TYPE_FREEMAP_NODE && 3816 for_type != HAMMER2_BREF_TYPE_FREEMAP_LEAF); 3817 nbytes = count * sizeof(hammer2_blockref_t); 3818 } 3819 ncount = nbytes / sizeof(hammer2_blockref_t); 3820 3821 /* 3822 * When creating an indirect block for a freemap node or leaf 3823 * the key/keybits must be fitted to static radix levels because 3824 * particular radix levels use particular reserved blocks in the 3825 * related zone. 3826 * 3827 * This routine calculates the key/radix of the indirect block 3828 * we need to create, and whether it is on the high-side or the 3829 * low-side. 
 */
	switch(for_type) {
	case HAMMER2_BREF_TYPE_FREEMAP_NODE:
	case HAMMER2_BREF_TYPE_FREEMAP_LEAF:
		keybits = hammer2_chain_indkey_freemap(parent, &key, keybits,
						       base, count);
		break;
	case HAMMER2_BREF_TYPE_DATA:
		keybits = hammer2_chain_indkey_file(parent, &key, keybits,
						    base, count, ncount);
		break;
	case HAMMER2_BREF_TYPE_DIRENT:
	case HAMMER2_BREF_TYPE_INODE:
		keybits = hammer2_chain_indkey_dir(parent, &key, keybits,
						   base, count, ncount);
		break;
	default:
		panic("illegal indirect block for bref type %d", for_type);
		break;
	}

	/*
	 * Normalize the key for the radix being represented, keeping the
	 * high bits and throwing away the low bits.
	 */
	key &= ~(((hammer2_key_t)1 << keybits) - 1);

	/*
	 * Ok, create our new indirect block
	 */
	bzero(&dummy, sizeof(dummy));
	if (for_type == HAMMER2_BREF_TYPE_FREEMAP_NODE ||
	    for_type == HAMMER2_BREF_TYPE_FREEMAP_LEAF) {
		dummy.type = HAMMER2_BREF_TYPE_FREEMAP_NODE;
	} else {
		dummy.type = HAMMER2_BREF_TYPE_INDIRECT;
	}
	dummy.key = key;
	dummy.keybits = keybits;
	dummy.data_off = hammer2_getradix(nbytes);
	dummy.methods =
		HAMMER2_ENC_CHECK(HAMMER2_DEC_CHECK(parent->bref.methods)) |
		HAMMER2_ENC_COMP(HAMMER2_COMP_NONE);

	ichain = hammer2_chain_alloc(hmp, parent->pmp, &dummy);
	atomic_set_int(&ichain->flags, HAMMER2_CHAIN_INITIAL);
	hammer2_chain_lock(ichain, HAMMER2_RESOLVE_MAYBE);
	/* ichain has one ref at this point */

	/*
	 * We have to mark it modified to allocate its block, but use
	 * OPTDATA to allow it to remain in the INITIAL state.  Otherwise
	 * it won't be acted upon by the flush code.
	 *
	 * XXX remove OPTDATA, we need a fully initialized indirect block to
	 * be able to move the original blockref.
	 */
	*errorp = hammer2_chain_modify(ichain, mtid, 0, 0);
	if (*errorp) {
		kprintf("hammer2_chain_create_indirect: error %08x %s\n",
			*errorp, hammer2_error_str(*errorp));
		hammer2_chain_unlock(ichain);
		hammer2_chain_drop(ichain);
		return NULL;
	}
	KKASSERT((ichain->flags & HAMMER2_CHAIN_INITIAL) == 0);

	/*
	 * Iterate the original parent and move the matching brefs into
	 * the new indirect block.
	 *
	 * XXX handle flushes.
	 */
	key_beg = 0;
	key_end = HAMMER2_KEY_MAX;
	key_next = 0;	/* avoid gcc warnings */
	hammer2_spin_ex(&parent->core.spin);
	loops = 0;
	reason = 0;

	for (;;) {
		/*
		 * Parent may have been modified, relocating its block array.
		 * Reload the base pointer.
		 */
		base = hammer2_chain_base_and_count(parent, &count);

		if (++loops > 100000) {
			hammer2_spin_unex(&parent->core.spin);
			panic("excessive loops r=%d p=%p base/count %p:%d %016jx\n",
			      reason, parent, base, count, key_next);
		}

		/*
		 * NOTE: spinlock stays intact, returned chain (if not NULL)
		 *	 is not referenced or locked which means that we
		 *	 cannot safely check its flagged / deletion status
		 *	 until we lock it.
		 */
		chain = hammer2_combined_find(parent, base, count,
					      &key_next,
					      key_beg, key_end,
					      &bref);
		generation = parent->core.generation;
		if (bref == NULL)
			break;
		key_next = bref->key + ((hammer2_key_t)1 << bref->keybits);

		/*
		 * Skip keys that are not within the key/radix of the new
		 * indirect block.  They stay in the parent.
		 */
		if (rounddown2(key ^ bref->key, (hammer2_key_t)1 << keybits) != 0) {
			goto next_key_spinlocked;
		}

		/*
		 * Load the new indirect block by acquiring the related
		 * chains (potentially from media as it might not be
		 * in-memory).  Then move it to the new parent (ichain).
		 *
		 * chain is referenced but not locked.  We must lock the
		 * chain to obtain definitive state.
		 */
		bsave = *bref;
		if (chain) {
			/*
			 * Use chain already present in the RBTREE
			 */
			hammer2_chain_ref(chain);
			hammer2_spin_unex(&parent->core.spin);
			hammer2_chain_lock(chain, HAMMER2_RESOLVE_NEVER);
		} else {
			/*
			 * Get chain for blockref element.  _get returns NULL
			 * on insertion race.
			 */
			hammer2_spin_unex(&parent->core.spin);
			chain = hammer2_chain_get(parent, generation, &bsave,
						  HAMMER2_RESOLVE_NEVER);
			if (chain == NULL) {
				reason = 1;
				hammer2_spin_ex(&parent->core.spin);
				continue;
			}
		}

		/*
		 * This is always live so if the chain has been deleted
		 * we raced someone and we have to retry.
		 *
		 * NOTE: Lookups can race delete-duplicate because
		 *	 delete-duplicate does not lock the parent's core
		 *	 (they just use the spinlock on the core).
		 *
		 *	 (note reversed logic for this one)
		 */
		if (bcmp(&bsave, &chain->bref, sizeof(bsave)) ||
		    chain->parent != parent ||
		    (chain->flags & HAMMER2_CHAIN_DELETED)) {
			hammer2_chain_unlock(chain);
			hammer2_chain_drop(chain);
			if (hammer2_debug & 0x0040) {
				kprintf("LOST PARENT RETRY "
					"RETRY (%p,%p)->%p %08x\n",
					parent, chain->parent, chain, chain->flags);
			}
			hammer2_spin_ex(&parent->core.spin);
			continue;
		}

		/*
		 * Shift the chain to the indirect block.
		 *
		 * WARNING! No reason for us to load chain data, pass NOSTATS
		 *	    to prevent delete/insert from trying to access
		 *	    inode stats (and thus asserting if there is no
		 *	    chain->data loaded).
		 *
		 * WARNING! The (parent, chain) deletion may modify the parent
		 *	    and invalidate the base pointer.
		 *
		 * WARNING! Parent must already be marked modified, so we
		 *	    can assume that chain_delete always succeeds.
		 *
		 * WARNING! hammer2_chain_repchange() does not have to be
		 *	    called (and doesn't work anyway because we are
		 *	    only doing a partial shift).  A recursion that is
		 *	    in-progress can continue at the current parent
		 *	    and will be able to properly find its next key.
		 */
		error = hammer2_chain_delete_obref(parent, chain, mtid, 0,
						   &bsave);
		KKASSERT(error == 0);
		hammer2_chain_rename_obref(&ichain, chain, mtid, 0, &bsave);
		hammer2_chain_unlock(chain);
		hammer2_chain_drop(chain);
		KKASSERT(parent->refs > 0);
		chain = NULL;
		base = NULL;	/* safety */
		hammer2_spin_ex(&parent->core.spin);
next_key_spinlocked:
		if (--maxloops == 0)
			panic("hammer2_chain_create_indirect: maxloops");
		reason = 4;
		if (key_next == 0 || key_next > key_end)
			break;
		key_beg = key_next;
		/* loop */
	}
	hammer2_spin_unex(&parent->core.spin);

	/*
	 * Insert the new indirect block into the parent now that we've
	 * cleared out some entries in the parent.  We calculated a good
	 * insertion index in the loop above (ichain->index).
	 *
	 * We don't have to set UPDATE here because we mark ichain
	 * modified down below (so the normal modified -> flush -> set-moved
	 * sequence applies).
	 *
	 * The insertion shouldn't race as this is a completely new block
	 * and the parent is locked.
	 */
	base = NULL;	/* safety, parent modify may change address */
	KKASSERT((ichain->flags & HAMMER2_CHAIN_ONRBTREE) == 0);
	KKASSERT(parent->core.live_count < count);
	hammer2_chain_insert(parent, ichain,
			     HAMMER2_CHAIN_INSERT_SPIN |
			     HAMMER2_CHAIN_INSERT_LIVE,
			     0);

	/*
	 * Make sure flushes propagate after our manual insertion.
	 */
	hammer2_chain_setflush(ichain);
	hammer2_chain_setflush(parent);

	/*
	 * Figure out what to return.
	 */
	if (rounddown2(create_key ^ key, (hammer2_key_t)1 << keybits) != 0) {
		/*
		 * Key being created is outside the key range,
		 * return the original parent.
		 */
		hammer2_chain_unlock(ichain);
		hammer2_chain_drop(ichain);
	} else {
		/*
		 * Otherwise it's in the range, return the new parent.
		 * (leave both the new and old parent locked).
		 */
		parent = ichain;
	}

	return(parent);
}

/*
 * Do maintenance on an indirect chain.  Both parent and chain are locked.
 *
 * Returns non-zero if (chain) is deleted, either due to being empty or
 * because its children were safely moved into the parent.
 */
int
hammer2_chain_indirect_maintenance(hammer2_chain_t *parent,
				   hammer2_chain_t *chain)
{
	hammer2_blockref_t *chain_base;
	hammer2_blockref_t *base;
	hammer2_blockref_t *bref;
	hammer2_blockref_t bsave;
	hammer2_key_t key_next;
	hammer2_key_t key_beg;
	hammer2_key_t key_end;
	hammer2_chain_t *sub;
	int chain_count;
	int count;
	int error;
	int generation;

	/*
	 * Make sure we have an accurate live_count
	 */
	if ((chain->flags & (HAMMER2_CHAIN_INITIAL |
			     HAMMER2_CHAIN_COUNTEDBREFS)) == 0) {
		base = &chain->data->npdata[0];
		count = chain->bytes / sizeof(hammer2_blockref_t);
		hammer2_chain_countbrefs(chain, base, count);
	}

	/*
	 * If the indirect block is empty we can delete it.
	 * (ignore deletion error)
	 */
	if (chain->core.live_count == 0 && RB_EMPTY(&chain->core.rbtree)) {
		hammer2_chain_delete(parent, chain,
				     chain->bref.modify_tid,
				     HAMMER2_DELETE_PERMANENT);
		hammer2_chain_repchange(parent, chain);
		return 1;
	}

	base = hammer2_chain_base_and_count(parent, &count);

	if ((parent->flags & (HAMMER2_CHAIN_INITIAL |
			      HAMMER2_CHAIN_COUNTEDBREFS)) == 0) {
		hammer2_chain_countbrefs(parent, base, count);
	}

	/*
	 * Determine if we can collapse chain into parent, calculate
	 * hysteresis for chain emptiness.
	 */
	if (parent->core.live_count + chain->core.live_count - 1 > count)
		return 0;
	chain_count = chain->bytes / sizeof(hammer2_blockref_t);
	if (chain->core.live_count > chain_count * 3 / 4)
		return 0;

	/*
	 * Ok, theoretically we can collapse chain's contents into
	 * parent.  chain is locked, but any in-memory children of chain
	 * are not.  For this to work, we must be able to dispose of any
	 * in-memory children of chain.
	 *
	 * For now require that there are no in-memory children of chain.
	 *
	 * WARNING! Both chain and parent must remain locked across this
	 *	    entire operation.
	 */

	/*
	 * Parent must be marked modified.  Don't try to collapse it if we
	 * can't mark it modified.  Once modified, destroy chain to make room
	 * and to get rid of what will be a conflicting key (this is included
	 * in the calculation above).  Finally, move the children of chain
	 * into chain's parent.
	 *
	 * This order creates an accounting problem for bref.embed.stats
	 * because we destroy chain before we remove its children.  Any
	 * elements whose blockref is already synchronized will be counted
	 * twice.  To deal with the problem we clean out chain's stats prior
	 * to deleting it.
	 */
	error = hammer2_chain_modify(parent, 0, 0, 0);
	if (error) {
		krateprintf(&krate_h2me, "hammer2: indirect_maint: %s\n",
			    hammer2_error_str(error));
		return 0;
	}
	error = hammer2_chain_modify(chain, chain->bref.modify_tid, 0, 0);
	if (error) {
		krateprintf(&krate_h2me, "hammer2: indirect_maint: %s\n",
			    hammer2_error_str(error));
		return 0;
	}

	chain->bref.embed.stats.inode_count = 0;
	chain->bref.embed.stats.data_count = 0;
	error = hammer2_chain_delete(parent, chain,
				     chain->bref.modify_tid,
				     HAMMER2_DELETE_PERMANENT);
	KKASSERT(error == 0);

	/*
	 * The combined_find call requires core.spin to be held.  One would
	 * think there wouldn't be any conflicts since we hold chain
	 * exclusively locked, but the caching mechanism for 0-ref children
	 * does not require a chain lock.
	 */
	hammer2_spin_ex(&chain->core.spin);

	key_next = 0;
	key_beg = 0;
	key_end = HAMMER2_KEY_MAX;
	for (;;) {
		chain_base = &chain->data->npdata[0];
		chain_count = chain->bytes / sizeof(hammer2_blockref_t);
		sub = hammer2_combined_find(chain, chain_base, chain_count,
					    &key_next,
					    key_beg, key_end,
					    &bref);
		generation = chain->core.generation;
		if (bref == NULL)
			break;
		key_next = bref->key + ((hammer2_key_t)1 << bref->keybits);

		bsave = *bref;
		if (sub) {
			hammer2_chain_ref(sub);
			hammer2_spin_unex(&chain->core.spin);
			hammer2_chain_lock(sub, HAMMER2_RESOLVE_NEVER);
		} else {
			hammer2_spin_unex(&chain->core.spin);
			sub = hammer2_chain_get(chain, generation, &bsave,
						HAMMER2_RESOLVE_NEVER);
			if (sub == NULL) {
				hammer2_spin_ex(&chain->core.spin);
				continue;
			}
		}
		if (bcmp(&bsave, &sub->bref, sizeof(bsave)) ||
		    sub->parent != chain ||
		    (sub->flags & HAMMER2_CHAIN_DELETED)) {
			hammer2_chain_unlock(sub);
			hammer2_chain_drop(sub);
			hammer2_spin_ex(&chain->core.spin);
			sub = NULL;	/* safety */
			continue;
		}
		error = hammer2_chain_delete_obref(chain, sub,
						   sub->bref.modify_tid, 0,
						   &bsave);
		KKASSERT(error == 0);
		hammer2_chain_rename_obref(&parent, sub,
					   sub->bref.modify_tid,
					   HAMMER2_INSERT_SAMEPARENT, &bsave);
		hammer2_chain_unlock(sub);
		hammer2_chain_drop(sub);
		hammer2_spin_ex(&chain->core.spin);

		if (key_next == 0)
			break;
		key_beg = key_next;
	}
	hammer2_spin_unex(&chain->core.spin);

	hammer2_chain_repchange(parent, chain);

	return 1;
}

/*
 * Freemap indirect blocks
 *
 * Calculate the keybits and highside/lowside of the freemap node the
 * caller is creating.
 *
 * This routine will specify the next higher-level freemap key/radix
 * representing the lowest-ordered set.  By doing so, eventually all
 * low-ordered sets will be moved one level down.
 *
 * We have to be careful here because the freemap reserves a limited
 * number of blocks for a limited number of levels.  So we can't just
 * push indiscriminately.
 */
int
hammer2_chain_indkey_freemap(hammer2_chain_t *parent, hammer2_key_t *keyp,
			     int keybits, hammer2_blockref_t *base, int count)
{
	hammer2_chain_t *chain;
	hammer2_blockref_t *bref;
	hammer2_key_t key;
	hammer2_key_t key_beg;
	hammer2_key_t key_end;
	hammer2_key_t key_next;
	int maxloops = 300000;

	key = *keyp;
	keybits = 64;

	/*
	 * Calculate the range of keys in the array being careful to skip
	 * slots which are overridden with a deletion.
	 */
	key_beg = 0;
	key_end = HAMMER2_KEY_MAX;
	hammer2_spin_ex(&parent->core.spin);

	for (;;) {
		if (--maxloops == 0) {
			panic("indkey_freemap shit %p %p:%d\n",
			      parent, base, count);
		}
		chain = hammer2_combined_find(parent, base, count,
					      &key_next,
					      key_beg, key_end,
					      &bref);

		/*
		 * Exhausted search
		 */
		if (bref == NULL)
			break;

		/*
		 * Skip deleted chains.
		 */
		if (chain && (chain->flags & HAMMER2_CHAIN_DELETED)) {
			if (key_next == 0 || key_next > key_end)
				break;
			key_beg = key_next;
			continue;
		}

		/*
		 * Use the full live (not deleted) element for the scan
		 * iteration.  HAMMER2 does not allow partial replacements.
		 *
		 * XXX should be built into hammer2_combined_find().
		 */
		key_next = bref->key + ((hammer2_key_t)1 << bref->keybits);

		/*
		 * Track the lowest-ordered element (smallest keybits wins,
		 * then smallest key).
		 */
		if (keybits > bref->keybits) {
			key = bref->key;
			keybits = bref->keybits;
		} else if (keybits == bref->keybits && bref->key < key) {
			key = bref->key;
		}
		if (key_next == 0)
			break;
		key_beg = key_next;
	}
	hammer2_spin_unex(&parent->core.spin);

	/*
	 * Return the keybits for a higher-level FREEMAP_NODE covering
	 * this node.
	 */
	switch(keybits) {
	case HAMMER2_FREEMAP_LEVEL0_RADIX:
		keybits = HAMMER2_FREEMAP_LEVEL1_RADIX;
		break;
	case HAMMER2_FREEMAP_LEVEL1_RADIX:
		keybits = HAMMER2_FREEMAP_LEVEL2_RADIX;
		break;
	case HAMMER2_FREEMAP_LEVEL2_RADIX:
		keybits = HAMMER2_FREEMAP_LEVEL3_RADIX;
		break;
	case HAMMER2_FREEMAP_LEVEL3_RADIX:
		keybits = HAMMER2_FREEMAP_LEVEL4_RADIX;
		break;
	case HAMMER2_FREEMAP_LEVEL4_RADIX:
		keybits = HAMMER2_FREEMAP_LEVEL5_RADIX;
		break;
	case HAMMER2_FREEMAP_LEVEL5_RADIX:
		panic("hammer2_chain_indkey_freemap: level too high");
		break;
	default:
		panic("hammer2_chain_indkey_freemap: bad radix");
		break;
	}
	*keyp = key;

	return (keybits);
}

/*
 * File indirect blocks
 *
 * Calculate the key/keybits for the indirect block to create by scanning
 * existing keys.  The key being created is also passed in *keyp and can be
 * inside or outside the indirect block.  Regardless, the indirect block
 * must hold at least two keys in order to guarantee sufficient space.
 *
 * We use a modified version of the freemap's fixed radix tree, but tailored
 * for file data.  Basically we configure an indirect block encompassing the
 * smallest key.
 */
static int
hammer2_chain_indkey_file(hammer2_chain_t *parent, hammer2_key_t *keyp,
			  int keybits, hammer2_blockref_t *base, int count,
			  int ncount)
{
	hammer2_chain_t *chain;
	hammer2_blockref_t *bref;
	hammer2_key_t key;
	hammer2_key_t key_beg;
	hammer2_key_t key_end;
	hammer2_key_t key_next;
	int nradix;
	int maxloops = 300000;

	key = *keyp;
	keybits = 64;

	/*
	 * Calculate the range of keys in the array being careful to skip
	 * slots which are overridden with a deletion.
	 *
	 * Locate the smallest key.
	 */
	key_beg = 0;
	key_end = HAMMER2_KEY_MAX;
	hammer2_spin_ex(&parent->core.spin);

	for (;;) {
		if (--maxloops == 0) {
			panic("indkey_freemap shit %p %p:%d\n",
			      parent, base, count);
		}
		chain = hammer2_combined_find(parent, base, count,
					      &key_next,
					      key_beg, key_end,
					      &bref);

		/*
		 * Exhausted search
		 */
		if (bref == NULL)
			break;

		/*
		 * Skip deleted chains.
		 */
		if (chain && (chain->flags & HAMMER2_CHAIN_DELETED)) {
			if (key_next == 0 || key_next > key_end)
				break;
			key_beg = key_next;
			continue;
		}

		/*
		 * Use the full live (not deleted) element for the scan
		 * iteration.  HAMMER2 does not allow partial replacements.
		 *
		 * XXX should be built into hammer2_combined_find().
		 */
		key_next = bref->key + ((hammer2_key_t)1 << bref->keybits);

		/*
		 * Track the smallest key (smallest keybits wins, then
		 * smallest key).
		 */
		if (keybits > bref->keybits) {
			key = bref->key;
			keybits = bref->keybits;
		} else if (keybits == bref->keybits && bref->key < key) {
			key = bref->key;
		}
		if (key_next == 0)
			break;
		key_beg = key_next;
	}
	hammer2_spin_unex(&parent->core.spin);

	/*
	 * Calculate the static keybits for a higher-level indirect block
	 * that contains the key.
	 */
	*keyp = key;

	switch(ncount) {
	case HAMMER2_IND_COUNT_MIN:
		nradix = HAMMER2_IND_RADIX_MIN - HAMMER2_BLOCKREF_RADIX;
		break;
	case HAMMER2_IND_COUNT_NOM:
		nradix = HAMMER2_IND_RADIX_NOM - HAMMER2_BLOCKREF_RADIX;
		break;
	case HAMMER2_IND_COUNT_MAX:
		nradix = HAMMER2_IND_RADIX_MAX - HAMMER2_BLOCKREF_RADIX;
		break;
	default:
		panic("bad ncount %d\n", ncount);
		nradix = 0;
		break;
	}

	/*
	 * The largest radix that can be returned for an indirect block is
	 * 63 bits.  (The largest practical indirect block radix is actually
	 * 62 bits because the top-level inode or volume root contains four
	 * entries, but allow 63 to be returned).
	 */
	if (nradix >= 64)
		nradix = 63;

	return keybits + nradix;
}

#if 1

/*
 * Directory indirect blocks.
 *
 * Covers both the inode index (directory of inodes), and directory contents
 * (filenames hardlinked to inodes).
 *
 * Because directory keys are hashed we generally try to cut the space in
 * half.  We accommodate the inode index (which tends to have linearly
 * increasing inode numbers) by ensuring that the keyspace is at least large
 * enough to fill up the indirect block being created.
 */
static int
hammer2_chain_indkey_dir(hammer2_chain_t *parent, hammer2_key_t *keyp,
			 int keybits, hammer2_blockref_t *base, int count,
			 int ncount)
{
	hammer2_blockref_t *bref;
	hammer2_chain_t	*chain;
	hammer2_key_t key_beg;
	hammer2_key_t key_end;
	hammer2_key_t key_next;
	hammer2_key_t key;
	int nkeybits;
	int locount;
	int hicount;
	int maxloops = 300000;

	/*
	 * NOTE: We can't take a shortcut here anymore for inodes because
	 *	 the root directory can contain a mix of inodes and directory
	 *	 entries (we used to just return 63 if parent->bref.type was
	 *	 HAMMER2_BREF_TYPE_INODE.
	 */
	key = *keyp;
	locount = 0;
	hicount = 0;

	/*
	 * Calculate the range of keys in the array being careful to skip
	 * slots which are overridden with a deletion.
	 */
	key_beg = 0;
	key_end = HAMMER2_KEY_MAX;
	hammer2_spin_ex(&parent->core.spin);

	for (;;) {
		if (--maxloops == 0) {
			panic("indkey_freemap shit %p %p:%d\n",
			      parent, base, count);
		}
		chain = hammer2_combined_find(parent, base, count,
					      &key_next,
					      key_beg, key_end,
					      &bref);

		/*
		 * Exhausted search
		 */
		if (bref == NULL)
			break;

		/*
		 * Deleted object
		 */
		if (chain && (chain->flags & HAMMER2_CHAIN_DELETED)) {
			if (key_next == 0 || key_next > key_end)
				break;
			key_beg = key_next;
			continue;
		}

		/*
		 * Use the full live (not deleted) element for the scan
		 * iteration.  HAMMER2 does not allow partial replacements.
		 *
		 * XXX should be built into hammer2_combined_find().
		 */
		key_next = bref->key + ((hammer2_key_t)1 << bref->keybits);

		/*
		 * Expand our calculated key range (key, keybits) to fit
		 * the scanned key.  nkeybits represents the full range
		 * that we will later cut in half (two halves @ nkeybits - 1).
		 */
		nkeybits = keybits;
		if (nkeybits < bref->keybits) {
			if (bref->keybits > 64) {
				kprintf("bad bref chain %p bref %p\n",
					chain, bref);
				Debugger("fubar");
			}
			nkeybits = bref->keybits;
		}
		while (nkeybits < 64 &&
		       rounddown2(key ^ bref->key, (hammer2_key_t)1 << nkeybits) != 0) {
			++nkeybits;
		}

		/*
		 * If the new key range is larger we have to determine
		 * which side of the new key range the existing keys fall
		 * under by checking the high bit, then collapsing the
		 * locount into the hicount or vice-versa.
		 */
		if (keybits != nkeybits) {
			if (((hammer2_key_t)1 << (nkeybits - 1)) & key) {
				hicount += locount;
				locount = 0;
			} else {
				locount += hicount;
				hicount = 0;
			}
			keybits = nkeybits;
		}

		/*
		 * The newly scanned key will be in the lower half or the
		 * upper half of the (new) key range.
		 */
		if (((hammer2_key_t)1 << (nkeybits - 1)) & bref->key)
			++hicount;
		else
			++locount;

		if (key_next == 0)
			break;
		key_beg = key_next;
	}
	hammer2_spin_unex(&parent->core.spin);
	bref = NULL;	/* now invalid (safety) */

	/*
	 * Adjust keybits to represent half of the full range calculated
	 * above (radix 63 max) for our new indirect block.
	 */
	--keybits;

	/*
	 * Expand keybits to hold at least ncount elements.  ncount will be
	 * a power of 2.  This is to try to completely fill leaf nodes (at
	 * least for keys which are not hashes).
	 *
	 * We aren't counting 'in' or 'out', we are counting 'high side'
	 * and 'low side' based on the bit at (1LL << keybits).  We want
	 * everything to be inside in these cases so shift it all to
	 * the low or high side depending on the new high bit.
	 */
	while (((hammer2_key_t)1 << keybits) < ncount) {
		++keybits;
		if (key & ((hammer2_key_t)1 << keybits)) {
			hicount += locount;
			locount = 0;
		} else {
			locount += hicount;
			hicount = 0;
		}
	}

	if (hicount > locount)
		key |= (hammer2_key_t)1 << keybits;
	else
		key &= ~(hammer2_key_t)1 << keybits;

	*keyp = key;

	return (keybits);
}

#else

/*
 * Directory indirect blocks.
 *
 * Covers both the inode index (directory of inodes), and directory contents
 * (filenames hardlinked to inodes).
 *
 * Because directory keys are hashed we generally try to cut the space in
 * half.  We accommodate the inode index (which tends to have linearly
 * increasing inode numbers) by ensuring that the keyspace is at least large
 * enough to fill up the indirect block being created.
 */
static int
hammer2_chain_indkey_dir(hammer2_chain_t *parent, hammer2_key_t *keyp,
			 int keybits, hammer2_blockref_t *base, int count,
			 int ncount)
{
	hammer2_blockref_t *bref;
	hammer2_chain_t	*chain;
	hammer2_key_t key_beg;
	hammer2_key_t key_end;
	hammer2_key_t key_next;
	hammer2_key_t key;
	int nkeybits;
	int locount;
	int hicount;
	int maxloops = 300000;

	/*
	 * Shortcut if the parent is the inode.  In this situation the
	 * parent has 4+1 directory entries and we are creating an indirect
	 * block capable of holding many more.
	 */
	if (parent->bref.type == HAMMER2_BREF_TYPE_INODE) {
		return 63;
	}

	key = *keyp;
	locount = 0;
	hicount = 0;

	/*
	 * Calculate the range of keys in the array being careful to skip
	 * slots which are overridden with a deletion.
	 */
	key_beg = 0;
	key_end = HAMMER2_KEY_MAX;
	hammer2_spin_ex(&parent->core.spin);

	for (;;) {
		if (--maxloops == 0) {
			panic("indkey_freemap shit %p %p:%d\n",
			      parent, base, count);
		}
		chain = hammer2_combined_find(parent, base, count,
					      &key_next,
					      key_beg, key_end,
					      &bref);

		/*
		 * Exhausted search
		 */
		if (bref == NULL)
			break;

		/*
		 * Deleted object
		 */
		if (chain && (chain->flags & HAMMER2_CHAIN_DELETED)) {
			if (key_next == 0 || key_next > key_end)
				break;
			key_beg = key_next;
			continue;
		}

		/*
		 * Use the full live (not deleted) element for the scan
		 * iteration.  HAMMER2 does not allow partial replacements.
		 *
		 * XXX should be built into hammer2_combined_find().
		 */
		key_next = bref->key + ((hammer2_key_t)1 << bref->keybits);

		/*
		 * Expand our calculated key range (key, keybits) to fit
		 * the scanned key.  nkeybits represents the full range
		 * that we will later cut in half (two halves @ nkeybits - 1).
		 */
		nkeybits = keybits;
		if (nkeybits < bref->keybits) {
			if (bref->keybits > 64) {
				kprintf("bad bref chain %p bref %p\n",
					chain, bref);
				Debugger("fubar");
			}
			nkeybits = bref->keybits;
		}
		while (nkeybits < 64 &&
		       (~(((hammer2_key_t)1 << nkeybits) - 1) &
		        (key ^ bref->key)) != 0) {
			++nkeybits;
		}

		/*
		 * If the new key range is larger we have to determine
		 * which side of the new key range the existing keys fall
		 * under by checking the high bit, then collapsing the
		 * locount into the hicount or vice-versa.
		 */
		if (keybits != nkeybits) {
			if (((hammer2_key_t)1 << (nkeybits - 1)) & key) {
				hicount += locount;
				locount = 0;
			} else {
				locount += hicount;
				hicount = 0;
			}
			keybits = nkeybits;
		}

		/*
		 * The newly scanned key will be in the lower half or the
		 * upper half of the (new) key range.
		 */
		if (((hammer2_key_t)1 << (nkeybits - 1)) & bref->key)
			++hicount;
		else
			++locount;

		if (key_next == 0)
			break;
		key_beg = key_next;
	}
	hammer2_spin_unex(&parent->core.spin);
	bref = NULL;	/* now invalid (safety) */

	/*
	 * Adjust keybits to represent half of the full range calculated
	 * above (radix 63 max) for our new indirect block.
	 */
	--keybits;

	/*
	 * Expand keybits to hold at least ncount elements.  ncount will be
	 * a power of 2.  This is to try to completely fill leaf nodes (at
	 * least for keys which are not hashes).
	 *
	 * We aren't counting 'in' or 'out', we are counting 'high side'
	 * and 'low side' based on the bit at (1LL << keybits).  We want
	 * everything to be inside in these cases so shift it all to
	 * the low or high side depending on the new high bit.
	 */
	while (((hammer2_key_t)1 << keybits) < ncount) {
		++keybits;
		if (key & ((hammer2_key_t)1 << keybits)) {
			hicount += locount;
			locount = 0;
		} else {
			locount += hicount;
			hicount = 0;
		}
	}

	if (hicount > locount)
		key |= (hammer2_key_t)1 << keybits;
	else
		key &= ~(hammer2_key_t)1 << keybits;

	*keyp = key;

	return (keybits);
}

#endif

/*
 * Sets CHAIN_DELETED and remove the chain's blockref from the parent if
 * it exists.
 *
 * Both parent and chain must be locked exclusively.
 *
 * This function will modify the parent if the blockref requires removal
 * from the parent's block table.
 *
 * This function is NOT recursive.  Any entity already pushed into the
 * chain (such as an inode) may still need visibility into its contents,
 * as well as the ability to read and modify the contents.  For example,
 * for an unlinked file which is still open.
 *
 * Also note that the flusher is responsible for cleaning up empty
 * indirect blocks.
 */
int
hammer2_chain_delete(hammer2_chain_t *parent, hammer2_chain_t *chain,
		     hammer2_tid_t mtid, int flags)
{
	int error = 0;

	KKASSERT(hammer2_mtx_owned(&chain->lock));

	/*
	 * Nothing to do if already marked.
	 *
	 * We need the spinlock on the core whose RBTREE contains chain
	 * to protect against races.
	 */
	if ((chain->flags & HAMMER2_CHAIN_DELETED) == 0) {
		KKASSERT((chain->flags & HAMMER2_CHAIN_DELETED) == 0 &&
			 chain->parent == parent);
		error = _hammer2_chain_delete_helper(parent, chain,
						     mtid, flags, NULL);
	}

	/*
	 * Permanent deletions mark the chain as destroyed.
4884 * 4885 * NOTE: We do not setflush the chain unless the deletion is 4886 * permanent, since the deletion of a chain does not actually 4887 * require it to be flushed. 4888 */ 4889 if (error == 0) { 4890 if (flags & HAMMER2_DELETE_PERMANENT) { 4891 atomic_set_int(&chain->flags, HAMMER2_CHAIN_DESTROY); 4892 hammer2_chain_setflush(chain); 4893 } 4894 } 4895 4896 return error; 4897 } 4898 4899 static int 4900 hammer2_chain_delete_obref(hammer2_chain_t *parent, hammer2_chain_t *chain, 4901 hammer2_tid_t mtid, int flags, 4902 hammer2_blockref_t *obref) 4903 { 4904 int error = 0; 4905 4906 KKASSERT(hammer2_mtx_owned(&chain->lock)); 4907 4908 /* 4909 * Nothing to do if already marked. 4910 * 4911 * We need the spinlock on the core whos RBTREE contains chain 4912 * to protect against races. 4913 */ 4914 obref->type = HAMMER2_BREF_TYPE_EMPTY; 4915 if ((chain->flags & HAMMER2_CHAIN_DELETED) == 0) { 4916 KKASSERT((chain->flags & HAMMER2_CHAIN_DELETED) == 0 && 4917 chain->parent == parent); 4918 error = _hammer2_chain_delete_helper(parent, chain, 4919 mtid, flags, obref); 4920 } 4921 4922 /* 4923 * Permanent deletions mark the chain as destroyed. 4924 * 4925 * NOTE: We do not setflush the chain unless the deletion is 4926 * permanent, since the deletion of a chain does not actually 4927 * require it to be flushed. 4928 */ 4929 if (error == 0) { 4930 if (flags & HAMMER2_DELETE_PERMANENT) { 4931 atomic_set_int(&chain->flags, HAMMER2_CHAIN_DESTROY); 4932 hammer2_chain_setflush(chain); 4933 } 4934 } 4935 4936 return error; 4937 } 4938 4939 /* 4940 * Returns the index of the nearest element in the blockref array >= elm. 4941 * Returns (count) if no element could be found. 4942 * 4943 * Sets *key_nextp to the next key for loop purposes but does not modify 4944 * it if the next key would be higher than the current value of *key_nextp. 4945 * Note that *key_nexp can overflow to 0, which should be tested by the 4946 * caller. 4947 * 4948 * WARNING! 
 *	    Must be called with parent's spinlock held.  Spinlock remains
 *	    held through the operation.
 *
 *	    Returns the index of the first blockref which may enclose or
 *	    follow key_beg, or 'count' if the search exhausted.  May trim
 *	    *key_nextp down to the end of the found element's key range.
 */
static int
hammer2_base_find(hammer2_chain_t *parent,
                  hammer2_blockref_t *base, int count,
                  hammer2_key_t *key_nextp,
                  hammer2_key_t key_beg, hammer2_key_t key_end)
{
        hammer2_blockref_t *scan;
        hammer2_key_t scan_end;
        int i;
        int limit;

        /*
         * Require the live chain's already have their core's counted
         * so we can optimize operations.
         */
        KKASSERT(parent->flags & HAMMER2_CHAIN_COUNTEDBREFS);

        /*
         * Degenerate case
         */
        if (count == 0 || base == NULL)
                return(count);

        /*
         * Sequential optimization using parent->cache_index.  This is
         * the most likely scenario.
         *
         * We can avoid trailing empty entries on live chains, otherwise
         * we might have to check the whole block array.
         */
        i = parent->cache_index;        /* SMP RACE OK */
        cpu_ccfence();
        limit = parent->core.live_zero; /* first all-empty slot (live only) */
        if (i >= limit)
                i = limit - 1;
        if (i < 0)
                i = 0;
        KKASSERT(i < count);

        /*
         * Search backwards from the cached index until the element at [i]
         * can no longer be past key_beg.
         */
        scan = &base[i];
        while (i > 0 && (scan->type == HAMMER2_BREF_TYPE_EMPTY ||
               scan->key > key_beg)) {
                --scan;
                --i;
        }
        parent->cache_index = i;

        /*
         * Search forwards, stop when we find a scan element which
         * encloses the key or until we know that there are no further
         * elements.
         */
        while (i < count) {
                if (scan->type != HAMMER2_BREF_TYPE_EMPTY) {
                        /* scan_end is the inclusive end of scan's key range */
                        scan_end = scan->key +
                                   ((hammer2_key_t)1 << scan->keybits) - 1;
                        if (scan->key > key_beg || scan_end >= key_beg)
                                break;
                }
                /* past live_zero everything is empty, give up early */
                if (i >= limit)
                        return (count);
                ++scan;
                ++i;
        }
        if (i != count) {
                parent->cache_index = i;
                if (i >= limit) {
                        i = count;
                } else {
                        /*
                         * Clip the caller's iteration window to the end of
                         * the element we found (scan_end == 0 means the
                         * range wraps and cannot clip).
                         */
                        scan_end = scan->key +
                                   ((hammer2_key_t)1 << scan->keybits);
                        if (scan_end && (*key_nextp > scan_end ||
                                         *key_nextp == 0)) {
                                *key_nextp = scan_end;
                        }
                }
        }
        return (i);
}

/*
 * Do a combined search and return the next match either from the blockref
 * array or from the in-memory chain.  Sets *brefp to the returned bref in
 * both cases, or sets it to NULL if the search exhausted.  Only returns
 * a non-NULL chain if the search matched from the in-memory chain.
 *
 * When the match came from the media blockref array (i.e. no in-memory
 * chain was found), NULL is returned but a non-NULL bref is still
 * returned in *brefp.
 *
 * The returned chain is not locked or referenced.  Use the returned bref
 * to determine if the search exhausted or not.  Iterate if the base find
 * is chosen but matches a deleted chain.
 *
 * WARNING! Must be called with parent's spinlock held.  Spinlock remains
 *	    held through the operation.
 */
static hammer2_chain_t *
hammer2_combined_find(hammer2_chain_t *parent,
                      hammer2_blockref_t *base, int count,
                      hammer2_key_t *key_nextp,
                      hammer2_key_t key_beg, hammer2_key_t key_end,
                      hammer2_blockref_t **brefp)
{
        hammer2_blockref_t *bref;
        hammer2_chain_t *chain;
        int i;

        /*
         * Lookup in block array and in rbtree.
         */
        *key_nextp = key_end + 1;
        i = hammer2_base_find(parent, base, count, key_nextp,
                              key_beg, key_end);
        chain = hammer2_chain_find(parent, key_nextp, key_beg, key_end);

        /*
         * Neither matched
         */
        if (i == count && chain == NULL) {
                *brefp = NULL;
                return(NULL);
        }

        /*
         * Only chain matched.
         */
        if (i == count) {
                bref = &chain->bref;
                goto found;
        }

        /*
         * Only blockref matched.
         */
        if (chain == NULL) {
                bref = &base[i];
                goto found;
        }

        /*
         * Both in-memory and blockref matched, select the nearer element.
         *
         * If both are flush with the left-hand side or both are the
         * same distance away, select the chain.  In this situation the
         * chain must have been loaded from the matching blockmap.
         */
        if ((chain->bref.key <= key_beg && base[i].key <= key_beg) ||
            chain->bref.key == base[i].key) {
                KKASSERT(chain->bref.key == base[i].key);
                bref = &chain->bref;
                goto found;
        }

        /*
         * Select the nearer key
         */
        if (chain->bref.key < base[i].key) {
                bref = &chain->bref;
        } else {
                bref = &base[i];
                chain = NULL;   /* media bref chosen, no in-memory chain */
        }

        /*
         * If the bref is out of bounds we've exhausted our search.
         */
found:
        if (bref->key > key_end) {
                *brefp = NULL;
                chain = NULL;
        } else {
                *brefp = bref;
        }
        return(chain);
}

/*
 * Locate the specified block array element and delete it.  The element
 * must exist.
 *
 * The spin lock on the related chain must be held.
 *
 * NOTE: live_count was adjusted when the chain was deleted, so it does not
 *	 need to be adjusted when we commit the media change.
 */
void
hammer2_base_delete(hammer2_chain_t *parent,
                    hammer2_blockref_t *base, int count,
                    hammer2_chain_t *chain,
                    hammer2_blockref_t *obref)
{
        hammer2_blockref_t *elm = &chain->bref;
        hammer2_blockref_t *scan;
        hammer2_key_t key_next;
        int i;

        /*
         * Delete element.  Expect the element to exist.
         *
         * XXX see caller, flush code not yet sophisticated enough to prevent
         *     re-flushed in some cases.
         */
        key_next = 0; /* max range */
        i = hammer2_base_find(parent, base, count, &key_next,
                              elm->key, elm->key);
        scan = &base[i];
        if (i == count || scan->type == HAMMER2_BREF_TYPE_EMPTY ||
            scan->key != elm->key ||
            ((chain->flags & HAMMER2_CHAIN_BLKMAPUPD) == 0 &&
             scan->keybits != elm->keybits)) {
                hammer2_spin_unex(&parent->core.spin);
                panic("delete base %p element not found at %d/%d elm %p\n",
                      base, i, count, elm);
                return;
        }

        /*
         * Update stats and zero the entry.
         *
         * NOTE: Handle radix == 0 (0 bytes) case.
         */
        if ((int)(scan->data_off & HAMMER2_OFF_MASK_RADIX)) {
                parent->bref.embed.stats.data_count -= (hammer2_off_t)1 <<
                        (int)(scan->data_off & HAMMER2_OFF_MASK_RADIX);
        }
        switch(scan->type) {
        case HAMMER2_BREF_TYPE_INODE:
                --parent->bref.embed.stats.inode_count;
                /* fall through */
        case HAMMER2_BREF_TYPE_DATA:
                if (parent->bref.leaf_count == HAMMER2_BLOCKREF_LEAF_MAX) {
                        /* leaf count saturated; hint a future recount */
                        atomic_set_int(&chain->flags,
                                       HAMMER2_CHAIN_HINT_LEAF_COUNT);
                } else {
                        if (parent->bref.leaf_count)
                                --parent->bref.leaf_count;
                }
                /* fall through */
        case HAMMER2_BREF_TYPE_INDIRECT:
                /* the leaf_count adjustment above only applies to leaves */
                if (scan->type != HAMMER2_BREF_TYPE_DATA) {
                        parent->bref.embed.stats.data_count -=
                                scan->embed.stats.data_count;
                        parent->bref.embed.stats.inode_count -=
                                scan->embed.stats.inode_count;
                }
                if (scan->type == HAMMER2_BREF_TYPE_INODE)
                        break;
                if (parent->bref.leaf_count == HAMMER2_BLOCKREF_LEAF_MAX) {
                        atomic_set_int(&chain->flags,
                                       HAMMER2_CHAIN_HINT_LEAF_COUNT);
                } else {
                        if (parent->bref.leaf_count <= scan->leaf_count)
                                parent->bref.leaf_count = 0;
                        else
                                parent->bref.leaf_count -= scan->leaf_count;
                }
                break;
        case HAMMER2_BREF_TYPE_DIRENT:
                if (parent->bref.leaf_count == HAMMER2_BLOCKREF_LEAF_MAX) {
                        atomic_set_int(&chain->flags,
                                       HAMMER2_CHAIN_HINT_LEAF_COUNT);
                } else {
                        if (parent->bref.leaf_count)
                                --parent->bref.leaf_count;
                }
                /* fall through (default takes no action) */
        default:
                break;
        }

        /* return the old blockref to the caller before zeroing it */
        if (obref)
                *obref = *scan;
        bzero(scan, sizeof(*scan));

        /*
         * We can only optimize parent->core.live_zero for live chains.
         */
        if (parent->core.live_zero == i + 1) {
                while (--i >= 0 && base[i].type == HAMMER2_BREF_TYPE_EMPTY)
                        ;
                parent->core.live_zero = i + 1;
        }

        /*
         * Clear appropriate blockmap flags in chain.
         */
        atomic_clear_int(&chain->flags, HAMMER2_CHAIN_BLKMAPPED |
                                        HAMMER2_CHAIN_BLKMAPUPD);
}

/*
 * Insert the specified element.  The block array must not already have the
 * element and must have space available for the insertion.
 *
 * The spin lock on the related chain must be held.
 *
 * NOTE: live_count was adjusted when the chain was deleted, so it does not
 *	 need to be adjusted when we commit the media change.
 */
void
hammer2_base_insert(hammer2_chain_t *parent,
                    hammer2_blockref_t *base, int count,
                    hammer2_chain_t *chain, hammer2_blockref_t *elm)
{
        hammer2_key_t key_next;
        hammer2_key_t xkey;
        int i;
        int j;
        int k;
        int l;
        int u = 1;      /* debug tag reported by the validate panic below */

        /*
         * Insert new element.  Expect the element to not already exist
         * unless we are replacing it.
         *
         * XXX see caller, flush code not yet sophisticated enough to prevent
         *     re-flushed in some cases.
         */
        key_next = 0; /* max range */
        i = hammer2_base_find(parent, base, count, &key_next,
                              elm->key, elm->key);

        /*
         * Shortcut fill optimization, typical ordered insertion(s) may not
         * require a search.
         */
        KKASSERT(i >= 0 && i <= count);

        /*
         * Set appropriate blockmap flags in chain (if not NULL)
         */
        if (chain)
                atomic_set_int(&chain->flags, HAMMER2_CHAIN_BLKMAPPED);

        /*
         * Update stats and zero the entry
         */
        if ((int)(elm->data_off & HAMMER2_OFF_MASK_RADIX)) {
                parent->bref.embed.stats.data_count += (hammer2_off_t)1 <<
                        (int)(elm->data_off & HAMMER2_OFF_MASK_RADIX);
        }
        switch(elm->type) {
        case HAMMER2_BREF_TYPE_INODE:
                ++parent->bref.embed.stats.inode_count;
                /* fall through */
        case HAMMER2_BREF_TYPE_DATA:
                if (parent->bref.leaf_count != HAMMER2_BLOCKREF_LEAF_MAX)
                        ++parent->bref.leaf_count;
                /* fall through */
        case HAMMER2_BREF_TYPE_INDIRECT:
                if (elm->type != HAMMER2_BREF_TYPE_DATA) {
                        parent->bref.embed.stats.data_count +=
                                elm->embed.stats.data_count;
                        parent->bref.embed.stats.inode_count +=
                                elm->embed.stats.inode_count;
                }
                if (elm->type == HAMMER2_BREF_TYPE_INODE)
                        break;
                /* aggregate leaf counts, saturating at LEAF_MAX */
                if (parent->bref.leaf_count + elm->leaf_count <
                    HAMMER2_BLOCKREF_LEAF_MAX) {
                        parent->bref.leaf_count += elm->leaf_count;
                } else {
                        parent->bref.leaf_count = HAMMER2_BLOCKREF_LEAF_MAX;
                }
                break;
        case HAMMER2_BREF_TYPE_DIRENT:
                if (parent->bref.leaf_count != HAMMER2_BLOCKREF_LEAF_MAX)
                        ++parent->bref.leaf_count;
                break;
        default:
                break;
        }


        /*
         * We can only optimize parent->core.live_zero for live chains.
         */
        if (i == count && parent->core.live_zero < count) {
                i = parent->core.live_zero++;
                base[i] = *elm;
                return;
        }

        /* xkey is the inclusive end of elm's key range */
        xkey = elm->key + ((hammer2_key_t)1 << elm->keybits) - 1;
        if (i != count && (base[i].key < elm->key || xkey >= base[i].key)) {
                hammer2_spin_unex(&parent->core.spin);
                panic("insert base %p overlapping elements at %d elm %p\n",
                      base, i, elm);
        }

        /*
         * Try to find an empty slot before or after, shifting the
         * intervening elements to keep the array sorted.
         */
        j = i;
        k = i;
        while (j > 0 || k < count) {
                --j;
                if (j >= 0 && base[j].type == HAMMER2_BREF_TYPE_EMPTY) {
                        if (j == i - 1) {
                                base[j] = *elm;
                        } else {
                                bcopy(&base[j+1], &base[j],
                                      (i - j - 1) * sizeof(*base));
                                base[i - 1] = *elm;
                        }
                        goto validate;
                }
                ++k;
                if (k < count && base[k].type == HAMMER2_BREF_TYPE_EMPTY) {
                        bcopy(&base[i], &base[i+1],
                              (k - i) * sizeof(hammer2_blockref_t));
                        base[i] = *elm;

                        /*
                         * We can only update parent->core.live_zero for live
                         * chains.
                         */
                        if (parent->core.live_zero <= k)
                                parent->core.live_zero = k + 1;
                        u = 2;
                        goto validate;
                }
        }
        panic("hammer2_base_insert: no room!");

        /*
         * Debugging - verify the array remains sorted with no key overlaps.
         */
validate:
        key_next = 0;
        for (l = 0; l < count; ++l) {
                if (base[l].type != HAMMER2_BREF_TYPE_EMPTY) {
                        key_next = base[l].key +
                                   ((hammer2_key_t)1 << base[l].keybits) - 1;
                        break;
                }
        }
        while (++l < count) {
                if (base[l].type != HAMMER2_BREF_TYPE_EMPTY) {
                        if (base[l].key <= key_next)
                                panic("base_insert %d %d,%d,%d fail %p:%d", u, i, j, k, base, l);
                        key_next = base[l].key +
                                   ((hammer2_key_t)1 << base[l].keybits) - 1;

                }
        }

}

#if 0

/*
 * Sort the blockref array for the chain.  Used by the flush code to
 * sort the blockref[] array.
 *
 * The chain must be exclusively locked AND spin-locked.
 */
typedef hammer2_blockref_t *hammer2_blockref_p;

/*
 * kqsort() comparison callback: empty elements sort to the end,
 * everything else sorts ascending by key.
 */
static
int
hammer2_base_sort_callback(const void *v1, const void *v2)
{
        hammer2_blockref_p bref1 = *(const hammer2_blockref_p *)v1;
        hammer2_blockref_p bref2 = *(const hammer2_blockref_p *)v2;

        /*
         * Make sure empty elements are placed at the end of the array
         */
        if (bref1->type == HAMMER2_BREF_TYPE_EMPTY) {
                if (bref2->type == HAMMER2_BREF_TYPE_EMPTY)
                        return(0);
                return(1);
        } else if (bref2->type == HAMMER2_BREF_TYPE_EMPTY) {
                return(-1);
        }

        /*
         * Sort by key
         */
        if (bref1->key < bref2->key)
                return(-1);
        if (bref1->key > bref2->key)
                return(1);
        return(0);
}

void
hammer2_base_sort(hammer2_chain_t *chain)
{
        hammer2_blockref_t *base;
        int count;

        /* locate the blockref array embedded in this chain's media data */
        switch(chain->bref.type) {
        case HAMMER2_BREF_TYPE_INODE:
                /*
                 * Special shortcut for embedded data returns the inode
                 * itself.  Callers must detect this condition and access
                 * the embedded data (the strategy code does this for us).
                 *
                 * This is only applicable to regular files and softlinks.
                 */
                if (chain->data->ipdata.meta.op_flags &
                    HAMMER2_OPFLAG_DIRECTDATA) {
                        return;
                }
                base = &chain->data->ipdata.u.blockset.blockref[0];
                count = HAMMER2_SET_COUNT;
                break;
        case HAMMER2_BREF_TYPE_FREEMAP_NODE:
        case HAMMER2_BREF_TYPE_INDIRECT:
                /*
                 * Optimize indirect blocks in the INITIAL state to avoid
                 * I/O.
                 */
                KKASSERT((chain->flags & HAMMER2_CHAIN_INITIAL) == 0);
                base = &chain->data->npdata[0];
                count = chain->bytes / sizeof(hammer2_blockref_t);
                break;
        case HAMMER2_BREF_TYPE_VOLUME:
                base = &chain->data->voldata.sroot_blockset.blockref[0];
                count = HAMMER2_SET_COUNT;
                break;
        case HAMMER2_BREF_TYPE_FREEMAP:
                base = &chain->data->blkset.blockref[0];
                count = HAMMER2_SET_COUNT;
                break;
        default:
                panic("hammer2_base_sort: unrecognized "
                      "blockref(A) type: %d",
                      chain->bref.type);
                base = NULL;    /* safety */
                count = 0;      /* safety */
                break;
        }
        kqsort(base, count, sizeof(*base), hammer2_base_sort_callback);
}

#endif

/*
 * Set the check data for a chain.  This can be a heavy-weight operation
 * and typically only runs on-flush.  For file data check data is calculated
 * when the logical buffers are flushed.
 */
void
hammer2_chain_setcheck(hammer2_chain_t *chain, void *bdata)
{
        /* the chain now has valid check data */
        atomic_clear_int(&chain->flags, HAMMER2_CHAIN_NOTTESTED);

        switch(HAMMER2_DEC_CHECK(chain->bref.methods)) {
        case HAMMER2_CHECK_NONE:
                break;
        case HAMMER2_CHECK_DISABLED:
                break;
        case HAMMER2_CHECK_ISCSI32:
                chain->bref.check.iscsi32.value =
                        hammer2_icrc32(bdata, chain->bytes);
                break;
        case HAMMER2_CHECK_XXHASH64:
                chain->bref.check.xxhash64.value =
                        XXH64(bdata, chain->bytes, XXH_HAMMER2_SEED);
                break;
        case HAMMER2_CHECK_SHA192:
                {
                        SHA256_CTX hash_ctx;
                        union {
                                uint8_t digest[SHA256_DIGEST_LENGTH];
                                uint64_t digest64[SHA256_DIGEST_LENGTH/8];
                        } u;

                        SHA256_Init(&hash_ctx);
                        SHA256_Update(&hash_ctx, bdata, chain->bytes);
                        SHA256_Final(u.digest, &hash_ctx);
                        /* fold the 256-bit digest down to 192 bits */
                        u.digest64[2] ^= u.digest64[3];
                        bcopy(u.digest,
                              chain->bref.check.sha192.data,
                              sizeof(chain->bref.check.sha192.data));
                }
                break;
        case HAMMER2_CHECK_FREEMAP:
                chain->bref.check.freemap.icrc32 =
                        hammer2_icrc32(bdata, chain->bytes);
                break;
        default:
                kprintf("hammer2_chain_setcheck: unknown check type %02x\n",
                        chain->bref.methods);
                break;
        }
}

/*
 * Characterize a failed check code and try to trace back to the inode.
 */
static void
hammer2_characterize_failed_chain(hammer2_chain_t *chain, uint64_t check,
                                  int bits)
{
        hammer2_chain_t *lchain;
        hammer2_chain_t *ochain;
        int did;

        /* rate-limited; bail if the header line was suppressed */
        did = krateprintf(&krate_h2chk,
                          "chain %016jx.%02x (%s) meth=%02x CHECK FAIL "
                          "(flags=%08x, bref/data ",
                          chain->bref.data_off,
                          chain->bref.type,
                          hammer2_bref_type_str(chain->bref.type),
                          chain->bref.methods,
                          chain->flags);
        if (did == 0)
                return;

        if (bits == 32) {
                kprintf("%08x/%08x)\n",
                        chain->bref.check.iscsi32.value,
                        (uint32_t)check);
        } else {
                kprintf("%016jx/%016jx)\n",
                        chain->bref.check.xxhash64.value,
                        check);
        }

        /*
         * Run up the chains to try to find the governing inode so we
         * can report it.
         *
         * XXX This error reporting is not really MPSAFE
         */
        ochain = chain;
        lchain = chain;
        while (chain && chain->bref.type != HAMMER2_BREF_TYPE_INODE) {
                lchain = chain;
                chain = chain->parent;
        }

        if (chain && chain->bref.type == HAMMER2_BREF_TYPE_INODE &&
            ((chain->bref.flags & HAMMER2_BREF_FLAG_PFSROOT) == 0 ||
             (lchain->bref.key & HAMMER2_DIRHASH_VISIBLE))) {
                kprintf(" Resides at/in inode %ld\n",
                        (long)chain->bref.key);
        } else if (chain && chain->bref.type == HAMMER2_BREF_TYPE_INODE) {
                kprintf(" Resides in inode index - CRITICAL!!!\n");
        } else {
                kprintf(" Resides in root index - CRITICAL!!!\n");
        }
        if (ochain->hmp) {
                const char *pfsname = "UNKNOWN";
                int i;

                /* map the originating device back to a PFS name if possible */
                if (ochain->pmp) {
                        for (i = 0; i < HAMMER2_MAXCLUSTER; ++i) {
                                if (ochain->pmp->pfs_hmps[i] == ochain->hmp &&
                                    ochain->pmp->pfs_names[i]) {
                                        pfsname = ochain->pmp->pfs_names[i];
                                        break;
                                }
                        }
                }
                kprintf(" In pfs %s on device %s\n",
                        pfsname, ochain->hmp->devrepname);
        }
}

/*
 * Returns non-zero on success, 0 on failure.
5621 */ 5622 int 5623 hammer2_chain_testcheck(hammer2_chain_t *chain, void *bdata) 5624 { 5625 uint32_t check32; 5626 uint64_t check64; 5627 int r; 5628 5629 if (chain->flags & HAMMER2_CHAIN_NOTTESTED) 5630 return 1; 5631 5632 switch(HAMMER2_DEC_CHECK(chain->bref.methods)) { 5633 case HAMMER2_CHECK_NONE: 5634 r = 1; 5635 break; 5636 case HAMMER2_CHECK_DISABLED: 5637 r = 1; 5638 break; 5639 case HAMMER2_CHECK_ISCSI32: 5640 check32 = hammer2_icrc32(bdata, chain->bytes); 5641 r = (chain->bref.check.iscsi32.value == check32); 5642 if (r == 0) { 5643 hammer2_characterize_failed_chain(chain, check32, 32); 5644 } 5645 hammer2_process_icrc32 += chain->bytes; 5646 break; 5647 case HAMMER2_CHECK_XXHASH64: 5648 check64 = XXH64(bdata, chain->bytes, XXH_HAMMER2_SEED); 5649 r = (chain->bref.check.xxhash64.value == check64); 5650 if (r == 0) { 5651 hammer2_characterize_failed_chain(chain, check64, 64); 5652 } 5653 hammer2_process_xxhash64 += chain->bytes; 5654 break; 5655 case HAMMER2_CHECK_SHA192: 5656 { 5657 SHA256_CTX hash_ctx; 5658 union { 5659 uint8_t digest[SHA256_DIGEST_LENGTH]; 5660 uint64_t digest64[SHA256_DIGEST_LENGTH/8]; 5661 } u; 5662 5663 SHA256_Init(&hash_ctx); 5664 SHA256_Update(&hash_ctx, bdata, chain->bytes); 5665 SHA256_Final(u.digest, &hash_ctx); 5666 u.digest64[2] ^= u.digest64[3]; 5667 if (bcmp(u.digest, 5668 chain->bref.check.sha192.data, 5669 sizeof(chain->bref.check.sha192.data)) == 0) { 5670 r = 1; 5671 } else { 5672 r = 0; 5673 krateprintf(&krate_h2chk, 5674 "chain %016jx.%02x meth=%02x " 5675 "CHECK FAIL\n", 5676 chain->bref.data_off, 5677 chain->bref.type, 5678 chain->bref.methods); 5679 } 5680 } 5681 break; 5682 case HAMMER2_CHECK_FREEMAP: 5683 r = (chain->bref.check.freemap.icrc32 == 5684 hammer2_icrc32(bdata, chain->bytes)); 5685 if (r == 0) { 5686 int did; 5687 5688 did = krateprintf(&krate_h2chk, 5689 "chain %016jx.%02x meth=%02x " 5690 "CHECK FAIL\n", 5691 chain->bref.data_off, 5692 chain->bref.type, 5693 chain->bref.methods); 5694 if (did) { 
5695 kprintf("freemap.icrc %08x icrc32 %08x (%d)\n", 5696 chain->bref.check.freemap.icrc32, 5697 hammer2_icrc32(bdata, chain->bytes), 5698 chain->bytes); 5699 if (chain->dio) { 5700 kprintf("dio %p buf %016jx,%d " 5701 "bdata %p/%p\n", 5702 chain->dio, 5703 (intmax_t)chain->dio->bp->b_loffset, 5704 chain->dio->bp->b_bufsize, 5705 bdata, 5706 chain->dio->bp->b_data); 5707 } 5708 } 5709 } 5710 break; 5711 default: 5712 kprintf("hammer2_chain_testcheck: unknown check type %02x\n", 5713 chain->bref.methods); 5714 r = 1; 5715 break; 5716 } 5717 return r; 5718 } 5719 5720 /* 5721 * Acquire the chain and parent representing the specified inode for the 5722 * device at the specified cluster index. 5723 * 5724 * The flags passed in are LOOKUP flags, not RESOLVE flags. 5725 * 5726 * If we are unable to locate the inode, HAMMER2_ERROR_EIO or HAMMER2_ERROR_CHECK 5727 * is returned. In case of error, *chainp and/or *parentp may still be returned 5728 * non-NULL. 5729 * 5730 * The caller may pass-in a locked *parentp and/or *chainp, or neither. 5731 * They will be unlocked and released by this function. The *parentp and 5732 * *chainp representing the located inode are returned locked. 5733 * 5734 * The returned error includes any error on the returned chain in addition to 5735 * errors incurred while trying to lookup the inode. However, a chain->error 5736 * might not be recognized if HAMMER2_LOOKUP_NODATA is passed. This flag may 5737 * not be passed to this function. 5738 */ 5739 int 5740 hammer2_chain_inode_find(hammer2_pfs_t *pmp, hammer2_key_t inum, 5741 int clindex, int flags, 5742 hammer2_chain_t **parentp, hammer2_chain_t **chainp) 5743 { 5744 hammer2_chain_t *parent; 5745 hammer2_chain_t *rchain; 5746 hammer2_key_t key_dummy; 5747 hammer2_inode_t *ip; 5748 int resolve_flags; 5749 int error; 5750 5751 KKASSERT((flags & HAMMER2_LOOKUP_NODATA) == 0); 5752 5753 resolve_flags = (flags & HAMMER2_LOOKUP_SHARED) ? 
5754 HAMMER2_RESOLVE_SHARED : 0; 5755 5756 /* 5757 * Caller expects us to replace these. 5758 */ 5759 if (*chainp) { 5760 hammer2_chain_unlock(*chainp); 5761 hammer2_chain_drop(*chainp); 5762 *chainp = NULL; 5763 } 5764 if (*parentp) { 5765 hammer2_chain_unlock(*parentp); 5766 hammer2_chain_drop(*parentp); 5767 *parentp = NULL; 5768 } 5769 5770 /* 5771 * Be very careful, this is a backend function and we CANNOT 5772 * lock any frontend inode structure we find. But we have to 5773 * look the inode up this way first in case it exists but is 5774 * detached from the radix tree. 5775 */ 5776 ip = hammer2_inode_lookup(pmp, inum); 5777 if (ip) { 5778 *chainp = hammer2_inode_chain_and_parent(ip, clindex, 5779 parentp, 5780 resolve_flags); 5781 hammer2_inode_drop(ip); 5782 if (*chainp) 5783 return (*chainp)->error; 5784 hammer2_chain_unlock(*chainp); 5785 hammer2_chain_drop(*chainp); 5786 *chainp = NULL; 5787 if (*parentp) { 5788 hammer2_chain_unlock(*parentp); 5789 hammer2_chain_drop(*parentp); 5790 *parentp = NULL; 5791 } 5792 } 5793 5794 /* 5795 * Inodes hang off of the iroot (bit 63 is clear, differentiating 5796 * inodes from root directory entries in the key lookup). 5797 */ 5798 parent = hammer2_inode_chain(pmp->iroot, clindex, resolve_flags); 5799 rchain = NULL; 5800 if (parent) { 5801 /* 5802 * NOTE: rchain can be returned as NULL even if error == 0 5803 * (i.e. not found) 5804 */ 5805 rchain = hammer2_chain_lookup(&parent, &key_dummy, 5806 inum, inum, 5807 &error, flags); 5808 /* 5809 * Propagate a chain-specific error to caller. 5810 * 5811 * If the chain is not errored, we must still validate that the inode 5812 * number is correct, because all hell will break loose if it isn't 5813 * correct. It should always be correct so print to the console and 5814 * simulate a CHECK error if it is not. 
5815 */ 5816 if (error == 0 && rchain) { 5817 error = rchain->error; 5818 if (error == 0 && rchain->data) { 5819 if (inum != rchain->data->ipdata.meta.inum) { 5820 kprintf("hammer2_chain_inode_find: lookup inum %ld, " 5821 "got valid inode but with inum %ld\n", 5822 (long)inum, (long)rchain->data->ipdata.meta.inum); 5823 error = HAMMER2_ERROR_CHECK; 5824 rchain->error = error; 5825 } 5826 } 5827 } 5828 } else { 5829 error = HAMMER2_ERROR_EIO; 5830 } 5831 *parentp = parent; 5832 *chainp = rchain; 5833 5834 return error; 5835 } 5836 5837 /* 5838 * Used by the bulkscan code to snapshot the synchronized storage for 5839 * a volume, allowing it to be scanned concurrently against normal 5840 * operation. 5841 */ 5842 hammer2_chain_t * 5843 hammer2_chain_bulksnap(hammer2_dev_t *hmp) 5844 { 5845 hammer2_chain_t *copy; 5846 5847 copy = hammer2_chain_alloc(hmp, hmp->spmp, &hmp->vchain.bref); 5848 copy->data = kmalloc(sizeof(copy->data->voldata), 5849 hmp->mmsg, M_WAITOK | M_ZERO); 5850 hammer2_voldata_lock(hmp); 5851 copy->data->voldata = hmp->volsync; 5852 hammer2_voldata_unlock(hmp); 5853 5854 return copy; 5855 } 5856 5857 void 5858 hammer2_chain_bulkdrop(hammer2_chain_t *copy) 5859 { 5860 KKASSERT(copy->bref.type == HAMMER2_BREF_TYPE_VOLUME); 5861 KKASSERT(copy->data); 5862 kfree(copy->data, copy->hmp->mmsg); 5863 copy->data = NULL; 5864 hammer2_chain_drop(copy); 5865 } 5866 5867 /* 5868 * Returns non-zero if the chain (INODE or DIRENT) matches the 5869 * filename. 
5870 */ 5871 int 5872 hammer2_chain_dirent_test(hammer2_chain_t *chain, const char *name, 5873 size_t name_len) 5874 { 5875 const hammer2_inode_data_t *ripdata; 5876 5877 if (chain->bref.type == HAMMER2_BREF_TYPE_INODE) { 5878 ripdata = &chain->data->ipdata; 5879 if (ripdata->meta.name_len == name_len && 5880 bcmp(ripdata->filename, name, name_len) == 0) { 5881 return 1; 5882 } 5883 } 5884 if (chain->bref.type == HAMMER2_BREF_TYPE_DIRENT && 5885 chain->bref.embed.dirent.namlen == name_len) { 5886 if (name_len > sizeof(chain->bref.check.buf) && 5887 bcmp(chain->data->buf, name, name_len) == 0) { 5888 return 1; 5889 } 5890 if (name_len <= sizeof(chain->bref.check.buf) && 5891 bcmp(chain->bref.check.buf, name, name_len) == 0) { 5892 return 1; 5893 } 5894 } 5895 return 0; 5896 } 5897 5898 /* 5899 * Debugging 5900 */ 5901 void 5902 hammer2_dump_chain(hammer2_chain_t *chain, int tab, int bi, int *countp, 5903 char pfx, u_int flags) 5904 { 5905 hammer2_chain_t *scan; 5906 hammer2_chain_t *parent; 5907 5908 --*countp; 5909 if (*countp == 0) { 5910 kprintf("%*.*s...\n", tab, tab, ""); 5911 return; 5912 } 5913 if (*countp < 0) 5914 return; 5915 kprintf("%*.*s%c-chain %p %s.%-3d %016jx %016jx/%-2d mir=%016jx\n", 5916 tab, tab, "", pfx, chain, 5917 hammer2_bref_type_str(chain->bref.type), bi, 5918 chain->bref.data_off, chain->bref.key, chain->bref.keybits, 5919 chain->bref.mirror_tid); 5920 5921 kprintf("%*.*s [%08x] (%s) refs=%d", 5922 tab, tab, "", 5923 chain->flags, 5924 ((chain->bref.type == HAMMER2_BREF_TYPE_INODE && 5925 chain->data) ? 
(char *)chain->data->ipdata.filename : "?"), 5926 chain->refs); 5927 5928 parent = chain->parent; 5929 if (parent) 5930 kprintf("\n%*.*s p=%p [pflags %08x prefs %d]", 5931 tab, tab, "", 5932 parent, parent->flags, parent->refs); 5933 if (RB_EMPTY(&chain->core.rbtree)) { 5934 kprintf("\n"); 5935 } else { 5936 int bi = 0; 5937 kprintf(" {\n"); 5938 RB_FOREACH(scan, hammer2_chain_tree, &chain->core.rbtree) { 5939 if ((scan->flags & flags) || flags == (u_int)-1) { 5940 hammer2_dump_chain(scan, tab + 4, bi, countp, 5941 'a', flags); 5942 } 5943 bi++; 5944 } 5945 if (chain->bref.type == HAMMER2_BREF_TYPE_INODE && chain->data) 5946 kprintf("%*.*s}(%s)\n", tab, tab, "", 5947 chain->data->ipdata.filename); 5948 else 5949 kprintf("%*.*s}\n", tab, tab, ""); 5950 } 5951 } 5952